From 283ada82a03057a4e1b7a0f049b19e2cd629e58a Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Mon, 4 Jul 2022 10:42:04 +0200 Subject: [PATCH] WIP: Remove SYS_CLONE (to be done in userspace). --- src/allocator/mod.rs | 3 +- src/arch/x86_64/interrupt/syscall.rs | 20 - src/arch/x86_64/paging/mapper.rs | 24 +- src/arch/x86_64/paging/mod.rs | 8 +- src/context/context.rs | 26 +- src/context/list.rs | 15 +- src/context/memory.rs | 318 ++++++--------- src/debugger.rs | 8 +- src/ptrace.rs | 10 +- src/scheme/live.rs | 3 +- src/scheme/memory.rs | 22 +- src/scheme/mod.rs | 2 +- src/scheme/proc.rs | 162 +++++--- src/scheme/sys/context.rs | 20 +- src/scheme/user.rs | 23 +- src/syscall/debug.rs | 3 +- src/syscall/driver.rs | 15 +- src/syscall/fs.rs | 10 +- src/syscall/mod.rs | 34 +- src/syscall/process.rs | 563 ++------------------------- 20 files changed, 380 insertions(+), 909 deletions(-) diff --git a/src/allocator/mod.rs b/src/allocator/mod.rs index dfc618a..0617843 100644 --- a/src/allocator/mod.rs +++ b/src/allocator/mod.rs @@ -1,3 +1,4 @@ +use rmm::Flusher; use crate::paging::{ActivePageTable, Page, PageFlags, VirtualAddress, mapper::PageFlushAll, entry::EntryFlags}; #[cfg(not(feature="slab"))] @@ -13,7 +14,7 @@ mod linked_list; mod slab; unsafe fn map_heap(active_table: &mut ActivePageTable, offset: usize, size: usize) { - let flush_all = PageFlushAll::new(); + let mut flush_all = PageFlushAll::new(); let heap_start_page = Page::containing_address(VirtualAddress::new(offset)); let heap_end_page = Page::containing_address(VirtualAddress::new(offset + size-1)); diff --git a/src/arch/x86_64/interrupt/syscall.rs b/src/arch/x86_64/interrupt/syscall.rs index 70fd2a6..803a6d8 100644 --- a/src/arch/x86_64/interrupt/syscall.rs +++ b/src/arch/x86_64/interrupt/syscall.rs @@ -160,23 +160,3 @@ interrupt_stack!(syscall, |stack| { syscall::syscall(scratch.rax, stack.preserved.rbx, scratch.rcx, scratch.rdx, scratch.rsi, scratch.rdi, rbp, stack) }) }); - -#[naked] -pub 
unsafe extern "C" fn clone_ret() { - core::arch::asm!(concat!( - // The address of this instruction is injected by `clone` in process.rs, on - // top of the stack syscall->inner in this file, which is done using the rbp - // register we save there. - // - // The top of our stack here is the address pointed to by rbp, which is: - // - // - the previous rbp - // - the return location - // - // Our goal is to return from the parent function, inner, so we restore - // rbp... - "pop rbp\n", - // ...and we return to the address at the top of the stack - "ret\n", - ), options(noreturn)); -} diff --git a/src/arch/x86_64/paging/mapper.rs b/src/arch/x86_64/paging/mapper.rs index ecc2d0b..95f404b 100644 --- a/src/arch/x86_64/paging/mapper.rs +++ b/src/arch/x86_64/paging/mapper.rs @@ -1,13 +1,15 @@ use super::{linear_phys_to_virt, Page, PAGE_SIZE, PageFlags, PhysicalAddress, VirtualAddress}; + +use crate::ipi::{ipi, IpiKind, IpiTarget}; use crate::memory::{allocate_frames, deallocate_frames, Enomem, Frame}; use super::RmmA; use super::table::{Table, Level4}; -pub use rmm::{PageFlush, PageFlushAll}; +pub use rmm::{Flusher, PageFlush, PageFlushAll}; pub struct Mapper<'table> { - p4: &'table mut Table, + pub(in super) p4: &'table mut Table, } impl core::fmt::Debug for Mapper<'_> { @@ -192,3 +194,21 @@ impl<'table> Mapper<'table> { .map(|frame| PhysicalAddress::new(frame.start_address().data() + offset)) } } + +pub struct InactiveFlusher { _inner: () } +impl InactiveFlusher { + // TODO: cpu id + pub fn new() -> Self { Self { _inner: () } } +} + +impl Flusher for InactiveFlusher { + fn consume(&mut self, flush: PageFlush) { + // TODO: Push to TLB "mailbox" or tell it to reload CR3 if there are too many entries. 
+ unsafe { flush.ignore(); } + } +} +impl Drop for InactiveFlusher { + fn drop(&mut self) { + ipi(IpiKind::Tlb, IpiTarget::Other); + } +} diff --git a/src/arch/x86_64/paging/mod.rs b/src/arch/x86_64/paging/mod.rs index cf732b9..aca613e 100644 --- a/src/arch/x86_64/paging/mod.rs +++ b/src/arch/x86_64/paging/mod.rs @@ -14,6 +14,7 @@ use self::table::{Level4, Table}; pub use rmm::{ Arch as RmmArch, + Flusher, PageFlags, PhysicalAddress, TableKind, @@ -112,7 +113,7 @@ unsafe fn map_percpu(cpu_id: usize, mapper: &mut Mapper) -> PageFlushAll { let start = crate::KERNEL_PERCPU_OFFSET + crate::KERNEL_PERCPU_SIZE * cpu_id; let end = start + size; - let flush_all = PageFlushAll::new(); + let mut flush_all = PageFlushAll::new(); let start_page = Page::containing_address(VirtualAddress::new(start)); let end_page = Page::containing_address(VirtualAddress::new(end - 1)); for page in Page::range_inclusive(start_page, end_page) { @@ -288,6 +289,11 @@ impl ActivePageTable { pub unsafe fn address(&self) -> usize { RmmA::table().data() } + pub fn mapper<'a>(&'a mut self) -> Mapper<'a> { + Mapper { + p4: self.p4, + } + } } impl Drop for ActivePageTable { diff --git a/src/context/context.rs b/src/context/context.rs index 614318a..b351ee5 100644 --- a/src/context/context.rs +++ b/src/context/context.rs @@ -16,13 +16,13 @@ use crate::arch::{interrupt::InterruptStack, paging::PAGE_SIZE}; use crate::common::unique::Unique; use crate::context::arch; use crate::context::file::{FileDescriptor, FileDescription}; -use crate::context::memory::UserGrants; +use crate::context::memory::{AddrSpace, new_addrspace, UserGrants}; use crate::ipi::{ipi, IpiKind, IpiTarget}; use crate::scheme::{SchemeNamespace, FileHandle}; use crate::sync::WaitMap; use crate::syscall::data::SigAction; -use crate::syscall::error::{Result, Error, ENOMEM}; +use crate::syscall::error::{Result, Error, ENOMEM, ESRCH}; use crate::syscall::flag::{SIG_DFL, SigActionFlags}; /// Unique identifier for a context (i.e. `pid`). 
@@ -226,8 +226,9 @@ pub struct Context { pub ksig: Option<(arch::Context, Option>, Option>, u8)>, /// Restore ksig context on next switch pub ksig_restore: bool, - /// User grants - pub grants: Arc>, + /// Address space containing a page table lock, and grants. Normally this will have a value, + /// but can be None while the context is being reaped. + pub addr_space: Option>>, /// The name of the context pub name: Arc>>, /// The current working directory @@ -307,7 +308,7 @@ impl Context { let syscall_head = AlignedBox::try_zeroed()?; let syscall_tail = AlignedBox::try_zeroed()?; - Ok(Context { + let mut this = Context { id, pgid: id, ppid: ContextId::from(0), @@ -336,7 +337,7 @@ impl Context { kstack: None, ksig: None, ksig_restore: false, - grants: Arc::new(RwLock::new(UserGrants::default())), + addr_space: None, name: Arc::new(RwLock::new(String::new().into_boxed_str())), cwd: Arc::new(RwLock::new(String::new())), files: Arc::new(RwLock::new(Vec::new())), @@ -351,7 +352,9 @@ impl Context { regs: None, ptrace_stop: false, sigstack: None, - }) + }; + this.set_addr_space(new_addrspace()?.1); + Ok(this) } /// Make a relative path absolute @@ -520,4 +523,13 @@ impl Context { None } } + + pub fn addr_space(&self) -> Result<&Arc>> { + self.addr_space.as_ref().ok_or(Error::new(ESRCH)) + } + pub fn set_addr_space(&mut self, addr_space: Arc>) { + assert!(!self.running); + self.arch.set_page_utable(addr_space.read().frame.utable.start_address().data()); + self.addr_space = Some(addr_space); + } } diff --git a/src/context/list.rs b/src/context/list.rs index f1a9b54..dc11536 100644 --- a/src/context/list.rs +++ b/src/context/list.rs @@ -7,7 +7,7 @@ use core::sync::atomic::Ordering; use crate::paging::{ActivePageTable, TableKind}; use spin::RwLock; -use crate::syscall::error::{Result, Error, EAGAIN}; +use crate::syscall::error::{Result, Error, EAGAIN, ENOMEM}; use super::context::{Context, ContextId}; /// Context list type @@ -79,7 +79,11 @@ impl ContextList { let context_lock 
= self.new_context()?; { let mut context = context_lock.write(); - let mut fx = unsafe { Box::from_raw(crate::ALLOCATOR.alloc(Layout::from_size_align_unchecked(1024, 16)) as *mut [u8; 1024]) }; + let mut fx = unsafe { + let ptr = crate::ALLOCATOR.alloc(Layout::from_size_align_unchecked(1024, 16)) as *mut [u8; 1024]; + if ptr.is_null() { return Err(Error::new(ENOMEM)); } + Box::from_raw(ptr) + }; for b in fx.iter_mut() { *b = 0; } @@ -100,13 +104,6 @@ impl ContextList { context.arch.set_context_handle(); } - let mut new_tables = super::memory::setup_new_utable()?; - new_tables.take(); - - context.arch.set_page_utable(unsafe { new_tables.new_utable.address() }); - #[cfg(target_arch = "aarch64")] - context.arch.set_page_ktable(unsafe { new_tables.new_ktable.address() }); - context.arch.set_fx(fx.as_ptr() as usize); context.arch.set_stack(stack.as_ptr() as usize + offset); context.kfx = Some(fx); diff --git a/src/context/memory.rs b/src/context/memory.rs index 7461da6..95e57f5 100644 --- a/src/context/memory.rs +++ b/src/context/memory.rs @@ -5,7 +5,8 @@ use core::cmp::{self, Eq, Ordering, PartialEq, PartialOrd}; use core::fmt::{self, Debug}; use core::intrinsics; use core::ops::Deref; -use spin::Mutex; +use core::sync::atomic; +use spin::{Mutex, RwLock}; use syscall::{ flag::MapFlags, error::*, @@ -14,9 +15,8 @@ use rmm::Arch as _; use crate::arch::paging::PAGE_SIZE; use crate::context::file::FileDescriptor; -use crate::ipi::{ipi, IpiKind, IpiTarget}; use crate::memory::Frame; -use crate::paging::mapper::PageFlushAll; +use crate::paging::mapper::{Flusher, InactiveFlusher, Mapper, PageFlushAll}; use crate::paging::{ActivePageTable, InactivePageTable, Page, PageFlags, PageIter, PhysicalAddress, RmmA, TableKind, VirtualAddress}; /// Round down to the nearest multiple of page size @@ -47,6 +47,76 @@ impl Drop for UnmapResult { } } +int_like!(PtId, usize); + +static ADDRSPACES: RwLock>>> = RwLock::new(BTreeMap::new()); +static NEXT_PTID: atomic::AtomicUsize = 
atomic::AtomicUsize::new(1); + +pub fn new_addrspace() -> Result<(PtId, Arc>)> { + let id = PtId::from(NEXT_PTID.fetch_add(1, atomic::Ordering::Relaxed)); + let arc = Arc::try_new(RwLock::new(AddrSpace::new(id)?)).map_err(|_| Error::new(ENOMEM))?; + ADDRSPACES.write().insert(id, Arc::clone(&arc)); + Ok((id, arc)) +} +pub fn addrspace(id: PtId) -> Option>> { + ADDRSPACES.read().get(&id).map(Arc::clone) +} + +#[derive(Debug)] +pub struct AddrSpace { + pub frame: Tables, + pub grants: UserGrants, + pub id: PtId, +} +impl AddrSpace { + /// Attempt to clone an existing address space so that all mappings are copied (CoW). + // TODO: Actually use CoW! + pub fn try_clone(&self) -> Result<(PtId, Arc>)> { + let (id, mut new) = new_addrspace()?; + + // TODO: Abstract away this. + let (mut inactive, mut active); + + // TODO: aarch64 + let mut this_mapper = if self.frame.utable.start_address().data() == unsafe { x86::controlregs::cr3() } as usize { + active = unsafe { ActivePageTable::new(rmm::TableKind::User) }; + active.mapper() + } else { + inactive = unsafe { InactivePageTable::from_address(self.frame.utable.start_address().data()) }; + inactive.mapper() + }; + let mut new_mapper = unsafe { InactivePageTable::from_address(new.read().frame.utable.start_address().data()) }; + + for grant in self.grants.iter() { + // TODO: Fail if there are borrowed grants, rather than simply ignoring them? + if !grant.is_owned() { continue; } + + let new_grant = Grant::zeroed(Page::containing_address(grant.start_address()), grant.size() / PAGE_SIZE, grant.flags(), &mut new_mapper.mapper(), ())?; + + for page in new_grant.pages() { + // FIXME: ENOMEM is wrong here, it cannot fail. 
+ let current_frame = this_mapper.translate_page(page).ok_or(Error::new(ENOMEM))?.start_address().data() as *const u8; + let new_frame = new_mapper.mapper().translate_page(page).ok_or(Error::new(ENOMEM))?.start_address().data() as *mut u8; + + // TODO: Replace this with CoW + unsafe { + new_frame.copy_from_nonoverlapping(current_frame, PAGE_SIZE); + } + } + + new.write().grants.insert(new_grant); + } + Ok((id, new)) + } + pub fn new(id: PtId) -> Result { + Ok(Self { + grants: UserGrants::new(), + frame: setup_new_utable()?, + id, + }) + } +} + #[derive(Debug)] pub struct UserGrants { inner: BTreeSet, @@ -406,7 +476,7 @@ impl Grant { pub fn physmap(from: PhysicalAddress, to: VirtualAddress, size: usize, flags: PageFlags) -> Grant { let mut active_table = unsafe { ActivePageTable::new(to.kind()) }; - let flush_all = PageFlushAll::new(); + let mut flush_all = PageFlushAll::new(); let start_page = Page::containing_address(to); let end_page = Page::containing_address(VirtualAddress::new(to.data() + size - 1)); @@ -429,40 +499,10 @@ impl Grant { desc_opt: None, } } - - pub fn map(to: VirtualAddress, size: usize, flags: PageFlags) -> Grant { - let mut active_table = unsafe { ActivePageTable::new(to.kind()) }; - - let flush_all = PageFlushAll::new(); - - let start_page = Page::containing_address(to); - let end_page = Page::containing_address(VirtualAddress::new(to.data() + size - 1)); - for page in Page::range_inclusive(start_page, end_page) { - let result = active_table - .map(page, flags) - .expect("TODO: handle ENOMEM in Grant::map"); - flush_all.consume(result); - } - - flush_all.flush(); - - Grant { - region: Region { - start: to, - size, - }, - flags, - mapped: true, - owned: true, - desc_opt: None, - } - } - pub fn zeroed_inactive(dst: Page, page_count: usize, flags: PageFlags, table: &mut InactivePageTable) -> Result { - let mut inactive_mapper = table.mapper(); - + pub fn zeroed(dst: Page, page_count: usize, flags: PageFlags, mapper: &mut Mapper, mut flusher: impl 
Flusher) -> Result { for page in Page::range_exclusive(dst, dst.next_by(page_count)) { - let flush = inactive_mapper.map(page, flags).map_err(|_| Error::new(ENOMEM))?; - unsafe { flush.ignore(); } + let flush = mapper.map(page, flags).map_err(|_| Error::new(ENOMEM))?; + flusher.consume(flush); } Ok(Grant { region: Region { start: dst.start_address(), size: page_count * PAGE_SIZE }, flags, mapped: true, owned: true, desc_opt: None }) } @@ -487,8 +527,6 @@ impl Grant { unsafe { inactive_flush.ignore(); } } - ipi(IpiKind::Tlb, IpiTarget::Other); - Grant { region: Region { start: dst, @@ -501,97 +539,22 @@ impl Grant { } } - /// This function should only be used in clone! - pub(crate) fn secret_clone(&self, inactive_table: &mut InactivePageTable) -> Grant { - assert!(self.mapped); - - let active_table = unsafe { ActivePageTable::new(TableKind::User) }; - let mut inactive_mapper = inactive_table.mapper(); - - for page in self.pages() { - //TODO: One function to do both? - let flags = active_table.translate_page_flags(page).expect("grant references unmapped memory"); - let old_frame = active_table.translate_page(page).expect("grant references unmapped memory"); - - let frame = if self.owned { - // TODO: CoW paging - let new_frame = crate::memory::allocate_frames(1) - .expect("TODO: handle ENOMEM in Grant::secret_clone"); - - unsafe { - // We might as well use self.start_address() directly, but if we were to - // introduce SMAP it would help to only move to/from kernel memory, and we are - // copying physical frames anyway. - let src_pointer = RmmA::phys_to_virt(old_frame.start_address()).data() as *const u8; - let dst_pointer = RmmA::phys_to_virt(new_frame.start_address()).data() as *mut u8; - dst_pointer.copy_from_nonoverlapping(src_pointer, PAGE_SIZE); - } - - new_frame - } else { - old_frame - }; - - let flush = inactive_mapper.map_to(page, frame, flags); - // SAFETY: This happens within an inactive table. 
- unsafe { flush.ignore() } - } - - Grant { - region: Region { - start: self.region.start, - size: self.region.size, - }, - flags: self.flags, - mapped: true, - owned: self.owned, - desc_opt: self.desc_opt.clone() - } - } - pub fn flags(&self) -> PageFlags { self.flags } - pub fn unmap(mut self) -> UnmapResult { + pub fn unmap(mut self, mapper: &mut Mapper, mut flusher: impl Flusher) -> UnmapResult { assert!(self.mapped); - let mut active_table = unsafe { ActivePageTable::new(self.start_address().kind()) }; - - let flush_all = PageFlushAll::new(); - for page in self.pages() { - let (result, frame) = active_table.unmap_return(page, false); + let (result, frame) = mapper.unmap_return(page, false); if self.owned { //TODO: make sure this frame can be safely freed, physical use counter crate::memory::deallocate_frames(frame, 1); } - flush_all.consume(result); + flusher.consume(result); } - flush_all.flush(); - - self.mapped = false; - - // TODO: This imposes a large cost on unmapping, but that cost cannot be avoided without modifying fmap and funmap - UnmapResult { file_desc: self.desc_opt.take() } - } - - pub fn unmap_inactive(mut self, other_table: &mut InactivePageTable) -> UnmapResult { - assert!(self.mapped); - - for page in self.pages() { - let (result, frame) = other_table.mapper().unmap_return(page, false); - if self.owned { - //TODO: make sure this frame can be safely freed, physical use counter - crate::memory::deallocate_frames(frame, 1); - } - // This is not the active table, so the flush can be ignored - unsafe { result.ignore(); } - } - - ipi(IpiKind::Tlb, IpiTarget::Other); - self.mapped = false; // TODO: This imposes a large cost on unmapping, but that cost cannot be avoided without modifying fmap and funmap @@ -636,34 +599,6 @@ impl Grant { Some((before_grant, self, after_grant)) } - pub fn move_to_address_space(&mut self, new_start: Page, new_page_table: &mut InactivePageTable, flags: PageFlags, flush_all: &mut PageFlushAll) -> Grant { - 
assert!(self.mapped); - - let mut active_table = unsafe { ActivePageTable::new(TableKind::User) }; - let mut new_mapper = new_page_table.mapper(); - let keep_parents = false; - - for (i, page) in self.pages().enumerate() { - unsafe { - let (flush, frame) = active_table.unmap_return(page, keep_parents); - flush_all.consume(flush); - - let flush = new_mapper.map_to(new_start.next_by(i), frame, flags); - flush.ignore(); - } - } - - let was_owned = core::mem::replace(&mut self.owned, false); - self.mapped = false; - - Self { - region: Region::new(new_start.start_address(), self.region.size), - flags, - mapped: true, - owned: was_owned, - desc_opt: self.desc_opt.clone(), - } - } } impl Deref for Grant { @@ -704,79 +639,68 @@ impl Drop for Grant { pub const DANGLING: usize = 1 << (usize::BITS - 2); -pub struct NewTables { +#[derive(Debug)] +pub struct Tables { #[cfg(target_arch = "aarch64")] - pub new_ktable: InactivePageTable, - pub new_utable: InactivePageTable, + pub ktable: Frame, - taken: bool, -} -impl NewTables { - pub fn take(&mut self) { - self.taken = true; - } + pub utable: Frame, } -impl Drop for NewTables { +impl Drop for Tables { fn drop(&mut self) { - if self.taken { return } + use crate::memory::deallocate_frames; + deallocate_frames(Frame::containing_address(PhysicalAddress::new(self.utable.start_address().data())), 1); - unsafe { - use crate::memory::deallocate_frames; - deallocate_frames(Frame::containing_address(PhysicalAddress::new(self.new_utable.address())), 1); - - #[cfg(target_arch = "aarch64")] - deallocate_frames(Frame::containing_address(PhysicalAddress::new(self.new_ktable.address())), 1); - } + #[cfg(target_arch = "aarch64")] + deallocate_frames(Frame::containing_address(PhysicalAddress::new(self.ktable.start_address().data())), 1); } } /// Allocates a new identically mapped ktable and empty utable (same memory on x86_64). 
-pub fn setup_new_utable() -> Result { - let mut new_utable = unsafe { InactivePageTable::new(crate::memory::allocate_frames(1).ok_or(Error::new(ENOMEM))?) }; +pub fn setup_new_utable() -> Result { + let mut new_utable = crate::memory::allocate_frames(1).ok_or(Error::new(ENOMEM))?; - let mut new_ktable = if cfg!(target_arch = "aarch64") { - unsafe { InactivePageTable::new(crate::memory::allocate_frames(1).ok_or(Error::new(ENOMEM))?) } - } else { - unsafe { InactivePageTable::from_address(new_utable.address()) } - }; + // TODO: There is only supposed to be one ktable, right? Use a global variable to store the + // ktable (or access it from a control register) on architectures which have ktables, or obtain + // it from *any* utable on architectures which do not. + #[cfg(target_arch = "aarch64")] + let new_ktable = crate::memory::allocate_frames(1).ok_or(Error::new(ENOMEM))?; let active_ktable = unsafe { ActivePageTable::new(TableKind::Kernel) }; - // Copy kernel image mapping - { - let frame = active_ktable.p4()[crate::KERNEL_PML4].pointed_frame().expect("kernel image not mapped"); - let flags = active_ktable.p4()[crate::KERNEL_PML4].flags(); + #[cfg(target_arch = "aarch64")] + let ktable = &new_ktable; - new_ktable.mapper().p4_mut()[crate::KERNEL_PML4].set(frame, flags); - } + #[cfg(not(target_arch = "aarch64"))] + let ktable = &new_utable; + + let mut new_mapper = unsafe { InactivePageTable::from_address(ktable.start_address().data()) }; + + let mut copy_mapping = |p4_no| { + let frame = active_ktable.p4()[p4_no].pointed_frame().expect("kernel image not mapped"); + let flags = active_ktable.p4()[p4_no].flags(); + + new_mapper.mapper().p4_mut()[p4_no].set(frame, flags); + }; + // TODO: Just copy all 256 mappings? 
+ + // Copy kernel image mapping + copy_mapping(crate::KERNEL_PML4); // Copy kernel heap mapping - { - let frame = active_ktable.p4()[crate::KERNEL_HEAP_PML4].pointed_frame().expect("kernel heap not mapped"); - let flags = active_ktable.p4()[crate::KERNEL_HEAP_PML4].flags(); - - new_ktable.mapper().p4_mut()[crate::KERNEL_HEAP_PML4].set(frame, flags); - } + copy_mapping(crate::KERNEL_HEAP_PML4); // Copy physmap mapping - { - let frame = active_ktable.p4()[crate::PHYS_PML4].pointed_frame().expect("physmap not mapped"); - let flags = active_ktable.p4()[crate::PHYS_PML4].flags(); - new_ktable.mapper().p4_mut()[crate::PHYS_PML4].set(frame, flags); - } - // Copy kernel percpu (similar to TLS) mapping. - { - let frame = active_ktable.p4()[crate::KERNEL_PERCPU_PML4].pointed_frame().expect("kernel TLS not mapped"); - let flags = active_ktable.p4()[crate::KERNEL_PERCPU_PML4].flags(); - new_ktable.mapper().p4_mut()[crate::KERNEL_PERCPU_PML4].set(frame, flags); - } + copy_mapping(crate::PHYS_PML4); - Ok(NewTables { - taken: false, - new_utable, + // Copy kernel percpu (similar to TLS) mapping. + copy_mapping(crate::KERNEL_PERCPU_PML4); + + Ok(Tables { + utable: new_utable, #[cfg(target_arch = "aarch64")] - new_ktable, + ktable: new_ktable, }) } diff --git a/src/debugger.rs b/src/debugger.rs index e49edf9..6157f24 100644 --- a/src/debugger.rs +++ b/src/debugger.rs @@ -19,11 +19,11 @@ pub unsafe fn debugger() { if let Some((a, b, c, d, e, f)) = context.syscall { println!("syscall: {}", crate::syscall::debug::format_call(a, b, c, d, e, f)); } - { - let grants = context.grants.read(); - if ! grants.is_empty() { + if let Some(ref addr_space) = context.addr_space { + let addr_space = addr_space.read(); + if ! 
addr_space.grants.is_empty() { println!("grants:"); - for grant in grants.iter() { + for grant in addr_space.grants.iter() { let region = grant.region(); println!( " virt 0x{:016x}:0x{:016x} size 0x{:08x} {}", diff --git a/src/ptrace.rs b/src/ptrace.rs index 2234aa6..dc76ee4 100644 --- a/src/ptrace.rs +++ b/src/ptrace.rs @@ -190,7 +190,11 @@ pub fn is_traced(pid: ContextId) -> bool { /// Trigger a notification to the event: scheme fn proc_trigger_event(file_id: usize, flags: EventFlags) { - event::trigger(proc::PROC_SCHEME_ID.load(Ordering::SeqCst), file_id, flags); + if let Some(scheme_id) = proc::PROC_SCHEME_ID.get() { + event::trigger(*scheme_id, file_id, flags); + } else { + log::warn!("Failed to trigger proc event: scheme never initialized"); + } } /// Dispatch an event to any tracer tracing `self`. This will cause @@ -471,6 +475,10 @@ fn page_aligned_chunks(mut start: usize, mut len: usize) -> impl Iterator impl Iterator> + '_ { let mut table = unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) }; + // TODO: Iterate over grants instead to avoid yielding None too many times. What if + // context_memory is used for an entire process's address space, where the stack is at the very + // end? Alternatively we can skip pages recursively, i.e. first skip unpopulated PML4s and then + // onwards. page_aligned_chunks(offset.data(), len).map(move |(addr, len)| unsafe { // [addr,addr+len) is a continuous page starting and/or ending at page boundaries, with the // possible exception of an unaligned head/tail. 
diff --git a/src/scheme/live.rs b/src/scheme/live.rs index 9ccb4b0..b5d1e17 100644 --- a/src/scheme/live.rs +++ b/src/scheme/live.rs @@ -5,6 +5,7 @@ use alloc::collections::BTreeMap; use core::{slice, str}; use core::sync::atomic::{AtomicUsize, Ordering}; use spin::RwLock; +use rmm::Flusher; use syscall::data::Stat; use syscall::error::*; @@ -55,7 +56,7 @@ impl DiskScheme { let virt = phys + crate::PHYS_OFFSET; unsafe { let mut active_table = ActivePageTable::new(TableKind::Kernel); - let flush_all = PageFlushAll::new(); + let mut flush_all = PageFlushAll::new(); let start_page = Page::containing_address(VirtualAddress::new(virt)); let end_page = Page::containing_address(VirtualAddress::new(virt + size - 1)); for page in Page::range_inclusive(start_page, end_page) { diff --git a/src/scheme/memory.rs b/src/scheme/memory.rs index 4dd65c4..44c7d16 100644 --- a/src/scheme/memory.rs +++ b/src/scheme/memory.rs @@ -1,7 +1,7 @@ use crate::context; use crate::context::memory::{page_flags, Grant}; use crate::memory::{free_frames, used_frames, PAGE_SIZE}; -use crate::paging::{ActivePageTable, VirtualAddress}; +use crate::paging::{ActivePageTable, mapper::PageFlushAll, Page, VirtualAddress}; use crate::syscall::data::{Map, OldMap, StatVfs}; use crate::syscall::error::*; use crate::syscall::flag::MapFlags; @@ -23,25 +23,11 @@ impl MemoryScheme { let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; let context = context_lock.read(); - let mut grants = context.grants.write(); + let mut addr_space = context.addr_space()?.write(); - let region = grants.find_free_at(VirtualAddress::new(map.address), map.size, map.flags)?.round(); + let region = addr_space.grants.find_free_at(VirtualAddress::new(map.address), map.size, map.flags)?.round(); - { - // Make sure it's *absolutely* not mapped already - // TODO: Keep track of all allocated memory so this isn't necessary - - let active_table = unsafe { ActivePageTable::new(rmm::TableKind::User) }; - - for page in region.pages() { 
- if let Some(flags) = active_table.translate_page_flags(page).filter(|flags| flags.has_present()) { - println!("page at {:#x} was already mapped, flags: {:?}", page.start_address().data(), flags); - return Err(Error::new(EEXIST)) - } - } - } - - grants.insert(Grant::map(region.start_address(), region.size(), page_flags(map.flags))); + addr_space.grants.insert(Grant::zeroed(Page::containing_address(region.start_address()), map.size / PAGE_SIZE, page_flags(map.flags), &mut *unsafe { ActivePageTable::new(rmm::TableKind::User) }, PageFlushAll::new())?); Ok(region.start_address().data()) } diff --git a/src/scheme/mod.rs b/src/scheme/mod.rs index 885c8e0..0311356 100644 --- a/src/scheme/mod.rs +++ b/src/scheme/mod.rs @@ -301,7 +301,7 @@ pub fn schemes_mut() -> RwLockWriteGuard<'static, SchemeList> { } pub trait KernelScheme: Scheme + Send + Sync + 'static { - #[allow(unused_arguments)] + #[allow(unused_variables)] fn kfmap(&self, number: usize, map: &syscall::data::Map, target_context: &Arc>) -> Result { log::error!("Returning ENOSYS since kfmap can only be called on UserScheme schemes"); Err(Error::new(ENOSYS)) diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs index 70bc077..31222a3 100644 --- a/src/scheme/proc.rs +++ b/src/scheme/proc.rs @@ -1,6 +1,6 @@ use crate::{ - arch::paging::{ActivePageTable, InactivePageTable, mapper::{Mapper, PageFlushAll}, Page, VirtualAddress}, - context::{self, Context, ContextId, Status, memory::{Grant, page_flags, Region}}, + arch::paging::{ActivePageTable, Flusher, InactivePageTable, mapper::{InactiveFlusher, Mapper, PageFlushAll}, Page, RmmA, VirtualAddress}, + context::{self, Context, ContextId, Status, memory::{addrspace, Grant, new_addrspace, PtId, page_flags, Region}}, memory::PAGE_SIZE, ptrace, scheme::{AtomicSchemeId, SchemeId}, @@ -32,7 +32,7 @@ use core::{ str, sync::atomic::{AtomicUsize, Ordering}, }; -use spin::RwLock; +use spin::{Once, RwLock}; fn read_from(dst: &mut [u8], src: &[u8], offset: &mut usize) -> Result { let 
byte_count = cmp::min(dst.len(), src.len().saturating_sub(*offset)); @@ -68,7 +68,7 @@ where } fn try_stop_context(pid: ContextId, mut callback: F) -> Result where - F: FnMut(&mut Context) -> Result, + F: FnOnce(&mut Context) -> Result, { if pid == context::context_id() { return Err(Error::new(EBADF)); @@ -118,6 +118,8 @@ enum Operation { Sigstack, Attr(Attr), Files, + AddrSpace { id: PtId }, + CurrentAddrSpace, } #[derive(Clone, Copy, PartialEq, Eq)] enum Attr { @@ -216,7 +218,7 @@ impl Handle { } } -pub static PROC_SCHEME_ID: AtomicSchemeId = AtomicSchemeId::default(); +pub static PROC_SCHEME_ID: Once = Once::new(); pub struct ProcScheme { next_id: AtomicUsize, @@ -231,7 +233,7 @@ pub enum Access { impl ProcScheme { pub fn new(scheme_id: SchemeId) -> Self { - PROC_SCHEME_ID.store(scheme_id, Ordering::SeqCst); + PROC_SCHEME_ID.call_once(|| scheme_id); Self { next_id: AtomicUsize::new(0), @@ -246,6 +248,11 @@ impl ProcScheme { access: Access::Restricted, } } + fn new_handle(&self, handle: Handle) -> Result { + let id = self.next_id.fetch_add(1, Ordering::Relaxed); + let _ = self.handles.write().insert(id, handle); + Ok(id) + } } impl Scheme for ProcScheme { @@ -264,7 +271,8 @@ impl Scheme for ProcScheme { let operation = match parts.next() { Some("mem") => Operation::Memory, - Some("grants") => Operation::Grants, + Some("addrspace") => Operation::AddrSpace { id: context::contexts().current().ok_or(Error::new(ESRCH))?.read().addr_space()?.read().id }, + Some("current-addrspace") => Operation::CurrentAddrSpace, Some("regs/float") => Operation::Regs(RegsKind::Float), Some("regs/int") => Operation::Regs(RegsKind::Int), Some("regs/env") => Operation::Regs(RegsKind::Env), @@ -340,9 +348,16 @@ impl Scheme for ProcScheme { } }; - let id = self.next_id.fetch_add(1, Ordering::SeqCst); + let id = self.new_handle(Handle { + info: Info { + flags, + pid, + operation, + }, + data, + })?; - if let Operation::Trace { .. 
} = operation { + if let Operation::Trace = operation { if !ptrace::try_new_session(pid, id) { // There is no good way to handle id being occupied for nothing // here, is there? @@ -355,44 +370,41 @@ impl Scheme for ProcScheme { } } - self.handles.write().insert(id, Handle { - info: Info { - flags, - pid, - operation, - }, - data, - }); Ok(id) } - /// Using dup for `proc:` simply opens another operation on the same PID - /// ```rust,ignore - /// let trace = syscall::open("proc:1234/trace")?; - /// - /// // let regs = syscall::open("proc:1234/regs/int")?; - /// let regs = syscall::dup(trace, "regs/int")?; - /// ``` + /// Dup is currently used to implement clone() and execve(). fn dup(&self, old_id: usize, buf: &[u8]) -> Result { let info = { let handles = self.handles.read(); let handle = handles.get(&old_id).ok_or(Error::new(EBADF))?; + handle.info }; - let buf_str = str::from_utf8(buf).map_err(|_| Error::new(EINVAL))?; + self.new_handle(match info.operation { + Operation::AddrSpace { id } => { + let new_ptid = match buf { + // TODO: Better way to obtain new empty address spaces, perhaps using SYS_OPEN. But + // in that case, what scheme? + b"empty" => new_addrspace()?.0, + // Reuse same ID. 
+ b"shared" => id, + b"exclusive" => addrspace(id).ok_or(Error::new(EBADFD))?.read().try_clone()?.0, - let mut path = format!("{}/", info.pid.into()); - path.push_str(buf_str); - - let (uid, gid) = { - let contexts = context::contexts(); - let context = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context.read(); - (context.euid, context.egid) - }; - - self.open(&path, info.flags, uid, gid) + _ => return Err(Error::new(EINVAL)), + }; + Handle { + info: Info { + flags: 0, + pid: info.pid, + operation: Operation::AddrSpace { id: new_ptid }, + }, + data: OperationData::Other, + } + } + _ => return Err(Error::new(EINVAL)), + }) } fn seek(&self, id: usize, pos: isize, whence: usize) -> Result { @@ -421,6 +433,7 @@ impl Scheme for ProcScheme { }; match info.operation { + Operation::Grants => return Err(Error::new(ENOSYS)), Operation::Static(_) => { let mut handles = self.handles.write(); let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; @@ -455,8 +468,7 @@ impl Scheme for ProcScheme { data.offset = VirtualAddress::new(data.offset.data() + bytes_read); Ok(bytes_read) }, - // TODO: Allow reading process mappings? - Operation::Grants => return Err(Error::new(EBADF)), + Operation::AddrSpace { .. } => return Err(Error::new(EBADF)), Operation::Regs(kind) => { union Output { @@ -586,6 +598,14 @@ impl Scheme for ProcScheme { read_from(buf, &data.buf, &mut data.offset) } + // TODO: Replace write() with SYS_DUP_FORWARD. + // TODO: Find a better way to switch address spaces, since they also require switching + // the instruction and stack pointer. 
Maybe remove `/regs` altogether and replace it + // with `/ctx` + Operation::CurrentAddrSpace => { + //read_from(buf, &usize::to_ne_bytes(id.into()), &mut 0) + Ok(0) + } } } @@ -606,6 +626,7 @@ impl Scheme for ProcScheme { }; match info.operation { + Operation::Grants => Err(Error::new(ENOSYS)), Operation::Static(_) => Err(Error::new(EBADF)), Operation::Memory => { // Won't context switch, don't worry about the locks @@ -631,7 +652,7 @@ impl Scheme for ProcScheme { data.offset = VirtualAddress::new(data.offset.data() + bytes_written); Ok(bytes_written) }, - Operation::Grants => { + Operation::AddrSpace { .. } => { // FIXME: Forbid upgrading external mappings. let pid = self.handles.read() @@ -649,51 +670,52 @@ impl Scheme for ProcScheme { return Err(Error::new(EINVAL)); } - let is_inactive = pid != context::context_id(); + let is_active = pid == context::context_id(); let callback = |context: &mut Context| { - let mut inactive = is_inactive.then(|| unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) }); + let (mut inactive, mut active); - let mut grants = context.grants.write(); + let mut addr_space = context.addr_space()?.write(); - let conflicting = grants.conflicts(region).map(|g| *g.region()).collect::>(); + let (mut mapper, mut flusher) = if is_active { + active = (unsafe { ActivePageTable::new(rmm::TableKind::User) }, PageFlushAll::new()); + (active.0.mapper(), &mut active.1 as &mut dyn Flusher) + } else { + inactive = (unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) }, InactiveFlusher::new()); + (inactive.0.mapper(), &mut inactive.1 as &mut dyn Flusher) + }; + + let conflicting = addr_space.grants.conflicts(region).map(|g| *g.region()).collect::>(); for conflicting_region in conflicting { - let whole_grant = grants.take(&conflicting_region).ok_or(Error::new(EBADFD))?; + let whole_grant = addr_space.grants.take(&conflicting_region).ok_or(Error::new(EBADFD))?; let (before_opt, current, after_opt) = 
whole_grant.extract(region.intersect(conflicting_region)).ok_or(Error::new(EBADFD))?; if let Some(before) = before_opt { - grants.insert(before); + addr_space.grants.insert(before); } if let Some(after) = after_opt { - grants.insert(after); + addr_space.grants.insert(after); } - let res = if let Some(ref mut inactive) = inactive { - current.unmap_inactive(inactive) - } else { - current.unmap() - }; + let res = current.unmap(&mut mapper, &mut flusher); + if res.file_desc.is_some() { - drop(grants); return Err(Error::new(EBUSY)); } - // TODO: Partial free if grant is mapped externally. + // TODO: Partial free if grant is mapped externally, or fail and force + // userspace to do it. } if flags.intersects(MapFlags::PROT_READ | MapFlags::PROT_EXEC | MapFlags::PROT_WRITE) { - let base = VirtualAddress::new(base); + let base = Page::containing_address(VirtualAddress::new(base)); - if let Some(ref mut inactive) = inactive { - grants.insert(Grant::zeroed_inactive(Page::containing_address(base), size / PAGE_SIZE, page_flags(flags), inactive).unwrap()); - } else { - grants.insert(Grant::map(base, size, page_flags(flags))); - } + addr_space.grants.insert(Grant::zeroed(base, size / PAGE_SIZE, page_flags(flags), &mut mapper, flusher)?); } Ok(()) }; - if is_inactive { + if is_active { with_context_mut(pid, callback)?; } else { try_stop_context(pid, callback)?; @@ -868,6 +890,24 @@ impl Scheme for ProcScheme { Ok(buf.len()) } Operation::Files => return Err(Error::new(EBADF)), + Operation::CurrentAddrSpace { .. 
} => { + let mut iter = buf.array_chunks::<{mem::size_of::()}>().copied().map(usize::from_ne_bytes); + let id = iter.next().ok_or(Error::new(EINVAL))?; + let sp = iter.next().ok_or(Error::new(EINVAL))?; + let ip = iter.next().ok_or(Error::new(EINVAL))?; + + let space = addrspace(PtId::from(id)).ok_or(Error::new(EINVAL))?; + + try_stop_context(info.pid, |context| unsafe { + let regs = &mut ptrace::regs_for_mut(context).ok_or(Error::new(ESRCH))?.iret; + regs.rip = ip; + regs.rsp = sp; + + context.set_addr_space(space); + Ok(()) + })?; + Ok(3 * mem::size_of::()) + } } } @@ -911,6 +951,8 @@ impl Scheme for ProcScheme { Operation::Attr(Attr::Uid) => "uid", Operation::Attr(Attr::Gid) => "gid", Operation::Files => "files", + Operation::AddrSpace { .. } => "addrspace", + Operation::CurrentAddrSpace => "current-addrspace", }); read_from(buf, &path.as_bytes(), &mut 0) diff --git a/src/scheme/sys/context.rs b/src/scheme/sys/context.rs index 1a776a2..8602e9b 100644 --- a/src/scheme/sys/context.rs +++ b/src/scheme/sys/context.rs @@ -28,11 +28,15 @@ pub fn resource() -> Result> { let mut stat_string = String::new(); // TODO: All user programs must have some grant in order for executable memory to even // exist, but is this a good indicator of whether it is user or kernel? 
- if context.grants.read().is_empty() { - stat_string.push('K'); + stat_string.push(if let Ok(addr_space) = context.addr_space() { + if addr_space.read().grants.is_empty() { + 'K' + } else { + 'U' + } } else { - stat_string.push('U'); - } + 'R' + }); match context.status { context::Status::Runnable => { stat_string.push('R'); @@ -79,9 +83,11 @@ pub fn resource() -> Result> { if let Some(ref kstack) = context.kstack { memory += kstack.len(); } - for grant in context.grants.read().iter() { - if grant.is_owned() { - memory += grant.size(); + if let Ok(addr_space) = context.addr_space() { + for grant in addr_space.read().grants.iter() { + if grant.is_owned() { + memory += grant.size(); + } } } diff --git a/src/scheme/user.rs b/src/scheme/user.rs index 55c7f20..c87e694 100644 --- a/src/scheme/user.rs +++ b/src/scheme/user.rs @@ -13,7 +13,7 @@ use crate::event; use crate::paging::{PAGE_SIZE, InactivePageTable, VirtualAddress}; use crate::scheme::{AtomicSchemeId, SchemeId}; use crate::sync::{WaitQueue, WaitMap}; -use crate::syscall::data::{Map, OldMap, Packet, Stat, StatVfs, TimeSpec}; +use crate::syscall::data::{Map, Packet, Stat, StatVfs, TimeSpec}; use crate::syscall::error::*; use crate::syscall::flag::{EventFlags, EVENT_READ, O_NONBLOCK, MapFlags, PROT_READ, PROT_WRITE}; use crate::syscall::number::*; @@ -145,15 +145,15 @@ impl UserInner { let mut new_table = unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) }; - let mut grants = context.grants.write(); + let mut addr_space = context.addr_space()?.write(); let src_address = round_down_pages(address); let offset = address - src_address; let src_region = Region::new(VirtualAddress::new(src_address), offset + size).round(); - let dst_region = grants.find_free_at(VirtualAddress::new(dst_address), src_region.size(), flags)?; + let dst_region = addr_space.grants.find_free_at(VirtualAddress::new(dst_address), src_region.size(), flags)?; //TODO: Use syscall_head and syscall_tail to avoid leaking data - 
grants.insert(Grant::map_inactive( + addr_space.grants.insert(Grant::map_inactive( src_region.start_address(), dst_region.start_address(), src_region.size(), @@ -166,7 +166,6 @@ impl UserInner { } pub fn release(&self, address: usize) -> Result<()> { - //dbg!(address); if address == DANGLING { return Ok(()); } @@ -174,13 +173,13 @@ impl UserInner { let mut context = context_lock.write(); let mut other_table = unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) }; - let mut grants = context.grants.write(); + let mut addr_space = context.addr_space()?.write(); - let region = match grants.contains(VirtualAddress::new(address)).map(Region::from) { + let region = match addr_space.grants.contains(VirtualAddress::new(address)).map(Region::from) { Some(region) => region, None => return Err(Error::new(EFAULT)), }; - grants.take(®ion).unwrap().unmap_inactive(&mut other_table); + addr_space.grants.take(®ion).unwrap().unmap(&mut other_table.mapper(), crate::paging::mapper::InactiveFlusher::new()); Ok(()) } @@ -242,8 +241,8 @@ impl UserInner { if let Ok(grant_address) = res { if let Some(context_lock) = context_weak.upgrade() { let context = context_lock.read(); - let mut grants = context.grants.write(); - grants.funmap.insert( + let mut addr_space = context.addr_space()?.write(); + addr_space.grants.funmap.insert( Region::new(grant_address, map.size), VirtualAddress::new(address) ); @@ -437,8 +436,8 @@ impl Scheme for UserScheme { let contexts = context::contexts(); let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; let context = context_lock.read(); - let mut grants = context.grants.write(); - let funmap = &mut grants.funmap; + let mut addr_space = context.addr_space()?.write(); + let funmap = &mut addr_space.grants.funmap; let entry = funmap.range(..=Region::byte(VirtualAddress::new(grant_address))).next_back(); let grant_address = VirtualAddress::new(grant_address); diff --git a/src/syscall/debug.rs b/src/syscall/debug.rs index 
ced9eec..993575f 100644 --- a/src/syscall/debug.rs +++ b/src/syscall/debug.rs @@ -1,8 +1,7 @@ use core::{ascii, mem}; use alloc::string::String; -use alloc::vec::Vec; -use super::data::{OldMap, Map, Stat, TimeSpec}; +use super::data::{Map, Stat, TimeSpec}; use super::flag::*; use super::number::*; use super::validate::*; diff --git a/src/syscall/driver.rs b/src/syscall/driver.rs index f1fc77f..103602a 100644 --- a/src/syscall/driver.rs +++ b/src/syscall/driver.rs @@ -88,9 +88,9 @@ pub fn inner_physmap(physical_address: usize, size: usize, flags: PhysmapFlags) let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; let context = context_lock.read(); - let mut grants = context.grants.write(); + let mut addr_space = context.addr_space()?.write(); - let dst_address = grants.find_free(size).ok_or(Error::new(ENOMEM))?; + let dst_address = addr_space.grants.find_free(size).ok_or(Error::new(ENOMEM))?; let mut page_flags = PageFlags::new().user(true); if flags.contains(PHYSMAP_WRITE) { @@ -104,7 +104,7 @@ pub fn inner_physmap(physical_address: usize, size: usize, flags: PhysmapFlags) page_flags = page_flags.custom_flag(EntryFlags::NO_CACHE.bits(), true); } - grants.insert(Grant::physmap( + addr_space.grants.insert(Grant::physmap( PhysicalAddress::new(physical_address), dst_address.start_address(), size, @@ -113,6 +113,7 @@ pub fn inner_physmap(physical_address: usize, size: usize, flags: PhysmapFlags) Ok(dst_address.start_address().data()) } +// TODO: Remove this syscall, funmap makes it redundant. 
pub fn physmap(physical_address: usize, size: usize, flags: PhysmapFlags) -> Result { enforce_root()?; inner_physmap(physical_address, size, flags) @@ -126,10 +127,12 @@ pub fn inner_physunmap(virtual_address: usize) -> Result { let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; let context = context_lock.read(); - let mut grants = context.grants.write(); + let mut addr_space = context.addr_space()?.write(); - if let Some(region) = grants.contains(VirtualAddress::new(virtual_address)).map(Region::from) { - grants.take(®ion).unwrap().unmap(); + if let Some(region) = addr_space.grants.contains(VirtualAddress::new(virtual_address)).map(Region::from) { + use crate::paging::{ActivePageTable, mapper::PageFlushAll, TableKind}; + + addr_space.grants.take(®ion).unwrap().unmap(&mut *unsafe { ActivePageTable::new(TableKind::User) }, PageFlushAll::new()); return Ok(0); } diff --git a/src/syscall/fs.rs b/src/syscall/fs.rs index cf833dc..21fc692 100644 --- a/src/syscall/fs.rs +++ b/src/syscall/fs.rs @@ -2,7 +2,6 @@ use alloc::sync::Arc; use alloc::vec::Vec; use core::str; -use core::sync::atomic::Ordering; use spin::RwLock; use crate::context::file::{FileDescriptor, FileDescription}; @@ -482,11 +481,11 @@ pub fn funmap(virtual_address: usize, length: usize) -> Result { let requested = Region::new(virtual_address, length); { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; + let context_lock = Arc::clone(context::contexts().current().ok_or(Error::new(ESRCH))?); let context = context_lock.read(); - let mut grants = context.grants.write(); + let mut addr_space = context.addr_space()?.write(); + let grants = &mut addr_space.grants; let conflicting: Vec = grants.conflicts(requested).map(Region::from).collect(); @@ -507,9 +506,10 @@ pub fn funmap(virtual_address: usize, length: usize) -> Result { if let Some(after) = after { grants.insert(after); } + use crate::paging::{ActivePageTable, mapper::PageFlushAll, 
TableKind}; // Remove irrelevant region - grant.unmap(); + grant.unmap(&mut *unsafe { ActivePageTable::new(TableKind::User) }, PageFlushAll::new()); } } diff --git a/src/syscall/mod.rs b/src/syscall/mod.rs index 2f4f7b2..e25cf04 100644 --- a/src/syscall/mod.rs +++ b/src/syscall/mod.rs @@ -25,9 +25,9 @@ pub use self::process::*; pub use self::time::*; pub use self::validate::*; -use self::data::{CloneInfo, ExecMemRange, Map, SigAction, Stat, TimeSpec}; +use self::data::{Map, SigAction, Stat, TimeSpec}; use self::error::{Error, Result, ENOSYS, EINVAL}; -use self::flag::{CloneFlags, MapFlags, PhysmapFlags, WaitFlags}; +use self::flag::{MapFlags, PhysmapFlags, WaitFlags}; use self::number::*; use crate::context::ContextId; @@ -112,36 +112,6 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u SYS_GETPGID => getpgid(ContextId::from(b)).map(ContextId::into), SYS_GETPPID => getppid().map(ContextId::into), - SYS_EXEC => exec(validate_slice(b as *const ExecMemRange, c)?, d, e), - SYS_CLONE => { - let b = CloneFlags::from_bits_truncate(b); - - let info = if b.contains(CloneFlags::CLONE_VM) { - if d < core::mem::size_of::() { - return Err(Error::new(EINVAL)); - } - Some(&validate_slice(c as *const CloneInfo, 1)?[0]) - } else { None }; - - #[cfg(not(target_arch = "x86_64"))] - { - //TODO: CLONE_STACK - let ret = clone(b, bp).map(ContextId::into); - ret - } - - #[cfg(target_arch = "x86_64")] - { - let old_rsp = stack.iret.rsp; - // TODO: Unify CLONE_STACK and CLONE_VM. 
- if b.contains(flag::CLONE_STACK) { - stack.iret.rsp = info.as_ref().ok_or(Error::new(EINVAL))?.target_stack; - } - let ret = clone(b, bp, info).map(ContextId::into); - stack.iret.rsp = old_rsp; - ret - } - }, SYS_EXIT => exit((b & 0xFF) << 8), SYS_KILL => kill(ContextId::from(b), c), SYS_WAITPID => waitpid(ContextId::from(b), c, WaitFlags::from_bits_truncate(d)).map(ContextId::into), diff --git a/src/syscall/process.rs b/src/syscall/process.rs index dc85d70..418f464 100644 --- a/src/syscall/process.rs +++ b/src/syscall/process.rs @@ -1,380 +1,27 @@ use alloc::{ boxed::Box, - collections::BTreeSet, - string::String, sync::Arc, vec::Vec, }; -use core::alloc::{GlobalAlloc, Layout}; -use core::convert::TryFrom; -use core::ops::DerefMut; -use core::{intrinsics, mem, str}; -use crate::context::file::{FileDescription, FileDescriptor}; +use core::mem; use spin::{RwLock, RwLockWriteGuard}; -use crate::context::{Context, ContextId, WaitpidKey}; -use crate::context::memory::{Grant, Region, NewTables, page_flags, setup_new_utable, UserGrants}; +use crate::context::{Context, ContextId, memory, WaitpidKey}; use crate::context; -#[cfg(not(feature="doc"))] -use crate::elf::{self, program_header}; use crate::interrupt; -use crate::ipi::{ipi, IpiKind, IpiTarget}; -use crate::memory::{allocate_frames, Frame, PhysicalAddress}; -use crate::paging::mapper::PageFlushAll; -use crate::paging::{ActivePageTable, InactivePageTable, Page, PageFlags, RmmA, TableKind, VirtualAddress, PAGE_SIZE}; -use crate::{ptrace, syscall}; -use crate::scheme::FileHandle; +use crate::paging::mapper::{Flusher, InactiveFlusher, PageFlushAll}; +use crate::paging::{ActivePageTable, InactivePageTable, Page, PageFlags, RmmA, RmmArch, TableKind, VirtualAddress, PAGE_SIZE}; +use crate::ptrace; use crate::start::usermode; -use crate::syscall::data::{CloneInfo, ExecMemRange, SigAction, Stat}; +use crate::syscall::data::SigAction; use crate::syscall::error::*; -use crate::syscall::flag::{wifcontinued, wifstopped, 
AT_ENTRY, AT_NULL, AT_PHDR, AT_PHENT, AT_PHNUM, CloneFlags, - CLONE_FILES, CLONE_FS, CLONE_SIGHAND, CLONE_STACK, CLONE_VFORK, CLONE_VM, - MapFlags, PROT_EXEC, PROT_READ, PROT_WRITE, PTRACE_EVENT_CLONE, - PTRACE_STOP_EXIT, SigActionFlags, SIG_BLOCK, SIG_DFL, SIG_SETMASK, SIG_UNBLOCK, - SIGCONT, SIGTERM, WaitFlags, WCONTINUED, WNOHANG, WUNTRACED}; +use crate::syscall::flag::{wifcontinued, wifstopped, MapFlags, PROT_EXEC, PROT_READ, PROT_WRITE, + PTRACE_STOP_EXIT, SIG_BLOCK, SIG_SETMASK, SIG_UNBLOCK, + SIGCONT, SIGTERM, WaitFlags, WCONTINUED, WNOHANG, WUNTRACED}; use crate::syscall::ptrace_event; -use crate::syscall::validate::{validate_slice, validate_slice_mut}; - -pub fn clone(flags: CloneFlags, stack_base: usize, info: Option<&CloneInfo>) -> Result { - let ppid; - let pid; - { - let pgid; - let ruid; - let rgid; - let rns; - let euid; - let egid; - let ens; - let umask; - let sigmask; - let mut cpu_id_opt = None; - let arch; - let vfork; - let mut kfx_opt = None; - let mut kstack_opt = None; - let mut offset = 0; - let mut grants; - let name; - let cwd; - let files; - let actions; - let old_sigstack; - - // Copy from old process - { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - - ppid = context.id; - pgid = context.pgid; - ruid = context.ruid; - rgid = context.rgid; - rns = context.rns; - euid = context.euid; - egid = context.egid; - ens = context.ens; - sigmask = context.sigmask; - umask = context.umask; - old_sigstack = context.sigstack; - - // Uncomment to disable threads on different CPUs - //TODO: fix memory allocation races when this is removed - if flags.contains(CLONE_VM) { - cpu_id_opt = context.cpu_id; - } - - // TODO: Fill with newest registers. 
- arch = context.arch.clone(); - - if let Some(ref fx) = context.kfx { - let new_fx = unsafe { - let new_fx_ptr = crate::ALLOCATOR.alloc(Layout::from_size_align_unchecked(1024, 16)); - if new_fx_ptr.is_null() { - // FIXME: It's mildly ironic that the only place where clone can fail with - // ENOMEM, is when copying 1024 bytes to merely store vector registers. - // Although in order to achieve full kernel-panic immunity, we'll need to - // completely phase out all usage of liballoc data structures, and use our - // own library/port liballoc, since panicking on OOM is not good for a - // kernel. - return Err(Error::new(ENOMEM)); - } - new_fx_ptr.copy_from_nonoverlapping(fx.as_ptr(), fx.len()); - Box::from_raw(new_fx_ptr as *mut [u8; 1024]) - }; - kfx_opt = Some(new_fx); - } - - #[cfg(target_arch = "x86_64")] - { - if let Some(ref stack) = context.kstack { - // Get the relative offset to the return address of the function - // obtaining `stack_base`. - // - // (base pointer - start of stack) - one - offset = stack_base - stack.as_ptr() as usize - mem::size_of::(); // Add clone ret - // FIXME: This is incredibly UB, making Rust think the current stack being - // copied is simply a regular immutable slice. This part should either be - // written in assembly or have clone moved to userspace. - let mut new_stack = stack.clone(); - - unsafe { - // Set clone's return value to zero. This is done because - // the clone won't return like normal, which means the value - // would otherwise never get set. 
- if let Some(regs) = ptrace::rebase_regs_ptr_mut(context.regs, Some(&mut new_stack)) { - (*regs).scratch.rax = 0; - } - - // Change the return address of the child (previously - // syscall) to the arch-specific clone_ret callback - let func_ptr = new_stack.as_mut_ptr().add(offset); - *(func_ptr as *mut usize) = interrupt::syscall::clone_ret as usize; - } - - kstack_opt = Some(new_stack); - } - } - - #[cfg(not(target_arch = "x86_64"))] - { - if let Some(ref stack) = context.kstack { - offset = stack_base - stack.as_ptr() as usize; - let mut new_stack = stack.clone(); - - kstack_opt = Some(new_stack); - } - } - - grants = Arc::clone(&context.grants); - - if flags.contains(CLONE_VM) { - name = Arc::clone(&context.name); - } else { - name = Arc::new(RwLock::new(context.name.read().clone())); - } - - if flags.contains(CLONE_FS) { - cwd = Arc::clone(&context.cwd); - } else { - cwd = Arc::new(RwLock::new(context.cwd.read().clone())); - } - - if flags.contains(CLONE_FILES) { - files = Arc::clone(&context.files); - } else { - files = Arc::new(RwLock::new(context.files.read().clone())); - } - - if flags.contains(CLONE_SIGHAND) { - actions = Arc::clone(&context.actions); - } else { - actions = Arc::new(RwLock::new(context.actions.read().clone())); - } - } - - // If not cloning files, dup to get a new number from scheme - // This has to be done outside the context lock to prevent deadlocks - if !flags.contains(CLONE_FILES) { - for (_fd, file_opt) in files.write().iter_mut().enumerate() { - let new_file_opt = if let Some(ref file) = *file_opt { - Some(FileDescriptor { - description: Arc::clone(&file.description), - cloexec: file.cloexec, - }) - } else { - None - }; - - *file_opt = new_file_opt; - } - } - - let maps_to_reobtain = if flags.contains(CLONE_VM) { - Vec::new() - } else { - grants.read().iter().filter_map(|grant| grant.desc_opt.as_ref().and_then(|file_ref| { - let FileDescription { scheme, number, .. 
} = { *file_ref.desc.description.read() }; - let scheme_arc = match crate::scheme::schemes().get(scheme) { - Some(s) => Arc::downgrade(s), - None => return None, - }; - let map = crate::syscall::data::Map { - address: grant.start_address().data(), - size: grant.size(), - offset: file_ref.offset, - flags: file_ref.flags | MapFlags::MAP_FIXED_NOREPLACE, - }; - - Some((scheme_arc, number, map)) - })).collect() - }; - - // If vfork, block the current process - // This has to be done after the operations that may require context switches - if flags.contains(CLONE_VFORK) { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let mut context = context_lock.write(); - context.block("vfork"); - vfork = true; - } else { - vfork = false; - } - - // Set up new process - let new_context_lock = { - let mut contexts = context::contexts_mut(); - let context_lock = contexts.new_context()?; - let mut context = context_lock.write(); - - pid = context.id; - - context.pgid = pgid; - context.ppid = ppid; - context.ruid = ruid; - context.rgid = rgid; - context.rns = rns; - context.euid = euid; - context.egid = egid; - context.ens = ens; - context.sigmask = sigmask; - context.umask = umask; - - //TODO: Better CPU balancing - if let Some(cpu_id) = cpu_id_opt { - context.cpu_id = Some(cpu_id); - } else { - context.cpu_id = Some(pid.into() % crate::cpu_count()); - } - - // Start as blocked. This is to ensure the context is never switched before any grants - // that have to be remapped, are mapped. - context.status = context::Status::Blocked; - - context.vfork = vfork; - - context.arch = arch; - - // This is needed because these registers may have changed after this context was - // switched to, but before this was called. 
- #[cfg(all(target_arch = "x86_64", feature = "x86_fsgsbase"))] - unsafe { - context.arch.fsbase = x86::bits64::segmentation::rdfsbase() as usize; - x86::bits64::segmentation::swapgs(); - context.arch.gsbase = x86::bits64::segmentation::rdgsbase() as usize; - x86::bits64::segmentation::swapgs(); - } - - if flags.contains(CloneFlags::CLONE_VM) { - // Reuse same CR3, same grants, everything. - context.grants = grants; - } else { - // TODO: Handle ENOMEM - let mut new_tables = setup_new_utable().expect("failed to allocate new page tables for cloned process"); - - let mut new_grants = UserGrants::new(); - for old_grant in grants.read().iter().filter(|g| g.desc_opt.is_none()) { - new_grants.insert(old_grant.secret_clone(&mut new_tables.new_utable)); - } - context.grants = Arc::new(RwLock::new(new_grants)); - - drop(grants); - - new_tables.take(); - - context.arch.set_page_utable(unsafe { new_tables.new_utable.address() }); - - #[cfg(target_arch = "aarch64")] - context.arch.set_page_ktable(unsafe { new_tables.new_ktable.address() }); - } - - if let Some(fx) = kfx_opt.take() { - context.arch.set_fx(fx.as_ptr() as usize); - context.kfx = Some(fx); - } - - // Set kernel stack - if let Some(stack) = kstack_opt.take() { - context.arch.set_stack(stack.as_ptr() as usize + offset); - context.kstack = Some(stack); - #[cfg(target_arch = "aarch64")] - { - context.arch.set_lr(interrupt::syscall::clone_ret as usize); - } - } - - // TODO: Clone ksig? - - #[cfg(target_arch = "aarch64")] - { - if let Some(stack) = &mut context.kstack { - unsafe { - // stack_base contains a pointer to InterruptStack. Get its offset from - // stack_base itself - let istack_offset = *(stack_base as *const u64) - stack_base as u64; - - // Get the top of the new process' stack - let new_sp = stack.as_mut_ptr().add(offset); - - // Update the pointer to the InterruptStack to reflect the new process' - // stack. (Without this the pointer would be InterruptStack on the parent - // process' stack). 
- *(new_sp as *mut u64) = new_sp as u64 + istack_offset; - - // Update tpidr_el0 in the new process' InterruptStack - let mut interrupt_stack = &mut *(stack.as_mut_ptr().add(offset + istack_offset as usize) as *mut crate::arch::interrupt::InterruptStack); - interrupt_stack.iret.tpidr_el0 = tcb_addr; - } - } - } - - - context.name = name; - - context.cwd = cwd; - - context.files = files; - - context.actions = actions; - - if flags.contains(CLONE_VM) { - context.sigstack = info.and_then(|info| (info.target_sigstack != !0).then(|| info.target_sigstack)); - } else { - context.sigstack = old_sigstack; - } - - Arc::clone(context_lock) - }; - for (scheme_weak, number, map) in maps_to_reobtain { - let scheme = match scheme_weak.upgrade() { - Some(s) => s, - None => continue, - }; - let _ = scheme.kfmap(number, &map, &new_context_lock); - } - new_context_lock.write().status = context::Status::Runnable; - } - - if ptrace::send_event(ptrace_event!(PTRACE_EVENT_CLONE, pid.into())).is_some() { - // Freeze the clone, allow ptrace to put breakpoints - // to it before it starts - let contexts = context::contexts(); - let context = contexts.get(pid).expect("Newly created context doesn't exist??"); - let mut context = context.write(); - context.ptrace_stop = true; - } - - // Race to pick up the new process! - ipi(IpiKind::Switch, IpiTarget::Other); - - let _ = unsafe { context::switch() }; - - Ok(pid) -} +use crate::syscall::validate::validate_slice_mut; fn empty<'lock>(context_lock: &'lock RwLock, mut context: RwLockWriteGuard<'lock, Context>, reaping: bool) -> RwLockWriteGuard<'lock, Context> { // NOTE: If we do not replace the grants `Arc`, then a strange situation can appear where the @@ -383,27 +30,23 @@ fn empty<'lock>(context_lock: &'lock RwLock, mut context: RwLockWriteGu // remaining references to the grants, where there are in fact none. However, if either one is // reaped before, then that reference will disappear, and no leak will occur. 
// - // By removing the reference to the grants when the context will no longer be used, this + // By removing the reference to the address space when the context will no longer be used, this // problem will never occur. + let addr_space_arc = match context.addr_space.take() { + Some(a) => a, + None => return context, + }; - // FIXME, UNOPTIMIZED: Right now, this will allocate memory in order to store the new empty - // grants, which may not even be used (only in fexec I think). We should turn grants into an - // `Option`, and only reinitialize it there. - let mut grants_arc = mem::take(&mut context.grants); - - if let Some(grants_lock_mut) = Arc::get_mut(&mut grants_arc) { - let mut grants_guard = grants_lock_mut.get_mut(); - - let grants = mem::replace(&mut *grants_guard, UserGrants::default()); - for grant in grants.into_iter() { + if let Ok(addr_space) = Arc::try_unwrap(addr_space_arc).map(RwLock::into_inner) { + for grant in addr_space.grants.into_iter() { let unmap_result = if reaping { log::error!("{}: {}: Grant should not exist: {:?}", context.id.into(), *context.name.read(), grant); let mut new_table = unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) }; - grant.unmap_inactive(&mut new_table) + grant.unmap(&mut new_table.mapper(), &mut InactiveFlusher::new()) } else { - grant.unmap() + grant.unmap(&mut *unsafe { ActivePageTable::new(rmm::TableKind::User) }, PageFlushAll::new()) }; if unmap_result.file_desc.is_some() { @@ -418,14 +61,6 @@ fn empty<'lock>(context_lock: &'lock RwLock, mut context: RwLockWriteGu context } -struct ExecFile(FileHandle); - -impl Drop for ExecFile { - fn drop(&mut self) { - let _ = syscall::close(self.0); - } -} - pub fn exit(status: usize) -> ! { ptrace::breakpoint_callback(PTRACE_STOP_EXIT, Some(ptrace_event!(PTRACE_STOP_EXIT, status))); @@ -436,16 +71,10 @@ pub fn exit(status: usize) -> ! 
{ Arc::clone(&context_lock) }; - let mut close_files = Vec::new(); + let mut close_files; let pid = { let mut context = context_lock.write(); - { - let mut lock = context.files.write(); - if Arc::strong_count(&context.files) == 1 { - mem::swap(lock.deref_mut(), &mut close_files); - } - } - context.files = Arc::new(RwLock::new(Vec::new())); + close_files = Arc::try_unwrap(mem::take(&mut context.files)).map_or_else(|_| Vec::new(), RwLock::into_inner); context.id }; @@ -669,7 +298,7 @@ pub fn mprotect(address: usize, size: usize, flags: MapFlags) -> Result { let mut active_table = unsafe { ActivePageTable::new(TableKind::User) }; - let flush_all = PageFlushAll::new(); + let mut flush_all = PageFlushAll::new(); let start_page = Page::containing_address(VirtualAddress::new(address)); let end_page = Page::containing_address(VirtualAddress::new(end_address)); @@ -999,19 +628,31 @@ pub fn usermode_bootstrap(mut data: Box<[u8]>) -> ! { assert!(!data.is_empty()); const LOAD_BASE: usize = 0; - let grant = context::memory::Grant::map(VirtualAddress::new(LOAD_BASE), ((data.len()+PAGE_SIZE-1)/PAGE_SIZE)*PAGE_SIZE, PageFlags::new().user(true).write(true).execute(true)); { let mut active_table = unsafe { ActivePageTable::new(TableKind::User) }; + let grant = context::memory::Grant::zeroed(Page::containing_address(VirtualAddress::new(LOAD_BASE)), (data.len()+PAGE_SIZE-1)/PAGE_SIZE, PageFlags::new().user(true).write(true).execute(true), &mut active_table, PageFlushAll::new()).expect("failed to allocate memory for bootstrap"); + + for (index, page) in grant.pages().enumerate() { let len = if data.len() - index * PAGE_SIZE < PAGE_SIZE { data.len() % PAGE_SIZE } else { PAGE_SIZE }; - let frame = active_table.translate_page(page).expect("expected mapped init memory to have a corresponding frame"); - unsafe { ((frame.start_address().data() + crate::PHYS_OFFSET) as *mut u8).copy_from_nonoverlapping(data.as_ptr().add(index * PAGE_SIZE), len); } - } - } - 
context::contexts().current().expect("expected a context to exist when executing init").read().grants.write().insert(grant); + let physaddr = active_table.translate_page(page) + .expect("expected mapped init memory to have a corresponding frame") + .start_address(); + + unsafe { + (RmmA::phys_to_virt(physaddr).data() as *mut u8).copy_from_nonoverlapping(data.as_ptr().add(index * PAGE_SIZE), len); + } + } + context::contexts().current() + .expect("expected a context to exist when executing init") + .read().addr_space() + .expect("expected bootstrap context to have an address space") + .write().grants.insert(grant); + } + log::info!("Usermode bootstrap"); drop(data); @@ -1022,127 +663,3 @@ pub fn usermode_bootstrap(mut data: Box<[u8]>) -> ! { usermode(start, 0, 0, 0); } } - -pub fn exec(memranges: &[ExecMemRange], instruction_ptr: usize, stack_ptr: usize) -> Result { - // TODO: rlimit? - if memranges.len() > 1024 { - return Err(Error::new(EINVAL)); - } - - let mut new_grants = UserGrants::new(); - - { - let current_context_lock = Arc::clone(context::contexts().current().ok_or(Error::new(ESRCH))?); - - // Linux will always destroy other threads immediately if one of them executes execve(2). - // At the moment the Redox kernel is ignorant of threads, other than them sharing files, - // memory, etc. We fail with EBUSY if any resources that are being replaced, are shared. - - let mut old_grants = Arc::try_unwrap(mem::take(&mut current_context_lock.write().grants)).map_err(|_| Error::new(EBUSY))?.into_inner(); - // TODO: Allow multiple contexts which share the file table, to have one of them run exec? - let mut old_files = Arc::try_unwrap(mem::take(&mut current_context_lock.write().files)).map_err(|_| Error::new(EBUSY))?.into_inner(); - - // FIXME: Handle leak in case of ENOMEM. - let mut new_tables = setup_new_utable()?; - - let mut flush = PageFlushAll::new(); - - // FIXME: This is to the extreme, but fetch with atomic volatile? 
- for memrange in memranges.iter().copied() { - let old_address = if memrange.old_address == !0 { None } else { Some(memrange.old_address) }; - - if memrange.address % PAGE_SIZE != 0 || old_address.map_or(false, |a| a % PAGE_SIZE != 0) || memrange.size % PAGE_SIZE != 0 { - return Err(Error::new(EINVAL)); - } - if memrange.size == 0 { continue } - - let new_start = Page::containing_address(VirtualAddress::new(memrange.address)); - let flags = MapFlags::from_bits(memrange.flags).ok_or(Error::new(EINVAL))?; - let page_count = memrange.size / PAGE_SIZE; - let flags = page_flags(flags); - - if let Some(old_address) = old_address { - let old_start = VirtualAddress::new(memrange.old_address); - - let entire_region = Region::new(old_start, memrange.size); - - // TODO: This will do one B-Tree search for each memrange. If a process runs exec - // and keeps every range the way it is, then this would be O(n log n)! - loop { - let region = match old_grants.conflicts(entire_region).next().map(|g| *g.region()) { - Some(r) => r, - None => break, - }; - let owned = old_grants.take(&region).expect("cannot fail"); - let (before, mut current, after) = owned.extract(region).expect("cannot fail"); - - if let Some(before) = before { old_grants.insert(before); } - if let Some(after) = after { old_grants.insert(after); } - - new_grants.insert(current.move_to_address_space(new_start, &mut new_tables.new_utable, flags, &mut flush)); - } - } else { - new_grants.insert(Grant::zeroed_inactive(new_start, page_count, flags, &mut new_tables.new_utable)?); - } - } - - { - unsafe { flush.ignore(); } - - new_tables.take(); - - let mut context = current_context_lock.write(); - context.grants = Arc::new(RwLock::new(new_grants)); - - let old_utable = context.arch.get_page_utable(); - let old_frame = Frame::containing_address(PhysicalAddress::new(old_utable)); - - context.arch.set_page_utable(unsafe { new_tables.new_utable.address() }); - - #[cfg(target_arch = "x86_64")] - unsafe {
x86::controlregs::cr3_write(new_tables.new_utable.address() as u64); } - - for old_grant in old_grants.into_iter() { - old_grant.unmap_inactive(&mut unsafe { InactivePageTable::from_address(old_utable) }); - } - crate::memory::deallocate_frames(old_frame, 1); - - #[cfg(target_arch = "aarch64")] - context.arch.set_page_ktable(unsafe { new_tables.new_ktable.address() }); - - context.actions = Arc::new(RwLock::new(vec![( - SigAction { - sa_handler: unsafe { mem::transmute(SIG_DFL) }, - sa_mask: [0; 2], - sa_flags: SigActionFlags::empty(), - }, - 0 - ); 128])); - let was_vfork = mem::replace(&mut context.vfork, false); - - // TODO: Reuse in place if the file table is not shared. - drop(context); - - let mut context = current_context_lock.write(); - - context.files = Arc::new(RwLock::new(old_files)); - let ppid = context.ppid; - drop(context); - - // TODO: Should this code be preserved as is? - if was_vfork { - let contexts = context::contexts(); - if let Some(context_lock) = contexts.get(ppid) { - let mut context = context_lock.write(); - if !context.unblock() { - println!("{} not blocked for exec vfork unblock", ppid.into()); - } - } else { - println!("{} not found for exec vfork unblock", ppid.into()); - } - } - } - } - - unsafe { usermode(instruction_ptr, stack_ptr, 0, 0); } -}