diff --git a/src/context/memory.rs b/src/context/memory.rs
index 244d51e..6fd567c 100644
--- a/src/context/memory.rs
+++ b/src/context/memory.rs
@@ -125,7 +125,7 @@ impl UserGrants {

             // ... but it already exists
             if flags.contains(MapFlags::MAP_FIXED_NOREPLACE) {
-                println!("grant: conflicts with: {:#x} - {:#x}", grant.start_address().data(), grant.end_address().data());
+                println!("grant: {:#x} conflicts with: {:#x} - {:#x}", address.data(), grant.start_address().data(), grant.end_address().data());
                 return Err(Error::new(EEXIST));
             } else if flags.contains(MapFlags::MAP_FIXED) {
                 // TODO: Overwrite existing grant
diff --git a/src/scheme/mod.rs b/src/scheme/mod.rs
index d781d6a..885c8e0 100644
--- a/src/scheme/mod.rs
+++ b/src/scheme/mod.rs
@@ -16,6 +16,7 @@ use alloc::{
 use core::sync::atomic::AtomicUsize;
 use spin::{Once, RwLock, RwLockReadGuard, RwLockWriteGuard};

+use crate::context::Context;
 use crate::syscall::error::*;
 use crate::syscall::scheme::Scheme;

@@ -299,4 +300,10 @@ pub fn schemes_mut() -> RwLockWriteGuard<'static, SchemeList> {
     SCHEMES.call_once(init_schemes).write()
 }

-pub trait KernelScheme: Scheme + Send + Sync + 'static {}
+pub trait KernelScheme: Scheme + Send + Sync + 'static {
+    #[allow(unused_variables)]
+    fn kfmap(&self, number: usize, map: &syscall::data::Map, target_context: &Arc<RwLock<Context>>) -> Result<usize> {
+        log::error!("Returning ENOSYS since kfmap can only be called on UserScheme schemes");
+        Err(Error::new(ENOSYS))
+    }
+}
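The mod.rs hunk above gives every `KernelScheme` a default `kfmap` that fails with ENOSYS, so only schemes that can service a kernel-initiated fmap need to override it. A minimal standalone sketch of this default-method pattern; all names here (`BuiltinScheme`, `UserSchemeStub`) and the error type are simplified stand-ins, not kernel types:

```rust
use std::sync::{Arc, RwLock};

struct Context;                              // stand-in for context::Context
struct Map;                                  // stand-in for syscall::data::Map
type Result<T> = std::result::Result<T, &'static str>;

trait KernelScheme {
    // Every scheme inherits this body; only schemes that can actually service
    // a kernel-initiated fmap (the user schemes) override it.
    #[allow(unused_variables)]
    fn kfmap(&self, number: usize, map: &Map, target: &Arc<RwLock<Context>>) -> Result<usize> {
        Err("ENOSYS")
    }
}

struct BuiltinScheme;                        // keeps the ENOSYS default
impl KernelScheme for BuiltinScheme {}

struct UserSchemeStub;                       // overrides, like UserScheme does
impl KernelScheme for UserSchemeStub {
    fn kfmap(&self, _number: usize, _map: &Map, _target: &Arc<RwLock<Context>>) -> Result<usize> {
        Ok(0xdead_b000)                      // would forward to fmap_inner in the real code
    }
}

fn main() {
    let ctx = Arc::new(RwLock::new(Context));
    assert!(BuiltinScheme.kfmap(0, &Map, &ctx).is_err());
    assert!(UserSchemeStub.kfmap(0, &Map, &ctx).is_ok());
}
```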
diff --git a/src/scheme/user.rs b/src/scheme/user.rs
index 3ba08f4..55c7f20 100644
--- a/src/scheme/user.rs
+++ b/src/scheme/user.rs
@@ -152,10 +152,6 @@ impl UserInner {
         let src_region = Region::new(VirtualAddress::new(src_address), offset + size).round();
         let dst_region = grants.find_free_at(VirtualAddress::new(dst_address), src_region.size(), flags)?;

-        /*if !dst_region.intersect(Region::new(VirtualAddress::new(0x39d000), 1)).is_empty() {
-            dbg!(dst_region);
-        }*/
-
         //TODO: Use syscall_head and syscall_tail to avoid leaking data
         grants.insert(Grant::map_inactive(
             src_region.start_address(),
@@ -233,6 +229,9 @@ impl UserInner {
                 _ => println!("Unknown scheme -> kernel message {}", packet.a)
             }
         } else {
+            // The motivation for doing this here instead of within the fmap handler is that we
+            // can operate on an inactive table. This reduces the number of page table reloads
+            // from two (context switch + active TLB flush) to one (context switch).
             if let Some((context_weak, desc, map)) = self.fmap.lock().remove(&packet.id) {
                 if let Ok(address) = Error::demux(packet.a) {
                     if address % PAGE_SIZE > 0 {
@@ -274,6 +273,51 @@ impl UserInner {
     pub fn fsync(&self) -> Result<usize> {
         Ok(0)
     }
+
+    fn fmap_inner(&self, file: usize, map: &Map, context_lock: &Arc<RwLock<Context>>) -> Result<usize> {
+        let (pid, uid, gid, context_weak, desc) = {
+            let context = context_lock.read();
+            // TODO: Faster, cleaner mechanism to get descriptor
+            let scheme = self.scheme_id.load(Ordering::SeqCst);
+            let mut desc_res = Err(Error::new(EBADF));
+            for context_file_opt in context.files.read().iter() {
+                if let Some(context_file) = context_file_opt {
+                    let (context_scheme, context_number) = {
+                        let desc = context_file.description.read();
+                        (desc.scheme, desc.number)
+                    };
+                    if context_scheme == scheme && context_number == file {
+                        desc_res = Ok(context_file.clone());
+                        break;
+                    }
+                }
+            }
+            let desc = desc_res?;
+            (context.id, context.euid, context.egid, Arc::downgrade(context_lock), desc)
+        };
+        drop(context_lock);
+
+        let address = self.capture(map)?;
+
+        let id = self.next_id.fetch_add(1, Ordering::Relaxed);
+
+        self.fmap.lock().insert(id, (context_weak, desc, *map));
+
+        let result = self.call_inner(Packet {
+            id,
+            pid: pid.into(),
+            uid,
+            gid,
+            a: SYS_FMAP,
+            b: file,
+            c: address,
+            d: mem::size_of::<Map>()
+        });
+
+        let _ = self.release(address);
+
+        result
+    }
 }

 /// `UserInner` has to be wrapped
@@ -384,49 +428,8 @@ impl Scheme for UserScheme {
     fn fmap(&self, file: usize, map: &Map) -> Result<usize> {
         let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?;

-        let (pid, uid, gid, context_lock, desc) = {
-            let contexts = context::contexts();
-            let context_lock = contexts.current().ok_or(Error::new(ESRCH))?;
-            let context = context_lock.read();
-            // TODO: Faster, cleaner mechanism to get descriptor
-            let scheme = inner.scheme_id.load(Ordering::SeqCst);
-            let mut desc_res = Err(Error::new(EBADF));
-            for context_file_opt in context.files.read().iter() {
-                if let Some(context_file) = context_file_opt {
-                    let (context_scheme, context_number) = {
-                        let desc = context_file.description.read();
-                        (desc.scheme, desc.number)
-                    };
-                    if context_scheme == scheme && context_number == file {
-                        desc_res = Ok(context_file.clone());
-                        break;
-                    }
-                }
-            }
-            let desc = desc_res?;
-            (context.id, context.euid, context.egid, Arc::downgrade(&context_lock), desc)
-        };
-
-        let address = inner.capture(map)?;
-
-        let id = inner.next_id.fetch_add(1, Ordering::SeqCst);
-
-        inner.fmap.lock().insert(id, (context_lock, desc, *map));
-
-        let result = inner.call_inner(Packet {
-            id,
-            pid: pid.into(),
-            uid,
-            gid,
-            a: SYS_FMAP,
-            b: file,
-            c: address,
-            d: mem::size_of::<Map>()
-        });
-
-        let _ = inner.release(address);
-
-        result
+        let current_context = Arc::clone(context::contexts().current().ok_or(Error::new(ESRCH))?);
+        inner.fmap_inner(file, map, &current_context)
     }

     fn funmap(&self, grant_address: usize, size: usize) -> Result<usize> {
@@ -528,4 +531,9 @@ impl Scheme for UserScheme {
         inner.call(SYS_CLOSE, file, 0, 0)
     }
 }
-impl crate::scheme::KernelScheme for UserScheme {}
+impl crate::scheme::KernelScheme for UserScheme {
+    fn kfmap(&self, number: usize, map: &Map, target_context: &Arc<RwLock<Context>>) -> Result<usize> {
+        let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?;
+        inner.fmap_inner(number, map, target_context)
+    }
+}
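The new `fmap_inner` centralizes the request bookkeeping: the caller's identity and descriptor are captured, the request is parked in `self.fmap` keyed by packet id, and the response path (the hunk at line 229 above) retrieves it to map the grant into the target's possibly inactive table. Note also that `fmap` now clones the current context's `Arc` in its own statement, so the `contexts()` read guard is dropped before the blocking call into the scheme. A simplified, self-contained model of that park-and-resolve flow, with std types standing in for the kernel's locks and `Map`:

```rust
use std::collections::HashMap;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, Mutex, RwLock, Weak};

struct Context;
#[derive(Clone, Copy)]
#[allow(dead_code)]
struct Map { address: usize, size: usize }

#[derive(Default)]
struct Inner {
    next_id: AtomicU64,
    fmap: Mutex<HashMap<u64, (Weak<RwLock<Context>>, Map)>>,
}

impl Inner {
    // Mirrors fmap_inner: park the request under a fresh id before the packet
    // is sent to the scheme daemon.
    fn start_fmap(&self, target: &Arc<RwLock<Context>>, map: Map) -> u64 {
        let id = self.next_id.fetch_add(1, Ordering::Relaxed);
        // Downgrade so a parked request cannot keep a dead context alive.
        self.fmap.lock().unwrap().insert(id, (Arc::downgrade(target), map));
        id
    }

    // Mirrors the response path: take the parked entry back out when the
    // daemon answers, so the grant can be mapped into the target's table.
    fn finish_fmap(&self, id: u64) -> Option<(Arc<RwLock<Context>>, Map)> {
        let (weak, map) = self.fmap.lock().unwrap().remove(&id)?;
        Some((weak.upgrade()?, map))
    }
}

fn main() {
    let inner = Inner::default();
    let ctx = Arc::new(RwLock::new(Context));
    let id = inner.start_fmap(&ctx, Map { address: 0, size: 4096 });
    assert!(inner.finish_fmap(id).is_some());
}
```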
diff --git a/src/syscall/process.rs b/src/syscall/process.rs
index 243ca48..090b7ca 100644
--- a/src/syscall/process.rs
+++ b/src/syscall/process.rs
@@ -189,55 +189,25 @@ pub fn clone(flags: CloneFlags, stack_base: usize, info: Option<&CloneInfo>) -> Result<ContextId> {
             }
         }

-        // If not cloning virtual memory, use fmap to re-obtain every grant where possible
-        if !flags.contains(CLONE_VM) {
-            let mut grants = grants.write();
-            let old_grants = mem::take(&mut *grants);
-
-            // TODO: Find some way to do this without having to allocate.
-
-            // TODO: Check that the current process is not allowed to serve any scheme this logic
-            // could interfere with. Deadlocks would otherwise seem inevitable.
-
-            for mut grant in old_grants.into_iter() {
-                let region = *grant.region();
-                let address = region.start_address().data();
-                let size = region.size();
-
-                let new_grant = if let Some(ref mut file_ref) = grant.desc_opt.take() {
-                    // TODO: Technically this is redundant as the grants are already secret_cloned.
-                    // Maybe grants with fds can be excluded from that step?
-                    grant.unmap();
-
-                    let FileDescription { scheme, number, .. } = { *file_ref.desc.description.read() };
-                    let scheme_arc = match crate::scheme::schemes().get(scheme) {
-                        Some(s) => Arc::clone(s),
-                        None => continue,
-                    };
-                    let map = crate::syscall::data::Map {
-                        address,
-                        size,
-                        offset: file_ref.offset,
-                        flags: file_ref.flags | MapFlags::MAP_FIXED_NOREPLACE,
-                    };
-
-                    let ptr = match scheme_arc.fmap(number, &map) {
-                        Ok(new_range) => new_range as *mut u8,
-                        Err(_) => continue,
-                    };
-
-                    // This will eventually be freed from the parent context after move_to is
-                    // called.
-                    context::contexts().current().ok_or(Error::new(ESRCH))?
-                        .read().grants.write()
-                        .take(&Region::new(VirtualAddress::new(ptr as usize), map.size))
-                        .ok_or(Error::new(EFAULT))?
-                } else {
-                    grant
-                };
-                grants.insert(new_grant);
-            }
-        }
+        let maps_to_reobtain = if flags.contains(CLONE_VM) {
+            Vec::new()
+        } else {
+            grants.read().iter().filter_map(|grant| grant.desc_opt.as_ref().and_then(|file_ref| {
+                let FileDescription { scheme, number, .. } = { *file_ref.desc.description.read() };
+                let scheme_weak = match crate::scheme::schemes().get(scheme) {
+                    Some(s) => Arc::downgrade(s),
+                    None => return None,
+                };
+                let map = crate::syscall::data::Map {
+                    address: grant.start_address().data(),
+                    size: grant.size(),
+                    offset: file_ref.offset,
+                    flags: file_ref.flags | MapFlags::MAP_FIXED_NOREPLACE,
+                };
+
+                Some((scheme_weak, number, map))
+            })).collect()
+        };

        // If vfork, block the current process
        // This has to be done after the operations that may require context switches
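The rewritten clone path no longer calls fmap while walking the grant list: it snapshots (scheme, fd number, map) triples under the lock and performs the blocking kfmap calls afterwards, holding only `Weak` scheme references. A self-contained sketch of that collect-then-call shape; `Grant`, `Scheme`, and `reobtain_plan` are illustrative stand-ins, not kernel APIs:

```rust
use std::sync::{Arc, Mutex, Weak};

struct Scheme(&'static str);
struct Grant { start: usize, size: usize, scheme: Option<Arc<Scheme>> }

fn reobtain_plan(grants: &Mutex<Vec<Grant>>) -> Vec<(Weak<Scheme>, usize, usize)> {
    // Short critical section: no scheme calls happen while the grant list is
    // locked, so a scheme daemon can never deadlock against this path.
    grants.lock().unwrap().iter()
        .filter_map(|g| {
            let scheme = g.scheme.as_ref()?;       // anonymous grants are skipped
            Some((Arc::downgrade(scheme), g.start, g.size))
        })
        .collect()
}

fn main() {
    let disk = Arc::new(Scheme("disk"));
    let grants = Mutex::new(vec![
        Grant { start: 0x1000, size: 0x2000, scheme: Some(Arc::clone(&disk)) },
        Grant { start: 0x4000, size: 0x1000, scheme: None },
    ]);
    for (weak, start, size) in reobtain_plan(&grants) {
        // A scheme may have been unregistered between snapshot and call.
        if let Some(s) = weak.upgrade() {
            println!("would kfmap {:#x}..{:#x} via {}", start, start + size, s.0);
        }
    }
}
```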
@@ -252,7 +222,7 @@ pub fn clone(flags: CloneFlags, stack_base: usize, info: Option<&CloneInfo>) -> Result<ContextId> {
         }

         // Set up new process
-        {
+        let new_context_lock = {
             let mut contexts = context::contexts_mut();
             let context_lock = contexts.new_context()?;
             let mut context = context_lock.write();
@@ -277,7 +247,9 @@ pub fn clone(flags: CloneFlags, stack_base: usize, info: Option<&CloneInfo>) -> Result<ContextId> {
                 context.cpu_id = Some(pid.into() % crate::cpu_count());
             }

-            context.status = context::Status::Runnable;
+            // Start as blocked. This ensures the context is never switched to before the
+            // grants that have to be remapped have been mapped.
+            context.status = context::Status::Blocked;

             context.vfork = vfork;

@@ -299,7 +271,7 @@ pub fn clone(flags: CloneFlags, stack_base: usize, info: Option<&CloneInfo>) -> Result<ContextId> {
             let mut new_tables = setup_new_utable().expect("failed to allocate new page tables for cloned process");

             let mut new_grants = UserGrants::new();
-            for old_grant in grants.read().iter() {
+            for old_grant in grants.read().iter().filter(|g| g.desc_opt.is_none()) {
                 new_grants.insert(old_grant.secret_clone(&mut new_tables.new_utable));
             }
             context.grants = Arc::new(RwLock::new(new_grants));
@@ -368,7 +340,17 @@ pub fn clone(flags: CloneFlags, stack_base: usize, info: Option<&CloneInfo>) -> Result<ContextId> {
             } else {
                 context.sigstack = old_sigstack;
             }
+
+            Arc::clone(context_lock)
+        };
+        for (scheme_weak, number, map) in maps_to_reobtain {
+            let scheme = match scheme_weak.upgrade() {
+                Some(s) => s,
+                None => continue,
+            };
+            let _ = scheme.kfmap(number, &map, &new_context_lock);
         }
+        new_context_lock.write().unblock();
     }

     if ptrace::send_event(ptrace_event!(PTRACE_EVENT_CLONE, pid.into())).is_some() {
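The end of clone() ties the pieces together: the child is created `Blocked`, the collected maps are re-obtained into it through `kfmap`, and only then is it unblocked, so the scheduler can never run the child while its fd-backed grants are still missing. A simplified stand-alone model of that lifecycle, with stand-in types rather than the kernel's scheduler:

```rust
use std::sync::{Arc, RwLock};

#[derive(PartialEq, Debug)]
enum Status { Blocked, Runnable }

struct Context { status: Status }

impl Context {
    fn unblock(&mut self) { self.status = Status::Runnable; }
}

fn main() {
    // 1. Create the child blocked so the scheduler cannot pick it up early.
    let child = Arc::new(RwLock::new(Context { status: Status::Blocked }));

    // 2. Re-obtain fd-backed grants while the child cannot run (the kfmap
    //    calls would go here); its page table is inactive, so mapping into it
    //    needs no TLB shootdown.

    // 3. Only now allow the scheduler to switch to the child.
    child.write().unwrap().unblock();
    assert_eq!(child.read().unwrap().status, Status::Runnable);
}
```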