diff --git a/src/context/context.rs b/src/context/context.rs index abb5363..b9bc233 100644 --- a/src/context/context.rs +++ b/src/context/context.rs @@ -258,11 +258,6 @@ pub struct Context { /// set since there is no interrupt stack (unless the kernel stack is copied, but that is in my /// opinion hackier and less efficient than this (and UB to do in Rust)). pub clone_entry: Option<[usize; 2]>, - /// Lowest offset for mmap invocations where the user has not already specified the offset - /// (using MAP_FIXED/MAP_FIXED_NOREPLACE). Cf. Linux's `/proc/sys/vm/mmap_min_addr`, but with - /// the exception that we have a memory safe kernel which doesn't have to protect itself - /// against null pointers, so fixed mmaps are still allowed. - pub mmap_min: usize, } // Necessary because GlobalAlloc::dealloc requires the layout to be the same, and therefore Box @@ -378,7 +373,6 @@ impl Context { ptrace_stop: false, sigstack: None, clone_entry: None, - mmap_min: MMAP_MIN_DEFAULT, }; Ok(this) } @@ -572,5 +566,3 @@ impl Context { ); 128])) } } - -pub const MMAP_MIN_DEFAULT: usize = PAGE_SIZE; diff --git a/src/context/memory.rs b/src/context/memory.rs index 13891d8..61f695e 100644 --- a/src/context/memory.rs +++ b/src/context/memory.rs @@ -4,7 +4,7 @@ use core::borrow::Borrow; use core::cmp::{self, Eq, Ordering, PartialEq, PartialOrd}; use core::fmt::{self, Debug}; use core::ops::Deref; -use spin::RwLock; +use spin::{RwLock, RwLockWriteGuard}; use syscall::{ flag::MapFlags, error::*, @@ -17,6 +17,8 @@ use crate::memory::{Enomem, Frame}; use crate::paging::mapper::{Flusher, InactiveFlusher, PageFlushAll}; use crate::paging::{KernelMapper, Page, PageFlags, PageIter, PageMapper, PhysicalAddress, RmmA, round_up_pages, VirtualAddress}; +pub const MMAP_MIN_DEFAULT: usize = PAGE_SIZE; + pub fn page_flags(flags: MapFlags) -> PageFlags { PageFlags::new() .user(true) @@ -52,6 +54,11 @@ pub fn new_addrspace() -> Result>> { pub struct AddrSpace { pub table: Table, pub grants: UserGrants, + /// Lowest offset for mmap invocations where the user has not already specified the offset + /// (using MAP_FIXED/MAP_FIXED_NOREPLACE). Cf. Linux's `/proc/sys/vm/mmap_min_addr`, but with + /// the exception that we have a memory safe kernel which doesn't have to protect itself + /// against null pointers, so fixed mmaps to address zero are still allowed. + pub mmap_min: usize, } impl AddrSpace { pub fn current() -> Result>> { @@ -101,6 +108,7 @@ impl AddrSpace { Ok(Self { grants: UserGrants::new(), table: setup_new_utable()?, + mmap_min: MMAP_MIN_DEFAULT, }) } pub fn is_current(&self) -> bool { @@ -131,7 +139,7 @@ impl AddrSpace { if let Some(before) = before { self.grants.insert(before); } if let Some(after) = after { self.grants.insert(after); } - if !grant.is_owned() && flags.contains(MapFlags::PROT_WRITE) && !grant.flags().has_write() { + if !grant.can_have_flags(flags) { self.grants.insert(grant); return Err(Error::new(EACCES)); } @@ -150,6 +158,75 @@ impl AddrSpace { } Ok(()) } + pub fn munmap(mut self: RwLockWriteGuard<'_, Self>, page: Page, page_count: usize) { + let mut notify_files = Vec::new(); + + let requested = Region::new(page.start_address(), page_count * PAGE_SIZE); + let mut flusher = PageFlushAll::new(); + + let conflicting: Vec = self.grants.conflicts(requested).map(Region::from).collect(); + + for conflict in conflicting { + let grant = self.grants.take(&conflict).expect("conflicting region didn't exist"); + let intersection = grant.intersect(requested); + let (before, mut grant, after) = grant.extract(intersection.round()).expect("conflicting region shared no common parts"); + + // Notify scheme that holds grant + if let Some(file_desc) = grant.desc_opt.take() { + notify_files.push((file_desc, intersection)); + } + + // Keep untouched regions + if let Some(before) = before { + self.grants.insert(before); + } + if let Some(after) = after { + self.grants.insert(after); + } + + // Remove irrelevant region + grant.unmap(&mut self.table.utable, &mut flusher); + } + drop(self); + + for (file_ref, intersection) in notify_files { + let scheme_id = { file_ref.desc.description.read().scheme }; + + let scheme = match crate::scheme::schemes().get(scheme_id) { + Some(scheme) => Arc::clone(scheme), + // One could argue that EBADFD could be returned here, but we have already unmapped + // the memory. + None => continue, + }; + // Same here, we don't really care about errors when schemes respond to unmap events. + // The caller wants the memory to be unmapped, period. When already unmapped, what + // would we do with error codes anyway? + let _ = scheme.funmap(intersection.start_address().data(), intersection.size()); + + let _ = file_ref.desc.close(); + } + } + pub fn mmap(&mut self, page: Option, page_count: usize, flags: MapFlags, map: impl FnOnce(Page, PageFlags, &mut PageMapper, &mut dyn Flusher) -> Result) -> Result { + // Finally, the end of all "T0DO: Abstract with other grant creation"! + + let region = match page { + Some(page) => self.grants.find_free_at(self.mmap_min, page.start_address(), page_count * PAGE_SIZE, flags)?, + None => self.grants.find_free(self.mmap_min, page_count * PAGE_SIZE).ok_or(Error::new(ENOMEM))?, + }; + let page = Page::containing_address(region.start_address()); + + let (mut active, mut inactive); + let flusher = if self.is_current() { + active = PageFlushAll::new(); + &mut active as &mut dyn Flusher + } else { + inactive = InactiveFlusher::new(); + &mut inactive as &mut dyn Flusher + }; + + self.grants.insert(map(page, page_flags(flags), &mut self.table.utable, flusher)?); + Ok(page) + } } #[derive(Debug)] @@ -668,6 +745,9 @@ impl Grant { self.flags = flags; } + pub fn can_have_flags(&self, flags: MapFlags) -> bool { + self.owned || ((self.flags.has_write() || !flags.contains(MapFlags::PROT_WRITE)) && (self.flags.has_execute() || !flags.contains(MapFlags::PROT_EXEC))) + } pub fn unmap(mut self, mapper: &mut PageMapper, mut flusher: impl Flusher) -> UnmapResult { assert!(self.mapped); diff --git a/src/lib.rs b/src/lib.rs index f4f4c47..18646e0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -43,9 +43,11 @@ #![deny(unused_must_use)] #![feature(allocator_api)] +#![feature(arbitrary_self_types)] #![feature(array_chunks)] #![feature(asm_const, asm_sym)] // TODO: Relax requirements of most asm invocations #![cfg_attr(target_arch = "aarch64", feature(llvm_asm))] // TODO: Rewrite using asm! +#![feature(bool_to_option)] #![feature(concat_idents)] #![feature(const_btree_new)] #![feature(const_ptr_offset_from)] diff --git a/src/ptrace.rs b/src/ptrace.rs index f1f0118..81d0611 100644 --- a/src/ptrace.rs +++ b/src/ptrace.rs @@ -465,7 +465,10 @@ fn page_aligned_chunks(mut start: usize, mut len: usize) -> impl Iterator impl Iterator> + '_ { +pub fn context_memory(addrspace: &mut AddrSpace, offset: VirtualAddress, len: usize) -> impl Iterator> + '_ { + let end = core::cmp::min(offset.data().saturating_add(len), crate::USER_END_OFFSET); + let len = end - offset.data(); + // TODO: Iterate over grants instead to avoid yielding None too many times. What if // context_memory is used for an entire process's address space, where the stack is at the very // end? Alternatively we can skip pages recursively, i.e. first skip unpopulated PML4s and then @@ -474,12 +477,9 @@ pub fn context_memory(addrspace: &mut AddrSpace, offset: VirtualAddress, len: us // [addr,addr+len) is a continuous page starting and/or ending at page boundaries, with the // possible exception of an unaligned head/tail. - //log::info!("ADDR {:p} LEN {:#0x}", page as *const u8, len); - - // FIXME: verify flags before giving out slice - let (address, _flags) = addrspace.table.utable.translate(VirtualAddress::new(addr))?; + let (address, flags) = addrspace.table.utable.translate(VirtualAddress::new(addr))?; let start = RmmA::phys_to_virt(address).data() + addr % crate::memory::PAGE_SIZE; - Some(core::ptr::slice_from_raw_parts_mut(start as *mut u8, len)) + Some((core::ptr::slice_from_raw_parts_mut(start as *mut u8, len), flags.has_write())) }) } diff --git a/src/scheme/memory.rs b/src/scheme/memory.rs index 2668f07..603ece5 100644 --- a/src/scheme/memory.rs +++ b/src/scheme/memory.rs @@ -1,7 +1,11 @@ +use alloc::sync::Arc; +use spin::RwLock; + use crate::context; -use crate::context::memory::{page_flags, Grant}; +use crate::context::memory::{AddrSpace, page_flags, Grant}; use crate::memory::{free_frames, used_frames, PAGE_SIZE}; use crate::paging::{mapper::PageFlushAll, Page, VirtualAddress}; + use crate::syscall::data::{Map, StatVfs}; use crate::syscall::error::*; use crate::syscall::flag::MapFlags; @@ -14,23 +18,16 @@ impl MemoryScheme { MemoryScheme } - pub fn fmap_anonymous(map: &Map) -> Result { - //TODO: Abstract with other grant creation - if map.size == 0 { - return Ok(0); - } - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); + pub fn fmap_anonymous(addr_space: &Arc>, map: &Map) -> Result { + let (requested_page, page_count) = crate::syscall::validate::validate_region(map.address, map.size)?; - let mut addr_space = context.addr_space()?.write(); - let addr_space = &mut *addr_space; + let page = addr_space + .write() + .mmap((map.address != 0).then_some(requested_page), page_count, map.flags, |page, flags, mapper, flusher| { + Ok(Grant::zeroed(page, page_count, flags, mapper, flusher)?) + })?; - let region = addr_space.grants.find_free_at(context.mmap_min, VirtualAddress::new(map.address), map.size, map.flags)?.round(); - - addr_space.grants.insert(Grant::zeroed(Page::containing_address(region.start_address()), map.size / PAGE_SIZE, page_flags(map.flags), &mut addr_space.table.utable, PageFlushAll::new())?); - - Ok(region.start_address().data()) + Ok(page.start_address().data()) } } impl Scheme for MemoryScheme { @@ -51,7 +48,7 @@ impl Scheme for MemoryScheme { } fn fmap(&self, _id: usize, map: &Map) -> Result { - Self::fmap_anonymous(map) + Self::fmap_anonymous(&Arc::clone(context::current()?.read().addr_space()?), map) } fn fcntl(&self, _id: usize, _cmd: usize, _arg: usize) -> Result { @@ -72,4 +69,8 @@ impl Scheme for MemoryScheme { Ok(0) } } -impl crate::scheme::KernelScheme for MemoryScheme {} +impl crate::scheme::KernelScheme for MemoryScheme { + fn kfmap(&self, _number: usize, addr_space: &Arc>, map: &Map, _consume: bool) -> Result { + Self::fmap_anonymous(addr_space, map) + } +} diff --git a/src/scheme/mod.rs b/src/scheme/mod.rs index d5e1d5d..2e7df39 100644 --- a/src/scheme/mod.rs +++ b/src/scheme/mod.rs @@ -306,4 +306,8 @@ pub trait KernelScheme: Scheme + Send + Sync + 'static { fn as_sigactions(&self, number: usize) -> Result>>> { Err(Error::new(EBADF)) } + + fn kfmap(&self, number: usize, addr_space: &Arc>, map: &crate::syscall::data::Map, consume: bool) -> Result { + Err(Error::new(EOPNOTSUPP)) + } } diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs index cb33744..23d7f9c 100644 --- a/src/scheme/proc.rs +++ b/src/scheme/proc.rs @@ -146,7 +146,7 @@ enum Operation { CurrentSigactions, AwaitingSigactionsChange(Arc>>), - MmapMinAddr, + MmapMinAddr(Arc>), } #[derive(Clone, Copy, PartialEq, Eq)] enum Attr { @@ -283,12 +283,16 @@ impl ProcScheme { } } +fn get_context(id: ContextId) -> Result>> { + context::contexts().get(id).ok_or(Error::new(ENOENT)).map(Arc::clone) +} + impl ProcScheme { fn open_inner(&self, pid: ContextId, operation_str: Option<&str>, flags: usize, uid: u32, gid: u32) -> Result { let operation = match operation_str { - Some("mem") => Operation::Memory { addrspace: AddrSpace::current()? }, - Some("addrspace") => Operation::AddrSpace { addrspace: AddrSpace::current()? }, - Some("filetable") => Operation::Filetable { filetable: Arc::clone(&context::current()?.read().files) }, + Some("mem") => Operation::Memory { addrspace: Arc::clone(get_context(pid)?.read().addr_space().map_err(|_| Error::new(ENOENT))?) }, + Some("addrspace") => Operation::AddrSpace { addrspace: Arc::clone(get_context(pid)?.read().addr_space().map_err(|_| Error::new(ENOENT))?) }, + Some("filetable") => Operation::Filetable { filetable: Arc::clone(&get_context(pid)?.read().files) }, Some("current-addrspace") => Operation::CurrentAddrSpace, Some("current-filetable") => Operation::CurrentFiletable, Some("regs/float") => Operation::Regs(RegsKind::Float), @@ -302,9 +306,9 @@ impl ProcScheme { Some("uid") => Operation::Attr(Attr::Uid), Some("gid") => Operation::Attr(Attr::Gid), Some("open_via_dup") => Operation::OpenViaDup, - Some("sigactions") => Operation::Sigactions(Arc::clone(&context::current()?.read().actions)), + Some("sigactions") => Operation::Sigactions(Arc::clone(&get_context(pid)?.read().actions)), Some("current-sigactions") => Operation::CurrentSigactions, - Some("mmap-min-addr") => Operation::MmapMinAddr, + Some("mmap-min-addr") => Operation::MmapMinAddr(Arc::clone(get_context(pid)?.read().addr_space().map_err(|_| Error::new(ENOENT))?)), _ => return Err(Error::new(EINVAL)) }; @@ -459,6 +463,7 @@ impl Scheme for ProcScheme { b"empty" => (Operation::AddrSpace { addrspace: new_addrspace()? }, false), b"exclusive" => (Operation::AddrSpace { addrspace: addrspace.write().try_clone()? }, false), b"mem" => (Operation::Memory { addrspace: Arc::clone(&addrspace) }, true), + b"mmap-min-addr" => (Operation::MmapMinAddr(Arc::clone(&addrspace)), false), grant_handle if grant_handle.starts_with(b"grant-") => { let start_addr = usize::from_str_radix(core::str::from_utf8(&grant_handle[6..]).map_err(|_| Error::new(EINVAL))?, 16).map_err(|_| Error::new(EINVAL))?; @@ -529,7 +534,7 @@ impl Scheme for ProcScheme { let mut bytes_read = 0; for chunk_opt in ptrace::context_memory(&mut *addrspace.write(), data.offset, buf.len()) { - let chunk = chunk_opt.ok_or(Error::new(EFAULT))?; + let (chunk, _writable) = chunk_opt.ok_or(Error::new(EFAULT))?; let dst_slice = &mut buf[bytes_read..bytes_read + chunk.len()]; unsafe { chunk.as_mut_ptr().copy_to_nonoverlapping(dst_slice.as_mut_ptr(), dst_slice.len()); @@ -694,8 +699,8 @@ impl Scheme for ProcScheme { read_from(buf, &data.buf, &mut data.offset) } - Operation::MmapMinAddr => { - let val = with_context(info.pid, |context| Ok(context.mmap_min))?; + Operation::MmapMinAddr(ref addrspace) => { + let val = addrspace.read().mmap_min; *buf.array_chunks_mut::<{mem::size_of::()}>().next().unwrap() = usize::to_ne_bytes(val); Ok(mem::size_of::()) } @@ -734,7 +739,10 @@ impl Scheme for ProcScheme { let mut bytes_written = 0; for chunk_opt in ptrace::context_memory(&mut *addrspace.write(), data.offset, buf.len()) { - let chunk = chunk_opt.ok_or(Error::new(EFAULT))?; + let (chunk, writable) = chunk_opt.ok_or(Error::new(EFAULT))?; + + if !writable { return Err(Error::new(EACCES)); } + let src_slice = &buf[bytes_written..bytes_written + chunk.len()]; unsafe { chunk.as_mut_ptr().copy_from_nonoverlapping(src_slice.as_ptr(), src_slice.len()); @@ -742,94 +750,42 @@ impl Scheme for ProcScheme { bytes_written += chunk.len(); } - data.offset = VirtualAddress::new(data.offset.data() + bytes_written); + data.offset = data.offset.add(bytes_written); Ok(bytes_written) }, Operation::AddrSpace { addrspace } => { - // FIXME: Forbid upgrading external mappings. - let mut chunks = buf.array_chunks::<{mem::size_of::()}>().copied().map(usize::from_ne_bytes); - // Update grant mappings, like mprotect but allowed to target other contexts. - let base = chunks.next().ok_or(Error::new(EINVAL))?; - let size = chunks.next().ok_or(Error::new(EINVAL))?; - let flags = chunks.next().and_then(|f| MapFlags::from_bits(f)).ok_or(Error::new(EINVAL))?; - let src_address = chunks.next(); + let mut next = || chunks.next().ok_or(Error::new(EINVAL)); - if base % PAGE_SIZE != 0 || size % PAGE_SIZE != 0 || base.saturating_add(size) > crate::USER_END_OFFSET { - return Err(Error::new(EINVAL)); + match next()? { + op @ ADDRSPACE_OP_MMAP | op @ ADDRSPACE_OP_TRANSFER => { + let fd = next()?; + let offset = next()?; + let (page, page_count) = crate::syscall::validate_region(next()?, next()?)?; + let flags = MapFlags::from_bits(next()?).ok_or(Error::new(EINVAL))?; + + if !flags.contains(MapFlags::MAP_FIXED) { + return Err(Error::new(EOPNOTSUPP)); + } + + let (scheme, number) = extract_scheme_number(fd)?; + + return scheme.kfmap(number, &addrspace, &Map { offset, size: page_count * PAGE_SIZE, address: page.start_address().data(), flags }, op == ADDRSPACE_OP_TRANSFER); + } + ADDRSPACE_OP_MUNMAP => { + let (page, page_count) = crate::syscall::validate_region(next()?, next()?)?; + + addrspace.write().munmap(page, page_count); + } + ADDRSPACE_OP_MPROTECT => { + let (page, page_count) = crate::syscall::validate_region(next()?, next()?)?; + let flags = MapFlags::from_bits(next()?).ok_or(Error::new(EINVAL))?; + + addrspace.write().mprotect(page, page_count, flags)?; + } + _ => return Err(Error::new(EINVAL)), } - - let mut addrspace = addrspace.write(); - let addrspace = &mut *addrspace; - let is_active = addrspace.is_current(); - - let (mut inactive, mut active); - - let mut flusher = if is_active { - active = PageFlushAll::new(); - &mut active as &mut dyn Flusher - } else { - inactive = InactiveFlusher::new(); - &mut inactive as &mut dyn Flusher - }; - - let region = Region::new(VirtualAddress::new(base), size); - let conflicting = addrspace.grants.conflicts(region).map(|g| *g.region()).collect::>(); - for conflicting_region in conflicting { - let whole_grant = addrspace.grants.take(&conflicting_region).ok_or(Error::new(EBADFD))?; - let (before_opt, current, after_opt) = whole_grant.extract(region.intersect(conflicting_region)).ok_or(Error::new(EBADFD))?; - - if let Some(before) = before_opt { - addrspace.grants.insert(before); - } - if let Some(after) = after_opt { - addrspace.grants.insert(after); - } - - let res = current.unmap(&mut addrspace.table.utable, &mut flusher); - - if res.file_desc.is_some() { - // We prefer avoiding file operations from within the kernel. If userspace - // updates grants that overlap, it might as well enumerate grants and call - // partial funmap on its own. - return Err(Error::new(EBUSY)); - } - } - - let base_page = Page::containing_address(VirtualAddress::new(base)); - - if let Some(src_address) = src_address { - // Forbid transferring grants to the same address space! - if is_active { return Err(Error::new(EBUSY)); } - - let current_addrspace = AddrSpace::current()?; - let mut current_addrspace = current_addrspace.write(); - let current_addrspace = &mut *current_addrspace; - let src_grant = current_addrspace.grants.take(&Region::new(VirtualAddress::new(src_address), size)).ok_or(Error::new(EINVAL))?; - - if src_address % PAGE_SIZE != 0 || src_address.saturating_add(size) > crate::USER_END_OFFSET { - return Err(Error::new(EINVAL)); - } - - // TODO: Allow downgrading flags? - - addrspace.grants.insert(Grant::transfer( - src_grant, - base_page, - &mut current_addrspace.table.utable, - &mut addrspace.table.utable, - PageFlushAll::new(), - flusher, - )?); - } else if flags.intersects(MapFlags::PROT_READ | MapFlags::PROT_EXEC | MapFlags::PROT_WRITE) { - addrspace.grants.insert(Grant::zeroed(base_page, size / PAGE_SIZE, page_flags(flags), &mut addrspace.table.utable, flusher)?); - } - - // TODO: Set some "in use" flag every time an address space is switched to? This - // way, we know what hardware threads are using any given page table, which we need - // to know while doing TLB shootdown. - - Ok((3 + usize::from(src_address.is_some())) * mem::size_of::()) + Ok(0) } Operation::Regs(kind) => match kind { RegsKind::Float => { @@ -1036,9 +992,10 @@ impl Scheme for ProcScheme { self.handles.write().get_mut(&id).ok_or(Error::new(EBADF))?.info.operation = Operation::AwaitingSigactionsChange(sigactions); Ok(mem::size_of::()) } - Operation::MmapMinAddr => { + Operation::MmapMinAddr(ref addrspace) => { let val = usize::from_ne_bytes(<[u8; mem::size_of::()]>::try_from(buf).map_err(|_| Error::new(EINVAL))?); - with_context_mut(info.pid, |context| { context.mmap_min = val; Ok(()) })?; + if val % PAGE_SIZE != 0 || val > crate::USER_END_OFFSET { return Err(Error::new(EINVAL)); } + addrspace.write().mmap_min = val; Ok(mem::size_of::()) } _ => return Err(Error::new(EBADF)), @@ -1091,7 +1048,7 @@ impl Scheme for ProcScheme { Operation::CurrentFiletable => "current-filetable", Operation::CurrentSigactions => "current-sigactions", Operation::OpenViaDup => "open-via-dup", - Operation::MmapMinAddr => "mmap-min-addr", + Operation::MmapMinAddr(_) => "mmap-min-addr", _ => return Err(Error::new(EOPNOTSUPP)), }); @@ -1138,21 +1095,16 @@ impl Scheme for ProcScheme { let prev_addr_space = context.set_addr_space(new); - if let Some(mut prev) = prev_addr_space.and_then(|a| Arc::try_unwrap(a).ok()).map(RwLock::into_inner) { - // We are the last reference to the address space; therefore it must be - // unmapped. - - // TODO: Optimize away clearing of page tables? In that case, what about memory - // deallocation? - for grant in prev.grants.into_iter() { - grant.unmap(&mut prev.table.utable, ()); - } + if let Some(prev_addr_space) = prev_addr_space { + maybe_cleanup_addr_space(prev_addr_space); } Ok(()) })?; let _ = ptrace::send_event(crate::syscall::ptrace_event!(PTRACE_EVENT_ADDRSPACE_SWITCH, 0)); } + Operation::AddrSpace { addrspace } | Operation::Memory { addrspace } | Operation::MmapMinAddr(addrspace) => maybe_cleanup_addr_space(addrspace), + Operation::AwaitingFiletableChange(new) => with_context_mut(handle.info.pid, |context: &mut Context| { context.files = new; Ok(()) @@ -1178,19 +1130,8 @@ impl Scheme for ProcScheme { } Ok(0) } - // TODO: Support borrowing someone else's memory. fn fmap(&self, id: usize, map: &Map) -> Result { - let description_lock = match self.handles.read().get(&id) { - Some(Handle { info: Info { operation: Operation::GrantHandle { ref description }, .. }, .. }) => Arc::clone(description), - _ => return Err(Error::new(EBADF)), - }; - let (scheme_id, number) = { - let description = description_lock.read(); - - (description.scheme, description.number) - }; - let scheme = Arc::clone(scheme::schemes().get(scheme_id).ok_or(Error::new(EBADFD))?); - scheme.fmap(number, map) + self.kfmap(id, &AddrSpace::current()?, map, false) } } impl KernelScheme for ProcScheme { @@ -1215,6 +1156,71 @@ impl KernelScheme for ProcScheme { Err(Error::new(EBADF)) } } + fn kfmap(&self, id: usize, dst_addr_space: &Arc>, map: &crate::syscall::data::Map, consume: bool) -> Result { + let info = self.handles.read().get(&id).ok_or(Error::new(EBADF))?.info.clone(); + + match info.operation { + Operation::GrantHandle { ref description } => { + let (scheme_id, number) = { + let description = description.read(); + + (description.scheme, description.number) + }; + let scheme = Arc::clone(scheme::schemes().get(scheme_id).ok_or(Error::new(EBADFD))?); + scheme.fmap(number, map) + } + Operation::AddrSpace { ref addrspace } => { + if Arc::ptr_eq(addrspace, dst_addr_space) { + return Err(Error::new(EBUSY)); + } + // Limit to transferring/borrowing at most one grant, or part of a grant (splitting + // will be mandatory if grants are coalesced). + + let (requested_dst_page, page_count) = crate::syscall::validate_region(map.address, map.size)?; + let (src_page, _) = crate::syscall::validate_region(map.offset, map.size)?; + + let requested_dst_page = (map.address != 0).then_some(requested_dst_page); + + let mut src_addr_space = addrspace.write(); + let src_addr_space = &mut *src_addr_space; + let mut dst_addr_space = dst_addr_space.write(); + + let src_grant_region = { + let src_region = Region::new(src_page.start_address(), page_count * PAGE_SIZE); + let mut conflicts = src_addr_space.grants.conflicts(src_region); + let first = conflicts.next().ok_or(Error::new(EINVAL))?; + if conflicts.next().is_some() { + return Err(Error::new(EINVAL)); + } + + if !first.can_have_flags(map.flags) { + return Err(Error::new(EACCES)); + } + + first.region().intersect(src_region) + }; + + let grant_page_count = src_grant_region.size() / PAGE_SIZE; + + let src_mapper = &mut src_addr_space.table.utable; + + let result_page = if consume { + let grant = src_addr_space.grants.take(&src_grant_region).expect("grant cannot disappear"); + let (before, middle, after) = grant.extract(src_grant_region).expect("called intersect(), must succeed"); + + if let Some(before) = before { src_addr_space.grants.insert(before); } + if let Some(after) = after { src_addr_space.grants.insert(after); } + + dst_addr_space.mmap(requested_dst_page, grant_page_count, map.flags, |dst_page, flags, dst_mapper, dst_flusher| Ok(Grant::transfer(middle, dst_page, src_mapper, dst_mapper, InactiveFlusher::new(), dst_flusher)?))? + } else { + dst_addr_space.mmap(requested_dst_page, grant_page_count, map.flags, |dst_page, flags, dst_mapper, flusher| Ok(Grant::borrow(Page::containing_address(src_grant_region.start_address()), dst_page, grant_page_count, flags, None, src_mapper, dst_mapper, flusher)?))? + }; + + Ok(result_page.start_address().data()) + } + _ => return Err(Error::new(EBADF)), + } + } } extern "C" fn clone_handler() { let context_lock = Arc::clone(context::contexts().current().expect("expected the current context to be set in a spawn closure")); @@ -1273,3 +1279,16 @@ fn extract_scheme_number(fd: usize) -> Result<(Arc, usize)> { Ok((scheme, number)) } +fn maybe_cleanup_addr_space(addr_space: Arc>) { + if let Ok(mut space) = Arc::try_unwrap(addr_space).map(RwLock::into_inner) { + // We are the last reference to the address space; therefore it must be + // unmapped. + + // TODO: Optimize away clearing of page tables? In that case, what about memory + // deallocation? + for grant in space.grants.into_iter() { + grant.unmap(&mut space.table.utable, ()); + } + } + +} diff --git a/src/scheme/user.rs b/src/scheme/user.rs index 115d6bd..0f7f2cc 100644 --- a/src/scheme/user.rs +++ b/src/scheme/user.rs @@ -8,7 +8,7 @@ use spin::{Mutex, RwLock}; use crate::context::{self, Context}; use crate::context::file::FileDescriptor; -use crate::context::memory::{DANGLING, page_flags, Grant, Region, GrantFileRef}; +use crate::context::memory::{AddrSpace, DANGLING, page_flags, Grant, Region, GrantFileRef}; use crate::event; use crate::paging::{PAGE_SIZE, mapper::InactiveFlusher, Page, round_down_pages, round_up_pages, VirtualAddress}; use crate::scheme::{AtomicSchemeId, SchemeId}; @@ -128,8 +128,6 @@ impl UserInner { // where the initial context is closed. fn capture_inner(context_weak: &Weak>, dst_address: usize, address: usize, size: usize, flags: MapFlags, desc_opt: Option) -> Result { - // TODO: More abstractions over grant creation! - if size == 0 { // NOTE: Rather than returning NULL, we return a dummy dangling address, that is also // non-canonical on x86. This means that scheme handlers do not need to check the @@ -143,36 +141,23 @@ impl UserInner { return Ok(VirtualAddress::new(DANGLING)); } - let context_lock = context_weak.upgrade().ok_or(Error::new(ESRCH))?; - let mut context = context_lock.write(); + let dst_addr_space = Arc::clone(context_weak.upgrade().ok_or(Error::new(ESRCH))?.read().addr_space()?); + let mut dst_addr_space = dst_addr_space.write(); - let mut addr_space = context.addr_space()?.write(); - let addr_space = &mut *addr_space; + let src_page = Page::containing_address(VirtualAddress::new(round_down_pages(address))); + let offset = address - src_page.start_address().data(); + let page_count = round_up_pages(offset + size) / PAGE_SIZE; + let requested_dst_page = (dst_address != 0).then_some(Page::containing_address(VirtualAddress::new(round_down_pages(dst_address)))); - let src_address = round_down_pages(address); - let dst_address = round_down_pages(dst_address); - let offset = address - src_address; - let aligned_size = round_up_pages(offset + size); - let dst_region = addr_space.grants.find_free_at(context.mmap_min, VirtualAddress::new(dst_address), aligned_size, flags)?; - - let current_addrspace = Arc::clone( - context::contexts().current().ok_or(Error::new(ESRCH))? - .read().addr_space()? - ); + let current_addrspace = AddrSpace::current()?; + let mut current_addrspace = current_addrspace.write(); //TODO: Use syscall_head and syscall_tail to avoid leaking data - addr_space.grants.insert(Grant::borrow( - Page::containing_address(VirtualAddress::new(src_address)), - Page::containing_address(dst_region.start_address()), - aligned_size / PAGE_SIZE, - page_flags(flags), - desc_opt, - &mut current_addrspace.write().table.utable, - &mut addr_space.table.utable, - InactiveFlusher::new(), - )?); + let dst_page = dst_addr_space.mmap(requested_dst_page, page_count, flags, |dst_page, page_flags, mapper, flusher| { + Ok(Grant::borrow(src_page, dst_page, page_count, page_flags, desc_opt, &mut current_addrspace.table.utable, mapper, flusher)?) + })?; - Ok(VirtualAddress::new(dst_region.start_address().data() + offset)) + Ok(dst_page.start_address().add(offset)) } pub fn release(&self, address: usize) -> Result<()> { diff --git a/src/syscall/driver.rs b/src/syscall/driver.rs index 8a8d294..9440dd0 100644 --- a/src/syscall/driver.rs +++ b/src/syscall/driver.rs @@ -77,47 +77,38 @@ pub fn physfree(physical_address: usize, size: usize) -> Result { // `physaddr` to `address` (optional) will map that physical address. We would have to find out // some way to pass flags such as WRITE_COMBINE/NO_CACHE however. pub fn inner_physmap(physical_address: usize, size: usize, flags: PhysmapFlags) -> Result { - //TODO: Abstract with other grant creation - if size == 0 { - return Ok(DANGLING); - } - if size % PAGE_SIZE != 0 || physical_address % PAGE_SIZE != 0 { + // TODO: Check physical_address against MAXPHYADDR. + + let end = 1 << 52; + if physical_address.saturating_add(size) > end || physical_address % PAGE_SIZE != 0 || size % PAGE_SIZE != 0 { return Err(Error::new(EINVAL)); } - // TODO: Enforce size being a multiple of the page size, fail otherwise. let addr_space = Arc::clone(context::current()?.read().addr_space()?); - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); + let mut addr_space = addr_space.write(); - let mut addr_space = context.addr_space()?.write(); - let addr_space = &mut *addr_space; + addr_space.mmap(None, size / PAGE_SIZE, Default::default(), |dst_page, _, dst_mapper, dst_flusher| { + let mut page_flags = PageFlags::new().user(true); + if flags.contains(PHYSMAP_WRITE) { + page_flags = page_flags.write(true); + } + if flags.contains(PHYSMAP_WRITE_COMBINE) { + page_flags = page_flags.custom_flag(EntryFlags::HUGE_PAGE.bits(), true); + } + #[cfg(target_arch = "x86_64")] // TODO: AARCH64 + if flags.contains(PHYSMAP_NO_CACHE) { + page_flags = page_flags.custom_flag(EntryFlags::NO_CACHE.bits(), true); + } + Grant::physmap( + Frame::containing_address(PhysicalAddress::new(physical_address)), + dst_page, + size / PAGE_SIZE, + page_flags, + dst_mapper, + dst_flusher, + ) + }).map(|page| page.start_address().data()) - let dst_address = addr_space.grants.find_free(context.mmap_min, size).ok_or(Error::new(ENOMEM))?; - - let mut page_flags = PageFlags::new().user(true); - if flags.contains(PHYSMAP_WRITE) { - page_flags = page_flags.write(true); - } - if flags.contains(PHYSMAP_WRITE_COMBINE) { - page_flags = page_flags.custom_flag(EntryFlags::HUGE_PAGE.bits(), true); - } - #[cfg(target_arch = "x86_64")] // TODO: AARCH64 - if flags.contains(PHYSMAP_NO_CACHE) { - page_flags = page_flags.custom_flag(EntryFlags::NO_CACHE.bits(), true); - } - - addr_space.grants.insert(Grant::physmap( - Frame::containing_address(PhysicalAddress::new(physical_address)), - Page::containing_address(dst_address.start_address()), - size / PAGE_SIZE, - page_flags, - &mut addr_space.table.utable, - PageFlushAll::new(), - )?); - - Ok(dst_address.start_address().data()) } // TODO: Remove this syscall, funmap makes it redundant. pub fn physmap(physical_address: usize, size: usize, flags: PhysmapFlags) -> Result { diff --git a/src/syscall/fs.rs b/src/syscall/fs.rs index 7b8b052..353eab5 100644 --- a/src/syscall/fs.rs +++ b/src/syscall/fs.rs @@ -1,14 +1,10 @@ //! Filesystem syscalls use alloc::sync::Arc; -use alloc::vec::Vec; use core::str; use spin::RwLock; use crate::context::file::{FileDescriptor, FileDescription}; -use crate::context::memory::Region; use crate::context; -use crate::memory::PAGE_SIZE; -use crate::paging::{mapper::PageFlushAll, VirtualAddress}; use crate::scheme::{self, FileHandle}; use crate::syscall::data::{Packet, Stat}; use crate::syscall::error::*; @@ -469,64 +465,10 @@ pub fn fstat(fd: FileHandle, stat: &mut Stat) -> Result { } pub fn funmap(virtual_address: usize, length: usize) -> Result { - if virtual_address == 0 || length == 0 { - return Ok(0); - } else if virtual_address % PAGE_SIZE != 0 || length % PAGE_SIZE != 0 { - return Err(Error::new(EINVAL)); - } + let (page, page_count) = crate::syscall::validate::validate_region(virtual_address, length)?; - let mut notify_files = Vec::new(); - - let virtual_address = VirtualAddress::new(virtual_address); - let requested = Region::new(virtual_address, length); - let mut flusher = PageFlushAll::new(); - - { - let context_lock = Arc::clone(context::contexts().current().ok_or(Error::new(ESRCH))?); - let context = context_lock.read(); - - let mut addr_space = context.addr_space()?.write(); - let addr_space = &mut *addr_space; - - let conflicting: Vec = addr_space.grants.conflicts(requested).map(Region::from).collect(); - - for conflict in conflicting { - let grant = addr_space.grants.take(&conflict).expect("conflicting region didn't exist"); - let intersection = grant.intersect(requested); - let (before, mut grant, after) = grant.extract(intersection.round()).expect("conflicting region shared no common parts"); - - // Notify scheme that holds grant - if let Some(file_desc) = grant.desc_opt.take() { - notify_files.push((file_desc, intersection)); - } - - // Keep untouched regions - if let Some(before) = before { - addr_space.grants.insert(before); - } - if let Some(after) = after { - addr_space.grants.insert(after); - } - - // Remove irrelevant region - grant.unmap(&mut addr_space.table.utable, &mut flusher); - } - } - - for (file_ref, intersection) in notify_files { - let scheme_id = { file_ref.desc.description.read().scheme }; - - let scheme = { - let schemes = scheme::schemes(); - let scheme = schemes.get(scheme_id).ok_or(Error::new(EBADF))?; - scheme.clone() - }; - let res = scheme.funmap(intersection.start_address().data(), intersection.size()); - - let _ = file_ref.desc.close(); - - res?; - } + let addr_space = Arc::clone(context::current()?.read().addr_space()?); + addr_space.write().munmap(page, page_count); Ok(0) } diff --git a/src/syscall/mod.rs b/src/syscall/mod.rs index e25cf04..613bb00 100644 --- a/src/syscall/mod.rs +++ b/src/syscall/mod.rs @@ -25,6 +25,8 @@ pub use self::process::*; pub use self::time::*; pub use self::validate::*; +use self::scheme::Scheme as _; + use self::data::{Map, SigAction, Stat, TimeSpec}; use self::error::{Error, Result, ENOSYS, EINVAL}; use self::flag::{MapFlags, PhysmapFlags, WaitFlags}; @@ -70,7 +72,7 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u match a & SYS_ARG { SYS_ARG_SLICE => match a { SYS_FMAP if b == !0 => { - MemoryScheme::fmap_anonymous(unsafe { validate_ref(c as *const Map, d)? }) + MemoryScheme.fmap(!0, unsafe { validate_ref(c as *const Map, d)? }) }, _ => file_op_slice(a, fd, validate_slice(c as *const u8, d)?), }