diff --git a/Cargo.toml b/Cargo.toml index 82ead03..2bd3aa7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,9 +44,7 @@ raw-cpuid = "10.2.0" x86 = { version = "0.47.0", default-features = false } [features] -# TODO: Fix multicore -#default = ["acpi", "multi_core", "graphical_debug", "serial_debug"] -default = ["acpi", "graphical_debug", "serial_debug"] +default = ["acpi", "multi_core", "graphical_debug", "serial_debug"] acpi = [] doc = [] graphical_debug = [] diff --git a/src/arch/x86_64/interrupt/handler.rs b/src/arch/x86_64/interrupt/handler.rs index d8fa21f..ddbef82 100644 --- a/src/arch/x86_64/interrupt/handler.rs +++ b/src/arch/x86_64/interrupt/handler.rs @@ -150,8 +150,6 @@ impl InterruptStack { /// Loads all registers from a struct used by the proc: /// scheme to read/write registers. pub fn load(&mut self, all: &IntRegisters) { - // TODO: Which of these should be allowed to change? - self.preserved.r15 = all.r15; self.preserved.r14 = all.r14; self.preserved.r13 = all.r13; @@ -168,9 +166,11 @@ impl InterruptStack { self.scratch.rcx = all.rcx; self.scratch.rax = all.rax; self.iret.rip = all.rip; + self.iret.rsp = all.rsp; - // These should probably be restricted - // self.iret.cs = all.cs; + // CS and SS are immutable + + // TODO: RFLAGS should be restricted before being changeable // self.iret.rflags = all.eflags; } /// Enables the "Trap Flag" in the FLAGS register, causing the CPU diff --git a/src/arch/x86_64/paging/mapper.rs b/src/arch/x86_64/paging/mapper.rs index babefcc..ecc2d0b 100644 --- a/src/arch/x86_64/paging/mapper.rs +++ b/src/arch/x86_64/paging/mapper.rs @@ -171,17 +171,18 @@ impl<'table> Mapper<'table> { } pub fn translate_page(&self, page: Page) -> Option { - self.p4().next_table(page.p4_index()) - .and_then(|p3| p3.next_table(page.p3_index())) - .and_then(|p2| p2.next_table(page.p2_index())) - .and_then(|p1| p1[page.p1_index()].pointed_frame()) + self.translate_page_and_flags(page).map(|(frame, _)| frame) } pub fn translate_page_flags(&self, page: Page) -> Option> { + self.translate_page_and_flags(page).map(|(_, flags)| flags) + } + pub fn translate_page_and_flags(&self, page: Page) -> Option<(Frame, PageFlags)> { self.p4().next_table(page.p4_index()) .and_then(|p3| p3.next_table(page.p3_index())) .and_then(|p2| p2.next_table(page.p2_index())) - .and_then(|p1| Some(p1[page.p1_index()].flags())) + .map(|p1| &p1[page.p1_index()]) + .and_then(|entry| Some((entry.pointed_frame()?, entry.flags()))) } /// Translate a virtual address to a physical one diff --git a/src/context/memory.rs b/src/context/memory.rs index 6fd567c..619e93e 100644 --- a/src/context/memory.rs +++ b/src/context/memory.rs @@ -168,20 +168,20 @@ impl UserGrants { } } } - fn unreserve(&mut self, grant: &Region) { + fn unreserve(holes: &mut BTreeMap, grant: &Region) { // The size of any possible hole directly after the to-be-freed region. - let exactly_after_size = self.holes.remove(&grant.end_address()); + let exactly_after_size = holes.remove(&grant.end_address()); // There was a range that began exactly prior to the to-be-freed region, so simply // increment the size such that it occupies the grant too. If in additional there was a // grant directly after the grant, include it too in the size. - if let Some((hole_offset, hole_size)) = self.holes.range_mut(..grant.start_address()).next_back().filter(|(offset, size)| offset.data() + **size == grant.start_address().data()) { + if let Some((hole_offset, hole_size)) = holes.range_mut(..grant.start_address()).next_back().filter(|(offset, size)| offset.data() + **size == grant.start_address().data()) { *hole_size = grant.end_address().data() - hole_offset.data() + exactly_after_size.unwrap_or(0); } else { // There was no free region directly before the to-be-freed region, however will // now unconditionally insert a new free region where the grant was, and add that extra // size if there was something after it. - self.holes.insert(grant.start_address(), grant.size() + exactly_after_size.unwrap_or(0)); + holes.insert(grant.start_address(), grant.size() + exactly_after_size.unwrap_or(0)); } } pub fn insert(&mut self, grant: Grant) { @@ -193,7 +193,7 @@ impl UserGrants { } pub fn take(&mut self, region: &Region) -> Option { let grant = self.inner.take(region)?; - self.unreserve(region); + Self::unreserve(&mut self.holes, region); Some(grant) } pub fn iter(&self) -> impl Iterator + '_ { diff --git a/src/lib.rs b/src/lib.rs index 1fbfbe8..80ba746 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -43,6 +43,7 @@ #![deny(unused_must_use)] #![feature(allocator_api)] +#![feature(array_chunks)] #![feature(asm_const, asm_sym)] // TODO: Relax requirements of most asm invocations #![cfg_attr(target_arch = "aarch64", feature(llvm_asm))] // TODO: Rewrite using asm! #![feature(concat_idents)] @@ -53,6 +54,7 @@ #![feature(lang_items)] #![feature(naked_functions)] #![feature(ptr_internals)] +#![feature(slice_ptr_get, slice_ptr_len)] #![feature(thread_local)] #![no_std] diff --git a/src/ptrace.rs b/src/ptrace.rs index 7f7711a..7327170 100644 --- a/src/ptrace.rs +++ b/src/ptrace.rs @@ -2,6 +2,8 @@ //! handling should go here, unless they closely depend on the design //! of the scheme. +use rmm::Arch; + use crate::{ arch::{ interrupt::InterruptStack, @@ -21,6 +23,7 @@ use crate::{ flag::*, ptrace_event }, + CurrentRmmArch as RmmA, }; use alloc::{ @@ -445,66 +448,41 @@ pub unsafe fn regs_for_mut(context: &mut Context) -> Option<&mut InterruptStack> // |_| |_|\___|_| |_| |_|\___/|_| \__, | // |___/ -pub fn with_context_memory(context: &mut Context, offset: VirtualAddress, len: usize, f: F) -> Result<()> -where F: FnOnce(*mut u8) -> Result<()> -{ - // As far as I understand, mapping any regions following - // USER_TMP_MISC_OFFSET is safe because no other memory location - // is used after it. In the future it might be necessary to define - // a maximum amount of pages that can be mapped in one batch, - // which could be used to either internally retry `read`/`write` - // in `proc:/mem`, or return a partial read/write. - let start = Page::containing_address(VirtualAddress::new(crate::USER_TMP_MISC_OFFSET)); +// Returns an iterator which splits [start, start + len) into an iterator of possibly trimmed +// pages. +fn page_aligned_chunks(mut start: usize, mut len: usize) -> impl Iterator { + // TODO: Define this elsewhere! + #[cfg(target_arch = "x86_64")] + const KERNEL_SPLIT_START: usize = crate::PML4_SIZE * 256; - let mut active_page_table = unsafe { ActivePageTable::new(TableKind::User) }; - let mut target_page_table = unsafe { - InactivePageTable::from_address(context.arch.get_page_utable()) - }; - - // Find the physical frames for all pages - let mut frames = Vec::new(); - - { - let mapper = target_page_table.mapper(); - - let mut inner = || -> Result<()> { - let start = Page::containing_address(offset); - let end = Page::containing_address(VirtualAddress::new(offset.data() + len - 1)); - for page in Page::range_inclusive(start, end) { - frames.push(( - mapper.translate_page(page).ok_or(Error::new(EFAULT))?, - mapper.translate_page_flags(page).ok_or(Error::new(EFAULT))? - )); - } - Ok(()) - }; - inner()?; + // Ensure no pages can overlap with kernel memory. + if start.saturating_add(len) > KERNEL_SPLIT_START { + len = KERNEL_SPLIT_START.saturating_sub(start); } - // Map all the physical frames into linear pages - let pages = frames.len(); - let mut page = start; - let flush_all = PageFlushAll::new(); - for (frame, mut flags) in frames { - flags = flags.execute(false).write(true); - flush_all.consume(active_page_table.map_to(page, frame, flags)); + let first_len = core::cmp::min(len, PAGE_SIZE - start % PAGE_SIZE); + let first = Some((start, first_len)).filter(|(_, len)| *len > 0); + start += first_len; + len -= first_len; - page = page.next(); - } + let last_len = len % PAGE_SIZE; + len -= last_len; + let last = Some((start + len, last_len)).filter(|(_, len)| *len > 0); - flush_all.flush(); - - let res = f((start.start_address().data() + offset.data() % PAGE_SIZE) as *mut u8); - - // Unmap all the pages (but allow no deallocation!) - let mut page = start; - let flush_all = PageFlushAll::new(); - for _ in 0..pages { - flush_all.consume(active_page_table.unmap_return(page, true).0); - page = page.next(); - } - - flush_all.flush(); - - res + first.into_iter().chain((start..start + len).step_by(PAGE_SIZE).map(|off| (off, PAGE_SIZE))).chain(last) +} + +pub fn context_memory(context: &mut Context, offset: VirtualAddress, len: usize) -> impl Iterator> + '_ { + let mut table = unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) }; + + page_aligned_chunks(offset.data(), len).map(move |(addr, len)| unsafe { + // [addr,addr+len) is a continuous page starting and/or ending at page boundaries, with the + // possible exception of an unaligned head/tail. + + //log::info!("ADDR {:p} LEN {:#0x}", page as *const u8, len); + + let frame = table.mapper().translate_page(Page::containing_address(VirtualAddress::new(addr)))?; + let start = RmmA::phys_to_virt(frame.start_address()).data() + addr % crate::memory::PAGE_SIZE; + Some(core::ptr::slice_from_raw_parts_mut(start as *mut u8, len)) + }) } diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs index e3365e2..76bff3c 100644 --- a/src/scheme/proc.rs +++ b/src/scheme/proc.rs @@ -1,6 +1,7 @@ use crate::{ - arch::paging::VirtualAddress, - context::{self, Context, ContextId, Status}, + arch::paging::{ActivePageTable, InactivePageTable, mapper::{Mapper, PageFlushAll}, Page, VirtualAddress}, + context::{self, Context, ContextId, Status, memory::{Grant, page_flags, Region}}, + memory::PAGE_SIZE, ptrace, scheme::{AtomicSchemeId, SchemeId}, syscall::{ @@ -19,6 +20,8 @@ use crate::{ use alloc::{ boxed::Box, collections::BTreeMap, + string::{String, ToString}, + sync::Arc, vec::Vec, }; use core::{ @@ -31,6 +34,14 @@ use core::{ }; use spin::RwLock; +fn read_from(dst: &mut [u8], src: &[u8], offset: &mut usize) -> Result { + let byte_count = cmp::min(dst.len(), src.len().saturating_sub(*offset)); + let next_offset = offset.saturating_add(byte_count); + dst[..byte_count].copy_from_slice(&src[*offset..next_offset]); + *offset = next_offset; + Ok(byte_count) +} + fn with_context(pid: ContextId, callback: F) -> Result where F: FnOnce(&Context) -> Result, @@ -99,22 +110,27 @@ enum RegsKind { #[derive(Clone, Copy, PartialEq, Eq)] enum Operation { Memory, + Grants, Regs(RegsKind), Trace, Static(&'static str), Name, Sigstack, + Attr(Attr), + Files, +} +#[derive(Clone, Copy, PartialEq, Eq)] +enum Attr { + Uid, + Gid, + // TODO: namespace, tid, etc. } impl Operation { fn needs_child_process(self) -> bool { - match self { - Self::Memory => true, - Self::Regs(_) => true, - Self::Trace => true, - Self::Static(_) => false, - Self::Name => false, - Self::Sigstack => false, - } + matches!(self, Self::Memory | Self::Grants | Self::Regs(_) | Self::Trace | Self::Files) + } + fn needs_root(self) -> bool { + matches!(self, Self::Attr(_)) } } struct MemData { @@ -248,6 +264,7 @@ impl Scheme for ProcScheme { let operation = match parts.next() { Some("mem") => Operation::Memory, + Some("grants") => Operation::Grants, Some("regs/float") => Operation::Regs(RegsKind::Float), Some("regs/int") => Operation::Regs(RegsKind::Int), Some("regs/env") => Operation::Regs(RegsKind::Env), @@ -255,13 +272,16 @@ impl Scheme for ProcScheme { Some("exe") => Operation::Static("exe"), Some("name") => Operation::Name, Some("sigstack") => Operation::Sigstack, + Some("uid") => Operation::Attr(Attr::Uid), + Some("gid") => Operation::Attr(Attr::Gid), + Some("files") => Operation::Files, _ => return Err(Error::new(EINVAL)) }; let contexts = context::contexts(); let target = contexts.get(pid).ok_or(Error::new(ESRCH))?; - let data; + let mut data; { let target = target.read(); @@ -303,6 +323,20 @@ impl Scheme for ProcScheme { None => return Err(Error::new(EPERM)), } } + } else if operation.needs_root() && (uid != 0 || gid != 0) { + return Err(Error::new(EPERM)); + } + + if matches!(operation, Operation::Files) { + data = OperationData::Static(StaticData::new({ + use core::fmt::Write; + + let mut data = String::new(); + for index in target.files.read().iter().enumerate().filter_map(|(idx, val)| val.as_ref().map(|_| idx)) { + write!(data, "{}\n", index).unwrap(); + } + data.into_bytes().into_boxed_slice() + })); } }; @@ -407,14 +441,23 @@ impl Scheme for ProcScheme { let context = contexts.get(info.pid).ok_or(Error::new(ESRCH))?; let mut context = context.write(); - ptrace::with_context_memory(&mut context, data.offset, buf.len(), |ptr| { - buf.copy_from_slice(validate::validate_slice(ptr, buf.len())?); - Ok(()) - })?; + let mut bytes_read = 0; - data.offset = VirtualAddress::new(data.offset.data() + buf.len()); - Ok(buf.len()) + for chunk_opt in ptrace::context_memory(&mut *context, data.offset, buf.len()) { + let chunk = chunk_opt.ok_or(Error::new(EFAULT))?; + let dst_slice = &mut buf[bytes_read..bytes_read + chunk.len()]; + unsafe { + chunk.as_mut_ptr().copy_to_nonoverlapping(dst_slice.as_mut_ptr(), dst_slice.len()); + } + bytes_read += chunk.len(); + } + + data.offset = VirtualAddress::new(data.offset.data() + bytes_read); + Ok(bytes_read) }, + // TODO: Allow reading process mappings? + Operation::Grants => return Err(Error::new(EBADF)), + Operation::Regs(kind) => { union Output { float: FloatRegisters, @@ -526,19 +569,22 @@ impl Scheme for ProcScheme { // Return read events Ok(read * mem::size_of::()) } - Operation::Name => match &*context::contexts().current().ok_or(Error::new(ESRCH))?.read().name.read() { - name => { - let to_copy = cmp::min(buf.len(), name.len()); - buf[..to_copy].copy_from_slice(&name.as_bytes()[..to_copy]); - Ok(to_copy) - } + Operation::Name => read_from(buf, context::contexts().get(info.pid).ok_or(Error::new(ESRCH))?.read().name.read().as_bytes(), &mut 0), + Operation::Sigstack => read_from(buf, &context::contexts().get(info.pid).ok_or(Error::new(ESRCH))?.read().sigstack.unwrap_or(!0).to_ne_bytes(), &mut 0), + Operation::Attr(attr) => { + let src_buf = match (attr, &*Arc::clone(context::contexts().get(info.pid).ok_or(Error::new(ESRCH))?).read()) { + (Attr::Uid, context) => context.euid.to_string(), + (Attr::Gid, context) => context.egid.to_string(), + }.into_bytes(); + + read_from(buf, &src_buf, &mut 0) } - Operation::Sigstack => match context::contexts().current().ok_or(Error::new(ESRCH))?.read().sigstack.unwrap_or(!0).to_ne_bytes() { - sigstack => { - let to_copy = cmp::min(buf.len(), sigstack.len()); - buf[..to_copy].copy_from_slice(&sigstack[..to_copy]); - Ok(to_copy) - } + Operation::Files => { + let mut handles = self.handles.write(); + let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; + let data = handle.data.static_data().expect("operations can't change"); + + read_from(buf, &data.buf, &mut data.offset) } } } @@ -571,14 +617,89 @@ impl Scheme for ProcScheme { let context = contexts.get(info.pid).ok_or(Error::new(ESRCH))?; let mut context = context.write(); - ptrace::with_context_memory(&mut context, data.offset, buf.len(), |ptr| { - validate::validate_slice_mut(ptr, buf.len())?.copy_from_slice(buf); - Ok(()) - })?; + let mut bytes_written = 0; - data.offset = VirtualAddress::new(data.offset.data() + buf.len()); - Ok(buf.len()) + for chunk_opt in ptrace::context_memory(&mut *context, data.offset, buf.len()) { + let chunk = chunk_opt.ok_or(Error::new(EFAULT))?; + let src_slice = &buf[bytes_written..bytes_written + chunk.len()]; + unsafe { + chunk.as_mut_ptr().copy_from_nonoverlapping(src_slice.as_ptr(), src_slice.len()); + } + bytes_written += chunk.len(); + } + + data.offset = VirtualAddress::new(data.offset.data() + bytes_written); + Ok(bytes_written) }, + Operation::Grants => { + // FIXME: Forbid upgrading external mappings. + + let pid = self.handles.read() + .get(&id).ok_or(Error::new(EBADF))? + .info.pid; + + let mut chunks = buf.array_chunks::<{mem::size_of::()}>().copied().map(usize::from_ne_bytes); + // Update grant mappings, like mprotect but allowed to target other contexts. + let base = chunks.next().ok_or(Error::new(EINVAL))?; + let size = chunks.next().ok_or(Error::new(EINVAL))?; + let flags = chunks.next().and_then(|f| MapFlags::from_bits(f)).ok_or(Error::new(EINVAL))?; + let region = Region::new(VirtualAddress::new(base), size); + + if base % PAGE_SIZE != 0 || size % PAGE_SIZE != 0 || base.saturating_add(size) > crate::PML4_SIZE * 256 { + return Err(Error::new(EINVAL)); + } + + let is_inactive = pid != context::context_id(); + + let callback = |context: &mut Context| { + let mut inactive = is_inactive.then(|| unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) }); + + let mut grants = context.grants.write(); + + let conflicting = grants.conflicts(region).map(|g| *g.region()).collect::>(); + for conflicting_region in conflicting { + let whole_grant = grants.take(&conflicting_region).ok_or(Error::new(EBADFD))?; + let (before_opt, current, after_opt) = whole_grant.extract(region.intersect(conflicting_region)).ok_or(Error::new(EBADFD))?; + + if let Some(before) = before_opt { + grants.insert(before); + } + if let Some(after) = after_opt { + grants.insert(after); + } + + let res = if let Some(ref mut inactive) = inactive { + current.unmap_inactive(inactive) + } else { + current.unmap() + }; + if res.file_desc.is_some() { + drop(grants); + return Err(Error::new(EBUSY)); + } + + // TODO: Partial free if grant is mapped externally. + } + + if flags.intersects(MapFlags::PROT_READ | MapFlags::PROT_EXEC | MapFlags::PROT_WRITE) { + let base = VirtualAddress::new(base); + + if let Some(ref mut inactive) = inactive { + grants.insert(Grant::zeroed_inactive(Page::containing_address(base), size / PAGE_SIZE, page_flags(flags), inactive).unwrap()); + } else { + grants.insert(Grant::map(base, size, page_flags(flags))); + } + } + Ok(()) + }; + + if is_inactive { + with_context_mut(pid, callback)?; + } else { + try_stop_context(pid, callback)?; + } + Ok(3 * mem::size_of::()) + } Operation::Regs(kind) => match kind { RegsKind::Float => { if buf.len() < mem::size_of::() { @@ -727,15 +848,26 @@ impl Scheme for ProcScheme { }, Operation::Name => { let utf8 = alloc::string::String::from_utf8(buf.to_vec()).map_err(|_| Error::new(EINVAL))?.into_boxed_str(); - *context::contexts().current().ok_or(Error::new(ESRCH))?.read().name.write() = utf8; + *context::contexts().get(info.pid).ok_or(Error::new(ESRCH))?.read().name.write() = utf8; Ok(buf.len()) } Operation::Sigstack => { let bytes = <[u8; mem::size_of::()]>::try_from(buf).map_err(|_| Error::new(EINVAL))?; let sigstack = usize::from_ne_bytes(bytes); - context::contexts().current().ok_or(Error::new(ESRCH))?.write().sigstack = (sigstack != !0).then(|| sigstack); + context::contexts().get(info.pid).ok_or(Error::new(ESRCH))?.write().sigstack = (sigstack != !0).then(|| sigstack); Ok(buf.len()) } + Operation::Attr(attr) => { + let context_lock = Arc::clone(context::contexts().get(info.pid).ok_or(Error::new(ESRCH))?); + let id = core::str::from_utf8(buf).map_err(|_| Error::new(EINVAL))?.parse::().map_err(|_| Error::new(EINVAL))?; + + match attr { + Attr::Uid => context_lock.write().euid = id, + Attr::Gid => context_lock.write().egid = id, + } + Ok(buf.len()) + } + Operation::Files => return Err(Error::new(EBADF)), } } @@ -768,6 +900,7 @@ impl Scheme for ProcScheme { let path = format!("proc:{}/{}", handle.info.pid.into(), match handle.info.operation { Operation::Memory => "mem", + Operation::Grants => "grants", Operation::Regs(RegsKind::Float) => "regs/float", Operation::Regs(RegsKind::Int) => "regs/int", Operation::Regs(RegsKind::Env) => "regs/env", @@ -775,12 +908,12 @@ impl Scheme for ProcScheme { Operation::Static(path) => path, Operation::Name => "name", Operation::Sigstack => "sigstack", + Operation::Attr(Attr::Uid) => "uid", + Operation::Attr(Attr::Gid) => "gid", + Operation::Files => "files", }); - let len = cmp::min(path.len(), buf.len()); - buf[..len].copy_from_slice(&path.as_bytes()[..len]); - - Ok(len) + read_from(buf, &path.as_bytes(), &mut 0) } fn fstat(&self, id: usize, stat: &mut Stat) -> Result { diff --git a/src/syscall/process.rs b/src/syscall/process.rs index 090b7ca..51eecfe 100644 --- a/src/syscall/process.rs +++ b/src/syscall/process.rs @@ -260,7 +260,9 @@ pub fn clone(flags: CloneFlags, stack_base: usize, info: Option<&CloneInfo>) -> #[cfg(all(target_arch = "x86_64", feature = "x86_fsgsbase"))] unsafe { context.arch.fsbase = x86::bits64::segmentation::rdfsbase() as usize; + x86::bits64::segmentation::swapgs(); context.arch.gsbase = x86::bits64::segmentation::rdgsbase() as usize; + x86::bits64::segmentation::swapgs(); } if flags.contains(CloneFlags::CLONE_VM) { @@ -1116,10 +1118,6 @@ pub fn exec(memranges: &[ExecMemRange], instruction_ptr: usize, stack_ptr: usize // TODO: Reuse in place if the file table is not shared. drop(context); - for file_slot in old_files.iter_mut().filter(|file_opt| file_opt.as_ref().map_or(false, |file| file.cloexec)) { - let file = file_slot.take().expect("iterator filter requires file slot to be occupied, not None"); - let _ = file.close(); - } let mut context = current_context_lock.write(); context.files = Arc::new(RwLock::new(old_files));