diff --git a/Cargo.lock b/Cargo.lock index 33f358d..7c81df0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -116,9 +116,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.16" +version = "0.4.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6389c490849ff5bc16be905ae24bc913a9c8892e19b2341dbc175e14c341c2b8" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" dependencies = [ "cfg-if", ] diff --git a/rmm b/rmm index 0944b17..9462df0 160000 --- a/rmm +++ b/rmm @@ -1 +1 @@ -Subproject commit 0944b17983223966e339a25f9328bdb77a59d5c7 +Subproject commit 9462df03e786312b6ce197cf56113d411412cbb2 diff --git a/src/arch/x86_64/paging/mod.rs b/src/arch/x86_64/paging/mod.rs index 347aebc..51e3587 100644 --- a/src/arch/x86_64/paging/mod.rs +++ b/src/arch/x86_64/paging/mod.rs @@ -386,7 +386,10 @@ impl Page { } } - pub fn range_inclusive(start: Page, end: Page) -> PageIter { + pub fn range_inclusive(start: Page, r#final: Page) -> PageIter { + PageIter { start, end: r#final.next() } + } + pub fn range_exclusive(start: Page, end: Page) -> PageIter { PageIter { start, end } } @@ -406,7 +409,7 @@ impl Iterator for PageIter { type Item = Page; fn next(&mut self) -> Option { - if self.start <= self.end { + if self.start < self.end { let page = self.start; self.start = self.start.next(); Some(page) diff --git a/src/context/context.rs b/src/context/context.rs index 99a8e7f..276626c 100644 --- a/src/context/context.rs +++ b/src/context/context.rs @@ -226,12 +226,6 @@ pub struct Context { pub ksig: Option<(arch::Context, Option>, Option>, u8)>, /// Restore ksig context on next switch pub ksig_restore: bool, - /// Executable image - pub image: Vec, - /// User stack - pub stack: Option, - /// User signal stack - pub sigstack: Option, /// User grants pub grants: Arc>, /// The name of the context @@ -338,9 +332,6 @@ impl Context { kstack: None, ksig: None, ksig_restore: false, - image: Vec::new(), - stack: None, - sigstack: None, 
grants: Arc::new(RwLock::new(UserGrants::default())), name: Arc::new(RwLock::new(String::new().into_boxed_str())), cwd: Arc::new(RwLock::new(String::new())), diff --git a/src/context/memory.rs b/src/context/memory.rs index 209d908..65fa3f4 100644 --- a/src/context/memory.rs +++ b/src/context/memory.rs @@ -77,7 +77,8 @@ impl UserGrants { // Get last used region let last = self.inner.iter().next_back().map(Region::from).unwrap_or(Region::new(VirtualAddress::new(0), 0)); // At the earliest, start at grant offset - let address = cmp::max(last.end_address().data(), crate::USER_GRANT_OFFSET); + // TODO: placeholder placement directly below the last region's start; this subtraction underflows when there are no grants (fallback region starts at 0) + let address = last.start_address().data() - size; // Create new region Region::new(VirtualAddress::new(address), size) } @@ -224,7 +225,7 @@ impl Region { pub fn pages(&self) -> PageIter { Page::range_inclusive( Page::containing_address(self.start_address()), - Page::containing_address(self.end_address()) + Page::containing_address(self.final_address()) ) } diff --git a/src/lib.rs b/src/lib.rs index b174389..5dcbcc8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -172,33 +172,27 @@ static mut INIT_ENV: &[u8] = &[]; /// Initialize userspace by running the initfs:bin/init process /// This function will also set the CWD to initfs:bin and open debug: as stdio pub extern fn userspace_init() { - let path = "initfs:/bin/init"; - let env = unsafe { INIT_ENV }; + let path = "initfs:/bin/bootstrap"; if let Err(err) = syscall::chdir("initfs:") { info!("Failed to enter initfs ({}).", err); panic!("Unexpected error while trying to enter initfs:."); } - assert_eq!(syscall::open("debug:", syscall::flag::O_RDONLY).map(FileHandle::into), Ok(0)); - assert_eq!(syscall::open("debug:", syscall::flag::O_WRONLY).map(FileHandle::into), Ok(1)); - assert_eq!(syscall::open("debug:", syscall::flag::O_WRONLY).map(FileHandle::into), Ok(2)); - let fd = syscall::open(path, syscall::flag::O_RDONLY).expect("failed to open init"); - let mut args = Vec::new(); - 
args.push(path.as_bytes().to_vec().into_boxed_slice()); + let mut total_bytes_read = 0; + let mut data = Vec::new(); - let mut vars = Vec::new(); - for var in env.split(|b| *b == b'\n') { - if ! var.is_empty() { - vars.push(var.to_vec().into_boxed_slice()); - } + loop { + data.resize(total_bytes_read + 4096, 0); + let bytes_read = syscall::file_op_mut_slice(syscall::number::SYS_READ, fd, &mut data[total_bytes_read..]).expect("failed to read init"); + if bytes_read == 0 { break } + total_bytes_read += bytes_read; } + let _ = syscall::close(fd); - syscall::fexec_kernel(fd, args.into_boxed_slice(), vars.into_boxed_slice(), None, None).expect("failed to execute init"); - - panic!("init returned"); + crate::syscall::process::usermode_bootstrap(data.into_boxed_slice()); } /// This is the kernel entry point for the primary CPU. The arch crate is responsible for calling this diff --git a/src/scheme/memory.rs b/src/scheme/memory.rs index 2636ebf..cac3743 100644 --- a/src/scheme/memory.rs +++ b/src/scheme/memory.rs @@ -17,34 +17,33 @@ impl MemoryScheme { pub fn fmap_anonymous(map: &Map) -> Result { //TODO: Abstract with other grant creation if map.size == 0 { - Ok(0) - } else { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); + return Ok(0); + } + let contexts = context::contexts(); + let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; + let context = context_lock.read(); - let mut grants = context.grants.write(); + let mut grants = context.grants.write(); - let region = grants.find_free_at(VirtualAddress::new(map.address), map.size, map.flags)?.round(); + let region = grants.find_free_at(VirtualAddress::new(map.address), map.size, map.flags)?.round(); - { - // Make sure it's *absolutely* not mapped already - // TODO: Keep track of all allocated memory so this isn't necessary + { + // Make sure it's *absolutely* not mapped already + // TODO: Keep track of all allocated 
memory so this isn't necessary - let active_table = unsafe { ActivePageTable::new(VirtualAddress::new(map.address).kind()) }; + let active_table = unsafe { ActivePageTable::new(rmm::TableKind::User) }; - for page in region.pages() { - if active_table.translate_page(page).is_some() { - println!("page at {:#x} was already mapped", page.start_address().data()); - return Err(Error::new(EEXIST)) - } + for page in region.pages() { + if active_table.translate_page(page).is_some() { + println!("page at {:#x} was already mapped", page.start_address().data()); + return Err(Error::new(EEXIST)) } } - - grants.insert(Grant::map(region.start_address(), region.size(), page_flags(map.flags))); - - Ok(region.start_address().data()) } + + grants.insert(Grant::map(region.start_address(), region.size(), page_flags(map.flags))); + + Ok(region.start_address().data()) } } impl Scheme for MemoryScheme { diff --git a/src/scheme/sys/context.rs b/src/scheme/sys/context.rs index 3002f55..1a776a2 100644 --- a/src/scheme/sys/context.rs +++ b/src/scheme/sys/context.rs @@ -26,10 +26,12 @@ pub fn resource() -> Result> { let context = context_lock.read(); let mut stat_string = String::new(); - if context.stack.is_some() { - stat_string.push('U'); - } else { + // TODO: All user programs must have some grant in order for executable memory to even + // exist, but is this a good indicator of whether it is user or kernel? 
+ if context.grants.read().is_empty() { stat_string.push('K'); + } else { + stat_string.push('U'); } match context.status { context::Status::Runnable => { @@ -77,19 +79,6 @@ pub fn resource() -> Result> { if let Some(ref kstack) = context.kstack { memory += kstack.len(); } - for shared_mem in context.image.iter() { - shared_mem.with(|mem| { - memory += mem.size(); - }); - } - if let Some(ref stack) = context.stack { - stack.with(|stack| { - memory += stack.size(); - }); - } - if let Some(ref sigstack) = context.sigstack { - memory += sigstack.size(); - } for grant in context.grants.read().iter() { if grant.is_owned() { memory += grant.size(); diff --git a/src/syscall/debug.rs b/src/syscall/debug.rs index 4567058..5f60785 100644 --- a/src/syscall/debug.rs +++ b/src/syscall/debug.rs @@ -192,8 +192,8 @@ pub fn format_call(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize) - b ), //TODO: Cleanup, do not allocate - SYS_FEXEC => format!( - "fexec({}, {:?}, {:?})", + /*SYS_EXEC => format!( + "exec({}, {:?}, {:?})", b, validate_slice( c as *const [usize; 2], @@ -213,7 +213,7 @@ pub fn format_call(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize) - .and_then(|s| ::core::str::from_utf8(s).ok()) ).collect::>>() }) - ), + ),*/ SYS_FUTEX => format!( "futex({:#X} [{:?}], {}, {}, {}, {})", b, diff --git a/src/syscall/mod.rs b/src/syscall/mod.rs index cb64c4c..f4e7386 100644 --- a/src/syscall/mod.rs +++ b/src/syscall/mod.rs @@ -83,7 +83,6 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u SYS_DUP => dup(fd, validate_slice(c as *const u8, d)?).map(FileHandle::into), SYS_DUP2 => dup2(fd, FileHandle::from(c), validate_slice(d as *const u8, e)?).map(FileHandle::into), SYS_FCNTL => fcntl(fd, c, d), - SYS_FEXEC => fexec(fd, validate_slice(c as *const [usize; 2], d)?, validate_slice(e as *const [usize; 2], f)?), SYS_FRENAME => frename(fd, validate_str(c as *const u8, d)?), SYS_FUNMAP => funmap(b, c), SYS_FMAP_OLD => { @@ -210,13 +209,12 
@@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u } } - /* let debug = { let contexts = crate::context::contexts(); if let Some(context_lock) = contexts.current() { let context = context_lock.read(); let name = context.name.read(); - if name.contains("redoxfs") { + if true || name.contains("redoxfs") { if a == SYS_CLOCK_GETTIME || a == SYS_YIELD { false } else if (a == SYS_WRITE || a == SYS_FSYNC) && (b == 1 || b == 2) { @@ -241,7 +239,6 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u println!("{}", debug::format_call(a, b, c, d, e, f)); } - */ // The next lines set the current syscall in the context struct, then once the inner() function // completes, we set the current syscall to none. @@ -266,7 +263,6 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u } } - /* if debug { let contexts = crate::context::contexts(); if let Some(context_lock) = contexts.current() { @@ -285,7 +281,6 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u } } } - */ // errormux turns Result into -errno Error::mux(result) diff --git a/src/syscall/process.rs b/src/syscall/process.rs index c246568..11fd16b 100644 --- a/src/syscall/process.rs +++ b/src/syscall/process.rs @@ -6,6 +6,7 @@ use alloc::{ vec::Vec, }; use core::alloc::{GlobalAlloc, Layout}; +use core::convert::TryFrom; use core::ops::DerefMut; use core::{intrinsics, mem, str}; use spin::{RwLock, RwLockWriteGuard}; @@ -53,9 +54,6 @@ pub fn clone(flags: CloneFlags, stack_base: usize) -> Result { let mut kfx_opt = None; let mut kstack_opt = None; let mut offset = 0; - let mut image = vec![]; - let mut stack_opt = None; - let mut sigstack_opt = None; let mut grants; let name; let cwd; @@ -143,74 +141,6 @@ pub fn clone(flags: CloneFlags, stack_base: usize) -> Result { } } - if flags.contains(CLONE_VM) { - for memory_shared in context.image.iter() { - image.push(memory_shared.clone()); - } - } else { - 
for memory_shared in context.image.iter() { - memory_shared.with(|memory| { - let mut new_memory = context::memory::Memory::new( - VirtualAddress::new(memory.start_address().data() + crate::USER_TMP_OFFSET), - memory.size(), - PageFlags::new().write(true), - false - ); - - unsafe { - intrinsics::copy(memory.start_address().data() as *const u8, - new_memory.start_address().data() as *mut u8, - memory.size()); - } - - new_memory.remap(memory.flags()); - image.push(new_memory.to_shared()); - }); - } - } - - if let Some(ref stack_shared) = context.stack { - if flags.contains(CLONE_STACK) { - stack_opt = Some(stack_shared.clone()); - } else { - stack_shared.with(|stack| { - let mut new_stack = context::memory::Memory::new( - VirtualAddress::new(crate::USER_TMP_STACK_OFFSET), - stack.size(), - PageFlags::new().write(true), - false - ); - - unsafe { - intrinsics::copy(stack.start_address().data() as *const u8, - new_stack.start_address().data() as *mut u8, - stack.size()); - } - - new_stack.remap(stack.flags()); - stack_opt = Some(new_stack.to_shared()); - }); - } - } - - if let Some(ref sigstack) = context.sigstack { - let mut new_sigstack = context::memory::Memory::new( - VirtualAddress::new(crate::USER_TMP_SIGSTACK_OFFSET), - sigstack.size(), - PageFlags::new().write(true), - false - ); - - unsafe { - intrinsics::copy(sigstack.start_address().data() as *const u8, - new_sigstack.start_address().data() as *mut u8, - sigstack.size()); - } - - new_sigstack.remap(sigstack.flags()); - sigstack_opt = Some(new_sigstack); - } - if flags.contains(CLONE_VM) { grants = Arc::clone(&context.grants); } else { @@ -438,70 +368,6 @@ pub fn clone(flags: CloneFlags, stack_base: usize) -> Result { // TODO: Clone ksig? - // Setup image, heap, and grants - if flags.contains(CLONE_VM) { - // Copy user image mapping, if found - if ! 
image.is_empty() { - let frame = active_utable.p4()[crate::USER_PML4].pointed_frame().expect("user image not mapped"); - let flags = active_utable.p4()[crate::USER_PML4].flags(); - - new_utable.mapper().p4_mut()[crate::USER_PML4].set(frame, flags); - } - context.image = image; - - // Copy grant mapping - if ! grants.read().is_empty() { - let frame = active_utable.p4()[crate::USER_GRANT_PML4].pointed_frame().expect("user grants not mapped"); - let flags = active_utable.p4()[crate::USER_GRANT_PML4].flags(); - - new_utable.mapper().p4_mut()[crate::USER_GRANT_PML4].set(frame, flags); - } - context.grants = grants; - } else { - // Move copy of image - for memory_shared in image.iter_mut() { - memory_shared.with(|memory| { - let start = VirtualAddress::new(memory.start_address().data() - crate::USER_TMP_OFFSET + crate::USER_OFFSET); - memory.move_to(start, &mut new_utable); - }); - } - context.image = image; - - // Move grants - { - let mut grants = grants.write(); - let old_grants = mem::replace(&mut *grants, UserGrants::default()); - - for mut grant in old_grants.inner.into_iter() { - let start = VirtualAddress::new(grant.start_address().data() + crate::USER_GRANT_OFFSET - crate::USER_TMP_GRANT_OFFSET); - grant.move_to(start, &mut new_utable); - grants.insert(grant); - } - } - context.grants = grants; - } - - // Setup user stack - if let Some(stack_shared) = stack_opt { - if flags.contains(CLONE_STACK) { - let frame = active_utable.p4()[crate::USER_STACK_PML4].pointed_frame().expect("user stack not mapped"); - let flags = active_utable.p4()[crate::USER_STACK_PML4].flags(); - - new_utable.mapper().p4_mut()[crate::USER_STACK_PML4].set(frame, flags); - } else { - stack_shared.with(|stack| { - stack.move_to(VirtualAddress::new(crate::USER_STACK_OFFSET), &mut new_utable); - }); - } - context.stack = Some(stack_shared); - } - - // Setup user sigstack - if let Some(mut sigstack) = sigstack_opt { - sigstack.move_to(VirtualAddress::new(crate::USER_SIGSTACK_OFFSET), &mut 
new_utable); - context.sigstack = Some(sigstack); - } - #[cfg(target_arch = "aarch64")] { if let Some(stack) = &mut context.kstack { @@ -553,18 +419,6 @@ pub fn clone(flags: CloneFlags, stack_base: usize) -> Result { } fn empty<'lock>(context_lock: &'lock RwLock, mut context: RwLockWriteGuard<'lock, Context>, reaping: bool) -> RwLockWriteGuard<'lock, Context> { - if reaping { - // Memory should already be unmapped - assert!(context.image.is_empty()); - assert!(context.stack.is_none()); - assert!(context.sigstack.is_none()); - } else { - // Unmap previous image, heap, grants, stack - context.image.clear(); - drop(context.stack.take()); - drop(context.sigstack.take()); - } - // NOTE: If we do not replace the grants `Arc`, then a strange situation can appear where the // main thread and another thread exit simultaneously before either one is reaped. If that // happens, then the last context that runs exit will think that there is still are still @@ -580,9 +434,7 @@ fn empty<'lock>(context_lock: &'lock RwLock, mut context: RwLockWriteGu let mut grants_arc = mem::take(&mut context.grants); if let Some(grants_lock_mut) = Arc::get_mut(&mut grants_arc) { - // TODO: Use get_mut to bypass the need to acquire a lock when there we already have an - // exclusive reference from `Arc::get_mut`. This will require updating `spin`. - let mut grants_guard = grants_lock_mut.write(); + let mut grants_guard = grants_lock_mut.get_mut(); let grants = mem::replace(&mut *grants_guard, UserGrants::default()); for grant in grants.inner.into_iter() { @@ -616,450 +468,6 @@ impl Drop for ExecFile { } } -#[allow(clippy::too_many_arguments)] -fn fexec_noreturn( - setuid: Option, - setgid: Option, - name: Box, - data: Box<[u8]>, - phdr_grant: context::memory::Grant, - args: Box<[Box<[u8]>]>, - vars: Box<[Box<[u8]>]>, - auxv: Box<[usize]>, -) -> ! 
{ - let entry; - let singlestep; - let mut sp = crate::USER_STACK_OFFSET + crate::USER_STACK_SIZE - 256; - - { - let (vfork, ppid, files) = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH)).expect("exec_noreturn pid not found"); - let mut context = context_lock.write(); - - singlestep = unsafe { - ptrace::regs_for(&context).map(|s| s.is_singlestep()).unwrap_or(false) - }; - - context.name = Arc::new(RwLock::new(name)); - - context = empty(&context_lock, context, false); - - context.grants.write().insert(phdr_grant); - - #[cfg(all(target_arch = "x86_64"))] - { - context.arch.fsbase = 0; - context.arch.gsbase = 0; - - #[cfg(feature = "x86_fsgsbase")] - unsafe { - x86::bits64::segmentation::wrfsbase(0); - x86::bits64::segmentation::swapgs(); - x86::bits64::segmentation::wrgsbase(0); - x86::bits64::segmentation::swapgs(); - } - #[cfg(not(feature = "x86_fsgsbase"))] - unsafe { - x86::msr::wrmsr(x86::msr::IA32_FS_BASE, 0); - x86::msr::wrmsr(x86::msr::IA32_KERNEL_GSBASE, 0); - } - } - - if let Some(uid) = setuid { - context.euid = uid; - } - - if let Some(gid) = setgid { - context.egid = gid; - } - - // Map and copy new segments - { - let elf = elf::Elf::from(&data).unwrap(); - entry = elf.entry(); - - for segment in elf.segments() { - match segment.p_type { - program_header::PT_LOAD => { - let voff = segment.p_vaddr as usize % PAGE_SIZE; - let vaddr = segment.p_vaddr as usize - voff; - - let mut memory = context::memory::Memory::new( - VirtualAddress::new(vaddr), - segment.p_memsz as usize + voff, - PageFlags::new().write(true), - true - ); - - unsafe { - // Copy file data - intrinsics::copy((elf.data.as_ptr() as usize + segment.p_offset as usize) as *const u8, - segment.p_vaddr as *mut u8, - segment.p_filesz as usize); - } - - let mut flags = PageFlags::new().user(true); - - // W ^ X. 
If it is executable, do not allow it to be writable, even if requested - if segment.p_flags & program_header::PF_X == program_header::PF_X { - flags = flags.execute(true); - } else if segment.p_flags & program_header::PF_W == program_header::PF_W { - flags = flags.write(true); - } - - memory.remap(flags); - - context.image.push(memory.to_shared()); - }, - _ => (), - } - } - } - - // Map stack - context.stack = Some(context::memory::Memory::new( - VirtualAddress::new(crate::USER_STACK_OFFSET), - crate::USER_STACK_SIZE, - PageFlags::new().write(true).user(true), - true - ).to_shared()); - - // Map stack - context.sigstack = Some(context::memory::Memory::new( - VirtualAddress::new(crate::USER_SIGSTACK_OFFSET), - crate::USER_SIGSTACK_SIZE, - PageFlags::new().write(true).user(true), - true - )); - - // Data no longer required, can deallocate - drop(data); - - let mut push = |arg| { - sp -= mem::size_of::(); - unsafe { *(sp as *mut usize) = arg; } - }; - - // Push auxiliary vector - push(AT_NULL); - for &arg in auxv.iter().rev() { - push(arg); - } - - drop(auxv); // no longer required - - let mut arg_size = 0; - - // Push environment variables and arguments - for iter in &[&vars, &args] { - // Push null-terminator - push(0); - - // Push pointer to content - for arg in iter.iter().rev() { - push(crate::USER_ARG_OFFSET + arg_size); - arg_size += arg.len() + 1; - } - } - - // For some reason, Linux pushes the argument count here (in - // addition to being null-terminated), but not the environment - // variable count. - // TODO: Push more counts? Less? Stop having null-termination? 
- push(args.len()); - - // Write environment and argument pointers to USER_ARG_OFFSET - if arg_size > 0 { - let mut memory = context::memory::Memory::new( - VirtualAddress::new(crate::USER_ARG_OFFSET), - arg_size, - PageFlags::new().write(true), - true - ); - - let mut arg_offset = 0; - for arg in vars.iter().rev().chain(args.iter().rev()) { - unsafe { - intrinsics::copy(arg.as_ptr(), - (crate::USER_ARG_OFFSET + arg_offset) as *mut u8, - arg.len()); - } - arg_offset += arg.len(); - - unsafe { - *((crate::USER_ARG_OFFSET + arg_offset) as *mut u8) = 0; - } - arg_offset += 1; - } - - memory.remap(PageFlags::new().user(true)); - - context.image.push(memory.to_shared()); - } - - // Args and vars no longer required, can deallocate - drop(args); - drop(vars); - - context.actions = Arc::new(RwLock::new(vec![( - SigAction { - sa_handler: unsafe { mem::transmute(SIG_DFL) }, - sa_mask: [0; 2], - sa_flags: SigActionFlags::empty(), - }, - 0 - ); 128])); - - let vfork = context.vfork; - context.vfork = false; - - let files = Arc::clone(&context.files); - - (vfork, context.ppid, files) - }; - - for (_fd, file_opt) in files.write().iter_mut().enumerate() { - let mut cloexec = false; - if let Some(ref file) = *file_opt { - if file.cloexec { - cloexec = true; - } - } - - if cloexec { - let _ = file_opt.take().unwrap().close(); - } - } - - if vfork { - let contexts = context::contexts(); - if let Some(context_lock) = contexts.get(ppid) { - let mut context = context_lock.write(); - if ! 
context.unblock() { - println!("{} not blocked for exec vfork unblock", ppid.into()); - } - } else { - println!("{} not found for exec vfork unblock", ppid.into()); - } - } - } - - // Go to usermode - unsafe { usermode(entry, sp, 0, usize::from(singlestep)) } -} - -pub fn fexec_kernel(fd: FileHandle, args: Box<[Box<[u8]>]>, vars: Box<[Box<[u8]>]>, name_override_opt: Option>, auxv: Option<(Vec, context::memory::Grant)>) -> Result { - let (uid, gid) = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - (context.euid, context.egid) - }; - - let mut stat: Stat; - let name: String; - let mut data: Vec; - { - let file = ExecFile(fd); - - stat = Stat::default(); - syscall::file_op_mut_slice(syscall::number::SYS_FSTAT, file.0, &mut stat)?; - - let mut perm = stat.st_mode & 0o7; - if stat.st_uid == uid { - perm |= (stat.st_mode >> 6) & 0o7; - } - if stat.st_gid == gid { - perm |= (stat.st_mode >> 3) & 0o7; - } - if uid == 0 { - perm |= 0o7; - } - - if perm & 0o1 != 0o1 { - return Err(Error::new(EACCES)); - } - - if let Some(name_override) = name_override_opt { - name = String::from(name_override); - } else { - let mut name_bytes = vec![0; 4096]; - let len = syscall::file_op_mut_slice(syscall::number::SYS_FPATH, file.0, &mut name_bytes)?; - name_bytes.truncate(len); - name = match String::from_utf8(name_bytes) { - Ok(ok) => ok, - Err(_err) => { - //TODO: print error? - return Err(Error::new(EINVAL)); - } - }; - } - - //TODO: Only read elf header, not entire file. 
Then read required segments - data = vec![0; stat.st_size as usize]; - syscall::file_op_mut_slice(syscall::number::SYS_READ, file.0, &mut data)?; - drop(file); - } - - // Set UID and GID are determined after resolving any hashbangs - let setuid = if stat.st_mode & syscall::flag::MODE_SETUID == syscall::flag::MODE_SETUID { - Some(stat.st_uid) - } else { - None - }; - - let setgid = if stat.st_mode & syscall::flag::MODE_SETGID == syscall::flag::MODE_SETGID { - Some(stat.st_gid) - } else { - None - }; - - // The argument list is limited to avoid using too much userspace stack - // This check is done last to allow all hashbangs to be resolved - // - // This should be based on the size of the userspace stack, divided - // by the cost of each argument, which should be usize * 2, with - // one additional argument added to represent the total size of the - // argument pointer array and potential padding - // - // A limit of 4095 would mean a stack of (4095 + 1) * 8 * 2 = 65536, or 64KB - if (args.len() + vars.len()) > 4095 { - return Err(Error::new(E2BIG)); - } - - let elf = match elf::Elf::from(&data) { - Ok(elf) => elf, - Err(err) => { - let contexts = context::contexts(); - if let Some(context_lock) = contexts.current() { - let context = context_lock.read(); - println!( - "{}: {}: fexec failed to execute {}: {}", - context.id.into(), - *context.name.read(), - fd.into(), - err - ); - } - return Err(Error::new(ENOEXEC)); - } - }; - - // `fexec_kernel` can recurse if an interpreter is found. We get the - // auxiliary vector from the first invocation, which is passed via an - // argument, or if this is the first one we create it. 
- let (auxv, phdr_grant) = if let Some((auxv, phdr_grant)) = auxv { - (auxv, phdr_grant) - } else { - let phdr_grant = match context::contexts().current().ok_or(Error::new(ESRCH))?.read().grants.write() { - grants => { - let size = elf.program_headers_size() * elf.program_header_count(); - let aligned_size = (size + PAGE_SIZE - 1) / PAGE_SIZE * PAGE_SIZE; - - if aligned_size > MAX_PHDRS_SIZE { - return Err(Error::new(ENOMEM)); - } - - let phdrs_region = grants.find_free(aligned_size); - let grant = context::memory::Grant::map(phdrs_region.start_address(), aligned_size, PageFlags::new().write(true).user(true)); - - unsafe { - let dst = core::slice::from_raw_parts_mut(grant.start_address().data() as *mut u8, aligned_size); - dst[..size].copy_from_slice(&data[elf.program_headers()..elf.program_headers() + elf.program_headers_size() * elf.program_header_count()]); - } - - grant - } - }; - let mut auxv = Vec::with_capacity(3); - - auxv.push(AT_ENTRY); - auxv.push(elf.entry()); - auxv.push(AT_PHDR); - auxv.push(phdr_grant.start_address().data()); - auxv.push(AT_PHENT); - auxv.push(elf.program_headers_size()); - auxv.push(AT_PHNUM); - auxv.push(elf.program_header_count()); - - (auxv, phdr_grant) - }; - - // We check the validity of all loadable sections here - for segment in elf.segments() { - match segment.p_type { - program_header::PT_INTERP => { - //TODO: length restraint, parse interp earlier - let mut interp = vec![0; segment.p_memsz as usize]; - unsafe { - intrinsics::copy((elf.data.as_ptr() as usize + segment.p_offset as usize) as *const u8, - interp.as_mut_ptr(), - segment.p_filesz as usize); - } - - let mut i = 0; - while i < interp.len() { - if interp[i] == 0 { - break; - } - i += 1; - } - interp.truncate(i); - - let interp_str = str::from_utf8(&interp).map_err(|_| Error::new(EINVAL))?; - - let interp_fd = super::fs::open(interp_str, super::flag::O_RDONLY | super::flag::O_CLOEXEC)?; - - let mut args_vec = Vec::from(args); - //TODO: pass file handle in auxv - let 
name_override = name.into_boxed_str(); - args_vec[0] = name_override.clone().into(); - - // Drop variables, since fexec_kernel probably won't return - drop(elf); - drop(interp); - - return fexec_kernel( - interp_fd, - args_vec.into_boxed_slice(), - vars, - Some(name_override), - Some((auxv, phdr_grant)), - ); - }, - _ => (), - } - } - - // This is the point of no return, quite literaly. Any checks for validity need - // to be done before, and appropriate errors returned. Otherwise, we have nothing - // to return to. - fexec_noreturn(setuid, setgid, name.into_boxed_str(), data.into_boxed_slice(), phdr_grant, args, vars, auxv.into_boxed_slice()); -} -const MAX_PHDRS_SIZE: usize = PAGE_SIZE; - -pub fn fexec(fd: FileHandle, arg_ptrs: &[[usize; 2]], var_ptrs: &[[usize; 2]]) -> Result { - let mut args = Vec::new(); - for arg_ptr in arg_ptrs { - let arg = validate_slice(arg_ptr[0] as *const u8, arg_ptr[1])?; - // Argument must be moved into kernel space before exec unmaps all memory - args.push(arg.to_vec().into_boxed_slice()); - } - - let mut vars = Vec::new(); - for var_ptr in var_ptrs { - let var = validate_slice(var_ptr[0] as *const u8, var_ptr[1])?; - // Argument must be moved into kernel space before exec unmaps all memory - vars.push(var.to_vec().into_boxed_slice()); - } - - // Neither arg_ptrs nor var_ptrs should be used after this point, the kernel - // now has owned copies in args and vars - - fexec_kernel(fd, args.into_boxed_slice(), vars.into_boxed_slice(), None, None) -} - pub fn exit(status: usize) -> ! { ptrace::breakpoint_callback(PTRACE_STOP_EXIT, Some(ptrace_event!(PTRACE_STOP_EXIT, status))); @@ -1629,3 +1037,29 @@ pub fn waitpid(pid: ContextId, status_ptr: usize, flags: WaitFlags) -> Result) -> ! 
{ + assert!(!data.is_empty()); + + const LOAD_BASE: usize = 0; + let grant = context::memory::Grant::map(VirtualAddress::new(LOAD_BASE), data.len(), PageFlags::new().user(true).write(true).execute(true)); + + let mut active_table = unsafe { ActivePageTable::new(TableKind::User) }; + + for (index, page) in grant.pages().enumerate() { + let len = if data.len() - index * PAGE_SIZE < PAGE_SIZE { data.len() % PAGE_SIZE } else { PAGE_SIZE }; + let frame = active_table.translate_page(page).expect("expected mapped init memory to have a corresponding frame"); + unsafe { ((frame.start_address().data() + crate::KERNEL_OFFSET) as *mut u8).copy_from_nonoverlapping(data.as_ptr().add(index * PAGE_SIZE), len); } + } + context::contexts().current().expect("expected a context to exist when executing init").read().grants.write().insert(grant); + + drop(data); + + #[cfg(target_arch = "x86_64")] + unsafe { + let start = ((LOAD_BASE + 0x18) as *mut usize).read(); + // Start with the (probably) ELF executable loaded, without any stack or the ability to load + // sections to arbitrary addresses. + crate::arch::start::usermode(start, 0, 0, 0); + } +} diff --git a/syscall b/syscall index 0c98fbd..d6af266 160000 --- a/syscall +++ b/syscall @@ -1 +1 @@ -Subproject commit 0c98fbd16212282aeb3db17c991472885a9b79be +Subproject commit d6af266119e7b4a3b0e9a04c63b3cfcfac94781a