From 788526a3b3586731023c5ee6b0bf5cdaae746828 Mon Sep 17 00:00:00 2001 From: Jeremy Soller Date: Tue, 2 Jul 2019 07:38:26 -0600 Subject: [PATCH] Bare-bones ptracing functionality Since even a very basic ptrace can be nice to have, I thought I would split the, perhaps rather big, ptrace project up in multiple PRs to make as few changes as necessary in each. This PR contains the initial registry modifying bits and only a very basic security measure. Letting this out to the community should be good for spotting bugs and maybe getting some hype ;) --- src/arch/x86_64/interrupt/exception.rs | 47 +++- src/arch/x86_64/interrupt/irq.rs | 21 +- src/arch/x86_64/interrupt/syscall.rs | 228 ++++++++-------- src/arch/x86_64/macros.rs | 46 ++++ src/common/mod.rs | 1 + src/common/unique.rs | 33 +++ src/context/context.rs | 16 +- src/context/switch.rs | 2 +- src/lib.rs | 3 + src/ptrace.rs | 208 +++++++++++++++ src/scheme/mod.rs | 24 +- src/scheme/proc.rs | 351 +++++++++++++++++++++++++ src/syscall/debug.rs | 39 +-- src/syscall/driver.rs | 3 +- src/syscall/process.rs | 38 ++- syscall | 2 +- 16 files changed, 881 insertions(+), 181 deletions(-) create mode 100644 src/common/unique.rs create mode 100644 src/ptrace.rs create mode 100644 src/scheme/proc.rs diff --git a/src/arch/x86_64/interrupt/exception.rs b/src/arch/x86_64/interrupt/exception.rs index 864a4c3..7d58efd 100644 --- a/src/arch/x86_64/interrupt/exception.rs +++ b/src/arch/x86_64/interrupt/exception.rs @@ -1,5 +1,10 @@ -use crate::interrupt::stack_trace; -use crate::syscall::flag::*; +use crate::{ + common::unique::Unique, + context, + interrupt::stack_trace, + ptrace, + syscall::flag::* +}; extern { fn ksignal(signal: usize); @@ -13,9 +18,41 @@ interrupt_stack_p!(divide_by_zero, stack, { }); interrupt_stack!(debug, stack, { - println!("Debug trap"); - stack.dump(); - ksignal(SIGTRAP); + match ptrace::breakpoint_callback_dryrun(true) { + Some(_) => { + { + let contexts = context::contexts(); + if let Some(context) = 
contexts.current() { + let mut context = context.write(); + if let Some(ref mut kstack) = context.kstack { + context.regs = Some((kstack.as_mut_ptr() as usize, Unique::new_unchecked(stack))); + } + } + } + + let had_singlestep = stack.iret.rflags & (1 << 8) == 1 << 8; + stack.set_singlestep(false); + if ptrace::breakpoint_callback(true).is_none() { + // There is no guarantee that this is Some(_) just + // because the dryrun is Some(_). So, if there wasn't + // *actually* any breakpoint, restore the trap flag. + stack.set_singlestep(had_singlestep); + } + + { + let contexts = context::contexts(); + if let Some(context) = contexts.current() { + let mut context = context.write(); + context.regs = None; + } + } + }, + None => { + println!("Debug trap"); + stack.dump(); + ksignal(SIGTRAP); + } + } }); interrupt_stack!(non_maskable, stack, { diff --git a/src/arch/x86_64/interrupt/irq.rs b/src/arch/x86_64/interrupt/irq.rs index 6ffeb8c..fbc5bb8 100644 --- a/src/arch/x86_64/interrupt/irq.rs +++ b/src/arch/x86_64/interrupt/irq.rs @@ -1,5 +1,6 @@ use core::sync::atomic::{AtomicUsize, Ordering}; +use crate::common::unique::Unique; use crate::context; use crate::context::timeout; use crate::device::pic; @@ -40,7 +41,7 @@ pub unsafe fn acknowledge(irq: usize) { } } -interrupt!(pit, { +interrupt_stack!(pit, stack, { // Saves CPU time by not sending IRQ event irq_trigger(0); const PIT_RATE: u64 = 2_250_286; @@ -61,7 +62,25 @@ interrupt!(pit, { timeout::trigger(); if PIT_TICKS.fetch_add(1, Ordering::SeqCst) >= 10 { + { + let contexts = crate::context::contexts(); + if let Some(context) = contexts.current() { + let mut context = context.write(); + // Make all registers available to e.g. 
the proc: + // scheme + if let Some(ref mut kstack) = context.kstack { + context.regs = Some((kstack.as_mut_ptr() as usize, Unique::new_unchecked(stack))); + } + } + } let _ = context::switch(); + { + let contexts = crate::context::contexts(); + if let Some(context) = contexts.current() { + let mut context = context.write(); + context.regs = None; + } + } } }); diff --git a/src/arch/x86_64/interrupt/syscall.rs b/src/arch/x86_64/interrupt/syscall.rs index 099904f..2a4d68e 100644 --- a/src/arch/x86_64/interrupt/syscall.rs +++ b/src/arch/x86_64/interrupt/syscall.rs @@ -1,25 +1,74 @@ +use crate::arch::macros::InterruptStack; use crate::arch::{gdt, pti}; -use crate::syscall; +use crate::common::unique::Unique; +use crate::{context, ptrace, syscall}; use x86::shared::msr; pub unsafe fn init() { msr::wrmsr(msr::IA32_STAR, ((gdt::GDT_KERNEL_CODE as u64) << 3) << 32); msr::wrmsr(msr::IA32_LSTAR, syscall_instruction as u64); - msr::wrmsr(msr::IA32_FMASK, 1 << 9); + msr::wrmsr(msr::IA32_FMASK, 0x0300); // Clear trap flag and interrupt enable msr::wrmsr(msr::IA32_KERNEL_GS_BASE, &gdt::TSS as *const _ as u64); let efer = msr::rdmsr(msr::IA32_EFER); msr::wrmsr(msr::IA32_EFER, efer | 1); } +// Not a function pointer because it somehow messes up the returning +// from clone() (via clone_ret()). Not sure what the problem is. +macro_rules! with_interrupt_stack { + (unsafe fn $wrapped:ident($stack:ident) -> usize $code:block) => { + /// Because of how clones work, we need a function that returns a + /// usize. Here, `inner` will be this function. The child process in a + /// clone will terminate this function with a 0 return value, and it + /// might also have updated the interrupt_stack pointer. 
+ #[inline(never)] + unsafe fn $wrapped(stack: *mut SyscallStack) { + let stack = &mut *stack; + { + let contexts = context::contexts(); + if let Some(context) = contexts.current() { + let mut context = context.write(); + if let Some(ref mut kstack) = context.kstack { + context.regs = Some((kstack.as_mut_ptr() as usize, Unique::new_unchecked(&mut stack.interrupt_stack))); + } + } + } + + let is_sysemu = ptrace::breakpoint_callback(false); + if !is_sysemu.unwrap_or(false) { + // If not on a sysemu breakpoint + let $stack = &mut *stack; + $stack.interrupt_stack.scratch.rax = $code; + + if is_sysemu.is_some() { + // Only callback if there was a pre-syscall + // callback too. + ptrace::breakpoint_callback(false); + } + } + + { + let contexts = context::contexts(); + if let Some(context) = contexts.current() { + let mut context = context.write(); + context.regs = None; + } + } + } + } +} + #[naked] pub unsafe extern fn syscall_instruction() { - #[inline(never)] - unsafe fn inner(stack: &mut SyscallStack) -> usize { - let rbp; - asm!("" : "={rbp}"(rbp) : : : "intel", "volatile"); + with_interrupt_stack! { + unsafe fn inner(stack) -> usize { + let rbp; + asm!("" : "={rbp}"(rbp) : : : "intel", "volatile"); - syscall::syscall(stack.rax, stack.rdi, stack.rsi, stack.rdx, stack.r10, stack.r8, rbp, stack) + let scratch = &stack.interrupt_stack.scratch; + syscall::syscall(scratch.rax, scratch.rdi, scratch.rsi, scratch.rdx, scratch.r10, scratch.r8, rbp, stack) + } } // Yes, this is magic. 
No, you don't need to understand @@ -40,76 +89,12 @@ pub unsafe extern fn syscall_instruction() { : : "intel", "volatile"); - // Push scratch registers - asm!("push rax - push rbx - push rcx - push rdx - push rdi - push rsi - push r8 - push r9 - push r10 - push r11 - push fs - mov r11, 0x18 - mov fs, r11" - : : : : "intel", "volatile"); - - // Get reference to stack variables - let rsp: usize; - asm!("" : "={rsp}"(rsp) : : : "intel", "volatile"); - - // Map kernel - pti::map(); - - let a = inner(&mut *(rsp as *mut SyscallStack)); - - // Unmap kernel - pti::unmap(); - - asm!("" : : "{rax}"(a) : : "intel", "volatile"); - - // Interrupt return - asm!("pop fs - pop r11 - pop r10 - pop r9 - pop r8 - pop rsi - pop rdi - pop rdx - pop rcx - pop rbx - add rsp, 8 - iretq" - : : : : "intel", "volatile"); -} - -#[naked] -pub unsafe extern fn syscall() { - #[inline(never)] - unsafe fn inner(stack: &mut SyscallStack) -> usize { - let rbp; - asm!("" : "={rbp}"(rbp) : : : "intel", "volatile"); - - syscall::syscall(stack.rax, stack.rbx, stack.rcx, stack.rdx, stack.rsi, stack.rdi, rbp, stack) - } - // Push scratch registers - asm!("push rax - push rbx - push rcx - push rdx - push rdi - push rsi - push r8 - push r9 - push r10 - push r11 - push fs + scratch_push!(); + asm!("push fs mov r11, 0x18 - mov fs, r11" + mov fs, r11 + push rbx" : : : : "intel", "volatile"); // Get reference to stack variables @@ -119,56 +104,81 @@ pub unsafe extern fn syscall() { // Map kernel pti::map(); - let a = inner(&mut *(rsp as *mut SyscallStack)); + inner(rsp as *mut SyscallStack); // Unmap kernel pti::unmap(); - asm!("" : : "{rax}"(a) : : "intel", "volatile"); + // Interrupt return + asm!("pop rbx + pop fs" + : : : : "intel", "volatile"); + scratch_pop!(); + asm!("iretq" : : : : "intel", "volatile"); +} + +#[naked] +pub unsafe extern fn syscall() { + with_interrupt_stack! 
{ + unsafe fn inner(stack) -> usize { + let rbp; + asm!("" : "={rbp}"(rbp) : : : "intel", "volatile"); + + let scratch = &stack.interrupt_stack.scratch; + syscall::syscall(scratch.rax, stack.rbx, scratch.rcx, scratch.rdx, scratch.rsi, scratch.rdi, rbp, stack) + } + } + + // Push scratch registers + scratch_push!(); + asm!("push fs + mov r11, 0x18 + mov fs, r11 + push rbx" + : : : : "intel", "volatile"); + + // Get reference to stack variables + let rsp: usize; + asm!("" : "={rsp}"(rsp) : : : "intel", "volatile"); + + // Map kernel + pti::map(); + + inner(rsp as *mut SyscallStack); + + // Unmap kernel + pti::unmap(); // Interrupt return - asm!("pop fs - pop r11 - pop r10 - pop r9 - pop r8 - pop rsi - pop rdi - pop rdx - pop rcx - pop rbx - add rsp, 8 - iretq" - : : : : "intel", "volatile"); + asm!("pop rbx + pop fs" + : : : : "intel", "volatile"); + scratch_pop!(); + asm!("iretq" : : : : "intel", "volatile"); } #[allow(dead_code)] #[repr(packed)] pub struct SyscallStack { - pub fs: usize, - pub r11: usize, - pub r10: usize, - pub r9: usize, - pub r8: usize, - pub rsi: usize, - pub rdi: usize, - pub rdx: usize, - pub rcx: usize, pub rbx: usize, - pub rax: usize, - pub rip: usize, - pub cs: usize, - pub rflags: usize, + pub interrupt_stack: InterruptStack, + // Will only be present if syscall is called from another ring pub rsp: usize, pub ss: usize, } #[naked] -pub unsafe extern fn clone_ret() { - asm!(" - pop rbp - xor rax, rax - " - : : : : "intel", "volatile"); +pub unsafe extern "C" fn clone_ret() { + // The C x86_64 ABI specifies that rbp is pushed to save the old + // call frame. Popping rbp means we're using the parent's call + // frame and thus will not only return from this function but also + // from the function above this one. + // When this is called, the stack should have been + // interrupt->inner->syscall->clone + // then changed to + // interrupt->inner->clone_ret->clone + // so this will return from "inner". 
+ + asm!("pop rbp" : : : : "intel", "volatile"); } diff --git a/src/arch/x86_64/macros.rs b/src/arch/x86_64/macros.rs index d41ee93..da0b037 100644 --- a/src/arch/x86_64/macros.rs +++ b/src/arch/x86_64/macros.rs @@ -1,3 +1,5 @@ +use syscall::data::IntRegisters; + /// Print to console #[macro_export] macro_rules! print { @@ -204,6 +206,50 @@ impl InterruptStack { self.scratch.dump(); println!("FS: {:>016X}", { self.fs }); } + /// Saves all registers to a struct used by the proc: + /// scheme to read/write registers. + pub fn save(&self, all: &mut IntRegisters) { + all.fs = self.fs; + all.r11 = self.scratch.r11; + all.r10 = self.scratch.r10; + all.r9 = self.scratch.r9; + all.r8 = self.scratch.r8; + all.rsi = self.scratch.rsi; + all.rdi = self.scratch.rdi; + all.rdx = self.scratch.rdx; + all.rcx = self.scratch.rcx; + all.rax = self.scratch.rax; + all.rip = self.iret.rip; + all.cs = self.iret.cs; + all.eflags = self.iret.rflags; + } + /// Loads all registers from a struct used by the proc: + /// scheme to read/write registers. + pub fn load(&mut self, all: &IntRegisters) { + self.fs = all.fs; + self.scratch.r11 = all.r11; + self.scratch.r10 = all.r10; + self.scratch.r9 = all.r9; + self.scratch.r8 = all.r8; + self.scratch.rsi = all.rsi; + self.scratch.rdi = all.rdi; + self.scratch.rdx = all.rdx; + self.scratch.rcx = all.rcx; + self.scratch.rax = all.rax; + self.iret.rip = all.rip; + self.iret.cs = all.cs; + self.iret.rflags = all.eflags; + } + /// Enables the "Trap Flag" in the FLAGS register, causing the CPU + /// to send a Debug exception after the next instruction. This is + /// used for singlestep in the proc: scheme. 
+ pub fn set_singlestep(&mut self, enabled: bool) { + if enabled { + self.iret.rflags |= 1 << 8; + } else { + self.iret.rflags &= !(1 << 8); + } + } } #[macro_export] diff --git a/src/common/mod.rs b/src/common/mod.rs index 29f6412..7ad826b 100644 --- a/src/common/mod.rs +++ b/src/common/mod.rs @@ -1,2 +1,3 @@ #[macro_use] pub mod int_like; +pub mod unique; diff --git a/src/common/unique.rs b/src/common/unique.rs new file mode 100644 index 0000000..214f5b5 --- /dev/null +++ b/src/common/unique.rs @@ -0,0 +1,33 @@ +use core::{fmt, ptr::NonNull}; + +/// A small wrapper around NonNull that is Send + Sync, which is +/// only correct if the pointer is never accessed from multiple +/// locations across threads. Which is always, if the pointer is +/// unique. +pub struct Unique(NonNull); + +impl Copy for Unique {} +impl Clone for Unique { + fn clone(&self) -> Self { + *self + } +} +unsafe impl Send for Unique {} +unsafe impl Sync for Unique {} + +impl Unique { + pub fn new(ptr: *mut T) -> Self { + Self(NonNull::new(ptr).unwrap()) + } + pub unsafe fn new_unchecked(ptr: *mut T) -> Self { + Self(NonNull::new_unchecked(ptr)) + } + pub fn as_ptr(&self) -> *mut T { + self.0.as_ptr() + } +} +impl fmt::Debug for Unique { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:?}", self.0) + } +} diff --git a/src/context/context.rs b/src/context/context.rs index c27f7de..269b4ee 100644 --- a/src/context/context.rs +++ b/src/context/context.rs @@ -7,15 +7,16 @@ use core::cmp::Ordering; use core::mem; use spin::Mutex; -use crate::arch::paging::PAGE_SIZE; +use crate::arch::{macros::InterruptStack, paging::PAGE_SIZE}; +use crate::common::unique::Unique; use crate::context::arch; use crate::context::file::FileDescriptor; use crate::context::memory::{Grant, Memory, SharedMemory, Tls}; use crate::ipi::{ipi, IpiKind, IpiTarget}; use crate::scheme::{SchemeNamespace, FileHandle}; +use crate::sync::WaitMap; use crate::syscall::data::SigAction; use 
crate::syscall::flag::SIG_DFL; -use crate::sync::WaitMap; /// Unique identifier for a context (i.e. `pid`). use ::core::sync::atomic::AtomicUsize; @@ -165,6 +166,15 @@ pub struct Context { pub files: Arc>>>, /// Signal actions pub actions: Arc>>, + /// The pointer to the user-space registers, saved after certain + /// interrupts. This pointer is somewhere inside kstack, and the + /// kstack address at the time of creation is the first element in + /// this tuple. + pub regs: Option<(usize, Unique)>, + /// A somewhat hacky way to initially stop a context when creating + /// a new instance of the proc: scheme, entirely separate from + /// signals or any other way to restart a process. + pub ptrace_stop: bool } impl Context { @@ -216,6 +226,8 @@ impl Context { }, 0 ); 128])), + regs: None, + ptrace_stop: false } } diff --git a/src/context/switch.rs b/src/context/switch.rs index 7d25365..cf55220 100644 --- a/src/context/switch.rs +++ b/src/context/switch.rs @@ -55,7 +55,7 @@ unsafe fn update(context: &mut Context, cpu_id: usize) { unsafe fn runnable(context: &Context, cpu_id: usize) -> bool { // Switch to context if it needs to run, is not currently running, and is owned by the current CPU - !context.running && context.status == Status::Runnable && context.cpu_id == Some(cpu_id) + !context.running && !context.ptrace_stop && context.status == Status::Runnable && context.cpu_id == Some(cpu_id) } /// Switch to the next context diff --git a/src/lib.rs b/src/lib.rs index 109ea39..24b265a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -93,6 +93,9 @@ pub mod memory; #[cfg(not(any(feature="doc", test)))] pub mod panic; +/// Process tracing +pub mod ptrace; + /// Schemes, filesystem handlers pub mod scheme; diff --git a/src/ptrace.rs b/src/ptrace.rs new file mode 100644 index 0000000..5f8cb27 --- /dev/null +++ b/src/ptrace.rs @@ -0,0 +1,208 @@ +use crate::{ + arch::macros::InterruptStack, + common::unique::Unique, + context::{self, Context, ContextId, Status}, + 
sync::WaitCondition +}; + +use alloc::{ + boxed::Box, + collections::BTreeMap, + sync::Arc +}; +use spin::{Once, RwLock, RwLockReadGuard, RwLockWriteGuard}; +use syscall::error::*; + +// ____ _ _ _ +// | __ ) _ __ ___ __ _| | ___ __ ___ (_)_ __ | |_ ___ +// | _ \| '__/ _ \/ _` | |/ / '_ \ / _ \| | '_ \| __/ __| +// | |_) | | | __/ (_| | <| |_) | (_) | | | | | |_\__ \ +// |____/|_| \___|\__,_|_|\_\ .__/ \___/|_|_| |_|\__|___/ +// |_| + +struct Handle { + tracee: Arc, + tracer: Arc, + reached: bool, + + sysemu: bool, + singlestep: bool +} + +static BREAKPOINTS: Once>> = Once::new(); + +fn init_breakpoints() -> RwLock> { + RwLock::new(BTreeMap::new()) +} +fn breakpoints() -> RwLockReadGuard<'static, BTreeMap> { + BREAKPOINTS.call_once(init_breakpoints).read() +} +fn breakpoints_mut() -> RwLockWriteGuard<'static, BTreeMap> { + BREAKPOINTS.call_once(init_breakpoints).write() +} + +fn inner_cont(pid: ContextId) -> Option { + // Remove the breakpoint to both save space and also make sure any + // yet unreached but obsolete breakpoints don't stop the program. + let handle = breakpoints_mut().remove(&pid)?; + handle.tracee.notify(); + Some(handle) +} + +/// Continue the process with the specified ID +pub fn cont(pid: ContextId) { + inner_cont(pid); +} + +/// Create a new breakpoint for the specified tracee, optionally with a sysemu flag +pub fn set_breakpoint(pid: ContextId, sysemu: bool, singlestep: bool) { + let (tracee, tracer) = match inner_cont(pid) { + Some(breakpoint) => (breakpoint.tracee, breakpoint.tracer), + None => ( + Arc::new(WaitCondition::new()), + Arc::new(WaitCondition::new()) + ) + }; + + breakpoints_mut().insert(pid, Handle { + tracee, + tracer, + reached: false, + sysemu, + singlestep + }); +} + +/// Wait for the tracee to stop. 
+/// Note: Don't call while holding any locks, this will switch contexts +pub fn wait_breakpoint(pid: ContextId) -> Result<()> { + let tracer = { + let breakpoints = breakpoints(); + match breakpoints.get(&pid) { + Some(breakpoint) if !breakpoint.reached => Arc::clone(&breakpoint.tracer), + _ => return Ok(()) + } + }; + while !tracer.wait() {} + + let contexts = context::contexts(); + let context = contexts.get(pid).ok_or(Error::new(ESRCH))?; + let context = context.read(); + if let Status::Exited(_) = context.status { + return Err(Error::new(ESRCH)); + } + Ok(()) +} + +/// Returns the same value as breakpoint_callback would do, but +/// doesn't actually perform the action. You should not rely too +/// heavily on this value, as the lock *is* released between this call +/// and another. +pub fn breakpoint_callback_dryrun(singlestep: bool) -> Option { + let contexts = context::contexts(); + let context = contexts.current()?; + let context = context.read(); + + let breakpoints = breakpoints(); + let breakpoint = breakpoints.get(&context.id)?; + if breakpoint.singlestep != singlestep { + return None; + } + Some(breakpoint.sysemu) +} + +/// Notify the tracer and await green flag to continue. +/// Note: Don't call while holding any locks, this will switch contexts +pub fn breakpoint_callback(singlestep: bool) -> Option { + // Can't hold any locks when executing wait() + let (tracee, sysemu) = { + let contexts = context::contexts(); + let context = contexts.current()?; + let context = context.read(); + + let mut breakpoints = breakpoints_mut(); + let breakpoint = breakpoints.get_mut(&context.id)?; + + // TODO: How should singlesteps interact with syscalls? How + // does Linux handle this? 
+ + // if singlestep && !breakpoint.singlestep { + if breakpoint.singlestep != singlestep { + return None; + } + + breakpoint.tracer.notify(); + // In case no tracer is waiting, make sure the next one gets + // the memo + breakpoint.reached = true; + + ( + Arc::clone(&breakpoint.tracee), + breakpoint.sysemu + ) + }; + + while !tracee.wait() {} + + Some(sysemu) +} + +/// Call when a context is closed to alert any tracers +pub fn close(pid: ContextId) { + { + let breakpoints = breakpoints(); + if let Some(breakpoint) = breakpoints.get(&pid) { + breakpoint.tracer.notify(); + } + } + + breakpoints_mut().remove(&pid); +} + +// ____ _ _ +// | _ \ ___ __ _(_)___| |_ ___ _ __ ___ +// | |_) / _ \/ _` | / __| __/ _ \ '__/ __| +// | _ < __/ (_| | \__ \ || __/ | \__ \ +// |_| \_\___|\__, |_|___/\__\___|_| |___/ +// |___/ + +/// Return the InterruptStack pointer, but relative to the specified +/// stack instead of the original. +pub unsafe fn rebase_regs_ptr( + regs: Option<(usize, Unique)>, + kstack: Option<&Box<[u8]>> +) -> Option<*const InterruptStack> { + let (old_base, ptr) = regs?; + let new_base = kstack?.as_ptr() as usize; + Some((ptr.as_ptr() as usize - old_base + new_base) as *const _) +} +/// Return the InterruptStack pointer, but relative to the specified +/// stack instead of the original. +pub unsafe fn rebase_regs_ptr_mut( + regs: Option<(usize, Unique)>, + kstack: Option<&mut Box<[u8]>> +) -> Option<*mut InterruptStack> { + let (old_base, ptr) = regs?; + let new_base = kstack?.as_mut_ptr() as usize; + Some((ptr.as_ptr() as usize - old_base + new_base) as *mut _) +} + +/// Return a reference to the InterruptStack struct in memory. If the +/// kernel stack has been backed up by a signal handler, this instead +/// returns the struct inside that memory, as that will later be +/// restored and otherwise undo all your changes. See `update(...)` in +/// context/switch.rs. 
+pub unsafe fn regs_for(context: &Context) -> Option<&InterruptStack> { + Some(&*match context.ksig { + Some((_, _, ref kstack)) => rebase_regs_ptr(context.regs, kstack.as_ref())?, + None => context.regs?.1.as_ptr() + }) +} + +/// Mutable version of `regs_for` +pub unsafe fn regs_for_mut(context: &mut Context) -> Option<&mut InterruptStack> { + Some(&mut *match context.ksig { + Some((_, _, ref mut kstack)) => rebase_regs_ptr_mut(context.regs, kstack.as_mut())?, + None => context.regs?.1.as_ptr() + }) +} diff --git a/src/scheme/mod.rs b/src/scheme/mod.rs index 4cf9958..da5f5cd 100644 --- a/src/scheme/mod.rs +++ b/src/scheme/mod.rs @@ -22,6 +22,7 @@ use self::irq::IrqScheme; use self::itimer::ITimerScheme; use self::memory::MemoryScheme; use self::pipe::PipeScheme; +use self::proc::ProcScheme; use self::root::RootScheme; use self::sys::SysScheme; use self::time::TimeScheme; @@ -51,6 +52,9 @@ pub mod memory; /// `pipe:` - used internally by the kernel to implement `pipe` pub mod pipe; +/// `proc:` - allows tracing processes and reading/writing their memory +pub mod proc; + /// `:` - allows the creation of userspace schemes, tightly dependent on `user` pub mod root; @@ -128,29 +132,21 @@ impl SchemeList { } /// Initialize the root namespace - #[cfg(not(feature="live"))] fn new_root(&mut self) { // Do common namespace initialization let ns = self.new_ns(); - // Debug, Initfs and IRQ are only available in the root namespace. 
Pipe is special + // These schemes should only be available on the root self.insert(ns, Box::new(*b"debug"), |scheme_id| Arc::new(Box::new(DebugScheme::new(scheme_id)))).unwrap(); self.insert(ns, Box::new(*b"initfs"), |_| Arc::new(Box::new(InitFsScheme::new()))).unwrap(); self.insert(ns, Box::new(*b"irq"), |scheme_id| Arc::new(Box::new(IrqScheme::new(scheme_id)))).unwrap(); - self.insert(ns, Box::new(*b"pipe"), |scheme_id| Arc::new(Box::new(PipeScheme::new(scheme_id)))).unwrap(); - } + self.insert(ns, Box::new(*b"proc"), |_| Arc::new(Box::new(ProcScheme::new()))).unwrap(); - /// Initialize the root namespace - with live disk - #[cfg(feature="live")] - fn new_root(&mut self) { - // Do common namespace initialization - let ns = self.new_ns(); + #[cfg(feature = "live")] { + self.insert(ns, Box::new(*b"disk/live"), |_| Arc::new(Box::new(self::live::DiskScheme::new()))).unwrap(); + } - // Debug, Disk, Initfs and IRQ are only available in the root namespace. Pipe is special - self.insert(ns, Box::new(*b"debug"), |scheme_id| Arc::new(Box::new(DebugScheme::new(scheme_id)))).unwrap(); - self.insert(ns, Box::new(*b"disk/live"), |_| Arc::new(Box::new(self::live::DiskScheme::new()))).unwrap(); - self.insert(ns, Box::new(*b"initfs"), |_| Arc::new(Box::new(InitFsScheme::new()))).unwrap(); - self.insert(ns, Box::new(*b"irq"), |scheme_id| Arc::new(Box::new(IrqScheme::new(scheme_id)))).unwrap(); + // Pipe is special and needs to be in the root namespace self.insert(ns, Box::new(*b"pipe"), |scheme_id| Arc::new(Box::new(PipeScheme::new(scheme_id)))).unwrap(); } diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs new file mode 100644 index 0000000..6edd478 --- /dev/null +++ b/src/scheme/proc.rs @@ -0,0 +1,351 @@ +use crate::{ + context::{self, ContextId, Status}, + ptrace +}; + +use alloc::collections::{BTreeMap, BTreeSet}; +use core::{ + cmp, + mem, + slice, + sync::atomic::{AtomicUsize, Ordering} +}; +use spin::{Mutex, RwLock}; +use syscall::{ + data::{IntRegisters, 
FloatRegisters}, + error::*, + flag::*, + scheme::Scheme +}; + +#[derive(Clone, Copy)] +enum RegsKind { + Float, + Int +} +#[derive(Clone, Copy)] +enum Operation { + Memory, + Regs(RegsKind), + Trace +} + +#[derive(Clone, Copy)] +struct Handle { + flags: usize, + pid: ContextId, + operation: Operation +} + +pub struct ProcScheme { + next_id: AtomicUsize, + handles: RwLock>, + traced: Mutex> +} + +impl ProcScheme { + pub fn new() -> Self { + Self { + next_id: AtomicUsize::new(0), + handles: RwLock::new(BTreeMap::new()), + traced: Mutex::new(BTreeSet::new()) + } + } +} + +impl Scheme for ProcScheme { + fn open(&self, path: &[u8], flags: usize, uid: u32, gid: u32) -> Result { + let path = core::str::from_utf8(path).map_err(|_| Error::new(EINVAL))?; + let mut parts = path.splitn(2, '/'); + let pid = parts.next() + .and_then(|s| s.parse().ok()) + .map(ContextId::from) + .ok_or(Error::new(EINVAL))?; + let operation = match parts.next() { + Some("mem") => Operation::Memory, + Some("regs/float") => Operation::Regs(RegsKind::Float), + Some("regs/int") => Operation::Regs(RegsKind::Int), + Some("trace") => Operation::Trace, + _ => return Err(Error::new(EINVAL)) + }; + + let contexts = context::contexts(); + let context = contexts.get(pid).ok_or(Error::new(ESRCH))?; + + { + // TODO: Put better security here? 
+ + let context = context.read(); + if uid != 0 && gid != 0 + && uid != context.euid && gid != context.egid { + return Err(Error::new(EPERM)); + } + } + + if let Operation::Trace = operation { + let mut traced = self.traced.lock(); + + if traced.contains(&pid) { + return Err(Error::new(EBUSY)); + } + traced.insert(pid); + + let mut context = context.write(); + context.ptrace_stop = true; + } + + let id = self.next_id.fetch_add(1, Ordering::SeqCst); + self.handles.write().insert(id, Handle { + flags, + pid, + operation + }); + Ok(id) + } + + /// Using dup for `proc:` simply opens another operation on the same PID + /// ```rust,ignore + /// let trace = syscall::open("proc:1234/trace")?; + /// + /// // let regs = syscall::open("proc:1234/regs/int")?; + /// let regs = syscall::dup(trace, "regs/int")?; + /// ``` + fn dup(&self, old_id: usize, buf: &[u8]) -> Result { + let handle = { + let handles = self.handles.read(); + *handles.get(&old_id).ok_or(Error::new(EBADF))? + }; + let mut path = format!("{}/", handle.pid.into()).into_bytes(); + path.extend_from_slice(buf); + + let (uid, gid) = { + let contexts = context::contexts(); + let context = contexts.current().ok_or(Error::new(ESRCH))?; + let context = context.read(); + (context.euid, context.egid) + }; + + self.open(&path, handle.flags, uid, gid) + } + + fn read(&self, id: usize, buf: &mut [u8]) -> Result { + // Can't hold locks during the context switch later when + // waiting for a process to stop running. + let handle = { + let handles = self.handles.read(); + *handles.get(&id).ok_or(Error::new(EBADF))? 
+ }; + + match handle.operation { + Operation::Memory => { + // let contexts = context::contexts(); + // let context = contexts.get(handle.pid).ok_or(Error::new(ESRCH))?; + // let context = context.read(); + + // for grant in &*context.grants.lock() { + // println!("Grant: {} -> {}", grant.start.get(), grant.size); + // } + // unimplemented!(); + return Err(Error::new(EBADF)); + }, + Operation::Regs(kind) => { + union Output { + _float: FloatRegisters, + int: IntRegisters + } + let mut first = true; + let (output, size) = loop { + if !first { + // We've tried this before, so let's wait before retrying + unsafe { context::switch(); } + } + first = false; + + let contexts = context::contexts(); + let context = contexts.get(handle.pid).ok_or(Error::new(ESRCH))?; + let context = context.read(); + + break match kind { + RegsKind::Float => { + // TODO!! + // (Output { float: FloatRegisters::default() }, mem::size_of::()) + return Err(Error::new(EBADF)); + }, + RegsKind::Int => match unsafe { ptrace::regs_for(&context) } { + None => { + // Another CPU is running this process, wait until it's stopped. + continue; + }, + Some(stack) => { + let mut regs = IntRegisters::default(); + + stack.save(&mut regs); + + (Output { int: regs }, mem::size_of::()) + } + } + }; + }; + + let bytes = unsafe { + slice::from_raw_parts(&output as *const _ as *const u8, mem::size_of::()) + }; + let len = cmp::min(buf.len(), size); + buf[..len].copy_from_slice(&bytes[..len]); + + Ok(len) + }, + Operation::Trace => Err(Error::new(EBADF)) + } + } + + fn write(&self, id: usize, buf: &[u8]) -> Result { + // Can't hold locks during the context switch later when + // waiting for a process to stop running. + let handle = { + let handles = self.handles.read(); + *handles.get(&id).ok_or(Error::new(EBADF))? 
+ }; + + let mut first = true; + match handle.operation { + Operation::Memory => { + // unimplemented!() + return Err(Error::new(EBADF)); + }, + Operation::Regs(kind) => loop { + if !first { + // We've tried this before, so let's wait before retrying + unsafe { context::switch(); } + } + first = false; + + let contexts = context::contexts(); + let context = contexts.get(handle.pid).ok_or(Error::new(ESRCH))?; + let mut context = context.write(); + + break match kind { + RegsKind::Float => { + // TODO!! + unimplemented!(); + }, + RegsKind::Int => match unsafe { ptrace::regs_for_mut(&mut context) } { + None => { + // Another CPU is running this process, wait until it's stopped. + continue; + }, + Some(stack) => { + if buf.len() < mem::size_of::() { + return Ok(0); + } + let regs = unsafe { + *(buf as *const _ as *const IntRegisters) + }; + + stack.load(&regs); + + Ok(mem::size_of::()) + } + } + }; + }, + Operation::Trace => { + if buf.len() < 1 { + return Ok(0); + } + let op = buf[0]; + let sysemu = op & PTRACE_SYSEMU == PTRACE_SYSEMU; + + let mut blocking = handle.flags & O_NONBLOCK != O_NONBLOCK; + let mut wait_breakpoint = false; + let mut singlestep = false; + + match op & PTRACE_OPERATIONMASK { + PTRACE_CONT => { ptrace::cont(handle.pid); }, + PTRACE_SYSCALL | PTRACE_SINGLESTEP => { // <- not a bitwise OR + singlestep = op & PTRACE_OPERATIONMASK == PTRACE_SINGLESTEP; + ptrace::set_breakpoint(handle.pid, sysemu, singlestep); + wait_breakpoint = true; + }, + PTRACE_WAIT => { + wait_breakpoint = true; + blocking = true; + }, + _ => return Err(Error::new(EINVAL)) + } + + let mut first = true; + loop { + if !first { + // We've tried this before, so let's wait before retrying + unsafe { context::switch(); } + } + first = false; + + let contexts = context::contexts(); + let context = contexts.get(handle.pid).ok_or(Error::new(ESRCH))?; + let mut context = context.write(); + if let Status::Exited(_) = context.status { + return Err(Error::new(ESRCH)); + } + + if singlestep { + 
match unsafe { ptrace::regs_for_mut(&mut context) } { + None => continue, + Some(stack) => stack.set_singlestep(true) + } + } + + context.ptrace_stop = false; + break; + } + + if wait_breakpoint && blocking { + ptrace::wait_breakpoint(handle.pid)?; + } + + Ok(1) + } + } + } + + fn fcntl(&self, id: usize, cmd: usize, arg: usize) -> Result { + let mut handles = self.handles.write(); + let mut handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; + + match cmd { + F_SETFL => { handle.flags = arg; Ok(0) }, + F_GETFL => return Ok(handle.flags), + _ => return Err(Error::new(EINVAL)) + } + } + + fn fpath(&self, id: usize, buf: &mut [u8]) -> Result { + let handles = self.handles.read(); + let handle = handles.get(&id).ok_or(Error::new(EBADF))?; + + let path = format!("proc:{}/{}", handle.pid.into(), match handle.operation { + Operation::Memory => "mem", + Operation::Regs(RegsKind::Float) => "regs/float", + Operation::Regs(RegsKind::Int) => "regs/int", + Operation::Trace => "trace" + }); + + let len = cmp::min(path.len(), buf.len()); + buf[..len].copy_from_slice(&path.as_bytes()[..len]); + + Ok(len) + } + + fn close(&self, id: usize) -> Result { + let handle = self.handles.write().remove(&id).ok_or(Error::new(EBADF))?; + ptrace::cont(handle.pid); + + let contexts = context::contexts(); + if let Some(context) = contexts.get(handle.pid) { + let mut context = context.write(); + context.ptrace_stop = false; + } + Ok(0) + } +} diff --git a/src/syscall/debug.rs b/src/syscall/debug.rs index 1499298..853380f 100644 --- a/src/syscall/debug.rs +++ b/src/syscall/debug.rs @@ -1,5 +1,4 @@ -use core::mem; -use core::ops::Range; +use core::{ascii, mem}; use alloc::string::String; use alloc::vec::Vec; @@ -8,47 +7,13 @@ use super::flag::*; use super::number::*; use super::validate::*; -// Copied from std -pub struct EscapeDefault { - range: Range, - data: [u8; 4], -} - -pub fn escape_default(c: u8) -> EscapeDefault { - let (data, len) = match c { - b'\t' => ([b'\\', b't', 0, 0], 2), - 
b'\r' => ([b'\\', b'r', 0, 0], 2), - b'\n' => ([b'\\', b'n', 0, 0], 2), - b'\\' => ([b'\\', b'\\', 0, 0], 2), - b'\'' => ([b'\\', b'\'', 0, 0], 2), - b'"' => ([b'\\', b'"', 0, 0], 2), - b'\x20' ... b'\x7e' => ([c, 0, 0, 0], 1), - _ => ([b'\\', b'x', hexify(c >> 4), hexify(c & 0xf)], 4), - }; - - return EscapeDefault { range: (0.. len), data: data }; - - fn hexify(b: u8) -> u8 { - match b { - 0 ... 9 => b'0' + b, - _ => b'a' + b - 10, - } - } -} - -impl Iterator for EscapeDefault { - type Item = u8; - fn next(&mut self) -> Option<u8> { self.range.next().map(|i| self.data[i]) } - fn size_hint(&self) -> (usize, Option<usize>) { self.range.size_hint() } -} - struct ByteStr<'a>(&'a[u8]); impl<'a> ::core::fmt::Debug for ByteStr<'a> { fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result { write!(f, "\"")?; for i in self.0 { - for ch in escape_default(*i) { + for ch in ascii::escape_default(*i) { write!(f, "{}", ch as char)?; } } diff --git a/src/syscall/driver.rs b/src/syscall/driver.rs index 5672706..360d3a4 100644 --- a/src/syscall/driver.rs +++ b/src/syscall/driver.rs @@ -25,7 +25,8 @@ pub fn iopl(level: usize, stack: &mut SyscallStack) -> Result<usize> { return Err(Error::new(EINVAL)); } - stack.rflags = (stack.rflags & !(3 << 12)) | ((level & 3) << 12); + let iret = &mut stack.interrupt_stack.iret; + iret.rflags = (iret.rflags & !(3 << 12)) | ((level & 3) << 12); Ok(0) } diff --git a/src/syscall/process.rs b/src/syscall/process.rs index d1a7298..7d07c41 100644 --- a/src/syscall/process.rs +++ b/src/syscall/process.rs @@ -6,21 +6,21 @@ use core::{intrinsics, mem}; use core::ops::DerefMut; use spin::Mutex; +use crate::context::file::FileDescriptor; +use crate::context::{ContextId, WaitpidKey}; +use crate::context; +#[cfg(not(feature="doc"))] +use crate::elf::{self, program_header}; +use crate::interrupt; +use crate::ipi::{ipi, IpiKind, IpiTarget}; use crate::memory::allocate_frames; -use crate::paging::{ActivePageTable, InactivePageTable, Page, VirtualAddress,
PAGE_SIZE}; use crate::paging::entry::EntryFlags; use crate::paging::mapper::MapperFlushAll; use crate::paging::temporary_page::TemporaryPage; -use crate::start::usermode; -use crate::interrupt; -use crate::context; -use crate::context::{ContextId, WaitpidKey}; -use crate::context::file::FileDescriptor; -#[cfg(not(feature="doc"))] -use crate::elf::{self, program_header}; -use crate::ipi::{ipi, IpiKind, IpiTarget}; +use crate::paging::{ActivePageTable, InactivePageTable, Page, VirtualAddress, PAGE_SIZE}; +use crate::ptrace; use crate::scheme::FileHandle; -use crate::syscall; +use crate::start::usermode; use crate::syscall::data::{SigAction, Stat}; use crate::syscall::error::*; use crate::syscall::flag::{CLONE_VFORK, CLONE_VM, CLONE_FS, CLONE_FILES, CLONE_SIGHAND, CLONE_STACK, @@ -28,6 +28,7 @@ use crate::syscall::flag::{CLONE_VFORK, CLONE_VM, CLONE_FS, CLONE_FILES, CLONE_S SIG_DFL, SIG_BLOCK, SIG_UNBLOCK, SIG_SETMASK, SIGCONT, SIGTERM, WCONTINUED, WNOHANG, WUNTRACED, wifcontinued, wifstopped}; use crate::syscall::validate::{validate_slice, validate_slice_mut}; +use crate::syscall; pub fn brk(address: usize) -> Result<usize> { let contexts = context::contexts(); @@ -128,10 +129,25 @@ pub fn clone(flags: usize, stack_base: usize) -> Result<ContextId> { } if let Some(ref stack) = context.kstack { + // Get the relative offset to the return address of this function + // (base pointer - start of stack) - one offset = stack_base - stack.as_ptr() as usize - mem::size_of::<usize>(); // Add clone ret let mut new_stack = stack.clone(); unsafe { + if let Some(regs) = ptrace::rebase_regs_ptr_mut(context.regs, Some(&mut new_stack)) { + // We'll need to tell the clone that it should + // return 0, but that's it. We don't actually + // clone the registers, because it will then + // become None and be exempt from all kinds of + // ptracing until the current syscall has + // completed.
+ (*regs).scratch.rax = 0; + } + + // Change the return address of the child + // (previously syscall) to the arch-specific + // clone_ret callback let func_ptr = new_stack.as_mut_ptr().offset(offset as isize); *(func_ptr as *mut usize) = interrupt::syscall::clone_ret as usize; } @@ -1052,6 +1068,8 @@ pub fn exit(status: usize) -> ! { context.id }; + ptrace::close(pid); + // Files must be closed while context is valid so that messages can be passed for (_fd, file_option) in close_files.drain(..).enumerate() { if let Some(file) = file_option { diff --git a/syscall b/syscall index 5cdc240..f8eda5c 160000 --- a/syscall +++ b/syscall @@ -1 +1 @@ -Subproject commit 5cdc240d1338b53a1d8b9f652b0cc628317f3109 +Subproject commit f8eda5ce1bd6fe7f276302493ec54a75a7335fd0