diff --git a/src/context/arch/x86_64.rs b/src/context/arch/x86_64.rs index 64b0268..ee7ce5b 100644 --- a/src/context/arch/x86_64.rs +++ b/src/context/arch/x86_64.rs @@ -1,5 +1,5 @@ use core::mem; -use core::sync::atomic::{AtomicBool, Ordering}; +use core::sync::atomic::AtomicBool; use crate::syscall::FloatRegisters; @@ -12,9 +12,8 @@ pub static CONTEXT_SWITCH_LOCK: AtomicBool = AtomicBool::new(false); const ST_RESERVED: u128 = 0xFFFF_FFFF_FFFF_0000_0000_0000_0000_0000; #[derive(Clone, Debug)] +#[repr(C)] pub struct Context { - /// FX valid? - loadable: bool, /// FX location fx: usize, /// Page table pointer @@ -34,13 +33,22 @@ pub struct Context { /// Base pointer rbp: usize, /// Stack pointer - rsp: usize + rsp: usize, + /// FX valid? + loadable: AbiCompatBool, +} + +#[repr(u8)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum AbiCompatBool { + False, + True, } impl Context { pub fn new() -> Context { Context { - loadable: false, + loadable: AbiCompatBool::False, fx: 0, cr3: 0, rflags: 0, @@ -59,7 +67,7 @@ impl Context { } pub fn get_fx_regs(&self) -> Option { - if !self.loadable { + if self.loadable == AbiCompatBool::False { return None; } let mut regs = unsafe { *(self.fx as *const FloatRegisters) }; @@ -74,7 +82,7 @@ impl Context { } pub fn set_fx_regs(&mut self, mut new: FloatRegisters) -> bool { - if !self.loadable { + if self.loadable == AbiCompatBool::False { return false; } @@ -126,53 +134,95 @@ impl Context { self.rsp += mem::size_of::(); value } +} - /// Switch to the next context by restoring its stack and registers - /// Check disassembly! - #[cold] - #[inline(never)] - #[naked] - pub unsafe fn switch_to(&mut self, next: &mut Context) { - asm!("fxsave64 [{}]", in(reg) (self.fx)); - self.loadable = true; - if next.loadable { - asm!("fxrstor64 [{}]", in(reg) (next.fx)); - }else{ - asm!("fninit"); - } +/// Switch to the next context by restoring its stack and registers +/// Check disassembly! +#[cold] +#[inline(never)] +#[naked] +pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) { + asm!( + // As a quick reminder for those who are unfamiliar with the System V ABI (extern "C"): + // + // - the current parameters are passed in the registers `rdi`, `rsi`, + // - we can modify scratch registers, e.g. rax + // - we cannot change callee-preserved registers arbitrarily, e.g. rbx, which is why we + // store them here in the first place. + " + // load `prev.fx` + mov rax, [rdi + 0x00] - asm!("mov {}, cr3", out(reg) (self.cr3)); - if next.cr3 != self.cr3 { - asm!("mov cr3, {}", in(reg) (next.cr3)); - } + // save processor SSE/FPU/AVX state in `prev.fx` pointee + fxsave64 [rax] - asm!("pushfq ; pop {}", out(reg) (self.rflags)); - asm!("push {} ; popfq", in(reg) (next.rflags)); + // set `prev.loadable` to true + mov BYTE PTR [rdi + 0x50], {true} + // compare `next.loadable` with true + cmp BYTE PTR [rsi + 0x50], {true} + je switch_to.next_is_loadable - asm!("mov {}, rbx", out(reg) (self.rbx)); - asm!("mov rbx, {}", in(reg) (next.rbx)); + fninit + jmp switch_to.after_fx - asm!("mov {}, r12", out(reg) (self.r12)); - asm!("mov r12, {}", in(reg) (next.r12)); + switch_to.next_is_loadable: + mov rax, [rsi + 0x00] + fxrstor64 [rax] - asm!("mov {}, r13", out(reg) (self.r13)); - asm!("mov r13, {}", in(reg) (next.r13)); + switch_to.after_fx: + // Save the current CR3, and load the next CR3 if not identical + mov rcx, cr3 + mov [rdi + 0x08], rcx + mov rax, [rsi + 0x08] + cmp rax, rcx - asm!("mov {}, r14", out(reg) (self.r14)); - asm!("mov r14, {}", in(reg) (next.r14)); + je switch_to.same_cr3 + mov cr3, rax - asm!("mov {}, r15", out(reg) (self.r15)); - asm!("mov r15, {}", in(reg) (next.r15)); + switch_to.same_cr3: + // Save old registers, and load new ones + mov [rdi + 0x18], rbx + mov rbx, [rsi + 0x18] - asm!("mov {}, rsp", out(reg) (self.rsp)); - asm!("mov rsp, {}", in(reg) (next.rsp)); + mov [rdi + 0x20], r12 + mov r12, [rsi + 0x20] - asm!("mov {}, rbp", out(reg) (self.rbp)); - asm!("mov rbp, {}", in(reg) (next.rbp)); + mov [rdi + 0x28], r13 + mov r13, [rsi + 0x28] - // Unset global lock after loading registers but before switch - CONTEXT_SWITCH_LOCK.store(false, Ordering::SeqCst); - } + mov [rdi + 0x30], r14 + mov r14, [rsi + 0x30] + + mov [rdi + 0x38], r15 + mov r15, [rsi + 0x38] + + mov [rdi + 0x40], rbp + mov rbp, [rsi + 0x40] + + mov [rdi + 0x48], rsp + mov rsp, [rsi + 0x48] + + // push RFLAGS (can only be modified via stack) + pushfq + // pop RFLAGS into `self.rflags` + pop QWORD PTR [rdi + 0x10] + + // push `next.rflags` + push QWORD PTR [rsi + 0x10] + // pop into RFLAGS + popfq + + // When we return, we cannot even guarantee that the return address on the stack, points to + // the calling function, `context::switch`. Thus, we have to execute this Rust hook by + // ourselves, which will unlock the contexts before the later switch. + + call {switch_hook} + + ", + + true = const(AbiCompatBool::True as u8), + switch_hook = sym crate::context::switch_finish_hook, + ); } #[allow(dead_code)] @@ -195,37 +245,38 @@ pub struct SignalHandlerStack { #[naked] unsafe extern fn signal_handler_wrapper() { #[inline(never)] - unsafe fn inner(stack: &SignalHandlerStack) { + unsafe extern "C" fn inner(stack: &SignalHandlerStack) { (stack.handler)(stack.sig); } // Push scratch registers - asm!("push rax - push rcx - push rdx - push rdi - push rsi - push r8 - push r9 - push r10 - push r11"); + asm!( + " + push rax + push rcx + push rdx + push rdi + push rsi + push r8 + push r9 + push r10 + push r11 - // Get reference to stack variables - let rsp: usize; - asm!("mov {}, rsp", out(reg) rsp); + mov rdi, rsp + call {inner} - // Call inner rust function - inner(&*(rsp as *const SignalHandlerStack)); + pop r11 + pop r10 + pop r9 + pop r8 + pop rsi + pop rdi + pop rdx + pop rcx + pop rax + add rsp, 16 + ", - // Pop scratch registers, error code, and return - asm!("pop r11 - pop r10 - pop r9 - pop r8 - pop rsi - pop rdi - pop rdx - pop rcx - pop rax - add rsp, 16"); + inner = sym inner, + ); } diff --git a/src/context/mod.rs b/src/context/mod.rs index 3d8b7ec..efc6bcf 100644 --- a/src/context/mod.rs +++ b/src/context/mod.rs @@ -34,6 +34,8 @@ pub mod signal; /// Timeout handling pub mod timeout; +pub use self::switch::switch_finish_hook; + /// Limit on number of contexts pub const CONTEXT_MAX_CONTEXTS: usize = (isize::max_value() as usize) - 1; diff --git a/src/context/switch.rs b/src/context/switch.rs index cc8ac69..484e7a0 100644 --- a/src/context/switch.rs +++ b/src/context/switch.rs @@ -1,8 +1,11 @@ +use core::cell::Cell; use core::ops::Bound; use core::sync::atomic::Ordering; use alloc::sync::Arc; +use spin::RwLock; + use crate::context::signal::signal_handler; use crate::context::{arch, contexts, Context, Status, CONTEXT_ID}; use crate::gdt; @@ -64,6 +67,24 @@ unsafe fn update(context: &mut Context, cpu_id: usize) { } } +struct SwitchResult { + prev_lock: Arc>, + next_lock: Arc>, +} + +pub unsafe extern "C" fn switch_finish_hook() { + if let Some(SwitchResult { prev_lock, next_lock }) = SWITCH_RESULT.take() { + prev_lock.force_write_unlock(); + next_lock.force_write_unlock(); + } else { + panic!("SWITCH_RESULT was not set"); + } + arch::CONTEXT_SWITCH_LOCK.store(false, Ordering::SeqCst); +} + +#[thread_local] +static SWITCH_RESULT: Cell> = Cell::new(None); + unsafe fn runnable(context: &Context, cpu_id: usize) -> bool { // Switch to context if it needs to run, is not currently running, and is owned by the current CPU !context.running && !context.ptrace_stop && context.status == Status::Runnable && context.cpu_id == Some(cpu_id) @@ -163,24 +184,24 @@ pub unsafe fn switch() -> bool { to_context.arch.signal_stack(signal_handler, sig); } - let from_ptr: *mut Context = &mut *from_context_guard; - let to_ptr: *mut Context = &mut *to_context; + let from_arch_ptr: *mut arch::Context = &mut from_context_guard.arch; + core::mem::forget(from_context_guard); - // FIXME: Ensure that this critical section is somehow still protected by the lock, and not - // just for other processes' context switching, but for other operations which do not - // require CONTEXT_SWITCH_LOCK. - // - // What I suggest, is that we wrap the Context struct (typically stored as - // `Arc>`), into a wrapper with interior locking for the inner context type - // (which could be something like `RwLock`). The wrapper would also contain - // a field of type `UnsafeCell`, which would be accessible if and only if the - // `running` field is set to false, making that field function as a lock. - drop(from_context_guard); - drop(from_context_lock); - to_context_lock.force_write_unlock(); - drop(to_context_lock); + let prev_arch: &mut arch::Context = &mut *from_arch_ptr; + let next_arch: &mut arch::Context = &mut to_context.arch; - (*from_ptr).arch.switch_to(&mut (*to_ptr).arch); + // to_context_guard only exists as a raw pointer, but is still locked + + SWITCH_RESULT.set(Some(SwitchResult { + prev_lock: from_context_lock, + next_lock: to_context_lock, + })); + + arch::switch_to(prev_arch, next_arch); + + // NOTE: After switch_to is called, the return address can even be different from the + // current return address, meaning that we cannot use local variables here, and that we + // need to use the `switch_finish_hook` to be able to release the locks. true } else {