diff --git a/src/context/arch/x86_64.rs b/src/context/arch/x86_64.rs index 3066e50..85855dc 100644 --- a/src/context/arch/x86_64.rs +++ b/src/context/arch/x86_64.rs @@ -1,9 +1,13 @@ use core::mem; use core::sync::atomic::AtomicBool; +use alloc::sync::Arc; + +use crate::paging::{RmmA, RmmArch}; use crate::syscall::FloatRegisters; use memoffset::offset_of; +use spin::Once; /// This must be used by the kernel to ensure that context switches are done atomically /// Compare and exchange this to true when beginning a context switch on any CPU @@ -19,10 +23,6 @@ pub const KFX_ALIGN: usize = 16; #[derive(Clone, Debug)] #[repr(C)] pub struct Context { - /// FX location - fx: usize, - /// Page table pointer - cr3: usize, /// RFLAGS register rflags: usize, /// RBX register @@ -54,8 +54,6 @@ pub struct Context { impl Context { pub fn new() -> Context { Context { - fx: 0, - cr3: 0, rflags: 0, rbx: 0, r12: 0, @@ -69,50 +67,6 @@ impl Context { } } - pub fn get_page_utable(&self) -> usize { - self.cr3 - } - - pub fn get_fx_regs(&self) -> FloatRegisters { - let mut regs = unsafe { *(self.fx as *const FloatRegisters) }; - regs._reserved = 0; - let mut new_st = regs.st_space; - for st in &mut new_st { - // Only allow access to the 80 lowest bits - *st &= !ST_RESERVED; - } - regs.st_space = new_st; - regs - } - - pub fn set_fx_regs(&mut self, mut new: FloatRegisters) { - { - let old = unsafe { &*(self.fx as *const FloatRegisters) }; - new._reserved = old._reserved; - let old_st = new.st_space; - let mut new_st = new.st_space; - for (new_st, old_st) in new_st.iter_mut().zip(&old_st) { - *new_st &= !ST_RESERVED; - *new_st |= old_st & ST_RESERVED; - } - new.st_space = new_st; - - // Make sure we don't use `old` from now on - } - - unsafe { - *(self.fx as *mut FloatRegisters) = new; - } - } - - pub fn set_fx(&mut self, address: usize) { - self.fx = address; - } - - pub fn set_page_utable(&mut self, address: usize) { - self.cr3 = address; - } - pub fn set_stack(&mut self, address: usize) { 
self.rsp = address; } @@ -134,61 +88,103 @@ impl Context { value } } - -macro_rules! load_msr( - ($name:literal, $offset:literal) => { - concat!(" - mov ecx, {", $name, "} - mov rdx, [rsi + {", $offset, "}] - mov eax, edx - shr rdx, 32 - - // MSR <= EDX:EAX - wrmsr - ") +impl super::Context { + pub fn get_fx_regs(&self) -> FloatRegisters { + let mut regs = unsafe { self.kfx.as_ptr().cast::<FloatRegisters>().read() }; + regs._reserved = 0; + let mut new_st = regs.st_space; + for st in &mut new_st { + // Only allow access to the 80 lowest bits + *st &= !ST_RESERVED; + } + regs.st_space = new_st; + regs } -); -// NOTE: RAX is a scratch register and can be set to whatever. There is also no return -// value in switch_to, to it will also never be read. The same goes for RDX, and RCX. -// TODO: Use runtime code patching (perhaps in the bootloader) by pushing alternative code -// sequences into a specialized section, with some macro resembling Linux's `.ALTERNATIVE`. -#[cfg(feature = "x86_fsgsbase")] -macro_rules! switch_fsgsbase( - () => { - " - // placeholder: {MSR_FSBASE} {MSR_KERNELGSBASE} + pub fn set_fx_regs(&mut self, mut new: FloatRegisters) { + { + let old = unsafe { &*(self.kfx.as_ptr().cast::<FloatRegisters>()) }; + new._reserved = old._reserved; + let old_st = old.st_space; // reserved bits must come from the saved state, not from the caller-supplied value + let mut new_st = new.st_space; + for (new_st, old_st) in new_st.iter_mut().zip(&old_st) { + *new_st &= !ST_RESERVED; + *new_st |= old_st & ST_RESERVED; + } + new.st_space = new_st; - rdfsbase rax - mov [rdi + {off_fsbase}], rax - mov rax, [rsi + {off_fsbase}] - wrfsbase rax + // Make sure we don't use `old` from now on + } - swapgs - rdgsbase rax - mov [rdi + {off_gsbase}], rax - mov rax, [rsi + {off_gsbase}] - wrgsbase rax - swapgs - " + unsafe { + self.kfx.as_mut_ptr().cast::<FloatRegisters>().write(new); + } } -); +} -#[cfg(not(feature = "x86_fsgsbase"))] -macro_rules! 
switch_fsgsbase( - () => { - concat!( - load_msr!("MSR_FSBASE", "off_fsbase"), - load_msr!("MSR_KERNELGSBASE", "off_gsbase"), - ) - } -); +pub static EMPTY_CR3: Once = Once::new(); +// SAFETY: EMPTY_CR3 must be initialized. +pub unsafe fn empty_cr3() -> rmm::PhysicalAddress { + debug_assert!(EMPTY_CR3.poll().is_some()); + *EMPTY_CR3.get_unchecked() +} /// Switch to the next context by restoring its stack and registers -/// Check disassembly! +pub unsafe fn switch_to(prev: &mut super::Context, next: &mut super::Context) { + core::arch::asm!(" + fxsave64 [{prev_fx}] + fxrstor64 [{next_fx}] + ", prev_fx = in(reg) prev.kfx.as_mut_ptr(), + next_fx = in(reg) next.kfx.as_ptr(), + ); + + { + use x86::{bits64::segmentation::*, msr}; + + // This is so much shorter in Rust! + + if cfg!(feature = "x86_fsgsbase") { + prev.arch.fsbase = rdfsbase() as usize; + wrfsbase(next.arch.fsbase as u64); + swapgs(); + prev.arch.gsbase = rdgsbase() as usize; + wrgsbase(next.arch.gsbase as u64); + swapgs(); + } else { + prev.arch.fsbase = msr::rdmsr(msr::IA32_FS_BASE) as usize; + msr::wrmsr(msr::IA32_FS_BASE, next.arch.fsbase as u64); + prev.arch.gsbase = msr::rdmsr(msr::IA32_KERNEL_GSBASE) as usize; + msr::wrmsr(msr::IA32_KERNEL_GSBASE, next.arch.gsbase as u64); + } + } + + match next.addr_space { + // Since Arc essentially just wraps a pointer, in this case a regular pointer (as + // opposed to dyn or slice fat pointers), and NonNull optimization exists, map_or will + // hopefully be optimized down to checking prev and next pointers, as next cannot be null. + Some(ref next_space) => if prev.addr_space.as_ref().map_or(true, |prev_space| !Arc::ptr_eq(&prev_space, &next_space)) { + // Suppose we have two sibling threads A and B. A runs on CPU 0 and B on CPU 1. A + // recently called yield and is now here about to switch back. Meanwhile, B is + // currently creating a new mapping in their shared address space, for example a + // message on a channel. 
+ // + // Unless we acquire this lock, it may be possible that the TLB will not contain new + // entries. While this can be caught and corrected in a page fault handler, this is not + // true when entries are removed from a page table! + let next_space = next_space.read(); + RmmA::set_table(next_space.frame.utable.start_address()); + } + None => { + RmmA::set_table(empty_cr3()); + } + } + switch_to_inner(&mut prev.arch, &mut next.arch) +} + +// Check disassembly! #[naked] -pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) { +unsafe extern "sysv64" fn switch_to_inner(_prev: &mut Context, _next: &mut Context) { use Context as Cx; core::arch::asm!( @@ -199,28 +195,6 @@ pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) { // - we cannot change callee-preserved registers arbitrarily, e.g. rbx, which is why we // store them here in the first place. concat!(" - // load `prev.fx` - mov rax, [rdi + {off_fx}] - - // save processor SSE/FPU/AVX state in `prev.fx` pointee - fxsave64 [rax] - - // load `next.fx` - mov rax, [rsi + {off_fx}] - - // load processor SSE/FPU/AVX state from `next.fx` pointee - fxrstor64 [rax] - - // Save the current CR3, and load the next CR3 if not identical - mov rcx, cr3 - mov [rdi + {off_cr3}], rcx - mov rax, [rsi + {off_cr3}] - cmp rax, rcx - - je 4f - mov cr3, rax - -4: // Save old registers, and load new ones mov [rdi + {off_rbx}], rbx mov rbx, [rsi + {off_rbx}] @@ -243,10 +217,6 @@ pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) { mov [rdi + {off_rsp}], rsp mov rsp, [rsi + {off_rsp}] - ", - switch_fsgsbase!(), - " - // push RFLAGS (can only be modified via stack) pushfq // pop RFLAGS into `self.rflags` @@ -266,8 +236,6 @@ pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) { "), - off_fx = const(offset_of!(Cx, fx)), - off_cr3 = const(offset_of!(Cx, cr3)), off_rflags = const(offset_of!(Cx, rflags)), off_rbx = const(offset_of!(Cx, rbx)), @@ 
-278,12 +246,6 @@ pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) { off_rbp = const(offset_of!(Cx, rbp)), off_rsp = const(offset_of!(Cx, rsp)), - off_fsbase = const(offset_of!(Cx, fsbase)), - off_gsbase = const(offset_of!(Cx, gsbase)), - - MSR_FSBASE = const(x86::msr::IA32_FS_BASE), - MSR_KERNELGSBASE = const(x86::msr::IA32_KERNEL_GSBASE), - switch_hook = sym crate::context::switch_finish_hook, options(noreturn), ); diff --git a/src/context/context.rs b/src/context/context.rs index 2c8b2ef..76248d6 100644 --- a/src/context/context.rs +++ b/src/context/context.rs @@ -220,15 +220,17 @@ pub struct Context { /// The architecture specific context pub arch: arch::Context, /// Kernel FX - used to store SIMD and FPU registers on context switch - pub kfx: Option>, + pub kfx: AlignedBox<[u8; {arch::KFX_SIZE}], {arch::KFX_ALIGN}>, /// Kernel stack pub kstack: Option>, /// Kernel signal backup: Registers, Kernel FX, Kernel Stack, Signal number - pub ksig: Option<(arch::Context, Option>, Option>, u8)>, + pub ksig: Option<(arch::Context, AlignedBox<[u8; arch::KFX_SIZE], {arch::KFX_ALIGN}>, Option>, u8)>, /// Restore ksig context on next switch pub ksig_restore: bool, /// Address space containing a page table lock, and grants. Normally this will have a value, - /// but can be None while the context is being reaped. + /// but can be None while the context is being reaped or when a new context is created but has + /// not yet had its address space changed. Note that these are only for user mappings; kernel + /// mappings are universal and independent of address spaces or contexts. 
pub addr_space: Option>>, /// The name of the context pub name: Arc>>, @@ -358,7 +360,7 @@ impl Context { pending: VecDeque::new(), wake: None, arch: arch::Context::new(), - kfx: None, + kfx: AlignedBox::<[u8; arch::KFX_SIZE], {arch::KFX_ALIGN}>::try_zeroed()?, kstack: None, ksig: None, ksig_restore: false, @@ -379,7 +381,6 @@ impl Context { sigstack: None, clone_entry: None, }; - let _ = this.set_addr_space(new_addrspace()?); Ok(this) } @@ -562,14 +563,6 @@ impl Context { } } - self.arch.set_page_utable(physaddr.data()); self.addr_space.replace(addr_space) } - - pub fn init_fx(&mut self) -> Result<(), Enomem> { - let mut fx = AlignedBox::<[u8; arch::KFX_SIZE], {arch::KFX_ALIGN}>::try_zeroed()?; - self.arch.set_fx(fx.as_mut_ptr() as usize); - self.kfx = Some(fx); - Ok(()) - } } diff --git a/src/context/list.rs b/src/context/list.rs index 7019a14..6594e4c 100644 --- a/src/context/list.rs +++ b/src/context/list.rs @@ -79,7 +79,7 @@ impl ContextList { let context_lock = self.new_context()?; { let mut context = context_lock.write(); - context.init_fx()?; + let _ = context.set_addr_space(super::memory::new_addrspace()?); let mut stack = vec![0; 65_536].into_boxed_slice(); let offset = stack.len() - mem::size_of::(); diff --git a/src/context/memory.rs b/src/context/memory.rs index 2ce7460..eaed5db 100644 --- a/src/context/memory.rs +++ b/src/context/memory.rs @@ -258,6 +258,7 @@ impl UserGrants { } } pub fn insert(&mut self, grant: Grant) { + assert!(self.conflicts(*grant).next().is_none()); self.reserve(&grant); self.inner.insert(grant); } @@ -663,9 +664,6 @@ pub const DANGLING: usize = 1 << (usize::BITS - 2); #[derive(Debug)] pub struct Tables { - #[cfg(target_arch = "aarch64")] - pub ktable: Frame, - pub utable: Frame, } @@ -673,9 +671,6 @@ impl Drop for Tables { fn drop(&mut self) { use crate::memory::deallocate_frames; deallocate_frames(Frame::containing_address(PhysicalAddress::new(self.utable.start_address().data())), 1); - - #[cfg(target_arch = "aarch64")] - 
deallocate_frames(Frame::containing_address(PhysicalAddress::new(self.ktable.start_address().data())), 1); } } @@ -683,46 +678,37 @@ impl Drop for Tables { pub fn setup_new_utable() -> Result { let mut new_utable = crate::memory::allocate_frames(1).ok_or(Error::new(ENOMEM))?; - // TODO: There is only supposed to be one ktable, right? Use a global variable to store the - // ktable (or access it from a control register) on architectures which have ktables, or obtain - // it from *any* utable on architectures which do not. - #[cfg(target_arch = "aarch64")] - let new_ktable = crate::memory::allocate_frames(1).ok_or(Error::new(ENOMEM))?; + #[cfg(target_arch = "x86_64")] + { + let active_ktable = unsafe { ActivePageTable::new(TableKind::Kernel) }; + let mut new_ktable = unsafe { InactivePageTable::from_address(new_utable.start_address().data()) }; - let active_ktable = unsafe { ActivePageTable::new(TableKind::Kernel) }; + let mut copy_mapping = |p4_no| { + let frame = active_ktable.p4()[p4_no].pointed_frame() + .unwrap_or_else(|| panic!("expected kernel PML {} to be mapped", p4_no)); + let flags = active_ktable.p4()[p4_no].flags(); - #[cfg(target_arch = "aarch64")] - let ktable = &new_ktable; + new_ktable.mapper().p4_mut()[p4_no].set(frame, flags); + }; + // TODO: Just copy all 256 mappings? Or copy KERNEL_PML4+KERNEL_PERCPU_PML4 (needed for + // paranoid ISRs which can occur anywhere; we don't want interrupts to triple fault!) and + // map lazily via page faults in the kernel. 
- #[cfg(not(target_arch = "aarch64"))] - let ktable = &new_utable; + // Copy kernel image mapping + copy_mapping(crate::KERNEL_PML4); - let mut new_mapper = unsafe { InactivePageTable::from_address(ktable.start_address().data()) }; + // Copy kernel heap mapping + copy_mapping(crate::KERNEL_HEAP_PML4); - let mut copy_mapping = |p4_no| { - let frame = active_ktable.p4()[p4_no].pointed_frame().expect("kernel image not mapped"); - let flags = active_ktable.p4()[p4_no].flags(); + // Copy physmap mapping + copy_mapping(crate::PHYS_PML4); - new_mapper.mapper().p4_mut()[p4_no].set(frame, flags); - }; - // TODO: Just copy all 256 mappings? - - // Copy kernel image mapping - copy_mapping(crate::KERNEL_PML4); - - // Copy kernel heap mapping - copy_mapping(crate::KERNEL_HEAP_PML4); - - // Copy physmap mapping - copy_mapping(crate::PHYS_PML4); - - // Copy kernel percpu (similar to TLS) mapping. - copy_mapping(crate::KERNEL_PERCPU_PML4); + // Copy kernel percpu (similar to TLS) mapping. + copy_mapping(crate::KERNEL_PERCPU_PML4); + } Ok(Tables { utable: new_utable, - #[cfg(target_arch = "aarch64")] - ktable: new_ktable, }) } diff --git a/src/context/mod.rs b/src/context/mod.rs index a45efdc..c282c8a 100644 --- a/src/context/mod.rs +++ b/src/context/mod.rs @@ -6,6 +6,8 @@ use core::alloc::{GlobalAlloc, Layout}; use core::sync::atomic::Ordering; use spin::{Once, RwLock, RwLockReadGuard, RwLockWriteGuard}; +use crate::paging::{RmmA, RmmArch}; + pub use self::context::{Context, ContextId, ContextSnapshot, Status, WaitpidKey}; pub use self::list::ContextList; pub use self::switch::switch; @@ -53,11 +55,14 @@ static CONTEXTS: RwLock = RwLock::new(ContextList::new()); #[thread_local] static CONTEXT_ID: context::AtomicContextId = context::AtomicContextId::default(); +pub use self::arch::empty_cr3; + pub fn init() { let mut contexts = contexts_mut(); let context_lock = contexts.new_context().expect("could not initialize first context"); let mut context = context_lock.write(); - 
context.init_fx().expect("failed to allocate FX for first context"); + + self::arch::EMPTY_CR3.call_once(|| unsafe { RmmA::table() }); context.status = Status::Runnable; context.running = true; @@ -65,11 +70,6 @@ pub fn init() { CONTEXT_ID.store(context.id, Ordering::SeqCst); } -/// Initialize contexts, called if needed -fn init_contexts() -> RwLock { - RwLock::new(ContextList::new()) -} - /// Get the global schemes list, const pub fn contexts() -> RwLockReadGuard<'static, ContextList> { CONTEXTS.read() diff --git a/src/context/switch.rs b/src/context/switch.rs index e641142..8a565e5 100644 --- a/src/context/switch.rs +++ b/src/context/switch.rs @@ -29,11 +29,7 @@ unsafe fn update(context: &mut Context, cpu_id: usize) { let ksig = context.ksig.take().expect("context::switch: ksig not set with ksig_restore"); context.arch = ksig.0; - if let Some(ref mut kfx) = context.kfx { - kfx.copy_from_slice(&*ksig.1.expect("context::switch: ksig kfx not set with ksig_restore")); - } else { - panic!("context::switch: kfx not set with ksig_restore"); - } + context.kfx.copy_from_slice(&*ksig.1); if let Some(ref mut kstack) = context.kstack { kstack.copy_from_slice(&ksig.2.expect("context::switch: ksig kstack not set with ksig_restore")); @@ -194,11 +190,11 @@ pub unsafe fn switch() -> bool { to_context.arch.signal_stack(signal_handler, sig); } - let from_arch_ptr: *mut arch::Context = &mut from_context_guard.arch; + let from_ptr: *mut Context = &mut *from_context_guard; core::mem::forget(from_context_guard); - let prev_arch: &mut arch::Context = &mut *from_arch_ptr; - let next_arch: &mut arch::Context = &mut to_context.arch; + let prev: &mut Context = &mut *from_ptr; + let next: &mut Context = &mut *to_context; // to_context_guard only exists as a raw pointer, but is still locked @@ -207,7 +203,7 @@ pub unsafe fn switch() -> bool { next_lock: to_context_lock, })); - arch::switch_to(prev_arch, next_arch); + arch::switch_to(prev, next); // NOTE: After switch_to is called, the return 
address can even be different from the // current return address, meaning that we cannot use local variables here, and that we diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs index 5318b99..3fa2a93 100644 --- a/src/scheme/proc.rs +++ b/src/scheme/proc.rs @@ -572,11 +572,7 @@ impl Scheme for ProcScheme { RegsKind::Float => with_context(info.pid, |context| { // NOTE: The kernel will never touch floats - // In the rare case of not having floating - // point registers uninitiated, return - // empty everything. - let fx = context.kfx.as_ref().map(|_| context.arch.get_fx_regs()).unwrap_or_default(); - Ok((Output { float: fx }, mem::size_of::())) + Ok((Output { float: context.get_fx_regs() }, mem::size_of::())) })?, RegsKind::Int => try_stop_context(info.pid, |context| match unsafe { ptrace::regs_for(&context) } { None => { @@ -835,7 +831,7 @@ impl Scheme for ProcScheme { // Ignore the rare case of floating point // registers being uninitiated - let _ = context.arch.set_fx_regs(regs); + let _ = context.set_fx_regs(regs); Ok(mem::size_of::()) }) diff --git a/src/syscall/process.rs b/src/syscall/process.rs index 7ad39bb..4dd223f 100644 --- a/src/syscall/process.rs +++ b/src/syscall/process.rs @@ -7,13 +7,13 @@ use core::mem; use spin::{RwLock, RwLockWriteGuard}; -use crate::context::{Context, ContextId, memory, WaitpidKey}; +use crate::context::{Context, ContextId, WaitpidKey}; use crate::Bootstrap; use crate::context; use crate::interrupt; use crate::paging::mapper::{Flusher, InactiveFlusher, PageFlushAll}; -use crate::paging::{ActivePageTable, InactivePageTable, Page, PageFlags, RmmA, RmmArch, TableKind, VirtualAddress, PAGE_SIZE}; +use crate::paging::{ActivePageTable, InactivePageTable, Page, PageFlags, RmmArch, TableKind, VirtualAddress, PAGE_SIZE}; use crate::ptrace; use crate::start::usermode; use crate::syscall::data::SigAction; @@ -43,7 +43,7 @@ fn empty<'lock>(context_lock: &'lock RwLock, mut context: RwLockWriteGu let unmap_result = if reaping { 
log::error!("{}: {}: Grant should not exist: {:?}", context.id.into(), *context.name.read(), grant); - let mut new_table = unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) }; + let mut new_table = unsafe { InactivePageTable::from_address(addr_space.frame.utable.start_address().data()) }; grant.unmap(&mut new_table.mapper(), &mut InactiveFlusher::new()) } else {