From 4bd137f36ec5eb70e8e4309ab9d21be556dcd138 Mon Sep 17 00:00:00 2001 From: Jeremy Soller Date: Wed, 17 Aug 2022 10:48:23 -0600 Subject: [PATCH] Fixes for x86 32-bit --- src/arch/x86/gdt.rs | 37 ++------- src/arch/x86/interrupt/handler.rs | 132 ++---------------------------- src/arch/x86/start.rs | 60 +------------- src/context/arch/x86.rs | 3 +- 4 files changed, 16 insertions(+), 216 deletions(-) diff --git a/src/arch/x86/gdt.rs b/src/arch/x86/gdt.rs index 2a58c4f..baddf22 100644 --- a/src/arch/x86/gdt.rs +++ b/src/arch/x86/gdt.rs @@ -104,7 +104,8 @@ pub unsafe fn set_tss_stack(stack: usize) { #[cfg(not(feature = "pti"))] pub unsafe fn set_tss_stack(stack: usize) { KPCR.tss.0.ss0 = (GDT_KERNEL_DATA << 3) as u16; - KPCR.tss.0.esp0 = stack as u32; + //TODO: should PHYS_OFFSET be added on x86_64 as well? + KPCR.tss.0.esp0 = (stack + crate::PHYS_OFFSET) as u32; } // Initialize GDT @@ -138,9 +139,6 @@ pub unsafe fn init() { /// Initialize GDT with TLS pub unsafe fn init_paging(cpu_id: u32, tcb_offset: usize, stack_offset: usize) { - // Set temporary TLS segment to the self-pointer of the Thread Control Block. - x86::msr::wrmsr(x86::msr::IA32_GS_BASE, tcb_offset as u64); - //TODO: will this work with multicore? { INIT_GDT[GDT_KERNEL_KPCR].set_offset(tcb_offset as u32); @@ -165,6 +163,8 @@ pub unsafe fn init_paging(cpu_id: u32, tcb_offset: usize, stack_offset: usize) { // Once we have fetched the real KPCR address, set the TLS segment to the TCB pointer there. kpcr.tcb_end = (tcb_offset as *const usize).read(); + GDT[GDT_KERNEL_KPCR].set_offset(tcb_offset as u32); + { // We can now access our TSS, via the KPCR, which is a thread local let tss = &kpcr.tss.0 as *const _ as usize as u32; @@ -183,41 +183,16 @@ pub unsafe fn init_paging(cpu_id: u32, tcb_offset: usize, stack_offset: usize) { // Load the new GDT, which is correctly located in thread local storage. dtables::lgdt(&gdtr); - // Ensure that GS always points to the KPCR in kernel space. - x86::msr::wrmsr(x86::msr::IA32_GS_BASE, kpcr as *mut _ as usize as u64); - // Inside kernel space, GS should _always_ point to the TSS. When leaving userspace, `swapgs` - // is called again, making the userspace GS always point to user data. - x86::msr::wrmsr(x86::msr::IA32_KERNEL_GSBASE, 0); - - // Set the User TLS segment to zero, before we create any contexts and start scheduling. - x86::msr::wrmsr(x86::msr::IA32_FS_BASE, 0); - // Reload the segment descriptors load_cs(SegmentSelector::new(GDT_KERNEL_CODE as u16, Ring::Ring0)); segmentation::load_ds(SegmentSelector::new(GDT_KERNEL_DATA as u16, Ring::Ring0)); segmentation::load_es(SegmentSelector::new(GDT_KERNEL_DATA as u16, Ring::Ring0)); + segmentation::load_fs(SegmentSelector::new(GDT_KERNEL_DATA as u16, Ring::Ring0)); + segmentation::load_gs(SegmentSelector::new(GDT_KERNEL_KPCR as u16, Ring::Ring0)); segmentation::load_ss(SegmentSelector::new(GDT_KERNEL_DATA as u16, Ring::Ring0)); - // NOTE: FS has already been updated while calling set_tcb. - // NOTE: We do not want to load GS again, since it has already been loaded into - // GDT_KERNEL_KPCR. Instead, we use the base MSR to allow for a 64-bit offset. - // Load the task register task::load_tr(SegmentSelector::new(GDT_TSS as u16, Ring::Ring0)); - - let has_fsgsbase = cpuid().map_or(false, |cpuid| { - cpuid.get_extended_feature_info().map_or(false, |extended_features| { - extended_features.has_fsgsbase() - }) - }); - - if cfg!(feature = "x86_fsgsbase") { - assert!(has_fsgsbase, "running kernel with features not supported by the current CPU"); - } - - if has_fsgsbase { - x86::controlregs::cr4_write(x86::controlregs::cr4() | x86::controlregs::Cr4::CR4_ENABLE_FSGSBASE); - } } #[derive(Copy, Clone, Debug)] diff --git a/src/arch/x86/interrupt/handler.rs b/src/arch/x86/interrupt/handler.rs index a601df1..e996f69 100644 --- a/src/arch/x86/interrupt/handler.rs +++ b/src/arch/x86/interrupt/handler.rs @@ -64,12 +64,6 @@ impl IretRegisters { println!("ESP: {:016x}", { self.esp }); println!("SS: {:016x}", { self.ss }); } - unsafe { - let fsbase = x86::msr::rdmsr(x86::msr::IA32_FS_BASE); - let gsbase = x86::msr::rdmsr(x86::msr::IA32_KERNEL_GSBASE); - let kgsbase = x86::msr::rdmsr(x86::msr::IA32_GS_BASE); - println!("FSBASE {:016x}\nGSBASE {:016x}\nKGSBASE {:016x}", fsbase, gsbase, kgsbase); - } } } @@ -209,111 +203,12 @@ macro_rules! pop_preserved { pop ebx " }; } -macro_rules! swapgs_iff_ring3_fast { - () => { " - // Check whether the last two bits ESP+8 (code segment) are equal to zero. - test DWORD PTR [esp + 8], 0x3 - // Skip the SWAPGS instruction if CS & 0b11 == 0b00. - jz 1f - //TODO swapgs - 1: - " }; -} -macro_rules! swapgs_iff_ring3_fast_errorcode { - () => { " - test DWORD PTR [esp + 16], 0x3 - jz 1f - //TODO swapgs - 1: - " }; -} - -macro_rules! save_gsbase_paranoid { - () => { " - mov ecx, {IA32_GS_BASE} - rdmsr - shl edx, 32 - or eax, edx - - push eax - " } -} - -macro_rules! restore_gsbase_paranoid { - () => { " - pop edx - - mov ecx, {IA32_GS_BASE} - mov eax, edx - shr edx, 32 - wrmsr - " } -} - -macro_rules! set_gsbase_paranoid { - () => { " - mov ecx, {IA32_GS_BASE} - mov eax, edx - shr edx, 32 - wrmsr - " } -} - -macro_rules! save_and_set_gsbase_paranoid { - // For paranoid interrupt entries, we have to be extremely careful with how we use IA32_GS_BASE - // and IA32_KERNEL_GS_BASE. If FSGSBASE is enabled, then we have no way to differentiate these - // two, as paranoid interrupts (e.g. NMIs) can occur even in kernel mode. In fact, they can - // even occur within another IRQ, so we cannot check the the privilege level via the stack. - // - // What we do instead, is using a special entry in the GDT, since we know that the GDT will - // always be thread local, as it contains the TSS. This gives us more than 32 bits to work - // with, which already is the largest x2APIC ID that an x86 CPU can handle. Luckily we can also - // use the stack, even though there might be interrupts in between. - // - // TODO: Linux uses the Interrupt Stack Table to figure out which NMIs were nested. Perhaps - // this could be done here, because if nested (sp > initial_sp), that means the NMI could not - // have come from userspace. But then, knowing the initial sp would somehow have to involve - // percpu, which brings us back to square one. But it might be useful if we would allow faults - // in NMIs. If we do detect a nested interrupt, then we can perform the iretd procedure - // ourselves, so that the newly nested NMI still blocks additional interrupts while still - // returning to the previously (faulting) NMI. See https://lwn.net/Articles/484932/, although I - // think the solution becomes a bit simpler when we cannot longer rely on GSBASE anymore. - - () => { concat!( - save_gsbase_paranoid!(), - - // Allocate stack space for 8 bytes GDT base and 2 bytes size (ignored). - "sub esp, 16\n", - // Set it to the GDT base. - "sgdt [esp + 6]\n", - // Get the base pointer - " - mov eax, [esp + 8] - add esp, 16 - ", - // Load the lower 32 bits of that GDT entry. - "mov edx, [eax + {gdt_cpu_id_offset}]\n", - // Calculate the percpu offset. - " - mov ebx, {KERNEL_PERCPU_OFFSET} - shl edx, {KERNEL_PERCPU_SHIFT} - add edx, ebx - ", - // Set GSBASE to EAX accordingly - set_gsbase_paranoid!(), - ) } -} -macro_rules! nop { - () => { " - // Unused: {IA32_GS_BASE} {KERNEL_PERCPU_OFFSET} {KERNEL_PERCPU_SHIFT} {gdt_cpu_id_offset} - " } -} #[macro_export] macro_rules! interrupt_stack { // XXX: Apparently we cannot use $expr and check for bool exhaustiveness, so we will have to // use idents directly instead. - ($name:ident, $save1:ident!, $save2:ident!, $rstor2:ident!, $rstor1:ident!, is_paranoid: $is_paranoid:expr, |$stack:ident| $code:block) => { + ($name:ident, is_paranoid: $is_paranoid:expr, |$stack:ident| $code:block) => { #[naked] pub unsafe extern "C" fn $name() { unsafe extern "C" fn inner($stack: &mut $crate::arch::x86::interrupt::InterruptStack) { @@ -335,49 +230,39 @@ macro_rules! interrupt_stack { } core::arch::asm!(concat!( // Backup all userspace registers to stack - $save1!(), "push eax\n", push_scratch!(), push_preserved!(), - $save2!(), - // TODO: Map PTI // $crate::arch::x86::pti::map(); // Call inner function with pointer to stack " - mov edi, esp + push esp call {inner} + pop esp ", // TODO: Unmap PTI // $crate::arch::x86::pti::unmap(); - $rstor2!(), - // Restore all userspace registers pop_preserved!(), pop_scratch!(), - $rstor1!(), "iretd\n", ), inner = sym inner, - IA32_GS_BASE = const(x86::msr::IA32_GS_BASE), - KERNEL_PERCPU_SHIFT = const(crate::KERNEL_PERCPU_SHIFT), - KERNEL_PERCPU_OFFSET = const(crate::KERNEL_PERCPU_OFFSET), - - gdt_cpu_id_offset = const(crate::gdt::GDT_CPU_ID_CONTAINER * core::mem::size_of::()), options(noreturn), ); } }; - ($name:ident, |$stack:ident| $code:block) => { interrupt_stack!($name, swapgs_iff_ring3_fast!, nop!, nop!, swapgs_iff_ring3_fast!, is_paranoid: false, |$stack| $code); }; - ($name:ident, @paranoid, |$stack:ident| $code:block) => { interrupt_stack!($name, nop!, save_and_set_gsbase_paranoid!, restore_gsbase_paranoid!, nop!, is_paranoid: true, |$stack| $code); } + ($name:ident, |$stack:ident| $code:block) => { interrupt_stack!($name, is_paranoid: false, |$stack| $code); }; + ($name:ident, @paranoid, |$stack:ident| $code:block) => { interrupt_stack!($name, is_paranoid: true, |$stack| $code); } } #[macro_export] @@ -391,7 +276,6 @@ macro_rules! interrupt { core::arch::asm!(concat!( // Backup all userspace registers to stack - swapgs_iff_ring3_fast!(), "push eax\n", push_scratch!(), @@ -407,7 +291,6 @@ macro_rules! interrupt { // Restore all userspace registers pop_scratch!(), - swapgs_iff_ring3_fast!(), "iretd\n", ), @@ -445,7 +328,6 @@ macro_rules! interrupt_error { } core::arch::asm!(concat!( - swapgs_iff_ring3_fast_errorcode!(), // Move eax into code's place, put code in last instead (to be // compatible with InterruptStack) "xchg [esp], eax\n", @@ -462,8 +344,9 @@ macro_rules! interrupt_error { // Call inner function with pointer to stack " - mov edi, esp + push esp call {inner} + pop esp ", // TODO: Unmap PTI @@ -477,7 +360,6 @@ macro_rules! interrupt_error { pop_scratch!(), // The error code has already been popped, so use the regular macro. - swapgs_iff_ring3_fast!(), "iretd\n", ), diff --git a/src/arch/x86/start.rs b/src/arch/x86/start.rs index a134001..511fe83 100644 --- a/src/arch/x86/start.rs +++ b/src/arch/x86/start.rs @@ -301,64 +301,6 @@ macro_rules! inner_pit_unmap( } ); -#[cfg(not(feature = "x86_fsgsbase"))] -macro_rules! save_fsgsbase( - () => { - " - mov ecx, {MSR_FSBASE} - rdmsr - shl rdx, 32 - or rdx, rax - mov r14, rdx - - mov ecx, {MSR_GSBASE} - rdmsr - shl rdx, 32 - or rdx, rax - mov r13, rdx - " - } -); -#[cfg(feature = "x86_fsgsbase")] -macro_rules! save_fsgsbase( - () => { - " - // placeholder: {MSR_FSBASE} {MSR_GSBASE} - rdfsbase r14 - rdgsbase r13 - " - } -); - -#[cfg(feature = "x86_fsgsbase")] -macro_rules! restore_fsgsbase( - () => { - " - wrfsbase r14 - wrgsbase r13 - " - } -); - -#[cfg(not(feature = "x86_fsgsbase"))] -macro_rules! restore_fsgsbase( - () => { - " - mov ecx, {MSR_FSBASE} - mov rdx, r14 - mov eax, edx - shr rdx, 32 - wrmsr - - mov ecx, {MSR_GSBASE} - mov rdx, r13 - mov eax, edx - shr rdx, 32 - wrmsr - " - } -); - #[naked] // TODO: AbiCompatBool pub unsafe extern "C" fn usermode(_ip: usize, _sp: usize, _arg: usize, _is_singlestep: usize) -> ! { @@ -381,7 +323,7 @@ pub unsafe extern "C" fn usermode(_ip: usize, _sp: usize, _arg: usize, _is_singl mov ds, eax mov es, eax mov fs, eax - mov gs, eax + // gs keeps kernel selector for simplicity // Set up iret stack push eax // stack selector diff --git a/src/context/arch/x86.rs b/src/context/arch/x86.rs index 6b4cfde..48efa05 100644 --- a/src/context/arch/x86.rs +++ b/src/context/arch/x86.rs @@ -258,8 +258,9 @@ unsafe extern fn signal_handler_wrapper() { push edi push esi - mov edi, esp + push esp call {inner} + pop esp pop esi pop edi