diff --git a/src/arch/x86_64/device/local_apic.rs b/src/arch/x86_64/device/local_apic.rs index 4f7c515..ab2c784 100644 --- a/src/arch/x86_64/device/local_apic.rs +++ b/src/arch/x86_64/device/local_apic.rs @@ -106,10 +106,15 @@ impl LocalApic { unsafe { wrmsr(IA32_X2APIC_ICR, value); } } else { unsafe { - while self.read(0x300) & 1 << 12 == 1 << 12 {} + const PENDING: u32 = 1 << 12; + while self.read(0x300) & PENDING == PENDING { + core::hint::spin_loop(); + } self.write(0x310, (value >> 32) as u32); self.write(0x300, value as u32); - while self.read(0x300) & 1 << 12 == 1 << 12 {} + while self.read(0x300) & PENDING == PENDING { + core::hint::spin_loop(); + } } } } @@ -123,6 +128,11 @@ impl LocalApic { } self.set_icr(icr); } + // Not used just yet, but allows triggering an NMI to another processor. + pub fn ipi_nmi(&mut self, apic_id: u32) { + let shift = if self.x2 { 32 } else { 56 }; + self.set_icr((u64::from(apic_id) << shift) | (1 << 14) | (0b100 << 8)); + } pub unsafe fn eoi(&mut self) { if self.x2 { diff --git a/src/arch/x86_64/gdt.rs b/src/arch/x86_64/gdt.rs index b55db05..6842dd9 100644 --- a/src/arch/x86_64/gdt.rs +++ b/src/arch/x86_64/gdt.rs @@ -21,6 +21,7 @@ pub const GDT_USER_DATA: usize = 5; pub const GDT_USER_CODE: usize = 6; pub const GDT_TSS: usize = 7; pub const GDT_TSS_HIGH: usize = 8; +pub const GDT_CPU_ID_CONTAINER: usize = 9; pub const GDT_A_PRESENT: u8 = 1 << 7; pub const GDT_A_RING_0: u8 = 0 << 5; @@ -145,7 +146,7 @@ pub unsafe fn init() { } /// Initialize GDT with TLS -pub unsafe fn init_paging(tcb_offset: usize, stack_offset: usize) { +pub unsafe fn init_paging(cpu_id: u32, tcb_offset: usize, stack_offset: usize) { // Set temporary TLS segment to the self-pointer of the Thread Control Block. 
x86::msr::wrmsr(x86::msr::IA32_GS_BASE, tcb_offset as u64); @@ -179,6 +180,10 @@ pub unsafe fn init_paging(tcb_offset: usize, stack_offset: usize) { (&mut GDT[GDT_TSS_HIGH] as *mut GdtEntry).cast::<u32>().write(tss_hi); } + // And finally, populate the last GDT entry with the current CPU ID, to allow paranoid + // interrupt handlers to safely use TLS. + (&mut GDT[GDT_CPU_ID_CONTAINER] as *mut GdtEntry).cast::<u32>().write(cpu_id); + // Set the stack pointer to use when coming back from userspace. set_tss_stack(stack_offset); diff --git a/src/arch/x86_64/interrupt/handler.rs b/src/arch/x86_64/interrupt/handler.rs index 4cf27e8..609d7b2 100644 --- a/src/arch/x86_64/interrupt/handler.rs +++ b/src/arch/x86_64/interrupt/handler.rs @@ -83,7 +83,8 @@ impl IretRegisters { unsafe { let fsbase = x86::msr::rdmsr(x86::msr::IA32_FS_BASE); let gsbase = x86::msr::rdmsr(x86::msr::IA32_KERNEL_GSBASE); - println!("FSBASE {:>016X}\nGSBASE {:016X}", fsbase, gsbase); + let kgsbase = x86::msr::rdmsr(x86::msr::IA32_GS_BASE); + println!("FSBASE {:>016X}\nGSBASE {:016X}\nKGSBASE {:016X}", fsbase, gsbase, kgsbase); } } } @@ -151,7 +152,6 @@ impl InterruptStack { pub fn load(&mut self, all: &IntRegisters) { // TODO: Which of these should be allowed to change? - // self.fs = all.fs; self.preserved.r15 = all.r15; self.preserved.r14 = all.r14; self.preserved.r13 = all.r13; @@ -319,14 +319,14 @@ macro_rules! restore_gsbase_paranoid { macro_rules! set_gsbase_paranoid { () => { " // Unused: {IA32_GS_BASE} - wrgsbase rax + wrgsbase rdx " } } #[cfg(not(feature = "x86_fsgsbase"))] macro_rules! set_gsbase_paranoid { () => { " mov ecx, {IA32_GS_BASE} - mov rdx, rax + mov eax, edx shr rdx, 32 wrmsr " } } @@ -338,16 +338,20 @@ macro_rules! save_and_set_gsbase_paranoid { // two, as paranoid interrupts (e.g. NMIs) can occur even in kernel mode. In fact, they can // even occur within another IRQ, so we cannot check the the privilege level via the stack. 
// - // TODO: Linux uses the Interrupt Stack Table to figure out which NMIs were nested. Perhaps - // this could be done here, because if nested (sp > initial_sp), that means the NMI could not - // have come from userspace. But then, knowing the initial sp would somehow have to involve - // percpu, which brings us back to square one. But it might be useful if we would allow faults - // in NMIs. - // // What we do instead, is using a special entry in the GDT, since we know that the GDT will // always be thread local, as it contains the TSS. This gives us more than 32 bits to work // with, which already is the largest x2APIC ID that an x86 CPU can handle. Luckily we can also // use the stack, even though there might be interrupts in between. + // + // TODO: Linux uses the Interrupt Stack Table to figure out which NMIs were nested. Perhaps + // this could be done here, because if nested (sp > initial_sp), that means the NMI could not + // have come from userspace. But then, knowing the initial sp would somehow have to involve + // percpu, which brings us back to square one. But it might be useful if we would allow faults + // in NMIs. If we do detect a nested interrupt, then we can perform the iretq procedure + // ourselves, so that the newly nested NMI still blocks additional interrupts while still + // returning to the previously (faulting) NMI. See https://lwn.net/Articles/484932/, although I + // think the solution becomes a bit simpler when we can no longer rely on GSBASE. + () => { concat!( save_gsbase_paranoid!(), @@ -361,12 +365,12 @@ add rsp, 16 ", // Load the lower 32 bits of that GDT entry. - "mov eax, [rax]\n", + "mov edx, [rax + {gdt_cpu_id_offset}]\n", // Calculate the percpu offset. " mov rbx, {KERNEL_PERCPU_OFFSET} - shl rax, {KERNEL_PERCPU_SHIFT} - add rax, rbx + shl rdx, {KERNEL_PERCPU_SHIFT} + add rdx, rbx ", // Set GSBASE to RAX accordingly set_gsbase_paranoid!(), @@ -374,7 +378,7 @@ macro_rules! 
save_and_set_gsbase_paranoid { } macro_rules! nop { () => { " - // Unused: {IA32_GS_BASE} {KERNEL_PERCPU_OFFSET} {KERNEL_PERCPU_SHIFT} + // Unused: {IA32_GS_BASE} {KERNEL_PERCPU_OFFSET} {KERNEL_PERCPU_SHIFT} {gdt_cpu_id_offset} " } } @@ -438,6 +442,8 @@ macro_rules! interrupt_stack { KERNEL_PERCPU_SHIFT = const(crate::KERNEL_PERCPU_SHIFT), KERNEL_PERCPU_OFFSET = const(crate::KERNEL_PERCPU_OFFSET), + gdt_cpu_id_offset = const(crate::gdt::GDT_CPU_ID_CONTAINER * core::mem::size_of::<crate::gdt::GdtEntry>()), + options(noreturn), ); diff --git a/src/arch/x86_64/start.rs b/src/arch/x86_64/start.rs index 9f94e08..b07901c 100644 --- a/src/arch/x86_64/start.rs +++ b/src/arch/x86_64/start.rs @@ -113,7 +113,7 @@ pub unsafe extern fn kstart(args_ptr: *const KernelArgs) -> ! { let (mut active_table, tcb_offset) = paging::init(0); // Set up GDT after paging with TLS - gdt::init_paging(tcb_offset, stack_base + stack_size); + gdt::init_paging(0, tcb_offset, stack_base + stack_size); // Set up IDT idt::init_paging_bsp(); @@ -206,7 +206,7 @@ pub unsafe extern fn kstart_ap(args_ptr: *const KernelArgsAp) -> ! { let tcb_offset = paging::init_ap(cpu_id, bsp_table); // Set up GDT with TLS - gdt::init_paging(tcb_offset, stack_end); + gdt::init_paging(cpu_id as u32, tcb_offset, stack_end); // Set up IDT for AP idt::init_paging_post_heap(false, cpu_id);