diff --git a/src/arch/x86_64/gdt.rs b/src/arch/x86_64/gdt.rs index e999abe..32553b0 100644 --- a/src/arch/x86_64/gdt.rs +++ b/src/arch/x86_64/gdt.rs @@ -1,6 +1,8 @@ //! Global descriptor table +use core::convert::TryInto; use core::mem; + use x86::segmentation::load_cs; use x86::bits64::task::TaskStateSegment; use x86::Ring; @@ -13,7 +15,7 @@ use crate::paging::PAGE_SIZE; pub const GDT_NULL: usize = 0; pub const GDT_KERNEL_CODE: usize = 1; pub const GDT_KERNEL_DATA: usize = 2; -pub const GDT_KERNEL_TLS: usize = 3; +pub const GDT_KERNEL_KPCR: usize = 3; pub const GDT_USER_CODE32_UNUSED: usize = 4; pub const GDT_USER_DATA: usize = 5; pub const GDT_USER_CODE: usize = 6; @@ -39,11 +41,6 @@ pub const GDT_F_PAGE_SIZE: u8 = 1 << 7; pub const GDT_F_PROTECTED_MODE: u8 = 1 << 6; pub const GDT_F_LONG_MODE: u8 = 1 << 5; -static mut INIT_GDTR: DescriptorTablePointer = DescriptorTablePointer { - limit: 0, - base: 0 as *const SegmentDescriptor -}; - static mut INIT_GDT: [GdtEntry; 4] = [ // Null GdtEntry::new(0, 0, 0, 0), @@ -55,12 +52,6 @@ static mut INIT_GDT: [GdtEntry; 4] = [ GdtEntry::new(0, 0, GDT_A_PRESENT | GDT_A_RING_3 | GDT_A_SYSTEM | GDT_A_PRIVILEGE, GDT_F_LONG_MODE) ]; -#[thread_local] -pub static mut GDTR: DescriptorTablePointer = DescriptorTablePointer { - limit: 0, - base: 0 as *const SegmentDescriptor -}; - #[thread_local] pub static mut GDT: [GdtEntry; 10] = [ // Null @@ -85,28 +76,26 @@ pub static mut GDT: [GdtEntry; 10] = [ GdtEntry::new(0, 0, 0, 0), ]; -#[repr(packed)] -pub struct TssWrapper { - base: TaskStateSegment, - _pad: u64, - _user_stack: u64, -} -impl core::ops::Deref for TssWrapper { - type Target = TaskStateSegment; +#[repr(C, align(16))] +pub struct ProcessorControlRegion { + // NOTE: If you plan to change any fields here, please make sure that you also modify the + // offsets in the syscall instruction handler accordingly! 
- fn deref(&self) -> &Self::Target { - &self.base - } -} -impl core::ops::DerefMut for TssWrapper { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.base - } + pub tcb_end: usize, + pub user_rsp_tmp: usize, + pub tss: TssWrapper, } +// NOTE: Despite not using #[repr(packed)], we do know that while there may be some padding +// inserted before and after the TSS, the main TSS structure will remain intact. +#[repr(C, align(16))] +pub struct TssWrapper(pub TaskStateSegment); + #[thread_local] -pub static mut TSS: TssWrapper = TssWrapper { - base: TaskStateSegment { +pub static mut KPCR: ProcessorControlRegion = ProcessorControlRegion { + tcb_end: 0, + user_rsp_tmp: 0, + tss: TssWrapper(TaskStateSegment { reserved: 0, rsp: [0; 3], reserved2: 0, @@ -114,95 +103,117 @@ pub static mut TSS: TssWrapper = TssWrapper { reserved3: 0, reserved4: 0, iomap_base: 0xFFFF - }, - _pad: 0_u64, - // Accessed only from assembly, at `gs:[0x70]` - _user_stack: 0_u64, + }), }; pub unsafe fn set_tcb(pid: usize) { GDT[GDT_USER_TLS].set_offset((crate::USER_TCB_OFFSET + pid * PAGE_SIZE) as u32); + x86::segmentation::load_fs(SegmentSelector::new(GDT_USER_TLS as u16, Ring::Ring3)); } #[cfg(feature = "pti")] pub unsafe fn set_tss_stack(stack: usize) { use super::pti::{PTI_CPU_STACK, PTI_CONTEXT_STACK}; - TSS.rsp[0] = (PTI_CPU_STACK.as_ptr() as usize + PTI_CPU_STACK.len()) as u64; + KPCR.tss.0.rsp[0] = (PTI_CPU_STACK.as_ptr() as usize + PTI_CPU_STACK.len()) as u64; PTI_CONTEXT_STACK = stack; } #[cfg(not(feature = "pti"))] pub unsafe fn set_tss_stack(stack: usize) { - TSS.rsp[0] = stack as u64; + KPCR.tss.0.rsp[0] = stack as u64; } // Initialize GDT pub unsafe fn init() { - // Setup the initial GDT with TLS, so we can setup the TLS GDT (a little confusing) - // This means that each CPU will have its own GDT, but we only need to define it once as a thread local - INIT_GDTR.limit = (INIT_GDT.len() * mem::size_of::() - 1) as u16; - INIT_GDTR.base = INIT_GDT.as_ptr() as *const 
SegmentDescriptor;
+    {
+        // Setup the initial GDT with TLS, so we can setup the TLS GDT (a little confusing)
+        // This means that each CPU will have its own GDT, but we only need to define it once as a thread local
 
-    // Load the initial GDT, before we have access to thread locals
-    dtables::lgdt(&INIT_GDTR);
+        let limit = (INIT_GDT.len() * mem::size_of::<GdtEntry>() - 1)
+            .try_into()
+            .expect("initial GDT way too large");
+        let base = INIT_GDT.as_ptr() as *const SegmentDescriptor;
+
+        let init_gdtr: DescriptorTablePointer<SegmentDescriptor> = DescriptorTablePointer {
+            limit,
+            base,
+        };
+
+        // Load the initial GDT, before we have access to thread locals
+        dtables::lgdt(&init_gdtr);
+    }
 
     // Load the segment descriptors
     load_cs(SegmentSelector::new(GDT_KERNEL_CODE as u16, Ring::Ring0));
     segmentation::load_ds(SegmentSelector::new(GDT_KERNEL_DATA as u16, Ring::Ring0));
     segmentation::load_es(SegmentSelector::new(GDT_KERNEL_DATA as u16, Ring::Ring0));
     segmentation::load_fs(SegmentSelector::new(GDT_KERNEL_DATA as u16, Ring::Ring0));
-    segmentation::load_gs(SegmentSelector::new(GDT_KERNEL_DATA as u16, Ring::Ring0));
+    segmentation::load_gs(SegmentSelector::new(GDT_KERNEL_KPCR as u16, Ring::Ring0));
     segmentation::load_ss(SegmentSelector::new(GDT_KERNEL_DATA as u16, Ring::Ring0));
 }
 
 /// Initialize GDT with TLS
 pub unsafe fn init_paging(tcb_offset: usize, stack_offset: usize) {
-    // Set the TLS segment to the offset of the Thread Control Block
-    INIT_GDT[GDT_KERNEL_TLS].set_offset(tcb_offset as u32);
+    // Set temporary TLS segment to the self-pointer of the Thread Control Block.
+    x86::msr::wrmsr(x86::msr::IA32_GS_BASE, tcb_offset as u64);
 
-    // Load the initial GDT, before we have access to thread locals
-    dtables::lgdt(&INIT_GDTR);
+    // Now that we have access to thread locals, begin by getting a pointer to the Processor
+    // Control Region.
+    let kpcr = &mut KPCR;
 
-    // Load the segment descriptors
-    segmentation::load_fs(SegmentSelector::new(GDT_KERNEL_TLS as u16, Ring::Ring0));
+    // Then, setup the AP's individual GDT
+    let limit = (GDT.len() * mem::size_of::<GdtEntry>() - 1)
+        .try_into()
+        .expect("main GDT way too large");
+    let base = GDT.as_ptr() as *const SegmentDescriptor;
 
-    // Now that we have access to thread locals, setup the AP's individual GDT
-    GDTR.limit = (GDT.len() * mem::size_of::<GdtEntry>() - 1) as u16;
-    GDTR.base = GDT.as_ptr() as *const SegmentDescriptor;
+    let gdtr: DescriptorTablePointer<SegmentDescriptor> = DescriptorTablePointer {
+        limit,
+        base,
+    };
 
-    // Set the TLS segment to the offset of the Thread Control Block
-    GDT[GDT_KERNEL_TLS].set_offset(tcb_offset as u32);
+    // Once we have fetched the real KPCR address, set the TLS segment to the TCB pointer there.
+    kpcr.tcb_end = (tcb_offset as *const usize).read();
+
+    {
+        // We can now access our TSS, via the KPCR, which is a thread local
+        let tss = &kpcr.tss.0 as *const _ as usize as u64;
+        let tss_lo = (tss & 0xFFFF_FFFF) as u32;
+        let tss_hi = (tss >> 32) as u32;
+
+        GDT[GDT_TSS].set_offset(tss_lo);
+        GDT[GDT_TSS].set_limit(mem::size_of::<TaskStateSegment>() as u32);
+
+        (&mut GDT[GDT_TSS_HIGH] as *mut GdtEntry).cast::<u32>().write(tss_hi);
+    }
+
+    // Set the stack pointer to use when coming back from userspace.
+    set_tss_stack(stack_offset);
+
+    // Load the new GDT, which is correctly located in thread local storage.
+    dtables::lgdt(&gdtr);
+
+    // Ensure that GS always points to the KPCR in kernel space.
+    x86::msr::wrmsr(x86::msr::IA32_GS_BASE, kpcr as *mut _ as usize as u64);
+    // Inside kernel space, GS should _always_ point to the KPCR. When leaving userspace, `swapgs`
+    // is called again, making the userspace GS always point to user data.
+ x86::msr::wrmsr(x86::msr::IA32_KERNEL_GSBASE, 0); // Set the User TLS segment to the offset of the user TCB set_tcb(0); - // We can now access our TSS, which is a thread local - GDT[GDT_TSS].set_offset(&TSS as *const _ as u32); - GDT[GDT_TSS].set_limit(mem::size_of::() as u32); - - // Set the stack pointer when coming back from userspace - set_tss_stack(stack_offset); - - // Load the new GDT, which is correctly located in thread local storage - dtables::lgdt(&GDTR); - // Reload the segment descriptors load_cs(SegmentSelector::new(GDT_KERNEL_CODE as u16, Ring::Ring0)); segmentation::load_ds(SegmentSelector::new(GDT_KERNEL_DATA as u16, Ring::Ring0)); segmentation::load_es(SegmentSelector::new(GDT_KERNEL_DATA as u16, Ring::Ring0)); - segmentation::load_fs(SegmentSelector::new(GDT_KERNEL_TLS as u16, Ring::Ring0)); - - segmentation::load_gs(SegmentSelector::new(GDT_KERNEL_DATA as u16, Ring::Ring0)); segmentation::load_ss(SegmentSelector::new(GDT_KERNEL_DATA as u16, Ring::Ring0)); + // NOTE: FS has already been updated while calling set_tcb. + // NOTE: We do not want to load GS again, since it has already been loaded into + // GDT_KERNEL_KPCR. Instead, we use the base MSR to allow for a 64-bit offset. + // Load the task register task::load_tr(SegmentSelector::new(GDT_TSS as u16, Ring::Ring0)); - - // Ensure that GS always points to the TSS segment in kernel space. - x86::msr::wrmsr(x86::msr::IA32_GS_BASE, &TSS as *const _ as usize as u64); - // Inside kernel space, GS should _always_ point to the TSS. When leaving userspace, `swapgs` - // is called again, making the userspace GS always point to user data. 
- x86::msr::wrmsr(x86::msr::IA32_KERNEL_GSBASE, 0); } #[derive(Copy, Clone, Debug)] diff --git a/src/arch/x86_64/idt.rs b/src/arch/x86_64/idt.rs index 06c7171..58ff7ff 100644 --- a/src/arch/x86_64/idt.rs +++ b/src/arch/x86_64/idt.rs @@ -202,7 +202,7 @@ pub unsafe fn init_generic(is_bsp: bool, idt: &mut Idt) { let address = base_address.data() + BACKUP_STACK_SIZE; // Put them in the 1st entry of the IST. - crate::gdt::TSS.ist[usize::from(index - 1)] = address as u64; + crate::gdt::KPCR.tss.0.ist[usize::from(index - 1)] = address as u64; index }; diff --git a/src/arch/x86_64/interrupt/handler.rs b/src/arch/x86_64/interrupt/handler.rs index 626e1e3..d5a93be 100644 --- a/src/arch/x86_64/interrupt/handler.rs +++ b/src/arch/x86_64/interrupt/handler.rs @@ -86,7 +86,6 @@ impl IretRegisters { #[derive(Default)] #[repr(packed)] pub struct InterruptStack { - pub fs: usize, pub preserved: PreservedRegisters, pub scratch: ScratchRegisters, pub iret: IretRegisters, @@ -97,13 +96,10 @@ impl InterruptStack { self.iret.dump(); self.scratch.dump(); self.preserved.dump(); - println!("FS: {:>016X}", { self.fs }); } /// Saves all registers to a struct used by the proc: /// scheme to read/write registers. pub fn save(&self, all: &mut IntRegisters) { - all.fs = self.fs; - all.r15 = self.preserved.r15; all.r14 = self.preserved.r14; all.r13 = self.preserved.r13; @@ -284,31 +280,6 @@ macro_rules! pop_preserved { pop rbx " }; } - -#[macro_export] -macro_rules! push_fs { - () => { " - // Push fs - push fs - - // Load kernel tls - // - // NOTE: We can't load the value directly into `fs`. So we need to use a - // scratch register (as preserved registers aren't backed up by the - // interrupt! macro) to store it. We also can't use `rax` as the temporary - // value, as during errors that's already used for the error code. - mov rcx, 0x18 - mov fs, cx - " }; -} -#[macro_export] -macro_rules! pop_fs { - () => { " - // Pop fs - pop fs - " }; -} - macro_rules! 
swapgs_iff_ring3_fast { () => { " // Check whether the last two bits RSP+8 (code segment) are equal to zero. @@ -371,7 +342,6 @@ macro_rules! interrupt_stack { "push rax\n", push_scratch!(), push_preserved!(), - push_fs!(), // TODO: Map PTI // $crate::arch::x86_64::pti::map(); @@ -384,7 +354,6 @@ macro_rules! interrupt_stack { // $crate::arch::x86_64::pti::unmap(); // Restore all userspace registers - pop_fs!(), pop_preserved!(), pop_scratch!(), @@ -410,7 +379,6 @@ macro_rules! interrupt { swapgs_iff_ring3_fast!(), "push rax\n", push_scratch!(), - push_fs!(), // TODO: Map PTI // $crate::arch::x86_64::pti::map(); @@ -422,7 +390,6 @@ macro_rules! interrupt { // $crate::arch::x86_64::pti::unmap(); // Restore all userspace registers - pop_fs!(), pop_scratch!(), swapgs_iff_ring3_fast!(), @@ -457,7 +424,6 @@ macro_rules! interrupt_error { // Push all userspace registers push_scratch!(), push_preserved!(), - push_fs!(), // Put code in, it's now in rax "push rax\n", @@ -476,7 +442,6 @@ macro_rules! interrupt_error { "add rsp, 8\n", // Restore all userspace registers - pop_fs!(), pop_preserved!(), pop_scratch!(), diff --git a/src/arch/x86_64/interrupt/syscall.rs b/src/arch/x86_64/interrupt/syscall.rs index 90992c8..3fbc49a 100644 --- a/src/arch/x86_64/interrupt/syscall.rs +++ b/src/arch/x86_64/interrupt/syscall.rs @@ -62,10 +62,10 @@ function!(syscall_instruction => { // Yes, this is magic. 
No, you don't need to understand " swapgs // Set gs segment to TSS - mov gs:[0x70], rsp // Save userspace stack pointer - mov rsp, gs:[4] // Load kernel stack pointer - push QWORD PTR 5 * 8 + 3 // Push fake SS (resembling iret stack frame) - push QWORD PTR gs:[0x70] // Push userspace rsp + mov gs:[0x08], rsp // Save userspace stack pointer + mov rsp, gs:[0x14] // Load kernel stack pointer + push QWORD PTR 5 * 8 + 3 // Push fake userspace SS (resembling iret frame) + push QWORD PTR gs:[0x08] // Push userspace rsp push r11 // Push rflags push QWORD PTR 6 * 8 + 3 // Push fake CS (resembling iret stack frame) push rcx // Push userspace return pointer @@ -75,7 +75,6 @@ function!(syscall_instruction => { "push rax\n", push_scratch!(), push_preserved!(), - push_fs!(), // TODO: Map PTI // $crate::arch::x86_64::pti::map(); @@ -88,7 +87,6 @@ function!(syscall_instruction => { // $crate::arch::x86_64::pti::unmap(); // Pop context registers - pop_fs!(), pop_preserved!(), pop_scratch!(), @@ -115,8 +113,8 @@ function!(syscall_instruction => { pop rcx // Pop userspace return pointer add rsp, 8 // Pop fake userspace CS pop r11 // Pop rflags - pop QWORD PTR gs:[0x70] // Pop userspace stack pointer - mov rsp, gs:[0x70] // Restore userspace stack pointer + pop QWORD PTR gs:[0x08] // Pop userspace stack pointer + mov rsp, gs:[0x08] // Restore userspace stack pointer swapgs // Restore gs from TSS to user data sysretq // Return into userspace; RCX=>RIP,R11=>RFLAGS