Fixes for x86 32-bit

This commit is contained in:
Jeremy Soller
2022-08-17 10:48:23 -06:00
parent 1eda828877
commit 4bd137f36e
4 changed files with 16 additions and 216 deletions

View File

@@ -104,7 +104,8 @@ pub unsafe fn set_tss_stack(stack: usize) {
/// Records `stack` as the ring-0 stack in the TSS held by `KPCR`.
///
/// `ss0` is set to the kernel data selector (`GDT_KERNEL_DATA << 3`) and
/// `esp0` to `stack` offset by `crate::PHYS_OFFSET`, truncated to 32 bits.
///
/// # Safety
///
/// Writes directly into `KPCR`; the caller must ensure nothing else is
/// accessing it concurrently and that `stack` is a valid kernel stack top.
#[cfg(not(feature = "pti"))]
pub unsafe fn set_tss_stack(stack: usize) {
    KPCR.tss.0.ss0 = (GDT_KERNEL_DATA << 3) as u16;
    // A plain `stack as u32` store used to precede this one; it was dead
    // (immediately overwritten) and lacked the PHYS_OFFSET adjustment.
    //TODO: should PHYS_OFFSET be added on x86_64 as well?
    KPCR.tss.0.esp0 = (stack + crate::PHYS_OFFSET) as u32;
}
// Initialize GDT
@@ -138,9 +139,6 @@ pub unsafe fn init() {
/// Initialize GDT with TLS
pub unsafe fn init_paging(cpu_id: u32, tcb_offset: usize, stack_offset: usize) {
// Set temporary TLS segment to the self-pointer of the Thread Control Block.
x86::msr::wrmsr(x86::msr::IA32_GS_BASE, tcb_offset as u64);
//TODO: will this work with multicore?
{
INIT_GDT[GDT_KERNEL_KPCR].set_offset(tcb_offset as u32);
@@ -165,6 +163,8 @@ pub unsafe fn init_paging(cpu_id: u32, tcb_offset: usize, stack_offset: usize) {
// Once we have fetched the real KPCR address, set the TLS segment to the TCB pointer there.
kpcr.tcb_end = (tcb_offset as *const usize).read();
GDT[GDT_KERNEL_KPCR].set_offset(tcb_offset as u32);
{
// We can now access our TSS, via the KPCR, which is a thread local
let tss = &kpcr.tss.0 as *const _ as usize as u32;
@@ -183,41 +183,16 @@ pub unsafe fn init_paging(cpu_id: u32, tcb_offset: usize, stack_offset: usize) {
// Load the new GDT, which is correctly located in thread local storage.
dtables::lgdt(&gdtr);
// Ensure that GS always points to the KPCR in kernel space.
x86::msr::wrmsr(x86::msr::IA32_GS_BASE, kpcr as *mut _ as usize as u64);
// Inside kernel space, GS should _always_ point to the TSS. When leaving userspace, `swapgs`
// is called again, making the userspace GS always point to user data.
x86::msr::wrmsr(x86::msr::IA32_KERNEL_GSBASE, 0);
// Set the User TLS segment to zero, before we create any contexts and start scheduling.
x86::msr::wrmsr(x86::msr::IA32_FS_BASE, 0);
// Reload the segment descriptors
load_cs(SegmentSelector::new(GDT_KERNEL_CODE as u16, Ring::Ring0));
segmentation::load_ds(SegmentSelector::new(GDT_KERNEL_DATA as u16, Ring::Ring0));
segmentation::load_es(SegmentSelector::new(GDT_KERNEL_DATA as u16, Ring::Ring0));
segmentation::load_fs(SegmentSelector::new(GDT_KERNEL_DATA as u16, Ring::Ring0));
segmentation::load_gs(SegmentSelector::new(GDT_KERNEL_KPCR as u16, Ring::Ring0));
segmentation::load_ss(SegmentSelector::new(GDT_KERNEL_DATA as u16, Ring::Ring0));
// NOTE: FS has already been updated while calling set_tcb.
// NOTE: We do not want to load GS again, since it has already been loaded into
// GDT_KERNEL_KPCR. Instead, we use the base MSR to allow for a 64-bit offset.
// Load the task register
task::load_tr(SegmentSelector::new(GDT_TSS as u16, Ring::Ring0));
let has_fsgsbase = cpuid().map_or(false, |cpuid| {
cpuid.get_extended_feature_info().map_or(false, |extended_features| {
extended_features.has_fsgsbase()
})
});
if cfg!(feature = "x86_fsgsbase") {
assert!(has_fsgsbase, "running kernel with features not supported by the current CPU");
}
if has_fsgsbase {
x86::controlregs::cr4_write(x86::controlregs::cr4() | x86::controlregs::Cr4::CR4_ENABLE_FSGSBASE);
}
}
#[derive(Copy, Clone, Debug)]

View File

@@ -64,12 +64,6 @@ impl IretRegisters {
println!("ESP: {:016x}", { self.esp });
println!("SS: {:016x}", { self.ss });
}
unsafe {
let fsbase = x86::msr::rdmsr(x86::msr::IA32_FS_BASE);
let gsbase = x86::msr::rdmsr(x86::msr::IA32_KERNEL_GSBASE);
let kgsbase = x86::msr::rdmsr(x86::msr::IA32_GS_BASE);
println!("FSBASE {:016x}\nGSBASE {:016x}\nKGSBASE {:016x}", fsbase, gsbase, kgsbase);
}
}
}
@@ -209,111 +203,12 @@ macro_rules! pop_preserved {
pop ebx
" };
}
// Expands to an assembly snippet meant to execute `swapgs` only when the
// interrupted code was running in ring 3. It tests the low two bits of the
// dword at [esp + 8] and jumps to the local label `1` when they are zero.
// The `swapgs` itself is still a TODO, so both paths currently fall through
// to label `1` without changing any register other than the flags.
// NOTE(review): on a 32-bit interrupt frame without an error code the layout
// is presumably EIP, CS, EFLAGS at 4-byte strides, which would put CS at
// [esp + 4]; the +8 offset looks inherited from the x86_64 version - confirm.
macro_rules! swapgs_iff_ring3_fast {
() => { "
// Check whether the last two bits ESP+8 (code segment) are equal to zero.
test DWORD PTR [esp + 8], 0x3
// Skip the SWAPGS instruction if CS & 0b11 == 0b00.
jz 1f
//TODO swapgs
1:
" };
}
// Variant of `swapgs_iff_ring3_fast` for entries where an error code is still
// on the stack: it tests the low two bits of the dword at [esp + 16] instead
// of [esp + 8]. The `swapgs` is likewise still a TODO, so the snippet only
// affects the flags.
// NOTE(review): with a 4-byte error code below a 32-bit frame, CS would
// presumably sit at [esp + 8]; the +16 offset looks carried over from the
// x86_64 layout - confirm.
macro_rules! swapgs_iff_ring3_fast_errorcode {
() => { "
test DWORD PTR [esp + 16], 0x3
jz 1f
//TODO swapgs
1:
" };
}
// Expands to an assembly snippet that reads the MSR named by the
// `{IA32_GS_BASE}` asm operand with `rdmsr` and pushes the resulting EAX onto
// the stack. Clobbers ECX, EDX and EAX.
// NOTE(review): `shl edx, 32` operates on a 32-bit register; x86 masks the
// shift count to 5 bits for 32-bit operands, so this is presumably a no-op
// and the following `or` merges the high half into the low half. This looks
// like a direct port of the 64-bit sequence - confirm before reuse.
macro_rules! save_gsbase_paranoid {
() => { "
mov ecx, {IA32_GS_BASE}
rdmsr
shl edx, 32
or eax, edx
push eax
" }
}
// Expands to an assembly snippet that pops one dword from the stack and
// writes it to the MSR named by the `{IA32_GS_BASE}` asm operand with
// `wrmsr`. It is the counterpart of `save_gsbase_paranoid`, which pushes one
// dword. Clobbers ECX, EDX and EAX.
// NOTE(review): `shr edx, 32` on a 32-bit register is presumably a no-op
// (shift count masked to 5 bits), which would leave EDX equal to EAX rather
// than zero when `wrmsr` runs - confirm.
macro_rules! restore_gsbase_paranoid {
() => { "
pop edx
mov ecx, {IA32_GS_BASE}
mov eax, edx
shr edx, 32
wrmsr
" }
}
// Expands to an assembly snippet that writes the value held in EDX to the MSR
// named by the `{IA32_GS_BASE}` asm operand: EDX is copied to EAX, shifted,
// and `wrmsr` is executed. Clobbers ECX, EDX and EAX.
// NOTE(review): as in the other *_gsbase_paranoid macros, `shr edx, 32` on a
// 32-bit register is presumably a no-op, so the high half passed to `wrmsr`
// would equal the low half - confirm.
macro_rules! set_gsbase_paranoid {
() => { "
mov ecx, {IA32_GS_BASE}
mov eax, edx
shr edx, 32
wrmsr
" }
}
macro_rules! save_and_set_gsbase_paranoid {
// For paranoid interrupt entries, we have to be extremely careful with how we use IA32_GS_BASE
// and IA32_KERNEL_GS_BASE. If FSGSBASE is enabled, then we have no way to differentiate these
// two, as paranoid interrupts (e.g. NMIs) can occur even in kernel mode. In fact, they can
// even occur within another IRQ, so we cannot check the privilege level via the stack.
//
// What we do instead is use a special entry in the GDT, since we know that the GDT will
// always be thread local, as it contains the TSS. This gives us more than 32 bits to work
// with, which already is the largest x2APIC ID that an x86 CPU can handle. Luckily we can also
// use the stack, even though there might be interrupts in between.
//
// TODO: Linux uses the Interrupt Stack Table to figure out which NMIs were nested. Perhaps
// this could be done here, because if nested (sp > initial_sp), that means the NMI could not
// have come from userspace. But then, knowing the initial sp would somehow have to involve
// percpu, which brings us back to square one. But it might be useful if we would allow faults
// in NMIs. If we do detect a nested interrupt, then we can perform the iretd procedure
// ourselves, so that the newly nested NMI still blocks additional interrupts while still
// returning to the previously (faulting) NMI. See https://lwn.net/Articles/484932/, although I
// think the solution becomes a bit simpler when we can no longer rely on GSBASE.
//
// Expansion order: save the current GSBASE (save_gsbase_paranoid), read the GDT base with
// `sgdt`, load the dword at `{gdt_cpu_id_offset}` inside the GDT, turn it into a per-CPU
// address using `{KERNEL_PERCPU_SHIFT}` and `{KERNEL_PERCPU_OFFSET}`, and write that address
// to GSBASE (set_gsbase_paranoid). EAX, EBX, ECX and EDX are overwritten along the way.
() => { concat!(
save_gsbase_paranoid!(),
// Allocate stack space for 8 bytes GDT base and 2 bytes size (ignored).
// NOTE(review): in 32-bit mode `sgdt` presumably stores a 2-byte limit followed by a
// 4-byte base, so the "8 bytes" wording looks inherited from x86_64 - confirm.
"sub esp, 16\n",
// Set it to the GDT base.
"sgdt [esp + 6]\n",
// Get the base pointer (the limit occupies [esp + 6], so the base starts at [esp + 8]),
// then release the scratch space again.
"
mov eax, [esp + 8]
add esp, 16
",
// Load the lower 32 bits of that GDT entry.
"mov edx, [eax + {gdt_cpu_id_offset}]\n",
// Calculate the percpu offset.
"
mov ebx, {KERNEL_PERCPU_OFFSET}
shl edx, {KERNEL_PERCPU_SHIFT}
add edx, ebx
",
// Set GSBASE to the address just computed in EDX
set_gsbase_paranoid!(),
) }
}
// Expands to an assembly template that contains only a comment, i.e. it emits
// no instructions. The comment mentions the `{IA32_GS_BASE}`,
// `{KERNEL_PERCPU_OFFSET}`, `{KERNEL_PERCPU_SHIFT}` and `{gdt_cpu_id_offset}`
// operands; presumably this keeps those named `asm!` operands referenced when
// this macro is substituted for one of the GSBASE macros - confirm.
macro_rules! nop {
() => { "
// Unused: {IA32_GS_BASE} {KERNEL_PERCPU_OFFSET} {KERNEL_PERCPU_SHIFT} {gdt_cpu_id_offset}
" }
}
#[macro_export]
macro_rules! interrupt_stack {
// XXX: Apparently we cannot use $expr and check for bool exhaustiveness, so we will have to
// use idents directly instead.
($name:ident, $save1:ident!, $save2:ident!, $rstor2:ident!, $rstor1:ident!, is_paranoid: $is_paranoid:expr, |$stack:ident| $code:block) => {
($name:ident, is_paranoid: $is_paranoid:expr, |$stack:ident| $code:block) => {
#[naked]
pub unsafe extern "C" fn $name() {
unsafe extern "C" fn inner($stack: &mut $crate::arch::x86::interrupt::InterruptStack) {
@@ -335,49 +230,39 @@ macro_rules! interrupt_stack {
}
core::arch::asm!(concat!(
// Backup all userspace registers to stack
$save1!(),
"push eax\n",
push_scratch!(),
push_preserved!(),
$save2!(),
// TODO: Map PTI
// $crate::arch::x86::pti::map();
// Call inner function with pointer to stack
"
mov edi, esp
push esp
call {inner}
pop esp
",
// TODO: Unmap PTI
// $crate::arch::x86::pti::unmap();
$rstor2!(),
// Restore all userspace registers
pop_preserved!(),
pop_scratch!(),
$rstor1!(),
"iretd\n",
),
inner = sym inner,
IA32_GS_BASE = const(x86::msr::IA32_GS_BASE),
KERNEL_PERCPU_SHIFT = const(crate::KERNEL_PERCPU_SHIFT),
KERNEL_PERCPU_OFFSET = const(crate::KERNEL_PERCPU_OFFSET),
gdt_cpu_id_offset = const(crate::gdt::GDT_CPU_ID_CONTAINER * core::mem::size_of::<crate::gdt::GdtEntry>()),
options(noreturn),
);
}
};
($name:ident, |$stack:ident| $code:block) => { interrupt_stack!($name, swapgs_iff_ring3_fast!, nop!, nop!, swapgs_iff_ring3_fast!, is_paranoid: false, |$stack| $code); };
($name:ident, @paranoid, |$stack:ident| $code:block) => { interrupt_stack!($name, nop!, save_and_set_gsbase_paranoid!, restore_gsbase_paranoid!, nop!, is_paranoid: true, |$stack| $code); }
($name:ident, |$stack:ident| $code:block) => { interrupt_stack!($name, is_paranoid: false, |$stack| $code); };
($name:ident, @paranoid, |$stack:ident| $code:block) => { interrupt_stack!($name, is_paranoid: true, |$stack| $code); }
}
#[macro_export]
@@ -391,7 +276,6 @@ macro_rules! interrupt {
core::arch::asm!(concat!(
// Backup all userspace registers to stack
swapgs_iff_ring3_fast!(),
"push eax\n",
push_scratch!(),
@@ -407,7 +291,6 @@ macro_rules! interrupt {
// Restore all userspace registers
pop_scratch!(),
swapgs_iff_ring3_fast!(),
"iretd\n",
),
@@ -445,7 +328,6 @@ macro_rules! interrupt_error {
}
core::arch::asm!(concat!(
swapgs_iff_ring3_fast_errorcode!(),
// Move eax into code's place, put code in last instead (to be
// compatible with InterruptStack)
"xchg [esp], eax\n",
@@ -462,8 +344,9 @@ macro_rules! interrupt_error {
// Call inner function with pointer to stack
"
mov edi, esp
push esp
call {inner}
pop esp
",
// TODO: Unmap PTI
@@ -477,7 +360,6 @@ macro_rules! interrupt_error {
pop_scratch!(),
// The error code has already been popped, so use the regular macro.
swapgs_iff_ring3_fast!(),
"iretd\n",
),

View File

@@ -301,64 +301,6 @@ macro_rules! inner_pit_unmap(
}
);
// MSR-based variant, used when the `x86_fsgsbase` feature is disabled.
// Expands to an assembly snippet that reads the MSR named by `{MSR_FSBASE}`
// with `rdmsr`, combines EDX:EAX into RDX and stores it in R14, then does the
// same for `{MSR_GSBASE}` into R13. Clobbers ECX, RAX and RDX.
// NOTE(review): the template uses 64-bit register names (rdx, rax, r13, r14)
// while the surrounding code appears to target 32-bit x86 - confirm that this
// variant can still be assembled for this target.
#[cfg(not(feature = "x86_fsgsbase"))]
macro_rules! save_fsgsbase(
() => {
"
mov ecx, {MSR_FSBASE}
rdmsr
shl rdx, 32
or rdx, rax
mov r14, rdx
mov ecx, {MSR_GSBASE}
rdmsr
shl rdx, 32
or rdx, rax
mov r13, rdx
"
}
);
// Instruction-based variant, used when the `x86_fsgsbase` feature is enabled.
// Expands to an assembly snippet that copies the FS base into R14 and the GS
// base into R13 with `rdfsbase` / `rdgsbase`. The leading assembler comment
// mentions `{MSR_FSBASE}` and `{MSR_GSBASE}`; presumably this keeps those
// named operands referenced so both variants accept the same `asm!`
// arguments - confirm.
#[cfg(feature = "x86_fsgsbase")]
macro_rules! save_fsgsbase(
() => {
"
// placeholder: {MSR_FSBASE} {MSR_GSBASE}
rdfsbase r14
rdgsbase r13
"
}
);
// Instruction-based variant, used when the `x86_fsgsbase` feature is enabled.
// Expands to an assembly snippet that writes R14 to the FS base and R13 to
// the GS base with `wrfsbase` / `wrgsbase`, i.e. the registers that the
// matching `save_fsgsbase` variant fills.
#[cfg(feature = "x86_fsgsbase")]
macro_rules! restore_fsgsbase(
() => {
"
wrfsbase r14
wrgsbase r13
"
}
);
// MSR-based variant, used when the `x86_fsgsbase` feature is disabled.
// Expands to an assembly snippet that writes R14 to the MSR named by
// `{MSR_FSBASE}` and R13 to the MSR named by `{MSR_GSBASE}`: each value is
// split into EAX (low half) and EDX (high half) before `wrmsr`. Clobbers ECX,
// EAX and RDX.
// NOTE(review): the template uses 64-bit register names (rdx, r13, r14)
// while the surrounding code appears to target 32-bit x86 - confirm that this
// variant can still be assembled for this target.
#[cfg(not(feature = "x86_fsgsbase"))]
macro_rules! restore_fsgsbase(
() => {
"
mov ecx, {MSR_FSBASE}
mov rdx, r14
mov eax, edx
shr rdx, 32
wrmsr
mov ecx, {MSR_GSBASE}
mov rdx, r13
mov eax, edx
shr rdx, 32
wrmsr
"
}
);
#[naked]
// TODO: AbiCompatBool
pub unsafe extern "C" fn usermode(_ip: usize, _sp: usize, _arg: usize, _is_singlestep: usize) -> ! {
@@ -381,7 +323,7 @@ pub unsafe extern "C" fn usermode(_ip: usize, _sp: usize, _arg: usize, _is_singl
mov ds, eax
mov es, eax
mov fs, eax
mov gs, eax
// gs keeps kernel selector for simplicity
// Set up iret stack
push eax // stack selector

View File

@@ -258,8 +258,9 @@ unsafe extern fn signal_handler_wrapper() {
push edi
push esi
mov edi, esp
push esp
call {inner}
pop esp
pop esi
pop edi