Fixes for x86 32-bit
This commit is contained in:
@@ -104,7 +104,8 @@ pub unsafe fn set_tss_stack(stack: usize) {
|
||||
#[cfg(not(feature = "pti"))]
|
||||
pub unsafe fn set_tss_stack(stack: usize) {
|
||||
KPCR.tss.0.ss0 = (GDT_KERNEL_DATA << 3) as u16;
|
||||
KPCR.tss.0.esp0 = stack as u32;
|
||||
//TODO: should PHYS_OFFSET be added on x86_64 as well?
|
||||
KPCR.tss.0.esp0 = (stack + crate::PHYS_OFFSET) as u32;
|
||||
}
|
||||
|
||||
// Initialize GDT
|
||||
@@ -138,9 +139,6 @@ pub unsafe fn init() {
|
||||
|
||||
/// Initialize GDT with TLS
|
||||
pub unsafe fn init_paging(cpu_id: u32, tcb_offset: usize, stack_offset: usize) {
|
||||
// Set temporary TLS segment to the self-pointer of the Thread Control Block.
|
||||
x86::msr::wrmsr(x86::msr::IA32_GS_BASE, tcb_offset as u64);
|
||||
|
||||
//TODO: will this work with multicore?
|
||||
{
|
||||
INIT_GDT[GDT_KERNEL_KPCR].set_offset(tcb_offset as u32);
|
||||
@@ -165,6 +163,8 @@ pub unsafe fn init_paging(cpu_id: u32, tcb_offset: usize, stack_offset: usize) {
|
||||
// Once we have fetched the real KPCR address, set the TLS segment to the TCB pointer there.
|
||||
kpcr.tcb_end = (tcb_offset as *const usize).read();
|
||||
|
||||
GDT[GDT_KERNEL_KPCR].set_offset(tcb_offset as u32);
|
||||
|
||||
{
|
||||
// We can now access our TSS, via the KPCR, which is a thread local
|
||||
let tss = &kpcr.tss.0 as *const _ as usize as u32;
|
||||
@@ -183,41 +183,16 @@ pub unsafe fn init_paging(cpu_id: u32, tcb_offset: usize, stack_offset: usize) {
|
||||
// Load the new GDT, which is correctly located in thread local storage.
|
||||
dtables::lgdt(&gdtr);
|
||||
|
||||
// Ensure that GS always points to the KPCR in kernel space.
|
||||
x86::msr::wrmsr(x86::msr::IA32_GS_BASE, kpcr as *mut _ as usize as u64);
|
||||
// Inside kernel space, GS should _always_ point to the TSS. When leaving userspace, `swapgs`
|
||||
// is called again, making the userspace GS always point to user data.
|
||||
x86::msr::wrmsr(x86::msr::IA32_KERNEL_GSBASE, 0);
|
||||
|
||||
// Set the User TLS segment to zero, before we create any contexts and start scheduling.
|
||||
x86::msr::wrmsr(x86::msr::IA32_FS_BASE, 0);
|
||||
|
||||
// Reload the segment descriptors
|
||||
load_cs(SegmentSelector::new(GDT_KERNEL_CODE as u16, Ring::Ring0));
|
||||
segmentation::load_ds(SegmentSelector::new(GDT_KERNEL_DATA as u16, Ring::Ring0));
|
||||
segmentation::load_es(SegmentSelector::new(GDT_KERNEL_DATA as u16, Ring::Ring0));
|
||||
segmentation::load_fs(SegmentSelector::new(GDT_KERNEL_DATA as u16, Ring::Ring0));
|
||||
segmentation::load_gs(SegmentSelector::new(GDT_KERNEL_KPCR as u16, Ring::Ring0));
|
||||
segmentation::load_ss(SegmentSelector::new(GDT_KERNEL_DATA as u16, Ring::Ring0));
|
||||
|
||||
// NOTE: FS has already been updated while calling set_tcb.
|
||||
// NOTE: We do not want to load GS again, since it has already been loaded into
|
||||
// GDT_KERNEL_KPCR. Instead, we use the base MSR to allow for a 64-bit offset.
|
||||
|
||||
// Load the task register
|
||||
task::load_tr(SegmentSelector::new(GDT_TSS as u16, Ring::Ring0));
|
||||
|
||||
let has_fsgsbase = cpuid().map_or(false, |cpuid| {
|
||||
cpuid.get_extended_feature_info().map_or(false, |extended_features| {
|
||||
extended_features.has_fsgsbase()
|
||||
})
|
||||
});
|
||||
|
||||
if cfg!(feature = "x86_fsgsbase") {
|
||||
assert!(has_fsgsbase, "running kernel with features not supported by the current CPU");
|
||||
}
|
||||
|
||||
if has_fsgsbase {
|
||||
x86::controlregs::cr4_write(x86::controlregs::cr4() | x86::controlregs::Cr4::CR4_ENABLE_FSGSBASE);
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
|
||||
@@ -64,12 +64,6 @@ impl IretRegisters {
|
||||
println!("ESP: {:016x}", { self.esp });
|
||||
println!("SS: {:016x}", { self.ss });
|
||||
}
|
||||
unsafe {
|
||||
let fsbase = x86::msr::rdmsr(x86::msr::IA32_FS_BASE);
|
||||
let gsbase = x86::msr::rdmsr(x86::msr::IA32_KERNEL_GSBASE);
|
||||
let kgsbase = x86::msr::rdmsr(x86::msr::IA32_GS_BASE);
|
||||
println!("FSBASE {:016x}\nGSBASE {:016x}\nKGSBASE {:016x}", fsbase, gsbase, kgsbase);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -209,111 +203,12 @@ macro_rules! pop_preserved {
|
||||
pop ebx
|
||||
" };
|
||||
}
|
||||
macro_rules! swapgs_iff_ring3_fast {
|
||||
() => { "
|
||||
// Check whether the last two bits ESP+8 (code segment) are equal to zero.
|
||||
test DWORD PTR [esp + 8], 0x3
|
||||
// Skip the SWAPGS instruction if CS & 0b11 == 0b00.
|
||||
jz 1f
|
||||
//TODO swapgs
|
||||
1:
|
||||
" };
|
||||
}
|
||||
macro_rules! swapgs_iff_ring3_fast_errorcode {
|
||||
() => { "
|
||||
test DWORD PTR [esp + 16], 0x3
|
||||
jz 1f
|
||||
//TODO swapgs
|
||||
1:
|
||||
" };
|
||||
}
|
||||
|
||||
macro_rules! save_gsbase_paranoid {
|
||||
() => { "
|
||||
mov ecx, {IA32_GS_BASE}
|
||||
rdmsr
|
||||
shl edx, 32
|
||||
or eax, edx
|
||||
|
||||
push eax
|
||||
" }
|
||||
}
|
||||
|
||||
macro_rules! restore_gsbase_paranoid {
|
||||
() => { "
|
||||
pop edx
|
||||
|
||||
mov ecx, {IA32_GS_BASE}
|
||||
mov eax, edx
|
||||
shr edx, 32
|
||||
wrmsr
|
||||
" }
|
||||
}
|
||||
|
||||
macro_rules! set_gsbase_paranoid {
|
||||
() => { "
|
||||
mov ecx, {IA32_GS_BASE}
|
||||
mov eax, edx
|
||||
shr edx, 32
|
||||
wrmsr
|
||||
" }
|
||||
}
|
||||
|
||||
macro_rules! save_and_set_gsbase_paranoid {
|
||||
// For paranoid interrupt entries, we have to be extremely careful with how we use IA32_GS_BASE
|
||||
// and IA32_KERNEL_GS_BASE. If FSGSBASE is enabled, then we have no way to differentiate these
|
||||
// two, as paranoid interrupts (e.g. NMIs) can occur even in kernel mode. In fact, they can
|
||||
// even occur within another IRQ, so we cannot check the privilege level via the stack.
|
||||
//
|
||||
// What we do instead, is using a special entry in the GDT, since we know that the GDT will
|
||||
// always be thread local, as it contains the TSS. This gives us more than 32 bits to work
|
||||
// with, which already is the largest x2APIC ID that an x86 CPU can handle. Luckily we can also
|
||||
// use the stack, even though there might be interrupts in between.
|
||||
//
|
||||
// TODO: Linux uses the Interrupt Stack Table to figure out which NMIs were nested. Perhaps
|
||||
// this could be done here, because if nested (sp > initial_sp), that means the NMI could not
|
||||
// have come from userspace. But then, knowing the initial sp would somehow have to involve
|
||||
// percpu, which brings us back to square one. But it might be useful if we would allow faults
|
||||
// in NMIs. If we do detect a nested interrupt, then we can perform the iretd procedure
|
||||
// ourselves, so that the newly nested NMI still blocks additional interrupts while still
|
||||
// returning to the previously (faulting) NMI. See https://lwn.net/Articles/484932/, although I
|
||||
// think the solution becomes a bit simpler when we can no longer rely on GSBASE.
|
||||
|
||||
() => { concat!(
|
||||
save_gsbase_paranoid!(),
|
||||
|
||||
// Allocate stack space for 8 bytes GDT base and 2 bytes size (ignored).
|
||||
"sub esp, 16\n",
|
||||
// Set it to the GDT base.
|
||||
"sgdt [esp + 6]\n",
|
||||
// Get the base pointer
|
||||
"
|
||||
mov eax, [esp + 8]
|
||||
add esp, 16
|
||||
",
|
||||
// Load the lower 32 bits of that GDT entry.
|
||||
"mov edx, [eax + {gdt_cpu_id_offset}]\n",
|
||||
// Calculate the percpu offset.
|
||||
"
|
||||
mov ebx, {KERNEL_PERCPU_OFFSET}
|
||||
shl edx, {KERNEL_PERCPU_SHIFT}
|
||||
add edx, ebx
|
||||
",
|
||||
// Set GSBASE to the per-CPU address computed in EDX
|
||||
set_gsbase_paranoid!(),
|
||||
) }
|
||||
}
|
||||
macro_rules! nop {
|
||||
() => { "
|
||||
// Unused: {IA32_GS_BASE} {KERNEL_PERCPU_OFFSET} {KERNEL_PERCPU_SHIFT} {gdt_cpu_id_offset}
|
||||
" }
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! interrupt_stack {
|
||||
// XXX: Apparently we cannot use $expr and check for bool exhaustiveness, so we will have to
|
||||
// use idents directly instead.
|
||||
($name:ident, $save1:ident!, $save2:ident!, $rstor2:ident!, $rstor1:ident!, is_paranoid: $is_paranoid:expr, |$stack:ident| $code:block) => {
|
||||
($name:ident, is_paranoid: $is_paranoid:expr, |$stack:ident| $code:block) => {
|
||||
#[naked]
|
||||
pub unsafe extern "C" fn $name() {
|
||||
unsafe extern "C" fn inner($stack: &mut $crate::arch::x86::interrupt::InterruptStack) {
|
||||
@@ -335,49 +230,39 @@ macro_rules! interrupt_stack {
|
||||
}
|
||||
core::arch::asm!(concat!(
|
||||
// Backup all userspace registers to stack
|
||||
$save1!(),
|
||||
"push eax\n",
|
||||
push_scratch!(),
|
||||
push_preserved!(),
|
||||
|
||||
$save2!(),
|
||||
|
||||
// TODO: Map PTI
|
||||
// $crate::arch::x86::pti::map();
|
||||
|
||||
// Call inner function with pointer to stack
|
||||
"
|
||||
mov edi, esp
|
||||
push esp
|
||||
call {inner}
|
||||
pop esp
|
||||
",
|
||||
|
||||
// TODO: Unmap PTI
|
||||
// $crate::arch::x86::pti::unmap();
|
||||
|
||||
$rstor2!(),
|
||||
|
||||
// Restore all userspace registers
|
||||
pop_preserved!(),
|
||||
pop_scratch!(),
|
||||
|
||||
$rstor1!(),
|
||||
"iretd\n",
|
||||
),
|
||||
|
||||
inner = sym inner,
|
||||
IA32_GS_BASE = const(x86::msr::IA32_GS_BASE),
|
||||
KERNEL_PERCPU_SHIFT = const(crate::KERNEL_PERCPU_SHIFT),
|
||||
KERNEL_PERCPU_OFFSET = const(crate::KERNEL_PERCPU_OFFSET),
|
||||
|
||||
gdt_cpu_id_offset = const(crate::gdt::GDT_CPU_ID_CONTAINER * core::mem::size_of::<crate::gdt::GdtEntry>()),
|
||||
|
||||
options(noreturn),
|
||||
|
||||
);
|
||||
}
|
||||
};
|
||||
($name:ident, |$stack:ident| $code:block) => { interrupt_stack!($name, swapgs_iff_ring3_fast!, nop!, nop!, swapgs_iff_ring3_fast!, is_paranoid: false, |$stack| $code); };
|
||||
($name:ident, @paranoid, |$stack:ident| $code:block) => { interrupt_stack!($name, nop!, save_and_set_gsbase_paranoid!, restore_gsbase_paranoid!, nop!, is_paranoid: true, |$stack| $code); }
|
||||
($name:ident, |$stack:ident| $code:block) => { interrupt_stack!($name, is_paranoid: false, |$stack| $code); };
|
||||
($name:ident, @paranoid, |$stack:ident| $code:block) => { interrupt_stack!($name, is_paranoid: true, |$stack| $code); }
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
@@ -391,7 +276,6 @@ macro_rules! interrupt {
|
||||
|
||||
core::arch::asm!(concat!(
|
||||
// Backup all userspace registers to stack
|
||||
swapgs_iff_ring3_fast!(),
|
||||
"push eax\n",
|
||||
push_scratch!(),
|
||||
|
||||
@@ -407,7 +291,6 @@ macro_rules! interrupt {
|
||||
// Restore all userspace registers
|
||||
pop_scratch!(),
|
||||
|
||||
swapgs_iff_ring3_fast!(),
|
||||
"iretd\n",
|
||||
),
|
||||
|
||||
@@ -445,7 +328,6 @@ macro_rules! interrupt_error {
|
||||
}
|
||||
|
||||
core::arch::asm!(concat!(
|
||||
swapgs_iff_ring3_fast_errorcode!(),
|
||||
// Move eax into code's place, put code in last instead (to be
|
||||
// compatible with InterruptStack)
|
||||
"xchg [esp], eax\n",
|
||||
@@ -462,8 +344,9 @@ macro_rules! interrupt_error {
|
||||
|
||||
// Call inner function with pointer to stack
|
||||
"
|
||||
mov edi, esp
|
||||
push esp
|
||||
call {inner}
|
||||
pop esp
|
||||
",
|
||||
|
||||
// TODO: Unmap PTI
|
||||
@@ -477,7 +360,6 @@ macro_rules! interrupt_error {
|
||||
pop_scratch!(),
|
||||
|
||||
// The error code has already been popped, so use the regular macro.
|
||||
swapgs_iff_ring3_fast!(),
|
||||
"iretd\n",
|
||||
),
|
||||
|
||||
|
||||
@@ -301,64 +301,6 @@ macro_rules! inner_pit_unmap(
|
||||
}
|
||||
);
|
||||
|
||||
#[cfg(not(feature = "x86_fsgsbase"))]
|
||||
macro_rules! save_fsgsbase(
|
||||
() => {
|
||||
"
|
||||
mov ecx, {MSR_FSBASE}
|
||||
rdmsr
|
||||
shl rdx, 32
|
||||
or rdx, rax
|
||||
mov r14, rdx
|
||||
|
||||
mov ecx, {MSR_GSBASE}
|
||||
rdmsr
|
||||
shl rdx, 32
|
||||
or rdx, rax
|
||||
mov r13, rdx
|
||||
"
|
||||
}
|
||||
);
|
||||
#[cfg(feature = "x86_fsgsbase")]
|
||||
macro_rules! save_fsgsbase(
|
||||
() => {
|
||||
"
|
||||
// placeholder: {MSR_FSBASE} {MSR_GSBASE}
|
||||
rdfsbase r14
|
||||
rdgsbase r13
|
||||
"
|
||||
}
|
||||
);
|
||||
|
||||
#[cfg(feature = "x86_fsgsbase")]
|
||||
macro_rules! restore_fsgsbase(
|
||||
() => {
|
||||
"
|
||||
wrfsbase r14
|
||||
wrgsbase r13
|
||||
"
|
||||
}
|
||||
);
|
||||
|
||||
#[cfg(not(feature = "x86_fsgsbase"))]
|
||||
macro_rules! restore_fsgsbase(
|
||||
() => {
|
||||
"
|
||||
mov ecx, {MSR_FSBASE}
|
||||
mov rdx, r14
|
||||
mov eax, edx
|
||||
shr rdx, 32
|
||||
wrmsr
|
||||
|
||||
mov ecx, {MSR_GSBASE}
|
||||
mov rdx, r13
|
||||
mov eax, edx
|
||||
shr rdx, 32
|
||||
wrmsr
|
||||
"
|
||||
}
|
||||
);
|
||||
|
||||
#[naked]
|
||||
// TODO: AbiCompatBool
|
||||
pub unsafe extern "C" fn usermode(_ip: usize, _sp: usize, _arg: usize, _is_singlestep: usize) -> ! {
|
||||
@@ -381,7 +323,7 @@ pub unsafe extern "C" fn usermode(_ip: usize, _sp: usize, _arg: usize, _is_singl
|
||||
mov ds, eax
|
||||
mov es, eax
|
||||
mov fs, eax
|
||||
mov gs, eax
|
||||
// gs keeps kernel selector for simplicity
|
||||
|
||||
// Set up iret stack
|
||||
push eax // stack selector
|
||||
|
||||
@@ -258,8 +258,9 @@ unsafe extern fn signal_handler_wrapper() {
|
||||
push edi
|
||||
push esi
|
||||
|
||||
mov edi, esp
|
||||
push esp
|
||||
call {inner}
|
||||
pop esp
|
||||
|
||||
pop esi
|
||||
pop edi
|
||||
|
||||
Reference in New Issue
Block a user