Support fsgsbase at compile time.
This commit is contained in:
2
Cargo.lock
generated
2
Cargo.lock
generated
@@ -1,7 +1,5 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.0.1"
|
||||
|
||||
@@ -56,6 +56,9 @@ serial_debug = []
|
||||
system76_ec_debug = []
|
||||
slab = ["slab_allocator"]
|
||||
|
||||
# TODO: Either wait for LLVM 12 and use target_feature, or use another system for cpu features
|
||||
x86_fsgsbase = []
|
||||
|
||||
[profile.dev]
|
||||
# Kernel doesn't yet work great with debug mode :(
|
||||
opt-level = 3
|
||||
|
||||
@@ -19,9 +19,8 @@ pub const GDT_KERNEL_KPCR: usize = 3;
|
||||
pub const GDT_USER_CODE32_UNUSED: usize = 4;
|
||||
pub const GDT_USER_DATA: usize = 5;
|
||||
pub const GDT_USER_CODE: usize = 6;
|
||||
pub const GDT_USER_TLS: usize = 7;
|
||||
pub const GDT_TSS: usize = 8;
|
||||
pub const GDT_TSS_HIGH: usize = 9;
|
||||
pub const GDT_TSS: usize = 7;
|
||||
pub const GDT_TSS_HIGH: usize = 8;
|
||||
|
||||
pub const GDT_A_PRESENT: u8 = 1 << 7;
|
||||
pub const GDT_A_RING_0: u8 = 0 << 5;
|
||||
@@ -53,7 +52,7 @@ static mut INIT_GDT: [GdtEntry; 4] = [
|
||||
];
|
||||
|
||||
#[thread_local]
|
||||
pub static mut GDT: [GdtEntry; 10] = [
|
||||
pub static mut GDT: [GdtEntry; 9] = [
|
||||
// Null
|
||||
GdtEntry::new(0, 0, 0, 0),
|
||||
// Kernel code
|
||||
@@ -68,8 +67,6 @@ pub static mut GDT: [GdtEntry; 10] = [
|
||||
GdtEntry::new(0, 0, GDT_A_PRESENT | GDT_A_RING_3 | GDT_A_SYSTEM | GDT_A_PRIVILEGE, GDT_F_LONG_MODE),
|
||||
// User (64-bit) code
|
||||
GdtEntry::new(0, 0, GDT_A_PRESENT | GDT_A_RING_3 | GDT_A_SYSTEM | GDT_A_EXECUTABLE | GDT_A_PRIVILEGE, GDT_F_LONG_MODE),
|
||||
// User TLS
|
||||
GdtEntry::new(0, 0, GDT_A_PRESENT | GDT_A_RING_3 | GDT_A_SYSTEM | GDT_A_PRIVILEGE, GDT_F_LONG_MODE),
|
||||
// TSS
|
||||
GdtEntry::new(0, 0, GDT_A_PRESENT | GDT_A_RING_3 | GDT_A_TSS_AVAIL, 0),
|
||||
// TSS must be 16 bytes long, twice the normal size
|
||||
@@ -106,11 +103,6 @@ pub static mut KPCR: ProcessorControlRegion = ProcessorControlRegion {
|
||||
}),
|
||||
};
|
||||
|
||||
pub unsafe fn set_tcb(pid: usize) {
|
||||
GDT[GDT_USER_TLS].set_offset((crate::USER_TCB_OFFSET + pid * PAGE_SIZE) as u32);
|
||||
x86::segmentation::load_fs(SegmentSelector::new(GDT_USER_TLS as u16, Ring::Ring3));
|
||||
}
|
||||
|
||||
#[cfg(feature = "pti")]
|
||||
pub unsafe fn set_tss_stack(stack: usize) {
|
||||
use super::pti::{PTI_CPU_STACK, PTI_CONTEXT_STACK};
|
||||
@@ -199,8 +191,8 @@ pub unsafe fn init_paging(tcb_offset: usize, stack_offset: usize) {
|
||||
// is called again, making the userspace GS always point to user data.
|
||||
x86::msr::wrmsr(x86::msr::IA32_KERNEL_GSBASE, 0);
|
||||
|
||||
// Set the User TLS segment to the offset of the user TCB
|
||||
set_tcb(0);
|
||||
// Set the User TLS segment to zero, before we create any contexts and start scheduling.
|
||||
x86::msr::wrmsr(x86::msr::IA32_FS_BASE, 0);
|
||||
|
||||
// Reload the segment descriptors
|
||||
load_cs(SegmentSelector::new(GDT_KERNEL_CODE as u16, Ring::Ring0));
|
||||
@@ -214,6 +206,18 @@ pub unsafe fn init_paging(tcb_offset: usize, stack_offset: usize) {
|
||||
|
||||
// Load the task register
|
||||
task::load_tr(SegmentSelector::new(GDT_TSS as u16, Ring::Ring0));
|
||||
|
||||
let has_fsgsbase = raw_cpuid::CpuId::new()
|
||||
.get_extended_feature_info()
|
||||
.map_or(false, |extended_features| extended_features.has_fsgsbase());
|
||||
|
||||
if cfg!(feature = "x86_fsgsbase") {
|
||||
assert!(has_fsgsbase, "running kernel with features not supported by the current CPU");
|
||||
}
|
||||
|
||||
if has_fsgsbase {
|
||||
x86::controlregs::cr4_write(x86::controlregs::cr4() | x86::controlregs::Cr4::CR4_ENABLE_FSGSBASE);
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
|
||||
@@ -45,6 +45,7 @@ pub use ::rmm::X8664Arch as CurrentRmmArch;
|
||||
|
||||
// Flags
|
||||
pub mod flags {
|
||||
pub const FLAG_SINGLESTEP: usize = 1 << 8;
|
||||
pub const SHIFT_SINGLESTEP: usize = 8;
|
||||
pub const FLAG_SINGLESTEP: usize = 1 << SHIFT_SINGLESTEP;
|
||||
pub const FLAG_INTERRUPTS: usize = 1 << 9;
|
||||
}
|
||||
|
||||
@@ -239,45 +239,130 @@ pub unsafe extern fn kstart_ap(args_ptr: *const KernelArgsAp) -> ! {
|
||||
crate::kmain_ap(cpu_id);
|
||||
}
|
||||
|
||||
/// Assembly fragment that `usermode` splices in to run `{pti_unmap}` before leaving the kernel.
///
/// Without the `pti` feature the fragment is only an assembler comment. It still mentions
/// `{pti_unmap}` so that the named `asm!` operand is referenced by the template.
///
/// The gate is the `pti` feature, the same one `set_tss_stack` is conditional on. The macro keeps
/// its `pit` spelling because existing call sites invoke `inner_pit_unmap!()`.
#[cfg(not(feature = "pti"))]
macro_rules! inner_pit_unmap(
    () => {
        "
        // unused: {pti_unmap}
        "
    }
);
/// With the `pti` feature: call `{pti_unmap}` while preserving the four argument registers of
/// `usermode` (rdi, rsi, rdx, rcx) across the call.
///
/// NOTE(review): the extra `sub rsp, 8` / `add rsp, 8` pair is presumably padding so that the
/// stack is 16-byte aligned at the `call` -- confirm against the entry alignment of `usermode`.
#[cfg(feature = "pti")]
macro_rules! inner_pit_unmap(
    () => {
        "
        push rdi
        push rsi
        push rdx
        push rcx
        sub rsp, 8

        call {pti_unmap}

        add rsp, 8
        pop rcx
        pop rdx
        pop rsi
        pop rdi
        "
    }
);
|
||||
|
||||
/// Assembly fragment that copies the current FS base into r14 and the current GS base into r13.
///
/// MSR variant, used when the kernel is built without `x86_fsgsbase`. Each base is read with
/// `rdmsr`, which returns the value split across EDX:EAX, so rax, rcx and rdx are overwritten.
///
/// The two halves are merged with `or rdx, rax`. The previous `mov edx, eax` was a 32-bit
/// register write, which zero-extends into rdx and therefore discarded the high half that had
/// just been shifted into place. `rdmsr` clears the upper 32 bits of rax, so the `or` is exact.
#[cfg(not(feature = "x86_fsgsbase"))]
macro_rules! save_fsgsbase(
    () => {
        "
        mov ecx, {MSR_FSBASE}
        rdmsr
        shl rdx, 32
        or rdx, rax
        mov r14, rdx

        mov ecx, {MSR_GSBASE}
        rdmsr
        shl rdx, 32
        or rdx, rax
        mov r13, rdx
        "
    }
);
/// Assembly fragment that copies the current FS base into r14 and the current GS base into r13.
///
/// FSGSBASE variant, used when the kernel is built with `x86_fsgsbase`. The leading assembler
/// comment only exists to reference the `{MSR_FSBASE}` and `{MSR_GSBASE}` operands, which the
/// surrounding `asm!` passes regardless of the selected variant.
#[cfg(feature = "x86_fsgsbase")]
macro_rules! save_fsgsbase(
    () => {
        "
        // placeholder: {MSR_FSBASE} {MSR_GSBASE}
        rdfsbase r14
        rdgsbase r13
        "
    }
);
|
||||
|
||||
/// Assembly fragment that installs r14 as the FS base and r13 as the GS base.
///
/// MSR variant, used when the kernel is built without `x86_fsgsbase`. For each base the 64-bit
/// value is split into eax (low half) and edx (high half) before `wrmsr`, so rax, rcx and rdx
/// are overwritten.
#[cfg(not(feature = "x86_fsgsbase"))]
macro_rules! restore_fsgsbase {
    () => {
        "
        mov ecx, {MSR_FSBASE}
        mov rdx, r14
        mov eax, edx
        shr rdx, 32
        wrmsr

        mov ecx, {MSR_GSBASE}
        mov rdx, r13
        mov eax, edx
        shr rdx, 32
        wrmsr
        "
    };
}

/// Assembly fragment that installs r14 as the FS base and r13 as the GS base.
///
/// FSGSBASE variant, used when the kernel is built with `x86_fsgsbase`.
#[cfg(feature = "x86_fsgsbase")]
macro_rules! restore_fsgsbase {
    () => {
        "
        wrfsbase r14
        wrgsbase r13
        "
    };
}
|
||||
|
||||
#[naked]
|
||||
#[inline(never)]
|
||||
// TODO: AbiCompatBool
|
||||
pub unsafe extern "C" fn usermode(_ip: usize, _sp: usize, _arg: usize, _singlestep: u32) -> ! {
|
||||
pub unsafe extern "C" fn usermode(_ip: usize, _sp: usize, _arg: usize, _is_singlestep: usize) -> ! {
|
||||
// rdi, rsi, rdx, rcx
|
||||
asm!(
|
||||
"
|
||||
mov rbx, {flag_interrupts}
|
||||
test ecx, ecx
|
||||
jz .after_singlestep_branch
|
||||
or rbx, {flag_singlestep}
|
||||
concat!("
|
||||
shl rcx, {shift_singlestep}
|
||||
or rcx, {flag_interrupts}
|
||||
|
||||
.after_singlestep_branch:
|
||||
", inner_pit_unmap!(), "
|
||||
|
||||
// save `ip` (rdi), `sp` (rsi), and `arg` (rdx) in callee-preserved registers, so that
|
||||
// they are not modified by `pti_unmap`
|
||||
// Save rdx for later
|
||||
mov r12, rdx
|
||||
|
||||
mov r13, rdi
|
||||
mov r14, rsi
|
||||
mov r15, rdx
|
||||
call {pti_unmap}
|
||||
// Target RFLAGS
|
||||
mov r11, rcx
|
||||
|
||||
// Go to usermode
|
||||
swapgs
|
||||
mov r8, {user_data_seg_selector}
|
||||
mov r9, {user_tls_seg_selector}
|
||||
mov ds, r8d
|
||||
mov es, r8d
|
||||
mov fs, r9d
|
||||
mov gs, r8d
|
||||
|
||||
// Target RFLAGS
|
||||
mov r11, rbx
|
||||
", save_fsgsbase!(), "
|
||||
|
||||
mov r15, {user_data_seg_selector}
|
||||
mov ds, r15d
|
||||
mov es, r15d
|
||||
mov fs, r15d
|
||||
mov gs, r15d
|
||||
|
||||
", restore_fsgsbase!(), "
|
||||
|
||||
// Target instruction pointer
|
||||
mov rcx, r13
|
||||
mov rcx, rdi
|
||||
// Target stack pointer
|
||||
mov rsp, r14
|
||||
mov rsp, rsi
|
||||
// Target argument
|
||||
mov rdi, r15
|
||||
mov rdi, r12
|
||||
|
||||
xor rax, rax
|
||||
xor rbx, rbx
|
||||
@@ -304,13 +389,16 @@ pub unsafe extern "C" fn usermode(_ip: usize, _sp: usize, _arg: usize, _singlest
|
||||
// middle here, is minimal, unless the attacker already has partial control of kernel
|
||||
// memory.)
|
||||
sysretq
|
||||
",
|
||||
"),
|
||||
|
||||
flag_interrupts = const(FLAG_INTERRUPTS),
|
||||
flag_singlestep = const(FLAG_SINGLESTEP),
|
||||
shift_singlestep = const(SHIFT_SINGLESTEP),
|
||||
pti_unmap = sym pti::unmap,
|
||||
user_data_seg_selector = const(gdt::GDT_USER_DATA << 3 | 3),
|
||||
user_tls_seg_selector = const(gdt::GDT_USER_TLS << 3 | 3),
|
||||
|
||||
MSR_FSBASE = const(x86::msr::IA32_FS_BASE),
|
||||
MSR_GSBASE = const(x86::msr::IA32_GS_BASE),
|
||||
|
||||
options(noreturn),
|
||||
);
|
||||
}
|
||||
|
||||
@@ -36,6 +36,10 @@ pub struct Context {
|
||||
rbp: usize,
|
||||
/// Stack pointer
|
||||
rsp: usize,
|
||||
/// FSBASE
|
||||
pub fsbase: usize,
|
||||
/// GSBASE
|
||||
gsbase: usize,
|
||||
/// FX valid?
|
||||
loadable: AbiCompatBool,
|
||||
}
|
||||
@@ -48,7 +52,7 @@ enum AbiCompatBool {
|
||||
}
|
||||
|
||||
impl Context {
|
||||
pub fn new() -> Context {
|
||||
pub fn new(pid: usize) -> Context {
|
||||
Context {
|
||||
loadable: AbiCompatBool::False,
|
||||
fx: 0,
|
||||
@@ -60,9 +64,14 @@ impl Context {
|
||||
r14: 0,
|
||||
r15: 0,
|
||||
rbp: 0,
|
||||
rsp: 0
|
||||
rsp: 0,
|
||||
fsbase: crate::USER_TCB_OFFSET + pid * crate::memory::PAGE_SIZE,
|
||||
gsbase: 0,
|
||||
}
|
||||
}
|
||||
pub fn update_tcb(&mut self, pid: usize) {
|
||||
self.fsbase = crate::USER_TCB_OFFSET + pid * crate::memory::PAGE_SIZE;
|
||||
}
|
||||
|
||||
pub fn get_page_utable(&mut self) -> usize {
|
||||
self.cr3
|
||||
@@ -138,6 +147,66 @@ impl Context {
|
||||
}
|
||||
}
|
||||
|
||||
/// Assembly fragment that swaps one MSR between two contexts.
///
/// `$name` is the name of the `asm!` operand holding the MSR number and `$offset` the name of
/// the operand holding the field offset inside the context structure. The current MSR value is
/// stored at `[rdi + offset]` and the value found at `[rsi + offset]` is written to the MSR.
/// rax, rcx and rdx are overwritten.
///
/// The halves returned by `rdmsr` in EDX:EAX are merged with `or rdx, rax`. The previous
/// `mov edx, eax` was a 32-bit register write, which zero-extends into rdx, so only the low
/// 32 bits of the MSR were ever saved. `rdmsr` clears the upper 32 bits of rax, so the `or`
/// is exact.
macro_rules! switch_msr(
    ($name:literal, $offset:literal) => {
        concat!("
            // EDX:EAX <= MSR

            mov ecx, {", $name, "}
            rdmsr
            shl rdx, 32
            or rdx, rax

            // Save old, load new.

            mov [rdi + {", $offset, "}], rdx
            mov rdx, [rsi + {", $offset, "}]
            mov eax, edx
            shr rdx, 32

            // MSR <= EDX:EAX
            wrmsr
        ")
    }
);
|
||||
|
||||
// NOTE: RAX is a scratch register and can be set to whatever. There is also no return
|
||||
// value in switch_to, so it will also never be read. The same goes for RDX, and RCX.
|
||||
// TODO: Use runtime code patching (perhaps in the bootloader) by pushing alternative code
|
||||
// sequences into a specialized section, with some macro resembling Linux's `.ALTERNATIVE`.
|
||||
/// Assembly fragment for `switch_to` that saves the FS and GS bases of the outgoing context
/// (pointed to by rdi) and loads those of the incoming context (pointed to by rsi).
///
/// MSR variant, used when the kernel is built without `x86_fsgsbase`: one `switch_msr!` per
/// register, operating on the `MSR_FSBASE` and `MSR_KERNELGSBASE` operands.
#[cfg(not(feature = "x86_fsgsbase"))]
macro_rules! switch_fsgsbase {
    () => {
        // TODO: Is it faster to perform two 32-bit memory accesses, rather than shifting?
        concat!(
            switch_msr!("MSR_FSBASE", "off_fsbase"),
            switch_msr!("MSR_KERNELGSBASE", "off_gsbase"),
        )
    };
}

/// Assembly fragment for `switch_to` that saves the FS and GS bases of the outgoing context
/// (pointed to by rdi) and loads those of the incoming context (pointed to by rsi).
///
/// FSGSBASE variant, used when the kernel is built with `x86_fsgsbase`. Only rax is used as a
/// temporary. The GS accesses are bracketed by a `swapgs` pair; presumably this makes
/// `rdgsbase`/`wrgsbase` operate on the value the MSR variant reaches through
/// `MSR_KERNELGSBASE`. The leading assembler comment only references the two MSR operands,
/// which `switch_to` passes regardless of the selected variant.
#[cfg(feature = "x86_fsgsbase")]
macro_rules! switch_fsgsbase {
    () => {
        "
        // placeholder: {MSR_FSBASE} {MSR_KERNELGSBASE}

        rdfsbase rax
        mov [rdi + {off_fsbase}], rax
        mov rax, [rsi + {off_fsbase}]
        wrfsbase rax

        swapgs
        rdgsbase rax
        mov [rdi + {off_gsbase}], rax
        mov rax, [rsi + {off_gsbase}]
        wrgsbase rax
        swapgs
        "
    };
}
|
||||
|
||||
|
||||
/// Switch to the next context by restoring its stack and registers
|
||||
/// Check disassembly!
|
||||
#[inline(never)]
|
||||
@@ -152,7 +221,7 @@ pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) {
|
||||
// - we can modify scratch registers, e.g. rax
|
||||
// - we cannot change callee-preserved registers arbitrarily, e.g. rbx, which is why we
|
||||
// store them here in the first place.
|
||||
"
|
||||
concat!("
|
||||
// load `prev.fx`
|
||||
mov rax, [rdi + {off_fx}]
|
||||
|
||||
@@ -163,26 +232,26 @@ pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) {
|
||||
mov BYTE PTR [rdi + {off_loadable}], {true}
|
||||
// compare `next.loadable` with true
|
||||
cmp BYTE PTR [rsi + {off_loadable}], {true}
|
||||
je switch_to.next_is_loadable
|
||||
je 3f
|
||||
|
||||
fninit
|
||||
jmp switch_to.after_fx
|
||||
jmp 3f
|
||||
|
||||
switch_to.next_is_loadable:
|
||||
2:
|
||||
mov rax, [rsi + {off_fx}]
|
||||
fxrstor64 [rax]
|
||||
|
||||
switch_to.after_fx:
|
||||
3:
|
||||
// Save the current CR3, and load the next CR3 if not identical
|
||||
mov rcx, cr3
|
||||
mov [rdi + {off_cr3}], rcx
|
||||
mov rax, [rsi + {off_cr3}]
|
||||
cmp rax, rcx
|
||||
|
||||
je switch_to.same_cr3
|
||||
je 4f
|
||||
mov cr3, rax
|
||||
|
||||
switch_to.same_cr3:
|
||||
4:
|
||||
// Save old registers, and load new ones
|
||||
mov [rdi + {off_rbx}], rbx
|
||||
mov rbx, [rsi + {off_rbx}]
|
||||
@@ -205,6 +274,10 @@ pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) {
|
||||
mov [rdi + {off_rsp}], rsp
|
||||
mov rsp, [rsi + {off_rsp}]
|
||||
|
||||
",
|
||||
switch_fsgsbase!(),
|
||||
"
|
||||
|
||||
// push RFLAGS (can only be modified via stack)
|
||||
pushfq
|
||||
// pop RFLAGS into `self.rflags`
|
||||
@@ -222,7 +295,7 @@ pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) {
|
||||
// Note that switch_finish_hook will be responsible for executing `ret`.
|
||||
jmp {switch_hook}
|
||||
|
||||
",
|
||||
"),
|
||||
|
||||
off_fx = const(offset_of!(Cx, fx)),
|
||||
off_cr3 = const(offset_of!(Cx, cr3)),
|
||||
@@ -237,12 +310,17 @@ pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) {
|
||||
off_rbp = const(offset_of!(Cx, rbp)),
|
||||
off_rsp = const(offset_of!(Cx, rsp)),
|
||||
|
||||
off_fsbase = const(offset_of!(Cx, fsbase)),
|
||||
off_gsbase = const(offset_of!(Cx, gsbase)),
|
||||
|
||||
MSR_FSBASE = const(x86::msr::IA32_FS_BASE),
|
||||
MSR_KERNELGSBASE = const(x86::msr::IA32_KERNEL_GSBASE),
|
||||
|
||||
true = const(AbiCompatBool::True as u8),
|
||||
switch_hook = sym crate::context::switch_finish_hook,
|
||||
options(noreturn),
|
||||
);
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
#[repr(packed)]
|
||||
pub struct SignalHandlerStack {
|
||||
|
||||
@@ -282,7 +282,7 @@ impl Context {
|
||||
waitpid: Arc::new(WaitMap::new()),
|
||||
pending: VecDeque::new(),
|
||||
wake: None,
|
||||
arch: arch::Context::new(),
|
||||
arch: arch::Context::new(id.into()),
|
||||
kfx: None,
|
||||
kstack: None,
|
||||
ksig: None,
|
||||
|
||||
@@ -122,7 +122,7 @@ pub extern "C" fn signal_handler(sig: usize) {
|
||||
sp -= mem::size_of::<usize>();
|
||||
*(sp as *mut usize) = restorer;
|
||||
|
||||
usermode(handler, sp, sig, u32::from(singlestep));
|
||||
usermode(handler, sp, sig, usize::from(singlestep));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -173,7 +173,6 @@ pub unsafe fn switch() -> bool {
|
||||
if let Some(ref stack) = to_context.kstack {
|
||||
gdt::set_tss_stack(stack.as_ptr() as usize + stack.len());
|
||||
}
|
||||
gdt::set_tcb(to_context.id.into());
|
||||
}
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
{
|
||||
|
||||
@@ -378,6 +378,10 @@ pub fn clone(flags: CloneFlags, stack_base: usize) -> Result<ContextId> {
|
||||
let mut new_ktable = unsafe {
|
||||
InactivePageTable::from_address(new_utable.address())
|
||||
};
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
{
|
||||
context.arch.update_tcb(pid.into());
|
||||
}
|
||||
|
||||
// Copy kernel image mapping
|
||||
{
|
||||
@@ -904,7 +908,7 @@ fn fexec_noreturn(
|
||||
}
|
||||
|
||||
// Go to usermode
|
||||
unsafe { usermode(entry, sp, 0, u32::from(singlestep)) }
|
||||
unsafe { usermode(entry, sp, 0, usize::from(singlestep)) }
|
||||
}
|
||||
|
||||
pub fn fexec_kernel(fd: FileHandle, args: Box<[Box<[u8]>]>, vars: Box<[Box<[u8]>]>, name_override_opt: Option<Box<str>>, auxv: Option<Vec<usize>>) -> Result<usize> {
|
||||
|
||||
Reference in New Issue
Block a user