Support fsgsbase at compile time.

This commit is contained in:
4lDO2
2021-06-22 17:21:03 +02:00
parent 9c3cf84453
commit 0968e4f87e
10 changed files with 233 additions and 58 deletions

2
Cargo.lock generated
View File

@@ -1,7 +1,5 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "autocfg"
version = "1.0.1"

View File

@@ -56,6 +56,9 @@ serial_debug = []
system76_ec_debug = []
slab = ["slab_allocator"]
# TODO: Either wait for LLVM 12 and use target_feature, or use another system for cpu features
x86_fsgsbase = []
[profile.dev]
# Kernel doesn't yet work great with debug mode :(
opt-level = 3

View File

@@ -19,9 +19,8 @@ pub const GDT_KERNEL_KPCR: usize = 3;
pub const GDT_USER_CODE32_UNUSED: usize = 4;
pub const GDT_USER_DATA: usize = 5;
pub const GDT_USER_CODE: usize = 6;
pub const GDT_USER_TLS: usize = 7;
pub const GDT_TSS: usize = 8;
pub const GDT_TSS_HIGH: usize = 9;
pub const GDT_TSS: usize = 7;
pub const GDT_TSS_HIGH: usize = 8;
pub const GDT_A_PRESENT: u8 = 1 << 7;
pub const GDT_A_RING_0: u8 = 0 << 5;
@@ -53,7 +52,7 @@ static mut INIT_GDT: [GdtEntry; 4] = [
];
#[thread_local]
pub static mut GDT: [GdtEntry; 10] = [
pub static mut GDT: [GdtEntry; 9] = [
// Null
GdtEntry::new(0, 0, 0, 0),
// Kernel code
@@ -68,8 +67,6 @@ pub static mut GDT: [GdtEntry; 10] = [
GdtEntry::new(0, 0, GDT_A_PRESENT | GDT_A_RING_3 | GDT_A_SYSTEM | GDT_A_PRIVILEGE, GDT_F_LONG_MODE),
// User (64-bit) code
GdtEntry::new(0, 0, GDT_A_PRESENT | GDT_A_RING_3 | GDT_A_SYSTEM | GDT_A_EXECUTABLE | GDT_A_PRIVILEGE, GDT_F_LONG_MODE),
// User TLS
GdtEntry::new(0, 0, GDT_A_PRESENT | GDT_A_RING_3 | GDT_A_SYSTEM | GDT_A_PRIVILEGE, GDT_F_LONG_MODE),
// TSS
GdtEntry::new(0, 0, GDT_A_PRESENT | GDT_A_RING_3 | GDT_A_TSS_AVAIL, 0),
// TSS must be 16 bytes long, twice the normal size
@@ -106,11 +103,6 @@ pub static mut KPCR: ProcessorControlRegion = ProcessorControlRegion {
}),
};
pub unsafe fn set_tcb(pid: usize) {
GDT[GDT_USER_TLS].set_offset((crate::USER_TCB_OFFSET + pid * PAGE_SIZE) as u32);
x86::segmentation::load_fs(SegmentSelector::new(GDT_USER_TLS as u16, Ring::Ring3));
}
#[cfg(feature = "pti")]
pub unsafe fn set_tss_stack(stack: usize) {
use super::pti::{PTI_CPU_STACK, PTI_CONTEXT_STACK};
@@ -199,8 +191,8 @@ pub unsafe fn init_paging(tcb_offset: usize, stack_offset: usize) {
// is called again, making the userspace GS always point to user data.
x86::msr::wrmsr(x86::msr::IA32_KERNEL_GSBASE, 0);
// Set the User TLS segment to the offset of the user TCB
set_tcb(0);
// Set the User TLS segment to zero, before we create any contexts and start scheduling.
x86::msr::wrmsr(x86::msr::IA32_FS_BASE, 0);
// Reload the segment descriptors
load_cs(SegmentSelector::new(GDT_KERNEL_CODE as u16, Ring::Ring0));
@@ -214,6 +206,18 @@ pub unsafe fn init_paging(tcb_offset: usize, stack_offset: usize) {
// Load the task register
task::load_tr(SegmentSelector::new(GDT_TSS as u16, Ring::Ring0));
let has_fsgsbase = raw_cpuid::CpuId::new()
.get_extended_feature_info()
.map_or(false, |extended_features| extended_features.has_fsgsbase());
if cfg!(feature = "x86_fsgsbase") {
assert!(has_fsgsbase, "running kernel with features not supported by the current CPU");
}
if has_fsgsbase {
x86::controlregs::cr4_write(x86::controlregs::cr4() | x86::controlregs::Cr4::CR4_ENABLE_FSGSBASE);
}
}
#[derive(Copy, Clone, Debug)]

View File

@@ -45,6 +45,7 @@ pub use ::rmm::X8664Arch as CurrentRmmArch;
// Flags
pub mod flags {
pub const FLAG_SINGLESTEP: usize = 1 << 8;
pub const SHIFT_SINGLESTEP: usize = 8;
pub const FLAG_SINGLESTEP: usize = 1 << SHIFT_SINGLESTEP;
pub const FLAG_INTERRUPTS: usize = 1 << 9;
}

View File

@@ -239,45 +239,130 @@ pub unsafe extern fn kstart_ap(args_ptr: *const KernelArgsAp) -> ! {
crate::kmain_ap(cpu_id);
}
// Assembly fragment spliced into `usermode` (via `concat!`) that unmaps the kernel through
// `pti::unmap` before entering userspace. The `{pti_unmap}` placeholder is resolved by the
// enclosing `asm!` invocation.
//
// The two variants are selected by the `pti` feature, the same feature name that gates
// `set_tss_stack`. (The macro keeps its historical `pit` spelling because the call site uses it.)

// Without `pti`: emits nothing but an assembler comment. The comment still mentions
// `{pti_unmap}`, presumably so that the named `asm!` operand counts as used.
#[cfg(not(feature = "pti"))]
macro_rules! inner_pit_unmap(
    () => {
        "
        // unused: {pti_unmap}
        "
    }
);
// With `pti`: calls `pti::unmap`, preserving the four argument registers of `usermode`
// (rdi, rsi, rdx, rcx) across the call.
#[cfg(feature = "pti")]
macro_rules! inner_pit_unmap(
    () => {
        "
        push rdi
        push rsi
        push rdx
        push rcx
        // Four pushes leave the stack 8 bytes short of 16-byte alignment; pad before the call.
        sub rsp, 8

        call {pti_unmap}

        add rsp, 8
        pop rcx
        pop rdx
        pop rsi
        pop rdi
        "
    }
);
// Assembly fragment spliced into `usermode` (via `concat!`): stashes the current FS base in r14
// and the current GS base in r13, so that `restore_fsgsbase!` can write them back after the
// segment registers have been reloaded. `{MSR_FSBASE}` and `{MSR_GSBASE}` are resolved by the
// enclosing `asm!` invocation.

// MSR variant, for builds without the `x86_fsgsbase` feature. Clobbers rax, rcx and rdx.
//
// `rdmsr` returns the MSR in EDX:EAX with the upper halves of RAX and RDX cleared, so the 64-bit
// value is assembled as `(rdx << 32) | rax`. The low half must be merged with a 64-bit `or`: a
// 32-bit register move would zero-extend into RDX and throw away the high half.
#[cfg(not(feature = "x86_fsgsbase"))]
macro_rules! save_fsgsbase(
    () => {
        "
        mov ecx, {MSR_FSBASE}
        rdmsr
        shl rdx, 32
        or rdx, rax
        mov r14, rdx
        mov ecx, {MSR_GSBASE}
        rdmsr
        shl rdx, 32
        or rdx, rax
        mov r13, rdx
        "
    }
);
// FSGSBASE variant: reads both bases directly. The MSR placeholders only appear in an assembler
// comment, presumably so that the named `asm!` operands still count as used.
#[cfg(feature = "x86_fsgsbase")]
macro_rules! save_fsgsbase(
    () => {
        "
        // placeholder: {MSR_FSBASE} {MSR_GSBASE}
        rdfsbase r14
        rdgsbase r13
        "
    }
);
// Assembly fragment spliced into `usermode` (via `concat!`): writes r14 back as the FS base and
// r13 back as the GS base. These are the registers `save_fsgsbase!` filled in.

// FSGSBASE variant: writes both bases directly from the registers.
#[cfg(feature = "x86_fsgsbase")]
macro_rules! restore_fsgsbase(
() => {
"
wrfsbase r14
wrgsbase r13
"
}
);
// MSR variant, for builds without the `x86_fsgsbase` feature. `wrmsr` takes the MSR index in ecx
// and the value in EDX:EAX, so each 64-bit base is split into its low half (eax) and high half
// (rdx shifted down by 32). Clobbers rax, rcx and rdx. `{MSR_FSBASE}` and `{MSR_GSBASE}` are
// resolved by the enclosing `asm!` invocation.
#[cfg(not(feature = "x86_fsgsbase"))]
macro_rules! restore_fsgsbase(
() => {
"
mov ecx, {MSR_FSBASE}
mov rdx, r14
mov eax, edx
shr rdx, 32
wrmsr
mov ecx, {MSR_GSBASE}
mov rdx, r13
mov eax, edx
shr rdx, 32
wrmsr
"
}
);
#[naked]
#[inline(never)]
// TODO: AbiCompatBool
pub unsafe extern "C" fn usermode(_ip: usize, _sp: usize, _arg: usize, _singlestep: u32) -> ! {
pub unsafe extern "C" fn usermode(_ip: usize, _sp: usize, _arg: usize, _is_singlestep: usize) -> ! {
// rdi, rsi, rdx, rcx
asm!(
"
mov rbx, {flag_interrupts}
test ecx, ecx
jz .after_singlestep_branch
or rbx, {flag_singlestep}
concat!("
shl rcx, {shift_singlestep}
or rcx, {flag_interrupts}
.after_singlestep_branch:
", inner_pit_unmap!(), "
// save `ip` (rdi), `sp` (rsi), and `arg` (rdx) in callee-preserved registers, so that
// they are not modified by `pti_unmap`
// Save rdx for later
mov r12, rdx
mov r13, rdi
mov r14, rsi
mov r15, rdx
call {pti_unmap}
// Target RFLAGS
mov r11, rcx
// Go to usermode
swapgs
mov r8, {user_data_seg_selector}
mov r9, {user_tls_seg_selector}
mov ds, r8d
mov es, r8d
mov fs, r9d
mov gs, r8d
// Target RFLAGS
mov r11, rbx
", save_fsgsbase!(), "
mov r15, {user_data_seg_selector}
mov ds, r15d
mov es, r15d
mov fs, r15d
mov gs, r15d
", restore_fsgsbase!(), "
// Target instruction pointer
mov rcx, r13
mov rcx, rdi
// Target stack pointer
mov rsp, r14
mov rsp, rsi
// Target argument
mov rdi, r15
mov rdi, r12
xor rax, rax
xor rbx, rbx
@@ -304,13 +389,16 @@ pub unsafe extern "C" fn usermode(_ip: usize, _sp: usize, _arg: usize, _singlest
// middle here, is minimal, unless the attacker already has partial control of kernel
// memory.)
sysretq
",
"),
flag_interrupts = const(FLAG_INTERRUPTS),
flag_singlestep = const(FLAG_SINGLESTEP),
shift_singlestep = const(SHIFT_SINGLESTEP),
pti_unmap = sym pti::unmap,
user_data_seg_selector = const(gdt::GDT_USER_DATA << 3 | 3),
user_tls_seg_selector = const(gdt::GDT_USER_TLS << 3 | 3),
MSR_FSBASE = const(x86::msr::IA32_FS_BASE),
MSR_GSBASE = const(x86::msr::IA32_GS_BASE),
options(noreturn),
);
}

View File

@@ -36,6 +36,10 @@ pub struct Context {
rbp: usize,
/// Stack pointer
rsp: usize,
/// FSBASE
pub fsbase: usize,
/// GSBASE
gsbase: usize,
/// FX valid?
loadable: AbiCompatBool,
}
@@ -48,7 +52,7 @@ enum AbiCompatBool {
}
impl Context {
pub fn new() -> Context {
pub fn new(pid: usize) -> Context {
Context {
loadable: AbiCompatBool::False,
fx: 0,
@@ -60,9 +64,14 @@ impl Context {
r14: 0,
r15: 0,
rbp: 0,
rsp: 0
rsp: 0,
fsbase: crate::USER_TCB_OFFSET + pid * crate::memory::PAGE_SIZE,
gsbase: 0,
}
}
/// Points this context's saved FS base at the TCB page belonging to `pid`.
///
/// TCB pages are laid out one page per pid, starting at `USER_TCB_OFFSET`.
pub fn update_tcb(&mut self, pid: usize) {
    let page_offset = pid * crate::memory::PAGE_SIZE;
    self.fsbase = crate::USER_TCB_OFFSET + page_offset;
}
pub fn get_page_utable(&mut self) -> usize {
self.cr3
@@ -138,6 +147,66 @@ impl Context {
}
}
// Builds the assembly that swaps one MSR between two contexts: the current value is stored into
// the context pointed to by rdi, and the value from the context pointed to by rsi is loaded.
//
// `$name` is the name of the `asm!` operand holding the MSR index, and `$offset` the name of the
// operand holding the field offset inside the context struct. Both are spliced in as `{...}`
// placeholders for the enclosing `asm!` invocation to resolve. Clobbers rax, rcx and rdx.
//
// `rdmsr` returns the MSR in EDX:EAX with the upper halves of RAX and RDX cleared, so the 64-bit
// value is assembled as `(rdx << 32) | rax`. The low half must be merged with a 64-bit `or`: a
// 32-bit register move would zero-extend into RDX and throw away the high half.
macro_rules! switch_msr(
    ($name:literal, $offset:literal) => {
        concat!("
            // EDX:EAX <= MSR
            mov ecx, {", $name, "}
            rdmsr
            shl rdx, 32
            or rdx, rax
            // Save old, load new.
            mov [rdi + {", $offset, "}], rdx
            mov rdx, [rsi + {", $offset, "}]
            mov eax, edx
            shr rdx, 32
            // MSR <= EDX:EAX
            wrmsr
        ")
    }
);
// Assembly fragment spliced into `switch_to` (via `concat!`): saves the outgoing context's FS and
// GS bases into the context pointed to by rdi (`prev`), and loads the incoming ones from the
// context pointed to by rsi (`next`).
//
// NOTE: RAX is a scratch register and can be set to whatever. There is also no return
// value in switch_to, so it will also never be read. The same goes for RDX, and RCX.
// TODO: Use runtime code patching (perhaps in the bootloader) by pushing alternative code
// sequences into a specialized section, with some macro resembling Linux's `.ALTERNATIVE`.

// FSGSBASE variant: reads and writes the bases directly through rax. The GS base that is swapped
// is the inactive one, i.e. the same value the MSR variant reaches through `MSR_KERNELGSBASE`;
// it is made active with `swapgs`, accessed, and then swapped back. The MSR placeholders only
// appear in an assembler comment, presumably so that the named `asm!` operands still count as
// used.
#[cfg(feature = "x86_fsgsbase")]
macro_rules! switch_fsgsbase(
() => {
"
// placeholder: {MSR_FSBASE} {MSR_KERNELGSBASE}
rdfsbase rax
mov [rdi + {off_fsbase}], rax
mov rax, [rsi + {off_fsbase}]
wrfsbase rax
swapgs
rdgsbase rax
mov [rdi + {off_gsbase}], rax
mov rax, [rsi + {off_gsbase}]
wrgsbase rax
swapgs
"
}
);
// MSR variant, for builds without the `x86_fsgsbase` feature: one `switch_msr!` sequence per
// base, each naming the `asm!` operand for the MSR index and the one for the field offset.
#[cfg(not(feature = "x86_fsgsbase"))]
macro_rules! switch_fsgsbase(
() => {
// TODO: Is it faster to perform two 32-bit memory accesses, rather than shifting?
concat!(
switch_msr!("MSR_FSBASE", "off_fsbase"),
switch_msr!("MSR_KERNELGSBASE", "off_gsbase"),
)
}
);
/// Switch to the next context by restoring its stack and registers
/// Check disassembly!
#[inline(never)]
@@ -152,7 +221,7 @@ pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) {
// - we can modify scratch registers, e.g. rax
// - we cannot change callee-preserved registers arbitrarily, e.g. rbx, which is why we
// store them here in the first place.
"
concat!("
// load `prev.fx`
mov rax, [rdi + {off_fx}]
@@ -163,26 +232,26 @@ pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) {
mov BYTE PTR [rdi + {off_loadable}], {true}
// compare `next.loadable` with true
cmp BYTE PTR [rsi + {off_loadable}], {true}
je switch_to.next_is_loadable
je 3f
fninit
jmp switch_to.after_fx
jmp 3f
switch_to.next_is_loadable:
2:
mov rax, [rsi + {off_fx}]
fxrstor64 [rax]
switch_to.after_fx:
3:
// Save the current CR3, and load the next CR3 if not identical
mov rcx, cr3
mov [rdi + {off_cr3}], rcx
mov rax, [rsi + {off_cr3}]
cmp rax, rcx
je switch_to.same_cr3
je 4f
mov cr3, rax
switch_to.same_cr3:
4:
// Save old registers, and load new ones
mov [rdi + {off_rbx}], rbx
mov rbx, [rsi + {off_rbx}]
@@ -205,6 +274,10 @@ pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) {
mov [rdi + {off_rsp}], rsp
mov rsp, [rsi + {off_rsp}]
",
switch_fsgsbase!(),
"
// push RFLAGS (can only be modified via stack)
pushfq
// pop RFLAGS into `self.rflags`
@@ -222,7 +295,7 @@ pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) {
// Note that switch_finish_hook will be responsible for executing `ret`.
jmp {switch_hook}
",
"),
off_fx = const(offset_of!(Cx, fx)),
off_cr3 = const(offset_of!(Cx, cr3)),
@@ -237,12 +310,17 @@ pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) {
off_rbp = const(offset_of!(Cx, rbp)),
off_rsp = const(offset_of!(Cx, rsp)),
off_fsbase = const(offset_of!(Cx, fsbase)),
off_gsbase = const(offset_of!(Cx, gsbase)),
MSR_FSBASE = const(x86::msr::IA32_FS_BASE),
MSR_KERNELGSBASE = const(x86::msr::IA32_KERNEL_GSBASE),
true = const(AbiCompatBool::True as u8),
switch_hook = sym crate::context::switch_finish_hook,
options(noreturn),
);
}
#[allow(dead_code)]
#[repr(packed)]
pub struct SignalHandlerStack {

View File

@@ -282,7 +282,7 @@ impl Context {
waitpid: Arc::new(WaitMap::new()),
pending: VecDeque::new(),
wake: None,
arch: arch::Context::new(),
arch: arch::Context::new(id.into()),
kfx: None,
kstack: None,
ksig: None,

View File

@@ -122,7 +122,7 @@ pub extern "C" fn signal_handler(sig: usize) {
sp -= mem::size_of::<usize>();
*(sp as *mut usize) = restorer;
usermode(handler, sp, sig, u32::from(singlestep));
usermode(handler, sp, sig, usize::from(singlestep));
}
}

View File

@@ -173,7 +173,6 @@ pub unsafe fn switch() -> bool {
if let Some(ref stack) = to_context.kstack {
gdt::set_tss_stack(stack.as_ptr() as usize + stack.len());
}
gdt::set_tcb(to_context.id.into());
}
#[cfg(target_arch = "aarch64")]
{

View File

@@ -378,6 +378,10 @@ pub fn clone(flags: CloneFlags, stack_base: usize) -> Result<ContextId> {
let mut new_ktable = unsafe {
InactivePageTable::from_address(new_utable.address())
};
#[cfg(target_arch = "x86_64")]
{
context.arch.update_tcb(pid.into());
}
// Copy kernel image mapping
{
@@ -904,7 +908,7 @@ fn fexec_noreturn(
}
// Go to usermode
unsafe { usermode(entry, sp, 0, u32::from(singlestep)) }
unsafe { usermode(entry, sp, 0, usize::from(singlestep)) }
}
pub fn fexec_kernel(fd: FileHandle, args: Box<[Box<[u8]>]>, vars: Box<[Box<[u8]>]>, name_override_opt: Option<Box<str>>, auxv: Option<Vec<usize>>) -> Result<usize> {