WIP: Let userspace manage fsbase/gsbase and TLS.
This commit is contained in:
@@ -39,13 +39,6 @@
|
||||
pub const USER_OFFSET: usize = 0;
|
||||
pub const USER_PML4: usize = (USER_OFFSET & PML4_MASK)/PML4_SIZE;
|
||||
|
||||
/// Offset to user TCB
|
||||
/// Each process has 4096 bytes, at an offset of 4096 * PID
|
||||
// TODO: Get a real 64-bit offset, and allow loading ELF sections higher up than the current
|
||||
// limit, iff the processor supports fsgsbase (in which case it is cheap to use 64-bit FS
|
||||
// offsets).
|
||||
pub const USER_TCB_OFFSET: usize = 0xB000_0000;
|
||||
|
||||
/// Offset to user arguments
|
||||
pub const USER_ARG_OFFSET: usize = USER_OFFSET + PML4_SIZE/2;
|
||||
|
||||
@@ -69,14 +62,8 @@
|
||||
/// Size of user sigstack
|
||||
pub const USER_SIGSTACK_SIZE: usize = 256 * 1024; // 256 KB
|
||||
|
||||
/// Offset to user TLS
|
||||
pub const USER_TLS_OFFSET: usize = USER_SIGSTACK_OFFSET + PML4_SIZE;
|
||||
pub const USER_TLS_PML4: usize = (USER_TLS_OFFSET & PML4_MASK)/PML4_SIZE;
|
||||
// Maximum TLS allocated to each PID, should be approximately 8 MB
|
||||
pub const USER_TLS_SIZE: usize = PML4_SIZE / 65536;
|
||||
|
||||
/// Offset to user temporary image (used when cloning)
|
||||
pub const USER_TMP_OFFSET: usize = USER_TLS_OFFSET + PML4_SIZE;
|
||||
pub const USER_TMP_OFFSET: usize = USER_SIGSTACK_OFFSET + PML4_SIZE;
|
||||
pub const USER_TMP_PML4: usize = (USER_TMP_OFFSET & PML4_MASK)/PML4_SIZE;
|
||||
|
||||
/// Offset to user temporary heap (used when cloning)
|
||||
@@ -95,10 +82,6 @@
|
||||
pub const USER_TMP_SIGSTACK_OFFSET: usize = USER_TMP_STACK_OFFSET + PML4_SIZE;
|
||||
pub const USER_TMP_SIGSTACK_PML4: usize = (USER_TMP_SIGSTACK_OFFSET & PML4_MASK)/PML4_SIZE;
|
||||
|
||||
/// Offset to user temporary tls (used when cloning)
|
||||
pub const USER_TMP_TLS_OFFSET: usize = USER_TMP_SIGSTACK_OFFSET + PML4_SIZE;
|
||||
pub const USER_TMP_TLS_PML4: usize = (USER_TMP_TLS_OFFSET & PML4_MASK)/PML4_SIZE;
|
||||
|
||||
/// Offset for usage in other temporary pages
|
||||
pub const USER_TMP_MISC_OFFSET: usize = USER_TMP_TLS_OFFSET + PML4_SIZE;
|
||||
pub const USER_TMP_MISC_OFFSET: usize = USER_TMP_SIGSTACK_OFFSET + PML4_SIZE;
|
||||
pub const USER_TMP_MISC_PML4: usize = (USER_TMP_MISC_OFFSET & PML4_MASK)/PML4_SIZE;
|
||||
|
||||
@@ -80,6 +80,11 @@ impl IretRegisters {
|
||||
println!("RSP: {:>016X}", { self.rsp });
|
||||
println!("SS: {:>016X}", { self.ss });
|
||||
}
|
||||
unsafe {
|
||||
let fsbase = x86::msr::rdmsr(x86::msr::IA32_FS_BASE);
|
||||
let gsbase = x86::msr::rdmsr(x86::msr::IA32_KERNEL_GSBASE);
|
||||
println!("FSBASE {:>016X}\nGSBASE {:016X}", fsbase, gsbase);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -275,13 +275,13 @@ macro_rules! save_fsgsbase(
|
||||
mov ecx, {MSR_FSBASE}
|
||||
rdmsr
|
||||
shl rdx, 32
|
||||
mov edx, eax
|
||||
or rdx, rax
|
||||
mov r14, rdx
|
||||
|
||||
mov ecx, {MSR_GSBASE}
|
||||
rdmsr
|
||||
shl rdx, 32
|
||||
mov edx, eax
|
||||
or rdx, rax
|
||||
mov r13, rdx
|
||||
"
|
||||
}
|
||||
@@ -354,8 +354,11 @@ pub unsafe extern "C" fn usermode(_ip: usize, _sp: usize, _arg: usize, _is_singl
|
||||
mov es, r15d
|
||||
mov fs, r15d
|
||||
mov gs, r15d
|
||||
",
|
||||
|
||||
", restore_fsgsbase!(), "
|
||||
// SS and CS will later be set via sysretq.
|
||||
|
||||
restore_fsgsbase!(), "
|
||||
|
||||
// Target instruction pointer
|
||||
mov rcx, rdi
|
||||
@@ -382,14 +385,15 @@ pub unsafe extern "C" fn usermode(_ip: usize, _sp: usize, _arg: usize, _is_singl
|
||||
xor r15, r15
|
||||
|
||||
fninit
|
||||
|
||||
",
|
||||
// NOTE: Regarding the sysretq vulnerability, this is safe as we cannot modify RCX,
|
||||
// even though the caller can give us the wrong address. But, it's marked unsafe, so
|
||||
// the caller is responsible for this! (And, the likelihood of rcx being changed in the
|
||||
// middle here, is minimal, unless the attacker already has partial control of kernel
|
||||
// memory.)
|
||||
"
|
||||
sysretq
|
||||
"),
|
||||
"),
|
||||
|
||||
flag_interrupts = const(FLAG_INTERRUPTS),
|
||||
shift_singlestep = const(SHIFT_SINGLESTEP),
|
||||
|
||||
@@ -36,10 +36,16 @@ pub struct Context {
|
||||
rbp: usize,
|
||||
/// Stack pointer
|
||||
rsp: usize,
|
||||
/// FSBASE
|
||||
pub fsbase: usize,
|
||||
/// GSBASE
|
||||
gsbase: usize,
|
||||
/// FSBASE.
|
||||
///
|
||||
/// NOTE: Same fsgsbase behavior as with gsbase.
|
||||
pub(crate) fsbase: usize,
|
||||
/// GSBASE.
|
||||
///
|
||||
/// NOTE: Without fsgsbase, this register will strictly be equal to the register value when
|
||||
/// running. With fsgsbase, this is neither saved nor restored upon every syscall (there is no
|
||||
/// need to!), and thus it must be re-read from the register before copying this struct.
|
||||
pub(crate) gsbase: usize,
|
||||
/// FX valid?
|
||||
loadable: AbiCompatBool,
|
||||
}
|
||||
@@ -52,7 +58,7 @@ enum AbiCompatBool {
|
||||
}
|
||||
|
||||
impl Context {
|
||||
pub fn new(pid: usize) -> Context {
|
||||
pub fn new() -> Context {
|
||||
Context {
|
||||
loadable: AbiCompatBool::False,
|
||||
fx: 0,
|
||||
@@ -65,13 +71,10 @@ impl Context {
|
||||
r15: 0,
|
||||
rbp: 0,
|
||||
rsp: 0,
|
||||
fsbase: crate::USER_TCB_OFFSET + pid * crate::memory::PAGE_SIZE,
|
||||
fsbase: 0,
|
||||
gsbase: 0,
|
||||
}
|
||||
}
|
||||
pub fn update_tcb(&mut self, pid: usize) {
|
||||
self.fsbase = crate::USER_TCB_OFFSET + pid * crate::memory::PAGE_SIZE;
|
||||
}
|
||||
|
||||
pub fn get_page_utable(&mut self) -> usize {
|
||||
self.cr3
|
||||
@@ -147,19 +150,10 @@ impl Context {
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! switch_msr(
|
||||
macro_rules! load_msr(
|
||||
($name:literal, $offset:literal) => {
|
||||
concat!("
|
||||
// EDX:EAX <= MSR
|
||||
|
||||
mov ecx, {", $name, "}
|
||||
rdmsr
|
||||
shl rdx, 32
|
||||
mov edx, eax
|
||||
|
||||
// Save old, load new.
|
||||
|
||||
mov [rdi + {", $offset, "}], rdx
|
||||
mov rdx, [rsi + {", $offset, "}]
|
||||
mov eax, edx
|
||||
shr rdx, 32
|
||||
@@ -198,10 +192,9 @@ macro_rules! switch_fsgsbase(
|
||||
#[cfg(not(feature = "x86_fsgsbase"))]
|
||||
macro_rules! switch_fsgsbase(
|
||||
() => {
|
||||
// TODO: Is it faster to perform two 32-bit memory accesses, rather than shifting?
|
||||
concat!(
|
||||
switch_msr!("MSR_FSBASE", "off_fsbase"),
|
||||
switch_msr!("MSR_KERNELGSBASE", "off_gsbase"),
|
||||
load_msr!("MSR_FSBASE", "off_fsbase"),
|
||||
load_msr!("MSR_KERNELGSBASE", "off_gsbase"),
|
||||
)
|
||||
}
|
||||
);
|
||||
|
||||
@@ -9,6 +9,7 @@ use core::{
|
||||
alloc::{GlobalAlloc, Layout},
|
||||
cmp::Ordering,
|
||||
mem,
|
||||
ptr::NonNull,
|
||||
};
|
||||
use spin::RwLock;
|
||||
|
||||
@@ -20,7 +21,9 @@ use crate::context::memory::{UserGrants, Memory, SharedMemory, Tls};
|
||||
use crate::ipi::{ipi, IpiKind, IpiTarget};
|
||||
use crate::scheme::{SchemeNamespace, FileHandle};
|
||||
use crate::sync::WaitMap;
|
||||
|
||||
use crate::syscall::data::SigAction;
|
||||
use crate::syscall::error::{Result, Error, ENOMEM};
|
||||
use crate::syscall::flag::{SIG_DFL, SigActionFlags};
|
||||
|
||||
/// Unique identifier for a context (i.e. `pid`).
|
||||
@@ -203,9 +206,9 @@ pub struct Context {
|
||||
/// Current system call
|
||||
pub syscall: Option<(usize, usize, usize, usize, usize, usize)>,
|
||||
/// Head buffer to use when system call buffers are not page aligned
|
||||
pub syscall_head: Box<[u8]>,
|
||||
pub syscall_head: AlignedBox<[u8; PAGE_SIZE], PAGE_SIZE>,
|
||||
/// Tail buffer to use when system call buffers are not page aligned
|
||||
pub syscall_tail: Box<[u8]>,
|
||||
pub syscall_tail: AlignedBox<[u8; PAGE_SIZE], PAGE_SIZE>,
|
||||
/// Context is halting parent
|
||||
pub vfork: bool,
|
||||
/// Context is being waited on
|
||||
@@ -230,8 +233,6 @@ pub struct Context {
|
||||
pub stack: Option<SharedMemory>,
|
||||
/// User signal stack
|
||||
pub sigstack: Option<Memory>,
|
||||
/// User Thread local storage
|
||||
pub tls: Option<Tls>,
|
||||
/// User grants
|
||||
pub grants: Arc<RwLock<UserGrants>>,
|
||||
/// The name of the context
|
||||
@@ -253,12 +254,63 @@ pub struct Context {
|
||||
pub ptrace_stop: bool
|
||||
}
|
||||
|
||||
impl Context {
|
||||
pub fn new(id: ContextId) -> Context {
|
||||
let syscall_head = unsafe { Box::from_raw(crate::ALLOCATOR.alloc(Layout::from_size_align_unchecked(PAGE_SIZE, PAGE_SIZE)) as *mut [u8; PAGE_SIZE]) };
|
||||
let syscall_tail = unsafe { Box::from_raw(crate::ALLOCATOR.alloc(Layout::from_size_align_unchecked(PAGE_SIZE, PAGE_SIZE)) as *mut [u8; PAGE_SIZE]) };
|
||||
// Necessary because GlobalAlloc::dealloc requires the layout to be the same, and therefore Box
|
||||
// cannot be used for increased alignment directly.
|
||||
// TODO: move to common?
|
||||
pub struct AlignedBox<T, const ALIGN: usize> {
|
||||
inner: Unique<T>,
|
||||
}
|
||||
pub unsafe trait ValidForZero {}
|
||||
unsafe impl<const N: usize> ValidForZero for [u8; N] {}
|
||||
|
||||
Context {
|
||||
impl<T, const ALIGN: usize> AlignedBox<T, ALIGN> {
|
||||
const LAYOUT: core::alloc::Layout = {
|
||||
const fn max(a: usize, b: usize) -> usize {
|
||||
if a > b { a } else { b }
|
||||
}
|
||||
|
||||
match core::alloc::Layout::from_size_align(mem::size_of::<T>(), max(mem::align_of::<T>(), ALIGN)) {
|
||||
Ok(l) => l,
|
||||
Err(_) => panic!("layout validation failed at compile time"),
|
||||
}
|
||||
};
|
||||
#[inline(always)]
|
||||
pub fn try_zeroed() -> Result<Self>
|
||||
where
|
||||
T: ValidForZero,
|
||||
{
|
||||
Ok(unsafe {
|
||||
let ptr = crate::ALLOCATOR.alloc_zeroed(Self::LAYOUT);
|
||||
if ptr.is_null() {
|
||||
return Err(Error::new(ENOMEM))?;
|
||||
}
|
||||
Self {
|
||||
inner: Unique::new_unchecked(ptr.cast()),
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, const ALIGN: usize> core::fmt::Debug for AlignedBox<T, ALIGN> {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||
write!(f, "[aligned box at {:p}, size {} alignment {}]", self.inner.as_ptr(), mem::size_of::<T>(), mem::align_of::<T>())
|
||||
}
|
||||
}
|
||||
impl<T, const ALIGN: usize> Drop for AlignedBox<T, ALIGN> {
|
||||
fn drop(&mut self) {
|
||||
unsafe {
|
||||
core::ptr::drop_in_place(self.inner.as_ptr());
|
||||
crate::ALLOCATOR.dealloc(self.inner.as_ptr().cast(), Self::LAYOUT);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Context {
|
||||
pub fn new(id: ContextId) -> Result<Context> {
|
||||
let syscall_head = AlignedBox::try_zeroed()?;
|
||||
let syscall_tail = AlignedBox::try_zeroed()?;
|
||||
|
||||
Ok(Context {
|
||||
id,
|
||||
pgid: id,
|
||||
ppid: ContextId::from(0),
|
||||
@@ -282,7 +334,7 @@ impl Context {
|
||||
waitpid: Arc::new(WaitMap::new()),
|
||||
pending: VecDeque::new(),
|
||||
wake: None,
|
||||
arch: arch::Context::new(id.into()),
|
||||
arch: arch::Context::new(),
|
||||
kfx: None,
|
||||
kstack: None,
|
||||
ksig: None,
|
||||
@@ -290,7 +342,6 @@ impl Context {
|
||||
image: Vec::new(),
|
||||
stack: None,
|
||||
sigstack: None,
|
||||
tls: None,
|
||||
grants: Arc::new(RwLock::new(UserGrants::default())),
|
||||
name: Arc::new(RwLock::new(String::new().into_boxed_str())),
|
||||
cwd: Arc::new(RwLock::new(String::new())),
|
||||
@@ -305,7 +356,7 @@ impl Context {
|
||||
); 128])),
|
||||
regs: None,
|
||||
ptrace_stop: false
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Make a relative path absolute
|
||||
|
||||
@@ -69,7 +69,7 @@ impl ContextList {
|
||||
let id = ContextId::from(self.next_id);
|
||||
self.next_id += 1;
|
||||
|
||||
assert!(self.map.insert(id, Arc::new(RwLock::new(Context::new(id)))).is_none());
|
||||
assert!(self.map.insert(id, Arc::new(RwLock::new(Context::new(id)?))).is_none());
|
||||
|
||||
Ok(self.map.get(&id).expect("Failed to insert new context. ID is out of bounds."))
|
||||
}
|
||||
|
||||
@@ -82,6 +82,12 @@ impl<'a> Elf<'a> {
|
||||
pub fn program_headers(&self) -> usize {
|
||||
self.header.e_phoff as usize
|
||||
}
|
||||
pub fn program_header_count(&self) -> usize {
|
||||
self.header.e_phnum as usize
|
||||
}
|
||||
pub fn program_headers_size(&self) -> usize {
|
||||
self.header.e_phentsize as usize
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ElfSections<'a> {
|
||||
|
||||
@@ -48,6 +48,7 @@
|
||||
#![feature(concat_idents)]
|
||||
#![feature(const_btree_new)]
|
||||
#![feature(const_maybe_uninit_as_ptr)]
|
||||
#![feature(const_panic)]
|
||||
#![feature(const_ptr_offset_from)]
|
||||
#![feature(const_raw_ptr_deref)]
|
||||
#![feature(core_intrinsics)]
|
||||
|
||||
@@ -137,6 +137,7 @@ impl SchemeList {
|
||||
//TODO: Only memory: is in the null namespace right now. It should be removed when
|
||||
//anonymous mmap's are implemented
|
||||
self.insert(ns, "memory", |_| Arc::new(MemoryScheme::new())).unwrap();
|
||||
self.insert(ns, "thisproc", |_| Arc::new(ProcScheme::restricted())).unwrap();
|
||||
}
|
||||
|
||||
/// Initialize a new namespace
|
||||
@@ -168,6 +169,7 @@ impl SchemeList {
|
||||
self.insert(ns, "initfs", |_| Arc::new(InitFsScheme::new())).unwrap();
|
||||
self.insert(ns, "irq", |scheme_id| Arc::new(IrqScheme::new(scheme_id))).unwrap();
|
||||
self.insert(ns, "proc", |scheme_id| Arc::new(ProcScheme::new(scheme_id))).unwrap();
|
||||
self.insert(ns, "thisproc", |_| Arc::new(ProcScheme::restricted())).unwrap();
|
||||
self.insert(ns, "serio", |scheme_id| Arc::new(SerioScheme::new(scheme_id))).unwrap();
|
||||
|
||||
#[cfg(feature = "live")] {
|
||||
|
||||
@@ -6,6 +6,7 @@ use crate::{
|
||||
syscall::{
|
||||
FloatRegisters,
|
||||
IntRegisters,
|
||||
EnvRegisters,
|
||||
data::{PtraceEvent, Stat},
|
||||
error::*,
|
||||
flag::*,
|
||||
@@ -57,6 +58,9 @@ fn try_stop_context<F, T>(pid: ContextId, mut callback: F) -> Result<T>
|
||||
where
|
||||
F: FnMut(&mut Context) -> Result<T>,
|
||||
{
|
||||
if pid == context::context_id() {
|
||||
return Err(Error::new(EBADF));
|
||||
}
|
||||
// Stop process
|
||||
let (was_stopped, mut running) = with_context_mut(pid, |context| {
|
||||
let was_stopped = context.ptrace_stop;
|
||||
@@ -88,7 +92,8 @@ where
|
||||
#[derive(Clone, Copy, PartialEq, Eq)]
|
||||
enum RegsKind {
|
||||
Float,
|
||||
Int
|
||||
Int,
|
||||
Env,
|
||||
}
|
||||
#[derive(Clone, Copy, PartialEq, Eq)]
|
||||
enum Operation {
|
||||
@@ -195,6 +200,12 @@ pub static PROC_SCHEME_ID: AtomicSchemeId = AtomicSchemeId::default();
|
||||
pub struct ProcScheme {
|
||||
next_id: AtomicUsize,
|
||||
handles: RwLock<BTreeMap<usize, Handle>>,
|
||||
access: Access,
|
||||
}
|
||||
#[derive(PartialEq)]
|
||||
pub enum Access {
|
||||
OtherProcesses,
|
||||
Restricted,
|
||||
}
|
||||
|
||||
impl ProcScheme {
|
||||
@@ -204,6 +215,14 @@ impl ProcScheme {
|
||||
Self {
|
||||
next_id: AtomicUsize::new(0),
|
||||
handles: RwLock::new(BTreeMap::new()),
|
||||
access: Access::OtherProcesses,
|
||||
}
|
||||
}
|
||||
pub fn restricted() -> Self {
|
||||
Self {
|
||||
next_id: AtomicUsize::new(0),
|
||||
handles: RwLock::new(BTreeMap::new()),
|
||||
access: Access::Restricted,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -211,15 +230,22 @@ impl ProcScheme {
|
||||
impl Scheme for ProcScheme {
|
||||
fn open(&self, path: &str, flags: usize, uid: u32, gid: u32) -> Result<usize> {
|
||||
let mut parts = path.splitn(2, '/');
|
||||
let pid = parts.next()
|
||||
.and_then(|s| s.parse().ok())
|
||||
.map(ContextId::from)
|
||||
.ok_or(Error::new(EINVAL))?;
|
||||
let pid_str = parts.next()
|
||||
.ok_or(Error::new(ENOENT))?;
|
||||
|
||||
let pid = if pid_str == "current" {
|
||||
context::context_id()
|
||||
} else if self.access == Access::Restricted {
|
||||
return Err(Error::new(EACCES));
|
||||
} else {
|
||||
ContextId::from(pid_str.parse().map_err(|_| Error::new(ENOENT))?)
|
||||
};
|
||||
|
||||
let operation = match parts.next() {
|
||||
Some("mem") => Operation::Memory,
|
||||
Some("regs/float") => Operation::Regs(RegsKind::Float),
|
||||
Some("regs/int") => Operation::Regs(RegsKind::Int),
|
||||
Some("regs/env") => Operation::Regs(RegsKind::Env),
|
||||
Some("trace") => Operation::Trace,
|
||||
Some("exe") => Operation::Static("exe"),
|
||||
_ => return Err(Error::new(EINVAL))
|
||||
@@ -382,7 +408,8 @@ impl Scheme for ProcScheme {
|
||||
Operation::Regs(kind) => {
|
||||
union Output {
|
||||
float: FloatRegisters,
|
||||
int: IntRegisters
|
||||
int: IntRegisters,
|
||||
env: EnvRegisters,
|
||||
}
|
||||
|
||||
let (output, size) = match kind {
|
||||
@@ -406,7 +433,37 @@ impl Scheme for ProcScheme {
|
||||
stack.save(&mut regs);
|
||||
Ok((Output { int: regs }, mem::size_of::<IntRegisters>()))
|
||||
}
|
||||
})?
|
||||
})?,
|
||||
RegsKind::Env => {
|
||||
let (fsbase, gsbase) = if info.pid == context::context_id() {
|
||||
#[cfg(not(feature = "x86_fsgsbase"))]
|
||||
unsafe {
|
||||
(
|
||||
x86::msr::rdmsr(x86::msr::IA32_FS_BASE),
|
||||
x86::msr::rdmsr(x86::msr::IA32_KERNEL_GSBASE),
|
||||
)
|
||||
}
|
||||
#[cfg(feature = "x86_fsgsbase")]
|
||||
unsafe {
|
||||
use x86::bits64::segmentation::*;
|
||||
|
||||
(
|
||||
rdfsbase(),
|
||||
{
|
||||
swapgs();
|
||||
let gsbase = rdgsbase();
|
||||
swapgs();
|
||||
gsbase
|
||||
}
|
||||
)
|
||||
}
|
||||
} else {
|
||||
try_stop_context(info.pid, |context| {
|
||||
Ok((context.arch.fsbase as u64, context.arch.gsbase as u64))
|
||||
})?
|
||||
};
|
||||
(Output { env: EnvRegisters { fsbase, gsbase }}, mem::size_of::<EnvRegisters>())
|
||||
}
|
||||
};
|
||||
|
||||
let bytes = unsafe {
|
||||
@@ -503,6 +560,9 @@ impl Scheme for ProcScheme {
|
||||
if buf.len() < mem::size_of::<FloatRegisters>() {
|
||||
return Ok(0);
|
||||
}
|
||||
if (buf.as_ptr() as usize) % mem::align_of::<FloatRegisters>() != 0 {
|
||||
return Err(Error::new(EINVAL));
|
||||
}
|
||||
let regs = unsafe {
|
||||
*(buf as *const _ as *const FloatRegisters)
|
||||
};
|
||||
@@ -521,6 +581,9 @@ impl Scheme for ProcScheme {
|
||||
// A buffer too small to hold a full `IntRegisters` is answered with `Ok(0)`.
if buf.len() < mem::size_of::<IntRegisters>() {
    return Ok(0);
}
// The buffer is reinterpreted as `IntRegisters` below, so it is that type's alignment which
// must be satisfied (not `FloatRegisters`', whose alignment may differ).
if (buf.as_ptr() as usize) % mem::align_of::<IntRegisters>() != 0 {
    return Err(Error::new(EINVAL));
}
// Length and alignment for `IntRegisters` were both checked above.
let regs = unsafe {
    *(buf as *const _ as *const IntRegisters)
};
|
||||
@@ -537,6 +600,57 @@ impl Scheme for ProcScheme {
|
||||
}
|
||||
})
|
||||
}
|
||||
RegsKind::Env => {
|
||||
if buf.len() < mem::size_of::<EnvRegisters>() {
|
||||
return Ok(0);
|
||||
}
|
||||
if (buf.as_ptr() as usize) % mem::align_of::<EnvRegisters>() != 0 {
|
||||
return Err(Error::new(EINVAL));
|
||||
}
|
||||
let regs = unsafe {
|
||||
*(buf as *const _ as *const EnvRegisters)
|
||||
};
|
||||
use rmm::{Arch as _, X8664Arch};
|
||||
if !(X8664Arch::virt_is_valid(VirtualAddress::new(regs.fsbase as usize)) && X8664Arch::virt_is_valid(VirtualAddress::new(regs.gsbase as usize))) {
|
||||
return Err(Error::new(EINVAL));
|
||||
}
|
||||
|
||||
if info.pid == context::context_id() {
|
||||
#[cfg(not(feature = "x86_fsgsbase"))]
|
||||
unsafe {
|
||||
x86::msr::wrmsr(x86::msr::IA32_FS_BASE, regs.fsbase);
|
||||
// We have to write to KERNEL_GSBASE, because when the kernel returns to
|
||||
// userspace, it will have executed SWAPGS first.
|
||||
x86::msr::wrmsr(x86::msr::IA32_KERNEL_GSBASE, regs.gsbase);
|
||||
|
||||
match context::contexts().current().ok_or(Error::new(ESRCH))?.write().arch {
|
||||
ref mut arch => {
|
||||
arch.fsbase = regs.fsbase as usize;
|
||||
arch.gsbase = regs.gsbase as usize;
|
||||
}
|
||||
}
|
||||
}
|
||||
#[cfg(feature = "x86_fsgsbase")]
|
||||
unsafe {
|
||||
use x86::bits64::segmentation::*;
|
||||
|
||||
wrfsbase(regs.fsbase);
|
||||
swapgs();
|
||||
wrgsbase(regs.gsbase);
|
||||
swapgs();
|
||||
|
||||
// No need to update the current context; with fsgsbase enabled, these
|
||||
// registers are automatically saved and restored.
|
||||
}
|
||||
} else {
|
||||
try_stop_context(info.pid, |context| {
|
||||
context.arch.fsbase = regs.fsbase as usize;
|
||||
context.arch.gsbase = regs.gsbase as usize;
|
||||
Ok(())
|
||||
})?;
|
||||
}
|
||||
Ok(mem::size_of::<EnvRegisters>())
|
||||
}
|
||||
},
|
||||
Operation::Trace => {
|
||||
if buf.len() < mem::size_of::<u64>() {
|
||||
@@ -621,6 +735,7 @@ impl Scheme for ProcScheme {
|
||||
Operation::Memory => "mem",
|
||||
Operation::Regs(RegsKind::Float) => "regs/float",
|
||||
Operation::Regs(RegsKind::Int) => "regs/int",
|
||||
Operation::Regs(RegsKind::Env) => "regs/env",
|
||||
Operation::Trace => "trace",
|
||||
Operation::Static(path) => path,
|
||||
});
|
||||
|
||||
@@ -7,6 +7,7 @@ extern crate syscall;
|
||||
pub use self::syscall::{
|
||||
FloatRegisters,
|
||||
IntRegisters,
|
||||
EnvRegisters,
|
||||
data,
|
||||
error,
|
||||
flag,
|
||||
|
||||
@@ -27,7 +27,7 @@ use crate::scheme::FileHandle;
|
||||
use crate::start::usermode;
|
||||
use crate::syscall::data::{SigAction, Stat};
|
||||
use crate::syscall::error::*;
|
||||
use crate::syscall::flag::{wifcontinued, wifstopped, AT_ENTRY, AT_NULL, AT_PHDR, CloneFlags,
|
||||
use crate::syscall::flag::{wifcontinued, wifstopped, AT_ENTRY, AT_NULL, AT_PHDR, AT_PHENT, AT_PHNUM, CloneFlags,
|
||||
CLONE_FILES, CLONE_FS, CLONE_SIGHAND, CLONE_STACK, CLONE_VFORK, CLONE_VM,
|
||||
MapFlags, PROT_EXEC, PROT_READ, PROT_WRITE, PTRACE_EVENT_CLONE,
|
||||
PTRACE_STOP_EXIT, SigActionFlags, SIG_BLOCK, SIG_DFL, SIG_SETMASK, SIG_UNBLOCK,
|
||||
@@ -57,7 +57,6 @@ pub fn clone(flags: CloneFlags, stack_base: usize) -> Result<ContextId> {
|
||||
let mut image = vec![];
|
||||
let mut stack_opt = None;
|
||||
let mut sigstack_opt = None;
|
||||
let mut tls_opt = None;
|
||||
let grants;
|
||||
let name;
|
||||
let cwd;
|
||||
@@ -202,36 +201,6 @@ pub fn clone(flags: CloneFlags, stack_base: usize) -> Result<ContextId> {
|
||||
sigstack_opt = Some(new_sigstack);
|
||||
}
|
||||
|
||||
if let Some(ref tls) = context.tls {
|
||||
let mut new_tls = context::memory::Tls {
|
||||
master: tls.master,
|
||||
file_size: tls.file_size,
|
||||
mem: context::memory::Memory::new(
|
||||
VirtualAddress::new(crate::USER_TMP_TLS_OFFSET),
|
||||
tls.mem.size(),
|
||||
PageFlags::new().write(true),
|
||||
true
|
||||
),
|
||||
offset: tls.offset,
|
||||
};
|
||||
|
||||
|
||||
if flags.contains(CLONE_VM) {
|
||||
unsafe {
|
||||
new_tls.load();
|
||||
}
|
||||
} else {
|
||||
unsafe {
|
||||
intrinsics::copy(tls.mem.start_address().data() as *const u8,
|
||||
new_tls.mem.start_address().data() as *mut u8,
|
||||
tls.mem.size());
|
||||
}
|
||||
}
|
||||
|
||||
new_tls.mem.remap(tls.mem.flags());
|
||||
tls_opt = Some(new_tls);
|
||||
}
|
||||
|
||||
if flags.contains(CLONE_VM) {
|
||||
grants = Arc::clone(&context.grants);
|
||||
} else {
|
||||
@@ -352,6 +321,14 @@ pub fn clone(flags: CloneFlags, stack_base: usize) -> Result<ContextId> {
|
||||
|
||||
context.arch = arch;
|
||||
|
||||
// This is needed because these registers may have changed after this context was
|
||||
// switched to, but before this was called.
|
||||
#[cfg(all(target_arch = "x86_64", feature = "x86_fsgsbase"))]
|
||||
unsafe {
|
||||
context.arch.fsbase = x86::bits64::segmentation::rdfsbase() as usize;
|
||||
context.arch.gsbase = x86::bits64::segmentation::rdgsbase() as usize;
|
||||
}
|
||||
|
||||
let mut active_utable = unsafe { ActivePageTable::new(TableKind::User) };
|
||||
let mut active_ktable = unsafe { ActivePageTable::new(TableKind::Kernel) };
|
||||
|
||||
@@ -378,10 +355,6 @@ pub fn clone(flags: CloneFlags, stack_base: usize) -> Result<ContextId> {
|
||||
let mut new_ktable = unsafe {
|
||||
InactivePageTable::from_address(new_utable.address())
|
||||
};
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
{
|
||||
context.arch.update_tcb(pid.into());
|
||||
}
|
||||
|
||||
// Copy kernel image mapping
|
||||
{
|
||||
@@ -502,15 +475,6 @@ pub fn clone(flags: CloneFlags, stack_base: usize) -> Result<ContextId> {
|
||||
context.sigstack = Some(sigstack);
|
||||
}
|
||||
|
||||
// Set up TCB
|
||||
let tcb_addr = crate::USER_TCB_OFFSET + context.id.into() * PAGE_SIZE;
|
||||
let mut tcb = context::memory::Memory::new(
|
||||
VirtualAddress::new(tcb_addr),
|
||||
PAGE_SIZE,
|
||||
PageFlags::new().write(true).user(true),
|
||||
true
|
||||
);
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
{
|
||||
if let Some(stack) = &mut context.kstack {
|
||||
@@ -534,38 +498,6 @@ pub fn clone(flags: CloneFlags, stack_base: usize) -> Result<ContextId> {
|
||||
}
|
||||
}
|
||||
|
||||
// Setup user TLS
|
||||
if let Some(mut tls) = tls_opt {
|
||||
// Copy TLS mapping
|
||||
{
|
||||
let frame = active_utable.p4()[crate::USER_TLS_PML4].pointed_frame().expect("user tls not mapped");
|
||||
let flags = active_utable.p4()[crate::USER_TLS_PML4].flags();
|
||||
active_utable.with(&mut new_utable, &mut temporary_upage, |mapper| {
|
||||
mapper.p4_mut()[crate::USER_TLS_PML4].set(frame, flags);
|
||||
});
|
||||
}
|
||||
|
||||
// TODO: Make sure size is not greater than USER_TLS_SIZE
|
||||
let tls_addr = crate::USER_TLS_OFFSET + context.id.into() * crate::USER_TLS_SIZE;
|
||||
//println!("{}: Copy TLS: address 0x{:x}, size 0x{:x}", context.id.into(), tls_addr, tls.mem.size());
|
||||
tls.mem.move_to(VirtualAddress::new(tls_addr), &mut new_utable, &mut temporary_upage);
|
||||
unsafe {
|
||||
*(tcb_addr as *mut usize) = tls.mem.start_address().data() + tls.mem.size();
|
||||
}
|
||||
context.tls = Some(tls);
|
||||
} else {
|
||||
//println!("{}: Copy TCB", context.id.into());
|
||||
let parent_tcb_addr = crate::USER_TCB_OFFSET + ppid.into() * PAGE_SIZE;
|
||||
unsafe {
|
||||
intrinsics::copy(parent_tcb_addr as *const u8,
|
||||
tcb_addr as *mut u8,
|
||||
tcb.size());
|
||||
}
|
||||
}
|
||||
|
||||
tcb.move_to(VirtualAddress::new(tcb_addr), &mut new_utable, &mut temporary_upage);
|
||||
context.image.push(tcb.to_shared());
|
||||
|
||||
context.name = name;
|
||||
|
||||
context.cwd = cwd;
|
||||
@@ -599,13 +531,11 @@ fn empty(context: &mut context::Context, reaping: bool) {
|
||||
assert!(context.image.is_empty());
|
||||
assert!(context.stack.is_none());
|
||||
assert!(context.sigstack.is_none());
|
||||
assert!(context.tls.is_none());
|
||||
} else {
|
||||
// Unmap previous image, heap, grants, stack, and tls
|
||||
// Unmap previous image, heap, grants, stack
|
||||
context.image.clear();
|
||||
drop(context.stack.take());
|
||||
drop(context.sigstack.take());
|
||||
drop(context.tls.take());
|
||||
}
|
||||
|
||||
// NOTE: If we do not replace the grants `Arc`, then a strange situation can appear where the
|
||||
@@ -651,10 +581,12 @@ impl Drop for ExecFile {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn fexec_noreturn(
|
||||
setuid: Option<u32>,
|
||||
setgid: Option<u32>,
|
||||
name: Box<str>,
|
||||
phdrs_region: core::ops::Range<usize>,
|
||||
data: Box<[u8]>,
|
||||
args: Box<[Box<[u8]>]>,
|
||||
vars: Box<[Box<[u8]>]>,
|
||||
@@ -664,6 +596,11 @@ fn fexec_noreturn(
|
||||
let singlestep;
|
||||
let mut sp = crate::USER_STACK_OFFSET + crate::USER_STACK_SIZE - 256;
|
||||
|
||||
let phdrs_len = 4096;
|
||||
let phdrs_base_addr = sp - phdrs_len;
|
||||
|
||||
sp -= phdrs_len;
|
||||
|
||||
{
|
||||
let (vfork, ppid, files) = {
|
||||
let contexts = context::contexts();
|
||||
@@ -678,6 +615,25 @@ fn fexec_noreturn(
|
||||
|
||||
empty(&mut context, false);
|
||||
|
||||
#[cfg(all(target_arch = "x86_64"))]
|
||||
{
|
||||
context.arch.fsbase = 0;
|
||||
context.arch.gsbase = 0;
|
||||
|
||||
#[cfg(feature = "x86_fsgsbase")]
|
||||
unsafe {
|
||||
x86::bits64::segmentation::wrfsbase(0);
|
||||
x86::bits64::segmentation::swapgs();
|
||||
x86::bits64::segmentation::wrgsbase(0);
|
||||
x86::bits64::segmentation::swapgs();
|
||||
}
|
||||
#[cfg(not(feature = "x86_fsgsbase"))]
|
||||
unsafe {
|
||||
x86::msr::wrmsr(x86::msr::IA32_FS_BASE, 0);
|
||||
x86::msr::wrmsr(x86::msr::IA32_KERNEL_GSBASE, 0);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(uid) = setuid {
|
||||
context.euid = uid;
|
||||
}
|
||||
@@ -687,20 +643,10 @@ fn fexec_noreturn(
|
||||
}
|
||||
|
||||
// Map and copy new segments
|
||||
let mut tls_opt = None;
|
||||
{
|
||||
let elf = elf::Elf::from(&data).unwrap();
|
||||
entry = elf.entry();
|
||||
|
||||
// Always map TCB
|
||||
let tcb_addr = crate::USER_TCB_OFFSET + context.id.into() * PAGE_SIZE;
|
||||
let tcb_mem = context::memory::Memory::new(
|
||||
VirtualAddress::new(tcb_addr),
|
||||
PAGE_SIZE,
|
||||
PageFlags::new().write(true).user(true),
|
||||
true
|
||||
);
|
||||
|
||||
for segment in elf.segments() {
|
||||
match segment.p_type {
|
||||
program_header::PT_LOAD => {
|
||||
@@ -734,45 +680,11 @@ fn fexec_noreturn(
|
||||
|
||||
context.image.push(memory.to_shared());
|
||||
},
|
||||
program_header::PT_TLS => {
|
||||
let aligned_size = if segment.p_align > 0 {
|
||||
((segment.p_memsz + (segment.p_align - 1))/segment.p_align) * segment.p_align
|
||||
} else {
|
||||
segment.p_memsz
|
||||
} as usize;
|
||||
let rounded_size = ((aligned_size + PAGE_SIZE - 1)/PAGE_SIZE) * PAGE_SIZE;
|
||||
let rounded_offset = rounded_size - aligned_size;
|
||||
|
||||
// TODO: Make sure size is not greater than USER_TLS_SIZE
|
||||
let tls_addr = crate::USER_TLS_OFFSET + context.id.into() * crate::USER_TLS_SIZE;
|
||||
let tls = context::memory::Tls {
|
||||
master: VirtualAddress::new(segment.p_vaddr as usize),
|
||||
file_size: segment.p_filesz as usize,
|
||||
mem: context::memory::Memory::new(
|
||||
VirtualAddress::new(tls_addr),
|
||||
rounded_size as usize,
|
||||
PageFlags::new().write(true).user(true),
|
||||
true
|
||||
),
|
||||
offset: rounded_offset as usize,
|
||||
};
|
||||
|
||||
unsafe {
|
||||
*(tcb_addr as *mut usize) = tls.mem.start_address().data() + tls.mem.size();
|
||||
}
|
||||
|
||||
tls_opt = Some(tls);
|
||||
},
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
context.image.push(tcb_mem.to_shared());
|
||||
}
|
||||
|
||||
// Data no longer required, can deallocate
|
||||
drop(data);
|
||||
|
||||
// Map stack
|
||||
context.stack = Some(context::memory::Memory::new(
|
||||
VirtualAddress::new(crate::USER_STACK_OFFSET),
|
||||
@@ -789,20 +701,19 @@ fn fexec_noreturn(
|
||||
true
|
||||
));
|
||||
|
||||
// Map TLS
|
||||
if let Some(mut tls) = tls_opt {
|
||||
unsafe {
|
||||
tls.load();
|
||||
}
|
||||
|
||||
context.tls = Some(tls);
|
||||
}
|
||||
|
||||
let mut push = |arg| {
|
||||
sp -= mem::size_of::<usize>();
|
||||
unsafe { *(sp as *mut usize) = arg; }
|
||||
};
|
||||
|
||||
unsafe {
|
||||
let mut source = core::slice::from_raw_parts_mut(phdrs_base_addr as *mut u8, phdrs_len);
|
||||
source[..phdrs_region.len()].copy_from_slice(&data[phdrs_region.clone()]);
|
||||
}
|
||||
|
||||
// Data no longer required, can deallocate
|
||||
drop(data);
|
||||
|
||||
// Push auxiliary vector
|
||||
push(AT_NULL);
|
||||
for &arg in auxv.iter().rev() {
|
||||
@@ -1019,7 +930,11 @@ pub fn fexec_kernel(fd: FileHandle, args: Box<[Box<[u8]>]>, vars: Box<[Box<[u8]>
|
||||
auxv.push(AT_ENTRY);
|
||||
auxv.push(elf.entry());
|
||||
auxv.push(AT_PHDR);
|
||||
auxv.push(elf.program_headers());
|
||||
auxv.push(crate::USER_STACK_OFFSET + crate::USER_STACK_SIZE - 256 - 4096);
|
||||
auxv.push(AT_PHENT);
|
||||
auxv.push(elf.program_headers_size());
|
||||
auxv.push(AT_PHNUM);
|
||||
auxv.push(elf.program_header_count());
|
||||
|
||||
auxv
|
||||
};
|
||||
@@ -1068,26 +983,19 @@ pub fn fexec_kernel(fd: FileHandle, args: Box<[Box<[u8]>]>, vars: Box<[Box<[u8]>
|
||||
Some(auxv),
|
||||
);
|
||||
},
|
||||
program_header::PT_LOAD => {
|
||||
let voff = segment.p_vaddr as usize % PAGE_SIZE;
|
||||
let vaddr = segment.p_vaddr as usize - voff;
|
||||
|
||||
// Due to the Userspace and kernel TLS bases being located right above 2GB,
|
||||
// limit any loadable sections to lower than that. Eventually we will need
|
||||
// to replace this with a more intelligent TLS address
|
||||
if vaddr >= 0x8000_0000 {
|
||||
println!("exec: invalid section address {:X}", segment.p_vaddr);
|
||||
return Err(Error::new(ENOEXEC));
|
||||
}
|
||||
},
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
let phdr_range = elf.program_headers()..elf.program_headers() + elf.program_headers_size() * elf.program_header_count();
|
||||
if phdr_range.len() > 4096 {
|
||||
return Err(Error::new(ENOMEM));
|
||||
}
|
||||
|
||||
// This is the point of no return, quite literally. Any checks for validity need
|
||||
// to be done before, and appropriate errors returned. Otherwise, we have nothing
|
||||
// to return to.
|
||||
fexec_noreturn(setuid, setgid, name.into_boxed_str(), data.into_boxed_slice(), args, vars, auxv.into_boxed_slice());
|
||||
fexec_noreturn(setuid, setgid, name.into_boxed_str(), phdr_range, data.into_boxed_slice(), args, vars, auxv.into_boxed_slice());
|
||||
}
|
||||
|
||||
pub fn fexec(fd: FileHandle, arg_ptrs: &[[usize; 2]], var_ptrs: &[[usize; 2]]) -> Result<usize> {
|
||||
|
||||
2
syscall
2
syscall
Submodule syscall updated: 841b5f4221...519a09e964
Reference in New Issue
Block a user