WIP: Remove SYS_CLONE (to be done in userspace).

4lDO2
2022-07-04 10:42:04 +02:00
parent 563121596d
commit 283ada82a0
20 changed files with 380 additions and 909 deletions

View File

@@ -1,3 +1,4 @@
use rmm::Flusher;
use crate::paging::{ActivePageTable, Page, PageFlags, VirtualAddress, mapper::PageFlushAll, entry::EntryFlags};
#[cfg(not(feature="slab"))]
@@ -13,7 +14,7 @@ mod linked_list;
mod slab;
unsafe fn map_heap(active_table: &mut ActivePageTable, offset: usize, size: usize) {
let flush_all = PageFlushAll::new();
let mut flush_all = PageFlushAll::new();
let heap_start_page = Page::containing_address(VirtualAddress::new(offset));
let heap_end_page = Page::containing_address(VirtualAddress::new(offset + size-1));

View File

@@ -160,23 +160,3 @@ interrupt_stack!(syscall, |stack| {
syscall::syscall(scratch.rax, stack.preserved.rbx, scratch.rcx, scratch.rdx, scratch.rsi, scratch.rdi, rbp, stack)
})
});
#[naked]
pub unsafe extern "C" fn clone_ret() {
core::arch::asm!(concat!(
// The address of this function is written by `clone` in process.rs to the
// top of the syscall->inner stack frame in this file, which is located using
// the rbp register we save there.
//
// The top of our stack here is the address pointed to by rbp, which is:
//
// - the previous rbp
// - the return location
//
// Our goal is to return from the parent function, inner, so we restore
// rbp...
"pop rbp\n",
// ...and we return to the address at the top of the stack
"ret\n",
), options(noreturn));
}

View File

@@ -1,13 +1,15 @@
use super::{linear_phys_to_virt, Page, PAGE_SIZE, PageFlags, PhysicalAddress, VirtualAddress};
use crate::ipi::{ipi, IpiKind, IpiTarget};
use crate::memory::{allocate_frames, deallocate_frames, Enomem, Frame};
use super::RmmA;
use super::table::{Table, Level4};
pub use rmm::{PageFlush, PageFlushAll};
pub use rmm::{Flusher, PageFlush, PageFlushAll};
pub struct Mapper<'table> {
p4: &'table mut Table<Level4>,
pub(in super) p4: &'table mut Table<Level4>,
}
impl core::fmt::Debug for Mapper<'_> {
@@ -192,3 +194,21 @@ impl<'table> Mapper<'table> {
.map(|frame| PhysicalAddress::new(frame.start_address().data() + offset))
}
}
pub struct InactiveFlusher { _inner: () }
impl InactiveFlusher {
// TODO: cpu id
pub fn new() -> Self { Self { _inner: () } }
}
impl Flusher<RmmA> for InactiveFlusher {
fn consume(&mut self, flush: PageFlush<RmmA>) {
// TODO: Push to TLB "mailbox" or tell it to reload CR3 if there are too many entries.
unsafe { flush.ignore(); }
}
}
impl Drop for InactiveFlusher {
fn drop(&mut self) {
ipi(IpiKind::Tlb, IpiTarget::Other);
}
}
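
The new `Flusher` abstraction lets mapping code be written once for both the active page table and another context's table: `PageFlushAll` batches and performs the TLB flush at the end, while `InactiveFlusher` ignores each per-page flush and sends a single TLB-shootdown IPI when dropped. A minimal sketch of a helper generic over the trait, using only the kernel-internal APIs visible in this diff:

```rust
// Sketch only: one mapping loop for both table kinds. The caller picks the
// flusher: PageFlushAll::new() for the current table, InactiveFlusher::new()
// for another context's table.
fn map_range(
    mapper: &mut Mapper,
    flusher: &mut impl Flusher<RmmA>,
    start: Page,
    page_count: usize,
    flags: PageFlags<RmmA>,
) -> Result<()> {
    for page in Page::range_exclusive(start, start.next_by(page_count)) {
        let flush = mapper.map(page, flags).map_err(|_| Error::new(ENOMEM))?;
        // A no-op per page for InactiveFlusher; the IPI happens on drop.
        flusher.consume(flush);
    }
    Ok(())
}
```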

View File

@@ -14,6 +14,7 @@ use self::table::{Level4, Table};
pub use rmm::{
Arch as RmmArch,
Flusher,
PageFlags,
PhysicalAddress,
TableKind,
@@ -112,7 +113,7 @@ unsafe fn map_percpu(cpu_id: usize, mapper: &mut Mapper) -> PageFlushAll<RmmA> {
let start = crate::KERNEL_PERCPU_OFFSET + crate::KERNEL_PERCPU_SIZE * cpu_id;
let end = start + size;
let flush_all = PageFlushAll::new();
let mut flush_all = PageFlushAll::new();
let start_page = Page::containing_address(VirtualAddress::new(start));
let end_page = Page::containing_address(VirtualAddress::new(end - 1));
for page in Page::range_inclusive(start_page, end_page) {
@@ -288,6 +289,11 @@ impl ActivePageTable {
pub unsafe fn address(&self) -> usize {
RmmA::table().data()
}
pub fn mapper<'a>(&'a mut self) -> Mapper<'a> {
Mapper {
p4: self.p4,
}
}
}
impl Drop for ActivePageTable {

View File

@@ -16,13 +16,13 @@ use crate::arch::{interrupt::InterruptStack, paging::PAGE_SIZE};
use crate::common::unique::Unique;
use crate::context::arch;
use crate::context::file::{FileDescriptor, FileDescription};
use crate::context::memory::UserGrants;
use crate::context::memory::{AddrSpace, new_addrspace, UserGrants};
use crate::ipi::{ipi, IpiKind, IpiTarget};
use crate::scheme::{SchemeNamespace, FileHandle};
use crate::sync::WaitMap;
use crate::syscall::data::SigAction;
use crate::syscall::error::{Result, Error, ENOMEM};
use crate::syscall::error::{Result, Error, ENOMEM, ESRCH};
use crate::syscall::flag::{SIG_DFL, SigActionFlags};
/// Unique identifier for a context (i.e. `pid`).
@@ -226,8 +226,9 @@ pub struct Context {
pub ksig: Option<(arch::Context, Option<Box<[u8]>>, Option<Box<[u8]>>, u8)>,
/// Restore ksig context on next switch
pub ksig_restore: bool,
/// User grants
pub grants: Arc<RwLock<UserGrants>>,
/// Address space containing a page table lock, and grants. Normally this will have a value,
/// but can be None while the context is being reaped.
pub addr_space: Option<Arc<RwLock<AddrSpace>>>,
/// The name of the context
pub name: Arc<RwLock<Box<str>>>,
/// The current working directory
@@ -307,7 +308,7 @@ impl Context {
let syscall_head = AlignedBox::try_zeroed()?;
let syscall_tail = AlignedBox::try_zeroed()?;
Ok(Context {
let mut this = Context {
id,
pgid: id,
ppid: ContextId::from(0),
@@ -336,7 +337,7 @@ impl Context {
kstack: None,
ksig: None,
ksig_restore: false,
grants: Arc::new(RwLock::new(UserGrants::default())),
addr_space: None,
name: Arc::new(RwLock::new(String::new().into_boxed_str())),
cwd: Arc::new(RwLock::new(String::new())),
files: Arc::new(RwLock::new(Vec::new())),
@@ -351,7 +352,9 @@ impl Context {
regs: None,
ptrace_stop: false,
sigstack: None,
})
};
this.set_addr_space(new_addrspace()?.1);
Ok(this)
}
/// Make a relative path absolute
@@ -520,4 +523,13 @@ impl Context {
None
}
}
pub fn addr_space(&self) -> Result<&Arc<RwLock<AddrSpace>>> {
self.addr_space.as_ref().ok_or(Error::new(ESRCH))
}
pub fn set_addr_space(&mut self, addr_space: Arc<RwLock<AddrSpace>>) {
assert!(!self.running);
self.arch.set_page_utable(addr_space.read().frame.utable.start_address().data());
self.addr_space = Some(addr_space);
}
}
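
Because `addr_space` is now an `Option`, callers go through the accessor, which turns a context midway through reaping into an `ESRCH` error instead of a panic. A hedged sketch of the resulting call pattern:

```rust
// Sketch: reading the current context's grants through the new accessor.
let contexts = context::contexts();
let context_lock = contexts.current().ok_or(Error::new(ESRCH))?;
let context = context_lock.read();
let addr_space = Arc::clone(context.addr_space()?); // Err(ESRCH) while being reaped
let grants_empty = addr_space.read().grants.is_empty();
```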

View File

@@ -7,7 +7,7 @@ use core::sync::atomic::Ordering;
use crate::paging::{ActivePageTable, TableKind};
use spin::RwLock;
use crate::syscall::error::{Result, Error, EAGAIN};
use crate::syscall::error::{Result, Error, EAGAIN, ENOMEM};
use super::context::{Context, ContextId};
/// Context list type
@@ -79,7 +79,11 @@ impl ContextList {
let context_lock = self.new_context()?;
{
let mut context = context_lock.write();
let mut fx = unsafe { Box::from_raw(crate::ALLOCATOR.alloc(Layout::from_size_align_unchecked(1024, 16)) as *mut [u8; 1024]) };
let mut fx = unsafe {
let ptr = crate::ALLOCATOR.alloc(Layout::from_size_align_unchecked(1024, 16)) as *mut [u8; 1024];
if ptr.is_null() { return Err(Error::new(ENOMEM)); }
Box::from_raw(ptr)
};
for b in fx.iter_mut() {
*b = 0;
}
@@ -100,13 +104,6 @@ impl ContextList {
context.arch.set_context_handle();
}
let mut new_tables = super::memory::setup_new_utable()?;
new_tables.take();
context.arch.set_page_utable(unsafe { new_tables.new_utable.address() });
#[cfg(target_arch = "aarch64")]
context.arch.set_page_ktable(unsafe { new_tables.new_ktable.address() });
context.arch.set_fx(fx.as_ptr() as usize);
context.arch.set_stack(stack.as_ptr() as usize + offset);
context.kfx = Some(fx);

View File

@@ -5,7 +5,8 @@ use core::cmp::{self, Eq, Ordering, PartialEq, PartialOrd};
use core::fmt::{self, Debug};
use core::intrinsics;
use core::ops::Deref;
use spin::Mutex;
use core::sync::atomic;
use spin::{Mutex, RwLock};
use syscall::{
flag::MapFlags,
error::*,
@@ -14,9 +15,8 @@ use rmm::Arch as _;
use crate::arch::paging::PAGE_SIZE;
use crate::context::file::FileDescriptor;
use crate::ipi::{ipi, IpiKind, IpiTarget};
use crate::memory::Frame;
use crate::paging::mapper::PageFlushAll;
use crate::paging::mapper::{Flusher, InactiveFlusher, Mapper, PageFlushAll};
use crate::paging::{ActivePageTable, InactivePageTable, Page, PageFlags, PageIter, PhysicalAddress, RmmA, TableKind, VirtualAddress};
/// Round down to the nearest multiple of page size
@@ -47,6 +47,76 @@ impl Drop for UnmapResult {
}
}
int_like!(PtId, usize);
static ADDRSPACES: RwLock<BTreeMap<PtId, Arc<RwLock<AddrSpace>>>> = RwLock::new(BTreeMap::new());
static NEXT_PTID: atomic::AtomicUsize = atomic::AtomicUsize::new(1);
pub fn new_addrspace() -> Result<(PtId, Arc<RwLock<AddrSpace>>)> {
let id = PtId::from(NEXT_PTID.fetch_add(1, atomic::Ordering::Relaxed));
let arc = Arc::try_new(RwLock::new(AddrSpace::new(id)?)).map_err(|_| Error::new(ENOMEM))?;
ADDRSPACES.write().insert(id, Arc::clone(&arc));
Ok((id, arc))
}
pub fn addrspace(id: PtId) -> Option<Arc<RwLock<AddrSpace>>> {
ADDRSPACES.read().get(&id).map(Arc::clone)
}
#[derive(Debug)]
pub struct AddrSpace {
pub frame: Tables,
pub grants: UserGrants,
pub id: PtId,
}
impl AddrSpace {
/// Attempt to clone an existing address space, eagerly copying all owned mappings into the new one.
// TODO: Actually use CoW!
pub fn try_clone(&self) -> Result<(PtId, Arc<RwLock<Self>>)> {
let (id, mut new) = new_addrspace()?;
// TODO: Abstract this away.
let (mut inactive, mut active);
// TODO: aarch64
let mut this_mapper = if self.frame.utable.start_address().data() == unsafe { x86::controlregs::cr3() } as usize {
active = unsafe { ActivePageTable::new(rmm::TableKind::User) };
active.mapper()
} else {
inactive = unsafe { InactivePageTable::from_address(self.frame.utable.start_address().data()) };
inactive.mapper()
};
let mut new_mapper = unsafe { InactivePageTable::from_address(new.read().frame.utable.start_address().data()) };
for grant in self.grants.iter() {
// TODO: Fail if there are borrowed grants, rather than simply ignoring them?
if !grant.is_owned() { continue; }
let new_grant = Grant::zeroed(Page::containing_address(grant.start_address()), grant.size() / PAGE_SIZE, grant.flags(), &mut new_mapper.mapper(), ())?;
for page in new_grant.pages() {
// FIXME: ENOMEM is the wrong error code here; this translation cannot actually fail.
let current_frame = this_mapper.translate_page(page).ok_or(Error::new(ENOMEM))?.start_address().data() as *const u8;
let new_frame = new_mapper.mapper().translate_page(page).ok_or(Error::new(ENOMEM))?.start_address().data() as *mut u8;
// TODO: Replace this with CoW
unsafe {
new_frame.copy_from_nonoverlapping(current_frame, PAGE_SIZE);
}
}
new.write().grants.insert(new_grant);
}
Ok((id, new))
}
pub fn new(id: PtId) -> Result<Self> {
Ok(Self {
grants: UserGrants::new(),
frame: setup_new_utable()?,
id,
})
}
}
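
Together with the `ADDRSPACES` registry above, address spaces now have an identity (`PtId`) independent of any context, which is what the new `proc:` handles further down refer to. A minimal sketch of the lifecycle, assuming only the functions defined in this hunk:

```rust
// Sketch: create an address space, clone it eagerly, then look the copy up
// by its PtId (new_addrspace registers it in ADDRSPACES).
let (id, space) = new_addrspace()?;
let (copy_id, _copy) = space.read().try_clone()?;
assert!(addrspace(copy_id).is_some());
```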
#[derive(Debug)]
pub struct UserGrants {
inner: BTreeSet<Grant>,
@@ -406,7 +476,7 @@ impl Grant {
pub fn physmap(from: PhysicalAddress, to: VirtualAddress, size: usize, flags: PageFlags<RmmA>) -> Grant {
let mut active_table = unsafe { ActivePageTable::new(to.kind()) };
let flush_all = PageFlushAll::new();
let mut flush_all = PageFlushAll::new();
let start_page = Page::containing_address(to);
let end_page = Page::containing_address(VirtualAddress::new(to.data() + size - 1));
@@ -429,40 +499,10 @@ impl Grant {
desc_opt: None,
}
}
pub fn map(to: VirtualAddress, size: usize, flags: PageFlags<RmmA>) -> Grant {
let mut active_table = unsafe { ActivePageTable::new(to.kind()) };
let flush_all = PageFlushAll::new();
let start_page = Page::containing_address(to);
let end_page = Page::containing_address(VirtualAddress::new(to.data() + size - 1));
for page in Page::range_inclusive(start_page, end_page) {
let result = active_table
.map(page, flags)
.expect("TODO: handle ENOMEM in Grant::map");
flush_all.consume(result);
}
flush_all.flush();
Grant {
region: Region {
start: to,
size,
},
flags,
mapped: true,
owned: true,
desc_opt: None,
}
}
-pub fn zeroed_inactive(dst: Page, page_count: usize, flags: PageFlags<RmmA>, table: &mut InactivePageTable) -> Result<Grant> {
-    let mut inactive_mapper = table.mapper();
+pub fn zeroed(dst: Page, page_count: usize, flags: PageFlags<RmmA>, mapper: &mut Mapper, mut flusher: impl Flusher<RmmA>) -> Result<Grant> {
     for page in Page::range_exclusive(dst, dst.next_by(page_count)) {
-        let flush = inactive_mapper.map(page, flags).map_err(|_| Error::new(ENOMEM))?;
-        unsafe { flush.ignore(); }
+        let flush = mapper.map(page, flags).map_err(|_| Error::new(ENOMEM))?;
+        flusher.consume(flush);
     }
Ok(Grant { region: Region { start: dst.start_address(), size: page_count * PAGE_SIZE }, flags, mapped: true, owned: true, desc_opt: None })
}
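
`Grant::zeroed` subsumes both the old `Grant::map` (current table) and `Grant::zeroed_inactive` (foreign table); the difference is now carried entirely by the mapper/flusher pair. A hedged sketch of the two call shapes, where `dst`, `page_count`, `flags` and `utable_phys` are placeholders:

```rust
// Sketch: zeroed pages in the current address space...
let mut active_table = unsafe { ActivePageTable::new(TableKind::User) };
let grant = Grant::zeroed(dst, page_count, flags, &mut active_table.mapper(), PageFlushAll::new())?;

// ...and in another context's address space (utable_phys is hypothetical).
let mut foreign_table = unsafe { InactivePageTable::from_address(utable_phys) };
let foreign_grant = Grant::zeroed(dst, page_count, flags, &mut foreign_table.mapper(), InactiveFlusher::new())?;
```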
@@ -487,8 +527,6 @@ impl Grant {
unsafe { inactive_flush.ignore(); }
}
ipi(IpiKind::Tlb, IpiTarget::Other);
Grant {
region: Region {
start: dst,
@@ -501,97 +539,22 @@ impl Grant {
}
}
/// This function should only be used in clone!
pub(crate) fn secret_clone(&self, inactive_table: &mut InactivePageTable) -> Grant {
assert!(self.mapped);
let active_table = unsafe { ActivePageTable::new(TableKind::User) };
let mut inactive_mapper = inactive_table.mapper();
for page in self.pages() {
//TODO: One function to do both?
let flags = active_table.translate_page_flags(page).expect("grant references unmapped memory");
let old_frame = active_table.translate_page(page).expect("grant references unmapped memory");
let frame = if self.owned {
// TODO: CoW paging
let new_frame = crate::memory::allocate_frames(1)
.expect("TODO: handle ENOMEM in Grant::secret_clone");
unsafe {
// We might as well use self.start_address() directly, but if we were to
// introduce SMAP it would help to only move to/from kernel memory, and we are
// copying physical frames anyway.
let src_pointer = RmmA::phys_to_virt(old_frame.start_address()).data() as *const u8;
let dst_pointer = RmmA::phys_to_virt(new_frame.start_address()).data() as *mut u8;
dst_pointer.copy_from_nonoverlapping(src_pointer, PAGE_SIZE);
}
new_frame
} else {
old_frame
};
let flush = inactive_mapper.map_to(page, frame, flags);
// SAFETY: This happens within an inactive table.
unsafe { flush.ignore() }
}
Grant {
region: Region {
start: self.region.start,
size: self.region.size,
},
flags: self.flags,
mapped: true,
owned: self.owned,
desc_opt: self.desc_opt.clone()
}
}
pub fn flags(&self) -> PageFlags<RmmA> {
self.flags
}
pub fn unmap(mut self) -> UnmapResult {
pub fn unmap(mut self, mapper: &mut Mapper, mut flusher: impl Flusher<RmmA>) -> UnmapResult {
assert!(self.mapped);
let mut active_table = unsafe { ActivePageTable::new(self.start_address().kind()) };
let flush_all = PageFlushAll::new();
for page in self.pages() {
let (result, frame) = active_table.unmap_return(page, false);
let (result, frame) = mapper.unmap_return(page, false);
if self.owned {
//TODO: make sure this frame can be safely freed, physical use counter
crate::memory::deallocate_frames(frame, 1);
}
flush_all.consume(result);
flusher.consume(result);
}
flush_all.flush();
self.mapped = false;
// TODO: This imposes a large cost on unmapping, but that cost cannot be avoided without modifying fmap and funmap
UnmapResult { file_desc: self.desc_opt.take() }
}
pub fn unmap_inactive(mut self, other_table: &mut InactivePageTable) -> UnmapResult {
assert!(self.mapped);
for page in self.pages() {
let (result, frame) = other_table.mapper().unmap_return(page, false);
if self.owned {
//TODO: make sure this frame can be safely freed, physical use counter
crate::memory::deallocate_frames(frame, 1);
}
// This is not the active table, so the flush can be ignored
unsafe { result.ignore(); }
}
ipi(IpiKind::Tlb, IpiTarget::Other);
self.mapped = false;
// TODO: This imposes a large cost on unmapping, but that cost cannot be avoided without modifying fmap and funmap
@@ -636,34 +599,6 @@ impl Grant {
Some((before_grant, self, after_grant))
}
pub fn move_to_address_space(&mut self, new_start: Page, new_page_table: &mut InactivePageTable, flags: PageFlags<RmmA>, flush_all: &mut PageFlushAll<RmmA>) -> Grant {
assert!(self.mapped);
let mut active_table = unsafe { ActivePageTable::new(TableKind::User) };
let mut new_mapper = new_page_table.mapper();
let keep_parents = false;
for (i, page) in self.pages().enumerate() {
unsafe {
let (flush, frame) = active_table.unmap_return(page, keep_parents);
flush_all.consume(flush);
let flush = new_mapper.map_to(new_start.next_by(i), frame, flags);
flush.ignore();
}
}
let was_owned = core::mem::replace(&mut self.owned, false);
self.mapped = false;
Self {
region: Region::new(new_start.start_address(), self.region.size),
flags,
mapped: true,
owned: was_owned,
desc_opt: self.desc_opt.clone(),
}
}
}
impl Deref for Grant {
@@ -704,79 +639,68 @@ impl Drop for Grant {
pub const DANGLING: usize = 1 << (usize::BITS - 2);
-pub struct NewTables {
+#[derive(Debug)]
+pub struct Tables {
     #[cfg(target_arch = "aarch64")]
-    pub new_ktable: InactivePageTable,
-    pub new_utable: InactivePageTable,
-    taken: bool,
+    pub ktable: Frame,
+    pub utable: Frame,
 }
-impl NewTables {
-    pub fn take(&mut self) {
-        self.taken = true;
-    }
-}
impl Drop for NewTables {
impl Drop for Tables {
fn drop(&mut self) {
-        if self.taken { return }
-        unsafe {
-            use crate::memory::deallocate_frames;
-            deallocate_frames(Frame::containing_address(PhysicalAddress::new(self.new_utable.address())), 1);
-            #[cfg(target_arch = "aarch64")]
-            deallocate_frames(Frame::containing_address(PhysicalAddress::new(self.new_ktable.address())), 1);
-        }
+        use crate::memory::deallocate_frames;
+        deallocate_frames(Frame::containing_address(PhysicalAddress::new(self.utable.start_address().data())), 1);
+        #[cfg(target_arch = "aarch64")]
+        deallocate_frames(Frame::containing_address(PhysicalAddress::new(self.ktable.start_address().data())), 1);
}
}
/// Allocates a new identically mapped ktable and empty utable (same memory on x86_64).
-pub fn setup_new_utable() -> Result<NewTables> {
-    let mut new_utable = unsafe { InactivePageTable::new(crate::memory::allocate_frames(1).ok_or(Error::new(ENOMEM))?) };
-    let mut new_ktable = if cfg!(target_arch = "aarch64") {
-        unsafe { InactivePageTable::new(crate::memory::allocate_frames(1).ok_or(Error::new(ENOMEM))?) }
-    } else {
-        unsafe { InactivePageTable::from_address(new_utable.address()) }
-    };
+pub fn setup_new_utable() -> Result<Tables> {
+    let mut new_utable = crate::memory::allocate_frames(1).ok_or(Error::new(ENOMEM))?;
+    // TODO: There is only supposed to be one ktable, right? Use a global variable to store the
+    // ktable (or access it from a control register) on architectures which have ktables, or obtain
+    // it from *any* utable on architectures which do not.
+    #[cfg(target_arch = "aarch64")]
+    let new_ktable = crate::memory::allocate_frames(1).ok_or(Error::new(ENOMEM))?;
     let active_ktable = unsafe { ActivePageTable::new(TableKind::Kernel) };
-    // Copy kernel image mapping
-    {
-        let frame = active_ktable.p4()[crate::KERNEL_PML4].pointed_frame().expect("kernel image not mapped");
-        let flags = active_ktable.p4()[crate::KERNEL_PML4].flags();
-        new_ktable.mapper().p4_mut()[crate::KERNEL_PML4].set(frame, flags);
-    }
-    // Copy kernel heap mapping
-    {
-        let frame = active_ktable.p4()[crate::KERNEL_HEAP_PML4].pointed_frame().expect("kernel heap not mapped");
-        let flags = active_ktable.p4()[crate::KERNEL_HEAP_PML4].flags();
-        new_ktable.mapper().p4_mut()[crate::KERNEL_HEAP_PML4].set(frame, flags);
-    }
-    // Copy physmap mapping
-    {
-        let frame = active_ktable.p4()[crate::PHYS_PML4].pointed_frame().expect("physmap not mapped");
-        let flags = active_ktable.p4()[crate::PHYS_PML4].flags();
-        new_ktable.mapper().p4_mut()[crate::PHYS_PML4].set(frame, flags);
-    }
-    // Copy kernel percpu (similar to TLS) mapping.
-    {
-        let frame = active_ktable.p4()[crate::KERNEL_PERCPU_PML4].pointed_frame().expect("kernel TLS not mapped");
-        let flags = active_ktable.p4()[crate::KERNEL_PERCPU_PML4].flags();
-        new_ktable.mapper().p4_mut()[crate::KERNEL_PERCPU_PML4].set(frame, flags);
-    }
-    Ok(NewTables {
-        taken: false,
-        new_utable,
+    #[cfg(target_arch = "aarch64")]
+    let ktable = &new_ktable;
+    #[cfg(not(target_arch = "aarch64"))]
+    let ktable = &new_utable;
+    let mut new_mapper = unsafe { InactivePageTable::from_address(ktable.start_address().data()) };
+    let mut copy_mapping = |p4_no| {
+        let frame = active_ktable.p4()[p4_no].pointed_frame().expect("kernel image not mapped");
+        let flags = active_ktable.p4()[p4_no].flags();
+        new_mapper.mapper().p4_mut()[p4_no].set(frame, flags);
+    };
+    // TODO: Just copy all 256 mappings?
+    // Copy kernel image mapping
+    copy_mapping(crate::KERNEL_PML4);
+    // Copy kernel heap mapping
+    copy_mapping(crate::KERNEL_HEAP_PML4);
+    // Copy physmap mapping
+    copy_mapping(crate::PHYS_PML4);
+    // Copy kernel percpu (similar to TLS) mapping.
+    copy_mapping(crate::KERNEL_PERCPU_PML4);
+    Ok(Tables {
+        utable: new_utable,
         #[cfg(target_arch = "aarch64")]
-        new_ktable,
+        ktable: new_ktable,
})
}
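
The `copy_mapping` closure copies four named kernel PML4 entries into the new table. The TODO above asks whether it would be simpler to copy every kernel-half entry; a sketch of that alternative, assuming the kernel half occupies the upper 256 PML4 slots on x86_64:

```rust
// Sketch of the TODO: copy every populated kernel-half entry (slots 256..512
// on x86_64) instead of naming each PML4 index individually.
for p4_no in 256..512 {
    if let Some(frame) = active_ktable.p4()[p4_no].pointed_frame() {
        let flags = active_ktable.p4()[p4_no].flags();
        new_mapper.mapper().p4_mut()[p4_no].set(frame, flags);
    }
}
```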

View File

@@ -19,11 +19,11 @@ pub unsafe fn debugger() {
if let Some((a, b, c, d, e, f)) = context.syscall {
println!("syscall: {}", crate::syscall::debug::format_call(a, b, c, d, e, f));
}
{
let grants = context.grants.read();
if ! grants.is_empty() {
if let Some(ref addr_space) = context.addr_space {
let addr_space = addr_space.read();
if ! addr_space.grants.is_empty() {
println!("grants:");
for grant in grants.iter() {
for grant in addr_space.grants.iter() {
let region = grant.region();
println!(
" virt 0x{:016x}:0x{:016x} size 0x{:08x} {}",

View File

@@ -190,7 +190,11 @@ pub fn is_traced(pid: ContextId) -> bool {
/// Trigger a notification to the event: scheme
fn proc_trigger_event(file_id: usize, flags: EventFlags) {
event::trigger(proc::PROC_SCHEME_ID.load(Ordering::SeqCst), file_id, flags);
if let Some(scheme_id) = proc::PROC_SCHEME_ID.get() {
event::trigger(*scheme_id, file_id, flags);
} else {
log::warn!("Failed to trigger proc event: scheme never initialized");
}
}
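
`spin::Once` replaces the sentinel-based `AtomicSchemeId` (see the `proc` scheme changes below), so an uninitialized scheme id is an explicit `None` from `get()` rather than a magic value. The pattern in brief:

```rust
// Sketch of the spin::Once pattern this commit adopts for PROC_SCHEME_ID.
static SCHEME_ID: spin::Once<SchemeId> = spin::Once::new();

SCHEME_ID.call_once(|| scheme_id); // runs at most once; later calls are no-ops
match SCHEME_ID.get() {            // None until call_once has completed
    Some(id) => event::trigger(*id, file_id, flags),
    None => log::warn!("scheme never initialized"),
}
```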
/// Dispatch an event to any tracer tracing `self`. This will cause
@@ -471,6 +475,10 @@ fn page_aligned_chunks(mut start: usize, mut len: usize) -> impl Iterator<Item =
pub fn context_memory(context: &mut Context, offset: VirtualAddress, len: usize) -> impl Iterator<Item = Option<*mut [u8]>> + '_ {
let mut table = unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) };
// TODO: Iterate over grants instead to avoid yielding None too many times. What if
// context_memory is used for an entire process's address space, where the stack is at the very
// end? Alternatively we can skip pages recursively, i.e. first skip unpopulated PML4s and then
// onwards.
page_aligned_chunks(offset.data(), len).map(move |(addr, len)| unsafe {
// [addr,addr+len) is a continuous page starting and/or ending at page boundaries, with the
// possible exception of an unaligned head/tail.

View File

@@ -5,6 +5,7 @@ use alloc::collections::BTreeMap;
use core::{slice, str};
use core::sync::atomic::{AtomicUsize, Ordering};
use spin::RwLock;
use rmm::Flusher;
use syscall::data::Stat;
use syscall::error::*;
@@ -55,7 +56,7 @@ impl DiskScheme {
let virt = phys + crate::PHYS_OFFSET;
unsafe {
let mut active_table = ActivePageTable::new(TableKind::Kernel);
let flush_all = PageFlushAll::new();
let mut flush_all = PageFlushAll::new();
let start_page = Page::containing_address(VirtualAddress::new(virt));
let end_page = Page::containing_address(VirtualAddress::new(virt + size - 1));
for page in Page::range_inclusive(start_page, end_page) {

View File

@@ -1,7 +1,7 @@
use crate::context;
use crate::context::memory::{page_flags, Grant};
use crate::memory::{free_frames, used_frames, PAGE_SIZE};
use crate::paging::{ActivePageTable, VirtualAddress};
use crate::paging::{ActivePageTable, mapper::PageFlushAll, Page, VirtualAddress};
use crate::syscall::data::{Map, OldMap, StatVfs};
use crate::syscall::error::*;
use crate::syscall::flag::MapFlags;
@@ -23,25 +23,11 @@ impl MemoryScheme {
let context_lock = contexts.current().ok_or(Error::new(ESRCH))?;
let context = context_lock.read();
let mut grants = context.grants.write();
let mut addr_space = context.addr_space()?.write();
let region = grants.find_free_at(VirtualAddress::new(map.address), map.size, map.flags)?.round();
let region = addr_space.grants.find_free_at(VirtualAddress::new(map.address), map.size, map.flags)?.round();
{
// Make sure it's *absolutely* not mapped already
// TODO: Keep track of all allocated memory so this isn't necessary
let active_table = unsafe { ActivePageTable::new(rmm::TableKind::User) };
for page in region.pages() {
if let Some(flags) = active_table.translate_page_flags(page).filter(|flags| flags.has_present()) {
println!("page at {:#x} was already mapped, flags: {:?}", page.start_address().data(), flags);
return Err(Error::new(EEXIST))
}
}
}
grants.insert(Grant::map(region.start_address(), region.size(), page_flags(map.flags)));
addr_space.grants.insert(Grant::zeroed(Page::containing_address(region.start_address()), map.size / PAGE_SIZE, page_flags(map.flags), &mut *unsafe { ActivePageTable::new(rmm::TableKind::User) }, PageFlushAll::new())?);
Ok(region.start_address().data())
}

View File

@@ -301,7 +301,7 @@ pub fn schemes_mut() -> RwLockWriteGuard<'static, SchemeList> {
}
pub trait KernelScheme: Scheme + Send + Sync + 'static {
#[allow(unused_arguments)]
#[allow(unused_variables)]
fn kfmap(&self, number: usize, map: &syscall::data::Map, target_context: &Arc<RwLock<Context>>) -> Result<usize> {
log::error!("Returning ENOSYS since kfmap can only be called on UserScheme schemes");
Err(Error::new(ENOSYS))

View File

@@ -1,6 +1,6 @@
use crate::{
arch::paging::{ActivePageTable, InactivePageTable, mapper::{Mapper, PageFlushAll}, Page, VirtualAddress},
context::{self, Context, ContextId, Status, memory::{Grant, page_flags, Region}},
arch::paging::{ActivePageTable, Flusher, InactivePageTable, mapper::{InactiveFlusher, Mapper, PageFlushAll}, Page, RmmA, VirtualAddress},
context::{self, Context, ContextId, Status, memory::{addrspace, Grant, new_addrspace, PtId, page_flags, Region}},
memory::PAGE_SIZE,
ptrace,
scheme::{AtomicSchemeId, SchemeId},
@@ -32,7 +32,7 @@ use core::{
str,
sync::atomic::{AtomicUsize, Ordering},
};
use spin::RwLock;
use spin::{Once, RwLock};
fn read_from(dst: &mut [u8], src: &[u8], offset: &mut usize) -> Result<usize> {
let byte_count = cmp::min(dst.len(), src.len().saturating_sub(*offset));
@@ -68,7 +68,7 @@ where
}
fn try_stop_context<F, T>(pid: ContextId, mut callback: F) -> Result<T>
where
F: FnMut(&mut Context) -> Result<T>,
F: FnOnce(&mut Context) -> Result<T>,
{
if pid == context::context_id() {
return Err(Error::new(EBADF));
@@ -118,6 +118,8 @@ enum Operation {
Sigstack,
Attr(Attr),
Files,
AddrSpace { id: PtId },
CurrentAddrSpace,
}
#[derive(Clone, Copy, PartialEq, Eq)]
enum Attr {
@@ -216,7 +218,7 @@ impl Handle {
}
}
pub static PROC_SCHEME_ID: AtomicSchemeId = AtomicSchemeId::default();
pub static PROC_SCHEME_ID: Once<SchemeId> = Once::new();
pub struct ProcScheme {
next_id: AtomicUsize,
@@ -231,7 +233,7 @@ pub enum Access {
impl ProcScheme {
pub fn new(scheme_id: SchemeId) -> Self {
PROC_SCHEME_ID.store(scheme_id, Ordering::SeqCst);
PROC_SCHEME_ID.call_once(|| scheme_id);
Self {
next_id: AtomicUsize::new(0),
@@ -246,6 +248,11 @@ impl ProcScheme {
access: Access::Restricted,
}
}
fn new_handle(&self, handle: Handle) -> Result<usize> {
let id = self.next_id.fetch_add(1, Ordering::Relaxed);
let _ = self.handles.write().insert(id, handle);
Ok(id)
}
}
impl Scheme for ProcScheme {
@@ -264,7 +271,8 @@ impl Scheme for ProcScheme {
let operation = match parts.next() {
Some("mem") => Operation::Memory,
Some("grants") => Operation::Grants,
Some("addrspace") => Operation::AddrSpace { id: context::contexts().current().ok_or(Error::new(ESRCH))?.read().addr_space()?.read().id },
Some("current-addrspace") => Operation::CurrentAddrSpace,
Some("regs/float") => Operation::Regs(RegsKind::Float),
Some("regs/int") => Operation::Regs(RegsKind::Int),
Some("regs/env") => Operation::Regs(RegsKind::Env),
@@ -340,9 +348,16 @@ impl Scheme for ProcScheme {
}
};
let id = self.next_id.fetch_add(1, Ordering::SeqCst);
let id = self.new_handle(Handle {
info: Info {
flags,
pid,
operation,
},
data,
})?;
if let Operation::Trace { .. } = operation {
if let Operation::Trace = operation {
if !ptrace::try_new_session(pid, id) {
// There is no good way to handle id being occupied for nothing
// here, is there?
@@ -355,44 +370,41 @@ impl Scheme for ProcScheme {
}
}
self.handles.write().insert(id, Handle {
info: Info {
flags,
pid,
operation,
},
data,
});
Ok(id)
}
-/// Using dup for `proc:` simply opens another operation on the same PID
-/// ```rust,ignore
-/// let trace = syscall::open("proc:1234/trace")?;
-///
-/// // let regs = syscall::open("proc:1234/regs/int")?;
-/// let regs = syscall::dup(trace, "regs/int")?;
-/// ```
+/// Dup is currently used to implement clone() and execve().
 fn dup(&self, old_id: usize, buf: &[u8]) -> Result<usize> {
     let info = {
         let handles = self.handles.read();
         let handle = handles.get(&old_id).ok_or(Error::new(EBADF))?;
         handle.info
     };
-    let buf_str = str::from_utf8(buf).map_err(|_| Error::new(EINVAL))?;
-    let mut path = format!("{}/", info.pid.into());
-    path.push_str(buf_str);
-    let (uid, gid) = {
-        let contexts = context::contexts();
-        let context = contexts.current().ok_or(Error::new(ESRCH))?;
-        let context = context.read();
-        (context.euid, context.egid)
-    };
-    self.open(&path, info.flags, uid, gid)
+    self.new_handle(match info.operation {
+        Operation::AddrSpace { id } => {
+            let new_ptid = match buf {
+                // TODO: Better way to obtain new empty address spaces, perhaps using SYS_OPEN. But
+                // in that case, what scheme?
+                b"empty" => new_addrspace()?.0,
+                // Reuse same ID.
+                b"shared" => id,
+                b"exclusive" => addrspace(id).ok_or(Error::new(EBADFD))?.read().try_clone()?.0,
+                _ => return Err(Error::new(EINVAL)),
+            };
+            Handle {
+                info: Info {
+                    flags: 0,
+                    pid: info.pid,
+                    operation: Operation::AddrSpace { id: new_ptid },
+                },
+                data: OperationData::Other,
+            }
+        }
+        _ => return Err(Error::new(EINVAL)),
+    })
 }
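
From userspace this turns address-space creation into plain `dup` calls on an `addrspace` handle: `b"empty"` allocates a blank table, `b"shared"` aliases the same `PtId`, and `b"exclusive"` makes the eager copy via `AddrSpace::try_clone`. A hedged sketch of the calls, assuming the redox `syscall` crate and a `proc:<pid>/addrspace` path shape:

```rust
// Userspace sketch; the pid segment of the path is hypothetical.
let space = syscall::open("proc:3/addrspace", syscall::O_CLOEXEC)?;
let fork_space = syscall::dup(space, b"exclusive")?; // or b"empty" / b"shared"
```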
fn seek(&self, id: usize, pos: isize, whence: usize) -> Result<isize> {
@@ -421,6 +433,7 @@ impl Scheme for ProcScheme {
};
match info.operation {
Operation::Grants => return Err(Error::new(ENOSYS)),
Operation::Static(_) => {
let mut handles = self.handles.write();
let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?;
@@ -455,8 +468,7 @@ impl Scheme for ProcScheme {
data.offset = VirtualAddress::new(data.offset.data() + bytes_read);
Ok(bytes_read)
},
// TODO: Allow reading process mappings?
Operation::Grants => return Err(Error::new(EBADF)),
Operation::AddrSpace { .. } => return Err(Error::new(EBADF)),
Operation::Regs(kind) => {
union Output {
@@ -586,6 +598,14 @@ impl Scheme for ProcScheme {
read_from(buf, &data.buf, &mut data.offset)
}
// TODO: Replace write() with SYS_DUP_FORWARD.
// TODO: Find a better way to switch address spaces, since they also require switching
// the instruction and stack pointer. Maybe remove `<pid>/regs` altogether and replace it
// with `<pid>/ctx`
Operation::CurrentAddrSpace => {
//read_from(buf, &usize::to_ne_bytes(id.into()), &mut 0)
Ok(0)
}
}
}
@@ -606,6 +626,7 @@ impl Scheme for ProcScheme {
};
match info.operation {
Operation::Grants => Err(Error::new(ENOSYS)),
Operation::Static(_) => Err(Error::new(EBADF)),
Operation::Memory => {
// Won't context switch, don't worry about the locks
@@ -631,7 +652,7 @@ impl Scheme for ProcScheme {
data.offset = VirtualAddress::new(data.offset.data() + bytes_written);
Ok(bytes_written)
},
Operation::Grants => {
Operation::AddrSpace { .. } => {
// FIXME: Forbid upgrading external mappings.
let pid = self.handles.read()
@@ -649,51 +670,52 @@ impl Scheme for ProcScheme {
return Err(Error::new(EINVAL));
}
-let is_inactive = pid != context::context_id();
+let is_active = pid == context::context_id();
 let callback = |context: &mut Context| {
-    let mut inactive = is_inactive.then(|| unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) });
-    let mut grants = context.grants.write();
-    let conflicting = grants.conflicts(region).map(|g| *g.region()).collect::<Vec<_>>();
+    let (mut inactive, mut active);
+    let mut addr_space = context.addr_space()?.write();
+    let (mut mapper, mut flusher) = if is_active {
+        active = (unsafe { ActivePageTable::new(rmm::TableKind::User) }, PageFlushAll::new());
+        (active.0.mapper(), &mut active.1 as &mut dyn Flusher<RmmA>)
+    } else {
+        inactive = (unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) }, InactiveFlusher::new());
+        (inactive.0.mapper(), &mut inactive.1 as &mut dyn Flusher<RmmA>)
+    };
+    let conflicting = addr_space.grants.conflicts(region).map(|g| *g.region()).collect::<Vec<_>>();
     for conflicting_region in conflicting {
-        let whole_grant = grants.take(&conflicting_region).ok_or(Error::new(EBADFD))?;
+        let whole_grant = addr_space.grants.take(&conflicting_region).ok_or(Error::new(EBADFD))?;
         let (before_opt, current, after_opt) = whole_grant.extract(region.intersect(conflicting_region)).ok_or(Error::new(EBADFD))?;
         if let Some(before) = before_opt {
-            grants.insert(before);
+            addr_space.grants.insert(before);
         }
         if let Some(after) = after_opt {
-            grants.insert(after);
+            addr_space.grants.insert(after);
         }
-        let res = if let Some(ref mut inactive) = inactive {
-            current.unmap_inactive(inactive)
-        } else {
-            current.unmap()
-        };
+        let res = current.unmap(&mut mapper, &mut flusher);
         if res.file_desc.is_some() {
-            drop(grants);
             return Err(Error::new(EBUSY));
         }
-        // TODO: Partial free if grant is mapped externally.
+        // TODO: Partial free if grant is mapped externally, or fail and force
+        // userspace to do it.
     }
     if flags.intersects(MapFlags::PROT_READ | MapFlags::PROT_EXEC | MapFlags::PROT_WRITE) {
-        let base = VirtualAddress::new(base);
-        if let Some(ref mut inactive) = inactive {
-            grants.insert(Grant::zeroed_inactive(Page::containing_address(base), size / PAGE_SIZE, page_flags(flags), inactive).unwrap());
-        } else {
-            grants.insert(Grant::map(base, size, page_flags(flags)));
-        }
+        let base = Page::containing_address(VirtualAddress::new(base));
+        addr_space.grants.insert(Grant::zeroed(base, size / PAGE_SIZE, page_flags(flags), &mut mapper, flusher)?);
     }
     Ok(())
 };
-if is_inactive {
+if is_active {
with_context_mut(pid, callback)?;
} else {
try_stop_context(pid, callback)?;
@@ -868,6 +890,24 @@ impl Scheme for ProcScheme {
Ok(buf.len())
}
Operation::Files => return Err(Error::new(EBADF)),
Operation::CurrentAddrSpace { .. } => {
let mut iter = buf.array_chunks::<{mem::size_of::<usize>()}>().copied().map(usize::from_ne_bytes);
let id = iter.next().ok_or(Error::new(EINVAL))?;
let sp = iter.next().ok_or(Error::new(EINVAL))?;
let ip = iter.next().ok_or(Error::new(EINVAL))?;
let space = addrspace(PtId::from(id)).ok_or(Error::new(EINVAL))?;
try_stop_context(info.pid, |context| unsafe {
let regs = &mut ptrace::regs_for_mut(context).ok_or(Error::new(ESRCH))?.iret;
regs.rip = ip;
regs.rsp = sp;
context.set_addr_space(space);
Ok(())
})?;
Ok(3 * mem::size_of::<usize>())
}
}
}
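
The payload written to `current-addrspace` is three native-endian `usize`s in this order: the target `PtId`, the new stack pointer, and the new instruction pointer; the kernel stops the target, rewrites `rsp`/`rip`, and swaps the address space in one step. A hedged userspace sketch (identifiers are placeholders; 8-byte `usize` assumed):

```rust
// Userspace sketch: point a stopped child at a new address space.
// child_pid, new_ptid, stack_top and entry are hypothetical values.
let mut payload = [0u8; 24];
payload[0..8].copy_from_slice(&new_ptid.to_ne_bytes());   // PtId
payload[8..16].copy_from_slice(&stack_top.to_ne_bytes()); // rsp
payload[16..24].copy_from_slice(&entry.to_ne_bytes());    // rip
let fd = syscall::open(&format!("proc:{}/current-addrspace", child_pid), syscall::O_WRONLY)?;
syscall::write(fd, &payload)?;
```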
@@ -911,6 +951,8 @@ impl Scheme for ProcScheme {
Operation::Attr(Attr::Uid) => "uid",
Operation::Attr(Attr::Gid) => "gid",
Operation::Files => "files",
Operation::AddrSpace { .. } => "addrspace",
Operation::CurrentAddrSpace => "current-addrspace",
});
read_from(buf, &path.as_bytes(), &mut 0)

View File

@@ -28,11 +28,15 @@ pub fn resource() -> Result<Vec<u8>> {
let mut stat_string = String::new();
// TODO: All user programs must have some grant in order for executable memory to even
// exist, but is this a good indicator of whether it is user or kernel?
-if context.grants.read().is_empty() {
-    stat_string.push('K');
-} else {
-    stat_string.push('U');
-}
+stat_string.push(if let Ok(addr_space) = context.addr_space() {
+    if addr_space.read().grants.is_empty() {
+        'K'
+    } else {
+        'U'
+    }
+} else {
+    'R'
+});
match context.status {
context::Status::Runnable => {
stat_string.push('R');
@@ -79,9 +83,11 @@ pub fn resource() -> Result<Vec<u8>> {
if let Some(ref kstack) = context.kstack {
memory += kstack.len();
}
for grant in context.grants.read().iter() {
if grant.is_owned() {
memory += grant.size();
if let Ok(addr_space) = context.addr_space() {
for grant in addr_space.read().grants.iter() {
if grant.is_owned() {
memory += grant.size();
}
}
}

View File

@@ -13,7 +13,7 @@ use crate::event;
use crate::paging::{PAGE_SIZE, InactivePageTable, VirtualAddress};
use crate::scheme::{AtomicSchemeId, SchemeId};
use crate::sync::{WaitQueue, WaitMap};
use crate::syscall::data::{Map, OldMap, Packet, Stat, StatVfs, TimeSpec};
use crate::syscall::data::{Map, Packet, Stat, StatVfs, TimeSpec};
use crate::syscall::error::*;
use crate::syscall::flag::{EventFlags, EVENT_READ, O_NONBLOCK, MapFlags, PROT_READ, PROT_WRITE};
use crate::syscall::number::*;
@@ -145,15 +145,15 @@ impl UserInner {
let mut new_table = unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) };
let mut grants = context.grants.write();
let mut addr_space = context.addr_space()?.write();
let src_address = round_down_pages(address);
let offset = address - src_address;
let src_region = Region::new(VirtualAddress::new(src_address), offset + size).round();
let dst_region = grants.find_free_at(VirtualAddress::new(dst_address), src_region.size(), flags)?;
let dst_region = addr_space.grants.find_free_at(VirtualAddress::new(dst_address), src_region.size(), flags)?;
//TODO: Use syscall_head and syscall_tail to avoid leaking data
grants.insert(Grant::map_inactive(
addr_space.grants.insert(Grant::map_inactive(
src_region.start_address(),
dst_region.start_address(),
src_region.size(),
@@ -166,7 +166,6 @@ impl UserInner {
}
pub fn release(&self, address: usize) -> Result<()> {
//dbg!(address);
if address == DANGLING {
return Ok(());
}
@@ -174,13 +173,13 @@ impl UserInner {
let mut context = context_lock.write();
let mut other_table = unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) };
let mut grants = context.grants.write();
let mut addr_space = context.addr_space()?.write();
let region = match grants.contains(VirtualAddress::new(address)).map(Region::from) {
let region = match addr_space.grants.contains(VirtualAddress::new(address)).map(Region::from) {
Some(region) => region,
None => return Err(Error::new(EFAULT)),
};
grants.take(&region).unwrap().unmap_inactive(&mut other_table);
addr_space.grants.take(&region).unwrap().unmap(&mut other_table.mapper(), crate::paging::mapper::InactiveFlusher::new());
Ok(())
}
@@ -242,8 +241,8 @@ impl UserInner {
if let Ok(grant_address) = res {
if let Some(context_lock) = context_weak.upgrade() {
let context = context_lock.read();
let mut grants = context.grants.write();
grants.funmap.insert(
let mut addr_space = context.addr_space()?.write();
addr_space.grants.funmap.insert(
Region::new(grant_address, map.size),
VirtualAddress::new(address)
);
@@ -437,8 +436,8 @@ impl Scheme for UserScheme {
let contexts = context::contexts();
let context_lock = contexts.current().ok_or(Error::new(ESRCH))?;
let context = context_lock.read();
let mut grants = context.grants.write();
let funmap = &mut grants.funmap;
let mut addr_space = context.addr_space()?.write();
let funmap = &mut addr_space.grants.funmap;
let entry = funmap.range(..=Region::byte(VirtualAddress::new(grant_address))).next_back();
let grant_address = VirtualAddress::new(grant_address);

View File

@@ -1,8 +1,7 @@
use core::{ascii, mem};
use alloc::string::String;
use alloc::vec::Vec;
use super::data::{OldMap, Map, Stat, TimeSpec};
use super::data::{Map, Stat, TimeSpec};
use super::flag::*;
use super::number::*;
use super::validate::*;

View File

@@ -88,9 +88,9 @@ pub fn inner_physmap(physical_address: usize, size: usize, flags: PhysmapFlags)
let context_lock = contexts.current().ok_or(Error::new(ESRCH))?;
let context = context_lock.read();
let mut grants = context.grants.write();
let mut addr_space = context.addr_space()?.write();
let dst_address = grants.find_free(size).ok_or(Error::new(ENOMEM))?;
let dst_address = addr_space.grants.find_free(size).ok_or(Error::new(ENOMEM))?;
let mut page_flags = PageFlags::new().user(true);
if flags.contains(PHYSMAP_WRITE) {
@@ -104,7 +104,7 @@ pub fn inner_physmap(physical_address: usize, size: usize, flags: PhysmapFlags)
page_flags = page_flags.custom_flag(EntryFlags::NO_CACHE.bits(), true);
}
grants.insert(Grant::physmap(
addr_space.grants.insert(Grant::physmap(
PhysicalAddress::new(physical_address),
dst_address.start_address(),
size,
@@ -113,6 +113,7 @@ pub fn inner_physmap(physical_address: usize, size: usize, flags: PhysmapFlags)
Ok(dst_address.start_address().data())
}
// TODO: Remove this syscall, funmap makes it redundant.
pub fn physmap(physical_address: usize, size: usize, flags: PhysmapFlags) -> Result<usize> {
enforce_root()?;
inner_physmap(physical_address, size, flags)
@@ -126,10 +127,12 @@ pub fn inner_physunmap(virtual_address: usize) -> Result<usize> {
let context_lock = contexts.current().ok_or(Error::new(ESRCH))?;
let context = context_lock.read();
let mut grants = context.grants.write();
let mut addr_space = context.addr_space()?.write();
if let Some(region) = grants.contains(VirtualAddress::new(virtual_address)).map(Region::from) {
grants.take(&region).unwrap().unmap();
if let Some(region) = addr_space.grants.contains(VirtualAddress::new(virtual_address)).map(Region::from) {
use crate::paging::{ActivePageTable, mapper::PageFlushAll, TableKind};
addr_space.grants.take(&region).unwrap().unmap(&mut *unsafe { ActivePageTable::new(TableKind::User) }, PageFlushAll::new());
return Ok(0);
}

View File

@@ -2,7 +2,6 @@
use alloc::sync::Arc;
use alloc::vec::Vec;
use core::str;
use core::sync::atomic::Ordering;
use spin::RwLock;
use crate::context::file::{FileDescriptor, FileDescription};
@@ -482,11 +481,11 @@ pub fn funmap(virtual_address: usize, length: usize) -> Result<usize> {
let requested = Region::new(virtual_address, length);
{
let contexts = context::contexts();
let context_lock = contexts.current().ok_or(Error::new(ESRCH))?;
let context_lock = Arc::clone(context::contexts().current().ok_or(Error::new(ESRCH))?);
let context = context_lock.read();
let mut grants = context.grants.write();
let mut addr_space = context.addr_space()?.write();
let grants = &mut addr_space.grants;
let conflicting: Vec<Region> = grants.conflicts(requested).map(Region::from).collect();
@@ -507,9 +506,10 @@ pub fn funmap(virtual_address: usize, length: usize) -> Result<usize> {
if let Some(after) = after {
grants.insert(after);
}
use crate::paging::{ActivePageTable, mapper::PageFlushAll, TableKind};
// Remove irrelevant region
grant.unmap();
grant.unmap(&mut *unsafe { ActivePageTable::new(TableKind::User) }, PageFlushAll::new());
}
}

View File

@@ -25,9 +25,9 @@ pub use self::process::*;
pub use self::time::*;
pub use self::validate::*;
use self::data::{CloneInfo, ExecMemRange, Map, SigAction, Stat, TimeSpec};
use self::data::{Map, SigAction, Stat, TimeSpec};
use self::error::{Error, Result, ENOSYS, EINVAL};
use self::flag::{CloneFlags, MapFlags, PhysmapFlags, WaitFlags};
use self::flag::{MapFlags, PhysmapFlags, WaitFlags};
use self::number::*;
use crate::context::ContextId;
@@ -112,36 +112,6 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u
SYS_GETPGID => getpgid(ContextId::from(b)).map(ContextId::into),
SYS_GETPPID => getppid().map(ContextId::into),
SYS_EXEC => exec(validate_slice(b as *const ExecMemRange, c)?, d, e),
SYS_CLONE => {
let b = CloneFlags::from_bits_truncate(b);
let info = if b.contains(CloneFlags::CLONE_VM) {
if d < core::mem::size_of::<CloneInfo>() {
return Err(Error::new(EINVAL));
}
Some(&validate_slice(c as *const CloneInfo, 1)?[0])
} else { None };
#[cfg(not(target_arch = "x86_64"))]
{
//TODO: CLONE_STACK
let ret = clone(b, bp).map(ContextId::into);
ret
}
#[cfg(target_arch = "x86_64")]
{
let old_rsp = stack.iret.rsp;
// TODO: Unify CLONE_STACK and CLONE_VM.
if b.contains(flag::CLONE_STACK) {
stack.iret.rsp = info.as_ref().ok_or(Error::new(EINVAL))?.target_stack;
}
let ret = clone(b, bp, info).map(ContextId::into);
stack.iret.rsp = old_rsp;
ret
}
},
SYS_EXIT => exit((b & 0xFF) << 8),
SYS_KILL => kill(ContextId::from(b), c),
SYS_WAITPID => waitpid(ContextId::from(b), c, WaitFlags::from_bits_truncate(d)).map(ContextId::into),

View File

@@ -1,380 +1,27 @@
use alloc::{
boxed::Box,
collections::BTreeSet,
string::String,
sync::Arc,
vec::Vec,
};
use core::alloc::{GlobalAlloc, Layout};
use core::convert::TryFrom;
use core::ops::DerefMut;
use core::{intrinsics, mem, str};
use crate::context::file::{FileDescription, FileDescriptor};
use core::mem;
use spin::{RwLock, RwLockWriteGuard};
use crate::context::{Context, ContextId, WaitpidKey};
use crate::context::memory::{Grant, Region, NewTables, page_flags, setup_new_utable, UserGrants};
use crate::context::{Context, ContextId, memory, WaitpidKey};
use crate::context;
#[cfg(not(feature="doc"))]
use crate::elf::{self, program_header};
use crate::interrupt;
use crate::ipi::{ipi, IpiKind, IpiTarget};
use crate::memory::{allocate_frames, Frame, PhysicalAddress};
use crate::paging::mapper::PageFlushAll;
use crate::paging::{ActivePageTable, InactivePageTable, Page, PageFlags, RmmA, TableKind, VirtualAddress, PAGE_SIZE};
use crate::{ptrace, syscall};
use crate::scheme::FileHandle;
use crate::paging::mapper::{Flusher, InactiveFlusher, PageFlushAll};
use crate::paging::{ActivePageTable, InactivePageTable, Page, PageFlags, RmmA, RmmArch, TableKind, VirtualAddress, PAGE_SIZE};
use crate::ptrace;
use crate::start::usermode;
use crate::syscall::data::{CloneInfo, ExecMemRange, SigAction, Stat};
use crate::syscall::data::SigAction;
use crate::syscall::error::*;
use crate::syscall::flag::{wifcontinued, wifstopped, AT_ENTRY, AT_NULL, AT_PHDR, AT_PHENT, AT_PHNUM, CloneFlags,
CLONE_FILES, CLONE_FS, CLONE_SIGHAND, CLONE_STACK, CLONE_VFORK, CLONE_VM,
MapFlags, PROT_EXEC, PROT_READ, PROT_WRITE, PTRACE_EVENT_CLONE,
PTRACE_STOP_EXIT, SigActionFlags, SIG_BLOCK, SIG_DFL, SIG_SETMASK, SIG_UNBLOCK,
SIGCONT, SIGTERM, WaitFlags, WCONTINUED, WNOHANG, WUNTRACED};
use crate::syscall::flag::{wifcontinued, wifstopped, MapFlags, PROT_EXEC, PROT_READ, PROT_WRITE,
PTRACE_STOP_EXIT, SIG_BLOCK, SIG_SETMASK, SIG_UNBLOCK,
SIGCONT, SIGTERM, WaitFlags, WCONTINUED, WNOHANG, WUNTRACED};
use crate::syscall::ptrace_event;
use crate::syscall::validate::{validate_slice, validate_slice_mut};
pub fn clone(flags: CloneFlags, stack_base: usize, info: Option<&CloneInfo>) -> Result<ContextId> {
let ppid;
let pid;
{
let pgid;
let ruid;
let rgid;
let rns;
let euid;
let egid;
let ens;
let umask;
let sigmask;
let mut cpu_id_opt = None;
let arch;
let vfork;
let mut kfx_opt = None;
let mut kstack_opt = None;
let mut offset = 0;
let mut grants;
let name;
let cwd;
let files;
let actions;
let old_sigstack;
// Copy from old process
{
let contexts = context::contexts();
let context_lock = contexts.current().ok_or(Error::new(ESRCH))?;
let context = context_lock.read();
ppid = context.id;
pgid = context.pgid;
ruid = context.ruid;
rgid = context.rgid;
rns = context.rns;
euid = context.euid;
egid = context.egid;
ens = context.ens;
sigmask = context.sigmask;
umask = context.umask;
old_sigstack = context.sigstack;
// Uncomment to disable threads on different CPUs
//TODO: fix memory allocation races when this is removed
if flags.contains(CLONE_VM) {
cpu_id_opt = context.cpu_id;
}
// TODO: Fill with newest registers.
arch = context.arch.clone();
if let Some(ref fx) = context.kfx {
let new_fx = unsafe {
let new_fx_ptr = crate::ALLOCATOR.alloc(Layout::from_size_align_unchecked(1024, 16));
if new_fx_ptr.is_null() {
// FIXME: It's mildly ironic that the only place where clone can fail with
// ENOMEM, is when copying 1024 bytes to merely store vector registers.
// Although in order to achieve full kernel-panic immunity, we'll need to
// completely phase out all usage of liballoc data structures, and use our
// own library/port liballoc, since panicking on OOM is not good for a
// kernel.
return Err(Error::new(ENOMEM));
}
new_fx_ptr.copy_from_nonoverlapping(fx.as_ptr(), fx.len());
Box::from_raw(new_fx_ptr as *mut [u8; 1024])
};
kfx_opt = Some(new_fx);
}
#[cfg(target_arch = "x86_64")]
{
if let Some(ref stack) = context.kstack {
// Get the relative offset to the return address of the function
// obtaining `stack_base`.
//
// (base pointer - start of stack) - one
offset = stack_base - stack.as_ptr() as usize - mem::size_of::<usize>(); // Add clone ret
// FIXME: This is incredibly UB, making Rust think the current stack being
// copied is simply a regular immutable slice. This part should either be
// written in assembly or have clone moved to userspace.
let mut new_stack = stack.clone();
unsafe {
// Set clone's return value to zero. This is done because
// the clone won't return like normal, which means the value
// would otherwise never get set.
if let Some(regs) = ptrace::rebase_regs_ptr_mut(context.regs, Some(&mut new_stack)) {
(*regs).scratch.rax = 0;
}
// Change the return address of the child (previously
// syscall) to the arch-specific clone_ret callback
let func_ptr = new_stack.as_mut_ptr().add(offset);
*(func_ptr as *mut usize) = interrupt::syscall::clone_ret as usize;
}
kstack_opt = Some(new_stack);
}
}
#[cfg(not(target_arch = "x86_64"))]
{
if let Some(ref stack) = context.kstack {
offset = stack_base - stack.as_ptr() as usize;
let mut new_stack = stack.clone();
kstack_opt = Some(new_stack);
}
}
grants = Arc::clone(&context.grants);
if flags.contains(CLONE_VM) {
name = Arc::clone(&context.name);
} else {
name = Arc::new(RwLock::new(context.name.read().clone()));
}
if flags.contains(CLONE_FS) {
cwd = Arc::clone(&context.cwd);
} else {
cwd = Arc::new(RwLock::new(context.cwd.read().clone()));
}
if flags.contains(CLONE_FILES) {
files = Arc::clone(&context.files);
} else {
files = Arc::new(RwLock::new(context.files.read().clone()));
}
if flags.contains(CLONE_SIGHAND) {
actions = Arc::clone(&context.actions);
} else {
actions = Arc::new(RwLock::new(context.actions.read().clone()));
}
}
// If not cloning files, dup to get a new number from scheme
// This has to be done outside the context lock to prevent deadlocks
if !flags.contains(CLONE_FILES) {
for (_fd, file_opt) in files.write().iter_mut().enumerate() {
let new_file_opt = if let Some(ref file) = *file_opt {
Some(FileDescriptor {
description: Arc::clone(&file.description),
cloexec: file.cloexec,
})
} else {
None
};
*file_opt = new_file_opt;
}
}
let maps_to_reobtain = if flags.contains(CLONE_VM) {
Vec::new()
} else {
grants.read().iter().filter_map(|grant| grant.desc_opt.as_ref().and_then(|file_ref| {
let FileDescription { scheme, number, .. } = { *file_ref.desc.description.read() };
let scheme_arc = match crate::scheme::schemes().get(scheme) {
Some(s) => Arc::downgrade(s),
None => return None,
};
let map = crate::syscall::data::Map {
address: grant.start_address().data(),
size: grant.size(),
offset: file_ref.offset,
flags: file_ref.flags | MapFlags::MAP_FIXED_NOREPLACE,
};
Some((scheme_arc, number, map))
})).collect()
};
// If vfork, block the current process
// This has to be done after the operations that may require context switches
if flags.contains(CLONE_VFORK) {
let contexts = context::contexts();
let context_lock = contexts.current().ok_or(Error::new(ESRCH))?;
let mut context = context_lock.write();
context.block("vfork");
vfork = true;
} else {
vfork = false;
}
// Set up new process
let new_context_lock = {
let mut contexts = context::contexts_mut();
let context_lock = contexts.new_context()?;
let mut context = context_lock.write();
pid = context.id;
context.pgid = pgid;
context.ppid = ppid;
context.ruid = ruid;
context.rgid = rgid;
context.rns = rns;
context.euid = euid;
context.egid = egid;
context.ens = ens;
context.sigmask = sigmask;
context.umask = umask;
//TODO: Better CPU balancing
if let Some(cpu_id) = cpu_id_opt {
context.cpu_id = Some(cpu_id);
} else {
context.cpu_id = Some(pid.into() % crate::cpu_count());
}
// Start as blocked. This is to ensure the context is never switched before any grants
// that have to be remapped, are mapped.
context.status = context::Status::Blocked;
context.vfork = vfork;
context.arch = arch;
// This is needed because these registers may have changed after this context was
// switched to, but before this was called.
#[cfg(all(target_arch = "x86_64", feature = "x86_fsgsbase"))]
unsafe {
context.arch.fsbase = x86::bits64::segmentation::rdfsbase() as usize;
x86::bits64::segmentation::swapgs();
context.arch.gsbase = x86::bits64::segmentation::rdgsbase() as usize;
x86::bits64::segmentation::swapgs();
}
if flags.contains(CloneFlags::CLONE_VM) {
// Reuse same CR3, same grants, everything.
context.grants = grants;
} else {
// TODO: Handle ENOMEM
let mut new_tables = setup_new_utable().expect("failed to allocate new page tables for cloned process");
let mut new_grants = UserGrants::new();
for old_grant in grants.read().iter().filter(|g| g.desc_opt.is_none()) {
new_grants.insert(old_grant.secret_clone(&mut new_tables.new_utable));
}
context.grants = Arc::new(RwLock::new(new_grants));
drop(grants);
new_tables.take();
context.arch.set_page_utable(unsafe { new_tables.new_utable.address() });
#[cfg(target_arch = "aarch64")]
context.arch.set_page_ktable(unsafe { new_tables.new_ktable.address() });
}
if let Some(fx) = kfx_opt.take() {
context.arch.set_fx(fx.as_ptr() as usize);
context.kfx = Some(fx);
}
// Set kernel stack
if let Some(stack) = kstack_opt.take() {
context.arch.set_stack(stack.as_ptr() as usize + offset);
context.kstack = Some(stack);
#[cfg(target_arch = "aarch64")]
{
context.arch.set_lr(interrupt::syscall::clone_ret as usize);
}
}
// TODO: Clone ksig?
#[cfg(target_arch = "aarch64")]
{
if let Some(stack) = &mut context.kstack {
unsafe {
// stack_base contains a pointer to InterruptStack. Get its offset from
// stack_base itself
let istack_offset = *(stack_base as *const u64) - stack_base as u64;
// Get the top of the new process' stack
let new_sp = stack.as_mut_ptr().add(offset);
// Update the pointer to the InterruptStack to reflect the new process'
// stack. (Without this the pointer would be InterruptStack on the parent
// process' stack).
*(new_sp as *mut u64) = new_sp as u64 + istack_offset;
// Update tpidr_el0 in the new process' InterruptStack
let mut interrupt_stack = &mut *(stack.as_mut_ptr().add(offset + istack_offset as usize) as *mut crate::arch::interrupt::InterruptStack);
interrupt_stack.iret.tpidr_el0 = tcb_addr;
}
}
}
context.name = name;
context.cwd = cwd;
context.files = files;
context.actions = actions;
if flags.contains(CLONE_VM) {
context.sigstack = info.and_then(|info| (info.target_sigstack != !0).then(|| info.target_sigstack));
} else {
context.sigstack = old_sigstack;
}
Arc::clone(context_lock)
};
for (scheme_weak, number, map) in maps_to_reobtain {
let scheme = match scheme_weak.upgrade() {
Some(s) => s,
None => continue,
};
let _ = scheme.kfmap(number, &map, &new_context_lock);
}
new_context_lock.write().status = context::Status::Runnable;
}
if ptrace::send_event(ptrace_event!(PTRACE_EVENT_CLONE, pid.into())).is_some() {
// Freeze the clone, allow ptrace to put breakpoints
// to it before it starts
let contexts = context::contexts();
let context = contexts.get(pid).expect("Newly created context doesn't exist??");
let mut context = context.write();
context.ptrace_stop = true;
}
// Race to pick up the new process!
ipi(IpiKind::Switch, IpiTarget::Other);
let _ = unsafe { context::switch() };
Ok(pid)
}
use crate::syscall::validate::validate_slice_mut;
fn empty<'lock>(context_lock: &'lock RwLock<Context>, mut context: RwLockWriteGuard<'lock, Context>, reaping: bool) -> RwLockWriteGuard<'lock, Context> {
// NOTE: If we do not replace the grants `Arc`, then a strange situation can appear where the
@@ -383,27 +30,23 @@ fn empty<'lock>(context_lock: &'lock RwLock<Context>, mut context: RwLockWriteGu
// remaining references to the grants, where there are in fact none. However, if either one is
// reaped before, then that reference will disappear, and no leak will occur.
//
// By removing the reference to the grants when the context will no longer be used, this
// By removing the reference to the address space when the context will no longer be used, this
// problem will never occur.
let addr_space_arc = match context.addr_space.take() {
Some(a) => a,
None => return context,
};
// FIXME, UNOPTIMIZED: Right now, this will allocate memory in order to store the new empty
// grants, which may not even be used (only in fexec I think). We should turn grants into an
// `Option`, and only reinitialize it there.
let mut grants_arc = mem::take(&mut context.grants);
if let Some(grants_lock_mut) = Arc::get_mut(&mut grants_arc) {
let mut grants_guard = grants_lock_mut.get_mut();
let grants = mem::replace(&mut *grants_guard, UserGrants::default());
for grant in grants.into_iter() {
if let Ok(addr_space) = Arc::try_unwrap(addr_space_arc).map(RwLock::into_inner) {
for grant in addr_space.grants.into_iter() {
let unmap_result = if reaping {
log::error!("{}: {}: Grant should not exist: {:?}", context.id.into(), *context.name.read(), grant);
let mut new_table = unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) };
grant.unmap_inactive(&mut new_table)
grant.unmap(&mut new_table.mapper(), &mut InactiveFlusher::new())
} else {
grant.unmap()
grant.unmap(&mut *unsafe { ActivePageTable::new(rmm::TableKind::User) }, PageFlushAll::new())
};
if unmap_result.file_desc.is_some() {
@@ -418,14 +61,6 @@ fn empty<'lock>(context_lock: &'lock RwLock<Context>, mut context: RwLockWriteGu
context
}
struct ExecFile(FileHandle);
impl Drop for ExecFile {
fn drop(&mut self) {
let _ = syscall::close(self.0);
}
}
pub fn exit(status: usize) -> ! {
ptrace::breakpoint_callback(PTRACE_STOP_EXIT, Some(ptrace_event!(PTRACE_STOP_EXIT, status)));
@@ -436,16 +71,10 @@ pub fn exit(status: usize) -> ! {
Arc::clone(&context_lock)
};
let mut close_files = Vec::new();
let mut close_files;
let pid = {
let mut context = context_lock.write();
{
let mut lock = context.files.write();
if Arc::strong_count(&context.files) == 1 {
mem::swap(lock.deref_mut(), &mut close_files);
}
}
context.files = Arc::new(RwLock::new(Vec::new()));
close_files = Arc::try_unwrap(mem::take(&mut context.files)).map_or_else(|_| Vec::new(), RwLock::into_inner);
context.id
};
@@ -669,7 +298,7 @@ pub fn mprotect(address: usize, size: usize, flags: MapFlags) -> Result<usize> {
let mut active_table = unsafe { ActivePageTable::new(TableKind::User) };
let flush_all = PageFlushAll::new();
let mut flush_all = PageFlushAll::new();
let start_page = Page::containing_address(VirtualAddress::new(address));
let end_page = Page::containing_address(VirtualAddress::new(end_address));
@@ -999,19 +628,31 @@ pub fn usermode_bootstrap(mut data: Box<[u8]>) -> ! {
assert!(!data.is_empty());
const LOAD_BASE: usize = 0;
-let grant = context::memory::Grant::map(VirtualAddress::new(LOAD_BASE), ((data.len()+PAGE_SIZE-1)/PAGE_SIZE)*PAGE_SIZE, PageFlags::new().user(true).write(true).execute(true));
 {
     let mut active_table = unsafe { ActivePageTable::new(TableKind::User) };
+    let grant = context::memory::Grant::zeroed(Page::containing_address(VirtualAddress::new(LOAD_BASE)), (data.len()+PAGE_SIZE-1)/PAGE_SIZE, PageFlags::new().user(true).write(true).execute(true), &mut active_table, PageFlushAll::new()).expect("failed to allocate memory for bootstrap");
     for (index, page) in grant.pages().enumerate() {
         let len = if data.len() - index * PAGE_SIZE < PAGE_SIZE { data.len() % PAGE_SIZE } else { PAGE_SIZE };
-        let frame = active_table.translate_page(page).expect("expected mapped init memory to have a corresponding frame");
-        unsafe { ((frame.start_address().data() + crate::PHYS_OFFSET) as *mut u8).copy_from_nonoverlapping(data.as_ptr().add(index * PAGE_SIZE), len); }
-    }
-}
-context::contexts().current().expect("expected a context to exist when executing init").read().grants.write().insert(grant);
+        let physaddr = active_table.translate_page(page)
+            .expect("expected mapped init memory to have a corresponding frame")
+            .start_address();
+        unsafe {
+            (RmmA::phys_to_virt(physaddr).data() as *mut u8).copy_from_nonoverlapping(data.as_ptr().add(index * PAGE_SIZE), len);
+        }
+    }
+    context::contexts().current()
+        .expect("expected a context to exist when executing init")
+        .read().addr_space()
+        .expect("expected bootstrap context to have an address space")
+        .write().grants.insert(grant);
+}
log::info!("Usermode bootstrap");
drop(data);
@@ -1022,127 +663,3 @@ pub fn usermode_bootstrap(mut data: Box<[u8]>) -> ! {
usermode(start, 0, 0, 0);
}
}
pub fn exec(memranges: &[ExecMemRange], instruction_ptr: usize, stack_ptr: usize) -> Result<usize> {
// TODO: rlimit?
if memranges.len() > 1024 {
return Err(Error::new(EINVAL));
}
let mut new_grants = UserGrants::new();
{
let current_context_lock = Arc::clone(context::contexts().current().ok_or(Error::new(ESRCH))?);
// Linux will always destroy other threads immediately if one of them executes execve(2).
// At the moment the Redox kernel is ignorant of threads, other than them sharing files,
// memory, etc. We fail with EBUSY if any resources that are being replaced, are shared.
let mut old_grants = Arc::try_unwrap(mem::take(&mut current_context_lock.write().grants)).map_err(|_| Error::new(EBUSY))?.into_inner();
// TODO: Allow multiple contexts which share the file table, to have one of them run exec?
let mut old_files = Arc::try_unwrap(mem::take(&mut current_context_lock.write().files)).map_err(|_| Error::new(EBUSY))?.into_inner();
// FIXME: Handle leak in case of ENOMEM.
let mut new_tables = setup_new_utable()?;
let mut flush = PageFlushAll::new();
// FIXME: This is to the extreme, but fetch with atomic volatile?
for memrange in memranges.iter().copied() {
let old_address = if memrange.old_address == !0 { None } else { Some(memrange.old_address) };
if memrange.address % PAGE_SIZE != 0 || old_address.map_or(false, |a| a % PAGE_SIZE != 0) || memrange.size % PAGE_SIZE != 0 {
return Err(Error::new(EINVAL));
}
if memrange.size == 0 { continue }
let new_start = Page::containing_address(VirtualAddress::new(memrange.address));
let flags = MapFlags::from_bits(memrange.flags).ok_or(Error::new(EINVAL))?;
let page_count = memrange.size / PAGE_SIZE;
let flags = page_flags(flags);
if let Some(old_address) = old_address {
let old_start = VirtualAddress::new(memrange.old_address);
let entire_region = Region::new(old_start, memrange.size);
// TODO: This will do one B-Tree search for each memrange. If a process runs exec
// and keeps every range the way it is, then this would be O(n log n)!
loop {
let region = match old_grants.conflicts(entire_region).next().map(|g| *g.region()) {
Some(r) => r,
None => break,
};
let owned = old_grants.take(&region).expect("cannot fail");
let (before, mut current, after) = owned.extract(region).expect("cannot fail");
if let Some(before) = before { old_grants.insert(before); }
if let Some(after) = after { old_grants.insert(after); }
new_grants.insert(current.move_to_address_space(new_start, &mut new_tables.new_utable, flags, &mut flush));
}
} else {
new_grants.insert(Grant::zeroed_inactive(new_start, page_count, flags, &mut new_tables.new_utable)?);
}
}
{
unsafe { flush.ignore(); }
new_tables.take();
let mut context = current_context_lock.write();
context.grants = Arc::new(RwLock::new(new_grants));
let old_utable = context.arch.get_page_utable();
let old_frame = Frame::containing_address(PhysicalAddress::new(old_utable));
context.arch.set_page_utable(unsafe { new_tables.new_utable.address() });
#[cfg(target_arch = "x86_64")]
unsafe { x86::controlregs::cr3_write(new_tables.new_utable.address() as u64); }
for old_grant in old_grants.into_iter() {
old_grant.unmap_inactive(&mut unsafe { InactivePageTable::from_address(old_utable) });
}
crate::memory::deallocate_frames(old_frame, 1);
#[cfg(target_arch = "aarch64")]
context.arch.set_page_ktable(unsafe { new_tables.new_ktable.address() });
context.actions = Arc::new(RwLock::new(vec![(
SigAction {
sa_handler: unsafe { mem::transmute(SIG_DFL) },
sa_mask: [0; 2],
sa_flags: SigActionFlags::empty(),
},
0
); 128]));
let was_vfork = mem::replace(&mut context.vfork, false);
// TODO: Reuse in place if the file table is not shared.
drop(context);
let mut context = current_context_lock.write();
context.files = Arc::new(RwLock::new(old_files));
let ppid = context.ppid;
drop(context);
// TODO: Should this code be preserved as is?
if was_vfork {
let contexts = context::contexts();
if let Some(context_lock) = contexts.get(ppid) {
let mut context = context_lock.write();
if !context.unblock() {
println!("{} not blocked for exec vfork unblock", ppid.into());
}
} else {
println!("{} not found for exec vfork unblock", ppid.into());
}
}
}
}
unsafe { usermode(instruction_ptr, stack_ptr, 0, 0); }
}