Files
moss-kernel/src/process/exec.rs
Matthew Leach 87fe041ba0 procfs: implement /proc/<PID>/maps
Implement the `maps` file which shows a process's VMA entries. Example
output:

```
[root@moss-machine /]# cat /proc/1/maps
500000000000-500000117000 r-xp 0000000000                    /bin/bash
50000012b000-500000130000 r--p 000011b000                    /bin/bash
500000130000-50000013e000 rw-p 0000120000                    /bin/bash
700000000000-70000002b000 r-xp 0000000000                    /lib/ld-linux-aarch64.so.1
70000003e000-700000040000 r--p 000002e000                    /lib/ld-linux-aarch64.so.1
700000040000-700000042000 rw-p 0000030000                    /lib/ld-linux-aarch64.so.1
7fffff510000-7fffff585000 r-xp 0000000000                    /usr/lib/libncursesw.so.6
7fffff585000-7fffff59b000 ---p 0000075000                    /usr/lib/libncursesw.so.6
7fffff59b000-7fffff5a0000 r--p 000007b000                    /usr/lib/libncursesw.so.6
7fffff5a0000-7fffff5a1000 rw-p 0000080000                    /usr/lib/libncursesw.so.6
7fffff5b0000-7fffff760000 r-xp 0000000000                    /usr/lib/libc.so.6
7fffff760000-7fffff76d000 ---p 00001b0000                    /usr/lib/libc.so.6
7fffff76d000-7fffff770000 r--p 00001bd000                    /usr/lib/libc.so.6
7fffff770000-7fffff772000 rw-p 00001c0000                    /usr/lib/libc.so.6
7fffff772000-7fffff779000 rw-p 0000000000
7fffff780000-7fffff7d9000 r-xp 0000000000                    /usr/lib/libreadline.so.8
7fffff7d9000-7fffff7ed000 ---p 0000059000                    /usr/lib/libreadline.so.8
7fffff7ed000-7fffff7f0000 r--p 000005d000                    /usr/lib/libreadline.so.8
7fffff7f0000-7fffff7f6000 rw-p 0000060000                    /usr/lib/libreadline.so.8
7fffff7f6000-7fffff7fb000 rw-p 0000000000
7fffff800000-800000063000 rw-p 0000000000                    [stack]
```
2026-01-27 06:10:43 +00:00

466 lines
15 KiB
Rust

use crate::ArchImpl;
use crate::process::Comm;
use crate::process::ptrace::{TracePoint, ptrace_stop};
use crate::sched::current::current_task_shared;
use crate::{
arch::Arch,
fs::VFS,
memory::{
page::ClaimedPage,
uaccess::{copy_from_user, cstr::UserCStr},
},
process::{ctx::Context, thread_group::signal::SignalActionState},
sched::current::current_task,
};
use alloc::{string::String, vec};
use alloc::{string::ToString, sync::Arc, vec::Vec};
use auxv::{AT_BASE, AT_ENTRY, AT_NULL, AT_PAGESZ, AT_PHDR, AT_PHENT, AT_PHNUM, AT_RANDOM};
use core::{ffi::c_char, mem, slice};
use libkernel::{
UserAddressSpace, VirtualMemory,
error::{ExecError, KernelError, Result},
fs::{Inode, path::Path},
memory::{
PAGE_SIZE,
address::{TUA, VA},
permissions::PtePermissions,
proc_vm::{
ProcessVM,
memory_map::MemoryMap,
vmarea::{VMAPermissions, VMArea, VMAreaKind},
},
region::VirtMemoryRegion,
},
};
use object::Endian;
use object::elf::{ET_DYN, ProgramHeader64};
use object::{
LittleEndian,
elf::{self, PT_LOAD},
read::elf::{FileHeader, ProgramHeader},
};
mod auxv;
/// Load bias applied to the dynamic linker named by `PT_INTERP`; also the value
/// reported to userspace through `AT_BASE`.
const LINKER_BIAS: usize = 0x0000_7000_0000_0000;
/// Load bias applied to the main executable when it is position independent
/// (`ET_DYN`).
const PROG_BIAS: usize = 0x0000_5000_0000_0000;
/// Exclusive upper bound of the initial user stack; the stack contents are
/// packed downwards from this address.
const STACK_END: usize = 0x0000_8000_0000_0000;
/// Size of the stack VMA: 8 MiB.
const STACK_SZ: usize = 8 * 1024 * 1024;
/// Lowest address covered by the stack VMA.
const STACK_START: usize = STACK_END - STACK_SZ;
/// Walk the program headers of an ELF image and append one [`VMArea`] to
/// `vmas` for every `PT_LOAD` segment, shifting the load address by `bias`
/// when one is supplied. Every created area is named after `path`.
///
/// Returns the start address of the *VMA* that is assumed to contain the
/// program headers (the loadable segment whose file offset is zero), or `None`
/// when no such segment exists. If several segments match, the last one wins.
fn process_prog_headers<E: Endian>(
    hdrs: &[ProgramHeader64<E>],
    vmas: &mut Vec<VMArea>,
    bias: Option<usize>,
    elf_file: Arc<dyn Inode>,
    path: &Path,
    endian: E,
) -> Option<VA> {
    let mut phdr_segment_base = None;

    let loadable = hdrs.iter().filter(|seg| seg.p_type(endian) == PT_LOAD);

    for segment in loadable {
        let mut area = VMArea::from_pheader(elf_file.clone(), *segment, endian, bias);

        // Assumption: the segment mapped from file offset 0 is the one that
        // carries the ELF and program headers.
        if segment.p_offset.get(endian) == 0 {
            phdr_segment_base = Some(area.region().start_address());
        }

        area.set_name(path.as_str());
        vmas.push(area);
    }

    phdr_segment_base
}
/// Replace the current task's program image with the ELF executable in
/// `inode`.
///
/// The function:
/// 1. parses the ELF header and program header table,
/// 2. creates VMAs for every `PT_LOAD` segment (biased by `PROG_BIAS` for
///    `ET_DYN` images) and, when a `PT_INTERP` segment is present, for the
///    interpreter as well,
/// 3. builds the initial user stack (arguments, environment, auxv),
/// 4. reports the exec to ptrace and then installs the new address space,
///    CPU context, `comm` and default signal actions on the current task,
/// 5. closes all close-on-exec file descriptors.
///
/// `path` is only used to name the VMAs; `argv[0]`, when present, becomes the
/// task's new `comm`.
///
/// # Errors
///
/// * `ExecError::InvalidElfFormat` if the header cannot be parsed, the image
///   is not little-endian, the interpreter path is not valid UTF-8 (or its
///   buffer cannot be allocated), or the entry address overflows.
/// * `ExecError::InvalidPHdrFormat` if the program header table cannot be
///   sized, allocated or parsed.
/// * Any error propagated from reading the file, resolving the interpreter,
///   building the memory map or setting up the stack.
async fn exec_elf(
    inode: Arc<dyn Inode>,
    path: &Path,
    argv: Vec<String>,
    envp: Vec<String>,
) -> Result<()> {
    // Read ELF header
    let mut buf = [0u8; core::mem::size_of::<elf::FileHeader64<LittleEndian>>()];
    inode.read_at(0, &mut buf).await?;

    let elf = elf::FileHeader64::<LittleEndian>::parse(buf.as_slice())
        .map_err(|_| ExecError::InvalidElfFormat)?;
    // `parse` accepts either data encoding, but this header type is fixed to
    // little-endian, so `endian()` fails for a big-endian file. That is an
    // unsupported executable, not a kernel bug: report it rather than panic.
    let endian = elf.endian().map_err(|_| ExecError::InvalidElfFormat)?;

    // Read full program header table. All three inputs come from the file, so
    // the end offset is computed with overflow checks.
    let ph_table_size = usize::try_from(elf.e_phoff.get(endian))
        .ok()
        .and_then(|phoff| {
            // u16 * u16 always fits.
            let table_len =
                elf.e_phnum.get(endian) as usize * elf.e_phentsize.get(endian) as usize;
            phoff.checked_add(table_len)
        })
        .ok_or(ExecError::InvalidPHdrFormat)?;
    let mut ph_buf = try_zeroed_vec(ph_table_size).ok_or(ExecError::InvalidPHdrFormat)?;

    inode.read_at(0, &mut ph_buf).await?;

    let hdrs = elf
        .program_headers(endian, ph_buf.as_slice())
        .map_err(|_| ExecError::InvalidPHdrFormat)?;

    // Detect PT_INTERP (dynamic linker) if present
    let mut interp_path: Option<String> = None;
    for hdr in hdrs.iter() {
        if hdr.p_type(endian) == elf::PT_INTERP {
            let off = hdr.p_offset(endian) as usize;
            let filesz = hdr.p_filesz(endian) as usize;
            if filesz == 0 {
                break;
            }

            // `filesz` is file-controlled; do not let an absurd value take the
            // infallible allocation path.
            let mut ibuf = try_zeroed_vec(filesz).ok_or(ExecError::InvalidElfFormat)?;
            inode.read_at(off as u64, &mut ibuf).await?;

            // The path is NUL-terminated inside the segment; tolerate a
            // missing terminator by taking the whole segment.
            let len = ibuf.iter().position(|&b| b == 0).unwrap_or(filesz);
            let s = core::str::from_utf8(&ibuf[..len]).map_err(|_| ExecError::InvalidElfFormat)?;
            interp_path = Some(s.to_string());
            break;
        }
    }

    // Set up a program bias for PIE.
    let main_bias = if elf.e_type.get(endian) == ET_DYN {
        Some(PROG_BIAS)
    } else {
        None
    };

    let mut auxv = vec![
        AT_PHNUM,
        elf.e_phnum.get(endian) as _,
        AT_PHENT,
        elf.e_phentsize(endian) as _,
    ];

    let mut vmas = Vec::new();

    // Process the binary program headers.
    if let Some(hdr_addr) =
        process_prog_headers(hdrs, &mut vmas, main_bias, inode.clone(), path, endian)
    {
        auxv.push(AT_PHDR);
        auxv.push(hdr_addr.add_bytes(elf.e_phoff(endian) as _).value() as _);
    }

    // `e_entry` is file-controlled; reject values that overflow once biased.
    let main_entry = usize::try_from(elf.e_entry(endian))
        .ok()
        .and_then(|entry| entry.checked_add(main_bias.unwrap_or(0)))
        .ok_or(ExecError::InvalidElfFormat)?;
    let main_entry = VA::from_value(main_entry);

    // AT_ENTRY is the same in the static and interp case.
    auxv.push(AT_ENTRY);
    auxv.push(main_entry.value() as _);

    let entry_addr = if let Some(path) = interp_path {
        auxv.push(AT_BASE);
        auxv.push(LINKER_BIAS as _);

        // Returns the entry address of the interp program.
        process_interp(path, &mut vmas).await?
    } else {
        // Otherwise, it's just the binary itself.
        main_entry
    };

    let mut stack_vma = VMArea::new(
        VirtMemoryRegion::new(VA::from_value(STACK_START), STACK_SZ),
        VMAreaKind::Anon,
        VMAPermissions::rw(),
    );
    stack_vma.set_name("[stack]");
    vmas.push(stack_vma);

    let mut mem_map = MemoryMap::from_vmas(vmas)?;
    let stack_ptr = setup_user_stack(&mut mem_map, &argv, &envp, auxv)?;

    // We are now committed to the exec. Inform ptrace.
    ptrace_stop(TracePoint::Exec).await;

    let user_ctx = ArchImpl::new_user_context(entry_addr, stack_ptr);
    let mut vm = ProcessVM::from_map(mem_map);

    // We don't have to worry about actually calling for a full context switch
    // here. Parts of the old process that are replaced will go out of scope and
    // be cleaned up (open files, etc.); We don't need to preserve any extra
    // state. Simply activate the new process's address space.
    vm.mm_mut().address_space_mut().activate();

    let new_comm = argv.first().map(|s| Comm::new(s.as_str()));

    {
        let mut current_task = current_task();

        if let Some(new_comm) = new_comm {
            *current_task.comm.lock_save_irq() = new_comm;
        }

        current_task.ctx = Context::from_user_ctx(user_ctx);
        *current_task.vm.lock_save_irq() = vm;
        *current_task.process.signals.lock_save_irq() = SignalActionState::new_default();
    }

    // Close all the CLOEXEC FDs. The table is cloned so that the lock is not
    // held across the `await` below, then written back.
    let mut fd_table = current_task().fd_table.lock_save_irq().clone();
    fd_table.close_cloexec_entries().await;
    *current_task().fd_table.lock_save_irq() = fd_table;

    Ok(())
}

/// Allocate a zero-filled buffer of `len` bytes, returning `None` when the
/// memory cannot be reserved instead of taking the infallible allocation path.
///
/// Used for buffers whose size comes straight from fields of the executable.
fn try_zeroed_vec(len: usize) -> Option<Vec<u8>> {
    let mut buf: Vec<u8> = Vec::new();
    buf.try_reserve_exact(len).ok()?;
    buf.resize(len, 0);
    Some(buf)
}
/// Execute a `#!` script by running its interpreter instead.
///
/// Reads up to the first 256 bytes of `inode`, takes the first line, and
/// splits whatever follows `#!` on whitespace into the interpreter path and
/// its arguments. The interpreter is resolved from the VFS root and executed
/// as an ELF with the argument vector
/// `[interpreter, interp_args..., script_path, original_argv[1..]...]`.
///
/// # Errors
///
/// * `ExecError::InvalidScriptFormat` if the first line is not valid UTF-8,
///   does not start with `#!`, or names no interpreter.
/// * Any error propagated from reading the file, resolving the interpreter or
///   executing it.
async fn exec_script(
    path: &Path,
    inode: Arc<dyn Inode>,
    argv: Vec<String>,
    envp: Vec<String>,
) -> Result<()> {
    // Parse shebang line to get interpreter path and arguments
    let mut buf = vec![0u8; 256];
    let n = inode.read_at(0, &mut buf).await?;
    let head = &buf[..n];

    // Only the first line has to be text. Cut it out *before* UTF-8
    // validation so that binary data later in the script, or a multi-byte
    // character split by the 256-byte read window, cannot invalidate an
    // otherwise well-formed shebang.
    let line_len = head.iter().position(|&b| b == b'\n').unwrap_or(head.len());
    let first_line =
        core::str::from_utf8(&head[..line_len]).map_err(|_| ExecError::InvalidScriptFormat)?;

    // Checked prefix removal instead of slicing at a fixed index.
    let interp_spec = first_line
        .strip_prefix("#!")
        .ok_or(ExecError::InvalidScriptFormat)?;

    // `split_whitespace` also discards a trailing '\r' from CRLF scripts.
    let mut parts = interp_spec.split_whitespace();
    let Some(interp_path) = parts.next() else {
        return Err(ExecError::InvalidScriptFormat.into());
    };

    // Build new argv: [interpreter, interp_args..., script_path, original_argv...]
    let mut new_argv = Vec::new();
    new_argv.push(interp_path.to_string());
    new_argv.extend(parts.map(|s| s.to_string()));
    new_argv.push(path.as_str().to_string());
    new_argv.extend(argv.into_iter().skip(1)); // Skip original argv[0]

    // Resolve interpreter inode
    let interp_path = Path::new(interp_path);
    let task = current_task_shared();
    let interp_inode = VFS
        .resolve_path(interp_path, VFS.root_inode(), &task)
        .await?;

    // Execute interpreter
    exec_elf(interp_inode, interp_path, new_argv, envp).await
}
/// Execute the file behind `inode`, choosing the loader from its first bytes.
///
/// * `\x7FELF` — handled by the ELF loader.
/// * `#!` — handled as an interpreter script.
/// * anything else — rejected with `ExecError::InvalidElfFormat`.
///
/// Errors from reading the file or from the selected loader are propagated.
pub async fn kernel_exec(
    path: &Path,
    inode: Arc<dyn Inode>,
    argv: Vec<String>,
    envp: Vec<String>,
) -> Result<()> {
    let mut magic = [0u8; 4];
    inode.read_at(0, &mut magic).await?;

    match magic {
        [0x7F, b'E', b'L', b'F'] => exec_elf(inode, path, argv, envp).await,
        [b'#', b'!', ..] => exec_script(path, inode, argv, envp).await,
        _ => Err(ExecError::InvalidElfFormat.into()),
    }
}
// Sets up the user stack according to the System V ABI.
//
// The stack layout from high addresses to low addresses is:
// - Argument and Environment strings
// - Padding to 16-byte boundary
// - Auxiliary Vector (auxv)
// - Environment pointers (envp)
// - Argument pointers (argv)
// - Argument count (argc)
//
// The final stack pointer will point to `argc`.
fn setup_user_stack(
mm: &mut MemoryMap<<ArchImpl as VirtualMemory>::ProcessAddressSpace>,
argv: &[String],
envp: &[String],
mut auxv: Vec<u64>,
) -> Result<VA> {
// Calculate the space needed and the virtual addresses for all strings and
// pointers.
let mut string_addrs = Vec::new();
let mut total_string_size = 0;
// We add strings to the stack from top-down.
for s in envp.iter().chain(argv.iter()) {
let len = s.len() + 1; // +1 for null terminator
total_string_size += len;
string_addrs.push(len); // Temporarily store length
}
let mut current_va = STACK_END;
for len in string_addrs.iter_mut().rev() {
// Now calculate the final virtual address of each string.
current_va -= *len;
*len = current_va; // Replace length with the VA
}
let (envp_addrs, argv_addrs) = string_addrs.split_at(envp.len());
let mut info_block = Vec::<u64>::new();
info_block.push(argv.len() as u64); // argc
info_block.extend(argv_addrs.iter().map(|&addr| addr as u64));
info_block.push(0); // Null terminator for argv
info_block.extend(envp_addrs.iter().map(|&addr| addr as u64));
info_block.push(0); // Null terminator for envp
// Add auxiliary vectors
auxv.push(AT_PAGESZ);
auxv.push(PAGE_SIZE as u64);
auxv.push(AT_RANDOM);
// TODO: SECURITY: Actually make this a random value.
auxv.push(STACK_END as u64 - 0x10);
auxv.push(AT_NULL);
auxv.push(0);
info_block.append(&mut auxv);
let info_block_size = info_block.len() * mem::size_of::<u64>();
// The top of the info block must be 16-byte aligned. The stack pointer on
// entry to the new process must also be 16-byte aligned.
let strings_base_va = STACK_END - total_string_size;
let final_sp_unaligned = strings_base_va - info_block_size;
let final_sp_val = final_sp_unaligned & !0xF; // Align down to 16 bytes
let total_stack_size = STACK_END - final_sp_val;
if total_stack_size > STACK_SZ {
return Err(KernelError::TooLarge);
}
let mut stack_image = vec![0u8; total_stack_size];
// Write strings into the image
let mut string_cursor = STACK_END;
for s in envp.iter().chain(argv.iter()).rev() {
string_cursor -= s.len() + 1;
let offset = total_stack_size - (STACK_END - string_cursor);
stack_image[offset..offset + s.len()].copy_from_slice(s.as_bytes());
// Null terminator is already there from vec![0;...].
}
// Write info block into the image
let info_block_bytes: &[u8] =
unsafe { slice::from_raw_parts(info_block.as_ptr().cast(), info_block_size) };
let info_block_offset = total_stack_size - (STACK_END - final_sp_val);
stack_image[info_block_offset..info_block_offset + info_block_size]
.copy_from_slice(info_block_bytes);
// Allocate pages, copy image, and map into user space
let num_pages = total_stack_size.div_ceil(PAGE_SIZE);
for i in 0..num_pages {
let mut page = ClaimedPage::alloc_zeroed()?;
// Calculate the slice of the stack image that corresponds to this page
let image_end = total_stack_size - i * PAGE_SIZE;
let image_start = image_end.saturating_sub(PAGE_SIZE);
let image_slice = &stack_image[image_start..image_end];
// Copy the data
let page_slice = page.as_slice_mut();
page_slice[PAGE_SIZE - image_slice.len()..].copy_from_slice(image_slice);
// Map the page to the correct virtual address
let page_va = VA::from_value(STACK_END - (i + 1) * PAGE_SIZE);
mm.address_space_mut()
.map_page(page.leak(), page_va, PtePermissions::rw(true))?;
}
Ok(VA::from_value(final_sp_val))
}
/// Dynamic linker path: map the `PT_INTERP` interpreter and return the entry
/// address of the interpreter program.
///
/// The interpreter at `interp_path` is resolved from the VFS root (the path is
/// assumed to be absolute), its `PT_LOAD` segments are appended to `vmas`
/// biased by `LINKER_BIAS`, and its biased entry point is returned.
///
/// # Errors
///
/// * `ExecError::InvalidElfFormat` if the interpreter's header cannot be
///   parsed, it is not little-endian, or its entry address overflows.
/// * `ExecError::InvalidPHdrFormat` if its program header table cannot be
///   sized, allocated or parsed.
/// * Any error propagated from path resolution or file reads.
async fn process_interp(interp_path: String, vmas: &mut Vec<VMArea>) -> Result<VA> {
    // Resolve interpreter path from root; this assumes interp_path is absolute.
    let task = current_task_shared();
    let path = Path::new(&interp_path);
    let interp_inode = VFS.resolve_path(path, VFS.root_inode(), &task).await?;

    // Parse interpreter ELF header
    let mut hdr_buf = [0u8; core::mem::size_of::<elf::FileHeader64<LittleEndian>>()];
    interp_inode.read_at(0, &mut hdr_buf).await?;
    let interp_elf = elf::FileHeader64::<LittleEndian>::parse(&hdr_buf[..])
        .map_err(|_| ExecError::InvalidElfFormat)?;
    // `endian()` fails for a big-endian interpreter; treat that as an invalid
    // executable instead of panicking on file-controlled data.
    let iendian = interp_elf
        .endian()
        .map_err(|_| ExecError::InvalidElfFormat)?;

    // Read interpreter program headers. The table bounds come from the file,
    // so compute them with overflow checks and allocate fallibly.
    let interp_ph_table_size = usize::try_from(interp_elf.e_phoff.get(iendian))
        .ok()
        .and_then(|phoff| {
            // u16 * u16 always fits.
            let table_len = interp_elf.e_phnum.get(iendian) as usize
                * interp_elf.e_phentsize.get(iendian) as usize;
            phoff.checked_add(table_len)
        })
        .ok_or(ExecError::InvalidPHdrFormat)?;

    let mut interp_ph_buf: Vec<u8> = Vec::new();
    interp_ph_buf
        .try_reserve_exact(interp_ph_table_size)
        .map_err(|_| ExecError::InvalidPHdrFormat)?;
    interp_ph_buf.resize(interp_ph_table_size, 0);

    interp_inode.read_at(0, &mut interp_ph_buf).await?;
    let interp_hdrs = interp_elf
        .program_headers(iendian, &interp_ph_buf[..])
        .map_err(|_| ExecError::InvalidPHdrFormat)?;

    // Build VMAs for interpreter. The returned header address is not needed
    // here.
    process_prog_headers(
        interp_hdrs,
        vmas,
        Some(LINKER_BIAS),
        interp_inode,
        path,
        iendian,
    );

    // `e_entry` is file-controlled; reject values that overflow once biased.
    let interp_entry = usize::try_from(interp_elf.e_entry(iendian))
        .ok()
        .and_then(|entry| LINKER_BIAS.checked_add(entry))
        .ok_or(ExecError::InvalidElfFormat)?;

    Ok(VA::from_value(interp_entry))
}
/// `execve` system call entry point.
///
/// Copies the NULL-terminated `usr_argv` and `usr_env` pointer arrays and the
/// strings they reference out of user memory, then the executable `path`,
/// resolves that path from the VFS root and hands everything to
/// [`kernel_exec`].
///
/// Each string is staged through a single 1024-byte scratch buffer. Returns
/// `Ok(0)` on success; any user-copy, path-resolution or exec error is
/// propagated.
pub async fn sys_execve(
    path: TUA<c_char>,
    mut usr_argv: TUA<TUA<c_char>>,
    mut usr_env: TUA<TUA<c_char>>,
) -> Result<usize> {
    let task = current_task_shared();
    // Scratch space shared by every string copied from userspace below.
    let mut buf = [0; 1024];
    let mut argv = Vec::new();
    let mut envp = Vec::new();

    // Collect the argument strings until the NULL terminator of the array.
    let mut arg_ptr = copy_from_user(usr_argv).await?;
    while !arg_ptr.is_null() {
        let arg = UserCStr::from_ptr(arg_ptr).copy_from_user(&mut buf).await?;
        argv.push(arg.to_string());

        usr_argv = usr_argv.add_objs(1);
        arg_ptr = copy_from_user(usr_argv).await?;
    }

    // Same procedure for the environment strings.
    let mut env_ptr = copy_from_user(usr_env).await?;
    while !env_ptr.is_null() {
        let var = UserCStr::from_ptr(env_ptr).copy_from_user(&mut buf).await?;
        envp.push(var.to_string());

        usr_env = usr_env.add_objs(1);
        env_ptr = copy_from_user(usr_env).await?;
    }

    // The path is copied last: it borrows the scratch buffer until the exec
    // call below is done with it.
    let exe_path_str = UserCStr::from_ptr(path).copy_from_user(&mut buf).await?;
    let exe_path = Path::new(exe_path_str);
    let inode = VFS
        .resolve_path(exe_path, VFS.root_inode(), &task)
        .await?;

    kernel_exec(exe_path, inode, argv, envp).await.map(|()| 0)
}