Merge pull request #57 from arihant2math/futex

This commit is contained in:
Matthew Leach
2025-12-21 13:30:40 +00:00
committed by GitHub
5 changed files with 169 additions and 1 deletions

View File

@@ -162,6 +162,9 @@ pub enum KernelError {
#[error("Buffer is full")]
BufferFull,
#[error("Operation would block")]
TryAgain,
#[error("No such process")]
NoProcess,

View File

@@ -44,6 +44,7 @@ pub fn kern_err_to_syscall(err: KernelError) -> isize {
KernelError::BadFd => EBADF,
KernelError::InvalidValue => EINVAL,
KernelError::Fault => EFAULT,
KernelError::TryAgain => EAGAIN,
KernelError::BrokenPipe => EPIPE,
KernelError::Fs(FsError::NotFound) => ENOENT,
KernelError::NotATty => ENOTTY,

View File

@@ -62,7 +62,7 @@ use crate::{
umask::sys_umask,
wait::sys_wait4,
},
threading::{sys_set_robust_list, sys_set_tid_address},
threading::{sys_futex, sys_set_robust_list, sys_set_tid_address},
},
sched::current_task,
};
@@ -172,6 +172,17 @@ pub async fn handle_syscall() {
0x5d => sys_exit(arg1 as _),
0x5e => sys_exit_group(arg1 as _),
0x60 => sys_set_tid_address(VA::from_value(arg1 as _)).await,
0x62 => {
sys_futex(
TUA::from_value(arg1 as _),
arg2 as _,
arg3 as _,
VA::from_value(arg4 as _),
TUA::from_value(arg5 as _),
arg6 as _,
)
.await
}
0x63 => sys_set_robust_list(TUA::from_value(arg1 as _), arg2 as _).await,
0x65 => sys_nanosleep(TUA::from_value(arg1 as _), TUA::from_value(arg2 as _)).await,
0x71 => sys_clock_gettime(arg1 as _, TUA::from_value(arg2 as _)).await,

View File

@@ -1,11 +1,40 @@
use core::ffi::c_long;
use core::mem::size_of;
use crate::memory::uaccess::copy_from_user;
use crate::sched::current_task;
use crate::sync::{OnceLock, SpinLock};
use alloc::{collections::BTreeMap, sync::Arc};
use libkernel::sync::waker_set::{WakerSet, wait_until};
use libkernel::{
error::{KernelError, Result},
memory::address::{TUA, VA},
};
/// Wait state associated with one futex word.
struct FutexWaitQueue {
    /// Wakers belonging to the tasks blocked on this futex.
    wakers: WakerSet,
    /// Count of wake-ups that have been posted for this futex but not yet
    /// consumed by a waiter.
    wakeups: usize,
}

impl FutexWaitQueue {
    /// Builds a queue with a fresh [`WakerSet`] and no pending wake-ups.
    fn new() -> Self {
        let wakers = WakerSet::new();
        Self { wakers, wakeups: 0 }
    }
}
/// Global futex table mapping a user address to its wait queue.
///
/// The table is created lazily the first time a `FUTEX_WAIT` is issued, and
/// each entry is shared through an `Arc` so a queue can be used after the
/// table lock has been released.
// TODO: statically allocate an array of SpinLock<Vec<FutexWaitQueue>>,
// then hash into that table to find its bucket.
// TODO: Should be physical address, not user address
// NOTE(review): the key is the raw user address, so identical addresses in
// different address spaces appear to share a queue - confirm.
#[allow(clippy::type_complexity)]
static FUTEX_TABLE: OnceLock<SpinLock<BTreeMap<TUA<u32>, Arc<SpinLock<FutexWaitQueue>>>>> =
OnceLock::new();
pub async fn sys_set_tid_address(_tidptr: VA) -> Result<usize> {
let tid = current_task().tid;
@@ -38,3 +67,88 @@ pub async fn sys_set_robust_list(head: TUA<RobustListHead>, len: usize) -> Resul
Ok(0)
}
// Futex operation codes accepted in the `op` argument of `sys_futex`.

/// Block if the futex word still holds the expected value.
const FUTEX_WAIT: i32 = 0;
/// Wake up to `val` tasks waiting on the futex word.
const FUTEX_WAKE: i32 = 1;
/// Bitset variant of `FUTEX_WAIT`; currently handled like `FUTEX_WAIT`.
const FUTEX_WAIT_BITSET: i32 = 9;
/// Bitset variant of `FUTEX_WAKE`; currently handled like `FUTEX_WAKE`.
const FUTEX_WAKE_BITSET: i32 = 10;
/// Option bit OR-ed into `op`; masked off before the command is dispatched.
const FUTEX_PRIVATE_FLAG: i32 = 128;
/// Handles the `futex` system call.
///
/// Supported commands are `FUTEX_WAIT`, `FUTEX_WAKE` and their `*_BITSET`
/// variants (the bitset is only validated, not used for matching). The
/// `FUTEX_PRIVATE_FLAG` and `FUTEX_CLOCK_REALTIME` option bits are accepted
/// and stripped before dispatch.
///
/// # Arguments
///
/// * `uaddr` - user address of the 32-bit futex word.
/// * `op` - command, optionally OR-ed with option bits.
/// * `val` - expected value for waits; maximum number of wake-ups for wakes.
/// * `_timeout` - currently ignored; waits block until woken.
/// * `_uaddr2` - unused by the supported commands.
/// * `_val3` - bitset for the `*_BITSET` commands; must be non-zero there.
///
/// # Returns
///
/// `Ok(0)` once a wait has been woken, or the number of wake-ups posted for a
/// wake.
///
/// # Errors
///
/// * [`KernelError::TryAgain`] - the futex word did not contain `val`.
/// * [`KernelError::InvalidValue`] - a `*_BITSET` command with a zero bitset.
/// * [`KernelError::NotSupported`] - unknown command, or
///   `FUTEX_CLOCK_REALTIME` combined with a non-wait command.
/// * Any error returned by `copy_from_user` while reading the futex word.
pub async fn sys_futex(
    uaddr: TUA<u32>,
    op: i32,
    val: u32,
    _timeout: VA,
    _uaddr2: TUA<u32>,
    _val3: u32,
) -> Result<usize> {
    // Option bit selecting CLOCK_REALTIME for the timeout of a wait.
    const FUTEX_CLOCK_REALTIME: i32 = 256;

    // Strip every option bit so that only the command remains. Masking just
    // the PRIVATE flag would reject `FUTEX_WAIT_BITSET | FUTEX_CLOCK_REALTIME`,
    // the form glibc uses for condition-variable and semaphore waits.
    let cmd = op & !(FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME);

    // The clock selector is only meaningful for waits; keep rejecting it on
    // every other command.
    if op & FUTEX_CLOCK_REALTIME != 0 && !matches!(cmd, FUTEX_WAIT | FUTEX_WAIT_BITSET) {
        return Err(KernelError::NotSupported);
    }

    // futex(2): the bitset commands fail with EINVAL when the mask is zero.
    if matches!(cmd, FUTEX_WAIT_BITSET | FUTEX_WAKE_BITSET) && _val3 == 0 {
        return Err(KernelError::InvalidValue);
    }

    // TODO: support bitset variants properly
    match cmd {
        FUTEX_WAIT | FUTEX_WAIT_BITSET => {
            // Obtain (or create) the wait-queue for this futex word *before*
            // reading the word. A FUTEX_WAKE that runs between our value
            // check and the wait below then finds the queue and records a
            // pending wake-up in `wakeups`, so the wake-up is not lost.
            let table = FUTEX_TABLE.get_or_init(|| SpinLock::new(BTreeMap::new()));
            let waitq_arc = {
                let mut guard = table.lock_save_irq();
                guard
                    .entry(uaddr)
                    .or_insert_with(|| Arc::new(SpinLock::new(FutexWaitQueue::new())))
                    .clone()
            };

            let current: u32 = copy_from_user(uaddr).await?;
            if current != val {
                return Err(KernelError::TryAgain);
            }

            // NOTE(review): `_timeout` is ignored, so a timed wait blocks
            // until it is explicitly woken.
            // TODO: When we have try_ variants of locking primitives, use them here
            wait_until(
                waitq_arc.clone(),
                |state| &mut state.wakers,
                |state| {
                    // Consume one pending wake-up, if any has been posted.
                    if state.wakeups > 0 {
                        state.wakeups -= 1;
                        Some(())
                    } else {
                        None
                    }
                },
            )
            .await;

            Ok(0)
        }
        FUTEX_WAKE | FUTEX_WAKE_BITSET => {
            let nr_wake = val as usize;
            let mut woke = 0;

            if let Some(table) = FUTEX_TABLE.get()
                && let Some(waitq_arc) = table.lock_save_irq().get(&uaddr).cloned()
            {
                let mut waitq = waitq_arc.lock_save_irq();
                // NOTE(review): a pending wake-up is recorded on every
                // iteration whether or not a task is waiting, so `woke` is
                // the number posted, not the number of tasks actually woken,
                // and a large `val` keeps the lock held for the whole loop.
                for _ in 0..nr_wake {
                    // Record a pending wake-up and attempt to wake a single waiter.
                    waitq.wakeups = waitq.wakeups.saturating_add(1);
                    waitq.wakers.wake_one();
                    woke += 1;
                }
            }

            Ok(woke)
        }
        _ => Err(KernelError::NotSupported),
    }
}

View File

@@ -129,6 +129,44 @@ fn test_write() {
println!(" OK");
}
/// Smoke-tests the `futex` syscall using only calls that cannot block.
///
/// Checks that `FUTEX_WAKE` on a word nobody waits on does not fail, and that
/// `FUTEX_WAIT` with a mismatching expected value fails with `EAGAIN`.
///
/// # Panics
///
/// Panics if either syscall behaves unexpectedly.
fn test_futex() {
    print!("Testing futex syscall ...");
    let mut futex_word: libc::c_uint = 0;
    let addr = &mut futex_word as *mut libc::c_uint;

    // SAFETY: `addr` points at `futex_word`, which outlives both syscalls, and
    // the null timeout/uaddr2 pointers are valid for the operations used here.
    unsafe {
        // FUTEX_WAKE should succeed (no waiters, returns 0)
        let ret = libc::syscall(
            libc::SYS_futex,
            addr,
            libc::FUTEX_WAKE,
            1,
            std::ptr::null::<libc::c_void>(),
            std::ptr::null::<libc::c_void>(),
            0,
        );
        if ret < 0 {
            panic!("futex wake failed");
        }

        // FUTEX_WAIT with an *unexpected* value (1) should fail immediately and
        // return -1 with errno = EAGAIN, so the test never blocks.
        let ret2 = libc::syscall(
            libc::SYS_futex,
            addr,
            libc::FUTEX_WAIT,
            1u32, // expected value differs from actual (0)
            std::ptr::null::<libc::c_void>(),
            std::ptr::null::<libc::c_void>(),
            0,
        );
        // Capture errno straight away, before anything else can overwrite it.
        let errno = std::io::Error::last_os_error().raw_os_error();
        if ret2 != -1 {
            panic!("futex wait did not error out as expected");
        }
        if errno != Some(libc::EAGAIN) {
            panic!("futex wait failed with errno {errno:?}, expected EAGAIN");
        }
    }

    println!(" OK");
}
fn test_rust_file() {
print!("Testing rust file operations ...");
use std::fs::{self, File};
@@ -195,6 +233,7 @@ fn main() {
run_test(test_fork);
run_test(test_read);
run_test(test_write);
run_test(test_futex);
run_test(test_rust_file);
run_test(test_rust_dir);
let end = std::time::Instant::now();