From b75f29804ffa8700968f5fa89211ac0f7c9f693a Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Wed, 24 Dec 2025 22:05:44 -0800 Subject: [PATCH] schedule in round-robin fashion --- src/arch/arm64/boot/mod.rs | 2 +- src/arch/arm64/mod.rs | 4 + src/arch/mod.rs | 2 + src/drivers/fs/proc.rs | 5 +- src/main.rs | 2 + src/process/clone.rs | 2 +- src/sched/mod.rs | 152 ++++++++++++++++++++++++++++++++----- src/sched/uspc_ret.rs | 5 +- 8 files changed, 145 insertions(+), 29 deletions(-) diff --git a/src/arch/arm64/boot/mod.rs b/src/arch/arm64/boot/mod.rs index 73afff3..c5d22cf 100644 --- a/src/arch/arm64/boot/mod.rs +++ b/src/arch/arm64/boot/mod.rs @@ -34,7 +34,7 @@ use libkernel::{ }; use logical_map::setup_logical_map; use memory::{setup_allocator, setup_stack_and_heap}; -use secondary::{boot_secondaries, cpu_count, save_idmap, secondary_booted}; +pub(crate) use secondary::{boot_secondaries, cpu_count, save_idmap, secondary_booted}; mod exception_level; mod logical_map; diff --git a/src/arch/arm64/mod.rs b/src/arch/arm64/mod.rs index 5dba458..30b150f 100644 --- a/src/arch/arm64/mod.rs +++ b/src/arch/arm64/mod.rs @@ -90,6 +90,10 @@ impl Arch for Aarch64 { "aarch64" } + fn cpu_count() -> usize { + boot::cpu_count() + } + fn do_signal( sig: SigId, action: UserspaceSigAction, diff --git a/src/arch/mod.rs b/src/arch/mod.rs index 81e7611..243bcab 100644 --- a/src/arch/mod.rs +++ b/src/arch/mod.rs @@ -27,6 +27,8 @@ pub trait Arch: CpuOps + VirtualMemory { fn name() -> &'static str; + fn cpu_count() -> usize; + /// Prepares the initial context for a new user-space thread. This sets up /// the stack frame so that when we context-switch to it, it will begin /// execution at the specified `entry_point`. diff --git a/src/drivers/fs/proc.rs b/src/drivers/fs/proc.rs index d7ff231..1191c5a 100644 --- a/src/drivers/fs/proc.rs +++ b/src/drivers/fs/proc.rs @@ -1,7 +1,7 @@ #![allow(clippy::module_name_repetitions)] use crate::process::thread_group::Tgid; -use crate::sched::{SCHED_STATE, current_task}; +use crate::sched::{current_task, find_task_by_descriptor}; use crate::sync::OnceLock; use crate::{ drivers::{Driver, FilesystemDriver}, @@ -287,10 +287,9 @@ impl Inode for ProcTaskFileInode { async fn read_at(&self, offset: u64, buf: &mut [u8]) -> Result { let pid = self.pid; let task_list = TASK_LIST.lock_save_irq(); - // TODO: Does not obtain details for tasks that are on other CPUs let id = task_list.iter().find(|(desc, _)| desc.tgid() == pid); let task_details = if let Some((desc, _)) = id { - SCHED_STATE.borrow().run_queue.get(desc).cloned() + find_task_by_descriptor(desc) } else { None }; diff --git a/src/main.rs b/src/main.rs index 2af3225..52dbb92 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,6 +2,8 @@ #![no_main] #![feature(used_with_arg)] #![feature(likely_unlikely)] +#![feature(sync_unsafe_cell)] + use alloc::{ boxed::Box, string::{String, ToString}, diff --git a/src/process/clone.rs b/src/process/clone.rs index 36398bf..4242fb2 100644 --- a/src/process/clone.rs +++ b/src/process/clone.rs @@ -161,7 +161,7 @@ pub async fn sys_clone( let tid = new_task.tid; - sched::insert_task(Arc::new(new_task)); + sched::insert_task(Arc::new(new_task), None); // Honour CLONE_*SETTID semantics for the parent and (shared-VM) child. if flags.contains(CloneFlags::CLONE_PARENT_SETTID) && !parent_tidptr.is_null() { diff --git a/src/sched/mod.rs b/src/sched/mod.rs index 5d0d215..89b01a4 100644 --- a/src/sched/mod.rs +++ b/src/sched/mod.rs @@ -6,12 +6,106 @@ use crate::{ sync::OnceLock, }; use alloc::{boxed::Box, collections::btree_map::BTreeMap, sync::Arc}; +use alloc::vec::Vec; +use core::cell::{OnceCell, SyncUnsafeCell}; use core::cmp::Ordering; -use libkernel::{UserAddressSpace, error::Result}; +use core::sync::atomic::AtomicUsize; +use libkernel::{CpuOps, UserAddressSpace, error::Result}; pub mod uspc_ret; pub mod waker; +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub struct CpuId(usize); + +impl CpuId { + pub fn this() -> CpuId { + CpuId(ArchImpl::id()) + } +} + + +// TODO: arbitrary cap. +pub static SCHED_STATES: [SyncUnsafeCell>; 128] = [const { SyncUnsafeCell::new(None) }; 128]; + +per_cpu! { + pub static CPU_ID: OnceCell = OnceCell::new; +} + +fn get_cpu_id() -> CpuId { + CPU_ID.borrow() + .get() + .cloned() + .unwrap_or_else(|| CpuId::this()) +} + +fn get_sched_state() -> &'static mut SchedState { + let cpu_id = get_cpu_id(); + let idx = cpu_id.0; + debug_assert!(idx < SCHED_STATES.len(), "CPU id out of bounds"); + + // Get a mutable reference to the Option stored in the static array. + let slot: &mut Option = unsafe { &mut *SCHED_STATES[idx].get() }; + + if slot.is_none() { + *slot = Some(SchedState::new()); + } + + slot.as_mut().expect("SchedState not initialized") +} + +fn get_sched_state_by_id(cpu_id: CpuId) -> Option<&'static mut SchedState> { + let idx = cpu_id.0; + debug_assert!(idx < SCHED_STATES.len(), "CPU id out of bounds"); + + // Get a mutable reference to the Option stored in the static array. + let slot: &mut Option = unsafe { &mut *SCHED_STATES[idx].get() }; + + if slot.is_none() { + *slot = Some(SchedState::new()); + } + + slot.as_mut() +} + +fn with_cpu_sched_state(cpu_id: CpuId, f: impl FnOnce(&mut SchedState)) { + let Some(sched_state) = get_sched_state_by_id(cpu_id) else { + log::error!("No sched state for CPU {:?}", cpu_id); + return; + }; + f(sched_state); +} + +pub fn all_tasks() -> Vec> { + let mut tasks = Vec::new(); + + for slot in SCHED_STATES.iter() { + let slot: &mut Option = unsafe { &mut *slot.get() }; + + if let Some(sched_state) = slot.as_mut() { + for task in sched_state.run_queue.values() { + tasks.push(task.clone()); + } + } + } + + tasks +} + +pub fn find_task_by_descriptor(descriptor: &TaskDescriptor) -> Option> { + for slot in SCHED_STATES.iter() { + let slot: &mut Option = unsafe { &mut *slot.get() }; + + if let Some(sched_state) = slot.as_mut() { + if let Some(task) = sched_state.run_queue.get(descriptor) { + return Some(task.clone()); + } + } + } + + None +} + /// Schedule a new task. /// /// This function is the core of the kernel's scheduler. It is responsible for @@ -44,9 +138,20 @@ fn schedule() { } let previous_task = current_task(); - let mut sched_state = SCHED_STATE.borrow_mut(); + let sched_state = get_sched_state(); let next_task = sched_state.find_next_runnable_task(); + static SCHEDULE_COUNT: AtomicUsize = AtomicUsize::new(0); + let count = SCHEDULE_COUNT.fetch_add(1, core::sync::atomic::Ordering::Relaxed); + if count % 1000 == 0 { + log::debug!( + "Scheduling: CPU {:?}, prev PID {}, next PID {}", + get_cpu_id(), + previous_task.tid.value(), + next_task.tid.value() + ); + } + sched_state .switch_to_task(Some(previous_task), next_task.clone()) .expect("Could not schedule next task"); @@ -59,12 +164,23 @@ pub fn spawn_kernel_work(fut: impl Future + 'static + Send) { .put_kernel_work(Box::pin(fut)); } -/// Insert the given task onto *this* CPUs runqueue. -pub fn insert_task(task: Arc) { - SCHED_STATE - .borrow_mut() - .run_queue - .insert(task.descriptor(), task); +fn get_next_cpu() -> CpuId { + static NEXT_CPU: AtomicUsize = AtomicUsize::new(0); + + let cpu_count = ArchImpl::cpu_count(); + let cpu_id = NEXT_CPU.fetch_add(1, core::sync::atomic::Ordering::Relaxed) % cpu_count; + + CpuId(cpu_id) +} + +/// Insert the given task onto a CPU's run queue. +pub fn insert_task(task: Arc, cpu: Option) { + let cpu = cpu.unwrap_or_else(get_next_cpu); + with_cpu_sched_state(cpu, |sched_state| { + sched_state + .run_queue + .insert(task.descriptor(), task); + }); } pub struct SchedState { @@ -166,13 +282,8 @@ impl SchedState { } } -per_cpu! { - pub static SCHED_STATE: SchedState = SchedState::new; -} - pub fn current_task() -> Arc { - SCHED_STATE - .borrow() + get_sched_state() .running_task .as_ref() .expect("Current task called before initial task created") @@ -199,11 +310,10 @@ pub fn sched_init() { task_list.insert(init_task.descriptor(), Arc::downgrade(&init_task.state)); } - insert_task(idle_task); - insert_task(init_task.clone()); + insert_task(idle_task, Some(CpuId::this())); + insert_task(init_task.clone(), Some(CpuId::this())); - SCHED_STATE - .borrow_mut() + get_sched_state() .switch_to_task(None, init_task) .expect("Failed to switch to init task"); } @@ -211,10 +321,10 @@ pub fn sched_init() { pub fn sched_init_secondary() { let idle_task = get_idle_task(); - insert_task(idle_task.clone()); + // Important to ensure that the idle task is in the TASK_LIST for this CPU. + insert_task(idle_task.clone(), Some(CpuId::this())); - SCHED_STATE - .borrow_mut() + get_sched_state() .switch_to_task(None, idle_task) .expect("Failed to swtich to idle task"); } diff --git a/src/sched/uspc_ret.rs b/src/sched/uspc_ret.rs index 8593e34..1c21cf1 100644 --- a/src/sched/uspc_ret.rs +++ b/src/sched/uspc_ret.rs @@ -1,4 +1,4 @@ -use super::{SCHED_STATE, current_task, schedule, waker::create_waker}; +use super::{get_sched_state, current_task, schedule, waker::create_waker}; use crate::process::TASK_LIST; use crate::{ arch::{Arch, ArchImpl}, @@ -159,8 +159,7 @@ pub fn dispatch_userspace_task(ctx: *mut UserCtx) { // task to execute, removing this task from the // runqueue, reaping it's resouces. if task.state.lock_save_irq().is_finished() { - SCHED_STATE - .borrow_mut() + get_sched_state() .run_queue .remove(&task.descriptor()); let mut task_list = TASK_LIST.lock_save_irq();