From 41dec05046551ea40dc4aafe34735f7a5831f6fc Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Thu, 1 Jan 2026 21:07:04 +0000 Subject: [PATCH] sched: implement fast-path return Previously, the scheduler unconditionally performed a full runqueue search (`find_next_runnable_desc`) on every invocation, including every timer tick and syscall return. This resulted in unnecessary overhead. This change introduces a "lazy preemption" model: 1. Fast-Path Optimization: `do_schedule` now checks if the current task is valid, is not the Idle task, and still has virtual budget remaining. If these conditions are met and `force_resched` is not set, the scheduler returns immediately without locking the runqueue. 2. Preemption & Idle Handling: - `insert_into_runq` now sets `force_resched` if the new task has an earlier deadline than the current task, or if the current task is Idle. - The Idle task is explicitly excluded from the fast-path to ensure immediate context switching when new work arrives. --- src/sched/mod.rs | 116 +++++++++++++++++++++++++++------------- src/sched/runqueue.rs | 41 ++++++-------- src/sched/sched_task.rs | 32 +++++++---- src/sched/uspc_ret.rs | 6 ++- 4 files changed, 123 insertions(+), 72 deletions(-) diff --git a/src/sched/mod.rs b/src/sched/mod.rs index 87b6500..974521e 100644 --- a/src/sched/mod.rs +++ b/src/sched/mod.rs @@ -73,41 +73,13 @@ fn schedule() { return; } - let mut sched = SCHED_STATE.borrow_mut(); + SCHED_STATE.borrow_mut().do_schedule(); +} - // Update Clocks - let now_inst = now().expect("System timer not initialised"); - - sched.advance_vclock(now_inst); - - if let Some(current) = sched.run_q.current_mut() { - current.tick(now_inst); - } - - // Select Next Task - let next_task_desc = sched.run_q.find_next_runnable_desc(sched.vclock); - - match sched.run_q.switch_tasks(next_task_desc, now_inst) { - SwitchResult::AlreadyRunning => { - // Nothing to do. 
- return; - } - SwitchResult::Blocked { old_task } => { - // If the blocked task has finished, allow it to drop here so it's - // resources are released. - if !old_task.state.lock_save_irq().is_finished() { - sched.wait_q.insert(old_task.descriptor(), old_task); - } - } - // fall-thru. - SwitchResult::Preempted => {} - } - - // Update all context since the task has switched. - if let Some(new_current) = sched.run_q.current_mut() { - ArchImpl::context_switch(new_current.t_shared.clone()); - CUR_TASK_PTR.borrow_mut().set_current(&mut new_current.task); - } +/// Set the force resched task for this CPU. This ensures that the next time +/// schedule() is called a full run of the scheduling algorithm will occur. +fn force_resched() { + SCHED_STATE.borrow_mut().force_resched = true; } pub fn spawn_kernel_work(fut: impl Future + 'static + Send) { @@ -159,6 +131,8 @@ pub struct SchedState { vclock: u128, /// Real-time moment when `vclock` was last updated. last_update: Option, + /// Force a reschedule. + force_resched: bool, } unsafe impl Send for SchedState {} @@ -170,6 +144,7 @@ impl SchedState { wait_q: BTreeMap::new(), vclock: 0, last_update: None, + force_resched: false, } } @@ -190,12 +165,24 @@ impl SchedState { self.last_update = Some(now_inst); } - fn insert_into_runq(&mut self, task: Box) { + fn insert_into_runq(&mut self, mut new_task: Box) { let now = now().expect("systimer not running"); self.advance_vclock(now); - self.run_q.enqueue_task(task, self.vclock); + new_task.inserting_into_runqueue(self.vclock); + + if let Some(current) = self.run_q.current() { + // We force a reschedule if: + // + // We are currently idling, OR the new task has an earlier deadline + // than the current task. 
+ if current.is_idle_task() || new_task.v_deadline < current.v_deadline { + self.force_resched = true; + } + } + + self.run_q.enqueue_task(new_task); } pub fn wakeup(&mut self, desc: TaskDescriptor) { @@ -205,6 +192,63 @@ impl SchedState { warn!("Spurious wakeup for task {:?}", desc); } } + + pub fn do_schedule(&mut self) { + // Update Clocks + let now_inst = now().expect("System timer not initialised"); + + self.advance_vclock(now_inst); + + let mut needs_resched = self.force_resched; + + if let Some(current) = self.run_q.current_mut() { + // If the current task is IDLE, we always want to proceed to the + // scheduler core to see if a real task has arrived. + if current.is_idle_task() { + needs_resched = true; + } + else if current.tick(now_inst) { + // Otherwise, check if the real task expired + needs_resched = true; + } + } else { + needs_resched = true; + } + + if !needs_resched { + // Fast Path: Only return if we have a valid task, it has budget, + // AND it's not the idle task. + return; + } + + // Reset the force flag for next time. + self.force_resched = false; + + // Select Next Task. + let next_task_desc = self.run_q.find_next_runnable_desc(self.vclock); + + match self.run_q.switch_tasks(next_task_desc, now_inst) { + SwitchResult::AlreadyRunning => { + // Nothing to do. + return; + } + SwitchResult::Blocked { old_task } => { + // If the blocked task has finished, allow it to drop here so its + // resources are released. + if !old_task.state.lock_save_irq().is_finished() { + self.wait_q.insert(old_task.descriptor(), old_task); + } + } + // fall-thru. + SwitchResult::Preempted => {} + } + + // Update all context since the task has switched. 
+ if let Some(new_current) = self.run_q.current_mut() { + ArchImpl::context_switch(new_current.t_shared.clone()); + CUR_TASK_PTR.borrow_mut().set_current(&mut new_current.task); + } + } } pub fn sched_init() { diff --git a/src/sched/runqueue.rs b/src/sched/runqueue.rs index b5ec994..ef79a58 100644 --- a/src/sched/runqueue.rs +++ b/src/sched/runqueue.rs @@ -41,24 +41,11 @@ impl RunQueue { } } - fn insert_task(&mut self, task: Box) { - if !task.is_idle_task() { - self.total_weight = self.total_weight.saturating_add(task.weight() as u64); - } - - if let Some(old_task) = self.queue.insert(task.descriptor(), task) { - // Handle the edge case where we overwrite a task. If we replaced - // someone, we must subtract their weight to avoid accounting drift. - warn!("Overwrote active task {:?}", old_task.descriptor()); - self.total_weight = self.total_weight.saturating_sub(old_task.weight() as u64); - } - } - pub fn switch_tasks(&mut self, next_task: TaskDescriptor, now_inst: Instant) -> SwitchResult { - if let Some(current) = self.current() { - if current.descriptor() == next_task { - return SwitchResult::AlreadyRunning; - } + if let Some(current) = self.current() + && current.descriptor() == next_task + { + return SwitchResult::AlreadyRunning; } let mut new_task = match self.queue.remove(&next_task) { @@ -101,6 +88,7 @@ impl RunQueue { self.total_weight } + #[allow(clippy::borrowed_box)] pub fn current(&self) -> Option<&Box> { self.running_task.as_ref() } @@ -112,7 +100,7 @@ impl RunQueue { fn fallback_current_or_idle(&self) -> TaskDescriptor { if let Some(ref current) = self.running_task { let s = *current.state.lock_save_irq(); - if !current.is_idle_task() && (s == TaskState::Runnable || s == TaskState::Running) { + if !current.is_idle_task() && (s == TaskState::Runnable || s == TaskState::Running) { return current.descriptor(); } } @@ -165,12 +153,17 @@ impl RunQueue { best_queued_desc } - /// Inserts `task` into this CPU's run-queue and updates all EEVDF - /// 
accounting information (eligible time, virtual deadline and the cached - /// weight sum). - pub fn enqueue_task(&mut self, mut new_task: Box, vclock: u128) { - new_task.inserting_into_runqueue(vclock); + /// Inserts `task` into this CPU's run-queue. + pub fn enqueue_task(&mut self, new_task: Box) { + if !new_task.is_idle_task() { + self.total_weight = self.total_weight.saturating_add(new_task.weight() as u64); + } - self.insert_task(new_task); + if let Some(old_task) = self.queue.insert(new_task.descriptor(), new_task) { + // Handle the edge case where we overwrite a task. If we replaced + // someone, we must subtract their weight to avoid accounting drift. + warn!("Overwrote active task {:?}", old_task.descriptor()); + self.total_weight = self.total_weight.saturating_sub(old_task.weight() as u64); + } } } diff --git a/src/sched/sched_task.rs b/src/sched/sched_task.rs index c5636b6..6050f6f 100644 --- a/src/sched/sched_task.rs +++ b/src/sched/sched_task.rs @@ -52,8 +52,16 @@ impl SchedulableTask { }) } - /// Update accounting info for this task given the latest time. - pub fn tick(&mut self, now: Instant) { + /// Re-issue a virtual deadline + pub fn replenish_deadline(&mut self) { + let q_ns: u128 = DEFAULT_TIME_SLICE.as_nanos(); + let v_delta = (q_ns << VT_FIXED_SHIFT) / self.weight() as u128; + self.v_deadline = self.v_eligible + v_delta; + } + + /// Update accounting info for this task given the latest time. Returns + /// `true` when we should try to reschedule another task, `false` otherwise. + pub fn tick(&mut self, now: Instant) -> bool { let dv_increment = if let Some(start) = self.exec_start { let delta = now - start; let w = self.weight() as u128; @@ -68,13 +76,18 @@ impl SchedulableTask { // (EEVDF: v_ei += t_used / w_i). 
self.v_eligible = self.v_eligible.saturating_add(dv_increment); - // Re-issue a virtual deadline - let q_ns: u128 = DEFAULT_TIME_SLICE.as_nanos(); - let v_delta = (q_ns << VT_FIXED_SHIFT) / self.weight() as u128; - let v_ei = self.v_eligible; - self.v_deadline = v_ei + v_delta; - self.exec_start = Some(now); + + // Has the task exceeded its deadline? + if self.v_eligible >= self.v_deadline { + self.replenish_deadline(); + + true + } else { + // Task still has budget. Do nothing. Return to userspace + // immediately. + false + } } /// Compute this task's scheduling weight. @@ -119,8 +132,7 @@ impl SchedulableTask { // Grant it an initial virtual deadline proportional to its weight. let q_ns: u128 = DEFAULT_TIME_SLICE.as_nanos(); let v_delta = (q_ns << VT_FIXED_SHIFT) / self.weight() as u128; - let new_v_deadline = vclock + v_delta; - self.v_deadline = new_v_deadline; + self.v_deadline = vclock + v_delta; // Since the task is not executing yet, its exec_start must be `None`. self.exec_start = None; diff --git a/src/sched/uspc_ret.rs b/src/sched/uspc_ret.rs index 85481e4..99c13c9 100644 --- a/src/sched/uspc_ret.rs +++ b/src/sched/uspc_ret.rs @@ -1,4 +1,4 @@ -use super::{current::current_task, schedule, waker::create_waker}; +use super::{current::current_task, force_resched, schedule, waker::create_waker}; use crate::{ arch::{Arch, ArchImpl}, process::{ @@ -128,7 +128,8 @@ pub fn dispatch_userspace_task(ctx: *mut UserCtx) { // task. // Task is currently running or is runnable and will now sleep. TaskState::Running | TaskState::Runnable => { - *task_state = TaskState::Sleeping + force_resched(); + *task_state = TaskState::Sleeping; } // If we were woken between the future returning // `Poll::Pending` and acquiring the lock above, @@ -194,6 +195,7 @@ pub fn dispatch_userspace_task(ctx: *mut UserCtx) { match *task_state { // Task is runnable or running, put it to sleep. 
TaskState::Running | TaskState::Runnable => { + force_resched(); *task_state = TaskState::Sleeping } // If we were woken between the future returning