sched: implement fast-path return

Previously, the scheduler unconditionally performed a full runqueue
search (`find_next_runnable_desc`) on every invocation, including every
timer tick and syscall return. This resulted in unnecessary overhead.

This change introduces a "lazy preemption" model:

1. Fast-Path Optimization: `do_schedule` now checks that the current task
   is valid, is not the Idle task, and still has virtual budget remaining.
   If these conditions hold and `force_resched` is not set, the scheduler
   returns immediately without locking the runqueue (see the sketch after
   this list).

2. Preemption & Idle Handling:
   - `insert_into_runq` now sets `force_resched` if the new task has an
     earlier deadline than the current task, or if the current task is
     Idle.
   - The Idle task is explicitly excluded from the fast-path to ensure
     immediate context switching when new work arrives.
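
A condensed sketch of the fast-path decision, mirroring `do_schedule` in the
diff below (the helper names are the commit's own; wrapping them in a
standalone method like this is purely illustrative):

// Illustrative sketch only; the real logic lives in do_schedule() below.
fn needs_full_reschedule(&mut self, now_inst: Instant) -> bool {
    let mut needs = self.force_resched;
    match self.run_q.current_mut() {
        // Nothing is running at all: always run the full selection pass.
        None => needs = true,
        // Never fast-path the Idle task, so new work is picked up at once.
        Some(current) if current.is_idle_task() => needs = true,
        Some(current) => {
            // tick() charges the elapsed time to the task and reports
            // whether its virtual budget has expired.
            if current.tick(now_inst) {
                needs = true;
            }
        }
    }
    needs
}
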
Author: Matthew Leach
Date: 2026-01-01 21:07:04 +00:00
Parent: fca835b573
Commit: 41dec05046
4 changed files with 123 additions and 72 deletions


@@ -73,41 +73,13 @@ fn schedule() {
return;
}
let mut sched = SCHED_STATE.borrow_mut();
SCHED_STATE.borrow_mut().do_schedule();
}
// Update Clocks
let now_inst = now().expect("System timer not initialised");
sched.advance_vclock(now_inst);
if let Some(current) = sched.run_q.current_mut() {
current.tick(now_inst);
}
// Select Next Task
let next_task_desc = sched.run_q.find_next_runnable_desc(sched.vclock);
match sched.run_q.switch_tasks(next_task_desc, now_inst) {
SwitchResult::AlreadyRunning => {
// Nothing to do.
return;
}
SwitchResult::Blocked { old_task } => {
// If the blocked task has finished, allow it to drop here so its
// resources are released.
if !old_task.state.lock_save_irq().is_finished() {
sched.wait_q.insert(old_task.descriptor(), old_task);
}
}
// fall-thru.
SwitchResult::Preempted => {}
}
// Update all context since the task has switched.
if let Some(new_current) = sched.run_q.current_mut() {
ArchImpl::context_switch(new_current.t_shared.clone());
CUR_TASK_PTR.borrow_mut().set_current(&mut new_current.task);
}
/// Set the force-resched flag for this CPU. This ensures that the next time
/// schedule() is called, a full run of the scheduling algorithm will occur.
fn force_resched() {
SCHED_STATE.borrow_mut().force_resched = true;
}
pub fn spawn_kernel_work(fut: impl Future<Output = ()> + 'static + Send) {
@@ -159,6 +131,8 @@ pub struct SchedState {
vclock: u128,
/// Real-time moment when `vclock` was last updated.
last_update: Option<Instant>,
/// Force a reschedule.
force_resched: bool,
}
unsafe impl Send for SchedState {}
@@ -170,6 +144,7 @@ impl SchedState {
wait_q: BTreeMap::new(),
vclock: 0,
last_update: None,
force_resched: false,
}
}
@@ -190,12 +165,24 @@ impl SchedState {
self.last_update = Some(now_inst);
}
fn insert_into_runq(&mut self, task: Box<SchedulableTask>) {
fn insert_into_runq(&mut self, mut new_task: Box<SchedulableTask>) {
let now = now().expect("systimer not running");
self.advance_vclock(now);
self.run_q.enqueue_task(task, self.vclock);
new_task.inserting_into_runqueue(self.vclock);
if let Some(current) = self.run_q.current() {
// We force a reschedule if:
//
// - we are currently idling, OR
// - the new task has an earlier deadline than the current task.
if current.is_idle_task() || new_task.v_deadline < current.v_deadline {
self.force_resched = true;
}
}
self.run_q.enqueue_task(new_task);
}
pub fn wakeup(&mut self, desc: TaskDescriptor) {
@@ -205,6 +192,63 @@ impl SchedState {
warn!("Spurious wakeup for task {:?}", desc);
}
}
pub fn do_schedule(&mut self) {
// Update Clocks
let now_inst = now().expect("System timer not initialised");
self.advance_vclock(now_inst);
let mut needs_resched = self.force_resched;
if let Some(current) = self.run_q.current_mut() {
// If the current task is IDLE, we always want to proceed to the
// scheduler core to see if a real task has arrived.
if current.is_idle_task() {
needs_resched = true;
}
else if current.tick(now_inst) {
// The current task has used up its budget; run the full scheduler.
needs_resched = true;
}
} else {
needs_resched = true;
}
if !needs_resched {
// Fast path: the current task is valid, is not the idle task, and
// still has budget, so skip the full scheduling pass.
return;
}
// Reset the force flag for next time.
self.force_resched = false;
// Select Next Task.
let next_task_desc = self.run_q.find_next_runnable_desc(self.vclock);
match self.run_q.switch_tasks(next_task_desc, now_inst) {
SwitchResult::AlreadyRunning => {
// Nothing to do.
return;
}
SwitchResult::Blocked { old_task } => {
// If the blocked task has finished, allow it to drop here so its
// resources are released.
if !old_task.state.lock_save_irq().is_finished() {
self.wait_q.insert(old_task.descriptor(), old_task);
}
}
// fall-thru.
SwitchResult::Preempted => {}
}
// Update all context since the task has switched.
if let Some(new_current) = self.run_q.current_mut() {
ArchImpl::context_switch(new_current.t_shared.clone());
CUR_TASK_PTR.borrow_mut().set_current(&mut new_current.task);
}
}
}
pub fn sched_init() {


@@ -41,24 +41,11 @@ impl RunQueue {
}
}
fn insert_task(&mut self, task: Box<SchedulableTask>) {
if !task.is_idle_task() {
self.total_weight = self.total_weight.saturating_add(task.weight() as u64);
}
if let Some(old_task) = self.queue.insert(task.descriptor(), task) {
// Handle the edge case where we overwrite a task. If we replaced
// someone, we must subtract their weight to avoid accounting drift.
warn!("Overwrote active task {:?}", old_task.descriptor());
self.total_weight = self.total_weight.saturating_sub(old_task.weight() as u64);
}
}
pub fn switch_tasks(&mut self, next_task: TaskDescriptor, now_inst: Instant) -> SwitchResult {
if let Some(current) = self.current() {
if current.descriptor() == next_task {
return SwitchResult::AlreadyRunning;
}
if let Some(current) = self.current()
&& current.descriptor() == next_task
{
return SwitchResult::AlreadyRunning;
}
let mut new_task = match self.queue.remove(&next_task) {
@@ -101,6 +88,7 @@ impl RunQueue {
self.total_weight
}
#[allow(clippy::borrowed_box)]
pub fn current(&self) -> Option<&Box<SchedulableTask>> {
self.running_task.as_ref()
}
@@ -112,7 +100,7 @@ impl RunQueue {
fn fallback_current_or_idle(&self) -> TaskDescriptor {
if let Some(ref current) = self.running_task {
let s = *current.state.lock_save_irq();
if !current.is_idle_task() && (s == TaskState::Runnable || s == TaskState::Running) {
return current.descriptor();
}
}
@@ -165,12 +153,17 @@ impl RunQueue {
best_queued_desc
}
/// Inserts `task` into this CPU's run-queue and updates all EEVDF
/// accounting information (eligible time, virtual deadline and the cached
/// weight sum).
pub fn enqueue_task(&mut self, mut new_task: Box<SchedulableTask>, vclock: u128) {
new_task.inserting_into_runqueue(vclock);
/// Inserts `task` into this CPU's run-queue.
pub fn enqueue_task(&mut self, new_task: Box<SchedulableTask>) {
if !new_task.is_idle_task() {
self.total_weight = self.total_weight.saturating_add(new_task.weight() as u64);
}
self.insert_task(new_task);
if let Some(old_task) = self.queue.insert(new_task.descriptor(), new_task) {
// Handle the edge case where we overwrite a task. If we replaced
// someone, we must subtract their weight to avoid accounting drift.
warn!("Overwrote active task {:?}", old_task.descriptor());
self.total_weight = self.total_weight.saturating_sub(old_task.weight() as u64);
}
}
}
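
The overwrite branch in `enqueue_task` keeps the cached weight sum honest. A
minimal, self-contained model of that bookkeeping (a plain `BTreeMap` stands
in for the real run-queue and the weights are made-up numbers):

use std::collections::BTreeMap;

fn main() {
    let mut queue: BTreeMap<u32, u64> = BTreeMap::new(); // descriptor -> weight
    let mut total_weight: u64 = 0;

    // Enqueue the same descriptor twice, as in the "overwrote active task"
    // edge case above.
    for _ in 0..2 {
        let (desc, weight) = (7u32, 10u64);
        total_weight = total_weight.saturating_add(weight);
        if let Some(old_weight) = queue.insert(desc, weight) {
            // We replaced an existing entry, so undo its contribution;
            // without this the cached sum would drift upwards.
            total_weight = total_weight.saturating_sub(old_weight);
        }
    }
    assert_eq!(total_weight, 10); // still consistent after the duplicate insert
}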


@@ -52,8 +52,16 @@ impl SchedulableTask {
})
}
/// Update accounting info for this task given the latest time.
pub fn tick(&mut self, now: Instant) {
/// Re-issue a virtual deadline
pub fn replenish_deadline(&mut self) {
let q_ns: u128 = DEFAULT_TIME_SLICE.as_nanos();
let v_delta = (q_ns << VT_FIXED_SHIFT) / self.weight() as u128;
self.v_deadline = self.v_eligible + v_delta;
}
/// Update accounting info for this task given the latest time. Returns
/// `true` when we should try to reschedule another task, `false` otherwise.
pub fn tick(&mut self, now: Instant) -> bool {
let dv_increment = if let Some(start) = self.exec_start {
let delta = now - start;
let w = self.weight() as u128;
@@ -68,13 +76,18 @@ impl SchedulableTask {
// (EEVDF: v_ei += t_used / w_i).
self.v_eligible = self.v_eligible.saturating_add(dv_increment);
// Re-issue a virtual deadline
let q_ns: u128 = DEFAULT_TIME_SLICE.as_nanos();
let v_delta = (q_ns << VT_FIXED_SHIFT) / self.weight() as u128;
let v_ei = self.v_eligible;
self.v_deadline = v_ei + v_delta;
self.exec_start = Some(now);
// Has the task exceeded its deadline?
if self.v_eligible >= self.v_deadline {
self.replenish_deadline();
true
} else {
// Task still has budget. Do nothing. Return to userspace
// immediately.
false
}
}
/// Compute this task's scheduling weight.
@@ -119,8 +132,7 @@ impl SchedulableTask {
// Grant it an initial virtual deadline proportional to its weight.
let q_ns: u128 = DEFAULT_TIME_SLICE.as_nanos();
let v_delta = (q_ns << VT_FIXED_SHIFT) / self.weight() as u128;
let new_v_deadline = vclock + v_delta;
self.v_deadline = new_v_deadline;
self.v_deadline = vclock + v_delta;
// Since the task is not executing yet, its exec_start must be `None`.
self.exec_start = None;

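For intuition about the fixed-point deadline arithmetic in
`replenish_deadline` above, here is a worked example; the constants are
illustrative assumptions, since the actual DEFAULT_TIME_SLICE and
VT_FIXED_SHIFT values are not visible in this diff:

// Illustrative numbers only: assume a 4 ms slice and a 16-bit shift.
let q_ns: u128 = 4_000_000;       // assumed DEFAULT_TIME_SLICE.as_nanos()
let vt_fixed_shift: u32 = 16;     // assumed VT_FIXED_SHIFT
let weight: u128 = 1024;          // assumed default task weight
let v_delta = (q_ns << vt_fixed_shift) / weight;
assert_eq!(v_delta, 256_000_000);
// A task with twice the weight would get half this v_delta, so its virtual
// deadline sits closer to v_eligible and the earliest-deadline search
// favours it, which is exactly the EEVDF weighting effect.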

@@ -1,4 +1,4 @@
use super::{current::current_task, schedule, waker::create_waker};
use super::{current::current_task, force_resched, schedule, waker::create_waker};
use crate::{
arch::{Arch, ArchImpl},
process::{
@@ -128,7 +128,8 @@ pub fn dispatch_userspace_task(ctx: *mut UserCtx) {
// task.
// Task is currently running or is runnable and will now sleep.
TaskState::Running | TaskState::Runnable => {
*task_state = TaskState::Sleeping
force_resched();
*task_state = TaskState::Sleeping;
}
// If we were woken between the future returning
// `Poll::Pending` and acquiring the lock above,
@@ -194,6 +195,7 @@ pub fn dispatch_userspace_task(ctx: *mut UserCtx) {
match *task_state {
// Task is runnable or running, put it to sleep.
TaskState::Running | TaskState::Runnable => {
force_resched();
*task_state = TaskState::Sleeping
}
// If we were woken between the future returning