arch: arm64: detect kernel stack overflow condition

Currently, when a kernel stack overflow occurs, the exception handler
blindly attempts to write the current context to the stack. If the SP
isn't valid this causes another fault, and so on - locking up the
system.

This commit re-arranges the stack layout, performs SP validation before
usage and switches to an emergency stack when SP isn't valid. This
allows the handler to run and panic gracefully.

Fixes: #98
This commit is contained in:
Matthew Leach
2026-01-03 15:34:48 +00:00
committed by Ashwin Naren
parent 57e0aa364c
commit d8bcc015de
8 changed files with 177 additions and 75 deletions

View File

@@ -152,7 +152,7 @@ impl<K: MemKind, T> Address<K, T> {
}
#[must_use]
pub fn add_bytes(self, n: usize) -> Self {
pub const fn add_bytes(self, n: usize) -> Self {
Self::from_value(self.value() + n)
}

View File

@@ -64,8 +64,8 @@ impl<T: MemKind> MemoryRegion<T> {
/// Create a memory region from a start and end address.
///
/// The size is calculated as `end - start`. No alignment is enforced.
pub fn from_start_end_address(start: Address<T, ()>, end: Address<T, ()>) -> Self {
assert!(end >= start);
pub const fn from_start_end_address(start: Address<T, ()>, end: Address<T, ()>) -> Self {
assert!(end.value() >= start.value());
Self {
address: start,

View File

@@ -42,6 +42,17 @@ SECTIONS
__percpu_end = .;
}
/*
* Vectors are here in the binary, but they might be remapped virtually
* later. `.vectors` and `.vectors.impl` must remain contiguous.
*/
. = ALIGN(PAGE_SIZE);
.vectors : {
__vectors_start = .;
KEEP(*(.vectors))
*(.vectors.impl)
__vectors_end = .;
}
.bss :
{

View File

@@ -21,9 +21,15 @@ use libkernel::{
};
use log::info;
const KERNEL_STACK_SZ: usize = 256 * 1024; // 32 KiB
const KERNEL_STACK_SHIFT: usize = 15; // 32KiB.
const KERNEL_STACK_SZ: usize = 1 << KERNEL_STACK_SHIFT;
pub const KERNEL_STACK_PG_ORDER: usize = (KERNEL_STACK_SZ / PAGE_SIZE).ilog2() as usize;
pub const KERNEL_STACK_AREA: VirtMemoryRegion = VirtMemoryRegion::from_start_end_address(
VA::from_value(0xffff_b800_0000_0000),
VA::from_value(0xffff_c000_0000_0000),
);
const KERNEL_HEAP_SZ: usize = 64 * 1024 * 1024; // 64 MiB
pub fn setup_allocator(dtb_ptr: TPA<u8>, image_start: PA, image_end: PA) -> Result<()> {
@@ -100,12 +106,20 @@ pub fn setup_allocator(dtb_ptr: TPA<u8>, image_start: PA, image_end: PA) -> Resu
}
pub fn allocate_kstack_region() -> VirtMemoryRegion {
static mut CURRENT_VA: VA = VA::from_value(0xffff_b800_0000_0000);
// Start allocating stacks at the second valid stack slot in
// `KERNEL_STACK_AREA`. This ensures that the faulting address of a stack
// overflow on the first allocated stack would still lie within
// `KERNEL_STACK_AREA`.
static mut CURRENT_VA: VA = KERNEL_STACK_AREA
.start_address()
.add_bytes(KERNEL_STACK_SZ * 2);
let range = VirtMemoryRegion::new(unsafe { CURRENT_VA }, KERNEL_STACK_SZ);
// Add a guard page between allocations.
unsafe { CURRENT_VA = range.end_address().add_pages(1) };
// Add a guard region between allocations, this ensures that the
// `KERNEL_STACK_SHIFT` bit is set between stacks, allowing us to detect stack
// overflow in the kernel.
unsafe { CURRENT_VA = CURRENT_VA.add_bytes(KERNEL_STACK_SZ * 2) };
range
}

View File

@@ -36,7 +36,7 @@ use secondary::{boot_secondaries, cpu_count, save_idmap, secondary_booted};
mod exception_level;
mod logical_map;
mod memory;
pub(super) mod memory;
mod paging_bootstrap;
pub(super) mod secondary;

View File

@@ -6,70 +6,81 @@
movk \register, #:abs_g0_nc:\symbol
.endm
restore_ctx_and_eret:
ldp lr, x1, [x0, #(16 * 15)]
ldp x2, x3, [x0, #(16 * 16)]
ldr x4, [x0, #(16 * 17)]
.macro __save_and_call handler
// Save general-purpose registers x0-x29
stp x0, x1, [sp, #(16 * 0)]
stp x2, x3, [sp, #(16 * 1)]
stp x4, x5, [sp, #(16 * 2)]
stp x6, x7, [sp, #(16 * 3)]
stp x8, x9, [sp, #(16 * 4)]
stp x10, x11, [sp, #(16 * 5)]
stp x12, x13, [sp, #(16 * 6)]
stp x14, x15, [sp, #(16 * 7)]
stp x16, x17, [sp, #(16 * 8)]
stp x18, x19, [sp, #(16 * 9)]
stp x20, x21, [sp, #(16 * 10)]
stp x22, x23, [sp, #(16 * 11)]
stp x24, x25, [sp, #(16 * 12)]
stp x26, x27, [sp, #(16 * 13)]
stp x28, x29, [sp, #(16 * 14)]
msr ELR_EL1, x1
msr SPSR_EL1, x2
msr SP_EL0, x3
msr TPIDR_EL0, x4
// Save system registers and Link Register
mrs x1, ELR_EL1
mrs x2, SPSR_EL1
mrs x3, SP_EL0
mrs x4, TPIDR_EL0
stp lr, x1, [sp, #(16 * 15)]
stp x2, x3, [sp, #(16 * 16)]
str x4, [sp, #(16 * 17)]
ldp x2, x3, [x0, #(16 * 1)]
ldp x4, x5, [x0, #(16 * 2)]
ldp x6, x7, [x0, #(16 * 3)]
ldp x8, x9, [x0, #(16 * 4)]
ldp x10, x11, [x0, #(16 * 5)]
ldp x12, x13, [x0, #(16 * 6)]
ldp x14, x15, [x0, #(16 * 7)]
ldp x16, x17, [x0, #(16 * 8)]
ldp x18, x19, [x0, #(16 * 9)]
ldp x20, x21, [x0, #(16 * 10)]
ldp x22, x23, [x0, #(16 * 11)]
ldp x24, x25, [x0, #(16 * 12)]
ldp x26, x27, [x0, #(16 * 13)]
ldp x28, x29, [x0, #(16 * 14)]
ldp x0, x1, [x0, #(16 * 0)]
mov x0, sp
eret
// Call handler
adr_a x1, \handler
blr x1
// Exit
b restore_ctx_and_eret
.endm
.macro vector_handler handler
__vector_\handler:
sub sp, sp, #(16 * 18)
sub sp, sp, #(16 * 18)
// Save general-purpose registers x0-x29
stp x0, x1, [sp, #(16 * 0)]
stp x2, x3, [sp, #(16 * 1)]
stp x4, x5, [sp, #(16 * 2)]
stp x6, x7, [sp, #(16 * 3)]
stp x8, x9, [sp, #(16 * 4)]
stp x10, x11, [sp, #(16 * 5)]
stp x12, x13, [sp, #(16 * 6)]
stp x14, x15, [sp, #(16 * 7)]
stp x16, x17, [sp, #(16 * 8)]
stp x18, x19, [sp, #(16 * 9)]
stp x20, x21, [sp, #(16 * 10)]
stp x22, x23, [sp, #(16 * 11)]
stp x24, x25, [sp, #(16 * 12)]
stp x26, x27, [sp, #(16 * 13)]
stp x28, x29, [sp, #(16 * 14)]
b __impl_\handler
mrs x1, ELR_EL1
mrs x2, SPSR_EL1
mrs x3, SP_EL0
mrs x4, TPIDR_EL0
.pushsection .vectors.impl, "ax"
__impl_\handler:
__save_and_call \handler
.popsection
.endm
stp lr, x1, [sp, #(16 * 15)]
stp x2, x3, [sp, #(16 * 16)]
str x4, [sp, #(16 * 17)]
.macro kvector_handler handler
__vector_\handler:
sub sp, sp, #(16 * 18)
mov x0, sp
// Detect stack overflow without clobbering GP registers.
msr TPIDR_EL1, x0
mov x0, sp
//TODO: share this const value with Rust.
tbnz x0, #15, 0f // #15 = KERNEL_STACK_SHIFT.
mrs x0, TPIDR_EL1
b __impl_\handler
adr_a x1, \handler
blr x1
0:
// Stack overflow detected. Switch to the emergency stack and call the
// handler to (presumably) panic the kernel.
ldr x0, =EMERG_STACK_END
ldr x0, [x0]
mov sp, x0
sub sp, sp, #(16 * 18)
mrs x0, TPIDR_EL1
b __impl_\handler
b exception_return
.pushsection .vectors.impl, "ax"
__impl_\handler:
__save_and_call \handler
.popsection
.endm
.section .vectors, "ax"
@@ -86,13 +97,13 @@ exception_vectors:
vector_handler el1_serror_sp0
.org 0x200
vector_handler el1_sync_spx
kvector_handler el1_sync_spx
.org 0x280
vector_handler el1_irq_spx
kvector_handler el1_irq_spx
.org 0x300
vector_handler el1_fiq_spx
kvector_handler el1_fiq_spx
.org 0x380
vector_handler el1_serror_spx
kvector_handler el1_serror_spx
.org 0x400
vector_handler el0_sync
@@ -103,8 +114,42 @@ exception_vectors:
.org 0x580
vector_handler el0_serror
// Common exit path
.section .vectors.impl, "ax"
.global restore_ctx_and_eret
restore_ctx_and_eret:
add sp, sp, #(0x10 * 18)
ldp lr, x1, [x0, #(16 * 15)]
ldp x2, x3, [x0, #(16 * 16)]
ldr x4, [x0, #(16 * 17)]
msr ELR_EL1, x1
msr SPSR_EL1, x2
msr SP_EL0, x3
msr TPIDR_EL0, x4
ldp x2, x3, [x0, #(16 * 1)]
ldp x4, x5, [x0, #(16 * 2)]
ldp x6, x7, [x0, #(16 * 3)]
ldp x8, x9, [x0, #(16 * 4)]
ldp x10, x11, [x0, #(16 * 5)]
ldp x12, x13, [x0, #(16 * 6)]
ldp x14, x15, [x0, #(16 * 7)]
ldp x16, x17, [x0, #(16 * 8)]
ldp x18, x19, [x0, #(16 * 9)]
ldp x20, x21, [x0, #(16 * 10)]
ldp x22, x23, [x0, #(16 * 11)]
ldp x24, x25, [x0, #(16 * 12)]
ldp x26, x27, [x0, #(16 * 13)]
ldp x28, x29, [x0, #(16 * 14)]
ldp x0, x1, [x0, #(16 * 0)]
eret
.section .text
.global exception_return
exception_return:
add sp, sp, #(0x10 * 18)
adr_a x1 restore_ctx_and_eret
br x1
adr_a x1 restore_ctx_and_eret
br x1

View File

@@ -3,9 +3,10 @@ use super::memory::{
fault::{handle_kernel_mem_fault, handle_mem_fault},
};
use crate::{
arch::ArchImpl,
arch::{ArchImpl, arm64::boot::memory::KERNEL_STACK_PG_ORDER},
interrupts::get_interrupt_root,
ksym_pa,
memory::PAGE_ALLOC,
sched::{current::current_task, uspc_ret::dispatch_userspace_task},
spawn_kernel_work,
};
@@ -16,6 +17,7 @@ use libkernel::{
KernAddressSpace, VirtualMemory,
error::Result,
memory::{
address::VA,
permissions::PtePermissions,
region::{PhysMemoryRegion, VirtMemoryRegion},
},
@@ -26,12 +28,14 @@ use tock_registers::interfaces::Writeable;
pub mod esr;
mod syscall;
const EXCEPTION_TBL_SZ: usize = 0x800;
unsafe extern "C" {
pub static exception_vectors: u8;
pub static __vectors_start: u8;
pub static __vectors_end: u8;
}
#[unsafe(no_mangle)]
pub static EMERG_STACK_END: VA = VA::from_value(0xffff_c000_0000_0000);
#[repr(C)]
#[derive(Clone, Copy)]
pub struct ExceptionState {
@@ -200,17 +204,35 @@ extern "C" fn el0_serror(state: &mut ExceptionState) {
}
pub fn exceptions_init() -> Result<()> {
let pa = ksym_pa!(exception_vectors);
let region = PhysMemoryRegion::new(pa, EXCEPTION_TBL_SZ);
let start = ksym_pa!(__vectors_start);
let end = ksym_pa!(__vectors_end);
let region = PhysMemoryRegion::from_start_end_address(start, end);
let mappable_region = region.to_mappable_region();
ArchImpl::kern_address_space().lock_save_irq().map_normal(
let mut kspc = ArchImpl::kern_address_space().lock_save_irq();
kspc.map_normal(
mappable_region.region(),
VirtMemoryRegion::new(EXCEPTION_BASE, mappable_region.region().size()),
PtePermissions::rx(false),
)?;
let emerg_stack = PAGE_ALLOC
.get()
.unwrap()
.alloc_frames(KERNEL_STACK_PG_ORDER as _)?
.leak();
kspc.map_normal(
emerg_stack,
VirtMemoryRegion::new(
EMERG_STACK_END.sub_bytes(emerg_stack.size()),
emerg_stack.size(),
),
PtePermissions::rw(false),
)?;
secondary_exceptions_init();
Ok(())

View File

@@ -2,6 +2,7 @@ use core::mem;
use crate::{
arch::arm64::{
boot::memory::KERNEL_STACK_AREA,
exceptions::{
ExceptionState,
esr::{AbortIss, Exception, IfscCategory},
@@ -100,7 +101,16 @@ pub fn handle_kernel_mem_fault(exception: Exception, info: AbortIss, state: &mut
// If the source of the fault (ELR), wasn't in the uacess fixup section,
// then any abort generated by the kernel is a panic since we don't
// demand-page any kernel memory.
panic!("Kernel memory fault detected. Context: {}", state);
//
// Try and differentiate between a stack overflow condition and other
// faults.
if let Some(far) = info.far
&& KERNEL_STACK_AREA.contains_address(VA::from_value(far as _))
{
panic!("Kernel stack overflow detected. Context:\n{}", state);
} else {
panic!("Kernel memory fault detected. Context:\n{}", state);
}
}
pub fn handle_mem_fault(exception: Exception, info: AbortIss) {