libkernel: pg_tear_down: add more flexibility

The current tear down implementation assumes that all data mappings &
intermediate tables need to be free'd. That might not provide enough
flexibility for all tear-down scenarios.

Provide a more flexible API, passing current walk state into a control
function which makes a descision regarding tear-down and recursion.
This commit is contained in:
Matthew Leach
2026-05-01 15:26:58 +01:00
committed by Ashwin Naren
parent d0b10adf7c
commit 72ef61fb10
5 changed files with 905 additions and 121 deletions

View File

@@ -1,67 +1,118 @@
//! Utilities for tearing down and freeing page table hierarchies.
use super::pg_descriptors::L0Descriptor;
use super::pg_tables::{L0Table, L1Table, L3Table};
use crate::error::Result;
use crate::memory::region::PhysMemoryRegion;
use crate::memory::region::{PhysMemoryRegion, VirtMemoryRegion};
use crate::memory::{
PAGE_SIZE,
address::TPA,
address::{TPA, VA},
paging::{
PaMapper, PageTableMapper, PgTable, PgTableArray, TableMapper,
tear_down::RecursiveTeardownWalker, walk::WalkContext,
PaMapper, PageTableEntry, PageTableMapper, PgTable, PgTableArray, TableMapper,
tear_down::{EntryKind, RecursiveTeardownWalker, TeardownAction, TeardownEntry},
walk::WalkContext,
},
};
// Implementation for L3 (Leaf Table)
impl RecursiveTeardownWalker for L3Table {
fn tear_down<F, PM>(
fn tear_down<Control, Dealloc, PM>(
table_pa: TPA<PgTableArray<Self>>,
ctx: &mut WalkContext<PM>,
deallocator: &mut F,
base_va: VA,
depth: u8,
control: &mut Control,
deallocator: &mut Dealloc,
) -> Result<()>
where
PM: PageTableMapper,
F: FnMut(PhysMemoryRegion),
Control: FnMut(&TeardownEntry) -> TeardownAction,
Dealloc: FnMut(PhysMemoryRegion),
{
unsafe {
ctx.mapper.with_page_table(table_pa, |pgtable| {
let table = L3Table::from_ptr(pgtable);
let mut cursor = 0;
for idx in 0..Self::DESCRIPTORS_PER_PAGE {
let desc = table.get_idx(idx);
if let Some(addr) = desc.mapped_address() {
deallocator(PhysMemoryRegion::new(addr, PAGE_SIZE));
loop {
let next_item = unsafe {
ctx.mapper.with_page_table(table_pa, |pgtable| {
let table = L3Table::from_ptr(pgtable);
for i in cursor..Self::DESCRIPTORS_PER_PAGE {
if let Some(addr) = table.get_idx(i).mapped_address() {
return Some((i, addr));
}
}
None
})?
};
match next_item {
Some((found_idx, addr)) => {
let entry_va =
base_va.add_bytes(found_idx << <L3Table as PgTable>::Descriptor::MAP_SHIFT);
let entry = TeardownEntry {
kind: EntryKind::Mapping(VirtMemoryRegion::new(entry_va, PAGE_SIZE)),
depth,
region: PhysMemoryRegion::new(addr, PAGE_SIZE),
};
let action = control(&entry);
if !matches!(action, TeardownAction::Skip) {
// Clear the leaf PTE before releasing the data frame.
if matches!(action, TeardownAction::FreeAndClear) {
unsafe {
ctx.mapper.with_page_table(table_pa, |pgtable| {
L3Table::from_ptr(pgtable)
.to_raw_ptr()
.add(found_idx)
.write_volatile(0u64);
})?;
}
}
deallocator(entry.region);
}
cursor = found_idx + 1;
}
})?;
None => break,
}
}
Ok(())
}
}
/// Walks the page table hierarchy for a given address space and applies a
/// freeing closure to every allocated frame.
/// Walks the page table hierarchy for a given address space and invokes
/// `control` and `deallocator` for every frame encountered.
///
/// # Parameters
/// - `l0_table`: The physical address of the root (L0) page table.
/// - `ctx`: The context for the operation (mapper).
/// - `deallocator`: A closure called for every physical address that needs freeing,
/// along with the size of the allocation in bytes:
/// 1. The User Data frames (Payload).
/// 2. The L1, L2, and L3 Page Table frames.
/// 3. The L0 Root Table frame.
///
/// Note; Block mappings (2MiB / 1GiB) are freed with their actual size.
pub fn tear_down_address_space<F, PM>(
/// - `l0_table`: Physical address of the root (L0) page table.
/// - `ctx`: Walk context (mapper + TLB invalidator).
/// - `control`: Called for every entry *before* any recursion or deallocation.
/// Returns the [`TeardownAction`] that drives walker behaviour:
/// - [`TeardownAction::Free`] — recurse (for tables) then `deallocator`.
/// - [`TeardownAction::FreeAndClear`] — recurse, zero the PTE, then `deallocator`.
/// - [`TeardownAction::Skip`] — skip the subtree entirely.
///
/// `control` must **not** free physical memory itself.
///
/// [`EntryKind::Mapping`] entries carry their virtual address inside the
/// [`VirtMemoryRegion`] associated value. [`EntryKind::IntermediateTable`]
/// and [`EntryKind::RootTable`] entries carry no virtual-address information.
///
/// - `deallocator`: Called by the walker (outside any live `with_page_table`
/// window) to release a frame. Always called after any PTE clearing.
///
/// Block mappings (2 MiB / 1 GiB) are reported as
/// [`EntryKind::Mapping`] with the appropriate region size.
pub fn tear_down_address_space<Control, Dealloc, PM>(
l0_table: TPA<PgTableArray<L0Table>>,
ctx: &mut WalkContext<PM>,
mut deallocator: F,
mut control: Control,
mut deallocator: Dealloc,
) -> Result<()>
where
PM: PageTableMapper,
F: FnMut(PhysMemoryRegion),
Control: FnMut(&TeardownEntry) -> TeardownAction,
Dealloc: FnMut(PhysMemoryRegion),
{
// L0Descriptor cannot encode block mappings, so iterate entries explicitly
// rather than relying on the blanket RecursiveTeardownWalker impl.
@@ -82,15 +133,47 @@ where
match next_item {
Some((idx, l1_addr)) => {
L1Table::tear_down(l1_addr, ctx, &mut deallocator)?;
deallocator(PhysMemoryRegion::new(l1_addr.to_untyped(), PAGE_SIZE));
let entry_va = VA::from_value(idx << L0Descriptor::MAP_SHIFT);
let entry = TeardownEntry {
kind: EntryKind::IntermediateTable,
depth: 1,
region: PhysMemoryRegion::new(l1_addr.to_untyped(), PAGE_SIZE),
};
let action = control(&entry);
if !matches!(action, TeardownAction::Skip) {
L1Table::tear_down(l1_addr, ctx, entry_va, 1, &mut control, &mut deallocator)?;
if matches!(action, TeardownAction::FreeAndClear) {
unsafe {
ctx.mapper.with_page_table(l0_table, |pgtable| {
L0Table::from_ptr(pgtable)
.to_raw_ptr()
.add(idx)
.write_volatile(0u64);
})?;
}
}
deallocator(entry.region);
}
cursor = idx + 1;
}
None => break,
}
}
deallocator(PhysMemoryRegion::new(l0_table.to_untyped(), PAGE_SIZE));
// Offer the root table frame to the caller.
let root_entry = TeardownEntry {
kind: EntryKind::RootTable,
depth: 0,
region: PhysMemoryRegion::new(l0_table.to_untyped(), PAGE_SIZE),
};
if !matches!(control(&root_entry), TeardownAction::Skip) {
deallocator(root_entry.region);
}
Ok(())
}
@@ -109,12 +192,26 @@ mod tests {
};
use std::collections::HashMap;
/// Tear down `l0_table` freeing every frame the walker visits.
fn capture_freed_pages<PM: PageTableMapper>(
l0_table: TPA<PgTableArray<L0Table>>,
ctx: &mut WalkContext<PM>,
) -> HashMap<usize, usize> {
capture_freed_pages_filtered(l0_table, ctx, |_| TeardownAction::Free)
}
/// Tear down `l0_table` using a custom `control` closure.
fn capture_freed_pages_filtered<PM, Control>(
l0_table: TPA<PgTableArray<L0Table>>,
ctx: &mut WalkContext<PM>,
control: Control,
) -> HashMap<usize, usize>
where
PM: PageTableMapper,
Control: FnMut(&TeardownEntry) -> TeardownAction,
{
let mut freed_map = HashMap::new();
tear_down_address_space(l0_table, ctx, |region| {
tear_down_address_space(l0_table, ctx, control, |region| {
if freed_map
.insert(region.start_address().value(), region.size())
.is_some()
@@ -320,4 +417,230 @@ mod tests {
// Total: block + L1 + L0 = 3 entries.
assert_eq!(freed.len(), 3);
}
#[test]
fn teardown_skip_data_pages() {
let mut harness = TestHarness::new(10);
let va = VA::from_value(0x1_0000_0000);
let pa = 0x8_0000;
harness
.map_4k_pages(pa, va.value(), 1, PtePermissions::ro(false))
.unwrap();
// Free only page table frames, not data pages (shared-mapping scenario).
let freed = capture_freed_pages_filtered(
harness.inner.root_table,
&mut harness.inner.create_walk_ctx(),
|entry| match entry.kind {
EntryKind::Mapping(_) => TeardownAction::Skip,
_ => TeardownAction::Free,
},
);
// Only the 4 page table frames are freed; data page is kept.
assert_eq!(freed.len(), 4);
assert!(!freed.contains_key(&pa));
assert!(freed.contains_key(&harness.inner.root_table.value()));
}
// -----------------------------------------------------------------------
// FreeAndClear tests
// -----------------------------------------------------------------------
#[test]
fn teardown_free_and_clear_zeroes_ptes() {
// Map one page so each table frame has exactly one non-zero entry.
// After FreeAndClear teardown, every table frame must have all slots zero.
let mut harness = TestHarness::new(10);
harness
.map_4k_pages(0x8_0000, 0x1_0000_0000, 1, PtePermissions::ro(false))
.unwrap();
let mut table_pas: Vec<usize> = vec![harness.inner.root_table.value()];
tear_down_address_space(
harness.inner.root_table,
&mut harness.inner.create_walk_ctx(),
|entry| {
if matches!(entry.kind, EntryKind::IntermediateTable) {
table_pas.push(entry.region.start_address().value());
}
TeardownAction::FreeAndClear
},
|_| {},
)
.unwrap();
// Every slot in every table frame must now be zero.
// In the PassthroughMapper harness, PAs are heap pointers, so the
// memory is still valid to read even after "freeing".
for &table_pa in &table_pas {
for idx in 0..512_usize {
let slot = unsafe { (table_pa as *const u64).add(idx).read_volatile() };
assert_eq!(
slot, 0,
"table frame {:#x} slot {idx} should be zeroed after FreeAndClear",
table_pa
);
}
}
}
#[test]
fn teardown_selective_clear() {
// Two 4K pages in the same L3 table (slots 0 and 1).
// FreeAndClear the first (VA 0x1_0000_0000), Free (no clear) the second.
// After teardown: L3 slot 0 == 0, L3 slot 1 != 0.
let mut harness = TestHarness::new(10);
harness
.map_4k_pages(0xA_0000, 0x1_0000_0000, 1, PtePermissions::ro(false))
.unwrap();
harness
.map_4k_pages(0xB_0000, 0x1_0000_1000, 1, PtePermissions::ro(false))
.unwrap();
let mut l3_pa: Option<usize> = None;
tear_down_address_space(
harness.inner.root_table,
&mut harness.inner.create_walk_ctx(),
|entry| {
// Capture the L3 frame (depth-3 IntermediateTable).
if matches!(entry.kind, EntryKind::IntermediateTable) && entry.depth == 3 {
l3_pa = Some(entry.region.start_address().value());
}
if let EntryKind::Mapping(virt) = &entry.kind {
if virt.start_address().value() == 0x1_0000_0000 {
return TeardownAction::FreeAndClear;
}
if virt.start_address().value() == 0x1_0000_1000 {
return TeardownAction::Free;
}
}
TeardownAction::Free
},
|_| {},
)
.unwrap();
let l3_pa = l3_pa.expect("L3 frame must have been observed");
// The cleared slot (slot 0 in L3) must be zero.
let slot0 = unsafe { (l3_pa as *const u64).add(0).read_volatile() };
assert_eq!(slot0, 0, "L3 slot 0 should be zeroed after FreeAndClear");
// The un-cleared slot (slot 1 in L3) must remain non-zero.
let slot1 = unsafe { (l3_pa as *const u64).add(1).read_volatile() };
assert_ne!(slot1, 0, "L3 slot 1 should remain non-zero after Free");
}
// -----------------------------------------------------------------------
// Skip behaviour tests
// -----------------------------------------------------------------------
#[test]
fn teardown_skip_root_table() {
// RootTable + Skip must not free the root, but must still free everything else.
let mut harness = TestHarness::new(10);
harness
.map_4k_pages(0x8_0000, 0x1_0000_0000, 1, PtePermissions::ro(false))
.unwrap();
let root_pa = harness.inner.root_table.value();
let freed = capture_freed_pages_filtered(
harness.inner.root_table,
&mut harness.inner.create_walk_ctx(),
|entry| match entry.kind {
EntryKind::RootTable => TeardownAction::Skip,
_ => TeardownAction::Free,
},
);
assert!(
!freed.contains_key(&root_pa),
"Root table must not be freed"
);
assert!(freed.contains_key(&0x8_0000), "Data page must be freed");
// L1 + L2 + L3 frame + data page = 4 freed (root skipped).
assert_eq!(freed.len(), 4);
}
#[test]
fn teardown_skip_entire_subtree() {
// Map one page; Skip the depth-1 IntermediateTable (L1 frame).
// Verify: (a) data page and all descendant frames are not freed,
// (b) control is NOT called for descendants (no spurious recursion).
let mut harness = TestHarness::new(10);
let pa = 0xA_0000;
harness
.map_4k_pages(pa, 0x1_0000_0000, 1, PtePermissions::ro(false))
.unwrap();
let mut control_calls = 0usize;
let freed = capture_freed_pages_filtered(
harness.inner.root_table,
&mut harness.inner.create_walk_ctx(),
|entry| {
control_calls += 1;
if matches!(entry.kind, EntryKind::IntermediateTable) && entry.depth == 1 {
TeardownAction::Skip
} else {
TeardownAction::Free
}
},
);
assert!(
!freed.contains_key(&pa),
"Skipped subtree data page must not be freed"
);
// Only the root table is freed; the rest was skipped.
assert_eq!(freed.len(), 1);
// control is called for root (1) + depth-1 entry (1) = 2 total.
assert_eq!(
control_calls, 2,
"control must not recurse into the skipped subtree"
);
}
// -----------------------------------------------------------------------
// Entry metadata test
// -----------------------------------------------------------------------
#[test]
fn teardown_entry_metadata() {
// For a single 4K page at VA 0x1_0000_0000 the walker must report
// exactly these (kind, depth, va) tuples, in any order.
let mut harness = TestHarness::new(10);
harness
.map_4k_pages(0x8_0000, 0x1_0000_0000, 1, PtePermissions::ro(false))
.unwrap();
let mut entries: Vec<(&'static str, u8, Option<usize>)> = Vec::new();
capture_freed_pages_filtered(
harness.inner.root_table,
&mut harness.inner.create_walk_ctx(),
|e| {
let (kind, va) = match &e.kind {
EntryKind::Mapping(virt) => ("Mapping", Some(virt.start_address().value())),
EntryKind::IntermediateTable => ("IntermediateTable", None),
EntryKind::RootTable => ("RootTable", None),
};
entries.push((kind, e.depth, va));
TeardownAction::Free
},
);
entries.sort_unstable_by_key(|&(k, d, v)| (d, v, k));
assert_eq!(
entries,
vec![
("RootTable", 0, None),
("IntermediateTable", 1, None), // L1 frame
("IntermediateTable", 2, None), // L2 frame
("IntermediateTable", 3, None), // L3 frame
("Mapping", 3, Some(0x1_0000_0000)), // mapped page
]
);
}
}

View File

@@ -1,71 +1,122 @@
//! Utilities for tearing down and freeing page table hierarchies.
use super::pg_descriptors::PML4E;
use super::pg_tables::{PDPTable, PML4Table, PTable};
use crate::error::Result;
use crate::memory::paging::TableMapper;
use crate::memory::region::PhysMemoryRegion;
use crate::memory::region::{PhysMemoryRegion, VirtMemoryRegion};
use crate::memory::{
PAGE_SIZE,
address::TPA,
address::{TPA, VA},
paging::{
PaMapper, PageTableMapper, PgTable, PgTableArray, tear_down::RecursiveTeardownWalker,
PaMapper, PageTableEntry, PageTableMapper, PgTable, PgTableArray,
tear_down::{EntryKind, RecursiveTeardownWalker, TeardownAction, TeardownEntry},
walk::WalkContext,
},
};
// Implementation for PTable (Leaf Table)
impl RecursiveTeardownWalker for PTable {
fn tear_down<F, PM>(
fn tear_down<Control, Dealloc, PM>(
table_pa: TPA<PgTableArray<Self>>,
ctx: &mut WalkContext<PM>,
deallocator: &mut F,
base_va: VA,
depth: u8,
control: &mut Control,
deallocator: &mut Dealloc,
) -> Result<()>
where
PM: PageTableMapper,
F: FnMut(PhysMemoryRegion),
Control: FnMut(&TeardownEntry) -> TeardownAction,
Dealloc: FnMut(PhysMemoryRegion),
{
unsafe {
ctx.mapper.with_page_table(table_pa, |pgtable| {
let table = PTable::from_ptr(pgtable);
let mut cursor = 0;
for idx in 0..Self::DESCRIPTORS_PER_PAGE {
let desc = table.get_idx(idx);
if let Some(addr) = desc.mapped_address() {
deallocator(PhysMemoryRegion::new(addr, PAGE_SIZE));
loop {
let next_item = unsafe {
ctx.mapper.with_page_table(table_pa, |pgtable| {
let table = PTable::from_ptr(pgtable);
for i in cursor..Self::DESCRIPTORS_PER_PAGE {
if let Some(addr) = table.get_idx(i).mapped_address() {
return Some((i, addr));
}
}
None
})?
};
match next_item {
Some((found_idx, addr)) => {
let entry_va =
base_va.add_bytes(found_idx << <PTable as PgTable>::Descriptor::MAP_SHIFT);
let entry = TeardownEntry {
kind: EntryKind::Mapping(VirtMemoryRegion::new(entry_va, PAGE_SIZE)),
depth,
region: PhysMemoryRegion::new(addr, PAGE_SIZE),
};
let action = control(&entry);
if !matches!(action, TeardownAction::Skip) {
// Clear the leaf PTE before releasing the data frame.
if matches!(action, TeardownAction::FreeAndClear) {
unsafe {
ctx.mapper.with_page_table(table_pa, |pgtable| {
PTable::from_ptr(pgtable)
.to_raw_ptr()
.add(found_idx)
.write_volatile(0u64);
})?;
}
}
deallocator(entry.region);
}
cursor = found_idx + 1;
}
})?;
None => break,
}
}
Ok(())
}
}
/// Walks the page table hierarchy for a given address space and applies a
/// freeing closure to every lower-half canonical address allocated frame.
/// Walks the page table hierarchy for a given address space and invokes
/// `control` and `deallocator` for every frame encountered.
///
/// # Parameters
/// - `pml4_table`: The physical address of the root (PML4) page table.
/// - `ctx`: The context for the operation (mapper).
/// - `deallocator`: A closure called for every physical address that needs freeing.
/// This includes:
/// 1. The User Data frames (Payload).
/// 2. The PDP, PD, and PTable Page Table frames.
/// 3. The PML4 Root Table frame.
///
/// Block mappings (2MiB / 1GiB) are freed with their actual size.
/// - `pml4_table`: Physical address of the root (PML4) page table.
/// - `ctx`: Walk context (mapper + TLB invalidator).
/// - `control`: Called for every entry *before* any recursion or deallocation.
/// Returns the [`TeardownAction`] that drives walker behaviour:
/// - [`TeardownAction::Free`] — recurse (for tables) then `deallocator`.
/// - [`TeardownAction::FreeAndClear`] — recurse, zero the PTE, then `deallocator`.
/// - [`TeardownAction::Skip`] — skip the subtree entirely.
///
/// *Note* PDPTables which are pointed to by PML4 indexes [256-511] are not
/// free'd.
pub fn tear_down_address_space<F, PM>(
/// `control` must **not** free physical memory itself.
///
/// [`EntryKind::Mapping`] entries carry their virtual address inside the
/// [`VirtMemoryRegion`] associated value. [`EntryKind::IntermediateTable`]
/// and [`EntryKind::RootTable`] entries carry no virtual-address
/// information. To filter upper-half x86_64 entries (e.g. kernel mappings),
/// inspect the VA of `Mapping` entries; note that this cannot short-circuit
/// an entire PML4 subtree, only its leaf entries.
///
/// - `deallocator`: Called by the walker (outside any live `with_page_table`
/// window) to release a frame. Always called after any PTE clearing.
///
/// Block mappings (2 MiB / 1 GiB) are reported as
/// [`EntryKind::Mapping`] with the appropriate region size.
pub fn tear_down_address_space<Control, Dealloc, PM>(
pml4_table: TPA<PgTableArray<PML4Table>>,
ctx: &mut WalkContext<PM>,
mut deallocator: F,
mut control: Control,
mut deallocator: Dealloc,
) -> Result<()>
where
PM: PageTableMapper,
F: FnMut(PhysMemoryRegion),
Control: FnMut(&TeardownEntry) -> TeardownAction,
Dealloc: FnMut(PhysMemoryRegion),
{
let mut cursor = 0;
@@ -73,7 +124,7 @@ where
let next_item = unsafe {
ctx.mapper.with_page_table(pml4_table, |pml4_tbl| {
let table = PML4Table::from_ptr(pml4_tbl);
for i in cursor..256 {
for i in cursor..512 {
if let Some(addr) = table.get_idx(i).next_table_address() {
return Some((i, addr));
}
@@ -84,15 +135,54 @@ where
match next_item {
Some((idx, pdp_addr)) => {
PDPTable::tear_down(pdp_addr, ctx, &mut deallocator)?;
deallocator(PhysMemoryRegion::new(pdp_addr.to_untyped(), PAGE_SIZE));
let entry_va = VA::from_value(idx << PML4E::MAP_SHIFT);
let entry = TeardownEntry {
kind: EntryKind::IntermediateTable,
depth: 1,
region: PhysMemoryRegion::new(pdp_addr.to_untyped(), PAGE_SIZE),
};
let action = control(&entry);
if !matches!(action, TeardownAction::Skip) {
PDPTable::tear_down(
pdp_addr,
ctx,
entry_va,
1,
&mut control,
&mut deallocator,
)?;
if matches!(action, TeardownAction::FreeAndClear) {
unsafe {
ctx.mapper.with_page_table(pml4_table, |pml4_tbl| {
PML4Table::from_ptr(pml4_tbl)
.to_raw_ptr()
.add(idx)
.write_volatile(0u64);
})?;
}
}
deallocator(entry.region);
}
cursor = idx + 1;
}
None => break,
}
}
deallocator(PhysMemoryRegion::new(pml4_table.to_untyped(), PAGE_SIZE));
// Offer the root table frame to the caller.
let root_entry = TeardownEntry {
kind: EntryKind::RootTable,
depth: 0,
region: PhysMemoryRegion::new(pml4_table.to_untyped(), PAGE_SIZE),
};
if !matches!(control(&root_entry), TeardownAction::Skip) {
deallocator(root_entry.region);
}
Ok(())
}
@@ -111,12 +201,26 @@ mod tests {
};
use std::collections::HashMap;
/// Tear down `root_table` freeing every frame the walker visits.
fn capture_freed_pages<PM: PageTableMapper>(
root_table: TPA<PgTableArray<PML4Table>>,
ctx: &mut WalkContext<PM>,
) -> HashMap<usize, usize> {
capture_freed_pages_filtered(root_table, ctx, |_| TeardownAction::Free)
}
/// Tear down `root_table` using a custom `control` closure.
fn capture_freed_pages_filtered<PM, Control>(
root_table: TPA<PgTableArray<PML4Table>>,
ctx: &mut WalkContext<PM>,
control: Control,
) -> HashMap<usize, usize>
where
PM: PageTableMapper,
Control: FnMut(&TeardownEntry) -> TeardownAction,
{
let mut freed_map = HashMap::new();
tear_down_address_space(root_table, ctx, |region| {
tear_down_address_space(root_table, ctx, control, |region| {
if freed_map
.insert(region.start_address().value(), region.size())
.is_some()
@@ -258,7 +362,12 @@ mod tests {
}
#[test]
fn teardown_does_not_free_kernel_tables() {
fn teardown_skip_mappings_by_va() {
// Demonstrates VA-based filtering of leaf mappings with the new API.
// Unlike the old API (where `entry.va` on IntermediateTable allowed
// skipping whole PML4 subtrees), only `Mapping` entries carry VA.
// Intermediate table frames for the filtered subtree are still freed;
// only the data frame itself is preserved.
let mut harness = TestHarness::new(15);
// Map a page in userspace (PML4 index 0).
@@ -267,9 +376,7 @@ mod tests {
.map_4k_pages(user_pa, 0x0000_0001_0000_0000, 1, PtePermissions::rw(false))
.unwrap();
// Map a page at a canonical kernel VA (PML4 index 256, bits [63:48] = 0xFFFF).
// This populates the upper half of the PML4, which tear_down_address_space
// must NOT touch.
// Map a page at a "high" VA (PML4 index 256, kernel half).
let kernel_pa = 0x2_0000;
harness
.map_4k_pages(
@@ -280,23 +387,26 @@ mod tests {
)
.unwrap();
let freed = capture_freed_pages(
// Skip Mapping entries in the upper half; free everything else.
let freed = capture_freed_pages_filtered(
harness.inner.root_table,
&mut harness.inner.create_walk_ctx(),
|entry| {
if let EntryKind::Mapping(virt) = &entry.kind {
if virt.start_address().value() >= (1usize << 47) {
return TeardownAction::Skip;
}
}
TeardownAction::Free
},
);
// Only the userspace hierarchy must be freed:
// 1 payload (user_pa)
// 1 PT table (userspace branch)
// 1 PD table (userspace branch)
// 1 PDP table (userspace branch)
// 1 PML4 root
assert_eq!(freed.len(), 5);
assert!(freed.contains_key(&user_pa));
// The kernel payload and its intermediate tables (PML4 index >= 256) must
// not be freed — they belong to the kernel and outlive this address space.
// The kernel data frame is preserved.
assert!(!freed.contains_key(&kernel_pa));
// The user data frame is freed.
assert!(freed.contains_key(&user_pa));
// user branch (data+PT+PD+PDP) + kernel branch (PT+PD+PDP, no data) + root = 8.
assert_eq!(freed.len(), 8);
}
#[test]
@@ -364,4 +474,221 @@ mod tests {
// Total: block + PDP + PML4 = 3 entries.
assert_eq!(freed.len(), 3);
}
#[test]
fn teardown_skip_data_pages() {
let mut harness = TestHarness::new(10);
let va = VA::from_value(0x1_0000_0000);
let pa = 0x8_0000;
harness
.map_4k_pages(pa, va.value(), 1, PtePermissions::ro(false))
.unwrap();
// Free only page table frames, not data pages (shared-mapping scenario).
let freed = capture_freed_pages_filtered(
harness.inner.root_table,
&mut harness.inner.create_walk_ctx(),
|entry| match entry.kind {
EntryKind::Mapping(_) => TeardownAction::Skip,
_ => TeardownAction::Free,
},
);
// Only the 4 page table frames are freed; data page is kept.
assert_eq!(freed.len(), 4);
assert!(!freed.contains_key(&pa));
assert!(freed.contains_key(&harness.inner.root_table.value()));
}
#[test]
fn teardown_free_and_clear_zeroes_ptes() {
// Map one page so each table frame has exactly one non-zero entry.
// After FreeAndClear teardown, every table frame must have all slots zero.
let mut harness = TestHarness::new(10);
harness
.map_4k_pages(0x8_0000, 0x1_0000_0000, 1, PtePermissions::ro(false))
.unwrap();
// Collect table frame PAs (root + all intermediates) during teardown.
let mut table_pas: Vec<usize> = vec![harness.inner.root_table.value()];
tear_down_address_space(
harness.inner.root_table,
&mut harness.inner.create_walk_ctx(),
|entry| {
if matches!(entry.kind, EntryKind::IntermediateTable) {
table_pas.push(entry.region.start_address().value());
}
TeardownAction::FreeAndClear
},
|_| {},
)
.unwrap();
// Every slot in every table frame must now be zero — the parent-slot
// clear for each level (and the leaf PTE clear) must have fired.
// In the PassthroughMapper harness, PAs are heap pointers, so the
// memory is still valid to read even after "freeing".
for &table_pa in &table_pas {
for idx in 0..512_usize {
let slot = unsafe { (table_pa as *const u64).add(idx).read_volatile() };
assert_eq!(
slot, 0,
"table frame {:#x} slot {idx} should be zeroed after FreeAndClear",
table_pa
);
}
}
}
#[test]
fn teardown_selective_clear() {
// Two 4K pages in the same PT (slots 0 and 1).
// FreeAndClear the first (VA 0x1_0000_0000), Free (no clear) the second.
// After teardown: PT slot 0 == 0, PT slot 1 != 0.
let mut harness = TestHarness::new(10);
harness
.map_4k_pages(0xA_0000, 0x1_0000_0000, 1, PtePermissions::ro(false))
.unwrap();
harness
.map_4k_pages(0xB_0000, 0x1_0000_1000, 1, PtePermissions::ro(false))
.unwrap();
let mut pt_pa: Option<usize> = None;
tear_down_address_space(
harness.inner.root_table,
&mut harness.inner.create_walk_ctx(),
|entry| {
// Capture the PT frame (depth-3 IntermediateTable).
if matches!(entry.kind, EntryKind::IntermediateTable) && entry.depth == 3 {
pt_pa = Some(entry.region.start_address().value());
}
if let EntryKind::Mapping(virt) = &entry.kind {
if virt.start_address().value() == 0x1_0000_0000 {
return TeardownAction::FreeAndClear;
}
if virt.start_address().value() == 0x1_0000_1000 {
return TeardownAction::Free;
}
}
TeardownAction::Free
},
|_| {},
)
.unwrap();
let pt_pa = pt_pa.expect("PT frame must have been observed");
// The cleared slot (slot 0 in PT) must be zero.
let slot0 = unsafe { (pt_pa as *const u64).add(0).read_volatile() };
assert_eq!(slot0, 0, "PT slot 0 should be zeroed after FreeAndClear");
// The un-cleared slot (slot 1 in PT) must remain non-zero.
let slot1 = unsafe { (pt_pa as *const u64).add(1).read_volatile() };
assert_ne!(slot1, 0, "PT slot 1 should remain non-zero after Free");
}
#[test]
fn teardown_skip_root_table() {
// RootTable + Skip must not free the root, but must still free everything else.
let mut harness = TestHarness::new(10);
harness
.map_4k_pages(0x8_0000, 0x1_0000_0000, 1, PtePermissions::ro(false))
.unwrap();
let root_pa = harness.inner.root_table.value();
let freed = capture_freed_pages_filtered(
harness.inner.root_table,
&mut harness.inner.create_walk_ctx(),
|entry| match entry.kind {
EntryKind::RootTable => TeardownAction::Skip,
_ => TeardownAction::Free,
},
);
assert!(
!freed.contains_key(&root_pa),
"Root table must not be freed"
);
assert!(freed.contains_key(&0x8_0000), "Data page must be freed");
// PDP + PDE + PTE frame + data page = 4 freed (root skipped).
assert_eq!(freed.len(), 4);
}
#[test]
fn teardown_skip_entire_subtree() {
// Map one page; Skip the depth-1 IntermediateTable (PDP frame).
// Verify: (a) data page and all descendant frames are not freed,
// (b) control is NOT called for descendants (no spurious recursion).
let mut harness = TestHarness::new(10);
let pa = 0xA_0000;
harness
.map_4k_pages(pa, 0x1_0000_0000, 1, PtePermissions::ro(false))
.unwrap();
let mut control_calls = 0usize;
let freed = capture_freed_pages_filtered(
harness.inner.root_table,
&mut harness.inner.create_walk_ctx(),
|entry| {
control_calls += 1;
if matches!(entry.kind, EntryKind::IntermediateTable) && entry.depth == 1 {
TeardownAction::Skip
} else {
TeardownAction::Free
}
},
);
assert!(
!freed.contains_key(&pa),
"Skipped subtree data page must not be freed"
);
// Only the root table is freed; the rest was skipped.
assert_eq!(freed.len(), 1);
// control is called for root (1) + depth-1 entry (1) = 2 total.
assert_eq!(
control_calls, 2,
"control must not recurse into the skipped subtree"
);
}
#[test]
fn teardown_entry_metadata() {
// For a single 4K page at VA 0x1_0000_0000 the walker must report
// exactly these (kind, depth, va) tuples, in any order.
let mut harness = TestHarness::new(10);
harness
.map_4k_pages(0x8_0000, 0x1_0000_0000, 1, PtePermissions::ro(false))
.unwrap();
let mut entries: Vec<(&'static str, u8, Option<usize>)> = Vec::new();
capture_freed_pages_filtered(
harness.inner.root_table,
&mut harness.inner.create_walk_ctx(),
|e| {
let (kind, va) = match &e.kind {
EntryKind::Mapping(virt) => ("Mapping", Some(virt.start_address().value())),
EntryKind::IntermediateTable => ("IntermediateTable", None),
EntryKind::RootTable => ("RootTable", None),
};
entries.push((kind, e.depth, va));
TeardownAction::Free
},
);
// Sort by (depth, va, kind) for a deterministic comparison.
entries.sort_unstable_by_key(|&(k, d, v)| (d, v, k));
assert_eq!(
entries,
vec![
("RootTable", 0, None),
("IntermediateTable", 1, None), // PDP frame
("IntermediateTable", 2, None), // PD frame
("IntermediateTable", 3, None), // PT frame
("Mapping", 3, Some(0x1_0000_0000)), // mapped page
]
);
}
}

View File

@@ -10,7 +10,7 @@ use permissions::PtePermissions;
pub mod permissions;
pub mod smalloc_page_allocator;
pub(crate) mod tear_down;
pub mod tear_down;
pub mod walk;
#[cfg(test)]

View File

@@ -1,63 +1,140 @@
//! Page-table tear-down and deallocation.
use crate::memory::{
PAGE_SIZE,
address::{PA, TPA},
address::{PA, TPA, VA},
paging::{
PaMapper, PageTableEntry, PageTableMapper, PgTable, PgTableArray, TableMapper,
TableMapperTable, walk::WalkContext,
},
region::PhysMemoryRegion,
region::{PhysMemoryRegion, VirtMemoryRegion},
};
enum EntryKind<NL: PgTable> {
/// Identifies the kind of physical frame a [`TeardownEntry`] describes.
pub enum EntryKind {
/// A leaf page mapping.
///
/// The [`VirtMemoryRegion`] covers the entire mapping virtual region.
Mapping(VirtMemoryRegion),
/// An intermediate page table frame (not the root).
IntermediateTable,
/// The root page table frame (PML4 / L0).
RootTable,
}
/// Describes a single physical frame encountered during a page table tear-down.
pub struct TeardownEntry {
/// What kind of frame this is.
///
/// For [`EntryKind::Mapping`] entries the virtual region is embedded in the
/// variant. [`EntryKind::IntermediateTable`] and [`EntryKind::RootTable`]
/// entries describe infrastructure frames and carry no virtual-address
/// information.
pub kind: EntryKind,
/// Depth in the page table tree. Root = 0, incrementing toward leaves.
///
/// Intermediate table frames and the leaf mappings within the same table
/// share the same depth value; [`EntryKind`] disambiguates.
pub depth: u8,
/// The physical memory region of this frame.
pub region: PhysMemoryRegion,
}
/// The action the walker should take for a [`TeardownEntry`].
///
/// The `control` closure returns this to drive walker behaviour. The actual
/// freeing is always performed by the separate `deallocator` closure, called
/// by the walker in the correct order (after clearing if requested).
#[allow(missing_docs)]
pub enum TeardownAction {
/// Recurse into this subtree (for [`EntryKind::IntermediateTable`] entries)
/// and then call `deallocator` for its frame. For leaf entries, just call
/// `deallocator`.
Free,
/// Like [`TeardownAction::Free`], but also zero the page table entry in the
/// parent table *before* calling `deallocator`. The zeroing uses
/// `write_volatile` and happens before the frame is released to the
/// allocator, closing the stale-PTE window.
FreeAndClear,
/// Skip this entry entirely. For [`EntryKind::IntermediateTable`] entries,
/// the subtree is not walked. `deallocator` is not called.
Skip,
}
// Walker-internal discriminant used while scanning a table.
enum LocalEntryKind<NL: PgTable> {
Table(TPA<PgTableArray<NL>>),
Block(PA),
}
/// Trait implemented by each page table level that can be recursively walked
/// during a tear-down.
pub trait RecursiveTeardownWalker: PgTable + Sized {
fn tear_down<F, PM>(
/// Walk all mappings in `table_pa` and invoke `control` / `deallocator`
/// for each frame found.
///
/// - `base_va`: the virtual address of the first byte covered by
/// `table_pa`'s first entry.
/// - `depth`: the tree depth of `table_pa` itself (root's children = 1).
/// - `control`: called for each entry *before* any recursion or
/// deallocation. Returns the action the walker should take. Must not
/// free physical memory — use `deallocator` for that.
/// - `deallocator`: called by the walker (never inside a live
/// `with_page_table` window) to physically release a frame.
fn tear_down<Control, Dealloc, PM>(
table_pa: TPA<PgTableArray<Self>>,
ctx: &mut WalkContext<PM>,
deallocator: &mut F,
base_va: VA,
depth: u8,
control: &mut Control,
deallocator: &mut Dealloc,
) -> crate::error::Result<()>
where
PM: PageTableMapper,
F: FnMut(PhysMemoryRegion);
Control: FnMut(&TeardownEntry) -> TeardownAction,
Dealloc: FnMut(PhysMemoryRegion);
}
/// Blanket impl for intermediate table levels whose descriptors support both
/// table and block mappings (e.g. PDE/PDPE on x86_64, L1/L2 on arm64).
/// table and block mappings.
///
/// Root table levels (PML4, L0) are excluded because their descriptors cannot
/// encode block mappings — those are handled explicitly by `tear_down_address_space`.
/// encode block mappings — those are handled explicitly by
/// `tear_down_address_space`.
impl<T> RecursiveTeardownWalker for T
where
T: TableMapperTable,
T::Descriptor: PaMapper,
<T::Descriptor as TableMapper>::NextLevel: RecursiveTeardownWalker,
{
fn tear_down<F, PM>(
fn tear_down<Control, Dealloc, PM>(
table_pa: TPA<PgTableArray<Self>>,
ctx: &mut WalkContext<PM>,
deallocator: &mut F,
base_va: VA,
depth: u8,
control: &mut Control,
deallocator: &mut Dealloc,
) -> crate::error::Result<()>
where
PM: PageTableMapper,
F: FnMut(PhysMemoryRegion),
Control: FnMut(&TeardownEntry) -> TeardownAction,
Dealloc: FnMut(PhysMemoryRegion),
{
let mut cursor = 0;
loop {
let next_item: Option<(usize, EntryKind<<T::Descriptor as TableMapper>::NextLevel>)> = unsafe {
let next_item: Option<(
usize,
LocalEntryKind<<T::Descriptor as TableMapper>::NextLevel>,
)> = unsafe {
ctx.mapper.with_page_table(table_pa, |pgtable| {
let table = Self::from_ptr(pgtable);
for i in cursor..<T as PgTable>::DESCRIPTORS_PER_PAGE {
let desc = table.get_idx(i);
if let Some(addr) = desc.next_table_address() {
return Some((i, EntryKind::Table(addr)));
return Some((i, LocalEntryKind::Table(addr)));
} else if let Some(pa) = desc.mapped_address() {
return Some((i, EntryKind::Block(pa)));
return Some((i, LocalEntryKind::Block(pa)));
}
}
None
@@ -65,24 +142,76 @@ where
};
match next_item {
Some((found_idx, EntryKind::Table(table_addr))) => {
// Recurse into the child table, then free its frame.
<T::Descriptor as TableMapper>::NextLevel::tear_down(
table_addr,
ctx,
deallocator,
)?;
deallocator(PhysMemoryRegion::new(table_addr.to_untyped(), PAGE_SIZE));
Some((found_idx, LocalEntryKind::Table(table_addr))) => {
let entry_va = base_va
.add_bytes(found_idx << <T::Descriptor as PageTableEntry>::MAP_SHIFT);
let child_depth = depth + 1;
let entry = TeardownEntry {
kind: EntryKind::IntermediateTable,
depth: child_depth,
region: PhysMemoryRegion::new(table_addr.to_untyped(), PAGE_SIZE),
};
let action = control(&entry);
if !matches!(action, TeardownAction::Skip) {
// Recurse into the child table first.
<T::Descriptor as TableMapper>::NextLevel::tear_down(
table_addr,
ctx,
entry_va,
child_depth,
control,
deallocator,
)?;
// Clear the PTE in this table before releasing the child
// frame, ensuring no stale translations outlive the frame.
if matches!(action, TeardownAction::FreeAndClear) {
unsafe {
ctx.mapper.with_page_table(table_pa, |pgtable| {
Self::from_ptr(pgtable)
.to_raw_ptr()
.add(found_idx)
.write_volatile(0u64);
})?;
}
}
deallocator(entry.region);
}
cursor = found_idx + 1;
}
Some((found_idx, EntryKind::Block(pa))) => {
// Block frame: size is the coverage of one entry at this level.
deallocator(PhysMemoryRegion::new(
pa,
1 << <T::Descriptor as PageTableEntry>::MAP_SHIFT,
));
Some((found_idx, LocalEntryKind::Block(pa))) => {
let entry_va = base_va
.add_bytes(found_idx << <T::Descriptor as PageTableEntry>::MAP_SHIFT);
let map_size = 1 << <T::Descriptor as PageTableEntry>::MAP_SHIFT;
let entry = TeardownEntry {
kind: EntryKind::Mapping(VirtMemoryRegion::new(entry_va, map_size)),
depth,
region: PhysMemoryRegion::new(pa, map_size),
};
let action = control(&entry);
if !matches!(action, TeardownAction::Skip) {
// Clear the block PTE before releasing the frame.
if matches!(action, TeardownAction::FreeAndClear) {
unsafe {
ctx.mapper.with_page_table(table_pa, |pgtable| {
Self::from_ptr(pgtable)
.to_raw_ptr()
.add(found_idx)
.write_volatile(0u64);
})?;
}
}
deallocator(entry.region);
}
cursor = found_idx + 1;
}
None => break,
}
}

View File

@@ -23,7 +23,7 @@ use libkernel::{
page::PageFrame,
paging::{
PaMapper, PageAllocator, PageTableEntry, PgTableArray, permissions::PtePermissions,
walk::WalkContext,
tear_down::TeardownAction, walk::WalkContext,
},
proc_vm::address_space::{PageInfo, UserAddressSpace},
region::{PhysMemoryRegion, VirtMemoryRegion},
@@ -209,9 +209,14 @@ impl Drop for Arm64ProcessAddressSpace {
invalidator: &AllEl0TlbInvalidator::new(),
};
if tear_down_address_space(self.l0_table, &mut walk_ctx, |region| unsafe {
PAGE_ALLOC.get().unwrap().alloc_from_region(region);
})
if tear_down_address_space(
self.l0_table,
&mut walk_ctx,
|_| TeardownAction::Free,
|region| unsafe {
PAGE_ALLOC.get().unwrap().alloc_from_region(region);
},
)
.is_err()
{
warn!("Address space tear down failed. Probable memory leakage!");