From 72ef61fb10ff1d4b00036d8a2fdc07222dc0b51c Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Fri, 1 May 2026 15:26:58 +0100 Subject: [PATCH] libkernel: pg_tear_down: add more flexibility The current tear down implementation assumes that all data mappings & intermediate tables need to be free'd. That might not provide enough flexibility for all tear-down scenarios. Provide a more flexible API, passing current walk state into a control function which makes a descision regarding tear-down and recursion. --- .../src/arch/arm64/memory/pg_tear_down.rs | 391 ++++++++++++++-- .../src/arch/x86_64/memory/pg_tear_down.rs | 431 +++++++++++++++--- libkernel/src/memory/paging/mod.rs | 2 +- libkernel/src/memory/paging/tear_down.rs | 189 ++++++-- src/arch/arm64/memory/address_space.rs | 13 +- 5 files changed, 905 insertions(+), 121 deletions(-) diff --git a/libkernel/src/arch/arm64/memory/pg_tear_down.rs b/libkernel/src/arch/arm64/memory/pg_tear_down.rs index 7cfc72f..997973d 100644 --- a/libkernel/src/arch/arm64/memory/pg_tear_down.rs +++ b/libkernel/src/arch/arm64/memory/pg_tear_down.rs @@ -1,67 +1,118 @@ //! Utilities for tearing down and freeing page table hierarchies. +use super::pg_descriptors::L0Descriptor; use super::pg_tables::{L0Table, L1Table, L3Table}; use crate::error::Result; -use crate::memory::region::PhysMemoryRegion; +use crate::memory::region::{PhysMemoryRegion, VirtMemoryRegion}; use crate::memory::{ PAGE_SIZE, - address::TPA, + address::{TPA, VA}, paging::{ - PaMapper, PageTableMapper, PgTable, PgTableArray, TableMapper, - tear_down::RecursiveTeardownWalker, walk::WalkContext, + PaMapper, PageTableEntry, PageTableMapper, PgTable, PgTableArray, TableMapper, + tear_down::{EntryKind, RecursiveTeardownWalker, TeardownAction, TeardownEntry}, + walk::WalkContext, }, }; // Implementation for L3 (Leaf Table) impl RecursiveTeardownWalker for L3Table { - fn tear_down( + fn tear_down( table_pa: TPA>, ctx: &mut WalkContext, - deallocator: &mut F, + base_va: VA, + depth: u8, + control: &mut Control, + deallocator: &mut Dealloc, ) -> Result<()> where PM: PageTableMapper, - F: FnMut(PhysMemoryRegion), + Control: FnMut(&TeardownEntry) -> TeardownAction, + Dealloc: FnMut(PhysMemoryRegion), { - unsafe { - ctx.mapper.with_page_table(table_pa, |pgtable| { - let table = L3Table::from_ptr(pgtable); + let mut cursor = 0; - for idx in 0..Self::DESCRIPTORS_PER_PAGE { - let desc = table.get_idx(idx); - - if let Some(addr) = desc.mapped_address() { - deallocator(PhysMemoryRegion::new(addr, PAGE_SIZE)); + loop { + let next_item = unsafe { + ctx.mapper.with_page_table(table_pa, |pgtable| { + let table = L3Table::from_ptr(pgtable); + for i in cursor..Self::DESCRIPTORS_PER_PAGE { + if let Some(addr) = table.get_idx(i).mapped_address() { + return Some((i, addr)); + } } + None + })? + }; + + match next_item { + Some((found_idx, addr)) => { + let entry_va = + base_va.add_bytes(found_idx << ::Descriptor::MAP_SHIFT); + let entry = TeardownEntry { + kind: EntryKind::Mapping(VirtMemoryRegion::new(entry_va, PAGE_SIZE)), + depth, + region: PhysMemoryRegion::new(addr, PAGE_SIZE), + }; + let action = control(&entry); + + if !matches!(action, TeardownAction::Skip) { + // Clear the leaf PTE before releasing the data frame. + if matches!(action, TeardownAction::FreeAndClear) { + unsafe { + ctx.mapper.with_page_table(table_pa, |pgtable| { + L3Table::from_ptr(pgtable) + .to_raw_ptr() + .add(found_idx) + .write_volatile(0u64); + })?; + } + } + deallocator(entry.region); + } + + cursor = found_idx + 1; } - })?; + None => break, + } } Ok(()) } } -/// Walks the page table hierarchy for a given address space and applies a -/// freeing closure to every allocated frame. +/// Walks the page table hierarchy for a given address space and invokes +/// `control` and `deallocator` for every frame encountered. /// /// # Parameters -/// - `l0_table`: The physical address of the root (L0) page table. -/// - `ctx`: The context for the operation (mapper). -/// - `deallocator`: A closure called for every physical address that needs freeing, -/// along with the size of the allocation in bytes: -/// 1. The User Data frames (Payload). -/// 2. The L1, L2, and L3 Page Table frames. -/// 3. The L0 Root Table frame. -/// -/// Note; Block mappings (2MiB / 1GiB) are freed with their actual size. -pub fn tear_down_address_space( +/// - `l0_table`: Physical address of the root (L0) page table. +/// - `ctx`: Walk context (mapper + TLB invalidator). +/// - `control`: Called for every entry *before* any recursion or deallocation. +/// Returns the [`TeardownAction`] that drives walker behaviour: +/// - [`TeardownAction::Free`] — recurse (for tables) then `deallocator`. +/// - [`TeardownAction::FreeAndClear`] — recurse, zero the PTE, then `deallocator`. +/// - [`TeardownAction::Skip`] — skip the subtree entirely. +/// +/// `control` must **not** free physical memory itself. +/// +/// [`EntryKind::Mapping`] entries carry their virtual address inside the +/// [`VirtMemoryRegion`] associated value. [`EntryKind::IntermediateTable`] +/// and [`EntryKind::RootTable`] entries carry no virtual-address information. +/// +/// - `deallocator`: Called by the walker (outside any live `with_page_table` +/// window) to release a frame. Always called after any PTE clearing. +/// +/// Block mappings (2 MiB / 1 GiB) are reported as +/// [`EntryKind::Mapping`] with the appropriate region size. +pub fn tear_down_address_space( l0_table: TPA>, ctx: &mut WalkContext, - mut deallocator: F, + mut control: Control, + mut deallocator: Dealloc, ) -> Result<()> where PM: PageTableMapper, - F: FnMut(PhysMemoryRegion), + Control: FnMut(&TeardownEntry) -> TeardownAction, + Dealloc: FnMut(PhysMemoryRegion), { // L0Descriptor cannot encode block mappings, so iterate entries explicitly // rather than relying on the blanket RecursiveTeardownWalker impl. @@ -82,15 +133,47 @@ where match next_item { Some((idx, l1_addr)) => { - L1Table::tear_down(l1_addr, ctx, &mut deallocator)?; - deallocator(PhysMemoryRegion::new(l1_addr.to_untyped(), PAGE_SIZE)); + let entry_va = VA::from_value(idx << L0Descriptor::MAP_SHIFT); + let entry = TeardownEntry { + kind: EntryKind::IntermediateTable, + depth: 1, + region: PhysMemoryRegion::new(l1_addr.to_untyped(), PAGE_SIZE), + }; + let action = control(&entry); + + if !matches!(action, TeardownAction::Skip) { + L1Table::tear_down(l1_addr, ctx, entry_va, 1, &mut control, &mut deallocator)?; + + if matches!(action, TeardownAction::FreeAndClear) { + unsafe { + ctx.mapper.with_page_table(l0_table, |pgtable| { + L0Table::from_ptr(pgtable) + .to_raw_ptr() + .add(idx) + .write_volatile(0u64); + })?; + } + } + + deallocator(entry.region); + } + cursor = idx + 1; } None => break, } } - deallocator(PhysMemoryRegion::new(l0_table.to_untyped(), PAGE_SIZE)); + // Offer the root table frame to the caller. + let root_entry = TeardownEntry { + kind: EntryKind::RootTable, + depth: 0, + region: PhysMemoryRegion::new(l0_table.to_untyped(), PAGE_SIZE), + }; + if !matches!(control(&root_entry), TeardownAction::Skip) { + deallocator(root_entry.region); + } + Ok(()) } @@ -109,12 +192,26 @@ mod tests { }; use std::collections::HashMap; + /// Tear down `l0_table` freeing every frame the walker visits. fn capture_freed_pages( l0_table: TPA>, ctx: &mut WalkContext, ) -> HashMap { + capture_freed_pages_filtered(l0_table, ctx, |_| TeardownAction::Free) + } + + /// Tear down `l0_table` using a custom `control` closure. + fn capture_freed_pages_filtered( + l0_table: TPA>, + ctx: &mut WalkContext, + control: Control, + ) -> HashMap + where + PM: PageTableMapper, + Control: FnMut(&TeardownEntry) -> TeardownAction, + { let mut freed_map = HashMap::new(); - tear_down_address_space(l0_table, ctx, |region| { + tear_down_address_space(l0_table, ctx, control, |region| { if freed_map .insert(region.start_address().value(), region.size()) .is_some() @@ -320,4 +417,230 @@ mod tests { // Total: block + L1 + L0 = 3 entries. assert_eq!(freed.len(), 3); } + + #[test] + fn teardown_skip_data_pages() { + let mut harness = TestHarness::new(10); + let va = VA::from_value(0x1_0000_0000); + let pa = 0x8_0000; + + harness + .map_4k_pages(pa, va.value(), 1, PtePermissions::ro(false)) + .unwrap(); + + // Free only page table frames, not data pages (shared-mapping scenario). + let freed = capture_freed_pages_filtered( + harness.inner.root_table, + &mut harness.inner.create_walk_ctx(), + |entry| match entry.kind { + EntryKind::Mapping(_) => TeardownAction::Skip, + _ => TeardownAction::Free, + }, + ); + + // Only the 4 page table frames are freed; data page is kept. + assert_eq!(freed.len(), 4); + assert!(!freed.contains_key(&pa)); + assert!(freed.contains_key(&harness.inner.root_table.value())); + } + + // ----------------------------------------------------------------------- + // FreeAndClear tests + // ----------------------------------------------------------------------- + + #[test] + fn teardown_free_and_clear_zeroes_ptes() { + // Map one page so each table frame has exactly one non-zero entry. + // After FreeAndClear teardown, every table frame must have all slots zero. + let mut harness = TestHarness::new(10); + harness + .map_4k_pages(0x8_0000, 0x1_0000_0000, 1, PtePermissions::ro(false)) + .unwrap(); + + let mut table_pas: Vec = vec![harness.inner.root_table.value()]; + tear_down_address_space( + harness.inner.root_table, + &mut harness.inner.create_walk_ctx(), + |entry| { + if matches!(entry.kind, EntryKind::IntermediateTable) { + table_pas.push(entry.region.start_address().value()); + } + TeardownAction::FreeAndClear + }, + |_| {}, + ) + .unwrap(); + + // Every slot in every table frame must now be zero. + // In the PassthroughMapper harness, PAs are heap pointers, so the + // memory is still valid to read even after "freeing". + for &table_pa in &table_pas { + for idx in 0..512_usize { + let slot = unsafe { (table_pa as *const u64).add(idx).read_volatile() }; + assert_eq!( + slot, 0, + "table frame {:#x} slot {idx} should be zeroed after FreeAndClear", + table_pa + ); + } + } + } + + #[test] + fn teardown_selective_clear() { + // Two 4K pages in the same L3 table (slots 0 and 1). + // FreeAndClear the first (VA 0x1_0000_0000), Free (no clear) the second. + // After teardown: L3 slot 0 == 0, L3 slot 1 != 0. + let mut harness = TestHarness::new(10); + harness + .map_4k_pages(0xA_0000, 0x1_0000_0000, 1, PtePermissions::ro(false)) + .unwrap(); + harness + .map_4k_pages(0xB_0000, 0x1_0000_1000, 1, PtePermissions::ro(false)) + .unwrap(); + + let mut l3_pa: Option = None; + tear_down_address_space( + harness.inner.root_table, + &mut harness.inner.create_walk_ctx(), + |entry| { + // Capture the L3 frame (depth-3 IntermediateTable). + if matches!(entry.kind, EntryKind::IntermediateTable) && entry.depth == 3 { + l3_pa = Some(entry.region.start_address().value()); + } + if let EntryKind::Mapping(virt) = &entry.kind { + if virt.start_address().value() == 0x1_0000_0000 { + return TeardownAction::FreeAndClear; + } + if virt.start_address().value() == 0x1_0000_1000 { + return TeardownAction::Free; + } + } + TeardownAction::Free + }, + |_| {}, + ) + .unwrap(); + + let l3_pa = l3_pa.expect("L3 frame must have been observed"); + + // The cleared slot (slot 0 in L3) must be zero. + let slot0 = unsafe { (l3_pa as *const u64).add(0).read_volatile() }; + assert_eq!(slot0, 0, "L3 slot 0 should be zeroed after FreeAndClear"); + + // The un-cleared slot (slot 1 in L3) must remain non-zero. + let slot1 = unsafe { (l3_pa as *const u64).add(1).read_volatile() }; + assert_ne!(slot1, 0, "L3 slot 1 should remain non-zero after Free"); + } + + // ----------------------------------------------------------------------- + // Skip behaviour tests + // ----------------------------------------------------------------------- + + #[test] + fn teardown_skip_root_table() { + // RootTable + Skip must not free the root, but must still free everything else. + let mut harness = TestHarness::new(10); + harness + .map_4k_pages(0x8_0000, 0x1_0000_0000, 1, PtePermissions::ro(false)) + .unwrap(); + + let root_pa = harness.inner.root_table.value(); + let freed = capture_freed_pages_filtered( + harness.inner.root_table, + &mut harness.inner.create_walk_ctx(), + |entry| match entry.kind { + EntryKind::RootTable => TeardownAction::Skip, + _ => TeardownAction::Free, + }, + ); + + assert!( + !freed.contains_key(&root_pa), + "Root table must not be freed" + ); + assert!(freed.contains_key(&0x8_0000), "Data page must be freed"); + // L1 + L2 + L3 frame + data page = 4 freed (root skipped). + assert_eq!(freed.len(), 4); + } + + #[test] + fn teardown_skip_entire_subtree() { + // Map one page; Skip the depth-1 IntermediateTable (L1 frame). + // Verify: (a) data page and all descendant frames are not freed, + // (b) control is NOT called for descendants (no spurious recursion). + let mut harness = TestHarness::new(10); + let pa = 0xA_0000; + harness + .map_4k_pages(pa, 0x1_0000_0000, 1, PtePermissions::ro(false)) + .unwrap(); + + let mut control_calls = 0usize; + let freed = capture_freed_pages_filtered( + harness.inner.root_table, + &mut harness.inner.create_walk_ctx(), + |entry| { + control_calls += 1; + if matches!(entry.kind, EntryKind::IntermediateTable) && entry.depth == 1 { + TeardownAction::Skip + } else { + TeardownAction::Free + } + }, + ); + + assert!( + !freed.contains_key(&pa), + "Skipped subtree data page must not be freed" + ); + // Only the root table is freed; the rest was skipped. + assert_eq!(freed.len(), 1); + // control is called for root (1) + depth-1 entry (1) = 2 total. + assert_eq!( + control_calls, 2, + "control must not recurse into the skipped subtree" + ); + } + + // ----------------------------------------------------------------------- + // Entry metadata test + // ----------------------------------------------------------------------- + + #[test] + fn teardown_entry_metadata() { + // For a single 4K page at VA 0x1_0000_0000 the walker must report + // exactly these (kind, depth, va) tuples, in any order. + let mut harness = TestHarness::new(10); + harness + .map_4k_pages(0x8_0000, 0x1_0000_0000, 1, PtePermissions::ro(false)) + .unwrap(); + + let mut entries: Vec<(&'static str, u8, Option)> = Vec::new(); + capture_freed_pages_filtered( + harness.inner.root_table, + &mut harness.inner.create_walk_ctx(), + |e| { + let (kind, va) = match &e.kind { + EntryKind::Mapping(virt) => ("Mapping", Some(virt.start_address().value())), + EntryKind::IntermediateTable => ("IntermediateTable", None), + EntryKind::RootTable => ("RootTable", None), + }; + entries.push((kind, e.depth, va)); + TeardownAction::Free + }, + ); + + entries.sort_unstable_by_key(|&(k, d, v)| (d, v, k)); + + assert_eq!( + entries, + vec![ + ("RootTable", 0, None), + ("IntermediateTable", 1, None), // L1 frame + ("IntermediateTable", 2, None), // L2 frame + ("IntermediateTable", 3, None), // L3 frame + ("Mapping", 3, Some(0x1_0000_0000)), // mapped page + ] + ); + } } diff --git a/libkernel/src/arch/x86_64/memory/pg_tear_down.rs b/libkernel/src/arch/x86_64/memory/pg_tear_down.rs index 0db69e8..43f7538 100644 --- a/libkernel/src/arch/x86_64/memory/pg_tear_down.rs +++ b/libkernel/src/arch/x86_64/memory/pg_tear_down.rs @@ -1,71 +1,122 @@ //! Utilities for tearing down and freeing page table hierarchies. +use super::pg_descriptors::PML4E; use super::pg_tables::{PDPTable, PML4Table, PTable}; use crate::error::Result; use crate::memory::paging::TableMapper; -use crate::memory::region::PhysMemoryRegion; +use crate::memory::region::{PhysMemoryRegion, VirtMemoryRegion}; use crate::memory::{ PAGE_SIZE, - address::TPA, + address::{TPA, VA}, paging::{ - PaMapper, PageTableMapper, PgTable, PgTableArray, tear_down::RecursiveTeardownWalker, + PaMapper, PageTableEntry, PageTableMapper, PgTable, PgTableArray, + tear_down::{EntryKind, RecursiveTeardownWalker, TeardownAction, TeardownEntry}, walk::WalkContext, }, }; // Implementation for PTable (Leaf Table) impl RecursiveTeardownWalker for PTable { - fn tear_down( + fn tear_down( table_pa: TPA>, ctx: &mut WalkContext, - deallocator: &mut F, + base_va: VA, + depth: u8, + control: &mut Control, + deallocator: &mut Dealloc, ) -> Result<()> where PM: PageTableMapper, - F: FnMut(PhysMemoryRegion), + Control: FnMut(&TeardownEntry) -> TeardownAction, + Dealloc: FnMut(PhysMemoryRegion), { - unsafe { - ctx.mapper.with_page_table(table_pa, |pgtable| { - let table = PTable::from_ptr(pgtable); + let mut cursor = 0; - for idx in 0..Self::DESCRIPTORS_PER_PAGE { - let desc = table.get_idx(idx); - - if let Some(addr) = desc.mapped_address() { - deallocator(PhysMemoryRegion::new(addr, PAGE_SIZE)); + loop { + let next_item = unsafe { + ctx.mapper.with_page_table(table_pa, |pgtable| { + let table = PTable::from_ptr(pgtable); + for i in cursor..Self::DESCRIPTORS_PER_PAGE { + if let Some(addr) = table.get_idx(i).mapped_address() { + return Some((i, addr)); + } } + None + })? + }; + + match next_item { + Some((found_idx, addr)) => { + let entry_va = + base_va.add_bytes(found_idx << ::Descriptor::MAP_SHIFT); + let entry = TeardownEntry { + kind: EntryKind::Mapping(VirtMemoryRegion::new(entry_va, PAGE_SIZE)), + depth, + region: PhysMemoryRegion::new(addr, PAGE_SIZE), + }; + let action = control(&entry); + + if !matches!(action, TeardownAction::Skip) { + // Clear the leaf PTE before releasing the data frame. + if matches!(action, TeardownAction::FreeAndClear) { + unsafe { + ctx.mapper.with_page_table(table_pa, |pgtable| { + PTable::from_ptr(pgtable) + .to_raw_ptr() + .add(found_idx) + .write_volatile(0u64); + })?; + } + } + deallocator(entry.region); + } + + cursor = found_idx + 1; } - })?; + None => break, + } } Ok(()) } } -/// Walks the page table hierarchy for a given address space and applies a -/// freeing closure to every lower-half canonical address allocated frame. +/// Walks the page table hierarchy for a given address space and invokes +/// `control` and `deallocator` for every frame encountered. /// /// # Parameters -/// - `pml4_table`: The physical address of the root (PML4) page table. -/// - `ctx`: The context for the operation (mapper). -/// - `deallocator`: A closure called for every physical address that needs freeing. -/// This includes: -/// 1. The User Data frames (Payload). -/// 2. The PDP, PD, and PTable Page Table frames. -/// 3. The PML4 Root Table frame. -/// -/// Block mappings (2MiB / 1GiB) are freed with their actual size. +/// - `pml4_table`: Physical address of the root (PML4) page table. +/// - `ctx`: Walk context (mapper + TLB invalidator). +/// - `control`: Called for every entry *before* any recursion or deallocation. +/// Returns the [`TeardownAction`] that drives walker behaviour: +/// - [`TeardownAction::Free`] — recurse (for tables) then `deallocator`. +/// - [`TeardownAction::FreeAndClear`] — recurse, zero the PTE, then `deallocator`. +/// - [`TeardownAction::Skip`] — skip the subtree entirely. /// -/// *Note* PDPTables which are pointed to by PML4 indexes [256-511] are not -/// free'd. -pub fn tear_down_address_space( +/// `control` must **not** free physical memory itself. +/// +/// [`EntryKind::Mapping`] entries carry their virtual address inside the +/// [`VirtMemoryRegion`] associated value. [`EntryKind::IntermediateTable`] +/// and [`EntryKind::RootTable`] entries carry no virtual-address +/// information. To filter upper-half x86_64 entries (e.g. kernel mappings), +/// inspect the VA of `Mapping` entries; note that this cannot short-circuit +/// an entire PML4 subtree, only its leaf entries. +/// +/// - `deallocator`: Called by the walker (outside any live `with_page_table` +/// window) to release a frame. Always called after any PTE clearing. +/// +/// Block mappings (2 MiB / 1 GiB) are reported as +/// [`EntryKind::Mapping`] with the appropriate region size. +pub fn tear_down_address_space( pml4_table: TPA>, ctx: &mut WalkContext, - mut deallocator: F, + mut control: Control, + mut deallocator: Dealloc, ) -> Result<()> where PM: PageTableMapper, - F: FnMut(PhysMemoryRegion), + Control: FnMut(&TeardownEntry) -> TeardownAction, + Dealloc: FnMut(PhysMemoryRegion), { let mut cursor = 0; @@ -73,7 +124,7 @@ where let next_item = unsafe { ctx.mapper.with_page_table(pml4_table, |pml4_tbl| { let table = PML4Table::from_ptr(pml4_tbl); - for i in cursor..256 { + for i in cursor..512 { if let Some(addr) = table.get_idx(i).next_table_address() { return Some((i, addr)); } @@ -84,15 +135,54 @@ where match next_item { Some((idx, pdp_addr)) => { - PDPTable::tear_down(pdp_addr, ctx, &mut deallocator)?; - deallocator(PhysMemoryRegion::new(pdp_addr.to_untyped(), PAGE_SIZE)); + let entry_va = VA::from_value(idx << PML4E::MAP_SHIFT); + let entry = TeardownEntry { + kind: EntryKind::IntermediateTable, + depth: 1, + region: PhysMemoryRegion::new(pdp_addr.to_untyped(), PAGE_SIZE), + }; + let action = control(&entry); + + if !matches!(action, TeardownAction::Skip) { + PDPTable::tear_down( + pdp_addr, + ctx, + entry_va, + 1, + &mut control, + &mut deallocator, + )?; + + if matches!(action, TeardownAction::FreeAndClear) { + unsafe { + ctx.mapper.with_page_table(pml4_table, |pml4_tbl| { + PML4Table::from_ptr(pml4_tbl) + .to_raw_ptr() + .add(idx) + .write_volatile(0u64); + })?; + } + } + + deallocator(entry.region); + } + cursor = idx + 1; } None => break, } } - deallocator(PhysMemoryRegion::new(pml4_table.to_untyped(), PAGE_SIZE)); + // Offer the root table frame to the caller. + let root_entry = TeardownEntry { + kind: EntryKind::RootTable, + depth: 0, + region: PhysMemoryRegion::new(pml4_table.to_untyped(), PAGE_SIZE), + }; + if !matches!(control(&root_entry), TeardownAction::Skip) { + deallocator(root_entry.region); + } + Ok(()) } @@ -111,12 +201,26 @@ mod tests { }; use std::collections::HashMap; + /// Tear down `root_table` freeing every frame the walker visits. fn capture_freed_pages( root_table: TPA>, ctx: &mut WalkContext, ) -> HashMap { + capture_freed_pages_filtered(root_table, ctx, |_| TeardownAction::Free) + } + + /// Tear down `root_table` using a custom `control` closure. + fn capture_freed_pages_filtered( + root_table: TPA>, + ctx: &mut WalkContext, + control: Control, + ) -> HashMap + where + PM: PageTableMapper, + Control: FnMut(&TeardownEntry) -> TeardownAction, + { let mut freed_map = HashMap::new(); - tear_down_address_space(root_table, ctx, |region| { + tear_down_address_space(root_table, ctx, control, |region| { if freed_map .insert(region.start_address().value(), region.size()) .is_some() @@ -258,7 +362,12 @@ mod tests { } #[test] - fn teardown_does_not_free_kernel_tables() { + fn teardown_skip_mappings_by_va() { + // Demonstrates VA-based filtering of leaf mappings with the new API. + // Unlike the old API (where `entry.va` on IntermediateTable allowed + // skipping whole PML4 subtrees), only `Mapping` entries carry VA. + // Intermediate table frames for the filtered subtree are still freed; + // only the data frame itself is preserved. let mut harness = TestHarness::new(15); // Map a page in userspace (PML4 index 0). @@ -267,9 +376,7 @@ mod tests { .map_4k_pages(user_pa, 0x0000_0001_0000_0000, 1, PtePermissions::rw(false)) .unwrap(); - // Map a page at a canonical kernel VA (PML4 index 256, bits [63:48] = 0xFFFF). - // This populates the upper half of the PML4, which tear_down_address_space - // must NOT touch. + // Map a page at a "high" VA (PML4 index 256, kernel half). let kernel_pa = 0x2_0000; harness .map_4k_pages( @@ -280,23 +387,26 @@ mod tests { ) .unwrap(); - let freed = capture_freed_pages( + // Skip Mapping entries in the upper half; free everything else. + let freed = capture_freed_pages_filtered( harness.inner.root_table, &mut harness.inner.create_walk_ctx(), + |entry| { + if let EntryKind::Mapping(virt) = &entry.kind { + if virt.start_address().value() >= (1usize << 47) { + return TeardownAction::Skip; + } + } + TeardownAction::Free + }, ); - // Only the userspace hierarchy must be freed: - // 1 payload (user_pa) - // 1 PT table (userspace branch) - // 1 PD table (userspace branch) - // 1 PDP table (userspace branch) - // 1 PML4 root - assert_eq!(freed.len(), 5); - assert!(freed.contains_key(&user_pa)); - - // The kernel payload and its intermediate tables (PML4 index >= 256) must - // not be freed — they belong to the kernel and outlive this address space. + // The kernel data frame is preserved. assert!(!freed.contains_key(&kernel_pa)); + // The user data frame is freed. + assert!(freed.contains_key(&user_pa)); + // user branch (data+PT+PD+PDP) + kernel branch (PT+PD+PDP, no data) + root = 8. + assert_eq!(freed.len(), 8); } #[test] @@ -364,4 +474,221 @@ mod tests { // Total: block + PDP + PML4 = 3 entries. assert_eq!(freed.len(), 3); } + + #[test] + fn teardown_skip_data_pages() { + let mut harness = TestHarness::new(10); + let va = VA::from_value(0x1_0000_0000); + let pa = 0x8_0000; + + harness + .map_4k_pages(pa, va.value(), 1, PtePermissions::ro(false)) + .unwrap(); + + // Free only page table frames, not data pages (shared-mapping scenario). + let freed = capture_freed_pages_filtered( + harness.inner.root_table, + &mut harness.inner.create_walk_ctx(), + |entry| match entry.kind { + EntryKind::Mapping(_) => TeardownAction::Skip, + _ => TeardownAction::Free, + }, + ); + + // Only the 4 page table frames are freed; data page is kept. + assert_eq!(freed.len(), 4); + assert!(!freed.contains_key(&pa)); + assert!(freed.contains_key(&harness.inner.root_table.value())); + } + + #[test] + fn teardown_free_and_clear_zeroes_ptes() { + // Map one page so each table frame has exactly one non-zero entry. + // After FreeAndClear teardown, every table frame must have all slots zero. + let mut harness = TestHarness::new(10); + harness + .map_4k_pages(0x8_0000, 0x1_0000_0000, 1, PtePermissions::ro(false)) + .unwrap(); + + // Collect table frame PAs (root + all intermediates) during teardown. + let mut table_pas: Vec = vec![harness.inner.root_table.value()]; + tear_down_address_space( + harness.inner.root_table, + &mut harness.inner.create_walk_ctx(), + |entry| { + if matches!(entry.kind, EntryKind::IntermediateTable) { + table_pas.push(entry.region.start_address().value()); + } + TeardownAction::FreeAndClear + }, + |_| {}, + ) + .unwrap(); + + // Every slot in every table frame must now be zero — the parent-slot + // clear for each level (and the leaf PTE clear) must have fired. + // In the PassthroughMapper harness, PAs are heap pointers, so the + // memory is still valid to read even after "freeing". + for &table_pa in &table_pas { + for idx in 0..512_usize { + let slot = unsafe { (table_pa as *const u64).add(idx).read_volatile() }; + assert_eq!( + slot, 0, + "table frame {:#x} slot {idx} should be zeroed after FreeAndClear", + table_pa + ); + } + } + } + + #[test] + fn teardown_selective_clear() { + // Two 4K pages in the same PT (slots 0 and 1). + // FreeAndClear the first (VA 0x1_0000_0000), Free (no clear) the second. + // After teardown: PT slot 0 == 0, PT slot 1 != 0. + let mut harness = TestHarness::new(10); + harness + .map_4k_pages(0xA_0000, 0x1_0000_0000, 1, PtePermissions::ro(false)) + .unwrap(); + harness + .map_4k_pages(0xB_0000, 0x1_0000_1000, 1, PtePermissions::ro(false)) + .unwrap(); + + let mut pt_pa: Option = None; + tear_down_address_space( + harness.inner.root_table, + &mut harness.inner.create_walk_ctx(), + |entry| { + // Capture the PT frame (depth-3 IntermediateTable). + if matches!(entry.kind, EntryKind::IntermediateTable) && entry.depth == 3 { + pt_pa = Some(entry.region.start_address().value()); + } + if let EntryKind::Mapping(virt) = &entry.kind { + if virt.start_address().value() == 0x1_0000_0000 { + return TeardownAction::FreeAndClear; + } + if virt.start_address().value() == 0x1_0000_1000 { + return TeardownAction::Free; + } + } + TeardownAction::Free + }, + |_| {}, + ) + .unwrap(); + + let pt_pa = pt_pa.expect("PT frame must have been observed"); + + // The cleared slot (slot 0 in PT) must be zero. + let slot0 = unsafe { (pt_pa as *const u64).add(0).read_volatile() }; + assert_eq!(slot0, 0, "PT slot 0 should be zeroed after FreeAndClear"); + + // The un-cleared slot (slot 1 in PT) must remain non-zero. + let slot1 = unsafe { (pt_pa as *const u64).add(1).read_volatile() }; + assert_ne!(slot1, 0, "PT slot 1 should remain non-zero after Free"); + } + + #[test] + fn teardown_skip_root_table() { + // RootTable + Skip must not free the root, but must still free everything else. + let mut harness = TestHarness::new(10); + harness + .map_4k_pages(0x8_0000, 0x1_0000_0000, 1, PtePermissions::ro(false)) + .unwrap(); + + let root_pa = harness.inner.root_table.value(); + let freed = capture_freed_pages_filtered( + harness.inner.root_table, + &mut harness.inner.create_walk_ctx(), + |entry| match entry.kind { + EntryKind::RootTable => TeardownAction::Skip, + _ => TeardownAction::Free, + }, + ); + + assert!( + !freed.contains_key(&root_pa), + "Root table must not be freed" + ); + assert!(freed.contains_key(&0x8_0000), "Data page must be freed"); + // PDP + PDE + PTE frame + data page = 4 freed (root skipped). + assert_eq!(freed.len(), 4); + } + + #[test] + fn teardown_skip_entire_subtree() { + // Map one page; Skip the depth-1 IntermediateTable (PDP frame). + // Verify: (a) data page and all descendant frames are not freed, + // (b) control is NOT called for descendants (no spurious recursion). + let mut harness = TestHarness::new(10); + let pa = 0xA_0000; + harness + .map_4k_pages(pa, 0x1_0000_0000, 1, PtePermissions::ro(false)) + .unwrap(); + + let mut control_calls = 0usize; + let freed = capture_freed_pages_filtered( + harness.inner.root_table, + &mut harness.inner.create_walk_ctx(), + |entry| { + control_calls += 1; + if matches!(entry.kind, EntryKind::IntermediateTable) && entry.depth == 1 { + TeardownAction::Skip + } else { + TeardownAction::Free + } + }, + ); + + assert!( + !freed.contains_key(&pa), + "Skipped subtree data page must not be freed" + ); + // Only the root table is freed; the rest was skipped. + assert_eq!(freed.len(), 1); + // control is called for root (1) + depth-1 entry (1) = 2 total. + assert_eq!( + control_calls, 2, + "control must not recurse into the skipped subtree" + ); + } + + #[test] + fn teardown_entry_metadata() { + // For a single 4K page at VA 0x1_0000_0000 the walker must report + // exactly these (kind, depth, va) tuples, in any order. + let mut harness = TestHarness::new(10); + harness + .map_4k_pages(0x8_0000, 0x1_0000_0000, 1, PtePermissions::ro(false)) + .unwrap(); + + let mut entries: Vec<(&'static str, u8, Option)> = Vec::new(); + capture_freed_pages_filtered( + harness.inner.root_table, + &mut harness.inner.create_walk_ctx(), + |e| { + let (kind, va) = match &e.kind { + EntryKind::Mapping(virt) => ("Mapping", Some(virt.start_address().value())), + EntryKind::IntermediateTable => ("IntermediateTable", None), + EntryKind::RootTable => ("RootTable", None), + }; + entries.push((kind, e.depth, va)); + TeardownAction::Free + }, + ); + + // Sort by (depth, va, kind) for a deterministic comparison. + entries.sort_unstable_by_key(|&(k, d, v)| (d, v, k)); + + assert_eq!( + entries, + vec![ + ("RootTable", 0, None), + ("IntermediateTable", 1, None), // PDP frame + ("IntermediateTable", 2, None), // PD frame + ("IntermediateTable", 3, None), // PT frame + ("Mapping", 3, Some(0x1_0000_0000)), // mapped page + ] + ); + } } diff --git a/libkernel/src/memory/paging/mod.rs b/libkernel/src/memory/paging/mod.rs index 2e7143b..feab2bd 100644 --- a/libkernel/src/memory/paging/mod.rs +++ b/libkernel/src/memory/paging/mod.rs @@ -10,7 +10,7 @@ use permissions::PtePermissions; pub mod permissions; pub mod smalloc_page_allocator; -pub(crate) mod tear_down; +pub mod tear_down; pub mod walk; #[cfg(test)] diff --git a/libkernel/src/memory/paging/tear_down.rs b/libkernel/src/memory/paging/tear_down.rs index 0013861..0dce9eb 100644 --- a/libkernel/src/memory/paging/tear_down.rs +++ b/libkernel/src/memory/paging/tear_down.rs @@ -1,63 +1,140 @@ +//! Page-table tear-down and deallocation. + use crate::memory::{ PAGE_SIZE, - address::{PA, TPA}, + address::{PA, TPA, VA}, paging::{ PaMapper, PageTableEntry, PageTableMapper, PgTable, PgTableArray, TableMapper, TableMapperTable, walk::WalkContext, }, - region::PhysMemoryRegion, + region::{PhysMemoryRegion, VirtMemoryRegion}, }; -enum EntryKind { +/// Identifies the kind of physical frame a [`TeardownEntry`] describes. +pub enum EntryKind { + /// A leaf page mapping. + /// + /// The [`VirtMemoryRegion`] covers the entire mapping virtual region. + Mapping(VirtMemoryRegion), + /// An intermediate page table frame (not the root). + IntermediateTable, + /// The root page table frame (PML4 / L0). + RootTable, +} + +/// Describes a single physical frame encountered during a page table tear-down. +pub struct TeardownEntry { + /// What kind of frame this is. + /// + /// For [`EntryKind::Mapping`] entries the virtual region is embedded in the + /// variant. [`EntryKind::IntermediateTable`] and [`EntryKind::RootTable`] + /// entries describe infrastructure frames and carry no virtual-address + /// information. + pub kind: EntryKind, + /// Depth in the page table tree. Root = 0, incrementing toward leaves. + /// + /// Intermediate table frames and the leaf mappings within the same table + /// share the same depth value; [`EntryKind`] disambiguates. + pub depth: u8, + /// The physical memory region of this frame. + pub region: PhysMemoryRegion, +} + +/// The action the walker should take for a [`TeardownEntry`]. +/// +/// The `control` closure returns this to drive walker behaviour. The actual +/// freeing is always performed by the separate `deallocator` closure, called +/// by the walker in the correct order (after clearing if requested). +#[allow(missing_docs)] +pub enum TeardownAction { + /// Recurse into this subtree (for [`EntryKind::IntermediateTable`] entries) + /// and then call `deallocator` for its frame. For leaf entries, just call + /// `deallocator`. + Free, + /// Like [`TeardownAction::Free`], but also zero the page table entry in the + /// parent table *before* calling `deallocator`. The zeroing uses + /// `write_volatile` and happens before the frame is released to the + /// allocator, closing the stale-PTE window. + FreeAndClear, + /// Skip this entry entirely. For [`EntryKind::IntermediateTable`] entries, + /// the subtree is not walked. `deallocator` is not called. + Skip, +} + +// Walker-internal discriminant used while scanning a table. +enum LocalEntryKind { Table(TPA>), Block(PA), } +/// Trait implemented by each page table level that can be recursively walked +/// during a tear-down. pub trait RecursiveTeardownWalker: PgTable + Sized { - fn tear_down( + /// Walk all mappings in `table_pa` and invoke `control` / `deallocator` + /// for each frame found. + /// + /// - `base_va`: the virtual address of the first byte covered by + /// `table_pa`'s first entry. + /// - `depth`: the tree depth of `table_pa` itself (root's children = 1). + /// - `control`: called for each entry *before* any recursion or + /// deallocation. Returns the action the walker should take. Must not + /// free physical memory — use `deallocator` for that. + /// - `deallocator`: called by the walker (never inside a live + /// `with_page_table` window) to physically release a frame. + fn tear_down( table_pa: TPA>, ctx: &mut WalkContext, - deallocator: &mut F, + base_va: VA, + depth: u8, + control: &mut Control, + deallocator: &mut Dealloc, ) -> crate::error::Result<()> where PM: PageTableMapper, - F: FnMut(PhysMemoryRegion); + Control: FnMut(&TeardownEntry) -> TeardownAction, + Dealloc: FnMut(PhysMemoryRegion); } /// Blanket impl for intermediate table levels whose descriptors support both -/// table and block mappings (e.g. PDE/PDPE on x86_64, L1/L2 on arm64). +/// table and block mappings. /// /// Root table levels (PML4, L0) are excluded because their descriptors cannot -/// encode block mappings — those are handled explicitly by `tear_down_address_space`. +/// encode block mappings — those are handled explicitly by +/// `tear_down_address_space`. impl RecursiveTeardownWalker for T where T: TableMapperTable, T::Descriptor: PaMapper, ::NextLevel: RecursiveTeardownWalker, { - fn tear_down( + fn tear_down( table_pa: TPA>, ctx: &mut WalkContext, - deallocator: &mut F, + base_va: VA, + depth: u8, + control: &mut Control, + deallocator: &mut Dealloc, ) -> crate::error::Result<()> where PM: PageTableMapper, - F: FnMut(PhysMemoryRegion), + Control: FnMut(&TeardownEntry) -> TeardownAction, + Dealloc: FnMut(PhysMemoryRegion), { let mut cursor = 0; loop { - let next_item: Option<(usize, EntryKind<::NextLevel>)> = unsafe { + let next_item: Option<( + usize, + LocalEntryKind<::NextLevel>, + )> = unsafe { ctx.mapper.with_page_table(table_pa, |pgtable| { let table = Self::from_ptr(pgtable); - for i in cursor..::DESCRIPTORS_PER_PAGE { let desc = table.get_idx(i); - if let Some(addr) = desc.next_table_address() { - return Some((i, EntryKind::Table(addr))); + return Some((i, LocalEntryKind::Table(addr))); } else if let Some(pa) = desc.mapped_address() { - return Some((i, EntryKind::Block(pa))); + return Some((i, LocalEntryKind::Block(pa))); } } None @@ -65,24 +142,76 @@ where }; match next_item { - Some((found_idx, EntryKind::Table(table_addr))) => { - // Recurse into the child table, then free its frame. - ::NextLevel::tear_down( - table_addr, - ctx, - deallocator, - )?; - deallocator(PhysMemoryRegion::new(table_addr.to_untyped(), PAGE_SIZE)); + Some((found_idx, LocalEntryKind::Table(table_addr))) => { + let entry_va = base_va + .add_bytes(found_idx << ::MAP_SHIFT); + let child_depth = depth + 1; + let entry = TeardownEntry { + kind: EntryKind::IntermediateTable, + depth: child_depth, + region: PhysMemoryRegion::new(table_addr.to_untyped(), PAGE_SIZE), + }; + let action = control(&entry); + + if !matches!(action, TeardownAction::Skip) { + // Recurse into the child table first. + ::NextLevel::tear_down( + table_addr, + ctx, + entry_va, + child_depth, + control, + deallocator, + )?; + + // Clear the PTE in this table before releasing the child + // frame, ensuring no stale translations outlive the frame. + if matches!(action, TeardownAction::FreeAndClear) { + unsafe { + ctx.mapper.with_page_table(table_pa, |pgtable| { + Self::from_ptr(pgtable) + .to_raw_ptr() + .add(found_idx) + .write_volatile(0u64); + })?; + } + } + + deallocator(entry.region); + } + cursor = found_idx + 1; } - Some((found_idx, EntryKind::Block(pa))) => { - // Block frame: size is the coverage of one entry at this level. - deallocator(PhysMemoryRegion::new( - pa, - 1 << ::MAP_SHIFT, - )); + + Some((found_idx, LocalEntryKind::Block(pa))) => { + let entry_va = base_va + .add_bytes(found_idx << ::MAP_SHIFT); + let map_size = 1 << ::MAP_SHIFT; + let entry = TeardownEntry { + kind: EntryKind::Mapping(VirtMemoryRegion::new(entry_va, map_size)), + depth, + region: PhysMemoryRegion::new(pa, map_size), + }; + let action = control(&entry); + + if !matches!(action, TeardownAction::Skip) { + // Clear the block PTE before releasing the frame. + if matches!(action, TeardownAction::FreeAndClear) { + unsafe { + ctx.mapper.with_page_table(table_pa, |pgtable| { + Self::from_ptr(pgtable) + .to_raw_ptr() + .add(found_idx) + .write_volatile(0u64); + })?; + } + } + deallocator(entry.region); + } + cursor = found_idx + 1; } + None => break, } } diff --git a/src/arch/arm64/memory/address_space.rs b/src/arch/arm64/memory/address_space.rs index fd7d0d1..74848bb 100644 --- a/src/arch/arm64/memory/address_space.rs +++ b/src/arch/arm64/memory/address_space.rs @@ -23,7 +23,7 @@ use libkernel::{ page::PageFrame, paging::{ PaMapper, PageAllocator, PageTableEntry, PgTableArray, permissions::PtePermissions, - walk::WalkContext, + tear_down::TeardownAction, walk::WalkContext, }, proc_vm::address_space::{PageInfo, UserAddressSpace}, region::{PhysMemoryRegion, VirtMemoryRegion}, @@ -209,9 +209,14 @@ impl Drop for Arm64ProcessAddressSpace { invalidator: &AllEl0TlbInvalidator::new(), }; - if tear_down_address_space(self.l0_table, &mut walk_ctx, |region| unsafe { - PAGE_ALLOC.get().unwrap().alloc_from_region(region); - }) + if tear_down_address_space( + self.l0_table, + &mut walk_ctx, + |_| TeardownAction::Free, + |region| unsafe { + PAGE_ALLOC.get().unwrap().alloc_from_region(region); + }, + ) .is_err() { warn!("Address space tear down failed. Probable memory leakage!");