From c1bea5ecbbb59b49be0429961be20692c34d936f Mon Sep 17 00:00:00 2001
From: Matthew Leach
Date: Thu, 11 Dec 2025 22:31:11 +0000
Subject: [PATCH] tmpfs: new

Add basic directory inode implementation and tmpfs filesystem
implementation.
---
 libkernel/src/fs/filesystems/mod.rs   |   1 +
 libkernel/src/fs/filesystems/tmpfs.rs | 747 ++++++++++++++++++++++++++
 2 files changed, 748 insertions(+)
 create mode 100644 libkernel/src/fs/filesystems/tmpfs.rs

diff --git a/libkernel/src/fs/filesystems/mod.rs b/libkernel/src/fs/filesystems/mod.rs
index 43475dd..c50261a 100644
--- a/libkernel/src/fs/filesystems/mod.rs
+++ b/libkernel/src/fs/filesystems/mod.rs
@@ -1 +1,2 @@
 pub mod fat32;
+pub mod tmpfs;
diff --git a/libkernel/src/fs/filesystems/tmpfs.rs b/libkernel/src/fs/filesystems/tmpfs.rs
new file mode 100644
index 0000000..f7871c7
--- /dev/null
+++ b/libkernel/src/fs/filesystems/tmpfs.rs
@@ -0,0 +1,747 @@
+use crate::{
+    CpuOps,
+    error::{FsError, KernelError, Result},
+    fs::{
+        DirStream, Dirent, FileType, Filesystem, Inode, InodeId,
+        attr::{FileAttr, FilePermissions},
+    },
+    memory::{
+        PAGE_SIZE,
+        address::{AddressTranslator, VA},
+        page::ClaimedPage,
+        page_alloc::PageAllocGetter,
+    },
+    sync::spinlock::SpinLockIrq,
+};
+use alloc::{
+    boxed::Box,
+    string::{String, ToString},
+    sync::{Arc, Weak},
+    vec::Vec,
+};
+use async_trait::async_trait;
+use core::{
+    cmp::min,
+    marker::PhantomData,
+    mem::size_of,
+    sync::atomic::{AtomicU64, Ordering},
+};
+
+const BLOCK_SZ: usize = PAGE_SIZE;
+
+// Calculate the max file size based on how many pointers fit in one page (the
+// indirect block).
+const MAX_SZ: usize = BLOCK_SZ * (PAGE_SIZE / size_of::<*mut u8>());
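+
+// A sketch of that arithmetic, assuming 4 KiB pages and 8-byte pointers: the
+// indirect block holds 4096 / 8 = 512 block pointers, so a file can grow to
+// 512 * 4 KiB = 2 MiB. The compile-time checks below only encode the
+// invariants that hold for any page size.
+const _: () = assert!(MAX_SZ >= BLOCK_SZ);
+const _: () = assert!(MAX_SZ % BLOCK_SZ == 0);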
+
+struct TmpFsRegInner<C, G, T>
+where
+    C: CpuOps,
+    G: PageAllocGetter,
+    T: AddressTranslator<()>,
+{
+    indirect_block: ClaimedPage<C, G, T>,
+    size: usize,
+    allocated_blocks: usize,
+}
+
+impl<C, G, T> TmpFsRegInner<C, G, T>
+where
+    C: CpuOps,
+    G: PageAllocGetter,
+    T: AddressTranslator<()>,
+{
+    fn block_slot_ptr(&mut self, block_idx: usize) -> *mut *mut u8 {
+        debug_assert!(block_idx < PAGE_SIZE / size_of::<*mut u8>());
+        unsafe {
+            self.indirect_block
+                .as_ptr_mut()
+                .cast::<*mut u8>()
+                .add(block_idx)
+        }
+    }
+
+    fn block_ptr_mut(&mut self, block_idx: usize) -> *mut u8 {
+        unsafe { *self.block_slot_ptr(block_idx) }
+    }
+
+    fn try_alloc_block(&mut self, block_idx: usize) -> Result<*mut u8> {
+        // Ensure no discontinuity. If we write to block 5, blocks 0-4 must
+        // exist. We iterate up to and including the target block_idx.
+        for i in self.allocated_blocks..=block_idx {
+            let new_page = ClaimedPage::<C, G, T>::alloc_zeroed()?;
+
+            unsafe {
+                *self.block_slot_ptr(i) = new_page.as_ptr_mut();
+            }
+
+            new_page.leak();
+            self.allocated_blocks += 1;
+        }
+
+        Ok(self.block_ptr_mut(block_idx))
+    }
+}
+
+impl<C, G, T> Drop for TmpFsRegInner<C, G, T>
+where
+    C: CpuOps,
+    G: PageAllocGetter,
+    T: AddressTranslator<()>,
+{
+    fn drop(&mut self) {
+        for i in 0..self.allocated_blocks {
+            let ptr = unsafe { *self.block_slot_ptr(i) };
+            if !ptr.is_null() {
+                // SAFETY: This pointer was obtained from ClaimedPage::leak()
+                // in the block allocation code.
+                unsafe {
+                    ClaimedPage::<C, G, T>::from_pfn(
+                        VA::from_ptr_mut(ptr.cast()).to_pa::<T>().to_pfn(),
+                    )
+                };
+
+                // Drop happens here, releasing memory.
+            }
+        }
+    }
+}
+
+struct TmpFsReg<C, G, T>
+where
+    C: CpuOps,
+    G: PageAllocGetter,
+    T: AddressTranslator<()>,
+{
+    id: InodeId,
+    inner: SpinLockIrq<TmpFsRegInner<C, G, T>, C>,
+}
+
+impl<C, G, T> TmpFsReg<C, G, T>
+where
+    C: CpuOps,
+    G: PageAllocGetter,
+    T: AddressTranslator<()>,
+{
+    fn new(id: InodeId) -> Result<Self> {
+        Ok(Self {
+            id,
+            inner: SpinLockIrq::new(TmpFsRegInner {
+                indirect_block: ClaimedPage::<C, G, T>::alloc_zeroed()?,
+                size: 0,
+                allocated_blocks: 0,
+            }),
+        })
+    }
+
+    fn offset_to_block_locus(offset: usize) -> (usize, usize) {
+        (offset / BLOCK_SZ, offset % BLOCK_SZ)
+    }
+}
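+
+// For example, assuming 4 KiB blocks: offset 5000 falls in block 1 at
+// intra-block offset 904, since 5000 / 4096 == 1 and 5000 % 4096 == 904.
+// (A unit test exercising this helper lives in the test module below.)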
+
+#[async_trait]
+impl<C, G, T> Inode for TmpFsReg<C, G, T>
+where
+    C: CpuOps,
+    G: PageAllocGetter,
+    T: AddressTranslator<()>,
+{
+    fn id(&self) -> InodeId {
+        self.id
+    }
+
+    async fn read_at(&self, mut offset: u64, buf: &mut [u8]) -> Result<usize> {
+        let mut inner = self.inner.lock_save_irq();
+
+        if offset as usize >= inner.size {
+            return Ok(0);
+        }
+
+        let mut bytes_to_read = min(inner.size - offset as usize, buf.len());
+        let mut buf_ptr = buf.as_mut_ptr();
+        let mut total_read = 0;
+
+        while bytes_to_read > 0 {
+            let (blk_idx, blk_offset) = Self::offset_to_block_locus(offset as _);
+
+            let bytes_in_block = BLOCK_SZ - blk_offset;
+            let chunk_len = min(bytes_to_read, bytes_in_block);
+
+            if blk_idx >= inner.allocated_blocks {
+                // Hole left by an extending truncate: there is no backing
+                // page, so the contents read back as zeros.
+                unsafe {
+                    buf_ptr.write_bytes(0, chunk_len);
+                    buf_ptr = buf_ptr.add(chunk_len);
+                }
+            } else {
+                unsafe {
+                    let src = inner.block_ptr_mut(blk_idx).add(blk_offset);
+                    src.copy_to_nonoverlapping(buf_ptr, chunk_len);
+                    buf_ptr = buf_ptr.add(chunk_len);
+                }
+            }
+
+            offset += chunk_len as u64;
+            total_read += chunk_len;
+            bytes_to_read -= chunk_len;
+        }
+
+        Ok(total_read)
+    }
+
+    async fn write_at(&self, mut offset: u64, buf: &[u8]) -> Result<usize> {
+        if offset as usize >= MAX_SZ {
+            return Err(FsError::OutOfBounds.into());
+        }
+
+        let mut inner = self.inner.lock_save_irq();
+
+        // Calculate how much we can write without exceeding MAX_SZ.
+        let available_space = MAX_SZ.saturating_sub(offset as usize);
+        let mut bytes_to_write = min(buf.len(), available_space);
+
+        if bytes_to_write == 0 {
+            return Ok(0);
+        }
+
+        let mut buf_ptr = buf.as_ptr();
+        let mut total_written = 0;
+
+        while bytes_to_write > 0 {
+            let (blk_idx, blk_offset) = Self::offset_to_block_locus(offset as _);
+
+            // Ensure the block exists.
+            let block_ptr = inner.try_alloc_block(blk_idx)?;
+
+            let bytes_in_block = BLOCK_SZ - blk_offset;
+            let chunk_len = min(bytes_to_write, bytes_in_block);
+
+            unsafe {
+                let dst = block_ptr.add(blk_offset);
+                dst.copy_from_nonoverlapping(buf_ptr, chunk_len);
+                buf_ptr = buf_ptr.add(chunk_len);
+            }
+
+            offset += chunk_len as u64;
+            total_written += chunk_len;
+            bytes_to_write -= chunk_len;
+        }
+
+        // Update the file size if we extended it.
+        if offset as usize > inner.size {
+            inner.size = offset as usize;
+        }
+
+        Ok(total_written)
+    }
+
+    async fn truncate(&self, size: u64) -> Result<()> {
+        let mut inner = self.inner.lock_save_irq();
+        let new_size = size as usize;
+
+        if new_size > MAX_SZ {
+            return Err(FsError::OutOfBounds.into());
+        }
+
+        // Handle expansion: we just update the size. The holes are reported
+        // as zeros by read_at, and write_at will fill them with zeroed pages
+        // when touched.
+        if new_size > inner.size {
+            inner.size = new_size;
+            return Ok(());
+        }
+
+        // Handle shrinking.
+        if new_size < inner.size {
+            // Calculate the number of blocks required for the new size.
+            let new_blk_count = new_size.div_ceil(BLOCK_SZ);
+
+            // Free the excess blocks from the end.
+            while inner.allocated_blocks > new_blk_count {
+                let release_idx = inner.allocated_blocks - 1;
+
+                unsafe {
+                    let ptr_slot = inner.block_slot_ptr(release_idx);
+                    let ptr = *ptr_slot;
+
+                    if !ptr.is_null() {
+                        // Reconstruct the ClaimedPage to drop it (returning
+                        // the frame to the allocator).
+                        let page = ClaimedPage::<C, G, T>::from_pfn(
+                            VA::from_ptr_mut(ptr.cast()).to_pa::<T>().to_pfn(),
+                        );
+
+                        drop(page);
+
+                        // Null the slot to prevent a double-free in Drop.
+                        *ptr_slot = core::ptr::null_mut();
+                    }
+                }
+
+                inner.allocated_blocks -= 1;
+            }
+
+            // Zero out trailing data in the last retained page. This is POSIX
+            // behavior: bytes past the new EOF must appear as zero if we
+            // extend the file later. Skipped when the final block is an
+            // unallocated hole, which already reads as zeros.
+            if new_blk_count > 0 && new_blk_count <= inner.allocated_blocks {
+                let last_blk_idx = new_blk_count - 1;
+                let offset_in_block = new_size % BLOCK_SZ;
+
+                if offset_in_block > 0 {
+                    let ptr = inner.block_ptr_mut(last_blk_idx);
+                    unsafe {
+                        let tail_ptr = ptr.add(offset_in_block);
+                        let tail_len = BLOCK_SZ - offset_in_block;
+                        tail_ptr.write_bytes(0, tail_len);
+                    }
+                }
+            }
+
+            inner.size = new_size;
+        }
+
+        Ok(())
+    }
+
+    async fn getattr(&self) -> Result<FileAttr> {
+        let inner = self.inner.lock_save_irq();
+        Ok(FileAttr {
+            size: inner.size as u64,
+            // Populate other fields (perms, time) with defaults or store them
+            // in TmpFsReg.
+            ..Default::default()
+        })
+    }
+}
+
+struct TmpFsDirEnt {
+    name: String,
+    id: InodeId,
+    kind: FileType,
+    inode: Arc<dyn Inode>,
+}
+
+struct TmpFsDirInode<C, G, T>
+where
+    C: CpuOps,
+    G: PageAllocGetter,
+    T: AddressTranslator<()>,
+{
+    entries: SpinLockIrq<Vec<TmpFsDirEnt>, C>,
+    attrs: FileAttr,
+    id: u64,
+    fs: Weak<TmpFs<C, G, T>>,
+    this: Weak<Self>,
+}
+
+struct TmpFsDirReader<C, G, T>
+where
+    C: CpuOps,
+    G: PageAllocGetter,
+    T: AddressTranslator<()>,
+{
+    inode: Arc<TmpFsDirInode<C, G, T>>,
+    offset: usize,
+}
+
+#[async_trait]
+impl<C, G, T> DirStream for TmpFsDirReader<C, G, T>
+where
+    C: CpuOps,
+    G: PageAllocGetter,
+    T: AddressTranslator<()>,
+{
+    async fn next_entry(&mut self) -> Result<Option<Dirent>> {
+        let guard = self.inode.entries.lock_save_irq();
+        if let Some(entry) = guard.get(self.offset) {
+            self.offset += 1;
+
+            let dent = Some(Dirent {
+                id: entry.id,
+                name: entry.name.clone(),
+                file_type: entry.kind,
+                offset: self.offset as _,
+            });
+
+            Ok(dent)
+        } else {
+            Ok(None)
+        }
+    }
+}
+
+#[async_trait]
+impl<C, G, T> Inode for TmpFsDirInode<C, G, T>
+where
+    C: CpuOps,
+    G: PageAllocGetter,
+    T: AddressTranslator<()>,
+{
+    fn id(&self) -> crate::fs::InodeId {
+        InodeId::from_fsid_and_inodeid(self.fs.upgrade().unwrap().id(), self.id)
+    }
+
+    async fn lookup(&self, name: &str) -> Result<Arc<dyn Inode>> {
+        self.entries
+            .lock_save_irq()
+            .iter()
+            .find(|x| x.name == name)
+            .map(|x| x.inode.clone())
+            .ok_or(FsError::NotFound.into())
+    }
+
+    async fn getattr(&self) -> Result<FileAttr> {
+        Ok(self.attrs.clone())
+    }
+
+    async fn readdir(&self, start_offset: u64) -> Result<Box<dyn DirStream>> {
+        Ok(Box::new(TmpFsDirReader {
+            inode: self.this.upgrade().unwrap(),
+            offset: start_offset as _,
+        }))
+    }
+
+    async fn create(
+        &self,
+        name: &str,
+        file_type: FileType,
+        mode: FilePermissions,
+    ) -> Result<Arc<dyn Inode>> {
+        let mut entries = self.entries.lock_save_irq();
+
+        if entries.iter().any(|e| e.name == name) {
+            return Err(FsError::AlreadyExists.into());
+        }
+
+        let fs = self.fs.upgrade().ok_or(FsError::InvalidFs)?;
+        let new_id = fs.alloc_inode_id();
+        let inode_id = InodeId::from_fsid_and_inodeid(fs.id(), new_id);
+
+        let inode: Arc<dyn Inode> = match file_type {
+            FileType::File => Arc::new(TmpFsReg::<C, G, T>::new(inode_id)?),
+            FileType::Directory => TmpFsDirInode::<C, G, T>::new(new_id, self.fs.clone(), mode),
+            _ => return Err(KernelError::NotSupported),
+        };
+
+        entries.push(TmpFsDirEnt {
+            name: name.to_string(),
+            id: inode_id,
+            kind: file_type,
+            inode: inode.clone(),
+        });
+
+        Ok(inode)
+    }
+}
+
+impl<C, G, T> TmpFsDirInode<C, G, T>
+where
+    C: CpuOps,
+    G: PageAllocGetter,
+    T: AddressTranslator<()>,
+{
+    pub fn new(id: u64, fs: Weak<TmpFs<C, G, T>>, mode: FilePermissions) -> Arc<Self> {
+        // Arc::new_cyclic lets the inode hold a weak reference to itself,
+        // which readdir uses to hand out DirStream handles.
+        Arc::new_cyclic(|weak_this| Self {
+            entries: SpinLockIrq::new(Vec::new()),
+            attrs: FileAttr {
+                size: 0,
+                file_type: FileType::Directory,
+                block_size: BLOCK_SZ as _,
+                mode,
+                ..Default::default()
+            },
+            id,
+            fs,
+            this: weak_this.clone(),
+        })
+    }
+}
+
+pub struct TmpFs<C, G, T>
+where
+    C: CpuOps,
+    G: PageAllocGetter,
+    T: AddressTranslator<()>,
+{
+    id: u64,
+    next_inode_id: AtomicU64,
+    root: Arc<TmpFsDirInode<C, G, T>>,
+    pg_allocator: PhantomData<G>,
+    _phantom: PhantomData<T>,
+}
+
+impl<C, G, T> TmpFs<C, G, T>
+where
+    C: CpuOps,
+    G: PageAllocGetter,
+    T: AddressTranslator<()>,
+{
+    pub fn new(fs_id: u64) -> Arc<Self> {
+        Arc::new_cyclic(|weak_fs| {
+            let root =
+                TmpFsDirInode::new(1, weak_fs.clone(), FilePermissions::from_bits_retain(0o766));
+
+            Self {
+                id: fs_id,
+                next_inode_id: AtomicU64::new(2),
+                root,
+                pg_allocator: PhantomData,
+                _phantom: PhantomData,
+            }
+        })
+    }
+
+    pub fn alloc_inode_id(&self) -> u64 {
+        self.next_inode_id.fetch_add(1, Ordering::Relaxed)
+    }
+}
+
+#[async_trait]
+impl<C, G, T> Filesystem for TmpFs<C, G, T>
+where
+    C: CpuOps,
+    G: PageAllocGetter,
+    T: AddressTranslator<()>,
+{
+    async fn root_inode(&self) -> Result<Arc<dyn Inode>> {
+        Ok(self.root.clone())
+    }
+
+    fn id(&self) -> u64 {
+        self.id
+    }
+}
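+
+// Usage sketch (not part of this patch's call sites): `KCpu`, `KPgAlloc` and
+// `KXlate` are hypothetical stand-ins for whatever CpuOps, PageAllocGetter
+// and AddressTranslator instances the kernel provides.
+//
+//     let fs = TmpFs::<KCpu, KPgAlloc, KXlate>::new(fs_id);
+//     let root = fs.root_inode().await?;
+//     let file = root.create("hello", FileType::File, mode).await?;
+//     file.write_at(0, b"hi, tmpfs").await?;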
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::fs::{InodeId, FileType, attr::FilePermissions};
+    use crate::memory::{
+        PAGE_SIZE,
+        address::IdentityTranslator,
+        page_alloc::{self, FrameAllocator, PageAllocGetter},
+    };
+    use crate::sync::once_lock::OnceLock;
+    use crate::test::MockCpuOps;
+    use alloc::vec;
+    use std::sync::Arc;
+
+    static PG_ALLOC: OnceLock<FrameAllocator<IdentityTranslator>, MockCpuOps> = OnceLock::new();
+
+    struct TmpFsPgAllocGetter {}
+
+    impl PageAllocGetter for TmpFsPgAllocGetter {
+        fn global_page_alloc() -> &'static OnceLock<FrameAllocator<IdentityTranslator>, MockCpuOps> {
+            &PG_ALLOC
+        }
+    }
+
+    /// Initializes the global allocator for the test suite.
+    fn init_allocator() {
+        PG_ALLOC.get_or_init(|| {
+            // Allocate 32MB for the test heap to ensure we don't run out
+            // during large file tests.
+            page_alloc::tests::TestFixture::new(&[(0, 32 * 1024 * 1024)], &[]).leak_allocator()
+        });
+    }
+
+    /// Creates a fresh Filesystem and a detached regular file for isolated
+    /// file testing.
+    fn setup_env() -> (
+        Arc<TmpFs<MockCpuOps, TmpFsPgAllocGetter, IdentityTranslator>>,
+        TmpFsReg<MockCpuOps, TmpFsPgAllocGetter, IdentityTranslator>,
+    ) {
+        init_allocator();
+        let fs = TmpFs::new(0);
+        let reg = TmpFsReg::new(InodeId::from_fsid_and_inodeid(0, 1024)).unwrap();
+        (fs, reg)
+    }
+
+    /// Creates just the Filesystem to test directory hierarchies.
+    fn setup_fs() -> Arc<TmpFs<MockCpuOps, TmpFsPgAllocGetter, IdentityTranslator>> {
+        init_allocator();
+        TmpFs::new(1)
+    }
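+
+    /// Added sanity check (a sketch): exercises the private offset ->
+    /// (block index, intra-block offset) helper directly. Expressed in terms
+    /// of BLOCK_SZ so it holds for any page size.
+    #[test]
+    fn test_offset_to_block_locus() {
+        type Reg = TmpFsReg<MockCpuOps, TmpFsPgAllocGetter, IdentityTranslator>;
+
+        assert_eq!(Reg::offset_to_block_locus(0), (0, 0));
+        assert_eq!(Reg::offset_to_block_locus(BLOCK_SZ - 1), (0, BLOCK_SZ - 1));
+        assert_eq!(Reg::offset_to_block_locus(BLOCK_SZ), (1, 0));
+        assert_eq!(Reg::offset_to_block_locus(3 * BLOCK_SZ + 7), (3, 7));
+    }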
+
+    #[tokio::test]
+    async fn test_simple_read_write() {
+        let (_, reg) = setup_env();
+        let data = b"Hello, Kernel!";
+        let mut buf = vec![0u8; 100];
+
+        let written = reg.write_at(0, data).await.expect("Write failed");
+        assert_eq!(written, data.len());
+
+        let read = reg.read_at(0, &mut buf).await.expect("Read failed");
+        assert_eq!(read, data.len());
+        assert_eq!(&buf[..read], data);
+
+        let attr = reg.getattr().await.expect("Getattr failed");
+        assert_eq!(attr.size, data.len() as u64);
+    }
+
+    #[tokio::test]
+    async fn test_write_across_page_boundary() {
+        let (_, reg) = setup_env();
+
+        let data_len = 5000;
+        let data: Vec<u8> = (0..data_len).map(|i| (i % 255) as u8).collect();
+
+        let written = reg.write_at(0, &data).await.expect("Write failed");
+        assert_eq!(written, data_len);
+
+        let mut buf = vec![0u8; data_len];
+        let read = reg.read_at(0, &mut buf).await.expect("Read failed");
+        assert_eq!(read, data_len);
+        assert_eq!(buf, data);
+    }
+
+    #[tokio::test]
+    async fn test_sparse_write_and_read() {
+        let (_, reg) = setup_env();
+
+        // Write at offset 5000 (block 1). try_alloc_block backfills block 0
+        // with a zeroed page, so the "hole" reads back as zeros.
+        let data = b"Sparse";
+        reg.write_at(5000, data).await.expect("Write failed");
+
+        let mut buf = vec![0u8; 10];
+        let read = reg.read_at(0, &mut buf).await.expect("Read hole failed");
+        assert_eq!(read, 10);
+        assert_eq!(buf, vec![0u8; 10], "Sparse region should be zeroed");
+
+        let read = reg.read_at(5000, &mut buf).await.expect("Read data failed");
+        assert_eq!(read, data.len());
+        assert_eq!(&buf[..read], data);
+
+        let attr = reg.getattr().await.unwrap();
+        assert_eq!(attr.size, 5000 + data.len() as u64);
+    }
+
+    #[tokio::test]
+    async fn test_write_append() {
+        let (_, reg) = setup_env();
+
+        reg.write_at(0, b"Hello").await.unwrap();
+        reg.write_at(5, b" World").await.unwrap();
+
+        let mut buf = vec![0u8; 20];
+        let read = reg.read_at(0, &mut buf).await.unwrap();
+        assert_eq!(&buf[..read], b"Hello World");
+    }
+
+    #[tokio::test]
+    async fn test_write_out_of_bounds() {
+        let (_, reg) = setup_env();
+
+        // Mirrors the MAX_SZ formula rather than hardcoding the pointer size.
+        let max_sz = (PAGE_SIZE * (PAGE_SIZE / size_of::<*mut u8>())) as u64;
+
+        let res = reg.write_at(max_sz, b"X").await;
+
+        assert!(res.is_err(), "Should fail to write beyond MAX_SZ");
+    }
+
+    #[tokio::test]
+    async fn test_truncate() {
+        let (_, reg) = setup_env();
+
+        reg.write_at(0, b"1234567890").await.unwrap();
+
+        reg.truncate(5).await.expect("Truncate down failed");
+        let attr = reg.getattr().await.unwrap();
+        assert_eq!(attr.size, 5);
+
+        reg.truncate(10).await.expect("Truncate up failed");
+        let attr = reg.getattr().await.unwrap();
+        assert_eq!(attr.size, 10);
+
+        let mut buf = vec![0u8; 10];
+        let read = reg.read_at(0, &mut buf).await.unwrap();
+        assert_eq!(read, 10);
+        // "12345" followed by five zeros.
+        assert_eq!(&buf[..5], b"12345");
+        assert_eq!(&buf[5..], &[0u8; 5]);
+    }
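+
+    /// Added coverage (a sketch): truncating out past the allocated blocks
+    /// leaves a hole with no backing page; reads of it must still see zeros.
+    #[tokio::test]
+    async fn test_truncate_extend_hole_reads_zero() {
+        let (_, reg) = setup_env();
+
+        reg.write_at(0, b"abc").await.unwrap();
+        reg.truncate((BLOCK_SZ + 100) as u64).await.unwrap();
+
+        let mut buf = vec![0xFFu8; 16];
+        let read = reg.read_at(BLOCK_SZ as u64, &mut buf).await.unwrap();
+        assert_eq!(read, 16);
+        assert_eq!(buf, vec![0u8; 16]);
+    }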
+
+    #[tokio::test]
+    async fn test_dir_create_and_lookup() {
+        let fs = setup_fs();
+        let root = fs.root_inode().await.unwrap();
+
+        // Create a file.
+        let file_inode = root
+            .create(
+                "test_file.txt",
+                FileType::File,
+                FilePermissions::from_bits_retain(0),
+            )
+            .await
+            .expect("Create failed");
+
+        // Lookup.
+        let found = root.lookup("test_file.txt").await.expect("Lookup failed");
+        assert_eq!(found.id(), file_inode.id());
+
+        // Lookup non-existent.
+        let err = root.lookup("ghost.txt").await;
+        assert!(err.is_err()); // Should be NotFound.
+    }
+
+    #[tokio::test]
+    async fn test_dir_create_duplicate() {
+        let fs = setup_fs();
+        let root = fs.root_inode().await.unwrap();
+
+        root.create("dup", FileType::File, FilePermissions::from_bits_retain(0))
+            .await
+            .unwrap();
+
+        let res = root.create("dup", FileType::File, FilePermissions::empty()).await;
+        assert!(res.is_err(), "Should not allow duplicate file creation");
+    }
+
+    #[tokio::test]
+    async fn test_dir_subdirectories() {
+        let fs = setup_fs();
+        let root = fs.root_inode().await.unwrap();
+
+        // Create /subdir.
+        let subdir = root
+            .create("subdir", FileType::Directory, FilePermissions::empty())
+            .await
+            .unwrap();
+
+        // Create /subdir/inner.
+        let inner = subdir
+            .create("inner", FileType::File, FilePermissions::empty())
+            .await
+            .unwrap();
+
+        // Verify hierarchy.
+        let found_subdir = root.lookup("subdir").await.unwrap();
+        let found_inner = found_subdir.lookup("inner").await.unwrap();
+        assert_eq!(found_inner.id(), inner.id());
+    }
+
+    #[tokio::test]
+    async fn test_readdir() {
+        let fs = setup_fs();
+        let root = fs.root_inode().await.unwrap();
+
+        // Create files in "random" order.
+        root.create("c.txt", FileType::File, FilePermissions::empty()).await.unwrap();
+        root.create("a.txt", FileType::File, FilePermissions::empty()).await.unwrap();
+        root.create("b.dir", FileType::Directory, FilePermissions::empty()).await.unwrap();
+
+        let mut dir_stream = root.readdir(0).await.expect("Readdir failed");
+
+        let mut names = Vec::new();
+        while let Some(dent) = dir_stream.next_entry().await.unwrap() {
+            names.push(dent.name);
+        }
+
+        // We don't guarantee order in the current implementation (it's a Vec
+        // push), so verify existence only.
+        assert!(names.contains(&"a.txt".to_string()));
+        assert!(names.contains(&"b.dir".to_string()));
+        assert!(names.contains(&"c.txt".to_string()));
+        assert_eq!(names.len(), 3);
+    }
+
+    #[tokio::test]
+    async fn test_inode_id_uniqueness() {
+        let fs = setup_fs();
+        let root = fs.root_inode().await.unwrap();
+
+        let f1 = root.create("f1", FileType::File, FilePermissions::empty()).await.unwrap();
+        let f2 = root.create("f2", FileType::File, FilePermissions::empty()).await.unwrap();
+
+        assert_ne!(f1.id(), f2.id());
+        assert_ne!(f1.id(), root.id());
+    }
+}