// moss-kernel/libkernel/src/fs/filesystems/tmpfs.rs
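//! An in-memory filesystem (tmpfs).
//!
//! Regular files store their data in page-sized blocks reached through a
//! single indirect block: one page of pointers, each pointing at one data
//! page. Data blocks are allocated lazily and contiguously as the file is
//! written, and are returned to the page allocator on truncation or when the
//! inode is dropped. Directories are spinlock-protected vectors of entries.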
use crate::{
CpuOps,
error::{FsError, KernelError, Result},
fs::{
DirStream, Dirent, FileType, Filesystem, Inode, InodeId,
attr::{FileAttr, FilePermissions},
},
memory::{
PAGE_SIZE,
address::{AddressTranslator, VA},
page::ClaimedPage,
page_alloc::PageAllocGetter,
},
sync::spinlock::SpinLockIrq,
};
use alloc::{
boxed::Box,
string::{String, ToString},
sync::{Arc, Weak},
vec::Vec,
};
use async_trait::async_trait;
use core::{
cmp::min,
marker::PhantomData,
mem::size_of,
sync::atomic::{AtomicU64, Ordering},
};
const BLOCK_SZ: usize = PAGE_SIZE;
// Maximum file size: one indirect block, i.e. a single page of pointers to
// data blocks. For example, with 4 KiB pages and 8-byte pointers this is
// 512 blocks of 4 KiB, or 2 MiB per file.
const MAX_SZ: usize = BLOCK_SZ * (PAGE_SIZE / size_of::<*mut u8>());
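/// The block map for a regular tmpfs file.
///
/// `indirect_block` is a single page of `*mut u8` slots, one per data block.
/// Only the first `allocated_blocks` slots point at real pages; `size` is the
/// logical file size in bytes and can exceed the allocated blocks after a
/// truncate that grows the file.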
struct TmpFsRegInner<C, G, T>
where
C: CpuOps,
G: PageAllocGetter<C>,
T: AddressTranslator<()>,
{
indirect_block: ClaimedPage<C, G, T>,
size: usize,
allocated_blocks: usize,
}
impl<C, G, T> TmpFsRegInner<C, G, T>
where
C: CpuOps,
G: PageAllocGetter<C>,
T: AddressTranslator<()>,
{
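/// Returns a pointer to the `block_idx`-th pointer slot in the indirect
/// block.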
fn block_slot_ptr(&mut self, block_idx: usize) -> *mut *mut u8 {
debug_assert!(block_idx < PAGE_SIZE / size_of::<*mut u8>());
unsafe {
self.indirect_block
.as_ptr_mut()
.cast::<*mut u8>()
.add(block_idx)
}
}
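/// Returns the data-page pointer stored in slot `block_idx`. The pointer is
/// null if the block has never been allocated, since the indirect block
/// starts out zeroed.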
fn block_ptr_mut(&mut self, block_idx: usize) -> *mut u8 {
unsafe { *self.block_slot_ptr(block_idx) }
}
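/// Returns the data page backing `block_idx`, allocating zeroed pages for it
/// and for any earlier blocks still missing, so allocated blocks are always
/// contiguous from block 0.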
fn try_alloc_block(&mut self, block_idx: usize) -> Result<*mut u8> {
// Ensure no discontinuity. If we write to block 5, blocks 0-4 must exist.
// We iterate up to and including the target block_idx.
for i in self.allocated_blocks..=block_idx {
let new_page = ClaimedPage::<C, G, T>::alloc_zeroed()?;
unsafe {
*self.block_slot_ptr(i) = new_page.as_ptr_mut();
}
new_page.leak();
self.allocated_blocks += 1;
}
Ok(self.block_ptr_mut(block_idx))
}
}
impl<C, G, T> Drop for TmpFsRegInner<C, G, T>
where
C: CpuOps,
G: PageAllocGetter<C>,
T: AddressTranslator<()>,
{
fn drop(&mut self) {
for i in 0..self.allocated_blocks {
let ptr = unsafe { *self.block_slot_ptr(i) };
if !ptr.is_null() {
// SAFETY: This pointer was obtained from ClaimedPage::leak() in
// the block allocation code.
unsafe {
ClaimedPage::<C, G, T>::from_pfn(
VA::from_ptr_mut(ptr.cast()).to_pa::<T>().to_pfn(),
)
};
// Drop happens here, releasing memory.
}
}
}
}
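/// A regular-file inode whose contents live entirely in RAM.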
struct TmpFsReg<C, G, T>
where
C: CpuOps,
G: PageAllocGetter<C>,
T: AddressTranslator<()>,
{
id: InodeId,
inner: SpinLockIrq<TmpFsRegInner<C, G, T>, C>,
}
impl<C, G, T> TmpFsReg<C, G, T>
where
C: CpuOps,
G: PageAllocGetter<C>,
T: AddressTranslator<()>,
{
fn new(id: InodeId) -> Result<Self> {
Ok(Self {
id,
inner: SpinLockIrq::new(TmpFsRegInner {
indirect_block: ClaimedPage::<C, G, T>::alloc_zeroed()?,
size: 0,
allocated_blocks: 0,
}),
})
}
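/// Splits a byte offset into `(block index, offset within block)`.
/// For example, with 4 KiB blocks an offset of 5000 maps to block 1, byte 904.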
fn offset_to_block_locus(offset: usize) -> (usize, usize) {
(offset / BLOCK_SZ, offset % BLOCK_SZ)
}
}
#[async_trait]
impl<C, G, T> Inode for TmpFsReg<C, G, T>
where
C: CpuOps,
G: PageAllocGetter<C>,
T: AddressTranslator<()>,
{
fn id(&self) -> InodeId {
self.id
}
async fn read_at(&self, mut offset: u64, buf: &mut [u8]) -> Result<usize> {
let mut inner = self.inner.lock_save_irq();
if offset as usize >= inner.size {
return Ok(0);
}
let mut bytes_to_read = min(inner.size - offset as usize, buf.len());
let mut buf_ptr = buf.as_mut_ptr();
let mut total_read = 0;
while bytes_to_read > 0 {
let (blk_idx, blk_offset) = Self::offset_to_block_locus(offset as _);
let bytes_in_block = BLOCK_SZ - blk_offset;
let chunk_len = min(bytes_to_read, bytes_in_block);
if blk_idx >= inner.allocated_blocks {
// Hole inside the file (created by truncating upwards): there is no
// backing page, so the contents read back as zeros.
unsafe {
buf_ptr.write_bytes(0, chunk_len);
buf_ptr = buf_ptr.add(chunk_len);
}
} else {
unsafe {
let src = inner.block_ptr_mut(blk_idx).add(blk_offset);
src.copy_to_nonoverlapping(buf_ptr, chunk_len);
buf_ptr = buf_ptr.add(chunk_len);
}
}
offset += chunk_len as u64;
total_read += chunk_len;
bytes_to_read -= chunk_len;
}
Ok(total_read)
}
async fn write_at(&self, mut offset: u64, buf: &[u8]) -> Result<usize> {
if offset as usize >= MAX_SZ {
return Err(FsError::OutOfBounds.into());
}
let mut inner = self.inner.lock_save_irq();
// Calculate how much we can write without exceeding MAX_SZ
let available_space = MAX_SZ.saturating_sub(offset as usize);
let mut bytes_to_write = min(buf.len(), available_space);
if bytes_to_write == 0 {
return Ok(0);
}
let mut buf_ptr = buf.as_ptr();
let mut total_written = 0;
while bytes_to_write > 0 {
let (blk_idx, blk_offset) = Self::offset_to_block_locus(offset as _);
// Ensure the block exists
let block_ptr = inner.try_alloc_block(blk_idx)?;
let bytes_in_block = BLOCK_SZ - blk_offset;
let chunk_len = min(bytes_to_write, bytes_in_block);
unsafe {
let dst = block_ptr.add(blk_offset);
dst.copy_from_nonoverlapping(buf_ptr, chunk_len);
buf_ptr = buf_ptr.add(chunk_len);
}
offset += chunk_len as u64;
total_written += chunk_len;
bytes_to_write -= chunk_len;
}
// Update file size if we extended it
if offset as usize > inner.size {
inner.size = offset as usize;
}
Ok(total_written)
}
async fn truncate(&self, size: u64) -> Result<()> {
let mut inner = self.inner.lock_save_irq();
let new_size = size as usize;
if new_size > MAX_SZ {
return Err(FsError::OutOfBounds.into());
}
// Handle Expansion
if new_size > inner.size {
// We just update the size. The holes are implicitly zeroed by
// read_at logic, and write_at will fill them with zeroed pages when
// touched.
inner.size = new_size;
return Ok(());
}
// Handle shrinking
if new_size < inner.size {
// Calculate number of blocks required for the new size.
let new_blk_count = new_size.div_ceil(BLOCK_SZ);
// Free the excess blocks from the end
while inner.allocated_blocks > new_blk_count {
let release_idx = inner.allocated_blocks - 1;
unsafe {
let ptr_slot = inner.block_slot_ptr(release_idx);
let ptr = *ptr_slot;
if !ptr.is_null() {
// Reconstruct the ClaimedPage to drop it (returning frame to allocator)
let page = ClaimedPage::<C, G, T>::from_pfn(
VA::from_ptr_mut(ptr.cast()).to_pa::<T>().to_pfn(),
);
drop(page);
// Null the slot to prevent double-free in Drop
*ptr_slot = core::ptr::null_mut();
}
}
inner.allocated_blocks -= 1;
}
// Zero out trailing data in the last retained page. This is POSIX
// behavior: bytes past the new EOF must appear as zero if we extend
// the file later.
if new_blk_count > 0 {
let last_blk_idx = new_blk_count - 1;
let offset_in_block = new_size % BLOCK_SZ;
if offset_in_block > 0 {
let ptr = inner.block_ptr_mut(last_blk_idx);
unsafe {
let tail_ptr = ptr.add(offset_in_block);
let tail_len = BLOCK_SZ - offset_in_block;
tail_ptr.write_bytes(0, tail_len);
}
}
}
inner.size = new_size;
}
Ok(())
}
async fn getattr(&self) -> Result<FileAttr> {
let inner = self.inner.lock_save_irq();
Ok(FileAttr {
size: inner.size as u64,
// TODO: store permissions and timestamps in TmpFsReg and report them here;
// for now the remaining fields fall back to their defaults.
..Default::default()
})
}
}
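/// One directory entry: a name bound to an inode and its file type.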
struct TmpFsDirEnt {
name: String,
id: InodeId,
kind: FileType,
inode: Arc<dyn Inode>,
}
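/// A directory inode: a flat, spinlock-protected list of entries plus the
/// directory's attributes.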
struct TmpFsDirInode<C, G, T>
where
C: CpuOps,
G: PageAllocGetter<C>,
T: AddressTranslator<()>,
{
entries: SpinLockIrq<Vec<TmpFsDirEnt>, C>,
attrs: FileAttr,
id: u64,
fs: Weak<TmpFs<C, G, T>>,
this: Weak<Self>,
}
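/// Iterator state for `readdir`: the directory inode and the index of the
/// next entry to yield.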
struct TmpFsDirReader<C, G, T>
where
C: CpuOps,
G: PageAllocGetter<C>,
T: AddressTranslator<()>,
{
inode: Arc<TmpFsDirInode<C, G, T>>,
offset: usize,
}
#[async_trait]
impl<C, G, T> DirStream for TmpFsDirReader<C, G, T>
where
C: CpuOps,
G: PageAllocGetter<C>,
T: AddressTranslator<()>,
{
async fn next_entry(&mut self) -> Result<Option<Dirent>> {
let guard = self.inode.entries.lock_save_irq();
if let Some(entry) = guard.get(self.offset) {
self.offset += 1;
let dent = Some(Dirent {
id: entry.id,
name: entry.name.clone(),
file_type: entry.kind,
offset: self.offset as _,
});
Ok(dent)
} else {
Ok(None)
}
}
}
#[async_trait]
impl<C, G, T> Inode for TmpFsDirInode<C, G, T>
where
C: CpuOps,
G: PageAllocGetter<C>,
T: AddressTranslator<()>,
{
fn id(&self) -> crate::fs::InodeId {
InodeId::from_fsid_and_inodeid(self.fs.upgrade().unwrap().id(), self.id)
}
async fn lookup(&self, name: &str) -> Result<Arc<dyn Inode>> {
self.entries
.lock_save_irq()
.iter()
.find(|x| x.name == name)
.map(|x| x.inode.clone())
.ok_or(FsError::NotFound.into())
}
async fn getattr(&self) -> Result<FileAttr> {
Ok(self.attrs.clone())
}
async fn readdir(&self, start_offset: u64) -> Result<Box<dyn DirStream>> {
Ok(Box::new(TmpFsDirReader {
inode: self.this.upgrade().unwrap(),
offset: start_offset as _,
}))
}
async fn create(
&self,
name: &str,
file_type: FileType,
mode: FilePermissions,
) -> Result<Arc<dyn Inode>> {
let mut entries = self.entries.lock_save_irq();
if entries.iter().any(|e| e.name == name) {
return Err(FsError::AlreadyExists.into());
}
let fs = self.fs.upgrade().ok_or(FsError::InvalidFs)?;
let new_id = fs.alloc_inode_id();
let inode_id = InodeId::from_fsid_and_inodeid(fs.id(), new_id);
let inode: Arc<dyn Inode> = match file_type {
FileType::File => Arc::new(TmpFsReg::<C, G, T>::new(inode_id)?),
FileType::Directory => TmpFsDirInode::<C, G, T>::new(new_id, self.fs.clone(), mode),
_ => return Err(KernelError::NotSupported),
};
entries.push(TmpFsDirEnt {
name: name.to_string(),
id: inode_id,
kind: file_type,
inode: inode.clone(),
});
Ok(inode)
}
}
impl<C, G, T> TmpFsDirInode<C, G, T>
where
C: CpuOps,
G: PageAllocGetter<C>,
T: AddressTranslator<()>,
{
pub fn new(id: u64, fs: Weak<TmpFs<C, G, T>>, mode: FilePermissions) -> Arc<Self> {
Arc::new_cyclic(|weak_this| Self {
entries: SpinLockIrq::new(Vec::new()),
attrs: FileAttr {
size: 0,
file_type: FileType::Directory,
block_size: BLOCK_SZ as _,
mode,
..Default::default()
},
id,
fs,
this: weak_this.clone(),
})
}
}
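/// A tmpfs instance: owns the root directory, the filesystem id, and the
/// counter used to hand out new inode numbers.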
pub struct TmpFs<C, G, T>
where
C: CpuOps,
G: PageAllocGetter<C>,
T: AddressTranslator<()>,
{
id: u64,
next_inode_id: AtomicU64,
root: Arc<TmpFsDirInode<C, G, T>>,
pg_allocator: PhantomData<G>,
_phantom: PhantomData<T>,
}
impl<C, G, T> TmpFs<C, G, T>
where
C: CpuOps,
G: PageAllocGetter<C>,
T: AddressTranslator<()>,
{
pub fn new(fs_id: u64) -> Arc<Self> {
Arc::new_cyclic(|weak_fs| {
let root =
TmpFsDirInode::new(1, weak_fs.clone(), FilePermissions::from_bits_retain(0o766));
Self {
id: fs_id,
next_inode_id: AtomicU64::new(2),
root,
pg_allocator: PhantomData,
_phantom: PhantomData,
}
})
}
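/// Returns the next unused filesystem-local inode number.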
pub fn alloc_inode_id(&self) -> u64 {
self.next_inode_id.fetch_add(1, Ordering::Relaxed)
}
}
#[async_trait]
impl<C, G, T> Filesystem for TmpFs<C, G, T>
where
C: CpuOps,
G: PageAllocGetter<C>,
T: AddressTranslator<()>,
{
async fn root_inode(&self) -> Result<Arc<dyn Inode>> {
Ok(self.root.clone())
}
fn id(&self) -> u64 {
self.id
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::fs::{FileType, InodeId, attr::FilePermissions};
use crate::memory::{
PAGE_SIZE,
address::IdentityTranslator,
page_alloc::{self, FrameAllocator, PageAllocGetter},
};
use crate::sync::once_lock::OnceLock;
use crate::test::MockCpuOps;
use alloc::vec;
use std::sync::Arc;
static PG_ALLOC: OnceLock<FrameAllocator<MockCpuOps>, MockCpuOps> = OnceLock::new();
struct TmpFsPgAllocGetter {}
impl PageAllocGetter<MockCpuOps> for TmpFsPgAllocGetter {
fn global_page_alloc() -> &'static OnceLock<FrameAllocator<MockCpuOps>, MockCpuOps> {
&PG_ALLOC
}
}
/// Initializes the global allocator for the test suite.
fn init_allocator() {
PG_ALLOC.get_or_init(|| {
// Allocate 32MB for the test heap to ensure we don't run out during large file tests
page_alloc::tests::TestFixture::new(&[(0, 32 * 1024 * 1024)], &[]).leak_allocator()
});
}
/// Creates a fresh Filesystem and a detached regular file for isolated file testing.
fn setup_env() -> (
Arc<TmpFs<MockCpuOps, TmpFsPgAllocGetter, IdentityTranslator>>,
TmpFsReg<MockCpuOps, TmpFsPgAllocGetter, IdentityTranslator>,
) {
init_allocator();
let fs = TmpFs::new(0);
let reg = TmpFsReg::new(InodeId::from_fsid_and_inodeid(0, 1024)).unwrap();
(fs, reg)
}
/// Creates just the Filesystem to test directory hierarchies.
fn setup_fs() -> Arc<TmpFs<MockCpuOps, TmpFsPgAllocGetter, IdentityTranslator>> {
init_allocator();
TmpFs::new(1)
}
#[tokio::test]
async fn test_simple_read_write() {
let (_, reg) = setup_env();
let data = b"Hello, Kernel!";
let mut buf = vec![0u8; 100];
let written = reg.write_at(0, data).await.expect("Write failed");
assert_eq!(written, data.len());
let read = reg.read_at(0, &mut buf).await.expect("Read failed");
assert_eq!(read, data.len());
assert_eq!(&buf[..read], data);
let attr = reg.getattr().await.expect("Getattr failed");
assert_eq!(attr.size, data.len() as u64);
}
#[tokio::test]
async fn test_write_across_page_boundary() {
let (_, reg) = setup_env();
let data_len = 5000;
let data: Vec<u8> = (0..data_len).map(|i| (i % 255) as u8).collect();
let written = reg.write_at(0, &data).await.expect("Write failed");
assert_eq!(written, data_len);
let mut buf = vec![0u8; data_len];
let read = reg.read_at(0, &mut buf).await.expect("Read failed");
assert_eq!(read, data_len);
assert_eq!(buf, data);
}
#[tokio::test]
async fn test_sparse_write_and_read() {
let (_, reg) = setup_env();
// Write at offset 5000 (Allocates Block 1, leaves Block 0 unallocated/sparse)
let data = b"Sparse";
reg.write_at(5000, data).await.expect("Write failed");
let mut buf = vec![0u8; 10];
let read = reg.read_at(0, &mut buf).await.expect("Read hole failed");
assert_eq!(read, 10);
assert_eq!(buf, vec![0u8; 10], "Sparse region should be zeroed");
let read = reg.read_at(5000, &mut buf).await.expect("Read data failed");
assert_eq!(read, data.len());
assert_eq!(&buf[..read], data);
let attr = reg.getattr().await.unwrap();
assert_eq!(attr.size, 5000 + data.len() as u64);
}
#[tokio::test]
async fn test_write_append() {
let (_, reg) = setup_env();
reg.write_at(0, b"Hello").await.unwrap();
reg.write_at(5, b" World").await.unwrap();
let mut buf = vec![0u8; 20];
let read = reg.read_at(0, &mut buf).await.unwrap();
assert_eq!(&buf[..read], b"Hello World");
}
#[tokio::test]
async fn test_write_out_of_bounds() {
let (_, reg) = setup_env();
let max_sz = MAX_SZ as u64;
let res = reg.write_at(max_sz, b"X").await;
assert!(res.is_err(), "Should fail to write beyond MAX_SZ");
}
#[tokio::test]
async fn test_truncate() {
let (_, reg) = setup_env();
reg.write_at(0, b"1234567890").await.unwrap();
reg.truncate(5).await.expect("Truncate down failed");
let attr = reg.getattr().await.unwrap();
assert_eq!(attr.size, 5);
reg.truncate(10).await.expect("Truncate up failed");
let attr = reg.getattr().await.unwrap();
assert_eq!(attr.size, 10);
let mut buf = vec![0u8; 10];
let read = reg.read_at(0, &mut buf).await.unwrap();
assert_eq!(read, 10);
// "12345" + 5 zeros
assert_eq!(&buf[..5], b"12345");
assert_eq!(&buf[5..], &[0, 0, 0, 0, 0]);
}
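// A follow-on test sketch (assumes the hole-zeroing behaviour of read_at
// above): truncating upwards past the allocated blocks must read back as
// zeros rather than returning a short read.
#[tokio::test]
async fn test_truncate_up_reads_zeros() {
let (_, reg) = setup_env();
reg.write_at(0, b"abc").await.unwrap();
// Grow the file well past the single allocated block without writing.
reg.truncate((PAGE_SIZE + 100) as u64)
.await
.expect("Truncate up failed");
let mut buf = vec![0xffu8; 50];
let read = reg.read_at(PAGE_SIZE as u64, &mut buf).await.unwrap();
assert_eq!(read, 50);
assert_eq!(buf, vec![0u8; 50], "Hole should read as zeros");
}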
#[tokio::test]
async fn test_dir_create_and_lookup() {
let fs = setup_fs();
let root = fs.root_inode().await.unwrap();
// Create a file
let file_inode = root
.create(
"test_file.txt",
FileType::File,
FilePermissions::from_bits_retain(0),
)
.await
.expect("Create failed");
// Lookup
let found = root.lookup("test_file.txt").await.expect("Lookup failed");
assert_eq!(found.id(), file_inode.id());
// Lookup non-existent
let err = root.lookup("ghost.txt").await;
assert!(err.is_err()); // Should be NotFound
}
#[tokio::test]
async fn test_dir_create_duplicate() {
let fs = setup_fs();
let root = fs.root_inode().await.unwrap();
root.create("dup", FileType::File, FilePermissions::from_bits_retain(0))
.await
.unwrap();
let res = root
.create("dup", FileType::File, FilePermissions::empty())
.await;
assert!(res.is_err(), "Should not allow duplicate file creation");
}
#[tokio::test]
async fn test_dir_subdirectories() {
let fs = setup_fs();
let root = fs.root_inode().await.unwrap();
// Create /subdir
let subdir = root
.create("subdir", FileType::Directory, FilePermissions::empty())
.await
.unwrap();
// Create /subdir/inner
let inner = subdir
.create("inner", FileType::File, FilePermissions::empty())
.await
.unwrap();
// Verify hierarchy
let found_subdir = root.lookup("subdir").await.unwrap();
let found_inner = found_subdir.lookup("inner").await.unwrap();
assert_eq!(found_inner.id(), inner.id());
}
#[tokio::test]
async fn test_readdir() {
let fs = setup_fs();
let root = fs.root_inode().await.unwrap();
// Create files in "random" order
root.create("c.txt", FileType::File, FilePermissions::empty())
.await
.unwrap();
root.create("a.txt", FileType::File, FilePermissions::empty())
.await
.unwrap();
root.create("b.dir", FileType::Directory, FilePermissions::empty())
.await
.unwrap();
let mut dir_stream = root.readdir(0).await.expect("Readdir failed");
let mut names = Vec::new();
while let Some(dent) = dir_stream.next_entry().await.unwrap() {
names.push(dent.name);
}
// We don't guarantee order in the current implementation (it's a Vec push),
// but let's verify existence.
assert!(names.contains(&"a.txt".to_string()));
assert!(names.contains(&"b.dir".to_string()));
assert!(names.contains(&"c.txt".to_string()));
assert_eq!(names.len(), 3);
}
#[tokio::test]
async fn test_inode_id_uniqueness() {
let fs = setup_fs();
let root = fs.root_inode().await.unwrap();
let f1 = root
.create("f1", FileType::File, FilePermissions::empty())
.await
.unwrap();
let f2 = root
.create("f2", FileType::File, FilePermissions::empty())
.await
.unwrap();
assert_ne!(f1.id(), f2.id());
assert_ne!(f1.id(), root.id());
}
}