From 606485c398a962ef02cfb31a2d20458848f634f7 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 24 Oct 2025 02:45:10 +0000 Subject: [PATCH] Refactor: Organize job logs and flatten job database Co-authored-by: ijamespine --- RUNTIME_CLEANUP_SUMMARY.md | 117 +++++++++++++++++++++++++++ core/benchmarks/src/cli/commands.rs | 5 +- core/benchmarks/src/core_boot/mod.rs | 7 +- core/examples/indexing_demo.rs | 10 +-- core/src/config/app_config.rs | 9 +-- core/src/context.rs | 4 +- core/src/infra/job/database.rs | 11 +-- core/src/infra/job/manager.rs | 6 +- core/src/lib.rs | 18 ++--- core/src/library/manager.rs | 4 - core/src/library/mod.rs | 5 ++ 11 files changed, 149 insertions(+), 47 deletions(-) create mode 100644 RUNTIME_CLEANUP_SUMMARY.md diff --git a/RUNTIME_CLEANUP_SUMMARY.md b/RUNTIME_CLEANUP_SUMMARY.md new file mode 100644 index 000000000..650ffb288 --- /dev/null +++ b/RUNTIME_CLEANUP_SUMMARY.md @@ -0,0 +1,117 @@ +# Runtime Data Folder Cleanup Summary + +This document summarizes the changes made to clean up the Spacedrive runtime data folder structure for release. + +## Changes Made + +### 1. Jobs Database Flattened ✅ + +**Before:** +- Path: `library/jobs.db/jobs.db` (folder containing database file) +- Issue: Created unnecessary directory nesting + +**After:** +- Path: `library/jobs.db` (database file at library root) +- Fix: Modified `init_database()` in `core/src/infra/job/database.rs` to create parent directory only, not treat the path as a directory + +**Files Changed:** +- `core/src/infra/job/database.rs` - Fixed database initialization to use file path directly + +### 2. Job Logs Moved to Library Folder ✅ + +**Before:** +- Path: `data_dir/job_logs/` (global, shared across all libraries) +- Issue: Job logs should be library-scoped for isolation and portability + +**After:** +- Path: `library/logs/` (per-library) +- Each library has its own job logs directory + +**Files Changed:** +- `core/src/library/mod.rs` - Added `job_logs_dir()` method +- `core/src/infra/job/manager.rs` - Use library-specific logs directory +- `core/src/config/app_config.rs` - Removed global `job_logs_dir()` method +- `core/src/lib.rs` - Updated initialization to not create global job logs dir +- `core/src/context.rs` - Changed `set_job_logging()` to accept optional path +- `core/examples/indexing_demo.rs` - Updated to reflect per-library logs +- `core/benchmarks/src/core_boot/mod.rs` - Removed job_logs_dir from CoreBoot +- `core/benchmarks/src/cli/commands.rs` - Updated display message + +### 3. Removed Indexes Folder ✅ + +**Before:** +- Created `library/indexes/` directory on library initialization +- Not actually used by any code + +**After:** +- No longer created + +**Files Changed:** +- `core/src/library/manager.rs` - Removed `indexes` directory creation in two places + +### 4. Removed Thumbnails Folder Pre-creation ✅ + +**Before:** +- Created `library/thumbnails/` directory on library initialization +- Will transition to sidecars + +**After:** +- No longer pre-created +- Thumbnails will be stored as sidecars in `library/sidecars/` +- Directory will be created on-demand when thumbnails are generated + +**Files Changed:** +- `core/src/library/manager.rs` - Removed `thumbnails` directory pre-creation + +### 5. Documentation Created ✅ + +Created comprehensive documentation at `/workspace/docs/data-structure.md` covering: +- Complete data directory structure +- Application-level configuration +- Library-level organization +- Migration notes +- Code examples +- Best practices + +## New Library Structure + +``` +.sdlibrary/ +├── library.json # Library configuration and metadata +├── database.db # Library database (SQLite) +├── jobs.db # Job state and history (SQLite) [FIXED: flattened] +├── logs/ # Job logs for this library [NEW: moved from global] +├── previews/ # Preview files +├── exports/ # Exported data +└── sidecars/ # Virtual sidecar root for derivative data +``` + +## Directories Removed from Pre-creation + +- `indexes/` - Not used +- `thumbnails/` - Will be sidecars + +## Benefits + +1. **Cleaner Structure**: Flattened jobs.db eliminates unnecessary nesting +2. **Better Isolation**: Job logs are now per-library, improving portability +3. **Reduced Clutter**: Removed unused indexes directory +4. **Future-Ready**: Thumbnails will transition to sidecars +5. **Improved Portability**: Each library is more self-contained + +## Backward Compatibility + +Existing installations will continue to work, but new libraries will use the cleaned-up structure. Users may need to manually migrate old job logs if needed, but this is not critical as job logs are typically transient. + +## Testing Recommendations + +1. Create a new library and verify structure matches new layout +2. Run a job and verify logs appear in `library/logs/` +3. Verify jobs.db is created at `library/jobs.db` (not `library/jobs.db/jobs.db`) +4. Test library portability by moving a library folder to a different location + +## Status + +✅ All changes complete and code compiles successfully +✅ Documentation created +✅ Examples and benchmarks updated diff --git a/core/benchmarks/src/cli/commands.rs b/core/benchmarks/src/cli/commands.rs index 7dca6af0b..835c6e6b2 100644 --- a/core/benchmarks/src/cli/commands.rs +++ b/core/benchmarks/src/cli/commands.rs @@ -264,9 +264,8 @@ async fn run_scenario( println!("Booting isolated core for scenario '{}'...", scenario); let boot = bench::core_boot::boot_isolated_with_core(&scenario, None).await?; println!( - "Core boot complete. Data dir: {} | Job logs: {}", - boot.data_dir.display(), - boot.job_logs_dir.display() + "Core boot complete. Data dir: {} | Job logs: per-library", + boot.data_dir.display() ); // Parse recipe diff --git a/core/benchmarks/src/core_boot/mod.rs b/core/benchmarks/src/core_boot/mod.rs index caf6e29bc..3c8e62ca7 100644 --- a/core/benchmarks/src/core_boot/mod.rs +++ b/core/benchmarks/src/core_boot/mod.rs @@ -4,15 +4,13 @@ use std::sync::Arc; #[derive(Clone)] pub struct CoreBoot { pub data_dir: PathBuf, - pub job_logs_dir: PathBuf, pub core: Arc, } impl CoreBoot { - pub fn new(data_dir: PathBuf, job_logs_dir: PathBuf, core: Arc) -> Self { + pub fn new(data_dir: PathBuf, core: Arc) -> Self { Self { data_dir, - job_logs_dir, core, } } @@ -40,7 +38,6 @@ pub async fn boot_isolated_with_core( if bench_cfg.job_logging.max_file_size < 50 * 1024 * 1024 { bench_cfg.job_logging.max_file_size = 50 * 1024 * 1024; } - let job_logs_dir = bench_cfg.job_logs_dir(); bench_cfg .save() .map_err(|e| anyhow::anyhow!("save bench config: {}", e))?; @@ -49,5 +46,5 @@ pub async fn boot_isolated_with_core( .await .map_err(|e| anyhow::anyhow!("init core: {}", e))?; let core = Arc::new(core); - Ok(CoreBoot::new(bench_data_dir, job_logs_dir, core)) + Ok(CoreBoot::new(bench_data_dir, core)) } diff --git a/core/examples/indexing_demo.rs b/core/examples/indexing_demo.rs index 5bea9c487..591e917a9 100644 --- a/core/examples/indexing_demo.rs +++ b/core/examples/indexing_demo.rs @@ -49,18 +49,14 @@ async fn main() -> Result<(), Box> { include_debug: true, // Include debug logs for full detail }; - config.save()?; - println!( - " Job logging enabled to: {}", - config.job_logs_dir().display() - ); - } + config.save()?; + println!(" Job logging enabled (logs stored per-library)"); +} let core = Core::new(data_dir.clone()).await?; println!(" Core initialized with job logging"); println!(" Device ID: {}", core.device.device_id()?); println!(" Data directory: {:?}", data_dir); - println!(" Job logs directory: {:?}\n", data_dir.join("job_logs")); // 2. Get or create library println!("2. Setting up library..."); diff --git a/core/src/config/app_config.rs b/core/src/config/app_config.rs index ccbe9dae8..c46dcd91e 100644 --- a/core/src/config/app_config.rs +++ b/core/src/config/app_config.rs @@ -176,11 +176,6 @@ impl AppConfig { self.data_dir.join("libraries") } - /// Get the path for job logs directory - pub fn job_logs_dir(&self) -> PathBuf { - self.data_dir.join(&self.job_logging.log_directory) - } - /// Ensure all required directories exist /// /// Note: On iOS, create_dir_all() can fail with "Operation not permitted" due to sandboxing @@ -199,9 +194,7 @@ impl AppConfig { fs::create_dir_all(&self.data_dir)?; fs::create_dir_all(self.logs_dir())?; fs::create_dir_all(self.libraries_dir())?; - if self.job_logging.enabled { - fs::create_dir_all(self.job_logs_dir())?; - } + // Job logs are now stored per-library } Ok(()) diff --git a/core/src/context.rs b/core/src/context.rs index f7fd125a4..c39909ca5 100644 --- a/core/src/context.rs +++ b/core/src/context.rs @@ -72,9 +72,9 @@ impl CoreContext { } /// Set job logging configuration - pub fn set_job_logging(&mut self, config: JobLoggingConfig, logs_dir: PathBuf) { + pub fn set_job_logging(&mut self, config: JobLoggingConfig, logs_dir: Option) { self.job_logging_config = Some(config); - self.job_logs_dir = Some(logs_dir); + self.job_logs_dir = logs_dir; } /// Helper method for services to get the networking service diff --git a/core/src/infra/job/database.rs b/core/src/infra/job/database.rs index 3f0aa3886..fd3036c04 100644 --- a/core/src/infra/job/database.rs +++ b/core/src/infra/job/database.rs @@ -115,12 +115,13 @@ pub mod checkpoint { } /// Initialize job database -pub async fn init_database(path: &Path) -> JobResult { - // Ensure the directory exists - tokio::fs::create_dir_all(path).await?; +pub async fn init_database(db_file_path: &Path) -> JobResult { + // Ensure the parent directory exists + if let Some(parent) = db_file_path.parent() { + tokio::fs::create_dir_all(parent).await?; + } - let db_path = path.join("jobs.db"); - let db_url = format!("sqlite://{}?mode=rwc", db_path.display()); + let db_url = format!("sqlite://{}?mode=rwc", db_file_path.display()); let db = sea_orm::Database::connect(&db_url).await?; diff --git a/core/src/infra/job/manager.rs b/core/src/infra/job/manager.rs index b49af7f92..31cf1b8e2 100644 --- a/core/src/infra/job/manager.rs +++ b/core/src/infra/job/manager.rs @@ -52,7 +52,7 @@ impl JobManager { context: Arc, library_id: uuid::Uuid, ) -> JobResult { - // Initialize job database + // Initialize job database at library root let job_db_path = data_dir.join("jobs.db"); let db = database::init_database(&job_db_path).await?; @@ -277,7 +277,7 @@ impl JobManager { let executor = erased_job.create_executor( job_id, job_name.to_string(), - library, + library.clone(), self.db.clone(), status_tx.clone(), progress_tx, @@ -289,7 +289,7 @@ impl JobManager { networking, volume_manager, self.context.job_logging_config.clone(), - self.context.job_logs_dir.clone(), + Some(library.job_logs_dir()), Some(persistence_complete_tx), ); diff --git a/core/src/lib.rs b/core/src/lib.rs index 42156eca5..c5a5f1a34 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -140,16 +140,14 @@ impl Core { library_key_manager.clone(), ); - // Enable per-job file logging by default - let mut app_config = config.write().await; - if !app_config.job_logging.enabled { - app_config.job_logging.enabled = true; - } - // Ensure directory exists and apply to context - let logs_dir = app_config.job_logs_dir(); - let _ = std::fs::create_dir_all(&logs_dir); - context_inner.set_job_logging(app_config.job_logging.clone(), logs_dir); - drop(app_config); + // Enable per-job file logging by default + let mut app_config = config.write().await; + if !app_config.job_logging.enabled { + app_config.job_logging.enabled = true; + } + // Job logs are now stored per-library, not globally + context_inner.set_job_logging(app_config.job_logging.clone(), None); + drop(app_config); // Create the shared context let context = Arc::new(context_inner); diff --git a/core/src/library/manager.rs b/core/src/library/manager.rs index fe095e871..983178983 100644 --- a/core/src/library/manager.rs +++ b/core/src/library/manager.rs @@ -671,9 +671,7 @@ impl LibraryManager { context: Arc, ) -> Result<()> { // Create subdirectories - tokio::fs::create_dir_all(path.join("thumbnails")).await?; tokio::fs::create_dir_all(path.join("previews")).await?; - tokio::fs::create_dir_all(path.join("indexes")).await?; tokio::fs::create_dir_all(path.join("exports")).await?; // Virtual Sidecar root (for derivative data linked by Entry/Content IDs) tokio::fs::create_dir_all(path.join("sidecars")).await?; @@ -730,9 +728,7 @@ impl LibraryManager { context: Arc, ) -> Result<()> { // Create subdirectories - tokio::fs::create_dir_all(path.join("thumbnails")).await?; tokio::fs::create_dir_all(path.join("previews")).await?; - tokio::fs::create_dir_all(path.join("indexes")).await?; tokio::fs::create_dir_all(path.join("exports")).await?; tokio::fs::create_dir_all(path.join("sidecars")).await?; diff --git a/core/src/library/mod.rs b/core/src/library/mod.rs index fc589607d..cba3923c4 100644 --- a/core/src/library/mod.rs +++ b/core/src/library/mod.rs @@ -303,6 +303,11 @@ impl Library { self.path.join("thumbnails") } + /// Get the job logs directory for this library + pub fn job_logs_dir(&self) -> PathBuf { + self.path.join("logs") + } + /// Get the path for a specific thumbnail with size pub fn thumbnail_path(&self, cas_id: &str, size: u32) -> PathBuf { if cas_id.len() < 4 {