Merge pull request #2901 from spacedriveapp/ephemeral-cache

Introduce ephemeral index cache
Jamie Pine
2025-12-08 17:38:20 -08:00
committed by GitHub
99 changed files with 11880 additions and 7942 deletions

View File

@@ -38,6 +38,7 @@ lütke
marietti
mbps
mehrzad
memmap
Mjpeg
Mmap
mpscrr
@@ -77,6 +78,7 @@ tobiaslutke
tokio
tombstoned
typecheck
Uninit
unwatch
uuid
vdfs

BIN
Cargo.lock generated
View File

Binary file not shown.

View File

@@ -4,6 +4,7 @@ use uuid::Uuid;
use sd_core::{
domain::addressing::SdPath,
ops::core::ephemeral_status::EphemeralCacheStatusInput,
ops::indexing::{
input::IndexInput,
job::{IndexMode, IndexPersistence, IndexScope},
@@ -169,3 +170,19 @@ impl IndexVerifyArgs {
}
}
}
/// Arguments for ephemeral cache status
#[derive(Args, Debug, Clone)]
pub struct EphemeralCacheArgs {
/// Filter by path substring
#[arg(long)]
pub filter: Option<String>,
}
impl EphemeralCacheArgs {
pub fn to_input(&self) -> EphemeralCacheStatusInput {
EphemeralCacheStatusInput {
path_filter: self.filter.clone(),
}
}
}

View File

@@ -2,6 +2,7 @@ pub mod args;
use anyhow::Result;
use clap::Subcommand;
use comfy_table::{presets::UTF8_BORDERS_ONLY, Attribute, Cell, Table};
use crate::util::prelude::*;
@@ -20,6 +21,8 @@ pub enum IndexCmd {
Browse(BrowseArgs),
/// Verify index integrity for a path
Verify(IndexVerifyArgs),
/// Show ephemeral index cache status
EphemeralCache(EphemeralCacheArgs),
}
pub async fn run(ctx: &Context, cmd: IndexCmd) -> Result<()> {
@@ -232,6 +235,118 @@ pub async fn run(ctx: &Context, cmd: IndexCmd) -> Result<()> {
}
);
}
IndexCmd::EphemeralCache(args) => {
let input = args.to_input();
let out: sd_core::ops::core::ephemeral_status::EphemeralCacheStatus =
execute_core_query!(ctx, input);
print_output!(
ctx,
&out,
|status: &sd_core::ops::core::ephemeral_status::EphemeralCacheStatus| {
println!();
println!("╔══════════════════════════════════════════════════════════════╗");
println!("║ UNIFIED EPHEMERAL INDEX CACHE ║");
println!("╠══════════════════════════════════════════════════════════════╣");
println!(
"║ Indexed Paths: {:3} In Progress: {:3}",
status.indexed_paths_count, status.indexing_in_progress_count
);
println!("╚══════════════════════════════════════════════════════════════╝");
// Show unified index stats
let stats = &status.index_stats;
println!();
let mut stats_table = Table::new();
stats_table.load_preset(UTF8_BORDERS_ONLY);
stats_table.set_header(vec![
Cell::new("SHARED INDEX STATS").add_attribute(Attribute::Bold),
Cell::new(""),
]);
stats_table.add_row(vec![
"Total entries (shared arena)",
&stats.total_entries.to_string(),
]);
stats_table.add_row(vec![
"Path index count",
&stats.path_index_count.to_string(),
]);
stats_table.add_row(vec![
"Unique names (shared)",
&stats.unique_names.to_string(),
]);
stats_table.add_row(vec![
"Interned strings (shared)",
&stats.interned_strings.to_string(),
]);
stats_table.add_row(vec!["Content kinds", &stats.content_kinds.to_string()]);
stats_table.add_row(vec![
"Memory usage",
&format_bytes(stats.memory_bytes as u64),
]);
stats_table.add_row(vec!["Cache age", &format!("{:.1}s", stats.age_seconds)]);
stats_table.add_row(vec!["Idle time", &format!("{:.1}s", stats.idle_seconds)]);
println!("{}", stats_table);
// Show indexed paths
if status.indexed_paths.is_empty() && status.paths_in_progress.is_empty() {
println!("\n No paths indexed yet.");
} else {
// Paths in progress
if !status.paths_in_progress.is_empty() {
println!();
let mut progress_table = Table::new();
progress_table.load_preset(UTF8_BORDERS_ONLY);
progress_table
.set_header(vec![Cell::new("INDEXING IN PROGRESS")
.add_attribute(Attribute::Bold)]);
for path in &status.paths_in_progress {
progress_table.add_row(vec![format!("{}", path.display())]);
}
println!("{}", progress_table);
}
// Indexed paths
if !status.indexed_paths.is_empty() {
println!();
let mut paths_table = Table::new();
paths_table.load_preset(UTF8_BORDERS_ONLY);
paths_table.set_header(vec![
Cell::new("INDEXED PATHS").add_attribute(Attribute::Bold),
Cell::new("Children"),
]);
for info in &status.indexed_paths {
paths_table.add_row(vec![
format!("{}", info.path.display()),
info.child_count.to_string(),
]);
}
println!("{}", paths_table);
}
}
println!();
}
);
}
}
Ok(())
}
fn format_bytes(bytes: u64) -> String {
const UNITS: &[&str] = &["B", "KB", "MB", "GB", "TB"];
let mut size = bytes as f64;
let mut unit_index = 0;
while size >= 1024.0 && unit_index < UNITS.len() - 1 {
size /= 1024.0;
unit_index += 1;
}
if unit_index == 0 {
format!("{} {}", bytes, UNITS[unit_index])
} else {
format!("{:.1} {}", size, UNITS[unit_index])
}
}
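// A hedged sanity check on format_bytes above (illustrative test, not part of
// the original diff; the expected strings follow from the 1024 divisor and the
// integer-bytes special case for unit_index == 0):
#[cfg(test)]
mod format_bytes_tests {
use super::format_bytes;
#[test]
fn units_follow_the_1024_divisor() {
assert_eq!(format_bytes(512), "512 B"); // unit_index == 0 keeps integer bytes
assert_eq!(format_bytes(1536), "1.5 KB"); // 1536 / 1024 = 1.5
assert_eq!(format_bytes(1_073_741_824), "1.0 GB"); // 1024^3
}
}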

View File

@@ -268,4 +268,4 @@ async fn start_daemon(data_dir: &PathBuf) -> Result<()> {
tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
Ok(())
}
}

View File

@@ -2,5 +2,8 @@
"name": "sd-mobile-core",
"version": "1.0.0",
"main": "./src/index.ts",
"types": "./src/index.ts"
"types": "./src/index.ts",
"peerDependencies": {
"expo-modules-core": "*"
}
}

View File

@@ -1,13 +1,7 @@
// @ts-ignore - Expo modules types may not be available in all environments
const { EventEmitter, NativeModulesProxy } = require("expo-modules-core");
// TODO: Test if we can rely on Expo's autolinking instead of manually requiring the module
import { requireNativeModule, EventEmitter } from "expo-modules-core";
const SDMobileCoreModule = NativeModulesProxy?.SDMobileCore;
if (!SDMobileCoreModule) {
throw new Error(
"SDMobileCore native module not found. Did you run 'cargo xtask build-mobile' and rebuild the app?",
);
}
const SDMobileCoreModule = requireNativeModule("SDMobileCore");
const emitter = new EventEmitter(SDMobileCoreModule);

View File

@@ -800,17 +800,25 @@ async fn stop_daemon_process(
async fn check_daemon_installed() -> Result<bool, String> {
#[cfg(target_os = "macos")]
{
let home = std::env::var("HOME").map_err(|_| "Could not determine home directory".to_string())?;
let plist_path = std::path::PathBuf::from(home).join("Library/LaunchAgents/com.spacedrive.daemon.plist");
let home =
std::env::var("HOME").map_err(|_| "Could not determine home directory".to_string())?;
let plist_path =
std::path::PathBuf::from(home).join("Library/LaunchAgents/com.spacedrive.daemon.plist");
let exists = plist_path.exists();
tracing::info!("Checking daemon installation at {}: {}", plist_path.display(), exists);
tracing::info!(
"Checking daemon installation at {}: {}",
plist_path.display(),
exists
);
Ok(exists)
}
#[cfg(target_os = "linux")]
{
let home = std::env::var("HOME").map_err(|_| "Could not determine home directory".to_string())?;
let service_path = std::path::PathBuf::from(home).join(".config/systemd/user/spacedrive-daemon.service");
let home =
std::env::var("HOME").map_err(|_| "Could not determine home directory".to_string())?;
let service_path =
std::path::PathBuf::from(home).join(".config/systemd/user/spacedrive-daemon.service");
Ok(service_path.exists())
}
@@ -865,7 +873,8 @@ async fn install_daemon_service(
{
use std::io::Write;
let home = std::env::var("HOME").map_err(|_| "Could not determine home directory".to_string())?;
let home =
std::env::var("HOME").map_err(|_| "Could not determine home directory".to_string())?;
let launch_agents_dir = std::path::PathBuf::from(&home).join("Library/LaunchAgents");
std::fs::create_dir_all(&launch_agents_dir)
@@ -881,7 +890,10 @@ async fn install_daemon_service(
.join("sd-daemon");
if !daemon_path.exists() {
return Err(format!("Daemon binary not found at {}", daemon_path.display()));
return Err(format!(
"Daemon binary not found at {}",
daemon_path.display()
));
}
let log_dir = data_dir.join("logs");
@@ -938,7 +950,10 @@ async fn install_daemon_service(
.output()
.map_err(|e| format!("Failed to load service: {}", e))?;
tracing::info!("launchctl load output: {:?}", String::from_utf8_lossy(&output.stdout));
tracing::info!(
"launchctl load output: {:?}",
String::from_utf8_lossy(&output.stdout)
);
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
tracing::error!("launchctl load failed: {:?}", stderr);
@@ -980,7 +995,8 @@ async fn install_daemon_service(
{
use std::io::Write;
let home = std::env::var("HOME").map_err(|_| "Could not determine home directory".to_string())?;
let home =
std::env::var("HOME").map_err(|_| "Could not determine home directory".to_string())?;
let systemd_dir = std::path::PathBuf::from(&home).join(".config/systemd/user");
std::fs::create_dir_all(&systemd_dir)
@@ -995,7 +1011,10 @@ async fn install_daemon_service(
.join("sd-daemon");
if !daemon_path.exists() {
return Err(format!("Daemon binary not found at {}", daemon_path.display()));
return Err(format!(
"Daemon binary not found at {}",
daemon_path.display()
));
}
let service_content = format!(
@@ -1112,7 +1131,10 @@ WantedBy=default.target
.join("sd-daemon.exe");
if !daemon_path.exists() {
return Err(format!("Daemon binary not found at {}", daemon_path.display()));
return Err(format!(
"Daemon binary not found at {}",
daemon_path.display()
));
}
// Delete existing task if it exists
@@ -1248,8 +1270,10 @@ WantedBy=default.target
async fn uninstall_daemon_service() -> Result<(), String> {
#[cfg(target_os = "macos")]
{
let home = std::env::var("HOME").map_err(|_| "Could not determine home directory".to_string())?;
let plist_path = std::path::PathBuf::from(&home).join("Library/LaunchAgents/com.spacedrive.daemon.plist");
let home =
std::env::var("HOME").map_err(|_| "Could not determine home directory".to_string())?;
let plist_path = std::path::PathBuf::from(&home)
.join("Library/LaunchAgents/com.spacedrive.daemon.plist");
if plist_path.exists() {
// Unload the service
@@ -1266,8 +1290,10 @@ async fn uninstall_daemon_service() -> Result<(), String> {
#[cfg(target_os = "linux")]
{
let home = std::env::var("HOME").map_err(|_| "Could not determine home directory".to_string())?;
let service_path = std::path::PathBuf::from(&home).join(".config/systemd/user/spacedrive-daemon.service");
let home =
std::env::var("HOME").map_err(|_| "Could not determine home directory".to_string())?;
let service_path =
std::path::PathBuf::from(&home).join(".config/systemd/user/spacedrive-daemon.service");
if service_path.exists() {
// Stop and disable the service

View File

@@ -19,9 +19,10 @@ cli = []
[dependencies]
# Async runtime
async-trait = "0.1"
futures = "0.3"
tokio = { version = "1.40", features = ["full"] }
async-channel = { workspace = true }
async-trait = "0.1"
futures = "0.3"
tokio = { version = "1.40", features = ["full"] }
# Database
sea-orm = { version = "1.1", features = [
@@ -166,6 +167,12 @@ once_cell = "1.20"
rand = "0.8" # Random number generation for secure delete
tempfile = "3.14" # Temporary directories for testing
uuid = { version = "1.11", features = ["serde", "v4", "v5", "v7"] }
# High-performance ephemeral index
memmap2 = "0.9" # Memory-mapped file support for arena storage
smallvec = "1.13" # Small vector optimization for children arrays
parking_lot = "0.12" # Fast mutex for name cache
num_cpus = "1.16" # CPU count for parallel walker
whoami = "1.5"
# Secure storage

View File

@@ -3,9 +3,9 @@
use crate::{
config::JobLoggingConfig, crypto::key_manager::KeyManager, device::DeviceManager,
infra::action::manager::ActionManager, infra::event::EventBus, infra::sync::TransactionManager,
library::LibraryManager, service::network::NetworkingService,
service::session::SessionStateService, service::sidecar_manager::SidecarManager,
volume::VolumeManager,
library::LibraryManager, ops::indexing::ephemeral::EphemeralIndexCache,
service::network::NetworkingService, service::session::SessionStateService,
service::sidecar_manager::SidecarManager, volume::VolumeManager,
};
use std::{path::PathBuf, sync::Arc};
use tokio::sync::{Mutex, RwLock};
@@ -22,6 +22,8 @@ pub struct CoreContext {
pub action_manager: Arc<RwLock<Option<Arc<ActionManager>>>>,
pub networking: Arc<RwLock<Option<Arc<NetworkingService>>>>,
pub plugin_manager: Arc<RwLock<Option<Arc<RwLock<crate::infra::extension::PluginManager>>>>>,
// Ephemeral index cache for unmanaged paths
pub ephemeral_index_cache: Arc<EphemeralIndexCache>,
// Job logging configuration
pub job_logging_config: Option<JobLoggingConfig>,
pub job_logs_dir: Option<PathBuf>,
@@ -47,11 +49,19 @@ impl CoreContext {
action_manager: Arc::new(RwLock::new(None)),
networking: Arc::new(RwLock::new(None)),
plugin_manager: Arc::new(RwLock::new(None)),
ephemeral_index_cache: Arc::new(
EphemeralIndexCache::new().expect("Failed to create ephemeral index cache"),
),
job_logging_config: None,
job_logs_dir: None,
}
}
/// Get the ephemeral index cache
pub fn ephemeral_cache(&self) -> &Arc<EphemeralIndexCache> {
&self.ephemeral_index_cache
}
/// Get the library manager
pub async fn libraries(&self) -> Arc<LibraryManager> {
self.library_manager.read().await.clone().unwrap()

View File

@@ -138,7 +138,8 @@ impl CloudCredentialManager {
// Decrypt
let library_key = self.key_manager.get_library_key(library_id).await?;
let decrypted = self.decrypt_credential(&credential_model.encrypted_credential, &library_key)?;
let decrypted =
self.decrypt_credential(&credential_model.encrypted_credential, &library_key)?;
// Deserialize
let credential: CloudCredential = serde_json::from_slice(&decrypted)?;

View File

@@ -78,8 +78,8 @@ pub struct File {
pub accessed_at: Option<DateTime<Utc>>,
/// Additional computed fields
pub content_kind: ContentKind, // This is redundant with ContentIdentity, it lives inside
pub is_local: bool, // this is also redundant with SdPath
pub content_kind: ContentKind, // Populated by the ephemeral indexer, for when a File does not have a ContentIdentity
pub is_local: bool, // this is redundant with SdPath
/// Video duration (for grid display optimization)
pub duration_seconds: Option<f64>,
@@ -425,7 +425,7 @@ impl File {
/// This is used for ephemeral indexing where files are discovered but not persisted to the database.
pub fn from_ephemeral(
id: Uuid,
metadata: &crate::ops::indexing::entry::EntryMetadata,
metadata: &crate::ops::indexing::database_storage::EntryMetadata,
sd_path: SdPath,
) -> Self {
let is_local = sd_path.is_local();

View File

@@ -145,6 +145,35 @@ impl FileTypeRegistry {
.collect()
}
/// Fast identification by extension only (no file I/O)
///
/// This is useful for quick file type detection during indexing where
/// we don't need high-confidence identification. Returns the content kind
/// based purely on extension matching.
///
/// Returns `ContentKind::Unknown` if the extension is not recognized.
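///
/// A hedged usage sketch (hypothetical paths; `registry` is assumed to be a
/// `FileTypeRegistry` instance):
///
/// ```ignore
/// let kind = registry.identify_by_extension(Path::new("photo.heic"));
/// // No extension (or an unrecognized one) falls back to Unknown:
/// assert_eq!(
///     registry.identify_by_extension(Path::new("README")),
///     ContentKind::Unknown
/// );
/// ```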
pub fn identify_by_extension(&self, path: &Path) -> ContentKind {
let extension = match path.extension().and_then(|s| s.to_str()) {
Some(ext) => ext,
None => return ContentKind::Unknown,
};
let candidates = self.get_by_extension(extension);
match candidates.len() {
0 => ContentKind::Unknown,
1 => candidates[0].category,
_ => {
// Multiple matches - pick highest priority
candidates
.iter()
.max_by_key(|ft| ft.priority)
.map(|ft| ft.category)
.unwrap_or(ContentKind::Unknown)
}
}
}
/// Identify a file type from a path
pub async fn identify(&self, path: &Path) -> Result<IdentificationResult> {
// Get extension

View File

@@ -91,9 +91,7 @@ impl ActionManager {
.await?;
// Validate the action first
let validation_result = action
.validate(&library, self.context.clone())
.await?;
let validation_result = action.validate(&library, self.context.clone()).await?;
// Check if confirmation is required
match validation_result {

View File

@@ -58,14 +58,18 @@ pub trait CoreAction: Send + Sync + 'static {
fn validate(
&self,
_context: std::sync::Arc<crate::context::CoreContext>,
) -> impl std::future::Future<Output = Result<ValidationResult, crate::infra::action::error::ActionError>> + Send
{
) -> impl std::future::Future<
Output = Result<ValidationResult, crate::infra::action::error::ActionError>,
> + Send {
async { Ok(ValidationResult::Success) }
}
/// Resolve a user confirmation choice (optional)
/// Called when the action previously returned RequiresConfirmation
fn resolve_confirmation(&mut self, _choice_index: usize) -> Result<(), crate::infra::action::error::ActionError> {
fn resolve_confirmation(
&mut self,
_choice_index: usize,
) -> Result<(), crate::infra::action::error::ActionError> {
Ok(())
}
@@ -102,14 +106,18 @@ pub trait LibraryAction: Send + Sync + 'static {
&self,
_library: &std::sync::Arc<crate::library::Library>,
_context: std::sync::Arc<crate::context::CoreContext>,
) -> impl std::future::Future<Output = Result<ValidationResult, crate::infra::action::error::ActionError>> + Send
{
) -> impl std::future::Future<
Output = Result<ValidationResult, crate::infra::action::error::ActionError>,
> + Send {
async { Ok(ValidationResult::Success) }
}
/// Resolve a user confirmation choice (optional)
/// Called when the action previously returned RequiresConfirmation
fn resolve_confirmation(&mut self, _choice_index: usize) -> Result<(), crate::infra::action::error::ActionError> {
fn resolve_confirmation(
&mut self,
_choice_index: usize,
) -> Result<(), crate::infra::action::error::ActionError> {
Ok(())
}

View File

@@ -336,7 +336,7 @@ impl crate::infra::sync::Syncable for Model {
// Use delete_subtree_internal to cascade delete entire subtree
// This avoids creating tombstones (we're applying a tombstone)
crate::ops::indexing::responder::delete_subtree_internal(entry.id, db).await?;
crate::ops::indexing::DatabaseStorage::delete_subtree(entry.id, db).await?;
Ok(())
}

View File

@@ -330,7 +330,7 @@ impl Syncable for Model {
// Delete root entry tree first if it exists
// Use delete_subtree_internal to avoid creating tombstones (we're applying a tombstone)
if let Some(entry_id) = location.entry_id {
crate::ops::indexing::responder::delete_subtree_internal(entry_id, db).await?;
crate::ops::indexing::DatabaseStorage::delete_subtree(entry_id, db).await?;
}
// Delete location record

View File

@@ -94,16 +94,13 @@ impl BatchAggregator {
}
/// Add records to the batch
pub async fn add_records(
&self,
model_type: String,
count: u64,
peer_id: Option<Uuid>,
) {
pub async fn add_records(&self, model_type: String, count: u64, peer_id: Option<Uuid>) {
let key = BatchKey { peer_id };
let mut batches = self.pending_batches.write().await;
let batch = batches.entry(key.clone()).or_insert_with(|| PendingBatch::new(peer_id));
let batch = batches
.entry(key.clone())
.or_insert_with(|| PendingBatch::new(peer_id));
batch.add(model_type, count);
@@ -195,7 +192,8 @@ impl BatchAggregator {
let keys_to_flush: Vec<BatchKey> = batches
.iter()
.filter(|(_, batch)| {
now.signed_duration_since(batch.started_at) >= chrono::Duration::from_std(self.config.flush_interval).unwrap()
now.signed_duration_since(batch.started_at)
>= chrono::Duration::from_std(self.config.flush_interval).unwrap()
})
.map(|(k, _)| k.clone())
.collect();

View File

@@ -25,11 +25,7 @@ pub struct SyncEventLogger {
impl SyncEventLogger {
/// Create a new event logger
pub fn new(
library_id: Uuid,
device_id: Uuid,
conn: Arc<DatabaseConnection>,
) -> Self {
pub fn new(library_id: Uuid, device_id: Uuid, conn: Arc<DatabaseConnection>) -> Self {
Self {
library_id,
device_id,
@@ -55,10 +51,7 @@ impl SyncEventLogger {
.map(|d| serde_json::to_string(d))
.transpose()?;
let model_types_str = event
.model_types
.as_ref()
.map(|types| types.join(","));
let model_types_str = event.model_types.as_ref().map(|types| types.join(","));
self.conn
.execute(Statement::from_sql_and_values(
@@ -161,8 +154,7 @@ impl SyncEventLogger {
where_clause, limit, offset
);
let param_values: Vec<sea_orm::Value> =
params.into_iter().map(|p| p.into()).collect();
let param_values: Vec<sea_orm::Value> = params.into_iter().map(|p| p.into()).collect();
let stmt = Statement::from_sql_and_values(DbBackend::Sqlite, &sql, param_values);
@@ -194,8 +186,7 @@ impl SyncEventLogger {
Ok(SyncEventLog {
id: Some(id),
timestamp: DateTime::parse_from_rfc3339(&timestamp_str)?
.with_timezone(&Utc),
timestamp: DateTime::parse_from_rfc3339(&timestamp_str)?.with_timezone(&Utc),
device_id: Uuid::parse_str(&device_id_str)?,
event_type: SyncEventType::from_str(&event_type_str)
.ok_or_else(|| anyhow::anyhow!("Invalid event type: {}", event_type_str))?,
@@ -213,8 +204,7 @@ impl SyncEventLogger {
peer_device_id: peer_device_id_str
.as_ref()
.and_then(|s| Uuid::parse_str(s).ok()),
model_types: model_types_str
.map(|s| s.split(',').map(|t| t.to_string()).collect()),
model_types: model_types_str.map(|s| s.split(',').map(|t| t.to_string()).collect()),
record_count: record_count.map(|c| c as u64),
duration_ms: duration_ms.map(|d| d as u64),
})

View File

@@ -182,8 +182,7 @@ impl QueryBuilder {
}
pub fn add_model_type_filter(&mut self, model_type: &str) {
self.where_clauses
.push("model_types LIKE ?".to_string());
self.where_clauses.push("model_types LIKE ?".to_string());
self.params.push(format!("%{}%", model_type));
}

View File

@@ -33,11 +33,7 @@ pub struct SyncEventLog {
impl SyncEventLog {
/// Create a new event with common fields pre-filled
pub fn new(
device_id: Uuid,
event_type: SyncEventType,
summary: impl Into<String>,
) -> Self {
pub fn new(device_id: Uuid, event_type: SyncEventType, summary: impl Into<String>) -> Self {
let (category, severity) = event_type.default_category_and_severity();
Self {

View File

@@ -110,7 +110,11 @@ impl Core {
)?);
// Initialize device manager
let device = Arc::new(DeviceManager::init(&data_dir, key_manager.clone(), system_device_name)?);
let device = Arc::new(DeviceManager::init(
&data_dir,
key_manager.clone(),
system_device_name,
)?);
// Set a global device ID and slug for convenience
crate::device::set_current_device_id(device.device_id()?);

View File

@@ -1245,7 +1245,10 @@ impl LibraryManager {
.await
{
Ok((location_id, _)) => {
info!("Created default location '{}' at {:?} ({})", name, path, location_id);
info!(
"Created default location '{}' at {:?} ({})",
name, path, location_id
);
}
Err(e) => {
warn!("Failed to create default location '{}': {}", name, e);

View File

@@ -501,7 +501,7 @@ impl LocationManager {
// Delete the root entry tree first if it exists
// Use delete_subtree_internal to avoid creating entry tombstones (we'll tombstone the location instead)
if let Some(entry_id) = location.entry_id {
crate::ops::indexing::responder::delete_subtree_internal(entry_id, library.db().conn())
crate::ops::indexing::DatabaseStorage::delete_subtree(entry_id, library.db().conn())
.await
.map_err(|e| LocationError::Other(format!("Failed to delete entry tree: {}", e)))?;
}

View File

@@ -0,0 +1,9 @@
//! Ephemeral index cache status query
//!
//! Provides debugging information about the ephemeral index cache.
pub mod output;
pub mod query;
pub use output::*;
pub use query::*;

View File

@@ -0,0 +1,98 @@
//! Ephemeral index cache status output types
use serde::{Deserialize, Serialize};
use specta::Type;
use std::path::PathBuf;
/// Status of the unified ephemeral index cache
#[derive(Debug, Clone, Serialize, Deserialize, Type)]
pub struct EphemeralCacheStatus {
/// Number of paths that have been indexed
pub indexed_paths_count: usize,
/// Number of paths currently being indexed
pub indexing_in_progress_count: usize,
/// Unified index statistics (shared arena and string interning)
pub index_stats: UnifiedIndexStats,
/// List of indexed paths (directories whose contents are ready)
pub indexed_paths: Vec<IndexedPathInfo>,
/// List of paths currently being indexed
pub paths_in_progress: Vec<PathBuf>,
// Legacy fields for backward compatibility
#[serde(skip_serializing_if = "Option::is_none")]
pub total_indexes: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub indexing_in_progress: Option<usize>,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
pub indexes: Vec<EphemeralIndexInfo>,
}
/// Statistics for the unified ephemeral index
#[derive(Debug, Clone, Serialize, Deserialize, Type)]
pub struct UnifiedIndexStats {
/// Total entries in the shared arena
pub total_entries: usize,
/// Number of entries indexed by path
pub path_index_count: usize,
/// Number of unique interned names (shared across all paths)
pub unique_names: usize,
/// Number of interned strings in shared cache
pub interned_strings: usize,
/// Number of content kinds stored
pub content_kinds: usize,
/// Estimated memory usage in bytes
pub memory_bytes: usize,
/// Age of the cache in seconds
pub age_seconds: f64,
/// Seconds since last access
pub idle_seconds: f64,
}
/// Information about an indexed path
#[derive(Debug, Clone, Serialize, Deserialize, Type)]
pub struct IndexedPathInfo {
/// The directory path that was indexed
pub path: PathBuf,
/// Number of direct children in this directory
pub child_count: usize,
}
/// Legacy: Information about a single ephemeral index (for backward compatibility)
#[derive(Debug, Clone, Serialize, Deserialize, Type)]
pub struct EphemeralIndexInfo {
/// Root path this index covers
pub root_path: PathBuf,
/// Whether indexing is currently in progress
pub indexing_in_progress: bool,
/// Total entries in the arena
pub total_entries: usize,
/// Number of entries indexed by path
pub path_index_count: usize,
/// Number of unique interned names
pub unique_names: usize,
/// Number of interned strings in cache
pub interned_strings: usize,
/// Number of content kinds stored
pub content_kinds: usize,
/// Estimated memory usage in bytes
pub memory_bytes: usize,
/// Age of the index in seconds
pub age_seconds: f64,
/// Seconds since last access
pub idle_seconds: f64,
/// Indexer job statistics (files/dirs/bytes counted)
pub job_stats: JobStats,
}
/// Statistics from the indexer job
#[derive(Debug, Clone, Serialize, Deserialize, Type)]
pub struct JobStats {
/// Number of files indexed
pub files: u64,
/// Number of directories indexed
pub dirs: u64,
/// Number of symlinks indexed
pub symlinks: u64,
/// Total bytes indexed
pub bytes: u64,
}

View File

@@ -0,0 +1,107 @@
//! Ephemeral index cache status query
//!
//! Provides a snapshot of the unified ephemeral index for debugging.
use super::output::*;
use crate::{
context::CoreContext,
infra::query::{CoreQuery, QueryResult},
};
use serde::{Deserialize, Serialize};
use specta::Type;
use std::sync::Arc;
/// Input for the ephemeral cache status query
#[derive(Debug, Clone, Serialize, Deserialize, Type, Default)]
pub struct EphemeralCacheStatusInput {
/// Optional: only include indexed paths containing this substring
#[serde(default)]
pub path_filter: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize, Type)]
pub struct EphemeralCacheStatusQuery {
input: EphemeralCacheStatusInput,
}
impl CoreQuery for EphemeralCacheStatusQuery {
type Input = EphemeralCacheStatusInput;
type Output = EphemeralCacheStatus;
fn from_input(input: Self::Input) -> QueryResult<Self> {
Ok(Self { input })
}
async fn execute(
self,
context: Arc<CoreContext>,
_session: crate::infra::api::SessionContext,
) -> QueryResult<Self::Output> {
let cache = context.ephemeral_cache();
// Get cache stats
let cache_stats = cache.stats();
let all_indexed_paths = cache.indexed_paths();
let paths_in_progress = cache.paths_in_progress();
// Get the global index for detailed stats
let global_index = cache.get_global_index();
let index = global_index.read().await;
let stats = index.get_stats();
// Build unified index stats
let index_stats = UnifiedIndexStats {
total_entries: stats.total_entries,
path_index_count: index.path_index_count(),
unique_names: stats.unique_names,
interned_strings: stats.interned_strings,
content_kinds: index.content_kinds_count(),
memory_bytes: stats.memory_bytes,
age_seconds: cache.age().as_secs_f64(),
idle_seconds: index.idle_time().as_secs_f64(),
};
// Build indexed paths info with child counts
let mut indexed_paths = Vec::new();
for path in all_indexed_paths {
// Apply path filter if provided
if let Some(ref filter) = self.input.path_filter {
if !path.to_string_lossy().contains(filter) {
continue;
}
}
// Get child count for this directory
let child_count = index.list_directory(&path).map(|c| c.len()).unwrap_or(0);
indexed_paths.push(IndexedPathInfo { path, child_count });
}
// Sort by path for consistent output
indexed_paths.sort_by(|a, b| a.path.cmp(&b.path));
// Filter paths in progress
let filtered_in_progress: Vec<_> = if let Some(ref filter) = self.input.path_filter {
paths_in_progress
.into_iter()
.filter(|p| p.to_string_lossy().contains(filter))
.collect()
} else {
paths_in_progress
};
Ok(EphemeralCacheStatus {
indexed_paths_count: cache_stats.indexed_paths,
indexing_in_progress_count: cache_stats.indexing_in_progress,
index_stats,
indexed_paths,
paths_in_progress: filtered_in_progress,
// Legacy fields
total_indexes: None,
indexing_in_progress: None,
indexes: Vec::new(),
})
}
}
crate::register_core_query!(EphemeralCacheStatusQuery, "core.ephemeral_status");
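// For orientation: the CLI command added earlier in this diff drives this
// query with exactly this input/output pair. A condensed, hedged sketch
// (the filter value is hypothetical):
//
//     let input = EphemeralCacheStatusInput { path_filter: Some("Downloads".into()) };
//     let status: EphemeralCacheStatus = execute_core_query!(ctx, input);
//     println!("{} paths indexed", status.indexed_paths_count);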

View File

@@ -1,2 +1,3 @@
pub mod ephemeral_status;
pub mod events;
pub mod status;

View File

@@ -140,7 +140,9 @@ impl LibraryQuery for DirectoryListingQuery {
if let Some(should_use_ephemeral) = self.check_location_index_mode(db.conn()).await {
if should_use_ephemeral {
tracing::info!("Location has IndexMode::None, using ephemeral indexing");
return self.query_ephemeral_directory_impl(context, library_id).await;
return self
.query_ephemeral_directory_impl(context, library_id)
.await;
}
}
@@ -615,39 +617,173 @@ impl DirectoryListingQuery {
})
}
/// Query ephemeral directory (not indexed) - trigger on-demand indexing
/// Query ephemeral directory (not indexed) - check cache first, then trigger on-demand indexing
async fn query_ephemeral_directory_impl(
&self,
context: Arc<CoreContext>,
library_id: Uuid,
) -> QueryResult<DirectoryListingOutput> {
use crate::ops::indexing::{IndexMode, IndexScope, IndexerJob, IndexerJobConfig};
use crate::domain::file::File;
use crate::ops::indexing::{IndexScope, IndexerJob, IndexerJobConfig};
// Get the local path for cache lookup
let local_path = match &self.input.path {
SdPath::Physical { path, .. } => path.clone(),
_ => {
tracing::warn!(
"Ephemeral indexing only supported for physical paths: {:?}",
self.input.path
);
return Ok(DirectoryListingOutput {
files: Vec::new(),
total_count: 0,
has_more: false,
});
}
};
let cache = context.ephemeral_cache();
// Check if we have a cached index that covers this path
if let Some(index) = cache.get_for_path(&local_path) {
tracing::info!(
"Found cached ephemeral index for path: {}",
local_path.display()
);
// Try to get directory listing from cached index
let index_guard = index.read().await;
// Check if the index actually has entries for this directory
if let Some(children) = index_guard.list_directory(&local_path) {
tracing::debug!(
"Cached index has {} children for {}",
children.len(),
local_path.display()
);
// Convert cached entries to File objects
let mut files = Vec::new();
for child_path in children {
if let Some(metadata) = index_guard.get_entry_ref(&child_path) {
// Apply hidden file filter
if !self.input.include_hidden.unwrap_or(false) && metadata.is_hidden {
continue;
}
// Get UUID from index
let entry_uuid = index_guard
.get_entry_uuid(&child_path)
.unwrap_or_else(Uuid::new_v4);
// Build SdPath for this entry
let entry_sd_path = SdPath::Physical {
device_slug: match &self.input.path {
SdPath::Physical { device_slug, .. } => device_slug.clone(),
_ => String::new(),
},
path: child_path.clone(),
};
// Get content kind from index (identified by extension)
let content_kind = index_guard.get_content_kind(&child_path);
// Convert to File
let mut file = File::from_ephemeral(entry_uuid, &metadata, entry_sd_path);
file.content_kind = content_kind;
files.push(file);
}
}
// Apply sorting
self.sort_files(&mut files);
// Apply limit
let total_count = files.len() as u32;
let has_more = if let Some(limit) = self.input.limit {
if files.len() > limit as usize {
files.truncate(limit as usize);
true
} else {
false
}
} else {
false
};
return Ok(DirectoryListingOutput {
files,
total_count,
has_more,
});
}
// Index exists but doesn't have this directory yet
// Fall through to spawn indexer job
tracing::debug!(
"Cached index doesn't contain directory: {}",
local_path.display()
);
}
// No cached index or index doesn't cover this path
// Check if indexing is already in progress
if cache.is_indexing(&local_path) {
tracing::info!("Indexing already in progress for: {}", local_path.display());
// Return empty, UI will get updates via events
return Ok(DirectoryListingOutput {
files: Vec::new(),
total_count: 0,
has_more: false,
});
}
tracing::info!(
"Path not indexed, triggering ephemeral indexing for: {:?}",
"No cached index, triggering ephemeral indexing for: {:?}",
self.input.path
);
// Get library to dispatch indexer job
if let Some(library) = context.get_library(library_id).await {
// Create cache entry and get the index to share with the job
let ephemeral_index = cache.create_for_indexing(local_path.clone());
// Clear any stale entries from previous indexing (prevents ghost files)
let cleared = cache.clear_for_reindex(&local_path).await;
if cleared > 0 {
tracing::debug!(
"Cleared {} stale entries for re-indexing: {}",
cleared,
local_path.display()
);
}
// Create ephemeral indexer job for this directory (shallow, current scope only)
let config = IndexerJobConfig::ephemeral_browse(
self.input.path.clone(),
IndexScope::Current, // Only current directory, not recursive
);
let indexer_job = IndexerJob::new(config);
let mut indexer_job = IndexerJob::new(config);
// Dispatch job asynchronously (fire and forget)
// Share the cached index with the job
indexer_job.set_ephemeral_index(ephemeral_index);
// Dispatch job asynchronously
// The job will emit ResourceChanged events as files are discovered
if let Err(e) = library.jobs().dispatch(indexer_job).await {
tracing::warn!(
"Failed to dispatch ephemeral indexer for {:?}: {}",
self.input.path,
e
);
} else {
tracing::info!("Dispatched ephemeral indexer for {:?}", self.input.path);
match library.jobs().dispatch(indexer_job).await {
Ok(_) => {
tracing::info!("Dispatched ephemeral indexer for {:?}", self.input.path);
}
Err(e) => {
tracing::warn!(
"Failed to dispatch ephemeral indexer for {:?}: {}",
self.input.path,
e
);
// Mark indexing as not in progress since job failed
cache.mark_indexing_complete(&local_path);
}
}
}
@@ -659,6 +795,42 @@ impl DirectoryListingQuery {
has_more: false,
})
}
/// Sort files according to the input options
fn sort_files(&self, files: &mut Vec<File>) {
use crate::domain::file::EntryKind;
let folders_first = self.input.folders_first.unwrap_or(false);
files.sort_by(|a, b| {
// Folders first if enabled
if folders_first {
let a_is_dir = matches!(a.kind, EntryKind::Directory);
let b_is_dir = matches!(b.kind, EntryKind::Directory);
if a_is_dir != b_is_dir {
return b_is_dir.cmp(&a_is_dir); // Directories first
}
}
// Then apply sort order
match self.input.sort_by {
DirectorySortBy::Name => a.name.to_lowercase().cmp(&b.name.to_lowercase()),
DirectorySortBy::Modified => b.modified_at.cmp(&a.modified_at),
DirectorySortBy::Size => b.size.cmp(&a.size),
DirectorySortBy::Type => {
// Sort by kind (directories first), then name
if !folders_first {
let a_is_dir = matches!(a.kind, EntryKind::Directory);
let b_is_dir = matches!(b.kind, EntryKind::Directory);
if a_is_dir != b_is_dir {
return b_is_dir.cmp(&a_is_dir);
}
}
a.name.to_lowercase().cmp(&b.name.to_lowercase())
}
}
});
}
}
impl DirectoryListingQuery {
@@ -677,7 +849,9 @@ impl DirectoryListingQuery {
for loc in locations {
// Get the location's root path
if let Some(entry_id) = loc.entry_id {
if let Ok(Some(dir_path)) = directory_paths::Entity::find_by_id(entry_id).one(db).await {
if let Ok(Some(dir_path)) =
directory_paths::Entity::find_by_id(entry_id).one(db).await
{
// Check if this location's path is a parent of the requested path
if path_str.starts_with(&dir_path.path) {
// Check if index_mode is "none"

View File

@@ -1,4 +1,9 @@
//! Indexing action handler
//! # Indexing Action Handler
//!
//! Bridges user-facing indexing requests (from CLI, API, UI) to the internal IndexerJob system.
//! Actions validate inputs, convert paths to SdPaths, dispatch jobs to the library's job queue,
//! and track execution context for observability. Each action can spawn multiple jobs (one per
//! path), but returns only the last handle for API simplicity.
use super::job::{IndexMode, IndexPersistence, IndexScope, IndexerJob, IndexerJobConfig};
use super::IndexInput;
@@ -64,7 +69,6 @@ impl LibraryAction for IndexingAction {
_library: &std::sync::Arc<crate::library::Library>,
_context: std::sync::Arc<crate::context::CoreContext>,
) -> Result<crate::infra::action::ValidationResult, ActionError> {
// Validate input
if let Err(errors) = self.input.validate() {
return Err(ActionError::Validation {
field: "paths".to_string(),
@@ -79,10 +83,6 @@ impl LibraryAction for IndexingAction {
library: std::sync::Arc<crate::library::Library>,
context: Arc<CoreContext>,
) -> Result<Self::Output, ActionError> {
// Validation is now handled by ActionManager before execute
// For now, submit one job per path (sequentially). Could be parallelized later.
// Return the handle of the last job submitted for convenience.
let mut last_handle: Option<JobHandle> = None;
for path in &self.input.paths {
@@ -93,16 +93,13 @@ impl LibraryAction for IndexingAction {
IndexerJobConfig::ephemeral_browse(sd_path, self.input.scope)
}
IndexPersistence::Persistent => {
// Persistent indexing expects a location context. For now, default to recursive path walk with selected mode.
// If we later bind paths to a location, we can set location_id properly.
// Here use ui_navigation/new with mode overridden below when possible.
// Persistent mode stores entries in the database but doesn't require a location binding yet.
let mut c = IndexerJobConfig::ephemeral_browse(sd_path, self.input.scope);
c.persistence = IndexPersistence::Persistent;
c
}
};
// Apply selected mode
config.mode = self.input.mode;
// TODO: Apply include_hidden via rule_toggles when available

View File

@@ -0,0 +1,283 @@
//! Change detector for batch indexing scans.
//!
//! The `ChangeDetector` compares database state against filesystem state
//! during indexer job scans. It identifies (see the sketch after this list):
//! - New files/directories (not in database)
//! - Modified entries (size or mtime changed)
//! - Moved entries (same inode, different path)
//! - Deleted entries (in database but not on disk)
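//!
//! A hedged consumer sketch (variants from `types::Change` as used in this
//! file; note that `Deleted` comes from `find_deleted`, not `check_path`):
//!
//! ```ignore
//! match detector.check_path(&path, &fs_metadata, inode) {
//!     Some(Change::New(p)) => { /* insert a fresh entry */ }
//!     Some(Change::Modified { entry_id, .. }) => { /* update size/mtime */ }
//!     Some(Change::Moved { old_path, new_path, .. }) => { /* re-parent */ }
//!     _ => {} // unchanged (None), or Deleted from find_deleted
//! }
//! ```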
use super::types::Change;
use crate::infra::job::prelude::JobContext;
use crate::ops::indexing::state::EntryKind;
use std::{
collections::HashMap,
path::{Path, PathBuf},
time::SystemTime,
};
/// Tracks changes between database state and filesystem during batch scans.
///
/// Used by the indexer job to efficiently detect what needs to be created,
/// updated, moved, or deleted. Loads existing entries from the database,
/// then compares against filesystem walks.
pub struct ChangeDetector {
/// Maps paths to their database entries
path_to_entry: HashMap<PathBuf, DatabaseEntry>,
/// Maps inodes to paths (for detecting moves)
inode_to_path: HashMap<u64, PathBuf>,
/// Precision for timestamp comparison (some filesystems have lower precision)
timestamp_precision_ms: i64,
/// Cache for file existence checks to avoid repeated filesystem calls
existence_cache: HashMap<PathBuf, bool>,
}
#[derive(Debug, Clone)]
struct DatabaseEntry {
id: i32,
path: PathBuf,
kind: EntryKind,
size: u64,
modified: Option<SystemTime>,
inode: Option<u64>,
}
impl ChangeDetector {
/// Create a new change detector
pub fn new() -> Self {
Self {
path_to_entry: HashMap::new(),
inode_to_path: HashMap::new(),
timestamp_precision_ms: 1, // Default to 1ms precision
existence_cache: HashMap::new(),
}
}
/// Load existing entries from database for a location, scoped to indexing path
pub async fn load_existing_entries(
&mut self,
ctx: &JobContext<'_>,
location_id: i32,
indexing_path: &Path,
) -> Result<(), crate::infra::job::prelude::JobError> {
use crate::infra::db::entities;
use crate::infra::job::prelude::JobError;
use crate::ops::indexing::change_detection::DatabaseAdapterForJob;
use crate::ops::indexing::persistence::IndexPersistence;
use sea_orm::{ColumnTrait, EntityTrait, QueryFilter};
let location_record = entities::location::Entity::find_by_id(location_id)
.one(ctx.library_db())
.await
.map_err(|e| JobError::execution(format!("Failed to find location: {}", e)))?
.ok_or_else(|| JobError::execution("Location not found".to_string()))?;
// Create a persistent writer adapter to leverage the unified query logic
let persistence =
DatabaseAdapterForJob::new(ctx, location_record.uuid, location_record.entry_id);
// Use the scoped query method
let existing_entries = persistence.get_existing_entries(indexing_path).await?;
// Process the results into our internal data structures
for (full_path, (id, inode, modified_time, size)) in existing_entries {
let entry_kind = if full_path.is_dir() {
EntryKind::Directory
} else {
EntryKind::File
};
let db_entry = DatabaseEntry {
id,
path: full_path.clone(),
kind: entry_kind,
size,
modified: modified_time,
inode,
};
self.path_to_entry.insert(full_path.clone(), db_entry);
if let Some(inode_val) = inode {
self.inode_to_path.insert(inode_val, full_path);
}
}
ctx.log(format!(
"Loaded {} existing entries for change detection",
self.path_to_entry.len()
));
use tracing::warn;
if self.path_to_entry.is_empty() {
warn!("ChangeDetector loaded 0 entries - database may be locked or empty");
} else {
warn!(
"ChangeDetector loaded {} entries successfully",
self.path_to_entry.len()
);
}
Ok(())
}
/// Check if a path represents a change.
///
/// Returns Some(Change) if the path is new, modified, or moved.
/// Returns None if the path exists in database with same metadata.
pub fn check_path(
&mut self,
path: &Path,
metadata: &std::fs::Metadata,
inode: Option<u64>,
) -> Option<Change> {
// Check if path exists in database
if let Some(db_entry) = self.path_to_entry.get(path) {
// Check for modifications
if self.is_modified(db_entry, metadata) {
return Some(Change::Modified {
path: path.to_path_buf(),
entry_id: db_entry.id,
old_modified: db_entry.modified,
new_modified: metadata.modified().ok(),
});
}
// No change for this path
return None;
}
// Path not in database - check if it's a move or hard link
if let Some(inode_val) = inode {
if let Some(old_path) = self.inode_to_path.get(&inode_val).cloned() {
if old_path != path {
if let Some(db_entry) = self.path_to_entry.get(&old_path).cloned() {
// Check if the old path still exists on disk (with caching)
if self.path_exists_cached(&old_path) {
// Hard link: Both paths exist and point to same inode
use tracing::debug;
debug!(
"Hard link detected - existing: {:?}, new: {:?}, inode: {}",
old_path, path, inode_val
);
// Fall through to "New" - both entries should exist
} else {
// Genuine move: Old path no longer exists
use tracing::info;
info!(
"Move detected - old: {:?}, new: {:?}, inode: {}",
old_path, path, inode_val
);
return Some(Change::Moved {
old_path,
new_path: path.to_path_buf(),
entry_id: db_entry.id,
inode: inode_val,
});
}
}
}
}
}
// New file/directory
Some(Change::New(path.to_path_buf()))
}
/// Find deleted entries (in DB but not seen during scan).
pub fn find_deleted(&self, seen_paths: &std::collections::HashSet<PathBuf>) -> Vec<Change> {
self.path_to_entry
.iter()
.filter(|(path, _)| !seen_paths.contains(*path))
.map(|(path, entry)| Change::Deleted {
path: path.clone(),
entry_id: entry.id,
})
.collect()
}
/// Check if an entry has been modified
fn is_modified(&self, db_entry: &DatabaseEntry, metadata: &std::fs::Metadata) -> bool {
// Check size first (fast)
if db_entry.size != metadata.len() {
return true;
}
// Check modification time
if let (Some(db_modified), Ok(fs_modified)) = (db_entry.modified, metadata.modified()) {
let db_time = db_modified
.duration_since(SystemTime::UNIX_EPOCH)
.unwrap_or_default()
.as_millis() as i64;
let fs_time = fs_modified
.duration_since(SystemTime::UNIX_EPOCH)
.unwrap_or_default()
.as_millis() as i64;
if (db_time - fs_time).abs() > self.timestamp_precision_ms {
return true;
}
}
false
}
/// Set timestamp precision for comparison (in milliseconds)
pub fn set_timestamp_precision(&mut self, precision_ms: i64) {
self.timestamp_precision_ms = precision_ms;
}
/// Get the number of tracked entries
pub fn entry_count(&self) -> usize {
self.path_to_entry.len()
}
/// Check if a path exists with caching to reduce filesystem calls
fn path_exists_cached(&mut self, path: &Path) -> bool {
if let Some(&cached_result) = self.existence_cache.get(path) {
return cached_result;
}
let exists = path.exists();
self.existence_cache.insert(path.to_path_buf(), exists);
exists
}
}
impl Default for ChangeDetector {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_new_file_detection() {
let mut detector = ChangeDetector::new();
let new_path = PathBuf::from("/test/new_file.txt");
// Create a temporary file for testing
let temp_dir = tempfile::tempdir().unwrap();
let test_file = temp_dir.path().join("test.txt");
std::fs::write(&test_file, "test content").unwrap();
let metadata = std::fs::metadata(&test_file).unwrap();
let result = detector.check_path(&new_path, &metadata, None);
match result {
Some(Change::New(path)) => assert_eq!(path, new_path),
_ => panic!("Expected new file detection"),
}
}
#[test]
fn test_entry_count() {
let detector = ChangeDetector::new();
assert_eq!(detector.entry_count(), 0);
}
}

View File

@@ -0,0 +1,515 @@
//! Change handler for responding to filesystem events.
//!
//! This module provides the `ChangeHandler` trait and shared logic for
//! processing filesystem changes. Both persistent (database) and ephemeral
//! (in-memory) handlers implement this trait.
use super::types::{ChangeConfig, ChangeType, EntryRef};
use crate::ops::indexing::rules::{build_default_ruler, RuleToggles, RulerDecision};
use crate::ops::indexing::state::{DirEntry, EntryKind};
use anyhow::Result;
use std::path::Path;
use std::sync::Arc;
/// Abstracts storage operations for filesystem change handling.
///
/// Both persistent (database) and ephemeral (in-memory) handlers implement
/// this trait, allowing the same change processing logic to work with both
/// storage backends.
#[async_trait::async_trait]
pub trait ChangeHandler: Send + Sync {
/// Find an entry by its full filesystem path.
async fn find_by_path(&self, path: &Path) -> Result<Option<EntryRef>>;
/// Find an entry by inode (for move detection).
async fn find_by_inode(&self, inode: u64) -> Result<Option<EntryRef>>;
/// Create a new entry from filesystem metadata.
async fn create(&mut self, metadata: &DirEntry, parent_path: &Path) -> Result<EntryRef>;
/// Update an existing entry's metadata.
async fn update(&mut self, entry: &EntryRef, metadata: &DirEntry) -> Result<()>;
/// Move an entry from old path to new path.
async fn move_entry(
&mut self,
entry: &EntryRef,
old_path: &Path,
new_path: &Path,
new_parent_path: &Path,
) -> Result<()>;
/// Delete an entry and all its descendants.
async fn delete(&mut self, entry: &EntryRef) -> Result<()>;
/// Run post-create/modify processors (thumbnails, content hash).
/// No-op for ephemeral handlers.
async fn run_processors(&self, entry: &EntryRef, is_new: bool) -> Result<()>;
/// Emit appropriate events for UI updates.
async fn emit_change_event(&self, entry: &EntryRef, change_type: ChangeType) -> Result<()>;
/// Handle directory recursion after creation.
/// Persistent: spawns indexer job. Ephemeral: inline shallow index.
async fn handle_new_directory(&self, path: &Path) -> Result<()>;
}
/// Check if a path exists, distinguishing between "doesn't exist" and "can't access".
///
/// Critical for preventing false deletions when volumes go offline.
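///
/// A hedged caller sketch (mirrors how the handlers below use the result):
///
/// ```ignore
/// match path_exists_safe(&path, backend).await {
///     Ok(false) => { /* definitely gone: safe to delete the entry */ }
///     Ok(true) => { /* still present */ }
///     Err(_) => { /* volume may be offline: skip, do NOT treat as deleted */ }
/// }
/// ```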
pub async fn path_exists_safe(
path: &Path,
backend: Option<&Arc<dyn crate::volume::VolumeBackend>>,
) -> Result<bool> {
use crate::volume::error::VolumeError;
if let Some(backend) = backend {
match backend.exists(path).await {
Ok(exists) => Ok(exists),
Err(VolumeError::NotMounted(_)) => {
tracing::warn!(
"Volume not mounted when checking path existence: {}",
path.display()
);
Err(anyhow::anyhow!(
"Volume not mounted, cannot verify path existence"
))
}
Err(VolumeError::Io(ref e)) if e.kind() == std::io::ErrorKind::NotFound => Ok(false),
Err(VolumeError::Io(io_err)) => {
tracing::warn!(
"IO error when checking path existence for {}: {}",
path.display(),
io_err
);
Err(anyhow::anyhow!(
"IO error, volume may be offline: {}",
io_err
))
}
Err(e) => {
tracing::warn!(
"Volume error when checking path existence for {}: {}",
path.display(),
e
);
Err(e.into())
}
}
} else {
match tokio::fs::try_exists(path).await {
Ok(exists) => Ok(exists),
Err(e) => {
tracing::warn!(
"Cannot verify path existence for {} (volume may be offline): {}",
path.display(),
e
);
Err(anyhow::anyhow!("Cannot access path: {}", e))
}
}
}
}
/// Evaluates indexing rules to determine if a path should be skipped.
pub async fn should_filter_path(
path: &Path,
rule_toggles: RuleToggles,
location_root: &Path,
backend: Option<&Arc<dyn crate::volume::VolumeBackend>>,
) -> Result<bool> {
let ruler = build_default_ruler(rule_toggles, location_root, path).await;
let metadata = if let Some(backend) = backend {
backend
.metadata(path)
.await
.map_err(|e| anyhow::anyhow!("Failed to get metadata via backend: {}", e))?
} else {
let fs_meta = tokio::fs::metadata(path).await?;
crate::volume::backend::RawMetadata {
kind: if fs_meta.is_dir() {
EntryKind::Directory
} else if fs_meta.is_symlink() {
EntryKind::Symlink
} else {
EntryKind::File
},
size: fs_meta.len(),
modified: fs_meta.modified().ok(),
created: fs_meta.created().ok(),
accessed: fs_meta.accessed().ok(),
inode: None,
permissions: None,
}
};
struct SimpleMetadata {
is_dir: bool,
}
impl crate::ops::indexing::rules::MetadataForIndexerRules for SimpleMetadata {
fn is_dir(&self) -> bool {
self.is_dir
}
}
let simple_meta = SimpleMetadata {
is_dir: metadata.kind == EntryKind::Directory,
};
match ruler.evaluate_path(path, &simple_meta).await {
Ok(RulerDecision::Reject) => {
tracing::debug!("Filtered path by indexing rules: {}", path.display());
Ok(true)
}
Ok(RulerDecision::Accept) => Ok(false),
Err(e) => {
tracing::warn!("Error evaluating rules for {}: {}", path.display(), e);
Ok(false)
}
}
}
/// Extracts filesystem metadata into a DirEntry.
pub async fn build_dir_entry(
path: &Path,
backend: Option<&Arc<dyn crate::volume::VolumeBackend>>,
) -> Result<DirEntry> {
use crate::ops::indexing::database_storage::DatabaseStorage;
let meta = DatabaseStorage::extract_metadata(path, backend).await?;
Ok(DirEntry {
path: meta.path,
kind: meta.kind,
size: meta.size,
modified: meta.modified,
inode: meta.inode,
})
}
/// Apply a batch of filesystem changes using the provided handler.
///
/// Processes events in the correct order: removes first, then renames,
/// creates, and finally modifies.
pub async fn apply_batch<H: ChangeHandler>(
handler: &mut H,
events: Vec<crate::infra::event::FsRawEventKind>,
config: &ChangeConfig<'_>,
) -> Result<()> {
use crate::infra::event::FsRawEventKind;
if events.is_empty() {
return Ok(());
}
let mut creates = Vec::new();
let mut modifies = Vec::new();
let mut removes = Vec::new();
let mut renames = Vec::new();
for event in events {
match event {
FsRawEventKind::Create { path } => creates.push(path),
FsRawEventKind::Modify { path } => modifies.push(path),
FsRawEventKind::Remove { path } => removes.push(path),
FsRawEventKind::Rename { from, to } => renames.push((from, to)),
}
}
// Deduplicate
creates.sort();
creates.dedup();
modifies.sort();
modifies.dedup();
removes.sort();
removes.dedup();
tracing::debug!(
"Processing batch: {} creates, {} modifies, {} removes, {} renames",
creates.len(),
modifies.len(),
removes.len(),
renames.len()
);
// Process in order: removes, renames, creates, modifies
for path in removes {
if let Err(e) = handle_remove(handler, &path).await {
tracing::error!("Failed to handle remove for {}: {}", path.display(), e);
}
}
for (from, to) in renames {
if let Err(e) = handle_rename(handler, &from, &to, config).await {
tracing::error!(
"Failed to handle rename from {} to {}: {}",
from.display(),
to.display(),
e
);
}
}
for path in creates {
if let Err(e) = handle_create(handler, &path, config).await {
tracing::error!("Failed to handle create for {}: {}", path.display(), e);
}
}
for path in modifies {
if let Err(e) = handle_modify(handler, &path, config).await {
tracing::error!("Failed to handle modify for {}: {}", path.display(), e);
}
}
Ok(())
}
/// Handle a create event.
pub async fn handle_create<H: ChangeHandler>(
handler: &mut H,
path: &Path,
config: &ChangeConfig<'_>,
) -> Result<()> {
tracing::debug!("Create: {}", path.display());
match path_exists_safe(path, config.volume_backend).await {
Ok(true) => {}
Ok(false) => {
tracing::debug!("Path no longer exists, skipping create: {}", path.display());
return Ok(());
}
Err(e) => {
tracing::warn!(
"Skipping create event for inaccessible path {}: {}",
path.display(),
e
);
return Ok(());
}
}
if should_filter_path(
path,
config.rule_toggles,
config.location_root,
config.volume_backend,
)
.await?
{
tracing::debug!("Skipping filtered path: {}", path.display());
return Ok(());
}
let metadata = build_dir_entry(path, config.volume_backend).await?;
if handler.find_by_path(path).await?.is_some() {
tracing::debug!(
"Entry already exists at path {}, treating as modify",
path.display()
);
return handle_modify(handler, path, config).await;
}
if let Some(inode) = metadata.inode {
if let Some(existing) = handler.find_by_inode(inode).await? {
if existing.path != path {
tracing::debug!(
"Detected inode-based move: {} -> {}",
existing.path.display(),
path.display()
);
let old_path = existing.path.clone();
handler
.move_entry(
&existing,
&old_path,
path,
path.parent().unwrap_or(Path::new("/")),
)
.await?;
handler
.emit_change_event(&existing, ChangeType::Moved)
.await?;
return Ok(());
}
}
}
let parent_path = path.parent().unwrap_or(Path::new("/"));
let entry = handler.create(&metadata, parent_path).await?;
if entry.is_directory() {
handler.handle_new_directory(path).await?;
} else {
handler.run_processors(&entry, true).await?;
}
handler
.emit_change_event(&entry, ChangeType::Created)
.await?;
Ok(())
}
/// Handle a modify event.
pub async fn handle_modify<H: ChangeHandler>(
handler: &mut H,
path: &Path,
config: &ChangeConfig<'_>,
) -> Result<()> {
tracing::debug!("Modify: {}", path.display());
match path_exists_safe(path, config.volume_backend).await {
Ok(true) => {}
Ok(false) => {
tracing::debug!("Path no longer exists, skipping modify: {}", path.display());
return Ok(());
}
Err(e) => {
tracing::warn!(
"Skipping modify event for inaccessible path {}: {}",
path.display(),
e
);
return Ok(());
}
}
if should_filter_path(
path,
config.rule_toggles,
config.location_root,
config.volume_backend,
)
.await?
{
tracing::debug!("Skipping filtered path: {}", path.display());
return Ok(());
}
let metadata = build_dir_entry(path, config.volume_backend).await?;
if let Some(inode) = metadata.inode {
if let Some(existing) = handler.find_by_inode(inode).await? {
if existing.path != path {
tracing::debug!(
"Detected inode-based move during modify: {} -> {}",
existing.path.display(),
path.display()
);
let old_path = existing.path.clone();
handler
.move_entry(
&existing,
&old_path,
path,
path.parent().unwrap_or(Path::new("/")),
)
.await?;
handler
.emit_change_event(&existing, ChangeType::Moved)
.await?;
return Ok(());
}
}
}
if let Some(entry) = handler.find_by_path(path).await? {
handler.update(&entry, &metadata).await?;
if !entry.is_directory() {
handler.run_processors(&entry, false).await?;
}
handler
.emit_change_event(&entry, ChangeType::Modified)
.await?;
} else {
tracing::debug!(
"Entry not found for path, skipping modify: {}",
path.display()
);
}
Ok(())
}
/// Handle a remove event.
pub async fn handle_remove<H: ChangeHandler>(handler: &mut H, path: &Path) -> Result<()> {
tracing::debug!("Remove: {}", path.display());
if let Some(entry) = handler.find_by_path(path).await? {
handler.delete(&entry).await?;
handler
.emit_change_event(&entry, ChangeType::Deleted)
.await?;
tracing::debug!("Deleted entry for path: {}", path.display());
} else {
tracing::debug!(
"Entry not found for path, skipping remove: {}",
path.display()
);
}
Ok(())
}
/// Handle a rename event.
pub async fn handle_rename<H: ChangeHandler>(
handler: &mut H,
from: &Path,
to: &Path,
config: &ChangeConfig<'_>,
) -> Result<()> {
tracing::debug!("Rename: {} -> {}", from.display(), to.display());
match path_exists_safe(to, config.volume_backend).await {
Ok(true) => {}
Ok(false) => {
tracing::debug!(
"Destination path doesn't exist, skipping rename: {}",
to.display()
);
return Ok(());
}
Err(e) => {
tracing::warn!(
"Skipping rename event for inaccessible destination {}: {}",
to.display(),
e
);
return Ok(());
}
}
if should_filter_path(
to,
config.rule_toggles,
config.location_root,
config.volume_backend,
)
.await?
{
tracing::debug!(
"Destination path is filtered, removing entry: {}",
to.display()
);
return handle_remove(handler, from).await;
}
if let Some(entry) = handler.find_by_path(from).await? {
handler
.move_entry(&entry, from, to, to.parent().unwrap_or(Path::new("/")))
.await?;
handler.emit_change_event(&entry, ChangeType::Moved).await?;
tracing::debug!("Moved entry {} -> {}", from.display(), to.display());
} else {
tracing::debug!(
"Entry not found for old path {}, skipping rename",
from.display()
);
}
Ok(())
}
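// Illustrative sketch (not part of this commit): dispatching raw watcher
// events to the handlers above. Assumes `FsRawEventKind` has the
// Create/Modify/Remove/Rename shape used elsewhere in this change, and that
// `handle_create` shares the (handler, path, config) signature of
// `handle_modify`.
pub async fn route_event<H: ChangeHandler>(
    handler: &mut H,
    event: crate::infra::event::FsRawEventKind,
    config: &ChangeConfig<'_>,
) -> Result<()> {
    use crate::infra::event::FsRawEventKind;
    match event {
        FsRawEventKind::Create { path } => handle_create(handler, &path, config).await,
        FsRawEventKind::Modify { path } => handle_modify(handler, &path, config).await,
        FsRawEventKind::Remove { path } => handle_remove(handler, &path).await,
        FsRawEventKind::Rename { from, to } => handle_rename(handler, &from, &to, config).await,
    }
}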

View File

@@ -1,423 +1,25 @@
//! Change detection for incremental indexing
//! # Change Detection
//!
//! This module provides efficient change detection using:
//! - Inode tracking for move/rename detection
//! - Modification time comparison
//! - Size verification
//! - Directory hierarchy tracking
//! Tracks filesystem changes through two complementary subsystems: batch
//! detection during indexer jobs (`detector`) and real-time handling of watcher
//! events (`handler`). Both produce the same `Change` type and share inode-based
//! move detection, so a file moved while the indexer is running behaves
//! identically to one moved while the watcher is active.
//!
//! Changes route to either `DatabaseAdapter` (database writes for
//! managed locations) or `MemoryAdapter` (in-memory updates for browsing
//! sessions). This split keeps browsed directories responsive without
//! polluting the database with temporary entries.
use super::state::EntryKind;
use crate::infra::{db::entities, job::prelude::JobContext};
use sea_orm::{ColumnTrait, EntityTrait, QueryFilter, QuerySelect};
use std::{
collections::HashMap,
path::{Path, PathBuf},
time::SystemTime,
pub mod detector;
pub mod handler;
pub mod persistent;
pub mod types;
pub use detector::ChangeDetector;
pub use handler::{
apply_batch, build_dir_entry, handle_create, handle_modify, handle_rename, handle_remove,
path_exists_safe, should_filter_path, ChangeHandler,
};
/// Represents a change detected in the file system
#[derive(Debug, Clone)]
pub enum Change {
/// New file/directory not in database
New(PathBuf),
/// File/directory modified (content or metadata changed)
Modified {
path: PathBuf,
entry_id: i32,
old_modified: Option<SystemTime>,
new_modified: Option<SystemTime>,
},
/// File/directory moved or renamed (same inode, different path)
Moved {
old_path: PathBuf,
new_path: PathBuf,
entry_id: i32,
inode: u64,
},
/// File/directory deleted (exists in DB but not on disk)
Deleted { path: PathBuf, entry_id: i32 },
}
/// Tracks changes between database state and file system
pub struct ChangeDetector {
/// Maps paths to their database entries
path_to_entry: HashMap<PathBuf, DatabaseEntry>,
/// Maps inodes to paths (for detecting moves)
inode_to_path: HashMap<u64, PathBuf>,
/// Precision for timestamp comparison (some filesystems have lower precision)
timestamp_precision_ms: i64,
/// Cache for file existence checks to avoid repeated filesystem calls
existence_cache: HashMap<PathBuf, bool>,
}
#[derive(Debug, Clone)]
struct DatabaseEntry {
id: i32,
path: PathBuf,
kind: EntryKind,
size: u64,
modified: Option<SystemTime>,
inode: Option<u64>,
}
impl ChangeDetector {
/// Create a new change detector
pub fn new() -> Self {
Self {
path_to_entry: HashMap::new(),
inode_to_path: HashMap::new(),
timestamp_precision_ms: 1, // Default to 1ms precision
existence_cache: HashMap::new(),
}
}
/// Load existing entries from database for a location, scoped to indexing path
pub async fn load_existing_entries(
&mut self,
ctx: &JobContext<'_>,
location_id: i32,
indexing_path: &Path,
) -> Result<(), crate::infra::job::prelude::JobError> {
use super::persistence::{DatabasePersistence, IndexPersistence};
use crate::infra::job::prelude::JobError;
// For change detection, we need to get the location's root entry ID
use crate::infra::db::entities;
use sea_orm::{ColumnTrait, EntityTrait, QueryFilter};
let location_record = entities::location::Entity::find_by_id(location_id)
.one(ctx.library_db())
.await
.map_err(|e| JobError::execution(format!("Failed to find location: {}", e)))?
.ok_or_else(|| JobError::execution("Location not found".to_string()))?;
// Create a database persistence instance to leverage the scoped query logic
let persistence = DatabasePersistence::new(ctx, 0, location_record.entry_id); // device_id not needed for query
// Use the scoped query method
let existing_entries = persistence.get_existing_entries(indexing_path).await?;
// Process the results into our internal data structures
for (full_path, (id, inode, modified_time, size)) in existing_entries {
// Determine entry kind from a live filesystem check; change detection
// primarily cares about path/inode/timestamp, so this is sufficient
let entry_kind = if full_path.is_dir() {
EntryKind::Directory
} else {
EntryKind::File
};
// Now we have accurate size information from the database
let db_entry = DatabaseEntry {
id,
path: full_path.clone(),
kind: entry_kind,
size,
modified: modified_time,
inode,
};
// Track by path
self.path_to_entry.insert(full_path.clone(), db_entry);
// Track by inode if available
if let Some(inode_val) = inode {
self.inode_to_path.insert(inode_val, full_path);
}
}
ctx.log(format!(
"Loaded {} existing entries for change detection",
self.path_to_entry.len()
));
// DEBUG: Log if we failed to load entries
use tracing::warn;
if self.path_to_entry.is_empty() {
warn!("DEBUG: ChangeDetector loaded 0 entries - database may be locked or empty");
} else {
warn!(
"DEBUG: ChangeDetector loaded {} entries successfully",
self.path_to_entry.len()
);
}
Ok(())
}
/// Check if a path represents a change
pub fn check_path(
&mut self,
path: &Path,
metadata: &std::fs::Metadata,
inode: Option<u64>,
) -> Option<Change> {
// Check if path exists in database
if let Some(db_entry) = self.path_to_entry.get(path) {
// Check for modifications
if self.is_modified(db_entry, metadata) {
return Some(Change::Modified {
path: path.to_path_buf(),
entry_id: db_entry.id,
old_modified: db_entry.modified,
new_modified: metadata.modified().ok(),
});
}
// No change for this path
return None;
}
// Path not in database - check if it's a move or hard link
if let Some(inode_val) = inode {
if let Some(old_path) = self.inode_to_path.get(&inode_val).cloned() {
if old_path != path {
if let Some(db_entry) = self.path_to_entry.get(&old_path).cloned() {
// Check if the old path still exists on disk (with caching)
// - If old path exists: This is a hard link (both paths are valid)
// - If old path doesn't exist: This is a genuine move
if self.path_exists_cached(&old_path) {
// Hard link: Both paths exist and point to same inode
// Treat current path as a new entry (don't skip it)
use tracing::debug;
debug!(
"Hard link detected - existing: {:?}, new: {:?}, inode: {}",
old_path, path, inode_val
);
// Fall through to "New file/directory" - both entries should exist
} else {
// Genuine move: Old path no longer exists, same inode at new path
use tracing::info;
info!(
"Genuine move detected - old: {:?}, new: {:?}, inode: {}",
old_path, path, inode_val
);
return Some(Change::Moved {
old_path,
new_path: path.to_path_buf(),
entry_id: db_entry.id,
inode: inode_val,
});
}
}
}
}
}
// New file/directory
Some(Change::New(path.to_path_buf()))
}
/// Find deleted entries (in DB but not seen during scan)
pub fn find_deleted(&self, seen_paths: &std::collections::HashSet<PathBuf>) -> Vec<Change> {
self.path_to_entry
.iter()
.filter(|(path, _)| !seen_paths.contains(*path))
.map(|(path, entry)| Change::Deleted {
path: path.clone(),
entry_id: entry.id,
})
.collect()
}
/// Check if an entry has been modified
fn is_modified(&self, db_entry: &DatabaseEntry, metadata: &std::fs::Metadata) -> bool {
// Check size first (fast)
if db_entry.size != metadata.len() {
return true;
}
// Check modification time
if let (Some(db_modified), Ok(fs_modified)) = (db_entry.modified, metadata.modified()) {
// Compare with precision tolerance
let db_time = db_modified
.duration_since(SystemTime::UNIX_EPOCH)
.unwrap_or_default()
.as_millis() as i64;
let fs_time = fs_modified
.duration_since(SystemTime::UNIX_EPOCH)
.unwrap_or_default()
.as_millis() as i64;
if (db_time - fs_time).abs() > self.timestamp_precision_ms {
return true;
}
}
false
}
/// Set timestamp precision for comparison (in milliseconds)
pub fn set_timestamp_precision(&mut self, precision_ms: i64) {
self.timestamp_precision_ms = precision_ms;
}
/// Get the number of tracked entries
pub fn entry_count(&self) -> usize {
self.path_to_entry.len()
}
/// Check if a path exists with caching to reduce filesystem calls
fn path_exists_cached(&mut self, path: &Path) -> bool {
// Check cache first
if let Some(&cached_result) = self.existence_cache.get(path) {
return cached_result;
}
// Not in cache, check filesystem and cache the result
let exists = path.exists();
self.existence_cache.insert(path.to_path_buf(), exists);
exists
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::time::SystemTime;
// Mock metadata struct for testing
pub struct MockMetadata {
size: u64,
modified: SystemTime,
}
impl MockMetadata {
pub fn new(size: u64) -> Self {
Self {
size,
modified: SystemTime::now(),
}
}
pub fn len(&self) -> u64 {
self.size
}
pub fn modified(&self) -> Result<SystemTime, std::io::Error> {
Ok(self.modified)
}
}
// Helper to test change detection with mock metadata
fn test_check_path(
detector: &mut ChangeDetector,
path: &Path,
size: u64,
inode: Option<u64>,
) -> Option<Change> {
let mock_metadata = MockMetadata::new(size);
// We need to manually call the logic since we can't easily mock std::fs::Metadata
// Check if path exists in database
if let Some(db_entry) = detector.path_to_entry.get(path) {
// Check for modifications (simplified for testing)
if db_entry.size != mock_metadata.len() {
return Some(Change::Modified {
path: path.to_path_buf(),
entry_id: db_entry.id,
old_modified: db_entry.modified,
new_modified: Some(mock_metadata.modified),
});
}
return None;
}
// Path not in database - check if it's a move or hard link
if let Some(inode_val) = inode {
if let Some(old_path) = detector.inode_to_path.get(&inode_val) {
if old_path != path {
if let Some(db_entry) = detector.path_to_entry.get(old_path) {
// In mock tests, we can't easily check file existence
// For testing purposes, assume it's a hard link (treat as new entry)
// In real scenarios, the actual file existence check would determine behavior
// Fall through to treat as new entry
}
}
}
}
// New file/directory
Some(Change::New(path.to_path_buf()))
}
#[test]
fn test_hard_link_detection() {
let mut detector = ChangeDetector::new();
// Add a test entry
let db_path = PathBuf::from("/test/dir1/file.txt");
let db_entry = DatabaseEntry {
id: 1,
path: db_path.clone(),
kind: EntryKind::File,
size: 1000,
modified: Some(SystemTime::now()),
inode: Some(12345),
};
detector.path_to_entry.insert(db_path.clone(), db_entry);
detector.inode_to_path.insert(12345, db_path);
// Test hard link detection (same inode, different path, both should exist)
let hard_link_path = PathBuf::from("/test/dir2/hardlink.txt");
// Since we can't easily mock file existence in tests, we'll test the logic
// In a real scenario, if both paths exist, it should be treated as a new entry
let result = test_check_path(&mut detector, &hard_link_path, 1000, Some(12345));
// In our mock test, this will be treated as new since we can't check file existence
match result {
Some(Change::New(path)) => assert_eq!(path, hard_link_path),
_ => panic!("Expected hard link to be treated as new entry"),
}
}
#[test]
fn test_consistent_behavior() {
let mut detector = ChangeDetector::new();
// Add a test entry
let db_path = PathBuf::from("/test/dir1/file.txt");
let db_entry = DatabaseEntry {
id: 1,
path: db_path.clone(),
kind: EntryKind::File,
size: 1000,
modified: Some(SystemTime::now()),
inode: Some(12345),
};
detector.path_to_entry.insert(db_path.clone(), db_entry);
detector.inode_to_path.insert(12345, db_path.clone());
// Test consistent behavior: same inode at different path
// In our mock test environment, this will be treated as a new entry
// (since we can't mock file existence checks easily)
let other_path = PathBuf::from("/test/dir2/other_file.txt");
let result = test_check_path(&mut detector, &other_path, 1000, Some(12345));
match result {
Some(Change::New(path)) => assert_eq!(path, other_path),
_ => panic!("Expected consistent behavior: treat as new entry"),
}
}
#[test]
fn test_new_file_detection() {
let mut detector = ChangeDetector::new();
// Test new file detection
let new_path = PathBuf::from("/test/new_file.txt");
match test_check_path(&mut detector, &new_path, 500, None) {
Some(Change::New(p)) => assert_eq!(p, new_path),
_ => panic!("Expected new file detection"),
}
}
}
pub use persistent::{DatabaseAdapter, DatabaseAdapterForJob};
pub use types::{Change, ChangeConfig, ChangeMetadata, ChangeType, EntryRef};

View File

@@ -0,0 +1,838 @@
//! Unified database adapter for both watcher and indexer pipelines.
//!
//! This module provides `DatabaseAdapter`, which implements both `ChangeHandler`
//! (for the watcher pipeline) and `IndexPersistence` (for the indexer job).
//! Both pipelines share the same database write logic through `DatabaseStorage`,
//! eliminating code duplication.
use super::handler::ChangeHandler;
use super::types::{ChangeType, EntryRef};
use crate::context::CoreContext;
use crate::infra::db::entities;
use crate::infra::job::prelude::{JobContext, JobError, JobResult};
use crate::ops::indexing::persistence::IndexPersistence;
use crate::ops::indexing::state::{DirEntry, EntryKind};
use anyhow::Result;
use sea_orm::{ColumnTrait, EntityTrait, QueryFilter, TransactionTrait};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use uuid::Uuid;
/// Unified writer for persistent (database-backed) index storage.
///
/// Implements both `ChangeHandler` (for the watcher pipeline) and `IndexPersistence`
/// (for the indexer job pipeline). Both pipelines share:
/// - The same `DBWriter` for CRUD operations
/// - Closure table management
/// - Directory path tracking
/// - Entry ID caching for hierarchy construction
pub struct DatabaseAdapter {
context: Arc<CoreContext>,
library_id: Uuid,
location_id: Uuid,
location_root_entry_id: i32,
db: sea_orm::DatabaseConnection,
volume_backend: Option<Arc<dyn crate::volume::VolumeBackend>>,
entry_id_cache: HashMap<PathBuf, i32>,
}
impl DatabaseAdapter {
pub async fn new(
context: Arc<CoreContext>,
library_id: Uuid,
location_id: Uuid,
_location_root: &Path,
volume_backend: Option<Arc<dyn crate::volume::VolumeBackend>>,
) -> Result<Self> {
let library = context
.get_library(library_id)
.await
.ok_or_else(|| anyhow::anyhow!("Library not found: {}", library_id))?;
let db = library.db().conn().clone();
let location_record = entities::location::Entity::find()
.filter(entities::location::Column::Uuid.eq(location_id))
.one(&db)
.await?
.ok_or_else(|| anyhow::anyhow!("Location not found: {}", location_id))?;
let location_root_entry_id = location_record
.entry_id
.ok_or_else(|| anyhow::anyhow!("Location {} has no root entry", location_id))?;
Ok(Self {
context,
library_id,
location_id,
location_root_entry_id,
db,
volume_backend,
entry_id_cache: HashMap::new(),
})
}
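/// Resolve a path to its entry ID: try the `directory_paths` table first,
/// then fall back to a parent + name/extension lookup for files.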
async fn resolve_entry_id(&self, path: &Path) -> Result<Option<i32>> {
if let Some(id) = self.resolve_directory_entry_id(path).await? {
return Ok(Some(id));
}
self.resolve_file_entry_id(path).await
}
async fn resolve_directory_entry_id(&self, path: &Path) -> Result<Option<i32>> {
use sea_orm::FromQueryResult;
let path_str = path.to_string_lossy().to_string();
#[derive(Debug, FromQueryResult)]
struct DirectoryEntryId {
entry_id: i32,
}
let result = DirectoryEntryId::find_by_statement(sea_orm::Statement::from_sql_and_values(
sea_orm::DbBackend::Sqlite,
r#"
SELECT dp.entry_id
FROM directory_paths dp
INNER JOIN entry_closure ec ON ec.descendant_id = dp.entry_id
WHERE dp.path = ?
AND ec.ancestor_id = ?
"#,
vec![path_str.into(), self.location_root_entry_id.into()],
))
.one(&self.db)
.await?;
Ok(result.map(|r| r.entry_id))
}
async fn resolve_file_entry_id(&self, path: &Path) -> Result<Option<i32>> {
let parent = match path.parent() {
Some(p) => p,
None => return Ok(None),
};
let parent_id = match self.resolve_directory_entry_id(parent).await? {
Some(id) => id,
None => return Ok(None),
};
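// Entries store the file stem and a lowercased extension in separate
// columns, so split the filename the same way before matching.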
let name = path
.file_stem()
.and_then(|s| s.to_str())
.unwrap_or("")
.to_string();
let ext = path
.extension()
.and_then(|s| s.to_str())
.map(|s| s.to_lowercase());
let mut q = entities::entry::Entity::find()
.filter(entities::entry::Column::ParentId.eq(parent_id))
.filter(entities::entry::Column::Name.eq(name));
if let Some(e) = ext {
q = q.filter(entities::entry::Column::Extension.eq(e));
} else {
q = q.filter(entities::entry::Column::Extension.is_null());
}
let model = q.one(&self.db).await?;
Ok(model.map(|m| m.id))
}
}
#[async_trait::async_trait]
impl ChangeHandler for DatabaseAdapter {
async fn find_by_path(&self, path: &Path) -> Result<Option<EntryRef>> {
let entry_id = match self.resolve_entry_id(path).await? {
Some(id) => id,
None => return Ok(None),
};
let entry = entities::entry::Entity::find_by_id(entry_id)
.one(&self.db)
.await?
.ok_or_else(|| anyhow::anyhow!("Entry {} not found after ID lookup", entry_id))?;
let kind = match entry.kind {
0 => EntryKind::File,
1 => EntryKind::Directory,
2 => EntryKind::Symlink,
_ => EntryKind::File,
};
Ok(Some(EntryRef {
id: entry.id,
uuid: entry.uuid,
path: path.to_path_buf(),
kind,
}))
}
async fn find_by_inode(&self, inode: u64) -> Result<Option<EntryRef>> {
let inode_val = inode as i64;
let entry = entities::entry::Entity::find()
.filter(entities::entry::Column::Inode.eq(inode_val))
.one(&self.db)
.await?;
match entry {
Some(e) => {
let full_path = crate::ops::indexing::PathResolver::get_full_path(&self.db, e.id)
.await
.unwrap_or_else(|_| PathBuf::from(&e.name));
let kind = match e.kind {
0 => EntryKind::File,
1 => EntryKind::Directory,
2 => EntryKind::Symlink,
_ => EntryKind::File,
};
Ok(Some(EntryRef {
id: e.id,
uuid: e.uuid,
path: full_path,
kind,
}))
}
None => Ok(None),
}
}
async fn create(&mut self, metadata: &DirEntry, parent_path: &Path) -> Result<EntryRef> {
use crate::domain::addressing::SdPath;
use crate::ops::indexing::database_storage::DatabaseStorage;
use crate::ops::indexing::state::IndexerState;
let mut state = IndexerState::new(&SdPath::local(&metadata.path));
let library = self.context.get_library(self.library_id).await;
// Cache Management: Check cache first, then query DB if needed
if let Some(&parent_id) = self.entry_id_cache.get(parent_path) {
state
.entry_id_cache
.insert(parent_path.to_path_buf(), parent_id);
} else if let Ok(Some(parent_id)) = DatabaseStorage::resolve_parent_id(&self.db, parent_path).await
{
// Cache the parent ID for future lookups
state
.entry_id_cache
.insert(parent_path.to_path_buf(), parent_id);
self.entry_id_cache
.insert(parent_path.to_path_buf(), parent_id);
}
let entry_id = DatabaseStorage::create_entry(
&mut state,
&self.db,
library.as_deref(),
metadata,
0,
parent_path,
)
.await
.map_err(|e| anyhow::anyhow!("Failed to create entry: {}", e))?;
self.entry_id_cache.insert(metadata.path.clone(), entry_id);
let entry = entities::entry::Entity::find_by_id(entry_id)
.one(&self.db)
.await?
.ok_or_else(|| anyhow::anyhow!("Entry not found after creation"))?;
Ok(EntryRef {
id: entry.id,
uuid: entry.uuid,
path: metadata.path.clone(),
kind: metadata.kind,
})
}
async fn update(&mut self, entry: &EntryRef, metadata: &DirEntry) -> Result<()> {
use crate::ops::indexing::database_storage::DatabaseStorage;
DatabaseStorage::update_entry(&self.db, entry.id, metadata)
.await
.map_err(|e| anyhow::anyhow!("Failed to update entry: {}", e))?;
Ok(())
}
async fn move_entry(
&mut self,
entry: &EntryRef,
old_path: &Path,
new_path: &Path,
new_parent_path: &Path,
) -> Result<()> {
use crate::domain::addressing::SdPath;
use crate::ops::indexing::database_storage::DatabaseStorage;
use crate::ops::indexing::state::IndexerState;
let mut state = IndexerState::new(&SdPath::local(old_path));
// Cache Management: Check cache first, then query DB if needed
if let Some(&parent_id) = self.entry_id_cache.get(new_parent_path) {
state
.entry_id_cache
.insert(new_parent_path.to_path_buf(), parent_id);
} else if let Ok(Some(parent_id)) =
DatabaseStorage::resolve_parent_id(&self.db, new_parent_path).await
{
state
.entry_id_cache
.insert(new_parent_path.to_path_buf(), parent_id);
self.entry_id_cache
.insert(new_parent_path.to_path_buf(), parent_id);
}
DatabaseStorage::move_entry(
&mut state,
&self.db,
entry.id,
old_path,
new_path,
new_parent_path,
)
.await
.map_err(|e| anyhow::anyhow!("Failed to move entry: {}", e))?;
self.entry_id_cache.remove(old_path);
self.entry_id_cache.insert(new_path.to_path_buf(), entry.id);
Ok(())
}
async fn delete(&mut self, entry: &EntryRef) -> Result<()> {
let mut to_delete_ids: Vec<i32> = vec![entry.id];
if let Ok(rows) = entities::entry_closure::Entity::find()
.filter(entities::entry_closure::Column::AncestorId.eq(entry.id))
.all(&self.db)
.await
{
to_delete_ids.extend(rows.into_iter().map(|r| r.descendant_id));
}
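// Also walk parent_id links directly in case any descendants lack closure
// rows; the visited set keeps the traversal from revisiting nodes.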
let mut queue = vec![entry.id];
let mut visited = std::collections::HashSet::from([entry.id]);
while let Some(parent) = queue.pop() {
if let Ok(children) = entities::entry::Entity::find()
.filter(entities::entry::Column::ParentId.eq(parent))
.all(&self.db)
.await
{
for child in children {
if visited.insert(child.id) {
to_delete_ids.push(child.id);
queue.push(child.id);
}
}
}
}
to_delete_ids.sort_unstable();
to_delete_ids.dedup();
let entries_to_delete = if !to_delete_ids.is_empty() {
let mut all_entries = Vec::new();
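// Chunk the IN (...) list to stay under SQLite's bound-parameter limit
// (historically 999 variables per statement).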
for chunk in to_delete_ids.chunks(900) {
let batch = entities::entry::Entity::find()
.filter(entities::entry::Column::Id.is_in(chunk.to_vec()))
.all(&self.db)
.await?;
all_entries.extend(batch);
}
all_entries
} else {
Vec::new()
};
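// Record sync deletes for these entries before removing them locally.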
if !entries_to_delete.is_empty() {
if let Some(library) = self.context.get_library(self.library_id).await {
let _ = library
.sync_models_batch(
&entries_to_delete,
crate::infra::sync::ChangeType::Delete,
&self.db,
)
.await;
}
}
let txn = self.db.begin().await?;
if !to_delete_ids.is_empty() {
let _ = entities::entry_closure::Entity::delete_many()
.filter(entities::entry_closure::Column::DescendantId.is_in(to_delete_ids.clone()))
.exec(&txn)
.await;
let _ = entities::entry_closure::Entity::delete_many()
.filter(entities::entry_closure::Column::AncestorId.is_in(to_delete_ids.clone()))
.exec(&txn)
.await;
let _ = entities::directory_paths::Entity::delete_many()
.filter(entities::directory_paths::Column::EntryId.is_in(to_delete_ids.clone()))
.exec(&txn)
.await;
let _ = entities::entry::Entity::delete_many()
.filter(entities::entry::Column::Id.is_in(to_delete_ids))
.exec(&txn)
.await;
}
txn.commit().await?;
self.entry_id_cache.remove(&entry.path);
Ok(())
}
async fn run_processors(&self, entry: &EntryRef, _is_new: bool) -> Result<()> {
use crate::ops::indexing::processor::{
load_location_processor_config, ContentHashProcessor, ProcessorEntry,
};
use crate::ops::media::{
ocr::OcrProcessor, proxy::ProxyProcessor, speech::SpeechToTextProcessor,
thumbnail::ThumbnailProcessor, thumbstrip::ThumbstripProcessor,
};
if entry.is_directory() {
return Ok(());
}
let Some(library) = self.context.get_library(self.library_id).await else {
return Ok(());
};
let proc_config = load_location_processor_config(self.location_id, &self.db)
.await
.unwrap_or_default();
let build_proc_entry = |db: &sea_orm::DatabaseConnection,
entry: &EntryRef|
-> std::pin::Pin<
Box<dyn std::future::Future<Output = Result<ProcessorEntry>> + Send + '_>,
> {
let entry = entry.clone();
let db = db.clone();
Box::pin(async move {
let db_entry = entities::entry::Entity::find_by_id(entry.id)
.one(&db)
.await?
.ok_or_else(|| anyhow::anyhow!("Entry not found"))?;
let mime_type = if let Some(content_id) = db_entry.content_id {
if let Ok(Some(ci)) = entities::content_identity::Entity::find_by_id(content_id)
.one(&db)
.await
{
if let Some(mime_id) = ci.mime_type_id {
if let Ok(Some(mime)) = entities::mime_type::Entity::find_by_id(mime_id)
.one(&db)
.await
{
Some(mime.mime_type)
} else {
None
}
} else {
None
}
} else {
None
}
} else {
None
};
Ok(ProcessorEntry {
id: entry.id,
uuid: entry.uuid,
path: entry.path.clone(),
kind: entry.kind,
size: db_entry.size as u64,
content_id: db_entry.content_id,
mime_type,
})
})
};
// Content hash (run first - other processors may need the content_id)
if proc_config
.watcher_processors
.iter()
.any(|c| c.processor_type == "content_hash" && c.enabled)
{
let proc_entry = build_proc_entry(&self.db, entry).await?;
let content_proc = ContentHashProcessor::new(self.library_id);
if let Err(e) = content_proc.process(&self.db, &proc_entry).await {
tracing::warn!("Content hash processing failed: {}", e);
}
}
// Thumbnail
if proc_config
.watcher_processors
.iter()
.any(|c| c.processor_type == "thumbnail" && c.enabled)
{
let proc_entry = build_proc_entry(&self.db, entry).await?;
let thumb_proc = ThumbnailProcessor::new(library.clone());
if thumb_proc.should_process(&proc_entry) {
if let Err(e) = thumb_proc.process(&self.db, &proc_entry).await {
tracing::warn!("Thumbnail processing failed: {}", e);
}
}
}
// Thumbstrip
if proc_config
.watcher_processors
.iter()
.any(|c| c.processor_type == "thumbstrip" && c.enabled)
{
let proc_entry = build_proc_entry(&self.db, entry).await?;
let settings = proc_config
.watcher_processors
.iter()
.find(|c| c.processor_type == "thumbstrip")
.map(|c| &c.settings);
let thumbstrip_proc = if let Some(settings) = settings {
ThumbstripProcessor::new(library.clone())
.with_settings(settings)
.unwrap_or_else(|e| {
tracing::warn!("Failed to parse thumbstrip settings: {}", e);
ThumbstripProcessor::new(library.clone())
})
} else {
ThumbstripProcessor::new(library.clone())
};
if thumbstrip_proc.should_process(&proc_entry) {
if let Err(e) = thumbstrip_proc.process(&self.db, &proc_entry).await {
tracing::warn!("Thumbstrip processing failed: {}", e);
}
}
}
// Proxy
if proc_config
.watcher_processors
.iter()
.any(|c| c.processor_type == "proxy" && c.enabled)
{
let proc_entry = build_proc_entry(&self.db, entry).await?;
let settings = proc_config
.watcher_processors
.iter()
.find(|c| c.processor_type == "proxy")
.map(|c| &c.settings);
let proxy_proc = if let Some(settings) = settings {
ProxyProcessor::new(library.clone())
.with_settings(settings)
.unwrap_or_else(|e| {
tracing::warn!("Failed to parse proxy settings: {}", e);
ProxyProcessor::new(library.clone())
})
} else {
ProxyProcessor::new(library.clone())
};
if proxy_proc.should_process(&proc_entry) {
if let Err(e) = proxy_proc.process(&self.db, &proc_entry).await {
tracing::warn!("Proxy processing failed: {}", e);
}
}
}
// OCR
if proc_config
.watcher_processors
.iter()
.any(|c| c.processor_type == "ocr" && c.enabled)
{
let proc_entry = build_proc_entry(&self.db, entry).await?;
let ocr_proc = OcrProcessor::new(library.clone());
if ocr_proc.should_process(&proc_entry) {
if let Err(e) = ocr_proc.process(&self.db, &proc_entry).await {
tracing::warn!("OCR processing failed: {}", e);
}
}
}
// Speech-to-text
if proc_config
.watcher_processors
.iter()
.any(|c| c.processor_type == "speech_to_text" && c.enabled)
{
let proc_entry = build_proc_entry(&self.db, entry).await?;
let speech_proc = SpeechToTextProcessor::new(library.clone());
if speech_proc.should_process(&proc_entry) {
if let Err(e) = speech_proc.process(&self.db, &proc_entry).await {
tracing::warn!("Speech-to-text processing failed: {}", e);
}
}
}
Ok(())
}
async fn emit_change_event(&self, entry: &EntryRef, change_type: ChangeType) -> Result<()> {
use crate::domain::ResourceManager;
if let Some(uuid) = entry.uuid {
let resource_manager =
ResourceManager::new(Arc::new(self.db.clone()), self.context.events.clone());
if let Err(e) = resource_manager
.emit_resource_events("entry", vec![uuid])
.await
{
tracing::warn!(
"Failed to emit resource event for {:?} entry: {}",
change_type,
e
);
}
}
Ok(())
}
async fn handle_new_directory(&self, path: &Path) -> Result<()> {
use crate::domain::addressing::SdPath;
use crate::ops::indexing::job::{IndexMode, IndexerJob};
let Some(library) = self.context.get_library(self.library_id).await else {
return Ok(());
};
let index_mode = if let Ok(Some(loc)) = entities::location::Entity::find()
.filter(entities::location::Column::Uuid.eq(self.location_id))
.one(&self.db)
.await
{
match loc.index_mode.as_str() {
"shallow" => IndexMode::Shallow,
"content" => IndexMode::Content,
"deep" => IndexMode::Deep,
_ => IndexMode::Content,
}
} else {
IndexMode::Content
};
let indexer_job =
IndexerJob::from_location(self.location_id, SdPath::local(path), index_mode);
if let Err(e) = library.jobs().dispatch(indexer_job).await {
tracing::warn!(
"Failed to spawn indexer job for directory {}: {}",
path.display(),
e
);
} else {
tracing::debug!(
"Spawned recursive indexer job for directory: {}",
path.display()
);
}
Ok(())
}
}
// ============================================================================
// IndexPersistence Implementation (Job Pipeline)
// ============================================================================
/// Adapter for using the unified database write path in the job pipeline.
///
/// The job system expects an `IndexPersistence` trait, but works with `JobContext`
/// instead of `CoreContext`. This adapter wraps the job context and delegates
/// storage operations to `DatabaseStorage`, ensuring both pipelines use identical logic.
pub struct DatabaseAdapterForJob<'a> {
ctx: &'a JobContext<'a>,
library_id: Uuid,
location_root_entry_id: Option<i32>,
}
impl<'a> DatabaseAdapterForJob<'a> {
pub fn new(
ctx: &'a JobContext<'a>,
library_id: Uuid,
location_root_entry_id: Option<i32>,
) -> Self {
Self {
ctx,
library_id,
location_root_entry_id,
}
}
}
#[async_trait::async_trait]
impl<'a> IndexPersistence for DatabaseAdapterForJob<'a> {
async fn store_entry(
&self,
entry: &DirEntry,
_location_id: Option<i32>,
location_root_path: &Path,
) -> JobResult<i32> {
use crate::domain::addressing::SdPath;
use crate::ops::indexing::database_storage::DatabaseStorage;
use crate::ops::indexing::state::IndexerState;
let mut state = IndexerState::new(&SdPath::local(&entry.path));
// Cache Management: Resolve parent ID if needed (for job pipeline)
// The job processes entries in hierarchy order, but we still need to ensure
// the parent ID is cached before creating this entry
if let Some(parent_path) = entry.path.parent() {
if !state.entry_id_cache.contains_key(parent_path) {
if let Ok(Some(parent_id)) =
DatabaseStorage::resolve_parent_id(self.ctx.library_db(), parent_path).await
{
state
.entry_id_cache
.insert(parent_path.to_path_buf(), parent_id);
}
}
}
let entry_id = DatabaseStorage::create_entry(
&mut state,
self.ctx.library_db(),
Some(self.ctx.library()),
entry,
0,
location_root_path,
)
.await?;
Ok(entry_id)
}
async fn store_content_identity(
&self,
entry_id: i32,
path: &Path,
cas_id: String,
) -> JobResult<()> {
use crate::ops::indexing::database_storage::DatabaseStorage;
DatabaseStorage::link_to_content_identity(
self.ctx.library_db(),
entry_id,
path,
cas_id,
self.library_id,
)
.await
.map(|_| ())
}
async fn get_existing_entries(
&self,
indexing_path: &Path,
) -> JobResult<
HashMap<std::path::PathBuf, (i32, Option<u64>, Option<std::time::SystemTime>, u64)>,
> {
use crate::infra::db::entities::{directory_paths, entry_closure};
use sea_orm::{ColumnTrait, EntityTrait, QueryFilter};
let location_root_entry_id = match self.location_root_entry_id {
Some(id) => id,
None => return Ok(HashMap::new()),
};
let indexing_path_str = indexing_path.to_string_lossy().to_string();
let indexing_path_entry_id = if let Ok(Some(dir_record)) = directory_paths::Entity::find()
.filter(directory_paths::Column::Path.eq(&indexing_path_str))
.one(self.ctx.library_db())
.await
{
dir_record.entry_id
} else {
location_root_entry_id
};
let descendant_ids = entry_closure::Entity::find()
.filter(entry_closure::Column::AncestorId.eq(indexing_path_entry_id))
.all(self.ctx.library_db())
.await
.map_err(|e| JobError::execution(format!("Failed to query closure table: {}", e)))?
.into_iter()
.map(|ec| ec.descendant_id)
.collect::<Vec<i32>>();
let mut all_entry_ids = vec![indexing_path_entry_id];
all_entry_ids.extend(descendant_ids);
let mut existing_entries: Vec<entities::entry::Model> = Vec::new();
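// Same chunking as the delete path: keep each IN (...) query under
// SQLite's bound-parameter limit.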
let chunk_size: usize = 900;
for chunk in all_entry_ids.chunks(chunk_size) {
let mut batch = entities::entry::Entity::find()
.filter(entities::entry::Column::Id.is_in(chunk.to_vec()))
.all(self.ctx.library_db())
.await
.map_err(|e| {
JobError::execution(format!("Failed to query existing entries: {}", e))
})?;
existing_entries.append(&mut batch);
}
let mut result = HashMap::new();
self.ctx.log(format!(
"Loading {} existing entries",
existing_entries.len()
));
for entry in existing_entries {
let full_path =
crate::ops::indexing::PathResolver::get_full_path(self.ctx.library_db(), entry.id)
.await
.unwrap_or_else(|_| PathBuf::from(&entry.name));
let modified_time =
entry
.modified_at
.timestamp()
.try_into()
.ok()
.and_then(|secs: u64| {
std::time::UNIX_EPOCH.checked_add(std::time::Duration::from_secs(secs))
});
result.insert(
full_path,
(
entry.id,
entry.inode.map(|i| i as u64),
modified_time,
entry.size as u64,
),
);
}
Ok(result)
}
async fn update_entry(&self, entry_id: i32, entry: &DirEntry) -> JobResult<()> {
use crate::ops::indexing::database_storage::DatabaseStorage;
DatabaseStorage::update_entry(self.ctx.library_db(), entry_id, entry).await
}
fn is_persistent(&self) -> bool {
true
}
}

View File

@@ -0,0 +1,135 @@
//! Shared types for change detection and handling.
//!
//! This module defines the common vocabulary used by both:
//! - The detector (batch scanning during indexer jobs)
//! - The handler (real-time response to watcher events)
use crate::ops::indexing::state::EntryKind;
use std::path::PathBuf;
use std::time::SystemTime;
use uuid::Uuid;
/// A detected or reported filesystem change.
///
/// This enum represents changes that can come from either:
/// - The `ChangeDetector` during batch indexing scans
/// - The file watcher via `FsRawEventKind` conversion
#[derive(Debug, Clone)]
pub enum Change {
/// New file/directory (not in storage).
New(PathBuf),
/// File/directory modified (content or metadata changed).
Modified {
path: PathBuf,
entry_id: i32,
old_modified: Option<SystemTime>,
new_modified: Option<SystemTime>,
},
/// File/directory moved or renamed (same inode, different path).
Moved {
old_path: PathBuf,
new_path: PathBuf,
entry_id: i32,
inode: u64,
},
/// File/directory deleted (existed in storage but not on disk).
Deleted { path: PathBuf, entry_id: i32 },
}
impl Change {
/// Get the primary path affected by this change.
pub fn path(&self) -> &PathBuf {
match self {
Change::New(path) => path,
Change::Modified { path, .. } => path,
Change::Moved { new_path, .. } => new_path,
Change::Deleted { path, .. } => path,
}
}
/// Get the change type for event emission.
pub fn change_type(&self) -> ChangeType {
match self {
Change::New(_) => ChangeType::Created,
Change::Modified { .. } => ChangeType::Modified,
Change::Moved { .. } => ChangeType::Moved,
Change::Deleted { .. } => ChangeType::Deleted,
}
}
/// Create a Change from an FsRawEventKind (for watcher integration).
/// Note: watcher events carry no entry IDs, so these variants use placeholder
/// values (0) that the handler resolves against storage later.
pub fn from_fs_event(event: crate::infra::event::FsRawEventKind) -> Self {
use crate::infra::event::FsRawEventKind;
match event {
FsRawEventKind::Create { path } => Change::New(path),
FsRawEventKind::Modify { path } => Change::Modified {
path,
entry_id: 0, // Placeholder - handler will look up real ID
old_modified: None,
new_modified: None,
},
FsRawEventKind::Remove { path } => Change::Deleted {
path,
entry_id: 0, // Placeholder - handler will look up real ID
},
FsRawEventKind::Rename { from, to } => Change::Moved {
old_path: from,
new_path: to,
entry_id: 0, // Placeholder - handler will look up real ID
inode: 0,
},
}
}
}
/// Metadata about a change, populated during detection.
#[derive(Debug, Clone)]
pub struct ChangeMetadata {
pub size: u64,
pub modified: Option<SystemTime>,
pub inode: Option<u64>,
pub kind: EntryKind,
}
/// Type of change for event emission and logging.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChangeType {
Created,
Modified,
Moved,
Deleted,
}
/// Reference to an entry in either persistent or ephemeral storage.
///
/// Provides a uniform way to refer to entries regardless of storage backend.
/// Persistent entries have database IDs; ephemeral entries have synthetic IDs.
#[derive(Debug, Clone)]
pub struct EntryRef {
/// For persistent: database entry ID. For ephemeral: synthetic ID.
pub id: i32,
/// UUID for sync and event emission.
pub uuid: Option<Uuid>,
/// Full filesystem path.
pub path: PathBuf,
/// Entry kind (file/directory/symlink).
pub kind: EntryKind,
}
impl EntryRef {
pub fn is_directory(&self) -> bool {
self.kind == EntryKind::Directory
}
}
/// Configuration for change handling operations.
pub struct ChangeConfig<'a> {
pub rule_toggles: crate::ops::indexing::rules::RuleToggles,
pub location_root: &'a std::path::Path,
pub volume_backend: Option<&'a std::sync::Arc<dyn crate::volume::VolumeBackend>>,
}
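// Illustrative sketch (not part of this commit): converting a raw watcher
// event into a `Change` and inspecting it. Placeholder entry_ids (0) are
// resolved later by the handler.
fn log_change(event: crate::infra::event::FsRawEventKind) {
    let change = Change::from_fs_event(event);
    tracing::debug!(
        kind = ?change.change_type(),
        path = %change.path().display(),
        "observed filesystem change"
    );
}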

View File

@@ -1,63 +0,0 @@
//! Lightweight context abstraction for indexing operations
//!
//! Provides a minimal interface required by indexing code paths so they can run
//! either inside the job system (with `JobContext`) or outside of it (watcher
//! responder) without duplicating logic.
use sea_orm::DatabaseConnection;
use std::sync::Arc;
use uuid::Uuid;
use crate::{context::CoreContext, infra::job::prelude::JobContext, library::Library};
/// Minimal capabilities needed by indexing operations
pub trait IndexingCtx {
/// Access to the library database connection
fn library_db(&self) -> &DatabaseConnection;
/// Access to the library for sync operations (optional - only available in job context)
fn library(&self) -> Option<&Library> {
None
}
/// Lightweight logging hook
fn log(&self, message: impl AsRef<str>) {
tracing::debug!(message = %message.as_ref());
}
}
impl<'a> IndexingCtx for JobContext<'a> {
fn library_db(&self) -> &DatabaseConnection {
self.library_db()
}
fn library(&self) -> Option<&Library> {
Some(self.library())
}
}
/// Context for responder paths running outside the job system
pub struct ResponderCtx {
/// Cloned DB connection for the target library
db: DatabaseConnection,
}
impl ResponderCtx {
/// Build a responder context for a specific library
pub async fn new(context: &Arc<CoreContext>, library_id: Uuid) -> anyhow::Result<Self> {
let library: Arc<Library> = context
.get_library(library_id)
.await
.ok_or_else(|| anyhow::anyhow!("Library not found: {}", library_id))?;
Ok(Self {
db: library.db().conn().clone(),
})
}
}
impl IndexingCtx for ResponderCtx {
fn library_db(&self) -> &DatabaseConnection {
&self.db
}
}

View File

File diff suppressed because it is too large

View File

File diff suppressed because it is too large

View File

@@ -0,0 +1,326 @@
//! # Memory-Mapped Arena for Ephemeral File Nodes
//!
//! `NodeArena` stores file nodes in memory-mapped temporary files, allowing the OS
//! to page data in and out as needed. This prevents out-of-memory errors when browsing
//! large network shares or external drives with millions of files.
//!
//! Entries are stored contiguously at stable u32 indices (EntryIds), providing O(1)
//! lookup while keeping memory usage bounded. When RAM is tight, the OS pages cold
//! entries to disk automatically. The backing file is anonymous and cleaned up on drop,
//! so no manual file management is needed.
//!
//! The arena doubles capacity (1024 → 2048 → 4096 → ...) when full, minimizing
//! expensive remap operations while staying within Vec-like amortized O(1) insertion.
use super::types::{EntryId, FileNode};
use memmap2::{MmapMut, MmapOptions};
use std::{
io,
mem::{self, MaybeUninit},
num::NonZeroUsize,
slice,
};
use tempfile::NamedTempFile;
const CAPACITY: usize = 1024;
/// Slab allocator backed by an anonymous memory-mapped temporary file.
///
/// The OS manages paging, allowing large indexes to spill to disk under memory
/// pressure without crashing. EntryIds remain stable across capacity growth,
/// enabling parent-child relationships to persist through remaps.
pub struct NodeArena {
file: NamedTempFile,
mmap: MmapMut,
capacity: NonZeroUsize,
len: usize,
}
impl NodeArena {
pub fn new() -> io::Result<Self> {
Self::with_capacity(CAPACITY)
}
pub fn with_capacity(capacity: usize) -> io::Result<Self> {
let capacity = NonZeroUsize::new(capacity.max(1)).unwrap();
let mut file = NamedTempFile::new()?;
let mmap = Self::map_file(&mut file, capacity)?;
Ok(Self {
file,
mmap,
capacity,
len: 0,
})
}
fn map_file(file: &mut NamedTempFile, slots: NonZeroUsize) -> io::Result<MmapMut> {
let bytes = (slots.get() as u64).saturating_mul(mem::size_of::<FileNode>() as u64);
file.as_file_mut().set_len(bytes)?;
unsafe { MmapOptions::new().map_mut(file.as_file()) }
}
/// Doubles capacity until it reaches at least `min_capacity`.
fn ensure_capacity(&mut self, min_capacity: NonZeroUsize) -> io::Result<()> {
if min_capacity <= self.capacity {
return Ok(());
}
let mut new_capacity = self.capacity;
while new_capacity < min_capacity {
new_capacity = new_capacity.saturating_mul(NonZeroUsize::new(2).unwrap());
}
self.remap(new_capacity)
}
/// Flushes dirty pages, expands the file, and remaps with new capacity.
fn remap(&mut self, new_capacity: NonZeroUsize) -> io::Result<()> {
assert!(new_capacity.get() >= self.len);
self.mmap.flush()?;
self.mmap = Self::map_file(&mut self.file, new_capacity)?;
self.capacity = new_capacity;
Ok(())
}
fn grow(&mut self) -> io::Result<()> {
let desired = self.capacity.saturating_mul(NonZeroUsize::new(2).unwrap());
self.ensure_capacity(desired)
}
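// SAFETY invariant for the accessors below: the mapping always holds exactly
// `capacity` FileNode-sized slots, and only indices < `len` are initialized,
// so `assume_init_*` is only sound behind a `len` bounds check.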
fn entries(&self) -> &[MaybeUninit<FileNode>] {
unsafe {
slice::from_raw_parts(
self.mmap.as_ptr().cast::<MaybeUninit<FileNode>>(),
self.capacity.get(),
)
}
}
fn entries_mut(&mut self) -> &mut [MaybeUninit<FileNode>] {
unsafe {
slice::from_raw_parts_mut(
self.mmap.as_mut_ptr().cast::<MaybeUninit<FileNode>>(),
self.capacity.get(),
)
}
}
/// Appends a node and returns its stable ID.
///
/// The arena grows automatically when full, remapping to a larger capacity.
/// EntryIds remain valid across remaps since they're just indices.
pub fn insert(&mut self, node: FileNode) -> io::Result<EntryId> {
if self.len == self.capacity.get() {
self.grow()?;
}
let index = self.len;
let id = EntryId::from_usize(index);
unsafe {
self.entries_mut().get_unchecked_mut(index).write(node);
}
self.len += 1;
Ok(id)
}
pub fn get(&self, id: EntryId) -> Option<&FileNode> {
if id.as_usize() < self.len {
Some(unsafe {
self.entries()
.get_unchecked(id.as_usize())
.assume_init_ref()
})
} else {
None
}
}
pub fn get_mut(&mut self, id: EntryId) -> Option<&mut FileNode> {
if id.as_usize() < self.len {
Some(unsafe {
self.entries_mut()
.get_unchecked_mut(id.as_usize())
.assume_init_mut()
})
} else {
None
}
}
pub fn len(&self) -> usize {
self.len
}
pub fn is_empty(&self) -> bool {
self.len == 0
}
/// No-op for memory-mapped arenas; the OS manages paging.
pub fn shrink_to_fit(&mut self) {}
pub fn capacity(&self) -> usize {
self.capacity.get()
}
pub fn reserve(&mut self, additional: usize) -> io::Result<()> {
let new_capacity = self.len.saturating_add(additional);
if let Some(min_cap) = NonZeroUsize::new(new_capacity) {
self.ensure_capacity(min_cap)?;
}
Ok(())
}
pub fn iter(&self) -> impl Iterator<Item = (EntryId, &FileNode)> {
(0..self.len).map(move |i| {
let id = EntryId::from_usize(i);
let node = unsafe { self.entries().get_unchecked(i).assume_init_ref() };
(id, node)
})
}
pub fn iter_mut(&mut self) -> ArenaIterMut<'_> {
let len = self.len;
ArenaIterMut {
entries: self.entries_mut(),
len,
index: 0,
}
}
/// Reports total allocation including mmap overhead and child vectors.
pub fn memory_usage(&self) -> usize {
mem::size_of::<Self>()
+ (self.capacity.get() * mem::size_of::<FileNode>())
+ (0..self.len)
.filter_map(|i| self.get(EntryId::from_usize(i)))
.map(|n| n.children.capacity() * mem::size_of::<EntryId>())
.sum::<usize>()
}
}
pub struct ArenaIterMut<'a> {
entries: &'a mut [MaybeUninit<FileNode>],
len: usize,
index: usize,
}
impl<'a> Iterator for ArenaIterMut<'a> {
type Item = (EntryId, &'a mut FileNode);
fn next(&mut self) -> Option<Self::Item> {
if self.index >= self.len {
return None;
}
let id = EntryId::from_usize(self.index);
let node = unsafe {
let ptr = self.entries.as_mut_ptr().add(self.index);
&mut *(*ptr).as_mut_ptr()
};
self.index += 1;
Some((id, node))
}
}
impl Default for NodeArena {
fn default() -> Self {
Self::new().expect("Failed to create default NodeArena")
}
}
impl Drop for NodeArena {
fn drop(&mut self) {
for i in 0..self.len {
unsafe {
self.entries_mut().get_unchecked_mut(i).assume_init_drop();
}
}
let _ = self.mmap.flush();
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::ops::indexing::ephemeral::types::{
FileType, MaybeEntryId, NameRef, NodeState, PackedMetadata,
};
fn make_test_node(name: &'static str) -> FileNode {
let meta = PackedMetadata::new(NodeState::Accessible, FileType::File, 100);
FileNode::new(NameRef::new(name, MaybeEntryId::NONE), meta)
}
#[test]
fn test_insert_and_get() {
let mut arena = NodeArena::new().expect("failed to create arena");
let id1 = arena
.insert(make_test_node("file1.txt"))
.expect("insert failed");
let id2 = arena
.insert(make_test_node("file2.txt"))
.expect("insert failed");
assert_eq!(arena.len(), 2);
assert_eq!(arena.get(id1).unwrap().name(), "file1.txt");
assert_eq!(arena.get(id2).unwrap().name(), "file2.txt");
}
#[test]
fn test_get_nonexistent() {
let arena = NodeArena::new().expect("failed to create arena");
assert!(arena.get(EntryId::from_usize(0)).is_none());
}
#[test]
fn test_iteration() {
let mut arena = NodeArena::new().expect("failed to create arena");
arena.insert(make_test_node("a")).expect("insert failed");
arena.insert(make_test_node("b")).expect("insert failed");
arena.insert(make_test_node("c")).expect("insert failed");
let names: Vec<&str> = arena.iter().map(|(_, node)| node.name()).collect();
assert_eq!(names, vec!["a", "b", "c"]);
}
#[test]
fn test_with_capacity() {
let arena = NodeArena::with_capacity(1000).expect("failed to create arena");
assert!(arena.capacity() >= 1000);
assert!(arena.is_empty());
}
#[test]
fn test_shrink_to_fit() {
let mut arena = NodeArena::with_capacity(1000).expect("failed to create arena");
arena.insert(make_test_node("a")).expect("insert failed");
arena.shrink_to_fit();
assert!(arena.capacity() >= 1000);
}
#[test]
fn test_large_arena_growth() {
let mut arena = NodeArena::new().expect("failed to create arena");
for i in 0..10_000 {
// Leak each name: the test helper takes &'static str.
let node = make_test_node(Box::leak(format!("file{}.txt", i).into_boxed_str()));
arena.insert(node).expect("insert should succeed");
}
assert_eq!(arena.len(), 10_000);
assert!(arena.capacity() >= 10_000);
for i in 0..10_000 {
let id = EntryId::from_usize(i);
let node = arena.get(id).expect("node should exist");
assert_eq!(node.name(), format!("file{}.txt", i));
}
}
}

View File

@@ -0,0 +1,446 @@
//! # Ephemeral Index Cache
//!
//! Thread-safe wrapper around a single global `EphemeralIndex`. All browsed
//! directories share one arena and string pool, keeping memory at ~50 bytes per
//! entry regardless of how many paths the user navigates. The cache tracks which
//! paths are indexed (queryable), in-progress (being scanned), or watched
//! (receiving live filesystem updates via `MemoryAdapter`).
use super::EphemeralIndex;
use parking_lot::RwLock;
use std::{
collections::HashSet,
path::{Path, PathBuf},
sync::Arc,
time::Instant,
};
use tokio::sync::RwLock as TokioRwLock;
/// Global cache with a single unified ephemeral index
///
/// Instead of separate indexes per path, all entries live in one shared index.
/// This maximizes memory efficiency through shared string interning and arena.
pub struct EphemeralIndexCache {
/// Single global index containing all browsed entries
index: Arc<TokioRwLock<EphemeralIndex>>,
/// Paths whose immediate children have been indexed (ready for queries)
indexed_paths: RwLock<HashSet<PathBuf>>,
/// Paths currently being indexed
indexing_in_progress: RwLock<HashSet<PathBuf>>,
/// Paths registered for filesystem watching (subset of indexed_paths)
watched_paths: RwLock<HashSet<PathBuf>>,
/// When the cache was created
created_at: Instant,
}
impl EphemeralIndexCache {
/// Create a new cache with an empty global index
pub fn new() -> std::io::Result<Self> {
Ok(Self {
index: Arc::new(TokioRwLock::new(EphemeralIndex::new()?)),
indexed_paths: RwLock::new(HashSet::new()),
indexing_in_progress: RwLock::new(HashSet::new()),
watched_paths: RwLock::new(HashSet::new()),
created_at: Instant::now(),
})
}
/// Get the global index if the given path has been indexed
///
/// Returns Some(index) if this path's contents are available,
/// None if the path hasn't been browsed yet.
pub fn get_for_path(&self, path: &Path) -> Option<Arc<TokioRwLock<EphemeralIndex>>> {
let indexed = self.indexed_paths.read();
if indexed.contains(path) {
Some(self.index.clone())
} else {
None
}
}
/// Get the global index unconditionally (for internal use)
pub fn get_global_index(&self) -> Arc<TokioRwLock<EphemeralIndex>> {
self.index.clone()
}
/// Check if a path has been fully indexed
pub fn is_indexed(&self, path: &Path) -> bool {
self.indexed_paths.read().contains(path)
}
/// Check if indexing is in progress for a path
pub fn is_indexing(&self, path: &Path) -> bool {
self.indexing_in_progress.read().contains(path)
}
/// Prepare the global index for indexing a new path
///
/// Marks the path as indexing-in-progress and returns the global index.
/// The indexer job should add entries to this shared index.
///
/// If the path was previously indexed, clears its children first to
/// prevent ghost entries from deleted files.
pub fn create_for_indexing(&self, path: PathBuf) -> Arc<TokioRwLock<EphemeralIndex>> {
let mut in_progress = self.indexing_in_progress.write();
let mut indexed = self.indexed_paths.write();
// If this path was previously indexed, remove it from indexed set
// The actual clearing of stale entries happens asynchronously via clear_for_reindex
indexed.remove(&path);
in_progress.insert(path);
self.index.clone()
}
/// Clear stale entries for a path before re-indexing (async version)
///
/// Removes files and unbrowsed subdirectories, preserving subdirectories
/// that were explicitly navigated to. Verifies preserved directories still
/// exist on the filesystem and removes deleted ones from tracking.
pub async fn clear_for_reindex(&self, path: &Path) -> usize {
let indexed = self.indexed_paths.read().clone();
let mut index = self.index.write().await;
let (cleared, deleted_browsed_dirs) = index.clear_directory_children(path, &indexed);
if !deleted_browsed_dirs.is_empty() {
let mut indexed_paths = self.indexed_paths.write();
for deleted_path in deleted_browsed_dirs {
indexed_paths.remove(&deleted_path);
}
}
cleared
}
/// Mark indexing as complete for a path
///
/// Moves the path from "in progress" to "indexed" state.
pub fn mark_indexing_complete(&self, path: &Path) {
let mut in_progress = self.indexing_in_progress.write();
let mut indexed = self.indexed_paths.write();
in_progress.remove(path);
indexed.insert(path.to_path_buf());
}
/// Remove a path from the indexed set (e.g., on invalidation)
///
/// Note: This doesn't remove entries from the index itself,
/// just marks the path as needing re-indexing.
pub fn invalidate_path(&self, path: &Path) {
let mut indexed = self.indexed_paths.write();
indexed.remove(path);
}
/// Get the number of indexed paths
pub fn len(&self) -> usize {
self.indexed_paths.read().len()
}
/// Check if no paths have been indexed
pub fn is_empty(&self) -> bool {
self.indexed_paths.read().is_empty()
}
/// Get all indexed paths
pub fn indexed_paths(&self) -> Vec<PathBuf> {
self.indexed_paths.read().iter().cloned().collect()
}
/// Get all paths currently being indexed
pub fn paths_in_progress(&self) -> Vec<PathBuf> {
self.indexing_in_progress.read().iter().cloned().collect()
}
/// Register a path for filesystem watching.
///
/// When registered, the watcher service will monitor this path for changes
/// and update the ephemeral index via `MemoryAdapter`. The path
/// must already be indexed.
pub fn register_for_watching(&self, path: PathBuf) -> bool {
let indexed = self.indexed_paths.read();
if !indexed.contains(&path) {
return false;
}
drop(indexed);
let mut watched = self.watched_paths.write();
watched.insert(path);
true
}
/// Unregister a path from filesystem watching.
pub fn unregister_from_watching(&self, path: &Path) {
let mut watched = self.watched_paths.write();
watched.remove(path);
}
/// Check if a path is registered for watching.
pub fn is_watched(&self, path: &Path) -> bool {
self.watched_paths.read().contains(path)
}
/// Get all watched paths.
pub fn watched_paths(&self) -> Vec<PathBuf> {
self.watched_paths.read().iter().cloned().collect()
}
/// Find the watched root path that contains the given path.
///
/// If the given path is under a watched directory, returns that directory.
/// Used by the watcher to route events to the ephemeral handler.
pub fn find_watched_root(&self, path: &Path) -> Option<PathBuf> {
let watched = self.watched_paths.read();
let mut best_match: Option<&PathBuf> = None;
let mut best_len = 0;
for watched_path in watched.iter() {
if path.starts_with(watched_path) {
let len = watched_path.as_os_str().len();
if len > best_len {
best_len = len;
best_match = Some(watched_path);
}
}
}
best_match.cloned()
}
/// Check if any path in an event batch is under an ephemeral watched path.
///
/// Returns the watched root if found.
pub fn find_watched_root_for_any<'a, I>(&self, paths: I) -> Option<PathBuf>
where
I: IntoIterator<Item = &'a Path>,
{
for path in paths {
if let Some(root) = self.find_watched_root(path) {
return Some(root);
}
}
None
}
/// Get cache statistics
pub fn stats(&self) -> EphemeralIndexCacheStats {
let indexed = self.indexed_paths.read();
let in_progress = self.indexing_in_progress.read();
let watched = self.watched_paths.read();
EphemeralIndexCacheStats {
indexed_paths: indexed.len(),
indexing_in_progress: in_progress.len(),
watched_paths: watched.len(),
}
}
/// Get how long the cache has existed
pub fn age(&self) -> std::time::Duration {
self.created_at.elapsed()
}
/// Legacy: Get age for a specific path (returns cache age since all share one index)
pub fn get_age(&self, _path: &Path) -> Option<f64> {
Some(self.created_at.elapsed().as_secs_f64())
}
// Legacy compatibility methods
/// Legacy: Get an index by exact path (for backward compatibility)
#[deprecated(note = "Use get_for_path instead")]
pub fn get(&self, path: &Path) -> Option<Arc<TokioRwLock<EphemeralIndex>>> {
self.get_for_path(path)
}
/// Legacy: Get all cached paths (returns indexed paths)
#[deprecated(note = "Use indexed_paths instead")]
pub fn cached_paths(&self) -> Vec<PathBuf> {
self.indexed_paths()
}
/// Legacy: Insert (no-op, entries are added directly to global index)
#[deprecated(note = "Entries should be added directly to the global index")]
pub fn insert(&self, path: PathBuf, _index: Arc<TokioRwLock<EphemeralIndex>>) {
let mut indexed = self.indexed_paths.write();
indexed.insert(path);
}
/// Legacy: Remove (just invalidates the path)
#[deprecated(note = "Use invalidate_path instead")]
pub fn remove(&self, path: &Path) {
self.invalidate_path(path);
}
}
impl Default for EphemeralIndexCache {
fn default() -> Self {
Self::new().expect("Failed to create default EphemeralIndexCache")
}
}
/// Statistics about the ephemeral index cache
#[derive(Debug, Clone)]
pub struct EphemeralIndexCacheStats {
/// Number of paths that have been indexed
pub indexed_paths: usize,
/// Number of paths currently being indexed
pub indexing_in_progress: usize,
/// Number of paths registered for filesystem watching
pub watched_paths: usize,
}
impl EphemeralIndexCacheStats {
/// Legacy: total_entries now means indexed_paths
pub fn total_entries(&self) -> usize {
self.indexed_paths
}
/// Legacy: indexing_count now means indexing_in_progress
pub fn indexing_count(&self) -> usize {
self.indexing_in_progress
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_single_global_index() {
let cache = EphemeralIndexCache::new().expect("failed to create cache");
// Initially no paths are indexed
assert!(cache.is_empty());
assert!(cache.get_for_path(Path::new("/test")).is_none());
}
#[test]
fn test_indexing_workflow() {
let cache = EphemeralIndexCache::new().expect("failed to create cache");
let path = PathBuf::from("/test/path");
// Start indexing
let _index = cache.create_for_indexing(path.clone());
assert!(cache.is_indexing(&path));
assert!(!cache.is_indexed(&path));
// Complete indexing
cache.mark_indexing_complete(&path);
assert!(!cache.is_indexing(&path));
assert!(cache.is_indexed(&path));
// Now get_for_path returns the index
assert!(cache.get_for_path(&path).is_some());
}
#[test]
fn test_shared_index_across_paths() {
let cache = EphemeralIndexCache::new().expect("failed to create cache");
let path1 = PathBuf::from("/test/path1");
let path2 = PathBuf::from("/test/path2");
// Start indexing both paths
let index1 = cache.create_for_indexing(path1.clone());
let index2 = cache.create_for_indexing(path2.clone());
// They should be the same index
assert!(Arc::ptr_eq(&index1, &index2));
// Complete both
cache.mark_indexing_complete(&path1);
cache.mark_indexing_complete(&path2);
// Both paths now indexed
assert!(cache.is_indexed(&path1));
assert!(cache.is_indexed(&path2));
assert_eq!(cache.len(), 2);
}
#[test]
fn test_invalidate_path() {
let cache = EphemeralIndexCache::new().expect("failed to create cache");
let path = PathBuf::from("/test/path");
// Index the path
let _index = cache.create_for_indexing(path.clone());
cache.mark_indexing_complete(&path);
assert!(cache.is_indexed(&path));
// Invalidate it
cache.invalidate_path(&path);
assert!(!cache.is_indexed(&path));
// get_for_path now returns None
assert!(cache.get_for_path(&path).is_none());
}
#[test]
fn test_stats() {
let cache = EphemeralIndexCache::new().expect("failed to create cache");
let path1 = PathBuf::from("/ready");
let path2 = PathBuf::from("/in_progress");
// One indexed, one in progress
let _index = cache.create_for_indexing(path1.clone());
cache.mark_indexing_complete(&path1);
let _index = cache.create_for_indexing(path2.clone());
let stats = cache.stats();
assert_eq!(stats.indexed_paths, 1);
assert_eq!(stats.indexing_in_progress, 1);
}
#[test]
fn test_watch_registration() {
let cache = EphemeralIndexCache::new().expect("failed to create cache");
let path = PathBuf::from("/test/watched");
// Can't watch a path that's not indexed
assert!(!cache.register_for_watching(path.clone()));
assert!(!cache.is_watched(&path));
// Index the path first
let _index = cache.create_for_indexing(path.clone());
cache.mark_indexing_complete(&path);
// Now we can register for watching
assert!(cache.register_for_watching(path.clone()));
assert!(cache.is_watched(&path));
// Stats should reflect watched path
let stats = cache.stats();
assert_eq!(stats.watched_paths, 1);
// Unregister
cache.unregister_from_watching(&path);
assert!(!cache.is_watched(&path));
}
#[test]
fn test_find_watched_root() {
let cache = EphemeralIndexCache::new().expect("failed to create cache");
let root = PathBuf::from("/mnt/nas");
let child = PathBuf::from("/mnt/nas/documents/report.pdf");
// Index and watch the root
let _index = cache.create_for_indexing(root.clone());
cache.mark_indexing_complete(&root);
cache.register_for_watching(root.clone());
// Child path should find the watched root
assert_eq!(cache.find_watched_root(&child), Some(root.clone()));
// Unrelated path should not find a root
assert_eq!(cache.find_watched_root(Path::new("/other/path")), None);
}
}

View File

@@ -0,0 +1,527 @@
//! Memory-efficient index for browsing paths outside managed locations.
//!
//! Ephemeral indexing lets users navigate unmanaged directories (network shares,
//! external drives) without adding them as permanent locations. Instead of writing
//! to the database, entries live in this memory-only structure until the session
//! ends or the path is promoted to a managed location.
//!
//! Memory usage is ~50 bytes per entry vs ~200 bytes with a naive `HashMap<PathBuf, Entry>`
//! approach. The optimization comes from:
//! - **NodeArena:** Contiguous slab allocation with pointer-sized entry IDs
//! - **NameCache:** String interning (one copy of "index.js" for thousands of node_modules files)
//! - **NameRegistry:** Ordered-map (BTreeMap) name and prefix lookup without full-text indexing overhead
//!
//! Multiple directory trees can coexist in the same index (e.g., browsing both
//! `/mnt/nas` and `/media/usb` simultaneously), sharing the string interning pool
//! for maximum deduplication.
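//!
//! A minimal usage sketch (the `meta_a`/`meta_b` values stand in for
//! `EntryMetadata` built elsewhere):
//!
//! ```rust,ignore
//! // Two roots coexist in one index and share the interning pool.
//! let mut idx = EphemeralIndex::new()?;
//! idx.add_entry(PathBuf::from("/mnt/nas/readme.md"), Uuid::new_v4(), meta_a)?;
//! idx.add_entry(PathBuf::from("/media/usb/readme.md"), Uuid::new_v4(), meta_b)?;
//! // "readme.md" is interned once but resolves to both paths.
//! assert_eq!(idx.find_by_name("readme.md").len(), 2);
//! ```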
use crate::domain::ContentKind;
use crate::filetype::FileTypeRegistry;
use crate::ops::indexing::database_storage::EntryMetadata;
use crate::ops::indexing::state::{EntryKind, IndexerStats};
use super::types::{FileNode, FileType, MaybeEntryId, NameRef, NodeState, PackedMetadata};
use super::{EntryId, NameCache, NameRegistry, NodeArena};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{Duration, Instant};
use uuid::Uuid;
/// Memory-efficient index for browsing unmanaged paths.
pub struct EphemeralIndex {
arena: NodeArena,
cache: Arc<NameCache>,
registry: NameRegistry,
path_index: HashMap<PathBuf, EntryId>,
entry_uuids: HashMap<PathBuf, Uuid>,
content_kinds: HashMap<PathBuf, ContentKind>,
created_at: Instant,
last_accessed: Instant,
pub stats: IndexerStats,
}
impl std::fmt::Debug for EphemeralIndex {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("EphemeralIndex")
.field("entry_count", &self.arena.len())
.field("interned_names", &self.cache.len())
.field("path_count", &self.path_index.len())
.finish()
}
}
impl EphemeralIndex {
pub fn new() -> std::io::Result<Self> {
let cache = Arc::new(NameCache::new());
let arena = NodeArena::new()?;
let registry = NameRegistry::new();
let now = Instant::now();
Ok(Self {
arena,
cache,
registry,
path_index: HashMap::new(),
entry_uuids: HashMap::new(),
content_kinds: HashMap::new(),
created_at: now,
last_accessed: now,
stats: IndexerStats::default(),
})
}
/// Ensures a directory exists, creating all missing ancestors recursively.
///
/// This method guarantees that `list_directory()` works immediately after
/// `add_entry()` without a separate tree-building pass. Parent directories
/// are created from root to leaf, so the full ancestor chain exists before
/// any child is added.
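    ///
    /// A sketch of the guarantee (assuming a fresh index):
    ///
    /// ```rust,ignore
    /// let id = index.ensure_directory(Path::new("/a/b"))?;
    /// // Both /a and /a/b now exist as directory entries.
    /// assert!(index.has_entry(Path::new("/a")));
    /// assert!(index.has_entry(Path::new("/a/b")));
    /// ```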
pub fn ensure_directory(&mut self, path: &Path) -> std::io::Result<EntryId> {
if let Some(&id) = self.path_index.get(path) {
return Ok(id);
}
let parent_id = if let Some(parent_path) = path.parent() {
if parent_path.as_os_str().is_empty() {
None
} else {
Some(self.ensure_directory(parent_path)?)
}
} else {
None
};
let name = self.cache.intern(
path.file_name()
.map(|s| s.to_string_lossy())
.as_deref()
.unwrap_or("/"),
);
let parent_ref = parent_id
.map(MaybeEntryId::some)
.unwrap_or(MaybeEntryId::NONE);
let meta = PackedMetadata::new(NodeState::Accessible, FileType::Directory, 0);
let node = FileNode::new(NameRef::new(name, parent_ref), meta);
let id = self.arena.insert(node)?;
// Add to parent's children
if let Some(parent_id) = parent_id {
if let Some(parent) = self.arena.get_mut(parent_id) {
parent.add_child(id);
}
}
self.path_index.insert(path.to_path_buf(), id);
self.registry.insert(name, id);
let uuid = Uuid::new_v4();
self.entry_uuids.insert(path.to_path_buf(), uuid);
Ok(id)
}
/// Adds an entry to the index, returning its content kind if successful.
///
/// Content kind is identified by file extension (no I/O needed), which is
/// sufficient for ephemeral browsing where speed is critical. Returns Ok(None)
/// if the entry already exists (prevents duplicate entries when re-indexing
/// a directory).
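    ///
    /// A sketch of the duplicate-skip behavior (the `meta` value stands in for
    /// an `EntryMetadata` built elsewhere):
    ///
    /// ```rust,ignore
    /// let first = index.add_entry(path.clone(), Uuid::new_v4(), meta.clone())?;
    /// assert!(first.is_some());
    /// let second = index.add_entry(path, Uuid::new_v4(), meta)?;
    /// assert!(second.is_none()); // already present, left untouched
    /// ```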
pub fn add_entry(
&mut self,
path: PathBuf,
uuid: Uuid,
metadata: EntryMetadata,
) -> std::io::Result<Option<ContentKind>> {
if self.path_index.contains_key(&path) {
tracing::trace!("Skipping duplicate entry: {}", path.display());
return Ok(None);
}
        // Ensure parent directories exist before adding this entry, building the ancestor
        // chain from root to leaf. Parents are resolved before this entry's name is
        // interned, so the NameCache lock is never held across the recursive call.
let parent_id = if let Some(parent_path) = path.parent() {
if parent_path.as_os_str().is_empty() {
None
} else if let Some(&existing_id) = self.path_index.get(parent_path) {
Some(existing_id)
} else {
Some(self.ensure_directory(parent_path)?)
}
} else {
None
};
let name = self.cache.intern(
path.file_name()
.map(|s| s.to_string_lossy())
.as_deref()
.unwrap_or("unknown"),
);
let file_type = FileType::from(metadata.kind);
let meta = PackedMetadata::new(NodeState::Accessible, file_type, metadata.size)
.with_times(metadata.modified, metadata.created);
let parent_ref = parent_id
.map(MaybeEntryId::some)
.unwrap_or(MaybeEntryId::NONE);
let node = FileNode::new(NameRef::new(name, parent_ref), meta);
let id = self.arena.insert(node)?;
// Add to parent's children
if let Some(parent_id) = parent_id {
if let Some(parent) = self.arena.get_mut(parent_id) {
parent.add_child(id);
}
}
        let content_kind = if metadata.kind == EntryKind::File {
            let registry = FileTypeRegistry::default();
            registry.identify_by_extension(&path)
        } else {
            // Directories and symlinks carry no extension-derived kind.
            ContentKind::Unknown
        };
self.path_index.insert(path.clone(), id);
self.registry.insert(name, id);
self.entry_uuids.insert(path.clone(), uuid);
self.content_kinds.insert(path, content_kind);
self.last_accessed = Instant::now();
Ok(Some(content_kind))
}
pub fn get_entry(&mut self, path: &PathBuf) -> Option<EntryMetadata> {
let id = self.path_index.get(path)?;
let node = self.arena.get(*id)?;
self.last_accessed = Instant::now();
Some(EntryMetadata {
path: path.clone(),
kind: EntryKind::from(node.meta.file_type()),
size: node.meta.size(),
modified: node.meta.mtime_as_system_time(),
accessed: None,
created: node.meta.ctime_as_system_time(),
inode: None,
permissions: None,
is_hidden: path
.file_name()
.and_then(|n| n.to_str())
.map(|n| n.starts_with('.'))
.unwrap_or(false),
})
}
/// Get entry reference for read-only access (doesn't update last_accessed)
pub fn get_entry_ref(&self, path: &PathBuf) -> Option<EntryMetadata> {
let id = self.path_index.get(path)?;
let node = self.arena.get(*id)?;
Some(EntryMetadata {
path: path.clone(),
kind: EntryKind::from(node.meta.file_type()),
size: node.meta.size(),
modified: node.meta.mtime_as_system_time(),
accessed: None,
created: node.meta.ctime_as_system_time(),
inode: None,
permissions: None,
is_hidden: path
.file_name()
.and_then(|n| n.to_str())
.map(|n| n.starts_with('.'))
.unwrap_or(false),
})
}
pub fn get_entry_uuid(&self, path: &PathBuf) -> Option<Uuid> {
self.entry_uuids.get(path).copied()
}
pub fn get_content_kind(&self, path: &PathBuf) -> ContentKind {
self.content_kinds
.get(path)
.copied()
.unwrap_or(ContentKind::Unknown)
}
pub fn list_directory(&self, path: &Path) -> Option<Vec<PathBuf>> {
let id = self.path_index.get(path)?;
let node = self.arena.get(*id)?;
Some(
node.children
.iter()
.filter_map(|&child_id| self.reconstruct_path(child_id))
.collect(),
)
}
/// Clears entries before re-indexing, preserving explicitly browsed subdirectories.
///
/// Since ephemeral indexing is shallow, subdirectories that were explicitly
/// navigated to (in `indexed_paths`) should be preserved as separate index
/// branches. Unbrowsed subdirectories are refreshed with the parent.
///
/// Returns (cleared_count, deleted_browsed_dirs) where deleted_browsed_dirs
/// contains paths that were in indexed_paths but no longer exist on disk.
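    ///
    /// A sketch (assuming `/root` is indexed and `/root/docs` was explicitly
    /// browsed and still exists on disk):
    ///
    /// ```rust,ignore
    /// let browsed: HashSet<PathBuf> = [PathBuf::from("/root/docs")].into();
    /// let (cleared, deleted) = index.clear_directory_children(Path::new("/root"), &browsed);
    /// // Files under /root are cleared; /root/docs survives as its own branch.
    /// ```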
pub fn clear_directory_children(
&mut self,
dir_path: &Path,
indexed_paths: &std::collections::HashSet<PathBuf>,
) -> (usize, Vec<PathBuf>) {
let dir_id = match self.path_index.get(dir_path) {
Some(&id) => id,
None => return (0, Vec::new()),
};
let dir_node = match self.arena.get(dir_id) {
Some(node) => node,
None => return (0, Vec::new()),
};
let mut deleted_browsed_dirs = Vec::new();
// Collect children to remove
        let children_to_remove: Vec<(PathBuf, EntryId)> = dir_node
.children
.iter()
.filter_map(|&child_id| {
let child_node = self.arena.get(child_id)?;
let child_path = self.reconstruct_path(child_id)?;
// Preserve subdirectories that were explicitly browsed AND still exist
if child_node.is_directory() && indexed_paths.contains(&child_path) {
// Verify the directory still exists on the filesystem
if std::fs::metadata(&child_path).is_ok() {
return None; // Preserve - still exists and was browsed
}
// Directory was deleted - track for removal from indexed_paths
tracing::debug!(
"Removing deleted browsed directory: {}",
child_path.display()
);
deleted_browsed_dirs.push(child_path.clone());
}
// Remove everything else (files, unbrowsed directories, deleted directories)
Some((child_path, child_id))
})
.collect();
let cleared = children_to_remove.len();
// Remove from indexes
for (child_path, _) in &children_to_remove {
self.path_index.remove(child_path);
self.entry_uuids.remove(child_path);
self.content_kinds.remove(child_path);
}
// Update parent's children list
if let Some(dir_node) = self.arena.get_mut(dir_id) {
let removed_ids: std::collections::HashSet<_> =
children_to_remove.iter().map(|(_, id)| id).collect();
dir_node
.children
.retain(|child_id| !removed_ids.contains(child_id));
}
if cleared > 0 {
tracing::debug!(
"Cleared {} entries from {} (preserved browsed subdirs)",
cleared,
dir_path.display()
);
}
(cleared, deleted_browsed_dirs)
}
fn reconstruct_path(&self, id: EntryId) -> Option<PathBuf> {
let mut segments = Vec::new();
let mut current = id;
while let Some(node) = self.arena.get(current) {
segments.push(node.name().to_owned());
if let Some(parent) = node.parent() {
current = parent;
} else {
break;
}
}
if segments.is_empty() {
return None;
}
let mut path = PathBuf::from("/");
for segment in segments.into_iter().rev() {
path.push(segment);
}
Some(path)
}
pub fn find_by_name(&self, name: &str) -> Vec<PathBuf> {
self.registry
.get(name)
.map(|ids| {
ids.iter()
.filter_map(|&id| self.reconstruct_path(id))
.collect()
})
.unwrap_or_default()
}
pub fn find_by_prefix(&self, prefix: &str) -> Vec<PathBuf> {
self.registry
.find_prefix(prefix)
.iter()
.filter_map(|&id| self.reconstruct_path(id))
.collect()
}
pub fn age(&self) -> Duration {
self.created_at.elapsed()
}
pub fn idle_time(&self) -> Duration {
self.last_accessed.elapsed()
}
pub fn len(&self) -> usize {
self.arena.len()
}
pub fn is_empty(&self) -> bool {
self.arena.is_empty()
}
pub fn memory_usage(&self) -> usize {
self.arena.memory_usage()
+ self.cache.memory_usage()
+ self.registry.memory_usage()
+ self.path_index.capacity()
* (std::mem::size_of::<PathBuf>() + std::mem::size_of::<EntryId>())
+ self.entry_uuids.capacity()
* (std::mem::size_of::<PathBuf>() + std::mem::size_of::<Uuid>())
}
pub fn get_stats(&self) -> EphemeralIndexStats {
EphemeralIndexStats {
total_entries: self.arena.len(),
unique_names: self.registry.unique_names(),
interned_strings: self.cache.len(),
memory_bytes: self.memory_usage(),
}
}
pub fn content_kinds_count(&self) -> usize {
self.content_kinds.len()
}
pub fn path_index_count(&self) -> usize {
self.path_index.len()
}
/// Check if an entry exists at the given path.
pub fn has_entry(&self, path: &Path) -> bool {
self.path_index.contains_key(path)
}
/// Remove an entry at the given path.
///
/// Returns true if the entry was removed, false if it didn't exist.
/// For directories, this only removes the directory entry itself, not its children.
/// Use `remove_directory_tree` to remove a directory and all its descendants.
pub fn remove_entry(&mut self, path: &Path) -> bool {
let existed = self.path_index.remove(path).is_some();
self.entry_uuids.remove(path);
self.content_kinds.remove(path);
existed
}
/// Remove a directory and all its descendants.
///
/// Returns the number of entries removed.
pub fn remove_directory_tree(&mut self, path: &Path) -> usize {
        let prefix = path.to_string_lossy().to_string();
        let prefix_with_sep = format!("{}/", prefix);
        let keys_to_remove: Vec<_> = self
            .path_index
            .keys()
            .filter(|k| {
                let k_str = k.to_string_lossy();
                k_str == prefix || k_str.starts_with(&prefix_with_sep)
            })
.cloned()
.collect();
let count = keys_to_remove.len();
for key in keys_to_remove {
self.path_index.remove(&key);
self.entry_uuids.remove(&key);
self.content_kinds.remove(&key);
}
count
}
/// Reconstructs paths for all entries and returns them as a HashMap.
///
/// For large indexes, this can be expensive since it walks the tree to rebuild
/// every path. Prefer using `list_directory()` or `find_by_name()` for targeted
/// queries when possible.
pub fn entries(&self) -> HashMap<PathBuf, EntryMetadata> {
let mut result = HashMap::with_capacity(self.path_index.len());
for (path, &id) in &self.path_index {
if let Some(node) = self.arena.get(id) {
let metadata = EntryMetadata {
path: path.clone(),
kind: EntryKind::from(node.meta.file_type()),
size: node.meta.size(),
modified: node.meta.mtime_as_system_time(),
accessed: None,
created: node.meta.ctime_as_system_time(),
inode: None,
permissions: None,
is_hidden: path
.file_name()
.and_then(|n| n.to_str())
.map(|n| n.starts_with('.'))
.unwrap_or(false),
};
result.insert(path.clone(), metadata);
}
}
result
}
}
impl Default for EphemeralIndex {
fn default() -> Self {
Self::new().expect("Failed to create default EphemeralIndex")
}
}
/// Statistics about an ephemeral index
#[derive(Debug, Clone)]
pub struct EphemeralIndexStats {
pub total_entries: usize,
pub unique_names: usize,
pub interned_strings: usize,
pub memory_bytes: usize,
}

View File

@@ -0,0 +1,56 @@
//! High-performance ephemeral index storage backend
//!
//! This module provides memory-efficient storage for ephemeral file indexes,
//! achieving a 4-6x memory reduction compared to HashMap<PathBuf, EntryMetadata> (see the table below).
//!
//! ## Architecture
//!
//! ```text
//! EphemeralIndex
//! ├── NodeArena: Vec<FileNode> - Contiguous node storage
//! ├── NameCache: BTreeSet<Box<str>> - String interning pool
//! ├── NameRegistry: BTreeMap - Fast name lookups
//! └── path_index: HashMap<PathBuf, EntryId> - Path to node mapping
//! ```
//!
//! ## Memory Comparison
//!
//! | Files | HashMap Approach | This Module | Reduction |
//! |-------|------------------|-------------|-----------|
//! | 10K | 2-3 MB | 0.5 MB | 4-6x |
//! | 100K | 20-30 MB | 5 MB | 4-6x |
//! | 1M | 200-300 MB | 50 MB | 4-6x |
//!
//! ## Usage
//!
//! ```rust,ignore
//! use sd_core::ops::indexing::ephemeral::EphemeralIndex;
//!
//! // Create a unified index (supports multiple directory trees)
//! let mut index = EphemeralIndex::new()?;
//!
//! // Add entries with full paths - parent chain is created automatically
//! index.add_entry(path, uuid, metadata)?;
//!
//! // Query
//! let entry = index.get_entry(&path);
//! let children = index.list_directory(&parent);
//! ```
pub mod arena;
pub mod cache;
pub mod index;
pub mod name;
pub mod registry;
pub mod responder;
pub mod types;
pub mod writer;
// Re-export public types
pub use arena::NodeArena;
pub use cache::EphemeralIndexCache;
pub use index::{EphemeralIndex, EphemeralIndexStats};
pub use name::NameCache;
pub use registry::NameRegistry;
pub use types::{EntryId, FileNode, FileType, MaybeEntryId, NameRef, NodeState, PackedMetadata};
pub use writer::MemoryAdapter;

View File

@@ -0,0 +1,206 @@
//! String interning cache for deduplicating filenames
//!
//! The NameCache provides global string interning to reduce memory usage.
//! Common filenames like `.git`, `node_modules`, `target`, `README.md` etc.
//! are stored only once and referenced via pointers.
//!
//! Benefits:
//! - 30-40% memory reduction on typical filesystems
//! - Pointer-based equality (faster comparisons)
//! - Stable references for NameRef
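//!
//! A minimal sketch of the interning contract (mirrors the tests below):
//!
//! ```rust,ignore
//! let cache = NameCache::new();
//! let a = cache.intern("README.md");
//! let b = cache.intern("README.md");
//! // The second intern returns the same stable pointer, not a new allocation.
//! assert!(std::ptr::eq(a, b));
//! ```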
use parking_lot::Mutex;
use std::collections::BTreeSet;
/// Global string interning pool for deduplicating filenames
///
/// Strings are stored in a BTreeSet for ordered iteration and fast lookup.
/// The Mutex ensures thread-safe access for concurrent indexing.
pub struct NameCache {
inner: Mutex<BTreeSet<Box<str>>>,
}
impl NameCache {
/// Create a new empty cache
pub fn new() -> Self {
Self {
inner: Mutex::new(BTreeSet::new()),
}
}
/// Intern a string and return a stable reference
///
/// If the string already exists, returns a reference to the existing copy.
/// If not, inserts a new copy and returns a reference to it.
///
/// # Safety
/// The returned reference is valid as long as the NameCache exists.
/// NameCache never removes strings, so references remain stable.
pub fn intern<'cache>(&'cache self, name: &str) -> &'cache str {
let mut inner = self.inner.lock();
// Check if already interned
if let Some(existing) = inner.get(name) {
// SAFETY: BTreeSet owns the Box<str>, which lives as long as NameCache.
// We return a reference with lifetime tied to &self.
return unsafe { &*(existing.as_ref() as *const str) };
}
// Insert new string
let boxed: Box<str> = name.into();
let ptr = boxed.as_ref() as *const str;
inner.insert(boxed);
// SAFETY: We just inserted the string, and NameCache never removes strings.
// The pointer remains valid as long as NameCache exists.
unsafe { &*ptr }
}
/// Get the number of interned strings
pub fn len(&self) -> usize {
self.inner.lock().len()
}
/// Check if the cache is empty
pub fn is_empty(&self) -> bool {
self.inner.lock().is_empty()
}
/// Check if a string is already interned
pub fn contains(&self, name: &str) -> bool {
self.inner.lock().contains(name)
}
/// Get approximate memory usage in bytes
pub fn memory_usage(&self) -> usize {
let inner = self.inner.lock();
// Base struct size + BTreeSet overhead + string contents
std::mem::size_of::<Self>()
+ inner.len() * std::mem::size_of::<Box<str>>()
+ inner.iter().map(|s| s.len()).sum::<usize>()
}
/// Iterate over all interned strings
pub fn iter(&self) -> impl Iterator<Item = String> {
let inner = self.inner.lock();
inner
.iter()
.map(|s| s.to_string())
.collect::<Vec<_>>()
.into_iter()
}
}
impl Default for NameCache {
fn default() -> Self {
Self::new()
}
}
// SAFETY: NameCache uses Mutex for thread-safe access
unsafe impl Send for NameCache {}
unsafe impl Sync for NameCache {}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_intern_returns_same_pointer() {
let cache = NameCache::new();
let s1 = cache.intern("hello");
let s2 = cache.intern("hello");
// Same pointer means same interned string
assert!(std::ptr::eq(s1, s2));
assert_eq!(s1, "hello");
}
#[test]
fn test_intern_different_strings() {
let cache = NameCache::new();
let s1 = cache.intern("hello");
let s2 = cache.intern("world");
assert!(!std::ptr::eq(s1, s2));
assert_eq!(s1, "hello");
assert_eq!(s2, "world");
}
#[test]
fn test_len_and_contains() {
let cache = NameCache::new();
assert_eq!(cache.len(), 0);
assert!(!cache.contains("test"));
cache.intern("test");
assert_eq!(cache.len(), 1);
assert!(cache.contains("test"));
// Interning same string doesn't increase count
cache.intern("test");
assert_eq!(cache.len(), 1);
}
#[test]
fn test_common_filenames() {
let cache = NameCache::new();
// Simulate common filesystem patterns
let common_names = [
".git",
".gitignore",
"node_modules",
"target",
"Cargo.toml",
"README.md",
"package.json",
"src",
"lib",
"main.rs",
];
for name in &common_names {
cache.intern(name);
}
// All unique, so length equals count
assert_eq!(cache.len(), common_names.len());
// Interning again returns same references
for name in &common_names {
let ptr1 = cache.intern(name);
let ptr2 = cache.intern(name);
assert!(std::ptr::eq(ptr1, ptr2));
}
}
#[test]
fn test_thread_safety() {
use std::sync::Arc;
use std::thread;
let cache = Arc::new(NameCache::new());
let mut handles = vec![];
for i in 0..10 {
let cache = Arc::clone(&cache);
handles.push(thread::spawn(move || {
for j in 0..100 {
let name = format!("file_{}_{}", i, j);
cache.intern(&name);
}
}));
}
for handle in handles {
handle.join().unwrap();
}
// Should have 1000 unique strings
assert_eq!(cache.len(), 1000);
}
}

View File

@@ -0,0 +1,224 @@
//! Name-based lookup registry for fast queries
//!
//! The NameRegistry provides O(log k) lookups by filename across the entire index.
//! This enables efficient queries like "find all files named 'package.json'".
//!
//! Features:
//! - Fast exact name lookup: O(log k) where k = unique filenames
//! - Prefix search for autocomplete
//! - Multiple entries per name (common for files like 'index.js', 'README.md')
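//!
//! A small sketch (in real use the names come from `NameCache::intern`):
//!
//! ```rust,ignore
//! let mut reg = NameRegistry::new();
//! reg.insert("index.js", EntryId::from_usize(1));
//! reg.insert("index.ts", EntryId::from_usize(2));
//! assert_eq!(reg.find_prefix("index.").len(), 2);
//! ```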
use super::types::EntryId;
use std::collections::BTreeMap;
/// Maps filenames to node IDs for fast name-based queries
///
/// Uses BTreeMap for ordered iteration and efficient prefix searches.
/// Each name can map to multiple EntryIds (e.g., many 'index.js' files).
pub struct NameRegistry {
/// Maps interned name pointers to entry IDs
/// Using *const str as key since we use interned strings from NameCache
map: BTreeMap<NameKey, Vec<EntryId>>,
}
/// Key type for the registry that wraps an interned string pointer
#[derive(Clone, Copy)]
struct NameKey(*const str);
impl NameKey {
    fn as_str(&self) -> &str {
        // SAFETY: The pointer comes from NameCache and remains valid
        unsafe { &*self.0 }
    }
}
// Equality must compare string contents, not pointer identity: the derived
// pointer-based Eq would disagree with the content-based Ord below, and
// BTreeMap keys require Eq and Ord to be consistent.
impl PartialEq for NameKey {
    fn eq(&self, other: &Self) -> bool {
        self.as_str() == other.as_str()
    }
}
impl Eq for NameKey {}
impl PartialOrd for NameKey {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
impl Ord for NameKey {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.as_str().cmp(other.as_str())
}
}
// SAFETY: NameKey contains a pointer to an interned string that lives
// as long as the NameCache. Since NameCache is thread-safe and never
// deallocates, NameKey is safe to use across threads.
unsafe impl Send for NameKey {}
unsafe impl Sync for NameKey {}
impl NameRegistry {
/// Create a new empty registry
pub fn new() -> Self {
Self {
map: BTreeMap::new(),
}
}
/// Register a name-to-entry mapping
///
/// # Arguments
/// * `name` - An interned string reference from NameCache
/// * `id` - The EntryId to associate with this name
pub fn insert(&mut self, name: &str, id: EntryId) {
let key = NameKey(name as *const str);
self.map.entry(key).or_default().push(id);
}
    /// Get all entries with the exact name
    ///
    /// O(log k) even for non-interned queries: `NameKey`'s `Ord` compares
    /// string contents, so the BTreeMap can be searched directly.
    pub fn get(&self, name: &str) -> Option<&[EntryId]> {
        // The key only lives for this lookup; wrapping a non-interned pointer
        // is fine because it is dereferenced solely during the comparisons.
        let key = NameKey(name as *const str);
        self.map.get(&key).map(|v| v.as_slice())
    }
    /// Get all entries with the exact name (using interned pointer)
    ///
    /// Equivalent to `get`; retained for callers that already hold an interned string.
pub fn get_interned(&self, name: &str) -> Option<&[EntryId]> {
let key = NameKey(name as *const str);
self.map.get(&key).map(|v| v.as_slice())
}
/// Find all entries with names starting with the given prefix
///
/// Useful for autocomplete and directory listings
pub fn find_prefix(&self, prefix: &str) -> Vec<EntryId> {
self.map
.iter()
.filter(|(k, _)| k.as_str().starts_with(prefix))
.flat_map(|(_, ids)| ids.iter().copied())
.collect()
}
/// Find all entries with names containing the given substring
pub fn find_containing(&self, substring: &str) -> Vec<EntryId> {
self.map
.iter()
.filter(|(k, _)| k.as_str().contains(substring))
.flat_map(|(_, ids)| ids.iter().copied())
.collect()
}
/// Get the number of unique names
pub fn unique_names(&self) -> usize {
self.map.len()
}
/// Get the total number of entries
pub fn total_entries(&self) -> usize {
self.map.values().map(|v| v.len()).sum()
}
/// Check if a name exists in the registry
pub fn contains(&self, name: &str) -> bool {
self.get(name).is_some()
}
/// Get approximate memory usage in bytes
pub fn memory_usage(&self) -> usize {
std::mem::size_of::<Self>()
+ self.map.len() * std::mem::size_of::<(NameKey, Vec<EntryId>)>()
+ self
.map
.values()
.map(|v| v.capacity() * std::mem::size_of::<EntryId>())
.sum::<usize>()
}
/// Iterate over all (name, entry_ids) pairs
pub fn iter(&self) -> impl Iterator<Item = (&str, &[EntryId])> {
self.map.iter().map(|(k, v)| (k.as_str(), v.as_slice()))
}
}
impl Default for NameRegistry {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_insert_and_get() {
let mut registry = NameRegistry::new();
let name = "test.txt";
let id = EntryId::from_usize(42);
registry.insert(name, id);
let result = registry.get("test.txt");
assert!(result.is_some());
assert_eq!(result.unwrap(), &[id]);
}
#[test]
fn test_multiple_entries_same_name() {
let mut registry = NameRegistry::new();
// Many projects have multiple index.js files
let name = "index.js";
let ids: Vec<EntryId> = (0..5).map(|i| EntryId::from_usize(i)).collect();
for &id in &ids {
registry.insert(name, id);
}
let result = registry.get("index.js").unwrap();
assert_eq!(result.len(), 5);
}
#[test]
fn test_find_prefix() {
let mut registry = NameRegistry::new();
registry.insert("README.md", EntryId::from_usize(1));
registry.insert("README.txt", EntryId::from_usize(2));
registry.insert("README", EntryId::from_usize(3));
registry.insert("Rakefile", EntryId::from_usize(4));
let results = registry.find_prefix("README");
assert_eq!(results.len(), 3);
}
#[test]
fn test_find_containing() {
let mut registry = NameRegistry::new();
registry.insert("my_test.rs", EntryId::from_usize(1));
registry.insert("test_utils.rs", EntryId::from_usize(2));
registry.insert("integration_test.rs", EntryId::from_usize(3));
registry.insert("main.rs", EntryId::from_usize(4));
let results = registry.find_containing("test");
assert_eq!(results.len(), 3);
}
#[test]
fn test_unique_names_vs_total() {
let mut registry = NameRegistry::new();
// 3 unique names, 6 total entries
registry.insert("a.txt", EntryId::from_usize(1));
registry.insert("a.txt", EntryId::from_usize(2));
registry.insert("b.txt", EntryId::from_usize(3));
registry.insert("b.txt", EntryId::from_usize(4));
registry.insert("c.txt", EntryId::from_usize(5));
registry.insert("c.txt", EntryId::from_usize(6));
assert_eq!(registry.unique_names(), 3);
assert_eq!(registry.total_entries(), 6);
}
}

View File

@@ -0,0 +1,126 @@
//! Ephemeral responder for updating in-memory indexes on filesystem changes.
//!
//! This module processes filesystem events against the ephemeral index cache.
//! When a user is browsing an ephemeral directory (external drive, network share)
//! and files change, the responder updates the in-memory index to reflect changes.
//!
//! ## Usage
//!
//! ```rust,ignore
//! use sd_core::ops::indexing::ephemeral::responder;
//!
//! // Check if an event should be handled by the ephemeral system
//! if let Some(root) = responder::find_ephemeral_root(&path, &context) {
//! responder::process_event(&context, &root, event_kind).await?;
//! }
//! ```
use crate::context::CoreContext;
use crate::infra::event::FsRawEventKind;
use crate::ops::indexing::change_detection::{self, ChangeConfig};
use crate::ops::indexing::rules::RuleToggles;
use anyhow::Result;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use super::MemoryAdapter;
/// Check if a path falls under an ephemeral watched directory.
///
/// Returns the watched root path if found.
pub fn find_ephemeral_root(path: &Path, context: &CoreContext) -> Option<PathBuf> {
context.ephemeral_cache().find_watched_root(path)
}
/// Check if any path in a batch of events falls under an ephemeral watched directory.
pub fn find_ephemeral_root_for_events(
events: &[FsRawEventKind],
context: &CoreContext,
) -> Option<PathBuf> {
let paths: Vec<&Path> = events
.iter()
.flat_map(|e| match e {
FsRawEventKind::Create { path } => vec![path.as_path()],
FsRawEventKind::Modify { path } => vec![path.as_path()],
FsRawEventKind::Remove { path } => vec![path.as_path()],
FsRawEventKind::Rename { from, to } => vec![from.as_path(), to.as_path()],
})
.collect();
context
.ephemeral_cache()
.find_watched_root_for_any(paths.into_iter())
}
/// Process a batch of filesystem events against the ephemeral index.
///
/// Creates a `MemoryAdapter` and processes the events using shared
/// handler logic. The ephemeral index is updated in-place and ResourceChanged
/// events are emitted for UI updates.
pub async fn apply_batch(
context: &Arc<CoreContext>,
root_path: &Path,
events: Vec<FsRawEventKind>,
rule_toggles: RuleToggles,
) -> Result<()> {
if events.is_empty() {
return Ok(());
}
let index = context.ephemeral_cache().get_global_index();
let event_bus = context.events.clone();
let mut writer = MemoryAdapter::new(index, event_bus, root_path.to_path_buf());
let config = ChangeConfig {
rule_toggles,
location_root: root_path,
volume_backend: None, // Ephemeral paths typically don't use volume backends
};
change_detection::apply_batch(&mut writer, events, &config).await
}
/// Process a single filesystem event against the ephemeral index.
pub async fn apply(
context: &Arc<CoreContext>,
root_path: &Path,
event: FsRawEventKind,
rule_toggles: RuleToggles,
) -> Result<()> {
apply_batch(context, root_path, vec![event], rule_toggles).await
}
/// Register an ephemeral path for filesystem watching.
///
/// After calling this, filesystem events under the path will be detectable
/// via `find_ephemeral_root`. The path must already be indexed in the
/// ephemeral cache.
///
/// Returns true if registration succeeded, false if the path is not indexed.
pub fn register_for_watching(context: &CoreContext, path: PathBuf) -> bool {
context.ephemeral_cache().register_for_watching(path)
}
/// Unregister an ephemeral path from filesystem watching.
pub fn unregister_from_watching(context: &CoreContext, path: &Path) {
context.ephemeral_cache().unregister_from_watching(path)
}
/// Check if any ephemeral paths are being watched.
pub fn has_watched_paths(context: &CoreContext) -> bool {
!context.ephemeral_cache().watched_paths().is_empty()
}
/// Get all currently watched ephemeral paths.
pub fn watched_paths(context: &CoreContext) -> Vec<PathBuf> {
context.ephemeral_cache().watched_paths()
}
#[cfg(test)]
mod tests {
use super::*;
// Integration tests would require a full CoreContext setup
// Unit tests for the helper functions are covered by index_cache tests
}

View File

@@ -0,0 +1,470 @@
//! Core types for efficient ephemeral index storage
//!
//! This module provides compact data structures for storing file system entries
//! with minimal memory overhead. Key optimizations:
//! - 32-bit node IDs (4 bytes vs 8 bytes for u64)
//! - Bit-packed metadata (16 bytes for state, type, size, mtime, ctime)
//! - String interning via NameRef pointers
//!
//! Memory per node: ~48 bytes vs ~200 bytes with HashMap<PathBuf, EntryMetadata>
use smallvec::SmallVec;
use std::time::{SystemTime, UNIX_EPOCH};
/// Identifies a node in the arena. Uses u32 to halve memory vs u64
/// while supporting up to 4.3 billion nodes.
#[repr(transparent)]
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub struct EntryId(u32);
impl EntryId {
/// Create an EntryId from a usize index
///
/// # Panics
/// Panics if index >= u32::MAX - 1 (reserved for NONE sentinel)
pub fn from_usize(index: usize) -> Self {
assert!(
index < u32::MAX as usize - 1,
"EntryId overflow: index {} exceeds maximum",
index
);
Self(index as u32)
}
/// Get the underlying index as usize
pub fn as_usize(self) -> usize {
self.0 as usize
}
/// Get the raw u32 value
pub fn as_u32(self) -> u32 {
self.0
}
}
/// Optional EntryId using u32::MAX as None sentinel
/// This halves the per-reference cost vs Option<EntryId> (4 bytes instead of 8)
#[repr(transparent)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct MaybeEntryId(u32);
impl MaybeEntryId {
/// The sentinel value representing None
pub const NONE: Self = Self(u32::MAX);
/// Create a Some variant
pub fn some(id: EntryId) -> Self {
debug_assert!(id.0 != u32::MAX, "EntryId cannot use reserved NONE value");
Self(id.0)
}
/// Convert to Option<EntryId>
pub fn as_option(self) -> Option<EntryId> {
if self.0 == u32::MAX {
None
} else {
Some(EntryId(self.0))
}
}
/// Check if this is None
pub fn is_none(self) -> bool {
self.0 == u32::MAX
}
/// Check if this is Some
pub fn is_some(self) -> bool {
self.0 != u32::MAX
}
}
impl Default for MaybeEntryId {
fn default() -> Self {
Self::NONE
}
}
impl From<Option<EntryId>> for MaybeEntryId {
fn from(opt: Option<EntryId>) -> Self {
match opt {
Some(id) => Self::some(id),
None => Self::NONE,
}
}
}
/// Node state indicating accessibility
#[repr(u8)]
#[derive(Copy, Clone, Debug, PartialEq, Eq, Default)]
pub enum NodeState {
#[default]
Unknown = 0,
Accessible = 1,
Inaccessible = 2,
}
impl NodeState {
pub fn from_u8(value: u8) -> Self {
match value {
0 => Self::Unknown,
1 => Self::Accessible,
2 => Self::Inaccessible,
_ => Self::Unknown,
}
}
}
/// File type classification
#[repr(u8)]
#[derive(Copy, Clone, Debug, PartialEq, Eq, Default)]
pub enum FileType {
#[default]
Unknown = 0,
File = 1,
Directory = 2,
Symlink = 3,
}
impl FileType {
pub fn from_u8(value: u8) -> Self {
match value {
0 => Self::Unknown,
1 => Self::File,
2 => Self::Directory,
3 => Self::Symlink,
_ => Self::Unknown,
}
}
}
/// Convert from state::EntryKind to FileType
impl From<super::super::state::EntryKind> for FileType {
fn from(kind: super::super::state::EntryKind) -> Self {
match kind {
super::super::state::EntryKind::File => FileType::File,
super::super::state::EntryKind::Directory => FileType::Directory,
super::super::state::EntryKind::Symlink => FileType::Symlink,
}
}
}
/// Convert from FileType to state::EntryKind
impl From<FileType> for super::super::state::EntryKind {
fn from(ft: FileType) -> Self {
match ft {
FileType::File => super::super::state::EntryKind::File,
FileType::Directory => super::super::state::EntryKind::Directory,
FileType::Symlink => super::super::state::EntryKind::Symlink,
FileType::Unknown => super::super::state::EntryKind::File, // Default to file
}
}
}
/// Compact metadata packed into 16 bytes
///
/// Layout:
/// - Bits 62-63: state (2 bits)
/// - Bits 60-61: type (2 bits)
/// - Bits 0-59: size (60 bits, max ~1 exabyte)
/// - mtime: seconds since epoch (32 bits)
/// - ctime: seconds since epoch (32 bits)
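///
/// A sketch of a pack/unpack round trip:
///
/// ```rust,ignore
/// let m = PackedMetadata::new(NodeState::Accessible, FileType::File, 4096);
/// assert_eq!(m.size(), 4096);
/// assert_eq!(m.file_type(), FileType::File);
/// assert_eq!(m.state(), NodeState::Accessible);
/// ```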
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct PackedMetadata {
/// Bits 62-63: state, 60-61: type, 0-59: size
state_type_size: u64,
/// Modified time (seconds since epoch, 0 = None)
mtime: u32,
/// Created time (seconds since epoch, 0 = None)
ctime: u32,
}
impl PackedMetadata {
const SIZE_MASK: u64 = (1u64 << 60) - 1;
const TYPE_SHIFT: u32 = 60;
const STATE_SHIFT: u32 = 62;
/// Create new packed metadata
pub fn new(state: NodeState, file_type: FileType, size: u64) -> Self {
// Clamp size to 60 bits (max ~1 exabyte)
let size = size.min(Self::SIZE_MASK);
let packed =
size | ((file_type as u64) << Self::TYPE_SHIFT) | ((state as u64) << Self::STATE_SHIFT);
Self {
state_type_size: packed,
mtime: 0,
ctime: 0,
}
}
/// Get the file size
pub fn size(&self) -> u64 {
self.state_type_size & Self::SIZE_MASK
}
/// Get the file type
pub fn file_type(&self) -> FileType {
FileType::from_u8(((self.state_type_size >> Self::TYPE_SHIFT) & 0b11) as u8)
}
/// Get the node state
pub fn state(&self) -> NodeState {
NodeState::from_u8(((self.state_type_size >> Self::STATE_SHIFT) & 0b11) as u8)
}
/// Set timestamps
pub fn with_times(mut self, mtime: Option<SystemTime>, ctime: Option<SystemTime>) -> Self {
self.mtime = mtime
.and_then(|t| t.duration_since(UNIX_EPOCH).ok())
.map(|d| d.as_secs() as u32)
.unwrap_or(0);
self.ctime = ctime
.and_then(|t| t.duration_since(UNIX_EPOCH).ok())
.map(|d| d.as_secs() as u32)
.unwrap_or(0);
self
}
/// Get modified time as SystemTime
pub fn mtime_as_system_time(&self) -> Option<SystemTime> {
if self.mtime == 0 {
None
} else {
Some(UNIX_EPOCH + std::time::Duration::from_secs(self.mtime as u64))
}
}
/// Get created time as SystemTime
pub fn ctime_as_system_time(&self) -> Option<SystemTime> {
if self.ctime == 0 {
None
} else {
Some(UNIX_EPOCH + std::time::Duration::from_secs(self.ctime as u64))
}
}
/// Get raw mtime value
pub fn mtime_secs(&self) -> u32 {
self.mtime
}
/// Get raw ctime value
pub fn ctime_secs(&self) -> u32 {
self.ctime
}
}
impl Default for PackedMetadata {
fn default() -> Self {
Self::new(NodeState::Unknown, FileType::Unknown, 0)
}
}
/// Reference to an interned string with parent link
///
/// Memory layout: 16 bytes total
/// - ptr: 8 bytes (pointer to string in NameCache)
/// - len: 4 bytes (string length)
/// - parent: 4 bytes (parent EntryId or NONE)
#[repr(C)]
pub struct NameRef {
/// Pointer to string in NameCache (stable reference)
ptr: *const u8,
/// String length
len: u32,
/// Parent node ID (u32::MAX if root)
parent: MaybeEntryId,
}
// SAFETY: NameRef contains a raw pointer to an interned string that lives
// as long as the NameCache. The NameCache is owned by EphemeralIndex and
// never deallocates strings. This makes NameRef safe to send between threads.
unsafe impl Send for NameRef {}
unsafe impl Sync for NameRef {}
impl NameRef {
/// Create a new NameRef from an interned string
///
/// # Safety
/// The interned string must live as long as any NameRef referencing it.
/// This is guaranteed when used with NameCache.
pub fn new(interned: &str, parent: MaybeEntryId) -> Self {
Self {
ptr: interned.as_ptr(),
len: interned.len() as u32,
parent,
}
}
/// Get the filename
///
/// # Safety
/// Assumes the interned string is still valid. This is guaranteed
/// when NameCache is not dropped before NameRef.
pub fn name(&self) -> &str {
unsafe {
std::str::from_utf8_unchecked(std::slice::from_raw_parts(self.ptr, self.len as usize))
}
}
/// Get the parent entry ID
pub fn parent(&self) -> Option<EntryId> {
self.parent.as_option()
}
}
impl std::fmt::Debug for NameRef {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("NameRef")
.field("name", &self.name())
.field("parent", &self.parent.as_option())
.finish()
}
}
/// Single node in the file tree
///
/// Memory: ~48 bytes total
/// - name_ref: 16 bytes
/// - children: 8-24 bytes (SmallVec with inline storage)
/// - meta: 16 bytes
pub struct FileNode {
/// Interned filename + parent reference
pub name_ref: NameRef,
/// Child node IDs (directories only)
    /// SmallVec keeps the empty case allocation-free and spills to the heap as children are added
pub children: SmallVec<[EntryId; 0]>,
/// Packed metadata
pub meta: PackedMetadata,
}
impl FileNode {
/// Create a new file node
pub fn new(name_ref: NameRef, meta: PackedMetadata) -> Self {
Self {
name_ref,
children: SmallVec::new(),
meta,
}
}
/// Get the filename
pub fn name(&self) -> &str {
self.name_ref.name()
}
/// Get the parent entry ID
pub fn parent(&self) -> Option<EntryId> {
self.name_ref.parent()
}
/// Check if this is a directory
pub fn is_directory(&self) -> bool {
self.meta.file_type() == FileType::Directory
}
/// Add a child (for directories) - checks for duplicates
pub fn add_child(&mut self, child_id: EntryId) {
// Prevent duplicate children
if !self.children.contains(&child_id) {
self.children.push(child_id);
}
}
}
impl std::fmt::Debug for FileNode {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("FileNode")
.field("name", &self.name())
.field("type", &self.meta.file_type())
.field("size", &self.meta.size())
.field("children", &self.children.len())
.finish()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_entry_id_roundtrip() {
let id = EntryId::from_usize(42);
assert_eq!(id.as_usize(), 42);
assert_eq!(id.as_u32(), 42);
}
#[test]
fn test_maybe_entry_id() {
let none = MaybeEntryId::NONE;
assert!(none.is_none());
assert!(!none.is_some());
assert_eq!(none.as_option(), None);
let some = MaybeEntryId::some(EntryId::from_usize(42));
assert!(!some.is_none());
assert!(some.is_some());
assert_eq!(some.as_option(), Some(EntryId::from_usize(42)));
}
#[test]
fn test_packed_metadata_size() {
// Verify struct size is 16 bytes
assert_eq!(std::mem::size_of::<PackedMetadata>(), 16);
}
#[test]
fn test_packed_metadata_roundtrip() {
let meta = PackedMetadata::new(NodeState::Accessible, FileType::File, 12345);
assert_eq!(meta.state(), NodeState::Accessible);
assert_eq!(meta.file_type(), FileType::File);
assert_eq!(meta.size(), 12345);
}
#[test]
fn test_packed_metadata_max_size() {
// Test that large sizes are clamped
let meta = PackedMetadata::new(NodeState::Accessible, FileType::File, u64::MAX);
// Size should be clamped to 60-bit max
assert_eq!(meta.size(), (1u64 << 60) - 1);
assert_eq!(meta.file_type(), FileType::File);
}
#[test]
fn test_packed_metadata_times() {
use std::time::Duration;
let mtime = UNIX_EPOCH + Duration::from_secs(1700000000);
let ctime = UNIX_EPOCH + Duration::from_secs(1600000000);
let meta = PackedMetadata::new(NodeState::Accessible, FileType::File, 1000)
.with_times(Some(mtime), Some(ctime));
assert_eq!(meta.mtime_secs(), 1700000000);
assert_eq!(meta.ctime_secs(), 1600000000);
assert!(meta.mtime_as_system_time().is_some());
assert!(meta.ctime_as_system_time().is_some());
}
#[test]
fn test_name_ref_size() {
// Verify NameRef is 16 bytes
assert_eq!(std::mem::size_of::<NameRef>(), 16);
}
#[test]
fn test_file_type_conversion() {
use crate::ops::indexing::state::EntryKind;
assert_eq!(FileType::from(EntryKind::File), FileType::File);
assert_eq!(FileType::from(EntryKind::Directory), FileType::Directory);
assert_eq!(FileType::from(EntryKind::Symlink), FileType::Symlink);
assert_eq!(EntryKind::from(FileType::File), EntryKind::File);
assert_eq!(EntryKind::from(FileType::Directory), EntryKind::Directory);
assert_eq!(EntryKind::from(FileType::Symlink), EntryKind::Symlink);
}
}

View File

@@ -0,0 +1,477 @@
//! Unified ephemeral writer for both watcher and indexer pipelines.
//!
//! This module consolidates `EphemeralChangeHandler` and `EphemeralPersistence`
//! into a single implementation that serves both the file watcher and indexer job.
//! Both pipelines share the same entry storage logic, UUID generation, and event
//! emission, eliminating code duplication.
//!
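//! A minimal sketch of the dual role (the `index`, `event_bus`, `root`, and
//! `dir_entry` values are assumed setup, not part of this module):
//!
//! ```rust,ignore
//! let mut writer = MemoryAdapter::new(index, event_bus, root.clone());
//! // Watcher pipeline drives it through ChangeHandler:
//! writer.create(&dir_entry, &root).await?;
//! // Indexer job pipeline drives it through IndexPersistence:
//! writer.store_entry(&dir_entry, None, &root).await?;
//! ```
//!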
use crate::infra::event::EventBus;
use crate::infra::job::prelude::{JobError, JobResult};
use crate::ops::indexing::change_detection::handler::{build_dir_entry, ChangeHandler};
use crate::ops::indexing::change_detection::types::{ChangeType, EntryRef};
use crate::ops::indexing::database_storage::EntryMetadata;
use crate::ops::indexing::persistence::IndexPersistence;
use crate::ops::indexing::state::{DirEntry, EntryKind};
use super::EphemeralIndex;
use anyhow::Result;
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicI32, Ordering};
use std::sync::Arc;
use std::time::SystemTime;
use tokio::sync::RwLock;
use uuid::Uuid;
/// Unified writer for ephemeral (in-memory) index storage.
///
/// Implements both `ChangeHandler` (for the watcher pipeline) and `IndexPersistence`
/// (for the indexer job pipeline). Both pipelines share:
/// - The same `EphemeralIndex` storage
/// - UUID generation and tracking
/// - Event emission for UI updates
/// - Entry ID generation
pub struct MemoryAdapter {
index: Arc<RwLock<EphemeralIndex>>,
event_bus: Arc<EventBus>,
root_path: PathBuf,
next_id: AtomicI32,
}
impl MemoryAdapter {
pub fn new(
index: Arc<RwLock<EphemeralIndex>>,
event_bus: Arc<EventBus>,
root_path: PathBuf,
) -> Self {
Self {
index,
event_bus,
root_path,
next_id: AtomicI32::new(1),
}
}
fn next_id(&self) -> i32 {
self.next_id.fetch_add(1, Ordering::SeqCst)
}
/// Core write operation shared by both watcher and indexer pipelines.
async fn add_entry_internal(
&self,
path: &Path,
uuid: Uuid,
metadata: EntryMetadata,
) -> Result<(i32, Option<crate::domain::ContentKind>)> {
let content_kind = {
let mut index = self.index.write().await;
index
.add_entry(path.to_path_buf(), uuid, metadata.clone())
.map_err(|e| anyhow::anyhow!("Failed to add entry to ephemeral index: {}", e))?
};
let entry_id = self.next_id();
Ok((entry_id, content_kind))
}
async fn emit_resource_changed(
&self,
uuid: Uuid,
path: &Path,
metadata: &EntryMetadata,
content_kind: crate::domain::ContentKind,
) {
use crate::device::get_current_device_slug;
use crate::domain::addressing::SdPath;
use crate::domain::file::File;
use crate::infra::event::{Event, ResourceMetadata};
let device_slug = get_current_device_slug();
let sd_path = SdPath::Physical {
device_slug: device_slug.clone(),
path: path.to_path_buf(),
};
let mut file = File::from_ephemeral(uuid, metadata, sd_path);
file.content_kind = content_kind;
let parent_path = path.parent().map(|p| SdPath::Physical {
device_slug: file.sd_path.device_slug().unwrap_or("local").to_string(),
path: p.to_path_buf(),
});
let affected_paths = parent_path.into_iter().collect();
if let Ok(resource_json) = serde_json::to_value(&file) {
self.event_bus.emit(Event::ResourceChanged {
resource_type: "file".to_string(),
resource: resource_json,
metadata: Some(ResourceMetadata {
no_merge_fields: vec!["sd_path".to_string()],
alternate_ids: vec![],
affected_paths,
}),
});
}
}
}
#[async_trait::async_trait]
impl ChangeHandler for MemoryAdapter {
async fn find_by_path(&self, path: &Path) -> Result<Option<EntryRef>> {
let index = self.index.read().await;
if let Some(metadata) = index.get_entry_ref(&path.to_path_buf()) {
let uuid = index.get_entry_uuid(&path.to_path_buf());
Ok(Some(EntryRef {
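                // Ephemeral entries have no database row id; 0 is a placeholder.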
id: 0,
uuid,
path: path.to_path_buf(),
kind: metadata.kind,
}))
} else {
Ok(None)
}
}
async fn find_by_inode(&self, _inode: u64) -> Result<Option<EntryRef>> {
// Inode tracking is skipped to minimize memory overhead; fall back to path-only detection.
Ok(None)
}
async fn create(&mut self, metadata: &DirEntry, _parent_path: &Path) -> Result<EntryRef> {
let entry_uuid = Uuid::new_v4();
let entry_metadata = EntryMetadata::from(metadata.clone());
let (entry_id, content_kind) = self
.add_entry_internal(&metadata.path, entry_uuid, entry_metadata.clone())
.await?;
if let Some(content_kind) = content_kind {
self.emit_resource_changed(entry_uuid, &metadata.path, &entry_metadata, content_kind)
.await;
}
Ok(EntryRef {
id: entry_id,
uuid: Some(entry_uuid),
path: metadata.path.clone(),
kind: metadata.kind,
})
}
async fn update(&mut self, entry: &EntryRef, metadata: &DirEntry) -> Result<()> {
let uuid = entry.uuid.unwrap_or_else(Uuid::new_v4);
let entry_metadata = EntryMetadata::from(metadata.clone());
{
let mut index = self.index.write().await;
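            // add_entry skips paths already present in the index (it returns
            // Ok(None) without touching the node), so this refreshes metadata
            // only for entries that were previously removed.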
let _ = index.add_entry(metadata.path.clone(), uuid, entry_metadata);
}
Ok(())
}
async fn move_entry(
&mut self,
entry: &EntryRef,
old_path: &Path,
new_path: &Path,
_new_parent_path: &Path,
) -> Result<()> {
let metadata = build_dir_entry(new_path, None).await?;
{
let mut index = self.index.write().await;
index.remove_entry(old_path);
let uuid = entry.uuid.unwrap_or_else(Uuid::new_v4);
let entry_metadata = EntryMetadata::from(metadata.clone());
let _ = index.add_entry(new_path.to_path_buf(), uuid, entry_metadata);
}
Ok(())
}
async fn delete(&mut self, entry: &EntryRef) -> Result<()> {
{
let mut index = self.index.write().await;
if entry.is_directory() {
index.remove_directory_tree(&entry.path);
} else {
index.remove_entry(&entry.path);
}
}
Ok(())
}
async fn run_processors(&self, _entry: &EntryRef, _is_new: bool) -> Result<()> {
// File processors (thumbnails, content hash) are disabled to ensure responsive, low-overhead browsing.
Ok(())
}
async fn emit_change_event(&self, entry: &EntryRef, _change_type: ChangeType) -> Result<()> {
let Some(uuid) = entry.uuid else {
return Ok(());
};
let content_kind = {
let index = self.index.read().await;
index.get_content_kind(&entry.path)
};
let metadata = build_dir_entry(&entry.path, None).await.ok();
if let Some(meta) = metadata {
let entry_metadata = EntryMetadata::from(meta);
self.emit_resource_changed(uuid, &entry.path, &entry_metadata, content_kind)
.await;
}
Ok(())
}
async fn handle_new_directory(&self, path: &Path) -> Result<()> {
use crate::ops::indexing::database_storage::DatabaseStorage;
let mut entries = match tokio::fs::read_dir(path).await {
Ok(e) => e,
Err(e) => {
tracing::warn!(
"Failed to read directory {} for ephemeral indexing: {}",
path.display(),
e
);
return Ok(());
}
};
let mut index = self.index.write().await;
while let Ok(Some(entry)) = entries.next_entry().await {
let entry_path = entry.path();
if let Ok(metadata) = entry.metadata().await {
let kind = if metadata.is_dir() {
EntryKind::Directory
} else if metadata.is_symlink() {
EntryKind::Symlink
} else {
EntryKind::File
};
let entry_metadata = EntryMetadata {
path: entry_path.clone(),
kind,
size: metadata.len(),
modified: metadata.modified().ok(),
accessed: metadata.accessed().ok(),
created: metadata.created().ok(),
inode: DatabaseStorage::get_inode(&metadata),
permissions: None,
is_hidden: entry_path
.file_name()
.and_then(|n| n.to_str())
.map(|n| n.starts_with('.'))
.unwrap_or(false),
};
let uuid = Uuid::new_v4();
let _ = index.add_entry(entry_path, uuid, entry_metadata);
}
}
Ok(())
}
}
#[async_trait::async_trait]
impl IndexPersistence for MemoryAdapter {
async fn store_entry(
&self,
entry: &DirEntry,
_location_id: Option<i32>,
_location_root_path: &Path,
) -> JobResult<i32> {
use crate::ops::indexing::database_storage::DatabaseStorage;
let metadata = DatabaseStorage::extract_metadata(&entry.path, None)
.await
.map_err(|e| JobError::execution(format!("Failed to extract metadata: {}", e)))?;
let entry_uuid = Uuid::new_v4();
let (entry_id, content_kind) = {
let mut index = self.index.write().await;
let content_kind = index
.add_entry(entry.path.clone(), entry_uuid, metadata.clone())
.map_err(|e| {
tracing::error!("Failed to add entry to ephemeral index: {}", e);
JobError::execution(format!("Failed to add entry: {}", e))
})?;
if content_kind.is_some() {
match entry.kind {
EntryKind::File => index.stats.files += 1,
EntryKind::Directory => index.stats.dirs += 1,
EntryKind::Symlink => index.stats.symlinks += 1,
}
index.stats.bytes += entry.size;
}
(self.next_id(), content_kind)
};
if let Some(content_kind) = content_kind {
self.emit_resource_changed(entry_uuid, &entry.path, &metadata, content_kind)
.await;
}
Ok(entry_id)
}
async fn store_content_identity(
&self,
_entry_id: i32,
_path: &Path,
_cas_id: String,
) -> JobResult<()> {
Ok(())
}
async fn get_existing_entries(
&self,
_indexing_path: &Path,
) -> JobResult<HashMap<PathBuf, (i32, Option<u64>, Option<SystemTime>, u64)>> {
Ok(HashMap::new())
}
async fn update_entry(&self, _entry_id: i32, _entry: &DirEntry) -> JobResult<()> {
Ok(())
}
fn is_persistent(&self) -> bool {
false
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::infra::event::Event;
use tempfile::TempDir;
#[tokio::test]
async fn test_ephemeral_writer_as_change_handler() {
let temp_dir = TempDir::new().unwrap();
let test_file = temp_dir.path().join("test.txt");
std::fs::write(&test_file, b"test content").unwrap();
let index = Arc::new(RwLock::new(
EphemeralIndex::new().expect("failed to create ephemeral index"),
));
let event_bus = Arc::new(EventBus::new(1024));
let mut writer =
MemoryAdapter::new(index.clone(), event_bus, temp_dir.path().to_path_buf());
let dir_entry = DirEntry {
path: test_file.clone(),
kind: EntryKind::File,
size: 12,
modified: Some(std::time::SystemTime::now()),
inode: Some(12345),
};
let entry_ref = writer
.create(&dir_entry, temp_dir.path())
.await
.expect("create should succeed");
assert!(entry_ref.uuid.is_some());
assert_eq!(entry_ref.path, test_file);
assert_eq!(entry_ref.kind, EntryKind::File);
let found = writer
.find_by_path(&test_file)
.await
.expect("find should succeed");
assert!(found.is_some());
}
#[tokio::test]
async fn test_ephemeral_writer_as_index_persistence() {
let temp_dir = TempDir::new().unwrap();
let test_file = temp_dir.path().join("test.txt");
std::fs::write(&test_file, b"test content").unwrap();
let index = Arc::new(RwLock::new(
EphemeralIndex::new().expect("failed to create ephemeral index"),
));
let event_bus = Arc::new(EventBus::new(1024));
let writer = MemoryAdapter::new(index.clone(), event_bus, temp_dir.path().to_path_buf());
let dir_entry = DirEntry {
path: test_file.clone(),
kind: EntryKind::File,
size: 12,
modified: Some(std::time::SystemTime::now()),
inode: Some(12345),
};
let entry_id = writer
.store_entry(&dir_entry, None, temp_dir.path())
.await
.expect("store_entry should succeed");
assert!(entry_id > 0);
assert!(!writer.is_persistent());
let idx = index.read().await;
assert!(idx.has_entry(&test_file));
}
#[tokio::test]
async fn test_event_emission_consistency() {
let temp_dir = TempDir::new().unwrap();
let test_file = temp_dir.path().join("test.txt");
std::fs::write(&test_file, b"test content").unwrap();
let index = Arc::new(RwLock::new(
EphemeralIndex::new().expect("failed to create ephemeral index"),
));
let event_bus = Arc::new(EventBus::new(1024));
let mut subscriber = event_bus.subscribe();
let writer = MemoryAdapter::new(index.clone(), event_bus, temp_dir.path().to_path_buf());
let dir_entry = DirEntry {
path: test_file.clone(),
kind: EntryKind::File,
size: 12,
modified: Some(std::time::SystemTime::now()),
inode: Some(12345),
};
writer
.store_entry(&dir_entry, None, temp_dir.path())
.await
.expect("store_entry should succeed");
let event =
tokio::time::timeout(tokio::time::Duration::from_millis(100), subscriber.recv()).await;
assert!(event.is_ok(), "Should receive an event");
if let Ok(Ok(Event::ResourceChanged { resource, .. })) = event {
let uuid = resource["id"].as_str();
assert!(uuid.is_some(), "Event should have UUID");
}
}
}

View File

@@ -1,17 +1,22 @@
//! # Closure Table Query Helpers
//!
//! Provides O(1) tree traversal operations using a precomputed closure table.
//! The closure table stores all ancestor-descendant relationships with their depths,
//! eliminating recursive queries for common operations like "get all children".
//! Each insert updates the closure table to maintain transitive relationships,
//! trading write complexity for instant read performance.
//!
//! For path resolution, use [`PathResolver::get_full_path`] which provides O(1)
//! lookups via the `directory_paths` cache table.
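//!
//! As a sketch of the table shape (columns as used below: `ancestor_id`,
//! `descendant_id`, `depth`), inserting the chain `a/b/c` yields rows:
//!
//! ```text
//! (a, a, 0)  (b, b, 0)  (c, c, 0)   -- self links
//! (a, b, 1)  (b, c, 1)              -- parent links
//! (a, c, 2)                         -- transitive link, written on insert
//! ```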
use crate::infra::db::entities::{entry, entry_closure};
use sea_orm::{ColumnTrait, DbConn, EntityTrait, PaginatorTrait, QueryFilter, QueryOrder};
/// Hierarchical query helpers for efficient tree operations
/// Namespace for closure table queries that avoid recursive database operations.
pub struct HierarchyQuery;
impl HierarchyQuery {
/// Get direct children of an entry
/// Returns direct children only (depth 1), sorted by name.
pub async fn get_children(
db: &DbConn,
parent_id: i32,
@@ -23,12 +28,14 @@ impl HierarchyQuery {
.await
}
/// Get all descendants of an entry (recursive)
/// Returns all descendants at any depth using the closure table (not recursive).
///
/// Excludes the entry itself (depth > 0). Results are ordered by depth (shallowest first).
/// Chunks queries to respect SQLite's parameter limit.
pub async fn get_descendants(
db: &DbConn,
ancestor_id: i32,
) -> Result<Vec<entry::Model>, sea_orm::DbErr> {
// First get all descendant IDs from closure table
let descendant_ids = entry_closure::Entity::find()
.filter(entry_closure::Column::AncestorId.eq(ancestor_id))
.filter(entry_closure::Column::Depth.gt(0))
@@ -39,7 +46,6 @@ impl HierarchyQuery {
.map(|ec| ec.descendant_id)
.collect::<Vec<i32>>();
// Then fetch the entries
if descendant_ids.is_empty() {
return Ok(vec![]);
}
@@ -59,12 +65,14 @@ impl HierarchyQuery {
}
}
/// Get all ancestors of an entry (path to root)
/// Returns all ancestors from root to immediate parent, enabling breadcrumb construction.
///
/// Excludes the entry itself (depth > 0). Results are ordered deepest-first, so reverse
/// iteration builds paths from root downward.
pub async fn get_ancestors(
db: &DbConn,
descendant_id: i32,
) -> Result<Vec<entry::Model>, sea_orm::DbErr> {
// First get all ancestor IDs from closure table
let ancestor_ids = entry_closure::Entity::find()
.filter(entry_closure::Column::DescendantId.eq(descendant_id))
.filter(entry_closure::Column::Depth.gt(0))
@@ -75,7 +83,6 @@ impl HierarchyQuery {
.map(|ec| ec.ancestor_id)
.collect::<Vec<i32>>();
// Then fetch the entries
if ancestor_ids.is_empty() {
return Ok(vec![]);
}
@@ -94,13 +101,14 @@ impl HierarchyQuery {
}
}
/// Get entries at a specific depth below an ancestor
/// Returns entries at exactly the specified depth (e.g., all grandchildren = depth 2).
///
/// Useful for level-by-level tree rendering without fetching the entire subtree.
pub async fn get_at_depth(
db: &DbConn,
ancestor_id: i32,
depth: i32,
) -> Result<Vec<entry::Model>, sea_orm::DbErr> {
// First get IDs at the specific depth
let entry_ids = entry_closure::Entity::find()
.filter(entry_closure::Column::AncestorId.eq(ancestor_id))
.filter(entry_closure::Column::Depth.eq(depth))
@@ -110,7 +118,6 @@ impl HierarchyQuery {
.map(|ec| ec.descendant_id)
.collect::<Vec<i32>>();
// Then fetch the entries
if entry_ids.is_empty() {
return Ok(vec![]);
}
@@ -130,36 +137,7 @@ impl HierarchyQuery {
}
}
/// Build a full path for an entry by traversing ancestors
pub async fn build_full_path(
db: &DbConn,
entry_id: i32,
location_path: &str,
) -> Result<PathBuf, sea_orm::DbErr> {
// Get the entry itself
let entry = entry::Entity::find_by_id(entry_id)
.one(db)
.await?
.ok_or_else(|| sea_orm::DbErr::RecordNotFound("Entry not found".to_string()))?;
// Get all ancestors in order (root to parent)
let ancestors = Self::get_ancestors(db, entry_id).await?;
// Build the path
let mut path = PathBuf::from(location_path);
// Add ancestor names
for ancestor in ancestors {
path.push(&ancestor.name);
}
// Add the entry's own name
path.push(&entry.name);
Ok(path)
}
/// Count total descendants of an entry
/// Counts descendants at any depth without fetching full entry records.
pub async fn count_descendants(db: &DbConn, ancestor_id: i32) -> Result<u64, sea_orm::DbErr> {
entry_closure::Entity::find()
.filter(entry_closure::Column::AncestorId.eq(ancestor_id))
@@ -168,13 +146,16 @@ impl HierarchyQuery {
.await
}
/// Get subtree size (total size of all descendant files)
/// Sums the size field across all descendants (files and directories).
///
/// Note: This is a naive sum. For accurate directory subtree sizes, use the
/// pre-aggregated aggregate_size field computed during the aggregation phase.
pub async fn get_subtree_size(db: &DbConn, ancestor_id: i32) -> Result<i64, sea_orm::DbErr> {
let descendants = Self::get_descendants(db, ancestor_id).await?;
Ok(descendants.iter().map(|e| e.size).sum())
}
/// Check if an entry is an ancestor of another
/// Checks if potential_ancestor_id is anywhere above potential_descendant_id in the tree.
pub async fn is_ancestor_of(
db: &DbConn,
potential_ancestor_id: i32,
@@ -190,17 +171,18 @@ impl HierarchyQuery {
Ok(count > 0)
}
/// Find common ancestor of two entries
/// Finds the lowest (deepest) ancestor shared by both entries, if any.
///
/// Returns None if the entries are in different trees (different locations).
/// Useful for determining relative path operations.
pub async fn find_common_ancestor(
db: &DbConn,
entry1_id: i32,
entry2_id: i32,
) -> Result<Option<entry::Model>, sea_orm::DbErr> {
// Get ancestors of both entries
let ancestors1 = Self::get_ancestors(db, entry1_id).await?;
let ancestors2 = Self::get_ancestors(db, entry2_id).await?;
// Find the first common ancestor (starting from the deepest)
for ancestor1 in ancestors1.iter().rev() {
for ancestor2 in &ancestors2 {
if ancestor1.id == ancestor2.id {
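To make the closure-table API concrete, here is a minimal usage sketch; `db`, `dir_id`, and `file_id` are hypothetical, and only the signatures shown in this diff are assumed:
// Fetch an entire subtree with one indexed query instead of a recursive walk.
let subtree = HierarchyQuery::get_descendants(&db, dir_id).await?;
println!("{} entries under directory {}", subtree.len(), dir_id);
// get_ancestors returns deepest-first, so reverse it to build a root-to-leaf breadcrumb.
let ancestors = HierarchyQuery::get_ancestors(&db, file_id).await?;
let breadcrumb: Vec<&str> = ancestors.iter().rev().map(|e| e.name.as_str()).collect();
println!("{}", breadcrumb.join(" / "));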

View File

@@ -1,4 +1,9 @@
//! Core input types for indexing operations
//! # Indexing Input Types
//!
//! Defines IndexInput, the canonical request shape for all indexing operations regardless
//! of origin (CLI, API, UI). This type is deserialized from external requests, validated,
//! and converted into IndexerJobConfig for internal execution. Separating input from config
//! keeps the public API stable while internal job parameters evolve.
use super::job::{IndexMode, IndexPersistence, IndexScope};
use serde::{Deserialize, Serialize};
@@ -28,7 +33,7 @@ pub struct IndexInput {
}
impl IndexInput {
/// Create a new input with sane defaults
/// Creates an input with defaults: Recursive scope, Deep mode, Ephemeral persistence, hidden files excluded.
pub fn new<P: IntoIterator<Item = PathBuf>>(library_id: uuid::Uuid, paths: P) -> Self {
Self {
library_id,
@@ -65,7 +70,7 @@ impl IndexInput {
self
}
/// Validate the input
/// Checks that at least one path is provided; other fields are structurally valid via types.
pub fn validate(&self) -> Result<(), Vec<String>> {
let mut errors = Vec::new();
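As a reference point, a minimal construction-and-validation sketch using only the `new` and `validate` signatures shown above (`library_id` and the path are placeholders):
use std::path::PathBuf;
let input = IndexInput::new(library_id, [PathBuf::from("/Volumes/External")]);
if let Err(errors) = input.validate() {
    eprintln!("invalid index input: {}", errors.join("; "));
}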

View File

@@ -1,4 +1,9 @@
//! Main indexer job implementation
//! Indexer job implementation.
//!
//! This module contains the main `IndexerJob` struct that orchestrates the multi-phase
//! indexing pipeline. The job supports both persistent indexing (for managed locations)
//! and ephemeral indexing (for external drives, network shares, and temporary browsing).
//!
use crate::{
domain::addressing::SdPath,
@@ -8,33 +13,47 @@ use crate::{
use sea_orm::{ColumnTrait, EntityTrait, QueryFilter};
use serde::{Deserialize, Serialize};
use specta::Type;
use std::{collections::HashMap, path::PathBuf, sync::Arc, time::Duration};
use std::{
path::{Path, PathBuf},
sync::Arc,
time::Duration,
};
use tokio::sync::RwLock;
use tracing::{info, warn};
use uuid::Uuid;
use super::{
entry::EntryMetadata,
ephemeral::EphemeralIndex,
metrics::{IndexerMetrics, PhaseTimer},
phases,
state::{IndexError, IndexPhase, IndexerProgress, IndexerState, IndexerStats, Phase},
PathResolver,
};
/// Indexing mode determines the depth of indexing
/// How deeply to index files, from metadata-only to full processing.
///
/// IndexMode controls the trade-off between indexing speed and feature completeness.
/// Shallow mode is fast enough for ephemeral browsing, while Deep mode enables
/// duplicate detection, thumbnail generation, and full-text search at the cost of
/// significantly longer indexing time.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, Type)]
pub enum IndexMode {
/// Location exists but is not indexed
None,
/// Just filesystem metadata (fastest)
/// Just filesystem metadata
Shallow,
/// Generate content identities (moderate)
/// Generate content identities via sampled BLAKE3 hashing (enables duplicate detection)
Content,
/// Full indexing with thumbnails and text extraction (slowest)
/// Full indexing with thumbnails and text extraction
Deep,
}
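// Because IndexMode derives Ord (None < Shallow < Content < Deep), callers can gate
// work on a minimum mode instead of matching every variant; the content phase later
// in this file does exactly that with `self.config.mode >= IndexMode::Content`.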
/// Indexing scope determines how much of the directory tree to process
/// Whether to index just one directory level or recurse through subdirectories.
///
/// Current scope is used for UI navigation where users expand folders on-demand,
/// while Recursive scope is used for full location indexing. Current scope with
/// persistent storage enables progressive indexing where the UI drives which
/// directories get indexed based on user interaction.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Type)]
pub enum IndexScope {
/// Index only the current directory (single level)
@@ -68,7 +87,14 @@ impl std::fmt::Display for IndexScope {
}
}
/// Determines whether indexing results are persisted to database or kept in memory
/// Whether to write indexing results to the database or keep them in memory.
///
/// Ephemeral persistence allows users to browse external drives and network shares
/// without adding them as managed locations. The in-memory index survives for the
/// session duration and provides the same API surface as persistent entries, enabling
/// features like search and navigation to work identically for both modes. If an
/// ephemeral path is later promoted to a managed location, UUIDs are preserved to
/// maintain continuity for user metadata.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Type)]
pub enum IndexPersistence {
/// Write all results to database (normal operation)
@@ -83,21 +109,24 @@ impl Default for IndexPersistence {
}
}
/// Enhanced configuration for indexer jobs
/// Configuration for an indexer job, supporting both persistent and ephemeral indexing.
///
/// Persistent jobs require a location_id to identify which managed location they're
/// indexing. Ephemeral jobs (browsing unmanaged paths) use location_id = None and
/// store results in memory instead of the database.
#[derive(Debug, Clone, Serialize, Deserialize, Type)]
pub struct IndexerJobConfig {
pub location_id: Option<Uuid>, // None for ephemeral indexing
pub location_id: Option<Uuid>,
pub path: SdPath,
pub mode: IndexMode,
pub scope: IndexScope,
pub persistence: IndexPersistence,
pub max_depth: Option<u32>, // Override for Current scope or depth limiting
pub max_depth: Option<u32>,
#[serde(default)]
pub rule_toggles: super::rules::RuleToggles,
}
impl IndexerJobConfig {
/// Create a new configuration for persistent recursive indexing (traditional)
pub fn new(location_id: Uuid, path: SdPath, mode: IndexMode) -> Self {
Self {
location_id: Some(location_id),
@@ -110,7 +139,6 @@ impl IndexerJobConfig {
}
}
/// Create configuration for UI directory navigation (quick current scan)
pub fn ui_navigation(location_id: Uuid, path: SdPath) -> Self {
Self {
location_id: Some(location_id),
@@ -123,12 +151,11 @@ impl IndexerJobConfig {
}
}
/// Create configuration for ephemeral path browsing (outside managed locations)
pub fn ephemeral_browse(path: SdPath, scope: IndexScope) -> Self {
Self {
location_id: None,
path,
mode: IndexMode::Content, // Enable content identification for ephemeral browsing
mode: IndexMode::Shallow,
scope,
persistence: IndexPersistence::Ephemeral,
max_depth: if scope == IndexScope::Current {
@@ -151,89 +178,25 @@ impl IndexerJobConfig {
}
}
/// In-memory storage for ephemeral indexing results
#[derive(Debug)]
pub struct EphemeralIndex {
pub entries: HashMap<PathBuf, EntryMetadata>,
pub entry_uuids: HashMap<PathBuf, Uuid>,
pub content_identities: HashMap<String, EphemeralContentIdentity>,
pub created_at: std::time::Instant,
pub last_accessed: std::time::Instant,
pub root_path: PathBuf,
pub stats: IndexerStats,
}
/// Simplified content identity for ephemeral storage
#[derive(Debug, Clone)]
pub struct EphemeralContentIdentity {
pub cas_id: String,
pub mime_type: Option<String>,
pub file_size: u64,
pub entry_count: u32,
}
impl EphemeralIndex {
pub fn new(root_path: PathBuf) -> Self {
let now = std::time::Instant::now();
Self {
entries: HashMap::new(),
entry_uuids: HashMap::new(),
content_identities: HashMap::new(),
created_at: now,
last_accessed: now,
root_path,
stats: IndexerStats::default(),
}
}
pub fn add_entry(&mut self, path: PathBuf, uuid: Uuid, metadata: EntryMetadata) {
self.entries.insert(path.clone(), metadata);
self.entry_uuids.insert(path, uuid);
self.last_accessed = std::time::Instant::now();
}
pub fn get_entry(&mut self, path: &PathBuf) -> Option<&EntryMetadata> {
self.last_accessed = std::time::Instant::now();
self.entries.get(path)
}
pub fn get_entry_uuid(&self, path: &PathBuf) -> Option<Uuid> {
self.entry_uuids.get(path).copied()
}
pub fn add_content_identity(&mut self, cas_id: String, content: EphemeralContentIdentity) {
self.content_identities.insert(cas_id, content);
self.last_accessed = std::time::Instant::now();
}
pub fn age(&self) -> Duration {
self.created_at.elapsed()
}
pub fn idle_time(&self) -> Duration {
self.last_accessed.elapsed()
}
}
/// Indexer job - discovers and indexes files in a location
/// Orchestrates multi-phase file indexing for both persistent and ephemeral modes.
///
/// The job executes as a state machine progressing through Discovery, Processing,
/// Aggregation, and ContentIdentification phases. State is automatically serialized
/// between phases, allowing the job to survive app restarts and resume from the last
/// completed phase. Ephemeral jobs (browsing unmanaged paths) skip aggregation and
/// content identification, storing results in memory via `EphemeralIndex`.
#[derive(Debug, Serialize, Deserialize, Job)]
pub struct IndexerJob {
pub config: IndexerJobConfig,
// Resumable state
state: Option<IndexerState>,
// Ephemeral storage for non-persistent jobs
#[serde(skip)]
ephemeral_index: Option<Arc<RwLock<EphemeralIndex>>>,
// Performance tracking
#[serde(skip)]
timer: Option<PhaseTimer>,
#[serde(skip)]
db_operations: (u64, u64), // (reads, writes)
db_operations: (u64, u64),
#[serde(skip)]
batch_info: (u64, usize), // (count, total_size)
batch_info: (u64, usize),
}
impl Job for IndexerJob {
@@ -250,30 +213,8 @@ impl DynJob for IndexerJob {
impl JobProgress for IndexerProgress {}
#[async_trait::async_trait]
impl JobHandler for IndexerJob {
type Output = IndexerOutput;
async fn run(&mut self, ctx: JobContext<'_>) -> JobResult<Self::Output> {
// Initialize timer
if self.timer.is_none() {
self.timer = Some(PhaseTimer::new());
}
// Initialize ephemeral index if needed
if self.config.is_ephemeral() && self.ephemeral_index.is_none() {
let root_path =
self.config.path.as_local_path().ok_or_else(|| {
JobError::execution("Path not accessible locally".to_string())
})?;
self.ephemeral_index = Some(Arc::new(RwLock::new(EphemeralIndex::new(
root_path.to_path_buf(),
))));
ctx.log("Initialized ephemeral index for non-persistent job");
}
// Initialize or restore state
// Ensure state is always created early to avoid serialization issues
impl IndexerJob {
async fn run_job_phases(&mut self, ctx: &JobContext<'_>) -> JobResult<IndexerOutput> {
if self.state.is_none() {
ctx.log(format!(
"Starting new indexer job (scope: {}, persistence: {:?})",
@@ -283,30 +224,18 @@ impl JobHandler for IndexerJob {
self.state = Some(IndexerState::new(&self.config.path));
} else {
ctx.log("Resuming indexer from saved state");
let state = self.state.as_ref().unwrap();
info!("INDEXER_STATE: Job resuming with saved state - phase: {:?}, entry_batches: {}, entries_for_content: {}, seen_paths: {}",
state.phase,
state.entry_batches.len(),
state.entries_for_content.len(),
state.seen_paths.len());
warn!(
"DEBUG: Resumed state - phase: {:?}, entry_batches: {}, entries_for_content: {}",
state.phase,
state.entry_batches.len(),
state.entries_for_content.len()
);
self.state.as_ref().unwrap().phase,
self.state.as_ref().unwrap().entry_batches.len(),
self.state.as_ref().unwrap().entries_for_content.len(),
self.state.as_ref().unwrap().seen_paths.len());
}
let state = self.state.as_mut().unwrap();
// Get root path ONCE for the entire job
// For cloud volumes, we use the path component from the SdPath (e.g., "/" or "folder/")
// since discovery operates through the volume backend (not direct filesystem access)
let root_path_buf = if let Some(p) = self.config.path.as_local_path() {
p.to_path_buf()
} else if let Some(cloud_path) = self.config.path.cloud_path() {
// Cloud path - use the path component within the cloud volume
// The actual I/O will go through the volume backend
PathBuf::from(cloud_path)
} else if !self.config.is_ephemeral() {
let loc_uuid = self
@@ -334,7 +263,6 @@ impl JobHandler for IndexerJob {
};
let root_path = root_path_buf.as_path();
// Get volume backend for the entire job
let volume_backend: Option<Arc<dyn crate::volume::VolumeBackend>> =
if let Some(vm) = ctx.volume_manager() {
match vm
@@ -349,7 +277,6 @@ impl JobHandler for IndexerJob {
Some(vm.backend_for_volume(&mut volume))
}
Ok(None) => {
// For cloud paths, we MUST have a volume - can't fall back to local
if self.config.path.is_cloud() {
ctx.log(format!(
"Cloud volume not found for path: {}",
@@ -361,7 +288,6 @@ impl JobHandler for IndexerJob {
)));
}
// For local paths, we can fall back to LocalBackend
ctx.log(format!(
"No volume found for path: {}, will use LocalBackend fallback",
self.config.path
@@ -381,27 +307,23 @@ impl JobHandler for IndexerJob {
None
};
// Seed discovery queue if it wasn't initialized due to device-id timing
if state.dirs_to_walk.is_empty() {
state.dirs_to_walk.push_back(root_path.to_path_buf());
}
// Main state machine loop
loop {
ctx.check_interrupt().await?;
let current_phase = state.phase.clone();
warn!("DEBUG: IndexerJob entering phase: {:?}", current_phase);
match current_phase {
Phase::Discovery => {
// For cloud volumes, construct the base URL for building absolute paths
let cloud_url_base = if let Some((service, identifier, _)) = self.config.path.as_cloud() {
Some(format!("{}://{}/", service.scheme(), identifier))
} else {
None
};
let cloud_url_base =
if let Some((service, identifier, _)) = self.config.path.as_cloud() {
Some(format!("{}://{}/", service.scheme(), identifier))
} else {
None
};
// Use scope-aware discovery
if self.config.is_current_scope() {
Self::run_current_scope_discovery_static(state, &ctx, root_path).await?;
} else {
@@ -416,18 +338,15 @@ impl JobHandler for IndexerJob {
.await?;
}
// Track batch info
self.batch_info.0 = state.entry_batches.len() as u64;
self.batch_info.1 = state.entry_batches.iter().map(|b| b.len()).sum();
// Start processing timer
if let Some(timer) = &mut self.timer {
timer.start_processing();
}
}
Phase::Processing => {
warn!("DEBUG: IndexerJob starting Processing phase");
if self.config.is_ephemeral() {
let ephemeral_index = self.ephemeral_index.clone().ok_or_else(|| {
JobError::execution("Ephemeral index not initialized".to_string())
@@ -436,6 +355,7 @@ impl JobHandler for IndexerJob {
state,
&ctx,
ephemeral_index,
root_path,
volume_backend.as_ref(),
)
.await?;
@@ -452,8 +372,7 @@ impl JobHandler for IndexerJob {
)
.await?;
// Update DB operation counts
self.db_operations.1 += state.entry_batches.len() as u64 * 100; // Estimate
self.db_operations.1 += state.entry_batches.len() as u64 * 100;
}
}
@@ -468,13 +387,11 @@ impl JobHandler for IndexerJob {
)
.await?;
} else {
// Skip aggregation for ephemeral jobs
ctx.log("Skipping aggregation phase for ephemeral job");
state.phase = Phase::ContentIdentification;
ctx.log("Skipping aggregation and content phases for ephemeral job (content kind identified by extension)");
state.phase = Phase::Complete;
continue;
}
// Start content timer
if let Some(timer) = &mut self.timer {
timer.start_content();
}
@@ -483,14 +400,9 @@ impl JobHandler for IndexerJob {
Phase::ContentIdentification => {
if self.config.mode >= IndexMode::Content {
if self.config.is_ephemeral() {
let ephemeral_index =
self.ephemeral_index.clone().ok_or_else(|| {
JobError::execution(
"Ephemeral index not initialized".to_string(),
)
})?;
Self::run_ephemeral_content_phase_static(state, &ctx, ephemeral_index)
.await?;
ctx.log("Skipping content identification for ephemeral job");
state.phase = Phase::Complete;
continue;
} else {
let library_id = ctx.library().id();
phases::run_content_phase(
@@ -510,15 +422,8 @@ impl JobHandler for IndexerJob {
Phase::Complete => break,
}
// State is automatically saved during job serialization on shutdown
warn!(
"DEBUG: IndexerJob completed phase: {:?}, next phase will be: {:?}",
current_phase, state.phase
);
}
// Send final progress update
let final_progress = IndexerProgress {
phase: IndexPhase::Finalizing {
processed: 0,
@@ -531,27 +436,23 @@ impl JobHandler for IndexerJob {
scope: None,
persistence: None,
is_ephemeral: false,
action_context: None, // TODO: Pass action context from job state
action_context: None,
};
ctx.progress(Progress::generic(final_progress.to_generic_progress()));
// Calculate final metrics
let metrics = if let Some(timer) = &self.timer {
IndexerMetrics::calculate(&state.stats, timer, self.db_operations, self.batch_info)
} else {
IndexerMetrics::default()
};
// Log summary
ctx.log(&metrics.format_summary());
// If Deep mode, dispatch thumbnail generation job after indexing completes
if self.config.mode == IndexMode::Deep && !self.config.is_ephemeral() {
use crate::ops::media::thumbnail::{ThumbnailJob, ThumbnailJobConfig};
ctx.log("Deep mode enabled - dispatching thumbnail generation job");
// Dispatch thumbnail job for all entries in this location
let thumbnail_config = ThumbnailJobConfig::default();
let thumbnail_job = ThumbnailJob::new(thumbnail_config);
@@ -561,12 +462,10 @@ impl JobHandler for IndexerJob {
}
Err(e) => {
ctx.log(format!("Warning: Failed to dispatch thumbnail job: {}", e));
// Don't fail the indexing job if thumbnail dispatch fails
}
}
}
// Generate final output
Ok(IndexerOutput {
location_id: self.config.location_id,
stats: state.stats,
@@ -580,33 +479,70 @@ impl JobHandler for IndexerJob {
},
})
}
}
// JobHandler trait implementation
#[async_trait::async_trait]
impl JobHandler for IndexerJob {
type Output = IndexerOutput;
async fn run(&mut self, ctx: JobContext<'_>) -> JobResult<Self::Output> {
if self.timer.is_none() {
self.timer = Some(PhaseTimer::new());
}
if self.config.is_ephemeral() && self.ephemeral_index.is_none() {
let index = EphemeralIndex::new()
.map_err(|e| JobError::Other(format!("Failed to create ephemeral index: {}", e)))?;
self.ephemeral_index = Some(Arc::new(RwLock::new(index)));
ctx.log("Initialized ephemeral index for non-persistent job");
}
let result = self.run_job_phases(&ctx).await;
// Mark ephemeral indexing complete even on failure to prevent the indexing
// flag from being stuck forever. Without this, a failed ephemeral job would
// block all future indexing attempts for that path until app restart.
if self.config.is_ephemeral() {
if let Some(local_path) = self.config.path.as_local_path() {
ctx.library()
.core_context()
.ephemeral_cache()
.mark_indexing_complete(local_path);
match &result {
Ok(_) => ctx.log(format!(
"Marked ephemeral indexing complete for: {}",
local_path.display()
)),
Err(e) => ctx.log(format!(
"Marked ephemeral indexing complete (job failed: {}) for: {}",
e,
local_path.display()
)),
}
}
}
result
}
async fn on_resume(&mut self, ctx: &JobContext<'_>) -> JobResult {
// State is already loaded from serialization
warn!("DEBUG: IndexerJob on_resume called");
if let Some(state) = &self.state {
warn!(
"DEBUG: IndexerJob has state, resuming in {:?} phase",
state.phase
);
ctx.log(format!("Resuming indexer in {:?} phase", state.phase));
ctx.log(format!(
"Progress: {} files, {} dirs, {} errors so far",
state.stats.files, state.stats.dirs, state.stats.errors
));
// Reinitialize timer for resumed job
self.timer = Some(PhaseTimer::new());
} else {
warn!("DEBUG: IndexerJob has no state during resume - creating new state!");
// If state is missing, create it now (this shouldn't happen in normal operation)
self.state = Some(IndexerState::new(&self.config.path));
}
Ok(())
}
async fn on_pause(&mut self, ctx: &JobContext<'_>) -> JobResult {
ctx.log("Pausing indexer job - state will be preserved");
ctx.log("Pausing indexer job");
Ok(())
}
@@ -622,13 +558,11 @@ impl JobHandler for IndexerJob {
}
fn is_resuming(&self) -> bool {
// If we have existing state, we're resuming
self.state.is_some()
}
}
impl IndexerJob {
/// Create a new indexer job with enhanced configuration
pub fn new(config: IndexerJobConfig) -> Self {
Self {
config,
@@ -640,54 +574,49 @@ impl IndexerJob {
}
}
/// Create a traditional persistent recursive indexer job
pub fn from_location(location_id: Uuid, root_path: SdPath, mode: IndexMode) -> Self {
Self::new(IndexerJobConfig::new(location_id, root_path, mode))
}
/// Create a shallow indexer job (metadata only)
pub fn shallow(location_id: Uuid, root_path: SdPath) -> Self {
Self::from_location(location_id, root_path, IndexMode::Shallow)
}
/// Create a content indexer job (with CAS IDs)
pub fn with_content(location_id: Uuid, root_path: SdPath) -> Self {
Self::from_location(location_id, root_path, IndexMode::Content)
}
/// Create a deep indexer job (full processing)
pub fn deep(location_id: Uuid, root_path: SdPath) -> Self {
Self::from_location(location_id, root_path, IndexMode::Deep)
}
/// Create a UI navigation job (current scope, quick scan)
pub fn ui_navigation(location_id: Uuid, path: SdPath) -> Self {
Self::new(IndexerJobConfig::ui_navigation(location_id, path))
}
/// Set the ephemeral index storage (must be called before dispatching for ephemeral jobs)
/// This allows external code to maintain a reference to the same storage the job uses
/// Sets the ephemeral index storage that the job will use.
///
/// This must be called before dispatching ephemeral jobs. It allows external code
/// (like the ephemeral cache manager) to maintain a reference to the same storage
/// the job uses, enabling direct access to indexing results without job-to-caller
/// communication overhead.
pub fn set_ephemeral_index(&mut self, index: Arc<RwLock<EphemeralIndex>>) {
self.ephemeral_index = Some(index);
}
/// Create an ephemeral browsing job (no database writes)
pub fn ephemeral_browse(path: SdPath, scope: IndexScope) -> Self {
Self::new(IndexerJobConfig::ephemeral_browse(path, scope))
}
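// Hedged wiring sketch (the dispatch call is assumed from the module-level example
// in ops/indexing/mod.rs): share one EphemeralIndex between caller and job, as
// set_ephemeral_index above requires for ephemeral dispatch.
//
//     let index = Arc::new(RwLock::new(EphemeralIndex::new()?));
//     let mut job = IndexerJob::ephemeral_browse(path, IndexScope::Current);
//     job.set_ephemeral_index(index.clone());
//     library.jobs().dispatch(job).await?;
//     // `index` now observes entries as the job stores them.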
/// Run current scope discovery (single level only)
async fn run_current_scope_discovery_static(
state: &mut IndexerState,
ctx: &JobContext<'_>,
root_path: &std::path::Path,
) -> JobResult<()> {
use super::entry::EntryProcessor;
use super::database_storage::DatabaseStorage;
use super::state::{DirEntry, EntryKind};
use tokio::fs;
ctx.log("Starting current scope discovery (single level)");
let mut entries = fs::read_dir(root_path)
.await
.map_err(|e| JobError::execution(format!("Failed to read directory: {}", e)))?;
@@ -716,13 +645,12 @@ impl IndexerJob {
kind: entry_kind,
size: metadata.len(),
modified: metadata.modified().ok(),
inode: EntryProcessor::get_inode(&metadata),
inode: DatabaseStorage::get_inode(&metadata),
};
state.pending_entries.push(dir_entry);
state.items_since_last_update += 1;
// Update stats
match entry_kind {
EntryKind::File => state.stats.files += 1,
EntryKind::Directory => state.stats.dirs += 1,
@@ -730,7 +658,6 @@ impl IndexerJob {
}
}
// Create single batch and move to processing
if !state.pending_entries.is_empty() {
let batch = state.create_batch();
state.entry_batches.push(batch);
@@ -745,156 +672,34 @@ impl IndexerJob {
Ok(())
}
/// Run ephemeral processing (store in memory instead of database)
async fn run_ephemeral_processing_static(
state: &mut IndexerState,
ctx: &JobContext<'_>,
ephemeral_index: Arc<RwLock<EphemeralIndex>>,
volume_backend: Option<&Arc<dyn crate::volume::VolumeBackend>>,
root_path: &Path,
_volume_backend: Option<&Arc<dyn crate::volume::VolumeBackend>>,
) -> JobResult<()> {
use super::persistence::PersistenceFactory;
ctx.log("Starting ephemeral processing");
// Get root path from ephemeral index
let root_path = {
let index = ephemeral_index.read().await;
index.root_path.clone()
};
// Get event bus from library
let event_bus = Some(ctx.library().event_bus().clone());
// Create ephemeral persistence layer (emits events as entries are stored)
let persistence =
PersistenceFactory::ephemeral(ephemeral_index.clone(), event_bus, root_path.clone());
let persistence = PersistenceFactory::ephemeral(
ephemeral_index.clone(),
event_bus,
root_path.to_path_buf(),
);
// Process all batches through persistence layer
while let Some(batch) = state.entry_batches.pop() {
for entry in batch {
// Store entry (this will emit ResourceChanged events)
let entry_id = persistence.store_entry(&entry, None, &root_path).await?;
// Queue files for content identification
if entry.kind == super::state::EntryKind::File && entry.size > 0 {
state
.entries_for_content
.push((entry_id, entry.path.clone()));
}
let _entry_id = persistence.store_entry(&entry, None, root_path).await?;
}
}
state.phase = Phase::ContentIdentification;
ctx.log("Ephemeral processing complete");
Ok(())
}
/// Run ephemeral content identification
async fn run_ephemeral_content_phase_static(
state: &mut IndexerState,
ctx: &JobContext<'_>,
ephemeral_index: Arc<RwLock<EphemeralIndex>>,
) -> JobResult<()> {
use crate::domain::content_identity::ContentHashGenerator;
use crate::ops::indexing::persistence::PersistenceFactory;
ctx.log(format!(
"Starting ephemeral content identification for {} files",
state.entries_for_content.len()
));
if state.entries_for_content.is_empty() {
state.phase = Phase::Complete;
return Ok(());
}
// Get root path and event bus
let (root_path, event_bus) = {
let index = ephemeral_index.read().await;
(
index.root_path.clone(),
Some(ctx.library().event_bus().clone()),
)
};
// Create ephemeral persistence for event emission
let persistence =
PersistenceFactory::ephemeral(ephemeral_index.clone(), event_bus, root_path);
// Process files for content identification
let mut success_count = 0;
let mut error_count = 0;
// Process in chunks to emit progress
const CHUNK_SIZE: usize = 50;
let total = state.entries_for_content.len();
while !state.entries_for_content.is_empty() {
ctx.check_interrupt().await?;
let chunk_size = CHUNK_SIZE.min(state.entries_for_content.len());
let chunk: Vec<_> = state.entries_for_content.drain(..chunk_size).collect();
// Process chunk in parallel
let hash_futures: Vec<_> = chunk
.iter()
.map(|(entry_id, path)| async move {
let hash_result = ContentHashGenerator::generate_content_hash(path).await;
(*entry_id, path.clone(), hash_result)
})
.collect();
let results = futures::future::join_all(hash_futures).await;
// Store results and emit events
for (entry_id, path, hash_result) in results {
match hash_result {
Ok(cas_id) => {
// Store via persistence (this emits ResourceChanged event with content_identity)
if let Err(e) = persistence
.store_content_identity(entry_id, &path, cas_id.clone())
.await
{
ctx.add_non_critical_error(format!(
"Failed to store content identity for {}: {}",
path.display(),
e
));
error_count += 1;
} else {
success_count += 1;
}
}
Err(e) => {
// Skip empty files or errors
if !matches!(e, crate::domain::ContentHashError::EmptyFile) {
ctx.add_non_critical_error(format!(
"Failed to hash {}: {}",
path.display(),
e
));
error_count += 1;
}
}
}
}
ctx.log(format!(
"Content identification progress: {}/{} (success: {}, errors: {})",
total - state.entries_for_content.len(),
total,
success_count,
error_count
));
}
state.phase = Phase::Complete;
ctx.log(format!(
"Ephemeral content identification complete: {} files processed, {} errors",
success_count, error_count
));
ctx.log("Ephemeral processing complete");
Ok(())
}
}
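A condensed sketch of the ephemeral write path implemented in run_ephemeral_processing_static above; `index`, `event_bus`, `root`, and `batch` are assumed to come from the surrounding job context:
let persistence = PersistenceFactory::ephemeral(index.clone(), Some(event_bus), root.to_path_buf());
for entry in batch {
    // Stores into the in-memory index and emits a ResourceChanged event for the UI.
    let _id = persistence.store_entry(&entry, None, &root).await?;
}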

View File

@@ -1,10 +1,14 @@
//! Performance metrics and monitoring for the indexer
//! # Indexer Performance Metrics
//!
//! Tracks timing, throughput, database activity, and error counts across all indexing phases.
//! Metrics are computed at job completion and logged for performance analysis. They're also
//! serialized for API responses so clients can display progress summaries and detect bottlenecks.
use serde::{Deserialize, Serialize};
use specta::Type;
use std::time::{Duration, Instant};
/// Comprehensive metrics for indexing operations
/// Complete snapshot of indexer performance after job completion.
#[derive(Debug, Clone, Serialize, Deserialize, Type)]
pub struct IndexerMetrics {
// Timing
@@ -59,7 +63,7 @@ impl Default for IndexerMetrics {
}
}
/// Tracks timing for different phases
/// Tracks phase transition times to compute per-phase durations without overlapping timers.
#[derive(Debug)]
pub struct PhaseTimer {
phase_start: Instant,
@@ -109,7 +113,7 @@ impl PhaseTimer {
}
impl IndexerMetrics {
/// Calculate final metrics from state and timer
/// Computes metrics after job completion by combining accumulated stats with elapsed timers.
pub fn calculate(
stats: &super::state::IndexerStats,
timer: &PhaseTimer,
@@ -163,7 +167,7 @@ impl IndexerMetrics {
}
}
/// Format metrics for logging
/// Formats metrics as a multi-line summary suitable for job completion logs.
pub fn format_summary(&self) -> String {
format!(
"Indexing completed in {:.2}s:\n\

View File

@@ -1,17 +1,29 @@
//! Production-ready indexing system for Spacedrive
//! # Spacedrive's File Indexing System
//!
//! This module implements a sophisticated file indexing system with:
//! - Multi-phase processing (discovery, processing, content identification)
//! - Full resumability with checkpoint support
//! - Incremental indexing with change detection
//! - Efficient batch processing
//! - Comprehensive error handling
//! - Performance monitoring and metrics
//! `core::ops::indexing` provides a multi-phase indexing pipeline that turns
//! raw filesystem paths into searchable database entries. The system handles
//! both persistent locations (managed directories) and ephemeral browsing sessions
//! (external drives, network shares), ensuring every file gets a stable UUID for
//! sync and user data attachment.
//!
//! ## Example
//! ```rust,no_run
//! use spacedrive_core::ops::indexing::{IndexerJob, IndexerJobConfig, IndexMode};
//! use spacedrive_core::domain::addressing::SdPath;
//! use uuid::Uuid;
//!
//! # async fn example(library: &spacedrive_core::library::Library, location_id: Uuid, path: SdPath) -> Result<(), Box<dyn std::error::Error>> {
//! let config = IndexerJobConfig::new(location_id, path, IndexMode::Content);
//! let job = IndexerJob::new(config);
//! library.jobs().dispatch(job).await?;
//! # Ok(())
//! # }
//! ```
pub mod action;
pub mod change_detection;
pub mod ctx;
pub mod entry;
pub mod database_storage;
pub mod ephemeral;
pub mod hierarchy;
pub mod input;
pub mod job;
@@ -26,16 +38,16 @@ pub mod rules;
pub mod state;
pub mod verify;
// Re-exports for convenience
pub use action::IndexingAction;
pub use ctx::{IndexingCtx, ResponderCtx};
pub use entry::{EntryMetadata, EntryProcessor};
pub use change_detection::{
apply_batch as apply_change_batch, Change, ChangeConfig, ChangeDetector, ChangeHandler,
ChangeType, EntryRef, DatabaseAdapter, DatabaseAdapterForJob,
};
pub use database_storage::{DatabaseStorage, EntryMetadata};
pub use ephemeral::{EphemeralIndex, EphemeralIndexCache, EphemeralIndexStats, MemoryAdapter};
pub use hierarchy::HierarchyQuery;
pub use input::IndexInput;
pub use job::{
EphemeralContentIdentity, EphemeralIndex, IndexMode, IndexPersistence, IndexScope, IndexerJob,
IndexerJobConfig, IndexerOutput,
};
pub use job::{IndexMode, IndexScope, IndexerJob, IndexerJobConfig, IndexerOutput};
pub use metrics::IndexerMetrics;
pub use path_resolver::PathResolver;
pub use persistence::{IndexPersistence as PersistenceTrait, PersistenceFactory};
@@ -46,8 +58,5 @@ pub use rules::{
pub use state::{IndexPhase, IndexerProgress, IndexerState, IndexerStats};
pub use verify::{IndexVerifyAction, IndexVerifyInput, IndexVerifyOutput, IntegrityReport};
// Rules system will be integrated here in the future
// pub mod rules;
#[cfg(test)]
mod tests;

View File

@@ -1,6 +1,9 @@
//! Path resolution service for the pure hierarchical model
//! # Path Resolution via directory_paths Cache
//!
//! This service provides efficient path resolution by utilizing the directory_paths lookup table.
//! Resolves full filesystem paths for entries without walking parent_id chains. The directory_paths
//! table caches absolute paths for all directories, making lookups O(1) instead of O(depth). Files
//! are resolved by joining their parent's cached path with the filename. This table is updated during
//! indexing and move operations to keep paths in sync with the entry hierarchy.
use std::path::PathBuf;
@@ -11,12 +14,11 @@ use crate::infra::db::entities::{directory_paths, entry, DirectoryPaths, Entry};
pub struct PathResolver;
impl PathResolver {
/// Get the full path for any entry (file or directory)
/// Resolves the absolute path by looking up directories in the cache or reconstructing file paths.
pub async fn get_full_path<C: ConnectionTrait>(
db: &C,
entry_id: i32,
) -> Result<PathBuf, DbErr> {
// First, get the entry to determine if it's a file or directory
let entry = Entry::find_by_id(entry_id)
.one(db)
.await?
@@ -24,7 +26,6 @@ impl PathResolver {
match entry.entry_kind() {
crate::infra::db::entities::entry::EntryKind::Directory => {
// For directories, lookup in directory_paths table
let dir_path = DirectoryPaths::find_by_id(entry_id)
.one(db)
.await?
@@ -37,7 +38,6 @@ impl PathResolver {
Ok(PathBuf::from(dir_path.path))
}
_ => {
// For files, get parent directory path and append full filename (name + extension)
if let Some(parent_id) = entry.parent_id {
let parent_path = DirectoryPaths::find_by_id(parent_id)
.one(db)
@@ -49,7 +49,6 @@ impl PathResolver {
))
})?;
// Reconstruct full filename: name + extension
let full_filename = if let Some(ext) = &entry.extension {
format!("{}.{}", entry.name, ext)
} else {
@@ -58,8 +57,6 @@ impl PathResolver {
Ok(PathBuf::from(parent_path.path).join(full_filename))
} else {
// Root file (shouldn't normally happen)
// Still need to add extension if present
let full_filename = if let Some(ext) = &entry.extension {
format!("{}.{}", entry.name, ext)
} else {
@@ -71,7 +68,7 @@ impl PathResolver {
}
}
/// Get the path for a directory from the cache
/// Fetches the cached path string directly from directory_paths without entry lookup.
pub async fn get_directory_path<C: ConnectionTrait>(
db: &C,
directory_id: i32,
@@ -88,7 +85,9 @@ impl PathResolver {
})
}
/// Build the full path for a new directory entry
/// Constructs the path string for a new directory by joining its parent's path with its name.
///
/// Used during indexing to populate the directory_paths table for newly discovered directories.
pub async fn build_directory_path<C: ConnectionTrait>(
db: &C,
parent_id: Option<i32>,
@@ -98,17 +97,15 @@ impl PathResolver {
let parent_path = Self::get_directory_path(db, parent_id).await?;
Ok(format!("{}/{}", parent_path, name))
} else {
// Root directory
Ok(name.to_string())
}
}
/// Get paths for multiple entries efficiently
/// Resolves paths for multiple entries in batched queries to minimize database round-trips.
pub async fn get_paths_batch<C: ConnectionTrait>(
db: &C,
entry_ids: Vec<i32>,
) -> Result<Vec<(i32, PathBuf)>, DbErr> {
// First, fetch all entries to determine types
let mut entries: Vec<entry::Model> = Vec::new();
let chunk_size: usize = 900;
for chunk in entry_ids.chunks(chunk_size) {
@@ -121,7 +118,6 @@ impl PathResolver {
let mut results = Vec::with_capacity(entries.len());
// Separate directories and files
let mut directory_ids = Vec::new();
let mut file_entries = Vec::new();
@@ -136,7 +132,6 @@ impl PathResolver {
}
}
// Batch fetch directory paths
if !directory_ids.is_empty() {
let mut dir_paths: Vec<directory_paths::Model> = Vec::new();
for chunk in directory_ids.chunks(chunk_size) {
@@ -152,7 +147,6 @@ impl PathResolver {
}
}
// Handle files by fetching parent paths
if !file_entries.is_empty() {
let parent_ids: Vec<i32> = file_entries.iter().filter_map(|e| e.parent_id).collect();
@@ -165,7 +159,6 @@ impl PathResolver {
parent_paths.append(&mut batch);
}
// Create a map for quick lookup
let parent_map: std::collections::HashMap<i32, String> = parent_paths
.into_iter()
.map(|dp| (dp.entry_id, dp.path))
@@ -188,15 +181,16 @@ impl PathResolver {
Ok(results)
}
/// Update all descendant directory paths after a move operation
/// This should be called in a background job after moving a directory
/// Bulk-updates descendant directory paths after moving a directory tree.
///
/// Uses a single SQL REPLACE to rewrite all paths under the moved directory's old prefix.
/// Should be called after updating the moved directory's entry.parent_id and directory_paths.path.
pub async fn update_descendant_paths<C: ConnectionTrait>(
db: &C,
moved_directory_id: i32,
old_path: &str,
new_path: &str,
) -> Result<u64, DbErr> {
// Use raw SQL for efficient bulk update
let sql = r#"
UPDATE directory_paths
SET path = REPLACE(path, ?, ?)
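To tie the resolver API together, a minimal sketch; `db` and the entry IDs are hypothetical, and only the signatures visible in this diff are assumed:
// Single-entry resolution via the directory_paths cache (one lookup, no parent walk).
let path = PathResolver::get_full_path(&db, entry_id).await?;
// Batched resolution: chunked queries for directories and file parents.
for (id, path) in PathResolver::get_paths_batch(&db, vec![10, 11, 12]).await? {
    println!("{} -> {}", id, path.display());
}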

View File

@@ -1,37 +1,36 @@
//! Persistence abstraction layer for indexing operations
//! # Persistence Abstraction for Indexing
//!
//! This module provides a unified interface for storing indexing results
//! either persistently in the database or ephemerally in memory.
//! `core::ops::indexing::persistence` provides a unified interface for storing
//! indexing results either persistently in the database or ephemerally in memory.
//! This abstraction allows the same indexing pipeline to work for both managed
//! locations (database-backed) and ephemeral browsing (memory-only).
//!
//! For ephemeral storage, use `MemoryAdapter` from `crate::ops::indexing::ephemeral`
//! which implements both `IndexPersistence` and `ChangeHandler`.
//!
//! For persistent storage, use `DatabaseAdapterForJob` from `crate::ops::indexing::change_detection`
//! which implements `IndexPersistence` and delegates to `DBWriter` for database writes.
use crate::{
filetype::FileTypeRegistry,
infra::{
db::entities::{self, directory_paths, entry_closure},
job::prelude::{JobContext, JobError, JobResult},
},
};
use sea_orm::{
ActiveModelTrait, ActiveValue::Set, ColumnTrait, Condition, ConnectionTrait, DbBackend,
EntityTrait, JoinType, QueryFilter, QuerySelect, RelationTrait, Statement, TransactionTrait,
};
use crate::infra::job::prelude::{JobError, JobResult};
use std::{
collections::HashMap,
path::{Path, PathBuf},
sync::Arc,
};
use tokio::sync::RwLock;
use uuid::Uuid;
use super::{
job::{EphemeralContentIdentity, EphemeralIndex},
state::{DirEntry, EntryKind},
PathResolver,
};
/// Abstraction for storing indexing results
use super::{ephemeral::EphemeralIndex, state::DirEntry};
/// Unified storage interface for persistent and ephemeral indexing.
///
/// Implementations handle either database writes (`DatabaseAdapterForJob`) or
/// in-memory storage (`MemoryAdapter`). The indexing pipeline calls
/// these methods without knowing which backend is active.
#[async_trait::async_trait]
pub trait IndexPersistence: Send + Sync {
/// Store an entry and return its ID
/// Stores an entry and returns its ID for linking content identities.
///
/// For database persistence, this creates an `entry` row and updates the closure table.
/// For ephemeral persistence, this adds the entry to the in-memory index and emits
/// a ResourceChanged event for immediate UI updates.
async fn store_entry(
&self,
entry: &DirEntry,
@@ -39,7 +38,11 @@ pub trait IndexPersistence: Send + Sync {
location_root_path: &Path,
) -> JobResult<i32>;
/// Store content identity and link to entry
/// Links a content identity (hash) to an entry.
///
/// For database persistence, this creates or finds a `content_identity` row and updates
/// the entry's `content_id` foreign key. For ephemeral persistence, this is a no-op since
/// in-memory indexes don't track content deduplication across sessions.
async fn store_content_identity(
&self,
entry_id: i32,
@@ -47,7 +50,12 @@ pub trait IndexPersistence: Send + Sync {
cas_id: String,
) -> JobResult<()>;
/// Get existing entries for change detection, scoped to the indexing path
/// Retrieves existing entries under a path for change detection.
///
/// Returns a map of path -> (entry_id, inode, modified_time, size) for all entries
/// under the indexing path. Change detection compares this snapshot against the
/// current filesystem to identify additions, modifications, and deletions. Ephemeral
/// persistence returns an empty map since it doesn't support incremental indexing.
async fn get_existing_entries(
&self,
indexing_path: &Path,
@@ -55,637 +63,49 @@ pub trait IndexPersistence: Send + Sync {
HashMap<std::path::PathBuf, (i32, Option<u64>, Option<std::time::SystemTime>, u64)>,
>;
/// Update an existing entry
async fn update_entry(&self, entry_id: i32, entry: &DirEntry) -> JobResult<()>;
/// Check if this persistence layer supports operations
/// Returns true for database persistence, false for ephemeral.
///
/// Used by the indexing pipeline to determine whether to perform expensive operations
/// like change detection (database only) or content hashing (database only).
fn is_persistent(&self) -> bool;
}
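// Hedged sketch of how the pipeline can drive this trait generically; everything
// except the trait methods themselves is a placeholder:
//
//     async fn persist_batch(
//         p: &dyn IndexPersistence,
//         batch: &[DirEntry],
//         root: &Path,
//     ) -> JobResult<()> {
//         // Snapshot for change detection; ephemeral backends return an empty map.
//         let known = p.get_existing_entries(root).await?;
//         for entry in batch {
//             if !known.contains_key(&entry.path) {
//                 let _id = p.store_entry(entry, None, root).await?;
//             }
//         }
//         Ok(())
//     }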
/// Database-backed persistence implementation
pub struct DatabasePersistence<'a> {
ctx: &'a JobContext<'a>,
device_id: i32,
location_root_entry_id: Option<i32>, // The root entry ID of the location being indexed
entry_id_cache: Arc<RwLock<HashMap<std::path::PathBuf, i32>>>,
}
impl<'a> DatabasePersistence<'a> {
pub fn new(
ctx: &'a JobContext<'a>,
device_id: i32,
location_root_entry_id: Option<i32>,
) -> Self {
Self {
ctx,
device_id,
location_root_entry_id,
entry_id_cache: Arc::new(RwLock::new(HashMap::new())),
}
}
}
#[async_trait::async_trait]
impl<'a> IndexPersistence for DatabasePersistence<'a> {
async fn store_entry(
&self,
entry: &DirEntry,
_location_id: Option<i32>,
location_root_path: &Path,
) -> JobResult<i32> {
use super::entry::EntryProcessor;
// CRITICAL FIX: Do NOT clone the cache!
// The previous clone-modify-write pattern caused cache corruption:
// 1. Thread A clones cache, processes entry, writes back
// 2. Thread B clones cache (stale snapshot), processes entry, writes back
// 3. Thread B's write overwrites Thread A's updates -> lost updates
// 4. Worse: concurrent HashMap mutations could cause data corruption
//
// Instead, we manage the cache directly with proper locking.
// We look up the parent, then create the entry, then cache it.
// All cache operations are protected by the RwLock.
// Find parent entry ID with proper locking
let parent_id = if let Some(parent_path) = entry.path.parent() {
// Try cache first (read lock)
let cached_parent = {
let cache = self.entry_id_cache.read().await;
cache.get(parent_path).copied()
};
if let Some(id) = cached_parent {
Some(id)
} else {
// Not in cache, check database (no lock held during async DB query)
let parent_path_str = parent_path.to_string_lossy().to_string();
if let Ok(Some(dir_path_record)) = entities::directory_paths::Entity::find()
.filter(entities::directory_paths::Column::Path.eq(&parent_path_str))
.one(self.ctx.library_db())
.await
{
// Found in database, cache it (write lock)
let mut cache = self.entry_id_cache.write().await;
cache.insert(parent_path.to_path_buf(), dir_path_record.entry_id);
Some(dir_path_record.entry_id)
} else {
// Parent truly not found
tracing::warn!(
"Parent not found for {}: {}",
entry.path.display(),
parent_path.display()
);
None
}
}
} else {
None
};
// Now create the entry using the old implementation (not EntryProcessor)
// We can't easily use EntryProcessor without IndexerState, and creating
// IndexerState with clone causes the bug we're trying to fix.
// TODO: Refactor EntryProcessor to work without full IndexerState
// For now, inline the entry creation logic with our properly-locked cache
use entities::entry_closure;
let extension = match entry.kind {
EntryKind::File => entry
.path
.extension()
.and_then(|ext| ext.to_str())
.map(|ext| ext.to_lowercase()),
EntryKind::Directory | EntryKind::Symlink => None,
};
let name = match entry.kind {
EntryKind::File => entry
.path
.file_stem()
.map(|stem| stem.to_string_lossy().to_string())
.unwrap_or_else(|| {
entry
.path
.file_name()
.map(|n| n.to_string_lossy().to_string())
.unwrap_or_else(|| "unknown".to_string())
}),
EntryKind::Directory | EntryKind::Symlink => entry
.path
.file_name()
.map(|n| n.to_string_lossy().to_string())
.unwrap_or_else(|| "unknown".to_string()),
};
let modified_at = entry
.modified
.and_then(|t| {
chrono::DateTime::from_timestamp(
t.duration_since(std::time::UNIX_EPOCH).ok()?.as_secs() as i64,
0,
)
})
.unwrap_or_else(|| chrono::Utc::now());
let entry_uuid = Some(Uuid::new_v4());
let new_entry = entities::entry::ActiveModel {
uuid: Set(entry_uuid),
name: Set(name.clone()),
kind: Set(EntryProcessor::entry_kind_to_int(entry.kind)),
extension: Set(extension),
metadata_id: Set(None),
content_id: Set(None),
size: Set(entry.size as i64),
aggregate_size: Set(0),
child_count: Set(0),
file_count: Set(0),
created_at: Set(chrono::Utc::now()),
modified_at: Set(modified_at),
accessed_at: Set(None),
permissions: Set(None),
inode: Set(entry.inode.map(|i| i as i64)),
parent_id: Set(parent_id),
..Default::default()
};
let txn = self
.ctx
.library_db()
.begin()
.await
.map_err(|e| JobError::execution(format!("Failed to begin transaction: {}", e)))?;
let result = new_entry
.insert(&txn)
.await
.map_err(|e| JobError::execution(format!("Failed to create entry: {}", e)))?;
let self_closure = entry_closure::ActiveModel {
ancestor_id: Set(result.id),
descendant_id: Set(result.id),
depth: Set(0),
..Default::default()
};
self_closure
.insert(&txn)
.await
.map_err(|e| JobError::execution(format!("Failed to insert self-closure: {}", e)))?;
if let Some(parent_id) = parent_id {
txn.execute(Statement::from_sql_and_values(
DbBackend::Sqlite,
"INSERT INTO entry_closure (ancestor_id, descendant_id, depth) \
SELECT ancestor_id, ?, depth + 1 \
FROM entry_closure \
WHERE descendant_id = ?",
vec![result.id.into(), parent_id.into()],
))
.await
.map_err(|e| {
JobError::execution(format!("Failed to populate ancestor closures: {}", e))
})?;
}
if entry.kind == EntryKind::Directory {
let absolute_path = entry.path.to_string_lossy().to_string();
let dir_path_entry = entities::directory_paths::ActiveModel {
entry_id: Set(result.id),
path: Set(absolute_path),
..Default::default()
};
dir_path_entry.insert(&txn).await.map_err(|e| {
JobError::execution(format!("Failed to insert directory path: {}", e))
})?;
}
txn.commit()
.await
.map_err(|e| JobError::execution(format!("Failed to commit transaction: {}", e)))?;
tracing::info!(
"ENTRY_SYNC: About to sync entry name={} uuid={:?}",
result.name,
result.uuid
);
if let Err(e) = self
.ctx
.library()
.sync_model_with_db(
&result,
crate::infra::sync::ChangeType::Insert,
self.ctx.library_db(),
)
.await
{
tracing::warn!(
"ENTRY_SYNC: Failed to sync entry {}: {}",
result
.uuid
.map(|u| u.to_string())
.unwrap_or_else(|| "no-uuid".to_string()),
e
);
} else {
tracing::info!(
"ENTRY_SYNC: Successfully synced entry name={} uuid={:?}",
result.name,
result.uuid
);
}
// Cache the entry ID for potential children (write lock)
{
let mut cache = self.entry_id_cache.write().await;
cache.insert(entry.path.clone(), result.id);
}
Ok(result.id)
}
async fn store_content_identity(
&self,
entry_id: i32,
path: &Path,
cas_id: String,
) -> JobResult<()> {
use super::entry::EntryProcessor;
// Use the library ID from the context
let library_id = self.ctx.library().id();
// Delegate to existing implementation with the library_id
EntryProcessor::link_to_content_identity(self.ctx, entry_id, path, cas_id, library_id)
.await
.map(|_| ())
}
async fn get_existing_entries(
&self,
indexing_path: &Path,
) -> JobResult<
HashMap<std::path::PathBuf, (i32, Option<u64>, Option<std::time::SystemTime>, u64)>,
> {
use sea_orm::{ColumnTrait, EntityTrait, QueryFilter};
// If we don't have a location root entry ID, we can't find existing entries
let location_root_entry_id = match self.location_root_entry_id {
Some(id) => id,
None => return Ok(HashMap::new()),
};
// Query descendants of the indexing path
let indexing_path_str = indexing_path.to_string_lossy().to_string();
let indexing_path_entry_id = if let Ok(Some(dir_record)) = directory_paths::Entity::find()
.filter(directory_paths::Column::Path.eq(&indexing_path_str))
.one(self.ctx.library_db())
.await
{
// Indexing path exists in DB - use its entry ID
dir_record.entry_id
} else {
// This is safe because if the path doesn't exist, there are no descendants to delete
location_root_entry_id
};
// Get all descendants of the indexing path
let descendant_ids = entry_closure::Entity::find()
.filter(entry_closure::Column::AncestorId.eq(indexing_path_entry_id))
.all(self.ctx.library_db())
.await
.map_err(|e| JobError::execution(format!("Failed to query closure table: {}", e)))?
.into_iter()
.map(|ec| ec.descendant_id)
.collect::<Vec<i32>>();
// Add the indexing path entry itself
let mut all_entry_ids = vec![indexing_path_entry_id];
all_entry_ids.extend(descendant_ids);
// Fetch all entries (chunked to avoid SQLite variable limit)
let mut existing_entries: Vec<entities::entry::Model> = Vec::new();
let chunk_size: usize = 900;
for chunk in all_entry_ids.chunks(chunk_size) {
let mut batch = entities::entry::Entity::find()
.filter(entities::entry::Column::Id.is_in(chunk.to_vec()))
.all(self.ctx.library_db())
.await
.map_err(|e| {
JobError::execution(format!("Failed to query existing entries: {}", e))
})?;
existing_entries.append(&mut batch);
}
let mut result = HashMap::new();
self.ctx.log(format!(
"Loading {} existing entries",
existing_entries.len()
));
for entry in existing_entries {
// Build full path for the entry using PathResolver
let full_path = PathResolver::get_full_path(self.ctx.library_db(), entry.id)
.await
.unwrap_or_else(|_| PathBuf::from(&entry.name));
// Convert timestamp to SystemTime for comparison
let modified_time =
entry
.modified_at
.timestamp()
.try_into()
.ok()
.and_then(|secs: u64| {
std::time::UNIX_EPOCH.checked_add(std::time::Duration::from_secs(secs))
});
result.insert(
full_path,
(
entry.id,
entry.inode.map(|i| i as u64),
modified_time,
entry.size as u64,
),
);
}
Ok(result)
}
async fn update_entry(&self, entry_id: i32, entry: &DirEntry) -> JobResult<()> {
use super::entry::EntryProcessor;
// Delegate to existing implementation
EntryProcessor::update_entry(self.ctx, entry_id, entry).await
}
fn is_persistent(&self) -> bool {
true
}
}
/// In-memory ephemeral persistence implementation
pub struct EphemeralPersistence {
index: Arc<RwLock<EphemeralIndex>>,
next_entry_id: Arc<RwLock<i32>>,
event_bus: Option<Arc<crate::infra::event::EventBus>>,
root_path: PathBuf,
}
impl EphemeralPersistence {
pub fn new(
index: Arc<RwLock<EphemeralIndex>>,
event_bus: Option<Arc<crate::infra::event::EventBus>>,
root_path: PathBuf,
) -> Self {
Self {
index,
next_entry_id: Arc::new(RwLock::new(1)),
event_bus,
root_path,
}
}
async fn get_next_id(&self) -> i32 {
let mut id = self.next_entry_id.write().await;
let current = *id;
*id += 1;
current
}
}
#[async_trait::async_trait]
impl IndexPersistence for EphemeralPersistence {
async fn store_entry(
&self,
entry: &DirEntry,
_location_id: Option<i32>,
_location_root_path: &Path,
) -> JobResult<i32> {
use super::entry::EntryProcessor;
// Extract full metadata
// Note: Ephemeral persistence uses direct filesystem (None backend)
let metadata = EntryProcessor::extract_metadata(&entry.path, None)
.await
.map_err(|e| JobError::execution(format!("Failed to extract metadata: {}", e)))?;
// Generate a stable UUID for this ephemeral entry
let entry_id = self.get_next_id().await;
let entry_uuid = Uuid::new_v4();
// Store in ephemeral index with UUID
{
let mut index = self.index.write().await;
index.add_entry(entry.path.clone(), entry_uuid, metadata.clone());
// Update stats
match entry.kind {
EntryKind::File => index.stats.files += 1,
EntryKind::Directory => index.stats.dirs += 1,
EntryKind::Symlink => index.stats.symlinks += 1,
}
index.stats.bytes += entry.size;
}
// Emit ResourceChanged event for UI
if let Some(event_bus) = &self.event_bus {
use crate::device::get_current_device_slug;
use crate::domain::addressing::SdPath;
use crate::domain::file::File;
use crate::infra::event::{Event, ResourceMetadata};
// Build SdPath - for ephemeral indexing, we use Physical paths
let device_slug = get_current_device_slug();
let sd_path = SdPath::Physical {
device_slug: device_slug.clone(),
path: entry.path.clone(),
};
// Build File domain object from ephemeral data
let file = File::from_ephemeral(entry_uuid, &metadata, sd_path);
// Emit event with path metadata for filtering
let parent_path = entry.path.parent().map(|p| SdPath::Physical {
device_slug: file.sd_path.device_slug().unwrap_or("local").to_string(),
path: p.to_path_buf(),
});
let affected_paths = if let Some(parent) = parent_path {
vec![parent]
} else {
vec![]
};
if let Ok(resource_json) = serde_json::to_value(&file) {
event_bus.emit(Event::ResourceChanged {
resource_type: "file".to_string(),
resource: resource_json,
metadata: Some(ResourceMetadata {
no_merge_fields: vec!["sd_path".to_string()],
alternate_ids: vec![],
affected_paths,
}),
});
}
}
Ok(entry_id)
}
async fn store_content_identity(
&self,
entry_id: i32,
path: &Path,
cas_id: String,
) -> JobResult<()> {
// Get file size
let file_size = tokio::fs::symlink_metadata(path)
.await
.map(|m| m.len())
.unwrap_or(0);
// Detect file type using the file type registry
let registry = FileTypeRegistry::default();
let (mime_type, content_kind) = if let Ok(result) = registry.identify(path).await {
(
result.file_type.primary_mime_type().map(|s| s.to_string()),
result.file_type.category,
)
} else {
(None, crate::domain::ContentKind::Unknown)
};
let content_identity = EphemeralContentIdentity {
cas_id: cas_id.clone(),
mime_type: mime_type.clone(),
file_size,
entry_count: 1,
};
// Store in ephemeral index
{
let mut index = self.index.write().await;
index.add_content_identity(cas_id.clone(), content_identity);
}
// Emit ResourceChanged event with updated content_identity
if let Some(event_bus) = &self.event_bus {
use crate::device::get_current_device_slug;
use crate::domain::addressing::SdPath;
use crate::domain::content_identity::ContentIdentity;
use crate::domain::file::File;
use crate::infra::event::{Event, ResourceMetadata};
// Get the stored metadata and UUID for this entry
let (metadata_opt, entry_uuid_opt) = {
let index = self.index.read().await;
(index.entries.get(path).cloned(), index.get_entry_uuid(&path.to_path_buf()))
};
if let (Some(metadata), Some(entry_uuid)) = (metadata_opt, entry_uuid_opt) {
// Build SdPath
let device_slug = get_current_device_slug();
let sd_path = SdPath::Physical {
device_slug: device_slug.clone(),
path: path.to_path_buf(),
};
// Build File with content_identity
let mut file = File::from_ephemeral(entry_uuid, &metadata, sd_path);
// Add content identity
file.content_identity = Some(ContentIdentity {
uuid: uuid::Uuid::new_v4(),
kind: content_kind,
content_hash: cas_id.clone(),
integrity_hash: None,
mime_type_id: None,
text_content: None,
total_size: file_size as i64,
entry_count: 1,
first_seen_at: chrono::Utc::now(),
last_verified_at: chrono::Utc::now(),
});
file.content_kind = content_kind;
// Emit event with updated file
let parent_path = path.parent().map(|p| SdPath::Physical {
device_slug,
path: p.to_path_buf(),
});
let affected_paths = if let Some(parent) = parent_path {
vec![parent]
} else {
vec![]
};
if let Ok(resource_json) = serde_json::to_value(&file) {
event_bus.emit(Event::ResourceChanged {
resource_type: "file".to_string(),
resource: resource_json,
metadata: Some(ResourceMetadata {
no_merge_fields: vec!["sd_path".to_string()],
alternate_ids: vec![],
affected_paths,
}),
});
}
}
}
Ok(())
}
async fn get_existing_entries(
&self,
_indexing_path: &Path,
) -> JobResult<
HashMap<std::path::PathBuf, (i32, Option<u64>, Option<std::time::SystemTime>, u64)>,
> {
// Ephemeral persistence doesn't support change detection
Ok(HashMap::new())
}
async fn update_entry(&self, _entry_id: i32, _entry: &DirEntry) -> JobResult<()> {
// Updates not needed for ephemeral storage
Ok(())
}
fn is_persistent(&self) -> bool {
false
}
}
/// Factory for creating appropriate persistence implementations
pub struct PersistenceFactory;
impl PersistenceFactory {
/// Create a database persistence instance using the unified DatabaseAdapterForJob.
///
/// This delegates to `DBWriter` for all database operations, ensuring
/// consistency between the watcher and indexer pipelines.
pub fn database<'a>(
ctx: &'a crate::infra::job::prelude::JobContext<'a>,
device_id: i32,
library_id: uuid::Uuid,
location_root_entry_id: Option<i32>,
) -> Box<dyn IndexPersistence + 'a> {
use crate::ops::indexing::change_detection::DatabaseAdapterForJob;
Box::new(DatabaseAdapterForJob::new(
ctx,
device_id,
library_id,
location_root_entry_id,
))
}
/// Create an ephemeral persistence instance using the unified MemoryAdapter.
pub fn ephemeral(
index: std::sync::Arc<tokio::sync::RwLock<EphemeralIndex>>,
event_bus: Option<std::sync::Arc<crate::infra::event::EventBus>>,
root_path: PathBuf,
) -> Box<dyn IndexPersistence + Send + Sync> {
use super::ephemeral::MemoryAdapter;
let event_bus = event_bus
.unwrap_or_else(|| std::sync::Arc::new(crate::infra::event::EventBus::new(1024)));
Box::new(MemoryAdapter::new(index, event_bus, root_path))
}
}
@@ -693,42 +113,31 @@ impl PersistenceFactory {
mod tests {
use super::*;
use crate::infra::event::Event;
use crate::ops::indexing::ephemeral::MemoryAdapter;
use crate::ops::indexing::state::{DirEntry, EntryKind};
use std::sync::Arc;
use tempfile::TempDir;
use tokio::sync::RwLock;
#[tokio::test]
async fn test_ephemeral_writer_via_factory() {
let temp_dir = TempDir::new().unwrap();
let test_file = temp_dir.path().join("test.txt");
std::fs::write(&test_file, b"test content").unwrap();
let index = Arc::new(RwLock::new(
EphemeralIndex::new().expect("failed to create ephemeral index"),
));
let event_bus = Arc::new(crate::infra::event::EventBus::new(1024));
let mut subscriber = event_bus.subscribe();
let writer = PersistenceFactory::ephemeral(
index.clone(),
Some(event_bus),
temp_dir.path().to_path_buf(),
);
let dir_entry = DirEntry {
path: test_file.clone(),
kind: EntryKind::File,
@@ -737,47 +146,53 @@ mod tests {
inode: Some(12345),
};
let entry_id = writer
.store_entry(&dir_entry, None, temp_dir.path())
.await
.unwrap();
assert!(entry_id > 0);
assert!(!writer.is_persistent());
let event =
tokio::time::timeout(tokio::time::Duration::from_millis(100), subscriber.recv()).await;
assert!(event.is_ok(), "Should receive an event");
if let Ok(Ok(Event::ResourceChanged { resource, .. })) = event {
let uuid = resource["id"].as_str();
assert!(uuid.is_some(), "Event should have UUID");
}
}
#[tokio::test]
async fn test_ephemeral_writer_direct() {
let temp_dir = TempDir::new().unwrap();
let test_file = temp_dir.path().join("test.txt");
std::fs::write(&test_file, b"test content").unwrap();
let index = Arc::new(RwLock::new(
EphemeralIndex::new().expect("failed to create ephemeral index"),
));
let event_bus = Arc::new(crate::infra::event::EventBus::new(1024));
let writer = MemoryAdapter::new(index.clone(), event_bus, temp_dir.path().to_path_buf());
let dir_entry = DirEntry {
path: test_file.clone(),
kind: EntryKind::File,
size: 12,
modified: Some(std::time::SystemTime::now()),
inode: Some(12345),
};
let entry_id = writer
.store_entry(&dir_entry, None, temp_dir.path())
.await
.unwrap();
assert!(entry_id > 0);
let idx = index.read().await;
assert!(idx.has_entry(&test_file));
}
}

View File

@@ -1,4 +1,14 @@
//! # Directory Size Aggregation
//!
//! Computes total sizes and file counts for directories by traversing from deepest
//! leaves to the root. Each directory's `aggregate_size` includes all descendant files,
//! and `file_count` tracks the total number of files (not subdirectories) contained
//! within. This data powers folder size displays in the UI and enables sorting by size.
//!
//! Processing order matters: children must be aggregated before their parents, so we
//! sort directories by depth (deepest first) before computing. Without this, parent
//! totals would miss unaggregated child contributions. The closure table provides all
//! descendants in one query instead of recursive tree walks.
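//!
//! A small worked example (values are illustrative): if docs/ holds report.pdf (12 bytes)
//! and a subfolder photos/ with two images totalling 30 bytes, the deepest-first pass
//! computes photos/ first (aggregate_size = 30, file_count = 2), so docs/ can then sum its
//! direct children as 12 + 30 = 42 bytes with file_count = 3.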
use crate::{
infra::{
@@ -15,7 +25,12 @@ use sea_orm::{
use std::collections::HashMap;
use uuid::Uuid;
/// Aggregates directory sizes and file counts from leaves to root.
///
/// Queries all directories under the location using the closure table, sorts them by
/// depth (deepest first), then computes aggregate_size and file_count for each by
/// summing direct children. Updates indexed_at after each directory so sync picks up
/// the aggregated values. Skips locations without an entry_id (not yet indexed).
pub async fn run_aggregation_phase(
location_id: Uuid,
state: &mut IndexerState,
@@ -23,7 +38,6 @@ pub async fn run_aggregation_phase(
) -> Result<(), JobError> {
ctx.log("Starting directory size aggregation phase");
// Get the location record
let location_record = entities::location::Entity::find()
.filter(entities::location::Column::Uuid.eq(location_id))
.one(ctx.library_db())
@@ -33,8 +47,6 @@ pub async fn run_aggregation_phase(
let location_id_i32 = location_record.id;
// Find all directories under this location using closure table
// First get all descendant IDs
let descendant_ids = entities::entry_closure::Entity::find()
.filter(entities::entry_closure::Column::AncestorId.eq(location_record.entry_id))
.all(ctx.library_db())
@@ -44,14 +56,12 @@ pub async fn run_aggregation_phase(
.map(|ec| ec.descendant_id)
.collect::<Vec<i32>>();
// Add the root entry itself (skip if location has no entry_id)
let Some(root_entry_id) = location_record.entry_id else {
return Ok(());
};
let mut all_entry_ids = vec![root_entry_id];
all_entry_ids.extend(descendant_ids);
// Now get all directories from these entries
let mut directories: Vec<entities::entry::Model> = Vec::new();
// SQLite has a bind parameter limit (~999). Query in safe chunks.
let chunk_size: usize = 900;
@@ -65,18 +75,15 @@ pub async fn run_aggregation_phase(
directories.append(&mut batch);
}
// Count depth by following parent links up to root.
let mut dir_depths: Vec<(entities::entry::Model, usize)> = Vec::new();
for directory in directories {
let mut depth = 0;
let mut current_parent_id = directory.parent_id;
while let Some(parent_id) = current_parent_id {
depth += 1;
if let Ok(Some(parent)) = entities::entry::Entity::find_by_id(parent_id)
.one(ctx.library_db())
.await
@@ -90,7 +97,6 @@ pub async fn run_aggregation_phase(
dir_depths.push((directory, depth));
}
// Sort by depth (deepest first)
dir_depths.sort_by(|a, b| b.1.cmp(&a.1));
let directories: Vec<entities::entry::Model> =
dir_depths.into_iter().map(|(dir, _)| dir).collect();
@@ -98,7 +104,6 @@ pub async fn run_aggregation_phase(
let total_dirs = directories.len();
ctx.log(format!("Found {} directories to aggregate", total_dirs));
// Process directories from leaves to root
let mut processed = 0;
let aggregator = DirectoryAggregator::new(ctx.library_db().clone());
@@ -125,16 +130,14 @@ pub async fn run_aggregation_phase(
};
ctx.progress(Progress::generic(indexer_progress.to_generic_progress()));
// Calculate aggregate values for this directory
match aggregator.aggregate_directory(&directory).await {
Ok((aggregate_size, child_count, file_count)) => {
// Update the directory entry
let directory_name = directory.name.clone();
let mut active_dir: entities::entry::ActiveModel = directory.into();
active_dir.aggregate_size = Set(aggregate_size);
active_dir.child_count = Set(child_count);
active_dir.file_count = Set(file_count);
// Bump indexed_at so sync picks up aggregate changes.
active_dir.indexed_at = Set(Some(chrono::Utc::now()));
active_dir.update(ctx.library_db()).await.map_err(|e| {
@@ -153,8 +156,6 @@ pub async fn run_aggregation_phase(
));
}
}
// State is automatically saved during job serialization on shutdown
}
ctx.log(format!(
@@ -174,12 +175,15 @@ impl DirectoryAggregator {
Self { db }
}
/// Calculate aggregate size, child count, and file count for a directory
/// Computes aggregate values by summing direct children only.
///
/// Files contribute their size directly. Subdirectories contribute their already-computed
/// aggregate_size and file_count (this is why we process deepest-first). Symlinks are
/// treated as files for counting purposes.
async fn aggregate_directory(
&self,
directory: &entities::entry::Model,
) -> Result<(i64, i32, i32), DbErr> {
// Get all direct children using parent_id only
let children = entities::entry::Entity::find()
.filter(entities::entry::Column::ParentId.eq(directory.id))
.all(&self.db)
@@ -192,21 +196,19 @@ impl DirectoryAggregator {
for child in children {
match child.kind {
0 => {
// File
aggregate_size += child.size;
file_count += 1;
}
1 => {
// Directory
aggregate_size += child.aggregate_size;
file_count += child.file_count;
}
2 => {
// Symlinks count as files.
aggregate_size += child.size;
file_count += 1;
}
_ => {}
}
}
@@ -214,9 +216,12 @@ impl DirectoryAggregator {
}
}
/// Backfills aggregate_size and file_count for all existing directories across all locations.
///
/// This is a one-time migration for databases created before aggregation was added.
/// Safe to run multiple times (idempotent). Processes each location independently,
/// sorting directories by depth within each location tree.
pub async fn migrate_directory_sizes(db: &DatabaseConnection) -> Result<(), DbErr> {
// Get all locations
let locations = entities::location::Entity::find().all(db).await?;
for location in locations {
@@ -225,7 +230,6 @@ pub async fn migrate_directory_sizes(db: &DatabaseConnection) -> Result<(), DbEr
location.name.as_deref().unwrap_or("Unknown")
);
// Find all directories under this location using closure table
let Some(root_entry_id) = location.entry_id else {
tracing::warn!(
"Skipping location {} - entry_id not set (not yet synced)",
@@ -256,7 +260,6 @@ pub async fn migrate_directory_sizes(db: &DatabaseConnection) -> Result<(), DbEr
directories.append(&mut batch);
}
// Sort by depth (deepest first) - same logic as above
let mut dir_depths: Vec<(entities::entry::Model, usize)> = Vec::new();
for directory in directories {
@@ -290,7 +293,7 @@ pub async fn migrate_directory_sizes(db: &DatabaseConnection) -> Result<(), DbEr
active_dir.aggregate_size = Set(aggregate_size);
active_dir.child_count = Set(child_count);
active_dir.file_count = Set(file_count);
// Bump indexed_at so sync picks up aggregate changes.
active_dir.indexed_at = Set(Some(chrono::Utc::now()));
active_dir.update(db).await?;

View File

@@ -1,12 +1,17 @@
//! # Content Identification and Hashing
//!
//! `core::ops::indexing::phases::content` generates BLAKE3 content hashes for files and
//! links entries to content_identity records for deduplication. Processes files in parallel
//! chunks, supports both local filesystem and cloud backends (S3, Dropbox), and carefully
//! orders sync operations (content identities before entries) to prevent foreign key violations
//! on receiving devices.
use crate::{
domain::content_identity::ContentHashGenerator,
infra::job::generic_progress::ToGenericProgress,
infra::job::prelude::{JobContext, JobError, Progress},
ops::indexing::{
database_storage::DatabaseStorage,
processor::{ContentHashProcessor, ProcessorEntry},
state::{EntryKind, IndexError, IndexPhase, IndexerProgress, IndexerState},
},
@@ -15,21 +20,27 @@ use std::path::Path;
use std::sync::Arc;
use tracing::warn;
/// Strips cloud URL schemes to convert full URIs into backend-relative paths.
///
/// Backends expect relative keys ("folder/file.txt"), not full URIs ("s3://bucket/folder/file.txt").
/// For S3 paths like "s3://my-bucket/docs/report.pdf", this returns "docs/report.pdf".
/// Local paths pass through unchanged.
fn to_backend_path(path: &Path) -> std::path::PathBuf {
let path_str = path.to_string_lossy();
if let Some(after_scheme) = path_str.strip_prefix("s3://") {
// Strip s3://bucket/ prefix to get just the key
if let Some(slash_pos) = after_scheme.find('/') {
let key = &after_scheme[slash_pos + 1..];
return std::path::PathBuf::from(key);
}
}
// Return as-is for local paths
path.to_path_buf()
}
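// A hedged sanity check of the stripping behavior documented above; the expected values
// follow from the doc comment, and the module name here is illustrative, not part of the
// original change set.
#[cfg(test)]
mod to_backend_path_sketch {
use super::to_backend_path;
use std::path::{Path, PathBuf};
#[test]
fn strips_scheme_and_bucket_for_s3_paths() {
// s3://bucket/key reduces to just the key.
assert_eq!(
to_backend_path(Path::new("s3://my-bucket/docs/report.pdf")),
PathBuf::from("docs/report.pdf")
);
// Local paths pass through unchanged.
assert_eq!(
to_backend_path(Path::new("/home/user/file.txt")),
PathBuf::from("/home/user/file.txt")
);
}
}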
/// Generates BLAKE3 content hashes for files and links them to content identities.
///
/// Processes files in parallel chunks for throughput, uses volume backends for cloud files,
/// syncs content identities before entries (to prevent foreign key violations), and emits
/// ResourceChanged events for UI updates. Empty files are skipped (no content to hash).
pub async fn run_content_phase(
state: &mut IndexerState,
ctx: &JobContext<'_>,
@@ -52,7 +63,6 @@ pub async fn run_content_phase(
let mut success_count = 0;
let mut error_count = 0;
// Process in chunks for better performance and memory usage
const CHUNK_SIZE: usize = 100;
while !state.entries_for_content.is_empty() {
@@ -62,7 +72,6 @@ pub async fn run_content_phase(
let chunk: Vec<_> = state.entries_for_content.drain(..chunk_size).collect();
let chunk_len = chunk.len();
// Report progress BEFORE processing (using current processed count)
let indexer_progress = IndexerProgress {
phase: IndexPhase::ContentIdentification {
current: processed,
@@ -75,22 +84,18 @@ pub async fn run_content_phase(
scope: None,
persistence: None,
is_ephemeral: false,
action_context: None,
};
ctx.progress(Progress::generic(indexer_progress.to_generic_progress()));
// Process chunk in parallel for better performance
let content_hash_futures: Vec<_> = chunk
.iter()
.map(|(entry_id, path)| {
let backend_clone = volume_backend.cloned();
async move {
let hash_result = if let Some(backend) = backend_clone {
// Use backend for content hashing (supports both local and cloud)
// For cloud paths, strip the URL prefix to get backend-relative path
let backend_path = to_backend_path(path);
// Get file size first
match backend.metadata(&backend_path).await {
Ok(meta) => {
ContentHashGenerator::generate_content_hash_with_backend(
@@ -105,7 +110,6 @@ pub async fn run_content_phase(
)),
}
} else {
// No backend - use local filesystem path
ContentHashGenerator::generate_content_hash(path).await
};
(*entry_id, path.clone(), hash_result)
@@ -113,22 +117,18 @@ pub async fn run_content_phase(
})
.collect();
// Wait for all content hash generations to complete
let hash_results = futures::future::join_all(content_hash_futures).await;
// Collect results for batch syncing
let mut content_identities_to_sync = Vec::new();
let mut entries_to_sync = Vec::new();
// Process results
for (entry_id, path, hash_result) in hash_results {
// Check for interruption during result processing
ctx.check_interrupt().await?;
match hash_result {
Ok(content_hash) => {
match DatabaseStorage::link_to_content_identity(
ctx.library_db(),
entry_id,
&path,
content_hash.clone(),
@@ -143,7 +143,6 @@ pub async fn run_content_phase(
content_hash
));
// Collect for batch sync
content_identities_to_sync.push(result.content_identity);
entries_to_sync.push(result.entry);
@@ -188,86 +187,67 @@ pub async fn run_content_phase(
}
}
// Batch sync content identities (shared resources)
if !content_identities_to_sync.is_empty() {
let library = ctx.library();
match library
.sync_models_batch(
&content_identities_to_sync,
crate::infra::sync::ChangeType::Insert,
ctx.library_db(),
)
.await
{
Ok(()) => {
ctx.log(format!(
"Batch synced {} content identities",
content_identities_to_sync.len()
));
}
Err(e) => {
tracing::warn!(
"Failed to batch sync {} content identities: {}",
content_identities_to_sync.len(),
e
);
}
}
}
// Yield to let content_identity sync messages propagate before entry updates.
// Without this, receiving devices might process entry.content_id foreign keys before
// the referenced content_identity row exists, causing foreign key constraint violations.
tokio::task::yield_now().await;
// Batch sync entries (device-owned, now sync-ready with content_id assigned)
if !entries_to_sync.is_empty() {
let library = ctx.library();
match library
.sync_models_batch(
&entries_to_sync,
crate::infra::sync::ChangeType::Update,
ctx.library_db(),
)
.await
{
Ok(()) => {
ctx.log(format!(
"Batch synced {} entries with content IDs",
entries_to_sync.len()
));
}
Err(e) => {
tracing::warn!(
"Failed to batch sync {} entries: {}",
entries_to_sync.len(),
e
);
}
}
}
// Update processed count AFTER processing chunk
processed += chunk_len;
// Update rate tracking
state.items_since_last_update += chunk_len as u64;
// Emit ResourceChanged events for affected Files
if !entries_to_sync.is_empty() {
// Collect entry UUIDs from successfully processed entries
let entry_ids_for_events: Vec<uuid::Uuid> = entries_to_sync
.iter()
.filter_map(|entry_model| entry_model.uuid)
@@ -288,8 +268,6 @@ pub async fn run_content_phase(
}
}
}
// State is automatically saved during job serialization on shutdown
}
ctx.log(format!(

View File

@@ -1,15 +1,22 @@
//! # Directory Discovery Phase
//!
//! `core::ops::indexing::phases::discovery` implements parallel directory traversal
//! using a work-stealing pattern inspired by Rayon. Workers pull directories from a
//! shared queue, read their contents, filter entries against indexing rules, and
//! directly enqueue subdirectories for other workers to process.
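//!
//! A minimal sketch of the termination invariant described below (simplified; the real
//! worker loop also handles timeouts and errors): `pending_work` is incremented before a
//! directory is enqueued and decremented only after it is fully processed, so a zero
//! reading means nothing is queued or in flight:
//!
//! ```ignore
//! pending_work.fetch_add(1, Ordering::Release); // before enqueue
//! work_tx.send(dir).await?;
//! // worker: receives dir, enqueues children (each with its own increment), then...
//! pending_work.fetch_sub(1, Ordering::Release); // after dir is done
//! ```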
use crate::{
infra::job::generic_progress::ToGenericProgress,
infra::job::prelude::{JobContext, JobError, Progress},
ops::indexing::{
database_storage::DatabaseStorage,
rules::{build_default_ruler, RuleToggles, RulerDecision},
state::{DirEntry, EntryKind, IndexError, IndexPhase, IndexerProgress, IndexerState},
},
};
use async_channel as chan;
use std::path::PathBuf;
use std::sync::atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering};
use std::time::Instant;
use std::{path::Path, sync::Arc};
@@ -22,7 +29,11 @@ impl crate::ops::indexing::rules::MetadataForIndexerRules for SimpleMetadata {
}
}
/// Runs parallel directory discovery or falls back to sequential for concurrency = 1.
///
/// Spawns worker tasks that walk the directory tree, apply filtering rules, and collect
/// entries into batches for the processing phase. Falls back to sequential traversal
/// when concurrency is 1 to avoid task spawning overhead for single-threaded scenarios.
pub async fn run_discovery_phase(
state: &mut IndexerState,
ctx: &JobContext<'_>,
@@ -31,33 +42,406 @@ pub async fn run_discovery_phase(
volume_backend: Option<&Arc<dyn crate::volume::VolumeBackend>>,
cloud_url_base: Option<String>,
) -> Result<(), JobError> {
let concurrency = state.discovery_concurrency;
if concurrency <= 1 {
return run_discovery_phase_sequential(
state,
ctx,
root_path,
rule_toggles,
volume_backend,
cloud_url_base,
)
.await;
}
ctx.log(format!(
"Discovery phase starting from: {}",
root_path.display()
"Discovery phase starting from: {} (concurrency: {})",
root_path.display(),
concurrency
));
ctx.log(format!(
"Initial directories to walk: {}",
state.dirs_to_walk.len()
));
run_parallel_discovery(
state,
ctx,
root_path,
rule_toggles,
volume_backend,
cloud_url_base,
)
.await
}
/// Parallel discovery using work-stealing with N worker tasks and atomic coordination.
///
/// Workers pull directories from a shared queue, read contents, filter against rules,
/// and directly enqueue subdirectories. A monitor task watches `pending_work` (atomic
/// counter) and signals shutdown when it reaches zero, avoiding explicit work completion
/// messages that would require coordinator awareness.
async fn run_parallel_discovery(
state: &mut IndexerState,
ctx: &JobContext<'_>,
root_path: &Path,
rule_toggles: RuleToggles,
volume_backend: Option<&Arc<dyn crate::volume::VolumeBackend>>,
cloud_url_base: Option<String>,
) -> Result<(), JobError> {
let concurrency = state.discovery_concurrency;
let (work_tx, work_rx) = chan::unbounded::<PathBuf>();
let (result_tx, result_rx) = chan::unbounded::<DiscoveryResult>();
// INVARIANT: `pending_work` is incremented BEFORE enqueuing work and decremented AFTER
// completing it. When it reaches zero, all work is done and shutdown can be signaled.
// This avoids coordinator bottlenecks from explicit "work done" messages.
let pending_work = Arc::new(AtomicUsize::new(0));
let skipped_count = Arc::new(AtomicU64::new(0));
let shutdown = Arc::new(AtomicBool::new(false));
// Shared across all workers to prevent duplicate processing when symlinks create cycles
// or multiple paths (e.g., /home/user/docs and /mnt/docs) lead to the same directory.
let seen_paths = Arc::new(parking_lot::RwLock::new(std::collections::HashSet::new()));
while let Some(dir) = state.dirs_to_walk.pop_front() {
pending_work.fetch_add(1, Ordering::Release);
work_tx
.send(dir)
.await
.map_err(|_| JobError::execution("Work channel closed"))?;
}
let mut workers = Vec::new();
for worker_id in 0..concurrency {
let work_rx = work_rx.clone();
let work_tx = work_tx.clone();
let result_tx = result_tx.clone();
let pending_work = Arc::clone(&pending_work);
let skipped_count = Arc::clone(&skipped_count);
let shutdown = Arc::clone(&shutdown);
let seen_paths = Arc::clone(&seen_paths);
let root_path = root_path.to_path_buf();
let volume_backend = volume_backend.cloned();
let cloud_url_base = cloud_url_base.clone();
let worker = tokio::spawn(async move {
discovery_worker_rayon(
worker_id,
work_rx,
work_tx,
result_tx,
pending_work,
skipped_count,
shutdown,
seen_paths,
root_path,
rule_toggles,
volume_backend,
cloud_url_base,
)
.await
});
workers.push(worker);
}
// Monitor polls `pending_work` and signals shutdown when it hits zero, allowing workers
// to exit gracefully without needing explicit "I'm done" messages to a coordinator.
let monitor = tokio::spawn({
let shutdown = Arc::clone(&shutdown);
let pending_work = Arc::clone(&pending_work);
async move {
loop {
tokio::time::sleep(tokio::time::Duration::from_millis(10)).await;
if pending_work.load(Ordering::Acquire) == 0 {
shutdown.store(true, Ordering::Release);
break;
}
}
}
});
drop(work_tx);
drop(result_tx);
let mut total_processed = 0u64;
while let Ok(result) = result_rx.recv().await {
match result {
DiscoveryResult::Entry(entry) => {
state.pending_entries.push(entry);
total_processed += 1;
if state.should_create_batch() {
let batch = state.create_batch();
state.entry_batches.push(batch);
}
}
DiscoveryResult::Stats {
files,
dirs,
symlinks,
bytes,
} => {
state.stats.files += files;
state.stats.dirs += dirs;
state.stats.symlinks += symlinks;
state.stats.bytes += bytes;
}
DiscoveryResult::Error(error) => {
state.add_error(error);
}
DiscoveryResult::Progress { dirs_queued } => {
let indexer_progress = IndexerProgress {
phase: IndexPhase::Discovery { dirs_queued },
current_path: root_path.display().to_string(),
total_found: state.stats,
processing_rate: state.calculate_rate(),
estimated_remaining: state.estimate_remaining(),
scope: None,
persistence: None,
is_ephemeral: false,
action_context: None,
};
ctx.progress(Progress::generic(indexer_progress.to_generic_progress()));
state.items_since_last_update += 1;
}
DiscoveryResult::QueueDirectories(_) => {
unreachable!("Workers should not send QueueDirectories in Rayon-style mode");
}
}
ctx.check_interrupt().await?;
}
monitor
.await
.map_err(|e| JobError::execution(format!("Monitor task failed: {}", e)))?;
for worker in workers {
worker
.await
.map_err(|e| JobError::execution(format!("Worker task failed: {}", e)))?;
}
if !state.pending_entries.is_empty() {
let final_batch_size = state.pending_entries.len();
ctx.log(format!(
"Creating final batch with {} entries",
final_batch_size
));
let batch = state.create_batch();
state.entry_batches.push(batch);
}
let skipped = skipped_count.load(Ordering::SeqCst);
state.stats.skipped = skipped;
ctx.log(format!(
"Parallel discovery complete: {} files, {} dirs, {} symlinks, {} skipped, {} batches created",
state.stats.files,
state.stats.dirs,
state.stats.symlinks,
skipped,
state.entry_batches.len()
));
state.phase = crate::ops::indexing::state::Phase::Processing;
Ok(())
}
/// Messages sent from workers to the coordinator via the result channel.
///
/// Workers send entries, stats updates, progress notifications, and errors through this
/// enum instead of directly mutating shared state. QueueDirectories is unused in the
/// work-stealing implementation (workers directly enqueue subdirectories).
enum DiscoveryResult {
Entry(DirEntry),
QueueDirectories(Vec<PathBuf>),
Stats {
files: u64,
dirs: u64,
symlinks: u64,
bytes: u64,
},
Error(IndexError),
Progress {
dirs_queued: usize,
},
}
/// Worker task that pulls directories, reads contents, filters entries, and enqueues subdirectories.
///
/// Workers check the shutdown signal, pull work with a timeout to avoid blocking forever,
/// skip already-seen paths (using the shared RwLock), apply filtering rules, and directly
/// enqueue subdirectories for other workers. The atomic `pending_work` counter tracks
/// in-flight work: incremented before enqueue, decremented after processing completes.
async fn discovery_worker_rayon(
_worker_id: usize,
work_rx: chan::Receiver<PathBuf>,
work_tx: chan::Sender<PathBuf>,
result_tx: chan::Sender<DiscoveryResult>,
pending_work: Arc<AtomicUsize>,
skipped_count: Arc<AtomicU64>,
shutdown: Arc<AtomicBool>,
seen_paths: Arc<parking_lot::RwLock<std::collections::HashSet<PathBuf>>>,
root_path: PathBuf,
rule_toggles: RuleToggles,
volume_backend: Option<Arc<dyn crate::volume::VolumeBackend>>,
cloud_url_base: Option<String>,
) {
loop {
if shutdown.load(Ordering::Acquire) {
break;
}
let dir_path = match tokio::time::timeout(
tokio::time::Duration::from_millis(50),
work_rx.recv(),
)
.await
{
Ok(Ok(path)) => path,
Ok(Err(_)) => break,
Err(_) => continue,
};
{
let mut seen = seen_paths.write();
if !seen.insert(dir_path.clone()) {
pending_work.fetch_sub(1, Ordering::Release);
continue;
}
}
let dir_ruler = build_default_ruler(rule_toggles, &root_path, &dir_path).await;
match read_directory(
&dir_path,
volume_backend.as_ref(),
cloud_url_base.as_deref(),
)
.await
{
Ok(entries) => {
let mut local_stats = LocalStats::default();
for entry in entries {
let decision = dir_ruler
.evaluate_path(
&entry.path,
&SimpleMetadata {
is_dir: matches!(entry.kind, EntryKind::Directory),
},
)
.await;
if matches!(decision, Ok(RulerDecision::Reject)) {
skipped_count.fetch_add(1, Ordering::Relaxed);
continue;
}
if let Err(err) = decision {
let _ = result_tx
.send(DiscoveryResult::Error(IndexError::FilterCheck {
path: entry.path.to_string_lossy().to_string(),
error: err.to_string(),
}))
.await;
continue;
}
match entry.kind {
EntryKind::Directory => {
local_stats.dirs += 1;
// Increment BEFORE enqueuing so the monitor never sees pending_work=0 while
// work is in flight. Decrement only happens after processing completes.
pending_work.fetch_add(1, Ordering::Release);
if work_tx.send(entry.path.clone()).await.is_err() {
pending_work.fetch_sub(1, Ordering::Release);
}
let _ = result_tx.send(DiscoveryResult::Entry(entry)).await;
}
EntryKind::File => {
local_stats.files += 1;
local_stats.bytes += entry.size;
let _ = result_tx.send(DiscoveryResult::Entry(entry)).await;
}
EntryKind::Symlink => {
local_stats.symlinks += 1;
let _ = result_tx.send(DiscoveryResult::Entry(entry)).await;
}
}
}
let _ = result_tx
.send(DiscoveryResult::Stats {
files: local_stats.files,
dirs: local_stats.dirs,
symlinks: local_stats.symlinks,
bytes: local_stats.bytes,
})
.await;
let dirs_queued = pending_work.load(Ordering::Acquire);
let _ = result_tx
.send(DiscoveryResult::Progress { dirs_queued })
.await;
}
Err(e) => {
let _ = result_tx
.send(DiscoveryResult::Error(IndexError::ReadDir {
path: dir_path.to_string_lossy().to_string(),
error: e.to_string(),
}))
.await;
}
}
pending_work.fetch_sub(1, Ordering::Release);
}
}
#[derive(Default)]
struct LocalStats {
files: u64,
dirs: u64,
symlinks: u64,
bytes: u64,
}
/// Single-threaded directory traversal fallback for concurrency = 1.
///
/// Uses a simple queue-based approach without task spawning overhead. Processes
/// directories one at a time, applies filters, and accumulates entries into batches.
/// Useful for debugging or when parallel overhead exceeds benefits (small directory trees).
async fn run_discovery_phase_sequential(
state: &mut IndexerState,
ctx: &JobContext<'_>,
root_path: &Path,
rule_toggles: RuleToggles,
volume_backend: Option<&Arc<dyn crate::volume::VolumeBackend>>,
cloud_url_base: Option<String>,
) -> Result<(), JobError> {
ctx.log(format!(
"Discovery phase starting from: {} (sequential mode)",
root_path.display()
));
let mut skipped_count = 0u64;
while let Some(dir_path) = state.dirs_to_walk.pop_front() {
ctx.check_interrupt().await?;
// Skip if already seen (handles symlink loops)
if !state.seen_paths.insert(dir_path.clone()) {
continue;
}
// Build rules in the context of the current directory for gitignore behavior
let dir_ruler = build_default_ruler(rule_toggles, root_path, &dir_path).await;
// Do not skip the directory itself by rules; only apply rules to its entries
// Update progress
let indexer_progress = IndexerProgress {
phase: IndexPhase::Discovery {
dirs_queued: state.dirs_to_walk.len(),
@@ -69,21 +453,18 @@ pub async fn run_discovery_phase(
scope: None,
persistence: None,
is_ephemeral: false,
action_context: None,
};
ctx.progress(Progress::generic(indexer_progress.to_generic_progress()));
// Read directory entries with per-dir FS timing
match read_directory(&dir_path, volume_backend, cloud_url_base.as_deref()).await {
Ok(entries) => {
let entry_count = entries.len();
let mut added_count = 0;
for entry in entries {
// Check for interruption during entry processing
ctx.check_interrupt().await?;
// Skip filtered entries via rules engine
let decision = dir_ruler
.evaluate_path(
&entry.path,
@@ -95,7 +476,6 @@ pub async fn run_discovery_phase(
if matches!(decision, Ok(RulerDecision::Reject)) {
state.stats.skipped += 1;
skipped_count += 1;
eprintln!("[discovery] Filtered entry: {}", entry.path.display());
continue;
}
if let Err(err) = decision {
@@ -135,7 +515,6 @@ pub async fn run_discovery_phase(
));
}
// Batch entries
if state.should_create_batch() {
let batch = state.create_batch();
state.entry_batches.push(batch);
@@ -151,13 +530,9 @@ pub async fn run_discovery_phase(
}
}
// Update rate tracking
state.items_since_last_update += 1;
// State is automatically saved during job serialization on shutdown
}
// Final batch
if !state.pending_entries.is_empty() {
let final_batch_size = state.pending_entries.len();
ctx.log(format!(
@@ -181,31 +556,32 @@ pub async fn run_discovery_phase(
Ok(())
}
/// Reads a directory through a volume backend, falling back to LocalBackend if none provided.
///
/// Volume backends abstract local filesystems and cloud storage (S3, Dropbox) behind a
/// unified interface. When indexing managed locations, the backend is provided upfront from
/// volume registration. For ephemeral browsing or untracked paths, this creates a temporary
/// LocalBackend on demand.
async fn read_directory(
path: &Path,
volume_backend: Option<&Arc<dyn crate::volume::VolumeBackend>>,
cloud_url_base: Option<&str>,
) -> Result<Vec<DirEntry>, std::io::Error> {
// Use provided backend or create LocalBackend fallback
let backend: Arc<dyn crate::volume::VolumeBackend> = match volume_backend {
Some(backend) => Arc::clone(backend),
None => Arc::new(crate::volume::LocalBackend::new(
path.parent().unwrap_or(path),
)),
};
read_directory_with_backend(backend.as_ref(), path, cloud_url_base).await
}
/// Reads directory contents via a volume backend and converts paths for cloud vs local.
///
/// For cloud volumes, prepends the cloud URL base (e.g., "s3://bucket/") to build proper
/// hierarchical paths. For local volumes, uses standard PathBuf joins. This ensures cloud
/// entries have full URIs like "s3://bucket/folder/file.txt" instead of relative paths.
async fn read_directory_with_backend(
backend: &dyn crate::volume::VolumeBackend,
path: &Path,
@@ -218,13 +594,10 @@ async fn read_directory_with_backend(
.await
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
// Convert RawDirEntry to DirEntry
let entries: Vec<DirEntry> = raw_entries
.into_iter()
.map(|raw| {
// For cloud volumes, prepend the cloud URL base to build proper hierarchical paths
let full_path = if let Some(base) = cloud_url_base {
// Cloud: s3://bucket/ + relative_path + filename
let relative = path.to_string_lossy();
let joined = if relative.is_empty() {
raw.name.clone()
@@ -233,7 +606,6 @@ async fn read_directory_with_backend(
};
PathBuf::from(format!("{}{}", base, joined))
} else {
// Local: just join normally
path.join(&raw.name)
};

View File

@@ -1,10 +1,15 @@
//! # Indexer Execution Phases
//!
//! The indexer runs in four sequential phases to enable resumability and incremental
//! progress tracking. Each phase is independently checkpointed so interrupted jobs can
//! resume mid-phase without reprocessing completed work. This prevents re-walking large
//! directories or re-hashing files after crashes or cancellations.
//!
//! Discovery walks the filesystem and collects raw metadata. Processing converts those
//! entries into database records with stable UUIDs. Aggregation bubbles up directory
//! sizes from leaves to root (required for accurate folder size reporting). Content
//! identification hashes file contents for deduplication and generates deterministic
//! sync UUIDs.
pub mod aggregation;
pub mod content;

View File

@@ -1,4 +1,10 @@
//! # Entry Processing and Change Detection
//!
//! `core::ops::indexing::phases::processing` converts discovered filesystem entries into
//! database records, applying change detection to identify new, modified, moved, and deleted
//! entries. Processes entries in depth-first order (parents before children) within database
//! transactions, preserving ephemeral UUIDs from prior browsing sessions and validating that
//! indexing paths stay within location boundaries to prevent cross-location data corruption.
use crate::{
infra::{
@@ -8,7 +14,7 @@ use crate::{
},
ops::indexing::{
change_detection::{Change, ChangeDetector},
database_storage::DatabaseStorage,
state::{DirEntry, EntryKind, IndexError, IndexPhase, IndexerProgress, IndexerState},
IndexMode,
},
@@ -18,16 +24,24 @@ use std::{path::Path, sync::Arc};
use tracing::warn;
use uuid::Uuid;
/// Detects SQLite unique constraint violations from concurrent watcher and indexer writes.
///
/// When the file watcher creates an entry while the indexer is processing the same file,
/// both try to insert with the same (path, parent_id) combination. This is benign - the entry
/// exists, which is the desired outcome. We detect and skip these instead of failing the job.
fn is_unique_constraint_violation(error: &JobError) -> bool {
// Check if the error contains SQLite unique constraint violation messages
let error_msg = error.to_string().to_lowercase();
error_msg.contains("unique constraint")
|| error_msg.contains("unique index")
|| error_msg.contains("constraint failed")
}
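// Hedged sketch: the check is purely substring-based on the error's Display output, so any
// SQLite duplicate-insert message is caught regardless of which table triggered it. Assumes
// JobError::execution preserves the message text; the module name is illustrative.
#[cfg(test)]
mod unique_violation_sketch {
use super::*;
#[test]
fn matches_sqlite_duplicate_insert_messages() {
let dup = JobError::execution("UNIQUE constraint failed: entry.parent_id, entry.name");
assert!(is_unique_constraint_violation(&dup));
let other = JobError::execution("disk I/O error");
assert!(!is_unique_constraint_violation(&other));
}
}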
/// Processes discovered entries into database records with change detection and UUID preservation.
///
/// Sorts all entries by depth (parents before children) to ensure hierarchy integrity, applies
/// change detection to identify new/modified/moved/deleted entries, processes changes within
/// batch transactions, preserves ephemeral UUIDs from browsing sessions, validates indexing
/// boundaries to prevent cross-location corruption, and emits sync/event batches for UI updates.
pub async fn run_processing_phase(
location_id: Uuid,
state: &mut IndexerState,
@@ -42,13 +56,26 @@ pub async fn run_processing_phase(
total_batches
));
// Populate ephemeral UUIDs so entries browsed before enabling indexing keep the same UUID,
// preserving tags and notes attached during ephemeral mode. Without this, promoting a browsed
// folder to a managed location would orphan all existing user metadata.
let ephemeral_cache = ctx.library().core_context().ephemeral_cache();
let preserved_count = state
.populate_ephemeral_uuids(ephemeral_cache, location_root_path)
.await;
if preserved_count > 0 {
ctx.log(format!(
"Found {} ephemeral UUIDs to preserve from previous browsing",
preserved_count
));
}
if total_batches == 0 {
ctx.log("No batches to process - transitioning to Aggregation phase");
state.phase = crate::ops::indexing::state::Phase::Aggregation;
return Ok(());
}
// Get the actual location record from database
let location_record = entities::location::Entity::find()
.filter(entities::location::Column::Uuid.eq(location_id))
.one(ctx.library_db())
@@ -66,8 +93,10 @@ pub async fn run_processing_phase(
device_id, location_id_i32, location_entry_id
));
// SAFETY: Validate indexing path is within location boundaries to prevent catastrophic
// cross-location deletion if watcher routing bugs send events for /home/user/photos to a
// /home/user/documents location. Without this check, we'd delete all documents entries
// not present in photos, wiping the database.
let location_actual_path = crate::ops::indexing::path_resolver::PathResolver::get_full_path(
ctx.library_db(),
location_entry_id,
@@ -75,18 +104,14 @@ pub async fn run_processing_phase(
.await
.map_err(|e| JobError::execution(format!("Failed to resolve location root path: {}", e)))?;
// For cloud paths, compare strings instead of PathBuf (cloud paths have empty path component for root)
let location_actual_str = location_actual_path.to_string_lossy();
let is_cloud_path =
location_actual_str.contains("://") && !location_actual_str.starts_with("local://");
let is_within_boundaries = if is_cloud_path {
// For cloud paths, check if the root path matches or is a subpath
let root_str = location_root_path.to_string_lossy();
// Empty path means root of cloud location, which is always valid
root_str.is_empty() || location_actual_str.starts_with(root_str.as_ref())
} else {
// For local paths, use standard PathBuf comparison
location_root_path.starts_with(&location_actual_path)
};
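// Illustrative examples of the two branches (paths hypothetical): locally, indexing
// /home/user/docs under a location resolved to /home/user passes the starts_with check,
// while /home/user/photos against a /home/user/documents location fails and aborts below.
// For cloud locations the comparison is string-based, and an empty indexing root (the
// cloud location's own root) always passes.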
@@ -105,8 +130,9 @@ pub async fn run_processing_phase(
location_actual_path.display()
));
// Seed entry ID cache with all ancestors between location root and indexing path.
// Without this, re-indexing /home/user/docs/photos would fail to find /home/user/docs
// in the cache and create a duplicate "docs" folder with wrong parent_id.
let _ = state
.seed_ancestor_cache(
ctx.library_db(),
@@ -116,8 +142,6 @@ pub async fn run_processing_phase(
)
.await;
// Load existing entries for change detection scoped to the indexing path
// Note: location_root_path is the actual path being indexed (could be a subpath of the location)
let mut change_detector = ChangeDetector::new();
if !state.existing_entries.is_empty() || mode != IndexMode::Shallow {
ctx.log("Loading existing entries for change detection...");
@@ -130,22 +154,21 @@ pub async fn run_processing_phase(
));
}
// Sort all discovered entries by depth (parents before children) to ensure parent entries
// exist in the database before we try to create children with parent_id foreign keys.
// Without this, creating /a/b/c.txt before /a would fail the parent_id constraint.
ctx.log("Flattening and sorting all entries by depth...");
let mut all_entries: Vec<DirEntry> = Vec::new();
while let Some(batch) = state.entry_batches.pop() {
all_entries.extend(batch);
}
// Sort all entries by depth first, then by type
all_entries.sort_by(|a, b| {
let a_depth = a.path.components().count();
let b_depth = b.path.components().count();
// First sort by depth (parents before children)
match a_depth.cmp(&b_depth) {
std::cmp::Ordering::Equal => {
// Then sort by type (directories before files at same depth)
let a_priority = match a.kind {
EntryKind::Directory => 0,
EntryKind::Symlink => 1,
@@ -167,8 +190,7 @@ pub async fn run_processing_phase(
all_entries.len()
));
// Re-batch the sorted entries for processing
let batch_size = 1000;
let mut sorted_batches: Vec<Vec<DirEntry>> = Vec::new();
let mut current_batch = Vec::with_capacity(batch_size);
@@ -185,7 +207,6 @@ pub async fn run_processing_phase(
sorted_batches.push(current_batch);
}
// Use pop() below to consume batches. Reverse so that the first (shallowest) batch is processed first.
state.entry_batches = sorted_batches;
state.entry_batches.reverse();
let total_batches = state.entry_batches.len();
@@ -212,29 +233,22 @@ pub async fn run_processing_phase(
scope: None,
persistence: None,
is_ephemeral: false,
action_context: None,
};
ctx.progress(Progress::generic(indexer_progress.to_generic_progress()));
// Check for interruption before starting transaction
ctx.check_interrupt().await?;
// Begin a single transaction for all new entry creations in this batch
let txn = ctx.library_db().begin().await.map_err(|e| {
JobError::execution(format!("Failed to begin processing transaction: {}", e))
})?;
// Accumulate related rows for bulk insert
let mut bulk_self_closures: Vec<entities::entry_closure::ActiveModel> = Vec::new();
let mut bulk_dir_paths: Vec<entities::directory_paths::ActiveModel> = Vec::new();
let mut created_entries: Vec<entities::entry::Model> = Vec::new();
// Process batch - check for changes and create/update entries
// (Already sorted globally by depth)
for entry in batch {
// Check for interruption during batch processing
if let Err(e) = ctx.check_interrupt().await {
// Rollback transaction before propagating interruption
if let Err(rollback_err) = txn.rollback().await {
warn!(
"Failed to rollback transaction during interruption: {}",
@@ -244,19 +258,14 @@ pub async fn run_processing_phase(
return Err(e);
}
// Add to seen_paths for delete detection (important for resumed jobs)
state.seen_paths.insert(entry.path.clone());
// Cloud backends can't use std::fs::Metadata for change detection since files don't
// exist locally. We treat cloud entries as always "new" for now. Future enhancement:
// use backend-provided ETag or modified_at for cloud change detection.
let change = if volume_backend.is_some() && !volume_backend.unwrap().is_local() {
// Cloud backend - treat as new for now
Some(Change::New(entry.path.clone()))
} else {
// Local backend - use standard change detection
let metadata = match std::fs::symlink_metadata(&entry.path) {
Ok(m) => m,
Err(e) => {
@@ -273,10 +282,8 @@ pub async fn run_processing_phase(
match change {
Some(Change::New(_)) => {
// Create new entry within batch transaction
match DatabaseStorage::create_entry_in_conn(
state,
&entry,
device_id,
location_root_path,
@@ -295,25 +302,18 @@ pub async fn run_processing_phase(
));
total_processed += 1;
// Track for content identification if needed
if mode >= IndexMode::Content && entry.kind == EntryKind::File {
state.entries_for_content.push((entry_id, entry.path));
}
// Collect for batch sync after transaction commits
created_entries.push(entry_model);
// end Some(Change::New)
}
Err(e) => {
// Check if this is a unique constraint violation
// This can happen when the watcher creates an entry while the indexer is running
if is_unique_constraint_violation(&e) {
ctx.log(format!(
"Entry already exists (created by watcher): {}",
entry.path.display()
));
// This is not an error - the entry exists, which is what we want
// Just skip it and continue
} else {
let error_msg = format!(
"Failed to create entry for {}: {}",
@@ -331,8 +331,7 @@ pub async fn run_processing_phase(
}
Some(Change::Modified { entry_id, .. }) => {
// Update existing entry within batch transaction
match DatabaseStorage::update_entry_in_conn(entry_id, &entry, &txn).await {
Ok(()) => {
ctx.log(format!(
"Updated entry {}: {}",
@@ -341,7 +340,6 @@ pub async fn run_processing_phase(
));
total_processed += 1;
// Re-process content if needed
if mode >= IndexMode::Content && entry.kind == EntryKind::File {
state.entries_for_content.push((entry_id, entry.path));
}
@@ -363,14 +361,13 @@ pub async fn run_processing_phase(
entry_id,
..
}) => {
// Handle move - update path in database
ctx.log(format!(
"Detected move: {} -> {}",
old_path.display(),
new_path.display()
));
match DatabaseStorage::simple_move_entry_in_conn(
state, entry_id, &old_path, &new_path, &txn,
)
.await
{
@@ -383,7 +380,6 @@ pub async fn run_processing_phase(
));
total_processed += 1;
// Re-process content if needed for moved files
if mode >= IndexMode::Content && entry.kind == EntryKind::File {
state.entries_for_content.push((entry_id, new_path));
}

View File

@@ -1,14 +1,20 @@
//! # Content Hash Processor
//!
//! Generates BLAKE3 content hashes for files and links them to content_identity records. Each
//! processor execution is atomic: hash generation, identity creation/lookup, and entry linking
//! happen in a single transaction. This ensures entries either have valid content_id references
//! or remain unlinked if processing fails.
use super::{database_storage::DatabaseStorage, state::EntryKind};
use crate::domain::content_identity::ContentHashGenerator;
use anyhow::Result;
use sea_orm::DatabaseConnection;
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
use tracing::debug;
use uuid::Uuid;
/// Minimal entry snapshot required for content processing without full database models.
#[derive(Debug, Clone)]
pub struct ProcessorEntry {
pub id: i32,
@@ -20,7 +26,7 @@ pub struct ProcessorEntry {
pub mime_type: Option<String>,
}
/// Outcome of a single processor run: success/failure, artifacts created, and bytes processed.
#[derive(Debug, Clone)]
pub struct ProcessorResult {
pub success: bool,
@@ -49,7 +55,7 @@ impl ProcessorResult {
}
}
/// Per-processor settings: type, enabled flag, and arbitrary JSON config.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProcessorConfig {
pub processor_type: String,
@@ -58,7 +64,7 @@ pub struct ProcessorConfig {
pub settings: serde_json::Value,
}
/// Collection of processors that run automatically on watcher events for a location.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LocationProcessorConfig {
#[serde(default)]
@@ -84,7 +90,7 @@ impl Default for LocationProcessorConfig {
},
ProcessorConfig {
processor_type: "thumbstrip".to_string(),
enabled: true, // ~6s per video, acceptable for auto-generation.
settings: serde_json::json!({
"variants": ["thumbstrip_preview"],
"regenerate": false
@@ -92,7 +98,7 @@ impl Default for LocationProcessorConfig {
},
ProcessorConfig {
processor_type: "proxy".to_string(),
enabled: false, // Disabled by default (user opt-in, ~8s per video)
enabled: false, // User opt-in required (~8s per video).
settings: serde_json::json!({
"enabled": false,
"max_file_size_gb": 5,
@@ -101,7 +107,7 @@ impl Default for LocationProcessorConfig {
},
ProcessorConfig {
processor_type: "ocr".to_string(),
enabled: false, // Disabled by default (expensive)
enabled: false, // Expensive, user opt-in.
settings: serde_json::json!({
"languages": ["eng"],
"min_confidence": 0.6
@@ -109,7 +115,7 @@ impl Default for LocationProcessorConfig {
},
ProcessorConfig {
processor_type: "speech_to_text".to_string(),
enabled: false, // Disabled by default (very expensive)
enabled: false, // Very expensive, user opt-in.
settings: serde_json::json!({
"model": "base",
"language": null
@@ -120,7 +126,7 @@ impl Default for LocationProcessorConfig {
}
}
/// Content hash processor
/// Generates BLAKE3 hashes and creates content_identity records for files.
pub struct ContentHashProcessor {
library_id: Uuid,
}
@@ -132,7 +138,7 @@ impl ContentHashProcessor {
pub async fn process(
&self,
ctx: &impl IndexingCtx,
db: &DatabaseConnection,
entry: &ProcessorEntry,
) -> Result<ProcessorResult> {
if !matches!(entry.kind, EntryKind::File) || entry.content_id.is_some() {
@@ -144,8 +150,8 @@ impl ContentHashProcessor {
let content_hash = ContentHashGenerator::generate_content_hash(&entry.path).await?;
debug!("✓ Generated content hash: {}", content_hash);
EntryProcessor::link_to_content_identity(
ctx,
DatabaseStorage::link_to_content_identity(
db,
entry.id,
&entry.path,
content_hash,
@@ -159,7 +165,7 @@ impl ContentHashProcessor {
}
}
/// Load processor configuration for a location
/// Loads processor config from the location's database record, falling back to defaults.
pub async fn load_location_processor_config(
_location_id: Uuid,
_db: &sea_orm::DatabaseConnection,

View File

@@ -1,4 +1,8 @@
//! IndexerProgress to GenericProgress conversion
//! # IndexerProgress to GenericProgress Conversion
//!
//! Maps indexer-specific progress (phases, stats) to the generic job progress format for UI display.
//! Each phase is assigned a percentage range to show continuous progress across all four stages.
//! The converter handles path filtering to distinguish between real filesystem paths and status messages.
use super::state::{IndexPhase, IndexerProgress};
use crate::{
@@ -73,20 +77,16 @@ impl ToGenericProgress for IndexerProgress {
}
};
// Convert current_path string to SdPath only if it's a real filesystem path
// During aggregation, current_path contains status messages like "Aggregating directory 3846/3877: info"
// During other phases, it might contain actual file paths
// Filter out status messages from current_path - only convert real filesystem paths to SdPath.
let current_path = if !self.current_path.is_empty()
&& !self.current_path.starts_with("Aggregating directory")
&& !self.current_path.starts_with("Finalizing")
{
// Only create SdPath if it looks like a real path (absolute or relative with separators)
let path_buf = PathBuf::from(&self.current_path);
if path_buf.is_absolute()
|| self.current_path.contains('/')
|| self.current_path.contains('\\')
{
// Try to parse as URI first (for cloud paths), fall back to local path
SdPath::from_uri(&self.current_path)
.ok()
.or_else(|| Some(SdPath::local(path_buf)))
@@ -97,34 +97,25 @@ impl ToGenericProgress for IndexerProgress {
None
};
// completion_info is already set correctly from phase matching above
let final_completion = completion_info;
// Create the generic progress
let mut progress = GenericProgress::new(percentage, &phase_name, &phase_message)
.with_bytes(self.total_found.bytes, self.total_found.bytes) // Total bytes found so far
.with_performance(
self.processing_rate,
self.estimated_remaining,
None, // Could calculate elapsed time from start
)
.with_errors(self.total_found.errors, 0) // No separate warning count in IndexerStats
.with_metadata(self); // Include original indexer progress as metadata
.with_bytes(self.total_found.bytes, self.total_found.bytes)
.with_performance(self.processing_rate, self.estimated_remaining, None)
.with_errors(self.total_found.errors, 0)
.with_metadata(self);
// Set completion data - for finalizing phase, manually set to avoid auto-percentage calculation
// Finalizing phase uses manual completion to preserve custom percentage ranges.
match &self.phase {
IndexPhase::Finalizing { .. } => {
// Manually set completion to preserve our custom percentage calculation
progress.completion.completed = final_completion.0;
progress.completion.total = final_completion.1;
}
_ => {
// For other phases, use normal with_completion which auto-calculates percentage
progress = progress.with_completion(final_completion.0, final_completion.1);
}
}
// Set current path if available
if let Some(path) = current_path {
progress = progress.with_current_path(path);
}
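
A minimal sketch of the phase-to-range mapping described in the module doc above. The bounds and the helper name are illustrative assumptions; the real converter derives its ranges from the IndexPhase variants:

fn phase_range(phase_index: usize) -> (f32, f32) {
    // Four stages, each owning a contiguous slice of 0..100 so the
    // progress bar never jumps backwards between phases.
    match phase_index {
        0 => (0.0, 25.0),   // Discovery
        1 => (25.0, 60.0),  // Processing
        2 => (60.0, 90.0),  // Content
        _ => (90.0, 100.0), // Finalizing
    }
}

fn main() {
    let (lo, hi) = phase_range(2);
    let within = 0.5; // 50% through the Content phase
    println!("overall: {:.1}%", lo + (hi - lo) * within);
}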

View File

File diff suppressed because it is too large

View File

@@ -1,4 +1,9 @@
//! Indexer state management and progress tracking
//! State management and progress tracking for indexer jobs.
//!
//! This module defines the resumable state machine that tracks indexing progress
//! across all phases. The state is automatically serialized during job shutdowns,
//! allowing indexing to resume from the last completed phase rather than starting
//! over from scratch.
use crate::domain::addressing::SdPath;
@@ -9,8 +14,9 @@ use std::{
path::PathBuf,
time::{Duration, Instant},
};
use uuid::Uuid;
/// Indexer progress information
/// Progress information sent to UI during indexing operations.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexerProgress {
pub phase: IndexPhase,
@@ -23,13 +29,11 @@ pub struct IndexerProgress {
#[serde(skip_serializing_if = "Option::is_none")]
pub persistence: Option<super::job::IndexPersistence>,
pub is_ephemeral: bool,
/// Action context that spawned this job (if available)
#[serde(skip_serializing_if = "Option::is_none")]
pub action_context: Option<crate::infra::action::context::ActionContext>,
}
/// Statistics collected during indexing
/// Cumulative statistics tracked throughout the indexing process.
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, Type)]
pub struct IndexerStats {
pub files: u64,
@@ -40,7 +44,7 @@ pub struct IndexerStats {
pub errors: u64,
}
/// Current phase of the indexing operation
/// Public-facing phase information exposed to the UI.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum IndexPhase {
Discovery { dirs_queued: usize },
@@ -49,7 +53,11 @@ pub enum IndexPhase {
Finalizing { processed: usize, total: usize },
}
/// Internal phases for state machine
/// Internal phase enum used by the indexer state machine.
///
/// The state machine progresses linearly through these phases. Each phase
/// completes atomically before transitioning to the next, ensuring the job
/// can resume from a clean checkpoint if interrupted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub(crate) enum Phase {
Discovery,
@@ -59,14 +67,17 @@ pub(crate) enum Phase {
Complete,
}
/// Directory entry found during discovery
/// Filesystem entry discovered during the discovery phase.
///
/// These are lightweight representations of files and directories found on disk.
/// They're collected in batches before being processed into full database entries,
/// allowing discovery to run ahead of persistence without blocking.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DirEntry {
pub path: PathBuf,
pub kind: EntryKind,
pub size: u64,
pub modified: Option<std::time::SystemTime>,
#[serde(skip_serializing_if = "Option::is_none")]
pub inode: Option<u64>,
}
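
A hedged sketch of the batch hand-off this enables (channel capacity and entry type simplified): discovery fills batches of lightweight entries and sends them over a bounded channel, so walking can run ahead of persistence until back-pressure applies.

use std::path::PathBuf;
use tokio::sync::mpsc;

#[derive(Debug)]
struct LiteEntry {
    path: PathBuf,
    size: u64,
}

#[tokio::main]
async fn main() {
    // Bounded channel: at most 16 pending batches before the walker blocks.
    let (tx, mut rx) = mpsc::channel::<Vec<LiteEntry>>(16);
    tokio::spawn(async move {
        let batch = vec![LiteEntry { path: PathBuf::from("/tmp/a.txt"), size: 42 }];
        let _ = tx.send(batch).await; // discovery side
    });
    while let Some(batch) = rx.recv().await {
        // persistence side: one database transaction per batch (omitted)
        println!("persisting {} entries", batch.len());
    }
}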
@@ -77,7 +88,11 @@ pub enum EntryKind {
Symlink,
}
/// Errors that occur during indexing
/// Errors encountered during indexing that don't halt the entire job.
///
/// These errors are logged and accumulated but don't cause job failure. This allows
/// indexing to continue even when individual files are inaccessible due to permissions,
/// file locks, or I/O errors.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum IndexError {
ReadDir { path: String, error: String },
@@ -86,43 +101,36 @@ pub enum IndexError {
FilterCheck { path: String, error: String },
}
/// Resumable indexer state
/// Complete state for a resumable indexer job.
///
/// This struct holds all data needed to resume indexing from any phase. The state
/// is automatically serialized when the job system shuts down, allowing long-running
/// indexing operations to survive app restarts without losing progress.
#[derive(Debug, Serialize, Deserialize)]
pub struct IndexerState {
pub(crate) phase: Phase,
#[serde(skip, default = "Instant::now")]
pub(crate) started_at: Instant,
// Discovery phase
pub(crate) dirs_to_walk: VecDeque<PathBuf>,
pub(crate) pending_entries: Vec<DirEntry>,
pub(crate) seen_paths: HashSet<PathBuf>,
// Processing phase
pub(crate) entry_batches: Vec<Vec<DirEntry>>,
// Content phase
pub(crate) entries_for_content: Vec<(i32, PathBuf)>, // (entry_id, path)
// Database operations
pub(crate) entry_id_cache: HashMap<PathBuf, i32>, // path -> entry_id for parent lookups
// Change detection
pub(crate) entries_for_content: Vec<(i32, PathBuf)>,
pub(crate) entry_id_cache: HashMap<PathBuf, i32>,
// UUIDs from ephemeral indexing preserved when creating persistent entries.
// This ensures files browsed before enabling indexing keep the same UUID,
// preventing orphaned tags and flashing Quick Look previews when a browsed
// folder is later added as a managed location.
#[serde(skip, default)]
pub(crate) ephemeral_uuids: HashMap<PathBuf, Uuid>,
pub(crate) existing_entries:
HashMap<PathBuf, (i32, Option<u64>, Option<std::time::SystemTime>)>, // path -> (id, inode, modified)
// Statistics
HashMap<PathBuf, (i32, Option<u64>, Option<std::time::SystemTime>)>,
pub(crate) stats: IndexerStats,
pub(crate) errors: Vec<IndexError>,
// Performance tracking
#[serde(skip, default = "Instant::now")]
pub(crate) last_progress_time: Instant,
pub(crate) items_since_last_update: u64,
// Configuration
pub(crate) batch_size: usize,
// Discovery config (Phase 2)
pub(crate) discovery_concurrency: usize,
pub(crate) dirs_channel_capacity: usize,
pub(crate) entries_channel_capacity: usize,
@@ -135,6 +143,10 @@ impl IndexerState {
dirs_to_walk.push_back(path.to_path_buf());
}
let discovery_concurrency = std::thread::available_parallelism()
.map(|n| usize::max(n.get() / 2, 1))
.unwrap_or(4);
Self {
phase: Phase::Discovery,
started_at: Instant::now(),
@@ -144,18 +156,59 @@ impl IndexerState {
entry_batches: Vec::new(),
entries_for_content: Vec::new(),
entry_id_cache: HashMap::new(),
ephemeral_uuids: HashMap::new(),
existing_entries: HashMap::new(),
stats: Default::default(),
errors: Vec::new(),
last_progress_time: Instant::now(),
items_since_last_update: 0,
batch_size: 1000,
discovery_concurrency: 1,
discovery_concurrency,
dirs_channel_capacity: 4096,
entries_channel_capacity: 16384,
}
}
/// Extracts UUIDs from the ephemeral cache for reuse during persistent indexing.
///
/// When a directory is browsed before being added as a managed location, ephemeral
/// indexing assigns UUIDs to each entry. This method preserves those UUIDs so that
/// user metadata (tags, notes) attached during browsing remains valid after the
/// directory is promoted to a managed location. Without preservation, adding a
/// browsed folder as a location would orphan all existing tags and cause Quick Look
/// previews to flash as UUIDs change.
pub async fn populate_ephemeral_uuids(
&mut self,
ephemeral_cache: &super::ephemeral::EphemeralIndexCache,
root_path: &std::path::Path,
) -> usize {
if let Some(index) = ephemeral_cache.get_for_path(root_path) {
let index_read = index.read().await;
let entries = index_read.entries();
for path in entries.keys() {
if let Some(entry_uuid) = index_read.get_entry_uuid(path) {
self.ephemeral_uuids.insert(path.clone(), entry_uuid);
}
}
let count = self.ephemeral_uuids.len();
tracing::info!(
"Populated {} ephemeral UUIDs for preservation from cache covering {}",
count,
root_path.display()
);
count
} else {
tracing::debug!("No ephemeral index found for path: {}", root_path.display());
0
}
}
pub fn get_ephemeral_uuid(&self, path: &std::path::Path) -> Option<Uuid> {
self.ephemeral_uuids.get(path).copied()
}
pub fn calculate_rate(&mut self) -> f32 {
let elapsed = self.last_progress_time.elapsed();
if elapsed.as_secs() > 0 {
@@ -169,7 +222,6 @@ impl IndexerState {
}
pub fn estimate_remaining(&self) -> Option<Duration> {
// TODO: Implement based on current rate and remaining work
None
}
@@ -186,8 +238,12 @@ impl IndexerState {
std::mem::take(&mut self.pending_entries)
}
/// Seed the entry ID cache with all ancestor directories from location root to target path
/// This prevents the ghost folder bug where subpath reindexing creates entries with wrong parent_id
/// Seeds the entry ID cache with all ancestor directories from location root to target path.
///
/// This prevents the ghost folder bug where subpath reindexing creates entries with the
/// wrong parent_id. When indexing a subdirectory, parent lookups must find the existing
/// ancestor entries rather than creating duplicates. Seeding ensures the cache is warm
/// before processing begins.
pub async fn seed_ancestor_cache<'a>(
&mut self,
db: &sea_orm::DatabaseConnection,
@@ -198,18 +254,15 @@ impl IndexerState {
use crate::infra::db::entities::directory_paths;
use sea_orm::{ColumnTrait, EntityTrait, QueryFilter};
// Seed location root
self.entry_id_cache
.insert(location_root_path.to_path_buf(), location_entry_id);
// Seed all intermediate ancestors between location root and target path
if let Ok(relative_path) = target_path.strip_prefix(location_root_path) {
let mut current_path = location_root_path.to_path_buf();
for component in relative_path.components() {
current_path.push(component);
// Look up this ancestor in directory_paths table
if let Ok(Some(dir_record)) = directory_paths::Entity::find()
.filter(
directory_paths::Column::Path
@@ -227,3 +280,72 @@ impl IndexerState {
Ok(())
}
}
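
A simplified sketch of the ancestor-seeding walk described above, with the directory_paths lookup replaced by a placeholder id counter so the example stands alone:

use std::collections::HashMap;
use std::path::{Path, PathBuf};

fn seed_ancestors(cache: &mut HashMap<PathBuf, i32>, root: &Path, target: &Path, root_id: i32) {
    // Seed the root first, then every intermediate directory down to the
    // target, so later parent lookups hit the cache instead of creating
    // duplicate entries.
    cache.insert(root.to_path_buf(), root_id);
    if let Ok(rel) = target.strip_prefix(root) {
        let mut current = root.to_path_buf();
        let mut next_id = root_id + 1; // stand-in for the database lookup
        for component in rel.components() {
            current.push(component);
            cache.entry(current.clone()).or_insert_with(|| {
                let id = next_id;
                next_id += 1;
                id
            });
        }
    }
}

fn main() {
    let mut cache = HashMap::new();
    seed_ancestors(&mut cache, Path::new("/data"), Path::new("/data/a/b"), 1);
    assert_eq!(cache.len(), 3); // /data, /data/a, /data/a/b
}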
#[cfg(test)]
mod tests {
use super::*;
use crate::domain::addressing::SdPath;
#[test]
fn test_ephemeral_uuid_lookup() {
let sd_path = SdPath::Physical {
device_slug: "local".to_string(),
path: PathBuf::from("/test"),
};
let mut state = IndexerState::new(&sd_path);
// Initially no ephemeral UUIDs
assert!(state
.get_ephemeral_uuid(std::path::Path::new("/test/file.txt"))
.is_none());
// Add an ephemeral UUID
let test_uuid = Uuid::new_v4();
state
.ephemeral_uuids
.insert(PathBuf::from("/test/file.txt"), test_uuid);
// Now we can retrieve it
assert_eq!(
state.get_ephemeral_uuid(std::path::Path::new("/test/file.txt")),
Some(test_uuid)
);
// Non-existent path still returns None
assert!(state
.get_ephemeral_uuid(std::path::Path::new("/test/other.txt"))
.is_none());
}
#[test]
fn test_ephemeral_uuid_preservation_concept() {
// This test demonstrates the UUID preservation concept:
// When ephemeral_uuids is populated, the same UUID should be used
// instead of generating a new one
let sd_path = SdPath::Physical {
device_slug: "local".to_string(),
path: PathBuf::from("/test"),
};
let mut state = IndexerState::new(&sd_path);
// Simulate an ephemeral UUID from previous browsing
let preserved_uuid = Uuid::new_v4();
let test_path = PathBuf::from("/test/document.pdf");
state
.ephemeral_uuids
.insert(test_path.clone(), preserved_uuid);
// When creating an entry, the code should check get_ephemeral_uuid first
let entry_uuid = if let Some(ephemeral_uuid) = state.get_ephemeral_uuid(&test_path) {
// Preserve the ephemeral UUID
ephemeral_uuid
} else {
// Generate a new UUID
Uuid::new_v4()
};
// The preserved UUID should be used
assert_eq!(entry_uuid, preserved_uuid);
}
}

View File

@@ -9,10 +9,9 @@ use crate::{
db::entities,
},
ops::indexing::{
entry::EntryProcessor,
job::{
EphemeralIndex, IndexMode, IndexPersistence, IndexScope, IndexerJob, IndexerJobConfig,
},
database_storage::DatabaseStorage,
ephemeral::EphemeralIndex,
job::{IndexMode, IndexPersistence, IndexScope, IndexerJob, IndexerJobConfig},
path_resolver::PathResolver,
state::EntryKind,
},
@@ -98,13 +97,16 @@ impl IndexVerifyAction {
library: &Arc<crate::library::Library>,
context: &Arc<CoreContext>,
path: &Path,
) -> Result<HashMap<PathBuf, crate::ops::indexing::entry::EntryMetadata>, ActionError> {
) -> Result<HashMap<PathBuf, crate::ops::indexing::database_storage::EntryMetadata>, ActionError> {
use tokio::sync::RwLock;
tracing::debug!("Running ephemeral indexer job on {}", path.display());
// Create ephemeral index storage that we'll share with the job
let ephemeral_index = Arc::new(RwLock::new(EphemeralIndex::new(path.to_path_buf())));
let ephemeral_index =
Arc::new(RwLock::new(EphemeralIndex::new().map_err(|e| {
ActionError::from(std::io::Error::new(std::io::ErrorKind::Other, e))
})?));
// Subscribe to job events before dispatching
let mut event_subscriber = context.events.subscribe();
@@ -183,7 +185,7 @@ impl IndexVerifyAction {
// Extract the results from our shared ephemeral index
let entries = {
let index = ephemeral_index.read().await;
index.entries.clone()
index.entries()
};
tracing::debug!(
@@ -401,7 +403,7 @@ impl IndexVerifyAction {
/// Compare ephemeral index with database entries
async fn compare_indexes(
&self,
fs_entries: HashMap<PathBuf, crate::ops::indexing::entry::EntryMetadata>,
fs_entries: HashMap<PathBuf, crate::ops::indexing::database_storage::EntryMetadata>,
mut db_entries: HashMap<PathBuf, (entities::entry::Model, PathBuf)>,
root_path: &Path,
) -> Result<IntegrityReport, ActionError> {

View File

@@ -79,7 +79,10 @@ impl CoreAction for LibraryOpenAction {
"library.open"
}
async fn validate(&self, _context: Arc<CoreContext>) -> Result<crate::infra::action::ValidationResult, ActionError> {
async fn validate(
&self,
_context: Arc<CoreContext>,
) -> Result<crate::infra::action::ValidationResult, ActionError> {
// Check if the path exists
if !self.input.path.exists() {
return Err(ActionError::Validation {

View File

@@ -66,12 +66,15 @@ impl LibraryAction for EnableIndexingAction {
.ok_or_else(|| ActionError::LocationNotFound(self.input.id))?;
// Parse the index mode
let index_mode: IndexMode = self.input.index_mode.as_str().parse().map_err(|e| {
ActionError::Validation {
field: "index_mode".to_string(),
message: format!("Invalid index mode: {}", e),
}
})?;
let index_mode: IndexMode =
self.input
.index_mode
.as_str()
.parse()
.map_err(|e| ActionError::Validation {
field: "index_mode".to_string(),
message: format!("Invalid index mode: {}", e),
})?;
// Don't allow setting to None
if index_mode == IndexMode::None {

View File

@@ -13,6 +13,9 @@ pub struct EnableIndexingOutput {
impl EnableIndexingOutput {
pub fn new(location_id: Uuid, job_id: String) -> Self {
Self { location_id, job_id }
Self {
location_id,
job_id,
}
}
}

View File

@@ -595,61 +595,72 @@ impl ThumbnailJob {
let is_cloud = Self::is_cloud_path(&entry.relative_path);
// For cloud files, download to temp file. For local files, use direct path
let (source_path, temp_file): (std::path::PathBuf, Option<tempfile::NamedTempFile>) = if is_cloud {
// Cloud path - need to download via volume backend
let volume_manager = ctx.volume_manager()
.ok_or_else(|| ThumbnailError::other("VolumeManager not available for cloud file"))?;
let (source_path, temp_file): (std::path::PathBuf, Option<tempfile::NamedTempFile>) =
if is_cloud {
// Cloud path - need to download via volume backend
let volume_manager = ctx.volume_manager().ok_or_else(|| {
ThumbnailError::other("VolumeManager not available for cloud file")
})?;
// Parse the cloud path to get an SdPath
use crate::domain::addressing::SdPath;
let sdpath = SdPath::from_uri_with_context(&entry.relative_path, &library.core_context())
.await
.map_err(|e| ThumbnailError::other(format!("Failed to parse cloud path: {}", e)))?;
// Parse the cloud path to get an SdPath
use crate::domain::addressing::SdPath;
let sdpath =
SdPath::from_uri_with_context(&entry.relative_path, &library.core_context())
.await
.map_err(|e| {
ThumbnailError::other(format!("Failed to parse cloud path: {}", e))
})?;
// Resolve the volume backend for this path
let volume = volume_manager
.resolve_volume_for_sdpath(&sdpath, &library)
.await
.map_err(|e| ThumbnailError::other(format!("Failed to resolve volume: {}", e)))?
.ok_or_else(|| ThumbnailError::other("No volume found for cloud path"))?;
// Resolve the volume backend for this path
let volume = volume_manager
.resolve_volume_for_sdpath(&sdpath, &library)
.await
.map_err(|e| ThumbnailError::other(format!("Failed to resolve volume: {}", e)))?
.ok_or_else(|| ThumbnailError::other("No volume found for cloud path"))?;
let backend = volume.backend
.as_ref()
.ok_or_else(|| ThumbnailError::other("Volume has no backend"))?;
let backend = volume
.backend
.as_ref()
.ok_or_else(|| ThumbnailError::other("Volume has no backend"))?;
// Get the backend-relative path (strip s3://bucket/ prefix)
let backend_path = Self::to_backend_path(&entry.relative_path);
// Get the backend-relative path (strip s3://bucket/ prefix)
let backend_path = Self::to_backend_path(&entry.relative_path);
// Download file content from cloud
let file_data = backend
.read(&backend_path)
.await
.map_err(|e| ThumbnailError::other(format!("Failed to read cloud file: {}", e)))?;
// Download file content from cloud
let file_data = backend.read(&backend_path).await.map_err(|e| {
ThumbnailError::other(format!("Failed to read cloud file: {}", e))
})?;
// Write to temporary file
let mut temp = tempfile::NamedTempFile::new()
.map_err(|e| ThumbnailError::other(format!("Failed to create temp file: {}", e)))?;
// Write to temporary file
let mut temp = tempfile::NamedTempFile::new().map_err(|e| {
ThumbnailError::other(format!("Failed to create temp file: {}", e))
})?;
use std::io::Write;
temp.write_all(&file_data)
.map_err(|e| ThumbnailError::other(format!("Failed to write temp file: {}", e)))?;
temp.flush()
.map_err(|e| ThumbnailError::other(format!("Failed to flush temp file: {}", e)))?;
use std::io::Write;
temp.write_all(&file_data).map_err(|e| {
ThumbnailError::other(format!("Failed to write temp file: {}", e))
})?;
temp.flush().map_err(|e| {
ThumbnailError::other(format!("Failed to flush temp file: {}", e))
})?;
let temp_path = temp.path().to_path_buf();
ctx.log(format!("Downloaded cloud file {} to temp location", entry.relative_path));
let temp_path = temp.path().to_path_buf();
ctx.log(format!(
"Downloaded cloud file {} to temp location",
entry.relative_path
));
(temp_path, Some(temp))
} else {
// Local path - use direct filesystem access
let source_path = library.path().join(&entry.relative_path);
(temp_path, Some(temp))
} else {
// Local path - use direct filesystem access
let source_path = library.path().join(&entry.relative_path);
if !source_path.exists() {
return Err(ThumbnailError::FileNotFound(entry.relative_path.clone()));
}
if !source_path.exists() {
return Err(ThumbnailError::FileNotFound(entry.relative_path.clone()));
}
(source_path, None)
};
(source_path, None)
};
let mut total_thumbnail_size = 0u64;
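
The cloud branch above stages remote bytes in a NamedTempFile whose guard must outlive the returned path. A minimal standalone sketch of that pattern (the function name is hypothetical):

use std::io::Write;

fn stage_bytes(data: &[u8]) -> std::io::Result<(std::path::PathBuf, tempfile::NamedTempFile)> {
    let mut temp = tempfile::NamedTempFile::new()?;
    temp.write_all(data)?;
    temp.flush()?;
    let path = temp.path().to_path_buf();
    // Return the guard alongside the path: dropping it deletes the file.
    Ok((path, temp))
}

fn main() -> std::io::Result<()> {
    let (path, _guard) = stage_bytes(b"downloaded bytes")?;
    println!("staged at {}", path.display());
    Ok(())
}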

View File

@@ -95,7 +95,10 @@ impl CoreAction for LibrarySyncSetupAction {
}
// DEPRECATED: Sync no longer requires a leader device
async fn validate(&self, context: Arc<crate::context::CoreContext>) -> Result<crate::infra::action::ValidationResult, ActionError> {
async fn validate(
&self,
context: Arc<crate::context::CoreContext>,
) -> Result<crate::infra::action::ValidationResult, ActionError> {
// Validate leader device is one of the two devices
if self.input.leader_device_id != self.input.local_device_id
&& self.input.leader_device_id != self.input.remote_device_id

View File

@@ -519,11 +519,8 @@ mod tests {
);
let events = Arc::new(EventBus::default());
let device_manager = Arc::new(DeviceManager::init(
temp_dir.path(),
key_manager.clone(),
None,
).unwrap());
let device_manager =
Arc::new(DeviceManager::init(temp_dir.path(), key_manager.clone(), None).unwrap());
let volume_manager = Arc::new(crate::volume::VolumeManager::new(
uuid::Uuid::new_v4(), // Test device ID
crate::volume::VolumeDetectionConfig::default(),
@@ -570,11 +567,8 @@ mod tests {
);
let events = Arc::new(EventBus::default());
let device_manager = Arc::new(DeviceManager::init(
temp_dir.path(),
key_manager.clone(),
None,
).unwrap());
let device_manager =
Arc::new(DeviceManager::init(temp_dir.path(), key_manager.clone(), None).unwrap());
let volume_manager = Arc::new(crate::volume::VolumeManager::new(
uuid::Uuid::new_v4(), // Test device ID
crate::volume::VolumeDetectionConfig::default(),

View File

@@ -73,8 +73,7 @@ impl DevicePersistence {
/// Save list of paired device IDs
async fn save_device_list(&self, device_ids: &[Uuid]) -> Result<()> {
let data =
serde_json::to_vec(device_ids).map_err(|e| NetworkingError::Serialization(e))?;
let data = serde_json::to_vec(device_ids).map_err(|e| NetworkingError::Serialization(e))?;
self.key_manager
.set_secret(Self::DEVICE_LIST_KEY, &data)
.await
@@ -92,8 +91,7 @@ impl DevicePersistence {
for (device_id, device) in devices {
let key = Self::device_key(*device_id);
let data =
serde_json::to_vec(device).map_err(|e| NetworkingError::Serialization(e))?;
let data = serde_json::to_vec(device).map_err(|e| NetworkingError::Serialization(e))?;
self.key_manager
.set_secret(&key, &data)
.await
@@ -114,18 +112,16 @@ impl DevicePersistence {
for device_id in device_ids {
let key = Self::device_key(device_id);
match self.key_manager.get_secret(&key).await {
Ok(data) => {
match serde_json::from_slice::<PersistedPairedDevice>(&data) {
Ok(device) => {
if !device.session_keys.is_expired() {
devices.insert(device_id, device);
}
}
Err(e) => {
eprintln!("Failed to deserialize device {}: {}", device_id, e);
Ok(data) => match serde_json::from_slice::<PersistedPairedDevice>(&data) {
Ok(device) => {
if !device.session_keys.is_expired() {
devices.insert(device_id, device);
}
}
}
Err(e) => {
eprintln!("Failed to deserialize device {}: {}", device_id, e);
}
},
Err(e) => {
eprintln!("Failed to load device {}: {}", device_id, e);
}
@@ -279,7 +275,9 @@ impl DevicePersistence {
self.key_manager
.delete_secret(Self::DEVICE_LIST_KEY)
.await
.map_err(|e| NetworkingError::Protocol(format!("Failed to clear device list: {}", e)))?;
.map_err(|e| {
NetworkingError::Protocol(format!("Failed to clear device list: {}", e))
})?;
Ok(())
}
@@ -413,5 +411,4 @@ mod tests {
session_keys.shared_secret
);
}
}

View File

@@ -497,10 +497,19 @@ impl SyncProtocolHandler {
let log_handler = backfill_manager.log_handler();
let response = log_handler
.handle_event_log_request(requesting_device, since, event_types, correlation_id, limit)
.handle_event_log_request(
requesting_device,
since,
event_types,
correlation_id,
limit,
)
.await
.map_err(|e| {
NetworkingError::Protocol(format!("Failed to handle event log request: {}", e))
NetworkingError::Protocol(format!(
"Failed to handle event log request: {}",
e
))
})?;
Ok(Some(response))
@@ -725,11 +734,8 @@ mod tests {
KeyManager::new_with_fallback(temp_dir.path().to_path_buf(), Some(device_key_fallback))
.unwrap(),
);
let device_manager = Arc::new(DeviceManager::init(
temp_dir.path(),
key_manager.clone(),
None,
).unwrap());
let device_manager =
Arc::new(DeviceManager::init(temp_dir.path(), key_manager.clone(), None).unwrap());
let logger = Arc::new(crate::service::network::utils::SilentLogger);
let registry = DeviceRegistry::new(device_manager, key_manager, logger);
let device_registry = Arc::new(tokio::sync::RwLock::new(registry));

View File

@@ -126,7 +126,10 @@ impl BackfillManager {
let event = SyncEventLog::new(
self.device_id,
SyncEventType::BackfillSessionStarted,
format!("Backfill session started with {} available peers", available_peers.len()),
format!(
"Backfill session started with {} available peers",
available_peers.len()
),
)
.with_correlation_id(session_id)
.with_details(json!({
@@ -175,7 +178,11 @@ impl BackfillManager {
let event = SyncEventLog::new(
self.device_id,
SyncEventType::BackfillSessionStarted,
format!("Selected peer {} from {} candidates", selected_peer, available_peers.len()),
format!(
"Selected peer {} from {} candidates",
selected_peer,
available_peers.len()
),
)
.with_correlation_id(session_id)
.with_peer(selected_peer)
@@ -1077,7 +1084,11 @@ impl BackfillManager {
// Feed batch aggregator for event logging
self.batch_aggregator
.add_records("shared_resources".to_string(), batch_size as u64, Some(peer))
.add_records(
"shared_resources".to_string(),
batch_size as u64,
Some(peer),
)
.await;
// Log progress every 10,000 records for large backfills

View File

@@ -201,11 +201,8 @@ impl SyncService {
);
// Create protocol handlers
let mut log_handler = LogSyncHandler::new(
library_id,
library.db().clone(),
peer_sync.clone(),
);
let mut log_handler =
LogSyncHandler::new(library_id, library.db().clone(), peer_sync.clone());
log_handler.set_event_logger(event_logger.clone());
let log_handler = Arc::new(log_handler);

View File

@@ -2669,7 +2669,10 @@ impl PeerSync {
let event = SyncEventLog::new(
self.device_id,
SyncEventType::SyncError,
format!("Buffer overflow: {} updates dropped during backfill", dropped_count),
format!(
"Buffer overflow: {} updates dropped during backfill",
dropped_count
),
)
.with_severity(EventSeverity::Error)
.with_details(json!({

View File

@@ -173,7 +173,9 @@ impl BufferQueue {
warn!(
current_size = queue.len(),
max_size = self.max_size,
total_dropped = self.dropped_count.load(std::sync::atomic::Ordering::Relaxed),
total_dropped = self
.dropped_count
.load(std::sync::atomic::Ordering::Relaxed),
"Buffer queue at capacity, dropping new update"
);
return;

View File

@@ -139,6 +139,8 @@ pub struct LocationWatcher {
context: Arc<CoreContext>,
/// Currently watched locations
watched_locations: Arc<RwLock<HashMap<Uuid, WatchedLocation>>>,
/// Ephemeral watches (shallow, non-recursive) keyed by path
ephemeral_watches: Arc<RwLock<HashMap<PathBuf, EphemeralWatch>>>,
/// File system watcher
watcher: Arc<RwLock<Option<RecommendedWatcher>>>,
/// Whether the service is running
@@ -170,6 +172,15 @@ pub struct WatchedLocation {
pub rule_toggles: crate::ops::indexing::rules::RuleToggles,
}
/// Information about an ephemeral watch (shallow, non-recursive)
#[derive(Debug, Clone)]
pub struct EphemeralWatch {
/// Path being watched
pub path: PathBuf,
/// Indexing rule toggles for filtering events
pub rule_toggles: crate::ops::indexing::rules::RuleToggles,
}
impl LocationWatcher {
/// Create a new location watcher
pub fn new(
@@ -184,6 +195,7 @@ impl LocationWatcher {
events,
context,
watched_locations: Arc::new(RwLock::new(HashMap::new())),
ephemeral_watches: Arc::new(RwLock::new(HashMap::new())),
watcher: Arc::new(RwLock::new(None)),
is_running: Arc::new(RwLock::new(false)),
platform_handler,
@@ -508,6 +520,135 @@ impl LocationWatcher {
.collect()
}
// ========================================================================
// Ephemeral Watch Support (shallow, non-recursive)
// ========================================================================
/// Add an ephemeral watch for a directory (shallow, immediate children only).
///
/// Unlike location watches, which are recursive, ephemeral watches monitor only
/// immediate children of the watched directory. This is appropriate for ephemeral
/// browsing where only the current directory's contents are indexed.
///
/// The path should already be indexed in the ephemeral cache before calling this.
pub async fn add_ephemeral_watch(
&self,
path: PathBuf,
rule_toggles: crate::ops::indexing::rules::RuleToggles,
) -> Result<()> {
// Check if path is valid
if !path.exists() {
return Err(anyhow::anyhow!(
"Cannot watch non-existent path: {}",
path.display()
));
}
if !path.is_dir() {
return Err(anyhow::anyhow!(
"Cannot watch non-directory path: {}",
path.display()
));
}
// Check if already watching
{
let watches = self.ephemeral_watches.read().await;
if watches.contains_key(&path) {
debug!("Already watching ephemeral path: {}", path.display());
return Ok(());
}
}
// Register in ephemeral cache
self.context
.ephemeral_cache()
.register_for_watching(path.clone());
// Add to our tracking
{
let mut watches = self.ephemeral_watches.write().await;
watches.insert(
path.clone(),
EphemeralWatch {
path: path.clone(),
rule_toggles,
},
);
}
// Add to file system watcher with NonRecursive mode
if *self.is_running.read().await {
if let Some(watcher) = self.watcher.write().await.as_mut() {
watcher.watch(&path, RecursiveMode::NonRecursive)?;
info!("Started shallow ephemeral watch for: {}", path.display());
}
}
Ok(())
}
/// Remove an ephemeral watch
pub async fn remove_ephemeral_watch(&self, path: &Path) -> Result<()> {
let watch = {
let mut watches = self.ephemeral_watches.write().await;
watches.remove(path)
};
if let Some(watch) = watch {
// Unregister from ephemeral cache
self.context
.ephemeral_cache()
.unregister_from_watching(&watch.path);
// Remove from file system watcher
if *self.is_running.read().await {
if let Some(watcher) = self.watcher.write().await.as_mut() {
if let Err(e) = watcher.unwatch(&watch.path) {
warn!(
"Failed to unwatch ephemeral path {}: {}",
watch.path.display(),
e
);
} else {
info!("Stopped ephemeral watch for: {}", watch.path.display());
}
}
}
}
Ok(())
}
/// Get all ephemeral watches
pub async fn get_ephemeral_watches(&self) -> Vec<EphemeralWatch> {
self.ephemeral_watches
.read()
.await
.values()
.cloned()
.collect()
}
/// Check if a path has an ephemeral watch
pub async fn has_ephemeral_watch(&self, path: &Path) -> bool {
self.ephemeral_watches.read().await.contains_key(path)
}
/// Find the ephemeral watch that covers a given path (if any).
///
/// For shallow watches, this returns a match only if the path is an immediate
/// child of a watched directory.
pub async fn find_ephemeral_watch_for_path(&self, path: &Path) -> Option<EphemeralWatch> {
let watches = self.ephemeral_watches.read().await;
// Get the parent directory of the event path
let parent = path.parent()?;
// Check if the parent is being watched
watches.get(parent).cloned()
}
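
The immediate-child rule above reduces to a parent comparison; a self-contained sketch of that predicate:

use std::path::Path;

fn is_shallow_match(watch_root: &Path, event_path: &Path) -> bool {
    // Match only direct children of the watched directory.
    event_path.parent() == Some(watch_root)
}

fn main() {
    assert!(is_shallow_match(Path::new("/photos"), Path::new("/photos/a.jpg")));
    assert!(!is_shallow_match(Path::new("/photos"), Path::new("/photos/sub/b.jpg")));
}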
/// Load existing locations from the database and add them to the watcher
async fn load_existing_locations(&self) -> Result<()> {
info!("Loading existing locations from database...");
@@ -674,11 +815,13 @@ impl LocationWatcher {
async fn start_event_loop(&self) -> Result<()> {
let platform_handler = self.platform_handler.clone();
let watched_locations = self.watched_locations.clone();
let ephemeral_watches = self.ephemeral_watches.clone();
let workers = self.workers.clone();
let is_running = self.is_running.clone();
let debug_mode = self.config.debug_mode;
let metrics = self.metrics.clone();
let events = self.events.clone();
let context = self.context.clone();
let (tx, mut rx) = mpsc::channel(self.config.event_buffer_size);
let tx_clone = tx.clone();
@@ -731,6 +874,17 @@ impl LocationWatcher {
}
drop(locations);
// Watch all ephemeral paths (non-recursive/shallow)
let ephemeral = ephemeral_watches.read().await;
for watch in ephemeral.values() {
watcher.watch(&watch.path, RecursiveMode::NonRecursive)?;
info!(
"Started shallow ephemeral watch for: {}",
watch.path.display()
);
}
drop(ephemeral);
// Store watcher
*self.watcher.write().await = Some(watcher);
@@ -762,6 +916,46 @@ impl LocationWatcher {
FsRawEventKind::Rename { from, .. } => Some(from.as_path()),
};
// First, check if this is an ephemeral watch event
// For shallow watches, only process if path is immediate child
let mut handled_by_ephemeral = false;
if let Some(event_path) = event_path {
let parent = event_path.parent();
if let Some(parent_path) = parent {
let ephemeral = ephemeral_watches.read().await;
if let Some(watch) = ephemeral.get(parent_path) {
debug!(
"Ephemeral watch match for {}: parent {} is watched",
event_path.display(),
parent_path.display()
);
handled_by_ephemeral = true;
// Process via ephemeral handler
let ctx = context.clone();
let root = watch.path.clone();
let toggles = watch.rule_toggles;
let event_kind = kind.clone();
tokio::spawn(async move {
if let Err(e) = crate::ops::indexing::ephemeral::responder::apply(
&ctx,
&root,
event_kind,
toggles,
).await {
warn!("Failed to process ephemeral event: {}", e);
}
});
}
}
}
// Skip location matching if handled by ephemeral
if handled_by_ephemeral {
continue;
}
// Find the location for this event by matching path prefix
// CRITICAL: Must match by path, not just library_id, to avoid routing
// events to the wrong location when multiple locations exist in one library
@@ -995,6 +1189,7 @@ impl LocationWatcher {
events: events.clone(),
context: context.clone(),
watched_locations: watched_locations.clone(),
ephemeral_watches: Arc::new(RwLock::new(HashMap::new())),
watcher: watcher_ref.clone(),
is_running: is_running.clone(),
platform_handler: platform_handler.clone(),
@@ -1033,6 +1228,7 @@ impl LocationWatcher {
events: events.clone(),
context: context.clone(),
watched_locations: watched_locations.clone(),
ephemeral_watches: Arc::new(RwLock::new(HashMap::new())),
watcher: watcher_ref.clone(),
is_running: is_running.clone(),
platform_handler: platform_handler.clone(),

View File

@@ -319,4 +319,4 @@ mod tests {
assert!(backend.exists(test_path).await.unwrap());
}
}
}

View File

@@ -13,32 +13,32 @@ use tracing::{debug, warn};
pub struct NtfsHandler;
impl NtfsHandler {
pub fn new() -> Self {
Self
}
pub fn new() -> Self {
Self
}
/// Check if two paths are on the same NTFS volume
pub async fn same_physical_storage(&self, path1: &Path, path2: &Path) -> bool {
// Check if both paths are on the same NTFS volume
if let (Ok(vol1), Ok(vol2)) = (
self.get_volume_info(path1).await,
self.get_volume_info(path2).await,
) {
// Same volume GUID = same physical storage
return vol1.volume_guid == vol2.volume_guid;
}
/// Check if two paths are on the same NTFS volume
pub async fn same_physical_storage(&self, path1: &Path, path2: &Path) -> bool {
// Check if both paths are on the same NTFS volume
if let (Ok(vol1), Ok(vol2)) = (
self.get_volume_info(path1).await,
self.get_volume_info(path2).await,
) {
// Same volume GUID = same physical storage
return vol1.volume_guid == vol2.volume_guid;
}
false
}
false
}
/// Get NTFS volume information for a path
async fn get_volume_info(&self, path: &Path) -> VolumeResult<NtfsVolumeInfo> {
let path = path.to_path_buf();
/// Get NTFS volume information for a path
async fn get_volume_info(&self, path: &Path) -> VolumeResult<NtfsVolumeInfo> {
let path = path.to_path_buf();
task::spawn_blocking(move || {
// Use PowerShell to get volume information
let script = format!(
r#"
task::spawn_blocking(move || {
// Use PowerShell to get volume information
let script = format!(
r#"
$volume = Get-Volume -FilePath '{}'
$partition = Get-Partition -DriveLetter $volume.DriveLetter
$disk = Get-Disk -Number $partition.DiskNumber
@@ -55,66 +55,66 @@ impl NtfsHandler {
MediaType = $disk.MediaType
}} | ConvertTo-Json
"#,
path.display()
);
path.display()
);
let output = std::process::Command::new("powershell")
.args(["-Command", &script])
.output()
.map_err(|e| {
crate::volume::error::VolumeError::platform(format!(
"Failed to run PowerShell: {}",
e
))
})?;
let output = std::process::Command::new("powershell")
.args(["-Command", &script])
.output()
.map_err(|e| {
crate::volume::error::VolumeError::platform(format!(
"Failed to run PowerShell: {}",
e
))
})?;
if !output.status.success() {
return Err(crate::volume::error::VolumeError::platform(
"PowerShell command failed".to_string(),
));
}
if !output.status.success() {
return Err(crate::volume::error::VolumeError::platform(
"PowerShell command failed".to_string(),
));
}
let output_text = String::from_utf8_lossy(&output.stdout);
parse_volume_info(&output_text)
})
.await
.map_err(|e| {
crate::volume::error::VolumeError::platform(format!("Task join error: {}", e))
})?
}
let output_text = String::from_utf8_lossy(&output.stdout);
parse_volume_info(&output_text)
})
.await
.map_err(|e| {
crate::volume::error::VolumeError::platform(format!("Task join error: {}", e))
})?
}
/// Check if NTFS hardlinks are supported (they always are on NTFS)
pub async fn supports_hardlinks(&self, path: &Path) -> bool {
// NTFS always supports hardlinks
if let Ok(vol_info) = self.get_volume_info(path).await {
return vol_info.file_system == "NTFS";
}
false
}
/// Check if NTFS hardlinks are supported (they always are on NTFS)
pub async fn supports_hardlinks(&self, path: &Path) -> bool {
// NTFS always supports hardlinks
if let Ok(vol_info) = self.get_volume_info(path).await {
return vol_info.file_system == "NTFS";
}
false
}
/// Check if NTFS junction points are supported
pub async fn supports_junctions(&self, path: &Path) -> bool {
// NTFS supports junction points (directory symbolic links)
if let Ok(vol_info) = self.get_volume_info(path).await {
return vol_info.file_system == "NTFS";
}
false
}
/// Check if NTFS junction points are supported
pub async fn supports_junctions(&self, path: &Path) -> bool {
// NTFS supports junction points (directory symbolic links)
if let Ok(vol_info) = self.get_volume_info(path).await {
return vol_info.file_system == "NTFS";
}
false
}
/// Resolve junction points and symbolic links
pub async fn resolve_ntfs_path(&self, path: &Path) -> PathBuf {
let path = path.to_path_buf();
// Clone the path so we have an owned copy to move into the closure
// while keeping the original 'path' available for the fallback (unwrap_or)
let path_clone = path.clone();
/// Resolve junction points and symbolic links
pub async fn resolve_ntfs_path(&self, path: &Path) -> PathBuf {
let path = path.to_path_buf();
// Clone the path so we have an owned copy to move into the closure
// while keeping the original 'path' available for the fallback (unwrap_or)
let path_clone = path.clone();
let result = task::spawn_blocking(move || {
// Use the cloned path inside the closure
let path = path_clone;
// Use PowerShell to resolve the path
let script = format!(
r#"
let result = task::spawn_blocking(move || {
// Use the cloned path inside the closure
let path = path_clone;
// Use PowerShell to resolve the path
let script = format!(
r#"
try {{
$resolvedPath = Resolve-Path -Path '{}' -ErrorAction Stop
Write-Output $resolvedPath.Path
@@ -122,40 +122,40 @@ impl NtfsHandler {
Write-Output '{}'
}}
"#,
path.display(),
path.display()
);
path.display(),
path.display()
);
let output = std::process::Command::new("powershell")
.args(["-Command", &script])
.output();
let output = std::process::Command::new("powershell")
.args(["-Command", &script])
.output();
match output {
Ok(output) if output.status.success() => {
let resolved = String::from_utf8_lossy(&output.stdout).trim().to_string();
if !resolved.is_empty() {
PathBuf::from(resolved)
} else {
path
}
}
_ => path,
}
})
.await;
match output {
Ok(output) if output.status.success() => {
let resolved = String::from_utf8_lossy(&output.stdout).trim().to_string();
if !resolved.is_empty() {
PathBuf::from(resolved)
} else {
path
}
}
_ => path,
}
})
.await;
// If the task fails (e.g. panic), return the original path
result.unwrap_or(path)
}
// If the task fails (e.g. panic), return the original path
result.unwrap_or(path)
}
/// Get NTFS file system features
pub async fn get_ntfs_features(&self, path: &Path) -> VolumeResult<NtfsFeatures> {
let path = path.to_path_buf();
/// Get NTFS file system features
pub async fn get_ntfs_features(&self, path: &Path) -> VolumeResult<NtfsFeatures> {
let path = path.to_path_buf();
task::spawn_blocking(move || {
// Use fsutil to get NTFS features
let script = format!(
r#"
task::spawn_blocking(move || {
// Use fsutil to get NTFS features
let script = format!(
r#"
$driveLetter = Split-Path -Path '{}' -Qualifier
$features = @{{}}
@@ -183,237 +183,237 @@ impl NtfsHandler {
$features | ConvertTo-Json
"#,
path.display()
);
path.display()
);
let output = std::process::Command::new("powershell")
.args(["-Command", &script])
.output()
.map_err(|e| {
crate::volume::error::VolumeError::platform(format!(
"Failed to run PowerShell: {}",
e
))
})?;
let output = std::process::Command::new("powershell")
.args(["-Command", &script])
.output()
.map_err(|e| {
crate::volume::error::VolumeError::platform(format!(
"Failed to run PowerShell: {}",
e
))
})?;
if !output.status.success() {
// Return default NTFS features
return Ok(NtfsFeatures {
supports_hardlinks: true,
supports_junctions: true,
supports_symlinks: true,
supports_streams: true,
supports_compression: true,
supports_encryption: true,
});
}
if !output.status.success() {
// Return default NTFS features
return Ok(NtfsFeatures {
supports_hardlinks: true,
supports_junctions: true,
supports_symlinks: true,
supports_streams: true,
supports_compression: true,
supports_encryption: true,
});
}
let output_text = String::from_utf8_lossy(&output.stdout);
parse_ntfs_features(&output_text)
})
.await
.map_err(|e| {
crate::volume::error::VolumeError::platform(format!("Task join error: {}", e))
})?
}
let output_text = String::from_utf8_lossy(&output.stdout);
parse_ntfs_features(&output_text)
})
.await
.map_err(|e| {
crate::volume::error::VolumeError::platform(format!("Task join error: {}", e))
})?
}
}
#[async_trait]
impl super::FilesystemHandler for NtfsHandler {
async fn enhance_volume(&self, volume: &mut Volume) -> VolumeResult<()> {
// Add NTFS-specific information like feature support
if let Some(mount_point) = volume.mount_point.to_str() {
if let Ok(features) = self.get_ntfs_features(Path::new(mount_point)).await {
debug!("Enhanced NTFS volume with features: {:?}", features);
// Could store NTFS features in volume metadata
}
}
Ok(())
}
async fn enhance_volume(&self, volume: &mut Volume) -> VolumeResult<()> {
// Add NTFS-specific information like feature support
if let Some(mount_point) = volume.mount_point.to_str() {
if let Ok(features) = self.get_ntfs_features(Path::new(mount_point)).await {
debug!("Enhanced NTFS volume with features: {:?}", features);
// Could store NTFS features in volume metadata
}
}
Ok(())
}
async fn same_physical_storage(&self, path1: &Path, path2: &Path) -> bool {
self.same_physical_storage(path1, path2).await
}
async fn same_physical_storage(&self, path1: &Path, path2: &Path) -> bool {
self.same_physical_storage(path1, path2).await
}
fn get_copy_strategy(&self) -> Box<dyn crate::ops::files::copy::strategy::CopyStrategy> {
// Use streaming copy for NTFS (no built-in CoW like APFS/ReFS)
// Could potentially use hardlinks for same-volume copies
Box::new(crate::ops::files::copy::strategy::LocalStreamCopyStrategy)
}
fn get_copy_strategy(&self) -> Box<dyn crate::ops::files::copy::strategy::CopyStrategy> {
// Use streaming copy for NTFS (no built-in CoW like APFS/ReFS)
// Could potentially use hardlinks for same-volume copies
Box::new(crate::ops::files::copy::strategy::LocalStreamCopyStrategy)
}
fn contains_path(&self, volume: &Volume, path: &std::path::Path) -> bool {
// Check primary mount point
if path.starts_with(&volume.mount_point) {
return true;
}
fn contains_path(&self, volume: &Volume, path: &std::path::Path) -> bool {
// Check primary mount point
if path.starts_with(&volume.mount_point) {
return true;
}
// Check additional mount points
if volume.mount_points.iter().any(|mp| path.starts_with(mp)) {
return true;
}
// Check additional mount points
if volume.mount_points.iter().any(|mp| path.starts_with(mp)) {
return true;
}
// TODO: NTFS-specific logic for junction points and mount points
// Windows can have volumes mounted as folders (mount points) within other volumes
// NTFS also supports junction points and symbolic links that may need resolution
// TODO: NTFS-specific logic for junction points and mount points
// Windows can have volumes mounted as folders (mount points) within other volumes
// NTFS also supports junction points and symbolic links that may need resolution
false
}
false
}
}
/// NTFS volume information
#[derive(Debug, Clone)]
pub struct NtfsVolumeInfo {
pub volume_guid: String,
pub file_system: String,
pub drive_letter: Option<char>,
pub label: Option<String>,
pub size_bytes: u64,
pub available_bytes: u64,
pub disk_number: Option<u32>,
pub partition_number: Option<u32>,
pub media_type: Option<String>,
pub volume_guid: String,
pub file_system: String,
pub drive_letter: Option<char>,
pub label: Option<String>,
pub size_bytes: u64,
pub available_bytes: u64,
pub disk_number: Option<u32>,
pub partition_number: Option<u32>,
pub media_type: Option<String>,
}
/// NTFS filesystem features
#[derive(Debug, Clone)]
pub struct NtfsFeatures {
pub supports_hardlinks: bool,
pub supports_junctions: bool,
pub supports_symlinks: bool,
pub supports_streams: bool,
pub supports_compression: bool,
pub supports_encryption: bool,
pub supports_hardlinks: bool,
pub supports_junctions: bool,
pub supports_symlinks: bool,
pub supports_streams: bool,
pub supports_compression: bool,
pub supports_encryption: bool,
}
/// Parse PowerShell volume info JSON output
fn parse_volume_info(json_output: &str) -> VolumeResult<NtfsVolumeInfo> {
// Simple JSON parsing - in production, you'd use serde_json
let json_output = json_output.trim();
// Simple JSON parsing - in production, you'd use serde_json
let json_output = json_output.trim();
let volume_guid = extract_json_string(json_output, "VolumeGuid").unwrap_or_default();
let file_system = extract_json_string(json_output, "FileSystem").unwrap_or_default();
let drive_letter_str = extract_json_string(json_output, "DriveLetter");
let label = extract_json_string(json_output, "Label");
let size_bytes = extract_json_number(json_output, "Size").unwrap_or(0);
let available_bytes = extract_json_number(json_output, "SizeRemaining").unwrap_or(0);
let disk_number = extract_json_number(json_output, "DiskNumber").map(|n| n as u32);
let partition_number = extract_json_number(json_output, "PartitionNumber").map(|n| n as u32);
let media_type = extract_json_string(json_output, "MediaType");
let volume_guid = extract_json_string(json_output, "VolumeGuid").unwrap_or_default();
let file_system = extract_json_string(json_output, "FileSystem").unwrap_or_default();
let drive_letter_str = extract_json_string(json_output, "DriveLetter");
let label = extract_json_string(json_output, "Label");
let size_bytes = extract_json_number(json_output, "Size").unwrap_or(0);
let available_bytes = extract_json_number(json_output, "SizeRemaining").unwrap_or(0);
let disk_number = extract_json_number(json_output, "DiskNumber").map(|n| n as u32);
let partition_number = extract_json_number(json_output, "PartitionNumber").map(|n| n as u32);
let media_type = extract_json_string(json_output, "MediaType");
let drive_letter = drive_letter_str.and_then(|s| s.chars().next());
let drive_letter = drive_letter_str.and_then(|s| s.chars().next());
Ok(NtfsVolumeInfo {
volume_guid,
file_system,
drive_letter,
label,
size_bytes,
available_bytes,
disk_number,
partition_number,
media_type,
})
Ok(NtfsVolumeInfo {
volume_guid,
file_system,
drive_letter,
label,
size_bytes,
available_bytes,
disk_number,
partition_number,
media_type,
})
}
/// Parse NTFS features JSON output
fn parse_ntfs_features(json_output: &str) -> VolumeResult<NtfsFeatures> {
// Simple parsing - in production, use proper JSON parser
let json_output = json_output.trim();
// Simple parsing - in production, use proper JSON parser
let json_output = json_output.trim();
let supports_compression =
extract_json_bool(json_output, "SupportsCompression").unwrap_or(true);
let supports_encryption = extract_json_bool(json_output, "SupportsEncryption").unwrap_or(true);
let supports_compression =
extract_json_bool(json_output, "SupportsCompression").unwrap_or(true);
let supports_encryption = extract_json_bool(json_output, "SupportsEncryption").unwrap_or(true);
Ok(NtfsFeatures {
supports_hardlinks: true, // NTFS always supports these
supports_junctions: true,
supports_symlinks: true,
supports_streams: true,
supports_compression,
supports_encryption,
})
Ok(NtfsFeatures {
supports_hardlinks: true, // NTFS always supports these
supports_junctions: true,
supports_symlinks: true,
supports_streams: true,
supports_compression,
supports_encryption,
})
}
/// Extract string value from JSON (simple implementation)
fn extract_json_string(json: &str, key: &str) -> Option<String> {
let pattern = format!("\"{}\":", key);
if let Some(start) = json.find(&pattern) {
let start = start + pattern.len();
if let Some(value_start) = json[start..].find('"') {
let value_start = start + value_start + 1;
if let Some(value_end) = json[value_start..].find('"') {
let value = &json[value_start..value_start + value_end];
if value != "null" && !value.is_empty() {
return Some(value.to_string());
}
}
}
}
None
let pattern = format!("\"{}\":", key);
if let Some(start) = json.find(&pattern) {
let start = start + pattern.len();
if let Some(value_start) = json[start..].find('"') {
let value_start = start + value_start + 1;
if let Some(value_end) = json[value_start..].find('"') {
let value = &json[value_start..value_start + value_end];
if value != "null" && !value.is_empty() {
return Some(value.to_string());
}
}
}
}
None
}
/// Extract number value from JSON (simple implementation)
fn extract_json_number(json: &str, key: &str) -> Option<u64> {
let pattern = format!("\"{}\":", key);
if let Some(start) = json.find(&pattern) {
let start = start + pattern.len();
let remaining = json[start..].trim_start();
if let Some(end) = remaining.find(|c: char| !c.is_ascii_digit()) {
let number_str = &remaining[..end];
return number_str.parse().ok();
}
}
None
let pattern = format!("\"{}\":", key);
if let Some(start) = json.find(&pattern) {
let start = start + pattern.len();
let remaining = json[start..].trim_start();
if let Some(end) = remaining.find(|c: char| !c.is_ascii_digit()) {
let number_str = &remaining[..end];
return number_str.parse().ok();
}
}
None
}
/// Extract boolean value from JSON (simple implementation)
fn extract_json_bool(json: &str, key: &str) -> Option<bool> {
let pattern = format!("\"{}\":", key);
if let Some(start) = json.find(&pattern) {
let start = start + pattern.len();
let remaining = json[start..].trim_start();
if remaining.starts_with("true") {
return Some(true);
} else if remaining.starts_with("false") {
return Some(false);
}
}
None
let pattern = format!("\"{}\":", key);
if let Some(start) = json.find(&pattern) {
let start = start + pattern.len();
let remaining = json[start..].trim_start();
if remaining.starts_with("true") {
return Some(true);
} else if remaining.starts_with("false") {
return Some(false);
}
}
None
}
/// Enhance volume with NTFS-specific information from Windows
pub async fn enhance_volume_from_windows(volume: &mut Volume) -> VolumeResult<()> {
// FIX: Import the trait from the correct module
use crate::volume::fs::FilesystemHandler;
// FIX: Import the trait from the correct module
use crate::volume::fs::FilesystemHandler;
let handler = NtfsHandler::new();
handler.enhance_volume(volume).await
let handler = NtfsHandler::new();
handler.enhance_volume(volume).await
}
#[cfg(test)]
mod tests {
use super::*;
use super::*;
#[test]
fn test_extract_json_string() {
let json =
r#"{"VolumeGuid": "12345678-1234-1234-1234-123456789abc", "FileSystem": "NTFS"}"#;
assert_eq!(
extract_json_string(json, "VolumeGuid"),
Some("12345678-1234-1234-1234-123456789abc".to_string())
);
assert_eq!(
extract_json_string(json, "FileSystem"),
Some("NTFS".to_string())
);
assert_eq!(extract_json_string(json, "NonExistent"), None);
}
#[test]
fn test_extract_json_string() {
let json =
r#"{"VolumeGuid": "12345678-1234-1234-1234-123456789abc", "FileSystem": "NTFS"}"#;
assert_eq!(
extract_json_string(json, "VolumeGuid"),
Some("12345678-1234-1234-1234-123456789abc".to_string())
);
assert_eq!(
extract_json_string(json, "FileSystem"),
Some("NTFS".to_string())
);
assert_eq!(extract_json_string(json, "NonExistent"), None);
}
#[test]
fn test_extract_json_bool() {
let json = r#"{"SupportsCompression": true, "SupportsEncryption": false}"#;
assert_eq!(extract_json_bool(json, "SupportsCompression"), Some(true));
assert_eq!(extract_json_bool(json, "SupportsEncryption"), Some(false));
assert_eq!(extract_json_bool(json, "NonExistent"), None);
}
}
#[test]
fn test_extract_json_bool() {
let json = r#"{"SupportsCompression": true, "SupportsEncryption": false}"#;
assert_eq!(extract_json_bool(json, "SupportsCompression"), Some(true));
assert_eq!(extract_json_bool(json, "SupportsEncryption"), Some(false));
assert_eq!(extract_json_bool(json, "NonExistent"), None);
}
}
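
The extract_json_* helpers above note that production code would use serde_json; a hedged sketch of that replacement (the struct name and field subset are illustrative, mirroring the PowerShell output keys):

use serde::Deserialize;

#[derive(Debug, Deserialize)]
#[serde(rename_all = "PascalCase")]
struct RawVolumeInfo {
    volume_guid: Option<String>,
    file_system: Option<String>,
    size: Option<u64>,
}

fn main() {
    let json = r#"{"VolumeGuid":"abc","FileSystem":"NTFS","Size":42}"#;
    let info: RawVolumeInfo = serde_json::from_str(json).expect("valid JSON");
    println!("{:?}", info);
}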

View File

@@ -312,11 +312,11 @@ fn extract_json_number(json: &str, key: &str) -> Option<u64> {
/// Enhance volume with ReFS-specific information from Windows
pub async fn enhance_volume_from_windows(volume: &mut Volume) -> VolumeResult<()> {
// Import the trait from the parent module so the enhance_volume method is available
use super::FilesystemHandler;
// Import the trait from the parent module so the enhance_volume method is available
use super::FilesystemHandler;
let handler = RefsHandler::new();
handler.enhance_volume(volume).await
let handler = RefsHandler::new();
handler.enhance_volume(volume).await
}
#[cfg(test)]

View File

@@ -167,10 +167,10 @@ impl VolumeManager {
// Try to load credentials and recreate the backend
let credential_manager = CloudCredentialManager::new(
key_manager.clone(),
library.db().clone(),
library.id(),
);
key_manager.clone(),
library.db().clone(),
library.id(),
);
match credential_manager
.get_credential(library.id(), &db_volume.fingerprint)

View File

@@ -238,10 +238,10 @@ pub fn should_include_volume(volume: &Volume, config: &VolumeDetectionConfig) ->
return false;
}
// FIX: Use parentheses to call the method
// FIX: Use parentheses to call the method
if !config.include_virtual && volume.total_bytes_capacity() == 0 {
return false;
}
true
}
}

View File

@@ -173,15 +173,16 @@ async fn test_location_indexing() -> Result<(), Box<dyn std::error::Error>> {
// 8. Verify indexed entries in database
// Helper to get all entry IDs under the location
let get_location_entry_ids = || async {
let location_id = location_entry_id.expect("Location should have entry_id");
let descendant_ids: Vec<i32> = entry_closure::Entity::find()
.filter(entry_closure::Column::AncestorId.eq(location_id))
.all(db.conn())
.await?
.into_iter()
.map(|ec| ec.descendant_id)
.collect();
let mut all_ids = vec![location_id];
all_ids.extend(descendant_ids);
Ok::<Vec<i32>, anyhow::Error>(all_ids)
};
@@ -337,15 +338,16 @@ async fn test_incremental_indexing() -> Result<(), Box<dyn std::error::Error>> {
}
// Get all entry IDs under this location
let location_id = location_entry_id.expect("Location should have entry_id");
let descendant_ids: Vec<i32> = entry_closure::Entity::find()
.filter(entry_closure::Column::AncestorId.eq(location_id))
.all(db.conn())
.await?
.into_iter()
.map(|ec| ec.descendant_id)
.collect();
let mut all_entry_ids = vec![location_id];
all_entry_ids.extend(descendant_ids);
let initial_file_count = entities::entry::Entity::find()

View File

@@ -77,7 +77,10 @@ impl EventLogTestHarness {
// Initialize sync service
library_alice
.init_sync_service(
device_alice_id,
transport_alice.clone() as Arc<dyn NetworkTransport>,
)
.await?;
// Start sync service
@@ -105,7 +108,8 @@ impl EventLogTestHarness {
let stmt = Statement::from_string(
DatabaseBackend::Sqlite,
"SELECT event_type, summary, correlation_id FROM sync_event_log ORDER BY timestamp".to_string(),
"SELECT event_type, summary, correlation_id FROM sync_event_log ORDER BY timestamp"
.to_string(),
);
let rows = event_logger.conn().query_all(stmt).await?;
@@ -195,10 +199,7 @@ async fn test_backfill_session_correlation() -> anyhow::Result<()> {
let events = harness.query_events_api(query).await?;
tracing::info!(event_count = events.len(), "Events retrieved via query API");
// Verify query API works (even if no events yet)
assert!(
@@ -240,8 +241,8 @@ async fn test_event_query_filtering() -> anyhow::Result<()> {
tokio::time::sleep(Duration::from_millis(200)).await;
// Test filtering by event type
let query =
SyncEventQuery::new(library_id).with_event_types(vec![SyncEventType::StateTransition]);
let events = harness.query_events_api(query).await?;
@@ -260,8 +261,8 @@ async fn test_event_query_filtering() -> anyhow::Result<()> {
}
// Test filtering by category
let query_category =
SyncEventQuery::new(library_id).with_categories(vec![EventCategory::Lifecycle]);
let lifecycle_events = harness.query_events_api(query_category).await?;
@@ -353,15 +354,12 @@ async fn test_batch_aggregation() -> anyhow::Result<()> {
// Query batch ingestion events
let library_id = harness.library_alice.id();
let query =
SyncEventQuery::new(library_id).with_event_types(vec![SyncEventType::BatchIngestion]);
let events = harness.query_events_api(query).await?;
tracing::info!(batch_events = events.len(), "Batch ingestion events logged");
// Should have one batch event aggregating all the adds
assert!(
@@ -417,9 +415,7 @@ async fn test_buffer_overflow_logging() -> anyhow::Result<()> {
// For testing, we'll simulate by tracking drops manually
// Transition to Ready (this checks for dropped count)
peer_sync.set_state_for_test(DeviceSyncState::Ready).await;
tokio::time::sleep(Duration::from_millis(200)).await;
@@ -431,17 +427,11 @@ async fn test_buffer_overflow_logging() -> anyhow::Result<()> {
let error_events = harness.query_events_api(query).await?;
tracing::info!(error_count = error_events.len(), "Error events logged");
// Note: Buffer overflow only logs if drops actually occurred
// This test verifies the infrastructure exists, even if no drops happened
assert!(error_events.len() >= 0, "Error event query should work");
Ok(())
}

View File

@@ -3,19 +3,25 @@ title: Indexing
sidebarTitle: Indexing
---
The indexing system discovers and analyzes your files through a multi-phase pipeline. Built on Spacedrive's job system, it provides resumable operations, real-time progress tracking, and supports both persistent library indexing and ephemeral browsing of external drives.
## Architecture Overview
The indexing system consists of specialized components working together:
**IndexerJob** orchestrates the entire indexing process as a resumable job. It maintains state across application restarts and provides detailed progress reporting.
**IndexerState** preserves all necessary information to resume indexing from any interruption point. This includes the current phase, directories to process, accumulated statistics, and ephemeral UUID mappings for preserving user metadata across browsing-to-persistent transitions.
**EntryProcessor** handles the complex task of creating and updating database records while maintaining referential integrity through materialized paths.
**DatabaseStorage** provides the low-level database CRUD layer. All database operations (create, update, move, delete) flow through this module for consistency.
**DatabaseAdapter** implements both `ChangeHandler` (for filesystem watcher events) and `IndexPersistence` (for indexer job batches). Both pipelines use the same code to write entries to the database via `DatabaseStorage`.
**MemoryAdapter** implements both `ChangeHandler` (for filesystem watcher events) and `IndexPersistence` (for indexer job batches). Both pipelines use the same code to write entries to the in-memory `EphemeralIndex`.
This dual-implementation architecture unifies watcher and job pipelines, eliminating code duplication between real-time filesystem monitoring and batch indexing operations.
**FileTypeRegistry** identifies files through extensions, magic bytes, and content analysis.
The system integrates deeply with Spacedrive's job infrastructure, which provides automatic state persistence through MessagePack serialization. When you pause an indexing operation, the entire job state is saved to a dedicated jobs database, allowing seamless resumption even after application restarts.
@@ -24,63 +30,153 @@ The system integrates deeply with Spacedrive's job infrastructure, which provide
architecture ensures no work is lost if interrupted.
</Note>
## Database Architecture
The indexing system uses a closure table for hierarchy management instead of recursive queries:
### Closure Table
Parent-child relationships are stored in the `entry_closure` table with precomputed ancestor-descendant pairs. This makes "find all descendants" queries O(1) regardless of nesting depth, at the cost of additional storage (worst-case N² for deeply nested trees).
```sql
CREATE TABLE entry_closure (
ancestor_id INTEGER,
descendant_id INTEGER,
depth INTEGER
);
```
The closure table stores all transitive relationships. For a file at `/home/user/docs/report.pdf`, entries exist for:
- (home_id, report_id, depth=3)
- (user_id, report_id, depth=2)
- (docs_id, report_id, depth=1)
- (report_id, report_id, depth=0)
Move operations require rebuilding closures for the entire moved subtree, which can affect thousands of rows when moving large directories.
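For illustration, a descendant lookup stays a single indexed query no matter how deep the tree is. This is a minimal sketch mirroring the SeaORM pattern used in the integration tests; the `entry_closure` entity is the generated SeaORM entity, and the standalone wrapper function is ours:
```rust
use sea_orm::{ColumnTrait, DatabaseConnection, EntityTrait, QueryFilter};

/// Collect every descendant entry ID under `ancestor` in one query,
/// regardless of nesting depth (includes `ancestor` itself via its
/// depth-0 self-row, per the convention above).
async fn descendants_of(
    db: &DatabaseConnection,
    ancestor: i32,
) -> Result<Vec<i32>, sea_orm::DbErr> {
    Ok(entry_closure::Entity::find()
        .filter(entry_closure::Column::AncestorId.eq(ancestor))
        .all(db)
        .await?
        .into_iter()
        .map(|ec| ec.descendant_id)
        .collect())
}
```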
### Directory Paths Cache
The `directory_paths` table provides O(1) absolute path lookups for directories:
```sql
CREATE TABLE directory_paths (
entry_id INTEGER PRIMARY KEY,
path TEXT UNIQUE
);
```
This eliminates recursive parent traversal when building file paths. Each directory stores its complete absolute path, enabling instant resolution for child entries.
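In practice the lookup reduces to one cache hit plus a join with the entry's own name. A minimal sketch, using an in-memory map as a stand-in for the `directory_paths` table:
```rust
use std::collections::HashMap;
use std::path::PathBuf;

/// Resolve an entry's absolute path in O(1): fetch the parent directory's
/// cached path, then append the entry's name. No recursive parent walk.
fn resolve_path(
    dir_paths: &HashMap<i32, PathBuf>, // entry_id -> absolute directory path
    parent_id: i32,
    name: &str,
) -> Option<PathBuf> {
    dir_paths.get(&parent_id).map(|dir| dir.join(name))
}
```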
### Entries Table
```sql
CREATE TABLE entry (
id INTEGER PRIMARY KEY,
uuid UUID UNIQUE,
parent_id INTEGER,
name TEXT,
extension TEXT,
kind INTEGER,
size BIGINT,
inode BIGINT,
content_id INTEGER,
aggregate_size BIGINT,
child_count INTEGER,
file_count INTEGER
);
```
## Indexing Phases
The indexer operates through five distinct phases, each designed to be interruptible and resumable:
### Phase 1: Discovery
Discovery walks the filesystem using parallel workers with a work-stealing model. On systems with 8+ cores, multiple threads scan directories concurrently, communicating via channels to maximize disk throughput.
```rust
// Discovery maintains a queue of directories to process
pub struct DiscoveryPhase {
dirs_to_walk: VecDeque<PathBuf>,
seen_paths: HashSet<PathBuf>, // Cycle detection
}
```
Discovered entries are filtered through `IndexerRuler`, which applies toggleable system rules (like `NO_HIDDEN`, `NO_DEV_DIRS`) and dynamically loaded `.gitignore` patterns when inside a Git repository.
Progress is measured by directories discovered. Entries are collected into batches of 1,000 items before moving to processing.
### Phase 2: Processing
Processing converts discovered entries into database records. Entries are sorted by depth (parents before children) to maintain referential integrity during batch insertion.
**Change Detection** runs during this phase. The `ChangeDetector` loads existing database entries for the indexing path, then compares against filesystem state to identify:
- **New**: Paths not in database
- **Modified**: Size or mtime differs
- **Moved**: Same inode at different path
- **Deleted**: In database but missing from filesystem
Changes are processed in batch transactions. Each batch inserts closure table rows, updates the directory paths cache, and syncs entries across devices.
**Ephemeral UUID Preservation** happens here. When a browsed folder is promoted to a managed location, UUIDs assigned during ephemeral indexing are preserved (`state.ephemeral_uuids`). This prevents orphaning user metadata like tags and notes attached during browsing sessions.
The processing phase validates that the indexing path stays within location boundaries, preventing catastrophic cross-location deletion if watcher routing bugs send events for the wrong path.
### Phase 3: Aggregation
Aggregation walks the entry tree bottom-up, computing directory statistics:
- `aggregate_size`: Total bytes including subdirectories
- `child_count`: Direct children only
- `file_count`: Recursive file count
These aggregates are stored in the entry table and enable instant directory size display without traversing descendants.
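Conceptually, the bottom-up pass folds each child into its parent exactly once, visiting leaves before parents. A sketch over in-memory stats, not the actual database code:
```rust
#[derive(Default, Clone, Copy)]
struct DirStats {
    aggregate_size: u64,
    child_count: u32,
    file_count: u32,
}

/// Fold one direct child into its parent's aggregates. Applied leaf-to-root,
/// so a directory's own stats are final before its parent is visited.
fn fold_child(parent: &mut DirStats, file_size: u64, child_dir: Option<DirStats>) {
    parent.child_count += 1; // direct children only
    match child_dir {
        // Child is a directory: absorb its already-finished aggregates
        Some(dir) => {
            parent.aggregate_size += dir.aggregate_size;
            parent.file_count += dir.file_count;
        }
        // Child is a file: count it directly
        None => {
            parent.aggregate_size += file_size;
            parent.file_count += 1;
        }
    }
}
```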
### Phase 4: Content Identification
Content identification generates BLAKE3 hashes for files, linking entries to `content_identity` records for deduplication.
Content identities use deterministic v5 UUIDs (namespace hash of `content_hash + library_id`) so different devices can independently identify identical files and merge metadata without coordination. This enables offline duplicate detection across library peers.
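A minimal sketch of the idea with the `uuid` crate; the exact namespace and name layout Spacedrive ships may differ:
```rust
use uuid::Uuid;

/// Deterministic content identity: the same (content_hash, library_id) pair
/// yields the same UUID on every device, with no coordination required.
/// The namespace/name construction here is illustrative only.
fn content_identity_uuid(content_hash: &str, library_id: Uuid) -> Uuid {
    Uuid::new_v5(&library_id, content_hash.as_bytes())
}
```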
**Sync Order**: Content identities must be synced before entries to avoid foreign key violations on receiving devices. The job system enforces this ordering.
For new content, file type identification runs via `FileTypeRegistry` to populate `kind_id` and `mime_type_id` fields.
### Phase 5: Finalizing
Finalizing handles post-processing tasks like directory aggregation updates and potential processor dispatch (thumbnail generation for Deep Mode).
## Change Detection System
The indexing system includes both batch and real-time change detection:
### Batch Change Detection
`ChangeDetector` compares database state against filesystem during indexer job scans:
```rust
let mut detector = ChangeDetector::new();
detector.load_existing_entries(ctx, location_id, indexing_path).await?;

for entry in discovered_entries {
    if let Some(change) = detector.check_path(&path, &metadata, inode) {
        // Process New, Modified, or Moved change
    }
}

let deleted = detector.find_deleted(&seen_paths);
```
The detector tracks paths by inode to identify moves. On Unix systems, inodes provide stable file identity across renames. Windows falls back to path-only matching since file indices are unstable across reboots.
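The platform split looks roughly like this; the Unix half uses `std`'s `MetadataExt`, while the Windows fallback shown is our simplification:
```rust
/// Stable file identity for move detection on Unix: the inode survives
/// renames within the same filesystem.
#[cfg(unix)]
fn file_id(metadata: &std::fs::Metadata) -> Option<u64> {
    use std::os::unix::fs::MetadataExt;
    Some(metadata.ino())
}

/// Windows: file indices are unstable across reboots, so report no stable
/// identity and let the caller fall back to path-only matching.
#[cfg(windows)]
fn file_id(_metadata: &std::fs::Metadata) -> Option<u64> {
    None
}
```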
### Real-Time Change Detection
Both `DatabaseAdapter` and `MemoryAdapter` implement the `ChangeHandler` trait, which defines the interface for responding to filesystem watcher events:
```rust
pub trait ChangeHandler {
async fn find_by_path(&self, path: &Path) -> Result<Option<EntryRef>>;
async fn create(&mut self, metadata: &DirEntry, parent_path: &Path) -> Result<EntryRef>;
async fn update(&mut self, entry: &EntryRef, metadata: &DirEntry) -> Result<()>;
async fn move_entry(&mut self, entry: &EntryRef, old_path: &Path, new_path: &Path) -> Result<()>;
async fn delete(&mut self, entry: &EntryRef) -> Result<()>;
}
```
The watcher routes events to the appropriate handler based on whether the path belongs to a persistent location (`DatabaseAdapter` → database) or ephemeral session (`MemoryAdapter` → memory).
## Indexing Modes and Scopes
@@ -88,29 +184,21 @@ The system provides flexible configuration through modes and scopes:
### Index Modes
**Shallow Mode** extracts only filesystem metadata (name, size, dates). Completes in under 500ms for typical directories.
**Content Mode** adds BLAKE3 hashing to identify files by content. Enables deduplication and content tracking.
**Deep Mode** performs full analysis including file type identification and metadata extraction. Triggers thumbnail generation for images and videos.
### Index Scopes
**Current Scope** indexes only immediate directory contents. Used for responsive UI navigation.
**Recursive Scope** indexes the entire directory tree. Used for full location indexing.
## Persistence and Ephemeral Indexing
Spacedrive supports both persistent and ephemeral indexing modes:
### Persistent Indexing
@@ -123,27 +211,63 @@ Persistent indexing stores all data in the database permanently. This is the def
### Ephemeral Indexing
Ephemeral indexing keeps data in memory only, perfect for browsing external drives without permanent storage.
```rust
let config = IndexerJobConfig::ephemeral_browse(
usb_path,
IndexScope::Current
);
```
The ephemeral index is an LRU cache with automatic cleanup, built on highly memory-optimized structures:

**NodeArena**: Slab allocator for `FileNode` entries with pointer-sized entry IDs. Provides contiguous memory layout for cache efficiency.
**NameCache**: Global string interning pool. One copy of "index.js" serves thousands of node_modules files.
**NameRegistry**: BTreeMap for fast name-based lookups without full-text indexing overhead.
Memory usage is around 50 bytes per entry vs 200+ bytes with naive `HashMap<PathBuf, Entry>` approach. This 4-6x reduction enables browsing hundreds of thousands of files without database overhead.
Multiple directory trees can coexist in the same `EphemeralIndex` (browsing both `/mnt/nas` and `/media/usb` simultaneously), sharing the string interning pool for maximum deduplication.
The `EphemeralIndexCache` tracks which paths have been indexed, are currently being indexed, or are registered for filesystem watching. When a watched path receives filesystem events, `EphemeralWriter` updates the in-memory index in real-time.
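A simplified sketch of the arena-plus-interning layout; the real types differ, and every name below is illustrative:
```rust
use std::collections::BTreeMap;

/// Illustrative shape only: nodes live in a slab-style arena and all browsed
/// trees share one interned-name pool, so "index.js" is stored exactly once.
struct EphemeralIndexSketch {
    arena: Vec<NodeSketch>,            // NodeArena: index doubles as entry ID
    names: Vec<Box<str>>,              // NameCache: one copy per unique name
    name_ids: BTreeMap<Box<str>, u32>, // NameRegistry: fast name -> ID lookup
}

struct NodeSketch {
    name_id: u32, // points into the shared name pool
    parent: u32,  // arena index of the parent directory
    size: u64,
}

impl EphemeralIndexSketch {
    /// Intern a name, returning a small ID instead of an owned string.
    fn intern(&mut self, name: &str) -> u32 {
        if let Some(&id) = self.name_ids.get(name) {
            return id;
        }
        let id = self.names.len() as u32;
        self.names.push(name.into());
        self.name_ids.insert(name.into(), id);
        id
    }
}
```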
<Info>
Ephemeral mode lets you explore USB drives or network shares without
permanently adding them to your library.
</Info>
## Indexer Rules
The `IndexerRuler` applies filtering rules during discovery to skip unwanted files:
**System Rules** are toggleable patterns like:
- `NO_HIDDEN`: Skip dotfiles (`.git`, `.DS_Store`)
- `NO_DEV_DIRS`: Skip `node_modules`, `target`, `dist`
- `NO_SYSTEM`: Skip OS folders (`System32`, `Windows`)
**Git Integration**: When indexing inside a Git repository, rules are dynamically loaded from `.gitignore` files. This automatically excludes build artifacts and local configuration.
Rules return a `RulerDecision` (Accept/Reject) for each path during discovery, preventing unwanted entries from ever reaching the processing phase.
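In spirit, rule evaluation is a short-circuiting check per discovered path. A sketch; the real ruler also folds in dynamically loaded `.gitignore` patterns:
```rust
use std::path::Path;

enum RulerDecision {
    Accept,
    Reject,
}

/// Minimal sketch: reject the path as soon as any enabled rule matches.
fn evaluate(path: &Path, no_hidden: bool, no_dev_dirs: bool) -> RulerDecision {
    let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
    if no_hidden && name.starts_with('.') {
        return RulerDecision::Reject; // .git, .DS_Store, ...
    }
    if no_dev_dirs && matches!(name, "node_modules" | "target" | "dist") {
        return RulerDecision::Reject;
    }
    RulerDecision::Accept
}
```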
## Index Integrity Verification
The `IndexVerifyAction` checks integrity by running a fresh ephemeral scan and comparing metadata against the existing persistent index:
```rust
let verify = IndexVerifyAction::from_input(IndexVerifyInput { path }).await?;
let output = verify.execute(library, context).await?;
// output.report contains:
// - missing_from_index: Files on disk but not in database
// - stale_in_index: Entries in database but missing from filesystem
// - metadata_mismatches: Size, mtime, or inode differences
```
The verification system detects:
- **MissingFromIndex**: Files created outside Spacedrive
- **StaleInIndex**: Deleted files not yet purged from database
- **SizeMismatch**: Files modified externally
- **ModifiedTimeMismatch**: Timestamp drift (with 1-second tolerance)
- **InodeMismatch**: File replacement or filesystem corruption
Verification runs as a library action and returns a detailed `IntegrityReport` with per-file diagnostics.
## Job System Integration
The indexing system leverages Spacedrive's job infrastructure for reliability and monitoring.
```rust
pub struct IndexerState {
dirs_to_walk: VecDeque<PathBuf>,
entry_batches: Vec<Vec<DirEntry>>,
entry_id_cache: HashMap<PathBuf, i32>,
ephemeral_uuids: HashMap<PathBuf, Uuid>,
stats: IndexerStats,
// ... checkpoint data
}
```
@@ -172,25 +296,17 @@ Real-time progress flows through multiple channels:
```rust
pub struct IndexerProgress {
pub phase: IndexPhase,
pub total_found: IndexerStats,
pub processing_rate: f32,
pub estimated_remaining: Option<Duration>,
}
```
Progress updates are sent to the UI via channels, persisted to the database, and available through job queries for time estimates.
### Error Handling
The job system provides structured error handling:
**Non-critical errors** are accumulated but don't stop indexing:
- Permission denied on individual files
@@ -203,47 +319,6 @@ The job system provides structured error handling:
- Filesystem unmounted
- Out of disk space
## Performance Characteristics
Indexing performance varies by mode and scope:
@@ -259,32 +334,12 @@ Indexing performance varies by mode and scope:
**Batch Processing**: Groups operations into transactions of 1,000 items, reducing database overhead by 30x.
**Parallel Discovery**: Work-stealing model with atomic counters for directory traversal, using half of available CPU cores by default.
**Entry ID Cache**: Eliminates redundant parent lookups during hierarchy construction, critical for deep directory trees.
**Checkpoint Strategy**: Checkpoints occur every 5,000 items or 30 seconds, balancing durability with performance.
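A dual-trigger policy like that reduces to a couple of counters. A sketch, not the job system's actual code:
```rust
use std::time::{Duration, Instant};

/// Checkpoint when either limit is hit first: 5,000 items or 30 seconds.
struct CheckpointPolicy {
    last_checkpoint: Instant,
    items_since: u64,
}

impl CheckpointPolicy {
    /// Record one processed item; returns true when a checkpoint is due.
    fn record_item(&mut self) -> bool {
        self.items_since += 1;
        let due = self.items_since >= 5_000
            || self.last_checkpoint.elapsed() >= Duration::from_secs(30);
        if due {
            self.items_since = 0;
            self.last_checkpoint = Instant::now();
        }
        due
    }
}
```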
## Usage Examples
### Quick UI Navigation
@@ -310,7 +365,7 @@ let job = IndexerJob::new(config);
### Full Library Location
Full indexing with content identification:
```rust
let config = IndexerJobConfig::new(
    path,
    IndexMode::Deep
);
```
## CLI Commands
@@ -342,9 +395,9 @@ spacedrive job monitor # Watch progress
### Common Issues
**Slow Indexing**: Check for large `node_modules` or build directories. System rules automatically skip common patterns, or use `.gitignore` to exclude project-specific artifacts.
**High Memory Usage**: Reduce batch size for directories over 1M files. Ephemeral mode uses around 50 bytes per entry, so 100K files requires roughly 5MB.
**Resume Not Working**: Ensure the jobs database isn't corrupted. Check logs for serialization errors.
@@ -364,7 +417,7 @@ spacedrive job info <job-id> --detailed
## Platform Notes
**Windows**: Uses file indices for change detection where available, falling back to path-only matching. Supports long paths transparently. Network drives may require polling.
**macOS**: Leverages FSEvents and native inodes. Integrates with Time Machine exclusions. APFS provides efficient cloning.
@@ -372,15 +425,15 @@ spacedrive job info <job-id> --detailed
## Best Practices
1. **Start shallow** for new locations to verify configuration before deep scans
2. **Use Git repositories** to automatically inherit `.gitignore` exclusions
3. **Monitor progress** through the job system instead of polling the database
4. **Schedule deep scans** during low-usage periods for large photo/video libraries
5. **Enable checkpointing** for locations over 100K files to survive interruptions
<Warning>
Always let indexing jobs complete or pause them properly. Force-killing can
corrupt the job state and require reindexing from scratch.
</Warning>
## Related Documentation

View File

@@ -1,10 +1,10 @@
import { SpacedriveProvider, type SpacedriveClient } from "./context";
import { ReactQueryDevtools } from "@tanstack/react-query-devtools";
import {
RouterProvider,
Outlet,
useLocation,
useParams,
} from "react-router-dom";
import { useEffect, useMemo } from "react";
import { Dialogs } from "@sd/ui";
@@ -12,27 +12,35 @@ import { Inspector, type InspectorVariant } from "./Inspector";
import { TopBarProvider, TopBar } from "./TopBar";
import { motion, AnimatePresence } from "framer-motion";
import {
ExplorerProvider,
useExplorer,
Sidebar,
getSpaceItemKeyFromRoute,
} from "./components/Explorer";
import {
SelectionProvider,
useSelection,
} from "./components/Explorer/SelectionContext";
import { KeyboardHandler } from "./components/Explorer/KeyboardHandler";
import { TagAssignmentMode } from "./components/Explorer/TagAssignmentMode";
import { SpacesSidebar } from "./components/SpacesSidebar";
import {
QuickPreviewFullscreen,
PREVIEW_LAYER_ID,
} from "./components/QuickPreview";
import { createExplorerRouter } from "./router";
import { useNormalizedQuery, useLibraryMutation } from "./context";
import { usePlatform } from "./platform";
import type { LocationInfo } from "@sd/ts-client";
import {
DndContext,
DragOverlay,
PointerSensor,
useSensor,
useSensors,
pointerWithin,
rectIntersection,
} from "@dnd-kit/core";
import type { CollisionDetection } from "@dnd-kit/core";
import { useState } from "react";
import type { File } from "@sd/ts-client";
@@ -40,214 +48,236 @@ import { File as FileComponent } from "./components/Explorer/File";
import { DaemonDisconnectedOverlay } from "./components/DaemonDisconnectedOverlay";
interface AppProps {
client: SpacedriveClient;
}
export function ExplorerLayout() {
const location = useLocation();
const params = useParams();
const platform = usePlatform();
const {
sidebarVisible,
inspectorVisible,
setInspectorVisible,
quickPreviewFileId,
setQuickPreviewFileId,
closeQuickPreview,
currentFiles,
tagModeActive,
setTagModeActive,
viewMode,
setSpaceItemId,
} = useExplorer();
const { selectedFiles, selectFile } = useSelection();
// Sync route with explorer context for view preferences
useEffect(() => {
const spaceItemKey = getSpaceItemKeyFromRoute(
location.pathname,
location.search,
);
setSpaceItemId(spaceItemKey);
}, [location.pathname, location.search, setSpaceItemId]);
// Sync QuickPreview with selection - Explorer is source of truth
useEffect(() => {
if (!quickPreviewFileId) return;
// When selection changes and QuickPreview is open, update preview to match selection
if (
selectedFiles.length === 1 &&
selectedFiles[0].id !== quickPreviewFileId
) {
setQuickPreviewFileId(selectedFiles[0].id);
}
}, [selectedFiles, quickPreviewFileId, setQuickPreviewFileId]);
// Check if we're on Overview (hide inspector) or in Knowledge view (has its own inspector)
const isOverview = location.pathname === "/";
const isKnowledgeView = viewMode === "knowledge";
// Fetch locations to get current location info
const locationsQuery = useNormalizedQuery<
null,
{ locations: LocationInfo[] }
>({
wireMethod: "query:locations.list",
input: null,
resourceType: "location",
});
// Get current location if we're on a location route
const currentLocation = useMemo(() => {
if (!params.locationId || !locationsQuery.data?.locations) return null;
return (
locationsQuery.data.locations.find(
(loc) => loc.id === params.locationId,
) || null
);
}, [params.locationId, locationsQuery.data]);
useEffect(() => {
// Listen for inspector window close events
if (!platform.onWindowEvent) return;
let unlisten: (() => void) | undefined;
(async () => {
try {
unlisten = await platform.onWindowEvent(
"inspector-window-closed",
() => {
// Show embedded inspector when floating window closes
setInspectorVisible(true);
},
);
} catch (err) {
console.error("Failed to setup inspector close listener:", err);
}
})();
return () => {
unlisten?.();
};
}, [platform, setInspectorVisible]);
const handlePopOutInspector = async () => {
if (!platform.showWindow) return;
try {
await platform.showWindow({
type: "Inspector",
item_id: null,
});
// Hide the embedded inspector when popped out
setInspectorVisible(false);
} catch (err) {
console.error("Failed to pop out inspector:", err);
}
};
const isPreviewActive = !!quickPreviewFileId;
return (
<div className="relative flex h-screen select-none overflow-hidden text-sidebar-ink bg-app rounded-[10px] border border-transparent frame">
{/* Preview layer - portal target for fullscreen preview, sits between content and sidebar/inspector */}
<div
id={PREVIEW_LAYER_ID}
className="absolute inset-0 z-40 pointer-events-none [&>*]:pointer-events-auto"
/>
<TopBar
sidebarWidth={sidebarVisible ? 224 : 0}
inspectorWidth={
inspectorVisible && !isOverview && !isKnowledgeView
? 284
: 0
}
isPreviewActive={isPreviewActive}
/>
<AnimatePresence initial={false} mode="popLayout">
{sidebarVisible && (
<motion.div
initial={{ x: -220, width: 0 }}
animate={{ x: 0, width: 220 }}
exit={{ x: -220, width: 0 }}
transition={{ duration: 0.3, ease: [0.25, 1, 0.5, 1] }}
className="relative z-50 overflow-hidden"
>
<SpacesSidebar isPreviewActive={isPreviewActive} />
</motion.div>
)}
</AnimatePresence>
<div className="relative flex-1 overflow-hidden z-30">
{/* Router content renders here */}
<Outlet />
<div className="relative flex-1 overflow-hidden z-30">
{/* Router content renders here */}
<Outlet />
{/* Tag Assignment Mode - positioned at bottom of main content area */}
<TagAssignmentMode
isActive={tagModeActive}
onExit={() => setTagModeActive(false)}
/>
</div>
{/* Keyboard handler (invisible, doesn't cause parent rerenders) */}
<KeyboardHandler />
<AnimatePresence initial={false}>
{/* Hide inspector on Overview screen and Knowledge view (has its own) */}
{inspectorVisible && !isOverview && !isKnowledgeView && (
<motion.div
initial={{ width: 0 }}
animate={{ width: 280 }}
exit={{ width: 0 }}
transition={{ duration: 0.3, ease: [0.25, 1, 0.5, 1] }}
className="relative z-50 overflow-hidden"
>
<div className="w-[280px] min-w-[280px] flex flex-col h-full p-2 bg-transparent">
<Inspector
currentLocation={currentLocation}
onPopOut={handlePopOutInspector}
isPreviewActive={isPreviewActive}
/>
</div>
</motion.div>
)}
</AnimatePresence>
{/* Quick Preview - renders via portal into preview layer */}
{quickPreviewFileId &&
(() => {
const currentIndex = currentFiles.findIndex(
(f) => f.id === quickPreviewFileId,
);
const hasPrevious = currentIndex > 0;
const hasNext = currentIndex < currentFiles.length - 1;
const handleNext = () => {
if (hasNext && currentFiles[currentIndex + 1]) {
selectFile(
currentFiles[currentIndex + 1],
currentFiles,
false,
false,
);
}
};
const handlePrevious = () => {
if (hasPrevious && currentFiles[currentIndex - 1]) {
selectFile(
currentFiles[currentIndex - 1],
currentFiles,
false,
false,
);
}
};
return (
<QuickPreviewFullscreen
fileId={quickPreviewFileId}
isOpen={!!quickPreviewFileId}
onClose={closeQuickPreview}
onNext={handleNext}
onPrevious={handlePrevious}
hasPrevious={hasPrevious}
hasNext={hasNext}
sidebarWidth={sidebarVisible ? 220 : 0}
inspectorWidth={
inspectorVisible &&
!isOverview &&
!isKnowledgeView
? 280
: 0
}
/>
);
})()}
</div>
);
}
/**
@@ -275,142 +305,153 @@ export function ExplorerLayout() {
* - Data: { type, spaceId, groupId? }
*/
function DndWrapper({ children }: { children: React.ReactNode }) {
const sensors = useSensors(
useSensor(PointerSensor, {
activationConstraint: {
distance: 8, // Require 8px movement before activating drag
},
}),
);
const addItem = useLibraryMutation("spaces.add_item");
const [activeItem, setActiveItem] = useState<any>(null);
// Custom collision detection: prefer -top zones over -bottom zones to avoid double lines
const customCollision: CollisionDetection = (args) => {
const collisions = pointerWithin(args);
if (!collisions || collisions.length === 0) return collisions;
// If we have multiple collisions, prefer -top over -bottom
const hasTop = collisions.find((c) => String(c.id).endsWith("-top"));
const hasMiddle = collisions.find((c) =>
String(c.id).endsWith("-middle"),
);
if (hasMiddle) return [hasMiddle]; // Middle zone takes priority
if (hasTop) return [hasTop]; // Top zone over bottom
return [collisions[0]]; // First collision
};
const handleDragStart = (event: any) => {
setActiveItem(event.active.data.current);
};
const handleDragEnd = async (event: any) => {
const { active, over } = event;
setActiveItem(null);
if (!over || !active.data.current) return;
const dragData = active.data.current;
const dropData = over.data.current;
if (!dragData || dragData.type !== "explorer-file") return;
// Insert before/after sidebar items (adds item to space/group)
if (
dropData?.action === "insert-before" ||
dropData?.action === "insert-after"
) {
if (!dropData.spaceId) return;
try {
await addItem.mutateAsync({
space_id: dropData.spaceId,
group_id: dropData.groupId || null,
item_type: { Path: { sd_path: dragData.sdPath } },
});
// TODO: Implement proper ordering relative to itemId
} catch (err) {
console.error("Failed to add item:", err);
}
return;
}
// Move file into location/volume/folder
if (dropData?.action === "move-into") {
// TODO: Implement with files.move mutation based on targetType
// - location: Use targetPath
// - volume: Look up volume root path
// - folder: Use targetPath from Path item
return;
}
// Drop on space root area (adds to space)
if (dropData?.type === "space" && dragData.type === "explorer-file") {
try {
await addItem.mutateAsync({
space_id: dropData.spaceId,
group_id: null,
item_type: { Path: { sd_path: dragData.sdPath } },
});
} catch (err) {
console.error("Failed to add item:", err);
}
}
// Drop on group area (adds to group)
if (dropData?.type === "group" && dragData.type === "explorer-file") {
try {
await addItem.mutateAsync({
space_id: dropData.spaceId,
group_id: dropData.groupId,
item_type: { Path: { sd_path: dragData.sdPath } },
});
} catch (err) {
console.error("Failed to add item to group:", err);
}
}
};
return (
<DndContext
sensors={sensors}
collisionDetection={customCollision}
onDragStart={handleDragStart}
onDragEnd={handleDragEnd}
>
{children}
<DragOverlay dropAnimation={null}>
{activeItem?.file && activeItem.gridSize ? (
<div style={{ width: activeItem.gridSize }}>
<div className="flex flex-col items-center gap-2 p-1 rounded-lg">
<div className="rounded-lg p-2">
<FileComponent.Thumb
file={activeItem.file}
size={Math.max(
activeItem.gridSize * 0.6,
60,
)}
/>
</div>
<div className="text-sm truncate px-2 py-0.5 rounded-md bg-accent text-white max-w-full">
{activeItem.name}
</div>
</div>
</div>
) : null}
</DragOverlay>
</DndContext>
);
}
export function Explorer({ client }: AppProps) {
const router = createExplorerRouter();
return (
<SpacedriveProvider client={client}>
<DndWrapper>
<TopBarProvider>
<SelectionProvider>
<ExplorerProvider>
<RouterProvider router={router} />
</ExplorerProvider>
</SelectionProvider>
</TopBarProvider>
</DndWrapper>
<DaemonDisconnectedOverlay />
<Dialogs />
<ReactQueryDevtools initialIsOpen={false} />
</SpacedriveProvider>
);
}

View File

@@ -3,14 +3,15 @@ import clsx from "clsx";
import { getIcon } from "@sd/assets/util";
import type { File } from "@sd/ts-client";
import { ThumbstripScrubber } from "./ThumbstripScrubber";
import { getContentKind } from "../utils";
interface ThumbProps {
file: File;
size?: number;
className?: string;
frameClassName?: string; // Custom frame styling (border, radius, bg)
iconScale?: number; // Scale factor for fallback icon (0-1, default 1)
squareMode?: boolean; // Whether thumbnail is cropped to square (media view) or maintains aspect ratio
}
// Global cache for thumbnail loaded states (survives component unmount/remount)
@@ -18,203 +19,209 @@ const thumbLoadedCache = new Map<string, boolean>();
const thumbErrorCache = new Map<string, boolean>();
export const Thumb = memo(function Thumb({
file,
size = 100,
className,
frameClassName,
iconScale = 1,
squareMode = false,
}: ThumbProps) {
const cacheKey = `${file.id}-${size}`;
const [thumbLoaded, setThumbLoaded] = useState(
() => thumbLoadedCache.get(cacheKey) || false,
);
const [thumbError, setThumbError] = useState(
() => thumbErrorCache.get(cacheKey) || false,
);
// Update cache when state changes
useEffect(() => {
if (thumbLoaded) thumbLoadedCache.set(cacheKey, true);
}, [thumbLoaded, cacheKey]);
useEffect(() => {
if (thumbError) thumbErrorCache.set(cacheKey, true);
}, [thumbError, cacheKey]);
const iconSize = size * iconScale;
// Check if this is a video with thumbstrip sidecar
const isVideo = getContentKind(file) === "video";
const hasThumbstrip = file.sidecars?.some((s) => s.kind === "thumbstrip");
// Get appropriate thumbnail URL from sidecars based on size
const getThumbnailUrl = (targetSize: number) => {
const serverUrl = (window as any).__SPACEDRIVE_SERVER_URL__;
const libraryId = (window as any).__SPACEDRIVE_LIBRARY_ID__;
if (!serverUrl || !libraryId) {
return null;
}
// Need content_identity to build sidecar URL
if (!file.content_identity?.uuid) {
return null;
}
// Find thumbnail sidecar closest to requested size
const thumbnails = file.sidecars.filter((s) => s.kind === "thumb");
if (thumbnails.length === 0) {
return null;
}
// Prefer 1x (lower resolution) variants for better performance
// Only use higher resolution for very large sizes (>400px)
const preferredSize = targetSize <= 400 ? targetSize * 0.6 : targetSize;
const thumbnail = thumbnails.sort((a, b) => {
// Parse variant (e.g., "grid@1x", "detail@1x") to get size and scale
const aSize = parseInt(
a.variant.split("x")[0]?.replace(/\D/g, "") || "0",
);
const bSize = parseInt(
b.variant.split("x")[0]?.replace(/\D/g, "") || "0",
);
// Extract scale factor (1x, 2x, 3x) from variants like "grid@1x" or "detail@2x"
const aScaleMatch = a.variant.match(/@(\d+)x/);
const bScaleMatch = b.variant.match(/@(\d+)x/);
const aScale = aScaleMatch ? parseInt(aScaleMatch[1]) : 1;
const bScale = bScaleMatch ? parseInt(bScaleMatch[1]) : 1;
// Strongly prefer 1x variants (add penalty for higher scales)
const aPenalty = (aScale - 1) * 100;
const bPenalty = (bScale - 1) * 100;
// Find closest match to preferred size, with scale penalty
return (
Math.abs(aSize - preferredSize) +
aPenalty -
(Math.abs(bSize - preferredSize) + bPenalty)
);
})[0];
const contentUuid = file.content_identity.uuid;
const url = `${serverUrl}/sidecar/${libraryId}/${contentUuid}/${thumbnail.kind}/${thumbnail.variant}.${thumbnail.format}`;
return url;
};
const thumbnailSrc = getThumbnailUrl(size);
// Get content kind (prefers content_identity.kind, falls back to content_kind)
const contentKind = getContentKind(file);
const fileKind =
contentKind && contentKind !== "unknown"
? contentKind
: file.kind === "File"
? file.extension || "File"
: file.kind;
const kindCapitalized =
fileKind.charAt(0).toUpperCase() + fileKind.slice(1);
const icon = getIcon(
kindCapitalized,
true, // Dark theme
file.extension,
file.kind === "Directory",
);
return (
<div
className={clsx(
"relative flex shrink-0 grow-0 items-center justify-center",
className,
)}
style={{
width: size,
height: size,
minWidth: size,
minHeight: size,
maxWidth: size,
maxHeight: size,
}}
>
{/* Always show icon first (instant), then thumbnail loads over it */}
<img
src={icon}
alt=""
className={clsx(
"object-contain transition-opacity",
// Only hide icon if we actually have a thumbnail that loaded
thumbLoaded && thumbnailSrc && "opacity-0",
)}
style={{
width: iconSize,
height: iconSize,
maxWidth: "100%",
maxHeight: "100%",
}}
/>
{/* Load thumbnail if available */}
{thumbnailSrc && !thumbError && (
<img
src={thumbnailSrc}
alt={file.name}
className={clsx(
"absolute inset-0 m-auto max-h-full max-w-full object-contain transition-opacity",
// Default frame styling (can be overridden)
frameClassName ||
"rounded-lg border border-app-line/50 bg-app-box/30",
!thumbLoaded && "opacity-0",
)}
onLoad={() => setThumbLoaded(true)}
onError={() => setThumbError(true)}
/>
)}
{/* Thumbstrip scrubber overlay (for videos with thumbstrips) */}
{isVideo && hasThumbstrip && thumbLoaded && (
<ThumbstripScrubber
file={file}
size={size}
squareMode={squareMode}
/>
)}
</div>
);
});
export function Icon({
}: {
file: File;
size?: number;
className?: string;
}) {
// Get content kind (prefers content_identity.kind, falls back to content_kind)
const contentKind = getContentKind(file);
const fileKind =
contentKind && contentKind !== "unknown"
? contentKind
: file.kind === "File"
? file.extension || "File"
: file.kind;
const kindCapitalized =
fileKind.charAt(0).toUpperCase() + fileKind.slice(1);
const icon = getIcon(
kindCapitalized,
true, // Dark theme
file.extension,
file.kind === "Directory",
);
return (
<img
src={icon}
alt=""
className={className}
style={{ width: size, height: size }}
/>
);
}

View File

@@ -349,22 +349,34 @@ const jobOptions: JobOption[] = [
export function useAddStorageDialog(
onStorageAdded?: (id: string) => void,
initialPath?: string,
) {
return dialogManager.create((props) => (
<AddStorageDialog
{...props}
onStorageAdded={onStorageAdded}
initialPath={initialPath}
/>
));
}
function AddStorageDialog(props: {
id: number;
onStorageAdded?: (id: string) => void;
initialPath?: string;
}) {
const dialog = useDialog(props);
const platform = usePlatform();
const [step, setStep] = useState<ModalStep>("category");
// Derive initial folder name from path
const initialFolderName =
props.initialPath?.split("/").filter(Boolean).pop() || "";
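// e.g. "/Users/jamie/Documents" -> "Documents"; note this assumes "/" separators,
// so a Windows-style backslash path would fall through to the "" default.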
const [step, setStep] = useState<ModalStep>(
props.initialPath ? "local-config" : "category",
);
const [selectedCategory, setSelectedCategory] =
useState<StorageCategory | null>(props.initialPath ? "local" : null);
const [selectedProvider, setSelectedProvider] =
useState<CloudProvider | null>(null);
const [tab, setTab] = useState<SettingsTab>("preset");
@@ -385,8 +397,8 @@ function AddStorageDialog(props: {
const localForm = useForm<LocalFolderFormData>({
defaultValues: {
path: "",
name: "",
path: props.initialPath || "",
name: initialFolderName,
mode: "Deep",
},
});
@@ -404,7 +416,9 @@ function AddStorageDialog(props: {
const currentMode = localForm.watch("mode");
const [selectedJobs, setSelectedJobs] = useState<Set<string>>(
new Set(
jobOptions.filter((j) => j.presets.includes("Deep")).map((j) => j.id),
jobOptions
.filter((j) => j.presets.includes("Deep"))
.map((j) => j.id),
),
);
@@ -539,7 +553,9 @@ function AddStorageDialog(props: {
localForm.setError("root", {
type: "manual",
message:
error instanceof Error
? error.message
: "Failed to add location",
});
}
});
@@ -692,7 +708,11 @@ function AddStorageDialog(props: {
"border-app-line bg-app-box hover:bg-app-hover hover:border-accent/50",
)}
>
<img
src={category.icon}
className="size-12"
alt=""
/>
<div className="text-center">
<div className="text-sm font-medium text-ink">
{category.label}
@@ -733,7 +753,11 @@ function AddStorageDialog(props: {
"border-app-line bg-app-box hover:bg-app-hover hover:border-accent/50",
)}
>
<img
src={provider.icon}
className="size-10"
alt=""
/>
<div className="text-xs font-medium text-ink text-center">
{provider.name}
</div>
@@ -761,8 +785,9 @@ function AddStorageDialog(props: {
<div className="rounded-lg bg-accent/10 border border-accent/20 p-4 text-sm text-ink">
<strong>Coming Soon</strong>
<p className="mt-1 text-ink-dull">
Network protocol support (SMB, NFS, SFTP, WebDAV) is
currently in development. Check back in a future
update!
</p>
</div>
<div className="grid grid-cols-2 gap-3 opacity-50 pointer-events-none">
@@ -776,7 +801,11 @@ function AddStorageDialog(props: {
"border-app-line bg-app-box",
)}
>
<img
src={protocol.icon}
className="size-8"
alt=""
/>
<div className="text-left">
<div className="text-sm font-medium text-ink">
{protocol.name}
@@ -820,17 +849,27 @@ function AddStorageDialog(props: {
"border-app-line bg-app-box hover:bg-app-hover hover:border-accent/50",
)}
>
<img
src={HDDIcon}
className="size-8"
alt=""
/>
<div className="flex-1 min-w-0">
<div className="text-sm font-medium text-ink truncate">
{volume.name}
</div>
<div className="text-xs text-ink-faint">
{volume.mount_point} {" "}
{volume.filesystem}
</div>
</div>
<div className="text-xs text-ink-dull">
{volume.total_capacity
? (
volume.total_capacity / 1e9
).toFixed(0)
: "?"}{" "}
GB
</div>
</button>
))}
@@ -838,8 +877,8 @@ function AddStorageDialog(props: {
) : (
<div className="rounded-lg bg-app-box border border-app-line p-6 text-center">
<p className="text-sm text-ink-dull">
No untracked external drives found. Connect a
drive and refresh to see it here.
</p>
</div>
)}
@@ -867,7 +906,9 @@ function AddStorageDialog(props: {
<div className="relative">
<Input
value={localForm.watch("path") || ""}
onChange={(e) => localForm.setValue("path", e.target.value)}
onChange={(e) =>
localForm.setValue("path", e.target.value)
}
placeholder="Select a custom folder"
size="lg"
className="pr-14"
@@ -880,34 +921,40 @@ function AddStorageDialog(props: {
</div>
</div>
{suggestedLocations &&
suggestedLocations.locations.length > 0 && (
<div className="space-y-2">
<Label>Suggested Locations</Label>
<div className="grid grid-cols-2 gap-2 max-h-[280px] overflow-y-auto pr-1">
{suggestedLocations.locations.map((loc) => (
<button
key={loc.path}
type="button"
onClick={() =>
handleSelectSuggested(
loc.path,
loc.name,
)
}
className="flex items-center gap-3 rounded-lg border border-app-line bg-app-box p-3 text-left transition-all hover:bg-app-hover hover:border-accent/50 h-fit"
>
<Folder
className="size-5 shrink-0 text-accent"
weight="fill"
/>
<div className="min-w-0 flex-1">
<div className="text-sm font-medium text-ink truncate">
{loc.name}
</div>
<div className="text-xs text-ink-faint truncate">
{loc.path}
</div>
</div>
</button>
))}
</div>
</div>
)}
</div>
</StorageDialog>
);
@@ -939,11 +986,16 @@ function AddStorageDialog(props: {
/>
</div>
<Tabs.Root
value={tab}
onValueChange={(v) => setTab(v as SettingsTab)}
>
<Tabs.List>
<Tabs.Trigger value="preset">Preset</Tabs.Trigger>
<Tabs.Trigger value="jobs">
Jobs{" "}
{selectedJobs.size > 0 &&
`(${selectedJobs.size})`}
</Tabs.Trigger>
</Tabs.List>
@@ -952,12 +1004,15 @@ function AddStorageDialog(props: {
<Label>Indexing Mode</Label>
<div className="grid grid-cols-3 gap-2">
{indexModes.map((mode) => {
const isSelected =
currentMode === mode.value;
return (
<button
key={mode.value}
type="button"
onClick={() =>
handleModeChange(mode.value)
}
className={clsx(
"rounded-lg border p-3 text-left transition-all",
isSelected
@@ -981,17 +1036,21 @@ function AddStorageDialog(props: {
<Tabs.Content value="jobs" className="pt-3">
<div className="space-y-3 max-h-[280px] overflow-y-auto pr-1">
<p className="text-xs text-ink-faint">
Select which jobs to run after indexing.
Extensions can add more jobs.
</p>
<div className="grid grid-cols-2 gap-2">
{jobOptions.map((job) => {
const isSelected = selectedJobs.has(
job.id,
);
return (
<button
key={job.id}
type="button"
onClick={() =>
toggleJob(job.id)
}
className={clsx(
"flex items-start gap-2 rounded-lg border p-3 text-left transition-all",
isSelected
@@ -1110,8 +1169,10 @@ function AddStorageDialog(props: {
<div className="space-y-2">
<Label>
Endpoint
{provider.id === "r2" && " (e.g., https://account.r2.cloudflarestorage.com)"}
{provider.id === "minio" && " (e.g., http://localhost:9000)"}
{provider.id === "r2" &&
" (e.g., https://account.r2.cloudflarestorage.com)"}
{provider.id === "minio" &&
" (e.g., http://localhost:9000)"}
</Label>
<Input
{...cloudForm.register("endpoint")}

View File

@@ -7,8 +7,17 @@ import { getDeviceIconBySlug, useLibraryMutation } from "@sd/ts-client";
import { sdPathToUri } from "../utils";
import LaptopIcon from "@sd/assets/icons/Laptop.png";
import { useNormalizedQuery } from "@sd/ts-client";
import {
TopBarButton,
Popover,
usePopover,
PopoverContainer,
PopoverSection,
PopoverDivider,
Button,
} from "@sd/ui";
import { useSelection } from "../SelectionContext";
import { useAddStorageDialog } from "./AddStorageModal";
interface PathBarProps {
path: SdPath;
@@ -118,77 +127,113 @@ function IndexIndicator({ path }: { path: SdPath }) {
// Find location with longest matching prefix
return locations
.filter((loc) => {
if (!loc.sd_path || !("Physical" in loc.sd_path)) return false;
if (!loc.sd_path || !("Physical" in loc.sd_path))
return false;
const locPath = loc.sd_path.Physical.path;
return pathStr.startsWith(locPath);
})
.sort((a, b) => {
const aPath = ("Physical" in a.sd_path!) ? a.sd_path!.Physical.path : "";
const bPath = ("Physical" in b.sd_path!) ? b.sd_path!.Physical.path : "";
const aPath =
"Physical" in a.sd_path!
? a.sd_path!.Physical.path
: "";
const bPath =
"Physical" in b.sd_path!
? b.sd_path!.Physical.path
: "";
return bPath.length - aPath.length;
})[0];
}
return undefined;
})();
const isIndexed =
matchingLocation?.index_mode !== undefined &&
matchingLocation.index_mode !== "none";
return (
<>
<Popover
popover={popover}
trigger={
<TopBarButton
icon={Eye}
active={isIndexed}
title={isIndexed ? "Location is indexed" : "Location not indexed"}
className={isIndexed ? "!text-blue-500" : undefined}
title={isIndexed ? "Location is indexed" : "Not indexed"}
/>
}
>
<PopoverContainer>
{matchingLocation ? (
<>
<PopoverSection>
<div className="px-2 py-1.5">
<div className="text-xs font-semibold text-ink">
{matchingLocation.name}
</div>
<div className="text-xs text-ink-dull mt-0.5">
{isIndexed
? `Indexed (${matchingLocation.index_mode})`
: "Not indexed"}
</div>
</div>
</PopoverSection>
<PopoverDivider />
<PopoverSection>
{!isIndexed && (
<button
onClick={async () => {
await enableIndexing.mutateAsync({
id: matchingLocation.id,
index_mode: "deep",
});
popover.setOpen(false);
}}
className="flex items-center gap-2 px-2 py-1.5 rounded-md text-xs font-medium text-ink hover:bg-app-hover transition-colors"
>
<Eye size={16} />
Enable Indexing
</button>
)}
<button
onClick={() => {
clearSelection();
popover.setOpen(false);
}}
className="flex items-center gap-2 px-2 py-1.5 rounded-md text-xs font-medium text-ink hover:bg-app-hover transition-colors"
>
<Folder size={16} />
Open Location Inspector
</button>
</PopoverSection>
</>
) : (
<PopoverSection>
<div className="px-2 py-1.5">
<div className="text-xs text-ink-dull mb-2">
Path is outside any location
</div>
<Button
size="sm"
variant="accent"
onClick={() => {
const initialPath =
"Physical" in path
? path.Physical.path
: undefined;
useAddStorageDialog(undefined, initialPath);
popover.setOpen(false);
}}
>
Add Location
</Button>
</div>
</PopoverSection>
)}
</PopoverContainer>
</Popover>
</>
);
}
@@ -257,71 +302,69 @@ export function PathBar({ path, devices, onNavigate }: PathBarProps) {
"focus-within:bg-sidebar-box/30 focus-within:border-sidebar-line/40",
)}
>
<img
src={deviceIcon}
alt="Device"
className="size-5 opacity-60 flex-shrink-0"
/>
{showUri ? (
<input
type="text"
value={uri}
readOnly
className={clsx(
"bg-transparent border-0 outline-none ring-0 flex-1 min-w-0",
"text-xs font-medium text-sidebar-ink",
"placeholder:text-sidebar-inkFaint",
"select-all cursor-text",
"focus:ring-0 focus:outline-none",
)}
placeholder="No path selected"
/>
) : isExpanded ? (
<div className="flex items-center gap-1 flex-1 min-w-0 overflow-hidden">
{segments.map((segment, index) => {
const isLast = index === segments.length - 1;
return (
<div
key={index}
className="flex items-center gap-1 flex-shrink-0"
>
<button
onClick={() =>
!isLast && onNavigate(segment.path)
}
disabled={isLast}
className={clsx(
"text-xs font-medium transition-colors whitespace-nowrap",
isLast
? "text-sidebar-ink cursor-default"
: "text-sidebar-inkDull hover:text-sidebar-ink cursor-pointer",
)}
>
{segment.name}
</button>
{!isLast && <CaretRight size={12} />}
</div>
);
})}
</div>
) : (
<input
type="text"
value={currentDir}
readOnly
className={clsx(
"bg-transparent border-0 outline-none ring-0 flex-1 min-w-0",
"text-xs font-medium text-sidebar-ink",
"placeholder:text-sidebar-inkFaint",
"select-all cursor-text",
"focus:ring-0 focus:outline-none",
)}
placeholder="No path selected"
/>
)}
</motion.div>
<IndexIndicator path={path} />
</div>

View File

@@ -2,66 +2,74 @@ import LaptopIcon from "@sd/assets/icons/Laptop.png";
import MobileIcon from "@sd/assets/icons/Mobile.png";
import ServerIcon from "@sd/assets/icons/Server.png";
import PCIcon from "@sd/assets/icons/PC.png";
import type { SdPath } from "@sd/ts-client";
import type { ContentKind, File, SdPath } from "@sd/ts-client";
/**
* Get the content kind for a file, preferring content_identity.kind if available,
* falling back to content_kind (identified by extension during ephemeral indexing).
*/
export function getContentKind(file: File | null | undefined): ContentKind {
return file?.content_identity?.kind ?? file?.content_kind ?? "unknown";
}
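// Illustrative behavior (example objects, not from this changeset):
//   getContentKind({ content_identity: { kind: "image" } } as File) // "image"
//   getContentKind({ content_kind: "video" } as File)               // "video"
//   getContentKind(null)                                            // "unknown"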
export function formatBytes(bytes: number): string {
if (bytes === 0) return "0 B";
const k = 1024;
const sizes = ["B", "KB", "MB", "GB", "TB"];
const i = Math.floor(Math.log(bytes) / Math.log(k));
return Math.round(bytes / Math.pow(k, i)) + " " + sizes[i];
if (bytes === 0) return "0 B";
const k = 1024;
const sizes = ["B", "KB", "MB", "GB", "TB"];
const i = Math.floor(Math.log(bytes) / Math.log(k));
return Math.round(bytes / Math.pow(k, i)) + " " + sizes[i];
}
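// Sketch of expected output: the log-based index picks the largest unit and
// Math.round drops fractional digits, e.g.
//   formatBytes(1536)       // "2 KB" (1.5 KB rounds up)
//   formatBytes(10_485_760) // "10 MB"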
export function formatRelativeTime(date: Date | string): string {
const d = typeof date === "string" ? new Date(date) : date;
const now = new Date();
const diff = now.getTime() - d.getTime();
const seconds = Math.floor(diff / 1000);
const minutes = Math.floor(seconds / 60);
const hours = Math.floor(minutes / 60);
const days = Math.floor(hours / 24);
const d = typeof date === "string" ? new Date(date) : date;
const now = new Date();
const diff = now.getTime() - d.getTime();
const seconds = Math.floor(diff / 1000);
const minutes = Math.floor(seconds / 60);
const hours = Math.floor(minutes / 60);
const days = Math.floor(hours / 24);
if (days > 7) return d.toLocaleDateString();
if (days > 0) return `${days}d ago`;
if (hours > 0) return `${hours}h ago`;
if (minutes > 0) return `${minutes}m ago`;
return "Just now";
}
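// Illustrative thresholds: under a minute reads "Just now", the largest whole
// unit wins up to a week, then it falls back to a locale date, e.g.
//   formatRelativeTime(new Date(Date.now() - 90 * 60_000))   // "1h ago"
//   formatRelativeTime(new Date(Date.now() - 3 * 86_400_000)) // "3d ago"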
export function getDeviceIcon(os: string, model?: string): string {
const osLower = os.toLowerCase();
if (osLower.includes("ios") || osLower.includes("android")) {
return MobileIcon;
}
if (osLower.includes("ios") || osLower.includes("android")) {
return MobileIcon;
}
if (osLower.includes("windows")) {
return PCIcon;
}
if (osLower.includes("windows")) {
return PCIcon;
}
if (osLower.includes("server") || model?.toLowerCase().includes("server")) {
return ServerIcon;
}
if (osLower.includes("server") || model?.toLowerCase().includes("server")) {
return ServerIcon;
}
return LaptopIcon;
}
export function sdPathToUri(sdPath: SdPath): string {
if ("Physical" in sdPath) {
const { device_slug, path } = sdPath.Physical;
return `local://${device_slug}${path}`;
}
if ("Physical" in sdPath) {
const { device_slug, path } = sdPath.Physical;
return `local://${device_slug}${path}`;
}
if ("Cloud" in sdPath) {
const { service, identifier, path } = sdPath.Cloud;
const scheme = service.toLowerCase();
return `${scheme}://${identifier}/${path}`;
}
if ("Cloud" in sdPath) {
const { service, identifier, path } = sdPath.Cloud;
const scheme = service.toLowerCase();
return `${scheme}://${identifier}/${path}`;
}
if ("Content" in sdPath) {
const { content_id } = sdPath.Content;
return `content://${content_id}`;
}
if ("Content" in sdPath) {
const { content_id } = sdPath.Content;
return `content://${content_id}`;
}
return "";
return "";
}
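// Illustrative mappings (device slug, bucket, and content id are made-up values):
//   { Physical: { device_slug: "jamies-mbp", path: "/Users/jamie" } } -> "local://jamies-mbp/Users/jamie"
//   { Cloud: { service: "S3", identifier: "bucket", path: "photos" } } -> "s3://bucket/photos"
//   { Content: { content_id: "abc123" } } -> "content://abc123"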

View File

File diff suppressed because it is too large Load Diff

View File

@@ -1,395 +1,425 @@
import {
Sparkle,
Tag as TagIcon,
Chat,
Database,
FilmStrip,
Image,
MusicNote,
File as FileIcon,
Folder,
FileText,
} from "@phosphor-icons/react";
import { KnowledgeInspector } from "../../../inspectors/KnowledgeInspector";
import { useExplorer } from "../context";
import { useNormalizedQuery } from "../../../context";
import type { File, ContentKind } from "@sd/ts-client";
import { getContentKind } from "../utils";
import { useMemo } from "react";
import clsx from "clsx";
import { File as FileComponent } from "../File";
const CONTENT_KIND_ICONS: Record<ContentKind, React.ElementType> = {
image: Image,
video: FilmStrip,
audio: MusicNote,
document: FileText,
archive: Folder,
code: FileText,
text: FileText,
database: Database,
book: FileText,
font: FileText,
mesh: FileIcon,
config: FileText,
encrypted: FileIcon,
key: FileIcon,
executable: FileIcon,
binary: FileIcon,
spreadsheet: FileText,
presentation: FileText,
email: FileText,
calendar: FileText,
contact: FileText,
web: FileText,
shortcut: FileIcon,
package: Folder,
model_entry: FileIcon,
unknown: FileIcon,
};
const CONTENT_KIND_LABELS: Record<ContentKind, string> = {
image: "Images",
video: "Videos",
audio: "Audio",
document: "Documents",
archive: "Archives",
code: "Code",
text: "Text",
database: "Databases",
book: "Books",
font: "Fonts",
mesh: "3D Models",
config: "Config",
encrypted: "Encrypted",
key: "Keys",
executable: "Apps",
binary: "Binary",
spreadsheet: "Spreadsheets",
presentation: "Presentations",
email: "Emails",
calendar: "Calendar",
contact: "Contacts",
web: "Web",
shortcut: "Shortcuts",
package: "Packages",
model_entry: "Models",
unknown: "Other",
image: "Images",
video: "Videos",
audio: "Audio",
document: "Documents",
archive: "Archives",
code: "Code",
text: "Text",
database: "Databases",
book: "Books",
font: "Fonts",
mesh: "3D Models",
config: "Config",
encrypted: "Encrypted",
key: "Keys",
executable: "Apps",
binary: "Binary",
spreadsheet: "Spreadsheets",
presentation: "Presentations",
email: "Emails",
calendar: "Calendar",
contact: "Contacts",
web: "Web",
shortcut: "Shortcuts",
package: "Packages",
model_entry: "Models",
unknown: "Other",
};
export function KnowledgeView() {
const { inspectorVisible, currentPath, sortBy, viewSettings } =
useExplorer();
const directoryQuery = useNormalizedQuery({
wireMethod: "query:files.directory_listing",
input: currentPath
? {
path: currentPath,
limit: null,
include_hidden: false,
sort_by: sortBy,
folders_first: viewSettings.foldersFirst,
}
: null,
resourceType: "file",
enabled: !!currentPath,
});
const files = (directoryQuery.data?.files || []) as File[];
// Group files by content kind
const filesByKind = useMemo(() => {
const groups = new Map<ContentKind, File[]>();
files.forEach((file) => {
const kind = getContentKind(file) || "unknown";
if (!groups.has(kind)) {
groups.set(kind, []);
}
groups.get(kind)!.push(file);
});
// Sort by count and return top categories
return Array.from(groups.entries())
.sort((a, b) => b[1].length - a[1].length)
.slice(0, 6);
}, [files]);
// Collect all unique tags
const allTags = useMemo(() => {
const tagMap = new Map<
string,
{ id: string; name: string; color: string; count: number }
>();
files.forEach((file) => {
file.tags?.forEach((tag) => {
if (tagMap.has(tag.id)) {
tagMap.get(tag.id)!.count++;
} else {
tagMap.set(tag.id, {
id: tag.id,
name: tag.canonical_name,
color: tag.color || "#3B82F6",
count: 1,
});
}
});
});
return Array.from(tagMap.values()).sort((a, b) => b.count - a.count);
}, [files]);
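// Illustrative result shape: tags are deduplicated by id across the listing and
// sorted by how many files carry them, e.g.
//   [{ id: "t1", name: "screenshots", color: "#3B82F6", count: 12 }, ...]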
return (
<div className="flex h-full gap-2">
{/* Main content area */}
<div className="flex-1 overflow-y-auto no-scrollbar px-6 py-4">
<div className="max-w-5xl space-y-6">
{/* Header */}
<div className="flex items-center gap-3">
<Sparkle className="size-8 text-accent" weight="fill" />
<div>
<h1 className="text-2xl font-semibold text-ink">
Knowledge View
</h1>
<p className="text-sm text-ink-dull">
AI-powered insights for {files.length} items
</p>
</div>
</div>
{/* Content Piles */}
<Section title="Content" icon={Folder}>
<div className="grid grid-cols-2 md:grid-cols-3 lg:grid-cols-6 gap-4">
{filesByKind.map(([kind, kindFiles]) => (
<ContentPile
key={kind}
kind={kind}
files={kindFiles.slice(0, 3)}
totalCount={kindFiles.length}
/>
))}
</div>
</Section>
{/* Tags */}
{allTags.length > 0 && (
<Section title="Tags" icon={TagIcon}>
<div className="flex flex-wrap gap-2">
{allTags.map((tag) => (
<button
key={tag.id}
className="flex items-center gap-2 px-3 py-1.5 rounded-full bg-app-box hover:bg-app-hover border border-app-line transition-colors"
>
<div
className="size-2 rounded-full"
style={{
backgroundColor: tag.color,
}}
/>
<span className="text-xs font-medium text-ink">
{tag.name}
</span>
<span className="text-xs text-ink-dull">
({tag.count})
</span>
</button>
))}
</div>
</Section>
)}
{/* Summary & Conversations */}
<div className="grid grid-cols-1 lg:grid-cols-2 gap-4">
{/* Summary */}
<Section title="Summary" icon={Sparkle}>
<div className="space-y-2 text-sm text-ink-dull">
<p>
This directory contains {files.length} items
across {filesByKind.length} content types.
</p>
{filesByKind.length > 0 && (
<p>
Most common type:{" "}
{CONTENT_KIND_LABELS[filesByKind[0][0]]}{" "}
({filesByKind[0][1].length} items)
</p>
)}
{allTags.length > 0 && (
<p>
Tagged items:{" "}
{allTags.reduce(
(sum, tag) => sum + tag.count,
0,
)}
</p>
)}
</div>
</Section>
{/* Conversations */}
<Section title="Conversations" icon={Chat}>
<div className="grid grid-cols-2 gap-2">
<ConversationCard
title="Organize photos"
preview="Can you help sort these by date?"
time="2h ago"
/>
<ConversationCard
title="Find duplicates"
preview="Looking for duplicate files in..."
time="Yesterday"
/>
</div>
</Section>
</div>
{/* Intelligence Sidecars */}
<Section title="Intelligence" icon={Database}>
<div className="grid grid-cols-1 md:grid-cols-2 gap-3">
<SidecarItem
kind="OCR Text"
variant="Extracted text from 12 images"
status="ready"
size="2.4 MB"
/>
<SidecarItem
kind="Thumbnails"
variant="Generated for 48 media files"
status="ready"
size="8.1 MB"
/>
<SidecarItem
kind="Video Transcripts"
variant="Speech-to-text from 3 videos"
status="pending"
size="—"
/>
<SidecarItem
kind="Embeddings"
variant="Semantic vectors for search"
status="ready"
size="14.2 MB"
/>
</div>
</Section>
</div>
</div>
{/* Dedicated Knowledge Inspector */}
{inspectorVisible && (
<div className="w-96 h-full shrink-0 pr-2 pb-2">
<div className="h-full rounded-lg overflow-hidden bg-sidebar/65">
<KnowledgeInspector />
</div>
</div>
)}
</div>
);
}
function Section({
title,
icon: Icon,
children,
}: {
title: string;
icon: React.ElementType;
children: React.ReactNode;
}) {
return (
<div className="space-y-3">
<div className="flex items-center gap-2">
<Icon className="size-4 text-ink-dull" weight="bold" />
<h2 className="text-sm font-semibold text-ink">{title}</h2>
</div>
{children}
</div>
);
}
function ContentPile({
kind,
files,
totalCount,
}: {
kind: ContentKind;
files: File[];
totalCount: number;
}) {
const Icon = CONTENT_KIND_ICONS[kind];
const label = CONTENT_KIND_LABELS[kind];
return (
<button className="group flex flex-col items-center gap-2 p-3 rounded-lg hover:bg-app-box/40 transition-colors">
{/* Stacked file previews */}
<div className="relative w-full aspect-square">
{files.length > 0 ? (
files.map((file, i) => (
<div
key={file.id}
className="absolute inset-0"
style={{
transform: `rotate(${(i - 1) * 3}deg) translateY(${i * 2}px)`,
zIndex: files.length - i,
}}
>
<FileComponent.Thumb
file={file}
size={120}
iconScale={0.5}
className="w-full h-full rounded-md shadow-sm"
/>
</div>
))
) : (
<div className="flex items-center justify-center w-full h-full">
<Icon
className="size-12 text-ink-faint"
weight="thin"
/>
</div>
)}
</div>
{/* Label */}
<div className="text-center">
<div className="text-xs font-medium text-ink">{label}</div>
<div className="text-[10px] text-ink-dull">
{totalCount} items
</div>
</div>
</button>
);
}
function ConversationCard({
title,
preview,
time,
}: {
title: string;
preview: string;
time: string;
}) {
return (
<button className="flex flex-col gap-1.5 p-3 rounded-lg bg-app-box/40 hover:bg-app-box border border-app-line/50 hover:border-app-line transition-colors text-left">
<div className="flex items-start justify-between gap-2">
<div className="text-xs font-medium text-ink truncate">
{title}
</div>
<Sparkle
className="size-3 text-accent shrink-0"
weight="fill"
/>
</div>
<p className="text-[11px] text-ink-dull line-clamp-2">{preview}</p>
<span className="text-[10px] text-ink-faint">{time}</span>
</button>
);
}
function SidecarItem({
kind,
variant,
status,
size,
}: {
kind: string;
variant: string;
status: "ready" | "pending";
size: string;
}) {
return (
<div className="flex items-start gap-3 p-3 rounded-lg bg-app-box/40 border border-app-line/50">
<div className="size-10 shrink-0 rounded-md bg-accent/10 border border-accent/20 flex items-center justify-center">
<Database className="size-5 text-accent" weight="bold" />
</div>
<div className="flex-1 min-w-0">
<div className="text-xs font-medium text-ink">{kind}</div>
<div className="text-[11px] text-ink-dull">{variant}</div>
<div className="text-[10px] text-ink-faint mt-1">{size}</div>
</div>
<span
className={clsx(
"text-[10px] font-semibold px-2 py-0.5 rounded-full shrink-0",
status === "ready" && "bg-accent/20 text-accent",
status === "pending" && "bg-ink-faint/20 text-ink-dull",
)}
>
{status}
</span>
</div>
);
}

View File

@@ -21,6 +21,7 @@ import type { File } from "@sd/ts-client";
import { File as FileComponent } from "../../File";
import { useExplorer } from "../../context";
import { useSelection } from "../../SelectionContext";
import { getContentKind } from "../../utils";
import { useContextMenu } from "../../../../hooks/useContextMenu";
import { useJobDispatch } from "../../../../hooks/useJobDispatch";
import { useLibraryMutation } from "../../../../context";
@@ -29,7 +30,7 @@ import { usePlatform } from "../../../../platform";
function formatDuration(seconds: number): string {
const mins = Math.floor(seconds / 60);
const secs = Math.floor(seconds % 60);
return `${mins}:${String(secs).padStart(2, "0")}`;
}
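// e.g. formatDuration(65) -> "1:05", formatDuration(600) -> "10:00"; hours are
// not split out, so 3700 seconds renders as "61:40".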
interface MediaViewItemProps {
@@ -37,7 +38,12 @@ interface MediaViewItemProps {
allFiles: File[];
selected: boolean;
focused: boolean;
onSelect: (
file: File,
files: File[],
multi?: boolean,
range?: boolean,
) => void;
size: number;
}
@@ -90,7 +96,8 @@ export const MediaViewItem = memo(function MediaViewItem({
}
},
keybind: "⌘⇧R",
condition: () => "Physical" in file.sd_path && !!platform.revealFile,
condition: () =>
"Physical" in file.sd_path && !!platform.revealFile,
},
{ type: "separator" },
{
@@ -140,7 +147,11 @@ export const MediaViewItem = memo(function MediaViewItem({
keybind: "⌘V",
condition: () => {
const clipboard = window.__SPACEDRIVE__?.clipboard;
return (
!!clipboard &&
!!clipboard.files &&
clipboard.files.length > 0
);
},
},
// Media Processing submenus
@@ -148,7 +159,7 @@ export const MediaViewItem = memo(function MediaViewItem({
type: "submenu",
icon: Image,
label: "Image Processing",
condition: () => file.content_identity?.kind === "image",
condition: () => getContentKind(file) === "image",
submenu: [
{
icon: Sparkle,
@@ -190,7 +201,7 @@ export const MediaViewItem = memo(function MediaViewItem({
type: "submenu",
icon: Video,
label: "Video Processing",
condition: () => file.content_identity?.kind === "video",
condition: () => getContentKind(file) === "video",
submenu: [
{
icon: FilmStrip,
@@ -202,7 +213,10 @@ export const MediaViewItem = memo(function MediaViewItem({
frame_count: 10,
});
},
condition: () => !file.sidecars?.some((s) => s.kind === "thumbstrip"),
condition: () =>
!file.sidecars?.some(
(s) => s.kind === "thumbstrip",
),
},
{
icon: Sparkle,
@@ -256,7 +270,7 @@ export const MediaViewItem = memo(function MediaViewItem({
type: "submenu",
icon: Microphone,
label: "Audio Processing",
condition: () => file.content_identity?.kind === "audio",
condition: () => getContentKind(file) === "audio",
submenu: [
{
icon: TextAa,
@@ -314,7 +328,7 @@ export const MediaViewItem = memo(function MediaViewItem({
label: "Generate Thumbstrips (Videos)",
onClick: async () => {
const videos = selectedFiles.filter(
(f) => f.content_identity?.kind === "video",
(f) => getContentKind(f) === "video",
);
if (videos.length > 0) {
await runJob("thumbstrip", {
@@ -323,7 +337,9 @@ export const MediaViewItem = memo(function MediaViewItem({
}
},
condition: () =>
selectedFiles.some((f) => f.content_identity?.kind === "video"),
selectedFiles.some(
(f) => getContentKind(f) === "video",
),
},
],
},
@@ -344,7 +360,9 @@ export const MediaViewItem = memo(function MediaViewItem({
if (confirm(message)) {
try {
await deleteFiles.mutateAsync({
targets: {
paths: targets.map((f) => f.sd_path),
},
permanent: false,
recursive: true,
});

View File

@@ -1,4 +1,4 @@
import { useEffect } from "react";
import { useEffect, useRef } from "react";
import { useLibraryQuery, useSpacedriveClient } from "../../../context";
/**
@@ -7,52 +7,60 @@ import { useLibraryQuery, useSpacedriveClient } from "../../../context";
* Events trigger a refetch rather than incrementing/decrementing counts manually.
*/
export function useJobCount() {
const client = useSpacedriveClient();
const { data, refetch } = useLibraryQuery({
type: "jobs.list",
input: { status: null },
});
// Ref for stable refetch access (prevents effect re-runs when refetch reference changes)
const refetchRef = useRef(refetch);
useEffect(() => {
refetchRef.current = refetch;
}, [refetch]);
// Subscribe to job state changes and refetch when they occur
useEffect(() => {
if (!client) return;
let unsubscribe: (() => void) | undefined;
let isCancelled = false;
const filter = {
event_types: [
"JobQueued",
"JobStarted",
"JobCompleted",
"JobFailed",
"JobCancelled",
"JobPaused",
"JobResumed",
],
};
client
.subscribeFiltered(filter, () => refetchRef.current())
.then((unsub) => {
if (isCancelled) {
unsub();
} else {
unsubscribe = unsub;
}
});
return () => {
isCancelled = true;
unsubscribe?.();
};
}, [client]);
const jobs = data?.jobs ?? [];
const runningCount = jobs.filter((j) => j.status === "running").length;
const pausedCount = jobs.filter((j) => j.status === "paused").length;
return {
activeJobCount: runningCount + pausedCount,
hasRunningJobs: runningCount > 0,
};
}
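// Minimal usage sketch (hypothetical consumer component):
//   const { activeJobCount, hasRunningJobs } = useJobCount();
//   return hasRunningJobs ? <span>{activeJobCount} active jobs</span> : null;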

View File

@@ -1,12 +1,12 @@
import type { File, ContentKind } from "@sd/ts-client";
import { File as FileComponent } from "../Explorer/File";
import { formatBytes } from "../Explorer/utils";
import { formatBytes, getContentKind } from "../Explorer/utils";
import { usePlatform } from "../../platform";
import { useState, useEffect, useRef } from "react";
import {
MagnifyingGlassPlus,
MagnifyingGlassMinus,
ArrowCounterClockwise,
} from "@phosphor-icons/react";
import { VideoPlayer } from "./VideoPlayer";
import { AudioPlayer } from "./AudioPlayer";
@@ -14,325 +14,344 @@ import { useZoomPan } from "./useZoomPan";
import { Folder } from "@sd/assets/icons";
interface ContentRendererProps {
file: File;
onZoomChange?: (isZoomed: boolean) => void;
}
function ImageRenderer({ file, onZoomChange }: ContentRendererProps) {
const platform = usePlatform();
const containerRef = useRef<HTMLDivElement>(null);
const [originalLoaded, setOriginalLoaded] = useState(false);
const [originalUrl, setOriginalUrl] = useState<string | null>(null);
const { zoom, zoomIn, zoomOut, reset, isZoomed, transform } =
useZoomPan(containerRef);
// Notify parent of zoom state changes
useEffect(() => {
onZoomChange?.(isZoomed);
}, [isZoomed, onZoomChange]);
useEffect(() => {
if (!platform.convertFileSrc) {
return;
}
const sdPath = file.sd_path as any;
const physicalPath = sdPath?.Physical?.path;
if (!physicalPath) {
console.log(
"[ImageRenderer] No physical path available, sd_path:",
file.sd_path,
);
return;
}
const url = platform.convertFileSrc(physicalPath);
console.log(
"[ImageRenderer] Loading original from:",
physicalPath,
"-> URL:",
url,
);
setOriginalUrl(url);
}, [file, platform]);
// Get highest resolution thumbnail first
const getHighestResThumbnail = () => {
const thumbnails =
file.sidecars?.filter((s) => s.kind === "thumb") || [];
if (thumbnails.length === 0) return null;
const highest = thumbnails.sort((a, b) => {
const aSize = parseInt(
a.variant.split("x")[0]?.replace(/\D/g, "") || "0",
);
const bSize = parseInt(
b.variant.split("x")[0]?.replace(/\D/g, "") || "0",
);
return bSize - aSize;
})[0];
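// Assumes variants are named like "1280x720": stripping non-digits from the
// segment before "x" leaves the pixel width used for ranking, and an
// unparseable variant simply sorts as 0.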
const serverUrl = (window as any).__SPACEDRIVE_SERVER_URL__;
const libraryId = (window as any).__SPACEDRIVE_LIBRARY_ID__;
const contentUuid = file.content_identity?.uuid;
if (!serverUrl || !libraryId || !contentUuid) return null;
return `${serverUrl}/sidecar/${libraryId}/${contentUuid}/${highest.kind}/${highest.variant}.${highest.format}`;
};
const thumbnailUrl = getHighestResThumbnail();
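// The resulting URL follows the sidecar route shape above; with illustrative
// values: http://localhost:8080/sidecar/<libraryId>/<contentUuid>/thumb/1280x720.webp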
return (
<div
ref={containerRef}
className={`relative w-full h-full flex items-center justify-center ${isZoomed ? "overflow-visible" : "overflow-hidden"}`}
>
{/* Zoom Controls */}
<div className="absolute top-4 right-4 z-10 flex flex-col gap-2">
<button
onClick={zoomIn}
className="rounded-lg bg-app-box/80 p-2 text-ink backdrop-blur-xl transition-colors hover:bg-app-hover"
title="Zoom in (+)"
>
<MagnifyingGlassPlus size={20} weight="bold" />
</button>
<button
onClick={zoomOut}
className="rounded-lg bg-app-box/80 p-2 text-ink backdrop-blur-xl transition-colors hover:bg-app-hover"
title="Zoom out (-)"
>
<MagnifyingGlassMinus size={20} weight="bold" />
</button>
{zoom > 1 && (
<button
onClick={reset}
className="rounded-lg bg-app-box/80 p-2 text-ink backdrop-blur-xl transition-colors hover:bg-app-hover"
title="Reset zoom (0)"
>
<ArrowCounterClockwise size={20} weight="bold" />
</button>
)}
</div>
{/* Zoom level indicator */}
{zoom > 1 && (
<div className="absolute top-4 left-4 z-10 rounded-lg bg-app-box/80 px-3 py-1.5 text-sm font-medium text-ink backdrop-blur-xl">
{Math.round(zoom * 100)}%
</div>
)}
{/* Image container with zoom/pan transform */}
<div
className="relative w-full h-full flex items-center justify-center"
style={transform}
>
{/* High-res thumbnail (loads fast, shows immediately) */}
{thumbnailUrl && (
<img
src={thumbnailUrl}
alt={file.name}
className="w-full h-full object-contain"
style={{
opacity: originalLoaded ? 0 : 1,
transition: "opacity 0.3s",
}}
draggable={false}
/>
)}
{/* Original image (loads async, fades in when ready) */}
{originalUrl && (
<img
src={originalUrl}
alt={file.name}
className="absolute inset-0 w-full h-full object-contain"
style={{
opacity: originalLoaded ? 1 : 0,
transition: "opacity 0.3s",
}}
onLoad={() => setOriginalLoaded(true)}
onError={(e) =>
console.error(
"[ImageRenderer] Original failed to load:",
e,
)
}
draggable={false}
/>
)}
</div>
</div>
);
}
function VideoRenderer({ file, onZoomChange }: ContentRendererProps) {
const platform = usePlatform();
const [videoUrl, setVideoUrl] = useState<string | null>(null);
useEffect(() => {
if (!platform.convertFileSrc) {
return;
}
const sdPath = file.sd_path as any;
const physicalPath = sdPath?.Physical?.path;
if (!physicalPath) {
console.log("[VideoRenderer] No physical path available");
return;
}
const url = platform.convertFileSrc(physicalPath);
console.log(
"[VideoRenderer] Loading video from:",
physicalPath,
"-> URL:",
url,
);
setVideoUrl(url);
}, [file, platform]);
if (!videoUrl) {
return (
<div className="w-full h-full flex items-center justify-center">
<FileComponent.Thumb
file={file}
size={800}
className="max-w-full max-h-full"
/>
</div>
);
}
return (
<VideoPlayer src={videoUrl} file={file} onZoomChange={onZoomChange} />
);
}
function AudioRenderer({ file }: ContentRendererProps) {
const platform = usePlatform();
const [audioUrl, setAudioUrl] = useState<string | null>(null);
useEffect(() => {
if (!platform.convertFileSrc) {
return;
}
const sdPath = file.sd_path as any;
const physicalPath = sdPath?.Physical?.path;
if (!physicalPath) {
console.log("[AudioRenderer] No physical path available");
return;
}
const url = platform.convertFileSrc(physicalPath);
console.log(
"[AudioRenderer] Loading audio from:",
physicalPath,
"-> URL:",
url,
);
setAudioUrl(url);
}, [file, platform]);
if (!audioUrl) {
return (
<div className="w-full h-full flex items-center justify-center">
<div className="text-center">
<FileComponent.Thumb file={file} size={200} />
<div className="mt-6 text-ink text-lg font-medium">
{file.name}
</div>
<div className="text-ink-dull text-sm mt-2">Loading...</div>
</div>
</div>
);
}
return <AudioPlayer src={audioUrl} file={file} />;
}
function DocumentRenderer({ file }: ContentRendererProps) {
return (
<div className="w-full h-full flex items-center justify-center">
<div className="text-center">
<FileComponent.Thumb file={file} size={200} />
<div className="mt-6 text-ink text-lg font-medium">
{file.name}
</div>
<div className="text-ink-dull text-sm mt-2 capitalize">
{getContentKind(file) ?? "unknown"}
</div>
<div className="text-ink-dull text-xs mt-1">
{formatBytes(file.size || 0)}
</div>
</div>
</div>
);
}
function TextRenderer({ file }: ContentRendererProps) {
// TODO: Load actual text content
return (
<div className="w-full h-full flex items-center justify-center">
<div className="text-center max-w-xl">
<FileComponent.Thumb file={file} size={120} />
<div className="mt-4 text-ink text-lg font-medium">
{file.name}
</div>
<div className="text-ink-dull text-sm mt-2">Text File</div>
<div className="text-ink-dull text-xs mt-1">
{formatBytes(file.size || 0)}
</div>
<div className="mt-4 text-xs text-ink-dull">
Full text preview coming soon
</div>
</div>
</div>
);
}
function DefaultRenderer({ file }: ContentRendererProps) {
return (
<div className="w-full h-full flex items-center justify-center">
<div className="text-center">
<FileComponent.Thumb file={file} size={200} />
<div className="mt-6 text-ink text-lg font-medium">
{file.name}
</div>
<div className="text-ink-dull text-sm mt-2 capitalize">
{getContentKind(file) ?? "unknown"}
</div>
<div className="text-ink-dull text-xs mt-1">
{formatBytes(file.size || 0)}
</div>
</div>
</div>
);
}
export function ContentRenderer({ file, onZoomChange }: ContentRendererProps) {
// Handle directories first
if (file.kind.type === "Directory") {
return (
<div className="flex flex-col items-center justify-center h-full text-ink-dull">
<img
src={Folder}
alt="Folder Icon"
className="w-16 h-16 mb-4"
/>
<div className="text-lg font-medium text-ink">{file.name}</div>
<div className="text-sm mt-2">Folder</div>
{file.size > 0 && (
<div className="text-xs mt-1">{formatBytes(file.size)}</div>
)}
</div>
);
}
const kind = getContentKind(file);
switch (kind) {
case "image":
return <ImageRenderer file={file} onZoomChange={onZoomChange} />;
case "video":
return <VideoRenderer file={file} onZoomChange={onZoomChange} />;
case "audio":
return <AudioRenderer file={file} />;
case "document":
case "book":
case "spreadsheet":
case "presentation":
return <DocumentRenderer file={file} />;
case "text":
case "code":
case "config":
return <TextRenderer file={file} />;
default:
return <DefaultRenderer file={file} />;
}
}
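Note that getContentKind itself is imported from ../Explorer/utils and never appears in this diff. A minimal sketch of what the helper could look like, assuming it simply centralizes the content_identity lookup that these call sites previously inlined (the real utility may also fall back to extension-based detection):

// Hypothetical sketch of the getContentKind helper used above; the real
// implementation in ../Explorer/utils is not shown in this diff.
import type { File } from "@sd/ts-client";

export function getContentKind(file: File): string | undefined {
	// Centralizes the optional-chaining read the old call sites inlined.
	return file.content_identity?.kind ?? undefined;
}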

View File

@@ -2,150 +2,170 @@ import { useNormalizedQuery } from "../../context";
import { usePlatform } from "../../platform";
import type { File } from "@sd/ts-client";
import { useEffect, useState } from "react";
import { formatBytes, getContentKind } from "../Explorer/utils";
import { X } from "@phosphor-icons/react";
import { ContentRenderer } from "./ContentRenderer";
function MetadataPanel({ file }: { file: File }) {
return (
<div className="w-[280px] min-w-[280px] bg-sidebar-box border-l border-sidebar-line p-4 overflow-y-auto">
<div className="space-y-4">
<div>
<div className="text-xs text-ink-dull mb-1">Name</div>
<div className="text-sm text-ink break-words">
{file.name}
</div>
</div>
<div>
<div className="text-xs text-ink-dull mb-1">Kind</div>
<div className="text-sm text-ink capitalize">
{getContentKind(file)}
</div>
</div>
<div>
<div className="text-xs text-ink-dull mb-1">Size</div>
<div className="text-sm text-ink">
{formatBytes(file.size || 0)}
</div>
</div>
{file.extension && (
<div>
<div className="text-xs text-ink-dull mb-1">
Extension
</div>
<div className="text-sm text-ink">{file.extension}</div>
</div>
)}
{file.created_at && (
<div>
<div className="text-xs text-ink-dull mb-1">
Created
</div>
<div className="text-sm text-ink">
{new Date(file.created_at).toLocaleString()}
</div>
</div>
)}
{file.modified_at && (
<div>
<div className="text-xs text-ink-dull mb-1">
Modified
</div>
<div className="text-sm text-ink">
{new Date(file.modified_at).toLocaleString()}
</div>
</div>
)}
</div>
</div>
);
}
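MetadataPanel leans on the shared formatBytes helper from ../Explorer/utils, which is also outside this diff. A plausible sketch, assuming binary (1024-based) units and one decimal place; the real helper may choose different units or precision:

// Hypothetical formatBytes sketch; shown only to clarify the call sites.
export function formatBytes(bytes: number): string {
	if (bytes <= 0) return "0 B";
	const units = ["B", "KB", "MB", "GB", "TB"];
	// Pick the largest unit that keeps the value >= 1, capped at TB.
	const i = Math.min(
		Math.floor(Math.log(bytes) / Math.log(1024)),
		units.length - 1,
	);
	return `${(bytes / 1024 ** i).toFixed(1)} ${units[i]}`;
}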
export function QuickPreview() {
const platform = usePlatform();
const [fileId, setFileId] = useState<string | null>(null);
useEffect(() => {
// Extract file_id from window label
if (platform.getCurrentWindowLabel) {
const label = platform.getCurrentWindowLabel();
// Label format: "quick-preview-{file_id}"
const match = label.match(/^quick-preview-(.+)$/);
if (match) {
setFileId(match[1]);
}
}
}, [platform]);
const {
data: file,
isLoading,
error,
} = useNormalizedQuery<{ file_id: string }, File>({
wireMethod: "query:files.by_id",
input: { file_id: fileId! },
resourceType: "file",
resourceId: fileId!,
enabled: !!fileId,
});
const handleClose = () => {
if (platform.closeCurrentWindow) {
platform.closeCurrentWindow();
}
};
// Keyboard shortcuts
useEffect(() => {
const handleKeyDown = (e: KeyboardEvent) => {
if (e.code === "Escape") {
handleClose();
}
};
window.addEventListener("keydown", handleKeyDown);
return () => window.removeEventListener("keydown", handleKeyDown);
}, []);
window.addEventListener("keydown", handleKeyDown);
return () => window.removeEventListener("keydown", handleKeyDown);
}, []);
if (isLoading || !file) {
return (
<div className="h-screen flex items-center justify-center bg-app text-ink">
<div className="animate-pulse">Loading...</div>
</div>
);
}
if (error) {
return (
<div className="h-screen flex items-center justify-center bg-app text-red-400">
<div>
<div className="text-lg font-medium mb-2">
Error loading file
</div>
<div className="text-sm">{error.message}</div>
</div>
</div>
);
}
return (
<div className="h-screen flex flex-col bg-app text-ink">
{/* Header */}
<div className="flex items-center justify-between px-4 py-3 border-b border-app-line">
<div className="text-sm font-medium truncate flex-1">
{file.name}
</div>
<button
onClick={handleClose}
className="p-1 rounded-md hover:bg-app-hover text-ink-dull hover:text-ink"
>
<X size={16} weight="bold" />
</button>
</div>
{/* Content Area */}
<div className="flex-1 flex overflow-hidden">
{/* File Content */}
<div className="flex-1 p-6 bg-app-box/30">
<ContentRenderer file={file} />
</div>
{/* Metadata Sidebar */}
<MetadataPanel file={file} />
</div>
{/* Footer with keyboard hints */}
<div className="px-4 py-2 border-t border-app-line bg-app-box/30">
<div className="text-xs text-ink-dull text-center">
Press <span className="text-ink">ESC</span> to close
</div>
</div>
</div>
);
}
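The label parsing above implies that whichever code opens the preview encodes the file id into the window label. A hedged sketch of an opener under that assumption, using Tauri v2's WebviewWindow API; the real opener may live on the Rust side or in the platform layer, and the route, title, and dimensions here are illustrative:

// Hypothetical opener, not from this diff: creates a window whose label
// QuickPreview can parse back with /^quick-preview-(.+)$/.
import { WebviewWindow } from "@tauri-apps/api/webviewWindow";

export function openQuickPreview(fileId: string) {
	return new WebviewWindow(`quick-preview-${fileId}`, {
		url: "/quick-preview", // assumed route for the preview page
		title: "Quick Preview",
		width: 960,
		height: 640,
	});
}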

View File

@@ -1,11 +1,12 @@
import { createPortal } from "react-dom";
import { motion, AnimatePresence } from "framer-motion";
import { X, ArrowLeft, ArrowRight } from "@phosphor-icons/react";
import { useEffect, useState } from "react";
import type { File } from "@sd/ts-client";
import { useNormalizedQuery } from "../../context";
import { ContentRenderer } from "./ContentRenderer";
import { TopBarPortal } from "../../TopBar";
import { getContentKind } from "../Explorer/utils";
interface QuickPreviewFullscreenProps {
fileId: string;
@@ -19,7 +20,7 @@ interface QuickPreviewFullscreenProps {
inspectorWidth?: number;
}
const PREVIEW_LAYER_ID = "quick-preview-layer";
export function QuickPreviewFullscreen({
fileId,
@@ -30,7 +31,7 @@ export function QuickPreviewFullscreen({
hasPrevious,
hasNext,
sidebarWidth = 0,
inspectorWidth = 0,
}: QuickPreviewFullscreenProps) {
const [portalTarget, setPortalTarget] = useState<HTMLElement | null>(null);
const [isZoomed, setIsZoomed] = useState(false);
@@ -40,10 +41,14 @@ export function QuickPreviewFullscreen({
setIsZoomed(false);
}, [fileId]);
const {
data: file,
isLoading,
error,
} = useNormalizedQuery<{ file_id: string }, File>({
wireMethod: "query:files.by_id",
input: { file_id: fileId },
resourceType: "file",
resourceId: fileId,
enabled: !!fileId && isOpen,
});
@@ -59,30 +64,33 @@ export function QuickPreviewFullscreen({
const handleKeyDown = (e: KeyboardEvent) => {
// Only handle close events - let Explorer handle navigation
if (e.code === "Escape" || e.code === "Space") {
e.preventDefault();
e.stopImmediatePropagation();
onClose();
}
};
window.addEventListener("keydown", handleKeyDown, { capture: true });
return () =>
window.removeEventListener("keydown", handleKeyDown, {
capture: true,
});
}, [isOpen, onClose]);
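One detail worth calling out in the effect above: the keydown listener is added and removed with { capture: true }. Capture-phase listeners run before bubble-phase ones, which is what lets the fullscreen preview swallow Escape/Space via stopImmediatePropagation() before the Explorer's own handlers see them. A standalone illustration with assumed handler names, not from the diff:

// previewHandler fires first (capture phase) and stops the event, so
// explorerHandler (bubble phase on the same target) never logs Escape.
const previewHandler = (e: KeyboardEvent) => {
	if (e.code === "Escape") e.stopImmediatePropagation();
};
const explorerHandler = (e: KeyboardEvent) => {
	console.log("explorer saw", e.code);
};
window.addEventListener("keydown", previewHandler, { capture: true });
window.addEventListener("keydown", explorerHandler);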
// Get background style based on content type
const getBackgroundClass = () => {
if (!file) return "bg-black/90";
switch (getContentKind(file)) {
case "video":
return "bg-black";
case "audio":
return "audio-gradient";
case "image":
return "bg-black/95";
default:
return "bg-black/90";
}
};
@@ -106,7 +114,9 @@ export function QuickPreviewFullscreen({
) : error ? (
<div className="flex h-full items-center justify-center text-red-400">
<div>
<div className="mb-2 text-lg font-medium">Error loading file</div>
<div className="mb-2 text-lg font-medium">
Error loading file
</div>
<div className="text-sm">{error.message}</div>
</div>
</div>
@@ -123,14 +133,20 @@ export function QuickPreviewFullscreen({
disabled={!hasPrevious}
className="rounded-md p-1.5 text-white/70 transition-colors hover:bg-white/10 hover:text-white disabled:opacity-30"
>
<ArrowLeft
size={16}
weight="bold"
/>
</button>
<button
onClick={onNext}
disabled={!hasNext}
className="rounded-md p-1.5 text-white/70 transition-colors hover:bg-white/10 hover:text-white disabled:opacity-30"
>
<ArrowRight
size={16}
weight="bold"
/>
</button>
<div className="h-4 w-px bg-white/20 mx-1" />
</>
@@ -154,25 +170,36 @@ export function QuickPreviewFullscreen({
{/* Content Area - padded to fit between sidebar/inspector, expands on zoom */}
<div
className={`flex-1 pt-14 pb-10 ${isZoomed ? "overflow-visible" : "overflow-hidden"}`}
style={{
paddingLeft: isZoomed ? 0 : sidebarWidth,
paddingRight: isZoomed ? 0 : inspectorWidth,
}}
>
<ContentRenderer
file={file}
onZoomChange={setIsZoomed}
/>
</div>
{/* Footer with keyboard hints */}
<div className="absolute bottom-0 left-0 right-0 z-10 px-6 py-3">
<div className="text-center text-xs text-white/50">
<span className="text-white/70">ESC</span> or{' '}
<span className="text-white/70">Space</span> to close
<span className="text-white/70">ESC</span>{" "}
or{" "}
<span className="text-white/70">Space</span>{" "}
to close
{(hasPrevious || hasNext) && (
<>
{" · "}
<span className="text-white/70">←</span>{" "}
/{" "}
<span className="text-white/70">→</span>{" "}
to navigate
</>
)}
</div>

View File

File diff suppressed because it is too large