feat: Add index integrity verification command to CLI

- Introduced an `index verify` command that performs comprehensive integrity checks on the Spacedrive index.
- Implemented comparison of the filesystem state against database entries, reporting discrepancies such as missing files, stale entries, and metadata mismatches.
- Added detailed output options for verification results, including a summary report of findings.
- Created the associated input and output types for the verification action.
- Documented usage and examples in the CLI documentation.
Author: Jamie Pine
Date: 2025-10-08 03:50:38 -07:00
Parent: 63f2f44f20
Commit: 0f0471b978

14 changed files with 1854 additions and 6 deletions

View File

@@ -7,6 +7,7 @@ use sd_core::{
ops::indexing::{
input::IndexInput,
job::{IndexMode, IndexPersistence, IndexScope},
verify::input::IndexVerifyInput,
},
};
@@ -139,3 +140,32 @@ impl BrowseArgs {
.with_persistence(IndexPersistence::Ephemeral))
}
}
#[derive(Args, Debug, Clone)]
pub struct IndexVerifyArgs {
/// Path to verify (can be location root or subdirectory)
pub path: PathBuf,
/// Verify content hashes (slower but more thorough)
#[arg(long, default_value_t = false)]
pub verify_content: bool,
/// Show detailed file-by-file comparison
#[arg(long, default_value_t = true)]
pub detailed: bool,
/// Automatically fix issues (not yet implemented)
#[arg(long, default_value_t = false)]
pub auto_fix: bool,
}
impl IndexVerifyArgs {
pub fn to_input(&self) -> IndexVerifyInput {
IndexVerifyInput {
path: self.path.clone(),
verify_content: self.verify_content,
detailed_report: self.detailed,
auto_fix: self.auto_fix,
}
}
}

View File

@@ -18,6 +18,8 @@ pub enum IndexCmd {
QuickScan(QuickScanArgs),
/// Browse a path without adding as location
Browse(BrowseArgs),
/// Verify index integrity for a path
Verify(IndexVerifyArgs),
}
pub async fn run(ctx: &Context, cmd: IndexCmd) -> Result<()> {
@@ -87,6 +89,149 @@ pub async fn run(ctx: &Context, cmd: IndexCmd) -> Result<()> {
println!("Browse request submitted");
});
}
IndexCmd::Verify(args) => {
let input = args.to_input();
let out: sd_core::ops::indexing::verify::output::IndexVerifyOutput =
execute_action!(ctx, input);
print_output!(
ctx,
&out,
|result: &sd_core::ops::indexing::verify::output::IndexVerifyOutput| {
println!("\n╔══════════════════════════════════════════════════════════════╗");
println!("║ INDEX INTEGRITY VERIFICATION REPORT ║");
println!("╠══════════════════════════════════════════════════════════════╣");
println!(
"║ Path: {:60}",
result
.path
.display()
.to_string()
.chars()
.take(60)
.collect::<String>()
);
println!("║ Duration: {:.2}s {:49}", result.duration_secs, "");
println!("╠══════════════════════════════════════════════════════════════╣");
let report = &result.report;
println!(
"║ Filesystem: {} files, {} directories {:23}",
report.filesystem_file_count, report.filesystem_dir_count, ""
);
println!(
"║ Database: {} files, {} directories {:23}",
report.database_file_count, report.database_dir_count, ""
);
println!("╠══════════════════════════════════════════════════════════════╣");
if result.is_valid {
println!("║ ✅ STATUS: VALID - Index matches filesystem perfectly! ║");
} else {
println!(
"║ ❌ STATUS: DIVERGED - {} issues found {:24}",
report.total_issues(),
""
);
println!(
"╠══════════════════════════════════════════════════════════════╣"
);
if !report.missing_from_index.is_empty() {
println!(
"║ ⚠️ Missing from index: {} {:33}",
report.missing_from_index.len(),
""
);
if args.detailed {
for diff in report.missing_from_index.iter().take(5) {
let path_str = diff.path.display().to_string();
if path_str.len() <= 58 {
println!("║ - {:58}", path_str);
} else {
println!(
"║ - ...{:55}",
&path_str[path_str.len().saturating_sub(55)..]
);
}
}
if report.missing_from_index.len() > 5 {
println!(
"║ ... and {} more {:40}",
report.missing_from_index.len() - 5,
""
);
}
}
}
if !report.stale_in_index.is_empty() {
println!(
"║ 🗑️ Stale in index: {} {:36}",
report.stale_in_index.len(),
""
);
if args.detailed {
for diff in report.stale_in_index.iter().take(5) {
let path_str = diff.path.display().to_string();
if path_str.len() <= 58 {
println!("║ - {:58}", path_str);
} else {
println!(
"║ - ...{:55}",
&path_str[path_str.len().saturating_sub(55)..]
);
}
}
if report.stale_in_index.len() > 5 {
println!(
"║ ... and {} more {:40}",
report.stale_in_index.len() - 5,
""
);
}
}
}
if !report.metadata_mismatches.is_empty() {
println!(
"║ ⚙️ Metadata mismatches: {} {:31}",
report.metadata_mismatches.len(),
""
);
if args.detailed {
for diff in &report.metadata_mismatches {
println!(
"║ - {:?}: {:?} -> {:?} {:20}",
diff.issue_type,
diff.expected.as_deref().unwrap_or("?"),
diff.actual.as_deref().unwrap_or("?"),
""
);
}
}
}
if !report.hierarchy_errors.is_empty() {
println!(
"║ 🌳 Hierarchy errors: {} {:34}",
report.hierarchy_errors.len(),
""
);
}
}
println!("╠══════════════════════════════════════════════════════════════╣");
println!(
"║ {:59}",
report.summary.chars().take(59).collect::<String>()
);
println!("╚══════════════════════════════════════════════════════════════╝\n");
}
);
}
}
Ok(())
}

View File

@@ -57,7 +57,7 @@ pub struct LibrarySettings {
/// File extensions to ignore during indexing
pub ignored_extensions: Vec<String>,
/// Maximum file size to index (in bytes)
/// TODO: ai slop config pls remove this
pub max_file_size: Option<u64>,
/// Whether to automatically track system volumes

View File

@@ -75,7 +75,7 @@ impl EntryProcessor {
/// Extract detailed metadata from a path
pub async fn extract_metadata(path: &Path) -> Result<EntryMetadata, std::io::Error> {
let metadata = tokio::fs::metadata(path).await?;
let metadata = tokio::fs::symlink_metadata(path).await?;
let kind = if metadata.is_dir() {
EntryKind::Directory
@@ -602,7 +602,7 @@ impl EntryProcessor {
existing_id
} else {
// Create new content identity with deterministic UUID (ready for sync)
let file_size = tokio::fs::metadata(path)
let file_size = tokio::fs::symlink_metadata(path)
.await
.map(|m| m.len() as i64)
.unwrap_or(0);
@@ -900,7 +900,7 @@ impl EntryProcessor {
let mut entry_active: entities::entry::ActiveModel = db_entry.into();
// Update size if it changed
if let Ok(metadata) = std::fs::metadata(&entry.path) {
if let Ok(metadata) = std::fs::symlink_metadata(&entry.path) {
entry_active.size = Set(metadata.len() as i64);
// Update modified time

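The `metadata` → `symlink_metadata` swaps above change how symlinks are treated: `tokio::fs::metadata` follows a link and reports the target (failing outright on a dangling link), while `symlink_metadata` describes the link entry itself. A minimal sketch of the difference, using hypothetical paths that are not part of this commit:

```rust
use std::path::Path;

// Hypothetical: `link.txt` is a symlink pointing at `target.txt`.
async fn inspect(path: &Path) -> std::io::Result<()> {
    // Follows the link: reports the *target's* type and size (errors if dangling).
    let followed = tokio::fs::metadata(path).await?;
    // Does not follow: reports the link entry itself.
    let raw = tokio::fs::symlink_metadata(path).await?;
    println!(
        "followed: is_file={}, len={} | raw: is_symlink={}, len={}",
        followed.is_file(),
        followed.len(),
        raw.file_type().is_symlink(),
        raw.len()
    );
    Ok(())
}
```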
View File

@@ -557,6 +557,12 @@ impl IndexerJob {
Self::new(IndexerJobConfig::ui_navigation(location_id, path))
}
/// Set the ephemeral index storage (must be called before dispatching for ephemeral jobs)
/// This allows external code to maintain a reference to the same storage the job uses
pub fn set_ephemeral_index(&mut self, index: Arc<RwLock<EphemeralIndex>>) {
self.ephemeral_index = Some(index);
}
/// Create an ephemeral browsing job (no database writes)
pub fn ephemeral_browse(path: SdPath, scope: IndexScope) -> Self {
Self::new(IndexerJobConfig::ephemeral_browse(path, scope))

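For context, the verify action added later in this commit uses this hook exactly as the doc comment describes: create the storage, attach it before dispatch, and read it back once the job completes. Condensed from `run_ephemeral_index` in the new `action.rs`:

```rust
// Condensed from this commit's action.rs: share the job's in-memory index
// so the results can be read back after the job finishes.
let ephemeral_index = Arc::new(RwLock::new(EphemeralIndex::new(path.to_path_buf())));

let mut job = IndexerJob::new(config);
job.set_ephemeral_index(ephemeral_index.clone());

let job_handle = library.jobs().dispatch(job).await?;
// ... wait for the JobCompleted event matching job_handle.id() ...
let entries = ephemeral_index.read().await.entries.clone();
```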
View File

@@ -23,6 +23,7 @@ pub mod progress;
pub mod responder;
pub mod rules;
pub mod state;
pub mod verify;
// Re-exports for convenience
pub use action::IndexingAction;
@@ -42,6 +43,7 @@ pub use rules::{
RulerDecision,
};
pub use state::{IndexPhase, IndexerProgress, IndexerState, IndexerStats};
pub use verify::{IndexVerifyAction, IndexVerifyInput, IndexVerifyOutput, IntegrityReport};
// Rules system will be integrated here in the future
// pub mod rules;

View File

@@ -426,7 +426,7 @@ impl IndexPersistence for EphemeralPersistence {
cas_id: String,
) -> JobResult<()> {
// Get file size
let file_size = tokio::fs::metadata(path)
let file_size = tokio::fs::symlink_metadata(path)
.await
.map(|m| m.len())
.unwrap_or(0);

View File

@@ -189,7 +189,7 @@ pub async fn run_processing_phase(
// Add to seen_paths for delete detection (important for resumed jobs)
state.seen_paths.insert(entry.path.clone());
// Get metadata for change detection
let metadata = match std::fs::metadata(&entry.path) {
let metadata = match std::fs::symlink_metadata(&entry.path) {
Ok(m) => m,
Err(e) => {
ctx.add_non_critical_error(format!(

View File

@@ -0,0 +1,543 @@
//! Index integrity verification action
use super::{input::IndexVerifyInput, output::*};
use crate::{
context::CoreContext,
domain::addressing::SdPath,
infra::{
action::{error::ActionError, LibraryAction},
db::entities,
},
ops::indexing::{
entry::EntryProcessor,
job::{
EphemeralIndex, IndexMode, IndexPersistence, IndexScope, IndexerJob, IndexerJobConfig,
},
path_resolver::PathResolver,
state::EntryKind,
},
};
use sea_orm::{ColumnTrait, EntityTrait, QueryFilter};
use std::{
collections::{HashMap, HashSet},
path::{Path, PathBuf},
sync::Arc,
time::Instant,
};
use tokio::sync::RwLock;
use uuid::Uuid;
#[derive(Debug, Clone)]
pub struct IndexVerifyAction {
input: IndexVerifyInput,
}
impl LibraryAction for IndexVerifyAction {
type Input = IndexVerifyInput;
type Output = IndexVerifyOutput;
fn from_input(input: Self::Input) -> Result<Self, String> {
// Validate input
input
.validate()
.map_err(|errors| format!("Validation failed: {}", errors.join("; ")))?;
Ok(Self { input })
}
async fn execute(
self,
library: Arc<crate::library::Library>,
context: Arc<CoreContext>,
) -> Result<Self::Output, ActionError> {
let start = Instant::now();
let path = self.input.path.clone();
tracing::info!(
"Starting index integrity verification for: {}",
path.display()
);
// Step 1: Scan filesystem to get current state
let fs_entries = self.run_ephemeral_index(&library, &context, &path).await?;
// Step 2: Query database for existing entries in this path
let db_entries = self.query_database_entries(&library, &path).await?;
// Step 3: Compare and generate report
let mut report = self.compare_indexes(fs_entries, db_entries, &path).await?;
// Generate summary
report.generate_summary();
let duration = start.elapsed();
tracing::info!(
"Index verification complete in {:.2}s: {}",
duration.as_secs_f64(),
report.summary
);
Ok(IndexVerifyOutput {
is_valid: report.is_valid(),
report,
path,
duration_secs: duration.as_secs_f64(),
})
}
fn action_kind(&self) -> &'static str {
"indexing.verify"
}
}
impl IndexVerifyAction {
/// Run ephemeral indexing to get current filesystem state using the real IndexerJob
async fn run_ephemeral_index(
&self,
library: &Arc<crate::library::Library>,
context: &Arc<CoreContext>,
path: &Path,
) -> Result<HashMap<PathBuf, crate::ops::indexing::entry::EntryMetadata>, ActionError> {
use tokio::sync::RwLock;
tracing::debug!("Running ephemeral indexer job on {}", path.display());
// Create ephemeral index storage that we'll share with the job
let ephemeral_index = Arc::new(RwLock::new(EphemeralIndex::new(path.to_path_buf())));
// Subscribe to job events before dispatching
let mut event_subscriber = context.events.subscribe();
// Create indexer job config for ephemeral scanning
let config = IndexerJobConfig {
location_id: None, // Ephemeral - no location
path: SdPath::local(path),
mode: IndexMode::Deep, // Full metadata extraction including inodes
scope: IndexScope::Recursive,
persistence: IndexPersistence::Ephemeral,
max_depth: None,
rule_toggles: Default::default(),
};
// Create the job and set our ephemeral index storage BEFORE dispatching
let mut job = IndexerJob::new(config);
job.set_ephemeral_index(ephemeral_index.clone());
// Dispatch the job
let job_handle =
library.jobs().dispatch(job).await.map_err(|e| {
ActionError::Internal(format!("Failed to dispatch indexer job: {}", e))
})?;
let job_id = job_handle.id().to_string();
tracing::debug!(
"Waiting for ephemeral indexer job {} to complete...",
job_id
);
// Listen for job completion events
loop {
match event_subscriber.recv().await {
Ok(event) => match event {
crate::infra::event::Event::JobCompleted {
job_id: completed_id,
..
} if completed_id == job_id => {
tracing::debug!("Ephemeral indexer job {} completed", job_id);
break;
}
crate::infra::event::Event::JobFailed {
job_id: failed_id,
error,
..
} if failed_id == job_id => {
return Err(ActionError::Internal(format!(
"Ephemeral indexer job failed: {}",
error
)));
}
crate::infra::event::Event::JobCancelled {
job_id: cancelled_id,
..
} if cancelled_id == job_id => {
return Err(ActionError::Internal(
"Ephemeral indexer job was cancelled".to_string(),
));
}
_ => {
// Not our job event, keep listening
}
},
Err(e) => {
return Err(ActionError::Internal(format!(
"Failed to receive job event: {}",
e
)));
}
}
}
tracing::debug!("Ephemeral indexer job completed, extracting results");
// Extract the results from our shared ephemeral index
let entries = {
let index = ephemeral_index.read().await;
index.entries.clone()
};
tracing::debug!(
"Collected {} filesystem entries from ephemeral index",
entries.len()
);
Ok(entries)
}
/// Query database for all entries under the given path
async fn query_database_entries(
&self,
library: &Arc<crate::library::Library>,
root_path: &Path,
) -> Result<HashMap<PathBuf, (entities::entry::Model, PathBuf)>, ActionError> {
tracing::debug!("Querying database entries for {}", root_path.display());
let db = library.db().conn();
let root_path_str = root_path.to_string_lossy().to_string();
// First, find which location this path belongs to
let locations = entities::location::Entity::find()
.all(db)
.await
.map_err(|e| ActionError::Internal(format!("Failed to query locations: {}", e)))?;
let mut target_location = None;
for loc in locations {
let loc_path = PathResolver::get_full_path(db, loc.entry_id)
.await
.map_err(|e| {
ActionError::Internal(format!("Failed to get location path: {}", e))
})?;
// Check if our target path is within this location
if root_path.starts_with(&loc_path) || root_path == loc_path {
target_location = Some((loc, loc_path));
break;
}
}
let Some((location, location_path)) = target_location else {
return Err(ActionError::Internal(format!(
"Path {} does not belong to any managed location",
root_path.display()
)));
};
tracing::debug!(
"Found location {} for path {}",
location.name.as_deref().unwrap_or("Unknown"),
root_path.display()
);
let mut entries_map = HashMap::new();
// Find the directory entry for this specific path
let root_entry = entities::directory_paths::Entity::find()
.filter(entities::directory_paths::Column::Path.eq(&root_path_str))
.one(db)
.await
.map_err(|e| {
ActionError::Internal(format!("Failed to query directory paths: {}", e))
})?;
if let Some(root_dir) = root_entry {
// Get all descendant entries using closure table
let descendant_closures = entities::entry_closure::Entity::find()
.filter(entities::entry_closure::Column::AncestorId.eq(root_dir.entry_id))
.all(db)
.await
.map_err(|e| {
ActionError::Internal(format!("Failed to query entry closure: {}", e))
})?;
let descendant_ids: Vec<i32> = descendant_closures
.iter()
.map(|ec| ec.descendant_id)
.collect();
if descendant_ids.is_empty() {
tracing::warn!("No descendants found for root directory");
return Ok(entries_map);
}
// Fetch all entries
let entries = entities::entry::Entity::find()
.filter(entities::entry::Column::Id.is_in(descendant_ids))
.all(db)
.await
.map_err(|e| ActionError::Internal(format!("Failed to query entries: {}", e)))?;
tracing::debug!("Found {} descendant entries", entries.len());
// Resolve full paths for all entries
for entry in entries {
let full_path = PathResolver::get_full_path(db, entry.id)
.await
.unwrap_or_else(|_| PathBuf::from(&entry.name));
entries_map.insert(full_path.clone(), (entry, full_path));
}
} else {
// Path is within a location but not the root - need to find the entry ID for this path
// by traversing from the location root
tracing::debug!("Path is subdirectory of location, traversing from root");
let relative_path = root_path.strip_prefix(&location_path).map_err(|e| {
ActionError::Internal(format!("Failed to compute relative path: {}", e))
})?;
// Get path components
let components: Vec<&str> = relative_path
.components()
.filter_map(|c| c.as_os_str().to_str())
.collect();
if components.is_empty() {
// This is the location root, use location.entry_id
let root_entry_id = location.entry_id;
// Get all descendants using closure table
let descendant_closures = entities::entry_closure::Entity::find()
.filter(entities::entry_closure::Column::AncestorId.eq(root_entry_id))
.all(db)
.await
.map_err(|e| {
ActionError::Internal(format!("Failed to query entry closure: {}", e))
})?;
let descendant_ids: Vec<i32> = descendant_closures
.iter()
.map(|ec| ec.descendant_id)
.collect();
let entries = entities::entry::Entity::find()
.filter(entities::entry::Column::Id.is_in(descendant_ids))
.all(db)
.await
.map_err(|e| {
ActionError::Internal(format!("Failed to query entries: {}", e))
})?;
for entry in entries {
let full_path = PathResolver::get_full_path(db, entry.id)
.await
.unwrap_or_else(|_| PathBuf::from(&entry.name));
entries_map.insert(full_path.clone(), (entry, full_path));
}
} else {
// Traverse from location root to find the target directory
let mut current_parent_id = Some(location.entry_id);
for component in &components {
if let Some(parent_id) = current_parent_id {
// Find child with this name
let child = entities::entry::Entity::find()
.filter(entities::entry::Column::ParentId.eq(parent_id))
.filter(entities::entry::Column::Name.eq(*component))
.one(db)
.await
.map_err(|e| {
ActionError::Internal(format!("Failed to query entry: {}", e))
})?;
current_parent_id = child.as_ref().map(|c| c.id);
} else {
break;
}
}
if let Some(target_entry_id) = current_parent_id {
// Get all descendants of this subdirectory
let descendant_closures = entities::entry_closure::Entity::find()
.filter(entities::entry_closure::Column::AncestorId.eq(target_entry_id))
.all(db)
.await
.map_err(|e| {
ActionError::Internal(format!("Failed to query entry closure: {}", e))
})?;
let descendant_ids: Vec<i32> = descendant_closures
.iter()
.map(|ec| ec.descendant_id)
.collect();
let entries = entities::entry::Entity::find()
.filter(entities::entry::Column::Id.is_in(descendant_ids))
.all(db)
.await
.map_err(|e| {
ActionError::Internal(format!("Failed to query entries: {}", e))
})?;
for entry in entries {
let full_path = PathResolver::get_full_path(db, entry.id)
.await
.unwrap_or_else(|_| PathBuf::from(&entry.name));
entries_map.insert(full_path.clone(), (entry, full_path));
}
}
}
tracing::debug!("Found {} entries in database", entries_map.len());
}
Ok(entries_map)
}
/// Compare ephemeral index with database entries
async fn compare_indexes(
&self,
fs_entries: HashMap<PathBuf, crate::ops::indexing::entry::EntryMetadata>,
mut db_entries: HashMap<PathBuf, (entities::entry::Model, PathBuf)>,
root_path: &Path,
) -> Result<IntegrityReport, ActionError> {
tracing::debug!("Comparing filesystem and database indexes");
let mut report = IntegrityReport::new();
tracing::debug!(
"Comparing {} filesystem entries with {} database entries",
fs_entries.len(),
db_entries.len()
);
// Remove the root path itself from db_entries - the ephemeral indexer doesn't
// create an entry for the root directory it's scanning, only its contents
db_entries.remove(root_path);
// Count files and directories
for (_path, metadata) in &fs_entries {
match metadata.kind {
EntryKind::File => report.filesystem_file_count += 1,
EntryKind::Directory => report.filesystem_dir_count += 1,
_ => {}
}
}
for (_path, (entry, _)) in &db_entries {
let kind = entry.entry_kind();
match kind {
entities::entry::EntryKind::File => report.database_file_count += 1,
entities::entry::EntryKind::Directory => report.database_dir_count += 1,
_ => {}
}
}
// Build sets for comparison
// On case-insensitive filesystems (macOS), normalize paths to lowercase for comparison
#[cfg(target_os = "macos")]
let normalize_path = |pb: &PathBuf| -> String { pb.to_string_lossy().to_lowercase() };
#[cfg(not(target_os = "macos"))]
let normalize_path = |pb: &PathBuf| -> String { pb.to_string_lossy().to_string() };
// Create normalized path maps for case-insensitive comparison on macOS
let fs_normalized: HashMap<String, PathBuf> = fs_entries
.keys()
.map(|p| (normalize_path(p), p.clone()))
.collect();
let db_normalized: HashMap<String, PathBuf> = db_entries
.keys()
.map(|p| (normalize_path(p), p.clone()))
.collect();
let fs_paths: HashSet<String> = fs_normalized.keys().cloned().collect();
let db_paths: HashSet<String> = db_normalized.keys().cloned().collect();
// Find missing from index (in filesystem but not in DB)
for norm_path in fs_paths.difference(&db_paths) {
let path = &fs_normalized[norm_path];
report
.missing_from_index
.push(IntegrityDifference::missing_from_index(path.clone()));
}
// Find stale in index (in DB but not on filesystem)
for norm_path in db_paths.difference(&fs_paths) {
let path = &db_normalized[norm_path];
report
.stale_in_index
.push(IntegrityDifference::stale_in_index(path.clone()));
}
// Find metadata mismatches (in both but with different data)
for norm_path in fs_paths.intersection(&db_paths) {
let fs_path = &fs_normalized[norm_path];
let db_path = &db_normalized[norm_path];
if let (Some(fs_meta), Some((db_entry, _))) =
(fs_entries.get(fs_path), db_entries.get(db_path))
{
// Check size
let fs_size = fs_meta.size;
let db_size = db_entry.size as u64;
if fs_size != db_size {
report
.metadata_mismatches
.push(IntegrityDifference::size_mismatch_with_debug(
fs_path.clone(),
fs_size,
db_size,
db_entry.id,
db_entry.name.clone(),
));
}
// Check modified time (allow 1 second tolerance for filesystem precision)
if let Some(fs_modified) = fs_meta.modified {
if let Ok(fs_duration) = fs_modified.duration_since(std::time::UNIX_EPOCH) {
let fs_secs = fs_duration.as_secs() as i64;
let db_secs = db_entry.modified_at.timestamp();
if (fs_secs - db_secs).abs() > 1 {
report.metadata_mismatches.push(
IntegrityDifference::modified_time_mismatch(
fs_path.clone(),
format!("{}", fs_secs),
format!("{}", db_secs),
),
);
}
}
}
// Check inode if available
if let (Some(fs_inode), Some(db_inode)) = (fs_meta.inode, db_entry.inode) {
if fs_inode != db_inode as u64 {
report.metadata_mismatches.push(IntegrityDifference {
path: fs_path.clone(),
issue_type: IssueType::InodeMismatch,
expected: Some(format!("{}", fs_inode)),
actual: Some(format!("{}", db_inode)),
description: format!("Inode mismatch for {}", fs_path.display()),
db_entry_id: Some(db_entry.id),
db_entry_name: Some(db_entry.name.clone()),
});
}
}
}
}
tracing::debug!(
"Comparison complete: {} missing, {} stale, {} metadata mismatches",
report.missing_from_index.len(),
report.stale_in_index.len(),
report.metadata_mismatches.len()
);
Ok(report)
}
}
crate::register_library_action!(IndexVerifyAction, "indexing.verify");

View File

@@ -0,0 +1,52 @@
//! Input types for index verification
use serde::{Deserialize, Serialize};
use specta::Type;
use std::path::PathBuf;
#[derive(Debug, Clone, Serialize, Deserialize, Type)]
pub struct IndexVerifyInput {
/// Path to verify (can be a location root or subdirectory)
pub path: PathBuf,
/// Whether to check content hashes (slower but more thorough)
#[serde(default)]
pub verify_content: bool,
/// Whether to include detailed file-by-file comparison
#[serde(default = "default_true")]
pub detailed_report: bool,
/// Whether to fix issues automatically (future feature)
#[serde(default)]
pub auto_fix: bool,
}
fn default_true() -> bool {
true
}
impl IndexVerifyInput {
pub fn new(path: PathBuf) -> Self {
Self {
path,
verify_content: false,
detailed_report: true,
auto_fix: false,
}
}
pub fn validate(&self) -> Result<(), Vec<String>> {
let mut errors = Vec::new();
if !self.path.exists() {
errors.push(format!("Path does not exist: {}", self.path.display()));
}
if errors.is_empty() {
Ok(())
} else {
Err(errors)
}
}
}
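One note on the serde attributes: a payload that omits the optional fields deserializes with `verify_content` and `auto_fix` as `false` and `detailed_report` as `true` (via `default_true`). A minimal round-trip sketch, assuming `serde_json` is available:

```rust
// Hypothetical payload carrying only the required field; the rest fall
// back to their serde defaults.
let input: IndexVerifyInput = serde_json::from_str(r#"{ "path": "/tmp/photos" }"#)?;
assert!(!input.verify_content); // #[serde(default)] -> false
assert!(input.detailed_report); // #[serde(default = "default_true")] -> true
assert!(!input.auto_fix);       // #[serde(default)] -> false
```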

View File

@@ -0,0 +1,12 @@
//! Index Integrity Verification
//!
//! Verifies the integrity of the Spacedrive index by comparing the database state
//! with the actual filesystem state for a given path.
pub mod action;
pub mod input;
pub mod output;
pub use action::IndexVerifyAction;
pub use input::IndexVerifyInput;
pub use output::{IndexVerifyOutput, IntegrityDifference, IntegrityReport};

View File

@@ -0,0 +1,258 @@
//! Output types for index verification
use serde::{Deserialize, Serialize};
use specta::Type;
use std::path::PathBuf;
/// Result of index integrity verification
#[derive(Debug, Clone, Serialize, Deserialize, Type)]
pub struct IndexVerifyOutput {
/// Overall integrity status
pub is_valid: bool,
/// Integrity report with detailed findings
pub report: IntegrityReport,
/// Path that was verified
pub path: PathBuf,
/// Time taken to verify (seconds)
pub duration_secs: f64,
}
/// Detailed integrity report
#[derive(Debug, Clone, Serialize, Deserialize, Type)]
pub struct IntegrityReport {
/// Total files found on filesystem
pub filesystem_file_count: usize,
/// Total files in database index
pub database_file_count: usize,
/// Total directories found on filesystem
pub filesystem_dir_count: usize,
/// Total directories in database index
pub database_dir_count: usize,
/// Files missing from index (on filesystem but not in DB)
pub missing_from_index: Vec<IntegrityDifference>,
/// Stale entries in index (in DB but not on filesystem)
pub stale_in_index: Vec<IntegrityDifference>,
/// Entries with incorrect metadata
pub metadata_mismatches: Vec<IntegrityDifference>,
/// Entries with incorrect parent relationships
pub hierarchy_errors: Vec<IntegrityDifference>,
/// Summary statistics
pub summary: String,
}
impl IntegrityReport {
pub fn new() -> Self {
Self {
filesystem_file_count: 0,
database_file_count: 0,
filesystem_dir_count: 0,
database_dir_count: 0,
missing_from_index: Vec::new(),
stale_in_index: Vec::new(),
metadata_mismatches: Vec::new(),
hierarchy_errors: Vec::new(),
summary: String::new(),
}
}
pub fn is_valid(&self) -> bool {
self.missing_from_index.is_empty()
&& self.stale_in_index.is_empty()
&& self.metadata_mismatches.is_empty()
&& self.hierarchy_errors.is_empty()
}
pub fn total_issues(&self) -> usize {
self.missing_from_index.len()
+ self.stale_in_index.len()
+ self.metadata_mismatches.len()
+ self.hierarchy_errors.len()
}
pub fn generate_summary(&mut self) {
if self.is_valid() {
self.summary = format!(
"✅ Index is valid! {} files and {} directories match filesystem perfectly.",
self.filesystem_file_count, self.filesystem_dir_count
);
} else {
let mut parts = Vec::new();
if !self.missing_from_index.is_empty() {
parts.push(format!(
"{} missing from index",
self.missing_from_index.len()
));
}
if !self.stale_in_index.is_empty() {
parts.push(format!("{} stale entries", self.stale_in_index.len()));
}
if !self.metadata_mismatches.is_empty() {
parts.push(format!(
"{} metadata mismatches",
self.metadata_mismatches.len()
));
}
if !self.hierarchy_errors.is_empty() {
parts.push(format!("{} hierarchy errors", self.hierarchy_errors.len()));
}
self.summary = format!(
"❌ Index has diverged: {}. Total issues: {}",
parts.join(", "),
self.total_issues()
);
}
}
}
impl Default for IntegrityReport {
fn default() -> Self {
Self::new()
}
}
/// Represents a single integrity difference
#[derive(Debug, Clone, Serialize, Deserialize, Type)]
pub struct IntegrityDifference {
/// Path relative to verification root
pub path: PathBuf,
/// Type of issue
pub issue_type: IssueType,
/// Expected value (from filesystem or correct state)
pub expected: Option<String>,
/// Actual value (from database)
pub actual: Option<String>,
/// Human-readable description
pub description: String,
/// Debug: database entry ID for investigation
#[serde(skip_serializing_if = "Option::is_none")]
pub db_entry_id: Option<i32>,
/// Debug: database entry name
#[serde(skip_serializing_if = "Option::is_none")]
pub db_entry_name: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize, Type)]
#[serde(tag = "type")]
pub enum IssueType {
MissingFromIndex,
StaleInIndex,
SizeMismatch,
ModifiedTimeMismatch,
InodeMismatch,
ExtensionMismatch,
ParentMismatch,
KindMismatch,
}
impl IntegrityDifference {
pub fn missing_from_index(path: PathBuf) -> Self {
Self {
description: format!(
"File exists on filesystem but not in index: {}",
path.display()
),
path,
issue_type: IssueType::MissingFromIndex,
expected: Some("Indexed".to_string()),
actual: Some("Not indexed".to_string()),
db_entry_id: None,
db_entry_name: None,
}
}
pub fn stale_in_index(path: PathBuf) -> Self {
Self {
description: format!(
"Entry exists in index but not on filesystem: {}",
path.display()
),
path,
issue_type: IssueType::StaleInIndex,
expected: Some("Not indexed".to_string()),
actual: Some("Indexed".to_string()),
db_entry_id: None,
db_entry_name: None,
}
}
pub fn size_mismatch(path: PathBuf, expected: u64, actual: u64) -> Self {
Self {
description: format!("Size mismatch for {}", path.display()),
path,
issue_type: IssueType::SizeMismatch,
expected: Some(format!("{} bytes", expected)),
actual: Some(format!("{} bytes", actual)),
db_entry_id: None,
db_entry_name: None,
}
}
pub fn size_mismatch_with_debug(
path: PathBuf,
expected: u64,
actual: u64,
db_id: i32,
db_name: String,
) -> Self {
Self {
description: format!(
"Size mismatch for {} (db_id: {}, db_name: {})",
path.display(),
db_id,
db_name
),
path,
issue_type: IssueType::SizeMismatch,
expected: Some(format!("{} bytes", expected)),
actual: Some(format!("{} bytes", actual)),
db_entry_id: Some(db_id),
db_entry_name: Some(db_name),
}
}
pub fn modified_time_mismatch(path: PathBuf, expected: String, actual: String) -> Self {
Self {
description: format!("Modified time mismatch for {}", path.display()),
path,
issue_type: IssueType::ModifiedTimeMismatch,
expected: Some(expected),
actual: Some(actual),
db_entry_id: None,
db_entry_name: None,
}
}
pub fn parent_mismatch(path: PathBuf, expected_parent: String, actual_parent: String) -> Self {
Self {
description: format!("Parent mismatch for {}", path.display()),
path,
issue_type: IssueType::ParentMismatch,
expected: Some(expected_parent),
actual: Some(actual_parent),
db_entry_id: None,
db_entry_name: None,
}
}
}
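Taken together, a report is built from these constructors and summarized at the end. A small usage sketch with hypothetical values:

```rust
use std::path::PathBuf;

let mut report = IntegrityReport::new();
report.filesystem_file_count = 3;
report.missing_from_index.push(IntegrityDifference::missing_from_index(
    PathBuf::from("/tmp/new-file.txt"),
));
report.generate_summary();

assert!(!report.is_valid());
assert_eq!(report.total_issues(), 1);
// report.summary: "❌ Index has diverged: 1 missing from index. Total issues: 1"
```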

docs/cli/index-verify.md (new file, 211 lines)
View File

@@ -0,0 +1,211 @@
# Index Integrity Verification Command
## Overview
The `index verify` command performs a comprehensive integrity check of the Spacedrive index for any given path. It compares the actual filesystem state with what's stored in the database and reports any discrepancies.
## Usage
```bash
# Basic usage - verify a location
sd-cli index verify /path/to/check
# Verify with content hash checking (slower but more thorough)
sd-cli index verify /path/to/check --verify-content
# Verify without detailed output (just summary)
sd-cli index verify /path/to/check --detailed=false
# Future: Auto-fix issues (not yet implemented)
sd-cli index verify /path/to/check --auto-fix
```
## How It Works
The command performs three main steps:
### 1. Ephemeral Indexing
Runs a fresh, in-memory index scan of the filesystem path to capture the current state:
- Discovers all files and directories
- Extracts metadata (size, modified time, inode)
- Optionally generates content hashes (with `--verify-content`)
### 2. Database Query
Queries the Spacedrive database for all indexed entries under the given path:
- Uses the closure table for efficient descendant lookup (condensed sketch after this list)
- Resolves full paths for all entries
- Extracts stored metadata
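The descendant lookup is a single closure-table query followed by one batched fetch, condensed here from `IndexVerifyAction::query_database_entries`:

```rust
// One closure-table query yields every descendant ID of the root entry,
// then the matching entries are fetched in a single batch.
let descendant_ids: Vec<i32> = entities::entry_closure::Entity::find()
    .filter(entities::entry_closure::Column::AncestorId.eq(root_entry_id))
    .all(db)
    .await?
    .iter()
    .map(|ec| ec.descendant_id)
    .collect();

let entries = entities::entry::Entity::find()
    .filter(entities::entry::Column::Id.is_in(descendant_ids))
    .all(db)
    .await?;
```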
### 3. Comparison & Reporting
Compares the two datasets and categorizes differences:
| Issue Type | Description |
|------------|-------------|
| **Missing from Index** | Files exist on filesystem but not in database |
| **Stale in Index** | Files exist in database but not on filesystem |
| **Metadata Mismatch** | Files exist in both but with incorrect size/time/inode |
| **Hierarchy Error** | Files have incorrect parent relationships |
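Conceptually, the comparison reduces to two set differences plus an intersection walk. A simplified sketch of the idea (the real implementation also normalizes paths on case-insensitive filesystems and checks modified time and inode, not just size):

```rust
use std::collections::{HashMap, HashSet};
use std::path::PathBuf;

// Simplified: each map records path -> size for one side of the comparison.
fn categorize(
    fs_sizes: &HashMap<PathBuf, u64>,
    db_sizes: &HashMap<PathBuf, u64>,
) -> (Vec<PathBuf>, Vec<PathBuf>, Vec<PathBuf>) {
    let fs_paths: HashSet<PathBuf> = fs_sizes.keys().cloned().collect();
    let db_paths: HashSet<PathBuf> = db_sizes.keys().cloned().collect();

    // On the filesystem but absent from the database.
    let missing: Vec<PathBuf> = fs_paths.difference(&db_paths).cloned().collect();
    // In the database but gone from the filesystem.
    let stale: Vec<PathBuf> = db_paths.difference(&fs_paths).cloned().collect();
    // Present on both sides, but with diverging metadata.
    let mismatched: Vec<PathBuf> = fs_paths
        .intersection(&db_paths)
        .filter(|p| fs_sizes[*p] != db_sizes[*p])
        .cloned()
        .collect();

    (missing, stale, mismatched)
}
```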
## Output Format
```
╔══════════════════════════════════════════════════════════════╗
║ INDEX INTEGRITY VERIFICATION REPORT ║
╠══════════════════════════════════════════════════════════════╣
║ Path: /Users/jamie/Documents ║
║ Duration: 2.34s ║
╠══════════════════════════════════════════════════════════════╣
║ Filesystem: 1247 files, 89 directories ║
║ Database: 1245 files, 89 directories ║
╠══════════════════════════════════════════════════════════════╣
║ ❌ STATUS: DIVERGED - 4 issues found ║
╠══════════════════════════════════════════════════════════════╣
║ ⚠️ Missing from index: 2 ║
║ - /Users/jamie/Documents/new-file.txt ║
║ - /Users/jamie/Documents/another-new.pdf ║
║ 🗑️ Stale in index: 2 ║
║ - /Users/jamie/Documents/deleted-file.txt ║
║ - /Users/jamie/Documents/old/removed.doc ║
╠══════════════════════════════════════════════════════════════╣
║ ❌ Index has diverged: 2 missing, 2 stale. Total: 4 ║
╚══════════════════════════════════════════════════════════════╝
```
## When to Use
### Debugging Index Issues
If you suspect the index is out of sync with the filesystem:
```bash
sd-cli index verify ~/Documents
```
### After Manual File Operations
If you've manually modified files outside Spacedrive:
```bash
# You edited files manually
sd-cli index verify /path/that/changed
```
### Performance Testing
To verify the watcher is working correctly:
```bash
# Make changes, then verify
touch /test/newfile.txt
sleep 1
sd-cli index verify /test
```
### Pre-Sync Validation
Before syncing a library to ensure data integrity:
```bash
sd-cli index verify / # Verify entire library
```
## Exit Codes
| Code | Meaning |
|------|---------|
| 0 | Index is valid (no issues found) |
| 1 | Index has diverged (issues found) |
| 2 | Verification failed (error occurred) |
## Use in Scripts
```bash
#!/bin/bash
# Automated integrity check
if sd-cli index verify /data/important --detailed=false; then
echo "✅ Index is valid"
else
echo "❌ Index has issues - running rescan"
sd-cli location rescan <location-id>
fi
```
## Performance Characteristics
| Path Size | Typical Duration | Memory Usage |
|-----------|------------------|--------------|
| 100 files | <1 second | ~10 MB |
| 1,000 files | 1-3 seconds | ~50 MB |
| 10,000 files | 10-30 seconds | ~200 MB |
| 100,000 files | 1-5 minutes | ~1 GB |
With `--verify-content`:
- Add 50-200% time overhead (depends on file sizes)
- Requires reading file contents for hashing
## Common Use Cases
### 1. Verify Location After Import
```bash
sd-cli location add ~/Photos --name "Photos"
# Wait for indexing to complete
sd-cli index verify ~/Photos
```
### 2. Debug Watcher Issues
```bash
# Monitor filesystem
watch -n 5 'sd-cli index verify /watched/path --detailed=false'
```
### 3. Find Orphaned Entries
```bash
# Check for stale entries
sd-cli index verify / | grep "Stale in index"
```
### 4. Validate After Bulk Operations
```bash
# After moving many files
mv ~/old-location/* ~/new-location/
sd-cli index verify ~/new-location
```
## Troubleshooting
### "Path does not exist"
- Ensure the path is correct and accessible
- Check permissions
### "No entries found in database"
- Path might not be part of a managed location
- Run `sd-cli location list` to see indexed locations
### High number of "Missing from Index"
- Location watcher might be disabled
- Files were added manually without indexing
- Run `sd-cli location rescan <location-id>` to fix
### High number of "Stale in Index"
- Files were deleted manually
- Database not updated
- Consider running cleanup
## API Access
The verification can also be triggered programmatically:
```rust
use sd_core::ops::indexing::verify::{IndexVerifyAction, IndexVerifyInput};
use std::path::PathBuf;

let input = IndexVerifyInput::new(PathBuf::from("/path/to/verify"));
let result = IndexVerifyAction::from_input(input)?
.execute(library, context)
.await?;
if !result.is_valid {
println!("Found {} issues", result.report.total_issues());
}
```
## Future Enhancements
- `--auto-fix`: Automatically repair issues (add missing, remove stale)
- `--watch`: Continuously verify and report drift
- `--json`: Machine-readable output for automation
- `--compare-with <snapshot>`: Compare current state with previous snapshot
- `--export-snapshot`: Save current state for future comparison

View File

@@ -0,0 +1,589 @@
# Additional Test Scenarios for Location Watcher
## Currently Passing ✅
1. Initial indexing
2. File creation
3. File modification
4. Directory creation
5. Nested file creation
6. File renaming (same directory)
7. File moving (different directory)
## Next Priority Tests
### Tier 1: Critical Operations (Should work next)
#### Scenario 8: File Deletion
```rust
println!("\n--- Scenario 8: File Deletion ---");
// Delete a file
let entry_before = harness.verify_entry_exists("initial").await?;
let entry_id = entry_before.id;
harness.delete_file("initial.txt").await?;
harness.wait_for_fs_event(
FsRawEventKind::Remove { path: harness.path("initial.txt") },
30
).await?;
// Verify entry no longer exists
harness.verify_entry_not_exists("initial").await?;
harness.verify_entry_count(6).await?; // One less entry
// Verify entry is actually deleted (not just orphaned)
let entry_still_exists = entities::entry::Entity::find_by_id(entry_id)
.one(harness.library.db().conn())
.await?;
assert!(entry_still_exists.is_none(), "Entry should be deleted from database");
```
**Known Issue**: Currently causes task panic - needs investigation
#### Scenario 9: Directory Deletion (With Contents)
```rust
println!("\n--- Scenario 9: Directory Deletion ---");
// Create directory with multiple files
harness.create_dir("temp").await?;
harness.wait_for_fs_event(
FsRawEventKind::Create { path: harness.path("temp") },
30
).await?;
harness.create_file("temp/file1.txt", "content 1").await?;
harness.create_file("temp/file2.txt", "content 2").await?;
harness.create_file("temp/file3.txt", "content 3").await?;
// Wait for all files to be indexed
tokio::time::sleep(Duration::from_secs(1)).await;
let count_before = count_location_entries(&harness.library, harness.location_id).await?;
// Delete entire directory
harness.delete_dir("temp").await?;
harness.wait_for_fs_event(
FsRawEventKind::Remove { path: harness.path("temp") },
30
).await?;
// Wait for cascade deletion
tokio::time::sleep(Duration::from_millis(500)).await;
// Verify all entries are gone (directory + 3 files = 4 entries)
let count_after = count_location_entries(&harness.library, harness.location_id).await?;
assert_eq!(count_after, count_before - 4, "Should delete directory and all contents");
```
### Tier 2: Complex Rename Operations
#### Scenario 10: Bulk Renames
```rust
println!("\n--- Scenario 10: Bulk Renames ---");
let count_before = count_location_entries(&harness.library, harness.location_id).await?;
// Create 10 files
for i in 1..=10 {
harness.create_file(&format!("bulk-{}.txt", i), "content").await?;
}
tokio::time::sleep(Duration::from_secs(1)).await;
harness.verify_entry_count(count_before + 10).await?;
// Rename all files rapidly
let start = Instant::now();
for i in 1..=10 {
harness.rename_file(
&format!("bulk-{}.txt", i),
&format!("renamed-bulk-{}.txt", i)
).await?;
}
// Wait for all rename events
tokio::time::sleep(Duration::from_secs(1)).await;
// Verify all renamed correctly
for i in 1..=10 {
harness.verify_entry_exists(&format!("renamed-bulk-{}", i)).await?;
harness.verify_entry_not_exists(&format!("bulk-{}", i)).await?;
}
let duration = start.elapsed();
println!("✓ Renamed 10 files in {:?}", duration);
assert!(duration.as_millis() < 2000, "Bulk renames should complete in <2s");
harness.verify_entry_count(count_before + 10).await?; // Same count!
```
**Success Metric**: 10 renames in <2s, no duplicates
#### Scenario 11: Rename Chain (A→B, B→C)
```rust
println!("\n--- Scenario 11: Rename Chain ---");
harness.create_file("step1.txt", "content").await?;
tokio::time::sleep(Duration::from_millis(600)).await;
let entry_start = harness.verify_entry_exists("step1").await?;
let entry_id = entry_start.id;
// Rapid renames: step1 → step2 → step3
harness.rename_file("step1.txt", "step2.txt").await?;
tokio::time::sleep(Duration::from_millis(50)).await; // Small delay
harness.rename_file("step2.txt", "step3.txt").await?;
tokio::time::sleep(Duration::from_secs(1)).await;
// Verify final state
let entry_end = harness.verify_entry_exists("step3").await?;
assert_eq!(entry_id, entry_end.id, "Entry ID should be preserved through rename chain");
harness.verify_entry_not_exists("step1").await?;
harness.verify_entry_not_exists("step2").await?;
```
**Tests**: Worker's rename chain collapsing logic
### Tier 3: Edge Cases
#### Scenario 12: Hidden Files
```rust
println!("\n--- Scenario 12: Hidden Files ---");
#[cfg(unix)]
{
harness.create_file(".hidden-file", "secret").await?;
tokio::time::sleep(Duration::from_millis(600)).await;
// Hidden files should still be indexed
harness.verify_entry_exists(".hidden-file").await?;
// Rename hidden file
harness.rename_file(".hidden-file", ".hidden-renamed").await?;
tokio::time::sleep(Duration::from_millis(600)).await;
harness.verify_entry_exists(".hidden-renamed").await?;
}
```
#### Scenario 13: Symlink Handling
```rust
println!("\n--- Scenario 13: Symlink Handling ---");
harness.create_file("target.txt", "target content").await?;
tokio::time::sleep(Duration::from_millis(600)).await;
#[cfg(unix)]
{
std::os::unix::fs::symlink(
harness.path("target.txt"),
harness.path("link.txt")
)?;
tokio::time::sleep(Duration::from_millis(600)).await;
// Verify symlink is detected
let entries = get_location_entries(&harness.library, harness.location_id).await?;
let symlink_entry = entries.iter().find(|e| e.name == "link");
assert!(symlink_entry.is_some(), "Symlink should be indexed");
}
```
#### Scenario 14: Rapid File Modifications (Debouncing Test)
```rust
println!("\n--- Scenario 14: Rapid Modifications ---");
harness.create_file("rapidly-changing.txt", "v1").await?;
tokio::time::sleep(Duration::from_millis(600)).await;
let start_count = count_location_entries(&harness.library, harness.location_id).await?;
// Modify the same file 20 times rapidly
for i in 2..=20 {
harness.modify_file("rapidly-changing.txt", &format!("v{}", i)).await?;
tokio::time::sleep(Duration::from_millis(50)).await;
}
// Wait for debouncing to settle
tokio::time::sleep(Duration::from_secs(2)).await;
// Should NOT create 20 entries - should coalesce into updates
let end_count = count_location_entries(&harness.library, harness.location_id).await?;
assert_eq!(end_count, start_count, "Rapid modifications should not create duplicates");
// Verify final content
let entry = harness.verify_entry_exists("rapidly-changing").await?;
// Size should reflect last modification
```
**Tests**: Debouncing and coalescing logic
#### Scenario 15: Same Name After Delete (Recreate)
```rust
println!("\n--- Scenario 15: Recreate Same Filename ---");
harness.create_file("temp-file.txt", "first version").await?;
tokio::time::sleep(Duration::from_millis(600)).await;
let entry_first = harness.verify_entry_exists("temp-file").await?;
let first_id = entry_first.id;
let first_inode = entry_first.inode;
// Delete it
harness.delete_file("temp-file.txt").await?;
tokio::time::sleep(Duration::from_millis(600)).await;
harness.verify_entry_not_exists("temp-file").await?;
// Create new file with same name
harness.create_file("temp-file.txt", "second version different content").await?;
tokio::time::sleep(Duration::from_millis(600)).await;
let entry_second = harness.verify_entry_exists("temp-file").await?;
// Should be a DIFFERENT entry (different ID and inode)
assert_ne!(entry_second.id, first_id, "Should create new entry, not reuse old one");
assert_ne!(entry_second.inode, first_inode, "Should have different inode");
```
**Tests**: Entry identity vs filename - ensures we don't reuse deleted entries
### Tier 4: Performance & Stress Tests
#### Scenario 16: Bulk Create Performance
```rust
println!("\n--- Scenario 16: Bulk Create Performance ---");
let start = Instant::now();
// Create 100 files rapidly
for i in 1..=100 {
harness.create_file(&format!("perf-{}.txt", i), "test content").await?;
}
// Wait for all to be indexed
tokio::time::sleep(Duration::from_secs(3)).await;
let duration = start.elapsed();
let throughput = 100.0 / duration.as_secs_f64();
println!("✓ Created 100 files in {:?}", duration);
println!(" Throughput: {:.2} files/sec", throughput);
// Verify all indexed
for i in 1..=100 {
harness.verify_entry_exists(&format!("perf-{}", i)).await?;
}
// Performance assertions
assert!(throughput > 20.0, "Should handle >20 files/sec even with debouncing");
assert!(duration.as_secs() < 10, "Should complete 100 files in <10s");
```
**Success Metrics**: >20 files/sec, <10s total, >50% coalescing rate
#### Scenario 17: Mixed Operations Chaos Test
```rust
println!("\n--- Scenario 17: Mixed Operations Chaos ---");
// Rapid mixed operations
harness.create_file("chaos1.txt", "1").await?;
harness.create_file("chaos2.txt", "2").await?;
tokio::time::sleep(Duration::from_millis(100)).await;
harness.rename_file("chaos1.txt", "chaos1-renamed.txt").await?;
harness.modify_file("chaos2.txt", "2 modified").await?;
tokio::time::sleep(Duration::from_millis(100)).await;
harness.create_file("chaos3.txt", "3").await?;
harness.delete_file("chaos2.txt").await?; // If deletion works
tokio::time::sleep(Duration::from_millis(100)).await;
harness.rename_file("chaos3.txt", "chaos-final.txt").await?;
// Wait for all operations to settle
tokio::time::sleep(Duration::from_secs(2)).await;
// Verify final state
harness.verify_entry_exists("chaos1-renamed").await?;
harness.verify_entry_exists("chaos-final").await?;
harness.verify_entry_not_exists("chaos1").await?;
harness.verify_entry_not_exists("chaos2").await?; // If deletion works
harness.verify_entry_not_exists("chaos3").await?;
```
**Tests**: Correctness under complex interleaved operations
### Tier 5: Platform-Specific Edge Cases
#### Scenario 18: Special Characters in Filenames
```rust
println!("\n--- Scenario 18: Special Characters ---");
let special_names = vec![
"file with spaces.txt",
"file-with-dashes.txt",
"file_with_underscores.txt",
"file (with parens).txt",
"file[with brackets].txt",
"file{with braces}.txt",
"日本語.txt", // Japanese
"émojis🎉.txt", // Unicode emoji
];
for name in &special_names {
harness.create_file(name, "content").await?;
}
tokio::time::sleep(Duration::from_secs(1)).await;
// Verify all were indexed correctly
for name in &special_names {
let stem = Path::new(name).file_stem().unwrap().to_str().unwrap();
harness.verify_entry_exists(stem).await?;
}
```
#### Scenario 19: Very Long Filenames
```rust
println!("\n--- Scenario 19: Long Filenames ---");
// Create file with 200-character name
let long_name = format!("{}.txt", "a".repeat(200));
harness.create_file(&long_name, "content").await?;
tokio::time::sleep(Duration::from_millis(600)).await;
let stem = "a".repeat(200);
harness.verify_entry_exists(&stem).await?;
```
#### Scenario 20: Deep Nesting
```rust
println!("\n--- Scenario 20: Deep Directory Nesting ---");
// Create 20-level deep directory structure
let mut path = String::new();
for i in 1..=20 {
if !path.is_empty() {
path.push('/');
}
path.push_str(&format!("level{}", i));
}
harness.create_dir(&path).await?;
tokio::time::sleep(Duration::from_millis(600)).await;
// Create file at deepest level
harness.create_file(&format!("{}/deep-file.txt", path), "deep content").await?;
tokio::time::sleep(Duration::from_millis(600)).await;
harness.verify_entry_exists("deep-file").await?;
```
### Tier 6: Rename Edge Cases
#### Scenario 21: Rename While Modifying
```rust
println!("\n--- Scenario 21: Rename During Modification ---");
harness.create_file("busy.txt", "initial").await?;
tokio::time::sleep(Duration::from_millis(600)).await;
let entry_before = harness.verify_entry_exists("busy").await?;
// Modify and rename almost simultaneously
harness.modify_file("busy.txt", "modified content").await?;
tokio::time::sleep(Duration::from_millis(50)).await; // Very short delay
harness.rename_file("busy.txt", "busy-renamed.txt").await?;
tokio::time::sleep(Duration::from_secs(1)).await;
// Verify final state
let entry_after = harness.verify_entry_exists("busy-renamed").await?;
assert_eq!(entry_after.id, entry_before.id, "Should preserve entry ID");
assert_eq!(entry_after.size, 16, "Should have updated size");
```
#### Scenario 22: Rename to Existing Name (Overwrite)
```rust
println!("\n--- Scenario 22: Rename Overwrite ---");
harness.create_file("source.txt", "source content").await?;
harness.create_file("target.txt", "target content").await?;
tokio::time::sleep(Duration::from_millis(600)).await;
let source_entry = harness.verify_entry_exists("source").await?;
let target_entry = harness.verify_entry_exists("target").await?;
// Rename source to target (overwrites target on filesystem)
harness.rename_file("source.txt", "target.txt").await?;
tokio::time::sleep(Duration::from_secs(1)).await;
// target.txt should now have source's content and ID
let final_entry = harness.verify_entry_exists("target").await?;
// On macOS/Unix, the source file's inode is kept
assert_eq!(final_entry.inode, source_entry.inode, "Should keep source's inode");
harness.verify_entry_not_exists("source").await?;
```
#### Scenario 23: Move to Non-Existent Directory
```rust
println!("\n--- Scenario 23: Move to Non-Existent Parent ---");
harness.create_file("orphan.txt", "content").await?;
tokio::time::sleep(Duration::from_millis(600)).await;
// Try to move to directory that doesn't exist
// macOS will fail the rename operation, should handle gracefully
let result = tokio::fs::rename(
harness.path("orphan.txt"),
harness.path("nonexistent/orphan.txt")
).await;
assert!(result.is_err(), "Should fail to move to non-existent directory");
// Entry should still exist at original location
harness.verify_entry_exists("orphan").await?;
```
### Tier 7: Performance Validation
#### Scenario 24: Event Coalescing Rate
```rust
println!("\n--- Scenario 24: Coalescing Validation ---");
// Create 100 files in same directory (should heavily coalesce)
for i in 1..=100 {
harness.create_file(&format!("coalesce-test-{}.txt", i), "c").await?;
}
tokio::time::sleep(Duration::from_secs(2)).await;
// Get worker metrics
if let Some(metrics) = harness.core.services.location_watcher
.get_location_metrics(harness.location_id)
.await
{
let coalescing_rate = metrics.coalescing_rate();
println!(" Coalescing rate: {:.2}%", coalescing_rate * 100.0);
println!(" Events processed: {}", metrics.events_processed());
println!(" Batches sent: {}", metrics.batches_sent());
println!(" Avg batch size: {:.2}", metrics.avg_batch_size());
assert!(coalescing_rate > 0.3, "Should coalesce >30% for bulk creates in same dir");
}
```
#### Scenario 25: Memory Leak Detection
```rust
println!("\n--- Scenario 25: Memory Stability ---");
// Create and delete 1000 files in a loop
for round in 1..=10 {
for i in 1..=100 {
harness.create_file(&format!("temp-{}.txt", i), "content").await?;
}
tokio::time::sleep(Duration::from_millis(600)).await;
for i in 1..=100 {
harness.delete_file(&format!("temp-{}.txt", i)).await?;
}
tokio::time::sleep(Duration::from_millis(600)).await;
println!(" Round {} complete", round);
}
// Check that buffers were properly cleaned up
if let Some(metrics) = harness.core.services.location_watcher
.get_location_metrics(harness.location_id)
.await
{
// Queue should be empty or near-empty
assert!(metrics.current_queue_depth() < 100, "Queue should be drained");
}
```
### Tier 8: Failure Recovery
#### Scenario 26: Filesystem Race Conditions
```rust
println!("\n--- Scenario 26: File Disappears During Processing ---");
// Create file
harness.create_file("disappearing.txt", "content").await?;
// Immediately delete it (before watcher processes)
tokio::time::sleep(Duration::from_millis(50)).await;
harness.delete_file("disappearing.txt").await?;
// Wait for processing
tokio::time::sleep(Duration::from_secs(1)).await;
// Should handle gracefully (no crash, no stale entry)
let entries = get_location_entries(&harness.library, harness.location_id).await?;
assert!(!entries.iter().any(|e| e.name == "disappearing"), "Should not have stale entry");
```
#### Scenario 27: Permission Changes
```rust
println!("\n--- Scenario 27: Permission Changes ---");
#[cfg(unix)]
{
harness.create_file("restricted.txt", "content").await?;
tokio::time::sleep(Duration::from_millis(600)).await;
// Make file read-only
let path = harness.path("restricted.txt");
let mut perms = tokio::fs::metadata(&path).await?.permissions();
perms.set_readonly(true);
tokio::fs::set_permissions(&path, perms).await?;
tokio::time::sleep(Duration::from_millis(600)).await;
// Verify file is still indexed (permission change should be detected)
let entry = harness.verify_entry_exists("restricted").await?;
// Check if permissions field is updated in database
}
```
## Testing Strategy
### Quick Smoke Test Suite (Run Often)
- Scenarios 1-9: Core functionality
- Run time: ~20 seconds
### Full Functional Test Suite (Run Before Commit)
- Scenarios 1-23: All functionality + edge cases
- Run time: ~2 minutes
### Performance Benchmark Suite (Run Weekly)
- Scenarios 24-25: Performance validation
- Compare metrics against baseline
- Run time: ~5 minutes
### Stress Test Suite (Run Before Release)
- Scenarios 26-27: Failure recovery
- 10,000+ file operations
- Multi-hour soak tests
- Run time: Hours
## Implementation Priority
1. **Immediate**: Scenario 8 (File Deletion) - Currently broken, high priority
2. **Next**: Scenario 9 (Directory Deletion) - Related to #8
3. **Then**: Scenarios 10-11 (Bulk renames, chains) - Validate the rename fix
4. **Later**: Scenarios 12-23 (Edge cases) - Comprehensive coverage
5. **Eventually**: Scenarios 24-27 (Performance, stress) - Quality assurance
## Success Criteria
For the watcher to be considered **production-ready**:
- ✅ All Tier 1-2 scenarios pass (critical operations)
- ✅ 90% of Tier 3 scenarios pass (edge cases)
- ✅ Performance metrics within 20% of v1
- ✅ No memory leaks in 24-hour soak test
- ✅ No crashes or panics under normal load
- ✅ Cross-platform validation (macOS, Linux, Windows)
## Current Score: 7/27 Scenarios Implemented ✅
That's 26% coverage. Let's get to 100%! 🚀