mirror of
https://github.com/spacedriveapp/spacedrive.git
synced 2026-02-20 07:37:26 -05:00
feat: Add index integrity verification command to CLI
- Introduced `index verify` command to perform comprehensive integrity checks on the Spacedrive index. - Implemented functionality to compare filesystem state with database entries, reporting discrepancies such as missing files, stale entries, and metadata mismatches. - Added detailed output options for verification results, including a summary report of findings. - Created associated input and output types for verification actions, enhancing the overall integrity management of the indexing system. - Documented usage and examples in the CLI documentation for user guidance.
This commit is contained in:
@@ -7,6 +7,7 @@ use sd_core::{
|
||||
ops::indexing::{
|
||||
input::IndexInput,
|
||||
job::{IndexMode, IndexPersistence, IndexScope},
|
||||
verify::input::IndexVerifyInput,
|
||||
},
|
||||
};
|
||||
|
||||
@@ -139,3 +140,32 @@ impl BrowseArgs {
|
||||
.with_persistence(IndexPersistence::Ephemeral))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Args, Debug, Clone)]
|
||||
pub struct IndexVerifyArgs {
|
||||
/// Path to verify (can be location root or subdirectory)
|
||||
pub path: PathBuf,
|
||||
|
||||
/// Verify content hashes (slower but more thorough)
|
||||
#[arg(long, default_value_t = false)]
|
||||
pub verify_content: bool,
|
||||
|
||||
/// Show detailed file-by-file comparison
|
||||
#[arg(long, default_value_t = true)]
|
||||
pub detailed: bool,
|
||||
|
||||
/// Automatically fix issues (not yet implemented)
|
||||
#[arg(long, default_value_t = false)]
|
||||
pub auto_fix: bool,
|
||||
}
|
||||
|
||||
impl IndexVerifyArgs {
|
||||
pub fn to_input(&self) -> IndexVerifyInput {
|
||||
IndexVerifyInput {
|
||||
path: self.path.clone(),
|
||||
verify_content: self.verify_content,
|
||||
detailed_report: self.detailed,
|
||||
auto_fix: self.auto_fix,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,6 +18,8 @@ pub enum IndexCmd {
|
||||
QuickScan(QuickScanArgs),
|
||||
/// Browse a path without adding as location
|
||||
Browse(BrowseArgs),
|
||||
/// Verify index integrity for a path
|
||||
Verify(IndexVerifyArgs),
|
||||
}
|
||||
|
||||
pub async fn run(ctx: &Context, cmd: IndexCmd) -> Result<()> {
|
||||
@@ -87,6 +89,149 @@ pub async fn run(ctx: &Context, cmd: IndexCmd) -> Result<()> {
|
||||
println!("Browse request submitted");
|
||||
});
|
||||
}
|
||||
IndexCmd::Verify(args) => {
|
||||
let input = args.to_input();
|
||||
let out: sd_core::ops::indexing::verify::output::IndexVerifyOutput =
|
||||
execute_action!(ctx, input);
|
||||
|
||||
print_output!(
|
||||
ctx,
|
||||
&out,
|
||||
|result: &sd_core::ops::indexing::verify::output::IndexVerifyOutput| {
|
||||
println!("\n╔══════════════════════════════════════════════════════════════╗");
|
||||
println!("║ INDEX INTEGRITY VERIFICATION REPORT ║");
|
||||
println!("╠══════════════════════════════════════════════════════════════╣");
|
||||
println!(
|
||||
"║ Path: {:60} ║",
|
||||
result
|
||||
.path
|
||||
.display()
|
||||
.to_string()
|
||||
.chars()
|
||||
.take(60)
|
||||
.collect::<String>()
|
||||
);
|
||||
println!("║ Duration: {:.2}s {:49} ║", result.duration_secs, "");
|
||||
println!("╠══════════════════════════════════════════════════════════════╣");
|
||||
|
||||
let report = &result.report;
|
||||
|
||||
println!(
|
||||
"║ Filesystem: {} files, {} directories {:23} ║",
|
||||
report.filesystem_file_count, report.filesystem_dir_count, ""
|
||||
);
|
||||
println!(
|
||||
"║ Database: {} files, {} directories {:23} ║",
|
||||
report.database_file_count, report.database_dir_count, ""
|
||||
);
|
||||
println!("╠══════════════════════════════════════════════════════════════╣");
|
||||
|
||||
if result.is_valid {
|
||||
println!("║ ✅ STATUS: VALID - Index matches filesystem perfectly! ║");
|
||||
} else {
|
||||
println!(
|
||||
"║ ❌ STATUS: DIVERGED - {} issues found {:24} ║",
|
||||
report.total_issues(),
|
||||
""
|
||||
);
|
||||
println!(
|
||||
"╠══════════════════════════════════════════════════════════════╣"
|
||||
);
|
||||
|
||||
if !report.missing_from_index.is_empty() {
|
||||
println!(
|
||||
"║ ⚠️ Missing from index: {} {:33} ║",
|
||||
report.missing_from_index.len(),
|
||||
""
|
||||
);
|
||||
if args.detailed {
|
||||
for diff in report.missing_from_index.iter().take(5) {
|
||||
let path_str = diff.path.display().to_string();
|
||||
if path_str.len() <= 58 {
|
||||
println!("║ - {:58} ║", path_str);
|
||||
} else {
|
||||
println!(
|
||||
"║ - ...{:55} ║",
|
||||
&path_str[path_str.len().saturating_sub(55)..]
|
||||
);
|
||||
}
|
||||
}
|
||||
if report.missing_from_index.len() > 5 {
|
||||
println!(
|
||||
"║ ... and {} more {:40} ║",
|
||||
report.missing_from_index.len() - 5,
|
||||
""
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !report.stale_in_index.is_empty() {
|
||||
println!(
|
||||
"║ 🗑️ Stale in index: {} {:36} ║",
|
||||
report.stale_in_index.len(),
|
||||
""
|
||||
);
|
||||
if args.detailed {
|
||||
for diff in report.stale_in_index.iter().take(5) {
|
||||
let path_str = diff.path.display().to_string();
|
||||
if path_str.len() <= 58 {
|
||||
println!("║ - {:58} ║", path_str);
|
||||
} else {
|
||||
println!(
|
||||
"║ - ...{:55} ║",
|
||||
&path_str[path_str.len().saturating_sub(55)..]
|
||||
);
|
||||
}
|
||||
}
|
||||
if report.stale_in_index.len() > 5 {
|
||||
println!(
|
||||
"║ ... and {} more {:40} ║",
|
||||
report.stale_in_index.len() - 5,
|
||||
""
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !report.metadata_mismatches.is_empty() {
|
||||
println!(
|
||||
"║ ⚙️ Metadata mismatches: {} {:31} ║",
|
||||
report.metadata_mismatches.len(),
|
||||
""
|
||||
);
|
||||
if args.detailed {
|
||||
for diff in &report.metadata_mismatches {
|
||||
println!(
|
||||
"║ - {:?}: {:?} -> {:?} {:20} ║",
|
||||
diff.issue_type,
|
||||
diff.expected.as_deref().unwrap_or("?"),
|
||||
diff.actual.as_deref().unwrap_or("?"),
|
||||
""
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !report.hierarchy_errors.is_empty() {
|
||||
println!(
|
||||
"║ 🌳 Hierarchy errors: {} {:34} ║",
|
||||
report.hierarchy_errors.len(),
|
||||
""
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
println!("╠══════════════════════════════════════════════════════════════╣");
|
||||
println!(
|
||||
"║ {}{:59} ║",
|
||||
if result.is_valid { "✅ " } else { "❌ " },
|
||||
report.summary.chars().take(59).collect::<String>()
|
||||
);
|
||||
println!("╚══════════════════════════════════════════════════════════════╝\n");
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -57,7 +57,7 @@ pub struct LibrarySettings {
|
||||
/// File extensions to ignore during indexing
|
||||
pub ignored_extensions: Vec<String>,
|
||||
|
||||
/// Maximum file size to index (in bytes)
|
||||
/// TODO: ai slop config pls remove this
|
||||
pub max_file_size: Option<u64>,
|
||||
|
||||
/// Whether to automatically track system volumes
|
||||
|
||||
@@ -75,7 +75,7 @@ impl EntryProcessor {
|
||||
|
||||
/// Extract detailed metadata from a path
|
||||
pub async fn extract_metadata(path: &Path) -> Result<EntryMetadata, std::io::Error> {
|
||||
let metadata = tokio::fs::metadata(path).await?;
|
||||
let metadata = tokio::fs::symlink_metadata(path).await?;
|
||||
|
||||
let kind = if metadata.is_dir() {
|
||||
EntryKind::Directory
|
||||
@@ -602,7 +602,7 @@ impl EntryProcessor {
|
||||
existing_id
|
||||
} else {
|
||||
// Create new content identity with deterministic UUID (ready for sync)
|
||||
let file_size = tokio::fs::metadata(path)
|
||||
let file_size = tokio::fs::symlink_metadata(path)
|
||||
.await
|
||||
.map(|m| m.len() as i64)
|
||||
.unwrap_or(0);
|
||||
@@ -900,7 +900,7 @@ impl EntryProcessor {
|
||||
let mut entry_active: entities::entry::ActiveModel = db_entry.into();
|
||||
|
||||
// Update size if it changed
|
||||
if let Ok(metadata) = std::fs::metadata(&entry.path) {
|
||||
if let Ok(metadata) = std::fs::symlink_metadata(&entry.path) {
|
||||
entry_active.size = Set(metadata.len() as i64);
|
||||
|
||||
// Update modified time
|
||||
|
||||
@@ -557,6 +557,12 @@ impl IndexerJob {
|
||||
Self::new(IndexerJobConfig::ui_navigation(location_id, path))
|
||||
}
|
||||
|
||||
/// Set the ephemeral index storage (must be called before dispatching for ephemeral jobs)
|
||||
/// This allows external code to maintain a reference to the same storage the job uses
|
||||
pub fn set_ephemeral_index(&mut self, index: Arc<RwLock<EphemeralIndex>>) {
|
||||
self.ephemeral_index = Some(index);
|
||||
}
|
||||
|
||||
/// Create an ephemeral browsing job (no database writes)
|
||||
pub fn ephemeral_browse(path: SdPath, scope: IndexScope) -> Self {
|
||||
Self::new(IndexerJobConfig::ephemeral_browse(path, scope))
|
||||
|
||||
@@ -23,6 +23,7 @@ pub mod progress;
|
||||
pub mod responder;
|
||||
pub mod rules;
|
||||
pub mod state;
|
||||
pub mod verify;
|
||||
|
||||
// Re-exports for convenience
|
||||
pub use action::IndexingAction;
|
||||
@@ -42,6 +43,7 @@ pub use rules::{
|
||||
RulerDecision,
|
||||
};
|
||||
pub use state::{IndexPhase, IndexerProgress, IndexerState, IndexerStats};
|
||||
pub use verify::{IndexVerifyAction, IndexVerifyInput, IndexVerifyOutput, IntegrityReport};
|
||||
|
||||
// Rules system will be integrated here in the future
|
||||
// pub mod rules;
|
||||
|
||||
@@ -426,7 +426,7 @@ impl IndexPersistence for EphemeralPersistence {
|
||||
cas_id: String,
|
||||
) -> JobResult<()> {
|
||||
// Get file size
|
||||
let file_size = tokio::fs::metadata(path)
|
||||
let file_size = tokio::fs::symlink_metadata(path)
|
||||
.await
|
||||
.map(|m| m.len())
|
||||
.unwrap_or(0);
|
||||
|
||||
@@ -189,7 +189,7 @@ pub async fn run_processing_phase(
|
||||
// Add to seen_paths for delete detection (important for resumed jobs)
|
||||
state.seen_paths.insert(entry.path.clone());
|
||||
// Get metadata for change detection
|
||||
let metadata = match std::fs::metadata(&entry.path) {
|
||||
let metadata = match std::fs::symlink_metadata(&entry.path) {
|
||||
Ok(m) => m,
|
||||
Err(e) => {
|
||||
ctx.add_non_critical_error(format!(
|
||||
|
||||
543
core/src/ops/indexing/verify/action.rs
Normal file
543
core/src/ops/indexing/verify/action.rs
Normal file
@@ -0,0 +1,543 @@
|
||||
//! Index integrity verification action
|
||||
|
||||
use super::{input::IndexVerifyInput, output::*};
|
||||
use crate::{
|
||||
context::CoreContext,
|
||||
domain::addressing::SdPath,
|
||||
infra::{
|
||||
action::{error::ActionError, LibraryAction},
|
||||
db::entities,
|
||||
},
|
||||
ops::indexing::{
|
||||
entry::EntryProcessor,
|
||||
job::{
|
||||
EphemeralIndex, IndexMode, IndexPersistence, IndexScope, IndexerJob, IndexerJobConfig,
|
||||
},
|
||||
path_resolver::PathResolver,
|
||||
state::EntryKind,
|
||||
},
|
||||
};
|
||||
use sea_orm::{ColumnTrait, EntityTrait, QueryFilter};
|
||||
use std::{
|
||||
collections::{HashMap, HashSet},
|
||||
path::{Path, PathBuf},
|
||||
sync::Arc,
|
||||
time::Instant,
|
||||
};
|
||||
use tokio::sync::RwLock;
|
||||
use uuid::Uuid;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct IndexVerifyAction {
|
||||
input: IndexVerifyInput,
|
||||
}
|
||||
|
||||
impl LibraryAction for IndexVerifyAction {
|
||||
type Input = IndexVerifyInput;
|
||||
type Output = IndexVerifyOutput;
|
||||
|
||||
fn from_input(input: Self::Input) -> Result<Self, String> {
|
||||
// Validate input
|
||||
input
|
||||
.validate()
|
||||
.map_err(|errors| format!("Validation failed: {}", errors.join("; ")))?;
|
||||
|
||||
Ok(Self { input })
|
||||
}
|
||||
|
||||
async fn execute(
|
||||
self,
|
||||
library: Arc<crate::library::Library>,
|
||||
context: Arc<CoreContext>,
|
||||
) -> Result<Self::Output, ActionError> {
|
||||
let start = Instant::now();
|
||||
let path = self.input.path.clone();
|
||||
|
||||
tracing::info!(
|
||||
"Starting index integrity verification for: {}",
|
||||
path.display()
|
||||
);
|
||||
|
||||
// Step 1: Scan filesystem to get current state
|
||||
let fs_entries = self.run_ephemeral_index(&library, &context, &path).await?;
|
||||
|
||||
// Step 2: Query database for existing entries in this path
|
||||
let db_entries = self.query_database_entries(&library, &path).await?;
|
||||
|
||||
// Step 3: Compare and generate report
|
||||
let mut report = self.compare_indexes(fs_entries, db_entries, &path).await?;
|
||||
|
||||
// Generate summary
|
||||
report.generate_summary();
|
||||
|
||||
let duration = start.elapsed();
|
||||
|
||||
tracing::info!(
|
||||
"Index verification complete in {:.2}s: {}",
|
||||
duration.as_secs_f64(),
|
||||
report.summary
|
||||
);
|
||||
|
||||
Ok(IndexVerifyOutput {
|
||||
is_valid: report.is_valid(),
|
||||
report,
|
||||
path,
|
||||
duration_secs: duration.as_secs_f64(),
|
||||
})
|
||||
}
|
||||
|
||||
fn action_kind(&self) -> &'static str {
|
||||
"indexing.verify"
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexVerifyAction {
|
||||
/// Run ephemeral indexing to get current filesystem state using the real IndexerJob
|
||||
async fn run_ephemeral_index(
|
||||
&self,
|
||||
library: &Arc<crate::library::Library>,
|
||||
context: &Arc<CoreContext>,
|
||||
path: &Path,
|
||||
) -> Result<HashMap<PathBuf, crate::ops::indexing::entry::EntryMetadata>, ActionError> {
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
tracing::debug!("Running ephemeral indexer job on {}", path.display());
|
||||
|
||||
// Create ephemeral index storage that we'll share with the job
|
||||
let ephemeral_index = Arc::new(RwLock::new(EphemeralIndex::new(path.to_path_buf())));
|
||||
|
||||
// Subscribe to job events before dispatching
|
||||
let mut event_subscriber = context.events.subscribe();
|
||||
|
||||
// Create indexer job config for ephemeral scanning
|
||||
let config = IndexerJobConfig {
|
||||
location_id: None, // Ephemeral - no location
|
||||
path: SdPath::local(path),
|
||||
mode: IndexMode::Deep, // Full metadata extraction including inodes
|
||||
scope: IndexScope::Recursive,
|
||||
persistence: IndexPersistence::Ephemeral,
|
||||
max_depth: None,
|
||||
rule_toggles: Default::default(),
|
||||
};
|
||||
|
||||
// Create the job and set our ephemeral index storage BEFORE dispatching
|
||||
let mut job = IndexerJob::new(config);
|
||||
job.set_ephemeral_index(ephemeral_index.clone());
|
||||
|
||||
// Dispatch the job
|
||||
let job_handle =
|
||||
library.jobs().dispatch(job).await.map_err(|e| {
|
||||
ActionError::Internal(format!("Failed to dispatch indexer job: {}", e))
|
||||
})?;
|
||||
|
||||
let job_id = job_handle.id().to_string();
|
||||
tracing::debug!(
|
||||
"Waiting for ephemeral indexer job {} to complete...",
|
||||
job_id
|
||||
);
|
||||
|
||||
// Listen for job completion events
|
||||
loop {
|
||||
match event_subscriber.recv().await {
|
||||
Ok(event) => match event {
|
||||
crate::infra::event::Event::JobCompleted {
|
||||
job_id: completed_id,
|
||||
..
|
||||
} if completed_id == job_id => {
|
||||
tracing::debug!("Ephemeral indexer job {} completed", job_id);
|
||||
break;
|
||||
}
|
||||
crate::infra::event::Event::JobFailed {
|
||||
job_id: failed_id,
|
||||
error,
|
||||
..
|
||||
} if failed_id == job_id => {
|
||||
return Err(ActionError::Internal(format!(
|
||||
"Ephemeral indexer job failed: {}",
|
||||
error
|
||||
)));
|
||||
}
|
||||
crate::infra::event::Event::JobCancelled {
|
||||
job_id: cancelled_id,
|
||||
..
|
||||
} if cancelled_id == job_id => {
|
||||
return Err(ActionError::Internal(
|
||||
"Ephemeral indexer job was cancelled".to_string(),
|
||||
));
|
||||
}
|
||||
_ => {
|
||||
// Not our job event, keep listening
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
return Err(ActionError::Internal(format!(
|
||||
"Failed to receive job event: {}",
|
||||
e
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tracing::debug!("Ephemeral indexer job completed, extracting results");
|
||||
|
||||
// Extract the results from our shared ephemeral index
|
||||
let entries = {
|
||||
let index = ephemeral_index.read().await;
|
||||
index.entries.clone()
|
||||
};
|
||||
|
||||
tracing::debug!(
|
||||
"Collected {} filesystem entries from ephemeral index",
|
||||
entries.len()
|
||||
);
|
||||
|
||||
Ok(entries)
|
||||
}
|
||||
|
||||
/// Query database for all entries under the given path
|
||||
async fn query_database_entries(
|
||||
&self,
|
||||
library: &Arc<crate::library::Library>,
|
||||
root_path: &Path,
|
||||
) -> Result<HashMap<PathBuf, (entities::entry::Model, PathBuf)>, ActionError> {
|
||||
tracing::debug!("Querying database entries for {}", root_path.display());
|
||||
|
||||
let db = library.db().conn();
|
||||
let root_path_str = root_path.to_string_lossy().to_string();
|
||||
|
||||
// First, find which location this path belongs to
|
||||
let locations = entities::location::Entity::find()
|
||||
.all(db)
|
||||
.await
|
||||
.map_err(|e| ActionError::Internal(format!("Failed to query locations: {}", e)))?;
|
||||
|
||||
let mut target_location = None;
|
||||
for loc in locations {
|
||||
let loc_path = PathResolver::get_full_path(db, loc.entry_id)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
ActionError::Internal(format!("Failed to get location path: {}", e))
|
||||
})?;
|
||||
|
||||
// Check if our target path is within this location
|
||||
if root_path.starts_with(&loc_path) || root_path == loc_path {
|
||||
target_location = Some((loc, loc_path));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let Some((location, location_path)) = target_location else {
|
||||
return Err(ActionError::Internal(format!(
|
||||
"Path {} does not belong to any managed location",
|
||||
root_path.display()
|
||||
)));
|
||||
};
|
||||
|
||||
tracing::debug!(
|
||||
"Found location {} for path {}",
|
||||
location.name.as_deref().unwrap_or("Unknown"),
|
||||
root_path.display()
|
||||
);
|
||||
|
||||
let mut entries_map = HashMap::new();
|
||||
|
||||
// Find the directory entry for this specific path
|
||||
let root_entry = entities::directory_paths::Entity::find()
|
||||
.filter(entities::directory_paths::Column::Path.eq(&root_path_str))
|
||||
.one(db)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
ActionError::Internal(format!("Failed to query directory paths: {}", e))
|
||||
})?;
|
||||
|
||||
if let Some(root_dir) = root_entry {
|
||||
// Get all descendant entries using closure table
|
||||
let descendant_closures = entities::entry_closure::Entity::find()
|
||||
.filter(entities::entry_closure::Column::AncestorId.eq(root_dir.entry_id))
|
||||
.all(db)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
ActionError::Internal(format!("Failed to query entry closure: {}", e))
|
||||
})?;
|
||||
|
||||
let descendant_ids: Vec<i32> = descendant_closures
|
||||
.iter()
|
||||
.map(|ec| ec.descendant_id)
|
||||
.collect();
|
||||
|
||||
if descendant_ids.is_empty() {
|
||||
tracing::warn!("No descendants found for root directory");
|
||||
return Ok(entries_map);
|
||||
}
|
||||
|
||||
// Fetch all entries
|
||||
let entries = entities::entry::Entity::find()
|
||||
.filter(entities::entry::Column::Id.is_in(descendant_ids))
|
||||
.all(db)
|
||||
.await
|
||||
.map_err(|e| ActionError::Internal(format!("Failed to query entries: {}", e)))?;
|
||||
|
||||
tracing::debug!("Found {} descendant entries", entries.len());
|
||||
|
||||
// Resolve full paths for all entries
|
||||
for entry in entries {
|
||||
let full_path = PathResolver::get_full_path(db, entry.id)
|
||||
.await
|
||||
.unwrap_or_else(|_| PathBuf::from(&entry.name));
|
||||
|
||||
entries_map.insert(full_path.clone(), (entry, full_path));
|
||||
}
|
||||
} else {
|
||||
// Path is within a location but not the root - need to find the entry ID for this path
|
||||
// by traversing from the location root
|
||||
tracing::debug!("Path is subdirectory of location, traversing from root");
|
||||
|
||||
let relative_path = root_path.strip_prefix(&location_path).map_err(|e| {
|
||||
ActionError::Internal(format!("Failed to compute relative path: {}", e))
|
||||
})?;
|
||||
|
||||
// Get path components
|
||||
let components: Vec<&str> = relative_path
|
||||
.components()
|
||||
.filter_map(|c| c.as_os_str().to_str())
|
||||
.collect();
|
||||
|
||||
if components.is_empty() {
|
||||
// This is the location root, use location.entry_id
|
||||
let root_entry_id = location.entry_id;
|
||||
|
||||
// Get all descendants using closure table
|
||||
let descendant_closures = entities::entry_closure::Entity::find()
|
||||
.filter(entities::entry_closure::Column::AncestorId.eq(root_entry_id))
|
||||
.all(db)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
ActionError::Internal(format!("Failed to query entry closure: {}", e))
|
||||
})?;
|
||||
|
||||
let descendant_ids: Vec<i32> = descendant_closures
|
||||
.iter()
|
||||
.map(|ec| ec.descendant_id)
|
||||
.collect();
|
||||
|
||||
let entries = entities::entry::Entity::find()
|
||||
.filter(entities::entry::Column::Id.is_in(descendant_ids))
|
||||
.all(db)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
ActionError::Internal(format!("Failed to query entries: {}", e))
|
||||
})?;
|
||||
|
||||
for entry in entries {
|
||||
let full_path = PathResolver::get_full_path(db, entry.id)
|
||||
.await
|
||||
.unwrap_or_else(|_| PathBuf::from(&entry.name));
|
||||
entries_map.insert(full_path.clone(), (entry, full_path));
|
||||
}
|
||||
} else {
|
||||
// Traverse from location root to find the target directory
|
||||
let mut current_parent_id = Some(location.entry_id);
|
||||
|
||||
for component in &components {
|
||||
if let Some(parent_id) = current_parent_id {
|
||||
// Find child with this name
|
||||
let child = entities::entry::Entity::find()
|
||||
.filter(entities::entry::Column::ParentId.eq(parent_id))
|
||||
.filter(entities::entry::Column::Name.eq(*component))
|
||||
.one(db)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
ActionError::Internal(format!("Failed to query entry: {}", e))
|
||||
})?;
|
||||
|
||||
current_parent_id = child.as_ref().map(|c| c.id);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(target_entry_id) = current_parent_id {
|
||||
// Get all descendants of this subdirectory
|
||||
let descendant_closures = entities::entry_closure::Entity::find()
|
||||
.filter(entities::entry_closure::Column::AncestorId.eq(target_entry_id))
|
||||
.all(db)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
ActionError::Internal(format!("Failed to query entry closure: {}", e))
|
||||
})?;
|
||||
|
||||
let descendant_ids: Vec<i32> = descendant_closures
|
||||
.iter()
|
||||
.map(|ec| ec.descendant_id)
|
||||
.collect();
|
||||
|
||||
let entries = entities::entry::Entity::find()
|
||||
.filter(entities::entry::Column::Id.is_in(descendant_ids))
|
||||
.all(db)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
ActionError::Internal(format!("Failed to query entries: {}", e))
|
||||
})?;
|
||||
|
||||
for entry in entries {
|
||||
let full_path = PathResolver::get_full_path(db, entry.id)
|
||||
.await
|
||||
.unwrap_or_else(|_| PathBuf::from(&entry.name));
|
||||
entries_map.insert(full_path.clone(), (entry, full_path));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tracing::debug!("Found {} entries in database", entries_map.len());
|
||||
}
|
||||
|
||||
Ok(entries_map)
|
||||
}
|
||||
|
||||
/// Compare ephemeral index with database entries
|
||||
async fn compare_indexes(
|
||||
&self,
|
||||
fs_entries: HashMap<PathBuf, crate::ops::indexing::entry::EntryMetadata>,
|
||||
mut db_entries: HashMap<PathBuf, (entities::entry::Model, PathBuf)>,
|
||||
root_path: &Path,
|
||||
) -> Result<IntegrityReport, ActionError> {
|
||||
tracing::debug!("Comparing filesystem and database indexes");
|
||||
|
||||
let mut report = IntegrityReport::new();
|
||||
|
||||
tracing::debug!(
|
||||
"Comparing {} filesystem entries with {} database entries",
|
||||
fs_entries.len(),
|
||||
db_entries.len()
|
||||
);
|
||||
|
||||
// Remove the root path itself from db_entries - the ephemeral indexer doesn't
|
||||
// create an entry for the root directory it's scanning, only its contents
|
||||
db_entries.remove(root_path);
|
||||
|
||||
// Count files and directories
|
||||
for (_path, metadata) in &fs_entries {
|
||||
match metadata.kind {
|
||||
EntryKind::File => report.filesystem_file_count += 1,
|
||||
EntryKind::Directory => report.filesystem_dir_count += 1,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
for (_path, (entry, _)) in &db_entries {
|
||||
let kind = entry.entry_kind();
|
||||
match kind {
|
||||
entities::entry::EntryKind::File => report.database_file_count += 1,
|
||||
entities::entry::EntryKind::Directory => report.database_dir_count += 1,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
// Build sets for comparison
|
||||
// On case-insensitive filesystems (macOS), normalize paths to lowercase for comparison
|
||||
#[cfg(target_os = "macos")]
|
||||
let normalize_path = |pb: &PathBuf| -> String { pb.to_string_lossy().to_lowercase() };
|
||||
|
||||
#[cfg(not(target_os = "macos"))]
|
||||
let normalize_path = |pb: &PathBuf| -> String { pb.to_string_lossy().to_string() };
|
||||
|
||||
// Create normalized path maps for case-insensitive comparison on macOS
|
||||
let fs_normalized: HashMap<String, PathBuf> = fs_entries
|
||||
.keys()
|
||||
.map(|p| (normalize_path(p), p.clone()))
|
||||
.collect();
|
||||
|
||||
let db_normalized: HashMap<String, PathBuf> = db_entries
|
||||
.keys()
|
||||
.map(|p| (normalize_path(p), p.clone()))
|
||||
.collect();
|
||||
|
||||
let fs_paths: HashSet<String> = fs_normalized.keys().cloned().collect();
|
||||
let db_paths: HashSet<String> = db_normalized.keys().cloned().collect();
|
||||
|
||||
// Find missing from index (in filesystem but not in DB)
|
||||
for norm_path in fs_paths.difference(&db_paths) {
|
||||
let path = &fs_normalized[norm_path];
|
||||
report
|
||||
.missing_from_index
|
||||
.push(IntegrityDifference::missing_from_index(path.clone()));
|
||||
}
|
||||
|
||||
// Find stale in index (in DB but not on filesystem)
|
||||
for norm_path in db_paths.difference(&fs_paths) {
|
||||
let path = &db_normalized[norm_path];
|
||||
report
|
||||
.stale_in_index
|
||||
.push(IntegrityDifference::stale_in_index(path.clone()));
|
||||
}
|
||||
|
||||
// Find metadata mismatches (in both but with different data)
|
||||
for norm_path in fs_paths.intersection(&db_paths) {
|
||||
let fs_path = &fs_normalized[norm_path];
|
||||
let db_path = &db_normalized[norm_path];
|
||||
|
||||
if let (Some(fs_meta), Some((db_entry, _))) =
|
||||
(fs_entries.get(fs_path), db_entries.get(db_path))
|
||||
{
|
||||
// Check size
|
||||
let fs_size = fs_meta.size;
|
||||
let db_size = db_entry.size as u64;
|
||||
if fs_size != db_size {
|
||||
report
|
||||
.metadata_mismatches
|
||||
.push(IntegrityDifference::size_mismatch_with_debug(
|
||||
fs_path.clone(),
|
||||
fs_size,
|
||||
db_size,
|
||||
db_entry.id,
|
||||
db_entry.name.clone(),
|
||||
));
|
||||
}
|
||||
|
||||
// Check modified time (allow 1 second tolerance for filesystem precision)
|
||||
if let Some(fs_modified) = fs_meta.modified {
|
||||
if let Ok(fs_duration) = fs_modified.duration_since(std::time::UNIX_EPOCH) {
|
||||
let fs_secs = fs_duration.as_secs() as i64;
|
||||
let db_secs = db_entry.modified_at.timestamp();
|
||||
|
||||
if (fs_secs - db_secs).abs() > 1 {
|
||||
report.metadata_mismatches.push(
|
||||
IntegrityDifference::modified_time_mismatch(
|
||||
fs_path.clone(),
|
||||
format!("{}", fs_secs),
|
||||
format!("{}", db_secs),
|
||||
),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check inode if available
|
||||
if let (Some(fs_inode), Some(db_inode)) = (fs_meta.inode, db_entry.inode) {
|
||||
if fs_inode != db_inode as u64 {
|
||||
report.metadata_mismatches.push(IntegrityDifference {
|
||||
path: fs_path.clone(),
|
||||
issue_type: IssueType::InodeMismatch,
|
||||
expected: Some(format!("{}", fs_inode)),
|
||||
actual: Some(format!("{}", db_inode)),
|
||||
description: format!("Inode mismatch for {}", fs_path.display()),
|
||||
db_entry_id: Some(db_entry.id),
|
||||
db_entry_name: Some(db_entry.name.clone()),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tracing::debug!(
|
||||
"Comparison complete: {} missing, {} stale, {} metadata mismatches",
|
||||
report.missing_from_index.len(),
|
||||
report.stale_in_index.len(),
|
||||
report.metadata_mismatches.len()
|
||||
);
|
||||
|
||||
Ok(report)
|
||||
}
|
||||
}
|
||||
|
||||
crate::register_library_action!(IndexVerifyAction, "indexing.verify");
|
||||
52
core/src/ops/indexing/verify/input.rs
Normal file
52
core/src/ops/indexing/verify/input.rs
Normal file
@@ -0,0 +1,52 @@
|
||||
//! Input types for index verification
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use specta::Type;
|
||||
use std::path::PathBuf;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Type)]
|
||||
pub struct IndexVerifyInput {
|
||||
/// Path to verify (can be a location root or subdirectory)
|
||||
pub path: PathBuf,
|
||||
|
||||
/// Whether to check content hashes (slower but more thorough)
|
||||
#[serde(default)]
|
||||
pub verify_content: bool,
|
||||
|
||||
/// Whether to include detailed file-by-file comparison
|
||||
#[serde(default = "default_true")]
|
||||
pub detailed_report: bool,
|
||||
|
||||
/// Whether to fix issues automatically (future feature)
|
||||
#[serde(default)]
|
||||
pub auto_fix: bool,
|
||||
}
|
||||
|
||||
fn default_true() -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
impl IndexVerifyInput {
|
||||
pub fn new(path: PathBuf) -> Self {
|
||||
Self {
|
||||
path,
|
||||
verify_content: false,
|
||||
detailed_report: true,
|
||||
auto_fix: false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn validate(&self) -> Result<(), Vec<String>> {
|
||||
let mut errors = Vec::new();
|
||||
|
||||
if !self.path.exists() {
|
||||
errors.push(format!("Path does not exist: {}", self.path.display()));
|
||||
}
|
||||
|
||||
if errors.is_empty() {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(errors)
|
||||
}
|
||||
}
|
||||
}
|
||||
12
core/src/ops/indexing/verify/mod.rs
Normal file
12
core/src/ops/indexing/verify/mod.rs
Normal file
@@ -0,0 +1,12 @@
|
||||
//! Index Integrity Verification
|
||||
//!
|
||||
//! Verifies the integrity of the Spacedrive index by comparing the database state
|
||||
//! with the actual filesystem state for a given path.
|
||||
|
||||
pub mod action;
|
||||
pub mod input;
|
||||
pub mod output;
|
||||
|
||||
pub use action::IndexVerifyAction;
|
||||
pub use input::IndexVerifyInput;
|
||||
pub use output::{IndexVerifyOutput, IntegrityDifference, IntegrityReport};
|
||||
258
core/src/ops/indexing/verify/output.rs
Normal file
258
core/src/ops/indexing/verify/output.rs
Normal file
@@ -0,0 +1,258 @@
|
||||
//! Output types for index verification
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use specta::Type;
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Result of index integrity verification
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Type)]
|
||||
pub struct IndexVerifyOutput {
|
||||
/// Overall integrity status
|
||||
pub is_valid: bool,
|
||||
|
||||
/// Integrity report with detailed findings
|
||||
pub report: IntegrityReport,
|
||||
|
||||
/// Path that was verified
|
||||
pub path: PathBuf,
|
||||
|
||||
/// Time taken to verify (seconds)
|
||||
pub duration_secs: f64,
|
||||
}
|
||||
|
||||
/// Detailed integrity report
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Type)]
|
||||
pub struct IntegrityReport {
|
||||
/// Total files found on filesystem
|
||||
pub filesystem_file_count: usize,
|
||||
|
||||
/// Total files in database index
|
||||
pub database_file_count: usize,
|
||||
|
||||
/// Total directories found on filesystem
|
||||
pub filesystem_dir_count: usize,
|
||||
|
||||
/// Total directories in database index
|
||||
pub database_dir_count: usize,
|
||||
|
||||
/// Files missing from index (on filesystem but not in DB)
|
||||
pub missing_from_index: Vec<IntegrityDifference>,
|
||||
|
||||
/// Stale entries in index (in DB but not on filesystem)
|
||||
pub stale_in_index: Vec<IntegrityDifference>,
|
||||
|
||||
/// Entries with incorrect metadata
|
||||
pub metadata_mismatches: Vec<IntegrityDifference>,
|
||||
|
||||
/// Entries with incorrect parent relationships
|
||||
pub hierarchy_errors: Vec<IntegrityDifference>,
|
||||
|
||||
/// Summary statistics
|
||||
pub summary: String,
|
||||
}
|
||||
|
||||
impl IntegrityReport {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
filesystem_file_count: 0,
|
||||
database_file_count: 0,
|
||||
filesystem_dir_count: 0,
|
||||
database_dir_count: 0,
|
||||
missing_from_index: Vec::new(),
|
||||
stale_in_index: Vec::new(),
|
||||
metadata_mismatches: Vec::new(),
|
||||
hierarchy_errors: Vec::new(),
|
||||
summary: String::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_valid(&self) -> bool {
|
||||
self.missing_from_index.is_empty()
|
||||
&& self.stale_in_index.is_empty()
|
||||
&& self.metadata_mismatches.is_empty()
|
||||
&& self.hierarchy_errors.is_empty()
|
||||
}
|
||||
|
||||
pub fn total_issues(&self) -> usize {
|
||||
self.missing_from_index.len()
|
||||
+ self.stale_in_index.len()
|
||||
+ self.metadata_mismatches.len()
|
||||
+ self.hierarchy_errors.len()
|
||||
}
|
||||
|
||||
pub fn generate_summary(&mut self) {
|
||||
if self.is_valid() {
|
||||
self.summary = format!(
|
||||
"✅ Index is valid! {} files and {} directories match filesystem perfectly.",
|
||||
self.filesystem_file_count, self.filesystem_dir_count
|
||||
);
|
||||
} else {
|
||||
let mut parts = Vec::new();
|
||||
|
||||
if !self.missing_from_index.is_empty() {
|
||||
parts.push(format!(
|
||||
"{} missing from index",
|
||||
self.missing_from_index.len()
|
||||
));
|
||||
}
|
||||
|
||||
if !self.stale_in_index.is_empty() {
|
||||
parts.push(format!("{} stale entries", self.stale_in_index.len()));
|
||||
}
|
||||
|
||||
if !self.metadata_mismatches.is_empty() {
|
||||
parts.push(format!(
|
||||
"{} metadata mismatches",
|
||||
self.metadata_mismatches.len()
|
||||
));
|
||||
}
|
||||
|
||||
if !self.hierarchy_errors.is_empty() {
|
||||
parts.push(format!("{} hierarchy errors", self.hierarchy_errors.len()));
|
||||
}
|
||||
|
||||
self.summary = format!(
|
||||
"❌ Index has diverged: {}. Total issues: {}",
|
||||
parts.join(", "),
|
||||
self.total_issues()
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for IntegrityReport {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents a single integrity difference
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Type)]
|
||||
pub struct IntegrityDifference {
|
||||
/// Path relative to verification root
|
||||
pub path: PathBuf,
|
||||
|
||||
/// Type of issue
|
||||
pub issue_type: IssueType,
|
||||
|
||||
/// Expected value (from filesystem or correct state)
|
||||
pub expected: Option<String>,
|
||||
|
||||
/// Actual value (from database)
|
||||
pub actual: Option<String>,
|
||||
|
||||
/// Human-readable description
|
||||
pub description: String,
|
||||
|
||||
/// Debug: database entry ID for investigation
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub db_entry_id: Option<i32>,
|
||||
|
||||
/// Debug: database entry name
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub db_entry_name: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Type)]
|
||||
#[serde(tag = "type")]
|
||||
pub enum IssueType {
|
||||
MissingFromIndex,
|
||||
StaleInIndex,
|
||||
SizeMismatch,
|
||||
ModifiedTimeMismatch,
|
||||
InodeMismatch,
|
||||
ExtensionMismatch,
|
||||
ParentMismatch,
|
||||
KindMismatch,
|
||||
}
|
||||
|
||||
impl IntegrityDifference {
|
||||
pub fn missing_from_index(path: PathBuf) -> Self {
|
||||
Self {
|
||||
description: format!(
|
||||
"File exists on filesystem but not in index: {}",
|
||||
path.display()
|
||||
),
|
||||
path,
|
||||
issue_type: IssueType::MissingFromIndex,
|
||||
expected: Some("Indexed".to_string()),
|
||||
actual: Some("Not indexed".to_string()),
|
||||
db_entry_id: None,
|
||||
db_entry_name: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn stale_in_index(path: PathBuf) -> Self {
|
||||
Self {
|
||||
description: format!(
|
||||
"Entry exists in index but not on filesystem: {}",
|
||||
path.display()
|
||||
),
|
||||
path,
|
||||
issue_type: IssueType::StaleInIndex,
|
||||
expected: Some("Not indexed".to_string()),
|
||||
actual: Some("Indexed".to_string()),
|
||||
db_entry_id: None,
|
||||
db_entry_name: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn size_mismatch(path: PathBuf, expected: u64, actual: u64) -> Self {
|
||||
Self {
|
||||
description: format!("Size mismatch for {}", path.display()),
|
||||
path,
|
||||
issue_type: IssueType::SizeMismatch,
|
||||
expected: Some(format!("{} bytes", expected)),
|
||||
actual: Some(format!("{} bytes", actual)),
|
||||
db_entry_id: None,
|
||||
db_entry_name: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn size_mismatch_with_debug(
|
||||
path: PathBuf,
|
||||
expected: u64,
|
||||
actual: u64,
|
||||
db_id: i32,
|
||||
db_name: String,
|
||||
) -> Self {
|
||||
Self {
|
||||
description: format!(
|
||||
"Size mismatch for {} (db_id: {}, db_name: {})",
|
||||
path.display(),
|
||||
db_id,
|
||||
db_name
|
||||
),
|
||||
path,
|
||||
issue_type: IssueType::SizeMismatch,
|
||||
expected: Some(format!("{} bytes", expected)),
|
||||
actual: Some(format!("{} bytes", actual)),
|
||||
db_entry_id: Some(db_id),
|
||||
db_entry_name: Some(db_name),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn modified_time_mismatch(path: PathBuf, expected: String, actual: String) -> Self {
|
||||
Self {
|
||||
description: format!("Modified time mismatch for {}", path.display()),
|
||||
path,
|
||||
issue_type: IssueType::ModifiedTimeMismatch,
|
||||
expected: Some(expected),
|
||||
actual: Some(actual),
|
||||
db_entry_id: None,
|
||||
db_entry_name: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parent_mismatch(path: PathBuf, expected_parent: String, actual_parent: String) -> Self {
|
||||
Self {
|
||||
description: format!("Parent mismatch for {}", path.display()),
|
||||
path,
|
||||
issue_type: IssueType::ParentMismatch,
|
||||
expected: Some(expected_parent),
|
||||
actual: Some(actual_parent),
|
||||
db_entry_id: None,
|
||||
db_entry_name: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
211
docs/cli/index-verify.md
Normal file
211
docs/cli/index-verify.md
Normal file
@@ -0,0 +1,211 @@
|
||||
# Index Integrity Verification Command
|
||||
|
||||
## Overview
|
||||
|
||||
The `index verify` command performs a comprehensive integrity check of the Spacedrive index for any given path. It compares the actual filesystem state with what's stored in the database and reports any discrepancies.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
# Basic usage - verify a location
|
||||
sd-cli index verify /path/to/check
|
||||
|
||||
# Verify with content hash checking (slower but more thorough)
|
||||
sd-cli index verify /path/to/check --verify-content
|
||||
|
||||
# Verify without detailed output (just summary)
|
||||
sd-cli index verify /path/to/check --detailed=false
|
||||
|
||||
# Future: Auto-fix issues (not yet implemented)
|
||||
sd-cli index verify /path/to/check --auto-fix
|
||||
```
|
||||
|
||||
## How It Works
|
||||
|
||||
The command performs three main steps:
|
||||
|
||||
### 1. Ephemeral Indexing
|
||||
Runs a fresh, in-memory index scan of the filesystem path to capture the current state:
|
||||
- Discovers all files and directories
|
||||
- Extracts metadata (size, modified time, inode)
|
||||
- Optionally generates content hashes (with `--verify-content`)
|
||||
|
||||
### 2. Database Query
|
||||
Queries the Spacedrive database for all indexed entries under the given path:
|
||||
- Uses closure table for efficient descendant lookup
|
||||
- Resolves full paths for all entries
|
||||
- Extracts stored metadata
|
||||
|
||||
### 3. Comparison & Reporting
|
||||
Compares the two datasets and categorizes differences:
|
||||
|
||||
| Issue Type | Description |
|
||||
|------------|-------------|
|
||||
| **Missing from Index** | Files exist on filesystem but not in database |
|
||||
| **Stale in Index** | Files exist in database but not on filesystem |
|
||||
| **Metadata Mismatch** | Files exist in both but with incorrect size/time/inode |
|
||||
| **Hierarchy Error** | Files have incorrect parent relationships |
|
||||
|
||||
## Output Format
|
||||
|
||||
```
|
||||
╔══════════════════════════════════════════════════════════════╗
|
||||
║ INDEX INTEGRITY VERIFICATION REPORT ║
|
||||
╠══════════════════════════════════════════════════════════════╣
|
||||
║ Path: /Users/jamie/Documents ║
|
||||
║ Duration: 2.34s ║
|
||||
╠══════════════════════════════════════════════════════════════╣
|
||||
║ Filesystem: 1247 files, 89 directories ║
|
||||
║ Database: 1245 files, 89 directories ║
|
||||
╠══════════════════════════════════════════════════════════════╣
|
||||
║ ❌ STATUS: DIVERGED - 4 issues found ║
|
||||
╠══════════════════════════════════════════════════════════════╣
|
||||
║ ⚠️ Missing from index: 2 ║
|
||||
║ - /Users/jamie/Documents/new-file.txt ║
|
||||
║ - /Users/jamie/Documents/another-new.pdf ║
|
||||
║ 🗑️ Stale in index: 2 ║
|
||||
║ - /Users/jamie/Documents/deleted-file.txt ║
|
||||
║ - /Users/jamie/Documents/old/removed.doc ║
|
||||
╠══════════════════════════════════════════════════════════════╣
|
||||
║ ❌ Index has diverged: 2 missing, 2 stale. Total: 4 ║
|
||||
╚══════════════════════════════════════════════════════════════╝
|
||||
```
|
||||
|
||||
## When to Use
|
||||
|
||||
### Debugging Index Issues
|
||||
If you suspect the index is out of sync with the filesystem:
|
||||
```bash
|
||||
sd-cli index verify ~/Documents
|
||||
```
|
||||
|
||||
### After Manual File Operations
|
||||
If you've manually modified files outside Spacedrive:
|
||||
```bash
|
||||
# You edited files manually
|
||||
sd-cli index verify /path/that/changed
|
||||
```
|
||||
|
||||
### Performance Testing
|
||||
To verify the watcher is working correctly:
|
||||
```bash
|
||||
# Make changes, then verify
|
||||
touch /test/newfile.txt
|
||||
sleep 1
|
||||
sd-cli index verify /test
|
||||
```
|
||||
|
||||
### Pre-Sync Validation
|
||||
Before syncing a library to ensure data integrity:
|
||||
```bash
|
||||
sd-cli index verify / # Verify entire library
|
||||
```
|
||||
|
||||
## Exit Codes
|
||||
|
||||
| Code | Meaning |
|
||||
|------|---------|
|
||||
| 0 | Index is valid (no issues found) |
|
||||
| 1 | Index has diverged (issues found) |
|
||||
| 2 | Verification failed (error occurred) |
|
||||
|
||||
## Use in Scripts
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# Automated integrity check
|
||||
|
||||
if sd-cli index verify /data/important --detailed=false; then
|
||||
echo "✅ Index is valid"
|
||||
else
|
||||
echo "❌ Index has issues - running rescan"
|
||||
sd-cli location rescan <location-id>
|
||||
fi
|
||||
```
|
||||
|
||||
## Performance Characteristics
|
||||
|
||||
| Path Size | Typical Duration | Memory Usage |
|
||||
|-----------|------------------|--------------|
|
||||
| 100 files | <1 second | ~10 MB |
|
||||
| 1,000 files | 1-3 seconds | ~50 MB |
|
||||
| 10,000 files | 10-30 seconds | ~200 MB |
|
||||
| 100,000 files | 1-5 minutes | ~1 GB |
|
||||
|
||||
With `--verify-content`:
|
||||
- Add 50-200% time overhead (depends on file sizes)
|
||||
- Requires reading file contents for hashing
|
||||
|
||||
## Common Use Cases
|
||||
|
||||
### 1. Verify Location After Import
|
||||
```bash
|
||||
sd-cli location add ~/Photos --name "Photos"
|
||||
# Wait for indexing to complete
|
||||
sd-cli index verify ~/Photos
|
||||
```
|
||||
|
||||
### 2. Debug Watcher Issues
|
||||
```bash
|
||||
# Monitor filesystem
|
||||
watch -n 5 'sd-cli index verify /watched/path --detailed=false'
|
||||
```
|
||||
|
||||
### 3. Find Orphaned Entries
|
||||
```bash
|
||||
# Check for stale entries
|
||||
sd-cli index verify / | grep "Stale in index"
|
||||
```
|
||||
|
||||
### 4. Validate After Bulk Operations
|
||||
```bash
|
||||
# After moving many files
|
||||
mv ~/old-location/* ~/new-location/
|
||||
sd-cli index verify ~/new-location
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### "Path does not exist"
|
||||
- Ensure the path is correct and accessible
|
||||
- Check permissions
|
||||
|
||||
### "No entries found in database"
|
||||
- Path might not be part of a managed location
|
||||
- Run `sd-cli location list` to see indexed locations
|
||||
|
||||
### High number of "Missing from Index"
|
||||
- Location watcher might be disabled
|
||||
- Files were added manually without indexing
|
||||
- Run `sd-cli location rescan <location-id>` to fix
|
||||
|
||||
### High number of "Stale in Index"
|
||||
- Files were deleted manually
|
||||
- Database not updated
|
||||
- Consider running cleanup
|
||||
|
||||
## API Access
|
||||
|
||||
The verification can also be triggered programmatically:
|
||||
|
||||
```rust
|
||||
use sd_core::ops::indexing::verify::{IndexVerifyAction, IndexVerifyInput};
|
||||
|
||||
let input = IndexVerifyInput::new(PathBuf::from("/path/to/verify"));
|
||||
let result = IndexVerifyAction::from_input(input)?
|
||||
.execute(library, context)
|
||||
.await?;
|
||||
|
||||
if !result.is_valid {
|
||||
println!("Found {} issues", result.report.total_issues());
|
||||
}
|
||||
```
|
||||
|
||||
## Future Enhancements
|
||||
|
||||
- `--auto-fix`: Automatically repair issues (add missing, remove stale)
|
||||
- `--watch`: Continuously verify and report drift
|
||||
- `--json`: Machine-readable output for automation
|
||||
- `--compare-with <snapshot>`: Compare current state with previous snapshot
|
||||
- `--export-snapshot`: Save current state for future comparison
|
||||
|
||||
589
docs/core/additional_test_scenarios.md
Normal file
589
docs/core/additional_test_scenarios.md
Normal file
@@ -0,0 +1,589 @@
|
||||
# Additional Test Scenarios for Location Watcher
|
||||
|
||||
## Currently Passing ✅
|
||||
1. Initial indexing
|
||||
2. File creation
|
||||
3. File modification
|
||||
4. Directory creation
|
||||
5. Nested file creation
|
||||
6. File renaming (same directory)
|
||||
7. File moving (different directory)
|
||||
|
||||
## Next Priority Tests
|
||||
|
||||
### Tier 1: Critical Operations (Should work next)
|
||||
|
||||
#### Scenario 8: File Deletion
|
||||
```rust
|
||||
println!("\n--- Scenario 8: File Deletion ---");
|
||||
|
||||
// Delete a file
|
||||
let entry_before = harness.verify_entry_exists("initial").await?;
|
||||
let entry_id = entry_before.id;
|
||||
|
||||
harness.delete_file("initial.txt").await?;
|
||||
harness.wait_for_fs_event(
|
||||
FsRawEventKind::Remove { path: harness.path("initial.txt") },
|
||||
30
|
||||
).await?;
|
||||
|
||||
// Verify entry no longer exists
|
||||
harness.verify_entry_not_exists("initial").await?;
|
||||
harness.verify_entry_count(6).await?; // One less entry
|
||||
|
||||
// Verify entry is actually deleted (not just orphaned)
|
||||
let entry_still_exists = entities::entry::Entity::find_by_id(entry_id)
|
||||
.one(harness.library.db().conn())
|
||||
.await?;
|
||||
assert!(entry_still_exists.is_none(), "Entry should be deleted from database");
|
||||
```
|
||||
|
||||
**Known Issue**: Currently causes task panic - needs investigation
|
||||
|
||||
#### Scenario 9: Directory Deletion (With Contents)
|
||||
```rust
|
||||
println!("\n--- Scenario 9: Directory Deletion ---");
|
||||
|
||||
// Create directory with multiple files
|
||||
harness.create_dir("temp").await?;
|
||||
harness.wait_for_fs_event(
|
||||
FsRawEventKind::Create { path: harness.path("temp") },
|
||||
30
|
||||
).await?;
|
||||
|
||||
harness.create_file("temp/file1.txt", "content 1").await?;
|
||||
harness.create_file("temp/file2.txt", "content 2").await?;
|
||||
harness.create_file("temp/file3.txt", "content 3").await?;
|
||||
|
||||
// Wait for all files to be indexed
|
||||
tokio::time::sleep(Duration::from_secs(1)).await;
|
||||
let count_before = count_location_entries(&harness.library, harness.location_id).await?;
|
||||
|
||||
// Delete entire directory
|
||||
harness.delete_dir("temp").await?;
|
||||
harness.wait_for_fs_event(
|
||||
FsRawEventKind::Remove { path: harness.path("temp") },
|
||||
30
|
||||
).await?;
|
||||
|
||||
// Wait for cascade deletion
|
||||
tokio::time::sleep(Duration::from_millis(500)).await;
|
||||
|
||||
// Verify all entries are gone (directory + 3 files = 4 entries)
|
||||
let count_after = count_location_entries(&harness.library, harness.location_id).await?;
|
||||
assert_eq!(count_after, count_before - 4, "Should delete directory and all contents");
|
||||
```
|
||||
|
||||
### Tier 2: Complex Rename Operations
|
||||
|
||||
#### Scenario 10: Bulk Renames
|
||||
```rust
|
||||
println!("\n--- Scenario 10: Bulk Renames ---");
|
||||
|
||||
// Create 10 files
|
||||
for i in 1..=10 {
|
||||
harness.create_file(&format!("bulk-{}.txt", i), "content").await?;
|
||||
}
|
||||
tokio::time::sleep(Duration::from_secs(1)).await;
|
||||
harness.verify_entry_count(count_before + 10).await?;
|
||||
|
||||
// Rename all files rapidly
|
||||
let start = Instant::now();
|
||||
for i in 1..=10 {
|
||||
harness.rename_file(
|
||||
&format!("bulk-{}.txt", i),
|
||||
&format!("renamed-bulk-{}.txt", i)
|
||||
).await?;
|
||||
}
|
||||
|
||||
// Wait for all rename events
|
||||
tokio::time::sleep(Duration::from_secs(1)).await;
|
||||
|
||||
// Verify all renamed correctly
|
||||
for i in 1..=10 {
|
||||
harness.verify_entry_exists(&format!("renamed-bulk-{}", i)).await?;
|
||||
harness.verify_entry_not_exists(&format!("bulk-{}", i)).await?;
|
||||
}
|
||||
|
||||
let duration = start.elapsed();
|
||||
println!("✓ Renamed 10 files in {:?}", duration);
|
||||
assert!(duration.as_millis() < 2000, "Bulk renames should complete in <2s");
|
||||
harness.verify_entry_count(count_before + 10).await?; // Same count!
|
||||
```
|
||||
|
||||
**Success Metric**: 10 renames in <2s, no duplicates
|
||||
|
||||
#### Scenario 11: Rename Chain (A→B, B→C)
|
||||
```rust
|
||||
println!("\n--- Scenario 11: Rename Chain ---");
|
||||
|
||||
harness.create_file("step1.txt", "content").await?;
|
||||
tokio::time::sleep(Duration::from_millis(600)).await;
|
||||
|
||||
let entry_start = harness.verify_entry_exists("step1").await?;
|
||||
let entry_id = entry_start.id;
|
||||
|
||||
// Rapid renames: step1 → step2 → step3
|
||||
harness.rename_file("step1.txt", "step2.txt").await?;
|
||||
tokio::time::sleep(Duration::from_millis(50)).await; // Small delay
|
||||
harness.rename_file("step2.txt", "step3.txt").await?;
|
||||
|
||||
tokio::time::sleep(Duration::from_secs(1)).await;
|
||||
|
||||
// Verify final state
|
||||
let entry_end = harness.verify_entry_exists("step3").await?;
|
||||
assert_eq!(entry_id, entry_end.id, "Entry ID should be preserved through rename chain");
|
||||
harness.verify_entry_not_exists("step1").await?;
|
||||
harness.verify_entry_not_exists("step2").await?;
|
||||
```
|
||||
|
||||
**Tests**: Worker's rename chain collapsing logic
|
||||
|
||||
### Tier 3: Edge Cases
|
||||
|
||||
#### Scenario 12: Hidden Files
|
||||
```rust
|
||||
println!("\n--- Scenario 12: Hidden Files ---");
|
||||
|
||||
#[cfg(unix)]
|
||||
{
|
||||
harness.create_file(".hidden-file", "secret").await?;
|
||||
tokio::time::sleep(Duration::from_millis(600)).await;
|
||||
|
||||
// Hidden files should still be indexed
|
||||
harness.verify_entry_exists(".hidden-file").await?;
|
||||
|
||||
// Rename hidden file
|
||||
harness.rename_file(".hidden-file", ".hidden-renamed").await?;
|
||||
tokio::time::sleep(Duration::from_millis(600)).await;
|
||||
harness.verify_entry_exists(".hidden-renamed").await?;
|
||||
}
|
||||
```
|
||||
|
||||
#### Scenario 13: Symlink Handling
|
||||
```rust
|
||||
println!("\n--- Scenario 13: Symlink Handling ---");
|
||||
|
||||
harness.create_file("target.txt", "target content").await?;
|
||||
tokio::time::sleep(Duration::from_millis(600)).await;
|
||||
|
||||
#[cfg(unix)]
|
||||
{
|
||||
std::os::unix::fs::symlink(
|
||||
harness.path("target.txt"),
|
||||
harness.path("link.txt")
|
||||
)?;
|
||||
|
||||
tokio::time::sleep(Duration::from_millis(600)).await;
|
||||
|
||||
// Verify symlink is detected
|
||||
let entries = get_location_entries(&harness.library, harness.location_id).await?;
|
||||
let symlink_entry = entries.iter().find(|e| e.name == "link");
|
||||
assert!(symlink_entry.is_some(), "Symlink should be indexed");
|
||||
}
|
||||
```
|
||||
|
||||
#### Scenario 14: Rapid File Modifications (Debouncing Test)
|
||||
```rust
|
||||
println!("\n--- Scenario 14: Rapid Modifications ---");
|
||||
|
||||
harness.create_file("rapidly-changing.txt", "v1").await?;
|
||||
tokio::time::sleep(Duration::from_millis(600)).await;
|
||||
|
||||
let start_count = count_location_entries(&harness.library, harness.location_id).await?;
|
||||
|
||||
// Modify the same file 20 times rapidly
|
||||
for i in 2..=20 {
|
||||
harness.modify_file("rapidly-changing.txt", &format!("v{}", i)).await?;
|
||||
tokio::time::sleep(Duration::from_millis(50)).await;
|
||||
}
|
||||
|
||||
// Wait for debouncing to settle
|
||||
tokio::time::sleep(Duration::from_secs(2)).await;
|
||||
|
||||
// Should NOT create 20 entries - should coalesce into updates
|
||||
let end_count = count_location_entries(&harness.library, harness.location_id).await?;
|
||||
assert_eq!(end_count, start_count, "Rapid modifications should not create duplicates");
|
||||
|
||||
// Verify final content
|
||||
let entry = harness.verify_entry_exists("rapidly-changing").await?;
|
||||
// Size should reflect last modification
|
||||
```
|
||||
|
||||
**Tests**: Debouncing and coalescing logic
|
||||
|
||||
#### Scenario 15: Same Name After Delete (Recreate)
|
||||
```rust
|
||||
println!("\n--- Scenario 15: Recreate Same Filename ---");
|
||||
|
||||
harness.create_file("temp-file.txt", "first version").await?;
|
||||
tokio::time::sleep(Duration::from_millis(600)).await;
|
||||
let entry_first = harness.verify_entry_exists("temp-file").await?;
|
||||
let first_id = entry_first.id;
|
||||
let first_inode = entry_first.inode;
|
||||
|
||||
// Delete it
|
||||
harness.delete_file("temp-file.txt").await?;
|
||||
tokio::time::sleep(Duration::from_millis(600)).await;
|
||||
harness.verify_entry_not_exists("temp-file").await?;
|
||||
|
||||
// Create new file with same name
|
||||
harness.create_file("temp-file.txt", "second version different content").await?;
|
||||
tokio::time::sleep(Duration::from_millis(600)).await;
|
||||
|
||||
let entry_second = harness.verify_entry_exists("temp-file").await?;
|
||||
|
||||
// Should be a DIFFERENT entry (different ID and inode)
|
||||
assert_ne!(entry_second.id, first_id, "Should create new entry, not reuse old one");
|
||||
assert_ne!(entry_second.inode, first_inode, "Should have different inode");
|
||||
```
|
||||
|
||||
**Tests**: Entry identity vs filename - ensures we don't reuse deleted entries
|
||||
|
||||
### Tier 4: Performance & Stress Tests
|
||||
|
||||
#### Scenario 16: Bulk Create Performance
|
||||
```rust
|
||||
println!("\n--- Scenario 16: Bulk Create Performance ---");
|
||||
|
||||
let start = Instant::now();
|
||||
|
||||
// Create 100 files rapidly
|
||||
for i in 1..=100 {
|
||||
harness.create_file(&format!("perf-{}.txt", i), "test content").await?;
|
||||
}
|
||||
|
||||
// Wait for all to be indexed
|
||||
tokio::time::sleep(Duration::from_secs(3)).await;
|
||||
|
||||
let duration = start.elapsed();
|
||||
let throughput = 100.0 / duration.as_secs_f64();
|
||||
|
||||
println!("✓ Created 100 files in {:?}", duration);
|
||||
println!(" Throughput: {:.2} files/sec", throughput);
|
||||
|
||||
// Verify all indexed
|
||||
for i in 1..=100 {
|
||||
harness.verify_entry_exists(&format!("perf-{}", i)).await?;
|
||||
}
|
||||
|
||||
// Performance assertions
|
||||
assert!(throughput > 20.0, "Should handle >20 files/sec even with debouncing");
|
||||
assert!(duration.as_secs() < 10, "Should complete 100 files in <10s");
|
||||
```
|
||||
|
||||
**Success Metrics**: >20 files/sec, <10s total, >50% coalescing rate
|
||||
|
||||
#### Scenario 17: Mixed Operations Chaos Test
|
||||
```rust
|
||||
println!("\n--- Scenario 17: Mixed Operations Chaos ---");
|
||||
|
||||
// Rapid mixed operations
|
||||
harness.create_file("chaos1.txt", "1").await?;
|
||||
harness.create_file("chaos2.txt", "2").await?;
|
||||
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||
|
||||
harness.rename_file("chaos1.txt", "chaos1-renamed.txt").await?;
|
||||
harness.modify_file("chaos2.txt", "2 modified").await?;
|
||||
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||
|
||||
harness.create_file("chaos3.txt", "3").await?;
|
||||
harness.delete_file("chaos2.txt").await?; // If deletion works
|
||||
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||
|
||||
harness.rename_file("chaos3.txt", "chaos-final.txt").await?;
|
||||
|
||||
// Wait for all operations to settle
|
||||
tokio::time::sleep(Duration::from_secs(2)).await;
|
||||
|
||||
// Verify final state
|
||||
harness.verify_entry_exists("chaos1-renamed").await?;
|
||||
harness.verify_entry_exists("chaos-final").await?;
|
||||
harness.verify_entry_not_exists("chaos1").await?;
|
||||
harness.verify_entry_not_exists("chaos2").await?; // If deletion works
|
||||
harness.verify_entry_not_exists("chaos3").await?;
|
||||
```
|
||||
|
||||
**Tests**: Correctness under complex interleaved operations
|
||||
|
||||
### Tier 5: Platform-Specific Edge Cases
|
||||
|
||||
#### Scenario 18: Special Characters in Filenames
|
||||
```rust
|
||||
println!("\n--- Scenario 18: Special Characters ---");
|
||||
|
||||
let special_names = vec![
|
||||
"file with spaces.txt",
|
||||
"file-with-dashes.txt",
|
||||
"file_with_underscores.txt",
|
||||
"file (with parens).txt",
|
||||
"file[with brackets].txt",
|
||||
"file{with braces}.txt",
|
||||
"日本語.txt", // Japanese
|
||||
"émojis🎉.txt", // Unicode emoji
|
||||
];
|
||||
|
||||
for name in special_names {
|
||||
harness.create_file(name, "content").await?;
|
||||
}
|
||||
|
||||
tokio::time::sleep(Duration::from_secs(1)).await;
|
||||
|
||||
// Verify all were indexed correctly
|
||||
for name in special_names {
|
||||
let stem = Path::new(name).file_stem().unwrap().to_str().unwrap();
|
||||
harness.verify_entry_exists(stem).await?;
|
||||
}
|
||||
```
|
||||
|
||||
#### Scenario 19: Very Long Filenames
|
||||
```rust
|
||||
println!("\n--- Scenario 19: Long Filenames ---");
|
||||
|
||||
// Create file with 200-character name
|
||||
let long_name = format!("{}.txt", "a".repeat(200));
|
||||
harness.create_file(&long_name, "content").await?;
|
||||
|
||||
tokio::time::sleep(Duration::from_millis(600)).await;
|
||||
|
||||
let stem = "a".repeat(200);
|
||||
harness.verify_entry_exists(&stem).await?;
|
||||
```
|
||||
|
||||
#### Scenario 20: Deep Nesting
|
||||
```rust
|
||||
println!("\n--- Scenario 20: Deep Directory Nesting ---");
|
||||
|
||||
// Create 20-level deep directory structure
|
||||
let mut path = String::new();
|
||||
for i in 1..=20 {
|
||||
if !path.is_empty() {
|
||||
path.push('/');
|
||||
}
|
||||
path.push_str(&format!("level{}", i));
|
||||
}
|
||||
|
||||
harness.create_dir(&path).await?;
|
||||
tokio::time::sleep(Duration::from_millis(600)).await;
|
||||
|
||||
// Create file at deepest level
|
||||
harness.create_file(&format!("{}/deep-file.txt", path), "deep content").await?;
|
||||
tokio::time::sleep(Duration::from_millis(600)).await;
|
||||
|
||||
harness.verify_entry_exists("deep-file").await?;
|
||||
```
|
||||
|
||||
### Tier 6: Rename Edge Cases
|
||||
|
||||
#### Scenario 21: Rename While Modifying
|
||||
```rust
|
||||
println!("\n--- Scenario 21: Rename During Modification ---");
|
||||
|
||||
harness.create_file("busy.txt", "initial").await?;
|
||||
tokio::time::sleep(Duration::from_millis(600)).await;
|
||||
|
||||
let entry_before = harness.verify_entry_exists("busy").await?;
|
||||
|
||||
// Modify and rename almost simultaneously
|
||||
harness.modify_file("busy.txt", "modified content").await?;
|
||||
tokio::time::sleep(Duration::from_millis(50)).await; // Very short delay
|
||||
harness.rename_file("busy.txt", "busy-renamed.txt").await?;
|
||||
|
||||
tokio::time::sleep(Duration::from_secs(1)).await;
|
||||
|
||||
// Verify final state
|
||||
let entry_after = harness.verify_entry_exists("busy-renamed").await?;
|
||||
assert_eq!(entry_after.id, entry_before.id, "Should preserve entry ID");
|
||||
assert_eq!(entry_after.size, 16, "Should have updated size");
|
||||
```
|
||||
|
||||
#### Scenario 22: Rename to Existing Name (Overwrite)
|
||||
```rust
|
||||
println!("\n--- Scenario 22: Rename Overwrite ---");
|
||||
|
||||
harness.create_file("source.txt", "source content").await?;
|
||||
harness.create_file("target.txt", "target content").await?;
|
||||
tokio::time::sleep(Duration::from_millis(600)).await;
|
||||
|
||||
let source_entry = harness.verify_entry_exists("source").await?;
|
||||
let target_entry = harness.verify_entry_exists("target").await?;
|
||||
|
||||
// Rename source to target (overwrites target on filesystem)
|
||||
harness.rename_file("source.txt", "target.txt").await?;
|
||||
|
||||
tokio::time::sleep(Duration::from_secs(1)).await;
|
||||
|
||||
// target.txt should now have source's content and ID
|
||||
let final_entry = harness.verify_entry_exists("target").await?;
|
||||
// On macOS/Unix, the source file's inode is kept
|
||||
assert_eq!(final_entry.inode, source_entry.inode, "Should keep source's inode");
|
||||
harness.verify_entry_not_exists("source").await?;
|
||||
```
|
||||
|
||||
#### Scenario 23: Move to Non-Existent Directory
|
||||
```rust
|
||||
println!("\n--- Scenario 23: Move to Non-Existent Parent ---");
|
||||
|
||||
harness.create_file("orphan.txt", "content").await?;
|
||||
tokio::time::sleep(Duration::from_millis(600)).await;
|
||||
|
||||
// Try to move to directory that doesn't exist
|
||||
// macOS will fail the rename operation, should handle gracefully
|
||||
let result = tokio::fs::rename(
|
||||
harness.path("orphan.txt"),
|
||||
harness.path("nonexistent/orphan.txt")
|
||||
).await;
|
||||
|
||||
assert!(result.is_err(), "Should fail to move to non-existent directory");
|
||||
|
||||
// Entry should still exist at original location
|
||||
harness.verify_entry_exists("orphan").await?;
|
||||
```
|
||||
|
||||
### Tier 7: Performance Validation
|
||||
|
||||
#### Scenario 24: Event Coalescing Rate
|
||||
```rust
|
||||
println!("\n--- Scenario 24: Coalescing Validation ---");
|
||||
|
||||
// Create 100 files in same directory (should heavily coalesce)
|
||||
for i in 1..=100 {
|
||||
harness.create_file(&format!("coalesce-test-{}.txt", i), "c").await?;
|
||||
}
|
||||
|
||||
tokio::time::sleep(Duration::from_secs(2)).await;
|
||||
|
||||
// Get worker metrics
|
||||
if let Some(metrics) = harness.core.services.location_watcher
|
||||
.get_location_metrics(harness.location_id)
|
||||
.await
|
||||
{
|
||||
let coalescing_rate = metrics.coalescing_rate();
|
||||
println!(" Coalescing rate: {:.2}%", coalescing_rate * 100.0);
|
||||
println!(" Events processed: {}", metrics.events_processed());
|
||||
println!(" Batches sent: {}", metrics.batches_sent());
|
||||
println!(" Avg batch size: {:.2}", metrics.avg_batch_size());
|
||||
|
||||
assert!(coalescing_rate > 0.3, "Should coalesce >30% for bulk creates in same dir");
|
||||
}
|
||||
```
|
||||
|
||||
#### Scenario 25: Memory Leak Detection
|
||||
```rust
|
||||
println!("\n--- Scenario 25: Memory Stability ---");
|
||||
|
||||
// Create and delete 1000 files in a loop
|
||||
for round in 1..=10 {
|
||||
for i in 1..=100 {
|
||||
harness.create_file(&format!("temp-{}.txt", i), "content").await?;
|
||||
}
|
||||
|
||||
tokio::time::sleep(Duration::from_millis(600)).await;
|
||||
|
||||
for i in 1..=100 {
|
||||
harness.delete_file(&format!("temp-{}.txt", i)).await?;
|
||||
}
|
||||
|
||||
tokio::time::sleep(Duration::from_millis(600)).await;
|
||||
|
||||
println!(" Round {} complete", round);
|
||||
}
|
||||
|
||||
// Check that buffers were properly cleaned up
|
||||
if let Some(metrics) = harness.core.services.location_watcher
|
||||
.get_location_metrics(harness.location_id)
|
||||
.await
|
||||
{
|
||||
// Queue should be empty or near-empty
|
||||
assert!(metrics.current_queue_depth() < 100, "Queue should be drained");
|
||||
}
|
||||
```
|
||||
|
||||
### Tier 8: Failure Recovery
|
||||
|
||||
#### Scenario 26: Filesystem Race Conditions
|
||||
```rust
|
||||
println!("\n--- Scenario 26: File Disappears During Processing ---");
|
||||
|
||||
// Create file
|
||||
harness.create_file("disappearing.txt", "content").await?;
|
||||
|
||||
// Immediately delete it (before watcher processes)
|
||||
tokio::time::sleep(Duration::from_millis(50)).await;
|
||||
harness.delete_file("disappearing.txt").await?;
|
||||
|
||||
// Wait for processing
|
||||
tokio::time::sleep(Duration::from_secs(1)).await;
|
||||
|
||||
// Should handle gracefully (no crash, no stale entry)
|
||||
let entries = get_location_entries(&harness.library, harness.location_id).await?;
|
||||
assert!(!entries.iter().any(|e| e.name == "disappearing"), "Should not have stale entry");
|
||||
```
|
||||
|
||||
#### Scenario 27: Permission Changes
|
||||
```rust
|
||||
println!("\n--- Scenario 27: Permission Changes ---");
|
||||
|
||||
#[cfg(unix)]
|
||||
{
|
||||
harness.create_file("restricted.txt", "content").await?;
|
||||
tokio::time::sleep(Duration::from_millis(600)).await;
|
||||
|
||||
// Make file read-only
|
||||
let path = harness.path("restricted.txt");
|
||||
let mut perms = tokio::fs::metadata(&path).await?.permissions();
|
||||
perms.set_readonly(true);
|
||||
tokio::fs::set_permissions(&path, perms).await?;
|
||||
|
||||
tokio::time::sleep(Duration::from_millis(600)).await;
|
||||
|
||||
// Verify file is still indexed (permission change should be detected)
|
||||
let entry = harness.verify_entry_exists("restricted").await?;
|
||||
// Check if permissions field is updated in database
|
||||
}
|
||||
```
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
### Quick Smoke Test Suite (Run Often)
|
||||
- Scenarios 1-9: Core functionality
|
||||
- Run time: ~20 seconds
|
||||
|
||||
### Full Functional Test Suite (Run Before Commit)
|
||||
- Scenarios 1-23: All functionality + edge cases
|
||||
- Run time: ~2 minutes
|
||||
|
||||
### Performance Benchmark Suite (Run Weekly)
|
||||
- Scenarios 24-25: Performance validation
|
||||
- Compare metrics against baseline
|
||||
- Run time: ~5 minutes
|
||||
|
||||
### Stress Test Suite (Run Before Release)
|
||||
- Scenarios 26-27: Failure recovery
|
||||
- 10,000+ file operations
|
||||
- Multi-hour soak tests
|
||||
- Run time: Hours
|
||||
|
||||
## Implementation Priority
|
||||
|
||||
1. **Immediate**: Scenario 8 (File Deletion) - Currently broken, high priority
|
||||
2. **Next**: Scenario 9 (Directory Deletion) - Related to #8
|
||||
3. **Then**: Scenarios 10-11 (Bulk renames, chains) - Validate the rename fix
|
||||
4. **Later**: Scenarios 12-23 (Edge cases) - Comprehensive coverage
|
||||
5. **Eventually**: Scenarios 24-27 (Performance, stress) - Quality assurance
|
||||
|
||||
## Success Criteria
|
||||
|
||||
For the watcher to be considered **production-ready**:
|
||||
|
||||
- ✅ All Tier 1-2 scenarios pass (critical operations)
|
||||
- ✅ 90% of Tier 3 scenarios pass (edge cases)
|
||||
- ✅ Performance metrics within 20% of v1
|
||||
- ✅ No memory leaks in 24-hour soak test
|
||||
- ✅ No crashes or panics under normal load
|
||||
- ✅ Cross-platform validation (macOS, Linux, Windows)
|
||||
|
||||
## Current Score: 7/27 Scenarios Implemented ✅
|
||||
|
||||
That's 26% coverage. Let's get to 100%! 🚀
|
||||
|
||||
Reference in New Issue
Block a user