Fix ghost folder bug by scoping indexing to path

- Scope the closure query to the indexing path by resolving its entry_id
from directory_paths and use that for descendants; fall back to the
location root if not found. - Seed all ancestors between the location
root and the indexing path into the in-memory cache during the
processing phase to ensure correct parent links. - Seed ancestor
directories into the create path cache to prevent ghost folders on
subpath indexing. - Add test core/tests/indexing_move_folder_bug_test.rs
reproducing the ghost-folder bug when moving a folder into a managed
location and asserting correct parent linkage.
This commit is contained in:
Jamie Pine
2025-11-21 09:18:03 -08:00
parent 008d05414a
commit 08f2d551ff
5 changed files with 1237 additions and 13 deletions

View File

@@ -6,7 +6,7 @@
use crate::{
filetype::FileTypeRegistry,
infra::{
db::entities::{self, entry_closure},
db::entities::{self, directory_paths, entry_closure},
job::prelude::{JobContext, JobError, JobResult},
},
};
@@ -335,9 +335,23 @@ impl<'a> IndexPersistence for DatabasePersistence<'a> {
None => return Ok(HashMap::new()),
};
// Get all descendants of the location root using the closure table
// Query descendants of the indexing path
let indexing_path_str = indexing_path.to_string_lossy().to_string();
let indexing_path_entry_id = if let Ok(Some(dir_record)) = directory_paths::Entity::find()
.filter(directory_paths::Column::Path.eq(&indexing_path_str))
.one(self.ctx.library_db())
.await
{
// Indexing path exists in DB - use its entry ID
dir_record.entry_id
} else {
// This is safe because if the path doesn't exist, there are no descendants to delete
location_root_entry_id
};
// Get all descendants of the indexing path
let descendant_ids = entry_closure::Entity::find()
.filter(entry_closure::Column::AncestorId.eq(location_root_entry_id))
.filter(entry_closure::Column::AncestorId.eq(indexing_path_entry_id))
.all(self.ctx.library_db())
.await
.map_err(|e| JobError::execution(format!("Failed to query closure table: {}", e)))?
@@ -345,8 +359,8 @@ impl<'a> IndexPersistence for DatabasePersistence<'a> {
.map(|ec| ec.descendant_id)
.collect::<Vec<i32>>();
// Add the root entry itself
let mut all_entry_ids = vec![location_root_entry_id];
// Add the indexing path entry itself
let mut all_entry_ids = vec![indexing_path_entry_id];
all_entry_ids.extend(descendant_ids);
// Fetch all entries (chunked to avoid SQLite variable limit)

View File

@@ -90,10 +90,49 @@ pub async fn run_processing_phase(
location_actual_path.display()
));
// Add the location root entry to the cache so children can find their parent
// Seed cache with ancestor directories from location root to indexing path
// First, add the actual location root
state
.entry_id_cache
.insert(location_root_path.to_path_buf(), location_entry_id);
.insert(location_actual_path.clone(), location_entry_id);
// If we're indexing a subpath (not the location root itself), seed all ancestors
if location_root_path != &location_actual_path {
// Traverse from location root to indexing path, seeding each directory
if let Ok(relative_path) = location_root_path.strip_prefix(&location_actual_path) {
let mut current_path = location_actual_path.clone();
for component in relative_path.components() {
current_path.push(component);
// Look up this ancestor directory in the database
if let Ok(Some(dir_record)) = entities::directory_paths::Entity::find()
.filter(
entities::directory_paths::Column::Path
.eq(current_path.to_string_lossy().to_string()),
)
.one(ctx.library_db())
.await
{
state
.entry_id_cache
.insert(current_path.clone(), dir_record.entry_id);
ctx.log(format!(
"Seeded ancestor {} (entry {}) into cache",
current_path.display(),
dir_record.entry_id
));
} else {
// Ancestor not yet indexed - this is OK, it will be created during this job
ctx.log(format!(
"Ancestor {} not yet in database, will be created during indexing",
current_path.display()
));
}
}
}
}
// Load existing entries for change detection scoped to the indexing path
// Note: location_root_path is the actual path being indexed (could be a subpath of the location)

View File

@@ -328,23 +328,59 @@ async fn handle_create(
// Minimal state provides parent cache used by EntryProcessor
let mut state = IndexerState::new(&crate::domain::addressing::SdPath::local(path));
// Seed the location root entry into cache to scope parent lookup
// This ensures parents are found within THIS location's tree, not another device's location
// with the same path. Without this, create_entry could attach to the wrong location's tree.
// Seed ancestor directories into cache, not just location root
if let Ok(Some(location_record)) = entities::location::Entity::find()
.filter(entities::location::Column::Uuid.eq(location_id))
.one(ctx.library_db())
.await
{
if let Some(location_entry_id) = location_record.entry_id {
// Seed location root
state
.entry_id_cache
.insert(location_root.to_path_buf(), location_entry_id);
debug!(
"Seeded location root {} (entry {}) into cache for scoped parent lookup",
"Seeded location root {} (entry {}) into cache",
location_root.display(),
location_entry_id
);
// Seed all intermediate ancestors between location root and target path
// This ensures parent lookup succeeds for deeply nested paths
if let Ok(stripped) = path.strip_prefix(location_root) {
let mut current_path = location_root.to_path_buf();
for component in stripped
.ancestors()
.skip(1)
.collect::<Vec<_>>()
.iter()
.rev()
{
if let Some(comp_name) = component.file_name() {
current_path.push(comp_name);
// Look up this ancestor in directory_paths table
if let Ok(Some(dir_path_record)) = entities::directory_paths::Entity::find()
.filter(
entities::directory_paths::Column::Path
.eq(current_path.to_string_lossy().to_string()),
)
.one(ctx.library_db())
.await
{
state
.entry_id_cache
.insert(current_path.clone(), dir_path_record.entry_id);
debug!(
"Seeded ancestor {} (entry {}) into cache",
current_path.display(),
dir_path_record.entry_id
);
}
}
}
}
}
}
@@ -423,11 +459,32 @@ async fn handle_create(
// Get the library to access the job manager
if let Some(library) = context.get_library(library_id).await {
// Query the location to get its index_mode policy
let location_record = entities::location::Entity::find()
.filter(entities::location::Column::Uuid.eq(location_id))
.one(ctx.library_db())
.await
.ok()
.flatten();
// Determine index mode from location policy (default to Content if not found)
let index_mode = if let Some(loc) = location_record {
match loc.index_mode.as_str() {
"shallow" => super::job::IndexMode::Shallow,
"content" => super::job::IndexMode::Content,
"deep" => super::job::IndexMode::Deep,
_ => super::job::IndexMode::Content,
}
} else {
super::job::IndexMode::Content
};
// Create a recursive indexer job for this directory subtree
// Use the location's index_mode to respect thumbnail/thumbstrip policies
let indexer_job = super::job::IndexerJob::from_location(
location_id,
crate::domain::addressing::SdPath::local(path),
super::job::IndexMode::Content,
index_mode,
);
// Dispatch the job asynchronously (fire and forget)
@@ -439,7 +496,8 @@ async fn handle_create(
);
} else {
debug!(
"✓ Spawned recursive indexer job for directory: {}",
"✓ Spawned recursive indexer job (mode: {:?}) for directory: {}",
index_mode,
path.display()
);
}

View File

@@ -0,0 +1,550 @@
//! Test to reproduce ghost folder bug when moving folders into managed locations
//!
//! This test reproduces the issue where moving a folder into a managed location
//! triggers a reindex that creates duplicate entries with wrong parent_ids.
//!
//! ## Bug Description
//! When a folder is moved from outside a managed location into it (e.g., moving
//! Desk1 into Desktop), the watcher triggers a reindex at that subpath. During
//! this reindex, entries are created with incorrect parent_id values, pointing
//! to the location root instead of their actual parent directory.
//!
//! ## Expected Behavior
//! - Desk1/ moved into Desktop/
//! - Desk1/Andrew Willis Files/ should have parent_id = Desk1's entry ID
//!
//! ## Actual Behavior
//! - Desk1/Andrew Willis Files/ gets parent_id = Desktop's entry ID (wrong!)
//! - Creates "ghost folders" that appear at Desktop root in API but don't exist there
//!
//! ## Running Test
//! ```bash
//! cargo test -p sd-core --test indexing_move_folder_bug_test -- --nocapture
//! ```
// mod helpers; // Disabled due to compile errors in sync helper
use sd_core::{
infra::{db::entities, event::Event},
location::{create_location, IndexMode, LocationCreateArgs},
Core,
};
use sea_orm::{ColumnTrait, EntityTrait, PaginatorTrait, QueryFilter};
use std::{path::PathBuf, sync::Arc};
use tokio::{fs, sync::Mutex, time::Duration};
use uuid::Uuid;
/// Shared fixture for the ghost-folder reproduction test: owns the core, the
/// library under test, and the directories used for snapshots and analysis.
struct TestHarness {
    /// Root directory holding the managed location, the "outside" folder,
    /// the core data dir, and per-run snapshots.
    test_root: PathBuf,
    /// Spacedrive data directory (retained for the lifetime of the harness).
    data_dir: PathBuf,
    /// Core instance; held so background services (watcher, job system) stay alive.
    core: Core,
    /// The library all DB queries and job polling go through.
    library: Arc<sd_core::library::Library>,
    /// Events copied off the library event bus by a background task.
    event_log: Arc<Mutex<Vec<Event>>>,
    /// Timestamped directory where DB copies, event logs and reports are written.
    snapshot_dir: PathBuf,
}
impl TestHarness {
    /// Build the full test environment: data dir, timestamped snapshot dir,
    /// logging (file + stderr), app config on disk, core, library, and a
    /// background event collector. Order matters: logging is initialized
    /// before the core so core startup is captured, and the event collector
    /// is started before returning so no test-driven events are missed.
    ///
    /// NOTE(review): uses a fixed path under `$HOME/Library/Application Support`
    /// (macOS layout) — presumably the test targets macOS; confirm for CI.
    async fn new(test_name: &str) -> anyhow::Result<Self> {
        // Create test root
        let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string());
        let test_root = PathBuf::from(home)
            .join("Library/Application Support/spacedrive/indexing_bug_tests");
        // Create data directory for spacedrive
        let data_dir = test_root.join("data");
        fs::create_dir_all(&data_dir).await?;
        // Create snapshot directory (timestamped so reruns don't collide)
        let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
        let snapshot_dir = test_root
            .join("snapshots")
            .join(format!("{}_{}", test_name, timestamp));
        fs::create_dir_all(&snapshot_dir).await?;
        // Initialize logging: one layer writes to a file in the snapshot dir,
        // a second mirrors to stderr; try_init tolerates double-initialization.
        let log_file = std::fs::File::create(snapshot_dir.join("test.log"))?;
        use tracing_subscriber::{fmt, layer::SubscriberExt, util::SubscriberInitExt, EnvFilter};
        let _ = tracing_subscriber::registry()
            .with(
                fmt::layer()
                    .with_target(true)
                    .with_thread_ids(true)
                    .with_ansi(false)
                    .with_writer(log_file),
            )
            .with(fmt::layer().with_target(true).with_thread_ids(true))
            .with(
                EnvFilter::try_from_default_env().unwrap_or_else(|_| {
                    EnvFilter::new(
                        "sd_core::ops::indexing=debug,\
sd_core::ops::indexing::entry=trace,\
sd_core::ops::indexing::responder=trace,\
sd_core::location=debug,\
indexing_move_folder_bug_test=debug",
                    )
                }),
            )
            .try_init();
        tracing::info!(
            test_root = %test_root.display(),
            snapshot_dir = %snapshot_dir.display(),
            "Created test environment"
        );
        // Create config (persisted to disk so Core::new picks it up)
        Self::create_test_config(&data_dir)?;
        // Initialize core
        let core = Core::new(data_dir.clone())
            .await
            .map_err(|e| anyhow::anyhow!("Failed to create core: {}", e))?;
        // Create library (no sync peer needed for this reproduction)
        let library = core
            .libraries
            .create_library_no_sync("Bug Reproduction Library", None, core.context.clone())
            .await?;
        // Set up event collection before any test activity generates events
        let event_log = Arc::new(Mutex::new(Vec::new()));
        Self::start_event_collector(&library, event_log.clone());
        Ok(Self {
            test_root,
            data_dir,
            core,
            library,
            event_log,
            snapshot_dir,
        })
    }
    /// Write an `AppConfig` under `data_dir` with networking and volume
    /// monitoring disabled but the location watcher ENABLED — the watcher is
    /// what triggers the reindex-on-move behavior under test. Returns the
    /// config after saving it to disk.
    fn create_test_config(data_dir: &std::path::Path) -> anyhow::Result<sd_core::config::AppConfig> {
        let config = sd_core::config::AppConfig {
            version: 4,
            logging: sd_core::config::LoggingConfig {
                main_filter: "sd_core=debug".to_string(),
                streams: vec![],
            },
            data_dir: data_dir.to_path_buf(),
            log_level: "debug".to_string(),
            telemetry_enabled: false,
            preferences: sd_core::config::Preferences::default(),
            job_logging: sd_core::config::JobLoggingConfig::default(),
            services: sd_core::config::ServiceConfig {
                networking_enabled: false,
                volume_monitoring_enabled: false,
                location_watcher_enabled: true, // Need watcher to trigger reindex on move
            },
        };
        // Persist so Core::new reads the same settings.
        config.save()?;
        Ok(config)
    }
fn start_event_collector(
library: &Arc<sd_core::library::Library>,
event_log: Arc<Mutex<Vec<Event>>>,
) {
let mut subscriber = library.event_bus().subscribe();
tokio::spawn(async move {
while let Ok(event) = subscriber.recv().await {
event_log.lock().await.push(event);
}
});
}
/// Create a test folder structure outside the managed location
async fn create_test_folder_structure(&self, base_path: &PathBuf) -> anyhow::Result<()> {
// Create folder structure: TestFolder/SubFolder1/file1.txt, SubFolder2/file2.txt
let test_folder = base_path.join("TestFolder");
fs::create_dir_all(&test_folder).await?;
let subfolder1 = test_folder.join("SubFolder1");
fs::create_dir_all(&subfolder1).await?;
fs::write(subfolder1.join("file1.txt"), b"test content 1").await?;
fs::write(subfolder1.join("file2.txt"), b"test content 2").await?;
let subfolder2 = test_folder.join("SubFolder2");
fs::create_dir_all(&subfolder2).await?;
fs::write(subfolder2.join("file3.txt"), b"test content 3").await?;
fs::write(subfolder2.join("file4.txt"), b"test content 4").await?;
// Also add a file at TestFolder root
fs::write(test_folder.join("root_file.txt"), b"root content").await?;
tracing::info!(
test_folder = %test_folder.display(),
"Created test folder structure"
);
Ok(())
}
    /// Add location and wait for initial indexing.
    ///
    /// Returns `(location_uuid, location_db_id)` on success.
    async fn add_location(&self, path: &str, name: &str) -> anyhow::Result<(Uuid, i32)> {
        tracing::info!(path = %path, name = %name, "Creating location");
        // Get device record — takes the first device row; assumes exactly one
        // exists in a fresh test library (TODO confirm).
        let device_record = entities::device::Entity::find()
            .one(self.library.db().conn())
            .await?
            .ok_or_else(|| anyhow::anyhow!("Device not found"))?;
        // Create location (use Shallow to avoid thumbnail jobs)
        let location_args = LocationCreateArgs {
            path: PathBuf::from(path),
            name: Some(name.to_string()),
            index_mode: IndexMode::Shallow,
        };
        let location_db_id = create_location(
            self.library.clone(),
            self.library.event_bus(),
            location_args,
            device_record.id,
        )
        .await?;
        // Get location UUID (re-read the row we just created)
        let location_record = entities::location::Entity::find_by_id(location_db_id)
            .one(self.library.db().conn())
            .await?
            .ok_or_else(|| anyhow::anyhow!("Location not found"))?;
        tracing::info!(
            location_uuid = %location_record.uuid,
            location_id = location_db_id,
            "Location created, waiting for indexing"
        );
        // Wait for initial indexing to finish before handing control back
        self.wait_for_indexing().await?;
        Ok((location_record.uuid, location_db_id))
    }
    /// Wait for indexing jobs to complete (ignores thumbnail/processor jobs).
    ///
    /// Polls the running-job list every 200ms; returns Ok once no running job
    /// has "indexer" in its name, bails after a 10s timeout. The initial 500ms
    /// sleep gives jobs time to be registered before the first poll.
    async fn wait_for_indexing(&self) -> anyhow::Result<()> {
        use sd_core::infra::job::JobStatus;
        // Just wait a bit for jobs to start and complete
        tokio::time::sleep(Duration::from_millis(500)).await;
        let start_time = tokio::time::Instant::now();
        let timeout = Duration::from_secs(10);
        loop {
            let all_running = self.library.jobs().list_jobs(Some(JobStatus::Running)).await?;
            // Filter to only indexer jobs (ignore thumbnail/processor jobs)
            // NOTE(review): substring match is case-sensitive — confirm job
            // names actually contain lowercase "indexer".
            let indexer_jobs: Vec<_> = all_running
                .iter()
                .filter(|j| j.name.contains("indexer"))
                .collect();
            if indexer_jobs.is_empty() {
                // No indexer jobs running - we're done
                let entry_count = entities::entry::Entity::find()
                    .count(self.library.db().conn())
                    .await?;
                tracing::info!(entries = entry_count, "Indexing complete");
                return Ok(());
            }
            if start_time.elapsed() > timeout {
                anyhow::bail!(
                    "Indexing timeout - {} indexer jobs still running",
                    indexer_jobs.len()
                );
            }
            tokio::time::sleep(Duration::from_millis(200)).await;
        }
    }
    /// Capture snapshot for post-mortem analysis: copies the library DB,
    /// dumps all collected events as JSON lines, and writes an analysis
    /// report into `snapshot_dir/<phase>/`.
    async fn capture_snapshot(&self, phase: &str) -> anyhow::Result<()> {
        let phase_dir = self.snapshot_dir.join(phase);
        fs::create_dir_all(&phase_dir).await?;
        tracing::info!(phase = %phase, path = %phase_dir.display(), "Capturing snapshot");
        // Copy database (silently skipped if the DB file doesn't exist yet)
        let src_db = self.library.path().join("database.db");
        let dest_db = phase_dir.join("database.db");
        if src_db.exists() {
            fs::copy(&src_db, &dest_db).await?;
        }
        // Write event log, one serialized event per line
        let events = self.event_log.lock().await;
        let mut event_file = fs::File::create(phase_dir.join("events.log")).await?;
        use tokio::io::AsyncWriteExt;
        for event in events.iter() {
            let line = format!("{}\n", serde_json::to_string(event)?);
            event_file.write_all(line.as_bytes()).await?;
        }
        // Write database analysis report alongside the raw artifacts
        self.write_db_analysis(&phase_dir).await?;
        tracing::info!("Snapshot captured");
        Ok(())
    }
    /// Analyze database state and write report (`analysis.md`) into `dest_dir`.
    ///
    /// The report contains three sections: every directory entry with its
    /// resolved path, parent_id links that contradict the `directory_paths`
    /// table (the ghost-folder symptom), and names that appear more than once.
    async fn write_db_analysis(&self, dest_dir: &PathBuf) -> anyhow::Result<()> {
        let mut report = String::new();
        report.push_str("# Database Analysis Report\n\n");
        // Count entries
        let total_entries = entities::entry::Entity::find()
            .count(self.library.db().conn())
            .await?;
        report.push_str(&format!("Total entries: {}\n\n", total_entries));
        // Check for duplicate names with different parents
        let conn = self.library.db().conn();
        // Get all directory entries (Kind == 1 — presumably "directory";
        // TODO confirm against the entry kind enum)
        let dirs = entities::entry::Entity::find()
            .filter(entities::entry::Column::Kind.eq(1))
            .all(conn)
            .await?;
        report.push_str("## Directory Entries\n\n");
        for dir in &dirs {
            // Get full path from directory_paths
            let dir_path = entities::directory_paths::Entity::find()
                .filter(entities::directory_paths::Column::EntryId.eq(dir.id))
                .one(conn)
                .await?;
            let path_str = dir_path
                .map(|dp| dp.path)
                .unwrap_or_else(|| "<no path>".to_string());
            report.push_str(&format!(
                "- ID: {}, Name: '{}', Parent ID: {:?}, Path: {}\n",
                dir.id, dir.name, dir.parent_id, path_str
            ));
        }
        // Check for inconsistencies: a directory's parent_id should reference
        // the entry whose path is the directory's filesystem parent
        report.push_str("\n## Inconsistency Check\n\n");
        for dir in &dirs {
            if let Some(parent_id) = dir.parent_id {
                // Get parent's path
                let parent_path = entities::directory_paths::Entity::find()
                    .filter(entities::directory_paths::Column::EntryId.eq(parent_id))
                    .one(conn)
                    .await?;
                // Get this dir's path
                let dir_path = entities::directory_paths::Entity::find()
                    .filter(entities::directory_paths::Column::EntryId.eq(dir.id))
                    .one(conn)
                    .await?;
                if let (Some(parent_path), Some(dir_path)) = (parent_path, dir_path) {
                    // Check if dir_path actually starts with parent_path
                    let dir_pathbuf = PathBuf::from(&dir_path.path);
                    let parent_pathbuf = PathBuf::from(&parent_path.path);
                    if let Some(actual_parent) = dir_pathbuf.parent() {
                        if actual_parent != parent_pathbuf {
                            report.push_str(&format!(
                                "❌ INCONSISTENCY: Entry '{}' (ID: {})\n",
                                dir.name, dir.id
                            ));
                            report.push_str(&format!(
                                " - parent_id points to: {} ({})\n",
                                parent_id, parent_path.path
                            ));
                            report.push_str(&format!(
                                " - But actual path is: {}\n",
                                dir_path.path
                            ));
                            report.push_str(&format!(
                                " - Actual parent should be: {}\n\n",
                                actual_parent.display()
                            ));
                        }
                    }
                }
            }
        }
        // Check for duplicate entries (same name anywhere in the tree —
        // sufficient here because all test fixture names are unique)
        report.push_str("\n## Duplicate Entry Check\n\n");
        let all_entries = entities::entry::Entity::find().all(conn).await?;
        let mut name_counts: std::collections::HashMap<String, Vec<i32>> = std::collections::HashMap::new();
        for entry in &all_entries {
            name_counts
                .entry(entry.name.clone())
                .or_insert_with(Vec::new)
                .push(entry.id);
        }
        for (name, ids) in name_counts.iter() {
            if ids.len() > 1 {
                report.push_str(&format!("⚠️ Duplicate name '{}': {} entries with IDs {:?}\n", name, ids.len(), ids));
            }
        }
        // Write report
        fs::write(dest_dir.join("analysis.md"), report.as_bytes()).await?;
        Ok(())
    }
}
/// Test: Move folder into managed location and check for ghost entries.
///
/// Reproduces the ghost-folder bug end to end: create a managed location,
/// build a folder tree OUTSIDE it, `rename` the tree into the location, let
/// the watcher-triggered reindex run, then assert that the subfolders'
/// parent_id points at TestFolder (not the location root) and that no
/// duplicate entries were created. Snapshots are captured before and after
/// the move for post-mortem analysis.
///
/// Fix: the original re-declared `outside_path` in Phase 2 with the exact
/// same value bound during cleanup, shadowing it redundantly; the duplicate
/// binding is removed.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_move_folder_creates_ghost_entries() -> anyhow::Result<()> {
    let harness = TestHarness::new("move_folder_bug").await?;
    // Clean up from previous test runs (the harness uses a fixed test root)
    let managed_location_path = harness.test_root.join("ManagedLocation");
    let outside_path = harness.test_root.join("outside");
    if managed_location_path.exists() {
        let _ = fs::remove_dir_all(&managed_location_path).await;
    }
    if outside_path.exists() {
        let _ = fs::remove_dir_all(&outside_path).await;
    }
    // Phase 1: Create a managed location at test_root/ManagedLocation
    tracing::info!("=== Phase 1: Create managed location ===");
    fs::create_dir_all(&managed_location_path).await?;
    let (_location_uuid, _location_id) = harness
        .add_location(managed_location_path.to_str().unwrap(), "ManagedLocation")
        .await?;
    // Capture initial state
    harness.capture_snapshot("01_after_location_creation").await?;
    // Phase 2: Create test folder structure OUTSIDE the managed location
    tracing::info!("=== Phase 2: Create test folder outside managed location ===");
    fs::create_dir_all(&outside_path).await?;
    harness.create_test_folder_structure(&outside_path).await?;
    // Phase 3: Move the folder INTO the managed location
    tracing::info!("=== Phase 3: Move folder into managed location ===");
    let source = outside_path.join("TestFolder");
    let destination = managed_location_path.join("TestFolder");
    tracing::info!(
        from = %source.display(),
        to = %destination.display(),
        "Moving folder"
    );
    // Use fs::rename to simulate moving the folder (single atomic move event)
    fs::rename(&source, &destination).await?;
    tracing::info!("Folder moved, waiting for watcher to detect and trigger reindex");
    // Phase 4: Wait for watcher to detect and reindex
    // The watcher should trigger a reindex at the TestFolder subpath.
    // Give watcher time to detect the new folder and spawn indexer job
    tokio::time::sleep(Duration::from_secs(3)).await;
    // Wait for any indexer jobs triggered by the watcher
    harness.wait_for_indexing().await?;
    // Give it a bit more time to ensure all processing is complete
    tokio::time::sleep(Duration::from_millis(500)).await;
    // Capture final state
    harness.capture_snapshot("02_after_move_and_reindex").await?;
    // Phase 5: Verify database integrity
    tracing::info!("=== Phase 5: Verify database integrity ===");
    let conn = harness.library.db().conn();
    // Get all entries
    let all_entries = entities::entry::Entity::find().all(conn).await?;
    tracing::info!(total_entries = all_entries.len(), "Total entries found");
    // Check for TestFolder
    let test_folder_entry = entities::entry::Entity::find()
        .filter(entities::entry::Column::Name.eq("TestFolder"))
        .one(conn)
        .await?
        .expect("TestFolder should exist");
    tracing::info!(
        test_folder_id = test_folder_entry.id,
        test_folder_parent = ?test_folder_entry.parent_id,
        "Found TestFolder entry"
    );
    // Check SubFolder1 and SubFolder2
    let subfolder1 = entities::entry::Entity::find()
        .filter(entities::entry::Column::Name.eq("SubFolder1"))
        .one(conn)
        .await?
        .expect("SubFolder1 should exist");
    let subfolder2 = entities::entry::Entity::find()
        .filter(entities::entry::Column::Name.eq("SubFolder2"))
        .one(conn)
        .await?
        .expect("SubFolder2 should exist");
    tracing::info!(
        subfolder1_id = subfolder1.id,
        subfolder1_parent = ?subfolder1.parent_id,
        subfolder2_id = subfolder2.id,
        subfolder2_parent = ?subfolder2.parent_id,
        "Found subfolder entries"
    );
    // CRITICAL ASSERTION: SubFolder1 and SubFolder2 should have TestFolder as
    // parent — the ghost-folder bug made them point at the location root.
    assert_eq!(
        subfolder1.parent_id,
        Some(test_folder_entry.id),
        "SubFolder1 should have TestFolder (ID: {}) as parent, but has {:?}",
        test_folder_entry.id,
        subfolder1.parent_id
    );
    assert_eq!(
        subfolder2.parent_id,
        Some(test_folder_entry.id),
        "SubFolder2 should have TestFolder (ID: {}) as parent, but has {:?}",
        test_folder_entry.id,
        subfolder2.parent_id
    );
    // Verify no duplicate entries (every fixture name must appear exactly once)
    let mut name_counts: std::collections::HashMap<String, usize> = std::collections::HashMap::new();
    for entry in &all_entries {
        *name_counts.entry(entry.name.clone()).or_insert(0) += 1;
    }
    for (name, count) in name_counts.iter() {
        if *count > 1 {
            tracing::error!(name = %name, count = count, "Found duplicate entries");
        }
        assert_eq!(
            *count, 1,
            "Entry '{}' appears {} times (should be 1)",
            name, count
        );
    }
    tracing::info!("✅ All assertions passed - no ghost entries created");
    Ok(())
}

View File

@@ -0,0 +1,563 @@
//! Test to reproduce ghost folder bug when moving folders into managed locations
//!
//! This test reproduces the issue where moving a folder into a managed location
//! triggers a reindex that creates duplicate entries with wrong parent_ids.
//!
//! ## Bug Description
//! When a folder is moved from outside a managed location into it (e.g., moving
//! Desk1 into Desktop), the watcher triggers a reindex at that subpath. During
//! this reindex, entries are created with incorrect parent_id values, pointing
//! to the location root instead of their actual parent directory.
//!
//! ## Expected Behavior
//! - Desk/ moved into Desktop/
//! - Desk/Subfolder/ should have parent_id = Desk's entry ID
//!
//! ## Actual Behavior
//! - Desk/Subfolder/ gets parent_id = Desktop's entry ID (wrong!)
//! - Creates "ghost folders" that appear at Desktop root in API but don't exist there
//!
//! ## Running Test
//! ```bash
//! cargo test -p sd-core --test indexing_move_folder_bug_test -- --nocapture
//! ```
// mod helpers; // Disabled due to compile errors in sync helper
use sd_core::{
infra::{db::entities, event::Event},
location::{create_location, IndexMode, LocationCreateArgs},
Core,
};
use sea_orm::{ColumnTrait, EntityTrait, PaginatorTrait, QueryFilter};
use std::{path::PathBuf, sync::Arc};
use tokio::{fs, sync::Mutex, time::Duration};
use uuid::Uuid;
/// Shared fixture for the ghost-folder reproduction test: owns the core, the
/// library under test, and the directories used for snapshots and analysis.
struct TestHarness {
    /// Root directory holding the managed location, the "outside" folder,
    /// the core data dir, and per-run snapshots.
    test_root: PathBuf,
    /// Spacedrive data directory (retained for the lifetime of the harness).
    data_dir: PathBuf,
    /// Core instance; held so background services (watcher, job system) stay alive.
    core: Core,
    /// The library all DB queries and job polling go through.
    library: Arc<sd_core::library::Library>,
    /// Events copied off the library event bus by a background task.
    event_log: Arc<Mutex<Vec<Event>>>,
    /// Timestamped directory where DB copies, event logs and reports are written.
    snapshot_dir: PathBuf,
}
impl TestHarness {
    /// Build the full test environment: data dir, timestamped snapshot dir,
    /// logging (file + stderr), app config on disk, core, library, and a
    /// background event collector started before returning so no events are
    /// missed.
    ///
    /// NOTE(review): fixed path under `$HOME/Library/Application Support`
    /// (macOS layout) — confirm portability for CI.
    async fn new(test_name: &str) -> anyhow::Result<Self> {
        // Create test root
        let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string());
        let test_root =
            PathBuf::from(home).join("Library/Application Support/spacedrive/indexing_bug_tests");
        // Create data directory for spacedrive
        let data_dir = test_root.join("data");
        fs::create_dir_all(&data_dir).await?;
        // Create snapshot directory (timestamped so reruns don't collide)
        let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
        let snapshot_dir = test_root
            .join("snapshots")
            .join(format!("{}_{}", test_name, timestamp));
        fs::create_dir_all(&snapshot_dir).await?;
        // Initialize logging: file layer into the snapshot dir plus a stderr
        // mirror; try_init tolerates double-initialization across tests.
        let log_file = std::fs::File::create(snapshot_dir.join("test.log"))?;
        use tracing_subscriber::{fmt, layer::SubscriberExt, util::SubscriberInitExt, EnvFilter};
        let _ = tracing_subscriber::registry()
            .with(
                fmt::layer()
                    .with_target(true)
                    .with_thread_ids(true)
                    .with_ansi(false)
                    .with_writer(log_file),
            )
            .with(fmt::layer().with_target(true).with_thread_ids(true))
            .with(EnvFilter::try_from_default_env().unwrap_or_else(|_| {
                EnvFilter::new(
                    "sd_core::ops::indexing=debug,\
sd_core::ops::indexing::entry=trace,\
sd_core::ops::indexing::responder=trace,\
sd_core::location=debug,\
indexing_move_folder_bug_test=debug",
                )
            }))
            .try_init();
        tracing::info!(
            test_root = %test_root.display(),
            snapshot_dir = %snapshot_dir.display(),
            "Created test environment"
        );
        // Create config (persisted to disk so Core::new picks it up)
        Self::create_test_config(&data_dir)?;
        // Initialize core
        let core = Core::new(data_dir.clone())
            .await
            .map_err(|e| anyhow::anyhow!("Failed to create core: {}", e))?;
        // Create library (no sync peer needed for this reproduction)
        let library = core
            .libraries
            .create_library_no_sync("Bug Reproduction Library", None, core.context.clone())
            .await?;
        // Set up event collection before any test activity generates events
        let event_log = Arc::new(Mutex::new(Vec::new()));
        Self::start_event_collector(&library, event_log.clone());
        Ok(Self {
            test_root,
            data_dir,
            core,
            library,
            event_log,
            snapshot_dir,
        })
    }
    /// Write an `AppConfig` under `data_dir` with networking and volume
    /// monitoring disabled but the location watcher ENABLED — the watcher is
    /// what triggers the reindex-on-move behavior under test.
    fn create_test_config(
        data_dir: &std::path::Path,
    ) -> anyhow::Result<sd_core::config::AppConfig> {
        let config = sd_core::config::AppConfig {
            version: 4,
            logging: sd_core::config::LoggingConfig {
                main_filter: "sd_core=debug".to_string(),
                streams: vec![],
            },
            data_dir: data_dir.to_path_buf(),
            log_level: "debug".to_string(),
            telemetry_enabled: false,
            preferences: sd_core::config::Preferences::default(),
            job_logging: sd_core::config::JobLoggingConfig::default(),
            services: sd_core::config::ServiceConfig {
                networking_enabled: false,
                volume_monitoring_enabled: false,
                location_watcher_enabled: true, // Need watcher to trigger reindex on move
            },
        };
        // Persist so Core::new reads the same settings.
        config.save()?;
        Ok(config)
    }
    /// Spawn a background task that drains the library's event bus into
    /// `event_log`; the task ends when the bus returns an error (e.g. closed).
    fn start_event_collector(
        library: &Arc<sd_core::library::Library>,
        event_log: Arc<Mutex<Vec<Event>>>,
    ) {
        let mut subscriber = library.event_bus().subscribe();
        tokio::spawn(async move {
            while let Ok(event) = subscriber.recv().await {
                event_log.lock().await.push(event);
            }
        });
    }
    /// Create a test folder structure outside the managed location:
    /// `TestFolder/{SubFolder1/{file1,file2}.txt, SubFolder2/{file3,file4}.txt,
    /// root_file.txt}` underneath `base_path`.
    async fn create_test_folder_structure(&self, base_path: &PathBuf) -> anyhow::Result<()> {
        let test_folder = base_path.join("TestFolder");
        fs::create_dir_all(&test_folder).await?;
        let subfolder1 = test_folder.join("SubFolder1");
        fs::create_dir_all(&subfolder1).await?;
        fs::write(subfolder1.join("file1.txt"), b"test content 1").await?;
        fs::write(subfolder1.join("file2.txt"), b"test content 2").await?;
        let subfolder2 = test_folder.join("SubFolder2");
        fs::create_dir_all(&subfolder2).await?;
        fs::write(subfolder2.join("file3.txt"), b"test content 3").await?;
        fs::write(subfolder2.join("file4.txt"), b"test content 4").await?;
        // Also add a file at TestFolder root
        fs::write(test_folder.join("root_file.txt"), b"root content").await?;
        tracing::info!(
            test_folder = %test_folder.display(),
            "Created test folder structure"
        );
        Ok(())
    }
    /// Add location and wait for initial indexing.
    ///
    /// Returns `(location_uuid, location_db_id)` on success.
    async fn add_location(&self, path: &str, name: &str) -> anyhow::Result<(Uuid, i32)> {
        tracing::info!(path = %path, name = %name, "Creating location");
        // Get device record — takes the first device row; assumes exactly one
        // exists in a fresh test library (TODO confirm).
        let device_record = entities::device::Entity::find()
            .one(self.library.db().conn())
            .await?
            .ok_or_else(|| anyhow::anyhow!("Device not found"))?;
        // Create location (use Shallow to avoid thumbnail jobs)
        let location_args = LocationCreateArgs {
            path: PathBuf::from(path),
            name: Some(name.to_string()),
            index_mode: IndexMode::Shallow,
        };
        let location_db_id = create_location(
            self.library.clone(),
            self.library.event_bus(),
            location_args,
            device_record.id,
        )
        .await?;
        // Get location UUID (re-read the row we just created)
        let location_record = entities::location::Entity::find_by_id(location_db_id)
            .one(self.library.db().conn())
            .await?
            .ok_or_else(|| anyhow::anyhow!("Location not found"))?;
        tracing::info!(
            location_uuid = %location_record.uuid,
            location_id = location_db_id,
            "Location created, waiting for indexing"
        );
        // Wait for initial indexing to finish before handing control back
        self.wait_for_indexing().await?;
        Ok((location_record.uuid, location_db_id))
    }
    /// Wait for indexing jobs to complete (ignores thumbnail/processor jobs).
    ///
    /// Polls the running-job list every 200ms; returns Ok once no running job
    /// has "indexer" in its name, bails after a 10s timeout. The initial 500ms
    /// sleep gives jobs time to be registered before the first poll.
    async fn wait_for_indexing(&self) -> anyhow::Result<()> {
        use sd_core::infra::job::JobStatus;
        // Just wait a bit for jobs to start and complete
        tokio::time::sleep(Duration::from_millis(500)).await;
        let start_time = tokio::time::Instant::now();
        let timeout = Duration::from_secs(10);
        loop {
            let all_running = self
                .library
                .jobs()
                .list_jobs(Some(JobStatus::Running))
                .await?;
            // Filter to only indexer jobs (ignore thumbnail/processor jobs)
            // NOTE(review): substring match is case-sensitive — confirm job
            // names actually contain lowercase "indexer".
            let indexer_jobs: Vec<_> = all_running
                .iter()
                .filter(|j| j.name.contains("indexer"))
                .collect();
            if indexer_jobs.is_empty() {
                // No indexer jobs running - we're done
                let entry_count = entities::entry::Entity::find()
                    .count(self.library.db().conn())
                    .await?;
                tracing::info!(entries = entry_count, "Indexing complete");
                return Ok(());
            }
            if start_time.elapsed() > timeout {
                anyhow::bail!(
                    "Indexing timeout - {} indexer jobs still running",
                    indexer_jobs.len()
                );
            }
            tokio::time::sleep(Duration::from_millis(200)).await;
        }
    }
    /// Capture snapshot for post-mortem analysis: copies the library DB,
    /// dumps all collected events as JSON lines, and writes an analysis
    /// report into `snapshot_dir/<phase>/`.
    async fn capture_snapshot(&self, phase: &str) -> anyhow::Result<()> {
        let phase_dir = self.snapshot_dir.join(phase);
        fs::create_dir_all(&phase_dir).await?;
        tracing::info!(phase = %phase, path = %phase_dir.display(), "Capturing snapshot");
        // Copy database (silently skipped if the DB file doesn't exist yet)
        let src_db = self.library.path().join("database.db");
        let dest_db = phase_dir.join("database.db");
        if src_db.exists() {
            fs::copy(&src_db, &dest_db).await?;
        }
        // Write event log, one serialized event per line
        let events = self.event_log.lock().await;
        let mut event_file = fs::File::create(phase_dir.join("events.log")).await?;
        use tokio::io::AsyncWriteExt;
        for event in events.iter() {
            let line = format!("{}\n", serde_json::to_string(event)?);
            event_file.write_all(line.as_bytes()).await?;
        }
        // Write database analysis report alongside the raw artifacts
        self.write_db_analysis(&phase_dir).await?;
        tracing::info!("Snapshot captured");
        Ok(())
    }
/// Analyze database state and write a markdown report (`analysis.md`) into `dest_dir`.
///
/// The report contains: the total entry count, every directory entry with its
/// resolved `directory_paths` path, any parent_id/path inconsistencies (the
/// signature of a ghost folder), and duplicate entry names.
async fn write_db_analysis(&self, dest_dir: &PathBuf) -> anyhow::Result<()> {
    let mut report = String::new();
    report.push_str("# Database Analysis Report\n\n");

    // Count entries
    let total_entries = entities::entry::Entity::find()
        .count(self.library.db().conn())
        .await?;
    report.push_str(&format!("Total entries: {}\n\n", total_entries));

    // Check for duplicate names with different parents
    let conn = self.library.db().conn();

    // Get all directory entries (Kind == 1 — presumably the directory kind; confirm against schema)
    let dirs = entities::entry::Entity::find()
        .filter(entities::entry::Column::Kind.eq(1))
        .all(conn)
        .await?;
    report.push_str("## Directory Entries\n\n");
    for dir in &dirs {
        // Resolve the full path from directory_paths. One query per directory is
        // acceptable here: this runs only against small, test-sized databases.
        let dir_path = entities::directory_paths::Entity::find()
            .filter(entities::directory_paths::Column::EntryId.eq(dir.id))
            .one(conn)
            .await?;
        let path_str = dir_path
            .map(|dp| dp.path)
            .unwrap_or_else(|| "<no path>".to_string());
        report.push_str(&format!(
            "- ID: {}, Name: '{}', Parent ID: {:?}, Path: {}\n",
            dir.id, dir.name, dir.parent_id, path_str
        ));
    }

    // Check for inconsistencies: the row that parent_id points at must match the
    // parent component of the directory's own path; a mismatch means a stale link.
    report.push_str("\n## Inconsistency Check\n\n");
    for dir in &dirs {
        if let Some(parent_id) = dir.parent_id {
            // Get parent's path
            let parent_path = entities::directory_paths::Entity::find()
                .filter(entities::directory_paths::Column::EntryId.eq(parent_id))
                .one(conn)
                .await?;
            // Get this dir's path
            let dir_path = entities::directory_paths::Entity::find()
                .filter(entities::directory_paths::Column::EntryId.eq(dir.id))
                .one(conn)
                .await?;
            if let (Some(parent_path), Some(dir_path)) = (parent_path, dir_path) {
                // Check if dir_path actually starts with parent_path
                let dir_pathbuf = PathBuf::from(&dir_path.path);
                let parent_pathbuf = PathBuf::from(&parent_path.path);
                if let Some(actual_parent) = dir_pathbuf.parent() {
                    if actual_parent != parent_pathbuf {
                        report.push_str(&format!(
                            "INCONSISTENCY: Entry '{}' (ID: {})\n",
                            dir.name, dir.id
                        ));
                        report.push_str(&format!(
                            " - parent_id points to: {} ({})\n",
                            parent_id, parent_path.path
                        ));
                        report
                            .push_str(&format!(" - But actual path is: {}\n", dir_path.path));
                        report.push_str(&format!(
                            " - Actual parent should be: {}\n\n",
                            actual_parent.display()
                        ));
                    }
                }
            }
        }
    }

    // Check for duplicate entries — a ghost folder manifests as the same name
    // indexed twice under different parents.
    report.push_str("\n## Duplicate Entry Check\n\n");
    let all_entries = entities::entry::Entity::find().all(conn).await?;
    let mut name_counts: std::collections::HashMap<String, Vec<i32>> =
        std::collections::HashMap::new();
    for entry in &all_entries {
        // `.or_default()` is the idiomatic form of `.or_insert_with(Vec::new)`.
        name_counts
            .entry(entry.name.clone())
            .or_default()
            .push(entry.id);
    }
    for (name, ids) in name_counts.iter() {
        if ids.len() > 1 {
            report.push_str(&format!(
                " Duplicate name '{}': {} entries with IDs {:?}\n",
                name,
                ids.len(),
                ids
            ));
        }
    }

    // Write report
    fs::write(dest_dir.join("analysis.md"), report.as_bytes()).await?;
    Ok(())
}
}
/// Test: Move folder into managed location and check for ghost entries.
///
/// Reproduces the ghost-folder bug: a folder moved from outside a managed
/// location into it must be reindexed with correct parent links, without
/// duplicate ("ghost") entries appearing in the database.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_move_folder_creates_ghost_entries() -> anyhow::Result<()> {
    let harness = TestHarness::new("move_folder_bug").await?;

    // Clean up from previous test runs so stale state cannot skew results.
    let managed_location_path = harness.test_root.join("ManagedLocation");
    let outside_path = harness.test_root.join("outside");
    if managed_location_path.exists() {
        let _ = fs::remove_dir_all(&managed_location_path).await;
    }
    if outside_path.exists() {
        let _ = fs::remove_dir_all(&outside_path).await;
    }

    // Phase 1: Create a managed location at test_root/ManagedLocation
    tracing::info!("=== Phase 1: Create managed location ===");
    fs::create_dir_all(&managed_location_path).await?;
    let (_location_uuid, _location_id) = harness
        .add_location(managed_location_path.to_str().unwrap(), "ManagedLocation")
        .await?;

    // Capture initial state
    harness
        .capture_snapshot("01_after_location_creation")
        .await?;

    // Phase 2: Create test folder structure OUTSIDE the managed location.
    // (Reuses the `outside_path` binding declared during cleanup above.)
    tracing::info!("=== Phase 2: Create test folder outside managed location ===");
    fs::create_dir_all(&outside_path).await?;
    harness.create_test_folder_structure(&outside_path).await?;

    // Phase 3: Move the folder INTO the managed location
    tracing::info!("=== Phase 3: Move folder into managed location ===");
    let source = outside_path.join("TestFolder");
    let destination = managed_location_path.join("TestFolder");
    tracing::info!(
        from = %source.display(),
        to = %destination.display(),
        "Moving folder"
    );
    // Use fs::rename to simulate an atomic move, as a user's file manager would.
    fs::rename(&source, &destination).await?;
    tracing::info!("Folder moved, waiting for watcher to detect and trigger reindex");

    // Phase 4: Wait for watcher to detect and reindex.
    // The watcher should trigger a reindex at the TestFolder subpath; give it
    // time to notice the new folder and spawn the indexer job.
    tokio::time::sleep(Duration::from_secs(3)).await;
    // Wait for any indexer jobs triggered by the watcher
    harness.wait_for_indexing().await?;
    // Give it a bit more time to ensure all processing is complete
    tokio::time::sleep(Duration::from_millis(500)).await;

    // Capture final state
    harness
        .capture_snapshot("02_after_move_and_reindex")
        .await?;

    // Phase 5: Verify database integrity
    tracing::info!("=== Phase 5: Verify database integrity ===");
    let conn = harness.library.db().conn();

    // Get all entries
    let all_entries = entities::entry::Entity::find().all(conn).await?;
    tracing::info!(total_entries = all_entries.len(), "Total entries found");

    // Check for TestFolder
    let test_folder_entry = entities::entry::Entity::find()
        .filter(entities::entry::Column::Name.eq("TestFolder"))
        .one(conn)
        .await?
        .expect("TestFolder should exist");
    tracing::info!(
        test_folder_id = test_folder_entry.id,
        test_folder_parent = ?test_folder_entry.parent_id,
        "Found TestFolder entry"
    );

    // Check SubFolder1 and SubFolder2
    let subfolder1 = entities::entry::Entity::find()
        .filter(entities::entry::Column::Name.eq("SubFolder1"))
        .one(conn)
        .await?
        .expect("SubFolder1 should exist");
    let subfolder2 = entities::entry::Entity::find()
        .filter(entities::entry::Column::Name.eq("SubFolder2"))
        .one(conn)
        .await?
        .expect("SubFolder2 should exist");
    tracing::info!(
        subfolder1_id = subfolder1.id,
        subfolder1_parent = ?subfolder1.parent_id,
        subfolder2_id = subfolder2.id,
        subfolder2_parent = ?subfolder2.parent_id,
        "Found subfolder entries"
    );

    // CRITICAL ASSERTION: SubFolder1 and SubFolder2 should have TestFolder as
    // parent. The ghost-folder bug manifested as these pointing elsewhere.
    assert_eq!(
        subfolder1.parent_id,
        Some(test_folder_entry.id),
        "SubFolder1 should have TestFolder (ID: {}) as parent, but has {:?}",
        test_folder_entry.id,
        subfolder1.parent_id
    );
    assert_eq!(
        subfolder2.parent_id,
        Some(test_folder_entry.id),
        "SubFolder2 should have TestFolder (ID: {}) as parent, but has {:?}",
        test_folder_entry.id,
        subfolder2.parent_id
    );

    // Verify no duplicate entries — a ghost folder shows up as a duplicated name.
    let mut name_counts: std::collections::HashMap<String, usize> =
        std::collections::HashMap::new();
    for entry in &all_entries {
        *name_counts.entry(entry.name.clone()).or_insert(0) += 1;
    }
    for (name, count) in name_counts.iter() {
        if *count > 1 {
            tracing::error!(name = %name, count = count, "Found duplicate entries");
        }
        assert_eq!(
            *count, 1,
            "Entry '{}' appears {} times (should be 1)",
            name, count
        );
    }
    tracing::info!("All assertions passed - no ghost entries created");
    Ok(())
}