From c3517a554e76def817c22eca9db38af4886b242a Mon Sep 17 00:00:00 2001 From: Jamie Pine Date: Sun, 7 Dec 2025 21:03:51 -0800 Subject: [PATCH] Preserve ephemeral UUIDs during indexing - Remove TTL-based ephemeral cache and switch to a permanent in-memory cache. - Reuse ephemeral UUIDs when creating persistent entries to preserve continuity of user data. - Populate ephemeral UUIDs during the processing phase and expose get_ephemeral_uuid in the indexer state. - Remove the location invalidation hook and related UI usage. --- apps/cli/src/domains/index/mod.rs | 4 +- core/src/ops/core/ephemeral_status/output.rs | 2 - core/src/ops/core/ephemeral_status/query.rs | 1 - core/src/ops/indexing/entry.rs | 19 ++- .../src/ops/indexing/ephemeral/index_cache.rs | 100 ++++---------- core/src/ops/indexing/phases/processing.rs | 14 ++ core/src/ops/indexing/state.rs | 122 ++++++++++++++++++ packages/interface/src/Explorer.tsx | 4 - .../hooks/useLocationChangeInvalidation.ts | 94 -------------- 9 files changed, 180 insertions(+), 180 deletions(-) delete mode 100644 packages/interface/src/hooks/useLocationChangeInvalidation.ts diff --git a/apps/cli/src/domains/index/mod.rs b/apps/cli/src/domains/index/mod.rs index 6581d4129..60fe3d9cf 100644 --- a/apps/cli/src/domains/index/mod.rs +++ b/apps/cli/src/domains/index/mod.rs @@ -249,8 +249,8 @@ pub async fn run(ctx: &Context, cmd: IndexCmd) -> Result<()> { println!("║ EPHEMERAL INDEX CACHE STATUS ║"); println!("╠══════════════════════════════════════════════════════════════╣"); println!( - "║ Total Indexes: {:3} In Progress: {:3} Stale: {:3} ║", - status.total_indexes, status.indexing_in_progress, status.stale_count + "║ Total Indexes: {:3} In Progress: {:3} ║", + status.total_indexes, status.indexing_in_progress ); println!("╚══════════════════════════════════════════════════════════════╝"); diff --git a/core/src/ops/core/ephemeral_status/output.rs b/core/src/ops/core/ephemeral_status/output.rs index 789652b63..b092da1a8 100644 --- a/core/src/ops/core/ephemeral_status/output.rs +++ b/core/src/ops/core/ephemeral_status/output.rs @@ -11,8 +11,6 @@ pub struct EphemeralCacheStatus { pub total_indexes: usize, /// Number of indexes currently being populated pub indexing_in_progress: usize, - /// Number of stale indexes (past TTL) - pub stale_count: usize, /// Details for each cached index pub indexes: Vec, } diff --git a/core/src/ops/core/ephemeral_status/query.rs b/core/src/ops/core/ephemeral_status/query.rs index df03245a1..8e2747efc 100644 --- a/core/src/ops/core/ephemeral_status/query.rs +++ b/core/src/ops/core/ephemeral_status/query.rs @@ -91,7 +91,6 @@ impl CoreQuery for EphemeralCacheStatusQuery { Ok(EphemeralCacheStatus { total_indexes: cache_stats.total_entries, indexing_in_progress: cache_stats.indexing_count, - stale_count: cache_stats.stale_count, indexes, }) } diff --git a/core/src/ops/indexing/entry.rs b/core/src/ops/indexing/entry.rs index 592f25885..16c32cb1a 100644 --- a/core/src/ops/indexing/entry.rs +++ b/core/src/ops/indexing/entry.rs @@ -223,10 +223,21 @@ impl EntryProcessor { }) .unwrap_or_else(|| chrono::Utc::now()); - // All entries get UUIDs immediately for UI normalized caching compatibility. - // Sync readiness is now determined by content_id presence (for regular files) - // or by entry kind (for directories/empty files). - let entry_uuid = Some(Uuid::new_v4()); + // UUID assignment strategy: + // 1. First check if there's an ephemeral UUID to preserve (from previous browsing) + // 2. If not, generate a new UUID + // + // This ensures that files browsed before enabling indexing keep the same UUID + let entry_uuid = if let Some(ephemeral_uuid) = state.get_ephemeral_uuid(&entry.path) { + tracing::debug!( + "Preserving ephemeral UUID {} for {}", + ephemeral_uuid, + entry.path.display() + ); + Some(ephemeral_uuid) + } else { + Some(Uuid::new_v4()) + }; // Find parent entry ID let parent_id = if let Some(parent_path) = entry.path.parent() { diff --git a/core/src/ops/indexing/ephemeral/index_cache.rs b/core/src/ops/indexing/ephemeral/index_cache.rs index 52932cf35..f464dbf16 100644 --- a/core/src/ops/indexing/ephemeral/index_cache.rs +++ b/core/src/ops/indexing/ephemeral/index_cache.rs @@ -3,6 +3,10 @@ //! This module provides a thread-safe cache for storing ephemeral indexes //! by their root path. This allows directory listing queries to reuse //! existing indexes instead of spawning new indexer jobs. +//! +//! The cache is permanent in memory (no TTL or expiration). Entries persist +//! until the daemon restarts or they are explicitly removed. This ensures +//! UUIDs from ephemeral indexing can be preserved when regular indexing is enabled. use crate::ops::indexing::EphemeralIndex; use parking_lot::RwLock; @@ -10,16 +14,10 @@ use std::{ collections::HashMap, path::{Path, PathBuf}, sync::Arc, - time::{Duration, Instant}, + time::Instant, }; use tokio::sync::RwLock as TokioRwLock; -/// Default TTL for ephemeral indexes (5 minutes) -const DEFAULT_TTL: Duration = Duration::from_secs(5 * 60); - -/// Maximum idle time before an index is considered stale (2 minutes) -const MAX_IDLE_TIME: Duration = Duration::from_secs(2 * 60); - /// Cache entry wrapping an ephemeral index with metadata struct CacheEntry { /// The ephemeral index @@ -38,77 +36,40 @@ impl CacheEntry { indexing_in_progress: false, } } - - fn is_stale(&self, ttl: Duration) -> bool { - self.created_at.elapsed() > ttl - } } /// Global cache for ephemeral indexes /// /// Stores ephemeral indexes by their root path for reuse across queries. -/// Indexes are automatically evicted based on TTL and idle time. +/// Indexes persist in memory until the daemon restarts or they are explicitly removed. pub struct EphemeralIndexCache { /// Map of root path to cache entry entries: RwLock>, - /// Time-to-live for cache entries - ttl: Duration, } impl EphemeralIndexCache { - /// Create a new cache with default TTL + /// Create a new cache pub fn new() -> Self { Self { entries: RwLock::new(HashMap::new()), - ttl: DEFAULT_TTL, } } - /// Create a new cache with custom TTL - pub fn with_ttl(ttl: Duration) -> Self { - Self { - entries: RwLock::new(HashMap::new()), - ttl, - } - } - - /// Get an existing index for a path, or None if not cached or stale - /// - /// Also checks if the index is still being populated (indexing in progress). + /// Get an existing index for a path, or None if not cached pub fn get(&self, path: &Path) -> Option>> { let entries = self.entries.read(); - if let Some(entry) = entries.get(path) { - // Check if stale - if entry.is_stale(self.ttl) { - return None; - } - Some(entry.index.clone()) - } else { - None - } + entries.get(path).map(|entry| entry.index.clone()) } /// Get an existing index for a path (exact match only) /// - /// Returns the index if: - /// 1. An index exists for this exact path - /// 2. The index is not stale + /// Returns the index if an index exists for this exact path. /// /// Note: We only use exact matches because ephemeral indexing uses /// IndexScope::Current (single level), so an ancestor index doesn't /// contain the contents of subdirectories. pub fn get_for_path(&self, path: &Path) -> Option>> { - let entries = self.entries.read(); - - // Only exact match - ancestor indexes don't contain subdirectory contents - // because ephemeral indexing uses IndexScope::Current (single level) - if let Some(entry) = entries.get(path) { - if !entry.is_stale(self.ttl) { - return Some(entry.index.clone()); - } - } - - None + self.get(path) } /// Check if indexing is in progress for a path @@ -147,17 +108,10 @@ impl EphemeralIndexCache { } /// Mark indexing as complete for a path - /// - /// This also refreshes the entry's `created_at` timestamp so it's no longer - /// considered stale. This is important because `create_for_indexing()` may - /// have reused an existing stale entry, and without this refresh the entry - /// would remain stale even after being freshly populated. pub fn mark_indexing_complete(&self, path: &Path) { let mut entries = self.entries.write(); if let Some(entry) = entries.get_mut(path) { entry.indexing_in_progress = false; - // Reset created_at so the freshly-populated index is no longer stale - entry.created_at = Instant::now(); } } @@ -167,12 +121,6 @@ impl EphemeralIndexCache { entries.remove(path); } - /// Remove stale entries from the cache - pub fn evict_stale(&self) { - let mut entries = self.entries.write(); - entries.retain(|_, entry| !entry.is_stale(self.ttl)); - } - /// Get the number of cached indexes pub fn len(&self) -> usize { self.entries.read().len() @@ -193,14 +141,20 @@ impl EphemeralIndexCache { let entries = self.entries.read(); let total_entries = entries.len(); let indexing_count = entries.values().filter(|e| e.indexing_in_progress).count(); - let stale_count = entries.values().filter(|e| e.is_stale(self.ttl)).count(); EphemeralIndexCacheStats { total_entries, indexing_count, - stale_count, } } + + /// Get the age of a cached index in seconds + pub fn get_age(&self, path: &Path) -> Option { + let entries = self.entries.read(); + entries + .get(path) + .map(|e| e.created_at.elapsed().as_secs_f64()) + } } impl Default for EphemeralIndexCache { @@ -214,7 +168,6 @@ impl Default for EphemeralIndexCache { pub struct EphemeralIndexCacheStats { pub total_entries: usize, pub indexing_count: usize, - pub stale_count: usize, } #[cfg(test)] @@ -244,7 +197,7 @@ mod tests { let cache = EphemeralIndexCache::new(); let path = PathBuf::from("/test/path"); - let index = cache.create_for_indexing(path.clone()); + let _index = cache.create_for_indexing(path.clone()); assert!(cache.is_indexing(&path)); @@ -284,17 +237,18 @@ mod tests { } #[test] - fn test_stale_detection() { - let cache = EphemeralIndexCache::with_ttl(Duration::from_millis(1)); + fn test_cache_persists() { + // Test that cache entries persist (no TTL expiration) + let cache = EphemeralIndexCache::new(); let path = PathBuf::from("/test/path"); let index = Arc::new(TokioRwLock::new(EphemeralIndex::new(path.clone()))); cache.insert(path.clone(), index); - // Wait for TTL to expire - std::thread::sleep(Duration::from_millis(10)); + // Wait a bit + std::thread::sleep(std::time::Duration::from_millis(100)); - // Should be stale now - assert!(cache.get(&path).is_none()); + // Should still be available (no expiration) + assert!(cache.get(&path).is_some()); } } diff --git a/core/src/ops/indexing/phases/processing.rs b/core/src/ops/indexing/phases/processing.rs index a5b474fca..68de9d604 100644 --- a/core/src/ops/indexing/phases/processing.rs +++ b/core/src/ops/indexing/phases/processing.rs @@ -42,6 +42,20 @@ pub async fn run_processing_phase( total_batches )); + // Populate ephemeral UUIDs for preservation before processing + // This allows entries that were browsed before enabling indexing to keep + // the same UUID, preserving any user data associated with them + let ephemeral_cache = ctx.library().core_context().ephemeral_cache(); + let preserved_count = state + .populate_ephemeral_uuids(ephemeral_cache, location_root_path) + .await; + if preserved_count > 0 { + ctx.log(format!( + "Found {} ephemeral UUIDs to preserve from previous browsing", + preserved_count + )); + } + if total_batches == 0 { ctx.log("No batches to process - transitioning to Aggregation phase"); state.phase = crate::ops::indexing::state::Phase::Aggregation; diff --git a/core/src/ops/indexing/state.rs b/core/src/ops/indexing/state.rs index 4e769f8ee..b1ac7c58a 100644 --- a/core/src/ops/indexing/state.rs +++ b/core/src/ops/indexing/state.rs @@ -9,6 +9,7 @@ use std::{ path::PathBuf, time::{Duration, Instant}, }; +use uuid::Uuid; /// Indexer progress information #[derive(Debug, Clone, Serialize, Deserialize)] @@ -107,6 +108,12 @@ pub struct IndexerState { // Database operations pub(crate) entry_id_cache: HashMap, // path -> entry_id for parent lookups + // Ephemeral UUID preservation + // UUIDs from ephemeral indexing that should be reused when creating persistent entries + // This ensures files browsed before enabling indexing keep the same UUID + #[serde(skip, default)] + pub(crate) ephemeral_uuids: HashMap, + // Change detection pub(crate) existing_entries: HashMap, Option)>, // path -> (id, inode, modified) @@ -149,6 +156,7 @@ impl IndexerState { entry_batches: Vec::new(), entries_for_content: Vec::new(), entry_id_cache: HashMap::new(), + ephemeral_uuids: HashMap::new(), existing_entries: HashMap::new(), stats: Default::default(), errors: Vec::new(), @@ -161,6 +169,51 @@ impl IndexerState { } } + /// Populate ephemeral UUIDs from the ephemeral cache for UUID preservation + /// + /// When a directory is browsed before being added as a managed location, + /// ephemeral indexing assigns UUIDs to each entry. This method extracts + /// those UUIDs so they can be reused when creating persistent database entries, + /// ensuring continuity for any user data (tags, notes, etc.) associated with + /// the ephemeral UUIDs. + pub async fn populate_ephemeral_uuids( + &mut self, + ephemeral_cache: &super::ephemeral::EphemeralIndexCache, + root_path: &std::path::Path, + ) -> usize { + // Try to get an ephemeral index that covers this path + if let Some(index) = ephemeral_cache.get_for_path(root_path) { + let index_read = index.read().await; + + // Get all paths from the entries and look up their UUIDs + let entries = index_read.entries(); + for path in entries.keys() { + if let Some(entry_uuid) = index_read.get_entry_uuid(path) { + self.ephemeral_uuids.insert(path.clone(), entry_uuid); + } + } + + let count = self.ephemeral_uuids.len(); + tracing::info!( + "Populated {} ephemeral UUIDs for preservation from cache covering {}", + count, + root_path.display() + ); + count + } else { + tracing::debug!("No ephemeral index found for path: {}", root_path.display()); + 0 + } + } + + /// Get an ephemeral UUID for a path if one exists + /// + /// Returns the UUID that was assigned during ephemeral indexing, + /// allowing it to be reused for the persistent database entry. + pub fn get_ephemeral_uuid(&self, path: &std::path::Path) -> Option { + self.ephemeral_uuids.get(path).copied() + } + pub fn calculate_rate(&mut self) -> f32 { let elapsed = self.last_progress_time.elapsed(); if elapsed.as_secs() > 0 { @@ -232,3 +285,72 @@ impl IndexerState { Ok(()) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::domain::addressing::SdPath; + + #[test] + fn test_ephemeral_uuid_lookup() { + let sd_path = SdPath::Physical { + device_slug: "local".to_string(), + path: PathBuf::from("/test"), + }; + let mut state = IndexerState::new(&sd_path); + + // Initially no ephemeral UUIDs + assert!(state + .get_ephemeral_uuid(std::path::Path::new("/test/file.txt")) + .is_none()); + + // Add an ephemeral UUID + let test_uuid = Uuid::new_v4(); + state + .ephemeral_uuids + .insert(PathBuf::from("/test/file.txt"), test_uuid); + + // Now we can retrieve it + assert_eq!( + state.get_ephemeral_uuid(std::path::Path::new("/test/file.txt")), + Some(test_uuid) + ); + + // Non-existent path still returns None + assert!(state + .get_ephemeral_uuid(std::path::Path::new("/test/other.txt")) + .is_none()); + } + + #[test] + fn test_ephemeral_uuid_preservation_concept() { + // This test demonstrates the UUID preservation concept: + // When ephemeral_uuids is populated, the same UUID should be used + // instead of generating a new one + + let sd_path = SdPath::Physical { + device_slug: "local".to_string(), + path: PathBuf::from("/test"), + }; + let mut state = IndexerState::new(&sd_path); + + // Simulate an ephemeral UUID from previous browsing + let preserved_uuid = Uuid::new_v4(); + let test_path = PathBuf::from("/test/document.pdf"); + state + .ephemeral_uuids + .insert(test_path.clone(), preserved_uuid); + + // When creating an entry, the code should check get_ephemeral_uuid first + let entry_uuid = if let Some(ephemeral_uuid) = state.get_ephemeral_uuid(&test_path) { + // Preserve the ephemeral UUID + ephemeral_uuid + } else { + // Generate a new UUID + Uuid::new_v4() + }; + + // The preserved UUID should be used + assert_eq!(entry_uuid, preserved_uuid); + } +} diff --git a/packages/interface/src/Explorer.tsx b/packages/interface/src/Explorer.tsx index 3d44d0cac..b3bdde815 100644 --- a/packages/interface/src/Explorer.tsx +++ b/packages/interface/src/Explorer.tsx @@ -7,7 +7,6 @@ import { useParams, } from "react-router-dom"; import { useEffect, useMemo } from "react"; -import { useLocationChangeInvalidation } from "./hooks/useLocationChangeInvalidation"; import { Dialogs } from "@sd/ui"; import { Inspector, type InspectorVariant } from "./Inspector"; import { TopBarProvider, TopBar } from "./TopBar"; @@ -71,9 +70,6 @@ export function ExplorerLayout() { } = useExplorer(); const { selectedFiles, selectFile } = useSelection(); - // Listen for location index_mode changes and invalidate directory listing queries - useLocationChangeInvalidation(); - // Sync route with explorer context for view preferences useEffect(() => { const spaceItemKey = getSpaceItemKeyFromRoute( diff --git a/packages/interface/src/hooks/useLocationChangeInvalidation.ts b/packages/interface/src/hooks/useLocationChangeInvalidation.ts deleted file mode 100644 index ab9dffc89..000000000 --- a/packages/interface/src/hooks/useLocationChangeInvalidation.ts +++ /dev/null @@ -1,94 +0,0 @@ -/** - * useLocationChangeInvalidation - Invalidates directory listing queries when location index_mode changes - * - * When a user enables indexing for a location (index_mode changes from "none" to something else), - * we need to refetch directory listings because: - * - Before: Data came from ephemeral in-memory index - * - After: Data comes from persistent database - * - * This hook subscribes to location events and invalidates affected queries. - */ - -import { useEffect, useRef } from "react"; -import { useQueryClient } from "@tanstack/react-query"; -import { useSpacedriveClient } from "@sd/ts-client/hooks"; -import type { Event, LocationInfo } from "@sd/ts-client"; - -export function useLocationChangeInvalidation() { - const client = useSpacedriveClient(); - const queryClient = useQueryClient(); - const libraryId = client.getCurrentLibraryId(); - - // Track previous index_mode for each location to detect changes - const prevIndexModes = useRef>(new Map()); - - useEffect(() => { - if (!libraryId) return; - - let unsubscribe: (() => void) | undefined; - let isCancelled = false; - - const handleEvent = (event: Event) => { - // Only handle ResourceChanged events for locations - if (typeof event === "string" || !("ResourceChanged" in event)) { - return; - } - - const { resource_type, resource } = event.ResourceChanged; - if (resource_type !== "location") { - return; - } - - const location = resource as LocationInfo; - const locationId = location.id; - const newIndexMode = location.index_mode; - - // Get previous index_mode - const prevIndexMode = prevIndexModes.current.get(locationId); - - // Update tracked index_mode - prevIndexModes.current.set(locationId, newIndexMode); - - // Check if index_mode changed from "none" to something else - // This means the user just enabled indexing - if (prevIndexMode === "none" && newIndexMode !== "none") { - console.log( - `[useLocationChangeInvalidation] Location ${locationId} indexing enabled (${prevIndexMode} -> ${newIndexMode}), invalidating directory_listing queries`, - ); - - // Invalidate all directory_listing queries - // They will refetch and get data from the persistent index instead of ephemeral - queryClient.invalidateQueries({ - predicate: (query) => { - const key = query.queryKey; - return ( - Array.isArray(key) && - key[0] === "query:files.directory_listing" - ); - }, - }); - } - }; - - client - .subscribeFiltered( - { - resource_type: "location", - library_id: libraryId, - }, - handleEvent, - ) - .then((unsub) => { - if (isCancelled) { - unsub(); - } else { - unsubscribe = unsub; - } - }); - - return () => { - isCancelled = true; - unsubscribe?.(); - }; - }, [client, queryClient, libraryId]); -}