Preserve ephemeral UUIDs during indexing

- Remove TTL-based ephemeral cache and switch to a permanent in-memory
  cache.
- Reuse ephemeral UUIDs when creating persistent entries to preserve
  continuity of user data.
- Populate ephemeral UUIDs during the processing phase and expose
  get_ephemeral_uuid in the indexer state.
- Remove the location invalidation hook and related UI usage.
This commit is contained in:
Jamie Pine
2025-12-07 21:03:51 -08:00
parent cf400865f4
commit c3517a554e
9 changed files with 180 additions and 180 deletions

View File

@@ -249,8 +249,8 @@ pub async fn run(ctx: &Context, cmd: IndexCmd) -> Result<()> {
println!("║ EPHEMERAL INDEX CACHE STATUS ║");
println!("╠══════════════════════════════════════════════════════════════╣");
println!(
"║ Total Indexes: {:3} In Progress: {:3} Stale: {:3}",
status.total_indexes, status.indexing_in_progress, status.stale_count
"║ Total Indexes: {:3} In Progress: {:3} ",
status.total_indexes, status.indexing_in_progress
);
println!("╚══════════════════════════════════════════════════════════════╝");

View File

@@ -11,8 +11,6 @@ pub struct EphemeralCacheStatus {
pub total_indexes: usize,
/// Number of indexes currently being populated
pub indexing_in_progress: usize,
/// Number of stale indexes (past TTL)
pub stale_count: usize,
/// Details for each cached index
pub indexes: Vec<EphemeralIndexInfo>,
}

View File

@@ -91,7 +91,6 @@ impl CoreQuery for EphemeralCacheStatusQuery {
Ok(EphemeralCacheStatus {
total_indexes: cache_stats.total_entries,
indexing_in_progress: cache_stats.indexing_count,
stale_count: cache_stats.stale_count,
indexes,
})
}

View File

@@ -223,10 +223,21 @@ impl EntryProcessor {
})
.unwrap_or_else(|| chrono::Utc::now());
// All entries get UUIDs immediately for UI normalized caching compatibility.
// Sync readiness is now determined by content_id presence (for regular files)
// or by entry kind (for directories/empty files).
let entry_uuid = Some(Uuid::new_v4());
// UUID assignment strategy:
// 1. First check if there's an ephemeral UUID to preserve (from previous browsing)
// 2. If not, generate a new UUID
//
// This ensures that files browsed before enabling indexing keep the same UUID
let entry_uuid = if let Some(ephemeral_uuid) = state.get_ephemeral_uuid(&entry.path) {
tracing::debug!(
"Preserving ephemeral UUID {} for {}",
ephemeral_uuid,
entry.path.display()
);
Some(ephemeral_uuid)
} else {
Some(Uuid::new_v4())
};
// Find parent entry ID
let parent_id = if let Some(parent_path) = entry.path.parent() {

View File

@@ -3,6 +3,10 @@
//! This module provides a thread-safe cache for storing ephemeral indexes
//! by their root path. This allows directory listing queries to reuse
//! existing indexes instead of spawning new indexer jobs.
//!
//! The cache is permanent in memory (no TTL or expiration). Entries persist
//! until the daemon restarts or they are explicitly removed. This ensures
//! UUIDs from ephemeral indexing can be preserved when regular indexing is enabled.
use crate::ops::indexing::EphemeralIndex;
use parking_lot::RwLock;
@@ -10,16 +14,10 @@ use std::{
collections::HashMap,
path::{Path, PathBuf},
sync::Arc,
time::{Duration, Instant},
time::Instant,
};
use tokio::sync::RwLock as TokioRwLock;
/// Default TTL for ephemeral indexes (5 minutes)
const DEFAULT_TTL: Duration = Duration::from_secs(5 * 60);
/// Maximum idle time before an index is considered stale (2 minutes)
const MAX_IDLE_TIME: Duration = Duration::from_secs(2 * 60);
/// Cache entry wrapping an ephemeral index with metadata
struct CacheEntry {
/// The ephemeral index
@@ -38,77 +36,40 @@ impl CacheEntry {
indexing_in_progress: false,
}
}
fn is_stale(&self, ttl: Duration) -> bool {
self.created_at.elapsed() > ttl
}
}
/// Global cache for ephemeral indexes
///
/// Stores ephemeral indexes by their root path for reuse across queries.
/// Indexes are automatically evicted based on TTL and idle time.
/// Indexes persist in memory until the daemon restarts or they are explicitly removed.
pub struct EphemeralIndexCache {
/// Map of root path to cache entry
entries: RwLock<HashMap<PathBuf, CacheEntry>>,
/// Time-to-live for cache entries
ttl: Duration,
}
impl EphemeralIndexCache {
/// Create a new cache with default TTL
/// Create a new cache
pub fn new() -> Self {
Self {
entries: RwLock::new(HashMap::new()),
ttl: DEFAULT_TTL,
}
}
/// Create a new cache with custom TTL
pub fn with_ttl(ttl: Duration) -> Self {
Self {
entries: RwLock::new(HashMap::new()),
ttl,
}
}
/// Get an existing index for a path, or None if not cached or stale
///
/// Also checks if the index is still being populated (indexing in progress).
/// Get an existing index for a path, or None if not cached
pub fn get(&self, path: &Path) -> Option<Arc<TokioRwLock<EphemeralIndex>>> {
let entries = self.entries.read();
if let Some(entry) = entries.get(path) {
// Check if stale
if entry.is_stale(self.ttl) {
return None;
}
Some(entry.index.clone())
} else {
None
}
entries.get(path).map(|entry| entry.index.clone())
}
/// Get an existing index for a path (exact match only)
///
/// Returns the index if:
/// 1. An index exists for this exact path
/// 2. The index is not stale
/// Returns the index if an index exists for this exact path.
///
/// Note: We only use exact matches because ephemeral indexing uses
/// IndexScope::Current (single level), so an ancestor index doesn't
/// contain the contents of subdirectories.
pub fn get_for_path(&self, path: &Path) -> Option<Arc<TokioRwLock<EphemeralIndex>>> {
let entries = self.entries.read();
// Only exact match - ancestor indexes don't contain subdirectory contents
// because ephemeral indexing uses IndexScope::Current (single level)
if let Some(entry) = entries.get(path) {
if !entry.is_stale(self.ttl) {
return Some(entry.index.clone());
}
}
None
self.get(path)
}
/// Check if indexing is in progress for a path
@@ -147,17 +108,10 @@ impl EphemeralIndexCache {
}
/// Mark indexing as complete for a path
///
/// This also refreshes the entry's `created_at` timestamp so it's no longer
/// considered stale. This is important because `create_for_indexing()` may
/// have reused an existing stale entry, and without this refresh the entry
/// would remain stale even after being freshly populated.
pub fn mark_indexing_complete(&self, path: &Path) {
let mut entries = self.entries.write();
if let Some(entry) = entries.get_mut(path) {
entry.indexing_in_progress = false;
// Reset created_at so the freshly-populated index is no longer stale
entry.created_at = Instant::now();
}
}
@@ -167,12 +121,6 @@ impl EphemeralIndexCache {
entries.remove(path);
}
/// Remove stale entries from the cache
pub fn evict_stale(&self) {
let mut entries = self.entries.write();
entries.retain(|_, entry| !entry.is_stale(self.ttl));
}
/// Get the number of cached indexes
pub fn len(&self) -> usize {
self.entries.read().len()
@@ -193,14 +141,20 @@ impl EphemeralIndexCache {
let entries = self.entries.read();
let total_entries = entries.len();
let indexing_count = entries.values().filter(|e| e.indexing_in_progress).count();
let stale_count = entries.values().filter(|e| e.is_stale(self.ttl)).count();
EphemeralIndexCacheStats {
total_entries,
indexing_count,
stale_count,
}
}
/// Get the age of a cached index in seconds
pub fn get_age(&self, path: &Path) -> Option<f64> {
let entries = self.entries.read();
entries
.get(path)
.map(|e| e.created_at.elapsed().as_secs_f64())
}
}
impl Default for EphemeralIndexCache {
@@ -214,7 +168,6 @@ impl Default for EphemeralIndexCache {
pub struct EphemeralIndexCacheStats {
pub total_entries: usize,
pub indexing_count: usize,
pub stale_count: usize,
}
#[cfg(test)]
@@ -244,7 +197,7 @@ mod tests {
let cache = EphemeralIndexCache::new();
let path = PathBuf::from("/test/path");
let index = cache.create_for_indexing(path.clone());
let _index = cache.create_for_indexing(path.clone());
assert!(cache.is_indexing(&path));
@@ -284,17 +237,18 @@ mod tests {
}
#[test]
fn test_stale_detection() {
let cache = EphemeralIndexCache::with_ttl(Duration::from_millis(1));
fn test_cache_persists() {
// Test that cache entries persist (no TTL expiration)
let cache = EphemeralIndexCache::new();
let path = PathBuf::from("/test/path");
let index = Arc::new(TokioRwLock::new(EphemeralIndex::new(path.clone())));
cache.insert(path.clone(), index);
// Wait for TTL to expire
std::thread::sleep(Duration::from_millis(10));
// Wait a bit
std::thread::sleep(std::time::Duration::from_millis(100));
// Should be stale now
assert!(cache.get(&path).is_none());
// Should still be available (no expiration)
assert!(cache.get(&path).is_some());
}
}

View File

@@ -42,6 +42,20 @@ pub async fn run_processing_phase(
total_batches
));
// Populate ephemeral UUIDs for preservation before processing
// This allows entries that were browsed before enabling indexing to keep
// the same UUID, preserving any user data associated with them
let ephemeral_cache = ctx.library().core_context().ephemeral_cache();
let preserved_count = state
.populate_ephemeral_uuids(ephemeral_cache, location_root_path)
.await;
if preserved_count > 0 {
ctx.log(format!(
"Found {} ephemeral UUIDs to preserve from previous browsing",
preserved_count
));
}
if total_batches == 0 {
ctx.log("No batches to process - transitioning to Aggregation phase");
state.phase = crate::ops::indexing::state::Phase::Aggregation;

View File

@@ -9,6 +9,7 @@ use std::{
path::PathBuf,
time::{Duration, Instant},
};
use uuid::Uuid;
/// Indexer progress information
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -107,6 +108,12 @@ pub struct IndexerState {
// Database operations
pub(crate) entry_id_cache: HashMap<PathBuf, i32>, // path -> entry_id for parent lookups
// Ephemeral UUID preservation
// UUIDs from ephemeral indexing that should be reused when creating persistent entries
// This ensures files browsed before enabling indexing keep the same UUID
#[serde(skip, default)]
pub(crate) ephemeral_uuids: HashMap<PathBuf, Uuid>,
// Change detection
pub(crate) existing_entries:
HashMap<PathBuf, (i32, Option<u64>, Option<std::time::SystemTime>)>, // path -> (id, inode, modified)
@@ -149,6 +156,7 @@ impl IndexerState {
entry_batches: Vec::new(),
entries_for_content: Vec::new(),
entry_id_cache: HashMap::new(),
ephemeral_uuids: HashMap::new(),
existing_entries: HashMap::new(),
stats: Default::default(),
errors: Vec::new(),
@@ -161,6 +169,51 @@ impl IndexerState {
}
}
/// Populate ephemeral UUIDs from the ephemeral cache for UUID preservation
///
/// When a directory is browsed before being added as a managed location,
/// ephemeral indexing assigns UUIDs to each entry. This method extracts
/// those UUIDs so they can be reused when creating persistent database entries,
/// ensuring continuity for any user data (tags, notes, etc.) associated with
/// the ephemeral UUIDs.
pub async fn populate_ephemeral_uuids(
&mut self,
ephemeral_cache: &super::ephemeral::EphemeralIndexCache,
root_path: &std::path::Path,
) -> usize {
// Try to get an ephemeral index that covers this path
if let Some(index) = ephemeral_cache.get_for_path(root_path) {
let index_read = index.read().await;
// Get all paths from the entries and look up their UUIDs
let entries = index_read.entries();
for path in entries.keys() {
if let Some(entry_uuid) = index_read.get_entry_uuid(path) {
self.ephemeral_uuids.insert(path.clone(), entry_uuid);
}
}
let count = self.ephemeral_uuids.len();
tracing::info!(
"Populated {} ephemeral UUIDs for preservation from cache covering {}",
count,
root_path.display()
);
count
} else {
tracing::debug!("No ephemeral index found for path: {}", root_path.display());
0
}
}
/// Get an ephemeral UUID for a path if one exists
///
/// Returns the UUID that was assigned during ephemeral indexing,
/// allowing it to be reused for the persistent database entry.
pub fn get_ephemeral_uuid(&self, path: &std::path::Path) -> Option<Uuid> {
self.ephemeral_uuids.get(path).copied()
}
pub fn calculate_rate(&mut self) -> f32 {
let elapsed = self.last_progress_time.elapsed();
if elapsed.as_secs() > 0 {
@@ -232,3 +285,72 @@ impl IndexerState {
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::domain::addressing::SdPath;
#[test]
fn test_ephemeral_uuid_lookup() {
let sd_path = SdPath::Physical {
device_slug: "local".to_string(),
path: PathBuf::from("/test"),
};
let mut state = IndexerState::new(&sd_path);
// Initially no ephemeral UUIDs
assert!(state
.get_ephemeral_uuid(std::path::Path::new("/test/file.txt"))
.is_none());
// Add an ephemeral UUID
let test_uuid = Uuid::new_v4();
state
.ephemeral_uuids
.insert(PathBuf::from("/test/file.txt"), test_uuid);
// Now we can retrieve it
assert_eq!(
state.get_ephemeral_uuid(std::path::Path::new("/test/file.txt")),
Some(test_uuid)
);
// Non-existent path still returns None
assert!(state
.get_ephemeral_uuid(std::path::Path::new("/test/other.txt"))
.is_none());
}
#[test]
fn test_ephemeral_uuid_preservation_concept() {
// This test demonstrates the UUID preservation concept:
// When ephemeral_uuids is populated, the same UUID should be used
// instead of generating a new one
let sd_path = SdPath::Physical {
device_slug: "local".to_string(),
path: PathBuf::from("/test"),
};
let mut state = IndexerState::new(&sd_path);
// Simulate an ephemeral UUID from previous browsing
let preserved_uuid = Uuid::new_v4();
let test_path = PathBuf::from("/test/document.pdf");
state
.ephemeral_uuids
.insert(test_path.clone(), preserved_uuid);
// When creating an entry, the code should check get_ephemeral_uuid first
let entry_uuid = if let Some(ephemeral_uuid) = state.get_ephemeral_uuid(&test_path) {
// Preserve the ephemeral UUID
ephemeral_uuid
} else {
// Generate a new UUID
Uuid::new_v4()
};
// The preserved UUID should be used
assert_eq!(entry_uuid, preserved_uuid);
}
}

View File

@@ -7,7 +7,6 @@ import {
useParams,
} from "react-router-dom";
import { useEffect, useMemo } from "react";
import { useLocationChangeInvalidation } from "./hooks/useLocationChangeInvalidation";
import { Dialogs } from "@sd/ui";
import { Inspector, type InspectorVariant } from "./Inspector";
import { TopBarProvider, TopBar } from "./TopBar";
@@ -71,9 +70,6 @@ export function ExplorerLayout() {
} = useExplorer();
const { selectedFiles, selectFile } = useSelection();
// Listen for location index_mode changes and invalidate directory listing queries
useLocationChangeInvalidation();
// Sync route with explorer context for view preferences
useEffect(() => {
const spaceItemKey = getSpaceItemKeyFromRoute(

View File

@@ -1,94 +0,0 @@
/**
* useLocationChangeInvalidation - Invalidates directory listing queries when location index_mode changes
*
* When a user enables indexing for a location (index_mode changes from "none" to something else),
* we need to refetch directory listings because:
* - Before: Data came from ephemeral in-memory index
* - After: Data comes from persistent database
*
* This hook subscribes to location events and invalidates affected queries.
*/
import { useEffect, useRef } from "react";
import { useQueryClient } from "@tanstack/react-query";
import { useSpacedriveClient } from "@sd/ts-client/hooks";
import type { Event, LocationInfo } from "@sd/ts-client";
export function useLocationChangeInvalidation() {
const client = useSpacedriveClient();
const queryClient = useQueryClient();
const libraryId = client.getCurrentLibraryId();
// Track previous index_mode for each location to detect changes
const prevIndexModes = useRef<Map<string, string>>(new Map());
useEffect(() => {
if (!libraryId) return;
let unsubscribe: (() => void) | undefined;
let isCancelled = false;
const handleEvent = (event: Event) => {
// Only handle ResourceChanged events for locations
if (typeof event === "string" || !("ResourceChanged" in event)) {
return;
}
const { resource_type, resource } = event.ResourceChanged;
if (resource_type !== "location") {
return;
}
const location = resource as LocationInfo;
const locationId = location.id;
const newIndexMode = location.index_mode;
// Get previous index_mode
const prevIndexMode = prevIndexModes.current.get(locationId);
// Update tracked index_mode
prevIndexModes.current.set(locationId, newIndexMode);
// Check if index_mode changed from "none" to something else
// This means the user just enabled indexing
if (prevIndexMode === "none" && newIndexMode !== "none") {
console.log(
`[useLocationChangeInvalidation] Location ${locationId} indexing enabled (${prevIndexMode} -> ${newIndexMode}), invalidating directory_listing queries`,
);
// Invalidate all directory_listing queries
// They will refetch and get data from the persistent index instead of ephemeral
queryClient.invalidateQueries({
predicate: (query) => {
const key = query.queryKey;
return (
Array.isArray(key) &&
key[0] === "query:files.directory_listing"
);
},
});
}
};
client
.subscribeFiltered(
{
resource_type: "location",
library_id: libraryId,
},
handleEvent,
)
.then((unsub) => {
if (isCancelled) {
unsub();
} else {
unsubscribe = unsub;
}
});
return () => {
isCancelled = true;
unsubscribe?.();
};
}, [client, queryClient, libraryId]);
}