mirror of
https://github.com/spacedriveapp/spacedrive.git
synced 2026-05-18 21:36:56 -04:00
Preserve ephemeral UUIDs during indexing
- Remove TTL-based ephemeral cache and switch to a permanent in-memory cache. - Reuse ephemeral UUIDs when creating persistent entries to preserve continuity of user data. - Populate ephemeral UUIDs during the processing phase and expose get_ephemeral_uuid in the indexer state. - Remove the location invalidation hook and related UI usage.
This commit is contained in:
@@ -249,8 +249,8 @@ pub async fn run(ctx: &Context, cmd: IndexCmd) -> Result<()> {
|
||||
println!("║ EPHEMERAL INDEX CACHE STATUS ║");
|
||||
println!("╠══════════════════════════════════════════════════════════════╣");
|
||||
println!(
|
||||
"║ Total Indexes: {:3} In Progress: {:3} Stale: {:3} ║",
|
||||
status.total_indexes, status.indexing_in_progress, status.stale_count
|
||||
"║ Total Indexes: {:3} In Progress: {:3} ║",
|
||||
status.total_indexes, status.indexing_in_progress
|
||||
);
|
||||
println!("╚══════════════════════════════════════════════════════════════╝");
|
||||
|
||||
|
||||
@@ -11,8 +11,6 @@ pub struct EphemeralCacheStatus {
|
||||
pub total_indexes: usize,
|
||||
/// Number of indexes currently being populated
|
||||
pub indexing_in_progress: usize,
|
||||
/// Number of stale indexes (past TTL)
|
||||
pub stale_count: usize,
|
||||
/// Details for each cached index
|
||||
pub indexes: Vec<EphemeralIndexInfo>,
|
||||
}
|
||||
|
||||
@@ -91,7 +91,6 @@ impl CoreQuery for EphemeralCacheStatusQuery {
|
||||
Ok(EphemeralCacheStatus {
|
||||
total_indexes: cache_stats.total_entries,
|
||||
indexing_in_progress: cache_stats.indexing_count,
|
||||
stale_count: cache_stats.stale_count,
|
||||
indexes,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -223,10 +223,21 @@ impl EntryProcessor {
|
||||
})
|
||||
.unwrap_or_else(|| chrono::Utc::now());
|
||||
|
||||
// All entries get UUIDs immediately for UI normalized caching compatibility.
|
||||
// Sync readiness is now determined by content_id presence (for regular files)
|
||||
// or by entry kind (for directories/empty files).
|
||||
let entry_uuid = Some(Uuid::new_v4());
|
||||
// UUID assignment strategy:
|
||||
// 1. First check if there's an ephemeral UUID to preserve (from previous browsing)
|
||||
// 2. If not, generate a new UUID
|
||||
//
|
||||
// This ensures that files browsed before enabling indexing keep the same UUID
|
||||
let entry_uuid = if let Some(ephemeral_uuid) = state.get_ephemeral_uuid(&entry.path) {
|
||||
tracing::debug!(
|
||||
"Preserving ephemeral UUID {} for {}",
|
||||
ephemeral_uuid,
|
||||
entry.path.display()
|
||||
);
|
||||
Some(ephemeral_uuid)
|
||||
} else {
|
||||
Some(Uuid::new_v4())
|
||||
};
|
||||
|
||||
// Find parent entry ID
|
||||
let parent_id = if let Some(parent_path) = entry.path.parent() {
|
||||
|
||||
@@ -3,6 +3,10 @@
|
||||
//! This module provides a thread-safe cache for storing ephemeral indexes
|
||||
//! by their root path. This allows directory listing queries to reuse
|
||||
//! existing indexes instead of spawning new indexer jobs.
|
||||
//!
|
||||
//! The cache is permanent in memory (no TTL or expiration). Entries persist
|
||||
//! until the daemon restarts or they are explicitly removed. This ensures
|
||||
//! UUIDs from ephemeral indexing can be preserved when regular indexing is enabled.
|
||||
|
||||
use crate::ops::indexing::EphemeralIndex;
|
||||
use parking_lot::RwLock;
|
||||
@@ -10,16 +14,10 @@ use std::{
|
||||
collections::HashMap,
|
||||
path::{Path, PathBuf},
|
||||
sync::Arc,
|
||||
time::{Duration, Instant},
|
||||
time::Instant,
|
||||
};
|
||||
use tokio::sync::RwLock as TokioRwLock;
|
||||
|
||||
/// Default TTL for ephemeral indexes (5 minutes)
|
||||
const DEFAULT_TTL: Duration = Duration::from_secs(5 * 60);
|
||||
|
||||
/// Maximum idle time before an index is considered stale (2 minutes)
|
||||
const MAX_IDLE_TIME: Duration = Duration::from_secs(2 * 60);
|
||||
|
||||
/// Cache entry wrapping an ephemeral index with metadata
|
||||
struct CacheEntry {
|
||||
/// The ephemeral index
|
||||
@@ -38,77 +36,40 @@ impl CacheEntry {
|
||||
indexing_in_progress: false,
|
||||
}
|
||||
}
|
||||
|
||||
fn is_stale(&self, ttl: Duration) -> bool {
|
||||
self.created_at.elapsed() > ttl
|
||||
}
|
||||
}
|
||||
|
||||
/// Global cache for ephemeral indexes
|
||||
///
|
||||
/// Stores ephemeral indexes by their root path for reuse across queries.
|
||||
/// Indexes are automatically evicted based on TTL and idle time.
|
||||
/// Indexes persist in memory until the daemon restarts or they are explicitly removed.
|
||||
pub struct EphemeralIndexCache {
|
||||
/// Map of root path to cache entry
|
||||
entries: RwLock<HashMap<PathBuf, CacheEntry>>,
|
||||
/// Time-to-live for cache entries
|
||||
ttl: Duration,
|
||||
}
|
||||
|
||||
impl EphemeralIndexCache {
|
||||
/// Create a new cache with default TTL
|
||||
/// Create a new cache
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
entries: RwLock::new(HashMap::new()),
|
||||
ttl: DEFAULT_TTL,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new cache with custom TTL
|
||||
pub fn with_ttl(ttl: Duration) -> Self {
|
||||
Self {
|
||||
entries: RwLock::new(HashMap::new()),
|
||||
ttl,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get an existing index for a path, or None if not cached or stale
|
||||
///
|
||||
/// Also checks if the index is still being populated (indexing in progress).
|
||||
/// Get an existing index for a path, or None if not cached
|
||||
pub fn get(&self, path: &Path) -> Option<Arc<TokioRwLock<EphemeralIndex>>> {
|
||||
let entries = self.entries.read();
|
||||
if let Some(entry) = entries.get(path) {
|
||||
// Check if stale
|
||||
if entry.is_stale(self.ttl) {
|
||||
return None;
|
||||
}
|
||||
Some(entry.index.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
entries.get(path).map(|entry| entry.index.clone())
|
||||
}
|
||||
|
||||
/// Get an existing index for a path (exact match only)
|
||||
///
|
||||
/// Returns the index if:
|
||||
/// 1. An index exists for this exact path
|
||||
/// 2. The index is not stale
|
||||
/// Returns the index if an index exists for this exact path.
|
||||
///
|
||||
/// Note: We only use exact matches because ephemeral indexing uses
|
||||
/// IndexScope::Current (single level), so an ancestor index doesn't
|
||||
/// contain the contents of subdirectories.
|
||||
pub fn get_for_path(&self, path: &Path) -> Option<Arc<TokioRwLock<EphemeralIndex>>> {
|
||||
let entries = self.entries.read();
|
||||
|
||||
// Only exact match - ancestor indexes don't contain subdirectory contents
|
||||
// because ephemeral indexing uses IndexScope::Current (single level)
|
||||
if let Some(entry) = entries.get(path) {
|
||||
if !entry.is_stale(self.ttl) {
|
||||
return Some(entry.index.clone());
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
self.get(path)
|
||||
}
|
||||
|
||||
/// Check if indexing is in progress for a path
|
||||
@@ -147,17 +108,10 @@ impl EphemeralIndexCache {
|
||||
}
|
||||
|
||||
/// Mark indexing as complete for a path
|
||||
///
|
||||
/// This also refreshes the entry's `created_at` timestamp so it's no longer
|
||||
/// considered stale. This is important because `create_for_indexing()` may
|
||||
/// have reused an existing stale entry, and without this refresh the entry
|
||||
/// would remain stale even after being freshly populated.
|
||||
pub fn mark_indexing_complete(&self, path: &Path) {
|
||||
let mut entries = self.entries.write();
|
||||
if let Some(entry) = entries.get_mut(path) {
|
||||
entry.indexing_in_progress = false;
|
||||
// Reset created_at so the freshly-populated index is no longer stale
|
||||
entry.created_at = Instant::now();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -167,12 +121,6 @@ impl EphemeralIndexCache {
|
||||
entries.remove(path);
|
||||
}
|
||||
|
||||
/// Remove stale entries from the cache
|
||||
pub fn evict_stale(&self) {
|
||||
let mut entries = self.entries.write();
|
||||
entries.retain(|_, entry| !entry.is_stale(self.ttl));
|
||||
}
|
||||
|
||||
/// Get the number of cached indexes
|
||||
pub fn len(&self) -> usize {
|
||||
self.entries.read().len()
|
||||
@@ -193,14 +141,20 @@ impl EphemeralIndexCache {
|
||||
let entries = self.entries.read();
|
||||
let total_entries = entries.len();
|
||||
let indexing_count = entries.values().filter(|e| e.indexing_in_progress).count();
|
||||
let stale_count = entries.values().filter(|e| e.is_stale(self.ttl)).count();
|
||||
|
||||
EphemeralIndexCacheStats {
|
||||
total_entries,
|
||||
indexing_count,
|
||||
stale_count,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the age of a cached index in seconds
|
||||
pub fn get_age(&self, path: &Path) -> Option<f64> {
|
||||
let entries = self.entries.read();
|
||||
entries
|
||||
.get(path)
|
||||
.map(|e| e.created_at.elapsed().as_secs_f64())
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for EphemeralIndexCache {
|
||||
@@ -214,7 +168,6 @@ impl Default for EphemeralIndexCache {
|
||||
pub struct EphemeralIndexCacheStats {
|
||||
pub total_entries: usize,
|
||||
pub indexing_count: usize,
|
||||
pub stale_count: usize,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -244,7 +197,7 @@ mod tests {
|
||||
let cache = EphemeralIndexCache::new();
|
||||
let path = PathBuf::from("/test/path");
|
||||
|
||||
let index = cache.create_for_indexing(path.clone());
|
||||
let _index = cache.create_for_indexing(path.clone());
|
||||
|
||||
assert!(cache.is_indexing(&path));
|
||||
|
||||
@@ -284,17 +237,18 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_stale_detection() {
|
||||
let cache = EphemeralIndexCache::with_ttl(Duration::from_millis(1));
|
||||
fn test_cache_persists() {
|
||||
// Test that cache entries persist (no TTL expiration)
|
||||
let cache = EphemeralIndexCache::new();
|
||||
let path = PathBuf::from("/test/path");
|
||||
let index = Arc::new(TokioRwLock::new(EphemeralIndex::new(path.clone())));
|
||||
|
||||
cache.insert(path.clone(), index);
|
||||
|
||||
// Wait for TTL to expire
|
||||
std::thread::sleep(Duration::from_millis(10));
|
||||
// Wait a bit
|
||||
std::thread::sleep(std::time::Duration::from_millis(100));
|
||||
|
||||
// Should be stale now
|
||||
assert!(cache.get(&path).is_none());
|
||||
// Should still be available (no expiration)
|
||||
assert!(cache.get(&path).is_some());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -42,6 +42,20 @@ pub async fn run_processing_phase(
|
||||
total_batches
|
||||
));
|
||||
|
||||
// Populate ephemeral UUIDs for preservation before processing
|
||||
// This allows entries that were browsed before enabling indexing to keep
|
||||
// the same UUID, preserving any user data associated with them
|
||||
let ephemeral_cache = ctx.library().core_context().ephemeral_cache();
|
||||
let preserved_count = state
|
||||
.populate_ephemeral_uuids(ephemeral_cache, location_root_path)
|
||||
.await;
|
||||
if preserved_count > 0 {
|
||||
ctx.log(format!(
|
||||
"Found {} ephemeral UUIDs to preserve from previous browsing",
|
||||
preserved_count
|
||||
));
|
||||
}
|
||||
|
||||
if total_batches == 0 {
|
||||
ctx.log("No batches to process - transitioning to Aggregation phase");
|
||||
state.phase = crate::ops::indexing::state::Phase::Aggregation;
|
||||
|
||||
@@ -9,6 +9,7 @@ use std::{
|
||||
path::PathBuf,
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Indexer progress information
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
@@ -107,6 +108,12 @@ pub struct IndexerState {
|
||||
// Database operations
|
||||
pub(crate) entry_id_cache: HashMap<PathBuf, i32>, // path -> entry_id for parent lookups
|
||||
|
||||
// Ephemeral UUID preservation
|
||||
// UUIDs from ephemeral indexing that should be reused when creating persistent entries
|
||||
// This ensures files browsed before enabling indexing keep the same UUID
|
||||
#[serde(skip, default)]
|
||||
pub(crate) ephemeral_uuids: HashMap<PathBuf, Uuid>,
|
||||
|
||||
// Change detection
|
||||
pub(crate) existing_entries:
|
||||
HashMap<PathBuf, (i32, Option<u64>, Option<std::time::SystemTime>)>, // path -> (id, inode, modified)
|
||||
@@ -149,6 +156,7 @@ impl IndexerState {
|
||||
entry_batches: Vec::new(),
|
||||
entries_for_content: Vec::new(),
|
||||
entry_id_cache: HashMap::new(),
|
||||
ephemeral_uuids: HashMap::new(),
|
||||
existing_entries: HashMap::new(),
|
||||
stats: Default::default(),
|
||||
errors: Vec::new(),
|
||||
@@ -161,6 +169,51 @@ impl IndexerState {
|
||||
}
|
||||
}
|
||||
|
||||
/// Populate ephemeral UUIDs from the ephemeral cache for UUID preservation
|
||||
///
|
||||
/// When a directory is browsed before being added as a managed location,
|
||||
/// ephemeral indexing assigns UUIDs to each entry. This method extracts
|
||||
/// those UUIDs so they can be reused when creating persistent database entries,
|
||||
/// ensuring continuity for any user data (tags, notes, etc.) associated with
|
||||
/// the ephemeral UUIDs.
|
||||
pub async fn populate_ephemeral_uuids(
|
||||
&mut self,
|
||||
ephemeral_cache: &super::ephemeral::EphemeralIndexCache,
|
||||
root_path: &std::path::Path,
|
||||
) -> usize {
|
||||
// Try to get an ephemeral index that covers this path
|
||||
if let Some(index) = ephemeral_cache.get_for_path(root_path) {
|
||||
let index_read = index.read().await;
|
||||
|
||||
// Get all paths from the entries and look up their UUIDs
|
||||
let entries = index_read.entries();
|
||||
for path in entries.keys() {
|
||||
if let Some(entry_uuid) = index_read.get_entry_uuid(path) {
|
||||
self.ephemeral_uuids.insert(path.clone(), entry_uuid);
|
||||
}
|
||||
}
|
||||
|
||||
let count = self.ephemeral_uuids.len();
|
||||
tracing::info!(
|
||||
"Populated {} ephemeral UUIDs for preservation from cache covering {}",
|
||||
count,
|
||||
root_path.display()
|
||||
);
|
||||
count
|
||||
} else {
|
||||
tracing::debug!("No ephemeral index found for path: {}", root_path.display());
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
/// Get an ephemeral UUID for a path if one exists
|
||||
///
|
||||
/// Returns the UUID that was assigned during ephemeral indexing,
|
||||
/// allowing it to be reused for the persistent database entry.
|
||||
pub fn get_ephemeral_uuid(&self, path: &std::path::Path) -> Option<Uuid> {
|
||||
self.ephemeral_uuids.get(path).copied()
|
||||
}
|
||||
|
||||
pub fn calculate_rate(&mut self) -> f32 {
|
||||
let elapsed = self.last_progress_time.elapsed();
|
||||
if elapsed.as_secs() > 0 {
|
||||
@@ -232,3 +285,72 @@ impl IndexerState {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::domain::addressing::SdPath;
|
||||
|
||||
#[test]
|
||||
fn test_ephemeral_uuid_lookup() {
|
||||
let sd_path = SdPath::Physical {
|
||||
device_slug: "local".to_string(),
|
||||
path: PathBuf::from("/test"),
|
||||
};
|
||||
let mut state = IndexerState::new(&sd_path);
|
||||
|
||||
// Initially no ephemeral UUIDs
|
||||
assert!(state
|
||||
.get_ephemeral_uuid(std::path::Path::new("/test/file.txt"))
|
||||
.is_none());
|
||||
|
||||
// Add an ephemeral UUID
|
||||
let test_uuid = Uuid::new_v4();
|
||||
state
|
||||
.ephemeral_uuids
|
||||
.insert(PathBuf::from("/test/file.txt"), test_uuid);
|
||||
|
||||
// Now we can retrieve it
|
||||
assert_eq!(
|
||||
state.get_ephemeral_uuid(std::path::Path::new("/test/file.txt")),
|
||||
Some(test_uuid)
|
||||
);
|
||||
|
||||
// Non-existent path still returns None
|
||||
assert!(state
|
||||
.get_ephemeral_uuid(std::path::Path::new("/test/other.txt"))
|
||||
.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ephemeral_uuid_preservation_concept() {
|
||||
// This test demonstrates the UUID preservation concept:
|
||||
// When ephemeral_uuids is populated, the same UUID should be used
|
||||
// instead of generating a new one
|
||||
|
||||
let sd_path = SdPath::Physical {
|
||||
device_slug: "local".to_string(),
|
||||
path: PathBuf::from("/test"),
|
||||
};
|
||||
let mut state = IndexerState::new(&sd_path);
|
||||
|
||||
// Simulate an ephemeral UUID from previous browsing
|
||||
let preserved_uuid = Uuid::new_v4();
|
||||
let test_path = PathBuf::from("/test/document.pdf");
|
||||
state
|
||||
.ephemeral_uuids
|
||||
.insert(test_path.clone(), preserved_uuid);
|
||||
|
||||
// When creating an entry, the code should check get_ephemeral_uuid first
|
||||
let entry_uuid = if let Some(ephemeral_uuid) = state.get_ephemeral_uuid(&test_path) {
|
||||
// Preserve the ephemeral UUID
|
||||
ephemeral_uuid
|
||||
} else {
|
||||
// Generate a new UUID
|
||||
Uuid::new_v4()
|
||||
};
|
||||
|
||||
// The preserved UUID should be used
|
||||
assert_eq!(entry_uuid, preserved_uuid);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,7 +7,6 @@ import {
|
||||
useParams,
|
||||
} from "react-router-dom";
|
||||
import { useEffect, useMemo } from "react";
|
||||
import { useLocationChangeInvalidation } from "./hooks/useLocationChangeInvalidation";
|
||||
import { Dialogs } from "@sd/ui";
|
||||
import { Inspector, type InspectorVariant } from "./Inspector";
|
||||
import { TopBarProvider, TopBar } from "./TopBar";
|
||||
@@ -71,9 +70,6 @@ export function ExplorerLayout() {
|
||||
} = useExplorer();
|
||||
const { selectedFiles, selectFile } = useSelection();
|
||||
|
||||
// Listen for location index_mode changes and invalidate directory listing queries
|
||||
useLocationChangeInvalidation();
|
||||
|
||||
// Sync route with explorer context for view preferences
|
||||
useEffect(() => {
|
||||
const spaceItemKey = getSpaceItemKeyFromRoute(
|
||||
|
||||
@@ -1,94 +0,0 @@
|
||||
/**
|
||||
* useLocationChangeInvalidation - Invalidates directory listing queries when location index_mode changes
|
||||
*
|
||||
* When a user enables indexing for a location (index_mode changes from "none" to something else),
|
||||
* we need to refetch directory listings because:
|
||||
* - Before: Data came from ephemeral in-memory index
|
||||
* - After: Data comes from persistent database
|
||||
*
|
||||
* This hook subscribes to location events and invalidates affected queries.
|
||||
*/
|
||||
|
||||
import { useEffect, useRef } from "react";
|
||||
import { useQueryClient } from "@tanstack/react-query";
|
||||
import { useSpacedriveClient } from "@sd/ts-client/hooks";
|
||||
import type { Event, LocationInfo } from "@sd/ts-client";
|
||||
|
||||
export function useLocationChangeInvalidation() {
|
||||
const client = useSpacedriveClient();
|
||||
const queryClient = useQueryClient();
|
||||
const libraryId = client.getCurrentLibraryId();
|
||||
|
||||
// Track previous index_mode for each location to detect changes
|
||||
const prevIndexModes = useRef<Map<string, string>>(new Map());
|
||||
|
||||
useEffect(() => {
|
||||
if (!libraryId) return;
|
||||
|
||||
let unsubscribe: (() => void) | undefined;
|
||||
let isCancelled = false;
|
||||
|
||||
const handleEvent = (event: Event) => {
|
||||
// Only handle ResourceChanged events for locations
|
||||
if (typeof event === "string" || !("ResourceChanged" in event)) {
|
||||
return;
|
||||
}
|
||||
|
||||
const { resource_type, resource } = event.ResourceChanged;
|
||||
if (resource_type !== "location") {
|
||||
return;
|
||||
}
|
||||
|
||||
const location = resource as LocationInfo;
|
||||
const locationId = location.id;
|
||||
const newIndexMode = location.index_mode;
|
||||
|
||||
// Get previous index_mode
|
||||
const prevIndexMode = prevIndexModes.current.get(locationId);
|
||||
|
||||
// Update tracked index_mode
|
||||
prevIndexModes.current.set(locationId, newIndexMode);
|
||||
|
||||
// Check if index_mode changed from "none" to something else
|
||||
// This means the user just enabled indexing
|
||||
if (prevIndexMode === "none" && newIndexMode !== "none") {
|
||||
console.log(
|
||||
`[useLocationChangeInvalidation] Location ${locationId} indexing enabled (${prevIndexMode} -> ${newIndexMode}), invalidating directory_listing queries`,
|
||||
);
|
||||
|
||||
// Invalidate all directory_listing queries
|
||||
// They will refetch and get data from the persistent index instead of ephemeral
|
||||
queryClient.invalidateQueries({
|
||||
predicate: (query) => {
|
||||
const key = query.queryKey;
|
||||
return (
|
||||
Array.isArray(key) &&
|
||||
key[0] === "query:files.directory_listing"
|
||||
);
|
||||
},
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
client
|
||||
.subscribeFiltered(
|
||||
{
|
||||
resource_type: "location",
|
||||
library_id: libraryId,
|
||||
},
|
||||
handleEvent,
|
||||
)
|
||||
.then((unsub) => {
|
||||
if (isCancelled) {
|
||||
unsub();
|
||||
} else {
|
||||
unsubscribe = unsub;
|
||||
}
|
||||
});
|
||||
|
||||
return () => {
|
||||
isCancelled = true;
|
||||
unsubscribe?.();
|
||||
};
|
||||
}, [client, queryClient, libraryId]);
|
||||
}
|
||||
Reference in New Issue
Block a user