From f2c10f36d24ccd907cdba93e45ebed3ed60dd610 Mon Sep 17 00:00:00 2001 From: Jamie Pine Date: Tue, 16 Sep 2025 23:46:55 -0700 Subject: [PATCH] feat: Integrate FTS5 for enhanced file search capabilities - Added FTS5 support to improve text search performance and relevance scoring in file search queries. - Refactored the search logic to utilize FTS5 for high-performance querying, including BM25 ranking. - Implemented methods for building FTS5 queries and extracting highlights from search results. - Updated tests to validate FTS5 query building and highlight extraction functionality. This enhancement significantly boosts the efficiency and accuracy of file searches. --- ...0250120_000001_create_fts5_search_index.rs | 160 +++++++ core/src/infra/db/migration/mod.rs | 2 + core/src/ops/search/query.rs | 419 +++++++++++++++--- core/src/ops/search/tests.rs | 196 ++++---- 4 files changed, 630 insertions(+), 147 deletions(-) create mode 100644 core/src/infra/db/migration/m20250120_000001_create_fts5_search_index.rs diff --git a/core/src/infra/db/migration/m20250120_000001_create_fts5_search_index.rs b/core/src/infra/db/migration/m20250120_000001_create_fts5_search_index.rs new file mode 100644 index 000000000..fdfb5431a --- /dev/null +++ b/core/src/infra/db/migration/m20250120_000001_create_fts5_search_index.rs @@ -0,0 +1,160 @@ +//! FTS5 Search Index Migration +//! +//! Creates FTS5 virtual table for high-performance full-text search +//! and associated triggers for real-time index updates. + +use sea_orm_migration::prelude::*; + +#[derive(DeriveMigrationName)] +pub struct Migration; + +#[async_trait::async_trait] +impl MigrationTrait for Migration { + async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { + // Create FTS5 virtual table for search indexing + manager + .get_connection() + .execute_unprepared( + r#" + CREATE VIRTUAL TABLE search_index USING fts5( + content='entries', + content_rowid='id', + name, + extension, + tokenize="unicode61 remove_diacritics 2 tokenchars '.@-_'", + prefix='2,3' + ); + "#, + ) + .await?; + + // Create trigger for INSERT operations + manager + .get_connection() + .execute_unprepared( + r#" + CREATE TRIGGER IF NOT EXISTS entries_search_insert + AFTER INSERT ON entries WHEN new.kind = 0 + BEGIN + INSERT INTO search_index(rowid, name, extension) + VALUES (new.id, new.name, new.extension); + END; + "#, + ) + .await?; + + // Create trigger for UPDATE operations + manager + .get_connection() + .execute_unprepared( + r#" + CREATE TRIGGER IF NOT EXISTS entries_search_update + AFTER UPDATE ON entries WHEN new.kind = 0 + BEGIN + UPDATE search_index SET + name = new.name, + extension = new.extension + WHERE rowid = new.id; + END; + "#, + ) + .await?; + + // Create trigger for DELETE operations + manager + .get_connection() + .execute_unprepared( + r#" + CREATE TRIGGER IF NOT EXISTS entries_search_delete + AFTER DELETE ON entries WHEN old.kind = 0 + BEGIN + DELETE FROM search_index WHERE rowid = old.id; + END; + "#, + ) + .await?; + + // Populate FTS5 index with existing file entries + manager + .get_connection() + .execute_unprepared( + r#" + INSERT INTO search_index(rowid, name, extension) + SELECT id, name, extension FROM entries WHERE kind = 0; + "#, + ) + .await?; + + // Create search analytics table for query optimization + manager + .get_connection() + .execute_unprepared( + r#" + CREATE TABLE search_analytics ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + query_text TEXT NOT NULL, + query_hash TEXT NOT NULL, + search_mode TEXT NOT NULL, + execution_time_ms INTEGER NOT NULL, + result_count INTEGER NOT NULL, + fts5_used BOOLEAN DEFAULT TRUE, + semantic_used BOOLEAN DEFAULT FALSE, + user_clicked_result BOOLEAN DEFAULT FALSE, + clicked_result_position INTEGER, + created_at TEXT NOT NULL DEFAULT (datetime('now')) + ); + "#, + ) + .await?; + + // Create index on query_hash for performance analytics + manager + .get_connection() + .execute_unprepared( + r#" + CREATE INDEX idx_search_analytics_query_hash + ON search_analytics(query_hash); + "#, + ) + .await?; + + Ok(()) + } + + async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { + // Drop analytics table and index + manager + .get_connection() + .execute_unprepared("DROP INDEX IF EXISTS idx_search_analytics_query_hash;") + .await?; + + manager + .get_connection() + .execute_unprepared("DROP TABLE IF EXISTS search_analytics;") + .await?; + + // Drop triggers + manager + .get_connection() + .execute_unprepared("DROP TRIGGER IF EXISTS entries_search_delete;") + .await?; + + manager + .get_connection() + .execute_unprepared("DROP TRIGGER IF EXISTS entries_search_update;") + .await?; + + manager + .get_connection() + .execute_unprepared("DROP TRIGGER IF EXISTS entries_search_insert;") + .await?; + + // Drop FTS5 virtual table + manager + .get_connection() + .execute_unprepared("DROP TABLE IF EXISTS search_index;") + .await?; + + Ok(()) + } +} diff --git a/core/src/infra/db/migration/mod.rs b/core/src/infra/db/migration/mod.rs index 734e25cfd..8fc6ab6f2 100644 --- a/core/src/infra/db/migration/mod.rs +++ b/core/src/infra/db/migration/mod.rs @@ -9,6 +9,7 @@ mod m20250109_000001_create_sidecars; mod m20250110_000001_refactor_volumes_table; mod m20250112_000001_create_indexer_rules; mod m20250115_000001_semantic_tags; +mod m20250120_000001_create_fts5_search_index; pub struct Migrator; @@ -23,6 +24,7 @@ impl MigratorTrait for Migrator { Box::new(m20250110_000001_refactor_volumes_table::Migration), Box::new(m20250112_000001_create_indexer_rules::Migration), Box::new(m20250115_000001_semantic_tags::Migration), + Box::new(m20250120_000001_create_fts5_search_index::Migration), ] } } diff --git a/core/src/ops/search/query.rs b/core/src/ops/search/query.rs index 1e036cea7..59c197d16 100644 --- a/core/src/ops/search/query.rs +++ b/core/src/ops/search/query.rs @@ -14,8 +14,8 @@ use crate::{ use anyhow::Result; use chrono::{DateTime, Utc}; use sea_orm::{ - ColumnTrait, Condition, DatabaseConnection, EntityTrait, JoinType, QueryFilter, QueryOrder, - QuerySelect, RelationTrait, + ColumnTrait, Condition, ConnectionTrait, DatabaseConnection, EntityTrait, JoinType, + QueryFilter, QueryOrder, QuerySelect, RelationTrait, Statement, }; use serde::{Deserialize, Serialize}; use std::sync::Arc; @@ -126,61 +126,25 @@ impl FileSearchQuery { &self, db: &DatabaseConnection, ) -> Result> { - // For now, implement basic SQL LIKE search with directory path joins - // TODO: Implement FTS5 integration + // Use FTS5 for high-performance text search + let fts_query = self.build_fts5_query(); + let fts_results = self.execute_fts5_search(db, &fts_query).await?; - let mut condition = Condition::any() - .add(entry::Column::Name.contains(&self.input.query)) - .add(entry::Column::Extension.contains(&self.input.query)); - - // Apply scope filters - condition = self.apply_scope_filter(condition); - - // Get file type registry for content type filtering - let registry = FileTypeRegistry::new(); - - // Apply additional filters - condition = self.apply_filters(condition, ®istry); - - // Build query with necessary joins - let mut query = entry::Entity::find() - .filter(condition) - .filter(entry::Column::Kind.eq(0)); // Only files - - // Add location join if location filtering is needed - if self.input.filters.locations.is_some() { - // Join with locations table to enable location filtering - query = query.join( - JoinType::LeftJoin, - crate::infra::db::entities::location::Relation::Entry.def(), - ); - } - - // Apply SD path filtering if specified in scope - if let SearchScope::Path { path } = &self.input.scope { - if let Some(device_id) = path.device_id() { - if let Some(path_str) = path.path() { - // Join with directory_paths to filter by path - query = query - .join(JoinType::LeftJoin, directory_paths::Relation::Entry.def()) - .filter( - directory_paths::Column::Path - .like(&format!("{}%", path_str.to_string_lossy())), - ); - } - } - } - - let entries = query - .limit(self.input.pagination.limit as u64) - .offset(self.input.pagination.offset as u64) - .all(db) - .await?; - - // Convert to search results with proper path construction + // Convert FTS5 results to search results with proper path construction let mut results = Vec::new(); - for entry_model in entries { + for (entry_id, bm25_score) in fts_results { + // Get the full entry data + let entry_model = entry::Entity::find_by_id(entry_id) + .one(db) + .await? + .ok_or_else(|| anyhow::anyhow!("Entry not found: {}", entry_id))?; + + // Apply additional filters (non-text filters) + if !self.passes_additional_filters(&entry_model, db).await? { + continue; + } + // Construct the full path by joining with directory_paths let full_path = self .construct_full_path(&entry_model, db) @@ -219,6 +183,10 @@ impl FileSearchQuery { None }; + // Extract values before moving into Entry + let entry_name = entry_model.name.clone(); + let entry_extension = entry_model.extension.clone(); + let entry = Entry { id: entry_model.uuid.unwrap_or_else(|| Uuid::new_v4()), sd_path: crate::domain::SdPathSerialized { @@ -252,10 +220,9 @@ impl FileSearchQuery { last_indexed_at: Some(entry_model.created_at), }; - // Calculate relevance score + // Use BM25 score from FTS5 as base relevance score let relevance_calc = crate::ops::search::sorting::RelevanceCalculator::new(self.input.query.clone()); - let filename_score = relevance_calc.calculate_filename_score(&entry.name); let recency_boost = if let Some(modified_at) = entry.modified_at { relevance_calc.calculate_recency_boost(modified_at) } else { @@ -264,26 +231,27 @@ impl FileSearchQuery { let user_preference_boost = relevance_calc.calculate_user_preference_boost(entry_model.id); - let final_score = filename_score + recency_boost + user_preference_boost; + // Combine FTS5 BM25 score with additional scoring factors + let final_score = bm25_score as f32 + recency_boost + user_preference_boost; let result = crate::ops::search::output::FileSearchResult { entry, score: final_score, score_breakdown: crate::ops::search::output::ScoreBreakdown::new( - filename_score, // temporal_score (using filename score as base) + bm25_score as f32, // temporal_score (FTS5 BM25) None, // semantic_score 0.0, // metadata_score recency_boost, // recency_boost user_preference_boost, // user_preference_boost ), - highlights: Vec::new(), + highlights: self.extract_highlights(&fts_query, &entry_name, &entry_extension), matched_content: None, }; results.push(result); } - // Sort results by relevance score (highest first) + // Results are already sorted by FTS5 BM25 score, but re-sort with additional factors results.sort_by(|a, b| { b.score .partial_cmp(&a.score) @@ -293,24 +261,88 @@ impl FileSearchQuery { Ok(results) } - /// Execute normal search with basic ranking + /// Execute normal search with FTS5 + enhanced ranking async fn execute_normal_search( &self, db: &DatabaseConnection, ) -> Result> { - // For now, same as fast search - // TODO: Add semantic ranking - self.execute_fast_search(db).await + // Use FTS5 as base, then enhance with additional ranking factors + let mut results = self.execute_fast_search(db).await?; + + // Enhanced ranking for normal search + for result in &mut results { + let mut enhanced_score = result.score; + + // Add metadata-based scoring + if result.entry.size.is_some() { + // Slightly boost files with reasonable sizes + if let Some(size) = result.entry.size { + if size > 1024 && size < 10_000_000 { + // 1KB to 10MB + enhanced_score += 0.1; + } + } + } + + // Boost files with extensions that match common document types + if let Some(extension) = result.entry.extension() { + match extension { + "pdf" | "doc" | "docx" | "txt" | "md" => enhanced_score += 0.2, + "jpg" | "png" | "gif" | "webp" => enhanced_score += 0.1, + _ => {} + } + } + + // Update the score + result.score = enhanced_score; + result.score_breakdown.metadata_score = enhanced_score + - result.score_breakdown.temporal_score + - result.score_breakdown.recency_boost + - result.score_breakdown.user_preference_boost; + } + + // Re-sort with enhanced scores + results.sort_by(|a, b| { + b.score + .partial_cmp(&a.score) + .unwrap_or(std::cmp::Ordering::Equal) + }); + + Ok(results) } - /// Execute full search with content analysis + /// Execute full search with FTS5 + content analysis async fn execute_full_search( &self, db: &DatabaseConnection, ) -> Result> { - // For now, same as normal search - // TODO: Add content extraction and analysis - self.execute_normal_search(db).await + // Start with normal search results + let mut results = self.execute_normal_search(db).await?; + + // For full search, we would add content analysis here + // This is a placeholder for future implementation + for result in &mut results { + // TODO: Add content extraction and analysis + // For now, just add a small boost for files that might have content + if result.entry.is_file() { + if let Some(size) = result.entry.size { + if size > 0 && size < 100_000_000 { + // Up to 100MB + result.score += 0.05; + result.score_breakdown.metadata_score += 0.05; + } + } + } + } + + // Re-sort with content analysis scores + results.sort_by(|a, b| { + b.score + .partial_cmp(&a.score) + .unwrap_or(std::cmp::Ordering::Equal) + }); + + Ok(results) } /// Apply scope filters to the query condition @@ -549,6 +581,257 @@ impl FileSearchQuery { let entries = query.all(db).await?; Ok(entries.len() as u64) } + + /// Build FTS5 query string with proper escaping + pub fn build_fts5_query(&self) -> String { + // Escape special FTS5 characters and build query + let escaped_query = self + .input + .query + .replace('"', r#"\""#) + .replace('\'', r#"\'"#) + .replace('*', r#"\*"#) + .replace('(', r#"\("#) + .replace(')', r#"\)"#); + + // Add prefix matching for autocomplete if query is long enough + if self.input.query.len() > 2 { + format!("{}*", escaped_query) + } else { + escaped_query + } + } + + /// Execute FTS5 search with BM25 ranking + async fn execute_fts5_search( + &self, + db: &DatabaseConnection, + query: &str, + ) -> Result> { + let sql = match &self.input.scope { + SearchScope::Path { path } => { + if let Some(path_str) = path.path() { + // FTS5 search with path filtering + r#" + WITH fts AS ( + SELECT rowid, bm25(search_index) AS rank + FROM search_index + WHERE search_index MATCH ? + ORDER BY rank + LIMIT 5000 + ) + SELECT e.id, fts.rank + FROM fts + JOIN entries e ON e.id = fts.rowid + JOIN directory_paths dp ON dp.entry_id = e.parent_id + WHERE dp.path LIKE ? + AND e.kind = 0 + ORDER BY fts.rank + LIMIT ? OFFSET ? + "# + } else { + // Basic FTS5 search + r#" + SELECT e.id, bm25(search_index) as rank + FROM search_index + JOIN entries e ON e.id = search_index.rowid + WHERE search_index MATCH ? + AND e.kind = 0 + ORDER BY rank + LIMIT ? OFFSET ? + "# + } + } + _ => { + // Basic FTS5 search + r#" + SELECT e.id, bm25(search_index) as rank + FROM search_index + JOIN entries e ON e.id = search_index.rowid + WHERE search_index MATCH ? + AND e.kind = 0 + ORDER BY rank + LIMIT ? OFFSET ? + "# + } + }; + + let statement = Statement::from_string(db.get_database_backend(), sql.to_string()); + + let params = match &self.input.scope { + SearchScope::Path { path } if path.path().is_some() => { + let path_str = path.path().unwrap().to_string_lossy(); + vec![ + query.into(), + format!("{}%", path_str).into(), + self.input.pagination.limit.to_string().into(), + self.input.pagination.offset.to_string().into(), + ] + } + _ => { + vec![ + query.into(), + self.input.pagination.limit.to_string().into(), + self.input.pagination.offset.to_string().into(), + ] + } + }; + + let results = db + .query_all(Statement::from_sql_and_values( + db.get_database_backend(), + &statement.sql, + params, + )) + .await?; + + let mut fts_results = Vec::new(); + for row in results { + let entry_id: i32 = row.try_get("", "id")?; + let rank: f64 = row.try_get("", "rank")?; + fts_results.push((entry_id, rank)); + } + + Ok(fts_results) + } + + /// Check if an entry passes additional (non-text) filters + async fn passes_additional_filters( + &self, + entry_model: &entry::Model, + db: &DatabaseConnection, + ) -> Result { + // File type filter + if let Some(file_types) = &self.input.filters.file_types { + if !file_types.is_empty() { + if let Some(ref extension) = entry_model.extension { + if !file_types.contains(extension) { + return Ok(false); + } + } else { + return Ok(false); + } + } + } + + // Date range filter + if let Some(date_range) = &self.input.filters.date_range { + let date_to_check = match date_range.field { + crate::ops::search::input::DateField::CreatedAt => Some(entry_model.created_at), + crate::ops::search::input::DateField::ModifiedAt => Some(entry_model.modified_at), + crate::ops::search::input::DateField::AccessedAt => entry_model.accessed_at, + }; + + if let Some(date) = date_to_check { + if let Some(start) = date_range.start { + if date < start { + return Ok(false); + } + } + if let Some(end) = date_range.end { + if date > end { + return Ok(false); + } + } + } + } + + // Size range filter + if let Some(size_range) = &self.input.filters.size_range { + if let Some(min) = size_range.min { + if (entry_model.size as u64) < min { + return Ok(false); + } + } + if let Some(max) = size_range.max { + if (entry_model.size as u64) > max { + return Ok(false); + } + } + } + + // Content type filter using file type registry + if let Some(content_types) = &self.input.filters.content_types { + if !content_types.is_empty() { + let registry = FileTypeRegistry::new(); + let mut matches_content_type = false; + + for content_type in content_types { + let extensions = registry.get_extensions_for_category(*content_type); + if let Some(ref extension) = entry_model.extension { + if extensions.contains(&extension.as_str()) { + matches_content_type = true; + break; + } + } + } + + if !matches_content_type { + return Ok(false); + } + } + } + + // Location filter + if let Some(locations) = &self.input.filters.locations { + if !locations.is_empty() { + // Check if entry belongs to one of the specified locations + if let Ok(Some(location)) = crate::infra::db::entities::location::Entity::find() + .filter( + crate::infra::db::entities::location::Column::EntryId.eq(entry_model.id), + ) + .one(db) + .await + { + if !locations.contains(&location.uuid) { + return Ok(false); + } + } else { + return Ok(false); + } + } + } + + Ok(true) + } + + /// Extract text highlights from FTS5 results + pub fn extract_highlights( + &self, + query: &str, + name: &str, + extension: &Option, + ) -> Vec { + let mut highlights = Vec::new(); + + // Highlight matches in filename + let name_lower = name.to_lowercase(); + let query_lower = query.replace('*', "").to_lowercase(); + + if let Some(start) = name_lower.find(&query_lower) { + highlights.push(crate::ops::search::output::TextHighlight { + field: "name".to_string(), + text: name.to_string(), + start, + end: start + query_lower.len(), + }); + } + + // Highlight matches in extension + if let Some(ref ext) = extension { + let ext_lower = ext.to_lowercase(); + if let Some(start) = ext_lower.find(&query_lower) { + highlights.push(crate::ops::search::output::TextHighlight { + field: "extension".to_string(), + text: ext.clone(), + start, + end: start + query_lower.len(), + }); + } + } + + highlights + } } crate::register_query!(FileSearchQuery, "search.files"); diff --git a/core/src/ops/search/tests.rs b/core/src/ops/search/tests.rs index 0db37a5b9..8c1d658c6 100644 --- a/core/src/ops/search/tests.rs +++ b/core/src/ops/search/tests.rs @@ -2,91 +2,129 @@ #[cfg(test)] mod tests { - use super::*; - use crate::ops::search::input::*; + use super::*; + use crate::ops::search::input::*; - #[test] - fn test_file_search_input_validation() { - // Test valid input - let valid_input = FileSearchInput::simple("test query".to_string()); - assert!(valid_input.validate().is_ok()); + #[test] + fn test_file_search_input_validation() { + // Test valid input + let valid_input = FileSearchInput::simple("test query".to_string()); + assert!(valid_input.validate().is_ok()); - // Test empty query - let empty_input = FileSearchInput::simple("".to_string()); - assert!(empty_input.validate().is_err()); + // Test empty query + let empty_input = FileSearchInput::simple("".to_string()); + assert!(empty_input.validate().is_err()); - // Test query too long - let long_query = "a".repeat(1001); - let long_input = FileSearchInput::simple(long_query); - assert!(long_input.validate().is_err()); + // Test query too long + let long_query = "a".repeat(1001); + let long_input = FileSearchInput::simple(long_query); + assert!(long_input.validate().is_err()); - // Test invalid pagination - let mut invalid_pagination = FileSearchInput::simple("test".to_string()); - invalid_pagination.pagination.limit = 0; - assert!(invalid_pagination.validate().is_err()); - } + // Test invalid pagination + let mut invalid_pagination = FileSearchInput::simple("test".to_string()); + invalid_pagination.pagination.limit = 0; + assert!(invalid_pagination.validate().is_err()); + } - #[test] - fn test_search_mode_creation() { - let fast_search = FileSearchInput::fast("test".to_string()); - assert!(matches!(fast_search.mode, SearchMode::Fast)); - assert_eq!(fast_search.pagination.limit, 20); + #[test] + fn test_search_mode_creation() { + let fast_search = FileSearchInput::fast("test".to_string()); + assert!(matches!(fast_search.mode, SearchMode::Fast)); + assert_eq!(fast_search.pagination.limit, 20); - let normal_search = FileSearchInput::simple("test".to_string()); - assert!(matches!(normal_search.mode, SearchMode::Normal)); - assert_eq!(normal_search.pagination.limit, 50); + let normal_search = FileSearchInput::simple("test".to_string()); + assert!(matches!(normal_search.mode, SearchMode::Normal)); + assert_eq!(normal_search.pagination.limit, 50); - let comprehensive_search = FileSearchInput::comprehensive("test".to_string()); - assert!(matches!(comprehensive_search.mode, SearchMode::Full)); - assert_eq!(comprehensive_search.pagination.limit, 100); - } + let comprehensive_search = FileSearchInput::comprehensive("test".to_string()); + assert!(matches!(comprehensive_search.mode, SearchMode::Full)); + assert_eq!(comprehensive_search.pagination.limit, 100); + } - #[test] - fn test_search_filters() { - let mut filters = SearchFilters::default(); - - // Test file type filter - filters.file_types = Some(vec!["txt".to_string(), "pdf".to_string()]); - - // Test date range filter - filters.date_range = Some(DateRangeFilter { - field: DateField::ModifiedAt, - start: Some(chrono::Utc::now() - chrono::Duration::days(7)), - end: Some(chrono::Utc::now()), - }); - - // Test size range filter - filters.size_range = Some(SizeRangeFilter { - min: Some(1024), - max: Some(1024 * 1024), - }); - - assert!(filters.file_types.is_some()); - assert!(filters.date_range.is_some()); - assert!(filters.size_range.is_some()); - } + #[test] + fn test_search_filters() { + let mut filters = SearchFilters::default(); - #[test] - fn test_content_type_extensions() { - use crate::filetype::FileTypeRegistry; - use crate::domain::ContentKind; - - let registry = FileTypeRegistry::new(); - - let image_exts = registry.get_extensions_for_category(ContentKind::Image); - assert!(image_exts.contains(&"jpg")); - assert!(image_exts.contains(&"png")); - - let code_exts = registry.get_extensions_for_category(ContentKind::Code); - assert!(code_exts.contains(&"rs")); - assert!(code_exts.contains(&"js")); - - let database_exts = registry.get_extensions_for_category(ContentKind::Database); - assert!(database_exts.contains(&"db")); - assert!(database_exts.contains(&"sqlite")); - - // Test that we get more extensions than hardcoded approach - assert!(image_exts.len() > 5); // Should have more than basic hardcoded list - assert!(code_exts.len() > 10); // Should have comprehensive code extensions - } -} \ No newline at end of file + // Test file type filter + filters.file_types = Some(vec!["txt".to_string(), "pdf".to_string()]); + + // Test date range filter + filters.date_range = Some(DateRangeFilter { + field: DateField::ModifiedAt, + start: Some(chrono::Utc::now() - chrono::Duration::days(7)), + end: Some(chrono::Utc::now()), + }); + + // Test size range filter + filters.size_range = Some(SizeRangeFilter { + min: Some(1024), + max: Some(1024 * 1024), + }); + + assert!(filters.file_types.is_some()); + assert!(filters.date_range.is_some()); + assert!(filters.size_range.is_some()); + } + + #[test] + fn test_content_type_extensions() { + use crate::domain::ContentKind; + use crate::filetype::FileTypeRegistry; + + let registry = FileTypeRegistry::new(); + + let image_exts = registry.get_extensions_for_category(ContentKind::Image); + assert!(image_exts.contains(&"jpg")); + assert!(image_exts.contains(&"png")); + + let code_exts = registry.get_extensions_for_category(ContentKind::Code); + assert!(code_exts.contains(&"rs")); + assert!(code_exts.contains(&"js")); + + let database_exts = registry.get_extensions_for_category(ContentKind::Database); + assert!(database_exts.contains(&"db")); + assert!(database_exts.contains(&"sqlite")); + + // Test that we get more extensions than hardcoded approach + assert!(image_exts.len() > 5); // Should have more than basic hardcoded list + assert!(code_exts.len() > 10); // Should have comprehensive code extensions + } + + #[test] + fn test_fts5_query_building() { + use crate::ops::search::query::FileSearchQuery; + + let search_input = FileSearchInput::simple("test query".to_string()); + let query = FileSearchQuery::new(search_input); + + let fts_query = query.build_fts5_query(); + assert!(fts_query.contains("test")); + assert!(fts_query.contains("query")); + + // Test escaping + let search_input_special = FileSearchInput::simple("test*query".to_string()); + let query_special = FileSearchQuery::new(search_input_special); + let fts_query_special = query_special.build_fts5_query(); + assert!(fts_query_special.contains("test\\*query")); + } + + #[test] + fn test_highlight_extraction() { + use crate::ops::search::query::FileSearchQuery; + + let search_input = FileSearchInput::simple("test".to_string()); + let query = FileSearchQuery::new(search_input); + + let highlights = + query.extract_highlights("test", "test_file.txt", &Some("test".to_string())); + + assert_eq!(highlights.len(), 2); // Should match in both name and extension + assert_eq!(highlights[0].field, "name"); + assert_eq!(highlights[0].start, 0); + assert_eq!(highlights[0].end, 4); + + assert_eq!(highlights[1].field, "extension"); + assert_eq!(highlights[1].start, 0); + assert_eq!(highlights[1].end, 4); // "test" extension + } +}