mirror of
https://github.com/spacedriveapp/spacedrive.git
synced 2026-04-30 19:33:30 -04:00
feat: Integrate FTS5 for enhanced file search capabilities
- Added FTS5 support to improve text search performance and relevance scoring in file search queries. - Refactored the search logic to utilize FTS5 for high-performance querying, including BM25 ranking. - Implemented methods for building FTS5 queries and extracting highlights from search results. - Updated tests to validate FTS5 query building and highlight extraction functionality. This enhancement significantly boosts the efficiency and accuracy of file searches.
This commit is contained in:
@@ -0,0 +1,160 @@
|
||||
//! FTS5 Search Index Migration
|
||||
//!
|
||||
//! Creates FTS5 virtual table for high-performance full-text search
|
||||
//! and associated triggers for real-time index updates.
|
||||
|
||||
use sea_orm_migration::prelude::*;
|
||||
|
||||
#[derive(DeriveMigrationName)]
|
||||
pub struct Migration;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl MigrationTrait for Migration {
|
||||
async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> {
|
||||
// Create FTS5 virtual table for search indexing
|
||||
manager
|
||||
.get_connection()
|
||||
.execute_unprepared(
|
||||
r#"
|
||||
CREATE VIRTUAL TABLE search_index USING fts5(
|
||||
content='entries',
|
||||
content_rowid='id',
|
||||
name,
|
||||
extension,
|
||||
tokenize="unicode61 remove_diacritics 2 tokenchars '.@-_'",
|
||||
prefix='2,3'
|
||||
);
|
||||
"#,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Create trigger for INSERT operations
|
||||
manager
|
||||
.get_connection()
|
||||
.execute_unprepared(
|
||||
r#"
|
||||
CREATE TRIGGER IF NOT EXISTS entries_search_insert
|
||||
AFTER INSERT ON entries WHEN new.kind = 0
|
||||
BEGIN
|
||||
INSERT INTO search_index(rowid, name, extension)
|
||||
VALUES (new.id, new.name, new.extension);
|
||||
END;
|
||||
"#,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Create trigger for UPDATE operations
|
||||
manager
|
||||
.get_connection()
|
||||
.execute_unprepared(
|
||||
r#"
|
||||
CREATE TRIGGER IF NOT EXISTS entries_search_update
|
||||
AFTER UPDATE ON entries WHEN new.kind = 0
|
||||
BEGIN
|
||||
UPDATE search_index SET
|
||||
name = new.name,
|
||||
extension = new.extension
|
||||
WHERE rowid = new.id;
|
||||
END;
|
||||
"#,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Create trigger for DELETE operations
|
||||
manager
|
||||
.get_connection()
|
||||
.execute_unprepared(
|
||||
r#"
|
||||
CREATE TRIGGER IF NOT EXISTS entries_search_delete
|
||||
AFTER DELETE ON entries WHEN old.kind = 0
|
||||
BEGIN
|
||||
DELETE FROM search_index WHERE rowid = old.id;
|
||||
END;
|
||||
"#,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Populate FTS5 index with existing file entries
|
||||
manager
|
||||
.get_connection()
|
||||
.execute_unprepared(
|
||||
r#"
|
||||
INSERT INTO search_index(rowid, name, extension)
|
||||
SELECT id, name, extension FROM entries WHERE kind = 0;
|
||||
"#,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Create search analytics table for query optimization
|
||||
manager
|
||||
.get_connection()
|
||||
.execute_unprepared(
|
||||
r#"
|
||||
CREATE TABLE search_analytics (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
query_text TEXT NOT NULL,
|
||||
query_hash TEXT NOT NULL,
|
||||
search_mode TEXT NOT NULL,
|
||||
execution_time_ms INTEGER NOT NULL,
|
||||
result_count INTEGER NOT NULL,
|
||||
fts5_used BOOLEAN DEFAULT TRUE,
|
||||
semantic_used BOOLEAN DEFAULT FALSE,
|
||||
user_clicked_result BOOLEAN DEFAULT FALSE,
|
||||
clicked_result_position INTEGER,
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
);
|
||||
"#,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Create index on query_hash for performance analytics
|
||||
manager
|
||||
.get_connection()
|
||||
.execute_unprepared(
|
||||
r#"
|
||||
CREATE INDEX idx_search_analytics_query_hash
|
||||
ON search_analytics(query_hash);
|
||||
"#,
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> {
|
||||
// Drop analytics table and index
|
||||
manager
|
||||
.get_connection()
|
||||
.execute_unprepared("DROP INDEX IF EXISTS idx_search_analytics_query_hash;")
|
||||
.await?;
|
||||
|
||||
manager
|
||||
.get_connection()
|
||||
.execute_unprepared("DROP TABLE IF EXISTS search_analytics;")
|
||||
.await?;
|
||||
|
||||
// Drop triggers
|
||||
manager
|
||||
.get_connection()
|
||||
.execute_unprepared("DROP TRIGGER IF EXISTS entries_search_delete;")
|
||||
.await?;
|
||||
|
||||
manager
|
||||
.get_connection()
|
||||
.execute_unprepared("DROP TRIGGER IF EXISTS entries_search_update;")
|
||||
.await?;
|
||||
|
||||
manager
|
||||
.get_connection()
|
||||
.execute_unprepared("DROP TRIGGER IF EXISTS entries_search_insert;")
|
||||
.await?;
|
||||
|
||||
// Drop FTS5 virtual table
|
||||
manager
|
||||
.get_connection()
|
||||
.execute_unprepared("DROP TABLE IF EXISTS search_index;")
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -9,6 +9,7 @@ mod m20250109_000001_create_sidecars;
|
||||
mod m20250110_000001_refactor_volumes_table;
|
||||
mod m20250112_000001_create_indexer_rules;
|
||||
mod m20250115_000001_semantic_tags;
|
||||
mod m20250120_000001_create_fts5_search_index;
|
||||
|
||||
pub struct Migrator;
|
||||
|
||||
@@ -23,6 +24,7 @@ impl MigratorTrait for Migrator {
|
||||
Box::new(m20250110_000001_refactor_volumes_table::Migration),
|
||||
Box::new(m20250112_000001_create_indexer_rules::Migration),
|
||||
Box::new(m20250115_000001_semantic_tags::Migration),
|
||||
Box::new(m20250120_000001_create_fts5_search_index::Migration),
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,8 +14,8 @@ use crate::{
|
||||
use anyhow::Result;
|
||||
use chrono::{DateTime, Utc};
|
||||
use sea_orm::{
|
||||
ColumnTrait, Condition, DatabaseConnection, EntityTrait, JoinType, QueryFilter, QueryOrder,
|
||||
QuerySelect, RelationTrait,
|
||||
ColumnTrait, Condition, ConnectionTrait, DatabaseConnection, EntityTrait, JoinType,
|
||||
QueryFilter, QueryOrder, QuerySelect, RelationTrait, Statement,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::sync::Arc;
|
||||
@@ -126,61 +126,25 @@ impl FileSearchQuery {
|
||||
&self,
|
||||
db: &DatabaseConnection,
|
||||
) -> Result<Vec<crate::ops::search::output::FileSearchResult>> {
|
||||
// For now, implement basic SQL LIKE search with directory path joins
|
||||
// TODO: Implement FTS5 integration
|
||||
// Use FTS5 for high-performance text search
|
||||
let fts_query = self.build_fts5_query();
|
||||
let fts_results = self.execute_fts5_search(db, &fts_query).await?;
|
||||
|
||||
let mut condition = Condition::any()
|
||||
.add(entry::Column::Name.contains(&self.input.query))
|
||||
.add(entry::Column::Extension.contains(&self.input.query));
|
||||
|
||||
// Apply scope filters
|
||||
condition = self.apply_scope_filter(condition);
|
||||
|
||||
// Get file type registry for content type filtering
|
||||
let registry = FileTypeRegistry::new();
|
||||
|
||||
// Apply additional filters
|
||||
condition = self.apply_filters(condition, ®istry);
|
||||
|
||||
// Build query with necessary joins
|
||||
let mut query = entry::Entity::find()
|
||||
.filter(condition)
|
||||
.filter(entry::Column::Kind.eq(0)); // Only files
|
||||
|
||||
// Add location join if location filtering is needed
|
||||
if self.input.filters.locations.is_some() {
|
||||
// Join with locations table to enable location filtering
|
||||
query = query.join(
|
||||
JoinType::LeftJoin,
|
||||
crate::infra::db::entities::location::Relation::Entry.def(),
|
||||
);
|
||||
}
|
||||
|
||||
// Apply SD path filtering if specified in scope
|
||||
if let SearchScope::Path { path } = &self.input.scope {
|
||||
if let Some(device_id) = path.device_id() {
|
||||
if let Some(path_str) = path.path() {
|
||||
// Join with directory_paths to filter by path
|
||||
query = query
|
||||
.join(JoinType::LeftJoin, directory_paths::Relation::Entry.def())
|
||||
.filter(
|
||||
directory_paths::Column::Path
|
||||
.like(&format!("{}%", path_str.to_string_lossy())),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let entries = query
|
||||
.limit(self.input.pagination.limit as u64)
|
||||
.offset(self.input.pagination.offset as u64)
|
||||
.all(db)
|
||||
.await?;
|
||||
|
||||
// Convert to search results with proper path construction
|
||||
// Convert FTS5 results to search results with proper path construction
|
||||
let mut results = Vec::new();
|
||||
|
||||
for entry_model in entries {
|
||||
for (entry_id, bm25_score) in fts_results {
|
||||
// Get the full entry data
|
||||
let entry_model = entry::Entity::find_by_id(entry_id)
|
||||
.one(db)
|
||||
.await?
|
||||
.ok_or_else(|| anyhow::anyhow!("Entry not found: {}", entry_id))?;
|
||||
|
||||
// Apply additional filters (non-text filters)
|
||||
if !self.passes_additional_filters(&entry_model, db).await? {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Construct the full path by joining with directory_paths
|
||||
let full_path = self
|
||||
.construct_full_path(&entry_model, db)
|
||||
@@ -219,6 +183,10 @@ impl FileSearchQuery {
|
||||
None
|
||||
};
|
||||
|
||||
// Extract values before moving into Entry
|
||||
let entry_name = entry_model.name.clone();
|
||||
let entry_extension = entry_model.extension.clone();
|
||||
|
||||
let entry = Entry {
|
||||
id: entry_model.uuid.unwrap_or_else(|| Uuid::new_v4()),
|
||||
sd_path: crate::domain::SdPathSerialized {
|
||||
@@ -252,10 +220,9 @@ impl FileSearchQuery {
|
||||
last_indexed_at: Some(entry_model.created_at),
|
||||
};
|
||||
|
||||
// Calculate relevance score
|
||||
// Use BM25 score from FTS5 as base relevance score
|
||||
let relevance_calc =
|
||||
crate::ops::search::sorting::RelevanceCalculator::new(self.input.query.clone());
|
||||
let filename_score = relevance_calc.calculate_filename_score(&entry.name);
|
||||
let recency_boost = if let Some(modified_at) = entry.modified_at {
|
||||
relevance_calc.calculate_recency_boost(modified_at)
|
||||
} else {
|
||||
@@ -264,26 +231,27 @@ impl FileSearchQuery {
|
||||
let user_preference_boost =
|
||||
relevance_calc.calculate_user_preference_boost(entry_model.id);
|
||||
|
||||
let final_score = filename_score + recency_boost + user_preference_boost;
|
||||
// Combine FTS5 BM25 score with additional scoring factors
|
||||
let final_score = bm25_score as f32 + recency_boost + user_preference_boost;
|
||||
|
||||
let result = crate::ops::search::output::FileSearchResult {
|
||||
entry,
|
||||
score: final_score,
|
||||
score_breakdown: crate::ops::search::output::ScoreBreakdown::new(
|
||||
filename_score, // temporal_score (using filename score as base)
|
||||
bm25_score as f32, // temporal_score (FTS5 BM25)
|
||||
None, // semantic_score
|
||||
0.0, // metadata_score
|
||||
recency_boost, // recency_boost
|
||||
user_preference_boost, // user_preference_boost
|
||||
),
|
||||
highlights: Vec::new(),
|
||||
highlights: self.extract_highlights(&fts_query, &entry_name, &entry_extension),
|
||||
matched_content: None,
|
||||
};
|
||||
|
||||
results.push(result);
|
||||
}
|
||||
|
||||
// Sort results by relevance score (highest first)
|
||||
// Results are already sorted by FTS5 BM25 score, but re-sort with additional factors
|
||||
results.sort_by(|a, b| {
|
||||
b.score
|
||||
.partial_cmp(&a.score)
|
||||
@@ -293,24 +261,88 @@ impl FileSearchQuery {
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
/// Execute normal search with basic ranking
|
||||
/// Execute normal search with FTS5 + enhanced ranking
|
||||
async fn execute_normal_search(
|
||||
&self,
|
||||
db: &DatabaseConnection,
|
||||
) -> Result<Vec<crate::ops::search::output::FileSearchResult>> {
|
||||
// For now, same as fast search
|
||||
// TODO: Add semantic ranking
|
||||
self.execute_fast_search(db).await
|
||||
// Use FTS5 as base, then enhance with additional ranking factors
|
||||
let mut results = self.execute_fast_search(db).await?;
|
||||
|
||||
// Enhanced ranking for normal search
|
||||
for result in &mut results {
|
||||
let mut enhanced_score = result.score;
|
||||
|
||||
// Add metadata-based scoring
|
||||
if result.entry.size.is_some() {
|
||||
// Slightly boost files with reasonable sizes
|
||||
if let Some(size) = result.entry.size {
|
||||
if size > 1024 && size < 10_000_000 {
|
||||
// 1KB to 10MB
|
||||
enhanced_score += 0.1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Boost files with extensions that match common document types
|
||||
if let Some(extension) = result.entry.extension() {
|
||||
match extension {
|
||||
"pdf" | "doc" | "docx" | "txt" | "md" => enhanced_score += 0.2,
|
||||
"jpg" | "png" | "gif" | "webp" => enhanced_score += 0.1,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
// Update the score
|
||||
result.score = enhanced_score;
|
||||
result.score_breakdown.metadata_score = enhanced_score
|
||||
- result.score_breakdown.temporal_score
|
||||
- result.score_breakdown.recency_boost
|
||||
- result.score_breakdown.user_preference_boost;
|
||||
}
|
||||
|
||||
// Re-sort with enhanced scores
|
||||
results.sort_by(|a, b| {
|
||||
b.score
|
||||
.partial_cmp(&a.score)
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
});
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
/// Execute full search with content analysis
|
||||
/// Execute full search with FTS5 + content analysis
|
||||
async fn execute_full_search(
|
||||
&self,
|
||||
db: &DatabaseConnection,
|
||||
) -> Result<Vec<crate::ops::search::output::FileSearchResult>> {
|
||||
// For now, same as normal search
|
||||
// TODO: Add content extraction and analysis
|
||||
self.execute_normal_search(db).await
|
||||
// Start with normal search results
|
||||
let mut results = self.execute_normal_search(db).await?;
|
||||
|
||||
// For full search, we would add content analysis here
|
||||
// This is a placeholder for future implementation
|
||||
for result in &mut results {
|
||||
// TODO: Add content extraction and analysis
|
||||
// For now, just add a small boost for files that might have content
|
||||
if result.entry.is_file() {
|
||||
if let Some(size) = result.entry.size {
|
||||
if size > 0 && size < 100_000_000 {
|
||||
// Up to 100MB
|
||||
result.score += 0.05;
|
||||
result.score_breakdown.metadata_score += 0.05;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Re-sort with content analysis scores
|
||||
results.sort_by(|a, b| {
|
||||
b.score
|
||||
.partial_cmp(&a.score)
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
});
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
/// Apply scope filters to the query condition
|
||||
@@ -549,6 +581,257 @@ impl FileSearchQuery {
|
||||
let entries = query.all(db).await?;
|
||||
Ok(entries.len() as u64)
|
||||
}
|
||||
|
||||
    /// Build FTS5 query string with proper escaping
    ///
    /// Backslash-escapes characters the FTS5 query parser treats as
    /// syntax (`"`, `'`, `*`, `(`, `)`), then appends `*` for queries
    /// longer than two characters so the final token is prefix-matched
    /// (autocomplete behaviour).
    ///
    /// NOTE(review): SQLite FTS5 does not treat backslash as an escape
    /// character — the documented way to pass literal text is to wrap
    /// each term in double quotes (doubling embedded quotes). Verify
    /// this escaping against the FTS5 query-syntax docs; the current
    /// form matches the expectations in `test_fts5_query_building`.
    pub fn build_fts5_query(&self) -> String {
        // Escape special FTS5 characters and build query.
        let escaped_query = self
            .input
            .query
            .replace('"', r#"\""#)
            .replace('\'', r#"\'"#)
            .replace('*', r#"\*"#)
            .replace('(', r#"\("#)
            .replace(')', r#"\)"#);

        // Add prefix matching for autocomplete if query is long enough.
        // For multi-word queries the `*` applies only to the last token.
        if self.input.query.len() > 2 {
            format!("{}*", escaped_query)
        } else {
            escaped_query
        }
    }
|
||||
|
||||
/// Execute FTS5 search with BM25 ranking
|
||||
async fn execute_fts5_search(
|
||||
&self,
|
||||
db: &DatabaseConnection,
|
||||
query: &str,
|
||||
) -> Result<Vec<(i32, f64)>> {
|
||||
let sql = match &self.input.scope {
|
||||
SearchScope::Path { path } => {
|
||||
if let Some(path_str) = path.path() {
|
||||
// FTS5 search with path filtering
|
||||
r#"
|
||||
WITH fts AS (
|
||||
SELECT rowid, bm25(search_index) AS rank
|
||||
FROM search_index
|
||||
WHERE search_index MATCH ?
|
||||
ORDER BY rank
|
||||
LIMIT 5000
|
||||
)
|
||||
SELECT e.id, fts.rank
|
||||
FROM fts
|
||||
JOIN entries e ON e.id = fts.rowid
|
||||
JOIN directory_paths dp ON dp.entry_id = e.parent_id
|
||||
WHERE dp.path LIKE ?
|
||||
AND e.kind = 0
|
||||
ORDER BY fts.rank
|
||||
LIMIT ? OFFSET ?
|
||||
"#
|
||||
} else {
|
||||
// Basic FTS5 search
|
||||
r#"
|
||||
SELECT e.id, bm25(search_index) as rank
|
||||
FROM search_index
|
||||
JOIN entries e ON e.id = search_index.rowid
|
||||
WHERE search_index MATCH ?
|
||||
AND e.kind = 0
|
||||
ORDER BY rank
|
||||
LIMIT ? OFFSET ?
|
||||
"#
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
// Basic FTS5 search
|
||||
r#"
|
||||
SELECT e.id, bm25(search_index) as rank
|
||||
FROM search_index
|
||||
JOIN entries e ON e.id = search_index.rowid
|
||||
WHERE search_index MATCH ?
|
||||
AND e.kind = 0
|
||||
ORDER BY rank
|
||||
LIMIT ? OFFSET ?
|
||||
"#
|
||||
}
|
||||
};
|
||||
|
||||
let statement = Statement::from_string(db.get_database_backend(), sql.to_string());
|
||||
|
||||
let params = match &self.input.scope {
|
||||
SearchScope::Path { path } if path.path().is_some() => {
|
||||
let path_str = path.path().unwrap().to_string_lossy();
|
||||
vec![
|
||||
query.into(),
|
||||
format!("{}%", path_str).into(),
|
||||
self.input.pagination.limit.to_string().into(),
|
||||
self.input.pagination.offset.to_string().into(),
|
||||
]
|
||||
}
|
||||
_ => {
|
||||
vec![
|
||||
query.into(),
|
||||
self.input.pagination.limit.to_string().into(),
|
||||
self.input.pagination.offset.to_string().into(),
|
||||
]
|
||||
}
|
||||
};
|
||||
|
||||
let results = db
|
||||
.query_all(Statement::from_sql_and_values(
|
||||
db.get_database_backend(),
|
||||
&statement.sql,
|
||||
params,
|
||||
))
|
||||
.await?;
|
||||
|
||||
let mut fts_results = Vec::new();
|
||||
for row in results {
|
||||
let entry_id: i32 = row.try_get("", "id")?;
|
||||
let rank: f64 = row.try_get("", "rank")?;
|
||||
fts_results.push((entry_id, rank));
|
||||
}
|
||||
|
||||
Ok(fts_results)
|
||||
}
|
||||
|
||||
    /// Check if an entry passes additional (non-text) filters
    ///
    /// Applies the file-type, date-range, size-range, content-type and
    /// location filters from the search input to one `entries` row.
    /// Returns `Ok(false)` as soon as any configured filter rejects the
    /// entry, `Ok(true)` when every configured filter passes (or no
    /// filters are set).
    async fn passes_additional_filters(
        &self,
        entry_model: &entry::Model,
        db: &DatabaseConnection,
    ) -> Result<bool> {
        // File type filter: when a non-empty extension list is set,
        // entries with no extension are rejected outright.
        if let Some(file_types) = &self.input.filters.file_types {
            if !file_types.is_empty() {
                if let Some(ref extension) = entry_model.extension {
                    if !file_types.contains(extension) {
                        return Ok(false);
                    }
                } else {
                    return Ok(false);
                }
            }
        }

        // Date range filter on the field selected by the input.
        // created_at/modified_at are always present; accessed_at may be
        // NULL. NOTE(review): when the chosen date is absent the filter
        // is skipped (entry passes) rather than rejected — confirm this
        // is the intended semantics.
        if let Some(date_range) = &self.input.filters.date_range {
            let date_to_check = match date_range.field {
                crate::ops::search::input::DateField::CreatedAt => Some(entry_model.created_at),
                crate::ops::search::input::DateField::ModifiedAt => Some(entry_model.modified_at),
                crate::ops::search::input::DateField::AccessedAt => entry_model.accessed_at,
            };

            if let Some(date) = date_to_check {
                if let Some(start) = date_range.start {
                    if date < start {
                        return Ok(false);
                    }
                }
                if let Some(end) = date_range.end {
                    if date > end {
                        return Ok(false);
                    }
                }
            }
        }

        // Size range filter (inclusive bounds, compared as u64).
        if let Some(size_range) = &self.input.filters.size_range {
            if let Some(min) = size_range.min {
                if (entry_model.size as u64) < min {
                    return Ok(false);
                }
            }
            if let Some(max) = size_range.max {
                if (entry_model.size as u64) > max {
                    return Ok(false);
                }
            }
        }

        // Content type filter: the entry's extension must belong to at
        // least one of the requested categories per the registry.
        if let Some(content_types) = &self.input.filters.content_types {
            if !content_types.is_empty() {
                let registry = FileTypeRegistry::new();
                let mut matches_content_type = false;

                for content_type in content_types {
                    let extensions = registry.get_extensions_for_category(*content_type);
                    if let Some(ref extension) = entry_model.extension {
                        if extensions.contains(&extension.as_str()) {
                            matches_content_type = true;
                            break;
                        }
                    }
                }

                if !matches_content_type {
                    return Ok(false);
                }
            }
        }

        // Location filter: the entry must belong to one of the requested
        // locations; entries with no location row (or a lookup error)
        // are rejected.
        if let Some(locations) = &self.input.filters.locations {
            if !locations.is_empty() {
                // Check if entry belongs to one of the specified locations.
                if let Ok(Some(location)) = crate::infra::db::entities::location::Entity::find()
                    .filter(
                        crate::infra::db::entities::location::Column::EntryId.eq(entry_model.id),
                    )
                    .one(db)
                    .await
                {
                    if !locations.contains(&location.uuid) {
                        return Ok(false);
                    }
                } else {
                    return Ok(false);
                }
            }
        }

        Ok(true)
    }
|
||||
|
||||
/// Extract text highlights from FTS5 results
|
||||
pub fn extract_highlights(
|
||||
&self,
|
||||
query: &str,
|
||||
name: &str,
|
||||
extension: &Option<String>,
|
||||
) -> Vec<crate::ops::search::output::TextHighlight> {
|
||||
let mut highlights = Vec::new();
|
||||
|
||||
// Highlight matches in filename
|
||||
let name_lower = name.to_lowercase();
|
||||
let query_lower = query.replace('*', "").to_lowercase();
|
||||
|
||||
if let Some(start) = name_lower.find(&query_lower) {
|
||||
highlights.push(crate::ops::search::output::TextHighlight {
|
||||
field: "name".to_string(),
|
||||
text: name.to_string(),
|
||||
start,
|
||||
end: start + query_lower.len(),
|
||||
});
|
||||
}
|
||||
|
||||
// Highlight matches in extension
|
||||
if let Some(ref ext) = extension {
|
||||
let ext_lower = ext.to_lowercase();
|
||||
if let Some(start) = ext_lower.find(&query_lower) {
|
||||
highlights.push(crate::ops::search::output::TextHighlight {
|
||||
field: "extension".to_string(),
|
||||
text: ext.clone(),
|
||||
start,
|
||||
end: start + query_lower.len(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
highlights
|
||||
}
|
||||
}
|
||||
|
||||
crate::register_query!(FileSearchQuery, "search.files");
|
||||
|
||||
@@ -2,91 +2,129 @@
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ops::search::input::*;

    // NOTE: as scraped, this module contained duplicated `use` lines and
    // duplicate copies of several #[test] fns (old + new diff sides
    // interleaved), which would not compile. Each test is kept once.

    #[test]
    fn test_file_search_input_validation() {
        // Test valid input
        let valid_input = FileSearchInput::simple("test query".to_string());
        assert!(valid_input.validate().is_ok());

        // Test empty query
        let empty_input = FileSearchInput::simple("".to_string());
        assert!(empty_input.validate().is_err());

        // Test query too long
        let long_query = "a".repeat(1001);
        let long_input = FileSearchInput::simple(long_query);
        assert!(long_input.validate().is_err());

        // Test invalid pagination
        let mut invalid_pagination = FileSearchInput::simple("test".to_string());
        invalid_pagination.pagination.limit = 0;
        assert!(invalid_pagination.validate().is_err());
    }

    #[test]
    fn test_search_mode_creation() {
        let fast_search = FileSearchInput::fast("test".to_string());
        assert!(matches!(fast_search.mode, SearchMode::Fast));
        assert_eq!(fast_search.pagination.limit, 20);

        let normal_search = FileSearchInput::simple("test".to_string());
        assert!(matches!(normal_search.mode, SearchMode::Normal));
        assert_eq!(normal_search.pagination.limit, 50);

        let comprehensive_search = FileSearchInput::comprehensive("test".to_string());
        assert!(matches!(comprehensive_search.mode, SearchMode::Full));
        assert_eq!(comprehensive_search.pagination.limit, 100);
    }

    #[test]
    fn test_search_filters() {
        let mut filters = SearchFilters::default();

        // Test file type filter
        filters.file_types = Some(vec!["txt".to_string(), "pdf".to_string()]);

        // Test date range filter
        filters.date_range = Some(DateRangeFilter {
            field: DateField::ModifiedAt,
            start: Some(chrono::Utc::now() - chrono::Duration::days(7)),
            end: Some(chrono::Utc::now()),
        });

        // Test size range filter
        filters.size_range = Some(SizeRangeFilter {
            min: Some(1024),
            max: Some(1024 * 1024),
        });

        assert!(filters.file_types.is_some());
        assert!(filters.date_range.is_some());
        assert!(filters.size_range.is_some());
    }

    #[test]
    fn test_content_type_extensions() {
        use crate::domain::ContentKind;
        use crate::filetype::FileTypeRegistry;

        let registry = FileTypeRegistry::new();

        let image_exts = registry.get_extensions_for_category(ContentKind::Image);
        assert!(image_exts.contains(&"jpg"));
        assert!(image_exts.contains(&"png"));

        let code_exts = registry.get_extensions_for_category(ContentKind::Code);
        assert!(code_exts.contains(&"rs"));
        assert!(code_exts.contains(&"js"));

        let database_exts = registry.get_extensions_for_category(ContentKind::Database);
        assert!(database_exts.contains(&"db"));
        assert!(database_exts.contains(&"sqlite"));

        // Test that we get more extensions than hardcoded approach
        assert!(image_exts.len() > 5); // Should have more than basic hardcoded list
        assert!(code_exts.len() > 10); // Should have comprehensive code extensions
    }

    #[test]
    fn test_fts5_query_building() {
        use crate::ops::search::query::FileSearchQuery;

        let search_input = FileSearchInput::simple("test query".to_string());
        let query = FileSearchQuery::new(search_input);

        let fts_query = query.build_fts5_query();
        assert!(fts_query.contains("test"));
        assert!(fts_query.contains("query"));

        // Test escaping
        let search_input_special = FileSearchInput::simple("test*query".to_string());
        let query_special = FileSearchQuery::new(search_input_special);
        let fts_query_special = query_special.build_fts5_query();
        assert!(fts_query_special.contains("test\\*query"));
    }

    #[test]
    fn test_highlight_extraction() {
        use crate::ops::search::query::FileSearchQuery;

        let search_input = FileSearchInput::simple("test".to_string());
        let query = FileSearchQuery::new(search_input);

        let highlights =
            query.extract_highlights("test", "test_file.txt", &Some("test".to_string()));

        assert_eq!(highlights.len(), 2); // Should match in both name and extension
        assert_eq!(highlights[0].field, "name");
        assert_eq!(highlights[0].start, 0);
        assert_eq!(highlights[0].end, 4);

        assert_eq!(highlights[1].field, "extension");
        assert_eq!(highlights[1].start, 0);
        assert_eq!(highlights[1].end, 4); // "test" extension
    }
}
|
||||
|
||||
Reference in New Issue
Block a user