From b0e79faacf1c2c1126df7e6e6d1f76d80774ebb3 Mon Sep 17 00:00:00 2001 From: rmcrackan Date: Tue, 7 Apr 2026 15:00:18 -0400 Subject: [PATCH] #1719 - add defensive code to recover from a corrupt search engine --- .../SearchEngineCommands.cs | 13 +++++ .../LibationSearchEngine.csproj | 1 + Source/LibationSearchEngine/SearchEngine.cs | 52 +++++++++++++++++-- 3 files changed, 62 insertions(+), 4 deletions(-) diff --git a/Source/ApplicationServices/SearchEngineCommands.cs b/Source/ApplicationServices/SearchEngineCommands.cs index 9272d1d0..d7d4e75f 100644 --- a/Source/ApplicationServices/SearchEngineCommands.cs +++ b/Source/ApplicationServices/SearchEngineCommands.cs @@ -1,5 +1,6 @@ using DataLayer; using LibationSearchEngine; +using Serilog; using System; using System.Collections.Generic; using System.IO; @@ -31,6 +32,12 @@ public static class SearchEngineCommands fullReIndex(engine); return func(engine); } + catch (ArgumentException ex) when (SearchEngine.IsRecoverableCorruptIndexException(ex)) + { + Log.Warning(ex, "Search index unreadable; rebuilding and retrying query."); + fullReIndex(engine); + return func(engine); + } } } #endregion @@ -76,6 +83,12 @@ public static class SearchEngineCommands fullReIndex(new SearchEngine()); update(action); } + catch (ArgumentException ex) when (SearchEngine.IsRecoverableCorruptIndexException(ex)) + { + Log.Warning(ex, "Search index unreadable; rebuilding and retrying."); + fullReIndex(new SearchEngine()); + update(action); + } } private static void update(Action action) diff --git a/Source/LibationSearchEngine/LibationSearchEngine.csproj b/Source/LibationSearchEngine/LibationSearchEngine.csproj index 98aa2f10..d51ecba7 100644 --- a/Source/LibationSearchEngine/LibationSearchEngine.csproj +++ b/Source/LibationSearchEngine/LibationSearchEngine.csproj @@ -15,6 +15,7 @@ + diff --git a/Source/LibationSearchEngine/SearchEngine.cs b/Source/LibationSearchEngine/SearchEngine.cs index 43e55b8f..d6139c34 100644 --- 
a/Source/LibationSearchEngine/SearchEngine.cs +++ b/Source/LibationSearchEngine/SearchEngine.cs @@ -110,12 +110,57 @@ public class SearchEngine } } + /// + /// Lucene 3 parses segments_* filenames in the index directory. Cloud sync (e.g. OneDrive) can leave debris + /// or conflict copies whose names break that parser, throwing an ArgumentException with this message shape. + /// Actual error is likely to be something like: Invalid or unsupported character in number, hence this string check. + /// + public static bool IsRecoverableCorruptIndexException(ArgumentException ex) + => ex.Message.Contains("character in number", StringComparison.OrdinalIgnoreCase); + + private static void deleteAllSearchIndexFiles(string searchEngineDirectory) + { + if (!System.IO.Directory.Exists(searchEngineDirectory)) + return; + + foreach (var file in System.IO.Directory.GetFiles(searchEngineDirectory, "*", SearchOption.AllDirectories)) + FileUtility.SaferDelete(file); + + foreach (var dir in System.IO.Directory.GetDirectories(searchEngineDirectory, "*", SearchOption.AllDirectories).OrderByDescending(d => d.Length)) + { + try + { + System.IO.Directory.Delete(dir); + } + catch (Exception ex) + { + Serilog.Log.Logger.Warning(ex, "Could not remove search index subdirectory {Dir}", dir); + } + } + } + private void createNewIndexCore(List library, bool overwrite) { + bool indexExists; + using (var indexProbe = getIndex()) + { + try + { + indexExists = IndexReader.IndexExists(indexProbe); + } + catch (ArgumentException ex) when (IsRecoverableCorruptIndexException(ex)) + { + Serilog.Log.Logger.Warning(ex, "Lucene search index at {Path} is unreadable (often cloud-sync debris or a partial write). 
Clearing it for rebuild.", SearchEngineDirectory); + indexExists = false; + } + } + + if (!indexExists) + deleteAllSearchIndexFiles(SearchEngineDirectory); + // location of index/create the index using var index = getIndex(); - var exists = IndexReader.IndexExists(index); - var createNewIndex = overwrite || !exists; + var createNewIndex = overwrite || !indexExists; // analyzer for tokenizing text. same analyzer should be used for indexing and searching using var analyzer = new StandardAnalyzer(Version); @@ -129,8 +174,7 @@ public class SearchEngine public SearchEngine(string? directory = null) { - SearchEngineDirectory = directory - ?? new System.IO.DirectoryInfo(Configuration.Instance.LibationFiles.Location).CreateSubdirectoryEx("SearchEngine").FullName; + SearchEngineDirectory = directory ?? new DirectoryInfo(Configuration.Instance.LibationFiles.Location).CreateSubdirectoryEx("SearchEngine").FullName; } /// Long running. Use await Task.Run(() => UpdateBook(productId))