#1719 - add defensive code to recover from a corrupt search engine

This commit is contained in:
rmcrackan
2026-04-07 15:00:18 -04:00
parent 3fece6aa44
commit b0e79faacf
3 changed files with 62 additions and 4 deletions

View File

@@ -1,5 +1,6 @@
using DataLayer;
using LibationSearchEngine;
using Serilog;
using System;
using System.Collections.Generic;
using System.IO;
@@ -31,6 +32,12 @@ public static class SearchEngineCommands
fullReIndex(engine);
return func(engine);
}
catch (ArgumentException ex) when (SearchEngine.IsRecoverableCorruptIndexException(ex))
{
Log.Warning(ex, "Search index unreadable; rebuilding and retrying query.");
fullReIndex(engine);
return func(engine);
}
}
}
#endregion
@@ -76,6 +83,12 @@ public static class SearchEngineCommands
fullReIndex(new SearchEngine());
update(action);
}
catch (ArgumentException ex) when (SearchEngine.IsRecoverableCorruptIndexException(ex))
{
Log.Warning(ex, "Search index unreadable; rebuilding and retrying.");
fullReIndex(new SearchEngine());
update(action);
}
}
private static void update(Action<SearchEngine> action)

View File

@@ -15,6 +15,7 @@
<ItemGroup>
<ProjectReference Include="..\DataLayer\DataLayer.csproj" />
<ProjectReference Include="..\FileManager\FileManager.csproj" />
<ProjectReference Include="..\LibationFileManager\LibationFileManager.csproj" />
</ItemGroup>

View File

@@ -110,12 +110,57 @@ public class SearchEngine
}
}
/// <summary>
/// Tells whether an <see cref="ArgumentException"/> looks like the failure Lucene 3 raises when it cannot parse a
/// <c>segments_*</c> filename in the index directory. Cloud sync (e.g. OneDrive) can leave debris or conflict copies
/// whose names break that parser. The real message is likely to resemble
/// "Invalid or unsupported character in number", so detection is a case-insensitive substring match on the message.
/// </summary>
/// <param name="ex">The exception whose <see cref="Exception.Message"/> is inspected.</param>
/// <returns><c>true</c> when the message contains "character in number" (ignoring case); otherwise <c>false</c>.</returns>
public static bool IsRecoverableCorruptIndexException(ArgumentException ex)
{
    // Only this fragment of the message is matched; the rest of the text is not relied upon.
    const string messageMarker = "character in number";

    return ex.Message.IndexOf(messageMarker, StringComparison.OrdinalIgnoreCase) >= 0;
}
/// <summary>
/// Empties the given search index directory: every file beneath it (at any depth) is passed to
/// <c>FileUtility.SaferDelete</c>, then each subdirectory is removed. The directory itself is not deleted.
/// Does nothing when the directory does not exist.
/// </summary>
/// <param name="searchEngineDirectory">Root directory of the search index to clear.</param>
private static void deleteAllSearchIndexFiles(string searchEngineDirectory)
{
    if (System.IO.Directory.Exists(searchEngineDirectory))
    {
        var indexFiles = System.IO.Directory.GetFiles(searchEngineDirectory, "*", SearchOption.AllDirectories);
        foreach (var indexFile in indexFiles)
            FileUtility.SaferDelete(indexFile);

        // A nested directory's path is always longer than its parent's, so going longest-first
        // removes children before the directories that contain them.
        var subdirectories = System.IO.Directory
            .GetDirectories(searchEngineDirectory, "*", SearchOption.AllDirectories)
            .OrderByDescending(path => path.Length);

        foreach (var subdirectory in subdirectories)
        {
            try
            {
                System.IO.Directory.Delete(subdirectory);
            }
            catch (Exception deleteError)
            {
                // Best effort: a subdirectory that cannot be removed is logged and skipped.
                Serilog.Log.Logger.Warning(deleteError, "Could not remove search index subdirectory {Dir}", subdirectory);
            }
        }
    }
}
private void createNewIndexCore(List<LibraryBook> library, bool overwrite)
{
bool indexExists;
using (var indexProbe = getIndex())
{
try
{
indexExists = IndexReader.IndexExists(indexProbe);
}
catch (ArgumentException ex) when (IsRecoverableCorruptIndexException(ex))
{
Serilog.Log.Logger.Warning(ex, "Lucene search index at {Path} is unreadable (often cloud-sync debris or a partial write). Clearing it for rebuild.", SearchEngineDirectory);
indexExists = false;
}
}
if (!indexExists)
deleteAllSearchIndexFiles(SearchEngineDirectory);
// location of index/create the index
using var index = getIndex();
var exists = IndexReader.IndexExists(index);
var createNewIndex = overwrite || !exists;
var createNewIndex = overwrite || !indexExists;
// analyzer for tokenizing text. same analyzer should be used for indexing and searching
using var analyzer = new StandardAnalyzer(Version);
@@ -129,8 +174,7 @@ public class SearchEngine
/// <summary>
/// Initializes the instance by setting <c>SearchEngineDirectory</c>.
/// </summary>
/// <param name="directory">
/// Index directory to use. When <c>null</c>, the directory is the <c>FullName</c> of the result of
/// <c>CreateSubdirectoryEx("SearchEngine")</c> called on <c>Configuration.Instance.LibationFiles.Location</c>.
/// </param>
public SearchEngine(string? directory = null)
{
// NOTE(review): the two assignments below evaluate the same expression (fully-qualified vs. short DirectoryInfo name).
// This looks like the pre-change and post-change lines of the diff rendered together — confirm only one is intended.
SearchEngineDirectory = directory
?? new System.IO.DirectoryInfo(Configuration.Instance.LibationFiles.Location).CreateSubdirectoryEx("SearchEngine").FullName;
SearchEngineDirectory = directory ?? new DirectoryInfo(Configuration.Instance.LibationFiles.Location).CreateSubdirectoryEx("SearchEngine").FullName;
}
/// <summary>Long running. Use await Task.Run(() => UpdateBook(productId))</summary>