From dea306247bbde896deb0f5a73d9e4315ea606c64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Duffeck?= Date: Wed, 25 Mar 2026 08:42:12 +0100 Subject: [PATCH] Do not remove stop words by default Keeping the stop words leads to slightly bigger indexes but fixes chopped-up highlights in search results and improves phrase accuracy during search. The CI server environment sets SEARCH_EXTRACTOR_TIKA_CLEAN_STOP_WORDS explicitly to keep the previous behavior there. --- .woodpecker.star | 1 + services/search/pkg/config/defaults/defaultconfig.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.woodpecker.star b/.woodpecker.star index 547d92bb68..39745a5f8e 100644 --- a/.woodpecker.star +++ b/.woodpecker.star @@ -2426,6 +2426,7 @@ def opencloudServer(storage = "decomposed", depends_on = [], deploy_type = "", e environment["FRONTEND_FULL_TEXT_SEARCH_ENABLED"] = True environment["SEARCH_EXTRACTOR_TYPE"] = "tika" environment["SEARCH_EXTRACTOR_TIKA_TIKA_URL"] = "http://tika:9998" + environment["SEARCH_EXTRACTOR_TIKA_CLEAN_STOP_WORDS"] = True environment["SEARCH_EXTRACTOR_CS3SOURCE_INSECURE"] = True if watch_fs_enabled: diff --git a/services/search/pkg/config/defaults/defaultconfig.go b/services/search/pkg/config/defaults/defaultconfig.go index 630cd24e7e..555484bdcf 100644 --- a/services/search/pkg/config/defaults/defaultconfig.go +++ b/services/search/pkg/config/defaults/defaultconfig.go @@ -50,7 +50,7 @@ func DefaultConfig() *config.Config { CS3AllowInsecure: false, Tika: config.ExtractorTika{ TikaURL: "http://127.0.0.1:9998", - CleanStopWords: true, + CleanStopWords: false, }, }, Events: config.Events{