From 10bc14130ee209ccf03648fb3b0f82ae95569363 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Duffeck?= Date: Thu, 19 Mar 2026 10:09:14 +0100 Subject: [PATCH 1/6] Do not send back the full content in the search response --- services/search/pkg/opensearch/backend.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/services/search/pkg/opensearch/backend.go b/services/search/pkg/opensearch/backend.go index 8972a01dd3..d41121730b 100644 --- a/services/search/pkg/opensearch/backend.go +++ b/services/search/pkg/opensearch/backend.go @@ -92,7 +92,9 @@ func (b *Backend) Search(ctx context.Context, sir *searchService.SearchIndexRequ ) } - searchParams := opensearchgoAPI.SearchParams{} + searchParams := opensearchgoAPI.SearchParams{ + SourceExcludes: []string{"Content"}, // Do not send back the full content in the search response, as it is only needed for highlighting and can be large. The highlighted snippets will be sent back in the response instead. + } switch { case sir.PageSize == -1: From 9e93f29ffe49f987992d4977068a7f65b640c458 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Duffeck?= Date: Thu, 19 Mar 2026 11:17:05 +0100 Subject: [PATCH 2/6] Introduce opensearch index v2 The new index allows for faster highlighting using the fvh highlighter and for searching for favorites. 
--- services/search/pkg/opensearch/index.go | 3 +- .../internal/indexes/resource_v2.json | 56 +++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 services/search/pkg/opensearch/internal/indexes/resource_v2.json diff --git a/services/search/pkg/opensearch/index.go b/services/search/pkg/opensearch/index.go index ef69cf9041..9f36cbaa83 100644 --- a/services/search/pkg/opensearch/index.go +++ b/services/search/pkg/opensearch/index.go @@ -16,8 +16,9 @@ import ( var ( ErrManualActionRequired = errors.New("manual action required") - IndexManagerLatest = IndexIndexManagerResourceV1 + IndexManagerLatest = IndexIndexManagerResourceV2 IndexIndexManagerResourceV1 IndexManager = "resource_v1.json" + IndexIndexManagerResourceV2 IndexManager = "resource_v2.json" ) //go:embed internal/indexes/*.json diff --git a/services/search/pkg/opensearch/internal/indexes/resource_v2.json b/services/search/pkg/opensearch/internal/indexes/resource_v2.json new file mode 100644 index 0000000000..64b450ef51 --- /dev/null +++ b/services/search/pkg/opensearch/internal/indexes/resource_v2.json @@ -0,0 +1,56 @@ +{ + "settings": { + "number_of_shards": "1", + "number_of_replicas": "1", + "analysis": { + "analyzer": { + "path_hierarchy": { + "filter": [ + "lowercase" + ], + "tokenizer": "path_hierarchy", + "type": "custom" + } + }, + "tokenizer": { + "path_hierarchy": { + "type": "path_hierarchy" + } + } + } + }, + "mappings": { + "properties": { + "Content": { + "type": "text", + "term_vector": "with_positions_offsets" + }, + "ID": { + "type": "keyword" + }, + "ParentID": { + "type": "keyword" + }, + "RootID": { + "type": "keyword" + }, + "MimeType": { + "type": "wildcard", + "doc_values": false + }, + "Path": { + "type": "text", + "analyzer": "path_hierarchy" + }, + "Deleted": { + "type": "boolean" + }, + "Hidden": { + "type": "boolean" + }, + "Favorites": { + "type": "keyword" + } + } + } +} \ No newline at end of file From a6dd9b9e180247b2a27bdcaba8802c472d36cdff 
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Duffeck?= Date: Thu, 19 Mar 2026 11:25:42 +0100 Subject: [PATCH 3/6] Use the fast vector highlighter for highlighting search results --- services/search/pkg/opensearch/backend.go | 4 +++- services/search/pkg/opensearch/internal/osu/request.go | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/services/search/pkg/opensearch/backend.go b/services/search/pkg/opensearch/backend.go index d41121730b..18d0995c2a 100644 --- a/services/search/pkg/opensearch/backend.go +++ b/services/search/pkg/opensearch/backend.go @@ -115,7 +115,9 @@ func (b *Backend) Search(ctx context.Context, sir *searchService.SearchIndexRequ PreTags: []string{""}, PostTags: []string{""}, Fields: map[string]osu.BodyParamHighlight{ - "Content": {}, + "Content": { + Type: "fvh", + }, }, }, }, diff --git a/services/search/pkg/opensearch/internal/osu/request.go b/services/search/pkg/opensearch/internal/osu/request.go index 8e845cf0af..04e4c3396d 100644 --- a/services/search/pkg/opensearch/internal/osu/request.go +++ b/services/search/pkg/opensearch/internal/osu/request.go @@ -43,6 +43,7 @@ func (q QueryReqBody[O]) MarshalJSON() ([]byte, error) { type BodyParamHighlight struct { PreTags []string `json:"pre_tags,omitempty"` PostTags []string `json:"post_tags,omitempty"` + Type string `json:"type,omitempty"` Fields map[string]BodyParamHighlight `json:"fields,omitempty"` } From 4fa51985012aec3e973c3826b4cb4f469d5e3649 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Duffeck?= Date: Thu, 19 Mar 2026 15:52:58 +0100 Subject: [PATCH 4/6] Improve highlight support in osu Co-authored-by: Florian Schade --- services/search/pkg/opensearch/backend.go | 10 +++-- .../pkg/opensearch/internal/osu/request.go | 37 +++++++++++++++++-- .../opensearch/internal/osu/request_test.go | 20 +++++++--- 3 files changed, 53 insertions(+), 14 deletions(-) diff --git a/services/search/pkg/opensearch/backend.go b/services/search/pkg/opensearch/backend.go index 
18d0995c2a..69a6b7df36 100644 --- a/services/search/pkg/opensearch/backend.go +++ b/services/search/pkg/opensearch/backend.go @@ -112,11 +112,13 @@ func (b *Backend) Search(ctx context.Context, sir *searchService.SearchIndexRequ boolQuery, osu.SearchBodyParams{ Highlight: &osu.BodyParamHighlight{ - PreTags: []string{""}, - PostTags: []string{""}, - Fields: map[string]osu.BodyParamHighlight{ + HighlightOptions: osu.HighlightOptions{ + PreTags: []string{""}, + PostTags: []string{""}, + }, + Fields: map[string]osu.HighlightOptions{ "Content": { - Type: "fvh", + Type: osu.HighlightTypeFvh, }, }, }, diff --git a/services/search/pkg/opensearch/internal/osu/request.go b/services/search/pkg/opensearch/internal/osu/request.go index 04e4c3396d..927da7ac54 100644 --- a/services/search/pkg/opensearch/internal/osu/request.go +++ b/services/search/pkg/opensearch/internal/osu/request.go @@ -41,10 +41,39 @@ func (q QueryReqBody[O]) MarshalJSON() ([]byte, error) { //----------------------------------------------------------------------------// type BodyParamHighlight struct { - PreTags []string `json:"pre_tags,omitempty"` - PostTags []string `json:"post_tags,omitempty"` - Type string `json:"type,omitempty"` - Fields map[string]BodyParamHighlight `json:"fields,omitempty"` + HighlightOptions + Fields map[string]HighlightOptions `json:"fields,omitempty"` +} + +type HighlightType string + +const ( + HighlightTypeUnified HighlightType = "unified" + HighlightTypeFvh HighlightType = "fvh" + HighlightTypePlain HighlightType = "plain" + HighlightTypeSemantic HighlightType = "semantic" +) + +type HighlightOptions struct { + Type HighlightType `json:"type,omitempty"` + FragmentSize int `json:"fragment_size,omitempty"` + NumberOfFragments int `json:"number_of_fragments,omitempty"` + FragmentOffset int `json:"fragment_offset,omitempty"` + BoundaryChars string `json:"boundary_chars,omitempty"` + BoundaryMaxScan int `json:"boundary_max_scan,omitempty"` + BoundaryScanner string 
`json:"boundary_scanner,omitempty"` + BoundaryScannerLocale string `json:"boundary_scanner_locale,omitempty"` + Encoder string `json:"encoder,omitempty"` + ForceSource bool `json:"force_source,omitempty"` + Fragmenter string `json:"fragmenter,omitempty"` + HighlightQuery Builder `json:"highlight_query,omitempty"` + Order string `json:"order,omitempty"` + NoMatchSize int `json:"no_match_size,omitempty"` + RequireFieldMatch bool `json:"require_field_match,omitempty"` + MatchedFields []string `json:"matched_fields,omitempty"` + PhraseLimit int `json:"phrase_limit,omitempty"` + PreTags []string `json:"pre_tags,omitempty"` + PostTags []string `json:"post_tags,omitempty"` } type BodyParamScript struct { diff --git a/services/search/pkg/opensearch/internal/osu/request_test.go b/services/search/pkg/opensearch/internal/osu/request_test.go index 6a48cd398e..535f05bd82 100644 --- a/services/search/pkg/opensearch/internal/osu/request_test.go +++ b/services/search/pkg/opensearch/internal/osu/request_test.go @@ -9,7 +9,7 @@ import ( "github.com/stretchr/testify/require" "github.com/opencloud-eu/opencloud/services/search/pkg/opensearch/internal/osu" - "github.com/opencloud-eu/opencloud/services/search/pkg/opensearch/internal/test" + opensearchtest "github.com/opencloud-eu/opencloud/services/search/pkg/opensearch/internal/test" ) func TestRequestBody(t *testing.T) { @@ -46,10 +46,15 @@ func TestBuildSearchReq(t *testing.T) { osu.NewTermQuery[string]("content").Value("content"), osu.SearchBodyParams{ Highlight: &osu.BodyParamHighlight{ - PreTags: []string{""}, - PostTags: []string{""}, - Fields: map[string]osu.BodyParamHighlight{ - "content": {}, + HighlightOptions: osu.HighlightOptions{ + PreTags: []string{""}, + PostTags: []string{""}, + }, + Fields: map[string]osu.HighlightOptions{ + "content": { + PreTags: []string{""}, + PostTags: []string{""}, + }, }, }, }, @@ -69,7 +74,10 @@ func TestBuildSearchReq(t *testing.T) { "pre_tags": []string{""}, "post_tags": []string{""}, 
"fields": map[string]any{ - "content": map[string]any{}, + "content": map[string]any{ + "pre_tags": []string{""}, + "post_tags": []string{""}, + }, }, }, }, From 8a83eea742bf921fa98253e0f515e270a1f5a2a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Duffeck?= Date: Tue, 24 Mar 2026 09:39:32 +0100 Subject: [PATCH 5/6] Limit the highlighter to two fragments --- services/search/pkg/opensearch/backend.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/services/search/pkg/opensearch/backend.go b/services/search/pkg/opensearch/backend.go index 69a6b7df36..1f27c136bf 100644 --- a/services/search/pkg/opensearch/backend.go +++ b/services/search/pkg/opensearch/backend.go @@ -113,8 +113,9 @@ func (b *Backend) Search(ctx context.Context, sir *searchService.SearchIndexRequ osu.SearchBodyParams{ Highlight: &osu.BodyParamHighlight{ HighlightOptions: osu.HighlightOptions{ - PreTags: []string{""}, - PostTags: []string{""}, + NumberOfFragments: 2, + PreTags: []string{""}, + PostTags: []string{""}, }, Fields: map[string]osu.HighlightOptions{ "Content": { From dea306247bbde896deb0f5a73d9e4315ea606c64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Duffeck?= Date: Wed, 25 Mar 2026 08:42:12 +0100 Subject: [PATCH 6/6] Do not remove stopwords by default Keeping the stop words leads to slightly bigger indexes but fixes chopped up highlights of search results and phrase accuracy during search. 
--- .woodpecker.star | 1 + services/search/pkg/config/defaults/defaultconfig.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.woodpecker.star b/.woodpecker.star index 547d92bb68..39745a5f8e 100644 --- a/.woodpecker.star +++ b/.woodpecker.star @@ -2426,6 +2426,7 @@ def opencloudServer(storage = "decomposed", depends_on = [], deploy_type = "", e environment["FRONTEND_FULL_TEXT_SEARCH_ENABLED"] = True environment["SEARCH_EXTRACTOR_TYPE"] = "tika" environment["SEARCH_EXTRACTOR_TIKA_TIKA_URL"] = "http://tika:9998" + environment["SEARCH_EXTRACTOR_TIKA_CLEAN_STOP_WORDS"] = True environment["SEARCH_EXTRACTOR_CS3SOURCE_INSECURE"] = True if watch_fs_enabled: diff --git a/services/search/pkg/config/defaults/defaultconfig.go b/services/search/pkg/config/defaults/defaultconfig.go index 630cd24e7e..555484bdcf 100644 --- a/services/search/pkg/config/defaults/defaultconfig.go +++ b/services/search/pkg/config/defaults/defaultconfig.go @@ -50,7 +50,7 @@ func DefaultConfig() *config.Config { CS3AllowInsecure: false, Tika: config.ExtractorTika{ TikaURL: "http://127.0.0.1:9998", - CleanStopWords: true, + CleanStopWords: false, }, }, Events: config.Events{