diff --git a/.woodpecker.star b/.woodpecker.star index 547d92bb68..39745a5f8e 100644 --- a/.woodpecker.star +++ b/.woodpecker.star @@ -2426,6 +2426,7 @@ def opencloudServer(storage = "decomposed", depends_on = [], deploy_type = "", e environment["FRONTEND_FULL_TEXT_SEARCH_ENABLED"] = True environment["SEARCH_EXTRACTOR_TYPE"] = "tika" environment["SEARCH_EXTRACTOR_TIKA_TIKA_URL"] = "http://tika:9998" + environment["SEARCH_EXTRACTOR_TIKA_CLEAN_STOP_WORDS"] = True environment["SEARCH_EXTRACTOR_CS3SOURCE_INSECURE"] = True if watch_fs_enabled: diff --git a/services/search/pkg/config/defaults/defaultconfig.go b/services/search/pkg/config/defaults/defaultconfig.go index 630cd24e7e..555484bdcf 100644 --- a/services/search/pkg/config/defaults/defaultconfig.go +++ b/services/search/pkg/config/defaults/defaultconfig.go @@ -50,7 +50,7 @@ func DefaultConfig() *config.Config { CS3AllowInsecure: false, Tika: config.ExtractorTika{ TikaURL: "http://127.0.0.1:9998", - CleanStopWords: true, + CleanStopWords: false, }, }, Events: config.Events{ diff --git a/services/search/pkg/opensearch/backend.go b/services/search/pkg/opensearch/backend.go index 8972a01dd3..1f27c136bf 100644 --- a/services/search/pkg/opensearch/backend.go +++ b/services/search/pkg/opensearch/backend.go @@ -92,7 +92,9 @@ func (b *Backend) Search(ctx context.Context, sir *searchService.SearchIndexRequ ) } - searchParams := opensearchgoAPI.SearchParams{} + searchParams := opensearchgoAPI.SearchParams{ + SourceExcludes: []string{"Content"}, // Do not send back the full content in the search response, as it is only needed for highlighting and can be large. The highlighted snippets will be sent back in the response instead. + } switch { case sir.PageSize == -1: @@ -110,10 +112,15 @@ func (b *Backend) Search(ctx context.Context, sir *searchService.SearchIndexRequ boolQuery, osu.SearchBodyParams{ Highlight: &osu.BodyParamHighlight{ - PreTags: []string{""}, - PostTags: []string{""}, - Fields: map[string]osu.BodyParamHighlight{ - "Content": {}, + HighlightOptions: osu.HighlightOptions{ + NumberOfFragments: 2, + PreTags: []string{""}, + PostTags: []string{""}, + }, + Fields: map[string]osu.HighlightOptions{ + "Content": { + Type: osu.HighlightTypeFvh, + }, }, }, }, diff --git a/services/search/pkg/opensearch/index.go b/services/search/pkg/opensearch/index.go index ef69cf9041..9f36cbaa83 100644 --- a/services/search/pkg/opensearch/index.go +++ b/services/search/pkg/opensearch/index.go @@ -16,8 +16,9 @@ import ( var ( ErrManualActionRequired = errors.New("manual action required") - IndexManagerLatest = IndexIndexManagerResourceV1 + IndexManagerLatest = IndexIndexManagerResourceV2 IndexIndexManagerResourceV1 IndexManager = "resource_v1.json" + IndexIndexManagerResourceV2 IndexManager = "resource_v2.json" ) //go:embed internal/indexes/*.json diff --git a/services/search/pkg/opensearch/internal/indexes/resource_v2.json b/services/search/pkg/opensearch/internal/indexes/resource_v2.json new file mode 100644 index 0000000000..64b450ef51 --- /dev/null +++ b/services/search/pkg/opensearch/internal/indexes/resource_v2.json @@ -0,0 +1,56 @@ +{ + "settings": { + "number_of_shards": "1", + "number_of_replicas": "1", + "analysis": { + "analyzer": { + "path_hierarchy": { + "filter": [ + "lowercase" + ], + "tokenizer": "path_hierarchy", + "type": "custom" + } + }, + "tokenizer": { + "path_hierarchy": { + "type": "path_hierarchy" + } + } + } + }, + "mappings": { + "properties": { + "Content": { + "type": "text", + "term_vector": "with_positions_offsets" + }, + "ID": { + "type": "keyword" + }, + "ParentID": { + "type": "keyword" + }, + "RootID": { + "type": "keyword" + }, + "MimeType": { + "type": "wildcard", + "doc_values": false + }, + "Path": { + "type": "text", + "analyzer": "path_hierarchy" + }, + "Deleted": { + "type": "boolean" + }, + "Hidden": { + "type": "boolean" + }, + "Favorites": { + "type": "keyword" + } + } + } +} \ No newline at end of file diff --git a/services/search/pkg/opensearch/internal/osu/request.go b/services/search/pkg/opensearch/internal/osu/request.go index 8e845cf0af..927da7ac54 100644 --- a/services/search/pkg/opensearch/internal/osu/request.go +++ b/services/search/pkg/opensearch/internal/osu/request.go @@ -41,9 +41,39 @@ func (q QueryReqBody[O]) MarshalJSON() ([]byte, error) { //----------------------------------------------------------------------------// type BodyParamHighlight struct { - PreTags []string `json:"pre_tags,omitempty"` - PostTags []string `json:"post_tags,omitempty"` - Fields map[string]BodyParamHighlight `json:"fields,omitempty"` + HighlightOptions + Fields map[string]HighlightOptions `json:"fields,omitempty"` +} + +type HighlightType string + +const ( + HighlightTypeUnified HighlightType = "unified" + HighlightTypeFvh HighlightType = "fvh" + HighlightTypePlain HighlightType = "plain" + HighlightTypeSemantic HighlightType = "semantic" +) + +type HighlightOptions struct { + Type HighlightType `json:"type,omitempty"` + FragmentSize int `json:"fragment_size,omitempty"` + NumberOfFragments int `json:"number_of_fragments,omitempty"` + FragmentOffset int `json:"fragment_offset,omitempty"` + BoundaryChars string `json:"boundary_chars,omitempty"` + BoundaryMaxScan int `json:"boundary_max_scan,omitempty"` + BoundaryScanner string `json:"boundary_scanner,omitempty"` + BoundaryScannerLocale string `json:"boundary_scanner_locale,omitempty"` + Encoder string `json:"encoder,omitempty"` + ForceSource bool `json:"force_source,omitempty"` + Fragmenter string `json:"fragmenter,omitempty"` + HighlightQuery Builder `json:"highlight_query,omitempty"` + Order string `json:"order,omitempty"` + NoMatchSize int `json:"no_match_size,omitempty"` + RequireFieldMatch bool `json:"require_field_match,omitempty"` + MatchedFields []string `json:"matched_fields,omitempty"` + PhraseLimit int `json:"phrase_limit,omitempty"` + PreTags []string `json:"pre_tags,omitempty"` + PostTags []string `json:"post_tags,omitempty"` } type BodyParamScript struct { diff --git a/services/search/pkg/opensearch/internal/osu/request_test.go b/services/search/pkg/opensearch/internal/osu/request_test.go index 6a48cd398e..535f05bd82 100644 --- a/services/search/pkg/opensearch/internal/osu/request_test.go +++ b/services/search/pkg/opensearch/internal/osu/request_test.go @@ -9,7 +9,7 @@ import ( "github.com/stretchr/testify/require" "github.com/opencloud-eu/opencloud/services/search/pkg/opensearch/internal/osu" - "github.com/opencloud-eu/opencloud/services/search/pkg/opensearch/internal/test" + opensearchtest "github.com/opencloud-eu/opencloud/services/search/pkg/opensearch/internal/test" ) func TestRequestBody(t *testing.T) { @@ -46,10 +46,15 @@ func TestBuildSearchReq(t *testing.T) { osu.NewTermQuery[string]("content").Value("content"), osu.SearchBodyParams{ Highlight: &osu.BodyParamHighlight{ - PreTags: []string{""}, - PostTags: []string{""}, - Fields: map[string]osu.BodyParamHighlight{ - "content": {}, + HighlightOptions: osu.HighlightOptions{ + PreTags: []string{""}, + PostTags: []string{""}, + }, + Fields: map[string]osu.HighlightOptions{ + "content": { + PreTags: []string{""}, + PostTags: []string{""}, + }, }, }, }, @@ -69,7 +74,10 @@ func TestBuildSearchReq(t *testing.T) { "pre_tags": []string{""}, "post_tags": []string{""}, "fields": map[string]any{ - "content": map[string]any{}, + "content": map[string]any{ + "pre_tags": []string{""}, + "post_tags": []string{""}, + }, }, }, },