diff --git a/.woodpecker.star b/.woodpecker.star
index 547d92bb68..39745a5f8e 100644
--- a/.woodpecker.star
+++ b/.woodpecker.star
@@ -2426,6 +2426,7 @@ def opencloudServer(storage = "decomposed", depends_on = [], deploy_type = "", e
environment["FRONTEND_FULL_TEXT_SEARCH_ENABLED"] = True
environment["SEARCH_EXTRACTOR_TYPE"] = "tika"
environment["SEARCH_EXTRACTOR_TIKA_TIKA_URL"] = "http://tika:9998"
+ environment["SEARCH_EXTRACTOR_TIKA_CLEAN_STOP_WORDS"] = True
environment["SEARCH_EXTRACTOR_CS3SOURCE_INSECURE"] = True
if watch_fs_enabled:
diff --git a/services/search/pkg/config/defaults/defaultconfig.go b/services/search/pkg/config/defaults/defaultconfig.go
index 630cd24e7e..555484bdcf 100644
--- a/services/search/pkg/config/defaults/defaultconfig.go
+++ b/services/search/pkg/config/defaults/defaultconfig.go
@@ -50,7 +50,7 @@ func DefaultConfig() *config.Config {
CS3AllowInsecure: false,
Tika: config.ExtractorTika{
TikaURL: "http://127.0.0.1:9998",
- CleanStopWords: true,
+ CleanStopWords: false,
},
},
Events: config.Events{
diff --git a/services/search/pkg/opensearch/backend.go b/services/search/pkg/opensearch/backend.go
index 8972a01dd3..1f27c136bf 100644
--- a/services/search/pkg/opensearch/backend.go
+++ b/services/search/pkg/opensearch/backend.go
@@ -92,7 +92,9 @@ func (b *Backend) Search(ctx context.Context, sir *searchService.SearchIndexRequ
)
}
- searchParams := opensearchgoAPI.SearchParams{}
+ searchParams := opensearchgoAPI.SearchParams{
+ SourceExcludes: []string{"Content"}, // Do not send back the full content in the search response, as it is only needed for highlighting and can be large. The highlighted snippets will be sent back in the response instead.
+ }
switch {
case sir.PageSize == -1:
@@ -110,10 +112,15 @@ func (b *Backend) Search(ctx context.Context, sir *searchService.SearchIndexRequ
boolQuery,
osu.SearchBodyParams{
Highlight: &osu.BodyParamHighlight{
- PreTags: []string{""},
- PostTags: []string{""},
- Fields: map[string]osu.BodyParamHighlight{
- "Content": {},
+ HighlightOptions: osu.HighlightOptions{
+ NumberOfFragments: 2,
+ PreTags: []string{""},
+ PostTags: []string{""},
+ },
+ Fields: map[string]osu.HighlightOptions{
+ "Content": {
+ Type: osu.HighlightTypeFvh,
+ },
},
},
},
diff --git a/services/search/pkg/opensearch/index.go b/services/search/pkg/opensearch/index.go
index ef69cf9041..9f36cbaa83 100644
--- a/services/search/pkg/opensearch/index.go
+++ b/services/search/pkg/opensearch/index.go
@@ -16,8 +16,9 @@ import (
var (
ErrManualActionRequired = errors.New("manual action required")
- IndexManagerLatest = IndexIndexManagerResourceV1
+ IndexManagerLatest = IndexIndexManagerResourceV2 // latest definition now refers to v2; NOTE(review): confirm how indexes created from resource_v1.json are handled after this bump (see ErrManualActionRequired)
IndexIndexManagerResourceV1 IndexManager = "resource_v1.json"
+ IndexIndexManagerResourceV2 IndexManager = "resource_v2.json" // names the resource_v2.json definition added under internal/indexes in this change
)
//go:embed internal/indexes/*.json
diff --git a/services/search/pkg/opensearch/internal/indexes/resource_v2.json b/services/search/pkg/opensearch/internal/indexes/resource_v2.json
new file mode 100644
index 0000000000..64b450ef51
--- /dev/null
+++ b/services/search/pkg/opensearch/internal/indexes/resource_v2.json
@@ -0,0 +1,56 @@
+{
+ "settings": {
+ "number_of_shards": "1",
+ "number_of_replicas": "1",
+ "analysis": {
+ "analyzer": {
+ "path_hierarchy": {
+ "filter": [
+ "lowercase"
+ ],
+ "tokenizer": "path_hierarchy",
+ "type": "custom"
+ }
+ },
+ "tokenizer": {
+ "path_hierarchy": {
+ "type": "path_hierarchy"
+ }
+ }
+ }
+ },
+ "mappings": {
+ "properties": {
+ "Content": {
+ "type": "text",
+ "term_vector": "with_positions_offsets"
+ },
+ "ID": {
+ "type": "keyword"
+ },
+ "ParentID": {
+ "type": "keyword"
+ },
+ "RootID": {
+ "type": "keyword"
+ },
+ "MimeType": {
+ "type": "wildcard",
+ "doc_values": false
+ },
+ "Path": {
+ "type": "text",
+ "analyzer": "path_hierarchy"
+ },
+ "Deleted": {
+ "type": "boolean"
+ },
+ "Hidden": {
+ "type": "boolean"
+ },
+ "Favorites": {
+ "type": "keyword"
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/services/search/pkg/opensearch/internal/osu/request.go b/services/search/pkg/opensearch/internal/osu/request.go
index 8e845cf0af..927da7ac54 100644
--- a/services/search/pkg/opensearch/internal/osu/request.go
+++ b/services/search/pkg/opensearch/internal/osu/request.go
@@ -41,9 +41,39 @@ func (q QueryReqBody[O]) MarshalJSON() ([]byte, error) {
//----------------------------------------------------------------------------//
type BodyParamHighlight struct {
- PreTags []string `json:"pre_tags,omitempty"`
- PostTags []string `json:"post_tags,omitempty"`
- Fields map[string]BodyParamHighlight `json:"fields,omitempty"`
+ HighlightOptions // embedded without a JSON tag: its options are serialized at the top level of the highlight object, next to "fields"
+ Fields map[string]HighlightOptions `json:"fields,omitempty"` // per-field options keyed by field name; omitted when the map is empty
+}
+
+type HighlightType string // value serialized as the "type" highlight option (see HighlightOptions.Type)
+
+const (
+ HighlightTypeUnified HighlightType = "unified"
+ HighlightTypeFvh HighlightType = "fvh" // selected for the "Content" field in Backend.Search; NOTE(review): presumably relies on the term_vector setting for "Content" in resource_v2.json — confirm
+ HighlightTypePlain HighlightType = "plain"
+ HighlightTypeSemantic HighlightType = "semantic"
+)
+
+type HighlightOptions struct { // every field is tagged omitempty, so zero values (0, "", false, nil or empty slice) are left out of the request body
+ Type HighlightType `json:"type,omitempty"` // one of the HighlightType constants
+ FragmentSize int `json:"fragment_size,omitempty"`
+ NumberOfFragments int `json:"number_of_fragments,omitempty"` // 0 is dropped by omitempty, so an explicit 0 cannot be sent with this type
+ FragmentOffset int `json:"fragment_offset,omitempty"`
+ BoundaryChars string `json:"boundary_chars,omitempty"`
+ BoundaryMaxScan int `json:"boundary_max_scan,omitempty"`
+ BoundaryScanner string `json:"boundary_scanner,omitempty"`
+ BoundaryScannerLocale string `json:"boundary_scanner_locale,omitempty"`
+ Encoder string `json:"encoder,omitempty"`
+ ForceSource bool `json:"force_source,omitempty"` // false is dropped by omitempty
+ Fragmenter string `json:"fragmenter,omitempty"`
+ HighlightQuery Builder `json:"highlight_query,omitempty"` // Builder is declared elsewhere in this package; left out of the JSON when nil
+ Order string `json:"order,omitempty"`
+ NoMatchSize int `json:"no_match_size,omitempty"`
+ RequireFieldMatch bool `json:"require_field_match,omitempty"` // false is dropped by omitempty; NOTE(review): if the server-side default is true, this type cannot disable it — confirm, may need *bool
+ MatchedFields []string `json:"matched_fields,omitempty"`
+ PhraseLimit int `json:"phrase_limit,omitempty"`
+ PreTags []string `json:"pre_tags,omitempty"` // callers pass []string{""}; a one-element slice is non-empty and is therefore still serialized
+ PostTags []string `json:"post_tags,omitempty"` // same as PreTags: []string{""} is emitted, only a nil or empty slice is omitted
}
type BodyParamScript struct {
diff --git a/services/search/pkg/opensearch/internal/osu/request_test.go b/services/search/pkg/opensearch/internal/osu/request_test.go
index 6a48cd398e..535f05bd82 100644
--- a/services/search/pkg/opensearch/internal/osu/request_test.go
+++ b/services/search/pkg/opensearch/internal/osu/request_test.go
@@ -9,7 +9,7 @@ import (
"github.com/stretchr/testify/require"
"github.com/opencloud-eu/opencloud/services/search/pkg/opensearch/internal/osu"
- "github.com/opencloud-eu/opencloud/services/search/pkg/opensearch/internal/test"
+ opensearchtest "github.com/opencloud-eu/opencloud/services/search/pkg/opensearch/internal/test"
)
func TestRequestBody(t *testing.T) {
@@ -46,10 +46,15 @@ func TestBuildSearchReq(t *testing.T) {
osu.NewTermQuery[string]("content").Value("content"),
osu.SearchBodyParams{
Highlight: &osu.BodyParamHighlight{
- PreTags: []string{""},
- PostTags: []string{""},
- Fields: map[string]osu.BodyParamHighlight{
- "content": {},
+ HighlightOptions: osu.HighlightOptions{
+ PreTags: []string{""},
+ PostTags: []string{""},
+ },
+ Fields: map[string]osu.HighlightOptions{
+ "content": {
+ PreTags: []string{""},
+ PostTags: []string{""},
+ },
},
},
},
@@ -69,7 +74,10 @@ func TestBuildSearchReq(t *testing.T) {
"pre_tags": []string{""},
"post_tags": []string{""},
"fields": map[string]any{
- "content": map[string]any{},
+ "content": map[string]any{
+ "pre_tags": []string{""},
+ "post_tags": []string{""},
+ },
},
},
},