Merge pull request #2514 from aduffeck/improve-opensearch

Improve opensearch highlighting, fix favorites
This commit is contained in:
Florian Schade
2026-03-25 11:01:14 +01:00
committed by GitHub
7 changed files with 119 additions and 16 deletions

View File

@@ -2426,6 +2426,7 @@ def opencloudServer(storage = "decomposed", depends_on = [], deploy_type = "", e
environment["FRONTEND_FULL_TEXT_SEARCH_ENABLED"] = True
environment["SEARCH_EXTRACTOR_TYPE"] = "tika"
environment["SEARCH_EXTRACTOR_TIKA_TIKA_URL"] = "http://tika:9998"
environment["SEARCH_EXTRACTOR_TIKA_CLEAN_STOP_WORDS"] = True
environment["SEARCH_EXTRACTOR_CS3SOURCE_INSECURE"] = True
if watch_fs_enabled:

View File

@@ -50,7 +50,7 @@ func DefaultConfig() *config.Config {
CS3AllowInsecure: false,
Tika: config.ExtractorTika{
TikaURL: "http://127.0.0.1:9998",
CleanStopWords: true,
CleanStopWords: false,
},
},
Events: config.Events{

View File

@@ -92,7 +92,9 @@ func (b *Backend) Search(ctx context.Context, sir *searchService.SearchIndexRequ
)
}
searchParams := opensearchgoAPI.SearchParams{}
searchParams := opensearchgoAPI.SearchParams{
SourceExcludes: []string{"Content"}, // Do not send back the full content in the search response, as it is only needed for highlighting and can be large. The highlighted snippets will be sent back in the response instead.
}
switch {
case sir.PageSize == -1:
@@ -110,10 +112,15 @@ func (b *Backend) Search(ctx context.Context, sir *searchService.SearchIndexRequ
boolQuery,
osu.SearchBodyParams{
Highlight: &osu.BodyParamHighlight{
PreTags: []string{"<mark>"},
PostTags: []string{"</mark>"},
Fields: map[string]osu.BodyParamHighlight{
"Content": {},
HighlightOptions: osu.HighlightOptions{
NumberOfFragments: 2,
PreTags: []string{"<mark>"},
PostTags: []string{"</mark>"},
},
Fields: map[string]osu.HighlightOptions{
"Content": {
Type: osu.HighlightTypeFvh,
},
},
},
},

View File

@@ -16,8 +16,9 @@ import (
var (
ErrManualActionRequired = errors.New("manual action required")
IndexManagerLatest = IndexIndexManagerResourceV1
IndexManagerLatest = IndexIndexManagerResourceV2
IndexIndexManagerResourceV1 IndexManager = "resource_v1.json"
IndexIndexManagerResourceV2 IndexManager = "resource_v2.json"
)
//go:embed internal/indexes/*.json

View File

@@ -0,0 +1,56 @@
{
"settings": {
"number_of_shards": "1",
"number_of_replicas": "1",
"analysis": {
"analyzer": {
"path_hierarchy": {
"filter": [
"lowercase"
],
"tokenizer": "path_hierarchy",
"type": "custom"
}
},
"tokenizer": {
"path_hierarchy": {
"type": "path_hierarchy"
}
}
}
},
"mappings": {
"properties": {
"Content": {
"type": "text",
"term_vector": "with_positions_offsets"
},
"ID": {
"type": "keyword"
},
"ParentID": {
"type": "keyword"
},
"RootID": {
"type": "keyword"
},
"MimeType": {
"type": "wildcard",
"doc_values": false
},
"Path": {
"type": "text",
"analyzer": "path_hierarchy"
},
"Deleted": {
"type": "boolean"
},
"Hidden": {
"type": "boolean"
},
"Favorites": {
"type": "keyword"
}
}
}
}

View File

@@ -41,9 +41,39 @@ func (q QueryReqBody[O]) MarshalJSON() ([]byte, error) {
//----------------------------------------------------------------------------//
type BodyParamHighlight struct {
PreTags []string `json:"pre_tags,omitempty"`
PostTags []string `json:"post_tags,omitempty"`
Fields map[string]BodyParamHighlight `json:"fields,omitempty"`
HighlightOptions
Fields map[string]HighlightOptions `json:"fields,omitempty"`
}
// HighlightType names the highlighter requested for a highlight section.
// It is serialized as the "type" option of HighlightOptions.
type HighlightType string
// Highlighter type values that can be assigned to HighlightOptions.Type.
const (
// HighlightTypeUnified is sent as "unified".
HighlightTypeUnified HighlightType = "unified"
// HighlightTypeFvh is sent as "fvh".
// NOTE(review): OpenSearch documents that the fvh highlighter needs the
// target field mapped with term_vector "with_positions_offsets"; confirm the
// index mapping in use provides that before selecting this type.
HighlightTypeFvh HighlightType = "fvh"
// HighlightTypePlain is sent as "plain".
HighlightTypePlain HighlightType = "plain"
// HighlightTypeSemantic is sent as "semantic".
// NOTE(review): availability presumably depends on the OpenSearch version
// and installed plugins; verify against the target cluster.
HighlightTypeSemantic HighlightType = "semantic"
)
// HighlightOptions is the set of highlighter settings that can be serialized
// into a highlight section of a search request body. BodyParamHighlight embeds
// it for request-wide settings and also uses it as the value type of its
// per-field Fields map.
//
// Every field is tagged with omitempty, so a field left at its Go zero value
// (0, "", false, nil, empty slice) is not written to the JSON at all.
// NOTE(review): this means an explicit 0 or false cannot be sent. That matters
// for options whose server-side default is not the zero value, e.g.
// require_field_match is documented by OpenSearch as defaulting to true, and
// number_of_fragments 0 is documented as "return the whole field". Confirm
// whether callers need those values; expressing them would require pointer
// fields.
type HighlightOptions struct {
// Type selects the highlighter; see the HighlightType constants.
Type HighlightType `json:"type,omitempty"`
FragmentSize int `json:"fragment_size,omitempty"`
NumberOfFragments int `json:"number_of_fragments,omitempty"`
FragmentOffset int `json:"fragment_offset,omitempty"`
BoundaryChars string `json:"boundary_chars,omitempty"`
BoundaryMaxScan int `json:"boundary_max_scan,omitempty"`
BoundaryScanner string `json:"boundary_scanner,omitempty"`
BoundaryScannerLocale string `json:"boundary_scanner_locale,omitempty"`
Encoder string `json:"encoder,omitempty"`
ForceSource bool `json:"force_source,omitempty"`
Fragmenter string `json:"fragmenter,omitempty"`
// HighlightQuery is written as "highlight_query" only when it is non-nil.
HighlightQuery Builder `json:"highlight_query,omitempty"`
Order string `json:"order,omitempty"`
NoMatchSize int `json:"no_match_size,omitempty"`
RequireFieldMatch bool `json:"require_field_match,omitempty"`
MatchedFields []string `json:"matched_fields,omitempty"`
PhraseLimit int `json:"phrase_limit,omitempty"`
// PreTags and PostTags are the markup strings sent as "pre_tags" and
// "post_tags" (for example "<mark>" and "</mark>").
PreTags []string `json:"pre_tags,omitempty"`
PostTags []string `json:"post_tags,omitempty"`
}
type BodyParamScript struct {

View File

@@ -9,7 +9,7 @@ import (
"github.com/stretchr/testify/require"
"github.com/opencloud-eu/opencloud/services/search/pkg/opensearch/internal/osu"
"github.com/opencloud-eu/opencloud/services/search/pkg/opensearch/internal/test"
opensearchtest "github.com/opencloud-eu/opencloud/services/search/pkg/opensearch/internal/test"
)
func TestRequestBody(t *testing.T) {
@@ -46,10 +46,15 @@ func TestBuildSearchReq(t *testing.T) {
osu.NewTermQuery[string]("content").Value("content"),
osu.SearchBodyParams{
Highlight: &osu.BodyParamHighlight{
PreTags: []string{"<b>"},
PostTags: []string{"</b>"},
Fields: map[string]osu.BodyParamHighlight{
"content": {},
HighlightOptions: osu.HighlightOptions{
PreTags: []string{"<b>"},
PostTags: []string{"</b>"},
},
Fields: map[string]osu.HighlightOptions{
"content": {
PreTags: []string{"<strong>"},
PostTags: []string{"</strong>"},
},
},
},
},
@@ -69,7 +74,10 @@ func TestBuildSearchReq(t *testing.T) {
"pre_tags": []string{"<b>"},
"post_tags": []string{"</b>"},
"fields": map[string]any{
"content": map[string]any{},
"content": map[string]any{
"pre_tags": []string{"<strong>"},
"post_tags": []string{"</strong>"},
},
},
},
},