mirror of
https://github.com/opencloud-eu/opencloud.git
synced 2026-01-24 13:58:12 -05:00
* Add audio facet to search protobuf message
* Add audio metadata to search index
* Return audio facet from search if available
* Store audio metadata in arbitrary metadata
* Add audio facet to driveItems listings
* Make tests coding style more consistent
* Fix tests
* Add changelog
* Make valueToString code more defensive
* Log status code as well
429 lines
12 KiB
Go
429 lines
12 KiB
Go
package engine
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"math"
|
|
"path"
|
|
"path/filepath"
|
|
"reflect"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/blevesearch/bleve/v2"
|
|
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
|
|
"github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
|
|
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
|
|
"github.com/blevesearch/bleve/v2/analysis/token/porter"
|
|
"github.com/blevesearch/bleve/v2/analysis/tokenizer/single"
|
|
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
|
|
"github.com/blevesearch/bleve/v2/mapping"
|
|
"github.com/blevesearch/bleve/v2/search/query"
|
|
storageProvider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1"
|
|
"google.golang.org/protobuf/types/known/timestamppb"
|
|
|
|
"github.com/cs3org/reva/v2/pkg/storagespace"
|
|
"github.com/cs3org/reva/v2/pkg/utils"
|
|
|
|
libregraph "github.com/owncloud/libre-graph-api-go"
|
|
searchMessage "github.com/owncloud/ocis/v2/protogen/gen/ocis/messages/search/v0"
|
|
searchService "github.com/owncloud/ocis/v2/protogen/gen/ocis/services/search/v0"
|
|
"github.com/owncloud/ocis/v2/services/search/pkg/content"
|
|
searchQuery "github.com/owncloud/ocis/v2/services/search/pkg/query"
|
|
)
|
|
|
|
// Bleve represents a search engine which utilizes bleve to search and store resources.
|
|
type Bleve struct {
|
|
index bleve.Index
|
|
queryCreator searchQuery.Creator[query.Query]
|
|
}
|
|
|
|
// NewBleveIndex returns a new bleve index
|
|
// given path must exist.
|
|
func NewBleveIndex(root string) (bleve.Index, error) {
|
|
destination := filepath.Join(root, "bleve")
|
|
index, err := bleve.Open(destination)
|
|
if errors.Is(bleve.ErrorIndexPathDoesNotExist, err) {
|
|
m, err := BuildBleveMapping()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
index, err = bleve.New(destination, m)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return index, nil
|
|
}
|
|
|
|
return index, err
|
|
}
|
|
|
|
// NewBleveEngine creates a new Bleve instance
|
|
func NewBleveEngine(index bleve.Index, queryCreator searchQuery.Creator[query.Query]) *Bleve {
|
|
return &Bleve{
|
|
index: index,
|
|
queryCreator: queryCreator,
|
|
}
|
|
}
|
|
|
|
// BuildBleveMapping builds a bleve index mapping which can be used for indexing
|
|
func BuildBleveMapping() (mapping.IndexMapping, error) {
|
|
nameMapping := bleve.NewTextFieldMapping()
|
|
nameMapping.Analyzer = "lowercaseKeyword"
|
|
|
|
lowercaseMapping := bleve.NewTextFieldMapping()
|
|
lowercaseMapping.IncludeInAll = false
|
|
lowercaseMapping.Analyzer = "lowercaseKeyword"
|
|
|
|
fulltextFieldMapping := bleve.NewTextFieldMapping()
|
|
fulltextFieldMapping.Analyzer = "fulltext"
|
|
fulltextFieldMapping.IncludeInAll = false
|
|
|
|
docMapping := bleve.NewDocumentMapping()
|
|
docMapping.AddFieldMappingsAt("Name", nameMapping)
|
|
docMapping.AddFieldMappingsAt("Tags", lowercaseMapping)
|
|
docMapping.AddFieldMappingsAt("Content", fulltextFieldMapping)
|
|
|
|
indexMapping := bleve.NewIndexMapping()
|
|
indexMapping.DefaultAnalyzer = keyword.Name
|
|
indexMapping.DefaultMapping = docMapping
|
|
err := indexMapping.AddCustomAnalyzer("lowercaseKeyword",
|
|
map[string]interface{}{
|
|
"type": custom.Name,
|
|
"tokenizer": single.Name,
|
|
"token_filters": []string{
|
|
lowercase.Name,
|
|
},
|
|
},
|
|
)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
err = indexMapping.AddCustomAnalyzer("fulltext",
|
|
map[string]interface{}{
|
|
"type": custom.Name,
|
|
"tokenizer": unicode.Name,
|
|
"token_filters": []string{
|
|
lowercase.Name,
|
|
porter.Name,
|
|
},
|
|
},
|
|
)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return indexMapping, nil
|
|
}
|
|
|
|
// Search executes a search request operation within the index.
|
|
// Returns a SearchIndexResponse object or an error.
|
|
func (b *Bleve) Search(ctx context.Context, sir *searchService.SearchIndexRequest) (*searchService.SearchIndexResponse, error) {
|
|
createdQuery, err := b.queryCreator.Create(sir.Query)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
q := bleve.NewConjunctionQuery(
|
|
// Skip documents that have been marked as deleted
|
|
&query.BoolFieldQuery{
|
|
Bool: false,
|
|
FieldVal: "Deleted",
|
|
},
|
|
createdQuery,
|
|
)
|
|
|
|
if sir.Ref != nil {
|
|
q.Conjuncts = append(
|
|
q.Conjuncts,
|
|
&query.TermQuery{
|
|
FieldVal: "RootID",
|
|
Term: storagespace.FormatResourceID(
|
|
storageProvider.ResourceId{
|
|
StorageId: sir.Ref.GetResourceId().GetStorageId(),
|
|
SpaceId: sir.Ref.GetResourceId().GetSpaceId(),
|
|
OpaqueId: sir.Ref.GetResourceId().GetOpaqueId(),
|
|
},
|
|
),
|
|
},
|
|
)
|
|
}
|
|
|
|
bleveReq := bleve.NewSearchRequest(q)
|
|
bleveReq.Highlight = bleve.NewHighlight()
|
|
|
|
switch {
|
|
case sir.PageSize == -1:
|
|
bleveReq.Size = math.MaxInt
|
|
case sir.PageSize == 0:
|
|
bleveReq.Size = 200
|
|
default:
|
|
bleveReq.Size = int(sir.PageSize)
|
|
}
|
|
|
|
bleveReq.Fields = []string{"*"}
|
|
res, err := b.index.Search(bleveReq)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
matches := make([]*searchMessage.Match, 0, len(res.Hits))
|
|
totalMatches := res.Total
|
|
for _, hit := range res.Hits {
|
|
if sir.Ref != nil {
|
|
hitPath := strings.TrimSuffix(getFieldValue[string](hit.Fields, "Path"), "/")
|
|
requestedPath := utils.MakeRelativePath(sir.Ref.Path)
|
|
isRoot := hitPath == requestedPath
|
|
|
|
if !isRoot && requestedPath != "." && !strings.HasPrefix(hitPath, requestedPath+"/") {
|
|
totalMatches--
|
|
continue
|
|
}
|
|
}
|
|
|
|
rootID, err := storagespace.ParseID(getFieldValue[string](hit.Fields, "RootID"))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
rID, err := storagespace.ParseID(getFieldValue[string](hit.Fields, "ID"))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
pID, _ := storagespace.ParseID(getFieldValue[string](hit.Fields, "ParentID"))
|
|
match := &searchMessage.Match{
|
|
Score: float32(hit.Score),
|
|
Entity: &searchMessage.Entity{
|
|
Ref: &searchMessage.Reference{
|
|
ResourceId: resourceIDtoSearchID(rootID),
|
|
Path: getFieldValue[string](hit.Fields, "Path"),
|
|
},
|
|
Id: resourceIDtoSearchID(rID),
|
|
Name: getFieldValue[string](hit.Fields, "Name"),
|
|
ParentId: resourceIDtoSearchID(pID),
|
|
Size: uint64(getFieldValue[float64](hit.Fields, "Size")),
|
|
Type: uint64(getFieldValue[float64](hit.Fields, "Type")),
|
|
MimeType: getFieldValue[string](hit.Fields, "MimeType"),
|
|
Deleted: getFieldValue[bool](hit.Fields, "Deleted"),
|
|
Tags: getFieldSliceValue[string](hit.Fields, "Tags"),
|
|
Highlights: getFragmentValue(hit.Fragments, "Content", 0),
|
|
Audio: getAudioValue[searchMessage.Audio](hit.Fields),
|
|
},
|
|
}
|
|
|
|
if mtime, err := time.Parse(time.RFC3339, getFieldValue[string](hit.Fields, "Mtime")); err == nil {
|
|
match.Entity.LastModifiedTime = ×tamppb.Timestamp{Seconds: mtime.Unix(), Nanos: int32(mtime.Nanosecond())}
|
|
}
|
|
|
|
matches = append(matches, match)
|
|
}
|
|
|
|
return &searchService.SearchIndexResponse{
|
|
Matches: matches,
|
|
TotalMatches: int32(totalMatches),
|
|
}, nil
|
|
}
|
|
|
|
// Upsert indexes or stores Resource data fields.
|
|
func (b *Bleve) Upsert(id string, r Resource) error {
|
|
return b.index.Index(id, r)
|
|
}
|
|
|
|
// Move updates the resource location and all of its necessary fields.
|
|
func (b *Bleve) Move(id string, parentid string, target string) error {
|
|
r, err := b.getResource(id)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
currentPath := r.Path
|
|
nextPath := utils.MakeRelativePath(target)
|
|
|
|
r, err = b.updateEntity(id, func(r *Resource) {
|
|
r.Path = nextPath
|
|
r.Name = path.Base(nextPath)
|
|
r.ParentID = parentid
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if r.Type == uint64(storageProvider.ResourceType_RESOURCE_TYPE_CONTAINER) {
|
|
q := bleve.NewConjunctionQuery(
|
|
bleve.NewQueryStringQuery("RootID:"+r.RootID),
|
|
bleve.NewQueryStringQuery("Path:"+escapeQuery(currentPath+"/*")),
|
|
)
|
|
bleveReq := bleve.NewSearchRequest(q)
|
|
bleveReq.Size = math.MaxInt
|
|
bleveReq.Fields = []string{"*"}
|
|
res, err := b.index.Search(bleveReq)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
for _, h := range res.Hits {
|
|
_, err := b.updateEntity(h.ID, func(r *Resource) {
|
|
r.Path = strings.Replace(r.Path, currentPath, nextPath, 1)
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Delete marks the resource as deleted.
|
|
// The resource object will stay in the bleve index,
|
|
// instead of removing the resource it just marks it as deleted!
|
|
// can be undone
|
|
func (b *Bleve) Delete(id string) error {
|
|
return b.setDeleted(id, true)
|
|
}
|
|
|
|
// Restore is the counterpart to Delete.
|
|
// It restores the resource which makes it available again.
|
|
func (b *Bleve) Restore(id string) error {
|
|
return b.setDeleted(id, false)
|
|
}
|
|
|
|
// Purge removes a resource from the index, irreversible operation.
|
|
func (b *Bleve) Purge(id string) error {
|
|
return b.index.Delete(id)
|
|
}
|
|
|
|
// DocCount returns the number of resources in the index.
|
|
func (b *Bleve) DocCount() (uint64, error) {
|
|
return b.index.DocCount()
|
|
}
|
|
|
|
func (b *Bleve) getResource(id string) (*Resource, error) {
|
|
req := bleve.NewSearchRequest(bleve.NewDocIDQuery([]string{id}))
|
|
req.Fields = []string{"*"}
|
|
res, err := b.index.Search(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if res.Hits.Len() == 0 {
|
|
return nil, errors.New("entity not found")
|
|
}
|
|
|
|
fields := res.Hits[0].Fields
|
|
|
|
return &Resource{
|
|
ID: getFieldValue[string](fields, "ID"),
|
|
RootID: getFieldValue[string](fields, "RootID"),
|
|
Path: getFieldValue[string](fields, "Path"),
|
|
ParentID: getFieldValue[string](fields, "ParentID"),
|
|
Type: uint64(getFieldValue[float64](fields, "Type")),
|
|
Deleted: getFieldValue[bool](fields, "Deleted"),
|
|
Document: content.Document{
|
|
Name: getFieldValue[string](fields, "Name"),
|
|
Title: getFieldValue[string](fields, "Title"),
|
|
Size: uint64(getFieldValue[float64](fields, "Size")),
|
|
Mtime: getFieldValue[string](fields, "Mtime"),
|
|
MimeType: getFieldValue[string](fields, "MimeType"),
|
|
Content: getFieldValue[string](fields, "Content"),
|
|
Tags: getFieldSliceValue[string](fields, "Tags"),
|
|
Audio: getAudioValue[libregraph.Audio](fields),
|
|
},
|
|
}, nil
|
|
}
|
|
|
|
// newPointerOfType returns a pointer to a freshly allocated zero value of T.
// The built-in new does this directly, so no reflection is needed.
func newPointerOfType[T any]() *T {
	return new(T)
}
|
|
|
|
// getFieldName returns the name under which structField is addressed in a
// flattened field map: the name part of its `json` struct tag (everything
// before the first comma), or the Go field name when the field has no json
// tag or the tag leaves the name empty (e.g. `json:",omitempty"`).
func getFieldName(structField reflect.StructField) string {
	name, _, _ := strings.Cut(structField.Tag.Get("json"), ",")
	if name == "" {
		// Covers both a missing tag and an options-only tag.
		return structField.Name
	}

	return name
}
|
|
|
|
func unmarshalInterfaceMap(out any, flatMap map[string]interface{}, prefix string) bool {
|
|
nonEmpty := false
|
|
obj := reflect.ValueOf(out).Elem()
|
|
for i := 0; i < obj.NumField(); i++ {
|
|
field := obj.Field(i)
|
|
structField := obj.Type().Field(i)
|
|
mapKey := prefix + getFieldName(structField)
|
|
|
|
if value, ok := flatMap[mapKey]; ok {
|
|
if field.Kind() == reflect.Ptr {
|
|
alloc := reflect.New(field.Type().Elem())
|
|
alloc.Elem().Set(reflect.ValueOf(value).Convert(field.Type().Elem()))
|
|
field.Set(alloc)
|
|
nonEmpty = true
|
|
}
|
|
}
|
|
}
|
|
|
|
return nonEmpty
|
|
}
|
|
|
|
func getAudioValue[T any](fields map[string]interface{}) *T {
|
|
if !strings.HasPrefix(getFieldValue[string](fields, "MimeType"), "audio/") {
|
|
return nil
|
|
}
|
|
|
|
var audio = newPointerOfType[T]()
|
|
if ok := unmarshalInterfaceMap(audio, fields, "audio."); ok {
|
|
return audio
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (b *Bleve) updateEntity(id string, mutateFunc func(r *Resource)) (*Resource, error) {
|
|
it, err := b.getResource(id)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
mutateFunc(it)
|
|
|
|
return it, b.index.Index(it.ID, it)
|
|
}
|
|
|
|
func (b *Bleve) setDeleted(id string, deleted bool) error {
|
|
it, err := b.updateEntity(id, func(r *Resource) {
|
|
r.Deleted = deleted
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if it.Type == uint64(storageProvider.ResourceType_RESOURCE_TYPE_CONTAINER) {
|
|
q := bleve.NewConjunctionQuery(
|
|
bleve.NewQueryStringQuery("RootID:"+it.RootID),
|
|
bleve.NewQueryStringQuery("Path:"+escapeQuery(it.Path+"/*")),
|
|
)
|
|
bleveReq := bleve.NewSearchRequest(q)
|
|
bleveReq.Size = math.MaxInt
|
|
bleveReq.Fields = []string{"*"}
|
|
res, err := b.index.Search(bleveReq)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
for _, h := range res.Hits {
|
|
_, err := b.updateEntity(h.ID, func(r *Resource) {
|
|
r.Deleted = deleted
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|