Files
opencloud/services/search/pkg/engine/bleve.go
Dominik Schmidt db32fb46f9 Audio Metadata (#7490)
* Add audio facet to search protobuf message

* Add audio metadata to search index

* Return audio facet from search if available

* Store audio metadata in arbitrary metadata

* Add audio facet to driveItems listings

* Make tests coding style more consistent

* Fix tests

* Add changelog

* Make valueToString code more defensive

* Log status code as well
2023-11-06 08:56:46 +01:00

429 lines
12 KiB
Go

package engine
import (
"context"
"errors"
"math"
"path"
"path/filepath"
"reflect"
"strings"
"time"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
"github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/token/porter"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/single"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search/query"
storageProvider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1"
"google.golang.org/protobuf/types/known/timestamppb"
"github.com/cs3org/reva/v2/pkg/storagespace"
"github.com/cs3org/reva/v2/pkg/utils"
libregraph "github.com/owncloud/libre-graph-api-go"
searchMessage "github.com/owncloud/ocis/v2/protogen/gen/ocis/messages/search/v0"
searchService "github.com/owncloud/ocis/v2/protogen/gen/ocis/services/search/v0"
"github.com/owncloud/ocis/v2/services/search/pkg/content"
searchQuery "github.com/owncloud/ocis/v2/services/search/pkg/query"
)
// Bleve is a search engine implementation that stores resources in, and
// searches them through, a bleve index.
type Bleve struct {
	// index is the bleve index every read and write of this engine goes to.
	index bleve.Index
	// queryCreator turns the query of a search request into a bleve query.
	queryCreator searchQuery.Creator[query.Query]
}
// NewBleveIndex opens the bleve index located in the "bleve" directory below
// root. When no index exists at that location yet, a new one is created with
// the mapping returned by BuildBleveMapping.
//
// The given root path must exist. Any error other than "index path does not
// exist" reported while opening the index is returned unchanged.
func NewBleveIndex(root string) (bleve.Index, error) {
	destination := filepath.Join(root, "bleve")

	index, err := bleve.Open(destination)
	// errors.Is expects the inspected error first and the target second; with
	// the arguments swapped a wrapped sentinel would never be recognised.
	if errors.Is(err, bleve.ErrorIndexPathDoesNotExist) {
		m, err := BuildBleveMapping()
		if err != nil {
			return nil, err
		}

		index, err = bleve.New(destination, m)
		if err != nil {
			return nil, err
		}

		return index, nil
	}

	return index, err
}
// NewBleveEngine returns a Bleve engine that operates on the given index and
// uses queryCreator to build the bleve queries for search requests.
func NewBleveEngine(index bleve.Index, queryCreator searchQuery.Creator[query.Query]) *Bleve {
	engine := new(Bleve)
	engine.index = index
	engine.queryCreator = queryCreator

	return engine
}
// BuildBleveMapping assembles the index mapping used when a new bleve index
// is created.
//
// The default analyzer is the keyword analyzer. On top of that:
//   - "Name" and "Tags" use the custom "lowercaseKeyword" analyzer (single
//     tokenizer followed by the lowercase filter); "Tags" has IncludeInAll
//     disabled.
//   - "Content" uses the custom "fulltext" analyzer (unicode tokenizer
//     followed by the lowercase and porter filters) and has IncludeInAll
//     disabled.
//
// An error is returned when one of the custom analyzers cannot be registered.
func BuildBleveMapping() (mapping.IndexMapping, error) {
	const (
		lowercaseKeywordAnalyzer = "lowercaseKeyword"
		fulltextAnalyzer         = "fulltext"
	)

	nameField := bleve.NewTextFieldMapping()
	nameField.Analyzer = lowercaseKeywordAnalyzer

	tagsField := bleve.NewTextFieldMapping()
	tagsField.IncludeInAll = false
	tagsField.Analyzer = lowercaseKeywordAnalyzer

	contentField := bleve.NewTextFieldMapping()
	contentField.Analyzer = fulltextAnalyzer
	contentField.IncludeInAll = false

	document := bleve.NewDocumentMapping()
	document.AddFieldMappingsAt("Name", nameField)
	document.AddFieldMappingsAt("Tags", tagsField)
	document.AddFieldMappingsAt("Content", contentField)

	im := bleve.NewIndexMapping()
	im.DefaultAnalyzer = keyword.Name
	im.DefaultMapping = document

	// The custom analyzers referenced by the field mappings above, registered
	// in the listed order.
	analyzers := []struct {
		name   string
		config map[string]interface{}
	}{
		{
			name: lowercaseKeywordAnalyzer,
			config: map[string]interface{}{
				"type":          custom.Name,
				"tokenizer":     single.Name,
				"token_filters": []string{lowercase.Name},
			},
		},
		{
			name: fulltextAnalyzer,
			config: map[string]interface{}{
				"type":          custom.Name,
				"tokenizer":     unicode.Name,
				"token_filters": []string{lowercase.Name, porter.Name},
			},
		},
	}

	for _, analyzer := range analyzers {
		if err := im.AddCustomAnalyzer(analyzer.name, analyzer.config); err != nil {
			return nil, err
		}
	}

	return im, nil
}
// Search executes a search request operation within the index.
// Returns a SearchIndexResponse object or an error.
//
// The query of the request is translated by the engine's query creator and
// combined with a filter that skips documents marked as deleted. When the
// request carries a reference, the search is additionally limited to the
// referenced root; hits outside the referenced path are dropped after the
// query ran and subtracted from the reported total.
//
// A PageSize of -1 requests all hits, 0 selects the default of 200 hits, any
// other value is used as is.
//
// The search honours ctx: it is handed to the index so that a cancelled or
// expired context can abort the operation. A nil ctx is treated like
// context.Background().
func (b *Bleve) Search(ctx context.Context, sir *searchService.SearchIndexRequest) (*searchService.SearchIndexResponse, error) {
	if ctx == nil {
		ctx = context.Background()
	}

	createdQuery, err := b.queryCreator.Create(sir.Query)
	if err != nil {
		return nil, err
	}

	q := bleve.NewConjunctionQuery(
		// Skip documents that have been marked as deleted
		&query.BoolFieldQuery{
			Bool:     false,
			FieldVal: "Deleted",
		},
		createdQuery,
	)

	if sir.Ref != nil {
		q.Conjuncts = append(
			q.Conjuncts,
			&query.TermQuery{
				FieldVal: "RootID",
				Term: storagespace.FormatResourceID(
					storageProvider.ResourceId{
						StorageId: sir.Ref.GetResourceId().GetStorageId(),
						SpaceId:   sir.Ref.GetResourceId().GetSpaceId(),
						OpaqueId:  sir.Ref.GetResourceId().GetOpaqueId(),
					},
				),
			},
		)
	}

	bleveReq := bleve.NewSearchRequest(q)
	bleveReq.Highlight = bleve.NewHighlight()

	switch sir.PageSize {
	case -1:
		bleveReq.Size = math.MaxInt
	case 0:
		bleveReq.Size = 200
	default:
		bleveReq.Size = int(sir.PageSize)
	}

	bleveReq.Fields = []string{"*"}

	res, err := b.index.SearchInContext(ctx, bleveReq)
	if err != nil {
		return nil, err
	}

	// The requested path does not change between hits, compute it once.
	var requestedPath string
	if sir.Ref != nil {
		requestedPath = utils.MakeRelativePath(sir.Ref.Path)
	}

	matches := make([]*searchMessage.Match, 0, len(res.Hits))
	totalMatches := res.Total
	for _, hit := range res.Hits {
		if sir.Ref != nil {
			// Keep the requested resource itself and everything below it;
			// "." stands for the root and therefore matches every hit.
			hitPath := strings.TrimSuffix(getFieldValue[string](hit.Fields, "Path"), "/")
			isRoot := hitPath == requestedPath

			if !isRoot && requestedPath != "." && !strings.HasPrefix(hitPath, requestedPath+"/") {
				totalMatches--
				continue
			}
		}

		rootID, err := storagespace.ParseID(getFieldValue[string](hit.Fields, "RootID"))
		if err != nil {
			return nil, err
		}

		rID, err := storagespace.ParseID(getFieldValue[string](hit.Fields, "ID"))
		if err != nil {
			return nil, err
		}

		// Best effort: a ParentID that cannot be parsed does not fail the search.
		pID, _ := storagespace.ParseID(getFieldValue[string](hit.Fields, "ParentID"))

		match := &searchMessage.Match{
			Score: float32(hit.Score),
			Entity: &searchMessage.Entity{
				Ref: &searchMessage.Reference{
					ResourceId: resourceIDtoSearchID(rootID),
					Path:       getFieldValue[string](hit.Fields, "Path"),
				},
				Id:         resourceIDtoSearchID(rID),
				Name:       getFieldValue[string](hit.Fields, "Name"),
				ParentId:   resourceIDtoSearchID(pID),
				Size:       uint64(getFieldValue[float64](hit.Fields, "Size")),
				Type:       uint64(getFieldValue[float64](hit.Fields, "Type")),
				MimeType:   getFieldValue[string](hit.Fields, "MimeType"),
				Deleted:    getFieldValue[bool](hit.Fields, "Deleted"),
				Tags:       getFieldSliceValue[string](hit.Fields, "Tags"),
				Highlights: getFragmentValue(hit.Fragments, "Content", 0),
				Audio:      getAudioValue[searchMessage.Audio](hit.Fields),
			},
		}

		// The modification time is only set when the stored value is valid RFC 3339.
		if mtime, err := time.Parse(time.RFC3339, getFieldValue[string](hit.Fields, "Mtime")); err == nil {
			match.Entity.LastModifiedTime = &timestamppb.Timestamp{Seconds: mtime.Unix(), Nanos: int32(mtime.Nanosecond())}
		}

		matches = append(matches, match)
	}

	return &searchService.SearchIndexResponse{
		Matches:      matches,
		TotalMatches: int32(totalMatches),
	}, nil
}
// Upsert hands the resource r to the index under the document id id and
// returns the error reported by the index, if any.
func (b *Bleve) Upsert(id string, r Resource) error {
	if err := b.index.Index(id, r); err != nil {
		return err
	}

	return nil
}
// Move relocates the resource identified by id to target and re-parents it
// to parentid.
//
// The resource gets the new (relative) path, the base name of that path as
// its name and the new parent id. When the resource is a container, every
// document of the same root whose path lies below the old path has the old
// path prefix replaced by the new one as well.
func (b *Bleve) Move(id string, parentid string, target string) error {
	current, err := b.getResource(id)
	if err != nil {
		return err
	}

	oldPath := current.Path
	newPath := utils.MakeRelativePath(target)

	moved, err := b.updateEntity(id, func(res *Resource) {
		res.Path = newPath
		res.Name = path.Base(newPath)
		res.ParentID = parentid
	})
	if err != nil {
		return err
	}

	// Only containers have descendants whose paths must follow the move.
	if moved.Type != uint64(storageProvider.ResourceType_RESOURCE_TYPE_CONTAINER) {
		return nil
	}

	descendants := bleve.NewSearchRequest(
		bleve.NewConjunctionQuery(
			bleve.NewQueryStringQuery("RootID:"+moved.RootID),
			bleve.NewQueryStringQuery("Path:"+escapeQuery(oldPath+"/*")),
		),
	)
	descendants.Size = math.MaxInt
	descendants.Fields = []string{"*"}

	found, err := b.index.Search(descendants)
	if err != nil {
		return err
	}

	// Swap the first occurrence of the old path for the new one.
	rebase := func(res *Resource) {
		res.Path = strings.Replace(res.Path, oldPath, newPath, 1)
	}

	for _, hit := range found.Hits {
		if _, err := b.updateEntity(hit.ID, rebase); err != nil {
			return err
		}
	}

	return nil
}
// Delete flags the resource as deleted.
// The document is not removed from the bleve index, it is only marked, which
// is why the operation can be reverted with Restore.
func (b *Bleve) Delete(id string) error {
	const deleted = true

	return b.setDeleted(id, deleted)
}
// Restore reverts Delete: it clears the deleted flag of the resource so that
// it is available again.
func (b *Bleve) Restore(id string) error {
	const deleted = false

	return b.setDeleted(id, deleted)
}
// Purge deletes the document with the given id from the index.
// Unlike Delete this cannot be undone.
func (b *Bleve) Purge(id string) error {
	if err := b.index.Delete(id); err != nil {
		return err
	}

	return nil
}
// DocCount reports how many documents the index currently holds, as counted
// by the index itself.
func (b *Bleve) DocCount() (uint64, error) {
	count, err := b.index.DocCount()

	return count, err
}
// getResource looks up the document with the given id in the index and
// rebuilds a Resource from its stored fields.
// It returns an "entity not found" error when the lookup yields no hit.
func (b *Bleve) getResource(id string) (*Resource, error) {
	lookup := bleve.NewSearchRequest(bleve.NewDocIDQuery([]string{id}))
	lookup.Fields = []string{"*"}

	found, err := b.index.Search(lookup)
	if err != nil {
		return nil, err
	}

	if len(found.Hits) == 0 {
		return nil, errors.New("entity not found")
	}

	stored := found.Hits[0].Fields

	// Numeric fields are read as float64 and converted back to uint64.
	resource := &Resource{
		ID:       getFieldValue[string](stored, "ID"),
		RootID:   getFieldValue[string](stored, "RootID"),
		Path:     getFieldValue[string](stored, "Path"),
		ParentID: getFieldValue[string](stored, "ParentID"),
		Type:     uint64(getFieldValue[float64](stored, "Type")),
		Deleted:  getFieldValue[bool](stored, "Deleted"),
	}
	resource.Document = content.Document{
		Name:     getFieldValue[string](stored, "Name"),
		Title:    getFieldValue[string](stored, "Title"),
		Size:     uint64(getFieldValue[float64](stored, "Size")),
		Mtime:    getFieldValue[string](stored, "Mtime"),
		MimeType: getFieldValue[string](stored, "MimeType"),
		Content:  getFieldValue[string](stored, "Content"),
		Tags:     getFieldSliceValue[string](stored, "Tags"),
		Audio:    getAudioValue[libregraph.Audio](stored),
	}

	return resource, nil
}
// newPointerOfType returns a pointer to a freshly allocated zero value of T.
//
// The builtin new does exactly that; no reflection or type assertion is
// required to allocate a value of a type parameter.
func newPointerOfType[T any]() *T {
	return new(T)
}
// getFieldName returns the name under which structField is looked up in a
// flattened field map: the name part of its `json` tag or, when the field has
// no `json` tag or the tag carries options only (e.g. `json:",omitempty"`),
// the Go field name.
func getFieldName(structField reflect.StructField) string {
	// Everything after the first comma is an option such as "omitempty".
	name, _, _ := strings.Cut(structField.Tag.Get("json"), ",")
	if name == "" {
		return structField.Name
	}

	return name
}
// unmarshalInterfaceMap fills the pointer fields of the struct out points to
// from the flat map flatMap.
//
// For every settable pointer field the key prefix+getFieldName(field) is
// looked up; when present, the value is converted to the field's element
// type, stored in a newly allocated value and assigned to the field. Fields
// that are not pointers are ignored.
//
// Values that cannot be used are skipped instead of causing a panic: nil
// values, values whose type is not convertible to the element type and
// fields that cannot be set (unexported). When out is not a non-nil pointer
// to a struct nothing is done.
//
// It reports whether at least one field has been set.
func unmarshalInterfaceMap(out any, flatMap map[string]interface{}, prefix string) bool {
	target := reflect.ValueOf(out)
	if target.Kind() != reflect.Ptr || target.IsNil() {
		return false
	}

	obj := target.Elem()
	if obj.Kind() != reflect.Struct {
		return false
	}

	objType := obj.Type()
	nonEmpty := false

	for i := 0; i < obj.NumField(); i++ {
		field := obj.Field(i)
		if field.Kind() != reflect.Ptr || !field.CanSet() {
			continue
		}

		value, ok := flatMap[prefix+getFieldName(objType.Field(i))]
		if !ok {
			continue
		}

		elemType := field.Type().Elem()
		raw := reflect.ValueOf(value)
		// reflect.ValueOf(nil) is the invalid Value; converting it or an
		// inconvertible type would panic.
		if !raw.IsValid() || !raw.Type().ConvertibleTo(elemType) {
			continue
		}

		alloc := reflect.New(elemType)
		alloc.Elem().Set(raw.Convert(elemType))
		field.Set(alloc)
		nonEmpty = true
	}

	return nonEmpty
}
// getAudioValue builds a *T from the "audio."-prefixed entries of fields.
// It returns nil when the "MimeType" field does not start with "audio/" or
// when none of the fields of T could be populated.
func getAudioValue[T any](fields map[string]interface{}) *T {
	mimeType := getFieldValue[string](fields, "MimeType")

	if strings.HasPrefix(mimeType, "audio/") {
		audio := newPointerOfType[T]()
		if unmarshalInterfaceMap(audio, fields, "audio.") {
			return audio
		}
	}

	return nil
}
// updateEntity loads the resource with the given id, applies mutateFunc to it
// and writes the result back to the index under the resource's ID.
// The mutated resource is returned together with the error of the write.
func (b *Bleve) updateEntity(id string, mutateFunc func(r *Resource)) (*Resource, error) {
	resource, err := b.getResource(id)
	if err != nil {
		return nil, err
	}

	mutateFunc(resource)

	err = b.index.Index(resource.ID, resource)

	return resource, err
}
// setDeleted sets the Deleted flag of the resource with the given id to
// deleted. When the resource is a container, the flag is also applied to
// every document of the same root whose path lies below the container's path.
func (b *Bleve) setDeleted(id string, deleted bool) error {
	mark := func(res *Resource) {
		res.Deleted = deleted
	}

	entity, err := b.updateEntity(id, mark)
	if err != nil {
		return err
	}

	// Only containers have descendants that must follow the flag.
	if entity.Type != uint64(storageProvider.ResourceType_RESOURCE_TYPE_CONTAINER) {
		return nil
	}

	descendants := bleve.NewSearchRequest(
		bleve.NewConjunctionQuery(
			bleve.NewQueryStringQuery("RootID:"+entity.RootID),
			bleve.NewQueryStringQuery("Path:"+escapeQuery(entity.Path+"/*")),
		),
	)
	descendants.Size = math.MaxInt
	descendants.Fields = []string{"*"}

	found, err := b.index.Search(descendants)
	if err != nil {
		return err
	}

	for _, hit := range found.Hits {
		if _, err := b.updateEntity(hit.ID, mark); err != nil {
			return err
		}
	}

	return nil
}