build(deps): bump github.com/blevesearch/bleve/v2 from 2.5.3 to 2.5.4

Bumps [github.com/blevesearch/bleve/v2](https://github.com/blevesearch/bleve) from 2.5.3 to 2.5.4.
- [Release notes](https://github.com/blevesearch/bleve/releases)
- [Commits](https://github.com/blevesearch/bleve/compare/v2.5.3...v2.5.4)

---
updated-dependencies:
- dependency-name: github.com/blevesearch/bleve/v2
  dependency-version: 2.5.4
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Authored by dependabot[bot] on 2025-10-23 14:17:50 +00:00
Committed by Ralf Haferkamp
parent 0b4c9becfb
commit fb94f34a1f
62 changed files with 2640 additions and 714 deletions

go.mod (8 lines changed)

@@ -11,7 +11,7 @@ require (
github.com/Nerzal/gocloak/v13 v13.9.0
github.com/bbalet/stopwords v1.0.0
github.com/beevik/etree v1.6.0
github.com/blevesearch/bleve/v2 v2.5.3
github.com/blevesearch/bleve/v2 v2.5.4
github.com/cenkalti/backoff v2.2.1+incompatible
github.com/coreos/go-oidc/v3 v3.16.0
github.com/cs3org/go-cs3apis v0.0.0-20250908152307-4ca807afe54e
@@ -140,13 +140,13 @@ require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/bitly/go-simplejson v0.5.0 // indirect
github.com/bits-and-blooms/bitset v1.22.0 // indirect
github.com/blevesearch/bleve_index_api v1.2.8 // indirect
github.com/blevesearch/bleve_index_api v1.2.10 // indirect
github.com/blevesearch/geo v0.2.4 // indirect
github.com/blevesearch/go-faiss v1.0.25 // indirect
github.com/blevesearch/go-porterstemmer v1.0.3 // indirect
github.com/blevesearch/gtreap v0.1.1 // indirect
github.com/blevesearch/mmap-go v1.0.4 // indirect
github.com/blevesearch/scorch_segment_api/v2 v2.3.10 // indirect
github.com/blevesearch/scorch_segment_api/v2 v2.3.12 // indirect
github.com/blevesearch/segment v0.9.1 // indirect
github.com/blevesearch/snowballstem v0.9.0 // indirect
github.com/blevesearch/upsidedown_store_api v1.0.2 // indirect
@@ -156,7 +156,7 @@ require (
github.com/blevesearch/zapx/v13 v13.4.2 // indirect
github.com/blevesearch/zapx/v14 v14.4.2 // indirect
github.com/blevesearch/zapx/v15 v15.4.2 // indirect
github.com/blevesearch/zapx/v16 v16.2.4 // indirect
github.com/blevesearch/zapx/v16 v16.2.6 // indirect
github.com/bluele/gcache v0.0.2 // indirect
github.com/bombsimon/logrusr/v3 v3.1.0 // indirect
github.com/cenkalti/backoff/v4 v4.3.0 // indirect

go.sum (16 lines changed)

@@ -151,10 +151,10 @@ github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6
github.com/bits-and-blooms/bitset v1.22.0 h1:Tquv9S8+SGaS3EhyA+up3FXzmkhxPGjQQCkcs2uw7w4=
github.com/bits-and-blooms/bitset v1.22.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84=
github.com/blevesearch/bleve/v2 v2.5.3 h1:9l1xtKaETv64SZc1jc4Sy0N804laSa/LeMbYddq1YEM=
github.com/blevesearch/bleve/v2 v2.5.3/go.mod h1:Z/e8aWjiq8HeX+nW8qROSxiE0830yQA071dwR3yoMzw=
github.com/blevesearch/bleve_index_api v1.2.8 h1:Y98Pu5/MdlkRyLM0qDHostYo7i+Vv1cDNhqTeR4Sy6Y=
github.com/blevesearch/bleve_index_api v1.2.8/go.mod h1:rKQDl4u51uwafZxFrPD1R7xFOwKnzZW7s/LSeK4lgo0=
github.com/blevesearch/bleve/v2 v2.5.4 h1:1iur8e+PHsxtncV2xIVuqlQme/V8guEDO2uV6Wll3lQ=
github.com/blevesearch/bleve/v2 v2.5.4/go.mod h1:yB4PnV4N2q5rTEpB2ndG8N2ISexBQEFIYgwx4ztfvoo=
github.com/blevesearch/bleve_index_api v1.2.10 h1:FMFmZCmTX6PdoLLvwUnKF2RsmILFFwO3h0WPevXY9fE=
github.com/blevesearch/bleve_index_api v1.2.10/go.mod h1:rKQDl4u51uwafZxFrPD1R7xFOwKnzZW7s/LSeK4lgo0=
github.com/blevesearch/geo v0.2.4 h1:ECIGQhw+QALCZaDcogRTNSJYQXRtC8/m8IKiA706cqk=
github.com/blevesearch/geo v0.2.4/go.mod h1:K56Q33AzXt2YExVHGObtmRSFYZKYGv0JEN5mdacJJR8=
github.com/blevesearch/go-faiss v1.0.25 h1:lel1rkOUGbT1CJ0YgzKwC7k+XH0XVBHnCVWahdCXk4U=
@@ -165,8 +165,8 @@ github.com/blevesearch/gtreap v0.1.1 h1:2JWigFrzDMR+42WGIN/V2p0cUvn4UP3C4Q5nmaZG
github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgYICSZ3w0tYk=
github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc=
github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs=
github.com/blevesearch/scorch_segment_api/v2 v2.3.10 h1:Yqk0XD1mE0fDZAJXTjawJ8If/85JxnLd8v5vG/jWE/s=
github.com/blevesearch/scorch_segment_api/v2 v2.3.10/go.mod h1:Z3e6ChN3qyN35yaQpl00MfI5s8AxUJbpTR/DL8QOQ+8=
github.com/blevesearch/scorch_segment_api/v2 v2.3.12 h1:GGZc2qwbyRBwtckPPkHkLyXw64mmsLJxdturBI1cM+c=
github.com/blevesearch/scorch_segment_api/v2 v2.3.12/go.mod h1:JBRGAneqgLSI2+jCNjtwMqp2B7EBF3/VUzgDPIU33MM=
github.com/blevesearch/segment v0.9.1 h1:+dThDy+Lvgj5JMxhmOVlgFfkUtZV2kw49xax4+jTfSU=
github.com/blevesearch/segment v0.9.1/go.mod h1:zN21iLm7+GnBHWTao9I+Au/7MBiL8pPFtJBJTsk6kQw=
github.com/blevesearch/snowballstem v0.9.0 h1:lMQ189YspGP6sXvZQ4WZ+MLawfV8wOmPoD/iWeNXm8s=
@@ -185,8 +185,8 @@ github.com/blevesearch/zapx/v14 v14.4.2 h1:2SGHakVKd+TrtEqpfeq8X+So5PShQ5nW6GNxT
github.com/blevesearch/zapx/v14 v14.4.2/go.mod h1:rz0XNb/OZSMjNorufDGSpFpjoFKhXmppH9Hi7a877D8=
github.com/blevesearch/zapx/v15 v15.4.2 h1:sWxpDE0QQOTjyxYbAVjt3+0ieu8NCE0fDRaFxEsp31k=
github.com/blevesearch/zapx/v15 v15.4.2/go.mod h1:1pssev/59FsuWcgSnTa0OeEpOzmhtmr/0/11H0Z8+Nw=
github.com/blevesearch/zapx/v16 v16.2.4 h1:tGgfvleXTAkwsD5mEzgM3zCS/7pgocTCnO1oyAUjlww=
github.com/blevesearch/zapx/v16 v16.2.4/go.mod h1:Rti/REtuuMmzwsI8/C/qIzRaEoSK/wiFYw5e5ctUKKs=
github.com/blevesearch/zapx/v16 v16.2.6 h1:OHuUl2GhM+FpBq9RwNsJ4k/QodqbMMHoQEgn/IHYpu8=
github.com/blevesearch/zapx/v16 v16.2.6/go.mod h1:cuAPB+YoIyRngNhno1S1GPr9SfMk+x/SgAHBLXSIq3k=
github.com/bluele/gcache v0.0.2 h1:WcbfdXICg7G/DGBh1PFfcirkWOQV+v077yF1pSy3DGw=
github.com/bluele/gcache v0.0.2/go.mod h1:m15KV+ECjptwSPxKhOhQoAFQVtUFjTVkc3H8o0t/fp0=
github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY=


@@ -4,7 +4,6 @@
[![Coverage Status](https://coveralls.io/repos/github/blevesearch/bleve/badge.svg?branch=master)](https://coveralls.io/github/blevesearch/bleve?branch=master)
[![Go Reference](https://pkg.go.dev/badge/github.com/blevesearch/bleve/v2.svg)](https://pkg.go.dev/github.com/blevesearch/bleve/v2)
[![Join the chat](https://badges.gitter.im/join_chat.svg)](https://app.gitter.im/#/room/#blevesearch_bleve:gitter.im)
[![codebeat](https://codebeat.co/badges/38a7cbc9-9cf5-41c0-a315-0746178230f4)](https://codebeat.co/projects/github-com-blevesearch-bleve)
[![Go Report Card](https://goreportcard.com/badge/github.com/blevesearch/bleve/v2)](https://goreportcard.com/report/github.com/blevesearch/bleve/v2)
[![Sourcegraph](https://sourcegraph.com/github.com/blevesearch/bleve/-/badge.svg)](https://sourcegraph.com/github.com/blevesearch/bleve?badge)
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
@@ -27,6 +26,8 @@ A modern indexing + search library in GO
* [synonym search](https://github.com/blevesearch/bleve/blob/master/docs/synonyms.md)
* [tf-idf](https://github.com/blevesearch/bleve/blob/master/docs/scoring.md#tf-idf) / [bm25](https://github.com/blevesearch/bleve/blob/master/docs/scoring.md#bm25) scoring models
* Hybrid search: exact + semantic
* Supports [RRF (Reciprocal Rank Fusion) and RSF (Relative Score Fusion)](docs/score_fusion.md)
* [Result pagination](https://github.com/blevesearch/bleve/blob/master/docs/pagination.md)
* Query time boosting
* Search result match highlighting with document fragments
* Aggregations/faceting support:


@@ -68,7 +68,7 @@ func newBuilder(path string, mapping mapping.IndexMapping, config map[string]int
return nil, err
}
config["internal"] = map[string][]byte{
string(mappingInternalKey): mappingBytes,
string(util.MappingInternalKey): mappingBytes,
}
// do not use real config, as these are options for the builder,


@@ -0,0 +1,26 @@
// Copyright (c) 2025 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fusion
import (
"github.com/blevesearch/bleve/v2/search"
)
type FusionResult struct {
Hits search.DocumentMatchCollection
Total uint64
MaxScore float64
}

vendor/github.com/blevesearch/bleve/v2/fusion/rrf.go (generated, vendored, new file, 131 lines)

@@ -0,0 +1,131 @@
// Copyright (c) 2025 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fusion
import (
"fmt"
"sort"
"github.com/blevesearch/bleve/v2/search"
)
func formatRRFMessage(weight float64, rank int, rankConstant int) string {
return fmt.Sprintf("rrf score (weight=%.3f, rank=%d, rank_constant=%d), normalized score of", weight, rank, rankConstant)
}
// ReciprocalRankFusion performs a reciprocal rank fusion on the search results.
func ReciprocalRankFusion(hits search.DocumentMatchCollection, weights []float64, rankConstant int, windowSize int, numKNNQueries int, explain bool) FusionResult {
if len(hits) == 0 {
return FusionResult{
Hits: hits,
Total: 0,
MaxScore: 0.0,
}
}
// Create a map of document ID to a slice of ranks.
// The first element of the slice is the rank from the FTS search,
// and the subsequent elements are the ranks from the KNN searches.
docRanks := make(map[string][]int)
// Pre-assign rank lists to each candidate document
for _, hit := range hits {
docRanks[hit.ID] = make([]int, numKNNQueries+1)
}
// Only a max of `window_size` elements need to be accounted for. Stop
// calculating rank once this threshold is hit.
sort.Slice(hits, func(a, b int) bool {
return scoreSortFunc()(hits[a], hits[b]) < 0
})
// Only consider top windowSize docs for rescoring
for i := range min(windowSize, len(hits)) {
if hits[i].Score != 0.0 {
// Skip if Score is 0, since that means the document was not
// found as part of FTS, and only in KNN.
docRanks[hits[i].ID][0] = i + 1
}
}
// Allocate knnDocs and reuse it within the loop
knnDocs := make([]*search.DocumentMatch, 0, len(hits))
// For each KNN query, rank the documents based on their KNN score.
for i := range numKNNQueries {
knnDocs = knnDocs[:0]
for _, hit := range hits {
if _, ok := hit.ScoreBreakdown[i]; ok {
knnDocs = append(knnDocs, hit)
}
}
// Sort the documents based on their score for this KNN query.
sort.Slice(knnDocs, func(a, b int) bool {
return scoreBreakdownSortFunc(i)(knnDocs[a], knnDocs[b]) < 0
})
// Update the ranks of the documents in the docRanks map.
// Only consider top windowSize docs for rescoring.
for j := range min(windowSize, len(knnDocs)) {
docRanks[knnDocs[j].ID][i+1] = j + 1
}
}
// Calculate the RRF score for each document.
var maxScore float64
for _, hit := range hits {
var rrfScore float64
var explChildren []*search.Explanation
if explain {
explChildren = make([]*search.Explanation, 0, numKNNQueries+1)
}
for i, rank := range docRanks[hit.ID] {
if rank > 0 {
partialRrfScore := weights[i] * 1.0 / float64(rankConstant+rank)
if explain {
expl := getFusionExplAt(
hit,
i,
partialRrfScore,
formatRRFMessage(weights[i], rank, rankConstant),
)
explChildren = append(explChildren, expl)
}
rrfScore += partialRrfScore
}
}
hit.Score = rrfScore
hit.ScoreBreakdown = nil
if rrfScore > maxScore {
maxScore = rrfScore
}
if explain {
finalizeFusionExpl(hit, explChildren)
}
}
sort.Sort(hits)
if len(hits) > windowSize {
hits = hits[:windowSize]
}
return FusionResult{
Hits: hits,
Total: uint64(len(hits)),
MaxScore: maxScore,
}
}
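
For reference, the scoring rule implemented above can be sketched in isolation: each document's fused score is the weighted sum of 1/(rankConstant + rank) over every ranked list it appears in. The helper below is a minimal illustration, not part of this commit:

```go
package main

import "fmt"

// rrfScore mirrors the inner loop of ReciprocalRankFusion: ranks[i] is
// the document's 1-based rank in list i (0 meaning absent from that
// list), weights[i] is that list's weight, and rankConstant is the
// usual RRF smoothing constant (commonly 60).
func rrfScore(ranks []int, weights []float64, rankConstant int) float64 {
	var score float64
	for i, rank := range ranks {
		if rank > 0 {
			score += weights[i] * 1.0 / float64(rankConstant+rank)
		}
	}
	return score
}

func main() {
	// Ranked 2nd by FTS and 1st by the single KNN query, equal weights:
	// 1/62 + 1/61 ≈ 0.0325
	fmt.Println(rrfScore([]int{2, 1}, []float64{1, 1}, 60))
}
```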

vendor/github.com/blevesearch/bleve/v2/fusion/rsf.go (generated, vendored, new file, 162 lines)

@@ -0,0 +1,162 @@
// Copyright (c) 2025 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fusion
import (
"fmt"
"sort"
"github.com/blevesearch/bleve/v2/search"
)
func formatRSFMessage(weight float64, normalizedScore float64, minScore float64, maxScore float64) string {
return fmt.Sprintf("rsf score (weight=%.3f, normalized=%.6f, min=%.6f, max=%.6f), normalized score of",
weight, normalizedScore, minScore, maxScore)
}
// RelativeScoreFusion normalizes scores based on min/max values for FTS and each KNN query, then applies weights.
func RelativeScoreFusion(hits search.DocumentMatchCollection, weights []float64, windowSize int, numKNNQueries int, explain bool) FusionResult {
if len(hits) == 0 {
return FusionResult{
Hits: hits,
Total: 0,
MaxScore: 0.0,
}
}
rsfScores := make(map[string]float64)
// contains the docs under consideration for scoring.
// Reused for fts and knn hits
scoringDocs := make([]*search.DocumentMatch, 0, len(hits))
var explMap map[string][]*search.Explanation
if explain {
explMap = make(map[string][]*search.Explanation)
}
// remove non-fts hits
for _, hit := range hits {
if hit.Score != 0.0 {
scoringDocs = append(scoringDocs, hit)
}
}
// sort hits by fts score
sort.Slice(scoringDocs, func(a, b int) bool {
return scoreSortFunc()(scoringDocs[a], scoringDocs[b]) < 0
})
// Reslice to correct size
if len(scoringDocs) > windowSize {
scoringDocs = scoringDocs[:windowSize]
}
var min, max float64
if len(scoringDocs) > 0 {
min, max = scoringDocs[len(scoringDocs)-1].Score, scoringDocs[0].Score
}
for _, hit := range scoringDocs {
var tempRsfScore float64
if max > min {
tempRsfScore = (hit.Score - min) / (max - min)
} else {
tempRsfScore = 1.0
}
if explain {
// create and replace new explanation
expl := getFusionExplAt(
hit,
0,
tempRsfScore,
formatRSFMessage(weights[0], tempRsfScore, min, max),
)
explMap[hit.ID] = append(explMap[hit.ID], expl)
}
rsfScores[hit.ID] = weights[0] * tempRsfScore
}
for i := range numKNNQueries {
scoringDocs = scoringDocs[:0]
for _, hit := range hits {
if _, exists := hit.ScoreBreakdown[i]; exists {
scoringDocs = append(scoringDocs, hit)
}
}
sort.Slice(scoringDocs, func(a, b int) bool {
return scoreBreakdownSortFunc(i)(scoringDocs[a], scoringDocs[b]) < 0
})
if len(scoringDocs) > windowSize {
scoringDocs = scoringDocs[:windowSize]
}
if len(scoringDocs) > 0 {
min, max = scoringDocs[len(scoringDocs)-1].ScoreBreakdown[i], scoringDocs[0].ScoreBreakdown[i]
} else {
min, max = 0.0, 0.0
}
for _, hit := range scoringDocs {
var tempRsfScore float64
if max > min {
tempRsfScore = (hit.ScoreBreakdown[i] - min) / (max - min)
} else {
tempRsfScore = 1.0
}
if explain {
expl := getFusionExplAt(
hit,
i+1,
tempRsfScore,
formatRSFMessage(weights[i+1], tempRsfScore, min, max),
)
explMap[hit.ID] = append(explMap[hit.ID], expl)
}
rsfScores[hit.ID] += weights[i+1] * tempRsfScore
}
}
var maxScore float64
for _, hit := range hits {
if rsfScore, exists := rsfScores[hit.ID]; exists {
hit.Score = rsfScore
if rsfScore > maxScore {
maxScore = rsfScore
}
if explain {
finalizeFusionExpl(hit, explMap[hit.ID])
}
} else {
hit.Score = 0.0
}
hit.ScoreBreakdown = nil
}
sort.Sort(hits)
if len(hits) > windowSize {
hits = hits[:windowSize]
}
return FusionResult{
Hits: hits,
Total: uint64(len(hits)),
MaxScore: maxScore,
}
}
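
The normalization step above is plain min-max scaling over the top windowSize hits of each list, with a 1.0 fallback when all scores in the window are equal. A minimal sketch, not part of this commit:

```go
package main

import "fmt"

// normalize scales a raw score into [0, 1] the way RelativeScoreFusion
// does; when max == min, every document in the window gets 1.0.
func normalize(score, min, max float64) float64 {
	if max > min {
		return (score - min) / (max - min)
	}
	return 1.0
}

func main() {
	// An FTS window with min=0.2 and max=0.8, list weight 0.5:
	// (0.5-0.2)/(0.8-0.2) = 0.5, weighted to 0.25.
	fmt.Println(0.5 * normalize(0.5, 0.2, 0.8))
}
```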

vendor/github.com/blevesearch/bleve/v2/fusion/util.go (generated, vendored, new file, 96 lines)

@@ -0,0 +1,96 @@
// Copyright (c) 2025 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fusion
import (
"github.com/blevesearch/bleve/v2/search"
)
// scoreBreakdownSortFunc returns a comparison function for sorting DocumentMatch objects
// by their ScoreBreakdown at the specified index in descending order.
// In case of ties, documents with lower HitNumber (earlier hits) are preferred.
// If either document is missing the ScoreBreakdown for the specified index,
// it's treated as having a score of 0.0.
func scoreBreakdownSortFunc(idx int) func(i, j *search.DocumentMatch) int {
return func(i, j *search.DocumentMatch) int {
// Safely extract scores, defaulting to 0.0 if missing
iScore := 0.0
jScore := 0.0
if i.ScoreBreakdown != nil {
if score, ok := i.ScoreBreakdown[idx]; ok {
iScore = score
}
}
if j.ScoreBreakdown != nil {
if score, ok := j.ScoreBreakdown[idx]; ok {
jScore = score
}
}
// Sort by score in descending order (higher scores first)
if iScore > jScore {
return -1
} else if iScore < jScore {
return 1
}
// Break ties by HitNumber in ascending order (lower HitNumber wins)
if i.HitNumber < j.HitNumber {
return -1
} else if i.HitNumber > j.HitNumber {
return 1
}
return 0 // Equal scores and HitNumbers
}
}
func scoreSortFunc() func(i, j *search.DocumentMatch) int {
return func(i, j *search.DocumentMatch) int {
// Sort by score in descending order
if i.Score > j.Score {
return -1
} else if i.Score < j.Score {
return 1
}
// Break ties by HitNumber
if i.HitNumber < j.HitNumber {
return -1
} else if i.HitNumber > j.HitNumber {
return 1
}
return 0
}
}
func getFusionExplAt(hit *search.DocumentMatch, i int, value float64, message string) *search.Explanation {
return &search.Explanation{
Value: value,
Message: message,
Children: []*search.Explanation{hit.Expl.Children[i]},
}
}
func finalizeFusionExpl(hit *search.DocumentMatch, explChildren []*search.Explanation) {
hit.Expl.Children = explChildren
hit.Expl.Value = hit.Score
hit.Expl.Message = "sum of"
}
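
Both comparators order by score descending and break ties on HitNumber, which makes the fused ordering deterministic across runs. A self-contained sketch of that contract (doc is an illustrative stand-in for search.DocumentMatch, not the library type):

```go
package main

import (
	"fmt"
	"sort"
)

// doc is an illustrative stand-in for search.DocumentMatch.
type doc struct {
	id        string
	score     float64
	hitNumber uint64
}

// cmp sorts by score descending, breaking ties by hitNumber ascending,
// matching the ordering contract of scoreSortFunc above.
func cmp(i, j doc) int {
	if i.score > j.score {
		return -1
	} else if i.score < j.score {
		return 1
	}
	if i.hitNumber < j.hitNumber {
		return -1
	} else if i.hitNumber > j.hitNumber {
		return 1
	}
	return 0
}

func main() {
	docs := []doc{{"a", 1.5, 7}, {"b", 2.0, 3}, {"c", 1.5, 2}}
	sort.Slice(docs, func(a, b int) bool { return cmp(docs[a], docs[b]) < 0 })
	fmt.Println(docs) // [{b 2 3} {c 1.5 2} {a 1.5 7}]
}
```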


@@ -308,5 +308,5 @@ First, all of this geo code is a Go adaptation of the [Lucene 5.3.2 sandbox geo
- LineStrings and MultiLineStrings may only contain Points and MultiPoints.
- Polygons or MultiPolygons intersecting Polygons and MultiPolygons may return arbitrary results when the overlap is only an edge or a vertex.
- Circles containing polygon will return a false positive result if all of the vertices of the polygon are within the circle, but the orientation of those points are clock-wise.
- The edges of an Envelope follows the latitude and logitude lines instead of the shortest path on a globe.
- The edges of an Envelope follows the latitude and longitude lines instead of the shortest path on a globe.
- Envelope intersecting queries with LineStrings, MultiLineStrings, Polygons and MultiPolygons implicitly converts the Envelope into a Polygon which changes the curvature of the edges causing inaccurate results for few edge cases.


@@ -114,7 +114,7 @@ func DegreesToRadians(d float64) float64 {
return d * degreesToRadian
}
// RadiansToDegrees converts an angle in radians to degress
// RadiansToDegrees converts an angle in radians to degrees
func RadiansToDegrees(r float64) float64 {
return r * radiansToDegrees
}


@@ -83,7 +83,7 @@ func ParseDistanceUnit(u string) (float64, error) {
}
// Haversin computes the distance between two points.
// This implemenation uses the sloppy math implemenations which trade off
// This implementation uses the sloppy math implementations which trade off
// accuracy for performance. The distance returned is in kilometers.
func Haversin(lon1, lat1, lon2, lat2 float64) float64 {
x1 := lat1 * degreesToRadian


@@ -149,7 +149,7 @@ func (b *Batch) String() string {
}
// Reset returns a Batch to the empty state so that it can
// be re-used in the future.
// be reused in the future.
func (b *Batch) Reset() {
b.internal.Reset()
b.lastDocSize = 0
@@ -325,6 +325,8 @@ func Open(path string) (Index, error) {
// The mapping used when it was created will be used for all Index/Search operations.
// The provided runtimeConfig can override settings
// persisted when the kvstore was created.
// If runtimeConfig contains an updated mapping, an index update is attempted.
// An error is returned, without any changes to the index, if an unupdatable mapping is provided.
func OpenUsing(path string, runtimeConfig map[string]interface{}) (Index, error) {
return openIndexUsing(path, runtimeConfig)
}
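
A usage sketch for the documented behaviour, assuming an existing index at example.bleve and that the underlying scorch index honours the "read_only" runtime option (an assumption here, not something this commit adds):

```go
package main

import (
	"log"

	"github.com/blevesearch/bleve/v2"
)

func main() {
	// Open an existing index with a runtime override; the persisted
	// mapping is used unless runtimeConfig supplies an updated one.
	idx, err := bleve.OpenUsing("example.bleve", map[string]interface{}{
		"read_only": true, // assumed scorch option
	})
	if err != nil {
		log.Fatal(err)
	}
	defer idx.Close()
}
```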


@@ -293,7 +293,7 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) {
newIndexSnapshot.segment[i] = newSegmentSnapshot
delete(persist.persisted, segmentSnapshot.id)
// update items persisted incase of a new segment snapshot
// update items persisted in case of a new segment snapshot
atomic.AddUint64(&s.stats.TotPersistedItems, newSegmentSnapshot.Count())
atomic.AddUint64(&s.stats.TotPersistedSegments, 1)
fileSegments++


@@ -295,7 +295,7 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) {
if len(bestRoster) == 0 {
return rv, nil
}
// create tasks with valid merges - i.e. there should be atleast 2 non-empty segments
// create tasks with valid merges - i.e. there should be at least 2 non-empty segments
if len(bestRoster) > 1 {
rv.Tasks = append(rv.Tasks, &MergeTask{Segments: bestRoster})
}


@@ -79,6 +79,12 @@ func (o *OptimizeVR) Finish() error {
wg.Done()
}()
for field, vrs := range o.vrs {
// Early exit if the field is supposed to be completely deleted or
// if its index data has been deleted
if info, ok := o.snapshot.updatedFields[field]; ok && (info.Deleted || info.Index) {
continue
}
vecIndex, err := segment.InterpretVectorIndex(field,
o.requiresFiltering, origSeg.deleted)
if err != nil {
@@ -185,7 +191,7 @@ func (s *IndexSnapshotVectorReader) VectorOptimize(ctx context.Context,
err := cbF(sumVectorIndexSize)
if err != nil {
// it's important to invoke the end callback at this point since
// if the earlier searchers of this optimze struct were successful
// if the earlier searchers of this optimize struct were successful
// the cost corresponding to it would be incremented and if the
// current searcher fails the check then we end up erroring out
// the overall optimized searcher creation, the cost needs to be


@@ -386,7 +386,7 @@ type flushable struct {
totDocs uint64
}
// number workers which parallely perform an in-memory merge of the segments
// number workers which parallelly perform an in-memory merge of the segments
// followed by a flush operation.
var DefaultNumPersisterWorkers = 1
@@ -395,7 +395,7 @@ var DefaultNumPersisterWorkers = 1
var DefaultMaxSizeInMemoryMergePerWorker = 0
func legacyFlushBehaviour(maxSizeInMemoryMergePerWorker, numPersisterWorkers int) bool {
// DefaultMaxSizeInMemoryMergePerWorker = 0 is a special value to preserve the leagcy
// DefaultMaxSizeInMemoryMergePerWorker = 0 is a special value to preserve the legacy
// one-shot in-memory merge + flush behaviour.
return maxSizeInMemoryMergePerWorker == 0 && numPersisterWorkers == 1
}
@@ -608,7 +608,7 @@ func persistToDirectory(seg segment.UnpersistedSegment, d index.Directory,
func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string,
segPlugin SegmentPlugin, exclude map[uint64]struct{}, d index.Directory) (
[]string, map[uint64]string, error) {
snapshotsBucket, err := tx.CreateBucketIfNotExists(boltSnapshotsBucket)
snapshotsBucket, err := tx.CreateBucketIfNotExists(util.BoltSnapshotsBucket)
if err != nil {
return nil, nil, err
}
@@ -619,17 +619,17 @@ func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string,
}
// persist meta values
metaBucket, err := snapshotBucket.CreateBucketIfNotExists(boltMetaDataKey)
metaBucket, err := snapshotBucket.CreateBucketIfNotExists(util.BoltMetaDataKey)
if err != nil {
return nil, nil, err
}
err = metaBucket.Put(boltMetaDataSegmentTypeKey, []byte(segPlugin.Type()))
err = metaBucket.Put(util.BoltMetaDataSegmentTypeKey, []byte(segPlugin.Type()))
if err != nil {
return nil, nil, err
}
buf := make([]byte, binary.MaxVarintLen32)
binary.BigEndian.PutUint32(buf, segPlugin.Version())
err = metaBucket.Put(boltMetaDataSegmentVersionKey, buf)
err = metaBucket.Put(util.BoltMetaDataSegmentVersionKey, buf)
if err != nil {
return nil, nil, err
}
@@ -643,13 +643,13 @@ func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string,
if err != nil {
return nil, nil, err
}
err = metaBucket.Put(boltMetaDataTimeStamp, timeStampBinary)
err = metaBucket.Put(util.BoltMetaDataTimeStamp, timeStampBinary)
if err != nil {
return nil, nil, err
}
// persist internal values
internalBucket, err := snapshotBucket.CreateBucketIfNotExists(boltInternalKey)
internalBucket, err := snapshotBucket.CreateBucketIfNotExists(util.BoltInternalKey)
if err != nil {
return nil, nil, err
}
@@ -665,7 +665,7 @@ func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string,
val := make([]byte, 8)
bytesWritten := atomic.LoadUint64(&snapshot.parent.stats.TotBytesWrittenAtIndexTime)
binary.LittleEndian.PutUint64(val, bytesWritten)
err = internalBucket.Put(TotBytesWrittenKey, val)
err = internalBucket.Put(util.TotBytesWrittenKey, val)
if err != nil {
return nil, nil, err
}
@@ -689,7 +689,7 @@ func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string,
return nil, nil, fmt.Errorf("segment: %s copy err: %v", segPath, err)
}
filename := filepath.Base(segPath)
err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename))
err = snapshotSegmentBucket.Put(util.BoltPathKey, []byte(filename))
if err != nil {
return nil, nil, err
}
@@ -705,7 +705,7 @@ func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string,
return nil, nil, fmt.Errorf("segment: %s persist err: %v", path, err)
}
newSegmentPaths[segmentSnapshot.id] = path
err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename))
err = snapshotSegmentBucket.Put(util.BoltPathKey, []byte(filename))
if err != nil {
return nil, nil, err
}
@@ -721,7 +721,7 @@ func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string,
if err != nil {
return nil, nil, fmt.Errorf("error persisting roaring bytes: %v", err)
}
err = snapshotSegmentBucket.Put(boltDeletedKey, roaringBuf.Bytes())
err = snapshotSegmentBucket.Put(util.BoltDeletedKey, roaringBuf.Bytes())
if err != nil {
return nil, nil, err
}
@@ -733,7 +733,19 @@ func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string,
if err != nil {
return nil, nil, err
}
err = snapshotSegmentBucket.Put(boltStatsKey, b)
err = snapshotSegmentBucket.Put(util.BoltStatsKey, b)
if err != nil {
return nil, nil, err
}
}
// store updated field info
if segmentSnapshot.updatedFields != nil {
b, err := json.Marshal(segmentSnapshot.updatedFields)
if err != nil {
return nil, nil, err
}
err = snapshotSegmentBucket.Put(util.BoltUpdatedFieldsKey, b)
if err != nil {
return nil, nil, err
}
@@ -832,22 +844,9 @@ func zapFileName(epoch uint64) string {
// bolt snapshot code
var (
boltSnapshotsBucket = []byte{'s'}
boltPathKey = []byte{'p'}
boltDeletedKey = []byte{'d'}
boltInternalKey = []byte{'i'}
boltMetaDataKey = []byte{'m'}
boltMetaDataSegmentTypeKey = []byte("type")
boltMetaDataSegmentVersionKey = []byte("version")
boltMetaDataTimeStamp = []byte("timeStamp")
boltStatsKey = []byte("stats")
TotBytesWrittenKey = []byte("TotBytesWritten")
)
func (s *Scorch) loadFromBolt() error {
err := s.rootBolt.View(func(tx *bolt.Tx) error {
snapshots := tx.Bucket(boltSnapshotsBucket)
snapshots := tx.Bucket(util.BoltSnapshotsBucket)
if snapshots == nil {
return nil
}
@@ -912,7 +911,7 @@ func (s *Scorch) loadFromBolt() error {
// NOTE: this is currently ONLY intended to be used by the command-line tool
func (s *Scorch) LoadSnapshot(epoch uint64) (rv *IndexSnapshot, err error) {
err = s.rootBolt.View(func(tx *bolt.Tx) error {
snapshots := tx.Bucket(boltSnapshotsBucket)
snapshots := tx.Bucket(util.BoltSnapshotsBucket)
if snapshots == nil {
return nil
}
@@ -940,14 +939,14 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
// first we look for the meta-data bucket, this will tell us
// which segment type/version was used for this snapshot
// all operations for this scorch will use this type/version
metaBucket := snapshot.Bucket(boltMetaDataKey)
metaBucket := snapshot.Bucket(util.BoltMetaDataKey)
if metaBucket == nil {
_ = rv.DecRef()
return nil, fmt.Errorf("meta-data bucket missing")
}
segmentType := string(metaBucket.Get(boltMetaDataSegmentTypeKey))
segmentType := string(metaBucket.Get(util.BoltMetaDataSegmentTypeKey))
segmentVersion := binary.BigEndian.Uint32(
metaBucket.Get(boltMetaDataSegmentVersionKey))
metaBucket.Get(util.BoltMetaDataSegmentVersionKey))
err := s.loadSegmentPlugin(segmentType, segmentVersion)
if err != nil {
_ = rv.DecRef()
@@ -957,7 +956,7 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
var running uint64
c := snapshot.Cursor()
for k, _ := c.First(); k != nil; k, _ = c.Next() {
if k[0] == boltInternalKey[0] {
if k[0] == util.BoltInternalKey[0] {
internalBucket := snapshot.Bucket(k)
if internalBucket == nil {
_ = rv.DecRef()
@@ -972,11 +971,11 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
_ = rv.DecRef()
return nil, err
}
} else if k[0] != boltMetaDataKey[0] {
} else if k[0] != util.BoltMetaDataKey[0] {
segmentBucket := snapshot.Bucket(k)
if segmentBucket == nil {
_ = rv.DecRef()
return nil, fmt.Errorf("segment key, but bucket missing % x", k)
return nil, fmt.Errorf("segment key, but bucket missing %x", k)
}
segmentSnapshot, err := s.loadSegment(segmentBucket)
if err != nil {
@@ -990,6 +989,10 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
}
rv.segment = append(rv.segment, segmentSnapshot)
rv.offsets = append(rv.offsets, running)
// Merge all segment level updated field info for use during queries
if segmentSnapshot.updatedFields != nil {
rv.MergeUpdateFieldsInfo(segmentSnapshot.updatedFields)
}
running += segmentSnapshot.segment.Count()
}
}
@@ -997,46 +1000,59 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
}
func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, error) {
pathBytes := segmentBucket.Get(boltPathKey)
pathBytes := segmentBucket.Get(util.BoltPathKey)
if pathBytes == nil {
return nil, fmt.Errorf("segment path missing")
}
segmentPath := s.path + string(os.PathSeparator) + string(pathBytes)
segment, err := s.segPlugin.Open(segmentPath)
seg, err := s.segPlugin.Open(segmentPath)
if err != nil {
return nil, fmt.Errorf("error opening bolt segment: %v", err)
}
rv := &SegmentSnapshot{
segment: segment,
segment: seg,
cachedDocs: &cachedDocs{cache: nil},
cachedMeta: &cachedMeta{meta: nil},
}
deletedBytes := segmentBucket.Get(boltDeletedKey)
deletedBytes := segmentBucket.Get(util.BoltDeletedKey)
if deletedBytes != nil {
deletedBitmap := roaring.NewBitmap()
r := bytes.NewReader(deletedBytes)
_, err := deletedBitmap.ReadFrom(r)
if err != nil {
_ = segment.Close()
_ = seg.Close()
return nil, fmt.Errorf("error reading deleted bytes: %v", err)
}
if !deletedBitmap.IsEmpty() {
rv.deleted = deletedBitmap
}
}
statBytes := segmentBucket.Get(boltStatsKey)
statBytes := segmentBucket.Get(util.BoltStatsKey)
if statBytes != nil {
var statsMap map[string]map[string]uint64
err := json.Unmarshal(statBytes, &statsMap)
stats := &fieldStats{statMap: statsMap}
if err != nil {
_ = segment.Close()
_ = seg.Close()
return nil, fmt.Errorf("error reading stat bytes: %v", err)
}
rv.stats = stats
}
updatedFieldBytes := segmentBucket.Get(util.BoltUpdatedFieldsKey)
if updatedFieldBytes != nil {
var updatedFields map[string]*index.UpdateFieldInfo
err := json.Unmarshal(updatedFieldBytes, &updatedFields)
if err != nil {
_ = seg.Close()
return nil, fmt.Errorf("error reading updated field bytes: %v", err)
}
rv.updatedFields = updatedFields
// Set the value within the segment base for use during merge
rv.UpdateFieldsInfo(rv.updatedFields)
}
return rv, nil
}
@@ -1215,7 +1231,7 @@ func (s *Scorch) removeOldBoltSnapshots() (numRemoved int, err error) {
}
}()
snapshots := tx.Bucket(boltSnapshotsBucket)
snapshots := tx.Bucket(util.BoltSnapshotsBucket)
if snapshots == nil {
return 0, nil
}
@@ -1293,7 +1309,7 @@ func (s *Scorch) removeOldZapFiles() error {
// duration. This results in all of them being purged from the boltDB
// and the next iteration of the removeOldData() would end up protecting
// latest contiguous snapshot which is a poor pattern in the rollback checkpoints.
// Hence we try to retain atmost retentionFactor portion worth of old snapshots
// Hence we try to retain at most retentionFactor portion worth of old snapshots
// in such a scenario using the following function
func getBoundaryCheckPoint(retentionFactor float64,
checkPoints []*snapshotMetaData, timeStamp time.Time,
@@ -1325,7 +1341,7 @@ func (s *Scorch) rootBoltSnapshotMetaData() ([]*snapshotMetaData, error) {
expirationDuration := time.Duration(s.numSnapshotsToKeep-1) * s.rollbackSamplingInterval
err := s.rootBolt.View(func(tx *bolt.Tx) error {
snapshots := tx.Bucket(boltSnapshotsBucket)
snapshots := tx.Bucket(util.BoltSnapshotsBucket)
if snapshots == nil {
return nil
}
@@ -1349,11 +1365,11 @@ func (s *Scorch) rootBoltSnapshotMetaData() ([]*snapshotMetaData, error) {
if snapshot == nil {
continue
}
metaBucket := snapshot.Bucket(boltMetaDataKey)
metaBucket := snapshot.Bucket(util.BoltMetaDataKey)
if metaBucket == nil {
continue
}
timeStampBytes := metaBucket.Get(boltMetaDataTimeStamp)
timeStampBytes := metaBucket.Get(util.BoltMetaDataTimeStamp)
var timeStamp time.Time
err = timeStamp.UnmarshalText(timeStampBytes)
if err != nil {
@@ -1390,7 +1406,7 @@ func (s *Scorch) rootBoltSnapshotMetaData() ([]*snapshotMetaData, error) {
func (s *Scorch) RootBoltSnapshotEpochs() ([]uint64, error) {
var rv []uint64
err := s.rootBolt.View(func(tx *bolt.Tx) error {
snapshots := tx.Bucket(boltSnapshotsBucket)
snapshots := tx.Bucket(util.BoltSnapshotsBucket)
if snapshots == nil {
return nil
}
@@ -1411,7 +1427,7 @@ func (s *Scorch) RootBoltSnapshotEpochs() ([]uint64, error) {
func (s *Scorch) loadZapFileNames() (map[string]struct{}, error) {
rv := map[string]struct{}{}
err := s.rootBolt.View(func(tx *bolt.Tx) error {
snapshots := tx.Bucket(boltSnapshotsBucket)
snapshots := tx.Bucket(util.BoltSnapshotsBucket)
if snapshots == nil {
return nil
}
@@ -1423,14 +1439,14 @@ func (s *Scorch) loadZapFileNames() (map[string]struct{}, error) {
}
segc := snapshot.Cursor()
for segk, _ := segc.First(); segk != nil; segk, _ = segc.Next() {
if segk[0] == boltInternalKey[0] {
if segk[0] == util.BoltInternalKey[0] {
continue
}
segmentBucket := snapshot.Bucket(segk)
if segmentBucket == nil {
continue
}
pathBytes := segmentBucket.Get(boltPathKey)
pathBytes := segmentBucket.Get(util.BoltPathKey)
if pathBytes == nil {
continue
}


@@ -19,6 +19,7 @@ import (
"log"
"os"
"github.com/blevesearch/bleve/v2/util"
bolt "go.etcd.io/bbolt"
)
@@ -61,7 +62,7 @@ func RollbackPoints(path string) ([]*RollbackPoint, error) {
_ = rootBolt.Close()
}()
snapshots := tx.Bucket(boltSnapshotsBucket)
snapshots := tx.Bucket(util.BoltSnapshotsBucket)
if snapshots == nil {
return nil, nil
}
@@ -87,7 +88,7 @@ func RollbackPoints(path string) ([]*RollbackPoint, error) {
meta := map[string][]byte{}
c2 := snapshot.Cursor()
for j, _ := c2.First(); j != nil; j, _ = c2.Next() {
if j[0] == boltInternalKey[0] {
if j[0] == util.BoltInternalKey[0] {
internalBucket := snapshot.Bucket(j)
if internalBucket == nil {
err = fmt.Errorf("internal bucket missing")
@@ -151,7 +152,7 @@ func Rollback(path string, to *RollbackPoint) error {
var found bool
var eligibleEpochs []uint64
err = rootBolt.View(func(tx *bolt.Tx) error {
snapshots := tx.Bucket(boltSnapshotsBucket)
snapshots := tx.Bucket(util.BoltSnapshotsBucket)
if snapshots == nil {
return nil
}
@@ -193,7 +194,7 @@ func Rollback(path string, to *RollbackPoint) error {
}
}()
snapshots := tx.Bucket(boltSnapshotsBucket)
snapshots := tx.Bucket(util.BoltSnapshotsBucket)
if snapshots == nil {
return nil
}


@@ -25,6 +25,7 @@ import (
"github.com/RoaringBitmap/roaring/v2"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
segment "github.com/blevesearch/scorch_segment_api/v2"
bolt "go.etcd.io/bbolt"
@@ -217,9 +218,11 @@ func (s *Scorch) fireAsyncError(err error) {
}
func (s *Scorch) Open() error {
err := s.openBolt()
if err != nil {
return err
if s.rootBolt == nil {
err := s.openBolt()
if err != nil {
return err
}
}
s.asyncTasks.Add(1)
@@ -371,6 +374,7 @@ func (s *Scorch) Close() (err error) {
}
}
s.root = nil
s.rootBolt = nil
s.rootLock.Unlock()
}
@@ -940,3 +944,96 @@ func (s *Scorch) CopyReader() index.CopyReader {
func (s *Scorch) FireIndexEvent() {
s.fireEvent(EventKindIndexStart, 0)
}
// Updates bolt db with the given field info. Existing field info already in bolt
// will be merged before persisting. The index mapping is also overwritten
// both in bolt and in the index snapshot.
func (s *Scorch) UpdateFields(fieldInfo map[string]*index.UpdateFieldInfo, mappingBytes []byte) error {
err := s.updateBolt(fieldInfo, mappingBytes)
if err != nil {
return err
}
// Pass the update field info to all snapshots and segment bases
s.root.UpdateFieldsInfo(fieldInfo)
return nil
}
func (s *Scorch) OpenMeta() error {
if s.rootBolt == nil {
err := s.openBolt()
if err != nil {
return err
}
}
return nil
}
// Merge and update deleted field info and rewrite index mapping
func (s *Scorch) updateBolt(fieldInfo map[string]*index.UpdateFieldInfo, mappingBytes []byte) error {
return s.rootBolt.Update(func(tx *bolt.Tx) error {
snapshots := tx.Bucket(util.BoltSnapshotsBucket)
if snapshots == nil {
return nil
}
c := snapshots.Cursor()
for k, _ := c.Last(); k != nil; k, _ = c.Prev() {
_, _, err := decodeUvarintAscending(k)
if err != nil {
fmt.Printf("unable to parse segment epoch %x, continuing", k)
continue
}
snapshot := snapshots.Bucket(k)
cc := snapshot.Cursor()
for kk, _ := cc.First(); kk != nil; kk, _ = cc.Next() {
if kk[0] == util.BoltInternalKey[0] {
internalBucket := snapshot.Bucket(kk)
if internalBucket == nil {
return fmt.Errorf("segment key, but bucket missing %x", kk)
}
err = internalBucket.Put(util.MappingInternalKey, mappingBytes)
if err != nil {
return err
}
} else if kk[0] != util.BoltMetaDataKey[0] {
segmentBucket := snapshot.Bucket(kk)
if segmentBucket == nil {
return fmt.Errorf("segment key, but bucket missing %x", kk)
}
var updatedFields map[string]*index.UpdateFieldInfo
updatedFieldBytes := segmentBucket.Get(util.BoltUpdatedFieldsKey)
if updatedFieldBytes != nil {
err := json.Unmarshal(updatedFieldBytes, &updatedFields)
if err != nil {
return fmt.Errorf("error reading updated field bytes: %v", err)
}
for field, info := range fieldInfo {
if val, ok := updatedFields[field]; ok {
updatedFields[field] = &index.UpdateFieldInfo{
Deleted: info.Deleted || val.Deleted,
Store: info.Store || val.Store,
DocValues: info.DocValues || val.DocValues,
Index: info.Index || val.Index,
}
} else {
updatedFields[field] = info
}
}
} else {
updatedFields = fieldInfo
}
b, err := json.Marshal(updatedFields)
if err != nil {
return err
}
err = segmentBucket.Put(util.BoltUpdatedFieldsKey, b)
if err != nil {
return err
}
}
}
}
return nil
})
}
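
updateBolt walks every snapshot bucket with nested cursors, rewriting the internal mapping key and the per-segment updated-fields key. The bbolt cursor pattern it relies on, as a minimal standalone sketch (the bucket name "s" and file name are chosen for illustration):

```go
package main

import (
	"fmt"
	"log"

	bolt "go.etcd.io/bbolt"
)

func main() {
	db, err := bolt.Open("example.db", 0o600, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	err = db.Update(func(tx *bolt.Tx) error {
		root, err := tx.CreateBucketIfNotExists([]byte("s"))
		if err != nil {
			return err
		}
		// Iterate the bucket's keys newest-first, as updateBolt does
		// with c.Last()/c.Prev(); descend via root.Bucket(k) as needed.
		c := root.Cursor()
		for k, _ := c.Last(); k != nil; k, _ = c.Prev() {
			fmt.Printf("key: %x\n", k)
		}
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
}
```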


@@ -84,6 +84,13 @@ type IndexSnapshot struct {
m3 sync.RWMutex // bm25 metrics specific - not to interfere with TFR creation
fieldCardinality map[string]int
// Stores information about zapx fields that have been
// fully deleted (indicated by UpdateFieldInfo.Deleted) or
// partially deleted index, store or docvalues (indicated by
// UpdateFieldInfo.Index or .Store or .DocValues).
// Used to short circuit queries trying to read stale data
updatedFields map[string]*index.UpdateFieldInfo
}
func (i *IndexSnapshot) Segments() []*SegmentSnapshot {
@@ -509,6 +516,13 @@ func (is *IndexSnapshot) Document(id string) (rv index.Document, err error) {
// Keeping that TODO for now until we have a cleaner way.
rvd.StoredFieldsSize += uint64(len(val))
// Skip fields that have been completely deleted or had their
// store data deleted
if info, ok := is.updatedFields[name]; ok &&
(info.Deleted || info.Store) {
return true
}
// copy value, array positions to preserve them beyond the scope of this callback
value := append([]byte(nil), val...)
arrayPos := append([]uint64(nil), pos...)
@@ -634,10 +648,22 @@ func (is *IndexSnapshot) TermFieldReader(ctx context.Context, term []byte, field
segBytesRead := s.segment.BytesRead()
rv.incrementBytesRead(segBytesRead)
}
dict, err := s.segment.Dictionary(field)
var dict segment.TermDictionary
var err error
// Skip fields that have been completely deleted or had their
// index data deleted
if info, ok := is.updatedFields[field]; ok &&
(info.Index || info.Deleted) {
dict, err = s.segment.Dictionary("")
} else {
dict, err = s.segment.Dictionary(field)
}
if err != nil {
return nil, err
}
if dictStats, ok := dict.(segment.DiskStatsReporter); ok {
bytesRead := dictStats.BytesRead()
rv.incrementBytesRead(bytesRead)
@@ -783,6 +809,23 @@ func (is *IndexSnapshot) documentVisitFieldTermsOnSegment(
}
}
// Filter out fields that have been completely deleted or had their
// docvalues data deleted from both visitable fields and required fields
filterUpdatedFields := func(fields []string) []string {
filteredFields := make([]string, 0)
for _, field := range fields {
if info, ok := is.updatedFields[field]; ok &&
(info.DocValues || info.Deleted) {
continue
}
filteredFields = append(filteredFields, field)
}
return filteredFields
}
fieldsFiltered := filterUpdatedFields(fields)
vFieldsFiltered := filterUpdatedFields(vFields)
var errCh chan error
// cFields represents the fields that we'll need from the
@@ -790,7 +833,7 @@ func (is *IndexSnapshot) documentVisitFieldTermsOnSegment(
// if the caller happens to know we're on the same segmentIndex
// from a previous invocation
if cFields == nil {
cFields = subtractStrings(fields, vFields)
cFields = subtractStrings(fieldsFiltered, vFieldsFiltered)
if !ss.cachedDocs.hasFields(cFields) {
errCh = make(chan error, 1)
@@ -805,8 +848,8 @@ func (is *IndexSnapshot) documentVisitFieldTermsOnSegment(
}
}
if ssvOk && ssv != nil && len(vFields) > 0 {
dvs, err = ssv.VisitDocValues(localDocNum, fields, visitor, dvs)
if ssvOk && ssv != nil && len(vFieldsFiltered) > 0 {
dvs, err = ssv.VisitDocValues(localDocNum, fieldsFiltered, visitor, dvs)
if err != nil {
return nil, nil, err
}
@@ -1161,3 +1204,33 @@ func (is *IndexSnapshot) ThesaurusKeysRegexp(name string,
func (is *IndexSnapshot) UpdateSynonymSearchCount(delta uint64) {
atomic.AddUint64(&is.parent.stats.TotSynonymSearches, delta)
}
// Update the current snapshot's updated-field data and pass it on to all segments and segment bases
func (is *IndexSnapshot) UpdateFieldsInfo(updatedFields map[string]*index.UpdateFieldInfo) {
is.m.Lock()
defer is.m.Unlock()
is.MergeUpdateFieldsInfo(updatedFields)
for _, segmentSnapshot := range is.segment {
segmentSnapshot.UpdateFieldsInfo(is.updatedFields)
}
}
// Merge given updated field information with existing updated field information
func (is *IndexSnapshot) MergeUpdateFieldsInfo(updatedFields map[string]*index.UpdateFieldInfo) {
if is.updatedFields == nil {
is.updatedFields = updatedFields
} else {
for fieldName, info := range updatedFields {
if val, ok := is.updatedFields[fieldName]; ok {
val.Deleted = val.Deleted || info.Deleted
val.Index = val.Index || info.Index
val.DocValues = val.DocValues || info.DocValues
val.Store = val.Store || info.Store
} else {
is.updatedFields[fieldName] = info
}
}
}
}
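
The merge is a field-wise boolean OR, so a flag that was ever set stays set across snapshots. A minimal sketch of that semantics (updateFieldInfo is an illustrative stand-in for index.UpdateFieldInfo, not the library type):

```go
package main

import "fmt"

// updateFieldInfo mirrors the four flags used above; it is an
// illustration, not the bleve_index_api type.
type updateFieldInfo struct {
	Deleted, Index, DocValues, Store bool
}

// merge ORs src into dst, matching MergeUpdateFieldsInfo: deletions
// accumulate and are never undone by a later, narrower update.
func merge(dst, src map[string]*updateFieldInfo) {
	for name, info := range src {
		if cur, ok := dst[name]; ok {
			cur.Deleted = cur.Deleted || info.Deleted
			cur.Index = cur.Index || info.Index
			cur.DocValues = cur.DocValues || info.DocValues
			cur.Store = cur.Store || info.Store
		} else {
			dst[name] = info
		}
	}
}

func main() {
	dst := map[string]*updateFieldInfo{"title": {Index: true}}
	merge(dst, map[string]*updateFieldInfo{"title": {Store: true}})
	fmt.Printf("%+v\n", *dst["title"])
	// {Deleted:false Index:true DocValues:false Store:true}
}
```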


@@ -163,7 +163,7 @@ func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAllo
// unadorned composite optimization
// we need to reset all the iterators
// back to the beginning, which effectively
// achives the same thing as the above
// achieves the same thing as the above
for _, iter := range i.iterators {
if optimizedIterator, ok := iter.(ResetablePostingsIterator); ok {
optimizedIterator.ResetIterator()


@@ -83,6 +83,10 @@ func (i *IndexSnapshotVectorReader) Next(preAlloced *index.VectorDoc) (
}
for i.segmentOffset < len(i.iterators) {
if i.iterators[i.segmentOffset] == nil {
i.segmentOffset++
continue
}
next, err := i.iterators[i.segmentOffset].Next()
if err != nil {
return nil, err


@@ -35,12 +35,13 @@ type SegmentSnapshot struct {
// segment was mmaped recently, in which case
// we consider the loading cost of the metadata
// as part of IO stats.
mmaped uint32
id uint64
segment segment.Segment
deleted *roaring.Bitmap
creator string
stats *fieldStats
mmaped uint32
id uint64
segment segment.Segment
deleted *roaring.Bitmap
creator string
stats *fieldStats
updatedFields map[string]*index.UpdateFieldInfo
cachedMeta *cachedMeta
@@ -146,6 +147,28 @@ func (s *SegmentSnapshot) Size() (rv int) {
return
}
// Merge the given updated field information with the existing info and pass it on to the segment base
func (s *SegmentSnapshot) UpdateFieldsInfo(updatedFields map[string]*index.UpdateFieldInfo) {
if s.updatedFields == nil {
s.updatedFields = updatedFields
} else {
for fieldName, info := range updatedFields {
if val, ok := s.updatedFields[fieldName]; ok {
val.Deleted = val.Deleted || info.Deleted
val.Index = val.Index || info.Index
val.DocValues = val.DocValues || info.DocValues
val.Store = val.Store || info.Store
} else {
s.updatedFields[fieldName] = info
}
}
}
if segment, ok := s.segment.(segment.UpdatableSegment); ok {
segment.SetUpdatedFields(s.updatedFields)
}
}
type cachedFieldDocs struct {
m sync.Mutex
readyCh chan struct{} // closed when the cachedFieldDocs.docs is ready to be used.


@@ -0,0 +1,13 @@
## Instructions for generating new go stubs using upsidedown.proto
1. Download the latest protoc-gen-go:
```
go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
```
2. To generate `upsidedown.pb.go` using upsidedown.proto:
```
protoc --go_out=. --go_opt=Mindex/upsidedown/upsidedown.proto=index/upsidedown/ index/upsidedown/upsidedown.proto
```
3. Manually add back Size and MarshalTo methods for BackIndexRowValue, BackIndexTermsEntry, BackIndexStoreEntry to support upside_down.


@@ -371,6 +371,6 @@ func (r *UpsideDownCouchDocIDReader) nextOnly() bool {
start = r.onlyPos
r.onlyPos++
}
// inidicate if we got to the end of the list
// indicate if we got to the end of the list
return r.onlyPos < len(r.only)
}


@@ -23,7 +23,7 @@ import (
"reflect"
"github.com/blevesearch/bleve/v2/size"
"github.com/golang/protobuf/proto"
"google.golang.org/protobuf/proto"
)
var (
@@ -924,7 +924,7 @@ type backIndexFieldTermVisitor func(field uint32, term []byte)
//
// This code originates from:
// func (m *BackIndexRowValue) Unmarshal(data []byte) error
// the sections which create garbage or parse unintersting sections
// the sections which create garbage or parse uninteresting sections
// have been commented out. This was done by design to allow for easier
// merging in the future if that original function is regenerated
func visitBackIndexRow(data []byte, callback backIndexFieldTermVisitor) error {


@@ -30,7 +30,7 @@ import (
index "github.com/blevesearch/bleve_index_api"
store "github.com/blevesearch/upsidedown_store_api"
"github.com/golang/protobuf/proto"
"google.golang.org/protobuf/proto"
)
const Name = "upside_down"


@@ -1,382 +1,319 @@
// Code generated by protoc-gen-gogo.
// source: upsidedown.proto
// DO NOT EDIT!
// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
// protoc-gen-go v1.36.6
// protoc v5.29.3
// source: index/upsidedown/upsidedown.proto
/*
Package upsidedown is a generated protocol buffer package.
It is generated from these files:
upsidedown.proto
It has these top-level messages:
BackIndexTermsEntry
BackIndexStoreEntry
BackIndexRowValue
*/
package upsidedown
import proto "github.com/golang/protobuf/proto"
import math "math"
import (
fmt "fmt"
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
io "io"
reflect "reflect"
sync "sync"
unsafe "unsafe"
)
import io "io"
import fmt "fmt"
import github_com_golang_protobuf_proto "github.com/golang/protobuf/proto"
const (
// Verify that this generated code is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
// Verify that runtime/protoimpl is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
)
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
var _ = math.Inf
var (
ErrInvalidLengthUpsidedown = fmt.Errorf("proto: negative length found during unmarshaling")
)
type BackIndexTermsEntry struct {
Field *uint32 `protobuf:"varint,1,req,name=field" json:"field,omitempty"`
Terms []string `protobuf:"bytes,2,rep,name=terms" json:"terms,omitempty"`
XXX_unrecognized []byte `json:"-"`
state protoimpl.MessageState `protogen:"open.v1"`
Field *uint32 `protobuf:"varint,1,req,name=field" json:"field,omitempty"`
Terms []string `protobuf:"bytes,2,rep,name=terms" json:"terms,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (m *BackIndexTermsEntry) Reset() { *m = BackIndexTermsEntry{} }
func (m *BackIndexTermsEntry) String() string { return proto.CompactTextString(m) }
func (*BackIndexTermsEntry) ProtoMessage() {}
func (x *BackIndexTermsEntry) Reset() {
*x = BackIndexTermsEntry{}
mi := &file_index_upsidedown_upsidedown_proto_msgTypes[0]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (m *BackIndexTermsEntry) GetField() uint32 {
if m != nil && m.Field != nil {
return *m.Field
func (x *BackIndexTermsEntry) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*BackIndexTermsEntry) ProtoMessage() {}
func (x *BackIndexTermsEntry) ProtoReflect() protoreflect.Message {
mi := &file_index_upsidedown_upsidedown_proto_msgTypes[0]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use BackIndexTermsEntry.ProtoReflect.Descriptor instead.
func (*BackIndexTermsEntry) Descriptor() ([]byte, []int) {
return file_index_upsidedown_upsidedown_proto_rawDescGZIP(), []int{0}
}
func (x *BackIndexTermsEntry) GetField() uint32 {
if x != nil && x.Field != nil {
return *x.Field
}
return 0
}
func (m *BackIndexTermsEntry) GetTerms() []string {
if m != nil {
return m.Terms
func (x *BackIndexTermsEntry) GetTerms() []string {
if x != nil {
return x.Terms
}
return nil
}
func (x *BackIndexTermsEntry) MarshalTo(data []byte) (n int, err error) {
var i int
_ = i
var l int
_ = l
if x.Field == nil {
return 0, fmt.Errorf("missing required `Field`")
} else {
data[i] = 0x8
i++
i = encodeVarintUpsidedown(data, i, uint64(*x.Field))
}
if len(x.Terms) > 0 {
for _, s := range x.Terms {
data[i] = 0x12
i++
l = len(s)
for l >= 1<<7 {
data[i] = uint8(uint64(l)&0x7f | 0x80)
l >>= 7
i++
}
data[i] = uint8(l)
i++
i += copy(data[i:], s)
}
}
return i, nil
}
func (x *BackIndexTermsEntry) Size() (n int) {
var l int
_ = l
if x.Field != nil {
n += 1 + sovUpsidedown(uint64(*x.Field))
}
if len(x.Terms) > 0 {
for _, s := range x.Terms {
l = len(s)
n += 1 + l + sovUpsidedown(uint64(l))
}
}
return n
}
type BackIndexStoreEntry struct {
Field *uint32 `protobuf:"varint,1,req,name=field" json:"field,omitempty"`
ArrayPositions []uint64 `protobuf:"varint,2,rep,name=arrayPositions" json:"arrayPositions,omitempty"`
XXX_unrecognized []byte `json:"-"`
state protoimpl.MessageState `protogen:"open.v1"`
Field *uint32 `protobuf:"varint,1,req,name=field" json:"field,omitempty"`
ArrayPositions []uint64 `protobuf:"varint,2,rep,name=arrayPositions" json:"arrayPositions,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (m *BackIndexStoreEntry) Reset() { *m = BackIndexStoreEntry{} }
func (m *BackIndexStoreEntry) String() string { return proto.CompactTextString(m) }
func (*BackIndexStoreEntry) ProtoMessage() {}
func (x *BackIndexStoreEntry) Reset() {
*x = BackIndexStoreEntry{}
mi := &file_index_upsidedown_upsidedown_proto_msgTypes[1]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (m *BackIndexStoreEntry) GetField() uint32 {
if m != nil && m.Field != nil {
return *m.Field
func (x *BackIndexStoreEntry) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*BackIndexStoreEntry) ProtoMessage() {}
func (x *BackIndexStoreEntry) ProtoReflect() protoreflect.Message {
mi := &file_index_upsidedown_upsidedown_proto_msgTypes[1]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use BackIndexStoreEntry.ProtoReflect.Descriptor instead.
func (*BackIndexStoreEntry) Descriptor() ([]byte, []int) {
return file_index_upsidedown_upsidedown_proto_rawDescGZIP(), []int{1}
}
func (x *BackIndexStoreEntry) GetField() uint32 {
if x != nil && x.Field != nil {
return *x.Field
}
return 0
}
func (m *BackIndexStoreEntry) GetArrayPositions() []uint64 {
if m != nil {
return m.ArrayPositions
func (x *BackIndexStoreEntry) GetArrayPositions() []uint64 {
if x != nil {
return x.ArrayPositions
}
return nil
}
func (x *BackIndexStoreEntry) MarshalTo(data []byte) (n int, err error) {
var i int
_ = i
var l int
_ = l
if x.Field == nil {
return 0, fmt.Errorf("missing required `Field`")
} else {
data[i] = 0x8
i++
i = encodeVarintUpsidedown(data, i, uint64(*x.Field))
}
if len(x.ArrayPositions) > 0 {
for _, num := range x.ArrayPositions {
data[i] = 0x10
i++
i = encodeVarintUpsidedown(data, i, uint64(num))
}
}
return i, nil
}
func (x *BackIndexStoreEntry) Size() (n int) {
var l int
_ = l
if x.Field != nil {
n += 1 + sovUpsidedown(uint64(*x.Field))
}
if len(x.ArrayPositions) > 0 {
for _, e := range x.ArrayPositions {
n += 1 + sovUpsidedown(uint64(e))
}
}
return n
}
type BackIndexRowValue struct {
TermsEntries []*BackIndexTermsEntry `protobuf:"bytes,1,rep,name=termsEntries" json:"termsEntries,omitempty"`
StoredEntries []*BackIndexStoreEntry `protobuf:"bytes,2,rep,name=storedEntries" json:"storedEntries,omitempty"`
XXX_unrecognized []byte `json:"-"`
state protoimpl.MessageState `protogen:"open.v1"`
TermsEntries []*BackIndexTermsEntry `protobuf:"bytes,1,rep,name=termsEntries" json:"termsEntries,omitempty"`
StoredEntries []*BackIndexStoreEntry `protobuf:"bytes,2,rep,name=storedEntries" json:"storedEntries,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (m *BackIndexRowValue) Reset() { *m = BackIndexRowValue{} }
func (m *BackIndexRowValue) String() string { return proto.CompactTextString(m) }
func (*BackIndexRowValue) ProtoMessage() {}
func (m *BackIndexRowValue) GetTermsEntries() []*BackIndexTermsEntry {
if m != nil {
return m.TermsEntries
}
return nil
func (x *BackIndexRowValue) Reset() {
*x = BackIndexRowValue{}
mi := &file_index_upsidedown_upsidedown_proto_msgTypes[2]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (m *BackIndexRowValue) GetStoredEntries() []*BackIndexStoreEntry {
if m != nil {
return m.StoredEntries
}
return nil
func (x *BackIndexRowValue) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (m *BackIndexTermsEntry) Unmarshal(data []byte) error {
var hasFields [1]uint64
l := len(data)
iNdEx := 0
for iNdEx < l {
var wire uint64
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
wire |= (uint64(b) & 0x7F) << shift
if b < 0x80 {
break
}
fieldNum := int32(wire >> 3)
wireType := int(wire & 0x7)
switch fieldNum {
case 1:
if wireType != 0 {
return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType)
}
var v uint32
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
v |= (uint32(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
m.Field = &v
hasFields[0] |= uint64(0x00000001)
case 2:
if wireType != 2 {
return fmt.Errorf("proto: wrong wireType = %d for field Terms", wireType)
}
var stringLen uint64
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
stringLen |= (uint64(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
postIndex := iNdEx + int(stringLen)
if postIndex > l {
return io.ErrUnexpectedEOF
}
m.Terms = append(m.Terms, string(data[iNdEx:postIndex]))
iNdEx = postIndex
default:
var sizeOfWire int
for {
sizeOfWire++
wire >>= 7
if wire == 0 {
break
}
}
iNdEx -= sizeOfWire
skippy, err := skipUpsidedown(data[iNdEx:])
if err != nil {
return err
}
if skippy < 0 {
return ErrInvalidLengthUpsidedown
}
if (iNdEx + skippy) > l {
return io.ErrUnexpectedEOF
}
m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
iNdEx += skippy
}
}
if hasFields[0]&uint64(0x00000001) == 0 {
return new(github_com_golang_protobuf_proto.RequiredNotSetError)
}
return nil
}
func (*BackIndexRowValue) ProtoMessage() {}
func (x *BackIndexRowValue) ProtoReflect() protoreflect.Message {
mi := &file_index_upsidedown_upsidedown_proto_msgTypes[2]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use BackIndexRowValue.ProtoReflect.Descriptor instead.
func (*BackIndexRowValue) Descriptor() ([]byte, []int) {
return file_index_upsidedown_upsidedown_proto_rawDescGZIP(), []int{2}
}
func (x *BackIndexRowValue) GetTermsEntries() []*BackIndexTermsEntry {
if x != nil {
return x.TermsEntries
}
return nil
}
func (x *BackIndexRowValue) GetStoredEntries() []*BackIndexStoreEntry {
if x != nil {
return x.StoredEntries
}
return nil
}
func (x *BackIndexRowValue) MarshalTo(data []byte) (n int, err error) {
var i int
_ = i
var l int
_ = l
if len(x.TermsEntries) > 0 {
for _, msg := range x.TermsEntries {
data[i] = 0xa
i++
i = encodeVarintUpsidedown(data, i, uint64(msg.Size()))
n, err := msg.MarshalTo(data[i:])
if err != nil {
return 0, err
}
i += n
}
}
if len(x.StoredEntries) > 0 {
for _, msg := range x.StoredEntries {
data[i] = 0x12
i++
i = encodeVarintUpsidedown(data, i, uint64(msg.Size()))
n, err := msg.MarshalTo(data[i:])
if err != nil {
return 0, err
}
i += n
}
}
return i, nil
}
func (m *BackIndexStoreEntry) Unmarshal(data []byte) error {
var hasFields [1]uint64
l := len(data)
iNdEx := 0
for iNdEx < l {
var wire uint64
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
wire |= (uint64(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
fieldNum := int32(wire >> 3)
wireType := int(wire & 0x7)
switch fieldNum {
case 1:
if wireType != 0 {
return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType)
}
var v uint32
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
v |= (uint32(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
m.Field = &v
hasFields[0] |= uint64(0x00000001)
case 2:
if wireType != 0 {
return fmt.Errorf("proto: wrong wireType = %d for field ArrayPositions", wireType)
}
var v uint64
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
v |= (uint64(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
m.ArrayPositions = append(m.ArrayPositions, v)
default:
var sizeOfWire int
for {
sizeOfWire++
wire >>= 7
if wire == 0 {
break
}
}
iNdEx -= sizeOfWire
skippy, err := skipUpsidedown(data[iNdEx:])
if err != nil {
return err
}
if skippy < 0 {
return ErrInvalidLengthUpsidedown
}
if (iNdEx + skippy) > l {
return io.ErrUnexpectedEOF
}
m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
iNdEx += skippy
}
}
if hasFields[0]&uint64(0x00000001) == 0 {
return new(github_com_golang_protobuf_proto.RequiredNotSetError)
}
return nil
}
func (m *BackIndexRowValue) Unmarshal(data []byte) error {
l := len(data)
iNdEx := 0
for iNdEx < l {
var wire uint64
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
wire |= (uint64(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
fieldNum := int32(wire >> 3)
wireType := int(wire & 0x7)
switch fieldNum {
case 1:
if wireType != 2 {
return fmt.Errorf("proto: wrong wireType = %d for field TermsEntries", wireType)
}
var msglen int
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
msglen |= (int(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
postIndex := iNdEx + msglen
if msglen < 0 {
return ErrInvalidLengthUpsidedown
}
if postIndex > l {
return io.ErrUnexpectedEOF
}
m.TermsEntries = append(m.TermsEntries, &BackIndexTermsEntry{})
if err := m.TermsEntries[len(m.TermsEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
return err
}
iNdEx = postIndex
case 2:
if wireType != 2 {
return fmt.Errorf("proto: wrong wireType = %d for field StoredEntries", wireType)
}
var msglen int
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
msglen |= (int(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
postIndex := iNdEx + msglen
if msglen < 0 {
return ErrInvalidLengthUpsidedown
}
if postIndex > l {
return io.ErrUnexpectedEOF
}
m.StoredEntries = append(m.StoredEntries, &BackIndexStoreEntry{})
if err := m.StoredEntries[len(m.StoredEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
return err
}
iNdEx = postIndex
default:
var sizeOfWire int
for {
sizeOfWire++
wire >>= 7
if wire == 0 {
break
}
}
iNdEx -= sizeOfWire
skippy, err := skipUpsidedown(data[iNdEx:])
if err != nil {
return err
}
if skippy < 0 {
return ErrInvalidLengthUpsidedown
}
if (iNdEx + skippy) > l {
return io.ErrUnexpectedEOF
}
m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
iNdEx += skippy
}
}
return nil
}
func (x *BackIndexRowValue) Size() (n int) {
var l int
_ = l
if len(x.TermsEntries) > 0 {
for _, e := range x.TermsEntries {
l = e.Size()
n += 1 + l + sovUpsidedown(uint64(l))
}
}
if len(x.StoredEntries) > 0 {
for _, e := range x.StoredEntries {
l = e.Size()
n += 1 + l + sovUpsidedown(uint64(l))
}
}
return n
}
func skipUpsidedown(data []byte) (n int, err error) {
l := len(data)
iNdEx := 0
@@ -465,66 +402,6 @@ func skipUpsidedown(data []byte) (n int, err error) {
panic("unreachable")
}
var (
ErrInvalidLengthUpsidedown = fmt.Errorf("proto: negative length found during unmarshaling")
)
func (m *BackIndexTermsEntry) Size() (n int) {
var l int
_ = l
if m.Field != nil {
n += 1 + sovUpsidedown(uint64(*m.Field))
}
if len(m.Terms) > 0 {
for _, s := range m.Terms {
l = len(s)
n += 1 + l + sovUpsidedown(uint64(l))
}
}
if m.XXX_unrecognized != nil {
n += len(m.XXX_unrecognized)
}
return n
}
func (m *BackIndexStoreEntry) Size() (n int) {
var l int
_ = l
if m.Field != nil {
n += 1 + sovUpsidedown(uint64(*m.Field))
}
if len(m.ArrayPositions) > 0 {
for _, e := range m.ArrayPositions {
n += 1 + sovUpsidedown(uint64(e))
}
}
if m.XXX_unrecognized != nil {
n += len(m.XXX_unrecognized)
}
return n
}
func (m *BackIndexRowValue) Size() (n int) {
var l int
_ = l
if len(m.TermsEntries) > 0 {
for _, e := range m.TermsEntries {
l = e.Size()
n += 1 + l + sovUpsidedown(uint64(l))
}
}
if len(m.StoredEntries) > 0 {
for _, e := range m.StoredEntries {
l = e.Size()
n += 1 + l + sovUpsidedown(uint64(l))
}
}
if m.XXX_unrecognized != nil {
n += len(m.XXX_unrecognized)
}
return n
}
func sovUpsidedown(x uint64) (n int) {
for {
n++
@@ -535,150 +412,7 @@ func sovUpsidedown(x uint64) (n int) {
}
return n
}
func sozUpsidedown(x uint64) (n int) {
return sovUpsidedown(uint64((x << 1) ^ uint64((int64(x) >> 63))))
}
func (m *BackIndexTermsEntry) Marshal() (data []byte, err error) {
size := m.Size()
data = make([]byte, size)
n, err := m.MarshalTo(data)
if err != nil {
return nil, err
}
return data[:n], nil
}
func (m *BackIndexTermsEntry) MarshalTo(data []byte) (n int, err error) {
var i int
_ = i
var l int
_ = l
if m.Field == nil {
return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError)
} else {
data[i] = 0x8
i++
i = encodeVarintUpsidedown(data, i, uint64(*m.Field))
}
if len(m.Terms) > 0 {
for _, s := range m.Terms {
data[i] = 0x12
i++
l = len(s)
for l >= 1<<7 {
data[i] = uint8(uint64(l)&0x7f | 0x80)
l >>= 7
i++
}
data[i] = uint8(l)
i++
i += copy(data[i:], s)
}
}
if m.XXX_unrecognized != nil {
i += copy(data[i:], m.XXX_unrecognized)
}
return i, nil
}
func (m *BackIndexStoreEntry) Marshal() (data []byte, err error) {
size := m.Size()
data = make([]byte, size)
n, err := m.MarshalTo(data)
if err != nil {
return nil, err
}
return data[:n], nil
}
func (m *BackIndexStoreEntry) MarshalTo(data []byte) (n int, err error) {
var i int
_ = i
var l int
_ = l
if m.Field == nil {
return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError)
} else {
data[i] = 0x8
i++
i = encodeVarintUpsidedown(data, i, uint64(*m.Field))
}
if len(m.ArrayPositions) > 0 {
for _, num := range m.ArrayPositions {
data[i] = 0x10
i++
i = encodeVarintUpsidedown(data, i, uint64(num))
}
}
if m.XXX_unrecognized != nil {
i += copy(data[i:], m.XXX_unrecognized)
}
return i, nil
}
func (m *BackIndexRowValue) Marshal() (data []byte, err error) {
size := m.Size()
data = make([]byte, size)
n, err := m.MarshalTo(data)
if err != nil {
return nil, err
}
return data[:n], nil
}
func (m *BackIndexRowValue) MarshalTo(data []byte) (n int, err error) {
var i int
_ = i
var l int
_ = l
if len(m.TermsEntries) > 0 {
for _, msg := range m.TermsEntries {
data[i] = 0xa
i++
i = encodeVarintUpsidedown(data, i, uint64(msg.Size()))
n, err := msg.MarshalTo(data[i:])
if err != nil {
return 0, err
}
i += n
}
}
if len(m.StoredEntries) > 0 {
for _, msg := range m.StoredEntries {
data[i] = 0x12
i++
i = encodeVarintUpsidedown(data, i, uint64(msg.Size()))
n, err := msg.MarshalTo(data[i:])
if err != nil {
return 0, err
}
i += n
}
}
if m.XXX_unrecognized != nil {
i += copy(data[i:], m.XXX_unrecognized)
}
return i, nil
}
func encodeFixed64Upsidedown(data []byte, offset int, v uint64) int {
data[offset] = uint8(v)
data[offset+1] = uint8(v >> 8)
data[offset+2] = uint8(v >> 16)
data[offset+3] = uint8(v >> 24)
data[offset+4] = uint8(v >> 32)
data[offset+5] = uint8(v >> 40)
data[offset+6] = uint8(v >> 48)
data[offset+7] = uint8(v >> 56)
return offset + 8
}
func encodeFixed32Upsidedown(data []byte, offset int, v uint32) int {
data[offset] = uint8(v)
data[offset+1] = uint8(v >> 8)
data[offset+2] = uint8(v >> 16)
data[offset+3] = uint8(v >> 24)
return offset + 4
}
func encodeVarintUpsidedown(data []byte, offset int, v uint64) int {
for v >= 1<<7 {
data[offset] = uint8(v&0x7f | 0x80)
@@ -688,3 +422,70 @@ func encodeVarintUpsidedown(data []byte, offset int, v uint64) int {
data[offset] = uint8(v)
return offset + 1
}
var File_index_upsidedown_upsidedown_proto protoreflect.FileDescriptor
const file_index_upsidedown_upsidedown_proto_rawDesc = "" +
"\n" +
"!index/upsidedown/upsidedown.proto\"A\n" +
"\x13BackIndexTermsEntry\x12\x14\n" +
"\x05field\x18\x01 \x02(\rR\x05field\x12\x14\n" +
"\x05terms\x18\x02 \x03(\tR\x05terms\"S\n" +
"\x13BackIndexStoreEntry\x12\x14\n" +
"\x05field\x18\x01 \x02(\rR\x05field\x12&\n" +
"\x0earrayPositions\x18\x02 \x03(\x04R\x0earrayPositions\"\x89\x01\n" +
"\x11BackIndexRowValue\x128\n" +
"\ftermsEntries\x18\x01 \x03(\v2\x14.BackIndexTermsEntryR\ftermsEntries\x12:\n" +
"\rstoredEntries\x18\x02 \x03(\v2\x14.BackIndexStoreEntryR\rstoredEntries"
var (
file_index_upsidedown_upsidedown_proto_rawDescOnce sync.Once
file_index_upsidedown_upsidedown_proto_rawDescData []byte
)
func file_index_upsidedown_upsidedown_proto_rawDescGZIP() []byte {
file_index_upsidedown_upsidedown_proto_rawDescOnce.Do(func() {
file_index_upsidedown_upsidedown_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_index_upsidedown_upsidedown_proto_rawDesc), len(file_index_upsidedown_upsidedown_proto_rawDesc)))
})
return file_index_upsidedown_upsidedown_proto_rawDescData
}
var file_index_upsidedown_upsidedown_proto_msgTypes = make([]protoimpl.MessageInfo, 3)
var file_index_upsidedown_upsidedown_proto_goTypes = []any{
(*BackIndexTermsEntry)(nil), // 0: BackIndexTermsEntry
(*BackIndexStoreEntry)(nil), // 1: BackIndexStoreEntry
(*BackIndexRowValue)(nil), // 2: BackIndexRowValue
}
var file_index_upsidedown_upsidedown_proto_depIdxs = []int32{
0, // 0: BackIndexRowValue.termsEntries:type_name -> BackIndexTermsEntry
1, // 1: BackIndexRowValue.storedEntries:type_name -> BackIndexStoreEntry
2, // [2:2] is the sub-list for method output_type
2, // [2:2] is the sub-list for method input_type
2, // [2:2] is the sub-list for extension type_name
2, // [2:2] is the sub-list for extension extendee
0, // [0:2] is the sub-list for field type_name
}
func init() { file_index_upsidedown_upsidedown_proto_init() }
func file_index_upsidedown_upsidedown_proto_init() {
if File_index_upsidedown_upsidedown_proto != nil {
return
}
type x struct{}
out := protoimpl.TypeBuilder{
File: protoimpl.DescBuilder{
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
RawDescriptor: unsafe.Slice(unsafe.StringData(file_index_upsidedown_upsidedown_proto_rawDesc), len(file_index_upsidedown_upsidedown_proto_rawDesc)),
NumEnums: 0,
NumMessages: 3,
NumExtensions: 0,
NumServices: 0,
},
GoTypes: file_index_upsidedown_upsidedown_proto_goTypes,
DependencyIndexes: file_index_upsidedown_upsidedown_proto_depIdxs,
MessageInfos: file_index_upsidedown_upsidedown_proto_msgTypes,
}.Build()
File_index_upsidedown_upsidedown_proto = out.File
file_index_upsidedown_upsidedown_proto_goTypes = nil
file_index_upsidedown_upsidedown_proto_depIdxs = nil
}
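An illustrative sketch (not part of the diff, using only the generated types and hand-written helpers retained above): building a BackIndexRowValue and serializing it via Size/MarshalTo.
func exampleBackIndexRoundTrip() ([]byte, error) {
field := uint32(1)
row := &BackIndexRowValue{
StoredEntries: []*BackIndexStoreEntry{
{Field: &field, ArrayPositions: []uint64{0, 2}},
},
}
// Size computes the exact encoded length, so the buffer fits precisely
buf := make([]byte, row.Size())
n, err := row.MarshalTo(buf)
if err != nil {
return nil, err
}
return buf[:n], nil
}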


@@ -32,7 +32,7 @@ type indexAliasImpl struct {
indexes []Index
mutex sync.RWMutex
open bool
// if all the indexes in tha alias have the same mapping
// if all the indexes in that alias have the same mapping
// then the user can set the mapping here to avoid
// checking the mapping of each index in the alias
mapping mapping.IndexMapping
@@ -186,6 +186,7 @@ func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest
if len(i.indexes) < 1 {
return nil, ErrorAliasEmpty
}
if _, ok := ctx.Value(search.PreSearchKey).(bool); ok {
// since preSearchKey is set, it means that the request
// is being executed as part of a preSearch, which
@@ -227,6 +228,21 @@ func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest
return i.indexes[0].SearchInContext(ctx, req)
}
// rescorer will be set if score fusion is supposed to happen
// at this alias (root alias), else will be nil
var rescorer *rescorer
if _, ok := ctx.Value(search.ScoreFusionKey).(bool); !ok {
// new context will be used in internal functions to collect data
// as suitable for fusion. Rescorer is used for rescoring
// using fusion algorithms.
if IsScoreFusionRequested(req) {
ctx = context.WithValue(ctx, search.ScoreFusionKey, true)
rescorer = newRescorer(req)
rescorer.prepareSearchRequest()
defer rescorer.restoreSearchRequest()
}
}
// at this stage we know we have multiple indexes
// check if preSearchData needs to be gathered from all indexes
// before executing the query
@@ -236,6 +252,14 @@ func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest
// - the request requires preSearch
var preSearchDuration time.Duration
var sr *SearchResult
// fusionKnnHits stores the knn hits at the root alias.
// It is used with score fusion when the knn hits do not need to be
// sent to the leaf indexes in the search phase.
// Refer to constructPreSearchDataAndFusionKnnHits for more info.
// It is left nil if the knn hits must be sent to the leaf indexes
// again; otherwise it holds the knn hits.
var fusionKnnHits search.DocumentMatchCollection
flags, err := preSearchRequired(ctx, req, i.mapping)
if err != nil {
return nil, err
@@ -261,10 +285,10 @@ func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest
// if the request is satisfied by the preSearch result, then we can
// directly return the preSearch result as the final result
if requestSatisfiedByPreSearch(req, flags) {
sr = finalizeSearchResult(req, preSearchResult)
sr = finalizeSearchResult(ctx, req, preSearchResult, rescorer)
// no need to run the 2nd phase MultiSearch(..)
} else {
preSearchData, err = constructPreSearchData(req, flags, preSearchResult, i.indexes)
preSearchData, fusionKnnHits, err = constructPreSearchDataAndFusionKnnHits(req, flags, preSearchResult, rescorer, i.indexes)
if err != nil {
return nil, err
}
@@ -274,7 +298,8 @@ func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest
// check if search result was generated as part of preSearch itself
if sr == nil {
sr, err = MultiSearch(ctx, req, preSearchData, i.indexes...)
multiSearchParams := &multiSearchParams{preSearchData, rescorer, fusionKnnHits}
sr, err = MultiSearch(ctx, req, multiSearchParams, i.indexes...)
if err != nil {
return nil, err
}
@@ -653,7 +678,7 @@ func preSearch(ctx context.Context, req *SearchRequest, flags *preSearchFlags, i
// if the request is satisfied by just the preSearch result,
// finalize the result and return it directly without
// performing multi search
func finalizeSearchResult(req *SearchRequest, preSearchResult *SearchResult) *SearchResult {
func finalizeSearchResult(ctx context.Context, req *SearchRequest, preSearchResult *SearchResult, rescorer *rescorer) *SearchResult {
if preSearchResult == nil {
return nil
}
@@ -682,7 +707,16 @@ func finalizeSearchResult(req *SearchRequest, preSearchResult *SearchResult) *Se
if req.SearchAfter != nil {
preSearchResult.Hits = collector.FilterHitsBySearchAfter(preSearchResult.Hits, req.Sort, req.SearchAfter)
}
if rescorer != nil {
// rescore takes ftsHits and knnHits as first and second argument respectively
// since this is pure knn, set ftsHits to nil. preSearchResult.Hits contains knn results
preSearchResult.Hits, preSearchResult.Total, preSearchResult.MaxScore = rescorer.rescore(nil, preSearchResult.Hits)
rescorer.restoreSearchRequest()
}
preSearchResult.Hits = hitsInCurrentPage(req, preSearchResult.Hits)
if reverseQueryExecution {
// reverse the sort back to the original
req.Sort.Reverse()
@@ -759,6 +793,31 @@ func constructPreSearchData(req *SearchRequest, flags *preSearchFlags,
return mergedOut, nil
}
// Constructs the preSearch data required during the search phase and
// decides where the knn hits should live.
// If the knn hits are to be held at this alias: returns them separately
// (see the usage sketch after this function).
// If they must be sent to the leaf indexes: includes them in the preSearch data.
func constructPreSearchDataAndFusionKnnHits(req *SearchRequest, flags *preSearchFlags,
preSearchResult *SearchResult, rescorer *rescorer, indexes []Index,
) (map[string]map[string]interface{}, search.DocumentMatchCollection, error) {
var fusionknnhits search.DocumentMatchCollection
// Checks if we need to send the KNN hits to the indexes in the
// search phase. If there is score fusion enabled, we do not
// send the KNN hits to the indexes.
if rescorer != nil && flags.knn {
fusionknnhits = preSearchResult.Hits
preSearchResult.Hits = nil
}
preSearchData, err := constructPreSearchData(req, flags, preSearchResult, indexes)
if err != nil {
return nil, nil, err
}
return preSearchData, fusionknnhits, nil
}
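A hedged usage sketch (not from the diff) of how a caller might trigger the fusion path handled above. ScoreRRF and req.Score come from this changeset; AddKNN exists only in vector-enabled builds, and the field name is invented.
func exampleFusionSearch(alias IndexAlias, queryVec []float32) (*SearchResult, error) {
req := NewSearchRequest(NewMatchQuery("galaxy"))
req.Score = ScoreRRF // "rrf" makes IsScoreFusionRequested return true
req.AddKNN("embedding", queryVec, 10, 1.0) // hypothetical vector field
return alias.Search(req)
}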
func preSearchDataSearch(ctx context.Context, req *SearchRequest, flags *preSearchFlags, indexes ...Index) (*SearchResult, error) {
asyncResults := make(chan *asyncSearchResult, len(indexes))
// run search on each index in separate go routine
@@ -912,9 +971,16 @@ func hitsInCurrentPage(req *SearchRequest, hits []*search.DocumentMatch) []*sear
return hits
}
// Extra parameters for MultiSearch
type multiSearchParams struct {
preSearchData map[string]map[string]interface{}
rescorer *rescorer
fusionKnnHits search.DocumentMatchCollection
}
// MultiSearch executes a SearchRequest across multiple Index objects,
// then merges the results. The indexes must honor any ctx deadline.
func MultiSearch(ctx context.Context, req *SearchRequest, preSearchData map[string]map[string]interface{}, indexes ...Index) (*SearchResult, error) {
func MultiSearch(ctx context.Context, req *SearchRequest, params *multiSearchParams, indexes ...Index) (*SearchResult, error) {
searchStart := time.Now()
asyncResults := make(chan *asyncSearchResult, len(indexes))
@@ -939,8 +1005,8 @@ func MultiSearch(ctx context.Context, req *SearchRequest, preSearchData map[stri
waitGroup.Add(len(indexes))
for _, in := range indexes {
var payload map[string]interface{}
if preSearchData != nil {
payload = preSearchData[in.Name()]
if params.preSearchData != nil {
payload = params.preSearchData[in.Name()]
}
go searchChildIndex(in, createChildSearchRequest(req, payload))
}
@@ -980,6 +1046,11 @@ func MultiSearch(ctx context.Context, req *SearchRequest, preSearchData map[stri
}
}
if params.rescorer != nil {
sr.Hits, sr.Total, sr.MaxScore = params.rescorer.rescore(sr.Hits, params.fusionKnnHits)
params.rescorer.restoreSearchRequest()
}
sr.Hits = hitsInCurrentPage(req, sr.Hits)
// fix up facets


@@ -133,7 +133,7 @@ func newIndexUsing(path string, mapping mapping.IndexMapping, indexType string,
if err != nil {
return nil, err
}
err = rv.i.SetInternal(mappingInternalKey, mappingBytes)
err = rv.i.SetInternal(util.MappingInternalKey, mappingBytes)
if err != nil {
return nil, err
}
@@ -163,6 +163,9 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde
rv.meta.IndexType = upsidedown.Name
}
var um *mapping.IndexMappingImpl
var umBytes []byte
storeConfig := rv.meta.Config
if storeConfig == nil {
storeConfig = map[string]interface{}{}
@@ -173,6 +176,21 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde
storeConfig["error_if_exists"] = false
for rck, rcv := range runtimeConfig {
storeConfig[rck] = rcv
if rck == "updated_mapping" {
if val, ok := rcv.(string); ok {
if len(val) == 0 {
return nil, fmt.Errorf("updated_mapping is empty")
}
umBytes = []byte(val)
err = util.UnmarshalJSON(umBytes, &um)
if err != nil {
return nil, fmt.Errorf("error parsing updated_mapping into JSON: %v\nmapping contents:\n%v", err, rck)
}
} else {
return nil, fmt.Errorf("updated_mapping not of type string")
}
}
}
// open the index
@@ -185,15 +203,32 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde
if err != nil {
return nil, err
}
err = rv.i.Open()
if err != nil {
return nil, err
}
defer func(rv *indexImpl) {
if !rv.open {
rv.i.Close()
}
}(rv)
var ui index.UpdateIndex
if um != nil {
var ok bool
ui, ok = rv.i.(index.UpdateIndex)
if !ok {
return nil, fmt.Errorf("updated mapping present for unupdatable index")
}
// Load the meta data from bolt so that we can read the current index
// mapping to compare with
err = ui.OpenMeta()
if err != nil {
return nil, err
}
} else {
err = rv.i.Open()
if err != nil {
return nil, err
}
defer func(rv *indexImpl) {
if !rv.open {
rv.i.Close()
}
}(rv)
}
// now load the mapping
indexReader, err := rv.i.Reader()
@@ -206,7 +241,7 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde
}
}()
mappingBytes, err := indexReader.GetInternal(mappingInternalKey)
mappingBytes, err := indexReader.GetInternal(util.MappingInternalKey)
if err != nil {
return nil, err
}
@@ -217,19 +252,48 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde
return nil, fmt.Errorf("error parsing mapping JSON: %v\nmapping contents:\n%s", err, string(mappingBytes))
}
// validate the mapping
err = im.Validate()
if err != nil {
// no longer return usable index on error because there
// is a chance the index is not open at this stage
return nil, err
}
// Validate and update the index with the new mapping
if um != nil && ui != nil {
err = um.Validate()
if err != nil {
return nil, err
}
fieldInfo, err := DeletedFields(im, um)
if err != nil {
return nil, err
}
err = ui.UpdateFields(fieldInfo, umBytes)
if err != nil {
return nil, err
}
im = um
err = rv.i.Open()
if err != nil {
return nil, err
}
defer func(rv *indexImpl) {
if !rv.open {
rv.i.Close()
}
}(rv)
}
// mark the index as open
rv.mutex.Lock()
defer rv.mutex.Unlock()
rv.open = true
// validate the mapping
err = im.Validate()
if err != nil {
// note even if the mapping is invalid
// we still return an open usable index
return rv, err
}
rv.m = im
indexStats.Register(rv)
return rv, err
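A hedged caller-side sketch: supplying an updated mapping through the "updated_mapping" runtime config key handled above. OpenUsing is bleve's existing entry point; the JSON is assumed to be a marshaled IndexMappingImpl.
func exampleOpenWithUpdatedMapping(path string, updatedMappingJSON []byte) (Index, error) {
return OpenUsing(path, map[string]interface{}{
"updated_mapping": string(updatedMappingJSON),
})
}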
@@ -562,6 +626,21 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
}
}()
// rescorer will be set if score fusion is supposed to happen
// at this index (the root of the search), else will be nil
var rescorer *rescorer
if _, ok := ctx.Value(search.ScoreFusionKey).(bool); !ok {
// new context will be used in internal functions to collect data
// as suitable for hybrid search. Rescorer is used for rescoring
// using fusion algorithms.
if IsScoreFusionRequested(req) {
ctx = context.WithValue(ctx, search.ScoreFusionKey, true)
rescorer = newRescorer(req)
rescorer.prepareSearchRequest()
defer rescorer.restoreSearchRequest()
}
}
if _, ok := ctx.Value(search.PreSearchKey).(bool); ok {
preSearchResult, err := i.preSearch(ctx, req, indexReader)
if err != nil {
@@ -632,10 +711,21 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
}
}
}
if !skipKNNCollector && requestHasKNN(req) {
knnHits, err = i.runKnnCollector(ctx, req, indexReader, false)
if err != nil {
return nil, err
_, contextScoreFusionKeyExists := ctx.Value(search.ScoreFusionKey).(bool)
if !contextScoreFusionKeyExists {
// if no score fusion, default behaviour
if !skipKNNCollector && requestHasKNN(req) {
knnHits, err = i.runKnnCollector(ctx, req, indexReader, false)
if err != nil {
return nil, err
}
}
} else {
// if score fusion, run collect if rescorer is defined
if rescorer != nil && requestHasKNN(req) {
knnHits, err = i.runKnnCollector(ctx, req, indexReader, false)
}
}
@@ -650,7 +740,12 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
}
}
setKnnHitsInCollector(knnHits, req, coll)
// if score fusion, no faceting for knn hits is done
// hence we can skip setting the knn hits in the collector
if !contextScoreFusionKeyExists {
setKnnHitsInCollector(knnHits, req, coll)
}
if fts != nil {
if is, ok := indexReader.(*scorch.IndexSnapshot); ok {
@@ -859,6 +954,13 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
Facets: coll.FacetResults(),
}
// rescore if fusion flag is set
if rescorer != nil {
rv.Hits, rv.Total, rv.MaxScore = rescorer.rescore(rv.Hits, knnHits)
rescorer.restoreSearchRequest()
rv.Hits = hitsInCurrentPage(req, rv.Hits)
}
if req.Explain {
rv.Request = req
}

595
vendor/github.com/blevesearch/bleve/v2/index_update.go generated vendored Normal file

@@ -0,0 +1,595 @@
// Copyright (c) 2025 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bleve
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/v2/mapping"
index "github.com/blevesearch/bleve_index_api"
)
// Store all the fields that interact with the data
// from a document path
type pathInfo struct {
fieldMapInfo []*fieldMapInfo
dynamic bool
path string
analyser string
parentPath string
}
// Store the field information with respect to the
// document paths
type fieldMapInfo struct {
fieldMapping *mapping.FieldMapping
analyzer string
datetimeParser string
rootName string
parent *pathInfo
}
// Compare two index mappings to identify all of the updatable changes
func DeletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.UpdateFieldInfo, error) {
// Compare all of the top level fields in an index mapping
err := compareMappings(ori, upd)
if err != nil {
return nil, err
}
// Check for new mappings present in the type mappings
// of the updated compared to the original
for name, updDMapping := range upd.TypeMapping {
err = checkUpdatedMapping(ori.TypeMapping[name], updDMapping)
if err != nil {
return nil, err
}
}
// Check for new mappings present in the default mappings
// of the updated compared to the original
err = checkUpdatedMapping(ori.DefaultMapping, upd.DefaultMapping)
if err != nil {
return nil, err
}
oriPaths := make(map[string]*pathInfo)
updPaths := make(map[string]*pathInfo)
// Go through each mapping present in the original
// and consolidate according to the document paths
for name, oriDMapping := range ori.TypeMapping {
addPathInfo(oriPaths, "", oriDMapping, ori, nil, name)
}
addPathInfo(oriPaths, "", ori.DefaultMapping, ori, nil, "")
// Go through each mapping present in the updated
// and consolidate according to the document paths
for name, updDMapping := range upd.TypeMapping {
addPathInfo(updPaths, "", updDMapping, upd, nil, name)
}
addPathInfo(updPaths, "", upd.DefaultMapping, upd, nil, "")
// Compare all components of custom analysis currently in use
err = compareCustomComponents(oriPaths, updPaths, ori, upd)
if err != nil {
return nil, err
}
// Compare both the mappings based on the document paths
// and create a list of index, docvalues, store differences
// for every single field possible
fieldInfo := make(map[string]*index.UpdateFieldInfo)
for path, info := range oriPaths {
err = addFieldInfo(fieldInfo, info, updPaths[path])
if err != nil {
return nil, err
}
}
// Remove entries from the list with no changes between the
// original and the updated mapping
for name, info := range fieldInfo {
if !info.Deleted && !info.Index && !info.DocValues && !info.Store {
delete(fieldInfo, name)
}
// A field cannot be completely deleted with any dynamic value turned on
if info.Deleted {
if upd.IndexDynamic {
return nil, fmt.Errorf("Mapping cannot be removed when index dynamic is true")
}
if upd.StoreDynamic {
return nil, fmt.Errorf("Mapping cannot be removed when store dynamic is true")
}
if upd.DocValuesDynamic {
return nil, fmt.Errorf("Mapping cannot be removed when docvalues dynamic is true")
}
}
}
return fieldInfo, nil
}
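A hedged example of an update DeletedFields should accept, assuming the package-internal context of this file: disabling store on a non-dynamic field kept out of _all yields an UpdateFieldInfo with only Store set.
func exampleDeletedFields() (map[string]*index.UpdateFieldInfo, error) {
orig := mapping.NewIndexMapping()
orig.IndexDynamic = false // dynamic mappings cannot have fields narrowed or removed
fm := mapping.NewTextFieldMapping()
fm.IncludeInAll = false
orig.DefaultMapping.AddFieldMappingsAt("title", fm)
upd := mapping.NewIndexMapping()
upd.IndexDynamic = false
fm2 := mapping.NewTextFieldMapping()
fm2.IncludeInAll = false
fm2.Store = false // the only change: stop storing the field
upd.DefaultMapping.AddFieldMappingsAt("title", fm2)
// expected: map["title"] = &index.UpdateFieldInfo{Store: true}
return DeletedFields(orig, upd)
}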
// Ensures none of the top level index mapping fields have changed
func compareMappings(ori, upd *mapping.IndexMappingImpl) error {
if ori.TypeField != upd.TypeField &&
(len(ori.TypeMapping) != 0 || len(upd.TypeMapping) != 0) {
return fmt.Errorf("type field cannot be changed when type mappings are present")
}
if ori.DefaultType != upd.DefaultType {
return fmt.Errorf("default type cannot be changed")
}
if ori.IndexDynamic != upd.IndexDynamic {
return fmt.Errorf("index dynamic cannot be changed")
}
if ori.StoreDynamic != upd.StoreDynamic {
return fmt.Errorf("store dynamic cannot be changed")
}
if ori.DocValuesDynamic != upd.DocValuesDynamic {
return fmt.Errorf("docvalues dynamic cannot be changed")
}
if ori.DefaultAnalyzer != upd.DefaultAnalyzer && upd.IndexDynamic {
return fmt.Errorf("default analyser cannot be changed if index dynamic is true")
}
if ori.DefaultDateTimeParser != upd.DefaultDateTimeParser && upd.IndexDynamic {
return fmt.Errorf("default datetime parser cannot be changed if index dynamic is true")
}
// Scoring model changes between "", "tf-idf" and "bm25" require no index changes to be made
if ori.ScoringModel != upd.ScoringModel {
if ori.ScoringModel != "" && ori.ScoringModel != index.TFIDFScoring && ori.ScoringModel != index.BM25Scoring ||
upd.ScoringModel != "" && upd.ScoringModel != index.TFIDFScoring && upd.ScoringModel != index.BM25Scoring {
return fmt.Errorf("scoring model can only be changed between \"\", %q and %q", index.TFIDFScoring, index.BM25Scoring)
}
}
return nil
}
// Ensures updated document mapping does not contain new
// field mappings or document mappings
func checkUpdatedMapping(ori, upd *mapping.DocumentMapping) error {
// Check to verify both original and updated are not nil
// and are enabled before proceeding
if ori == nil {
if upd == nil || !upd.Enabled {
return nil
}
return fmt.Errorf("updated index mapping contains new properties")
}
if upd == nil || !upd.Enabled {
return nil
}
var err error
// Recursively go through the child mappings
for name, updDMapping := range upd.Properties {
err = checkUpdatedMapping(ori.Properties[name], updDMapping)
if err != nil {
return err
}
}
// Simple checks to ensure no new field mappings present
// in updated
for _, updFMapping := range upd.Fields {
var oriFMapping *mapping.FieldMapping
for _, fMapping := range ori.Fields {
if updFMapping.Name == fMapping.Name {
oriFMapping = fMapping
}
}
if oriFMapping == nil {
return fmt.Errorf("updated index mapping contains new fields")
}
}
return nil
}
// Adds all of the field mappings while maintaining a tree of the document structure
// to ensure traversal and verification is possible in case of multiple mappings defined
// for a single field or multiple document fields' data getting written to a single zapx field
func addPathInfo(paths map[string]*pathInfo, name string, mp *mapping.DocumentMapping,
im *mapping.IndexMappingImpl, parent *pathInfo, rootName string) {
// Early exit if mapping has been disabled
// Comparisons later on will be done with a nil object
if !mp.Enabled {
return
}
// Consolidate path information like index dynamic across multiple
// mappings if path is the same
var pInfo *pathInfo
if val, ok := paths[name]; ok {
pInfo = val
} else {
pInfo = &pathInfo{
fieldMapInfo: make([]*fieldMapInfo, 0),
}
pInfo.dynamic = mp.Dynamic && im.IndexDynamic
pInfo.analyser = im.AnalyzerNameForPath(name)
}
pInfo.dynamic = (pInfo.dynamic || mp.Dynamic) && im.IndexDynamic
pInfo.path = name
if parent != nil {
pInfo.parentPath = parent.path
}
// Recursively add path information for all child mappings
for cName, cMapping := range mp.Properties {
var pathName string
if name == "" {
pathName = cName
} else {
pathName = name + "." + cName
}
addPathInfo(paths, pathName, cMapping, im, pInfo, rootName)
}
// Add field mapping information keeping the document structure intact
for _, fMap := range mp.Fields {
fieldMapInfo := &fieldMapInfo{
fieldMapping: fMap,
rootName: rootName,
parent: pInfo,
}
pInfo.fieldMapInfo = append(pInfo.fieldMapInfo, fieldMapInfo)
}
paths[name] = pInfo
}
// Compares all of the custom analysis components in use
func compareCustomComponents(oriPaths, updPaths map[string]*pathInfo, ori, upd *mapping.IndexMappingImpl) error {
// Compare all analysers currently in use
err := compareAnalysers(oriPaths, updPaths, ori, upd)
if err != nil {
return err
}
// Compare all datetime parsers currently in use
err = compareDateTimeParsers(oriPaths, updPaths, ori, upd)
if err != nil {
return err
}
// Compare all synonym sources
err = compareSynonymSources(ori, upd)
if err != nil {
return err
}
// Compare all char filters, tokenizers, token filters and token maps
err = compareAnalyserSubcomponents(ori, upd)
if err != nil {
return err
}
return nil
}
// Compares all analysers currently in use
// Standard analysers not in custom analysis are not compared
// Analysers in custom analysis but not in use are not compared
func compareAnalysers(oriPaths, updPaths map[string]*pathInfo, ori, upd *mapping.IndexMappingImpl) error {
oriAnalyzers := make(map[string]interface{})
updAnalyzers := make(map[string]interface{})
extractAnalyzers := func(paths map[string]*pathInfo, customAnalyzers map[string]map[string]interface{},
analyzers map[string]interface{}, indexMapping *mapping.IndexMappingImpl) {
for path, info := range paths {
for _, fInfo := range info.fieldMapInfo {
if fInfo.fieldMapping.Type == "text" {
analyzerName := indexMapping.AnalyzerNameForPath(path)
fInfo.analyzer = analyzerName
if val, ok := customAnalyzers[analyzerName]; ok {
analyzers[analyzerName] = val
}
}
}
}
}
extractAnalyzers(oriPaths, ori.CustomAnalysis.Analyzers, oriAnalyzers, ori)
extractAnalyzers(updPaths, upd.CustomAnalysis.Analyzers, updAnalyzers, upd)
for name, anUpd := range updAnalyzers {
if anOri, ok := oriAnalyzers[name]; ok {
if !reflect.DeepEqual(anUpd, anOri) {
return fmt.Errorf("analyser %s changed while being used by fields", name)
}
} else {
return fmt.Errorf("analyser %s newly added to an existing field", name)
}
}
return nil
}
// Compares all date time parsers currently in use
// Date time parsers in custom analysis but not in use are not compared
func compareDateTimeParsers(oriPaths, updPaths map[string]*pathInfo, ori, upd *mapping.IndexMappingImpl) error {
oriDateTimeParsers := make(map[string]interface{})
updDateTimeParsers := make(map[string]interface{})
extractDateTimeParsers := func(paths map[string]*pathInfo, customParsers map[string]map[string]interface{},
parsers map[string]interface{}, indexMapping *mapping.IndexMappingImpl) {
for _, info := range paths {
for _, fInfo := range info.fieldMapInfo {
if fInfo.fieldMapping.Type == "datetime" {
parserName := fInfo.fieldMapping.DateFormat
if parserName == "" {
parserName = indexMapping.DefaultDateTimeParser
}
fInfo.datetimeParser = parserName
if val, ok := customParsers[parserName]; ok {
parsers[parserName] = val
}
}
}
}
}
extractDateTimeParsers(oriPaths, ori.CustomAnalysis.DateTimeParsers, oriDateTimeParsers, ori)
extractDateTimeParsers(updPaths, upd.CustomAnalysis.DateTimeParsers, updDateTimeParsers, upd)
for name, dtUpd := range updDateTimeParsers {
if dtOri, ok := oriDateTimeParsers[name]; ok {
if !reflect.DeepEqual(dtUpd, dtOri) {
return fmt.Errorf("datetime parser %s changed while being used by fields", name)
}
} else {
return fmt.Errorf("datetime parser %s added to an existing field", name)
}
}
return nil
}
// Compares all synonym sources
// Synonym sources currently not in use are also compared
func compareSynonymSources(ori, upd *mapping.IndexMappingImpl) error {
if !reflect.DeepEqual(ori.CustomAnalysis.SynonymSources, upd.CustomAnalysis.SynonymSources) {
return fmt.Errorf("synonym sources cannot be changed")
}
return nil
}
// Compares all char filters, tokenizers, token filters and token maps
// Components not currently in use are also compared
func compareAnalyserSubcomponents(ori, upd *mapping.IndexMappingImpl) error {
if !reflect.DeepEqual(ori.CustomAnalysis.CharFilters, upd.CustomAnalysis.CharFilters) {
return fmt.Errorf("char filters cannot be changed")
}
if !reflect.DeepEqual(ori.CustomAnalysis.TokenFilters, upd.CustomAnalysis.TokenFilters) {
return fmt.Errorf("token filters cannot be changed")
}
if !reflect.DeepEqual(ori.CustomAnalysis.TokenMaps, upd.CustomAnalysis.TokenMaps) {
return fmt.Errorf("token maps cannot be changed")
}
if !reflect.DeepEqual(ori.CustomAnalysis.Tokenizers, upd.CustomAnalysis.Tokenizers) {
return fmt.Errorf("tokenizers cannot be changed")
}
return nil
}
// Compare all of the fields at a particular document path and add its field information
func addFieldInfo(fInfo map[string]*index.UpdateFieldInfo, ori, upd *pathInfo) error {
var info *index.UpdateFieldInfo
var err error
// Assume deleted or disabled mapping if upd is nil. Checks for ori being nil
// or upd having mappings not in ori have already been done before this stage
if upd == nil {
for _, oriFMapInfo := range ori.fieldMapInfo {
info, err = compareFieldMapping(oriFMapInfo.fieldMapping, nil)
if err != nil {
return err
}
err = validateFieldInfo(info, fInfo, ori, oriFMapInfo)
if err != nil {
return err
}
}
} else {
if upd.dynamic && ori.analyser != upd.analyser {
return fmt.Errorf("analyser has been changed for a dynamic mapping")
}
for _, oriFMapInfo := range ori.fieldMapInfo {
var updFMap *mapping.FieldMapping
var updAnalyser string
var updDatetimeParser string
// For multiple fields at a single document path, compare
// only with the matching ones
for _, updFMapInfo := range upd.fieldMapInfo {
if oriFMapInfo.rootName == updFMapInfo.rootName &&
oriFMapInfo.fieldMapping.Name == updFMapInfo.fieldMapping.Name {
updFMap = updFMapInfo.fieldMapping
if updFMap.Type == "text" {
updAnalyser = updFMapInfo.analyzer
} else if updFMap.Type == "datetime" {
updDatetimeParser = updFMapInfo.datetimeParser
}
}
}
// Compare analyser, datetime parser and synonym source before comparing
// the field mapping as it might not have this information
if updAnalyser != "" && oriFMapInfo.analyzer != updAnalyser {
return fmt.Errorf("analyser has been changed for a text field")
}
if updDatetimeParser != "" && oriFMapInfo.datetimeParser != updDatetimeParser {
return fmt.Errorf("datetime parser has been changed for a date time field")
}
info, err = compareFieldMapping(oriFMapInfo.fieldMapping, updFMap)
if err != nil {
return err
}
// Validate to ensure change is possible
// Needed if multiple mappings are aliased to the same field
err = validateFieldInfo(info, fInfo, ori, oriFMapInfo)
if err != nil {
return err
}
}
}
if err != nil {
return err
}
return nil
}
// Compares two field mappings against each other, checking for changes in index, store, doc values
// and complete deletion of the mapping, while noting whether the changes are doable based on
// other values like includeInAll and dynamic.
// The first return argument gives an empty fieldInfo if no changes were detected.
// The second return argument is an error explaining exactly why a change is not possible.
func compareFieldMapping(original, updated *mapping.FieldMapping) (*index.UpdateFieldInfo, error) {
rv := &index.UpdateFieldInfo{}
if updated == nil {
if original != nil && !original.IncludeInAll {
rv.Deleted = true
return rv, nil
} else if original == nil {
return nil, fmt.Errorf("both field mappings cannot be nil")
}
return nil, fmt.Errorf("deleted field present in '_all' field")
} else if original == nil {
return nil, fmt.Errorf("matching field not found in original index mapping")
}
if original.Type != updated.Type {
return nil, fmt.Errorf("field type cannot be updated")
}
if original.Type == "text" {
if original.Analyzer != updated.Analyzer {
return nil, fmt.Errorf("analyzer cannot be updated for text fields")
}
}
if original.Type == "datetime" {
if original.DateFormat != updated.DateFormat {
return nil, fmt.Errorf("dateFormat cannot be updated for datetime fields")
}
}
if original.Type == "vector" || original.Type == "vector_base64" {
if original.Dims != updated.Dims {
return nil, fmt.Errorf("dimensions cannot be updated for vector and vector_base64 fields")
}
if original.Similarity != updated.Similarity {
return nil, fmt.Errorf("similarity cannot be updated for vector and vector_base64 fields")
}
if original.VectorIndexOptimizedFor != updated.VectorIndexOptimizedFor {
return nil, fmt.Errorf("vectorIndexOptimizedFor cannot be updated for vector and vector_base64 fields")
}
}
if original.IncludeInAll != updated.IncludeInAll {
return nil, fmt.Errorf("includeInAll cannot be changed")
}
if original.IncludeTermVectors != updated.IncludeTermVectors {
return nil, fmt.Errorf("includeTermVectors cannot be changed")
}
if original.SkipFreqNorm != updated.SkipFreqNorm {
return nil, fmt.Errorf("skipFreqNorm cannot be changed")
}
// Updating is not possible if store changes from true
// to false when the field is included in _all
if original.Store != updated.Store {
if updated.Store {
return nil, fmt.Errorf("store cannot be changed from false to true")
} else if updated.IncludeInAll {
return nil, fmt.Errorf("store cannot be changed if field present in `_all' field")
} else {
rv.Store = true
}
}
// Updating is not possible if index changes from true
// to false when the field is included in _all
if original.Index != updated.Index {
if updated.Index {
return nil, fmt.Errorf("index cannot be changed from false to true")
} else if updated.IncludeInAll {
return nil, fmt.Errorf("index cannot be changed if field present in `_all' field")
} else {
rv.Index = true
rv.DocValues = true
}
}
// Updating is not possible if docvalues changes from true
// to false when the field is included in _all
if original.DocValues != updated.DocValues {
if updated.DocValues {
return nil, fmt.Errorf("docvalues cannot be changed from false to true")
} else if updated.IncludeInAll {
return nil, fmt.Errorf("docvalues cannot be changed if field present in `_all' field")
} else {
rv.DocValues = true
}
}
return rv, nil
}
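A hedged illustration of what this comparison accepts versus rejects: switching index off for a field kept out of _all is updatable (and implies dropping its doc values), whereas changing the field type would return an error.
func exampleCompareFieldMapping() (*index.UpdateFieldInfo, error) {
orig := mapping.NewTextFieldMapping()
orig.IncludeInAll = false
upd := mapping.NewTextFieldMapping()
upd.IncludeInAll = false
upd.Index = false // allowed: true -> false, field not part of _all
// expected: info.Index and info.DocValues are true, err is nil
return compareFieldMapping(orig, upd)
}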
// After identifying changes, validate against the existing changes in case of duplicate fields.
// In such a situation, any conflicting changes found will abort the update process
func validateFieldInfo(newInfo *index.UpdateFieldInfo, fInfo map[string]*index.UpdateFieldInfo,
ori *pathInfo, oriFMapInfo *fieldMapInfo) error {
var name string
if oriFMapInfo.parent.parentPath == "" {
if oriFMapInfo.fieldMapping.Name == "" {
name = oriFMapInfo.parent.path
} else {
name = oriFMapInfo.fieldMapping.Name
}
} else {
if oriFMapInfo.fieldMapping.Name == "" {
name = oriFMapInfo.parent.parentPath + "." + oriFMapInfo.parent.path
} else {
name = oriFMapInfo.parent.parentPath + "." + oriFMapInfo.fieldMapping.Name
}
}
if (newInfo.Deleted || newInfo.Index || newInfo.DocValues || newInfo.Store) && ori.dynamic {
return fmt.Errorf("updated field is under a dynamic property")
}
if oldInfo, ok := fInfo[name]; ok {
if !reflect.DeepEqual(oldInfo, newInfo) {
return fmt.Errorf("updated field impossible to verify because multiple mappings point to the same field name")
}
} else {
fInfo[name] = newInfo
}
return nil
}


@@ -13,7 +13,7 @@ var interleaveMagic = []uint64{
var interleaveShift = []uint{1, 2, 4, 8, 16}
// Interleave the first 32 bits of each uint64
// apdated from org.apache.lucene.util.BitUtil
// adapted from org.apache.lucene.util.BitUtil
// which was adapted from:
// http://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN
func Interleave(v1, v2 uint64) uint64 {


@@ -30,7 +30,7 @@ func RegisterKVStore(name string, constructor KVStoreConstructor) error {
}
// KVStoreConstructor is used to build a KVStore of a specific type when
// specificied by the index configuration. In addition to meeting the
// specified by the index configuration. In addition to meeting the
// store.KVStore interface, KVStores must also support this constructor.
// Note that currently the values of config must
// be able to be marshaled and unmarshaled using the encoding/json library (used

162
vendor/github.com/blevesearch/bleve/v2/rescorer.go generated vendored Normal file

@@ -0,0 +1,162 @@
// Copyright (c) 2025 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bleve
import (
"github.com/blevesearch/bleve/v2/fusion"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/query"
)
const (
DefaultScoreRankConstant = 60
)
// Rescorer is applied after all the query and knn results are obtained.
// The main use of Rescorer is in hybrid search; all the individual scores
// for query and knn are combined using Rescorer. Makes use of algorithms
// defined in `fusion`
type rescorer struct {
req *SearchRequest
// Stores the original From, Size and Boost parameters from the request
origFrom int
origSize int
origBoosts []float64
// Flag variable to make sure that restoreSearchRequest is only run once
// when it is deferred
restored bool
}
// Stores information about the hybrid search into the rescorer.
// Also mutates the SearchRequest by:
// - Setting boosts to 1: top level boosts only used for rescoring
// - Setting From and Size to 0 and ScoreWindowSize
func (r *rescorer) prepareSearchRequest() error {
if r.req.Params == nil {
r.req.Params = NewDefaultParams(r.req.From, r.req.Size)
}
r.origFrom = r.req.From
r.origSize = r.req.Size
r.req.From = 0
r.req.Size = r.req.Params.ScoreWindowSize
// req.Query's top level boost comes first, followed by the KNN queries
numQueries := numKNNQueries(r.req) + 1
r.origBoosts = make([]float64, numQueries)
// only modify queries if it is boostable. If not, ignore
if bQuery, ok := r.req.Query.(query.BoostableQuery); ok {
r.origBoosts[0] = bQuery.Boost()
bQuery.SetBoost(1.0)
} else {
r.origBoosts[0] = 1.0
}
// for all the knn queries, replace boost values
r.prepareKnnRequest()
return nil
}
func (r *rescorer) restoreSearchRequest() {
// Skip if already restored
if r.restored {
return
}
r.restored = true
r.req.From = r.origFrom
r.req.Size = r.origSize
if bQuery, ok := r.req.Query.(query.BoostableQuery); ok {
bQuery.SetBoost(r.origBoosts[0])
}
// for all the knn queries, restore boost values
r.restoreKnnRequest()
}
func (r *rescorer) rescore(ftsHits, knnHits search.DocumentMatchCollection) (search.DocumentMatchCollection, uint64, float64) {
mergedHits := r.mergeDocs(ftsHits, knnHits)
var fusionResult *fusion.FusionResult
switch r.req.Score {
case ScoreRRF:
res := fusion.ReciprocalRankFusion(
mergedHits,
r.origBoosts,
r.req.Params.ScoreRankConstant,
r.req.Params.ScoreWindowSize,
numKNNQueries(r.req),
r.req.Explain,
)
fusionResult = &res
case ScoreRSF:
res := fusion.RelativeScoreFusion(
mergedHits,
r.origBoosts,
r.req.Params.ScoreWindowSize,
numKNNQueries(r.req),
r.req.Explain,
)
fusionResult = &res
}
return fusionResult.Hits, fusionResult.Total, fusionResult.MaxScore
}
// Merge all the FTS and KNN docs along with explanations
func (r *rescorer) mergeDocs(ftsHits, knnHits search.DocumentMatchCollection) search.DocumentMatchCollection {
if len(knnHits) == 0 {
return ftsHits
}
knnHitMap := make(map[string]*search.DocumentMatch, len(knnHits))
for _, hit := range knnHits {
knnHitMap[hit.ID] = hit
}
for _, hit := range ftsHits {
if knnHit, ok := knnHitMap[hit.ID]; ok {
hit.ScoreBreakdown = knnHit.ScoreBreakdown
if r.req.Explain {
hit.Expl = &search.Explanation{Value: 0.0, Message: "", Children: append([]*search.Explanation{hit.Expl}, knnHit.Expl.Children...)}
}
delete(knnHitMap, hit.ID)
}
}
for _, hit := range knnHitMap {
hit.Score = 0
ftsHits = append(ftsHits, hit)
if r.req.Explain {
hit.Expl = &search.Explanation{Value: 0.0, Message: "", Children: append([]*search.Explanation{nil}, hit.Expl.Children...)}
}
}
return ftsHits
}
func newRescorer(req *SearchRequest) *rescorer {
return &rescorer{
req: req,
}
}
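For intuition, a hedged by-hand version of the standard reciprocal rank fusion formula the fusion package is expected to apply: score(d) = sum over result lists i of boost[i] / (k + rank_i(d)), with k defaulting to DefaultScoreRankConstant (60). The exact upstream weighting may differ.
// ranks maps a document ID to its 1-based rank in each result list (0 = absent)
func rrfByHand(ranks map[string][]int, boosts []float64, k int) map[string]float64 {
scores := make(map[string]float64)
for docID, rs := range ranks {
for i, r := range rs {
if r == 0 {
continue // the document did not appear in result list i
}
scores[docID] += boosts[i] / float64(k+r)
}
}
return scores
}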


@@ -18,6 +18,7 @@ import (
"fmt"
"reflect"
"sort"
"strconv"
"time"
"github.com/blevesearch/bleve/v2/analysis"
@@ -47,6 +48,15 @@ var cache = registry.NewCache()
const defaultDateTimeParser = optional.Name
const (
ScoreDefault = ""
ScoreNone = "none"
ScoreRRF = "rrf"
ScoreRSF = "rsf"
)
var AllowedFusionSort = search.SortOrder{&search.SortScore{Desc: true}}
type dateTimeRange struct {
Name string `json:"name,omitempty"`
Start time.Time `json:"start,omitempty"`
@@ -311,13 +321,71 @@ func (r *SearchRequest) Validate() error {
}
}
err := validateKNN(r)
err := r.validatePagination()
if err != nil {
return err
}
if IsScoreFusionRequested(r) {
if r.SearchAfter != nil || r.SearchBefore != nil {
return fmt.Errorf("cannot use search after or search before with score fusion")
}
if r.Sort != nil {
if !reflect.DeepEqual(r.Sort, AllowedFusionSort) {
return fmt.Errorf("sort must be empty or descending order of score for score fusion")
}
}
}
err = validateKNN(r)
if err != nil {
return err
}
return r.Facets.Validate()
}
// Validates SearchAfter/SearchBefore
func (r *SearchRequest) validatePagination() error {
var pagination []string
var afterOrBefore string
if r.SearchAfter != nil {
pagination = r.SearchAfter
afterOrBefore = "search after"
} else if r.SearchBefore != nil {
pagination = r.SearchBefore
afterOrBefore = "search before"
} else {
return nil
}
for i := range pagination {
switch ss := r.Sort[i].(type) {
case *search.SortGeoDistance:
_, err := strconv.ParseFloat(pagination[i], 64)
if err != nil {
return fmt.Errorf("invalid %s value for sort field '%s': '%s'. %s", afterOrBefore, ss.Field, pagination[i], err)
}
case *search.SortField:
switch ss.Type {
case search.SortFieldAsNumber:
_, err := strconv.ParseFloat(pagination[i], 64)
if err != nil {
return fmt.Errorf("invalid %s value for sort field '%s': '%s'. %s", afterOrBefore, ss.Field, pagination[i], err)
}
case search.SortFieldAsDate:
_, err := time.Parse(time.RFC3339Nano, pagination[i])
if err != nil {
return fmt.Errorf("invalid %s value for sort field '%s': '%s'. %s", afterOrBefore, ss.Field, pagination[i], err)
}
}
}
}
return nil
}
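A hedged sketch of values that pass the validation above when the sort includes a typed date field and a numeric field; the field names are invented.
func exampleSearchAfter(req *SearchRequest) {
req.Sort = search.SortOrder{
&search.SortField{Field: "created_at", Type: search.SortFieldAsDate, Desc: true},
&search.SortField{Field: "views", Type: search.SortFieldAsNumber},
}
// RFC3339Nano for the date, a parseable float for the number
req.SetSearchAfter([]string{"2025-10-23T14:17:50Z", "42"})
}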
// AddFacet adds a FacetRequest to this SearchRequest
func (r *SearchRequest) AddFacet(facetName string, f *FacetRequest) {
if r.Facets == nil {
@@ -353,6 +421,11 @@ func (r *SearchRequest) SetSearchBefore(before []string) {
r.SearchBefore = before
}
// AddParams adds a RequestParams field to the search request
func (r *SearchRequest) AddParams(params RequestParams) {
r.Params = &params
}
// NewSearchRequest creates a new SearchRequest
// for the Query, using default values for all
// other search parameters.
@@ -377,7 +450,7 @@ func NewSearchRequestOptions(q query.Query, size, from int, explain bool) *Searc
// IndexErrMap tracks errors with the name of the index where it occurred
type IndexErrMap map[string]error
// MarshalJSON seralizes the error into a string for JSON consumption
// MarshalJSON serializes the error into a string for JSON consumption
func (iem IndexErrMap) MarshalJSON() ([]byte, error) {
tmp := make(map[string]string, len(iem))
for k, v := range iem {
@@ -398,7 +471,7 @@ func (iem IndexErrMap) UnmarshalJSON(data []byte) error {
return nil
}
// SearchStatus is a secion in the SearchResult reporting how many
// SearchStatus is a section in the SearchResult reporting how many
// underlying indexes were queried, how many were successful/failed
// and a map of any errors that were encountered
type SearchStatus struct {
@@ -433,7 +506,7 @@ func (ss *SearchStatus) Merge(other *SearchStatus) {
// scores, score explanation, location info and so on.
// Total - The total number of documents that matched the query.
// Cost - indicates how expensive was the query with respect to bytes read
// from the mmaped index files.
// from the mapped index files.
// MaxScore - The maximum score seen across all document hits seen for this query.
// Took - The time taken to execute the search.
// Facets - The facet results for the search.
@@ -607,3 +680,79 @@ func isMatchAllQuery(q query.Query) bool {
_, ok := q.(*query.MatchAllQuery)
return ok
}
// Checks if the request is hybrid search. Currently supports: RRF, RSF.
func IsScoreFusionRequested(req *SearchRequest) bool {
switch req.Score {
case ScoreRRF, ScoreRSF:
return true
default:
return false
}
}
// Additional parameters in the search request. Currently only being
// used for score fusion parameters.
type RequestParams struct {
ScoreRankConstant int `json:"score_rank_constant,omitempty"`
ScoreWindowSize int `json:"score_window_size,omitempty"`
}
func NewDefaultParams(from, size int) *RequestParams {
return &RequestParams{
ScoreRankConstant: DefaultScoreRankConstant,
ScoreWindowSize: from + size,
}
}
func (p *RequestParams) UnmarshalJSON(input []byte) error {
var temp struct {
ScoreRankConstant *int `json:"score_rank_constant,omitempty"`
ScoreWindowSize *int `json:"score_window_size,omitempty"`
}
if err := util.UnmarshalJSON(input, &temp); err != nil {
return err
}
if temp.ScoreRankConstant != nil {
p.ScoreRankConstant = *temp.ScoreRankConstant
}
if temp.ScoreWindowSize != nil {
p.ScoreWindowSize = *temp.ScoreWindowSize
}
return nil
}
func (p *RequestParams) Validate(size int) error {
if p.ScoreWindowSize < 1 {
return fmt.Errorf("score window size must be greater than 0")
} else if p.ScoreWindowSize < size {
return fmt.Errorf("score window size must be greater than or equal to Size (%d)", size)
}
return nil
}
func ParseParams(r *SearchRequest, input []byte) (*RequestParams, error) {
params := NewDefaultParams(r.From, r.Size)
if len(input) == 0 {
return params, nil
}
err := util.UnmarshalJSON(input, params)
if err != nil {
return nil, err
}
// validate params
err = params.Validate(r.Size)
if err != nil {
return nil, err
}
return params, nil
}
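A hedged end-to-end sketch of the params plumbing above: defaults derive from From and Size, JSON overrides them, and Validate enforces the window lower bound.
func exampleParseParams(req *SearchRequest) (*RequestParams, error) {
raw := []byte(`{"score_rank_constant": 20, "score_window_size": 100}`)
// with empty input, ParseParams returns NewDefaultParams(req.From, req.Size) unchanged
return ParseParams(req, raw)
}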


@@ -20,6 +20,7 @@ import (
"strconv"
"time"
"github.com/blevesearch/bleve/v2/numeric"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
@@ -117,9 +118,15 @@ func newTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector
return hc
}
// Creates a dummy document to compare with for pagination.
func createSearchAfterDocument(sort search.SortOrder, after []string) *search.DocumentMatch {
encodedAfter := make([]string, len(after))
for i, ss := range sort {
encodedAfter[i] = encodeSearchAfter(ss, after[i])
}
rv := &search.DocumentMatch{
Sort: after,
Sort: encodedAfter,
}
for pos, ss := range sort {
if ss.RequiresDocID() {
@@ -134,6 +141,46 @@ func createSearchAfterDocument(sort search.SortOrder, after []string) *search.Do
return rv
}
// encodeSearchAfter applies prefix-coding to SearchAfter
// if required to enable pagination on numeric, datetime,
// and geo fields
func encodeSearchAfter(ss search.SearchSort, after string) string {
encodeFloat := func() string {
f64, _ := strconv.ParseFloat(after, 64) // error checking in SearchRequest.Validate
i64 := numeric.Float64ToInt64(f64)
return string(numeric.MustNewPrefixCodedInt64(i64, 0))
}
encodeDate := func() string {
t, _ := time.Parse(time.RFC3339Nano, after) // error checking in SearchRequest.Validate
i64 := t.UnixNano()
return string(numeric.MustNewPrefixCodedInt64(i64, 0))
}
switch ss := ss.(type) {
case *search.SortGeoDistance:
return encodeFloat()
case *search.SortField:
switch ss.Type {
case search.SortFieldAsNumber:
return encodeFloat()
case search.SortFieldAsDate:
return encodeDate()
default:
// For SortFieldAsString and SortFieldAuto
// NOTE: SortFieldAuto is used if you set Sort with a string
// or if the type of the field is not set in the object
// in the Sort slice. We cannot perform type inference in
// this case, so we return the original string, even if
// it's actually numeric or date.
return after
}
default:
// For SortDocID and SortScore
return after
}
}
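As an illustration of what this enables, a paginated numeric sort could be expressed like this sketch (field name hypothetical; note the explicit sort type, since SortFieldAuto values are passed through unencoded):

// assuming imports of "github.com/blevesearch/bleve/v2" and ".../bleve/v2/search"
req := bleve.NewSearchRequest(bleve.NewMatchAllQuery())
req.Size = 10
req.Sort = append(req.Sort, &search.SortField{
	Field: "price",                  // hypothetical numeric field
	Type:  search.SortFieldAsNumber, // explicit type so encodeSearchAfter can prefix-code it
})
req.SearchAfter = []string{"42.5"} // last "price" sort value from the previous page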
// Filter document matches based on the SearchAfter field in the SearchRequest.
func FilterHitsBySearchAfter(hits []*search.DocumentMatch, sort search.SortOrder, after []string) []*search.DocumentMatch {
if len(hits) == 0 {

View File

@@ -31,7 +31,7 @@ func init() {
// a message, or panic, etc.
type DocumentMatchPoolTooSmall func(p *DocumentMatchPool) *DocumentMatch
// DocumentMatchPool manages use/re-use of DocumentMatch instances
// DocumentMatchPool manages use/reuse of DocumentMatch instances
// it pre-allocates space from a single large block with the expected
// number of instances. It is not thread-safe as currently all
// aspects of search take place in a single goroutine.

View File

@@ -15,6 +15,7 @@
package query
import (
"bytes"
"context"
"encoding/json"
"fmt"
@@ -30,6 +31,7 @@ type BooleanQuery struct {
Must Query `json:"must,omitempty"`
Should Query `json:"should,omitempty"`
MustNot Query `json:"must_not,omitempty"`
Filter Query `json:"filter,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
queryStringMode bool
}
@@ -115,6 +117,13 @@ func (q *BooleanQuery) AddMustNot(m ...Query) {
}
}
func (q *BooleanQuery) AddFilter(m Query) {
if m == nil {
return
}
q.Filter = m
}
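A usage sketch of the new clause, using the usual top-level bleve constructors; the field and term are hypothetical:

bq := bleve.NewBooleanQuery()
bq.AddMust(bleve.NewMatchQuery("espresso"))
inStock := bleve.NewTermQuery("true") // hypothetical keyword field
inStock.SetField("in_stock")
bq.AddFilter(inStock) // constrains matches but never contributes to the score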
func (q *BooleanQuery) SetBoost(b float64) {
boost := Boost(b)
q.BoostVal = &boost
@@ -162,11 +171,61 @@ func (q *BooleanQuery) Searcher(ctx context.Context, i index.IndexReader, m mapp
}
}
// if all 3 are nil, return MatchNone
if mustSearcher == nil && shouldSearcher == nil && mustNotSearcher == nil {
var filterFunc searcher.FilterFunc
if q.Filter != nil {
// create new searcher options with scoring disabled, since a filter should not
// affect scoring and we don't want to pay the cost of scoring when we don't
// need it; also disable term vectors and explain, since filters don't need them
filterOptions := search.SearcherOptions{
Explain: false,
IncludeTermVectors: false,
Score: "none",
}
filterSearcher, err := q.Filter.Searcher(ctx, i, m, filterOptions)
if err != nil {
return nil, err
}
filterFunc = func(sctx *search.SearchContext, d *search.DocumentMatch) bool {
// Attempt to advance the filter searcher to the document identified by
// the base searcher's (unfiltered boolean) current result (d.IndexInternalID).
//
// If the filter searcher successfully finds a document with the same
// internal ID, it means the document satisfies the filter and should be kept.
//
// If the filter searcher returns an error, does not find a matching document,
// or finds a document with a different internal ID, the document should be discarded.
dm, err := filterSearcher.Advance(sctx, d.IndexInternalID)
return err == nil && dm != nil && bytes.Equal(dm.IndexInternalID, d.IndexInternalID)
}
}
// if all 4 are nil, return MatchNone
if mustSearcher == nil && shouldSearcher == nil && mustNotSearcher == nil && filterFunc == nil {
return searcher.NewMatchNoneSearcher(i)
}
// optimization, if only must searcher, just return it instead
if mustSearcher != nil && shouldSearcher == nil && mustNotSearcher == nil && filterFunc == nil {
return mustSearcher, nil
}
// optimization, if only should searcher, just return it instead
if mustSearcher == nil && shouldSearcher != nil && mustNotSearcher == nil && filterFunc == nil {
return shouldSearcher, nil
}
// optimization, if only filter searcher, wrap around a MatchAllSearcher
if mustSearcher == nil && shouldSearcher == nil && mustNotSearcher == nil && filterFunc != nil {
mustSearcher, err = searcher.NewMatchAllSearcher(ctx, i, 1.0, options)
if err != nil {
return nil, err
}
return searcher.NewFilteringSearcher(ctx,
mustSearcher,
filterFunc,
), nil
}
// if only mustNotSearcher, start with MatchAll
if mustSearcher == nil && shouldSearcher == nil && mustNotSearcher != nil {
mustSearcher, err = searcher.NewMatchAllSearcher(ctx, i, 1.0, options)
@@ -175,12 +234,15 @@ func (q *BooleanQuery) Searcher(ctx context.Context, i index.IndexReader, m mapp
}
}
// optimization, if only should searcher, just return it instead
if mustSearcher == nil && shouldSearcher != nil && mustNotSearcher == nil {
return shouldSearcher, nil
bs, err := searcher.NewBooleanSearcher(ctx, i, mustSearcher, shouldSearcher, mustNotSearcher, options)
if err != nil {
return nil, err
}
return searcher.NewBooleanSearcher(ctx, i, mustSearcher, shouldSearcher, mustNotSearcher, options)
if filterFunc != nil {
return searcher.NewFilteringSearcher(ctx, bs, filterFunc), nil
}
return bs, nil
}
func (q *BooleanQuery) Validate() error {
@@ -202,8 +264,14 @@ func (q *BooleanQuery) Validate() error {
return err
}
}
if q.Must == nil && q.Should == nil && q.MustNot == nil {
return fmt.Errorf("boolean query must contain at least one must or should or not must clause")
if qf, ok := q.Filter.(ValidatableQuery); ok {
err := qf.Validate()
if err != nil {
return err
}
}
if q.Must == nil && q.Should == nil && q.MustNot == nil && q.Filter == nil {
return fmt.Errorf("boolean query must contain at least one must or should or not must or filter clause")
}
return nil
}
@@ -213,6 +281,7 @@ func (q *BooleanQuery) UnmarshalJSON(data []byte) error {
Must json.RawMessage `json:"must,omitempty"`
Should json.RawMessage `json:"should,omitempty"`
MustNot json.RawMessage `json:"must_not,omitempty"`
Filter json.RawMessage `json:"filter,omitempty"`
Boost *Boost `json:"boost,omitempty"`
}{}
err := util.UnmarshalJSON(data, &tmp)
@@ -253,6 +322,13 @@ func (q *BooleanQuery) UnmarshalJSON(data []byte) error {
}
}
if tmp.Filter != nil {
q.Filter, err = ParseQuery(tmp.Filter)
if err != nil {
return err
}
}
q.BoostVal = tmp.Boost
return nil

View File

@@ -196,7 +196,8 @@ func ParseQuery(input []byte) (Query, error) {
_, hasMust := tmp["must"]
_, hasShould := tmp["should"]
_, hasMustNot := tmp["must_not"]
if hasMust || hasShould || hasMustNot {
_, hasFilter := tmp["filter"]
if hasMust || hasShould || hasMustNot || hasFilter {
var rv BooleanQuery
err := util.UnmarshalJSON(input, &rv)
if err != nil {
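So a JSON query carrying only the keys above is now routed to BooleanQuery; a sketch with hypothetical field names (note that a "must" clause must parse to a conjunction):

q, err := query.ParseQuery([]byte(`{
	"must":   {"conjuncts": [{"match": "espresso"}]},
	"filter": {"term": "true", "field": "in_stock"}
}`))
// q is a *query.BooleanQuery with its Filter clause populated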

View File

@@ -17,6 +17,7 @@ package search
import (
"fmt"
"reflect"
"slices"
"sort"
"github.com/blevesearch/bleve/v2/size"
@@ -41,15 +42,7 @@ func init() {
type ArrayPositions []uint64
func (ap ArrayPositions) Equals(other ArrayPositions) bool {
if len(ap) != len(other) {
return false
}
for i := range ap {
if ap[i] != other[i] {
return false
}
}
return true
return slices.Equal(ap, other)
}
func (ap ArrayPositions) Compare(other ArrayPositions) int {

View File

@@ -24,7 +24,7 @@ import (
// DisjunctionMaxClauseCount is a compile time setting that applications can
// adjust to non-zero value to cause the DisjunctionSearcher to return an
// error instead of exeucting searches when the size exceeds this value.
// error instead of executing searches when the size exceeds this value.
var DisjunctionMaxClauseCount = 0
// DisjunctionHeapTakeover is a compile time setting that applications can

View File

@@ -33,7 +33,7 @@ func init() {
// FilterFunc defines a function which can filter documents
// returning true means keep the document
// returning false means do not keep the document
type FilterFunc func(d *search.DocumentMatch) bool
type FilterFunc func(sctx *search.SearchContext, d *search.DocumentMatch) bool
// FilteringSearcher wraps any other searcher, but checks any Next/Advance
// call against the supplied FilterFunc
@@ -57,7 +57,7 @@ func (f *FilteringSearcher) Size() int {
func (f *FilteringSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
next, err := f.child.Next(ctx)
for next != nil && err == nil {
if f.accept(next) {
if f.accept(ctx, next) {
return next, nil
}
next, err = f.child.Next(ctx)
@@ -73,7 +73,7 @@ func (f *FilteringSearcher) Advance(ctx *search.SearchContext, ID index.IndexInt
if adv == nil {
return nil, nil
}
if f.accept(adv) {
if f.accept(ctx, adv) {
return adv, nil
}
return f.Next(ctx)

View File

@@ -208,7 +208,7 @@ func buildIsIndexedFunc(ctx context.Context, indexReader index.IndexReader, fiel
func buildRectFilter(ctx context.Context, dvReader index.DocValueReader, field string,
minLon, minLat, maxLon, maxLat float64,
) FilterFunc {
return func(d *search.DocumentMatch) bool {
return func(sctx *search.SearchContext, d *search.DocumentMatch) bool {
// check geo matches against all numeric type terms indexed
var lons, lats []float64
var found bool

View File

@@ -115,7 +115,7 @@ func boxSearcher(ctx context.Context, indexReader index.IndexReader,
func buildDistFilter(ctx context.Context, dvReader index.DocValueReader, field string,
centerLon, centerLat, maxDist float64) FilterFunc {
return func(d *search.DocumentMatch) bool {
return func(sctx *search.SearchContext, d *search.DocumentMatch) bool {
// check geo matches against all numeric type terms indexed
var lons, lats []float64
var found bool

View File

@@ -85,7 +85,7 @@ func almostEqual(a, b float64) bool {
// here: https://wrf.ecse.rpi.edu/nikola/pubdetails/pnpoly.html
func buildPolygonFilter(ctx context.Context, dvReader index.DocValueReader, field string,
coordinates []geo.Point) FilterFunc {
return func(d *search.DocumentMatch) bool {
return func(sctx *search.SearchContext, d *search.DocumentMatch) bool {
// check geo matches against all numeric type terms indexed
var lons, lats []float64
var found bool

View File

@@ -77,7 +77,7 @@ func buildRelationFilterOnShapes(ctx context.Context, dvReader index.DocValueRea
bufPool = bufPoolCallback()
}
return func(d *search.DocumentMatch) bool {
return func(sctx *search.SearchContext, d *search.DocumentMatch) bool {
var found bool
err := dvReader.VisitDocValues(d.IndexInternalID,

View File

@@ -296,7 +296,7 @@ func (s *PhraseSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch,
}
// checkCurrMustMatch is solely concerned with determining if the DocumentMatch
// pointed to by s.currMust (which satisifies the pre-condition searcher)
// pointed to by s.currMust (which satisfies the pre-condition searcher)
// also satisfies the phrase constraints. if so, it returns a DocumentMatch
// for this document, otherwise nil
func (s *PhraseSearcher) checkCurrMustMatch(ctx *search.SearchContext) *search.DocumentMatch {
@@ -458,7 +458,7 @@ func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms [][]s
if len(car) == 0 || (len(car) == 1 && car[0] == "") {
nextPos := prevPos + 1
if prevPos == 0 {
// if prevPos was 0, don't set it to 1 (as thats not a real abs pos)
// if prevPos was 0, don't set it to 1 (as that's not a real abs pos)
nextPos = 0 // don't advance nextPos if prevPos was 0
}
return findPhrasePaths(nextPos, ap, cdr, tlm, p, remainingSlop, rv)

View File

@@ -96,7 +96,7 @@ func NewRegexpStringSearcher(ctx context.Context, indexReader index.IndexReader,
// NewRegexpSearcher creates a searcher which will match documents that
// contain terms which match the pattern regexp. The match must be EXACT
// matching the entire term. The provided regexp SHOULD NOT start with ^
// or end with $ as this can intefere with the implementation. Separately,
// or end with $ as this can interfere with the implementation. Separately,
// matches will be checked to ensure they match the entire term.
func NewRegexpSearcher(ctx context.Context, indexReader index.IndexReader, pattern Regexp,
field string, boost float64, options search.SearcherOptions) (

View File

@@ -408,7 +408,7 @@ func (s *SortField) DecodeValue(value string) string {
if err != nil {
return value
}
return time.Unix(0, i64).UTC().String()
return time.Unix(0, i64).UTC().Format(time.RFC3339Nano)
default:
return value
}
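Concretely, for a date sort value the decoded form changes from Go's default time formatting to RFC 3339:

t := time.Unix(0, 0).UTC()
fmt.Println(t.String())                 // 1970-01-01 00:00:00 +0000 UTC  (old behavior)
fmt.Println(t.Format(time.RFC3339Nano)) // 1970-01-01T00:00:00Z           (new behavior)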

View File

@@ -152,6 +152,10 @@ const (
// BM25StatsKey is used to store and transport the BM25 Data
// to the actual search phase which would use it to perform the search.
BM25StatsKey ContextKey = "_bm25_stats_key"
// ScoreFusionKey is used to communicate whether KNN hits need to be preserved for
// hybrid search algorithms (like RRF)
ScoreFusionKey ContextKey = "_fusion_rescoring_key"
)
func RecordSearchCost(ctx context.Context,

View File

@@ -67,6 +67,8 @@ type SearchRequest struct {
PreSearchData map[string]interface{} `json:"pre_search_data,omitempty"`
Params *RequestParams `json:"params,omitempty"`
sortFunc func(sort.Interface)
}
@@ -148,6 +150,7 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
KNN []*tempKNNReq `json:"knn"`
KNNOperator knnOperator `json:"knn_operator"`
PreSearchData json.RawMessage `json:"pre_search_data"`
Params json.RawMessage `json:"params"`
}
err := json.Unmarshal(input, &temp)
@@ -189,6 +192,22 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
r.From = 0
}
if IsScoreFusionRequested(r) {
if temp.Params == nil {
// If params is not present and the request requires rescoring, assign
// default values
r.Params = NewDefaultParams(r.From, r.Size)
} else {
// if it is a request that requires rescoring, parse the rescoring
// parameters.
params, err := ParseParams(r, temp.Params)
if err != nil {
return err
}
r.Params = params
}
}
r.KNN = make([]*KNNRequest, len(temp.KNN))
for i, knnReq := range temp.KNN {
r.KNN[i] = &KNNRequest{}
@@ -243,6 +262,7 @@ func copySearchRequest(req *SearchRequest, preSearchData map[string]interface{})
KNN: req.KNN,
KNNOperator: req.KNNOperator,
PreSearchData: preSearchData,
Params: req.Params,
}
return &rv
@@ -327,6 +347,7 @@ func validateKNN(req *SearchRequest) error {
default:
return fmt.Errorf("knn_operator must be either 'and' / 'or'")
}
return nil
}
@@ -458,6 +479,12 @@ func finalizeKNNResults(req *SearchRequest, knnHits []*search.DocumentMatch) []*
}
knnHits = knnHits[:idx]
}
// if score fusion is required, return early because the
// score breakdown is retained
if IsScoreFusionRequested(req) {
return knnHits
}
// fix the score using score breakdown now
// if the score is none, then we need to set the score to 0.0
// if req.Explain is true, then we need to use the expl breakdown to
@@ -537,6 +564,10 @@ func requestHasKNN(req *SearchRequest) bool {
return len(req.KNN) > 0
}
func numKNNQueries(req *SearchRequest) int {
return len(req.KNN)
}
// returns true if the search request contains a KNN request that can be
// satisfied by just performing a preSearch, completely bypassing the
// actual search.
@@ -608,3 +639,25 @@ func newKnnPreSearchResultProcessor(req *SearchRequest) *knnPreSearchResultProce
},
}
}
// Replace knn boost values for fusion rescoring queries
func (r *rescorer) prepareKnnRequest() {
for i := range r.req.KNN {
b := r.req.KNN[i].Boost
if b != nil {
r.origBoosts[i+1] = b.Value()
newB := query.Boost(1.0)
r.req.KNN[i].Boost = &newB
} else {
r.origBoosts[i+1] = 1.0
}
}
}
// Restore knn boost values for fusion rescoring queries
func (r *rescorer) restoreKnnRequest() {
for i := range r.req.KNN {
b := query.Boost(r.origBoosts[i+1])
r.req.KNN[i].Boost = &b
}
}

View File

@@ -77,6 +77,8 @@ type SearchRequest struct {
PreSearchData map[string]interface{} `json:"pre_search_data,omitempty"`
Params *RequestParams `json:"params,omitempty"`
sortFunc func(sort.Interface)
}
@@ -97,6 +99,7 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
SearchAfter []string `json:"search_after"`
SearchBefore []string `json:"search_before"`
PreSearchData json.RawMessage `json:"pre_search_data"`
Params json.RawMessage `json:"params"`
}
err := json.Unmarshal(input, &temp)
@@ -137,6 +140,23 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
if r.From < 0 {
r.From = 0
}
if IsScoreFusionRequested(r) {
if temp.Params == nil {
// If params is not present and the request requires rescoring, assign
// default values
r.Params = NewDefaultParams(r.From, r.Size)
} else {
// if it is a request that requires rescoring, parse the rescoring
// parameters.
params, err := ParseParams(r, temp.Params)
if err != nil {
return err
}
r.Params = params
}
}
if temp.PreSearchData != nil {
r.PreSearchData, err = query.ParsePreSearchData(temp.PreSearchData)
if err != nil {
@@ -184,6 +204,10 @@ func requestHasKNN(req *SearchRequest) bool {
return false
}
func numKNNQueries(req *SearchRequest) int {
return 0
}
func addKnnToDummyRequest(dummyReq *SearchRequest, realReq *SearchRequest) {
}
@@ -207,3 +231,9 @@ func finalizeKNNResults(req *SearchRequest, knnHits []*search.DocumentMatch) []*
func newKnnPreSearchResultProcessor(req *SearchRequest) *knnPreSearchResultProcessor {
return &knnPreSearchResultProcessor{} // equivalent to nil
}
func (r *rescorer) prepareKnnRequest() {
}
func (r *rescorer) restoreKnnRequest() {
}

32
vendor/github.com/blevesearch/bleve/v2/util/keys.go generated vendored Normal file
View File

@@ -0,0 +1,32 @@
// Copyright (c) 2025 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package util
var (
// Bolt keys
BoltSnapshotsBucket = []byte{'s'}
BoltPathKey = []byte{'p'}
BoltDeletedKey = []byte{'d'}
BoltInternalKey = []byte{'i'}
BoltMetaDataKey = []byte{'m'}
BoltMetaDataSegmentTypeKey = []byte("type")
BoltMetaDataSegmentVersionKey = []byte("version")
BoltMetaDataTimeStamp = []byte("timeStamp")
BoltStatsKey = []byte("stats")
BoltUpdatedFieldsKey = []byte("fields")
TotBytesWrittenKey = []byte("TotBytesWritten")
MappingInternalKey = []byte("_mapping")
)
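A rough sketch of reading this metadata, under the assumption that scorch keeps its root bolt store in go.etcd.io/bbolt (imported as bolt); the path is hypothetical:

func dumpSnapshots(path string) error {
	db, err := bolt.Open(path, 0o600, nil)
	if err != nil {
		return err
	}
	defer db.Close()
	return db.View(func(tx *bolt.Tx) error {
		snapshots := tx.Bucket(util.BoltSnapshotsBucket)
		if snapshots == nil {
			return nil // nothing persisted yet
		}
		return snapshots.ForEach(func(k, v []byte) error {
			fmt.Printf("snapshot key %x\n", k)
			return nil
		})
	})
}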

View File

@@ -65,6 +65,19 @@ type EventIndex interface {
FireIndexEvent()
}
type UpdateFieldInfo struct {
Deleted bool
Store bool
Index bool
DocValues bool
}
type UpdateIndex interface {
Index
UpdateFields(fieldInfo map[string]*UpdateFieldInfo, updatedMapping []byte) error
OpenMeta() error
}
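A hypothetical caller, not part of this change, might drive a field deletion like so; updatedMappingBytes stands in for a serialized index mapping:

func deleteLegacyField(idx index.Index, updatedMappingBytes []byte) error {
	ui, ok := idx.(index.UpdateIndex)
	if !ok {
		return fmt.Errorf("index does not support field updates")
	}
	info := map[string]*index.UpdateFieldInfo{
		// wipe every representation of the field: stored, indexed, doc values
		"legacy_field": {Deleted: true, Store: true, Index: true, DocValues: true},
	}
	return ui.UpdateFields(info, updatedMappingBytes)
}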
type IndexReader interface {
TermFieldReader(ctx context.Context, term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (TermFieldReader, error)

View File

@@ -26,7 +26,7 @@ const (
const (
BM25Scoring = "bm25"
TFIDFScoring = "tfidf"
TFIDFScoring = "tf-idf"
)
// Scoring model indicates the algorithm used to rank documents fetched

View File

@@ -61,6 +61,12 @@ type PersistedSegment interface {
Path() string
}
type UpdatableSegment interface {
Segment
GetUpdatedFields() map[string]*index.UpdateFieldInfo
SetUpdatedFields(fieldInfo map[string]*index.UpdateFieldInfo)
}
type TermDictionary interface {
PostingsList(term []byte, except *roaring.Bitmap, prealloc PostingsList) (PostingsList, error)

View File

@@ -21,6 +21,7 @@ import (
"math"
"os"
index "github.com/blevesearch/bleve_index_api"
"github.com/blevesearch/vellum"
)
@@ -169,6 +170,7 @@ func InitSegmentBase(mem []byte, memCRC uint32, chunkMode uint32, numDocs uint64
sectionsIndexOffset: sectionsIndexOffset,
fieldDvReaders: make([]map[uint16]*docValueReader, len(segmentSections)),
docValueOffset: 0, // docValueOffsets identified automatically by the section
updatedFields: make(map[string]*index.UpdateFieldInfo),
fieldFSTs: make(map[uint16]*vellum.FST),
vecIndexCache: newVectorIndexCache(),
synIndexCache: newSynonymIndexCache(),

View File

@@ -24,6 +24,7 @@ import (
"sort"
"github.com/RoaringBitmap/roaring/v2"
index "github.com/blevesearch/bleve_index_api"
seg "github.com/blevesearch/scorch_segment_api/v2"
"github.com/golang/snappy"
)
@@ -109,6 +110,19 @@ func mergeSegmentBases(segmentBases []*SegmentBase, drops []*roaring.Bitmap, pat
return newDocNums, uint64(cr.Count()), nil
}
// Remove fields that have been completely deleted from fieldsInv
func filterFields(fieldsInv []string, fieldInfo map[string]*index.UpdateFieldInfo) []string {
idx := 0
for _, field := range fieldsInv {
if val, ok := fieldInfo[field]; ok && val.Deleted {
continue
}
fieldsInv[idx] = field
idx++
}
return fieldsInv[:idx]
}
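For example (values hypothetical):

fieldsInv := []string{"_id", "title", "legacy"}
info := map[string]*index.UpdateFieldInfo{"legacy": {Deleted: true}}
fieldsInv = filterFields(fieldsInv, info) // → ["_id", "title"], reusing the backing array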
func mergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap,
chunkMode uint32, cr *CountHashWriter, closeCh chan struct{}) (
newDocNums [][]uint64, numDocs, storedIndexOffset uint64,
@@ -117,6 +131,8 @@ func mergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap,
var fieldsSame bool
fieldsSame, fieldsInv = mergeFields(segments)
updatedFields := mergeUpdatedFields(segments)
fieldsInv = filterFields(fieldsInv, updatedFields)
fieldsMap = mapFields(fieldsInv)
numDocs = computeNewDocCount(segments, drops)
@@ -130,15 +146,16 @@ func mergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap,
// offsets in the fields section index of the file (the final merged file).
mergeOpaque := map[int]resetable{}
args := map[string]interface{}{
"chunkMode": chunkMode,
"fieldsSame": fieldsSame,
"fieldsMap": fieldsMap,
"numDocs": numDocs,
"chunkMode": chunkMode,
"fieldsSame": fieldsSame,
"fieldsMap": fieldsMap,
"numDocs": numDocs,
"updatedFields": updatedFields,
}
if numDocs > 0 {
storedIndexOffset, newDocNums, err = mergeStoredAndRemap(segments, drops,
fieldsMap, fieldsInv, fieldsSame, numDocs, cr, closeCh)
fieldsMap, fieldsInv, fieldsSame, numDocs, cr, closeCh, updatedFields)
if err != nil {
return nil, 0, 0, nil, nil, 0, err
}
@@ -358,7 +375,7 @@ type varintEncoder func(uint64) (int, error)
func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap,
fieldsMap map[string]uint16, fieldsInv []string, fieldsSame bool, newSegDocCount uint64,
w *CountHashWriter, closeCh chan struct{}) (uint64, [][]uint64, error) {
w *CountHashWriter, closeCh chan struct{}, updatedFields map[string]*index.UpdateFieldInfo) (uint64, [][]uint64, error) {
var rv [][]uint64 // The remapped or newDocNums for each segment.
var newDocNum uint64
@@ -397,7 +414,8 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap,
// optimize when the field mapping is the same across all
// segments and there are no deletions, via byte-copying
// of stored docs bytes directly to the writer
if fieldsSame && (dropsI == nil || dropsI.GetCardinality() == 0) {
// cannot copy directly if fields might have been deleted
if fieldsSame && (dropsI == nil || dropsI.GetCardinality() == 0) && len(updatedFields) == 0 {
err := segment.copyStoredDocs(newDocNum, docNumOffsets, w)
if err != nil {
return 0, nil, err
@@ -440,6 +458,10 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap,
// no entry for field in fieldsMap
return false
}
// early exit if the stored portion of the field is deleted
if val, ok := updatedFields[fieldsInv[fieldID]]; ok && val.Store {
return true
}
vals[fieldID] = append(vals[fieldID], value)
typs[fieldID] = append(typs[fieldID], typ)
@@ -471,6 +493,10 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap,
// now walk the non-"_id" fields in order
for fieldID := 1; fieldID < len(fieldsInv); fieldID++ {
// early exit if the stored portion of the field is deleted
if val, ok := updatedFields[fieldsInv[fieldID]]; ok && val.Store {
continue
}
storedFieldValues := vals[fieldID]
stf := typs[fieldID]
@@ -606,6 +632,34 @@ func mergeFields(segments []*SegmentBase) (bool, []string) {
return fieldsSame, rv
}
// Combine updateFieldInfo from all segments
func mergeUpdatedFields(segments []*SegmentBase) map[string]*index.UpdateFieldInfo {
var fieldInfo map[string]*index.UpdateFieldInfo
for _, segment := range segments {
for field, info := range segment.updatedFields {
if fieldInfo == nil {
fieldInfo = make(map[string]*index.UpdateFieldInfo)
}
if _, ok := fieldInfo[field]; !ok {
fieldInfo[field] = &index.UpdateFieldInfo{
Deleted: info.Deleted,
Index: info.Index,
Store: info.Store,
DocValues: info.DocValues,
}
} else {
fieldInfo[field].Deleted = fieldInfo[field].Deleted || info.Deleted
fieldInfo[field].Index = fieldInfo[field].Index || info.Index
fieldInfo[field].Store = fieldInfo[field].Store || info.Store
fieldInfo[field].DocValues = fieldInfo[field].DocValues || info.DocValues
}
}
}
return fieldInfo
}
func isClosed(closeCh chan struct{}) bool {
select {
case <-closeCh:

View File

@@ -105,6 +105,10 @@ func (v *faissVectorIndexSection) Merge(opaque map[int]resetable, segments []*Se
if _, ok := sb.fieldsMap[fieldName]; !ok {
continue
}
// early exit if index data is supposed to be deleted
if info, ok := vo.updatedFields[fieldName]; ok && info.Index {
continue
}
// check if the section address is a valid one for "fieldName" in the
// segment sb. the local fieldID (fetched by the fieldsMap of the sb)
@@ -686,9 +690,10 @@ func (v *faissVectorIndexSection) getvectorIndexOpaque(opaque map[int]resetable)
func (v *faissVectorIndexSection) InitOpaque(args map[string]interface{}) resetable {
rv := &vectorIndexOpaque{
fieldAddrs: make(map[uint16]int),
vecIDMap: make(map[int64]*vecInfo),
vecFieldMap: make(map[uint16]*indexContent),
fieldAddrs: make(map[uint16]int),
vecIDMap: make(map[int64]*vecInfo),
vecFieldMap: make(map[uint16]*indexContent),
updatedFields: make(map[string]*index.UpdateFieldInfo),
}
for k, v := range args {
rv.Set(k, v)
@@ -727,6 +732,8 @@ type vectorIndexOpaque struct {
// index to be build.
vecFieldMap map[uint16]*indexContent
updatedFields map[string]*index.UpdateFieldInfo
tmp0 []byte
}
@@ -773,4 +780,8 @@ func (v *vectorIndexOpaque) Reset() (err error) {
}
func (v *vectorIndexOpaque) Set(key string, val interface{}) {
switch key {
case "updatedFields":
v.updatedFields = val.(map[string]*index.UpdateFieldInfo)
}
}

View File

@@ -82,7 +82,8 @@ func (i *invertedTextIndexSection) AddrForField(opaque map[int]resetable, fieldI
func mergeAndPersistInvertedSection(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
fieldsInv []string, fieldsMap map[string]uint16, fieldsSame bool,
newDocNumsIn [][]uint64, newSegDocCount uint64, chunkMode uint32,
w *CountHashWriter, closeCh chan struct{}) (map[int]int, uint64, error) {
updatedFields map[string]*index.UpdateFieldInfo, w *CountHashWriter,
closeCh chan struct{}) (map[int]int, uint64, error) {
var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64)
var bufLoc []uint64
@@ -125,6 +126,10 @@ func mergeAndPersistInvertedSection(segments []*SegmentBase, dropsIn []*roaring.
if isClosed(closeCh) {
return nil, 0, seg.ErrClosed
}
// early exit if index data is supposed to be deleted
if info, ok := updatedFields[fieldName]; ok && info.Index {
continue
}
dict, err2 := segment.dictionary(fieldName)
if err2 != nil {
@@ -244,7 +249,8 @@ func mergeAndPersistInvertedSection(segments []*SegmentBase, dropsIn []*roaring.
postItr = postings.iterator(true, true, true, postItr)
if fieldsSame {
// can only safely copy data if no field data has been deleted
if fieldsSame && len(updatedFields) == 0 {
// can optimize by copying freq/norm/loc bytes directly
lastDocNum, lastFreq, lastNorm, err = mergeTermFreqNormLocsByCopying(
term, postItr, newDocNums[itrI], newRoaring,
@@ -317,7 +323,10 @@ func mergeAndPersistInvertedSection(segments []*SegmentBase, dropsIn []*roaring.
if isClosed(closeCh) {
return nil, 0, seg.ErrClosed
}
// early exit if docvalues data is supposed to be deleted
if info, ok := updatedFields[fieldName]; ok && info.DocValues {
continue
}
fieldIDPlus1 := uint16(segment.fieldsMap[fieldName])
if dvIter, exists := segment.fieldDvReaders[SectionInvertedTextIndex][fieldIDPlus1-1]; exists &&
dvIter != nil {
@@ -398,7 +407,7 @@ func (i *invertedTextIndexSection) Merge(opaque map[int]resetable, segments []*S
w *CountHashWriter, closeCh chan struct{}) error {
io := i.getInvertedIndexOpaque(opaque)
fieldAddrs, _, err := mergeAndPersistInvertedSection(segments, drops, fieldsInv,
io.FieldsMap, io.fieldsSame, newDocNumsIn, io.numDocs, io.chunkMode, w, closeCh)
io.FieldsMap, io.fieldsSame, newDocNumsIn, io.numDocs, io.chunkMode, io.updatedFields, w, closeCh)
if err != nil {
return err
}
@@ -925,7 +934,8 @@ func (i *invertedIndexOpaque) getOrDefineField(fieldName string) int {
func (i *invertedTextIndexSection) InitOpaque(args map[string]interface{}) resetable {
rv := &invertedIndexOpaque{
fieldAddrs: map[int]int{},
fieldAddrs: map[int]int{},
updatedFields: make(map[string]*index.UpdateFieldInfo),
}
for k, v := range args {
rv.Set(k, v)
@@ -994,6 +1004,8 @@ type invertedIndexOpaque struct {
fieldAddrs map[int]int
updatedFields map[string]*index.UpdateFieldInfo
fieldsSame bool
numDocs uint64
}
@@ -1061,5 +1073,7 @@ func (i *invertedIndexOpaque) Set(key string, val interface{}) {
i.FieldsMap = val.(map[string]uint16)
case "numDocs":
i.numDocs = val.(uint64)
case "updatedFields":
i.updatedFields = val.(map[string]*index.UpdateFieldInfo)
}
}

View File

@@ -25,6 +25,7 @@ import (
"unsafe"
"github.com/RoaringBitmap/roaring/v2"
index "github.com/blevesearch/bleve_index_api"
mmap "github.com/blevesearch/mmap-go"
segment "github.com/blevesearch/scorch_segment_api/v2"
"github.com/blevesearch/vellum"
@@ -109,6 +110,8 @@ type SegmentBase struct {
fieldDvNames []string // field names cached in fieldDvReaders
size uint64
updatedFields map[string]*index.UpdateFieldInfo
m sync.Mutex
fieldFSTs map[uint16]*vellum.FST
@@ -952,3 +955,13 @@ func (sb *SegmentBase) loadDvReaders() error {
return nil
}
// Getter method to retrieve updateFieldInfo within segment base
func (s *SegmentBase) GetUpdatedFields() map[string]*index.UpdateFieldInfo {
return s.updatedFields
}
// Setter method to store updateFieldInfo within segment base
func (s *SegmentBase) SetUpdatedFields(updatedFields map[string]*index.UpdateFieldInfo) {
s.updatedFields = updatedFields
}

View File

@@ -162,6 +162,51 @@ In case of inverted text index, the dictionary is encoded in [Vellum](https://gi
ITI - Inverted Text Index
## Vector Index Section
In a vector index, each vector in a document is given a unique Id. This vector Id is to be used within the [Faiss](https://github.com/blevesearch/faiss) index. The mapping between the document Id and the vector Id is stored along with a serialized vector index. Doc Values are not applicable to this section.
|================================================================+- Inverted Text Index Section
| |
|================================================================+- Vector Index Section
| |
| +~~~~~~~~~~+~~~~~~~+~~~~~+~~~~~~+ |
+-------> DV Start | DVEnd | VIO | NVEC | |
| | +~~~~~~~~~~+~~~~~~~+~~~~~+~~~~~~+ |
| | |
| | +~~~~~~~~~~~~+~~~~~~~~~~~~+ |
| | | VectorID_0 | DocID_0 | |
| | +~~~~~~~~~~~~+~~~~~~~~~~~~+ |
| | | VectorID_1 | DocID_1 | |
| | +~~~~~~~~~~~~+~~~~~~~~~~~~+ |
| | | ... | ... | |
| | +~~~~~~~~~~~~+~~~~~~~~~~~~+ |
| | | VectorID_N | DocID_N | |
| | +~~~~~~~~~~~~+~~~~~~~~~~~~+ |
| | |
| | +~~~~~~~~~~~~~+ |
| | | FAISS LEN | |
| | +~~~~~~~~~~~~~+ |
| | |
| | +---------------------------+...+------------------------+ |
| | | SERIALIZED FAISS INDEX | |
| | +---------------------------+...+------------------------+ |
| | |
| |================================================================+- Synonym Index Section
| | |
| |================================================================+- Sections Info
+-----------------------------+ |
| | |
| +-------+-----+-----+------+~~~~~~~~+~~~~~~~~+--+...+--+ |
| | ... | VI | VI ADDR | NS | Length | Name | |
| +-------+-----+------------+~~~~~~~~+~~~~~~~~+--+...+--+ |
+================================================================+
VI - Vector Index
VIO - Vector Index Optimized for
NVEC - Number of vectors
FAISS LEN - Length of serialized FAISS index
## Synonym Index Section
In a synonyms index, the relationship between a term and its synonyms is represented using a Thesaurus. The Thesaurus is encoded in the [Vellum](https://github.com/couchbase/vellum) format and consists of pairs in the form `(term, offset)`. Here, the offset specifies the position of the postings list containing the synonyms for the given term. The postings list is stored as a Roaring64 bitmap, with each entry representing an encoded synonym for the term.

9
vendor/modules.txt vendored
View File

@@ -117,7 +117,7 @@ github.com/bitly/go-simplejson
# github.com/bits-and-blooms/bitset v1.22.0
## explicit; go 1.16
github.com/bits-and-blooms/bitset
# github.com/blevesearch/bleve/v2 v2.5.3
# github.com/blevesearch/bleve/v2 v2.5.4
## explicit; go 1.23
github.com/blevesearch/bleve/v2
github.com/blevesearch/bleve/v2/analysis
@@ -137,6 +137,7 @@ github.com/blevesearch/bleve/v2/analysis/token/stop
github.com/blevesearch/bleve/v2/analysis/tokenizer/single
github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode
github.com/blevesearch/bleve/v2/document
github.com/blevesearch/bleve/v2/fusion
github.com/blevesearch/bleve/v2/geo
github.com/blevesearch/bleve/v2/index/scorch
github.com/blevesearch/bleve/v2/index/scorch/mergeplan
@@ -159,7 +160,7 @@ github.com/blevesearch/bleve/v2/search/scorer
github.com/blevesearch/bleve/v2/search/searcher
github.com/blevesearch/bleve/v2/size
github.com/blevesearch/bleve/v2/util
# github.com/blevesearch/bleve_index_api v1.2.8
# github.com/blevesearch/bleve_index_api v1.2.10
## explicit; go 1.21
github.com/blevesearch/bleve_index_api
# github.com/blevesearch/geo v0.2.4
@@ -182,7 +183,7 @@ github.com/blevesearch/gtreap
# github.com/blevesearch/mmap-go v1.0.4
## explicit; go 1.13
github.com/blevesearch/mmap-go
# github.com/blevesearch/scorch_segment_api/v2 v2.3.10
# github.com/blevesearch/scorch_segment_api/v2 v2.3.12
## explicit; go 1.21
github.com/blevesearch/scorch_segment_api/v2
# github.com/blevesearch/segment v0.9.1
@@ -216,7 +217,7 @@ github.com/blevesearch/zapx/v14
# github.com/blevesearch/zapx/v15 v15.4.2
## explicit; go 1.21
github.com/blevesearch/zapx/v15
# github.com/blevesearch/zapx/v16 v16.2.4
# github.com/blevesearch/zapx/v16 v16.2.6
## explicit; go 1.23
github.com/blevesearch/zapx/v16
# github.com/bluele/gcache v0.0.2