mirror of
https://github.com/opencloud-eu/opencloud.git
synced 2026-06-17 20:38:49 -04:00
Merge branch 'origin/main' into 'next-release/main'
This commit is contained in:
65
go.mod
65
go.mod
@@ -11,7 +11,7 @@ require (
|
||||
github.com/Nerzal/gocloak/v13 v13.9.0
|
||||
github.com/bbalet/stopwords v1.0.0
|
||||
github.com/beevik/etree v1.6.0
|
||||
github.com/blevesearch/bleve/v2 v2.5.7
|
||||
github.com/blevesearch/bleve/v2 v2.6.0
|
||||
github.com/cenkalti/backoff v2.2.1+incompatible
|
||||
github.com/coreos/go-oidc/v3 v3.18.0
|
||||
github.com/cs3org/go-cs3apis v0.0.0-20260424072047-8d9ef7076ae9
|
||||
@@ -55,13 +55,13 @@ require (
|
||||
github.com/libregraph/lico v0.66.0
|
||||
github.com/mna/pigeon v1.3.0
|
||||
github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826
|
||||
github.com/nats-io/nats-server/v2 v2.14.0
|
||||
github.com/nats-io/nats-server/v2 v2.14.2
|
||||
github.com/nats-io/nats.go v1.51.0
|
||||
github.com/olekukonko/tablewriter v1.1.4
|
||||
github.com/onsi/ginkgo v1.16.5
|
||||
github.com/onsi/ginkgo/v2 v2.28.3
|
||||
github.com/onsi/gomega v1.40.0
|
||||
github.com/open-policy-agent/opa v1.15.2
|
||||
github.com/open-policy-agent/opa v1.17.1
|
||||
github.com/opencloud-eu/icap-client v0.0.0-20250930132611-28a2afe62d89
|
||||
github.com/opencloud-eu/libre-graph-api-go v1.0.8-0.20260310090739-853d972b282d
|
||||
github.com/opencloud-eu/reva/v2 v2.46.4-0.20260615073558-209c2cd3b52b
|
||||
@@ -95,7 +95,7 @@ require (
|
||||
go-micro.dev/v4 v4.11.0
|
||||
go.etcd.io/bbolt v1.4.3
|
||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.69.0
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0
|
||||
go.opentelemetry.io/contrib/zpages v0.68.0
|
||||
go.opentelemetry.io/otel v1.44.0
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.44.0
|
||||
@@ -130,7 +130,7 @@ require (
|
||||
github.com/Masterminds/sprig v2.22.0+incompatible // indirect
|
||||
github.com/Microsoft/go-winio v0.6.2 // indirect
|
||||
github.com/ProtonMail/go-crypto v1.1.6 // indirect
|
||||
github.com/RoaringBitmap/roaring/v2 v2.4.5 // indirect
|
||||
github.com/RoaringBitmap/roaring/v2 v2.14.5 // indirect
|
||||
github.com/agnivade/levenshtein v1.2.1 // indirect
|
||||
github.com/ajg/form v1.5.1 // indirect
|
||||
github.com/alexedwards/argon2id v1.0.0 // indirect
|
||||
@@ -140,24 +140,25 @@ require (
|
||||
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/bitly/go-simplejson v0.5.0 // indirect
|
||||
github.com/bits-and-blooms/bitset v1.22.0 // indirect
|
||||
github.com/blevesearch/bleve_index_api v1.2.11 // indirect
|
||||
github.com/blevesearch/geo v0.2.4 // indirect
|
||||
github.com/blevesearch/go-faiss v1.0.26 // indirect
|
||||
github.com/bits-and-blooms/bitset v1.24.2 // indirect
|
||||
github.com/blevesearch/bleve_index_api v1.3.11 // indirect
|
||||
github.com/blevesearch/geo v0.2.5 // indirect
|
||||
github.com/blevesearch/go-faiss v1.1.0 // indirect
|
||||
github.com/blevesearch/go-porterstemmer v1.0.3 // indirect
|
||||
github.com/blevesearch/gtreap v0.1.1 // indirect
|
||||
github.com/blevesearch/mmap-go v1.0.4 // indirect
|
||||
github.com/blevesearch/scorch_segment_api/v2 v2.3.13 // indirect
|
||||
github.com/blevesearch/mmap-go v1.2.0 // indirect
|
||||
github.com/blevesearch/scorch_segment_api/v2 v2.4.7 // indirect
|
||||
github.com/blevesearch/segment v0.9.1 // indirect
|
||||
github.com/blevesearch/snowballstem v0.9.0 // indirect
|
||||
github.com/blevesearch/upsidedown_store_api v1.0.2 // indirect
|
||||
github.com/blevesearch/vellum v1.1.0 // indirect
|
||||
github.com/blevesearch/zapx/v11 v11.4.2 // indirect
|
||||
github.com/blevesearch/zapx/v12 v12.4.2 // indirect
|
||||
github.com/blevesearch/zapx/v13 v13.4.2 // indirect
|
||||
github.com/blevesearch/zapx/v14 v14.4.2 // indirect
|
||||
github.com/blevesearch/zapx/v15 v15.4.2 // indirect
|
||||
github.com/blevesearch/zapx/v16 v16.2.8 // indirect
|
||||
github.com/blevesearch/vellum v1.2.0 // indirect
|
||||
github.com/blevesearch/zapx/v11 v11.4.3 // indirect
|
||||
github.com/blevesearch/zapx/v12 v12.4.3 // indirect
|
||||
github.com/blevesearch/zapx/v13 v13.4.3 // indirect
|
||||
github.com/blevesearch/zapx/v14 v14.4.3 // indirect
|
||||
github.com/blevesearch/zapx/v15 v15.4.3 // indirect
|
||||
github.com/blevesearch/zapx/v16 v16.3.4 // indirect
|
||||
github.com/blevesearch/zapx/v17 v17.1.2 // indirect
|
||||
github.com/bluele/gcache v0.0.2 // indirect
|
||||
github.com/bombsimon/logrusr/v3 v3.1.0 // indirect
|
||||
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
|
||||
@@ -182,7 +183,7 @@ require (
|
||||
github.com/cyphar/filepath-securejoin v0.6.1 // indirect
|
||||
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
|
||||
github.com/deckarep/golang-set v1.8.0 // indirect
|
||||
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.0 // indirect
|
||||
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.1 // indirect
|
||||
github.com/desertbit/timer v0.0.0-20180107155436-c41aec40b27f // indirect
|
||||
github.com/dgraph-io/ristretto v0.2.0 // indirect
|
||||
github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da // indirect
|
||||
@@ -199,7 +200,7 @@ require (
|
||||
github.com/evanphx/json-patch/v5 v5.5.0 // indirect
|
||||
github.com/fatih/color v1.18.0 // indirect
|
||||
github.com/felixge/httpsnoop v1.0.4 // indirect
|
||||
github.com/fsnotify/fsnotify v1.9.0 // indirect
|
||||
github.com/fsnotify/fsnotify v1.10.1 // indirect
|
||||
github.com/gdexlab/go-render v1.0.1 // indirect
|
||||
github.com/go-acme/lego/v4 v4.4.0 // indirect
|
||||
github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667 // indirect
|
||||
@@ -227,14 +228,14 @@ require (
|
||||
github.com/gobwas/httphead v0.1.0 // indirect
|
||||
github.com/gobwas/pool v0.2.1 // indirect
|
||||
github.com/gobwas/ws v1.2.1 // indirect
|
||||
github.com/goccy/go-json v0.10.5 // indirect
|
||||
github.com/goccy/go-json v0.10.6 // indirect
|
||||
github.com/goccy/go-yaml v1.18.0 // indirect
|
||||
github.com/gofrs/flock v0.13.0 // indirect
|
||||
github.com/gofrs/uuid v4.4.0+incompatible // indirect
|
||||
github.com/gogo/protobuf v1.3.2 // indirect
|
||||
github.com/golang-jwt/jwt/v4 v4.5.2 // indirect
|
||||
github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect
|
||||
github.com/golang/snappy v0.0.4 // indirect
|
||||
github.com/golang/snappy v1.0.0 // indirect
|
||||
github.com/google/go-querystring v1.1.0 // indirect
|
||||
github.com/google/go-tpm v0.9.8 // indirect
|
||||
github.com/google/pprof v0.0.0-20260402051712-545e8a4df936 // indirect
|
||||
@@ -256,18 +257,18 @@ require (
|
||||
github.com/json-iterator/go v1.1.12 // indirect
|
||||
github.com/juliangruber/go-intersect v1.1.0 // indirect
|
||||
github.com/kevinburke/ssh_config v1.2.0 // indirect
|
||||
github.com/klauspost/compress v1.18.5 // indirect
|
||||
github.com/klauspost/compress v1.18.6 // indirect
|
||||
github.com/klauspost/cpuid/v2 v2.3.0 // indirect
|
||||
github.com/klauspost/crc32 v1.3.0 // indirect
|
||||
github.com/kovidgoyal/go-parallel v1.1.1 // indirect
|
||||
github.com/kovidgoyal/go-shm v1.0.0 // indirect
|
||||
github.com/leodido/go-urn v1.4.0 // indirect
|
||||
github.com/lestrrat-go/blackmagic v1.0.4 // indirect
|
||||
github.com/lestrrat-go/dsig v1.0.0 // indirect
|
||||
github.com/lestrrat-go/dsig v1.2.1 // indirect
|
||||
github.com/lestrrat-go/dsig-secp256k1 v1.0.0 // indirect
|
||||
github.com/lestrrat-go/httpcc v1.0.1 // indirect
|
||||
github.com/lestrrat-go/httprc/v3 v3.0.2 // indirect
|
||||
github.com/lestrrat-go/jwx/v3 v3.0.13 // indirect
|
||||
github.com/lestrrat-go/httprc/v3 v3.0.5 // indirect
|
||||
github.com/lestrrat-go/jwx/v3 v3.1.1 // indirect
|
||||
github.com/lestrrat-go/option/v2 v2.0.0 // indirect
|
||||
github.com/libregraph/oidc-go v1.1.0 // indirect
|
||||
github.com/longsleep/go-metrics v1.0.0 // indirect
|
||||
@@ -302,8 +303,8 @@ require (
|
||||
github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect
|
||||
github.com/mschoch/smat v0.2.0 // indirect
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||
github.com/nats-io/jwt/v2 v2.8.1 // indirect
|
||||
github.com/nats-io/nkeys v0.4.15 // indirect
|
||||
github.com/nats-io/jwt/v2 v2.8.2 // indirect
|
||||
github.com/nats-io/nkeys v0.4.16 // indirect
|
||||
github.com/nats-io/nuid v1.0.1 // indirect
|
||||
github.com/nxadm/tail v1.4.8 // indirect
|
||||
github.com/oklog/run v1.2.0 // indirect
|
||||
@@ -327,7 +328,7 @@ require (
|
||||
github.com/prometheus/alertmanager v0.31.1 // indirect
|
||||
github.com/prometheus/client_model v0.6.2 // indirect
|
||||
github.com/prometheus/common v0.67.5 // indirect
|
||||
github.com/prometheus/procfs v0.17.0 // indirect
|
||||
github.com/prometheus/procfs v0.20.1 // indirect
|
||||
github.com/prometheus/statsd_exporter v0.22.8 // indirect
|
||||
github.com/rcrowley/go-metrics v0.0.0-20250401214520-65e299d6c5c9 // indirect
|
||||
github.com/rs/xid v1.6.0 // indirect
|
||||
@@ -365,8 +366,8 @@ require (
|
||||
github.com/toorop/go-dkim v0.0.0-20201103131630-e1cd1a0a5208 // indirect
|
||||
github.com/trustelem/zxcvbn v1.0.1 // indirect
|
||||
github.com/urfave/cli/v2 v2.27.7 // indirect
|
||||
github.com/valyala/fastjson v1.6.7 // indirect
|
||||
github.com/vektah/gqlparser/v2 v2.5.32 // indirect
|
||||
github.com/valyala/fastjson v1.6.10 // indirect
|
||||
github.com/vektah/gqlparser/v2 v2.5.33 // indirect
|
||||
github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect
|
||||
github.com/wk8/go-ordered-map v1.0.0 // indirect
|
||||
github.com/xanzy/ssh-agent v0.3.3 // indirect
|
||||
@@ -386,7 +387,7 @@ require (
|
||||
go.opentelemetry.io/proto/otlp v1.10.0 // indirect
|
||||
go.uber.org/multierr v1.11.0 // indirect
|
||||
go.uber.org/zap v1.27.0 // indirect
|
||||
go.yaml.in/yaml/v2 v2.4.3 // indirect
|
||||
go.yaml.in/yaml/v2 v2.4.4 // indirect
|
||||
go.yaml.in/yaml/v3 v3.0.4 // indirect
|
||||
golang.org/x/mod v0.35.0 // indirect
|
||||
golang.org/x/sys v0.45.0 // indirect
|
||||
|
||||
136
go.sum
136
go.sum
@@ -91,8 +91,8 @@ github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAE
|
||||
github.com/OpenDNS/vegadns2client v0.0.0-20180418235048-a3fa4a771d87/go.mod h1:iGLljf5n9GjT6kc0HBvyI1nOKnGQbNB66VzSNbK5iks=
|
||||
github.com/ProtonMail/go-crypto v1.1.6 h1:ZcV+Ropw6Qn0AX9brlQLAUXfqLBc7Bl+f/DmNxpLfdw=
|
||||
github.com/ProtonMail/go-crypto v1.1.6/go.mod h1:rA3QumHc/FZ8pAHreoekgiAbzpNsfQAosU5td4SnOrE=
|
||||
github.com/RoaringBitmap/roaring/v2 v2.4.5 h1:uGrrMreGjvAtTBobc0g5IrW1D5ldxDQYe2JW2gggRdg=
|
||||
github.com/RoaringBitmap/roaring/v2 v2.4.5/go.mod h1:FiJcsfkGje/nZBZgCu0ZxCPOKD/hVXDS2dXi7/eUFE0=
|
||||
github.com/RoaringBitmap/roaring/v2 v2.14.5 h1:ckd0o545JqDPeVJDgeFoaM21eBixUnlWfYgjE5VnyWw=
|
||||
github.com/RoaringBitmap/roaring/v2 v2.14.5/go.mod h1:eq4wdNXxtJIS/oikeCzdX1rBzek7ANzbth041hrU8Q4=
|
||||
github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo=
|
||||
github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI=
|
||||
github.com/agnivade/levenshtein v1.2.1 h1:EHBY3UOn1gwdy/VbFwgo4cxecRznFk7fKWN1KOX7eoM=
|
||||
@@ -145,46 +145,47 @@ github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6r
|
||||
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
|
||||
github.com/bitly/go-simplejson v0.5.0 h1:6IH+V8/tVMab511d5bn4M7EwGXZf9Hj6i2xSwkNEM+Y=
|
||||
github.com/bitly/go-simplejson v0.5.0/go.mod h1:cXHtHw4XUPsvGaxgjIAn8PhEWG9NfngEKAMDJEczWVA=
|
||||
github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
|
||||
github.com/bits-and-blooms/bitset v1.22.0 h1:Tquv9S8+SGaS3EhyA+up3FXzmkhxPGjQQCkcs2uw7w4=
|
||||
github.com/bits-and-blooms/bitset v1.22.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
|
||||
github.com/bits-and-blooms/bitset v1.24.2 h1:M7/NzVbsytmtfHbumG+K2bremQPMJuqv1JD3vOaFxp0=
|
||||
github.com/bits-and-blooms/bitset v1.24.2/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
|
||||
github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84=
|
||||
github.com/blevesearch/bleve/v2 v2.5.7 h1:2d9YrL5zrX5EBBW++GOaEKjE+NPWeZGaX77IM26m1Z8=
|
||||
github.com/blevesearch/bleve/v2 v2.5.7/go.mod h1:yj0NlS7ocGC4VOSAedqDDMktdh2935v2CSWOCDMHdSA=
|
||||
github.com/blevesearch/bleve_index_api v1.2.11 h1:bXQ54kVuwP8hdrXUSOnvTQfgK0KI1+f9A0ITJT8tX1s=
|
||||
github.com/blevesearch/bleve_index_api v1.2.11/go.mod h1:rKQDl4u51uwafZxFrPD1R7xFOwKnzZW7s/LSeK4lgo0=
|
||||
github.com/blevesearch/geo v0.2.4 h1:ECIGQhw+QALCZaDcogRTNSJYQXRtC8/m8IKiA706cqk=
|
||||
github.com/blevesearch/geo v0.2.4/go.mod h1:K56Q33AzXt2YExVHGObtmRSFYZKYGv0JEN5mdacJJR8=
|
||||
github.com/blevesearch/go-faiss v1.0.26 h1:4dRLolFgjPyjkaXwff4NfbZFdE/dfywbzDqporeQvXI=
|
||||
github.com/blevesearch/go-faiss v1.0.26/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk=
|
||||
github.com/blevesearch/bleve/v2 v2.6.0 h1:Cyd3dd4q5tCbOV8MnKUVRUDYMHOir9xn12NZzXVSEd4=
|
||||
github.com/blevesearch/bleve/v2 v2.6.0/go.mod h1:gLmI8lWgHgrIYf7UpUX7JISI1CaqC6VScu46mHThuAY=
|
||||
github.com/blevesearch/bleve_index_api v1.3.11 h1:x29vbV8OjWfLcrDVd7Lr1q+BkLNS0JWNEig0MCVnKH4=
|
||||
github.com/blevesearch/bleve_index_api v1.3.11/go.mod h1:xvd48t5XMeeioWQ5/jZvgLrV98flT2rdvEJ3l/ki4Ko=
|
||||
github.com/blevesearch/geo v0.2.5 h1:yJg9FX1oRwLnjXSXF+ECHfXFTF4diF02Ca/qUGVjJhE=
|
||||
github.com/blevesearch/geo v0.2.5/go.mod h1:Jhq7WE2K6mJTx1xS44M2pUO6Io+wjCSHh1+co3YOgH4=
|
||||
github.com/blevesearch/go-faiss v1.1.0 h1:xM7Jc0ZUCv5lssG9Ohj3Jv0SdTpxcUABU1dDt9XVsc4=
|
||||
github.com/blevesearch/go-faiss v1.1.0/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk=
|
||||
github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo=
|
||||
github.com/blevesearch/go-porterstemmer v1.0.3/go.mod h1:angGc5Ht+k2xhJdZi511LtmxuEf0OVpvUUNrwmM1P7M=
|
||||
github.com/blevesearch/gtreap v0.1.1 h1:2JWigFrzDMR+42WGIN/V2p0cUvn4UP3C4Q5nmaZGW8Y=
|
||||
github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgYICSZ3w0tYk=
|
||||
github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc=
|
||||
github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs=
|
||||
github.com/blevesearch/scorch_segment_api/v2 v2.3.13 h1:ZPjv/4VwWvHJZKeMSgScCapOy8+DdmsmRyLmSB88UoY=
|
||||
github.com/blevesearch/scorch_segment_api/v2 v2.3.13/go.mod h1:ENk2LClTehOuMS8XzN3UxBEErYmtwkE7MAArFTXs9Vc=
|
||||
github.com/blevesearch/mmap-go v1.2.0 h1:l33nNKPFcBjJUMwem6sAYJPUzhUCABoK9FxZDGiFNBI=
|
||||
github.com/blevesearch/mmap-go v1.2.0/go.mod h1:Vd6+20GBhEdwJnU1Xohgt88XCD/CTWcqbCNxkZpyBo0=
|
||||
github.com/blevesearch/scorch_segment_api/v2 v2.4.7 h1:GlMzW08hcsM3DnLUxhyF/1PcDal1qtvvIuytuph5djw=
|
||||
github.com/blevesearch/scorch_segment_api/v2 v2.4.7/go.mod h1://IJ7tG3QCf0cWW/aVSXqy77tc1AvLu3fcJLYEvOAFs=
|
||||
github.com/blevesearch/segment v0.9.1 h1:+dThDy+Lvgj5JMxhmOVlgFfkUtZV2kw49xax4+jTfSU=
|
||||
github.com/blevesearch/segment v0.9.1/go.mod h1:zN21iLm7+GnBHWTao9I+Au/7MBiL8pPFtJBJTsk6kQw=
|
||||
github.com/blevesearch/snowballstem v0.9.0 h1:lMQ189YspGP6sXvZQ4WZ+MLawfV8wOmPoD/iWeNXm8s=
|
||||
github.com/blevesearch/snowballstem v0.9.0/go.mod h1:PivSj3JMc8WuaFkTSRDW2SlrulNWPl4ABg1tC/hlgLs=
|
||||
github.com/blevesearch/upsidedown_store_api v1.0.2 h1:U53Q6YoWEARVLd1OYNc9kvhBMGZzVrdmaozG2MfoB+A=
|
||||
github.com/blevesearch/upsidedown_store_api v1.0.2/go.mod h1:M01mh3Gpfy56Ps/UXHjEO/knbqyQ1Oamg8If49gRwrQ=
|
||||
github.com/blevesearch/vellum v1.1.0 h1:CinkGyIsgVlYf8Y2LUQHvdelgXr6PYuvoDIajq6yR9w=
|
||||
github.com/blevesearch/vellum v1.1.0/go.mod h1:QgwWryE8ThtNPxtgWJof5ndPfx0/YMBh+W2weHKPw8Y=
|
||||
github.com/blevesearch/zapx/v11 v11.4.2 h1:l46SV+b0gFN+Rw3wUI1YdMWdSAVhskYuvxlcgpQFljs=
|
||||
github.com/blevesearch/zapx/v11 v11.4.2/go.mod h1:4gdeyy9oGa/lLa6D34R9daXNUvfMPZqUYjPwiLmekwc=
|
||||
github.com/blevesearch/zapx/v12 v12.4.2 h1:fzRbhllQmEMUuAQ7zBuMvKRlcPA5ESTgWlDEoB9uQNE=
|
||||
github.com/blevesearch/zapx/v12 v12.4.2/go.mod h1:TdFmr7afSz1hFh/SIBCCZvcLfzYvievIH6aEISCte58=
|
||||
github.com/blevesearch/zapx/v13 v13.4.2 h1:46PIZCO/ZuKZYgxI8Y7lOJqX3Irkc3N8W82QTK3MVks=
|
||||
github.com/blevesearch/zapx/v13 v13.4.2/go.mod h1:knK8z2NdQHlb5ot/uj8wuvOq5PhDGjNYQQy0QDnopZk=
|
||||
github.com/blevesearch/zapx/v14 v14.4.2 h1:2SGHakVKd+TrtEqpfeq8X+So5PShQ5nW6GNxT7fWYz0=
|
||||
github.com/blevesearch/zapx/v14 v14.4.2/go.mod h1:rz0XNb/OZSMjNorufDGSpFpjoFKhXmppH9Hi7a877D8=
|
||||
github.com/blevesearch/zapx/v15 v15.4.2 h1:sWxpDE0QQOTjyxYbAVjt3+0ieu8NCE0fDRaFxEsp31k=
|
||||
github.com/blevesearch/zapx/v15 v15.4.2/go.mod h1:1pssev/59FsuWcgSnTa0OeEpOzmhtmr/0/11H0Z8+Nw=
|
||||
github.com/blevesearch/zapx/v16 v16.2.8 h1:SlnzF0YGtSlrsOE3oE7EgEX6BIepGpeqxs1IjMbHLQI=
|
||||
github.com/blevesearch/zapx/v16 v16.2.8/go.mod h1:murSoCJPCk25MqURrcJaBQ1RekuqSCSfMjXH4rHyA14=
|
||||
github.com/blevesearch/vellum v1.2.0 h1:xkDiOEsHc2t3Cp0NsNZZ36pvc130sCzcGKOPMzXe+e0=
|
||||
github.com/blevesearch/vellum v1.2.0/go.mod h1:uEcfBJz7mAOf0Kvq6qoEKQQkLODBF46SINYNkZNae4k=
|
||||
github.com/blevesearch/zapx/v11 v11.4.3 h1:PTZOO5loKpHC/x/GzmPZNa9cw7GZIQxd5qRjwij9tHY=
|
||||
github.com/blevesearch/zapx/v11 v11.4.3/go.mod h1:4gdeyy9oGa/lLa6D34R9daXNUvfMPZqUYjPwiLmekwc=
|
||||
github.com/blevesearch/zapx/v12 v12.4.3 h1:eElXvAaAX4m04t//CGBQAtHNPA+Q6A1hHZVrN3LSFYo=
|
||||
github.com/blevesearch/zapx/v12 v12.4.3/go.mod h1:TdFmr7afSz1hFh/SIBCCZvcLfzYvievIH6aEISCte58=
|
||||
github.com/blevesearch/zapx/v13 v13.4.3 h1:qsdhRhaSpVnqDFlRiH9vG5+KJ+dE7KAW9WyZz/KXAiE=
|
||||
github.com/blevesearch/zapx/v13 v13.4.3/go.mod h1:knK8z2NdQHlb5ot/uj8wuvOq5PhDGjNYQQy0QDnopZk=
|
||||
github.com/blevesearch/zapx/v14 v14.4.3 h1:GY4Hecx0C6UTmiNC2pKdeA2rOKiLR5/rwpU9WR51dgM=
|
||||
github.com/blevesearch/zapx/v14 v14.4.3/go.mod h1:rz0XNb/OZSMjNorufDGSpFpjoFKhXmppH9Hi7a877D8=
|
||||
github.com/blevesearch/zapx/v15 v15.4.3 h1:iJiMJOHrz216jyO6lS0m9RTCEkprUnzvqAI2lc/0/CU=
|
||||
github.com/blevesearch/zapx/v15 v15.4.3/go.mod h1:1pssev/59FsuWcgSnTa0OeEpOzmhtmr/0/11H0Z8+Nw=
|
||||
github.com/blevesearch/zapx/v16 v16.3.4 h1:hDAqA8qusZTNbPEL7//w5P65UZ2de6yhSeUaTbp0Po0=
|
||||
github.com/blevesearch/zapx/v16 v16.3.4/go.mod h1:zqkPPqs9GS9FzVWzCO3Wf1X044yWAV17+4zb+FTiEHg=
|
||||
github.com/blevesearch/zapx/v17 v17.1.2 h1:avbOk2igaASNoiy0BE/jPgcxAnRI2PGeydeP4hg7Ikk=
|
||||
github.com/blevesearch/zapx/v17 v17.1.2/go.mod h1:WQObxKrqUX7cd0G1GMvDfc/bmZzQvoy7APOPimx7DiI=
|
||||
github.com/bluele/gcache v0.0.2 h1:WcbfdXICg7G/DGBh1PFfcirkWOQV+v077yF1pSy3DGw=
|
||||
github.com/bluele/gcache v0.0.2/go.mod h1:m15KV+ECjptwSPxKhOhQoAFQVtUFjTVkc3H8o0t/fp0=
|
||||
github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY=
|
||||
@@ -196,8 +197,8 @@ github.com/bufbuild/protocompile v0.14.1 h1:iA73zAf/fyljNjQKwYzUHD6AD4R8KMasmwa/
|
||||
github.com/bufbuild/protocompile v0.14.1/go.mod h1:ppVdAIhbr2H8asPk6k4pY7t9zB1OU5DoEw9xY/FUi1c=
|
||||
github.com/butonic/go-micro/v4 v4.11.1-0.20241115112658-b5d4de5ed9b3 h1:h8Z0hBv5tg/uZMKu8V47+DKWYVQg0lYP8lXDQq7uRpE=
|
||||
github.com/butonic/go-micro/v4 v4.11.1-0.20241115112658-b5d4de5ed9b3/go.mod h1:eE/tD53n3KbVrzrWxKLxdkGw45Fg1qaNLWjpJMvIUF4=
|
||||
github.com/bytecodealliance/wasmtime-go/v39 v39.0.1 h1:RibaT47yiyCRxMOj/l2cvL8cWiWBSqDXHyqsa9sGcCE=
|
||||
github.com/bytecodealliance/wasmtime-go/v39 v39.0.1/go.mod h1:miR4NYIEBXeDNamZIzpskhJ0z/p8al+lwMWylQ/ZJb4=
|
||||
github.com/bytecodealliance/wasmtime-go/v44 v44.0.0 h1:WRZXnLPIer/TWs5aYPaMlmVcOlzmR6Ur6wjLRIQOhTQ=
|
||||
github.com/bytecodealliance/wasmtime-go/v44 v44.0.0/go.mod h1:GP93piU+39CoFVCQ5xfHrPOUtL0APlMnkbblJ2d3YY0=
|
||||
github.com/c-bata/go-prompt v0.2.5/go.mod h1:vFnjEGDIIA/Lib7giyE4E9c50Lvl8j0S+7FVlAwDAVw=
|
||||
github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4=
|
||||
github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM=
|
||||
@@ -277,8 +278,8 @@ github.com/davidbyttow/govips/v2 v2.18.0 h1:pZRshWVYvewP/TZx3yZ7YeC42WyLXg53tHy5
|
||||
github.com/davidbyttow/govips/v2 v2.18.0/go.mod h1:8+nst5zfMoats12PgmmAPh6p5OfjDaXK0BXMFl/vOcM=
|
||||
github.com/deckarep/golang-set v1.8.0 h1:sk9/l/KqpunDwP7pSjUg0keiOOLEnOBHzykLrsPppp4=
|
||||
github.com/deckarep/golang-set v1.8.0/go.mod h1:5nI87KwE7wgsBU1F4GKAw2Qod7p5kyS383rP6+o6qqo=
|
||||
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.0 h1:NMZiJj8QnKe1LgsbDayM4UoHwbvwDRwnI3hwNaAHRnc=
|
||||
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.0/go.mod h1:ZXNYxsqcloTdSy/rNShjYzMhyjf0LaoftYK0p+A3h40=
|
||||
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.1 h1:5RVFMOWjMyRy8cARdy79nAmgYw3hK/4HUq48LQ6Wwqo=
|
||||
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.1/go.mod h1:ZXNYxsqcloTdSy/rNShjYzMhyjf0LaoftYK0p+A3h40=
|
||||
github.com/deepmap/oapi-codegen v1.3.11/go.mod h1:suMvK7+rKlx3+tpa8ByptmvoXbAV70wERKTOGH3hLp0=
|
||||
github.com/desertbit/timer v0.0.0-20180107155436-c41aec40b27f h1:U5y3Y5UE0w7amNe7Z5G/twsBW0KEalRQXZzf8ufSh9I=
|
||||
github.com/desertbit/timer v0.0.0-20180107155436-c41aec40b27f/go.mod h1:xH/i4TFMt8koVQZ6WFms69WAsDWr2XsYL3Hkl7jkoLE=
|
||||
@@ -349,8 +350,8 @@ github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHk
|
||||
github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
|
||||
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
|
||||
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
|
||||
github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
|
||||
github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
|
||||
github.com/fsnotify/fsnotify v1.10.1 h1:b0/UzAf9yR5rhf3RPm9gf3ehBPpf0oZKIjtpKrx59Ho=
|
||||
github.com/fsnotify/fsnotify v1.10.1/go.mod h1:TLheqan6HD6GBK6PrDWyDPBaEV8LspOxvPSjC+bVfgo=
|
||||
github.com/gabriel-vasile/mimetype v1.4.13 h1:46nXokslUBsAJE/wMsp5gtO500a4F3Nkz9Ufpk2AcUM=
|
||||
github.com/gabriel-vasile/mimetype v1.4.13/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s=
|
||||
github.com/gdexlab/go-render v1.0.1 h1:rxqB3vo5s4n1kF0ySmoNeSPRYkEsyHgln4jFIQY7v0U=
|
||||
@@ -477,8 +478,8 @@ github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og=
|
||||
github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw=
|
||||
github.com/gobwas/ws v1.2.1 h1:F2aeBZrm2NDsc7vbovKrWSogd4wvfAxg0FQ89/iqOTk=
|
||||
github.com/gobwas/ws v1.2.1/go.mod h1:hRKAFb8wOxFROYNsT1bqfWnhX+b5MFeJM9r2ZSwg/KY=
|
||||
github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4=
|
||||
github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
|
||||
github.com/goccy/go-json v0.10.6 h1:p8HrPJzOakx/mn/bQtjgNjdTcN+/S6FcG2CTtQOrHVU=
|
||||
github.com/goccy/go-json v0.10.6/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
|
||||
github.com/goccy/go-yaml v1.18.0 h1:8W7wMFS12Pcas7KU+VVkaiCng+kG8QiFeFwzFb+rwuw=
|
||||
github.com/goccy/go-yaml v1.18.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA=
|
||||
github.com/gofrs/flock v0.13.0 h1:95JolYOvGMqeH31+FC7D2+uULf6mG61mEZ/A8dRYMzw=
|
||||
@@ -535,8 +536,8 @@ github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiu
|
||||
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
|
||||
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
|
||||
github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
|
||||
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/golang/snappy v1.0.0 h1:Oy607GVXHs7RtbggtPBnr2RmDArIsAefDwvrdWvRhGs=
|
||||
github.com/golang/snappy v1.0.0/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/golangci/lint-1 v0.0.0-20181222135242-d2cdd8c08219/go.mod h1:/X8TswGSh1pIozq4ZwCfxS0WA5JGXguxk94ar/4c87Y=
|
||||
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
|
||||
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
|
||||
@@ -719,8 +720,8 @@ github.com/kevinburke/ssh_config v1.2.0/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF
|
||||
github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q=
|
||||
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
|
||||
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
||||
github.com/klauspost/compress v1.18.5 h1:/h1gH5Ce+VWNLSWqPzOVn6XBO+vJbCNGvjoaGBFW2IE=
|
||||
github.com/klauspost/compress v1.18.5/go.mod h1:cwPg85FWrGar70rWktvGQj8/hthj3wpl0PGDogxkrSQ=
|
||||
github.com/klauspost/compress v1.18.6 h1:2jupLlAwFm95+YDR+NwD2MEfFO9d4z4Prjl1XXDjuao=
|
||||
github.com/klauspost/compress v1.18.6/go.mod h1:cwPg85FWrGar70rWktvGQj8/hthj3wpl0PGDogxkrSQ=
|
||||
github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
|
||||
github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y=
|
||||
github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0=
|
||||
@@ -760,16 +761,16 @@ github.com/leonelquinteros/gotext v1.7.3-0.20260422134830-b012b4ccae69 h1:ZLo0bX
|
||||
github.com/leonelquinteros/gotext v1.7.3-0.20260422134830-b012b4ccae69/go.mod h1:ksG5iXViKefoupjy+0qQjAVoaDnylnQ1ejWl9g14wh8=
|
||||
github.com/lestrrat-go/blackmagic v1.0.4 h1:IwQibdnf8l2KoO+qC3uT4OaTWsW7tuRQXy9TRN9QanA=
|
||||
github.com/lestrrat-go/blackmagic v1.0.4/go.mod h1:6AWFyKNNj0zEXQYfTMPfZrAXUWUfTIZ5ECEUEJaijtw=
|
||||
github.com/lestrrat-go/dsig v1.0.0 h1:OE09s2r9Z81kxzJYRn07TFM9XA4akrUdoMwr0L8xj38=
|
||||
github.com/lestrrat-go/dsig v1.0.0/go.mod h1:dEgoOYYEJvW6XGbLasr8TFcAxoWrKlbQvmJgCR0qkDo=
|
||||
github.com/lestrrat-go/dsig v1.2.1 h1:MwxzZhE4+4fguHi+uDALKVlC3Cn+O1QU1Q/F8D7hVIc=
|
||||
github.com/lestrrat-go/dsig v1.2.1/go.mod h1:RD2eOaidyPvpc7IJQoO3Qq52RWdy8ZcJs8lrOnoa1Kc=
|
||||
github.com/lestrrat-go/dsig-secp256k1 v1.0.0 h1:JpDe4Aybfl0soBvoVwjqDbp+9S1Y2OM7gcrVVMFPOzY=
|
||||
github.com/lestrrat-go/dsig-secp256k1 v1.0.0/go.mod h1:CxUgAhssb8FToqbL8NjSPoGQlnO4w3LG1P0qPWQm/NU=
|
||||
github.com/lestrrat-go/httpcc v1.0.1 h1:ydWCStUeJLkpYyjLDHihupbn2tYmZ7m22BGkcvZZrIE=
|
||||
github.com/lestrrat-go/httpcc v1.0.1/go.mod h1:qiltp3Mt56+55GPVCbTdM9MlqhvzyuL6W/NMDA8vA5E=
|
||||
github.com/lestrrat-go/httprc/v3 v3.0.2 h1:7u4HUaD0NQbf2/n5+fyp+T10hNCsAnwKfqn4A4Baif0=
|
||||
github.com/lestrrat-go/httprc/v3 v3.0.2/go.mod h1:mSMtkZW92Z98M5YoNNztbRGxbXHql7tSitCvaxvo9l0=
|
||||
github.com/lestrrat-go/jwx/v3 v3.0.13 h1:AdHKiPIYeCSnOJtvdpipPg/0SuFh9rdkN+HF3O0VdSk=
|
||||
github.com/lestrrat-go/jwx/v3 v3.0.13/go.mod h1:2m0PV1A9tM4b/jVLMx8rh6rBl7F6WGb3EG2hufN9OQU=
|
||||
github.com/lestrrat-go/httprc/v3 v3.0.5 h1:S+Mb4L2I+bM6JGTibLmxExhyTOqnXjqx+zi9MoXw/TM=
|
||||
github.com/lestrrat-go/httprc/v3 v3.0.5/go.mod h1:mSMtkZW92Z98M5YoNNztbRGxbXHql7tSitCvaxvo9l0=
|
||||
github.com/lestrrat-go/jwx/v3 v3.1.1 h1:yd9AdPmZ4INnQ7k42IrzXYpnEG803+SrQ6hdMvzHJzw=
|
||||
github.com/lestrrat-go/jwx/v3 v3.1.1/go.mod h1:uw/MN2M/Xiu4FhwcIwH11Zsh9JWx9SWzgALl7/uIEkU=
|
||||
github.com/lestrrat-go/option/v2 v2.0.0 h1:XxrcaJESE1fokHy3FpaQ/cXW8ZsIdWcdFzzLOcID3Ss=
|
||||
github.com/lestrrat-go/option/v2 v2.0.0/go.mod h1:oSySsmzMoR0iRzCDCaUfsCzxQHUEuhOViQObyy7S6Vg=
|
||||
github.com/libregraph/idm v0.5.0 h1:tDMwKbAOZzdeDYMxVlY5PbSqRKO7dbAW9KT42A51WSk=
|
||||
@@ -896,14 +897,14 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8m
|
||||
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
|
||||
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
|
||||
github.com/namedotcom/go v0.0.0-20180403034216-08470befbe04/go.mod h1:5sN+Lt1CaY4wsPvgQH/jsuJi4XO2ssZbdsIizr4CVC8=
|
||||
github.com/nats-io/jwt/v2 v2.8.1 h1:V0xpGuD/N8Mi+fQNDynXohVvp7ZztevW5io8CUWlPmU=
|
||||
github.com/nats-io/jwt/v2 v2.8.1/go.mod h1:nWnOEEiVMiKHQpnAy4eXlizVEtSfzacZ1Q43LIRavZg=
|
||||
github.com/nats-io/nats-server/v2 v2.14.0 h1:+8q0HrDFotwLLcGH/legOEOnowunhK+aZ4GYBIWpQlM=
|
||||
github.com/nats-io/nats-server/v2 v2.14.0/go.mod h1:ImVUUDvfClJbb6cuJQRc1VmgDCXKM5ds0OoiG9MVOKo=
|
||||
github.com/nats-io/jwt/v2 v2.8.2 h1:XXRgB60MSTnqsRwejQurVDs/hcv2dkt+86GjI+I/bMc=
|
||||
github.com/nats-io/jwt/v2 v2.8.2/go.mod h1:Ag/56sq9OblL4JgdYufDd16Egb17Kr/8WwwuO/forVc=
|
||||
github.com/nats-io/nats-server/v2 v2.14.2 h1:Q7dRhCY03Y00rETFW3KV+KGaCIajlDfWgWUVgbMxyuk=
|
||||
github.com/nats-io/nats-server/v2 v2.14.2/go.mod h1:lWpb1bSpRELZfRdlMkdz8E7lbXKKyNe8RIn0vvepIHs=
|
||||
github.com/nats-io/nats.go v1.51.0 h1:ByW84XTz6W03GSSsygsZcA+xgKK8vPGaa/FCAAEHnAI=
|
||||
github.com/nats-io/nats.go v1.51.0/go.mod h1:26HypzazeOkyO3/mqd1zZd53STJN0EjCYF9Uy2ZOBno=
|
||||
github.com/nats-io/nkeys v0.4.15 h1:JACV5jRVO9V856KOapQ7x+EY8Jo3qw1vJt/9Jpwzkk4=
|
||||
github.com/nats-io/nkeys v0.4.15/go.mod h1:CpMchTXC9fxA5zrMo4KpySxNjiDVvr8ANOSZdiNfUrs=
|
||||
github.com/nats-io/nkeys v0.4.16 h1:rd5oAuLOb8mnAycB0xleuEBNS1pVVnN0fv/FF34Eypg=
|
||||
github.com/nats-io/nkeys v0.4.16/go.mod h1:llLgWoI0o4z/Q57q2R1kHfmocyhGV6VG/U18Glg1Afs=
|
||||
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
|
||||
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
|
||||
github.com/nbio/st v0.0.0-20140626010706-e9e8d9816f32/go.mod h1:9wM+0iRr9ahx58uYLpLIr5fm8diHn0JbqRycJi6w0Ms=
|
||||
@@ -940,8 +941,8 @@ github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7J
|
||||
github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
|
||||
github.com/onsi/gomega v1.40.0 h1:Vtol0e1MghCD2ZVIilPDIg44XSL9l2QAn8ZNaljWcJc=
|
||||
github.com/onsi/gomega v1.40.0/go.mod h1:M/Uqpu/8qTjtzCLUA2zJHX9Iilrau25x1PdoSRbWh5A=
|
||||
github.com/open-policy-agent/opa v1.15.2 h1:dS9q+0Yvruq/VNvWJc5qCvCchn715OWc3HLHXn/UCCc=
|
||||
github.com/open-policy-agent/opa v1.15.2/go.mod h1:c6SN+7jSsUcKJLQc5P4yhwx8YYDRbjpAiGkBOTqxaa4=
|
||||
github.com/open-policy-agent/opa v1.17.1 h1:wO0MOux/VCqY41aVAD6Toe1p3A7O7DlRZ1RHmYSpoS8=
|
||||
github.com/open-policy-agent/opa v1.17.1/go.mod h1:lcuZYSlqQpXFzsA6EJCELmfR5+nNOpZYX+eo7xaIIlk=
|
||||
github.com/opencloud-eu/go-micro-plugins/v4/store/nats-js-kv v0.0.0-20250512152754-23325793059a h1:Sakl76blJAaM6NxylVkgSzktjo2dS504iDotEFJsh3M=
|
||||
github.com/opencloud-eu/go-micro-plugins/v4/store/nats-js-kv v0.0.0-20250512152754-23325793059a/go.mod h1:pjcozWijkNPbEtX5SIQaxEW/h8VAVZYTLx+70bmB3LY=
|
||||
github.com/opencloud-eu/icap-client v0.0.0-20250930132611-28a2afe62d89 h1:W1ms+lP5lUUIzjRGDg93WrQfZJZCaV1ZP3KeyXi8bzY=
|
||||
@@ -1051,8 +1052,8 @@ github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4O
|
||||
github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA=
|
||||
github.com/prometheus/procfs v0.7.3/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA=
|
||||
github.com/prometheus/procfs v0.8.0/go.mod h1:z7EfXMXOkbkqb9IINtpCn86r/to3BnA0uaxHdg830/4=
|
||||
github.com/prometheus/procfs v0.17.0 h1:FuLQ+05u4ZI+SS/w9+BWEM2TXiHKsUQ9TADiRH7DuK0=
|
||||
github.com/prometheus/procfs v0.17.0/go.mod h1:oPQLaDAMRbA+u8H5Pbfq+dl3VDAvHxMUOVhe0wYB2zw=
|
||||
github.com/prometheus/procfs v0.20.1 h1:XwbrGOIplXW/AU3YhIhLODXMJYyC1isLFfYCsTEycfc=
|
||||
github.com/prometheus/procfs v0.20.1/go.mod h1:o9EMBZGRyvDrSPH1RqdxhojkuXstoe4UlK79eF5TGGo=
|
||||
github.com/prometheus/statsd_exporter v0.22.7/go.mod h1:N/TevpjkIh9ccs6nuzY3jQn9dFqnUakOjnEuMPJJJnI=
|
||||
github.com/prometheus/statsd_exporter v0.22.8 h1:Qo2D9ZzaQG+id9i5NYNGmbf1aa/KxKbB9aKfMS+Yib0=
|
||||
github.com/prometheus/statsd_exporter v0.22.8/go.mod h1:/DzwbTEaFTE0Ojz5PqcSk6+PFHOPWGxdXVr6yC8eFOM=
|
||||
@@ -1229,12 +1230,12 @@ github.com/urfave/cli/v2 v2.3.0/go.mod h1:LJmUH05zAU44vOAcrfzZQKsZbVcdbOG8rtL3/X
|
||||
github.com/urfave/cli/v2 v2.27.7 h1:bH59vdhbjLv3LAvIu6gd0usJHgoTTPhCFib8qqOwXYU=
|
||||
github.com/urfave/cli/v2 v2.27.7/go.mod h1:CyNAG/xg+iAOg0N4MPGZqVmv2rCoP267496AOXUZjA4=
|
||||
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
|
||||
github.com/valyala/fastjson v1.6.7 h1:ZE4tRy0CIkh+qDc5McjatheGX2czdn8slQjomexVpBM=
|
||||
github.com/valyala/fastjson v1.6.7/go.mod h1:CLCAqky6SMuOcxStkYQvblddUtoRxhYMGLrsQns1aXY=
|
||||
github.com/valyala/fastjson v1.6.10 h1:/yjJg8jaVQdYR3arGxPE2X5z89xrlhS0eGXdv+ADTh4=
|
||||
github.com/valyala/fastjson v1.6.10/go.mod h1:e6FubmQouUNP73jtMLmcbxS6ydWIpOfhz34TSfO3JaE=
|
||||
github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8=
|
||||
github.com/valyala/fasttemplate v1.1.0/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8=
|
||||
github.com/vektah/gqlparser/v2 v2.5.32 h1:k9QPJd4sEDTL+qB4ncPLflqTJ3MmjB9SrVzJrawpFSc=
|
||||
github.com/vektah/gqlparser/v2 v2.5.32/go.mod h1:c1I28gSOVNzlfc4WuDlqU7voQnsqI6OG2amkBAFmgts=
|
||||
github.com/vektah/gqlparser/v2 v2.5.33 h1:lRp8aIeNUNbimf/axZd7ETg24q06hBtPaas+TcvI/7E=
|
||||
github.com/vektah/gqlparser/v2 v2.5.33/go.mod h1:c1I28gSOVNzlfc4WuDlqU7voQnsqI6OG2amkBAFmgts=
|
||||
github.com/vinyldns/go-vinyldns v0.0.0-20200917153823-148a5f6b8f14/go.mod h1:RWc47jtnVuQv6+lY3c768WtXCas/Xi+U5UFc5xULmYg=
|
||||
github.com/vmihailenco/msgpack/v5 v5.4.1 h1:cQriyiUvjTwOHg8QZaPihLWeRAAVoCpE00IUPn0Bjt8=
|
||||
github.com/vmihailenco/msgpack/v5 v5.4.1/go.mod h1:GaZTsDaehaPpQVyxrf5mtQlH+pc21PIudVV/E3rRQok=
|
||||
@@ -1300,8 +1301,8 @@ go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ
|
||||
go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
|
||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.69.0 h1:2yEATaop1/a1I4psnSLgWVPLWwCzkqWakgJy7xTDVy0=
|
||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.69.0/go.mod h1:D7J12YRapIekYyPWgGPlA/23pRmpSEZC5xJC/TTLI9U=
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0 h1:OyrsyzuttWTSur2qN/Lm0m2a8yqyIjUVBZcxFPuXq2o=
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0/go.mod h1:C2NGBr+kAB4bk3xtMXfZ94gqFDtg/GkI7e9zqGh5Beg=
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0 h1:CqXxU8VOmDefoh0+ztfGaymYbhdB/tT3zs79QaZTNGY=
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0/go.mod h1:BuhAPThV8PBHBvg8ZzZ/Ok3idOdhWIodywz2xEcRbJo=
|
||||
go.opentelemetry.io/contrib/zpages v0.68.0 h1:H5yrUwxPrbvhzdBxjQD+VXMtPjIBfp8NWNVvQT8E30M=
|
||||
go.opentelemetry.io/contrib/zpages v0.68.0/go.mod h1:sZGctYYO4UOHItj9bx3F+t/s+u1Fv8CHCJ5s2eR2cjU=
|
||||
go.opentelemetry.io/otel v1.44.0 h1:JjwHmHpA4iZ3wBxluu2fbbE7j4kqlE8jXyAyPXH7HqU=
|
||||
@@ -1337,8 +1338,8 @@ go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
|
||||
go.uber.org/zap v1.18.1/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI=
|
||||
go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8=
|
||||
go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
|
||||
go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0=
|
||||
go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8=
|
||||
go.yaml.in/yaml/v2 v2.4.4 h1:tuyd0P+2Ont/d6e2rl3be67goVK4R6deVxCUX5vyPaQ=
|
||||
go.yaml.in/yaml/v2 v2.4.4/go.mod h1:gMZqIpDtDqOfM0uNfy0SkpRhvUryYH0Z6wdMYcacYXQ=
|
||||
go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
|
||||
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
|
||||
golang.org/x/crypto v0.0.0-20180621125126-a49355c7e3f8/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
|
||||
@@ -1783,7 +1784,6 @@ gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
||||
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.0/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo=
|
||||
|
||||
1
vendor/github.com/RoaringBitmap/roaring/v2/.gitignore
generated
vendored
1
vendor/github.com/RoaringBitmap/roaring/v2/.gitignore
generated
vendored
@@ -3,3 +3,4 @@ roaring-fuzz.zip
|
||||
workdir
|
||||
coverage.out
|
||||
testdata/all3.classic
|
||||
/vendor
|
||||
10
vendor/github.com/RoaringBitmap/roaring/v2/Makefile
generated
vendored
Normal file
10
vendor/github.com/RoaringBitmap/roaring/v2/Makefile
generated
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
# Display general help about this command
|
||||
help:
|
||||
@echo ""
|
||||
@echo "The following commands are available:"
|
||||
@echo " make unconvert : Find unnecessary type conversions"
|
||||
@echo ""
|
||||
|
||||
# Find unnecessary type conversions
|
||||
unconvert:
|
||||
go tool unconvert -apply ./...
|
||||
19
vendor/github.com/RoaringBitmap/roaring/v2/README.md
generated
vendored
19
vendor/github.com/RoaringBitmap/roaring/v2/README.md
generated
vendored
@@ -25,6 +25,7 @@ Roaring bitmaps are used by several major systems such as [Apache Lucene][lucene
|
||||
[pinot]: http://github.com/linkedin/pinot/wiki
|
||||
[vsts]: https://www.visualstudio.com/team-services/
|
||||
[atlas]: https://github.com/Netflix/atlas
|
||||
[quanta]: https://github.com/disney/quanta
|
||||
|
||||
Roaring bitmaps are found to work well in many important applications:
|
||||
|
||||
@@ -44,6 +45,10 @@ The ``roaring`` Go library is used by
|
||||
* [trident](https://github.com/NetApp/trident)
|
||||
* [Husky](https://www.datadoghq.com/blog/engineering/introducing-husky/)
|
||||
* [FrostDB](https://github.com/polarsignals/frostdb)
|
||||
* [Disney Quanta](https://github.com/disney/quanta)
|
||||
|
||||
|
||||
|
||||
|
||||
This library is used in production in several systems and is part of the [Awesome Go collection](https://awesome-go.com).
|
||||
|
||||
@@ -370,7 +375,7 @@ go get github.com/RoaringBitmap/real-roaring-datasets
|
||||
BENCH_REAL_DATA=1 go test -bench BenchmarkRealData -run -
|
||||
```
|
||||
|
||||
### Iterative use
|
||||
### Interactive use
|
||||
|
||||
You can use roaring with gore:
|
||||
|
||||
@@ -414,4 +419,14 @@ The two versions were written independently.
|
||||
|
||||
### Mailing list/discussion group
|
||||
|
||||
https://groups.google.com/forum/#!forum/roaring-bitmaps
|
||||
https://groups.google.com/g/roaring-bitmaps
|
||||
|
||||
## Stars
|
||||
|
||||
|
||||
[Star history chart](https://www.star-history.com/#RoaringBitmap/roaring&Date)
|
||||
|
||||
### Further reading
|
||||
|
||||
<p>Mastering Programming: From Testing to Performance in Go</p>
|
||||
<div><a href="https://www.amazon.com/dp/B0FMPGSWR5"><img style="margin-left: auto; margin-right: auto;" src="https://m.media-amazon.com/images/I/61feneHS7kL._SL1499_.jpg" alt="" width="250px" /></a></div>
|
||||
|
||||
43
vendor/github.com/RoaringBitmap/roaring/v2/arraycontainer.go
generated
vendored
43
vendor/github.com/RoaringBitmap/roaring/v2/arraycontainer.go
generated
vendored
@@ -11,6 +11,7 @@ type arrayContainer struct {
|
||||
|
||||
var (
|
||||
ErrArrayIncorrectSort = errors.New("incorrectly sorted array")
|
||||
ErrEmptyArray = errors.New("empty array")
|
||||
ErrArrayInvalidSize = errors.New("invalid array size")
|
||||
)
|
||||
|
||||
@@ -61,6 +62,10 @@ func (ac *arrayContainer) getManyIterator() manyIterable {
|
||||
return &shortIterator{ac.content, 0}
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) getUnsetIterator() shortPeekable {
|
||||
return newArrayContainerUnsetIterator(ac.content)
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) minimum() uint16 {
|
||||
return ac.content[0] // assume not empty
|
||||
}
|
||||
@@ -417,8 +422,10 @@ func (ac *arrayContainer) iorArray(value2 *arrayContainer) container {
|
||||
func (ac *arrayContainer) iorBitmap(bc2 *bitmapContainer) container {
|
||||
bc1 := ac.toBitmapContainer()
|
||||
bc1.iorBitmap(bc2)
|
||||
*ac = *newArrayContainerFromBitmap(bc1)
|
||||
return ac
|
||||
// DO NOT DO THIS:
|
||||
// *ac = *newArrayContainerFromBitmap(bc1)
|
||||
// This will create gigantic array containers in the case of repeated calls to iorBitmap.
|
||||
return bc1
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) iorRun16(rc *runContainer16) container {
|
||||
@@ -621,6 +628,30 @@ func (ac *arrayContainer) xor(a container) container {
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) ixor(a container) container {
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
return ac.ixorArray(x)
|
||||
case *bitmapContainer:
|
||||
return ac.ixorBitmap(x)
|
||||
case *runContainer16:
|
||||
return ac.ixorRun16(x)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) ixorArray(value2 *arrayContainer) container {
|
||||
return ac.xorArray(value2)
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) ixorBitmap(value2 *bitmapContainer) container {
|
||||
return value2.ixor(ac)
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) ixorRun16(value2 *runContainer16) container {
|
||||
return value2.ixor(ac)
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) xorArray(value2 *arrayContainer) container {
|
||||
value1 := ac
|
||||
totalCardinality := value1.getCardinality() + value2.getCardinality()
|
||||
@@ -962,12 +993,12 @@ func (ac *arrayContainer) resetTo(a container) {
|
||||
x.fillArray(ac.content)
|
||||
|
||||
case *runContainer16:
|
||||
card := int(x.getCardinality())
|
||||
card := x.getCardinality()
|
||||
ac.realloc(card)
|
||||
cur := 0
|
||||
for _, r := range x.iv {
|
||||
for val := r.start; val <= r.last(); val++ {
|
||||
ac.content[cur] = val
|
||||
for val := int(r.start); val <= int(r.last()); val++ {
|
||||
ac.content[cur] = uint16(val)
|
||||
cur++
|
||||
}
|
||||
}
|
||||
@@ -1289,7 +1320,7 @@ func (ac *arrayContainer) validate() error {
|
||||
cardinality := ac.getCardinality()
|
||||
|
||||
if cardinality <= 0 {
|
||||
return ErrArrayInvalidSize
|
||||
return ErrEmptyArray
|
||||
}
|
||||
|
||||
if cardinality > arrayDefaultMaxSize {
|
||||
|
||||
97
vendor/github.com/RoaringBitmap/roaring/v2/bitmapcontainer.go
generated
vendored
97
vendor/github.com/RoaringBitmap/roaring/v2/bitmapcontainer.go
generated
vendored
@@ -262,6 +262,39 @@ func (bc *bitmapContainer) getManyIterator() manyIterable {
|
||||
return newBitmapContainerManyIterator(bc)
|
||||
}
|
||||
|
||||
type bitmapContainerUnsetIterator struct {
|
||||
ptr *bitmapContainer
|
||||
i int
|
||||
}
|
||||
|
||||
func (bcui *bitmapContainerUnsetIterator) next() uint16 {
|
||||
j := bcui.i
|
||||
bcui.i = bcui.ptr.NextUnsetBit(uint(bcui.i) + 1)
|
||||
return uint16(j)
|
||||
}
|
||||
|
||||
func (bcui *bitmapContainerUnsetIterator) hasNext() bool {
|
||||
return bcui.i >= 0 && bcui.i < 65536
|
||||
}
|
||||
|
||||
func (bcui *bitmapContainerUnsetIterator) peekNext() uint16 {
|
||||
return uint16(bcui.i)
|
||||
}
|
||||
|
||||
func (bcui *bitmapContainerUnsetIterator) advanceIfNeeded(minval uint16) {
|
||||
if bcui.hasNext() && bcui.peekNext() < minval {
|
||||
bcui.i = bcui.ptr.NextUnsetBit(uint(minval))
|
||||
}
|
||||
}
|
||||
|
||||
func newBitmapContainerUnsetIterator(a *bitmapContainer) *bitmapContainerUnsetIterator {
|
||||
return &bitmapContainerUnsetIterator{a, a.NextUnsetBit(0)}
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) getUnsetIterator() shortPeekable {
|
||||
return newBitmapContainerUnsetIterator(bc)
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) getSizeInBytes() int {
|
||||
return len(bc.bitmap) * 8
|
||||
}
|
||||
@@ -882,6 +915,43 @@ func (bc *bitmapContainer) iandBitmap(value2 *bitmapContainer) container {
|
||||
return bc
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) ixor(a container) container {
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
return bc.ixorArray(x)
|
||||
case *bitmapContainer:
|
||||
return bc.ixorBitmap(x)
|
||||
case *runContainer16:
|
||||
return bc.ixorRun16(x)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) ixorArray(value2 *arrayContainer) container {
|
||||
vbc := value2.toBitmapContainer()
|
||||
return bc.ixorBitmap(vbc)
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) ixorRun16(value2 *runContainer16) container {
|
||||
rcb := value2.toBitmapContainer()
|
||||
return bc.ixorBitmap(rcb)
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) ixorBitmap(value2 *bitmapContainer) container {
|
||||
newCardinality := int(popcntXorSlice(bc.bitmap, value2.bitmap))
|
||||
if newCardinality > arrayDefaultMaxSize {
|
||||
for k := 0; k < len(bc.bitmap); k++ {
|
||||
bc.bitmap[k] = bc.bitmap[k] ^ value2.bitmap[k]
|
||||
}
|
||||
bc.cardinality = newCardinality
|
||||
return bc
|
||||
}
|
||||
ac := newArrayContainerSize(newCardinality)
|
||||
fillArrayXOR(ac.content, bc.bitmap, value2.bitmap)
|
||||
ac.content = ac.content[:newCardinality]
|
||||
return ac
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) andNot(a container) container {
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
@@ -1100,7 +1170,7 @@ func (bc *bitmapContainer) NextSetBit(i uint) int {
|
||||
return -1
|
||||
}
|
||||
w := bc.bitmap[x]
|
||||
w = w >> uint(i%64)
|
||||
w = w >> (i % 64)
|
||||
if w != 0 {
|
||||
return int(i) + countTrailingZeros(w)
|
||||
}
|
||||
@@ -1113,6 +1183,29 @@ func (bc *bitmapContainer) NextSetBit(i uint) int {
|
||||
return -1
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) NextUnsetBit(i uint) int {
|
||||
var (
|
||||
x = i / 64
|
||||
length = uint(len(bc.bitmap))
|
||||
)
|
||||
if x >= length {
|
||||
return int(i)
|
||||
}
|
||||
w := bc.bitmap[x]
|
||||
w = w >> (i % 64)
|
||||
w = ^w
|
||||
if w != 0 {
|
||||
return int(i) + countTrailingZeros(w)
|
||||
}
|
||||
x++
|
||||
for ; x < length; x++ {
|
||||
if bc.bitmap[x] != 0xFFFFFFFFFFFFFFFF {
|
||||
return int(x*64) + countTrailingZeros(^bc.bitmap[x])
|
||||
}
|
||||
}
|
||||
return int(length * 64)
|
||||
}
|
||||
|
||||
// PrevSetBit returns the previous set bit, i.e. the previous int packed into the bitmaparray
|
||||
func (bc *bitmapContainer) PrevSetBit(i int) int {
|
||||
if i < 0 {
|
||||
@@ -1136,7 +1229,7 @@ func (bc *bitmapContainer) uPrevSetBit(i uint) int {
|
||||
|
||||
b := i % 64
|
||||
|
||||
w = w << uint(63-b)
|
||||
w = w << (63 - b)
|
||||
if w != 0 {
|
||||
return int(i) - countLeadingZeros(w)
|
||||
}
|
||||
|
||||
44
vendor/github.com/RoaringBitmap/roaring/v2/iter.go
generated
vendored
Normal file
44
vendor/github.com/RoaringBitmap/roaring/v2/iter.go
generated
vendored
Normal file
@@ -0,0 +1,44 @@
|
||||
package roaring
|
||||
|
||||
import "iter"
|
||||
|
||||
// Values returns an iterator that yields the elements of the bitmap in
|
||||
// increasing order. Starting with Go 1.23, users can use a for loop to iterate
|
||||
// over it.
|
||||
func Values(b *Bitmap) iter.Seq[uint32] {
|
||||
return func(yield func(uint32) bool) {
|
||||
it := b.Iterator()
|
||||
for it.HasNext() {
|
||||
if !yield(it.Next()) {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Backward returns an iterator that yields the elements of the bitmap in
|
||||
// decreasing order. Starting with Go 1.23, users can use a for loop to iterate
|
||||
// over it.
|
||||
func Backward(b *Bitmap) iter.Seq[uint32] {
|
||||
return func(yield func(uint32) bool) {
|
||||
it := b.ReverseIterator()
|
||||
for it.HasNext() {
|
||||
if !yield(it.Next()) {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Unset creates an iterator that yields values in the range [min, max] that are NOT contained in the bitmap.
|
||||
// The iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
|
||||
func Unset(b *Bitmap, min, max uint32) iter.Seq[uint32] {
|
||||
return func(yield func(uint32) bool) {
|
||||
it := b.UnsetIterator(uint64(min), uint64(max)+1)
|
||||
for it.HasNext() {
|
||||
if !yield(it.Next()) {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
14
vendor/github.com/RoaringBitmap/roaring/v2/parallel.go
generated
vendored
14
vendor/github.com/RoaringBitmap/roaring/v2/parallel.go
generated
vendored
@@ -370,23 +370,23 @@ func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
|
||||
|
||||
var chunkSize int
|
||||
var chunkCount int
|
||||
if parallelism*4 > int(keyRange) {
|
||||
if parallelism*4 > keyRange {
|
||||
chunkSize = 1
|
||||
chunkCount = int(keyRange)
|
||||
chunkCount = keyRange
|
||||
} else {
|
||||
chunkCount = parallelism * 4
|
||||
chunkSize = (int(keyRange) + chunkCount - 1) / chunkCount
|
||||
chunkSize = (keyRange + chunkCount - 1) / chunkCount
|
||||
}
|
||||
|
||||
if chunkCount*chunkSize < int(keyRange) {
|
||||
if chunkCount*chunkSize < keyRange {
|
||||
// it's fine to panic to indicate an implementation error
|
||||
panic(fmt.Sprintf("invariant check failed: chunkCount * chunkSize < keyRange, %d * %d < %d", chunkCount, chunkSize, keyRange))
|
||||
}
|
||||
|
||||
chunks := make([]*roaringArray, chunkCount)
|
||||
|
||||
chunkSpecChan := make(chan parChunkSpec, minOfInt(maxOfInt(64, 2*parallelism), int(chunkCount)))
|
||||
chunkChan := make(chan parChunk, minOfInt(32, int(chunkCount)))
|
||||
chunkSpecChan := make(chan parChunkSpec, minOfInt(maxOfInt(64, 2*parallelism), chunkCount))
|
||||
chunkChan := make(chan parChunk, minOfInt(32, chunkCount))
|
||||
|
||||
orFunc := func() {
|
||||
for spec := range chunkSpecChan {
|
||||
@@ -412,7 +412,7 @@ func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
|
||||
spec := parChunkSpec{
|
||||
start: uint16(int(lKey) + i*chunkSize),
|
||||
end: uint16(minOfInt(int(lKey)+(i+1)*chunkSize-1, int(hKey))),
|
||||
idx: int(i),
|
||||
idx: i,
|
||||
}
|
||||
chunkSpecChan <- spec
|
||||
}
|
||||
|
||||
269
vendor/github.com/RoaringBitmap/roaring/v2/roaring.go
generated
vendored
269
vendor/github.com/RoaringBitmap/roaring/v2/roaring.go
generated
vendored
@@ -68,10 +68,10 @@ func (rb *Bitmap) DenseSize() uint64 {
|
||||
|
||||
maximum := 1 + uint64(rb.Maximum())
|
||||
if maximum > (capacity - wordSize + 1) {
|
||||
return uint64(capacity >> log2WordSize)
|
||||
return capacity >> log2WordSize
|
||||
}
|
||||
|
||||
return uint64((maximum + (wordSize - 1)) >> log2WordSize)
|
||||
return (maximum + (wordSize - 1)) >> log2WordSize
|
||||
}
|
||||
|
||||
// ToDense returns a slice of uint64s representing the bitmap as a dense bitmap.
|
||||
@@ -421,6 +421,11 @@ func FromBitSet(bitset *bitset.BitSet) *Bitmap {
|
||||
// ToArray creates a new slice containing all of the integers stored in the Bitmap in sorted order
|
||||
func (rb *Bitmap) ToArray() []uint32 {
|
||||
array := make([]uint32, rb.GetCardinality())
|
||||
ar := rb.toArray(&array)
|
||||
return *ar
|
||||
}
|
||||
|
||||
func (rb *Bitmap) toArray(array *[]uint32) *[]uint32 {
|
||||
pos := 0
|
||||
pos2 := 0
|
||||
|
||||
@@ -428,11 +433,18 @@ func (rb *Bitmap) ToArray() []uint32 {
|
||||
hs := uint32(rb.highlowcontainer.getKeyAtIndex(pos)) << 16
|
||||
c := rb.highlowcontainer.getContainerAtIndex(pos)
|
||||
pos++
|
||||
pos2 = c.fillLeastSignificant16bits(array, pos2, hs)
|
||||
pos2 = c.fillLeastSignificant16bits(*array, pos2, hs)
|
||||
}
|
||||
return array
|
||||
}
|
||||
|
||||
// ToExistingArray stores all of the integers stored in the Bitmap in sorted order in the
|
||||
// slice that is given to ToExistingArray. It is the callers duty to make sure the slice
|
||||
// has the right size.
|
||||
func (rb *Bitmap) ToExistingArray(array *[]uint32) *[]uint32 {
|
||||
return rb.toArray(array)
|
||||
}
|
||||
|
||||
// GetSizeInBytes estimates the memory usage of the Bitmap. Note that this
|
||||
// might differ slightly from the amount of bytes required for persistent storage
|
||||
func (rb *Bitmap) GetSizeInBytes() uint64 {
|
||||
@@ -599,7 +611,7 @@ func (ii *intReverseIterator) init() {
|
||||
ii.shortIter = reverseIterator{t.content, len(t.content) - 1}
|
||||
ii.iter = &ii.shortIter
|
||||
case *runContainer16:
|
||||
index := int(len(t.iv)) - 1
|
||||
index := len(t.iv) - 1
|
||||
pos := uint16(0)
|
||||
|
||||
if index >= 0 {
|
||||
@@ -730,6 +742,182 @@ func (ii *manyIntIterator) Initialize(a *Bitmap) {
|
||||
ii.init()
|
||||
}
|
||||
|
||||
type unsetIterator struct {
|
||||
containerIndex int
|
||||
nextKey int
|
||||
hs uint32
|
||||
iter shortPeekable
|
||||
highlowcontainer *roaringArray
|
||||
|
||||
arrayUnsetIter arrayContainerUnsetIterator
|
||||
runUnsetIter runUnsetIterator16
|
||||
bitmapUnsetIter bitmapContainerUnsetIterator
|
||||
emptyContainerVal uint16
|
||||
|
||||
start, end uint64
|
||||
}
|
||||
|
||||
// HasNext returns true if there are more integers to iterate over
|
||||
func (iui *unsetIterator) HasNext() bool {
|
||||
// Skip containers that have no unset bits in our range
|
||||
for iui.nextKey < 65536 && uint64(iui.nextKey)<<16 < iui.end {
|
||||
if iui.iter == nil {
|
||||
// We're in an empty container gap, which has unset bits
|
||||
if uint64(iui.nextKey)<<16|uint64(iui.emptyContainerVal) < iui.end {
|
||||
return true
|
||||
}
|
||||
// Move to next container
|
||||
iui.nextKey++
|
||||
iui.containerIndex++
|
||||
iui.init()
|
||||
continue
|
||||
}
|
||||
if iui.iter.hasNext() {
|
||||
// Check if next value is within range
|
||||
nextVal := (uint64(iui.nextKey) << 16) | uint64(iui.iter.peekNext())
|
||||
if nextVal < iui.end {
|
||||
return true
|
||||
}
|
||||
}
|
||||
// Current container has no more unset bits in range, move to next
|
||||
iui.nextKey++
|
||||
iui.containerIndex++
|
||||
iui.init()
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (iui *unsetIterator) init() {
|
||||
// Check if we've gone past the end range
|
||||
if uint64(iui.nextKey)<<16 >= iui.end {
|
||||
iui.iter = nil
|
||||
return
|
||||
}
|
||||
|
||||
// Check if we're in an empty container gap
|
||||
if iui.containerIndex >= iui.highlowcontainer.size() ||
|
||||
iui.highlowcontainer.getKeyAtIndex(iui.containerIndex) > uint16(iui.nextKey) {
|
||||
// We're in a gap - iterate through empty container
|
||||
iui.emptyContainerVal = 0
|
||||
// If this container overlaps with start, advance to start
|
||||
if uint64(iui.nextKey)<<16 < iui.start && iui.start < uint64(iui.nextKey+1)<<16 {
|
||||
iui.emptyContainerVal = uint16(iui.start)
|
||||
}
|
||||
iui.iter = nil
|
||||
return
|
||||
}
|
||||
|
||||
// We're in an actual container
|
||||
iui.hs = uint32(iui.nextKey) << 16
|
||||
c := iui.highlowcontainer.getContainerAtIndex(iui.containerIndex)
|
||||
switch t := c.(type) {
|
||||
case *arrayContainer:
|
||||
iui.arrayUnsetIter = *newArrayContainerUnsetIterator(t.content)
|
||||
iui.iter = &iui.arrayUnsetIter
|
||||
case *runContainer16:
|
||||
iui.runUnsetIter = *t.newRunUnsetIterator16()
|
||||
iui.iter = &iui.runUnsetIter
|
||||
case *bitmapContainer:
|
||||
iui.bitmapUnsetIter = *newBitmapContainerUnsetIterator(t)
|
||||
iui.iter = &iui.bitmapUnsetIter
|
||||
}
|
||||
|
||||
// If this container overlaps with start, advance to the low bits of start
|
||||
if uint64(iui.nextKey)<<16 < iui.start && iui.start < uint64(iui.nextKey+1)<<16 {
|
||||
iui.iter.advanceIfNeeded(uint16(iui.start))
|
||||
}
|
||||
}
|
||||
|
||||
// Next returns the next integer
|
||||
func (iui *unsetIterator) Next() uint32 {
|
||||
if iui.iter == nil {
|
||||
// We're in an empty container gap
|
||||
x := (uint32(iui.nextKey) << 16) | uint32(iui.emptyContainerVal)
|
||||
iui.emptyContainerVal++
|
||||
if iui.emptyContainerVal == 0 || uint64(iui.nextKey)<<16|uint64(iui.emptyContainerVal) >= iui.end {
|
||||
// Wrapped around or reached end, move to next container
|
||||
iui.nextKey++
|
||||
iui.init()
|
||||
}
|
||||
return x
|
||||
}
|
||||
|
||||
x := uint32(iui.iter.next()) | iui.hs
|
||||
if !iui.iter.hasNext() || uint64(iui.nextKey)<<16|uint64(iui.iter.peekNext()) >= iui.end {
|
||||
iui.nextKey++
|
||||
iui.containerIndex++
|
||||
iui.init()
|
||||
}
|
||||
return x
|
||||
}
|
||||
|
||||
// PeekNext peeks the next value without advancing the iterator
|
||||
func (iui *unsetIterator) PeekNext() uint32 {
|
||||
if !iui.HasNext() {
|
||||
panic("PeekNext() called when HasNext() returns false")
|
||||
}
|
||||
if iui.iter == nil {
|
||||
return (uint32(iui.nextKey) << 16) | uint32(iui.emptyContainerVal)
|
||||
}
|
||||
return uint32(iui.iter.peekNext()&maxLowBit) | iui.hs
|
||||
}
|
||||
|
||||
// AdvanceIfNeeded advances as long as the next value is smaller than minval
|
||||
func (iui *unsetIterator) AdvanceIfNeeded(minval uint32) {
|
||||
targetKey := int(minval >> 16)
|
||||
|
||||
for iui.HasNext() && iui.nextKey < targetKey {
|
||||
iui.nextKey++
|
||||
// Find the next container that matches or exceeds nextKey
|
||||
for iui.containerIndex < iui.highlowcontainer.size() &&
|
||||
int(iui.highlowcontainer.getKeyAtIndex(iui.containerIndex)) < iui.nextKey {
|
||||
iui.containerIndex++
|
||||
}
|
||||
iui.init()
|
||||
}
|
||||
|
||||
if iui.HasNext() && iui.nextKey == targetKey {
|
||||
if iui.iter != nil {
|
||||
iui.iter.advanceIfNeeded(lowbits(minval))
|
||||
if !iui.iter.hasNext() || uint64(iui.nextKey)<<16|uint64(iui.iter.peekNext()) >= iui.end {
|
||||
iui.nextKey++
|
||||
iui.containerIndex++
|
||||
iui.init()
|
||||
}
|
||||
} else {
|
||||
lowVal := lowbits(minval)
|
||||
if iui.emptyContainerVal < lowVal {
|
||||
iui.emptyContainerVal = lowVal
|
||||
}
|
||||
if uint64(iui.nextKey)<<16|uint64(iui.emptyContainerVal) >= iui.end {
|
||||
iui.nextKey++
|
||||
iui.containerIndex++
|
||||
iui.init()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize configures the unset iterator to iterate over values in [start, end) that are not in the bitmap
|
||||
func (iui *unsetIterator) Initialize(a *Bitmap, start, end uint64) {
|
||||
if end > 0x100000000 {
|
||||
panic("end > 0x100000000")
|
||||
}
|
||||
iui.start = start
|
||||
iui.end = end
|
||||
iui.containerIndex = 0
|
||||
iui.nextKey = int(start >> 16)
|
||||
iui.highlowcontainer = &a.highlowcontainer
|
||||
|
||||
// Find the first container that matches or exceeds the start key
|
||||
for iui.containerIndex < iui.highlowcontainer.size() &&
|
||||
int(iui.highlowcontainer.getKeyAtIndex(iui.containerIndex)) < iui.nextKey {
|
||||
iui.containerIndex++
|
||||
}
|
||||
|
||||
iui.init()
|
||||
}
|
||||
|
||||
// String creates a string representation of the Bitmap
|
||||
func (rb *Bitmap) String() string {
|
||||
// inspired by https://github.com/fzandona/goroar/
|
||||
@@ -812,6 +1000,14 @@ func (rb *Bitmap) ManyIterator() ManyIntIterable {
|
||||
return p
|
||||
}
|
||||
|
||||
// UnsetIterator creates a new IntPeekable to iterate over values in the range [start, end) that are NOT contained in the bitmap.
|
||||
// The iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
|
||||
func (rb *Bitmap) UnsetIterator(start, end uint64) IntPeekable {
|
||||
p := new(unsetIterator)
|
||||
p.Initialize(rb, start, end)
|
||||
return p
|
||||
}
|
||||
|
||||
// Clone creates a copy of the Bitmap
|
||||
func (rb *Bitmap) Clone() *Bitmap {
|
||||
ptr := new(Bitmap)
|
||||
@@ -1290,6 +1486,10 @@ main:
|
||||
|
||||
// Xor computes the symmetric difference between two bitmaps and stores the result in the current bitmap
|
||||
func (rb *Bitmap) Xor(x2 *Bitmap) {
|
||||
if rb == x2 {
|
||||
rb.Clear()
|
||||
return
|
||||
}
|
||||
pos1 := 0
|
||||
pos2 := 0
|
||||
length1 := rb.highlowcontainer.size()
|
||||
@@ -1304,14 +1504,12 @@ func (rb *Bitmap) Xor(x2 *Bitmap) {
|
||||
break
|
||||
}
|
||||
} else if s1 > s2 {
|
||||
c := x2.highlowcontainer.getWritableContainerAtIndex(pos2)
|
||||
rb.highlowcontainer.insertNewKeyValueAt(pos1, x2.highlowcontainer.getKeyAtIndex(pos2), c)
|
||||
rb.highlowcontainer.insertNewKeyValueAt(pos1, x2.highlowcontainer.getKeyAtIndex(pos2), x2.highlowcontainer.getContainerAtIndex(pos2).clone())
|
||||
length1++
|
||||
pos1++
|
||||
pos2++
|
||||
} else {
|
||||
// TODO: couple be computed in-place for reduced memory usage
|
||||
c := rb.highlowcontainer.getContainerAtIndex(pos1).xor(x2.highlowcontainer.getContainerAtIndex(pos2))
|
||||
c := rb.highlowcontainer.getWritableContainerAtIndex(pos1).ixor(x2.highlowcontainer.getContainerAtIndex(pos2))
|
||||
if !c.isEmpty() {
|
||||
rb.highlowcontainer.setContainerAtIndex(pos1, c)
|
||||
pos1++
|
||||
@@ -1358,7 +1556,8 @@ main:
|
||||
}
|
||||
s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
|
||||
} else {
|
||||
rb.highlowcontainer.replaceKeyAndContainerAtIndex(pos1, s1, rb.highlowcontainer.getUnionedWritableContainer(pos1, x2.highlowcontainer.getContainerAtIndex(pos2)), false)
|
||||
newcont := rb.highlowcontainer.getUnionedWritableContainer(pos1, x2.highlowcontainer.getContainerAtIndex(pos2))
|
||||
rb.highlowcontainer.replaceKeyAndContainerAtIndex(pos1, s1, newcont, false)
|
||||
pos1++
|
||||
pos2++
|
||||
if (pos1 == length1) || (pos2 == length2) {
|
||||
@@ -1376,6 +1575,10 @@ main:
|
||||
|
||||
// AndNot computes the difference between two bitmaps and stores the result in the current bitmap
|
||||
func (rb *Bitmap) AndNot(x2 *Bitmap) {
|
||||
if rb == x2 {
|
||||
rb.Clear()
|
||||
return
|
||||
}
|
||||
pos1 := 0
|
||||
pos2 := 0
|
||||
intersectionsize := 0
|
||||
@@ -1465,7 +1668,6 @@ main:
|
||||
}
|
||||
s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
|
||||
} else {
|
||||
|
||||
answer.highlowcontainer.appendContainer(s1, x1.highlowcontainer.getContainerAtIndex(pos1).or(x2.highlowcontainer.getContainerAtIndex(pos2)), false)
|
||||
pos1++
|
||||
pos2++
|
||||
@@ -1504,6 +1706,7 @@ main:
|
||||
if !C.isEmpty() {
|
||||
answer.highlowcontainer.appendContainer(s1, C, false)
|
||||
}
|
||||
|
||||
pos1++
|
||||
pos2++
|
||||
if (pos1 == length1) || (pos2 == length2) {
|
||||
@@ -1531,6 +1734,9 @@ main:
|
||||
|
||||
// Xor computes the symmetric difference between two bitmaps and returns the result
|
||||
func Xor(x1, x2 *Bitmap) *Bitmap {
|
||||
if x1 == x2 {
|
||||
return NewBitmap()
|
||||
}
|
||||
answer := NewBitmap()
|
||||
pos1 := 0
|
||||
pos2 := 0
|
||||
@@ -1568,6 +1774,9 @@ func Xor(x1, x2 *Bitmap) *Bitmap {
|
||||
|
||||
// AndNot computes the difference between two bitmaps and returns the result
|
||||
func AndNot(x1, x2 *Bitmap) *Bitmap {
|
||||
if x1 == x2 {
|
||||
return NewBitmap()
|
||||
}
|
||||
answer := NewBitmap()
|
||||
pos1 := 0
|
||||
pos2 := 0
|
||||
@@ -1669,11 +1878,11 @@ func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) {
|
||||
for hb := hbStart; hb <= hbLast; hb++ {
|
||||
var containerStart uint32
|
||||
if hb == hbStart {
|
||||
containerStart = uint32(lbStart)
|
||||
containerStart = lbStart
|
||||
}
|
||||
containerLast := max
|
||||
if hb == hbLast {
|
||||
containerLast = uint32(lbLast)
|
||||
containerLast = lbLast
|
||||
}
|
||||
|
||||
i := rb.highlowcontainer.getIndex(uint16(hb))
|
||||
@@ -1829,11 +2038,11 @@ func Flip(bm *Bitmap, rangeStart, rangeEnd uint64) *Bitmap {
|
||||
for hb := hbStart; hb <= hbLast; hb++ {
|
||||
var containerStart uint32
|
||||
if hb == hbStart {
|
||||
containerStart = uint32(lbStart)
|
||||
containerStart = lbStart
|
||||
}
|
||||
containerLast := max
|
||||
if hb == hbLast {
|
||||
containerLast = uint32(lbLast)
|
||||
containerLast = lbLast
|
||||
}
|
||||
|
||||
i := bm.highlowcontainer.getIndex(uint16(hb))
|
||||
@@ -1931,8 +2140,8 @@ func (rb *Bitmap) PreviousValue(target uint32) int64 {
|
||||
return -1
|
||||
}
|
||||
|
||||
originalKey := highbits(uint32(target))
|
||||
query := lowbits(uint32(target))
|
||||
originalKey := highbits(target)
|
||||
query := lowbits(target)
|
||||
var prevValue int64
|
||||
prevValue = -1
|
||||
containerIndex := rb.highlowcontainer.advanceUntil(originalKey, -1)
|
||||
@@ -2133,6 +2342,34 @@ func (rb *Bitmap) Stats() Statistics {
|
||||
return stats
|
||||
}
|
||||
|
||||
// Describe prints a description of the bitmap's containers to stdout
|
||||
func (rb *Bitmap) Describe() {
|
||||
fmt.Printf("Bitmap with %d containers:\n", len(rb.highlowcontainer.containers))
|
||||
for i, c := range rb.highlowcontainer.containers {
|
||||
key := rb.highlowcontainer.keys[i]
|
||||
shared := ""
|
||||
if rb.highlowcontainer.needCopyOnWrite[i] {
|
||||
shared = " (shared)"
|
||||
}
|
||||
switch c.(type) {
|
||||
case *arrayContainer:
|
||||
fmt.Printf(" Container %d (key %d): array, cardinality %d%s\n", i, key, c.getCardinality(), shared)
|
||||
case *bitmapContainer:
|
||||
fmt.Printf(" Container %d (key %d): bitmap, cardinality %d%s\n", i, key, c.getCardinality(), shared)
|
||||
case *runContainer16:
|
||||
fmt.Printf(" Container %d (key %d): run, cardinality %d%s\n", i, key, c.getCardinality(), shared)
|
||||
default:
|
||||
fmt.Printf(" Container %d (key %d): unknown type, cardinality %d%s\n", i, key, c.getCardinality(), shared)
|
||||
}
|
||||
}
|
||||
valid := rb.Validate()
|
||||
if valid != nil {
|
||||
fmt.Printf(" Bitmap is INVALID: %v\n", valid)
|
||||
} else {
|
||||
fmt.Printf(" Bitmap is valid\n")
|
||||
}
|
||||
}
|
||||
|
||||
// Validate checks if the bitmap is internally consistent.
|
||||
// You may call it after deserialization to check that the bitmap is valid.
|
||||
// This function returns an error if the bitmap is invalid, nil otherwise.
|
||||
|
||||
46
vendor/github.com/RoaringBitmap/roaring/v2/roaring64/bsi64.go
generated
vendored
46
vendor/github.com/RoaringBitmap/roaring/v2/roaring64/bsi64.go
generated
vendored
@@ -66,7 +66,7 @@ func (b *BSI) GetExistenceBitmap() *Bitmap {
|
||||
// ValueExists tests whether the value exists.
|
||||
func (b *BSI) ValueExists(columnID uint64) bool {
|
||||
|
||||
return b.eBM.Contains(uint64(columnID))
|
||||
return b.eBM.Contains(columnID)
|
||||
}
|
||||
|
||||
// GetCardinality returns a count of unique column IDs for which a value has been set.
|
||||
@@ -115,11 +115,37 @@ func (b *BSI) SetBigValue(columnID uint64, value *big.Int) {
|
||||
b.eBM.Add(columnID)
|
||||
}
|
||||
|
||||
func (b *BSI) SetBigMany(foundSet *Bitmap, value *big.Int) {
|
||||
// If max/min values are set to zero then automatically determine bit array size
|
||||
if b.MaxValue == 0 && b.MinValue == 0 {
|
||||
minBits := value.BitLen() + 1
|
||||
if minBits == 1 {
|
||||
minBits = 2
|
||||
}
|
||||
for len(b.bA) < minBits {
|
||||
b.bA = append(b.bA, Bitmap{})
|
||||
}
|
||||
}
|
||||
for i := b.BitCount(); i >= 0; i-- {
|
||||
if value.Bit(i) == 0 {
|
||||
b.bA[i].AndNot(foundSet)
|
||||
} else {
|
||||
b.bA[i].Or(foundSet)
|
||||
}
|
||||
}
|
||||
b.eBM.Or(foundSet)
|
||||
}
|
||||
|
||||
// SetValue sets a value for a given columnID.
|
||||
func (b *BSI) SetValue(columnID uint64, value int64) {
|
||||
b.SetBigValue(columnID, big.NewInt(value))
|
||||
}
|
||||
|
||||
// SetMany sets a value for all columns in foundSet
|
||||
func (b *BSI) SetMany(foundSet *Bitmap, value int64) {
|
||||
b.SetBigMany(foundSet, big.NewInt(value))
|
||||
}
|
||||
|
||||
// GetValue gets the value at the column ID. Second param will be false for non-existent values.
|
||||
func (b *BSI) GetValue(columnID uint64) (value int64, exists bool) {
|
||||
bv, exists := b.GetBigValue(columnID)
|
||||
@@ -722,7 +748,7 @@ func transpose(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.WaitG
|
||||
results.RunOptimize()
|
||||
}
|
||||
for _, cID := range batch {
|
||||
if value, ok := e.bsi.GetValue(uint64(cID)); ok {
|
||||
if value, ok := e.bsi.GetValue(cID); ok {
|
||||
results.Add(uint64(value))
|
||||
}
|
||||
}
|
||||
@@ -738,7 +764,7 @@ func (b *BSI) ParOr(parallelism int, bsis ...*BSI) {
|
||||
bits := len(b.bA)
|
||||
for i := 0; i < len(bsis); i++ {
|
||||
if len(bsis[i].bA) > bits {
|
||||
bits = len(bsis[i].bA )
|
||||
bits = len(bsis[i].bA)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -931,7 +957,7 @@ func batchEqual(e *task, batch []uint64, resultsChan chan *Bitmap,
|
||||
|
||||
for i := 0; i < len(batch); i++ {
|
||||
cID := batch[i]
|
||||
if value, ok := e.bsi.GetBigValue(uint64(cID)); ok {
|
||||
if value, ok := e.bsi.GetBigValue(cID); ok {
|
||||
if _, yes := e.values[string(value.Bytes())]; yes {
|
||||
results.Add(cID)
|
||||
}
|
||||
@@ -942,11 +968,7 @@ func batchEqual(e *task, batch []uint64, resultsChan chan *Bitmap,
|
||||
|
||||
// ClearBits cleared the bits that exist in the target if they are also in the found set.
|
||||
func ClearBits(foundSet, target *Bitmap) {
|
||||
iter := foundSet.Iterator()
|
||||
for iter.HasNext() {
|
||||
cID := iter.Next()
|
||||
target.Remove(cID)
|
||||
}
|
||||
target.AndNot(foundSet)
|
||||
}
|
||||
|
||||
// ClearValues removes the values found in foundSet
|
||||
@@ -956,13 +978,13 @@ func (b *BSI) ClearValues(foundSet *Bitmap) {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
ClearBits(foundSet, &b.eBM)
|
||||
b.eBM.AndNot(foundSet)
|
||||
}()
|
||||
for i := 0; i < b.BitCount(); i++ {
|
||||
wg.Add(1)
|
||||
go func(j int) {
|
||||
defer wg.Done()
|
||||
ClearBits(foundSet, &b.bA[j])
|
||||
b.bA[j].AndNot(foundSet)
|
||||
}(i)
|
||||
}
|
||||
wg.Wait()
|
||||
@@ -1044,7 +1066,7 @@ func transposeWithCounts(input *BSI, filterSet *Bitmap, batch []uint64, resultsC
|
||||
results.RunOptimize()
|
||||
}
|
||||
for _, cID := range batch {
|
||||
if value, ok := input.GetValue(uint64(cID)); ok {
|
||||
if value, ok := input.GetValue(cID); ok {
|
||||
if !filterSet.Contains(uint64(value)) {
|
||||
continue
|
||||
}
|
||||
|
||||
31
vendor/github.com/RoaringBitmap/roaring/v2/roaring64/iter.go
generated
vendored
Normal file
31
vendor/github.com/RoaringBitmap/roaring/v2/roaring64/iter.go
generated
vendored
Normal file
@@ -0,0 +1,31 @@
|
||||
package roaring64
|
||||
|
||||
import "iter"
|
||||
|
||||
// Values returns an iterator that yields the elements of the bitmap in
|
||||
// increasing order. Starting with Go 1.23, users can use a for loop to iterate
|
||||
// over it.
|
||||
func Values(b *Bitmap) iter.Seq[uint64] {
|
||||
return func(yield func(uint64) bool) {
|
||||
it := b.Iterator()
|
||||
for it.HasNext() {
|
||||
if !yield(it.Next()) {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Backward returns an iterator that yields the elements of the bitmap in
|
||||
// decreasing order. Starting with Go 1.23, users can use a for loop to iterate
|
||||
// over it.
|
||||
func Backward(b *Bitmap) iter.Seq[uint64] {
|
||||
return func(yield func(uint64) bool) {
|
||||
it := b.ReverseIterator()
|
||||
for it.HasNext() {
|
||||
if !yield(it.Next()) {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
10
vendor/github.com/RoaringBitmap/roaring/v2/roaring64/parallel64.go
generated
vendored
10
vendor/github.com/RoaringBitmap/roaring/v2/roaring64/parallel64.go
generated
vendored
@@ -39,9 +39,13 @@ func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
|
||||
// on some systems, would block indefinitely.
|
||||
keyRange := uint64(hKey) - uint64(lKey) + 1
|
||||
if keyRange == 1 {
|
||||
// revert to FastOr. Since the key range is 0
|
||||
// no container-level aggregation parallelism is achievable
|
||||
return FastOr(bitmaps...)
|
||||
// All bitmaps have the same key,
|
||||
// we can merge the 32-bit roaring bitmaps in parallel
|
||||
var bms32s = make([]*roaring.Bitmap, 0, len(bitmaps))
|
||||
for _, b := range bitmaps {
|
||||
bms32s = append(bms32s, b.highlowcontainer.containers...)
|
||||
}
|
||||
return roaring32AsRoaring64(roaring.ParOr(parallelism, bms32s...), lKey)
|
||||
}
|
||||
|
||||
if parallelism == 0 {
|
||||
|
||||
20
vendor/github.com/RoaringBitmap/roaring/v2/roaring64/roaring64.go
generated
vendored
20
vendor/github.com/RoaringBitmap/roaring/v2/roaring64/roaring64.go
generated
vendored
@@ -73,7 +73,7 @@ func (rb *Bitmap) WriteTo(stream io.Writer) (int64, error) {
|
||||
return n, err
|
||||
}
|
||||
written, err := c.WriteTo(stream)
|
||||
n += int64(written)
|
||||
n += written
|
||||
if err != nil {
|
||||
return n, err
|
||||
}
|
||||
@@ -119,7 +119,7 @@ func (rb *Bitmap) FromUnsafeBytes(data []byte) (p int64, err error) {
|
||||
n, err := rb.highlowcontainer.containers[i].ReadFrom(stream)
|
||||
|
||||
if n == 0 || err != nil {
|
||||
return int64(n), fmt.Errorf("Could not deserialize bitmap for key #%d: %s", i, err)
|
||||
return n, fmt.Errorf("Could not deserialize bitmap for key #%d: %s", i, err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -167,9 +167,9 @@ func (rb *Bitmap) ReadFrom(stream io.Reader) (p int64, err error) {
|
||||
n, err := rb.highlowcontainer.containers[i].ReadFrom(stream)
|
||||
|
||||
if n == 0 || err != nil {
|
||||
return int64(n), fmt.Errorf("Could not deserialize bitmap for key #%d: %s", i, err)
|
||||
return n, fmt.Errorf("Could not deserialize bitmap for key #%d: %s", i, err)
|
||||
}
|
||||
p += int64(n)
|
||||
p += n
|
||||
}
|
||||
return p, nil
|
||||
}
|
||||
@@ -249,7 +249,7 @@ func (rb *Bitmap) String() string {
|
||||
counter := 0
|
||||
if i.HasNext() {
|
||||
counter = counter + 1
|
||||
buffer.WriteString(strconv.FormatUint(uint64(i.Next()), 10))
|
||||
buffer.WriteString(strconv.FormatUint(i.Next(), 10))
|
||||
}
|
||||
for i.HasNext() {
|
||||
buffer.WriteString(",")
|
||||
@@ -259,7 +259,7 @@ func (rb *Bitmap) String() string {
|
||||
buffer.WriteString("...")
|
||||
break
|
||||
}
|
||||
buffer.WriteString(strconv.FormatUint(uint64(i.Next()), 10))
|
||||
buffer.WriteString(strconv.FormatUint(i.Next(), 10))
|
||||
}
|
||||
buffer.WriteString("}")
|
||||
return buffer.String()
|
||||
@@ -346,7 +346,7 @@ func (rb *Bitmap) CheckedAdd(x uint64) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// AddInt adds the integer x to the bitmap (convenience method: the parameter is casted to uint32 and we call Add)
|
||||
// AddInt adds the integer x to the bitmap (convenience method: the parameter is casted to uint64 and we call Add)
|
||||
func (rb *Bitmap) AddInt(x int) {
|
||||
rb.Add(uint64(x))
|
||||
}
|
||||
@@ -1248,9 +1248,13 @@ func (rb *Bitmap) Validate() error {
|
||||
// Roaring32AsRoaring64 inserts a 32-bit roaring bitmap into
|
||||
// a 64-bit roaring bitmap. No copy is made.
|
||||
func Roaring32AsRoaring64(bm32 *roaring.Bitmap) *Bitmap {
|
||||
return roaring32AsRoaring64(bm32, 0)
|
||||
}
|
||||
|
||||
func roaring32AsRoaring64(bm32 *roaring.Bitmap, key uint32) *Bitmap {
|
||||
rb := NewBitmap()
|
||||
rb.highlowcontainer.resize(0)
|
||||
rb.highlowcontainer.keys = append(rb.highlowcontainer.keys, 0)
|
||||
rb.highlowcontainer.keys = append(rb.highlowcontainer.keys, key)
|
||||
rb.highlowcontainer.containers = append(rb.highlowcontainer.containers, bm32)
|
||||
rb.highlowcontainer.needCopyOnWrite = append(rb.highlowcontainer.needCopyOnWrite, false)
|
||||
return rb
|
||||
|
||||
6
vendor/github.com/RoaringBitmap/roaring/v2/roaringarray.go
generated
vendored
6
vendor/github.com/RoaringBitmap/roaring/v2/roaringarray.go
generated
vendored
@@ -39,7 +39,9 @@ type container interface {
|
||||
not(start, final int) container // range is [firstOfRange,lastOfRange)
|
||||
inot(firstOfRange, endx int) container // i stands for inplace, range is [firstOfRange,endx)
|
||||
xor(r container) container
|
||||
ixor(r container) container // i stands for inplace
|
||||
getShortIterator() shortPeekable
|
||||
getUnsetIterator() shortPeekable
|
||||
iterate(cb func(x uint16) bool) bool
|
||||
getReverseIterator() shortIterable
|
||||
getManyIterator() manyIterable
|
||||
@@ -108,7 +110,7 @@ func rangeOfOnes(start, last int) container {
|
||||
if last < 0 {
|
||||
panic("rangeOfOnes called with last < 0")
|
||||
}
|
||||
return newRunContainer16Range(uint16(start), uint16(last))
|
||||
return newRunContainer16Range(uint16(start), uint16(last)).toEfficientContainer()
|
||||
}
|
||||
|
||||
type roaringArray struct {
|
||||
@@ -588,7 +590,7 @@ func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte
|
||||
var isRunBitmap []byte
|
||||
|
||||
if cookie&0x0000FFFF == serialCookie {
|
||||
size = uint32(cookie>>16 + 1)
|
||||
size = cookie>>16 + 1
|
||||
// create is-run-container bitmap
|
||||
isRunBitmapSize := (int(size) + 7) / 8
|
||||
isRunBitmap, err = stream.Next(isRunBitmapSize)
|
||||
|
||||
304
vendor/github.com/RoaringBitmap/roaring/v2/runcontainer.go
generated
vendored
304
vendor/github.com/RoaringBitmap/roaring/v2/runcontainer.go
generated
vendored
@@ -41,7 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"sort"
|
||||
"slices"
|
||||
)
|
||||
|
||||
// runContainer16 does run-length encoding of sets of
|
||||
@@ -113,18 +113,6 @@ func (rc *runContainer16) String() string {
|
||||
return `runContainer16{` + is + `}`
|
||||
}
|
||||
|
||||
// uint16Slice is a sort.Sort convenience method
|
||||
type uint16Slice []uint16
|
||||
|
||||
// Len returns the length of p.
|
||||
func (p uint16Slice) Len() int { return len(p) }
|
||||
|
||||
// Less returns p[i] < p[j]
|
||||
func (p uint16Slice) Less(i, j int) bool { return p[i] < p[j] }
|
||||
|
||||
// Swap swaps elements i and j.
|
||||
func (p uint16Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
|
||||
|
||||
// addHelper helps build a runContainer16.
|
||||
type addHelper16 struct {
|
||||
runstart uint16
|
||||
@@ -183,7 +171,7 @@ func newRunContainer16FromVals(alreadySorted bool, vals ...uint16) *runContainer
|
||||
ah := addHelper16{rc: rc}
|
||||
|
||||
if !alreadySorted {
|
||||
sort.Sort(uint16Slice(vals))
|
||||
slices.Sort(vals)
|
||||
}
|
||||
n := len(vals)
|
||||
var cur, prev uint16
|
||||
@@ -386,8 +374,8 @@ func (rc *runContainer16) union(b *runContainer16) *runContainer16 {
|
||||
|
||||
var m []interval16
|
||||
|
||||
alim := int(len(rc.iv))
|
||||
blim := int(len(b.iv))
|
||||
alim := len(rc.iv)
|
||||
blim := len(b.iv)
|
||||
|
||||
var na int // next from a
|
||||
var nb int // next from b
|
||||
@@ -497,8 +485,8 @@ func (rc *runContainer16) unionCardinality(b *runContainer16) uint {
|
||||
// call it rc for consistency with the rest of the methods.
|
||||
answer := uint(0)
|
||||
|
||||
alim := int(len(rc.iv))
|
||||
blim := int(len(b.iv))
|
||||
alim := len(rc.iv)
|
||||
blim := len(b.iv)
|
||||
|
||||
var na int // next from a
|
||||
var nb int // next from b
|
||||
@@ -617,8 +605,8 @@ func (rc *runContainer16) indexOfIntervalAtOrAfter(key int, startIndex int) int
|
||||
// intersection of rc (also known as 'a') and b.
|
||||
func (rc *runContainer16) intersect(b *runContainer16) *runContainer16 {
|
||||
a := rc
|
||||
numa := int(len(a.iv))
|
||||
numb := int(len(b.iv))
|
||||
numa := len(a.iv)
|
||||
numb := len(b.iv)
|
||||
res := &runContainer16{}
|
||||
if numa == 0 || numb == 0 {
|
||||
return res
|
||||
@@ -719,8 +707,8 @@ func (rc *runContainer16) intersectCardinality(b *runContainer16) int {
|
||||
answer := int(0)
|
||||
|
||||
a := rc
|
||||
numa := int(len(a.iv))
|
||||
numb := int(len(b.iv))
|
||||
numa := len(a.iv)
|
||||
numb := len(b.iv)
|
||||
if numa == 0 || numb == 0 {
|
||||
return 0
|
||||
}
|
||||
@@ -847,7 +835,7 @@ func (rc *runContainer16) numIntervals() int {
|
||||
// The search space is from startIndex to endxIndex. If endxIndex is set to zero, then there
|
||||
// no upper bound.
|
||||
func (rc *runContainer16) searchRange(key int, startIndex int, endxIndex int) (whichInterval16 int, alreadyPresent bool, numCompares int) {
|
||||
n := int(len(rc.iv))
|
||||
n := len(rc.iv)
|
||||
if n == 0 {
|
||||
return -1, false, 0
|
||||
}
|
||||
@@ -1045,7 +1033,7 @@ func (rc *runContainer16) Add(k uint16) (wasNew bool) {
|
||||
}
|
||||
wasNew = true
|
||||
|
||||
n := int(len(rc.iv))
|
||||
n := len(rc.iv)
|
||||
if index == -1 {
|
||||
// we may need to extend the first run
|
||||
if n > 0 {
|
||||
@@ -1139,8 +1127,8 @@ func (rc *runContainer16) iterate(cb func(x uint16) bool) bool {
|
||||
// returns true when there is at least one more value
|
||||
// available in the iteration sequence.
|
||||
func (ri *runIterator16) hasNext() bool {
|
||||
return int(len(ri.rc.iv)) > ri.curIndex+1 ||
|
||||
(int(len(ri.rc.iv)) == ri.curIndex+1 && ri.rc.iv[ri.curIndex].length >= ri.curPosInIndex)
|
||||
return len(ri.rc.iv) > ri.curIndex+1 ||
|
||||
(len(ri.rc.iv) == ri.curIndex+1 && ri.rc.iv[ri.curIndex].length >= ri.curPosInIndex)
|
||||
}
|
||||
|
||||
// next returns the next value in the iteration sequence.
|
||||
@@ -1169,7 +1157,7 @@ func (ri *runIterator16) advanceIfNeeded(minval uint16) {
|
||||
}
|
||||
|
||||
// interval cannot be -1 because of minval > peekNext
|
||||
interval, isPresent, _ := ri.rc.searchRange(int(minval), ri.curIndex, int(len(ri.rc.iv)))
|
||||
interval, isPresent, _ := ri.rc.searchRange(int(minval), ri.curIndex, len(ri.rc.iv))
|
||||
|
||||
// if the minval is present, set the curPosIndex at the right position
|
||||
if isPresent {
|
||||
@@ -1193,7 +1181,7 @@ type runReverseIterator16 struct {
|
||||
|
||||
// newRunReverseIterator16 returns a new empty run iterator.
|
||||
func (rc *runContainer16) newRunReverseIterator16() *runReverseIterator16 {
|
||||
index := int(len(rc.iv)) - 1
|
||||
index := len(rc.iv) - 1
|
||||
pos := uint16(0)
|
||||
|
||||
if index >= 0 {
|
||||
@@ -1254,8 +1242,17 @@ func (ri *runIterator16) nextMany(hs uint32, buf []uint32) int {
|
||||
|
||||
// allows BCE
|
||||
buf2 := buf[n : n+moreVals]
|
||||
for i := range buf2 {
|
||||
buf2[i] = base + uint32(i)
|
||||
i := 0
|
||||
for ; i+3 < len(buf2); i += 4 {
|
||||
buf2[i] = base
|
||||
buf2[i+1] = base + 1
|
||||
buf2[i+2] = base + 2
|
||||
buf2[i+3] = base + 3
|
||||
base += 4
|
||||
}
|
||||
for ; i < len(buf2); i++ {
|
||||
buf2[i] = base
|
||||
base++
|
||||
}
|
||||
|
||||
// update values
|
||||
@@ -1266,7 +1263,7 @@ func (ri *runIterator16) nextMany(hs uint32, buf []uint32) int {
|
||||
ri.curPosInIndex = 0
|
||||
ri.curIndex++
|
||||
|
||||
if ri.curIndex == int(len(ri.rc.iv)) {
|
||||
if ri.curIndex == len(ri.rc.iv) {
|
||||
break
|
||||
}
|
||||
} else {
|
||||
@@ -1295,8 +1292,17 @@ func (ri *runIterator16) nextMany64(hs uint64, buf []uint64) int {
|
||||
|
||||
// allows BCE
|
||||
buf2 := buf[n : n+moreVals]
|
||||
for i := range buf2 {
|
||||
buf2[i] = base + uint64(i)
|
||||
i := 0
|
||||
for ; i+3 < len(buf2); i += 4 {
|
||||
buf2[i] = base
|
||||
buf2[i+1] = base + 1
|
||||
buf2[i+2] = base + 2
|
||||
buf2[i+3] = base + 3
|
||||
base += 4
|
||||
}
|
||||
for ; i < len(buf2); i++ {
|
||||
buf2[i] = base
|
||||
base++
|
||||
}
|
||||
|
||||
// update values
|
||||
@@ -1307,7 +1313,7 @@ func (ri *runIterator16) nextMany64(hs uint64, buf []uint64) int {
|
||||
ri.curPosInIndex = 0
|
||||
ri.curIndex++
|
||||
|
||||
if ri.curIndex == int(len(ri.rc.iv)) {
|
||||
if ri.curIndex == len(ri.rc.iv) {
|
||||
break
|
||||
}
|
||||
} else {
|
||||
@@ -1416,7 +1422,7 @@ func (rc *runContainer16) findNextIntervalThatIntersectsStartingFrom(startIndex
|
||||
if w < startIndex {
|
||||
// not found and comes before lower bound startIndex,
|
||||
// so just use the lower bound.
|
||||
if startIndex == int(len(rc.iv)) {
|
||||
if startIndex == len(rc.iv) {
|
||||
// also this bump up means that we are done
|
||||
return startIndex, true
|
||||
}
|
||||
@@ -1542,7 +1548,7 @@ func (iv interval16) subtractInterval(del interval16) (left []interval16, delcou
|
||||
func (rc *runContainer16) isubtract(del interval16) {
|
||||
origiv := make([]interval16, len(rc.iv))
|
||||
copy(origiv, rc.iv)
|
||||
n := int(len(rc.iv))
|
||||
n := len(rc.iv)
|
||||
if n == 0 {
|
||||
return // already done.
|
||||
}
|
||||
@@ -1569,8 +1575,8 @@ func (rc *runContainer16) isubtract(del interval16) {
|
||||
// would overwrite values in iv b/c res0 can have len 2. so
|
||||
// write to origiv instead.
|
||||
lost := 1 + ilast - istart
|
||||
changeSize := int(len(res0)) - lost
|
||||
newSize := int(len(rc.iv)) + changeSize
|
||||
changeSize := len(res0) - lost
|
||||
newSize := len(rc.iv) + changeSize
|
||||
|
||||
// rc.iv = append(pre, caboose...)
|
||||
// return
|
||||
@@ -1578,19 +1584,19 @@ func (rc *runContainer16) isubtract(del interval16) {
|
||||
if ilast != istart {
|
||||
res1, _ := rc.iv[ilast].subtractInterval(del)
|
||||
res0 = append(res0, res1...)
|
||||
changeSize = int(len(res0)) - lost
|
||||
newSize = int(len(rc.iv)) + changeSize
|
||||
changeSize = len(res0) - lost
|
||||
newSize = len(rc.iv) + changeSize
|
||||
}
|
||||
switch {
|
||||
case changeSize < 0:
|
||||
// shrink
|
||||
copy(rc.iv[istart+int(len(res0)):], rc.iv[ilast+1:])
|
||||
copy(rc.iv[istart:istart+int(len(res0))], res0)
|
||||
copy(rc.iv[istart+len(res0):], rc.iv[ilast+1:])
|
||||
copy(rc.iv[istart:istart+len(res0)], res0)
|
||||
rc.iv = rc.iv[:newSize]
|
||||
return
|
||||
case changeSize == 0:
|
||||
// stay the same
|
||||
copy(rc.iv[istart:istart+int(len(res0))], res0)
|
||||
copy(rc.iv[istart:istart+len(res0)], res0)
|
||||
return
|
||||
default:
|
||||
// changeSize > 0 is only possible when ilast == istart.
|
||||
@@ -1647,7 +1653,7 @@ func (rc *runContainer16) isubtract(del interval16) {
|
||||
// INVAR: ilast < n-1
|
||||
lost := ilast - istart
|
||||
changeSize := -lost
|
||||
newSize := int(len(rc.iv)) + changeSize
|
||||
newSize := len(rc.iv) + changeSize
|
||||
if changeSize != 0 {
|
||||
copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:])
|
||||
}
|
||||
@@ -1664,8 +1670,8 @@ func (rc *runContainer16) isubtract(del interval16) {
|
||||
rc.iv[istart] = res0[0]
|
||||
}
|
||||
lost := 1 + (ilast - istart)
|
||||
changeSize := int(len(res0)) - lost
|
||||
newSize := int(len(rc.iv)) + changeSize
|
||||
changeSize := len(res0) - lost
|
||||
newSize := len(rc.iv) + changeSize
|
||||
if changeSize != 0 {
|
||||
copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:])
|
||||
}
|
||||
@@ -1676,8 +1682,8 @@ func (rc *runContainer16) isubtract(del interval16) {
|
||||
// we can only shrink or stay the same size
|
||||
res1, _ := rc.iv[ilast].subtractInterval(del)
|
||||
lost := ilast - istart
|
||||
changeSize := int(len(res1)) - lost
|
||||
newSize := int(len(rc.iv)) + changeSize
|
||||
changeSize := len(res1) - lost
|
||||
newSize := len(rc.iv) + changeSize
|
||||
if changeSize != 0 {
|
||||
// move the tail first to make room for res1
|
||||
copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:])
|
||||
@@ -1823,7 +1829,11 @@ func (rc *runContainer16) and(a container) container {
|
||||
}
|
||||
switch c := a.(type) {
|
||||
case *runContainer16:
|
||||
return rc.intersect(c)
|
||||
// Important: there is no reason to believe that the
|
||||
// result of intersecting two run containers is itself
|
||||
// a run container. Hence we convert to efficient container.
|
||||
// We only use run containers when they are efficient.
|
||||
return rc.intersect(c).toEfficientContainer()
|
||||
case *arrayContainer:
|
||||
return rc.andArray(c)
|
||||
case *bitmapContainer:
|
||||
@@ -1835,7 +1845,7 @@ func (rc *runContainer16) and(a container) container {
|
||||
func (rc *runContainer16) andCardinality(a container) int {
|
||||
switch c := a.(type) {
|
||||
case *runContainer16:
|
||||
return int(rc.intersectCardinality(c))
|
||||
return rc.intersectCardinality(c)
|
||||
case *arrayContainer:
|
||||
return rc.andArrayCardinality(c)
|
||||
case *bitmapContainer:
|
||||
@@ -1885,11 +1895,19 @@ func (rc *runContainer16) iand(a container) container {
|
||||
}
|
||||
switch c := a.(type) {
|
||||
case *runContainer16:
|
||||
return rc.inplaceIntersect(c)
|
||||
// Important: there is no reason to believe that the
|
||||
// result of intersecting two run containers is itself
|
||||
// a run container. Hence we convert to efficient container.
|
||||
// We only use run containers when they are efficient.
|
||||
return rc.inplaceIntersect(c).toEfficientContainer()
|
||||
case *arrayContainer:
|
||||
// inplace intersection with array is not supported
|
||||
// It is likely not very useful either.
|
||||
return rc.andArray(c)
|
||||
case *bitmapContainer:
|
||||
return rc.iandBitmapContainer(c)
|
||||
// inplace intersection with bitmap is not supported
|
||||
// It is very difficult to do this inplace and likely not useful.
|
||||
return rc.andBitmapContainer(c)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
@@ -1900,12 +1918,6 @@ func (rc *runContainer16) inplaceIntersect(rc2 *runContainer16) container {
|
||||
return rc
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iandBitmapContainer(bc *bitmapContainer) container {
|
||||
isect := rc.andBitmapContainer(bc)
|
||||
*rc = *newRunContainer16FromContainer(isect)
|
||||
return rc
|
||||
}
|
||||
|
||||
func (rc *runContainer16) andArray(ac *arrayContainer) container {
|
||||
if len(rc.iv) == 0 {
|
||||
return newArrayContainer()
|
||||
@@ -1943,7 +1955,7 @@ func (rc *runContainer16) andNot(a container) container {
|
||||
case *bitmapContainer:
|
||||
return rc.andNotBitmap(c)
|
||||
case *runContainer16:
|
||||
return rc.andNotRunContainer16(c)
|
||||
return rc.andNotRunContainer16(c).toEfficientContainer()
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
@@ -1974,6 +1986,61 @@ func (rc *runContainer16) getManyIterator() manyIterable {
|
||||
return rc.newManyRunIterator16()
|
||||
}
|
||||
|
||||
type runUnsetIterator16 struct {
|
||||
rc *runContainer16
|
||||
curIndex int
|
||||
nextVal int
|
||||
}
|
||||
|
||||
func (rc *runContainer16) newRunUnsetIterator16() *runUnsetIterator16 {
|
||||
rui := &runUnsetIterator16{rc: rc, curIndex: 0, nextVal: 0}
|
||||
if len(rc.iv) > 0 && rc.iv[0].start == 0 {
|
||||
rui.nextVal = int(rc.iv[0].start) + int(rc.iv[0].length) + 1
|
||||
rui.curIndex = 1
|
||||
}
|
||||
return rui
|
||||
}
|
||||
|
||||
func (rui *runUnsetIterator16) hasNext() bool {
|
||||
return rui.nextVal < 65536
|
||||
}
|
||||
|
||||
func (rui *runUnsetIterator16) next() uint16 {
|
||||
val := rui.nextVal
|
||||
rui.nextVal++
|
||||
if rui.curIndex < len(rui.rc.iv) && uint16(rui.nextVal) >= rui.rc.iv[rui.curIndex].start {
|
||||
rui.nextVal = int(rui.rc.iv[rui.curIndex].start) + int(rui.rc.iv[rui.curIndex].length) + 1
|
||||
rui.curIndex++
|
||||
}
|
||||
return uint16(val)
|
||||
}
|
||||
|
||||
func (rui *runUnsetIterator16) peekNext() uint16 {
|
||||
return uint16(rui.nextVal)
|
||||
}
|
||||
|
||||
func (rui *runUnsetIterator16) advanceIfNeeded(minval uint16) {
|
||||
if !rui.hasNext() || rui.peekNext() >= minval {
|
||||
return
|
||||
}
|
||||
rui.nextVal = int(minval)
|
||||
for rui.curIndex < len(rui.rc.iv) {
|
||||
if rui.rc.iv[rui.curIndex].start+rui.rc.iv[rui.curIndex].length < minval {
|
||||
rui.curIndex++
|
||||
} else if rui.rc.iv[rui.curIndex].start <= minval {
|
||||
rui.nextVal = int(rui.rc.iv[rui.curIndex].start) + int(rui.rc.iv[rui.curIndex].length) + 1
|
||||
rui.curIndex++
|
||||
break
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (rc *runContainer16) getUnsetIterator() shortPeekable {
|
||||
return rc.newRunUnsetIterator16()
|
||||
}
|
||||
|
||||
// add the values in the range [firstOfRange, endx). endx
|
||||
// is still abe to express 2^16 because it is an int not an uint16.
|
||||
func (rc *runContainer16) iaddRange(firstOfRange, endx int) container {
|
||||
@@ -2104,7 +2171,7 @@ func (rc *runContainer16) equals(o container) bool {
|
||||
|
||||
func (rc *runContainer16) iaddReturnMinimized(x uint16) container {
|
||||
rc.Add(x)
|
||||
return rc
|
||||
return rc.toEfficientContainer()
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iadd(x uint16) (wasNew bool) {
|
||||
@@ -2113,7 +2180,7 @@ func (rc *runContainer16) iadd(x uint16) (wasNew bool) {
|
||||
|
||||
func (rc *runContainer16) iremoveReturnMinimized(x uint16) container {
|
||||
rc.removeKey(x)
|
||||
return rc
|
||||
return rc.toEfficientContainer()
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iremove(x uint16) bool {
|
||||
@@ -2174,15 +2241,9 @@ func (rc *runContainer16) orArray(ac *arrayContainer) container {
|
||||
if rc.isEmpty() {
|
||||
return ac.clone()
|
||||
}
|
||||
intervals, cardMinusOne := runArrayUnionToRuns(rc, ac)
|
||||
intervals, cardminusone := runArrayUnionToRuns(rc, ac)
|
||||
result := newRunContainer16TakeOwnership(intervals)
|
||||
if len(intervals) >= MaxNumIntervals && cardMinusOne >= arrayDefaultMaxSize {
|
||||
return newBitmapContainerFromRun(result)
|
||||
}
|
||||
if len(intervals)*2 > 1+int(cardMinusOne) {
|
||||
return result.toArrayContainer()
|
||||
}
|
||||
return result
|
||||
return result.toEfficientContainerFromCardinality(int(cardminusone) + 1)
|
||||
}
|
||||
|
||||
// orArray finds the union of rc and ac.
|
||||
@@ -2200,7 +2261,7 @@ func (rc *runContainer16) ior(a container) container {
|
||||
case *arrayContainer:
|
||||
return rc.iorArray(c)
|
||||
case *bitmapContainer:
|
||||
return rc.iorBitmapContainer(c)
|
||||
return rc.orBitmapContainer(c)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
@@ -2212,16 +2273,17 @@ func (rc *runContainer16) inplaceUnion(rc2 *runContainer16) container {
|
||||
rc.Add(uint16(i))
|
||||
}
|
||||
}
|
||||
return rc
|
||||
return rc.toEfficientContainer()
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iorBitmapContainer(bc *bitmapContainer) container {
|
||||
it := bc.getShortIterator()
|
||||
for it.hasNext() {
|
||||
rc.Add(it.next())
|
||||
}
|
||||
return rc
|
||||
}
|
||||
// Such code should not be used as it will not preserve the container invariants:
|
||||
//func (rc *runContainer16) iorBitmapContainer(bc *bitmapContainer) container {
|
||||
// it := bc.getShortIterator()
|
||||
// for it.hasNext() {
|
||||
// rc.Add(it.next())
|
||||
// }
|
||||
// return rc
|
||||
//}
|
||||
|
||||
func (rc *runContainer16) iorArray(ac *arrayContainer) container {
|
||||
if rc.isEmpty() {
|
||||
@@ -2235,13 +2297,8 @@ func (rc *runContainer16) iorArray(ac *arrayContainer) container {
|
||||
// this can be done with methods like the in-place array container union
|
||||
// but maybe lazily moving the remaining elements back.
|
||||
rc.iv, cardMinusOne = runArrayUnionToRuns(rc, ac)
|
||||
if len(rc.iv) >= MaxNumIntervals && cardMinusOne >= arrayDefaultMaxSize {
|
||||
return newBitmapContainerFromRun(rc)
|
||||
}
|
||||
if len(rc.iv)*2 > 1+int(cardMinusOne) {
|
||||
return rc.toArrayContainer()
|
||||
}
|
||||
return rc
|
||||
return rc.toEfficientContainerFromCardinality(int(cardMinusOne) + 1)
|
||||
|
||||
}
|
||||
|
||||
func runArrayUnionToRuns(rc *runContainer16, ac *arrayContainer) ([]interval16, uint16) {
|
||||
@@ -2377,6 +2434,30 @@ func (rc *runContainer16) xor(a container) container {
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (rc *runContainer16) ixor(a container) container {
|
||||
switch c := a.(type) {
|
||||
case *arrayContainer:
|
||||
return rc.ixorArray(c)
|
||||
case *bitmapContainer:
|
||||
return rc.ixorBitmap(c)
|
||||
case *runContainer16:
|
||||
return rc.ixorRunContainer16(c)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (rc *runContainer16) ixorArray(value2 *arrayContainer) container {
|
||||
return rc.toBitmapContainer().ixor(value2)
|
||||
}
|
||||
|
||||
func (rc *runContainer16) ixorBitmap(value2 *bitmapContainer) container {
|
||||
return value2.ixor(rc)
|
||||
}
|
||||
|
||||
func (rc *runContainer16) ixorRunContainer16(value2 *runContainer16) container {
|
||||
return rc.toBitmapContainer().ixor(value2.toBitmapContainer())
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iandNot(a container) container {
|
||||
switch c := a.(type) {
|
||||
case *arrayContainer:
|
||||
@@ -2384,7 +2465,7 @@ func (rc *runContainer16) iandNot(a container) container {
|
||||
case *bitmapContainer:
|
||||
return rc.iandNotBitmap(c)
|
||||
case *runContainer16:
|
||||
return rc.iandNotRunContainer16(c)
|
||||
return rc.iandNotRunContainer16(c).toEfficientContainer()
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
@@ -2399,11 +2480,11 @@ func (rc *runContainer16) inot(firstOfRange, endx int) container {
|
||||
}
|
||||
// TODO: minimize copies, do it all inplace; not() makes a copy.
|
||||
rc = rc.Not(firstOfRange, endx)
|
||||
return rc
|
||||
return rc.toEfficientContainer()
|
||||
}
|
||||
|
||||
func (rc *runContainer16) rank(x uint16) int {
|
||||
n := int(len(rc.iv))
|
||||
n := len(rc.iv)
|
||||
xx := int(x)
|
||||
w, already, _ := rc.search(xx)
|
||||
if w < 0 {
|
||||
@@ -2417,13 +2498,13 @@ func (rc *runContainer16) rank(x uint16) int {
|
||||
for i := int(0); i <= w; i++ {
|
||||
rnk += rc.iv[i].runlen()
|
||||
}
|
||||
return int(rnk)
|
||||
return rnk
|
||||
}
|
||||
for i := int(0); i < w; i++ {
|
||||
rnk += rc.iv[i].runlen()
|
||||
}
|
||||
rnk += int(x-rc.iv[w].start) + 1
|
||||
return int(rnk)
|
||||
return rnk
|
||||
}
|
||||
|
||||
func (rc *runContainer16) selectInt(x uint16) int {
|
||||
@@ -2431,7 +2512,7 @@ func (rc *runContainer16) selectInt(x uint16) int {
|
||||
for k := range rc.iv {
|
||||
nextOffset := offset + rc.iv[k].runlen()
|
||||
if nextOffset > int(x) {
|
||||
return int(int(rc.iv[k].start) + (int(x) - offset))
|
||||
return int(rc.iv[k].start) + (int(x) - offset)
|
||||
}
|
||||
offset = nextOffset
|
||||
}
|
||||
@@ -2455,10 +2536,11 @@ func (rc *runContainer16) andNotBitmap(bc *bitmapContainer) container {
|
||||
|
||||
func (rc *runContainer16) toBitmapContainer() *bitmapContainer {
|
||||
bc := newBitmapContainer()
|
||||
bc.cardinality = 0
|
||||
for i := range rc.iv {
|
||||
bc.cardinality += rc.iv[i].runlen()
|
||||
bc.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1)
|
||||
}
|
||||
bc.computeCardinality()
|
||||
return bc
|
||||
}
|
||||
|
||||
@@ -2473,21 +2555,23 @@ func (rc *runContainer16) iandNotArray(ac *arrayContainer) container {
|
||||
rcb := rc.toBitmapContainer()
|
||||
acb := ac.toBitmapContainer()
|
||||
rcb.iandNotBitmapSurely(acb)
|
||||
// TODO: check size and optimize the return value
|
||||
// TODO: is inplace modification really required? If not, elide the copy.
|
||||
rc2 := newRunContainer16FromBitmapContainer(rcb)
|
||||
*rc = *rc2
|
||||
return rc
|
||||
answer := rcb.toEfficientContainer()
|
||||
if runrc, ok := answer.(*runContainer16); ok {
|
||||
*rc = *runrc
|
||||
return rc
|
||||
}
|
||||
return answer
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iandNotBitmap(bc *bitmapContainer) container {
|
||||
rcb := rc.toBitmapContainer()
|
||||
rcb.iandNotBitmapSurely(bc)
|
||||
// TODO: check size and optimize the return value
|
||||
// TODO: is inplace modification really required? If not, elide the copy.
|
||||
rc2 := newRunContainer16FromBitmapContainer(rcb)
|
||||
*rc = *rc2
|
||||
return rc
|
||||
answer := rcb.toEfficientContainer()
|
||||
if runrc, ok := answer.(*runContainer16); ok {
|
||||
*rc = *runrc
|
||||
return rc
|
||||
}
|
||||
return answer
|
||||
}
|
||||
|
||||
func (rc *runContainer16) xorRunContainer16(x2 *runContainer16) container {
|
||||
@@ -2523,6 +2607,20 @@ func (rc *runContainer16) toEfficientContainer() container {
|
||||
return bc
|
||||
}
|
||||
|
||||
func (rc *runContainer16) toEfficientContainerFromCardinality(card int) container {
|
||||
sizeAsRunContainer := rc.getSizeInBytes()
|
||||
sizeAsBitmapContainer := bitmapContainerSizeInBytes()
|
||||
sizeAsArrayContainer := arrayContainerSizeInBytes(card)
|
||||
if sizeAsRunContainer < minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
|
||||
return rc
|
||||
}
|
||||
if card <= arrayDefaultMaxSize {
|
||||
return rc.toArrayContainer()
|
||||
}
|
||||
bc := newBitmapContainerFromRun(rc)
|
||||
return bc
|
||||
}
|
||||
|
||||
func (rc *runContainer16) toArrayContainer() *arrayContainer {
|
||||
ac := newArrayContainer()
|
||||
for i := range rc.iv {
|
||||
@@ -2619,7 +2717,7 @@ func (rc *runContainer16) addOffset(x uint16) (container, container) {
|
||||
|
||||
for _, iv := range rc.iv {
|
||||
val := int(iv.start) + int(x)
|
||||
finalVal := int(val) + int(iv.length)
|
||||
finalVal := val + int(iv.length)
|
||||
if val <= 0xffff {
|
||||
if finalVal <= 0xffff {
|
||||
low.iv = append(low.iv, interval16{uint16(val), iv.length})
|
||||
|
||||
251
vendor/github.com/RoaringBitmap/roaring/v2/serialization_littleendian.go
generated
vendored
251
vendor/github.com/RoaringBitmap/roaring/v2/serialization_littleendian.go
generated
vendored
@@ -6,9 +6,8 @@ package roaring
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"reflect"
|
||||
"runtime"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
@@ -26,51 +25,30 @@ func (bc *bitmapContainer) writeTo(stream io.Writer) (int, error) {
|
||||
}
|
||||
|
||||
func uint64SliceAsByteSlice(slice []uint64) []byte {
|
||||
// make a new slice header
|
||||
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
|
||||
// update its capacity and length
|
||||
header.Len *= 8
|
||||
header.Cap *= 8
|
||||
|
||||
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||
result := *(*[]byte)(unsafe.Pointer(&header))
|
||||
runtime.KeepAlive(&slice)
|
||||
|
||||
// return it
|
||||
return result
|
||||
ptr := unsafe.SliceData(slice)
|
||||
if ptr == nil {
|
||||
return nil
|
||||
}
|
||||
const size = unsafe.Sizeof(uint64(0))
|
||||
return unsafe.Slice(((*byte)(unsafe.Pointer(ptr))), int(size)*len(slice))
|
||||
}
|
||||
|
||||
func uint16SliceAsByteSlice(slice []uint16) []byte {
|
||||
// make a new slice header
|
||||
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
|
||||
// update its capacity and length
|
||||
header.Len *= 2
|
||||
header.Cap *= 2
|
||||
|
||||
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||
result := *(*[]byte)(unsafe.Pointer(&header))
|
||||
runtime.KeepAlive(&slice)
|
||||
|
||||
// return it
|
||||
return result
|
||||
ptr := unsafe.SliceData(slice)
|
||||
if ptr == nil {
|
||||
return nil
|
||||
}
|
||||
const size = unsafe.Sizeof(uint16(0))
|
||||
return unsafe.Slice(((*byte)(unsafe.Pointer(ptr))), int(size)*len(slice))
|
||||
}
|
||||
|
||||
func interval16SliceAsByteSlice(slice []interval16) []byte {
|
||||
// make a new slice header
|
||||
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
|
||||
// update its capacity and length
|
||||
header.Len *= 4
|
||||
header.Cap *= 4
|
||||
|
||||
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||
result := *(*[]byte)(unsafe.Pointer(&header))
|
||||
runtime.KeepAlive(&slice)
|
||||
|
||||
// return it
|
||||
return result
|
||||
ptr := unsafe.SliceData(slice)
|
||||
if ptr == nil {
|
||||
return nil
|
||||
}
|
||||
const size = unsafe.Sizeof(interval16{})
|
||||
return unsafe.Slice(((*byte)(unsafe.Pointer(ptr))), int(size)*len(slice))
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) asLittleEndianByteSlice() []byte {
|
||||
@@ -86,69 +64,39 @@ func (bc *bitmapContainer) asLittleEndianByteSlice() []byte {
|
||||
// or modified while you hold the returned slince.
|
||||
// //
|
||||
func byteSliceAsUint16Slice(slice []byte) (result []uint16) { // here we create a new slice holder
|
||||
if len(slice)%2 != 0 {
|
||||
panic("Slice size should be divisible by 2")
|
||||
const sz = int(unsafe.Sizeof(uint16(0)))
|
||||
if len(slice)%sz != 0 {
|
||||
panic(fmt.Sprintf("Slice size should be divisible by %d", sz))
|
||||
}
|
||||
// reference: https://go101.org/article/unsafe.html
|
||||
|
||||
// make a new slice header
|
||||
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
|
||||
|
||||
// transfer the data from the given slice to a new variable (our result)
|
||||
rHeader.Data = bHeader.Data
|
||||
rHeader.Len = bHeader.Len / 2
|
||||
rHeader.Cap = bHeader.Cap / 2
|
||||
|
||||
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
|
||||
|
||||
// return result
|
||||
return
|
||||
ptr := unsafe.SliceData(slice)
|
||||
if ptr == nil {
|
||||
return nil
|
||||
}
|
||||
return unsafe.Slice((*uint16)(unsafe.Pointer(ptr)), len(slice)/sz)
|
||||
}
|
||||
|
||||
func byteSliceAsUint64Slice(slice []byte) (result []uint64) {
|
||||
if len(slice)%8 != 0 {
|
||||
panic("Slice size should be divisible by 8")
|
||||
const sz = int(unsafe.Sizeof(uint64(0)))
|
||||
if len(slice)%sz != 0 {
|
||||
panic(fmt.Sprintf("Slice size should be divisible by %d", sz))
|
||||
}
|
||||
// reference: https://go101.org/article/unsafe.html
|
||||
|
||||
// make a new slice header
|
||||
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
|
||||
|
||||
// transfer the data from the given slice to a new variable (our result)
|
||||
rHeader.Data = bHeader.Data
|
||||
rHeader.Len = bHeader.Len / 8
|
||||
rHeader.Cap = bHeader.Cap / 8
|
||||
|
||||
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
|
||||
|
||||
// return result
|
||||
return
|
||||
ptr := unsafe.SliceData(slice)
|
||||
if ptr == nil {
|
||||
return nil
|
||||
}
|
||||
return unsafe.Slice((*uint64)(unsafe.Pointer(ptr)), len(slice)/sz)
|
||||
}
|
||||
|
||||
func byteSliceAsInterval16Slice(slice []byte) (result []interval16) {
|
||||
if len(slice)%4 != 0 {
|
||||
panic("Slice size should be divisible by 4")
|
||||
const sz = int(unsafe.Sizeof(interval16{}))
|
||||
if len(slice)%sz != 0 {
|
||||
panic(fmt.Sprintf("Slice size should be divisible by %d", sz))
|
||||
}
|
||||
// reference: https://go101.org/article/unsafe.html
|
||||
|
||||
// make a new slice header
|
||||
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
|
||||
|
||||
// transfer the data from the given slice to a new variable (our result)
|
||||
rHeader.Data = bHeader.Data
|
||||
rHeader.Len = bHeader.Len / 4
|
||||
rHeader.Cap = bHeader.Cap / 4
|
||||
|
||||
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
|
||||
|
||||
// return result
|
||||
return
|
||||
ptr := unsafe.SliceData(slice)
|
||||
if ptr == nil {
|
||||
return nil
|
||||
}
|
||||
return unsafe.Slice((*interval16)(unsafe.Pointer(ptr)), len(slice)/sz)
|
||||
}
|
||||
|
||||
func byteSliceAsContainerSlice(slice []byte) (result []container) {
|
||||
@@ -158,114 +106,59 @@ func byteSliceAsContainerSlice(slice []byte) (result []container) {
|
||||
if len(slice)%containerSize != 0 {
|
||||
panic("Slice size should be divisible by unsafe.Sizeof(container)")
|
||||
}
|
||||
// reference: https://go101.org/article/unsafe.html
|
||||
|
||||
// make a new slice header
|
||||
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
|
||||
|
||||
// transfer the data from the given slice to a new variable (our result)
|
||||
rHeader.Data = bHeader.Data
|
||||
rHeader.Len = bHeader.Len / containerSize
|
||||
rHeader.Cap = bHeader.Cap / containerSize
|
||||
|
||||
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
|
||||
|
||||
// return result
|
||||
return
|
||||
ptr := unsafe.SliceData(slice)
|
||||
if ptr == nil {
|
||||
return nil
|
||||
}
|
||||
return unsafe.Slice((*container)(unsafe.Pointer(ptr)), len(slice)/containerSize)
|
||||
}
|
||||
|
||||
func byteSliceAsBitsetSlice(slice []byte) (result []bitmapContainer) {
|
||||
bitsetSize := int(unsafe.Sizeof(bitmapContainer{}))
|
||||
const bitsetSize = int(unsafe.Sizeof(bitmapContainer{}))
|
||||
if len(slice)%bitsetSize != 0 {
|
||||
panic("Slice size should be divisible by unsafe.Sizeof(bitmapContainer)")
|
||||
}
|
||||
// reference: https://go101.org/article/unsafe.html
|
||||
|
||||
// make a new slice header
|
||||
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
|
||||
|
||||
// transfer the data from the given slice to a new variable (our result)
|
||||
rHeader.Data = bHeader.Data
|
||||
rHeader.Len = bHeader.Len / bitsetSize
|
||||
rHeader.Cap = bHeader.Cap / bitsetSize
|
||||
|
||||
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
|
||||
|
||||
// return result
|
||||
return
|
||||
ptr := unsafe.SliceData(slice)
|
||||
if ptr == nil {
|
||||
return nil
|
||||
}
|
||||
return unsafe.Slice((*bitmapContainer)(unsafe.Pointer(ptr)), len(slice)/bitsetSize)
|
||||
}
|
||||
|
||||
func byteSliceAsArraySlice(slice []byte) (result []arrayContainer) {
|
||||
arraySize := int(unsafe.Sizeof(arrayContainer{}))
|
||||
const arraySize = int(unsafe.Sizeof(arrayContainer{}))
|
||||
if len(slice)%arraySize != 0 {
|
||||
panic("Slice size should be divisible by unsafe.Sizeof(arrayContainer)")
|
||||
}
|
||||
// reference: https://go101.org/article/unsafe.html
|
||||
|
||||
// make a new slice header
|
||||
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
|
||||
|
||||
// transfer the data from the given slice to a new variable (our result)
|
||||
rHeader.Data = bHeader.Data
|
||||
rHeader.Len = bHeader.Len / arraySize
|
||||
rHeader.Cap = bHeader.Cap / arraySize
|
||||
|
||||
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
|
||||
|
||||
// return result
|
||||
return
|
||||
ptr := unsafe.SliceData(slice)
|
||||
if ptr == nil {
|
||||
return nil
|
||||
}
|
||||
return unsafe.Slice((*arrayContainer)(unsafe.Pointer(ptr)), len(slice)/arraySize)
|
||||
}
|
||||
|
||||
func byteSliceAsRun16Slice(slice []byte) (result []runContainer16) {
|
||||
run16Size := int(unsafe.Sizeof(runContainer16{}))
|
||||
const run16Size = int(unsafe.Sizeof(runContainer16{}))
|
||||
if len(slice)%run16Size != 0 {
|
||||
panic("Slice size should be divisible by unsafe.Sizeof(runContainer16)")
|
||||
}
|
||||
// reference: https://go101.org/article/unsafe.html
|
||||
|
||||
// make a new slice header
|
||||
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
|
||||
|
||||
// transfer the data from the given slice to a new variable (our result)
|
||||
rHeader.Data = bHeader.Data
|
||||
rHeader.Len = bHeader.Len / run16Size
|
||||
rHeader.Cap = bHeader.Cap / run16Size
|
||||
|
||||
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
|
||||
|
||||
// return result
|
||||
return
|
||||
ptr := unsafe.SliceData(slice)
|
||||
if ptr == nil {
|
||||
return nil
|
||||
}
|
||||
return unsafe.Slice((*runContainer16)(unsafe.Pointer(ptr)), len(slice)/run16Size)
|
||||
}
|
||||
|
||||
func byteSliceAsBoolSlice(slice []byte) (result []bool) {
|
||||
boolSize := int(unsafe.Sizeof(true))
|
||||
const boolSize = int(unsafe.Sizeof(true))
|
||||
if len(slice)%boolSize != 0 {
|
||||
panic("Slice size should be divisible by unsafe.Sizeof(bool)")
|
||||
}
|
||||
// reference: https://go101.org/article/unsafe.html
|
||||
|
||||
// make a new slice header
|
||||
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
|
||||
|
||||
// transfer the data from the given slice to a new variable (our result)
|
||||
rHeader.Data = bHeader.Data
|
||||
rHeader.Len = bHeader.Len / boolSize
|
||||
rHeader.Cap = bHeader.Cap / boolSize
|
||||
|
||||
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
|
||||
|
||||
// return result
|
||||
return
|
||||
ptr := unsafe.SliceData(slice)
|
||||
if ptr == nil {
|
||||
return nil
|
||||
}
|
||||
return unsafe.Slice((*bool)(unsafe.Pointer(ptr)), len(slice)/boolSize)
|
||||
}
|
||||
|
||||
// FrozenView creates a static view of a serialized bitmap stored in buf.
|
||||
|
||||
46
vendor/github.com/RoaringBitmap/roaring/v2/setutil.go
generated
vendored
46
vendor/github.com/RoaringBitmap/roaring/v2/setutil.go
generated
vendored
@@ -202,10 +202,22 @@ func intersects2by2(
|
||||
set1 []uint16,
|
||||
set2 []uint16,
|
||||
) bool {
|
||||
// could be optimized if one set is much larger than the other one
|
||||
if (len(set1) == 0) || (len(set2) == 0) {
|
||||
return false
|
||||
}
|
||||
if len(set1)*64 < len(set2) {
|
||||
return onesidedgallopingintersect2by2Bool(set1, set2)
|
||||
} else if len(set2)*64 < len(set1) {
|
||||
return onesidedgallopingintersect2by2Bool(set2, set1)
|
||||
} else {
|
||||
return intersects2by2Bool(set1, set2)
|
||||
}
|
||||
}
|
||||
|
||||
func intersects2by2Bool(
|
||||
set1 []uint16,
|
||||
set2 []uint16,
|
||||
) bool {
|
||||
index1 := 0
|
||||
index2 := 0
|
||||
value1 := set1[index1]
|
||||
@@ -244,6 +256,38 @@ mainwhile:
|
||||
return false
|
||||
}
|
||||
|
||||
func onesidedgallopingintersect2by2Bool(
|
||||
smallset []uint16,
|
||||
largeset []uint16,
|
||||
) bool {
|
||||
k1 := 0
|
||||
k2 := 0
|
||||
s1 := largeset[k1]
|
||||
s2 := smallset[k2]
|
||||
mainwhile:
|
||||
for {
|
||||
if s1 < s2 {
|
||||
k1 = advanceUntil(largeset, k1, len(largeset), s2)
|
||||
if k1 == len(largeset) {
|
||||
break mainwhile
|
||||
}
|
||||
s1 = largeset[k1]
|
||||
}
|
||||
if s2 < s1 {
|
||||
k2++
|
||||
if k2 == len(smallset) {
|
||||
break mainwhile
|
||||
}
|
||||
s2 = smallset[k2]
|
||||
} else {
|
||||
// (set2[k2] == set1[k1])
|
||||
return true
|
||||
}
|
||||
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func localintersect2by2(
|
||||
set1 []uint16,
|
||||
set2 []uint16,
|
||||
|
||||
50
vendor/github.com/RoaringBitmap/roaring/v2/shortiterator.go
generated
vendored
50
vendor/github.com/RoaringBitmap/roaring/v2/shortiterator.go
generated
vendored
@@ -50,3 +50,53 @@ func (si *reverseIterator) next() uint16 {
|
||||
si.loc--
|
||||
return a
|
||||
}
|
||||
|
||||
type arrayContainerUnsetIterator struct {
|
||||
content []uint16
|
||||
// pos is the index of the next set bit that is >= nextVal.
|
||||
// When nextVal reaches content[pos], pos is incremented.
|
||||
pos int
|
||||
nextVal int
|
||||
}
|
||||
|
||||
func (acui *arrayContainerUnsetIterator) next() uint16 {
|
||||
val := acui.nextVal
|
||||
acui.nextVal++
|
||||
for acui.pos < len(acui.content) && uint16(acui.nextVal) >= acui.content[acui.pos] {
|
||||
acui.nextVal++
|
||||
acui.pos++
|
||||
}
|
||||
return uint16(val)
|
||||
}
|
||||
|
||||
func (acui *arrayContainerUnsetIterator) hasNext() bool {
|
||||
return acui.nextVal < 65536
|
||||
}
|
||||
|
||||
func (acui *arrayContainerUnsetIterator) peekNext() uint16 {
|
||||
return uint16(acui.nextVal)
|
||||
}
|
||||
|
||||
func (acui *arrayContainerUnsetIterator) advanceIfNeeded(minval uint16) {
|
||||
if !acui.hasNext() || acui.peekNext() >= minval {
|
||||
return
|
||||
}
|
||||
acui.nextVal = int(minval)
|
||||
acui.pos = binarySearch(acui.content, minval)
|
||||
if acui.pos < 0 {
|
||||
acui.pos = -acui.pos - 1
|
||||
}
|
||||
for acui.pos < len(acui.content) && uint16(acui.nextVal) >= acui.content[acui.pos] {
|
||||
acui.nextVal++
|
||||
acui.pos++
|
||||
}
|
||||
}
|
||||
|
||||
func newArrayContainerUnsetIterator(content []uint16) *arrayContainerUnsetIterator {
|
||||
acui := &arrayContainerUnsetIterator{content: content, pos: 0, nextVal: 0}
|
||||
for acui.pos < len(acui.content) && uint16(acui.nextVal) >= acui.content[acui.pos] {
|
||||
acui.nextVal++
|
||||
acui.pos++
|
||||
}
|
||||
return acui
|
||||
}
|
||||
|
||||
459
vendor/github.com/RoaringBitmap/roaring/v2/smat.go
generated
vendored
459
vendor/github.com/RoaringBitmap/roaring/v2/smat.go
generated
vendored
@@ -1,6 +1,3 @@
|
||||
//go:build gofuzz
|
||||
// +build gofuzz
|
||||
|
||||
/*
|
||||
# Instructions for smat testing for roaring
|
||||
|
||||
@@ -11,69 +8,49 @@ To run the smat tests for roaring...
|
||||
|
||||
## Prerequisites
|
||||
|
||||
$ go get github.com/dvyukov/go-fuzz/go-fuzz
|
||||
$ go get github.com/dvyukov/go-fuzz/go-fuzz-build
|
||||
Go 1.18 or later (for native fuzzing support).
|
||||
|
||||
## Steps
|
||||
|
||||
1. Generate initial smat corpus:
|
||||
1. Generate initial smat corpus:
|
||||
```
|
||||
go test -tags=gofuzz -run=TestGenerateSmatCorpus
|
||||
go test -tags=gofuzz -run=TestGenerateSmatCorpus
|
||||
```
|
||||
You should see a directory `workdir` created with initial corpus files.
|
||||
|
||||
2. Run the fuzz test:
|
||||
```
|
||||
go test -run='^$' -fuzz=FuzzSmat -fuzztime=300s -timeout=60s
|
||||
```
|
||||
|
||||
2. Build go-fuzz test program with instrumentation:
|
||||
```
|
||||
go-fuzz-build -func FuzzSmat github.com/RoaringBitmap/roaring
|
||||
```
|
||||
|
||||
3. Run go-fuzz:
|
||||
```
|
||||
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200
|
||||
```
|
||||
|
||||
You should see output like...
|
||||
```
|
||||
2016/09/16 13:58:35 slaves: 8, corpus: 1 (3s ago), crashers: 0, restarts: 1/0, execs: 0 (0/sec), cover: 0, uptime: 3s
|
||||
2016/09/16 13:58:38 slaves: 8, corpus: 1 (6s ago), crashers: 0, restarts: 1/0, execs: 0 (0/sec), cover: 0, uptime: 6s
|
||||
2016/09/16 13:58:41 slaves: 8, corpus: 1 (9s ago), crashers: 0, restarts: 1/44, execs: 44 (5/sec), cover: 0, uptime: 9s
|
||||
2016/09/16 13:58:44 slaves: 8, corpus: 1 (12s ago), crashers: 0, restarts: 1/45, execs: 45 (4/sec), cover: 0, uptime: 12s
|
||||
2016/09/16 13:58:47 slaves: 8, corpus: 1 (15s ago), crashers: 0, restarts: 1/46, execs: 46 (3/sec), cover: 0, uptime: 15s
|
||||
2016/09/16 13:58:50 slaves: 8, corpus: 1 (18s ago), crashers: 0, restarts: 1/47, execs: 47 (3/sec), cover: 0, uptime: 18s
|
||||
2016/09/16 13:58:53 slaves: 8, corpus: 1 (21s ago), crashers: 0, restarts: 1/63, execs: 63 (3/sec), cover: 0, uptime: 21s
|
||||
2016/09/16 13:58:56 slaves: 8, corpus: 1 (24s ago), crashers: 0, restarts: 1/65, execs: 65 (3/sec), cover: 0, uptime: 24s
|
||||
2016/09/16 13:58:59 slaves: 8, corpus: 1 (27s ago), crashers: 0, restarts: 1/66, execs: 66 (2/sec), cover: 0, uptime: 27s
|
||||
2016/09/16 13:59:02 slaves: 8, corpus: 1 (30s ago), crashers: 0, restarts: 1/67, execs: 67 (2/sec), cover: 0, uptime: 30s
|
||||
2016/09/16 13:59:05 slaves: 8, corpus: 1 (33s ago), crashers: 0, restarts: 1/83, execs: 83 (3/sec), cover: 0, uptime: 33s
|
||||
2016/09/16 13:59:08 slaves: 8, corpus: 1 (36s ago), crashers: 0, restarts: 1/84, execs: 84 (2/sec), cover: 0, uptime: 36s
|
||||
2016/09/16 13:59:11 slaves: 8, corpus: 2 (0s ago), crashers: 0, restarts: 1/85, execs: 85 (2/sec), cover: 0, uptime: 39s
|
||||
2016/09/16 13:59:14 slaves: 8, corpus: 17 (2s ago), crashers: 0, restarts: 1/86, execs: 86 (2/sec), cover: 480, uptime: 42s
|
||||
2016/09/16 13:59:17 slaves: 8, corpus: 17 (5s ago), crashers: 0, restarts: 1/66, execs: 132 (3/sec), cover: 487, uptime: 45s
|
||||
2016/09/16 13:59:20 slaves: 8, corpus: 17 (8s ago), crashers: 0, restarts: 1/440, execs: 2645 (55/sec), cover: 487, uptime: 48s
|
||||
|
||||
```
|
||||
|
||||
Let it run, and if the # of crashers is > 0, check out the reports in
|
||||
the workdir where you should be able to find the panic goroutine stack
|
||||
traces.
|
||||
Adjust `-fuzztime` as needed for longer or shorter runs. If crashes are found,
|
||||
check the test output and the reproducer files in the `workdir` directory.
|
||||
You may copy the reproducers to roaring_tests.go
|
||||
*/
|
||||
|
||||
package roaring
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"sort"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime/debug"
|
||||
"slices"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/bits-and-blooms/bitset"
|
||||
"github.com/mschoch/smat"
|
||||
)
|
||||
|
||||
// fuzz test using state machine driven by byte stream.
|
||||
func FuzzSmat(data []byte) int {
|
||||
return smat.Fuzz(&smatContext{}, smat.ActionID('S'), smat.ActionID('T'),
|
||||
smatActionMap, data)
|
||||
}
|
||||
// The native fuzz entry point lives in a _test.go file so the go test
|
||||
// fuzz engine discovers it. See smat_fuzz_test.go for the fuzz wrapper.
|
||||
|
||||
var smatDebug = false
|
||||
var smatDebug = true
|
||||
|
||||
const max_value = 1048576
|
||||
const max_pairs = 10
|
||||
|
||||
func smatLog(prefix, format string, args ...interface{}) {
|
||||
if smatDebug {
|
||||
@@ -90,22 +67,33 @@ type smatContext struct {
|
||||
y int
|
||||
|
||||
actions int
|
||||
// per-context last action for this fuzz worker
|
||||
lastAction *actionRecord
|
||||
}
|
||||
|
||||
// actionRecord stores a snapshot of the state just before an action runs.
|
||||
type actionRecord struct {
|
||||
Name string
|
||||
X, Y int
|
||||
PairSnapshots []string // base64-encoded MarshalBinary of each pair's Bitmap
|
||||
}
|
||||
|
||||
type smatPair struct {
|
||||
bm *Bitmap
|
||||
bs *bitset.BitSet
|
||||
// parent context (nil if unknown)
|
||||
ctx *smatContext
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
var smatActionMap = smat.ActionMap{
|
||||
smat.ActionID('X'): smatAction("x++", smatWrap(func(c *smatContext) { c.x++ })),
|
||||
smat.ActionID('x'): smatAction("x--", smatWrap(func(c *smatContext) { c.x-- })),
|
||||
smat.ActionID('Y'): smatAction("y++", smatWrap(func(c *smatContext) { c.y++ })),
|
||||
smat.ActionID('y'): smatAction("y--", smatWrap(func(c *smatContext) { c.y-- })),
|
||||
smat.ActionID('*'): smatAction("x*y", smatWrap(func(c *smatContext) { c.x = c.x * c.y })),
|
||||
smat.ActionID('<'): smatAction("x<<", smatWrap(func(c *smatContext) { c.x = c.x << 1 })),
|
||||
smat.ActionID('X'): smatAction("x++", smatWrap(func(c *smatContext) { c.x = (c.x + 1) % max_value })),
|
||||
smat.ActionID('x'): smatAction("x--", smatWrap(func(c *smatContext) { c.x = (c.x - 1 + max_value) % max_value })),
|
||||
smat.ActionID('Y'): smatAction("y++", smatWrap(func(c *smatContext) { c.y = (c.y + 1) % max_value })),
|
||||
smat.ActionID('y'): smatAction("y--", smatWrap(func(c *smatContext) { c.y = (c.y - 1 + max_value) % max_value })),
|
||||
smat.ActionID('*'): smatAction("x*y", smatWrap(func(c *smatContext) { c.x = (c.x * c.y) % max_value })),
|
||||
smat.ActionID('<'): smatAction("x<<", smatWrap(func(c *smatContext) { c.x = (c.x << 1) % max_value })),
|
||||
|
||||
smat.ActionID('^'): smatAction("swap", smatWrap(func(c *smatContext) { c.x, c.y = c.y, c.x })),
|
||||
|
||||
@@ -117,11 +105,13 @@ var smatActionMap = smat.ActionMap{
|
||||
|
||||
smat.ActionID('o'): smatAction(" or", smatWrap(smatOr)),
|
||||
smat.ActionID('a'): smatAction(" and", smatWrap(smatAnd)),
|
||||
smat.ActionID('z'): smatAction(" xor", smatWrap(smatXor)),
|
||||
|
||||
smat.ActionID('#'): smatAction(" cardinality", smatWrap(smatCardinality)),
|
||||
|
||||
smat.ActionID('O'): smatAction(" orCardinality", smatWrap(smatOrCardinality)),
|
||||
smat.ActionID('A'): smatAction(" andCardinality", smatWrap(smatAndCardinality)),
|
||||
smat.ActionID('Z'): smatAction(" xorCardinality", smatWrap(smatXorCardinality)),
|
||||
|
||||
smat.ActionID('c'): smatAction(" clear", smatWrap(smatClear)),
|
||||
smat.ActionID('r'): smatAction(" runOptimize", smatWrap(smatRunOptimize)),
|
||||
@@ -142,12 +132,12 @@ func init() {
|
||||
for actionId := range smatActionMap {
|
||||
ids = append(ids, int(actionId))
|
||||
}
|
||||
sort.Ints(ids)
|
||||
slices.Sort(ids)
|
||||
|
||||
pct := 100 / len(smatActionMap)
|
||||
for _, actionId := range ids {
|
||||
smatRunningPercentActions = append(smatRunningPercentActions,
|
||||
smat.PercentAction{pct, smat.ActionID(actionId)})
|
||||
smat.PercentAction{Percent: pct, Action: smat.ActionID(actionId)})
|
||||
}
|
||||
|
||||
smatActionMap[smat.ActionID('S')] = smatAction("SETUP", smatSetupFunc)
|
||||
@@ -162,14 +152,153 @@ func smatRunning(next byte) smat.ActionID {
|
||||
func smatAction(name string, f func(ctx smat.Context) (smat.State, error)) func(smat.Context) (smat.State, error) {
|
||||
return func(ctx smat.Context) (smat.State, error) {
|
||||
c := ctx.(*smatContext)
|
||||
|
||||
// Snapshot all pairs' bitmaps (base64 of MarshalBinary) before action
|
||||
rec := actionRecord{Name: name, X: c.x, Y: c.y}
|
||||
if len(c.pairs) > 0 {
|
||||
rec.PairSnapshots = make([]string, 0, len(c.pairs))
|
||||
for _, pair := range c.pairs {
|
||||
if pair == nil || pair.bm == nil {
|
||||
rec.PairSnapshots = append(rec.PairSnapshots, "<nil>")
|
||||
continue
|
||||
}
|
||||
b, err := pair.bm.MarshalBinary()
|
||||
if err != nil {
|
||||
rec.PairSnapshots = append(rec.PairSnapshots, "<marshal-error:"+err.Error()+">")
|
||||
} else {
|
||||
rec.PairSnapshots = append(rec.PairSnapshots, base64.StdEncoding.EncodeToString(b))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// record per-context last action (no global mutex required)
|
||||
if c != nil {
|
||||
c.lastAction = &rec
|
||||
}
|
||||
|
||||
// catch panics inside action to dump a repro and stack before re-panicking
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
// best-effort: write quick repro with lastAction from context
|
||||
var lastAction *actionRecord
|
||||
if c != nil {
|
||||
lastAction = c.lastAction
|
||||
}
|
||||
ts := time.Now().UnixNano()
|
||||
repro := "// Reproducer generated by smat (panic)\n"
|
||||
repro += "package roaring\n\n"
|
||||
repro += "import (\n\t\"encoding/base64\"\n\t\"testing\"\n)\n\n"
|
||||
repro += fmt.Sprintf("func TestFuzzerPanicRepro_%d(t *testing.T) {\n", ts)
|
||||
// similar to checkEquals repro
|
||||
if lastAction != nil && len(lastAction.PairSnapshots) > 0 {
|
||||
pairIndex := lastAction.X % len(lastAction.PairSnapshots)
|
||||
if pairIndex < len(lastAction.PairSnapshots) {
|
||||
snapshot := lastAction.PairSnapshots[pairIndex]
|
||||
if snapshot != "<nil>" && !strings.HasPrefix(snapshot, "<") {
|
||||
repro += fmt.Sprintf("\tb, _ := base64.StdEncoding.DecodeString(\"%s\")\n", snapshot)
|
||||
repro += "\tbm := NewBitmap()\n"
|
||||
repro += "\tbm.UnmarshalBinary(b)\n"
|
||||
// perform the action that caused panic
|
||||
if strings.Contains(lastAction.Name, "setBit") {
|
||||
repro += fmt.Sprintf("\tbm.AddInt(%d)\n", lastAction.Y)
|
||||
} else if strings.Contains(lastAction.Name, "removeBit") {
|
||||
repro += fmt.Sprintf("\tbm.Remove(%d)\n", lastAction.Y)
|
||||
} else if strings.Contains(lastAction.Name, "flip") {
|
||||
repro += fmt.Sprintf("\tbm.Flip(uint64(%d), uint64(%d)+1)\n", lastAction.Y, lastAction.Y)
|
||||
} else if strings.Contains(lastAction.Name, "runOptimize") {
|
||||
repro += "\tbm.RunOptimize()\n"
|
||||
} else if strings.Contains(lastAction.Name, "clear") {
|
||||
repro += "\tbm.Clear()\n"
|
||||
} else if lastAction.Name == " or" {
|
||||
pairIndexY := lastAction.Y % len(lastAction.PairSnapshots)
|
||||
if pairIndexY < len(lastAction.PairSnapshots) {
|
||||
snapshotY := lastAction.PairSnapshots[pairIndexY]
|
||||
if snapshotY != "<nil>" && !strings.HasPrefix(snapshotY, "<") {
|
||||
repro += fmt.Sprintf("\tb2, _ := base64.StdEncoding.DecodeString(\"%s\")\n", snapshotY)
|
||||
repro += "\tbm2 := NewBitmap()\n"
|
||||
repro += "\tbm2.UnmarshalBinary(b2)\n"
|
||||
repro += "\tbm.Or(bm2)\n"
|
||||
}
|
||||
}
|
||||
} else if lastAction.Name == " and" {
|
||||
pairIndexY := lastAction.Y % len(lastAction.PairSnapshots)
|
||||
if pairIndexY < len(lastAction.PairSnapshots) {
|
||||
snapshotY := lastAction.PairSnapshots[pairIndexY]
|
||||
if snapshotY != "<nil>" && !strings.HasPrefix(snapshotY, "<") {
|
||||
repro += fmt.Sprintf("\tb2, _ := base64.StdEncoding.DecodeString(\"%s\")\n", snapshotY)
|
||||
repro += "\tbm2 := NewBitmap()\n"
|
||||
repro += "\tbm2.UnmarshalBinary(b2)\n"
|
||||
repro += "\tbm.And(bm2)\n"
|
||||
}
|
||||
}
|
||||
} else if lastAction.Name == " difference" {
|
||||
pairIndexY := lastAction.Y % len(lastAction.PairSnapshots)
|
||||
if pairIndexY < len(lastAction.PairSnapshots) {
|
||||
snapshotY := lastAction.PairSnapshots[pairIndexY]
|
||||
if snapshotY != "<nil>" && !strings.HasPrefix(snapshotY, "<") {
|
||||
repro += fmt.Sprintf("\tb2, _ := base64.StdEncoding.DecodeString(\"%s\")\n", snapshotY)
|
||||
repro += "\tbm2 := NewBitmap()\n"
|
||||
repro += "\tbm2.UnmarshalBinary(b2)\n"
|
||||
repro += "\tbm.AndNot(bm2)\n"
|
||||
}
|
||||
}
|
||||
} else if lastAction.Name == " xor" {
|
||||
pairIndexY := lastAction.Y % len(lastAction.PairSnapshots)
|
||||
if pairIndexY < len(lastAction.PairSnapshots) {
|
||||
snapshotY := lastAction.PairSnapshots[pairIndexY]
|
||||
if snapshotY != "<nil>" && !strings.HasPrefix(snapshotY, "<") {
|
||||
repro += fmt.Sprintf("\tb2, _ := base64.StdEncoding.DecodeString(\"%s\")\n", snapshotY)
|
||||
repro += "\tbm2 := NewBitmap()\n"
|
||||
repro += "\tbm2.UnmarshalBinary(b2)\n"
|
||||
repro += "\tbm.Xor(bm2)\n"
|
||||
}
|
||||
}
|
||||
} else {
|
||||
repro += fmt.Sprintf("\t// Unhandled action: %s\n", lastAction.Name)
|
||||
}
|
||||
} else {
|
||||
repro += "\t// invalid snapshot\n"
|
||||
}
|
||||
}
|
||||
}
|
||||
repro += "}\n"
|
||||
if path, werr := saveReproFile("smat_panic_repro", ts, repro); werr == nil {
|
||||
fmt.Printf("wrote panic repro to %s\n", path)
|
||||
} else {
|
||||
fmt.Printf("failed writing panic repro: %v\n", werr)
|
||||
}
|
||||
fmt.Printf("PANIC in action %s: %v\n", rec.Name, r)
|
||||
fmt.Printf("stack:\n%s\n", debug.Stack())
|
||||
panic(r)
|
||||
}
|
||||
}()
|
||||
|
||||
c.actions++
|
||||
|
||||
smatLog(" ", "%s\n", name)
|
||||
|
||||
return f(ctx)
|
||||
}
|
||||
}
|
||||
|
||||
// saveReproFile stores content as "<prefix>_<ts>_test.go".
//
// The file is written to ./workdir (created on demand) when possible; if the
// directory cannot be created or the write there fails, the same file name is
// written into os.TempDir() instead. It returns the path that was written, or
// the error from the temp-dir write when both locations fail.
func saveReproFile(prefix string, ts int64, content string) (string, error) {
	payload := []byte(content)
	base := fmt.Sprintf("%s_%d_test.go", prefix, ts)

	// Preferred location: the local workdir. Failures here are deliberately
	// not reported; they only trigger the fallback below.
	if os.MkdirAll("workdir", 0o755) == nil {
		local := "workdir/" + base
		if os.WriteFile(local, payload, 0o644) == nil {
			return local, nil
		}
	}

	// Fallback location: the OS temp directory.
	fallback := filepath.Join(os.TempDir(), base)
	if err := os.WriteFile(fallback, payload, 0o644); err != nil {
		return "", err
	}
	return fallback, nil
}
|
||||
|
||||
// Creates an smat action func based on a simple callback.
|
||||
func smatWrap(cb func(c *smatContext)) func(smat.Context) (next smat.State, err error) {
|
||||
return func(ctx smat.Context) (next smat.State, err error) {
|
||||
@@ -203,10 +332,15 @@ func smatTeardownFunc(ctx smat.Context) (next smat.State, err error) {
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
func smatPushPair(c *smatContext) {
|
||||
c.pairs = append(c.pairs, &smatPair{
|
||||
bm: NewBitmap(),
|
||||
bs: bitset.New(100),
|
||||
})
|
||||
if len(c.pairs) >= max_pairs {
|
||||
return
|
||||
}
|
||||
p := &smatPair{
|
||||
bm: NewBitmap(),
|
||||
bs: bitset.New(100),
|
||||
ctx: c,
|
||||
}
|
||||
c.pairs = append(c.pairs, p)
|
||||
}
|
||||
|
||||
func smatPopPair(c *smatContext) {
|
||||
@@ -217,6 +351,7 @@ func smatPopPair(c *smatContext) {
|
||||
|
||||
func smatSetBit(c *smatContext) {
|
||||
c.withPair(c.x, func(p *smatPair) {
|
||||
p.Validate()
|
||||
y := uint32(c.y)
|
||||
p.bm.AddInt(int(y))
|
||||
p.bs.Set(uint(y))
|
||||
@@ -226,6 +361,7 @@ func smatSetBit(c *smatContext) {
|
||||
|
||||
func smatRemoveBit(c *smatContext) {
|
||||
c.withPair(c.x, func(p *smatPair) {
|
||||
p.Validate()
|
||||
y := uint32(c.y)
|
||||
p.bm.Remove(y)
|
||||
p.bs.Clear(uint(y))
|
||||
@@ -236,6 +372,8 @@ func smatRemoveBit(c *smatContext) {
|
||||
func smatAnd(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
c.withPair(c.y, func(py *smatPair) {
|
||||
px.Validate()
|
||||
py.Validate()
|
||||
px.bm.And(py.bm)
|
||||
px.bs = px.bs.Intersection(py.bs)
|
||||
px.checkEquals()
|
||||
@@ -247,6 +385,8 @@ func smatAnd(c *smatContext) {
|
||||
func smatOr(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
c.withPair(c.y, func(py *smatPair) {
|
||||
px.Validate()
|
||||
py.Validate()
|
||||
px.bm.Or(py.bm)
|
||||
px.bs = px.bs.Union(py.bs)
|
||||
px.checkEquals()
|
||||
@@ -255,9 +395,24 @@ func smatOr(c *smatContext) {
|
||||
})
|
||||
}
|
||||
|
||||
func smatXor(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
c.withPair(c.y, func(py *smatPair) {
|
||||
px.Validate()
|
||||
py.Validate()
|
||||
px.bm.Xor(py.bm)
|
||||
px.bs = px.bs.SymmetricDifference(py.bs)
|
||||
px.checkEquals()
|
||||
py.checkEquals()
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func smatAndCardinality(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
c.withPair(c.y, func(py *smatPair) {
|
||||
px.Validate()
|
||||
py.Validate()
|
||||
c0 := px.bm.AndCardinality(py.bm)
|
||||
c1 := px.bs.IntersectionCardinality(py.bs)
|
||||
if c0 != uint64(c1) {
|
||||
@@ -272,6 +427,8 @@ func smatAndCardinality(c *smatContext) {
|
||||
func smatOrCardinality(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
c.withPair(c.y, func(py *smatPair) {
|
||||
px.Validate()
|
||||
py.Validate()
|
||||
c0 := px.bm.OrCardinality(py.bm)
|
||||
c1 := px.bs.UnionCardinality(py.bs)
|
||||
if c0 != uint64(c1) {
|
||||
@@ -283,8 +440,25 @@ func smatOrCardinality(c *smatContext) {
|
||||
})
|
||||
}
|
||||
|
||||
func smatXorCardinality(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
c.withPair(c.y, func(py *smatPair) {
|
||||
px.Validate()
|
||||
py.Validate()
|
||||
c0 := px.bm.OrCardinality(py.bm) - px.bm.AndCardinality(py.bm)
|
||||
c1 := px.bs.SymmetricDifferenceCardinality(py.bs)
|
||||
if c0 != uint64(c1) {
|
||||
panic("expected same xor cardinality")
|
||||
}
|
||||
px.checkEquals()
|
||||
py.checkEquals()
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func smatRunOptimize(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
px.Validate()
|
||||
px.bm.RunOptimize()
|
||||
px.checkEquals()
|
||||
})
|
||||
@@ -292,6 +466,7 @@ func smatRunOptimize(c *smatContext) {
|
||||
|
||||
func smatClear(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
px.Validate()
|
||||
px.bm.Clear()
|
||||
px.bs = px.bs.ClearAll()
|
||||
px.checkEquals()
|
||||
@@ -321,6 +496,8 @@ func smatIsEmpty(c *smatContext) {
|
||||
func smatIntersects(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
c.withPair(c.y, func(py *smatPair) {
|
||||
px.Validate()
|
||||
py.Validate()
|
||||
v0 := px.bm.Intersects(py.bm)
|
||||
v1 := px.bs.IntersectionCardinality(py.bs) > 0
|
||||
if v0 != v1 {
|
||||
@@ -335,6 +512,7 @@ func smatIntersects(c *smatContext) {
|
||||
|
||||
func smatFlip(c *smatContext) {
|
||||
c.withPair(c.x, func(p *smatPair) {
|
||||
p.Validate()
|
||||
y := uint32(c.y)
|
||||
p.bm.Flip(uint64(y), uint64(y)+1)
|
||||
p.bs = p.bs.Flip(uint(y))
|
||||
@@ -345,6 +523,8 @@ func smatFlip(c *smatContext) {
|
||||
func smatDifference(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
c.withPair(c.y, func(py *smatPair) {
|
||||
px.Validate()
|
||||
py.Validate()
|
||||
px.bm.AndNot(py.bm)
|
||||
px.bs = px.bs.Difference(py.bs)
|
||||
px.checkEquals()
|
||||
@@ -354,11 +534,164 @@ func smatDifference(c *smatContext) {
|
||||
}
|
||||
|
||||
func (p *smatPair) checkEquals() {
|
||||
valid := p.bm.Validate()
|
||||
if valid != nil {
|
||||
// marshal current bitmap
|
||||
var curSnap string
|
||||
if p != nil && p.bm != nil {
|
||||
if b, err := p.bm.MarshalBinary(); err == nil {
|
||||
curSnap = base64.StdEncoding.EncodeToString(b)
|
||||
} else {
|
||||
curSnap = "<marshal-error:" + err.Error() + ">"
|
||||
}
|
||||
} else {
|
||||
curSnap = "<nil>"
|
||||
}
|
||||
|
||||
// collect last action summary from context (per-worker)
|
||||
last := "<none>"
|
||||
if p != nil && p.ctx != nil {
|
||||
c := p.ctx
|
||||
if c.lastAction != nil {
|
||||
last = fmt.Sprintf("action=%s x=%d y=%d pairs=%d", c.lastAction.Name, c.lastAction.X, c.lastAction.Y, len(c.lastAction.PairSnapshots))
|
||||
}
|
||||
}
|
||||
|
||||
// If debugging enabled, log extra info
|
||||
smatLog("ERROR: ", "bitmap invalid: %v\n", valid)
|
||||
|
||||
// build a reproducible test snippet that reconstructs the bitmap and replays the failing action
|
||||
ts := time.Now().UnixNano()
|
||||
testName := fmt.Sprintf("TestFuzzerRepro_%d", ts)
|
||||
repro := "// Reproducer generated by smat\n"
|
||||
repro += "package roaring\n\n"
|
||||
repro += "import (\n\t\"encoding/base64\"\n\t\"testing\"\n)\n\n"
|
||||
repro += fmt.Sprintf("func %s(t *testing.T) {\n", testName)
|
||||
var lastAction *actionRecord
|
||||
if p != nil && p.ctx != nil {
|
||||
lastAction = p.ctx.lastAction
|
||||
}
|
||||
// use the snapshot of the modified pair
|
||||
if lastAction != nil && len(lastAction.PairSnapshots) > 0 {
|
||||
// assume the modified pair is x % len(pairs), but since pairs are in order, and x is lastAction.X
|
||||
pairIndex := lastAction.X % len(lastAction.PairSnapshots)
|
||||
if pairIndex < len(lastAction.PairSnapshots) {
|
||||
snapshot := lastAction.PairSnapshots[pairIndex]
|
||||
if snapshot != "<nil>" && !strings.HasPrefix(snapshot, "<") {
|
||||
repro += fmt.Sprintf("\tb, _ := base64.StdEncoding.DecodeString(\"%s\")\n", snapshot)
|
||||
repro += "\tbm := NewBitmap()\n"
|
||||
repro += "\tbm.UnmarshalBinary(b)\n"
|
||||
repro += "\tif err := bm.Validate(); err != nil {\n"
|
||||
repro += "\t\tt.Errorf(\"Initial Validate failed: %v\", err)\n"
|
||||
repro += "\t}\n"
|
||||
// perform the action
|
||||
if strings.Contains(lastAction.Name, "setBit") {
|
||||
repro += fmt.Sprintf("\tbm.AddInt(%d)\n", lastAction.Y)
|
||||
} else if strings.Contains(lastAction.Name, "removeBit") {
|
||||
repro += fmt.Sprintf("\tbm.Remove(%d)\n", lastAction.Y)
|
||||
} else if strings.Contains(lastAction.Name, "flip") {
|
||||
repro += fmt.Sprintf("\tbm.Flip(uint64(%d), uint64(%d)+1)\n", lastAction.Y, lastAction.Y)
|
||||
} else if strings.Contains(lastAction.Name, "runOptimize") {
|
||||
repro += "\tbm.RunOptimize()\n"
|
||||
} else if strings.Contains(lastAction.Name, "clear") {
|
||||
repro += "\tbm.Clear()\n"
|
||||
} else if lastAction.Name == " or" {
|
||||
pairIndexY := lastAction.Y % len(lastAction.PairSnapshots)
|
||||
if pairIndexY < len(lastAction.PairSnapshots) {
|
||||
snapshotY := lastAction.PairSnapshots[pairIndexY]
|
||||
if snapshotY != "<nil>" && !strings.HasPrefix(snapshotY, "<") {
|
||||
repro += fmt.Sprintf("\tb2, _ := base64.StdEncoding.DecodeString(\"%s\")\n", snapshotY)
|
||||
repro += "\tbm2 := NewBitmap()\n"
|
||||
repro += "\tbm2.UnmarshalBinary(b2)\n"
|
||||
repro += "\tbm.Or(bm2)\n"
|
||||
}
|
||||
}
|
||||
} else if lastAction.Name == " and" {
|
||||
pairIndexY := lastAction.Y % len(lastAction.PairSnapshots)
|
||||
if pairIndexY < len(lastAction.PairSnapshots) {
|
||||
snapshotY := lastAction.PairSnapshots[pairIndexY]
|
||||
if snapshotY != "<nil>" && !strings.HasPrefix(snapshotY, "<") {
|
||||
repro += fmt.Sprintf("\tb2, _ := base64.StdEncoding.DecodeString(\"%s\")\n", snapshotY)
|
||||
repro += "\tbm2 := NewBitmap()\n"
|
||||
repro += "\tbm2.UnmarshalBinary(b2)\n"
|
||||
repro += "\tbm.And(bm2)\n"
|
||||
}
|
||||
}
|
||||
} else if lastAction.Name == " difference" {
|
||||
pairIndexY := lastAction.Y % len(lastAction.PairSnapshots)
|
||||
if pairIndexY < len(lastAction.PairSnapshots) {
|
||||
snapshotY := lastAction.PairSnapshots[pairIndexY]
|
||||
if snapshotY != "<nil>" && !strings.HasPrefix(snapshotY, "<") {
|
||||
repro += fmt.Sprintf("\tb2, _ := base64.StdEncoding.DecodeString(\"%s\")\n", snapshotY)
|
||||
repro += "\tbm2 := NewBitmap()\n"
|
||||
repro += "\tbm2.UnmarshalBinary(b2)\n"
|
||||
repro += "\tbm.AndNot(bm2)\n"
|
||||
}
|
||||
}
|
||||
} else if lastAction.Name == " xor" {
|
||||
pairIndexY := lastAction.Y % len(lastAction.PairSnapshots)
|
||||
if pairIndexY < len(lastAction.PairSnapshots) {
|
||||
snapshotY := lastAction.PairSnapshots[pairIndexY]
|
||||
if snapshotY != "<nil>" && !strings.HasPrefix(snapshotY, "<") {
|
||||
repro += fmt.Sprintf("\tb2, _ := base64.StdEncoding.DecodeString(\"%s\")\n", snapshotY)
|
||||
repro += "\tbm2 := NewBitmap()\n"
|
||||
repro += "\tbm2.UnmarshalBinary(b2)\n"
|
||||
repro += "\tbm.Xor(bm2)\n"
|
||||
}
|
||||
}
|
||||
} else {
|
||||
repro += fmt.Sprintf("\t// Unhandled action: %s\n", lastAction.Name)
|
||||
}
|
||||
repro += "\tif err := bm.Validate(); err != nil {\n"
|
||||
repro += "\t\tt.Errorf(\"Validate failed: %v\", err)\n"
|
||||
repro += "\t} else {\n"
|
||||
repro += "\t\tt.Logf(\"Validate succeeded\")\n"
|
||||
repro += "\t}\n"
|
||||
} else {
|
||||
repro += "\t// invalid snapshot\n"
|
||||
}
|
||||
}
|
||||
}
|
||||
repro += "}\n"
|
||||
|
||||
// print the repro snippet for the developer
|
||||
fmt.Println()
|
||||
fmt.Println("=== SMAT REPRODUCER SNIPPET ===")
|
||||
if len(repro) > 10000 {
|
||||
fmt.Println("// Reproducer too large, skipping full print")
|
||||
} else {
|
||||
fmt.Println(repro)
|
||||
}
|
||||
|
||||
// also write the repro snippet to a timestamped file in workdir/
|
||||
if len(repro) > 10000 {
|
||||
repro = "// Reproducer too large, skipping\n"
|
||||
}
|
||||
if err := os.MkdirAll("workdir", 0o755); err == nil {
|
||||
fname := fmt.Sprintf("workdir/smat_repro_%d_test.go", ts)
|
||||
if werr := os.WriteFile(fname, []byte(repro), 0o644); werr == nil {
|
||||
fmt.Printf("Wrote repro to %s\n", fname)
|
||||
} else {
|
||||
fmt.Printf("Failed writing repro file: %v\n", werr)
|
||||
}
|
||||
} else {
|
||||
fmt.Printf("Failed creating workdir: %v\n", err)
|
||||
}
|
||||
|
||||
panic(fmt.Sprintf("[checkEquals] bitmap invalid: %v\ncurrentBase64:%s\nlastAction:%s\n", valid, curSnap, last))
|
||||
}
|
||||
if !p.equalsBitSet(p.bs, p.bm) {
|
||||
panic("bitset mismatch")
|
||||
}
|
||||
}
|
||||
|
||||
func (p *smatPair) Validate() {
|
||||
valid := p.bm.Validate()
|
||||
if valid != nil {
|
||||
panic(fmt.Sprintf("[Validate] bitmap invalid: %v", valid))
|
||||
}
|
||||
}
|
||||
|
||||
func (p *smatPair) equalsBitSet(a *bitset.BitSet, b *Bitmap) bool {
|
||||
for i, e := a.NextSet(0); e; i, e = a.NextSet(i + 1) {
|
||||
if !b.ContainsInt(int(i)) {
|
||||
|
||||
13
vendor/github.com/RoaringBitmap/roaring/v2/util.go
generated
vendored
13
vendor/github.com/RoaringBitmap/roaring/v2/util.go
generated
vendored
@@ -1,9 +1,10 @@
|
||||
package roaring
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"math"
|
||||
"math/rand"
|
||||
"sort"
|
||||
"slices"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -123,7 +124,7 @@ func combineLoHi16(lob uint16, hob uint16) uint32 {
|
||||
}
|
||||
|
||||
// combineLoHi32 packs two values into one uint32: hob is shifted left by 16
// bits and OR-ed with lob.
//
// lob is not masked, so any bits it carries above bit 15 are OR-ed into the
// result as they are; bits of hob above bit 15 are shifted out.
func combineLoHi32(lob uint32, hob uint32) uint32 {
	return lob | (hob << 16)
}
|
||||
|
||||
const maxLowBit = 0xFFFF
|
||||
@@ -264,19 +265,13 @@ type ph struct {
|
||||
rand int
|
||||
}
|
||||
|
||||
type pha []ph
|
||||
|
||||
func (p pha) Len() int { return len(p) }
|
||||
func (p pha) Less(i, j int) bool { return p[i].rand < p[j].rand }
|
||||
func (p pha) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
|
||||
|
||||
func getRandomPermutation(n int) []int {
|
||||
r := make([]ph, n)
|
||||
for i := 0; i < n; i++ {
|
||||
r[i].orig = i
|
||||
r[i].rand = rand.Intn(1 << 29)
|
||||
}
|
||||
sort.Sort(pha(r))
|
||||
slices.SortFunc(r, func(a, b ph) int { return cmp.Compare(a.rand, b.rand) })
|
||||
m := make([]int, n)
|
||||
for i := range m {
|
||||
m[i] = r[i].orig
|
||||
|
||||
10
vendor/github.com/bits-and-blooms/bitset/README.md
generated
vendored
10
vendor/github.com/bits-and-blooms/bitset/README.md
generated
vendored
@@ -164,3 +164,13 @@ Before committing the code, please check if it passes tests, has adequate covera
|
||||
go test
|
||||
go test -cover
|
||||
```
|
||||
|
||||
## Stars
|
||||
|
||||
|
||||
[](https://www.star-history.com/#bits-and-blooms/bitset&Date)
|
||||
|
||||
## Further reading
|
||||
|
||||
<p>Mastering Programming: From Testing to Performance in Go</p>
|
||||
<div><a href="https://www.amazon.com/dp/B0FMPGSWR5"><img style="margin-left: auto; margin-right: auto;" src="https://m.media-amazon.com/images/I/61feneHS7kL._SL1499_.jpg" alt="" width="250px" /></a></div>
|
||||
|
||||
21
vendor/github.com/bits-and-blooms/bitset/bitset.go
generated
vendored
21
vendor/github.com/bits-and-blooms/bitset/bitset.go
generated
vendored
@@ -905,7 +905,9 @@ func (b *BitSet) DifferenceCardinality(compare *BitSet) uint {
|
||||
l = b.wordCount()
|
||||
}
|
||||
cnt := uint64(0)
|
||||
cnt += popcntMaskSlice(b.set[:l], compare.set[:l])
|
||||
if l > 0 {
|
||||
cnt += popcntMaskSlice(b.set[:l], compare.set[:l])
|
||||
}
|
||||
cnt += popcntSlice(b.set[l:])
|
||||
return uint(cnt)
|
||||
}
|
||||
@@ -960,6 +962,9 @@ func (b *BitSet) Intersection(compare *BitSet) (result *BitSet) {
|
||||
func (b *BitSet) IntersectionCardinality(compare *BitSet) uint {
|
||||
panicIfNull(b)
|
||||
panicIfNull(compare)
|
||||
if b.length == 0 || compare.length == 0 {
|
||||
return 0
|
||||
}
|
||||
b, compare = sortByLength(b, compare)
|
||||
cnt := popcntAndSlice(b.set, compare.set)
|
||||
return uint(cnt)
|
||||
@@ -1016,7 +1021,10 @@ func (b *BitSet) UnionCardinality(compare *BitSet) uint {
|
||||
panicIfNull(b)
|
||||
panicIfNull(compare)
|
||||
b, compare = sortByLength(b, compare)
|
||||
cnt := popcntOrSlice(b.set, compare.set)
|
||||
cnt := uint64(0)
|
||||
if len(b.set) > 0 {
|
||||
cnt += popcntOrSlice(b.set, compare.set)
|
||||
}
|
||||
if len(compare.set) > len(b.set) {
|
||||
cnt += popcntSlice(compare.set[len(b.set):])
|
||||
}
|
||||
@@ -1071,7 +1079,10 @@ func (b *BitSet) SymmetricDifferenceCardinality(compare *BitSet) uint {
|
||||
panicIfNull(b)
|
||||
panicIfNull(compare)
|
||||
b, compare = sortByLength(b, compare)
|
||||
cnt := popcntXorSlice(b.set, compare.set)
|
||||
cnt := uint64(0)
|
||||
if len(b.set) > 0 {
|
||||
cnt += popcntXorSlice(b.set, compare.set)
|
||||
}
|
||||
if len(compare.set) > len(b.set) {
|
||||
cnt += popcntSlice(compare.set[len(b.set):])
|
||||
}
|
||||
@@ -1473,7 +1484,7 @@ func (b *BitSet) ShiftLeft(bits uint) {
|
||||
dst := b.set
|
||||
|
||||
// not using extendSet() to avoid unneeded data copying
|
||||
nsize := wordsNeeded(top + bits)
|
||||
nsize := wordsNeeded(top + bits + 1)
|
||||
if len(b.set) < nsize {
|
||||
dst = make([]uint64, nsize)
|
||||
}
|
||||
@@ -1520,7 +1531,7 @@ func (b *BitSet) ShiftRight(bits uint) {
|
||||
return
|
||||
}
|
||||
|
||||
if bits >= top {
|
||||
if bits > top {
|
||||
b.set = make([]uint64, wordsNeeded(b.length))
|
||||
return
|
||||
}
|
||||
|
||||
65
vendor/github.com/bits-and-blooms/bitset/popcnt.go
generated
vendored
65
vendor/github.com/bits-and-blooms/bitset/popcnt.go
generated
vendored
@@ -2,58 +2,51 @@ package bitset
|
||||
|
||||
import "math/bits"
|
||||
|
||||
// popcntSlice returns the total number of set bits across all words of s.
// A nil or empty slice yields 0.
func popcntSlice(s []uint64) (cnt uint64) {
	for _, x := range s {
		cnt += uint64(bits.OnesCount64(x))
	}
	return cnt
}
|
||||
|
||||
// popcntMaskSlice counts, word by word, the bits that are set in s but not in
// m (s[i] &^ m[i]) over the first len(s) words.
//
// m must be at least as long as s; a shorter m panics with an index-out-of-
// range error. An empty s yields 0.
func popcntMaskSlice(s, m []uint64) (cnt uint64) {
	// Without this guard the hint below would index m[-1] for an empty s.
	if len(s) == 0 {
		return 0
	}
	// The next line is to help the bounds checker, it matters!
	_ = m[len(s)-1] // BCE
	for i := range s {
		cnt += uint64(bits.OnesCount64(s[i] &^ m[i]))
	}
	return cnt
}
|
||||
|
||||
// popcntAndSlice computes the population count of the AND of two slices over
// the first len(s) words.
//
// It assumes that len(m) >= len(s); a shorter m panics with an index-out-of-
// range error. An empty s yields 0.
func popcntAndSlice(s, m []uint64) (cnt uint64) {
	// Without this guard the hint below would index m[-1] for an empty s.
	if len(s) == 0 {
		return 0
	}
	// The next line is to help the bounds checker, it matters!
	_ = m[len(s)-1] // BCE
	for i := range s {
		cnt += uint64(bits.OnesCount64(s[i] & m[i]))
	}
	return cnt
}
|
||||
|
||||
// popcntOrSlice computes the population count of the OR of two slices over
// the first len(s) words; words of m beyond len(s) are not counted.
//
// It assumes that len(m) >= len(s); a shorter m panics with an index-out-of-
// range error. An empty s yields 0.
func popcntOrSlice(s, m []uint64) (cnt uint64) {
	// Without this guard the hint below would index m[-1] for an empty s.
	if len(s) == 0 {
		return 0
	}
	// The next line is to help the bounds checker, it matters!
	_ = m[len(s)-1] // BCE
	for i := range s {
		cnt += uint64(bits.OnesCount64(s[i] | m[i]))
	}
	return cnt
}
|
||||
|
||||
// popcntXorSlice computes the population count of the XOR of two slices over
// the first len(s) words; words of m beyond len(s) are not counted.
//
// It assumes that len(m) >= len(s); a shorter m panics with an index-out-of-
// range error. An empty s yields 0.
func popcntXorSlice(s, m []uint64) (cnt uint64) {
	// Without this guard the hint below would index m[-1] for an empty s.
	if len(s) == 0 {
		return 0
	}
	// The next line is to help the bounds checker, it matters!
	_ = m[len(s)-1] // BCE
	for i := range s {
		cnt += uint64(bits.OnesCount64(s[i] ^ m[i]))
	}
	return cnt
}
|
||||
|
||||
1
vendor/github.com/blevesearch/bleve/v2/README.md
generated
vendored
1
vendor/github.com/blevesearch/bleve/v2/README.md
generated
vendored
@@ -24,6 +24,7 @@ A modern indexing + search library in GO
|
||||
* [geo spatial search](https://github.com/blevesearch/bleve/blob/master/geo/README.md)
|
||||
* approximate k-nearest neighbors via [vector search](https://github.com/blevesearch/bleve/blob/master/docs/vectors.md)
|
||||
* [synonym search](https://github.com/blevesearch/bleve/blob/master/docs/synonyms.md)
|
||||
* [hierarchical nested search](https://github.com/blevesearch/bleve/blob/master/docs/hierarchy.md)
|
||||
* [tf-idf](https://github.com/blevesearch/bleve/blob/master/docs/scoring.md#tf-idf) / [bm25](https://github.com/blevesearch/bleve/blob/master/docs/scoring.md#bm25) scoring models
|
||||
* Hybrid search: exact + semantic
|
||||
* Supports [RRF (Reciprocal Rank Fusion) and RSF (Relative Score Fusion)](docs/score_fusion.md)
|
||||
|
||||
2
vendor/github.com/blevesearch/bleve/v2/analysis/analyzer/custom/custom.go
generated
vendored
2
vendor/github.com/blevesearch/bleve/v2/analysis/analyzer/custom/custom.go
generated
vendored
@@ -140,7 +140,7 @@ func convertInterfaceSliceToStringSlice(interfaceSlice []interface{}, objType st
|
||||
if ok {
|
||||
stringSlice[i] = stringObj
|
||||
} else {
|
||||
return nil, fmt.Errorf(objType + " name must be a string")
|
||||
return nil, fmt.Errorf("%s name must be a string", objType)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
5
vendor/github.com/blevesearch/bleve/v2/builder.go
generated
vendored
5
vendor/github.com/blevesearch/bleve/v2/builder.go
generated
vendored
@@ -73,7 +73,10 @@ func newBuilder(path string, mapping mapping.IndexMapping, config map[string]int
|
||||
|
||||
// do not use real config, as these are options for the builder,
|
||||
// not the resulting index
|
||||
meta := newIndexMeta(scorch.Name, scorch.Name, map[string]interface{}{})
|
||||
meta, err := newIndexMeta(scorch.Name, scorch.Name, map[string]interface{}{}, path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
err = meta.Save(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
||||
26
vendor/github.com/blevesearch/bleve/v2/document/document.go
generated
vendored
26
vendor/github.com/blevesearch/bleve/v2/document/document.go
generated
vendored
@@ -30,8 +30,9 @@ func init() {
|
||||
}
|
||||
|
||||
type Document struct {
|
||||
id string `json:"id"`
|
||||
Fields []Field `json:"fields"`
|
||||
id string
|
||||
Fields []Field `json:"fields"`
|
||||
NestedDocuments []*Document `json:"nested_documents"`
|
||||
CompositeFields []*CompositeField
|
||||
StoredFieldsSize uint64
|
||||
indexed bool
|
||||
@@ -68,6 +69,12 @@ func (d *Document) Size() int {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
for _, entry := range d.NestedDocuments {
|
||||
if entry != nil {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
@@ -111,6 +118,11 @@ func (d *Document) NumPlainTextBytes() uint64 {
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, nestedDoc := range d.NestedDocuments {
|
||||
if nestedDoc != nil {
|
||||
rv += nestedDoc.NumPlainTextBytes()
|
||||
}
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
@@ -157,3 +169,13 @@ func (d *Document) SetIndexed() {
|
||||
func (d *Document) Indexed() bool {
|
||||
return d.indexed
|
||||
}
|
||||
|
||||
// AddNestedDocument appends doc to d.NestedDocuments. The pointer is stored
// as given; no nil check or copy is made here.
func (d *Document) AddNestedDocument(doc *Document) {
|
||||
d.NestedDocuments = append(d.NestedDocuments, doc)
|
||||
}
|
||||
|
||||
func (d *Document) VisitNestedDocuments(visitor func(doc index.Document)) {
|
||||
for _, doc := range d.NestedDocuments {
|
||||
visitor(doc)
|
||||
}
|
||||
}
|
||||
|
||||
9
vendor/github.com/blevesearch/bleve/v2/document/field_geopoint.go
generated
vendored
9
vendor/github.com/blevesearch/bleve/v2/document/field_geopoint.go
generated
vendored
@@ -180,6 +180,15 @@ func NewGeoPointField(name string, arrayPositions []uint64, lon, lat float64) *G
|
||||
func NewGeoPointFieldWithIndexingOptions(name string, arrayPositions []uint64, lon, lat float64, options index.FieldIndexingOptions) *GeoPointField {
|
||||
mhash := geo.MortonHash(lon, lat)
|
||||
prefixCoded := numeric.MustNewPrefixCodedInt64(int64(mhash), 0)
|
||||
|
||||
// docvalues are always enabled for geopoint fields, even if the
|
||||
// indexing options are set to not include docvalues.
|
||||
// snappy compression and chunking are always skipped for geopoint
|
||||
// to avoid mem copies and faster lookups.
|
||||
options |= index.DocValues
|
||||
options |= index.SkipDVChunking
|
||||
options |= index.SkipDVCompression
|
||||
|
||||
return &GeoPointField{
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
|
||||
8
vendor/github.com/blevesearch/bleve/v2/document/field_geoshape.go
generated
vendored
8
vendor/github.com/blevesearch/bleve/v2/document/field_geoshape.go
generated
vendored
@@ -180,7 +180,11 @@ func NewGeoShapeFieldFromShapeWithIndexingOptions(name string, arrayPositions []
|
||||
|
||||
// docvalues are always enabled for geoshape fields, even if the
|
||||
// indexing options are set to not include docvalues.
|
||||
// snappy compression and chunking are always skipped for geoshape
|
||||
// to avoid mem copies and faster lookups.
|
||||
options |= index.DocValues
|
||||
options |= index.SkipDVChunking
|
||||
options |= index.SkipDVCompression
|
||||
|
||||
return &GeoShapeField{
|
||||
shape: shape,
|
||||
@@ -232,7 +236,11 @@ func NewGeometryCollectionFieldFromShapesWithIndexingOptions(name string,
|
||||
|
||||
// docvalues are always enabled for geoshape fields, even if the
|
||||
// indexing options are set to not include docvalues.
|
||||
// snappy compression and chunking are always skipped for geoshape
|
||||
// to avoid mem copies and faster lookups.
|
||||
options |= index.DocValues
|
||||
options |= index.SkipDVChunking
|
||||
options |= index.SkipDVCompression
|
||||
|
||||
return &GeoShapeField{
|
||||
shape: shape,
|
||||
|
||||
7
vendor/github.com/blevesearch/bleve/v2/document/field_vector.go
generated
vendored
7
vendor/github.com/blevesearch/bleve/v2/document/field_vector.go
generated
vendored
@@ -114,6 +114,13 @@ func NewVectorFieldWithIndexingOptions(name string, arrayPositions []uint64,
|
||||
// skip freq/norms for vector field
|
||||
options |= index.SkipFreqNorm
|
||||
|
||||
// bivf-sq8 indexes only supports hamming distance for the primary
|
||||
// binary index. Similarity here is used for the backing flat index,
|
||||
// which is set to cosine similarity for recall reasons
|
||||
if index.OptimizationRequiresBinaryIndex(vectorIndexOptimizedFor) {
|
||||
similarity = index.CosineSimilarity
|
||||
}
|
||||
|
||||
return &VectorField{
|
||||
name: name,
|
||||
dims: dims,
|
||||
|
||||
1
vendor/github.com/blevesearch/bleve/v2/error.go
generated
vendored
1
vendor/github.com/blevesearch/bleve/v2/error.go
generated
vendored
@@ -28,6 +28,7 @@ const (
|
||||
ErrorIndexReadInconsistency
|
||||
ErrorTwoPhaseSearchInconsistency
|
||||
ErrorSynonymSearchNotSupported
|
||||
ErrorTrainingNotSupported
|
||||
)
|
||||
|
||||
// Error represents a more strongly typed bleve error for detecting
|
||||
|
||||
15
vendor/github.com/blevesearch/bleve/v2/index.go
generated
vendored
15
vendor/github.com/blevesearch/bleve/v2/index.go
generated
vendored
@@ -389,6 +389,11 @@ type SynonymIndex interface {
|
||||
IndexSynonym(id string, collection string, definition *SynonymDefinition) error
|
||||
}
|
||||
|
||||
// IndexWithCallbacks is implemented by indexes that can report and drop
// file-writer IDs.
// NOTE(review): what a "file writer ID" identifies is not visible in this
// file — confirm against the implementing index before relying on it.
type IndexWithCallbacks interface {
|
||||
// FileWriterIDsInUse returns a set of IDs (the map keys) or an error.
FileWriterIDsInUse() (map[string]struct{}, error)
|
||||
// DropFileWriterIDs takes a set of IDs (the map keys) and returns an error
// on failure.
DropFileWriterIDs(ids map[string]struct{}) error
|
||||
}
|
||||
|
||||
type InsightsIndex interface {
|
||||
Index
|
||||
// TermFrequencies returns the tokens ordered by frequencies for the field index.
|
||||
@@ -396,3 +401,13 @@ type InsightsIndex interface {
|
||||
// CentroidCardinalities returns the centroids (clusters) from IVF indexes ordered by data density.
|
||||
CentroidCardinalities(field string, limit int, desceding bool) ([]index.CentroidCardinality, error)
|
||||
}
|
||||
|
||||
// TrainableIndex is an Index that additionally accepts a Batch through Train.
// NOTE(review): presumably the batch supplies training data for the index —
// confirm against the implementation.
type TrainableIndex interface {
|
||||
Index
|
||||
// Train consumes the given batch and returns an error on failure.
Train(*Batch) error
|
||||
}
|
||||
|
||||
type IndexFileCopyable interface {
|
||||
SetPathInBolt(key []byte, value []byte) error //dest index
|
||||
CopyFile(file string, d index.IndexDirectory) error // source index
|
||||
}
|
||||
|
||||
4
vendor/github.com/blevesearch/bleve/v2/index/scorch/builder.go
generated
vendored
4
vendor/github.com/blevesearch/bleve/v2/index/scorch/builder.go
generated
vendored
@@ -20,9 +20,9 @@ import (
|
||||
"sync"
|
||||
|
||||
"github.com/RoaringBitmap/roaring/v2"
|
||||
"github.com/blevesearch/bleve/v2/util"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
segment "github.com/blevesearch/scorch_segment_api/v2"
|
||||
bolt "go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
const DefaultBuilderBatchSize = 1000
|
||||
@@ -291,7 +291,7 @@ func (o *Builder) Close() error {
|
||||
|
||||
// create the root bolt
|
||||
rootBoltPath := o.path + string(os.PathSeparator) + "root.bolt"
|
||||
rootBolt, err := bolt.Open(rootBoltPath, 0600, nil)
|
||||
rootBolt, err := util.OpenBolt(rootBoltPath, 0600, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
20
vendor/github.com/blevesearch/bleve/v2/index/scorch/introducer.go
generated
vendored
20
vendor/github.com/blevesearch/bleve/v2/index/scorch/introducer.go
generated
vendored
@@ -154,22 +154,24 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
|
||||
cachedDocs: root.segment[i].cachedDocs,
|
||||
cachedMeta: root.segment[i].cachedMeta,
|
||||
creator: root.segment[i].creator,
|
||||
internal: root.segment[i].internal,
|
||||
}
|
||||
|
||||
// apply new obsoletions
|
||||
if root.segment[i].deleted == nil {
|
||||
newss.deleted = delta
|
||||
} else {
|
||||
if delta.IsEmpty() {
|
||||
newss.deleted = root.segment[i].deleted
|
||||
} else {
|
||||
newss.deleted = roaring.Or(root.segment[i].deleted, delta)
|
||||
}
|
||||
newss.deleted = roaring.Or(root.segment[i].deleted, delta)
|
||||
}
|
||||
if newss.deleted.IsEmpty() {
|
||||
newss.deleted = nil
|
||||
}
|
||||
|
||||
// update the deleted bitmap to include any nested/sub-documents as well
|
||||
// if the segment supports that
|
||||
if ns, ok := newss.segment.(segment.NestedSegment); ok {
|
||||
newss.deleted = ns.AddNestedDocuments(newss.deleted)
|
||||
}
|
||||
// check for live size before copying
|
||||
if newss.LiveSize() > 0 {
|
||||
newSnapshot.segment = append(newSnapshot.segment, newss)
|
||||
@@ -201,6 +203,7 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
|
||||
stats: next.stats,
|
||||
cachedDocs: &cachedDocs{cache: nil},
|
||||
cachedMeta: &cachedMeta{meta: nil},
|
||||
internal: make(map[string][]byte),
|
||||
creator: "introduceSegment",
|
||||
}
|
||||
newSnapshot.segment = append(newSnapshot.segment, newSegmentSnapshot)
|
||||
@@ -210,6 +213,12 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
|
||||
// queued for persistence.
|
||||
atomic.AddUint64(&s.stats.TotIntroducedItems, newSegmentSnapshot.Count())
|
||||
atomic.AddUint64(&s.stats.TotIntroducedSegmentsBatch, 1)
|
||||
|
||||
// track the internal values of this segment so that when we update the
|
||||
// bolt we keep the internal values in sync with the segments on disk, and
|
||||
// if this segment didn't get persisted we need to undo that info from the
|
||||
// indexSnapshot's internal map as part of the bolt update.
|
||||
newSegmentSnapshot.internal = next.internal
|
||||
}
|
||||
// copy old values
|
||||
for key, oldVal := range root.internal {
|
||||
@@ -398,6 +407,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
|
||||
cachedDocs: root.segment[i].cachedDocs,
|
||||
cachedMeta: root.segment[i].cachedMeta,
|
||||
creator: root.segment[i].creator,
|
||||
internal: root.segment[i].internal,
|
||||
})
|
||||
root.segment[i].segment.AddRef()
|
||||
newSnapshot.offsets = append(newSnapshot.offsets, running)
|
||||
|
||||
110
vendor/github.com/blevesearch/bleve/v2/index/scorch/merge.go
generated
vendored
110
vendor/github.com/blevesearch/bleve/v2/index/scorch/merge.go
generated
vendored
@@ -31,6 +31,19 @@ import (
|
||||
|
||||
const merger = "merger"
|
||||
|
||||
// used in the context of mergerCtrl to provide a way to verify
|
||||
// the completion of a merge operation
|
||||
const mergeDoneKey = "mergeDone"
|
||||
|
||||
type mergeDoneChan chan error
|
||||
|
||||
// used in the context of mergerCtrl to provide a way to use
|
||||
// a custom merge plan instead of the one generated by the
|
||||
// default merge planner
|
||||
const mergePlanFuncKey = "mergePlanFunc"
|
||||
|
||||
type mergePlanFunc func(*IndexSnapshot) (*mergeplan.MergePlan, error)
|
||||
|
||||
func (s *Scorch) mergerLoop() {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
@@ -95,11 +108,9 @@ OUTER:
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
startTime := time.Now()
|
||||
|
||||
// lets get started
|
||||
err := s.planMergeAtSnapshot(ctrlMsg.ctx, ctrlMsg.options,
|
||||
ourSnapshot)
|
||||
startTime := time.Now()
|
||||
err := s.planMergeAtSnapshot(ctrlMsg, ourSnapshot)
|
||||
if err != nil {
|
||||
atomic.StoreUint64(&s.iStats.mergeEpoch, 0)
|
||||
if err == segment.ErrClosed {
|
||||
@@ -286,42 +297,64 @@ func (w *closeChWrapper) listen() {
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Scorch) planMergeAtSnapshot(ctx context.Context,
|
||||
options *mergeplan.MergePlanOptions, ourSnapshot *IndexSnapshot) error {
|
||||
// build list of persisted segments in this snapshot
|
||||
var onlyPersistedSnapshots []mergeplan.Segment
|
||||
for _, segmentSnapshot := range ourSnapshot.segment {
|
||||
if _, ok := segmentSnapshot.segment.(segment.PersistedSegment); ok {
|
||||
onlyPersistedSnapshots = append(onlyPersistedSnapshots, segmentSnapshot)
|
||||
// planMergeAtSnapshot plans and executes the merge operations for a given snapshot
|
||||
// if there is a custom merge plan function provided, it uses that to get the merge plan
|
||||
// otherwise, it builds the merge plan using the default planner and executes the merge tasks in the plan.
|
||||
func (s *Scorch) planMergeAtSnapshot(ctrlMsg *mergerCtrl, ourSnapshot *IndexSnapshot) error {
|
||||
var mergePlan *mergeplan.MergePlan
|
||||
// if a merge plan function is provided in the context, use it to get the merge plan
|
||||
if mergePlanFunc, ok := ctrlMsg.ctx.Value(mergePlanFuncKey).(mergePlanFunc); ok {
|
||||
var err error
|
||||
mergePlan, err = mergePlanFunc(ourSnapshot)
|
||||
if err != nil {
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanErr, 1)
|
||||
return fmt.Errorf("merge planning err: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlan, 1)
|
||||
// default to making a merge plan if a custom one is not provided
|
||||
if mergePlan == nil {
|
||||
// build list of persisted segments in this snapshot
|
||||
var onlyPersistedSnapshots []mergeplan.Segment
|
||||
for _, segmentSnapshot := range ourSnapshot.segment {
|
||||
if _, ok := segmentSnapshot.segment.(segment.PersistedSegment); ok {
|
||||
onlyPersistedSnapshots = append(onlyPersistedSnapshots, segmentSnapshot)
|
||||
}
|
||||
}
|
||||
|
||||
// give this list to the planner
|
||||
resultMergePlan, err := mergeplan.Plan(onlyPersistedSnapshots, options)
|
||||
if err != nil {
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanErr, 1)
|
||||
return fmt.Errorf("merge planning err: %v", err)
|
||||
}
|
||||
if resultMergePlan == nil {
|
||||
// nothing to do
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanNone, 1)
|
||||
return nil
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlan, 1)
|
||||
|
||||
// give this list to the planner
|
||||
var err error
|
||||
mergePlan, err = mergeplan.Plan(onlyPersistedSnapshots, ctrlMsg.options)
|
||||
if err != nil {
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanErr, 1)
|
||||
return fmt.Errorf("merge planning err: %v", err)
|
||||
}
|
||||
if mergePlan == nil {
|
||||
// nothing to do
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanNone, 1)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanOk, 1)
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasks, uint64(len(mergePlan.Tasks)))
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasks, uint64(len(resultMergePlan.Tasks)))
|
||||
|
||||
// process tasks in serial for now
|
||||
var filenames []string
|
||||
|
||||
cw := newCloseChWrapper(s.closeCh, ctx)
|
||||
cw := newCloseChWrapper(s.closeCh, ctrlMsg.ctx)
|
||||
defer cw.close()
|
||||
|
||||
go cw.listen()
|
||||
|
||||
for _, task := range resultMergePlan.Tasks {
|
||||
var filenames []string
|
||||
var err error
|
||||
defer func() {
|
||||
// send error to done channel if present
|
||||
if done, ok := cw.ctx.Value(mergeDoneKey).(chan error); ok {
|
||||
done <- err
|
||||
}
|
||||
}()
|
||||
|
||||
for _, task := range mergePlan.Tasks {
|
||||
if len(task.Segments) == 0 {
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1)
|
||||
continue
|
||||
@@ -329,7 +362,6 @@ func (s *Scorch) planMergeAtSnapshot(ctx context.Context,
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegments, uint64(len(task.Segments)))
|
||||
|
||||
oldMap := make(map[uint64]*SegmentSnapshot, len(task.Segments))
|
||||
newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1)
|
||||
segmentsToMerge := make([]segment.Segment, 0, len(task.Segments))
|
||||
docsToDrop := make([]*roaring.Bitmap, 0, len(task.Segments))
|
||||
@@ -337,7 +369,6 @@ func (s *Scorch) planMergeAtSnapshot(ctx context.Context,
|
||||
|
||||
for _, planSegment := range task.Segments {
|
||||
if segSnapshot, ok := planSegment.(*SegmentSnapshot); ok {
|
||||
oldMap[segSnapshot.id] = segSnapshot
|
||||
mergedSegHistory[segSnapshot.id] = &mergedSegmentHistory{
|
||||
workerID: 0,
|
||||
oldSegment: segSnapshot,
|
||||
@@ -345,7 +376,6 @@ func (s *Scorch) planMergeAtSnapshot(ctx context.Context,
|
||||
if persistedSeg, ok := segSnapshot.segment.(segment.PersistedSegment); ok {
|
||||
if segSnapshot.LiveSize() == 0 {
|
||||
atomic.AddUint64(&s.stats.TotFileMergeSegmentsEmpty, 1)
|
||||
oldMap[segSnapshot.id] = nil
|
||||
delete(mergedSegHistory, segSnapshot.id)
|
||||
} else {
|
||||
segmentsToMerge = append(segmentsToMerge, segSnapshot.segment)
|
||||
@@ -372,8 +402,9 @@ func (s *Scorch) planMergeAtSnapshot(ctx context.Context,
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1)
|
||||
prevBytesReadTotal := cumulateBytesRead(segmentsToMerge)
|
||||
newDocNums, _, err := s.segPlugin.Merge(segmentsToMerge, docsToDrop, path,
|
||||
cw.cancelCh, s)
|
||||
var newDocNums [][]uint64
|
||||
newDocNums, _, err = s.segPlugin.MergeUsing(segmentsToMerge, docsToDrop, path,
|
||||
cw.cancelCh, s, s.segmentConfig)
|
||||
atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1)
|
||||
|
||||
fileMergeZapTime := uint64(time.Since(fileMergeZapStartTime))
|
||||
@@ -391,7 +422,7 @@ func (s *Scorch) planMergeAtSnapshot(ctx context.Context,
|
||||
return fmt.Errorf("merging failed: %v", err)
|
||||
}
|
||||
|
||||
seg, err = s.segPlugin.Open(path)
|
||||
seg, err = s.segPlugin.OpenUsing(path, s.segmentConfig)
|
||||
if err != nil {
|
||||
s.unmarkIneligibleForRemoval(filename)
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
|
||||
@@ -425,7 +456,8 @@ func (s *Scorch) planMergeAtSnapshot(ctx context.Context,
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
_ = seg.Close()
|
||||
return segment.ErrClosed
|
||||
err = segment.ErrClosed
|
||||
return err
|
||||
case s.merges <- sm:
|
||||
atomic.AddUint64(&s.stats.TotFileMergeIntroductions, 1)
|
||||
}
|
||||
@@ -540,7 +572,7 @@ func (s *Scorch) mergeAndPersistInMemorySegments(snapshot *IndexSnapshot,
|
||||
// the newly merged segment is already flushed out to disk, just needs
|
||||
// to be opened using mmap.
|
||||
newDocIDs, _, err :=
|
||||
s.segPlugin.Merge(segsBatch, dropsBatch, path, s.closeCh, s)
|
||||
s.segPlugin.MergeUsing(segsBatch, dropsBatch, path, s.closeCh, s, s.segmentConfig)
|
||||
if err != nil {
|
||||
em.Lock()
|
||||
errs = append(errs, err)
|
||||
@@ -555,7 +587,7 @@ func (s *Scorch) mergeAndPersistInMemorySegments(snapshot *IndexSnapshot,
|
||||
s.markIneligibleForRemoval(filename)
|
||||
newMergedSegmentIDs[id] = newSegmentID
|
||||
newDocIDsSet[id] = newDocIDs
|
||||
newMergedSegments[id], err = s.segPlugin.Open(path)
|
||||
newMergedSegments[id], err = s.segPlugin.OpenUsing(path, s.segmentConfig)
|
||||
if err != nil {
|
||||
em.Lock()
|
||||
errs = append(errs, err)
|
||||
|
||||
2
vendor/github.com/blevesearch/bleve/v2/index/scorch/optimize.go
generated
vendored
2
vendor/github.com/blevesearch/bleve/v2/index/scorch/optimize.go
generated
vendored
@@ -395,5 +395,7 @@ func (i *IndexSnapshot) unadornedTermFieldReader(
|
||||
recycle: false,
|
||||
// signal downstream that this is a special unadorned termFieldReader
|
||||
unadorned: true,
|
||||
// unadorned TFRs do not require bytes read tracking
|
||||
updateBytesRead: false,
|
||||
}
|
||||
}
|
||||
|
||||
10
vendor/github.com/blevesearch/bleve/v2/index/scorch/optimize_knn.go
generated
vendored
10
vendor/github.com/blevesearch/bleve/v2/index/scorch/optimize_knn.go
generated
vendored
@@ -34,8 +34,6 @@ type OptimizeVR struct {
|
||||
totalCost uint64
|
||||
// maps field to vector readers
|
||||
vrs map[string][]*IndexSnapshotVectorReader
|
||||
// if at least one of the vector readers requires filtered kNN.
|
||||
requiresFiltering bool
|
||||
}
|
||||
|
||||
// This setting _MUST_ only be changed during init and not after.
|
||||
@@ -85,8 +83,7 @@ func (o *OptimizeVR) Finish() error {
|
||||
continue
|
||||
}
|
||||
|
||||
vecIndex, err := segment.InterpretVectorIndex(field,
|
||||
o.requiresFiltering, origSeg.deleted)
|
||||
vecIndex, err := segment.InterpretVectorIndex(field, origSeg.deleted)
|
||||
if err != nil {
|
||||
errorsM.Lock()
|
||||
errors = append(errors, err)
|
||||
@@ -109,7 +106,7 @@ func (o *OptimizeVR) Finish() error {
|
||||
// kNN search.
|
||||
if vr.eligibleSelector != nil {
|
||||
pl, err = vecIndex.SearchWithFilter(vr.vector, vr.k,
|
||||
vr.eligibleSelector.SegmentEligibleDocs(index), vr.searchParams)
|
||||
vr.eligibleSelector.SegmentEligibleDocuments(index), vr.searchParams)
|
||||
} else {
|
||||
pl, err = vecIndex.Search(vr.vector, vr.k, vr.searchParams)
|
||||
}
|
||||
@@ -163,9 +160,6 @@ func (s *IndexSnapshotVectorReader) VectorOptimize(ctx context.Context,
|
||||
return octx, nil
|
||||
}
|
||||
o.ctx = ctx
|
||||
if !o.requiresFiltering {
|
||||
o.requiresFiltering = s.eligibleSelector != nil
|
||||
}
|
||||
|
||||
if o.snapshot != s.snapshot {
|
||||
o.invokeSearcherEndCallback()
|
||||
|
||||
445
vendor/github.com/blevesearch/bleve/v2/index/scorch/persister.go
generated
vendored
445
vendor/github.com/blevesearch/bleve/v2/index/scorch/persister.go
generated
vendored
@@ -425,7 +425,6 @@ func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot, po *persiste
|
||||
var totSize int
|
||||
var numSegsToFlushOut int
|
||||
var totDocs uint64
|
||||
|
||||
// legacy behaviour of merge + flush of all in-memory segments in one-shot
|
||||
if legacyFlushBehaviour(po.MaxSizeInMemoryMergePerWorker, po.NumPersisterWorkers) {
|
||||
val := &flushable{
|
||||
@@ -538,10 +537,15 @@ func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot, po *persiste
|
||||
exclude := make(map[uint64]struct{})
|
||||
|
||||
// copy to the equiv the segments that weren't replaced
|
||||
for _, segment := range snapshot.segment {
|
||||
if _, wasMerged := mergedSegmentIDs[segment.id]; !wasMerged {
|
||||
equiv.segment = append(equiv.segment, segment)
|
||||
exclude[segment.id] = struct{}{}
|
||||
for _, ss := range snapshot.segment {
|
||||
if _, wasMerged := mergedSegmentIDs[ss.id]; !wasMerged {
|
||||
equiv.segment = append(equiv.segment, ss)
|
||||
// this can be either in-memory or persisted segment, but while
|
||||
// preparing the bolt snapshot we avoid the in-memory segments to be
|
||||
// flushed out
|
||||
if _, ok := ss.segment.(segment.PersistedSegment); !ok {
|
||||
exclude[ss.id] = struct{}{}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -549,10 +553,11 @@ func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot, po *persiste
|
||||
for _, segment := range newSnapshot.segment {
|
||||
if _, ok := newMergedSegmentIDs[segment.id]; ok {
|
||||
equiv.segment = append(equiv.segment, &SegmentSnapshot{
|
||||
id: segment.id,
|
||||
segment: segment.segment,
|
||||
deleted: nil, // nil since merging handled deletions
|
||||
stats: nil,
|
||||
id: segment.id,
|
||||
segment: segment.segment,
|
||||
deleted: nil, // nil since merging handled deletions
|
||||
stats: nil,
|
||||
internal: nil, // segment is persisted and equiv is already updated
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -575,6 +580,11 @@ func copyToDirectory(srcPath string, d index.Directory) (int64, error) {
|
||||
return 0, fmt.Errorf("GetWriter err: %v", err)
|
||||
}
|
||||
|
||||
// skip
|
||||
if dest == nil {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
sourceFileStat, err := os.Stat(srcPath)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
@@ -616,9 +626,8 @@ func persistToDirectory(seg segment.UnpersistedSegment, d index.Directory,
|
||||
return err
|
||||
}
|
||||
|
||||
func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string,
|
||||
segPlugin SegmentPlugin, exclude map[uint64]struct{}, d index.Directory) (
|
||||
[]string, map[uint64]string, error) {
|
||||
func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *util.BoltTxImpl, path string, segPlugin SegmentPlugin,
|
||||
exclude map[uint64]struct{}, d index.Directory) ([]string, map[uint64]string, error) {
|
||||
snapshotsBucket, err := tx.CreateBucketIfNotExists(util.BoltSnapshotsBucket)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
@@ -634,13 +643,29 @@ func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string,
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
err = metaBucket.Put(util.BoltMetaDataSegmentTypeKey, []byte(segPlugin.Type()))
|
||||
err = metaBucket.Put(util.BoltMetaDataSegmentTypeKey, []byte(segPlugin.Type()), nil)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
buf := make([]byte, binary.MaxVarintLen32)
|
||||
binary.BigEndian.PutUint32(buf, segPlugin.Version())
|
||||
err = metaBucket.Put(util.BoltMetaDataSegmentVersionKey, buf)
|
||||
err = metaBucket.Put(util.BoltMetaDataSegmentVersionKey, buf, nil)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
// always obtain the path from the parent snapshot if available
|
||||
// since that is the primary source of truth for context
|
||||
if snapshot.parent != nil {
|
||||
path = snapshot.parent.path
|
||||
}
|
||||
writer, err := util.NewFileWriter(
|
||||
[]byte(path + string(os.PathSeparator) + "root.bolt"))
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// persist the writer ID used for the bolt snapshot
|
||||
err = metaBucket.Put(util.BoltMetaDataFileWriterIDKey, []byte(writer.Id()), writer)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
@@ -654,7 +679,7 @@ func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string,
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
err = metaBucket.Put(util.BoltMetaDataTimeStamp, timeStampBinary)
|
||||
err = metaBucket.Put(util.BoltMetaDataTimeStamp, timeStampBinary, writer)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
@@ -664,19 +689,19 @@ func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string,
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
// TODO optimize writing these in order?
|
||||
|
||||
// deep copy the internal map since we'll be keeping only the persisted info
|
||||
// in bolt and some of the information might be deleted
|
||||
internal := make(map[string][]byte, len(snapshot.internal))
|
||||
for k, v := range snapshot.internal {
|
||||
err = internalBucket.Put([]byte(k), v)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
internal[k] = v
|
||||
}
|
||||
|
||||
if snapshot.parent != nil {
|
||||
val := make([]byte, 8)
|
||||
bytesWritten := atomic.LoadUint64(&snapshot.parent.stats.TotBytesWrittenAtIndexTime)
|
||||
binary.LittleEndian.PutUint64(val, bytesWritten)
|
||||
err = internalBucket.Put(util.TotBytesWrittenKey, val)
|
||||
err = internalBucket.Put(util.TotBytesWrittenKey, val, writer)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
@@ -687,79 +712,112 @@ func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string,
|
||||
|
||||
// first ensure that each segment in this snapshot has been persisted
|
||||
for _, segmentSnapshot := range snapshot.segment {
|
||||
snapshotSegmentKey := encodeUvarintAscending(nil, segmentSnapshot.id)
|
||||
snapshotSegmentBucket, err := snapshotBucket.CreateBucketIfNotExists(snapshotSegmentKey)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
var persistedSeg bool
|
||||
var snapshotSegmentBucket *util.BoltBucketImpl
|
||||
switch seg := segmentSnapshot.segment.(type) {
|
||||
case segment.PersistedSegment:
|
||||
snapshotSegmentKey := encodeUvarintAscending(nil, segmentSnapshot.id)
|
||||
snapshotSegmentBucket, err = snapshotBucket.CreateBucketIfNotExists(snapshotSegmentKey)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
segPath := seg.Path()
|
||||
_, err = copyToDirectory(segPath, d)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("segment: %s copy err: %v", segPath, err)
|
||||
}
|
||||
filename := filepath.Base(segPath)
|
||||
err = snapshotSegmentBucket.Put(util.BoltPathKey, []byte(filename))
|
||||
err = snapshotSegmentBucket.Put(util.BoltPathKey, []byte(filename), writer)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
filenames = append(filenames, filename)
|
||||
persistedSeg = true
|
||||
case segment.UnpersistedSegment:
|
||||
// need to persist this to disk if its not part of exclude list (which
|
||||
// restricts which in-memory segment to be persisted to disk)
|
||||
if _, ok := exclude[segmentSnapshot.id]; !ok {
|
||||
snapshotSegmentKey := encodeUvarintAscending(nil, segmentSnapshot.id)
|
||||
snapshotSegmentBucket, err = snapshotBucket.CreateBucketIfNotExists(snapshotSegmentKey)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
filename := zapFileName(segmentSnapshot.id)
|
||||
path := filepath.Join(path, filename)
|
||||
err := persistToDirectory(seg, d, path)
|
||||
err = persistToDirectory(seg, d, path)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("segment: %s persist err: %v", path, err)
|
||||
}
|
||||
newSegmentPaths[segmentSnapshot.id] = path
|
||||
err = snapshotSegmentBucket.Put(util.BoltPathKey, []byte(filename))
|
||||
err = snapshotSegmentBucket.Put(util.BoltPathKey, []byte(filename), nil)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
filenames = append(filenames, filename)
|
||||
persistedSeg = true
|
||||
} else {
|
||||
// this segment is not going to be persisted in this cycle, so any
|
||||
// of the corresponding internal values need to be removed since
|
||||
// on recovery they shouldn't be loaded as part of the indexSnapshot
|
||||
for k, v := range segmentSnapshot.internal {
|
||||
if v != nil {
|
||||
delete(internal, k)
|
||||
}
|
||||
}
|
||||
}
|
||||
default:
|
||||
return nil, nil, fmt.Errorf("unknown segment type: %T", seg)
|
||||
}
|
||||
// store current deleted bits
|
||||
var roaringBuf bytes.Buffer
|
||||
if segmentSnapshot.deleted != nil {
|
||||
_, err = segmentSnapshot.deleted.WriteTo(&roaringBuf)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("error persisting roaring bytes: %v", err)
|
||||
|
||||
// if the segment was excluded from persistence, then skip updating the metadata
|
||||
// or helper data corresponding to it - we need to keep things in-line with
|
||||
// the on-disk information
|
||||
if persistedSeg {
|
||||
// store current deleted bits
|
||||
var roaringBuf bytes.Buffer
|
||||
if segmentSnapshot.deleted != nil {
|
||||
_, err = segmentSnapshot.deleted.WriteTo(&roaringBuf)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("error persisting roaring bytes: %v", err)
|
||||
}
|
||||
err = snapshotSegmentBucket.Put(util.BoltDeletedKey, roaringBuf.Bytes(), writer)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
}
|
||||
err = snapshotSegmentBucket.Put(util.BoltDeletedKey, roaringBuf.Bytes())
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
|
||||
// store segment stats
|
||||
if segmentSnapshot.stats != nil {
|
||||
statsBytes, err := json.Marshal(segmentSnapshot.stats.Fetch())
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
err = snapshotSegmentBucket.Put(util.BoltStatsKey, statsBytes, writer)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
// store updated field info
|
||||
if segmentSnapshot.updatedFields != nil {
|
||||
updatedFieldsBytes, err := json.Marshal(segmentSnapshot.updatedFields)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
err = snapshotSegmentBucket.Put(
|
||||
util.BoltUpdatedFieldsKey, updatedFieldsBytes, writer)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// store segment stats
|
||||
if segmentSnapshot.stats != nil {
|
||||
b, err := json.Marshal(segmentSnapshot.stats.Fetch())
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
err = snapshotSegmentBucket.Put(util.BoltStatsKey, b)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
// store updated field info
|
||||
if segmentSnapshot.updatedFields != nil {
|
||||
b, err := json.Marshal(segmentSnapshot.updatedFields)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
err = snapshotSegmentBucket.Put(util.BoltUpdatedFieldsKey, b)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
// now the internal values are reflective of the on-disk data, update in bolt
|
||||
for k, v := range internal {
|
||||
err = internalBucket.Put([]byte(k), v, writer)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
@@ -804,7 +862,7 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot, exclude map[uint
|
||||
}
|
||||
}()
|
||||
for segmentID, path := range newSegmentPaths {
|
||||
newSegments[segmentID], err = s.segPlugin.Open(path)
|
||||
newSegments[segmentID], err = s.segPlugin.OpenUsing(path, s.segmentConfig)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening new segment at %s, %v", path, err)
|
||||
}
|
||||
@@ -854,9 +912,8 @@ func zapFileName(epoch uint64) string {
|
||||
}
|
||||
|
||||
// bolt snapshot code
|
||||
|
||||
func (s *Scorch) loadFromBolt() error {
|
||||
err := s.rootBolt.View(func(tx *bolt.Tx) error {
|
||||
err := s.rootBolt.View(func(tx *util.BoltTxImpl) error {
|
||||
snapshots := tx.Bucket(util.BoltSnapshotsBucket)
|
||||
if snapshots == nil {
|
||||
return nil
|
||||
@@ -873,7 +930,7 @@ func (s *Scorch) loadFromBolt() error {
|
||||
s.AddEligibleForRemoval(snapshotEpoch)
|
||||
continue
|
||||
}
|
||||
snapshot := snapshots.Bucket(k)
|
||||
snapshot := snapshots.GetBucket(k)
|
||||
if snapshot == nil {
|
||||
log.Printf("snapshot key, but bucket missing %x, continuing", k)
|
||||
s.AddEligibleForRemoval(snapshotEpoch)
|
||||
@@ -904,6 +961,17 @@ func (s *Scorch) loadFromBolt() error {
|
||||
|
||||
foundRoot = true
|
||||
}
|
||||
|
||||
// try init trainer and load the trained data
|
||||
if trainer := initTrainer(s, s.config); trainer != nil {
|
||||
s.trainer = trainer
|
||||
trainerBucket := snapshots.GetBucket(util.BoltTrainerKey)
|
||||
err := s.trainer.loadTrainedData(trainerBucket)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
@@ -921,13 +989,13 @@ func (s *Scorch) loadFromBolt() error {
|
||||
// LoadSnapshot loads the segment with the specified epoch
|
||||
// NOTE: this is currently ONLY intended to be used by the command-line tool
|
||||
func (s *Scorch) LoadSnapshot(epoch uint64) (rv *IndexSnapshot, err error) {
|
||||
err = s.rootBolt.View(func(tx *bolt.Tx) error {
|
||||
err = s.rootBolt.View(func(tx *util.BoltTxImpl) error {
|
||||
snapshots := tx.Bucket(util.BoltSnapshotsBucket)
|
||||
if snapshots == nil {
|
||||
return nil
|
||||
}
|
||||
snapshotKey := encodeUvarintAscending(nil, epoch)
|
||||
snapshot := snapshots.Bucket(snapshotKey)
|
||||
snapshot := snapshots.GetBucket(snapshotKey)
|
||||
if snapshot == nil {
|
||||
return fmt.Errorf("snapshot with epoch: %v - doesn't exist", epoch)
|
||||
}
|
||||
@@ -940,7 +1008,7 @@ func (s *Scorch) LoadSnapshot(epoch uint64) (rv *IndexSnapshot, err error) {
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
|
||||
func (s *Scorch) loadSnapshot(snapshot *util.BoltBucketImpl) (*IndexSnapshot, error) {
|
||||
rv := &IndexSnapshot{
|
||||
parent: s,
|
||||
internal: make(map[string][]byte),
|
||||
@@ -950,45 +1018,64 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
|
||||
// first we look for the meta-data bucket, this will tell us
|
||||
// which segment type/version was used for this snapshot
|
||||
// all operations for this scorch will use this type/version
|
||||
metaBucket := snapshot.Bucket(util.BoltMetaDataKey)
|
||||
metaBucket := snapshot.GetBucket(util.BoltMetaDataKey)
|
||||
if metaBucket == nil {
|
||||
_ = rv.DecRef()
|
||||
return nil, fmt.Errorf("meta-data bucket missing")
|
||||
}
|
||||
segmentType := string(metaBucket.Get(util.BoltMetaDataSegmentTypeKey))
|
||||
segmentVersion := binary.BigEndian.Uint32(
|
||||
metaBucket.Get(util.BoltMetaDataSegmentVersionKey))
|
||||
err := s.loadSegmentPlugin(segmentType, segmentVersion)
|
||||
segmentType, err := metaBucket.Get(util.BoltMetaDataSegmentTypeKey, nil)
|
||||
if err != nil {
|
||||
_ = rv.DecRef()
|
||||
return nil, fmt.Errorf("segment type missing: %v", err)
|
||||
}
|
||||
segmentVersionBytes, err := metaBucket.Get(util.BoltMetaDataSegmentVersionKey, nil)
|
||||
if err != nil {
|
||||
_ = rv.DecRef()
|
||||
return nil, fmt.Errorf("segment version missing: %v", err)
|
||||
}
|
||||
segmentVersion := binary.BigEndian.Uint32(segmentVersionBytes)
|
||||
err = s.loadSegmentPlugin(string(segmentType), segmentVersion)
|
||||
if err != nil {
|
||||
_ = rv.DecRef()
|
||||
return nil, fmt.Errorf(
|
||||
"unable to load correct segment wrapper: %v", err)
|
||||
}
|
||||
fileWriterID, err := metaBucket.Get(util.BoltMetaDataFileWriterIDKey, nil)
|
||||
if err != nil {
|
||||
_ = rv.DecRef()
|
||||
return nil, fmt.Errorf("file writer id missing: %v", err)
|
||||
}
|
||||
reader, err := util.NewFileReader(
|
||||
string(fileWriterID), []byte(s.path+string(os.PathSeparator)+"root.bolt"))
|
||||
if err != nil {
|
||||
_ = rv.DecRef()
|
||||
return nil, fmt.Errorf("unable to load correct reader: %v", err)
|
||||
}
|
||||
|
||||
var running uint64
|
||||
c := snapshot.Cursor()
|
||||
for k, _ := c.First(); k != nil; k, _ = c.Next() {
|
||||
if k[0] == util.BoltInternalKey[0] {
|
||||
internalBucket := snapshot.Bucket(k)
|
||||
internalBucket := snapshot.GetBucket(k)
|
||||
if internalBucket == nil {
|
||||
_ = rv.DecRef()
|
||||
return nil, fmt.Errorf("internal bucket missing")
|
||||
}
|
||||
err := internalBucket.ForEach(func(key []byte, val []byte) error {
|
||||
copiedVal := append([]byte(nil), val...)
|
||||
rv.internal[string(key)] = copiedVal
|
||||
rv.internal[string(key)] = val
|
||||
return nil
|
||||
})
|
||||
}, reader)
|
||||
if err != nil {
|
||||
_ = rv.DecRef()
|
||||
return nil, err
|
||||
}
|
||||
} else if k[0] != util.BoltMetaDataKey[0] {
|
||||
segmentBucket := snapshot.Bucket(k)
|
||||
segmentBucket := snapshot.GetBucket(k)
|
||||
if segmentBucket == nil {
|
||||
_ = rv.DecRef()
|
||||
return nil, fmt.Errorf("segment key, but bucket missing %x", k)
|
||||
}
|
||||
segmentSnapshot, err := s.loadSegment(segmentBucket)
|
||||
segmentSnapshot, err := s.loadSegment(segmentBucket, reader)
|
||||
if err != nil {
|
||||
_ = rv.DecRef()
|
||||
return nil, fmt.Errorf("failed to load segment: %v", err)
|
||||
@@ -1010,13 +1097,14 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, error) {
|
||||
pathBytes := segmentBucket.Get(util.BoltPathKey)
|
||||
func (s *Scorch) loadSegment(segmentBucket *util.BoltBucketImpl, reader util.FileReader) (
|
||||
*SegmentSnapshot, error) {
|
||||
pathBytes, err := segmentBucket.Get(util.BoltPathKey, nil)
|
||||
if pathBytes == nil {
|
||||
return nil, fmt.Errorf("segment path missing")
|
||||
}
|
||||
segmentPath := s.path + string(os.PathSeparator) + string(pathBytes)
|
||||
seg, err := s.segPlugin.Open(segmentPath)
|
||||
seg, err := s.segPlugin.OpenUsing(segmentPath, s.segmentConfig)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error opening bolt segment: %v", err)
|
||||
}
|
||||
@@ -1026,7 +1114,11 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro
|
||||
cachedDocs: &cachedDocs{cache: nil},
|
||||
cachedMeta: &cachedMeta{meta: nil},
|
||||
}
|
||||
deletedBytes := segmentBucket.Get(util.BoltDeletedKey)
|
||||
deletedBytes, err := segmentBucket.Get(util.BoltDeletedKey, reader)
|
||||
if err != nil {
|
||||
_ = seg.Close()
|
||||
return nil, fmt.Errorf("error getting deleted bytes: %v", err)
|
||||
}
|
||||
if deletedBytes != nil {
|
||||
deletedBitmap := roaring.NewBitmap()
|
||||
r := bytes.NewReader(deletedBytes)
|
||||
@@ -1039,23 +1131,28 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro
|
||||
rv.deleted = deletedBitmap
|
||||
}
|
||||
}
|
||||
statBytes := segmentBucket.Get(util.BoltStatsKey)
|
||||
statBytes, err := segmentBucket.Get(util.BoltStatsKey, reader)
|
||||
if err != nil {
|
||||
_ = seg.Close()
|
||||
return nil, fmt.Errorf("error getting stat bytes: %v", err)
|
||||
}
|
||||
if statBytes != nil {
|
||||
var statsMap map[string]map[string]uint64
|
||||
|
||||
err := json.Unmarshal(statBytes, &statsMap)
|
||||
stats := &fieldStats{statMap: statsMap}
|
||||
if err != nil {
|
||||
_ = seg.Close()
|
||||
return nil, fmt.Errorf("error reading stat bytes: %v", err)
|
||||
}
|
||||
rv.stats = stats
|
||||
rv.stats = &fieldStats{statMap: statsMap}
|
||||
}
|
||||
updatedFieldBytes, err := segmentBucket.Get(util.BoltUpdatedFieldsKey, reader)
|
||||
if err != nil {
|
||||
_ = seg.Close()
|
||||
return nil, fmt.Errorf("error getting updated field bytes: %v", err)
|
||||
}
|
||||
updatedFieldBytes := segmentBucket.Get(util.BoltUpdatedFieldsKey)
|
||||
if updatedFieldBytes != nil {
|
||||
var updatedFields map[string]*index.UpdateFieldInfo
|
||||
|
||||
err := json.Unmarshal(updatedFieldBytes, &updatedFields)
|
||||
err = json.Unmarshal(updatedFieldBytes, &updatedFields)
|
||||
if err != nil {
|
||||
_ = seg.Close()
|
||||
return nil, fmt.Errorf("error reading updated field bytes: %v", err)
|
||||
@@ -1068,6 +1165,152 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
// identify all the file callback writer ids that are in use by boltdb
|
||||
func (s *Scorch) boltFileWriterIDsInUse() (map[string]struct{}, error) {
|
||||
idMap := make(map[string]struct{})
|
||||
err := s.rootBolt.View(func(tx *util.BoltTxImpl) error {
|
||||
snapshots := tx.Bucket(util.BoltSnapshotsBucket)
|
||||
if snapshots == nil {
|
||||
return nil
|
||||
}
|
||||
c := snapshots.Cursor()
|
||||
for k, _ := c.First(); k != nil; k, _ = c.Next() {
|
||||
snapshot := snapshots.GetBucket(k)
|
||||
if snapshot == nil {
|
||||
continue
|
||||
}
|
||||
metaBucket := snapshot.GetBucket(util.BoltMetaDataKey)
|
||||
if metaBucket == nil {
|
||||
continue
|
||||
}
|
||||
id, err := metaBucket.Get(util.BoltMetaDataFileWriterIDKey, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
idMap[string(id)] = struct{}{}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return idMap, nil
|
||||
}
|
||||
|
||||
// remove all content in boltdb associated with the file callback
|
||||
// writer ids and process the data using the latest file writer
|
||||
func (s *Scorch) removeBoltFileWriterIDs(ids map[string]struct{}) error {
|
||||
filePath := s.path + string(os.PathSeparator) + "root.bolt"
|
||||
writer, err := util.NewFileWriter([]byte(filePath))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = s.rootBolt.Update(func(tx *util.BoltTxImpl) error {
|
||||
snapshots := tx.Bucket(util.BoltSnapshotsBucket)
|
||||
if snapshots == nil {
|
||||
return nil
|
||||
}
|
||||
c := snapshots.Cursor()
|
||||
for k, _ := c.First(); k != nil; k, _ = c.Next() {
|
||||
snapshot := snapshots.GetBucket(k)
|
||||
if snapshot == nil {
|
||||
continue
|
||||
}
|
||||
metaBucket := snapshot.GetBucket(util.BoltMetaDataKey)
|
||||
if metaBucket == nil {
|
||||
continue
|
||||
}
|
||||
fileWriterIDBytes, err := metaBucket.Get(util.BoltMetaDataFileWriterIDKey, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fileWriterID := string(fileWriterIDBytes)
|
||||
if _, ok := ids[fileWriterID]; ok {
|
||||
reader, err := util.NewFileReader(fileWriterID, []byte(filePath))
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to load correct reader: %v", err)
|
||||
}
|
||||
|
||||
cc := snapshot.Cursor()
|
||||
for kk, _ := cc.First(); kk != nil; kk, _ = cc.Next() {
|
||||
if kk[0] == util.BoltInternalKey[0] {
|
||||
internalBucket := snapshot.GetBucket(kk)
|
||||
if internalBucket == nil {
|
||||
continue
|
||||
}
|
||||
// process all of the internal values and replace them with new values
|
||||
internalBucketVals := make(map[string][]byte)
|
||||
err := internalBucket.ForEach(func(key []byte, val []byte) error {
|
||||
internalBucketVals[string(key)] = val
|
||||
return nil
|
||||
}, reader)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for key, val := range internalBucketVals {
|
||||
err = internalBucket.Put([]byte(key), val, writer)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
} else if kk[0] != util.BoltMetaDataKey[0] {
|
||||
segmentBucket := snapshot.GetBucket(kk)
|
||||
if segmentBucket == nil {
|
||||
continue
|
||||
}
|
||||
// process the updated field key
|
||||
updatedFieldBytes, err := segmentBucket.Get(util.BoltUpdatedFieldsKey, reader)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting updated field bytes: %v", err)
|
||||
}
|
||||
if updatedFieldBytes != nil {
|
||||
err = segmentBucket.Put(util.BoltUpdatedFieldsKey, updatedFieldBytes, writer)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// process the deleted key
|
||||
deletedBytes, err := segmentBucket.Get(util.BoltDeletedKey, reader)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting deleted bytes: %v", err)
|
||||
}
|
||||
if deletedBytes != nil {
|
||||
err = segmentBucket.Put(util.BoltDeletedKey, deletedBytes, writer)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// process the stats key
|
||||
statsBytes, err := segmentBucket.Get(util.BoltStatsKey, reader)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting stats bytes: %v", err)
|
||||
}
|
||||
if statsBytes != nil {
|
||||
err = segmentBucket.Put(util.BoltStatsKey, statsBytes, writer)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
err = metaBucket.Put(util.BoltMetaDataFileWriterIDKey,
|
||||
[]byte(writer.Id()), writer)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Scorch) removeOldData() {
|
||||
removed, err := s.removeOldBoltSnapshots()
|
||||
if err != nil {
|
||||
@@ -1359,7 +1602,7 @@ func (s *Scorch) rootBoltSnapshotMetaData() ([]*snapshotMetaData, error) {
|
||||
// for eg for n = 3 the checkpoints preserved should be tc, tc - d, tc - 2d
|
||||
expirationDuration := time.Duration(s.numSnapshotsToKeep-1) * s.rollbackSamplingInterval
|
||||
|
||||
err := s.rootBolt.View(func(tx *bolt.Tx) error {
|
||||
err := s.rootBolt.View(func(tx *util.BoltTxImpl) error {
|
||||
snapshots := tx.Bucket(util.BoltSnapshotsBucket)
|
||||
if snapshots == nil {
|
||||
return nil
|
||||
@@ -1380,15 +1623,18 @@ func (s *Scorch) rootBoltSnapshotMetaData() ([]*snapshotMetaData, error) {
|
||||
continue
|
||||
}
|
||||
|
||||
snapshot := snapshots.Bucket(sk)
|
||||
snapshot := snapshots.GetBucket(sk)
|
||||
if snapshot == nil {
|
||||
continue
|
||||
}
|
||||
metaBucket := snapshot.Bucket(util.BoltMetaDataKey)
|
||||
metaBucket := snapshot.GetBucket(util.BoltMetaDataKey)
|
||||
if metaBucket == nil {
|
||||
continue
|
||||
}
|
||||
timeStampBytes := metaBucket.Get(util.BoltMetaDataTimeStamp)
|
||||
timeStampBytes, err := metaBucket.Get(util.BoltMetaDataTimeStamp, nil)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
var timeStamp time.Time
|
||||
err = timeStamp.UnmarshalText(timeStampBytes)
|
||||
if err != nil {
|
||||
@@ -1424,7 +1670,7 @@ func (s *Scorch) rootBoltSnapshotMetaData() ([]*snapshotMetaData, error) {
|
||||
|
||||
func (s *Scorch) RootBoltSnapshotEpochs() ([]uint64, error) {
|
||||
var rv []uint64
|
||||
err := s.rootBolt.View(func(tx *bolt.Tx) error {
|
||||
err := s.rootBolt.View(func(tx *util.BoltTxImpl) error {
|
||||
snapshots := tx.Bucket(util.BoltSnapshotsBucket)
|
||||
if snapshots == nil {
|
||||
return nil
|
||||
@@ -1445,14 +1691,14 @@ func (s *Scorch) RootBoltSnapshotEpochs() ([]uint64, error) {
|
||||
// Returns the *.zap file names that are listed in the rootBolt.
|
||||
func (s *Scorch) loadZapFileNames() (map[string]struct{}, error) {
|
||||
rv := map[string]struct{}{}
|
||||
err := s.rootBolt.View(func(tx *bolt.Tx) error {
|
||||
err := s.rootBolt.View(func(tx *util.BoltTxImpl) error {
|
||||
snapshots := tx.Bucket(util.BoltSnapshotsBucket)
|
||||
if snapshots == nil {
|
||||
return nil
|
||||
}
|
||||
sc := snapshots.Cursor()
|
||||
for sk, _ := sc.First(); sk != nil; sk, _ = sc.Next() {
|
||||
snapshot := snapshots.Bucket(sk)
|
||||
snapshot := snapshots.GetBucket(sk)
|
||||
if snapshot == nil {
|
||||
continue
|
||||
}
|
||||
@@ -1461,11 +1707,14 @@ func (s *Scorch) loadZapFileNames() (map[string]struct{}, error) {
|
||||
if segk[0] == util.BoltInternalKey[0] {
|
||||
continue
|
||||
}
|
||||
segmentBucket := snapshot.Bucket(segk)
|
||||
segmentBucket := snapshot.GetBucket(segk)
|
||||
if segmentBucket == nil {
|
||||
continue
|
||||
}
|
||||
pathBytes := segmentBucket.Get(util.BoltPathKey)
|
||||
pathBytes, err := segmentBucket.Get(util.BoltPathKey, nil)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if pathBytes == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
29
vendor/github.com/blevesearch/bleve/v2/index/scorch/rollback.go
generated
vendored
29
vendor/github.com/blevesearch/bleve/v2/index/scorch/rollback.go
generated
vendored
@@ -44,7 +44,7 @@ func RollbackPoints(path string) ([]*RollbackPoint, error) {
|
||||
rootBoltOpt := &bolt.Options{
|
||||
ReadOnly: true,
|
||||
}
|
||||
rootBolt, err := bolt.Open(rootBoltPath, 0600, rootBoltOpt)
|
||||
rootBolt, err := util.OpenBolt(rootBoltPath, 0600, rootBoltOpt)
|
||||
if err != nil || rootBolt == nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -78,27 +78,40 @@ func RollbackPoints(path string) ([]*RollbackPoint, error) {
|
||||
continue
|
||||
}
|
||||
|
||||
snapshot := snapshots.Bucket(k)
|
||||
snapshot := snapshots.GetBucket(k)
|
||||
if snapshot == nil {
|
||||
log.Printf("RollbackPoints:"+
|
||||
" snapshot key, but bucket missing %x, continuing", k)
|
||||
continue
|
||||
}
|
||||
|
||||
metaBucket := snapshot.GetBucket(util.BoltMetaDataKey)
|
||||
if metaBucket == nil {
|
||||
return nil, fmt.Errorf("meta-data bucket missing")
|
||||
}
|
||||
|
||||
fileWriterID, err := metaBucket.Get(util.BoltMetaDataFileWriterIDKey, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to load file writer ID: %v", err)
|
||||
}
|
||||
reader, err := util.NewFileReader(string(fileWriterID), []byte(rootBoltPath))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to load correct reader: %v", err)
|
||||
}
|
||||
|
||||
meta := map[string][]byte{}
|
||||
c2 := snapshot.Cursor()
|
||||
for j, _ := c2.First(); j != nil; j, _ = c2.Next() {
|
||||
if j[0] == util.BoltInternalKey[0] {
|
||||
internalBucket := snapshot.Bucket(j)
|
||||
internalBucket := snapshot.GetBucket(j)
|
||||
if internalBucket == nil {
|
||||
err = fmt.Errorf("internal bucket missing")
|
||||
break
|
||||
}
|
||||
err = internalBucket.ForEach(func(key []byte, val []byte) error {
|
||||
copiedVal := append([]byte(nil), val...)
|
||||
meta[string(key)] = copiedVal
|
||||
meta[string(key)] = val
|
||||
return nil
|
||||
})
|
||||
}, reader)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
@@ -136,7 +149,7 @@ func Rollback(path string, to *RollbackPoint) error {
|
||||
rootBoltOpt := &bolt.Options{
|
||||
ReadOnly: false,
|
||||
}
|
||||
rootBolt, err := bolt.Open(rootBoltPath, 0600, rootBoltOpt)
|
||||
rootBolt, err := util.OpenBolt(rootBoltPath, 0600, rootBoltOpt)
|
||||
if err != nil || rootBolt == nil {
|
||||
return err
|
||||
}
|
||||
@@ -151,7 +164,7 @@ func Rollback(path string, to *RollbackPoint) error {
|
||||
// including the target one.
|
||||
var found bool
|
||||
var eligibleEpochs []uint64
|
||||
err = rootBolt.View(func(tx *bolt.Tx) error {
|
||||
err = rootBolt.View(func(tx *util.BoltTxImpl) error {
|
||||
snapshots := tx.Bucket(util.BoltSnapshotsBucket)
|
||||
if snapshots == nil {
|
||||
return nil
|
||||
|
||||
446
vendor/github.com/blevesearch/bleve/v2/index/scorch/scorch.go
generated
vendored
446
vendor/github.com/blevesearch/bleve/v2/index/scorch/scorch.go
generated
vendored
@@ -15,15 +15,19 @@
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/RoaringBitmap/roaring/v2"
|
||||
"github.com/blevesearch/bleve/v2/index/scorch/mergeplan"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
"github.com/blevesearch/bleve/v2/util"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
@@ -45,6 +49,7 @@ type Scorch struct {
|
||||
readOnly bool
|
||||
version uint8
|
||||
config map[string]interface{}
|
||||
segmentConfig map[string]interface{}
|
||||
analysisQueue *index.AnalysisQueue
|
||||
path string
|
||||
|
||||
@@ -75,9 +80,11 @@ type Scorch struct {
|
||||
merges chan *segmentMerge
|
||||
introducerNotifier chan *epochWatcher
|
||||
persisterNotifier chan *epochWatcher
|
||||
rootBolt *bolt.DB
|
||||
rootBolt *util.RootBoltImpl
|
||||
asyncTasks sync.WaitGroup
|
||||
|
||||
trainer trainer
|
||||
|
||||
onEvent func(event Event) bool
|
||||
onAsyncError func(err error, path string)
|
||||
|
||||
@@ -88,6 +95,33 @@ type Scorch struct {
|
||||
spatialPlugin index.SpatialAnalyzerPlugin
|
||||
}
|
||||
|
||||
// trainer interface is used for training an index that has the concept
|
||||
// of "learning". Naturally, a vector index is one such thing that would
|
||||
// implement this interface. There can be multiple implementations of the
|
||||
// training itself even for the same index type.
|
||||
//
|
||||
// this component is not supposed to interact with the other master routines
|
||||
// of scorch and will be used only for training the index before the actual data
|
||||
// ingestion starts. The routine should also be released once the
|
||||
// training is marked as complete - which can be done using the BoltTrainCompleteKey
|
||||
// key and a bool value. However the struct is still maintained for the pointer to
|
||||
// the instance so that we can use in the later stages of the index lifecycle.
|
||||
type trainer interface {
|
||||
// ephemeral
|
||||
trainLoop()
|
||||
// for the training state and the ingestion of the samples
|
||||
train(batch *index.Batch) error
|
||||
|
||||
// to load the metadata from the bolt under the BoltTrainerKey
|
||||
loadTrainedData(*util.BoltBucketImpl) error
|
||||
// to fetch the internal data from the component
|
||||
getInternal(key []byte) ([]byte, error)
|
||||
|
||||
// trainer specific file transfer operations
|
||||
copyFileLOCKED(file string, d index.IndexDirectory) error
|
||||
updateBolt(snapshotsBucket *util.BoltBucketImpl, key []byte, value []byte) error
|
||||
}
|
||||
|
||||
type ScorchErrorType string
|
||||
|
||||
func (t ScorchErrorType) Error() string {
|
||||
@@ -154,6 +188,7 @@ func NewScorch(storeName string,
|
||||
forceMergeRequestCh: make(chan *mergerCtrl, 1),
|
||||
segPlugin: defaultSegmentPlugin,
|
||||
copyScheduled: map[string]int{},
|
||||
segmentConfig: make(map[string]interface{}),
|
||||
}
|
||||
|
||||
forcedSegmentType, forcedSegmentVersion, err := configForceSegmentTypeVersion(config)
|
||||
@@ -168,6 +203,11 @@ func NewScorch(storeName string,
|
||||
}
|
||||
}
|
||||
|
||||
segConfig, ok := config["segmentConfig"].(map[string]interface{})
|
||||
if ok {
|
||||
rv.segmentConfig = segConfig
|
||||
}
|
||||
|
||||
typ, ok := config["spatialPlugin"].(string)
|
||||
if ok {
|
||||
if err := rv.loadSpatialAnalyzerPlugin(typ); err != nil {
|
||||
@@ -205,6 +245,10 @@ func NewScorch(storeName string,
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if trainer := initTrainer(rv, config); trainer != nil {
|
||||
rv.trainer = trainer
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
@@ -259,6 +303,11 @@ func (s *Scorch) Open() error {
|
||||
s.asyncTasks.Add(1)
|
||||
go s.introducerLoop()
|
||||
|
||||
if s.trainer != nil {
|
||||
s.asyncTasks.Add(1)
|
||||
go s.trainer.trainLoop()
|
||||
}
|
||||
|
||||
if !s.readOnly && s.path != "" {
|
||||
s.asyncTasks.Add(1)
|
||||
go s.persisterLoop()
|
||||
@@ -312,7 +361,7 @@ func (s *Scorch) openBolt() error {
|
||||
rootBoltPath := s.path + string(os.PathSeparator) + "root.bolt"
|
||||
var err error
|
||||
if s.path != "" {
|
||||
s.rootBolt, err = bolt.Open(rootBoltPath, 0o600, &rootBoltOpt)
|
||||
s.rootBolt, err = util.OpenBolt(rootBoltPath, 0o600, &rootBoltOpt)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -424,6 +473,24 @@ func (s *Scorch) Delete(id string) error {
|
||||
return s.Batch(b)
|
||||
}
|
||||
|
||||
func (s *Scorch) isTrained(batch *index.Batch) (bool, error) {
|
||||
trained := true
|
||||
if len(batch.IndexOps) > 0 && s.trainer != nil {
|
||||
val, err := s.getInternal(util.BoltTrainCompleteKey)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
if val != nil {
|
||||
trained, err = strconv.ParseBool(string(val))
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
}
|
||||
}
|
||||
return trained, nil
|
||||
}
|
||||
|
||||
// Batch applices a batch of changes to the index atomically
|
||||
func (s *Scorch) Batch(batch *index.Batch) (err error) {
|
||||
start := time.Now()
|
||||
@@ -434,6 +501,15 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) {
|
||||
s.fireEvent(EventKindBatchIntroduction, time.Since(start))
|
||||
}()
|
||||
|
||||
trained, err := s.isTrained(batch)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if !trained {
|
||||
return fmt.Errorf("index is not trained yet")
|
||||
}
|
||||
|
||||
resultChan := make(chan index.Document, len(batch.IndexOps))
|
||||
|
||||
var numUpdates uint64
|
||||
@@ -497,7 +573,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) {
|
||||
stats := newFieldStats()
|
||||
|
||||
if len(analysisResults) > 0 {
|
||||
newSegment, bufBytes, err = s.segPlugin.New(analysisResults)
|
||||
newSegment, bufBytes, err = s.segPlugin.NewUsing(analysisResults, s.segmentConfig)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -532,6 +608,29 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) {
|
||||
return err
|
||||
}
|
||||
|
||||
func (s *Scorch) getInternal(key []byte) ([]byte, error) {
|
||||
s.rootLock.RLock()
|
||||
defer s.rootLock.RUnlock()
|
||||
|
||||
switch string(key) {
|
||||
case string(util.BoltTrainCompleteKey):
|
||||
if s.trainer != nil {
|
||||
return s.trainer.getInternal(key)
|
||||
} else {
|
||||
return nil, fmt.Errorf("get on BoltTrainCompleteKey is not supported" +
|
||||
" with this build")
|
||||
}
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (s *Scorch) Train(batch *index.Batch) error {
|
||||
if s.trainer != nil {
|
||||
return s.trainer.train(batch)
|
||||
}
|
||||
return fmt.Errorf("training is not supported with this build")
|
||||
}
|
||||
|
||||
func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,
|
||||
internalOps map[string][]byte, persistedCallback index.BatchCallback, stats *fieldStats,
|
||||
) error {
|
||||
@@ -741,6 +840,20 @@ func (s *Scorch) StatsMap() map[string]interface{} {
|
||||
m["field:"+fieldName+":"+statName] = val
|
||||
}
|
||||
}
|
||||
|
||||
aggVectorStats := newFieldStats()
|
||||
for _, segmentSnapshot := range indexSnapshot.Segments() {
|
||||
if vsr, ok := segmentSnapshot.Segment().(segment.VectorFieldStatsReporter); ok {
|
||||
segStats := newFieldStats()
|
||||
vsr.UpdateVectorFieldStats(segStats)
|
||||
aggVectorStats.Aggregate(segStats)
|
||||
}
|
||||
}
|
||||
for statName, stats := range aggVectorStats.Fetch() {
|
||||
for fieldName, val := range stats {
|
||||
m["field:"+fieldName+":"+statName] = val
|
||||
}
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
@@ -799,6 +912,12 @@ func analyze(d index.Document, fn customAnalyzerPluginInitFunc) {
|
||||
}
|
||||
}
|
||||
})
|
||||
if nd, ok := d.(index.NestedDocument); ok {
|
||||
nd.VisitNestedDocuments(func(doc index.Document) {
|
||||
doc.AddIDField()
|
||||
analyze(doc, fn)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Scorch) AddEligibleForRemoval(epoch uint64) {
|
||||
@@ -971,6 +1090,65 @@ func (s *Scorch) CopyReader() index.CopyReader {
|
||||
return rv
|
||||
}
|
||||
|
||||
func (s *Scorch) SetPathInBolt(key []byte, value []byte) error {
|
||||
tx, err := s.rootBolt.Begin(true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
_ = tx.Rollback()
|
||||
}
|
||||
}()
|
||||
|
||||
snapshotsBucket, err := tx.CreateBucketIfNotExists(util.BoltSnapshotsBucket)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// currently this is specific to trained index file update
|
||||
err = s.trainer.updateBolt(snapshotsBucket, key, value)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = tx.Commit()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return s.rootBolt.Sync()
|
||||
}
|
||||
|
||||
// CopyFile copies a specific file to a destination directory which has an access to a bleve index
|
||||
// doing a io.Copy() isn't enough because the file needs to be tracked in bolt file as well
|
||||
func (s *Scorch) CopyFile(file string, d index.IndexDirectory) error {
|
||||
s.rootLock.Lock()
|
||||
defer s.rootLock.Unlock()
|
||||
|
||||
dest, err := d.GetWriter(filepath.Join("store", file))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
source, err := os.Open(filepath.Join(s.path, file))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
defer source.Close()
|
||||
defer dest.Close()
|
||||
_, err = io.Copy(dest, source)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// this code is currently specific to copying trained data but is future proofed for other files
|
||||
// to be updated in the dest's bolt
|
||||
err = s.trainer.copyFileLOCKED(file, d)
|
||||
return err
|
||||
}
|
||||
|
||||
// external API to fire a scorch event (EventKindIndexStart) externally from bleve
|
||||
func (s *Scorch) FireIndexEvent() {
|
||||
s.fireEvent(EventKindIndexStart, 0)
|
||||
@@ -1002,7 +1180,8 @@ func (s *Scorch) OpenMeta() error {
|
||||
|
||||
// Merge and update deleted field info and rewrite index mapping
|
||||
func (s *Scorch) updateBolt(fieldInfo map[string]*index.UpdateFieldInfo, mappingBytes []byte) error {
|
||||
return s.rootBolt.Update(func(tx *bolt.Tx) error {
|
||||
filePath := s.path + string(os.PathSeparator) + "root.bolt"
|
||||
return s.rootBolt.Update(func(tx *util.BoltTxImpl) error {
|
||||
snapshots := tx.Bucket(util.BoltSnapshotsBucket)
|
||||
if snapshots == nil {
|
||||
return nil
|
||||
@@ -1015,27 +1194,69 @@ func (s *Scorch) updateBolt(fieldInfo map[string]*index.UpdateFieldInfo, mapping
|
||||
fmt.Printf("unable to parse segment epoch %x, continuing", k)
|
||||
continue
|
||||
}
|
||||
snapshot := snapshots.Bucket(k)
|
||||
snapshot := snapshots.GetBucket(k)
|
||||
metaBucket := snapshot.GetBucket(util.BoltMetaDataKey)
|
||||
if metaBucket == nil {
|
||||
return fmt.Errorf("meta-data bucket missing")
|
||||
}
|
||||
|
||||
writer, err := util.NewFileWriter([]byte(filePath))
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to load correct writer: %v", err)
|
||||
}
|
||||
|
||||
fileWriterID, err := metaBucket.Get(util.BoltMetaDataFileWriterIDKey, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to get file writer id: %v", err)
|
||||
}
|
||||
if fileWriterID == nil {
|
||||
return fmt.Errorf("file writer id missing in meta data")
|
||||
}
|
||||
reader, err := util.NewFileReader(string(fileWriterID), []byte(filePath))
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to load correct reader: %v", err)
|
||||
}
|
||||
|
||||
err = metaBucket.Put(util.BoltMetaDataFileWriterIDKey, []byte(writer.Id()), writer)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cc := snapshot.Cursor()
|
||||
for kk, _ := cc.First(); kk != nil; kk, _ = cc.Next() {
|
||||
if kk[0] == util.BoltInternalKey[0] {
|
||||
internalBucket := snapshot.Bucket(kk)
|
||||
internalBucket := snapshot.GetBucket(kk)
|
||||
if internalBucket == nil {
|
||||
return fmt.Errorf("segment key, but bucket missing %x", kk)
|
||||
}
|
||||
err = internalBucket.Put(util.MappingInternalKey, mappingBytes)
|
||||
|
||||
internalVals := make(map[string][]byte)
|
||||
err := internalBucket.ForEach(func(key []byte, val []byte) error {
|
||||
internalVals[string(key)] = val
|
||||
return nil
|
||||
}, reader)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for key, val := range internalVals {
|
||||
err = internalBucket.Put([]byte(key), val, writer)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
} else if kk[0] != util.BoltMetaDataKey[0] {
|
||||
segmentBucket := snapshot.Bucket(kk)
|
||||
segmentBucket := snapshot.GetBucket(kk)
|
||||
if segmentBucket == nil {
|
||||
return fmt.Errorf("segment key, but bucket missing %x", kk)
|
||||
}
|
||||
var updatedFields map[string]*index.UpdateFieldInfo
|
||||
updatedFieldBytes := segmentBucket.Get(util.BoltUpdatedFieldsKey)
|
||||
updatedFieldBytes, err := segmentBucket.Get(util.BoltUpdatedFieldsKey, reader)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting updated field bytes: %v", err)
|
||||
}
|
||||
if updatedFieldBytes != nil {
|
||||
err := json.Unmarshal(updatedFieldBytes, &updatedFields)
|
||||
err = json.Unmarshal(updatedFieldBytes, &updatedFields)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading updated field bytes: %v", err)
|
||||
}
|
||||
@@ -1054,17 +1275,218 @@ func (s *Scorch) updateBolt(fieldInfo map[string]*index.UpdateFieldInfo, mapping
|
||||
} else {
|
||||
updatedFields = fieldInfo
|
||||
}
|
||||
b, err := json.Marshal(updatedFields)
|
||||
buf, err := json.Marshal(updatedFields)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = segmentBucket.Put(util.BoltUpdatedFieldsKey, b)
|
||||
err = segmentBucket.Put(util.BoltUpdatedFieldsKey, buf, writer)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
deletedBytes, err := segmentBucket.Get(util.BoltDeletedKey, reader)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting deleted bytes: %v", err)
|
||||
}
|
||||
if deletedBytes != nil {
|
||||
err = segmentBucket.Put(util.BoltDeletedKey, deletedBytes, writer)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
statBytes, err := segmentBucket.Get(util.BoltStatsKey, reader)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting stats bytes: %v", err)
|
||||
}
|
||||
if statBytes != nil {
|
||||
err = segmentBucket.Put(util.BoltStatsKey, statBytes, writer)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
// returns the set of file callback writer ids in use by all of the segments and boltdb
|
||||
func (s *Scorch) FileWriterIDsInUse() (map[string]struct{}, error) {
|
||||
s.rootLock.RLock()
|
||||
keyMap := make(map[string]struct{})
|
||||
for _, segmentSnapShot := range s.root.segment {
|
||||
if seg, ok := segmentSnapShot.segment.(segment.SegmentWithCallbacks); ok {
|
||||
keyMap[seg.CallbackId()] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
boltKeys, err := s.boltFileWriterIDsInUse()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for k, _ := range boltKeys {
|
||||
keyMap[k] = struct{}{}
|
||||
}
|
||||
s.rootLock.RUnlock()
|
||||
|
||||
return keyMap, nil
|
||||
}
|
||||
|
||||
// removes all file callback writer ids in use from all of the segments and boltdb
|
||||
// boltdb is updated with the latest callback writer while segments are force
|
||||
// merged blockingly until snapshot is persisted with the latest callback writer
|
||||
func (s *Scorch) DropFileWriterIDs(ids map[string]struct{}) error {
|
||||
err := s.removeBoltFileWriterIDs(ids)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
s.rootLock.Lock()
|
||||
// create a done channel to ensure success of merge
|
||||
ctx := context.Background()
|
||||
doneCh := make(chan error)
|
||||
ctx = context.WithValue(ctx, mergeDoneKey, doneCh)
|
||||
|
||||
// PARTIAL ROLLBACK WILL NOT BE SUPPORTED DURING THIS OPERATION
|
||||
// this is done because all of the rollback snapshots
|
||||
// are likely to have the same sequence numbers and
|
||||
// morever, it is not functionally correct to hold
|
||||
// data with writer ids that have been removed
|
||||
prevNumSnapshotsToKeep := s.numSnapshotsToKeep
|
||||
s.numSnapshotsToKeep = 1
|
||||
|
||||
// track the zapx files that are expected to be removed after
|
||||
// the merge so that we can block until they are removed by the persister
|
||||
filePaths := make([]string, 0)
|
||||
|
||||
var mergePlanner mergePlanFunc = func(ourSnapshot *IndexSnapshot) (*mergeplan.MergePlan, error) {
|
||||
// Create a merge plan with the filtered segments and force a merge
|
||||
// to remove the callback from the segments.
|
||||
mergePlannerOptions, err := s.parseMergePlannerOptions()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("mergePlannerOption json parsing err: %v", err)
|
||||
}
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlan, 1)
|
||||
|
||||
// filter all segments that have callbacks that need to be removed
|
||||
// and add them to the list of segments to compact
|
||||
segsToCompact := make([]mergeplan.Segment, 0)
|
||||
for _, ss := range ourSnapshot.segment {
|
||||
// only persisted segments needs to be checked
|
||||
if _, ok := ss.segment.(segment.PersistedSegment); ok {
|
||||
if segWithCallbacks, ok := ss.segment.(segment.SegmentWithCallbacks); ok {
|
||||
if _, ok := ids[segWithCallbacks.CallbackId()]; ok {
|
||||
segsToCompact = append(segsToCompact, ss)
|
||||
filePaths = append(filePaths, zapFileName(ss.id))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// attempt a merge plan with the default merge planner options
|
||||
mergePlan, err := mergeplan.Plan(segsToCompact, mergePlannerOptions)
|
||||
if err != nil {
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanErr, 1)
|
||||
return nil, fmt.Errorf("merge plan creation err: %v", err)
|
||||
}
|
||||
|
||||
// create a map to track segments included in the default merge plan
|
||||
segDictionary := make(map[uint64]bool)
|
||||
for _, seg := range segsToCompact {
|
||||
segDictionary[seg.Id()] = true
|
||||
}
|
||||
|
||||
// create a merge plan if the default merge planner is unable
|
||||
// to create one with the given segments
|
||||
if mergePlan == nil {
|
||||
mergePlan = &mergeplan.MergePlan{
|
||||
Tasks: make([]*mergeplan.MergeTask, 0),
|
||||
}
|
||||
}
|
||||
|
||||
// mark all segments included in the default merge plan
|
||||
for _, task := range mergePlan.Tasks {
|
||||
for _, seg := range task.Segments {
|
||||
segDictionary[seg.Id()] = false
|
||||
}
|
||||
}
|
||||
|
||||
// Create additional merge tasks for segments that are unable to be merged
|
||||
for _, seg := range segsToCompact {
|
||||
if segDictionary[seg.Id()] {
|
||||
mergePlan.Tasks = append(mergePlan.Tasks, &mergeplan.MergeTask{
|
||||
Segments: []mergeplan.Segment{seg},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return mergePlan, nil
|
||||
}
|
||||
|
||||
// set the merge plan func in the context for the merger to use when it receives the merge request
|
||||
// this is to ensure that the merge request is triggered with the latest snapshot, thus avoiding
|
||||
// any races
|
||||
ctx = context.WithValue(ctx, mergePlanFuncKey, mergePlanner)
|
||||
|
||||
// trigger the merge with the force merge plan
|
||||
s.forceMergeRequestCh <- &mergerCtrl{
|
||||
ctx: ctx,
|
||||
}
|
||||
s.rootLock.Unlock()
|
||||
|
||||
// blockingly wait for merge to complete
|
||||
err = <-doneCh
|
||||
close(doneCh)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// wait for files to be cleaned up by persister
|
||||
err = s.waitTillFileCleanup(filePaths)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// reset rollback snapshot retention
|
||||
s.rootLock.Lock()
|
||||
s.numSnapshotsToKeep = prevNumSnapshotsToKeep
|
||||
s.rootLock.Unlock()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// waitTillFileCleanup blocks until the given files are cleaned up by the persister or merger or
|
||||
// returns an error after a timeout. It does so by checking the index directory every 5 seconds
|
||||
// for the presence of the given files, and returns once they are no longer present.
|
||||
func (s *Scorch) waitTillFileCleanup(filePaths []string) error {
|
||||
ticker := time.NewTicker(5 * time.Second)
|
||||
defer ticker.Stop()
|
||||
|
||||
timeout := time.After(5 * time.Minute)
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
files, err := os.ReadDir(s.path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, f := range files {
|
||||
fname := f.Name()
|
||||
if filepath.Ext(fname) == ".zap" {
|
||||
for _, filePath := range filePaths {
|
||||
if fname == filePath {
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
case <-timeout:
|
||||
return fmt.Errorf("timeout waiting for file cleanup for files: %v", filePaths)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
12
vendor/github.com/blevesearch/bleve/v2/index/scorch/segment_plugin.go
generated
vendored
12
vendor/github.com/blevesearch/bleve/v2/index/scorch/segment_plugin.go
generated
vendored
@@ -28,6 +28,7 @@ import (
|
||||
zapv14 "github.com/blevesearch/zapx/v14"
|
||||
zapv15 "github.com/blevesearch/zapx/v15"
|
||||
zapv16 "github.com/blevesearch/zapx/v16"
|
||||
zapv17 "github.com/blevesearch/zapx/v17"
|
||||
)
|
||||
|
||||
// SegmentPlugin represents the essential functions required by a package to plug in
|
||||
@@ -45,10 +46,14 @@ type SegmentPlugin interface {
|
||||
// New takes a set of Documents and turns them into a new Segment
|
||||
New(results []index.Document) (segment.Segment, uint64, error)
|
||||
|
||||
NewUsing(results []index.Document, config map[string]interface{}) (segment.Segment, uint64, error)
|
||||
|
||||
// Open attempts to open the file at the specified path and
|
||||
// return the corresponding Segment
|
||||
Open(path string) (segment.Segment, error)
|
||||
|
||||
OpenUsing(path string, config map[string]interface{}) (segment.Segment, error)
|
||||
|
||||
// Merge takes a set of Segments, and creates a new segment on disk at
|
||||
// the specified path.
|
||||
// Drops is a set of bitmaps (one for each segment) indicating which
|
||||
@@ -66,6 +71,10 @@ type SegmentPlugin interface {
|
||||
Merge(segments []segment.Segment, drops []*roaring.Bitmap, path string,
|
||||
closeCh chan struct{}, s segment.StatsReporter) (
|
||||
[][]uint64, uint64, error)
|
||||
|
||||
MergeUsing(segments []segment.Segment, drops []*roaring.Bitmap, path string,
|
||||
closeCh chan struct{}, s segment.StatsReporter, config map[string]interface{}) (
|
||||
[][]uint64, uint64, error)
|
||||
}
|
||||
|
||||
var supportedSegmentPlugins map[string]map[uint32]SegmentPlugin
|
||||
@@ -73,7 +82,8 @@ var defaultSegmentPlugin SegmentPlugin
|
||||
|
||||
func init() {
|
||||
ResetSegmentPlugins()
|
||||
RegisterSegmentPlugin(&zapv16.ZapPlugin{}, true)
|
||||
RegisterSegmentPlugin(&zapv17.ZapPlugin{}, true)
|
||||
RegisterSegmentPlugin(&zapv16.ZapPlugin{}, false)
|
||||
RegisterSegmentPlugin(&zapv15.ZapPlugin{}, false)
|
||||
RegisterSegmentPlugin(&zapv14.ZapPlugin{}, false)
|
||||
RegisterSegmentPlugin(&zapv13.ZapPlugin{}, false)
|
||||
|
||||
99
vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_index.go
generated
vendored
99
vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_index.go
generated
vendored
@@ -17,7 +17,6 @@ package scorch
|
||||
import (
|
||||
"container/heap"
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
@@ -28,11 +27,11 @@ import (
|
||||
|
||||
"github.com/RoaringBitmap/roaring/v2"
|
||||
"github.com/blevesearch/bleve/v2/document"
|
||||
"github.com/blevesearch/bleve/v2/util"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
segment "github.com/blevesearch/scorch_segment_api/v2"
|
||||
"github.com/blevesearch/vellum"
|
||||
lev "github.com/blevesearch/vellum/levenshtein"
|
||||
bolt "go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
// re usable, threadsafe levenshtein builders
|
||||
@@ -42,9 +41,8 @@ type asynchSegmentResult struct {
|
||||
dict segment.TermDictionary
|
||||
dictItr segment.DictionaryIterator
|
||||
|
||||
cardinality int
|
||||
index int
|
||||
docs *roaring.Bitmap
|
||||
index int
|
||||
docs *roaring.Bitmap
|
||||
|
||||
thesItr segment.ThesaurusIterator
|
||||
|
||||
@@ -59,11 +57,11 @@ func init() {
|
||||
var err error
|
||||
lb1, err = lev.NewLevenshteinAutomatonBuilder(1, true)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("Levenshtein automaton ed1 builder err: %v", err))
|
||||
panic(fmt.Errorf("levenshtein automaton ed1 builder err: %v", err))
|
||||
}
|
||||
lb2, err = lev.NewLevenshteinAutomatonBuilder(2, true)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("Levenshtein automaton ed2 builder err: %v", err))
|
||||
panic(fmt.Errorf("levenshtein automaton ed2 builder err: %v", err))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -91,6 +89,8 @@ type IndexSnapshot struct {
|
||||
// UpdateFieldInfo.Index or .Store or .DocValues).
|
||||
// Used to short circuit queries trying to read stale data
|
||||
updatedFields map[string]*index.UpdateFieldInfo
|
||||
|
||||
fileWriterID string // the file callback writer id associated with this snapshot
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) Segments() []*SegmentSnapshot {
|
||||
@@ -468,13 +468,17 @@ func (is *IndexSnapshot) Fields() ([]string, error) {
|
||||
}
|
||||
|
||||
func (is *IndexSnapshot) GetInternal(key []byte) ([]byte, error) {
|
||||
_, ok := is.internal[string(key)]
|
||||
if !ok {
|
||||
return is.parent.getInternal(key)
|
||||
}
|
||||
return is.internal[string(key)], nil
|
||||
}
|
||||
|
||||
func (is *IndexSnapshot) DocCount() (uint64, error) {
|
||||
var rv uint64
|
||||
for _, segment := range is.segment {
|
||||
rv += segment.Count()
|
||||
rv += segment.CountRoot()
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
@@ -501,7 +505,7 @@ func (is *IndexSnapshot) Document(id string) (rv index.Document, err error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
docNum, err := docInternalToNumber(next.ID)
|
||||
docNum, err := next.ID.Value()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -571,7 +575,7 @@ func (is *IndexSnapshot) segmentIndexAndLocalDocNumFromGlobal(docNum uint64) (in
|
||||
}
|
||||
|
||||
func (is *IndexSnapshot) ExternalID(id index.IndexInternalID) (string, error) {
|
||||
docNum, err := docInternalToNumber(id)
|
||||
docNum, err := id.Value()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
@@ -589,7 +593,7 @@ func (is *IndexSnapshot) ExternalID(id index.IndexInternalID) (string, error) {
|
||||
}
|
||||
|
||||
func (is *IndexSnapshot) segmentIndexAndLocalDocNum(id index.IndexInternalID) (int, uint64, error) {
|
||||
docNum, err := docInternalToNumber(id)
|
||||
docNum, err := id.Value()
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
@@ -700,6 +704,8 @@ func (is *IndexSnapshot) TermFieldReader(ctx context.Context, term []byte, field
|
||||
rv.incrementBytesRead(bytesRead - prevBytesReadItr)
|
||||
}
|
||||
}
|
||||
// ONLY update the bytes read value beyond this point for this TFR if scoring is enabled
|
||||
rv.updateBytesRead = rv.includeFreq || rv.includeNorm || rv.includeTermVectors
|
||||
atomic.AddUint64(&is.parent.stats.TotTermSearchersStarted, uint64(1))
|
||||
return rv, nil
|
||||
}
|
||||
@@ -776,25 +782,6 @@ func (is *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReade
|
||||
is.m2.Unlock()
|
||||
}
|
||||
|
||||
func docNumberToBytes(buf []byte, in uint64) []byte {
|
||||
if len(buf) != 8 {
|
||||
if cap(buf) >= 8 {
|
||||
buf = buf[0:8]
|
||||
} else {
|
||||
buf = make([]byte, 8)
|
||||
}
|
||||
}
|
||||
binary.BigEndian.PutUint64(buf, in)
|
||||
return buf
|
||||
}
|
||||
|
||||
func docInternalToNumber(in index.IndexInternalID) (uint64, error) {
|
||||
if len(in) != 8 {
|
||||
return 0, fmt.Errorf("wrong len for IndexInternalID: %q", in)
|
||||
}
|
||||
return binary.BigEndian.Uint64(in), nil
|
||||
}
|
||||
|
||||
func (is *IndexSnapshot) documentVisitFieldTermsOnSegment(
|
||||
segmentIndex int, localDocNum uint64, fields []string, cFields []string,
|
||||
visitor index.DocValueVisitor, dvs segment.DocVisitState) (
|
||||
@@ -826,8 +813,10 @@ func (is *IndexSnapshot) documentVisitFieldTermsOnSegment(
|
||||
return filteredFields
|
||||
}
|
||||
|
||||
fieldsFiltered := filterUpdatedFields(fields)
|
||||
vFieldsFiltered := filterUpdatedFields(vFields)
|
||||
if len(is.updatedFields) > 0 {
|
||||
fields = filterUpdatedFields(fields)
|
||||
vFields = filterUpdatedFields(vFields)
|
||||
}
|
||||
|
||||
var errCh chan error
|
||||
|
||||
@@ -836,9 +825,9 @@ func (is *IndexSnapshot) documentVisitFieldTermsOnSegment(
|
||||
// if the caller happens to know we're on the same segmentIndex
|
||||
// from a previous invocation
|
||||
if cFields == nil {
|
||||
cFields = subtractStrings(fieldsFiltered, vFieldsFiltered)
|
||||
cFields = subtractStrings(fields, vFields)
|
||||
|
||||
if !ss.cachedDocs.hasFields(cFields) {
|
||||
if len(cFields) > 0 && !ss.cachedDocs.hasFields(cFields) {
|
||||
errCh = make(chan error, 1)
|
||||
|
||||
go func() {
|
||||
@@ -851,8 +840,8 @@ func (is *IndexSnapshot) documentVisitFieldTermsOnSegment(
|
||||
}
|
||||
}
|
||||
|
||||
if ssvOk && ssv != nil && len(vFieldsFiltered) > 0 {
|
||||
dvs, err = ssv.VisitDocValues(localDocNum, fieldsFiltered, visitor, dvs)
|
||||
if ssvOk && ssv != nil && len(vFields) > 0 {
|
||||
dvs, err = ssv.VisitDocValues(localDocNum, fields, visitor, dvs)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
@@ -897,7 +886,7 @@ func (dvr *DocValueReader) BytesRead() uint64 {
|
||||
func (dvr *DocValueReader) VisitDocValues(id index.IndexInternalID,
|
||||
visitor index.DocValueVisitor,
|
||||
) (err error) {
|
||||
docNum, err := docInternalToNumber(id)
|
||||
docNum, err := id.Value()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -980,17 +969,15 @@ func subtractStrings(a, b []string) []string {
|
||||
return a
|
||||
}
|
||||
|
||||
// Create a map for O(1) lookups
|
||||
bMap := make(map[string]struct{}, len(b))
|
||||
for _, bs := range b {
|
||||
bMap[bs] = struct{}{}
|
||||
}
|
||||
|
||||
rv := make([]string, 0, len(a))
|
||||
OUTER:
|
||||
for _, as := range a {
|
||||
if _, exists := bMap[as]; !exists {
|
||||
rv = append(rv, as)
|
||||
for _, bs := range b {
|
||||
if as == bs {
|
||||
continue OUTER
|
||||
}
|
||||
}
|
||||
rv = append(rv, as)
|
||||
}
|
||||
return rv
|
||||
}
|
||||
@@ -1006,7 +993,7 @@ func (is *IndexSnapshot) CopyTo(d index.Directory) error {
|
||||
return fmt.Errorf("invalid root.bolt file found")
|
||||
}
|
||||
|
||||
copyBolt, err := bolt.Open(rootFile.Name(), 0o600, nil)
|
||||
copyBolt, err := util.OpenBolt(rootFile.Name(), 0o600, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -1297,3 +1284,23 @@ func (is *IndexSnapshot) TermFrequencies(field string, limit int, descending boo
|
||||
|
||||
return termFreqs[:limit], nil
|
||||
}
|
||||
|
||||
// Ancestors returns the ancestor IDs for the given document ID. The prealloc
|
||||
// slice can be provided to avoid allocations downstream, and MUST be empty.
|
||||
func (i *IndexSnapshot) Ancestors(ID index.IndexInternalID, prealloc []index.AncestorID) ([]index.AncestorID, error) {
|
||||
// get segment and local doc num for the ID
|
||||
seg, ldoc, err := i.segmentIndexAndLocalDocNum(ID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// get ancestors from the segment
|
||||
prealloc = i.segment[seg].Ancestors(ldoc, prealloc)
|
||||
// get global offset for the segment (correcting factor for multi-segment indexes)
|
||||
globalOffset := i.offsets[seg]
|
||||
// adjust ancestors to global doc numbers, not local to segment
|
||||
for idx := range prealloc {
|
||||
prealloc[idx] = prealloc[idx].Add(globalOffset)
|
||||
}
|
||||
// return adjusted ancestors
|
||||
return prealloc, nil
|
||||
}
|
||||
|
||||
5
vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_index_doc.go
generated
vendored
5
vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_index_doc.go
generated
vendored
@@ -15,7 +15,6 @@
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"reflect"
|
||||
|
||||
"github.com/RoaringBitmap/roaring/v2"
|
||||
@@ -49,7 +48,7 @@ func (i *IndexSnapshotDocIDReader) Next() (index.IndexInternalID, error) {
|
||||
next := i.iterators[i.segmentOffset].Next()
|
||||
// make segment number into global number by adding offset
|
||||
globalOffset := i.snapshot.offsets[i.segmentOffset]
|
||||
return docNumberToBytes(nil, uint64(next)+globalOffset), nil
|
||||
return index.NewIndexInternalID(nil, uint64(next)+globalOffset), nil
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
@@ -63,7 +62,7 @@ func (i *IndexSnapshotDocIDReader) Advance(ID index.IndexInternalID) (index.Inde
|
||||
if next == nil {
|
||||
return nil, nil
|
||||
}
|
||||
for bytes.Compare(next, ID) < 0 {
|
||||
for next.Compare(ID) < 0 {
|
||||
next, err = i.Next()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
||||
38
vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_index_tfr.go
generated
vendored
38
vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_index_tfr.go
generated
vendored
@@ -15,7 +15,6 @@
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"reflect"
|
||||
@@ -51,6 +50,10 @@ type IndexSnapshotTermFieldReader struct {
|
||||
bytesRead uint64
|
||||
ctx context.Context
|
||||
unadorned bool
|
||||
// flag to indicate whether to increment our bytesRead
|
||||
// value after creation of the TFR while iterating our postings
|
||||
// lists
|
||||
updateBytesRead bool
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotTermFieldReader) incrementBytesRead(val uint64) {
|
||||
@@ -83,10 +86,15 @@ func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*in
|
||||
if rv == nil {
|
||||
rv = &index.TermFieldDoc{}
|
||||
}
|
||||
var prevBytesRead uint64
|
||||
// find the next hit
|
||||
for i.segmentOffset < len(i.iterators) {
|
||||
prevBytesRead := i.iterators[i.segmentOffset].BytesRead()
|
||||
next, err := i.iterators[i.segmentOffset].Next()
|
||||
// get our current postings iterator
|
||||
curItr := i.iterators[i.segmentOffset]
|
||||
if i.updateBytesRead {
|
||||
prevBytesRead = curItr.BytesRead()
|
||||
}
|
||||
next, err := curItr.Next()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -94,18 +102,20 @@ func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*in
|
||||
// make segment number into global number by adding offset
|
||||
globalOffset := i.snapshot.offsets[i.segmentOffset]
|
||||
nnum := next.Number()
|
||||
rv.ID = docNumberToBytes(rv.ID, nnum+globalOffset)
|
||||
rv.ID = index.NewIndexInternalID(rv.ID, nnum+globalOffset)
|
||||
i.postingToTermFieldDoc(next, rv)
|
||||
|
||||
i.currID = rv.ID
|
||||
i.currPosting = next
|
||||
// postingsIterators is maintain the bytesRead stat in a cumulative fashion.
|
||||
// this is because there are chances of having a series of loadChunk calls,
|
||||
// and they have to be added together before sending the bytesRead at this point
|
||||
// upstream.
|
||||
bytesRead := i.iterators[i.segmentOffset].BytesRead()
|
||||
if bytesRead > prevBytesRead {
|
||||
i.incrementBytesRead(bytesRead - prevBytesRead)
|
||||
if i.updateBytesRead {
|
||||
// postingsIterators maintains the bytesRead stat in a cumulative fashion.
|
||||
// this is because there are chances of having a series of loadChunk calls,
|
||||
// and they have to be added together before sending the bytesRead at this point
|
||||
// upstream.
|
||||
bytesRead := curItr.BytesRead()
|
||||
if bytesRead > prevBytesRead {
|
||||
i.incrementBytesRead(bytesRead - prevBytesRead)
|
||||
}
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
@@ -146,7 +156,7 @@ func (i *IndexSnapshotTermFieldReader) postingToTermFieldDoc(next segment.Postin
|
||||
func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
|
||||
// FIXME do something better
|
||||
// for now, if we need to seek backwards, then restart from the beginning
|
||||
if i.currPosting != nil && bytes.Compare(i.currID, ID) >= 0 {
|
||||
if i.currPosting != nil && i.currID.Compare(ID) >= 0 {
|
||||
// Check if the TFR is a special unadorned composite optimization.
|
||||
// Such a TFR will NOT have a valid `term` or `field` set, making it
|
||||
// impossible for the TFR to replace itself with a new one.
|
||||
@@ -171,7 +181,7 @@ func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAllo
|
||||
}
|
||||
}
|
||||
}
|
||||
num, err := docInternalToNumber(ID)
|
||||
num, err := ID.Value()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error converting to doc number % x - %v", ID, err)
|
||||
}
|
||||
@@ -196,7 +206,7 @@ func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAllo
|
||||
if preAlloced == nil {
|
||||
preAlloced = &index.TermFieldDoc{}
|
||||
}
|
||||
preAlloced.ID = docNumberToBytes(preAlloced.ID, next.Number()+
|
||||
preAlloced.ID = index.NewIndexInternalID(preAlloced.ID, next.Number()+
|
||||
i.snapshot.offsets[segIndex])
|
||||
i.postingToTermFieldDoc(next, preAlloced)
|
||||
i.currID = preAlloced.ID
|
||||
|
||||
12
vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_index_vr.go
generated
vendored
12
vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_index_vr.go
generated
vendored
@@ -18,7 +18,6 @@
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
@@ -96,7 +95,7 @@ func (i *IndexSnapshotVectorReader) Next(preAlloced *index.VectorDoc) (
|
||||
// make segment number into global number by adding offset
|
||||
globalOffset := i.snapshot.offsets[i.segmentOffset]
|
||||
nnum := next.Number()
|
||||
rv.ID = docNumberToBytes(rv.ID, nnum+globalOffset)
|
||||
rv.ID = index.NewIndexInternalID(rv.ID, nnum+globalOffset)
|
||||
rv.Score = float64(next.Score())
|
||||
|
||||
i.currID = rv.ID
|
||||
@@ -113,7 +112,7 @@ func (i *IndexSnapshotVectorReader) Next(preAlloced *index.VectorDoc) (
|
||||
func (i *IndexSnapshotVectorReader) Advance(ID index.IndexInternalID,
|
||||
preAlloced *index.VectorDoc) (*index.VectorDoc, error) {
|
||||
|
||||
if i.currPosting != nil && bytes.Compare(i.currID, ID) >= 0 {
|
||||
if i.currPosting != nil && i.currID.Compare(ID) >= 0 {
|
||||
i2, err := i.snapshot.VectorReader(i.ctx, i.vector, i.field, i.k,
|
||||
i.searchParams, i.eligibleSelector)
|
||||
if err != nil {
|
||||
@@ -124,7 +123,7 @@ func (i *IndexSnapshotVectorReader) Advance(ID index.IndexInternalID,
|
||||
*i = *(i2.(*IndexSnapshotVectorReader))
|
||||
}
|
||||
|
||||
num, err := docInternalToNumber(ID)
|
||||
num, err := ID.Value()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error converting to doc number % x - %v", ID, err)
|
||||
}
|
||||
@@ -149,7 +148,7 @@ func (i *IndexSnapshotVectorReader) Advance(ID index.IndexInternalID,
|
||||
if preAlloced == nil {
|
||||
preAlloced = &index.VectorDoc{}
|
||||
}
|
||||
preAlloced.ID = docNumberToBytes(preAlloced.ID, next.Number()+
|
||||
preAlloced.ID = index.NewIndexInternalID(preAlloced.ID, next.Number()+
|
||||
i.snapshot.offsets[segIndex])
|
||||
i.currID = preAlloced.ID
|
||||
i.currPosting = next
|
||||
@@ -183,8 +182,7 @@ func (i *IndexSnapshot) CentroidCardinalities(field string, limit int, descendin
|
||||
|
||||
for _, segment := range i.segment {
|
||||
if sv, ok := segment.segment.(segment_api.VectorSegment); ok {
|
||||
vecIndex, err := sv.InterpretVectorIndex(field,
|
||||
false /* does not require filtering */, segment.deleted)
|
||||
vecIndex, err := sv.InterpretVectorIndex(field, segment.deleted)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to interpret vector index for field %s in segment: %v", field, err)
|
||||
}
|
||||
|
||||
68
vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_segment.go
generated
vendored
68
vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_segment.go
generated
vendored
@@ -26,21 +26,23 @@ import (
|
||||
segment "github.com/blevesearch/scorch_segment_api/v2"
|
||||
)
|
||||
|
||||
var TermSeparator byte = 0xff
|
||||
|
||||
var TermSeparatorSplitSlice = []byte{TermSeparator}
|
||||
|
||||
type SegmentSnapshot struct {
|
||||
// this flag is needed to identify whether this
|
||||
// segment was mmaped recently, in which case
|
||||
// we consider the loading cost of the metadata
|
||||
// as part of IO stats.
|
||||
mmaped uint32
|
||||
id uint64
|
||||
segment segment.Segment
|
||||
deleted *roaring.Bitmap
|
||||
creator string
|
||||
stats *fieldStats
|
||||
mmaped uint32
|
||||
id uint64
|
||||
segment segment.Segment
|
||||
deleted *roaring.Bitmap
|
||||
creator string
|
||||
stats *fieldStats
|
||||
|
||||
// if this segment is in-memory then we'll try to undo the internal values
|
||||
// in the indexSnapshot internal map before updating the bolt, since its
|
||||
// supposed to be reflective of the on-disk data.
|
||||
internal map[string][]byte
|
||||
|
||||
updatedFields map[string]*index.UpdateFieldInfo
|
||||
|
||||
cachedMeta *cachedMeta
|
||||
@@ -113,6 +115,19 @@ func (s *SegmentSnapshot) Count() uint64 {
|
||||
return rv
|
||||
}
|
||||
|
||||
// this counts the root documents in the segment this differs from Count() in that
|
||||
// Count() counts all live documents including nested children, whereas this method
|
||||
// counts only root live documents
|
||||
func (s *SegmentSnapshot) CountRoot() uint64 {
|
||||
var rv uint64
|
||||
if nsb, ok := s.segment.(segment.NestedSegment); ok {
|
||||
rv = nsb.CountRoot(s.deleted)
|
||||
} else {
|
||||
rv = s.Count()
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (s *SegmentSnapshot) DocNumbers(docIDs []string) (*roaring.Bitmap, error) {
|
||||
rv, err := s.segment.DocNumbers(docIDs)
|
||||
if err != nil {
|
||||
@@ -220,7 +235,7 @@ func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) {
|
||||
for err2 == nil && nextPosting != nil {
|
||||
docNum := nextPosting.Number()
|
||||
cfd.docs[docNum] = append(cfd.docs[docNum], []byte(next.Term)...)
|
||||
cfd.docs[docNum] = append(cfd.docs[docNum], TermSeparator)
|
||||
cfd.docs[docNum] = append(cfd.docs[docNum], index.DocValueTermSeparator)
|
||||
cfd.size += uint64(len(next.Term) + 1) // map value
|
||||
nextPosting, err2 = postingsItr.Next()
|
||||
}
|
||||
@@ -241,7 +256,7 @@ func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) {
|
||||
|
||||
type cachedDocs struct {
|
||||
size uint64
|
||||
m sync.Mutex // As the cache is asynchronously prepared, need a lock
|
||||
m sync.RWMutex // As the cache is asynchronously prepared, need a lock
|
||||
cache map[string]*cachedFieldDocs // Keyed by field
|
||||
}
|
||||
|
||||
@@ -283,14 +298,14 @@ func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) e
|
||||
|
||||
// hasFields returns true if the cache has all the given fields
|
||||
func (c *cachedDocs) hasFields(fields []string) bool {
|
||||
c.m.Lock()
|
||||
c.m.RLock()
|
||||
for _, field := range fields {
|
||||
if _, exists := c.cache[field]; !exists {
|
||||
c.m.Unlock()
|
||||
c.m.RUnlock()
|
||||
return false // found a field not in cache
|
||||
}
|
||||
}
|
||||
c.m.Unlock()
|
||||
c.m.RUnlock()
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -311,17 +326,17 @@ func (c *cachedDocs) updateSizeLOCKED() {
|
||||
|
||||
func (c *cachedDocs) visitDoc(localDocNum uint64,
|
||||
fields []string, visitor index.DocValueVisitor) {
|
||||
c.m.Lock()
|
||||
c.m.RLock()
|
||||
|
||||
for _, field := range fields {
|
||||
if cachedFieldDocs, exists := c.cache[field]; exists {
|
||||
c.m.Unlock()
|
||||
c.m.RUnlock()
|
||||
<-cachedFieldDocs.readyCh
|
||||
c.m.Lock()
|
||||
c.m.RLock()
|
||||
|
||||
if tlist, exists := cachedFieldDocs.docs[localDocNum]; exists {
|
||||
for {
|
||||
i := bytes.Index(tlist, TermSeparatorSplitSlice)
|
||||
i := bytes.IndexByte(tlist, index.DocValueTermSeparator)
|
||||
if i < 0 {
|
||||
break
|
||||
}
|
||||
@@ -332,7 +347,7 @@ func (c *cachedDocs) visitDoc(localDocNum uint64,
|
||||
}
|
||||
}
|
||||
|
||||
c.m.Unlock()
|
||||
c.m.RUnlock()
|
||||
}
|
||||
|
||||
// the purpose of the cachedMeta is to simply allow the user of this type to record
|
||||
@@ -357,7 +372,18 @@ func (c *cachedMeta) updateMeta(field string, val interface{}) {
|
||||
|
||||
func (c *cachedMeta) fetchMeta(field string) (rv interface{}) {
|
||||
c.m.RLock()
|
||||
defer c.m.RUnlock()
|
||||
if c.meta == nil {
|
||||
return nil
|
||||
}
|
||||
rv = c.meta[field]
|
||||
c.m.RUnlock()
|
||||
return rv
|
||||
}
|
||||
|
||||
func (s *SegmentSnapshot) Ancestors(docNum uint64, prealloc []index.AncestorID) []index.AncestorID {
|
||||
nsb, ok := s.segment.(segment.NestedSegment)
|
||||
if !ok {
|
||||
return append(prealloc, index.NewAncestorID(docNum))
|
||||
}
|
||||
return nsb.Ancestors(docNum, prealloc)
|
||||
}
|
||||
|
||||
85
vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_vector_index.go
generated
vendored
85
vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_vector_index.go
generated
vendored
@@ -22,6 +22,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/bits-and-blooms/bitset"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
segment_api "github.com/blevesearch/scorch_segment_api/v2"
|
||||
)
|
||||
@@ -45,17 +46,82 @@ func (is *IndexSnapshot) VectorReader(ctx context.Context, vector []float32,
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
// eligibleDocumentList represents the list of eligible documents within a segment.
|
||||
type eligibleDocumentList struct {
|
||||
bs *bitset.BitSet
|
||||
}
|
||||
|
||||
// Iterator returns an iterator for the eligible document IDs.
|
||||
func (edl *eligibleDocumentList) Iterator() index.EligibleDocumentIterator {
|
||||
if edl.bs == nil {
|
||||
// no eligible documents
|
||||
return emptyEligibleIterator
|
||||
}
|
||||
// return the iterator
|
||||
return &eligibleDocumentIterator{
|
||||
bs: edl.bs,
|
||||
}
|
||||
}
|
||||
|
||||
// Count returns the number of eligible document IDs.
|
||||
func (edl *eligibleDocumentList) Count() uint64 {
|
||||
if edl.bs == nil {
|
||||
return 0
|
||||
}
|
||||
return uint64(edl.bs.Count())
|
||||
}
|
||||
|
||||
// emptyEligibleDocumentList is a reusable empty eligible document list.
|
||||
var emptyEligibleDocumentList = &eligibleDocumentList{}
|
||||
|
||||
// eligibleDocumentIterator iterates over eligible document IDs within a segment.
|
||||
type eligibleDocumentIterator struct {
|
||||
bs *bitset.BitSet
|
||||
current uint
|
||||
}
|
||||
|
||||
// Next returns the next eligible document ID and whether it exists.
|
||||
func (it *eligibleDocumentIterator) Next() (id uint64, ok bool) {
|
||||
next, found := it.bs.NextSet(it.current)
|
||||
if !found {
|
||||
return 0, false
|
||||
}
|
||||
it.current = next + 1
|
||||
return uint64(next), true
|
||||
}
|
||||
|
||||
// emptyEligibleIterator is a reusable empty eligible document iterator.
|
||||
var emptyEligibleIterator = &emptyEligibleDocumentIterator{}
|
||||
|
||||
// emptyEligibleDocumentIterator is an iterator that always returns no documents.
|
||||
type emptyEligibleDocumentIterator struct{}
|
||||
|
||||
// Next always returns false for empty iterator.
|
||||
func (it *emptyEligibleDocumentIterator) Next() (id uint64, ok bool) {
|
||||
return 0, false
|
||||
}
|
||||
|
||||
// eligibleDocumentSelector is used to filter out documents that are eligible for
|
||||
// the KNN search from a pre-filter query.
|
||||
type eligibleDocumentSelector struct {
|
||||
// segment ID -> segment local doc nums
|
||||
eligibleDocNums map[int][]uint64
|
||||
// segment ID -> segment local doc nums in a bitset
|
||||
eligibleDocNums []*bitset.BitSet
|
||||
is *IndexSnapshot
|
||||
}
|
||||
|
||||
// SegmentEligibleDocs returns the list of eligible local doc numbers for the given segment.
|
||||
func (eds *eligibleDocumentSelector) SegmentEligibleDocs(segmentID int) []uint64 {
|
||||
return eds.eligibleDocNums[segmentID]
|
||||
// SegmentEligibleDocuments returns an EligibleDocumentList for the specified segment ID.
|
||||
func (eds *eligibleDocumentSelector) SegmentEligibleDocuments(segmentID int) index.EligibleDocumentList {
|
||||
if eds.eligibleDocNums == nil || segmentID < 0 || segmentID >= len(eds.eligibleDocNums) {
|
||||
return emptyEligibleDocumentList
|
||||
}
|
||||
bs := eds.eligibleDocNums[segmentID]
|
||||
if bs == nil {
|
||||
// no eligible documents for this segment
|
||||
return emptyEligibleDocumentList
|
||||
}
|
||||
return &eligibleDocumentList{
|
||||
bs: bs,
|
||||
}
|
||||
}
|
||||
|
||||
// AddEligibleDocumentMatch adds a document match to the list of eligible documents.
|
||||
@@ -68,14 +134,19 @@ func (eds *eligibleDocumentSelector) AddEligibleDocumentMatch(id index.IndexInte
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// allocate a bitset for this segment if needed
|
||||
if eds.eligibleDocNums[segIdx] == nil {
|
||||
// the size of the bitset is the full size of the segment (which is the max local doc num + 1)
|
||||
eds.eligibleDocNums[segIdx] = bitset.New(uint(eds.is.segment[segIdx].FullSize()))
|
||||
}
|
||||
// Add the local doc number to the list of eligible doc numbers for this segment.
|
||||
eds.eligibleDocNums[segIdx] = append(eds.eligibleDocNums[segIdx], docNum)
|
||||
eds.eligibleDocNums[segIdx].Set(uint(docNum))
|
||||
return nil
|
||||
}
|
||||
|
||||
func (is *IndexSnapshot) NewEligibleDocumentSelector() index.EligibleDocumentSelector {
|
||||
return &eligibleDocumentSelector{
|
||||
eligibleDocNums: map[int][]uint64{},
|
||||
eligibleDocNums: make([]*bitset.BitSet, len(is.segment)),
|
||||
is: is,
|
||||
}
|
||||
}
|
||||
|
||||
3
vendor/github.com/blevesearch/bleve/v2/index/scorch/stats.go
generated
vendored
3
vendor/github.com/blevesearch/bleve/v2/index/scorch/stats.go
generated
vendored
@@ -136,6 +136,9 @@ type Stats struct {
|
||||
MaxMemMergeZapTime uint64
|
||||
TotMemMergeSegments uint64
|
||||
TotMemorySegmentsAtRoot uint64
|
||||
|
||||
TotTrainedSamples uint64
|
||||
TotTrainTime uint64
|
||||
}
|
||||
|
||||
// atomically populates the returned map
|
||||
|
||||
55
vendor/github.com/blevesearch/bleve/v2/index/scorch/train_noop.go
generated
vendored
Normal file
55
vendor/github.com/blevesearch/bleve/v2/index/scorch/train_noop.go
generated
vendored
Normal file
@@ -0,0 +1,55 @@
|
||||
// Copyright (c) 2026 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//go:build !vectors
|
||||
// +build !vectors
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/util"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
func initTrainer(s *Scorch, config map[string]interface{}) *noopTrainer {
|
||||
return nil
|
||||
}
|
||||
|
||||
type noopTrainer struct {
|
||||
}
|
||||
|
||||
func (t *noopTrainer) trainLoop() {}
|
||||
|
||||
func (t *noopTrainer) train(batch *index.Batch) error {
|
||||
return fmt.Errorf("training is not supported with this build")
|
||||
}
|
||||
|
||||
func (t *noopTrainer) loadTrainedData(bucket *util.BoltBucketImpl) error {
|
||||
// noop
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *noopTrainer) getInternal(key []byte) ([]byte, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (t *noopTrainer) copyFileLOCKED(file string, d index.IndexDirectory) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *noopTrainer) updateBolt(snapshotsBucket *util.BoltBucketImpl, key []byte, value []byte) error {
|
||||
return nil
|
||||
}
|
||||
397
vendor/github.com/blevesearch/bleve/v2/index/scorch/train_vector.go
generated
vendored
Normal file
397
vendor/github.com/blevesearch/bleve/v2/index/scorch/train_vector.go
generated
vendored
Normal file
@@ -0,0 +1,397 @@
|
||||
// Copyright (c) 2026 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//go:build vectors
|
||||
// +build vectors
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"maps"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/RoaringBitmap/roaring/v2"
|
||||
"github.com/blevesearch/bleve/v2/util"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
segment "github.com/blevesearch/scorch_segment_api/v2"
|
||||
)
|
||||
|
||||
type trainRequest struct {
|
||||
finalSample bool
|
||||
sampleSize int
|
||||
ackCh chan error
|
||||
sample segment.Segment
|
||||
}
|
||||
|
||||
type vectorTrainer struct {
|
||||
trainingComplete atomic.Bool
|
||||
trainedSamples uint64
|
||||
parent *Scorch
|
||||
config map[string]interface{}
|
||||
|
||||
m sync.RWMutex
|
||||
// not a searchable segment in the sense that it won't return
|
||||
// the data vectors, returns trained centroid layout
|
||||
trainedIndex *SegmentSnapshot
|
||||
trainCh chan *trainRequest
|
||||
}
|
||||
|
||||
const IndexTrainedWithFastMerge = "vector_index_fast_merge"
|
||||
|
||||
func initTrainer(s *Scorch, config map[string]interface{}) *vectorTrainer {
|
||||
if f, ok := config[IndexTrainedWithFastMerge]; ok {
|
||||
feature, ok := f.(bool)
|
||||
if ok && feature {
|
||||
trainer := vectorTrainer{
|
||||
parent: s,
|
||||
config: maps.Clone(s.config),
|
||||
trainCh: make(chan *trainRequest, 1),
|
||||
}
|
||||
// update the parent scorch config with the trainer's callback to fetch the trained index
|
||||
s.segmentConfig[index.TrainedIndexCallback] = index.TrainedIndexCallbackFn(trainer.getTrainedIndex)
|
||||
return &trainer
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// moveFile renames sourcePath to destPath.
//
// The underlying error is wrapped with %w so callers can inspect it with
// errors.Is / errors.As (for example to detect os.ErrNotExist); the message
// text is unchanged.
func moveFile(sourcePath, destPath string) error {
	// rename is supposed to be atomic on the same filesystem
	if err := os.Rename(sourcePath, destPath); err != nil {
		return fmt.Errorf("error renaming file: %w", err)
	}
	return nil
}
|
||||
|
||||
func (t *vectorTrainer) persistToBolt(trainReq *trainRequest) error {
|
||||
tx, err := t.parent.rootBolt.Begin(true)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error starting bolt transaction: %v", err)
|
||||
}
|
||||
defer tx.Rollback()
|
||||
|
||||
snapshotsBucket, err := tx.CreateBucketIfNotExists(util.BoltSnapshotsBucket)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating snapshots bucket: %v", err)
|
||||
}
|
||||
|
||||
trainerBucket, err := snapshotsBucket.CreateBucketIfNotExists(util.BoltTrainerKey)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating trained index bucket: %v", err)
|
||||
}
|
||||
err = trainerBucket.Put(util.BoltPathKey, []byte(index.TrainedIndexFileName), nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error updating trained index bucket: %v", err)
|
||||
}
|
||||
|
||||
t.trainingComplete.Store(trainReq.finalSample)
|
||||
err = trainerBucket.Put(util.BoltTrainCompleteKey, []byte(strconv.FormatBool(trainReq.finalSample)), nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error updating train complete key: %v", err)
|
||||
}
|
||||
|
||||
totSamples := atomic.AddUint64(&t.trainedSamples, uint64(trainReq.sampleSize))
|
||||
err = trainerBucket.Put(util.BoltTrainedSamplesKey, binary.LittleEndian.AppendUint64(nil, totSamples), nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error updating trained samples key: %v", err)
|
||||
}
|
||||
|
||||
err = tx.Commit()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error committing bolt transaction: %v", err)
|
||||
}
|
||||
|
||||
return t.parent.rootBolt.Sync()
|
||||
}
|
||||
|
||||
// this is not a routine that will be running throughout the lifetime of the index. It's purpose
|
||||
// is to only train the vector index before the data ingestion starts.
|
||||
func (t *vectorTrainer) trainLoop() {
|
||||
defer t.parent.asyncTasks.Done()
|
||||
|
||||
trainLoopStartTime := time.Now()
|
||||
path := filepath.Join(t.parent.path, index.TrainedIndexFileName)
|
||||
for {
|
||||
// exit once the final sample set has been ingested and training is complete.
|
||||
if t.trainingComplete.Load() {
|
||||
atomic.StoreUint64(&t.parent.stats.TotTrainedSamples, t.trainedSamples)
|
||||
atomic.StoreUint64(&t.parent.stats.TotTrainTime, uint64(time.Since(trainLoopStartTime).Milliseconds()))
|
||||
return
|
||||
}
|
||||
select {
|
||||
case <-t.parent.closeCh:
|
||||
select {
|
||||
case req := <-t.trainCh:
|
||||
req.ackCh <- fmt.Errorf("trainer is closed")
|
||||
close(req.ackCh)
|
||||
default:
|
||||
}
|
||||
return
|
||||
case trainReq := <-t.trainCh:
|
||||
sampleSeg := trainReq.sample
|
||||
// no sample segment: just persist state if this is the final sample and move on.
|
||||
if sampleSeg == nil {
|
||||
if trainReq.finalSample {
|
||||
if err := t.persistToBolt(trainReq); err != nil {
|
||||
trainReq.ackCh <- fmt.Errorf("error persisting to bolt: %v", err)
|
||||
close(trainReq.ackCh)
|
||||
return
|
||||
}
|
||||
}
|
||||
close(trainReq.ackCh)
|
||||
continue
|
||||
}
|
||||
|
||||
if t.trainedIndex == nil {
|
||||
switch seg := sampleSeg.(type) {
|
||||
case segment.UnpersistedSegment:
|
||||
if err := persistToDirectory(seg, nil, path); err != nil {
|
||||
trainReq.ackCh <- fmt.Errorf("error persisting segment: %v", err)
|
||||
close(trainReq.ackCh)
|
||||
continue
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// merge the new segment with the existing one into a .tmp file, then
|
||||
// atomically rename it into place (Os.Open on the live path is unsafe
|
||||
// during the merge).
|
||||
t.config[index.TrainingKey] = true
|
||||
_, _, err := t.parent.segPlugin.MergeUsing([]segment.Segment{t.trainedIndex.segment, sampleSeg},
|
||||
[]*roaring.Bitmap{nil, nil}, path+".tmp", t.parent.closeCh, nil, t.config)
|
||||
t.config[index.TrainingKey] = false
|
||||
if err != nil {
|
||||
trainReq.ackCh <- fmt.Errorf("error merging trained index: %v", err)
|
||||
close(trainReq.ackCh)
|
||||
return
|
||||
}
|
||||
|
||||
t.trainedIndex.segment.Close()
|
||||
if err = moveFile(path+".tmp", path); err != nil {
|
||||
trainReq.ackCh <- fmt.Errorf("error renaming trained index: %v", err)
|
||||
close(trainReq.ackCh)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// bolt write acts as a checkpoint for failover-recovery: callers downstream
|
||||
// can rely on the trained index being available once this completes.
|
||||
// todo: rethink the frequency of bolt writes
|
||||
if err := t.persistToBolt(trainReq); err != nil {
|
||||
trainReq.ackCh <- fmt.Errorf("error persisting to bolt: %v", err)
|
||||
close(trainReq.ackCh)
|
||||
return
|
||||
}
|
||||
|
||||
trainedIndex, err := t.parent.segPlugin.OpenUsing(path, t.parent.segmentConfig)
|
||||
if err != nil {
|
||||
trainReq.ackCh <- fmt.Errorf("error opening trained index: %v", err)
|
||||
close(trainReq.ackCh)
|
||||
return
|
||||
}
|
||||
|
||||
t.m.Lock()
|
||||
t.trainedIndex = &SegmentSnapshot{segment: trainedIndex}
|
||||
t.m.Unlock()
|
||||
close(trainReq.ackCh)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// loads the metadata specific to the trained index from boltdb, happens during init
|
||||
// no lock needed
|
||||
func (t *vectorTrainer) loadTrainedData(bucket *util.BoltBucketImpl) error {
|
||||
if bucket == nil {
|
||||
return nil
|
||||
}
|
||||
writerID, err := bucket.Get(util.BoltMetaDataFileWriterIDKey, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting writer id: %v", err)
|
||||
}
|
||||
reader, err := util.NewFileReader(string(writerID), nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating file reader: %v", err)
|
||||
}
|
||||
|
||||
segmentSnapshot, err := t.parent.loadSegment(bucket, reader)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// get the training status out of bolt
|
||||
trainComplete, err := bucket.Get(util.BoltTrainCompleteKey, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting train complete: %v", err)
|
||||
}
|
||||
trainedSamples, err := bucket.Get(util.BoltTrainedSamplesKey, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting trained samples: %v", err)
|
||||
}
|
||||
atomic.StoreUint64(&t.trainedSamples, binary.LittleEndian.Uint64(trainedSamples))
|
||||
comp, err := strconv.ParseBool(string(trainComplete))
|
||||
if err != nil {
|
||||
return fmt.Errorf("error parsing train complete: %v", err)
|
||||
}
|
||||
t.trainingComplete.Store(comp)
|
||||
|
||||
t.m.Lock()
|
||||
defer t.m.Unlock()
|
||||
t.trainedIndex = segmentSnapshot
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *vectorTrainer) train(batch *index.Batch) error {
|
||||
// regulate the Train function
|
||||
t.parent.FireIndexEvent()
|
||||
|
||||
var trainData []index.Document
|
||||
for _, doc := range batch.IndexOps {
|
||||
if doc != nil {
|
||||
// insert _id field
|
||||
// no need to track updates/deletes over here since
|
||||
// the API is singleton
|
||||
doc.AddIDField()
|
||||
}
|
||||
trainData = append(trainData, doc)
|
||||
}
|
||||
|
||||
trainComplete := batch.InternalOps[string(util.BoltTrainCompleteKey)]
|
||||
if trainComplete == nil {
|
||||
trainComplete = []byte("false")
|
||||
}
|
||||
fin, err := strconv.ParseBool(string(trainComplete))
|
||||
if err != nil {
|
||||
return fmt.Errorf("error parsing train complete: %v", err)
|
||||
}
|
||||
|
||||
trainReq := &trainRequest{
|
||||
finalSample: fin,
|
||||
sampleSize: len(trainData),
|
||||
ackCh: make(chan error),
|
||||
}
|
||||
// just builds a new vector index out of the train data provided
|
||||
// this is not necessarily the final train data since this is submitted
|
||||
// as a request to the trainer component to be merged. once the training
|
||||
// is complete, the template will be used for other operations down the line
|
||||
// like merge and search.
|
||||
//
|
||||
// note: this might index text data too, how to handle this? s.segmentConfig?
|
||||
// todo: updates/deletes -> data drift detection
|
||||
if len(trainData) > 0 {
|
||||
trainReq.sample, _, err = t.parent.segPlugin.NewUsing(trainData, t.parent.segmentConfig)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
t.trainCh <- trainReq
|
||||
err = <-trainReq.ackCh
|
||||
if err != nil {
|
||||
return fmt.Errorf("train_vector: train() err'd out with: %w", err)
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func (t *vectorTrainer) getInternal(key []byte) ([]byte, error) {
|
||||
switch string(key) {
|
||||
case string(util.BoltTrainCompleteKey):
|
||||
return []byte(strconv.FormatBool(t.trainingComplete.Load())), nil
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (t *vectorTrainer) getTrainedIndex(field string) (interface{}, error) {
|
||||
// return the coarse quantizer of the trained faiss index belonging to the field
|
||||
// if its not available then zap performs naive merge
|
||||
t.m.RLock()
|
||||
defer t.m.RUnlock()
|
||||
if t.trainedIndex != nil {
|
||||
trainedSegment, ok := t.trainedIndex.segment.(segment.TrainedSegment)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("segment is not a trained index segment")
|
||||
}
|
||||
|
||||
coarseQuantizer, err := trainedSegment.GetCoarseQuantizer(field)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return coarseQuantizer, nil
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (t *vectorTrainer) copyFileLOCKED(file string, d index.IndexDirectory) error {
|
||||
if strings.HasSuffix(file, index.TrainedIndexFileName) {
|
||||
// trained index file - this is outside the snapshots domain so the bolt update is different
|
||||
err := d.SetPathInBolt(util.BoltTrainerKey, []byte(file))
|
||||
if err != nil {
|
||||
return fmt.Errorf("error updating dest index bolt: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *vectorTrainer) updateBolt(snapshotsBucket *util.BoltBucketImpl, key []byte, value []byte) error {
|
||||
if bytes.Equal(key, util.BoltTrainerKey) {
|
||||
trainerBucket, err := snapshotsBucket.CreateBucketIfNotExists(util.BoltTrainerKey)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if trainerBucket == nil {
|
||||
return fmt.Errorf("trainer bucket not found")
|
||||
}
|
||||
|
||||
// guard against duplicate updates
|
||||
existingValue, err := trainerBucket.Get(util.BoltPathKey, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error checking existing value: %v", err)
|
||||
}
|
||||
if existingValue != nil {
|
||||
return fmt.Errorf("key already exists %v %v", t.parent.path, string(existingValue))
|
||||
}
|
||||
|
||||
err = trainerBucket.Put(util.BoltPathKey, value, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
writerID, err := trainerBucket.Get(util.BoltMetaDataFileWriterIDKey, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting writer id: %v", err)
|
||||
}
|
||||
reader, err := util.NewFileReader(string(writerID), nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating file reader: %v", err)
|
||||
}
|
||||
|
||||
// update the centroid index pointer
|
||||
t.trainedIndex, err = t.parent.loadSegment(trainerBucket, reader)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
34
vendor/github.com/blevesearch/bleve/v2/index/scorch/unadorned.go
generated
vendored
34
vendor/github.com/blevesearch/bleve/v2/index/scorch/unadorned.go
generated
vendored
@@ -38,6 +38,7 @@ func init() {
|
||||
type unadornedPostingsIteratorBitmap struct {
|
||||
actual roaring.IntPeekable
|
||||
actualBM *roaring.Bitmap
|
||||
next UnadornedPosting // reused across Next() calls
|
||||
}
|
||||
|
||||
func (i *unadornedPostingsIteratorBitmap) Next() (segment.Posting, error) {
|
||||
@@ -53,7 +54,10 @@ func (i *unadornedPostingsIteratorBitmap) nextAtOrAfter(atOrAfter uint64) (segme
|
||||
if !exists {
|
||||
return nil, nil
|
||||
}
|
||||
return UnadornedPosting(docNum), nil
|
||||
i.next = UnadornedPosting{} // clear the struct
|
||||
rv := &i.next
|
||||
rv.docNum = docNum
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (i *unadornedPostingsIteratorBitmap) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool) {
|
||||
@@ -112,8 +116,9 @@ func newUnadornedPostingsIteratorFromBitmap(bm *roaring.Bitmap) segment.Postings
|
||||
const docNum1HitFinished = math.MaxUint64
|
||||
|
||||
type unadornedPostingsIterator1Hit struct {
|
||||
docNumOrig uint64 // original 1-hit docNum used to create this iterator
|
||||
docNum uint64 // current docNum
|
||||
docNumOrig uint64 // original 1-hit docNum used to create this iterator
|
||||
docNum uint64 // current docNum
|
||||
next UnadornedPosting // reused across Next() calls
|
||||
}
|
||||
|
||||
func (i *unadornedPostingsIterator1Hit) Next() (segment.Posting, error) {
|
||||
@@ -129,7 +134,10 @@ func (i *unadornedPostingsIterator1Hit) nextAtOrAfter(atOrAfter uint64) (segment
|
||||
if !exists {
|
||||
return nil, nil
|
||||
}
|
||||
return UnadornedPosting(docNum), nil
|
||||
i.next = UnadornedPosting{} // clear the struct
|
||||
rv := &i.next
|
||||
rv.docNum = docNum
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (i *unadornedPostingsIterator1Hit) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool) {
|
||||
@@ -176,24 +184,26 @@ type ResetablePostingsIterator interface {
|
||||
ResetIterator()
|
||||
}
|
||||
|
||||
type UnadornedPosting uint64
|
||||
|
||||
func (p UnadornedPosting) Number() uint64 {
|
||||
return uint64(p)
|
||||
type UnadornedPosting struct {
|
||||
docNum uint64
|
||||
}
|
||||
|
||||
func (p UnadornedPosting) Frequency() uint64 {
|
||||
func (p *UnadornedPosting) Number() uint64 {
|
||||
return p.docNum
|
||||
}
|
||||
|
||||
func (p *UnadornedPosting) Frequency() uint64 {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (p UnadornedPosting) Norm() float64 {
|
||||
func (p *UnadornedPosting) Norm() float64 {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (p UnadornedPosting) Locations() []segment.Location {
|
||||
func (p *UnadornedPosting) Locations() []segment.Location {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p UnadornedPosting) Size() int {
|
||||
func (p *UnadornedPosting) Size() int {
|
||||
return reflectStaticSizeUnadornedPosting
|
||||
}
|
||||
|
||||
37
vendor/github.com/blevesearch/bleve/v2/index_alias_impl.go
generated
vendored
37
vendor/github.com/blevesearch/bleve/v2/index_alias_impl.go
generated
vendored
@@ -103,6 +103,24 @@ func (i *indexAliasImpl) IndexSynonym(id string, collection string, definition *
|
||||
return ErrorSynonymSearchNotSupported
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) Train(batch *Batch) error {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
if !i.open {
|
||||
return ErrorIndexClosed
|
||||
}
|
||||
|
||||
err := i.isAliasToSingleIndex()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if vi, ok := i.indexes[0].(TrainableIndex); ok {
|
||||
return vi.Train(batch)
|
||||
}
|
||||
return ErrorTrainingNotSupported
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) Delete(id string) error {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
@@ -985,6 +1003,15 @@ func MultiSearch(ctx context.Context, req *SearchRequest, params *multiSearchPar
|
||||
searchStart := time.Now()
|
||||
asyncResults := make(chan *asyncSearchResult, len(indexes))
|
||||
|
||||
var preSearchData map[string]map[string]interface{}
|
||||
var rescorer *rescorer
|
||||
var fusionKnnHits search.DocumentMatchCollection
|
||||
if params != nil {
|
||||
preSearchData = params.preSearchData
|
||||
rescorer = params.rescorer
|
||||
fusionKnnHits = params.fusionKnnHits
|
||||
}
|
||||
|
||||
var reverseQueryExecution bool
|
||||
if req.SearchBefore != nil {
|
||||
reverseQueryExecution = true
|
||||
@@ -1006,8 +1033,8 @@ func MultiSearch(ctx context.Context, req *SearchRequest, params *multiSearchPar
|
||||
waitGroup.Add(len(indexes))
|
||||
for _, in := range indexes {
|
||||
var payload map[string]interface{}
|
||||
if params.preSearchData != nil {
|
||||
payload = params.preSearchData[in.Name()]
|
||||
if preSearchData != nil {
|
||||
payload = preSearchData[in.Name()]
|
||||
}
|
||||
go searchChildIndex(in, createChildSearchRequest(req, payload))
|
||||
}
|
||||
@@ -1047,9 +1074,9 @@ func MultiSearch(ctx context.Context, req *SearchRequest, params *multiSearchPar
|
||||
}
|
||||
}
|
||||
|
||||
if params.rescorer != nil {
|
||||
sr.Hits, sr.Total, sr.MaxScore = params.rescorer.rescore(sr.Hits, params.fusionKnnHits)
|
||||
params.rescorer.restoreSearchRequest()
|
||||
if rescorer != nil {
|
||||
sr.Hits, sr.Total, sr.MaxScore = rescorer.rescore(sr.Hits, fusionKnnHits)
|
||||
rescorer.restoreSearchRequest()
|
||||
}
|
||||
|
||||
sr.Hits = hitsInCurrentPage(req, sr.Hits)
|
||||
|
||||
214
vendor/github.com/blevesearch/bleve/v2/index_impl.go
generated
vendored
214
vendor/github.com/blevesearch/bleve/v2/index_impl.go
generated
vendored
@@ -91,7 +91,10 @@ func newIndexUsing(path string, mapping mapping.IndexMapping, indexType string,
|
||||
path: path,
|
||||
name: path,
|
||||
m: mapping,
|
||||
meta: newIndexMeta(indexType, kvstore, kvconfig),
|
||||
}
|
||||
rv.meta, err = newIndexMeta(indexType, kvstore, kvconfig, path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv.stats = &IndexStat{i: &rv}
|
||||
// at this point there is hope that we can be successful, so save index meta
|
||||
@@ -369,6 +372,20 @@ func (i *indexImpl) IndexSynonym(id string, collection string, definition *Synon
|
||||
return err
|
||||
}
|
||||
|
||||
func (i *indexImpl) Train(batch *Batch) error {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
if !i.open {
|
||||
return ErrorIndexClosed
|
||||
}
|
||||
|
||||
if vi, ok := i.i.(index.TrainableIndex); ok {
|
||||
return vi.Train(batch.internal)
|
||||
}
|
||||
return ErrorTrainingNotSupported
|
||||
}
|
||||
|
||||
// IndexAdvanced takes a document.Document object
|
||||
// skips the mapping and indexes it.
|
||||
func (i *indexImpl) IndexAdvanced(doc *document.Document) (err error) {
|
||||
@@ -479,6 +496,55 @@ func (i *indexImpl) Search(req *SearchRequest) (sr *SearchResult, err error) {
|
||||
return i.SearchInContext(context.Background(), req)
|
||||
}
|
||||
|
||||
// returns the set of file callback writer ids in use by the index
|
||||
func (i *indexImpl) FileWriterIDsInUse() (map[string]struct{}, error) {
|
||||
ids := map[string]struct{}{i.meta.fileReader.Id(): {}}
|
||||
|
||||
if cidx, ok := i.i.(IndexWithCallbacks); ok {
|
||||
cIds, err := cidx.FileWriterIDsInUse()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for k := range cIds {
|
||||
ids[k] = struct{}{}
|
||||
}
|
||||
} else {
|
||||
// if the underlying index does not support callbacks, we
|
||||
// assume that the data being written is with the default
|
||||
// writer id which is the empty string
|
||||
ids[util.DefaultFileCallbackId] = struct{}{}
|
||||
}
|
||||
|
||||
return ids, nil
|
||||
}
|
||||
|
||||
// drops the file callback writer ids from the index and
|
||||
// re-processes data with the latest file callback writer id
|
||||
func (i *indexImpl) DropFileWriterIDs(ids map[string]struct{}) error {
|
||||
i.mutex.Lock()
|
||||
if _, ok := ids[i.meta.fileReader.Id()]; ok {
|
||||
var err error
|
||||
err = i.meta.UpdateWriter(i.path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
i.mutex.Unlock()
|
||||
|
||||
if cidx, ok := i.i.(IndexWithCallbacks); ok {
|
||||
return cidx.DropFileWriterIDs(ids)
|
||||
} else {
|
||||
// if the underlying index does not support callbacks and the request is
|
||||
// to drop the empty id, which is the default id, we return an error
|
||||
// because it is not possible to drop it
|
||||
if _, ok := ids[util.DefaultFileCallbackId]; ok {
|
||||
return fmt.Errorf("underlying index does not support DropFileWriterIDs")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
var (
|
||||
documentMatchEmptySize int
|
||||
searchContextEmptySize int
|
||||
@@ -572,8 +638,7 @@ func (i *indexImpl) preSearch(ctx context.Context, req *SearchRequest, reader in
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fs := make(query.FieldSet)
|
||||
fs, err := query.ExtractFields(req.Query, i.m, fs)
|
||||
fs, err := query.ExtractFields(req.Query, i.m, search.NewFieldSet())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -642,7 +707,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
|
||||
|
||||
// ------------------------------------------------------------------------------------------
|
||||
// set up additional contexts for any search operation that will proceed from
|
||||
// here, such as presearch, collectors etc.
|
||||
// here, such as presearch, knn collector, topn collector etc.
|
||||
|
||||
// Scoring model callback to be used to get scoring model
|
||||
scoringModelCallback := func() string {
|
||||
@@ -687,6 +752,13 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
|
||||
}
|
||||
|
||||
ctx = context.WithValue(ctx, search.GeoBufferPoolCallbackKey, search.GeoBufferPoolCallbackFunc(getBufferPool))
|
||||
// check if the index mapping has any nested fields, which should force
|
||||
// all collectors and searchers to be run in nested mode
|
||||
if nm, ok := i.m.(mapping.NestedMapping); ok {
|
||||
if nm.CountNested() > 0 {
|
||||
ctx = context.WithValue(ctx, search.NestedSearchKey, true)
|
||||
}
|
||||
}
|
||||
// ------------------------------------------------------------------------------------------
|
||||
|
||||
if _, ok := ctx.Value(search.PreSearchKey).(bool); ok {
|
||||
@@ -716,11 +788,9 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
|
||||
req.SearchBefore = nil
|
||||
}
|
||||
|
||||
var coll *collector.TopNCollector
|
||||
if req.SearchAfter != nil {
|
||||
coll = collector.NewTopNCollectorAfter(req.Size, req.Sort, req.SearchAfter)
|
||||
} else {
|
||||
coll = collector.NewTopNCollector(req.Size, req.From, req.Sort)
|
||||
coll, err := i.buildTopNCollector(ctx, req, indexReader)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var knnHits []*search.DocumentMatch
|
||||
@@ -795,7 +865,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
|
||||
// if score fusion, no faceting for knn hits is done
|
||||
// hence we can skip setting the knn hits in the collector
|
||||
if !contextScoreFusionKeyExists {
|
||||
setKnnHitsInCollector(knnHits, req, coll)
|
||||
setKnnHitsInCollector(knnHits, coll)
|
||||
}
|
||||
|
||||
if fts != nil {
|
||||
@@ -937,7 +1007,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
|
||||
if i.name != "" && hit.Index == "" {
|
||||
hit.Index = i.name
|
||||
}
|
||||
err, storedFieldsBytes := LoadAndHighlightFields(hit, req, i.name, indexReader, highlighter)
|
||||
err, storedFieldsBytes := LoadAndHighlightAllFields(hit, req, i.name, indexReader, highlighter)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -1105,6 +1175,56 @@ func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest,
|
||||
return nil, totalStoredFieldsBytes
|
||||
}
|
||||
|
||||
const NestedDocumentKey = "_$nested"
|
||||
|
||||
// LoadAndHighlightAllFields loads stored fields + highlights for root and its descendants.
|
||||
// All descendant documents are collected into a _$nested array in the root DocumentMatch.
|
||||
func LoadAndHighlightAllFields(
|
||||
root *search.DocumentMatch,
|
||||
req *SearchRequest,
|
||||
indexName string,
|
||||
r index.IndexReader,
|
||||
highlighter highlight.Highlighter,
|
||||
) (error, uint64) {
|
||||
var totalStoredFieldsBytes uint64
|
||||
// load root fields/highlights
|
||||
err, bytes := LoadAndHighlightFields(root, req, indexName, r, highlighter)
|
||||
totalStoredFieldsBytes += bytes
|
||||
if err != nil {
|
||||
return err, totalStoredFieldsBytes
|
||||
}
|
||||
// collect all descendant documents
|
||||
nestedDocs := make([]*search.NestedDocumentMatch, 0, len(root.Descendants))
|
||||
// create a dummy desc DocumentMatch to reuse LoadAndHighlightFields
|
||||
desc := &search.DocumentMatch{}
|
||||
for _, descID := range root.Descendants {
|
||||
extID, err := r.ExternalID(descID)
|
||||
if err != nil {
|
||||
return err, totalStoredFieldsBytes
|
||||
}
|
||||
// reset desc for reuse
|
||||
desc.ID = extID
|
||||
desc.IndexInternalID = descID
|
||||
desc.Locations = root.Locations
|
||||
err, bytes := LoadAndHighlightFields(desc, req, indexName, r, highlighter)
|
||||
totalStoredFieldsBytes += bytes
|
||||
if err != nil {
|
||||
return err, totalStoredFieldsBytes
|
||||
}
|
||||
// copy fields to nested doc and append
|
||||
if len(desc.Fields) != 0 || len(desc.Fragments) != 0 {
|
||||
nestedDocs = append(nestedDocs, search.NewNestedDocumentMatch(desc.Fields, desc.Fragments))
|
||||
}
|
||||
desc.Fields = nil
|
||||
desc.Fragments = nil
|
||||
}
|
||||
// add nested documents to root under _$nested key
|
||||
if len(nestedDocs) > 0 {
|
||||
root.AddFieldValue(NestedDocumentKey, nestedDocs)
|
||||
}
|
||||
return nil, totalStoredFieldsBytes
|
||||
}
|
||||
|
||||
// Fields returns the name of all the fields this
|
||||
// Index has operated on.
|
||||
func (i *indexImpl) Fields() (fields []string, err error) {
|
||||
@@ -1388,11 +1508,43 @@ func (i *indexImpl) CopyTo(d index.Directory) (err error) {
|
||||
|
||||
err = copyReader.CopyTo(d)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error copying index metadata: %v", err)
|
||||
return fmt.Errorf("error copying index data: %v", err)
|
||||
}
|
||||
|
||||
// copy the metadata
|
||||
return i.meta.CopyTo(d)
|
||||
return i.meta.CopyTo(i.path, d)
|
||||
}
|
||||
|
||||
func (i *indexImpl) CopyFile(file string, d index.IndexDirectory) (err error) {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
if !i.open {
|
||||
return ErrorIndexClosed
|
||||
}
|
||||
|
||||
fileCopyIndex, ok := i.i.(IndexFileCopyable)
|
||||
if !ok {
|
||||
return fmt.Errorf("index implementation does not support file copy reader")
|
||||
}
|
||||
|
||||
return fileCopyIndex.CopyFile(file, d)
|
||||
}
|
||||
|
||||
func (i *indexImpl) SetPathInBolt(key []byte, value []byte) error {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
if !i.open {
|
||||
return ErrorIndexClosed
|
||||
}
|
||||
|
||||
fileCopyIndex, ok := i.i.(IndexFileCopyable)
|
||||
if !ok {
|
||||
return fmt.Errorf("index implementation does not support file copy")
|
||||
}
|
||||
|
||||
return fileCopyIndex.SetPathInBolt(key, value)
|
||||
}
|
||||
|
||||
func (f FileSystemDirectory) GetWriter(filePath string) (io.WriteCloser,
|
||||
@@ -1487,3 +1639,39 @@ func (i *indexImpl) CentroidCardinalities(field string, limit int, descending bo
|
||||
|
||||
return centroidCardinalities, nil
|
||||
}
|
||||
|
||||
func (i *indexImpl) buildTopNCollector(ctx context.Context, req *SearchRequest, reader index.IndexReader) (*collector.TopNCollector, error) {
|
||||
newCollector := func() *collector.TopNCollector {
|
||||
if req.SearchAfter != nil {
|
||||
return collector.NewTopNCollectorAfter(req.Size, req.Sort, req.SearchAfter)
|
||||
}
|
||||
return collector.NewTopNCollector(req.Size, req.From, req.Sort)
|
||||
}
|
||||
|
||||
newNestedCollector := func(nr index.NestedReader) *collector.TopNCollector {
|
||||
if req.SearchAfter != nil {
|
||||
return collector.NewNestedTopNCollectorAfter(req.Size, req.Sort, req.SearchAfter, nr)
|
||||
}
|
||||
return collector.NewNestedTopNCollector(req.Size, req.From, req.Sort, nr)
|
||||
}
|
||||
|
||||
// check if we are in nested mode
|
||||
if nestedMode, ok := ctx.Value(search.NestedSearchKey).(bool); ok && nestedMode {
|
||||
// get the nested reader from the index reader
|
||||
if nr, ok := reader.(index.NestedReader); ok {
|
||||
// check if the mapping has any nested fields that intersect
|
||||
if nm, ok := i.m.(mapping.NestedMapping); ok {
|
||||
var fs search.FieldSet
|
||||
var err error
|
||||
fs, err = query.ExtractFields(req.Query, i.m, fs)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if fs.HasID() || nm.IntersectsPrefix(fs) {
|
||||
return newNestedCollector(nr), nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return newCollector(), nil
|
||||
}
|
||||
|
||||
162
vendor/github.com/blevesearch/bleve/v2/index_meta.go
generated
vendored
162
vendor/github.com/blevesearch/bleve/v2/index_meta.go
generated
vendored
@@ -15,6 +15,7 @@
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
@@ -27,17 +28,30 @@ import (
|
||||
const metaFilename = "index_meta.json"
|
||||
|
||||
type indexMeta struct {
|
||||
Storage string `json:"storage"`
|
||||
IndexType string `json:"index_type"`
|
||||
Config map[string]interface{} `json:"config,omitempty"`
|
||||
Storage string `json:"storage"`
|
||||
IndexType string `json:"index_type"`
|
||||
Config map[string]interface{} `json:"config,omitempty"`
|
||||
fileWriter util.FileWriter
|
||||
fileReader util.FileReader
|
||||
}
|
||||
|
||||
func newIndexMeta(indexType string, storage string, config map[string]interface{}) *indexMeta {
|
||||
return &indexMeta{
|
||||
IndexType: indexType,
|
||||
Storage: storage,
|
||||
Config: config,
|
||||
func newIndexMeta(indexType string, storage string, config map[string]interface{}, path string) (*indexMeta, error) {
|
||||
indexMetaPath := indexMetaPath(path)
|
||||
fileWriter, err := util.NewFileWriter([]byte(indexMetaPath))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create file writer for index meta: %w", err)
|
||||
}
|
||||
fileReader, err := util.NewFileReader(fileWriter.Id(), []byte(indexMetaPath))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create file reader for index meta: %w", err)
|
||||
}
|
||||
return &indexMeta{
|
||||
IndexType: indexType,
|
||||
Storage: storage,
|
||||
Config: config,
|
||||
fileWriter: fileWriter,
|
||||
fileReader: fileReader,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func openIndexMeta(path string) (*indexMeta, error) {
|
||||
@@ -49,11 +63,60 @@ func openIndexMeta(path string) (*indexMeta, error) {
|
||||
if err != nil {
|
||||
return nil, ErrorIndexMetaMissing
|
||||
}
|
||||
|
||||
// check if indexMetaPath+_temp exists, if so, this means a writer update was in progress
|
||||
// and we should attempt to recover using the temp file
|
||||
if _, err := os.Stat(indexMetaPath + "_temp"); err == nil {
|
||||
tempBytes, err := os.ReadFile(indexMetaPath + "_temp")
|
||||
if err == nil {
|
||||
err = os.Rename(indexMetaPath+"_temp", indexMetaPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
metaBytes = tempBytes
|
||||
}
|
||||
}
|
||||
|
||||
var im indexMeta
|
||||
var fileReader util.FileReader
|
||||
// attempt to unmarshal metabytes directly. If this succeeds,
|
||||
// then we know there was no file callback writer used and we can
|
||||
// proceed as normal.
|
||||
err = util.UnmarshalJSON(metaBytes, &im)
|
||||
if err != nil {
|
||||
return nil, ErrorIndexMetaCorrupt
|
||||
// on failure, we expect the last 4 bytes to be the length of the file
|
||||
// callback id and the preceding bytes to be the file callback id, which
|
||||
// we can use to obtain the file reader to read the actual meta data bytes
|
||||
if len(metaBytes) < 4 {
|
||||
return nil, ErrorIndexMetaCorrupt
|
||||
}
|
||||
|
||||
// read the length of the file callback id from the last 4 bytes
|
||||
pos := len(metaBytes) - 4
|
||||
fileWriterIDLen := int(binary.BigEndian.Uint32(metaBytes[pos:]))
|
||||
pos -= fileWriterIDLen
|
||||
if pos < 0 {
|
||||
return nil, ErrorIndexMetaCorrupt
|
||||
}
|
||||
|
||||
// read and initialize the file reader using the file callback id
|
||||
fileWriterID := metaBytes[pos : pos+fileWriterIDLen]
|
||||
fileReader, err = util.NewFileReader(string(fileWriterID), []byte(indexMetaPath))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
buf, err := fileReader.Process(metaBytes[0:pos])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
err = util.UnmarshalJSON(buf, &im)
|
||||
if err != nil {
|
||||
return nil, ErrorIndexMetaCorrupt
|
||||
}
|
||||
}
|
||||
im.fileReader = fileReader
|
||||
|
||||
if im.IndexType == "" {
|
||||
im.IndexType = upsidedown.Name
|
||||
}
|
||||
@@ -86,15 +149,29 @@ func (i *indexMeta) Save(path string) (err error) {
|
||||
err = ierr
|
||||
}
|
||||
}()
|
||||
|
||||
metaBytes = i.fileWriter.Process(metaBytes)
|
||||
|
||||
_, err = indexMetaFile.Write(metaBytes)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
_, err = indexMetaFile.Write([]byte(i.fileWriter.Id()))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = binary.Write(indexMetaFile, binary.BigEndian, uint32(len(i.fileWriter.Id())))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i *indexMeta) CopyTo(d index.Directory) (err error) {
|
||||
metaBytes, err := util.MarshalJSON(i)
|
||||
func (i *indexMeta) CopyTo(path string, d index.Directory) (err error) {
|
||||
metaBytes, err := os.ReadFile(indexMetaPath(path))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -110,6 +187,69 @@ func (i *indexMeta) CopyTo(d index.Directory) (err error) {
|
||||
return err
|
||||
}
|
||||
|
||||
// updates the file callback writer id in the index meta,
|
||||
// and re-processes data with the latest file callback writer
|
||||
// returns the new file callback writer and reader to be used for
|
||||
// future processing of index meta data
|
||||
func (i *indexMeta) UpdateWriter(path string) error {
|
||||
indexMetaPath := indexMetaPath(path)
|
||||
metaBytes, err := util.MarshalJSON(i)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
i.fileWriter, err = util.NewFileWriter([]byte(indexMetaPath))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
metaBytes = i.fileWriter.Process(metaBytes)
|
||||
|
||||
// write out new meta with new writer id, using temp file and rename to ensure atomicity
|
||||
// if we crash in the middle of this, on next open we will see the temp file and recover using it
|
||||
tempMetaPath := indexMetaPath + "_temp"
|
||||
tempMetaFile, err := os.OpenFile(tempMetaPath, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0666)
|
||||
if err != nil {
|
||||
if os.IsExist(err) {
|
||||
return ErrorIndexPathExists
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// write the meta bytes
|
||||
_, err = tempMetaFile.Write(metaBytes)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// write the file callback id
|
||||
_, err = tempMetaFile.Write([]byte(i.fileWriter.Id()))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// write the length of the file callback id
|
||||
err = binary.Write(tempMetaFile, binary.BigEndian, uint32(len(i.fileWriter.Id())))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// close file before renaming
|
||||
err = tempMetaFile.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// atomically rename temp file to index meta file
|
||||
err = os.Rename(tempMetaPath, indexMetaPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// initialize the new file reader for index meta
|
||||
i.fileReader, err = util.NewFileReader(string(i.fileWriter.Id()), []byte(indexMetaPath))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func indexMetaPath(path string) string {
|
||||
return filepath.Join(path, metaFilename)
|
||||
}
|
||||
|
||||
7
vendor/github.com/blevesearch/bleve/v2/index_update.go
generated
vendored
7
vendor/github.com/blevesearch/bleve/v2/index_update.go
generated
vendored
@@ -180,6 +180,10 @@ func checkUpdatedMapping(ori, upd *mapping.DocumentMapping) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
if ori.Nested != upd.Nested {
|
||||
return fmt.Errorf("nested property cannot be changed")
|
||||
}
|
||||
|
||||
var err error
|
||||
// Recursively go through the child mappings
|
||||
for name, updDMapping := range upd.Properties {
|
||||
@@ -507,6 +511,9 @@ func compareFieldMapping(original, updated *mapping.FieldMapping) (*index.Update
|
||||
if original.VectorIndexOptimizedFor != updated.VectorIndexOptimizedFor {
|
||||
return nil, fmt.Errorf("vectorIndexOptimizedFor cannot be updated for vector and vector_base64 fields")
|
||||
}
|
||||
if original.GPU != updated.GPU {
|
||||
return nil, fmt.Errorf("gpu cannot be updated for vector and vector_base64 fields")
|
||||
}
|
||||
}
|
||||
if original.IncludeInAll != updated.IncludeInAll {
|
||||
return nil, fmt.Errorf("includeInAll cannot be changed")
|
||||
|
||||
14
vendor/github.com/blevesearch/bleve/v2/mapping.go
generated
vendored
14
vendor/github.com/blevesearch/bleve/v2/mapping.go
generated
vendored
@@ -34,6 +34,20 @@ func NewDocumentStaticMapping() *mapping.DocumentMapping {
|
||||
return mapping.NewDocumentStaticMapping()
|
||||
}
|
||||
|
||||
// NewNestedDocumentMapping returns a new document mapping
|
||||
// that will treat all objects as nested documents.
|
||||
func NewNestedDocumentMapping() *mapping.DocumentMapping {
|
||||
return mapping.NewNestedDocumentMapping()
|
||||
}
|
||||
|
||||
// NewNestedDocumentStaticMapping returns a new document mapping
|
||||
// that will treat all objects as nested documents and
|
||||
// will not automatically index parts of a nested document
|
||||
// without an explicit mapping.
|
||||
func NewNestedDocumentStaticMapping() *mapping.DocumentMapping {
|
||||
return mapping.NewNestedDocumentStaticMapping()
|
||||
}
|
||||
|
||||
// NewDocumentDisabledMapping returns a new document
|
||||
// mapping that will not perform any indexing.
|
||||
func NewDocumentDisabledMapping() *mapping.DocumentMapping {
|
||||
|
||||
75
vendor/github.com/blevesearch/bleve/v2/mapping/document.go
generated
vendored
75
vendor/github.com/blevesearch/bleve/v2/mapping/document.go
generated
vendored
@@ -22,6 +22,7 @@ import (
|
||||
"reflect"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/document"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
"github.com/blevesearch/bleve/v2/util"
|
||||
)
|
||||
@@ -44,6 +45,7 @@ type DocumentMapping struct {
|
||||
Dynamic bool `json:"dynamic"`
|
||||
Properties map[string]*DocumentMapping `json:"properties,omitempty"`
|
||||
Fields []*FieldMapping `json:"fields,omitempty"`
|
||||
Nested bool `json:"nested,omitempty"`
|
||||
DefaultAnalyzer string `json:"default_analyzer,omitempty"`
|
||||
DefaultSynonymSource string `json:"default_synonym_source,omitempty"`
|
||||
|
||||
@@ -230,6 +232,17 @@ func NewDocumentMapping() *DocumentMapping {
|
||||
}
|
||||
}
|
||||
|
||||
// NewNestedDocumentMapping returns a new document
|
||||
// mapping that treats sub-documents as nested
|
||||
// objects.
|
||||
func NewNestedDocumentMapping() *DocumentMapping {
|
||||
return &DocumentMapping{
|
||||
Nested: true,
|
||||
Enabled: true,
|
||||
Dynamic: true,
|
||||
}
|
||||
}
|
||||
|
||||
// NewDocumentStaticMapping returns a new document
|
||||
// mapping that will not automatically index parts
|
||||
// of a document without an explicit mapping.
|
||||
@@ -239,6 +252,17 @@ func NewDocumentStaticMapping() *DocumentMapping {
|
||||
}
|
||||
}
|
||||
|
||||
// NewNestedDocumentStaticMapping returns a new document
|
||||
// mapping that treats sub-documents as nested
|
||||
// objects and will not automatically index parts
|
||||
// of the nested document without an explicit mapping.
|
||||
func NewNestedDocumentStaticMapping() *DocumentMapping {
|
||||
return &DocumentMapping{
|
||||
Enabled: true,
|
||||
Nested: true,
|
||||
}
|
||||
}
|
||||
|
||||
// NewDocumentDisabledMapping returns a new document
|
||||
// mapping that will not perform any indexing.
|
||||
func NewDocumentDisabledMapping() *DocumentMapping {
|
||||
@@ -312,6 +336,11 @@ func (dm *DocumentMapping) UnmarshalJSON(data []byte) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
case "nested":
|
||||
err := util.UnmarshalJSON(v, &dm.Nested)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
case "default_analyzer":
|
||||
err := util.UnmarshalJSON(v, &dm.DefaultAnalyzer)
|
||||
if err != nil {
|
||||
@@ -381,6 +410,18 @@ func (dm *DocumentMapping) defaultSynonymSource(path []string) string {
|
||||
return rv
|
||||
}
|
||||
|
||||
// baseType reports the type of v after stripping every level of pointer
// indirection. It returns nil when v is a nil interface value; a typed nil
// pointer still yields its element type.
func baseType(v interface{}) reflect.Type {
	if v == nil {
		return nil
	}
	typ := reflect.TypeOf(v)
	for {
		if typ.Kind() != reflect.Pointer {
			return typ
		}
		typ = typ.Elem()
	}
}
|
||||
|
||||
func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes []uint64, context *walkContext) {
|
||||
// allow default "json" tag to be overridden
|
||||
structTagKey := dm.StructTagKey
|
||||
@@ -434,11 +475,39 @@ func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes
|
||||
}
|
||||
}
|
||||
case reflect.Slice, reflect.Array:
|
||||
subDocMapping, _ := dm.documentMappingForPathElements(path)
|
||||
allowNested := subDocMapping != nil && subDocMapping.Nested
|
||||
for i := 0; i < val.Len(); i++ {
|
||||
if val.Index(i).CanInterface() {
|
||||
fieldVal := val.Index(i).Interface()
|
||||
dm.processProperty(fieldVal, path, append(indexes, uint64(i)), context)
|
||||
// for each array element, check if it can be represented as an interface
|
||||
idxVal := val.Index(i)
|
||||
// skip invalid values
|
||||
if !idxVal.CanInterface() {
|
||||
continue
|
||||
}
|
||||
// get the actual value in interface form
|
||||
actual := idxVal.Interface()
|
||||
// if nested mapping, only create nested document for object elements
|
||||
if allowNested && actual != nil {
|
||||
// check the kind of the actual value, is it an object (struct or map)?
|
||||
typ := baseType(actual)
|
||||
if typ == nil {
|
||||
continue
|
||||
}
|
||||
kind := typ.Kind()
|
||||
// only create nested docs for real JSON objects
|
||||
if kind == reflect.Struct || kind == reflect.Map {
|
||||
// Create nested document only for only object elements
|
||||
nestedDocument := document.NewDocument(
|
||||
fmt.Sprintf("%s_$%s_$%d", context.doc.ID(), encodePath(path), i))
|
||||
nestedContext := context.im.newWalkContext(nestedDocument, dm)
|
||||
dm.processProperty(actual, path, append(indexes, uint64(i)), nestedContext)
|
||||
context.doc.AddNestedDocument(nestedDocument)
|
||||
continue
|
||||
}
|
||||
}
|
||||
// non-nested mapping, or non-object element in nested mapping
|
||||
// process the element normally
|
||||
dm.processProperty(actual, path, append(indexes, uint64(i)), context)
|
||||
}
|
||||
case reflect.Ptr:
|
||||
ptrElem := val.Elem()
|
||||
|
||||
11
vendor/github.com/blevesearch/bleve/v2/mapping/field.go
generated
vendored
11
vendor/github.com/blevesearch/bleve/v2/mapping/field.go
generated
vendored
@@ -83,6 +83,9 @@ type FieldMapping struct {
|
||||
VectorIndexOptimizedFor string `json:"vector_index_optimized_for,omitempty"`
|
||||
|
||||
SynonymSource string `json:"synonym_source,omitempty"`
|
||||
|
||||
// Applicable to vector fields only - enables GPU acceleration for indexing and searching
|
||||
GPU bool `json:"gpu,omitempty"`
|
||||
}
|
||||
|
||||
// NewTextFieldMapping returns a default field mapping for text
|
||||
@@ -226,6 +229,9 @@ func (fm *FieldMapping) Options() index.FieldIndexingOptions {
|
||||
if fm.SkipFreqNorm {
|
||||
rv |= index.SkipFreqNorm
|
||||
}
|
||||
if fm.GPU {
|
||||
rv |= index.GPU
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
@@ -479,6 +485,11 @@ func (fm *FieldMapping) UnmarshalJSON(data []byte) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
case "gpu":
|
||||
err := util.UnmarshalJSON(v, &fm.GPU)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
default:
|
||||
invalidKeys = append(invalidKeys, k)
|
||||
}
|
||||
|
||||
79
vendor/github.com/blevesearch/bleve/v2/mapping/index.go
generated
vendored
79
vendor/github.com/blevesearch/bleve/v2/mapping/index.go
generated
vendored
@@ -17,12 +17,14 @@ package mapping
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/analysis/analyzer/standard"
|
||||
"github.com/blevesearch/bleve/v2/analysis/datetime/optional"
|
||||
"github.com/blevesearch/bleve/v2/document"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
"github.com/blevesearch/bleve/v2/util"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
@@ -195,11 +197,19 @@ func (im *IndexMappingImpl) Validate() error {
|
||||
// the map will hold the fully qualified field name to FieldMapping, so we can
|
||||
// check for conflicts as we validate each DocumentMapping.
|
||||
fieldAliasCtx := make(map[string]*FieldMapping)
|
||||
// ensure that the nested property is not set for top-level default mapping
|
||||
if im.DefaultMapping.Nested {
|
||||
return fmt.Errorf("default mapping cannot be nested")
|
||||
}
|
||||
err = im.DefaultMapping.Validate(im.cache, []string{}, fieldAliasCtx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, docMapping := range im.TypeMapping {
|
||||
for name, docMapping := range im.TypeMapping {
|
||||
// ensure that the nested property is not set for top-level mappings
|
||||
if docMapping.Nested {
|
||||
return fmt.Errorf("type mapping named: %s cannot be nested", name)
|
||||
}
|
||||
err = docMapping.Validate(im.cache, []string{}, fieldAliasCtx)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -574,3 +584,70 @@ func (im *IndexMappingImpl) SynonymSourceVisitor(visitor analysis.SynonymSourceV
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (im *IndexMappingImpl) buildNestedPrefixes() map[string]int {
|
||||
prefixDepth := make(map[string]int)
|
||||
var collectNestedFields func(dm *DocumentMapping, pathComponents []string, currentDepth int)
|
||||
collectNestedFields = func(dm *DocumentMapping, pathComponents []string, currentDepth int) {
|
||||
for name, docMapping := range dm.Properties {
|
||||
newPathComponents := append(pathComponents, name)
|
||||
if docMapping.Nested {
|
||||
// This is a nested field boundary
|
||||
newDepth := currentDepth + 1
|
||||
prefixDepth[strings.Join(newPathComponents, pathSeparator)] = newDepth
|
||||
// Continue deeper with incremented depth
|
||||
collectNestedFields(docMapping, newPathComponents, newDepth)
|
||||
} else {
|
||||
// Not nested, continue with same depth
|
||||
collectNestedFields(docMapping, newPathComponents, currentDepth)
|
||||
}
|
||||
}
|
||||
}
|
||||
// Start from depth 0 (root)
|
||||
if im.DefaultMapping != nil && im.DefaultMapping.Enabled {
|
||||
collectNestedFields(im.DefaultMapping, []string{}, 0)
|
||||
}
|
||||
// Now do this for each type mapping
|
||||
for _, docMapping := range im.TypeMapping {
|
||||
if docMapping.Enabled {
|
||||
collectNestedFields(docMapping, []string{}, 0)
|
||||
}
|
||||
}
|
||||
return prefixDepth
|
||||
}
|
||||
|
||||
func (im *IndexMappingImpl) NestedDepth(fs search.FieldSet) (int, int) {
|
||||
if im.cache == nil || im.cache.NestedPrefixes == nil {
|
||||
return 0, 0
|
||||
}
|
||||
|
||||
im.cache.NestedPrefixes.InitOnce(func() map[string]int {
|
||||
return im.buildNestedPrefixes()
|
||||
})
|
||||
|
||||
return im.cache.NestedPrefixes.NestedDepth(fs)
|
||||
}
|
||||
|
||||
func (im *IndexMappingImpl) CountNested() int {
|
||||
if im.cache == nil || im.cache.NestedPrefixes == nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
im.cache.NestedPrefixes.InitOnce(func() map[string]int {
|
||||
return im.buildNestedPrefixes()
|
||||
})
|
||||
|
||||
return im.cache.NestedPrefixes.CountNested()
|
||||
}
|
||||
|
||||
func (im *IndexMappingImpl) IntersectsPrefix(fs search.FieldSet) bool {
|
||||
if im.cache == nil || im.cache.NestedPrefixes == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
im.cache.NestedPrefixes.InitOnce(func() map[string]int {
|
||||
return im.buildNestedPrefixes()
|
||||
})
|
||||
|
||||
return im.cache.NestedPrefixes.IntersectsPrefix(fs)
|
||||
}
|
||||
|
||||
19
vendor/github.com/blevesearch/bleve/v2/mapping/mapping.go
generated
vendored
19
vendor/github.com/blevesearch/bleve/v2/mapping/mapping.go
generated
vendored
@@ -20,6 +20,7 @@ import (
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/document"
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
)
|
||||
|
||||
// A Classifier is an interface describing any object which knows how to
|
||||
@@ -74,3 +75,21 @@ type SynonymMapping interface {
|
||||
|
||||
SynonymSourceVisitor(visitor analysis.SynonymSourceVisitor) error
|
||||
}
|
||||
|
||||
// A NestedMapping extends the IndexMapping interface to provide
|
||||
// additional methods for working with nested object mappings.
|
||||
type NestedMapping interface {
|
||||
// NestedDepth returns two values:
|
||||
// - common: the highest nested level that is common to all given field paths,
|
||||
// if 0 then there is no common nested level among the given field paths
|
||||
// - max: the highest nested level that applies to at least one of the given field paths
|
||||
// if 0 then none of the given field paths are nested
|
||||
NestedDepth(fieldPaths search.FieldSet) (int, int)
|
||||
|
||||
// IntersectsPrefix returns true if any of the given
|
||||
// field paths intersect with a known nested prefix
|
||||
IntersectsPrefix(fieldPaths search.FieldSet) bool
|
||||
|
||||
// CountNested returns the number of nested object mappings
|
||||
CountNested() int
|
||||
}
|
||||
|
||||
23
vendor/github.com/blevesearch/bleve/v2/mapping/mapping_vectors.go
generated
vendored
23
vendor/github.com/blevesearch/bleve/v2/mapping/mapping_vectors.go
generated
vendored
@@ -151,6 +151,12 @@ func (fm *FieldMapping) processVector(propertyMightBeVector interface{},
|
||||
if vectorIndexOptimizedFor == "" {
|
||||
vectorIndexOptimizedFor = index.DefaultIndexOptimization
|
||||
}
|
||||
// bivf indexes only supports hamming distance for the primary
|
||||
// binary index. Similarity here is used for the backing flat index,
|
||||
// which is set to cosine similarity for recall reasons
|
||||
if index.OptimizationRequiresBinaryIndex(vectorIndexOptimizedFor) {
|
||||
similarity = index.CosineSimilarity
|
||||
}
|
||||
// normalize raw vector if similarity is cosine
|
||||
// Since the vector can be multi-vector (flattened array of multiple vectors),
|
||||
// we use NormalizeMultiVector to normalize each sub-vector independently.
|
||||
@@ -185,6 +191,12 @@ func (fm *FieldMapping) processVectorBase64(propertyMightBeVectorBase64 interfac
|
||||
if vectorIndexOptimizedFor == "" {
|
||||
vectorIndexOptimizedFor = index.DefaultIndexOptimization
|
||||
}
|
||||
// bivf indexes only supports hamming distance for the primary
|
||||
// binary index. Similarity here is used for the backing flat index,
|
||||
// which is set to cosine similarity for recall reasons
|
||||
if index.OptimizationRequiresBinaryIndex(vectorIndexOptimizedFor) {
|
||||
similarity = index.CosineSimilarity
|
||||
}
|
||||
decodedVector, err := document.DecodeVector(encodedString)
|
||||
if err != nil || len(decodedVector) != fm.Dims {
|
||||
return
|
||||
@@ -197,6 +209,7 @@ func (fm *FieldMapping) processVectorBase64(propertyMightBeVectorBase64 interfac
|
||||
|
||||
fieldName := getFieldName(pathString, path, fm)
|
||||
options := fm.Options()
|
||||
|
||||
field := document.NewVectorFieldWithIndexingOptions(fieldName, indexes, decodedVector,
|
||||
fm.Dims, similarity, vectorIndexOptimizedFor, options)
|
||||
context.doc.AddField(field)
|
||||
@@ -264,6 +277,11 @@ func validateVectorFieldAlias(field *FieldMapping, path []string,
|
||||
"(different vector index optimization values %s and %s)", effectiveFieldName,
|
||||
effectiveOptimizedFor, aliasOptimizedFor)
|
||||
}
|
||||
if field.GPU != fieldAlias.GPU {
|
||||
return fmt.Errorf("field: '%s', invalid alias "+
|
||||
"(different gpu values %v and %v)", effectiveFieldName,
|
||||
field.GPU, fieldAlias.GPU)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -288,6 +306,11 @@ func validateVectorFieldAlias(field *FieldMapping, path []string,
|
||||
effectiveOptimizedFor,
|
||||
reflect.ValueOf(index.SupportedVectorIndexOptimizations).MapKeys())
|
||||
}
|
||||
// bivf indexes requires vector dimensionality to be a multiple of 8
|
||||
if index.OptimizationRequiresBinaryIndex(effectiveOptimizedFor) && field.Dims%8 != 0 {
|
||||
return fmt.Errorf("field: '%s', incompatible vector dimensionality for BIVF: %d,"+
|
||||
" dimension should be a multiple of 8", effectiveFieldName, field.Dims)
|
||||
}
|
||||
|
||||
if fieldAliasCtx != nil { // writing to a nil map is unsafe
|
||||
fieldAliasCtx[effectiveFieldName] = field
|
||||
|
||||
8
vendor/github.com/blevesearch/bleve/v2/numeric/prefix_coded.go
generated
vendored
8
vendor/github.com/blevesearch/bleve/v2/numeric/prefix_coded.go
generated
vendored
@@ -66,6 +66,14 @@ func MustNewPrefixCodedInt64(in int64, shift uint) PrefixCoded {
|
||||
return rv
|
||||
}
|
||||
|
||||
func MustNewPrefixCodedInt64Prealloc(in int64, shift uint, prealloc []byte) PrefixCoded {
|
||||
rv, _, err := NewPrefixCodedInt64Prealloc(in, shift, prealloc)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
// Shift returns the number of bits shifted
|
||||
// returns 0 if in uninitialized state
|
||||
func (p PrefixCoded) Shift() (uint, error) {
|
||||
|
||||
136
vendor/github.com/blevesearch/bleve/v2/registry/nested.go
generated
vendored
Normal file
136
vendor/github.com/blevesearch/bleve/v2/registry/nested.go
generated
vendored
Normal file
@@ -0,0 +1,136 @@
|
||||
// Copyright (c) 2026 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package registry
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
)
|
||||
|
||||
// NestedFieldCache stores nested field prefixes together with their nesting
// levels. A nested field prefix is the leading part of a field path that
// marks where a nested document begins; its level says how deep that nested
// document sits within the overall document structure.
type NestedFieldCache struct {
	// prefixDepth maps a nested prefix to its nesting level
	prefixDepth map[string]int
	// once makes InitOnce populate prefixDepth a single time
	once sync.Once
	// m guards prefixDepth
	m sync.RWMutex
}
|
||||
|
||||
func NewNestedFieldCache() *NestedFieldCache {
|
||||
return &NestedFieldCache{}
|
||||
}
|
||||
|
||||
func (nfc *NestedFieldCache) InitOnce(buildFunc func() map[string]int) {
|
||||
nfc.once.Do(func() {
|
||||
nfc.m.Lock()
|
||||
defer nfc.m.Unlock()
|
||||
nfc.prefixDepth = buildFunc()
|
||||
})
|
||||
}
|
||||
|
||||
// NestedDepth returns two values:
|
||||
// - common: The nesting level of the longest prefix that applies to every field path
|
||||
// in the provided FieldSet. A value of 0 means no nested prefix is shared
|
||||
// across all field paths.
|
||||
// - max: The nesting level of the longest prefix that applies to at least one
|
||||
// field path in the provided FieldSet. A value of 0 means none of the
|
||||
// field paths match any nested prefix.
|
||||
func (nfc *NestedFieldCache) NestedDepth(fieldPaths search.FieldSet) (common int, max int) {
|
||||
// if no field paths, no nested depth
|
||||
if len(fieldPaths) == 0 {
|
||||
return
|
||||
}
|
||||
nfc.m.RLock()
|
||||
defer nfc.m.RUnlock()
|
||||
// if no cached prefixes, no nested depth
|
||||
if len(nfc.prefixDepth) == 0 {
|
||||
return
|
||||
}
|
||||
// for each prefix, check if its a common prefix or matches any path
|
||||
// update common and max accordingly with the highest nesting level
|
||||
// possible for each respective case
|
||||
for prefix, level := range nfc.prefixDepth {
|
||||
// only check prefixes that could increase one of the results
|
||||
if level <= common && level <= max {
|
||||
continue
|
||||
}
|
||||
// check prefix against field paths, getting whether it matches all paths (common)
|
||||
// and whether it matches at least one path (any)
|
||||
matchAll, matchAny := nfc.prefixMatch(prefix, fieldPaths)
|
||||
// if it matches all paths, update common
|
||||
if matchAll && level > common {
|
||||
common = level
|
||||
}
|
||||
// if it matches any path, update max
|
||||
if matchAny && level > max {
|
||||
max = level
|
||||
}
|
||||
}
|
||||
return common, max
|
||||
}
|
||||
|
||||
// CountNested returns the number of nested prefixes
|
||||
func (nfc *NestedFieldCache) CountNested() int {
|
||||
nfc.m.RLock()
|
||||
defer nfc.m.RUnlock()
|
||||
|
||||
return len(nfc.prefixDepth)
|
||||
}
|
||||
|
||||
// IntersectsPrefix returns true if any of the given
|
||||
// field paths have a nested prefix
|
||||
func (nfc *NestedFieldCache) IntersectsPrefix(fieldPaths search.FieldSet) bool {
|
||||
// if no field paths, no intersection
|
||||
if len(fieldPaths) == 0 {
|
||||
return false
|
||||
}
|
||||
nfc.m.RLock()
|
||||
defer nfc.m.RUnlock()
|
||||
// if no cached prefixes, no intersection
|
||||
if len(nfc.prefixDepth) == 0 {
|
||||
return false
|
||||
}
|
||||
// Check each cached nested prefix to see if it intersects with any path
|
||||
for prefix := range nfc.prefixDepth {
|
||||
_, matchAny := nfc.prefixMatch(prefix, fieldPaths)
|
||||
if matchAny {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// prefixMatch checks whether the prefix matches all paths (common) and whether it matches at least one path (any)
|
||||
// Caller must hold the read lock.
|
||||
func (nfc *NestedFieldCache) prefixMatch(prefix string, fieldPaths search.FieldSet) (common bool, any bool) {
|
||||
common = true
|
||||
any = false
|
||||
for path := range fieldPaths {
|
||||
has := strings.HasPrefix(path, prefix)
|
||||
if has {
|
||||
any = true
|
||||
} else {
|
||||
common = false
|
||||
}
|
||||
// early exit if we have determined both values
|
||||
if any && !common {
|
||||
break
|
||||
}
|
||||
}
|
||||
return common, any
|
||||
}
|
||||
2
vendor/github.com/blevesearch/bleve/v2/registry/registry.go
generated
vendored
2
vendor/github.com/blevesearch/bleve/v2/registry/registry.go
generated
vendored
@@ -49,6 +49,7 @@ type Cache struct {
|
||||
Fragmenters *FragmenterCache
|
||||
Highlighters *HighlighterCache
|
||||
SynonymSources *SynonymSourceCache
|
||||
NestedPrefixes *NestedFieldCache
|
||||
}
|
||||
|
||||
func NewCache() *Cache {
|
||||
@@ -63,6 +64,7 @@ func NewCache() *Cache {
|
||||
Fragmenters: NewFragmenterCache(),
|
||||
Highlighters: NewHighlighterCache(),
|
||||
SynonymSources: NewSynonymSourceCache(),
|
||||
NestedPrefixes: NewNestedFieldCache(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
46
vendor/github.com/blevesearch/bleve/v2/search.go
generated
vendored
46
vendor/github.com/blevesearch/bleve/v2/search.go
generated
vendored
@@ -15,9 +15,12 @@
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"regexp"
|
||||
"slices"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -625,11 +628,35 @@ func formatHit(rv *strings.Builder, hit *search.DocumentMatch, hitNumber int) *s
|
||||
}
|
||||
}
|
||||
for otherFieldName, otherFieldValue := range hit.Fields {
|
||||
if otherFieldName == NestedDocumentKey {
|
||||
continue
|
||||
}
|
||||
if _, ok := hit.Fragments[otherFieldName]; !ok {
|
||||
fmt.Fprintf(rv, "\t%s\n", otherFieldName)
|
||||
fmt.Fprintf(rv, "\t\t%v\n", otherFieldValue)
|
||||
}
|
||||
}
|
||||
// nested documents
|
||||
if nested, ok := hit.Fields[NestedDocumentKey]; ok {
|
||||
if list, ok := nested.([]*search.NestedDocumentMatch); ok {
|
||||
fmt.Fprintf(rv, "\t%s (%d nested documents)\n", NestedDocumentKey, len(list))
|
||||
for ni, nd := range list {
|
||||
fmt.Fprintf(rv, "\t\tNested #%d:\n", ni+1)
|
||||
for f, frags := range nd.Fragments {
|
||||
fmt.Fprintf(rv, "\t\t\t%s\n", f)
|
||||
for _, frag := range frags {
|
||||
fmt.Fprintf(rv, "\t\t\t\t%s\n", frag)
|
||||
}
|
||||
}
|
||||
for f, v := range nd.Fields {
|
||||
if _, ok := nd.Fragments[f]; !ok {
|
||||
fmt.Fprintf(rv, "\t\t\t%s\n", f)
|
||||
fmt.Fprintf(rv, "\t\t\t\t%v\n", v)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if len(hit.DecodedSort) > 0 {
|
||||
fmt.Fprintf(rv, "\t_sort: [")
|
||||
for k, v := range hit.DecodedSort {
|
||||
@@ -806,3 +833,22 @@ func ParseParams(r *SearchRequest, input []byte) (*RequestParams, error) {
|
||||
|
||||
return params, nil
|
||||
}
|
||||
|
||||
// OptionalRawMessage is a json.RawMessage variant in which empty input and
// the JSON literal `null` both decode to a nil value.
type OptionalRawMessage json.RawMessage

// UnmarshalJSON stores a private copy of data in n, or resets n to nil when
// data is empty or exactly `null`. It never returns an error.
func (n *OptionalRawMessage) UnmarshalJSON(data []byte) error {
	switch {
	case len(data) == 0, bytes.Equal(data, []byte("null")):
		*n = nil
	default:
		// clone so the stored message does not alias the decoder's buffer
		*n = slices.Clone(data)
	}
	return nil
}

// MarshalJSON emits the stored bytes verbatim, or `null` when n is empty.
func (n OptionalRawMessage) MarshalJSON() ([]byte, error) {
	if len(n) > 0 {
		return n, nil
	}
	return []byte("null"), nil
}
|
||||
|
||||
103
vendor/github.com/blevesearch/bleve/v2/search/collector/nested.go
generated
vendored
Normal file
103
vendor/github.com/blevesearch/bleve/v2/search/collector/nested.go
generated
vendored
Normal file
@@ -0,0 +1,103 @@
|
||||
// Copyright (c) 2026 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
type collectStoreNested struct {
|
||||
// descAdder is used to customize how descendants are merged into their parent
|
||||
descAdder search.DescendantAdderCallbackFn
|
||||
// nested reader to retrieve ancestor information
|
||||
nr index.NestedReader
|
||||
// the current root document match being built
|
||||
currRoot *search.DocumentMatch
|
||||
// the ancestor ID of the current root document being built
|
||||
currRootAncestorID index.AncestorID
|
||||
// prealloc slice for ancestor IDs
|
||||
ancestors []index.AncestorID
|
||||
}
|
||||
|
||||
func newStoreNested(nr index.NestedReader, descAdder search.DescendantAdderCallbackFn) *collectStoreNested {
|
||||
rv := &collectStoreNested{
|
||||
descAdder: descAdder,
|
||||
nr: nr,
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
// ProcessNestedDocument adds a document to the nested store, merging it into its root document
|
||||
// as needed. If the returned DocumentMatch is nil, the incoming doc has been merged
|
||||
// into its parent and should not be processed further. If the returned DocumentMatch
|
||||
// is non-nil, it represents a complete root document that should be processed further.
|
||||
// NOTE: This implementation assumes that documents are added in increasing order of their internal IDs
|
||||
// which is guaranteed by all searchers in bleve.
|
||||
func (c *collectStoreNested) ProcessNestedDocument(ctx *search.SearchContext, doc *search.DocumentMatch) (*search.DocumentMatch, error) {
|
||||
// find ancestors for the doc
|
||||
var err error
|
||||
c.ancestors, err = c.nr.Ancestors(doc.IndexInternalID, c.ancestors[:0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(c.ancestors) == 0 {
|
||||
// should not happen, every doc should have at least itself as ancestor
|
||||
return nil, nil
|
||||
}
|
||||
// root docID is the last ancestor
|
||||
rootID := c.ancestors[len(c.ancestors)-1]
|
||||
// check if there is an interim root already and if the incoming doc belongs to it
|
||||
if c.currRoot != nil && c.currRootAncestorID.Equals(rootID) {
|
||||
// there is an interim root already, and the incoming doc belongs to it
|
||||
if err := c.descAdder(c.currRoot, doc); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// recycle the child document now that it's merged into the interim root
|
||||
ctx.DocumentMatchPool.Put(doc)
|
||||
return nil, nil
|
||||
}
|
||||
// completedRoot is the root document match to return, if any
|
||||
var completedRoot *search.DocumentMatch
|
||||
if c.currRoot != nil {
|
||||
// we have an existing interim root, return it for processing
|
||||
completedRoot = c.currRoot
|
||||
}
|
||||
// no interim root for now so either we have a root document incoming
|
||||
// or we have a child doc and need to create an interim root
|
||||
if len(c.ancestors) == 1 {
|
||||
// incoming doc is the root itself
|
||||
c.currRoot = doc
|
||||
c.currRootAncestorID = rootID
|
||||
return completedRoot, nil
|
||||
}
|
||||
// this is a child doc, create interim root
|
||||
newDM := ctx.DocumentMatchPool.Get()
|
||||
newDM.IndexInternalID = rootID.ToIndexInternalID(newDM.IndexInternalID)
|
||||
// merge the incoming doc into the new interim root
|
||||
c.currRoot = newDM
|
||||
c.currRootAncestorID = rootID
|
||||
if err := c.descAdder(c.currRoot, doc); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// recycle the child document now that it's merged into the interim root
|
||||
ctx.DocumentMatchPool.Put(doc)
|
||||
return completedRoot, nil
|
||||
}
|
||||
|
||||
// Current returns the current interim root document match being built, if any
|
||||
func (c *collectStoreNested) Current() *search.DocumentMatch {
|
||||
return c.currRoot
|
||||
}
|
||||
144
vendor/github.com/blevesearch/bleve/v2/search/collector/topn.go
generated
vendored
144
vendor/github.com/blevesearch/bleve/v2/search/collector/topn.go
generated
vendored
@@ -78,7 +78,9 @@ type TopNCollector struct {
|
||||
searchAfter *search.DocumentMatch
|
||||
|
||||
knnHits map[string]*search.DocumentMatch
|
||||
computeNewScoreExpl search.ScoreExplCorrectionCallbackFunc
|
||||
hybridMergeCallback search.HybridMergeCallbackFn
|
||||
|
||||
nestedStore *collectStoreNested
|
||||
}
|
||||
|
||||
// CheckDoneEvery controls how frequently we check the context deadline
|
||||
@@ -88,25 +90,74 @@ const CheckDoneEvery = uint64(1024)
|
||||
// skipping over the first 'skip' hits
|
||||
// ordering hits by the provided sort order
|
||||
func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector {
|
||||
return newTopNCollector(size, skip, sort)
|
||||
return newTopNCollector(size, skip, sort, nil)
|
||||
}
|
||||
|
||||
// NewTopNCollectorAfter builds a collector to find the top 'size' hits
|
||||
// skipping over the first 'skip' hits
|
||||
// ordering hits by the provided sort order
|
||||
// starting after the provided 'after' sort values
|
||||
func NewTopNCollectorAfter(size int, sort search.SortOrder, after []string) *TopNCollector {
|
||||
rv := newTopNCollector(size, 0, sort)
|
||||
rv := newTopNCollector(size, 0, sort, nil)
|
||||
rv.searchAfter = createSearchAfterDocument(sort, after)
|
||||
return rv
|
||||
}
|
||||
|
||||
func newTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector {
|
||||
// NewNestedTopNCollector builds a collector to find the top 'size' hits
|
||||
// skipping over the first 'skip' hits
|
||||
// ordering hits by the provided sort order
|
||||
// while ensuring the nested documents are handled correctly
|
||||
// (i.e. parent document is returned instead of nested document)
|
||||
func NewNestedTopNCollector(size int, skip int, sort search.SortOrder, nr index.NestedReader) *TopNCollector {
|
||||
return newTopNCollector(size, skip, sort, nr)
|
||||
}
|
||||
|
||||
// NewNestedTopNCollectorAfter builds a collector to find the top 'size' hits
|
||||
// skipping over the first 'skip' hits
|
||||
// ordering hits by the provided sort order
|
||||
// starting after the provided 'after' sort values
|
||||
// while ensuring the nested documents are handled correctly
|
||||
// (i.e. parent document is returned instead of nested document)
|
||||
func NewNestedTopNCollectorAfter(size int, sort search.SortOrder, after []string, nr index.NestedReader) *TopNCollector {
|
||||
rv := newTopNCollector(size, 0, sort, nr)
|
||||
rv.searchAfter = createSearchAfterDocument(sort, after)
|
||||
return rv
|
||||
}
|
||||
|
||||
func newTopNCollector(size int, skip int, sort search.SortOrder, nr index.NestedReader) *TopNCollector {
|
||||
hc := &TopNCollector{size: size, skip: skip, sort: sort}
|
||||
|
||||
hc.store = getOptimalCollectorStore(size, skip, func(i, j *search.DocumentMatch) int {
|
||||
return hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, i, j)
|
||||
})
|
||||
|
||||
if nr != nil {
|
||||
descAdder := func(parent, child *search.DocumentMatch) error {
|
||||
// add descendant score to parent score
|
||||
parent.Score += child.Score
|
||||
// merge explanations
|
||||
parent.Expl = parent.Expl.MergeWith(child.Expl)
|
||||
// merge field term locations
|
||||
parent.FieldTermLocations = search.MergeFieldTermLocationsFromMatch(parent.FieldTermLocations, child)
|
||||
// add child's ID to parent's Descendants
|
||||
// add other as descendant only if it is not the same document
|
||||
if !parent.IndexInternalID.Equals(child.IndexInternalID) {
|
||||
// Add a copy of child.IndexInternalID to descendants, because
|
||||
// child.IndexInternalID will be reset when 'child' is recycled.
|
||||
var descendantID index.IndexInternalID
|
||||
// first check if parent's descendants slice has capacity to reuse
|
||||
if len(parent.Descendants) < cap(parent.Descendants) {
|
||||
// reuse the buffer element at len(parent.Descendants)
|
||||
descendantID = parent.Descendants[:len(parent.Descendants)+1][len(parent.Descendants)]
|
||||
}
|
||||
// copy the contents of id into descendantID, allocating if needed
|
||||
parent.Descendants = append(parent.Descendants, index.NewIndexInternalIDFrom(descendantID, child.IndexInternalID))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
hc.nestedStore = newStoreNested(nr, search.DescendantAdderCallbackFn(descAdder))
|
||||
}
|
||||
|
||||
// these lookups traverse an interface, so do once up-front
|
||||
if sort.RequiresDocID() {
|
||||
hc.needDocIds = true
|
||||
@@ -283,8 +334,13 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
|
||||
default:
|
||||
next, err = searcher.Next(searchContext)
|
||||
}
|
||||
// use a local totalDocs for counting total docs seen
|
||||
// for context deadline checking, as hc.total is only
|
||||
// incremented for actual(root) collected documents, and
|
||||
// we need to check deadline for every document seen (root or nested)
|
||||
var totalDocs uint64
|
||||
for err == nil && next != nil {
|
||||
if hc.total%CheckDoneEvery == 0 {
|
||||
if totalDocs%CheckDoneEvery == 0 {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
search.RecordSearchCost(ctx, search.AbortM, 0)
|
||||
@@ -292,27 +348,60 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
err = hc.adjustDocumentMatch(searchContext, reader, next)
|
||||
if err != nil {
|
||||
break
|
||||
totalDocs++
|
||||
if hc.nestedStore != nil {
|
||||
// This may be a nested document — add it to the nested store first.
|
||||
// If the nested store returns nil, the document was merged into its parent
|
||||
// and should not be processed further.
|
||||
// If it returns a non-nil document, it represents a complete root document
|
||||
// and should be processed further.
|
||||
next, err = hc.nestedStore.ProcessNestedDocument(searchContext, next)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
err = hc.prepareDocumentMatch(searchContext, reader, next, false)
|
||||
if err != nil {
|
||||
break
|
||||
if next != nil {
|
||||
err = hc.adjustDocumentMatch(searchContext, reader, next)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
err = hc.prepareDocumentMatch(searchContext, reader, next, false)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
err = dmHandler(next)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
err = dmHandler(next)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
|
||||
next, err = searcher.Next(searchContext)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// if we have a nested store, we may have an interim root
|
||||
// that needs to be returned for processing
|
||||
if hc.nestedStore != nil {
|
||||
currRoot := hc.nestedStore.Current()
|
||||
if currRoot != nil {
|
||||
err = hc.adjustDocumentMatch(searchContext, reader, currRoot)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// no descendants at this point
|
||||
err = hc.prepareDocumentMatch(searchContext, reader, currRoot, false)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = dmHandler(currRoot)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if hc.knnHits != nil {
|
||||
// we may have some knn hits left that did not match any of the top N tf-idf hits
|
||||
// we need to add them to the collector store to consider them as well.
|
||||
@@ -366,7 +455,10 @@ func (hc *TopNCollector) adjustDocumentMatch(ctx *search.SearchContext,
|
||||
return err
|
||||
}
|
||||
if knnHit, ok := hc.knnHits[d.ID]; ok {
|
||||
d.Score, d.Expl = hc.computeNewScoreExpl(d, knnHit)
|
||||
// we have a knn hit corresponding to this document
|
||||
hc.hybridMergeCallback(d, knnHit)
|
||||
// remove this knn hit from the map as it's already
|
||||
// been merged
|
||||
delete(hc.knnHits, d.ID)
|
||||
}
|
||||
}
|
||||
@@ -501,6 +593,14 @@ func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.Doc
|
||||
}
|
||||
}
|
||||
|
||||
// first visit descendants if any
|
||||
for _, descID := range d.Descendants {
|
||||
err := hc.dvReader.VisitDocValues(descID, v)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// now visit the doc values for this document
|
||||
err := hc.dvReader.VisitDocValues(d.IndexInternalID, v)
|
||||
if hc.facetsBuilder != nil {
|
||||
hc.facetsBuilder.EndDoc()
|
||||
@@ -579,10 +679,10 @@ func (hc *TopNCollector) FacetResults() search.FacetResults {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (hc *TopNCollector) SetKNNHits(knnHits search.DocumentMatchCollection, newScoreExplComputer search.ScoreExplCorrectionCallbackFunc) {
|
||||
func (hc *TopNCollector) SetKNNHits(knnHits search.DocumentMatchCollection, hybridMergeCallback search.HybridMergeCallbackFn) {
|
||||
hc.knnHits = make(map[string]*search.DocumentMatch, len(knnHits))
|
||||
for _, hit := range knnHits {
|
||||
hc.knnHits[hit.ID] = hit
|
||||
}
|
||||
hc.computeNewScoreExpl = newScoreExplComputer
|
||||
hc.hybridMergeCallback = hybridMergeCallback
|
||||
}
|
||||
|
||||
49
vendor/github.com/blevesearch/bleve/v2/search/explanation.go
generated
vendored
49
vendor/github.com/blevesearch/bleve/v2/search/explanation.go
generated
vendored
@@ -29,6 +29,8 @@ func init() {
|
||||
reflectStaticSizeExplanation = int(reflect.TypeOf(e).Size())
|
||||
}
|
||||
|
||||
const MergedExplMessage = "sum of merged explanations:"
|
||||
|
||||
type Explanation struct {
|
||||
Value float64 `json:"value"`
|
||||
Message string `json:"message"`
|
||||
@@ -54,3 +56,50 @@ func (expl *Explanation) Size() int {
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
// MergeExpl merges two explanations into one.
|
||||
// If either explanation is nil, the other is returned.
|
||||
// If the first explanation is already a merged explanation,
|
||||
// the second explanation is appended to its children.
|
||||
// Otherwise, a new merged explanation is created
|
||||
// with the two explanations as its children.
|
||||
func (expl *Explanation) MergeWith(other *Explanation) *Explanation {
|
||||
if expl == nil {
|
||||
return other
|
||||
}
|
||||
if other == nil || expl == other {
|
||||
return expl
|
||||
}
|
||||
|
||||
newScore := expl.Value + other.Value
|
||||
|
||||
// if both are merged explanations, combine children
|
||||
if expl.Message == MergedExplMessage && other.Message == MergedExplMessage {
|
||||
expl.Value = newScore
|
||||
expl.Children = append(expl.Children, other.Children...)
|
||||
return expl
|
||||
}
|
||||
|
||||
// atleast one is not a merged explanation see which one it is
|
||||
// if expl is merged, append other
|
||||
if expl.Message == MergedExplMessage {
|
||||
// append other as a child to first
|
||||
expl.Value = newScore
|
||||
expl.Children = append(expl.Children, other)
|
||||
return expl
|
||||
}
|
||||
|
||||
// if other is merged, append expl
|
||||
if other.Message == MergedExplMessage {
|
||||
other.Value = newScore
|
||||
other.Children = append(other.Children, expl)
|
||||
return other
|
||||
}
|
||||
// create a new explanation to hold the merged one
|
||||
rv := &Explanation{
|
||||
Value: expl.Value + other.Value,
|
||||
Message: MergedExplMessage,
|
||||
Children: []*Explanation{expl, other},
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
@@ -146,12 +146,8 @@ func (s *Highlighter) BestFragmentsInField(dm *search.DocumentMatch, doc index.D
|
||||
formattedFragments[i] += s.sep
|
||||
}
|
||||
}
|
||||
|
||||
if dm.Fragments == nil {
|
||||
dm.Fragments = make(search.FieldFragmentMap, 0)
|
||||
}
|
||||
if len(formattedFragments) > 0 {
|
||||
dm.Fragments[field] = formattedFragments
|
||||
dm.AddFragments(field, formattedFragments)
|
||||
}
|
||||
|
||||
return formattedFragments
|
||||
|
||||
2
vendor/github.com/blevesearch/bleve/v2/search/query/boolean.go
generated
vendored
2
vendor/github.com/blevesearch/bleve/v2/search/query/boolean.go
generated
vendored
@@ -204,6 +204,8 @@ func (q *BooleanQuery) Searcher(ctx context.Context, i index.IndexReader, m mapp
|
||||
// Compare document IDs
|
||||
cmp := refDoc.IndexInternalID.Compare(d.IndexInternalID)
|
||||
if cmp < 0 {
|
||||
// recycle refDoc now that we do not need it
|
||||
sctx.DocumentMatchPool.Put(refDoc)
|
||||
// filterSearcher is behind the current document, Advance() it
|
||||
refDoc, err = filterSearcher.Advance(sctx, d.IndexInternalID)
|
||||
if err != nil || refDoc == nil {
|
||||
|
||||
52
vendor/github.com/blevesearch/bleve/v2/search/query/conjunction.go
generated
vendored
52
vendor/github.com/blevesearch/bleve/v2/search/query/conjunction.go
generated
vendored
@@ -54,14 +54,39 @@ func (q *ConjunctionQuery) AddQuery(aq ...Query) {
|
||||
|
||||
func (q *ConjunctionQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
|
||||
ss := make([]search.Searcher, 0, len(q.Conjuncts))
|
||||
cleanup := func() {
|
||||
for _, searcher := range ss {
|
||||
if searcher != nil {
|
||||
_ = searcher.Close()
|
||||
}
|
||||
}
|
||||
}
|
||||
nestedMode, _ := ctx.Value(search.NestedSearchKey).(bool)
|
||||
var nm mapping.NestedMapping
|
||||
if nestedMode {
|
||||
var ok bool
|
||||
// get the nested mapping
|
||||
if nm, ok = m.(mapping.NestedMapping); !ok {
|
||||
// shouldn't be in nested mode if no nested mapping
|
||||
nestedMode = false
|
||||
}
|
||||
}
|
||||
// set of fields used in this query
|
||||
var qfs search.FieldSet
|
||||
var err error
|
||||
|
||||
for _, conjunct := range q.Conjuncts {
|
||||
// Gather fields when nested mode is enabled
|
||||
if nestedMode {
|
||||
qfs, err = ExtractFields(conjunct, m, qfs)
|
||||
if err != nil {
|
||||
cleanup()
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
sr, err := conjunct.Searcher(ctx, i, m, options)
|
||||
if err != nil {
|
||||
for _, searcher := range ss {
|
||||
if searcher != nil {
|
||||
_ = searcher.Close()
|
||||
}
|
||||
}
|
||||
cleanup()
|
||||
return nil, err
|
||||
}
|
||||
if _, ok := sr.(*searcher.MatchNoneSearcher); ok && q.queryStringMode {
|
||||
@@ -75,6 +100,23 @@ func (q *ConjunctionQuery) Searcher(ctx context.Context, i index.IndexReader, m
|
||||
return searcher.NewMatchNoneSearcher(i)
|
||||
}
|
||||
|
||||
if nestedMode {
|
||||
// first determine the nested depth info for the query fields
|
||||
commonDepth, maxDepth := nm.NestedDepth(qfs)
|
||||
// if we have common depth == max depth then we can just use
|
||||
// the normal conjunction searcher, as all fields share the same
|
||||
// nested context, otherwise we need to use the nested conjunction searcher
|
||||
// also, if we are querying the _all or _id fields, we need to use
|
||||
// the nested conjunction searcher as well, with common depth 0
|
||||
// indicating matches happen only at the root level
|
||||
if qfs.HasAll() || qfs.HasID() {
|
||||
commonDepth = 0
|
||||
}
|
||||
if commonDepth < maxDepth {
|
||||
return searcher.NewNestedConjunctionSearcher(ctx, i, ss, commonDepth, options)
|
||||
}
|
||||
}
|
||||
|
||||
return searcher.NewConjunctionSearcher(ctx, i, ss, options)
|
||||
}
|
||||
|
||||
|
||||
130
vendor/github.com/blevesearch/bleve/v2/search/query/custom_filter.go
generated
vendored
Normal file
130
vendor/github.com/blevesearch/bleve/v2/search/query/custom_filter.go
generated
vendored
Normal file
@@ -0,0 +1,130 @@
|
||||
// Copyright (c) 2026 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package query
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/mapping"
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
"github.com/blevesearch/bleve/v2/search/searcher"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
// CustomFilterQuery wraps a child query and filters its candidate matches via
|
||||
// an embedder-provided per-hit callback.
|
||||
type CustomFilterQuery struct {
|
||||
Query Query `json:"query"`
|
||||
Fields []string `json:"fields,omitempty"`
|
||||
|
||||
filterFunc searcher.CustomFilterFunc
|
||||
payload map[string]interface{}
|
||||
}
|
||||
|
||||
// CustomFilterQueryParser lets an embedder override parsing of
|
||||
// {"custom_filter": ...} nodes. It is intended to be assigned once during
|
||||
// process startup or init, before any queries are parsed; callers must not
|
||||
// mutate it concurrently with ParseQuery(). For example:
|
||||
//
|
||||
// func init() {
|
||||
// query.CustomFilterQueryParser = parseCustomFilterQuery
|
||||
// }
|
||||
var CustomFilterQueryParser func([]byte) (Query, error)
|
||||
|
||||
func NewCustomFilterQueryWithFilter(query Query, filter searcher.CustomFilterFunc, fields []string, payload map[string]interface{}) *CustomFilterQuery {
|
||||
return &CustomFilterQuery{
|
||||
Query: query,
|
||||
Fields: fields,
|
||||
filterFunc: filter,
|
||||
payload: payload,
|
||||
}
|
||||
}
|
||||
|
||||
func (q *CustomFilterQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
|
||||
if q == nil {
|
||||
return nil, fmt.Errorf("custom filter query is nil")
|
||||
}
|
||||
if q.Query == nil {
|
||||
return nil, fmt.Errorf("custom filter query must have a query")
|
||||
}
|
||||
if q.filterFunc == nil {
|
||||
return nil, fmt.Errorf("custom filter query must have a filter callback")
|
||||
}
|
||||
|
||||
// Build the inner searcher first; custom filtering wraps its output.
|
||||
childSearcher, err := q.Query.Searcher(ctx, i, m, options)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Create a doc value reader for the requested fields (if any) so the
|
||||
// searcher can populate d.Fields before invoking the callback.
|
||||
var dvReader index.DocValueReader
|
||||
var fieldTypes map[string]string
|
||||
if len(q.Fields) > 0 {
|
||||
var err2 error
|
||||
dvReader, err2 = i.DocValueReader(q.Fields)
|
||||
if err2 != nil {
|
||||
_ = childSearcher.Close()
|
||||
return nil, err2
|
||||
}
|
||||
fieldTypes = resolveFieldTypes(q.Fields, m)
|
||||
}
|
||||
|
||||
return searcher.NewCustomFilterSearcher(ctx, childSearcher, q.filterFunc, dvReader, i, fieldTypes), nil
|
||||
}
|
||||
|
||||
func (q *CustomFilterQuery) Validate() error {
|
||||
if q == nil {
|
||||
return fmt.Errorf("custom filter query is nil")
|
||||
}
|
||||
if q.Query == nil {
|
||||
return fmt.Errorf("custom filter query must have a query")
|
||||
}
|
||||
if q.filterFunc == nil {
|
||||
return fmt.Errorf("custom filter query must have a filter callback")
|
||||
}
|
||||
if vq, ok := q.Query.(ValidatableQuery); ok {
|
||||
return vq.Validate()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (q *CustomFilterQuery) MarshalJSON() ([]byte, error) {
|
||||
inner := make(map[string]interface{}, len(q.payload)+2)
|
||||
for k, v := range q.payload {
|
||||
inner[k] = v
|
||||
}
|
||||
inner["query"] = q.Query
|
||||
if len(q.Fields) > 0 {
|
||||
inner["fields"] = q.Fields
|
||||
}
|
||||
return json.Marshal(map[string]interface{}{
|
||||
"custom_filter": inner,
|
||||
})
|
||||
}
|
||||
|
||||
func (q *CustomFilterQuery) UnmarshalJSON(data []byte) error {
|
||||
child, fields, payload, err := unmarshalCustomQueryPayload(data, "custom_filter")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
q.Query = child
|
||||
q.Fields = fields
|
||||
q.payload = payload
|
||||
return nil
|
||||
}
|
||||
96
vendor/github.com/blevesearch/bleve/v2/search/query/custom_payload.go
generated
vendored
Normal file
96
vendor/github.com/blevesearch/bleve/v2/search/query/custom_payload.go
generated
vendored
Normal file
@@ -0,0 +1,96 @@
|
||||
// Copyright (c) 2026 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package query
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/mapping"
|
||||
"github.com/blevesearch/bleve/v2/util"
|
||||
)
|
||||
|
||||
func unmarshalCustomQueryPayload(data []byte, key string) (Query, []string, map[string]interface{}, error) {
|
||||
tmp := map[string]json.RawMessage{}
|
||||
err := util.UnmarshalJSON(data, &tmp)
|
||||
if err != nil {
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
|
||||
innerRaw, ok := tmp[key]
|
||||
if !ok || innerRaw == nil {
|
||||
return nil, nil, nil, nil
|
||||
}
|
||||
|
||||
var inner map[string]json.RawMessage
|
||||
err = util.UnmarshalJSON(innerRaw, &inner)
|
||||
if err != nil || inner == nil {
|
||||
return nil, nil, nil, fmt.Errorf("%s query must be a JSON object", key)
|
||||
}
|
||||
|
||||
var child Query
|
||||
if childQuery, ok := inner["query"]; ok && childQuery != nil {
|
||||
child, err = ParseQuery(childQuery)
|
||||
if err != nil {
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
var fields []string
|
||||
if rawFields, ok := inner["fields"]; ok && rawFields != nil {
|
||||
if err := util.UnmarshalJSON(rawFields, &fields); err != nil {
|
||||
return nil, nil, nil, fmt.Errorf("%s query has invalid %q: %w",
|
||||
key, "fields", err)
|
||||
}
|
||||
}
|
||||
|
||||
payload := make(map[string]interface{}, len(inner))
|
||||
for k, raw := range inner {
|
||||
if k == "query" || k == "fields" {
|
||||
continue
|
||||
}
|
||||
var v interface{}
|
||||
if raw != nil {
|
||||
err = util.UnmarshalJSON(raw, &v)
|
||||
if err != nil {
|
||||
return nil, nil, nil, fmt.Errorf("%s query has invalid %q payload: %w",
|
||||
key, k, err)
|
||||
}
|
||||
}
|
||||
payload[k] = v
|
||||
}
|
||||
|
||||
return child, fields, payload, nil
|
||||
}
|
||||
|
||||
// resolveFieldTypes looks up each field name in the index mapping and returns
|
||||
// a map of field name → mapping type (e.g. "datetime", "number", "text").
|
||||
// This is used by the searcher layer to correctly decode doc value bytes.
|
||||
func resolveFieldTypes(fields []string, m mapping.IndexMapping) map[string]string {
|
||||
if m == nil || len(fields) == 0 {
|
||||
return nil
|
||||
}
|
||||
types := make(map[string]string, len(fields))
|
||||
for _, f := range fields {
|
||||
fm := m.FieldMappingForPath(f)
|
||||
if fm.Type != "" {
|
||||
types[f] = fm.Type
|
||||
}
|
||||
}
|
||||
if len(types) == 0 {
|
||||
return nil
|
||||
}
|
||||
return types
|
||||
}
|
||||
130
vendor/github.com/blevesearch/bleve/v2/search/query/custom_score.go
generated
vendored
Normal file
130
vendor/github.com/blevesearch/bleve/v2/search/query/custom_score.go
generated
vendored
Normal file
@@ -0,0 +1,130 @@
|
||||
// Copyright (c) 2026 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package query
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/mapping"
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
"github.com/blevesearch/bleve/v2/search/searcher"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
// CustomScoreQuery wraps a child query and re-scores its candidate matches via
|
||||
// an embedder-provided per-hit callback.
|
||||
type CustomScoreQuery struct {
|
||||
Query Query `json:"query"`
|
||||
Fields []string `json:"fields,omitempty"`
|
||||
|
||||
scoreFunc searcher.CustomScoreFunc
|
||||
payload map[string]interface{}
|
||||
}
|
||||
|
||||
// CustomScoreQueryParser lets an embedder override parsing of
|
||||
// {"custom_score": ...} nodes. It is intended to be assigned once during
|
||||
// process startup or init, before any queries are parsed; callers must not
|
||||
// mutate it concurrently with ParseQuery(). For example:
|
||||
//
|
||||
// func init() {
|
||||
// query.CustomScoreQueryParser = parseCustomScoreQuery
|
||||
// }
|
||||
var CustomScoreQueryParser func([]byte) (Query, error)
|
||||
|
||||
func NewCustomScoreQueryWithScorer(query Query, score searcher.CustomScoreFunc, fields []string, payload map[string]interface{}) *CustomScoreQuery {
|
||||
return &CustomScoreQuery{
|
||||
Query: query,
|
||||
Fields: fields,
|
||||
scoreFunc: score,
|
||||
payload: payload,
|
||||
}
|
||||
}
|
||||
|
||||
func (q *CustomScoreQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
|
||||
if q == nil {
|
||||
return nil, fmt.Errorf("custom score query is nil")
|
||||
}
|
||||
if q.Query == nil {
|
||||
return nil, fmt.Errorf("custom score query must have a query")
|
||||
}
|
||||
if q.scoreFunc == nil {
|
||||
return nil, fmt.Errorf("custom score query must have a score callback")
|
||||
}
|
||||
|
||||
// Build the inner searcher first; custom scoring wraps its output.
|
||||
childSearcher, err := q.Query.Searcher(ctx, i, m, options)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Create a doc value reader for the requested fields (if any) so the
|
||||
// searcher can populate d.Fields before invoking the callback.
|
||||
var dvReader index.DocValueReader
|
||||
var fieldTypes map[string]string
|
||||
if len(q.Fields) > 0 {
|
||||
var err2 error
|
||||
dvReader, err2 = i.DocValueReader(q.Fields)
|
||||
if err2 != nil {
|
||||
_ = childSearcher.Close()
|
||||
return nil, err2
|
||||
}
|
||||
fieldTypes = resolveFieldTypes(q.Fields, m)
|
||||
}
|
||||
|
||||
return searcher.NewCustomScoreSearcher(ctx, childSearcher, q.scoreFunc, dvReader, i, fieldTypes), nil
|
||||
}
|
||||
|
||||
func (q *CustomScoreQuery) Validate() error {
|
||||
if q == nil {
|
||||
return fmt.Errorf("custom score query is nil")
|
||||
}
|
||||
if q.Query == nil {
|
||||
return fmt.Errorf("custom score query must have a query")
|
||||
}
|
||||
if q.scoreFunc == nil {
|
||||
return fmt.Errorf("custom score query must have a score callback")
|
||||
}
|
||||
if vq, ok := q.Query.(ValidatableQuery); ok {
|
||||
return vq.Validate()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (q *CustomScoreQuery) MarshalJSON() ([]byte, error) {
|
||||
inner := make(map[string]interface{}, len(q.payload)+2)
|
||||
for k, v := range q.payload {
|
||||
inner[k] = v
|
||||
}
|
||||
inner["query"] = q.Query
|
||||
if len(q.Fields) > 0 {
|
||||
inner["fields"] = q.Fields
|
||||
}
|
||||
return json.Marshal(map[string]interface{}{
|
||||
"custom_score": inner,
|
||||
})
|
||||
}
|
||||
|
||||
func (q *CustomScoreQuery) UnmarshalJSON(data []byte) error {
|
||||
child, fields, payload, err := unmarshalCustomQueryPayload(data, "custom_score")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
q.Query = child
|
||||
q.Fields = fields
|
||||
q.payload = payload
|
||||
return nil
|
||||
}
|
||||
6
vendor/github.com/blevesearch/bleve/v2/search/query/knn.go
generated
vendored
6
vendor/github.com/blevesearch/bleve/v2/search/query/knn.go
generated
vendored
@@ -84,6 +84,12 @@ func (q *KNNQuery) Searcher(ctx context.Context, i index.IndexReader,
|
||||
if q.K <= 0 || len(q.Vector) == 0 {
|
||||
return nil, fmt.Errorf("k must be greater than 0 and vector must be non-empty")
|
||||
}
|
||||
// bivf-sq8 indexes only support hamming distance for the primary
|
||||
// binary index. Similarity here is used for the backing flat index,
|
||||
// which is set to cosine similarity for recall reasons
|
||||
if index.OptimizationRequiresBinaryIndex(fieldMapping.VectorIndexOptimizedFor) {
|
||||
similarityMetric = index.CosineSimilarity
|
||||
}
|
||||
if similarityMetric == index.CosineSimilarity {
|
||||
// normalize the vector
|
||||
q.Vector = mapping.NormalizeVector(q.Vector)
|
||||
|
||||
28
vendor/github.com/blevesearch/bleve/v2/search/query/query.go
generated
vendored
28
vendor/github.com/blevesearch/bleve/v2/search/query/query.go
generated
vendored
@@ -308,6 +308,20 @@ func ParseQuery(input []byte) (Query, error) {
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
_, hasCustomFilter := tmp["custom_filter"]
|
||||
if hasCustomFilter {
|
||||
if CustomFilterQueryParser == nil {
|
||||
return nil, fmt.Errorf("custom filter query parser is not registered")
|
||||
}
|
||||
return CustomFilterQueryParser(input)
|
||||
}
|
||||
_, hasCustomScore := tmp["custom_score"]
|
||||
if hasCustomScore {
|
||||
if CustomScoreQueryParser == nil {
|
||||
return nil, fmt.Errorf("custom score query parser is not registered")
|
||||
}
|
||||
return CustomScoreQueryParser(input)
|
||||
}
|
||||
_, hasDocIds := tmp["ids"]
|
||||
if hasDocIds {
|
||||
var rv DocIDQuery
|
||||
@@ -455,13 +469,10 @@ func DumpQuery(m mapping.IndexMapping, query Query) (string, error) {
|
||||
return string(data), err
|
||||
}
|
||||
|
||||
// FieldSet represents a set of queried fields.
|
||||
type FieldSet map[string]struct{}
|
||||
|
||||
// ExtractFields returns a set of fields referenced by the query.
|
||||
// The returned set may be nil if the query does not explicitly reference any field
|
||||
// and the DefaultSearchField is unset in the index mapping.
|
||||
func ExtractFields(q Query, m mapping.IndexMapping, fs FieldSet) (FieldSet, error) {
|
||||
func ExtractFields(q Query, m mapping.IndexMapping, fs search.FieldSet) (search.FieldSet, error) {
|
||||
if q == nil || m == nil {
|
||||
return fs, nil
|
||||
}
|
||||
@@ -474,9 +485,9 @@ func ExtractFields(q Query, m mapping.IndexMapping, fs FieldSet) (FieldSet, erro
|
||||
}
|
||||
if f != "" {
|
||||
if fs == nil {
|
||||
fs = make(FieldSet)
|
||||
fs = search.NewFieldSet()
|
||||
}
|
||||
fs[f] = struct{}{}
|
||||
fs.AddField(f)
|
||||
}
|
||||
case *QueryStringQuery:
|
||||
var expandedQuery Query
|
||||
@@ -505,6 +516,11 @@ func ExtractFields(q Query, m mapping.IndexMapping, fs FieldSet) (FieldSet, erro
|
||||
break
|
||||
}
|
||||
}
|
||||
case *DocIDQuery, *MatchAllQuery:
|
||||
if fs == nil {
|
||||
fs = search.NewFieldSet()
|
||||
}
|
||||
fs.AddField("_id")
|
||||
}
|
||||
return fs, err
|
||||
}
|
||||
|
||||
39
vendor/github.com/blevesearch/bleve/v2/search/query/query_string_lex.go
generated
vendored
39
vendor/github.com/blevesearch/bleve/v2/search/query/query_string_lex.go
generated
vendored
@@ -18,9 +18,40 @@ import (
|
||||
"bufio"
|
||||
"io"
|
||||
"strings"
|
||||
"sync"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
var queryStringLexPool = sync.Pool{
|
||||
New: func() interface{} {
|
||||
return &queryStringLex{
|
||||
in: bufio.NewReader(strings.NewReader("")),
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
func getQueryStringLex(in io.Reader) *queryStringLex {
|
||||
l := queryStringLexPool.Get().(*queryStringLex)
|
||||
l.in.Reset(in)
|
||||
l.currState = startState
|
||||
l.currConsumed = true
|
||||
l.buf = ""
|
||||
l.inEscape = false
|
||||
l.nextToken = nil
|
||||
l.nextTokenType = 0
|
||||
l.seenDot = false
|
||||
l.nextRune = 0
|
||||
l.nextRuneSize = 0
|
||||
l.atEOF = false
|
||||
return l
|
||||
}
|
||||
|
||||
func putQueryStringLex(l *queryStringLex) {
|
||||
l.in.Reset(strings.NewReader(""))
|
||||
l.nextToken = nil
|
||||
queryStringLexPool.Put(l)
|
||||
}
|
||||
|
||||
const reservedChars = "+-=&|><!(){}[]^\"~*?:\\/ "
|
||||
|
||||
func unescape(escaped string) string {
|
||||
@@ -82,14 +113,6 @@ func (l *queryStringLex) Lex(lval *yySymType) int {
|
||||
return rv
|
||||
}
|
||||
|
||||
func newQueryStringLex(in io.Reader) *queryStringLex {
|
||||
return &queryStringLex{
|
||||
in: bufio.NewReader(in),
|
||||
currState: startState,
|
||||
currConsumed: true,
|
||||
}
|
||||
}
|
||||
|
||||
type lexState func(l *queryStringLex, next rune, eof bool) (lexState, bool)
|
||||
|
||||
func startState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
|
||||
|
||||
6
vendor/github.com/blevesearch/bleve/v2/search/query/query_string_parser.go
generated
vendored
6
vendor/github.com/blevesearch/bleve/v2/search/query/query_string_parser.go
generated
vendored
@@ -37,11 +37,13 @@ func parseQuerySyntax(query string) (rq Query, err error) {
|
||||
if query == "" {
|
||||
return NewMatchNoneQuery(), nil
|
||||
}
|
||||
lex := newLexerWrapper(newQueryStringLex(strings.NewReader(query)))
|
||||
qsl := getQueryStringLex(strings.NewReader(query))
|
||||
defer putQueryStringLex(qsl)
|
||||
lex := newLexerWrapper(qsl)
|
||||
doParse(lex)
|
||||
|
||||
if len(lex.errs) > 0 {
|
||||
return nil, fmt.Errorf(strings.Join(lex.errs, "\n"))
|
||||
return nil, fmt.Errorf("%s", strings.Join(lex.errs, "\n"))
|
||||
}
|
||||
return lex.query, nil
|
||||
}
|
||||
|
||||
2
vendor/github.com/blevesearch/bleve/v2/search/scorer/scorer_knn.go
generated
vendored
2
vendor/github.com/blevesearch/bleve/v2/search/scorer/scorer_knn.go
generated
vendored
@@ -123,7 +123,7 @@ func (sqs *KNNQueryScorer) Score(ctx *search.SearchContext,
|
||||
if sqs.options.Explain {
|
||||
rv.Expl = scoreExplanation
|
||||
}
|
||||
rv.IndexInternalID = append(rv.IndexInternalID, knnMatch.ID...)
|
||||
rv.IndexInternalID = index.NewIndexInternalIDFrom(rv.IndexInternalID, knnMatch.ID)
|
||||
return rv
|
||||
}
|
||||
|
||||
|
||||
2
vendor/github.com/blevesearch/bleve/v2/search/scorer/scorer_term.go
generated
vendored
2
vendor/github.com/blevesearch/bleve/v2/search/scorer/scorer_term.go
generated
vendored
@@ -243,7 +243,7 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term
|
||||
}
|
||||
}
|
||||
|
||||
rv.IndexInternalID = append(rv.IndexInternalID, termMatch.ID...)
|
||||
rv.IndexInternalID = index.NewIndexInternalIDFrom(rv.IndexInternalID, termMatch.ID)
|
||||
|
||||
if len(termMatch.Vectors) > 0 {
|
||||
if cap(rv.FieldTermLocations) < len(termMatch.Vectors) {
|
||||
|
||||
49
vendor/github.com/blevesearch/bleve/v2/search/search.go
generated
vendored
49
vendor/github.com/blevesearch/bleve/v2/search/search.go
generated
vendored
@@ -165,9 +165,9 @@ type DocumentMatch struct {
|
||||
|
||||
// used to indicate the sub-scores that combined to form the
|
||||
// final score for this document match. This is only populated
|
||||
// when the search request's query is a DisjunctionQuery
|
||||
// or a ConjunctionQuery. The map key is the index of the sub-query
|
||||
// in the DisjunctionQuery or ConjunctionQuery. The map value is the
|
||||
// when the search request's query is a DisjunctionQuery.
|
||||
// The map key is the index of the sub-query
|
||||
// in the DisjunctionQuery. The map value is the
|
||||
// sub-score for that sub-query.
|
||||
ScoreBreakdown map[int]float64 `json:"score_breakdown,omitempty"`
|
||||
|
||||
@@ -178,6 +178,10 @@ type DocumentMatch struct {
|
||||
// of the index that this match came from
|
||||
// of the current alias view, used in alias of aliases scenario
|
||||
IndexNames []string `json:"index_names,omitempty"`
|
||||
|
||||
// Descendants holds the IDs of any child/descendant document that contributed
|
||||
// to this root DocumentMatch.
|
||||
Descendants []index.IndexInternalID `json:"-"`
|
||||
}
|
||||
|
||||
func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) {
|
||||
@@ -201,6 +205,21 @@ func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) {
|
||||
dm.Fields[name] = valSlice
|
||||
}
|
||||
|
||||
func (dm *DocumentMatch) AddFragments(field string, fragments []string) {
|
||||
if dm.Fragments == nil {
|
||||
dm.Fragments = make(FieldFragmentMap)
|
||||
}
|
||||
OUTER:
|
||||
for _, newFrag := range fragments {
|
||||
for _, existingFrag := range dm.Fragments[field] {
|
||||
if existingFrag == newFrag {
|
||||
continue OUTER // no duplicates allowed
|
||||
}
|
||||
}
|
||||
dm.Fragments[field] = append(dm.Fragments[field], newFrag)
|
||||
}
|
||||
}
|
||||
|
||||
// Reset allows an already allocated DocumentMatch to be reused
|
||||
func (dm *DocumentMatch) Reset() *DocumentMatch {
|
||||
// remember the []byte used for the IndexInternalID
|
||||
@@ -218,6 +237,11 @@ func (dm *DocumentMatch) Reset() *DocumentMatch {
|
||||
scoreBreakdown := dm.ScoreBreakdown
|
||||
// clear out the score breakdown map
|
||||
clear(scoreBreakdown)
|
||||
// remember the Descendants backing array
|
||||
descendants := dm.Descendants
|
||||
for i := range descendants { // recycle each IndexInternalID
|
||||
descendants[i] = descendants[i][:0]
|
||||
}
|
||||
// idiom to copy over from empty DocumentMatch (0 allocations)
|
||||
*dm = DocumentMatch{}
|
||||
// reuse the []byte already allocated (and reset len to 0)
|
||||
@@ -228,6 +252,8 @@ func (dm *DocumentMatch) Reset() *DocumentMatch {
|
||||
dm.DecodedSort = decodedSort[:0]
|
||||
// reuse the FieldTermLocations already allocated (and reset len to 0)
|
||||
dm.FieldTermLocations = ftls[:0]
|
||||
// reuse the Descendants already allocated (and reset len to 0)
|
||||
dm.Descendants = descendants[:0]
|
||||
// reuse the score breakdown map already allocated (after clearing it)
|
||||
dm.ScoreBreakdown = scoreBreakdown
|
||||
return dm
|
||||
@@ -402,3 +428,20 @@ func (sc *SearchContext) Size() int {
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
// A NestedDocumentMatch is like a DocumentMatch but used for nested documents
|
||||
// and does not have score or locations, or a score and is mainly used to
|
||||
// hold field values and fragments, to be embedded in the parent DocumentMatch
|
||||
type NestedDocumentMatch struct {
|
||||
Fields map[string]interface{} `json:"fields,omitempty"`
|
||||
Fragments FieldFragmentMap `json:"fragments,omitempty"`
|
||||
}
|
||||
|
||||
// NewNestedDocumentMatch creates a new NestedDocumentMatch instance
|
||||
// with the given fields and fragments
|
||||
func NewNestedDocumentMatch(fields map[string]interface{}, fragments FieldFragmentMap) *NestedDocumentMatch {
|
||||
return &NestedDocumentMatch{
|
||||
Fields: fields,
|
||||
Fragments: fragments,
|
||||
}
|
||||
}
|
||||
|
||||
480
vendor/github.com/blevesearch/bleve/v2/search/searcher/search_conjunction_nested.go
generated
vendored
Normal file
480
vendor/github.com/blevesearch/bleve/v2/search/searcher/search_conjunction_nested.go
generated
vendored
Normal file
@@ -0,0 +1,480 @@
|
||||
// Copyright (c) 2026 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package searcher
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math"
|
||||
"reflect"
|
||||
"slices"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
"github.com/blevesearch/bleve/v2/size"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
var reflectStaticSizeNestedConjunctionSearcher int
|
||||
|
||||
func init() {
|
||||
var ncs NestedConjunctionSearcher
|
||||
reflectStaticSizeNestedConjunctionSearcher = int(reflect.TypeOf(ncs).Size())
|
||||
}
|
||||
|
||||
type NestedConjunctionSearcher struct {
|
||||
nestedReader index.NestedReader
|
||||
searchers []search.Searcher
|
||||
queryNorm float64
|
||||
currs []*search.DocumentMatch
|
||||
currAncestors [][]index.AncestorID
|
||||
currKeys []index.AncestorID
|
||||
initialized bool
|
||||
joinIdx int
|
||||
options search.SearcherOptions
|
||||
docQueue *CoalesceQueue
|
||||
// reusable ID buffer for Advance() calls
|
||||
advanceID index.IndexInternalID
|
||||
// reusable buffer for Advance() calls
|
||||
ancestors []index.AncestorID
|
||||
}
|
||||
|
||||
func NewNestedConjunctionSearcher(ctx context.Context, indexReader index.IndexReader,
|
||||
searchers []search.Searcher, joinIdx int, options search.SearcherOptions) (search.Searcher, error) {
|
||||
|
||||
var nr index.NestedReader
|
||||
var ok bool
|
||||
if nr, ok = indexReader.(index.NestedReader); !ok {
|
||||
return nil, fmt.Errorf("indexReader does not support nested documents")
|
||||
}
|
||||
|
||||
// build our searcher
|
||||
rv := NestedConjunctionSearcher{
|
||||
nestedReader: nr,
|
||||
options: options,
|
||||
searchers: searchers,
|
||||
currs: make([]*search.DocumentMatch, len(searchers)),
|
||||
currAncestors: make([][]index.AncestorID, len(searchers)),
|
||||
currKeys: make([]index.AncestorID, len(searchers)),
|
||||
joinIdx: joinIdx,
|
||||
docQueue: NewCoalesceQueue(),
|
||||
}
|
||||
rv.computeQueryNorm()
|
||||
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func (s *NestedConjunctionSearcher) computeQueryNorm() {
|
||||
// first calculate sum of squared weights
|
||||
sumOfSquaredWeights := 0.0
|
||||
for _, searcher := range s.searchers {
|
||||
sumOfSquaredWeights += searcher.Weight()
|
||||
}
|
||||
// now compute query norm from this
|
||||
s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights)
|
||||
// finally tell all the downstream searchers the norm
|
||||
for _, searcher := range s.searchers {
|
||||
searcher.SetQueryNorm(s.queryNorm)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *NestedConjunctionSearcher) Size() int {
|
||||
sizeInBytes := reflectStaticSizeNestedConjunctionSearcher + size.SizeOfPtr
|
||||
|
||||
for _, entry := range s.searchers {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
for _, entry := range s.currs {
|
||||
if entry != nil {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (s *NestedConjunctionSearcher) Weight() float64 {
|
||||
var rv float64
|
||||
for _, searcher := range s.searchers {
|
||||
rv += searcher.Weight()
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (s *NestedConjunctionSearcher) SetQueryNorm(qnorm float64) {
|
||||
for _, searcher := range s.searchers {
|
||||
searcher.SetQueryNorm(qnorm)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *NestedConjunctionSearcher) Count() uint64 {
|
||||
// for now return a worst case
|
||||
var sum uint64
|
||||
for _, searcher := range s.searchers {
|
||||
sum += searcher.Count()
|
||||
}
|
||||
return sum
|
||||
}
|
||||
|
||||
func (s *NestedConjunctionSearcher) Close() (rv error) {
|
||||
for _, searcher := range s.searchers {
|
||||
err := searcher.Close()
|
||||
if err != nil && rv == nil {
|
||||
rv = err
|
||||
}
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (s *NestedConjunctionSearcher) Min() int {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (s *NestedConjunctionSearcher) DocumentMatchPoolSize() int {
|
||||
rv := len(s.currs)
|
||||
for _, s := range s.searchers {
|
||||
rv += s.DocumentMatchPoolSize()
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (s *NestedConjunctionSearcher) initialize(ctx *search.SearchContext) (bool, error) {
|
||||
var err error
|
||||
for i, searcher := range s.searchers {
|
||||
if s.currs[i] != nil {
|
||||
ctx.DocumentMatchPool.Put(s.currs[i])
|
||||
}
|
||||
s.currs[i], err = searcher.Next(ctx)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if s.currs[i] == nil {
|
||||
// one of the searchers is exhausted, so we are done
|
||||
return true, nil
|
||||
}
|
||||
// get the ancestry chain for this match
|
||||
s.currAncestors[i], err = s.nestedReader.Ancestors(s.currs[i].IndexInternalID, s.currAncestors[i][:0])
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
// check if the ancestry chain is > joinIdx, if not we reset the joinIdx
|
||||
// to the minimum possible value across all searchers, ideally this will be
|
||||
// done in query construction time itself, by using the covering depth across
|
||||
// all sub-queries, but we do this here as a fallback
|
||||
if s.joinIdx >= len(s.currAncestors[i]) {
|
||||
s.joinIdx = len(s.currAncestors[i]) - 1
|
||||
}
|
||||
}
|
||||
// build currKeys for each searcher, do it here as we may have adjusted joinIdx
|
||||
for i := range s.searchers {
|
||||
s.currKeys[i] = ancestorFromRoot(s.currAncestors[i], s.joinIdx)
|
||||
}
|
||||
s.initialized = true
|
||||
return false, nil
|
||||
}
|
||||
|
||||
func (s *NestedConjunctionSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
|
||||
// initialize on first call to Next, by getting first match
|
||||
// from each searcher and their ancestry chains
|
||||
if !s.initialized {
|
||||
done, err := s.initialize(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if done {
|
||||
return nil, nil
|
||||
}
|
||||
}
|
||||
// check if the docQueue has any buffered matches
|
||||
if s.docQueue.Len() > 0 {
|
||||
return s.docQueue.Dequeue(ctx), nil
|
||||
}
|
||||
// now enter the main alignment loop
|
||||
n := len(s.searchers)
|
||||
OUTER:
|
||||
for {
|
||||
// pick the pivot searcher with the highest key (ancestor at joinIdx level)
|
||||
if s.currs[0] == nil {
|
||||
return nil, nil
|
||||
}
|
||||
maxKey := s.currKeys[0]
|
||||
for i := 1; i < n; i++ {
|
||||
// currs[i] is nil means one of the searchers is exhausted
|
||||
if s.currs[i] == nil {
|
||||
return nil, nil
|
||||
}
|
||||
currKey := s.currKeys[i]
|
||||
if maxKey.Compare(currKey) < 0 {
|
||||
maxKey = currKey
|
||||
}
|
||||
}
|
||||
// store maxkey as advanceID only once only if needed
|
||||
var advanceID index.IndexInternalID
|
||||
// flag to track if all searchers are aligned
|
||||
var aligned bool = true
|
||||
// now try to align all other searchers to the
|
||||
// we check if the a searchers key matches maxKey
|
||||
// if not, we advance the pivot searcher to maxKey
|
||||
// else do nothing and move to the next searcher
|
||||
for i := 0; i < n; i++ {
|
||||
cmp := s.currKeys[i].Compare(maxKey)
|
||||
if cmp < 0 {
|
||||
// not aligned, so advance this searcher to maxKey
|
||||
// convert maxKey to advanceID only once
|
||||
if advanceID == nil {
|
||||
advanceID = s.toAdvanceID(maxKey)
|
||||
}
|
||||
var err error
|
||||
ctx.DocumentMatchPool.Put(s.currs[i])
|
||||
s.currs[i], err = s.searchers[i].Advance(ctx, advanceID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if s.currs[i] == nil {
|
||||
// one of the searchers is exhausted, so we are done
|
||||
return nil, nil
|
||||
}
|
||||
// recalc ancestors
|
||||
s.currAncestors[i], err = s.nestedReader.Ancestors(s.currs[i].IndexInternalID, s.currAncestors[i][:0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// recalc key
|
||||
s.currKeys[i] = ancestorFromRoot(s.currAncestors[i], s.joinIdx)
|
||||
// recalc cmp
|
||||
cmp = s.currKeys[i].Compare(maxKey)
|
||||
}
|
||||
if cmp != 0 {
|
||||
// not aligned
|
||||
aligned = false
|
||||
}
|
||||
}
|
||||
// now check if all the searchers are aligned at the same maxKey
|
||||
// if they are not aligned, we need to restart the loop of picking
|
||||
// the pivot searcher with the highest key
|
||||
if !aligned {
|
||||
continue OUTER
|
||||
}
|
||||
// if we are here, all the searchers are aligned at maxKey
|
||||
// now we need to buffer all the intermediate matches for every
|
||||
// searcher at this key, until either the searcher's key changes
|
||||
// or the searcher is exhausted
|
||||
var err error
|
||||
for i := 0; i < n; i++ {
|
||||
for {
|
||||
// buffer the current match
|
||||
s.docQueue.Enqueue(s.currs[i])
|
||||
// advance to next match
|
||||
s.currs[i], err = s.searchers[i].Next(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if s.currs[i] == nil {
|
||||
// searcher exhausted, break out
|
||||
break
|
||||
}
|
||||
// recalc ancestors
|
||||
s.currAncestors[i], err = s.nestedReader.Ancestors(s.currs[i].IndexInternalID, s.currAncestors[i][:0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// recalc key
|
||||
s.currKeys[i] = ancestorFromRoot(s.currAncestors[i], s.joinIdx)
|
||||
// check if key has changed
|
||||
if !s.currKeys[i].Equals(maxKey) {
|
||||
// key changed, break out
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
// finalize the docQueue for dequeueing
|
||||
s.docQueue.Finalize()
|
||||
// finally return the first buffered match
|
||||
return s.docQueue.Dequeue(ctx), nil
|
||||
}
|
||||
}
|
||||
|
||||
// ancestorFromRoot gets the AncestorID at the given position from the root
|
||||
// if pos is 0, it returns the root AncestorID, and so on
|
||||
func ancestorFromRoot(ancestors []index.AncestorID, pos int) index.AncestorID {
|
||||
return ancestors[len(ancestors)-pos-1]
|
||||
}
|
||||
|
||||
// toAdvanceID converts an AncestorID to IndexInternalID, reusing the advanceID buffer.
|
||||
// The returned ID is safe to pass to Advance() since Advance() never retains references.
|
||||
func (s *NestedConjunctionSearcher) toAdvanceID(key index.AncestorID) index.IndexInternalID {
|
||||
// Reset length to 0 while preserving capacity for buffer reuse
|
||||
s.advanceID = s.advanceID[:0]
|
||||
// Convert key to IndexInternalID, reusing the underlying buffer
|
||||
s.advanceID = key.ToIndexInternalID(s.advanceID)
|
||||
return s.advanceID
|
||||
}
|
||||
|
||||
func (s *NestedConjunctionSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
|
||||
if !s.initialized {
|
||||
done, err := s.initialize(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if done {
|
||||
return nil, nil
|
||||
}
|
||||
}
|
||||
// first check if the docQueue has any buffered matches
|
||||
// if so we first check if any of them can satisfy the Advance(ID)
|
||||
for s.docQueue.Len() > 0 {
|
||||
dm := s.docQueue.Dequeue(ctx)
|
||||
if dm.IndexInternalID.Compare(ID) >= 0 {
|
||||
return dm, nil
|
||||
}
|
||||
// otherwise recycle this match
|
||||
ctx.DocumentMatchPool.Put(dm)
|
||||
}
|
||||
var err error
|
||||
// now we first get the ancestry chain for the given ID
|
||||
s.ancestors, err = s.nestedReader.Ancestors(ID, s.ancestors[:0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// we now follow the the following logic for each searcher:
|
||||
// let S be the length of the ancestry chain for the searcher
|
||||
// let I be the length of the ancestry chain for the given ID
|
||||
// 1. if S > I:
|
||||
// then we just Advance() the searcher to the given ID if required
|
||||
// 2. else if S <= I:
|
||||
// then we get the AncestorID at position (S - 1) from the root of
|
||||
// the given ID's ancestry chain, and Advance() the searcher to
|
||||
// it if required
|
||||
for i, searcher := range s.searchers {
|
||||
if s.currs[i] == nil {
|
||||
return nil, nil // already exhausted, nothing to do
|
||||
}
|
||||
var targetID index.IndexInternalID
|
||||
S := len(s.currAncestors[i])
|
||||
I := len(s.ancestors)
|
||||
if S > I {
|
||||
// case 1: S > I
|
||||
targetID = ID
|
||||
} else {
|
||||
// case 2: S <= I
|
||||
targetID = s.toAdvanceID(ancestorFromRoot(s.ancestors, S-1))
|
||||
}
|
||||
if s.currs[i].IndexInternalID.Compare(targetID) < 0 {
|
||||
// need to advance this searcher
|
||||
ctx.DocumentMatchPool.Put(s.currs[i])
|
||||
s.currs[i], err = searcher.Advance(ctx, targetID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if s.currs[i] == nil {
|
||||
// one of the searchers is exhausted, so we are done
|
||||
return nil, nil
|
||||
}
|
||||
// recalc ancestors
|
||||
s.currAncestors[i], err = s.nestedReader.Ancestors(s.currs[i].IndexInternalID, s.currAncestors[i][:0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// recalc key
|
||||
s.currKeys[i] = ancestorFromRoot(s.currAncestors[i], s.joinIdx)
|
||||
}
|
||||
}
|
||||
// we need to call Next() in a loop until we reach or exceed the given ID
|
||||
// the Next() call basically gives us a match that is aligned correctly, but
|
||||
// if joinIdx < I, we can have multiple matches for the same joinIdx ancestor
|
||||
// and they may be < ID, so we need to loop
|
||||
for {
|
||||
next, err := s.Next(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if next == nil {
|
||||
return nil, nil
|
||||
}
|
||||
if next.IndexInternalID.Compare(ID) >= 0 {
|
||||
return next, nil
|
||||
}
|
||||
ctx.DocumentMatchPool.Put(next)
|
||||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------------
|
||||
type CoalesceQueue struct {
|
||||
order []*search.DocumentMatch // queue of DocumentMatch
|
||||
}
|
||||
|
||||
func NewCoalesceQueue() *CoalesceQueue {
|
||||
cq := &CoalesceQueue{
|
||||
order: make([]*search.DocumentMatch, 0),
|
||||
}
|
||||
return cq
|
||||
}
|
||||
|
||||
// Enqueue appends the given DocumentMatch to the queue. Coalescing of duplicates
|
||||
// is deferred until Dequeue, after Finalize has sorted items by IndexInternalID.
|
||||
func (cq *CoalesceQueue) Enqueue(it *search.DocumentMatch) {
|
||||
// append to order slice (this is a stack)
|
||||
cq.order = append(cq.order, it)
|
||||
}
|
||||
|
||||
// Finalize prepares the queue for dequeue operations by sorting the items based on
|
||||
// their IndexInternalID values. This MUST be called before any Dequeue operations,
|
||||
// and after all Enqueue operations are complete. The sort is done in descending order
|
||||
// so that dequeueing will basically be popping from the end of the slice, allowing for
|
||||
// slice reuse.
|
||||
func (cq *CoalesceQueue) Finalize() {
|
||||
slices.SortFunc(cq.order, func(a, b *search.DocumentMatch) int {
|
||||
return b.IndexInternalID.Compare(a.IndexInternalID)
|
||||
})
|
||||
}
|
||||
|
||||
// Dequeue removes and returns the next DocumentMatch in sorted order, merging any
|
||||
// consecutive duplicates. Merged items are recycled via ctx.DocumentMatchPool.
|
||||
// Returns nil when the queue is empty.
|
||||
func (cq *CoalesceQueue) Dequeue(ctx *search.SearchContext) *search.DocumentMatch {
|
||||
if cq.Len() == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// pop from end of slice
|
||||
rv := cq.order[len(cq.order)-1]
|
||||
cq.order = cq.order[:len(cq.order)-1]
|
||||
|
||||
// merge duplicates
|
||||
for cq.Len() > 0 {
|
||||
// peek at next item
|
||||
next := cq.order[len(cq.order)-1]
|
||||
if !rv.IndexInternalID.Equals(next.IndexInternalID) {
|
||||
// different ID, stop merging
|
||||
break
|
||||
}
|
||||
// pop the next item
|
||||
cq.order = cq.order[:len(cq.order)-1]
|
||||
// same ID, merge
|
||||
rv.Score += next.Score
|
||||
rv.Expl = rv.Expl.MergeWith(next.Expl)
|
||||
rv.FieldTermLocations = search.MergeFieldTermLocationsFromMatch(
|
||||
rv.FieldTermLocations, next)
|
||||
// recycle the merged item
|
||||
ctx.DocumentMatchPool.Put(next)
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
// Len returns the number of DocumentMatch items currently in the queue.
|
||||
func (cq *CoalesceQueue) Len() int {
|
||||
return len(cq.order)
|
||||
}
|
||||
111
vendor/github.com/blevesearch/bleve/v2/search/searcher/search_custom_fields.go
generated
vendored
Normal file
111
vendor/github.com/blevesearch/bleve/v2/search/searcher/search_custom_fields.go
generated
vendored
Normal file
@@ -0,0 +1,111 @@
|
||||
// Copyright (c) 2026 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package searcher
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/numeric"
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
// loadDocValuesOnHit uses the supplied DocValueReader to visit doc values
|
||||
// for the given hit and populate hit.Fields. It also resolves hit.ID if empty.
|
||||
// It is a no-op when dvReader is nil.
|
||||
//
|
||||
// fieldTypes maps field name → mapping type (e.g. "datetime", "number").
|
||||
// When provided, datetime fields are decoded from their stored nanosecond
|
||||
// int64 into an RFC3339Nano string, while numeric fields use IEEE 754 bit
|
||||
// reinterpretation to recover the original float64. When nil, all prefix-coded
|
||||
// values use the numeric (float64) path.
|
||||
func loadDocValuesOnHit(hit *search.DocumentMatch, dvReader index.DocValueReader,
|
||||
r index.IndexReader) error {
|
||||
return loadDocValuesOnHitWithTypes(hit, dvReader, r, nil)
|
||||
}
|
||||
|
||||
func loadDocValuesOnHitWithTypes(hit *search.DocumentMatch, dvReader index.DocValueReader,
|
||||
r index.IndexReader, fieldTypes map[string]string) error {
|
||||
// Always resolve external ID so the callback can read hit.ID.
|
||||
if hit.ID == "" && r != nil {
|
||||
extID, err := r.ExternalID(hit.IndexInternalID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hit.ID = extID
|
||||
}
|
||||
|
||||
if dvReader == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
err := dvReader.VisitDocValues(hit.IndexInternalID, func(field string, term []byte) {
|
||||
value := decodeDocValueTerm(term, fieldTypes[field])
|
||||
if value != nil {
|
||||
hit.AddFieldValue(field, value)
|
||||
}
|
||||
})
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// decodeDocValueTerm converts raw doc value bytes into a typed Go value.
|
||||
// Numeric fields are prefix-coded int64s (only shift-0 terms carry values).
|
||||
// Boolean fields are stored as "T" or "F".
|
||||
// Everything else (text/keyword) is returned as a string.
|
||||
//
|
||||
// fieldType is the mapping type string for the field (e.g. "datetime",
|
||||
// "number"). When fieldType is "datetime", the prefix-coded int64 is
|
||||
// treated as raw nanoseconds (time.UnixNano()) and converted to a UTC
|
||||
// RFC3339Nano-formatted string. For numeric fields the int64 is decoded via
|
||||
// Int64ToFloat64 (IEEE 754 bit reinterpretation).
|
||||
func decodeDocValueTerm(term []byte, fieldType string) interface{} {
|
||||
if len(term) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check if it's a prefix-coded numeric term.
|
||||
if valid, shift := numeric.ValidPrefixCodedTermBytes(term); valid {
|
||||
// Only shift-0 terms carry the actual value.
|
||||
if shift != 0 {
|
||||
return nil
|
||||
}
|
||||
i64, err := numeric.PrefixCoded(term).Int64()
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
if fieldType == "datetime" {
|
||||
// Datetime doc values store time.UnixNano() directly as int64.
|
||||
// Convert back to a formatted string so callbacks (including
|
||||
// JS UDFs) receive a human-readable date like "2022-03-10T00:00:00Z".
|
||||
return time.Unix(0, i64).UTC().Format(time.RFC3339Nano)
|
||||
}
|
||||
// Numeric float64 fields use Float64ToInt64 bit manipulation encoding.
|
||||
return numeric.Int64ToFloat64(i64)
|
||||
}
|
||||
|
||||
// Boolean fields are stored as "T" or "F".
|
||||
if len(term) == 1 {
|
||||
if term[0] == 'T' {
|
||||
return true
|
||||
}
|
||||
if term[0] == 'F' {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// Default: text/keyword — return as string.
|
||||
return string(term)
|
||||
}
|
||||
121
vendor/github.com/blevesearch/bleve/v2/search/searcher/search_custom_filter.go
generated
vendored
Normal file
121
vendor/github.com/blevesearch/bleve/v2/search/searcher/search_custom_filter.go
generated
vendored
Normal file
@@ -0,0 +1,121 @@
|
||||
// Copyright (c) 2026 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package searcher
|
||||
|
||||
import (
|
||||
"context"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
"github.com/blevesearch/bleve/v2/size"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
var reflectStaticSizeCustomFilterSearcher int
|
||||
|
||||
func init() {
|
||||
var cfs CustomFilterSearcher
|
||||
reflectStaticSizeCustomFilterSearcher = int(reflect.TypeOf(cfs).Size())
|
||||
}
|
||||
|
||||
// CustomFilterFunc decides whether a hit (with doc-value fields populated)
|
||||
// should be kept. Unlike FilterFunc it does not receive a SearchContext since
|
||||
// custom-query callbacks only need the DocumentMatch.
|
||||
type CustomFilterFunc func(d *search.DocumentMatch) bool
|
||||
|
||||
// CustomFilterSearcher wraps a child searcher, optionally loads doc values
|
||||
// into each DocumentMatch, then applies a CustomFilterFunc to decide whether
|
||||
// to keep the hit. Unlike FilteringSearcher this variant is purpose-built for
|
||||
// custom queries that need field values at callback time.
|
||||
type CustomFilterSearcher struct {
|
||||
child search.Searcher
|
||||
accept CustomFilterFunc
|
||||
dvReader index.DocValueReader
|
||||
indexReader index.IndexReader
|
||||
fieldTypes map[string]string
|
||||
}
|
||||
|
||||
func NewCustomFilterSearcher(ctx context.Context, child search.Searcher,
|
||||
filter CustomFilterFunc, dvReader index.DocValueReader,
|
||||
indexReader index.IndexReader,
|
||||
fieldTypes map[string]string) *CustomFilterSearcher {
|
||||
return &CustomFilterSearcher{
|
||||
child: child,
|
||||
accept: filter,
|
||||
dvReader: dvReader,
|
||||
indexReader: indexReader,
|
||||
fieldTypes: fieldTypes,
|
||||
}
|
||||
}
|
||||
|
||||
func (f *CustomFilterSearcher) Size() int {
|
||||
return reflectStaticSizeCustomFilterSearcher + size.SizeOfPtr +
|
||||
f.child.Size()
|
||||
}
|
||||
|
||||
func (f *CustomFilterSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
|
||||
next, err := f.child.Next(ctx)
|
||||
for next != nil && err == nil {
|
||||
if err = loadDocValuesOnHitWithTypes(next, f.dvReader, f.indexReader, f.fieldTypes); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if f.accept(next) {
|
||||
return next, nil
|
||||
}
|
||||
next, err = f.child.Next(ctx)
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
func (f *CustomFilterSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
|
||||
adv, err := f.child.Advance(ctx, ID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if adv == nil {
|
||||
return nil, nil
|
||||
}
|
||||
if err = loadDocValuesOnHitWithTypes(adv, f.dvReader, f.indexReader, f.fieldTypes); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if f.accept(adv) {
|
||||
return adv, nil
|
||||
}
|
||||
return f.Next(ctx)
|
||||
}
|
||||
|
||||
func (f *CustomFilterSearcher) Close() error {
|
||||
return f.child.Close()
|
||||
}
|
||||
|
||||
func (f *CustomFilterSearcher) Weight() float64 {
|
||||
return f.child.Weight()
|
||||
}
|
||||
|
||||
func (f *CustomFilterSearcher) SetQueryNorm(n float64) {
|
||||
f.child.SetQueryNorm(n)
|
||||
}
|
||||
|
||||
func (f *CustomFilterSearcher) Count() uint64 {
|
||||
return f.child.Count()
|
||||
}
|
||||
|
||||
func (f *CustomFilterSearcher) Min() int {
|
||||
return f.child.Min()
|
||||
}
|
||||
|
||||
func (f *CustomFilterSearcher) DocumentMatchPoolSize() int {
|
||||
return f.child.DocumentMatchPoolSize()
|
||||
}
|
||||
114
vendor/github.com/blevesearch/bleve/v2/search/searcher/search_custom_score.go
generated
vendored
Normal file
114
vendor/github.com/blevesearch/bleve/v2/search/searcher/search_custom_score.go
generated
vendored
Normal file
@@ -0,0 +1,114 @@
|
||||
// Copyright (c) 2026 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package searcher
|
||||
|
||||
import (
|
||||
"context"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
"github.com/blevesearch/bleve/v2/size"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
var reflectStaticSizeCustomScoreSearcher int
|
||||
|
||||
func init() {
|
||||
var sfs CustomScoreSearcher
|
||||
reflectStaticSizeCustomScoreSearcher = int(reflect.TypeOf(sfs).Size())
|
||||
}
|
||||
|
||||
// CustomScoreFunc defines a function which can mutate document scores.
|
||||
type CustomScoreFunc func(d *search.DocumentMatch) float64
|
||||
|
||||
// CustomScoreSearcher wraps any other searcher, optionally loads doc values
|
||||
// into each DocumentMatch, then mutates the score using the supplied
|
||||
// CustomScoreFunc.
|
||||
type CustomScoreSearcher struct {
|
||||
child search.Searcher
|
||||
mutate CustomScoreFunc
|
||||
dvReader index.DocValueReader
|
||||
indexReader index.IndexReader
|
||||
fieldTypes map[string]string
|
||||
}
|
||||
|
||||
func NewCustomScoreSearcher(ctx context.Context, s search.Searcher, mutate CustomScoreFunc,
|
||||
dvReader index.DocValueReader, indexReader index.IndexReader,
|
||||
fieldTypes map[string]string) *CustomScoreSearcher {
|
||||
return &CustomScoreSearcher{
|
||||
child: s,
|
||||
mutate: mutate,
|
||||
dvReader: dvReader,
|
||||
indexReader: indexReader,
|
||||
fieldTypes: fieldTypes,
|
||||
}
|
||||
}
|
||||
|
||||
func (f *CustomScoreSearcher) Size() int {
|
||||
return reflectStaticSizeCustomScoreSearcher + size.SizeOfPtr +
|
||||
f.child.Size()
|
||||
}
|
||||
|
||||
func (f *CustomScoreSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
|
||||
next, err := f.child.Next(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if next != nil {
|
||||
if err = loadDocValuesOnHitWithTypes(next, f.dvReader, f.indexReader, f.fieldTypes); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
next.Score = f.mutate(next)
|
||||
}
|
||||
return next, nil
|
||||
}
|
||||
|
||||
func (f *CustomScoreSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
|
||||
adv, err := f.child.Advance(ctx, ID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if adv != nil {
|
||||
if err = loadDocValuesOnHitWithTypes(adv, f.dvReader, f.indexReader, f.fieldTypes); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
adv.Score = f.mutate(adv)
|
||||
}
|
||||
return adv, nil
|
||||
}
|
||||
|
||||
func (f *CustomScoreSearcher) Close() error {
|
||||
return f.child.Close()
|
||||
}
|
||||
|
||||
func (f *CustomScoreSearcher) Weight() float64 {
|
||||
return f.child.Weight()
|
||||
}
|
||||
|
||||
func (f *CustomScoreSearcher) SetQueryNorm(n float64) {
|
||||
f.child.SetQueryNorm(n)
|
||||
}
|
||||
|
||||
func (f *CustomScoreSearcher) Count() uint64 {
|
||||
return f.child.Count()
|
||||
}
|
||||
|
||||
func (f *CustomScoreSearcher) Min() int {
|
||||
return f.child.Min()
|
||||
}
|
||||
|
||||
func (f *CustomScoreSearcher) DocumentMatchPoolSize() int {
|
||||
return f.child.DocumentMatchPoolSize()
|
||||
}
|
||||
7
vendor/github.com/blevesearch/bleve/v2/search/searcher/search_disjunction_heap.go
generated
vendored
7
vendor/github.com/blevesearch/bleve/v2/search/searcher/search_disjunction_heap.go
generated
vendored
@@ -15,7 +15,6 @@
|
||||
package searcher
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"container/heap"
|
||||
"context"
|
||||
"math"
|
||||
@@ -169,7 +168,7 @@ func (s *DisjunctionHeapSearcher) updateMatches() error {
|
||||
matchingIdxs = append(matchingIdxs, next.matchingIdx)
|
||||
|
||||
// now as long as top of heap matches, keep popping
|
||||
for len(s.heap) > 0 && bytes.Compare(next.curr.IndexInternalID, s.heap[0].curr.IndexInternalID) == 0 {
|
||||
for len(s.heap) > 0 && next.curr.IndexInternalID.Equals(s.heap[0].curr.IndexInternalID) {
|
||||
next = heap.Pop(s).(*SearcherCurr)
|
||||
matching = append(matching, next.curr)
|
||||
matchingCurrs = append(matchingCurrs, next)
|
||||
@@ -264,7 +263,7 @@ func (s *DisjunctionHeapSearcher) Advance(ctx *search.SearchContext,
|
||||
|
||||
// find all searchers that actually need to be advanced
|
||||
// advance them, using s.matchingCurrs as temp storage
|
||||
for len(s.heap) > 0 && bytes.Compare(s.heap[0].curr.IndexInternalID, ID) < 0 {
|
||||
for len(s.heap) > 0 && s.heap[0].curr.IndexInternalID.Compare(ID) < 0 {
|
||||
searcherCurr := heap.Pop(s).(*SearcherCurr)
|
||||
ctx.DocumentMatchPool.Put(searcherCurr.curr)
|
||||
curr, err := searcherCurr.searcher.Advance(ctx, ID)
|
||||
@@ -347,7 +346,7 @@ func (s *DisjunctionHeapSearcher) Less(i, j int) bool {
|
||||
} else if s.heap[j].curr == nil {
|
||||
return false
|
||||
}
|
||||
return bytes.Compare(s.heap[i].curr.IndexInternalID, s.heap[j].curr.IndexInternalID) < 0
|
||||
return s.heap[i].curr.IndexInternalID.Compare(s.heap[j].curr.IndexInternalID) < 0
|
||||
}
|
||||
|
||||
func (s *DisjunctionHeapSearcher) Swap(i, j int) {
|
||||
|
||||
6
vendor/github.com/blevesearch/bleve/v2/search/searcher/search_filter.go
generated
vendored
6
vendor/github.com/blevesearch/bleve/v2/search/searcher/search_filter.go
generated
vendored
@@ -60,6 +60,9 @@ func (f *FilteringSearcher) Next(ctx *search.SearchContext) (*search.DocumentMat
|
||||
if f.accept(ctx, next) {
|
||||
return next, nil
|
||||
}
|
||||
// recycle this document match now, since
|
||||
// we do not need it anymore
|
||||
ctx.DocumentMatchPool.Put(next)
|
||||
next, err = f.child.Next(ctx)
|
||||
}
|
||||
return nil, err
|
||||
@@ -76,6 +79,9 @@ func (f *FilteringSearcher) Advance(ctx *search.SearchContext, ID index.IndexInt
|
||||
if f.accept(ctx, adv) {
|
||||
return adv, nil
|
||||
}
|
||||
// recycle this document match now, since
|
||||
// we do not need it anymore
|
||||
ctx.DocumentMatchPool.Put(adv)
|
||||
return f.Next(ctx)
|
||||
}
|
||||
|
||||
|
||||
47
vendor/github.com/blevesearch/bleve/v2/search/searcher/search_geoboundingbox.go
generated
vendored
47
vendor/github.com/blevesearch/bleve/v2/search/searcher/search_geoboundingbox.go
generated
vendored
@@ -53,7 +53,7 @@ func NewGeoBoundingBoxSearcher(ctx context.Context, indexReader index.IndexReade
|
||||
}
|
||||
|
||||
return NewFilteringSearcher(ctx, boxSearcher, buildRectFilter(ctx, dvReader,
|
||||
field, minLon, minLat, maxLon, maxLat)), nil
|
||||
minLon, minLat, maxLon, maxLat)), nil
|
||||
}
|
||||
}
|
||||
|
||||
@@ -88,7 +88,7 @@ func NewGeoBoundingBoxSearcher(ctx context.Context, indexReader index.IndexReade
|
||||
}
|
||||
// add filter to check points near the boundary
|
||||
onBoundarySearcher = NewFilteringSearcher(ctx, rawOnBoundarySearcher,
|
||||
buildRectFilter(ctx, dvReader, field, minLon, minLat, maxLon, maxLat))
|
||||
buildRectFilter(ctx, dvReader, minLon, minLat, maxLon, maxLat))
|
||||
openedSearchers = append(openedSearchers, onBoundarySearcher)
|
||||
}
|
||||
|
||||
@@ -205,28 +205,35 @@ func buildIsIndexedFunc(ctx context.Context, indexReader index.IndexReader, fiel
|
||||
return isIndexed, closeF, err
|
||||
}
|
||||
|
||||
func buildRectFilter(ctx context.Context, dvReader index.DocValueReader, field string,
|
||||
func buildRectFilter(ctx context.Context, dvReader index.DocValueReader,
|
||||
minLon, minLat, maxLon, maxLat float64,
|
||||
) FilterFunc {
|
||||
// reuse the following for each document match that is checked using the filter
|
||||
var lons, lats []float64
|
||||
var found bool
|
||||
dvVisitor := func(_ string, term []byte) {
|
||||
if found {
|
||||
// avoid redundant work if already found
|
||||
return
|
||||
}
|
||||
// only consider the values which are shifted 0
|
||||
prefixCoded := numeric.PrefixCoded(term)
|
||||
shift, err := prefixCoded.Shift()
|
||||
if err == nil && shift == 0 {
|
||||
var i64 int64
|
||||
i64, err = prefixCoded.Int64()
|
||||
if err == nil {
|
||||
lons = append(lons, geo.MortonUnhashLon(uint64(i64)))
|
||||
lats = append(lats, geo.MortonUnhashLat(uint64(i64)))
|
||||
found = true
|
||||
}
|
||||
}
|
||||
}
|
||||
return func(sctx *search.SearchContext, d *search.DocumentMatch) bool {
|
||||
// check geo matches against all numeric type terms indexed
|
||||
var lons, lats []float64
|
||||
var found bool
|
||||
err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) {
|
||||
// only consider the values which are shifted 0
|
||||
prefixCoded := numeric.PrefixCoded(term)
|
||||
shift, err := prefixCoded.Shift()
|
||||
if err == nil && shift == 0 {
|
||||
var i64 int64
|
||||
i64, err = prefixCoded.Int64()
|
||||
if err == nil {
|
||||
lons = append(lons, geo.MortonUnhashLon(uint64(i64)))
|
||||
lats = append(lats, geo.MortonUnhashLat(uint64(i64)))
|
||||
found = true
|
||||
}
|
||||
}
|
||||
})
|
||||
if err == nil && found {
|
||||
lons, lats = lons[:0], lats[:0]
|
||||
found = false
|
||||
if err := dvReader.VisitDocValues(d.IndexInternalID, dvVisitor); err == nil && found {
|
||||
bytes := dvReader.BytesRead()
|
||||
if bytes > 0 {
|
||||
reportIOStats(ctx, bytes)
|
||||
|
||||
44
vendor/github.com/blevesearch/bleve/v2/search/searcher/search_geopointdistance.go
generated
vendored
44
vendor/github.com/blevesearch/bleve/v2/search/searcher/search_geopointdistance.go
generated
vendored
@@ -66,7 +66,7 @@ func NewGeoPointDistanceSearcher(ctx context.Context, indexReader index.IndexRea
|
||||
|
||||
// wrap it in a filtering searcher which checks the actual distance
|
||||
return NewFilteringSearcher(ctx, rectSearcher,
|
||||
buildDistFilter(ctx, dvReader, field, centerLon, centerLat, dist)), nil
|
||||
buildDistFilter(ctx, dvReader, centerLon, centerLat, dist)), nil
|
||||
}
|
||||
|
||||
// boxSearcher builds a searcher for the described bounding box
|
||||
@@ -113,27 +113,33 @@ func boxSearcher(ctx context.Context, indexReader index.IndexReader,
|
||||
return boxSearcher, nil
|
||||
}
|
||||
|
||||
func buildDistFilter(ctx context.Context, dvReader index.DocValueReader, field string,
|
||||
func buildDistFilter(ctx context.Context, dvReader index.DocValueReader,
|
||||
centerLon, centerLat, maxDist float64) FilterFunc {
|
||||
// reuse the following for each document match that is checked using the filter
|
||||
var lons, lats []float64
|
||||
var found bool
|
||||
dvVisitor := func(_ string, term []byte) {
|
||||
if found {
|
||||
// avoid redundant work if already found
|
||||
return
|
||||
}
|
||||
// only consider the values which are shifted 0
|
||||
prefixCoded := numeric.PrefixCoded(term)
|
||||
shift, err := prefixCoded.Shift()
|
||||
if err == nil && shift == 0 {
|
||||
i64, err := prefixCoded.Int64()
|
||||
if err == nil {
|
||||
lons = append(lons, geo.MortonUnhashLon(uint64(i64)))
|
||||
lats = append(lats, geo.MortonUnhashLat(uint64(i64)))
|
||||
found = true
|
||||
}
|
||||
}
|
||||
}
|
||||
return func(sctx *search.SearchContext, d *search.DocumentMatch) bool {
|
||||
// check geo matches against all numeric type terms indexed
|
||||
var lons, lats []float64
|
||||
var found bool
|
||||
|
||||
err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) {
|
||||
// only consider the values which are shifted 0
|
||||
prefixCoded := numeric.PrefixCoded(term)
|
||||
shift, err := prefixCoded.Shift()
|
||||
if err == nil && shift == 0 {
|
||||
i64, err := prefixCoded.Int64()
|
||||
if err == nil {
|
||||
lons = append(lons, geo.MortonUnhashLon(uint64(i64)))
|
||||
lats = append(lats, geo.MortonUnhashLat(uint64(i64)))
|
||||
found = true
|
||||
}
|
||||
}
|
||||
})
|
||||
if err == nil && found {
|
||||
lons, lats = lons[:0], lats[:0]
|
||||
found = false
|
||||
if err := dvReader.VisitDocValues(d.IndexInternalID, dvVisitor); err == nil && found {
|
||||
bytes := dvReader.BytesRead()
|
||||
if bytes > 0 {
|
||||
reportIOStats(ctx, bytes)
|
||||
|
||||
49
vendor/github.com/blevesearch/bleve/v2/search/searcher/search_geopolygon.go
generated
vendored
49
vendor/github.com/blevesearch/bleve/v2/search/searcher/search_geopolygon.go
generated
vendored
@@ -85,28 +85,37 @@ func almostEqual(a, b float64) bool {
|
||||
// here: https://wrf.ecse.rpi.edu/nikola/pubdetails/pnpoly.html
|
||||
func buildPolygonFilter(ctx context.Context, dvReader index.DocValueReader, field string,
|
||||
coordinates []geo.Point) FilterFunc {
|
||||
// reuse the following for each document match that is checked using the filter
|
||||
var lons, lats []float64
|
||||
var found bool
|
||||
dvVisitor := func(_ string, term []byte) {
|
||||
if found {
|
||||
// avoid redundant work if already found
|
||||
return
|
||||
}
|
||||
// only consider the values which are shifted 0
|
||||
prefixCoded := numeric.PrefixCoded(term)
|
||||
shift, err := prefixCoded.Shift()
|
||||
if err == nil && shift == 0 {
|
||||
i64, err := prefixCoded.Int64()
|
||||
if err == nil {
|
||||
lons = append(lons, geo.MortonUnhashLon(uint64(i64)))
|
||||
lats = append(lats, geo.MortonUnhashLat(uint64(i64)))
|
||||
found = true
|
||||
}
|
||||
}
|
||||
}
|
||||
rayIntersectsSegment := func(point, a, b geo.Point) bool {
|
||||
return (a.Lat > point.Lat) != (b.Lat > point.Lat) &&
|
||||
point.Lon < (b.Lon-a.Lon)*(point.Lat-a.Lat)/(b.Lat-a.Lat)+a.Lon
|
||||
}
|
||||
return func(sctx *search.SearchContext, d *search.DocumentMatch) bool {
|
||||
// check geo matches against all numeric type terms indexed
|
||||
var lons, lats []float64
|
||||
var found bool
|
||||
|
||||
err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) {
|
||||
// only consider the values which are shifted 0
|
||||
prefixCoded := numeric.PrefixCoded(term)
|
||||
shift, err := prefixCoded.Shift()
|
||||
if err == nil && shift == 0 {
|
||||
i64, err := prefixCoded.Int64()
|
||||
if err == nil {
|
||||
lons = append(lons, geo.MortonUnhashLon(uint64(i64)))
|
||||
lats = append(lats, geo.MortonUnhashLat(uint64(i64)))
|
||||
found = true
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
lons, lats = lons[:0], lats[:0]
|
||||
found = false
|
||||
// Note: this approach works for points which are strictly inside
|
||||
// the polygon. ie it might fail for certain points on the polygon boundaries.
|
||||
if err == nil && found {
|
||||
if err := dvReader.VisitDocValues(d.IndexInternalID, dvVisitor); err == nil && found {
|
||||
bytes := dvReader.BytesRead()
|
||||
if bytes > 0 {
|
||||
reportIOStats(ctx, bytes)
|
||||
@@ -116,10 +125,6 @@ func buildPolygonFilter(ctx context.Context, dvReader index.DocValueReader, fiel
|
||||
if len(coordinates) < 3 {
|
||||
return false
|
||||
}
|
||||
rayIntersectsSegment := func(point, a, b geo.Point) bool {
|
||||
return (a.Lat > point.Lat) != (b.Lat > point.Lat) &&
|
||||
point.Lon < (b.Lon-a.Lon)*(point.Lat-a.Lat)/(b.Lat-a.Lat)+a.Lon
|
||||
}
|
||||
|
||||
for i := range lons {
|
||||
pt := geo.Point{Lon: lons[i], Lat: lats[i]}
|
||||
|
||||
96
vendor/github.com/blevesearch/bleve/v2/search/searcher/search_geoshape.go
generated
vendored
96
vendor/github.com/blevesearch/bleve/v2/search/searcher/search_geoshape.go
generated
vendored
@@ -58,18 +58,13 @@ func NewGeoShapeSearcher(ctx context.Context, indexReader index.IndexReader, sha
|
||||
return NewFilteringSearcher(ctx, mSearcher, buildRelationFilterOnShapes(ctx, dvReader, field, relation, shape)), nil
|
||||
}
|
||||
|
||||
// Using the same term splitter slice used in the doc values in zap.
|
||||
// TODO: This needs to be revisited whenever we change the zap
|
||||
// implementation of doc values.
|
||||
var termSeparatorSplitSlice = []byte{0xff}
|
||||
|
||||
func buildRelationFilterOnShapes(ctx context.Context, dvReader index.DocValueReader, field string,
|
||||
relation string, shape index.GeoJSON,
|
||||
) FilterFunc {
|
||||
// this is for accumulating the shape's actual complete value
|
||||
// spread across multiple docvalue visitor callbacks.
|
||||
var dvShapeValue []byte
|
||||
var startReading, finishReading bool
|
||||
var startReading, finishReading, found bool
|
||||
var reader *bytes.Reader
|
||||
|
||||
var bufPool *s2.GeoBufferPool
|
||||
@@ -77,51 +72,58 @@ func buildRelationFilterOnShapes(ctx context.Context, dvReader index.DocValueRea
|
||||
bufPool = bufPoolCallback()
|
||||
}
|
||||
|
||||
return func(sctx *search.SearchContext, d *search.DocumentMatch) bool {
|
||||
var found bool
|
||||
dvVisitor := func(_ string, term []byte) {
|
||||
if found {
|
||||
// avoid redundant work if already found
|
||||
return
|
||||
}
|
||||
tl := len(term)
|
||||
// only consider the values which are GlueBytes prefixed or
|
||||
// if it had already started reading the shape bytes from previous callbacks.
|
||||
if startReading || tl > geo.GlueBytesOffset {
|
||||
|
||||
err := dvReader.VisitDocValues(d.IndexInternalID,
|
||||
func(field string, term []byte) {
|
||||
// only consider the values which are GlueBytes prefixed or
|
||||
// if it had already started reading the shape bytes from previous callbacks.
|
||||
if startReading || len(term) > geo.GlueBytesOffset {
|
||||
if !startReading && bytes.Equal(geo.GlueBytes, term[:geo.GlueBytesOffset]) {
|
||||
startReading = true
|
||||
|
||||
if !startReading && bytes.Equal(geo.GlueBytes, term[:geo.GlueBytesOffset]) {
|
||||
startReading = true
|
||||
|
||||
if bytes.Equal(geo.GlueBytes, term[len(term)-geo.GlueBytesOffset:]) {
|
||||
term = term[:len(term)-geo.GlueBytesOffset]
|
||||
finishReading = true
|
||||
}
|
||||
|
||||
dvShapeValue = append(dvShapeValue, term[geo.GlueBytesOffset:]...)
|
||||
|
||||
} else if startReading && !finishReading {
|
||||
if len(term) > geo.GlueBytesOffset &&
|
||||
bytes.Equal(geo.GlueBytes, term[len(term)-geo.GlueBytesOffset:]) {
|
||||
term = term[:len(term)-geo.GlueBytesOffset]
|
||||
finishReading = true
|
||||
}
|
||||
|
||||
term = append(termSeparatorSplitSlice, term...)
|
||||
dvShapeValue = append(dvShapeValue, term...)
|
||||
}
|
||||
|
||||
// apply the filter once the entire docvalue is finished reading.
|
||||
if finishReading {
|
||||
v, err := geojson.FilterGeoShapesOnRelation(shape, dvShapeValue, relation, &reader, bufPool)
|
||||
if err == nil && v {
|
||||
found = true
|
||||
}
|
||||
|
||||
dvShapeValue = dvShapeValue[:0]
|
||||
startReading = false
|
||||
finishReading = false
|
||||
}
|
||||
if bytes.Equal(geo.GlueBytes, term[tl-geo.GlueBytesOffset:]) {
|
||||
term = term[:tl-geo.GlueBytesOffset]
|
||||
finishReading = true
|
||||
}
|
||||
})
|
||||
|
||||
if err == nil && found {
|
||||
dvShapeValue = append(dvShapeValue, term[geo.GlueBytesOffset:]...)
|
||||
|
||||
} else if startReading && !finishReading {
|
||||
if tl > geo.GlueBytesOffset &&
|
||||
bytes.Equal(geo.GlueBytes, term[tl-geo.GlueBytesOffset:]) {
|
||||
term = term[:tl-geo.GlueBytesOffset]
|
||||
finishReading = true
|
||||
}
|
||||
|
||||
dvShapeValue = append(dvShapeValue, index.DocValueTermSeparator)
|
||||
dvShapeValue = append(dvShapeValue, term...)
|
||||
}
|
||||
|
||||
// apply the filter once the entire docvalue is finished reading.
|
||||
if finishReading {
|
||||
v, err := geojson.FilterGeoShapesOnRelation(shape, dvShapeValue, relation, &reader, bufPool)
|
||||
if err == nil && v {
|
||||
found = true
|
||||
}
|
||||
|
||||
dvShapeValue = dvShapeValue[:0]
|
||||
startReading = false
|
||||
finishReading = false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return func(sctx *search.SearchContext, d *search.DocumentMatch) bool {
|
||||
// reset state variables for each document
|
||||
found = false
|
||||
startReading = false
|
||||
finishReading = false
|
||||
dvShapeValue = dvShapeValue[:0]
|
||||
if err := dvReader.VisitDocValues(d.IndexInternalID, dvVisitor); err == nil && found {
|
||||
bytes := dvReader.BytesRead()
|
||||
if bytes > 0 {
|
||||
reportIOStats(ctx, bytes)
|
||||
|
||||
2
vendor/github.com/blevesearch/bleve/v2/search/searcher/search_numeric_range.go
generated
vendored
2
vendor/github.com/blevesearch/bleve/v2/search/searcher/search_numeric_range.go
generated
vendored
@@ -132,7 +132,7 @@ func filterCandidateTerms(indexReader index.IndexReader,
|
||||
for err == nil && tfd != nil {
|
||||
termBytes := []byte(tfd.Term)
|
||||
i := sort.Search(len(terms), func(i int) bool { return bytes.Compare(terms[i], termBytes) >= 0 })
|
||||
if i < len(terms) && bytes.Compare(terms[i], termBytes) == 0 {
|
||||
if i < len(terms) && bytes.Equal(terms[i], termBytes) {
|
||||
rv = append(rv, terms[i])
|
||||
}
|
||||
terms = terms[i:]
|
||||
|
||||
36
vendor/github.com/blevesearch/bleve/v2/search/sort.go
generated
vendored
36
vendor/github.com/blevesearch/bleve/v2/search/sort.go
generated
vendored
@@ -683,29 +683,29 @@ type SortGeoDistance struct {
|
||||
Field string
|
||||
Desc bool
|
||||
Unit string
|
||||
values []string
|
||||
values [][]byte
|
||||
Lon float64
|
||||
Lat float64
|
||||
unitMult float64
|
||||
tmp []byte
|
||||
}
|
||||
|
||||
// UpdateVisitor notifies this sort field that in this document
|
||||
// this field has the specified term
|
||||
func (s *SortGeoDistance) UpdateVisitor(field string, term []byte) {
|
||||
if field == s.Field {
|
||||
s.values = append(s.values, string(term))
|
||||
s.values = append(s.values, term)
|
||||
}
|
||||
}
|
||||
|
||||
// Value returns the sort value of the DocumentMatch
|
||||
// it also resets the state of this SortField for
|
||||
// it also resets the state of this SortGeoDistance for
|
||||
// processing the next document
|
||||
func (s *SortGeoDistance) Value(i *DocumentMatch) string {
|
||||
iTerms := s.filterTermsByType(s.values)
|
||||
iTerm := s.filterTermsByMode(iTerms)
|
||||
iTerm := s.findPrefixCodedNumericTerm(s.values)
|
||||
s.values = s.values[:0]
|
||||
|
||||
if iTerm == "" {
|
||||
if iTerm == nil {
|
||||
return maxDistance
|
||||
}
|
||||
|
||||
@@ -723,7 +723,8 @@ func (s *SortGeoDistance) Value(i *DocumentMatch) string {
|
||||
dist /= s.unitMult
|
||||
}
|
||||
distInt64 := numeric.Float64ToInt64(dist)
|
||||
return string(numeric.MustNewPrefixCodedInt64(distInt64, 0))
|
||||
s.tmp = numeric.MustNewPrefixCodedInt64Prealloc(distInt64, 0, s.tmp)
|
||||
return string(s.tmp)
|
||||
}
|
||||
|
||||
func (s *SortGeoDistance) DecodeValue(value string) string {
|
||||
@@ -739,25 +740,16 @@ func (s *SortGeoDistance) Descending() bool {
|
||||
return s.Desc
|
||||
}
|
||||
|
||||
func (s *SortGeoDistance) filterTermsByMode(terms []string) string {
|
||||
if len(terms) >= 1 {
|
||||
return terms[0]
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// filterTermsByType attempts to make one pass on the terms
|
||||
// return only valid prefix coded numbers with shift of 0
|
||||
func (s *SortGeoDistance) filterTermsByType(terms []string) []string {
|
||||
var termsWithShiftZero []string
|
||||
// findPrefixCodedNumericTerm looks through the provided terms
|
||||
// and returns the first valid prefix coded numeric term with shift of 0
|
||||
func (s *SortGeoDistance) findPrefixCodedNumericTerm(terms [][]byte) []byte {
|
||||
for _, term := range terms {
|
||||
valid, shift := numeric.ValidPrefixCodedTerm(term)
|
||||
valid, shift := numeric.ValidPrefixCodedTermBytes(term)
|
||||
if valid && shift == 0 {
|
||||
termsWithShiftZero = append(termsWithShiftZero, term)
|
||||
return term
|
||||
}
|
||||
}
|
||||
return termsWithShiftZero
|
||||
return nil
|
||||
}
|
||||
|
||||
// RequiresDocID says this SearchSort does not require the DocID be loaded
|
||||
|
||||
125
vendor/github.com/blevesearch/bleve/v2/search/util.go
generated
vendored
125
vendor/github.com/blevesearch/bleve/v2/search/util.go
generated
vendored
@@ -50,41 +50,54 @@ func MergeTermLocationMaps(rv, other TermLocationMap) TermLocationMap {
|
||||
func MergeFieldTermLocations(dest []FieldTermLocation, matches []*DocumentMatch) []FieldTermLocation {
|
||||
n := len(dest)
|
||||
for _, dm := range matches {
|
||||
n += len(dm.FieldTermLocations)
|
||||
if dm != nil {
|
||||
n += len(dm.FieldTermLocations)
|
||||
}
|
||||
}
|
||||
if cap(dest) < n {
|
||||
dest = append(make([]FieldTermLocation, 0, n), dest...)
|
||||
}
|
||||
|
||||
for _, dm := range matches {
|
||||
for _, ftl := range dm.FieldTermLocations {
|
||||
dest = append(dest, FieldTermLocation{
|
||||
Field: ftl.Field,
|
||||
Term: ftl.Term,
|
||||
Location: Location{
|
||||
Pos: ftl.Location.Pos,
|
||||
Start: ftl.Location.Start,
|
||||
End: ftl.Location.End,
|
||||
ArrayPositions: append(ArrayPositions(nil), ftl.Location.ArrayPositions...),
|
||||
},
|
||||
})
|
||||
if dm != nil {
|
||||
dest = mergeFieldTermLocationFromMatch(dest, dm)
|
||||
}
|
||||
}
|
||||
|
||||
return dest
|
||||
}
|
||||
|
||||
type SearchIOStatsCallbackFunc func(uint64)
|
||||
// MergeFieldTermLocationsFromMatch merges field term locations from a single DocumentMatch
|
||||
// into dest, returning the updated slice.
|
||||
func MergeFieldTermLocationsFromMatch(dest []FieldTermLocation, match *DocumentMatch) []FieldTermLocation {
|
||||
if match == nil {
|
||||
return dest
|
||||
}
|
||||
n := len(dest) + len(match.FieldTermLocations)
|
||||
if cap(dest) < n {
|
||||
dest = append(make([]FieldTermLocation, 0, n), dest...)
|
||||
}
|
||||
return mergeFieldTermLocationFromMatch(dest, match)
|
||||
}
|
||||
|
||||
// Implementation of SearchIncrementalCostCallbackFn should handle the following messages
|
||||
// - add: increment the cost of a search operation
|
||||
// (which can be specific to a query type as well)
|
||||
// - abort: query was aborted due to a cancel of search's context (for eg),
|
||||
// which can be handled differently as well
|
||||
// - done: indicates that a search was complete and the tracked cost can be
|
||||
// handled safely by the implementation.
|
||||
type SearchIncrementalCostCallbackFn func(SearchIncrementalCostCallbackMsg,
|
||||
SearchQueryType, uint64)
|
||||
// mergeFieldTermLocationFromMatch appends field term locations from a DocumentMatch into dest.
|
||||
// Assumes dest has sufficient capacity.
|
||||
func mergeFieldTermLocationFromMatch(dest []FieldTermLocation, dm *DocumentMatch) []FieldTermLocation {
|
||||
for _, ftl := range dm.FieldTermLocations {
|
||||
dest = append(dest, FieldTermLocation{
|
||||
Field: ftl.Field,
|
||||
Term: ftl.Term,
|
||||
Location: Location{
|
||||
Pos: ftl.Location.Pos,
|
||||
Start: ftl.Location.Start,
|
||||
End: ftl.Location.End,
|
||||
ArrayPositions: append(ArrayPositions(nil), ftl.Location.ArrayPositions...),
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
return dest
|
||||
}
|
||||
|
||||
type (
|
||||
SearchIncrementalCostCallbackMsg uint
|
||||
@@ -156,6 +169,10 @@ const (
|
||||
// ScoreFusionKey is used to communicate whether KNN hits need to be preserved for
|
||||
// hybrid search algorithms (like RRF)
|
||||
ScoreFusionKey ContextKey = "_fusion_rescoring_key"
|
||||
|
||||
// NestedSearchKey is used to communicate whether the search is performed
|
||||
// in an index with nested documents
|
||||
NestedSearchKey ContextKey = "_nested_search_key"
|
||||
)
|
||||
|
||||
func RecordSearchCost(ctx context.Context,
|
||||
@@ -184,9 +201,7 @@ const (
|
||||
MinGeoBufPoolSize = 24
|
||||
)
|
||||
|
||||
type GeoBufferPoolCallbackFunc func() *s2.GeoBufferPool
|
||||
|
||||
// *PreSearchDataKey are used to store the data gathered during the presearch phase
|
||||
// PreSearchDataKey are used to store the data gathered during the presearch phase
|
||||
// which would be used in the actual search phase.
|
||||
const (
|
||||
KnnPreSearchDataKey = "_knn_pre_search_data_key"
|
||||
@@ -197,14 +212,39 @@ const (
|
||||
const GlobalScoring = "_global_scoring"
|
||||
|
||||
type (
|
||||
// SearcherStartCallbackFn is a callback function type used to signal the start of
|
||||
// searcher creation phase.
|
||||
SearcherStartCallbackFn func(size uint64) error
|
||||
SearcherEndCallbackFn func(size uint64) error
|
||||
// SearcherEndCallbackFn is a callback function type used to signal the end of
|
||||
// a searcher creation phase.
|
||||
SearcherEndCallbackFn func(size uint64) error
|
||||
// GetScoringModelCallbackFn is a callback function type used to get the scoring model
|
||||
// to be used for scoring documents during search.
|
||||
GetScoringModelCallbackFn func() string
|
||||
// HybridMergeCallbackFn is a callback function type used to merge a KNN document match
|
||||
// into a full text search document match, of the same docID as part of hybrid search.
|
||||
HybridMergeCallbackFn func(ftsMatch *DocumentMatch, knnMatch *DocumentMatch)
|
||||
// DescendantAdderCallbackFn is a callback function type used to customize how a descendant
|
||||
// DocumentMatch is merged into its parent. This allows different descendant addition strategies for
|
||||
// different use cases (e.g., TopN vs KNN collection).
|
||||
DescendantAdderCallbackFn func(parent *DocumentMatch, descendant *DocumentMatch) error
|
||||
// GeoBufferPoolCallbackFunc is a callback function type used to get the geo buffer pool
|
||||
// to be used during geo searches.
|
||||
GeoBufferPoolCallbackFunc func() *s2.GeoBufferPool
|
||||
// SearchIOStatsCallbackFunc is a callback function type used to report search IO stats
|
||||
// during search.
|
||||
SearchIOStatsCallbackFunc func(uint64)
|
||||
// Implementation of SearchIncrementalCostCallbackFn should handle the following messages
|
||||
// - add: increment the cost of a search operation
|
||||
// (which can be specific to a query type as well)
|
||||
// - abort: query was aborted due to a cancel of search's context (for eg),
|
||||
// which can be handled differently as well
|
||||
// - done: indicates that a search was complete and the tracked cost can be
|
||||
// handled safely by the implementation.
|
||||
SearchIncrementalCostCallbackFn func(SearchIncrementalCostCallbackMsg,
|
||||
SearchQueryType, uint64)
|
||||
)
|
||||
|
||||
type GetScoringModelCallbackFn func() string
|
||||
|
||||
type ScoreExplCorrectionCallbackFunc func(queryMatch *DocumentMatch, knnMatch *DocumentMatch) (float64, *Explanation)
|
||||
|
||||
// field -> term -> synonyms
|
||||
type FieldTermSynonymMap map[string]map[string][]string
|
||||
|
||||
@@ -237,3 +277,28 @@ type BM25Stats struct {
|
||||
DocCount float64 `json:"doc_count"`
|
||||
FieldCardinality map[string]int `json:"field_cardinality"`
|
||||
}
|
||||
|
||||
// FieldSet represents a set of queried fields.
|
||||
type FieldSet map[string]struct{}
|
||||
|
||||
// NewFieldSet creates a new FieldSet.
|
||||
func NewFieldSet() FieldSet {
|
||||
return make(map[string]struct{})
|
||||
}
|
||||
|
||||
// AddField adds a field to the set.
|
||||
func (fs FieldSet) AddField(field string) {
|
||||
fs[field] = struct{}{}
|
||||
}
|
||||
|
||||
// HasID returns true if the field set contains the "_id" field.
|
||||
func (fs FieldSet) HasID() bool {
|
||||
_, ok := fs["_id"]
|
||||
return ok
|
||||
}
|
||||
|
||||
// HasAll returns true if the field set contains the "_all" field.
|
||||
func (fs FieldSet) HasAll() bool {
|
||||
_, ok := fs["_all"]
|
||||
return ok
|
||||
}
|
||||
|
||||
90
vendor/github.com/blevesearch/bleve/v2/search_knn.go
generated
vendored
90
vendor/github.com/blevesearch/bleve/v2/search_knn.go
generated
vendored
@@ -27,6 +27,7 @@ import (
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
"github.com/blevesearch/bleve/v2/search/collector"
|
||||
"github.com/blevesearch/bleve/v2/search/query"
|
||||
"github.com/blevesearch/bleve/v2/util"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
@@ -42,18 +43,18 @@ type SearchRequest struct {
|
||||
Query query.Query `json:"query"`
|
||||
Size int `json:"size"`
|
||||
From int `json:"from"`
|
||||
Highlight *HighlightRequest `json:"highlight"`
|
||||
Fields []string `json:"fields"`
|
||||
Facets FacetsRequest `json:"facets"`
|
||||
Highlight *HighlightRequest `json:"highlight,omitempty"`
|
||||
Fields []string `json:"fields,omitempty"`
|
||||
Facets FacetsRequest `json:"facets,omitempty"`
|
||||
Explain bool `json:"explain"`
|
||||
Sort search.SortOrder `json:"sort"`
|
||||
IncludeLocations bool `json:"includeLocations"`
|
||||
Score string `json:"score,omitempty"`
|
||||
SearchAfter []string `json:"search_after"`
|
||||
SearchBefore []string `json:"search_before"`
|
||||
SearchAfter []string `json:"search_after,omitempty"`
|
||||
SearchBefore []string `json:"search_before,omitempty"`
|
||||
|
||||
KNN []*KNNRequest `json:"knn"`
|
||||
KNNOperator knnOperator `json:"knn_operator"`
|
||||
KNN []*KNNRequest `json:"knn,omitempty"`
|
||||
KNNOperator knnOperator `json:"knn_operator,omitempty"`
|
||||
|
||||
// PreSearchData will be a map that will be used
|
||||
// in the second phase of any 2-phase search, to provide additional
|
||||
@@ -125,35 +126,35 @@ func (r *SearchRequest) AddKNNOperator(operator knnOperator) {
|
||||
// a SearchRequest
|
||||
func (r *SearchRequest) UnmarshalJSON(input []byte) error {
|
||||
type tempKNNReq struct {
|
||||
Field string `json:"field"`
|
||||
Vector []float32 `json:"vector"`
|
||||
VectorBase64 string `json:"vector_base64"`
|
||||
K int64 `json:"k"`
|
||||
Boost *query.Boost `json:"boost,omitempty"`
|
||||
Params json.RawMessage `json:"params"`
|
||||
FilterQuery json.RawMessage `json:"filter,omitempty"`
|
||||
Field string `json:"field"`
|
||||
Vector []float32 `json:"vector"`
|
||||
VectorBase64 string `json:"vector_base64"`
|
||||
K int64 `json:"k"`
|
||||
Boost *query.Boost `json:"boost,omitempty"`
|
||||
Params OptionalRawMessage `json:"params"`
|
||||
FilterQuery OptionalRawMessage `json:"filter,omitempty"`
|
||||
}
|
||||
|
||||
var temp struct {
|
||||
Q json.RawMessage `json:"query"`
|
||||
Size *int `json:"size"`
|
||||
From int `json:"from"`
|
||||
Highlight *HighlightRequest `json:"highlight"`
|
||||
Fields []string `json:"fields"`
|
||||
Facets FacetsRequest `json:"facets"`
|
||||
Explain bool `json:"explain"`
|
||||
Sort []json.RawMessage `json:"sort"`
|
||||
IncludeLocations bool `json:"includeLocations"`
|
||||
Score string `json:"score"`
|
||||
SearchAfter []string `json:"search_after"`
|
||||
SearchBefore []string `json:"search_before"`
|
||||
KNN []*tempKNNReq `json:"knn"`
|
||||
KNNOperator knnOperator `json:"knn_operator"`
|
||||
PreSearchData json.RawMessage `json:"pre_search_data"`
|
||||
Params json.RawMessage `json:"params"`
|
||||
Q json.RawMessage `json:"query"`
|
||||
Size *int `json:"size"`
|
||||
From int `json:"from"`
|
||||
Highlight *HighlightRequest `json:"highlight"`
|
||||
Fields []string `json:"fields"`
|
||||
Facets FacetsRequest `json:"facets"`
|
||||
Explain bool `json:"explain"`
|
||||
Sort []json.RawMessage `json:"sort"`
|
||||
IncludeLocations bool `json:"includeLocations"`
|
||||
Score string `json:"score"`
|
||||
SearchAfter []string `json:"search_after"`
|
||||
SearchBefore []string `json:"search_before"`
|
||||
KNN []*tempKNNReq `json:"knn"`
|
||||
KNNOperator knnOperator `json:"knn_operator"`
|
||||
PreSearchData OptionalRawMessage `json:"pre_search_data"`
|
||||
Params OptionalRawMessage `json:"params"`
|
||||
}
|
||||
|
||||
err := json.Unmarshal(input, &temp)
|
||||
err := util.UnmarshalJSON(input, &temp)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -216,11 +217,10 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
|
||||
r.KNN[i].VectorBase64 = temp.KNN[i].VectorBase64
|
||||
r.KNN[i].K = temp.KNN[i].K
|
||||
r.KNN[i].Boost = temp.KNN[i].Boost
|
||||
r.KNN[i].Params = temp.KNN[i].Params
|
||||
if len(knnReq.FilterQuery) == 0 {
|
||||
// Setting this to nil to avoid ParseQuery() setting it to a match none
|
||||
r.KNN[i].FilterQuery = nil
|
||||
} else {
|
||||
if len(temp.KNN[i].Params) > 0 {
|
||||
r.KNN[i].Params = json.RawMessage(temp.KNN[i].Params)
|
||||
}
|
||||
if len(temp.KNN[i].FilterQuery) > 0 {
|
||||
r.KNN[i].FilterQuery, err = query.ParseQuery(knnReq.FilterQuery)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -377,7 +377,7 @@ func addSortAndFieldsToKNNHits(req *SearchRequest, knnHits []*search.DocumentMat
|
||||
}
|
||||
}
|
||||
req.Sort.Value(hit)
|
||||
err, _ = LoadAndHighlightFields(hit, req, "", reader, nil)
|
||||
err, _ = LoadAndHighlightAllFields(hit, req, "", reader, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -474,17 +474,15 @@ func (i *indexImpl) runKnnCollector(ctx context.Context, req *SearchRequest, rea
|
||||
return knnHits, nil
|
||||
}
|
||||
|
||||
func setKnnHitsInCollector(knnHits []*search.DocumentMatch, req *SearchRequest, coll *collector.TopNCollector) {
|
||||
func setKnnHitsInCollector(knnHits []*search.DocumentMatch, coll *collector.TopNCollector) {
|
||||
if len(knnHits) > 0 {
|
||||
newScoreExplComputer := func(queryMatch *search.DocumentMatch, knnMatch *search.DocumentMatch) (float64, *search.Explanation) {
|
||||
totalScore := queryMatch.Score + knnMatch.Score
|
||||
if !req.Explain {
|
||||
// exit early as we don't need to compute the explanation
|
||||
return totalScore, nil
|
||||
}
|
||||
return totalScore, &search.Explanation{Value: totalScore, Message: "sum of:", Children: []*search.Explanation{queryMatch.Expl, knnMatch.Expl}}
|
||||
mergeFn := func(ftsMatch *search.DocumentMatch, knnMatch *search.DocumentMatch) {
|
||||
// Boost the FTS score using the KNN score
|
||||
ftsMatch.Score += knnMatch.Score
|
||||
// Combine the FTS explanation with the KNN explanation, if present
|
||||
ftsMatch.Expl.MergeWith(knnMatch.Expl)
|
||||
}
|
||||
coll.SetKNNHits(knnHits, search.ScoreExplCorrectionCallbackFunc(newScoreExplComputer))
|
||||
coll.SetKNNHits(knnHits, search.HybridMergeCallbackFn(mergeFn))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
43
vendor/github.com/blevesearch/bleve/v2/search_no_knn.go
generated
vendored
43
vendor/github.com/blevesearch/bleve/v2/search_no_knn.go
generated
vendored
@@ -25,6 +25,7 @@ import (
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
"github.com/blevesearch/bleve/v2/search/collector"
|
||||
"github.com/blevesearch/bleve/v2/search/query"
|
||||
"github.com/blevesearch/bleve/v2/util"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
@@ -55,15 +56,15 @@ type SearchRequest struct {
|
||||
Query query.Query `json:"query"`
|
||||
Size int `json:"size"`
|
||||
From int `json:"from"`
|
||||
Highlight *HighlightRequest `json:"highlight"`
|
||||
Fields []string `json:"fields"`
|
||||
Facets FacetsRequest `json:"facets"`
|
||||
Highlight *HighlightRequest `json:"highlight,omitempty"`
|
||||
Fields []string `json:"fields,omitempty"`
|
||||
Facets FacetsRequest `json:"facets,omitempty"`
|
||||
Explain bool `json:"explain"`
|
||||
Sort search.SortOrder `json:"sort"`
|
||||
IncludeLocations bool `json:"includeLocations"`
|
||||
Score string `json:"score,omitempty"`
|
||||
SearchAfter []string `json:"search_after"`
|
||||
SearchBefore []string `json:"search_before"`
|
||||
SearchAfter []string `json:"search_after,omitempty"`
|
||||
SearchBefore []string `json:"search_before,omitempty"`
|
||||
|
||||
// PreSearchData will be a map that will be used
|
||||
// in the second phase of any 2-phase search, to provide additional
|
||||
@@ -86,23 +87,23 @@ type SearchRequest struct {
|
||||
// a SearchRequest
|
||||
func (r *SearchRequest) UnmarshalJSON(input []byte) error {
|
||||
var temp struct {
|
||||
Q json.RawMessage `json:"query"`
|
||||
Size *int `json:"size"`
|
||||
From int `json:"from"`
|
||||
Highlight *HighlightRequest `json:"highlight"`
|
||||
Fields []string `json:"fields"`
|
||||
Facets FacetsRequest `json:"facets"`
|
||||
Explain bool `json:"explain"`
|
||||
Sort []json.RawMessage `json:"sort"`
|
||||
IncludeLocations bool `json:"includeLocations"`
|
||||
Score string `json:"score"`
|
||||
SearchAfter []string `json:"search_after"`
|
||||
SearchBefore []string `json:"search_before"`
|
||||
PreSearchData json.RawMessage `json:"pre_search_data"`
|
||||
Params json.RawMessage `json:"params"`
|
||||
Q json.RawMessage `json:"query"`
|
||||
Size *int `json:"size"`
|
||||
From int `json:"from"`
|
||||
Highlight *HighlightRequest `json:"highlight"`
|
||||
Fields []string `json:"fields"`
|
||||
Facets FacetsRequest `json:"facets"`
|
||||
Explain bool `json:"explain"`
|
||||
Sort []json.RawMessage `json:"sort"`
|
||||
IncludeLocations bool `json:"includeLocations"`
|
||||
Score string `json:"score"`
|
||||
SearchAfter []string `json:"search_after"`
|
||||
SearchBefore []string `json:"search_before"`
|
||||
PreSearchData OptionalRawMessage `json:"pre_search_data"`
|
||||
Params OptionalRawMessage `json:"params"`
|
||||
}
|
||||
|
||||
err := json.Unmarshal(input, &temp)
|
||||
err := util.UnmarshalJSON(input, &temp)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -197,7 +198,7 @@ func (i *indexImpl) runKnnCollector(ctx context.Context, req *SearchRequest, rea
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func setKnnHitsInCollector(knnHits []*search.DocumentMatch, req *SearchRequest, coll *collector.TopNCollector) {
|
||||
func setKnnHitsInCollector(knnHits []*search.DocumentMatch, coll *collector.TopNCollector) {
|
||||
}
|
||||
|
||||
func requestHasKNN(req *SearchRequest) bool {
|
||||
|
||||
170
vendor/github.com/blevesearch/bleve/v2/util/bolt.go
generated
vendored
Normal file
170
vendor/github.com/blevesearch/bleve/v2/util/bolt.go
generated
vendored
Normal file
@@ -0,0 +1,170 @@
|
||||
// Copyright (c) 2026 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package util
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
bolt "go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
// All of the bolt impls provide a layer of indirection to allow for processing
|
||||
// of values as they are read/written to bolt depending on the key or bucket name
|
||||
// This is used to allow better support for file callbacks
|
||||
|
||||
// wrapper around bolt.DB
|
||||
type RootBoltImpl struct {
|
||||
*bolt.DB
|
||||
}
|
||||
|
||||
// wrapper around bolt.Tx
|
||||
type BoltTxImpl struct {
|
||||
*bolt.Tx
|
||||
}
|
||||
|
||||
// wrapper around bolt.Bucket
|
||||
type BoltBucketImpl struct {
|
||||
*bolt.Bucket
|
||||
|
||||
name string // store the name of the bucket during creation
|
||||
}
|
||||
|
||||
func OpenBolt(path string, mode os.FileMode, options *bolt.Options) (*RootBoltImpl, error) {
|
||||
db, err := bolt.Open(path, mode, options)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &RootBoltImpl{DB: db}, nil
|
||||
}
|
||||
|
||||
func (r *RootBoltImpl) Begin(writable bool) (*BoltTxImpl, error) {
|
||||
tx, err := r.DB.Begin(writable)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &BoltTxImpl{Tx: tx}, nil
|
||||
}
|
||||
|
||||
func (r *RootBoltImpl) View(fn func(*BoltTxImpl) error) error {
|
||||
return r.DB.View(func(tx *bolt.Tx) error {
|
||||
return fn(&BoltTxImpl{Tx: tx})
|
||||
})
|
||||
}
|
||||
|
||||
func (r *RootBoltImpl) Update(fn func(*BoltTxImpl) error) error {
|
||||
return r.DB.Update(func(tx *bolt.Tx) error {
|
||||
return fn(&BoltTxImpl{Tx: tx})
|
||||
})
|
||||
}
|
||||
|
||||
func (tx *BoltTxImpl) CreateBucketIfNotExists(name []byte) (*BoltBucketImpl, error) {
|
||||
bucket, err := tx.Tx.CreateBucketIfNotExists(name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &BoltBucketImpl{
|
||||
name: string(name),
|
||||
Bucket: bucket,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (tx *BoltTxImpl) Bucket(name []byte) *BoltBucketImpl {
|
||||
bucket := tx.Tx.Bucket(name)
|
||||
if bucket == nil {
|
||||
return nil
|
||||
}
|
||||
return &BoltBucketImpl{
|
||||
name: string(name),
|
||||
Bucket: bucket,
|
||||
}
|
||||
}
|
||||
|
||||
func (b *BoltBucketImpl) GetBucket(name []byte) *BoltBucketImpl {
|
||||
bucket := b.Bucket.Bucket(name)
|
||||
if bucket == nil {
|
||||
return nil
|
||||
}
|
||||
return &BoltBucketImpl{
|
||||
name: string(name),
|
||||
Bucket: bucket,
|
||||
}
|
||||
}
|
||||
|
||||
func (b *BoltBucketImpl) CreateBucketIfNotExists(name []byte) (*BoltBucketImpl, error) {
|
||||
bucket, err := b.Bucket.CreateBucketIfNotExists(name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &BoltBucketImpl{
|
||||
name: string(name),
|
||||
Bucket: bucket,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Process values during ForEach if the bucket name or key is in the boltKeysProcessed map
|
||||
func (b *BoltBucketImpl) ForEach(fn func(key []byte, value []byte) error, reader FileReader) error {
|
||||
_, ok1 := boltKeysProcessed[b.name]
|
||||
return b.Bucket.ForEach(func(k, v []byte) error {
|
||||
v = append([]byte(nil), v...)
|
||||
if _, ok2 := boltKeysProcessed[string(k)]; ok1 || ok2 {
|
||||
if reader == nil {
|
||||
return fmt.Errorf("reader callback is required for bucket %s", b.name)
|
||||
}
|
||||
processedValue, err := reader.Process(v)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return fn(k, processedValue)
|
||||
}
|
||||
return fn(k, v)
|
||||
})
|
||||
}
|
||||
|
||||
// Process values during Put/Get if the bucket name or key is in the boltKeysProcessed map
|
||||
func (b *BoltBucketImpl) Put(key []byte, value []byte, writer FileWriter) error {
|
||||
_, ok1 := boltKeysProcessed[string(key)]
|
||||
_, ok2 := boltKeysProcessed[b.name]
|
||||
value = append([]byte(nil), value...)
|
||||
if ok1 || ok2 {
|
||||
if writer == nil {
|
||||
return fmt.Errorf("writer callback is required for key %s", string(key))
|
||||
}
|
||||
processedValue := writer.Process(value)
|
||||
return b.Bucket.Put(key, processedValue)
|
||||
}
|
||||
return b.Bucket.Put(key, value)
|
||||
}
|
||||
|
||||
// Process values during Put/Get if the bucket name or key is in the boltKeysProcessed map
|
||||
func (b *BoltBucketImpl) Get(key []byte, reader FileReader) ([]byte, error) {
|
||||
_, ok1 := boltKeysProcessed[string(key)]
|
||||
_, ok2 := boltKeysProcessed[b.name]
|
||||
if ok1 || ok2 {
|
||||
if reader == nil {
|
||||
return nil, fmt.Errorf("reader callback is required for key %s", string(key))
|
||||
}
|
||||
val := b.Bucket.Get(key)
|
||||
if val == nil {
|
||||
return nil, nil
|
||||
}
|
||||
processedVal, err := reader.Process(val)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return processedVal, nil
|
||||
}
|
||||
return b.Bucket.Get(key), nil
|
||||
}
|
||||
129
vendor/github.com/blevesearch/bleve/v2/util/file_callbacks.go
generated
vendored
Normal file
129
vendor/github.com/blevesearch/bleve/v2/util/file_callbacks.go
generated
vendored
Normal file
@@ -0,0 +1,129 @@
|
||||
// Copyright (c) 2026 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package util
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
// This file provides a mechanism for users of bleve to provide callbacks
|
||||
// that can process data before it is written to disk, and after it is read
|
||||
// from disk. This can be used for things like encryption, compression, etc.
|
||||
|
||||
// The user is responsible for ensuring that the writer and reader callbacks
|
||||
// are compatible with each other, and that any state needed by the callbacks
|
||||
// is managed appropriately. For example, if the writer callback uses a
|
||||
// unique key or nonce per write, the reader callback must be able to
|
||||
// determine the correct key or nonce to use for each read.
|
||||
|
||||
// The callbacks are identified by an id string, which is returned by the
|
||||
// WriterHook. The same id string is passed to the ReaderHook when creating a reader.
|
||||
// This allows the reader to determine which callback to use for a given file.
|
||||
|
||||
// Support for identifying all callbacks used by a given index and to remove
|
||||
// selected callbacks associated with ids is provided via index.WriterIdsInUse()
|
||||
// and index.DropWriterIds().
|
||||
|
||||
const DefaultFileCallbackId = ""
|
||||
|
||||
// FileWriter and FileReader interfaces are wrappers around the callback functions
|
||||
// provided by the user. They provide a convenient way to apply the callbacks to data
|
||||
// being written to or read from a file. They also store the id of the callbacks,
|
||||
// which can be useful for managing state across multiple reads and writes.
|
||||
type FileWriter interface {
|
||||
Process(data []byte) []byte
|
||||
Id() string
|
||||
}
|
||||
type fileWriterImpl struct {
|
||||
id string
|
||||
processor func(data []byte) []byte
|
||||
}
|
||||
|
||||
func NewFileWriter(context []byte) (FileWriter, error) {
|
||||
rv := &fileWriterImpl{}
|
||||
|
||||
if index.WriterHook != nil {
|
||||
var err error
|
||||
rv.id, rv.processor, err = index.WriterHook(context)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (w *fileWriterImpl) Process(data []byte) []byte {
|
||||
if w.processor != nil {
|
||||
return w.processor(data)
|
||||
}
|
||||
return data
|
||||
}
|
||||
|
||||
func (w *fileWriterImpl) Id() string {
|
||||
return w.id
|
||||
}
|
||||
|
||||
type FileReader interface {
|
||||
Process(data []byte) ([]byte, error)
|
||||
Id() string
|
||||
}
|
||||
|
||||
type fileReaderImpl struct {
|
||||
id string
|
||||
processor func(data []byte) ([]byte, error)
|
||||
}
|
||||
|
||||
func NewFileReader(id string, context []byte) (FileReader, error) {
|
||||
rv := &fileReaderImpl{
|
||||
id: id,
|
||||
}
|
||||
|
||||
if index.ReaderHook != nil {
|
||||
var err error
|
||||
rv.processor, err = index.ReaderHook(id, context)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else if id != "" {
|
||||
return nil, fmt.Errorf("reader callback id %s provided but no ReaderHook is set", id)
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (r *fileReaderImpl) Process(data []byte) ([]byte, error) {
|
||||
if r.processor != nil {
|
||||
return r.processor(data)
|
||||
}
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func (r *fileReaderImpl) Id() string {
|
||||
return r.id
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
// set of bolt keys and bucket names that require processing by the reader
|
||||
// and writer callbacks.
|
||||
var boltKeysProcessed = map[string]struct{}{
|
||||
string(BoltDeletedKey): {},
|
||||
string(BoltInternalKey): {},
|
||||
string(BoltStatsKey): {},
|
||||
string(BoltUpdatedFieldsKey): {},
|
||||
}
|
||||
4
vendor/github.com/blevesearch/bleve/v2/util/keys.go
generated
vendored
4
vendor/github.com/blevesearch/bleve/v2/util/keys.go
generated
vendored
@@ -17,6 +17,9 @@ package util
|
||||
var (
|
||||
// Bolt keys
|
||||
BoltSnapshotsBucket = []byte{'s'}
|
||||
BoltTrainerKey = []byte{'t'}
|
||||
BoltTrainCompleteKey = []byte{'c'}
|
||||
BoltTrainedSamplesKey = []byte{'n'}
|
||||
BoltPathKey = []byte{'p'}
|
||||
BoltDeletedKey = []byte{'d'}
|
||||
BoltInternalKey = []byte{'i'}
|
||||
@@ -27,6 +30,7 @@ var (
|
||||
BoltStatsKey = []byte("stats")
|
||||
BoltUpdatedFieldsKey = []byte("fields")
|
||||
TotBytesWrittenKey = []byte("TotBytesWritten")
|
||||
BoltMetaDataFileWriterIDKey = []byte("fileWriterID")
|
||||
|
||||
MappingInternalKey = []byte("_mapping")
|
||||
)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user