Compare commits

...

11 Commits

Author SHA1 Message Date
Viktor Scharf
53d0bb467b fix issue-2146 2026-01-15 16:57:26 +01:00
dependabot[bot]
dccc3a0f21 build(deps): bump github.com/sirupsen/logrus
Bumps [github.com/sirupsen/logrus](https://github.com/sirupsen/logrus) from 1.9.4-0.20230606125235-dd1b4c2e81af to 1.9.4.
- [Release notes](https://github.com/sirupsen/logrus/releases)
- [Changelog](https://github.com/sirupsen/logrus/blob/master/CHANGELOG.md)
- [Commits](https://github.com/sirupsen/logrus/commits/v1.9.4)

---
updated-dependencies:
- dependency-name: github.com/sirupsen/logrus
  dependency-version: 1.9.4
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-01-15 16:48:11 +01:00
dependabot[bot]
22a7eaa005 build(deps): bump github.com/go-chi/chi/v5 from 5.2.3 to 5.2.4
Bumps [github.com/go-chi/chi/v5](https://github.com/go-chi/chi) from 5.2.3 to 5.2.4.
- [Release notes](https://github.com/go-chi/chi/releases)
- [Changelog](https://github.com/go-chi/chi/blob/master/CHANGELOG.md)
- [Commits](https://github.com/go-chi/chi/compare/v5.2.3...v5.2.4)

---
updated-dependencies:
- dependency-name: github.com/go-chi/chi/v5
  dependency-version: 5.2.4
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-01-15 15:17:16 +01:00
Michael Flemming
2f7542c36e Merge pull request #2159 from opencloud-eu/stablecitest-12.3.4 2026-01-15 13:36:15 +01:00
Viktor Scharf
3eac173644 fix flaky #2145 2026-01-15 13:23:44 +01:00
dependabot[bot]
42fd54dd35 build(deps): bump go.opentelemetry.io/contrib/zpages
Bumps [go.opentelemetry.io/contrib/zpages](https://github.com/open-telemetry/opentelemetry-go-contrib) from 0.63.0 to 0.64.0.
- [Release notes](https://github.com/open-telemetry/opentelemetry-go-contrib/releases)
- [Changelog](https://github.com/open-telemetry/opentelemetry-go-contrib/blob/main/CHANGELOG.md)
- [Commits](https://github.com/open-telemetry/opentelemetry-go-contrib/compare/zpages/v0.63.0...zpages/v0.64.0)

---
updated-dependencies:
- dependency-name: go.opentelemetry.io/contrib/zpages
  dependency-version: 0.64.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-01-15 12:51:28 +01:00
dependabot[bot]
21207eba40 build(deps): bump github.com/blevesearch/bleve/v2 from 2.5.5 to 2.5.7
Bumps [github.com/blevesearch/bleve/v2](https://github.com/blevesearch/bleve) from 2.5.5 to 2.5.7.
- [Release notes](https://github.com/blevesearch/bleve/releases)
- [Commits](https://github.com/blevesearch/bleve/compare/v2.5.5...v2.5.7)

---
updated-dependencies:
- dependency-name: github.com/blevesearch/bleve/v2
  dependency-version: 2.5.7
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-01-15 12:39:32 +01:00
Michael 'Flimmy' Flemming
82c00913d3 add shared pkgs to trigger path for GenDocsPr pipeline 2026-01-15 12:35:52 +01:00
Michael 'Flimmy' Flemming
11b20f4ca5 fix cron not triggering GenDocsPr 2026-01-15 12:28:25 +01:00
Viktor Scharf
e33ff722f7 run wopi-validator tests localy (#2151) 2026-01-15 12:22:04 +01:00
Michael 'Flimmy' Flemming
a935704368 add prefix removal for stable branches in docs gen pipeline 2026-01-15 11:38:53 +01:00
59 changed files with 1842 additions and 1031 deletions

View File

@@ -24,6 +24,7 @@ OC_CI_NODEJS_ALPINE = "quay.io/opencloudeu/nodejs-alpine-ci:24"
OC_CI_PHP = "quay.io/opencloudeu/php-alpine-ci:%s"
OC_CI_WAIT_FOR = "quay.io/opencloudeu/wait-for-ci:latest"
OC_CS3_API_VALIDATOR = "opencloudeu/cs3api-validator:latest"
OC_CI_WOPI_VALIDATOR = "quay.io/opencloudeu/wopi-validator-ci:latest"
OC_LITMUS = "owncloudci/litmus:latest"
ONLYOFFICE_DOCUMENT_SERVER = "onlyoffice/documentserver:7.5.1"
PLUGINS_DOCKER_BUILDX = "woodpeckerci/plugin-docker-buildx:latest"
@@ -1137,7 +1138,7 @@ def wopiValidatorTests(ctx, storage, wopiServerType, accounts_hash_difficulty =
for testgroup in testgroups:
validatorTests.append({
"name": "wopiValidatorTests-%s" % testgroup,
"image": "owncloudci/wopi-validator",
"image": OC_CI_WOPI_VALIDATOR,
"commands": [
"export WOPI_TOKEN=$(cat accesstoken)",
"echo $WOPI_TOKEN",
@@ -1153,7 +1154,7 @@ def wopiValidatorTests(ctx, storage, wopiServerType, accounts_hash_difficulty =
for builtinOnlyGroup in builtinOnlyTestGroups:
validatorTests.append({
"name": "wopiValidatorTests-%s" % builtinOnlyGroup,
"image": "owncloudci/wopi-validator",
"image": OC_CI_WOPI_VALIDATOR,
"commands": [
"export WOPI_TOKEN=$(cat accesstoken)",
"echo $WOPI_TOKEN",
@@ -2238,7 +2239,7 @@ def genDocsPr(ctx):
},
"GIT_SSH_COMMAND": "ssh -o StrictHostKeyChecking=no -i /root/id_rsa",
"OC_GIT_BRANCH": "${CI_COMMIT_BRANCH}",
"MY_TARGET_BRANCH": "${CI_COMMIT_BRANCH}",
"MY_TARGET_BRANCH": "${CI_COMMIT_BRANCH##stable-}",
},
"commands": [
'export DOC_GIT_TARGET_FOLDER="$$(if [ \"$$MY_TARGET_BRANCH\" = \"main\" ]; then echo \"tmpdocs/docs/dev/_static/env-vars/\"; else echo \"tmpdocs/versioned_docs/version-$${MY_TARGET_BRANCH}/dev/_static/env-vars/\"; fi)"',
@@ -2256,13 +2257,18 @@ def genDocsPr(ctx):
"when": [
{
"event": "push",
"path": "services/*/pkg/config/**/*.go",
"path": {
"include": [
"services/*/pkg/config/**/*.go",
"pkg/**/*.go",
],
},
"branch": "[main, stable-*]",
},
{
"event": "cron",
"branch": "[main]",
"cron": "nightly (@daily)",
"cron": "nightly *",
},
],
}]

go.mod (10 lines changed)
View File

@@ -11,7 +11,7 @@ require (
github.com/Nerzal/gocloak/v13 v13.9.0
github.com/bbalet/stopwords v1.0.0
github.com/beevik/etree v1.6.0
github.com/blevesearch/bleve/v2 v2.5.5
github.com/blevesearch/bleve/v2 v2.5.7
github.com/cenkalti/backoff v2.2.1+incompatible
github.com/coreos/go-oidc/v3 v3.17.0
github.com/cs3org/go-cs3apis v0.0.0-20250908152307-4ca807afe54e
@@ -20,7 +20,7 @@ require (
github.com/dutchcoders/go-clamd v0.0.0-20170520113014-b970184f4d9e
github.com/gabriel-vasile/mimetype v1.4.12
github.com/ggwhite/go-masker v1.1.0
github.com/go-chi/chi/v5 v5.2.3
github.com/go-chi/chi/v5 v5.2.4
github.com/go-chi/render v1.0.3
github.com/go-jose/go-jose/v3 v3.0.4
github.com/go-ldap/ldap/v3 v3.4.12
@@ -75,7 +75,7 @@ require (
github.com/rogpeppe/go-internal v1.14.1
github.com/rs/cors v1.11.1
github.com/rs/zerolog v1.34.0
github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af
github.com/sirupsen/logrus v1.9.4
github.com/spf13/afero v1.15.0
github.com/spf13/cobra v1.10.2
github.com/spf13/pflag v1.0.10
@@ -96,7 +96,7 @@ require (
go.etcd.io/bbolt v1.4.3
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.64.0
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.64.0
go.opentelemetry.io/contrib/zpages v0.63.0
go.opentelemetry.io/contrib/zpages v0.64.0
go.opentelemetry.io/otel v1.39.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.39.0
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.39.0
@@ -157,7 +157,7 @@ require (
github.com/blevesearch/zapx/v13 v13.4.2 // indirect
github.com/blevesearch/zapx/v14 v14.4.2 // indirect
github.com/blevesearch/zapx/v15 v15.4.2 // indirect
github.com/blevesearch/zapx/v16 v16.2.7 // indirect
github.com/blevesearch/zapx/v16 v16.2.8 // indirect
github.com/bluele/gcache v0.0.2 // indirect
github.com/bombsimon/logrusr/v3 v3.1.0 // indirect
github.com/cenkalti/backoff/v4 v4.3.0 // indirect

go.sum (20 lines changed)
View File

@@ -151,8 +151,8 @@ github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6
github.com/bits-and-blooms/bitset v1.22.0 h1:Tquv9S8+SGaS3EhyA+up3FXzmkhxPGjQQCkcs2uw7w4=
github.com/bits-and-blooms/bitset v1.22.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84=
github.com/blevesearch/bleve/v2 v2.5.5 h1:lzC89QUCco+y1qBnJxGqm4AbtsdsnlUvq0kXok8n3C8=
github.com/blevesearch/bleve/v2 v2.5.5/go.mod h1:t5WoESS5TDteTdnjhhvpA1BpLYErOBX2IQViTMLK7wo=
github.com/blevesearch/bleve/v2 v2.5.7 h1:2d9YrL5zrX5EBBW++GOaEKjE+NPWeZGaX77IM26m1Z8=
github.com/blevesearch/bleve/v2 v2.5.7/go.mod h1:yj0NlS7ocGC4VOSAedqDDMktdh2935v2CSWOCDMHdSA=
github.com/blevesearch/bleve_index_api v1.2.11 h1:bXQ54kVuwP8hdrXUSOnvTQfgK0KI1+f9A0ITJT8tX1s=
github.com/blevesearch/bleve_index_api v1.2.11/go.mod h1:rKQDl4u51uwafZxFrPD1R7xFOwKnzZW7s/LSeK4lgo0=
github.com/blevesearch/geo v0.2.4 h1:ECIGQhw+QALCZaDcogRTNSJYQXRtC8/m8IKiA706cqk=
@@ -185,8 +185,8 @@ github.com/blevesearch/zapx/v14 v14.4.2 h1:2SGHakVKd+TrtEqpfeq8X+So5PShQ5nW6GNxT
github.com/blevesearch/zapx/v14 v14.4.2/go.mod h1:rz0XNb/OZSMjNorufDGSpFpjoFKhXmppH9Hi7a877D8=
github.com/blevesearch/zapx/v15 v15.4.2 h1:sWxpDE0QQOTjyxYbAVjt3+0ieu8NCE0fDRaFxEsp31k=
github.com/blevesearch/zapx/v15 v15.4.2/go.mod h1:1pssev/59FsuWcgSnTa0OeEpOzmhtmr/0/11H0Z8+Nw=
github.com/blevesearch/zapx/v16 v16.2.7 h1:xcgFRa7f/tQXOwApVq7JWgPYSlzyUMmkuYa54tMDuR0=
github.com/blevesearch/zapx/v16 v16.2.7/go.mod h1:murSoCJPCk25MqURrcJaBQ1RekuqSCSfMjXH4rHyA14=
github.com/blevesearch/zapx/v16 v16.2.8 h1:SlnzF0YGtSlrsOE3oE7EgEX6BIepGpeqxs1IjMbHLQI=
github.com/blevesearch/zapx/v16 v16.2.8/go.mod h1:murSoCJPCk25MqURrcJaBQ1RekuqSCSfMjXH4rHyA14=
github.com/bluele/gcache v0.0.2 h1:WcbfdXICg7G/DGBh1PFfcirkWOQV+v077yF1pSy3DGw=
github.com/bluele/gcache v0.0.2/go.mod h1:m15KV+ECjptwSPxKhOhQoAFQVtUFjTVkc3H8o0t/fp0=
github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY=
@@ -381,8 +381,8 @@ github.com/go-asn1-ber/asn1-ber v1.4.1/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkPro
github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667 h1:BP4M0CvQ4S3TGls2FvczZtj5Re/2ZzkV9VwqPHH/3Bo=
github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0=
github.com/go-chi/chi v4.0.2+incompatible/go.mod h1:eB3wogJHnLi3x/kFX2A+IbTBlXxmMeXJVKy9tTv1XzQ=
github.com/go-chi/chi/v5 v5.2.3 h1:WQIt9uxdsAbgIYgid+BpYc+liqQZGMHRaUwp0JUcvdE=
github.com/go-chi/chi/v5 v5.2.3/go.mod h1:L2yAIGWB3H+phAw1NxKwWM+7eUH/lU8pOMm5hHcoops=
github.com/go-chi/chi/v5 v5.2.4 h1:WtFKPHwlywe8Srng8j2BhOD9312j9cGUxG1SP4V2cR4=
github.com/go-chi/chi/v5 v5.2.4/go.mod h1:X7Gx4mteadT3eDOMTsXzmI4/rwUpOwBHLpAfupzFJP0=
github.com/go-chi/render v1.0.3 h1:AsXqd2a1/INaIfUSKq3G5uA8weYx20FOsM7uSoCyyt4=
github.com/go-chi/render v1.0.3/go.mod h1:/gr3hVkmYR0YlEy3LxCuVRFzEu9Ruok+gFqbIofjao0=
github.com/go-cmd/cmd v1.0.5/go.mod h1:y8q8qlK5wQibcw63djSl/ntiHUHXHGdCkPk0j4QeW4s=
@@ -1138,8 +1138,8 @@ github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPx
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af h1:Sp5TG9f7K39yfB+If0vjp97vuT74F72r8hfRpP8jLU0=
github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/sirupsen/logrus v1.9.4 h1:TsZE7l11zFCLZnZ+teH4Umoq5BhEIfIzfRDZ1Uzql2w=
github.com/sirupsen/logrus v1.9.4/go.mod h1:ftWc9WdOfJ0a92nsE2jF5u5ZwH8Bv2zdeOC42RjbV2g=
github.com/skeema/knownhosts v1.3.0 h1:AM+y0rI04VksttfwjkSTNQorvGqmwATnvnAHpSgc0LY=
github.com/skeema/knownhosts v1.3.0/go.mod h1:sPINvnADmT/qYH1kfv+ePMmOBTH6Tbl7b5LvTDjFK7M=
github.com/skratchdot/open-golang v0.0.0-20160302144031-75fb7ed4208c/go.mod h1:sUM3LWHvSMaG192sy56D9F7CNvL7jUJVXoqM1QKLnog=
@@ -1313,8 +1313,8 @@ go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.6
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.64.0/go.mod h1:habDz3tEWiFANTo6oUE99EmaFUrCNYAAg3wiVmusm70=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.64.0 h1:ssfIgGNANqpVFCndZvcuyKbl0g+UAVcbBcqGkG28H0Y=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.64.0/go.mod h1:GQ/474YrbE4Jx8gZ4q5I4hrhUzM6UPzyrqJYV2AqPoQ=
go.opentelemetry.io/contrib/zpages v0.63.0 h1:TppOKuZGbqXMgsfjqq3i09N5Vbo1JLtLImUqiTPGnX4=
go.opentelemetry.io/contrib/zpages v0.63.0/go.mod h1:5F8uugz75ay/MMhRRhxAXY33FuaI8dl7jTxefrIy5qk=
go.opentelemetry.io/contrib/zpages v0.64.0 h1:iMybqKVR8AHHxFX4DuEWJ9dY75+9E7+IPwUK3Ll7NxM=
go.opentelemetry.io/contrib/zpages v0.64.0/go.mod h1:DnkiyoQ7Yx/NmmKn10b6M2YBXreUqq0qhFa/kYgSZME=
go.opentelemetry.io/otel v1.39.0 h1:8yPrr/S0ND9QEfTfdP9V+SiwT4E0G7Y5MO7p85nis48=
go.opentelemetry.io/otel v1.39.0/go.mod h1:kLlFTywNWrFyEdH0oj2xK0bFYZtHRYUdv1NklR/tgc8=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.39.0 h1:f0cb2XPmrqn4XMy9PNliTgRKJgS5WcL/u0/WRYGz4t0=

View File

@@ -585,3 +585,34 @@ By default, the system uses `posix` storage. However, you can override this by s
```bash
STORAGE_DRIVER=posix ./tests/acceptance/run_api_tests.sh
```
## Running WOPI Validator Tests
### Available Test Groups
```text
BaseWopiViewing
CheckFileInfoSchema
EditFlows
Locks
AccessTokens
GetLock
ExtendedLockLength
FileVersion
Features
PutRelativeFile
RenameFileIfCreateChildFileIsNotSupported
```
### Run a Test Group
```bash
TEST_GROUP=BaseWopiViewing docker compose -f tests/acceptance/docker/src/wopi-validator-test.yml up -d
```
### For macOS, use the ARM image
```bash
WOPI_VALIDATOR_IMAGE=scharfvi/wopi-validator \
TEST_GROUP=BaseWopiViewing \
docker compose -f tests/acceptance/docker/src/wopi-validator-test.yml up -d
```

View File

@@ -100,7 +100,7 @@ class HttpRequestHelper {
$parsedUrl = parse_url($url);
$baseUrl = $parsedUrl['scheme'] . '://' . $parsedUrl['host'];
$baseUrl .= isset($parsedUrl['port']) ? ':' . $parsedUrl['port'] : '';
$testUrl = $baseUrl . "/graph/v1.0/use/$user";
$testUrl = $baseUrl . "/graph/v1.0/user/$user";
if (OcHelper::isTestingOnReva()) {
$url = $baseUrl . "/ocs/v2.php/cloud/users/$user";
}

View File

@@ -1325,6 +1325,20 @@ trait WebDav {
if ($statusCode === 404 || $statusCode === 405) {
return;
}
// After MOVE the source path might still be visible for a short time
// We wait 1 second and retry once to avoid flaky failures.
if ($statusCode === 207) {
sleep(1);
$response = $this->listFolder(
$user,
$path,
'0',
null,
null,
$type
);
$statusCode = $response->getStatusCode();
}
if ($statusCode === 207) {
$responseXmlObject = HttpRequestHelper::getResponseXml(
$response,

View File

@@ -0,0 +1,45 @@
#!/bin/sh
set -xe
if [ -z "$TEST_GROUP" ]; then
echo "TEST_GROUP not set"
exit 1
fi
echo "Waiting for collaboration WOPI endpoint..."
until curl -s http://collaboration:9304 >/dev/null; do
echo "Waiting for collaboration WOPI endpoint..."
sleep 2
done
echo "Collaboration is up"
if [ -z "$OC_URL" ]; then
OC_URL="https://opencloud-server:9200"
fi
curl -vk -X DELETE "$OC_URL/remote.php/webdav/test.wopitest" -u admin:admin
curl -vk -X PUT "$OC_URL/remote.php/webdav/test.wopitest" -u admin:admin -D headers.txt
cat headers.txt
FILE_ID="$(cat headers.txt | sed -n -e 's/^.*oc-fileid: //Ip')"
export FILE_ID
URL="$OC_URL/app/open?app_name=FakeOffice&file_id=$FILE_ID"
URL="$(echo "$URL" | tr -d '[:cntrl:]')"
export URL
curl -vk -X POST "$URL" -u admin:admin > open.json
cat open.json
cat open.json | jq .form_parameters.access_token | tr -d '"' > accesstoken
cat open.json | jq .form_parameters.access_token_ttl | tr -d '"' > accesstokenttl
WOPI_FILE_ID="$(cat open.json | jq .app_url | sed -n -e 's/^.*files%2F//p' | tr -d '"')"
echo "http://collaboration:9300/wopi/files/$WOPI_FILE_ID" > wopisrc
WOPI_TOKEN=$(cat accesstoken)
export WOPI_TOKEN
WOPI_TTL=$(cat accesstokenttl)
export WOPI_TTL
WOPI_SRC=$(cat wopisrc)
export WOPI_SRC
/app/Microsoft.Office.WopiValidator -s -t "$WOPI_TOKEN" -w "$WOPI_SRC" -l "$WOPI_TTL" --testgroup $TEST_GROUP

View File

@@ -0,0 +1,86 @@
services:
fakeoffice:
image: owncloudci/alpine:latest
entrypoint: /bin/sh
command:
[
"-c",
"while true; do echo -e \"HTTP/1.1 200 OK\n\n$(cat /hosting-discovery.xml)\" | nc -l -k -p 8080; done",
]
ports:
- 8080:8080
extra_hosts:
- opencloud.local:${DOCKER_HOST:-host-gateway}
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080"]
volumes:
- ./../../../config/woodpecker/hosting-discovery.xml:/hosting-discovery.xml
opencloud:
image: opencloudeu/opencloud:dev
container_name: opencloud-server
ports:
- 9200:9200
entrypoint: /bin/sh
command: ["-c", "opencloud init || true; sleep 10; opencloud server"]
environment:
OC_URL: https://opencloud-server:9200
OC_CONFIG_DIR: /etc/opencloud
STORAGE_USERS_DRIVER: posix
PROXY_ENABLE_BASIC_AUTH: true
OC_LOG_LEVEL: error
OC_LOG_COLOR: false
OC_INSECURE: true
IDM_ADMIN_PASSWORD: admin
GATEWAY_GRPC_ADDR: 0.0.0.0:9142
NATS_NATS_HOST: 0.0.0.0
NATS_NATS_PORT: 9233
volumes:
- config:/etc/opencloud
depends_on:
fakeoffice:
condition: service_healthy
collaboration:
image: opencloudeu/opencloud:dev
restart: unless-stopped
ports:
- 9300:9300
entrypoint:
- /bin/sh
command: ["-c", "opencloud collaboration server"]
environment:
OC_CONFIG_DIR: /etc/opencloud
MICRO_REGISTRY: nats-js-kv
MICRO_REGISTRY_ADDRESS: opencloud:9233
COLLABORATION_LOG_LEVEL: info
COLLABORATION_GRPC_ADDR: 0.0.0.0:9301
COLLABORATION_HTTP_ADDR: 0.0.0.0:9300
COLLABORATION_DEBUG_ADDR: 0.0.0.0:9304
COLLABORATION_APP_PROOF_DISABLE: true
COLLABORATION_APP_INSECURE: true
COLLABORATION_CS3API_DATAGATEWAY_INSECURE: true
COLLABORATION_WOPI_SECRET: some-wopi-secret
COLLABORATION_SERVICE_NAME: collaboration-fakeoffice
COLLABORATION_APP_NAME: FakeOffice
COLLABORATION_APP_PRODUCT: Microsoft
COLLABORATION_APP_ADDR: http://fakeoffice:8080
COLLABORATION_WOPI_SRC: http://collaboration:9300
volumes:
- config:/etc/opencloud
depends_on:
- opencloud
wopi-validator:
image: ${WOPI_VALIDATOR_IMAGE:-opencloudeu/wopi-validator-ci}
volumes:
- ./run-wopi-validator.sh:/app/run-wopi-validator.sh
environment:
TEST_GROUP: ${TEST_GROUP:-PutRelativeFile}
entrypoint: /app/run-wopi-validator.sh
depends_on:
- collaboration
restart: "on-failure"
volumes:
config:

View File

@@ -1,25 +0,0 @@
sudo: false
language: go
go:
- "1.21.x"
- "1.22.x"
- "1.23.x"
script:
- go get golang.org/x/tools/cmd/cover
- go get github.com/mattn/goveralls
- go get github.com/kisielk/errcheck
- go get -u github.com/FiloSottile/gvt
- gvt restore
- go test -race -v $(go list ./... | grep -v vendor/)
- go vet $(go list ./... | grep -v vendor/)
- go test ./test -v -indexType scorch
- errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/);
- scripts/project-code-coverage.sh
- scripts/build_children.sh
notifications:
email:
- fts-team@couchbase.com

View File

@@ -1,7 +1,7 @@
# ![bleve](docs/bleve.png) bleve
[![Tests](https://github.com/blevesearch/bleve/actions/workflows/tests.yml/badge.svg?branch=master&event=push)](https://github.com/blevesearch/bleve/actions/workflows/tests.yml?query=event%3Apush+branch%3Amaster)
[![Coverage Status](https://coveralls.io/repos/github/blevesearch/bleve/badge.svg?branch=master)](https://coveralls.io/github/blevesearch/bleve?branch=master)
[![Coverage Status](https://coveralls.io/repos/github/blevesearch/bleve/badge.svg)](https://coveralls.io/github/blevesearch/bleve)
[![Go Reference](https://pkg.go.dev/badge/github.com/blevesearch/bleve/v2.svg)](https://pkg.go.dev/github.com/blevesearch/bleve/v2)
[![Join the chat](https://badges.gitter.im/join_chat.svg)](https://app.gitter.im/#/room/#blevesearch_bleve:gitter.im)
[![Go Report Card](https://goreportcard.com/badge/github.com/blevesearch/bleve/v2)](https://goreportcard.com/report/github.com/blevesearch/bleve/v2)

View File

@@ -180,7 +180,7 @@ func NewGeoShapeFieldFromShapeWithIndexingOptions(name string, arrayPositions []
// docvalues are always enabled for geoshape fields, even if the
// indexing options are set to not include docvalues.
options = options | index.DocValues
options |= index.DocValues
return &GeoShapeField{
shape: shape,
@@ -232,7 +232,7 @@ func NewGeometryCollectionFieldFromShapesWithIndexingOptions(name string,
// docvalues are always enabled for geoshape fields, even if the
// indexing options are set to not include docvalues.
options = options | index.DocValues
options |= index.DocValues
return &GeoShapeField{
shape: shape,

View File

@@ -109,6 +109,10 @@ func NewVectorField(name string, arrayPositions []uint64,
func NewVectorFieldWithIndexingOptions(name string, arrayPositions []uint64,
vector []float32, dims int, similarity, vectorIndexOptimizedFor string,
options index.FieldIndexingOptions) *VectorField {
// ensure the options are set to not store/index term vectors/doc values
options &^= index.StoreField | index.IncludeTermVectors | index.DocValues
// skip freq/norms for vector field
options |= index.SkipFreqNorm
return &VectorField{
name: name,

View File

@@ -17,113 +17,125 @@ package fusion
import (
"fmt"
"sort"
"github.com/blevesearch/bleve/v2/search"
)
// formatRRFMessage builds the explanation string for a single component of the
// Reciprocal Rank Fusion calculation.
func formatRRFMessage(weight float64, rank int, rankConstant int) string {
return fmt.Sprintf("rrf score (weight=%.3f, rank=%d, rank_constant=%d), normalized score of", weight, rank, rankConstant)
}
// ReciprocalRankFusion performs a reciprocal rank fusion on the search results.
func ReciprocalRankFusion(hits search.DocumentMatchCollection, weights []float64, rankConstant int, windowSize int, numKNNQueries int, explain bool) FusionResult {
if len(hits) == 0 {
return FusionResult{
Hits: hits,
// ReciprocalRankFusion applies Reciprocal Rank Fusion across the primary FTS
// results and each KNN sub-query. Ranks are limited to `windowSize` per source,
// weighted, and combined into a single fused score, with optional explanation
// details.
func ReciprocalRankFusion(hits search.DocumentMatchCollection, weights []float64, rankConstant int, windowSize int, numKNNQueries int, explain bool) *FusionResult {
nHits := len(hits)
if nHits == 0 || windowSize == 0 {
return &FusionResult{
Hits: search.DocumentMatchCollection{},
Total: 0,
MaxScore: 0.0,
}
}
// Create a map of document ID to a slice of ranks.
// The first element of the slice is the rank from the FTS search,
// and the subsequent elements are the ranks from the KNN searches.
docRanks := make(map[string][]int)
limit := min(nHits, windowSize)
// Pre-assign rank lists to each candidate document
for _, hit := range hits {
docRanks[hit.ID] = make([]int, numKNNQueries+1)
// precompute rank+scores to prevent additional division ops later
rankReciprocals := make([]float64, limit)
for i := range rankReciprocals {
rankReciprocals[i] = 1.0 / float64(rankConstant+i+1)
}
// Only a max of `window_size` elements need to be counted for. Stop
// calculating rank once this threshold is hit.
sort.Slice(hits, func(a, b int) bool {
return scoreSortFunc()(hits[a], hits[b]) < 0
})
// Only consider top windowSize docs for rescoring
for i := range min(windowSize, len(hits)) {
if hits[i].Score != 0.0 {
// Skip if Score is 0, since that means the document was not
// found as part of FTS, and only in KNN.
docRanks[hits[i].ID][0] = i + 1
// init explanations if required
var fusionExpl map[*search.DocumentMatch][]*search.Explanation
if explain {
fusionExpl = make(map[*search.DocumentMatch][]*search.Explanation, nHits)
}
// The code here mainly deals with obtaining rank/score for fts hits.
// First sort hits by score
sortDocMatchesByScore(hits)
// Calculate fts rank+scores
ftsWeight := weights[0]
for i := 0; i < nHits; i++ {
if i < windowSize {
hit := hits[i]
// No fts scores from this hit onwards, break loop
if hit.Score == 0.0 {
break
}
contrib := ftsWeight * rankReciprocals[i]
hit.Score = contrib
if explain {
expl := getFusionExplAt(
hit,
0,
contrib,
formatRRFMessage(ftsWeight, i+1, rankConstant),
)
fusionExpl[hit] = append(fusionExpl[hit], expl)
}
} else {
// These FTS hits are not counted in the results, so set to 0
hits[i].Score = 0.0
}
}
// Allocate knnDocs and reuse it within the loop
knnDocs := make([]*search.DocumentMatch, 0, len(hits))
// Code from here is to calculate knn ranks and scores
// iterate over each knn query and calculate knn rank+scores
for queryIdx := 0; queryIdx < numKNNQueries; queryIdx++ {
knnWeight := weights[queryIdx+1]
// Sorts hits in decreasing order of hit.ScoreBreakdown[i]
sortDocMatchesByBreakdown(hits, queryIdx)
// For each KNN query, rank the documents based on their KNN score.
for i := range numKNNQueries {
knnDocs = knnDocs[:0]
for i := 0; i < nHits; i++ {
// break if score breakdown doesn't exist (sort function puts these hits at the end)
// or if we go past the windowSize
_, scoreBreakdownExists := scoreBreakdownForQuery(hits[i], queryIdx)
if i >= windowSize || !scoreBreakdownExists {
break
}
for _, hit := range hits {
if _, ok := hit.ScoreBreakdown[i]; ok {
knnDocs = append(knnDocs, hit)
hit := hits[i]
contrib := knnWeight * rankReciprocals[i]
hit.Score += contrib
if explain {
expl := getFusionExplAt(
hit,
queryIdx+1,
contrib,
formatRRFMessage(knnWeight, i+1, rankConstant),
)
fusionExpl[hit] = append(fusionExpl[hit], expl)
}
}
// Sort the documents based on their score for this KNN query.
sort.Slice(knnDocs, func(a, b int) bool {
return scoreBreakdownSortFunc(i)(knnDocs[a], knnDocs[b]) < 0
})
// Update the ranks of the documents in the docRanks map.
// Only consider top windowSize docs for rescoring.
for j := range min(windowSize, len(knnDocs)) {
docRanks[knnDocs[j].ID][i+1] = j + 1
}
}
// Calculate the RRF score for each document.
var maxScore float64
for _, hit := range hits {
var rrfScore float64
var explChildren []*search.Explanation
if explain {
explChildren = make([]*search.Explanation, 0, numKNNQueries+1)
finalizeFusionExpl(hit, fusionExpl[hit])
}
for i, rank := range docRanks[hit.ID] {
if rank > 0 {
partialRrfScore := weights[i] * 1.0 / float64(rankConstant+rank)
if explain {
expl := getFusionExplAt(
hit,
i,
partialRrfScore,
formatRRFMessage(weights[i], rank, rankConstant),
)
explChildren = append(explChildren, expl)
}
rrfScore += partialRrfScore
}
}
hit.Score = rrfScore
hit.ScoreBreakdown = nil
if rrfScore > maxScore {
maxScore = rrfScore
}
if explain {
finalizeFusionExpl(hit, explChildren)
if hit.Score > maxScore {
maxScore = hit.Score
}
}
sort.Sort(hits)
if len(hits) > windowSize {
sortDocMatchesByScore(hits)
if nHits > windowSize {
hits = hits[:windowSize]
}
return FusionResult{
return &FusionResult{
Hits: hits,
Total: uint64(len(hits)),
MaxScore: maxScore,

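For context on the rewritten `ReciprocalRankFusion` above: each source (the FTS query, then each KNN sub-query) contributes `weight / (rankConstant + rank)` for a document ranked within the top `windowSize` results of that source, and the contributions are summed into the fused score. A minimal standalone sketch of that arithmetic, with illustrative names only (this is not bleve's API):

```go
package main

import "fmt"

// rrfScore illustrates the arithmetic the diff above implements: each source
// (the FTS query at index 0, then each KNN sub-query) contributes
// weight / (rankConstant + rank) for a document that appears within that
// source's window. A rank of 0 means "not ranked by this source".
func rrfScore(ranks []int, weights []float64, rankConstant int) float64 {
	var score float64
	for i, rank := range ranks {
		if rank == 0 {
			continue // document not in this source's window
		}
		score += weights[i] / float64(rankConstant+rank)
	}
	return score
}

func main() {
	// A document ranked 1st by FTS and 3rd by a single KNN query,
	// equal weights, rank constant 60.
	fmt.Printf("%.6f\n", rrfScore([]int{1, 3}, []float64{1, 1}, 60))
}
```

Precomputing the reciprocals once per window, as the new code does with `rankReciprocals`, avoids repeating that division for every document.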
View File

@@ -16,145 +16,147 @@ package fusion
import (
"fmt"
"sort"
"github.com/blevesearch/bleve/v2/search"
)
// formatRSFMessage builds the explanation string associated with a single
// component of the Relative Score Fusion calculation.
func formatRSFMessage(weight float64, normalizedScore float64, minScore float64, maxScore float64) string {
return fmt.Sprintf("rsf score (weight=%.3f, normalized=%.6f, min=%.6f, max=%.6f), normalized score of",
weight, normalizedScore, minScore, maxScore)
}
// RelativeScoreFusion normalizes scores based on min/max values for FTS and each KNN query, then applies weights.
func RelativeScoreFusion(hits search.DocumentMatchCollection, weights []float64, windowSize int, numKNNQueries int, explain bool) FusionResult {
if len(hits) == 0 {
return FusionResult{
Hits: hits,
// RelativeScoreFusion normalizes the best-scoring documents from the primary
// FTS query and each KNN query, scales those normalized values by the supplied
// weights, and combines them into a single fused score. Only the top
// `windowSize` documents per source are considered, and explanations are
// materialized lazily when requested.
func RelativeScoreFusion(hits search.DocumentMatchCollection, weights []float64, windowSize int, numKNNQueries int, explain bool) *FusionResult {
nHits := len(hits)
if nHits == 0 || windowSize == 0 {
return &FusionResult{
Hits: search.DocumentMatchCollection{},
Total: 0,
MaxScore: 0.0,
}
}
rsfScores := make(map[string]float64)
// contains the docs under consideration for scoring.
// Reused for fts and knn hits
scoringDocs := make([]*search.DocumentMatch, 0, len(hits))
var explMap map[string][]*search.Explanation
// init explanations if required
var fusionExpl map[*search.DocumentMatch][]*search.Explanation
if explain {
explMap = make(map[string][]*search.Explanation)
fusionExpl = make(map[*search.DocumentMatch][]*search.Explanation, nHits)
}
// remove non-fts hits
// Code here for calculating fts results
// Sort by fts scores
sortDocMatchesByScore(hits)
// ftsLimit holds the total number of fts hits to consider for rsf
ftsLimit := 0
for _, hit := range hits {
if hit.Score != 0.0 {
scoringDocs = append(scoringDocs, hit)
if hit.Score == 0.0 {
break
}
ftsLimit++
}
// sort hits by fts score
sort.Slice(scoringDocs, func(a, b int) bool {
return scoreSortFunc()(scoringDocs[a], scoringDocs[b]) < 0
})
// Reslice to correct size
if len(scoringDocs) > windowSize {
scoringDocs = scoringDocs[:windowSize]
}
ftsLimit = min(ftsLimit, windowSize)
var min, max float64
if len(scoringDocs) > 0 {
min, max = scoringDocs[len(scoringDocs)-1].Score, scoringDocs[0].Score
}
// calculate fts scores
if ftsLimit > 0 {
max := hits[0].Score
min := hits[ftsLimit-1].Score
denom := max - min
weight := weights[0]
for _, hit := range scoringDocs {
var tempRsfScore float64
if max > min {
tempRsfScore = (hit.Score - min) / (max - min)
} else {
tempRsfScore = 1.0
}
if explain {
// create and replace new explanation
expl := getFusionExplAt(
hit,
0,
tempRsfScore,
formatRSFMessage(weights[0], tempRsfScore, min, max),
)
explMap[hit.ID] = append(explMap[hit.ID], expl)
}
rsfScores[hit.ID] = weights[0] * tempRsfScore
}
for i := range numKNNQueries {
scoringDocs = scoringDocs[:0]
for _, hit := range hits {
if _, exists := hit.ScoreBreakdown[i]; exists {
scoringDocs = append(scoringDocs, hit)
for i := 0; i < ftsLimit; i++ {
hit := hits[i]
norm := 1.0
if denom > 0 {
norm = (hit.Score - min) / denom
}
}
sort.Slice(scoringDocs, func(a, b int) bool {
return scoreBreakdownSortFunc(i)(scoringDocs[a], scoringDocs[b]) < 0
})
if len(scoringDocs) > windowSize {
scoringDocs = scoringDocs[:windowSize]
}
if len(scoringDocs) > 0 {
min, max = scoringDocs[len(scoringDocs)-1].ScoreBreakdown[i], scoringDocs[0].ScoreBreakdown[i]
} else {
min, max = 0.0, 0.0
}
for _, hit := range scoringDocs {
var tempRsfScore float64
if max > min {
tempRsfScore = (hit.ScoreBreakdown[i] - min) / (max - min)
} else {
tempRsfScore = 1.0
}
contrib := weight * norm
if explain {
expl := getFusionExplAt(
hit,
i+1,
tempRsfScore,
formatRSFMessage(weights[i+1], tempRsfScore, min, max),
0,
norm,
formatRSFMessage(weight, norm, min, max),
)
explMap[hit.ID] = append(explMap[hit.ID], expl)
fusionExpl[hit] = append(fusionExpl[hit], expl)
}
rsfScores[hit.ID] += weights[i+1] * tempRsfScore
hit.Score = contrib
}
for i := ftsLimit; i < nHits; i++ {
// These FTS hits are not counted in the results, so set to 0
hits[i].Score = 0.0
}
}
var maxScore float64
for _, hit := range hits {
if rsfScore, exists := rsfScores[hit.ID]; exists {
hit.Score = rsfScore
if rsfScore > maxScore {
maxScore = rsfScore
// Code from here is for calculating knn scores
for queryIdx := 0; queryIdx < numKNNQueries; queryIdx++ {
sortDocMatchesByBreakdown(hits, queryIdx)
// knnLimit holds the total number of knn hits retrieved for a specific knn query
knnLimit := 0
for _, hit := range hits {
if _, ok := scoreBreakdownForQuery(hit, queryIdx); !ok {
break
}
if explain {
finalizeFusionExpl(hit, explMap[hit.ID])
}
} else {
hit.Score = 0.0
knnLimit++
}
knnLimit = min(knnLimit, windowSize)
// if limit is 0, skip calculating
if knnLimit == 0 {
continue
}
max, _ := scoreBreakdownForQuery(hits[0], queryIdx)
min, _ := scoreBreakdownForQuery(hits[knnLimit-1], queryIdx)
denom := max - min
weight := weights[queryIdx+1]
for i := 0; i < knnLimit; i++ {
hit := hits[i]
score, _ := scoreBreakdownForQuery(hit, queryIdx)
norm := 1.0
if denom > 0 {
norm = (score - min) / denom
}
contrib := weight * norm
if explain {
expl := getFusionExplAt(
hit,
queryIdx+1,
norm,
formatRSFMessage(weight, norm, min, max),
)
fusionExpl[hit] = append(fusionExpl[hit], expl)
}
hit.Score += contrib
}
}
// Finalize scores
var maxScore float64
for _, hit := range hits {
if explain {
finalizeFusionExpl(hit, fusionExpl[hit])
}
if hit.Score > maxScore {
maxScore = hit.Score
}
hit.ScoreBreakdown = nil
}
sort.Sort(hits)
sortDocMatchesByScore(hits)
if len(hits) > windowSize {
if nHits > windowSize {
hits = hits[:windowSize]
}
return FusionResult{
return &FusionResult{
Hits: hits,
Total: uint64(len(hits)),
MaxScore: maxScore,

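For the rewritten `RelativeScoreFusion` above: each source's scores are min-max normalized over the hits considered for that source, scaled by the source weight, and summed per document; when a source's max equals its min, the normalized value falls back to 1.0. A small sketch of the per-source scaling, again with illustrative names rather than bleve's internals:

```go
package main

import "fmt"

// rsfNormalize illustrates the per-source scaling in the diff above: scores
// from one source are min-max scaled over the hits considered for that
// source, then multiplied by the source weight. When all scores are equal
// (zero denominator), every hit receives the full weight.
func rsfNormalize(scores []float64, weight float64) []float64 {
	if len(scores) == 0 {
		return nil
	}
	min, max := scores[0], scores[0]
	for _, s := range scores {
		if s < min {
			min = s
		}
		if s > max {
			max = s
		}
	}
	out := make([]float64, len(scores))
	for i, s := range scores {
		norm := 1.0
		if max > min {
			norm = (s - min) / (max - min)
		}
		out[i] = weight * norm
	}
	return out
}

func main() {
	// FTS scores for three hits, weighted 0.7; a document's fused score is
	// this contribution plus the analogous per-KNN-query contributions.
	fmt.Println(rsfNormalize([]float64{2.0, 1.5, 0.5}, 0.7))
}
```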
View File

@@ -16,70 +16,82 @@
package fusion
import (
"sort"
"github.com/blevesearch/bleve/v2/search"
)
// scoreBreakdownSortFunc returns a comparison function for sorting DocumentMatch objects
// by their ScoreBreakdown at the specified index in descending order.
// In case of ties, documents with lower HitNumber (earlier hits) are preferred.
// If either document is missing the ScoreBreakdown for the specified index,
// it's treated as having a score of 0.0.
func scoreBreakdownSortFunc(idx int) func(i, j *search.DocumentMatch) int {
return func(i, j *search.DocumentMatch) int {
// Safely extract scores, defaulting to 0.0 if missing
iScore := 0.0
jScore := 0.0
if i.ScoreBreakdown != nil {
if score, ok := i.ScoreBreakdown[idx]; ok {
iScore = score
}
}
if j.ScoreBreakdown != nil {
if score, ok := j.ScoreBreakdown[idx]; ok {
jScore = score
}
}
// Sort by score in descending order (higher scores first)
if iScore > jScore {
return -1
} else if iScore < jScore {
return 1
}
// Break ties by HitNumber in ascending order (lower HitNumber wins)
if i.HitNumber < j.HitNumber {
return -1
} else if i.HitNumber > j.HitNumber {
return 1
}
return 0 // Equal scores and HitNumbers
// sortDocMatchesByScore orders the provided collection in-place by the primary
// score in descending order, breaking ties with the original `HitNumber` to
// ensure deterministic output.
func sortDocMatchesByScore(hits search.DocumentMatchCollection) {
if len(hits) < 2 {
return
}
sort.Slice(hits, func(a, b int) bool {
i := hits[a]
j := hits[b]
if i.Score == j.Score {
return i.HitNumber < j.HitNumber
}
return i.Score > j.Score
})
}
func scoreSortFunc() func(i, j *search.DocumentMatch) int {
return func(i, j *search.DocumentMatch) int {
// Sort by score in descending order
if i.Score > j.Score {
return -1
} else if i.Score < j.Score {
return 1
}
// Break ties by HitNumber
if i.HitNumber < j.HitNumber {
return -1
} else if i.HitNumber > j.HitNumber {
return 1
}
return 0
// scoreBreakdownForQuery fetches the score for a specific KNN query index from
// the provided hit. The boolean return indicates whether the score is present.
func scoreBreakdownForQuery(hit *search.DocumentMatch, idx int) (float64, bool) {
if hit == nil || hit.ScoreBreakdown == nil {
return 0, false
}
score, ok := hit.ScoreBreakdown[idx]
return score, ok
}
// sortDocMatchesByBreakdown orders the hits in-place using the KNN score for
// the supplied query index (descending), breaking ties with `HitNumber` and
// placing hits without a score at the end.
func sortDocMatchesByBreakdown(hits search.DocumentMatchCollection, queryIdx int) {
if len(hits) < 2 {
return
}
sort.SliceStable(hits, func(a, b int) bool {
left := hits[a]
right := hits[b]
var leftScore float64
leftOK := false
if left != nil && left.ScoreBreakdown != nil {
leftScore, leftOK = left.ScoreBreakdown[queryIdx]
}
var rightScore float64
rightOK := false
if right != nil && right.ScoreBreakdown != nil {
rightScore, rightOK = right.ScoreBreakdown[queryIdx]
}
if leftOK && rightOK {
if leftScore == rightScore {
return left.HitNumber < right.HitNumber
}
return leftScore > rightScore
}
if leftOK != rightOK {
return leftOK
}
return left.HitNumber < right.HitNumber
})
}
// getFusionExplAt copies the existing explanation child at the requested index
// and wraps it in a new node describing how the fusion algorithm adjusted the
// score.
func getFusionExplAt(hit *search.DocumentMatch, i int, value float64, message string) *search.Explanation {
return &search.Explanation{
Value: value,
@@ -88,6 +100,9 @@ func getFusionExplAt(hit *search.DocumentMatch, i int, value float64, message st
}
}
// finalizeFusionExpl installs the collection of fusion explanation children and
// updates the root message so the caller sees the fused score as the sum of its
// parts.
func finalizeFusionExpl(hit *search.DocumentMatch, explChildren []*search.Explanation) {
hit.Expl.Children = explChildren

View File

@@ -35,43 +35,45 @@ type Event struct {
// EventKind represents an event code for OnEvent() callbacks.
type EventKind int
// EventKindCloseStart is fired when a Scorch.Close() has begun.
var EventKindCloseStart = EventKind(1)
const (
// EventKindCloseStart is fired when a Scorch.Close() has begun.
EventKindCloseStart EventKind = iota
// EventKindClose is fired when a scorch index has been fully closed.
var EventKindClose = EventKind(2)
// EventKindClose is fired when a scorch index has been fully closed.
EventKindClose
// EventKindMergerProgress is fired when the merger has completed a
// round of merge processing.
var EventKindMergerProgress = EventKind(3)
// EventKindMergerProgress is fired when the merger has completed a
// round of merge processing.
EventKindMergerProgress
// EventKindPersisterProgress is fired when the persister has completed
// a round of persistence processing.
var EventKindPersisterProgress = EventKind(4)
// EventKindPersisterProgress is fired when the persister has completed
// a round of persistence processing.
EventKindPersisterProgress
// EventKindBatchIntroductionStart is fired when Batch() is invoked which
// introduces a new segment.
var EventKindBatchIntroductionStart = EventKind(5)
// EventKindBatchIntroductionStart is fired when Batch() is invoked which
// introduces a new segment.
EventKindBatchIntroductionStart
// EventKindBatchIntroduction is fired when Batch() completes.
var EventKindBatchIntroduction = EventKind(6)
// EventKindBatchIntroduction is fired when Batch() completes.
EventKindBatchIntroduction
// EventKindMergeTaskIntroductionStart is fired when the merger is about to
// start the introduction of merged segment from a single merge task.
var EventKindMergeTaskIntroductionStart = EventKind(7)
// EventKindMergeTaskIntroductionStart is fired when the merger is about to
// start the introduction of merged segment from a single merge task.
EventKindMergeTaskIntroductionStart
// EventKindMergeTaskIntroduction is fired when the merger has completed
// the introduction of merged segment from a single merge task.
var EventKindMergeTaskIntroduction = EventKind(8)
// EventKindMergeTaskIntroduction is fired when the merger has completed
// the introduction of merged segment from a single merge task.
EventKindMergeTaskIntroduction
// EventKindPreMergeCheck is fired before the merge begins to check if
// the caller should proceed with the merge.
var EventKindPreMergeCheck = EventKind(9)
// EventKindPreMergeCheck is fired before the merge begins to check if
// the caller should proceed with the merge.
EventKindPreMergeCheck
// EventKindIndexStart is fired when Index() is invoked which
// creates a new Document object from an interface using the index mapping.
var EventKindIndexStart = EventKind(10)
// EventKindIndexStart is fired when Index() is invoked which
// creates a new Document object from an interface using the index mapping.
EventKindIndexStart
// EventKindPurgerCheck is fired before the purge code is invoked and decides
// whether to execute or not. For unit test purposes
var EventKindPurgerCheck = EventKind(11)
// EventKindPurgerCheck is fired before the purge code is invoked and decides
// whether to execute or not. For unit test purposes
EventKindPurgerCheck
)

View File

@@ -24,6 +24,8 @@ import (
segment "github.com/blevesearch/scorch_segment_api/v2"
)
const introducer = "introducer"
type segmentIntroduction struct {
id uint64
data segment.Segment
@@ -50,10 +52,11 @@ type epochWatcher struct {
func (s *Scorch) introducerLoop() {
defer func() {
if r := recover(); r != nil {
s.fireAsyncError(&AsyncPanicError{
Source: "introducer",
Path: s.path,
})
s.fireAsyncError(NewScorchError(
introducer,
fmt.Sprintf("panic: %v, path: %s", r, s.path),
ErrAsyncPanic,
))
}
s.asyncTasks.Done()

View File

@@ -29,13 +29,16 @@ import (
segment "github.com/blevesearch/scorch_segment_api/v2"
)
const merger = "merger"
func (s *Scorch) mergerLoop() {
defer func() {
if r := recover(); r != nil {
s.fireAsyncError(&AsyncPanicError{
Source: "merger",
Path: s.path,
})
s.fireAsyncError(NewScorchError(
merger,
fmt.Sprintf("panic: %v, path: %s", r, s.path),
ErrAsyncPanic,
))
}
s.asyncTasks.Done()
@@ -45,7 +48,11 @@ func (s *Scorch) mergerLoop() {
var ctrlMsg *mergerCtrl
mergePlannerOptions, err := s.parseMergePlannerOptions()
if err != nil {
s.fireAsyncError(fmt.Errorf("mergePlannerOption json parsing err: %v", err))
s.fireAsyncError(NewScorchError(
merger,
fmt.Sprintf("mergerPlannerOptions json parsing err: %v", err),
ErrOptionsParse,
))
return
}
ctrlMsgDflt := &mergerCtrl{ctx: context.Background(),
@@ -110,7 +117,12 @@ OUTER:
ctrlMsg = nil
break OUTER
}
s.fireAsyncError(fmt.Errorf("merging err: %v", err))
s.fireAsyncError(NewScorchError(
merger,
fmt.Sprintf("merging err: %v", err),
ErrPersist,
))
_ = ourSnapshot.DecRef()
atomic.AddUint64(&s.stats.TotFileMergeLoopErr, 1)
continue OUTER

View File

@@ -38,6 +38,8 @@ import (
bolt "go.etcd.io/bbolt"
)
const persister = "persister"
// DefaultPersisterNapTimeMSec is kept to zero as this helps in direct
// persistence of segments with the default safe batch option.
// If the default safe batch option results in high number of
@@ -95,10 +97,11 @@ type notificationChan chan struct{}
func (s *Scorch) persisterLoop() {
defer func() {
if r := recover(); r != nil {
s.fireAsyncError(&AsyncPanicError{
Source: "persister",
Path: s.path,
})
s.fireAsyncError(NewScorchError(
persister,
fmt.Sprintf("panic: %v, path: %s", r, s.path),
ErrAsyncPanic,
))
}
s.asyncTasks.Done()
@@ -112,7 +115,11 @@ func (s *Scorch) persisterLoop() {
po, err := s.parsePersisterOptions()
if err != nil {
s.fireAsyncError(fmt.Errorf("persisterOptions json parsing err: %v", err))
s.fireAsyncError(NewScorchError(
persister,
fmt.Sprintf("persisterOptions json parsing err: %v", err),
ErrOptionsParse,
))
return
}
@@ -173,7 +180,11 @@ OUTER:
// the retry attempt
unpersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...)
s.fireAsyncError(fmt.Errorf("got err persisting snapshot: %v", err))
s.fireAsyncError(NewScorchError(
persister,
fmt.Sprintf("got err persisting snapshot: %v", err),
ErrPersist,
))
_ = ourSnapshot.DecRef()
atomic.AddUint64(&s.stats.TotPersistLoopErr, 1)
continue OUTER
@@ -1060,13 +1071,21 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro
func (s *Scorch) removeOldData() {
removed, err := s.removeOldBoltSnapshots()
if err != nil {
s.fireAsyncError(fmt.Errorf("got err removing old bolt snapshots: %v", err))
s.fireAsyncError(NewScorchError(
persister,
fmt.Sprintf("got err removing old bolt snapshots: %v", err),
ErrCleanup,
))
}
atomic.AddUint64(&s.stats.TotSnapshotsRemovedFromMetaStore, uint64(removed))
err = s.removeOldZapFiles()
if err != nil {
s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err))
s.fireAsyncError(NewScorchError(
persister,
fmt.Sprintf("got err removing old zap files: %v", err),
ErrCleanup,
))
}
}

View File

@@ -88,14 +88,45 @@ type Scorch struct {
spatialPlugin index.SpatialAnalyzerPlugin
}
// AsyncPanicError is passed to scorch asyncErrorHandler when panic occurs in scorch background process
type AsyncPanicError struct {
Source string
Path string
type ScorchErrorType string
func (t ScorchErrorType) Error() string {
return string(t)
}
func (e *AsyncPanicError) Error() string {
return fmt.Sprintf("%s panic when processing %s", e.Source, e.Path)
// ErrType values for ScorchError
const (
ErrAsyncPanic = ScorchErrorType("async panic error")
ErrPersist = ScorchErrorType("persist error")
ErrCleanup = ScorchErrorType("cleanup error")
ErrOptionsParse = ScorchErrorType("options parse error")
)
// ScorchError is passed to onAsyncError when errors are
// fired from scorch background processes
type ScorchError struct {
Source string
ErrMsg string
ErrType ScorchErrorType
}
func (e *ScorchError) Error() string {
return fmt.Sprintf("source: %s, %v: %s", e.Source, e.ErrType, e.ErrMsg)
}
// Lets the onAsyncError function verify what type of
// error is fired using errors.Is(...). This lets the function
// handle errors differently.
func (e *ScorchError) Unwrap() error {
return e.ErrType
}
func NewScorchError(source, errMsg string, errType ScorchErrorType) error {
return &ScorchError{
Source: source,
ErrMsg: errMsg,
ErrType: errType,
}
}
type internalStats struct {

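The new `ScorchError` unwraps to its `ScorchErrorType`, so an application's async error callback can branch on the error kind with `errors.Is`. A minimal sketch of that pattern, assuming the exported names shown in the hunk above (`NewScorchError`, `ErrAsyncPanic`, `ErrPersist`, `ErrCleanup`, `ErrOptionsParse`) from the vendored `github.com/blevesearch/bleve/v2/index/scorch` package; the callback registration itself is not shown here:

```go
package main

import (
	"errors"
	"fmt"

	"github.com/blevesearch/bleve/v2/index/scorch"
)

// classify shows how an async error handler could distinguish the error
// kinds introduced above: errors.Is walks Unwrap(), which returns the
// ScorchErrorType, so comparison against the exported constants matches.
func classify(err error) string {
	switch {
	case errors.Is(err, scorch.ErrAsyncPanic):
		return "panic in a scorch background goroutine"
	case errors.Is(err, scorch.ErrPersist):
		return "persistence failure"
	case errors.Is(err, scorch.ErrCleanup):
		return "cleanup failure"
	case errors.Is(err, scorch.ErrOptionsParse):
		return "options parsing failure"
	default:
		return "unclassified scorch error"
	}
}

func main() {
	err := scorch.NewScorchError("persister", "got err persisting snapshot: disk full", scorch.ErrPersist)
	fmt.Println(classify(err)) // persistence failure
}
```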
View File

@@ -23,7 +23,6 @@ import (
"path/filepath"
"reflect"
"sort"
"strings"
"sync"
"sync/atomic"
@@ -147,7 +146,7 @@ func (is *IndexSnapshot) newIndexSnapshotFieldDict(field string,
makeItr func(i segment.TermDictionary) segment.DictionaryIterator,
randomLookup bool,
) (*IndexSnapshotFieldDict, error) {
results := make(chan *asynchSegmentResult)
results := make(chan *asynchSegmentResult, len(is.segment))
var totalBytesRead uint64
var fieldCardinality int64
for _, s := range is.segment {
@@ -281,10 +280,13 @@ func (is *IndexSnapshot) FieldDictRange(field string, startTerm []byte,
// to use as the end key in a traditional (inclusive, exclusive]
// start/end range
func calculateExclusiveEndFromPrefix(in []byte) []byte {
if len(in) == 0 {
return nil
}
rv := make([]byte, len(in))
copy(rv, in)
for i := len(rv) - 1; i >= 0; i-- {
rv[i] = rv[i] + 1
rv[i]++
if rv[i] != 0 {
return rv // didn't overflow, so stop
}
@@ -391,7 +393,7 @@ func (is *IndexSnapshot) FieldDictContains(field string) (index.FieldDictContain
}
func (is *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) {
results := make(chan *asynchSegmentResult)
results := make(chan *asynchSegmentResult, len(is.segment))
for index, segment := range is.segment {
go func(index int, segment *SegmentSnapshot) {
results <- &asynchSegmentResult{
@@ -405,7 +407,7 @@ func (is *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) {
}
func (is *IndexSnapshot) DocIDReaderOnly(ids []string) (index.DocIDReader, error) {
results := make(chan *asynchSegmentResult)
results := make(chan *asynchSegmentResult, len(is.segment))
for index, segment := range is.segment {
go func(index int, segment *SegmentSnapshot) {
docs, err := segment.DocNumbers(ids)
@@ -451,7 +453,7 @@ func (is *IndexSnapshot) newDocIDReader(results chan *asynchSegmentResult) (inde
func (is *IndexSnapshot) Fields() ([]string, error) {
// FIXME not making this concurrent for now as it's not used in hot path
// of any searches at the moment (just a debug aid)
fieldsMap := map[string]struct{}{}
fieldsMap := make(map[string]struct{})
for _, segment := range is.segment {
fields := segment.Fields()
for _, field := range fields {
@@ -765,7 +767,7 @@ func (is *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReade
is.m2.Lock()
if is.fieldTFRs == nil {
is.fieldTFRs = map[string][]*IndexSnapshotTermFieldReader{}
is.fieldTFRs = make(map[string][]*IndexSnapshotTermFieldReader)
}
if len(is.fieldTFRs[tfr.field]) < is.getFieldTFRCacheThreshold() {
tfr.bytesRead = 0
@@ -813,7 +815,7 @@ func (is *IndexSnapshot) documentVisitFieldTermsOnSegment(
// Filter out fields that have been completely deleted or had their
// docvalues data deleted from both visitable fields and required fields
filterUpdatedFields := func(fields []string) []string {
filteredFields := make([]string, 0)
filteredFields := make([]string, 0, len(fields))
for _, field := range fields {
if info, ok := is.updatedFields[field]; ok &&
(info.DocValues || info.Deleted) {
@@ -978,15 +980,17 @@ func subtractStrings(a, b []string) []string {
return a
}
// Create a map for O(1) lookups
bMap := make(map[string]struct{}, len(b))
for _, bs := range b {
bMap[bs] = struct{}{}
}
rv := make([]string, 0, len(a))
OUTER:
for _, as := range a {
for _, bs := range b {
if as == bs {
continue OUTER
}
if _, exists := bMap[as]; !exists {
rv = append(rv, as)
}
rv = append(rv, as)
}
return rv
}
@@ -1279,7 +1283,7 @@ func (is *IndexSnapshot) TermFrequencies(field string, limit int, descending boo
sort.Slice(termFreqs, func(i, j int) bool {
if termFreqs[i].Frequency == termFreqs[j].Frequency {
// If frequencies are equal, sort by term lexicographically
return strings.Compare(termFreqs[i].Term, termFreqs[j].Term) < 0
return termFreqs[i].Term < termFreqs[j].Term
}
if descending {
return termFreqs[i].Frequency > termFreqs[j].Frequency

View File

@@ -37,14 +37,10 @@ func (is *IndexSnapshot) VectorReader(ctx context.Context, vector []float32,
snapshot: is,
searchParams: searchParams,
eligibleSelector: eligibleSelector,
postings: make([]segment_api.VecPostingsList, len(is.segment)),
iterators: make([]segment_api.VecPostingsIterator, len(is.segment)),
}
if rv.postings == nil {
rv.postings = make([]segment_api.VecPostingsList, len(is.segment))
}
if rv.iterators == nil {
rv.iterators = make([]segment_api.VecPostingsIterator, len(is.segment))
}
// initialize postings and iterators within the OptimizeVR's Finish()
return rv, nil
}

View File

@@ -18,7 +18,6 @@ import (
"context"
"fmt"
"sort"
"strings"
"sync"
"time"
@@ -905,7 +904,7 @@ func preSearchDataSearch(ctx context.Context, req *SearchRequest, flags *preSear
// which would happen in the case of an alias tree and depending on the level of the tree, the preSearchData
// needs to be redistributed to the indexes at that level
func redistributePreSearchData(req *SearchRequest, indexes []Index) (map[string]map[string]interface{}, error) {
rv := make(map[string]map[string]interface{})
rv := make(map[string]map[string]interface{}, len(indexes))
for _, index := range indexes {
rv[index.Name()] = make(map[string]interface{})
}
@@ -1202,23 +1201,16 @@ func (i *indexAliasImpl) TermFrequencies(field string, limit int, descending boo
})
}
if descending {
sort.Slice(rvTermFreqs, func(i, j int) bool {
if rvTermFreqs[i].Frequency == rvTermFreqs[j].Frequency {
// If frequencies are equal, sort by term lexicographically
return strings.Compare(rvTermFreqs[i].Term, rvTermFreqs[j].Term) < 0
}
sort.Slice(rvTermFreqs, func(i, j int) bool {
if rvTermFreqs[i].Frequency == rvTermFreqs[j].Frequency {
// If frequencies are equal, sort by term lexicographically
return rvTermFreqs[i].Term < rvTermFreqs[j].Term
}
if descending {
return rvTermFreqs[i].Frequency > rvTermFreqs[j].Frequency
})
} else {
sort.Slice(rvTermFreqs, func(i, j int) bool {
if rvTermFreqs[i].Frequency == rvTermFreqs[j].Frequency {
// If frequencies are equal, sort by term lexicographically
return strings.Compare(rvTermFreqs[i].Term, rvTermFreqs[j].Term) < 0
}
return rvTermFreqs[i].Frequency < rvTermFreqs[j].Frequency
})
}
}
return rvTermFreqs[i].Frequency < rvTermFreqs[j].Frequency
})
if limit > len(rvTermFreqs) {
limit = len(rvTermFreqs)
@@ -1272,25 +1264,22 @@ func (i *indexAliasImpl) CentroidCardinalities(field string, limit int, descendi
close(asyncResults)
}()
rvCentroidCardinalitiesResult := make([]index.CentroidCardinality, 0, limit)
rvCentroidCardinalities := make([]index.CentroidCardinality, 0, limit*len(i.indexes))
for asr := range asyncResults {
asr = append(asr, rvCentroidCardinalitiesResult...)
if descending {
sort.Slice(asr, func(i, j int) bool {
return asr[i].Cardinality > asr[j].Cardinality
})
} else {
sort.Slice(asr, func(i, j int) bool {
return asr[i].Cardinality < asr[j].Cardinality
})
}
if limit > len(asr) {
limit = len(asr)
}
rvCentroidCardinalitiesResult = asr[:limit]
rvCentroidCardinalities = append(rvCentroidCardinalities, asr...)
}
return rvCentroidCardinalitiesResult, nil
sort.Slice(rvCentroidCardinalities, func(i, j int) bool {
if descending {
return rvCentroidCardinalities[i].Cardinality > rvCentroidCardinalities[j].Cardinality
} else {
return rvCentroidCardinalities[i].Cardinality < rvCentroidCardinalities[j].Cardinality
}
})
if limit > len(rvCentroidCardinalities) {
limit = len(rvCentroidCardinalities)
}
return rvCentroidCardinalities[:limit], nil
}

View File

@@ -20,6 +20,7 @@ import (
"io"
"os"
"path/filepath"
"regexp"
"strconv"
"sync"
"sync/atomic"
@@ -859,6 +860,26 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
} else {
// build terms facet
facetBuilder := facet.NewTermsFacetBuilder(facetRequest.Field, facetRequest.Size)
// Set prefix filter if provided
if facetRequest.TermPrefix != "" {
facetBuilder.SetPrefixFilter(facetRequest.TermPrefix)
}
// Set regex filter if provided
if facetRequest.TermPattern != "" {
// Use cached compiled pattern if available, otherwise compile it now
if facetRequest.compiledPattern != nil {
facetBuilder.SetRegexFilter(facetRequest.compiledPattern)
} else {
regex, err := regexp.Compile(facetRequest.TermPattern)
if err != nil {
return nil, fmt.Errorf("error compiling regex pattern for facet '%s': %v", facetName, err)
}
facetBuilder.SetRegexFilter(regex)
}
}
facetsBuilder.Add(facetName, facetBuilder)
}
}
@@ -1304,6 +1325,9 @@ func (f *indexImplFieldDict) Cardinality() int {
// helper function to remove duplicate entries from slice of strings
func deDuplicate(fields []string) []string {
if len(fields) == 0 {
return fields
}
entries := make(map[string]struct{})
ret := []string{}
for _, entry := range fields {

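The facet hunk above adds prefix and regex filtering of facet terms. Assuming `TermPrefix` and `TermPattern` are exported fields on bleve's `FacetRequest` (as the hunk suggests), a search request could use them roughly as below; the index contents and field names are purely illustrative:

```go
package main

import (
	"fmt"
	"log"

	"github.com/blevesearch/bleve/v2"
)

func main() {
	// Throwaway in-memory index, only to demonstrate the request shape.
	idx, err := bleve.NewMemOnly(bleve.NewIndexMapping())
	if err != nil {
		log.Fatal(err)
	}
	if err := idx.Index("doc1", map[string]interface{}{"tags": []string{"lang-go", "team-search"}}); err != nil {
		log.Fatal(err)
	}

	req := bleve.NewSearchRequest(bleve.NewMatchAllQuery())

	byLang := bleve.NewFacetRequest("tags", 10)
	byLang.TermPrefix = "lang-" // keep only facet terms starting with "lang-" (field assumed per the diff)
	req.AddFacet("tags-by-language", byLang)

	byTeam := bleve.NewFacetRequest("tags", 10)
	byTeam.TermPattern = `^team-[a-z]+$` // or filter facet terms by a regular expression
	req.AddFacet("tags-by-team", byTeam)

	res, err := idx.Search(req)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(res.Facets)
}
```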
View File

@@ -92,7 +92,7 @@ func DeletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.Update
// Compare both the mappings based on the document paths
// and create a list of index, docvalues, store differences
// for every single field possible
fieldInfo := make(map[string]*index.UpdateFieldInfo)
fieldInfo := make(map[string]*index.UpdateFieldInfo, len(oriPaths))
for path, info := range oriPaths {
err = addFieldInfo(fieldInfo, info, updPaths[path])
if err != nil {
@@ -109,13 +109,13 @@ func DeletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.Update
// A field cannot be completely deleted with any dynamic value turned on
if info.Deleted {
if upd.IndexDynamic {
return nil, fmt.Errorf("Mapping cannot be removed when index dynamic is true")
return nil, fmt.Errorf("mapping cannot be removed when index dynamic is true")
}
if upd.StoreDynamic {
return nil, fmt.Errorf("Mapping cannot be removed when store dynamic is true")
return nil, fmt.Errorf("mapping cannot be removed when store dynamic is true")
}
if upd.DocValuesDynamic {
return nil, fmt.Errorf("Mapping cannot be removed when docvalues dynamic is true")
return nil, fmt.Errorf("mapping cannot be removed when docvalues dynamic is true")
}
}
}
@@ -191,14 +191,14 @@ func checkUpdatedMapping(ori, upd *mapping.DocumentMapping) error {
// Simple checks to ensure no new field mappings present
// in updated
// Create a map of original field names for O(1) lookup
oriFieldNames := make(map[string]bool, len(ori.Fields))
for _, fMapping := range ori.Fields {
oriFieldNames[fMapping.Name] = true
}
for _, updFMapping := range upd.Fields {
var oriFMapping *mapping.FieldMapping
for _, fMapping := range ori.Fields {
if updFMapping.Name == fMapping.Name {
oriFMapping = fMapping
}
}
if oriFMapping == nil {
if !oriFieldNames[updFMapping.Name] {
return fmt.Errorf("updated index mapping contains new fields")
}
}
@@ -238,10 +238,8 @@ func addPathInfo(paths map[string]*pathInfo, name string, mp *mapping.DocumentMa
// Recursively add path information for all child mappings
for cName, cMapping := range mp.Properties {
var pathName string
if name == "" {
pathName = cName
} else {
pathName := cName
if name != "" {
pathName = name + "." + cName
}
addPathInfo(paths, pathName, cMapping, im, pInfo, rootName)
@@ -460,9 +458,6 @@ func addFieldInfo(fInfo map[string]*index.UpdateFieldInfo, ori, upd *pathInfo) e
}
}
}
if err != nil {
return err
}
return nil
}
@@ -567,19 +562,18 @@ func compareFieldMapping(original, updated *mapping.FieldMapping) (*index.Update
// In such a situation, any conflicting changes found will abort the update process
func validateFieldInfo(newInfo *index.UpdateFieldInfo, fInfo map[string]*index.UpdateFieldInfo,
ori *pathInfo, oriFMapInfo *fieldMapInfo) error {
// Determine field name
fieldName := oriFMapInfo.fieldMapping.Name
if fieldName == "" {
fieldName = oriFMapInfo.parent.path
}
// Construct full name with parent path
var name string
if oriFMapInfo.parent.parentPath == "" {
if oriFMapInfo.fieldMapping.Name == "" {
name = oriFMapInfo.parent.path
} else {
name = oriFMapInfo.fieldMapping.Name
}
name = fieldName
} else {
if oriFMapInfo.fieldMapping.Name == "" {
name = oriFMapInfo.parent.parentPath + "." + oriFMapInfo.parent.path
} else {
name = oriFMapInfo.parent.parentPath + "." + oriFMapInfo.fieldMapping.Name
}
name = oriFMapInfo.parent.parentPath + "." + fieldName
}
if (newInfo.Deleted || newInfo.Index || newInfo.DocValues || newInfo.Store) && ori.dynamic {
return fmt.Errorf("updated field is under a dynamic property")

View File

@@ -52,7 +52,7 @@ type DocumentMapping struct {
}
func (dm *DocumentMapping) Validate(cache *registry.Cache,
parentName string, fieldAliasCtx map[string]*FieldMapping,
path []string, fieldAliasCtx map[string]*FieldMapping,
) error {
var err error
if dm.DefaultAnalyzer != "" {
@@ -68,11 +68,7 @@ func (dm *DocumentMapping) Validate(cache *registry.Cache,
}
}
for propertyName, property := range dm.Properties {
newParent := propertyName
if parentName != "" {
newParent = fmt.Sprintf("%s.%s", parentName, propertyName)
}
err = property.Validate(cache, newParent, fieldAliasCtx)
err = property.Validate(cache, append(path, propertyName), fieldAliasCtx)
if err != nil {
return err
}
@@ -96,7 +92,7 @@ func (dm *DocumentMapping) Validate(cache *registry.Cache,
return err
}
}
err := validateFieldMapping(field, parentName, fieldAliasCtx)
err := validateFieldMapping(field, path, fieldAliasCtx)
if err != nil {
return err
}

View File

@@ -191,13 +191,16 @@ func (im *IndexMappingImpl) Validate() error {
return err
}
}
// fieldAliasCtx is used to detect any field alias conflicts across the entire mapping
// the map will hold the fully qualified field name to FieldMapping, so we can
// check for conflicts as we validate each DocumentMapping.
fieldAliasCtx := make(map[string]*FieldMapping)
err = im.DefaultMapping.Validate(im.cache, "", fieldAliasCtx)
err = im.DefaultMapping.Validate(im.cache, []string{}, fieldAliasCtx)
if err != nil {
return err
}
for _, docMapping := range im.TypeMapping {
err = docMapping.Validate(im.cache, "", fieldAliasCtx)
err = docMapping.Validate(im.cache, []string{}, fieldAliasCtx)
if err != nil {
return err
}

View File

@@ -38,7 +38,7 @@ func (fm *FieldMapping) processVectorBase64(propertyMightBeVector interface{},
// -----------------------------------------------------------------------------
// document validation functions
func validateFieldMapping(field *FieldMapping, parentName string,
func validateFieldMapping(field *FieldMapping, path []string,
fieldAliasCtx map[string]*FieldMapping) error {
return validateFieldType(field)
}

View File

@@ -20,6 +20,7 @@ package mapping
import (
"fmt"
"reflect"
"slices"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/util"
@@ -141,15 +142,27 @@ func (fm *FieldMapping) processVector(propertyMightBeVector interface{},
if !ok {
return false
}
// Apply defaults for similarity and optimization if not set
similarity := fm.Similarity
if similarity == "" {
similarity = index.DefaultVectorSimilarityMetric
}
vectorIndexOptimizedFor := fm.VectorIndexOptimizedFor
if vectorIndexOptimizedFor == "" {
vectorIndexOptimizedFor = index.DefaultIndexOptimization
}
// normalize raw vector if similarity is cosine
if fm.Similarity == index.CosineSimilarity {
vector = NormalizeVector(vector)
// Since the vector can be multi-vector (flattened array of multiple vectors),
// we use NormalizeMultiVector to normalize each sub-vector independently.
if similarity == index.CosineSimilarity {
vector = NormalizeMultiVector(vector, fm.Dims)
}
fieldName := getFieldName(pathString, path, fm)
options := fm.Options()
field := document.NewVectorFieldWithIndexingOptions(fieldName, indexes, vector,
fm.Dims, fm.Similarity, fm.VectorIndexOptimizedFor, options)
fm.Dims, similarity, vectorIndexOptimizedFor, options)
context.doc.AddField(field)
// "_all" composite field is not applicable for vector field
@@ -163,20 +176,29 @@ func (fm *FieldMapping) processVectorBase64(propertyMightBeVectorBase64 interfac
if !ok {
return
}
// Apply defaults for similarity and optimization if not set
similarity := fm.Similarity
if similarity == "" {
similarity = index.DefaultVectorSimilarityMetric
}
vectorIndexOptimizedFor := fm.VectorIndexOptimizedFor
if vectorIndexOptimizedFor == "" {
vectorIndexOptimizedFor = index.DefaultIndexOptimization
}
decodedVector, err := document.DecodeVector(encodedString)
if err != nil || len(decodedVector) != fm.Dims {
return
}
// normalize raw vector if similarity is cosine
if fm.Similarity == index.CosineSimilarity {
// normalize raw vector if similarity is cosine, multi-vector is not supported
// for base64 encoded vectors, so we use NormalizeVector directly.
if similarity == index.CosineSimilarity {
decodedVector = NormalizeVector(decodedVector)
}
fieldName := getFieldName(pathString, path, fm)
options := fm.Options()
field := document.NewVectorFieldWithIndexingOptions(fieldName, indexes, decodedVector,
fm.Dims, fm.Similarity, fm.VectorIndexOptimizedFor, options)
fm.Dims, similarity, vectorIndexOptimizedFor, options)
context.doc.AddField(field)
// "_all" composite field is not applicable for vector_base64 field
@@ -186,87 +208,121 @@ func (fm *FieldMapping) processVectorBase64(propertyMightBeVectorBase64 interfac
// -----------------------------------------------------------------------------
// document validation functions
func validateFieldMapping(field *FieldMapping, parentName string,
func validateFieldMapping(field *FieldMapping, path []string,
fieldAliasCtx map[string]*FieldMapping) error {
switch field.Type {
case "vector", "vector_base64":
return validateVectorFieldAlias(field, parentName, fieldAliasCtx)
return validateVectorFieldAlias(field, path, fieldAliasCtx)
default: // non-vector field
return validateFieldType(field)
}
}
func validateVectorFieldAlias(field *FieldMapping, parentName string,
func validateVectorFieldAlias(field *FieldMapping, path []string,
fieldAliasCtx map[string]*FieldMapping) error {
if field.Name == "" {
field.Name = parentName
// fully qualified field name
pathString := encodePath(path)
// check if field has a name set, else use path to compute effective name
effectiveFieldName := getFieldName(pathString, path, field)
// Compute effective values for validation
effectiveSimilarity := field.Similarity
if effectiveSimilarity == "" {
effectiveSimilarity = index.DefaultVectorSimilarityMetric
}
effectiveOptimizedFor := field.VectorIndexOptimizedFor
if effectiveOptimizedFor == "" {
effectiveOptimizedFor = index.DefaultIndexOptimization
}
if field.Similarity == "" {
field.Similarity = index.DefaultVectorSimilarityMetric
}
if field.VectorIndexOptimizedFor == "" {
field.VectorIndexOptimizedFor = index.DefaultIndexOptimization
}
if _, exists := index.SupportedVectorIndexOptimizations[field.VectorIndexOptimizedFor]; !exists {
// if an unsupported config is provided, override to default
field.VectorIndexOptimizedFor = index.DefaultIndexOptimization
}
// following fields are not applicable for vector
// thus, we set them to default values
field.IncludeInAll = false
field.IncludeTermVectors = false
field.Store = false
field.DocValues = false
field.SkipFreqNorm = true
// # If alias is present, validate the field options as per the alias
// # If alias is present, validate the field options as per the alias.
// note: reading from a nil map is safe
if fieldAlias, ok := fieldAliasCtx[field.Name]; ok {
if fieldAlias, ok := fieldAliasCtx[effectiveFieldName]; ok {
if field.Dims != fieldAlias.Dims {
return fmt.Errorf("field: '%s', invalid alias "+
"(different dimensions %d and %d)", fieldAlias.Name, field.Dims,
"(different dimensions %d and %d)", effectiveFieldName, field.Dims,
fieldAlias.Dims)
}
if field.Similarity != fieldAlias.Similarity {
// Compare effective similarity values
aliasSimilarity := fieldAlias.Similarity
if aliasSimilarity == "" {
aliasSimilarity = index.DefaultVectorSimilarityMetric
}
if effectiveSimilarity != aliasSimilarity {
return fmt.Errorf("field: '%s', invalid alias "+
"(different similarity values %s and %s)", fieldAlias.Name,
field.Similarity, fieldAlias.Similarity)
"(different similarity values %s and %s)", effectiveFieldName,
effectiveSimilarity, aliasSimilarity)
}
// Compare effective vector index optimization values
aliasOptimizedFor := fieldAlias.VectorIndexOptimizedFor
if aliasOptimizedFor == "" {
aliasOptimizedFor = index.DefaultIndexOptimization
}
if effectiveOptimizedFor != aliasOptimizedFor {
return fmt.Errorf("field: '%s', invalid alias "+
"(different vector index optimization values %s and %s)", effectiveFieldName,
effectiveOptimizedFor, aliasOptimizedFor)
}
return nil
}
// # Validate field options
// Vector dimensions must be within allowed range
if field.Dims < MinVectorDims || field.Dims > MaxVectorDims {
return fmt.Errorf("field: '%s', invalid vector dimension: %d,"+
" value should be in range (%d, %d)", field.Name, field.Dims,
" value should be in range [%d, %d]", effectiveFieldName, field.Dims,
MinVectorDims, MaxVectorDims)
}
if _, ok := index.SupportedVectorSimilarityMetrics[field.Similarity]; !ok {
// Similarity metric must be supported
if _, ok := index.SupportedVectorSimilarityMetrics[effectiveSimilarity]; !ok {
return fmt.Errorf("field: '%s', invalid similarity "+
"metric: '%s', valid metrics are: %+v", field.Name, field.Similarity,
"metric: '%s', valid metrics are: %+v", effectiveFieldName, effectiveSimilarity,
reflect.ValueOf(index.SupportedVectorSimilarityMetrics).MapKeys())
}
// Vector index optimization must be supported
if _, ok := index.SupportedVectorIndexOptimizations[effectiveOptimizedFor]; !ok {
return fmt.Errorf("field: '%s', invalid vector index "+
"optimization: '%s', valid optimizations are: %+v", effectiveFieldName,
effectiveOptimizedFor,
reflect.ValueOf(index.SupportedVectorIndexOptimizations).MapKeys())
}
if fieldAliasCtx != nil { // writing to a nil map is unsafe
fieldAliasCtx[field.Name] = field
fieldAliasCtx[effectiveFieldName] = field
}
return nil
}
// NormalizeVector normalizes a single vector to unit length.
// It makes a copy of the input vector to avoid modifying it in-place.
func NormalizeVector(vec []float32) []float32 {
// make a copy of the vector to avoid modifying the original
// vector in-place
vecCopy := make([]float32, len(vec))
copy(vecCopy, vec)
vecCopy := slices.Clone(vec)
// normalize the vector copy using in-place normalization provided by faiss
return faiss.NormalizeVector(vecCopy)
}
// NormalizeMultiVector normalizes each sub-vector of size `dims` independently.
// For a flattened array containing multiple vectors, each sub-vector is
// normalized separately to unit length.
// It makes a copy of the input vector to avoid modifying it in-place.
func NormalizeMultiVector(vec []float32, dims int) []float32 {
if len(vec) == 0 || dims <= 0 || len(vec)%dims != 0 {
return vec
}
// Single vector - delegate to NormalizeVector
if len(vec) == dims {
return NormalizeVector(vec)
}
// Multi-vector - make a copy to avoid modifying the original
result := slices.Clone(vec)
// Normalize each sub-vector in-place
for i := 0; i < len(result); i += dims {
faiss.NormalizeVector(result[i : i+dims])
}
return result
}
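NormalizeMultiVector treats the flattened []float32 as consecutive sub-vectors of length dims and normalizes each one to unit length independently. A rough, self-contained sketch of that per-chunk L2 normalization without the faiss dependency (illustrative only, not bleve's implementation):
package main
import (
	"fmt"
	"math"
)
// normalizeChunks scales every dims-sized chunk of vec to unit length,
// copying the input so the original slice is left untouched.
func normalizeChunks(vec []float32, dims int) []float32 {
	out := append([]float32(nil), vec...)
	for i := 0; i+dims <= len(out); i += dims {
		var sum float64
		for _, x := range out[i : i+dims] {
			sum += float64(x) * float64(x)
		}
		if norm := math.Sqrt(sum); norm > 0 {
			for j := i; j < i+dims; j++ {
				out[j] = float32(float64(out[j]) / norm)
			}
		}
	}
	return out
}
func main() {
	// two 2-dimensional sub-vectors flattened into one slice
	fmt.Println(normalizeChunks([]float32{3, 4, 0, 5}, 2)) // [0.6 0.8 0 1]
}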

View File

@@ -99,7 +99,7 @@ func (r *rescorer) rescore(ftsHits, knnHits search.DocumentMatchCollection) (sea
switch r.req.Score {
case ScoreRRF:
res := fusion.ReciprocalRankFusion(
fusionResult = fusion.ReciprocalRankFusion(
mergedHits,
r.origBoosts,
r.req.Params.ScoreRankConstant,
@@ -107,16 +107,14 @@ func (r *rescorer) rescore(ftsHits, knnHits search.DocumentMatchCollection) (sea
numKNNQueries(r.req),
r.req.Explain,
)
fusionResult = &res
case ScoreRSF:
res := fusion.RelativeScoreFusion(
fusionResult = fusion.RelativeScoreFusion(
mergedHits,
r.origBoosts,
r.req.Params.ScoreWindowSize,
numKNNQueries(r.req),
r.req.Explain,
)
fusionResult = &res
}
return fusionResult.Hits, fusionResult.Total, fusionResult.MaxScore

View File

@@ -17,8 +17,10 @@ package bleve
import (
"fmt"
"reflect"
"regexp"
"sort"
"strconv"
"strings"
"time"
"github.com/blevesearch/bleve/v2/analysis"
@@ -147,8 +149,13 @@ type numericRange struct {
type FacetRequest struct {
Size int `json:"size"`
Field string `json:"field"`
TermPrefix string `json:"term_prefix,omitempty"`
TermPattern string `json:"term_pattern,omitempty"`
NumericRanges []*numericRange `json:"numeric_ranges,omitempty"`
DateTimeRanges []*dateTimeRange `json:"date_ranges,omitempty"`
// Compiled regex pattern (cached during validation)
compiledPattern *regexp.Regexp
}
// NewFacetRequest creates a facet on the specified
@@ -161,7 +168,26 @@ func NewFacetRequest(field string, size int) *FacetRequest {
}
}
// SetPrefixFilter sets the prefix filter for term facets.
func (fr *FacetRequest) SetPrefixFilter(prefix string) {
fr.TermPrefix = prefix
}
// SetRegexFilter sets the regex pattern filter for term facets.
func (fr *FacetRequest) SetRegexFilter(pattern string) {
fr.TermPattern = pattern
}
func (fr *FacetRequest) Validate() error {
// Validate regex pattern if provided and cache the compiled regex
if fr.TermPattern != "" {
compiled, err := regexp.Compile(fr.TermPattern)
if err != nil {
return fmt.Errorf("invalid term pattern: %v", err)
}
fr.compiledPattern = compiled
}
nrCount := len(fr.NumericRanges)
drCount := len(fr.DateTimeRanges)
if nrCount > 0 && drCount > 0 {
@@ -546,49 +572,74 @@ func (sr *SearchResult) Size() int {
}
func (sr *SearchResult) String() string {
rv := ""
rv := &strings.Builder{}
if sr.Total > 0 {
if sr.Request != nil && sr.Request.Size > 0 {
rv = fmt.Sprintf("%d matches, showing %d through %d, took %s\n", sr.Total, sr.Request.From+1, sr.Request.From+len(sr.Hits), sr.Took)
switch {
case sr.Request != nil && sr.Request.Size > 0:
start := sr.Request.From + 1
end := sr.Request.From + len(sr.Hits)
fmt.Fprintf(rv, "%d matches, showing %d through %d, took %s\n", sr.Total, start, end, sr.Took)
for i, hit := range sr.Hits {
rv += fmt.Sprintf("%5d. %s (%f)\n", i+sr.Request.From+1, hit.ID, hit.Score)
for fragmentField, fragments := range hit.Fragments {
rv += fmt.Sprintf("\t%s\n", fragmentField)
for _, fragment := range fragments {
rv += fmt.Sprintf("\t\t%s\n", fragment)
}
}
for otherFieldName, otherFieldValue := range hit.Fields {
if _, ok := hit.Fragments[otherFieldName]; !ok {
rv += fmt.Sprintf("\t%s\n", otherFieldName)
rv += fmt.Sprintf("\t\t%v\n", otherFieldValue)
}
}
rv = formatHit(rv, hit, start+i)
}
} else {
rv = fmt.Sprintf("%d matches, took %s\n", sr.Total, sr.Took)
case sr.Request == nil:
fmt.Fprintf(rv, "%d matches, took %s\n", sr.Total, sr.Took)
for i, hit := range sr.Hits {
rv = formatHit(rv, hit, i+1)
}
default:
fmt.Fprintf(rv, "%d matches, took %s\n", sr.Total, sr.Took)
}
} else {
rv = "No matches"
fmt.Fprintf(rv, "No matches\n")
}
if len(sr.Facets) > 0 {
rv += "Facets:\n"
fmt.Fprintf(rv, "Facets:\n")
for fn, f := range sr.Facets {
rv += fmt.Sprintf("%s(%d)\n", fn, f.Total)
fmt.Fprintf(rv, "%s(%d)\n", fn, f.Total)
for _, t := range f.Terms.Terms() {
rv += fmt.Sprintf("\t%s(%d)\n", t.Term, t.Count)
fmt.Fprintf(rv, "\t%s(%d)\n", t.Term, t.Count)
}
for _, n := range f.NumericRanges {
rv += fmt.Sprintf("\t%s(%d)\n", n.Name, n.Count)
fmt.Fprintf(rv, "\t%s(%d)\n", n.Name, n.Count)
}
for _, d := range f.DateRanges {
rv += fmt.Sprintf("\t%s(%d)\n", d.Name, d.Count)
fmt.Fprintf(rv, "\t%s(%d)\n", d.Name, d.Count)
}
if f.Other != 0 {
rv += fmt.Sprintf("\tOther(%d)\n", f.Other)
fmt.Fprintf(rv, "\tOther(%d)\n", f.Other)
}
}
}
return rv.String()
}
// formatHit is a helper function to format a single hit in the search result for
// the String() method of SearchResult
func formatHit(rv *strings.Builder, hit *search.DocumentMatch, hitNumber int) *strings.Builder {
fmt.Fprintf(rv, "%5d. %s (%f)\n", hitNumber, hit.ID, hit.Score)
for fragmentField, fragments := range hit.Fragments {
fmt.Fprintf(rv, "\t%s\n", fragmentField)
for _, fragment := range fragments {
fmt.Fprintf(rv, "\t\t%s\n", fragment)
}
}
for otherFieldName, otherFieldValue := range hit.Fields {
if _, ok := hit.Fragments[otherFieldName]; !ok {
fmt.Fprintf(rv, "\t%s\n", otherFieldName)
fmt.Fprintf(rv, "\t\t%v\n", otherFieldValue)
}
}
if len(hit.DecodedSort) > 0 {
fmt.Fprintf(rv, "\t_sort: [")
for k, v := range hit.DecodedSort {
if k > 0 {
fmt.Fprintf(rv, ", ")
}
fmt.Fprintf(rv, "%v", v)
}
fmt.Fprintf(rv, "]\n")
}
return rv
}
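The rewrite above replaces repeated string concatenation with a strings.Builder written through fmt.Fprintf, so the result is accumulated in one growing buffer instead of being reallocated per hit. The pattern in isolation, with made-up hit data:
package main
import (
	"fmt"
	"strings"
)
func main() {
	var b strings.Builder // implements io.Writer, grows a single buffer
	hits := []struct {
		ID    string
		Score float64
	}{{"doc1", 1.25}, {"doc2", 0.75}}
	fmt.Fprintf(&b, "%d matches\n", len(hits))
	for i, h := range hits {
		fmt.Fprintf(&b, "%5d. %s (%f)\n", i+1, h.ID, h.Score)
	}
	fmt.Print(b.String()) // the string is materialized exactly once
}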

View File

@@ -15,7 +15,9 @@
package facet
import (
"bytes"
"reflect"
"regexp"
"sort"
"github.com/blevesearch/bleve/v2/search"
@@ -30,12 +32,14 @@ func init() {
}
type TermsFacetBuilder struct {
size int
field string
termsCount map[string]int
total int
missing int
sawValue bool
size int
field string
prefixBytes []byte
regex *regexp.Regexp
termsCount map[string]int
total int
missing int
sawValue bool
}
func NewTermsFacetBuilder(field string, size int) *TermsFacetBuilder {
@@ -48,7 +52,16 @@ func NewTermsFacetBuilder(field string, size int) *TermsFacetBuilder {
func (fb *TermsFacetBuilder) Size() int {
sizeInBytes := reflectStaticSizeTermsFacetBuilder + size.SizeOfPtr +
len(fb.field)
len(fb.field) +
len(fb.prefixBytes) +
size.SizeOfPtr // regex pointer (does not include actual regexp.Regexp object size)
// Estimate regex object size if present.
if fb.regex != nil {
// This is only the static size of regexp.Regexp struct, not including heap allocations.
sizeInBytes += int(reflect.TypeOf(*fb.regex).Size())
// NOTE: Actual memory usage of regexp.Regexp may be higher due to internal allocations.
}
for k := range fb.termsCount {
sizeInBytes += size.SizeOfString + len(k) +
@@ -62,10 +75,39 @@ func (fb *TermsFacetBuilder) Field() string {
return fb.field
}
// SetPrefixFilter sets the prefix filter for term facets.
func (fb *TermsFacetBuilder) SetPrefixFilter(prefix string) {
if prefix != "" {
fb.prefixBytes = []byte(prefix)
} else {
fb.prefixBytes = nil
}
}
// SetRegexFilter sets the compiled regex filter for term facets.
func (fb *TermsFacetBuilder) SetRegexFilter(regex *regexp.Regexp) {
fb.regex = regex
}
func (fb *TermsFacetBuilder) UpdateVisitor(term []byte) {
fb.sawValue = true
fb.termsCount[string(term)] = fb.termsCount[string(term)] + 1
// Total represents all terms visited, not just matching ones.
// This is necessary for the "Other" calculation.
fb.total++
// Fast prefix check on []byte - zero allocation
if len(fb.prefixBytes) > 0 && !bytes.HasPrefix(term, fb.prefixBytes) {
return
}
// Fast regex check on []byte - zero allocation
if fb.regex != nil && !fb.regex.Match(term) {
return
}
// Only convert to string if term matches filters
termStr := string(term)
fb.sawValue = true
fb.termsCount[termStr] = fb.termsCount[termStr] + 1
}
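UpdateVisitor now counts every visited term toward the total but only materializes a string for terms that pass the byte-level prefix check and the pre-compiled regex. A standalone sketch of that filter order, with hypothetical terms and pattern rather than bleve's API:
package main
import (
	"bytes"
	"fmt"
	"regexp"
)
func main() {
	prefix := []byte("color_")
	pattern := regexp.MustCompile(`_(red|blue)$`) // compiled once, reused per term
	counts := map[string]int{}
	for _, term := range [][]byte{
		[]byte("color_red"), []byte("color_green"), []byte("size_xl"), []byte("color_blue"),
	} {
		if len(prefix) > 0 && !bytes.HasPrefix(term, prefix) {
			continue // cheap []byte check, no allocation
		}
		if pattern != nil && !pattern.Match(term) {
			continue // regexp.Match also works on []byte directly
		}
		counts[string(term)]++ // convert to string only for matching terms
	}
	fmt.Println(counts) // map[color_blue:1 color_red:1]
}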
func (fb *TermsFacetBuilder) StartDoc() {

View File

@@ -15,7 +15,6 @@
package query
import (
"bytes"
"context"
"encoding/json"
"fmt"
@@ -203,7 +202,7 @@ func (q *BooleanQuery) Searcher(ctx context.Context, i index.IndexReader, m mapp
return false
}
// Compare document IDs
cmp := bytes.Compare(refDoc.IndexInternalID, d.IndexInternalID)
cmp := refDoc.IndexInternalID.Compare(d.IndexInternalID)
if cmp < 0 {
// filterSearcher is behind the current document, Advance() it
refDoc, err = filterSearcher.Advance(sctx, d.IndexInternalID)
@@ -211,7 +210,7 @@ func (q *BooleanQuery) Searcher(ctx context.Context, i index.IndexReader, m mapp
return false
}
// After advance, check if they're now equal
return bytes.Equal(refDoc.IndexInternalID, d.IndexInternalID)
cmp = refDoc.IndexInternalID.Compare(d.IndexInternalID)
}
// cmp >= 0: either equal (match) or filterSearcher is ahead (no match)
return cmp == 0
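The filter check above is essentially a merge of two ID-ordered streams: compare the current IDs, advance whichever side is behind, and report a match only when both sides line up. A minimal sketch of the idea over plain integer IDs rather than bleve's searcher types:
package main
import "fmt"
// intersects reports whether id is present in the sorted stream,
// advancing pos past everything smaller than id (two-pointer style).
func intersects(sorted []int, pos *int, id int) bool {
	for *pos < len(sorted) && sorted[*pos] < id {
		*pos++ // the stream is behind the candidate, advance it
	}
	return *pos < len(sorted) && sorted[*pos] == id
}
func main() {
	filterIDs := []int{2, 5, 9}
	pos := 0
	for _, candidate := range []int{1, 2, 5, 7, 9} {
		fmt.Println(candidate, intersects(filterIDs, &pos, candidate))
	}
	// 1 false, 2 true, 5 true, 7 false, 9 true
}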

View File

@@ -53,7 +53,7 @@ func (q *KNNQuery) SetK(k int64) {
q.K = k
}
func (q *KNNQuery) SetFieldVal(field string) {
func (q *KNNQuery) SetField(field string) {
q.VectorField = field
}

View File

@@ -88,7 +88,10 @@ func (s *DisjunctionQueryScorer) Score(ctx *search.SearchContext, constituents [
func (s *DisjunctionQueryScorer) ScoreAndExplBreakdown(ctx *search.SearchContext, constituents []*search.DocumentMatch,
matchingIdxs []int, originalPositions []int, countTotal int) *search.DocumentMatch {
scoreBreakdown := make(map[int]float64)
rv := constituents[0]
if rv.ScoreBreakdown == nil {
rv.ScoreBreakdown = make(map[int]float64, len(constituents))
}
var childrenExplanations []*search.Explanation
if s.options.Explain {
// since we want to notify which expl belongs to which matched searcher within the disjunction searcher
@@ -104,7 +107,7 @@ func (s *DisjunctionQueryScorer) ScoreAndExplBreakdown(ctx *search.SearchContext
// scorer used in disjunction heap searcher
index = matchingIdxs[i]
}
scoreBreakdown[index] = docMatch.Score
rv.ScoreBreakdown[index] = docMatch.Score
if s.options.Explain {
childrenExplanations[index] = docMatch.Expl
}
@@ -113,9 +116,6 @@ func (s *DisjunctionQueryScorer) ScoreAndExplBreakdown(ctx *search.SearchContext
if s.options.Explain {
explBreakdown = &search.Explanation{Children: childrenExplanations}
}
rv := constituents[0]
rv.ScoreBreakdown = scoreBreakdown
rv.Expl = explBreakdown
rv.FieldTermLocations = search.MergeFieldTermLocations(
rv.FieldTermLocations, constituents[1:])

View File

@@ -207,20 +207,29 @@ func (dm *DocumentMatch) Reset() *DocumentMatch {
indexInternalID := dm.IndexInternalID
// remember the []interface{} used for sort
sort := dm.Sort
// remember the []string used for decoded sort
decodedSort := dm.DecodedSort
// remember the FieldTermLocations backing array
ftls := dm.FieldTermLocations
for i := range ftls { // recycle the ArrayPositions of each location
ftls[i].Location.ArrayPositions = ftls[i].Location.ArrayPositions[:0]
}
// remember the score breakdown map
scoreBreakdown := dm.ScoreBreakdown
// clear out the score breakdown map
clear(scoreBreakdown)
// idiom to copy over from empty DocumentMatch (0 allocations)
*dm = DocumentMatch{}
// reuse the []byte already allocated (and reset len to 0)
dm.IndexInternalID = indexInternalID[:0]
// reuse the []interface{} already allocated (and reset len to 0)
dm.Sort = sort[:0]
dm.DecodedSort = dm.DecodedSort[:0]
// reuse the []string already allocated (and reset len to 0)
dm.DecodedSort = decodedSort[:0]
// reuse the FieldTermLocations already allocated (and reset len to 0)
dm.FieldTermLocations = ftls[:0]
// reuse the score breakdown map already allocated (after clearing it)
dm.ScoreBreakdown = scoreBreakdown
return dm
}
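Reset() zeroes the struct with a single *dm = DocumentMatch{} assignment while handing the previously allocated slices and the cleared map back to it, so pooled matches are reused without fresh allocations. The same idiom on a toy struct with hypothetical fields:
package main
import "fmt"
type match struct {
	id     []byte
	sort   []any
	scores map[int]float64
}
// reset clears m for reuse while keeping its allocated backing storage.
func (m *match) reset() {
	id, sort, scores := m.id, m.sort, m.scores
	clear(scores)     // empty the map but keep its buckets
	*m = match{}      // zero every field in one assignment
	m.id = id[:0]     // reuse the []byte, length back to 0
	m.sort = sort[:0] // reuse the []any, length back to 0
	m.scores = scores // reattach the cleared map
}
func main() {
	m := &match{id: []byte("doc1"), sort: []any{"a"}, scores: map[int]float64{0: 1.5}}
	m.reset()
	fmt.Println(len(m.id), cap(m.id) > 0, len(m.scores)) // 0 true 0
}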

View File

@@ -84,7 +84,7 @@ func (s *KNNSearcher) VectorOptimize(ctx context.Context, octx index.VectorOptim
func (s *KNNSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (
*search.DocumentMatch, error) {
knnMatch, err := s.vectorReader.Next(s.vd.Reset())
knnMatch, err := s.vectorReader.Advance(ID, s.vd.Reset())
if err != nil {
return nil, err
}

View File

@@ -288,10 +288,15 @@ func createKNNQuery(req *SearchRequest, knnFilterResults map[int]index.EligibleD
// If it's a filtered kNN but has no eligible filter hits, then
// do not run the kNN query.
if selector, exists := knnFilterResults[i]; exists && selector == nil {
// if the kNN query is filtered and has no eligible filter hits, then
// do not run the kNN query, so we add a match_none query to the subQueries.
// this will ensure that the score breakdown is set to 0 for this kNN query.
subQueries = append(subQueries, NewMatchNoneQuery())
kArray = append(kArray, 0)
continue
}
knnQuery := query.NewKNNQuery(knn.Vector)
knnQuery.SetFieldVal(knn.Field)
knnQuery.SetField(knn.Field)
knnQuery.SetK(knn.K)
knnQuery.SetBoost(knn.Boost.Value())
knnQuery.SetParams(knn.Params)
@@ -381,7 +386,7 @@ func addSortAndFieldsToKNNHits(req *SearchRequest, knnHits []*search.DocumentMat
return nil
}
func (i *indexImpl) runKnnCollector(ctx context.Context, req *SearchRequest, reader index.IndexReader, preSearch bool) ([]*search.DocumentMatch, error) {
func (i *indexImpl) runKnnCollector(ctx context.Context, req *SearchRequest, reader index.IndexReader, preSearch bool) (knnHits []*search.DocumentMatch, err error) {
// Maps the index of a KNN query in the request to its pre-filter result:
// - If the KNN query is **not filtered**, the value will be `nil`.
// - If the KNN query **is filtered**, the value will be an eligible document selector
@@ -401,21 +406,33 @@ func (i *indexImpl) runKnnCollector(ctx context.Context, req *SearchRequest, rea
continue
}
// Applies to all supported types of queries.
filterSearcher, _ := filterQ.Searcher(ctx, reader, i.m, search.SearcherOptions{
filterSearcher, err := filterQ.Searcher(ctx, reader, i.m, search.SearcherOptions{
Score: "none", // just want eligible hits --> don't compute scores if not needed
})
if err != nil {
return nil, err
}
// Using the index doc count to determine collector size since we do not
// have an estimate of the number of eligible docs in the index yet.
indexDocCount, err := i.DocCount()
if err != nil {
// close the searcher before returning
filterSearcher.Close()
return nil, err
}
filterColl := collector.NewEligibleCollector(int(indexDocCount))
err = filterColl.Collect(ctx, filterSearcher, reader)
if err != nil {
// close the searcher before returning
filterSearcher.Close()
return nil, err
}
knnFilterResults[idx] = filterColl.EligibleSelector()
// Close the filter searcher, as we are done with it.
err = filterSearcher.Close()
if err != nil {
return nil, err
}
}
// Add the filter hits when creating the kNN query
@@ -429,12 +446,17 @@ func (i *indexImpl) runKnnCollector(ctx context.Context, req *SearchRequest, rea
if err != nil {
return nil, err
}
defer func() {
if serr := knnSearcher.Close(); err == nil && serr != nil {
err = serr
}
}()
knnCollector := collector.NewKNNCollector(kArray, sumOfK)
err = knnCollector.Collect(ctx, knnSearcher, reader)
if err != nil {
return nil, err
}
knnHits := knnCollector.Results()
knnHits = knnCollector.Results()
if !preSearch {
knnHits = finalizeKNNResults(req, knnHits)
}
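runKnnCollector now uses named return values so that an error from the deferred knnSearcher.Close() is surfaced when the body itself succeeded, while an error from the body still takes precedence. A compact sketch of that defer pattern with a hypothetical closer type:
package main
import (
	"errors"
	"fmt"
)
type closer struct{ err error }
func (c closer) Close() error { return c.err }
// run returns the body's error if any, otherwise the Close() error.
func run(c closer, body func() error) (err error) {
	defer func() {
		if cerr := c.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()
	return body()
}
func main() {
	fmt.Println(run(closer{}, func() error { return nil }))                                // <nil>
	fmt.Println(run(closer{err: errors.New("close failed")}, func() error { return nil })) // close failed
}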

View File

@@ -19,15 +19,11 @@ package zap
import (
"encoding/binary"
"encoding/json"
"math"
"reflect"
"github.com/RoaringBitmap/roaring/v2"
"github.com/RoaringBitmap/roaring/v2/roaring64"
"github.com/bits-and-blooms/bitset"
index "github.com/blevesearch/bleve_index_api"
faiss "github.com/blevesearch/go-faiss"
segment "github.com/blevesearch/scorch_segment_api/v2"
)
@@ -272,45 +268,7 @@ func (vpItr *VecPostingsIterator) BytesWritten() uint64 {
return 0
}
// vectorIndexWrapper conforms to scorch_segment_api's VectorIndex interface
type vectorIndexWrapper struct {
search func(qVector []float32, k int64,
params json.RawMessage) (segment.VecPostingsList, error)
searchWithFilter func(qVector []float32, k int64, eligibleDocIDs []uint64,
params json.RawMessage) (segment.VecPostingsList, error)
close func()
size func() uint64
obtainKCentroidCardinalitiesFromIVFIndex func(limit int, descending bool) (
[]index.CentroidCardinality, error)
}
func (i *vectorIndexWrapper) Search(qVector []float32, k int64,
params json.RawMessage) (
segment.VecPostingsList, error) {
return i.search(qVector, k, params)
}
func (i *vectorIndexWrapper) SearchWithFilter(qVector []float32, k int64,
eligibleDocIDs []uint64, params json.RawMessage) (
segment.VecPostingsList, error) {
return i.searchWithFilter(qVector, k, eligibleDocIDs, params)
}
func (i *vectorIndexWrapper) Close() {
i.close()
}
func (i *vectorIndexWrapper) Size() uint64 {
return i.size()
}
func (i *vectorIndexWrapper) ObtainKCentroidCardinalitiesFromIVFIndex(limit int, descending bool) (
[]index.CentroidCardinality, error) {
return i.obtainKCentroidCardinalitiesFromIVFIndex(limit, descending)
}
// InterpretVectorIndex returns a construct of closures (vectorIndexWrapper)
// InterpretVectorIndex returns a struct based implementation (vectorIndexWrapper)
// that will allow the caller to -
// (1) search within an attached vector index
// (2) search limited to a subset of documents within an attached vector index
@@ -319,248 +277,18 @@ func (i *vectorIndexWrapper) ObtainKCentroidCardinalitiesFromIVFIndex(limit int,
func (sb *SegmentBase) InterpretVectorIndex(field string, requiresFiltering bool,
except *roaring.Bitmap) (
segment.VectorIndex, error) {
// Params needed for the closures
var vecIndex *faiss.IndexImpl
var vecDocIDMap map[int64]uint32
var docVecIDMap map[uint32][]int64
var vectorIDsToExclude []int64
var fieldIDPlus1 uint16
var vecIndexSize uint64
// Utility function to add the corresponding docID and scores for each vector
// returned after the kNN query to the newly
// created vecPostingsList
addIDsToPostingsList := func(pl *VecPostingsList, ids []int64, scores []float32) {
for i := 0; i < len(ids); i++ {
vecID := ids[i]
// Checking if it's present in the vecDocIDMap.
// If -1 is returned as an ID(insufficient vectors), this will ensure
// it isn't added to the final postings list.
if docID, ok := vecDocIDMap[vecID]; ok {
code := getVectorCode(docID, scores[i])
pl.postings.Add(code)
}
}
}
var (
wrapVecIndex = &vectorIndexWrapper{
search: func(qVector []float32, k int64, params json.RawMessage) (
segment.VecPostingsList, error) {
// 1. returned postings list (of type PostingsList) has two types of information - docNum and its score.
// 2. both the values can be represented using roaring bitmaps.
// 3. the Iterator (of type PostingsIterator) returned would operate in terms of VecPostings.
// 4. VecPostings would just have the docNum and the score. Every call of Next()
// and Advance just returns the next VecPostings. The caller would do a vp.Number()
// and the Score() to get the corresponding values
rv := &VecPostingsList{
except: nil, // todo: handle the except bitmap within postings iterator.
postings: roaring64.New(),
}
if vecIndex == nil || vecIndex.D() != len(qVector) {
// vector index not found or dimensionality mismatched
return rv, nil
}
scores, ids, err := vecIndex.SearchWithoutIDs(qVector, k,
vectorIDsToExclude, params)
if err != nil {
return nil, err
}
addIDsToPostingsList(rv, ids, scores)
return rv, nil
},
searchWithFilter: func(qVector []float32, k int64,
eligibleDocIDs []uint64, params json.RawMessage) (
segment.VecPostingsList, error) {
// 1. returned postings list (of type PostingsList) has two types of information - docNum and its score.
// 2. both the values can be represented using roaring bitmaps.
// 3. the Iterator (of type PostingsIterator) returned would operate in terms of VecPostings.
// 4. VecPostings would just have the docNum and the score. Every call of Next()
// and Advance just returns the next VecPostings. The caller would do a vp.Number()
// and the Score() to get the corresponding values
rv := &VecPostingsList{
except: nil, // todo: handle the except bitmap within postings iterator.
postings: roaring64.New(),
}
if vecIndex == nil || vecIndex.D() != len(qVector) {
// vector index not found or dimensionality mismatched
return rv, nil
}
// Check and proceed only if non-zero documents eligible per the filter query.
if len(eligibleDocIDs) == 0 {
return rv, nil
}
// If every element in the index is eligible (full selectivity),
// then this can basically be considered unfiltered kNN.
if len(eligibleDocIDs) == int(sb.numDocs) {
scores, ids, err := vecIndex.SearchWithoutIDs(qVector, k,
vectorIDsToExclude, params)
if err != nil {
return nil, err
}
addIDsToPostingsList(rv, ids, scores)
return rv, nil
}
// vector IDs corresponding to the local doc numbers to be
// considered for the search
vectorIDsToInclude := make([]int64, 0, len(eligibleDocIDs))
for _, id := range eligibleDocIDs {
vecIDs := docVecIDMap[uint32(id)]
// In the common case where vecIDs has only one element, which occurs
// when a document has only one vector field, we can
// avoid the unnecessary overhead of slice unpacking (append(vecIDs...)).
// Directly append the single element for efficiency.
if len(vecIDs) == 1 {
vectorIDsToInclude = append(vectorIDsToInclude, vecIDs[0])
} else {
vectorIDsToInclude = append(vectorIDsToInclude, vecIDs...)
}
}
// In case a doc has invalid vector fields but valid non-vector fields,
// filter hit IDs may be ineligible for the kNN since the document does
// not have any/valid vectors.
if len(vectorIDsToInclude) == 0 {
return rv, nil
}
// If the index is not an IVF index, then the search can be
// performed directly, using the Flat index.
if !vecIndex.IsIVFIndex() {
// vector IDs corresponding to the local doc numbers to be
// considered for the search
scores, ids, err := vecIndex.SearchWithIDs(qVector, k,
vectorIDsToInclude, params)
if err != nil {
return nil, err
}
addIDsToPostingsList(rv, ids, scores)
return rv, nil
}
// Determining which clusters, identified by centroid ID,
// have at least one eligible vector and hence, ought to be
// probed.
clusterVectorCounts, err := vecIndex.ObtainClusterVectorCountsFromIVFIndex(vectorIDsToInclude)
if err != nil {
return nil, err
}
var selector faiss.Selector
// If there are more elements to be included than excluded, it
// might be quicker to use an exclusion selector as a filter
// instead of an inclusion selector.
if float32(len(eligibleDocIDs))/float32(len(docVecIDMap)) > 0.5 {
// Use a bitset to efficiently track eligible document IDs.
// This reduces the lookup cost when checking if a document ID is eligible,
// compared to using a map or slice.
bs := bitset.New(uint(len(eligibleDocIDs)))
for _, docID := range eligibleDocIDs {
bs.Set(uint(docID))
}
ineligibleVectorIDs := make([]int64, 0, len(vecDocIDMap)-len(vectorIDsToInclude))
for docID, vecIDs := range docVecIDMap {
// Check if the document ID is NOT in the eligible set, marking it as ineligible.
if !bs.Test(uint(docID)) {
// In the common case where vecIDs has only one element, which occurs
// when a document has only one vector field, we can
// avoid the unnecessary overhead of slice unpacking (append(vecIDs...)).
// Directly append the single element for efficiency.
if len(vecIDs) == 1 {
ineligibleVectorIDs = append(ineligibleVectorIDs, vecIDs[0])
} else {
ineligibleVectorIDs = append(ineligibleVectorIDs, vecIDs...)
}
}
}
selector, err = faiss.NewIDSelectorNot(ineligibleVectorIDs)
} else {
selector, err = faiss.NewIDSelectorBatch(vectorIDsToInclude)
}
if err != nil {
return nil, err
}
// If no error occurred during the creation of the selector, then
// it should be deleted once the search is complete.
defer selector.Delete()
// Ordering the retrieved centroid IDs by increasing order
// of distance i.e. decreasing order of proximity to query vector.
centroidIDs := make([]int64, 0, len(clusterVectorCounts))
for centroidID := range clusterVectorCounts {
centroidIDs = append(centroidIDs, centroidID)
}
closestCentroidIDs, centroidDistances, err :=
vecIndex.ObtainClustersWithDistancesFromIVFIndex(qVector, centroidIDs)
if err != nil {
return nil, err
}
// Getting the nprobe value set at index time.
nprobe := int(vecIndex.GetNProbe())
// Determining the minimum number of centroids to be probed
// to ensure that at least 'k' vectors are collected while
// examining at least 'nprobe' centroids.
var eligibleDocsTillNow int64
minEligibleCentroids := len(closestCentroidIDs)
for i, centroidID := range closestCentroidIDs {
eligibleDocsTillNow += clusterVectorCounts[centroidID]
// Stop once we've examined at least 'nprobe' centroids and
// collected at least 'k' vectors.
if eligibleDocsTillNow >= k && i+1 >= nprobe {
minEligibleCentroids = i + 1
break
}
}
// Search the clusters specified by 'closestCentroidIDs' for
// vectors whose IDs are present in 'vectorIDsToInclude'
scores, ids, err := vecIndex.SearchClustersFromIVFIndex(
selector, closestCentroidIDs, minEligibleCentroids,
k, qVector, centroidDistances, params)
if err != nil {
return nil, err
}
addIDsToPostingsList(rv, ids, scores)
return rv, nil
},
close: func() {
// skipping the closing because the index is cached and it's being
// deferred to a later point of time.
sb.vecIndexCache.decRef(fieldIDPlus1)
},
size: func() uint64 {
return vecIndexSize
},
obtainKCentroidCardinalitiesFromIVFIndex: func(limit int, descending bool) ([]index.CentroidCardinality, error) {
if vecIndex == nil || !vecIndex.IsIVFIndex() {
return nil, nil
}
cardinalities, centroids, err := vecIndex.ObtainKCentroidCardinalitiesFromIVFIndex(limit, descending)
if err != nil {
return nil, err
}
centroidCardinalities := make([]index.CentroidCardinality, len(cardinalities))
for i, cardinality := range cardinalities {
centroidCardinalities[i] = index.CentroidCardinality{
Centroid: centroids[i],
Cardinality: cardinality,
}
}
return centroidCardinalities, nil
},
}
err error
)
fieldIDPlus1 = sb.fieldsMap[field]
rv := &vectorIndexWrapper{sb: sb}
fieldIDPlus1 := sb.fieldsMap[field]
if fieldIDPlus1 <= 0 {
return wrapVecIndex, nil
return rv, nil
}
rv.fieldIDPlus1 = fieldIDPlus1
vectorSection := sb.fieldsSectionsMap[fieldIDPlus1-1][SectionFaissVectorIndex]
// check if the field has a vector section in the segment.
if vectorSection <= 0 {
return wrapVecIndex, nil
return rv, nil
}
pos := int(vectorSection)
@@ -574,15 +302,19 @@ func (sb *SegmentBase) InterpretVectorIndex(field string, requiresFiltering bool
pos += n
}
vecIndex, vecDocIDMap, docVecIDMap, vectorIDsToExclude, err =
var err error
rv.vecIndex, rv.vecDocIDMap, rv.docVecIDMap, rv.vectorIDsToExclude, err =
sb.vecIndexCache.loadOrCreate(fieldIDPlus1, sb.mem[pos:], requiresFiltering,
except)
if vecIndex != nil {
vecIndexSize = vecIndex.Size()
if err != nil {
return nil, err
}
return wrapVecIndex, err
if rv.vecIndex != nil {
rv.vecIndexSize = rv.vecIndex.Size()
}
return rv, nil
}
func (sb *SegmentBase) UpdateFieldStats(stats segment.FieldStats) {

View File

@@ -0,0 +1,645 @@
// Copyright (c) 2025 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build vectors
// +build vectors
package zap
import (
"encoding/json"
"math"
"slices"
"github.com/RoaringBitmap/roaring/v2/roaring64"
"github.com/bits-and-blooms/bitset"
index "github.com/blevesearch/bleve_index_api"
faiss "github.com/blevesearch/go-faiss"
segment "github.com/blevesearch/scorch_segment_api/v2"
)
// MaxMultiVectorDocSearchRetries limits repeated searches when deduplicating
// multi-vector documents. Each retry excludes previously seen vectors to find
// new unique documents. Acts as a safeguard against pathological data distributions.
var MaxMultiVectorDocSearchRetries = 100
// vectorIndexWrapper conforms to scorch_segment_api's VectorIndex interface
type vectorIndexWrapper struct {
vecIndex *faiss.IndexImpl
vecDocIDMap map[int64]uint32
docVecIDMap map[uint32][]int64
vectorIDsToExclude []int64
fieldIDPlus1 uint16
vecIndexSize uint64
sb *SegmentBase
}
func (v *vectorIndexWrapper) Search(qVector []float32, k int64,
params json.RawMessage) (
segment.VecPostingsList, error) {
// 1. returned postings list (of type PostingsList) has two types of information - docNum and its score.
// 2. both the values can be represented using roaring bitmaps.
// 3. the Iterator (of type PostingsIterator) returned would operate in terms of VecPostings.
// 4. VecPostings would just have the docNum and the score. Every call of Next()
// and Advance just returns the next VecPostings. The caller would do a vp.Number()
// and the Score() to get the corresponding values
rv := &VecPostingsList{
except: nil, // todo: handle the except bitmap within postings iterator.
postings: roaring64.New(),
}
if v.vecIndex == nil || v.vecIndex.D() != len(qVector) {
// vector index not found or dimensionality mismatched
return rv, nil
}
if v.sb.numDocs == 0 {
return rv, nil
}
rs, err := v.searchWithoutIDs(qVector, k,
v.vectorIDsToExclude, params)
if err != nil {
return nil, err
}
v.addIDsToPostingsList(rv, rs)
return rv, nil
}
func (v *vectorIndexWrapper) SearchWithFilter(qVector []float32, k int64,
eligibleDocIDs []uint64, params json.RawMessage) (
segment.VecPostingsList, error) {
// If every element in the index is eligible (full selectivity),
// then this can basically be considered unfiltered kNN.
if len(eligibleDocIDs) == int(v.sb.numDocs) {
return v.Search(qVector, k, params)
}
// 1. returned postings list (of type PostingsList) has two types of information - docNum and its score.
// 2. both the values can be represented using roaring bitmaps.
// 3. the Iterator (of type PostingsIterator) returned would operate in terms of VecPostings.
// 4. VecPostings would just have the docNum and the score. Every call of Next()
// and Advance just returns the next VecPostings. The caller would do a vp.Number()
// and the Score() to get the corresponding values
rv := &VecPostingsList{
except: nil, // todo: handle the except bitmap within postings iterator.
postings: roaring64.New(),
}
if v.vecIndex == nil || v.vecIndex.D() != len(qVector) {
// vector index not found or dimensionality mismatched
return rv, nil
}
// Check and proceed only if non-zero documents eligible per the filter query.
if len(eligibleDocIDs) == 0 {
return rv, nil
}
// vector IDs corresponding to the local doc numbers to be
// considered for the search
vectorIDsToInclude := make([]int64, 0, len(eligibleDocIDs))
for _, id := range eligibleDocIDs {
vecIDs := v.docVecIDMap[uint32(id)]
// In the common case where vecIDs has only one element, which occurs
// when a document has only one vector field, we can
// avoid the unnecessary overhead of slice unpacking (append(vecIDs...)).
// Directly append the single element for efficiency.
if len(vecIDs) == 1 {
vectorIDsToInclude = append(vectorIDsToInclude, vecIDs[0])
} else {
vectorIDsToInclude = append(vectorIDsToInclude, vecIDs...)
}
}
// In case a doc has invalid vector fields but valid non-vector fields,
// filter hit IDs may be ineligible for the kNN since the document does
// not have any/valid vectors.
if len(vectorIDsToInclude) == 0 {
return rv, nil
}
// If the index is not an IVF index, then the search can be
// performed directly, using the Flat index.
if !v.vecIndex.IsIVFIndex() {
// vector IDs corresponding to the local doc numbers to be
// considered for the search
rs, err := v.searchWithIDs(qVector, k,
vectorIDsToInclude, params)
if err != nil {
return nil, err
}
v.addIDsToPostingsList(rv, rs)
return rv, nil
}
// Determining which clusters, identified by centroid ID,
// have at least one eligible vector and hence, ought to be
// probed.
clusterVectorCounts, err := v.vecIndex.ObtainClusterVectorCountsFromIVFIndex(vectorIDsToInclude)
if err != nil {
return nil, err
}
var ids []int64
var include bool
// If there are more elements to be included than excluded, it
// might be quicker to use an exclusion selector as a filter
// instead of an inclusion selector.
if float32(len(eligibleDocIDs))/float32(len(v.docVecIDMap)) > 0.5 {
// Use a bitset to efficiently track eligible document IDs.
// This reduces the lookup cost when checking if a document ID is eligible,
// compared to using a map or slice.
bs := bitset.New(uint(v.sb.numDocs))
for _, docID := range eligibleDocIDs {
bs.Set(uint(docID))
}
ineligibleVectorIDs := make([]int64, 0, len(v.vecDocIDMap)-len(vectorIDsToInclude))
for docID, vecIDs := range v.docVecIDMap {
// Check if the document ID is NOT in the eligible set, marking it as ineligible.
if !bs.Test(uint(docID)) {
// In the common case where vecIDs has only one element, which occurs
// when a document has only one vector field, we can
// avoid the unnecessary overhead of slice unpacking (append(vecIDs...)).
// Directly append the single element for efficiency.
if len(vecIDs) == 1 {
ineligibleVectorIDs = append(ineligibleVectorIDs, vecIDs[0])
} else {
ineligibleVectorIDs = append(ineligibleVectorIDs, vecIDs...)
}
}
}
ids = ineligibleVectorIDs
include = false
} else {
ids = vectorIDsToInclude
include = true
}
// Ordering the retrieved centroid IDs by increasing order
// of distance i.e. decreasing order of proximity to query vector.
centroidIDs := make([]int64, 0, len(clusterVectorCounts))
for centroidID := range clusterVectorCounts {
centroidIDs = append(centroidIDs, centroidID)
}
closestCentroidIDs, centroidDistances, err :=
v.vecIndex.ObtainClustersWithDistancesFromIVFIndex(qVector, centroidIDs)
if err != nil {
return nil, err
}
// Getting the nprobe value set at index time.
nprobe := int(v.vecIndex.GetNProbe())
// Determining the minimum number of centroids to be probed
// to ensure that at least 'k' vectors are collected while
// examining at least 'nprobe' centroids.
// centroidsToProbe range: [nprobe, number of eligible centroids]
var eligibleVecsTillNow int64
centroidsToProbe := len(closestCentroidIDs)
for i, centroidID := range closestCentroidIDs {
eligibleVecsTillNow += clusterVectorCounts[centroidID]
// Stop once we've examined at least 'nprobe' centroids and
// collected at least 'k' vectors.
if eligibleVecsTillNow >= k && i+1 >= nprobe {
centroidsToProbe = i + 1
break
}
}
// Search the clusters specified by 'closestCentroidIDs' for
// vectors whose IDs are present in 'vectorIDsToInclude'
rs, err := v.searchClustersFromIVFIndex(
ids, include, closestCentroidIDs, centroidsToProbe,
k, qVector, centroidDistances, params)
if err != nil {
return nil, err
}
v.addIDsToPostingsList(rv, rs)
return rv, nil
}
func (v *vectorIndexWrapper) Close() {
// skipping the closing because the index is cached and it's being
// deferred to a later point of time.
v.sb.vecIndexCache.decRef(v.fieldIDPlus1)
}
func (v *vectorIndexWrapper) Size() uint64 {
return v.vecIndexSize
}
func (v *vectorIndexWrapper) ObtainKCentroidCardinalitiesFromIVFIndex(limit int, descending bool) (
[]index.CentroidCardinality, error) {
if v.vecIndex == nil || !v.vecIndex.IsIVFIndex() {
return nil, nil
}
cardinalities, centroids, err := v.vecIndex.ObtainKCentroidCardinalitiesFromIVFIndex(limit, descending)
if err != nil {
return nil, err
}
centroidCardinalities := make([]index.CentroidCardinality, len(cardinalities))
for i, cardinality := range cardinalities {
centroidCardinalities[i] = index.CentroidCardinality{
Centroid: centroids[i],
Cardinality: cardinality,
}
}
return centroidCardinalities, nil
}
// Utility function to add the corresponding docID and scores for each unique
// docID retrieved from the vector index search to the newly created vecPostingsList
func (v *vectorIndexWrapper) addIDsToPostingsList(pl *VecPostingsList, rs resultSet) {
rs.iterate(func(docID uint32, score float32) {
// transform the docID and score to vector code format
code := getVectorCode(docID, score)
// add to postings list, this ensures ordered storage
// based on the docID since it occupies the upper 32 bits
pl.postings.Add(code)
})
}
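getVectorCode packs the docID into the upper 32 bits of the posting code, which is why adding codes to the roaring64 bitmap keeps them ordered by document. A plausible sketch of such packing, assuming the score's raw float32 bits fill the lower half (the actual encoding may differ in detail):
package main
import (
	"fmt"
	"math"
)
// packCode places docID in the upper 32 bits so numeric order of the
// codes follows doc order; the score's raw float32 bits fill the lower half.
func packCode(docID uint32, score float32) uint64 {
	return uint64(docID)<<32 | uint64(math.Float32bits(score))
}
func unpackCode(code uint64) (uint32, float32) {
	return uint32(code >> 32), math.Float32frombits(uint32(code))
}
func main() {
	code := packCode(42, 0.87)
	doc, score := unpackCode(code)
	fmt.Println(doc, score)                      // 42 0.87
	fmt.Println(packCode(1, 9) < packCode(2, 0)) // true: doc order dominates
}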
// docSearch performs a search on the vector index to retrieve
// top k documents based on the provided search function.
// It handles deduplication of documents that may have multiple
// vectors associated with them.
// The prepareNextIter function is used to set up the state
// for the next iteration, if more searches are needed to find
// k unique documents. The callback receives the number of iterations
// done so far and the vector IDs retrieved in the last search. While preparing
// the next iteration, prepareNextIter can also decide that no further searches
// are needed and return false to stop the loop
func (v *vectorIndexWrapper) docSearch(k int64, numDocs uint64,
search func() (scores []float32, labels []int64, err error),
prepareNextIter func(numIter int, labels []int64) bool) (resultSet, error) {
// create a result set to hold top K docIDs and their scores
rs := newResultSet(k, numDocs)
// flag to indicate if we have exhausted the vector index
var exhausted bool
// keep track of the number of iterations done; we execute the loop more than once
// only when multi-vector documents lead to duplicate docIDs being retrieved
numIter := 0
// get the metric type of the index to help with deduplication logic
metricType := v.vecIndex.MetricType()
// we keep searching until we have k unique docIDs or we have exhausted the vector index
// or we have reached the maximum number of deduplication iterations allowed
for numIter < MaxMultiVectorDocSearchRetries && rs.size() < k && !exhausted {
// search the vector index
numIter++
scores, labels, err := search()
if err != nil {
return nil, err
}
// process the retrieved IDs and scores, looking up the corresponding docID
// for each vector ID retrieved and storing the best score for each unique docID.
// The moment we see a -1 vector ID, it indicates there are no more vectors to be
// retrieved, so we stop processing, set the exhausted flag and break out of the
// loop
for i, vecID := range labels {
if vecID == -1 {
exhausted = true
break
}
docID, exists := v.getDocIDForVectorID(vecID)
if !exists {
continue
}
score := scores[i]
prevScore, exists := rs.get(docID)
if !exists {
// first time seeing this docID, so just store it
rs.put(docID, score)
continue
}
// we have seen this docID before, so we must compare scores.
// The index metric type determines how scores/distances are compared
// and which value is kept as the best score for the docID:
// for inner product, a higher score means a better match;
// for euclidean distance, a lower score/distance means a better match,
// so the comparison is inverted accordingly
switch metricType {
case faiss.MetricInnerProduct: // similarity metrics like dot product => higher is better
if score > prevScore {
rs.put(docID, score)
}
case faiss.MetricL2:
fallthrough
default: // distance metrics like euclidean distance => lower is better
if score < prevScore {
rs.put(docID, score)
}
}
}
// if we still have less than k unique docIDs, prepare for the next iteration, provided
// we have not exhausted the index
if rs.size() < k && !exhausted {
// prepare state for next iteration
shouldContinue := prepareNextIter(numIter, labels)
if !shouldContinue {
break
}
}
}
// at this point we either have k unique docIDs or we have exhausted
// the vector index or we have reached the maximum number of deduplication iterations allowed
// or the prepareNextIter function decided to break out of the loop
return rs, nil
}
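Across retries, docSearch keeps only the best score per docID, and what counts as best flips with the metric: higher for inner product, lower for L2 distance. A small metric-aware dedup sketch, independent of faiss:
package main
import "fmt"
type metric int
const (
	innerProduct metric = iota // similarity: higher is better
	l2                         // distance: lower is better
)
// keepBest records score for docID only if it beats the stored one for the metric.
func keepBest(best map[uint32]float32, m metric, docID uint32, score float32) {
	prev, seen := best[docID]
	if !seen ||
		(m == innerProduct && score > prev) ||
		(m != innerProduct && score < prev) {
		best[docID] = score
	}
}
func main() {
	best := map[uint32]float32{}
	for _, hit := range []struct {
		doc   uint32
		score float32
	}{{1, 0.4}, {1, 0.9}, {2, 0.7}} {
		keepBest(best, innerProduct, hit.doc, hit.score)
	}
	fmt.Println(best) // map[1:0.9 2:0.7]
}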
// searchWithoutIDs performs a search on the vector index to retrieve the top K documents while
// excluding any vector IDs specified in the exclude slice.
func (v *vectorIndexWrapper) searchWithoutIDs(qVector []float32, k int64, exclude []int64, params json.RawMessage) (
resultSet, error) {
return v.docSearch(k, v.sb.numDocs,
func() ([]float32, []int64, error) {
return v.vecIndex.SearchWithoutIDs(qVector, k, exclude, params)
},
func(numIter int, labels []int64) bool {
// if this is the first loop iteration and we have < k unique docIDs,
// we must clone the existing exclude slice before appending to it
// to avoid modifying the original slice passed in by the caller
if numIter == 1 {
exclude = slices.Clone(exclude)
}
// prepare the exclude list for the next iteration by adding
// the vector ids retrieved in this iteration
exclude = append(exclude, labels...)
// with exclude list updated, we can proceed to the next iteration
return true
})
}
// searchWithIDs performs a search on the vector index to retrieve the top K documents while only
// considering the vector IDs specified in the include slice.
func (v *vectorIndexWrapper) searchWithIDs(qVector []float32, k int64, include []int64, params json.RawMessage) (
resultSet, error) {
// if the number of iterations > 1, we will be modifying the include slice
// to exclude vector ids already seen, so we use this set to track the
// include set for the next iteration, this is reused across iterations
// and allocated only once, when numIter == 1
var includeSet map[int64]struct{}
return v.docSearch(k, v.sb.numDocs,
func() ([]float32, []int64, error) {
return v.vecIndex.SearchWithIDs(qVector, k, include, params)
},
func(numIter int, labels []int64) bool {
// if this is the first loop iteration and we have < k unique docIDs,
// we clone the existing include slice before modifying it
if numIter == 1 {
include = slices.Clone(include)
// build the include set for subsequent iterations
includeSet = make(map[int64]struct{}, len(include))
for _, id := range include {
includeSet[id] = struct{}{}
}
}
// prepare the include list for the next iteration
// by removing the vector ids retrieved in this iteration
// from the include set
for _, id := range labels {
delete(includeSet, id)
}
// now build the next include slice from the set
include = include[:0]
for id := range includeSet {
include = append(include, id)
}
// only continue searching if we still have vector ids to include
return len(include) != 0
})
}
// searchClustersFromIVFIndex performs a search on the IVF vector index to retrieve the top K documents
// while either including or excluding the vector IDs specified in the ids slice, depending on the include flag.
// It takes into account the eligible centroid IDs and ensures that at least centroidsToProbe are probed.
// If after a few iterations we haven't found enough documents, it dynamically increases the number of
// clusters searched (up to the number of eligible centroids) to ensure we can find k unique documents.
func (v *vectorIndexWrapper) searchClustersFromIVFIndex(ids []int64, include bool, eligibleCentroidIDs []int64,
centroidsToProbe int, k int64, x, centroidDis []float32, params json.RawMessage) (
resultSet, error) {
// if the number of iterations > 1, we will be modifying the include slice
// to exclude vector ids already seen, so we use this set to track the
// include set for the next iteration, this is reused across iterations
// and allocated only once, when numIter == 1
var includeSet map[int64]struct{}
var totalEligibleCentroids = len(eligibleCentroidIDs)
// Threshold for when to start increasing: after 2 iterations without
// finding enough documents, we start increasing centroidsToProbe,
// up to the total number of eligible centroids available
const nprobeIncreaseThreshold = 2
return v.docSearch(k, v.sb.numDocs,
func() ([]float32, []int64, error) {
// build the selector based on whatever ids is as of now and the
// include/exclude flag
selector, err := v.getSelector(ids, include)
if err != nil {
return nil, nil, err
}
// once the main search is done we must free the selector
defer selector.Delete()
return v.vecIndex.SearchClustersFromIVFIndex(selector, eligibleCentroidIDs,
centroidsToProbe, k, x, centroidDis, params)
},
func(numIter int, labels []int64) bool {
// if this is the first loop iteration and we have < k unique docIDs,
// we must clone the existing ids slice before modifying it to avoid
// modifying the original slice passed in by the caller
if numIter == 1 {
ids = slices.Clone(ids)
if include {
// build the include set for subsequent iterations
// by adding all the ids initially present in the ids slice
includeSet = make(map[int64]struct{}, len(ids))
for _, id := range ids {
includeSet[id] = struct{}{}
}
}
}
// if we have iterated at least nprobeIncreaseThreshold times
// and still have not found enough unique docIDs, we increase
// the number of centroids to probe for the next iteration
// to try and find more vectors/documents
if numIter >= nprobeIncreaseThreshold && centroidsToProbe < len(eligibleCentroidIDs) {
// Calculate how much to increase: increase by 50% of the remaining centroids to probe,
// but at least by 1 to ensure progress.
increaseAmount := max((totalEligibleCentroids-centroidsToProbe)/2, 1)
// Update centroidsToProbe, ensuring it does not exceed the total eligible centroids
centroidsToProbe = min(centroidsToProbe+increaseAmount, len(eligibleCentroidIDs))
}
// prepare the exclude/include list for the next iteration
if include {
// removing the vector ids retrieved in this iteration
// from the include set and rebuild the ids slice from the set
for _, id := range labels {
delete(includeSet, id)
}
// now build the next include slice from the set
ids = ids[:0]
for id := range includeSet {
ids = append(ids, id)
}
// only continue searching if we still have vector ids to include
return len(ids) != 0
} else {
// appending the vector ids retrieved in this iteration
// to the exclude list
ids = append(ids, labels...)
// with exclude list updated, we can proceed to the next iteration
return true
}
})
}
// Utility function to get a faiss.Selector based on the include/exclude flag
// and the vector ids provided, if include is true, it returns an inclusion selector,
// else it returns an exclusion selector. The caller must ensure to free the selector
// by calling selector.Delete() when done using it.
func (v *vectorIndexWrapper) getSelector(ids []int64, include bool) (selector faiss.Selector, err error) {
if include {
selector, err = faiss.NewIDSelectorBatch(ids)
} else {
selector, err = faiss.NewIDSelectorNot(ids)
}
if err != nil {
return nil, err
}
return selector, nil
}
// Utility function to get the docID for a given vectorID, used for the
// deduplication logic, to map vectorIDs back to their corresponding docIDs
func (v *vectorIndexWrapper) getDocIDForVectorID(vecID int64) (uint32, bool) {
docID, exists := v.vecDocIDMap[vecID]
return docID, exists
}
// resultSet is a data structure to hold (docID, score) pairs while ensuring
// that each docID is unique. It supports efficient insertion, retrieval,
// and iteration over the stored pairs.
type resultSet interface {
// Add a (docID, score) pair to the result set.
put(docID uint32, score float32)
// Get the score for a given docID. Returns false if docID not present.
get(docID uint32) (float32, bool)
// Iterate over all (docID, score) pairs in the result set.
iterate(func(docID uint32, score float32))
// Get the size of the result set.
size() int64
}
// resultSetSliceThreshold defines the threshold ratio of k to total documents
// in the index, below which a map-based resultSet is used, and above which
// a slice-based resultSet is used.
// It is derived using the following reasoning:
//
// Let N = total number of documents
// Let K = number of top K documents to retrieve
//
// Memory usage if the Result Set uses a map[uint32]float32 of size K underneath:
//
// ~20 bytes per entry (key + value + map overhead)
// Total ≈ 20 * K bytes
//
// Memory usage if the Result Set uses a slice of float32 of size N underneath:
//
// 4 bytes per entry
// Total ≈ 4 * N bytes
//
// We want the threshold below which a map is more memory-efficient than a slice:
//
// 20K < 4N
// K/N < 4/20
//
// Therefore, if the ratio of K to N is less than 0.2 (4/20), we use a map-based resultSet.
const resultSetSliceThreshold float64 = 0.2
// newResultSet creates a new resultSet
func newResultSet(k int64, numDocs uint64) resultSet {
// if numDocs is zero (empty index), just use the map-based resultSet as it's a no-op;
// otherwise decide based on the fraction of documents being retrieved: if we need
// 20% or more of the total documents, use the slice-based resultSet for better memory
// efficiency, else use the map-based resultSet
if numDocs == 0 || float64(k)/float64(numDocs) < resultSetSliceThreshold {
return newResultSetMap(k)
}
return newResultSetSlice(numDocs)
}
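A quick sanity check of the 20K < 4N reasoning behind the 0.2 cutoff, with hypothetical sizes; this sketch is illustrative only and not part of the vendored bleve code:

```go
package main

import "fmt"

func main() {
	// Hypothetical index: 1,000,000 documents.
	numDocs := uint64(1_000_000)
	sliceBytes := 4 * numDocs // one float32 slot per document

	for _, k := range []int64{10, 100_000, 200_000, 500_000} {
		mapBytes := 20 * uint64(k) // ~20 bytes per map entry (key + value + overhead)
		ratio := float64(k) / float64(numDocs)
		fmt.Printf("k=%d ratio=%.2f map≈%dB slice≈%dB -> use %s\n",
			k, ratio, mapBytes, sliceBytes,
			map[bool]string{true: "map", false: "slice"}[ratio < 0.2])
	}
	// The crossover sits at k/N = 0.2: below it the k-sized map is smaller,
	// at or above it the N-sized float32 slice wins.
}
```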
type resultSetMap struct {
data map[uint32]float32
}
func newResultSetMap(k int64) resultSet {
return &resultSetMap{
data: make(map[uint32]float32, k),
}
}
func (rs *resultSetMap) put(docID uint32, score float32) {
rs.data[docID] = score
}
func (rs *resultSetMap) get(docID uint32) (float32, bool) {
score, exists := rs.data[docID]
return score, exists
}
func (rs *resultSetMap) iterate(f func(docID uint32, score float32)) {
for docID, score := range rs.data {
f(docID, score)
}
}
func (rs *resultSetMap) size() int64 {
return int64(len(rs.data))
}
type resultSetSlice struct {
count int64
data []float32
}
func newResultSetSlice(numDocs uint64) resultSet {
data := make([]float32, numDocs)
// scores can be negative, so initialize every slot to NaN as the "absent" sentinel
sentinel := float32(math.NaN())
for i := range data {
data[i] = sentinel
}
return &resultSetSlice{
count: 0,
data: data,
}
}
func (rs *resultSetSlice) put(docID uint32, score float32) {
// only increment count if this docID was not already present
if math.IsNaN(float64(rs.data[docID])) {
rs.count++
}
rs.data[docID] = score
}
func (rs *resultSetSlice) get(docID uint32) (float32, bool) {
score := rs.data[docID]
if math.IsNaN(float64(score)) {
return 0, false
}
return score, true
}
func (rs *resultSetSlice) iterate(f func(docID uint32, score float32)) {
for docID, score := range rs.data {
if !math.IsNaN(float64(score)) {
f(uint32(docID), score)
}
}
}
func (rs *resultSetSlice) size() int64 {
return rs.count
}
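For orientation, a minimal sketch of how such a resultSet could be driven. It is hypothetical, assumes it sits in the same package as the code above (so it can reach the unexported newResultSet), and is not the actual bleve caller:

```go
// Illustrative only: feed (docID, score) pairs in, keeping the best score per
// docID, then read the deduplicated results back out.
func exampleResultSetUsage() {
	rs := newResultSet(3 /* k */, 10 /* numDocs */) // k/N >= 0.2 -> slice-backed

	hits := []struct {
		docID uint32
		score float32
	}{{1, 0.9}, {4, 0.7}, {1, 0.95}} // docID 1 appears twice

	for _, h := range hits {
		if prev, ok := rs.get(h.docID); !ok || h.score > prev {
			rs.put(h.docID, h.score) // a second put overwrites, so each docID stays unique
		}
	}

	var out []uint32
	rs.iterate(func(docID uint32, score float32) {
		out = append(out, docID)
	})
	// len(out) == 2 and rs.size() == 2: the duplicate docID 1 collapsed into one entry.
}
```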

View File

@@ -1,6 +1,6 @@
// Package chi is a small, idiomatic and composable router for building HTTP services.
//
// chi requires Go 1.14 or newer.
// chi supports the four most recent major versions of Go.
//
// Example:
//

View File

@@ -2,6 +2,7 @@ package middleware
import (
"net/http"
"slices"
"strings"
)
@@ -29,13 +30,7 @@ func contentEncoding(ce string, charsets ...string) bool {
_, ce = split(strings.ToLower(ce), ";")
_, ce = split(ce, "charset=")
ce, _ = split(ce, ";")
for _, c := range charsets {
if ce == c {
return true
}
}
return false
return slices.Contains(charsets, ce)
}
// Split a string in two parts, cleaning any whitespace.

View File

@@ -25,7 +25,7 @@ const RequestIDKey ctxKeyRequestID = 0
var RequestIDHeader = "X-Request-Id"
var prefix string
var reqid uint64
var reqid atomic.Uint64
// A quick note on the statistics here: we're trying to calculate the chance that
// two randomly generated base62 prefixes will collide. We use the formula from
@@ -69,7 +69,7 @@ func RequestID(next http.Handler) http.Handler {
ctx := r.Context()
requestID := r.Header.Get(RequestIDHeader)
if requestID == "" {
myid := atomic.AddUint64(&reqid, 1)
myid := reqid.Add(1)
requestID = fmt.Sprintf("%s-%06d", prefix, myid)
}
ctx = context.WithValue(ctx, RequestIDKey, requestID)
@@ -92,5 +92,5 @@ func GetReqID(ctx context.Context) string {
// NextRequestID generates the next request ID in the sequence.
func NextRequestID() uint64 {
return atomic.AddUint64(&reqid, 1)
return reqid.Add(1)
}
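The request_id change above is the usual migration from the function-style sync/atomic calls to the typed atomic.Uint64, which cannot be accessed non-atomically by accident. A standalone sketch of the same pattern (illustrative, not chi's code):

```go
package main

import (
	"fmt"
	"sync/atomic"
)

// Before: a plain uint64 plus atomic.AddUint64(&reqid, 1) at every call site.
// After: the typed atomic.Uint64 with its Add method.
var reqid atomic.Uint64

func nextRequestID() uint64 {
	return reqid.Add(1) // equivalent to atomic.AddUint64(&reqid, 1)
}

func main() {
	fmt.Println(nextRequestID()) // 1
	fmt.Println(nextRequestID()) // 2
}
```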

View File

@@ -47,15 +47,22 @@ func RedirectSlashes(next http.Handler) http.Handler {
} else {
path = r.URL.Path
}
if len(path) > 1 && path[len(path)-1] == '/' {
// Trim all leading and trailing slashes (e.g., "//evil.com", "/some/path//")
path = "/" + strings.Trim(path, "/")
// Normalize backslashes to forward slashes to prevent "/\evil.com" style redirects
// that some clients may interpret as protocol-relative.
path = strings.ReplaceAll(path, `\`, `/`)
// Collapse leading/trailing slashes and force a single leading slash.
path := "/" + strings.Trim(path, "/")
if r.URL.RawQuery != "" {
path = fmt.Sprintf("%s?%s", path, r.URL.RawQuery)
}
http.Redirect(w, r, path, 301)
return
}
next.ServeHTTP(w, r)
}
return http.HandlerFunc(fn)

View File

@@ -467,8 +467,10 @@ func (mx *Mux) routeHTTP(w http.ResponseWriter, r *http.Request) {
// Find the route
if _, _, h := mx.tree.FindRoute(rctx, method, routePath); h != nil {
if supportsPathValue {
setPathValue(rctx, r)
// Set http.Request path values from our request context
for i, key := range rctx.URLParams.Keys {
value := rctx.URLParams.Values[i]
r.SetPathValue(key, value)
}
if supportsPattern {
setPattern(rctx, r)

View File

@@ -1,21 +0,0 @@
//go:build go1.22 && !tinygo
// +build go1.22,!tinygo
package chi
import "net/http"
// supportsPathValue is true if the Go version is 1.22 and above.
//
// If this is true, `net/http.Request` has methods `SetPathValue` and `PathValue`.
const supportsPathValue = true
// setPathValue sets the path values in the Request value
// based on the provided request context.
func setPathValue(rctx *Context, r *http.Request) {
for i, key := range rctx.URLParams.Keys {
value := rctx.URLParams.Values[i]
r.SetPathValue(key, value)
}
}

View File

@@ -1,19 +0,0 @@
//go:build !go1.22 || tinygo
// +build !go1.22 tinygo
package chi
import "net/http"
// supportsPathValue is true if the Go version is 1.22 and above.
//
// If this is true, `net/http.Request` has methods `SetPathValue` and `PathValue`.
const supportsPathValue = false
// setPathValue sets the path values in the Request value
// based on the provided request context.
//
// setPathValue is only supported in Go 1.22 and above so
// this is just a blank function so that it compiles.
func setPathValue(rctx *Context, r *http.Request) {
}

View File

@@ -71,6 +71,7 @@ func RegisterMethod(method string) {
}
mt := methodTyp(2 << n)
methodMap[method] = mt
reverseMethodMap[mt] = method
mALL |= mt
}
@@ -328,7 +329,7 @@ func (n *node) replaceChild(label, tail byte, child *node) {
func (n *node) getEdge(ntyp nodeTyp, label, tail byte, prefix string) *node {
nds := n.children[ntyp]
for i := 0; i < len(nds); i++ {
for i := range nds {
if nds[i].label == label && nds[i].tail == tail {
if ntyp == ntRegexp && nds[i].prefix != prefix {
continue
@@ -429,9 +430,7 @@ func (n *node) findRoute(rctx *Context, method methodTyp, path string) *node {
}
// serially loop through each node grouped by the tail delimiter
for idx := 0; idx < len(nds); idx++ {
xn = nds[idx]
for _, xn = range nds {
// label for param nodes is the delimiter byte
p := strings.IndexByte(xsearch, xn.tail)
@@ -770,20 +769,14 @@ func patParamKeys(pattern string) []string {
}
}
// longestPrefix finds the length of the shared prefix
// of two strings
func longestPrefix(k1, k2 string) int {
max := len(k1)
if l := len(k2); l < max {
max = l
}
var i int
for i = 0; i < max; i++ {
// longestPrefix finds the length of the shared prefix of two strings
func longestPrefix(k1, k2 string) (i int) {
for i = 0; i < min(len(k1), len(k2)); i++ {
if k1[i] != k2[i] {
break
}
}
return i
return
}
type nodes []*node

View File

@@ -1,40 +1,67 @@
version: "2"
run:
# do not run on test files yet
tests: false
# all available settings of specific linters
linters-settings:
errcheck:
# report about not checking of errors in type assetions: `a := b.(MyStruct)`;
# default is false: such cases aren't reported by default.
check-type-assertions: false
# report about assignment of errors to blank identifier: `num, _ := strconv.Atoi(numStr)`;
# default is false: such cases aren't reported by default.
check-blank: false
lll:
line-length: 100
tab-width: 4
prealloc:
simple: false
range-loops: false
for-loops: false
whitespace:
multi-if: false # Enforces newlines (or comments) after every multi-line if statement
multi-func: false # Enforces newlines (or comments) after every multi-line function signature
linters:
enable:
- megacheck
- govet
- asasalint
- asciicheck
- bidichk
- bodyclose
- contextcheck
- durationcheck
- errchkjson
- errorlint
- exhaustive
- gocheckcompilerdirectives
- gochecksumtype
- gosec
- gosmopolitan
- loggercheck
- makezero
- musttag
- nilerr
- nilnesserr
- noctx
- protogetter
- reassign
- recvcheck
- rowserrcheck
- spancheck
- sqlclosecheck
- testifylint
- unparam
- zerologlint
disable:
- maligned
- prealloc
disable-all: false
presets:
- bugs
- unused
fast: false
settings:
errcheck:
check-type-assertions: false
check-blank: false
lll:
line-length: 100
tab-width: 4
prealloc:
simple: false
range-loops: false
for-loops: false
whitespace:
multi-if: false
multi-func: false
exclusions:
generated: lax
presets:
- comments
- common-false-positives
- legacy
- std-error-handling
paths:
- third_party$
- builtin$
- examples$
formatters:
exclusions:
generated: lax
paths:
- third_party$
- builtin$
- examples$

View File

@@ -37,7 +37,7 @@ Features:
# 1.6.0
Fixes:
* end of line cleanup
* revert the entry concurrency bug fix whic leads to deadlock under some circumstances
* revert the entry concurrency bug fix which leads to deadlock under some circumstances
* update dependency on go-windows-terminal-sequences to fix a crash with go 1.14
Features:
@@ -129,7 +129,7 @@ This new release introduces:
which is mostly useful for logger wrapper
* a fix reverting the immutability of the entry given as parameter to the hooks
a new configuration field of the json formatter in order to put all the fields
in a nested dictionnary
in a nested dictionary
* a new SetOutput method in the Logger
* a new configuration of the textformatter to configure the name of the default keys
* a new configuration of the text formatter to disable the level truncation

View File

@@ -1,4 +1,4 @@
# Logrus <img src="http://i.imgur.com/hTeVwmJ.png" width="40" height="40" alt=":walrus:" class="emoji" title=":walrus:"/> [![Build Status](https://github.com/sirupsen/logrus/workflows/CI/badge.svg)](https://github.com/sirupsen/logrus/actions?query=workflow%3ACI) [![Build Status](https://travis-ci.org/sirupsen/logrus.svg?branch=master)](https://travis-ci.org/sirupsen/logrus) [![Go Reference](https://pkg.go.dev/badge/github.com/sirupsen/logrus.svg)](https://pkg.go.dev/github.com/sirupsen/logrus)
# Logrus <img src="http://i.imgur.com/hTeVwmJ.png" width="40" height="40" alt=":walrus:" class="emoji" title=":walrus:"/> [![Build Status](https://github.com/sirupsen/logrus/workflows/CI/badge.svg)](https://github.com/sirupsen/logrus/actions?query=workflow%3ACI) [![Go Reference](https://pkg.go.dev/badge/github.com/sirupsen/logrus.svg)](https://pkg.go.dev/github.com/sirupsen/logrus)
Logrus is a structured logger for Go (golang), completely API compatible with
the standard library logger.
@@ -40,7 +40,7 @@ plain text):
![Colored](http://i.imgur.com/PY7qMwd.png)
With `log.SetFormatter(&log.JSONFormatter{})`, for easy parsing by logstash
With `logrus.SetFormatter(&logrus.JSONFormatter{})`, for easy parsing by logstash
or Splunk:
```text
@@ -60,9 +60,9 @@ ocean","size":10,"time":"2014-03-10 19:57:38.562264131 -0400 EDT"}
"time":"2014-03-10 19:57:38.562543128 -0400 EDT"}
```
With the default `log.SetFormatter(&log.TextFormatter{})` when a TTY is not
With the default `logrus.SetFormatter(&logrus.TextFormatter{})` when a TTY is not
attached, the output is compatible with the
[logfmt](http://godoc.org/github.com/kr/logfmt) format:
[logfmt](https://pkg.go.dev/github.com/kr/logfmt) format:
```text
time="2015-03-26T01:27:38-04:00" level=debug msg="Started observing beach" animal=walrus number=8
@@ -75,17 +75,18 @@ time="2015-03-26T01:27:38-04:00" level=fatal msg="The ice breaks!" err=&{0x20822
To ensure this behaviour even if a TTY is attached, set your formatter as follows:
```go
log.SetFormatter(&log.TextFormatter{
DisableColors: true,
FullTimestamp: true,
})
logrus.SetFormatter(&logrus.TextFormatter{
DisableColors: true,
FullTimestamp: true,
})
```
#### Logging Method Name
If you wish to add the calling method as a field, instruct the logger via:
```go
log.SetReportCaller(true)
logrus.SetReportCaller(true)
```
This adds the caller as 'method' like so:
@@ -100,11 +101,11 @@ time="2015-03-26T01:27:38-04:00" level=fatal method=github.com/sirupsen/arcticcr
Note that this does add measurable overhead - the cost will depend on the version of Go, but is
between 20 and 40% in recent tests with 1.6 and 1.7. You can validate this in your
environment via benchmarks:
```
```bash
go test -bench=.*CallerTracing
```
#### Case-sensitivity
The organization's name was changed to lower-case--and this will not be changed
@@ -118,12 +119,10 @@ The simplest way to use Logrus is simply the package-level exported logger:
```go
package main
import (
log "github.com/sirupsen/logrus"
)
import "github.com/sirupsen/logrus"
func main() {
log.WithFields(log.Fields{
logrus.WithFields(logrus.Fields{
"animal": "walrus",
}).Info("A walrus appears")
}
@@ -139,6 +138,7 @@ package main
import (
"os"
log "github.com/sirupsen/logrus"
)
@@ -190,26 +190,27 @@ package main
import (
"os"
"github.com/sirupsen/logrus"
)
// Create a new instance of the logger. You can have any number of instances.
var log = logrus.New()
var logger = logrus.New()
func main() {
// The API for setting attributes is a little different than the package level
// exported logger. See Godoc.
log.Out = os.Stdout
// exported logger. See Godoc.
logger.Out = os.Stdout
// You could set this to any `io.Writer` such as a file
// file, err := os.OpenFile("logrus.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
// if err == nil {
// log.Out = file
// logger.Out = file
// } else {
// log.Info("Failed to log to file, using default stderr")
// logger.Info("Failed to log to file, using default stderr")
// }
log.WithFields(logrus.Fields{
logger.WithFields(logrus.Fields{
"animal": "walrus",
"size": 10,
}).Info("A group of walrus emerges from the ocean")
@@ -219,12 +220,12 @@ func main() {
#### Fields
Logrus encourages careful, structured logging through logging fields instead of
long, unparseable error messages. For example, instead of: `log.Fatalf("Failed
long, unparseable error messages. For example, instead of: `logrus.Fatalf("Failed
to send event %s to topic %s with key %d")`, you should log the much more
discoverable:
```go
log.WithFields(log.Fields{
logrus.WithFields(logrus.Fields{
"event": event,
"topic": topic,
"key": key,
@@ -245,12 +246,12 @@ seen as a hint you should add a field, however, you can still use the
Often it's helpful to have fields _always_ attached to log statements in an
application or parts of one. For example, you may want to always log the
`request_id` and `user_ip` in the context of a request. Instead of writing
`log.WithFields(log.Fields{"request_id": request_id, "user_ip": user_ip})` on
`logger.WithFields(logrus.Fields{"request_id": request_id, "user_ip": user_ip})` on
every line, you can create a `logrus.Entry` to pass around instead:
```go
requestLogger := log.WithFields(log.Fields{"request_id": request_id, "user_ip": user_ip})
requestLogger.Info("something happened on that request") # will log request_id and user_ip
requestLogger := logger.WithFields(logrus.Fields{"request_id": request_id, "user_ip": user_ip})
requestLogger.Info("something happened on that request") // will log request_id and user_ip
requestLogger.Warn("something not great happened")
```
@@ -264,28 +265,31 @@ Logrus comes with [built-in hooks](hooks/). Add those, or your custom hook, in
`init`:
```go
package main
import (
log "github.com/sirupsen/logrus"
"gopkg.in/gemnasium/logrus-airbrake-hook.v2" // the package is named "airbrake"
logrus_syslog "github.com/sirupsen/logrus/hooks/syslog"
"log/syslog"
"github.com/sirupsen/logrus"
airbrake "gopkg.in/gemnasium/logrus-airbrake-hook.v2"
logrus_syslog "github.com/sirupsen/logrus/hooks/syslog"
)
func init() {
// Use the Airbrake hook to report errors that have Error severity or above to
// an exception tracker. You can create custom hooks, see the Hooks section.
log.AddHook(airbrake.NewHook(123, "xyz", "production"))
logrus.AddHook(airbrake.NewHook(123, "xyz", "production"))
hook, err := logrus_syslog.NewSyslogHook("udp", "localhost:514", syslog.LOG_INFO, "")
if err != nil {
log.Error("Unable to connect to local syslog daemon")
logrus.Error("Unable to connect to local syslog daemon")
} else {
log.AddHook(hook)
logrus.AddHook(hook)
}
}
```
Note: Syslog hook also support connecting to local syslog (Ex. "/dev/log" or "/var/run/syslog" or "/var/run/log"). For the detail, please check the [syslog hook README](hooks/syslog/README.md).
Note: Syslog hooks also support connecting to local syslog (Ex. "/dev/log" or "/var/run/syslog" or "/var/run/log"). For the detail, please check the [syslog hook README](hooks/syslog/README.md).
A list of currently known service hooks can be found in this wiki [page](https://github.com/sirupsen/logrus/wiki/Hooks)
@@ -295,15 +299,15 @@ A list of currently known service hooks can be found in this wiki [page](https:/
Logrus has seven logging levels: Trace, Debug, Info, Warning, Error, Fatal and Panic.
```go
log.Trace("Something very low level.")
log.Debug("Useful debugging information.")
log.Info("Something noteworthy happened!")
log.Warn("You should probably take a look at this.")
log.Error("Something failed but I'm not quitting.")
logrus.Trace("Something very low level.")
logrus.Debug("Useful debugging information.")
logrus.Info("Something noteworthy happened!")
logrus.Warn("You should probably take a look at this.")
logrus.Error("Something failed but I'm not quitting.")
// Calls os.Exit(1) after logging
log.Fatal("Bye.")
logrus.Fatal("Bye.")
// Calls panic() after logging
log.Panic("I'm bailing.")
logrus.Panic("I'm bailing.")
```
You can set the logging level on a `Logger`, then it will only log entries with
@@ -311,13 +315,13 @@ that severity or anything above it:
```go
// Will log anything that is info or above (warn, error, fatal, panic). Default.
log.SetLevel(log.InfoLevel)
logrus.SetLevel(logrus.InfoLevel)
```
It may be useful to set `log.Level = logrus.DebugLevel` in a debug or verbose
It may be useful to set `logrus.Level = logrus.DebugLevel` in a debug or verbose
environment if your application has that.
Note: If you want different log levels for global (`log.SetLevel(...)`) and syslog logging, please check the [syslog hook README](hooks/syslog/README.md#different-log-levels-for-local-and-remote-logging).
Note: If you want different log levels for global (`logrus.SetLevel(...)`) and syslog logging, please check the [syslog hook README](hooks/syslog/README.md#different-log-levels-for-local-and-remote-logging).
#### Entries
@@ -340,17 +344,17 @@ could do:
```go
import (
log "github.com/sirupsen/logrus"
"github.com/sirupsen/logrus"
)
func init() {
// do something here to set environment depending on an environment variable
// or command-line flag
if Environment == "production" {
log.SetFormatter(&log.JSONFormatter{})
logrus.SetFormatter(&logrus.JSONFormatter{})
} else {
// The TextFormatter is default, you don't actually have to do this.
log.SetFormatter(&log.TextFormatter{})
logrus.SetFormatter(&logrus.TextFormatter{})
}
}
```
@@ -372,11 +376,11 @@ The built-in logging formatters are:
* When colors are enabled, levels are truncated to 4 characters by default. To disable
truncation set the `DisableLevelTruncation` field to `true`.
* When outputting to a TTY, it's often helpful to visually scan down a column where all the levels are the same width. Setting the `PadLevelText` field to `true` enables this behavior, by adding padding to the level text.
* All options are listed in the [generated docs](https://godoc.org/github.com/sirupsen/logrus#TextFormatter).
* All options are listed in the [generated docs](https://pkg.go.dev/github.com/sirupsen/logrus#TextFormatter).
* `logrus.JSONFormatter`. Logs fields as JSON.
* All options are listed in the [generated docs](https://godoc.org/github.com/sirupsen/logrus#JSONFormatter).
* All options are listed in the [generated docs](https://pkg.go.dev/github.com/sirupsen/logrus#JSONFormatter).
Third party logging formatters:
Third-party logging formatters:
* [`FluentdFormatter`](https://github.com/joonix/log). Formats entries that can be parsed by Kubernetes and Google Container Engine.
* [`GELF`](https://github.com/fabienm/go-logrus-formatters). Formats entries so they comply to Graylog's [GELF 1.1 specification](http://docs.graylog.org/en/2.4/pages/gelf.html).
@@ -384,7 +388,7 @@ Third party logging formatters:
* [`prefixed`](https://github.com/x-cray/logrus-prefixed-formatter). Displays log entry source along with alternative layout.
* [`zalgo`](https://github.com/aybabtme/logzalgo). Invoking the Power of Zalgo.
* [`nested-logrus-formatter`](https://github.com/antonfisher/nested-logrus-formatter). Converts logrus fields to a nested structure.
* [`powerful-logrus-formatter`](https://github.com/zput/zxcTool). get fileName, log's line number and the latest function's name when print log; Sava log to files.
* [`powerful-logrus-formatter`](https://github.com/zput/zxcTool). get fileName, log's line number and the latest function's name when print log; Save log to files.
* [`caption-json-formatter`](https://github.com/nolleh/caption_json_formatter). logrus's message json formatter with human-readable caption added.
You can define your formatter by implementing the `Formatter` interface,
@@ -393,10 +397,9 @@ requiring a `Format` method. `Format` takes an `*Entry`. `entry.Data` is a
default ones (see Entries section above):
```go
type MyJSONFormatter struct {
}
type MyJSONFormatter struct{}
log.SetFormatter(new(MyJSONFormatter))
logrus.SetFormatter(new(MyJSONFormatter))
func (f *MyJSONFormatter) Format(entry *Entry) ([]byte, error) {
// Note this doesn't include Time, Level and Message which are available on
@@ -455,17 +458,18 @@ entries. It should not be a feature of the application-level logger.
#### Testing
Logrus has a built in facility for asserting the presence of log messages. This is implemented through the `test` hook and provides:
Logrus has a built-in facility for asserting the presence of log messages. This is implemented through the `test` hook and provides:
* decorators for existing logger (`test.NewLocal` and `test.NewGlobal`) which basically just adds the `test` hook
* a test logger (`test.NewNullLogger`) that just records log messages (and does not output any):
```go
import(
"testing"
"github.com/sirupsen/logrus"
"github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/assert"
"testing"
)
func TestSomething(t*testing.T){
@@ -486,15 +490,15 @@ func TestSomething(t*testing.T){
Logrus can register one or more functions that will be called when any `fatal`
level message is logged. The registered handlers will be executed before
logrus performs an `os.Exit(1)`. This behavior may be helpful if callers need
to gracefully shutdown. Unlike a `panic("Something went wrong...")` call which can be intercepted with a deferred `recover` a call to `os.Exit(1)` can not be intercepted.
to gracefully shut down. Unlike a `panic("Something went wrong...")` call which can be intercepted with a deferred `recover` a call to `os.Exit(1)` can not be intercepted.
```
...
```go
// ...
handler := func() {
// gracefully shutdown something...
// gracefully shut down something...
}
logrus.RegisterExitHandler(handler)
...
// ...
```
#### Thread safety
@@ -502,7 +506,7 @@ logrus.RegisterExitHandler(handler)
By default, Logger is protected by a mutex for concurrent writes. The mutex is held when calling hooks and writing logs.
If you are sure such locking is not needed, you can call logger.SetNoLock() to disable the locking.
Situation when locking is not needed includes:
Situations when locking is not needed include:
* You have no hooks registered, or hooks calling is already thread-safe.

View File

@@ -1,14 +1,12 @@
version: "{build}"
# Minimal stub to satisfy AppVeyor CI
version: 1.0.{build}
platform: x64
clone_folder: c:\gopath\src\github.com\sirupsen\logrus
environment:
GOPATH: c:\gopath
shallow_clone: true
branches:
only:
- master
install:
- set PATH=%GOPATH%\bin;c:\go\bin;%PATH%
- go version
- main
build_script:
- go get -t
- go test
- echo "No-op build to satisfy AppVeyor CI"

View File

@@ -34,13 +34,15 @@ func init() {
minimumCallerDepth = 1
}
// Defines the key when adding errors using WithError.
// ErrorKey defines the key when adding errors using [WithError], [Logger.WithError].
var ErrorKey = "error"
// An entry is the final or intermediate Logrus logging entry. It contains all
// Entry is the final or intermediate Logrus logging entry. It contains all
// the fields passed with WithField{,s}. It's finally logged when Trace, Debug,
// Info, Warn, Error, Fatal or Panic is called on it. These objects can be
// reused and passed around as much as you wish to avoid field duplication.
//
//nolint:recvcheck // the methods of "Entry" use pointer receiver and non-pointer receiver.
type Entry struct {
Logger *Logger
@@ -86,12 +88,12 @@ func (entry *Entry) Dup() *Entry {
return &Entry{Logger: entry.Logger, Data: data, Time: entry.Time, Context: entry.Context, err: entry.err}
}
// Returns the bytes representation of this entry from the formatter.
// Bytes returns the bytes representation of this entry from the formatter.
func (entry *Entry) Bytes() ([]byte, error) {
return entry.Logger.Formatter.Format(entry)
}
// Returns the string representation from the reader and ultimately the
// String returns the string representation from the reader and ultimately the
// formatter.
func (entry *Entry) String() (string, error) {
serialized, err := entry.Bytes()
@@ -102,12 +104,13 @@ func (entry *Entry) String() (string, error) {
return str, nil
}
// Add an error as single field (using the key defined in ErrorKey) to the Entry.
// WithError adds an error as single field (using the key defined in [ErrorKey])
// to the Entry.
func (entry *Entry) WithError(err error) *Entry {
return entry.WithField(ErrorKey, err)
}
// Add a context to the Entry.
// WithContext adds a context to the Entry.
func (entry *Entry) WithContext(ctx context.Context) *Entry {
dataCopy := make(Fields, len(entry.Data))
for k, v := range entry.Data {
@@ -116,12 +119,12 @@ func (entry *Entry) WithContext(ctx context.Context) *Entry {
return &Entry{Logger: entry.Logger, Data: dataCopy, Time: entry.Time, err: entry.err, Context: ctx}
}
// Add a single field to the Entry.
// WithField adds a single field to the Entry.
func (entry *Entry) WithField(key string, value interface{}) *Entry {
return entry.WithFields(Fields{key: value})
}
// Add a map of fields to the Entry.
// WithFields adds a map of fields to the Entry.
func (entry *Entry) WithFields(fields Fields) *Entry {
data := make(Fields, len(entry.Data)+len(fields))
for k, v := range entry.Data {
@@ -150,7 +153,7 @@ func (entry *Entry) WithFields(fields Fields) *Entry {
return &Entry{Logger: entry.Logger, Data: data, Time: entry.Time, err: fieldErr, Context: entry.Context}
}
// Overrides the time of the Entry.
// WithTime overrides the time of the Entry.
func (entry *Entry) WithTime(t time.Time) *Entry {
dataCopy := make(Fields, len(entry.Data))
for k, v := range entry.Data {
@@ -204,7 +207,7 @@ func getCaller() *runtime.Frame {
// If the caller isn't part of this package, we're done
if pkg != logrusPackage {
return &f //nolint:scopelint
return &f
}
}
@@ -432,7 +435,7 @@ func (entry *Entry) Panicln(args ...interface{}) {
entry.Logln(PanicLevel, args...)
}
// Sprintlnn => Sprint no newline. This is to get the behavior of how
// sprintlnn => Sprint no newline. This is to get the behavior of how
// fmt.Sprintln where spaces are always added between operands, regardless of
// their type. Instead of vendoring the Sprintln implementation to spare a
// string allocation, we do the simplest thing.

View File

@@ -1,16 +1,16 @@
package logrus
// A hook to be fired when logging on the logging levels returned from
// `Levels()` on your implementation of the interface. Note that this is not
// Hook describes hooks to be fired when logging on the logging levels returned from
// [Hook.Levels] on your implementation of the interface. Note that this is not
// fired in a goroutine or a channel with workers, you should handle such
// functionality yourself if your call is non-blocking and you don't wish for
// functionality yourself if your call is non-blocking, and you don't wish for
// the logging calls for levels returned from `Levels()` to block.
type Hook interface {
Levels() []Level
Fire(*Entry) error
}
// Internal type for storing the hooks on a logger instance.
// LevelHooks is an internal type for storing the hooks on a logger instance.
type LevelHooks map[Level][]Hook
// Add a hook to an instance of logger. This is called with

View File

@@ -72,16 +72,16 @@ func (mw *MutexWrap) Disable() {
mw.disabled = true
}
// Creates a new logger. Configuration should be set by changing `Formatter`,
// `Out` and `Hooks` directly on the default logger instance. You can also just
// New Creates a new logger. Configuration should be set by changing [Formatter],
// Out and Hooks directly on the default Logger instance. You can also just
// instantiate your own:
//
// var log = &logrus.Logger{
// Out: os.Stderr,
// Formatter: new(logrus.TextFormatter),
// Hooks: make(logrus.LevelHooks),
// Level: logrus.DebugLevel,
// }
// var log = &logrus.Logger{
// Out: os.Stderr,
// Formatter: new(logrus.TextFormatter),
// Hooks: make(logrus.LevelHooks),
// Level: logrus.DebugLevel,
// }
//
// It's recommended to make this a global instance called `log`.
func New() *Logger {
@@ -118,30 +118,30 @@ func (logger *Logger) WithField(key string, value interface{}) *Entry {
return entry.WithField(key, value)
}
// Adds a struct of fields to the log entry. All it does is call `WithField` for
// each `Field`.
// WithFields adds a struct of fields to the log entry. It calls [Entry.WithField]
// for each Field.
func (logger *Logger) WithFields(fields Fields) *Entry {
entry := logger.newEntry()
defer logger.releaseEntry(entry)
return entry.WithFields(fields)
}
// Add an error as single field to the log entry. All it does is call
// `WithError` for the given `error`.
// WithError adds an error as single field to the log entry. It calls
// [Entry.WithError] for the given error.
func (logger *Logger) WithError(err error) *Entry {
entry := logger.newEntry()
defer logger.releaseEntry(entry)
return entry.WithError(err)
}
// Add a context to the log entry.
// WithContext add a context to the log entry.
func (logger *Logger) WithContext(ctx context.Context) *Entry {
entry := logger.newEntry()
defer logger.releaseEntry(entry)
return entry.WithContext(ctx)
}
// Overrides the time of the log entry.
// WithTime overrides the time of the log entry.
func (logger *Logger) WithTime(t time.Time) *Entry {
entry := logger.newEntry()
defer logger.releaseEntry(entry)
@@ -347,9 +347,9 @@ func (logger *Logger) Exit(code int) {
logger.ExitFunc(code)
}
//When file is opened with appending mode, it's safe to
//write concurrently to a file (within 4k message on Linux).
//In these cases user can choose to disable the lock.
// SetNoLock disables the lock for situations where a file is opened with
// appending mode, and safe for concurrent writes to the file (within 4k
// message on Linux). In these cases user can choose to disable the lock.
func (logger *Logger) SetNoLock() {
logger.mu.Disable()
}

View File

@@ -6,13 +6,15 @@ import (
"strings"
)
// Fields type, used to pass to `WithFields`.
// Fields type, used to pass to [WithFields].
type Fields map[string]interface{}
// Level type
//
//nolint:recvcheck // the methods of "Entry" use pointer receiver and non-pointer receiver.
type Level uint32
// Convert the Level to a string. E.g. PanicLevel becomes "panic".
// Convert the Level to a string. E.g. [PanicLevel] becomes "panic".
func (level Level) String() string {
if b, err := level.MarshalText(); err == nil {
return string(b)
@@ -77,7 +79,7 @@ func (level Level) MarshalText() ([]byte, error) {
return nil, fmt.Errorf("not a valid logrus level %d", level)
}
// A constant exposing all logging levels
// AllLevels exposing all logging levels.
var AllLevels = []Level{
PanicLevel,
FatalLevel,
@@ -119,8 +121,8 @@ var (
)
// StdLogger is what your logrus-enabled library should take, that way
// it'll accept a stdlib logger and a logrus logger. There's no standard
// interface, this is the closest we get, unfortunately.
// it'll accept a stdlib logger ([log.Logger]) and a logrus logger.
// There's no standard interface, so this is the closest we get, unfortunately.
type StdLogger interface {
Print(...interface{})
Printf(string, ...interface{})
@@ -135,7 +137,8 @@ type StdLogger interface {
Panicln(...interface{})
}
// The FieldLogger interface generalizes the Entry and Logger types
// FieldLogger extends the [StdLogger] interface, generalizing
// the [Entry] and [Logger] types.
type FieldLogger interface {
WithField(key string, value interface{}) *Entry
WithFields(fields Fields) *Entry
@@ -176,8 +179,9 @@ type FieldLogger interface {
// IsPanicEnabled() bool
}
// Ext1FieldLogger (the first extension to FieldLogger) is superfluous, it is
// here for consistancy. Do not use. Use Logger or Entry instead.
// Ext1FieldLogger (the first extension to [FieldLogger]) is superfluous, it is
// here for consistency. Do not use. Use [FieldLogger], [Logger] or [Entry]
// instead.
type Ext1FieldLogger interface {
FieldLogger
Tracef(format string, args ...interface{})

View File

@@ -306,6 +306,7 @@ func (f *TextFormatter) needsQuoting(text string) bool {
return false
}
for _, ch := range text {
//nolint:staticcheck // QF1001: could apply De Morgan's law
if !((ch >= 'a' && ch <= 'z') ||
(ch >= 'A' && ch <= 'Z') ||
(ch >= '0' && ch <= '9') ||
@@ -334,6 +335,6 @@ func (f *TextFormatter) appendValue(b *bytes.Buffer, value interface{}) {
if !f.needsQuoting(stringVal) {
b.WriteString(stringVal)
} else {
b.WriteString(fmt.Sprintf("%q", stringVal))
fmt.Fprintf(b, "%q", stringVal)
}
}

View File

@@ -5,6 +5,6 @@ package zpages // import "go.opentelemetry.io/contrib/zpages"
// Version is the current release version of the zpages span processor.
func Version() string {
return "0.63.0"
return "0.64.0"
// This string is updated by the pre_release.sh script during release
}

16
vendor/modules.txt vendored
View File

@@ -117,7 +117,7 @@ github.com/bitly/go-simplejson
# github.com/bits-and-blooms/bitset v1.22.0
## explicit; go 1.16
github.com/bits-and-blooms/bitset
# github.com/blevesearch/bleve/v2 v2.5.5
# github.com/blevesearch/bleve/v2 v2.5.7
## explicit; go 1.23
github.com/blevesearch/bleve/v2
github.com/blevesearch/bleve/v2/analysis
@@ -217,7 +217,7 @@ github.com/blevesearch/zapx/v14
# github.com/blevesearch/zapx/v15 v15.4.2
## explicit; go 1.21
github.com/blevesearch/zapx/v15
# github.com/blevesearch/zapx/v16 v16.2.7
# github.com/blevesearch/zapx/v16 v16.2.8
## explicit; go 1.23
github.com/blevesearch/zapx/v16
# github.com/bluele/gcache v0.0.2
@@ -475,8 +475,8 @@ github.com/go-acme/lego/v4/challenge
# github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667
## explicit; go 1.13
github.com/go-asn1-ber/asn1-ber
# github.com/go-chi/chi/v5 v5.2.3
## explicit; go 1.20
# github.com/go-chi/chi/v5 v5.2.4
## explicit; go 1.22
github.com/go-chi/chi/v5
github.com/go-chi/chi/v5/middleware
# github.com/go-chi/render v1.0.3
@@ -2031,8 +2031,8 @@ github.com/shurcooL/httpfs/vfsutil
# github.com/shurcooL/vfsgen v0.0.0-20230704071429-0000e147ea92
## explicit; go 1.19
github.com/shurcooL/vfsgen
# github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af
## explicit; go 1.13
# github.com/sirupsen/logrus v1.9.4
## explicit; go 1.17
github.com/sirupsen/logrus
# github.com/skeema/knownhosts v1.3.0
## explicit; go 1.17
@@ -2334,8 +2334,8 @@ go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc/inte
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp/internal/request
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp/internal/semconv
# go.opentelemetry.io/contrib/zpages v0.63.0
## explicit; go 1.23.0
# go.opentelemetry.io/contrib/zpages v0.64.0
## explicit; go 1.24.0
go.opentelemetry.io/contrib/zpages
go.opentelemetry.io/contrib/zpages/internal
# go.opentelemetry.io/otel v1.39.0