mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-22 15:49:12 -04:00
fix(pii): post-merge review fixes + live NER e2e for the privacy-filter tier (#10401)
* fix(pii): post-merge review fixes + live NER e2e for the privacy-filter tier

  Follow-up to the NER tier engine (#10360), already on master. This carries only the incremental review fixes and tests that postdate that merge — the feature itself is not re-introduced.

  Review fixes:
  - openai_completion.go: remove the dead `elem >= 0` conjunct in applyAnyText (the `elem < 0` guard above already returns).
  - application.go: collapse ResolvePIIPolicy's inline re-implementation of PIIIsEnabled to a single cfg.PIIIsEnabled() call (sole source of the "explicit pii.enabled wins, else cloud-proxy default" rule) and return true past the !enabled guard where it is provable.
  - pattern.go: hoist the triple `appConfig != nil && EnableTracing` check in patternDetector.Detect into one local.
  - grammar.go: MaxQuantifier was 4096, but Go's regexp/syntax rejects repeat bounds above 1000 at Parse time, so walk()'s {n,m} guard could never fire — dead code shadowed by the parser. Lower it to 512 so a bound in (512,1000] is rejected here with an actionable error; >1000 still fails closed via Parse. Specs pin the relationship so the guard can't silently revert.
  - PatternListEditor.jsx: clamp a directly-typed negative min_len to >=0 and force the DOM value back when clamping (min={0} only constrained the spinner, so a negative reached saved config and silently disabled the length filter).

  Tests:
  - piipattern_test.go: MaxQuantifier guard specs (must stay live, not dead).
  - model-config.spec.js: assert the min_len clamp, and that entity_actions collapses a duplicate group to a single row (map semantics; regression guard against emitting an array that drops a row on save).
  - tests/e2e-backends: token_classify capability driving the TokenClassify gRPC RPC against the backend image, asserting byte-correct, UTF-8 rune-aligned spans (entity.Text == text[start:end]) at threshold 0. Verified on CPU via `make test-extra-backend-privacy-filter` (3/3 specs).
  - Makefile: test-extra-backend-privacy-filter wrapper.
  - tests/e2e: e2e_pii_ner_test.go drives /api/pii/analyze + /api/pii/redact (mask + block) through the full HTTP -> detector -> redactor path; gated on PII_NER_MODEL_GGUF so the default suite is unaffected.
  - .github/workflows/tests-pii-ner-e2e.yml: path-filtered / nightly CI job running the container harness on CPU.

  Assisted-by: Claude:claude-opus-4-8 [Claude Code]
  Signed-off-by: Richard Palethorpe <io@richiejp.com>

* feat(gallery): add privacy-filter-nemotron (f16 + q8)

  GGUF conversions of OpenMed/privacy-filter-nemotron — a fine-grained English PII token-classifier (55 categories / 221 BIOES classes), fine-tuned from openai/privacy-filter on NVIDIA's Nemotron-PII dataset. Sibling to the existing privacy-filter-multilingual entry, trading language breadth for category depth.

  - privacy-filter-nemotron: F16 reference artifact (~2.8 GB).
  - privacy-filter-nemotron-q8: Q8_0 quant (~1.64 GB) for RAM-constrained / edge use; description notes the size/speed tradeoff and to validate on your own data (a single dropped span is a PII leak).

  Both run on the privacy-filter backend with known_usecases [token_classify] and a default mask policy (min_score 0.5); operators add per-category entity_actions as needed. sha256s taken from the HF repo's LFS object ids.

  Assisted-by: Claude:claude-opus-4-8 [Claude Code]
  Signed-off-by: Richard Palethorpe <io@richiejp.com>

---------

Signed-off-by: Richard Palethorpe <io@richiejp.com>
This commit is contained in:
committed by
GitHub
parent
95b058e1c5
commit
63bcbf6c12
97
.github/workflows/tests-pii-ner-e2e.yml
vendored
Normal file
97
.github/workflows/tests-pii-ner-e2e.yml
vendored
Normal file
@@ -0,0 +1,97 @@
|
||||
---
|
||||
name: 'PII NER tier E2E (live GGUF, CPU)'
|
||||
|
||||
# Runs the real privacy-filter GGUF NER tier end-to-end on CPU — the gap the
|
||||
# hermetic tests/e2e suite cannot cover (it only exercises the in-process
|
||||
# pattern tier). Heavy (builds the C++ backend image + downloads a ~2.7 GB
|
||||
# GGUF), so it is path-filtered on PRs and on pushes to master, and otherwise runs nightly / on demand.
|
||||
#
|
||||
# This drives the container-level harness (tests/e2e-backends) via
|
||||
# `make test-extra-backend-privacy-filter`: it builds the privacy-filter image,
|
||||
# downloads the model, loads it on CPU, and asserts byte-correct, UTF-8-aligned
|
||||
# TokenClassify spans. The complementary HTTP-path specs in tests/e2e
|
||||
# (e2e_pii_ner_test.go) Skip unless PII_NER_MODEL_GGUF is wired.
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: '0 3 * * *'
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
paths:
|
||||
- 'backend/cpp/privacy-filter/**'
|
||||
- 'backend/Dockerfile.privacy-filter'
|
||||
- 'core/services/routing/pii/**'
|
||||
- 'core/services/routing/piidetector/**'
|
||||
- 'core/backend/token_classify.go'
|
||||
- 'core/http/endpoints/localai/pii.go'
|
||||
- 'core/schema/pii.go'
|
||||
- 'tests/e2e-backends/**'
|
||||
- 'tests/e2e/e2e_pii_ner_test.go'
|
||||
- 'tests/e2e/e2e_suite_test.go'
|
||||
- '.github/workflows/tests-pii-ner-e2e.yml'
|
||||
pull_request:
|
||||
paths:
|
||||
- 'backend/cpp/privacy-filter/**'
|
||||
- 'backend/Dockerfile.privacy-filter'
|
||||
- 'core/services/routing/pii/**'
|
||||
- 'core/services/routing/piidetector/**'
|
||||
- 'core/backend/token_classify.go'
|
||||
- 'core/http/endpoints/localai/pii.go'
|
||||
- 'core/schema/pii.go'
|
||||
- 'tests/e2e-backends/**'
|
||||
- 'tests/e2e/e2e_pii_ner_test.go'
|
||||
- 'tests/e2e/e2e_suite_test.go'
|
||||
- '.github/workflows/tests-pii-ner-e2e.yml'
|
||||
|
||||
concurrency:
|
||||
group: ci-tests-pii-ner-e2e-${{ github.event.pull_request.number || github.sha }}-${{ github.repository }}
|
||||
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
jobs:
|
||||
tests-pii-ner-e2e:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
go-version: ['1.25.x']
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Free disk space
|
||||
run: |
|
||||
sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache/CodeQL || true
|
||||
sudo docker image prune --all --force || true
|
||||
df -h
|
||||
- name: Configure apt mirror on runner
|
||||
uses: ./.github/actions/configure-apt-mirror
|
||||
- name: Setup Go ${{ matrix.go-version }}
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: ${{ matrix.go-version }}
|
||||
cache: false
|
||||
- name: Proto Dependencies
|
||||
run: |
|
||||
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
|
||||
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
||||
rm protoc.zip
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||
PATH="$PATH:$HOME/go/bin" make protogen-go
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y build-essential
|
||||
# Builds local-ai-backend:privacy-filter, downloads the GGUF, loads it on
|
||||
# CPU and runs the token_classify capability spec (byte-offset contract).
|
||||
- name: Run live PII NER backend E2E
|
||||
run: PATH="$PATH:$HOME/go/bin" make test-extra-backend-privacy-filter
|
||||
- name: Setup tmate session if tests fail
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.23
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
limit-access-to-actor: true
|
||||
10
Makefile
10
Makefile
@@ -690,6 +690,16 @@ test-extra-backend-llama-cpp-transcription: docker-build-llama-cpp
|
||||
BACKEND_TEST_CTX_SIZE=2048 \
|
||||
$(MAKE) test-extra-backend
|
||||
|
||||
## privacy-filter: the PII/NER token-classification backend. Exercises the
|
||||
## TokenClassify RPC and asserts byte-correct, UTF-8-aligned span offsets
|
||||
## against the openai-privacy-filter multilingual GGUF (CPU-runnable, ~50M
|
||||
## active params). This is the live-backend coverage for the PII NER tier.
|
||||
test-extra-backend-privacy-filter: docker-build-privacy-filter
|
||||
BACKEND_IMAGE=local-ai-backend:privacy-filter \
|
||||
BACKEND_TEST_MODEL_URL=https://huggingface.co/LocalAI-io/privacy-filter-multilingual-GGUF/resolve/main/privacy-filter-multilingual-f16.gguf \
|
||||
BACKEND_TEST_CAPS=health,load,token_classify \
|
||||
$(MAKE) test-extra-backend
|
||||
|
||||
## vllm is resolved from a HuggingFace model id (no file download) and
|
||||
## exercises Predict + streaming + tool-call extraction via the hermes parser.
|
||||
## Requires a host CPU with the SIMD instructions the prebuilt vllm CPU
|
||||
|
||||
@@ -341,11 +341,9 @@ func (a *Application) ResolvePIIPolicy(cfg *config.ModelConfig) (enabled bool, d
|
||||
}
|
||||
appCfg := a.ApplicationConfig()
|
||||
|
||||
if cfg.PII.Enabled != nil {
|
||||
enabled = *cfg.PII.Enabled
|
||||
} else {
|
||||
enabled = cfg.PIIIsEnabled() // backend default (cloud-proxy)
|
||||
}
|
||||
// PIIIsEnabled already encodes "explicit pii.enabled wins, else backend
|
||||
// default (cloud-proxy)" — the single source of that rule.
|
||||
enabled = cfg.PIIIsEnabled()
|
||||
if !enabled {
|
||||
return false, nil
|
||||
}
|
||||
@@ -354,7 +352,7 @@ func (a *Application) ResolvePIIPolicy(cfg *config.ModelConfig) (enabled bool, d
|
||||
if len(detectors) == 0 {
|
||||
detectors = append([]string(nil), appCfg.PIIDefaultDetectors...)
|
||||
}
|
||||
return enabled, detectors
|
||||
return true, detectors // enabled is necessarily true past the !enabled guard
|
||||
}
|
||||
|
||||
// PIIPolicyResolver adapts ResolvePIIPolicy to pii.PolicyResolver for
|
||||
|
||||
@@ -288,6 +288,21 @@ test.describe('Model Editor - Interactive Tab', () => {
|
||||
await expect(page.locator('input[placeholder^="match,"]')).toBeVisible()
|
||||
})
|
||||
|
||||
test('pattern min_len clamps a directly-typed negative to 0', async ({ page }) => {
|
||||
const searchInput = page.locator('input[placeholder="Search fields to add..."]')
|
||||
await searchInput.fill('Custom Secret Patterns')
|
||||
const dropdown = searchInput.locator('..').locator('..')
|
||||
await dropdown.locator('div', { hasText: 'Custom Secret Patterns' }).first().click()
|
||||
|
||||
await page.locator('button', { hasText: 'Add pattern' }).click()
|
||||
// The number input's min={0} only limits the spinner arrows, not keyboard
|
||||
// entry; the editor must sanitise a typed negative so a meaningless
|
||||
// negative length floor never reaches the saved config.
|
||||
const minLen = page.locator('input[aria-label="Minimum length"]')
|
||||
await minLen.fill('-5')
|
||||
await expect(minLen).toHaveValue('0')
|
||||
})
|
||||
|
||||
// Regression: a map-typed field (entity_actions) present in the loaded YAML
|
||||
// must render WITH its values. flattenConfig used to recurse into the map,
|
||||
// scattering it across pii_detection.entity_actions.<GROUP> paths that match
|
||||
@@ -329,4 +344,37 @@ test.describe('Model Editor - Interactive Tab', () => {
|
||||
await expect(page.getByText(/block —/i).first()).toBeVisible()
|
||||
})
|
||||
|
||||
// A map cannot hold two values for one key, so renaming a row to an existing
|
||||
// group must collapse to a single row (Object.fromEntries, last write wins)
|
||||
// rather than rendering two conflicting rows that silently lose one on save.
|
||||
test('entity_actions collapses a duplicate group to a single row', async ({ page }) => {
|
||||
await page.route('**/api/models/edit/ner-model', (route) => {
|
||||
route.fulfill({
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify({
|
||||
name: 'ner-model',
|
||||
config: [
|
||||
'name: ner-model',
|
||||
'backend: llama-cpp',
|
||||
'pii_detection:',
|
||||
' entity_actions:',
|
||||
' SSN: block',
|
||||
' EMAIL: mask',
|
||||
'',
|
||||
].join('\n'),
|
||||
}),
|
||||
})
|
||||
})
|
||||
|
||||
await page.goto('/app/model-editor/ner-model')
|
||||
|
||||
const groupInputs = page.locator('input[aria-label="Entity group"]')
|
||||
await expect(groupInputs).toHaveCount(2)
|
||||
|
||||
// Rename the EMAIL row to duplicate SSN; the editor collapses to one SSN row.
|
||||
await groupInputs.nth(1).fill('SSN')
|
||||
await expect(groupInputs).toHaveCount(1)
|
||||
await expect(groupInputs.nth(0)).toHaveValue('SSN')
|
||||
})
|
||||
|
||||
})
|
||||
|
||||
@@ -74,7 +74,18 @@ export default function PatternListEditor({ value, onChange }) {
|
||||
min={0}
|
||||
value={r.min_len || 0}
|
||||
title="Minimum match length (0 = no floor)"
|
||||
onChange={e => update(i, { min_len: parseInt(e.target.value, 10) || 0 })}
|
||||
// min={0} only constrains the spinner, not keyboard entry. Clamp a
|
||||
// typed negative to 0 (a negative floor is meaningless and would
|
||||
// disable the length filter). When we clamp, force the DOM value
|
||||
// too: the resulting 0->0 state change is a no-op, so React's
|
||||
// controlled input would otherwise keep displaying the rejected
|
||||
// "-5" even though the saved value is 0.
|
||||
onChange={e => {
|
||||
const parsed = parseInt(e.target.value, 10)
|
||||
const n = Math.max(0, parsed || 0)
|
||||
if (parsed < 0) e.target.value = String(n)
|
||||
update(i, { min_len: n })
|
||||
}}
|
||||
style={{ width: 80, fontSize: '0.8125rem' }}
|
||||
aria-label="Minimum length"
|
||||
/>
|
||||
|
||||
@@ -44,7 +44,7 @@ func applyAnyText(v any, elem int, text string) any {
|
||||
if elem < 0 {
|
||||
return text
|
||||
}
|
||||
if arr, ok := v.([]any); ok && elem >= 0 && elem < len(arr) {
|
||||
if arr, ok := v.([]any); ok && elem < len(arr) {
|
||||
arr[elem] = text
|
||||
}
|
||||
return v
|
||||
|
||||
@@ -39,8 +39,9 @@ type patternDetector struct {
|
||||
// When tracing is enabled it records a pattern_pii BackendTrace so the matches
|
||||
// (group, byte range, text) show in the Traces UI alongside NER detections.
|
||||
func (d *patternDetector) Detect(_ context.Context, text string) ([]pii.NEREntity, error) {
|
||||
tracing := d.appConfig != nil && d.appConfig.EnableTracing
|
||||
var start time.Time
|
||||
if d.appConfig != nil && d.appConfig.EnableTracing {
|
||||
if tracing {
|
||||
trace.InitBackendTracingIfEnabled(d.appConfig.TracingMaxItems, d.appConfig.TracingMaxBodyBytes)
|
||||
start = time.Now()
|
||||
}
|
||||
@@ -50,12 +51,12 @@ func (d *patternDetector) Detect(_ context.Context, text string) ([]pii.NEREntit
|
||||
var traceEnts []backend.TokenEntity
|
||||
for _, mt := range matches {
|
||||
out = append(out, pii.NEREntity{Group: mt.Group, Start: mt.Start, End: mt.End, Score: 1.0, Text: mt.Text})
|
||||
if d.appConfig != nil && d.appConfig.EnableTracing {
|
||||
if tracing {
|
||||
traceEnts = append(traceEnts, backend.TokenEntity{Group: mt.Group, Start: mt.Start, End: mt.End, Score: 1.0, Text: mt.Text})
|
||||
}
|
||||
}
|
||||
|
||||
if d.appConfig != nil && d.appConfig.EnableTracing {
|
||||
if tracing {
|
||||
trace.RecordBackendTrace(patternPIITrace(d.modelName, text, traceEnts, start))
|
||||
}
|
||||
return out, nil
|
||||
|
||||
@@ -28,10 +28,16 @@ const (
|
||||
// credential shape, small enough that the compiled program stays tiny.
|
||||
MaxPatternLen = 256
|
||||
// MaxQuantifier caps an explicit {n,m} upper bound. RE2 expands a bounded
|
||||
// repeat into that many copies, so an uncapped {0,1000000} would blow up
|
||||
// the compiled program's memory. Unbounded {n,} (no upper) is a loop, not
|
||||
// an expansion, and is allowed.
|
||||
MaxQuantifier = 4096
|
||||
// repeat into that many copies, so a large bound inflates the compiled
|
||||
// program. Go's regexp/syntax independently rejects any bound above 1000
|
||||
// at Parse time, so this cap MUST stay strictly below 1000 to be a live
|
||||
// guard rather than dead code shadowed by the parser: a bound in
|
||||
// (MaxQuantifier, 1000] reaches walk and is rejected here with an
|
||||
// actionable error, while >1000 is caught earlier by Parse. 512 is far
|
||||
// larger than any real credential token yet keeps the guard meaningful and
|
||||
// is defence in depth should the stdlib cap ever rise. Unbounded {n,} (no
|
||||
// upper) is a loop, not an expansion, and is allowed so long as n itself does not exceed MaxQuantifier.
|
||||
MaxQuantifier = 512
|
||||
// MaxAlternation caps the arms of a single `a|b|c` alternation.
|
||||
MaxAlternation = 64
|
||||
// MaxAST bounds recursion depth so a pathologically nested pattern can't
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package piipattern
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
@@ -36,6 +37,45 @@ var _ = Describe("ValidatePattern", func() {
|
||||
)
|
||||
})
|
||||
|
||||
var _ = Describe("MaxQuantifier guard (must stay live, not dead code)", func() {
|
||||
// Go's regexp/syntax hard-caps repeat bounds at 1000 and rejects anything
|
||||
// larger at Parse time, before walk() runs. So the walk() {n,m} guard only
|
||||
// fires for bounds in (MaxQuantifier, 1000]; if MaxQuantifier ever creeps
|
||||
// to >= 1000 the guard becomes unreachable dead code. These specs pin the
|
||||
// relationship and prove the guard is the binding constraint in that band.
|
||||
const stdlibRepeatCap = 1000
|
||||
|
||||
It("is strictly below the stdlib repeat cap so the guard is reachable", func() {
|
||||
Expect(MaxQuantifier).To(BeNumerically("<", stdlibRepeatCap),
|
||||
"MaxQuantifier must be < %d or walk()'s {n,m} guard is dead code (Parse rejects larger bounds first)", stdlibRepeatCap)
|
||||
})
|
||||
|
||||
It("accepts a bound at exactly MaxQuantifier", func() {
|
||||
Expect(ValidatePattern(fmt.Sprintf(`sk-ant-[A-Za-z0-9]{%d}`, MaxQuantifier))).To(Succeed())
|
||||
})
|
||||
|
||||
It("rejects a bound just above MaxQuantifier with our actionable error (proves the guard runs)", func() {
|
||||
// MaxQuantifier+1 is still parseable (<= stdlib cap), so it reaches
|
||||
// walk(), where our guard — not the parser — rejects it.
|
||||
err := ValidatePattern(fmt.Sprintf(`sk-ant-[A-Za-z0-9]{%d}`, MaxQuantifier+1))
|
||||
Expect(err).To(HaveOccurred())
|
||||
Expect(err.Error()).To(ContainSubstring("bound is too large"),
|
||||
"a bound in (MaxQuantifier, stdlib cap] must be rejected by walk(), not the parser")
|
||||
})
|
||||
|
||||
It("rejects an unbounded {n,} whose lower bound exceeds MaxQuantifier", func() {
|
||||
err := ValidatePattern(fmt.Sprintf(`sk-ant-[A-Za-z0-9]{%d,}`, MaxQuantifier+1))
|
||||
Expect(err).To(HaveOccurred())
|
||||
Expect(err.Error()).To(ContainSubstring("bound is too large"))
|
||||
})
|
||||
|
||||
It("still fails closed above the stdlib cap (Parse rejects before walk)", func() {
|
||||
// >1000: caught by syntax.Parse; the message is the parser's, but it
|
||||
// still fails closed — defence in depth.
|
||||
Expect(ValidatePattern(fmt.Sprintf(`sk-ant-[A-Za-z0-9]{%d}`, stdlibRepeatCap+1))).NotTo(Succeed())
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("Compile", func() {
|
||||
It("compiles a valid pattern with leftmost-longest semantics", func() {
|
||||
re, err := Compile(`sk-ant-[A-Za-z0-9_-]{4,}`)
|
||||
|
||||
@@ -1252,6 +1252,98 @@
|
||||
- filename: privacy-filter/models/privacy-filter-multilingual/privacy-filter-multilingual-f16.gguf
|
||||
sha256: 01b76572f80b7d2ebee80a27cb9c3699c26b04cae1c402eee7664fc17a4b5ce6
|
||||
uri: https://huggingface.co/LocalAI-io/privacy-filter-multilingual-GGUF/resolve/main/privacy-filter-multilingual-f16.gguf
|
||||
- name: "privacy-filter-nemotron"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/5fd5e18a90b6dc4633f6d292/QPiv8pt4JNxr0FdGnpFef.png
|
||||
urls:
|
||||
- https://huggingface.co/OpenMed/privacy-filter-nemotron
|
||||
- https://huggingface.co/LocalAI-io/privacy-filter-nemotron-GGUF
|
||||
description: |
|
||||
A fine-grained English PII token-classification model: a fine-tune of
|
||||
openai/privacy-filter by OpenMed on NVIDIA's Nemotron-PII dataset. It labels
|
||||
every token with a BIOES tag over 55 PII categories (221 classes), trading
|
||||
the multilingual sibling's language breadth for category depth - identity,
|
||||
contact, address, dates, government IDs, financial, healthcare, enterprise,
|
||||
vehicle and digital entities (including api_key, ipv4/ipv6 and mac_address).
|
||||
For multilingual text prefer privacy-filter-multilingual instead.
|
||||
|
||||
In LocalAI this is a PII detector for the NER redactor tier: set
|
||||
known_usecases to [token_classify] (as below), and any model opts into
|
||||
redaction by listing this one under pii.detectors. The detection policy
|
||||
(which categories to mask vs block, and the score threshold) lives on this
|
||||
model's own pii_detection block - see the overrides below. It runs locally
|
||||
with no Python, served by the standalone privacy-filter backend's
|
||||
TokenClassify RPC (constrained BIOES Viterbi decode into UTF-8 byte-offset
|
||||
entity spans).
|
||||
|
||||
Architecture: gpt-oss-style sparse MoE (8 layers, d_model 640, 128 experts
|
||||
top-4, ~1.5B total / ~50M active per token), bidirectional banded attention,
|
||||
o200k tokenizer and a 221-way token-classification head; served via the
|
||||
openai-privacy-filter architecture. F16, ~2.8 GB. (A smaller Q8_0 quant
|
||||
exists on the GGUF repo for RAM-constrained use - validate it on your own
|
||||
data, since for PII a single dropped span is a leak.)
|
||||
license: apache-2.0
|
||||
tags:
|
||||
- token-classification
|
||||
- ner
|
||||
- pii
|
||||
- privacy
|
||||
- nemotron
|
||||
- gguf
|
||||
overrides:
|
||||
backend: privacy-filter
|
||||
embeddings: true
|
||||
known_usecases:
|
||||
- token_classify
|
||||
parameters:
|
||||
model: privacy-filter/models/privacy-filter-nemotron/privacy-filter-nemotron-f16.gguf
|
||||
pii_detection:
|
||||
min_score: 0.5
|
||||
default_action: mask
|
||||
files:
|
||||
- filename: privacy-filter/models/privacy-filter-nemotron/privacy-filter-nemotron-f16.gguf
|
||||
sha256: 70dfe91ff220ff04594168a83e296dcc2054449cde77f98d0e782edbb6a31f5a
|
||||
uri: https://huggingface.co/LocalAI-io/privacy-filter-nemotron-GGUF/resolve/main/privacy-filter-nemotron-f16.gguf
|
||||
- name: "privacy-filter-nemotron-q8"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/5fd5e18a90b6dc4633f6d292/QPiv8pt4JNxr0FdGnpFef.png
|
||||
urls:
|
||||
- https://huggingface.co/OpenMed/privacy-filter-nemotron
|
||||
- https://huggingface.co/LocalAI-io/privacy-filter-nemotron-GGUF
|
||||
description: |
|
||||
Q8_0 quant of privacy-filter-nemotron (~1.64 GB, vs ~2.8 GB for F16) for
|
||||
RAM-constrained / edge use (e.g. a 4 GB Raspberry Pi 5). The MoE expert
|
||||
weights are stored 8-bit; attention, embeddings and the classifier head
|
||||
stay F16. Same model, policy and runtime as the F16 entry - see
|
||||
privacy-filter-nemotron for the full description.
|
||||
|
||||
Prefer the F16 entry when you can afford it: it is the reference artifact.
|
||||
On a mixed-PII document the publisher measured q8 matching F16 on 99.93% of
|
||||
token labels with an identical span set at threshold 0.5 - but one token
|
||||
flipped, and for PII a single dropped span is a leak. Treat q8 as a
|
||||
deliberate size/speed tradeoff and validate it on your own data.
|
||||
license: apache-2.0
|
||||
tags:
|
||||
- token-classification
|
||||
- ner
|
||||
- pii
|
||||
- privacy
|
||||
- nemotron
|
||||
- gguf
|
||||
overrides:
|
||||
backend: privacy-filter
|
||||
embeddings: true
|
||||
known_usecases:
|
||||
- token_classify
|
||||
parameters:
|
||||
model: privacy-filter/models/privacy-filter-nemotron/privacy-filter-nemotron-q8.gguf
|
||||
pii_detection:
|
||||
min_score: 0.5
|
||||
default_action: mask
|
||||
files:
|
||||
- filename: privacy-filter/models/privacy-filter-nemotron/privacy-filter-nemotron-q8.gguf
|
||||
sha256: 2ec11c154e572a2686f4d77e861b7f74e6917e09638fe9bd27156d48bd99e21a
|
||||
uri: https://huggingface.co/LocalAI-io/privacy-filter-nemotron-GGUF/resolve/main/privacy-filter-nemotron-q8.gguf
|
||||
- name: "secret-filter"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
description: |
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
@@ -85,27 +86,28 @@ import (
|
||||
// file path to LoadModel, so GGUF, ONNX, safetensors, .bin etc. all work so
|
||||
// long as the backend under test accepts that format.
|
||||
const (
|
||||
capHealth = "health"
|
||||
capLoad = "load"
|
||||
capPredict = "predict"
|
||||
capStream = "stream"
|
||||
capEmbeddings = "embeddings"
|
||||
capTools = "tools"
|
||||
capTranscription = "transcription"
|
||||
capTTS = "tts"
|
||||
capImage = "image"
|
||||
capFaceDetect = "face_detect"
|
||||
capFaceEmbed = "face_embed"
|
||||
capFaceVerify = "face_verify"
|
||||
capFaceAnalyze = "face_analyze"
|
||||
capFaceAntispoof = "face_antispoof"
|
||||
capVoiceEmbed = "voice_embed"
|
||||
capVoiceVerify = "voice_verify"
|
||||
capVoiceAnalyze = "voice_analyze"
|
||||
capHealth = "health"
|
||||
capLoad = "load"
|
||||
capPredict = "predict"
|
||||
capStream = "stream"
|
||||
capEmbeddings = "embeddings"
|
||||
capTools = "tools"
|
||||
capTranscription = "transcription"
|
||||
capTTS = "tts"
|
||||
capImage = "image"
|
||||
capFaceDetect = "face_detect"
|
||||
capFaceEmbed = "face_embed"
|
||||
capFaceVerify = "face_verify"
|
||||
capFaceAnalyze = "face_analyze"
|
||||
capFaceAntispoof = "face_antispoof"
|
||||
capVoiceEmbed = "voice_embed"
|
||||
capVoiceVerify = "voice_verify"
|
||||
capVoiceAnalyze = "voice_analyze"
|
||||
capAudioTransform = "audio_transform"
|
||||
capLogprobs = "logprobs"
|
||||
capLogitBias = "logit_bias"
|
||||
capTokenize = "tokenize"
|
||||
capLogprobs = "logprobs"
|
||||
capLogitBias = "logit_bias"
|
||||
capTokenize = "tokenize"
|
||||
capTokenClassify = "token_classify"
|
||||
|
||||
defaultPrompt = "The capital of France is"
|
||||
streamPrompt = "Once upon a time"
|
||||
@@ -550,6 +552,45 @@ var _ = Describe("Backend container", Ordered, func() {
|
||||
GinkgoWriter.Printf("Embedding: %d dims\n", len(res.GetEmbeddings()))
|
||||
})
|
||||
|
||||
// TokenClassify is the PII-NER RPC (privacy-filter backend). The crown-jewel
|
||||
// invariant is byte-offset correctness: Start/End are half-open BYTE offsets
|
||||
// into the original UTF-8 text, and the backend's emitted text for a span must
|
||||
// equal text[Start:End]. We run at Threshold 0 (raw, unfiltered) and assert
|
||||
// every returned span is in range, rune-aligned, and self-consistent. The
|
||||
// prompt carries multibyte runes BEFORE the PII so a rune/byte confusion in
|
||||
// the engine would surface as a shifted slice here. Override the text with
|
||||
// BACKEND_TEST_TOKEN_CLASSIFY_TEXT for a model that detects a different class.
|
||||
It("classifies PII spans with byte-correct offsets via TokenClassify", func() {
|
||||
if !caps[capTokenClassify] {
|
||||
Skip("token_classify capability not enabled")
|
||||
}
|
||||
text := os.Getenv("BACKEND_TEST_TOKEN_CLASSIFY_TEXT")
|
||||
if text == "" {
|
||||
text = "Müller paid at café in Zürich; reach john.doe@example.com tomorrow."
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
|
||||
defer cancel()
|
||||
res, err := client.TokenClassify(ctx, &pb.TokenClassifyRequest{Text: text, Threshold: 0})
|
||||
Expect(err).NotTo(HaveOccurred(), "TokenClassify RPC failed")
|
||||
ents := res.GetEntities()
|
||||
Expect(ents).NotTo(BeEmpty(), "TokenClassify returned no entities for an obvious-PII sentence")
|
||||
for _, e := range ents {
|
||||
start, end := int(e.GetStart()), int(e.GetEnd())
|
||||
Expect(start).To(BeNumerically(">=", 0))
|
||||
Expect(end).To(BeNumerically(">", start))
|
||||
Expect(end).To(BeNumerically("<=", len(text)))
|
||||
Expect(utf8.RuneStart(text[start])).To(BeTrue(), "start %d is mid-rune in %q", start, text)
|
||||
if end < len(text) {
|
||||
Expect(utf8.RuneStart(text[end])).To(BeTrue(), "end %d is mid-rune in %q", end, text)
|
||||
}
|
||||
slice := text[start:end]
|
||||
Expect(utf8.ValidString(slice)).To(BeTrue(), "span %q is not valid UTF-8", slice)
|
||||
Expect(e.GetText()).To(Equal(slice), "entity text must equal text[start:end]")
|
||||
GinkgoWriter.Printf("TokenClassify: %q [%d:%d] %s score=%.3f\n",
|
||||
slice, start, end, e.GetEntityGroup(), e.GetScore())
|
||||
}
|
||||
})
|
||||
|
||||
It("generates an image via GenerateImage", func() {
|
||||
if !caps[capImage] {
|
||||
Skip("image capability not enabled")
|
||||
|
||||
186
tests/e2e/e2e_pii_ner_test.go
Normal file
186
tests/e2e/e2e_pii_ner_test.go
Normal file
@@ -0,0 +1,186 @@
|
||||
package e2e_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
// Live PII NER tier e2e. These specs run the real privacy-filter GGUF on CPU
|
||||
// through the full TokenClassify path — the gap the hermetic suite cannot
|
||||
// cover (it only exercises the in-process pattern tier). They Skip unless
|
||||
// PII_NER_MODEL_GGUF is wired in BeforeSuite, so the default PR suite is
|
||||
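// unaffected. NOTE(review): tests-pii-ner-e2e.yml only runs the tests/e2e-backends harness and does not set PII_NER_MODEL_GGUF — confirm which job actually runs these specs.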
|
||||
//
|
||||
// The crown-jewel invariant is byte-offset correctness: entity Start/End are
|
||||
// half-open BYTE offsets into the original UTF-8 text, and the model's emitted
|
||||
// text for a span must equal the corresponding byte slice. We assert that two
|
||||
// ways — directly against ModelTokenClassify (raw, Threshold 0, no redactor
|
||||
// merge) and against the /api/pii/analyze HTTP contract (post-merge,
|
||||
// post-MinScore). The multibyte case proves offsets are bytes, not runes.
|
||||
var _ = Describe("PII NER tier (live privacy-filter GGUF)", func() {
|
||||
const (
|
||||
// Reliable, unambiguous PII the multilingual NER model detects.
|
||||
emailText = "Please contact John Doe at john.doe@example.com about invoice 4421."
|
||||
// Multibyte chars BEFORE the email push its byte offset past its rune
|
||||
// offset, so a rune/byte confusion in the engine or the Go bridge would
|
||||
// surface as a mismatched slice here but not in the ASCII case above.
|
||||
multibyteText = "Müller paid at café in Zürich; reach john.doe@example.com tomorrow."
|
||||
)
|
||||
|
||||
BeforeEach(func() {
|
||||
if piiNERModel == "" {
|
||||
Skip("live PII NER model not wired (set PII_NER_MODEL_GGUF + REALTIME_BACKENDS_PATH; see tests-pii-ner-e2e.yml)")
|
||||
}
|
||||
})
|
||||
|
||||
Context("raw TokenClassify (byte-offset contract)", func() {
|
||||
It("returns byte-correct, rune-aligned spans for an ASCII email", func() {
|
||||
ents := tokenClassify(emailText)
|
||||
Expect(ents).NotTo(BeEmpty(), "model must detect at least one entity in an obvious-PII sentence")
|
||||
for _, e := range ents {
|
||||
assertByteCorrectSpan(emailText, e.Start, e.End, e.Text)
|
||||
}
|
||||
Expect(spanCoversSubstring(emailText, ents, "john.doe@example.com")).To(BeTrue(),
|
||||
"some detected span must cover the email address")
|
||||
})
|
||||
|
||||
It("keeps byte offsets correct when multibyte runes precede the PII", func() {
|
||||
ents := tokenClassify(multibyteText)
|
||||
Expect(ents).NotTo(BeEmpty())
|
||||
for _, e := range ents {
|
||||
// This is the assertion that fails if offsets were computed in
|
||||
// runes rather than bytes: the slice would be shifted left.
|
||||
assertByteCorrectSpan(multibyteText, e.Start, e.End, e.Text)
|
||||
}
|
||||
Expect(spanCoversSubstring(multibyteText, ents, "john.doe@example.com")).To(BeTrue())
|
||||
})
|
||||
})
|
||||
|
||||
Context("HTTP /api/pii/analyze", func() {
|
||||
It("reports ner-source entities with byte-correct offsets", func() {
|
||||
status, resp := analyze(schema.PIIAnalyzeRequest{
|
||||
Text: emailText,
|
||||
Detectors: []string{piiNERModel},
|
||||
})
|
||||
Expect(status).To(Equal(http.StatusOK))
|
||||
Expect(resp.Entities).NotTo(BeEmpty())
|
||||
for _, e := range resp.Entities {
|
||||
Expect(e.Source).To(Equal("ner"), "privacy-filter detections must be tagged source=ner")
|
||||
Expect(e.Action).To(Equal("mask"), "default_action mask must propagate to each entity")
|
||||
assertByteCorrectSpan(emailText, e.Start, e.End, emailText[e.Start:e.End])
|
||||
Expect(e.Score).To(BeNumerically(">=", 0.5), "below-MinScore spans are dropped before the response")
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
Context("HTTP /api/pii/redact", func() {
|
||||
It("masks detected PII out of the returned text", func() {
|
||||
status, body := redact(schema.PIIAnalyzeRequest{
|
||||
Text: emailText,
|
||||
Detectors: []string{piiNERModel},
|
||||
})
|
||||
Expect(status).To(Equal(http.StatusOK))
|
||||
var resp schema.PIIRedactResponse
|
||||
Expect(json.Unmarshal(body, &resp)).To(Succeed())
|
||||
Expect(resp.Masked).To(BeTrue())
|
||||
Expect(resp.RedactedText).NotTo(Equal(emailText))
|
||||
Expect(resp.RedactedText).NotTo(ContainSubstring("john.doe@example.com"),
|
||||
"the masked email must not survive in the redacted body")
|
||||
})
|
||||
|
||||
It("rejects the request with pii_blocked when an entity action is block", func() {
|
||||
status, body := redact(schema.PIIAnalyzeRequest{
|
||||
Text: emailText,
|
||||
Detectors: []string{piiNERBlockModel},
|
||||
})
|
||||
Expect(status).To(Equal(http.StatusBadRequest))
|
||||
Expect(string(body)).To(ContainSubstring("pii_blocked"))
|
||||
Expect(string(body)).NotTo(ContainSubstring("john.doe@example.com"),
|
||||
"a blocked response must never echo the raw secret")
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
// tokenClassify drives core/backend.ModelTokenClassify against the live model
|
||||
// with the loader/config the running server uses — the same path the NER
|
||||
// detector takes, but at Threshold 0 so we see the raw, unmerged spans.
|
||||
func tokenClassify(text string) []backend.TokenEntity {
|
||||
GinkgoHelper()
|
||||
cfg, ok := localAIApp.ModelConfigLoader().GetModelConfig(piiNERModel)
|
||||
Expect(ok).To(BeTrue(), "model config %q must be loaded", piiNERModel)
|
||||
fn, err := backend.ModelTokenClassify(text, backend.TokenClassifyOptions{},
|
||||
localAIApp.ModelLoader(), cfg, localAIApp.ApplicationConfig())
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
ents, err := fn(context.TODO())
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
return ents
|
||||
}
|
||||
|
||||
// assertByteCorrectSpan is the shared byte-offset invariant: a half-open byte
|
||||
// range within text, aligned to UTF-8 rune boundaries, whose slice equals the
|
||||
// entity's own reported text.
|
||||
func assertByteCorrectSpan(text string, start, end int, got string) {
|
||||
GinkgoHelper()
|
||||
Expect(start).To(BeNumerically(">=", 0))
|
||||
Expect(end).To(BeNumerically(">", start))
|
||||
Expect(end).To(BeNumerically("<=", len(text)))
|
||||
Expect(utf8.RuneStart(text[start])).To(BeTrue(), "start %d is mid-rune in %q", start, text)
|
||||
if end < len(text) {
|
||||
Expect(utf8.RuneStart(text[end])).To(BeTrue(), "end %d is mid-rune in %q", end, text)
|
||||
}
|
||||
slice := text[start:end]
|
||||
Expect(utf8.ValidString(slice)).To(BeTrue(), "span %q is not valid UTF-8", slice)
|
||||
Expect(slice).To(Equal(got), "entity text must equal text[start:end]")
|
||||
}
|
||||
|
||||
func spanCoversSubstring(text string, ents []backend.TokenEntity, sub string) bool {
|
||||
lo := bytes.Index([]byte(text), []byte(sub))
|
||||
if lo < 0 {
|
||||
return false
|
||||
}
|
||||
hi := lo + len(sub)
|
||||
for _, e := range ents {
|
||||
// any overlap with [lo,hi)
|
||||
if e.Start < hi && e.End > lo {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func analyze(req schema.PIIAnalyzeRequest) (int, schema.PIIAnalyzeResponse) {
|
||||
GinkgoHelper()
|
||||
status, body := postJSON("/api/pii/analyze", req)
|
||||
var resp schema.PIIAnalyzeResponse
|
||||
if status == http.StatusOK {
|
||||
Expect(json.Unmarshal(body, &resp)).To(Succeed())
|
||||
}
|
||||
return status, resp
|
||||
}
|
||||
|
||||
func redact(req schema.PIIAnalyzeRequest) (int, []byte) {
|
||||
GinkgoHelper()
|
||||
return postJSON("/api/pii/redact", req)
|
||||
}
|
||||
|
||||
func postJSON(path string, payload any) (int, []byte) {
|
||||
GinkgoHelper()
|
||||
data, err := json.Marshal(payload)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
httpResp, err := http.Post(anthropicBaseURL+path, "application/json", bytes.NewReader(data))
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
defer func() { _ = httpResp.Body.Close() }()
|
||||
body, err := io.ReadAll(httpResp.Body)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
return httpResp.StatusCode, body
|
||||
}
|
||||
@@ -47,6 +47,15 @@ var (
|
||||
// cloud-proxy model YAMLs can point at their URLs at startup time.
|
||||
cpOpenAIUpstream *fakeOpenAIUpstreamServer
|
||||
cpAnthropicUpstream *fakeAnthropicUpstreamServer
|
||||
|
||||
// Live PII NER tier. Set only when PII_NER_MODEL_GGUF points at a
|
||||
// privacy-filter GGUF and the privacy-filter backend is discoverable
|
||||
// (REALTIME_BACKENDS_PATH). Empty => the NER specs Skip, exactly like the
|
||||
// cloud-proxy specs Skip without their binary. This is what the hermetic
|
||||
// suite cannot do (e2e_suite_test.go comment at the cp-translate detector):
|
||||
// run the real GGUF NER tier instead of only the in-process pattern tier.
|
||||
piiNERModel string
|
||||
piiNERBlockModel string
|
||||
)
|
||||
|
||||
var _ = BeforeSuite(func() {
|
||||
@@ -535,6 +544,40 @@ var _ = BeforeSuite(func() {
|
||||
}
|
||||
}
|
||||
|
||||
// Live PII NER tier. When PII_NER_MODEL_GGUF points at a downloaded
|
||||
// privacy-filter GGUF, register two detector models that drive the real
|
||||
// gRPC TokenClassify path on the privacy-filter backend (discovered via
|
||||
// REALTIME_BACKENDS_PATH). Two models so we can exercise both policy
|
||||
// outcomes against the same weights: mask (redact) and block (reject).
|
||||
// NOTE: no pii_detection.builtins/patterns here — that would flip the
|
||||
// detector to the in-process regex tier instead of the GGUF NER tier.
|
||||
if gguf := os.Getenv("PII_NER_MODEL_GGUF"); gguf != "" {
|
||||
piiNERModel = "privacy-filter-ner"
|
||||
piiNERBlockModel = "privacy-filter-ner-block"
|
||||
nerModelConfig := func(name, defaultAction string) map[string]any {
|
||||
return map[string]any{
|
||||
"name": name,
|
||||
"backend": "privacy-filter",
|
||||
"embeddings": true, // required: TOKEN_CLS pooling loads via the embeddings flag
|
||||
"known_usecases": []string{"token_classify"},
|
||||
"parameters": map[string]any{"model": gguf},
|
||||
"pii_detection": map[string]any{
|
||||
"min_score": 0.5,
|
||||
"default_action": defaultAction,
|
||||
},
|
||||
}
|
||||
}
|
||||
for _, cfg := range []map[string]any{
|
||||
nerModelConfig(piiNERModel, "mask"),
|
||||
nerModelConfig(piiNERBlockModel, "block"),
|
||||
} {
|
||||
data, err := yaml.Marshal(cfg)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(os.WriteFile(filepath.Join(modelsPath, cfg["name"].(string)+".yaml"), data, 0644)).To(Succeed())
|
||||
}
|
||||
xlog.Info("wired live PII NER models", "gguf", gguf, "models", []string{piiNERModel, piiNERBlockModel})
|
||||
}
|
||||
|
||||
systemState, err := system.GetSystemState(systemOpts...)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
|
||||
Reference in New Issue
Block a user