Files
LocalAI/core/services/routing/pii/middleware.go
Richard Palethorpe 3fa7b2955c feat(pii): NER tier engine — privacy-filter.cpp backend + NER-centric PII filter (#10360)
Squashed feat/pii-ner-tier-engine rebased onto master (was 45 commits; see
backup/pii-ner-tier-engine-prerebase). Net change:

- privacy-filter.cpp: standalone GGML engine for the openai-privacy-filter
  PII/NER token classifier, wired as a LocalAI gRPC backend (CPU/CUDA/Vulkan).
  TokenClassify moves off the patched llama.cpp path onto this backend.
- PII filter reworked to be NER-centric (encoder/NER detection tier scanning
  whole conversations as one document), with a recreated bounded restricted-
  regex secret-matching pattern detector tier alongside it (per-model
  pii_detection.builtins / .patterns + core/services/routing/piipattern).
- Detection labelled by source (ner vs pattern); backend trace / confidence /
  debug observability; analyze/redact exposed as a synchronous API.
- Instance-wide default detector policy + per-usecase default-on; request
  filtering extended to completions, embeddings, edits & Ollama.
- React UI: NER-centric PII editor, detector-models table, pattern/builtins
  editor, middleware default-policy UI.
- Gallery: privacy-filter-multilingual token-classify model + NER install
  filter; token_classify known_usecase; batch sized to context for NER models.
  privacy-filter backend registered in the backend gallery (cpu/vulkan/cuda-13
  meta + image entries with a capabilities map) matching its CI matrix jobs,
  and an /import-model auto-detect importer (PrivacyFilterImporter, narrow
  privacy-filter GGUF detection) replacing the prior pref-only registration.

Reconciled against master's independent evolution:

- Dropped master's PIIPatternOverrides feature (global-pattern runtime
  overrides + /api/pii/patterns API + runtime_settings.json persistence). The
  per-model NER + pattern-detector design supersedes it; it was built on the
  global redactor pattern set this branch replaced.
- Reverted the llama.cpp Score carry-patch (0006-server-task-type-score):
  removed the patch and restored master's grpc-server.cpp Score RPC (direct
  llama_decode, slot-loop bypass) and LLAMA_VERSION pin, plus master's
  model_config validation forbidding score + chat/completion/embeddings on
  llama-cpp. token_classify is unaffected (it runs on the privacy-filter
  backend, not llama-cpp).

Assisted-by: Claude:claude-opus-4-8 [Claude Code]

Signed-off-by: Richard Palethorpe <io@richiejp.com>
2026-06-18 11:45:22 +01:00

378 lines
14 KiB
Go

package pii
import (
"context"
"crypto/rand"
"encoding/hex"
"net/http"
"time"
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/http/auth"
"github.com/mudler/LocalAI/core/services/routing/contract"
"github.com/mudler/xlog"
)
// Echo context keys this middleware reads from / writes to. The string
// values must match the constants in
// core/http/middleware/context_keys.go. They are duplicated by hand
// because referencing the constants across packages would drag
// http/middleware into pii's import graph and create a cycle
// (http/middleware imports this package).
const (
	ctxKeyCorrelationID = "routing.correlation_id"
	ctxKeyPIIEventID    = "routing.pii_event_id"
)

// Echo context keys for the parsed request and the model config. The
// string values must match the constants in
// core/http/middleware/request.go; they are duplicated here for the same
// import-cycle reason. Drift is caught by integration tests against the
// chat route.
const (
	ctxKeyParsedRequest = "LOCALAI_REQUEST"
	ctxKeyModelConfig   = "MODEL_CONFIG"
)
// ModelPIIConfig is the narrow, duck-typed view of the per-model PII
// configuration that this middleware reads off the echo context.
// *config.ModelConfig satisfies it via PIIIsEnabled / PIIDetectors;
// declaring the interface here keeps the pii package from importing
// core/config.
//
// The detection policy lives on each named detector model and is
// resolved via NERDetectorResolver, so this consuming view carries no
// per-entity actions of its own.
type ModelPIIConfig interface {
	// PIIDetectors lists the token-classification models whose
	// detections drive redaction for this (consuming) model.
	PIIDetectors() []string
	// PIIIsEnabled reports whether the middleware should filter
	// requests for this model at all.
	PIIIsEnabled() bool
}
// NERDetectorResolver resolves a detector model name to a ready-to-use
// NERConfig: the detector plus the policy (min score, entity→action
// map, default action) read from that model's own pii_detection block.
//
// The bool result is false when the name cannot supply a detector
// (unknown model, not a token_classify model, or load failure); the
// middleware fails closed in that case and blocks the request.
//
// The implementation is supplied by the application layer, which owns
// the model loader and the core/backend dependency, keeping the pii
// package free of both. A nil resolver (or the option being unset)
// disables the NER tier.
type NERDetectorResolver func(modelName string) (NERConfig, bool)
// Option configures optional RequestMiddleware behaviour. Options are
// passed variadically so that adding the NER tier does not break the
// existing four-argument call sites (routes and tests).
type Option func(*mwOptions)
type mwOptions struct {
nerResolver NERDetectorResolver
policyResolver PolicyResolver
}
// PolicyResolver returns the effective (enabled, detectors) pair for the
// model carried on the request context, layering instance-wide PII
// defaults over the per-model config.
//
// It is supplied by the application layer (which owns core/config), so
// this package stays decoupled from it: the middleware passes the raw
// context value through as `any` and the resolver does the type
// assertion. When no resolver is set, the middleware falls back to the
// duck-typed ModelPIIConfig (explicit per-model config only, no global
// default).
type PolicyResolver func(modelCfg any) (enabled bool, detectors []string)
// WithPolicyResolver installs r as the source of truth for whether PII
// filtering is enabled and which detectors run, so that the
// instance-wide default detector / default-on usecases apply. When the
// option is not used, the middleware reads ModelPIIConfig off the echo
// context instead.
func WithPolicyResolver(r PolicyResolver) Option {
	apply := func(opts *mwOptions) {
		opts.policyResolver = r
	}
	return apply
}
// WithNERResolver enables the NER tier. For a request whose model lists
// detectors, the middleware resolves each name to a NERConfig through r
// and passes all of them to RedactNERSegments in a single call. Without
// this option, or when a model lists no detectors, the middleware
// passes the request through unchanged.
func WithNERResolver(r NERDetectorResolver) Option {
	apply := func(opts *mwOptions) {
		opts.nerResolver = r
	}
	return apply
}
// ScannedText is a single piece of user text taken from a request.
type ScannedText struct {
	// Index is opaque to the middleware; the Adapter implementation
	// uses it to put the redacted version back in the right place.
	Index int
	// Text is the scanned text on the way out of Adapter.Scan and the
	// redacted replacement on the way into Adapter.Apply.
	Text string
}
// Adapter pulls scannable text out of a parsed request and writes the
// redacted text back. It is a pair of per-API-shape functions rather
// than an interface on the request type, so the schema package does not
// have to depend on pii; each route registration passes the adapter
// that knows its request format.
type Adapter struct {
	// Scan extracts the texts to inspect. The middleware calls it once
	// per request; a nil Scan turns the middleware into a pass-through.
	Scan func(parsed any) []ScannedText
	// Apply writes redacted texts back onto the parsed request. The
	// middleware calls it at most once per request, and only when at
	// least one segment produced spans. Every update carries an Index
	// that Scan previously returned; implementations must not assume
	// the updates arrive in scan order. A nil Apply is skipped.
	Apply func(parsed any, updates []ScannedText)
}
// RequestMiddleware applies the NER PII tier to incoming requests.
//
// The request passes through untouched when any of the following holds:
// redactor is nil, adapter.Scan is nil, the model is not PII-enabled
// (per the PolicyResolver, or the ModelPIIConfig on the echo context
// when no resolver is wired), no parsed request is on the context, the
// model lists no detectors, or no NERDetectorResolver was supplied.
//
// Otherwise every configured detector is resolved and the scanned texts
// are handed to RedactNERSegments as one document:
//
//   - If a detector cannot be resolved, or detection fails, the request
//     is rejected with 503 (fail-closed) and an audit event is recorded.
//   - One PIIEvent is recorded per detected span, carrying the action
//     that actually fired for that span.
//   - If any segment comes back blocked, the request is rejected with
//     400. The response carries only the correlation id and the first
//     event id; the matched value is never echoed back to the client.
//   - Otherwise the redacted text of every segment that produced spans
//     is written back through adapter.Apply and the request continues.
//
// store receives the events; nil disables recording (the redaction
// still happens). fallbackUser supplies the identity used when the
// request carries no authenticated user. Whenever at least one event
// was produced, the id of the first one is written to the echo context
// under ctxKeyPIIEventID so the usage middleware can later
// cross-reference the event with the UsageRecord.
func RequestMiddleware(redactor *Redactor, store EventStore, adapter Adapter, fallbackUser *auth.User, opts ...Option) echo.MiddlewareFunc {
	var o mwOptions
	for _, opt := range opts {
		// A nil Option is skipped rather than crashing route wiring.
		if opt != nil {
			opt(&o)
		}
	}
	return func(next echo.HandlerFunc) echo.HandlerFunc {
		return func(c echo.Context) error {
			if redactor == nil || adapter.Scan == nil {
				return next(c)
			}

			// Per-model gating: redaction is opt-in per model. The policy
			// resolver (when wired) layers instance-wide defaults over the
			// per-model config; otherwise we read the per-model config
			// directly. A missing config (non-chat routes, or middleware
			// wired before SetModelAndConfig) or a not-enabled result passes
			// through.
			rawCfg := c.Get(ctxKeyModelConfig)
			var enabled bool
			var detectors []string
			if o.policyResolver != nil {
				enabled, detectors = o.policyResolver(rawCfg)
			} else if cfg, ok := rawCfg.(ModelPIIConfig); ok {
				enabled, detectors = cfg.PIIIsEnabled(), cfg.PIIDetectors()
			}
			if !enabled {
				return next(c)
			}

			parsed := c.Get(ctxKeyParsedRequest)
			if parsed == nil {
				return next(c)
			}

			// A PII-enabled model with no detectors (or no resolver wired)
			// has nothing to scan with — pass through.
			if len(detectors) == 0 || o.nerResolver == nil {
				return next(c)
			}

			user := auth.GetUser(c)
			if user == nil {
				user = fallbackUser
			}
			userID := ""
			if user != nil {
				userID = user.ID
			}
			correlationID, _ := c.Get(ctxKeyCorrelationID).(string)

			// Resolve each named detector to its NERConfig (detector +
			// the policy from that model's own pii_detection block). A
			// configured detector that can't be resolved fails closed:
			// serving the request without the semantic check the operator
			// asked for is exactly the leak this tier exists to prevent.
			cfgs := make([]NERConfig, 0, len(detectors))
			for _, name := range detectors {
				nc, ok := o.nerResolver(name)
				if !ok {
					xlog.Error("pii: configured detector model could not be resolved; blocking request (fail-closed)", "detector", name)
					return blockNERUnavailable(c, store, correlationID, userID)
				}
				cfgs = append(cfgs, nc)
			}

			texts := adapter.Scan(parsed)
			updates := make([]ScannedText, 0, len(texts))
			var blocked bool
			var firstEventID string

			// Scan the request as ONE document (messages joined) so the NER
			// tier keeps conversational context — whether "4421" is a PIN is
			// decided by the question in the previous message. The spans come
			// back per message with local offsets for in-place rewriting.
			segTexts := make([]string, len(texts))
			for i, st := range texts {
				segTexts[i] = st.Text
			}

			// Fail closed: a detector outage at request time must NOT
			// silently serve the request. The NER tier was explicitly
			// configured for this model, so the semantic check is part
			// of the contract.
			segResults, nerErr := RedactNERSegments(c.Request().Context(), segTexts, cfgs)
			if nerErr != nil {
				xlog.Error("pii: NER detector failed; blocking request (fail-closed)", "error", nerErr)
				return blockNERUnavailable(c, store, correlationID, userID)
			}
			// Results are mapped back onto texts by position. More results
			// than scanned segments cannot be mapped and would index past
			// the end of texts, so treat that as a detector fault and fail
			// closed instead of panicking.
			if len(segResults) > len(texts) {
				xlog.Error("pii: NER detector returned more segment results than segments scanned; blocking request (fail-closed)", "results", len(segResults), "segments", len(texts))
				return blockNERUnavailable(c, store, correlationID, userID)
			}

			for i, res := range segResults {
				st := texts[i]
				if len(res.Spans) == 0 {
					continue
				}
				// Persist one event per detected span. The action recorded
				// is the one that actually fired (carried on the span after
				// the overlap merge), so the events log reflects what
				// happened to the request.
				for _, span := range res.Spans {
					ev := PIIEvent{
						ID: newEventID(),
						// Same kind the fail-closed audit event uses, so
						// both sorts of middleware event are classified
						// alike in the events log.
						Kind:          KindPII,
						Origin:        OriginMiddleware,
						CorrelationID: correlationID,
						UserID:        userID,
						Direction:     DirectionIn,
						PatternID:     span.Pattern,
						ByteOffset:    span.Start,
						Length:        span.End - span.Start,
						HashPrefix:    span.HashPrefix,
						Action:        span.Action,
						Score:         span.Score,
						CreatedAt:     time.Now().UTC(),
					}
					if firstEventID == "" {
						firstEventID = ev.ID
					}
					if store != nil {
						// Recorded on a background context, not the request's.
						if err := store.Record(context.Background(), ev); err != nil {
							xlog.Error("pii: failed to record event", "error", err, "pattern", span.Pattern)
						}
					}
					// Contract: every span must produce an event.
					contract.Invariant(
						"pii.event_per_span",
						span.Pattern != "" && ev.PatternID != "",
						"correlation", correlationID, "pattern", span.Pattern,
					)
				}
				if res.Blocked {
					blocked = true
				}
				updates = append(updates, ScannedText{Index: st.Index, Text: res.Redacted})
			}

			if blocked {
				// Expose the event id on the context for the block response
				// too, matching the fail-closed path, so outer middleware can
				// cross-reference a rejected request with its audit event.
				if firstEventID != "" {
					c.Set(ctxKeyPIIEventID, firstEventID)
				}
				return c.JSON(http.StatusBadRequest, map[string]any{
					"error": map[string]string{
						"message": "request blocked by content policy (sensitive data detected)",
						"type":    "pii_blocked",
					},
					"correlation_id": correlationID,
					"pii_event_id":   firstEventID,
				})
			}

			if len(updates) > 0 && adapter.Apply != nil {
				adapter.Apply(parsed, updates)
			}
			if firstEventID != "" {
				c.Set(ctxKeyPIIEventID, firstEventID)
			}
			return next(c)
		}
	}
}
// nerUnavailablePattern is the sentinel PatternID recorded on the
// fail-closed audit event when a model's configured NER tier cannot
// run. It is not the ID of a real detection pattern: it marks a request
// that was blocked because the encoder/NER check was unavailable (model
// unresolved or backend error), so the events log can tell it apart
// from a content block, which carries a real pattern ID.
const nerUnavailablePattern = "__ner_unavailable__"
// blockNERUnavailable writes the fail-closed response for a request
// whose model has an NER tier configured that could not run, and
// records the matching audit event.
//
// Failing closed is deliberate for a PII filter: if the semantic check
// the operator asked for cannot execute, refusing the request is safer
// than serving it without that check. The status is 503 rather than the
// 400 used for a content block, which tells clients and operators that
// this was a dependency outage, not sensitive data in the request.
//
// The event is recorded only when store is non-nil; a recording failure
// is logged and does not change the response. The event id is also
// written to the echo context under ctxKeyPIIEventID. The return value
// is whatever c.JSON returns.
func blockNERUnavailable(c echo.Context, store EventStore, correlationID, userID string) error {
	event := PIIEvent{
		ID:            newEventID(),
		Kind:          KindPII,
		Origin:        OriginMiddleware,
		CorrelationID: correlationID,
		UserID:        userID,
		Direction:     DirectionIn,
		PatternID:     nerUnavailablePattern,
		Action:        ActionBlock,
		CreatedAt:     time.Now().UTC(),
	}

	if store != nil {
		recordErr := store.Record(context.Background(), event)
		if recordErr != nil {
			xlog.Error("pii: failed to record NER-unavailable event", "error", recordErr)
		}
	}
	c.Set(ctxKeyPIIEventID, event.ID)

	body := map[string]any{
		"error": map[string]string{
			"message": "request blocked: PII NER check is configured but unavailable",
			"type":    "pii_ner_unavailable",
		},
		"correlation_id": correlationID,
		"pii_event_id":   event.ID,
	}
	return c.JSON(http.StatusServiceUnavailable, body)
}
// validAction turns a raw YAML action string into the typed Action. It
// returns the empty Action for any input that is not one of ActionMask,
// ActionBlock or ActionAllow.
func validAction(raw string) Action {
	candidate := Action(raw)
	if candidate == ActionMask || candidate == ActionBlock || candidate == ActionAllow {
		return candidate
	}
	return ""
}
// validActionOr behaves like validAction but substitutes fallback when
// raw is empty or does not name a known action.
func validActionOr(raw string, fallback Action) Action {
	parsed := validAction(raw)
	if parsed == "" {
		return fallback
	}
	return parsed
}
// validActions converts a raw entity-group -> action map into typed
// Actions. Entries whose action is not recognised by validAction are
// dropped with a warning, so a typo in a model YAML is ignored rather
// than taking the request down.
//
// A nil or empty input yields nil. A non-empty input always yields a
// non-nil map, which is empty when every entry was dropped.
func validActions(raw map[string]string) map[string]Action {
	if len(raw) == 0 {
		return nil
	}
	typed := make(map[string]Action, len(raw))
	for group, rawAction := range raw {
		parsed := validAction(rawAction)
		if parsed == "" {
			xlog.Warn("pii: ignoring unknown NER entity action", "group", group, "action", rawAction)
			continue
		}
		typed[group] = parsed
	}
	return typed
}
// newEventID mints a random event id: the prefix "pii_" followed by 12
// random bytes rendered as 24 lowercase hex characters.
func newEventID() string {
	const idBytes = 12
	raw := make([]byte, idBytes)
	// The error is deliberately discarded: crypto/rand.Read is documented
	// never to return an error from Go 1.24 on.
	// NOTE(review): on older toolchains a failure here would yield an
	// all-zero id — confirm the module's minimum Go version.
	_, _ = rand.Read(raw)
	return "pii_" + hex.EncodeToString(raw)
}
// NewEventID returns a fresh random event id in the package's standard
// shape. It is the exported form of newEventID, provided so that callers
// outside this package (the analyze/redact API handlers) can record
// events whose ids are indistinguishable from those minted by the
// in-band middleware.
func NewEventID() string {
	return newEventID()
}