mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-26 17:37:07 -04:00
Squashed feat/pii-ner-tier-engine rebased onto master (was 45 commits; see backup/pii-ner-tier-engine-prerebase). Net change: - privacy-filter.cpp: standalone GGML engine for the openai-privacy-filter PII/NER token classifier, wired as a LocalAI gRPC backend (CPU/CUDA/Vulkan). TokenClassify moves off the patched llama.cpp path onto this backend. - PII filter reworked to be NER-centric (encoder/NER detection tier scanning whole conversations as one document), with a recreated bounded restricted- regex secret-matching pattern detector tier alongside it (per-model pii_detection.builtins / .patterns + core/services/routing/piipattern). - Detection labelled by source (ner vs pattern); backend trace / confidence / debug observability; analyze/redact exposed as a synchronous API. - Instance-wide default detector policy + per-usecase default-on; request filtering extended to completions, embeddings, edits & Ollama. - React UI: NER-centric PII editor, detector-models table, pattern/builtins editor, middleware default-policy UI. - Gallery: privacy-filter-multilingual token-classify model + NER install filter; token_classify known_usecase; batch sized to context for NER models. privacy-filter backend registered in the backend gallery (cpu/vulkan/cuda-13 meta + image entries with a capabilities map) matching its CI matrix jobs, and an /import-model auto-detect importer (PrivacyFilterImporter, narrow privacy-filter GGUF detection) replacing the prior pref-only registration. Reconciled against master's independent evolution: - Dropped master's PIIPatternOverrides feature (global-pattern runtime overrides + /api/pii/patterns API + runtime_settings.json persistence). The per-model NER + pattern-detector design supersedes it; it was built on the global redactor pattern set this branch replaced. 
- Reverted the llama.cpp Score carry-patch (0006-server-task-type-score): removed the patch and restored master's grpc-server.cpp Score RPC (direct llama_decode, slot-loop bypass) and LLAMA_VERSION pin, plus master's model_config validation forbidding score + chat/completion/embeddings on llama-cpp. token_classify is unaffected (it runs on the privacy-filter backend, not llama-cpp). Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Richard Palethorpe <io@richiejp.com>
378 lines
14 KiB
Go
378 lines
14 KiB
Go
package pii
|
|
|
|
import (
|
|
"context"
|
|
"crypto/rand"
|
|
"encoding/hex"
|
|
"net/http"
|
|
"time"
|
|
|
|
"github.com/labstack/echo/v4"
|
|
"github.com/mudler/LocalAI/core/http/auth"
|
|
"github.com/mudler/LocalAI/core/services/routing/contract"
|
|
"github.com/mudler/xlog"
|
|
)
|
|
|
|
// Keys under which this middleware reads and writes values on the echo
// context. They are duplicated here as plain strings rather than imported:
// http/middleware imports this package, so importing it back would form a
// cycle. The copies therefore have to be kept in sync by hand.
const (
	// Must match the constants in core/http/middleware/context_keys.go.
	ctxKeyCorrelationID = "routing.correlation_id"
	ctxKeyPIIEventID    = "routing.pii_event_id"

	// Must match the constants in core/http/middleware/request.go.
	// Drift is caught by integration tests against the chat route.
	ctxKeyParsedRequest = "LOCALAI_REQUEST"
	ctxKeyModelConfig   = "MODEL_CONFIG"
)
|
|
|
|
// ModelPIIConfig is the narrow, duck-typed slice of the per-model PII
// configuration that the middleware reads off the echo context.
// *config.ModelConfig satisfies it through its PIIIsEnabled and
// PIIDetectors methods; going through this interface keeps the pii package
// from importing core/config.
//
// The interface deliberately carries no per-entity actions. The detection
// policy lives on each named detector model (resolved through
// NERDetectorResolver), not on the consuming model.
type ModelPIIConfig interface {
	// PIIIsEnabled reports whether PII filtering is switched on for the
	// consuming model.
	PIIIsEnabled() bool

	// PIIDetectors lists the token-classification models whose detections
	// drive redaction for the consuming model.
	PIIDetectors() []string
}
|
|
|
|
// NERDetectorResolver resolves a detector model name to a ready-to-use
|
|
// NERConfig — the detector plus the policy (min score, entity→action
|
|
// map, default action) read from that model's own pii_detection block.
|
|
// ok is false when the name can't supply a detector (unknown model, not
|
|
// a token_classify model, or load failure); the middleware fails closed
|
|
// in that case. Supplied by the application layer, which owns the model
|
|
// loader and the core/backend dependency, keeping the pii package free of
|
|
// both. A nil resolver (or the option being unset) disables the NER tier.
|
|
type NERDetectorResolver func(modelName string) (NERConfig, bool)
|
|
|
|
// Option configures optional RequestMiddleware behaviour. Threaded as
|
|
// variadic options so adding the NER tier doesn't break the existing
|
|
// four-argument call sites (routes and tests).
|
|
type Option func(*mwOptions)
|
|
|
|
type mwOptions struct {
|
|
nerResolver NERDetectorResolver
|
|
policyResolver PolicyResolver
|
|
}
|
|
|
|
// PolicyResolver computes the effective (enabled, detectors) pair for the
// model carried on the request context, layering instance-wide PII defaults
// over the per-model config.
//
// It is supplied by the application layer, which owns core/config; to stay
// decoupled from that package the middleware hands over the raw context
// value as `any`. When no PolicyResolver is set, the middleware falls back
// to the duck-typed ModelPIIConfig (explicit per-model config only, no
// global default).
type PolicyResolver func(modelCfg any) (enabled bool, detectors []string)
|
|
|
|
// WithPolicyResolver overrides how the middleware decides enablement and the
|
|
// detector list, so the instance-wide default detector / default-on usecases
|
|
// apply. Without it the middleware reads ModelPIIConfig off the context.
|
|
func WithPolicyResolver(r PolicyResolver) Option {
|
|
return func(o *mwOptions) { o.policyResolver = r }
|
|
}
|
|
|
|
// WithNERResolver enables the NER tier. When a request's model lists
|
|
// pii.detectors, the middleware resolves each to a NERConfig and runs
|
|
// RedactNER (the union of all detectors' hits, merged). Without this
|
|
// option, or when a model lists no detectors, redaction is a no-op.
|
|
func WithNERResolver(r NERDetectorResolver) Option {
|
|
return func(o *mwOptions) { o.nerResolver = r }
|
|
}
|
|
|
|
// ScannedText is a single piece of user text lifted out of a request.
type ScannedText struct {
	// Index is opaque to the middleware; the Adapter implementation uses it
	// to put the redacted version back in the right place.
	Index int
	// Text is the text content: the original on the way out of Scan, the
	// redacted replacement on the way into Apply.
	Text string
}
|
|
|
|
// Adapter pulls scannable text out of a parsed request and writes
|
|
// redacted text back. Provided as a per-API-shape function rather
|
|
// than an interface on the request type so the schema package does
|
|
// not have to depend on pii. Each route registration passes the
|
|
// adapter that knows its request format.
|
|
//
|
|
// The middleware calls Scan once per request and Apply once with
|
|
// every span the redactor returned. updates are guaranteed to share
|
|
// indices the adapter previously returned from Scan; the adapter
|
|
// must not assume input order matches scan order.
|
|
type Adapter struct {
|
|
Scan func(parsed any) []ScannedText
|
|
Apply func(parsed any, updates []ScannedText)
|
|
}
|
|
|
|
// RequestMiddleware applies the regex PII tier to incoming chat
|
|
// requests. If the parsed request is not a MessageScanner (e.g.,
|
|
// non-chat endpoints registered against the same group later), the
|
|
// middleware passes through.
|
|
//
|
|
// - On match with action=block: the request is rejected with 400 and
|
|
// a PIIEvent is recorded. The matched value is never echoed back
|
|
// to the client.
|
|
// - On match with action=mask: the redacted text replaces the
|
|
// original on the parsed request. PIIEvents are recorded.
|
|
// - On match with action=allow: the original text is left intact; a
|
|
// PIIEvent is still recorded so the detection is auditable.
|
|
//
|
|
// recorder is the Recorder on which to record events; nil disables
|
|
// recording (the redaction still happens). fallbackUser supplies the
|
|
// no-auth identity. The middleware writes ctxKeyPIIEventID on the echo
|
|
// context so the usage middleware can later cross-reference the event
|
|
// with the UsageRecord.
|
|
func RequestMiddleware(redactor *Redactor, store EventStore, adapter Adapter, fallbackUser *auth.User, opts ...Option) echo.MiddlewareFunc {
|
|
var o mwOptions
|
|
for _, opt := range opts {
|
|
opt(&o)
|
|
}
|
|
return func(next echo.HandlerFunc) echo.HandlerFunc {
|
|
return func(c echo.Context) error {
|
|
if redactor == nil || adapter.Scan == nil {
|
|
return next(c)
|
|
}
|
|
|
|
// Per-model gating: redaction is opt-in per model. The policy
|
|
// resolver (when wired) layers instance-wide defaults over the
|
|
// per-model config; otherwise we read the per-model config
|
|
// directly. A missing config (non-chat routes, or middleware
|
|
// wired before SetModelAndConfig) or a not-enabled result passes
|
|
// through.
|
|
rawCfg := c.Get(ctxKeyModelConfig)
|
|
var enabled bool
|
|
var detectors []string
|
|
if o.policyResolver != nil {
|
|
enabled, detectors = o.policyResolver(rawCfg)
|
|
} else if cfg, ok := rawCfg.(ModelPIIConfig); ok {
|
|
enabled, detectors = cfg.PIIIsEnabled(), cfg.PIIDetectors()
|
|
}
|
|
if !enabled {
|
|
return next(c)
|
|
}
|
|
|
|
parsed := c.Get(ctxKeyParsedRequest)
|
|
if parsed == nil {
|
|
return next(c)
|
|
}
|
|
|
|
// A PII-enabled model with no detectors (or no resolver wired)
|
|
// has nothing to scan with — pass through.
|
|
if len(detectors) == 0 || o.nerResolver == nil {
|
|
return next(c)
|
|
}
|
|
|
|
user := auth.GetUser(c)
|
|
if user == nil {
|
|
user = fallbackUser
|
|
}
|
|
userID := ""
|
|
if user != nil {
|
|
userID = user.ID
|
|
}
|
|
correlationID, _ := c.Get(ctxKeyCorrelationID).(string)
|
|
|
|
// Resolve each named detector to its NERConfig (detector +
|
|
// the policy from that model's own pii_detection block). A
|
|
// configured detector that can't be resolved fails closed:
|
|
// serving the request without the semantic check the operator
|
|
// asked for is exactly the leak this tier exists to prevent.
|
|
cfgs := make([]NERConfig, 0, len(detectors))
|
|
for _, name := range detectors {
|
|
nc, ok := o.nerResolver(name)
|
|
if !ok {
|
|
xlog.Error("pii: configured detector model could not be resolved; blocking request (fail-closed)", "detector", name)
|
|
return blockNERUnavailable(c, store, correlationID, userID)
|
|
}
|
|
cfgs = append(cfgs, nc)
|
|
}
|
|
|
|
texts := adapter.Scan(parsed)
|
|
updates := make([]ScannedText, 0, len(texts))
|
|
var blocked bool
|
|
var firstEventID string
|
|
|
|
// Scan the request as ONE document (messages joined) so the NER
|
|
// tier keeps conversational context — whether "4421" is a PIN is
|
|
// decided by the question in the previous message. The spans come
|
|
// back per message with local offsets for in-place rewriting.
|
|
segTexts := make([]string, len(texts))
|
|
for i, st := range texts {
|
|
segTexts[i] = st.Text
|
|
}
|
|
// Fail closed: a detector outage at request time must NOT
|
|
// silently serve the request. The NER tier was explicitly
|
|
// configured for this model, so the semantic check is part
|
|
// of the contract.
|
|
segResults, nerErr := RedactNERSegments(c.Request().Context(), segTexts, cfgs)
|
|
if nerErr != nil {
|
|
xlog.Error("pii: NER detector failed; blocking request (fail-closed)", "error", nerErr)
|
|
return blockNERUnavailable(c, store, correlationID, userID)
|
|
}
|
|
|
|
for i, res := range segResults {
|
|
st := texts[i]
|
|
if len(res.Spans) == 0 {
|
|
continue
|
|
}
|
|
|
|
// Persist one event per detected span. The action recorded
|
|
// is the one that actually fired (carried on the span after
|
|
// the overlap merge), so the events log reflects what
|
|
// happened to the request.
|
|
for _, span := range res.Spans {
|
|
ev := PIIEvent{
|
|
ID: newEventID(),
|
|
Origin: OriginMiddleware,
|
|
CorrelationID: correlationID,
|
|
UserID: userID,
|
|
Direction: DirectionIn,
|
|
PatternID: span.Pattern,
|
|
ByteOffset: span.Start,
|
|
Length: span.End - span.Start,
|
|
HashPrefix: span.HashPrefix,
|
|
Action: span.Action,
|
|
Score: span.Score,
|
|
CreatedAt: time.Now().UTC(),
|
|
}
|
|
if firstEventID == "" {
|
|
firstEventID = ev.ID
|
|
}
|
|
if store != nil {
|
|
if err := store.Record(context.Background(), ev); err != nil {
|
|
xlog.Error("pii: failed to record event", "error", err, "pattern", span.Pattern)
|
|
}
|
|
}
|
|
// Contract: every span must produce an event.
|
|
contract.Invariant(
|
|
"pii.event_per_span",
|
|
span.Pattern != "" && ev.PatternID != "",
|
|
"correlation", correlationID, "pattern", span.Pattern,
|
|
)
|
|
}
|
|
|
|
if res.Blocked {
|
|
blocked = true
|
|
}
|
|
updates = append(updates, ScannedText{Index: st.Index, Text: res.Redacted})
|
|
}
|
|
|
|
if blocked {
|
|
return c.JSON(http.StatusBadRequest, map[string]any{
|
|
"error": map[string]string{
|
|
"message": "request blocked by content policy (sensitive data detected)",
|
|
"type": "pii_blocked",
|
|
},
|
|
"correlation_id": correlationID,
|
|
"pii_event_id": firstEventID,
|
|
})
|
|
}
|
|
|
|
if len(updates) > 0 && adapter.Apply != nil {
|
|
adapter.Apply(parsed, updates)
|
|
}
|
|
if firstEventID != "" {
|
|
c.Set(ctxKeyPIIEventID, firstEventID)
|
|
}
|
|
return next(c)
|
|
}
|
|
}
|
|
}
|
|
|
|
// nerUnavailablePattern is the sentinel PatternID stamped on the fail-closed
// audit event recorded when a model's configured NER tier cannot run (model
// unresolved or backend error). It is not a real regex pattern: it exists so
// the events log can tell such a block apart from a content block, which
// carries a real pattern ID.
const nerUnavailablePattern = "__ner_unavailable__"
|
|
|
|
// blockNERUnavailable records a fail-closed audit event and returns the
|
|
// response used when a model has an NER tier configured but it could
|
|
// not run. Failing closed is deliberate for a PII filter: if the
|
|
// semantic check the operator asked for cannot execute, refusing the
|
|
// request is safer than serving it with only the cheap regex tier. The
|
|
// 503 (vs the 400 used for a content block) tells clients and operators
|
|
// this was a dependency outage, not sensitive data in the request.
|
|
func blockNERUnavailable(c echo.Context, store EventStore, correlationID, userID string) error {
|
|
ev := PIIEvent{
|
|
ID: newEventID(),
|
|
Kind: KindPII,
|
|
Origin: OriginMiddleware,
|
|
CorrelationID: correlationID,
|
|
UserID: userID,
|
|
Direction: DirectionIn,
|
|
PatternID: nerUnavailablePattern,
|
|
Action: ActionBlock,
|
|
CreatedAt: time.Now().UTC(),
|
|
}
|
|
if store != nil {
|
|
if err := store.Record(context.Background(), ev); err != nil {
|
|
xlog.Error("pii: failed to record NER-unavailable event", "error", err)
|
|
}
|
|
}
|
|
c.Set(ctxKeyPIIEventID, ev.ID)
|
|
return c.JSON(http.StatusServiceUnavailable, map[string]any{
|
|
"error": map[string]string{
|
|
"message": "request blocked: PII NER check is configured but unavailable",
|
|
"type": "pii_ner_unavailable",
|
|
},
|
|
"correlation_id": correlationID,
|
|
"pii_event_id": ev.ID,
|
|
})
|
|
}
|
|
|
|
// validAction converts a raw YAML action string to the typed Action,
|
|
// returning "" for anything that isn't a known action.
|
|
func validAction(raw string) Action {
|
|
switch Action(raw) {
|
|
case ActionMask, ActionBlock, ActionAllow:
|
|
return Action(raw)
|
|
default:
|
|
return ""
|
|
}
|
|
}
|
|
|
|
// validActionOr is validAction with a fallback for empty/invalid input.
|
|
func validActionOr(raw string, fallback Action) Action {
|
|
if a := validAction(raw); a != "" {
|
|
return a
|
|
}
|
|
return fallback
|
|
}
|
|
|
|
// validActions converts a raw entity-group->action map to typed
|
|
// Actions, dropping (and logging) unknown actions so a model YAML typo
|
|
// is ignored rather than taking the request down — mirroring how the
|
|
// per-pattern overrides are validated above.
|
|
func validActions(raw map[string]string) map[string]Action {
|
|
if len(raw) == 0 {
|
|
return nil
|
|
}
|
|
out := make(map[string]Action, len(raw))
|
|
for group, action := range raw {
|
|
if a := validAction(action); a != "" {
|
|
out[group] = a
|
|
} else {
|
|
xlog.Warn("pii: ignoring unknown NER entity action", "group", group, "action", action)
|
|
}
|
|
}
|
|
return out
|
|
}
|
|
|
|
// newEventID mints an event id of the form "pii_" followed by the lowercase
// hex encoding of 12 random bytes read from crypto/rand.
func newEventID() string {
	const idBytes = 12

	entropy := make([]byte, idBytes)
	// The error from rand.Read is intentionally discarded; the id is built
	// from whatever the buffer holds afterwards.
	_, _ = rand.Read(entropy)

	return "pii_" + hex.EncodeToString(entropy)
}
|
|
|
|
// NewEventID mints a fresh random event id in the package's standard shape.
|
|
// Exported so callers outside this package (the analyze/redact API handlers)
|
|
// record events with ids indistinguishable from the in-band middleware's.
|
|
func NewEventID() string { return newEventID() }
|