feat(pii): NER tier engine — privacy-filter.cpp backend + NER-centric PII filter (#10360)

Squashed feat/pii-ner-tier-engine rebased onto master (was 45 commits; see
backup/pii-ner-tier-engine-prerebase). Net change:

- privacy-filter.cpp: standalone GGML engine for the openai-privacy-filter
  PII/NER token classifier, wired as a LocalAI gRPC backend (CPU/CUDA/Vulkan).
  TokenClassify moves off the patched llama.cpp path onto this backend.
- PII filter reworked to be NER-centric: an encoder/NER detection tier scans
  the whole conversation as one document, and a re-created pattern detector
  tier alongside it matches secrets with bounded, restricted regexes (per-model
  pii_detection.builtins / .patterns + core/services/routing/piipattern).
- Detection labelled by source (ner vs pattern); backend trace / confidence /
  debug observability; analyze/redact exposed as a synchronous API.
- Instance-wide default detector policy + per-usecase default-on; request
  filtering extended to completions, embeddings, edits & Ollama.
- React UI: NER-centric PII editor, detector-models table, pattern/builtins
  editor, middleware default-policy UI.
- Gallery: privacy-filter-multilingual token-classify model + NER install
  filter; token_classify known_usecase; batch sized to context for NER models.
  privacy-filter backend registered in the backend gallery (cpu/vulkan/cuda-13
  meta + image entries with a capabilities map) matching its CI matrix jobs,
  and an /import-model auto-detect importer (PrivacyFilterImporter, narrow
  privacy-filter GGUF detection) replacing the prior pref-only registration.

Reconciled against master's independent evolution:

- Dropped master's PIIPatternOverrides feature (global-pattern runtime
  overrides + /api/pii/patterns API + runtime_settings.json persistence). The
  per-model NER + pattern-detector design supersedes it; it was built on the
  global redactor pattern set this branch replaced.
- Reverted the llama.cpp Score carry-patch (0006-server-task-type-score):
  removed the patch and restored master's grpc-server.cpp Score RPC (direct
  llama_decode, slot-loop bypass) and LLAMA_VERSION pin, plus master's
  model_config validation forbidding score + chat/completion/embeddings on
  llama-cpp. token_classify is unaffected (it runs on the privacy-filter
  backend, not llama-cpp).

Assisted-by: Claude:claude-opus-4-8 [Claude Code]

Signed-off-by: Richard Palethorpe <io@richiejp.com>
This commit is contained in:
Richard Palethorpe
2026-06-18 11:45:22 +01:00
committed by GitHub
parent c133ca39dc
commit 3fa7b2955c
134 changed files with 6671 additions and 4223 deletions

View File

@@ -12,14 +12,15 @@ import (
"github.com/mudler/LocalAI/core/http/auth"
mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp"
"github.com/mudler/LocalAI/core/services/agentpool"
"github.com/mudler/LocalAI/core/services/cloudproxy/mitm"
"github.com/mudler/LocalAI/core/services/facerecognition"
"github.com/mudler/LocalAI/core/services/galleryop"
"github.com/mudler/LocalAI/core/services/monitoring"
"github.com/mudler/LocalAI/core/services/nodes"
"github.com/mudler/LocalAI/core/services/routing/admission"
"github.com/mudler/LocalAI/core/services/routing/billing"
"github.com/mudler/LocalAI/core/services/cloudproxy/mitm"
"github.com/mudler/LocalAI/core/services/routing/pii"
"github.com/mudler/LocalAI/core/services/routing/piidetector"
"github.com/mudler/LocalAI/core/services/routing/router"
"github.com/mudler/LocalAI/core/services/voicerecognition"
"github.com/mudler/LocalAI/core/templates"
@@ -71,15 +72,15 @@ type Application struct {
// 1-to-1 host↔model invariant the dispatcher relies on. Read by
// /api/middleware/status so the admin UI can surface the cause.
mitmHostConflicts atomic.Pointer[map[string][]string]
routerDecisions router.DecisionStore
routerRegistry *router.Registry
admissionLimiter *admission.Limiter
watchdogMutex sync.Mutex
watchdogStop chan bool
p2pMutex sync.Mutex
p2pCtx context.Context
p2pCancel context.CancelFunc
agentJobMutex sync.Mutex
routerDecisions router.DecisionStore
routerRegistry *router.Registry
admissionLimiter *admission.Limiter
watchdogMutex sync.Mutex
watchdogStop chan bool
p2pMutex sync.Mutex
p2pCtx context.Context
p2pCancel context.CancelFunc
agentJobMutex sync.Mutex
// Distributed mode services (nil when not in distributed mode)
distributed *DistributedServices
@@ -254,6 +255,122 @@ func (a *Application) PIIEvents() pii.EventStore {
return a.piiEvents
}
// PIINERResolver builds the lookup the chat PII middleware uses to turn a
// configured detector model name into a ready-to-use pii.NERConfig. Hand it
// to the middleware via pii.WithNERResolver.
//
// The returned function reports (zero NERConfig, false) for an empty name or
// a name with no model config, which lets the caller fail closed. For a known
// model it returns one of two detector flavours, each paired with the policy
// read from that model's own pii_detection block:
//
//   - pattern detector: an in-process matcher built by piidetector.NewPattern,
//     labelled pii.SourcePattern, with a confidence floor of 0.
//   - anything else: a token-classifier built over the shared model loader by
//     piidetector.New, labelled pii.SourceNER (the backend model is expected
//     to load lazily on first Detect — not verifiable from this function).
func (a *Application) PIINERResolver() pii.NERDetectorResolver {
	return func(modelName string) (pii.NERConfig, bool) {
		var unresolved pii.NERConfig
		if modelName == "" {
			return unresolved, false
		}
		modelCfg, found := a.ModelConfigLoader().GetModelConfig(modelName)
		if !found {
			return unresolved, false
		}

		if !modelCfg.IsPatternDetector() {
			// NER path: gRPC token-classifier bound to the shared loader.
			classifier := piidetector.New(a.ModelLoader(), modelCfg, a.ApplicationConfig())
			return pii.NERConfigFromRaw(
				classifier,
				modelCfg.PIIDetectionMinScore(),
				modelCfg.PIIDetectionDefaultAction(),
				modelCfg.PIIDetectionEntityActions(),
				pii.SourceNER,
			), true
		}

		// Pattern path: secrets are matched in-process, so no backend is
		// loaded. If the patterns do not compile, substitute an error
		// detector so the request is blocked rather than silently unscanned.
		matcher, err := piidetector.NewPattern(modelCfg, a.ApplicationConfig())
		if err != nil {
			matcher = pii.NewErrNERDetector(err.Error())
		}
		return pii.NERConfigFromRaw(
			matcher,
			0, // patterns are deterministic — no confidence floor
			modelCfg.PIIDetectionDefaultAction(),
			patternEntityActions(modelCfg),
			pii.SourcePattern,
		), true
	}
}
// patternEntityActions merges a pattern detector's per-pattern Action
// overrides into its entity_actions map and returns the result.
//
// A pattern reports matches under its Name, so a per-pattern action is
// expressed as an entity_actions[Name] entry. Explicit entity_actions win
// when both are set, and among patterns sharing a Name the first one with an
// Action wins. Patterns with an empty Name or Action are ignored.
//
// The map obtained from cfg.PIIDetectionEntityActions() is never written to:
// cfg is a by-value copy, but a map inside it may still be shared with the
// stored model config, and this function can run once per request. The map is
// therefore copied before the first insertion. When nothing needs inserting,
// the accessor's map (possibly nil) is returned as-is.
func patternEntityActions(cfg config.ModelConfig) map[string]string {
	out := cfg.PIIDetectionEntityActions()
	detached := false // true once out is a private copy we may write to
	for _, p := range cfg.PIIDetection.Patterns {
		if p.Action == "" || p.Name == "" {
			continue
		}
		if _, exists := out[p.Name]; exists {
			continue
		}
		if !detached {
			// Copy lazily so the common "no per-pattern overrides" case
			// allocates nothing. Ranging over a nil map is a no-op.
			fresh := make(map[string]string, len(out)+1)
			for k, v := range out {
				fresh[k] = v
			}
			out = fresh
			detached = true
		}
		out[p.Name] = p.Action
	}
	return out
}
// ResolvePIIPolicy computes the effective request-side PII policy for a
// consuming model by layering the instance-wide default detectors
// (ApplicationConfig.PIIDefaultDetectors) over the model's own config. Both
// the chat middleware (through PIIPolicyResolver) and the MITM listener call
// it, so they reach the same decision.
//
// Resolution rules:
//
//   - enabled: an explicit pii.enabled on the model is authoritative, whether
//     true or false. With none set, the answer is cfg.PIIIsEnabled(), i.e.
//     the backend's default (on for cloud-proxy models).
//   - detectors: the model's own pii.detectors when it lists any; otherwise a
//     copy of the instance-wide PIIDefaultDetectors.
//
// A nil cfg, or a disabled policy, yields (false, nil). The application
// config is fetched on every call, so a changed default is picked up by the
// next request.
func (a *Application) ResolvePIIPolicy(cfg *config.ModelConfig) (enabled bool, detectors []string) {
	if cfg == nil {
		return false, nil
	}
	appCfg := a.ApplicationConfig()

	if explicit := cfg.PII.Enabled; explicit == nil {
		enabled = cfg.PIIIsEnabled() // backend default (cloud-proxy)
	} else {
		enabled = *explicit
	}
	if !enabled {
		return false, nil
	}

	if own := cfg.PIIDetectors(); len(own) > 0 {
		return enabled, own
	}
	// Copy the shared default so callers never alias the application config.
	fallback := append([]string(nil), appCfg.PIIDefaultDetectors...)
	return enabled, fallback
}
// PIIPolicyResolver wraps ResolvePIIPolicy as a pii.PolicyResolver, for use
// with pii.WithPolicyResolver. The middleware passes the resolved model
// config as `any` (the MODEL_CONFIG context value); when that value is a
// *config.ModelConfig it is forwarded to ResolvePIIPolicy, and any other
// dynamic type resolves to (false, nil).
func (a *Application) PIIPolicyResolver() pii.PolicyResolver {
	return func(modelCfg any) (bool, []string) {
		if typed, isModelConfig := modelCfg.(*config.ModelConfig); isModelConfig {
			return a.ResolvePIIPolicy(typed)
		}
		return false, nil
	}
}
// MITMCA returns the cloudproxy MITM proxy's CA as currently held in
// a.mitmCA. Per the original note, the result is nil when the MITM listener
// is disabled.
func (a *Application) MITMCA() *mitm.CA {
	current := a.mitmCA.Load()
	return current
}

View File

@@ -8,6 +8,7 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/services/cloudproxy/mitm"
"github.com/mudler/LocalAI/core/services/routing/pii"
"github.com/mudler/xlog"
)
@@ -91,25 +92,41 @@ func startMITMLocked(app *Application, options *config.ApplicationConfig) error
}
sort.Strings(effectiveHosts)
// Per-host PII gate inherits from the owning model's pii.enabled.
// A non-cloud-proxy backend with no explicit pii.enabled resolves
// to false → host is intercepted but the regex pass is skipped
// (audit events still record).
var piiDisabled []string
// Per-host detectors come from the owning model's effective PII policy
// (its own pii.detectors, else the instance-wide default), each resolved
// against the detector model's pii_detection policy. A host gets no entry
// when its owning model's config can't be found, when the policy is
// disabled, or when it resolves to no detectors → that host is intercepted
// and forwarded unredacted (audit events still record traffic). A named
// detector that can't be resolved does NOT drop the host: it is recorded
// as an error-detector so requests fail closed rather than leaking.
resolver := app.PIINERResolver()
detectorsByHost := map[string][]pii.NERConfig{}
for host, modelName := range ownership.Owners {
cfg, exists := app.backendLoader.GetModelConfig(modelName)
if !exists {
continue
}
if !cfg.PIIIsEnabled() {
piiDisabled = append(piiDisabled, host)
// Resolve through the shared policy so cloud-proxy hosts inherit the
// instance-wide default detector when they name none of their own.
enabled, detectors := app.ResolvePIIPolicy(&cfg)
if !enabled || len(detectors) == 0 {
continue
}
cfgs := make([]pii.NERConfig, 0, len(detectors))
for _, name := range detectors {
nc, ok := resolver(name)
if !ok {
xlog.Error("mitm: detector model not resolvable; requests to host will fail closed", "host", host, "detector", name)
nc = pii.NERConfig{Detector: pii.NewErrNERDetector("detector model '" + name + "' not resolvable")}
}
cfgs = append(cfgs, nc)
}
detectorsByHost[host] = cfgs
}
handler := mitm.NewPIIHandler(mitm.PIIHandlerOptions{
Redactor: app.piiRedactor,
EventStore: app.piiEvents,
HostsWithPIIDisabled: piiDisabled,
EventStore: app.piiEvents,
DetectorsByHost: detectorsByHost,
})
srv, err := mitm.NewServer(mitm.Config{
@@ -132,7 +149,7 @@ func startMITMLocked(app *Application, options *config.ApplicationConfig) error
"ca_dir", caDir,
"intercept_hosts", effectiveHosts,
"model_owned_hosts", len(ownership.Owners),
"pii_disabled_hosts", len(piiDisabled),
"pii_detector_hosts", len(detectorsByHost),
)
return nil
}

View File

@@ -0,0 +1,51 @@
package application
import (
"github.com/mudler/LocalAI/core/config"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Specs for Application.ResolvePIIPolicy: precedence of an explicit
// pii.enabled, the backend default for cloud-proxy models, and the choice
// between a model's own detectors and the instance-wide default.
var _ = Describe("ResolvePIIPolicy", func() {
	chat := config.FLAG_CHAT

	// boolPtr returns a pointer to a copy of v, for the optional pii.enabled field.
	boolPtr := func(v bool) *bool { return &v }

	// appWithDefaults builds a bare Application whose only populated state is
	// the instance-wide default detector list.
	appWithDefaults := func(defaults ...string) *Application {
		appCfg := &config.ApplicationConfig{PIIDefaultDetectors: defaults}
		return &Application{applicationConfig: appCfg}
	}

	It("lets an explicit pii.enabled=false win over the global default detector", func() {
		model := &config.ModelConfig{Backend: "cloud-proxy", KnownUsecases: &chat}
		model.PII.Enabled = boolPtr(false)

		enabled, dets := appWithDefaults("pf").ResolvePIIPolicy(model)

		Expect(enabled).To(BeFalse())
		Expect(dets).To(BeNil())
	})

	It("enables a cloud-proxy model with the global default detector (closes the no-op gap)", func() {
		// A cloud-proxy model is on by default yet names no detectors of its
		// own; the instance-wide default is what gives it something to scan with.
		model := &config.ModelConfig{Backend: "cloud-proxy"}

		enabled, dets := appWithDefaults("pf").ResolvePIIPolicy(model)

		Expect(enabled).To(BeTrue())
		Expect(dets).To(Equal([]string{"pf"}))
	})

	It("leaves a non-cloud model off by default (no instance usecase default-on)", func() {
		model := &config.ModelConfig{Backend: "llama-cpp", KnownUsecases: &chat}

		enabled, _ := appWithDefaults("pf").ResolvePIIPolicy(model)

		Expect(enabled).To(BeFalse())
	})

	It("prefers the model's own detectors over the global default", func() {
		model := &config.ModelConfig{Backend: "cloud-proxy"}
		model.PII.Detectors = []string{"own-pf"}

		enabled, dets := appWithDefaults("global-pf").ResolvePIIPolicy(model)

		Expect(enabled).To(BeTrue())
		Expect(dets).To(Equal([]string{"own-pf"}))
	})
})

View File

@@ -53,7 +53,6 @@ func New(opts ...config.AppOption) (*Application, error) {
caps, err := xsysinfo.CPUCapabilities()
if err == nil {
xlog.Debug("CPU capabilities", "capabilities", caps)
}
gpus, err := xsysinfo.GPUs()
if err == nil {
@@ -68,18 +67,18 @@ func New(opts ...config.AppOption) (*Application, error) {
return nil, fmt.Errorf("models path cannot be empty")
}
err = os.MkdirAll(options.SystemState.Model.ModelsPath, 0750)
err = os.MkdirAll(options.SystemState.Model.ModelsPath, 0o750)
if err != nil {
return nil, fmt.Errorf("unable to create ModelPath: %q", err)
}
if options.GeneratedContentDir != "" {
err := os.MkdirAll(options.GeneratedContentDir, 0750)
err := os.MkdirAll(options.GeneratedContentDir, 0o750)
if err != nil {
return nil, fmt.Errorf("unable to create ImageDir: %q", err)
}
}
if options.UploadDir != "" {
err := os.MkdirAll(options.UploadDir, 0750)
err := os.MkdirAll(options.UploadDir, 0o750)
if err != nil {
return nil, fmt.Errorf("unable to create UploadDir: %q", err)
}
@@ -87,7 +86,7 @@ func New(opts ...config.AppOption) (*Application, error) {
// Create and migrate data directory
if options.DataPath != "" {
if err := os.MkdirAll(options.DataPath, 0750); err != nil {
if err := os.MkdirAll(options.DataPath, 0o750); err != nil {
return nil, fmt.Errorf("unable to create DataPath: %q", err)
}
// Migrate data from DynamicConfigsDir to DataPath if needed
@@ -192,44 +191,14 @@ func New(opts ...config.AppOption) (*Application, error) {
xlog.Info("stats: disabled by --disable-stats")
}
// Wire the regex PII filter. Default-on: a single-user box gets
// the built-in pattern set the first time it starts, with email/
// phone/SSN/credit-card on mask and api_key_prefix on block. If
// the operator wants different actions, --pii-config points at a
// YAML file that overrides per-id; --disable-pii turns it off
// entirely.
if !options.DisablePII {
patterns, err := pii.LoadConfig(options.PIIConfigPath)
if err != nil {
return nil, fmt.Errorf("pii config: %w", err)
}
application.piiRedactor = pii.NewRedactor(patterns)
application.piiEvents = pii.NewMemoryEventStore(0)
// Apply persisted per-pattern overrides — admins toggling
// action/disabled via the UI and clicking "Save to disk" land
// here on the next start. Bad ids are warned and ignored so a
// stale entry doesn't block startup.
for id, ov := range options.PIIPatternOverrides {
if ov.Action != nil {
if err := application.piiRedactor.SetAction(id, pii.Action(*ov.Action)); err != nil {
xlog.Warn("pii: persisted override skipped", "pattern", id, "error", err)
continue
}
}
if ov.Disabled != nil {
if err := application.piiRedactor.SetDisabled(id, *ov.Disabled); err != nil {
xlog.Warn("pii: persisted disable skipped", "pattern", id, "error", err)
}
}
}
xlog.Info("pii: filter enabled",
"patterns", len(patterns),
"config_path", options.PIIConfigPath,
"persisted_overrides", len(options.PIIPatternOverrides),
)
} else {
xlog.Info("pii: disabled by --disable-pii")
}
// Wire the PII filter subsystem. The redactor is now a stateless
// handle — detection is driven by per-model detectors (pii.detectors →
// the detector model's pii_detection policy), run request-side by the
// chat middleware and the MITM input path. The global regex pattern set
// was removed; secret matching now lives in per-model pattern detectors.
// Whether a model is scanned is decided per model: an explicit
// pii.enabled, otherwise the backend default from PIIIsEnabled(). The
// event store backs the /api/pii/events audit log.
application.piiRedactor = &pii.Redactor{}
application.piiEvents = pii.NewMemoryEventStore(0)
// Wire the routing decision log. Always-on when stats are enabled —
// the per-router admin page reads this as the live activity feed
@@ -517,7 +486,7 @@ func startWatcher(options *config.ApplicationConfig) {
if _, err := os.Stat(options.DynamicConfigsDir); err != nil {
if os.IsNotExist(err) {
// We try to create the directory if it does not exist and was specified
if err := os.MkdirAll(options.DynamicConfigsDir, 0700); err != nil {
if err := os.MkdirAll(options.DynamicConfigsDir, 0o700); err != nil {
xlog.Error("failed creating DynamicConfigsDir", "error", err)
}
} else {
@@ -764,16 +733,6 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
options.MITMListen = *settings.MITMListen
}
// PII pattern overrides — file is the only source; CLI flags don't
// reach into this map. Apply unconditionally when present; the
// redactor wiring below sees the result on first construction.
if settings.PIIPatternOverrides != nil {
options.PIIPatternOverrides = make(map[string]config.PIIPatternRuntimeOverride, len(*settings.PIIPatternOverrides))
for id, ov := range *settings.PIIPatternOverrides {
options.PIIPatternOverrides[id] = ov
}
}
// Backend upgrade flags
if settings.AutoUpgradeBackends != nil {
if !options.AutoUpgradeBackends {
@@ -924,7 +883,7 @@ func loadOrGenerateHMACSecret(path string) (string, error) {
}
secret := hex.EncodeToString(b)
if err := os.WriteFile(path, []byte(secret), 0600); err != nil {
if err := os.WriteFile(path, []byte(secret), 0o600); err != nil {
return "", fmt.Errorf("failed to persist HMAC secret: %w", err)
}