mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-18 21:58:58 -04:00
feat(pii): NER tier engine — privacy-filter.cpp backend + NER-centric PII filter (#10360)
Squashed feat/pii-ner-tier-engine rebased onto master (was 45 commits; see backup/pii-ner-tier-engine-prerebase). Net change: - privacy-filter.cpp: standalone GGML engine for the openai-privacy-filter PII/NER token classifier, wired as a LocalAI gRPC backend (CPU/CUDA/Vulkan). TokenClassify moves off the patched llama.cpp path onto this backend. - PII filter reworked to be NER-centric (encoder/NER detection tier scanning whole conversations as one document), with a recreated bounded restricted- regex secret-matching pattern detector tier alongside it (per-model pii_detection.builtins / .patterns + core/services/routing/piipattern). - Detection labelled by source (ner vs pattern); backend trace / confidence / debug observability; analyze/redact exposed as a synchronous API. - Instance-wide default detector policy + per-usecase default-on; request filtering extended to completions, embeddings, edits & Ollama. - React UI: NER-centric PII editor, detector-models table, pattern/builtins editor, middleware default-policy UI. - Gallery: privacy-filter-multilingual token-classify model + NER install filter; token_classify known_usecase; batch sized to context for NER models. privacy-filter backend registered in the backend gallery (cpu/vulkan/cuda-13 meta + image entries with a capabilities map) matching its CI matrix jobs, and an /import-model auto-detect importer (PrivacyFilterImporter, narrow privacy-filter GGUF detection) replacing the prior pref-only registration. Reconciled against master's independent evolution: - Dropped master's PIIPatternOverrides feature (global-pattern runtime overrides + /api/pii/patterns API + runtime_settings.json persistence). The per-model NER + pattern-detector design supersedes it; it was built on the global redactor pattern set this branch replaced. 
- Reverted the llama.cpp Score carry-patch (0006-server-task-type-score): removed the patch and restored master's grpc-server.cpp Score RPC (direct llama_decode, slot-loop bypass) and LLAMA_VERSION pin, plus master's model_config validation forbidding score + chat/completion/embeddings on llama-cpp. token_classify is unaffected (it runs on the privacy-filter backend, not llama-cpp). Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Richard Palethorpe <io@richiejp.com>
This commit is contained in:
committed by
GitHub
parent
c133ca39dc
commit
3fa7b2955c
@@ -6,12 +6,13 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"strings"
|
||||
|
||||
"dario.cat/mergo"
|
||||
"gopkg.in/yaml.v3"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/config/meta"
|
||||
"github.com/mudler/LocalAI/core/gallery"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/LocalAI/pkg/utils"
|
||||
@@ -114,9 +115,7 @@ func (s *ConfigService) PatchConfig(_ context.Context, name string, patch map[st
|
||||
if existingMap == nil {
|
||||
existingMap = map[string]any{}
|
||||
}
|
||||
if err := mergo.Merge(&existingMap, patch, mergo.WithOverride); err != nil {
|
||||
return nil, fmt.Errorf("merge configs: %w", err)
|
||||
}
|
||||
patchMerge(existingMap, patch, mapLeafFieldPaths(), "")
|
||||
yamlData, err := yaml.Marshal(existingMap)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("marshal merged YAML: %w", err)
|
||||
@@ -142,6 +141,55 @@ func (s *ConfigService) PatchConfig(_ context.Context, name string, patch map[st
|
||||
return &updated, nil
|
||||
}
|
||||
|
||||
// mapLeafFieldPaths returns the set of dotted config paths whose schema type is
|
||||
// a map that the editor edits as one complete value (e.g.
|
||||
// pii_detection.entity_actions, roles, engine_args). A PATCH must REPLACE these
|
||||
// wholesale rather than union them: the deep-merge only adds and overrides
|
||||
// keys, so a map entry the admin deleted in the editor would otherwise silently
|
||||
// survive. Derived from the config schema so it stays correct as map fields are
|
||||
// added. (UIType comes from reflection, independent of any registry override.)
|
||||
func mapLeafFieldPaths() map[string]struct{} {
|
||||
md := meta.BuildConfigMetadata(reflect.TypeFor[config.ModelConfig]())
|
||||
out := make(map[string]struct{})
|
||||
for _, f := range md.Fields {
|
||||
if f.UIType == "map" {
|
||||
out[f.Path] = struct{}{}
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// patchMerge deep-merges src into dst in place, keeping the shape of the
// previous mergo.WithOverride behaviour: scalars and slices replace, and
// nested struct-maps (e.g. pii_detection, parameters) recurse so sibling keys
// that exist only in dst survive.
//
// The exception is any dotted path listed in mapLeaves: such a field is
// replaced wholesale, and removed from dst when the patch sets it to an empty
// map, so deletions made inside a map field persist to disk.
//
// Parameters:
//   - dst: destination map, mutated in place; must be non-nil.
//   - src: the patch to apply; it is never mutated, and no map nested in it is
//     stored in dst by the recursion (leaf values are still assigned as-is).
//   - mapLeaves: set of dotted paths to replace wholesale instead of merging.
//   - prefix: dotted path of dst/src within the whole document ("" at the root).
//
// When src carries a nested map but dst has no map at that key, the merge
// recurses into a fresh map rather than assigning the patch sub-map verbatim.
// That keeps the map-leaf rule uniform at every depth (an emptied leaf below a
// not-yet-existing parent is dropped, not written as an empty map) and avoids
// aliasing the caller's patch into dst.
func patchMerge(dst, src map[string]any, mapLeaves map[string]struct{}, prefix string) {
	for k, sv := range src {
		path := k
		if prefix != "" {
			path = prefix + "." + k
		}

		sm, srcIsMap := sv.(map[string]any)

		if _, isLeaf := mapLeaves[path]; isLeaf {
			if srcIsMap && len(sm) == 0 {
				delete(dst, k) // emptied map field -> drop it from the YAML
			} else {
				dst[k] = sv
			}
			continue
		}

		// Scalars, slices and any other non-map value simply replace.
		if !srcIsMap {
			dst[k] = sv
			continue
		}

		// Recurse into struct-like nesting so dst-only sibling keys survive.
		// A missing or non-map destination is replaced by a fresh map first so
		// the leaf rules still apply to everything underneath it.
		dm, dstIsMap := dst[k].(map[string]any)
		if !dstIsMap {
			dm = make(map[string]any, len(sm))
			dst[k] = dm
		}
		patchMerge(dm, sm, mapLeaves, path)
	}
}
|
||||
|
||||
// EditYAML replaces the YAML for an installed model, with optional rename
|
||||
// support. ml may be nil; when set, EditYAML calls ml.ShutdownModel(oldName)
|
||||
// after a successful write so the next inference picks up the new config.
|
||||
|
||||
@@ -107,6 +107,64 @@ var _ = Describe("ConfigService", func() {
|
||||
_, err := svc.PatchConfig(ctx, "qwen", map[string]any{})
|
||||
Expect(err).To(MatchError(ErrEmptyBody))
|
||||
})
|
||||
|
||||
It("replaces a map field wholesale so deleted entries do not survive", func() {
	// Seed a detector model whose entity_actions map holds two entries.
	// The patch below re-sends the map without SSN, as the editor does after
	// a deletion; a merge that only adds/overrides keys would keep SSN.
	seed := map[string]any{
		"backend":        "llama-cpp",
		"known_usecases": []any{"token_classify"},
		"pii_detection": map[string]any{
			"default_action": "mask",
			"entity_actions": map[string]any{"SSN": "block", "EMAIL": "mask"},
		},
	}
	writeModelYAML(svc, dir, "ner", seed)

	patch := map[string]any{
		"pii_detection": map[string]any{
			"default_action": "mask",
			"entity_actions": map[string]any{"EMAIL": "mask"},
		},
	}
	_, patchErr := svc.PatchConfig(ctx, "ner", patch)
	Expect(patchErr).ToNot(HaveOccurred())

	// Inspect what was actually written to disk.
	onDisk, readErr := os.ReadFile(filepath.Join(dir, "ner.yaml"))
	Expect(readErr).ToNot(HaveOccurred())
	var parsed map[string]any
	Expect(yaml.Unmarshal(onDisk, &parsed)).To(Succeed())

	piiBlock := parsed["pii_detection"].(map[string]any)
	actions := piiBlock["entity_actions"].(map[string]any)
	Expect(actions).To(HaveKeyWithValue("EMAIL", "mask"))
	Expect(actions).NotTo(HaveKey("SSN"), "deleted map entry must not survive the patch")
	// The scalar sibling in the same nested block is still preserved.
	Expect(piiBlock).To(HaveKeyWithValue("default_action", "mask"))
})
|
||||
|
||||
It("drops a map field entirely when the patch empties it", func() {
	// Start from a model that has a single entity_actions entry.
	seed := map[string]any{
		"backend":        "llama-cpp",
		"known_usecases": []any{"token_classify"},
		"pii_detection": map[string]any{
			"default_action": "mask",
			"entity_actions": map[string]any{"SSN": "block"},
		},
	}
	writeModelYAML(svc, dir, "ner", seed)

	// Patch the map field with an empty map.
	patch := map[string]any{
		"pii_detection": map[string]any{
			"entity_actions": map[string]any{},
		},
	}
	_, patchErr := svc.PatchConfig(ctx, "ner", patch)
	Expect(patchErr).ToNot(HaveOccurred())

	// The key must be gone from the YAML on disk, not left as an empty map.
	onDisk, readErr := os.ReadFile(filepath.Join(dir, "ner.yaml"))
	Expect(readErr).ToNot(HaveOccurred())
	var parsed map[string]any
	Expect(yaml.Unmarshal(onDisk, &parsed)).To(Succeed())

	piiBlock := parsed["pii_detection"].(map[string]any)
	Expect(piiBlock).NotTo(HaveKey("entity_actions"))
})
|
||||
})
|
||||
|
||||
Describe("EditYAML", func() {
|
||||
|
||||
Reference in New Issue
Block a user