mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-18 21:58:58 -04:00
feat(pii): NER tier engine — privacy-filter.cpp backend + NER-centric PII filter (#10360)
Squashed feat/pii-ner-tier-engine rebased onto master (was 45 commits; see backup/pii-ner-tier-engine-prerebase). Net change: - privacy-filter.cpp: standalone GGML engine for the openai-privacy-filter PII/NER token classifier, wired as a LocalAI gRPC backend (CPU/CUDA/Vulkan). TokenClassify moves off the patched llama.cpp path onto this backend. - PII filter reworked to be NER-centric (encoder/NER detection tier scanning whole conversations as one document), with a recreated bounded restricted-regex secret-matching pattern detector tier alongside it (per-model pii_detection.builtins / .patterns + core/services/routing/piipattern). - Detection labelled by source (ner vs pattern); backend trace / confidence / debug observability; analyze/redact exposed as a synchronous API. - Instance-wide default detector policy + per-usecase default-on; request filtering extended to completions, embeddings, edits & Ollama. - React UI: NER-centric PII editor, detector-models table, pattern/builtins editor, middleware default-policy UI. - Gallery: privacy-filter-multilingual token-classify model + NER install filter; token_classify known_usecase; batch sized to context for NER models. privacy-filter backend registered in the backend gallery (cpu/vulkan/cuda-13 meta + image entries with a capabilities map) matching its CI matrix jobs, and an /import-model auto-detect importer (PrivacyFilterImporter, narrow privacy-filter GGUF detection) replacing the prior pref-only registration. Reconciled against master's independent evolution: - Dropped master's PIIPatternOverrides feature (global-pattern runtime overrides + /api/pii/patterns API + runtime_settings.json persistence). The per-model NER + pattern-detector design supersedes it; it was built on the global redactor pattern set this branch replaced.
- Reverted the llama.cpp Score carry-patch (0006-server-task-type-score): removed the patch and restored master's grpc-server.cpp Score RPC (direct llama_decode, slot-loop bypass) and LLAMA_VERSION pin, plus master's model_config validation forbidding score + chat/completion/embeddings on llama-cpp. token_classify is unaffected (it runs on the privacy-filter backend, not llama-cpp). Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Richard Palethorpe <io@richiejp.com>
This commit is contained in:
committed by
GitHub
parent
c133ca39dc
commit
3fa7b2955c
42
flake.nix
42
flake.nix
@@ -18,12 +18,12 @@
|
||||
packages.${system}.default = pkgs.buildGoModule {
|
||||
pname = "localai";
|
||||
version = "custom";
|
||||
|
||||
|
||||
src = ./.;
|
||||
proxyVendor = true;
|
||||
vendorHash = "sha256-6f3adjGsoFXlUtXjBDHP4Mv9jKCOK3aeUXprm0EAVO8=";
|
||||
|
||||
nativeBuildInputs = with pkgs; [
|
||||
nativeBuildInputs = with pkgs; [
|
||||
pkg-config cmake gcc protobuf go-protobuf protoc-gen-go protoc-gen-go-grpc
|
||||
];
|
||||
|
||||
@@ -32,7 +32,7 @@
|
||||
};
|
||||
|
||||
preBuild = ''
|
||||
|
||||
|
||||
PROTO_SOURCE_DIR=$(find . -name "*.proto" -printf "%h" -quit)
|
||||
mkdir -p pkg/grpc/proto
|
||||
${pkgs.protobuf}/bin/protoc \
|
||||
@@ -43,11 +43,11 @@
|
||||
$PROTO_SOURCE_DIR/*.proto
|
||||
|
||||
go mod edit -replace github.com/mudler/LocalAI/pkg/grpc/proto=./pkg/grpc/proto
|
||||
|
||||
|
||||
mkdir -p core/config/gen_inference_defaults
|
||||
cp ${inference-defaults} core/config/gen_inference_defaults/inference_defaults.json
|
||||
sed -i '/go:generate/d' core/config/inference_defaults.go || true
|
||||
|
||||
|
||||
'';
|
||||
|
||||
subPackages = [ "cmd/local-ai" ];
|
||||
@@ -65,11 +65,43 @@
|
||||
gnumake
|
||||
pkg-config
|
||||
cmake
|
||||
ccache
|
||||
protobuf
|
||||
go-protobuf
|
||||
protoc-gen-go
|
||||
protoc-gen-go-grpc
|
||||
|
||||
# C++ gRPC + protobuf for the vendored llama.cpp backend
|
||||
# (backend/cpp/llama-cpp `make grpc-server`). The CMake build does
|
||||
# find_package(gRPC)/find_package(Protobuf); without grpc here the
|
||||
# shell exposes protobuf alone and the build fails to locate gRPC
|
||||
# (or links a stale, version-skewed grpc from the store). nixpkgs
|
||||
# builds `grpc` against this same `protobuf`, so the pair is
|
||||
# self-consistent. Docker (backend/Dockerfile.base-grpc-builder)
|
||||
# compiles gRPC v1.65.0 / protoc v27.1 from source; nixpkgs here is
|
||||
# newer (grpc 1.80 / protobuf 34) but wire- and ABI-consistent
|
||||
# within the backend. Pin protobuf_27 + a grpc override if exact
|
||||
# Docker version parity is ever required.
|
||||
grpc
|
||||
|
||||
# Vulkan toolchain for the GGML Vulkan backends (e.g.
|
||||
# backend/cpp/privacy-filter BUILD_TYPE=vulkan, llama-cpp,
|
||||
# stablediffusion-ggml). ggml's find_package(Vulkan) needs the
|
||||
# headers + loader and shells out to glslc (from shaderc) to compile
|
||||
# shaders. Docker images install the LunarG SDK 1.4.335.0 instead
|
||||
# (backend/Dockerfile.{golang,python}); nixpkgs is newer but the
|
||||
# SPIR-V output is portable.
|
||||
vulkan-headers
|
||||
vulkan-loader
|
||||
vulkan-tools # vulkaninfo, to sanity-check the ICD/driver
|
||||
shaderc # glslc
|
||||
# ggml-vulkan #include <spirv/unified1/spirv.hpp>. nixpkgs splits the
|
||||
# header into its own output whose include dir the SPIRV-Headers CMake
|
||||
# target doesn't propagate, so a local vulkan build also needs
|
||||
# -DCMAKE_CXX_FLAGS=-I${pkgs.spirv-headers}/include. (The Docker SDK
|
||||
# install lands these in /usr/include, so it isn't needed there.)
|
||||
spirv-headers
|
||||
|
||||
# React UI build (core/http/react-ui — `make react-ui`)
|
||||
nodejs
|
||||
bun # alternative to npm, used by `make react-ui-docker`
|
||||
|
||||
Reference in New Issue
Block a user