mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-19 06:09:07 -04:00
Squashed feat/pii-ner-tier-engine rebased onto master (was 45 commits; see backup/pii-ner-tier-engine-prerebase).

Net change:
- privacy-filter.cpp: standalone GGML engine for the openai-privacy-filter PII/NER token classifier, wired as a LocalAI gRPC backend (CPU/CUDA/Vulkan). TokenClassify moves off the patched llama.cpp path onto this backend.
- PII filter reworked to be NER-centric (encoder/NER detection tier scanning whole conversations as one document), with a recreated bounded restricted-regex secret-matching pattern detector tier alongside it (per-model pii_detection.builtins / .patterns + core/services/routing/piipattern).
- Detection labelled by source (ner vs pattern); backend trace / confidence / debug observability; analyze/redact exposed as a synchronous API.
- Instance-wide default detector policy + per-usecase default-on; request filtering extended to completions, embeddings, edits & Ollama.
- React UI: NER-centric PII editor, detector-models table, pattern/builtins editor, middleware default-policy UI.
- Gallery: privacy-filter-multilingual token-classify model + NER install filter; token_classify known_usecase; batch sized to context for NER models. privacy-filter backend registered in the backend gallery (cpu/vulkan/cuda-13 meta + image entries with a capabilities map) matching its CI matrix jobs, and an /import-model auto-detect importer (PrivacyFilterImporter, narrow privacy-filter GGUF detection) replacing the prior pref-only registration.

Reconciled against master's independent evolution:
- Dropped master's PIIPatternOverrides feature (global-pattern runtime overrides + /api/pii/patterns API + runtime_settings.json persistence). The per-model NER + pattern-detector design supersedes it; it was built on the global redactor pattern set this branch replaced.
- Reverted the llama.cpp Score carry-patch (0006-server-task-type-score): removed the patch and restored master's grpc-server.cpp Score RPC (direct llama_decode, slot-loop bypass) and LLAMA_VERSION pin, plus master's model_config validation forbidding score + chat/completion/embeddings on llama-cpp. token_classify is unaffected (it runs on the privacy-filter backend, not llama-cpp).

Assisted-by: Claude:claude-opus-4-8 [Claude Code]
Signed-off-by: Richard Palethorpe <io@richiejp.com>
143 lines
5.4 KiB
Nix
143 lines
5.4 KiB
Nix
# Made by Azteczek
#
# Nix flake for LocalAI. It exposes two outputs per supported system:
#   packages.<system>.default  - the `localai` Go binary (CGO disabled)
#   devShells.<system>.default - a development shell with the Go, C++/gRPC,
#                                Vulkan, React UI and lint toolchains
{
  description = "LocalAI flake";

  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";

    # Plain JSON file (not a flake); copied into the source tree in preBuild
    # in place of running the `go:generate` step.
    inference-defaults = {
      url = "https://raw.githubusercontent.com/unslothai/unsloth/main/studio/backend/assets/configs/inference_defaults.json";
      flake = false;
    };
  };

  outputs = { self, nixpkgs, inference-defaults }:
    let
      # Systems the outputs are instantiated for. x86_64-linux is the
      # original target and keeps its output paths unchanged.
      # NOTE(review): aarch64-linux has not been build-verified here.
      supportedSystems = [ "x86_64-linux" "aarch64-linux" ];

      # Build `{ <system> = mkOutputs pkgs; ... }` for every supported system.
      forAllSystems = mkOutputs:
        nixpkgs.lib.genAttrs supportedSystems
          (system: mkOutputs nixpkgs.legacyPackages.${system});
    in {
      packages = forAllSystems (pkgs: {
        default = pkgs.buildGoModule {
          pname = "localai";
          version = "custom";

          src = ./.;
          proxyVendor = true;
          vendorHash = "sha256-6f3adjGsoFXlUtXjBDHP4Mv9jKCOK3aeUXprm0EAVO8=";

          nativeBuildInputs = with pkgs; [
            pkg-config cmake gcc protobuf go-protobuf protoc-gen-go protoc-gen-go-grpc
          ];

          env = {
            CGO_ENABLED = "0";
          };

          preBuild = ''
            # Locate the directory of the first .proto file found in the tree.
            PROTO_SOURCE_DIR=$(find . -name "*.proto" -printf "%h" -quit)
            if [ -z "$PROTO_SOURCE_DIR" ]; then
              # Without this guard protoc would be invoked with an empty
              # include path and the glob would expand against "/".
              echo "flake: no .proto file found under $PWD" >&2
              exit 1
            fi

            # Generate the Go gRPC stubs into pkg/grpc/proto.
            mkdir -p pkg/grpc/proto
            ${pkgs.protobuf}/bin/protoc \
              -I="$PROTO_SOURCE_DIR" \
              -I. \
              --go_out=pkg/grpc/proto --go_opt=paths=source_relative \
              --go-grpc_out=pkg/grpc/proto --go-grpc_opt=paths=source_relative \
              "$PROTO_SOURCE_DIR"/*.proto

            # Resolve the proto package to the stubs generated above.
            go mod edit -replace github.com/mudler/LocalAI/pkg/grpc/proto=./pkg/grpc/proto

            # Provide the inference defaults from the pinned flake input and
            # drop the go:generate directive from the Go source
            # (best-effort: a missing file is tolerated).
            mkdir -p core/config/gen_inference_defaults
            cp ${inference-defaults} core/config/gen_inference_defaults/inference_defaults.json
            sed -i '/go:generate/d' core/config/inference_defaults.go || true
          '';

          subPackages = [ "cmd/local-ai" ];
          doCheck = false;

          # Rename the binary to match pname. Written as an `if` rather than
          # `[ ... ] && mv` so the hook never ends on a non-zero status.
          postInstall = ''
            if [ -f "$out/bin/local-ai" ]; then
              mv "$out/bin/local-ai" "$out/bin/localai"
            fi
          '';

          # Name of the installed executable after the rename above.
          meta.mainProgram = "localai";
        };
      });

      devShells = forAllSystems (pkgs: {
        default = pkgs.mkShell {
          packages = with pkgs; [
            # Build toolchain (stdenv already provides gcc)
            go
            gnumake
            pkg-config
            cmake
            ccache
            protobuf
            go-protobuf
            protoc-gen-go
            protoc-gen-go-grpc

            # C++ gRPC + protobuf for the vendored llama.cpp backend
            # (backend/cpp/llama-cpp `make grpc-server`). The CMake build does
            # find_package(gRPC)/find_package(Protobuf); without grpc here the
            # shell exposes protobuf alone and the build fails to locate gRPC
            # (or links a stale, version-skewed grpc from the store). nixpkgs
            # builds `grpc` against this same `protobuf`, so the pair is
            # self-consistent. Docker (backend/Dockerfile.base-grpc-builder)
            # compiles gRPC v1.65.0 / protoc v27.1 from source; nixpkgs here is
            # newer (grpc 1.80 / protobuf 34) but wire- and ABI-consistent
            # within the backend. Pin protobuf_27 + a grpc override if exact
            # Docker version parity is ever required.
            grpc

            # Vulkan toolchain for the GGML Vulkan backends (e.g.
            # backend/cpp/privacy-filter BUILD_TYPE=vulkan, llama-cpp,
            # stablediffusion-ggml). ggml's find_package(Vulkan) needs the
            # headers + loader and shells out to glslc (from shaderc) to compile
            # shaders. Docker images install the LunarG SDK 1.4.335.0 instead
            # (backend/Dockerfile.{golang,python}); nixpkgs is newer but the
            # SPIR-V output is portable.
            vulkan-headers
            vulkan-loader
            vulkan-tools # vulkaninfo, to sanity-check the ICD/driver
            shaderc # glslc
            # ggml-vulkan #include <spirv/unified1/spirv.hpp>. nixpkgs splits the
            # header into its own output whose include dir the SPIRV-Headers CMake
            # target doesn't propagate, so a local vulkan build also needs
            # -DCMAKE_CXX_FLAGS=-I${pkgs.spirv-headers}/include. (The Docker SDK
            # install lands these in /usr/include, so it isn't needed there.)
            spirv-headers

            # React UI build (core/http/react-ui — `make react-ui`)
            nodejs
            bun # alternative to npm, used by `make react-ui-docker`
            chromium # Playwright e2e / UI coverage browser (see PLAYWRIGHT_CHROMIUM_PATH below)

            # Linting / static analysis (see `make lint`)
            golangci-lint
            gofumpt
            gotools # goimports
            go-tools # staticcheck

            # Audio transforms: pkg/utils/ffmpeg_test.go shells out to the
            # `ffmpeg` CLI, exercised by `make test-coverage` (the pre-commit
            # gate). Headless build = the CLI without GUI/X deps.
            ffmpeg-headless

            # Common dev conveniences
            git
            curl
          ];

          shellHook = ''
            # Point Playwright at the nix-provided Chromium instead of its own
            # downloaded build, which can't resolve system libs (libglib-2.0, …)
            # on NixOS. playwright.config.js reads PLAYWRIGHT_CHROMIUM_PATH and
            # the Makefile skips `playwright install` when it's set.
            export PLAYWRIGHT_CHROMIUM_PATH="${pkgs.chromium}/bin/chromium"
            export PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1

            echo "LocalAI dev shell: $(go version), node $(node --version)"
            echo "Build: make build (Go binary + React UI)"
            echo "React UI: make react-ui (npm install && vite build)"
            echo "Lint: make lint (only new issues vs master)"
            echo " or make lint-all (full baseline)"
          '';
        };
      });
    };
}