mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-19 14:19:16 -04:00
Squashed feat/pii-ner-tier-engine rebased onto master (was 45 commits; see backup/pii-ner-tier-engine-prerebase). Net change: - privacy-filter.cpp: standalone GGML engine for the openai-privacy-filter PII/NER token classifier, wired as a LocalAI gRPC backend (CPU/CUDA/Vulkan). TokenClassify moves off the patched llama.cpp path onto this backend. - PII filter reworked to be NER-centric (encoder/NER detection tier scanning whole conversations as one document), with a recreated bounded restricted- regex secret-matching pattern detector tier alongside it (per-model pii_detection.builtins / .patterns + core/services/routing/piipattern). - Detection labelled by source (ner vs pattern); backend trace / confidence / debug observability; analyze/redact exposed as a synchronous API. - Instance-wide default detector policy + per-usecase default-on; request filtering extended to completions, embeddings, edits & Ollama. - React UI: NER-centric PII editor, detector-models table, pattern/builtins editor, middleware default-policy UI. - Gallery: privacy-filter-multilingual token-classify model + NER install filter; token_classify known_usecase; batch sized to context for NER models. privacy-filter backend registered in the backend gallery (cpu/vulkan/cuda-13 meta + image entries with a capabilities map) matching its CI matrix jobs, and an /import-model auto-detect importer (PrivacyFilterImporter, narrow privacy-filter GGUF detection) replacing the prior pref-only registration. Reconciled against master's independent evolution: - Dropped master's PIIPatternOverrides feature (global-pattern runtime overrides + /api/pii/patterns API + runtime_settings.json persistence). The per-model NER + pattern-detector design supersedes it; it was built on the global redactor pattern set this branch replaced. 
- Reverted the llama.cpp Score carry-patch (0006-server-task-type-score): removed the patch and restored master's grpc-server.cpp Score RPC (direct llama_decode, slot-loop bypass) and LLAMA_VERSION pin, plus master's model_config validation forbidding score + chat/completion/embeddings on llama-cpp. token_classify is unaffected (it runs on the privacy-filter backend, not llama-cpp). Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Richard Palethorpe <io@richiejp.com>
110 lines
4.9 KiB
Docker
110 lines
4.9 KiB
Docker
ARG BASE_IMAGE=ubuntu:24.04
# BUILDER_BASE_IMAGE defaults to BASE_IMAGE so the Dockerfile parses when no
# prebuilt base is supplied; the builder-prebuilt stage is only entered when
# BUILDER_TARGET=builder-prebuilt, so the fallback content is harmless
# (BuildKit prunes the unreferenced builder).
ARG BUILDER_BASE_IMAGE=${BASE_IMAGE}
# BUILDER_TARGET selects which builder stage the scratch image copies from.
# Declared before any FROM so it is usable in `FROM ${BUILDER_TARGET}`. The
# backend_build workflow sets it to builder-prebuilt when the matrix entry
# provides builder-base-image, else builder-fromsource (the local default).
ARG BUILDER_TARGET=builder-fromsource
# Empty global defaults for the apt mirror overrides; builder-fromsource
# redeclares both to bring them into stage scope.
# NOTE(review): presumably consumed by .docker/apt-mirror.sh — confirm.
ARG APT_MIRROR=""
ARG APT_PORTS_MIRROR=""

# privacy-filter: standalone GGML engine for the openai-privacy-filter PII/NER
# token classifier, wrapped as a LocalAI gRPC backend.
#
# Mirrors backend/Dockerfile.llama-cpp: the build toolchain (gRPC + cmake +
# protoc + conditional CUDA/Vulkan) comes from the shared
# .docker/install-base-deps.sh (from-source path) or a prebuilt
# quay.io/go-skynet/ci-cache:base-grpc-* image (CI path) — nothing GPU-specific
# is hand-rolled here. BUILD_TYPE selects the engine backend in the Makefile:
# "" = cpu, "cublas" -> -DPF_CUDA=ON, "vulkan" -> -DPF_VULKAN=ON.

# ============================================================================
# Stage: builder-fromsource — self-contained build. Runs the same install
# script backend/Dockerfile.base-grpc-builder runs, so this path is
# bit-equivalent to the prebuilt base. Used when BUILDER_TARGET=builder-fromsource
# (the default; local `make backends/privacy-filter`).
# ============================================================================
FROM ${BASE_IMAGE} AS builder-fromsource
ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION
ARG CUDA_MINOR_VERSION
ARG CMAKE_FROM_SOURCE=false
# CUDA Toolkit 13.x needs CMake 3.31.9+ for correct toolchain/arch detection.
ARG CMAKE_VERSION=3.31.10
ARG GRPC_VERSION=v1.65.0
ARG GRPC_MAKEFLAGS="-j4 -Otarget"
ARG SKIP_DRIVERS=false
ARG TARGETARCH
ARG UBUNTU_VERSION=2404
# Redeclared without defaults so the values of the global (pre-FROM) ARGs are
# visible inside this stage.
ARG APT_MIRROR
ARG APT_PORTS_MIRROR

# Export every build argument into the stage environment so the RUN steps
# below inherit them.
# NOTE(review): presumably these are the knobs install-base-deps reads — confirm
# against .docker/install-base-deps.sh.
ENV BUILD_TYPE=${BUILD_TYPE} \
    CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} \
    CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} \
    CMAKE_FROM_SOURCE=${CMAKE_FROM_SOURCE} \
    CMAKE_VERSION=${CMAKE_VERSION} \
    GRPC_VERSION=${GRPC_VERSION} \
    GRPC_MAKEFLAGS=${GRPC_MAKEFLAGS} \
    SKIP_DRIVERS=${SKIP_DRIVERS} \
    TARGETARCH=${TARGETARCH} \
    UBUNTU_VERSION=${UBUNTU_VERSION} \
    APT_MIRROR=${APT_MIRROR} \
    APT_PORTS_MIRROR=${APT_PORTS_MIRROR} \
    DEBIAN_FRONTEND=noninteractive
# CUDA on PATH (a no-op when CUDA is not installed, e.g. cpu/vulkan builds).
ENV PATH=/usr/local/cuda/bin:${PATH}

WORKDIR /build

# apt deps + cmake + protoc + gRPC + conditional CUDA/Vulkan, all from the
# shared script (the source of truth that base-grpc-builder also runs).
# Both scripts are bind-mounted from the build context for this step only, so
# they are not copied into a layer.
RUN --mount=type=bind,source=.docker/install-base-deps.sh,target=/usr/local/sbin/install-base-deps \
    --mount=type=bind,source=.docker/apt-mirror.sh,target=/usr/local/sbin/apt-mirror \
    bash /usr/local/sbin/install-base-deps

# install-base-deps installs gRPC under /opt/grpc; copy it to /usr/local so the
# backend's find_package(gRPC CONFIG) resolves it at the canonical prefix.
RUN cp -a /opt/grpc/. /usr/local/

COPY . /LocalAI

# Build and package the backend. /root/.ccache is a BuildKit cache mount keyed
# by target architecture and BUILD_TYPE; sharing=locked lets only one build use
# a given cache at a time.
RUN --mount=type=cache,target=/root/.ccache,id=privacy-filter-ccache-${TARGETARCH}-${BUILD_TYPE},sharing=locked \
    make -C /LocalAI/backend/cpp/privacy-filter BUILD_TYPE=${BUILD_TYPE} NATIVE=false grpc-server package

# ============================================================================
# Stage: builder-prebuilt — FROM a prebuilt
# quay.io/go-skynet/ci-cache:base-grpc-* image (gRPC at /opt/grpc + apt deps +
# CUDA/Vulkan already installed). Used in CI when the matrix entry sets
# builder-base-image.
# ============================================================================
FROM ${BUILDER_BASE_IMAGE} AS builder-prebuilt
ARG BUILD_TYPE
ARG TARGETARCH
ENV BUILD_TYPE=${BUILD_TYPE}
# CUDA on PATH (a no-op for the cpu/vulkan base images).
ENV PATH=/usr/local/cuda/bin:${PATH}

# Mirror builder-fromsource: the base-grpc image installs gRPC to /opt/grpc but
# does not copy it to /usr/local.
RUN cp -a /opt/grpc/. /usr/local/

COPY . /LocalAI

# Same build command and ccache cache-mount id as builder-fromsource, so both
# builder stages share one cache per (TARGETARCH, BUILD_TYPE) pair;
# sharing=locked lets only one build use it at a time.
RUN --mount=type=cache,target=/root/.ccache,id=privacy-filter-ccache-${TARGETARCH}-${BUILD_TYPE},sharing=locked \
    make -C /LocalAI/backend/cpp/privacy-filter BUILD_TYPE=${BUILD_TYPE} NATIVE=false grpc-server package

# ============================================================================
# Final stage — copy the package output from the selected builder. BuildKit
# does not expand variables in `COPY --from=`, so alias the chosen builder to a
# fixed stage name first.
# ============================================================================
FROM ${BUILDER_TARGET} AS builder

# The published image is built from scratch and holds only the packaged
# backend tree; nothing else from the builder stages is copied in.
FROM scratch
COPY --from=builder /LocalAI/backend/cpp/privacy-filter/package/. ./