mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-18 21:58:58 -04:00
Squashed feat/pii-ner-tier-engine rebased onto master (was 45 commits; see backup/pii-ner-tier-engine-prerebase). Net change: - privacy-filter.cpp: standalone GGML engine for the openai-privacy-filter PII/NER token classifier, wired as a LocalAI gRPC backend (CPU/CUDA/Vulkan). TokenClassify moves off the patched llama.cpp path onto this backend. - PII filter reworked to be NER-centric (encoder/NER detection tier scanning whole conversations as one document), with a recreated bounded restricted- regex secret-matching pattern detector tier alongside it (per-model pii_detection.builtins / .patterns + core/services/routing/piipattern). - Detection labelled by source (ner vs pattern); backend trace / confidence / debug observability; analyze/redact exposed as a synchronous API. - Instance-wide default detector policy + per-usecase default-on; request filtering extended to completions, embeddings, edits & Ollama. - React UI: NER-centric PII editor, detector-models table, pattern/builtins editor, middleware default-policy UI. - Gallery: privacy-filter-multilingual token-classify model + NER install filter; token_classify known_usecase; batch sized to context for NER models. privacy-filter backend registered in the backend gallery (cpu/vulkan/cuda-13 meta + image entries with a capabilities map) matching its CI matrix jobs, and an /import-model auto-detect importer (PrivacyFilterImporter, narrow privacy-filter GGUF detection) replacing the prior pref-only registration. Reconciled against master's independent evolution: - Dropped master's PIIPatternOverrides feature (global-pattern runtime overrides + /api/pii/patterns API + runtime_settings.json persistence). The per-model NER + pattern-detector design supersedes it; it was built on the global redactor pattern set this branch replaced. - Reverted the llama.cpp Score carry-patch (0006-server-task-type-score): removed the patch and restored master's grpc-server.cpp Score RPC (direct llama_decode, slot-loop bypass) and LLAMA_VERSION pin, plus master's model_config validation forbidding score + chat/completion/embeddings on llama-cpp. token_classify is unaffected (it runs on the privacy-filter backend, not llama-cpp). Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Richard Palethorpe <io@richiejp.com>
398 lines
14 KiB
Go
398 lines
14 KiB
Go
package routes
|
|
|
|
import (
|
|
"context"
|
|
"net/http"
|
|
"strconv"
|
|
|
|
"github.com/labstack/echo/v4"
|
|
"github.com/mudler/LocalAI/core/application"
|
|
"github.com/mudler/LocalAI/core/config"
|
|
"github.com/mudler/LocalAI/core/http/auth"
|
|
"github.com/mudler/LocalAI/core/http/endpoints/localai"
|
|
"github.com/mudler/LocalAI/core/http/middleware"
|
|
"github.com/mudler/LocalAI/core/services/routing/router"
|
|
)
|
|
|
|
// RegisterMiddlewareRoutes wires the routing-module admin surface that
|
|
// powers the /app/middleware React page. Two endpoints:
|
|
//
|
|
// - GET /api/middleware/status — single round-trip aggregator. Lists
|
|
// PII patterns with current actions, each model's resolved
|
|
// enabled/override state, recent event count, and a router status
|
|
// stub (until subsystem 2 lands).
|
|
// - GET /api/router/status — placeholder that the page renders for
|
|
// the Routing tab. Returns { configured: false, models: [] } today;
|
|
// subsystem 2 fills it in.
|
|
//
|
|
// Both are admin-only when auth is on. In single-user (no-auth) mode
|
|
// the synthetic local user has Role: admin so the page works without
|
|
// extra config — same gating shape as the existing /api/usage/all.
|
|
func RegisterMiddlewareRoutes(e *echo.Echo, app *application.Application) {
|
|
e.GET("/api/middleware/status", func(c echo.Context) error {
|
|
viewer := resolveUsageUser(c, app)
|
|
if viewer == nil {
|
|
return c.JSON(http.StatusUnauthorized, map[string]string{"error": "not authenticated"})
|
|
}
|
|
if viewer.Role != auth.RoleAdmin {
|
|
return c.JSON(http.StatusForbidden, map[string]string{"error": "admin access required"})
|
|
}
|
|
|
|
piiSection := buildPIIStatus(app)
|
|
routerSection := buildRouterStatus(app)
|
|
mitmSection := buildMITMStatus(app)
|
|
admissionSection := buildAdmissionStatus(app)
|
|
|
|
return c.JSON(http.StatusOK, map[string]any{
|
|
"pii": piiSection,
|
|
"router": routerSection,
|
|
"mitm": mitmSection,
|
|
"admission": admissionSection,
|
|
})
|
|
})
|
|
|
|
e.GET("/api/router/status", func(c echo.Context) error {
|
|
// Read-only — admins want to see classifier configurations
|
|
// without authenticating, same as /api/pii/patterns.
|
|
return c.JSON(http.StatusOK, buildRouterStatus(app))
|
|
})
|
|
|
|
e.GET("/api/middleware/proxy-ca.crt", func(c echo.Context) error {
|
|
// The CA cert is the public half — safe to expose without
|
|
// auth so clients can curl it during initial setup. The
|
|
// private key never leaves disk and is mode 0600. Returning
|
|
// 404 (rather than 500) when MITM is disabled keeps the
|
|
// endpoint a clean "is this feature available?" probe.
|
|
ca := app.MITMCA()
|
|
if ca == nil {
|
|
return c.JSON(http.StatusNotFound, map[string]string{
|
|
"error": "mitm proxy is not enabled (set --mitm-listen to start it)",
|
|
})
|
|
}
|
|
c.Response().Header().Set("Content-Type", "application/x-pem-file")
|
|
c.Response().Header().Set("Content-Disposition", `attachment; filename="localai-mitm-ca.crt"`)
|
|
return c.Blob(http.StatusOK, "application/x-pem-file", ca.PublicCertPEM())
|
|
})
|
|
|
|
e.GET("/api/router/decisions", func(c echo.Context) error {
|
|
viewer := resolveUsageUser(c, app)
|
|
if viewer == nil {
|
|
return c.JSON(http.StatusUnauthorized, map[string]string{"error": "not authenticated"})
|
|
}
|
|
// Decision logs may include user ids — admin-only when auth is
|
|
// on; the synthetic local user has admin so single-user mode
|
|
// works.
|
|
if viewer.Role != auth.RoleAdmin {
|
|
return c.JSON(http.StatusForbidden, map[string]string{"error": "admin access required"})
|
|
}
|
|
|
|
store := app.RouterDecisions()
|
|
if store == nil {
|
|
return c.JSON(http.StatusOK, map[string]any{"decisions": []any{}})
|
|
}
|
|
|
|
limit := 100
|
|
if v := c.QueryParam("limit"); v != "" {
|
|
if n, err := strconv.Atoi(v); err == nil && n > 0 {
|
|
limit = n
|
|
}
|
|
}
|
|
decisions, err := store.List(c.Request().Context(), router.DecisionListQuery{
|
|
CorrelationID: c.QueryParam("correlation_id"),
|
|
UserID: c.QueryParam("user_id"),
|
|
RouterModel: c.QueryParam("router_model"),
|
|
Limit: limit,
|
|
})
|
|
if err != nil {
|
|
return c.JSON(http.StatusInternalServerError, map[string]string{"error": "failed to list decisions"})
|
|
}
|
|
return c.JSON(http.StatusOK, map[string]any{"decisions": decisions})
|
|
})
|
|
|
|
// GET /api/router/cache/stats — embedding-cache counters per
|
|
// router model. Read-only; same auth gating as /api/router/status
|
|
// (any authenticated user can see configuration). Omitted entries
|
|
// indicate "embedding cache not enabled for this router".
|
|
e.GET("/api/router/cache/stats", func(c echo.Context) error {
|
|
reg := app.RouterClassifierRegistry()
|
|
stats := map[string]router.EmbeddingCacheStats{}
|
|
if reg != nil {
|
|
stats = reg.EmbeddingCacheStatsByRouter()
|
|
}
|
|
return c.JSON(http.StatusOK, map[string]any{"caches": stats})
|
|
})
|
|
|
|
// POST /api/router/decide — programmatic decision-oracle endpoint
|
|
// for external routers. Runs the same classifier that the in-band
|
|
// RouteModel middleware would have run and returns the chosen
|
|
// label set + candidate model, without rewriting the request,
|
|
// forwarding it, or recording a row in the decision store.
|
|
//
|
|
// Admin-only — same gating as /api/router/decisions. The risk
|
|
// surface is "runs classifier inference on arbitrary input", which
|
|
// matches the decision-log endpoint's gating.
|
|
decideHandler := localai.RouterDecideEndpoint(
|
|
app.ModelConfigLoader(),
|
|
app.ApplicationConfig(),
|
|
middleware.ClassifierDeps{
|
|
Scorer: app.Scorer,
|
|
TokenCounter: app.TokenCounter,
|
|
Embedder: app.Embedder,
|
|
VectorStore: app.VectorStore,
|
|
Reranker: app.Reranker,
|
|
ModelLookup: app.ModelConfigLookup(),
|
|
Registry: app.RouterClassifierRegistry(),
|
|
Evaluator: app.TemplatesEvaluator(),
|
|
},
|
|
)
|
|
e.POST("/api/router/decide", func(c echo.Context) error {
|
|
viewer := resolveUsageUser(c, app)
|
|
if viewer == nil {
|
|
return c.JSON(http.StatusUnauthorized, map[string]string{"error": "not authenticated"})
|
|
}
|
|
if viewer.Role != auth.RoleAdmin {
|
|
return c.JSON(http.StatusForbidden, map[string]string{"error": "admin access required"})
|
|
}
|
|
return decideHandler(c)
|
|
})
|
|
}
|
|
|
|
// buildRouterStatus inventories every model that declares a Router
|
|
// block and reports their classifiers + candidate tables. Reads from
|
|
// the same loader the RouteModel middleware uses so the admin page
|
|
// agrees with what's actually live in the request path.
|
|
func buildRouterStatus(app *application.Application) map[string]any {
|
|
models := []map[string]any{}
|
|
hasAny := false
|
|
cacheStats := map[string]router.EmbeddingCacheStats{}
|
|
if reg := app.RouterClassifierRegistry(); reg != nil {
|
|
cacheStats = reg.EmbeddingCacheStatsByRouter()
|
|
}
|
|
for _, cfg := range app.ModelConfigLoader().GetAllModelsConfigs() {
|
|
if !cfg.HasRouter() {
|
|
continue
|
|
}
|
|
hasAny = true
|
|
candidates := make([]map[string]any, 0, len(cfg.Router.Candidates))
|
|
for _, ca := range cfg.Router.Candidates {
|
|
candidates = append(candidates, map[string]any{
|
|
"model": ca.Model,
|
|
"labels": ca.Labels,
|
|
})
|
|
}
|
|
policies := make([]map[string]any, 0, len(cfg.Router.Policies))
|
|
for _, p := range cfg.Router.Policies {
|
|
policies = append(policies, map[string]any{
|
|
"label": p.Label,
|
|
"description": p.Description,
|
|
})
|
|
}
|
|
classifier := cfg.Router.Classifier
|
|
if classifier == "" {
|
|
classifier = router.ClassifierScore
|
|
}
|
|
entry := map[string]any{
|
|
"name": cfg.Name,
|
|
"classifier": classifier,
|
|
"policies": policies,
|
|
"candidates": candidates,
|
|
"fallback": cfg.Router.Fallback,
|
|
}
|
|
if ec := cfg.Router.EmbeddingCache; ec != nil {
|
|
cacheEntry := map[string]any{
|
|
"embedding_model": ec.EmbeddingModel,
|
|
"similarity_threshold": ec.SimilarityThreshold,
|
|
"confidence_threshold": ec.ConfidenceThreshold,
|
|
"store_name": ec.StoreName,
|
|
}
|
|
if s, ok := cacheStats[cfg.Name]; ok {
|
|
cacheEntry["stats"] = s
|
|
}
|
|
entry["embedding_cache"] = cacheEntry
|
|
}
|
|
models = append(models, entry)
|
|
}
|
|
|
|
recentCount := 0
|
|
if store := app.RouterDecisions(); store != nil {
|
|
if n, err := store.Count(context.Background()); err == nil {
|
|
recentCount = n
|
|
}
|
|
}
|
|
|
|
out := map[string]any{
|
|
"configured": hasAny,
|
|
"models": models,
|
|
"recent_decision_count": recentCount,
|
|
"available_classifiers": []string{router.ClassifierScore},
|
|
}
|
|
if !hasAny {
|
|
out["note"] = "No router models configured. Add a `router:` block to a model YAML to enable intelligent routing."
|
|
}
|
|
return out
|
|
}
|
|
|
|
func buildMITMStatus(app *application.Application) map[string]any {
|
|
srv := app.MITMServer()
|
|
ca := app.MITMCA()
|
|
cfg := app.ApplicationConfig()
|
|
|
|
// MITM-bound model configs — anything with an mitm: block, even
|
|
// if hosts is empty. Surfaces a "fresh from template" config the
|
|
// admin started but hasn't yet attached a host to.
|
|
mitmModels := []map[string]any{}
|
|
for _, mc := range app.ModelConfigLoader().GetModelConfigsByFilter(func(_ string, c *config.ModelConfig) bool {
|
|
return len(c.MITM.Hosts) > 0
|
|
}) {
|
|
mitmModels = append(mitmModels, map[string]any{
|
|
"name": mc.Name,
|
|
"hosts": mc.MITM.Hosts,
|
|
"pii_enabled": mc.PIIIsEnabled(),
|
|
"backend": mc.Backend,
|
|
})
|
|
}
|
|
|
|
out := map[string]any{
|
|
"running": srv != nil,
|
|
"listen_addr": "",
|
|
"configured_addr": cfg.MITMListen,
|
|
"host_owners": app.MITMHostOwners(),
|
|
"host_conflicts": app.MITMHostConflicts(),
|
|
"models": mitmModels,
|
|
"ca_available": ca != nil,
|
|
"ca_cert_url": "",
|
|
}
|
|
if conflicts := app.MITMHostConflicts(); len(conflicts) > 0 {
|
|
out["error"] = "MITM listener disabled: duplicate host claims across model configs (see host_conflicts). Resolve by editing the conflicting model YAMLs so each host appears in at most one mitm.hosts list."
|
|
}
|
|
if srv != nil {
|
|
out["listen_addr"] = srv.Addr()
|
|
}
|
|
if ca != nil {
|
|
out["ca_cert_url"] = "/api/middleware/proxy-ca.crt"
|
|
}
|
|
return out
|
|
}
|
|
|
|
// buildAdmissionStatus reports each model's MaxConcurrent ceiling
|
|
// and current in-flight count. Models with no limit set are
|
|
// omitted — the dashboard view is "what's gated", not "every
|
|
// model in the loader".
|
|
func buildAdmissionStatus(app *application.Application) map[string]any {
|
|
limiter := app.AdmissionLimiter()
|
|
models := []map[string]any{}
|
|
if limiter == nil {
|
|
return map[string]any{"models": models}
|
|
}
|
|
for _, cfg := range app.ModelConfigLoader().GetAllModelsConfigs() {
|
|
if cfg.Limits.MaxConcurrent <= 0 {
|
|
continue
|
|
}
|
|
models = append(models, map[string]any{
|
|
"name": cfg.Name,
|
|
"max_concurrent": cfg.Limits.MaxConcurrent,
|
|
"retry_after_seconds": cfg.Limits.RetryAfterSeconds,
|
|
"in_flight": limiter.InFlight(cfg.Name),
|
|
})
|
|
}
|
|
return map[string]any{"models": models}
|
|
}
|
|
|
|
// buildPIIStatus builds the pii section of /api/middleware/status. It
|
|
// walks every model config and reports the resolved enabled state plus
|
|
// the NER detector models each one references — that's what the admin
|
|
// page renders so the operator can see at a glance which models are
|
|
// protected and by which detectors. The detection policy itself
|
|
// (entity→action, min score) lives on each detector model's
|
|
// pii_detection block.
|
|
func buildPIIStatus(app *application.Application) map[string]any {
|
|
appCfg := app.ApplicationConfig()
|
|
models := []map[string]any{}
|
|
for _, cfg := range app.ModelConfigLoader().GetAllModelsConfigs() {
|
|
// Only list models PII filtering can actually apply to (reachable
|
|
// through a text-accepting endpoint with a PII adapter wired).
|
|
// Skips VAD/STT/embedding/image-only models and the token_classify
|
|
// detector models themselves, which are the filters, not consumers.
|
|
if !cfg.PIIFilterApplies() {
|
|
continue
|
|
}
|
|
explicit := cfg.PII.Enabled != nil
|
|
ownDetectors := cfg.PIIDetectors()
|
|
// Resolve through the shared policy so the table reflects the EFFECTIVE
|
|
// state, including the instance-wide default detector — what the
|
|
// request path actually does.
|
|
enabled, detectors := app.ResolvePIIPolicy(&cfg)
|
|
|
|
entry := map[string]any{
|
|
"name": cfg.Name,
|
|
"backend": cfg.Backend,
|
|
"enabled": enabled,
|
|
"detectors": detectors,
|
|
"explicit": explicit,
|
|
// Why is this on? backend default (cloud-proxy) vs an explicit YAML
|
|
// toggle. Helps admins understand the resolved state without
|
|
// reading source.
|
|
"default_for_backend": !explicit && cfg.Backend == "cloud-proxy",
|
|
// The detectors came from the global default, not this model's YAML.
|
|
"detectors_from_default": enabled && len(ownDetectors) == 0 && len(detectors) > 0,
|
|
}
|
|
models = append(models, entry)
|
|
}
|
|
|
|
// Detector models: the token_classify "filter" models themselves (NER and
|
|
// in-process pattern matchers), which PIIFilterApplies deliberately omits
|
|
// from the consumer list above. The Filtering tab renders these as a table
|
|
// with a per-row toggle marking membership in the instance-wide default
|
|
// detector set, so admins manage defaults without retyping model names.
|
|
defaultSet := map[string]bool{}
|
|
for _, d := range appCfg.PIIDefaultDetectors {
|
|
defaultSet[d] = true
|
|
}
|
|
detectorModels := []map[string]any{}
|
|
for _, cfg := range app.ModelConfigLoader().GetAllModelsConfigs() {
|
|
if !cfg.HasUsecases(config.FLAG_TOKEN_CLASSIFY) {
|
|
continue
|
|
}
|
|
typ := "ner"
|
|
if cfg.IsPatternDetector() {
|
|
typ = "pattern"
|
|
}
|
|
detectorModels = append(detectorModels, map[string]any{
|
|
"name": cfg.Name,
|
|
"backend": cfg.Backend,
|
|
"type": typ,
|
|
// Whether this detector is in the instance-wide default set.
|
|
"default": defaultSet[cfg.Name],
|
|
})
|
|
delete(defaultSet, cfg.Name)
|
|
}
|
|
// Surface any default detector that names a model that is no longer loaded
|
|
// (or lost the token_classify usecase) so the admin can still toggle it off.
|
|
for name := range defaultSet {
|
|
detectorModels = append(detectorModels, map[string]any{
|
|
"name": name,
|
|
"backend": "",
|
|
"type": "unknown",
|
|
"default": true,
|
|
"missing": true,
|
|
})
|
|
}
|
|
|
|
recentCount := 0
|
|
if app.PIIEvents() != nil {
|
|
if n, err := app.PIIEvents().Count(context.Background()); err == nil {
|
|
recentCount = n
|
|
}
|
|
}
|
|
|
|
return map[string]any{
|
|
"enabled_globally": true,
|
|
"default_enabled_for_backends": []string{"cloud-proxy"},
|
|
"models": models,
|
|
"detector_models": detectorModels,
|
|
"recent_event_count": recentCount,
|
|
// Instance-wide default policy (the Default PII policy editor).
|
|
"default_detectors": appCfg.PIIDefaultDetectors,
|
|
}
|
|
}
|