Files
LocalAI/pkg/mcp/localaitools/client.go
Richard Palethorpe 3fa7b2955c feat(pii): NER tier engine — privacy-filter.cpp backend + NER-centric PII filter (#10360)
Squashed feat/pii-ner-tier-engine rebased onto master (was 45 commits; see
backup/pii-ner-tier-engine-prerebase). Net change:

- privacy-filter.cpp: standalone GGML engine for the openai-privacy-filter
  PII/NER token classifier, wired as a LocalAI gRPC backend (CPU/CUDA/Vulkan).
  TokenClassify moves off the patched llama.cpp path onto this backend.
- PII filter reworked to be NER-centric (encoder/NER detection tier scanning
  whole conversations as one document), with a recreated bounded restricted-
  regex secret-matching pattern detector tier alongside it (per-model
  pii_detection.builtins / .patterns + core/services/routing/piipattern).
- Detection labelled by source (ner vs pattern); backend trace / confidence /
  debug observability; analyze/redact exposed as a synchronous API.
- Instance-wide default detector policy + per-usecase default-on; request
  filtering extended to completions, embeddings, edits & Ollama.
- React UI: NER-centric PII editor, detector-models table, pattern/builtins
  editor, middleware default-policy UI.
- Gallery: privacy-filter-multilingual token-classify model + NER install
  filter; token_classify known_usecase; batch sized to context for NER models.
  privacy-filter backend registered in the backend gallery (cpu/vulkan/cuda-13
  meta + image entries with a capabilities map) matching its CI matrix jobs,
  and an /import-model auto-detect importer (PrivacyFilterImporter, narrow
  privacy-filter GGUF detection) replacing the prior pref-only registration.

Reconciled against master's independent evolution:

- Dropped master's PIIPatternOverrides feature (global-pattern runtime
  overrides + /api/pii/patterns API + runtime_settings.json persistence). The
  per-model NER + pattern-detector design supersedes it; it was built on the
  global redactor pattern set this branch replaced.
- Reverted the llama.cpp Score carry-patch (0006-server-task-type-score):
  removed the patch and restored master's grpc-server.cpp Score RPC (direct
  llama_decode, slot-loop bypass) and LLAMA_VERSION pin, plus master's
  model_config validation forbidding score + chat/completion/embeddings on
  llama-cpp. token_classify is unaffected (it runs on the privacy-filter
  backend, not llama-cpp).

Assisted-by: Claude:claude-opus-4-8 [Claude Code]

Signed-off-by: Richard Palethorpe <io@richiejp.com>
2026-06-18 11:45:22 +01:00

97 lines
4.5 KiB
Go

package localaitools
import (
"context"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services/modeladmin"
"github.com/mudler/LocalAI/pkg/vram"
)
// LocalAIClient is the contract every LocalAI admin tool is written against.
// Two implementations exist:
//
//   - inproc.Client: in-process; calls LocalAI services directly.
//   - httpapi.Client: out-of-process; calls the LocalAI REST API.
//
// Neither the tool handlers nor the embedded skill prompts depend on which
// of the two sits behind the interface.
//
// Types that already exist elsewhere in the codebase with the right shape
// (config.Gallery, gallery.Metadata, schema.KnownBackend, vram.EstimateResult,
// modeladmin.Action/Capability) are exposed as-is instead of being mirrored
// in a parallel DTO, so the LLM-visible wire format stays aligned with the
// rest of LocalAI by construction.
type LocalAIClient interface {
	// ---- Models / gallery (read) ----

	GallerySearch(ctx context.Context, q GallerySearchQuery) ([]gallery.Metadata, error)
	ListInstalledModels(ctx context.Context, capability Capability) ([]InstalledModel, error)
	ListGalleries(ctx context.Context) ([]config.Gallery, error)
	GetJobStatus(ctx context.Context, jobID string) (*JobStatus, error)
	GetModelConfig(ctx context.Context, name string) (*ModelConfigView, error)

	// ---- Models / gallery (write) ----

	// InstallModel hands back a job ID instead of a finished result.
	// NOTE(review): presumably that ID is what GetJobStatus takes — confirm.
	InstallModel(ctx context.Context, req InstallModelRequest) (jobID string, err error)
	DeleteModel(ctx context.Context, name string) error
	// EditModelConfig takes the change as a free-form map keyed by string.
	// NOTE(review): merge semantics of patch are not visible here — confirm
	// against the implementations.
	EditModelConfig(ctx context.Context, name string, patch map[string]any) error
	ReloadModels(ctx context.Context) error
	ImportModelURI(ctx context.Context, req ImportModelURIRequest) (*ImportModelURIResponse, error)

	// ---- Backends ----

	// ListBackends reports the installed backends. It deliberately uses the
	// thin localaitools.Backend type instead of gallery.SystemBackend: the
	// latter includes filesystem paths (RunFile, Metadata) that must not be
	// shown to the LLM.
	ListBackends(ctx context.Context) ([]Backend, error)
	// ListKnownBackends mirrors the shape served by REST /backends/known.
	ListKnownBackends(ctx context.Context) ([]schema.KnownBackend, error)
	// InstallBackend and UpgradeBackend each hand back a job ID.
	InstallBackend(ctx context.Context, req InstallBackendRequest) (jobID string, err error)
	UpgradeBackend(ctx context.Context, name string) (jobID string, err error)

	// ---- System ----

	SystemInfo(ctx context.Context) (*SystemInfo, error)
	ListNodes(ctx context.Context) ([]Node, error)
	VRAMEstimate(ctx context.Context, req VRAMEstimateRequest) (*vram.EstimateResult, error)

	// ---- State ----

	// ToggleModelState expects action to be modeladmin.ActionEnable or
	// modeladmin.ActionDisable.
	ToggleModelState(ctx context.Context, name string, action modeladmin.Action) error
	// ToggleModelPinned expects action to be modeladmin.ActionPin or
	// modeladmin.ActionUnpin.
	ToggleModelPinned(ctx context.Context, name string, action modeladmin.Action) error

	// ---- Branding / whitelabeling ----

	// GetBranding fetches the instance branding currently configured: name,
	// tagline and asset URLs.
	GetBranding(ctx context.Context) (*Branding, error)
	// SetBranding changes the text branding fields only. Binary asset
	// uploads are not exposed over MCP; admins use the Settings UI for
	// those.
	SetBranding(ctx context.Context, req SetBrandingRequest) (*Branding, error)

	// ---- Usage / billing ----

	// GetUsageStats reports aggregated token usage. When running in
	// single-user no-auth mode the figures are those of the synthetic local
	// user. The "admin required to query other users" rule is enforced by
	// the implementation.
	GetUsageStats(ctx context.Context, q UsageStatsQuery) (*UsageStats, error)

	// ---- PII filter ----

	// GetPIIEvents lists recent redaction events; with auth on, the
	// implementation enforces "admin required". There are no regex pattern
	// tools any more — they were removed. Detection policy now lives on
	// each detector model's pii_detection block and is managed through the
	// model-config tools.
	GetPIIEvents(ctx context.Context, q PIIEventsQuery) ([]PIIEvent, error)

	// ---- Middleware admin ----

	// GetMiddlewareStatus reports the aggregated state shown on the
	// /app/middleware page: active PII patterns, per-model resolved enabled
	// state, recent event count and the router placeholder.
	GetMiddlewareStatus(ctx context.Context) (*MiddlewareStatus, error)

	// ---- Router (intelligent routing) ----

	// GetRouterDecisions lists recent routing decisions, serving both the
	// /app/middleware Routing tab and agent-driven introspection. With auth
	// on it is admin-required.
	GetRouterDecisions(ctx context.Context, q RouterDecisionsQuery) ([]RouterDecision, error)
}