Files
LocalAI/pkg/mcp/localaitools/client.go
Richard Palethorpe 6a80e23733 feat(middleware): Model routing, PII filtering, Cloud model proxies (#9802)
Add a routing middleware stack and a cloud-proxy backend.

* cloud-proxy: a Go gRPC backend that forwards OpenAI- and
  Anthropic-shaped chat requests to upstream providers, with an
  optional translate mode (OpenAI request -> Anthropic /v1/messages
  -> OpenAI response) and full tool-calling support.

* routing: admission control, content-aware model routing
  (embedding cache + classifier + rerank + Arch-Router score),
  PII detection/redaction (regex + NER) with streaming filter and
  OpenAI/Anthropic adapters, and a per-user/per-key billing recorder
  backed by GORM or in-memory storage.

* middleware: UsageMiddleware records usage via the billing recorder,
  plus admission, route-model, usage-stamp and trace middlewares.

* observability: BackendTrace ring buffer stores full request bodies
  (capped), MITM proxy emits structured trace events, and router
  classifier decisions surface at /api/router/decide.

* gallery: Arch-Router-1.5B (Q4_K_M and Q8_0).

* UI: cloud-proxy model-editor fields, classifier system-prompt and
  score-normalization config, and a Traces page rendering request
  bodies.

Assisted-by: claude-code:claude-opus-4-7 [Read] [Edit] [Bash]

Signed-off-by: Richard Palethorpe <io@richiejp.com>
2026-05-25 09:28:27 +02:00

110 lines
5.1 KiB
Go

package localaitools
import (
"context"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services/modeladmin"
"github.com/mudler/LocalAI/pkg/vram"
)
// LocalAIClient is the surface tools depend on. It has two implementations:
//
// - inproc.Client (in-process; calls LocalAI services directly)
// - httpapi.Client (out-of-process; calls the LocalAI REST API)
//
// Tool handlers and the embedded skill prompts are agnostic to which
// implementation backs the client.
//
// Where the same shape already exists elsewhere in the codebase
// (config.Gallery, gallery.Metadata, schema.KnownBackend, vram.EstimateResult,
// modeladmin.Action/Capability) we surface it directly rather than maintain
// a parallel DTO — keeping the LLM-visible wire format aligned with the
// rest of LocalAI by construction.
//
// Every method takes a context.Context as its first parameter and reports
// failure through a trailing error result.
type LocalAIClient interface {
// ---- Models / gallery (read) ----
// GallerySearch runs the query q against the model galleries and returns
// the resulting entries in the shared gallery.Metadata shape.
GallerySearch(ctx context.Context, q GallerySearchQuery) ([]gallery.Metadata, error)
// ListInstalledModels returns the installed models.
// NOTE(review): capability presumably narrows the list to models that
// offer that capability; how an empty value is treated is not visible
// here — confirm against the implementations.
ListInstalledModels(ctx context.Context, capability Capability) ([]InstalledModel, error)
// ListGalleries returns the galleries in the shared config.Gallery shape.
ListGalleries(ctx context.Context) ([]config.Gallery, error)
// GetJobStatus returns the status of the job identified by jobID.
// NOTE(review): jobID is presumably one of the IDs handed out by
// InstallModel, InstallBackend or UpgradeBackend — confirm.
GetJobStatus(ctx context.Context, jobID string) (*JobStatus, error)
// GetModelConfig returns a view of the configuration of the model
// identified by name.
GetModelConfig(ctx context.Context, name string) (*ModelConfigView, error)
// ---- Models / gallery (write) ----
// InstallModel requests installation of the model described by req and
// returns the ID of the resulting job.
InstallModel(ctx context.Context, req InstallModelRequest) (jobID string, err error)
// DeleteModel removes the model identified by name.
DeleteModel(ctx context.Context, name string) error
// EditModelConfig applies patch to the configuration of the model
// identified by name.
// NOTE(review): the merge semantics of patch (shallow vs. deep, how keys
// are removed) are not visible here — confirm against the implementations.
EditModelConfig(ctx context.Context, name string, patch map[string]any) error
// ReloadModels asks LocalAI to reload its models.
ReloadModels(ctx context.Context) error
// ImportModelURI imports a model from the URI described by req and
// returns the outcome of the import.
ImportModelURI(ctx context.Context, req ImportModelURIRequest) (*ImportModelURIResponse, error)
// ---- Backends ----
// ListBackends returns installed backends. The shape stays a thin
// localaitools.Backend rather than gallery.SystemBackend because the
// latter carries filesystem paths (RunFile, Metadata) the LLM
// shouldn't see.
ListBackends(ctx context.Context) ([]Backend, error)
// ListKnownBackends returns the same shape as REST /backends/known.
ListKnownBackends(ctx context.Context) ([]schema.KnownBackend, error)
// InstallBackend requests installation of the backend described by req
// and returns the ID of the resulting job.
InstallBackend(ctx context.Context, req InstallBackendRequest) (jobID string, err error)
// UpgradeBackend requests an upgrade of the backend identified by name
// and returns the ID of the resulting job.
UpgradeBackend(ctx context.Context, name string) (jobID string, err error)
// ---- System ----
// SystemInfo returns information about the LocalAI instance.
SystemInfo(ctx context.Context) (*SystemInfo, error)
// ListNodes returns the nodes known to the instance.
// NOTE(review): presumably the workers of a distributed deployment —
// confirm what is returned for a single-node setup.
ListNodes(ctx context.Context) ([]Node, error)
// VRAMEstimate returns a VRAM estimate for req in the shared
// vram.EstimateResult shape.
VRAMEstimate(ctx context.Context, req VRAMEstimateRequest) (*vram.EstimateResult, error)
// ---- State ----
// ToggleModelState accepts modeladmin.ActionEnable / ActionDisable.
ToggleModelState(ctx context.Context, name string, action modeladmin.Action) error
// ToggleModelPinned accepts modeladmin.ActionPin / ActionUnpin.
ToggleModelPinned(ctx context.Context, name string, action modeladmin.Action) error
// ---- Branding / whitelabeling ----
// GetBranding returns the configured instance branding (name, tagline,
// asset URLs).
GetBranding(ctx context.Context) (*Branding, error)
// SetBranding updates the text branding fields. Asset uploads are not
// exposed over MCP — admins use the Settings UI for binary files.
SetBranding(ctx context.Context, req SetBrandingRequest) (*Branding, error)
// ---- Usage / billing ----
// GetUsageStats returns aggregated token usage. In single-user
// no-auth mode this reports the synthetic local user's usage. The
// implementation enforces "admin required to query other users".
GetUsageStats(ctx context.Context, q UsageStatsQuery) (*UsageStats, error)
// ---- PII filter ----
// ListPIIPatterns returns the active PII pattern set with each
// one's action.
ListPIIPatterns(ctx context.Context) ([]PIIPattern, error)
// GetPIIEvents returns recent redaction events. Implementation
// enforces "admin required" when auth is on.
GetPIIEvents(ctx context.Context, q PIIEventsQuery) ([]PIIEvent, error)
// TestPIIRedaction dry-runs the redactor against text. No event
// is recorded.
TestPIIRedaction(ctx context.Context, req PIIRedactTestRequest) (*PIIRedactTestResult, error)
// SetPIIPatternAction mutates the named pattern's action and/or
// disabled state in-process. Transient until PersistPIIPatterns is
// called — runtime_settings.json then applies the deltas on the
// next start. Admin-required.
SetPIIPatternAction(ctx context.Context, req PIIPatternActionUpdate) error
// PersistPIIPatterns snapshots the live redactor's per-pattern
// (action, disabled) state into runtime_settings.json. Admin-required.
PersistPIIPatterns(ctx context.Context) error
// ---- Middleware admin ----
// GetMiddlewareStatus returns the aggregated state surfaced on the
// /app/middleware page: active PII patterns, per-model resolved
// enabled state, recent event count, router placeholder.
GetMiddlewareStatus(ctx context.Context) (*MiddlewareStatus, error)
// ---- Router (intelligent routing) ----
// GetRouterDecisions returns recent routing decisions for the
// /app/middleware Routing tab and for agent-driven introspection.
// Admin-required when auth is on.
GetRouterDecisions(ctx context.Context, q RouterDecisionsQuery) ([]RouterDecision, error)
}