Files
LocalAI/core/services/routing/router/resolve.go
Richard Palethorpe 6a80e23733 feat(middleware): Model routing, PII filtering, Cloud model proxies (#9802)
Add a routing middleware stack and a cloud-proxy backend.

* cloud-proxy: a Go gRPC backend that forwards OpenAI- and
  Anthropic-shaped chat requests to upstream providers, with an
  optional translate mode (OpenAI request -> Anthropic /v1/messages
  -> OpenAI response) and full tool-calling support.

* routing: admission control, content-aware model routing
  (embedding cache + classifier + rerank + Arch-Router score),
  PII detection/redaction (regex + NER) with streaming filter and
  OpenAI/Anthropic adapters, and a per-user/per-key billing recorder
  backed by GORM or in-memory storage.

* middleware: UsageMiddleware records usage via the billing recorder,
  plus admission, route-model, usage-stamp and trace middlewares.

* observability: BackendTrace ring buffer stores full request bodies
  (capped), MITM proxy emits structured trace events, and router
  classifier decisions surface at /api/router/decide.

* gallery: Arch-Router-1.5B (Q4_K_M and Q8_0).

* UI: cloud-proxy model-editor fields, classifier system-prompt and
  score-normalization config, and a Traces page rendering request
  bodies.

Assisted-by: claude-code:claude-opus-4-7 [Read] [Edit] [Bash]

Signed-off-by: Richard Palethorpe <io@richiejp.com>
2026-05-25 09:28:27 +02:00

204 lines
7.6 KiB
Go

package router
import (
"context"
"fmt"
"slices"
"strings"
"time"
"github.com/mudler/LocalAI/core/config"
)
// CandidateLoader resolves a model name to its parsed ModelConfig.
// Resolve calls it with the candidate picked by MatchCandidate, and
// resolveFallback calls it with the router's configured fallback, so
// the caller (HTTP middleware or realtime handler) can dispatch against
// the loaded config.
//
// Both a non-nil error and a nil *config.ModelConfig are treated by
// those callers as "not loadable" and abort resolution.
//
// Defined as a function value rather than tied to *config.ModelConfigLoader
// so callers in tests can stub it without spinning up a real loader.
type CandidateLoader func(name string) (*config.ModelConfig, error)
// ResolveResult is the output of Resolve. It captures everything a
// caller needs to (a) dispatch the request against the chosen model,
// (b) record an audit row (see ToDecisionRecord), and (c) tell whether
// the fallback path was taken.
type ResolveResult struct {
// RouterModel is the router config's own name (the model the client asked for).
RouterModel string
// ChosenModel is the candidate the classifier picked, or
// cfg.Router.Fallback when the classifier was unavailable, errored,
// or no candidate covered the active labels.
ChosenModel string
// ChosenConfig is the loaded ModelConfig for ChosenModel. The
// caller dispatches against this — it has the right backend,
// pipeline, etc. It is never nil on a successfully returned result,
// and it is never itself a router config (depth-1 invariant).
ChosenConfig *config.ModelConfig
// Decision carries the classifier's labels/score/latency/cache
// info. When UsedFallback is true, Decision.Labels is overwritten
// with []string{LabelFallback}; the remaining fields are the zero
// value (classifier unavailable), only Latency (Classify returned
// an error), or the classifier's own values (no candidate matched).
Decision Decision
// Labels are the labels recorded against this decision — either
// Decision.Labels or []string{LabelFallback} on the fallback path.
// Pulled out so callers don't have to special-case the fallback
// path.
Labels []string
// ClassifierName is the Name() of the classifier that produced the
// decision, or LabelFallback when Resolve was given a nil classifier
// and the fallback path ran without a working classifier.
ClassifierName string
// UsedFallback is true when the result came from cfg.Router.Fallback
// rather than a classifier-picked candidate (classifier
// build/Classify error or no candidate covered the active labels).
UsedFallback bool
}
// Resolve runs the full classify → match → load pipeline for a router
// model config. It is transport-agnostic: callers pass a built
// classifier, a candidate loader, and a probe; Resolve returns a
// ResolveResult or an error.
//
// Parameters:
//   - ctx is forwarded to classifier.Classify.
//   - routerCfg must be non-nil and report HasRouter().
//   - classifier may be nil; that signals "classifier build failed" and
//     pushes resolution straight to the fallback path.
//   - loader turns a model name into its ModelConfig.
//   - probe is forwarded to classifier.Classify unchanged.
//
// Errors returned here are *terminal* — the caller should surface them
// to the client. They are returned when:
//   - routerCfg is nil or has no router block,
//   - the matched candidate cannot be loaded (loader error or nil config),
//   - the matched candidate is itself a router (depth-1 invariant),
//   - the fallback path was needed and resolveFallback failed.
//
// Classifier failures and "no candidate covers the labels" are
// non-terminal: they are routed to the configured fallback and reported
// through ResolveResult.UsedFallback. A matched candidate that fails to
// load does NOT fall back.
func Resolve(ctx context.Context, routerCfg *config.ModelConfig, classifier Classifier, loader CandidateLoader, probe Probe) (*ResolveResult, error) {
	if routerCfg == nil || !routerCfg.HasRouter() {
		return nil, fmt.Errorf("router.Resolve: config has no router block")
	}
	if classifier == nil {
		return resolveFallback(routerCfg, loader, Decision{}, LabelFallback, "classifier unavailable")
	}

	start := time.Now()
	decision, err := classifier.Classify(ctx, probe)
	if err != nil {
		// The classifier's decision is discarded on error; only the time
		// spent waiting for it is carried into the fallback result.
		return resolveFallback(routerCfg, loader, Decision{Latency: time.Since(start)}, classifier.Name(), "classifier error: "+err.Error())
	}

	candidate := MatchCandidate(routerCfg.Router.Candidates, decision.Labels)
	if candidate == "" {
		return resolveFallback(routerCfg, loader, decision, classifier.Name(), "no candidate covers labels: "+strings.Join(decision.Labels, ","))
	}

	candidateCfg, err := loader(candidate)
	if err != nil {
		return nil, fmt.Errorf("router candidate %q not loadable: %w", candidate, err)
	}
	if candidateCfg == nil {
		// Handled separately from the error case: passing a nil error to
		// %w would render as "%!w(<nil>)" in the message.
		return nil, fmt.Errorf("router candidate %q not loadable: loader returned nil config", candidate)
	}
	if candidateCfg.HasRouter() {
		return nil, fmt.Errorf("router candidate %q is itself a router (depth-1 invariant)", candidate)
	}

	return &ResolveResult{
		RouterModel:    routerCfg.Name,
		ChosenModel:    candidate,
		ChosenConfig:   candidateCfg,
		Decision:       decision,
		Labels:         decision.Labels,
		ClassifierName: classifier.Name(),
		UsedFallback:   false,
	}, nil
}
// resolveFallback handles the three failure modes that fall through to
// cfg.Router.Fallback: classifier build failed, Classify returned an
// error, or no candidate covered the active labels.
//
// It loads routerCfg.Router.Fallback through loader and returns a
// ResolveResult with UsedFallback set, ClassifierName set to
// classifierName, and both Labels and Decision.Labels replaced by
// []string{LabelFallback}. All other fields of decision are passed
// through untouched.
//
// An error is returned when no fallback is configured, when the
// fallback cannot be loaded (loader error or nil config), or when the
// fallback is itself a router (depth-1 invariant). The original note on
// this function says these translate to 503/500 at the HTTP layer.
//
// reason describes why the fallback path was taken. It is included only
// in the "no fallback configured" error, for debugging.
func resolveFallback(routerCfg *config.ModelConfig, loader CandidateLoader, decision Decision, classifierName, reason string) (*ResolveResult, error) {
	fallback := routerCfg.Router.Fallback
	if fallback == "" {
		return nil, fmt.Errorf("router: %s and no fallback configured", reason)
	}

	fallbackCfg, err := loader(fallback)
	if err != nil {
		return nil, fmt.Errorf("router fallback %q not loadable: %w", fallback, err)
	}
	if fallbackCfg == nil {
		// Handled separately from the error case: passing a nil error to
		// %w would render as "%!w(<nil>)" in the message.
		return nil, fmt.Errorf("router fallback %q not loadable: loader returned nil config", fallback)
	}
	if fallbackCfg.HasRouter() {
		return nil, fmt.Errorf("router fallback %q is itself a router (depth-1 invariant)", fallback)
	}

	// decision is a by-value copy, so overwriting Labels here does not
	// touch the caller's Decision. Two distinct slices are used so the
	// result's Labels and Decision.Labels do not alias each other.
	decision.Labels = []string{LabelFallback}
	return &ResolveResult{
		RouterModel:    routerCfg.Name,
		ChosenModel:    fallback,
		ChosenConfig:   fallbackCfg,
		Decision:       decision,
		Labels:         []string{LabelFallback},
		ClassifierName: classifierName,
		UsedFallback:   true,
	}, nil
}
// ToDecisionRecord flattens a ResolveResult into the DecisionRecord
// shape. Keeping the projection in one place means the chat-side and
// realtime-side recorders copy the same set of Decision fields; a new
// Decision field only has to be wired in here.
//
// id, correlationID, userID, and source come from the caller because
// they originate outside the routing pipeline (request ID generator,
// auth, entry-point dispatch). CreatedAt is stamped with the current
// UTC time at the moment of the call.
func (r *ResolveResult) ToDecisionRecord(id, correlationID, userID, source string) DecisionRecord {
	var rec DecisionRecord

	// Caller-supplied identity.
	rec.ID = id
	rec.CorrelationID = correlationID
	rec.UserID = userID

	// Routing outcome: the router model doubles as the requested model.
	rec.RouterModel = r.RouterModel
	rec.RequestedModel = r.RouterModel
	rec.ServedModel = r.ChosenModel
	rec.Classifier = r.ClassifierName
	rec.Label = strings.Join(r.Labels, ",")

	// Classifier metrics copied straight from the decision.
	rec.Score = r.Decision.Score
	rec.LatencyMs = r.Decision.Latency.Milliseconds()
	rec.Cached = r.Decision.Cached
	rec.CacheSimilarity = r.Decision.CacheSimilarity
	rec.LabelScores = r.Decision.LabelScores
	rec.ActivationThreshold = r.Decision.ActivationThreshold

	rec.Source = source
	rec.CreatedAt = time.Now().UTC()
	return rec
}
// MatchCandidate returns the Model of the FIRST candidate whose Labels
// contain every label in active. List order is significant: admins
// order candidates smallest → largest, so a single-label request lands
// on the smallest capable model and a multi-label request lands on the
// first larger candidate that covers them all.
//
// It returns the empty string when active is empty or when no candidate
// covers it; the caller is expected to fall back in that case.
func MatchCandidate(candidates []config.RouterCandidate, active []string) string {
	if len(active) == 0 {
		return ""
	}
	for i := range candidates {
		cand := &candidates[i]
		if !labelSetCovers(cand.Labels, active) {
			continue
		}
		return cand.Model
	}
	return ""
}
// labelSetCovers reports whether have is a superset of needed, i.e. no
// element of needed is absent from have. An empty needed is trivially
// covered. Each lookup is a linear scan of have, which is fine for the
// small label sets this is used with.
func labelSetCovers(have, needed []string) bool {
	anyMissing := slices.ContainsFunc(needed, func(label string) bool {
		return !slices.Contains(have, label)
	})
	return !anyMissing
}