mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-02 05:06:34 -04:00
Add a routing middleware stack and a cloud-proxy backend. * cloud-proxy: a Go gRPC backend that forwards OpenAI- and Anthropic-shaped chat requests to upstream providers, with an optional translate mode (OpenAI request -> Anthropic /v1/messages -> OpenAI response) and full tool-calling support. * routing: admission control, content-aware model routing (embedding cache + classifier + rerank + Arch-Router score), PII detection/redaction (regex + NER) with streaming filter and OpenAI/Anthropic adapters, and a per-user/per-key billing recorder backed by GORM or in-memory storage. * middleware: UsageMiddleware records usage via the billing recorder, plus admission, route-model, usage-stamp and trace middlewares. * observability: BackendTrace ring buffer stores full request bodies (capped), MITM proxy emits structured trace events, and router classifier decisions surface at /api/router/decide. * gallery: Arch-Router-1.5B (Q4_K_M and Q8_0). * UI: cloud-proxy model-editor fields, classifier system-prompt and score-normalization config, and a Traces page rendering request bodies. Assisted-by: claude-code:claude-opus-4-7 [Read] [Edit] [Bash] Signed-off-by: Richard Palethorpe <io@richiejp.com>
97 lines
2.4 KiB
Go
97 lines
2.4 KiB
Go
package router
|
|
|
|
import (
|
|
"sort"
|
|
"strings"
|
|
"sync"
|
|
)
|
|
|
|
// labelSetCache remembers classifier output for prompts it has already
// seen. Each entry maps a normalised prompt (trimmed and lower-cased,
// see cacheKey) to the sorted set of labels that were active for it.
// Both the Score and the Rerank classifier embed one.
//
// The cache exists to amortise hot prompts, not to hold a long tail, so
// overflow is handled by dropping a single arbitrary entry rather than
// by tracking recency. A capacity of zero turns the cache off.
type labelSetCache struct {
	// mu guards store.
	mu sync.RWMutex
	// store holds the cached label sets, keyed by normalised prompt.
	store map[string][]string
	// cap is the maximum number of entries; 0 disables caching.
	cap int
}
|
|
|
|
func newLabelSetCache(size int) *labelSetCache {
|
|
if size < 0 {
|
|
size = 0
|
|
}
|
|
return &labelSetCache{store: make(map[string][]string, size), cap: size}
|
|
}
|
|
|
|
// cacheKey turns a prompt into its cache-equality form: surrounding
// whitespace is stripped and the remainder is lower-cased. Compute it
// once at the top of Classify and hand the result to both get and put,
// so a cache miss does not pay for the TrimSpace+ToLower work twice.
func cacheKey(prompt string) string {
	trimmed := strings.TrimSpace(prompt)
	return strings.ToLower(trimmed)
}
|
|
|
|
func (c *labelSetCache) get(key string) ([]string, bool) {
|
|
if c.cap == 0 {
|
|
return nil, false
|
|
}
|
|
c.mu.RLock()
|
|
defer c.mu.RUnlock()
|
|
v, ok := c.store[key]
|
|
return v, ok
|
|
}
|
|
|
|
func (c *labelSetCache) put(key string, labels []string) {
|
|
if c.cap == 0 {
|
|
return
|
|
}
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
if len(c.store) >= c.cap {
|
|
for k := range c.store {
|
|
delete(c.store, k)
|
|
break
|
|
}
|
|
}
|
|
// Defensive copy + sort: cached label sets must be stable so
|
|
// callers can't mutate via aliasing, and equality comparisons
|
|
// in tests don't depend on insertion order.
|
|
cp := make([]string, len(labels))
|
|
copy(cp, labels)
|
|
sort.Strings(cp)
|
|
c.store[key] = cp
|
|
}
|
|
|
|
func (c *labelSetCache) len() int {
|
|
c.mu.RLock()
|
|
defer c.mu.RUnlock()
|
|
return len(c.store)
|
|
}
|
|
|
|
// selectActive returns the labels whose score is at least threshold,
// in label order, together with the index of the highest score (the
// first one on ties). scores[i] is taken to be the score of labels[i].
//
// If no score clears the threshold, active falls back to the single
// argmax label: both classifiers guarantee a non-empty active set so
// the surrounding middleware always has something to route on.
//
// Edge cases:
//   - labels empty: returns (nil, 0).
//   - scores empty but labels non-empty: returns labels[0] with index 0.
//   - scores longer than labels: the surplus scores are ignored, since
//     they have no label to select; bestIdx always indexes into labels.
func selectActive(scores []float64, labels []string, threshold float64) (active []string, bestIdx int) {
	if len(labels) == 0 {
		return nil, 0
	}
	// Only scores that have a matching label can be selected. Without
	// this clamp a longer scores slice would index past labels.
	n := len(scores)
	if n > len(labels) {
		n = len(labels)
	}
	paired := scores[:n]

	active = make([]string, 0, 2)
	for i, s := range paired {
		if s > paired[bestIdx] {
			bestIdx = i
		}
		if s >= threshold {
			active = append(active, labels[i])
		}
	}
	if len(active) == 0 {
		active = []string{labels[bestIdx]}
	}
	return active, bestIdx
}
|