mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-30 03:25:42 -04:00
Add a routing middleware stack and a cloud-proxy backend. * cloud-proxy: a Go gRPC backend that forwards OpenAI- and Anthropic-shaped chat requests to upstream providers, with an optional translate mode (OpenAI request -> Anthropic /v1/messages -> OpenAI response) and full tool-calling support. * routing: admission control, content-aware model routing (embedding cache + classifier + rerank + Arch-Router score), PII detection/redaction (regex + NER) with streaming filter and OpenAI/Anthropic adapters, and a per-user/per-key billing recorder backed by GORM or in-memory storage. * middleware: UsageMiddleware records usage via the billing recorder, plus admission, route-model, usage-stamp and trace middlewares. * observability: BackendTrace ring buffer stores full request bodies (capped), MITM proxy emits structured trace events, and router classifier decisions surface at /api/router/decide. * gallery: Arch-Router-1.5B (Q4_K_M and Q8_0). * UI: cloud-proxy model-editor fields, classifier system-prompt and score-normalization config, and a Traces page rendering request bodies. Assisted-by: claude-code:claude-opus-4-7 [Read] [Edit] [Bash] Signed-off-by: Richard Palethorpe <io@richiejp.com>
64 lines
2.1 KiB
Go
64 lines
2.1 KiB
Go
package backend
|
|
|
|
import (
|
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
|
|
. "github.com/onsi/ginkgo/v2"
|
|
. "github.com/onsi/gomega"
|
|
)
|
|
|
|
var _ = Describe("scoreResponseToCandidates", func() {
|
|
It("returns nil for a nil response", func() {
|
|
Expect(scoreResponseToCandidates(nil, false)).To(BeNil())
|
|
})
|
|
|
|
It("returns an empty slice when the response has no candidates", func() {
|
|
Expect(scoreResponseToCandidates(&pb.ScoreResponse{}, false)).To(BeEmpty())
|
|
})
|
|
|
|
It("copies LogProb / LengthNormalizedLogProb / NumTokens for every candidate", func() {
|
|
resp := &pb.ScoreResponse{Candidates: []*pb.CandidateScore{
|
|
{LogProb: -2.0, LengthNormalizedLogProb: -1.0, NumTokens: 2},
|
|
{LogProb: -7.5, LengthNormalizedLogProb: -1.5, NumTokens: 5},
|
|
}}
|
|
got := scoreResponseToCandidates(resp, false)
|
|
Expect(got).To(HaveLen(2))
|
|
Expect(got[0].LogProb).To(Equal(-2.0))
|
|
Expect(got[0].LengthNormalizedLogProb).To(Equal(-1.0))
|
|
Expect(got[0].NumTokens).To(Equal(2))
|
|
Expect(got[1].LogProb).To(Equal(-7.5))
|
|
Expect(got[1].NumTokens).To(Equal(5))
|
|
})
|
|
|
|
It("omits per-token detail when includeTokens=false even if the wire response carries it", func() {
|
|
// Defensive: if the backend over-reports we still respect the
|
|
// caller's opt-in so consumers don't pay marshaling for data
|
|
// they didn't ask for.
|
|
resp := &pb.ScoreResponse{Candidates: []*pb.CandidateScore{{
|
|
LogProb: -1.0,
|
|
Tokens: []*pb.TokenLogProb{{Token: "hi", LogProb: -1.0}},
|
|
}}}
|
|
got := scoreResponseToCandidates(resp, false)
|
|
Expect(got).To(HaveLen(1))
|
|
Expect(got[0].Tokens).To(BeNil())
|
|
})
|
|
|
|
It("populates per-token detail when includeTokens=true", func() {
|
|
resp := &pb.ScoreResponse{Candidates: []*pb.CandidateScore{{
|
|
LogProb: -3.0,
|
|
NumTokens: 2,
|
|
Tokens: []*pb.TokenLogProb{
|
|
{Token: "Hello", LogProb: -1.0},
|
|
{Token: " world", LogProb: -2.0},
|
|
},
|
|
}}}
|
|
got := scoreResponseToCandidates(resp, true)
|
|
Expect(got).To(HaveLen(1))
|
|
Expect(got[0].Tokens).To(HaveLen(2))
|
|
Expect(got[0].Tokens[0].Token).To(Equal("Hello"))
|
|
Expect(got[0].Tokens[0].LogProb).To(Equal(-1.0))
|
|
Expect(got[0].Tokens[1].Token).To(Equal(" world"))
|
|
Expect(got[0].Tokens[1].LogProb).To(Equal(-2.0))
|
|
})
|
|
})
|