mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-30 11:36:31 -04:00
Add a routing middleware stack and a cloud-proxy backend. * cloud-proxy: a Go gRPC backend that forwards OpenAI- and Anthropic-shaped chat requests to upstream providers, with an optional translate mode (OpenAI request -> Anthropic /v1/messages -> OpenAI response) and full tool-calling support. * routing: admission control, content-aware model routing (embedding cache + classifier + rerank + Arch-Router score), PII detection/redaction (regex + NER) with streaming filter and OpenAI/Anthropic adapters, and a per-user/per-key billing recorder backed by GORM or in-memory storage. * middleware: UsageMiddleware records usage via the billing recorder, plus admission, route-model, usage-stamp and trace middlewares. * observability: BackendTrace ring buffer stores full request bodies (capped), MITM proxy emits structured trace events, and router classifier decisions surface at /api/router/decide. * gallery: Arch-Router-1.5B (Q4_K_M and Q8_0). * UI: cloud-proxy model-editor fields, classifier system-prompt and score-normalization config, and a Traces page rendering request bodies. Assisted-by: claude-code:claude-opus-4-7 [Read] [Edit] [Bash] Signed-off-by: Richard Palethorpe <io@richiejp.com>
269 lines
11 KiB
Go
269 lines
11 KiB
Go
package e2e_test
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"io"
|
|
"net/http"
|
|
"strings"
|
|
|
|
"github.com/openai/openai-go/v3"
|
|
"github.com/openai/openai-go/v3/option"
|
|
|
|
. "github.com/onsi/ginkgo/v2"
|
|
. "github.com/onsi/gomega"
|
|
)
|
|
|
|
// Cloud-proxy e2e tests drive real HTTP requests through LocalAI ->
|
|
// cloud-proxy backend (separate process) -> fake upstream httptest
|
|
// server. The whole pipeline is exercised: chat handler dispatch,
|
|
// gRPC client/server, cloud-proxy translation, upstream call,
|
|
// response forwarding back to the client.
|
|
var _ = Describe("Cloud-proxy backend E2E", func() {
|
|
BeforeEach(func() {
|
|
if cloudProxyPath == "" {
|
|
Skip("cloud-proxy backend binary not built (make build-cloud-proxy-backend)")
|
|
}
|
|
// Reset upstream scripts + counters between specs so a previous
|
|
// spec's hits don't leak in. The default script is restored by
|
|
// each spec that needs a custom one.
|
|
cpOpenAIUpstream.SetScript(defaultOpenAIScript)
|
|
cpAnthropicUpstream.SetScript(defaultAnthropicScript)
|
|
})
|
|
|
|
Context("Passthrough mode — OpenAI shape", func() {
|
|
It("forwards a chat completion request verbatim and pipes the response back", func() {
|
|
cpOpenAIUpstream.SetScript(func([]byte) (int, string, string) {
|
|
return 200, `{"id":"resp-pt","choices":[{"index":0,"message":{"role":"assistant","content":"hi via passthrough"},"finish_reason":"stop"}],"usage":{"prompt_tokens":4,"completion_tokens":3,"total_tokens":7}}`, "application/json"
|
|
})
|
|
|
|
cp := openai.NewClient(option.WithBaseURL(apiURL))
|
|
resp, err := cp.Chat.Completions.New(context.TODO(), openai.ChatCompletionNewParams{
|
|
Model: "cp-passthrough-openai",
|
|
Messages: []openai.ChatCompletionMessageParamUnion{
|
|
openai.UserMessage("hello"),
|
|
},
|
|
})
|
|
Expect(err).NotTo(HaveOccurred())
|
|
Expect(resp.Choices).NotTo(BeEmpty())
|
|
Expect(resp.Choices[0].Message.Content).To(Equal("hi via passthrough"))
|
|
|
|
// Upstream observed an Authorization header sourced from
|
|
// the api_key_env we set at suite startup.
|
|
_, _, hdr, _ := cpOpenAIUpstream.Snapshot()
|
|
Expect(hdr.Get("Authorization")).To(Equal("Bearer sk-e2e-openai"))
|
|
// Body field assertions prove the wire format wasn't
|
|
// rewritten — passthrough mode shouldn't touch tools,
|
|
// messages, etc.
|
|
body := cpOpenAIUpstream.DecodedBody()
|
|
Expect(body["messages"]).NotTo(BeNil())
|
|
})
|
|
})
|
|
|
|
Context("Passthrough mode — Anthropic shape", func() {
|
|
It("forwards an Anthropic Messages request with x-api-key + anthropic-version", func() {
|
|
cpAnthropicUpstream.SetScript(func([]byte) (int, string, string) {
|
|
return 200, `{"id":"msg-pt","type":"message","role":"assistant","content":[{"type":"text","text":"hi via passthrough anthropic"}],"model":"claude","usage":{"input_tokens":4,"output_tokens":6}}`, "application/json"
|
|
})
|
|
|
|
// Anthropic SDK omitted to keep the test self-contained;
|
|
// raw POST exercises the same path. The Anthropic endpoint
|
|
// is /v1/messages on LocalAI.
|
|
reqBody := `{"model":"cp-passthrough-anthropic","max_tokens":64,"messages":[{"role":"user","content":"hello"}]}`
|
|
httpResp, err := http.Post(anthropicBaseURL+"/v1/messages", "application/json", strings.NewReader(reqBody))
|
|
Expect(err).NotTo(HaveOccurred())
|
|
defer func() { _ = httpResp.Body.Close() }()
|
|
Expect(httpResp.StatusCode).To(Equal(200))
|
|
|
|
respBody, _ := io.ReadAll(httpResp.Body)
|
|
Expect(string(respBody)).To(ContainSubstring("hi via passthrough anthropic"))
|
|
|
|
_, _, hdr, _ := cpAnthropicUpstream.Snapshot()
|
|
Expect(hdr.Get("x-api-key")).To(Equal("sk-ant-e2e"))
|
|
Expect(hdr.Get("anthropic-version")).NotTo(BeEmpty())
|
|
Expect(hdr.Get("Authorization")).To(BeEmpty(), "Authorization leaked on anthropic backend")
|
|
})
|
|
})
|
|
|
|
Context("Translate mode — OpenAI provider", func() {
|
|
// The chat handler only emits tool_calls in the response when
|
|
// the client asked for tools. The translate backend forwards
|
|
// whatever the upstream returns, but the endpoint-level
|
|
// assembly is gated on the request shape — same as for local
|
|
// models. The e2e tests therefore declare tools on the
|
|
// outbound request so the response-side assembly fires.
|
|
toolsParam := []openai.ChatCompletionToolUnionParam{
|
|
openai.ChatCompletionFunctionTool(openai.FunctionDefinitionParam{
|
|
Name: "lookup",
|
|
Description: openai.String("look something up"),
|
|
Parameters: openai.FunctionParameters{
|
|
"type": "object",
|
|
"properties": map[string]any{
|
|
"q": map[string]any{"type": "string"},
|
|
},
|
|
},
|
|
}),
|
|
}
|
|
|
|
It("delivers tool_calls in the chat completion response", func() {
|
|
cpOpenAIUpstream.SetScript(func([]byte) (int, string, string) {
|
|
return nonStreamingOpenAIToolCallScript()
|
|
})
|
|
|
|
cp := openai.NewClient(option.WithBaseURL(apiURL))
|
|
resp, err := cp.Chat.Completions.New(context.TODO(), openai.ChatCompletionNewParams{
|
|
Model: "cp-translate-openai",
|
|
Messages: []openai.ChatCompletionMessageParamUnion{
|
|
openai.UserMessage("find clouds"),
|
|
},
|
|
Tools: toolsParam,
|
|
})
|
|
Expect(err).NotTo(HaveOccurred())
|
|
Expect(resp.Choices).NotTo(BeEmpty())
|
|
tcs := resp.Choices[0].Message.ToolCalls
|
|
Expect(tcs).To(HaveLen(1), "tool_calls should survive translate-mode round-trip")
|
|
Expect(tcs[0].Function.Name).To(Equal("lookup"))
|
|
Expect(tcs[0].Function.Arguments).To(ContainSubstring(`"q":"clouds"`))
|
|
// Token usage propagated from upstream.
|
|
Expect(resp.Usage.PromptTokens).To(BeNumerically(">", 0))
|
|
})
|
|
|
|
It("streams tool_call deltas through SSE", func() {
|
|
cpOpenAIUpstream.SetScript(func([]byte) (int, string, string) {
|
|
return streamingOpenAIToolCallScript()
|
|
})
|
|
|
|
cp := openai.NewClient(option.WithBaseURL(apiURL))
|
|
stream := cp.Chat.Completions.NewStreaming(context.TODO(), openai.ChatCompletionNewParams{
|
|
Model: "cp-translate-openai",
|
|
Messages: []openai.ChatCompletionMessageParamUnion{
|
|
openai.UserMessage("what's the weather in SF?"),
|
|
},
|
|
Tools: []openai.ChatCompletionToolUnionParam{
|
|
openai.ChatCompletionFunctionTool(openai.FunctionDefinitionParam{
|
|
Name: "get_weather",
|
|
Description: openai.String("look up the weather"),
|
|
Parameters: openai.FunctionParameters{
|
|
"type": "object",
|
|
"properties": map[string]any{
|
|
"location": map[string]any{"type": "string"},
|
|
},
|
|
},
|
|
}),
|
|
},
|
|
})
|
|
|
|
var toolID, toolName string
|
|
var args strings.Builder
|
|
for stream.Next() {
|
|
chunk := stream.Current()
|
|
for _, ch := range chunk.Choices {
|
|
for _, tc := range ch.Delta.ToolCalls {
|
|
if tc.ID != "" {
|
|
toolID = tc.ID
|
|
}
|
|
if tc.Function.Name != "" {
|
|
toolName = tc.Function.Name
|
|
}
|
|
args.WriteString(tc.Function.Arguments)
|
|
}
|
|
}
|
|
}
|
|
Expect(stream.Err()).NotTo(HaveOccurred())
|
|
Expect(toolID).To(Equal("call_e2e"))
|
|
Expect(toolName).To(Equal("get_weather"))
|
|
// Argument fragments assembled in order.
|
|
var parsed map[string]any
|
|
Expect(json.Unmarshal([]byte(args.String()), &parsed)).To(Succeed())
|
|
Expect(parsed["location"]).To(Equal("SF"))
|
|
})
|
|
})
|
|
|
|
Context("Translate mode — Anthropic provider", func() {
|
|
It("preserves tool_use blocks through Messages API", func() {
|
|
cpAnthropicUpstream.SetScript(func([]byte) (int, string, string) {
|
|
return 200, `{"id":"msg-tu","type":"message","role":"assistant","content":[{"type":"text","text":"Let me check"},{"type":"tool_use","id":"toolu_e2e","name":"weather","input":{"location":"SF"}}],"model":"claude","usage":{"input_tokens":7,"output_tokens":12}}`, "application/json"
|
|
})
|
|
|
|
// Anthropic Messages endpoint exposes tool_use blocks
|
|
// directly. Raw POST + JSON decode keeps the test
|
|
// independent of any specific SDK version's accessor API.
|
|
// Tools declared on the request so the response-side
|
|
// assembly populates the tool_use blocks (same gate as
|
|
// for local models).
|
|
reqBody := `{"model":"cp-translate-anthropic","max_tokens":64,"messages":[{"role":"user","content":"what's the weather?"}],"tools":[{"name":"weather","description":"weather lookup","input_schema":{"type":"object","properties":{"location":{"type":"string"}}}}]}`
|
|
httpResp, err := http.Post(anthropicBaseURL+"/v1/messages", "application/json", strings.NewReader(reqBody))
|
|
Expect(err).NotTo(HaveOccurred())
|
|
defer func() { _ = httpResp.Body.Close() }()
|
|
Expect(httpResp.StatusCode).To(Equal(200))
|
|
|
|
var decoded map[string]any
|
|
Expect(json.NewDecoder(httpResp.Body).Decode(&decoded)).To(Succeed())
|
|
contentArr, ok := decoded["content"].([]any)
|
|
Expect(ok).To(BeTrue(), "response must carry content array")
|
|
var sawToolUse bool
|
|
for _, block := range contentArr {
|
|
m := block.(map[string]any)
|
|
if m["type"] == "tool_use" {
|
|
sawToolUse = true
|
|
Expect(m["name"]).To(Equal("weather"))
|
|
// Anthropic content-block assembly synthesizes
|
|
// tool_use IDs from the LocalAI request ID rather
|
|
// than passing through the upstream's toolu_* ID
|
|
// (see messages.go:253-267). Documenting the
|
|
// current behavior — the synthesized ID still
|
|
// follows the toolu_ prefix convention so SDK
|
|
// validation passes.
|
|
id, _ := m["id"].(string)
|
|
Expect(id).To(HavePrefix("toolu_"))
|
|
input, _ := m["input"].(map[string]any)
|
|
Expect(input["location"]).To(Equal("SF"))
|
|
}
|
|
}
|
|
Expect(sawToolUse).To(BeTrue(), "tool_use block must survive translate-mode round-trip")
|
|
})
|
|
})
|
|
|
|
Context("Translate mode + PII filter", func() {
|
|
It("applies the streaming PII filter to translate-mode content", func() {
|
|
// Default PII config redacts email addresses. Split the
|
|
// email across two SSE deltas so the filter has to buffer
|
|
// the partial match — proves the streaming filter is wired
|
|
// up in translate mode, not just passthrough.
|
|
cpOpenAIUpstream.SetScript(func([]byte) (int, string, string) {
|
|
return emailLeakOpenAIStreamingScript()
|
|
})
|
|
|
|
cp := openai.NewClient(option.WithBaseURL(apiURL))
|
|
stream := cp.Chat.Completions.NewStreaming(context.TODO(), openai.ChatCompletionNewParams{
|
|
Model: "cp-translate-openai",
|
|
Messages: []openai.ChatCompletionMessageParamUnion{
|
|
openai.UserMessage("share contact info"),
|
|
},
|
|
})
|
|
|
|
var assembled strings.Builder
|
|
for stream.Next() {
|
|
for _, ch := range stream.Current().Choices {
|
|
assembled.WriteString(ch.Delta.Content)
|
|
}
|
|
}
|
|
Expect(stream.Err()).NotTo(HaveOccurred())
|
|
out := assembled.String()
|
|
// If PII is wired up, the email is redacted before reaching
|
|
// the client. If not, "alice@example.com" leaks through.
|
|
// This is the lock-in test for gap #3.
|
|
Expect(out).NotTo(ContainSubstring("alice@example.com"),
|
|
"email leaked through translate-mode stream — PII filter not applied")
|
|
})
|
|
})
|
|
})
|
|
|
|
// defaultOpenAIScript is the baseline script for the fake OpenAI upstream.
// It ignores the request body and always answers with HTTP 200, a fixed
// chat-completion JSON payload, and the application/json content type.
func defaultOpenAIScript([]byte) (int, string, string) {
	const (
		status      = 200
		payload     = `{"id":"chatcmpl-default","choices":[{"index":0,"message":{"role":"assistant","content":"default openai reply"},"finish_reason":"stop"}],"usage":{"prompt_tokens":1,"completion_tokens":1,"total_tokens":2}}`
		contentType = "application/json"
	)
	return status, payload, contentType
}
|
|
|
|
// defaultAnthropicScript is the baseline script for the fake Anthropic
// upstream. It ignores the request body and always answers with HTTP 200,
// a fixed Messages-API JSON payload, and the application/json content type.
func defaultAnthropicScript([]byte) (int, string, string) {
	const (
		status      = 200
		payload     = `{"id":"msg-default","type":"message","role":"assistant","content":[{"type":"text","text":"default anthropic reply"}],"model":"claude","usage":{"input_tokens":1,"output_tokens":1}}`
		contentType = "application/json"
	)
	return status, payload, contentType
}
|