Files
LocalAI/tests/e2e/e2e_cloud_proxy_test.go
Richard Palethorpe 6a80e23733 feat(middleware): Model routing, PII filtering, Cloud model proxies (#9802)
Add a routing middleware stack and a cloud-proxy backend.

* cloud-proxy: a Go gRPC backend that forwards OpenAI- and
  Anthropic-shaped chat requests to upstream providers, with an
  optional translate mode (OpenAI request -> Anthropic /v1/messages
  -> OpenAI response) and full tool-calling support.

* routing: admission control, content-aware model routing
  (embedding cache + classifier + rerank + Arch-Router score),
  PII detection/redaction (regex + NER) with streaming filter and
  OpenAI/Anthropic adapters, and a per-user/per-key billing recorder
  backed by GORM or in-memory storage.

* middleware: UsageMiddleware records usage via the billing recorder,
  plus admission, route-model, usage-stamp and trace middlewares.

* observability: BackendTrace ring buffer stores full request bodies
  (capped), MITM proxy emits structured trace events, and router
  classifier decisions surface at /api/router/decide.

* gallery: Arch-Router-1.5B (Q4_K_M and Q8_0).

* UI: cloud-proxy model-editor fields, classifier system-prompt and
  score-normalization config, and a Traces page rendering request
  bodies.

Assisted-by: claude-code:claude-opus-4-7 [Read] [Edit] [Bash]

Signed-off-by: Richard Palethorpe <io@richiejp.com>
2026-05-25 09:28:27 +02:00

269 lines
11 KiB
Go

package e2e_test
import (
"context"
"encoding/json"
"io"
"net/http"
"strings"
"github.com/openai/openai-go/v3"
"github.com/openai/openai-go/v3/option"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Cloud-proxy e2e tests drive real HTTP requests through LocalAI ->
// cloud-proxy backend (separate process) -> fake upstream httptest
// server. The whole pipeline is exercised: chat handler dispatch,
// gRPC client/server, cloud-proxy translation, upstream call,
// response forwarding back to the client.
//
// Every spec installs the upstream script it needs via SetScript and
// then inspects either the client-visible response or what the fake
// upstream observed (headers / decoded body).
var _ = Describe("Cloud-proxy backend E2E", func() {
	BeforeEach(func() {
		if cloudProxyPath == "" {
			Skip("cloud-proxy backend binary not built (make build-cloud-proxy-backend)")
		}
		// Reset upstream scripts + counters between specs so a previous
		// spec's hits don't leak in. The defaults are reinstalled here;
		// a spec that needs a custom script overrides it after this
		// hook has run.
		cpOpenAIUpstream.SetScript(defaultOpenAIScript)
		cpAnthropicUpstream.SetScript(defaultAnthropicScript)
	})

	Context("Passthrough mode — OpenAI shape", func() {
		It("forwards a chat completion request verbatim and pipes the response back", func() {
			cpOpenAIUpstream.SetScript(func([]byte) (int, string, string) {
				return 200, `{"id":"resp-pt","choices":[{"index":0,"message":{"role":"assistant","content":"hi via passthrough"},"finish_reason":"stop"}],"usage":{"prompt_tokens":4,"completion_tokens":3,"total_tokens":7}}`, "application/json"
			})

			cp := openai.NewClient(option.WithBaseURL(apiURL))
			resp, err := cp.Chat.Completions.New(context.TODO(), openai.ChatCompletionNewParams{
				Model: "cp-passthrough-openai",
				Messages: []openai.ChatCompletionMessageParamUnion{
					openai.UserMessage("hello"),
				},
			})
			Expect(err).NotTo(HaveOccurred())
			Expect(resp.Choices).NotTo(BeEmpty())
			Expect(resp.Choices[0].Message.Content).To(Equal("hi via passthrough"))

			// Upstream observed an Authorization header sourced from
			// the api_key_env we set at suite startup.
			_, _, hdr, _ := cpOpenAIUpstream.Snapshot()
			Expect(hdr.Get("Authorization")).To(Equal("Bearer sk-e2e-openai"))

			// Body field assertions prove the wire format wasn't
			// rewritten — passthrough mode shouldn't touch tools,
			// messages, etc.
			body := cpOpenAIUpstream.DecodedBody()
			Expect(body["messages"]).NotTo(BeNil())
		})
	})

	Context("Passthrough mode — Anthropic shape", func() {
		It("forwards an Anthropic Messages request with x-api-key + anthropic-version", func() {
			cpAnthropicUpstream.SetScript(func([]byte) (int, string, string) {
				return 200, `{"id":"msg-pt","type":"message","role":"assistant","content":[{"type":"text","text":"hi via passthrough anthropic"}],"model":"claude","usage":{"input_tokens":4,"output_tokens":6}}`, "application/json"
			})

			// Anthropic SDK omitted to keep the test self-contained;
			// raw POST exercises the same path. The Anthropic endpoint
			// is /v1/messages on LocalAI.
			reqBody := `{"model":"cp-passthrough-anthropic","max_tokens":64,"messages":[{"role":"user","content":"hello"}]}`
			httpResp, err := http.Post(anthropicBaseURL+"/v1/messages", "application/json", strings.NewReader(reqBody))
			Expect(err).NotTo(HaveOccurred())
			defer func() { _ = httpResp.Body.Close() }()

			// Read the body before asserting on the status so that a
			// non-200 failure message carries the server's error
			// payload, and so a read error is reported as such instead
			// of surfacing as a confusing substring mismatch.
			respBody, err := io.ReadAll(httpResp.Body)
			Expect(err).NotTo(HaveOccurred(), "reading response body")
			Expect(httpResp.StatusCode).To(Equal(http.StatusOK), "unexpected status; body: %s", respBody)
			Expect(string(respBody)).To(ContainSubstring("hi via passthrough anthropic"))

			_, _, hdr, _ := cpAnthropicUpstream.Snapshot()
			Expect(hdr.Get("x-api-key")).To(Equal("sk-ant-e2e"))
			Expect(hdr.Get("anthropic-version")).NotTo(BeEmpty())
			Expect(hdr.Get("Authorization")).To(BeEmpty(), "Authorization leaked on anthropic backend")
		})
	})

	Context("Translate mode — OpenAI provider", func() {
		// The chat handler only emits tool_calls in the response when
		// the client asked for tools. The translate backend forwards
		// whatever the upstream returns, but the endpoint-level
		// assembly is gated on the request shape — same as for local
		// models. The e2e tests therefore declare tools on the
		// outbound request so the response-side assembly fires.
		toolsParam := []openai.ChatCompletionToolUnionParam{
			openai.ChatCompletionFunctionTool(openai.FunctionDefinitionParam{
				Name:        "lookup",
				Description: openai.String("look something up"),
				Parameters: openai.FunctionParameters{
					"type": "object",
					"properties": map[string]any{
						"q": map[string]any{"type": "string"},
					},
				},
			}),
		}

		It("delivers tool_calls in the chat completion response", func() {
			cpOpenAIUpstream.SetScript(func([]byte) (int, string, string) {
				return nonStreamingOpenAIToolCallScript()
			})

			cp := openai.NewClient(option.WithBaseURL(apiURL))
			resp, err := cp.Chat.Completions.New(context.TODO(), openai.ChatCompletionNewParams{
				Model: "cp-translate-openai",
				Messages: []openai.ChatCompletionMessageParamUnion{
					openai.UserMessage("find clouds"),
				},
				Tools: toolsParam,
			})
			Expect(err).NotTo(HaveOccurred())
			Expect(resp.Choices).NotTo(BeEmpty())

			tcs := resp.Choices[0].Message.ToolCalls
			Expect(tcs).To(HaveLen(1), "tool_calls should survive translate-mode round-trip")
			Expect(tcs[0].Function.Name).To(Equal("lookup"))
			Expect(tcs[0].Function.Arguments).To(ContainSubstring(`"q":"clouds"`))

			// Token usage propagated from upstream.
			Expect(resp.Usage.PromptTokens).To(BeNumerically(">", 0))
		})

		It("streams tool_call deltas through SSE", func() {
			cpOpenAIUpstream.SetScript(func([]byte) (int, string, string) {
				return streamingOpenAIToolCallScript()
			})

			cp := openai.NewClient(option.WithBaseURL(apiURL))
			stream := cp.Chat.Completions.NewStreaming(context.TODO(), openai.ChatCompletionNewParams{
				Model: "cp-translate-openai",
				Messages: []openai.ChatCompletionMessageParamUnion{
					openai.UserMessage("what's the weather in SF?"),
				},
				Tools: []openai.ChatCompletionToolUnionParam{
					openai.ChatCompletionFunctionTool(openai.FunctionDefinitionParam{
						Name:        "get_weather",
						Description: openai.String("look up the weather"),
						Parameters: openai.FunctionParameters{
							"type": "object",
							"properties": map[string]any{
								"location": map[string]any{"type": "string"},
							},
						},
					}),
				},
			})
			// Release the underlying HTTP response body even when an
			// assertion below aborts the spec early.
			defer func() { _ = stream.Close() }()

			var toolID, toolName string
			var args strings.Builder
			for stream.Next() {
				chunk := stream.Current()
				for _, ch := range chunk.Choices {
					for _, tc := range ch.Delta.ToolCalls {
						if tc.ID != "" {
							toolID = tc.ID
						}
						if tc.Function.Name != "" {
							toolName = tc.Function.Name
						}
						args.WriteString(tc.Function.Arguments)
					}
				}
			}
			Expect(stream.Err()).NotTo(HaveOccurred())
			Expect(toolID).To(Equal("call_e2e"))
			Expect(toolName).To(Equal("get_weather"))

			// Argument fragments assembled in order.
			var parsed map[string]any
			Expect(json.Unmarshal([]byte(args.String()), &parsed)).To(Succeed())
			Expect(parsed["location"]).To(Equal("SF"))
		})
	})

	Context("Translate mode — Anthropic provider", func() {
		It("preserves tool_use blocks through Messages API", func() {
			cpAnthropicUpstream.SetScript(func([]byte) (int, string, string) {
				return 200, `{"id":"msg-tu","type":"message","role":"assistant","content":[{"type":"text","text":"Let me check"},{"type":"tool_use","id":"toolu_e2e","name":"weather","input":{"location":"SF"}}],"model":"claude","usage":{"input_tokens":7,"output_tokens":12}}`, "application/json"
			})

			// Anthropic Messages endpoint exposes tool_use blocks
			// directly. Raw POST + JSON decode keeps the test
			// independent of any specific SDK version's accessor API.
			// Tools declared on the request so the response-side
			// assembly populates the tool_use blocks (same gate as
			// for local models).
			reqBody := `{"model":"cp-translate-anthropic","max_tokens":64,"messages":[{"role":"user","content":"what's the weather?"}],"tools":[{"name":"weather","description":"weather lookup","input_schema":{"type":"object","properties":{"location":{"type":"string"}}}}]}`
			httpResp, err := http.Post(anthropicBaseURL+"/v1/messages", "application/json", strings.NewReader(reqBody))
			Expect(err).NotTo(HaveOccurred())
			defer func() { _ = httpResp.Body.Close() }()

			// Capture the raw body first so a status failure reports
			// what the server actually said.
			respBody, err := io.ReadAll(httpResp.Body)
			Expect(err).NotTo(HaveOccurred(), "reading response body")
			Expect(httpResp.StatusCode).To(Equal(http.StatusOK), "unexpected status; body: %s", respBody)

			var decoded map[string]any
			Expect(json.Unmarshal(respBody, &decoded)).To(Succeed())
			contentArr, ok := decoded["content"].([]any)
			Expect(ok).To(BeTrue(), "response must carry content array")

			var sawToolUse bool
			for _, block := range contentArr {
				// Checked assertion: a malformed block should fail the
				// spec with a readable message, not panic.
				m, isObj := block.(map[string]any)
				Expect(isObj).To(BeTrue(), "content block must be a JSON object, got %T", block)
				if m["type"] != "tool_use" {
					continue
				}
				sawToolUse = true
				Expect(m["name"]).To(Equal("weather"))

				// Anthropic content-block assembly synthesizes
				// tool_use IDs from the LocalAI request ID rather
				// than passing through the upstream's toolu_* ID
				// (see messages.go:253-267). Documenting the
				// current behavior — the synthesized ID still
				// follows the toolu_ prefix convention so SDK
				// validation passes.
				id, _ := m["id"].(string)
				Expect(id).To(HavePrefix("toolu_"))

				// A missing or non-object "input" leaves a nil map;
				// the lookup below then yields nil and the Equal
				// matcher fails, so the ignored ok is safe here.
				input, _ := m["input"].(map[string]any)
				Expect(input["location"]).To(Equal("SF"))
			}
			Expect(sawToolUse).To(BeTrue(), "tool_use block must survive translate-mode round-trip")
		})
	})

	Context("Translate mode + PII filter", func() {
		It("applies the streaming PII filter to translate-mode content", func() {
			// Default PII config redacts email addresses. Split the
			// email across two SSE deltas so the filter has to buffer
			// the partial match — proves the streaming filter is wired
			// up in translate mode, not just passthrough.
			cpOpenAIUpstream.SetScript(func([]byte) (int, string, string) {
				return emailLeakOpenAIStreamingScript()
			})

			cp := openai.NewClient(option.WithBaseURL(apiURL))
			stream := cp.Chat.Completions.NewStreaming(context.TODO(), openai.ChatCompletionNewParams{
				Model: "cp-translate-openai",
				Messages: []openai.ChatCompletionMessageParamUnion{
					openai.UserMessage("share contact info"),
				},
			})
			// Release the underlying HTTP response body even when an
			// assertion below aborts the spec early.
			defer func() { _ = stream.Close() }()

			var assembled strings.Builder
			for stream.Next() {
				for _, ch := range stream.Current().Choices {
					assembled.WriteString(ch.Delta.Content)
				}
			}
			Expect(stream.Err()).NotTo(HaveOccurred())
			out := assembled.String()

			// Guard against a vacuous pass: an empty stream trivially
			// "contains no email" without proving the filter ran on
			// real content.
			Expect(out).NotTo(BeEmpty(), "stream produced no content — cannot verify PII redaction")

			// If PII is wired up, the email is redacted before reaching
			// the client. If not, "alice@example.com" leaks through.
			// This is the lock-in test for gap #3.
			Expect(out).NotTo(ContainSubstring("alice@example.com"),
				"email leaked through translate-mode stream — PII filter not applied")
		})
	})
})
// defaultOpenAIScript is the baseline script installed on the fake
// OpenAI upstream before every spec. It ignores the request body and
// always answers with HTTP 200, a fixed chat-completion JSON payload
// and an application/json content type.
func defaultOpenAIScript([]byte) (int, string, string) {
	const (
		status      = 200
		contentType = "application/json"
		payload     = `{"id":"chatcmpl-default","choices":[{"index":0,"message":{"role":"assistant","content":"default openai reply"},"finish_reason":"stop"}],"usage":{"prompt_tokens":1,"completion_tokens":1,"total_tokens":2}}`
	)
	return status, payload, contentType
}
// defaultAnthropicScript is the baseline script installed on the fake
// Anthropic upstream before every spec. It ignores the request body and
// always answers with HTTP 200, a fixed Messages-API JSON payload and
// an application/json content type.
func defaultAnthropicScript([]byte) (int, string, string) {
	const (
		status      = 200
		contentType = "application/json"
		payload     = `{"id":"msg-default","type":"message","role":"assistant","content":[{"type":"text","text":"default anthropic reply"}],"model":"claude","usage":{"input_tokens":1,"output_tokens":1}}`
	)
	return status, payload, contentType
}