Files
LocalAI/tests/e2e/e2e_cloud_proxy_test.go
Richard Palethorpe 3fa7b2955c feat(pii): NER tier engine — privacy-filter.cpp backend + NER-centric PII filter (#10360)
Squashed feat/pii-ner-tier-engine rebased onto master (was 45 commits; see
backup/pii-ner-tier-engine-prerebase). Net change:

- privacy-filter.cpp: standalone GGML engine for the openai-privacy-filter
  PII/NER token classifier, wired as a LocalAI gRPC backend (CPU/CUDA/Vulkan).
  TokenClassify moves off the patched llama.cpp path onto this backend.
- PII filter reworked to be NER-centric (encoder/NER detection tier scanning
  whole conversations as one document), with a recreated bounded restricted-
  regex secret-matching pattern detector tier alongside it (per-model
  pii_detection.builtins / .patterns + core/services/routing/piipattern).
- Detection labelled by source (ner vs pattern); backend trace / confidence /
  debug observability; analyze/redact exposed as a synchronous API.
- Instance-wide default detector policy + per-usecase default-on; request
  filtering extended to completions, embeddings, edits & Ollama.
- React UI: NER-centric PII editor, detector-models table, pattern/builtins
  editor, middleware default-policy UI.
- Gallery: privacy-filter-multilingual token-classify model + NER install
  filter; token_classify known_usecase; batch sized to context for NER models.
  privacy-filter backend registered in the backend gallery (cpu/vulkan/cuda-13
  meta + image entries with a capabilities map) matching its CI matrix jobs,
  and an /import-model auto-detect importer (PrivacyFilterImporter, narrow
  privacy-filter GGUF detection) replacing the prior pref-only registration.

Reconciled against master's independent evolution:

- Dropped master's PIIPatternOverrides feature (global-pattern runtime
  overrides + /api/pii/patterns API + runtime_settings.json persistence). The
  per-model NER + pattern-detector design supersedes it; it was built on the
  global redactor pattern set this branch replaced.
- Reverted the llama.cpp Score carry-patch (0006-server-task-type-score):
  removed the patch and restored master's grpc-server.cpp Score RPC (direct
  llama_decode, slot-loop bypass) and LLAMA_VERSION pin, plus master's
  model_config validation forbidding score + chat/completion/embeddings on
  llama-cpp. token_classify is unaffected (it runs on the privacy-filter
  backend, not llama-cpp).

Assisted-by: Claude:claude-opus-4-8 [Claude Code]

Signed-off-by: Richard Palethorpe <io@richiejp.com>
2026-06-18 11:45:22 +01:00

264 lines
11 KiB
Go

package e2e_test
import (
"context"
"encoding/json"
"io"
"net/http"
"strings"
"github.com/openai/openai-go/v3"
"github.com/openai/openai-go/v3/option"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Cloud-proxy e2e tests drive real HTTP requests through LocalAI ->
// cloud-proxy backend (separate process) -> fake upstream httptest
// server. The whole pipeline is exercised: chat handler dispatch,
// gRPC client/server, cloud-proxy translation, upstream call,
// response forwarding back to the client.
var _ = Describe("Cloud-proxy backend E2E", func() {
BeforeEach(func() {
	// These specs need the separately built cloud-proxy backend binary;
	// skip (rather than fail) when its path was never configured.
	backendBuilt := cloudProxyPath != ""
	if !backendBuilt {
		Skip("cloud-proxy backend binary not built (make build-cloud-proxy-backend)")
	}

	// Start every spec from the default script on both fake upstreams so
	// a script installed by a previous spec cannot leak into the next
	// one. Specs that need a custom reply install their own script on
	// top of this.
	// NOTE(review): SetScript is presumably also what clears the
	// recorded hits/counters — confirm against the upstream helper.
	cpOpenAIUpstream.SetScript(defaultOpenAIScript)
	cpAnthropicUpstream.SetScript(defaultAnthropicScript)
})
Context("Passthrough mode — OpenAI shape", func() {
It("forwards a chat completion request verbatim and pipes the response back", func() {
cpOpenAIUpstream.SetScript(func([]byte) (int, string, string) {
return 200, `{"id":"resp-pt","choices":[{"index":0,"message":{"role":"assistant","content":"hi via passthrough"},"finish_reason":"stop"}],"usage":{"prompt_tokens":4,"completion_tokens":3,"total_tokens":7}}`, "application/json"
})
cp := openai.NewClient(option.WithBaseURL(apiURL))
resp, err := cp.Chat.Completions.New(context.TODO(), openai.ChatCompletionNewParams{
Model: "cp-passthrough-openai",
Messages: []openai.ChatCompletionMessageParamUnion{
openai.UserMessage("hello"),
},
})
Expect(err).NotTo(HaveOccurred())
Expect(resp.Choices).NotTo(BeEmpty())
Expect(resp.Choices[0].Message.Content).To(Equal("hi via passthrough"))
// Upstream observed an Authorization header sourced from
// the api_key_env we set at suite startup.
_, _, hdr, _ := cpOpenAIUpstream.Snapshot()
Expect(hdr.Get("Authorization")).To(Equal("Bearer sk-e2e-openai"))
// Body field assertions prove the wire format wasn't
// rewritten — passthrough mode shouldn't touch tools,
// messages, etc.
body := cpOpenAIUpstream.DecodedBody()
Expect(body["messages"]).NotTo(BeNil())
})
})
Context("Passthrough mode — Anthropic shape", func() {
	It("forwards an Anthropic Messages request with x-api-key + anthropic-version", func() {
		cpAnthropicUpstream.SetScript(func([]byte) (int, string, string) {
			return 200, `{"id":"msg-pt","type":"message","role":"assistant","content":[{"type":"text","text":"hi via passthrough anthropic"}],"model":"claude","usage":{"input_tokens":4,"output_tokens":6}}`, "application/json"
		})

		// Anthropic SDK omitted to keep the test self-contained;
		// raw POST exercises the same path. The Anthropic endpoint
		// is /v1/messages on LocalAI.
		reqBody := `{"model":"cp-passthrough-anthropic","max_tokens":64,"messages":[{"role":"user","content":"hello"}]}`
		httpResp, err := http.Post(anthropicBaseURL+"/v1/messages", "application/json", strings.NewReader(reqBody))
		Expect(err).NotTo(HaveOccurred())
		defer func() { _ = httpResp.Body.Close() }()
		Expect(httpResp.StatusCode).To(Equal(200))

		// Assert on the read error too: a truncated or failed read would
		// otherwise surface as a confusing substring mismatch below.
		respBody, err := io.ReadAll(httpResp.Body)
		Expect(err).NotTo(HaveOccurred(), "reading the proxied response body")
		Expect(string(respBody)).To(ContainSubstring("hi via passthrough anthropic"))

		// Anthropic-shaped auth: x-api-key plus a version header, and no
		// OpenAI-style bearer token forwarded to the upstream.
		_, _, hdr, _ := cpAnthropicUpstream.Snapshot()
		Expect(hdr.Get("x-api-key")).To(Equal("sk-ant-e2e"))
		Expect(hdr.Get("anthropic-version")).NotTo(BeEmpty())
		Expect(hdr.Get("Authorization")).To(BeEmpty(), "Authorization leaked on anthropic backend")
	})
})
Context("Translate mode — OpenAI provider", func() {
	// The chat handler only emits tool_calls in the response when
	// the client asked for tools. The translate backend forwards
	// whatever the upstream returns, but the endpoint-level
	// assembly is gated on the request shape — same as for local
	// models. The e2e tests therefore declare tools on the
	// outbound request so the response-side assembly fires.
	toolsParam := []openai.ChatCompletionToolUnionParam{
		openai.ChatCompletionFunctionTool(openai.FunctionDefinitionParam{
			Name:        "lookup",
			Description: openai.String("look something up"),
			Parameters: openai.FunctionParameters{
				"type": "object",
				"properties": map[string]any{
					"q": map[string]any{"type": "string"},
				},
			},
		}),
	}

	It("delivers tool_calls in the chat completion response", func() {
		cpOpenAIUpstream.SetScript(func([]byte) (int, string, string) {
			return nonStreamingOpenAIToolCallScript()
		})

		cp := openai.NewClient(option.WithBaseURL(apiURL))
		resp, err := cp.Chat.Completions.New(context.TODO(), openai.ChatCompletionNewParams{
			Model: "cp-translate-openai",
			Messages: []openai.ChatCompletionMessageParamUnion{
				openai.UserMessage("find clouds"),
			},
			Tools: toolsParam,
		})
		Expect(err).NotTo(HaveOccurred())
		Expect(resp.Choices).NotTo(BeEmpty())

		tcs := resp.Choices[0].Message.ToolCalls
		Expect(tcs).To(HaveLen(1), "tool_calls should survive translate-mode round-trip")
		Expect(tcs[0].Function.Name).To(Equal("lookup"))
		Expect(tcs[0].Function.Arguments).To(ContainSubstring(`"q":"clouds"`))

		// Token usage propagated from upstream.
		Expect(resp.Usage.PromptTokens).To(BeNumerically(">", 0))
	})

	It("streams tool_call deltas through SSE", func() {
		cpOpenAIUpstream.SetScript(func([]byte) (int, string, string) {
			return streamingOpenAIToolCallScript()
		})

		cp := openai.NewClient(option.WithBaseURL(apiURL))
		stream := cp.Chat.Completions.NewStreaming(context.TODO(), openai.ChatCompletionNewParams{
			Model: "cp-translate-openai",
			Messages: []openai.ChatCompletionMessageParamUnion{
				openai.UserMessage("what's the weather in SF?"),
			},
			Tools: []openai.ChatCompletionToolUnionParam{
				openai.ChatCompletionFunctionTool(openai.FunctionDefinitionParam{
					Name:        "get_weather",
					Description: openai.String("look up the weather"),
					Parameters: openai.FunctionParameters{
						"type": "object",
						"properties": map[string]any{
							"location": map[string]any{"type": "string"},
						},
					},
				}),
			},
		})
		// Release the underlying SSE response body however the spec
		// exits (including a failed assertion mid-way); the close error
		// is irrelevant to what this spec verifies.
		defer func() { _ = stream.Close() }()

		// Fold the deltas back together: ID and name are taken from
		// whichever chunk carries them, argument fragments are
		// concatenated in arrival order.
		var toolID, toolName string
		var args strings.Builder
		for stream.Next() {
			chunk := stream.Current()
			for _, ch := range chunk.Choices {
				for _, tc := range ch.Delta.ToolCalls {
					if tc.ID != "" {
						toolID = tc.ID
					}
					if tc.Function.Name != "" {
						toolName = tc.Function.Name
					}
					args.WriteString(tc.Function.Arguments)
				}
			}
		}
		Expect(stream.Err()).NotTo(HaveOccurred())
		Expect(toolID).To(Equal("call_e2e"))
		Expect(toolName).To(Equal("get_weather"))

		// Argument fragments assembled in order must form valid JSON.
		var parsed map[string]any
		Expect(json.Unmarshal([]byte(args.String()), &parsed)).To(Succeed())
		Expect(parsed["location"]).To(Equal("SF"))
	})
})
Context("Translate mode — Anthropic provider", func() {
	It("preserves tool_use blocks through Messages API", func() {
		cpAnthropicUpstream.SetScript(func([]byte) (int, string, string) {
			return 200, `{"id":"msg-tu","type":"message","role":"assistant","content":[{"type":"text","text":"Let me check"},{"type":"tool_use","id":"toolu_e2e","name":"weather","input":{"location":"SF"}}],"model":"claude","usage":{"input_tokens":7,"output_tokens":12}}`, "application/json"
		})

		// Anthropic Messages endpoint exposes tool_use blocks
		// directly. Raw POST + JSON decode keeps the test
		// independent of any specific SDK version's accessor API.
		// Tools declared on the request so the response-side
		// assembly populates the tool_use blocks (same gate as
		// for local models).
		reqBody := `{"model":"cp-translate-anthropic","max_tokens":64,"messages":[{"role":"user","content":"what's the weather?"}],"tools":[{"name":"weather","description":"weather lookup","input_schema":{"type":"object","properties":{"location":{"type":"string"}}}}]}`
		httpResp, err := http.Post(anthropicBaseURL+"/v1/messages", "application/json", strings.NewReader(reqBody))
		Expect(err).NotTo(HaveOccurred())
		defer func() { _ = httpResp.Body.Close() }()
		Expect(httpResp.StatusCode).To(Equal(200))

		var decoded map[string]any
		Expect(json.NewDecoder(httpResp.Body).Decode(&decoded)).To(Succeed())
		contentArr, ok := decoded["content"].([]any)
		Expect(ok).To(BeTrue(), "response must carry content array")

		var sawToolUse bool
		for _, block := range contentArr {
			// Checked assertion: a malformed (non-object) content block
			// should fail the spec with a readable message instead of
			// panicking inside the loop.
			m, isObject := block.(map[string]any)
			Expect(isObject).To(BeTrue(), "every content block must be a JSON object")
			if m["type"] == "tool_use" {
				sawToolUse = true
				Expect(m["name"]).To(Equal("weather"))
				// Anthropic content-block assembly synthesizes
				// tool_use IDs from the LocalAI request ID rather
				// than passing through the upstream's toolu_* ID
				// (see messages.go:253-267). Documenting the
				// current behavior — the synthesized ID still
				// follows the toolu_ prefix convention so SDK
				// validation passes.
				id, _ := m["id"].(string)
				Expect(id).To(HavePrefix("toolu_"))
				// A missing or mistyped input leaves a nil map, which
				// fails the lookup assertion below rather than panicking.
				input, _ := m["input"].(map[string]any)
				Expect(input["location"]).To(Equal("SF"))
			}
		}
		Expect(sawToolUse).To(BeTrue(), "tool_use block must survive translate-mode round-trip")
	})
})
Context("Translate mode + PII filter", func() {
It("blocks request-side secrets before they reach the upstream", func() {
// Our NER/pattern PII tier is request-side by design: it scans
// inbound content, not upstream responses. cp-translate-openai
// wires the in-process pattern detector (e2e-secret-filter, block
// action), so a leaked secret in the user's message must trip the
// request-side filter and the request must be rejected before the
// cloud-proxy ever forwards it to the provider.
var upstreamHit bool
cpOpenAIUpstream.SetScript(func([]byte) (int, string, string) {
upstreamHit = true
return 200, `{"id":"x","choices":[{"index":0,"message":{"role":"assistant","content":"ok"},"finish_reason":"stop"}],"usage":{"prompt_tokens":1,"completion_tokens":1,"total_tokens":2}}`, "application/json"
})
cp := openai.NewClient(option.WithBaseURL(apiURL))
_, err := cp.Chat.Completions.New(context.TODO(), openai.ChatCompletionNewParams{
Model: "cp-translate-openai",
Messages: []openai.ChatCompletionMessageParamUnion{
openai.UserMessage("my key is " + leakedAnthropicKey + " please keep it"),
},
})
// Lock-in: request-side PII fires in translate mode, the request is
// rejected by content policy, and the secret never leaves the box.
Expect(err).To(HaveOccurred(), "request carrying a leaked secret must be rejected")
Expect(err.Error()).To(ContainSubstring("pii_blocked"))
Expect(upstreamHit).To(BeFalse(), "blocked request must not reach the upstream provider")
})
})
})
// defaultOpenAIScript is the script installed on the fake OpenAI upstream
// before each spec. It ignores the request body and always answers with
// status 200, a minimal non-streaming chat completion payload, and the
// application/json content type (returned in that order).
func defaultOpenAIScript(_ []byte) (int, string, string) {
	const (
		status      = 200
		contentType = "application/json"
		payload     = `{"id":"chatcmpl-default","choices":[{"index":0,"message":{"role":"assistant","content":"default openai reply"},"finish_reason":"stop"}],"usage":{"prompt_tokens":1,"completion_tokens":1,"total_tokens":2}}`
	)
	return status, payload, contentType
}
// defaultAnthropicScript is the script installed on the fake Anthropic
// upstream before each spec. It ignores the request body and always answers
// with status 200, a minimal Messages-style payload holding a single text
// block, and the application/json content type (returned in that order).
func defaultAnthropicScript(_ []byte) (int, string, string) {
	const (
		status      = 200
		contentType = "application/json"
		payload     = `{"id":"msg-default","type":"message","role":"assistant","content":[{"type":"text","text":"default anthropic reply"}],"model":"claude","usage":{"input_tokens":1,"output_tokens":1}}`
	)
	return status, payload, contentType
}