Files
LocalAI/core/services/cloudproxy/ssewire/ssewire.go
Richard Palethorpe 6a80e23733 feat(middleware): Model routing, PII filtering, Cloud model proxies (#9802)
Add a routing middleware stack and a cloud-proxy backend.

* cloud-proxy: a Go gRPC backend that forwards OpenAI- and
  Anthropic-shaped chat requests to upstream providers, with an
  optional translate mode (OpenAI request -> Anthropic /v1/messages
  -> OpenAI response) and full tool-calling support.

* routing: admission control, content-aware model routing
  (embedding cache + classifier + rerank + Arch-Router score),
  PII detection/redaction (regex + NER) with streaming filter and
  OpenAI/Anthropic adapters, and a per-user/per-key billing recorder
  backed by GORM or in-memory storage.

* middleware: UsageMiddleware records usage via the billing recorder,
  plus admission, route-model, usage-stamp and trace middlewares.

* observability: BackendTrace ring buffer stores full request bodies
  (capped), MITM proxy emits structured trace events, and router
  classifier decisions surface at /api/router/decide.

* gallery: Arch-Router-1.5B (Q4_K_M and Q8_0).

* UI: cloud-proxy model-editor fields, classifier system-prompt and
  score-normalization config, and a Traces page rendering request
  bodies.

Assisted-by: claude-code:claude-opus-4-7 [Read] [Edit] [Bash]

Signed-off-by: Richard Palethorpe <io@richiejp.com>
2026-05-25 09:28:27 +02:00

219 lines
5.5 KiB
Go

// Package ssewire holds the SSE-format helpers shared between
// the request-shape cloud proxy (core/services/cloudproxy) and the
// TLS-terminating MITM proxy (core/services/cloudproxy/mitm). Both
// run a pii.StreamFilter over per-token text extracted from
// provider-specific JSON chunks; this package owns the JSON shapes
// so a future provider addition is one edit, not two.
package ssewire
import (
"bufio"
"encoding/json"
"io"
"strings"
"github.com/mudler/LocalAI/core/services/routing/pii"
)
// Provider is the upstream wire format an SSE stream conforms to.
type Provider string
const (
OpenAI Provider = "openai"
Anthropic Provider = "anthropic"
)
// Event is one SSE event with its exact wire bytes preserved in
// Raw (so unmodified events round-trip byte-for-byte) and the
// extracted JSON payload from the data: line in DataLine.
type Event struct {
Raw string
DataLine string
}
// Scanner reads SSE events one at a time from an upstream body.
type Scanner struct {
r *bufio.Reader
ev Event
err error
}
func NewScanner(r io.Reader) *Scanner {
return &Scanner{r: bufio.NewReaderSize(r, 64*1024)}
}
func (s *Scanner) Scan() bool {
var raw strings.Builder
var dataLine string
for {
line, err := s.r.ReadString('\n')
if line != "" {
raw.WriteString(line)
trimmed := strings.TrimRight(line, "\r\n")
if trimmed == "" {
if raw.Len() == len(line) {
raw.Reset()
continue
}
s.ev = Event{Raw: raw.String(), DataLine: dataLine}
return true
}
if strings.HasPrefix(trimmed, "data:") && dataLine == "" {
payload := strings.TrimPrefix(trimmed, "data:")
payload = strings.TrimPrefix(payload, " ")
dataLine = payload
}
}
if err != nil {
s.err = err
if raw.Len() > 0 {
s.ev = Event{Raw: raw.String(), DataLine: dataLine}
return true
}
return false
}
}
}
func (s *Scanner) Event() Event { return s.ev }
func (s *Scanner) Err() error { return s.err }
// IsTerminalMarker reports whether the data line is the per-provider
// end-of-stream sentinel. The streaming PII filter must drain its
// residue before the caller forwards a terminal marker — clients
// stop reading after it.
func IsTerminalMarker(dataLine string, provider Provider) bool {
if dataLine == "" {
return false
}
if strings.TrimSpace(dataLine) == "[DONE]" {
return true
}
if provider == Anthropic {
var probe struct {
Type string `json:"type"`
}
if err := json.Unmarshal([]byte(dataLine), &probe); err == nil {
return probe.Type == "message_stop"
}
}
return false
}
// RewritePayload runs the data line's content-bearing field through
// the streaming filter. drop=true tells the caller to suppress the
// SSE event entirely (the filter buffered the whole token while
// disambiguating a pattern boundary).
func RewritePayload(dataLine string, provider Provider, filter *pii.StreamFilter) (rewritten string, drop bool) {
if strings.TrimSpace(dataLine) == "[DONE]" {
return dataLine, false
}
switch provider {
case Anthropic:
return rewriteAnthropic(dataLine, filter)
default:
return rewriteOpenAI(dataLine, filter)
}
}
func rewriteOpenAI(dataLine string, filter *pii.StreamFilter) (string, bool) {
var m map[string]any
if err := json.Unmarshal([]byte(dataLine), &m); err != nil {
return dataLine, false
}
choices, ok := m["choices"].([]any)
if !ok || len(choices) == 0 {
return dataLine, false
}
first, ok := choices[0].(map[string]any)
if !ok {
return dataLine, false
}
delta, ok := first["delta"].(map[string]any)
if !ok {
return dataLine, false
}
content, ok := delta["content"].(string)
if !ok || content == "" {
return dataLine, false
}
rewritten := filter.Push(content)
if rewritten == "" {
return "", true
}
if rewritten == content {
return dataLine, false
}
delta["content"] = rewritten
out, err := json.Marshal(m)
if err != nil {
return dataLine, false
}
return string(out), false
}
func rewriteAnthropic(dataLine string, filter *pii.StreamFilter) (string, bool) {
var m map[string]any
if err := json.Unmarshal([]byte(dataLine), &m); err != nil {
return dataLine, false
}
if t, _ := m["type"].(string); t != "content_block_delta" {
return dataLine, false
}
delta, ok := m["delta"].(map[string]any)
if !ok {
return dataLine, false
}
if dt, _ := delta["type"].(string); dt != "text_delta" {
return dataLine, false
}
text, ok := delta["text"].(string)
if !ok || text == "" {
return dataLine, false
}
rewritten := filter.Push(text)
if rewritten == "" {
return "", true
}
if rewritten == text {
return dataLine, false
}
delta["text"] = rewritten
out, err := json.Marshal(m)
if err != nil {
return dataLine, false
}
return string(out), false
}
// SynthResidualEvent builds a provider-shaped SSE event carrying
// the streaming filter's drained tail so the response body remains
// a valid event stream after the proxy splices in held-back text.
func SynthResidualEvent(provider Provider, text string) string {
switch provider {
case Anthropic:
payload := map[string]any{
"type": "content_block_delta",
"index": 0,
"delta": map[string]string{"type": "text_delta", "text": text},
}
b, err := json.Marshal(payload)
if err != nil {
return ""
}
return "event: content_block_delta\ndata: " + string(b) + "\n\n"
default:
payload := map[string]any{
"object": "chat.completion.chunk",
"choices": []map[string]any{
{"index": 0, "delta": map[string]string{"content": text}},
},
}
b, err := json.Marshal(payload)
if err != nil {
return ""
}
return "data: " + string(b) + "\n\n"
}
}