mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-30 11:36:31 -04:00
Add a routing middleware stack and a cloud-proxy backend. * cloud-proxy: a Go gRPC backend that forwards OpenAI- and Anthropic-shaped chat requests to upstream providers, with an optional translate mode (OpenAI request -> Anthropic /v1/messages -> OpenAI response) and full tool-calling support. * routing: admission control, content-aware model routing (embedding cache + classifier + rerank + Arch-Router score), PII detection/redaction (regex + NER) with streaming filter and OpenAI/Anthropic adapters, and a per-user/per-key billing recorder backed by GORM or in-memory storage. * middleware: UsageMiddleware records usage via the billing recorder, plus admission, route-model, usage-stamp and trace middlewares. * observability: BackendTrace ring buffer stores full request bodies (capped), MITM proxy emits structured trace events, and router classifier decisions surface at /api/router/decide. * gallery: Arch-Router-1.5B (Q4_K_M and Q8_0). * UI: cloud-proxy model-editor fields, classifier system-prompt and score-normalization config, and a Traces page rendering request bodies. Assisted-by: claude-code:claude-opus-4-7 [Read] [Edit] [Bash] Signed-off-by: Richard Palethorpe <io@richiejp.com>
219 lines
5.5 KiB
Go
219 lines
5.5 KiB
Go
// Package ssewire holds the SSE-format helpers shared between
|
|
// the request-shape cloud proxy (core/services/cloudproxy) and the
|
|
// TLS-terminating MITM proxy (core/services/cloudproxy/mitm). Both
|
|
// run a pii.StreamFilter over per-token text extracted from
|
|
// provider-specific JSON chunks; this package owns the JSON shapes
|
|
// so a future provider addition is one edit, not two.
|
|
package ssewire
|
|
|
|
import (
|
|
"bufio"
|
|
"encoding/json"
|
|
"io"
|
|
"strings"
|
|
|
|
"github.com/mudler/LocalAI/core/services/routing/pii"
|
|
)
|
|
|
|
// Provider is the upstream wire format an SSE stream conforms to.
|
|
type Provider string
|
|
|
|
const (
|
|
OpenAI Provider = "openai"
|
|
Anthropic Provider = "anthropic"
|
|
)
|
|
|
|
// Event is one SSE event with its exact wire bytes preserved in
|
|
// Raw (so unmodified events round-trip byte-for-byte) and the
|
|
// extracted JSON payload from the data: line in DataLine.
|
|
type Event struct {
|
|
Raw string
|
|
DataLine string
|
|
}
|
|
|
|
// Scanner reads SSE events one at a time from an upstream body.
|
|
type Scanner struct {
|
|
r *bufio.Reader
|
|
ev Event
|
|
err error
|
|
}
|
|
|
|
func NewScanner(r io.Reader) *Scanner {
|
|
return &Scanner{r: bufio.NewReaderSize(r, 64*1024)}
|
|
}
|
|
|
|
func (s *Scanner) Scan() bool {
|
|
var raw strings.Builder
|
|
var dataLine string
|
|
for {
|
|
line, err := s.r.ReadString('\n')
|
|
if line != "" {
|
|
raw.WriteString(line)
|
|
trimmed := strings.TrimRight(line, "\r\n")
|
|
if trimmed == "" {
|
|
if raw.Len() == len(line) {
|
|
raw.Reset()
|
|
continue
|
|
}
|
|
s.ev = Event{Raw: raw.String(), DataLine: dataLine}
|
|
return true
|
|
}
|
|
if strings.HasPrefix(trimmed, "data:") && dataLine == "" {
|
|
payload := strings.TrimPrefix(trimmed, "data:")
|
|
payload = strings.TrimPrefix(payload, " ")
|
|
dataLine = payload
|
|
}
|
|
}
|
|
if err != nil {
|
|
s.err = err
|
|
if raw.Len() > 0 {
|
|
s.ev = Event{Raw: raw.String(), DataLine: dataLine}
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
}
|
|
}
|
|
|
|
func (s *Scanner) Event() Event { return s.ev }
|
|
func (s *Scanner) Err() error { return s.err }
|
|
|
|
// IsTerminalMarker reports whether the data line is the per-provider
|
|
// end-of-stream sentinel. The streaming PII filter must drain its
|
|
// residue before the caller forwards a terminal marker — clients
|
|
// stop reading after it.
|
|
func IsTerminalMarker(dataLine string, provider Provider) bool {
|
|
if dataLine == "" {
|
|
return false
|
|
}
|
|
if strings.TrimSpace(dataLine) == "[DONE]" {
|
|
return true
|
|
}
|
|
if provider == Anthropic {
|
|
var probe struct {
|
|
Type string `json:"type"`
|
|
}
|
|
if err := json.Unmarshal([]byte(dataLine), &probe); err == nil {
|
|
return probe.Type == "message_stop"
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// RewritePayload runs the data line's content-bearing field through
|
|
// the streaming filter. drop=true tells the caller to suppress the
|
|
// SSE event entirely (the filter buffered the whole token while
|
|
// disambiguating a pattern boundary).
|
|
func RewritePayload(dataLine string, provider Provider, filter *pii.StreamFilter) (rewritten string, drop bool) {
|
|
if strings.TrimSpace(dataLine) == "[DONE]" {
|
|
return dataLine, false
|
|
}
|
|
switch provider {
|
|
case Anthropic:
|
|
return rewriteAnthropic(dataLine, filter)
|
|
default:
|
|
return rewriteOpenAI(dataLine, filter)
|
|
}
|
|
}
|
|
|
|
func rewriteOpenAI(dataLine string, filter *pii.StreamFilter) (string, bool) {
|
|
var m map[string]any
|
|
if err := json.Unmarshal([]byte(dataLine), &m); err != nil {
|
|
return dataLine, false
|
|
}
|
|
choices, ok := m["choices"].([]any)
|
|
if !ok || len(choices) == 0 {
|
|
return dataLine, false
|
|
}
|
|
first, ok := choices[0].(map[string]any)
|
|
if !ok {
|
|
return dataLine, false
|
|
}
|
|
delta, ok := first["delta"].(map[string]any)
|
|
if !ok {
|
|
return dataLine, false
|
|
}
|
|
content, ok := delta["content"].(string)
|
|
if !ok || content == "" {
|
|
return dataLine, false
|
|
}
|
|
rewritten := filter.Push(content)
|
|
if rewritten == "" {
|
|
return "", true
|
|
}
|
|
if rewritten == content {
|
|
return dataLine, false
|
|
}
|
|
delta["content"] = rewritten
|
|
out, err := json.Marshal(m)
|
|
if err != nil {
|
|
return dataLine, false
|
|
}
|
|
return string(out), false
|
|
}
|
|
|
|
func rewriteAnthropic(dataLine string, filter *pii.StreamFilter) (string, bool) {
|
|
var m map[string]any
|
|
if err := json.Unmarshal([]byte(dataLine), &m); err != nil {
|
|
return dataLine, false
|
|
}
|
|
if t, _ := m["type"].(string); t != "content_block_delta" {
|
|
return dataLine, false
|
|
}
|
|
delta, ok := m["delta"].(map[string]any)
|
|
if !ok {
|
|
return dataLine, false
|
|
}
|
|
if dt, _ := delta["type"].(string); dt != "text_delta" {
|
|
return dataLine, false
|
|
}
|
|
text, ok := delta["text"].(string)
|
|
if !ok || text == "" {
|
|
return dataLine, false
|
|
}
|
|
rewritten := filter.Push(text)
|
|
if rewritten == "" {
|
|
return "", true
|
|
}
|
|
if rewritten == text {
|
|
return dataLine, false
|
|
}
|
|
delta["text"] = rewritten
|
|
out, err := json.Marshal(m)
|
|
if err != nil {
|
|
return dataLine, false
|
|
}
|
|
return string(out), false
|
|
}
|
|
|
|
// SynthResidualEvent builds a provider-shaped SSE event carrying
|
|
// the streaming filter's drained tail so the response body remains
|
|
// a valid event stream after the proxy splices in held-back text.
|
|
func SynthResidualEvent(provider Provider, text string) string {
|
|
switch provider {
|
|
case Anthropic:
|
|
payload := map[string]any{
|
|
"type": "content_block_delta",
|
|
"index": 0,
|
|
"delta": map[string]string{"type": "text_delta", "text": text},
|
|
}
|
|
b, err := json.Marshal(payload)
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
return "event: content_block_delta\ndata: " + string(b) + "\n\n"
|
|
default:
|
|
payload := map[string]any{
|
|
"object": "chat.completion.chunk",
|
|
"choices": []map[string]any{
|
|
{"index": 0, "delta": map[string]string{"content": text}},
|
|
},
|
|
}
|
|
b, err := json.Marshal(payload)
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
return "data: " + string(b) + "\n\n"
|
|
}
|
|
}
|