Files
LocalAI/core/schema/message.go
Richard Palethorpe 6a80e23733 feat(middleware): Model routing, PII filtering, Cloud model proxies (#9802)
Add a routing middleware stack and a cloud-proxy backend.

* cloud-proxy: a Go gRPC backend that forwards OpenAI- and
  Anthropic-shaped chat requests to upstream providers, with an
  optional translate mode (OpenAI request -> Anthropic /v1/messages
  -> OpenAI response) and full tool-calling support.

* routing: admission control, content-aware model routing
  (embedding cache + classifier + rerank + Arch-Router score),
  PII detection/redaction (regex + NER) with streaming filter and
  OpenAI/Anthropic adapters, and a per-user/per-key billing recorder
  backed by GORM or in-memory storage.

* middleware: UsageMiddleware records usage via the billing recorder,
  plus admission, route-model, usage-stamp and trace middlewares.

* observability: BackendTrace ring buffer stores full request bodies
  (capped), MITM proxy emits structured trace events, and router
  classifier decisions surface at /api/router/decide.

* gallery: Arch-Router-1.5B (Q4_K_M and Q8_0).

* UI: cloud-proxy model-editor fields, classifier system-prompt and
  score-normalization config, and a Traces page rendering request
  bodies.

Assisted-by: claude-code:claude-opus-4-7 [Read] [Edit] [Bash]

Signed-off-by: Richard Palethorpe <io@richiejp.com>
2026-05-25 09:28:27 +02:00

99 lines
2.9 KiB
Go

package schema
import (
"encoding/json"
"github.com/mudler/xlog"
"github.com/mudler/LocalAI/pkg/grpc/proto"
)
// Message is a single chat message as it appears in request and response
// payloads (JSON) and in YAML configuration.
// NOTE(review): the JSON field names look like the OpenAI chat message shape —
// confirm against the API handlers that decode into this type.
type Message struct {
// The message role
Role string `json:"role,omitempty" yaml:"role"`
// The message name (used for tool calls)
Name string `json:"name,omitempty" yaml:"name"`
// The message content. It is dynamically typed: ToProto in this file
// understands a plain string and a []any of parts (whose "text" fields
// are concatenated); any other dynamic type is ignored there.
Content any `json:"content" yaml:"content"`
// Staging buffers populated by the request middleware while
// decoding multimodal Content. Never serialised — strict
// providers (Anthropic) 400 on unknown message fields when the
// cloud-proxy passthrough re-marshals Message verbatim.
StringContent string `json:"-" yaml:"-"`
StringImages []string `json:"-" yaml:"-"`
StringVideos []string `json:"-" yaml:"-"`
StringAudios []string `json:"-" yaml:"-"`
// A result of a function call
FunctionCall any `json:"function_call,omitempty" yaml:"function_call,omitempty"`
// Tool calls attached to this message.
// NOTE(review): the YAML key is singular ("tool_call") while the JSON key
// is plural ("tool_calls") — confirm this is intentional before changing
// it, since existing YAML files may depend on the current key.
ToolCalls []ToolCall `json:"tool_calls,omitempty" yaml:"tool_call,omitempty"`
// Identifier of the tool call this message relates to — presumably set on
// tool-result messages; verify against callers.
ToolCallID string `json:"tool_call_id,omitempty" yaml:"tool_call_id,omitempty"`
// Reasoning content extracted from <thinking>...</thinking> tags.
// A nil pointer means no reasoning is attached; ToProto only copies the
// value when the pointer is non-nil.
Reasoning *string `json:"reasoning,omitempty" yaml:"reasoning,omitempty"`
}
// ToolCall is one entry of Message.ToolCalls. ToProto serialises the whole
// slice to a JSON string using the field names declared in the tags below.
type ToolCall struct {
// Position of the call — presumably its index within a (possibly streamed)
// list of tool calls; confirm against the producers of this type.
Index int `json:"index"`
// Identifier of the call.
ID string `json:"id"`
// Kind of tool call — presumably "function"; TODO confirm allowed values.
Type string `json:"type"`
// The function invocation itself; serialised under the "function" key.
FunctionCall FunctionCall `json:"function"`
}
// FunctionCall names a function and carries the arguments to call it with.
type FunctionCall struct {
// Name of the function; omitted from JSON when empty.
Name string `json:"name,omitempty"`
// Arguments for the call, carried as a string and always emitted in JSON
// (no omitempty). Presumably a JSON-encoded object — confirm with callers.
Arguments string `json:"arguments"`
}
// Messages is an ordered list of Message values. It is a named slice type so
// that conversions such as ToProto can be defined as methods on it.
type Messages []Message
// ToProto converts the messages into their gRPC proto.Message equivalents.
//
// The returned slice has the same length and order as the receiver. For each
// message:
//   - Role and Name are copied as-is.
//   - Content is copied when it is a string. When it is a []any (multimodal
//     parts), the "text" fields of the parts are concatenated and non-text
//     parts are dropped. Any other content type leaves Content empty.
//   - ToolCalls are serialised to a JSON string when present.
//   - ToolCallID and Reasoning are copied when set.
//
// Conversion is best-effort: encoding problems are logged and the affected
// field is left empty (or partially filled) rather than failing the call.
func (messages Messages) ToProto() []*proto.Message {
	protoMessages := make([]*proto.Message, len(messages))
	for i := range messages {
		// Index instead of ranging by value to avoid copying each Message.
		message := &messages[i]

		pm := &proto.Message{
			Role: message.Role,
			Name: message.Name, // needed by function calls
		}
		protoMessages[i] = pm

		switch ct := message.Content.(type) {
		case string:
			pm.Content = ct
		case []any:
			// Multimodal content: only the text is forwarded here; the
			// non-text parts are left out of the proto content.
			// Round-trip through JSON so every part is matched by its
			// "text" key regardless of the concrete Go type it holds.
			data, err := json.Marshal(ct)
			if err != nil {
				xlog.Warn("failed to marshal multimodal content to JSON", "error", err)
				break // leaves the switch only; Content stays empty
			}
			var parts []struct {
				Text string `json:"text"`
			}
			// json.Unmarshal keeps decoding after a type mismatch, so the
			// parts that did decode are still used when it reports an error.
			if err := json.Unmarshal(data, &parts); err != nil {
				xlog.Warn("failed to decode text parts of multimodal content", "error", err)
			}
			for _, part := range parts {
				pm.Content += part.Text
			}
		}

		// Serialize tool_calls to JSON string if present
		if len(message.ToolCalls) > 0 {
			toolCallsJSON, err := json.Marshal(message.ToolCalls)
			if err != nil {
				xlog.Warn("failed to marshal tool_calls to JSON", "error", err)
			} else {
				pm.ToolCalls = string(toolCallsJSON)
			}
		}

		if message.ToolCallID != "" {
			pm.ToolCallId = message.ToolCallID
		}

		if message.Reasoning != nil {
			pm.ReasoningContent = *message.Reasoning
		}
	}
	return protoMessages
}