mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-15 04:08:55 -04:00
Add a routing middleware stack and a cloud-proxy backend. * cloud-proxy: a Go gRPC backend that forwards OpenAI- and Anthropic-shaped chat requests to upstream providers, with an optional translate mode (OpenAI request -> Anthropic /v1/messages -> OpenAI response) and full tool-calling support. * routing: admission control, content-aware model routing (embedding cache + classifier + rerank + Arch-Router score), PII detection/redaction (regex + NER) with streaming filter and OpenAI/Anthropic adapters, and a per-user/per-key billing recorder backed by GORM or in-memory storage. * middleware: UsageMiddleware records usage via the billing recorder, plus admission, route-model, usage-stamp and trace middlewares. * observability: BackendTrace ring buffer stores full request bodies (capped), MITM proxy emits structured trace events, and router classifier decisions surface at /api/router/decide. * gallery: Arch-Router-1.5B (Q4_K_M and Q8_0). * UI: cloud-proxy model-editor fields, classifier system-prompt and score-normalization config, and a Traces page rendering request bodies. Assisted-by: claude-code:claude-opus-4-7 [Read] [Edit] [Bash] Signed-off-by: Richard Palethorpe <io@richiejp.com>
109 lines
3.7 KiB
Go
109 lines
3.7 KiB
Go
package localai
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
|
|
"github.com/labstack/echo/v4"
|
|
"github.com/mudler/LocalAI/core/config"
|
|
mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp"
|
|
"github.com/mudler/LocalAI/core/http/endpoints/openai"
|
|
"github.com/mudler/LocalAI/core/http/middleware"
|
|
"github.com/mudler/LocalAI/core/schema"
|
|
"github.com/mudler/LocalAI/core/templates"
|
|
"github.com/mudler/LocalAI/pkg/model"
|
|
)
|
|
|
|
// MCP SSE Event Types (kept for backward compatibility with MCP endpoint consumers)
|
|
// MCPReasoningEvent is one of the MCP SSE event payloads kept for backward
// compatibility with MCP endpoint consumers. It serializes to JSON with a
// "type" and a "content" key.
type MCPReasoningEvent struct {
	// Type is emitted under the "type" key.
	Type string `json:"type"`
	// Content is emitted under the "content" key.
	Content string `json:"content"`
}
|
|
|
|
// MCPToolCallEvent is one of the MCP SSE event payloads kept for backward
// compatibility with MCP endpoint consumers. It serializes to JSON with
// "type", "name", "arguments" and "reasoning" keys.
type MCPToolCallEvent struct {
	// Type is emitted under the "type" key.
	Type string `json:"type"`
	// Name is emitted under the "name" key.
	Name string `json:"name"`
	// Arguments is a free-form JSON object emitted under the "arguments" key.
	Arguments map[string]any `json:"arguments"`
	// Reasoning is emitted under the "reasoning" key.
	Reasoning string `json:"reasoning"`
}
|
|
|
|
// MCPToolResultEvent is one of the MCP SSE event payloads kept for backward
// compatibility with MCP endpoint consumers. It serializes to JSON with
// "type", "name" and "result" keys.
type MCPToolResultEvent struct {
	// Type is emitted under the "type" key.
	Type string `json:"type"`
	// Name is emitted under the "name" key.
	Name string `json:"name"`
	// Result is emitted under the "result" key.
	Result string `json:"result"`
}
|
|
|
|
// MCPStatusEvent is one of the MCP SSE event payloads kept for backward
// compatibility with MCP endpoint consumers. It serializes to JSON with a
// "type" and a "message" key.
type MCPStatusEvent struct {
	// Type is emitted under the "type" key.
	Type string `json:"type"`
	// Message is emitted under the "message" key.
	Message string `json:"message"`
}
|
|
|
|
// MCPAssistantEvent is one of the MCP SSE event payloads kept for backward
// compatibility with MCP endpoint consumers. It serializes to JSON with a
// "type" and a "content" key.
type MCPAssistantEvent struct {
	// Type is emitted under the "type" key.
	Type string `json:"type"`
	// Content is emitted under the "content" key.
	Content string `json:"content"`
}
|
|
|
|
// MCPErrorEvent is one of the MCP SSE event payloads kept for backward
// compatibility with MCP endpoint consumers. It serializes to JSON with a
// "type" and a "message" key.
type MCPErrorEvent struct {
	// Type is emitted under the "type" key.
	Type string `json:"type"`
	// Message is emitted under the "message" key.
	Message string `json:"message"`
}
|
|
|
|
// MCPEndpoint is the endpoint for MCP chat completions.
|
|
// It enables all MCP servers for the model and delegates to the standard chat endpoint,
|
|
// which handles MCP tool injection and server-side execution.
|
|
// Both streaming and non-streaming modes use standard OpenAI response format.
|
|
// @Summary MCP chat completions with automatic tool execution
|
|
// @Tags mcp
|
|
// @Param request body schema.OpenAIRequest true "query params"
|
|
// @Success 200 {object} schema.OpenAIResponse "Response"
|
|
// @Router /v1/mcp/chat/completions [post]
|
|
func MCPEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig, natsClient mcpTools.MCPNATSClient) echo.HandlerFunc {
|
|
// The legacy /v1/mcp/chat/completions endpoint never opts into the
|
|
// in-process LocalAI Assistant tool surface — pass nil holder so the
|
|
// assistant branch in chat.go is unreachable from this code path.
|
|
// Stream-side PII filter is also nil: this legacy endpoint pre-dates
|
|
// the per-model PII config and is kept for backward compatibility.
|
|
// The request-side middleware on the main chat route handles
|
|
// filtering for the standard /v1/chat/completions path.
|
|
chatHandler := openai.ChatEndpoint(cl, ml, evaluator, appConfig, natsClient, nil, nil, nil)
|
|
|
|
return func(c echo.Context) error {
|
|
input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
|
|
if !ok || input.Model == "" {
|
|
return echo.ErrBadRequest
|
|
}
|
|
|
|
modelConfig, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
|
|
if !ok || modelConfig == nil {
|
|
return echo.ErrBadRequest
|
|
}
|
|
|
|
if modelConfig.MCP.Servers == "" && modelConfig.MCP.Stdio == "" {
|
|
return fmt.Errorf("no MCP servers configured")
|
|
}
|
|
|
|
// Enable all MCP servers if none explicitly specified (preserve original behavior)
|
|
if input.Metadata == nil {
|
|
input.Metadata = map[string]string{}
|
|
}
|
|
if _, hasMCP := input.Metadata["mcp_servers"]; !hasMCP {
|
|
remote, stdio, err := modelConfig.MCP.MCPConfigFromYAML()
|
|
if err != nil {
|
|
return fmt.Errorf("failed to get MCP config: %w", err)
|
|
}
|
|
var allServers []string
|
|
for name := range remote.Servers {
|
|
allServers = append(allServers, name)
|
|
}
|
|
for name := range stdio.Servers {
|
|
allServers = append(allServers, name)
|
|
}
|
|
input.Metadata["mcp_servers"] = strings.Join(allServers, ",")
|
|
}
|
|
|
|
// Delegate to the standard chat endpoint which handles MCP tool
|
|
// injection and server-side execution for both streaming and non-streaming.
|
|
return chatHandler(c)
|
|
}
|
|
}
|