Files
LocalAI/core/http/endpoints/localai/mcp.go
Richard Palethorpe 6a80e23733 feat(middleware): Model routing, PII filtering, Cloud model proxies (#9802)
Add a routing middleware stack and a cloud-proxy backend.

* cloud-proxy: a Go gRPC backend that forwards OpenAI- and
  Anthropic-shaped chat requests to upstream providers, with an
  optional translate mode (OpenAI request -> Anthropic /v1/messages
  -> OpenAI response) and full tool-calling support.

* routing: admission control, content-aware model routing
  (embedding cache + classifier + rerank + Arch-Router score),
  PII detection/redaction (regex + NER) with streaming filter and
  OpenAI/Anthropic adapters, and a per-user/per-key billing recorder
  backed by GORM or in-memory storage.

* middleware: UsageMiddleware records usage via the billing recorder,
  plus admission, route-model, usage-stamp and trace middlewares.

* observability: BackendTrace ring buffer stores full request bodies
  (capped), MITM proxy emits structured trace events, and router
  classifier decisions surface at /api/router/decide.

* gallery: Arch-Router-1.5B (Q4_K_M and Q8_0).

* UI: cloud-proxy model-editor fields, classifier system-prompt and
  score-normalization config, and a Traces page rendering request
  bodies.

Assisted-by: claude-code:claude-opus-4-7 [Read] [Edit] [Bash]

Signed-off-by: Richard Palethorpe <io@richiejp.com>
2026-05-25 09:28:27 +02:00

109 lines
3.7 KiB
Go

package localai
import (
"fmt"
"strings"
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/config"
mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp"
"github.com/mudler/LocalAI/core/http/endpoints/openai"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/templates"
"github.com/mudler/LocalAI/pkg/model"
)
// MCP SSE Event Types (kept for backward compatibility with MCP endpoint consumers)
type MCPReasoningEvent struct {
Type string `json:"type"`
Content string `json:"content"`
}
type MCPToolCallEvent struct {
Type string `json:"type"`
Name string `json:"name"`
Arguments map[string]any `json:"arguments"`
Reasoning string `json:"reasoning"`
}
type MCPToolResultEvent struct {
Type string `json:"type"`
Name string `json:"name"`
Result string `json:"result"`
}
type MCPStatusEvent struct {
Type string `json:"type"`
Message string `json:"message"`
}
type MCPAssistantEvent struct {
Type string `json:"type"`
Content string `json:"content"`
}
type MCPErrorEvent struct {
Type string `json:"type"`
Message string `json:"message"`
}
// MCPEndpoint is the endpoint for MCP chat completions.
// It enables all MCP servers for the model and delegates to the standard chat endpoint,
// which handles MCP tool injection and server-side execution.
// Both streaming and non-streaming modes use standard OpenAI response format.
// @Summary MCP chat completions with automatic tool execution
// @Tags mcp
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/mcp/chat/completions [post]
func MCPEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig, natsClient mcpTools.MCPNATSClient) echo.HandlerFunc {
// The legacy /v1/mcp/chat/completions endpoint never opts into the
// in-process LocalAI Assistant tool surface — pass nil holder so the
// assistant branch in chat.go is unreachable from this code path.
// Stream-side PII filter is also nil: this legacy endpoint pre-dates
// the per-model PII config and is kept for backward compatibility.
// The request-side middleware on the main chat route handles
// filtering for the standard /v1/chat/completions path.
chatHandler := openai.ChatEndpoint(cl, ml, evaluator, appConfig, natsClient, nil, nil, nil)
return func(c echo.Context) error {
input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
if !ok || input.Model == "" {
return echo.ErrBadRequest
}
modelConfig, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
if !ok || modelConfig == nil {
return echo.ErrBadRequest
}
if modelConfig.MCP.Servers == "" && modelConfig.MCP.Stdio == "" {
return fmt.Errorf("no MCP servers configured")
}
// Enable all MCP servers if none explicitly specified (preserve original behavior)
if input.Metadata == nil {
input.Metadata = map[string]string{}
}
if _, hasMCP := input.Metadata["mcp_servers"]; !hasMCP {
remote, stdio, err := modelConfig.MCP.MCPConfigFromYAML()
if err != nil {
return fmt.Errorf("failed to get MCP config: %w", err)
}
var allServers []string
for name := range remote.Servers {
allServers = append(allServers, name)
}
for name := range stdio.Servers {
allServers = append(allServers, name)
}
input.Metadata["mcp_servers"] = strings.Join(allServers, ",")
}
// Delegate to the standard chat endpoint which handles MCP tool
// injection and server-side execution for both streaming and non-streaming.
return chatHandler(c)
}
}