From 2adddef5fe266037b6c53c644cab06e7346edba8 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 2 Jan 2026 21:34:23 +0100
Subject: [PATCH] Address feedback from review

Signed-off-by: Ettore Di Giacinto
---
 core/http/endpoints/localai/mcp.go |   4 +-
 core/http/endpoints/openai/mcp.go  | 148 -----------------------------
 core/http/routes/localai.go        |   4 +-
 core/http/routes/openai.go         |   3 -
 4 files changed, 4 insertions(+), 155 deletions(-)
 delete mode 100644 core/http/endpoints/openai/mcp.go

diff --git a/core/http/endpoints/localai/mcp.go b/core/http/endpoints/localai/mcp.go
index a2367fbc3..721f97a69 100644
--- a/core/http/endpoints/localai/mcp.go
+++ b/core/http/endpoints/localai/mcp.go
@@ -53,12 +53,12 @@ type MCPErrorEvent struct {
 	Message string `json:"message"`
 }
 
-// MCPStreamEndpoint is the SSE streaming endpoint for MCP chat completions
+// MCPEndpoint is the endpoint for MCP chat completions. It supports an SSE streaming mode, but that mode is not compatible with the OpenAI APIs.
 // @Summary Stream MCP chat completions with reasoning, tool calls, and results
 // @Param request body schema.OpenAIRequest true "query params"
 // @Success 200 {object} schema.OpenAIResponse "Response"
 // @Router /v1/mcp/chat/completions [post]
-func MCPStreamEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) echo.HandlerFunc {
+func MCPEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		ctx := c.Request().Context()
 		created := int(time.Now().Unix())
diff --git a/core/http/endpoints/openai/mcp.go b/core/http/endpoints/openai/mcp.go
deleted file mode 100644
index e9987cd54..000000000
--- a/core/http/endpoints/openai/mcp.go
+++ /dev/null
@@ -1,148 +0,0 @@
-package openai
-
-import (
-	"context"
-	"encoding/json"
-	"errors"
-	"fmt"
-	"net"
-	"time"
-
-	"github.com/labstack/echo/v4"
-	"github.com/mudler/LocalAI/core/config"
-	mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp"
-	"github.com/mudler/LocalAI/core/http/middleware"
-
-	"github.com/google/uuid"
-	"github.com/mudler/LocalAI/core/schema"
-	"github.com/mudler/LocalAI/core/templates"
-	"github.com/mudler/LocalAI/pkg/model"
-	"github.com/mudler/cogito"
-	"github.com/mudler/xlog"
-)
-
-// MCPCompletionEndpoint is the OpenAI Completion API endpoint https://platform.openai.com/docs/api-reference/completions
-// @Summary Generate completions for a given prompt and model.
-// @Param request body schema.OpenAIRequest true "query params"
-// @Success 200 {object} schema.OpenAIResponse "Response"
-// @Router /mcp/v1/completions [post]
-func MCPCompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) echo.HandlerFunc {
-	// We do not support streaming mode (Yet?)
-	return func(c echo.Context) error {
-		created := int(time.Now().Unix())
-
-		ctx := c.Request().Context()
-
-		// Handle Correlation
-		id := c.Request().Header.Get("X-Correlation-ID")
-		if id == "" {
-			id = uuid.New().String()
-		}
-
-		input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
-		if !ok || input.Model == "" {
-			return echo.ErrBadRequest
-		}
-
-		config, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
-		if !ok || config == nil {
-			return echo.ErrBadRequest
-		}
-
-		if config.MCP.Servers == "" && config.MCP.Stdio == "" {
-			return fmt.Errorf("no MCP servers configured")
-		}
-
-		// Get MCP config from model config
-		remote, stdio, err := config.MCP.MCPConfigFromYAML()
-		if err != nil {
-			return fmt.Errorf("failed to get MCP config: %w", err)
-		}
-
-		// Check if we have tools in cache, or we have to have an initial connection
-		sessions, err := mcpTools.SessionsFromMCPConfig(config.Name, remote, stdio)
-		if err != nil {
-			return fmt.Errorf("failed to get MCP sessions: %w", err)
-		}
-
-		if len(sessions) == 0 {
-			return fmt.Errorf("no working MCP servers found")
-		}
-
-		fragment := cogito.NewEmptyFragment()
-
-		for _, message := range input.Messages {
-			fragment = fragment.AddMessage(message.Role, message.StringContent)
-		}
-
-		_, port, err := net.SplitHostPort(appConfig.APIAddress)
-		if err != nil {
-			return err
-		}
-
-		apiKey := ""
-		if appConfig.ApiKeys != nil {
-			apiKey = appConfig.ApiKeys[0]
-		}
-
-		ctxWithCancellation, cancel := context.WithCancel(ctx)
-		defer cancel()
-
-		// TODO: instead of connecting to the API, we should just wire this internally
-		// and act like completion.go.
-		// We can do this as cogito expects an interface and we can create one that
-		// we satisfy to just call internally ComputeChoices
-		defaultLLM := cogito.NewOpenAILLM(config.Name, apiKey, "http://127.0.0.1:"+port)
-
-		// Build cogito options using the consolidated method
-		cogitoOpts := config.BuildCogitoOptions()
-
-		cogitoOpts = append(
-			cogitoOpts,
-			cogito.WithContext(ctxWithCancellation),
-			cogito.WithMCPs(sessions...),
-			cogito.WithStatusCallback(func(s string) {
-				xlog.Debug("[model agent] Status", "model", config.Name, "status", s)
-			}),
-			cogito.WithReasoningCallback(func(s string) {
-				xlog.Debug("[model agent] Reasoning", "model", config.Name, "reasoning", s)
-			}),
-			cogito.WithToolCallBack(func(t *cogito.ToolChoice, state *cogito.SessionState) cogito.ToolCallDecision {
-				xlog.Debug("[model agent] Tool call", "model", config.Name, "tool", t.Name, "reasoning", t.Reasoning, "arguments", t.Arguments)
-				return cogito.ToolCallDecision{
-					Approved: true,
-				}
-			}),
-			cogito.WithToolCallResultCallback(func(t cogito.ToolStatus) {
-				xlog.Debug("[model agent] Tool call result", "model", config.Name, "tool", t.Name, "result", t.Result, "tool_arguments", t.ToolArguments)
-			}),
-		)
-
-		f, err := cogito.ExecuteTools(
-			defaultLLM, fragment,
-			cogitoOpts...,
-		)
-		if err != nil && !errors.Is(err, cogito.ErrNoToolSelected) {
-			return err
-		}
-
-		f, err = defaultLLM.Ask(ctx, f)
-		if err != nil {
-			return err
-		}
-
-		resp := &schema.OpenAIResponse{
-			ID:      id,
-			Created: created,
-			Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
-			Choices: []schema.Choice{{Message: &schema.Message{Role: "assistant", Content: &f.LastMessage().Content}}},
-			Object:  "text_completion",
-		}
-
-		jsonResult, _ := json.Marshal(resp)
-		xlog.Debug("Response", "response", string(jsonResult))
-
-		// Return the prediction in the response body
-		return c.JSON(200, resp)
-	}
-}
diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go
index f7db61b0e..f70a44b21 100644
--- a/core/http/routes/localai.go
+++ b/core/http/routes/localai.go
@@ -138,9 +138,9 @@ func RegisterLocalAIRoutes(router *echo.Echo,
 		requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.TokenizeRequest) }))
 
 	// MCP endpoint - supports both streaming and non-streaming modes
-	// Note: These are the canonical MCP routes (not duplicated in openai.go)
+	// Note: the streaming mode is NOT compatible with the OpenAI APIs; it emits a custom set of SSE events that stream more states (reasoning, tool calls, and tool results).
 	if evaluator != nil {
-		mcpStreamHandler := localai.MCPStreamEndpoint(cl, ml, evaluator, appConfig)
+		mcpStreamHandler := localai.MCPEndpoint(cl, ml, evaluator, appConfig)
 		mcpStreamMiddleware := []echo.MiddlewareFunc{
 			requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_CHAT)),
 			requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
diff --git a/core/http/routes/openai.go b/core/http/routes/openai.go
index e61e48a05..2d62859f3 100644
--- a/core/http/routes/openai.go
+++ b/core/http/routes/openai.go
@@ -79,9 +79,6 @@ func RegisterOpenAIRoutes(app *echo.Echo,
 	app.POST("/completions", completionHandler, completionMiddleware...)
 	app.POST("/v1/engines/:model/completions", completionHandler, completionMiddleware...)
 
-	// Note: MCP endpoints are registered in localai.go to avoid route conflicts
-	// The localai.MCPStreamEndpoint handler supports both streaming and non-streaming modes
-
 	// embeddings
 	embeddingHandler := openai.EmbeddingsEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
 	embeddingMiddleware := []echo.MiddlewareFunc{
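
A note for reviewers exercising the endpoint: because the SSE mode of /v1/mcp/chat/completions is deliberately not OpenAI-compatible, stock OpenAI streaming clients cannot consume it. Below is a minimal illustrative client, not part of this patch: the address 127.0.0.1:8080, the model name "gpt-4", and the use of the request's stream flag to enable SSE are all assumptions, and since the event names and payload shapes are defined in core/http/endpoints/localai/mcp.go rather than spelled out here, the client simply prints the raw SSE frames.

package main

import (
	"bufio"
	"bytes"
	"fmt"
	"log"
	"net/http"
)

func main() {
	// Model name, address, and the stream flag are assumptions for this example.
	body := []byte(`{"model":"gpt-4","stream":true,"messages":[{"role":"user","content":"hi"}]}`)
	resp, err := http.Post("http://127.0.0.1:8080/v1/mcp/chat/completions", "application/json", bytes.NewReader(body))
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// SSE frames arrive as "event: <name>" / "data: <json>" line pairs separated
	// by blank lines; reasoning, tool-call, tool-result, and error events are
	// interleaved with the final answer, which is why plain OpenAI streaming
	// clients cannot parse this stream.
	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		if line := scanner.Text(); line != "" {
			fmt.Println(line)
		}
	}
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}
}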
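The TODO that leaves the tree with core/http/endpoints/openai/mcp.go describes a follow-up: satisfy the LLM interface cogito expects with a type that calls ComputeChoices in-process, the way completion.go works, instead of dialing the local HTTP API. A speculative sketch of that idea follows; the Ask signature and the cogito.Fragment type name are inferred only from how defaultLLM.Ask(ctx, f) and fragment.AddMessage(...) are used in the deleted code, and localLLM and its stubbed body are hypothetical.

package localai

import (
	"context"

	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/core/templates"
	"github.com/mudler/LocalAI/pkg/model"
	"github.com/mudler/cogito"
)

// localLLM (hypothetical) would satisfy the interface cogito expects from an
// LLM, so ExecuteTools and Ask run against the in-process inference path
// instead of round-tripping through http://127.0.0.1:<port>.
type localLLM struct {
	cl        *config.ModelConfigLoader
	ml        *model.ModelLoader
	evaluator *templates.Evaluator
	appConfig *config.ApplicationConfig
}

// Ask would translate the fragment's messages into an internal request, run
// it through ComputeChoices, and append the assistant reply to the fragment.
// Left as a stub: only the shape of the wiring is being illustrated.
func (l *localLLM) Ask(ctx context.Context, f cogito.Fragment) (cogito.Fragment, error) {
	// ... build the request from f, call ComputeChoices, then:
	// return f.AddMessage("assistant", reply), nil
	return f, nil
}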