mirror of
https://github.com/mudler/LocalAI.git
synced 2026-04-01 13:42:20 -04:00
* feat: add distributed mode (experimental) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix data races, mutexes, transactions Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix events and tool stream in agent chat Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * use ginkgo Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(cron): compute correctly time boundaries avoiding re-triggering Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not flood of healthy checks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not list obvious backends as text backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * tests fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop redundant healthcheck Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
114 lines
3.7 KiB
Go
114 lines
3.7 KiB
Go
package routes
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"net/http"
|
|
|
|
"github.com/google/uuid"
|
|
"github.com/labstack/echo/v4"
|
|
"github.com/mudler/LocalAI/core/application"
|
|
"github.com/mudler/LocalAI/core/config"
|
|
"github.com/mudler/LocalAI/core/http/endpoints/anthropic"
|
|
mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp"
|
|
"github.com/mudler/LocalAI/core/http/middleware"
|
|
"github.com/mudler/LocalAI/core/schema"
|
|
"github.com/mudler/xlog"
|
|
)
|
|
|
|
func RegisterAnthropicRoutes(app *echo.Echo,
|
|
re *middleware.RequestExtractor,
|
|
application *application.Application) {
|
|
|
|
// Anthropic Messages API endpoint
|
|
var natsClient mcpTools.MCPNATSClient
|
|
if d := application.Distributed(); d != nil {
|
|
natsClient = d.Nats
|
|
}
|
|
|
|
messagesHandler := anthropic.MessagesEndpoint(
|
|
application.ModelConfigLoader(),
|
|
application.ModelLoader(),
|
|
application.TemplatesEvaluator(),
|
|
application.ApplicationConfig(),
|
|
natsClient,
|
|
)
|
|
|
|
messagesMiddleware := []echo.MiddlewareFunc{
|
|
middleware.UsageMiddleware(application.AuthDB()),
|
|
middleware.TraceMiddleware(application),
|
|
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_CHAT)),
|
|
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.AnthropicRequest) }),
|
|
setAnthropicRequestContext(application.ApplicationConfig()),
|
|
}
|
|
|
|
// Main Anthropic endpoint
|
|
app.POST("/v1/messages", messagesHandler, messagesMiddleware...)
|
|
|
|
// Also support without version prefix for compatibility
|
|
app.POST("/messages", messagesHandler, messagesMiddleware...)
|
|
}
|
|
|
|
// setAnthropicRequestContext sets up the context and cancel function for Anthropic requests
|
|
func setAnthropicRequestContext(appConfig *config.ApplicationConfig) echo.MiddlewareFunc {
|
|
return func(next echo.HandlerFunc) echo.HandlerFunc {
|
|
return func(c echo.Context) error {
|
|
input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.AnthropicRequest)
|
|
if !ok || input.Model == "" {
|
|
return echo.NewHTTPError(http.StatusBadRequest, "model is required")
|
|
}
|
|
|
|
cfg, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
|
|
if !ok || cfg == nil {
|
|
return echo.NewHTTPError(http.StatusBadRequest, "model configuration not found")
|
|
}
|
|
|
|
// Extract or generate the correlation ID
|
|
// Anthropic uses x-request-id header
|
|
correlationID := c.Request().Header.Get("x-request-id")
|
|
if correlationID == "" {
|
|
correlationID = uuid.New().String()
|
|
}
|
|
c.Response().Header().Set("x-request-id", correlationID)
|
|
|
|
// Set up context with cancellation
|
|
reqCtx := c.Request().Context()
|
|
c1, cancel := context.WithCancel(appConfig.Context)
|
|
|
|
// Bridge request cancellation to c1 without spawning a goroutine.
|
|
stop := context.AfterFunc(reqCtx, cancel)
|
|
defer func() {
|
|
stop() // deregister callback if it hasn't fired
|
|
cancel() // release c1 resources (idempotent)
|
|
}()
|
|
|
|
// Add the correlation ID to the new context
|
|
ctxWithCorrelationID := context.WithValue(c1, middleware.CorrelationIDKey, correlationID)
|
|
|
|
input.Context = ctxWithCorrelationID
|
|
input.Cancel = cancel
|
|
|
|
if cfg.Model == "" {
|
|
xlog.Debug("replacing empty cfg.Model with input value", "input.Model", input.Model)
|
|
cfg.Model = input.Model
|
|
}
|
|
|
|
c.Set(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST, input)
|
|
c.Set(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG, cfg)
|
|
|
|
// Log the Anthropic API version if provided
|
|
anthropicVersion := c.Request().Header.Get("anthropic-version")
|
|
if anthropicVersion != "" {
|
|
xlog.Debug("Anthropic API version", "version", anthropicVersion)
|
|
}
|
|
|
|
// Validate max_tokens is provided
|
|
if input.MaxTokens <= 0 {
|
|
return echo.NewHTTPError(http.StatusBadRequest, fmt.Sprintf("max_tokens is required and must be greater than 0"))
|
|
}
|
|
|
|
return next(c)
|
|
}
|
|
}
|
|
}
|