mirror of
https://github.com/mudler/LocalAI.git
synced 2026-04-01 05:36:49 -04:00
* feat: add distributed mode (experimental) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix data races, mutexes, transactions Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix events and tool stream in agent chat Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * use ginkgo Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(cron): compute correctly time boundaries avoiding re-triggering Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not flood of healthy checks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not list obvious backends as text backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * tests fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop redundant healthcheck Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
77 lines
3.1 KiB
Go
77 lines
3.1 KiB
Go
package routes
|
|
|
|
import (
|
|
"github.com/labstack/echo/v4"
|
|
"github.com/mudler/LocalAI/core/application"
|
|
"github.com/mudler/LocalAI/core/config"
|
|
localai "github.com/mudler/LocalAI/core/http/endpoints/localai"
|
|
mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp"
|
|
"github.com/mudler/LocalAI/core/http/endpoints/openresponses"
|
|
"github.com/mudler/LocalAI/core/http/middleware"
|
|
"github.com/mudler/LocalAI/core/schema"
|
|
)
|
|
|
|
func RegisterOpenResponsesRoutes(app *echo.Echo,
|
|
re *middleware.RequestExtractor,
|
|
application *application.Application) {
|
|
|
|
// NATS client for distributed MCP tool routing (nil when not in distributed mode)
|
|
var natsClient mcpTools.MCPNATSClient
|
|
if d := application.Distributed(); d != nil {
|
|
natsClient = d.Nats
|
|
}
|
|
|
|
// Open Responses API endpoint
|
|
responsesHandler := openresponses.ResponsesEndpoint(
|
|
application.ModelConfigLoader(),
|
|
application.ModelLoader(),
|
|
application.TemplatesEvaluator(),
|
|
application.ApplicationConfig(),
|
|
natsClient,
|
|
)
|
|
|
|
responsesMiddleware := []echo.MiddlewareFunc{
|
|
// Intercept requests where the model name matches an agent — route directly
|
|
// to the agent pool without going through the model config resolution pipeline.
|
|
localai.AgentResponsesInterceptor(application),
|
|
middleware.UsageMiddleware(application.AuthDB()),
|
|
middleware.TraceMiddleware(application),
|
|
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_CHAT)),
|
|
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenResponsesRequest) }),
|
|
setOpenResponsesRequestContext(re),
|
|
}
|
|
|
|
// Main Open Responses endpoint
|
|
app.POST("/v1/responses", responsesHandler, responsesMiddleware...)
|
|
|
|
// Also support without version prefix for compatibility
|
|
app.POST("/responses", responsesHandler, responsesMiddleware...)
|
|
|
|
// WebSocket mode for Responses API
|
|
wsHandler := openresponses.WebSocketEndpoint(application)
|
|
app.GET("/v1/responses", wsHandler, middleware.UsageMiddleware(application.AuthDB()), middleware.TraceMiddleware(application))
|
|
app.GET("/responses", wsHandler, middleware.UsageMiddleware(application.AuthDB()), middleware.TraceMiddleware(application))
|
|
|
|
// GET /responses/:id - Retrieve a response (for polling background requests)
|
|
getResponseHandler := openresponses.GetResponseEndpoint()
|
|
app.GET("/v1/responses/:id", getResponseHandler, middleware.TraceMiddleware(application))
|
|
app.GET("/responses/:id", getResponseHandler, middleware.TraceMiddleware(application))
|
|
|
|
// POST /responses/:id/cancel - Cancel a background response
|
|
cancelResponseHandler := openresponses.CancelResponseEndpoint()
|
|
app.POST("/v1/responses/:id/cancel", cancelResponseHandler, middleware.TraceMiddleware(application))
|
|
app.POST("/responses/:id/cancel", cancelResponseHandler, middleware.TraceMiddleware(application))
|
|
}
|
|
|
|
// setOpenResponsesRequestContext sets up the context and cancel function for Open Responses requests
|
|
func setOpenResponsesRequestContext(re *middleware.RequestExtractor) echo.MiddlewareFunc {
|
|
return func(next echo.HandlerFunc) echo.HandlerFunc {
|
|
return func(c echo.Context) error {
|
|
if err := re.SetOpenResponsesRequest(c); err != nil {
|
|
return err
|
|
}
|
|
return next(c)
|
|
}
|
|
}
|
|
}
|