mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-14 19:58:44 -04:00
* feat(distributed): NATS JWT auth, TLS/mTLS options, and e2e coverage Mint per-node NATS user JWTs at registration when LOCALAI_NATS_ACCOUNT_SEED is set, and connect workers with scoped credentials from the register response. Add optional LOCALAI_NATS_TLS_CA/CERT/KEY for private CA and mTLS alongside tls:// URLs, plus test-e2e-distributed and NatsJWT container e2e specs. Document JWT setup (nats-auth-setup.sh) and TLS env vars in distributed-mode. Assisted-by: Grok:grok grok-build Signed-off-by: Richard Palethorpe <io@richiejp.com> * fix(distributed): correct NATS JWT scoping and harden client auth The JWT-auth path added in 46467cc7 had several gaps that fail silently under LOCALAI_NATS_REQUIRE_AUTH: - Agent-worker minted JWTs did not allow the subjects the agent worker actually subscribes to (jobs.mcp-ci.new and nodes.<id>.backend.stop), so MCP-CI jobs and backend-stop session cleanup were silently dropped. Scope the agent permission set to those subjects. - NATS subscription permission violations were swallowed (Subscribe returned a live-but-dead subscription). Confirm subscriptions with a server round-trip so a denial surfaces synchronously, and log async permission errors. - The backend worker connected anonymously when given a JWT without its paired seed; reject the unpaired credential instead. - The documented service-user permissions in nats-auth-setup.sh omitted prefixcache.>, which the frontend publishes and subscribes; add it. Also: add a credential-provider hook to the messaging client (consumed by the follow-up credential-lifecycle change), drop the always-nil error from NatsMessagingOptions, run go mod tidy (jwt/v2 and nkeys are now direct), and gofmt the feature's files. Tests: an agent-JWT e2e spec that connects to the enforcing NATS server and exercises every subscription the agent worker makes, plus permission allow-list coverage unit tests. Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Richard Palethorpe <io@richiejp.com> * feat(distributed): acquire and auto-refresh worker NATS credentials Workers fetched NATS credentials once at startup, which broke two cases under JWT auth: a worker that registered while still pending admin approval never received a minted JWT (it connected unauthenticated and gave up), and a long-running worker's 24h JWT expired with no way to renew it. Introduce workerregistry.NATSCredentialManager, built on idempotent re-registration (the frontend preserves the node row and mints a fresh JWT each call): - Acquire re-registers through admin approval until the node is approved and credentials are minted (or returns the first success when auth is not required, preserving anonymous-NATS behavior). - RefreshLoop re-registers before the JWT expires (~75% of its lifetime), updating the credentials served to the connection. - Both are bounded (default 100 attempts / consecutive failures) and return an error on exhaustion, so an unapprovable or unrenewable worker exits non-zero and surfaces the problem instead of hanging or drifting toward an expired credential. The messaging client gains WithUserJWTProvider, fetching credentials on each (re)connect so the connection transparently adopts a refreshed JWT when the server expires the old one. RegisterFull exposes the approval status and full response; Register delegates to it. Both the backend worker and the agent worker are wired to this: explicit env credentials are used as-is, minted credentials are acquired-with-wait and refreshed, and a permanent refresh failure shuts the worker down so it restarts and re-acquires. Tests cover Acquire (wait-through-pending, bounded give-up, context cancel), RefreshLoop (refresh-before-expiry, bounded failure, no-expiry exit) and jwtExpiry decoding. Docs updated in distributed-mode.md. Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Richard Palethorpe <io@richiejp.com> --------- Signed-off-by: Richard Palethorpe <io@richiejp.com>
147 lines
6.7 KiB
Go
147 lines
6.7 KiB
Go
package routes
|
|
|
|
import (
|
|
"crypto/subtle"
|
|
"net/http"
|
|
"strings"
|
|
|
|
"github.com/labstack/echo/v4"
|
|
"github.com/mudler/LocalAI/core/config"
|
|
"github.com/mudler/LocalAI/core/http/endpoints/localai"
|
|
"github.com/mudler/LocalAI/core/services/galleryop"
|
|
"github.com/mudler/LocalAI/core/services/nodes"
|
|
"github.com/mudler/LocalAI/pkg/natsauth"
|
|
"gorm.io/gorm"
|
|
)
|
|
|
|
// nodeReadyMiddleware returns middleware that checks the node registry is available.
|
|
func nodeReadyMiddleware(registry *nodes.NodeRegistry) echo.MiddlewareFunc {
|
|
return func(next echo.HandlerFunc) echo.HandlerFunc {
|
|
return func(c echo.Context) error {
|
|
if registry == nil {
|
|
return c.JSON(http.StatusServiceUnavailable, map[string]string{
|
|
"error": "distributed mode not enabled",
|
|
})
|
|
}
|
|
return next(c)
|
|
}
|
|
}
|
|
}
|
|
|
|
// RegisterNodeSelfServiceRoutes registers /api/node/ endpoints used by backend
|
|
// nodes themselves (register, heartbeat, drain, query own models, deregister).
|
|
// These are authenticated via the registration token, not admin middleware.
|
|
//
|
|
// TODO(security): Node self-service endpoints authenticate via shared registration
|
|
// token but do not verify per-node identity. A compromised worker can heartbeat/drain/
|
|
// deregister other nodes. Future: issue per-node JWT at registration, validate node
|
|
// identity on subsequent requests (compare :id param with token subject).
|
|
func RegisterNodeSelfServiceRoutes(e *echo.Echo, registry *nodes.NodeRegistry, registrationToken string, autoApprove bool, authDB *gorm.DB, hmacSecret string, natsCfg natsauth.Config) {
|
|
if registry == nil {
|
|
return
|
|
}
|
|
|
|
readyMw := nodeReadyMiddleware(registry)
|
|
tokenAuthMw := nodeTokenAuth(registrationToken)
|
|
|
|
node := e.Group("/api/node", readyMw, tokenAuthMw)
|
|
node.POST("/register", localai.RegisterNodeEndpoint(registry, registrationToken, autoApprove, authDB, hmacSecret, natsCfg))
|
|
node.POST("/:id/heartbeat", localai.HeartbeatEndpoint(registry))
|
|
node.POST("/:id/drain", localai.DrainNodeEndpoint(registry))
|
|
node.POST("/:id/resume", localai.ResumeNodeEndpoint(registry))
|
|
node.POST("/:id/deregister", localai.DeactivateNodeEndpoint(registry))
|
|
node.GET("/:id/models", localai.GetNodeModelsEndpoint(registry))
|
|
node.DELETE("/:id", localai.DeactivateNodeEndpoint(registry))
|
|
}
|
|
|
|
// RegisterNodeAdminRoutes registers /api/nodes/ endpoints used by admins
|
|
// (list, get, get models, drain, delete, approve, backend management). Protected by admin middleware.
|
|
//
|
|
// galleryService/opcache/appConfig are threaded in for the async node-scoped
|
|
// backend install path (POST /:id/backends/install). That handler enqueues a
|
|
// ManagementOp on the gallery channel rather than blocking on a NATS reply, so
|
|
// the browser gets HTTP 202 + jobID immediately instead of waiting up to 3 minutes.
|
|
func RegisterNodeAdminRoutes(e *echo.Echo, registry *nodes.NodeRegistry, unloader nodes.NodeCommandSender, galleryService *galleryop.GalleryService, opcache *galleryop.OpCache, appConfig *config.ApplicationConfig, adminMw echo.MiddlewareFunc, authDB *gorm.DB, hmacSecret string, registrationToken string, natsCfg natsauth.Config) {
|
|
if registry == nil {
|
|
return
|
|
}
|
|
|
|
readyMw := nodeReadyMiddleware(registry)
|
|
|
|
admin := e.Group("/api/nodes", readyMw, adminMw)
|
|
admin.GET("", localai.ListNodesEndpoint(registry))
|
|
|
|
// Model scheduling (registered before /:id to avoid route conflicts)
|
|
admin.GET("/scheduling", localai.ListSchedulingEndpoint(registry))
|
|
admin.GET("/scheduling/:model", localai.GetSchedulingEndpoint(registry))
|
|
admin.POST("/scheduling", localai.SetSchedulingEndpoint(registry))
|
|
admin.DELETE("/scheduling/:model", localai.DeleteSchedulingEndpoint(registry))
|
|
|
|
admin.GET("/:id", localai.GetNodeEndpoint(registry))
|
|
admin.GET("/:id/models", localai.GetNodeModelsEndpoint(registry))
|
|
admin.DELETE("/:id", localai.DeregisterNodeEndpoint(registry))
|
|
admin.POST("/:id/drain", localai.DrainNodeEndpoint(registry))
|
|
admin.POST("/:id/resume", localai.ResumeNodeEndpoint(registry))
|
|
admin.POST("/:id/approve", localai.ApproveNodeEndpoint(registry, authDB, hmacSecret, natsCfg))
|
|
|
|
// Backend management on workers
|
|
admin.GET("/:id/backends", localai.ListBackendsOnNodeEndpoint(unloader))
|
|
admin.POST("/:id/backends/install", localai.InstallBackendOnNodeEndpoint(unloader, galleryService, opcache, appConfig))
|
|
admin.POST("/:id/backends/delete", localai.DeleteBackendOnNodeEndpoint(unloader))
|
|
|
|
// Model management on workers
|
|
admin.POST("/:id/models/unload", localai.UnloadModelOnNodeEndpoint(unloader, registry))
|
|
admin.POST("/:id/models/delete", localai.DeleteModelOnNodeEndpoint(unloader, registry))
|
|
|
|
// Backend log streaming (proxied from worker HTTP server)
|
|
admin.GET("/:id/backend-logs", localai.NodeBackendLogsListEndpoint(registry, registrationToken))
|
|
admin.GET("/:id/backend-logs/:modelId", localai.NodeBackendLogsLinesEndpoint(registry, registrationToken))
|
|
|
|
// Label management
|
|
admin.GET("/:id/labels", localai.GetNodeLabelsEndpoint(registry))
|
|
admin.PUT("/:id/labels", localai.SetNodeLabelsEndpoint(registry))
|
|
admin.PATCH("/:id/labels", localai.MergeNodeLabelsEndpoint(registry))
|
|
admin.DELETE("/:id/labels/:key", localai.DeleteNodeLabelEndpoint(registry))
|
|
|
|
// Per-node replica capacity. PUT sets a sticky admin override that
|
|
// survives worker restarts. DELETE clears the override so the worker's
|
|
// CLI flag takes over again at the next re-registration.
|
|
admin.PUT("/:id/max-replicas-per-model", localai.UpdateMaxReplicasPerModelEndpoint(registry))
|
|
admin.DELETE("/:id/max-replicas-per-model", localai.ResetMaxReplicasPerModelEndpoint(registry))
|
|
|
|
// WebSocket proxy for real-time log streaming from workers
|
|
e.GET("/ws/nodes/:id/backend-logs/:modelId", localai.NodeBackendLogsWSEndpoint(registry, registrationToken), readyMw, adminMw)
|
|
}
|
|
|
|
// nodeTokenAuth validates the registration token for node self-service endpoints.
|
|
// When registrationToken is empty (single-node / non-distributed mode), these
|
|
// endpoints are unprotected. This is intentional: in single-node mode there are
|
|
// no remote workers to authenticate. Operators enabling distributed mode MUST
|
|
// set a registration token via LOCALAI_REGISTRATION_TOKEN or config.
|
|
//
|
|
// It validates the token from an Authorization: Bearer <token> header using
|
|
// constant-time comparison.
|
|
func nodeTokenAuth(registrationToken string) echo.MiddlewareFunc {
|
|
return func(next echo.HandlerFunc) echo.HandlerFunc {
|
|
return func(c echo.Context) error {
|
|
if registrationToken == "" {
|
|
return next(c)
|
|
}
|
|
|
|
token, ok := strings.CutPrefix(c.Request().Header.Get("Authorization"), "Bearer ")
|
|
if !ok {
|
|
return c.JSON(http.StatusUnauthorized, map[string]string{
|
|
"error": "missing or invalid Authorization header",
|
|
})
|
|
}
|
|
if subtle.ConstantTimeCompare([]byte(token), []byte(registrationToken)) != 1 {
|
|
return c.JSON(http.StatusUnauthorized, map[string]string{
|
|
"error": "invalid registration token",
|
|
})
|
|
}
|
|
|
|
return next(c)
|
|
}
|
|
}
|
|
}
|