From 91885c2c7e36bf79b31bcc96713491b643d53c47 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 28 Jun 2026 01:22:48 +0200 Subject: [PATCH] fix(distributed): return empty backend list for agent nodes instead of failing backend.list (#10545) (#10565) Opening an AGENT-type worker node's detail page errored with "failed to list backends on node" / NATS "nodes.<id>.backend.list: no responders available". Agent workers only subscribe to agent.*, jobs.*, mcp.* and .backend.stop; they never subscribe to backend.list, so the per-node ListBackendsOnNodeEndpoint request had no responder and timed out. The aggregate cluster-wide list already guards this in managers_distributed.go (skip nodes whose NodeType is set and not "backend"). The single-node endpoint lacked the same guard. Thread the NodeRegistry into ListBackendsOnNodeEndpoint and short-circuit to an empty (non-nil) list for non-backend node types before issuing the doomed NATS request, mirroring the aggregate-list gate so both views stay consistent. 
Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto Co-authored-by: Ettore Di Giacinto --- core/http/endpoints/localai/nodes.go | 16 ++- .../localai/nodes_backends_list_test.go | 103 ++++++++++++++++++ core/http/routes/nodes.go | 2 +- 3 files changed, 118 insertions(+), 3 deletions(-) create mode 100644 core/http/endpoints/localai/nodes_backends_list_test.go diff --git a/core/http/endpoints/localai/nodes.go b/core/http/endpoints/localai/nodes.go index e91eda6f4..71b4cbb11 100644 --- a/core/http/endpoints/localai/nodes.go +++ b/core/http/endpoints/localai/nodes.go @@ -25,6 +25,7 @@ import ( "github.com/mudler/LocalAI/core/http/auth" "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/core/services/galleryop" + "github.com/mudler/LocalAI/core/services/messaging" "github.com/mudler/LocalAI/core/services/nodes" "github.com/mudler/LocalAI/core/services/nodes/prefixcache" "github.com/mudler/LocalAI/pkg/httpclient" @@ -550,12 +551,23 @@ func DeleteBackendOnNodeEndpoint(unloader nodes.NodeCommandSender) echo.HandlerF } // ListBackendsOnNodeEndpoint lists installed backends on a worker node via NATS. -func ListBackendsOnNodeEndpoint(unloader nodes.NodeCommandSender) echo.HandlerFunc { +func ListBackendsOnNodeEndpoint(unloader nodes.NodeCommandSender, registry *nodes.NodeRegistry) echo.HandlerFunc { return func(c echo.Context) error { + nodeID := c.Param("id") + // Agent-type workers don't run backends and never subscribe to the + // nodes..backend.list NATS subject, so the request would hang + // until timeout with "no responders". Their backend list is simply + // empty. Mirror the aggregate-list guard in managers_distributed.go + // (skip nodes whose NodeType is set and not "backend") so the + // single-node and cluster-wide views stay consistent. 
+ if node, err := registry.Get(c.Request().Context(), nodeID); err == nil { + if node.NodeType != "" && node.NodeType != nodes.NodeTypeBackend { + return c.JSON(http.StatusOK, []messaging.NodeBackendInfo{}) + } + } if unloader == nil { return c.JSON(http.StatusServiceUnavailable, nodeError(http.StatusServiceUnavailable, "NATS not configured")) } - nodeID := c.Param("id") reply, err := unloader.ListBackends(nodeID) if err != nil { xlog.Error("Failed to list backends on node", "node", nodeID, "error", err) diff --git a/core/http/endpoints/localai/nodes_backends_list_test.go b/core/http/endpoints/localai/nodes_backends_list_test.go new file mode 100644 index 000000000..c625e8e95 --- /dev/null +++ b/core/http/endpoints/localai/nodes_backends_list_test.go @@ -0,0 +1,103 @@ +package localai + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + + "github.com/labstack/echo/v4" + "github.com/mudler/LocalAI/core/services/messaging" + "github.com/mudler/LocalAI/core/services/nodes" + "github.com/mudler/LocalAI/core/services/testutil" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +// stubNodeCommandSender records whether ListBackends was invoked so the test can +// assert the endpoint short-circuits (no NATS request) for agent-type nodes. 
+type stubNodeCommandSender struct { + listBackendsCalled bool +} + +func (s *stubNodeCommandSender) InstallBackend(_, _, _, _, _, _, _ string, _ int, _ string, _ func(messaging.BackendInstallProgressEvent)) (*messaging.BackendInstallReply, error) { + return &messaging.BackendInstallReply{}, nil +} + +func (s *stubNodeCommandSender) UpgradeBackend(_, _, _, _, _, _ string, _ int, _ string, _ func(messaging.BackendInstallProgressEvent)) (*messaging.BackendUpgradeReply, error) { + return &messaging.BackendUpgradeReply{}, nil +} + +func (s *stubNodeCommandSender) DeleteBackend(_, _ string) (*messaging.BackendDeleteReply, error) { + return &messaging.BackendDeleteReply{Success: true}, nil +} + +func (s *stubNodeCommandSender) ListBackends(_ string) (*messaging.BackendListReply, error) { + s.listBackendsCalled = true + return &messaging.BackendListReply{Backends: []messaging.NodeBackendInfo{{Name: "llama-cpp"}}}, nil +} + +func (s *stubNodeCommandSender) StopBackend(_, _ string) error { return nil } + +func (s *stubNodeCommandSender) UnloadModelOnNode(_, _ string) error { return nil } + +var _ = Describe("ListBackendsOnNodeEndpoint", func() { + var registry *nodes.NodeRegistry + + BeforeEach(func() { + db := testutil.SetupTestDB() + var err error + registry, err = nodes.NewNodeRegistry(db) + Expect(err).ToNot(HaveOccurred()) + }) + + callEndpoint := func(unloader nodes.NodeCommandSender, nodeID string) *httptest.ResponseRecorder { + e := echo.New() + req := httptest.NewRequest(http.MethodGet, "/", nil) + rec := httptest.NewRecorder() + c := e.NewContext(req, rec) + c.SetParamNames("id") + c.SetParamValues(nodeID) + handler := ListBackendsOnNodeEndpoint(unloader, registry) + Expect(handler(c)).To(Succeed()) + return rec + } + + It("returns an empty list for an agent node without issuing a NATS request", func() { + ctx := context.Background() + node := &nodes.BackendNode{Name: "agent-1", NodeType: nodes.NodeTypeAgent} + Expect(registry.Register(ctx, node, 
true)).To(Succeed()) + + stub := &stubNodeCommandSender{} + rec := callEndpoint(stub, node.ID) + + Expect(rec.Code).To(Equal(http.StatusOK)) + Expect(stub.listBackendsCalled).To(BeFalse(), + "agent workers don't subscribe to backend.list; the endpoint must not issue the doomed NATS request") + + var list []messaging.NodeBackendInfo + Expect(json.Unmarshal(rec.Body.Bytes(), &list)).To(Succeed()) + Expect(list).To(BeEmpty()) + // Must be `[]`, not `null`, so the UI can render it. + Expect(rec.Body.String()).To(ContainSubstring("[]")) + }) + + It("consults the unloader (NATS) for a backend node", func() { + ctx := context.Background() + node := &nodes.BackendNode{Name: "backend-1", NodeType: nodes.NodeTypeBackend, Address: "10.0.0.1:50051"} + Expect(registry.Register(ctx, node, true)).To(Succeed()) + + stub := &stubNodeCommandSender{} + rec := callEndpoint(stub, node.ID) + + Expect(rec.Code).To(Equal(http.StatusOK)) + Expect(stub.listBackendsCalled).To(BeTrue(), + "backend nodes must still be queried over NATS") + + var list []messaging.NodeBackendInfo + Expect(json.Unmarshal(rec.Body.Bytes(), &list)).To(Succeed()) + Expect(list).To(HaveLen(1)) + Expect(list[0].Name).To(Equal("llama-cpp")) + }) +}) diff --git a/core/http/routes/nodes.go b/core/http/routes/nodes.go index f6a2124b8..e35bea240 100644 --- a/core/http/routes/nodes.go +++ b/core/http/routes/nodes.go @@ -88,7 +88,7 @@ func RegisterNodeAdminRoutes(e *echo.Echo, registry *nodes.NodeRegistry, unloade admin.POST("/:id/approve", localai.ApproveNodeEndpoint(registry, authDB, hmacSecret, natsCfg)) // Backend management on workers - admin.GET("/:id/backends", localai.ListBackendsOnNodeEndpoint(unloader)) + admin.GET("/:id/backends", localai.ListBackendsOnNodeEndpoint(unloader, registry)) admin.POST("/:id/backends/install", localai.InstallBackendOnNodeEndpoint(unloader, galleryService, opcache, appConfig)) admin.POST("/:id/backends/delete", localai.DeleteBackendOnNodeEndpoint(unloader))