mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-27 18:06:58 -04:00
fix(distributed): return empty backend list for agent nodes instead of failing backend.list (#10545)
Opening an AGENT-type worker node's detail page errored with "failed to list backends on node" / NATS "nodes.<id>.backend.list: no responders available". Agent workers only subscribe to agent.*, jobs.*, mcp.* and <prefix>.backend.stop; they never subscribe to backend.list, so the per-node ListBackendsOnNodeEndpoint request had no responder and timed out. The aggregate cluster-wide list already guards this in managers_distributed.go (skip nodes whose NodeType is set and not "backend"). The single-node endpoint lacked the same guard. Thread the NodeRegistry into ListBackendsOnNodeEndpoint and short-circuit to an empty (non-nil) list for non-backend node types before issuing the doomed NATS request, mirroring the aggregate-list gate so both views stay consistent. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code]
This commit is contained in:
@@ -25,6 +25,7 @@ import (
|
||||
"github.com/mudler/LocalAI/core/http/auth"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/core/services/galleryop"
|
||||
"github.com/mudler/LocalAI/core/services/messaging"
|
||||
"github.com/mudler/LocalAI/core/services/nodes"
|
||||
"github.com/mudler/LocalAI/core/services/nodes/prefixcache"
|
||||
"github.com/mudler/LocalAI/pkg/httpclient"
|
||||
@@ -550,12 +551,23 @@ func DeleteBackendOnNodeEndpoint(unloader nodes.NodeCommandSender) echo.HandlerF
|
||||
}
|
||||
|
||||
// ListBackendsOnNodeEndpoint lists installed backends on a worker node via NATS.
|
||||
func ListBackendsOnNodeEndpoint(unloader nodes.NodeCommandSender) echo.HandlerFunc {
|
||||
func ListBackendsOnNodeEndpoint(unloader nodes.NodeCommandSender, registry *nodes.NodeRegistry) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
nodeID := c.Param("id")
|
||||
// Agent-type workers don't run backends and never subscribe to the
|
||||
// nodes.<id>.backend.list NATS subject, so the request would hang
|
||||
// until timeout with "no responders". Their backend list is simply
|
||||
// empty. Mirror the aggregate-list guard in managers_distributed.go
|
||||
// (skip nodes whose NodeType is set and not "backend") so the
|
||||
// single-node and cluster-wide views stay consistent.
|
||||
if node, err := registry.Get(c.Request().Context(), nodeID); err == nil {
|
||||
if node.NodeType != "" && node.NodeType != nodes.NodeTypeBackend {
|
||||
return c.JSON(http.StatusOK, []messaging.NodeBackendInfo{})
|
||||
}
|
||||
}
|
||||
if unloader == nil {
|
||||
return c.JSON(http.StatusServiceUnavailable, nodeError(http.StatusServiceUnavailable, "NATS not configured"))
|
||||
}
|
||||
nodeID := c.Param("id")
|
||||
reply, err := unloader.ListBackends(nodeID)
|
||||
if err != nil {
|
||||
xlog.Error("Failed to list backends on node", "node", nodeID, "error", err)
|
||||
|
||||
103
core/http/endpoints/localai/nodes_backends_list_test.go
Normal file
103
core/http/endpoints/localai/nodes_backends_list_test.go
Normal file
@@ -0,0 +1,103 @@
|
||||
package localai
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
|
||||
"github.com/labstack/echo/v4"
|
||||
"github.com/mudler/LocalAI/core/services/messaging"
|
||||
"github.com/mudler/LocalAI/core/services/nodes"
|
||||
"github.com/mudler/LocalAI/core/services/testutil"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
// stubNodeCommandSender is a test double that is handed to the endpoint in
// place of a real command sender. Its only piece of state is a flag that
// ListBackends flips, which lets a spec verify whether the endpoint actually
// issued a backend-list request or short-circuited before reaching it.
type stubNodeCommandSender struct {
	// listBackendsCalled is set to true the first time ListBackends runs.
	listBackendsCalled bool
}
|
||||
|
||||
func (s *stubNodeCommandSender) InstallBackend(_, _, _, _, _, _, _ string, _ int, _ string, _ func(messaging.BackendInstallProgressEvent)) (*messaging.BackendInstallReply, error) {
|
||||
return &messaging.BackendInstallReply{}, nil
|
||||
}
|
||||
|
||||
func (s *stubNodeCommandSender) UpgradeBackend(_, _, _, _, _, _ string, _ int, _ string, _ func(messaging.BackendInstallProgressEvent)) (*messaging.BackendUpgradeReply, error) {
|
||||
return &messaging.BackendUpgradeReply{}, nil
|
||||
}
|
||||
|
||||
func (s *stubNodeCommandSender) DeleteBackend(_, _ string) (*messaging.BackendDeleteReply, error) {
|
||||
return &messaging.BackendDeleteReply{Success: true}, nil
|
||||
}
|
||||
|
||||
func (s *stubNodeCommandSender) ListBackends(_ string) (*messaging.BackendListReply, error) {
|
||||
s.listBackendsCalled = true
|
||||
return &messaging.BackendListReply{Backends: []messaging.NodeBackendInfo{{Name: "llama-cpp"}}}, nil
|
||||
}
|
||||
|
||||
// StopBackend is a no-op stub: both arguments are ignored and the returned
// error is always nil.
func (s *stubNodeCommandSender) StopBackend(_, _ string) error {
	return nil
}
|
||||
|
||||
// UnloadModelOnNode is a no-op stub: both arguments are ignored and the
// returned error is always nil.
func (s *stubNodeCommandSender) UnloadModelOnNode(_, _ string) error {
	return nil
}
|
||||
|
||||
var _ = Describe("ListBackendsOnNodeEndpoint", func() {
|
||||
var registry *nodes.NodeRegistry
|
||||
|
||||
BeforeEach(func() {
|
||||
db := testutil.SetupTestDB()
|
||||
var err error
|
||||
registry, err = nodes.NewNodeRegistry(db)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
})
|
||||
|
||||
callEndpoint := func(unloader nodes.NodeCommandSender, nodeID string) *httptest.ResponseRecorder {
|
||||
e := echo.New()
|
||||
req := httptest.NewRequest(http.MethodGet, "/", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
c := e.NewContext(req, rec)
|
||||
c.SetParamNames("id")
|
||||
c.SetParamValues(nodeID)
|
||||
handler := ListBackendsOnNodeEndpoint(unloader, registry)
|
||||
Expect(handler(c)).To(Succeed())
|
||||
return rec
|
||||
}
|
||||
|
||||
It("returns an empty list for an agent node without issuing a NATS request", func() {
|
||||
ctx := context.Background()
|
||||
node := &nodes.BackendNode{Name: "agent-1", NodeType: nodes.NodeTypeAgent}
|
||||
Expect(registry.Register(ctx, node, true)).To(Succeed())
|
||||
|
||||
stub := &stubNodeCommandSender{}
|
||||
rec := callEndpoint(stub, node.ID)
|
||||
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
Expect(stub.listBackendsCalled).To(BeFalse(),
|
||||
"agent workers don't subscribe to backend.list; the endpoint must not issue the doomed NATS request")
|
||||
|
||||
var list []messaging.NodeBackendInfo
|
||||
Expect(json.Unmarshal(rec.Body.Bytes(), &list)).To(Succeed())
|
||||
Expect(list).To(BeEmpty())
|
||||
// Must be `[]`, not `null`, so the UI can render it.
|
||||
Expect(rec.Body.String()).To(ContainSubstring("[]"))
|
||||
})
|
||||
|
||||
It("consults the unloader (NATS) for a backend node", func() {
|
||||
ctx := context.Background()
|
||||
node := &nodes.BackendNode{Name: "backend-1", NodeType: nodes.NodeTypeBackend, Address: "10.0.0.1:50051"}
|
||||
Expect(registry.Register(ctx, node, true)).To(Succeed())
|
||||
|
||||
stub := &stubNodeCommandSender{}
|
||||
rec := callEndpoint(stub, node.ID)
|
||||
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
Expect(stub.listBackendsCalled).To(BeTrue(),
|
||||
"backend nodes must still be queried over NATS")
|
||||
|
||||
var list []messaging.NodeBackendInfo
|
||||
Expect(json.Unmarshal(rec.Body.Bytes(), &list)).To(Succeed())
|
||||
Expect(list).To(HaveLen(1))
|
||||
Expect(list[0].Name).To(Equal("llama-cpp"))
|
||||
})
|
||||
})
|
||||
@@ -88,7 +88,7 @@ func RegisterNodeAdminRoutes(e *echo.Echo, registry *nodes.NodeRegistry, unloade
|
||||
admin.POST("/:id/approve", localai.ApproveNodeEndpoint(registry, authDB, hmacSecret, natsCfg))
|
||||
|
||||
// Backend management on workers
|
||||
admin.GET("/:id/backends", localai.ListBackendsOnNodeEndpoint(unloader))
|
||||
admin.GET("/:id/backends", localai.ListBackendsOnNodeEndpoint(unloader, registry))
|
||||
admin.POST("/:id/backends/install", localai.InstallBackendOnNodeEndpoint(unloader, galleryService, opcache, appConfig))
|
||||
admin.POST("/:id/backends/delete", localai.DeleteBackendOnNodeEndpoint(unloader))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user