From 780e720593e88160eb5de08bd0bd2c74179918a9 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 22 May 2026 22:47:11 +0000 Subject: [PATCH] feat(operations): expose per-node breakdown on /api/operations When an operation's OpStatus has Nodes entries (populated by the Phase 4 progress sink wiring), surface them as a "nodes" array on the /api/operations response, sorted by node_name for stable rendering. Backward compatible: legacy clients ignore the field; ops without any node entries (single-node mode, model installs) omit the array entirely thanks to the empty-slice guard. Signed-off-by: Ettore Di Giacinto --- core/http/routes/ui_api.go | 73 ++++++++++++++++------ core/http/routes/ui_api_operations_test.go | 57 +++++++++++++++++ 2 files changed, 112 insertions(+), 18 deletions(-) diff --git a/core/http/routes/ui_api.go b/core/http/routes/ui_api.go index 9b3067459..8696e2b22 100644 --- a/core/http/routes/ui_api.go +++ b/core/http/routes/ui_api.go @@ -10,6 +10,7 @@ import ( "net/http" "net/url" "slices" + "sort" "strconv" "strings" "time" @@ -57,7 +58,6 @@ var usecaseFilters = map[string]config.ModelConfigUsecase{ config.UsecaseRealtimeAudio: config.FLAG_REALTIME_AUDIO, } - // extractHFRepo tries to find a HuggingFace repo ID from model overrides or URLs. func extractHFRepo(overrides map[string]any, urls []string) string { if overrides != nil { @@ -257,6 +257,44 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model if status != nil && status.Error != nil { opData["error"] = status.Error.Error() } + // Expose the per-node breakdown when the Phase 4 progress sink + // has populated OpStatus.Nodes (distributed backend installs). + // We sort by node_name for stable UI rendering across polls; + // the underlying slice reflects UpdateNodeProgress + // arrival order, which the UI must not depend on. 
Single-node + // ops and model installs leave Nodes empty so this block emits + // no key, preserving the legacy payload shape. + if status != nil && len(status.Nodes) > 0 { + nodes := make([]map[string]any, 0, len(status.Nodes)) + for _, n := range status.Nodes { + entry := map[string]any{ + "node_id": n.NodeID, + "node_name": n.NodeName, + "status": n.Status, + "percentage": n.Percentage, + } + if n.FileName != "" { + entry["file_name"] = n.FileName + } + if n.Current != "" { + entry["current"] = n.Current + } + if n.Total != "" { + entry["total"] = n.Total + } + if n.Phase != "" { + entry["phase"] = n.Phase + } + if n.Error != "" { + entry["error"] = n.Error + } + nodes = append(nodes, entry) + } + sort.SliceStable(nodes, func(i, j int) bool { + return fmt.Sprintf("%v", nodes[i]["node_name"]) < fmt.Sprintf("%v", nodes[j]["node_name"]) + }) + opData["nodes"] = nodes + } operations = append(operations, opData) } @@ -557,11 +595,11 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model NodeStatus string `json:"node_status"` } type modelCapability struct { - ID string `json:"id"` - Capabilities []string `json:"capabilities"` - Backend string `json:"backend"` - Disabled bool `json:"disabled"` - Pinned bool `json:"pinned"` + ID string `json:"id"` + Capabilities []string `json:"capabilities"` + Backend string `json:"backend"` + Disabled bool `json:"disabled"` + Pinned bool `json:"pinned"` // LoadedOn is populated only when the node registry is active // (distributed mode). Lets the UI show "loaded on worker-1" without // the operator having to expand every node manually. 
An empty slice @@ -1159,17 +1197,17 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model } return c.JSON(200, map[string]any{ - "backends": backendsJSON, - "repositories": appConfig.BackendGalleries, - "allTags": tags, - "processingBackends": processingBackendsData, - "taskTypes": taskTypes, - "availableBackends": totalBackends, - "installedBackends": installedBackendsCount, - "currentPage": pageNum, - "totalPages": totalPages, - "prevPage": prevPage, - "nextPage": nextPage, + "backends": backendsJSON, + "repositories": appConfig.BackendGalleries, + "allTags": tags, + "processingBackends": processingBackendsData, + "taskTypes": taskTypes, + "availableBackends": totalBackends, + "installedBackends": installedBackendsCount, + "currentPage": pageNum, + "totalPages": totalPages, + "prevPage": prevPage, + "nextPage": nextPage, "systemCapability": detectedCapability, "preferDevelopmentBackends": appConfig.PreferDevelopmentBackends, }) @@ -1599,4 +1637,3 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model app.DELETE("/api/branding/asset/:kind", localai.DeleteBrandingAssetEndpoint(appConfig), adminMiddleware) } - diff --git a/core/http/routes/ui_api_operations_test.go b/core/http/routes/ui_api_operations_test.go index 2ed4aad9c..5f82d90ca 100644 --- a/core/http/routes/ui_api_operations_test.go +++ b/core/http/routes/ui_api_operations_test.go @@ -62,6 +62,63 @@ var _ = Describe("/api/operations with node-scoped backend ops", func() { Expect(found["isBackend"]).To(Equal(true)) }) + It("surfaces per-node OpStatus entries on /api/operations", func() { + appCfg := &config.ApplicationConfig{} + galleryService := galleryop.NewGalleryService(appCfg, nil) + opcache := galleryop.NewOpCache(galleryService) + + jobID := "test-op-nodes-1" + // Register a backend op so the handler treats this as a backend + // install (no need to consult the gallery during the test). 
+ opcache.SetBackend("vllm", jobID) + + // Populate per-node entries via the P4.2 helper. The helper also + // allocates an OpStatus under jobID, which the handler will read. + galleryService.UpdateNodeProgress(jobID, "node-b", galleryop.NodeProgress{ + NodeID: "node-b", NodeName: "worker-b", Status: "running_on_worker", + }) + galleryService.UpdateNodeProgress(jobID, "node-a", galleryop.NodeProgress{ + NodeID: "node-a", NodeName: "worker-a", Status: "downloading", Percentage: 30, FileName: "vllm.tar", + }) + + e := echo.New() + routes.RegisterUIAPIRoutes(e, nil, nil, appCfg, galleryService, opcache, &application.Application{}, noopMw) + + req := httptest.NewRequest(http.MethodGet, "/api/operations", nil) + rec := httptest.NewRecorder() + e.ServeHTTP(rec, req) + Expect(rec.Code).To(Equal(http.StatusOK)) + + var envelope struct { + Operations []map[string]any `json:"operations"` + } + Expect(json.Unmarshal(rec.Body.Bytes(), &envelope)).To(Succeed()) + + var found map[string]any + for _, op := range envelope.Operations { + if op["jobID"] == jobID { + found = op + break + } + } + Expect(found).ToNot(BeNil(), "operation should appear in /api/operations") + nodes, ok := found["nodes"].([]any) + Expect(ok).To(BeTrue(), "operation should have a nodes array") + Expect(nodes).To(HaveLen(2)) + + // Stable sort by node_name: "worker-a" comes before "worker-b" + // even though UpdateNodeProgress was called in reverse order. + first := nodes[0].(map[string]any) + Expect(first["node_name"]).To(Equal("worker-a")) + Expect(first["status"]).To(Equal("downloading")) + Expect(first["file_name"]).To(Equal("vllm.tar")) + Expect(first["percentage"]).To(Equal(30.0)) + + second := nodes[1].(map[string]any) + Expect(second["node_name"]).To(Equal("worker-b")) + Expect(second["status"]).To(Equal("running_on_worker")) + }) + It("does not emit nodeID for non-node-scoped backend ops", func() { appCfg := &config.ApplicationConfig{} galleryService := galleryop.NewGalleryService(appCfg, nil)