Files
LocalAI/core/services/galleryop/operation.go
LocalAI [bot] a39e025d64 fix(nodes): make per-node backend install async via gallery job queue (#9928)
* feat(galleryop): add TargetNodeID to ManagementOp for single-node installs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(galleryop): add NodeScopedKey helpers for per-node opcache rows

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* refactor(galleryop): use strings.Cut for NodeScopedKey parsing, reject empty nodeID

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(nodes): scope DistributedBackendManager.InstallBackend to single node via TargetNodeID

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(http): make /api/nodes/:id/backends/install async via gallery service job queue

The handler previously called unloader.InstallBackend synchronously and
blocked the browser for up to 3 minutes waiting on the NATS reply. It now
enqueues a TargetNodeID-scoped ManagementOp on BackendGalleryChannel and
returns HTTP 202 + jobID immediately, matching /api/backends/install/:id.

The opcache key is built via NodeScopedKey(nodeID, backend) so concurrent
installs of the same backend across different nodes do not stomp each
other. galleryService/opcache/appConfig are threaded through
RegisterNodeAdminRoutes for this.

Assisted-by: Claude:opus-4-7 [Edit] [Bash]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* refactor(http): log malformed backend_galleries override and stop test drain goroutine

Assisted-by: Claude:opus-4-7 [Edit] [Bash]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(api): expose nodeID for node-scoped backend ops in /api/operations

Node-scoped backend installs land in opcache under "node:<nodeID>:<backend>"
keys. Without splitting that prefix back out, the operations panel renders
the full key as the display name and has no structured way to label which
worker an install is targeting. Detect the prefix, surface nodeID as its own
response field, and reduce the display name back to the bare backend slug.
Bare (non-scoped) ops are left untouched so legacy installs do not gain a
misleading empty nodeID.

Assisted-by: Claude:opus-4-7 [Edit] [Bash]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(react-ui): poll job status for node-targeted backend installs

Assisted-by: Claude:opus-4-7 [Edit] [Bash]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix(react-ui): make NodeInstallPicker state updates pure and surface cancellations as errors

Assisted-by: Claude:opus-4-7 [Edit] [Bash]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* refactor(react-ui): clarify async semantics in handleInstallOnTarget

Assisted-by: Claude:opus-4-7 [Edit] [Bash]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* refactor(http): use statusUrl casing for node install response to match codebase precedent

Assisted-by: Claude:opus-4-7 [Edit] [Bash]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
2026-05-21 22:25:53 +02:00

153 lines
4.8 KiB
Go

package galleryop
import (
"context"
"strings"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/xsync"
)
// ManagementOp describes one gallery management operation. T is the type of
// the request payload carried in Req, and E is the type of the gallery element
// that may be handed over directly through GalleryElement.
type ManagementOp[T, E any] struct {
	ID                 string
	GalleryElementName string
	Delete             bool

	Req T

	// GalleryElement, when set, is the gallery element to install directly.
	GalleryElement *E

	Galleries        []config.Gallery
	BackendGalleries []config.Gallery

	// Context and CancelFunc carry cancellation support for the operation.
	Context    context.Context
	CancelFunc context.CancelFunc

	// The External* fields are the parameters for installing a backend from
	// an external source (OCI image, URL or path) rather than from a gallery.

	// ExternalURI is the OCI image, URL, or path.
	ExternalURI string
	// ExternalName is a custom name for the backend.
	ExternalName string
	// ExternalAlias is a custom alias for the backend.
	ExternalAlias string

	// TargetNodeID restricts a backend install/upgrade to one worker node.
	// When empty, the operation fans out to every healthy backend node, which
	// was the behavior before this field existed. InstallBackendOnNodeEndpoint
	// sets it so an admin can put a hardware-specific build on a single node
	// and leave the rest of the cluster untouched.
	TargetNodeID string

	// Upgrade reports that this is an upgrade rather than a fresh install.
	Upgrade bool
}
// OpStatus is the JSON-serialisable progress record of a gallery operation.
//
// NOTE(review): Error is an interface value, so encoding/json encodes whatever
// exported fields the concrete error type has; the error message itself may
// not appear in the output. Confirm how consumers read this field.
type OpStatus struct {
	// Deletion is true if the operation is a deletion.
	Deletion bool `json:"deletion"`

	FileName  string  `json:"file_name"`
	Error     error   `json:"error"`
	Processed bool    `json:"processed"`
	Message   string  `json:"message"`
	Progress  float64 `json:"progress"`

	TotalFileSize      string `json:"file_size"`
	DownloadedFileSize string `json:"downloaded_size"`
	GalleryElementName string `json:"gallery_element_name"`

	// Cancelled is true if the operation was cancelled.
	Cancelled bool `json:"cancelled"`
	// Cancellable is true if the operation can be cancelled.
	Cancellable bool `json:"cancellable"`
}
// OpCache tracks in-flight gallery operations by key.
type OpCache struct {
	// status maps an operation key to the value stored for it; DeleteUUID and
	// GetStatus treat that value as the operation's UUID / job ID.
	status *xsync.SyncedMap[string, string]
	// backendOps flags the keys that were registered as backend operations.
	backendOps *xsync.SyncedMap[string, bool]
	// galleryService is queried by GetStatus for each stored job ID.
	galleryService *GalleryService
}
// NewOpCache returns an OpCache with freshly created status and backend-flag
// maps, wired to galleryService for the status lookups done by GetStatus.
func NewOpCache(galleryService *GalleryService) *OpCache {
	cache := new(OpCache)
	cache.status = xsync.NewSyncedMap[string, string]()
	cache.backendOps = xsync.NewSyncedMap[string, bool]()
	cache.galleryService = galleryService
	return cache
}
// Set stores value under key in the status map. It does not touch the
// backend-operation flag; SetBackend does both.
func (m *OpCache) Set(key string, value string) {
m.status.Set(key, value)
}
// SetBackend stores value under key in the status map and additionally flags
// key as a backend operation, which is what IsBackendOp later reads.
func (m *OpCache) SetBackend(key string, value string) {
m.status.Set(key, value)
m.backendOps.Set(key, true)
}
// IsBackendOp returns the backend-operation flag recorded for key, which
// SetBackend sets to true.
// NOTE(review): the result for a key that was never flagged depends on what
// SyncedMap.Get yields for a missing entry (presumably false) — confirm
// against xsync.
func (m *OpCache) IsBackendOp(key string) bool {
return m.backendOps.Get(key)
}
// Get returns the value stored under key in the status map.
// NOTE(review): the result for a missing key depends on SyncedMap.Get
// (presumably the empty string) — confirm against xsync. Exists can be used
// to tell a missing key apart from a stored value.
func (m *OpCache) Get(key string) string {
return m.status.Get(key)
}
// DeleteUUID removes every key whose stored value equals uuid, and drops the
// backend-operation flag of each removed key along with it.
func (m *OpCache) DeleteUUID(uuid string) {
	for _, key := range m.status.Keys() {
		if m.status.Get(key) != uuid {
			continue
		}
		m.status.Delete(key)
		// Keep the backend flag map in step with the status map.
		m.backendOps.Delete(key)
	}
}
// Map returns the key-to-value contents of the status map as produced by
// SyncedMap.Map.
// NOTE(review): whether the result is a copy or a live view depends on
// xsync — confirm before mutating it.
func (m *OpCache) Map() map[string]string {
return m.status.Map()
}
// Exists reports whether key is present in the status map.
func (m *OpCache) Exists(key string) bool {
return m.status.Exists(key)
}
// GetStatus returns the map produced by Map (operation key to job ID) together
// with a second map, keyed identically, that classifies each operation:
//
//   - "Waiting" when the gallery service has no status for the job ID,
//   - "Deletion" when the status is flagged as a deletion,
//   - "Installation" otherwise.
func (m *OpCache) GetStatus() (map[string]string, map[string]string) {
	jobs := m.Map()
	taskTypes := make(map[string]string, len(jobs))
	for key, jobID := range jobs {
		switch st := m.galleryService.GetStatus(jobID); {
		case st == nil:
			taskTypes[key] = "Waiting"
		case st.Deletion:
			taskTypes[key] = "Deletion"
		default:
			taskTypes[key] = "Installation"
		}
	}
	return jobs, taskTypes
}
// NodeScopedKeyPrefix marks an opcache key as belonging to a node-scoped
// backend operation. Such keys have the form "node:<nodeID>:<backend>", which
// keeps per-node installs from colliding on the bare backend name.
// InstallBackendOnNodeEndpoint uses the prefix when writing keys, and
// /api/operations reads it to extract the nodeID for the UI.
const NodeScopedKeyPrefix = "node:"
// NodeScopedKey builds the opcache key for a backend operation scoped to one
// node, in the form NodeScopedKeyPrefix + nodeID + ":" + backend. Carrying the
// nodeID inside the key lets ParseNodeScopedKey recover it later, so the
// operations endpoint can surface it without nodeID being stored separately.
func NodeScopedKey(nodeID, backend string) string {
	scope := NodeScopedKeyPrefix + nodeID
	return scope + ":" + backend
}
// ParseNodeScopedKey is the inverse of NodeScopedKey: it splits a key of the
// form "node:<nodeID>:<backend>" into its nodeID and backend parts.
//
// ok is false, and both strings are empty, when key does not start with
// NodeScopedKeyPrefix or when either the nodeID or the backend segment is
// missing or empty. Only the first colon after the prefix acts as the
// separator, so a backend name that itself contains colons comes back intact.
func ParseNodeScopedKey(key string) (nodeID, backend string, ok bool) {
	scoped, prefixed := strings.CutPrefix(key, NodeScopedKeyPrefix)
	if !prefixed {
		return "", "", false
	}
	if id, name, found := strings.Cut(scoped, ":"); found && id != "" && name != "" {
		return id, name, true
	}
	return "", "", false
}