fix(distributed): pass ExternalURI through NATS backend install (#9446)

When installing a backend with a custom OCI URI in distributed mode,
the URI was captured in ManagementOp.ExternalURI by the HTTP handler
but never forwarded to workers. BackendInstallRequest had no URI field,
so workers fell through to the gallery lookup and failed with
"no backend found with name <custom-name>".

Add URI/Name/Alias fields to BackendInstallRequest and thread them from
ManagementOp through DistributedBackendManager.InstallBackend() and the
RemoteUnloaderAdapter. On the worker side, route to InstallExternalBackend
when URI is set instead of InstallBackendFromGallery. Update all
remaining InstallBackend call sites (UpgradeBackend, reconciler
pending-op drain, router auto-install) to pass empty strings for the
new params.

Assisted-by: Claude Code:claude-sonnet-4-6

Signed-off-by: Russell Sim <rsl@simopolis.xyz>
This commit is contained in:
Russell Sim
2026-04-20 23:39:35 +02:00
committed by GitHub
parent 8ab56e2ad3
commit 02bb715c0a
8 changed files with 32 additions and 15 deletions

View File

@@ -21,6 +21,7 @@ import (
"github.com/mudler/LocalAI/core/cli/workerregistry" "github.com/mudler/LocalAI/core/cli/workerregistry"
"github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/services/galleryop"
"github.com/mudler/LocalAI/core/services/messaging" "github.com/mudler/LocalAI/core/services/messaging"
"github.com/mudler/LocalAI/core/services/nodes" "github.com/mudler/LocalAI/core/services/nodes"
"github.com/mudler/LocalAI/core/services/storage" "github.com/mudler/LocalAI/core/services/storage"
@@ -597,13 +598,21 @@ func (s *backendSupervisor) installBackend(req messaging.BackendInstallRequest)
// Try to find the backend binary // Try to find the backend binary
backendPath := s.findBackend(req.Backend) backendPath := s.findBackend(req.Backend)
if backendPath == "" { if backendPath == "" {
// Backend not found locally — try auto-installing from gallery if req.URI != "" {
xlog.Info("Backend not found locally, attempting external install", "backend", req.Backend, "uri", req.URI)
if err := galleryop.InstallExternalBackend(
context.Background(), galleries, s.systemState, s.ml, nil, req.URI, req.Name, req.Alias,
); err != nil {
return "", fmt.Errorf("installing backend from gallery: %w", err)
}
} else {
xlog.Info("Backend not found locally, attempting gallery install", "backend", req.Backend) xlog.Info("Backend not found locally, attempting gallery install", "backend", req.Backend)
if err := gallery.InstallBackendFromGallery( if err := gallery.InstallBackendFromGallery(
context.Background(), galleries, s.systemState, s.ml, req.Backend, nil, false, context.Background(), galleries, s.systemState, s.ml, req.Backend, nil, false,
); err != nil { ); err != nil {
return "", fmt.Errorf("installing backend from gallery: %w", err) return "", fmt.Errorf("installing backend from gallery: %w", err)
} }
}
// Re-register after install and retry // Re-register after install and retry
gallery.RegisterBackends(s.systemState, s.ml) gallery.RegisterBackends(s.systemState, s.ml)
backendPath = s.findBackend(req.Backend) backendPath = s.findBackend(req.Backend)

View File

@@ -376,7 +376,7 @@ func InstallBackendOnNodeEndpoint(unloader nodes.NodeCommandSender) echo.Handler
if err := c.Bind(&req); err != nil || req.Backend == "" { if err := c.Bind(&req); err != nil || req.Backend == "" {
return c.JSON(http.StatusBadRequest, nodeError(http.StatusBadRequest, "backend name required")) return c.JSON(http.StatusBadRequest, nodeError(http.StatusBadRequest, "backend name required"))
} }
reply, err := unloader.InstallBackend(nodeID, req.Backend, "", req.BackendGalleries) reply, err := unloader.InstallBackend(nodeID, req.Backend, "", req.BackendGalleries, "", "", "")
if err != nil { if err != nil {
xlog.Error("Failed to install backend on node", "node", nodeID, "backend", req.Backend, "error", err) xlog.Error("Failed to install backend on node", "node", nodeID, "backend", req.Backend, "error", err)
return c.JSON(http.StatusInternalServerError, nodeError(http.StatusInternalServerError, "failed to install backend on node")) return c.JSON(http.StatusInternalServerError, nodeError(http.StatusInternalServerError, "failed to install backend on node"))

View File

@@ -124,8 +124,13 @@ func SubjectNodeBackendInstall(nodeID string) string {
// BackendInstallRequest is the payload for a backend.install NATS request. // BackendInstallRequest is the payload for a backend.install NATS request.
type BackendInstallRequest struct { type BackendInstallRequest struct {
Backend string `json:"backend"` Backend string `json:"backend"`
ModelID string `json:"model_id,omitempty"` // unique model identifier — each model gets its own gRPC process ModelID string `json:"model_id,omitempty"`
BackendGalleries string `json:"backend_galleries,omitempty"` BackendGalleries string `json:"backend_galleries,omitempty"`
// URI is set for external installs (OCI image, URL, or path). When non-empty
// the worker routes to InstallExternalBackend instead of the gallery lookup.
URI string `json:"uri,omitempty"`
Name string `json:"name,omitempty"`
Alias string `json:"alias,omitempty"`
} }
// BackendInstallReply is the response from a backend.install NATS request. // BackendInstallReply is the response from a backend.install NATS request.

View File

@@ -293,7 +293,7 @@ func (d *DistributedBackendManager) InstallBackend(ctx context.Context, op *gall
backendName := op.GalleryElementName backendName := op.GalleryElementName
_, err := d.enqueueAndDrainBackendOp(ctx, OpBackendInstall, backendName, galleriesJSON, func(node BackendNode) error { _, err := d.enqueueAndDrainBackendOp(ctx, OpBackendInstall, backendName, galleriesJSON, func(node BackendNode) error {
reply, err := d.adapter.InstallBackend(node.ID, backendName, "", string(galleriesJSON)) reply, err := d.adapter.InstallBackend(node.ID, backendName, "", string(galleriesJSON), op.ExternalURI, op.ExternalName, op.ExternalAlias)
if err != nil { if err != nil {
return err return err
} }
@@ -311,7 +311,7 @@ func (d *DistributedBackendManager) UpgradeBackend(ctx context.Context, name str
galleriesJSON, _ := json.Marshal(d.backendGalleries) galleriesJSON, _ := json.Marshal(d.backendGalleries)
_, err := d.enqueueAndDrainBackendOp(ctx, OpBackendUpgrade, name, galleriesJSON, func(node BackendNode) error { _, err := d.enqueueAndDrainBackendOp(ctx, OpBackendUpgrade, name, galleriesJSON, func(node BackendNode) error {
reply, err := d.adapter.InstallBackend(node.ID, name, "", string(galleriesJSON)) reply, err := d.adapter.InstallBackend(node.ID, name, "", string(galleriesJSON), "", "", "")
if err != nil { if err != nil {
return err return err
} }

View File

@@ -188,7 +188,7 @@ func (rc *ReplicaReconciler) drainPendingBackendOps(ctx context.Context) {
case OpBackendDelete: case OpBackendDelete:
_, applyErr = rc.adapter.DeleteBackend(op.NodeID, op.Backend) _, applyErr = rc.adapter.DeleteBackend(op.NodeID, op.Backend)
case OpBackendInstall, OpBackendUpgrade: case OpBackendInstall, OpBackendUpgrade:
reply, err := rc.adapter.InstallBackend(op.NodeID, op.Backend, "", string(op.Galleries)) reply, err := rc.adapter.InstallBackend(op.NodeID, op.Backend, "", string(op.Galleries), "", "", "")
if err != nil { if err != nil {
applyErr = err applyErr = err
} else if !reply.Success { } else if !reply.Success {

View File

@@ -504,7 +504,7 @@ func (r *SmartRouter) installBackendOnNode(ctx context.Context, node *BackendNod
return "", fmt.Errorf("no NATS connection for backend installation") return "", fmt.Errorf("no NATS connection for backend installation")
} }
reply, err := r.unloader.InstallBackend(node.ID, backendType, modelID, r.galleriesJSON) reply, err := r.unloader.InstallBackend(node.ID, backendType, modelID, r.galleriesJSON, "", "", "")
if err != nil { if err != nil {
return "", err return "", err
} }

View File

@@ -244,7 +244,7 @@ type fakeUnloader struct {
unloadErr error unloadErr error
} }
func (f *fakeUnloader) InstallBackend(_, _, _, _ string) (*messaging.BackendInstallReply, error) { func (f *fakeUnloader) InstallBackend(_, _, _, _, _, _, _ string) (*messaging.BackendInstallReply, error) {
return f.installReply, f.installErr return f.installReply, f.installErr
} }

View File

@@ -17,7 +17,7 @@ type backendStopRequest struct {
// NodeCommandSender abstracts NATS-based commands to worker nodes. // NodeCommandSender abstracts NATS-based commands to worker nodes.
// Used by HTTP endpoint handlers to avoid coupling to the concrete RemoteUnloaderAdapter. // Used by HTTP endpoint handlers to avoid coupling to the concrete RemoteUnloaderAdapter.
type NodeCommandSender interface { type NodeCommandSender interface {
InstallBackend(nodeID, backendType, modelID, galleriesJSON string) (*messaging.BackendInstallReply, error) InstallBackend(nodeID, backendType, modelID, galleriesJSON, uri, name, alias string) (*messaging.BackendInstallReply, error)
DeleteBackend(nodeID, backendName string) (*messaging.BackendDeleteReply, error) DeleteBackend(nodeID, backendName string) (*messaging.BackendDeleteReply, error)
ListBackends(nodeID string) (*messaging.BackendListReply, error) ListBackends(nodeID string) (*messaging.BackendListReply, error)
StopBackend(nodeID, backend string) error StopBackend(nodeID, backend string) error
@@ -72,7 +72,7 @@ func (a *RemoteUnloaderAdapter) UnloadRemoteModel(modelName string) error {
// The worker installs the backend from gallery (if not already installed), // The worker installs the backend from gallery (if not already installed),
// starts the gRPC process, and replies when ready. // starts the gRPC process, and replies when ready.
// Timeout: 5 minutes (gallery install can take a while). // Timeout: 5 minutes (gallery install can take a while).
func (a *RemoteUnloaderAdapter) InstallBackend(nodeID, backendType, modelID, galleriesJSON string) (*messaging.BackendInstallReply, error) { func (a *RemoteUnloaderAdapter) InstallBackend(nodeID, backendType, modelID, galleriesJSON, uri, name, alias string) (*messaging.BackendInstallReply, error) {
subject := messaging.SubjectNodeBackendInstall(nodeID) subject := messaging.SubjectNodeBackendInstall(nodeID)
xlog.Info("Sending NATS backend.install", "nodeID", nodeID, "backend", backendType, "modelID", modelID) xlog.Info("Sending NATS backend.install", "nodeID", nodeID, "backend", backendType, "modelID", modelID)
@@ -80,6 +80,9 @@ func (a *RemoteUnloaderAdapter) InstallBackend(nodeID, backendType, modelID, gal
Backend: backendType, Backend: backendType,
ModelID: modelID, ModelID: modelID,
BackendGalleries: galleriesJSON, BackendGalleries: galleriesJSON,
URI: uri,
Name: name,
Alias: alias,
}, 5*time.Minute) }, 5*time.Minute)
} }