fix(gallery): keep auto-upgrade off non-dev backends when -development is installed (#9736)

A `-development` backend variant (e.g. `cuda12-llama-cpp-development`)
shares its `alias` with the stable counterpart and is meant to be a
drop-in replacement via ListSystemBackends alias resolution. Two paths
in the auto-upgrade flow let the stable variant slip back in on top of
the user's explicit dev pick:

1. ListSystemBackends emits a synthetic alias row keyed by the alias
   name that re-uses the chosen concrete's metadata pointer. In
   distributed mode, the worker's handleBackendList serialised that
   row over NATS as `{Name: <alias>, URI: <dev URI>, Digest: <dev>}`
   — the frontend can't reconstruct the alias relationship, and the
   wire-rebuilt row then carried `Metadata.Name = <alias>` and
   resolved against an unrelated gallery entry on the next upgrade
   check.
2. CheckUpgradesAgainst happily iterated the synthetic row in
   single-node too. Today the duplicate gallery lookup is harmless
   because both rows share the same `Metadata.Name`, but any gallery
   change that gives a meta backend a version, or any concrete
   sharing its alias with a dev counterpart, would surface a phantom
   non-dev upgrade and auto-upgrade would install it — shadowing the
   dev one through alias-token preference.

Two layered fixes:

- `core/services/worker/lifecycle.go` (`handleBackendList`): drop
  rows where the map key differs from `b.Metadata.Name`. Concrete
  and meta entries always have `key == Metadata.Name`; only synthetic
  aliases violate it. Workers now report only what's actually on disk;
  the per-node UI listing and CheckUpgrades both stop seeing phantoms.
- `core/gallery/upgrade.go` (`CheckUpgradesAgainst`): iterate by key,
  skip rows where `key != Metadata.Name` (belt-and-suspenders for any
  caller-supplied installed set), and apply the dev-aware rule —
  build a set of installed `Metadata.Name`s and drop any non-dev
  candidate `X` whose `X-<devSuffix>` counterpart is installed. Uses
  the configured dev suffix from `getFallbackTagValues(systemState)`.

Manual `POST /api/backends/upgrade/<name>` is unaffected: it goes
straight through `bm.UpgradeBackend(name)` without consulting the
suppression list, so users who genuinely want the stable variant
upgraded can still trigger it explicitly.

Tests in core/gallery/upgrade_test.go cover three cases under
"CheckUpgradesAgainst (distributed)": dev-only installed → only the
dev surfaces; both variants installed → dev still wins; synthetic
alias row is ignored. Generic backend names are used to avoid the
capability filter dropping cuda-prefixed entries on a CPU-only host.

Assisted-by: Claude:claude-opus-4-7

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2026-05-09 18:20:00 +02:00
committed by GitHub
parent 9228e5b412
commit 3568b2819d
3 changed files with 177 additions and 0 deletions

View File

@@ -5,6 +5,7 @@ import (
"fmt"
"os"
"path/filepath"
"strings"
"time"
"github.com/mudler/LocalAI/core/config"
@@ -64,7 +65,22 @@ func CheckUpgradesAgainst(ctx context.Context, galleries []config.Gallery, syste
result := make(map[string]UpgradeInfo)
// Build a set of installed metadata names so we can suppress non-dev
// candidates whose `-development` counterpart is already installed —
// dev variants share an alias with the stable one and are explicit
// drop-in replacements, so auto-upgrade must never reintroduce the
// non-dev alongside them.
_, _, devSuffix := getFallbackTagValues(systemState)
devTag := "-" + devSuffix
installedNames := make(map[string]struct{}, len(installedBackends))
for _, installed := range installedBackends {
if installed.Metadata == nil || installed.Metadata.Name == "" {
continue
}
installedNames[installed.Metadata.Name] = struct{}{}
}
for key, installed := range installedBackends {
// Skip system backends — they are managed outside the gallery
if installed.IsSystem {
continue
@@ -73,6 +89,29 @@ func CheckUpgradesAgainst(ctx context.Context, galleries []config.Gallery, syste
continue
}
// Skip synthetic alias rows: ListSystemBackends emits an extra
// entry keyed by the alias name that re-uses the chosen concrete's
// metadata pointer. Iterating it just duplicates the concrete's
// gallery lookup, and in distributed mode the wire-reconstructed
// version of that row carries a forged Metadata.Name = alias which
// can match an unrelated gallery entry.
if key != installed.Metadata.Name {
continue
}
// Drop-in replacement guard: skip non-dev `X` if `X-<devSuffix>`
// is installed. Without this, any upgrade flagged on the non-dev
// row (e.g. surfaced via a synthetic-alias path on older workers,
// or because both variants happen to be present on disk via stale
// state) would tell auto-upgrade to install the stable variant on
// top of the user's explicit dev pick.
name := installed.Metadata.Name
if !strings.HasSuffix(name, devTag) {
if _, devInstalled := installedNames[name+devTag]; devInstalled {
continue
}
}
// Find matching gallery entry by metadata name
galleryEntry := FindGalleryElement(galleryBackends, installed.Metadata.Name)
if galleryEntry == nil {

View File

@@ -233,6 +233,132 @@ var _ = Describe("Upgrade Detection and Execution", func() {
Expect(upgrades["my-backend"].InstalledVersion).To(BeEmpty())
Expect(upgrades["my-backend"].AvailableVersion).To(Equal("2.0.0"))
})
// Dev-aware suppression: when `<X>-development` is installed it
// stands in for the stable `<X>` via alias resolution. Auto-upgrade
// must never reintroduce the stable variant alongside the dev one,
// because the install would land on disk and (depending on
// preference tokens) either shadow the dev pick or sit unused next
// to it. These tests pin CheckUpgradesAgainst to that behavior.
// Names are kept generic ("my-backend") so the capability filter
// in AvailableBackends doesn't drop them on a CPU-only test host.
It("suppresses non-dev candidate when its -development counterpart is installed", func() {
writeGalleryYAML([]GalleryBackend{
{
Metadata: Metadata{Name: "my-backend"},
URI: filepath.Join(tempDir, "stable"),
Version: "2.0.0",
},
{
Metadata: Metadata{Name: "my-backend-development"},
URI: filepath.Join(tempDir, "dev"),
Version: "2.0.0",
},
})
installed := SystemBackends{
"my-backend-development": SystemBackend{
Name: "my-backend-development",
Metadata: &BackendMetadata{
Name: "my-backend-development",
Version: "1.0.0",
},
},
}
upgrades, err := CheckUpgradesAgainst(context.Background(), galleries, systemState, installed)
Expect(err).NotTo(HaveOccurred())
Expect(upgrades).To(HaveKey("my-backend-development"))
Expect(upgrades).NotTo(HaveKey("my-backend"))
})
It("dev variant wins even when non-dev is also present (vestigial state)", func() {
// Either via legacy state, manual install, or a worker still
// emitting synthetic aliases, the non-dev row may be present
// alongside the dev one. Auto-upgrade must still keep its
// hands off the non-dev — installing the stable variant on
// top of the user's explicit dev pick is exactly what the
// alias drop-in promise forbids. Users who genuinely want
// the non-dev upgraded can trigger it manually via
// /api/backends/upgrade/<name>.
writeGalleryYAML([]GalleryBackend{
{
Metadata: Metadata{Name: "my-backend"},
URI: filepath.Join(tempDir, "stable"),
Version: "2.0.0",
},
{
Metadata: Metadata{Name: "my-backend-development"},
URI: filepath.Join(tempDir, "dev"),
Version: "2.0.0",
},
})
installed := SystemBackends{
"my-backend": SystemBackend{
Name: "my-backend",
Metadata: &BackendMetadata{
Name: "my-backend",
Version: "1.0.0",
},
},
"my-backend-development": SystemBackend{
Name: "my-backend-development",
Metadata: &BackendMetadata{
Name: "my-backend-development",
Version: "1.0.0",
},
},
}
upgrades, err := CheckUpgradesAgainst(context.Background(), galleries, systemState, installed)
Expect(err).NotTo(HaveOccurred())
Expect(upgrades).To(HaveKey("my-backend-development"))
Expect(upgrades).NotTo(HaveKey("my-backend"))
})
It("ignores synthetic alias rows whose key differs from Metadata.Name", func() {
	// ListSystemBackends adds an extra row keyed by the alias name that
	// shares the chosen concrete backend's metadata pointer. Before the
	// fix, that row produced a redundant gallery lookup in single-node
	// mode (harmless only by accident) and a phantom upgrade in
	// distributed mode (a real bug: the wire-reconstructed row carries
	// Metadata.Name = alias and resolves against an unrelated entry).
	galleryEntries := []GalleryBackend{
		{
			Metadata: Metadata{Name: "my-alias"},
			URI:      filepath.Join(tempDir, "stable-meta"),
			Version:  "2.0.0",
		},
		{
			Metadata: Metadata{Name: "my-backend-development"},
			URI:      filepath.Join(tempDir, "dev"),
			Version:  "2.0.0",
		},
	}
	writeGalleryYAML(galleryEntries)

	// One metadata value, shared by pointer between the concrete row
	// and the alias row below.
	sharedMeta := &BackendMetadata{
		Name:    "my-backend-development",
		Version: "1.0.0",
		Alias:   "my-alias",
	}
	concreteRow := SystemBackend{
		Name:     "my-backend-development",
		Metadata: sharedMeta,
	}
	// Synthetic alias row: its map key will not equal Metadata.Name.
	aliasRow := SystemBackend{
		Name:     "my-alias",
		Metadata: sharedMeta,
	}
	onDisk := SystemBackends{
		"my-backend-development": concreteRow,
		"my-alias":               aliasRow,
	}

	found, checkErr := CheckUpgradesAgainst(context.Background(), galleries, systemState, onDisk)
	Expect(checkErr).NotTo(HaveOccurred())

	// The concrete dev backend is reported; the alias key is not.
	Expect(found).To(HaveKey("my-backend-development"))
	Expect(found).NotTo(HaveKey("my-alias"))
})
})
Describe("UpgradeBackend", func() {

View File

@@ -161,6 +161,18 @@ func (s *backendSupervisor) handleBackendList(data []byte, reply func([]byte)) {
var infos []messaging.NodeBackendInfo
for name, b := range backends {
// Drop synthetic alias rows: ListSystemBackends emits an entry
// keyed by the alias name that re-uses the chosen concrete's
// metadata. The frontend can't reconstruct that aliasing
// faithfully from a flat NodeBackendInfo, and for upgrade
// detection it would surface as a phantom `<alias>` install
// pointing at the dev concrete's URI/digest — tricking the
// upgrade check into flagging the non-dev gallery entry of the
// same alias. Concrete and meta entries always have
// `name == b.Metadata.Name`, so this drops aliases only.
if b.Metadata != nil && b.Metadata.Name != "" && name != b.Metadata.Name {
continue
}
info := messaging.NodeBackendInfo{
Name: name,
IsSystem: b.IsSystem,