fix(galleryop): persist cancellable so restarted in-flight ops stay cancellable (#10454)

In distributed mode a model/backend install marks OpStatus.Cancellable=true
while downloading, but the gallery_operations row never recorded it:
UpdateStatus persisted only progress/status and Create left the cancellable
column at its zero value. After a replica restart Hydrate rebuilt the op with
cancellable=false, /api/operations reported false, and the UI hid the cancel
button - the orphaned op then lingered until the 30-minute stale reaper
expired it ("stays there on restart, can't cancel, after a bit it expires").

Persist the flag on every progress tick and at row creation (installs are
cancellable, deletes are not), and clear it on terminal transitions. A
rehydrated in-flight op is now cancellable, so an admin can dismiss the
orphaned op immediately instead of waiting out the reaper. The functional
cancel path already survived restart (CancelOperation persists store.Cancel
even with no live CancelFunc); this restores the UI affordance that drives it.


Assisted-by: Claude:claude-opus-4-8 [Claude Code]

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
LocalAI [bot]
2026-06-22 22:41:16 +02:00
committed by GitHub
parent 4755d676a3
commit 56f8a6623f
3 changed files with 73 additions and 6 deletions

View File

@@ -79,21 +79,29 @@ func (s *GalleryStore) Create(op *GalleryOperationRecord) error {
}).Create(op).Error
}
// UpdateProgress updates progress for an operation.
func (s *GalleryStore) UpdateProgress(id string, progress float64, message, downloadedSize string) error {
// UpdateProgress updates progress for an operation. The cancellable flag is
// persisted on every tick so a replica that restarts mid-install rehydrates the
// op as still cancellable — otherwise the column keeps its Create-time zero
// value (false), the UI hides the cancel button, and the orphaned op can only
// be dismissed by waiting for the 30-minute stale reaper.
func (s *GalleryStore) UpdateProgress(id string, progress float64, message, downloadedSize string, cancellable bool) error {
return s.db.Model(&GalleryOperationRecord{}).Where("id = ?", id).Updates(map[string]any{
"progress": progress,
"message": message,
"downloaded_file_size": downloadedSize,
"cancellable": cancellable,
"updated_at": time.Now(),
}).Error
}
// UpdateStatus updates the status of an operation.
// UpdateStatus updates the status of an operation. A terminal status is never
// cancellable, so the flag is cleared here to keep the persisted row consistent
// with what the UI should offer.
func (s *GalleryStore) UpdateStatus(id, status, errMsg string) error {
updates := map[string]any{
"status": status,
"updated_at": time.Now(),
"status": status,
"cancellable": false,
"updated_at": time.Now(),
}
if errMsg != "" {
updates["error"] = errMsg

View File

@@ -0,0 +1,56 @@
package galleryop_test
import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/services/distributed"
"github.com/mudler/LocalAI/core/services/galleryop"
"github.com/mudler/LocalAI/core/services/testutil"
)
// Reproduces "an in-flight install can't be cancelled after a restart". The
// live install path marks OpStatus.Cancellable=true on every progress tick, but
// UpdateStatus persisted progress/status to the gallery store WITHOUT the
// cancellable flag, and Create defaulted it to false. So after a replica
// restart Hydrate rebuilt the op with Cancellable=false, /api/operations
// reported cancellable:false, and the UI hid the cancel button — the orphaned
// op lingered until the 30-minute stale reaper expired it. The cancellable
// state must be persisted so a rehydrated in-flight op stays cancellable.
var _ = Describe("GalleryService cancellable persistence across restart", func() {
It("rehydrates an in-flight op as still cancellable", func() {
db := testutil.SetupTestDB()
store, err := distributed.NewGalleryStore(db)
Expect(err).ToNot(HaveOccurred())
svc := galleryop.NewGalleryService(&config.ApplicationConfig{}, nil)
svc.SetGalleryStore(store)
// Seed the in-flight op row as the worker goroutine does on admission.
Expect(store.Create(&distributed.GalleryOperationRecord{
ID: "op-inflight",
GalleryElementName: "llama-cpp-development",
OpType: "backend_install",
Status: "pending",
})).To(Succeed())
// Simulate a progress tick: the live path always marks installs
// cancellable while they are downloading/processing.
svc.UpdateStatus("op-inflight", &galleryop.OpStatus{
Message: "downloading",
Progress: 25,
Cancellable: true,
})
// A fresh replica boots and hydrates from the store.
fresh := galleryop.NewGalleryService(&config.ApplicationConfig{}, nil)
fresh.SetGalleryStore(store)
Expect(fresh.Hydrate()).To(Succeed())
st := fresh.GetStatus("op-inflight")
Expect(st).ToNot(BeNil(), "the in-flight op must hydrate after a restart")
Expect(st.Cancellable).To(BeTrue(),
"a still-active install must rehydrate as cancellable so the admin can dismiss it")
})
})

View File

@@ -167,7 +167,7 @@ func (g *GalleryService) UpdateStatus(s string, op *OpStatus) {
xlog.Warn("Failed to persist gallery operation status", "op_id", s, "error", err)
}
} else {
if err := store.UpdateProgress(s, op.Progress, op.Message, op.DownloadedFileSize); err != nil {
if err := store.UpdateProgress(s, op.Progress, op.Message, op.DownloadedFileSize, op.Cancellable); err != nil {
xlog.Warn("Failed to persist gallery operation progress", "op_id", s, "error", err)
}
}
@@ -467,6 +467,7 @@ func (g *GalleryService) Start(c context.Context, cl *config.ModelConfigLoader,
GalleryElementName: op.GalleryElementName,
OpType: "backend_install",
Status: "pending",
Cancellable: true,
})
}
err := g.backendHandler(&op, systemState)
@@ -499,6 +500,8 @@ func (g *GalleryService) Start(c context.Context, cl *config.ModelConfigLoader,
GalleryElementName: op.GalleryElementName,
OpType: opType,
Status: "pending",
// A delete is not cancellable; an install is.
Cancellable: !op.Delete,
})
}
err := g.modelHandler(&op, cl, systemState)