From 56f8a6623f1d422c60a4d116f9cea6499ca7a860 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Mon, 22 Jun 2026 22:41:16 +0200 Subject: [PATCH] fix(galleryop): persist cancellable so restarted in-flight ops stay cancellable (#10454) In distributed mode a model/backend install marks OpStatus.Cancellable=true while downloading, but the gallery_operations row never recorded it: UpdateStatus persisted only progress/status and Create left the cancellable column at its zero value. After a replica restart Hydrate rebuilt the op with cancellable=false, /api/operations reported false, and the UI hid the cancel button - the orphaned op then lingered until the 30-minute stale reaper expired it ("stays there on restart, can't cancel, after a bit it expires"). Persist the flag on every progress tick and at row creation (installs are cancellable, deletes are not), and clear it on terminal transitions. A rehydrated in-flight op is now cancellable, so an admin can dismiss the orphaned op immediately instead of waiting out the reaper. The functional cancel path already survived restart (CancelOperation persists store.Cancel even with no live CancelFunc); this restores the UI affordance that drives it. Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto Co-authored-by: Ettore Di Giacinto --- core/services/distributed/gallery.go | 18 ++++-- .../galleryop/cancellable_persist_test.go | 56 +++++++++++++++++++ core/services/galleryop/service.go | 5 +- 3 files changed, 73 insertions(+), 6 deletions(-) create mode 100644 core/services/galleryop/cancellable_persist_test.go diff --git a/core/services/distributed/gallery.go b/core/services/distributed/gallery.go index 7b1239e5a..d85fd76a8 100644 --- a/core/services/distributed/gallery.go +++ b/core/services/distributed/gallery.go @@ -79,21 +79,29 @@ func (s *GalleryStore) Create(op *GalleryOperationRecord) error { }).Create(op).Error } -// UpdateProgress updates progress for an operation. -func (s *GalleryStore) UpdateProgress(id string, progress float64, message, downloadedSize string) error { +// UpdateProgress updates progress for an operation. The cancellable flag is +// persisted on every tick so a replica that restarts mid-install rehydrates the +// op as still cancellable — otherwise the column keeps its Create-time zero +// value (false), the UI hides the cancel button, and the orphaned op can only +// be dismissed by waiting for the 30-minute stale reaper. +func (s *GalleryStore) UpdateProgress(id string, progress float64, message, downloadedSize string, cancellable bool) error { return s.db.Model(&GalleryOperationRecord{}).Where("id = ?", id).Updates(map[string]any{ "progress": progress, "message": message, "downloaded_file_size": downloadedSize, + "cancellable": cancellable, "updated_at": time.Now(), }).Error } -// UpdateStatus updates the status of an operation. +// UpdateStatus updates the status of an operation. A terminal status is never +// cancellable, so the flag is cleared here to keep the persisted row consistent +// with what the UI should offer. func (s *GalleryStore) UpdateStatus(id, status, errMsg string) error { updates := map[string]any{ - "status": status, - "updated_at": time.Now(), + "status": status, + "cancellable": false, + "updated_at": time.Now(), } if errMsg != "" { updates["error"] = errMsg diff --git a/core/services/galleryop/cancellable_persist_test.go b/core/services/galleryop/cancellable_persist_test.go new file mode 100644 index 000000000..6879dee92 --- /dev/null +++ b/core/services/galleryop/cancellable_persist_test.go @@ -0,0 +1,56 @@ +package galleryop_test + +import ( + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/services/distributed" + "github.com/mudler/LocalAI/core/services/galleryop" + "github.com/mudler/LocalAI/core/services/testutil" +) + +// Reproduces "an in-flight install can't be cancelled after a restart". The +// live install path marks OpStatus.Cancellable=true on every progress tick, but +// UpdateStatus persisted progress/status to the gallery store WITHOUT the +// cancellable flag, and Create defaulted it to false. So after a replica +// restart Hydrate rebuilt the op with Cancellable=false, /api/operations +// reported cancellable:false, and the UI hid the cancel button — the orphaned +// op lingered until the 30-minute stale reaper expired it. The cancellable +// state must be persisted so a rehydrated in-flight op stays cancellable. +var _ = Describe("GalleryService cancellable persistence across restart", func() { + It("rehydrates an in-flight op as still cancellable", func() { + db := testutil.SetupTestDB() + store, err := distributed.NewGalleryStore(db) + Expect(err).ToNot(HaveOccurred()) + + svc := galleryop.NewGalleryService(&config.ApplicationConfig{}, nil) + svc.SetGalleryStore(store) + + // Seed the in-flight op row as the worker goroutine does on admission. + Expect(store.Create(&distributed.GalleryOperationRecord{ + ID: "op-inflight", + GalleryElementName: "llama-cpp-development", + OpType: "backend_install", + Status: "pending", + })).To(Succeed()) + + // Simulate a progress tick: the live path always marks installs + // cancellable while they are downloading/processing. + svc.UpdateStatus("op-inflight", &galleryop.OpStatus{ + Message: "downloading", + Progress: 25, + Cancellable: true, + }) + + // A fresh replica boots and hydrates from the store. + fresh := galleryop.NewGalleryService(&config.ApplicationConfig{}, nil) + fresh.SetGalleryStore(store) + Expect(fresh.Hydrate()).To(Succeed()) + + st := fresh.GetStatus("op-inflight") + Expect(st).ToNot(BeNil(), "the in-flight op must hydrate after a restart") + Expect(st.Cancellable).To(BeTrue(), + "a still-active install must rehydrate as cancellable so the admin can dismiss it") + }) +}) diff --git a/core/services/galleryop/service.go b/core/services/galleryop/service.go index 5b611d41e..d01d9cc19 100644 --- a/core/services/galleryop/service.go +++ b/core/services/galleryop/service.go @@ -167,7 +167,7 @@ func (g *GalleryService) UpdateStatus(s string, op *OpStatus) { xlog.Warn("Failed to persist gallery operation status", "op_id", s, "error", err) } } else { - if err := store.UpdateProgress(s, op.Progress, op.Message, op.DownloadedFileSize); err != nil { + if err := store.UpdateProgress(s, op.Progress, op.Message, op.DownloadedFileSize, op.Cancellable); err != nil { xlog.Warn("Failed to persist gallery operation progress", "op_id", s, "error", err) } } @@ -467,6 +467,7 @@ func (g *GalleryService) Start(c context.Context, cl *config.ModelConfigLoader, GalleryElementName: op.GalleryElementName, OpType: "backend_install", Status: "pending", + Cancellable: true, }) } err := g.backendHandler(&op, systemState) @@ -499,6 +500,8 @@ func (g *GalleryService) Start(c context.Context, cl *config.ModelConfigLoader, GalleryElementName: op.GalleryElementName, OpType: opType, Status: "pending", + // A delete is not cancellable; an install is. + Cancellable: !op.Delete, }) } err := g.modelHandler(&op, cl, systemState)