# Patch summary (commentary only; patch tools skip text before the first
# "diff --git" header):
#   - distributed/gallery.go: UpdateProgress takes a new `cancellable bool`
#     argument and writes it to the "cancellable" column on every call;
#     UpdateStatus now always writes "cancellable": false.
#   - galleryop/service.go: UpdateStatus forwards op.Cancellable to
#     UpdateProgress; the records created on admission set Cancellable
#     (true for the backend record, !op.Delete for the model record).
#   - galleryop/cancellable_persist_test.go: new Ginkgo spec that creates a row,
#     pushes one progress update with Cancellable=true, hydrates a second
#     service from the same store and expects Cancellable to still be true.
#
# NOTE(review): line breaks and indentation below were reconstructed from a
# whitespace-collapsed copy. Hunk line counts match the @@ headers, but the
# tabs and gofmt column alignment are inferred; confirm against the target
# files before applying.
# NOTE(review): the backend admission record hard-codes `Cancellable: true`
# while the model record uses `!op.Delete`. If backend ops can also be deletes,
# the two should presumably agree; confirm against the backend op type.
# NOTE(review): the store's UpdateStatus clears the flag for every status it is
# given; its comment only justifies that for terminal statuses. Confirm no
# caller passes a non-terminal status.
diff --git a/core/services/distributed/gallery.go b/core/services/distributed/gallery.go
index 7b1239e5a..d85fd76a8 100644
--- a/core/services/distributed/gallery.go
+++ b/core/services/distributed/gallery.go
@@ -79,21 +79,29 @@ func (s *GalleryStore) Create(op *GalleryOperationRecord) error {
 	}).Create(op).Error
 }
 
-// UpdateProgress updates progress for an operation.
-func (s *GalleryStore) UpdateProgress(id string, progress float64, message, downloadedSize string) error {
+// UpdateProgress updates progress for an operation. The cancellable flag is
+// persisted on every tick so a replica that restarts mid-install rehydrates the
+// op as still cancellable — otherwise the column keeps its Create-time zero
+// value (false), the UI hides the cancel button, and the orphaned op can only
+// be dismissed by waiting for the 30-minute stale reaper.
+func (s *GalleryStore) UpdateProgress(id string, progress float64, message, downloadedSize string, cancellable bool) error {
 	return s.db.Model(&GalleryOperationRecord{}).Where("id = ?", id).Updates(map[string]any{
 		"progress":             progress,
 		"message":              message,
 		"downloaded_file_size": downloadedSize,
+		"cancellable":          cancellable,
 		"updated_at":           time.Now(),
 	}).Error
 }
 
-// UpdateStatus updates the status of an operation.
+// UpdateStatus updates the status of an operation. A terminal status is never
+// cancellable, so the flag is cleared here to keep the persisted row consistent
+// with what the UI should offer.
 func (s *GalleryStore) UpdateStatus(id, status, errMsg string) error {
 	updates := map[string]any{
-		"status":     status,
-		"updated_at": time.Now(),
+		"status":      status,
+		"cancellable": false,
+		"updated_at":  time.Now(),
 	}
 	if errMsg != "" {
 		updates["error"] = errMsg
diff --git a/core/services/galleryop/cancellable_persist_test.go b/core/services/galleryop/cancellable_persist_test.go
new file mode 100644
index 000000000..6879dee92
--- /dev/null
+++ b/core/services/galleryop/cancellable_persist_test.go
@@ -0,0 +1,56 @@
+package galleryop_test
+
+import (
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/services/distributed"
+	"github.com/mudler/LocalAI/core/services/galleryop"
+	"github.com/mudler/LocalAI/core/services/testutil"
+)
+
+// Reproduces "an in-flight install can't be cancelled after a restart". The
+// live install path marks OpStatus.Cancellable=true on every progress tick, but
+// UpdateStatus persisted progress/status to the gallery store WITHOUT the
+// cancellable flag, and Create defaulted it to false. So after a replica
+// restart Hydrate rebuilt the op with Cancellable=false, /api/operations
+// reported cancellable:false, and the UI hid the cancel button — the orphaned
+// op lingered until the 30-minute stale reaper expired it. The cancellable
+// state must be persisted so a rehydrated in-flight op stays cancellable.
+var _ = Describe("GalleryService cancellable persistence across restart", func() {
+	It("rehydrates an in-flight op as still cancellable", func() {
+		db := testutil.SetupTestDB()
+		store, err := distributed.NewGalleryStore(db)
+		Expect(err).ToNot(HaveOccurred())
+
+		svc := galleryop.NewGalleryService(&config.ApplicationConfig{}, nil)
+		svc.SetGalleryStore(store)
+
+		// Seed the in-flight op row as the worker goroutine does on admission.
+		Expect(store.Create(&distributed.GalleryOperationRecord{
+			ID:                 "op-inflight",
+			GalleryElementName: "llama-cpp-development",
+			OpType:             "backend_install",
+			Status:             "pending",
+		})).To(Succeed())
+
+		// Simulate a progress tick: the live path always marks installs
+		// cancellable while they are downloading/processing.
+		svc.UpdateStatus("op-inflight", &galleryop.OpStatus{
+			Message:     "downloading",
+			Progress:    25,
+			Cancellable: true,
+		})
+
+		// A fresh replica boots and hydrates from the store.
+		fresh := galleryop.NewGalleryService(&config.ApplicationConfig{}, nil)
+		fresh.SetGalleryStore(store)
+		Expect(fresh.Hydrate()).To(Succeed())
+
+		st := fresh.GetStatus("op-inflight")
+		Expect(st).ToNot(BeNil(), "the in-flight op must hydrate after a restart")
+		Expect(st.Cancellable).To(BeTrue(),
+			"a still-active install must rehydrate as cancellable so the admin can dismiss it")
+	})
+})
diff --git a/core/services/galleryop/service.go b/core/services/galleryop/service.go
index 5b611d41e..d01d9cc19 100644
--- a/core/services/galleryop/service.go
+++ b/core/services/galleryop/service.go
@@ -167,7 +167,7 @@ func (g *GalleryService) UpdateStatus(s string, op *OpStatus) {
 				xlog.Warn("Failed to persist gallery operation status", "op_id", s, "error", err)
 			}
 		} else {
-			if err := store.UpdateProgress(s, op.Progress, op.Message, op.DownloadedFileSize); err != nil {
+			if err := store.UpdateProgress(s, op.Progress, op.Message, op.DownloadedFileSize, op.Cancellable); err != nil {
 				xlog.Warn("Failed to persist gallery operation progress", "op_id", s, "error", err)
 			}
 		}
@@ -467,6 +467,7 @@ func (g *GalleryService) Start(c context.Context, cl *config.ModelConfigLoader,
 						GalleryElementName: op.GalleryElementName,
 						OpType:             "backend_install",
 						Status:             "pending",
+						Cancellable:        true,
 					})
 				}
 				err := g.backendHandler(&op, systemState)
@@ -499,6 +500,8 @@ func (g *GalleryService) Start(c context.Context, cl *config.ModelConfigLoader,
 						GalleryElementName: op.GalleryElementName,
 						OpType:             opType,
 						Status:             "pending",
+						// A delete is not cancellable; an install is.
+						Cancellable: !op.Delete,
 					})
 				}
 				err := g.modelHandler(&op, cl, systemState)