feat(concurrency-groups): per-model exclusive groups for backend loading (#9662)

* feat(concurrency-groups): per-model exclusive groups for backend loading Adds `concurrency_groups: [...]` to model YAML configs. Two models that share a group cannot be loaded concurrently on the same node — loading one evicts the others, reusing the existing pinned/busy/retry policy from LRU eviction. Layered design: - Watchdog (pkg/model): per-node correctness floor — on every Load(), evict any loaded model that shares a group with the requested one. Pinned skips surface NeedMore so the loader retries (and ultimately logs a clear warning), instead of silently allowing the rule to be violated. - Distributed scheduler (core/services/nodes): soft anti-affinity hint — scheduleNewModel prefers nodes that don't already host a same-group model, falling back to eviction only if every candidate has a conflict. Composes with NodeSelector at the same point in the candidate pipeline. Per-node, not cluster-wide: VRAM is a node-local resource, and two heavy models running on different nodes is fine. The ConfigLoader is wired into SmartRouter via a small ConcurrencyConflictResolver interface so the nodes package keeps a narrow surface on core/config. Refactors the inner LRU eviction body into a shared collectEvictionsLocked helper and the loader retry loop into retryEnforce(fn, maxRetries, interval), so both LRU and group enforcement share busy/pinned/retry semantics. Closes #9659. Assisted-by: Claude:claude-opus-4-7 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(watchdog): sync pinned + concurrency_groups at startup The startup-time watchdog setup lives in initializeWatchdog (startup.go), not in startWatchdog (watchdog.go). The latter is only invoked from the runtime-settings RestartWatchdog path. As a result, neither SyncPinnedModelsToWatchdog nor SyncModelGroupsToWatchdog ran at boot, so `pinned: true` and `concurrency_groups: [...]` only became effective after a settings-driven watchdog restart. 
Fix by adding both sync calls to initializeWatchdog. Confirmed end-to-end: loading model A in group "heavy", then C with no group (coexists), then B in group "heavy" now correctly evicts A and leaves [B, C]. Assisted-by: Claude:claude-opus-4-7 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(test): satisfy errcheck on new os.Remove in concurrency_groups spec CI lint runs new-from-merge-base, so the pre-existing `defer os.Remove(tmp.Name())` lines are baseline-grandfathered, but the one introduced by the concurrency_groups YAML round-trip test is held to errcheck. Wrap the remove in a closure that discards the error. Assisted-by: Claude:claude-opus-4-7 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-05-18 13:38:49 -04:00 · 2026-05-05 08:42:50 +02:00
parent 22ae415695
commit bbcaebc1ef
17 changed files with 981 additions and 76 deletions
--- a/core/application/distributed.go
+++ b/core/application/distributed.go
@@ -71,7 +71,9 @@ func (ds *DistributedServices) Shutdown() {
 // initDistributed validates distributed mode prerequisites and initializes
 // NATS, object storage, node registry, and instance identity.
 // Returns nil if distributed mode is not enabled.
-func initDistributed(cfg *config.ApplicationConfig, authDB *gorm.DB) (*DistributedServices, error) {
+// configLoader is used by the SmartRouter to compute concurrency-group
+// anti-affinity at placement time (#9659); it may be nil in tests.
+func initDistributed(cfg *config.ApplicationConfig, authDB *gorm.DB, configLoader *config.ModelConfigLoader) (*DistributedServices, error) {
 	if !cfg.Distributed.Enabled {
 		return nil, nil
 	}
@@ -234,12 +236,17 @@ func initDistributed(cfg *config.ApplicationConfig, authDB *gorm.DB) (*Distribut
 	remoteUnloader := nodes.NewRemoteUnloaderAdapter(registry, natsClient)

 	// All dependencies ready — build SmartRouter with all options at once
+	var conflictResolver nodes.ConcurrencyConflictResolver
+	if configLoader != nil {
+		conflictResolver = configLoader
+	}
 	router := nodes.NewSmartRouter(registry, nodes.SmartRouterOptions{
-		Unloader:      remoteUnloader,
-		FileStager:    fileStager,
-		GalleriesJSON: routerGalleriesJSON,
-		AuthToken:     routerAuthToken,
-		DB:            authDB,
+		Unloader:         remoteUnloader,
+		FileStager:       fileStager,
+		GalleriesJSON:    routerGalleriesJSON,
+		AuthToken:        routerAuthToken,
+		DB:               authDB,
+		ConflictResolver: conflictResolver,
 	})

 	// Create ReplicaReconciler for auto-scaling model replicas. Adapter +
--- a/core/application/startup.go
+++ b/core/application/startup.go
@@ -139,7 +139,7 @@ func New(opts ...config.AppOption) (*Application, error) {
 	}

 	// Initialize distributed mode services (NATS, object storage, node registry)
-	distSvc, err := initDistributed(options, application.authDB)
+	distSvc, err := initDistributed(options, application.authDB, application.ModelConfigLoader())
 	if err != nil {
 		return nil, fmt.Errorf("distributed mode initialization failed: %w", err)
 	}
@@ -680,6 +680,12 @@ func initializeWatchdog(application *Application, options *config.ApplicationCon
 			options.LRUEvictionRetryInterval,
 		)

+		// Sync per-model state from configs to the watchdog. Without this,
+		// `pinned: true` and `concurrency_groups:` are only honored after a
+		// settings-driven RestartWatchdog and never at boot.
+		application.SyncPinnedModelsToWatchdog()
+		application.SyncModelGroupsToWatchdog()
+
 		// Start watchdog goroutine if any periodic checks are enabled
 		// LRU eviction doesn't need the Run() loop - it's triggered on model load
 		// But memory reclaimer needs the Run() loop for periodic checking
--- a/core/application/watchdog.go
+++ b/core/application/watchdog.go
@@ -1,6 +1,7 @@
 package application

 import (
+	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/mudler/xlog"
 )
@@ -26,6 +27,40 @@ func (a *Application) SyncPinnedModelsToWatchdog() {
 	xlog.Debug("Synced pinned models to watchdog", "count", len(pinned))
 }

+// SyncModelGroupsToWatchdog reads concurrency_groups from all model configs and replaces the
+// watchdog's model groups (used by EnforceGroupExclusivity); no-op if the loader or watchdog is nil.
+func (a *Application) SyncModelGroupsToWatchdog() {
+	cl := a.ModelConfigLoader()
+	if cl == nil {
+		return
+	}
+	wd := a.modelLoader.GetWatchDog()
+	if wd == nil {
+		return
+	}
+	groups := extractModelGroupsFromConfigs(cl.GetAllModelsConfigs())
+	wd.ReplaceModelGroups(groups)
+	xlog.Debug("Synced concurrency groups to watchdog", "count", len(groups))
+}
+
+// extractModelGroupsFromConfigs builds the model-name→groups map the watchdog
+// expects. Disabled models are skipped so their declared groups do not block
+// other models from loading; models that declare no groups are omitted too.
+func extractModelGroupsFromConfigs(configs []config.ModelConfig) map[string][]string {
+	out := make(map[string][]string)
+	for _, cfg := range configs {
+		if cfg.IsDisabled() {
+			continue
+		}
+		gs := cfg.GetConcurrencyGroups()
+		if len(gs) == 0 {
+			continue
+		}
+		out[cfg.Name] = gs
+	}
+	return out
+}
+
 func (a *Application) StopWatchdog() error {
 	if a.watchdogStop != nil {
 		close(a.watchdogStop)
@@ -65,8 +100,9 @@ func (a *Application) startWatchdog() error {
 		// Set the watchdog on the model loader
 		a.modelLoader.SetWatchDog(wd)

-		// Sync pinned models from config to the watchdog
+		// Sync pinned models and concurrency groups from config to the watchdog
 		a.SyncPinnedModelsToWatchdog()
+		a.SyncModelGroupsToWatchdog()

 		// Start watchdog goroutine if any periodic checks are enabled
 		// LRU eviction doesn't need the Run() loop - it's triggered on model load
@@ -148,8 +184,9 @@ func (a *Application) RestartWatchdog() error {
 		newWD.RestoreState(oldState)
 	}

-	// Re-sync pinned models after restart
+	// Re-sync pinned models and concurrency groups after restart
 	a.SyncPinnedModelsToWatchdog()
+	a.SyncModelGroupsToWatchdog()

 	return nil
 }
--- a/core/application/watchdog_test.go
+++ b/core/application/watchdog_test.go
@@ -0,0 +1,47 @@
+package application
+
+import (
+	"github.com/mudler/LocalAI/core/config"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+var _ = Describe("extractModelGroupsFromConfigs", func() {
+	It("returns an empty map when no config declares groups", func() {
+		out := extractModelGroupsFromConfigs([]config.ModelConfig{
+			{Name: "a"},
+			{Name: "b"},
+		})
+		Expect(out).To(BeEmpty())
+	})
+
+	It("returns each model's normalized groups", func() {
+		out := extractModelGroupsFromConfigs([]config.ModelConfig{
+			{Name: "a", ConcurrencyGroups: []string{" heavy ", "vision", "heavy"}}, // padded + duplicate entry; expected to come back as ["heavy", "vision"]
+			{Name: "b", ConcurrencyGroups: []string{"heavy"}},
+			{Name: "c"}, // no groups → omitted
+		})
+		Expect(out).To(HaveLen(2))
+		Expect(out["a"]).To(Equal([]string{"heavy", "vision"}))
+		Expect(out["b"]).To(Equal([]string{"heavy"}))
+		Expect(out).ToNot(HaveKey("c"))
+	})
+
+	It("omits models whose groups normalize to empty", func() {
+		out := extractModelGroupsFromConfigs([]config.ModelConfig{
+			{Name: "blanks", ConcurrencyGroups: []string{"", "  "}}, // only empty and whitespace-only entries
+		})
+		Expect(out).To(BeEmpty())
+	})
+
+	It("skips disabled models so they cannot block loading after re-enable", func() {
+		disabled := true // local variable so its address can be used for the Disabled pointer field
+		out := extractModelGroupsFromConfigs([]config.ModelConfig{
+			{Name: "a", ConcurrencyGroups: []string{"heavy"}, Disabled: &disabled},
+			{Name: "b", ConcurrencyGroups: []string{"heavy"}},
+		})
+		Expect(out).To(HaveLen(1))
+		Expect(out).To(HaveKey("b"))
+		Expect(out).ToNot(HaveKey("a"))
+	})
+})