mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-18 13:38:49 -04:00
feat(concurrency-groups): per-model exclusive groups for backend loading (#9662)
* feat(concurrency-groups): per-model exclusive groups for backend loading Adds `concurrency_groups: [...]` to model YAML configs. Two models that share a group cannot be loaded concurrently on the same node — loading one evicts the others, reusing the existing pinned/busy/retry policy from LRU eviction. Layered design: - Watchdog (pkg/model): per-node correctness floor — on every Load(), evict any loaded model that shares a group with the requested one. Pinned skips surface NeedMore so the loader retries (and ultimately logs a clear warning), instead of silently allowing the rule to be violated. - Distributed scheduler (core/services/nodes): soft anti-affinity hint — scheduleNewModel prefers nodes that don't already host a same-group model, falling back to eviction only if every candidate has a conflict. Composes with NodeSelector at the same point in the candidate pipeline. Per-node, not cluster-wide: VRAM is a node-local resource, and two heavy models running on different nodes is fine. The ConfigLoader is wired into SmartRouter via a small ConcurrencyConflictResolver interface so the nodes package keeps a narrow surface on core/config. Refactors the inner LRU eviction body into a shared collectEvictionsLocked helper and the loader retry loop into retryEnforce(fn, maxRetries, interval), so both LRU and group enforcement share busy/pinned/retry semantics. Closes #9659. Assisted-by: Claude:claude-opus-4-7 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(watchdog): sync pinned + concurrency_groups at startup The startup-time watchdog setup lives in initializeWatchdog (startup.go), not in startWatchdog (watchdog.go). The latter is only invoked from the runtime-settings RestartWatchdog path. As a result, neither SyncPinnedModelsToWatchdog nor SyncModelGroupsToWatchdog ran at boot, so `pinned: true` and `concurrency_groups: [...]` only became effective after a settings-driven watchdog restart. 
Fix by adding both sync calls to initializeWatchdog. Confirmed end-to-end: loading model A in group "heavy", then C with no group (coexists), then B in group "heavy" now correctly evicts A and leaves [B, C]. Assisted-by: Claude:claude-opus-4-7 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(test): satisfy errcheck on new os.Remove in concurrency_groups spec CI lint runs new-from-merge-base, so the pre-existing `defer os.Remove(tmp.Name())` lines are baseline-grandfathered but the one introduced by the concurrency_groups YAML round-trip test is held to errcheck. Wrap the remove in a closure that discards the error. Assisted-by: Claude:claude-opus-4-7 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
committed by
GitHub
parent
22ae415695
commit
bbcaebc1ef
@@ -71,7 +71,9 @@ func (ds *DistributedServices) Shutdown() {
|
||||
// initDistributed validates distributed mode prerequisites and initializes
|
||||
// NATS, object storage, node registry, and instance identity.
|
||||
// Returns nil if distributed mode is not enabled.
|
||||
func initDistributed(cfg *config.ApplicationConfig, authDB *gorm.DB) (*DistributedServices, error) {
|
||||
// configLoader is used by the SmartRouter to compute concurrency-group
|
||||
// anti-affinity at placement time (#9659); it may be nil in tests.
|
||||
func initDistributed(cfg *config.ApplicationConfig, authDB *gorm.DB, configLoader *config.ModelConfigLoader) (*DistributedServices, error) {
|
||||
if !cfg.Distributed.Enabled {
|
||||
return nil, nil
|
||||
}
|
||||
@@ -234,12 +236,17 @@ func initDistributed(cfg *config.ApplicationConfig, authDB *gorm.DB) (*Distribut
|
||||
remoteUnloader := nodes.NewRemoteUnloaderAdapter(registry, natsClient)
|
||||
|
||||
// All dependencies ready — build SmartRouter with all options at once
|
||||
var conflictResolver nodes.ConcurrencyConflictResolver
|
||||
if configLoader != nil {
|
||||
conflictResolver = configLoader
|
||||
}
|
||||
router := nodes.NewSmartRouter(registry, nodes.SmartRouterOptions{
|
||||
Unloader: remoteUnloader,
|
||||
FileStager: fileStager,
|
||||
GalleriesJSON: routerGalleriesJSON,
|
||||
AuthToken: routerAuthToken,
|
||||
DB: authDB,
|
||||
Unloader: remoteUnloader,
|
||||
FileStager: fileStager,
|
||||
GalleriesJSON: routerGalleriesJSON,
|
||||
AuthToken: routerAuthToken,
|
||||
DB: authDB,
|
||||
ConflictResolver: conflictResolver,
|
||||
})
|
||||
|
||||
// Create ReplicaReconciler for auto-scaling model replicas. Adapter +
|
||||
|
||||
@@ -139,7 +139,7 @@ func New(opts ...config.AppOption) (*Application, error) {
|
||||
}
|
||||
|
||||
// Initialize distributed mode services (NATS, object storage, node registry)
|
||||
distSvc, err := initDistributed(options, application.authDB)
|
||||
distSvc, err := initDistributed(options, application.authDB, application.ModelConfigLoader())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("distributed mode initialization failed: %w", err)
|
||||
}
|
||||
@@ -680,6 +680,12 @@ func initializeWatchdog(application *Application, options *config.ApplicationCon
|
||||
options.LRUEvictionRetryInterval,
|
||||
)
|
||||
|
||||
// Sync per-model state from configs to the watchdog. Without this,
|
||||
// `pinned: true` and `concurrency_groups:` are only honored after a
|
||||
// settings-driven RestartWatchdog and never at boot.
|
||||
application.SyncPinnedModelsToWatchdog()
|
||||
application.SyncModelGroupsToWatchdog()
|
||||
|
||||
// Start watchdog goroutine if any periodic checks are enabled
|
||||
// LRU eviction doesn't need the Run() loop - it's triggered on model load
|
||||
// But memory reclaimer needs the Run() loop for periodic checking
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package application
|
||||
|
||||
import (
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/xlog"
|
||||
)
|
||||
@@ -26,6 +27,40 @@ func (a *Application) SyncPinnedModelsToWatchdog() {
|
||||
xlog.Debug("Synced pinned models to watchdog", "count", len(pinned))
|
||||
}
|
||||
|
||||
// SyncModelGroupsToWatchdog reads concurrency_groups from all model configs and
|
||||
// updates the watchdog so EnforceGroupExclusivity has the current view.
|
||||
func (a *Application) SyncModelGroupsToWatchdog() {
|
||||
cl := a.ModelConfigLoader()
|
||||
if cl == nil {
|
||||
return
|
||||
}
|
||||
wd := a.modelLoader.GetWatchDog()
|
||||
if wd == nil {
|
||||
return
|
||||
}
|
||||
groups := extractModelGroupsFromConfigs(cl.GetAllModelsConfigs())
|
||||
wd.ReplaceModelGroups(groups)
|
||||
xlog.Debug("Synced concurrency groups to watchdog", "count", len(groups))
|
||||
}
|
||||
|
||||
// extractModelGroupsFromConfigs builds the model→groups map the watchdog
|
||||
// expects. Disabled models are skipped — their declared groups should not
|
||||
// block other models from loading.
|
||||
func extractModelGroupsFromConfigs(configs []config.ModelConfig) map[string][]string {
|
||||
out := make(map[string][]string)
|
||||
for _, cfg := range configs {
|
||||
if cfg.IsDisabled() {
|
||||
continue
|
||||
}
|
||||
gs := cfg.GetConcurrencyGroups()
|
||||
if len(gs) == 0 {
|
||||
continue
|
||||
}
|
||||
out[cfg.Name] = gs
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (a *Application) StopWatchdog() error {
|
||||
if a.watchdogStop != nil {
|
||||
close(a.watchdogStop)
|
||||
@@ -65,8 +100,9 @@ func (a *Application) startWatchdog() error {
|
||||
// Set the watchdog on the model loader
|
||||
a.modelLoader.SetWatchDog(wd)
|
||||
|
||||
// Sync pinned models from config to the watchdog
|
||||
// Sync pinned models and concurrency groups from config to the watchdog
|
||||
a.SyncPinnedModelsToWatchdog()
|
||||
a.SyncModelGroupsToWatchdog()
|
||||
|
||||
// Start watchdog goroutine if any periodic checks are enabled
|
||||
// LRU eviction doesn't need the Run() loop - it's triggered on model load
|
||||
@@ -148,8 +184,9 @@ func (a *Application) RestartWatchdog() error {
|
||||
newWD.RestoreState(oldState)
|
||||
}
|
||||
|
||||
// Re-sync pinned models after restart
|
||||
// Re-sync pinned models and concurrency groups after restart
|
||||
a.SyncPinnedModelsToWatchdog()
|
||||
a.SyncModelGroupsToWatchdog()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
47
core/application/watchdog_test.go
Normal file
47
core/application/watchdog_test.go
Normal file
@@ -0,0 +1,47 @@
|
||||
package application
|
||||
|
||||
import (
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
var _ = Describe("extractModelGroupsFromConfigs", func() {
|
||||
It("returns an empty map when no config declares groups", func() {
|
||||
out := extractModelGroupsFromConfigs([]config.ModelConfig{
|
||||
{Name: "a"},
|
||||
{Name: "b"},
|
||||
})
|
||||
Expect(out).To(BeEmpty())
|
||||
})
|
||||
|
||||
It("returns each model's normalized groups", func() {
|
||||
out := extractModelGroupsFromConfigs([]config.ModelConfig{
|
||||
{Name: "a", ConcurrencyGroups: []string{" heavy ", "vision", "heavy"}},
|
||||
{Name: "b", ConcurrencyGroups: []string{"heavy"}},
|
||||
{Name: "c"}, // no groups → omitted
|
||||
})
|
||||
Expect(out).To(HaveLen(2))
|
||||
Expect(out["a"]).To(Equal([]string{"heavy", "vision"}))
|
||||
Expect(out["b"]).To(Equal([]string{"heavy"}))
|
||||
Expect(out).ToNot(HaveKey("c"))
|
||||
})
|
||||
|
||||
It("omits models whose groups normalize to empty", func() {
|
||||
out := extractModelGroupsFromConfigs([]config.ModelConfig{
|
||||
{Name: "blanks", ConcurrencyGroups: []string{"", " "}},
|
||||
})
|
||||
Expect(out).To(BeEmpty())
|
||||
})
|
||||
|
||||
It("skips disabled models so they cannot block loading after re-enable", func() {
|
||||
disabled := true
|
||||
out := extractModelGroupsFromConfigs([]config.ModelConfig{
|
||||
{Name: "a", ConcurrencyGroups: []string{"heavy"}, Disabled: &disabled},
|
||||
{Name: "b", ConcurrencyGroups: []string{"heavy"}},
|
||||
})
|
||||
Expect(out).To(HaveLen(1))
|
||||
Expect(out).To(HaveKey("b"))
|
||||
Expect(out).ToNot(HaveKey("a"))
|
||||
})
|
||||
})
|
||||
Reference in New Issue
Block a user