Mirror of https://github.com/mudler/LocalAI.git, synced 2026-05-16 20:52:08 -04:00
feat(concurrency-groups): per-model exclusive groups for backend loading (#9662)
* feat(concurrency-groups): per-model exclusive groups for backend loading

Adds `concurrency_groups: [...]` to model YAML configs. Two models that share a group cannot be loaded concurrently on the same node — loading one evicts the others, reusing the existing pinned/busy/retry policy from LRU eviction.

Layered design:

- Watchdog (pkg/model): per-node correctness floor — on every Load(), evict any loaded model that shares a group with the requested one. Pinned skips surface NeedMore so the loader retries (and ultimately logs a clear warning) instead of silently allowing the rule to be violated.
- Distributed scheduler (core/services/nodes): soft anti-affinity hint — scheduleNewModel prefers nodes that don't already host a same-group model, falling back to eviction only if every candidate has a conflict. Composes with NodeSelector at the same point in the candidate pipeline.

Per-node, not cluster-wide: VRAM is a node-local resource, and two heavy models running on different nodes is fine.

The ConfigLoader is wired into SmartRouter via a small ConcurrencyConflictResolver interface so the nodes package keeps a narrow surface on core/config. Refactors the inner LRU eviction body into a shared collectEvictionsLocked helper and the loader retry loop into retryEnforce(fn, maxRetries, interval), so both LRU and group enforcement share busy/pinned/retry semantics.

Closes #9659.

Assisted-by: Claude:claude-opus-4-7 [Claude Code]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix(watchdog): sync pinned + concurrency_groups at startup

The startup-time watchdog setup lives in initializeWatchdog (startup.go), not in startWatchdog (watchdog.go); the latter is only invoked from the runtime-settings RestartWatchdog path. As a result, neither SyncPinnedModelsToWatchdog nor SyncModelGroupsToWatchdog ran at boot, so `pinned: true` and `concurrency_groups: [...]` only became effective after a settings-driven watchdog restart. Fix by adding both sync calls to initializeWatchdog.

Confirmed end-to-end: loading model A in group "heavy", then C with no group (coexists), then B in group "heavy" now correctly evicts A and leaves [B, C].

Assisted-by: Claude:claude-opus-4-7 [Claude Code]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix(test): satisfy errcheck on new os.Remove in concurrency_groups spec

CI lint runs new-from-merge-base, so the pre-existing `defer os.Remove(tmp.Name())` lines are baseline-grandfathered, but the one introduced by the concurrency_groups YAML round-trip test is held to errcheck. Wrap the remove in a closure that discards the error.

Assisted-by: Claude:claude-opus-4-7 [Claude Code]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
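To make the new knob concrete, here is a minimal sketch of two model YAML configs that share a group. Only `concurrency_groups` is the field this commit adds; the model names, backend, and file paths are illustrative assumptions, not taken from the repo:

```yaml
# model-a.yaml (illustrative): claims the "heavy" group
name: model-a
backend: llama-cpp
parameters:
  model: model-a.gguf
concurrency_groups: ["heavy"]
```

```yaml
# model-b.yaml (illustrative): shares "heavy", so loading model-b on a node
# that already hosts model-a evicts model-a first (pinned/busy rules apply)
name: model-b
backend: llama-cpp
parameters:
  model: model-b.gguf
concurrency_groups: ["heavy"]
```

A model with no `concurrency_groups` at all (like C in the end-to-end check above) coexists freely with both.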
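For the third fix, the closure wrapper is the standard way to satisfy errcheck on a deferred cleanup. A self-contained sketch, assuming `tmp` comes from `os.CreateTemp` as in a typical YAML round-trip test:

```go
package main

import "os"

func main() {
	tmp, err := os.CreateTemp("", "concurrency-groups-*.yaml")
	if err != nil {
		panic(err)
	}
	// A bare `defer os.Remove(tmp.Name())` discards the error and trips
	// errcheck; wrapping it in a closure that assigns the error to _ makes
	// the discard explicit.
	defer func() { _ = os.Remove(tmp.Name()) }()
	_ = tmp.Close()
}
```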
Committed by GitHub. Parent 22ae415695, commit bbcaebc1ef.
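The scheduler half of the change (core/services/nodes) is not among the hunks shown below, so here is a rough sketch of the soft anti-affinity idea only. Every name in it is invented for illustration; this is not the actual scheduleNewModel implementation:

```go
package main

import "fmt"

// node is a stand-in for a cluster node and the models it currently hosts.
type node struct {
	name   string
	hosted []string
}

// hasGroupConflict reports whether any model hosted on n shares a
// concurrency group with the requested model, per the groups registry.
func hasGroupConflict(n node, requested string, groups map[string][]string) bool {
	for _, m := range n.hosted {
		for _, g := range groups[m] {
			for _, rg := range groups[requested] {
				if g == rg {
					return true
				}
			}
		}
	}
	return false
}

// preferNonConflicting keeps candidates with no same-group model loaded; if
// every candidate conflicts, it falls back to the full list so eviction (the
// correctness floor in the watchdog) resolves the conflict instead.
func preferNonConflicting(candidates []node, requested string, groups map[string][]string) []node {
	var clean []node
	for _, n := range candidates {
		if !hasGroupConflict(n, requested, groups) {
			clean = append(clean, n)
		}
	}
	if len(clean) == 0 {
		return candidates
	}
	return clean
}

func main() {
	groups := map[string][]string{"a": {"heavy"}, "b": {"heavy"}}
	nodes := []node{{name: "n1", hosted: []string{"a"}}, {name: "n2"}}
	// n1 already hosts "a" (group "heavy"), so scheduling "b" prefers n2.
	fmt.Println(preferNonConflicting(nodes, "b", groups)[0].name) // n2
}
```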
@@ -197,6 +197,35 @@ func (ml *ModelLoader) backendLoader(opts ...Option) (client grpc.Backend, err error) {
 	return model.GRPC(o.parallelRequests, ml.wd), nil
 }
 
+// retryEnforce repeatedly invokes fn until it returns NeedMore=false or the
+// retry budget is exhausted. It sleeps `retryInterval` between attempts and
+// logs progress under `label`. Used by both LRU and group-exclusivity
+// enforcement so the busy-model wait behaviour is identical.
+func retryEnforce(fn func() EnforceLRULimitResult, maxRetries int, retryInterval time.Duration, label string) {
+	for attempt := range maxRetries {
+		result := fn()
+		if !result.NeedMore {
+			if result.EvictedCount > 0 {
+				xlog.Info("[ModelLoader] "+label+" enforcement complete", "evicted", result.EvictedCount)
+			}
+			return
+		}
+		if attempt < maxRetries-1 {
+			xlog.Info("[ModelLoader] Waiting for busy models to become idle before eviction",
+				"label", label,
+				"evicted", result.EvictedCount,
+				"attempt", attempt+1,
+				"maxRetries", maxRetries,
+				"retryIn", retryInterval)
+			time.Sleep(retryInterval)
+		} else {
+			xlog.Warn("[ModelLoader] "+label+" enforcement incomplete after max retries",
+				"evicted", result.EvictedCount,
+				"reason", "conflicts are still busy or pinned")
+		}
+	}
+}
+
 // enforceLRULimit enforces the LRU limit before loading a new model.
 // This is called before loading a model to ensure we don't exceed the limit.
 // It accounts for models that are currently being loaded by other goroutines.
@@ -206,41 +235,34 @@ func (ml *ModelLoader) enforceLRULimit() {
 		return
 	}
 
 	// Get the count of models currently being loaded to account for concurrent requests
 	pendingLoads := ml.GetLoadingCount()
 
 	// Get retry settings from ModelLoader
 	ml.mu.Lock()
 	maxRetries := ml.lruEvictionMaxRetries
 	retryInterval := ml.lruEvictionRetryInterval
 	ml.mu.Unlock()
 
-	for attempt := range maxRetries {
-		result := ml.wd.EnforceLRULimit(pendingLoads)
-
-		if !result.NeedMore {
-			// Successfully evicted enough models (or no eviction needed)
-			if result.EvictedCount > 0 {
-				xlog.Info("[ModelLoader] LRU enforcement complete", "evicted", result.EvictedCount)
-			}
-			return
-		}
-
-		// Need more evictions but models are busy - wait and retry
-		if attempt < maxRetries-1 {
-			xlog.Info("[ModelLoader] Waiting for busy models to become idle before eviction",
-				"evicted", result.EvictedCount,
-				"attempt", attempt+1,
-				"maxRetries", maxRetries,
-				"retryIn", retryInterval)
-			time.Sleep(retryInterval)
-		} else {
-			// Last attempt - log warning but proceed (might fail to load, but at least we tried)
-			xlog.Warn("[ModelLoader] LRU enforcement incomplete after max retries",
-				"evicted", result.EvictedCount,
-				"reason", "models are still busy with active API calls")
-		}
-	}
+	retryEnforce(func() EnforceLRULimitResult {
+		return ml.wd.EnforceLRULimit(pendingLoads)
+	}, maxRetries, retryInterval, "LRU")
+}
+
+// enforceGroupExclusivity evicts every loaded model that shares a concurrency
+// group with modelID before loading proceeds. Reuses the LRU retry settings so
+// busy conflicts wait for the same window as a busy LRU eviction.
+func (ml *ModelLoader) enforceGroupExclusivity(modelID string) {
+	if ml.wd == nil {
+		return
+	}
+
+	ml.mu.Lock()
+	maxRetries := ml.lruEvictionMaxRetries
+	retryInterval := ml.lruEvictionRetryInterval
+	ml.mu.Unlock()
+
+	retryEnforce(func() EnforceLRULimitResult {
+		return ml.wd.EnforceGroupExclusivity(modelID)
+	}, maxRetries, retryInterval, "group-exclusivity")
 }
 
 // updateModelLastUsed updates the last used time for a model (for LRU tracking)
@@ -270,6 +292,12 @@ func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
 		return client, nil
 	}
 
+	// Evict any loaded model that shares a concurrency group with the
+	// requested one before applying the global LRU cap — group eviction may
+	// already make room, and otherwise LRU might evict an unrelated model
+	// only for the group check to immediately evict another.
+	ml.enforceGroupExclusivity(o.modelID)
+
 	// Enforce LRU limit before loading a new model
 	ml.enforceLRULimit()
 
pkg/model/initializers_retry_test.go (new file, 50 lines)
@@ -0,0 +1,50 @@
+package model
+
+import (
+	"sync/atomic"
+	"time"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+var _ = Describe("retryEnforce", func() {
+	It("returns immediately when the first attempt is satisfied", func() {
+		var calls atomic.Int32
+		retryEnforce(func() EnforceLRULimitResult {
+			calls.Add(1)
+			return EnforceLRULimitResult{}
+		}, 5, 1*time.Millisecond, "test")
+		Expect(calls.Load()).To(Equal(int32(1)))
+	})
+
+	It("retries until NeedMore clears", func() {
+		var calls atomic.Int32
+		retryEnforce(func() EnforceLRULimitResult {
+			n := calls.Add(1)
+			if n < 3 {
+				return EnforceLRULimitResult{NeedMore: true}
+			}
+			return EnforceLRULimitResult{EvictedCount: 1}
+		}, 5, 1*time.Millisecond, "test")
+		Expect(calls.Load()).To(Equal(int32(3)))
+	})
+
+	It("stops after maxRetries when NeedMore never clears", func() {
+		var calls atomic.Int32
+		retryEnforce(func() EnforceLRULimitResult {
+			calls.Add(1)
+			return EnforceLRULimitResult{NeedMore: true}
+		}, 4, 1*time.Millisecond, "test")
+		Expect(calls.Load()).To(Equal(int32(4)))
+	})
+
+	It("treats maxRetries <= 0 as a no-op (no calls)", func() {
+		var calls atomic.Int32
+		retryEnforce(func() EnforceLRULimitResult {
+			calls.Add(1)
+			return EnforceLRULimitResult{}
+		}, 0, 1*time.Millisecond, "test")
+		Expect(calls.Load()).To(Equal(int32(0)))
+	})
+})
@@ -48,6 +48,11 @@ type WatchDog struct {
 
 	// Pinned models are excluded from idle, LRU, and memory-pressure eviction
 	pinnedModels map[string]bool
+
+	// modelGroups maps a model name to its declared concurrency groups.
+	// Two loaded models that share at least one group cannot coexist on this
+	// node — see EnforceGroupExclusivity.
+	modelGroups map[string][]string
 }
 
 type ProcessManager interface {
@@ -82,6 +87,7 @@ func NewWatchDog(opts ...WatchDogOption) *WatchDog {
 		lruLimit:               o.lruLimit,
 		addressModelMap:        make(map[string]string),
 		pinnedModels:           make(map[string]bool),
+		modelGroups:            make(map[string][]string),
 		stop:                   make(chan bool, 1),
 		done:                   make(chan bool, 1),
 		memoryReclaimerEnabled: o.memoryReclaimerEnabled,
@@ -145,6 +151,34 @@ func (wd *WatchDog) IsModelPinned(modelName string) bool {
 	return wd.pinnedModels[modelName]
 }
 
+// ReplaceModelGroups replaces the per-model concurrency-group registry. The
+// supplied map is copied; callers may mutate it after the call. Passing an
+// empty or nil map clears all entries.
+func (wd *WatchDog) ReplaceModelGroups(groups map[string][]string) {
+	wd.Lock()
+	defer wd.Unlock()
+	wd.modelGroups = make(map[string][]string, len(groups))
+	for name, gs := range groups {
+		if len(gs) == 0 {
+			continue
+		}
+		wd.modelGroups[name] = slices.Clone(gs)
+	}
+}
+
+// GetModelGroups returns a copy of the concurrency groups configured for
+// the given model, or nil if the model has no groups. The result may be
+// freely mutated by the caller.
+func (wd *WatchDog) GetModelGroups(modelName string) []string {
+	wd.Lock()
+	defer wd.Unlock()
+	gs, ok := wd.modelGroups[modelName]
+	if !ok || len(gs) == 0 {
+		return nil
+	}
+	return slices.Clone(gs)
+}
+
 func (wd *WatchDog) Shutdown() {
 	wd.Lock()
 	defer wd.Unlock()
@@ -327,31 +361,8 @@ func (wd *WatchDog) EnforceLRULimit(pendingLoads int) EnforceLRULimitResult {
 	})
 
 	// Collect models to evict (the oldest ones)
-	var modelsToShutdown []string
-	evictedCount := 0
-	skippedBusyCount := 0
-	for i := 0; evictedCount < modelsToEvict && i < len(models); i++ {
-		m := models[i]
-		// Skip pinned models
-		if wd.pinnedModels[m.model] {
-			xlog.Debug("[WatchDog] Skipping LRU eviction for pinned model", "model", m.model)
-			continue
-		}
-		// Check if model is busy
-		_, isBusy := wd.busyTime[m.address]
-		if isBusy && !forceEvictionWhenBusy {
-			// Skip eviction for busy models when forceEvictionWhenBusy is false
-			xlog.Warn("[WatchDog] Skipping LRU eviction for busy model", "model", m.model, "reason", "model has active API calls")
-			skippedBusyCount++
-			continue
-		}
-		xlog.Info("[WatchDog] LRU evicting model", "model", m.model, "lastUsed", m.lastUsed, "busy", isBusy)
-		modelsToShutdown = append(modelsToShutdown, m.model)
-		// Clean up the maps while we have the lock
-		wd.untrack(m.address)
-		evictedCount++
-	}
-	needMore := evictedCount < modelsToEvict && skippedBusyCount > 0
+	modelsToShutdown, skippedBusyCount := wd.collectEvictionsLocked(models, modelsToEvict, forceEvictionWhenBusy)
+	needMore := len(modelsToShutdown) < modelsToEvict && skippedBusyCount > 0
 	wd.Unlock()
 
 	// Now shutdown models without holding the watchdog lock to prevent deadlock
@@ -363,7 +374,7 @@ func (wd *WatchDog) EnforceLRULimit(pendingLoads int) EnforceLRULimitResult {
 	}
 
 	if needMore {
-		xlog.Warn("[WatchDog] LRU eviction incomplete", "evicted", evictedCount, "needed", modelsToEvict, "skippedBusy", skippedBusyCount, "reason", "some models are busy with active API calls")
+		xlog.Warn("[WatchDog] LRU eviction incomplete", "evicted", len(modelsToShutdown), "needed", modelsToEvict, "skippedBusy", skippedBusyCount, "reason", "some models are busy with active API calls")
 	}
 
 	return EnforceLRULimitResult{
@@ -372,6 +383,110 @@ func (wd *WatchDog) EnforceLRULimit(pendingLoads int) EnforceLRULimitResult {
 	}
 }
 
+// collectEvictionsLocked walks `candidates` (already in eviction order) and
+// untracks up to `maxToEvict` models that are eligible for eviction. Pinned
+// models are always skipped; busy models are skipped unless `force` is true.
+// Returns the names of evicted models and the number skipped because they
+// were busy. Must be called with wd.Lock() held.
+func (wd *WatchDog) collectEvictionsLocked(candidates []modelUsageInfo, maxToEvict int, force bool) (evicted []string, skippedBusy int) {
+	for i := 0; len(evicted) < maxToEvict && i < len(candidates); i++ {
+		m := candidates[i]
+		if wd.pinnedModels[m.model] {
+			xlog.Debug("[WatchDog] Skipping eviction for pinned model", "model", m.model)
+			continue
+		}
+		_, isBusy := wd.busyTime[m.address]
+		if isBusy && !force {
+			xlog.Warn("[WatchDog] Skipping eviction for busy model", "model", m.model, "reason", "model has active API calls")
+			skippedBusy++
+			continue
+		}
+		xlog.Info("[WatchDog] evicting model", "model", m.model, "busy", isBusy)
+		evicted = append(evicted, m.model)
+		wd.untrack(m.address)
+	}
+	return evicted, skippedBusy
+}
+
+// EnforceGroupExclusivity evicts every loaded model that shares at least one
+// concurrency group with the requested model. The pinned/busy/retry semantics
+// match EnforceLRULimit so the loader's retry loop can stay generic.
+func (wd *WatchDog) EnforceGroupExclusivity(requestedModel string) EnforceLRULimitResult {
+	wd.Lock()
+
+	requestedGroups := wd.modelGroups[requestedModel]
+	if len(requestedGroups) == 0 {
+		wd.Unlock()
+		return EnforceLRULimitResult{}
+	}
+
+	forceEvictionWhenBusy := wd.forceEvictionWhenBusy
+
+	// Build the conflict candidate list: every loaded model whose groups
+	// overlap with requestedGroups. Order doesn't affect correctness, but
+	// sort by lastUsed (oldest first) so logs and behaviour are deterministic.
+	var conflicts []modelUsageInfo
+	for address, name := range wd.addressModelMap {
+		if name == requestedModel {
+			continue
+		}
+		if !groupsOverlap(requestedGroups, wd.modelGroups[name]) {
+			continue
+		}
+		conflicts = append(conflicts, modelUsageInfo{
+			address:  address,
+			model:    name,
+			lastUsed: wd.lastUsed[address],
+		})
+	}
+	if len(conflicts) == 0 {
+		wd.Unlock()
+		return EnforceLRULimitResult{}
+	}
+	slices.SortFunc(conflicts, func(a, b modelUsageInfo) int {
+		return a.lastUsed.Compare(b.lastUsed)
+	})
+
+	xlog.Debug("[WatchDog] Group exclusivity triggered", "requested", requestedModel, "groups", requestedGroups, "conflicts", len(conflicts))
+
+	modelsToShutdown, skippedBusyCount := wd.collectEvictionsLocked(conflicts, len(conflicts), forceEvictionWhenBusy)
+	// For groups any unresolved conflict matters — busy *or* pinned. The loader
+	// retries on NeedMore; pinned cases will eventually time out and the load
+	// proceeds with a visible warning, which is the right signal for what is a
+	// configuration mismatch.
+	needMore := len(modelsToShutdown) < len(conflicts)
+	wd.Unlock()
+
+	for _, m := range modelsToShutdown {
+		if err := wd.pm.ShutdownModel(m); err != nil {
+			xlog.Error("[WatchDog] error shutting down model during group eviction", "error", err, "model", m)
+		}
+		xlog.Debug("[WatchDog] Group eviction complete", "model", m)
+	}
+
+	if needMore {
+		xlog.Warn("[WatchDog] Group eviction incomplete", "requested", requestedModel, "evicted", len(modelsToShutdown), "needed", len(conflicts), "skippedBusy", skippedBusyCount, "reason", "some conflicts are busy or pinned")
+	}
+
+	return EnforceLRULimitResult{
+		EvictedCount: len(modelsToShutdown),
+		NeedMore:     needMore,
+	}
+}
+
+// groupsOverlap reports whether the two group lists share any name.
+func groupsOverlap(a, b []string) bool {
+	if len(a) == 0 || len(b) == 0 {
+		return false
+	}
+	for _, x := range a {
+		if slices.Contains(b, x) {
+			return true
+		}
+	}
+	return false
+}
+
 func (wd *WatchDog) Run() {
 	xlog.Info("[WatchDog] starting watchdog")
 
@@ -666,6 +666,182 @@ var _ = Describe("WatchDog", func() {
 		})
 	})
 
+	Context("Concurrency Groups", func() {
+		Describe("ReplaceModelGroups / GetModelGroups", func() {
+			It("returns nil for unknown models", func() {
+				wd = model.NewWatchDog(model.WithProcessManager(pm))
+				Expect(wd.GetModelGroups("nope")).To(BeNil())
+			})
+
+			It("stores and retrieves groups", func() {
+				wd = model.NewWatchDog(model.WithProcessManager(pm))
+				wd.ReplaceModelGroups(map[string][]string{
+					"a": {"heavy", "vision"},
+					"b": {"heavy"},
+				})
+				Expect(wd.GetModelGroups("a")).To(Equal([]string{"heavy", "vision"}))
+				Expect(wd.GetModelGroups("b")).To(Equal([]string{"heavy"}))
+				Expect(wd.GetModelGroups("c")).To(BeNil())
+			})
+
+			It("replaces previous state on subsequent calls", func() {
+				wd = model.NewWatchDog(model.WithProcessManager(pm))
+				wd.ReplaceModelGroups(map[string][]string{"a": {"heavy"}})
+				wd.ReplaceModelGroups(map[string][]string{"b": {"vision"}})
+				Expect(wd.GetModelGroups("a")).To(BeNil())
+				Expect(wd.GetModelGroups("b")).To(Equal([]string{"vision"}))
+			})
+
+			It("clears state when called with an empty map", func() {
+				wd = model.NewWatchDog(model.WithProcessManager(pm))
+				wd.ReplaceModelGroups(map[string][]string{"a": {"heavy"}})
+				wd.ReplaceModelGroups(nil)
+				Expect(wd.GetModelGroups("a")).To(BeNil())
+			})
+
+			It("returns a defensive copy", func() {
+				wd = model.NewWatchDog(model.WithProcessManager(pm))
+				wd.ReplaceModelGroups(map[string][]string{"a": {"heavy"}})
+				got := wd.GetModelGroups("a")
+				got[0] = "tampered"
+				Expect(wd.GetModelGroups("a")).To(Equal([]string{"heavy"}))
+			})
+		})
+
+		Describe("EnforceGroupExclusivity", func() {
+			It("is a no-op when the requested model has no groups", func() {
+				wd = model.NewWatchDog(model.WithProcessManager(pm))
+				wd.AddAddressModelMap("addr1", "model1")
+				wd.AddAddressModelMap("addr2", "model2")
+
+				result := wd.EnforceGroupExclusivity("requested")
+				Expect(result.EvictedCount).To(Equal(0))
+				Expect(result.NeedMore).To(BeFalse())
+				Expect(pm.getShutdownCalls()).To(BeEmpty())
+			})
+
+			It("is a no-op when no loaded model shares a group", func() {
+				wd = model.NewWatchDog(model.WithProcessManager(pm))
+				wd.ReplaceModelGroups(map[string][]string{
+					"loaded":    {"vision"},
+					"requested": {"heavy"},
+				})
+				wd.AddAddressModelMap("addr1", "loaded")
+
+				result := wd.EnforceGroupExclusivity("requested")
+				Expect(result.EvictedCount).To(Equal(0))
+				Expect(result.NeedMore).To(BeFalse())
+				Expect(pm.getShutdownCalls()).To(BeEmpty())
+			})
+
+			It("evicts a loaded model that shares a single group", func() {
+				wd = model.NewWatchDog(model.WithProcessManager(pm))
+				wd.ReplaceModelGroups(map[string][]string{
+					"a": {"heavy"},
+					"b": {"heavy"},
+				})
+				wd.AddAddressModelMap("addrA", "a")
+				wd.Mark("addrA")
+				wd.UnMark("addrA")
+
+				result := wd.EnforceGroupExclusivity("b")
+				Expect(result.EvictedCount).To(Equal(1))
+				Expect(result.NeedMore).To(BeFalse())
+				Expect(pm.getShutdownCalls()).To(ConsistOf("a"))
+			})
+
+			It("evicts when groups overlap on any single name", func() {
+				wd = model.NewWatchDog(model.WithProcessManager(pm))
+				wd.ReplaceModelGroups(map[string][]string{
+					"a": {"x", "y"},
+					"b": {"y", "z"},
+				})
+				wd.AddAddressModelMap("addrA", "a")
+				wd.Mark("addrA")
+				wd.UnMark("addrA")
+
+				result := wd.EnforceGroupExclusivity("b")
+				Expect(result.EvictedCount).To(Equal(1))
+				Expect(pm.getShutdownCalls()).To(ConsistOf("a"))
+			})
+
+			It("evicts every conflicting loaded model", func() {
+				wd = model.NewWatchDog(
+					model.WithProcessManager(pm),
+					model.WithForceEvictionWhenBusy(true),
+				)
+				wd.ReplaceModelGroups(map[string][]string{
+					"a": {"heavy"},
+					"b": {"heavy"},
+					"c": {"heavy"},
+				})
+				wd.AddAddressModelMap("addrA", "a")
+				wd.Mark("addrA")
+				wd.UnMark("addrA")
+				wd.AddAddressModelMap("addrB", "b")
+				wd.Mark("addrB")
+				wd.UnMark("addrB")
+
+				result := wd.EnforceGroupExclusivity("c")
+				Expect(result.EvictedCount).To(Equal(2))
+				Expect(pm.getShutdownCalls()).To(ConsistOf("a", "b"))
+			})
+
+			It("skips a pinned conflicting model and reports NeedMore", func() {
+				wd = model.NewWatchDog(
+					model.WithProcessManager(pm),
+					model.WithForceEvictionWhenBusy(true),
+				)
+				wd.ReplaceModelGroups(map[string][]string{
+					"a": {"heavy"},
+					"b": {"heavy"},
+				})
+				wd.SetPinnedModels([]string{"a"})
+				wd.AddAddressModelMap("addrA", "a")
+				wd.Mark("addrA")
+				wd.UnMark("addrA")
+
+				result := wd.EnforceGroupExclusivity("b")
+				Expect(result.EvictedCount).To(Equal(0))
+				Expect(result.NeedMore).To(BeTrue())
+				Expect(pm.getShutdownCalls()).To(BeEmpty())
+			})
+
+			It("skips a busy conflict when forceEvictionWhenBusy is false", func() {
+				wd = model.NewWatchDog(model.WithProcessManager(pm))
+				wd.ReplaceModelGroups(map[string][]string{
+					"a": {"heavy"},
+					"b": {"heavy"},
+				})
+				wd.AddAddressModelMap("addrA", "a")
+				wd.Mark("addrA") // leave busy
+
+				result := wd.EnforceGroupExclusivity("b")
+				Expect(result.EvictedCount).To(Equal(0))
+				Expect(result.NeedMore).To(BeTrue())
+				Expect(pm.getShutdownCalls()).To(BeEmpty())
+			})
+
+			It("evicts a busy conflict when forceEvictionWhenBusy is true", func() {
+				wd = model.NewWatchDog(
+					model.WithProcessManager(pm),
+					model.WithForceEvictionWhenBusy(true),
+				)
+				wd.ReplaceModelGroups(map[string][]string{
+					"a": {"heavy"},
+					"b": {"heavy"},
+				})
+				wd.AddAddressModelMap("addrA", "a")
+				wd.Mark("addrA") // leave busy
+
+				result := wd.EnforceGroupExclusivity("b")
+				Expect(result.EvictedCount).To(Equal(1))
+				Expect(result.NeedMore).To(BeFalse())
+				Expect(pm.getShutdownCalls()).To(ConsistOf("a"))
+			})
+		})
+	})
+
 	Context("Functional Options", func() {
 		It("should use default options when none provided", func() {
 			wd = model.NewWatchDog(