feat(concurrency-groups): per-model exclusive groups for backend loading (#9662)

* feat(concurrency-groups): per-model exclusive groups for backend loading

Adds `concurrency_groups: [...]` to model YAML configs. Two models that share
a group cannot be loaded concurrently on the same node — loading one evicts
the others, reusing the existing pinned/busy/retry policy from LRU eviction.

Layered design:
- Watchdog (pkg/model): per-node correctness floor — on every Load(), evict
  any loaded model that shares a group with the requested one. Pinned skips
  surface NeedMore so the loader retries (and ultimately logs a clear
  warning), instead of silently allowing the rule to be violated.
- Distributed scheduler (core/services/nodes): soft anti-affinity hint —
  scheduleNewModel prefers nodes that don't already host a same-group
  model, falling back to eviction only if every candidate has a conflict.
  Composes with NodeSelector at the same point in the candidate pipeline.

Per-node, not cluster-wide: VRAM is a node-local resource, and two heavy
models running on different nodes is fine. The ConfigLoader is wired into
SmartRouter via a small ConcurrencyConflictResolver interface so the nodes
package keeps a narrow surface on core/config.

Refactors the inner LRU eviction body into a shared collectEvictionsLocked
helper and the loader retry loop into retryEnforce(fn, maxRetries, interval),
so both LRU and group enforcement share busy/pinned/retry semantics.

Closes #9659.

Assisted-by: Claude:claude-opus-4-7 [Claude Code]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix(watchdog): sync pinned + concurrency_groups at startup

The startup-time watchdog setup lives in initializeWatchdog (startup.go),
not in startWatchdog (watchdog.go). The latter is only invoked from the
runtime-settings RestartWatchdog path. As a result, neither
SyncPinnedModelsToWatchdog nor SyncModelGroupsToWatchdog ran at boot,
so `pinned: true` and `concurrency_groups: [...]` only became effective
after a settings-driven watchdog restart.

Fix by adding both sync calls to initializeWatchdog. Confirmed end-to-end:
loading model A in group "heavy", then C with no group (coexists),
then B in group "heavy" now correctly evicts A and leaves [B, C].

Assisted-by: Claude:claude-opus-4-7 [Claude Code]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix(test): satisfy errcheck on new os.Remove in concurrency_groups spec

CI lint runs new-from-merge-base, so the pre-existing
`defer os.Remove(tmp.Name())` lines are baseline-grandfathered but the
one introduced by the concurrency_groups YAML round-trip test is held
to errcheck. Wrap the remove in a closure that discards the error.

Assisted-by: Claude:claude-opus-4-7 [Claude Code]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2026-05-05 08:42:50 +02:00
committed by GitHub
parent 22ae415695
commit bbcaebc1ef
17 changed files with 981 additions and 76 deletions

View File

@@ -197,6 +197,35 @@ func (ml *ModelLoader) backendLoader(opts ...Option) (client grpc.Backend, err e
return model.GRPC(o.parallelRequests, ml.wd), nil
}
// retryEnforce repeatedly invokes fn until it returns NeedMore=false or the
// retry budget is exhausted. It sleeps `retryInterval` between attempts and
// logs progress under `label`. Used by both LRU and group-exclusivity
// enforcement so the busy-model wait behaviour is identical.
//
// A non-positive maxRetries means fn is never called.
func retryEnforce(fn func() EnforceLRULimitResult, maxRetries int, retryInterval time.Duration, label string) {
	for i := 0; i < maxRetries; i++ {
		res := fn()
		if !res.NeedMore {
			// Enforcement settled; only log when something was evicted.
			if res.EvictedCount > 0 {
				xlog.Info("[ModelLoader] "+label+" enforcement complete", "evicted", res.EvictedCount)
			}
			return
		}
		if i == maxRetries-1 {
			// Budget exhausted with conflicts still outstanding — surface a
			// warning and let the caller proceed (the load may still fail).
			xlog.Warn("[ModelLoader] "+label+" enforcement incomplete after max retries",
				"evicted", res.EvictedCount,
				"reason", "conflicts are still busy or pinned")
			return
		}
		xlog.Info("[ModelLoader] Waiting for busy models to become idle before eviction",
			"label", label,
			"evicted", res.EvictedCount,
			"attempt", i+1,
			"maxRetries", maxRetries,
			"retryIn", retryInterval)
		time.Sleep(retryInterval)
	}
}
// enforceLRULimit enforces the LRU limit before loading a new model.
// This is called before loading a model to ensure we don't exceed the limit.
// It accounts for models that are currently being loaded by other goroutines.
@@ -206,41 +235,34 @@ func (ml *ModelLoader) enforceLRULimit() {
return
}
// Get the count of models currently being loaded to account for concurrent requests
pendingLoads := ml.GetLoadingCount()
// Get retry settings from ModelLoader
ml.mu.Lock()
maxRetries := ml.lruEvictionMaxRetries
retryInterval := ml.lruEvictionRetryInterval
ml.mu.Unlock()
for attempt := range maxRetries {
result := ml.wd.EnforceLRULimit(pendingLoads)
retryEnforce(func() EnforceLRULimitResult {
return ml.wd.EnforceLRULimit(pendingLoads)
}, maxRetries, retryInterval, "LRU")
}
if !result.NeedMore {
// Successfully evicted enough models (or no eviction needed)
if result.EvictedCount > 0 {
xlog.Info("[ModelLoader] LRU enforcement complete", "evicted", result.EvictedCount)
}
return
}
// Need more evictions but models are busy - wait and retry
if attempt < maxRetries-1 {
xlog.Info("[ModelLoader] Waiting for busy models to become idle before eviction",
"evicted", result.EvictedCount,
"attempt", attempt+1,
"maxRetries", maxRetries,
"retryIn", retryInterval)
time.Sleep(retryInterval)
} else {
// Last attempt - log warning but proceed (might fail to load, but at least we tried)
xlog.Warn("[ModelLoader] LRU enforcement incomplete after max retries",
"evicted", result.EvictedCount,
"reason", "models are still busy with active API calls")
}
// enforceGroupExclusivity evicts every loaded model that shares a concurrency
// group with modelID before loading proceeds. Reuses the LRU retry settings so
// busy conflicts wait for the same window as a busy LRU eviction.
func (ml *ModelLoader) enforceGroupExclusivity(modelID string) {
	// Without a watchdog there is no group registry to enforce against.
	if ml.wd == nil {
		return
	}

	// Snapshot the retry knobs under the lock; the retry loop itself must
	// run without holding it.
	ml.mu.Lock()
	retries, interval := ml.lruEvictionMaxRetries, ml.lruEvictionRetryInterval
	ml.mu.Unlock()

	enforce := func() EnforceLRULimitResult {
		return ml.wd.EnforceGroupExclusivity(modelID)
	}
	retryEnforce(enforce, retries, interval, "group-exclusivity")
}
// updateModelLastUsed updates the last used time for a model (for LRU tracking)
@@ -270,6 +292,12 @@ func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
return client, nil
}
// Evict any loaded model that shares a concurrency group with the
// requested one before applying the global LRU cap — group eviction may
// already make room, and otherwise LRU might evict an unrelated model
// only for the group check to immediately evict another.
ml.enforceGroupExclusivity(o.modelID)
// Enforce LRU limit before loading a new model
ml.enforceLRULimit()

View File

@@ -0,0 +1,50 @@
package model
import (
"sync/atomic"
"time"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Unit tests for the shared retryEnforce helper: it must stop as soon as
// NeedMore clears, honour the retry budget, and treat a non-positive budget
// as "do nothing".
var _ = Describe("retryEnforce", func() {
	It("returns immediately when the first attempt is satisfied", func() {
		var invoked atomic.Int32
		fn := func() EnforceLRULimitResult {
			invoked.Add(1)
			return EnforceLRULimitResult{}
		}
		retryEnforce(fn, 5, 1*time.Millisecond, "test")
		Expect(invoked.Load()).To(Equal(int32(1)))
	})

	It("retries until NeedMore clears", func() {
		var invoked atomic.Int32
		fn := func() EnforceLRULimitResult {
			// First two attempts report unresolved work; the third succeeds.
			if invoked.Add(1) < 3 {
				return EnforceLRULimitResult{NeedMore: true}
			}
			return EnforceLRULimitResult{EvictedCount: 1}
		}
		retryEnforce(fn, 5, 1*time.Millisecond, "test")
		Expect(invoked.Load()).To(Equal(int32(3)))
	})

	It("stops after maxRetries when NeedMore never clears", func() {
		var invoked atomic.Int32
		fn := func() EnforceLRULimitResult {
			invoked.Add(1)
			return EnforceLRULimitResult{NeedMore: true}
		}
		retryEnforce(fn, 4, 1*time.Millisecond, "test")
		Expect(invoked.Load()).To(Equal(int32(4)))
	})

	It("treats maxRetries <= 0 as a no-op (no calls)", func() {
		var invoked atomic.Int32
		fn := func() EnforceLRULimitResult {
			invoked.Add(1)
			return EnforceLRULimitResult{}
		}
		retryEnforce(fn, 0, 1*time.Millisecond, "test")
		Expect(invoked.Load()).To(Equal(int32(0)))
	})
})

View File

@@ -48,6 +48,11 @@ type WatchDog struct {
// Pinned models are excluded from idle, LRU, and memory-pressure eviction
pinnedModels map[string]bool
// modelGroups maps a model name to its declared concurrency groups.
// Two loaded models that share at least one group cannot coexist on this
// node — see EnforceGroupExclusivity.
modelGroups map[string][]string
}
type ProcessManager interface {
@@ -82,6 +87,7 @@ func NewWatchDog(opts ...WatchDogOption) *WatchDog {
lruLimit: o.lruLimit,
addressModelMap: make(map[string]string),
pinnedModels: make(map[string]bool),
modelGroups: make(map[string][]string),
stop: make(chan bool, 1),
done: make(chan bool, 1),
memoryReclaimerEnabled: o.memoryReclaimerEnabled,
@@ -145,6 +151,34 @@ func (wd *WatchDog) IsModelPinned(modelName string) bool {
return wd.pinnedModels[modelName]
}
// ReplaceModelGroups replaces the per-model concurrency-group registry. The
// supplied map is copied; callers may mutate it after the call. Passing an
// empty or nil map clears all entries.
func (wd *WatchDog) ReplaceModelGroups(groups map[string][]string) {
	wd.Lock()
	defer wd.Unlock()

	next := make(map[string][]string, len(groups))
	for name, memberships := range groups {
		// A model with no groups behaves like an absent entry — drop it.
		if len(memberships) == 0 {
			continue
		}
		next[name] = slices.Clone(memberships)
	}
	wd.modelGroups = next
}
// GetModelGroups returns a copy of the concurrency groups configured for
// the given model, or nil if the model has no groups. The result may be
// freely mutated by the caller.
func (wd *WatchDog) GetModelGroups(modelName string) []string {
	wd.Lock()
	defer wd.Unlock()

	if gs := wd.modelGroups[modelName]; len(gs) > 0 {
		// Clone so callers cannot mutate the registry through the result.
		return slices.Clone(gs)
	}
	// Unknown model, or a model registered with an empty list: no groups.
	return nil
}
func (wd *WatchDog) Shutdown() {
wd.Lock()
defer wd.Unlock()
@@ -327,31 +361,8 @@ func (wd *WatchDog) EnforceLRULimit(pendingLoads int) EnforceLRULimitResult {
})
// Collect models to evict (the oldest ones)
var modelsToShutdown []string
evictedCount := 0
skippedBusyCount := 0
for i := 0; evictedCount < modelsToEvict && i < len(models); i++ {
m := models[i]
// Skip pinned models
if wd.pinnedModels[m.model] {
xlog.Debug("[WatchDog] Skipping LRU eviction for pinned model", "model", m.model)
continue
}
// Check if model is busy
_, isBusy := wd.busyTime[m.address]
if isBusy && !forceEvictionWhenBusy {
// Skip eviction for busy models when forceEvictionWhenBusy is false
xlog.Warn("[WatchDog] Skipping LRU eviction for busy model", "model", m.model, "reason", "model has active API calls")
skippedBusyCount++
continue
}
xlog.Info("[WatchDog] LRU evicting model", "model", m.model, "lastUsed", m.lastUsed, "busy", isBusy)
modelsToShutdown = append(modelsToShutdown, m.model)
// Clean up the maps while we have the lock
wd.untrack(m.address)
evictedCount++
}
needMore := evictedCount < modelsToEvict && skippedBusyCount > 0
modelsToShutdown, skippedBusyCount := wd.collectEvictionsLocked(models, modelsToEvict, forceEvictionWhenBusy)
needMore := len(modelsToShutdown) < modelsToEvict && skippedBusyCount > 0
wd.Unlock()
// Now shutdown models without holding the watchdog lock to prevent deadlock
@@ -363,7 +374,7 @@ func (wd *WatchDog) EnforceLRULimit(pendingLoads int) EnforceLRULimitResult {
}
if needMore {
xlog.Warn("[WatchDog] LRU eviction incomplete", "evicted", evictedCount, "needed", modelsToEvict, "skippedBusy", skippedBusyCount, "reason", "some models are busy with active API calls")
xlog.Warn("[WatchDog] LRU eviction incomplete", "evicted", len(modelsToShutdown), "needed", modelsToEvict, "skippedBusy", skippedBusyCount, "reason", "some models are busy with active API calls")
}
return EnforceLRULimitResult{
@@ -372,6 +383,110 @@ func (wd *WatchDog) EnforceLRULimit(pendingLoads int) EnforceLRULimitResult {
}
}
// collectEvictionsLocked walks `candidates` (already in eviction order) and
// untracks up to `maxToEvict` models that are eligible for eviction. Pinned
// models are always skipped; busy models are skipped unless `force` is true.
// Returns the names of evicted models and the number skipped because they
// were busy. Must be called with wd.Lock() held.
func (wd *WatchDog) collectEvictionsLocked(candidates []modelUsageInfo, maxToEvict int, force bool) (evicted []string, skippedBusy int) {
	for _, candidate := range candidates {
		if len(evicted) >= maxToEvict {
			break
		}
		// Pinned models are never eviction targets.
		if wd.pinnedModels[candidate.model] {
			xlog.Debug("[WatchDog] Skipping eviction for pinned model", "model", candidate.model)
			continue
		}
		_, busy := wd.busyTime[candidate.address]
		if busy && !force {
			xlog.Warn("[WatchDog] Skipping eviction for busy model", "model", candidate.model, "reason", "model has active API calls")
			skippedBusy++
			continue
		}
		xlog.Info("[WatchDog] evicting model", "model", candidate.model, "busy", busy)
		evicted = append(evicted, candidate.model)
		// Drop tracking state while the lock is still held.
		wd.untrack(candidate.address)
	}
	return evicted, skippedBusy
}
// EnforceGroupExclusivity evicts every loaded model that shares at least one
// concurrency group with the requested model. The pinned/busy/retry semantics
// match EnforceLRULimit so the loader's retry loop can stay generic.
//
// Returns the number of models actually evicted and NeedMore=true when at
// least one conflicting model could not be evicted (busy or pinned).
func (wd *WatchDog) EnforceGroupExclusivity(requestedModel string) EnforceLRULimitResult {
	wd.Lock()
	requestedGroups := wd.modelGroups[requestedModel]
	if len(requestedGroups) == 0 {
		// The requested model declares no groups: nothing can conflict.
		wd.Unlock()
		return EnforceLRULimitResult{}
	}
	forceEvictionWhenBusy := wd.forceEvictionWhenBusy
	// Build the conflict candidate list: every loaded model whose groups
	// overlap with requestedGroups. Order doesn't affect correctness, but
	// sort by lastUsed (oldest first) so logs and behaviour are deterministic.
	var conflicts []modelUsageInfo
	for address, name := range wd.addressModelMap {
		if name == requestedModel {
			// Reloading the same model never conflicts with itself.
			continue
		}
		if !groupsOverlap(requestedGroups, wd.modelGroups[name]) {
			continue
		}
		conflicts = append(conflicts, modelUsageInfo{
			address: address,
			model: name,
			lastUsed: wd.lastUsed[address],
		})
	}
	if len(conflicts) == 0 {
		wd.Unlock()
		return EnforceLRULimitResult{}
	}
	slices.SortFunc(conflicts, func(a, b modelUsageInfo) int {
		return a.lastUsed.Compare(b.lastUsed)
	})
	xlog.Debug("[WatchDog] Group exclusivity triggered", "requested", requestedModel, "groups", requestedGroups, "conflicts", len(conflicts))
	// Every conflict must go, so maxToEvict == len(conflicts).
	modelsToShutdown, skippedBusyCount := wd.collectEvictionsLocked(conflicts, len(conflicts), forceEvictionWhenBusy)
	// For groups any unresolved conflict matters — busy *or* pinned. The loader
	// retries on NeedMore; pinned cases will eventually time out and the load
	// proceeds with a visible warning, which is the right signal for what is a
	// configuration mismatch.
	needMore := len(modelsToShutdown) < len(conflicts)
	wd.Unlock()
	// Shut the conflicting models down without holding the watchdog lock
	// (same ordering as EnforceLRULimit, which releases the lock before
	// calling ShutdownModel to prevent deadlock).
	for _, m := range modelsToShutdown {
		if err := wd.pm.ShutdownModel(m); err != nil {
			xlog.Error("[WatchDog] error shutting down model during group eviction", "error", err, "model", m)
		}
		xlog.Debug("[WatchDog] Group eviction complete", "model", m)
	}
	if needMore {
		xlog.Warn("[WatchDog] Group eviction incomplete", "requested", requestedModel, "evicted", len(modelsToShutdown), "needed", len(conflicts), "skippedBusy", skippedBusyCount, "reason", "some conflicts are busy or pinned")
	}
	return EnforceLRULimitResult{
		EvictedCount: len(modelsToShutdown),
		NeedMore: needMore,
	}
}
// groupsOverlap reports whether the two group lists share any name.
// Either list may be nil or empty, in which case there is no overlap.
func groupsOverlap(a, b []string) bool {
	for _, name := range a {
		if slices.Contains(b, name) {
			return true
		}
	}
	return false
}
func (wd *WatchDog) Run() {
xlog.Info("[WatchDog] starting watchdog")

View File

@@ -666,6 +666,182 @@ var _ = Describe("WatchDog", func() {
})
})
Context("Concurrency Groups", func() {
// Registry API tests: ReplaceModelGroups copies the map in, replaces all
// prior state on each call, and GetModelGroups hands back defensive copies.
Describe("ReplaceModelGroups / GetModelGroups", func() {
	It("returns nil for unknown models", func() {
		wd = model.NewWatchDog(model.WithProcessManager(pm))
		Expect(wd.GetModelGroups("nope")).To(BeNil())
	})
	It("stores and retrieves groups", func() {
		wd = model.NewWatchDog(model.WithProcessManager(pm))
		wd.ReplaceModelGroups(map[string][]string{
			"a": {"heavy", "vision"},
			"b": {"heavy"},
		})
		Expect(wd.GetModelGroups("a")).To(Equal([]string{"heavy", "vision"}))
		Expect(wd.GetModelGroups("b")).To(Equal([]string{"heavy"}))
		Expect(wd.GetModelGroups("c")).To(BeNil())
	})
	It("replaces previous state on subsequent calls", func() {
		wd = model.NewWatchDog(model.WithProcessManager(pm))
		// Replace is wholesale: "a" from the first call must disappear.
		wd.ReplaceModelGroups(map[string][]string{"a": {"heavy"}})
		wd.ReplaceModelGroups(map[string][]string{"b": {"vision"}})
		Expect(wd.GetModelGroups("a")).To(BeNil())
		Expect(wd.GetModelGroups("b")).To(Equal([]string{"vision"}))
	})
	It("clears state when called with an empty map", func() {
		wd = model.NewWatchDog(model.WithProcessManager(pm))
		wd.ReplaceModelGroups(map[string][]string{"a": {"heavy"}})
		wd.ReplaceModelGroups(nil)
		Expect(wd.GetModelGroups("a")).To(BeNil())
	})
	It("returns a defensive copy", func() {
		wd = model.NewWatchDog(model.WithProcessManager(pm))
		wd.ReplaceModelGroups(map[string][]string{"a": {"heavy"}})
		// Mutating the returned slice must not leak into the registry.
		got := wd.GetModelGroups("a")
		got[0] = "tampered"
		Expect(wd.GetModelGroups("a")).To(Equal([]string{"heavy"}))
	})
})
// Behavioural tests for group-exclusive eviction: conflicts (shared group
// names) are evicted, while pinned and busy models are skipped and reported
// via NeedMore so the loader can retry.
Describe("EnforceGroupExclusivity", func() {
	It("is a no-op when the requested model has no groups", func() {
		wd = model.NewWatchDog(model.WithProcessManager(pm))
		wd.AddAddressModelMap("addr1", "model1")
		wd.AddAddressModelMap("addr2", "model2")
		result := wd.EnforceGroupExclusivity("requested")
		Expect(result.EvictedCount).To(Equal(0))
		Expect(result.NeedMore).To(BeFalse())
		Expect(pm.getShutdownCalls()).To(BeEmpty())
	})
	It("is a no-op when no loaded model shares a group", func() {
		wd = model.NewWatchDog(model.WithProcessManager(pm))
		wd.ReplaceModelGroups(map[string][]string{
			"loaded": {"vision"},
			"requested": {"heavy"},
		})
		wd.AddAddressModelMap("addr1", "loaded")
		result := wd.EnforceGroupExclusivity("requested")
		Expect(result.EvictedCount).To(Equal(0))
		Expect(result.NeedMore).To(BeFalse())
		Expect(pm.getShutdownCalls()).To(BeEmpty())
	})
	It("evicts a loaded model that shares a single group", func() {
		wd = model.NewWatchDog(model.WithProcessManager(pm))
		wd.ReplaceModelGroups(map[string][]string{
			"a": {"heavy"},
			"b": {"heavy"},
		})
		wd.AddAddressModelMap("addrA", "a")
		// Mark then UnMark leaves the model loaded but idle (not busy).
		wd.Mark("addrA")
		wd.UnMark("addrA")
		result := wd.EnforceGroupExclusivity("b")
		Expect(result.EvictedCount).To(Equal(1))
		Expect(result.NeedMore).To(BeFalse())
		Expect(pm.getShutdownCalls()).To(ConsistOf("a"))
	})
	It("evicts when groups overlap on any single name", func() {
		wd = model.NewWatchDog(model.WithProcessManager(pm))
		// Only "y" is shared — one common name is enough to conflict.
		wd.ReplaceModelGroups(map[string][]string{
			"a": {"x", "y"},
			"b": {"y", "z"},
		})
		wd.AddAddressModelMap("addrA", "a")
		wd.Mark("addrA")
		wd.UnMark("addrA")
		result := wd.EnforceGroupExclusivity("b")
		Expect(result.EvictedCount).To(Equal(1))
		Expect(pm.getShutdownCalls()).To(ConsistOf("a"))
	})
	It("evicts every conflicting loaded model", func() {
		wd = model.NewWatchDog(
			model.WithProcessManager(pm),
			model.WithForceEvictionWhenBusy(true),
		)
		wd.ReplaceModelGroups(map[string][]string{
			"a": {"heavy"},
			"b": {"heavy"},
			"c": {"heavy"},
		})
		wd.AddAddressModelMap("addrA", "a")
		wd.Mark("addrA")
		wd.UnMark("addrA")
		wd.AddAddressModelMap("addrB", "b")
		wd.Mark("addrB")
		wd.UnMark("addrB")
		result := wd.EnforceGroupExclusivity("c")
		Expect(result.EvictedCount).To(Equal(2))
		Expect(pm.getShutdownCalls()).To(ConsistOf("a", "b"))
	})
	It("skips a pinned conflicting model and reports NeedMore", func() {
		wd = model.NewWatchDog(
			model.WithProcessManager(pm),
			model.WithForceEvictionWhenBusy(true),
		)
		wd.ReplaceModelGroups(map[string][]string{
			"a": {"heavy"},
			"b": {"heavy"},
		})
		// Pinning wins over group eviction even with force-eviction on.
		wd.SetPinnedModels([]string{"a"})
		wd.AddAddressModelMap("addrA", "a")
		wd.Mark("addrA")
		wd.UnMark("addrA")
		result := wd.EnforceGroupExclusivity("b")
		Expect(result.EvictedCount).To(Equal(0))
		Expect(result.NeedMore).To(BeTrue())
		Expect(pm.getShutdownCalls()).To(BeEmpty())
	})
	It("skips a busy conflict when forceEvictionWhenBusy is false", func() {
		wd = model.NewWatchDog(model.WithProcessManager(pm))
		wd.ReplaceModelGroups(map[string][]string{
			"a": {"heavy"},
			"b": {"heavy"},
		})
		wd.AddAddressModelMap("addrA", "a")
		wd.Mark("addrA") // leave busy
		result := wd.EnforceGroupExclusivity("b")
		Expect(result.EvictedCount).To(Equal(0))
		Expect(result.NeedMore).To(BeTrue())
		Expect(pm.getShutdownCalls()).To(BeEmpty())
	})
	It("evicts a busy conflict when forceEvictionWhenBusy is true", func() {
		wd = model.NewWatchDog(
			model.WithProcessManager(pm),
			model.WithForceEvictionWhenBusy(true),
		)
		wd.ReplaceModelGroups(map[string][]string{
			"a": {"heavy"},
			"b": {"heavy"},
		})
		wd.AddAddressModelMap("addrA", "a")
		wd.Mark("addrA") // leave busy
		result := wd.EnforceGroupExclusivity("b")
		Expect(result.EvictedCount).To(Equal(1))
		Expect(result.NeedMore).To(BeFalse())
		Expect(pm.getShutdownCalls()).To(ConsistOf("a"))
	})
})
})
Context("Functional Options", func() {
It("should use default options when none provided", func() {
wd = model.NewWatchDog(