feat: disable force eviction (#7725)

* feat: allow to set forcing backends eviction while requests are in flight

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat: try to make the request sit and retry if eviction couldn't be done

Otherwise, calls that can only succeed by shutting down other backends
would simply fail.

Instead, the request now waits and retries eviction until it succeeds
(up to a retry limit). The thresholds can be configured by the user.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* add tests

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* expose settings to CLI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Update docs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2025-12-25 14:26:18 +01:00
committed by GitHub
parent bb459e671f
commit c844b7ac58
18 changed files with 739 additions and 41 deletions

View File

@@ -9,8 +9,8 @@ import (
"time"
grpc "github.com/mudler/LocalAI/pkg/grpc"
"github.com/phayes/freeport"
"github.com/mudler/xlog"
"github.com/phayes/freeport"
)
const (
@@ -173,7 +173,7 @@ func (ml *ModelLoader) backendLoader(opts ...Option) (client grpc.Backend, err e
model, err := ml.LoadModel(o.modelID, o.model, ml.grpcModel(backend, o))
if err != nil {
if stopErr := ml.StopGRPC(only(o.modelID));stopErr != nil {
if stopErr := ml.StopGRPC(only(o.modelID)); stopErr != nil {
xlog.Error("error stopping model", "error", stopErr, "model", o.modelID)
}
xlog.Error("Failed to load model", "modelID", o.modelID, "error", err, "backend", o.backendString)
@@ -186,13 +186,47 @@ func (ml *ModelLoader) backendLoader(opts ...Option) (client grpc.Backend, err e
// enforceLRULimit enforces the LRU limit before loading a new model.
// This is called before loading a model to ensure we don't exceed the limit.
// It accounts for models that are currently being loaded by other goroutines.
//
// If eviction is incomplete because models are busy, it sleeps for the
// configured retry interval and tries again, up to the configured maximum
// number of attempts. When the attempts are exhausted it logs a warning and
// returns anyway, leaving the caller to proceed with the load.
//
// It is a no-op when no watchdog is configured.
func (ml *ModelLoader) enforceLRULimit() {
	if ml.wd == nil {
		return
	}

	// Get the count of models currently being loaded to account for concurrent requests
	pendingLoads := ml.GetLoadingCount()

	// Snapshot the retry settings under the lock: SetLRUEvictionRetrySettings
	// may update them concurrently.
	ml.mu.Lock()
	maxRetries := ml.lruEvictionMaxRetries
	retryInterval := ml.lruEvictionRetryInterval
	ml.mu.Unlock()

	// A zero or negative setting must not disable enforcement altogether:
	// always make at least one eviction attempt.
	if maxRetries < 1 {
		maxRetries = 1
	}

	for attempt := 1; attempt <= maxRetries; attempt++ {
		result := ml.wd.EnforceLRULimit(pendingLoads)
		if !result.NeedMore {
			// Successfully evicted enough models (or no eviction needed)
			if result.EvictedCount > 0 {
				xlog.Info("[ModelLoader] LRU enforcement complete", "evicted", result.EvictedCount)
			}
			return
		}

		if attempt == maxRetries {
			// Last attempt - log warning but proceed (might fail to load, but at least we tried)
			xlog.Warn("[ModelLoader] LRU enforcement incomplete after max retries",
				"evicted", result.EvictedCount,
				"reason", "models are still busy with active API calls")
			return
		}

		// Need more evictions but models are busy - wait and retry
		xlog.Info("[ModelLoader] Waiting for busy models to become idle before eviction",
			"evicted", result.EvictedCount,
			"attempt", attempt,
			"maxRetries", maxRetries,
			"retryIn", retryInterval)
		time.Sleep(retryInterval)
	}
}
// updateModelLastUsed updates the last used time for a model (for LRU tracking)

View File

@@ -20,22 +20,26 @@ import (
// ModelLoader keeps track of loaded models and of models whose load is still
// in progress, and carries the retry policy used when LRU eviction has to wait
// for busy models.
//
// TODO: Split ModelLoader and TemplateLoader? Just to keep things more organized. Left together to share a mutex until I look into that. Would split if we separate directories for .bin/.yaml and .tmpl
type ModelLoader struct {
	ModelPath string
	// mu is held while reading or updating the LRU eviction retry settings below.
	// NOTE(review): presumably it also guards the maps in this struct — confirm against the rest of the file.
	mu               sync.Mutex
	models           map[string]*Model
	loading          map[string]chan struct{} // tracks models currently being loaded
	wd               *WatchDog                // may be nil; LRU enforcement is skipped in that case
	externalBackends map[string]string

	lruEvictionMaxRetries    int           // Maximum number of retries when waiting for busy models
	lruEvictionRetryInterval time.Duration // Interval between retries when waiting for busy models
}
// NewModelLoader creates a new ModelLoader instance.
// LRU eviction is now managed through the WatchDog component.
func NewModelLoader(system *system.SystemState) *ModelLoader {
nml := &ModelLoader{
ModelPath: system.Model.ModelsPath,
models: make(map[string]*Model),
loading: make(map[string]chan struct{}),
externalBackends: make(map[string]string),
ModelPath: system.Model.ModelsPath,
models: make(map[string]*Model),
loading: make(map[string]chan struct{}),
externalBackends: make(map[string]string),
lruEvictionMaxRetries: 30, // Default: 30 retries
lruEvictionRetryInterval: 1 * time.Second, // Default: 1 second
}
return nml
@@ -56,6 +60,14 @@ func (ml *ModelLoader) GetWatchDog() *WatchDog {
return ml.wd
}
// SetLRUEvictionRetrySettings replaces the retry policy used while waiting for
// busy models during LRU eviction: maxRetries is the maximum number of
// attempts and retryInterval is the pause between them. Both values are
// stored while holding the loader's mutex.
func (ml *ModelLoader) SetLRUEvictionRetrySettings(maxRetries int, retryInterval time.Duration) {
	ml.mu.Lock()
	ml.lruEvictionMaxRetries, ml.lruEvictionRetryInterval = maxRetries, retryInterval
	ml.mu.Unlock()
}
func (ml *ModelLoader) ExistsInModelPath(s string) bool {
return utils.ExistsInPath(ml.ModelPath, s)
}

View File

@@ -262,4 +262,13 @@ var _ = Describe("ModelLoader", func() {
Expect(modelLoader.GetLoadingCount()).To(Equal(0))
})
})
Context("LRU Eviction Retry Settings", func() {
It("should allow updating retry settings", func() {
modelLoader.SetLRUEvictionRetrySettings(50, 2*time.Second)
// Settings are updated - we can verify through behavior if needed
// For now, just verify the call doesn't panic
Expect(modelLoader).ToNot(BeNil())
})
})
})

View File

@@ -41,6 +41,9 @@ type WatchDog struct {
memoryReclaimerEnabled bool // Enable memory threshold monitoring
memoryReclaimerThreshold float64 // Threshold 0.0-1.0 (e.g., 0.95 = 95%)
watchdogInterval time.Duration
// Eviction settings
forceEvictionWhenBusy bool // Force eviction even when models have active API calls (default: false for safety)
}
type ProcessManager interface {
@@ -78,6 +81,7 @@ func NewWatchDog(opts ...WatchDogOption) *WatchDog {
memoryReclaimerEnabled: o.memoryReclaimerEnabled,
memoryReclaimerThreshold: o.memoryReclaimerThreshold,
watchdogInterval: o.watchdogInterval,
forceEvictionWhenBusy: o.forceEvictionWhenBusy,
}
}
@@ -110,6 +114,13 @@ func (wd *WatchDog) GetMemoryReclaimerSettings() (enabled bool, threshold float6
return wd.memoryReclaimerEnabled, wd.memoryReclaimerThreshold
}
// SetForceEvictionWhenBusy changes, at runtime, whether eviction may proceed
// for models that currently have active API calls. The flag is written while
// holding the watchdog lock.
func (wd *WatchDog) SetForceEvictionWhenBusy(force bool) {
	wd.Lock()
	wd.forceEvictionWhenBusy = force
	wd.Unlock()
}
func (wd *WatchDog) Shutdown() {
wd.Lock()
defer wd.Unlock()
@@ -169,13 +180,19 @@ type modelUsageInfo struct {
lastUsed time.Time
}
// EnforceLRULimitResult reports the outcome of one LRU enforcement pass.
type EnforceLRULimitResult struct {
	// EvictedCount is the number of models successfully evicted.
	EvictedCount int
	// NeedMore is true when more evictions were needed but could not be
	// carried out (e.g., all models are busy).
	NeedMore bool
}
// EnforceLRULimit ensures we're under the LRU limit by evicting least recently used models.
// This should be called before loading a new model.
// pendingLoads is the number of models currently being loaded (to account for concurrent loads).
// Returns the number of models evicted.
func (wd *WatchDog) EnforceLRULimit(pendingLoads int) int {
// Returns the result containing evicted count and whether more evictions are needed.
func (wd *WatchDog) EnforceLRULimit(pendingLoads int) EnforceLRULimitResult {
if wd.lruLimit <= 0 {
return 0 // LRU disabled
return EnforceLRULimitResult{EvictedCount: 0, NeedMore: false} // LRU disabled
}
wd.Lock()
@@ -186,9 +203,10 @@ func (wd *WatchDog) EnforceLRULimit(pendingLoads int) int {
// We need: currentCount + pendingLoads + 1 <= lruLimit
// So evict: currentCount + pendingLoads + 1 - lruLimit = currentCount - lruLimit + pendingLoads + 1
modelsToEvict := currentCount - wd.lruLimit + pendingLoads + 1
forceEvictionWhenBusy := wd.forceEvictionWhenBusy
if modelsToEvict <= 0 {
wd.Unlock()
return 0
return EnforceLRULimitResult{EvictedCount: 0, NeedMore: false}
}
xlog.Debug("[WatchDog] LRU enforcement triggered", "current", currentCount, "pendingLoads", pendingLoads, "limit", wd.lruLimit, "toEvict", modelsToEvict)
@@ -215,13 +233,25 @@ func (wd *WatchDog) EnforceLRULimit(pendingLoads int) int {
// Collect models to evict (the oldest ones)
var modelsToShutdown []string
for i := 0; i < modelsToEvict && i < len(models); i++ {
evictedCount := 0
skippedBusyCount := 0
for i := 0; evictedCount < modelsToEvict && i < len(models); i++ {
m := models[i]
xlog.Info("[WatchDog] LRU evicting model", "model", m.model, "lastUsed", m.lastUsed)
// Check if model is busy
_, isBusy := wd.busyTime[m.address]
if isBusy && !forceEvictionWhenBusy {
// Skip eviction for busy models when forceEvictionWhenBusy is false
xlog.Warn("[WatchDog] Skipping LRU eviction for busy model", "model", m.model, "reason", "model has active API calls")
skippedBusyCount++
continue
}
xlog.Info("[WatchDog] LRU evicting model", "model", m.model, "lastUsed", m.lastUsed, "busy", isBusy)
modelsToShutdown = append(modelsToShutdown, m.model)
// Clean up the maps while we have the lock
wd.untrack(m.address)
evictedCount++
}
needMore := evictedCount < modelsToEvict && skippedBusyCount > 0
wd.Unlock()
// Now shutdown models without holding the watchdog lock to prevent deadlock
@@ -232,7 +262,14 @@ func (wd *WatchDog) EnforceLRULimit(pendingLoads int) int {
xlog.Debug("[WatchDog] LRU eviction complete", "model", model)
}
return len(modelsToShutdown)
if needMore {
xlog.Warn("[WatchDog] LRU eviction incomplete", "evicted", evictedCount, "needed", modelsToEvict, "skippedBusy", skippedBusyCount, "reason", "some models are busy with active API calls")
}
return EnforceLRULimitResult{
EvictedCount: len(modelsToShutdown),
NeedMore: needMore,
}
}
func (wd *WatchDog) Run() {
@@ -376,6 +413,8 @@ func (wd *WatchDog) evictLRUModel() {
return
}
forceEvictionWhenBusy := wd.forceEvictionWhenBusy
// Build a list of models sorted by last used time (oldest first)
var models []modelUsageInfo
for address, model := range wd.addressModelMap {
@@ -400,8 +439,27 @@ func (wd *WatchDog) evictLRUModel() {
return models[i].lastUsed.Before(models[j].lastUsed)
})
// Get the LRU model
lruModel := models[0]
// Find the first non-busy model (or first model if forceEvictionWhenBusy is true)
var lruModel *modelUsageInfo
for i := 0; i < len(models); i++ {
m := models[i]
_, isBusy := wd.busyTime[m.address]
if isBusy && !forceEvictionWhenBusy {
// Skip busy models when forceEvictionWhenBusy is false
xlog.Warn("[WatchDog] Skipping memory reclaimer eviction for busy model", "model", m.model, "reason", "model has active API calls")
continue
}
lruModel = &m
break
}
if lruModel == nil {
// All models are busy and forceEvictionWhenBusy is false
wd.Unlock()
xlog.Warn("[WatchDog] Memory reclaimer cannot evict: all models are busy with active API calls")
return
}
xlog.Info("[WatchDog] Memory reclaimer evicting LRU model", "model", lruModel.model, "lastUsed", lruModel.lastUsed)
// Untrack the model

View File

@@ -28,6 +28,9 @@ type WatchDogOptions struct {
// Memory reclaimer settings (works with GPU if available, otherwise RAM)
memoryReclaimerEnabled bool // Enable memory threshold monitoring
memoryReclaimerThreshold float64 // Threshold 0.0-1.0 (e.g., 0.95 = 95%)
// Eviction settings
forceEvictionWhenBusy bool // Force eviction even when models have active API calls (default: false for safety)
}
// WatchDogOption is a function that configures WatchDogOptions
@@ -105,6 +108,14 @@ func WithMemoryReclaimerThreshold(threshold float64) WatchDogOption {
}
}
// WithForceEvictionWhenBusy returns an option that controls whether models
// with active API calls may still be evicted.
// Default: false (skip eviction when busy for safety)
func WithForceEvictionWhenBusy(force bool) WatchDogOption {
	apply := func(opts *WatchDogOptions) {
		opts.forceEvictionWhenBusy = force
	}
	return apply
}
// DefaultWatchDogOptions returns default options for the watchdog
func DefaultWatchDogOptions() *WatchDogOptions {
return &WatchDogOptions{
@@ -116,6 +127,7 @@ func DefaultWatchDogOptions() *WatchDogOptions {
lruLimit: 0,
memoryReclaimerEnabled: false,
memoryReclaimerThreshold: DefaultMemoryReclaimerThreshold,
forceEvictionWhenBusy: false, // Default: skip eviction when busy for safety
}
}

View File

@@ -170,15 +170,18 @@ var _ = Describe("WatchDog", func() {
model.WithBusyTimeout(5*time.Minute),
model.WithIdleTimeout(15*time.Minute),
model.WithLRULimit(2),
model.WithForceEvictionWhenBusy(true), // Enable force eviction for these tests to match old behavior
)
})
It("should not evict when under limit", func() {
wd.AddAddressModelMap("addr1", "model1")
wd.Mark("addr1")
wd.UnMark("addr1") // Unmark to make it idle (not busy)
evicted := wd.EnforceLRULimit(0)
Expect(evicted).To(Equal(0))
result := wd.EnforceLRULimit(0)
Expect(result.EvictedCount).To(Equal(0))
Expect(result.NeedMore).To(BeFalse())
Expect(pm.getShutdownCalls()).To(BeEmpty())
})
@@ -186,14 +189,17 @@ var _ = Describe("WatchDog", func() {
// Add two models
wd.AddAddressModelMap("addr1", "model1")
wd.Mark("addr1")
wd.UnMark("addr1") // Unmark to make it idle
time.Sleep(10 * time.Millisecond)
wd.AddAddressModelMap("addr2", "model2")
wd.Mark("addr2")
wd.UnMark("addr2") // Unmark to make it idle
// Enforce LRU with limit of 2 (need to make room for 1 new model)
evicted := wd.EnforceLRULimit(0)
Expect(evicted).To(Equal(1))
result := wd.EnforceLRULimit(0)
Expect(result.EvictedCount).To(Equal(1))
Expect(result.NeedMore).To(BeFalse())
Expect(pm.getShutdownCalls()).To(ContainElement("model1")) // oldest should be evicted
})
@@ -201,19 +207,23 @@ var _ = Describe("WatchDog", func() {
// Add three models
wd.AddAddressModelMap("addr1", "model1")
wd.Mark("addr1")
wd.UnMark("addr1") // Unmark to make it idle
time.Sleep(10 * time.Millisecond)
wd.AddAddressModelMap("addr2", "model2")
wd.Mark("addr2")
wd.UnMark("addr2") // Unmark to make it idle
time.Sleep(10 * time.Millisecond)
wd.AddAddressModelMap("addr3", "model3")
wd.Mark("addr3")
wd.UnMark("addr3") // Unmark to make it idle
// Set limit to 1, should evict 2 oldest + 1 for new = 3 evictions
wd.SetLRULimit(1)
evicted := wd.EnforceLRULimit(0)
Expect(evicted).To(Equal(3))
result := wd.EnforceLRULimit(0)
Expect(result.EvictedCount).To(Equal(3))
Expect(result.NeedMore).To(BeFalse())
shutdowns := pm.getShutdownCalls()
Expect(shutdowns).To(ContainElement("model1"))
Expect(shutdowns).To(ContainElement("model2"))
@@ -224,15 +234,18 @@ var _ = Describe("WatchDog", func() {
// Add two models (at limit)
wd.AddAddressModelMap("addr1", "model1")
wd.Mark("addr1")
wd.UnMark("addr1") // Unmark to make it idle
time.Sleep(10 * time.Millisecond)
wd.AddAddressModelMap("addr2", "model2")
wd.Mark("addr2")
wd.UnMark("addr2") // Unmark to make it idle
// With 1 pending load, we need to evict 2 (current=2, pending=1, new=1, limit=2)
// total after = 2 + 1 + 1 = 4, need to evict 4 - 2 = 2
evicted := wd.EnforceLRULimit(1)
Expect(evicted).To(Equal(2))
result := wd.EnforceLRULimit(1)
Expect(result.EvictedCount).To(Equal(2))
Expect(result.NeedMore).To(BeFalse())
})
It("should not evict when LRU is disabled", func() {
@@ -242,8 +255,9 @@ var _ = Describe("WatchDog", func() {
wd.AddAddressModelMap("addr2", "model2")
wd.AddAddressModelMap("addr3", "model3")
evicted := wd.EnforceLRULimit(0)
Expect(evicted).To(Equal(0))
result := wd.EnforceLRULimit(0)
Expect(result.EvictedCount).To(Equal(0))
Expect(result.NeedMore).To(BeFalse())
Expect(pm.getShutdownCalls()).To(BeEmpty())
})
@@ -253,10 +267,12 @@ var _ = Describe("WatchDog", func() {
// Add models with different lastUsed times
wd.AddAddressModelMap("addr1", "model1")
wd.Mark("addr1")
wd.UnMark("addr1") // Unmark to make it idle
time.Sleep(20 * time.Millisecond)
wd.AddAddressModelMap("addr2", "model2")
wd.Mark("addr2")
wd.UnMark("addr2") // Unmark to make it idle
time.Sleep(20 * time.Millisecond)
// Touch model1 again to make it more recent
@@ -265,10 +281,12 @@ var _ = Describe("WatchDog", func() {
wd.AddAddressModelMap("addr3", "model3")
wd.Mark("addr3")
wd.UnMark("addr3") // Unmark to make it idle
// Now model2 is the oldest, should be evicted first
evicted := wd.EnforceLRULimit(0)
Expect(evicted).To(BeNumerically(">=", 1))
result := wd.EnforceLRULimit(0)
Expect(result.EvictedCount).To(BeNumerically(">=", 1))
Expect(result.NeedMore).To(BeFalse())
shutdowns := pm.getShutdownCalls()
// model2 should be evicted first (it's the oldest)
@@ -285,16 +303,19 @@ var _ = Describe("WatchDog", func() {
model.WithBusyTimeout(5*time.Minute),
model.WithIdleTimeout(15*time.Minute),
model.WithLRULimit(1),
model.WithForceEvictionWhenBusy(true), // Enable force eviction for these tests
)
})
It("should evict existing model when loading new one", func() {
wd.AddAddressModelMap("addr1", "model1")
wd.Mark("addr1")
wd.UnMark("addr1") // Unmark to make it idle
// With limit=1, loading a new model should evict the existing one
evicted := wd.EnforceLRULimit(0)
Expect(evicted).To(Equal(1))
result := wd.EnforceLRULimit(0)
Expect(result.EvictedCount).To(Equal(1))
Expect(result.NeedMore).To(BeFalse())
Expect(pm.getShutdownCalls()).To(ContainElement("model1"))
})
@@ -302,6 +323,7 @@ var _ = Describe("WatchDog", func() {
for i := 0; i < 5; i++ {
wd.AddAddressModelMap("addr", "model")
wd.Mark("addr")
wd.UnMark("addr") // Unmark to make it idle
wd.EnforceLRULimit(0)
}
// All previous models should have been evicted
@@ -309,6 +331,233 @@ var _ = Describe("WatchDog", func() {
})
})
Context("Force Eviction When Busy", func() {
BeforeEach(func() {
wd = model.NewWatchDog(
model.WithProcessManager(pm),
model.WithLRULimit(2),
model.WithForceEvictionWhenBusy(false), // Default: skip eviction when busy
)
})
It("should skip eviction for busy models when forceEvictionWhenBusy is false", func() {
// Add two models (at limit of 2, need to evict 1 for new model)
wd.AddAddressModelMap("addr1", "model1")
wd.Mark("addr1")
time.Sleep(10 * time.Millisecond)
wd.AddAddressModelMap("addr2", "model2")
wd.Mark("addr2")
wd.UnMark("addr2") // Make model2 idle
// Keep model1 as busy (simulating active API call)
// model1 is already marked as busy from the first Mark call
// Try to enforce LRU - should skip busy model1, evict model2
result := wd.EnforceLRULimit(0)
// Should evict model2 (not busy) but skip model1 (busy)
// Since we evicted 1 (which is what we needed), NeedMore should be false
Expect(result.EvictedCount).To(Equal(1))
Expect(result.NeedMore).To(BeFalse()) // We evicted enough, even though we skipped model1
Expect(pm.getShutdownCalls()).To(ContainElement("model2"))
Expect(pm.getShutdownCalls()).ToNot(ContainElement("model1"))
})
It("should evict busy models when forceEvictionWhenBusy is true", func() {
wd.SetForceEvictionWhenBusy(true)
// Add two models
wd.AddAddressModelMap("addr1", "model1")
wd.Mark("addr1")
time.Sleep(10 * time.Millisecond)
wd.AddAddressModelMap("addr2", "model2")
wd.Mark("addr2")
// Keep model1 as busy (already marked from first Mark call)
// Try to enforce LRU - should evict model1 even though busy
result := wd.EnforceLRULimit(0)
Expect(result.EvictedCount).To(Equal(1))
Expect(result.NeedMore).To(BeFalse())
Expect(pm.getShutdownCalls()).To(ContainElement("model1"))
})
It("should set NeedMore when all models are busy and forceEvictionWhenBusy is false", func() {
// Add two models
wd.AddAddressModelMap("addr1", "model1")
wd.Mark("addr1")
time.Sleep(10 * time.Millisecond)
wd.AddAddressModelMap("addr2", "model2")
wd.Mark("addr2")
// Mark both as busy
wd.Mark("addr1")
wd.Mark("addr2")
// Try to enforce LRU - should skip both busy models
result := wd.EnforceLRULimit(0)
Expect(result.EvictedCount).To(Equal(0))
Expect(result.NeedMore).To(BeTrue())
Expect(pm.getShutdownCalls()).To(BeEmpty())
})
It("should allow updating forceEvictionWhenBusy dynamically", func() {
// Start with false
Expect(wd).ToNot(BeNil())
// Add models
wd.AddAddressModelMap("addr1", "model1")
wd.Mark("addr1")
time.Sleep(10 * time.Millisecond)
wd.AddAddressModelMap("addr2", "model2")
wd.Mark("addr2")
wd.UnMark("addr2") // Make model2 idle
// Keep model1 busy (already marked)
// With forceEvictionWhenBusy=false, should skip busy model1, evict model2
result := wd.EnforceLRULimit(0)
Expect(result.NeedMore).To(BeFalse()) // We evicted enough (1 model)
Expect(result.EvictedCount).To(Equal(1)) // Should evict model2 (not busy)
// Now enable force eviction
wd.SetForceEvictionWhenBusy(true)
// Add models again
wd.AddAddressModelMap("addr1", "model1")
wd.Mark("addr1")
time.Sleep(10 * time.Millisecond)
wd.AddAddressModelMap("addr2", "model2")
wd.Mark("addr2")
// Keep model1 busy (already marked)
// With forceEvictionWhenBusy=true, should evict busy model1
result = wd.EnforceLRULimit(0)
Expect(result.NeedMore).To(BeFalse())
Expect(result.EvictedCount).To(Equal(1))
})
It("should continue to next LRU model when busy model is skipped", func() {
// Add three models
wd.AddAddressModelMap("addr1", "model1")
wd.Mark("addr1")
time.Sleep(10 * time.Millisecond)
wd.AddAddressModelMap("addr2", "model2")
wd.Mark("addr2")
wd.UnMark("addr2") // Make model2 idle
time.Sleep(10 * time.Millisecond)
wd.AddAddressModelMap("addr3", "model3")
wd.Mark("addr3")
wd.UnMark("addr3") // Make model3 idle
// Keep model1 as busy (oldest, already marked)
// Need to evict 2 models (limit=2, current=3, need room for 1 new)
// Should skip model1 (busy), evict model2 and model3 (not busy)
result := wd.EnforceLRULimit(0)
// Should evict model2 and model3 (2 models, which is what we needed)
Expect(result.EvictedCount).To(Equal(2))
Expect(result.NeedMore).To(BeFalse()) // We evicted enough (2 models)
Expect(pm.getShutdownCalls()).To(ContainElement("model2"))
Expect(pm.getShutdownCalls()).To(ContainElement("model3"))
})
})
Context("EnforceLRULimitResult", func() {
BeforeEach(func() {
wd = model.NewWatchDog(
model.WithProcessManager(pm),
model.WithLRULimit(2),
model.WithForceEvictionWhenBusy(false),
)
})
It("should return NeedMore=false when eviction is successful", func() {
wd.AddAddressModelMap("addr1", "model1")
wd.Mark("addr1")
wd.UnMark("addr1") // Make idle
time.Sleep(10 * time.Millisecond)
wd.AddAddressModelMap("addr2", "model2")
wd.Mark("addr2")
wd.UnMark("addr2") // Make idle
result := wd.EnforceLRULimit(0)
Expect(result.NeedMore).To(BeFalse())
Expect(result.EvictedCount).To(Equal(1))
})
It("should return NeedMore=true when not enough models can be evicted", func() {
// Add two models (at limit of 2, need to evict 1 for new model)
wd.AddAddressModelMap("addr1", "model1")
wd.Mark("addr1")
time.Sleep(10 * time.Millisecond)
wd.AddAddressModelMap("addr2", "model2")
wd.Mark("addr2")
// Mark both as busy (keep them busy)
// Both are already marked as busy from the Mark calls above
// Need to evict 1, but both are busy
result := wd.EnforceLRULimit(0)
Expect(result.NeedMore).To(BeTrue())
Expect(result.EvictedCount).To(Equal(0))
})
It("should return NeedMore=true when need to evict multiple but some are busy", func() {
// Set limit to 1 and add 3 models: making room for one new model requires
// evicting current - limit + pending + 1 = 3 - 1 + 0 + 1 = 3 models
wd.SetLRULimit(1)
wd.AddAddressModelMap("addr1", "model1")
wd.Mark("addr1")
time.Sleep(10 * time.Millisecond)
wd.AddAddressModelMap("addr2", "model2")
wd.Mark("addr2")
wd.UnMark("addr2") // Make model2 idle
time.Sleep(10 * time.Millisecond)
wd.AddAddressModelMap("addr3", "model3")
wd.Mark("addr3")
// Keep model1 and model3 busy
// Need to evict 3 models, but model1 and model3 are busy, only model2 is idle
// Should evict model2 (1 model), but NeedMore=true because fewer models were evicted than needed
result := wd.EnforceLRULimit(0)
Expect(result.EvictedCount).To(Equal(1))
Expect(result.NeedMore).To(BeTrue())
})
It("should return correct EvictedCount when some models are evicted", func() {
// Add three models
wd.AddAddressModelMap("addr1", "model1")
wd.Mark("addr1")
time.Sleep(10 * time.Millisecond)
wd.AddAddressModelMap("addr2", "model2")
wd.Mark("addr2")
wd.UnMark("addr2") // Make model2 idle
time.Sleep(10 * time.Millisecond)
wd.AddAddressModelMap("addr3", "model3")
wd.Mark("addr3")
wd.UnMark("addr3") // Make model3 idle
// Keep model1 as busy (already marked)
// Need to evict 2 models, but model1 is busy
// Should evict model2 and model3 (2 models, which is what we needed)
result := wd.EnforceLRULimit(0)
Expect(result.EvictedCount).To(Equal(2))
Expect(result.NeedMore).To(BeFalse()) // We evicted enough (2 models)
})
})
Context("Functional Options", func() {
It("should use default options when none provided", func() {
wd = model.NewWatchDog(
@@ -331,6 +580,7 @@ var _ = Describe("WatchDog", func() {
model.WithLRULimit(5),
model.WithMemoryReclaimerEnabled(true),
model.WithMemoryReclaimerThreshold(0.80),
model.WithForceEvictionWhenBusy(true),
)
Expect(wd.GetLRULimit()).To(Equal(5))
@@ -339,5 +589,48 @@ var _ = Describe("WatchDog", func() {
Expect(enabled).To(BeTrue())
Expect(threshold).To(Equal(0.80))
})
It("should use default forceEvictionWhenBusy (false) when not specified", func() {
wd = model.NewWatchDog(
model.WithProcessManager(pm),
)
// Default should be false - we can test this by checking behavior
// Add busy models and verify they are skipped
wd.AddAddressModelMap("addr1", "model1")
wd.Mark("addr1")
time.Sleep(10 * time.Millisecond)
wd.AddAddressModelMap("addr2", "model2")
wd.Mark("addr2")
wd.Mark("addr1") // Keep model1 busy
// The LRU limit is only set here, after the models were added
wd.SetLRULimit(1)
result := wd.EnforceLRULimit(0)
// Neither model was unmarked, so both are busy and skipped: NeedMore=true
Expect(result.NeedMore).To(BeTrue())
})
It("should allow setting forceEvictionWhenBusy via option", func() {
wd = model.NewWatchDog(
model.WithProcessManager(pm),
model.WithLRULimit(2),
model.WithForceEvictionWhenBusy(true),
)
// Add models
wd.AddAddressModelMap("addr1", "model1")
wd.Mark("addr1")
time.Sleep(10 * time.Millisecond)
wd.AddAddressModelMap("addr2", "model2")
wd.Mark("addr2")
// Keep model1 busy (already marked from first Mark call)
// Should evict busy model1
result := wd.EnforceLRULimit(0)
Expect(result.NeedMore).To(BeFalse())
Expect(result.EvictedCount).To(Equal(1))
Expect(pm.getShutdownCalls()).To(ContainElement("model1"))
})
})
})