mirror of
https://github.com/mudler/LocalAI.git
synced 2026-01-18 03:11:59 -05:00
* feat: allow forcing backend eviction while requests are in flight Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat: make the request wait and retry if eviction could not be done. Otherwise, calls that can only proceed after other backends are shut down would simply fail. Instead, the request now waits and retries eviction until it succeeds. The thresholds can be configured by the user. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * add tests Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * expose settings to CLI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Update docs Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
637 lines
19 KiB
Go
637 lines
19 KiB
Go
package model_test
|
|
|
|
import (
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/mudler/LocalAI/pkg/model"
|
|
. "github.com/onsi/ginkgo/v2"
|
|
. "github.com/onsi/gomega"
|
|
)
|
|
|
|
// mockProcessManager implements ProcessManager for testing.
// It records every model name passed to ShutdownModel and can be primed,
// per model name, with an error to return.
type mockProcessManager struct {
	mu             sync.Mutex       // held while the fields below are read or written
	shutdownCalls  []string         // model names in the order ShutdownModel received them
	shutdownErrors map[string]error // optional canned error keyed by model name
}

// newMockProcessManager returns a mock with no recorded calls and no canned errors.
func newMockProcessManager() *mockProcessManager {
	pm := new(mockProcessManager)
	pm.shutdownCalls = []string{}
	pm.shutdownErrors = map[string]error{}
	return pm
}

// ShutdownModel records modelName and returns the error primed for it,
// or nil when none was configured.
func (m *mockProcessManager) ShutdownModel(modelName string) error {
	m.mu.Lock()
	defer m.mu.Unlock()

	m.shutdownCalls = append(m.shutdownCalls, modelName)
	// Indexing with a missing key yields the zero value, i.e. a nil error.
	return m.shutdownErrors[modelName]
}

// getShutdownCalls returns a copy of the recorded model names, so callers can
// inspect or modify the result without touching the mock's own slice.
func (m *mockProcessManager) getShutdownCalls() []string {
	m.mu.Lock()
	defer m.mu.Unlock()

	snapshot := make([]string, len(m.shutdownCalls))
	copy(snapshot, m.shutdownCalls)
	return snapshot
}
|
|
|
|
var _ = Describe("WatchDog", func() {
|
|
// Fixtures shared by the specs in this suite.
var (
	// wd is the WatchDog under test; specs (or a Context-level BeforeEach) assign it.
	wd *model.WatchDog
	// pm is the mock process manager that records ShutdownModel calls.
	pm *mockProcessManager
)

// Give every spec a fresh mock so recorded shutdown calls do not carry over
// from one spec to the next.
BeforeEach(func() {
	pm = newMockProcessManager()
})
|
|
|
|
Context("LRU Limit", func() {
	// The limit passed at construction must be readable back.
	It("should create watchdog with LRU limit", func() {
		wd = model.NewWatchDog(
			model.WithProcessManager(pm),
			model.WithBusyTimeout(5*time.Minute),
			model.WithIdleTimeout(15*time.Minute),
			model.WithLRULimit(2),
		)

		limit := wd.GetLRULimit()
		Expect(limit).To(Equal(2))
	})

	// SetLRULimit overrides the value given at construction.
	It("should allow updating LRU limit dynamically", func() {
		wd = model.NewWatchDog(
			model.WithProcessManager(pm),
			model.WithLRULimit(2),
		)

		wd.SetLRULimit(5)

		limit := wd.GetLRULimit()
		Expect(limit).To(Equal(5))
	})

	// A limit of 0 is reported back unchanged.
	It("should return 0 for disabled LRU", func() {
		wd = model.NewWatchDog(
			model.WithProcessManager(pm),
			model.WithLRULimit(0),
		)

		limit := wd.GetLRULimit()
		Expect(limit).To(Equal(0))
	})
})
|
|
|
|
Context("Memory Reclaimer Options", func() {
	// Combined option: enabled flag and threshold in one call.
	It("should create watchdog with memory reclaimer settings", func() {
		wd = model.NewWatchDog(
			model.WithProcessManager(pm),
			model.WithMemoryReclaimer(true, 0.85),
		)

		on, limit := wd.GetMemoryReclaimerSettings()
		Expect(on).To(BeTrue())
		Expect(limit).To(Equal(0.85))
	})

	// The same settings expressed through the two single-purpose options.
	It("should allow setting memory reclaimer via separate options", func() {
		wd = model.NewWatchDog(
			model.WithProcessManager(pm),
			model.WithMemoryReclaimerEnabled(true),
			model.WithMemoryReclaimerThreshold(0.90),
		)

		on, limit := wd.GetMemoryReclaimerSettings()
		Expect(on).To(BeTrue())
		Expect(limit).To(Equal(0.90))
	})

	// Without an explicit threshold the package default applies.
	It("should use default threshold when not specified", func() {
		wd = model.NewWatchDog(
			model.WithProcessManager(pm),
		)

		_, limit := wd.GetMemoryReclaimerSettings()
		Expect(limit).To(Equal(model.DefaultMemoryReclaimerThreshold))
	})

	// SetMemoryReclaimer changes both values after construction.
	It("should allow updating memory reclaimer settings dynamically", func() {
		wd = model.NewWatchDog(
			model.WithProcessManager(pm),
		)

		wd.SetMemoryReclaimer(true, 0.80)

		on, limit := wd.GetMemoryReclaimerSettings()
		Expect(on).To(BeTrue())
		Expect(limit).To(Equal(0.80))
	})
})
|
|
|
|
Context("Model Tracking", func() {
	BeforeEach(func() {
		wd = model.NewWatchDog(
			model.WithProcessManager(pm),
			model.WithBusyTimeout(5*time.Minute),
			model.WithIdleTimeout(15*time.Minute),
			model.WithLRULimit(3),
		)
	})

	It("should track loaded models count", func() {
		Expect(wd.GetLoadedModelCount()).To(Equal(0))

		wd.AddAddressModelMap("addr1", "model1")
		Expect(wd.GetLoadedModelCount()).To(Equal(1))

		wd.AddAddressModelMap("addr2", "model2")
		Expect(wd.GetLoadedModelCount()).To(Equal(2))
	})

	// NOTE(review): lastUsed is not readable through any accessor used in this
	// file, so the three specs below can only observe it indirectly. They used
	// to contain no assertion at all and could never fail; each now at least
	// checks that the call completes and leaves the model tracked and not shut
	// down. The effect on eviction order is exercised by "should evict least
	// recently used first" in the EnforceLRULimit context.

	It("should update lastUsed time on Mark", func() {
		wd.AddAddressModelMap("addr1", "model1")
		wd.Mark("addr1")

		Expect(wd.GetLoadedModelCount()).To(Equal(1))
		Expect(pm.getShutdownCalls()).To(BeEmpty())
	})

	It("should update lastUsed time on UnMark", func() {
		wd.AddAddressModelMap("addr1", "model1")
		wd.Mark("addr1")
		time.Sleep(10 * time.Millisecond)
		wd.UnMark("addr1")

		Expect(wd.GetLoadedModelCount()).To(Equal(1))
		Expect(pm.getShutdownCalls()).To(BeEmpty())
	})

	It("should update lastUsed time via UpdateLastUsed", func() {
		wd.AddAddressModelMap("addr1", "model1")
		wd.UpdateLastUsed("addr1")

		Expect(wd.GetLoadedModelCount()).To(Equal(1))
		Expect(pm.getShutdownCalls()).To(BeEmpty())
	})
})
|
|
|
|
Context("EnforceLRULimit", func() {
	// addIdleModel tracks a model and runs it through Mark followed by UnMark,
	// which is how these specs make a model idle (not busy).
	addIdleModel := func(addr, name string) {
		wd.AddAddressModelMap(addr, name)
		wd.Mark(addr)
		wd.UnMark(addr)
	}

	BeforeEach(func() {
		wd = model.NewWatchDog(
			model.WithProcessManager(pm),
			model.WithBusyTimeout(5*time.Minute),
			model.WithIdleTimeout(15*time.Minute),
			model.WithLRULimit(2),
			model.WithForceEvictionWhenBusy(true), // Enable force eviction for these tests to match old behavior
		)
	})

	It("should not evict when under limit", func() {
		addIdleModel("addr1", "model1")

		result := wd.EnforceLRULimit(0)
		Expect(result.EvictedCount).To(Equal(0))
		Expect(result.NeedMore).To(BeFalse())
		Expect(pm.getShutdownCalls()).To(BeEmpty())
	})

	It("should evict oldest model when at limit", func() {
		// Add two models
		addIdleModel("addr1", "model1")
		time.Sleep(10 * time.Millisecond)

		addIdleModel("addr2", "model2")

		// Enforce LRU with limit of 2 (need to make room for 1 new model)
		result := wd.EnforceLRULimit(0)
		Expect(result.EvictedCount).To(Equal(1))
		Expect(result.NeedMore).To(BeFalse())
		Expect(pm.getShutdownCalls()).To(ContainElement("model1")) // oldest should be evicted
	})

	It("should evict multiple models when needed", func() {
		// Add three models
		addIdleModel("addr1", "model1")
		time.Sleep(10 * time.Millisecond)

		addIdleModel("addr2", "model2")
		time.Sleep(10 * time.Millisecond)

		addIdleModel("addr3", "model3")

		// Set limit to 1, should evict 2 oldest + 1 for new = 3 evictions
		wd.SetLRULimit(1)
		result := wd.EnforceLRULimit(0)
		Expect(result.EvictedCount).To(Equal(3))
		Expect(result.NeedMore).To(BeFalse())

		shutdowns := pm.getShutdownCalls()
		Expect(shutdowns).To(ContainElement("model1"))
		Expect(shutdowns).To(ContainElement("model2"))
		Expect(shutdowns).To(ContainElement("model3"))
	})

	It("should account for pending loads", func() {
		// Add two models (at limit)
		addIdleModel("addr1", "model1")
		time.Sleep(10 * time.Millisecond)

		addIdleModel("addr2", "model2")

		// With 1 pending load, we need to evict 2 (current=2, pending=1, new=1, limit=2)
		// total after = 2 + 1 + 1 = 4, need to evict 4 - 2 = 2
		result := wd.EnforceLRULimit(1)
		Expect(result.EvictedCount).To(Equal(2))
		Expect(result.NeedMore).To(BeFalse())
	})

	It("should not evict when LRU is disabled", func() {
		wd.SetLRULimit(0)

		wd.AddAddressModelMap("addr1", "model1")
		wd.AddAddressModelMap("addr2", "model2")
		wd.AddAddressModelMap("addr3", "model3")

		result := wd.EnforceLRULimit(0)
		Expect(result.EvictedCount).To(Equal(0))
		Expect(result.NeedMore).To(BeFalse())
		Expect(pm.getShutdownCalls()).To(BeEmpty())
	})

	It("should evict least recently used first", func() {
		wd.SetLRULimit(2)

		// Add models with different lastUsed times
		addIdleModel("addr1", "model1")
		time.Sleep(20 * time.Millisecond)

		addIdleModel("addr2", "model2")
		time.Sleep(20 * time.Millisecond)

		// Touch model1 again to make it more recent
		wd.UpdateLastUsed("addr1")
		time.Sleep(20 * time.Millisecond)

		addIdleModel("addr3", "model3")

		// Now model2 is the oldest, should be evicted first
		result := wd.EnforceLRULimit(0)
		Expect(result.EvictedCount).To(BeNumerically(">=", 1))
		Expect(result.NeedMore).To(BeFalse())

		// The ordering check must not be conditional: guarding it with a
		// length test would let the spec pass without verifying anything
		// when no shutdown was recorded.
		shutdowns := pm.getShutdownCalls()
		Expect(shutdowns).ToNot(BeEmpty())
		Expect(shutdowns[0]).To(Equal("model2")) // model2 is the oldest
	})
})
|
|
|
|
Context("Single Backend Mode (LRU=1)", func() {
	BeforeEach(func() {
		wd = model.NewWatchDog(
			model.WithProcessManager(pm),
			model.WithBusyTimeout(5*time.Minute),
			model.WithIdleTimeout(15*time.Minute),
			model.WithLRULimit(1),
			model.WithForceEvictionWhenBusy(true), // Enable force eviction for these tests
		)
	})

	It("should evict existing model when loading new one", func() {
		wd.AddAddressModelMap("addr1", "model1")
		wd.Mark("addr1")
		wd.UnMark("addr1") // Unmark to make it idle

		// With limit=1, loading a new model should evict the existing one
		result := wd.EnforceLRULimit(0)
		Expect(result.EvictedCount).To(Equal(1))
		Expect(result.NeedMore).To(BeFalse())
		Expect(pm.getShutdownCalls()).To(ContainElement("model1"))
	})

	It("should handle rapid model switches", func() {
		// Each iteration re-registers the same address/model pair and then
		// enforces the limit.
		for i := 0; i < 5; i++ {
			wd.AddAddressModelMap("addr", "model")
			wd.Mark("addr")
			wd.UnMark("addr") // Unmark to make it idle
			wd.EnforceLRULimit(0)
		}

		// All previous models should have been evicted: one shutdown per
		// iteration. HaveLen prints the slice contents on failure, unlike
		// comparing len() to an int.
		Expect(pm.getShutdownCalls()).To(HaveLen(5))
	})
})
|
|
|
|
Context("Force Eviction When Busy", func() {
	// loadBusy tracks a model and leaves it marked busy (Mark without UnMark).
	loadBusy := func(addr, name string) {
		wd.AddAddressModelMap(addr, name)
		wd.Mark(addr)
	}
	// loadIdle tracks a model, marks it and immediately unmarks it so it is idle.
	loadIdle := func(addr, name string) {
		loadBusy(addr, name)
		wd.UnMark(addr)
	}
	// pause spaces out registrations, presumably so lastUsed timestamps differ.
	pause := func() { time.Sleep(10 * time.Millisecond) }

	BeforeEach(func() {
		wd = model.NewWatchDog(
			model.WithProcessManager(pm),
			model.WithLRULimit(2),
			model.WithForceEvictionWhenBusy(false), // Default: skip eviction when busy
		)
	})

	It("should skip eviction for busy models when forceEvictionWhenBusy is false", func() {
		// Two models at the limit of 2: one eviction is needed for a new model.
		// model1 stays busy (simulating an active API call); model2 is idle.
		loadBusy("addr1", "model1")
		pause()
		loadIdle("addr2", "model2")

		outcome := wd.EnforceLRULimit(0)

		// Busy model1 is skipped and idle model2 is evicted; that single
		// eviction is all that was required, so NeedMore stays false.
		Expect(outcome.EvictedCount).To(Equal(1))
		Expect(outcome.NeedMore).To(BeFalse())
		Expect(pm.getShutdownCalls()).To(ContainElement("model2"))
		Expect(pm.getShutdownCalls()).ToNot(ContainElement("model1"))
	})

	It("should evict busy models when forceEvictionWhenBusy is true", func() {
		wd.SetForceEvictionWhenBusy(true)

		// Both models are left busy.
		loadBusy("addr1", "model1")
		pause()
		loadBusy("addr2", "model2")

		// With forcing on, the oldest model goes even though it is busy.
		outcome := wd.EnforceLRULimit(0)
		Expect(outcome.EvictedCount).To(Equal(1))
		Expect(outcome.NeedMore).To(BeFalse())
		Expect(pm.getShutdownCalls()).To(ContainElement("model1"))
	})

	It("should set NeedMore when all models are busy and forceEvictionWhenBusy is false", func() {
		loadBusy("addr1", "model1")
		pause()
		loadBusy("addr2", "model2")

		// Mark both once more; they remain busy.
		wd.Mark("addr1")
		wd.Mark("addr2")

		// Nothing is evictable, so the caller is told more room is still needed.
		outcome := wd.EnforceLRULimit(0)
		Expect(outcome.EvictedCount).To(Equal(0))
		Expect(outcome.NeedMore).To(BeTrue())
		Expect(pm.getShutdownCalls()).To(BeEmpty())
	})

	It("should allow updating forceEvictionWhenBusy dynamically", func() {
		// The watchdog starts with forcing disabled (see BeforeEach).
		Expect(wd).ToNot(BeNil())

		// model1 busy, model2 idle.
		loadBusy("addr1", "model1")
		pause()
		loadIdle("addr2", "model2")

		// Forcing off: busy model1 is skipped, idle model2 is evicted.
		outcome := wd.EnforceLRULimit(0)
		Expect(outcome.NeedMore).To(BeFalse())       // one eviction was enough
		Expect(outcome.EvictedCount).To(Equal(1)) // model2 (not busy)

		// Switch forcing on and register both models again, both busy.
		wd.SetForceEvictionWhenBusy(true)

		loadBusy("addr1", "model1")
		pause()
		loadBusy("addr2", "model2")

		// Forcing on: a busy model may now be evicted.
		outcome = wd.EnforceLRULimit(0)
		Expect(outcome.NeedMore).To(BeFalse())
		Expect(outcome.EvictedCount).To(Equal(1))
	})

	It("should continue to next LRU model when busy model is skipped", func() {
		// Three models: the oldest (model1) stays busy, the other two are idle.
		loadBusy("addr1", "model1")
		pause()
		loadIdle("addr2", "model2")
		pause()
		loadIdle("addr3", "model3")

		// limit=2, current=3, plus room for 1 new model: 2 evictions needed.
		// model1 is skipped, so model2 and model3 are the ones evicted.
		outcome := wd.EnforceLRULimit(0)
		Expect(outcome.EvictedCount).To(Equal(2))
		Expect(outcome.NeedMore).To(BeFalse()) // both required evictions happened
		Expect(pm.getShutdownCalls()).To(ContainElement("model2"))
		Expect(pm.getShutdownCalls()).To(ContainElement("model3"))
	})
})
|
|
|
|
Context("EnforceLRULimitResult", func() {
	BeforeEach(func() {
		wd = model.NewWatchDog(
			model.WithProcessManager(pm),
			model.WithLRULimit(2),
			model.WithForceEvictionWhenBusy(false),
		)
	})

	It("should return NeedMore=false when eviction is successful", func() {
		wd.AddAddressModelMap("addr1", "model1")
		wd.Mark("addr1")
		wd.UnMark("addr1") // Make idle
		time.Sleep(10 * time.Millisecond)

		wd.AddAddressModelMap("addr2", "model2")
		wd.Mark("addr2")
		wd.UnMark("addr2") // Make idle

		result := wd.EnforceLRULimit(0)
		Expect(result.NeedMore).To(BeFalse())
		Expect(result.EvictedCount).To(Equal(1))
	})

	It("should return NeedMore=true when not enough models can be evicted", func() {
		// Add two models (at limit of 2, need to evict 1 for new model)
		wd.AddAddressModelMap("addr1", "model1")
		wd.Mark("addr1")
		time.Sleep(10 * time.Millisecond)

		wd.AddAddressModelMap("addr2", "model2")
		wd.Mark("addr2")

		// Both models stay busy: neither Mark above is followed by UnMark.

		// Need to evict 1, but both are busy
		result := wd.EnforceLRULimit(0)
		Expect(result.NeedMore).To(BeTrue())
		Expect(result.EvictedCount).To(Equal(0))
	})

	It("should return NeedMore=true when need to evict multiple but some are busy", func() {
		// Set limit to 1 and add 3 models. Using the same arithmetic as the
		// "should account for pending loads" spec (current + pending + 1 new
		// - limit), 3 + 0 + 1 - 1 = 3 evictions are needed.
		wd.SetLRULimit(1)
		wd.AddAddressModelMap("addr1", "model1")
		wd.Mark("addr1")
		time.Sleep(10 * time.Millisecond)

		wd.AddAddressModelMap("addr2", "model2")
		wd.Mark("addr2")
		wd.UnMark("addr2") // Make model2 idle
		time.Sleep(10 * time.Millisecond)

		wd.AddAddressModelMap("addr3", "model3")
		wd.Mark("addr3")
		// Keep model1 and model3 busy

		// Only model2 is idle, so exactly 1 model can be evicted; that is
		// fewer than the 3 required, hence NeedMore=true.
		result := wd.EnforceLRULimit(0)
		Expect(result.EvictedCount).To(Equal(1))
		Expect(result.NeedMore).To(BeTrue())
		// The busy models must not have been shut down.
		Expect(pm.getShutdownCalls()).To(ConsistOf("model2"))
	})

	It("should return correct EvictedCount when some models are evicted", func() {
		// Add three models
		wd.AddAddressModelMap("addr1", "model1")
		wd.Mark("addr1")
		time.Sleep(10 * time.Millisecond)

		wd.AddAddressModelMap("addr2", "model2")
		wd.Mark("addr2")
		wd.UnMark("addr2") // Make model2 idle
		time.Sleep(10 * time.Millisecond)

		wd.AddAddressModelMap("addr3", "model3")
		wd.Mark("addr3")
		wd.UnMark("addr3") // Make model3 idle

		// Keep model1 as busy (already marked)

		// Need to evict 2 models (3 + 0 + 1 - 2), but model1 is busy
		// Should evict model2 and model3 (2 models, which is what we needed)
		result := wd.EnforceLRULimit(0)
		Expect(result.EvictedCount).To(Equal(2))
		Expect(result.NeedMore).To(BeFalse()) // We evicted enough (2 models)
	})
})
|
|
|
|
Context("Functional Options", func() {
	It("should use default options when none provided", func() {
		wd = model.NewWatchDog(
			model.WithProcessManager(pm),
		)
		Expect(wd.GetLRULimit()).To(Equal(0))

		enabled, threshold := wd.GetMemoryReclaimerSettings()
		Expect(enabled).To(BeFalse())
		Expect(threshold).To(Equal(model.DefaultMemoryReclaimerThreshold))
	})

	It("should allow combining multiple options", func() {
		wd = model.NewWatchDog(
			model.WithProcessManager(pm),
			model.WithBusyTimeout(10*time.Minute),
			model.WithIdleTimeout(30*time.Minute),
			model.WithBusyCheck(true),
			model.WithIdleCheck(true),
			model.WithLRULimit(5),
			model.WithMemoryReclaimerEnabled(true),
			model.WithMemoryReclaimerThreshold(0.80),
			model.WithForceEvictionWhenBusy(true),
		)

		Expect(wd.GetLRULimit()).To(Equal(5))

		enabled, threshold := wd.GetMemoryReclaimerSettings()
		Expect(enabled).To(BeTrue())
		Expect(threshold).To(Equal(0.80))
	})

	It("should use default forceEvictionWhenBusy (false) when not specified", func() {
		wd = model.NewWatchDog(
			model.WithProcessManager(pm),
		)
		// Default should be false - we can test this by checking behavior:
		// busy models must be skipped rather than evicted.
		wd.AddAddressModelMap("addr1", "model1")
		wd.Mark("addr1")
		time.Sleep(10 * time.Millisecond)

		wd.AddAddressModelMap("addr2", "model2")
		wd.Mark("addr2")
		wd.Mark("addr1") // Keep model1 busy

		wd.SetLRULimit(1)
		result := wd.EnforceLRULimit(0)

		// Both models are busy (neither was unmarked), so with forcing off
		// nothing can be evicted and more room is still needed.
		Expect(result.NeedMore).To(BeTrue())
		Expect(result.EvictedCount).To(Equal(0))
		Expect(pm.getShutdownCalls()).To(BeEmpty())
	})

	It("should allow setting forceEvictionWhenBusy via option", func() {
		wd = model.NewWatchDog(
			model.WithProcessManager(pm),
			model.WithLRULimit(2),
			model.WithForceEvictionWhenBusy(true),
		)

		// Add models
		wd.AddAddressModelMap("addr1", "model1")
		wd.Mark("addr1")
		time.Sleep(10 * time.Millisecond)

		wd.AddAddressModelMap("addr2", "model2")
		wd.Mark("addr2")
		// Keep model1 busy (already marked from first Mark call)

		// Should evict busy model1
		result := wd.EnforceLRULimit(0)
		Expect(result.NeedMore).To(BeFalse())
		Expect(result.EvictedCount).To(Equal(1))
		Expect(pm.getShutdownCalls()).To(ContainElement("model1"))
	})
})
|
|
})
|