mirror of
https://github.com/mudler/LocalAI.git
synced 2026-01-30 17:22:43 -05:00
* feat: allow to set forcing backends eviction while requests are in flight Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat: try to make the request sit and retry if eviction couldn't be done Otherwise calls that in order to pass would need to shutdown other backends would just fail. In this way instead we make the request sit and retry eviction until it succeeds. The thresholds can be configured by the user. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * add tests Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * expose settings to CLI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Update docs Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
142 lines
4.0 KiB
Go
142 lines
4.0 KiB
Go
package model
|
|
|
|
import (
|
|
"time"
|
|
)
|
|
|
|
const (
	// DefaultWatchdogInterval is the watchdog interval that
	// DefaultWatchDogOptions installs when no interval is configured.
	DefaultWatchdogInterval = 500 * time.Millisecond

	// DefaultMemoryReclaimerThreshold is the memory reclaimer threshold that
	// DefaultWatchDogOptions installs, expressed as a fraction (0.80 = 80%).
	DefaultMemoryReclaimerThreshold = 0.80
)
|
|
|
|
// WatchDogOptions contains all configuration for the WatchDog
|
|
type WatchDogOptions struct {
|
|
processManager ProcessManager
|
|
|
|
// Timeout settings
|
|
busyTimeout time.Duration
|
|
idleTimeout time.Duration
|
|
watchdogInterval time.Duration
|
|
|
|
// Check toggles
|
|
busyCheck bool
|
|
idleCheck bool
|
|
|
|
// LRU settings
|
|
lruLimit int // Maximum number of active backends (0 = unlimited)
|
|
|
|
// Memory reclaimer settings (works with GPU if available, otherwise RAM)
|
|
memoryReclaimerEnabled bool // Enable memory threshold monitoring
|
|
memoryReclaimerThreshold float64 // Threshold 0.0-1.0 (e.g., 0.95 = 95%)
|
|
|
|
// Eviction settings
|
|
forceEvictionWhenBusy bool // Force eviction even when models have active API calls (default: false for safety)
|
|
}
|
|
|
|
// WatchDogOption is a function that configures WatchDogOptions
|
|
type WatchDogOption func(*WatchDogOptions)
|
|
|
|
// WithProcessManager sets the process manager for the watchdog
|
|
func WithProcessManager(pm ProcessManager) WatchDogOption {
|
|
return func(o *WatchDogOptions) {
|
|
o.processManager = pm
|
|
}
|
|
}
|
|
|
|
// WithBusyTimeout sets the busy timeout duration
|
|
func WithBusyTimeout(timeout time.Duration) WatchDogOption {
|
|
return func(o *WatchDogOptions) {
|
|
o.busyTimeout = timeout
|
|
}
|
|
}
|
|
|
|
// WithIdleTimeout sets the idle timeout duration
|
|
func WithIdleTimeout(timeout time.Duration) WatchDogOption {
|
|
return func(o *WatchDogOptions) {
|
|
o.idleTimeout = timeout
|
|
}
|
|
}
|
|
|
|
// WithWatchdogCheck sets the watchdog check duration
|
|
func WithWatchdogInterval(interval time.Duration) WatchDogOption {
|
|
return func(o *WatchDogOptions) {
|
|
o.watchdogInterval = interval
|
|
}
|
|
}
|
|
|
|
// WithBusyCheck enables or disables busy checking
|
|
func WithBusyCheck(enabled bool) WatchDogOption {
|
|
return func(o *WatchDogOptions) {
|
|
o.busyCheck = enabled
|
|
}
|
|
}
|
|
|
|
// WithIdleCheck enables or disables idle checking
|
|
func WithIdleCheck(enabled bool) WatchDogOption {
|
|
return func(o *WatchDogOptions) {
|
|
o.idleCheck = enabled
|
|
}
|
|
}
|
|
|
|
// WithLRULimit sets the maximum number of active backends (0 = unlimited)
|
|
func WithLRULimit(limit int) WatchDogOption {
|
|
return func(o *WatchDogOptions) {
|
|
o.lruLimit = limit
|
|
}
|
|
}
|
|
|
|
// WithMemoryReclaimer enables memory threshold monitoring with the specified threshold
|
|
// Works with GPU VRAM if available, otherwise uses system RAM
|
|
func WithMemoryReclaimer(enabled bool, threshold float64) WatchDogOption {
|
|
return func(o *WatchDogOptions) {
|
|
o.memoryReclaimerEnabled = enabled
|
|
o.memoryReclaimerThreshold = threshold
|
|
}
|
|
}
|
|
|
|
// WithMemoryReclaimerEnabled enables or disables memory threshold monitoring
|
|
func WithMemoryReclaimerEnabled(enabled bool) WatchDogOption {
|
|
return func(o *WatchDogOptions) {
|
|
o.memoryReclaimerEnabled = enabled
|
|
}
|
|
}
|
|
|
|
// WithMemoryReclaimerThreshold sets the memory threshold (0.0-1.0)
|
|
func WithMemoryReclaimerThreshold(threshold float64) WatchDogOption {
|
|
return func(o *WatchDogOptions) {
|
|
o.memoryReclaimerThreshold = threshold
|
|
}
|
|
}
|
|
|
|
// WithForceEvictionWhenBusy sets whether to force eviction even when models have active API calls
|
|
// Default: false (skip eviction when busy for safety)
|
|
func WithForceEvictionWhenBusy(force bool) WatchDogOption {
|
|
return func(o *WatchDogOptions) {
|
|
o.forceEvictionWhenBusy = force
|
|
}
|
|
}
|
|
|
|
// DefaultWatchDogOptions returns default options for the watchdog
|
|
func DefaultWatchDogOptions() *WatchDogOptions {
|
|
return &WatchDogOptions{
|
|
busyTimeout: 5 * time.Minute,
|
|
idleTimeout: 15 * time.Minute,
|
|
watchdogInterval: DefaultWatchdogInterval,
|
|
busyCheck: false,
|
|
idleCheck: false,
|
|
lruLimit: 0,
|
|
memoryReclaimerEnabled: false,
|
|
memoryReclaimerThreshold: DefaultMemoryReclaimerThreshold,
|
|
forceEvictionWhenBusy: false, // Default: skip eviction when busy for safety
|
|
}
|
|
}
|
|
|
|
// NewWatchDogOptions creates WatchDogOptions with the provided options applied
|
|
func NewWatchDogOptions(opts ...WatchDogOption) *WatchDogOptions {
|
|
o := DefaultWatchDogOptions()
|
|
for _, opt := range opts {
|
|
opt(o)
|
|
}
|
|
return o
|
|
}
|