mirror of
https://github.com/mudler/LocalAI.git
synced 2026-07-02 04:16:56 -04:00
fix(watchdog): persist a UI-saved Check Interval across restarts (#10601) The watchdog Check Interval saved via /api/settings reverted to 500ms on every restart, while the idle/busy timeouts persisted correctly. Root cause: NewApplicationConfig baseline-defaulted WatchDogInterval to 500ms, whereas the idle/busy timeouts default to 0. The startup loader (loadRuntimeSettingsFromFile) applies a persisted runtime_settings.json value only when the field is still at its zero default - its heuristic for "this wasn't set by an env var". Because the interval was always 500ms at that point, the loader never read the persisted value back, so the saved interval was silently discarded on each boot. Fix: drop the non-zero baseline default so the interval behaves like the sibling timeouts (0 = unset). The effective 500ms default is now supplied at the watchdog layer: WithWatchdogInterval ignores a non-positive value so DefaultWatchDogOptions' 500ms is preserved (and a 0 interval can never turn the watchdog loop into a busy spin). Also mirror the interval in the live config file watcher alongside idle/busy, and report the real 500ms default (not the stale "2s") from ToRuntimeSettings. Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
163 lines
4.9 KiB
Go
163 lines
4.9 KiB
Go
package model
|
|
|
|
import (
|
|
"time"
|
|
)
|
|
|
|
// Baseline values installed by DefaultWatchDogOptions before any
// WatchDogOption is applied.
const (
	// DefaultWatchdogInterval is the default watchdog check interval (500ms).
	DefaultWatchdogInterval = time.Second / 2

	// DefaultMemoryReclaimerThreshold is the default memory reclaimer
	// threshold on the 0.0-1.0 scale (0.80 = 80%).
	DefaultMemoryReclaimerThreshold = 0.80
)
|
|
|
|
// WatchDogOptions contains all configuration for the WatchDog
|
|
type WatchDogOptions struct {
|
|
processManager ProcessManager
|
|
|
|
// Timeout settings
|
|
busyTimeout time.Duration
|
|
idleTimeout time.Duration
|
|
watchdogInterval time.Duration
|
|
|
|
// Check toggles
|
|
busyCheck bool
|
|
idleCheck bool
|
|
|
|
// LRU settings
|
|
lruLimit int // Maximum number of active backends (0 = unlimited)
|
|
|
|
// Memory reclaimer settings (works with GPU if available, otherwise RAM)
|
|
memoryReclaimerEnabled bool // Enable memory threshold monitoring
|
|
memoryReclaimerThreshold float64 // Threshold 0.0-1.0 (e.g., 0.95 = 95%)
|
|
|
|
// Eviction settings
|
|
forceEvictionWhenBusy bool // Force eviction even when models have active API calls (default: false for safety)
|
|
|
|
// Size-aware eviction: sort candidates by model file size (largest first)
|
|
sizeAwareEviction bool
|
|
}
|
|
|
|
// WatchDogOption is a function that configures WatchDogOptions
|
|
type WatchDogOption func(*WatchDogOptions)
|
|
|
|
// WithProcessManager sets the process manager for the watchdog
|
|
func WithProcessManager(pm ProcessManager) WatchDogOption {
|
|
return func(o *WatchDogOptions) {
|
|
o.processManager = pm
|
|
}
|
|
}
|
|
|
|
// WithBusyTimeout sets the busy timeout duration
|
|
func WithBusyTimeout(timeout time.Duration) WatchDogOption {
|
|
return func(o *WatchDogOptions) {
|
|
o.busyTimeout = timeout
|
|
}
|
|
}
|
|
|
|
// WithIdleTimeout sets the idle timeout duration
|
|
func WithIdleTimeout(timeout time.Duration) WatchDogOption {
|
|
return func(o *WatchDogOptions) {
|
|
o.idleTimeout = timeout
|
|
}
|
|
}
|
|
|
|
// WithWatchdogInterval sets the watchdog check interval. A non-positive
|
|
// interval is ignored so the DefaultWatchdogInterval set by
|
|
// DefaultWatchDogOptions is preserved: callers pass the raw
|
|
// ApplicationConfig value, which is 0 when neither an env var nor a
|
|
// persisted setting configured it (#10601), and a 0 interval would otherwise
|
|
// turn the watchdog loop into a busy spin.
|
|
func WithWatchdogInterval(interval time.Duration) WatchDogOption {
|
|
return func(o *WatchDogOptions) {
|
|
if interval > 0 {
|
|
o.watchdogInterval = interval
|
|
}
|
|
}
|
|
}
|
|
|
|
// WithBusyCheck enables or disables busy checking
|
|
func WithBusyCheck(enabled bool) WatchDogOption {
|
|
return func(o *WatchDogOptions) {
|
|
o.busyCheck = enabled
|
|
}
|
|
}
|
|
|
|
// WithIdleCheck enables or disables idle checking
|
|
func WithIdleCheck(enabled bool) WatchDogOption {
|
|
return func(o *WatchDogOptions) {
|
|
o.idleCheck = enabled
|
|
}
|
|
}
|
|
|
|
// WithLRULimit sets the maximum number of active backends (0 = unlimited)
|
|
func WithLRULimit(limit int) WatchDogOption {
|
|
return func(o *WatchDogOptions) {
|
|
o.lruLimit = limit
|
|
}
|
|
}
|
|
|
|
// WithMemoryReclaimer enables memory threshold monitoring with the specified threshold
|
|
// Works with GPU VRAM if available, otherwise uses system RAM
|
|
func WithMemoryReclaimer(enabled bool, threshold float64) WatchDogOption {
|
|
return func(o *WatchDogOptions) {
|
|
o.memoryReclaimerEnabled = enabled
|
|
o.memoryReclaimerThreshold = threshold
|
|
}
|
|
}
|
|
|
|
// WithMemoryReclaimerEnabled enables or disables memory threshold monitoring
|
|
func WithMemoryReclaimerEnabled(enabled bool) WatchDogOption {
|
|
return func(o *WatchDogOptions) {
|
|
o.memoryReclaimerEnabled = enabled
|
|
}
|
|
}
|
|
|
|
// WithMemoryReclaimerThreshold sets the memory threshold (0.0-1.0)
|
|
func WithMemoryReclaimerThreshold(threshold float64) WatchDogOption {
|
|
return func(o *WatchDogOptions) {
|
|
o.memoryReclaimerThreshold = threshold
|
|
}
|
|
}
|
|
|
|
// WithForceEvictionWhenBusy sets whether to force eviction even when models have active API calls
|
|
// Default: false (skip eviction when busy for safety)
|
|
func WithForceEvictionWhenBusy(force bool) WatchDogOption {
|
|
return func(o *WatchDogOptions) {
|
|
o.forceEvictionWhenBusy = force
|
|
}
|
|
}
|
|
|
|
// WithSizeAwareEviction enables size-aware eviction ordering.
|
|
// When true, eviction candidates are sorted by on-disk file size (largest first)
|
|
// so that bigger models are freed before smaller ones, keeping small utility models
|
|
// resident and maximizing the memory freed per eviction round.
|
|
// Default: false (LRU time ordering).
|
|
func WithSizeAwareEviction(enabled bool) WatchDogOption {
|
|
return func(o *WatchDogOptions) {
|
|
o.sizeAwareEviction = enabled
|
|
}
|
|
}
|
|
|
|
// DefaultWatchDogOptions returns default options for the watchdog
|
|
func DefaultWatchDogOptions() *WatchDogOptions {
|
|
return &WatchDogOptions{
|
|
busyTimeout: 5 * time.Minute,
|
|
idleTimeout: 15 * time.Minute,
|
|
watchdogInterval: DefaultWatchdogInterval,
|
|
busyCheck: false,
|
|
idleCheck: false,
|
|
lruLimit: 0,
|
|
memoryReclaimerEnabled: false,
|
|
memoryReclaimerThreshold: DefaultMemoryReclaimerThreshold,
|
|
forceEvictionWhenBusy: false, // Default: skip eviction when busy for safety
|
|
}
|
|
}
|
|
|
|
// NewWatchDogOptions creates WatchDogOptions with the provided options applied
|
|
func NewWatchDogOptions(opts ...WatchDogOption) *WatchDogOptions {
|
|
o := DefaultWatchDogOptions()
|
|
for _, opt := range opts {
|
|
opt(o)
|
|
}
|
|
return o
|
|
}
|