Files
LocalAI/pkg/model/watchdog_options.go
LocalAI [bot] fd8cebd0b3 fix(watchdog): persist UI-saved Check Interval across restarts (#10601) (#10605)
fix(watchdog): persist a UI-saved Check Interval across restarts (#10601)

The watchdog Check Interval saved via /api/settings reverted to 500ms on
every restart, while the idle/busy timeouts persisted correctly.

Root cause: NewApplicationConfig baseline-defaulted WatchDogInterval to
500ms, whereas the idle/busy timeouts default to 0. The startup loader
(loadRuntimeSettingsFromFile) applies a persisted runtime_settings.json
value only when the field is still at its zero default - its heuristic
for "this wasn't set by an env var". Because the interval was always
500ms at that point, the loader never read the persisted value back, so
the saved interval was silently discarded on each boot.

Fix: drop the non-zero baseline default so the interval behaves like the
sibling timeouts (0 = unset). The effective 500ms default is now supplied
at the watchdog layer: WithWatchdogInterval ignores a non-positive value
so DefaultWatchDogOptions' 500ms is preserved (and a 0 interval can never
turn the watchdog loop into a busy spin). Also mirror the interval in the
live config file watcher alongside idle/busy, and report the real 500ms
default (not the stale "2s") from ToRuntimeSettings.


Assisted-by: Claude:claude-opus-4-8 [Claude Code]

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
2026-06-30 17:48:14 +02:00

163 lines
4.9 KiB
Go

package model
import (
"time"
)
// Fallback values installed by DefaultWatchDogOptions.
const (
	// DefaultWatchdogInterval is the watchdog check interval used when no
	// positive interval has been configured.
	DefaultWatchdogInterval time.Duration = 500 * time.Millisecond

	// DefaultMemoryReclaimerThreshold is the default memory reclaimer
	// threshold, expressed as a fraction in 0.0-1.0 (0.8 = 80%).
	DefaultMemoryReclaimerThreshold = 0.8
)
// WatchDogOptions bundles every tunable of the WatchDog. All fields are
// unexported; they are filled in through WatchDogOption functions.
type WatchDogOptions struct {
	processManager ProcessManager

	// Durations: busy timeout, idle timeout and the interval between checks.
	busyTimeout, idleTimeout, watchdogInterval time.Duration

	// Switches for the busy and idle checks.
	busyCheck, idleCheck bool

	// lruLimit is the maximum number of active backends (0 = unlimited).
	lruLimit int

	// Memory reclaimer (works with GPU if available, otherwise RAM).
	memoryReclaimerEnabled   bool    // turns memory threshold monitoring on
	memoryReclaimerThreshold float64 // fraction in 0.0-1.0 (e.g., 0.95 = 95%)

	// forceEvictionWhenBusy forces eviction even when models have active API
	// calls (default: false for safety).
	forceEvictionWhenBusy bool

	// sizeAwareEviction sorts eviction candidates by model file size,
	// largest first.
	sizeAwareEviction bool
}
// WatchDogOption is a functional option: a function that receives a
// *WatchDogOptions and mutates it in place. NewWatchDogOptions applies such
// options, in the order given, on top of the defaults.
type WatchDogOption func(*WatchDogOptions)
// WithProcessManager returns an option that stores pm as the process manager
// of the WatchDogOptions it is applied to.
func WithProcessManager(pm ProcessManager) WatchDogOption {
	assign := func(cfg *WatchDogOptions) {
		cfg.processManager = pm
	}
	return assign
}
// WithBusyTimeout returns an option that overwrites the busy timeout with the
// given duration. The value is stored as-is, without validation.
func WithBusyTimeout(timeout time.Duration) WatchDogOption {
	return func(cfg *WatchDogOptions) {
		cfg.busyTimeout = timeout
	}
}
// WithIdleTimeout returns an option that overwrites the idle timeout with the
// given duration. The value is stored as-is, without validation.
func WithIdleTimeout(timeout time.Duration) WatchDogOption {
	return func(cfg *WatchDogOptions) {
		cfg.idleTimeout = timeout
	}
}
// WithWatchdogInterval returns an option that sets the watchdog check
// interval.
//
// Only strictly positive intervals take effect. A zero or negative value
// leaves the current setting untouched, which keeps the
// DefaultWatchdogInterval installed by DefaultWatchDogOptions. Callers pass the
// raw ApplicationConfig value, and that value is 0 when neither an env var nor
// a persisted setting configured it (#10601); storing a 0 interval would
// otherwise turn the watchdog loop into a busy spin.
func WithWatchdogInterval(interval time.Duration) WatchDogOption {
	return func(cfg *WatchDogOptions) {
		if interval <= 0 {
			return // keep whatever interval is already set
		}
		cfg.watchdogInterval = interval
	}
}
// WithBusyCheck returns an option that switches busy checking on (true) or
// off (false).
func WithBusyCheck(enabled bool) WatchDogOption {
	toggle := func(cfg *WatchDogOptions) {
		cfg.busyCheck = enabled
	}
	return toggle
}
// WithIdleCheck returns an option that switches idle checking on (true) or
// off (false).
func WithIdleCheck(enabled bool) WatchDogOption {
	toggle := func(cfg *WatchDogOptions) {
		cfg.idleCheck = enabled
	}
	return toggle
}
// WithLRULimit returns an option that caps the number of active backends at
// limit. A limit of 0 means unlimited.
func WithLRULimit(limit int) WatchDogOption {
	return func(cfg *WatchDogOptions) {
		cfg.lruLimit = limit
	}
}
// WithMemoryReclaimer returns an option that sets both memory reclaimer
// fields in one go: whether memory threshold monitoring is enabled, and the
// threshold to use. The threshold is stored exactly as passed.
// Works with GPU VRAM if available, otherwise uses system RAM.
func WithMemoryReclaimer(enabled bool, threshold float64) WatchDogOption {
	return func(cfg *WatchDogOptions) {
		cfg.memoryReclaimerEnabled, cfg.memoryReclaimerThreshold = enabled, threshold
	}
}
// WithMemoryReclaimerEnabled returns an option that turns memory threshold
// monitoring on or off. The configured threshold is left as it is.
func WithMemoryReclaimerEnabled(enabled bool) WatchDogOption {
	return func(cfg *WatchDogOptions) {
		cfg.memoryReclaimerEnabled = enabled
	}
}
// WithMemoryReclaimerThreshold returns an option that sets the memory
// threshold, a fraction expected to lie in 0.0-1.0. The value is stored as
// passed, and the enabled flag is not touched.
func WithMemoryReclaimerThreshold(threshold float64) WatchDogOption {
	return func(cfg *WatchDogOptions) {
		cfg.memoryReclaimerThreshold = threshold
	}
}
// WithForceEvictionWhenBusy returns an option that controls whether eviction
// is forced even when models have active API calls.
// Default: false (eviction is skipped when busy, for safety).
func WithForceEvictionWhenBusy(force bool) WatchDogOption {
	apply := func(cfg *WatchDogOptions) {
		cfg.forceEvictionWhenBusy = force
	}
	return apply
}
// WithSizeAwareEviction returns an option that turns size-aware eviction
// ordering on or off.
// When enabled, eviction candidates are sorted by on-disk file size, largest
// first, so bigger models are freed before smaller ones. This keeps small
// utility models resident and maximizes the memory freed per eviction round.
// Default: false (LRU time ordering).
func WithSizeAwareEviction(enabled bool) WatchDogOption {
	return func(cfg *WatchDogOptions) {
		cfg.sizeAwareEviction = enabled
	}
}
// DefaultWatchDogOptions builds a fresh WatchDogOptions holding the baseline
// configuration: a 5 minute busy timeout, a 15 minute idle timeout,
// DefaultWatchdogInterval, DefaultMemoryReclaimerThreshold, and every toggle
// and limit switched off. Fields not assigned below keep their zero value.
func DefaultWatchDogOptions() *WatchDogOptions {
	defaults := new(WatchDogOptions)

	// Timing.
	defaults.busyTimeout = 5 * time.Minute
	defaults.idleTimeout = 15 * time.Minute
	defaults.watchdogInterval = DefaultWatchdogInterval

	// Busy/idle checks and the LRU limit start disabled.
	defaults.busyCheck = false
	defaults.idleCheck = false
	defaults.lruLimit = 0

	// The memory reclaimer is off, but its threshold is pre-populated.
	defaults.memoryReclaimerEnabled = false
	defaults.memoryReclaimerThreshold = DefaultMemoryReclaimerThreshold

	// Default: skip eviction when busy for safety.
	defaults.forceEvictionWhenBusy = false

	return defaults
}
// NewWatchDogOptions starts from DefaultWatchDogOptions and applies opts in
// the order given, so a later option overrides an earlier one that touches
// the same field. Nil options are skipped rather than invoked, which lets
// callers assemble the option list conditionally without risking a
// nil-function panic. With no options the defaults are returned unchanged.
func NewWatchDogOptions(opts ...WatchDogOption) *WatchDogOptions {
	o := DefaultWatchDogOptions()
	for _, opt := range opts {
		if opt == nil {
			// Calling a nil func value panics; treat it as "no option".
			continue
		}
		opt(o)
	}
	return o
}