mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-30 11:26:32 -04:00
fix(watchdog): persist a UI-saved Check Interval across restarts (#10601)
The watchdog Check Interval saved via /api/settings reverted to 500ms on every restart, while the idle/busy timeouts persisted correctly. Root cause: NewApplicationConfig baseline-defaulted WatchDogInterval to 500ms, whereas the idle/busy timeouts default to 0. The startup loader (loadRuntimeSettingsFromFile) applies a persisted runtime_settings.json value only when the field is still at its zero default - its heuristic for "this wasn't set by an env var". Because the interval was always 500ms at that point, the loader never read the persisted value back, so the saved interval was silently discarded on each boot. Fix: drop the non-zero baseline default so the interval behaves like the sibling timeouts (0 = unset). The effective 500ms default is now supplied at the watchdog layer: WithWatchdogInterval ignores a non-positive value so DefaultWatchDogOptions' 500ms is preserved (and a 0 interval can never turn the watchdog loop into a busy spin). Also mirror the interval in the live config file watcher alongside idle/busy, and report the real 500ms default (not the stale "2s") from ToRuntimeSettings. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code]
This commit is contained in:
@@ -197,6 +197,7 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
|
||||
envWatchdogBusy := appConfig.WatchDogBusy == startupAppConfig.WatchDogBusy
|
||||
envWatchdogIdleTimeout := appConfig.WatchDogIdleTimeout == startupAppConfig.WatchDogIdleTimeout
|
||||
envWatchdogBusyTimeout := appConfig.WatchDogBusyTimeout == startupAppConfig.WatchDogBusyTimeout
|
||||
envWatchdogInterval := appConfig.WatchDogInterval == startupAppConfig.WatchDogInterval
|
||||
envSingleBackend := appConfig.SingleBackend == startupAppConfig.SingleBackend
|
||||
envMaxActiveBackends := appConfig.MaxActiveBackends == startupAppConfig.MaxActiveBackends
|
||||
envMemoryReclaimerEnabled := appConfig.MemoryReclaimerEnabled == startupAppConfig.MemoryReclaimerEnabled
|
||||
@@ -257,6 +258,14 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
|
||||
xlog.Warn("invalid watchdog busy timeout in runtime_settings.json", "error", err, "timeout", *settings.WatchdogBusyTimeout)
|
||||
}
|
||||
}
|
||||
if settings.WatchdogInterval != nil && !envWatchdogInterval {
|
||||
dur, err := time.ParseDuration(*settings.WatchdogInterval)
|
||||
if err == nil {
|
||||
appConfig.WatchDogInterval = dur
|
||||
} else {
|
||||
xlog.Warn("invalid watchdog interval in runtime_settings.json", "error", err, "interval", *settings.WatchdogInterval)
|
||||
}
|
||||
}
|
||||
// Handle MaxActiveBackends (new) and SingleBackend (deprecated)
|
||||
if settings.MaxActiveBackends != nil && !envMaxActiveBackends {
|
||||
appConfig.MaxActiveBackends = *settings.MaxActiveBackends
|
||||
|
||||
@@ -87,6 +87,31 @@ var _ = Describe("loadRuntimeSettingsFromFile", func() {
|
||||
})
|
||||
})
|
||||
|
||||
// Watchdog check interval (issue #10601). Unlike the idle/busy timeouts
|
||||
// (which default to 0), NewApplicationConfig baseline-defaults the
|
||||
// interval to 500ms. The loader's "apply file value only if still at the
|
||||
// zero default" env-detection therefore never fired for the interval, so
|
||||
// a UI-saved Check Interval silently reverted to 500ms on every restart
|
||||
// while the idle/busy timeouts persisted. These specs construct the
|
||||
// config the same way boot does (NewApplicationConfig) so they observe
|
||||
// the real default the loader sees.
|
||||
Describe("watchdog interval", func() {
|
||||
It("loads a UI-saved watchdog_interval on the next startup", func() {
|
||||
cfg := config.NewApplicationConfig()
|
||||
cfg.DynamicConfigsDir = seedSettings(`{"watchdog_interval": "2s"}`)
|
||||
loadRuntimeSettingsFromFile(cfg)
|
||||
Expect(cfg.WatchDogInterval).To(Equal(2 * time.Second))
|
||||
})
|
||||
|
||||
It("does not override an explicit env/CLI interval", func() {
|
||||
cfg := config.NewApplicationConfig()
|
||||
cfg.DynamicConfigsDir = seedSettings(`{"watchdog_interval": "2s"}`)
|
||||
cfg.WatchDogInterval = 1 * time.Second // simulate SetWatchDogInterval from env
|
||||
loadRuntimeSettingsFromFile(cfg)
|
||||
Expect(cfg.WatchDogInterval).To(Equal(1*time.Second), "env/CLI interval must win over the persisted file value")
|
||||
})
|
||||
})
|
||||
|
||||
// MITM listener address. The file is the only source — no env var
|
||||
// exists — so a regression here means an admin who configured the
|
||||
// listener via /api/settings loses it after a reboot, even though
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"regexp"
|
||||
"time"
|
||||
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/LocalAI/pkg/system"
|
||||
"github.com/mudler/LocalAI/pkg/xsysinfo"
|
||||
"github.com/mudler/xlog"
|
||||
@@ -241,12 +242,19 @@ func NewApplicationConfig(o ...AppOption) *ApplicationConfig {
|
||||
Context: context.Background(),
|
||||
UploadLimitMB: 15,
|
||||
Debug: true,
|
||||
AgentJobRetentionDays: 30, // Default: 30 days
|
||||
LRUEvictionMaxRetries: 30, // Default: 30 retries
|
||||
LRUEvictionRetryInterval: 1 * time.Second, // Default: 1 second
|
||||
WatchDogInterval: 500 * time.Millisecond, // Default: 500ms
|
||||
TracingMaxItems: 1024,
|
||||
TracingMaxBodyBytes: 64 * 1024, // 64 KiB - caps each request/response body in the trace buffer
|
||||
AgentJobRetentionDays: 30, // Default: 30 days
|
||||
LRUEvictionMaxRetries: 30, // Default: 30 retries
|
||||
LRUEvictionRetryInterval: 1 * time.Second, // Default: 1 second
|
||||
// WatchDogInterval is intentionally left at the zero value here.
|
||||
// The startup loader applies a persisted runtime_settings.json value
|
||||
// only when the interval is still 0 (its "not set by env var"
|
||||
// heuristic, matching the idle/busy timeouts); a non-zero baseline
|
||||
// default would defeat that and silently revert a UI-saved Check
|
||||
// Interval to the default on every restart (#10601). The effective
|
||||
// 500ms default is supplied at the watchdog layer (DefaultWatchdogInterval)
|
||||
// when the value is still 0.
|
||||
TracingMaxItems: 1024,
|
||||
TracingMaxBodyBytes: 64 * 1024, // 64 KiB - caps each request/response body in the trace buffer
|
||||
AgentPool: AgentPoolConfig{
|
||||
Enabled: true,
|
||||
Timeout: "5m",
|
||||
@@ -1097,7 +1105,7 @@ func (o *ApplicationConfig) ToRuntimeSettings() RuntimeSettings {
|
||||
if o.WatchDogInterval > 0 {
|
||||
watchdogInterval = o.WatchDogInterval.String()
|
||||
} else {
|
||||
watchdogInterval = "2s" // default
|
||||
watchdogInterval = model.DefaultWatchdogInterval.String() // default: 500ms
|
||||
}
|
||||
var lruEvictionRetryInterval string
|
||||
if o.LRUEvictionRetryInterval > 0 {
|
||||
|
||||
@@ -60,10 +60,17 @@ func WithIdleTimeout(timeout time.Duration) WatchDogOption {
|
||||
}
|
||||
}
|
||||
|
||||
// WithWatchdogCheck sets the watchdog check duration
|
||||
// WithWatchdogInterval sets the watchdog check interval. A non-positive
|
||||
// interval is ignored so the DefaultWatchdogInterval set by
|
||||
// DefaultWatchDogOptions is preserved: callers pass the raw
|
||||
// ApplicationConfig value, which is 0 when neither an env var nor a
|
||||
// persisted setting configured it (#10601), and a 0 interval would otherwise
|
||||
// turn the watchdog loop into a busy spin.
|
||||
func WithWatchdogInterval(interval time.Duration) WatchDogOption {
|
||||
return func(o *WatchDogOptions) {
|
||||
o.watchdogInterval = interval
|
||||
if interval > 0 {
|
||||
o.watchdogInterval = interval
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user