fix(watchdog): persist a UI-saved Check Interval across restarts (#10601)

The watchdog Check Interval saved via /api/settings reverted to 500ms on
every restart, while the idle/busy timeouts persisted correctly.

Root cause: NewApplicationConfig baseline-defaulted WatchDogInterval to
500ms, whereas the idle/busy timeouts default to 0. The startup loader
(loadRuntimeSettingsFromFile) applies a persisted runtime_settings.json
value only when the field is still at its zero default - its heuristic
for "this wasn't set by an env var". Because the interval was always
500ms at that point, the loader never read the persisted value back, so
the saved interval was silently discarded on each boot.

Fix: drop the non-zero baseline default so the interval behaves like the
sibling timeouts (0 = unset). The effective 500ms default is now supplied
at the watchdog layer: WithWatchdogInterval ignores a non-positive value
so DefaultWatchDogOptions' 500ms is preserved (and a 0 interval can never
turn the watchdog loop into a busy spin). Also mirror the interval in the
live config file watcher alongside idle/busy, and report the real 500ms
default (not the stale "2s") from ToRuntimeSettings.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: Claude:claude-opus-4-8 [Claude Code]
This commit is contained in:
Ettore Di Giacinto
2026-06-30 08:04:12 +00:00
parent 0e381897b5
commit 347cdcf545
4 changed files with 58 additions and 9 deletions

View File

@@ -197,6 +197,7 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
envWatchdogBusy := appConfig.WatchDogBusy == startupAppConfig.WatchDogBusy
envWatchdogIdleTimeout := appConfig.WatchDogIdleTimeout == startupAppConfig.WatchDogIdleTimeout
envWatchdogBusyTimeout := appConfig.WatchDogBusyTimeout == startupAppConfig.WatchDogBusyTimeout
envWatchdogInterval := appConfig.WatchDogInterval == startupAppConfig.WatchDogInterval
envSingleBackend := appConfig.SingleBackend == startupAppConfig.SingleBackend
envMaxActiveBackends := appConfig.MaxActiveBackends == startupAppConfig.MaxActiveBackends
envMemoryReclaimerEnabled := appConfig.MemoryReclaimerEnabled == startupAppConfig.MemoryReclaimerEnabled
@@ -257,6 +258,14 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
xlog.Warn("invalid watchdog busy timeout in runtime_settings.json", "error", err, "timeout", *settings.WatchdogBusyTimeout)
}
}
if settings.WatchdogInterval != nil && !envWatchdogInterval {
dur, err := time.ParseDuration(*settings.WatchdogInterval)
if err == nil {
appConfig.WatchDogInterval = dur
} else {
xlog.Warn("invalid watchdog interval in runtime_settings.json", "error", err, "interval", *settings.WatchdogInterval)
}
}
// Handle MaxActiveBackends (new) and SingleBackend (deprecated)
if settings.MaxActiveBackends != nil && !envMaxActiveBackends {
appConfig.MaxActiveBackends = *settings.MaxActiveBackends

View File

@@ -87,6 +87,31 @@ var _ = Describe("loadRuntimeSettingsFromFile", func() {
})
})
// Watchdog check interval (issue #10601). Before the fix,
// NewApplicationConfig baseline-defaulted the interval to 500ms, unlike the
// idle/busy timeouts (which default to 0). The loader's "apply file value
// only if still at the zero default" env-detection therefore never fired
// for the interval, so a UI-saved Check Interval silently reverted to 500ms
// on every restart while the idle/busy timeouts persisted. These specs
// construct the config the same way boot does (NewApplicationConfig) so
// they observe the real default the loader sees.
Describe("watchdog interval", func() {
// Regression spec: with nothing else setting the interval, the value
// stored in the settings file must be the one in effect after loading.
It("loads a UI-saved watchdog_interval on the next startup", func() {
cfg := config.NewApplicationConfig()
// NOTE(review): seedSettings is defined outside this hunk; presumably it
// writes the JSON as the runtime settings file and returns its directory.
cfg.DynamicConfigsDir = seedSettings(`{"watchdog_interval": "2s"}`)
loadRuntimeSettingsFromFile(cfg)
Expect(cfg.WatchDogInterval).To(Equal(2 * time.Second))
})
// Precedence spec: a non-zero interval already present on the config
// must survive loading a file that carries a different value.
It("does not override an explicit env/CLI interval", func() {
cfg := config.NewApplicationConfig()
cfg.DynamicConfigsDir = seedSettings(`{"watchdog_interval": "2s"}`)
cfg.WatchDogInterval = 1 * time.Second // stands in for an interval set from env/CLI before the loader runs
loadRuntimeSettingsFromFile(cfg)
Expect(cfg.WatchDogInterval).To(Equal(1*time.Second), "env/CLI interval must win over the persisted file value")
})
})
// MITM listener address. The file is the only source — no env var
// exists — so a regression here means an admin who configured the
// listener via /api/settings loses it after a reboot, even though

View File

@@ -6,6 +6,7 @@ import (
"regexp"
"time"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/system"
"github.com/mudler/LocalAI/pkg/xsysinfo"
"github.com/mudler/xlog"
@@ -241,12 +242,19 @@ func NewApplicationConfig(o ...AppOption) *ApplicationConfig {
Context: context.Background(),
UploadLimitMB: 15,
Debug: true,
AgentJobRetentionDays: 30, // Default: 30 days
LRUEvictionMaxRetries: 30, // Default: 30 retries
LRUEvictionRetryInterval: 1 * time.Second, // Default: 1 second
WatchDogInterval: 500 * time.Millisecond, // Default: 500ms
TracingMaxItems: 1024,
TracingMaxBodyBytes: 64 * 1024, // 64 KiB - caps each request/response body in the trace buffer
AgentJobRetentionDays: 30, // Default: 30 days
LRUEvictionMaxRetries: 30, // Default: 30 retries
LRUEvictionRetryInterval: 1 * time.Second, // Default: 1 second
// WatchDogInterval is intentionally left at the zero value here.
// The startup loader applies a persisted runtime_settings.json value
// only when the interval is still 0 (its "not set by env var"
// heuristic, matching the idle/busy timeouts); a non-zero baseline
// default would defeat that and silently revert a UI-saved Check
// Interval to the default on every restart (#10601). The effective
// 500ms default is supplied at the watchdog layer (DefaultWatchdogInterval)
// when the value is still 0.
TracingMaxItems: 1024,
TracingMaxBodyBytes: 64 * 1024, // 64 KiB - caps each request/response body in the trace buffer
AgentPool: AgentPoolConfig{
Enabled: true,
Timeout: "5m",
@@ -1097,7 +1105,7 @@ func (o *ApplicationConfig) ToRuntimeSettings() RuntimeSettings {
if o.WatchDogInterval > 0 {
watchdogInterval = o.WatchDogInterval.String()
} else {
watchdogInterval = "2s" // default
watchdogInterval = model.DefaultWatchdogInterval.String() // default: 500ms
}
var lruEvictionRetryInterval string
if o.LRUEvictionRetryInterval > 0 {

View File

@@ -60,10 +60,17 @@ func WithIdleTimeout(timeout time.Duration) WatchDogOption {
}
}
// WithWatchdogInterval sets the watchdog check interval.
//
// A non-positive interval is ignored, so the value already held by the
// options (the DefaultWatchdogInterval set by DefaultWatchDogOptions) is
// preserved. Callers pass the raw ApplicationConfig value, which is 0 when
// neither an env var nor a persisted setting configured it (#10601), and a
// 0 interval would otherwise turn the watchdog loop into a busy spin.
func WithWatchdogInterval(interval time.Duration) WatchDogOption {
	return func(o *WatchDogOptions) {
		if interval <= 0 {
			// Unset or invalid: leave the current interval untouched.
			return
		}
		o.watchdogInterval = interval
	}
}