package application import ( "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/pkg/model" "github.com/mudler/xlog" ) // SyncPinnedModelsToWatchdog reads pinned status from all model configs and updates the watchdog func (a *Application) SyncPinnedModelsToWatchdog() { cl := a.ModelConfigLoader() if cl == nil { return } wd := a.modelLoader.GetWatchDog() if wd == nil { return } configs := cl.GetAllModelsConfigs() var pinned []string for _, cfg := range configs { if cfg.IsPinned() { pinned = append(pinned, cfg.Name) } } wd.SetPinnedModels(pinned) xlog.Debug("Synced pinned models to watchdog", "count", len(pinned)) } // SyncModelGroupsToWatchdog reads concurrency_groups from all model configs and // updates the watchdog so EnforceGroupExclusivity has the current view. func (a *Application) SyncModelGroupsToWatchdog() { cl := a.ModelConfigLoader() if cl == nil { return } wd := a.modelLoader.GetWatchDog() if wd == nil { return } groups := extractModelGroupsFromConfigs(cl.GetAllModelsConfigs()) wd.ReplaceModelGroups(groups) xlog.Debug("Synced concurrency groups to watchdog", "count", len(groups)) } // extractModelGroupsFromConfigs builds the model→groups map the watchdog // expects. Disabled models are skipped — their declared groups should not // block other models from loading. func extractModelGroupsFromConfigs(configs []config.ModelConfig) map[string][]string { out := make(map[string][]string) for _, cfg := range configs { if cfg.IsDisabled() { continue } gs := cfg.GetConcurrencyGroups() if len(gs) == 0 { continue } out[cfg.Name] = gs } return out } func (a *Application) StopWatchdog() error { if a.watchdogStop != nil { close(a.watchdogStop) a.watchdogStop = nil } return nil } // startWatchdog starts the watchdog with current ApplicationConfig settings // This is an internal method that assumes the caller holds the watchdogMutex func (a *Application) startWatchdog() error { appConfig := a.ApplicationConfig() // Get effective max active backends (considers both MaxActiveBackends and deprecated SingleBackend) lruLimit := appConfig.GetEffectiveMaxActiveBackends() // Create watchdog if enabled OR if LRU limit is set OR if memory reclaimer is enabled // LRU eviction requires watchdog infrastructure even without busy/idle checks if appConfig.WatchDog || lruLimit > 0 || appConfig.MemoryReclaimerEnabled { wd := model.NewWatchDog( model.WithProcessManager(a.modelLoader), model.WithBusyTimeout(appConfig.WatchDogBusyTimeout), model.WithIdleTimeout(appConfig.WatchDogIdleTimeout), model.WithWatchdogInterval(appConfig.WatchDogInterval), model.WithBusyCheck(appConfig.WatchDogBusy), model.WithIdleCheck(appConfig.WatchDogIdle), model.WithLRULimit(lruLimit), model.WithMemoryReclaimer(appConfig.MemoryReclaimerEnabled, appConfig.MemoryReclaimerThreshold), model.WithForceEvictionWhenBusy(appConfig.ForceEvictionWhenBusy), ) // Create new stop channel BEFORE setting up any goroutines // This prevents race conditions where the old shutdown handler might // receive the closed channel and try to shut down the new watchdog a.watchdogStop = make(chan bool, 1) // Set the watchdog on the model loader a.modelLoader.SetWatchDog(wd) // Sync pinned models and concurrency groups from config to the watchdog a.SyncPinnedModelsToWatchdog() a.SyncModelGroupsToWatchdog() // Start watchdog goroutine if any periodic checks are enabled // LRU eviction doesn't need the Run() loop - it's triggered on model load // But memory reclaimer needs the Run() loop for periodic checking if appConfig.WatchDogBusy || appConfig.WatchDogIdle || appConfig.MemoryReclaimerEnabled { go wd.Run() } // Setup shutdown handler - this goroutine will wait on a.watchdogStop // which is now a fresh channel, so it won't receive any stale signals // Note: We capture wd in a local variable to ensure this handler operates // on the correct watchdog instance (not a later one that gets assigned to wd) wdForShutdown := wd go func() { select { case <-a.watchdogStop: xlog.Debug("Watchdog stop signal received") wdForShutdown.Shutdown() case <-appConfig.Context.Done(): xlog.Debug("Context canceled, shutting down watchdog") wdForShutdown.Shutdown() } }() xlog.Info("Watchdog started with new settings", "lruLimit", lruLimit, "busyCheck", appConfig.WatchDogBusy, "idleCheck", appConfig.WatchDogIdle, "memoryReclaimer", appConfig.MemoryReclaimerEnabled, "memoryThreshold", appConfig.MemoryReclaimerThreshold, "interval", appConfig.WatchDogInterval) } else { xlog.Info("Watchdog disabled") } return nil } // StartWatchdog starts the watchdog with current ApplicationConfig settings func (a *Application) StartWatchdog() error { a.watchdogMutex.Lock() defer a.watchdogMutex.Unlock() return a.startWatchdog() } // RestartWatchdog restarts the watchdog with current ApplicationConfig settings func (a *Application) RestartWatchdog() error { a.watchdogMutex.Lock() defer a.watchdogMutex.Unlock() // Get the old watchdog before we shut it down oldWD := a.modelLoader.GetWatchDog() // Get the state from the old watchdog before shutting it down // This preserves information about loaded models var oldState model.WatchDogState if oldWD != nil { oldState = oldWD.GetState() } // Signal all handlers to stop by closing the stop channel // This will cause any goroutine waiting on <-a.watchdogStop to unblock if a.watchdogStop != nil { close(a.watchdogStop) a.watchdogStop = nil } // Shutdown existing watchdog - this triggers the stop signal if oldWD != nil { oldWD.Shutdown() // Wait for the old watchdog's Run() goroutine to fully shut down oldWD.WaitDone() } // Start watchdog with new settings if err := a.startWatchdog(); err != nil { return err } // Restore the model state from the old watchdog to the new one // This ensures the new watchdog knows about already-loaded models newWD := a.modelLoader.GetWatchDog() if newWD != nil && len(oldState.AddressModelMap) > 0 { newWD.RestoreState(oldState) } // Re-sync pinned models and concurrency groups after restart a.SyncPinnedModelsToWatchdog() a.SyncModelGroupsToWatchdog() return nil }