feat: allow to pin models and skip from reaping (#9309)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2026-04-11 08:38:17 +02:00
committed by GitHub
parent 062e0d0d00
commit 5c35e85fe2
10 changed files with 366 additions and 2 deletions

View File

@@ -5,6 +5,27 @@ import (
"github.com/mudler/xlog"
)
// SyncPinnedModelsToWatchdog collects the names of every model config that
// reports IsPinned and hands that list to the watchdog via SetPinnedModels.
// It is a no-op when either the model config loader or the watchdog is nil.
func (a *Application) SyncPinnedModelsToWatchdog() {
	loader := a.ModelConfigLoader()
	if loader == nil {
		return
	}

	watchdog := a.modelLoader.GetWatchDog()
	if watchdog == nil {
		return
	}

	// Left as a nil slice when nothing is pinned; the watchdog receives it as-is.
	var names []string
	for _, modelCfg := range loader.GetAllModelsConfigs() {
		if !modelCfg.IsPinned() {
			continue
		}
		names = append(names, modelCfg.Name)
	}

	watchdog.SetPinnedModels(names)
	xlog.Debug("Synced pinned models to watchdog", "count", len(names))
}
func (a *Application) StopWatchdog() error {
if a.watchdogStop != nil {
close(a.watchdogStop)
@@ -44,6 +65,9 @@ func (a *Application) startWatchdog() error {
// Set the watchdog on the model loader
a.modelLoader.SetWatchDog(wd)
// Sync pinned models from config to the watchdog
a.SyncPinnedModelsToWatchdog()
// Start watchdog goroutine if any periodic checks are enabled
// LRU eviction doesn't need the Run() loop - it's triggered on model load
// But memory reclaimer needs the Run() loop for periodic checking
@@ -124,5 +148,8 @@ func (a *Application) RestartWatchdog() error {
newWD.RestoreState(oldState)
}
// Re-sync pinned models after restart
a.SyncPinnedModelsToWatchdog()
return nil
}

View File

@@ -78,6 +78,7 @@ type ModelConfig struct {
Description string `yaml:"description,omitempty" json:"description,omitempty"`
Usage string `yaml:"usage,omitempty" json:"usage,omitempty"`
Disabled *bool `yaml:"disabled,omitempty" json:"disabled,omitempty"`
Pinned *bool `yaml:"pinned,omitempty" json:"pinned,omitempty"`
Options []string `yaml:"options,omitempty" json:"options,omitempty"`
Overrides []string `yaml:"overrides,omitempty" json:"overrides,omitempty"`
@@ -554,6 +555,11 @@ func (c *ModelConfig) IsDisabled() bool {
return c.Disabled != nil && *c.Disabled
}
// IsPinned reports whether the model is pinned (excluded from idle unloading
// and eviction). An unset Pinned field is treated as "not pinned".
func (c *ModelConfig) IsPinned() bool {
	if c.Pinned == nil {
		return false
	}
	return *c.Pinned
}
type ModelConfigUsecase int
const (

View File

@@ -0,0 +1,144 @@
package localai
import (
"fmt"
"net/http"
"net/url"
"os"
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/utils"
"gopkg.in/yaml.v3"
)
// TogglePinnedModelEndpoint handles pinning or unpinning a model.
// Pinned models are excluded from idle unloading, LRU eviction, and memory-pressure eviction.
//
// The handler rewrites the model's YAML config file on disk (setting or
// removing the top-level "pinned" key), reloads all model configs through cl,
// and finally invokes syncPinnedFn (when non-nil) so the caller can propagate
// the new pinned set.
//
// Parameters:
//   - cl: config loader used to look up the model and to reload configs afterwards.
//   - appConfig: supplies the models path used for path verification and reload.
//   - syncPinnedFn: optional callback run after a successful reload; may be nil.
//
// NOTE(review): the file is round-tripped through a generic map, which
// presumably drops YAML comments and the original key order — confirm this is
// acceptable for user-maintained config files.
//
// @Summary Toggle model pinned status
// @Description Pin or unpin a model. Pinned models stay loaded and are excluded from automatic eviction.
// @Tags config
// @Param name path string true "Model name"
// @Param action path string true "Action: 'pin' or 'unpin'"
// @Success 200 {object} ModelResponse
// @Failure 400 {object} ModelResponse
// @Failure 403 {object} ModelResponse
// @Failure 404 {object} ModelResponse
// @Failure 500 {object} ModelResponse
// @Router /api/models/toggle-pinned/{name}/{action} [put]
func TogglePinnedModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, syncPinnedFn func()) echo.HandlerFunc {
	return func(c echo.Context) error {
		modelName := c.Param("name")
		// Best-effort decode: if the parameter is not valid percent-encoding,
		// fall back to the raw value rather than rejecting the request.
		if decoded, err := url.PathUnescape(modelName); err == nil {
			modelName = decoded
		}
		if modelName == "" {
			return c.JSON(http.StatusBadRequest, ModelResponse{
				Success: false,
				Error:   "Model name is required",
			})
		}

		action := c.Param("action")
		if action != "pin" && action != "unpin" {
			return c.JSON(http.StatusBadRequest, ModelResponse{
				Success: false,
				Error:   "Action must be 'pin' or 'unpin'",
			})
		}

		// Get existing model config
		modelConfig, exists := cl.GetModelConfig(modelName)
		if !exists {
			return c.JSON(http.StatusNotFound, ModelResponse{
				Success: false,
				Error:   "Model configuration not found",
			})
		}

		// Get the config file path
		configPath := modelConfig.GetModelConfigFile()
		if configPath == "" {
			return c.JSON(http.StatusNotFound, ModelResponse{
				Success: false,
				Error:   "Model configuration file not found",
			})
		}

		// Verify the path is trusted (checked against the configured models path)
		if err := utils.VerifyPath(configPath, appConfig.SystemState.Model.ModelsPath); err != nil {
			return c.JSON(http.StatusForbidden, ModelResponse{
				Success: false,
				Error:   "Model configuration not trusted: " + err.Error(),
			})
		}

		// Read the existing config file
		configData, err := os.ReadFile(configPath)
		if err != nil {
			return c.JSON(http.StatusInternalServerError, ModelResponse{
				Success: false,
				Error:   "Failed to read configuration file: " + err.Error(),
			})
		}

		// Parse the YAML config as a generic map to preserve all fields
		var configMap map[string]interface{}
		if err := yaml.Unmarshal(configData, &configMap); err != nil {
			return c.JSON(http.StatusInternalServerError, ModelResponse{
				Success: false,
				Error:   "Failed to parse configuration file: " + err.Error(),
			})
		}
		// An empty or comment-only document decodes without error but leaves
		// the map nil; writing to a nil map would panic in the "pin" branch.
		if configMap == nil {
			configMap = make(map[string]interface{})
		}

		// Update the pinned field
		pinned := action == "pin"
		if pinned {
			configMap["pinned"] = true
		} else {
			// Remove the pinned key entirely when unpinning (clean YAML)
			delete(configMap, "pinned")
		}

		// Marshal back to YAML
		updatedData, err := yaml.Marshal(configMap)
		if err != nil {
			return c.JSON(http.StatusInternalServerError, ModelResponse{
				Success: false,
				Error:   "Failed to serialize configuration: " + err.Error(),
			})
		}

		// Write updated config back to file
		if err := os.WriteFile(configPath, updatedData, 0644); err != nil {
			return c.JSON(http.StatusInternalServerError, ModelResponse{
				Success: false,
				Error:   "Failed to write configuration file: " + err.Error(),
			})
		}

		// Reload model configurations from disk
		if err := cl.LoadModelConfigsFromPath(appConfig.SystemState.Model.ModelsPath, appConfig.ToConfigLoaderOptions()...); err != nil {
			return c.JSON(http.StatusInternalServerError, ModelResponse{
				Success: false,
				Error:   "Failed to reload configurations: " + err.Error(),
			})
		}

		// Sync pinned models to the watchdog
		if syncPinnedFn != nil {
			syncPinnedFn()
		}

		// action is "pin" or "unpin", so appending "ned" yields "pinned"/"unpinned".
		msg := fmt.Sprintf("Model '%s' has been %sned successfully.", modelName, action)
		if pinned {
			msg += " The model will be excluded from automatic eviction."
		}

		return c.JSON(http.StatusOK, ModelResponse{
			Success:  true,
			Message:  msg,
			Filename: configPath,
		})
	}
}

View File

@@ -25,6 +25,7 @@ export default function Manage() {
const [confirmDialog, setConfirmDialog] = useState(null)
const [distributedMode, setDistributedMode] = useState(false)
const [togglingModels, setTogglingModels] = useState(new Set())
const [pinningModels, setPinningModels] = useState(new Set())
const handleTabChange = (tab) => {
setActiveTab(tab)
@@ -122,6 +123,24 @@ export default function Manage() {
}
}
// Pins or unpins a model through the API. While the request is in flight the
// model id is kept in `pinningModels`; it is always removed again afterwards,
// whether the call succeeded or failed.
const handleTogglePinned = async (modelId, currentlyPinned) => {
  let action = 'pin'
  if (currentlyPinned) {
    action = 'unpin'
  }

  // Copy the Set so React sees a new reference instead of a mutated one.
  setPinningModels((inFlight) => {
    const updated = new Set(inFlight)
    updated.add(modelId)
    return updated
  })

  try {
    await modelsApi.togglePinned(modelId, action)
    // "pin" / "unpin" + "ned" reads as "pinned" / "unpinned".
    addToast(`Model ${modelId} ${action}ned`, 'success')
    refetchModels()
  } catch (error) {
    addToast(`Failed to ${action} model: ${error.message}`, 'error')
  } finally {
    setPinningModels((inFlight) => {
      const updated = new Set(inFlight)
      updated.delete(modelId)
      return updated
    })
  }
}
const handleReload = async () => {
setReloading(true)
try {
@@ -303,6 +322,22 @@ export default function Manage() {
<i className="fas fa-stop" />
</button>
)}
{/* Pin button - prevents model from being unloaded */}
<button
className="btn btn-sm"
onClick={() => handleTogglePinned(model.id, model.pinned)}
disabled={pinningModels.has(model.id) || model.disabled}
title={model.pinned ? 'Unpin model (allow idle unloading)' : 'Pin model (prevent idle unloading)'}
style={{
padding: '2px 6px',
minWidth: 28,
color: model.pinned ? 'var(--color-warning, #f59e0b)' : 'var(--color-text-muted)',
opacity: model.disabled ? 0.3 : (pinningModels.has(model.id) ? 0.5 : 1),
cursor: pinningModels.has(model.id) ? 'wait' : (model.disabled ? 'not-allowed' : 'pointer'),
}}
>
<i className={`fas fa-thumbtack${pinningModels.has(model.id) ? ' fa-spin' : ''}`} />
</button>
{/* Toggle switch for enabling/disabling model loading on demand */}
<label
title={model.disabled ? 'Model is disabled — click to enable loading on demand' : 'Model is enabled — click to disable loading on demand'}

View File

@@ -98,6 +98,7 @@ export const modelsApi = {
getEditConfig: (name) => fetchJSON(API_CONFIG.endpoints.modelEditGet(name)),
editConfig: (name, body) => postJSON(API_CONFIG.endpoints.modelEdit(name), body),
toggleState: (name, action) => fetchJSON(API_CONFIG.endpoints.modelToggleState(name, action), { method: 'PUT' }),
togglePinned: (name, action) => fetchJSON(API_CONFIG.endpoints.modelTogglePinned(name, action), { method: 'PUT' }),
getConfigMetadata: (section) => fetchJSON(
section ? `${API_CONFIG.endpoints.configMetadata}?section=${section}`
: API_CONFIG.endpoints.configMetadata

View File

@@ -94,6 +94,7 @@ export const API_CONFIG = {
modelEditGet: (name) => `/api/models/edit/${name}`,
modelEdit: (name) => `/models/edit/${name}`,
modelToggleState: (name, action) => `/models/toggle-state/${name}/${action}`,
modelTogglePinned: (name, action) => `/models/toggle-pinned/${name}/${action}`,
backendsAvailable: '/backends/available',
backendsInstalled: '/backends',
version: '/version',

View File

@@ -78,6 +78,11 @@ func RegisterLocalAIRoutes(router *echo.Echo,
// Toggle model enable/disable endpoint
router.PUT("/models/toggle-state/:name/:action", localai.ToggleStateModelEndpoint(cl, ml, appConfig), adminMiddleware)
// Toggle model pinned status endpoint
router.PUT("/models/toggle-pinned/:name/:action", localai.TogglePinnedModelEndpoint(cl, appConfig, func() {
app.SyncPinnedModelsToWatchdog()
}), adminMiddleware)
// Reload models endpoint
router.POST("/models/reload", localai.ReloadModelsEndpoint(cl, appConfig), adminMiddleware)
}

View File

@@ -515,6 +515,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
Capabilities []string `json:"capabilities"`
Backend string `json:"backend"`
Disabled bool `json:"disabled"`
Pinned bool `json:"pinned"`
}
result := make([]modelCapability, 0, len(modelConfigs)+len(modelsWithoutConfig))
@@ -524,6 +525,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
Capabilities: cfg.KnownUsecaseStrings,
Backend: cfg.Backend,
Disabled: cfg.IsDisabled(),
Pinned: cfg.IsPinned(),
})
}
for _, name := range modelsWithoutConfig {

View File

@@ -45,6 +45,9 @@ type WatchDog struct {
// Eviction settings
forceEvictionWhenBusy bool // Force eviction even when models have active API calls (default: false for safety)
// Pinned models are excluded from idle, LRU, and memory-pressure eviction
pinnedModels map[string]bool
}
type ProcessManager interface {
@@ -78,6 +81,7 @@ func NewWatchDog(opts ...WatchDogOption) *WatchDog {
idleCheck: o.idleCheck,
lruLimit: o.lruLimit,
addressModelMap: make(map[string]string),
pinnedModels: make(map[string]bool),
stop: make(chan bool, 1),
done: make(chan bool, 1),
memoryReclaimerEnabled: o.memoryReclaimerEnabled,
@@ -123,6 +127,24 @@ func (wd *WatchDog) SetForceEvictionWhenBusy(force bool) {
wd.forceEvictionWhenBusy = force
}
// SetPinnedModels swaps in a fresh pinned set built from models; names from
// earlier calls that are not in models are no longer pinned.
// Pinned models are excluded from idle, LRU, and memory-pressure eviction.
// The watchdog lock is held for the duration of the update.
func (wd *WatchDog) SetPinnedModels(models []string) {
	wd.Lock()
	defer wd.Unlock()

	replacement := make(map[string]bool, len(models))
	for i := range models {
		replacement[models[i]] = true
	}
	wd.pinnedModels = replacement
}
// IsModelPinned reports whether modelName is currently in the pinned set.
// Unknown names report false. The watchdog lock is held during the lookup.
func (wd *WatchDog) IsModelPinned(modelName string) bool {
	wd.Lock()
	defer wd.Unlock()

	pinned, found := wd.pinnedModels[modelName]
	return found && pinned
}
func (wd *WatchDog) Shutdown() {
wd.Lock()
defer wd.Unlock()
@@ -310,6 +332,11 @@ func (wd *WatchDog) EnforceLRULimit(pendingLoads int) EnforceLRULimitResult {
skippedBusyCount := 0
for i := 0; evictedCount < modelsToEvict && i < len(models); i++ {
m := models[i]
// Skip pinned models
if wd.pinnedModels[m.model] {
xlog.Debug("[WatchDog] Skipping LRU eviction for pinned model", "model", m.model)
continue
}
// Check if model is busy
_, isBusy := wd.busyTime[m.address]
if isBusy && !forceEvictionWhenBusy {
@@ -389,9 +416,13 @@ func (wd *WatchDog) checkIdle() {
for address, t := range wd.idleTime {
xlog.Debug("[WatchDog] idle connection", "address", address)
if time.Since(t) > wd.idletimeout {
xlog.Warn("[WatchDog] Address is idle for too long, killing it", "address", address)
model, ok := wd.addressModelMap[address]
if ok {
if wd.pinnedModels[model] {
xlog.Debug("[WatchDog] Skipping idle eviction for pinned model", "model", model)
continue
}
xlog.Warn("[WatchDog] Address is idle for too long, killing it", "address", address)
modelsToShutdown = append(modelsToShutdown, model)
} else {
xlog.Warn("[WatchDog] Address unresolvable", "address", address)
@@ -514,10 +545,14 @@ func (wd *WatchDog) evictLRUModel() {
return a.lastUsed.Compare(b.lastUsed)
})
// Find the first non-busy model (or first model if forceEvictionWhenBusy is true)
// Find the first non-busy, non-pinned model (or first non-pinned model if forceEvictionWhenBusy is true)
var lruModel *modelUsageInfo
for i := range len(models) {
m := models[i]
if wd.pinnedModels[m.model] {
xlog.Debug("[WatchDog] Skipping memory reclaimer eviction for pinned model", "model", m.model)
continue
}
_, isBusy := wd.busyTime[m.address]
if isBusy && !forceEvictionWhenBusy {
// Skip busy models when forceEvictionWhenBusy is false

View File

@@ -558,6 +558,114 @@ var _ = Describe("WatchDog", func() {
})
})
// Specs for the pinned-model feature: the SetPinnedModels/IsModelPinned
// accessors, and the guarantee that pinned models are skipped by LRU
// enforcement and by the idle check.
// NOTE(review): `wd` and `pm` are declared outside this Context (not visible
// here); pm is presumably a fake process manager recording shutdown calls — confirm.
Context("Pinned Models", func() {
// Nothing is pinned on a fresh watchdog; after SetPinnedModels only the
// listed names report as pinned.
It("should set and get pinned models", func() {
wd = model.NewWatchDog(
model.WithProcessManager(pm),
)
Expect(wd.IsModelPinned("model1")).To(BeFalse())
wd.SetPinnedModels([]string{"model1", "model2"})
Expect(wd.IsModelPinned("model1")).To(BeTrue())
Expect(wd.IsModelPinned("model2")).To(BeTrue())
Expect(wd.IsModelPinned("model3")).To(BeFalse())
})
// SetPinnedModels replaces the whole set rather than adding to it:
// model1 stops being pinned once a second call omits it.
It("should replace pinned models on subsequent calls", func() {
wd = model.NewWatchDog(
model.WithProcessManager(pm),
)
wd.SetPinnedModels([]string{"model1"})
Expect(wd.IsModelPinned("model1")).To(BeTrue())
wd.SetPinnedModels([]string{"model2"})
Expect(wd.IsModelPinned("model1")).To(BeFalse())
Expect(wd.IsModelPinned("model2")).To(BeTrue())
})
// The oldest model is pinned, so LRU enforcement must pass over it and
// evict the newer, unpinned one instead.
It("should skip pinned models during LRU eviction", func() {
wd = model.NewWatchDog(
model.WithProcessManager(pm),
model.WithLRULimit(2),
model.WithForceEvictionWhenBusy(true),
)
// Add two models, pin the older one
wd.AddAddressModelMap("addr1", "model1")
wd.Mark("addr1")
wd.UnMark("addr1")
// The short sleep separates the two models' usage timestamps so that
// model1 is the older one.
time.Sleep(10 * time.Millisecond)
wd.AddAddressModelMap("addr2", "model2")
wd.Mark("addr2")
wd.UnMark("addr2")
wd.SetPinnedModels([]string{"model1"})
// Enforce LRU - model1 is oldest but pinned, model2 should be evicted
// NOTE(review): with a limit of 2 and two models tracked, exactly one
// eviction is expected — presumably to make room for an incoming load; confirm.
result := wd.EnforceLRULimit(0)
Expect(result.EvictedCount).To(Equal(1))
Expect(pm.getShutdownCalls()).To(ContainElement("model2"))
Expect(pm.getShutdownCalls()).ToNot(ContainElement("model1"))
})
// When every tracked model is pinned there is no eviction candidate, so
// enforcement reports zero evictions and shuts nothing down.
It("should not evict any model when all are pinned and LRU limit reached", func() {
wd = model.NewWatchDog(
model.WithProcessManager(pm),
model.WithLRULimit(1),
model.WithForceEvictionWhenBusy(true),
)
wd.AddAddressModelMap("addr1", "model1")
wd.Mark("addr1")
wd.UnMark("addr1")
time.Sleep(10 * time.Millisecond)
wd.AddAddressModelMap("addr2", "model2")
wd.Mark("addr2")
wd.UnMark("addr2")
wd.SetPinnedModels([]string{"model1", "model2"})
result := wd.EnforceLRULimit(0)
Expect(result.EvictedCount).To(Equal(0))
Expect(pm.getShutdownCalls()).To(BeEmpty())
})
// Runs the real watchdog loop with a 10ms idle timeout and a 50ms check
// interval; both models go idle but only the unpinned one may be shut down.
It("should skip pinned models during idle check", func() {
wd = model.NewWatchDog(
model.WithProcessManager(pm),
model.WithIdleTimeout(10*time.Millisecond),
model.WithIdleCheck(true),
model.WithWatchdogInterval(50*time.Millisecond),
)
// Add two models and make them idle
wd.AddAddressModelMap("addr1", "model1")
wd.Mark("addr1")
wd.UnMark("addr1")
wd.AddAddressModelMap("addr2", "model2")
wd.Mark("addr2")
wd.UnMark("addr2")
// Pin model1
wd.SetPinnedModels([]string{"model1"})
// Start watchdog and wait for idle check
go wd.Run()
defer wd.Shutdown()
// Wait for the idle timeout + watchdog interval to pass
// (200ms leaves room for several 50ms check intervals)
time.Sleep(200 * time.Millisecond)
// Only model2 should be shut down
shutdowns := pm.getShutdownCalls()
Expect(shutdowns).To(ContainElement("model2"))
Expect(shutdowns).ToNot(ContainElement("model1"))
})
})
Context("Functional Options", func() {
It("should use default options when none provided", func() {
wd = model.NewWatchDog(