Files
LocalAI/core/http/endpoints/localai/settings.go
Richard Palethorpe 6a80e23733 feat(middleware): Model routing, PII filtering, Cloud model proxies (#9802)
Add a routing middleware stack and a cloud-proxy backend.

* cloud-proxy: a Go gRPC backend that forwards OpenAI- and
  Anthropic-shaped chat requests to upstream providers, with an
  optional translate mode (OpenAI request -> Anthropic /v1/messages
  -> OpenAI response) and full tool-calling support.

* routing: admission control, content-aware model routing
  (embedding cache + classifier + rerank + Arch-Router score),
  PII detection/redaction (regex + NER) with streaming filter and
  OpenAI/Anthropic adapters, and a per-user/per-key billing recorder
  backed by GORM or in-memory storage.

* middleware: UsageMiddleware records usage via the billing recorder,
  plus admission, route-model, usage-stamp and trace middlewares.

* observability: BackendTrace ring buffer stores full request bodies
  (capped), MITM proxy emits structured trace events, and router
  classifier decisions surface at /api/router/decide.

* gallery: Arch-Router-1.5B (Q4_K_M and Q8_0).

* UI: cloud-proxy model-editor fields, classifier system-prompt and
  score-normalization config, and a Traces page rendering request
  bodies.

Assisted-by: claude-code:claude-opus-4-7 [Read] [Edit] [Bash]

Signed-off-by: Richard Palethorpe <io@richiejp.com>
2026-05-25 09:28:27 +02:00

294 lines
10 KiB
Go

package localai
import (
"encoding/json"
"io"
"net/http"
"os"
"path/filepath"
"time"
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/application"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/endpoints/openresponses"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/xlog"
)
// GetSettingsEndpoint returns current settings with precedence (env > file > defaults)
func GetSettingsEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
appConfig := app.ApplicationConfig()
settings := appConfig.ToRuntimeSettings()
return c.JSON(http.StatusOK, settings)
}
}
// UpdateSettingsEndpoint updates settings, saves to file, and applies immediately
func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
appConfig := app.ApplicationConfig()
startupConfig := app.StartupConfig()
if startupConfig == nil {
startupConfig = appConfig
}
body, err := io.ReadAll(c.Request().Body)
if err != nil {
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
Success: false,
Error: "Failed to read request body: " + err.Error(),
})
}
var settings config.RuntimeSettings
if err := json.Unmarshal(body, &settings); err != nil {
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
Success: false,
Error: "Failed to parse JSON: " + err.Error(),
})
}
// Validate timeouts if provided
if settings.WatchdogIdleTimeout != nil {
if _, err := time.ParseDuration(*settings.WatchdogIdleTimeout); err != nil {
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
Success: false,
Error: "Invalid watchdog_idle_timeout format: " + err.Error(),
})
}
}
if settings.WatchdogBusyTimeout != nil {
if _, err := time.ParseDuration(*settings.WatchdogBusyTimeout); err != nil {
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
Success: false,
Error: "Invalid watchdog_busy_timeout format: " + err.Error(),
})
}
}
if settings.WatchdogInterval != nil {
if _, err := time.ParseDuration(*settings.WatchdogInterval); err != nil {
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
Success: false,
Error: "Invalid watchdog_interval format: " + err.Error(),
})
}
}
if settings.LRUEvictionRetryInterval != nil {
if _, err := time.ParseDuration(*settings.LRUEvictionRetryInterval); err != nil {
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
Success: false,
Error: "Invalid lru_eviction_retry_interval format: " + err.Error(),
})
}
}
if settings.OpenResponsesStoreTTL != nil {
if *settings.OpenResponsesStoreTTL != "0" && *settings.OpenResponsesStoreTTL != "" {
if _, err := time.ParseDuration(*settings.OpenResponsesStoreTTL); err != nil {
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
Success: false,
Error: "Invalid open_responses_store_ttl format: " + err.Error(),
})
}
}
}
// Generate P2P token before saving so the real token is persisted (not "0")
if settings.P2PToken != nil && *settings.P2PToken == "0" {
token := p2p.GenerateToken(60, 60)
settings.P2PToken = &token
}
// Save to file
if appConfig.DynamicConfigsDir == "" {
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
Success: false,
Error: "DynamicConfigsDir is not set",
})
}
// Branding asset filenames are owned exclusively by
// /api/branding/asset/{kind} (upload/delete). The Settings page also
// round-trips them via GET /api/settings, but its local state is stale
// once an asset has been uploaded — clicking Save would otherwise
// clobber the uploaded basename with the empty string the UI loaded
// at page open. Replace whatever the body sent for these three fields
// with the values currently on disk so /api/settings can never
// regress them.
if existing, err := appConfig.ReadPersistedSettings(); err == nil {
settings.LogoFile = existing.LogoFile
settings.LogoHorizontalFile = existing.LogoHorizontalFile
settings.FaviconFile = existing.FaviconFile
}
// The UI reads ApiKeys from GET /api/settings, which already returns the
// merged env+runtime list. When the user clicks Save, the same merged
// list comes back in the POST body. Strip the env-supplied keys from
// the incoming list before we persist or re-merge, otherwise each save
// duplicates the env keys on top of the previous merge (#9071).
if settings.ApiKeys != nil {
envKeys := startupConfig.ApiKeys
envSet := make(map[string]struct{}, len(envKeys))
for _, k := range envKeys {
envSet[k] = struct{}{}
}
runtimeOnly := make([]string, 0, len(*settings.ApiKeys))
for _, k := range *settings.ApiKeys {
if _, fromEnv := envSet[k]; fromEnv {
continue
}
runtimeOnly = append(runtimeOnly, k)
}
settings.ApiKeys = &runtimeOnly
}
settingsFile := filepath.Join(appConfig.DynamicConfigsDir, "runtime_settings.json")
settingsJSON, err := json.MarshalIndent(settings, "", " ")
if err != nil {
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
Success: false,
Error: "Failed to marshal settings: " + err.Error(),
})
}
if err := os.WriteFile(settingsFile, settingsJSON, 0600); err != nil {
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
Success: false,
Error: "Failed to write settings file: " + err.Error(),
})
}
// Apply settings using centralized method
watchdogChanged := appConfig.ApplyRuntimeSettings(&settings)
// Handle API keys specially (merge with startup keys)
if settings.ApiKeys != nil {
envKeys := startupConfig.ApiKeys
runtimeKeys := *settings.ApiKeys
appConfig.ApiKeys = append(envKeys, runtimeKeys...)
}
// Update backend logging dynamically
if settings.EnableBackendLogging != nil {
app.ModelLoader().SetBackendLoggingEnabled(*settings.EnableBackendLogging)
xlog.Info("Updated backend logging setting", "enableBackendLogging", *settings.EnableBackendLogging)
}
// Update watchdog dynamically for settings that don't require restart
if settings.ForceEvictionWhenBusy != nil {
currentWD := app.ModelLoader().GetWatchDog()
if currentWD != nil {
currentWD.SetForceEvictionWhenBusy(*settings.ForceEvictionWhenBusy)
xlog.Info("Updated watchdog force eviction when busy setting", "forceEvictionWhenBusy", *settings.ForceEvictionWhenBusy)
}
}
// Update ModelLoader LRU eviction retry settings dynamically
maxRetries := appConfig.LRUEvictionMaxRetries
retryInterval := appConfig.LRUEvictionRetryInterval
if settings.LRUEvictionMaxRetries != nil {
maxRetries = *settings.LRUEvictionMaxRetries
}
if settings.LRUEvictionRetryInterval != nil {
if dur, err := time.ParseDuration(*settings.LRUEvictionRetryInterval); err == nil {
retryInterval = dur
}
}
if settings.LRUEvictionMaxRetries != nil || settings.LRUEvictionRetryInterval != nil {
app.ModelLoader().SetLRUEvictionRetrySettings(maxRetries, retryInterval)
xlog.Info("Updated LRU eviction retry settings", "maxRetries", maxRetries, "retryInterval", retryInterval)
}
// Update Open Responses store TTL dynamically
if settings.OpenResponsesStoreTTL != nil {
ttl := time.Duration(0)
if *settings.OpenResponsesStoreTTL != "0" && *settings.OpenResponsesStoreTTL != "" {
if dur, err := time.ParseDuration(*settings.OpenResponsesStoreTTL); err == nil {
ttl = dur
} else {
xlog.Warn("Invalid Open Responses store TTL format", "ttl", *settings.OpenResponsesStoreTTL, "error", err)
}
}
// Import the store package
store := openresponses.GetGlobalStore()
store.SetTTL(ttl)
xlog.Info("Updated Open Responses store TTL", "ttl", ttl)
}
// Check if agent job retention changed
agentJobChanged := settings.AgentJobRetentionDays != nil
// Restart watchdog if settings changed
if watchdogChanged {
if settings.WatchdogEnabled != nil && !*settings.WatchdogEnabled {
if err := app.StopWatchdog(); err != nil {
xlog.Error("Failed to stop watchdog", "error", err)
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
Success: false,
Error: "Settings saved but failed to stop watchdog: " + err.Error(),
})
}
} else {
if err := app.RestartWatchdog(); err != nil {
xlog.Error("Failed to restart watchdog", "error", err)
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
Success: false,
Error: "Settings saved but failed to restart watchdog: " + err.Error(),
})
}
}
}
// Restart agent job service if retention days changed
if agentJobChanged {
if err := app.RestartAgentJobService(); err != nil {
xlog.Error("Failed to restart agent job service", "error", err)
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
Success: false,
Error: "Settings saved but failed to restart agent job service: " + err.Error(),
})
}
}
if settings.MITMListen != nil {
if err := app.RestartMITM(); err != nil {
xlog.Error("Failed to restart MITM proxy", "error", err)
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
Success: false,
Error: "Settings saved but failed to restart MITM proxy: " + err.Error(),
})
}
}
// Restart P2P if P2P settings changed
p2pChanged := settings.P2PToken != nil || settings.P2PNetworkID != nil || settings.Federated != nil
if p2pChanged {
if settings.P2PToken != nil && *settings.P2PToken == "" {
if err := app.StopP2P(); err != nil {
xlog.Error("Failed to stop P2P", "error", err)
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
Success: false,
Error: "Settings saved but failed to stop P2P: " + err.Error(),
})
}
} else {
if err := app.RestartP2P(); err != nil {
xlog.Error("Failed to restart P2P", "error", err)
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
Success: false,
Error: "Settings saved but failed to restart P2P: " + err.Error(),
})
}
}
}
return c.JSON(http.StatusOK, schema.SettingsResponse{
Success: true,
Message: "Settings updated successfully",
})
}
}