mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-01 12:42:55 -04:00
Add a routing middleware stack and a cloud-proxy backend. * cloud-proxy: a Go gRPC backend that forwards OpenAI- and Anthropic-shaped chat requests to upstream providers, with an optional translate mode (OpenAI request -> Anthropic /v1/messages -> OpenAI response) and full tool-calling support. * routing: admission control, content-aware model routing (embedding cache + classifier + rerank + Arch-Router score), PII detection/redaction (regex + NER) with streaming filter and OpenAI/Anthropic adapters, and a per-user/per-key billing recorder backed by GORM or in-memory storage. * middleware: UsageMiddleware records usage via the billing recorder, plus admission, route-model, usage-stamp and trace middlewares. * observability: BackendTrace ring buffer stores full request bodies (capped), MITM proxy emits structured trace events, and router classifier decisions surface at /api/router/decide. * gallery: Arch-Router-1.5B (Q4_K_M and Q8_0). * UI: cloud-proxy model-editor fields, classifier system-prompt and score-normalization config, and a Traces page rendering request bodies. Assisted-by: claude-code:claude-opus-4-7 [Read] [Edit] [Bash] Signed-off-by: Richard Palethorpe <io@richiejp.com>
294 lines
10 KiB
Go
294 lines
10 KiB
Go
package localai
|
|
|
|
import (
|
|
"encoding/json"
|
|
"io"
|
|
"net/http"
|
|
"os"
|
|
"path/filepath"
|
|
"time"
|
|
|
|
"github.com/labstack/echo/v4"
|
|
"github.com/mudler/LocalAI/core/application"
|
|
"github.com/mudler/LocalAI/core/config"
|
|
"github.com/mudler/LocalAI/core/http/endpoints/openresponses"
|
|
"github.com/mudler/LocalAI/core/p2p"
|
|
"github.com/mudler/LocalAI/core/schema"
|
|
"github.com/mudler/xlog"
|
|
)
|
|
|
|
// GetSettingsEndpoint returns current settings with precedence (env > file > defaults)
|
|
func GetSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
|
return func(c echo.Context) error {
|
|
appConfig := app.ApplicationConfig()
|
|
settings := appConfig.ToRuntimeSettings()
|
|
return c.JSON(http.StatusOK, settings)
|
|
}
|
|
}
|
|
|
|
// UpdateSettingsEndpoint updates settings, saves to file, and applies immediately
|
|
func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
|
return func(c echo.Context) error {
|
|
appConfig := app.ApplicationConfig()
|
|
startupConfig := app.StartupConfig()
|
|
|
|
if startupConfig == nil {
|
|
startupConfig = appConfig
|
|
}
|
|
|
|
body, err := io.ReadAll(c.Request().Body)
|
|
if err != nil {
|
|
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
|
|
Success: false,
|
|
Error: "Failed to read request body: " + err.Error(),
|
|
})
|
|
}
|
|
|
|
var settings config.RuntimeSettings
|
|
if err := json.Unmarshal(body, &settings); err != nil {
|
|
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
|
|
Success: false,
|
|
Error: "Failed to parse JSON: " + err.Error(),
|
|
})
|
|
}
|
|
|
|
// Validate timeouts if provided
|
|
if settings.WatchdogIdleTimeout != nil {
|
|
if _, err := time.ParseDuration(*settings.WatchdogIdleTimeout); err != nil {
|
|
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
|
|
Success: false,
|
|
Error: "Invalid watchdog_idle_timeout format: " + err.Error(),
|
|
})
|
|
}
|
|
}
|
|
if settings.WatchdogBusyTimeout != nil {
|
|
if _, err := time.ParseDuration(*settings.WatchdogBusyTimeout); err != nil {
|
|
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
|
|
Success: false,
|
|
Error: "Invalid watchdog_busy_timeout format: " + err.Error(),
|
|
})
|
|
}
|
|
}
|
|
if settings.WatchdogInterval != nil {
|
|
if _, err := time.ParseDuration(*settings.WatchdogInterval); err != nil {
|
|
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
|
|
Success: false,
|
|
Error: "Invalid watchdog_interval format: " + err.Error(),
|
|
})
|
|
}
|
|
}
|
|
if settings.LRUEvictionRetryInterval != nil {
|
|
if _, err := time.ParseDuration(*settings.LRUEvictionRetryInterval); err != nil {
|
|
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
|
|
Success: false,
|
|
Error: "Invalid lru_eviction_retry_interval format: " + err.Error(),
|
|
})
|
|
}
|
|
}
|
|
if settings.OpenResponsesStoreTTL != nil {
|
|
if *settings.OpenResponsesStoreTTL != "0" && *settings.OpenResponsesStoreTTL != "" {
|
|
if _, err := time.ParseDuration(*settings.OpenResponsesStoreTTL); err != nil {
|
|
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
|
|
Success: false,
|
|
Error: "Invalid open_responses_store_ttl format: " + err.Error(),
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
// Generate P2P token before saving so the real token is persisted (not "0")
|
|
if settings.P2PToken != nil && *settings.P2PToken == "0" {
|
|
token := p2p.GenerateToken(60, 60)
|
|
settings.P2PToken = &token
|
|
}
|
|
|
|
// Save to file
|
|
if appConfig.DynamicConfigsDir == "" {
|
|
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
|
|
Success: false,
|
|
Error: "DynamicConfigsDir is not set",
|
|
})
|
|
}
|
|
|
|
// Branding asset filenames are owned exclusively by
|
|
// /api/branding/asset/{kind} (upload/delete). The Settings page also
|
|
// round-trips them via GET /api/settings, but its local state is stale
|
|
// once an asset has been uploaded — clicking Save would otherwise
|
|
// clobber the uploaded basename with the empty string the UI loaded
|
|
// at page open. Replace whatever the body sent for these three fields
|
|
// with the values currently on disk so /api/settings can never
|
|
// regress them.
|
|
if existing, err := appConfig.ReadPersistedSettings(); err == nil {
|
|
settings.LogoFile = existing.LogoFile
|
|
settings.LogoHorizontalFile = existing.LogoHorizontalFile
|
|
settings.FaviconFile = existing.FaviconFile
|
|
}
|
|
|
|
// The UI reads ApiKeys from GET /api/settings, which already returns the
|
|
// merged env+runtime list. When the user clicks Save, the same merged
|
|
// list comes back in the POST body. Strip the env-supplied keys from
|
|
// the incoming list before we persist or re-merge, otherwise each save
|
|
// duplicates the env keys on top of the previous merge (#9071).
|
|
if settings.ApiKeys != nil {
|
|
envKeys := startupConfig.ApiKeys
|
|
envSet := make(map[string]struct{}, len(envKeys))
|
|
for _, k := range envKeys {
|
|
envSet[k] = struct{}{}
|
|
}
|
|
runtimeOnly := make([]string, 0, len(*settings.ApiKeys))
|
|
for _, k := range *settings.ApiKeys {
|
|
if _, fromEnv := envSet[k]; fromEnv {
|
|
continue
|
|
}
|
|
runtimeOnly = append(runtimeOnly, k)
|
|
}
|
|
settings.ApiKeys = &runtimeOnly
|
|
}
|
|
|
|
settingsFile := filepath.Join(appConfig.DynamicConfigsDir, "runtime_settings.json")
|
|
settingsJSON, err := json.MarshalIndent(settings, "", " ")
|
|
if err != nil {
|
|
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
|
Success: false,
|
|
Error: "Failed to marshal settings: " + err.Error(),
|
|
})
|
|
}
|
|
|
|
if err := os.WriteFile(settingsFile, settingsJSON, 0600); err != nil {
|
|
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
|
Success: false,
|
|
Error: "Failed to write settings file: " + err.Error(),
|
|
})
|
|
}
|
|
|
|
// Apply settings using centralized method
|
|
watchdogChanged := appConfig.ApplyRuntimeSettings(&settings)
|
|
|
|
// Handle API keys specially (merge with startup keys)
|
|
if settings.ApiKeys != nil {
|
|
envKeys := startupConfig.ApiKeys
|
|
runtimeKeys := *settings.ApiKeys
|
|
appConfig.ApiKeys = append(envKeys, runtimeKeys...)
|
|
}
|
|
|
|
// Update backend logging dynamically
|
|
if settings.EnableBackendLogging != nil {
|
|
app.ModelLoader().SetBackendLoggingEnabled(*settings.EnableBackendLogging)
|
|
xlog.Info("Updated backend logging setting", "enableBackendLogging", *settings.EnableBackendLogging)
|
|
}
|
|
|
|
// Update watchdog dynamically for settings that don't require restart
|
|
if settings.ForceEvictionWhenBusy != nil {
|
|
currentWD := app.ModelLoader().GetWatchDog()
|
|
if currentWD != nil {
|
|
currentWD.SetForceEvictionWhenBusy(*settings.ForceEvictionWhenBusy)
|
|
xlog.Info("Updated watchdog force eviction when busy setting", "forceEvictionWhenBusy", *settings.ForceEvictionWhenBusy)
|
|
}
|
|
}
|
|
|
|
// Update ModelLoader LRU eviction retry settings dynamically
|
|
maxRetries := appConfig.LRUEvictionMaxRetries
|
|
retryInterval := appConfig.LRUEvictionRetryInterval
|
|
if settings.LRUEvictionMaxRetries != nil {
|
|
maxRetries = *settings.LRUEvictionMaxRetries
|
|
}
|
|
if settings.LRUEvictionRetryInterval != nil {
|
|
if dur, err := time.ParseDuration(*settings.LRUEvictionRetryInterval); err == nil {
|
|
retryInterval = dur
|
|
}
|
|
}
|
|
if settings.LRUEvictionMaxRetries != nil || settings.LRUEvictionRetryInterval != nil {
|
|
app.ModelLoader().SetLRUEvictionRetrySettings(maxRetries, retryInterval)
|
|
xlog.Info("Updated LRU eviction retry settings", "maxRetries", maxRetries, "retryInterval", retryInterval)
|
|
}
|
|
|
|
// Update Open Responses store TTL dynamically
|
|
if settings.OpenResponsesStoreTTL != nil {
|
|
ttl := time.Duration(0)
|
|
if *settings.OpenResponsesStoreTTL != "0" && *settings.OpenResponsesStoreTTL != "" {
|
|
if dur, err := time.ParseDuration(*settings.OpenResponsesStoreTTL); err == nil {
|
|
ttl = dur
|
|
} else {
|
|
xlog.Warn("Invalid Open Responses store TTL format", "ttl", *settings.OpenResponsesStoreTTL, "error", err)
|
|
}
|
|
}
|
|
// Import the store package
|
|
store := openresponses.GetGlobalStore()
|
|
store.SetTTL(ttl)
|
|
xlog.Info("Updated Open Responses store TTL", "ttl", ttl)
|
|
}
|
|
|
|
// Check if agent job retention changed
|
|
agentJobChanged := settings.AgentJobRetentionDays != nil
|
|
|
|
// Restart watchdog if settings changed
|
|
if watchdogChanged {
|
|
if settings.WatchdogEnabled != nil && !*settings.WatchdogEnabled {
|
|
if err := app.StopWatchdog(); err != nil {
|
|
xlog.Error("Failed to stop watchdog", "error", err)
|
|
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
|
Success: false,
|
|
Error: "Settings saved but failed to stop watchdog: " + err.Error(),
|
|
})
|
|
}
|
|
} else {
|
|
if err := app.RestartWatchdog(); err != nil {
|
|
xlog.Error("Failed to restart watchdog", "error", err)
|
|
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
|
Success: false,
|
|
Error: "Settings saved but failed to restart watchdog: " + err.Error(),
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
// Restart agent job service if retention days changed
|
|
if agentJobChanged {
|
|
if err := app.RestartAgentJobService(); err != nil {
|
|
xlog.Error("Failed to restart agent job service", "error", err)
|
|
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
|
Success: false,
|
|
Error: "Settings saved but failed to restart agent job service: " + err.Error(),
|
|
})
|
|
}
|
|
}
|
|
|
|
if settings.MITMListen != nil {
|
|
if err := app.RestartMITM(); err != nil {
|
|
xlog.Error("Failed to restart MITM proxy", "error", err)
|
|
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
|
Success: false,
|
|
Error: "Settings saved but failed to restart MITM proxy: " + err.Error(),
|
|
})
|
|
}
|
|
}
|
|
|
|
// Restart P2P if P2P settings changed
|
|
p2pChanged := settings.P2PToken != nil || settings.P2PNetworkID != nil || settings.Federated != nil
|
|
if p2pChanged {
|
|
if settings.P2PToken != nil && *settings.P2PToken == "" {
|
|
if err := app.StopP2P(); err != nil {
|
|
xlog.Error("Failed to stop P2P", "error", err)
|
|
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
|
Success: false,
|
|
Error: "Settings saved but failed to stop P2P: " + err.Error(),
|
|
})
|
|
}
|
|
} else {
|
|
if err := app.RestartP2P(); err != nil {
|
|
xlog.Error("Failed to restart P2P", "error", err)
|
|
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
|
Success: false,
|
|
Error: "Settings saved but failed to restart P2P: " + err.Error(),
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
return c.JSON(http.StatusOK, schema.SettingsResponse{
|
|
Success: true,
|
|
Message: "Settings updated successfully",
|
|
})
|
|
}
|
|
}
|