mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-31 12:07:45 -04:00
* fix(traces): cap backend trace Data field so the admin UI stays responsive The previous fix (#9946) capped API trace bodies but missed backend traces, which carry the same blast radius: - LLM backend traces store the full chat messages JSON, full response, and full streaming deltas. Every agent-pool reasoning step ships the full RAG-augmented history (50-500 KiB per trace, often 100+ traces queued). - TTS / audio_transform / transcript traces embed a 30s audio snippet as base64, around 1.3 MiB per trace. Both blow the /api/backend-traces JSON past tens of MiB. The admin Traces page then keeps re-downloading and re-parsing the buffer faster than the 5s auto-refresh and stays in the loading state forever, the same symptom the API-side fix addressed. Apply two complementary caps, both honoring LOCALAI_TRACING_MAX_BODY_BYTES: Option A (safety net in core/trace): RecordBackendTrace walks the Data map recursively and replaces any string value larger than the cap with "<truncated: N bytes>". Catches anything a future producer forgets. Option B (head-preserving at the producer): - core/backend/llm.go: TruncateToBytes on messages, response, and chat_deltas content/reasoning_content so the leading content stays readable in the UI. - core/trace/audio_snippet.go: omit audio_wav_base64 when the encoded blob would exceed the cap (truncated base64 is undecodable). The quality metrics still ship and the UI's WaveformPlayer simply skips when the field is absent. TruncateToBytes is bounded to <= maxBytes so Option A leaves the producer's head-preserving output alone instead of replacing it with the bare marker. 
Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-7 * fix(react-ui): expose tracing_max_body_bytes in Settings and Traces panels The setting was already plumbed through env (LOCALAI_TRACING_MAX_BODY_BYTES), CLI flag, and the runtime_settings.json GET/PUT schema, but neither the main Settings page nor the inline Traces panel offered an input for it. Admins hitting the "Traces UI stuck loading" symptom had to know to set an env var or PUT raw JSON to /api/settings to dial the cap. Add a "Max Body Bytes" row next to "Max Items" in both places. Same input type, same disabled-when-tracing-off semantics, placeholder shows the 65536 default so users see what they're inheriting. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-7 * test(react-ui): disambiguate Max Items locator after adding Max Body Bytes The Tracing settings panel now has two number inputs. The previous spec matched 'input[type="number"]' which became ambiguous and triggered a Playwright strict-mode violation in CI. Switch to getByPlaceholder('100') for Max Items and add a parallel spec for the new Max Body Bytes field using getByPlaceholder('65536'). Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-7 --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
205 lines
6.1 KiB
Go
205 lines
6.1 KiB
Go
package trace
|
|
|
|
import (
	"encoding/json"
	"fmt"
	"slices"
	"sync"
	"time"
	"unicode/utf8"

	"github.com/emirpasic/gods/v2/queues/circularbuffer"
	"github.com/mudler/LocalAI/core/schema"
	"github.com/mudler/xlog"
)
|
|
|
|
// BackendTraceType labels the kind of backend operation a BackendTrace
// describes. It is a plain string, so it is serialized verbatim in the "type"
// JSON field of BackendTrace.
type BackendTraceType string
|
|
|
|
// Known backend trace types. The string values are what appears in the
// serialized "type" field of a BackendTrace.
const (
	BackendTraceLLM             BackendTraceType = "llm"
	BackendTraceEmbedding       BackendTraceType = "embedding"
	BackendTraceTranscription   BackendTraceType = "transcription"
	BackendTraceImageGeneration BackendTraceType = "image_generation"
	BackendTraceVideoGeneration BackendTraceType = "video_generation"
	BackendTraceTTS             BackendTraceType = "tts"
	BackendTraceSoundGeneration BackendTraceType = "sound_generation"
	BackendTraceRerank          BackendTraceType = "rerank"
	BackendTraceTokenize        BackendTraceType = "tokenize"
	BackendTraceDetection       BackendTraceType = "detection"
	BackendTraceFaceVerify      BackendTraceType = "face_verify"
	BackendTraceFaceAnalyze     BackendTraceType = "face_analyze"
	BackendTraceVoiceVerify     BackendTraceType = "voice_verify"
	BackendTraceVoiceAnalyze    BackendTraceType = "voice_analyze"
	BackendTraceVoiceEmbed      BackendTraceType = "voice_embed"
	BackendTraceAudioTransform  BackendTraceType = "audio_transform"
	BackendTraceModelLoad       BackendTraceType = "model_load"
)
|
|
|
|
// BackendTrace is a single recorded backend operation as stored in the
// in-memory trace buffer and returned by GetBackendTraces.
type BackendTrace struct {
	// Timestamp is the key GetBackendTraces sorts on (newest first).
	Timestamp time.Time `json:"timestamp"`
	// Duration of the operation. NOTE(review): time.Duration marshals to JSON
	// as an integer nanosecond count; confirm the UI expects that unit.
	Duration time.Duration `json:"duration"`
	// Type is one of the BackendTrace* constants.
	Type BackendTraceType `json:"type"`
	// ModelName and Backend identify what served the operation.
	ModelName string `json:"model_name"`
	Backend   string `json:"backend"`
	// Summary is a short human-readable description; presumably filled from
	// helpers such as GenerateLLMSummary for LLM traces (verify at callers).
	Summary string `json:"summary"`
	// Error is omitted from the JSON output when empty.
	Error string `json:"error,omitempty"`
	// Data carries free-form, type-specific details. When a body cap is
	// configured, RecordBackendTrace replaces oversized string values in it
	// with a truncation marker before the trace is queued.
	Data map[string]any `json:"data"`
}
|
|
|
|
// backendTraceBuffer holds the recorded backend traces. It stays nil until
// InitBackendTracingIfEnabled allocates it, which is why every reader checks
// for nil first.
var backendTraceBuffer *circularbuffer.Queue[*BackendTrace]

// backendMu guards backendTraceBuffer (allocation, enqueue, read and clear).
var backendMu sync.Mutex

// backendLogChan hands traces from RecordBackendTrace to the goroutine started
// by InitBackendTracingIfEnabled. It is buffered (100 entries); when it is
// full RecordBackendTrace drops the trace instead of blocking.
var backendLogChan = make(chan *BackendTrace, 100)

// backendInitOnce makes InitBackendTracingIfEnabled run its setup only once.
var backendInitOnce sync.Once
|
|
|
|
// backendMaxBodyBytes caps each captured string value in a BackendTrace.Data
// field to keep the /api/backend-traces JSON small enough for the admin UI to
// load on every 5s auto-refresh. Mirrors the API-trace body cap added in
// commit 61bf34ea: without it a chatty LLM workload (full message history per
// trace) or any TTS run (~1.3 MiB of audio_wav_base64 per trace) blows the
// payload past tens of MiB and locks the Traces page in a loading state.
//
// 0 disables the cap (RecordBackendTrace only applies it when the value is
// positive). It is set on the first InitBackendTracingIfEnabled call only,
// matching the sync.Once-guarded maxItems semantics; later calls do not
// change it.
var backendMaxBodyBytes int
|
|
|
|
func InitBackendTracingIfEnabled(maxItems, maxBodyBytes int) {
|
|
backendInitOnce.Do(func() {
|
|
if maxItems <= 0 {
|
|
maxItems = 100
|
|
}
|
|
backendMu.Lock()
|
|
backendTraceBuffer = circularbuffer.New[*BackendTrace](maxItems)
|
|
backendMaxBodyBytes = maxBodyBytes
|
|
backendMu.Unlock()
|
|
|
|
go func() {
|
|
for t := range backendLogChan {
|
|
backendMu.Lock()
|
|
if backendTraceBuffer != nil {
|
|
backendTraceBuffer.Enqueue(t)
|
|
}
|
|
backendMu.Unlock()
|
|
}
|
|
}()
|
|
})
|
|
}
|
|
|
|
func RecordBackendTrace(t BackendTrace) {
|
|
if t.Data != nil && backendMaxBodyBytes > 0 {
|
|
t.Data = capDataStrings(t.Data, backendMaxBodyBytes)
|
|
}
|
|
select {
|
|
case backendLogChan <- &t:
|
|
default:
|
|
xlog.Warn("Backend trace channel full, dropping trace")
|
|
}
|
|
}
|
|
|
|
// capDataStrings walks a trace Data map and returns a copy in which every
// string value (at any depth) that exceeds maxBytes is replaced with a
// fixed-size marker naming the original byte count. The replacement is
// intentionally short and not valid base64/JSON: the goal is to flag "this
// was dropped" cheaply, not to keep a partial value that the UI might try to
// render.
//
// Nested map[string]any values and the common JSON-shaped slices ([]any,
// []map[string]any, []string) are descended into, so an oversized string
// cannot slip past the cap just because a producer wrapped it in a list.
// Everything else (numbers, bools, other typed containers) passes through
// untouched so structural fields like total_deltas or audio_sample_rate
// remain useful. The input map and its nested containers are never mutated.
//
// maxBytes <= 0 disables the cap, matching backendMaxBodyBytes semantics; in
// that case data is returned as-is.
func capDataStrings(data map[string]any, maxBytes int) map[string]any {
	if maxBytes <= 0 {
		return data
	}
	out := make(map[string]any, len(data))
	for k, v := range data {
		out[k] = capValue(v, maxBytes)
	}
	return out
}

// capValue applies the capDataStrings rule to a single value: oversized
// strings become the truncation marker, maps and supported slice types are
// copied with their elements capped recursively, and any other value is
// returned unchanged. maxBytes <= 0 returns v unchanged.
func capValue(v any, maxBytes int) any {
	if maxBytes <= 0 {
		return v
	}
	switch val := v.(type) {
	case string:
		if len(val) > maxBytes {
			return fmt.Sprintf("<truncated: %d bytes>", len(val))
		}
		return val
	case map[string]any:
		return capDataStrings(val, maxBytes)
	case []any:
		if val == nil {
			// Keep nil as nil so it still serializes as JSON null.
			return v
		}
		capped := make([]any, len(val))
		for i, item := range val {
			capped[i] = capValue(item, maxBytes)
		}
		return capped
	case []map[string]any:
		if val == nil {
			return v
		}
		capped := make([]map[string]any, len(val))
		for i, item := range val {
			if item != nil {
				capped[i] = capDataStrings(item, maxBytes)
			}
		}
		return capped
	case []string:
		if val == nil {
			return v
		}
		capped := make([]string, len(val))
		for i, item := range val {
			if len(item) > maxBytes {
				item = fmt.Sprintf("<truncated: %d bytes>", len(item))
			}
			capped[i] = item
		}
		return capped
	default:
		return v
	}
}
|
|
|
|
func GetBackendTraces() []BackendTrace {
|
|
backendMu.Lock()
|
|
if backendTraceBuffer == nil {
|
|
backendMu.Unlock()
|
|
return []BackendTrace{}
|
|
}
|
|
ptrs := backendTraceBuffer.Values()
|
|
backendMu.Unlock()
|
|
|
|
traces := make([]BackendTrace, len(ptrs))
|
|
for i, p := range ptrs {
|
|
traces[i] = *p
|
|
}
|
|
|
|
slices.SortFunc(traces, func(a, b BackendTrace) int {
|
|
return b.Timestamp.Compare(a.Timestamp)
|
|
})
|
|
|
|
return traces
|
|
}
|
|
|
|
func ClearBackendTraces() {
|
|
backendMu.Lock()
|
|
if backendTraceBuffer != nil {
|
|
backendTraceBuffer.Clear()
|
|
}
|
|
backendMu.Unlock()
|
|
}
|
|
|
|
func GenerateLLMSummary(messages schema.Messages, prompt string) string {
|
|
if len(messages) > 0 {
|
|
last := messages[len(messages)-1]
|
|
text := ""
|
|
switch content := last.Content.(type) {
|
|
case string:
|
|
text = content
|
|
default:
|
|
b, err := json.Marshal(content)
|
|
if err == nil {
|
|
text = string(b)
|
|
}
|
|
}
|
|
if text != "" {
|
|
return TruncateString(text, 200)
|
|
}
|
|
}
|
|
if prompt != "" {
|
|
return TruncateString(prompt, 200)
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// TruncateString returns s unchanged when it is at most maxLen bytes long;
// otherwise it returns the leading part of s followed by "...". The result
// can therefore be up to maxLen+3 bytes (use TruncateToBytes when a hard
// upper bound is required).
//
// The cut never lands inside a multi-byte UTF-8 sequence: if byte maxLen is a
// continuation byte the cut moves back (by at most utf8.UTFMax-1 bytes) to
// the start of that rune, so valid UTF-8 input always produces valid UTF-8
// output instead of a dangling partial rune. A negative maxLen is treated as
// 0 rather than panicking.
func TruncateString(s string, maxLen int) string {
	if maxLen < 0 {
		maxLen = 0
	}
	if len(s) <= maxLen {
		return s
	}
	// len(s) > maxLen here, so s[cut] is always in range.
	cut := maxLen
	for back := 0; back < utf8.UTFMax-1 && cut > 0 && !utf8.RuneStart(s[cut]); back++ {
		cut--
	}
	return s[:cut] + "..."
}
|
|
|
|
// TruncateToBytes caps a string at maxBytes, preserving the leading content
// and appending a marker so the UI knows the value was clipped.
// Unlike TruncateString it guarantees output <= maxBytes, which matters for
// fields that feed back into the trace pipeline: capDataStrings in
// RecordBackendTrace re-checks size and would otherwise replace a producer's
// head-preserving truncation with the bare marker, losing the prefix.
//
// The kept prefix always ends on a UTF-8 rune boundary: if the byte budget
// would split a multi-byte rune, the cut moves back (by at most
// utf8.UTFMax-1 bytes) so valid UTF-8 input yields valid UTF-8 output. The
// result may therefore be a few bytes shorter than maxBytes, never longer.
//
// maxBytes <= 0 disables the cap, matching backendMaxBodyBytes semantics.
func TruncateToBytes(s string, maxBytes int) string {
	if maxBytes <= 0 || len(s) <= maxBytes {
		return s
	}
	suffix := fmt.Sprintf("...[truncated, %d bytes]", len(s))
	keep := maxBytes - len(suffix)
	if keep <= 0 {
		// Pathologically small caps can't fit the marker; fall back to a
		// hard cut so the contract (output <= maxBytes) still holds.
		keep = maxBytes
		suffix = ""
	}
	// keep <= maxBytes < len(s), so s[keep] is always in range.
	for back := 0; back < utf8.UTFMax-1 && keep > 0 && !utf8.RuneStart(s[keep]); back++ {
		keep--
	}
	return s[:keep] + suffix
}
|