mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-24 08:38:02 -04:00
fix(traces): cap backend trace Data field so the admin UI stays responsive
The previous fix (#9946) capped API trace bodies but missed backend traces, which carry the same blast radius: - LLM backend traces store the full chat messages JSON, full response, and full streaming deltas. Every agent-pool reasoning step ships the full RAG-augmented history (50-500 KiB per trace, often 100+ traces queued). - TTS / audio_transform / transcript traces embed a 30s audio snippet as base64, around 1.3 MiB per trace. Both blow the /api/backend-traces JSON past tens of MiB. The admin Traces page then keeps re-downloading and re-parsing the buffer faster than the 5s auto-refresh and stays in the loading state forever, the same symptom the API-side fix addressed. Apply two complementary caps, both honoring LOCALAI_TRACING_MAX_BODY_BYTES: Option A (safety net in core/trace): RecordBackendTrace walks the Data map recursively and replaces any string value larger than the cap with "<truncated: N bytes>". Catches anything a future producer forgets. Option B (head-preserving at the producer): - core/backend/llm.go: TruncateToBytes on messages, response, and chat_deltas content/reasoning_content so the leading content stays readable in the UI. - core/trace/audio_snippet.go: omit audio_wav_base64 when the encoded blob would exceed the cap (truncated base64 is undecodable). The quality metrics still ship and the UI's WaveformPlayer simply skips when the field is absent. TruncateToBytes is bounded to <= maxBytes so Option A leaves the producer's head-preserving output alone instead of replacing it with the bare marker. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-7
This commit is contained in:
@@ -78,7 +78,7 @@ func ModelAudioTransform(
|
||||
|
||||
var startTime time.Time
|
||||
if appConfig.EnableTracing {
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes)
|
||||
startTime = time.Now()
|
||||
}
|
||||
|
||||
@@ -104,7 +104,7 @@ func ModelAudioTransform(
|
||||
data["sample_rate"] = res.SampleRate
|
||||
data["samples"] = res.Samples
|
||||
data["reference_provided"] = res.ReferenceProvided
|
||||
if snippet := trace.AudioSnippet(dst); snippet != nil {
|
||||
if snippet := trace.AudioSnippet(dst, appConfig.TracingMaxBodyBytes); snippet != nil {
|
||||
maps.Copy(data, snippet)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,7 +35,7 @@ func Detection(
|
||||
|
||||
var startTime time.Time
|
||||
if appConfig.EnableTracing {
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes)
|
||||
startTime = time.Now()
|
||||
}
|
||||
|
||||
|
||||
@@ -67,7 +67,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, modelConf
|
||||
}
|
||||
|
||||
if appConfig.EnableTracing {
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes)
|
||||
|
||||
traceData := map[string]any{
|
||||
"input_text": trace.TruncateString(s, 1000),
|
||||
|
||||
@@ -32,7 +32,7 @@ func FaceAnalyze(
|
||||
|
||||
var startTime time.Time
|
||||
if appConfig.EnableTracing {
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes)
|
||||
startTime = time.Now()
|
||||
}
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ func FaceVerify(
|
||||
|
||||
var startTime time.Time
|
||||
if appConfig.EnableTracing {
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes)
|
||||
startTime = time.Now()
|
||||
}
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@ func ImageGeneration(height, width, step, seed int, positive_prompt, negative_pr
|
||||
}
|
||||
|
||||
if appConfig.EnableTracing {
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes)
|
||||
|
||||
traceData := map[string]any{
|
||||
"positive_prompt": positive_prompt,
|
||||
|
||||
@@ -305,7 +305,7 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima
|
||||
}
|
||||
|
||||
if o.EnableTracing {
|
||||
trace.InitBackendTracingIfEnabled(o.TracingMaxItems)
|
||||
trace.InitBackendTracingIfEnabled(o.TracingMaxItems, o.TracingMaxBodyBytes)
|
||||
|
||||
traceData := map[string]any{
|
||||
"chat_template": c.TemplateConfig.Chat,
|
||||
@@ -316,9 +316,13 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima
|
||||
"audios_count": len(audios),
|
||||
}
|
||||
|
||||
// Cap the captured fields up front: agent-pool LLM calls embed the
|
||||
// full augmented chat history in messages and the full reply in
|
||||
// response, so without a per-field cap a single trace can dwarf the
|
||||
// rest of the buffer. The cap matches the API-trace body cap.
|
||||
if len(messages) > 0 {
|
||||
if msgJSON, err := json.Marshal(messages); err == nil {
|
||||
traceData["messages"] = string(msgJSON)
|
||||
traceData["messages"] = trace.TruncateToBytes(string(msgJSON), o.TracingMaxBodyBytes)
|
||||
}
|
||||
}
|
||||
if reasoningJSON, err := json.Marshal(c.ReasoningConfig); err == nil {
|
||||
@@ -337,7 +341,7 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima
|
||||
resp, err := originalFn()
|
||||
duration := time.Since(startTime)
|
||||
|
||||
traceData["response"] = resp.Response
|
||||
traceData["response"] = trace.TruncateToBytes(resp.Response, o.TracingMaxBodyBytes)
|
||||
traceData["token_usage"] = map[string]any{
|
||||
"prompt": resp.Usage.Prompt,
|
||||
"completion": resp.Usage.Completion,
|
||||
@@ -359,10 +363,10 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima
|
||||
toolCallCount += len(d.ToolCalls)
|
||||
}
|
||||
if len(contentParts) > 0 {
|
||||
chatDeltasInfo["content"] = strings.Join(contentParts, "")
|
||||
chatDeltasInfo["content"] = trace.TruncateToBytes(strings.Join(contentParts, ""), o.TracingMaxBodyBytes)
|
||||
}
|
||||
if len(reasoningParts) > 0 {
|
||||
chatDeltasInfo["reasoning_content"] = strings.Join(reasoningParts, "")
|
||||
chatDeltasInfo["reasoning_content"] = trace.TruncateToBytes(strings.Join(reasoningParts, ""), o.TracingMaxBodyBytes)
|
||||
}
|
||||
if toolCallCount > 0 {
|
||||
chatDeltasInfo["tool_call_count"] = toolCallCount
|
||||
|
||||
@@ -21,7 +21,7 @@ func recordModelLoadFailure(appConfig *config.ApplicationConfig, modelName, back
|
||||
if !appConfig.EnableTracing {
|
||||
return
|
||||
}
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes)
|
||||
trace.RecordBackendTrace(trace.BackendTrace{
|
||||
Timestamp: time.Now(),
|
||||
Type: trace.BackendTraceModelLoad,
|
||||
|
||||
@@ -25,7 +25,7 @@ func Rerank(ctx context.Context, request *proto.RerankRequest, loader *model.Mod
|
||||
|
||||
var startTime time.Time
|
||||
if appConfig.EnableTracing {
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes)
|
||||
startTime = time.Now()
|
||||
}
|
||||
|
||||
|
||||
@@ -98,7 +98,7 @@ func SoundGeneration(
|
||||
|
||||
var startTime time.Time
|
||||
if appConfig.EnableTracing {
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes)
|
||||
startTime = time.Now()
|
||||
}
|
||||
|
||||
|
||||
@@ -27,7 +27,7 @@ func ModelTokenize(s string, loader *model.ModelLoader, modelConfig config.Model
|
||||
|
||||
var startTime time.Time
|
||||
if appConfig.EnableTracing {
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes)
|
||||
startTime = time.Now()
|
||||
}
|
||||
|
||||
|
||||
@@ -76,10 +76,10 @@ func ModelTranscriptionWithOptions(ctx context.Context, req TranscriptionRequest
|
||||
var startTime time.Time
|
||||
var audioSnippet map[string]any
|
||||
if appConfig.EnableTracing {
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes)
|
||||
startTime = time.Now()
|
||||
// Capture audio before the backend call — the backend may delete the file.
|
||||
audioSnippet = trace.AudioSnippet(req.Audio)
|
||||
audioSnippet = trace.AudioSnippet(req.Audio, appConfig.TracingMaxBodyBytes)
|
||||
}
|
||||
|
||||
r, err := transcriptionModel.AudioTranscription(ctx, req.toProto(uint32(*modelConfig.Threads)))
|
||||
|
||||
@@ -67,7 +67,7 @@ func ModelTTS(
|
||||
|
||||
var startTime time.Time
|
||||
if appConfig.EnableTracing {
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes)
|
||||
startTime = time.Now()
|
||||
}
|
||||
|
||||
@@ -93,7 +93,7 @@ func ModelTTS(
|
||||
"language": language,
|
||||
}
|
||||
if err == nil && res.Success {
|
||||
if snippet := trace.AudioSnippet(filePath); snippet != nil {
|
||||
if snippet := trace.AudioSnippet(filePath, appConfig.TracingMaxBodyBytes); snippet != nil {
|
||||
maps.Copy(data, snippet)
|
||||
}
|
||||
}
|
||||
@@ -161,7 +161,7 @@ func ModelTTSStream(
|
||||
|
||||
var startTime time.Time
|
||||
if appConfig.EnableTracing {
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes)
|
||||
startTime = time.Now()
|
||||
}
|
||||
|
||||
@@ -260,7 +260,7 @@ func ModelTTSStream(
|
||||
"streaming": true,
|
||||
}
|
||||
if resultErr == nil && len(snippetPCM) > 0 {
|
||||
if snippet := trace.AudioSnippetFromPCM(snippetPCM, int(sampleRate), totalPCMBytes); snippet != nil {
|
||||
if snippet := trace.AudioSnippetFromPCM(snippetPCM, int(sampleRate), totalPCMBytes, appConfig.TracingMaxBodyBytes); snippet != nil {
|
||||
maps.Copy(data, snippet)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -42,7 +42,7 @@ func VideoGeneration(height, width int32, prompt, negativePrompt, startImage, en
|
||||
}
|
||||
|
||||
if appConfig.EnableTracing {
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes)
|
||||
|
||||
traceData := map[string]any{
|
||||
"prompt": prompt,
|
||||
|
||||
@@ -31,7 +31,7 @@ func VoiceAnalyze(
|
||||
|
||||
var startTime time.Time
|
||||
if appConfig.EnableTracing {
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes)
|
||||
startTime = time.Now()
|
||||
}
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ func VoiceEmbed(
|
||||
|
||||
var startTime time.Time
|
||||
if appConfig.EnableTracing {
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes)
|
||||
startTime = time.Now()
|
||||
}
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ func VoiceVerify(
|
||||
|
||||
var startTime time.Time
|
||||
if appConfig.EnableTracing {
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
|
||||
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes)
|
||||
startTime = time.Now()
|
||||
}
|
||||
|
||||
|
||||
@@ -16,8 +16,12 @@ const MaxSnippetSeconds = 30
|
||||
|
||||
// AudioSnippet captures the first MaxSnippetSeconds of a WAV file and computes
|
||||
// quality metrics. The result is a map suitable for merging into a BackendTrace
|
||||
// Data field.
|
||||
func AudioSnippet(wavPath string) map[string]any {
|
||||
// Data field. maxBytes caps the embedded base64 waveform so a single TTS or
|
||||
// transcription trace cannot blow past the backend-trace body cap (~1.3 MiB
|
||||
// of base64 per 30s of 16 kHz mono int16 PCM otherwise); when the encoded
|
||||
// waveform would exceed the cap the audio_wav_base64 field is dropped and
|
||||
// the rest of the metrics are returned. maxBytes <= 0 disables the cap.
|
||||
func AudioSnippet(wavPath string, maxBytes int) map[string]any {
|
||||
raw, err := os.ReadFile(wavPath)
|
||||
if err != nil {
|
||||
xlog.Warn("audio snippet: read failed", "path", wavPath, "error", err)
|
||||
@@ -34,12 +38,14 @@ func AudioSnippet(wavPath string) map[string]any {
|
||||
sampleRate = 16000
|
||||
}
|
||||
|
||||
return AudioSnippetFromPCM(pcm, sampleRate, len(pcm))
|
||||
return AudioSnippetFromPCM(pcm, sampleRate, len(pcm), maxBytes)
|
||||
}
|
||||
|
||||
// AudioSnippetFromPCM builds an audio snippet from raw PCM bytes (int16 LE mono).
|
||||
// totalPCMBytes is the full audio size before truncation (used to compute total duration).
|
||||
func AudioSnippetFromPCM(pcm []byte, sampleRate int, totalPCMBytes int) map[string]any {
|
||||
// totalPCMBytes is the full audio size before truncation (used to compute
|
||||
// total duration). maxBytes caps the embedded base64 waveform as described
|
||||
// on AudioSnippet.
|
||||
func AudioSnippetFromPCM(pcm []byte, sampleRate, totalPCMBytes, maxBytes int) map[string]any {
|
||||
if len(pcm) == 0 || len(pcm)%2 != 0 {
|
||||
return nil
|
||||
}
|
||||
@@ -89,8 +95,7 @@ func AudioSnippetFromPCM(pcm []byte, sampleRate int, totalPCMBytes int) map[stri
|
||||
}
|
||||
buf.Write(snippetPCM)
|
||||
|
||||
return map[string]any{
|
||||
"audio_wav_base64": base64.StdEncoding.EncodeToString(buf.Bytes()),
|
||||
out := map[string]any{
|
||||
"audio_duration_s": math.Round(durationS*100) / 100,
|
||||
"audio_snippet_s": math.Round(snippetDuration*100) / 100,
|
||||
"audio_sample_rate": sampleRate,
|
||||
@@ -99,4 +104,15 @@ func AudioSnippetFromPCM(pcm []byte, sampleRate int, totalPCMBytes int) map[stri
|
||||
"audio_peak_dbfs": math.Round(peakDBFS*10) / 10,
|
||||
"audio_dc_offset": math.Round(dcOffset*10000) / 10000,
|
||||
}
|
||||
// Skip the embedded waveform when it would dominate the trace payload.
|
||||
// Truncating mid-base64 produces an undecodable string, so the right
|
||||
// move is to drop the field and let the UI render just the metrics.
|
||||
encodedSize := base64.StdEncoding.EncodedLen(buf.Len())
|
||||
if maxBytes <= 0 || encodedSize <= maxBytes {
|
||||
out["audio_wav_base64"] = base64.StdEncoding.EncodeToString(buf.Bytes())
|
||||
} else {
|
||||
xlog.Debug("audio snippet: dropping audio_wav_base64", "encoded_bytes", encodedSize, "max_bytes", maxBytes)
|
||||
out["audio_wav_base64_dropped_bytes"] = encodedSize
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
49
core/trace/audio_snippet_test.go
Normal file
49
core/trace/audio_snippet_test.go
Normal file
@@ -0,0 +1,49 @@
|
||||
package trace_test
|
||||
|
||||
import (
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/mudler/LocalAI/core/trace"
|
||||
)
|
||||
|
||||
// One second of mono 16-bit PCM at 16 kHz: 32 KiB raw. After the 44-byte
|
||||
// WAV header and base64 encoding the snippet runs ~42 KiB, which is well
|
||||
// over the small caps used here and matches the smallest realistic TTS
|
||||
// output size.
|
||||
const (
|
||||
// snippetSampleRate is the sample rate, in Hz, of the synthetic PCM fed to
// AudioSnippetFromPCM in these specs.
snippetSampleRate = 16000
|
||||
// snippetSeconds is the length, in seconds, of the synthetic PCM buffer
// built with makePCM.
snippetSeconds = 1
|
||||
)
|
||||
|
||||
// makePCM returns a zero-filled buffer large enough to hold the requested
// number of seconds of 16-bit mono PCM at sampleRate. The content is all
// zero bytes; only the size matters to the specs that use it.
func makePCM(seconds, sampleRate int) []byte {
	const bytesPerSample = 2 // int16 mono

	size := seconds * sampleRate * bytesPerSample
	return make([]byte, size)
}
|
||||
|
||||
var _ = Describe("AudioSnippetFromPCM byte cap", func() {
|
||||
pcm := makePCM(snippetSeconds, snippetSampleRate)
|
||||
totalPCM := len(pcm)
|
||||
|
||||
It("omits audio_wav_base64 when the encoded snippet would exceed the cap, keeping the metrics", func() {
|
||||
out := trace.AudioSnippetFromPCM(pcm, snippetSampleRate, totalPCM, 1024)
|
||||
|
||||
Expect(out).ToNot(BeNil(), "metrics must still be returned even when the waveform is dropped")
|
||||
Expect(out).ToNot(HaveKey("audio_wav_base64"), "oversized base64 must be dropped so the UI does not try to render invalid audio data")
|
||||
Expect(out).To(HaveKey("audio_duration_s"))
|
||||
Expect(out).To(HaveKey("audio_sample_rate"))
|
||||
Expect(out).To(HaveKey("audio_rms_dbfs"))
|
||||
})
|
||||
|
||||
It("includes audio_wav_base64 when the snippet fits under the cap", func() {
|
||||
out := trace.AudioSnippetFromPCM(pcm, snippetSampleRate, totalPCM, 1024*1024)
|
||||
|
||||
Expect(out).To(HaveKey("audio_wav_base64"))
|
||||
Expect(out["audio_wav_base64"]).ToNot(BeEmpty())
|
||||
})
|
||||
|
||||
It("includes audio_wav_base64 when the cap is disabled (0)", func() {
|
||||
out := trace.AudioSnippetFromPCM(pcm, snippetSampleRate, totalPCM, 0)
|
||||
|
||||
Expect(out).To(HaveKey("audio_wav_base64"))
|
||||
})
|
||||
})
|
||||
@@ -2,6 +2,7 @@ package trace
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"slices"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -49,13 +50,25 @@ var backendMu sync.Mutex
|
||||
var backendLogChan = make(chan *BackendTrace, 100)
|
||||
var backendInitOnce sync.Once
|
||||
|
||||
func InitBackendTracingIfEnabled(maxItems int) {
|
||||
// backendMaxBodyBytes caps each captured string value in a BackendTrace.Data
|
||||
// field to keep the /api/backend-traces JSON small enough for the admin UI to
|
||||
// load on every 5s auto-refresh. Mirrors the API-trace body cap added in
|
||||
// commit 61bf34ea: without it a chatty LLM workload (full message history per
|
||||
// trace) or any TTS run (~1.3 MiB of audio_wav_base64 per trace) blows the
|
||||
// payload past tens of MiB and locks the Traces page in a loading state.
|
||||
//
|
||||
// 0 disables the cap. Set on the first InitBackendTracingIfEnabled call only,
|
||||
// matching the sync.Once-guarded maxItems semantics.
|
||||
var backendMaxBodyBytes int
|
||||
|
||||
func InitBackendTracingIfEnabled(maxItems, maxBodyBytes int) {
|
||||
backendInitOnce.Do(func() {
|
||||
if maxItems <= 0 {
|
||||
maxItems = 100
|
||||
}
|
||||
backendMu.Lock()
|
||||
backendTraceBuffer = circularbuffer.New[*BackendTrace](maxItems)
|
||||
backendMaxBodyBytes = maxBodyBytes
|
||||
backendMu.Unlock()
|
||||
|
||||
go func() {
|
||||
@@ -71,6 +84,9 @@ func InitBackendTracingIfEnabled(maxItems int) {
|
||||
}
|
||||
|
||||
func RecordBackendTrace(t BackendTrace) {
|
||||
if t.Data != nil && backendMaxBodyBytes > 0 {
|
||||
t.Data = capDataStrings(t.Data, backendMaxBodyBytes)
|
||||
}
|
||||
select {
|
||||
case backendLogChan <- &t:
|
||||
default:
|
||||
@@ -78,6 +94,35 @@ func RecordBackendTrace(t BackendTrace) {
|
||||
}
|
||||
}
|
||||
|
||||
// capDataStrings returns a copy of a trace Data map in which every string
// value longer than maxBytes, at any depth, is replaced by a short
// "<truncated: N bytes>" marker naming the original byte count. The marker is
// intentionally not valid base64/JSON: the goal is to flag "this was dropped"
// cheaply, not to keep a partial value that the UI might try to render.
//
// Nested maps and slices are walked and copied, so the caller's data is never
// mutated. Non-string scalars and unrecognised container types pass through
// untouched so structural fields like total_deltas or audio_sample_rate
// remain useful. Map keys are not capped.
//
// maxBytes <= 0 disables the cap, matching TruncateToBytes.
func capDataStrings(data map[string]any, maxBytes int) map[string]any {
	out := make(map[string]any, len(data))
	for k, v := range data {
		out[k] = capValue(v, maxBytes)
	}
	return out
}

// capValue applies the capDataStrings rule to a single value. Strings over
// the cap become the marker; maps and the slice shapes a Data map is likely
// to hold ([]any, []string, []map[string]any) are copied with their elements
// capped recursively; anything else is returned as is.
func capValue(v any, maxBytes int) any {
	if maxBytes <= 0 {
		// Cap disabled: never treat 0 as "truncate everything".
		return v
	}
	switch val := v.(type) {
	case string:
		if len(val) > maxBytes {
			return fmt.Sprintf("<truncated: %d bytes>", len(val))
		}
		return val
	case map[string]any:
		return capDataStrings(val, maxBytes)
	case []any:
		if val == nil {
			return v // keep nil so it still encodes as JSON null
		}
		capped := make([]any, len(val))
		for i, elem := range val {
			capped[i] = capValue(elem, maxBytes)
		}
		return capped
	case []string:
		if val == nil {
			return v
		}
		capped := make([]string, len(val))
		for i, s := range val {
			if len(s) > maxBytes {
				s = fmt.Sprintf("<truncated: %d bytes>", len(s))
			}
			capped[i] = s
		}
		return capped
	case []map[string]any:
		if val == nil {
			return v
		}
		capped := make([]map[string]any, len(val))
		for i, m := range val {
			if m != nil {
				capped[i] = capDataStrings(m, maxBytes)
			}
		}
		return capped
	default:
		return v
	}
}
|
||||
|
||||
func GetBackendTraces() []BackendTrace {
|
||||
backendMu.Lock()
|
||||
if backendTraceBuffer == nil {
|
||||
@@ -136,3 +181,24 @@ func TruncateString(s string, maxLen int) string {
|
||||
}
|
||||
return s[:maxLen] + "..."
|
||||
}
|
||||
|
||||
// TruncateToBytes caps s at no more than maxBytes bytes, preserving the
// leading content and appending a "...[truncated, N bytes]" marker (N is the
// original length) so the UI knows the value was clipped.
//
// Unlike TruncateString it guarantees len(result) <= maxBytes, which matters
// for fields that feed back into the trace pipeline: capDataStrings in
// RecordBackendTrace re-checks size and would otherwise replace a producer's
// head-preserving truncation with the bare marker, losing the prefix.
//
// The cut point is moved back to the nearest UTF-8 rune boundary so a
// multi-byte character is never split; the result may therefore be up to
// three bytes shorter than maxBytes. When maxBytes is too small to hold the
// marker, the value is hard-cut (still on a rune boundary) without a marker.
//
// maxBytes <= 0 disables the cap, matching backendMaxBodyBytes semantics.
func TruncateToBytes(s string, maxBytes int) string {
	if maxBytes <= 0 || len(s) <= maxBytes {
		return s
	}
	suffix := fmt.Sprintf("...[truncated, %d bytes]", len(s))
	cut := maxBytes - len(suffix)
	if cut <= 0 {
		// Pathologically small caps can't fit the marker; fall back to a
		// hard cut so the contract (output <= maxBytes) still holds.
		cut, suffix = maxBytes, ""
	}
	return s[:runeBoundaryAtOrBefore(s, cut)] + suffix
}

// runeBoundaryAtOrBefore returns the largest index <= cut at which s can be
// sliced without splitting a UTF-8 sequence. cut must be < len(s). A UTF-8
// sequence is at most four bytes, so it looks back at most three bytes; if no
// sequence start is found in that window (s is not valid UTF-8 there), cut is
// returned unchanged.
func runeBoundaryAtOrBefore(s string, cut int) int {
	const maxLookback = 3
	for i := cut; i >= 0 && cut-i <= maxLookback; i-- {
		// Continuation bytes have the bit pattern 10xxxxxx; anything else
		// starts a new sequence, so slicing just before it is safe.
		if s[i]&0xC0 != 0x80 {
			return i
		}
	}
	return cut
}
|
||||
|
||||
160
core/trace/backend_trace_cap_test.go
Normal file
160
core/trace/backend_trace_cap_test.go
Normal file
@@ -0,0 +1,160 @@
|
||||
package trace_test
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/mudler/LocalAI/core/trace"
|
||||
)
|
||||
|
||||
// The /api/backend-traces endpoint ships up to TracingMaxItems entries to the
|
||||
// admin Traces UI on every 5s auto-refresh. Without a cap on the per-trace
|
||||
// Data field, a chatty agent-pool workload (LLM traces carry the full
|
||||
// `messages` array, TTS traces carry ~1.3 MiB of audio_wav_base64) makes the
|
||||
// response tens of MiB. The UI then stays in "loading" forever because the
|
||||
// download + parse runs longer than the refresh interval: the same symptom
|
||||
// the API-trace fix (commit 61bf34ea) addressed on the other side.
|
||||
//
|
||||
// These specs pin the generic safety net (Option A) so any future producer
|
||||
// that stuffs a large string into Data is automatically bounded.
|
||||
|
||||
const (
|
||||
// smallCap is the per-string byte cap these specs pass to
// InitBackendTracingIfEnabled and TruncateToBytes; oversized fixtures are
// built as multiples of it.
smallCap = 1024
|
||||
// smallCapStep is how far below smallCap the "within the cap" fixture sits.
smallCapStep = 16
|
||||
)
|
||||
|
||||
var _ = Describe("RecordBackendTrace Data capping", func() {
|
||||
BeforeEach(func() {
|
||||
// Init is sync.Once so the first test wins; subsequent tests just
|
||||
// clear the buffer. The cap value below has to match the first call.
|
||||
trace.InitBackendTracingIfEnabled(64, smallCap)
|
||||
trace.ClearBackendTraces()
|
||||
})
|
||||
|
||||
It("replaces oversized top-level string values with a truncation marker", func() {
|
||||
oversized := strings.Repeat("x", smallCap*4)
|
||||
|
||||
trace.RecordBackendTrace(trace.BackendTrace{
|
||||
Timestamp: time.Now(),
|
||||
Type: trace.BackendTraceLLM,
|
||||
ModelName: "m",
|
||||
Data: map[string]any{
|
||||
"messages": oversized,
|
||||
"small": "fits",
|
||||
},
|
||||
})
|
||||
|
||||
Eventually(trace.GetBackendTraces).Should(HaveLen(1))
|
||||
got := trace.GetBackendTraces()[0]
|
||||
|
||||
Expect(got.Data["small"]).To(Equal("fits"), "fields under the cap must pass through untouched")
|
||||
|
||||
// The marker is the contract the UI reads to show truncation; the
|
||||
// concrete shape can evolve but it must be a short fixed-size string
|
||||
// that encodes the original byte count so users know what was dropped.
|
||||
msg, ok := got.Data["messages"].(string)
|
||||
Expect(ok).To(BeTrue(), "string fields stay strings after capping")
|
||||
Expect(len(msg)).To(BeNumerically("<", smallCap), "capped value must fit under the configured cap")
|
||||
Expect(msg).To(ContainSubstring("truncated"))
|
||||
Expect(msg).To(ContainSubstring("4096"), "marker should reference the original byte count for diagnostics")
|
||||
})
|
||||
|
||||
It("recurses into nested maps so deeply nested oversized strings are also bounded", func() {
|
||||
oversized := strings.Repeat("y", smallCap*2)
|
||||
|
||||
trace.RecordBackendTrace(trace.BackendTrace{
|
||||
Timestamp: time.Now(),
|
||||
Type: trace.BackendTraceLLM,
|
||||
ModelName: "m",
|
||||
Data: map[string]any{
|
||||
"chat_deltas": map[string]any{
|
||||
"content": oversized,
|
||||
"total_deltas": 5,
|
||||
"tool_call_count": 0,
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
Eventually(trace.GetBackendTraces).Should(HaveLen(1))
|
||||
got := trace.GetBackendTraces()[0]
|
||||
|
||||
deltas, ok := got.Data["chat_deltas"].(map[string]any)
|
||||
Expect(ok).To(BeTrue(), "nested map structure must be preserved")
|
||||
Expect(deltas["total_deltas"]).To(Equal(5), "non-string siblings must pass through untouched")
|
||||
|
||||
content, ok := deltas["content"].(string)
|
||||
Expect(ok).To(BeTrue())
|
||||
Expect(len(content)).To(BeNumerically("<", smallCap), "nested oversized string must still be capped")
|
||||
Expect(content).To(ContainSubstring("truncated"))
|
||||
})
|
||||
|
||||
It("leaves values within the cap untouched", func() {
|
||||
smallVal := strings.Repeat("z", smallCap-smallCapStep)
|
||||
|
||||
trace.RecordBackendTrace(trace.BackendTrace{
|
||||
Timestamp: time.Now(),
|
||||
Type: trace.BackendTraceEmbedding,
|
||||
ModelName: "m",
|
||||
Data: map[string]any{
|
||||
"input_text": smallVal,
|
||||
},
|
||||
})
|
||||
|
||||
Eventually(trace.GetBackendTraces).Should(HaveLen(1))
|
||||
got := trace.GetBackendTraces()[0]
|
||||
|
||||
Expect(got.Data["input_text"]).To(Equal(smallVal))
|
||||
})
|
||||
|
||||
It("does not re-truncate values that producers already capped with TruncateToBytes", func() {
|
||||
// Producers (LLM messages/response, etc.) prefer head-preserving
|
||||
// truncation so users can still read the start of the conversation.
|
||||
// TruncateToBytes guarantees output <= cap, so the generic safety
|
||||
// net below must leave it alone, otherwise the kept prefix gets
|
||||
// thrown away and replaced with the marker.
|
||||
preTruncated := trace.TruncateToBytes(strings.Repeat("a", smallCap*4), smallCap)
|
||||
Expect(len(preTruncated)).To(BeNumerically("<=", smallCap))
|
||||
|
||||
trace.RecordBackendTrace(trace.BackendTrace{
|
||||
Timestamp: time.Now(),
|
||||
Type: trace.BackendTraceLLM,
|
||||
ModelName: "m",
|
||||
Data: map[string]any{
|
||||
"messages": preTruncated,
|
||||
},
|
||||
})
|
||||
|
||||
Eventually(trace.GetBackendTraces).Should(HaveLen(1))
|
||||
got := trace.GetBackendTraces()[0]
|
||||
Expect(got.Data["messages"]).To(Equal(preTruncated))
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("TruncateToBytes", func() {
|
||||
It("returns the input unchanged when it fits", func() {
|
||||
Expect(trace.TruncateToBytes("hello", 1024)).To(Equal("hello"))
|
||||
})
|
||||
|
||||
It("treats maxBytes <= 0 as unlimited", func() {
|
||||
Expect(trace.TruncateToBytes("hello", 0)).To(Equal("hello"))
|
||||
Expect(trace.TruncateToBytes("hello", -1)).To(Equal("hello"))
|
||||
})
|
||||
|
||||
It("caps oversized input to at most maxBytes and preserves the head", func() {
|
||||
in := strings.Repeat("a", 5000)
|
||||
out := trace.TruncateToBytes(in, 100)
|
||||
Expect(len(out)).To(BeNumerically("<=", 100), "output must never exceed the cap so the generic Record-time safety net doesn't fire")
|
||||
Expect(out).To(HavePrefix("a"), "should keep the leading content readable")
|
||||
Expect(out).To(ContainSubstring("truncated"), "should mark the value as truncated for the UI")
|
||||
})
|
||||
|
||||
It("falls back to plain truncation when the cap is smaller than the suffix", func() {
|
||||
in := strings.Repeat("a", 100)
|
||||
out := trace.TruncateToBytes(in, 4)
|
||||
Expect(len(out)).To(Equal(4))
|
||||
Expect(out).To(Equal("aaaa"))
|
||||
})
|
||||
})
|
||||
13
core/trace/trace_suite_test.go
Normal file
13
core/trace/trace_suite_test.go
Normal file
@@ -0,0 +1,13 @@
|
||||
package trace_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
// TestTrace is the `go test` entry point for this package's specs: it
// registers Fail as the assertion failure handler and then runs the suite
// under the name "Trace test suite".
func TestTrace(t *testing.T) {
|
||||
RegisterFailHandler(Fail)
|
||||
RunSpecs(t, "Trace test suite")
|
||||
}
|
||||
Reference in New Issue
Block a user