From 61bf34ea2fdbfe10d5cf62d8b5fffa511d3be22f Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 22 May 2026 15:29:24 +0200 Subject: [PATCH] fix(traces): cap captured body size to keep admin Traces UI responsive (#9946) The trace middleware buffered the full request and response bodies for every JSON exchange. With a chatty agent-pool RAG workload, /embeddings responses (large vector arrays) accumulated to tens of MB in the in-memory buffer; the admin Traces page would then download and parse 40+ MB on every load and on every 5s auto-refresh, locking the UI in a loading state. Add LOCALAI_TRACING_MAX_BODY_BYTES (default 64 KiB) that caps each captured body. The full payload still flows through to the real client; only the trace copy is bounded. Exchanges record body_truncated and original body_bytes so the dashboard can show that truncation happened. The cap is configurable via env, CLI, and runtime_settings.json. Also unblock recovery: the Traces page now keeps the Clear button enabled while loading, since "buffer too large to render" is exactly when the user needs to clear it. 
Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Ettore Di Giacinto Co-authored-by: Ettore Di Giacinto --- core/application/startup.go | 7 ++ core/cli/run.go | 2 + core/config/application_config.go | 13 +++ core/config/runtime_settings.go | 1 + core/http/middleware/trace.go | 82 +++++++++++--- core/http/middleware/trace_body_cap_test.go | 116 ++++++++++++++++++++ core/http/react-ui/src/pages/Traces.jsx | 10 +- 7 files changed, 212 insertions(+), 19 deletions(-) create mode 100644 core/http/middleware/trace_body_cap_test.go diff --git a/core/application/startup.go b/core/application/startup.go index 83d4a2d72..1ddeabb0d 100644 --- a/core/application/startup.go +++ b/core/application/startup.go @@ -552,6 +552,13 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) { options.TracingMaxItems = *settings.TracingMaxItems } } + if settings.TracingMaxBodyBytes != nil { + // Allow the on-disk setting to override the CLI/env default. The + // startup default is non-zero (see NewApplicationConfig), so a plain + // `== 0` guard like the others would never trigger; we instead respect + // any value the file specifies. 0 in the file means "uncapped". + options.TracingMaxBodyBytes = *settings.TracingMaxBodyBytes + } // Branding / whitelabeling. There are no env vars for these — the file is // the only source — so apply unconditionally. 
Without this block a server diff --git a/core/cli/run.go b/core/cli/run.go index a5651800b..7b9dc7257 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -100,6 +100,7 @@ type RunCMD struct { LoadToMemory []string `env:"LOCALAI_LOAD_TO_MEMORY,LOAD_TO_MEMORY" help:"A list of models to load into memory at startup" group:"models"` EnableTracing bool `env:"LOCALAI_ENABLE_TRACING,ENABLE_TRACING" help:"Enable API tracing" group:"api"` TracingMaxItems int `env:"LOCALAI_TRACING_MAX_ITEMS" default:"1024" help:"Maximum number of traces to keep" group:"api"` + TracingMaxBodyBytes int `env:"LOCALAI_TRACING_MAX_BODY_BYTES" default:"65536" help:"Maximum bytes captured per request/response body in the trace buffer (0 = uncapped). Caps memory growth from chatty endpoints like /embeddings." group:"api"` AgentJobRetentionDays int `env:"LOCALAI_AGENT_JOB_RETENTION_DAYS,AGENT_JOB_RETENTION_DAYS" default:"30" help:"Number of days to keep agent job history (default: 30)" group:"api"` OpenResponsesStoreTTL string `env:"LOCALAI_OPEN_RESPONSES_STORE_TTL,OPEN_RESPONSES_STORE_TTL" default:"0" help:"TTL for Open Responses store (e.g., 1h, 30m, 0 = no expiration)" group:"api"` @@ -273,6 +274,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { opts = append(opts, config.EnableTracing) } opts = append(opts, config.WithTracingMaxItems(r.TracingMaxItems)) + opts = append(opts, config.WithTracingMaxBodyBytes(r.TracingMaxBodyBytes)) token := "" if r.Peer2Peer || r.Peer2PeerToken != "" { diff --git a/core/config/application_config.go b/core/config/application_config.go index 733532e7b..a4119206d 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -21,6 +21,7 @@ type ApplicationConfig struct { Debug bool EnableTracing bool TracingMaxItems int + TracingMaxBodyBytes int // Per-body cap for captured request/response bodies; 0 disables the cap EnableBackendLogging bool GeneratedContentDir string @@ -187,6 +188,7 @@ func NewApplicationConfig(o ...AppOption) 
*ApplicationConfig { LRUEvictionRetryInterval: 1 * time.Second, // Default: 1 second WatchDogInterval: 500 * time.Millisecond, // Default: 500ms TracingMaxItems: 1024, + TracingMaxBodyBytes: 64 * 1024, // 64 KiB - caps each request/response body in the trace buffer AgentPool: AgentPoolConfig{ Enabled: true, Timeout: "5m", @@ -578,6 +580,12 @@ func WithTracingMaxItems(items int) AppOption { } } +func WithTracingMaxBodyBytes(bytes int) AppOption { + return func(o *ApplicationConfig) { + o.TracingMaxBodyBytes = bytes + } +} + func WithGeneratedContentDir(generatedContentDir string) AppOption { return func(o *ApplicationConfig) { o.GeneratedContentDir = generatedContentDir @@ -920,6 +928,7 @@ func (o *ApplicationConfig) ToRuntimeSettings() RuntimeSettings { f16 := o.F16 debug := o.Debug tracingMaxItems := o.TracingMaxItems + tracingMaxBodyBytes := o.TracingMaxBodyBytes enableTracing := o.EnableTracing enableBackendLogging := o.EnableBackendLogging cors := o.CORS @@ -1008,6 +1017,7 @@ func (o *ApplicationConfig) ToRuntimeSettings() RuntimeSettings { F16: &f16, Debug: &debug, TracingMaxItems: &tracingMaxItems, + TracingMaxBodyBytes: &tracingMaxBodyBytes, EnableTracing: &enableTracing, EnableBackendLogging: &enableBackendLogging, CORS: &cors, @@ -1146,6 +1156,9 @@ func (o *ApplicationConfig) ApplyRuntimeSettings(settings *RuntimeSettings) (req if settings.TracingMaxItems != nil { o.TracingMaxItems = *settings.TracingMaxItems } + if settings.TracingMaxBodyBytes != nil { + o.TracingMaxBodyBytes = *settings.TracingMaxBodyBytes + } if settings.EnableBackendLogging != nil { o.EnableBackendLogging = *settings.EnableBackendLogging } diff --git a/core/config/runtime_settings.go b/core/config/runtime_settings.go index 3fb16233e..dbddc4a2d 100644 --- a/core/config/runtime_settings.go +++ b/core/config/runtime_settings.go @@ -38,6 +38,7 @@ type RuntimeSettings struct { Debug *bool `json:"debug,omitempty"` EnableTracing *bool `json:"enable_tracing,omitempty"` TracingMaxItems *int 
`json:"tracing_max_items,omitempty"` + TracingMaxBodyBytes *int `json:"tracing_max_body_bytes,omitempty"` // Per-body cap in bytes; 0 disables the cap EnableBackendLogging *bool `json:"enable_backend_logging,omitempty"` // Security/CORS settings diff --git a/core/http/middleware/trace.go b/core/http/middleware/trace.go index 9e713c031..7bf661aab 100644 --- a/core/http/middleware/trace.go +++ b/core/http/middleware/trace.go @@ -17,16 +17,20 @@ import ( ) type APIExchangeRequest struct { - Method string `json:"method"` - Path string `json:"path"` - Headers *http.Header `json:"headers"` - Body *[]byte `json:"body"` + Method string `json:"method"` + Path string `json:"path"` + Headers *http.Header `json:"headers"` + Body *[]byte `json:"body"` + BodyTruncated bool `json:"body_truncated,omitempty"` + BodyBytes int `json:"body_bytes,omitempty"` // original size before truncation } type APIExchangeResponse struct { - Status int `json:"status"` - Headers *http.Header `json:"headers"` - Body *[]byte `json:"body"` + Status int `json:"status"` + Headers *http.Header `json:"headers"` + Body *[]byte `json:"body"` + BodyTruncated bool `json:"body_truncated,omitempty"` + BodyBytes int `json:"body_bytes,omitempty"` // original size before truncation } type APIExchange struct { @@ -66,11 +70,29 @@ var doInitializeTracing = sync.OnceFunc(func() { type bodyWriter struct { http.ResponseWriter - body *bytes.Buffer + body *bytes.Buffer + maxBytes int // 0 = unlimited capture + truncated bool + totalBytes int // bytes the upstream handler wrote, even past the cap } func (w *bodyWriter) Write(b []byte) (int, error) { - w.body.Write(b) + // Capture into the trace buffer up to maxBytes, then drop the overflow + // so a chatty endpoint can't grow the buffer without bound. The full + // payload still flows through to the real client below. 
+ w.totalBytes += len(b) + if w.maxBytes <= 0 { + w.body.Write(b) + } else if remain := w.maxBytes - w.body.Len(); remain > 0 { + if remain >= len(b) { + w.body.Write(b) + } else { + w.body.Write(b[:remain]) + w.truncated = true + } + } else { + w.truncated = true + } return w.ResponseWriter.Write(b) } @@ -80,6 +102,20 @@ func (w *bodyWriter) Flush() { } } +// truncateForTrace returns a defensive copy of body capped at maxBytes, +// and a flag indicating whether the cap forced truncation. maxBytes <= 0 +// disables the cap. +func truncateForTrace(body []byte, maxBytes int) ([]byte, bool) { + if maxBytes <= 0 || len(body) <= maxBytes { + out := make([]byte, len(body)) + copy(out, body) + return out, false + } + out := make([]byte, maxBytes) + copy(out, body[:maxBytes]) + return out, true +} + func initializeTracing(maxItems int) { tracingMaxItems = maxItems doInitializeTracing() @@ -134,11 +170,18 @@ func TraceMiddleware(app *application.Application) echo.MiddlewareFunc { startTime := time.Now() + // Cap captured payload size. Without this, /embeddings and + // streaming /chat/completions blow the in-memory buffer into the + // tens of MB, and the admin Traces UI then locks up because it cannot + // finish fetching the JSON dump before the next 5s auto-refresh fires. + maxBodyBytes := app.ApplicationConfig().TracingMaxBodyBytes + // Wrap response writer to capture body resBody := new(bytes.Buffer) mw := &bodyWriter{ ResponseWriter: c.Response().Writer, body: resBody, + maxBytes: maxBodyBytes, } c.Response().Writer = mw @@ -159,8 +202,7 @@ func TraceMiddleware(app *application.Application) echo.MiddlewareFunc { // via any heap-dump-style introspection, and tokens shouldn't // outlive the request that carried them.
requestHeaders := redactSensitiveHeaders(c.Request().Header) - requestBody := make([]byte, len(body)) - copy(requestBody, body) + requestBody, requestTruncated := truncateForTrace(body, maxBodyBytes) responseHeaders := redactSensitiveHeaders(c.Response().Header()) responseBody := make([]byte, resBody.Len()) copy(responseBody, resBody.Bytes()) @@ -168,15 +210,19 @@ func TraceMiddleware(app *application.Application) echo.MiddlewareFunc { Timestamp: startTime, Duration: time.Since(startTime), Request: APIExchangeRequest{ - Method: c.Request().Method, - Path: c.Path(), - Headers: &requestHeaders, - Body: &requestBody, + Method: c.Request().Method, + Path: c.Path(), + Headers: &requestHeaders, + Body: &requestBody, + BodyTruncated: requestTruncated, + BodyBytes: len(body), }, Response: APIExchangeResponse{ - Status: status, - Headers: &responseHeaders, - Body: &responseBody, + Status: status, + Headers: &responseHeaders, + Body: &responseBody, + BodyTruncated: mw.truncated, + BodyBytes: mw.totalBytes, }, } if handlerErr != nil { diff --git a/core/http/middleware/trace_body_cap_test.go b/core/http/middleware/trace_body_cap_test.go new file mode 100644 index 000000000..f5466d52d --- /dev/null +++ b/core/http/middleware/trace_body_cap_test.go @@ -0,0 +1,116 @@ +package middleware + +import ( + "bytes" + "net/http/httptest" + "strings" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +// The trace middleware copies request and response bodies into an in-memory +// buffer that backs the admin /api/traces endpoint. With no upper bound a +// chatty workload (embeddings, large completions) trivially produces a +// multi-MB response that locks the Traces UI in a loading state — fetching +// and parsing the payload outruns the 5-second auto-refresh. These specs +// pin the capping contract so future refactors keep both the cap and the +// passthrough to the real client intact. 
+ +var _ = Describe("bodyWriter capping", func() { + It("captures the full body when maxBytes is 0 (unlimited)", func() { + downstream := httptest.NewRecorder() + buf := &bytes.Buffer{} + bw := &bodyWriter{ResponseWriter: downstream, body: buf, maxBytes: 0} + + payload := []byte(strings.Repeat("x", 4096)) + n, err := bw.Write(payload) + + Expect(err).ToNot(HaveOccurred()) + Expect(n).To(Equal(len(payload))) + Expect(buf.Len()).To(Equal(len(payload))) + Expect(downstream.Body.Len()).To(Equal(len(payload))) + Expect(bw.truncated).To(BeFalse()) + }) + + It("stops appending to the trace buffer once maxBytes is reached but still forwards to the client", func() { + downstream := httptest.NewRecorder() + buf := &bytes.Buffer{} + bw := &bodyWriter{ResponseWriter: downstream, body: buf, maxBytes: 100} + + payload := []byte(strings.Repeat("a", 250)) + n, err := bw.Write(payload) + + Expect(err).ToNot(HaveOccurred()) + Expect(n).To(Equal(len(payload)), "Write must return the full byte count so callers see no short write") + Expect(buf.Len()).To(Equal(100), "trace buffer should hold exactly maxBytes") + Expect(downstream.Body.Len()).To(Equal(len(payload)), "client must still receive every byte") + Expect(bw.truncated).To(BeTrue()) + }) + + It("handles a write that straddles the cap by keeping only the leading slice", func() { + downstream := httptest.NewRecorder() + buf := &bytes.Buffer{} + bw := &bodyWriter{ResponseWriter: downstream, body: buf, maxBytes: 10} + + _, err := bw.Write([]byte("12345")) + Expect(err).ToNot(HaveOccurred()) + Expect(bw.truncated).To(BeFalse()) + + _, err = bw.Write([]byte("67890ABCDE")) + Expect(err).ToNot(HaveOccurred()) + + Expect(buf.String()).To(Equal("1234567890")) + Expect(downstream.Body.String()).To(Equal("1234567890ABCDE")) + Expect(bw.truncated).To(BeTrue()) + }) + + It("ignores further writes after the cap was already hit", func() { + downstream := httptest.NewRecorder() + buf := &bytes.Buffer{} + bw := &bodyWriter{ResponseWriter: 
downstream, body: buf, maxBytes: 4} + + _, _ = bw.Write([]byte("AAAA")) + _, _ = bw.Write([]byte("BBBB")) + _, _ = bw.Write([]byte("CCCC")) + + Expect(buf.String()).To(Equal("AAAA")) + Expect(downstream.Body.String()).To(Equal("AAAABBBBCCCC")) + Expect(bw.truncated).To(BeTrue()) + }) +}) + +var _ = Describe("truncateForTrace", func() { + It("returns the input unchanged when below the cap", func() { + in := []byte("hello") + out, truncated := truncateForTrace(in, 1024) + Expect(truncated).To(BeFalse()) + Expect(out).To(Equal(in)) + }) + + It("truncates when the input exceeds the cap and signals truncation", func() { + in := []byte(strings.Repeat("z", 200)) + out, truncated := truncateForTrace(in, 64) + Expect(truncated).To(BeTrue()) + Expect(out).To(HaveLen(64)) + Expect(string(out)).To(Equal(strings.Repeat("z", 64))) + }) + + It("treats maxBytes <= 0 as unlimited (back-compat with current default)", func() { + in := []byte(strings.Repeat("q", 10_000)) + out, truncated := truncateForTrace(in, 0) + Expect(truncated).To(BeFalse()) + Expect(out).To(HaveLen(len(in))) + }) + + It("does not retain the caller's backing array (defensive copy)", func() { + in := []byte("abcdefghij") + out, truncated := truncateForTrace(in, 4) + Expect(truncated).To(BeTrue()) + Expect(string(out)).To(Equal("abcd")) + + // Mutating the source must not corrupt the trace copy. + in[0] = 'Z' + Expect(string(out)).To(Equal("abcd")) + }) +}) diff --git a/core/http/react-ui/src/pages/Traces.jsx b/core/http/react-ui/src/pages/Traces.jsx index 64f261507..e1cc5c480 100644 --- a/core/http/react-ui/src/pages/Traces.jsx +++ b/core/http/react-ui/src/pages/Traces.jsx @@ -406,7 +406,15 @@ export default function Traces() {
- +
{settings && (() => {