fix(traces): cap captured body size to keep admin Traces UI responsive (#9946)

The trace middleware buffered the full request and response bodies for every
JSON exchange. With a chatty agent-pool RAG workload, /embeddings responses
(large vector arrays) accumulated to tens of MB in the in-memory buffer; the
admin Traces page would then download and parse 40+ MB on every load and on
every 5s auto-refresh, locking the UI in a loading state.

Add LOCALAI_TRACING_MAX_BODY_BYTES (default 64 KiB) that caps each captured
body. The full payload still flows through to the real client; only the
trace copy is bounded. Exchanges record body_truncated and original
body_bytes so the dashboard can show that truncation happened. The cap is
configurable via env, CLI, and runtime_settings.json.

Also unblock recovery: the Traces page now keeps the Clear button enabled
while loading, since "buffer too large to render" is exactly when the user
needs to clear it.


Assisted-by: Claude:claude-opus-4-7

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
LocalAI [bot]
2026-05-22 15:29:24 +02:00
committed by GitHub
parent 0b2ae3c6ca
commit 61bf34ea2f
7 changed files with 212 additions and 19 deletions

View File

@@ -552,6 +552,13 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
options.TracingMaxItems = *settings.TracingMaxItems
}
}
if settings.TracingMaxBodyBytes != nil {
// Allow the on-disk setting to override the CLI/env default. The
// startup default is non-zero (see NewApplicationConfig), so a plain
// `== 0` guard like the others would never trigger; we instead respect
// any value the file specifies. 0 in the file means "uncapped".
options.TracingMaxBodyBytes = *settings.TracingMaxBodyBytes
}
// Branding / whitelabeling. There are no env vars for these — the file is
// the only source — so apply unconditionally. Without this block a server

View File

@@ -100,6 +100,7 @@ type RunCMD struct {
LoadToMemory []string `env:"LOCALAI_LOAD_TO_MEMORY,LOAD_TO_MEMORY" help:"A list of models to load into memory at startup" group:"models"`
EnableTracing bool `env:"LOCALAI_ENABLE_TRACING,ENABLE_TRACING" help:"Enable API tracing" group:"api"`
TracingMaxItems int `env:"LOCALAI_TRACING_MAX_ITEMS" default:"1024" help:"Maximum number of traces to keep" group:"api"`
TracingMaxBodyBytes int `env:"LOCALAI_TRACING_MAX_BODY_BYTES" default:"65536" help:"Maximum bytes captured per request/response body in the trace buffer (0 = uncapped). Caps memory growth from chatty endpoints like /embeddings." group:"api"`
AgentJobRetentionDays int `env:"LOCALAI_AGENT_JOB_RETENTION_DAYS,AGENT_JOB_RETENTION_DAYS" default:"30" help:"Number of days to keep agent job history (default: 30)" group:"api"`
OpenResponsesStoreTTL string `env:"LOCALAI_OPEN_RESPONSES_STORE_TTL,OPEN_RESPONSES_STORE_TTL" default:"0" help:"TTL for Open Responses store (e.g., 1h, 30m, 0 = no expiration)" group:"api"`
@@ -273,6 +274,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
opts = append(opts, config.EnableTracing)
}
opts = append(opts, config.WithTracingMaxItems(r.TracingMaxItems))
opts = append(opts, config.WithTracingMaxBodyBytes(r.TracingMaxBodyBytes))
token := ""
if r.Peer2Peer || r.Peer2PeerToken != "" {

View File

@@ -21,6 +21,7 @@ type ApplicationConfig struct {
Debug bool
EnableTracing bool
TracingMaxItems int
TracingMaxBodyBytes int // Per-body cap for captured request/response bodies; 0 disables the cap
EnableBackendLogging bool
GeneratedContentDir string
@@ -187,6 +188,7 @@ func NewApplicationConfig(o ...AppOption) *ApplicationConfig {
LRUEvictionRetryInterval: 1 * time.Second, // Default: 1 second
WatchDogInterval: 500 * time.Millisecond, // Default: 500ms
TracingMaxItems: 1024,
TracingMaxBodyBytes: 64 * 1024, // 64 KiB - caps each request/response body in the trace buffer
AgentPool: AgentPoolConfig{
Enabled: true,
Timeout: "5m",
@@ -578,6 +580,12 @@ func WithTracingMaxItems(items int) AppOption {
}
}
// WithTracingMaxBodyBytes returns an AppOption that sets
// ApplicationConfig.TracingMaxBodyBytes, the per-body cap for captured
// request/response bodies. A value of 0 disables the cap.
func WithTracingMaxBodyBytes(bytes int) AppOption {
	return func(cfg *ApplicationConfig) {
		cfg.TracingMaxBodyBytes = bytes
	}
}
func WithGeneratedContentDir(generatedContentDir string) AppOption {
return func(o *ApplicationConfig) {
o.GeneratedContentDir = generatedContentDir
@@ -920,6 +928,7 @@ func (o *ApplicationConfig) ToRuntimeSettings() RuntimeSettings {
f16 := o.F16
debug := o.Debug
tracingMaxItems := o.TracingMaxItems
tracingMaxBodyBytes := o.TracingMaxBodyBytes
enableTracing := o.EnableTracing
enableBackendLogging := o.EnableBackendLogging
cors := o.CORS
@@ -1008,6 +1017,7 @@ func (o *ApplicationConfig) ToRuntimeSettings() RuntimeSettings {
F16: &f16,
Debug: &debug,
TracingMaxItems: &tracingMaxItems,
TracingMaxBodyBytes: &tracingMaxBodyBytes,
EnableTracing: &enableTracing,
EnableBackendLogging: &enableBackendLogging,
CORS: &cors,
@@ -1146,6 +1156,9 @@ func (o *ApplicationConfig) ApplyRuntimeSettings(settings *RuntimeSettings) (req
if settings.TracingMaxItems != nil {
o.TracingMaxItems = *settings.TracingMaxItems
}
if settings.TracingMaxBodyBytes != nil {
o.TracingMaxBodyBytes = *settings.TracingMaxBodyBytes
}
if settings.EnableBackendLogging != nil {
o.EnableBackendLogging = *settings.EnableBackendLogging
}

View File

@@ -38,6 +38,7 @@ type RuntimeSettings struct {
Debug *bool `json:"debug,omitempty"`
EnableTracing *bool `json:"enable_tracing,omitempty"`
TracingMaxItems *int `json:"tracing_max_items,omitempty"`
TracingMaxBodyBytes *int `json:"tracing_max_body_bytes,omitempty"` // Per-body cap in bytes; 0 disables the cap
EnableBackendLogging *bool `json:"enable_backend_logging,omitempty"`
// Security/CORS settings

View File

@@ -17,16 +17,20 @@ import (
)
// APIExchangeRequest is the request half of a traced API exchange, in the
// shape it is serialized to JSON.
type APIExchangeRequest struct {
Method string `json:"method"`
Path string `json:"path"`
Headers *http.Header `json:"headers"`
Body *[]byte `json:"body"`
Method string `json:"method"`
Path string `json:"path"`
Headers *http.Header `json:"headers"`
Body *[]byte `json:"body"`
BodyTruncated bool `json:"body_truncated,omitempty"` // true when Body holds only a capped prefix of the request body
BodyBytes int `json:"body_bytes,omitempty"` // original size before truncation
}
// APIExchangeResponse is the response half of a traced API exchange, in the
// shape it is serialized to JSON.
type APIExchangeResponse struct {
Status int `json:"status"`
Headers *http.Header `json:"headers"`
Body *[]byte `json:"body"`
Status int `json:"status"`
Headers *http.Header `json:"headers"`
Body *[]byte `json:"body"`
BodyTruncated bool `json:"body_truncated,omitempty"` // true when the captured Body stopped short of what the handler wrote
BodyBytes int `json:"body_bytes,omitempty"` // original size before truncation
}
type APIExchange struct {
@@ -66,11 +70,29 @@ var doInitializeTracing = sync.OnceFunc(func() {
// bodyWriter wraps an http.ResponseWriter and mirrors what the handler writes
// into body so the response can be recorded in a trace. Every byte is still
// forwarded to the wrapped writer; only the mirrored copy is bounded.
type bodyWriter struct {
	http.ResponseWriter
	body       *bytes.Buffer // captured copy of the response body
	maxBytes   int           // capture limit in bytes; <= 0 means unlimited capture
	truncated  bool          // set once bytes had to be dropped from the capture
	totalBytes int           // bytes the upstream handler wrote, even past the cap
}

// Write records up to maxBytes of the response in the capture buffer and then
// forwards b, in full, to the wrapped ResponseWriter. The returned count and
// error are those of the wrapped writer, so callers never observe a short
// write caused by the capture limit.
func (w *bodyWriter) Write(b []byte) (int, error) {
	w.totalBytes += len(b)

	// Work out how much of b still fits in the capture buffer. truncated is
	// only raised when bytes are actually dropped, so a zero-length write
	// that arrives when the buffer is exactly full does not flag truncation.
	keep := len(b)
	if w.maxBytes > 0 {
		room := w.maxBytes - w.body.Len()
		if room < 0 {
			room = 0
		}
		if room < keep {
			keep = room
			w.truncated = true
		}
	}
	if keep > 0 {
		w.body.Write(b[:keep])
	}

	return w.ResponseWriter.Write(b)
}
@@ -80,6 +102,20 @@ func (w *bodyWriter) Flush() {
}
}
// truncateForTrace copies body into a freshly allocated slice holding at most
// maxBytes bytes and reports whether any bytes had to be left out. A maxBytes
// of zero or less means no limit. The result never shares storage with body.
func truncateForTrace(body []byte, maxBytes int) ([]byte, bool) {
	keep := len(body)
	truncated := maxBytes > 0 && keep > maxBytes
	if truncated {
		keep = maxBytes
	}
	// copy stops at the shorter of the two slices, i.e. after keep bytes.
	trimmed := make([]byte, keep)
	copy(trimmed, body)
	return trimmed, truncated
}
func initializeTracing(maxItems int) {
tracingMaxItems = maxItems
doInitializeTracing()
@@ -134,11 +170,18 @@ func TraceMiddleware(app *application.Application) echo.MiddlewareFunc {
startTime := time.Now()
// Cap captured payload size. Without this, /embeddings and
// streaming /chat/completions blow the in-memory buffer into the
// tens of MB, and the admin Traces UI then stays stuck loading because
// fetching and parsing the JSON dump takes longer than the 5s auto-refresh.
maxBodyBytes := app.ApplicationConfig().TracingMaxBodyBytes
// Wrap response writer to capture body
resBody := new(bytes.Buffer)
mw := &bodyWriter{
ResponseWriter: c.Response().Writer,
body: resBody,
maxBytes: maxBodyBytes,
}
c.Response().Writer = mw
@@ -159,8 +202,7 @@ func TraceMiddleware(app *application.Application) echo.MiddlewareFunc {
// via any heap-dump-style introspection, and tokens shouldn't
// outlive the request that carried them.
requestHeaders := redactSensitiveHeaders(c.Request().Header)
requestBody := make([]byte, len(body))
copy(requestBody, body)
requestBody, requestTruncated := truncateForTrace(body, maxBodyBytes)
responseHeaders := redactSensitiveHeaders(c.Response().Header())
responseBody := make([]byte, resBody.Len())
copy(responseBody, resBody.Bytes())
@@ -168,15 +210,19 @@ func TraceMiddleware(app *application.Application) echo.MiddlewareFunc {
Timestamp: startTime,
Duration: time.Since(startTime),
Request: APIExchangeRequest{
Method: c.Request().Method,
Path: c.Path(),
Headers: &requestHeaders,
Body: &requestBody,
Method: c.Request().Method,
Path: c.Path(),
Headers: &requestHeaders,
Body: &requestBody,
BodyTruncated: requestTruncated,
BodyBytes: len(body),
},
Response: APIExchangeResponse{
Status: status,
Headers: &responseHeaders,
Body: &responseBody,
Status: status,
Headers: &responseHeaders,
Body: &responseBody,
BodyTruncated: mw.truncated,
BodyBytes: mw.totalBytes,
},
}
if handlerErr != nil {

View File

@@ -0,0 +1,116 @@
package middleware
import (
"bytes"
"net/http/httptest"
"strings"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// The trace middleware copies request and response bodies into an in-memory
// buffer that backs the admin /api/traces endpoint. With no upper bound a
// chatty workload (embeddings, large completions) trivially produces a
// multi-MB response that locks the Traces UI in a loading state — fetching
// and parsing the payload outruns the 5-second auto-refresh. These specs
// pin the capping contract so future refactors keep both the cap and the
// passthrough to the real client intact.
// Specs for bodyWriter: the capture buffer must stop growing at maxBytes, the
// wrapped writer must still receive every byte, and totalBytes must count
// everything the handler wrote regardless of the cap.
var _ = Describe("bodyWriter capping", func() {
	It("captures the full body when maxBytes is 0 (unlimited)", func() {
		downstream := httptest.NewRecorder()
		buf := &bytes.Buffer{}
		bw := &bodyWriter{ResponseWriter: downstream, body: buf, maxBytes: 0}

		payload := []byte(strings.Repeat("x", 4096))
		n, err := bw.Write(payload)

		Expect(err).ToNot(HaveOccurred())
		Expect(n).To(Equal(len(payload)))
		Expect(buf.Len()).To(Equal(len(payload)))
		Expect(downstream.Body.Len()).To(Equal(len(payload)))
		Expect(bw.truncated).To(BeFalse())
		Expect(bw.totalBytes).To(Equal(len(payload)))
	})

	It("stops appending to the trace buffer once maxBytes is reached but still forwards to the client", func() {
		downstream := httptest.NewRecorder()
		buf := &bytes.Buffer{}
		bw := &bodyWriter{ResponseWriter: downstream, body: buf, maxBytes: 100}

		payload := []byte(strings.Repeat("a", 250))
		n, err := bw.Write(payload)

		Expect(err).ToNot(HaveOccurred())
		Expect(n).To(Equal(len(payload)), "Write must return the full byte count so callers see no short write")
		Expect(buf.Len()).To(Equal(100), "trace buffer should hold exactly maxBytes")
		Expect(downstream.Body.Len()).To(Equal(len(payload)), "client must still receive every byte")
		Expect(bw.truncated).To(BeTrue())
		// totalBytes feeds body_bytes, so it must report the pre-cap size.
		Expect(bw.totalBytes).To(Equal(len(payload)))
	})

	It("handles a write that straddles the cap by keeping only the leading slice", func() {
		downstream := httptest.NewRecorder()
		buf := &bytes.Buffer{}
		bw := &bodyWriter{ResponseWriter: downstream, body: buf, maxBytes: 10}

		_, err := bw.Write([]byte("12345"))
		Expect(err).ToNot(HaveOccurred())
		Expect(bw.truncated).To(BeFalse())

		_, err = bw.Write([]byte("67890ABCDE"))
		Expect(err).ToNot(HaveOccurred())
		Expect(buf.String()).To(Equal("1234567890"))
		Expect(downstream.Body.String()).To(Equal("1234567890ABCDE"))
		Expect(bw.truncated).To(BeTrue())
		Expect(bw.totalBytes).To(Equal(15))
	})

	It("ignores further writes after the cap was already hit", func() {
		downstream := httptest.NewRecorder()
		buf := &bytes.Buffer{}
		bw := &bodyWriter{ResponseWriter: downstream, body: buf, maxBytes: 4}

		_, _ = bw.Write([]byte("AAAA"))
		_, _ = bw.Write([]byte("BBBB"))
		_, _ = bw.Write([]byte("CCCC"))

		Expect(buf.String()).To(Equal("AAAA"))
		Expect(downstream.Body.String()).To(Equal("AAAABBBBCCCC"))
		Expect(bw.truncated).To(BeTrue())
		Expect(bw.totalBytes).To(Equal(12))
	})
})
// Specs for truncateForTrace: bodies at or below the cap are copied whole,
// longer bodies are cut to the cap and flagged, a cap of zero or less means
// no limit, and the result never aliases the caller's slice.
var _ = Describe("truncateForTrace", func() {
	It("returns the input unchanged when below the cap", func() {
		in := []byte("hello")
		out, truncated := truncateForTrace(in, 1024)
		Expect(truncated).To(BeFalse())
		Expect(out).To(Equal(in))
	})

	It("does not truncate when the input is exactly at the cap", func() {
		in := []byte("hello")
		out, truncated := truncateForTrace(in, len(in))
		Expect(truncated).To(BeFalse())
		Expect(out).To(Equal(in))
	})

	It("truncates when the input exceeds the cap and signals truncation", func() {
		in := []byte(strings.Repeat("z", 200))
		out, truncated := truncateForTrace(in, 64)
		Expect(truncated).To(BeTrue())
		Expect(out).To(HaveLen(64))
		Expect(string(out)).To(Equal(strings.Repeat("z", 64)))
	})

	It("treats maxBytes <= 0 as unlimited", func() {
		in := []byte(strings.Repeat("q", 10_000))

		// Exercise both the documented "0 = uncapped" value and a negative
		// one, since the implementation guards on <= 0.
		for _, limit := range []int{0, -1} {
			out, truncated := truncateForTrace(in, limit)
			Expect(truncated).To(BeFalse())
			Expect(out).To(HaveLen(len(in)))
		}
	})

	It("does not retain the caller's backing array (defensive copy)", func() {
		in := []byte("abcdefghij")
		out, truncated := truncateForTrace(in, 4)
		Expect(truncated).To(BeTrue())
		Expect(string(out)).To(Equal("abcd"))
		// Mutating the source must not corrupt the trace copy.
		in[0] = 'Z'
		Expect(string(out)).To(Equal("abcd"))
	})
})

View File

@@ -406,7 +406,15 @@ export default function Traces() {
<button className="btn btn-secondary btn-sm" onClick={fetchTraces}><i className="fas fa-rotate" /> Refresh</button>
<button className="btn btn-secondary btn-sm" onClick={handleExport} disabled={traces.length === 0}><i className="fas fa-download" /> Export</button>
<div style={{ flex: 1 }} />
<button className="btn btn-danger btn-sm" onClick={handleClear} disabled={traces.length === 0}><i className="fas fa-trash" /> Clear</button>
<button
className="btn btn-danger btn-sm"
onClick={handleClear}
/* Stay enabled while loading: a massive in-memory trace buffer is
precisely the case where the user can't see the table yet and
needs Clear to recover. Clearing an already-empty server-side
buffer is a harmless no-op. */
disabled={!loading && traces.length === 0}
><i className="fas fa-trash" /> Clear</button>
</div>
{settings && (() => {