mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-24 08:38:02 -04:00
fix(traces): cap captured body size to keep admin Traces UI responsive (#9946)
The trace middleware buffered the full request and response bodies for every JSON exchange. With a chatty agent-pool RAG workload, /embeddings responses (large vector arrays) accumulated to tens of MB in the in-memory buffer; the admin Traces page would then download and parse 40+ MB on every load and on every 5s auto-refresh, locking the UI in a loading state. Add LOCALAI_TRACING_MAX_BODY_BYTES (default 64 KiB) that caps each captured body. The full payload still flows through to the real client; only the trace copy is bounded. Exchanges record body_truncated and original body_bytes so the dashboard can show that truncation happened. The cap is configurable via env, CLI, and runtime_settings.json. Also unblock recovery: the Traces page now keeps the Clear button enabled while loading, since "buffer too large to render" is exactly when the user needs to clear it. Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
@@ -552,6 +552,13 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
|
||||
options.TracingMaxItems = *settings.TracingMaxItems
|
||||
}
|
||||
}
|
||||
if settings.TracingMaxBodyBytes != nil {
|
||||
// Allow the on-disk setting to override the CLI/env default. The
|
||||
// startup default is non-zero (see NewApplicationConfig), so a plain
|
||||
// `== 0` guard like the others would never trigger; we instead respect
|
||||
// any value the file specifies. 0 in the file means "uncapped".
|
||||
options.TracingMaxBodyBytes = *settings.TracingMaxBodyBytes
|
||||
}
|
||||
|
||||
// Branding / whitelabeling. There are no env vars for these — the file is
|
||||
// the only source — so apply unconditionally. Without this block a server
|
||||
|
||||
@@ -100,6 +100,7 @@ type RunCMD struct {
|
||||
LoadToMemory []string `env:"LOCALAI_LOAD_TO_MEMORY,LOAD_TO_MEMORY" help:"A list of models to load into memory at startup" group:"models"`
|
||||
EnableTracing bool `env:"LOCALAI_ENABLE_TRACING,ENABLE_TRACING" help:"Enable API tracing" group:"api"`
|
||||
TracingMaxItems int `env:"LOCALAI_TRACING_MAX_ITEMS" default:"1024" help:"Maximum number of traces to keep" group:"api"`
|
||||
TracingMaxBodyBytes int `env:"LOCALAI_TRACING_MAX_BODY_BYTES" default:"65536" help:"Maximum bytes captured per request/response body in the trace buffer (0 = uncapped). Caps memory growth from chatty endpoints like /embeddings." group:"api"`
|
||||
AgentJobRetentionDays int `env:"LOCALAI_AGENT_JOB_RETENTION_DAYS,AGENT_JOB_RETENTION_DAYS" default:"30" help:"Number of days to keep agent job history (default: 30)" group:"api"`
|
||||
OpenResponsesStoreTTL string `env:"LOCALAI_OPEN_RESPONSES_STORE_TTL,OPEN_RESPONSES_STORE_TTL" default:"0" help:"TTL for Open Responses store (e.g., 1h, 30m, 0 = no expiration)" group:"api"`
|
||||
|
||||
@@ -273,6 +274,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
opts = append(opts, config.EnableTracing)
|
||||
}
|
||||
opts = append(opts, config.WithTracingMaxItems(r.TracingMaxItems))
|
||||
opts = append(opts, config.WithTracingMaxBodyBytes(r.TracingMaxBodyBytes))
|
||||
|
||||
token := ""
|
||||
if r.Peer2Peer || r.Peer2PeerToken != "" {
|
||||
|
||||
@@ -21,6 +21,7 @@ type ApplicationConfig struct {
|
||||
Debug bool
|
||||
EnableTracing bool
|
||||
TracingMaxItems int
|
||||
TracingMaxBodyBytes int // Per-body cap for captured request/response bodies; 0 disables the cap
|
||||
EnableBackendLogging bool
|
||||
GeneratedContentDir string
|
||||
|
||||
@@ -187,6 +188,7 @@ func NewApplicationConfig(o ...AppOption) *ApplicationConfig {
|
||||
LRUEvictionRetryInterval: 1 * time.Second, // Default: 1 second
|
||||
WatchDogInterval: 500 * time.Millisecond, // Default: 500ms
|
||||
TracingMaxItems: 1024,
|
||||
TracingMaxBodyBytes: 64 * 1024, // 64 KiB - caps each request/response body in the trace buffer
|
||||
AgentPool: AgentPoolConfig{
|
||||
Enabled: true,
|
||||
Timeout: "5m",
|
||||
@@ -578,6 +580,12 @@ func WithTracingMaxItems(items int) AppOption {
|
||||
}
|
||||
}
|
||||
|
||||
func WithTracingMaxBodyBytes(bytes int) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.TracingMaxBodyBytes = bytes
|
||||
}
|
||||
}
|
||||
|
||||
func WithGeneratedContentDir(generatedContentDir string) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.GeneratedContentDir = generatedContentDir
|
||||
@@ -920,6 +928,7 @@ func (o *ApplicationConfig) ToRuntimeSettings() RuntimeSettings {
|
||||
f16 := o.F16
|
||||
debug := o.Debug
|
||||
tracingMaxItems := o.TracingMaxItems
|
||||
tracingMaxBodyBytes := o.TracingMaxBodyBytes
|
||||
enableTracing := o.EnableTracing
|
||||
enableBackendLogging := o.EnableBackendLogging
|
||||
cors := o.CORS
|
||||
@@ -1008,6 +1017,7 @@ func (o *ApplicationConfig) ToRuntimeSettings() RuntimeSettings {
|
||||
F16: &f16,
|
||||
Debug: &debug,
|
||||
TracingMaxItems: &tracingMaxItems,
|
||||
TracingMaxBodyBytes: &tracingMaxBodyBytes,
|
||||
EnableTracing: &enableTracing,
|
||||
EnableBackendLogging: &enableBackendLogging,
|
||||
CORS: &cors,
|
||||
@@ -1146,6 +1156,9 @@ func (o *ApplicationConfig) ApplyRuntimeSettings(settings *RuntimeSettings) (req
|
||||
if settings.TracingMaxItems != nil {
|
||||
o.TracingMaxItems = *settings.TracingMaxItems
|
||||
}
|
||||
if settings.TracingMaxBodyBytes != nil {
|
||||
o.TracingMaxBodyBytes = *settings.TracingMaxBodyBytes
|
||||
}
|
||||
if settings.EnableBackendLogging != nil {
|
||||
o.EnableBackendLogging = *settings.EnableBackendLogging
|
||||
}
|
||||
|
||||
@@ -38,6 +38,7 @@ type RuntimeSettings struct {
|
||||
Debug *bool `json:"debug,omitempty"`
|
||||
EnableTracing *bool `json:"enable_tracing,omitempty"`
|
||||
TracingMaxItems *int `json:"tracing_max_items,omitempty"`
|
||||
TracingMaxBodyBytes *int `json:"tracing_max_body_bytes,omitempty"` // Per-body cap in bytes; 0 disables the cap
|
||||
EnableBackendLogging *bool `json:"enable_backend_logging,omitempty"`
|
||||
|
||||
// Security/CORS settings
|
||||
|
||||
@@ -17,16 +17,20 @@ import (
|
||||
)
|
||||
|
||||
// APIExchangeRequest is the request half of a traced API exchange, in the
// shape it is serialized to JSON.
type APIExchangeRequest struct {
	Method  string       `json:"method"`
	Path    string       `json:"path"`
	Headers *http.Header `json:"headers"`
	// Body is the captured request body. When BodyTruncated is set it holds
	// only a leading prefix of what the client sent.
	Body *[]byte `json:"body"`
	// BodyTruncated reports that Body was cut short by the capture cap.
	BodyTruncated bool `json:"body_truncated,omitempty"`
	// BodyBytes is the original size before truncation.
	BodyBytes int `json:"body_bytes,omitempty"`
}
|
||||
|
||||
// APIExchangeResponse is the response half of a traced API exchange, in the
// shape it is serialized to JSON.
type APIExchangeResponse struct {
	Status  int          `json:"status"`
	Headers *http.Header `json:"headers"`
	// Body is the captured response body. When BodyTruncated is set it holds
	// only a leading prefix of what the handler wrote.
	Body *[]byte `json:"body"`
	// BodyTruncated reports that Body was cut short by the capture cap.
	BodyTruncated bool `json:"body_truncated,omitempty"`
	// BodyBytes is the original size before truncation.
	BodyBytes int `json:"body_bytes,omitempty"`
}
|
||||
|
||||
type APIExchange struct {
|
||||
@@ -66,11 +70,29 @@ var doInitializeTracing = sync.OnceFunc(func() {
|
||||
|
||||
// bodyWriter wraps an http.ResponseWriter and mirrors what the handler writes
// into body so the trace can record it, keeping at most maxBytes of it.
type bodyWriter struct {
	http.ResponseWriter
	body       *bytes.Buffer
	maxBytes   int // 0 = unlimited capture
	truncated  bool
	totalBytes int // bytes the upstream handler wrote, even past the cap
}

// Write records b in the trace buffer, up to the remaining room under
// maxBytes, and then forwards the complete, unmodified b to the wrapped
// ResponseWriter. The returned count and error are those of the wrapped
// writer, so callers never observe a short write caused by the cap.
//
// truncated is set only when at least one byte of b had to be dropped from
// the trace copy; a zero-length write never marks the capture as truncated.
func (w *bodyWriter) Write(b []byte) (int, error) {
	w.totalBytes += len(b)

	// Work out how much of b still fits in the trace copy. Overflow is
	// dropped so a chatty endpoint cannot grow the buffer without bound.
	keep := len(b)
	if w.maxBytes > 0 {
		remain := w.maxBytes - w.body.Len()
		if remain < 0 {
			remain = 0
		}
		if remain < keep {
			keep = remain
			w.truncated = true
		}
	}
	// bytes.Buffer.Write always returns a nil error, so there is nothing to
	// check here.
	w.body.Write(b[:keep])

	// The real client always receives the full payload.
	return w.ResponseWriter.Write(b)
}
|
||||
|
||||
@@ -80,6 +102,20 @@ func (w *bodyWriter) Flush() {
|
||||
}
|
||||
}
|
||||
|
||||
// truncateForTrace copies body into a freshly allocated slice holding at most
// maxBytes bytes and reports whether any bytes were left out. A maxBytes of
// zero or less means no cap: the whole body is copied and the flag is false.
// The result never shares a backing array with body.
func truncateForTrace(body []byte, maxBytes int) ([]byte, bool) {
	size := len(body)
	capped := maxBytes > 0 && size > maxBytes
	if capped {
		size = maxBytes
	}

	trimmed := make([]byte, size)
	// copy stops at len(trimmed), so only the leading prefix is taken.
	copy(trimmed, body)
	return trimmed, capped
}
|
||||
|
||||
func initializeTracing(maxItems int) {
|
||||
tracingMaxItems = maxItems
|
||||
doInitializeTracing()
|
||||
@@ -134,11 +170,18 @@ func TraceMiddleware(app *application.Application) echo.MiddlewareFunc {
|
||||
|
||||
startTime := time.Now()
|
||||
|
||||
// Cap captured payload size. Without this, /embeddings and
|
||||
// streaming /chat/completions blow the in-memory buffer into the
|
||||
// tens of MB, which then locks the admin Traces UI: fetching and parsing
|
||||
// the JSON dump takes longer than the 5s auto-refresh interval.
|
||||
maxBodyBytes := app.ApplicationConfig().TracingMaxBodyBytes
|
||||
|
||||
// Wrap response writer to capture body
|
||||
resBody := new(bytes.Buffer)
|
||||
mw := &bodyWriter{
|
||||
ResponseWriter: c.Response().Writer,
|
||||
body: resBody,
|
||||
maxBytes: maxBodyBytes,
|
||||
}
|
||||
c.Response().Writer = mw
|
||||
|
||||
@@ -159,8 +202,7 @@ func TraceMiddleware(app *application.Application) echo.MiddlewareFunc {
|
||||
// via any heap-dump-style introspection, and tokens shouldn't
|
||||
// outlive the request that carried them.
|
||||
requestHeaders := redactSensitiveHeaders(c.Request().Header)
|
||||
requestBody := make([]byte, len(body))
|
||||
copy(requestBody, body)
|
||||
requestBody, requestTruncated := truncateForTrace(body, maxBodyBytes)
|
||||
responseHeaders := redactSensitiveHeaders(c.Response().Header())
|
||||
responseBody := make([]byte, resBody.Len())
|
||||
copy(responseBody, resBody.Bytes())
|
||||
@@ -168,15 +210,19 @@ func TraceMiddleware(app *application.Application) echo.MiddlewareFunc {
|
||||
Timestamp: startTime,
|
||||
Duration: time.Since(startTime),
|
||||
Request: APIExchangeRequest{
|
||||
Method: c.Request().Method,
|
||||
Path: c.Path(),
|
||||
Headers: &requestHeaders,
|
||||
Body: &requestBody,
|
||||
Method: c.Request().Method,
|
||||
Path: c.Path(),
|
||||
Headers: &requestHeaders,
|
||||
Body: &requestBody,
|
||||
BodyTruncated: requestTruncated,
|
||||
BodyBytes: len(body),
|
||||
},
|
||||
Response: APIExchangeResponse{
|
||||
Status: status,
|
||||
Headers: &responseHeaders,
|
||||
Body: &responseBody,
|
||||
Status: status,
|
||||
Headers: &responseHeaders,
|
||||
Body: &responseBody,
|
||||
BodyTruncated: mw.truncated,
|
||||
BodyBytes: mw.totalBytes,
|
||||
},
|
||||
}
|
||||
if handlerErr != nil {
|
||||
|
||||
116
core/http/middleware/trace_body_cap_test.go
Normal file
116
core/http/middleware/trace_body_cap_test.go
Normal file
@@ -0,0 +1,116 @@
|
||||
package middleware
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
// The trace middleware copies request and response bodies into an in-memory
|
||||
// buffer that backs the admin /api/traces endpoint. With no upper bound a
|
||||
// chatty workload (embeddings, large completions) trivially produces a
|
||||
// multi-MB response that locks the Traces UI in a loading state — fetching
|
||||
// and parsing the payload outruns the 5-second auto-refresh. These specs
|
||||
// pin the capping contract so future refactors keep both the cap and the
|
||||
// passthrough to the real client intact.
|
||||
|
||||
var _ = Describe("bodyWriter capping", func() {
|
||||
It("captures the full body when maxBytes is 0 (unlimited)", func() {
|
||||
downstream := httptest.NewRecorder()
|
||||
buf := &bytes.Buffer{}
|
||||
bw := &bodyWriter{ResponseWriter: downstream, body: buf, maxBytes: 0}
|
||||
|
||||
payload := []byte(strings.Repeat("x", 4096))
|
||||
n, err := bw.Write(payload)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(n).To(Equal(len(payload)))
|
||||
Expect(buf.Len()).To(Equal(len(payload)))
|
||||
Expect(downstream.Body.Len()).To(Equal(len(payload)))
|
||||
Expect(bw.truncated).To(BeFalse())
|
||||
})
|
||||
|
||||
It("stops appending to the trace buffer once maxBytes is reached but still forwards to the client", func() {
|
||||
downstream := httptest.NewRecorder()
|
||||
buf := &bytes.Buffer{}
|
||||
bw := &bodyWriter{ResponseWriter: downstream, body: buf, maxBytes: 100}
|
||||
|
||||
payload := []byte(strings.Repeat("a", 250))
|
||||
n, err := bw.Write(payload)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(n).To(Equal(len(payload)), "Write must return the full byte count so callers see no short write")
|
||||
Expect(buf.Len()).To(Equal(100), "trace buffer should hold exactly maxBytes")
|
||||
Expect(downstream.Body.Len()).To(Equal(len(payload)), "client must still receive every byte")
|
||||
Expect(bw.truncated).To(BeTrue())
|
||||
})
|
||||
|
||||
It("handles a write that straddles the cap by keeping only the leading slice", func() {
|
||||
downstream := httptest.NewRecorder()
|
||||
buf := &bytes.Buffer{}
|
||||
bw := &bodyWriter{ResponseWriter: downstream, body: buf, maxBytes: 10}
|
||||
|
||||
_, err := bw.Write([]byte("12345"))
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(bw.truncated).To(BeFalse())
|
||||
|
||||
_, err = bw.Write([]byte("67890ABCDE"))
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
Expect(buf.String()).To(Equal("1234567890"))
|
||||
Expect(downstream.Body.String()).To(Equal("1234567890ABCDE"))
|
||||
Expect(bw.truncated).To(BeTrue())
|
||||
})
|
||||
|
||||
It("ignores further writes after the cap was already hit", func() {
|
||||
downstream := httptest.NewRecorder()
|
||||
buf := &bytes.Buffer{}
|
||||
bw := &bodyWriter{ResponseWriter: downstream, body: buf, maxBytes: 4}
|
||||
|
||||
_, _ = bw.Write([]byte("AAAA"))
|
||||
_, _ = bw.Write([]byte("BBBB"))
|
||||
_, _ = bw.Write([]byte("CCCC"))
|
||||
|
||||
Expect(buf.String()).To(Equal("AAAA"))
|
||||
Expect(downstream.Body.String()).To(Equal("AAAABBBBCCCC"))
|
||||
Expect(bw.truncated).To(BeTrue())
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("truncateForTrace", func() {
|
||||
It("returns the input unchanged when below the cap", func() {
|
||||
in := []byte("hello")
|
||||
out, truncated := truncateForTrace(in, 1024)
|
||||
Expect(truncated).To(BeFalse())
|
||||
Expect(out).To(Equal(in))
|
||||
})
|
||||
|
||||
It("truncates when the input exceeds the cap and signals truncation", func() {
|
||||
in := []byte(strings.Repeat("z", 200))
|
||||
out, truncated := truncateForTrace(in, 64)
|
||||
Expect(truncated).To(BeTrue())
|
||||
Expect(out).To(HaveLen(64))
|
||||
Expect(string(out)).To(Equal(strings.Repeat("z", 64)))
|
||||
})
|
||||
|
||||
It("treats maxBytes <= 0 as unlimited (back-compat with current default)", func() {
|
||||
in := []byte(strings.Repeat("q", 10_000))
|
||||
out, truncated := truncateForTrace(in, 0)
|
||||
Expect(truncated).To(BeFalse())
|
||||
Expect(out).To(HaveLen(len(in)))
|
||||
})
|
||||
|
||||
It("does not retain the caller's backing array (defensive copy)", func() {
|
||||
in := []byte("abcdefghij")
|
||||
out, truncated := truncateForTrace(in, 4)
|
||||
Expect(truncated).To(BeTrue())
|
||||
Expect(string(out)).To(Equal("abcd"))
|
||||
|
||||
// Mutating the source must not corrupt the trace copy.
|
||||
in[0] = 'Z'
|
||||
Expect(string(out)).To(Equal("abcd"))
|
||||
})
|
||||
})
|
||||
@@ -406,7 +406,15 @@ export default function Traces() {
|
||||
<button className="btn btn-secondary btn-sm" onClick={fetchTraces}><i className="fas fa-rotate" /> Refresh</button>
|
||||
<button className="btn btn-secondary btn-sm" onClick={handleExport} disabled={traces.length === 0}><i className="fas fa-download" /> Export</button>
|
||||
<div style={{ flex: 1 }} />
|
||||
<button className="btn btn-danger btn-sm" onClick={handleClear} disabled={traces.length === 0}><i className="fas fa-trash" /> Clear</button>
|
||||
<button
|
||||
className="btn btn-danger btn-sm"
|
||||
onClick={handleClear}
|
||||
/* Stay enabled while loading: a massive in-memory trace buffer is
|
||||
precisely the case where the user can't see the table yet and
|
||||
needs Clear to recover. Clearing an already-empty server-side
|
||||
buffer is a harmless no-op. */
|
||||
disabled={!loading && traces.length === 0}
|
||||
><i className="fas fa-trash" /> Clear</button>
|
||||
</div>
|
||||
|
||||
{settings && (() => {
|
||||
|
||||
Reference in New Issue
Block a user