Files
LocalAI/core/http/middleware/node_header.go
Ettore Di Giacinto 799215cdc6 feat(distributed): add ExposeNodeHeader middleware + ResponseWriter wrapper
Introduce a per-request Echo middleware that wraps the response writer and
lazily stamps X-LocalAI-Node on the first Write / WriteHeader / Flush.
This replaces the chan-based per-request rendezvous and per-handler
maybeSetNodeHeader calls with a single enforcement point.

The wrapper reads the picked node ID by looking up the request's model in
the ModelLoader at flush time (late binding), so the value reflects the
post-ml.Load state of the loader rather than any pre-route guess. Off by
default; gated by ApplicationConfig.ExposeNodeHeader.

Ginkgo specs cover off/on, missing model, in-process model (no node ID),
absent stash, buffered + streaming flush ordering, error path, and late
binding under in-handler stamp.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: Claude:claude-opus-4-7[1m]
2026-05-24 21:15:11 +00:00

123 lines
4.0 KiB
Go

package middleware
import (
"bufio"
"errors"
"net"
"net/http"
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/model"
)
// NodeHeaderName is the HTTP response header that, when --expose-node-header
// is enabled, carries the ID of the distributed-mode worker node that served
// the inference request. Off by default: node IDs reveal internal topology
// and should not be exposed on a public endpoint.
//
// The header is only written when a non-empty node ID can be resolved for
// the request (see nodeHeaderWriter.maybeSet); otherwise it is omitted
// entirely rather than sent with an empty value.
const NodeHeaderName = "X-LocalAI-Node"
// nodeHeaderWriter decorates an http.ResponseWriter so that the
// X-LocalAI-Node header is attached as late as possible: on the first
// Write / WriteHeader / Flush call instead of at request entry.
//
// Deferring the lookup is what makes streaming responses work. The picked
// node ID is only known AFTER ml.Load runs (i.e. on the first SSE chunk), so
// resolving it up front would attach the previous request's routing decision
// (or nothing at all on a cold cache).
type nodeHeaderWriter struct {
	// ResponseWriter is the wrapped writer. Methods that nodeHeaderWriter
	// does not define itself (such as Header) are promoted from it.
	http.ResponseWriter

	// resolve returns the node ID to stamp, or "" when no ID is available.
	// maybeSet calls it on the first write-side operation only.
	resolve func() string

	// set is flipped to true the first time maybeSet runs so that the
	// lookup and header stamp happen only once per wrapper.
	set bool
}
// maybeSet stamps the node header the first time it is called and is a no-op
// on every later call. The node ID comes from w.resolve; when that returns an
// empty string the header is left untouched, but the attempt still counts as
// done and is never retried.
func (w *nodeHeaderWriter) maybeSet() {
	if w.set {
		return
	}
	// Mark as done before resolving so the lookup is never repeated, even
	// when it yields no ID.
	w.set = true

	nodeID := w.resolve()
	if nodeID == "" {
		return
	}
	w.Header().Set(NodeHeaderName, nodeID)
}
// Write stamps the node header (on the first write-side call only) and then
// forwards b to the wrapped writer, returning its result unchanged.
func (w *nodeHeaderWriter) Write(b []byte) (int, error) {
	w.maybeSet()
	written, err := w.ResponseWriter.Write(b)
	return written, err
}
// WriteHeader stamps the node header (on the first write-side call only)
// before handing the status code to the wrapped writer, so the header is in
// the header map by the time the wrapped writer's WriteHeader runs.
func (w *nodeHeaderWriter) WriteHeader(code int) {
	w.maybeSet()
	inner := w.ResponseWriter
	inner.WriteHeader(code)
}
// Flush keeps SSE handlers working: Echo's Response.Flush goes through
// http.NewResponseController which walks Unwrap() chains and invokes Flush
// on the first wrapper that implements http.Flusher. Because this type
// implements http.Flusher, that walk stops here, so this method is
// responsible for both stamping the header before any bytes are flushed AND
// continuing the walk on behalf of the caller.
//
// The flush is delegated through a ResponseController rooted at the wrapped
// writer rather than a direct http.Flusher type assertion. A direct assertion
// would silently drop the flush whenever the wrapped writer is itself a
// wrapper that only exposes Unwrap(), which would stall streaming responses.
func (w *nodeHeaderWriter) Flush() {
	w.maybeSet()
	// http.Flusher.Flush has no error return, so an unsupported or failed
	// flush is deliberately best-effort, as it was with the type assertion.
	_ = http.NewResponseController(w.ResponseWriter).Flush()
}
// Hijack preserves WebSocket / raw-conn handlers that need to take over the
// underlying TCP connection (e.g. /v1/realtime). Without this the wrapper
// would silently break those endpoints.
//
// The request is delegated through a ResponseController rooted at the wrapped
// writer so that a hijack-capable writer is found even when it sits behind
// further wrappers that only expose Unwrap(); a direct http.Hijacker type
// assertion on the immediate writer would reject those. Hijack does not call
// maybeSet, so it never stamps the node header.
//
// It returns the connection and buffered reader/writer from the underlying
// Hijack call. When no writer in the chain supports hijacking, it returns an
// error with the message "ResponseWriter does not implement http.Hijacker".
// Any other error from the underlying Hijack is passed through unchanged.
func (w *nodeHeaderWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) {
	conn, brw, err := http.NewResponseController(w.ResponseWriter).Hijack()
	if errors.Is(err, http.ErrNotSupported) {
		// Keep the historical error text for callers/logs that surface it.
		return nil, nil, errors.New("ResponseWriter does not implement http.Hijacker")
	}
	return conn, brw, err
}
// Unwrap returns the wrapped writer so that http.NewResponseController can
// reach through this wrapper for the optional operations it does not
// implement itself (SetReadDeadline, SetWriteDeadline, EnableFullDuplex).
// Flush and Hijack are answered by this wrapper directly, so the controller
// does not unwrap for those.
//
// NOTE: plain type assertions on the writer (e.g. http.CloseNotifier or
// http.Pusher) do not follow Unwrap; only ResponseController-style lookups
// benefit from it.
func (w *nodeHeaderWriter) Unwrap() http.ResponseWriter {
return w.ResponseWriter
}
// ExposeNodeHeader returns an Echo middleware that swaps the response writer
// for a nodeHeaderWriter for the duration of each request, so that the
// X-LocalAI-Node header is stamped from the currently-loaded model's node ID
// on the first write. Off by default; opted in via --expose-node-header /
// LOCALAI_EXPOSE_NODE_HEADER.
//
// The middleware is a pass-through when appCfg is nil, when
// appCfg.ExposeNodeHeader is false, or when ml is nil. These conditions are
// evaluated on every request, not once at construction time.
//
// The model name is read from the standard per-request context key set by
// the request-extractor middleware chain (CONTEXT_LOCALS_KEY_MODEL_NAME), so
// any handler that goes through the usual SetModelAndConfig wiring is
// automatically covered. No header is emitted when the name is missing or
// empty, when the loader reports the model as not loaded, or when the loaded
// model has an empty node ID.
//
// Best-effort: under heavy concurrency for the same model across multiple
// replicas, the header may reflect a recent routing decision rather than
// this exact request's, because the model loader's per-modelID store entry
// is overwritten on every routing decision. Acceptable for observability
// and debugging.
func ExposeNodeHeader(appCfg *config.ApplicationConfig, ml *model.ModelLoader) echo.MiddlewareFunc {
	return func(next echo.HandlerFunc) echo.HandlerFunc {
		return func(c echo.Context) error {
			enabled := appCfg != nil && appCfg.ExposeNodeHeader && ml != nil
			if !enabled {
				return next(c)
			}

			// Late-bound lookup: runs when the wrapper first writes, not
			// here, so it sees whatever the loader holds at that moment.
			lookupNodeID := func() string {
				name, ok := c.Get(CONTEXT_LOCALS_KEY_MODEL_NAME).(string)
				if !ok || name == "" {
					return ""
				}
				if loaded := ml.CheckIsLoaded(name); loaded != nil {
					return loaded.NodeID()
				}
				return ""
			}

			previous := c.Response().Writer
			c.Response().Writer = &nodeHeaderWriter{
				ResponseWriter: previous,
				resolve:        lookupNodeID,
			}
			// Put the original writer back once the handler chain returns.
			defer func() {
				c.Response().Writer = previous
			}()

			return next(c)
		}
	}
}