Files
LocalAI/core/http/endpoints/openai/image.go
LocalAI [bot] 06e777b75e feat(distributed): gated X-LocalAI-Node response header (middleware + wrapper) (#9976)
* feat(distributed): add per-request node ID context holder

Introduce pkg/distributedhdr, a leaf package carrying a per-request
*atomic.Value holder for the picked worker node ID from the
SmartRouter (core/services/nodes) up to the HTTP response writer
wrapper (core/http/middleware). Avoids the import cycle that a shared
key in either consumer would create.

Exposes NewHolder, WithHolder, Holder, Stamp, Load, Inherit. The
holder is atomic.Value so cross-goroutine publish from the router to
the response writer wrapper is race-clean.

Assisted-by: Claude:claude-opus-4-7[1m]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(distributed): add ExposeNodeHeader middleware + response writer wrapper

New ApplicationConfig.ExposeNodeHeader bool + --expose-node-header CLI
flag / LOCALAI_EXPOSE_NODE_HEADER env var (default off; the node ID
reveals internal topology and is opt-in).

The middleware creates a per-request *atomic.Value holder, attaches it
to c.Request().Context() via distributedhdr.WithHolder, and wraps
c.Response().Writer with a custom http.ResponseWriter that sets the
X-LocalAI-Node header on first Write / WriteHeader / Flush by reading
the holder. Implements http.Flusher, http.Hijacker, Unwrap so it
composes cleanly with Echo and http.NewResponseController.

request.go propagates the holder onto derived contexts via
distributedhdr.Inherit so the holder survives the correlation-ID
context replacement.

Unit + race-clean concurrency + integration specs.

Assisted-by: Claude:claude-opus-4-7[1m]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(distributed): stamp node ID in router and wire middleware to inference routes

ModelRouterAdapter.Route stamps the picked node ID into the
per-request holder via distributedhdr.Stamp(ctx, result.Node.ID) right
after replica selection.

Wire ExposeNodeHeader middleware to:
- OpenAI chat/completion/embeddings + audio transcriptions/speech + image generations/inpainting
- Anthropic /v1/messages
- Ollama /api/chat, /api/generate, /api/embed, /api/embeddings
- Jina /v1/rerank
- LocalAI /v1/vad

The middleware's wrapper reads the holder on first byte and sets the
X-LocalAI-Node response header before delegating to the underlying
writer. Per-request scope means no race under concurrent multi-replica
routing.

Assisted-by: Claude:claude-opus-4-7[1m]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix(distributed): thread request context through backend Load + cover ctx propagation

Five non-OpenAI backend helpers were silently using app.Context instead
of the request context for the gRPC backend call: transcription, TTS,
image generation, rerank, VAD. Effect: distributedhdr.Stamp in the
router callback was a silent no-op for these paths, AND client
cancellation didn't propagate to in-flight inference.

Thread c.Request().Context() (or the equivalent input.Context after
the request middleware has installed the correlation-ID derived
context) through each helper and into ModelOptions via
model.WithContext(ctx). ImageGeneration's signature gains a leading
ctx parameter; in-tree callers (openai image, openai inpainting,
openai inpainting_test) are updated to match.

ModelEmbedding gains a leading ctx parameter for the same reason; the
openai and ollama embedding handlers pass the request context through.

chat_stream_workers.go defers the initial role=assistant chunk
emission until the first token callback so the wrapper's lazy
X-LocalAI-Node lookup against the loader runs AFTER ml.Load has
stamped the per-modelID node ID; semantically identical for clients
(role still arrives before any text).

Regression test core/backend/ctx_propagation_test.go pins ctx
propagation for all five helpers.

Docs updated to enumerate the full endpoint coverage of the
--expose-node-header flag.

Assisted-by: Claude:claude-opus-4-7[1m]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
2026-05-25 10:51:48 +02:00

309 lines
7.5 KiB
Go

package openai
import (
"bufio"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"os"
"path/filepath"
"strconv"
"strings"
"time"
"github.com/google/uuid"
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/backend"
model "github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/mudler/xlog"
)
func downloadFile(url string) (string, error) {
if err := utils.ValidateExternalURL(url); err != nil {
return "", fmt.Errorf("URL validation failed: %w", err)
}
// Get the data
resp, err := http.Get(url)
if err != nil {
return "", err
}
defer resp.Body.Close()
// Create the file
out, err := os.CreateTemp("", "image")
if err != nil {
return "", err
}
defer out.Close()
// Write the body to file
_, err = io.Copy(out, resp.Body)
return out.Name(), err
}
//
/*
*
curl http://localhost:8080/v1/images/generations \
-H "Content-Type: application/json" \
-d '{
"prompt": "A cute baby sea otter",
"n": 1,
"size": "512x512"
}'
*
*/
// ImageEndpoint is the OpenAI Image generation API endpoint https://platform.openai.com/docs/api-reference/images/create
// @Summary Creates an image given a prompt.
// @Tags images
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/images/generations [post]
func ImageEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
return func(c echo.Context) error {
input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
if !ok || input.Model == "" {
xlog.Error("Image Endpoint - Invalid Input")
return echo.ErrBadRequest
}
config, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
if !ok || config == nil {
xlog.Error("Image Endpoint - Invalid Config")
return echo.ErrBadRequest
}
// Process input images (for img2img/inpainting)
src := ""
if input.File != "" {
src = processImageFile(input.File, appConfig.GeneratedContentDir)
if src != "" {
defer os.RemoveAll(src)
}
}
// Process multiple input images
var inputImages []string
if len(input.Files) > 0 {
for _, file := range input.Files {
processedFile := processImageFile(file, appConfig.GeneratedContentDir)
if processedFile != "" {
inputImages = append(inputImages, processedFile)
defer os.RemoveAll(processedFile)
}
}
}
// Process reference images
var refImages []string
if len(input.RefImages) > 0 {
for _, file := range input.RefImages {
processedFile := processImageFile(file, appConfig.GeneratedContentDir)
if processedFile != "" {
refImages = append(refImages, processedFile)
defer os.RemoveAll(processedFile)
}
}
}
xlog.Debug("Parameter Config", "config", config)
switch config.Backend {
case "stablediffusion":
config.Backend = model.StableDiffusionGGMLBackend
case "":
config.Backend = model.StableDiffusionGGMLBackend
}
if !strings.Contains(input.Size, "x") {
input.Size = "512x512"
xlog.Warn("Invalid size, using default 512x512")
}
sizeParts := strings.Split(input.Size, "x")
if len(sizeParts) != 2 {
return fmt.Errorf("invalid value for 'size'")
}
width, err := strconv.Atoi(sizeParts[0])
if err != nil {
return fmt.Errorf("invalid value for 'size'")
}
height, err := strconv.Atoi(sizeParts[1])
if err != nil {
return fmt.Errorf("invalid value for 'size'")
}
b64JSON := config.ResponseFormat == "b64_json"
// src and clip_skip
var result []schema.Item
for _, i := range config.PromptStrings {
n := input.N
if input.N == 0 {
n = 1
}
for range n {
prompts := strings.Split(i, "|")
positive_prompt := prompts[0]
negative_prompt := ""
if len(prompts) > 1 {
negative_prompt = prompts[1]
}
step := config.Step
if step == 0 {
step = 15
}
if input.Step != 0 {
step = input.Step
}
tempDir := ""
if !b64JSON {
tempDir = filepath.Join(appConfig.GeneratedContentDir, "images")
}
// Create a temporary file
outputFile, err := os.CreateTemp(tempDir, "b64")
if err != nil {
return err
}
outputFile.Close()
output := outputFile.Name() + ".png"
// Rename the temporary file
err = os.Rename(outputFile.Name(), output)
if err != nil {
return err
}
baseURL := middleware.BaseURL(c)
// Use the first input image as src if available, otherwise use the original src
inputSrc := src
if len(inputImages) > 0 {
inputSrc = inputImages[0]
}
fn, err := backend.ImageGeneration(c.Request().Context(), height, width, step, *config.Seed, positive_prompt, negative_prompt, inputSrc, output, ml, *config, appConfig, refImages)
if err != nil {
return err
}
if err := fn(); err != nil {
return err
}
item := &schema.Item{}
if b64JSON {
defer os.RemoveAll(output)
data, err := os.ReadFile(output)
if err != nil {
return err
}
item.B64JSON = base64.StdEncoding.EncodeToString(data)
} else {
base := filepath.Base(output)
item.URL, err = url.JoinPath(baseURL, "generated-images", base)
if err != nil {
return err
}
}
result = append(result, *item)
}
}
id := uuid.New().String()
created := int(time.Now().Unix())
resp := &schema.OpenAIResponse{
ID: id,
Created: created,
Data: result,
Usage: &schema.OpenAIUsage{
PromptTokens: 0,
CompletionTokens: 0,
TotalTokens: 0,
InputTokens: 0,
OutputTokens: 0,
InputTokensDetails: &schema.InputTokensDetails{
TextTokens: 0,
ImageTokens: 0,
},
},
}
jsonResult, _ := json.Marshal(resp)
xlog.Debug("Response", "response", string(jsonResult))
// Return the prediction in the response body
return c.JSON(200, resp)
}
}
// processImageFile handles a single image file (URL or base64) and returns the path to the temporary file
func processImageFile(file string, generatedContentDir string) string {
fileData := []byte{}
var err error
// check if file is an URL, if so download it and save it to a temporary file
if strings.HasPrefix(file, "http://") || strings.HasPrefix(file, "https://") {
out, err := downloadFile(file)
if err != nil {
xlog.Error("Failed downloading file", "error", err, "file", file)
return ""
}
defer os.RemoveAll(out)
fileData, err = os.ReadFile(out)
if err != nil {
xlog.Error("Failed reading downloaded file", "error", err, "file", out)
return ""
}
} else {
// base 64 decode the file and write it somewhere that we will cleanup
fileData, err = base64.StdEncoding.DecodeString(file)
if err != nil {
xlog.Error("Failed decoding base64 file", "error", err)
return ""
}
}
// Create a temporary file
outputFile, err := os.CreateTemp(generatedContentDir, "b64")
if err != nil {
xlog.Error("Failed creating temporary file", "error", err)
return ""
}
// write the decoded result
writer := bufio.NewWriter(outputFile)
_, err = writer.Write(fileData)
if err != nil {
outputFile.Close()
xlog.Error("Failed writing to temporary file", "error", err)
return ""
}
if err := writer.Flush(); err != nil {
outputFile.Close()
xlog.Error("Failed flushing to temporary file", "error", err)
return ""
}
outputFile.Close()
return outputFile.Name()
}