mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-30 11:36:31 -04:00
* feat(distributed): add per-request node ID context holder Introduce pkg/distributedhdr, a leaf package carrying a per-request *atomic.Value holder for the picked worker node ID from the SmartRouter (core/services/nodes) up to the HTTP response writer wrapper (core/http/middleware). Avoids the import cycle that a shared key in either consumer would create. Exposes NewHolder, WithHolder, Holder, Stamp, Load, Inherit. The holder is atomic.Value so cross-goroutine publish from the router to the response writer wrapper is race-clean. Assisted-by: Claude:claude-opus-4-7[1m] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(distributed): add ExposeNodeHeader middleware + response writer wrapper New ApplicationConfig.ExposeNodeHeader bool + --expose-node-header CLI flag / LOCALAI_EXPOSE_NODE_HEADER env var (default off; the node ID reveals internal topology and is opt-in). The middleware creates a per-request *atomic.Value holder, attaches it to c.Request().Context() via distributedhdr.WithHolder, and wraps c.Response().Writer with a custom http.ResponseWriter that sets the X-LocalAI-Node header on first Write / WriteHeader / Flush by reading the holder. Implements http.Flusher, http.Hijacker, Unwrap so it composes cleanly with Echo and http.NewResponseController. request.go propagates the holder onto derived contexts via distributedhdr.Inherit so the holder survives the correlation-ID context replacement. Unit + race-clean concurrency + integration specs. Assisted-by: Claude:claude-opus-4-7[1m] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(distributed): stamp node ID in router and wire middleware to inference routes ModelRouterAdapter.Route stamps the picked node ID into the per-request holder via distributedhdr.Stamp(ctx, result.Node.ID) right after replica selection. Wire ExposeNodeHeader middleware to: - OpenAI chat/completion/embeddings + audio transcriptions/speech + image generations/inpainting - Anthropic /v1/messages - Ollama /api/chat, /api/generate, /api/embed, /api/embeddings - Jina /v1/rerank - LocalAI /v1/vad The middleware's wrapper reads the holder on first byte and sets the X-LocalAI-Node response header before delegating to the underlying writer. Per-request scope means no race under concurrent multi-replica routing. Assisted-by: Claude:claude-opus-4-7[1m] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(distributed): thread request context through backend Load + cover ctx propagation Five non-OpenAI backend helpers were silently using app.Context instead of the request context for the gRPC backend call: transcription, TTS, image generation, rerank, VAD. Effect: distributedhdr.Stamp in the router callback was a silent no-op for these paths, AND client cancellation didn't propagate to in-flight inference. Thread c.Request().Context() (or the equivalent input.Context after the request middleware has installed the correlation-ID derived context) through each helper and into ModelOptions via model.WithContext(ctx). ImageGeneration's signature gains a leading ctx parameter; in-tree callers (openai image, openai inpainting, openai inpainting_test) are updated to match. ModelEmbedding gains a leading ctx parameter for the same reason; the openai and ollama embedding handlers pass the request context through. chat_stream_workers.go defers the initial role=assistant chunk emission until the first token callback so the wrapper's lazy X-LocalAI-Node lookup against the loader runs AFTER ml.Load has stamped the per-modelID node ID; semantically identical for clients (role still arrives before any text). Regression test core/backend/ctx_propagation_test.go pins ctx propagation for all five helpers. Docs updated to enumerate the full endpoint coverage of the --expose-node-header flag. Assisted-by: Claude:claude-opus-4-7[1m] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
309 lines
7.5 KiB
Go
309 lines
7.5 KiB
Go
package openai
|
|
|
|
import (
|
|
"bufio"
|
|
"encoding/base64"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"os"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/google/uuid"
|
|
"github.com/labstack/echo/v4"
|
|
"github.com/mudler/LocalAI/core/config"
|
|
"github.com/mudler/LocalAI/core/http/middleware"
|
|
"github.com/mudler/LocalAI/core/schema"
|
|
|
|
"github.com/mudler/LocalAI/core/backend"
|
|
|
|
model "github.com/mudler/LocalAI/pkg/model"
|
|
"github.com/mudler/LocalAI/pkg/utils"
|
|
"github.com/mudler/xlog"
|
|
)
|
|
|
|
func downloadFile(url string) (string, error) {
|
|
if err := utils.ValidateExternalURL(url); err != nil {
|
|
return "", fmt.Errorf("URL validation failed: %w", err)
|
|
}
|
|
|
|
// Get the data
|
|
resp, err := http.Get(url)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
// Create the file
|
|
out, err := os.CreateTemp("", "image")
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
defer out.Close()
|
|
|
|
// Write the body to file
|
|
_, err = io.Copy(out, resp.Body)
|
|
return out.Name(), err
|
|
}
|
|
|
|
//
|
|
|
|
/*
|
|
*
|
|
|
|
curl http://localhost:8080/v1/images/generations \
|
|
-H "Content-Type: application/json" \
|
|
-d '{
|
|
"prompt": "A cute baby sea otter",
|
|
"n": 1,
|
|
"size": "512x512"
|
|
}'
|
|
|
|
*
|
|
*/
|
|
// ImageEndpoint is the OpenAI Image generation API endpoint https://platform.openai.com/docs/api-reference/images/create
|
|
// @Summary Creates an image given a prompt.
|
|
// @Tags images
|
|
// @Param request body schema.OpenAIRequest true "query params"
|
|
// @Success 200 {object} schema.OpenAIResponse "Response"
|
|
// @Router /v1/images/generations [post]
|
|
func ImageEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
|
return func(c echo.Context) error {
|
|
input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
|
|
if !ok || input.Model == "" {
|
|
xlog.Error("Image Endpoint - Invalid Input")
|
|
return echo.ErrBadRequest
|
|
}
|
|
|
|
config, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
|
|
if !ok || config == nil {
|
|
xlog.Error("Image Endpoint - Invalid Config")
|
|
return echo.ErrBadRequest
|
|
}
|
|
|
|
// Process input images (for img2img/inpainting)
|
|
src := ""
|
|
if input.File != "" {
|
|
src = processImageFile(input.File, appConfig.GeneratedContentDir)
|
|
if src != "" {
|
|
defer os.RemoveAll(src)
|
|
}
|
|
}
|
|
|
|
// Process multiple input images
|
|
var inputImages []string
|
|
if len(input.Files) > 0 {
|
|
for _, file := range input.Files {
|
|
processedFile := processImageFile(file, appConfig.GeneratedContentDir)
|
|
if processedFile != "" {
|
|
inputImages = append(inputImages, processedFile)
|
|
defer os.RemoveAll(processedFile)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Process reference images
|
|
var refImages []string
|
|
if len(input.RefImages) > 0 {
|
|
for _, file := range input.RefImages {
|
|
processedFile := processImageFile(file, appConfig.GeneratedContentDir)
|
|
if processedFile != "" {
|
|
refImages = append(refImages, processedFile)
|
|
defer os.RemoveAll(processedFile)
|
|
}
|
|
}
|
|
}
|
|
|
|
xlog.Debug("Parameter Config", "config", config)
|
|
|
|
switch config.Backend {
|
|
case "stablediffusion":
|
|
config.Backend = model.StableDiffusionGGMLBackend
|
|
case "":
|
|
config.Backend = model.StableDiffusionGGMLBackend
|
|
}
|
|
|
|
if !strings.Contains(input.Size, "x") {
|
|
input.Size = "512x512"
|
|
xlog.Warn("Invalid size, using default 512x512")
|
|
}
|
|
|
|
sizeParts := strings.Split(input.Size, "x")
|
|
if len(sizeParts) != 2 {
|
|
return fmt.Errorf("invalid value for 'size'")
|
|
}
|
|
width, err := strconv.Atoi(sizeParts[0])
|
|
if err != nil {
|
|
return fmt.Errorf("invalid value for 'size'")
|
|
}
|
|
height, err := strconv.Atoi(sizeParts[1])
|
|
if err != nil {
|
|
return fmt.Errorf("invalid value for 'size'")
|
|
}
|
|
|
|
b64JSON := config.ResponseFormat == "b64_json"
|
|
|
|
// src and clip_skip
|
|
var result []schema.Item
|
|
for _, i := range config.PromptStrings {
|
|
n := input.N
|
|
if input.N == 0 {
|
|
n = 1
|
|
}
|
|
for range n {
|
|
prompts := strings.Split(i, "|")
|
|
positive_prompt := prompts[0]
|
|
negative_prompt := ""
|
|
if len(prompts) > 1 {
|
|
negative_prompt = prompts[1]
|
|
}
|
|
|
|
step := config.Step
|
|
if step == 0 {
|
|
step = 15
|
|
}
|
|
|
|
if input.Step != 0 {
|
|
step = input.Step
|
|
}
|
|
|
|
tempDir := ""
|
|
if !b64JSON {
|
|
tempDir = filepath.Join(appConfig.GeneratedContentDir, "images")
|
|
}
|
|
// Create a temporary file
|
|
outputFile, err := os.CreateTemp(tempDir, "b64")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
outputFile.Close()
|
|
|
|
output := outputFile.Name() + ".png"
|
|
// Rename the temporary file
|
|
err = os.Rename(outputFile.Name(), output)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
baseURL := middleware.BaseURL(c)
|
|
|
|
// Use the first input image as src if available, otherwise use the original src
|
|
inputSrc := src
|
|
if len(inputImages) > 0 {
|
|
inputSrc = inputImages[0]
|
|
}
|
|
|
|
fn, err := backend.ImageGeneration(c.Request().Context(), height, width, step, *config.Seed, positive_prompt, negative_prompt, inputSrc, output, ml, *config, appConfig, refImages)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := fn(); err != nil {
|
|
return err
|
|
}
|
|
|
|
item := &schema.Item{}
|
|
|
|
if b64JSON {
|
|
defer os.RemoveAll(output)
|
|
data, err := os.ReadFile(output)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
item.B64JSON = base64.StdEncoding.EncodeToString(data)
|
|
} else {
|
|
base := filepath.Base(output)
|
|
item.URL, err = url.JoinPath(baseURL, "generated-images", base)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
result = append(result, *item)
|
|
}
|
|
}
|
|
|
|
id := uuid.New().String()
|
|
created := int(time.Now().Unix())
|
|
resp := &schema.OpenAIResponse{
|
|
ID: id,
|
|
Created: created,
|
|
Data: result,
|
|
Usage: &schema.OpenAIUsage{
|
|
PromptTokens: 0,
|
|
CompletionTokens: 0,
|
|
TotalTokens: 0,
|
|
InputTokens: 0,
|
|
OutputTokens: 0,
|
|
InputTokensDetails: &schema.InputTokensDetails{
|
|
TextTokens: 0,
|
|
ImageTokens: 0,
|
|
},
|
|
},
|
|
}
|
|
|
|
jsonResult, _ := json.Marshal(resp)
|
|
xlog.Debug("Response", "response", string(jsonResult))
|
|
|
|
// Return the prediction in the response body
|
|
return c.JSON(200, resp)
|
|
}
|
|
}
|
|
|
|
// processImageFile handles a single image file (URL or base64) and returns the path to the temporary file
|
|
func processImageFile(file string, generatedContentDir string) string {
|
|
fileData := []byte{}
|
|
var err error
|
|
|
|
// check if file is an URL, if so download it and save it to a temporary file
|
|
if strings.HasPrefix(file, "http://") || strings.HasPrefix(file, "https://") {
|
|
out, err := downloadFile(file)
|
|
if err != nil {
|
|
xlog.Error("Failed downloading file", "error", err, "file", file)
|
|
return ""
|
|
}
|
|
defer os.RemoveAll(out)
|
|
|
|
fileData, err = os.ReadFile(out)
|
|
if err != nil {
|
|
xlog.Error("Failed reading downloaded file", "error", err, "file", out)
|
|
return ""
|
|
}
|
|
} else {
|
|
// base 64 decode the file and write it somewhere that we will cleanup
|
|
fileData, err = base64.StdEncoding.DecodeString(file)
|
|
if err != nil {
|
|
xlog.Error("Failed decoding base64 file", "error", err)
|
|
return ""
|
|
}
|
|
}
|
|
|
|
// Create a temporary file
|
|
outputFile, err := os.CreateTemp(generatedContentDir, "b64")
|
|
if err != nil {
|
|
xlog.Error("Failed creating temporary file", "error", err)
|
|
return ""
|
|
}
|
|
|
|
// write the decoded result
|
|
writer := bufio.NewWriter(outputFile)
|
|
_, err = writer.Write(fileData)
|
|
if err != nil {
|
|
outputFile.Close()
|
|
xlog.Error("Failed writing to temporary file", "error", err)
|
|
return ""
|
|
}
|
|
if err := writer.Flush(); err != nil {
|
|
outputFile.Close()
|
|
xlog.Error("Failed flushing to temporary file", "error", err)
|
|
return ""
|
|
}
|
|
outputFile.Close()
|
|
|
|
return outputFile.Name()
|
|
}
|