mirror of
https://github.com/mudler/LocalAI.git
synced 2026-04-02 06:04:09 -04:00
The OpenAI Node.js SDK v4+ sends encoding_format=base64 by default.
LocalAI previously ignored this parameter and always returned a float
JSON array, causing a silent data corruption bug in any Node.js client
(AnythingLLM Desktop, LangChain.js, LlamaIndex.TS, …):
// What the client does when it expects base64 but receives a float array:
Buffer.from(floatArray, 'base64')
Node.js treats a non-string first argument as a byte array — each
float32 value is truncated to a single byte — and Float32Array then
reads those bytes as floats, yielding dims/4 values. Vector databases
(Qdrant, pgvector, …) then create collections with the wrong dimension,
causing all similarity searches to fail silently.
e.g. granite-embedding-107m (384 dims) → 96 stored in Qdrant
jina-embeddings-v3 (1024 dims) → 256 stored in Qdrant
Changes:
- core/schema/prediction.go: add EncodingFormat string field to
PredictionOptions so the request parameter is parsed and available
throughout the request pipeline
- core/schema/openai.go: add EmbeddingBase64 string field to Item;
add MarshalJSON so the "embedding" JSON key emits either []float32
or a base64 string depending on which field is populated — all other
Item consumers (image, video endpoints) are unaffected
- core/http/endpoints/openai/embeddings.go: add floatsToBase64()
which packs a float32 slice as little-endian bytes and base64-encodes
it; add embeddingItem() helper; both InputToken and InputStrings loops
now honour encoding_format=base64
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
108 lines
3.5 KiB
Go
108 lines
3.5 KiB
Go
package openai
|
|
|
|
import (
|
|
"encoding/base64"
|
|
"encoding/binary"
|
|
"encoding/json"
|
|
"math"
|
|
"time"
|
|
|
|
"github.com/labstack/echo/v4"
|
|
"github.com/mudler/LocalAI/core/backend"
|
|
"github.com/mudler/LocalAI/core/config"
|
|
"github.com/mudler/LocalAI/core/http/middleware"
|
|
"github.com/mudler/LocalAI/pkg/model"
|
|
|
|
"github.com/google/uuid"
|
|
"github.com/mudler/LocalAI/core/schema"
|
|
|
|
"github.com/mudler/xlog"
|
|
)
|
|
|
|
// floatsToBase64 serialises floats into a contiguous byte buffer, four bytes per
// value in little-endian IEEE-754 order, and returns the standard (padded)
// base64 encoding of that buffer. A nil or empty slice yields the empty string.
// This is the payload shape used for encoding_format=base64 responses.
func floatsToBase64(floats []float32) string {
	const bytesPerFloat = 4

	raw := make([]byte, bytesPerFloat*len(floats))
	for idx := range floats {
		offset := idx * bytesPerFloat
		bits := math.Float32bits(floats[idx])
		binary.LittleEndian.PutUint32(raw[offset:offset+bytesPerFloat], bits)
	}

	return base64.StdEncoding.EncodeToString(raw)
}
|
|
|
|
// embeddingItem builds a schema.Item for an embedding, encoding as base64 when requested.
|
|
// The OpenAI Node.js SDK (v4+) sends encoding_format=base64 by default and expects a base64
|
|
// string in the response; returning a float array causes Buffer.from(array,'base64') to
|
|
// interpret each float as a single byte, yielding dims/4 values in Qdrant.
|
|
func embeddingItem(embeddings []float32, index int, encodingFormat string) schema.Item {
|
|
if encodingFormat == "base64" {
|
|
return schema.Item{EmbeddingBase64: floatsToBase64(embeddings), Index: index, Object: "embedding"}
|
|
}
|
|
return schema.Item{Embedding: embeddings, Index: index, Object: "embedding"}
|
|
}
|
|
|
|
// EmbeddingsEndpoint is the OpenAI Embeddings API endpoint https://platform.openai.com/docs/api-reference/embeddings
|
|
// @Summary Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms.
|
|
// @Param request body schema.OpenAIRequest true "query params"
|
|
// @Success 200 {object} schema.OpenAIResponse "Response"
|
|
// @Router /v1/embeddings [post]
|
|
func EmbeddingsEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
|
return func(c echo.Context) error {
|
|
input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
|
|
if !ok || input.Model == "" {
|
|
return echo.ErrBadRequest
|
|
}
|
|
|
|
config, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
|
|
if !ok || config == nil {
|
|
return echo.ErrBadRequest
|
|
}
|
|
|
|
xlog.Debug("Parameter Config", "config", config)
|
|
items := []schema.Item{}
|
|
|
|
for i, s := range config.InputToken {
|
|
// get the model function to call for the result
|
|
embedFn, err := backend.ModelEmbedding("", s, ml, *config, appConfig)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
embeddings, err := embedFn()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
items = append(items, embeddingItem(embeddings, i, input.EncodingFormat))
|
|
}
|
|
|
|
for i, s := range config.InputStrings {
|
|
// get the model function to call for the result
|
|
embedFn, err := backend.ModelEmbedding(s, []int{}, ml, *config, appConfig)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
embeddings, err := embedFn()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
items = append(items, embeddingItem(embeddings, i, input.EncodingFormat))
|
|
}
|
|
|
|
id := uuid.New().String()
|
|
created := int(time.Now().Unix())
|
|
resp := &schema.OpenAIResponse{
|
|
ID: id,
|
|
Created: created,
|
|
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
|
Data: items,
|
|
Object: "list",
|
|
}
|
|
|
|
jsonResult, _ := json.Marshal(resp)
|
|
xlog.Debug("Response", "response", string(jsonResult))
|
|
|
|
// Return the prediction in the response body
|
|
return c.JSON(200, resp)
|
|
}
|
|
}
|