fix: implement encoding_format=base64 for embeddings endpoint (#9135)

The OpenAI Node.js SDK v4+ sends encoding_format=base64 by default.
LocalAI previously ignored this parameter and always returned a float
JSON array, causing a silent data corruption bug in any Node.js client
(AnythingLLM Desktop, LangChain.js, LlamaIndex.TS, …):

  // What the client does when it expects base64 but receives a float array:
  Buffer.from(floatArray, 'base64')

Node.js treats a non-string first argument as a byte array — each
float32 value is truncated to a single byte — and Float32Array then
reads those bytes as floats, yielding dims/4 values.  Vector databases
(Qdrant, pgvector, …) then create collections with the wrong dimension,
causing all similarity searches to fail silently.

  e.g. granite-embedding-107m (384 dims) → 96 stored in Qdrant
       jina-embeddings-v3      (1024 dims) → 256 stored in Qdrant
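
To make the mechanism concrete, a rough illustration-only Go sketch (not part of
this change) that simulates the client-side corruption: every float collapses to
one byte, and the byte stream is then re-read four bytes at a time, so a 384-dim
vector comes back as 96 values:

  // Rough simulation of Buffer.from(floatArray, 'base64') followed by
  // new Float32Array(buf): one byte kept per float, then 4 bytes per value.
  package main

  import (
      "encoding/binary"
      "fmt"
      "math"
  )

  func main() {
      embedding := make([]float32, 384) // e.g. granite-embedding-107m output

      // Buffer.from(array, ...) ignores the encoding for non-string input
      // and keeps one byte per element, truncating each float.
      bytes := make([]byte, len(embedding))
      for i, f := range embedding {
          bytes[i] = byte(int64(f))
      }

      // new Float32Array(buffer) then reinterprets every 4 bytes as a float32.
      corrupted := make([]float32, len(bytes)/4)
      for i := range corrupted {
          corrupted[i] = math.Float32frombits(binary.LittleEndian.Uint32(bytes[i*4:]))
      }
      fmt.Println(len(corrupted)) // 96, the dims/4 vector that ends up in Qdrant
  }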

Changes:
- core/schema/prediction.go: add EncodingFormat string field to
  PredictionOptions so the request parameter is parsed and available
  throughout the request pipeline
- core/schema/openai.go: add EmbeddingBase64 string field to Item;
  add MarshalJSON so the "embedding" JSON key emits either []float32
  or a base64 string depending on which field is populated — all other
  Item consumers (image, video endpoints) are unaffected
- core/http/endpoints/openai/embeddings.go: add floatsToBase64()
  which packs a float32 slice as little-endian bytes and base64-encodes
  it; add embeddingItem() helper; both InputToken and InputStrings loops
  now honour encoding_format=base64 (a round-trip sketch follows this list)
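
For reference, a standalone Go sketch of the new contract (values are made up;
it mirrors the packing used by floatsToBase64 and the two "embedding" shapes
emitted by Item.MarshalJSON below, but is not the endpoint code itself):

  package main

  import (
      "encoding/base64"
      "encoding/binary"
      "encoding/json"
      "fmt"
      "math"
  )

  func main() {
      floats := []float32{0.1, -0.2, 0.3}

      // Same packing as floatsToBase64: little-endian float32 bytes, base64-encoded.
      buf := make([]byte, len(floats)*4)
      for i, f := range floats {
          binary.LittleEndian.PutUint32(buf[i*4:], math.Float32bits(f))
      }
      b64 := base64.StdEncoding.EncodeToString(buf)

      // encoding_format=float  -> "embedding" is a JSON array of numbers
      // encoding_format=base64 -> "embedding" is a base64 string
      asFloat, _ := json.Marshal(map[string]any{"embedding": floats, "index": 0, "object": "embedding"})
      asB64, _ := json.Marshal(map[string]any{"embedding": b64, "index": 0, "object": "embedding"})
      fmt.Println(string(asFloat))
      fmt.Println(string(asB64))

      // A conforming client inverts the packing: base64-decode, then read
      // little-endian float32s, recovering exactly len(buf)/4 values.
      raw, _ := base64.StdEncoding.DecodeString(b64)
      decoded := make([]float32, len(raw)/4)
      for i := range decoded {
          decoded[i] = math.Float32frombits(binary.LittleEndian.Uint32(raw[i*4:]))
      }
      fmt.Println(decoded) // [0.1 -0.2 0.3]
  }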

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>

Author:       walcz-de
Date:         2026-03-25 17:38:07 +01:00
Committed by: GitHub
Parent:       26384c5c70
Commit:       00fcf6936c

3 changed files with 60 additions and 5 deletions

core/http/endpoints/openai/embeddings.go

@@ -1,7 +1,10 @@
package openai
import (
"encoding/base64"
"encoding/binary"
"encoding/json"
"math"
"time"
"github.com/labstack/echo/v4"
@@ -16,6 +19,27 @@ import (
"github.com/mudler/xlog"
)
+// floatsToBase64 packs a float32 slice as little-endian bytes and returns a base64 string.
+// This matches the OpenAI API encoding_format=base64 contract expected by the Node.js SDK.
+func floatsToBase64(floats []float32) string {
+    buf := make([]byte, len(floats)*4)
+    for i, f := range floats {
+        binary.LittleEndian.PutUint32(buf[i*4:], math.Float32bits(f))
+    }
+    return base64.StdEncoding.EncodeToString(buf)
+}
+
+// embeddingItem builds a schema.Item for an embedding, encoding as base64 when requested.
+// The OpenAI Node.js SDK (v4+) sends encoding_format=base64 by default and expects a base64
+// string in the response; returning a float array causes Buffer.from(array,'base64') to
+// interpret each float as a single byte, yielding dims/4 values in Qdrant.
+func embeddingItem(embeddings []float32, index int, encodingFormat string) schema.Item {
+    if encodingFormat == "base64" {
+        return schema.Item{EmbeddingBase64: floatsToBase64(embeddings), Index: index, Object: "embedding"}
+    }
+    return schema.Item{Embedding: embeddings, Index: index, Object: "embedding"}
+}
// EmbeddingsEndpoint is the OpenAI Embeddings API endpoint https://platform.openai.com/docs/api-reference/embeddings
// @Summary Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms.
// @Param request body schema.OpenAIRequest true "query params"
@@ -47,7 +71,7 @@ func EmbeddingsEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, app
            if err != nil {
                return err
            }
-           items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"})
+           items = append(items, embeddingItem(embeddings, i, input.EncodingFormat))
        }

        for i, s := range config.InputStrings {
@@ -61,7 +85,7 @@ func EmbeddingsEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, app
            if err != nil {
                return err
            }
-           items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"})
+           items = append(items, embeddingItem(embeddings, i, input.EncodingFormat))
        }

        id := uuid.New().String()

core/schema/openai.go

@@ -2,6 +2,7 @@ package schema
import (
"context"
"encoding/json"
functions "github.com/mudler/LocalAI/pkg/functions"
)
@@ -37,15 +38,42 @@ type OpenAIUsage struct {
}
type Item struct {
-   Embedding []float32 `json:"embedding"`
-   Index     int       `json:"index"`
-   Object    string    `json:"object,omitempty"`
+   Embedding       []float32 `json:"-"`
+   EmbeddingBase64 string    `json:"-"`
+   Index           int       `json:"index"`
+   Object          string    `json:"object,omitempty"`

    // Images
    URL     string `json:"url,omitempty"`
    B64JSON string `json:"b64_json,omitempty"`
}
+// MarshalJSON serialises Item so that the "embedding" field is either a float array
+// or a base64 string depending on which field is populated. This satisfies the
+// OpenAI API encoding_format contract: the Node.js SDK (v4+) sends
+// encoding_format=base64 by default and expects a base64 string back.
+func (item Item) MarshalJSON() ([]byte, error) {
+    type itemFields struct {
+        Embedding interface{} `json:"embedding,omitempty"`
+        Index     int         `json:"index"`
+        Object    string      `json:"object,omitempty"`
+        URL       string      `json:"url,omitempty"`
+        B64JSON   string      `json:"b64_json,omitempty"`
+    }
+    f := itemFields{
+        Index:   item.Index,
+        Object:  item.Object,
+        URL:     item.URL,
+        B64JSON: item.B64JSON,
+    }
+    if item.EmbeddingBase64 != "" {
+        f.Embedding = item.EmbeddingBase64
+    } else {
+        f.Embedding = item.Embedding
+    }
+    return json.Marshal(f)
+}
type OpenAIResponse struct {
    Created int    `json:"created,omitempty"`
    Object  string `json:"object,omitempty"`

core/schema/prediction.go

@@ -133,4 +133,7 @@ type PredictionOptions struct {
    // RWKV (?)
    Tokenizer string `json:"tokenizer,omitempty" yaml:"tokenizer,omitempty"`
+
+   // Embedding encoding format: "float" (default) or "base64" (OpenAI Node.js SDK default)
+   EncodingFormat string `json:"encoding_format,omitempty" yaml:"encoding_format,omitempty"`
}
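
For completeness, a minimal sketch of how the new field is picked up when a
request body is decoded; the struct here is a trimmed stand-in for
schema.PredictionOptions and the request JSON is an assumed example, not taken
from a real client:

  package main

  import (
      "encoding/json"
      "fmt"
  )

  // Trimmed stand-in for schema.PredictionOptions: only the new field.
  type predictionOptions struct {
      EncodingFormat string `json:"encoding_format,omitempty" yaml:"encoding_format,omitempty"`
  }

  func main() {
      body := []byte(`{"model":"granite-embedding-107m","input":"hello","encoding_format":"base64"}`)
      var opts predictionOptions
      _ = json.Unmarshal(body, &opts)
      fmt.Println(opts.EncodingFormat) // "base64", available downstream as input.EncodingFormat
  }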