diff --git a/core/http/endpoints/openai/embeddings.go b/core/http/endpoints/openai/embeddings.go
index b88f3eb03..3d35d1c3d 100644
--- a/core/http/endpoints/openai/embeddings.go
+++ b/core/http/endpoints/openai/embeddings.go
@@ -1,7 +1,10 @@
 package openai
 
 import (
+	"encoding/base64"
+	"encoding/binary"
 	"encoding/json"
+	"math"
 	"time"
 
 	"github.com/labstack/echo/v4"
@@ -16,6 +19,27 @@ import (
 	"github.com/mudler/xlog"
 )
 
+// floatsToBase64 packs a float32 slice as little-endian bytes and returns a base64 string.
+// This matches the OpenAI API encoding_format=base64 contract expected by the Node.js SDK.
+func floatsToBase64(floats []float32) string {
+	buf := make([]byte, len(floats)*4)
+	for i, f := range floats {
+		binary.LittleEndian.PutUint32(buf[i*4:], math.Float32bits(f))
+	}
+	return base64.StdEncoding.EncodeToString(buf)
+}
+
+// embeddingItem builds a schema.Item for an embedding, encoding as base64 when requested.
+// The OpenAI Node.js SDK (v4+) sends encoding_format=base64 by default and expects a base64
+// string in the response; returning a float array causes Buffer.from(array,'base64') to
+// interpret each float as a single byte, yielding dims/4 values in Qdrant.
+func embeddingItem(embeddings []float32, index int, encodingFormat string) schema.Item {
+	if encodingFormat == "base64" {
+		return schema.Item{EmbeddingBase64: floatsToBase64(embeddings), Index: index, Object: "embedding"}
+	}
+	return schema.Item{Embedding: embeddings, Index: index, Object: "embedding"}
+}
+
 // EmbeddingsEndpoint is the OpenAI Embeddings API endpoint https://platform.openai.com/docs/api-reference/embeddings
 // @Summary Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms. 
 // @Param request body schema.OpenAIRequest true "query params"
@@ -47,7 +71,7 @@ func EmbeddingsEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, app
 			if err != nil {
 				return err
 			}
-			items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"})
+			items = append(items, embeddingItem(embeddings, i, input.EncodingFormat))
 		}
 
 		for i, s := range config.InputStrings {
@@ -61,7 +85,7 @@ func EmbeddingsEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, app
 			if err != nil {
 				return err
 			}
-			items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"})
+			items = append(items, embeddingItem(embeddings, i, input.EncodingFormat))
 		}
 
 		id := uuid.New().String()
diff --git a/core/schema/openai.go b/core/schema/openai.go
index 4a3b06cd0..47c208d30 100644
--- a/core/schema/openai.go
+++ b/core/schema/openai.go
@@ -2,6 +2,7 @@ package schema
 
 import (
 	"context"
+	"encoding/json"
 
 	functions "github.com/mudler/LocalAI/pkg/functions"
 )
@@ -37,15 +38,42 @@ type OpenAIUsage struct {
 }
 
 type Item struct {
-	Embedding []float32 `json:"embedding"`
-	Index     int       `json:"index"`
-	Object    string    `json:"object,omitempty"`
+	Embedding       []float32 `json:"-"`
+	EmbeddingBase64 string    `json:"-"`
+	Index           int       `json:"index"`
+	Object          string    `json:"object,omitempty"`
 
 	// Images
 	URL     string `json:"url,omitempty"`
 	B64JSON string `json:"b64_json,omitempty"`
 }
 
+// MarshalJSON serialises Item so that the "embedding" field is either a float array
+// or a base64 string depending on which field is populated. This satisfies the
+// OpenAI API encoding_format contract: the Node.js SDK (v4+) sends
+// encoding_format=base64 by default and expects a base64 string back. 
+func (item Item) MarshalJSON() ([]byte, error) {
+	type itemFields struct {
+		Embedding interface{} `json:"embedding,omitempty"`
+		Index     int         `json:"index"`
+		Object    string      `json:"object,omitempty"`
+		URL       string      `json:"url,omitempty"`
+		B64JSON   string      `json:"b64_json,omitempty"`
+	}
+	f := itemFields{
+		Index:   item.Index,
+		Object:  item.Object,
+		URL:     item.URL,
+		B64JSON: item.B64JSON,
+	}
+	if item.EmbeddingBase64 != "" {
+		f.Embedding = item.EmbeddingBase64
+	} else {
+		f.Embedding = item.Embedding
+	}
+	return json.Marshal(f)
+}
+
 type OpenAIResponse struct {
 	Created int    `json:"created,omitempty"`
 	Object  string `json:"object,omitempty"`
diff --git a/core/schema/prediction.go b/core/schema/prediction.go
index 913e13c1a..f0b2bda40 100644
--- a/core/schema/prediction.go
+++ b/core/schema/prediction.go
@@ -133,4 +133,7 @@ type PredictionOptions struct {
 
 	// RWKV (?)
 	Tokenizer string `json:"tokenizer,omitempty" yaml:"tokenizer,omitempty"`
+
+	// Embedding encoding format: "float" (default) or "base64" (OpenAI Node.js SDK default)
+	EncodingFormat string `json:"encoding_format,omitempty" yaml:"encoding_format,omitempty"`
 }