mirror of
https://github.com/mudler/LocalAI.git
synced 2026-04-01 13:42:20 -04:00
The OpenAI Node.js SDK v4+ sends encoding_format=base64 by default.
LocalAI previously ignored this parameter and always returned a float
JSON array, causing a silent data corruption bug in any Node.js client
(AnythingLLM Desktop, LangChain.js, LlamaIndex.TS, …):
// What the client does when it expects base64 but receives a float array:
Buffer.from(floatArray, 'base64')
Node.js treats a non-string first argument as a byte array — each
float32 value is truncated to a single byte — and Float32Array then
reads those bytes as floats, yielding dims/4 values. Vector databases
(Qdrant, pgvector, …) then create collections with the wrong dimension,
causing all similarity searches to fail silently.
e.g. granite-embedding-107m (384 dims) → 96 stored in Qdrant
jina-embeddings-v3 (1024 dims) → 256 stored in Qdrant
Changes:
- core/schema/prediction.go: add EncodingFormat string field to
PredictionOptions so the request parameter is parsed and available
throughout the request pipeline
- core/schema/openai.go: add EmbeddingBase64 string field to Item;
add MarshalJSON so the "embedding" JSON key emits either []float32
or a base64 string depending on which field is populated — all other
Item consumers (image, video endpoints) are unaffected
- core/http/endpoints/openai/embeddings.go: add floatsToBase64()
which packs a float32 slice as little-endian bytes and base64-encodes
it; add embeddingItem() helper; both InputToken and InputStrings loops
now honour encoding_format=base64
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
140 lines
5.0 KiB
Go
140 lines
5.0 KiB
Go
package schema
|
|
|
|
import (
|
|
"encoding/json"
|
|
|
|
"gopkg.in/yaml.v3"
|
|
)
|
|
|
|
// LogprobsValue represents the OpenAI "logprobs" request parameter.
// According to the OpenAI API it is a boolean: true means return log
// probabilities of each output token, false/null means don't return them.
// The actual number of top logprobs per token is controlled separately by
// top_logprobs (0-5).
type LogprobsValue struct {
	Enabled bool // true if logprobs should be returned
}

// UnmarshalJSON implements json.Unmarshaler. It accepts a JSON boolean
// (the documented form), JSON null (treated as false), and — for backward
// compatibility with older clients — an integer, where any value > 0
// enables logprobs.
func (l *LogprobsValue) UnmarshalJSON(data []byte) error {
	// Documented form: a plain boolean.
	var b bool
	if err := json.Unmarshal(data, &b); err == nil {
		l.Enabled = b
		return nil
	}

	// JSON null disables logprobs. encoding/json hands UnmarshalJSON the
	// raw value bytes, so a direct comparison is the standard idiom here.
	if string(data) == "null" {
		l.Enabled = false
		return nil
	}

	// Backward compatibility: an integer count, where > 0 means enabled.
	var i int
	if err := json.Unmarshal(data, &i); err == nil {
		l.Enabled = i > 0
		return nil
	}

	// Nothing matched; surface the standard "cannot unmarshal" error for bool.
	return json.Unmarshal(data, &l.Enabled)
}

// MarshalJSON implements json.Marshaler and emits a plain JSON boolean.
func (l LogprobsValue) MarshalJSON() ([]byte, error) {
	return json.Marshal(l.Enabled)
}
// UnmarshalYAML implements yaml.Unmarshaler to handle boolean
|
|
func (l *LogprobsValue) UnmarshalYAML(value *yaml.Node) error {
|
|
switch value.Kind {
|
|
case yaml.ScalarNode:
|
|
switch value.Tag {
|
|
case "!!bool":
|
|
var b bool
|
|
if err := value.Decode(&b); err != nil {
|
|
return err
|
|
}
|
|
l.Enabled = b
|
|
return nil
|
|
case "!!int":
|
|
// For backward compatibility, treat integer > 0 as true
|
|
var i int
|
|
if err := value.Decode(&i); err != nil {
|
|
return err
|
|
}
|
|
l.Enabled = i > 0
|
|
return nil
|
|
case "!!null":
|
|
l.Enabled = false
|
|
return nil
|
|
}
|
|
}
|
|
return value.Decode(&l.Enabled)
|
|
}
|
|
|
|
// IsEnabled returns true if logprobs should be returned
|
|
func (l *LogprobsValue) IsEnabled() bool {
|
|
return l.Enabled
|
|
}
|
|
|
|
// PredictionOptions carries the per-request inference parameters parsed
// from an incoming API request. It mixes fields from the official OpenAI
// spec with LocalAI-specific extensions; the groups are labelled below.
//
// @Description PredictionOptions contains prediction parameters for model inference
type PredictionOptions struct {

	// Model selection; also part of the OpenAI official spec.
	BasicModelRequest `yaml:",inline"`

	// Also part of the OpenAI official spec (audio endpoints).
	Language string `json:"language,omitempty" yaml:"language,omitempty"`

	// Only for audio transcription.
	Translate bool `json:"translate,omitempty" yaml:"translate,omitempty"`

	// Also part of the OpenAI official spec; use it for returning multiple results.
	N int `json:"n,omitempty" yaml:"n,omitempty"`

	// Common options between all the API calls, part of the OpenAI spec.
	// Pointer fields distinguish "not provided" (nil) from an explicit zero.
	TopP        *float64 `json:"top_p,omitempty" yaml:"top_p,omitempty"`
	TopK        *int     `json:"top_k,omitempty" yaml:"top_k,omitempty"`
	MinP        *float64 `json:"min_p,omitempty" yaml:"min_p,omitempty"`
	Temperature *float64 `json:"temperature,omitempty" yaml:"temperature,omitempty"`
	Maxtokens   *int     `json:"max_tokens,omitempty" yaml:"max_tokens,omitempty"`
	Echo        bool     `json:"echo,omitempty" yaml:"echo,omitempty"`

	// Custom parameters - not present in the OpenAI API.
	Batch         int     `json:"batch,omitempty" yaml:"batch,omitempty"`
	IgnoreEOS     bool    `json:"ignore_eos,omitempty" yaml:"ignore_eos,omitempty"`
	RepeatPenalty float64 `json:"repeat_penalty,omitempty" yaml:"repeat_penalty,omitempty"`

	RepeatLastN int `json:"repeat_last_n,omitempty" yaml:"repeat_last_n,omitempty"`

	Keep int `json:"n_keep,omitempty" yaml:"n_keep,omitempty"`

	FrequencyPenalty float64  `json:"frequency_penalty,omitempty" yaml:"frequency_penalty,omitempty"`
	PresencePenalty  float64  `json:"presence_penalty,omitempty" yaml:"presence_penalty,omitempty"`
	TFZ              *float64 `json:"tfz,omitempty" yaml:"tfz,omitempty"`

	TypicalP *float64 `json:"typical_p,omitempty" yaml:"typical_p,omitempty"`
	Seed     *int     `json:"seed,omitempty" yaml:"seed,omitempty"`

	// OpenAI API logprobs parameters:
	// logprobs: boolean - if true, returns log probabilities of each output token
	// top_logprobs: integer 0-20 - number of most likely tokens to return at each token position
	Logprobs    LogprobsValue      `json:"logprobs,omitempty" yaml:"logprobs,omitempty"`         // Whether to return log probabilities (true/false)
	TopLogprobs *int               `json:"top_logprobs,omitempty" yaml:"top_logprobs,omitempty"` // Number of top logprobs per token (0-20)
	LogitBias   map[string]float64 `json:"logit_bias,omitempty" yaml:"logit_bias,omitempty"`     // Map of token IDs to bias values (-100 to 100)

	// NOTE(review): the group below looks diffusion/guidance-related
	// (negative prompting, RoPE scaling) — confirm against backend usage.
	NegativePrompt      string  `json:"negative_prompt,omitempty" yaml:"negative_prompt,omitempty"`
	RopeFreqBase        float32 `json:"rope_freq_base,omitempty" yaml:"rope_freq_base,omitempty"`
	RopeFreqScale       float32 `json:"rope_freq_scale,omitempty" yaml:"rope_freq_scale,omitempty"`
	NegativePromptScale float32 `json:"negative_prompt_scale,omitempty" yaml:"negative_prompt_scale,omitempty"`

	// Diffusers
	ClipSkip int `json:"clip_skip,omitempty" yaml:"clip_skip,omitempty"`

	// RWKV (?)
	Tokenizer string `json:"tokenizer,omitempty" yaml:"tokenizer,omitempty"`

	// Embedding encoding format: "float" (default) or "base64" (the default
	// sent by the OpenAI Node.js SDK v4+); consumed by the embeddings endpoint.
	EncodingFormat string `json:"encoding_format,omitempty" yaml:"encoding_format,omitempty"`
}