Files
LocalAI/core/schema/prediction.go
Richard Palethorpe 6a80e23733 feat(middleware): Model routing, PII filtering, Cloud model proxies (#9802)
Add a routing middleware stack and a cloud-proxy backend.

* cloud-proxy: a Go gRPC backend that forwards OpenAI- and
  Anthropic-shaped chat requests to upstream providers, with an
  optional translate mode (OpenAI request -> Anthropic /v1/messages
  -> OpenAI response) and full tool-calling support.

* routing: admission control, content-aware model routing
  (embedding cache + classifier + rerank + Arch-Router score),
  PII detection/redaction (regex + NER) with streaming filter and
  OpenAI/Anthropic adapters, and a per-user/per-key billing recorder
  backed by GORM or in-memory storage.

* middleware: UsageMiddleware records usage via the billing recorder,
  plus admission, route-model, usage-stamp and trace middlewares.

* observability: BackendTrace ring buffer stores full request bodies
  (capped), MITM proxy emits structured trace events, and router
  classifier decisions surface at /api/router/decide.

* gallery: Arch-Router-1.5B (Q4_K_M and Q8_0).

* UI: cloud-proxy model-editor fields, classifier system-prompt and
  score-normalization config, and a Traces page rendering request
  bodies.

Assisted-by: claude-code:claude-opus-4-7 [Read] [Edit] [Bash]

Signed-off-by: Richard Palethorpe <io@richiejp.com>
2026-05-25 09:28:27 +02:00

146 lines
5.4 KiB
Go

package schema
import (
"encoding/json"
"gopkg.in/yaml.v3"
)
// LogprobsValue represents the logprobs parameter which is a boolean.
// According to OpenAI API: true means return log probabilities, false/null means don't return them.
// The actual number of top logprobs per token is controlled by top_logprobs (0-5).
type LogprobsValue struct {
Enabled bool // true if logprobs should be returned
}
// UnmarshalJSON implements json.Unmarshaler to handle boolean
func (l *LogprobsValue) UnmarshalJSON(data []byte) error {
// Try to unmarshal as boolean
var b bool
if err := json.Unmarshal(data, &b); err == nil {
l.Enabled = b
return nil
}
// If it's null, set to false
var n *bool
if err := json.Unmarshal(data, &n); err == nil {
l.Enabled = false
return nil
}
// Try as integer for backward compatibility (treat > 0 as true)
var i int
if err := json.Unmarshal(data, &i); err == nil {
l.Enabled = i > 0
return nil
}
return json.Unmarshal(data, &l.Enabled)
}
// MarshalJSON implements json.Marshaler
func (l LogprobsValue) MarshalJSON() ([]byte, error) {
return json.Marshal(l.Enabled)
}
// UnmarshalYAML implements yaml.Unmarshaler to handle boolean
func (l *LogprobsValue) UnmarshalYAML(value *yaml.Node) error {
switch value.Kind {
case yaml.ScalarNode:
switch value.Tag {
case "!!bool":
var b bool
if err := value.Decode(&b); err != nil {
return err
}
l.Enabled = b
return nil
case "!!int":
// For backward compatibility, treat integer > 0 as true
var i int
if err := value.Decode(&i); err != nil {
return err
}
l.Enabled = i > 0
return nil
case "!!null":
l.Enabled = false
return nil
}
}
return value.Decode(&l.Enabled)
}
// IsEnabled returns true if logprobs should be returned
func (l *LogprobsValue) IsEnabled() bool {
return l.Enabled
}
// @Description PredictionOptions contains prediction parameters for model inference
type PredictionOptions struct {
// Also part of the OpenAI official spec
BasicModelRequest `yaml:",inline"`
// Also part of the OpenAI official spec
Language string `json:"language,omitempty" yaml:"language,omitempty"`
// Only for audio transcription
Translate bool `json:"translate,omitempty" yaml:"translate,omitempty"`
// Also part of the OpenAI official spec. use it for returning multiple results
N int `json:"n,omitempty" yaml:"n,omitempty"`
// Common options between all the API calls, part of the OpenAI spec
TopP *float64 `json:"top_p,omitempty" yaml:"top_p,omitempty"`
TopK *int `json:"top_k,omitempty" yaml:"top_k,omitempty"`
MinP *float64 `json:"min_p,omitempty" yaml:"min_p,omitempty"`
Temperature *float64 `json:"temperature,omitempty" yaml:"temperature,omitempty"`
Maxtokens *int `json:"max_tokens,omitempty" yaml:"max_tokens,omitempty"`
// MaxCompletionTokens is the modern alias for max_tokens
// (OpenAI deprecated max_tokens; gpt-5 / o-series reject it).
// Accepted on the wire so up-to-date clients can use the new
// name; the request middleware collapses it into Maxtokens so
// internal code reads exactly one field.
MaxCompletionTokens *int `json:"max_completion_tokens,omitempty" yaml:"-"`
Echo bool `json:"echo,omitempty" yaml:"echo,omitempty"`
// Custom parameters - not present in the OpenAI API
Batch int `json:"batch,omitempty" yaml:"batch,omitempty"`
IgnoreEOS bool `json:"ignore_eos,omitempty" yaml:"ignore_eos,omitempty"`
RepeatPenalty float64 `json:"repeat_penalty,omitempty" yaml:"repeat_penalty,omitempty"`
RepeatLastN int `json:"repeat_last_n,omitempty" yaml:"repeat_last_n,omitempty"`
Keep int `json:"n_keep,omitempty" yaml:"n_keep,omitempty"`
FrequencyPenalty float64 `json:"frequency_penalty,omitempty" yaml:"frequency_penalty,omitempty"`
PresencePenalty float64 `json:"presence_penalty,omitempty" yaml:"presence_penalty,omitempty"`
TFZ *float64 `json:"tfz,omitempty" yaml:"tfz,omitempty"`
TypicalP *float64 `json:"typical_p,omitempty" yaml:"typical_p,omitempty"`
Seed *int `json:"seed,omitempty" yaml:"seed,omitempty"`
// OpenAI API logprobs parameters
// logprobs: boolean - if true, returns log probabilities of each output token
// top_logprobs: integer 0-20 - number of most likely tokens to return at each token position
Logprobs LogprobsValue `json:"logprobs,omitempty" yaml:"logprobs,omitempty"` // Whether to return log probabilities (true/false)
TopLogprobs *int `json:"top_logprobs,omitempty" yaml:"top_logprobs,omitempty"` // Number of top logprobs per token (0-20)
LogitBias map[string]float64 `json:"logit_bias,omitempty" yaml:"logit_bias,omitempty"` // Map of token IDs to bias values (-100 to 100)
NegativePrompt string `json:"negative_prompt,omitempty" yaml:"negative_prompt,omitempty"`
RopeFreqBase float32 `json:"rope_freq_base,omitempty" yaml:"rope_freq_base,omitempty"`
RopeFreqScale float32 `json:"rope_freq_scale,omitempty" yaml:"rope_freq_scale,omitempty"`
NegativePromptScale float32 `json:"negative_prompt_scale,omitempty" yaml:"negative_prompt_scale,omitempty"`
// Diffusers
ClipSkip int `json:"clip_skip,omitempty" yaml:"clip_skip,omitempty"`
// RWKV (?)
Tokenizer string `json:"tokenizer,omitempty" yaml:"tokenizer,omitempty"`
// Embedding encoding format: "float" (default) or "base64" (OpenAI Node.js SDK default)
EncodingFormat string `json:"encoding_format,omitempty" yaml:"encoding_format,omitempty"`
}