Files
LocalAI/core/config/inference_defaults.go
Ettore Di Giacinto 031a36c995 feat: inferencing default, automatic tool parsing fallback and wire min_p (#9092)
* feat: wire min_p

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat: inferencing defaults

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(refactor): re-use iterative parser

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore: generate automatically inference defaults from unsloth

Instead of reinventing the wheel and maintaining the inference defaults
here, consume the ones published by unsloth and contribute upstream as
necessary.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore: apply defaults also to models installed via gallery

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore: be consistent and apply fallback to all endpoints

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-03-22 00:57:15 +01:00

129 lines
2.9 KiB
Go

package config
//go:generate go run ./gen_inference_defaults/
import (
_ "embed"
"encoding/json"
"strings"
"github.com/mudler/xlog"
)
// inferenceDefaultsJSON holds the raw contents of inference_defaults.json,
// embedded into the binary at build time. It is decoded once by init.
//
//go:embed inference_defaults.json
var inferenceDefaultsJSON []byte
// inferenceDefaults holds the parsed inference defaults data
// decoded from the embedded inference_defaults.json.
type inferenceDefaults struct {
// Families maps a family key (the same string that appears in Patterns) to
// that family's default inference parameters, keyed by parameter name. The
// names read by ApplyInferenceDefaults are "temperature", "top_p", "top_k",
// "min_p", "repeat_penalty" and "presence_penalty".
Families map[string]map[string]float64 `json:"families"`
// Patterns lists the substrings that MatchModelFamily tests against a
// normalized model ID, in priority order: the first pattern that matches
// and has a Families entry wins.
Patterns []string `json:"patterns"`
}
// defaultsData is the package-wide table of inference defaults. It is
// populated once by init from the embedded JSON.
var defaultsData *inferenceDefaults

// init decodes the embedded inference_defaults.json into defaultsData.
// A decode failure is only logged as a warning; defaultsData is still left
// pointing at a non-nil value so lookups do not have to special-case it.
func init() {
	parsed := new(inferenceDefaults)
	err := json.Unmarshal(inferenceDefaultsJSON, parsed)
	if err != nil {
		xlog.Warn("failed to parse inference_defaults.json", "error", err)
	}
	defaultsData = parsed
}
// normalizeModelID reduces a model identifier to the canonical form used for
// family matching. It lowercases the input, keeps only the text after the last
// "/" (dropping any org or path prefix), removes one trailing ".gguf"
// extension, and rewrites underscores as hyphens.
//
// For example, "unsloth/Qwen3.5-9B-GGUF" becomes "qwen3.5-9b-gguf".
func normalizeModelID(modelID string) string {
	name := strings.ToLower(modelID)
	// Everything up to and including the last slash is an org/path prefix.
	if slash := strings.LastIndexByte(name, '/'); slash != -1 {
		name = name[slash+1:]
	}
	name = strings.TrimSuffix(name, ".gguf")
	// Patterns are written with hyphens, so fold underscores into them.
	return strings.ReplaceAll(name, "_", "-")
}
// MatchModelFamily looks up the inference defaults for the model family that
// modelID belongs to.
//
// The ID is first normalized with normalizeModelID and then compared against
// each entry of defaultsData.Patterns in slice order; a pattern matches when
// it is a substring of the normalized ID. The first matching pattern that also
// has an entry in defaultsData.Families decides the result, so precedence
// comes from the ordering in the JSON (which is expected to list patterns
// longest-match-first). A matching pattern without a Families entry is skipped.
//
// It returns nil when no defaults are loaded or no family matches. The
// returned map is the table's own entry, not a copy.
func MatchModelFamily(modelID string) map[string]float64 {
	if defaultsData == nil {
		return nil
	}
	patterns := defaultsData.Patterns
	if len(patterns) == 0 {
		return nil
	}

	id := normalizeModelID(modelID)
	for _, p := range patterns {
		if !strings.Contains(id, p) {
			continue
		}
		if defaults, found := defaultsData.Families[p]; found {
			return defaults
		}
	}
	return nil
}
// ApplyInferenceDefaults fills cfg with the recommended inference parameters
// for the first of modelIDs that matches a known model family.
//
// Candidates are tried in the order given; empty strings are skipped and the
// first one for which MatchModelFamily returns a non-nil result wins. If none
// matches, cfg is left untouched.
//
// Values already present on cfg are never overwritten:
//   - Temperature, TopP, TopK and MinP are set only while they are nil.
//   - RepeatPenalty and PresencePenalty are set only while they are 0, so an
//     explicit zero cannot be told apart from "unset".
//
// The "top_k" default is stored as a float and truncated to int.
func ApplyInferenceDefaults(cfg *ModelConfig, modelIDs ...string) {
	var (
		family    map[string]float64
		matchedID string
	)
	for _, candidate := range modelIDs {
		if candidate == "" {
			continue
		}
		if family = MatchModelFamily(candidate); family != nil {
			matchedID = candidate
			break
		}
	}
	if family == nil {
		return
	}

	xlog.Debug("[inference_defaults] applying defaults for model", "modelID", matchedID, "family", family)

	// Each lookup binds its own variable, so the pointers stored on cfg never
	// alias one another or the shared defaults table.
	if temperature, ok := family["temperature"]; cfg.Temperature == nil && ok {
		cfg.Temperature = &temperature
	}
	if topP, ok := family["top_p"]; cfg.TopP == nil && ok {
		cfg.TopP = &topP
	}
	if raw, ok := family["top_k"]; cfg.TopK == nil && ok {
		topK := int(raw)
		cfg.TopK = &topK
	}
	if minP, ok := family["min_p"]; cfg.MinP == nil && ok {
		cfg.MinP = &minP
	}
	if penalty, ok := family["repeat_penalty"]; cfg.RepeatPenalty == 0 && ok {
		cfg.RepeatPenalty = penalty
	}
	if penalty, ok := family["presence_penalty"]; cfg.PresencePenalty == 0 && ok {
		cfg.PresencePenalty = penalty
	}
}