feat(config): add vLLM parser defaults hook and importer auto-detection

Introduces parser_defaults.json mapping model families to vLLM
tool_parser/reasoning_parser names, with longest-pattern-first matching.

The vllmDefaults hook auto-fills tool_parser and reasoning_parser
options at load time for known families, while the VLLMImporter writes
the same values into generated YAML so users can review and edit them.

Adds tests covering MatchParserDefaults, hook registration via
SetDefaults, and the user-override behavior.
This commit is contained in:
Ettore Di Giacinto
2026-04-12 08:11:46 +00:00
parent a30719f04a
commit 6cf8263c30
4 changed files with 244 additions and 0 deletions

114
core/config/hooks_test.go Normal file
View File

@@ -0,0 +1,114 @@
package config_test
import (
. "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Specs for the parser-defaults lookup (MatchParserDefaults) and for the
// backend hooks that SetDefaults is expected to run on a ModelConfig.
var _ = Describe("Backend hooks and parser defaults", func() {
	Context("MatchParserDefaults", func() {
		It("matches Qwen3 family", func() {
			defaults := MatchParserDefaults("Qwen/Qwen3-8B")
			Expect(defaults).NotTo(BeNil())
			Expect(defaults["tool_parser"]).To(Equal("hermes"))
			Expect(defaults["reasoning_parser"]).To(Equal("qwen3"))
		})

		It("matches Qwen3.5 with longest-prefix-first", func() {
			// "qwen3.5" must win over the shorter "qwen3" pattern.
			defaults := MatchParserDefaults("Qwen/Qwen3.5-9B")
			Expect(defaults).NotTo(BeNil())
			Expect(defaults["tool_parser"]).To(Equal("qwen3_xml"))
		})

		It("matches Llama-3.3 not Llama-3.2", func() {
			defaults := MatchParserDefaults("meta/Llama-3.3-70B-Instruct")
			Expect(defaults).NotTo(BeNil())
			Expect(defaults["tool_parser"]).To(Equal("llama3_json"))
		})

		It("matches deepseek-r1", func() {
			defaults := MatchParserDefaults("deepseek-ai/DeepSeek-R1")
			Expect(defaults).NotTo(BeNil())
			Expect(defaults["reasoning_parser"]).To(Equal("deepseek_r1"))
			Expect(defaults["tool_parser"]).To(Equal("deepseek_v3"))
		})

		It("returns nil for unknown families", func() {
			Expect(MatchParserDefaults("acme/unknown-model-xyz")).To(BeNil())
		})
	})

	Context("Backend hook registration and execution", func() {
		It("runs registered hook for a backend", func() {
			invoked := false
			RegisterBackendHook("test-backend-hook", func(cfg *ModelConfig, modelPath string) {
				invoked = true
				cfg.Description = "modified-by-hook"
			})

			// The hook runner itself is not exported, so the hook is exercised
			// through SetDefaults on a config that names the registered backend
			// and carries empty prediction options (no model path).
			cfg := &ModelConfig{
				Backend:           "test-backend-hook",
				PredictionOptions: schema.PredictionOptions{},
			}
			cfg.SetDefaults()

			Expect(invoked).To(BeTrue())
			Expect(cfg.Description).To(Equal("modified-by-hook"))
		})
	})

	Context("vllmDefaults hook", func() {
		It("auto-sets parsers for known model families on vllm backend", func() {
			cfg := &ModelConfig{
				Backend: "vllm",
				PredictionOptions: schema.PredictionOptions{
					BasicModelRequest: schema.BasicModelRequest{
						Model: "Qwen/Qwen3-8B",
					},
				},
			}
			cfg.SetDefaults()

			// Both parser options for the Qwen3 family must have been appended.
			var sawTool, sawReasoning bool
			for _, opt := range cfg.Options {
				switch opt {
				case "tool_parser:hermes":
					sawTool = true
				case "reasoning_parser:qwen3":
					sawReasoning = true
				}
			}
			Expect(sawTool).To(BeTrue())
			Expect(sawReasoning).To(BeTrue())
		})

		It("does not override user-set tool_parser", func() {
			cfg := &ModelConfig{
				Backend: "vllm",
				Options: []string{"tool_parser:custom"},
				PredictionOptions: schema.PredictionOptions{
					BasicModelRequest: schema.BasicModelRequest{
						Model: "Qwen/Qwen3-8B",
					},
				},
			}
			cfg.SetDefaults()

			// Exactly one tool_parser entry may remain: the one the user supplied.
			const prefix = "tool_parser:"
			matches := 0
			for _, opt := range cfg.Options {
				if len(opt) < len(prefix) {
					continue
				}
				if opt[:len(prefix)] == prefix {
					matches++
				}
			}
			Expect(matches).To(Equal(1))
		})
	})
})

85
core/config/hooks_vllm.go Normal file
View File

@@ -0,0 +1,85 @@
package config
import (
_ "embed"
"encoding/json"
"strings"
"github.com/mudler/xlog"
)
// parserDefaultsJSON holds the raw contents of parser_defaults.json,
// embedded into the binary at build time.
//
//go:embed parser_defaults.json
var parserDefaultsJSON []byte
// parserDefaultsData is the decoded form of parser_defaults.json.
type parserDefaultsData struct {
// Families maps a pattern to the option name/value pairs for that family;
// MatchParserDefaults looks entries up by the pattern that matched.
Families map[string]map[string]string `json:"families"`
// Patterns lists the substrings tried against a normalized model ID, in
// order; the first pattern that matches and has a Families entry wins.
Patterns []string `json:"patterns"`
}
// parsersData is the package-level decoded table, populated by init from
// parserDefaultsJSON and read by MatchParserDefaults.
var parsersData *parserDefaultsData
func init() {
parsersData = &parserDefaultsData{}
if err := json.Unmarshal(parserDefaultsJSON, parsersData); err != nil {
xlog.Warn("failed to parse parser_defaults.json", "error", err)
}
RegisterBackendHook("vllm", vllmDefaults)
RegisterBackendHook("vllm-omni", vllmDefaults)
}
// MatchParserDefaults looks up the parser defaults for modelID.
//
// The ID is normalized and then tested against each configured pattern in
// list order using a substring match; the first pattern that matches and has
// an entry in the families table decides the result. The returned map is the
// table entry itself, not a copy. It returns nil when the table is empty or
// no pattern matches. Used both at load time (via the backend hook) and at
// import time.
func MatchParserDefaults(modelID string) map[string]string {
	if parsersData == nil {
		return nil
	}
	if len(parsersData.Patterns) == 0 {
		return nil
	}

	id := normalizeModelID(modelID)
	for _, candidate := range parsersData.Patterns {
		if !strings.Contains(id, candidate) {
			continue
		}
		// A pattern without a families entry is skipped, not treated as a miss.
		defaults, known := parsersData.Families[candidate]
		if known {
			return defaults
		}
	}
	return nil
}
// vllmDefaults is the backend hook that fills in tool_parser and
// reasoning_parser options for known model families.
//
// Options the user already supplied are never touched: each parser is only
// appended to cfg.Options when no entry with that prefix exists. The family
// is matched against cfg.Model first and cfg.Name second. modelPath is part
// of the hook signature and is not used here.
func vllmDefaults(cfg *ModelConfig, modelPath string) {
	// Record which of the two parser options are already present.
	var toolSet, reasoningSet bool
	for _, opt := range cfg.Options {
		switch {
		case strings.HasPrefix(opt, "tool_parser:"):
			toolSet = true
		case strings.HasPrefix(opt, "reasoning_parser:"):
			reasoningSet = true
		}
	}
	if toolSet && reasoningSet {
		// Nothing left to auto-fill.
		return
	}

	// Prefer the Model field; fall back to Name when it yields no match.
	defaults := MatchParserDefaults(cfg.Model)
	if defaults == nil {
		defaults = MatchParserDefaults(cfg.Name)
	}
	if defaults == nil {
		return
	}

	if tp, ok := defaults["tool_parser"]; ok && !toolSet {
		cfg.Options = append(cfg.Options, "tool_parser:"+tp)
		xlog.Debug("[parser_defaults] auto-set tool_parser", "parser", tp, "model", cfg.Model)
	}
	if rp, ok := defaults["reasoning_parser"]; ok && !reasoningSet {
		cfg.Options = append(cfg.Options, "reasoning_parser:"+rp)
		xlog.Debug("[parser_defaults] auto-set reasoning_parser", "parser", rp, "model", cfg.Model)
	}
}

View File

@@ -0,0 +1,33 @@
{
"families": {
"qwen3.5": {"tool_parser": "qwen3_xml", "reasoning_parser": "qwen3"},
"qwen3-coder": {"tool_parser": "qwen3_xml", "reasoning_parser": "qwen3"},
"qwen3": {"tool_parser": "hermes", "reasoning_parser": "qwen3"},
"qwen2.5": {"tool_parser": "hermes"},
"qwq": {"reasoning_parser": "deepseek_r1"},
"llama-4": {"tool_parser": "llama4_pythonic"},
"llama-3.3": {"tool_parser": "llama3_json"},
"llama-3.2": {"tool_parser": "llama3_json"},
"llama-3.1": {"tool_parser": "llama3_json"},
"mistral-nemo": {"tool_parser": "mistral", "reasoning_parser": "mistral"},
"mistral-small": {"tool_parser": "mistral", "reasoning_parser": "mistral"},
"mistral-large": {"tool_parser": "mistral", "reasoning_parser": "mistral"},
"magistral": {"tool_parser": "mistral", "reasoning_parser": "mistral"},
"deepseek-r1": {"tool_parser": "deepseek_v3", "reasoning_parser": "deepseek_r1"},
"deepseek-v3": {"tool_parser": "deepseek_v3", "reasoning_parser": "deepseek_v3"},
"glm-5": {"tool_parser": "glm47"},
"glm-4": {"tool_parser": "glm45", "reasoning_parser": "glm45"},
"gemma-4": {"tool_parser": "gemma4", "reasoning_parser": "gemma4"},
"granite-4": {"tool_parser": "granite4", "reasoning_parser": "granite"},
"minimax-m2.5": {"tool_parser": "minimax_m2", "reasoning_parser": "minimax_m2"},
"minimax": {"tool_parser": "minimax_m2", "reasoning_parser": "minimax_m2"},
"kimi-k2": {"tool_parser": "kimi_k2", "reasoning_parser": "kimi_k2"},
"nemotron": {"reasoning_parser": "nemotron_v3"},
"olmo": {"tool_parser": "olmo3", "reasoning_parser": "olmo3"},
"ernie": {"tool_parser": "ernie45", "reasoning_parser": "ernie45"},
"phi-4": {"tool_parser": "phi4_mini_json"},
"gpt-oss": {"tool_parser": "openai", "reasoning_parser": "openai_gptoss"},
"hermes": {"tool_parser": "hermes"}
},
"patterns": ["qwen3.5","qwen3-coder","qwen3","qwen2.5","qwq","llama-4","llama-3.3","llama-3.2","llama-3.1","mistral-nemo","mistral-small","mistral-large","magistral","deepseek-r1","deepseek-v3","glm-5","glm-4","gemma-4","granite-4","minimax-m2.5","minimax","kimi-k2","nemotron","olmo","ernie","phi-4","gpt-oss","hermes"]
}

View File

@@ -88,6 +88,18 @@ func (i *VLLMImporter) Import(details Details) (gallery.ModelConfig, error) {
// Apply per-model-family inference parameter defaults
config.ApplyInferenceDefaults(&modelConfig, details.URI)
// Auto-detect tool_parser and reasoning_parser for known model families.
// Surfacing them in the generated YAML lets users see and edit the choices.
parsers := config.MatchParserDefaults(details.URI)
if parsers != nil {
if tp, ok := parsers["tool_parser"]; ok {
modelConfig.Options = append(modelConfig.Options, "tool_parser:"+tp)
}
if rp, ok := parsers["reasoning_parser"]; ok {
modelConfig.Options = append(modelConfig.Options, "reasoning_parser:"+rp)
}
}
data, err := yaml.Marshal(modelConfig)
if err != nil {
return gallery.ModelConfig{}, err