mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-30 03:25:42 -04:00
Upstream llama.cpp defaults `cache_prompt = true` (common/common.h), but `parse_options` in the grpc-server backend unconditionally forwards the proto `PromptCacheAll` field, so any model that didn't set `prompt_cache_all: true` in its YAML was getting `cache_prompt=false` — silently overriding llama.cpp's own default. With `kv_unified` and `cache_idle_slots` already on by default, this was the last piece preventing the per-request prompt cache from being usable out of the box. Make `PromptCacheAll` tristate (`*bool`), default it to `true` in `SetDefaults`, and dereference at the proto boundary. Users can still opt out with an explicit `prompt_cache_all: false`. Same pattern as `MMap`, `MMlock`, `Reranking`, etc. Co-authored-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
172 lines
4.8 KiB
Go
172 lines
4.8 KiB
Go
package config_test
|
|
|
|
import (
|
|
. "github.com/mudler/LocalAI/core/config"
|
|
"github.com/mudler/LocalAI/core/schema"
|
|
|
|
. "github.com/onsi/ginkgo/v2"
|
|
. "github.com/onsi/gomega"
|
|
)
|
|
|
|
var _ = Describe("Backend hooks and parser defaults", func() {
|
|
Context("MatchParserDefaults", func() {
|
|
It("matches Qwen3 family", func() {
|
|
parsers := MatchParserDefaults("Qwen/Qwen3-8B")
|
|
Expect(parsers).NotTo(BeNil())
|
|
Expect(parsers["tool_parser"]).To(Equal("hermes"))
|
|
Expect(parsers["reasoning_parser"]).To(Equal("qwen3"))
|
|
})
|
|
|
|
It("matches Qwen3.5 with longest-prefix-first", func() {
|
|
parsers := MatchParserDefaults("Qwen/Qwen3.5-9B")
|
|
Expect(parsers).NotTo(BeNil())
|
|
Expect(parsers["tool_parser"]).To(Equal("qwen3_xml"))
|
|
})
|
|
|
|
It("matches Llama-3.3 not Llama-3.2", func() {
|
|
parsers := MatchParserDefaults("meta/Llama-3.3-70B-Instruct")
|
|
Expect(parsers).NotTo(BeNil())
|
|
Expect(parsers["tool_parser"]).To(Equal("llama3_json"))
|
|
})
|
|
|
|
It("matches deepseek-r1", func() {
|
|
parsers := MatchParserDefaults("deepseek-ai/DeepSeek-R1")
|
|
Expect(parsers).NotTo(BeNil())
|
|
Expect(parsers["reasoning_parser"]).To(Equal("deepseek_r1"))
|
|
Expect(parsers["tool_parser"]).To(Equal("deepseek_v3"))
|
|
})
|
|
|
|
It("returns nil for unknown families", func() {
|
|
Expect(MatchParserDefaults("acme/unknown-model-xyz")).To(BeNil())
|
|
})
|
|
})
|
|
|
|
Context("Backend hook registration and execution", func() {
	It("runs registered hook for a backend", func() {
		// The hook records that it was invoked and leaves a visible mark on
		// the config, so both the call and the mutation can be asserted.
		called := false
		RegisterBackendHook("test-backend-hook", func(cfg *ModelConfig, modelPath string) {
			called = true
			cfg.Description = "modified-by-hook"
		})

		// Only Backend is set; it must match the name the hook was
		// registered under. Everything else stays at its zero value.
		cfg := &ModelConfig{
			Backend: "test-backend-hook",
		}

		// The hook runner is internal to the config package, so the hook is
		// exercised through the exported SetDefaults entry point (called here
		// with no options), which is expected to run the hooks registered for
		// cfg.Backend.
		cfg.SetDefaults()

		Expect(called).To(BeTrue())
		Expect(cfg.Description).To(Equal("modified-by-hook"))
	})
})
|
|
|
|
Context("vllmDefaults hook", func() {
|
|
It("auto-sets parsers for known model families on vllm backend", func() {
|
|
cfg := &ModelConfig{
|
|
Backend: "vllm",
|
|
PredictionOptions: schema.PredictionOptions{
|
|
BasicModelRequest: schema.BasicModelRequest{
|
|
Model: "Qwen/Qwen3-8B",
|
|
},
|
|
},
|
|
}
|
|
cfg.SetDefaults()
|
|
|
|
foundTool := false
|
|
foundReasoning := false
|
|
for _, opt := range cfg.Options {
|
|
if opt == "tool_parser:hermes" {
|
|
foundTool = true
|
|
}
|
|
if opt == "reasoning_parser:qwen3" {
|
|
foundReasoning = true
|
|
}
|
|
}
|
|
Expect(foundTool).To(BeTrue())
|
|
Expect(foundReasoning).To(BeTrue())
|
|
})
|
|
|
|
It("does not override user-set tool_parser", func() {
|
|
cfg := &ModelConfig{
|
|
Backend: "vllm",
|
|
Options: []string{"tool_parser:custom"},
|
|
PredictionOptions: schema.PredictionOptions{
|
|
BasicModelRequest: schema.BasicModelRequest{
|
|
Model: "Qwen/Qwen3-8B",
|
|
},
|
|
},
|
|
}
|
|
cfg.SetDefaults()
|
|
|
|
count := 0
|
|
for _, opt := range cfg.Options {
|
|
if len(opt) >= len("tool_parser:") && opt[:len("tool_parser:")] == "tool_parser:" {
|
|
count++
|
|
}
|
|
}
|
|
Expect(count).To(Equal(1))
|
|
})
|
|
|
|
It("seeds production engine_args defaults", func() {
|
|
cfg := &ModelConfig{Backend: "vllm"}
|
|
cfg.SetDefaults()
|
|
|
|
Expect(cfg.EngineArgs).NotTo(BeNil())
|
|
Expect(cfg.EngineArgs["enable_prefix_caching"]).To(Equal(true))
|
|
Expect(cfg.EngineArgs["enable_chunked_prefill"]).To(Equal(true))
|
|
})
|
|
|
|
It("does not override user-set engine_args", func() {
|
|
cfg := &ModelConfig{
|
|
Backend: "vllm",
|
|
LLMConfig: LLMConfig{
|
|
EngineArgs: map[string]any{
|
|
"enable_prefix_caching": false,
|
|
},
|
|
},
|
|
}
|
|
cfg.SetDefaults()
|
|
|
|
Expect(cfg.EngineArgs["enable_prefix_caching"]).To(Equal(false))
|
|
// chunked_prefill is still seeded since user didn't set it
|
|
Expect(cfg.EngineArgs["enable_chunked_prefill"]).To(Equal(true))
|
|
})
|
|
})
|
|
|
|
Context("PromptCacheAll default", func() {
	// PromptCacheAll is a *bool: nil means "not set", so SetDefaults can tell
	// an omitted value apart from an explicit false.

	It("defaults to true when omitted from YAML", func() {
		// Zero-value config: PromptCacheAll starts out nil.
		cfg := new(ModelConfig)

		cfg.SetDefaults()

		Expect(cfg.PromptCacheAll).NotTo(BeNil())
		Expect(*cfg.PromptCacheAll).To(BeTrue())
	})

	It("preserves an explicit false from YAML", func() {
		// new(bool) yields a non-nil pointer to false, i.e. an explicit opt-out.
		optOut := new(bool)
		cfg := &ModelConfig{
			LLMConfig: LLMConfig{PromptCacheAll: optOut},
		}

		cfg.SetDefaults()

		Expect(cfg.PromptCacheAll).NotTo(BeNil())
		Expect(*cfg.PromptCacheAll).To(BeFalse())
	})

	It("preserves an explicit true from YAML", func() {
		optIn := new(bool)
		*optIn = true
		cfg := &ModelConfig{
			LLMConfig: LLMConfig{PromptCacheAll: optIn},
		}

		cfg.SetDefaults()

		Expect(cfg.PromptCacheAll).NotTo(BeNil())
		Expect(*cfg.PromptCacheAll).To(BeTrue())
	})
})
|
|
})
|