mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-22 15:50:31 -04:00
Compare commits
1 Commits
master
...
default-ca
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e0c22e308e |
@@ -277,7 +277,7 @@ func gRPCPredictOpts(c config.ModelConfig, modelPath string) *pb.PredictOptions
|
||||
MinP: float32(*c.MinP),
|
||||
Tokens: int32(*c.Maxtokens),
|
||||
Threads: int32(*c.Threads),
|
||||
PromptCacheAll: c.PromptCacheAll,
|
||||
PromptCacheAll: *c.PromptCacheAll,
|
||||
PromptCacheRO: c.PromptCacheRO,
|
||||
PromptCachePath: promptCachePath,
|
||||
F16KV: *c.F16,
|
||||
|
||||
@@ -136,4 +136,36 @@ var _ = Describe("Backend hooks and parser defaults", func() {
|
||||
Expect(cfg.EngineArgs["enable_chunked_prefill"]).To(Equal(true))
|
||||
})
|
||||
})
|
||||
|
||||
Context("PromptCacheAll default", func() {
|
||||
It("defaults to true when omitted from YAML", func() {
|
||||
cfg := &ModelConfig{}
|
||||
cfg.SetDefaults()
|
||||
|
||||
Expect(cfg.PromptCacheAll).NotTo(BeNil())
|
||||
Expect(*cfg.PromptCacheAll).To(BeTrue())
|
||||
})
|
||||
|
||||
It("preserves an explicit false from YAML", func() {
|
||||
falseV := false
|
||||
cfg := &ModelConfig{
|
||||
LLMConfig: LLMConfig{PromptCacheAll: &falseV},
|
||||
}
|
||||
cfg.SetDefaults()
|
||||
|
||||
Expect(cfg.PromptCacheAll).NotTo(BeNil())
|
||||
Expect(*cfg.PromptCacheAll).To(BeFalse())
|
||||
})
|
||||
|
||||
It("preserves an explicit true from YAML", func() {
|
||||
trueV := true
|
||||
cfg := &ModelConfig{
|
||||
LLMConfig: LLMConfig{PromptCacheAll: &trueV},
|
||||
}
|
||||
cfg.SetDefaults()
|
||||
|
||||
Expect(cfg.PromptCacheAll).NotTo(BeNil())
|
||||
Expect(*cfg.PromptCacheAll).To(BeTrue())
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -209,7 +209,7 @@ type LLMConfig struct {
|
||||
RMSNormEps float32 `yaml:"rms_norm_eps,omitempty" json:"rms_norm_eps,omitempty"`
|
||||
NGQA int32 `yaml:"ngqa,omitempty" json:"ngqa,omitempty"`
|
||||
PromptCachePath string `yaml:"prompt_cache_path,omitempty" json:"prompt_cache_path,omitempty"`
|
||||
PromptCacheAll bool `yaml:"prompt_cache_all,omitempty" json:"prompt_cache_all,omitempty"`
|
||||
PromptCacheAll *bool `yaml:"prompt_cache_all,omitempty" json:"prompt_cache_all,omitempty"`
|
||||
PromptCacheRO bool `yaml:"prompt_cache_ro,omitempty" json:"prompt_cache_ro,omitempty"`
|
||||
MirostatETA *float64 `yaml:"mirostat_eta,omitempty" json:"mirostat_eta,omitempty"`
|
||||
MirostatTAU *float64 `yaml:"mirostat_tau,omitempty" json:"mirostat_tau,omitempty"`
|
||||
@@ -494,6 +494,13 @@ func (cfg *ModelConfig) SetDefaults(opts ...ConfigLoaderOption) {
|
||||
cfg.Reranking = &falseV
|
||||
}
|
||||
|
||||
if cfg.PromptCacheAll == nil {
|
||||
// Match upstream llama.cpp's default (common/common.h: cache_prompt = true)
|
||||
// and let cache_idle_slots / kv_unified actually do useful work; users can
|
||||
// opt out with an explicit `prompt_cache_all: false` in the model YAML.
|
||||
cfg.PromptCacheAll = &trueV
|
||||
}
|
||||
|
||||
if threads == 0 {
|
||||
// Threads can't be 0
|
||||
threads = 4
|
||||
|
||||
Reference in New Issue
Block a user