mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-12 18:58:49 -04:00
Compare commits
1 Commits
master
...
fix/6632-t
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0c9666c295 |
@@ -307,11 +307,19 @@ func gRPCPredictOpts(c config.ModelConfig, modelPath string) *pb.PredictOptions
|
||||
}
|
||||
}
|
||||
|
||||
// TopK may be nil after SetDefaults for backends that don't use llama.cpp's
|
||||
// top_k=40 default (issue #6632, e.g. mlx). proto3 int32 can't be unset, so
|
||||
// send 0 — the value mlx actually wants (top-k disabled).
|
||||
var topK int32
|
||||
if c.TopK != nil {
|
||||
topK = int32(*c.TopK)
|
||||
}
|
||||
|
||||
pbOpts := &pb.PredictOptions{
|
||||
Temperature: float32(*c.Temperature),
|
||||
TopP: float32(*c.TopP),
|
||||
NDraft: c.NDraft,
|
||||
TopK: int32(*c.TopK),
|
||||
TopK: topK,
|
||||
MinP: float32(*c.MinP),
|
||||
Tokens: int32(*c.Maxtokens),
|
||||
Threads: int32(*c.Threads),
|
||||
|
||||
@@ -517,6 +517,33 @@ func NormalizeBackendName(backend string) string {
|
||||
return strings.ReplaceAll(backend, ".", "-")
|
||||
}
|
||||
|
||||
// nonLlamaSamplerBackends is the set of backends that must NOT be handed
// llama.cpp's top_k=40 sampling default (issue #6632). mlx_lm's intended
// default is top_k=0 (top-k disabled), and mlx does not translate 0 into 40,
// so injecting 40 would silently alter sampling for clients that never set
// top_k. When TopK stays nil for these backends, the value sent on the wire
// defaults to 0.
//
// The set is deliberately a short allow-list of KNOWN non-llama backends.
// Anything not listed here, including the empty backend name, keeps the
// llama.cpp default so that the GGUF auto-detect path behaves as before.
//
// Keys are stored in normalized form, matching the NormalizeBackendName
// lookup performed by UsesLlamaSamplerDefaults.
var nonLlamaSamplerBackends = map[string]struct{}{
	"mlx-distributed": struct{}{},
	"mlx-vlm":         struct{}{},
	"mlx":             struct{}{},
}
|
||||
|
||||
// UsesLlamaSamplerDefaults reports whether a backend should receive llama.cpp's
|
||||
// sampler defaults (e.g. top_k=40). Empty/unknown backends return true so the
|
||||
// GGUF auto-detect path (which resolves to llama.cpp) keeps today's behavior;
|
||||
// only the known non-llama backends in nonLlamaSamplerBackends return false.
|
||||
func UsesLlamaSamplerDefaults(backend string) bool {
|
||||
if backend == "" {
|
||||
return true
|
||||
}
|
||||
_, isNonLlama := nonLlamaSamplerBackends[NormalizeBackendName(backend)]
|
||||
return !isNonLlama
|
||||
}
|
||||
|
||||
// GetBackendCapability returns the capability info for a backend, or nil if unknown.
|
||||
// Handles backend name normalization.
|
||||
func GetBackendCapability(backend string) *BackendCapability {
|
||||
|
||||
@@ -867,7 +867,12 @@ func (cfg *ModelConfig) SetDefaults(opts ...ConfigLoaderOption) {
|
||||
cfg.Seed = &defaultSeed
|
||||
}
|
||||
|
||||
if cfg.TopK == nil {
|
||||
// top_k=40 is llama.cpp's sampling default and is wrong for backends whose
|
||||
// native default differs (issue #6632). Only inject it for the llama.cpp
|
||||
// family and the empty/auto backend; leave TopK nil for known non-llama
|
||||
// backends (e.g. mlx, whose intended default is top_k=0) so the wire value
|
||||
// is 0 rather than a silently-changed 40.
|
||||
if cfg.TopK == nil && UsesLlamaSamplerDefaults(cfg.Backend) {
|
||||
cfg.TopK = &defaultTopK
|
||||
}
|
||||
|
||||
|
||||
@@ -529,4 +529,72 @@ concurrency_groups:
|
||||
"models that template in Go still rely on the Go-generated grammar")
|
||||
})
|
||||
})
|
||||
|
||||
// The default top_k=40 is llama.cpp's sampling default and is WRONG for
|
||||
// backends whose native default differs. mlx_lm's intended default is
|
||||
// top_k=0 (disabled) and mlx does not remap 0->40, so injecting 40 silently
|
||||
// changes sampling for mlx clients that omit top_k (issue #6632). Gate the
|
||||
// injection on backend family: keep 40 for the llama.cpp family and for the
|
||||
// empty/auto backend (the GGUF auto-detect path resolves to llama.cpp), but
|
||||
// leave TopK nil for the mlx family so the wire value is 0.
|
||||
Context("TopK default is backend-gated (issue #6632)", func() {
|
||||
It("injects top_k=40 for the llama.cpp backend", func() {
|
||||
cfg := &ModelConfig{}
|
||||
cfg.Backend = "llama-cpp"
|
||||
|
||||
cfg.SetDefaults()
|
||||
|
||||
Expect(cfg.TopK).NotTo(BeNil(), "llama.cpp must keep its top_k=40 default")
|
||||
Expect(*cfg.TopK).To(Equal(40))
|
||||
})
|
||||
|
||||
It("injects top_k=40 for the empty/auto backend (GGUF auto-detect)", func() {
|
||||
cfg := &ModelConfig{}
|
||||
|
||||
cfg.SetDefaults()
|
||||
|
||||
Expect(cfg.TopK).NotTo(BeNil(), "empty backend resolves to llama.cpp; default unchanged")
|
||||
Expect(*cfg.TopK).To(Equal(40))
|
||||
})
|
||||
|
||||
It("leaves TopK nil for the mlx backend", func() {
|
||||
cfg := &ModelConfig{}
|
||||
cfg.Backend = "mlx"
|
||||
|
||||
cfg.SetDefaults()
|
||||
|
||||
Expect(cfg.TopK).To(BeNil(),
|
||||
"mlx_lm's intended default is top_k=0 (disabled); LocalAI must not inject 40")
|
||||
})
|
||||
|
||||
It("leaves TopK nil for the mlx-vlm backend", func() {
|
||||
cfg := &ModelConfig{}
|
||||
cfg.Backend = "mlx-vlm"
|
||||
|
||||
cfg.SetDefaults()
|
||||
|
||||
Expect(cfg.TopK).To(BeNil())
|
||||
})
|
||||
|
||||
It("leaves TopK nil for the mlx-distributed backend", func() {
|
||||
cfg := &ModelConfig{}
|
||||
cfg.Backend = "mlx-distributed"
|
||||
|
||||
cfg.SetDefaults()
|
||||
|
||||
Expect(cfg.TopK).To(BeNil())
|
||||
})
|
||||
|
||||
It("respects an explicit top_k even for the mlx backend", func() {
|
||||
explicit := 7
|
||||
cfg := &ModelConfig{}
|
||||
cfg.Backend = "mlx"
|
||||
cfg.TopK = &explicit
|
||||
|
||||
cfg.SetDefaults()
|
||||
|
||||
Expect(cfg.TopK).NotTo(BeNil())
|
||||
Expect(*cfg.TopK).To(Equal(7))
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user