mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-14 11:49:33 -04:00
Compare commits
7 Commits
fix/7461-m
...
dependabot
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0d0e37ab0f | ||
|
|
7088572f75 | ||
|
|
c1e8440f5b | ||
|
|
8f0059123b | ||
|
|
a906438a69 | ||
|
|
d28a5b6da1 | ||
|
|
edeacf22c4 |
@@ -10,7 +10,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
# this on `master` always picks up the latest C-API surface (incl. the
|
||||
# per-detection accessor functions used by golocateanythingcpp.go).
|
||||
LOCATEANYTHING_REPO?=https://github.com/mudler/locate-anything.cpp.git
|
||||
LOCATEANYTHING_VERSION?=60e450945476d5e97e0754a8c0e71a9ea81690e0
|
||||
LOCATEANYTHING_VERSION?=92c1682da792c1e8a5dec91acc2be4b02c742ded
|
||||
|
||||
ifeq ($(NATIVE),false)
|
||||
CMAKE_ARGS+=-DGGML_NATIVE=OFF
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/rocm7.0
|
||||
torch==2.10.0+rocm7.0
|
||||
torch==2.12.0+cpu
|
||||
torchaudio
|
||||
torchvision
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cpu
|
||||
torch==2.10.0
|
||||
torch==2.12.0+cpu
|
||||
transformers>=4.56.2
|
||||
huggingface-hub>=1.3.0
|
||||
sentencepiece
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
torch==2.10.0
|
||||
torch==2.12.0+cpu
|
||||
transformers>=4.56.2
|
||||
huggingface-hub>=1.3.0
|
||||
sentencepiece
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cpu
|
||||
accelerate
|
||||
torch==2.8.0
|
||||
torchaudio==2.8.0
|
||||
transformers==4.56.1
|
||||
librosa==0.11.0
|
||||
neucodec>=0.0.4
|
||||
|
||||
@@ -3,6 +3,7 @@ neucodec>=0.0.4
|
||||
phonemizer==3.3.0
|
||||
soundfile==0.13.1
|
||||
torch==2.8.0
|
||||
torchaudio==2.8.0
|
||||
transformers==4.56.1
|
||||
resemble-perth==1.0.1
|
||||
accelerate
|
||||
@@ -1,6 +1,6 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cpu
|
||||
accelerate
|
||||
torch==2.9.0
|
||||
torch==2.12.0+cpu
|
||||
torchvision
|
||||
torchaudio
|
||||
transformers
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
# for cublas12 so uv consults this index alongside PyPI.
|
||||
--extra-index-url https://download.pytorch.org/whl/cu128
|
||||
accelerate
|
||||
torch==2.9.1
|
||||
torch==2.12.0+cpu
|
||||
torchvision
|
||||
torchaudio
|
||||
transformers
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
accelerate
|
||||
torch==2.7.0
|
||||
torch==2.12.0+cu130
|
||||
transformers
|
||||
bitsandbytes
|
||||
|
||||
@@ -307,11 +307,19 @@ func gRPCPredictOpts(c config.ModelConfig, modelPath string) *pb.PredictOptions
|
||||
}
|
||||
}
|
||||
|
||||
// TopK may be nil after SetDefaults for backends that don't use llama.cpp's
|
||||
// top_k=40 default (issue #6632, e.g. mlx). proto3 int32 can't be unset, so
|
||||
// send 0 — the value mlx actually wants (top-k disabled).
|
||||
var topK int32
|
||||
if c.TopK != nil {
|
||||
topK = int32(*c.TopK)
|
||||
}
|
||||
|
||||
pbOpts := &pb.PredictOptions{
|
||||
Temperature: float32(*c.Temperature),
|
||||
TopP: float32(*c.TopP),
|
||||
NDraft: c.NDraft,
|
||||
TopK: int32(*c.TopK),
|
||||
TopK: topK,
|
||||
MinP: float32(*c.MinP),
|
||||
Tokens: int32(*c.Maxtokens),
|
||||
Threads: int32(*c.Threads),
|
||||
|
||||
@@ -517,6 +517,33 @@ func NormalizeBackendName(backend string) string {
|
||||
return strings.ReplaceAll(backend, ".", "-")
|
||||
}
|
||||
|
||||
// nonLlamaSamplerBackends lists backends whose native sampler defaults differ
|
||||
// from llama.cpp's, so LocalAI must NOT inject llama.cpp's top_k=40 default for
|
||||
// them (issue #6632). mlx_lm's intended default is top_k=0 (disabled) and mlx
|
||||
// does not remap 0->40, so shipping 40 silently changes sampling for clients
|
||||
// that omit top_k. Leaving TopK nil lets the wire value default to 0.
|
||||
//
|
||||
// This is intentionally a small allow-list of KNOWN non-llama backends: empty
|
||||
// and unknown backends fall through to the llama.cpp default to preserve the
|
||||
// GGUF auto-detect path's behavior.
|
||||
var nonLlamaSamplerBackends = map[string]struct{}{
|
||||
"mlx": {},
|
||||
"mlx-vlm": {},
|
||||
"mlx-distributed": {},
|
||||
}
|
||||
|
||||
// UsesLlamaSamplerDefaults reports whether a backend should receive llama.cpp's
|
||||
// sampler defaults (e.g. top_k=40). Empty/unknown backends return true so the
|
||||
// GGUF auto-detect path (which resolves to llama.cpp) keeps today's behavior;
|
||||
// only the known non-llama backends in nonLlamaSamplerBackends return false.
|
||||
func UsesLlamaSamplerDefaults(backend string) bool {
|
||||
if backend == "" {
|
||||
return true
|
||||
}
|
||||
_, isNonLlama := nonLlamaSamplerBackends[NormalizeBackendName(backend)]
|
||||
return !isNonLlama
|
||||
}
|
||||
|
||||
// GetBackendCapability returns the capability info for a backend, or nil if unknown.
|
||||
// Handles backend name normalization.
|
||||
func GetBackendCapability(backend string) *BackendCapability {
|
||||
|
||||
@@ -867,7 +867,12 @@ func (cfg *ModelConfig) SetDefaults(opts ...ConfigLoaderOption) {
|
||||
cfg.Seed = &defaultSeed
|
||||
}
|
||||
|
||||
if cfg.TopK == nil {
|
||||
// top_k=40 is llama.cpp's sampling default and is wrong for backends whose
|
||||
// native default differs (issue #6632). Only inject it for the llama.cpp
|
||||
// family and the empty/auto backend; leave TopK nil for known non-llama
|
||||
// backends (e.g. mlx, whose intended default is top_k=0) so the wire value
|
||||
// is 0 rather than a silently-changed 40.
|
||||
if cfg.TopK == nil && UsesLlamaSamplerDefaults(cfg.Backend) {
|
||||
cfg.TopK = &defaultTopK
|
||||
}
|
||||
|
||||
|
||||
@@ -529,4 +529,72 @@ concurrency_groups:
|
||||
"models that template in Go still rely on the Go-generated grammar")
|
||||
})
|
||||
})
|
||||
|
||||
// The default top_k=40 is llama.cpp's sampling default and is WRONG for
|
||||
// backends whose native default differs. mlx_lm's intended default is
|
||||
// top_k=0 (disabled) and mlx does not remap 0->40, so injecting 40 silently
|
||||
// changes sampling for mlx clients that omit top_k (issue #6632). Gate the
|
||||
// injection on backend family: keep 40 for the llama.cpp family and for the
|
||||
// empty/auto backend (the GGUF auto-detect path resolves to llama.cpp), but
|
||||
// leave TopK nil for the mlx family so the wire value is 0.
|
||||
Context("TopK default is backend-gated (issue #6632)", func() {
|
||||
It("injects top_k=40 for the llama.cpp backend", func() {
|
||||
cfg := &ModelConfig{}
|
||||
cfg.Backend = "llama-cpp"
|
||||
|
||||
cfg.SetDefaults()
|
||||
|
||||
Expect(cfg.TopK).NotTo(BeNil(), "llama.cpp must keep its top_k=40 default")
|
||||
Expect(*cfg.TopK).To(Equal(40))
|
||||
})
|
||||
|
||||
It("injects top_k=40 for the empty/auto backend (GGUF auto-detect)", func() {
|
||||
cfg := &ModelConfig{}
|
||||
|
||||
cfg.SetDefaults()
|
||||
|
||||
Expect(cfg.TopK).NotTo(BeNil(), "empty backend resolves to llama.cpp; default unchanged")
|
||||
Expect(*cfg.TopK).To(Equal(40))
|
||||
})
|
||||
|
||||
It("leaves TopK nil for the mlx backend", func() {
|
||||
cfg := &ModelConfig{}
|
||||
cfg.Backend = "mlx"
|
||||
|
||||
cfg.SetDefaults()
|
||||
|
||||
Expect(cfg.TopK).To(BeNil(),
|
||||
"mlx_lm's intended default is top_k=0 (disabled); LocalAI must not inject 40")
|
||||
})
|
||||
|
||||
It("leaves TopK nil for the mlx-vlm backend", func() {
|
||||
cfg := &ModelConfig{}
|
||||
cfg.Backend = "mlx-vlm"
|
||||
|
||||
cfg.SetDefaults()
|
||||
|
||||
Expect(cfg.TopK).To(BeNil())
|
||||
})
|
||||
|
||||
It("leaves TopK nil for the mlx-distributed backend", func() {
|
||||
cfg := &ModelConfig{}
|
||||
cfg.Backend = "mlx-distributed"
|
||||
|
||||
cfg.SetDefaults()
|
||||
|
||||
Expect(cfg.TopK).To(BeNil())
|
||||
})
|
||||
|
||||
It("respects an explicit top_k even for the mlx backend", func() {
|
||||
explicit := 7
|
||||
cfg := &ModelConfig{}
|
||||
cfg.Backend = "mlx"
|
||||
cfg.TopK = &explicit
|
||||
|
||||
cfg.SetDefaults()
|
||||
|
||||
Expect(cfg.TopK).NotTo(BeNil())
|
||||
Expect(*cfg.TopK).To(Equal(7))
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -990,8 +990,18 @@ func updateSession(session *Session, update *types.SessionUnion, cl *config.Mode
|
||||
}
|
||||
|
||||
if rt.Audio != nil && rt.Audio.Input != nil && rt.Audio.Input.Transcription != nil {
|
||||
session.InputAudioTranscription = rt.Audio.Input.Transcription
|
||||
session.ModelConfig.Pipeline.Transcription = rt.Audio.Input.Transcription.Model
|
||||
trUpd := rt.Audio.Input.Transcription
|
||||
// A language-only update (e.g. a client forcing the STT language) carries
|
||||
// an empty Model. Preserve the pipeline's configured transcription backend
|
||||
// instead of blanking it — otherwise the next utterance transcribes against
|
||||
// an empty model and the backend RPC fails with "unimplemented".
|
||||
if trUpd.Model == "" && session.InputAudioTranscription != nil {
|
||||
trUpd.Model = session.InputAudioTranscription.Model
|
||||
}
|
||||
session.InputAudioTranscription = trUpd
|
||||
if trUpd.Model != "" {
|
||||
session.ModelConfig.Pipeline.Transcription = trUpd.Model
|
||||
}
|
||||
}
|
||||
|
||||
if rt.Model != "" || (rt.Audio != nil && rt.Audio.Output != nil && rt.Audio.Output.Voice != "") || (rt.Audio != nil && rt.Audio.Input != nil && rt.Audio.Input.Transcription != nil) {
|
||||
|
||||
1620
gallery/index.yaml
1620
gallery/index.yaml
File diff suppressed because it is too large
Load Diff
2
go.mod
2
go.mod
@@ -36,7 +36,7 @@ require (
|
||||
github.com/mholt/archiver/v3 v3.5.1
|
||||
github.com/microcosm-cc/bluemonday v1.0.27
|
||||
github.com/modelcontextprotocol/go-sdk v1.5.0
|
||||
github.com/mudler/cogito v0.9.5-0.20260315222927-63abdec7189b
|
||||
github.com/mudler/cogito v0.10.1-0.20260609212329-bf4010d31047
|
||||
github.com/mudler/edgevpn v0.34.0
|
||||
github.com/mudler/go-processmanager v0.1.1
|
||||
github.com/mudler/memory v0.0.0-20260406210934-424c1ecf2cf8
|
||||
|
||||
4
go.sum
4
go.sum
@@ -968,8 +968,8 @@ github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM=
|
||||
github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw=
|
||||
github.com/mudler/LocalAGI v0.0.0-20260606071251-14aed1ae4336 h1:iKBkSnpisOvMVxFoYsAObvAuOqXBakRPMD0PWxWG5EE=
|
||||
github.com/mudler/LocalAGI v0.0.0-20260606071251-14aed1ae4336/go.mod h1:U+g6u8mF2wQxhkdBl3dr8G4db1cv3n7KTKmraoJ7D0c=
|
||||
github.com/mudler/cogito v0.9.5-0.20260315222927-63abdec7189b h1:A74T2Lauvg61KodYqsjTYDY05kPLcW+efVZjd23dghU=
|
||||
github.com/mudler/cogito v0.9.5-0.20260315222927-63abdec7189b/go.mod h1:6sfja3lcu2nWRzEc0wwqGNu/eCG3EWgij+8s7xyUeQ4=
|
||||
github.com/mudler/cogito v0.10.1-0.20260609212329-bf4010d31047 h1:wJ8WbDah1YcpBNRDmovQro8JiR228YFk7TUqPCS4m04=
|
||||
github.com/mudler/cogito v0.10.1-0.20260609212329-bf4010d31047/go.mod h1:6sfja3lcu2nWRzEc0wwqGNu/eCG3EWgij+8s7xyUeQ4=
|
||||
github.com/mudler/edgevpn v0.34.0 h1:qDrD/rCPFY/FdURbXudIZWihVKY4VOX3nMn3CcbeQEU=
|
||||
github.com/mudler/edgevpn v0.34.0/go.mod h1:yki7uMi5LR9gSMrw8PdPieuxsrk8BLV2Ui7VBEmbbIA=
|
||||
github.com/mudler/go-piper v0.0.0-20241023091659-2494246fd9fc h1:RxwneJl1VgvikiX28EkpdAyL4yQVnJMrbquKospjHyA=
|
||||
|
||||
Reference in New Issue
Block a user