chore(deps): bump torch

Bumps the pip group with 1 update in the /backend/python/vllm directory: torch.

Updates `torch` from 2.9.1+cpu to 2.12.0+cpu

---
updated-dependencies:
- dependency-name: torch
  dependency-version: 2.12.0+cpu
  dependency-type: direct:production
  dependency-group: pip
...

Signed-off-by: dependabot[bot] <support@github.com>
fix(gallery): make opus a meta backend for platform auto-selection (#9813) (#10291)
2026-06-14 11:49:33 -04:00 · 2026-06-13 07:51:39 +00:00 · 2026-06-13 09:51:02 +02:00 · 2026-06-13 09:28:41 +02:00 · 2026-06-13 09:28:25 +02:00 · 2026-06-13 09:19:21 +02:00
13 changed files with 1763 additions and 14 deletions
--- a/backend/go/locate-anything-cpp/Makefile
+++ b/backend/go/locate-anything-cpp/Makefile
@@ -10,7 +10,7 @@ JOBS?=$(shell nproc --ignore=1)
 # this on `master` always picks up the latest C-API surface (incl. the
 # per-detection accessor functions used by golocateanythingcpp.go).
 LOCATEANYTHING_REPO?=https://github.com/mudler/locate-anything.cpp.git
-LOCATEANYTHING_VERSION?=60e450945476d5e97e0754a8c0e71a9ea81690e0
+LOCATEANYTHING_VERSION?=92c1682da792c1e8a5dec91acc2be4b02c742ded

 ifeq ($(NATIVE),false)
 	CMAKE_ARGS+=-DGGML_NATIVE=OFF
--- a/backend/index.yaml
+++ b/backend/index.yaml
@@ -1163,11 +1163,11 @@
 - &opus
  name: "opus"
  alias: "opus"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-opus"
+  capabilities:
+    default: "cpu-opus"
+    metal: "metal-opus"
  urls:
    - https://opus-codec.org/
-  mirrors:
-    - localai/localai-backends:latest-cpu-opus
  license: BSD-3-Clause
  description: |
    Opus audio codec backend for encoding and decoding audio.
@@ -1177,7 +1177,11 @@
    - opus
    - WebRTC
    - realtime
-    - CPU
+- !!merge <<: *opus
+  name: "opus-development"
+  capabilities:
+    default: "cpu-opus-development"
+    metal: "metal-opus-development"
 - &silero-vad
  name: "silero-vad"
  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-silero-vad"
@@ -1603,7 +1607,12 @@
  mirrors:
    - localai/localai-backends:master-metal-darwin-arm64-local-store
 - !!merge <<: *opus
-  name: "opus-development"
+  name: "cpu-opus"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-opus"
+  mirrors:
+    - localai/localai-backends:latest-cpu-opus
+- !!merge <<: *opus
+  name: "cpu-opus-development"
  uri: "quay.io/go-skynet/local-ai-backends:master-cpu-opus"
  mirrors:
    - localai/localai-backends:master-cpu-opus
--- a/backend/python/neutts/requirements-cpu.txt
+++ b/backend/python/neutts/requirements-cpu.txt
@@ -1,6 +1,7 @@
 --extra-index-url https://download.pytorch.org/whl/cpu
 accelerate
 torch==2.8.0
+torchaudio==2.8.0
 transformers==4.56.1
 librosa==0.11.0
 neucodec>=0.0.4
--- a/backend/python/neutts/requirements-cublas12.txt
+++ b/backend/python/neutts/requirements-cublas12.txt
@@ -3,6 +3,7 @@ neucodec>=0.0.4
 phonemizer==3.3.0
 soundfile==0.13.1
 torch==2.8.0
+torchaudio==2.8.0
 transformers==4.56.1
 resemble-perth==1.0.1
 accelerate
--- a/backend/python/vllm/requirements-cpu.txt
+++ b/backend/python/vllm/requirements-cpu.txt
@@ -1,6 +1,6 @@
 --extra-index-url https://download.pytorch.org/whl/cpu
 accelerate
-torch==2.9.1+cpu
+torch==2.12.0+cpu
 torchvision
 torchaudio
 transformers
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -307,11 +307,19 @@ func gRPCPredictOpts(c config.ModelConfig, modelPath string) *pb.PredictOptions
 		}
 	}

+	// TopK may be nil after SetDefaults for backends that don't use llama.cpp's
+	// top_k=40 default (issue #6632, e.g. mlx). proto3 int32 can't be unset, so
+	// send 0 — the value mlx actually wants (top-k disabled).
+	var topK int32
+	if c.TopK != nil {
+		topK = int32(*c.TopK)
+	}
+
 	pbOpts := &pb.PredictOptions{
 		Temperature:         float32(*c.Temperature),
 		TopP:                float32(*c.TopP),
 		NDraft:              c.NDraft,
-		TopK:                int32(*c.TopK),
+		TopK:                topK,
 		MinP:                float32(*c.MinP),
 		Tokens:              int32(*c.Maxtokens),
 		Threads:             int32(*c.Threads),
--- a/core/config/backend_capabilities.go
+++ b/core/config/backend_capabilities.go
@@ -517,6 +517,33 @@ func NormalizeBackendName(backend string) string {
 	return strings.ReplaceAll(backend, ".", "-")
 }

+// nonLlamaSamplerBackends lists backends whose native sampler defaults differ
+// from llama.cpp's, so LocalAI must NOT inject llama.cpp's top_k=40 default for
+// them (issue #6632). mlx_lm's intended default is top_k=0 (disabled) and mlx
+// does not remap 0->40, so shipping 40 silently changes sampling for clients
+// that omit top_k. Leaving TopK nil lets the wire value default to 0.
+//
+// This is intentionally a small allow-list of KNOWN non-llama backends: empty
+// and unknown backends fall through to the llama.cpp default to preserve the
+// GGUF auto-detect path's behavior.
+var nonLlamaSamplerBackends = map[string]struct{}{
+	"mlx":             {},
+	"mlx-vlm":         {},
+	"mlx-distributed": {},
+}
+
+// UsesLlamaSamplerDefaults reports whether a backend should receive llama.cpp's
+// sampler defaults (e.g. top_k=40). Empty/unknown backends return true so the
+// GGUF auto-detect path (which resolves to llama.cpp) keeps today's behavior;
+// only the known non-llama backends in nonLlamaSamplerBackends return false.
+func UsesLlamaSamplerDefaults(backend string) bool {
+	if backend == "" {
+		return true
+	}
+	_, isNonLlama := nonLlamaSamplerBackends[NormalizeBackendName(backend)]
+	return !isNonLlama
+}
+
 // GetBackendCapability returns the capability info for a backend, or nil if unknown.
 // Handles backend name normalization.
 func GetBackendCapability(backend string) *BackendCapability {
--- a/core/config/model_config.go
+++ b/core/config/model_config.go
@@ -867,7 +867,12 @@ func (cfg *ModelConfig) SetDefaults(opts ...ConfigLoaderOption) {
 		cfg.Seed = &defaultSeed
 	}

-	if cfg.TopK == nil {
+	// top_k=40 is llama.cpp's sampling default and is wrong for backends whose
+	// native default differs (issue #6632). Only inject it for the llama.cpp
+	// family and the empty/auto backend; leave TopK nil for known non-llama
+	// backends (e.g. mlx, whose intended default is top_k=0) so the wire value
+	// is 0 rather than a silently-changed 40.
+	if cfg.TopK == nil && UsesLlamaSamplerDefaults(cfg.Backend) {
 		cfg.TopK = &defaultTopK
 	}

--- a/core/config/model_config_test.go
+++ b/core/config/model_config_test.go
@@ -529,4 +529,72 @@ concurrency_groups:
 				"models that template in Go still rely on the Go-generated grammar")
 		})
 	})
+
+	// The default top_k=40 is llama.cpp's sampling default and is WRONG for
+	// backends whose native default differs. mlx_lm's intended default is
+	// top_k=0 (disabled) and mlx does not remap 0->40, so injecting 40 silently
+	// changes sampling for mlx clients that omit top_k (issue #6632). Gate the
+	// injection on backend family: keep 40 for the llama.cpp family and for the
+	// empty/auto backend (the GGUF auto-detect path resolves to llama.cpp), but
+	// leave TopK nil for the mlx family so the wire value is 0.
+	Context("TopK default is backend-gated (issue #6632)", func() {
+		It("injects top_k=40 for the llama.cpp backend", func() {
+			cfg := &ModelConfig{}
+			cfg.Backend = "llama-cpp"
+
+			cfg.SetDefaults()
+
+			Expect(cfg.TopK).NotTo(BeNil(), "llama.cpp must keep its top_k=40 default")
+			Expect(*cfg.TopK).To(Equal(40))
+		})
+
+		It("injects top_k=40 for the empty/auto backend (GGUF auto-detect)", func() {
+			cfg := &ModelConfig{}
+
+			cfg.SetDefaults()
+
+			Expect(cfg.TopK).NotTo(BeNil(), "empty backend resolves to llama.cpp; default unchanged")
+			Expect(*cfg.TopK).To(Equal(40))
+		})
+
+		It("leaves TopK nil for the mlx backend", func() {
+			cfg := &ModelConfig{}
+			cfg.Backend = "mlx"
+
+			cfg.SetDefaults()
+
+			Expect(cfg.TopK).To(BeNil(),
+				"mlx_lm's intended default is top_k=0 (disabled); LocalAI must not inject 40")
+		})
+
+		It("leaves TopK nil for the mlx-vlm backend", func() {
+			cfg := &ModelConfig{}
+			cfg.Backend = "mlx-vlm"
+
+			cfg.SetDefaults()
+
+			Expect(cfg.TopK).To(BeNil())
+		})
+
+		It("leaves TopK nil for the mlx-distributed backend", func() {
+			cfg := &ModelConfig{}
+			cfg.Backend = "mlx-distributed"
+
+			cfg.SetDefaults()
+
+			Expect(cfg.TopK).To(BeNil())
+		})
+
+		It("respects an explicit top_k even for the mlx backend", func() {
+			explicit := 7
+			cfg := &ModelConfig{}
+			cfg.Backend = "mlx"
+			cfg.TopK = &explicit
+
+			cfg.SetDefaults()
+
+			Expect(cfg.TopK).NotTo(BeNil())
+			Expect(*cfg.TopK).To(Equal(7))
+		})
+	})
 })
--- a/core/http/endpoints/openai/realtime.go
+++ b/core/http/endpoints/openai/realtime.go
@@ -990,8 +990,18 @@ func updateSession(session *Session, update *types.SessionUnion, cl *config.Mode
 	}

 	if rt.Audio != nil && rt.Audio.Input != nil && rt.Audio.Input.Transcription != nil {
-		session.InputAudioTranscription = rt.Audio.Input.Transcription
-		session.ModelConfig.Pipeline.Transcription = rt.Audio.Input.Transcription.Model
+		trUpd := rt.Audio.Input.Transcription
+		// A language-only update (e.g. a client forcing the STT language) carries
+		// an empty Model. Preserve the pipeline's configured transcription backend
+		// instead of blanking it — otherwise the next utterance transcribes against
+		// an empty model and the backend RPC fails with "unimplemented".
+		if trUpd.Model == "" && session.InputAudioTranscription != nil {
+			trUpd.Model = session.InputAudioTranscription.Model
+		}
+		session.InputAudioTranscription = trUpd
+		if trUpd.Model != "" {
+			session.ModelConfig.Pipeline.Transcription = trUpd.Model
+		}
 	}

 	if rt.Model != "" || (rt.Audio != nil && rt.Audio.Output != nil && rt.Audio.Output.Voice != "") || (rt.Audio != nil && rt.Audio.Input != nil && rt.Audio.Input.Transcription != nil) {
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
--- a/go.mod
+++ b/go.mod
@@ -36,7 +36,7 @@ require (
 	github.com/mholt/archiver/v3 v3.5.1
 	github.com/microcosm-cc/bluemonday v1.0.27
 	github.com/modelcontextprotocol/go-sdk v1.5.0
-	github.com/mudler/cogito v0.9.5-0.20260315222927-63abdec7189b
+	github.com/mudler/cogito v0.10.1-0.20260609212329-bf4010d31047
 	github.com/mudler/edgevpn v0.34.0
 	github.com/mudler/go-processmanager v0.1.1
 	github.com/mudler/memory v0.0.0-20260406210934-424c1ecf2cf8
--- a/go.sum
+++ b/go.sum
@@ -968,8 +968,8 @@ github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM=
 github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw=
 github.com/mudler/LocalAGI v0.0.0-20260606071251-14aed1ae4336 h1:iKBkSnpisOvMVxFoYsAObvAuOqXBakRPMD0PWxWG5EE=
 github.com/mudler/LocalAGI v0.0.0-20260606071251-14aed1ae4336/go.mod h1:U+g6u8mF2wQxhkdBl3dr8G4db1cv3n7KTKmraoJ7D0c=
-github.com/mudler/cogito v0.9.5-0.20260315222927-63abdec7189b h1:A74T2Lauvg61KodYqsjTYDY05kPLcW+efVZjd23dghU=
-github.com/mudler/cogito v0.9.5-0.20260315222927-63abdec7189b/go.mod h1:6sfja3lcu2nWRzEc0wwqGNu/eCG3EWgij+8s7xyUeQ4=
+github.com/mudler/cogito v0.10.1-0.20260609212329-bf4010d31047 h1:wJ8WbDah1YcpBNRDmovQro8JiR228YFk7TUqPCS4m04=
+github.com/mudler/cogito v0.10.1-0.20260609212329-bf4010d31047/go.mod h1:6sfja3lcu2nWRzEc0wwqGNu/eCG3EWgij+8s7xyUeQ4=
 github.com/mudler/edgevpn v0.34.0 h1:qDrD/rCPFY/FdURbXudIZWihVKY4VOX3nMn3CcbeQEU=
 github.com/mudler/edgevpn v0.34.0/go.mod h1:yki7uMi5LR9gSMrw8PdPieuxsrk8BLV2Ui7VBEmbbIA=
 github.com/mudler/go-piper v0.0.0-20241023091659-2494246fd9fc h1:RxwneJl1VgvikiX28EkpdAyL4yQVnJMrbquKospjHyA=
Author	SHA1	Message	Date
dependabot[bot]	33eba45a6b	chore(deps): bump torch Bumps the pip group with 1 update in the /backend/python/vllm directory: torch. Updates `torch` from 2.9.1+cpu to 2.12.0+cpu --- updated-dependencies: - dependency-name: torch dependency-version: 2.12.0+cpu dependency-type: direct:production dependency-group: pip ... Signed-off-by: dependabot[bot] <support@github.com>	2026-06-13 07:51:39 +00:00
LocalAI [bot]	0413fc03f8	fix(gallery): make opus a meta backend for platform auto-selection (#9813 ) (#10291 ) fix(gallery): make opus a meta backend so the platform variant is auto-selected (#9813) The realtime/WebRTC path loads the "opus" codec backend by name, but on macOS arm64 only "metal-opus" is installable, so Load("opus") failed with "opus backend not available". The root cause: unlike llama-cpp and whisper, the opus entry was a concrete CPU backend (it carried a uri and no capabilities map) rather than a meta backend, so nothing mapped "opus" to the platform-appropriate variant. Restructure opus to mirror llama-cpp/whisper: "opus" becomes a meta backend with a capabilities map (default -> cpu-opus, metal -> metal-opus) and no uri; the CPU image moves to a new "cpu-opus" concrete (and its dev variant to "cpu-opus-development"). Installing "opus" now resolves to metal-opus on Apple Silicon and cpu-opus elsewhere, and Load("opus") works on every platform via the meta pointer - so the realtime endpoint needs no special casing. This reverts the realtime_webrtc.go resolution helper from the earlier approach in favor of the gallery-level fix. Assisted-by: claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Ettore Di Giacinto <mudler@localai.io>	2026-06-13 09:51:02 +02:00
LocalAI [bot]	7088572f75	fix(neutts): pin torchaudio to match torch (fixes undefined symbol) (#9798 ) (#10292 ) fix(neutts): pin torchaudio to match torch to avoid ABI mismatch (#9798) neucodec pulls torchaudio transitively but it was unpinned, so an incompatible torchaudio could be resolved against the pinned torch==2.8.0, producing the 'undefined symbol: torch_library_impl' load failure. Pin torchaudio==2.8.0 alongside torch in the cpu and cublas12 requirements. Assisted-by: claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Ettore Di Giacinto <mudler@localai.io>	2026-06-13 09:28:41 +02:00
LocalAI [bot]	c1e8440f5b	fix(deps): bump cogito to fix MCP image-result panic (#10101 ) (#10294 ) fix(mcp): bump cogito to handle non-text tool result content Fixes #10101: the API panicked with "interface conversion: mcp.Content is mcp.ImageContent, not mcp.TextContent" when an MCP tool returned an image. Upstream cogito PR #50 replaced the unchecked TextContent assertion in the tool-result loop with a contentToString type-switch that handles image (and other non-text) content blocks gracefully. Bump github.com/mudler/cogito to v0.10.1-0.20260609212329-bf4010d31047, which includes the fix. Assisted-by: claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Ettore Di Giacinto <mudler@localai.io>	2026-06-13 09:28:25 +02:00
LocalAI [bot]	8f0059123b	feat(gallery): add 60 piper TTS voices across 42 languages (Phase 2) (#10296 ) Extends the piper voice set with a couple of voices per language for 42 more languages (Arabic, Bulgarian, Catalan, Czech, Welsh, Danish, Greek, Spanish, Basque, Persian, Finnish, French, Hindi, Hungarian, Indonesian, Icelandic, Georgian, Kazakh, Luxembourgish, Latvian, Malayalam, Nepali, Dutch, Norwegian, Polish, Portuguese, Romanian, Russian, Slovak, Slovenian, Albanian, Swedish, Swahili, Telugu, Turkish, Ukrainian, Urdu, Vietnamese, Chinese, ...), run through the crispasr backend's backend:piper engine and hosted at LocalAI-Community/piper-voices-GGUF. All converted from rhasspy/piper-voices with CrispASR's convert-piper-to-gguf.py and screened end-to-end on the pinned engine. Only single-speaker low/medium voices are included; high-quality decoders and multi-speaker models segfault and are excluded (e.g. zh_CN-chaowen dropped, huayan kept). Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Ettore Di Giacinto <mudler@localai.io>	2026-06-13 09:19:21 +02:00
LocalAI [bot]	a906438a69	fix(config): backend-gate the top_k=40 sampler default (#6632 ) (#10285 ) fix(config): gate top_k=40 default on backend family (#6632) SetDefaults injected top_k=40 (llama.cpp's sampling default) for every model config regardless of backend. That value is wrong for backends whose native default differs: mlx_lm's intended default is top_k=0 (disabled) and mlx does not remap 0->40, so a client that omits top_k silently got 40 shipped to mlx, changing sampling. The mlx backend's own getattr(request,'TopK',0) fallback is dead because proto3 int32 is always present. Gate the injection on backend family via UsesLlamaSamplerDefaults: keep top_k=40 for the llama.cpp family and for the empty/auto backend (the GGUF auto-detect path resolves to llama.cpp, so existing behavior is preserved), but leave TopK nil for the known non-llama backends (mlx, mlx-vlm, mlx-distributed). gRPCPredictOpts now sends 0 when TopK is nil, which is the value mlx actually wants. Only TopK is gated - the confirmed bug. The sibling sampler defaults (top_p, temperature, min_p) are left global to avoid widening scope and introducing nil-deref risk; revisit per-backend if needed. Assisted-by: claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Ettore Di Giacinto <mudler@localai.io>	2026-06-13 09:04:25 +02:00
LocalAI [bot]	d28a5b6da1	chore: ⬆️ Update mudler/locate-anything.cpp to `92c1682da792c1e8a5dec91acc2be4b02c742ded` (#10282) ⬆️ Update mudler/locate-anything.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>	2026-06-13 09:01:17 +02:00
LocalAI [bot]	edeacf22c4	fix(realtime): keep transcription model on a language-only session.update (#10295) A transcription session.update that carries only a language (no model) — e.g. a client forcing the STT input language — has an empty Transcription.Model. updateSession unconditionally copied that into session.ModelConfig.Pipeline.Transcription, blanking the pipeline's configured transcription backend. The next utterance then transcribed against an empty model and the backend RPC failed with "unimplemented" (surfaced to the client as transcription_failed), so transcription silently stopped whenever a language was selected. Only adopt the incoming transcription model when it is non-empty, and preserve the existing model otherwise (mirroring updateTransSession). Signed-off-by: mudler <mudler@localai.io> Co-authored-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>	2026-06-13 01:01:36 +02:00