From 482314c623c86d9fb362a828a371295c1d417f85 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 24 Jun 2026 21:50:44 +0200 Subject: [PATCH 1/7] fix(realtime): resolve model aliases for pipeline sub-models (#10484) Realtime pipeline sub-models (llm/transcription/tts/vad/sound-detection) were loaded via cl.LoadModelConfigFileByName without alias resolution, unlike top-level API requests which resolve aliases in core/http/middleware/request.go. So a pipeline that references an alias (e.g. `pipeline.llm: default`, where `default` is an alias for a real LLM) reached model loading as the alias stub with an empty Backend. This was silently broken on a single host (it failed downstream) and a hard error in distributed/p2p mode: routing model : loading model default: ... installing backend on node X: backend name is empty Fix by routing every pipeline sub-model load through a small helper that follows a single alias hop (mirroring the top-level resolution), so non-alias sub-models behave identically and aliased ones get the target's full config (Backend, Model, ...). Assisted-by: Claude:claude-opus-4-8 Signed-off-by: Ettore Di Giacinto Co-authored-by: Ettore Di Giacinto --- core/http/endpoints/openai/realtime_model.go | 33 +++++++++--- .../openai/realtime_model_alias_test.go | 52 +++++++++++++++++++ 2 files changed, 78 insertions(+), 7 deletions(-) create mode 100644 core/http/endpoints/openai/realtime_model_alias_test.go diff --git a/core/http/endpoints/openai/realtime_model.go b/core/http/endpoints/openai/realtime_model.go index 6843a521d..0dafa0a35 100644 --- a/core/http/endpoints/openai/realtime_model.go +++ b/core/http/endpoints/openai/realtime_model.go @@ -432,7 +432,7 @@ func loadSoundDetectionConfig(pipeline *config.Pipeline, cl *config.ModelConfigL if pipeline.SoundDetection == "" { return nil, nil } - cfg, err := cl.LoadModelConfigFileByName(pipeline.SoundDetection, ml.ModelPath) + cfg, err := loadPipelineSubModel(cl, pipeline.SoundDetection, ml.ModelPath) if err != nil { return nil, fmt.Errorf("failed to load sound detection config: %w", err) } @@ -443,7 +443,7 @@ func loadSoundDetectionConfig(pipeline *config.Pipeline, cl *config.ModelConfigL } func newTranscriptionOnlyModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (Model, *config.ModelConfig, error) { - cfgVAD, err := cl.LoadModelConfigFileByName(pipeline.VAD, ml.ModelPath) + cfgVAD, err := loadPipelineSubModel(cl, pipeline.VAD, ml.ModelPath) if err != nil { return nil, nil, fmt.Errorf("failed to load backend config: %w", err) @@ -453,7 +453,7 @@ func newTranscriptionOnlyModel(pipeline *config.Pipeline, cl *config.ModelConfig return nil, nil, fmt.Errorf("failed to validate config: %w", err) } - cfgSST, err := cl.LoadModelConfigFileByName(pipeline.Transcription, ml.ModelPath) + cfgSST, err := loadPipelineSubModel(cl, pipeline.Transcription, ml.ModelPath) if err != nil { return nil, nil, fmt.Errorf("failed to load backend config: %w", err) @@ -542,11 +542,30 @@ func buildRealtimeRoutingContext(a *application.Application, sessionID string) * } } +// loadPipelineSubModel loads a pipeline sub-model config by name and follows a +// single alias hop, so a pipeline that references an alias (e.g. `llm: default`) +// gets the alias target's full config (Backend, Model, ...) rather than the +// alias stub with an empty Backend. Without this the alias survives unresolved +// into model loading and fails downstream — notably in distributed mode with +// "backend name is empty". Mirrors the top-level alias resolution in +// core/http/middleware/request.go. +func loadPipelineSubModel(cl *config.ModelConfigLoader, name, modelPath string) (*config.ModelConfig, error) { + cfg, err := cl.LoadModelConfigFileByName(name, modelPath) + if err != nil { + return nil, err + } + resolved, _, err := cl.ResolveAlias(cfg) + if err != nil { + return nil, err + } + return resolved, nil +} + // returns and loads either a wrapped model or a model that support audio-to-audio func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, evaluator *templates.Evaluator, routing *RealtimeRoutingContext) (Model, error) { xlog.Debug("Creating new model pipeline model", "pipeline", pipeline) - cfgVAD, err := cl.LoadModelConfigFileByName(pipeline.VAD, ml.ModelPath) + cfgVAD, err := loadPipelineSubModel(cl, pipeline.VAD, ml.ModelPath) if err != nil { return nil, fmt.Errorf("failed to load backend config: %w", err) @@ -557,7 +576,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model } // TODO: Do we always need a transcription model? It can be disabled. Note that any-to-any instruction following models don't transcribe as such, so if transcription is required it is a separate process - cfgSST, err := cl.LoadModelConfigFileByName(pipeline.Transcription, ml.ModelPath) + cfgSST, err := loadPipelineSubModel(cl, pipeline.Transcription, ml.ModelPath) if err != nil { return nil, fmt.Errorf("failed to load backend config: %w", err) @@ -589,7 +608,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model xlog.Debug("Loading a wrapped model") // Otherwise we want to return a wrapped model, which is a "virtual" model that re-uses other models to perform operations - cfgLLM, err := cl.LoadModelConfigFileByName(pipeline.LLM, ml.ModelPath) + cfgLLM, err := loadPipelineSubModel(cl, pipeline.LLM, ml.ModelPath) if err != nil { return nil, fmt.Errorf("failed to load backend config: %w", err) @@ -604,7 +623,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model applyPipelineReasoning(cfgLLM, *pipeline) applyPipelineThinking(cfgLLM, *pipeline) - cfgTTS, err := cl.LoadModelConfigFileByName(pipeline.TTS, ml.ModelPath) + cfgTTS, err := loadPipelineSubModel(cl, pipeline.TTS, ml.ModelPath) if err != nil { return nil, fmt.Errorf("failed to load backend config: %w", err) diff --git a/core/http/endpoints/openai/realtime_model_alias_test.go b/core/http/endpoints/openai/realtime_model_alias_test.go new file mode 100644 index 000000000..77179d963 --- /dev/null +++ b/core/http/endpoints/openai/realtime_model_alias_test.go @@ -0,0 +1,52 @@ +package openai + +import ( + "os" + "path/filepath" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "github.com/mudler/LocalAI/core/config" +) + +// loadPipelineSubModel must resolve a pipeline sub-model that references an +// alias (e.g. `llm: default`) one hop to the alias target's full config — so +// the effective backend is the target's backend, not the empty backend of the +// alias stub. This mirrors the top-level alias resolution done in +// core/http/middleware/request.go, which the realtime pipeline previously +// skipped (failing in distributed mode with "backend name is empty"). +var _ = Describe("loadPipelineSubModel", func() { + It("resolves a sub-model alias one hop to the target's config", func() { + tmpDir := GinkgoT().TempDir() + + // A real model config with a concrete backend. + realLLM := `name: real-llm +backend: llama-cpp +parameters: + model: real-llm.gguf +` + Expect(os.WriteFile(filepath.Join(tmpDir, "real-llm.yaml"), []byte(realLLM), 0644)).To(Succeed()) + + // An alias pointing at the real model. + aliasCfg := `name: default +alias: real-llm +` + Expect(os.WriteFile(filepath.Join(tmpDir, "default.yaml"), []byte(aliasCfg), 0644)).To(Succeed()) + + cl := config.NewModelConfigLoader(tmpDir) + Expect(cl.LoadModelConfigsFromPath(tmpDir)).To(Succeed()) + + // Resolving the alias must follow the hop to the target's full config. + resolved, err := loadPipelineSubModel(cl, "default", tmpDir) + Expect(err).NotTo(HaveOccurred()) + Expect(resolved.IsAlias()).To(BeFalse()) + Expect(resolved.Backend).To(Equal("llama-cpp")) + + // A non-alias name must load unchanged. + direct, err := loadPipelineSubModel(cl, "real-llm", tmpDir) + Expect(err).NotTo(HaveOccurred()) + Expect(direct.Backend).To(Equal("llama-cpp")) + Expect(direct.Name).To(Equal("real-llm")) + }) +}) From 193d0e6aefa2ffe051206c870f626148e3402f33 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 24 Jun 2026 22:19:03 +0200 Subject: [PATCH 2/7] fix(backends): darwin/metal support for supertonic (#10488) The supertonic Go TTS backend dlopens ONNX Runtime, but its runtime and packaging scripts were Linux-only: run.sh exported LD_LIBRARY_PATH, pointed ONNXRUNTIME_LIB_PATH at libonnxruntime.so, and always tried the ld.so exec path, while package.sh hard-failed on any non-Linux host. On macOS dyld has no ld.so loader, uses DYLD_LIBRARY_PATH, and ONNX Runtime ships as a .dylib. This applies the same purego .dylib/DYLD_LIBRARY_PATH fix that PR #10481 landed for 15 other ONNX/purego backends (sherpa-onnx, silero-vad, etc.) but which omitted supertonic: - run.sh: on darwin export DYLD_LIBRARY_PATH and point ONNXRUNTIME_LIB_PATH at libonnxruntime.dylib; guard the ld.so exec path to Linux only. - package.sh: recognize Darwin instead of erroring out; the bundled .dylib is resolved via DYLD_LIBRARY_PATH, no glibc/ld.so to bundle. - helper.go: platform-native default library extension (dylib on darwin) for the last-resort dlopen fallback. It also wires the darwin CI build and gallery entries, resolving the inconsistency where backend/index.yaml advertised metal for supertonic but no includeDarwin matrix entry built the image: - .github/backend-matrix.yml: add the -metal-darwin-arm64-supertonic Go entry. - backend/index.yaml: declare metal capabilities and add the concrete metal-supertonic / metal-supertonic-development child entries. The Makefile already detects Darwin/osx/arm64 and stages the per-OS ONNX Runtime tarball, mirroring sherpa-onnx, so no Makefile change is required. Assisted-by: Claude:opus-4.8 [Claude Code] Signed-off-by: Ettore Di Giacinto Co-authored-by: Ettore Di Giacinto --- .github/backend-matrix.yml | 4 ++++ backend/go/supertonic/helper.go | 9 ++++++++- backend/go/supertonic/package.sh | 4 ++++ backend/go/supertonic/run.sh | 17 ++++++++++++----- backend/index.yaml | 12 ++++++++++++ 5 files changed, 40 insertions(+), 6 deletions(-) diff --git a/.github/backend-matrix.yml b/.github/backend-matrix.yml index 593e44cde..4cfc937ac 100644 --- a/.github/backend-matrix.yml +++ b/.github/backend-matrix.yml @@ -4990,6 +4990,10 @@ includeDarwin: tag-suffix: "-metal-darwin-arm64-sherpa-onnx" build-type: "metal" lang: "go" + - backend: "supertonic" + tag-suffix: "-metal-darwin-arm64-supertonic" + build-type: "metal" + lang: "go" - backend: "local-store" tag-suffix: "-metal-darwin-arm64-local-store" build-type: "metal" diff --git a/backend/go/supertonic/helper.go b/backend/go/supertonic/helper.go index 9f927d5d3..884077e75 100644 --- a/backend/go/supertonic/helper.go +++ b/backend/go/supertonic/helper.go @@ -16,6 +16,7 @@ import ( "os" "path/filepath" "regexp" + "runtime" "strings" "time" "unicode" @@ -943,7 +944,13 @@ func InitializeONNXRuntime() error { } } if libPath == "" { - libPath = "/usr/local/lib/libonnxruntime.so" + // LocalAI: default to the platform-native shared library + // extension when nothing else is found (dyld vs ld.so). + if runtime.GOOS == "darwin" { + libPath = "/usr/local/lib/libonnxruntime.dylib" + } else { + libPath = "/usr/local/lib/libonnxruntime.so" + } } } ort.SetSharedLibraryPath(libPath) diff --git a/backend/go/supertonic/package.sh b/backend/go/supertonic/package.sh index 9e2a01625..678ca5ead 100755 --- a/backend/go/supertonic/package.sh +++ b/backend/go/supertonic/package.sh @@ -32,6 +32,10 @@ elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2 cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1 cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0 +elif [ $(uname -s) = "Darwin" ]; then + # macOS: dyld resolves the bundled .dylib via DYLD_LIBRARY_PATH (set in + # run.sh); there is no ld.so loader nor glibc to bundle. + echo "Detected Darwin" else echo "Error: Could not detect architecture" exit 1 diff --git a/backend/go/supertonic/run.sh b/backend/go/supertonic/run.sh index 2dabf7eb3..683c52ab2 100755 --- a/backend/go/supertonic/run.sh +++ b/backend/go/supertonic/run.sh @@ -3,12 +3,19 @@ set -ex CURDIR=$(dirname "$(realpath $0)") -export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH -export ONNXRUNTIME_LIB_PATH=$CURDIR/lib/libonnxruntime.so +if [ "$(uname)" = "Darwin" ]; then + # macOS uses dyld: there is no ld.so loader, and the search path env + # var is DYLD_LIBRARY_PATH. ONNX Runtime ships as a .dylib here. + export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH + export ONNXRUNTIME_LIB_PATH=$CURDIR/lib/libonnxruntime.dylib +else + export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH + export ONNXRUNTIME_LIB_PATH=$CURDIR/lib/libonnxruntime.so -if [ -f $CURDIR/lib/ld.so ]; then - echo "Using lib/ld.so" - exec $CURDIR/lib/ld.so $CURDIR/supertonic "$@" + if [ -f $CURDIR/lib/ld.so ]; then + echo "Using lib/ld.so" + exec $CURDIR/lib/ld.so $CURDIR/supertonic "$@" + fi fi exec $CURDIR/supertonic "$@" diff --git a/backend/index.yaml b/backend/index.yaml index 3f61f7b4e..592c8fd6b 100644 --- a/backend/index.yaml +++ b/backend/index.yaml @@ -1569,6 +1569,7 @@ - TTS capabilities: default: "cpu-supertonic" + metal: "metal-supertonic" - !!merge <<: *neutts name: "neutts-development" capabilities: @@ -5484,6 +5485,7 @@ name: "supertonic-development" capabilities: default: "cpu-supertonic-development" + metal: "metal-supertonic-development" - !!merge <<: *supertonic name: "cpu-supertonic" uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-supertonic" @@ -5494,3 +5496,13 @@ uri: "quay.io/go-skynet/local-ai-backends:master-cpu-supertonic" mirrors: - localai/localai-backends:master-cpu-supertonic +- !!merge <<: *supertonic + name: "metal-supertonic" + uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-supertonic" + mirrors: + - localai/localai-backends:latest-metal-darwin-arm64-supertonic +- !!merge <<: *supertonic + name: "metal-supertonic-development" + uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-supertonic" + mirrors: + - localai/localai-backends:master-metal-darwin-arm64-supertonic From 62b14fd6354acf931f83b0f4655e32a9d57321ef Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 24 Jun 2026 23:16:27 +0200 Subject: [PATCH 3/7] feat(backends): add darwin/metal build for liquid-audio (#10486) * feat(backends): add darwin/metal build for liquid-audio Wire the already-MPS-ready liquid-audio backend (it ships requirements-mps.txt) into the darwin CI matrix and the gallery so metal-darwin-arm64 images are built and selectable. Signed-off-by: Ettore Di Giacinto Assisted-by: Claude:opus-4.8 [Claude Code] * ci(liquid-audio): trigger darwin build via requirements-mps note The changed-backends path filter only builds a backend when a file under its directory changes. The metal wiring lived in index.yaml + the matrix, so the darwin job was skipped. Add a documenting comment to the MPS requirements so CI actually exercises the darwin build. Signed-off-by: Ettore Di Giacinto Assisted-by: Claude:opus-4.8 [Claude Code] * fix(liquid-audio): guard uv-only --index-strategy for the pip/darwin path Same fix as trl: the darwin/MPS build installs with pip (USE_PIP=true), which rejects the uv-only --index-strategy flag and failed the darwin backend build. Add it only on the uv path; Linux/CUDA resolution is unchanged. Signed-off-by: Ettore Di Giacinto Assisted-by: Claude:opus-4.8 [Claude Code] --------- Signed-off-by: Ettore Di Giacinto Co-authored-by: Ettore Di Giacinto --- .github/backend-matrix.yml | 3 +++ backend/index.yaml | 12 ++++++++++++ backend/python/liquid-audio/install.sh | 8 +++++++- backend/python/liquid-audio/requirements-mps.txt | 1 + 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/.github/backend-matrix.yml b/.github/backend-matrix.yml index 4cfc937ac..17d436cc1 100644 --- a/.github/backend-matrix.yml +++ b/.github/backend-matrix.yml @@ -4974,6 +4974,9 @@ includeDarwin: - backend: "kitten-tts" tag-suffix: "-metal-darwin-arm64-kitten-tts" build-type: "mps" + - backend: "liquid-audio" + tag-suffix: "-metal-darwin-arm64-liquid-audio" + build-type: "mps" - backend: "piper" tag-suffix: "-metal-darwin-arm64-piper" build-type: "metal" diff --git a/backend/index.yaml b/backend/index.yaml index 592c8fd6b..f3a2b892d 100644 --- a/backend/index.yaml +++ b/backend/index.yaml @@ -1284,6 +1284,7 @@ nvidia-cuda-13: "cuda13-liquid-audio" nvidia-cuda-12: "cuda12-liquid-audio" nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-liquid-audio" + metal: "metal-liquid-audio" icon: https://cdn-avatars.huggingface.co/v1/production/uploads/61b8e2ba285851687028d395/7_6D7rWrLxp2hb6OHSV1p.png - &qwen-tts urls: @@ -4613,6 +4614,7 @@ nvidia-cuda-13: "cuda13-liquid-audio-development" nvidia-cuda-12: "cuda12-liquid-audio-development" nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-liquid-audio-development" + metal: "metal-liquid-audio-development" - !!merge <<: *liquid-audio name: "cpu-liquid-audio" uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-liquid-audio" @@ -4623,6 +4625,16 @@ uri: "quay.io/go-skynet/local-ai-backends:master-cpu-liquid-audio" mirrors: - localai/localai-backends:master-cpu-liquid-audio +- !!merge <<: *liquid-audio + name: "metal-liquid-audio" + uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-liquid-audio" + mirrors: + - localai/localai-backends:latest-metal-darwin-arm64-liquid-audio +- !!merge <<: *liquid-audio + name: "metal-liquid-audio-development" + uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-liquid-audio" + mirrors: + - localai/localai-backends:master-metal-darwin-arm64-liquid-audio - !!merge <<: *liquid-audio name: "cuda12-liquid-audio" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-liquid-audio" diff --git a/backend/python/liquid-audio/install.sh b/backend/python/liquid-audio/install.sh index c7ed8eaa8..fe0f9caad 100755 --- a/backend/python/liquid-audio/install.sh +++ b/backend/python/liquid-audio/install.sh @@ -14,5 +14,11 @@ else fi # liquid-audio's torch wheels are large; allow upgrades to satisfy transitive pins -EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match" +EXTRA_PIP_INSTALL_FLAGS+=" --upgrade" +# --index-strategy is a uv-only flag. The darwin/MPS build installs with pip +# (USE_PIP=true in scripts/build/python-darwin.sh), which rejects it. Only add +# it on the uv path; Linux/CUDA resolution is unchanged. +if [ "x${USE_PIP:-}" != "xtrue" ]; then + EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-first-match" +fi installRequirements diff --git a/backend/python/liquid-audio/requirements-mps.txt b/backend/python/liquid-audio/requirements-mps.txt index f57687f29..3c9c36cca 100644 --- a/backend/python/liquid-audio/requirements-mps.txt +++ b/backend/python/liquid-audio/requirements-mps.txt @@ -1,3 +1,4 @@ +# MPS (Apple Silicon / Metal) build profile - installed by the darwin CI job. torch>=2.8.0 torchaudio>=2.8.0 torchcodec>=0.9.1 From 75ba2daba1e7ac12c91489d6c425df5f86ded598 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 24 Jun 2026 23:18:04 +0200 Subject: [PATCH 4/7] chore(model-gallery): :arrow_up: update checksum (#10495) :arrow_up: Checksum updates in gallery/index.yaml Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- gallery/index.yaml | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index e26f2a1f5..52f23a771 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3,24 +3,7 @@ url: "github:mudler/LocalAI/gallery/virtual.yaml@master" urls: - https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct-GGUF - description: | - Try LFM • Docs • LEAP • Discord - - # LFM2.5-1.2B-Instruct - - LFM2.5 is a new family of hybrid models designed for **on-device deployment**. It builds on the LFM2 architecture with extended pre-training and reinforcement learning. - - - **Best-in-class performance**: A 1.2B model rivaling much larger models, bringing high-quality AI to your pocket. - - **Fast edge inference**: 239 tok/s decode on AMD CPU, 82 tok/s on mobile NPU. Runs under 1GB of memory with day-one support for llama.cpp, MLX, and vLLM. - - **Scaled training**: Extended pre-training from 10T to 28T tokens and large-scale multi-stage reinforcement learning. - - Find more information about LFM2.5 in our blog post. - - ## 🗒️ Model Details - - LFM2.5-1.2B-Instruct is a general-purpose text-only model with the following features: - - ... + description: "Try LFM • Docs • LEAP • Discord\n\n# LFM2.5-1.2B-Instruct\n\nLFM2.5 is a new family of hybrid models designed for **on-device deployment**. It builds on the LFM2 architecture with extended pre-training and reinforcement learning.\n\n - **Best-in-class performance**: A 1.2B model rivaling much larger models, bringing high-quality AI to your pocket.\n - **Fast edge inference**: 239 tok/s decode on AMD CPU, 82 tok/s on mobile NPU. Runs under 1GB of memory with day-one support for llama.cpp, MLX, and vLLM.\n - **Scaled training**: Extended pre-training from 10T to 28T tokens and large-scale multi-stage reinforcement learning.\n\nFind more information about LFM2.5 in our blog post.\n\n## \U0001F5D2️ Model Details\n\nLFM2.5-1.2B-Instruct is a general-purpose text-only model with the following features:\n\n...\n" license: "other" tags: - llm @@ -842,8 +825,8 @@ use_tokenizer_template: true files: - filename: llama-cpp/models/Qwopus3.6-27B-Coder-MTP-GGUF/Qwopus3.6-27B-Coder-MTP-Q4_K_M.gguf - sha256: b2898667ed7b2388f0ab7691393833ae777f247492bbe62fdb4b2bd3e3cf3f79 uri: https://huggingface.co/Jackrong/Qwopus3.6-27B-Coder-MTP-GGUF/resolve/main/Qwopus3.6-27B-Coder-MTP-Q4_K_M.gguf + sha256: b2b9180093496da2e00439e3fa23227c591355901bfa579bc6897bbc01b755ef - filename: llama-cpp/mmproj/Qwopus3.6-27B-Coder-MTP-GGUF/mmproj-F32.gguf sha256: 32f7ea0600c07272547da401d460f8abbd980f3a57b69d6df87be0e2505e0b9c uri: https://huggingface.co/Jackrong/Qwopus3.6-27B-Coder-MTP-GGUF/resolve/main/mmproj-F32.gguf From 764b0352b938aebe21a91efa39867920adb7f26a Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 24 Jun 2026 23:18:24 +0200 Subject: [PATCH 5/7] docs: :arrow_up: update docs version mudler/LocalAI (#10491) :arrow_up: Update docs version mudler/LocalAI Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index 0abaf0d54..f8cc39cee 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v4.4.3" + "version": "v4.5.0" } From 5c3d48ab5093d49c67c60f0577f52173096ae27c Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 24 Jun 2026 23:30:08 +0200 Subject: [PATCH 6/7] feat(ui): usage & UX enhancements (last-used model, polling, starter models, usage cost, a11y) (#10496) * feat(ui): remember last-used model per capability ModelSelector auto-selected the first option whenever the bound value was empty or stale, so every visit to the Home chat box, Image, TTS or Talk pages reset the choice to whatever sorted first. Persist the user's pick in localStorage keyed by capability and prefer it on auto-select when the model is still available, falling back to the first option otherwise. Because every modality picker funnels through ModelSelector, this fixes the friction everywhere at once. External-options callers pass no capability and keep the previous first-item behaviour. Signed-off-by: Ettore Di Giacinto Assisted-by: Claude:claude-opus-4-8 [Claude Code] * feat(ui): add visibility-aware polling hook The app had 26 hand-rolled setInterval polls, none of which paused when the browser tab was hidden, so backgrounded dashboards kept hitting the server every few seconds for data nobody was looking at. Add usePolling: runs immediately, polls on a fixed interval, pauses while document.hidden, fires a catch-up poll on return, and guards against overlapping slow requests. Route useResources (the highest-frequency shared poll) through it. Further callers can be migrated incrementally. Signed-off-by: Ettore Di Giacinto Assisted-by: Claude:claude-opus-4-8 [Claude Code] * feat(ui): hardware-aware starter models on empty home A fresh install dropped admins straight into a 1000+ model gallery with no guidance. Add a StarterModels widget to the empty-state wizard that recommends a small, curated set tuned to the detected hardware: - CPU-only machines (no GPU VRAM) are steered to genuinely small models (1-4B, Q4) that stay responsive without a GPU. - GPU machines get suggestions scaled to available VRAM. Curated names are real gallery entries, intersected against the live gallery at render time so a trimmed/custom gallery degrades gracefully. Install is one click via the existing model-install API. Also routes Home's cluster and system-info polls through usePolling so a backgrounded home page stops fetching. Signed-off-by: Ettore Di Giacinto Assisted-by: Claude:claude-opus-4-8 [Claude Code] * feat(ui): optional token-cost estimates on usage dashboard The usage dashboard tracked tokens but had no monetary view. Multi-user deployments that bill back or budget compute had to export and compute cost elsewhere. Add an opt-in pricing control: admins set $ per 1M prompt/completion tokens (stored per-browser). When set, an estimated-cost summary card and per-model / per-user cost columns appear, computed from recorded token counts. The entire cost surface stays hidden until a price is entered, so the default view is unchanged. Cost is clearly labelled an estimate - LocalAI itself has no notion of price. Signed-off-by: Ettore Di Giacinto Assisted-by: Claude:claude-opus-4-8 [Claude Code] * fix(ui): label icon-only send buttons for screen readers The chat and agent-chat send buttons were a bare paper-plane icon with no accessible name, so screen readers announced only "button". Add an aria-label/title ("Send message") and mark the icon aria-hidden. An audit of all icon-only buttons found these were the only two unlabeled controls; the rest already carry visible text. Signed-off-by: Ettore Di Giacinto Assisted-by: Claude:claude-opus-4-8 [Claude Code] --------- Signed-off-by: Ettore Di Giacinto Co-authored-by: Ettore Di Giacinto --- .../http/react-ui/public/locales/en/chat.json | 1 + .../http/react-ui/public/locales/en/home.json | 14 ++ core/http/react-ui/src/App.css | 53 +++++++ .../react-ui/src/components/ModelSelector.jsx | 36 ++++- .../react-ui/src/components/StarterModels.jsx | 129 ++++++++++++++++++ core/http/react-ui/src/hooks/usePolling.js | 66 +++++++++ core/http/react-ui/src/hooks/useResources.js | 17 +-- core/http/react-ui/src/pages/AgentChat.jsx | 4 +- core/http/react-ui/src/pages/Chat.jsx | 4 +- core/http/react-ui/src/pages/Home.jsx | 74 +++++----- core/http/react-ui/src/pages/Usage.jsx | 86 +++++++++++- 11 files changed, 426 insertions(+), 58 deletions(-) create mode 100644 core/http/react-ui/src/components/StarterModels.jsx create mode 100644 core/http/react-ui/src/hooks/usePolling.js diff --git a/core/http/react-ui/public/locales/en/chat.json b/core/http/react-ui/public/locales/en/chat.json index de9d0507d..ffda226db 100644 --- a/core/http/react-ui/public/locales/en/chat.json +++ b/core/http/react-ui/public/locales/en/chat.json @@ -86,6 +86,7 @@ "input": { "placeholder": "Message...", "attachFile": "Attach file", + "send": "Send message", "stopGenerating": "Stop generating", "canvasTitle": "Canvas — extract code blocks and media into a side panel for preview, copy, and download", "canvasLabel": "Canvas", diff --git a/core/http/react-ui/public/locales/en/home.json b/core/http/react-ui/public/locales/en/home.json index fabd9e9dd..142767999 100644 --- a/core/http/react-ui/public/locales/en/home.json +++ b/core/http/react-ui/public/locales/en/home.json @@ -77,6 +77,20 @@ "noModelsTitle": "No Models Available", "noModelsBody": "There are no models installed yet. Ask your administrator to set up models so you can start chatting." }, + "starters": { + "title": "Recommended for your hardware", + "tier": { + "cpu": "CPU-only", + "gpu-small": "GPU", + "gpu-large": "GPU" + }, + "cpuNote": "No GPU detected — these small models stay responsive on CPU.", + "gpuNote": "Picked to fit your available VRAM with room for context.", + "install": "Install", + "installing": "Installing", + "installStarted": "Installing {{model}}…", + "installFailed": "Install failed: {{message}}" + }, "connect": { "title": "One endpoint, every API", "subtitle": "LocalAI serves its own full API — image & video generation, depth, object detection, reranking, audio, face & voice recognition, and realtime voice over WebRTC and WebSocket. On top of that, a drop-in compatibility layer lets any app built for OpenAI, Anthropic, Ollama or OpenAI Responses talk to it unchanged.", diff --git a/core/http/react-ui/src/App.css b/core/http/react-ui/src/App.css index cf1a46bd3..40eddc2e9 100644 --- a/core/http/react-ui/src/App.css +++ b/core/http/react-ui/src/App.css @@ -6363,6 +6363,59 @@ select.input { justify-content: center; } +/* ──────────────────── Home: hardware-aware starter models ──────────────────── */ + +.home-starters { + margin: var(--spacing-lg) 0; + padding: var(--spacing-lg); +} +.home-starters-head { + display: flex; + align-items: center; + justify-content: space-between; + gap: var(--spacing-md); +} +.home-starters-head strong { + font-size: 0.9375rem; +} +.home-starters-tier { + display: inline-flex; + align-items: center; + gap: var(--spacing-xs); + font-size: 0.75rem; + color: var(--color-text-muted); +} +.home-starters-sub { + margin: var(--spacing-xs) 0 var(--spacing-md); + font-size: 0.8125rem; + color: var(--color-text-secondary); +} +.home-starters-list { + list-style: none; + margin: 0; + padding: 0; + display: flex; + flex-direction: column; + gap: var(--spacing-xs); +} +.home-starters-item { + display: flex; + align-items: center; + gap: var(--spacing-md); + padding: var(--spacing-xs) 0; +} +.home-starters-name { + font-weight: 500; + font-size: 0.875rem; + word-break: break-all; +} +.home-starters-size { + margin-left: auto; + font-size: 0.75rem; + color: var(--color-text-muted); + white-space: nowrap; +} + /* ──────────────────── Home: drop-in endpoint / API compatibility ──────────────────── */ .home-connect { diff --git a/core/http/react-ui/src/components/ModelSelector.jsx b/core/http/react-ui/src/components/ModelSelector.jsx index 9009524ee..76a118ec9 100644 --- a/core/http/react-ui/src/components/ModelSelector.jsx +++ b/core/http/react-ui/src/components/ModelSelector.jsx @@ -1,8 +1,25 @@ -import { useEffect, useMemo } from 'react' +import { useEffect, useMemo, useCallback } from 'react' import { useModels } from '../hooks/useModels' import SearchableSelect from './SearchableSelect' import { useTranslation } from 'react-i18next' +// Remember the last model the user picked, keyed by capability, so returning to +// a page (Home chat box, Image, TTS, Talk...) defaults to that model instead of +// whatever happens to sort first. Only persisted when a capability key exists — +// `externalOptions` callers pass no capability and get the old first-item +// behaviour. localStorage access is wrapped because private-browsing modes throw. +const LAST_MODEL_PREFIX = 'localai_last_model:' + +function readLastModel(capability) { + if (!capability) return null + try { return localStorage.getItem(LAST_MODEL_PREFIX + capability) } catch { return null } +} + +function writeLastModel(capability, model) { + if (!capability || !model) return + try { localStorage.setItem(LAST_MODEL_PREFIX + capability, model) } catch { /* ignore */ } +} + export default function ModelSelector({ value, onChange, capability, className = '', options: externalOptions, loading: externalLoading, @@ -19,16 +36,27 @@ export default function ModelSelector({ const isLoading = externalOptions ? (externalLoading || false) : hookLoading const isDisabled = isLoading || (externalDisabled || false) + // Persist genuine selections so the next visit can restore them. + const handleChange = useCallback((next) => { + writeLastModel(capability, next) + onChange(next) + }, [capability, onChange]) + useEffect(() => { if (modelNames.length > 0 && (!value || !modelNames.includes(value))) { - onChange(modelNames[0]) + // Prefer the remembered model when it's still available; otherwise fall + // back to the first option. Don't re-persist here — auto-select is not a + // user choice, and writing back the stored value would be a harmless but + // pointless round-trip. + const remembered = readLastModel(capability) + onChange(remembered && modelNames.includes(remembered) ? remembered : modelNames[0]) } - }, [modelNames, value, onChange]) + }, [modelNames, value, onChange, capability]) return ( new Set()) + + const tier = useMemo(() => pickTier(resources), [resources]) + const candidates = tier.list + + // Verify candidates exist in the live gallery. One search per name (the tier + // has at most a handful) keeps this resilient to gallery customization. + useEffect(() => { + let cancelled = false + const names = [...new Set(candidates.map(c => c.name))] + Promise.all(names.map(name => + modelsApi.list({ search: name, page: 1 }) + .then(data => (data?.models || []).some(m => (m.name || m.id) === name) ? name : null) + .catch(() => null) + )).then(found => { + if (cancelled) return + const hits = found.filter(Boolean) + // If verification yielded nothing (e.g. gallery unreachable), fall back to + // showing the curated list rather than an empty widget. + setAvailable(hits.length > 0 ? new Set(hits) : null) + }) + return () => { cancelled = true } + }, [candidates]) + + const visible = available === null + ? candidates + : candidates.filter(c => available.has(c.name)) + + if (visible.length === 0) return null + + const install = async (name) => { + setInstalling(prev => new Set(prev).add(name)) + try { + await modelsApi.install(name) + addToast?.(t('starters.installStarted', { model: name }), 'success') + onInstallStarted?.(name) + } catch (err) { + addToast?.(t('starters.installFailed', { message: err.message }), 'error') + setInstalling(prev => { + const next = new Set(prev) + next.delete(name) + return next + }) + } + } + + return ( +
+
+ {t('starters.title')} + +