chore(deps): bump torch in /backend/python/vllm

Bumps torch from 2.9.1+cpu to 2.12.1+xpu.

---
updated-dependencies:
- dependency-name: torch
  dependency-version: 2.12.1+xpu
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-06-27 18:06:58 -04:00 · 2026-06-25 14:01:50 +00:00
145 changed files with 1055 additions and 5022 deletions
--- a/.agents/adding-backends.md
+++ b/.agents/adding-backends.md
@@ -102,24 +102,6 @@ Multi-arch backends are NOT a single matrix entry with `platforms: 'linux/amd64,

 Entries whose `dockerfile` is `./backend/Dockerfile.{llama-cpp,ik-llama-cpp,turboquant}` must also set a `builder-base-image` field pointing at a prebuilt base from `quay.io/go-skynet/ci-cache:base-grpc-*` (CI builds these via `.github/workflows/base-images.yml`). The mapping is by `(build-type, platforms)` — see existing entries for the pattern. CI uses these prebuilt bases to skip the gRPC compile (~25–35 min cold). Local `make backends/<name>` ignores `builder-base-image` and uses the from-source path inside the Dockerfile, so you don't need quay access for local builds.

-### Cover every OS the project supports (Linux **and** Darwin)
-
-`.github/backend-matrix.yml` has two matrices, and they are the source of truth for which OS a backend ships on:
-
- `include:` — the **Linux** matrix (x86_64 + arm64; CPU and CUDA / ROCm / SYCL / Vulkan).
- `includeDarwin:` — the **macOS / Apple Silicon** matrix (arm64; Metal where the engine supports it, otherwise a native arm64 CPU build).
-
-**A new backend must target every OS it can build for — do not ship Linux-only by default.** A backend that appears only under `include:` is silently unavailable on macOS even when its code would run there. Most C/C++/GGML engines build on Darwin out of the box (ggml defaults `GGML_METAL=ON` on Apple, so a plain build is Metal-enabled), and many Python backends do too (CPU / MPS wheels). If a backend genuinely cannot support an OS (e.g. CUDA-only, no CPU variant), state that in the PR description instead of omitting it silently.
-
-Wiring a backend into `includeDarwin:` is more than the matrix entry:
-
-1. **`includeDarwin:` entry** — `tag-suffix: "-metal-darwin-arm64-<backend>"`, `build-type: "metal"`, `lang: "go"` for go+ggml backends; omit `build-type` for the bespoke C++ ones (llama-cpp / ds4 / privacy-filter). Match an existing entry of the same shape.
-2. **`backend/index.yaml`** — add `metal:` to the backend's `capabilities` map (main and `-development`) and concrete `metal-<backend>` / `metal-<backend>-development` image entries pointing at the `-metal-darwin-arm64-<backend>` images.
-3. **C/C++ backends only** — add an `inferBackendPathDarwin` case in `scripts/changed-backends.js` returning `backend/cpp/<backend>/` (the generic fallthrough assumes `backend/<lang>/`, which is wrong for a C++ source tree driven with `lang: go`), and give `run.sh` a Darwin branch that exports `DYLD_LIBRARY_PATH` instead of `LD_LIBRARY_PATH`. If the build is bespoke (single `grpc-server` + dylib bundling), model it on `scripts/build/ds4-darwin.sh` and add a `backends/<backend>-darwin` make target plus a gated step in `.github/workflows/backend_build_darwin.yml`.
-4. **C++ proto gotcha** — if the backend compiles the generated gRPC/protobuf in a separate CMake target (e.g. `hw_grpc_proto`), that target must link `protobuf::libprotobuf` + `gRPC::grpc++` so the Homebrew include dirs propagate; otherwise macOS fails with `google/protobuf/runtime_version.h not found` (Linux hides this because apt headers sit in `/usr/include`).
-
-The CI path filter only builds a backend on a PR when a file under its directory changes, so a darwin-only YAML edit builds nothing — touch a file under `backend/<lang>/<backend>/` (a one-line comment is enough) in the same PR.
-
 ## 3. Add Backend Metadata to `backend/index.yaml`

 **Step 3a: Add Meta Definition**
@@ -243,7 +225,6 @@ After adding a new backend, verify:

 - [ ] Backend directory structure is complete with all necessary files
 - [ ] Build configurations added to `.github/backend-matrix.yml` for all desired platforms (per-arch entries with `platform-tag` for multi-arch; `builder-base-image` for llama-cpp / ik-llama-cpp / turboquant)
- [ ] **OS coverage considered**: added to `includeDarwin:` (macOS/Apple Silicon) if the backend can build there — with the `backend/index.yaml` `metal:` capability + `metal-<backend>` image entries, a `run.sh` Darwin/DYLD branch and `inferBackendPathDarwin` case for C++ backends — or the PR explains why an OS is unsupported. Do not ship Linux-only by default.
 - [ ] Meta definition added to `backend/index.yaml` in the `## metas` section
 - [ ] Image entries added to `backend/index.yaml` for all build variants (latest + development)
 - [ ] Tag suffixes match between workflow file and index.yaml
--- a/.github/backend-matrix.yml
+++ b/.github/backend-matrix.yml
@@ -2,28 +2,6 @@
 # Matrix data for backend container image builds.
 # Consumed by scripts/changed-backends.js for both backend.yml and backend_pr.yml.
 # This file is NOT a workflow — it has no top-level 'on:' or 'jobs:'.
-#
-# OS / platform coverage — READ THIS WHEN ADDING A BACKEND
-# --------------------------------------------------------
-# This file is the source of truth for which OS each backend is built and
-# published for. A backend ships ONLY for the matrices it appears in:
-#   - Linux  -> the `include:` matrix below (x86_64 + arm64; CPU and
-#               CUDA / ROCm / SYCL / Vulkan variants).
-#   - macOS  -> the `includeDarwin:` matrix (Apple Silicon / arm64; Metal where
-#               the engine supports it, otherwise a native arm64 CPU build).
-#
-# New backends must target EVERY OS they can build for, not just Linux. A backend
-# listed only under `include:` is silently unavailable on macOS even when its code
-# would run there. Most C/C++/GGML engines build on Darwin (ggml defaults
-# GGML_METAL=ON on Apple, so a plain build is Metal-enabled), and many Python
-# backends do too (CPU / MPS). If a backend genuinely cannot support an OS, say so
-# in its PR description rather than silently omitting it.
-#
-# Adding a backend to `includeDarwin:` is more than one line — see the darwin
-# checklist in .agents/adding-backends.md (includeDarwin entry, the index.yaml
-# `metal:` capability + `metal-<backend>` image entries, a `run.sh` Darwin/DYLD
-# branch for C/C++ backends, and the inferBackendPathDarwin case in
-# scripts/changed-backends.js so the path filter actually builds it).

 # Linux matrix (consumed by backend-jobs).
 include:
@@ -4944,37 +4922,6 @@ includeDarwin:
    tag-suffix: "-metal-darwin-arm64-vibevoice-cpp"
    build-type: "metal"
    lang: "go"
-  # Vision/utility C++/ggml backends (go+cgo). Their Makefiles already carry a
-  # Darwin/Metal path (GGML_METAL=ON when build-type=metal); this just builds and
-  # publishes the metal image so Apple Silicon can install them.
-  - backend: "depth-anything-cpp"
-    tag-suffix: "-metal-darwin-arm64-depth-anything-cpp"
-    build-type: "metal"
-    lang: "go"
-  - backend: "locate-anything-cpp"
-    tag-suffix: "-metal-darwin-arm64-locate-anything-cpp"
-    build-type: "metal"
-    lang: "go"
-  - backend: "rfdetr-cpp"
-    tag-suffix: "-metal-darwin-arm64-rfdetr-cpp"
-    build-type: "metal"
-    lang: "go"
-  - backend: "sam3-cpp"
-    tag-suffix: "-metal-darwin-arm64-sam3-cpp"
-    build-type: "metal"
-    lang: "go"
-  # privacy-filter (PII/NER) is a C++/ggml backend built by a bespoke darwin
-  # script (make backends/privacy-filter-darwin); ggml defaults Metal ON on Apple
-  # so the build is Metal-enabled. lang=go drives runner/toolchain selection only.
-  - backend: "privacy-filter"
-    tag-suffix: "-metal-darwin-arm64-privacy-filter"
-    lang: "go"
-  # LocalVQE has no Metal path; on Apple Silicon it builds CPU-only (GGML_METAL
-  # OFF) but is still a native arm64 image. Uses the darwin/metal build profile.
-  - backend: "localvqe"
-    tag-suffix: "-metal-darwin-arm64-localvqe"
-    build-type: "metal"
-    lang: "go"
  - backend: "voxtral"
    tag-suffix: "-metal-darwin-arm64-voxtral"
    build-type: "metal"
@@ -4991,6 +4938,9 @@ includeDarwin:
  - backend: "qwen-tts"
    tag-suffix: "-metal-darwin-arm64-qwen-tts"
    build-type: "mps"
+  - backend: "fish-speech"
+    tag-suffix: "-metal-darwin-arm64-fish-speech"
+    build-type: "mps"
  - backend: "voxcpm"
    tag-suffix: "-metal-darwin-arm64-voxcpm"
    build-type: "mps"
--- a/.github/workflows/backend_build_darwin.yml
+++ b/.github/workflows/backend_build_darwin.yml
@@ -99,7 +99,6 @@ jobs:
            /opt/homebrew/Cellar/xxhash
            /opt/homebrew/Cellar/zstd
            /opt/homebrew/Cellar/nlohmann-json
-            /opt/homebrew/Cellar/opus
          key: brew-${{ runner.os }}-${{ runner.arch }}-v1-${{ hashFiles('.github/workflows/backend_build_darwin.yml') }}

      - name: Dependencies
@@ -114,12 +113,7 @@ jobs:
          # nlohmann-json is header-only and required by the ds4 backend
          # (dsml_renderer.cpp includes <nlohmann/json.hpp>); on Linux it comes
          # from the apt-installed nlohmann-json3-dev in the build image.
-          # opus + pkg-config are required by the opus go backend: its
-          # Makefile/package.sh call `pkg-config --cflags/--libs opus` to build
-          # libopusshim.dylib and to locate libopus.dylib for bundling. brew's
-          # pkg-config defaults its search path to the Homebrew prefix so the
-          # opus.pc is found.
-          brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm ccache blake3 fmt hiredis xxhash zstd nlohmann-json opus pkg-config
+          brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm ccache blake3 fmt hiredis xxhash zstd nlohmann-json
          # Force-reinstall ccache so brew re-validates its full runtime-dep
          # closure on every run. This is the durable fix: when the upstream
          # ccache formula gains a new transitive dep (as it has multiple times
@@ -138,7 +132,7 @@ jobs:
          # and decides "already installed" without re-linking, so on a cache-
          # hit run the formulas aren't on PATH. Force-link them; --overwrite
          # tolerates pre-existing symlinks from earlier installs.
-          brew link --overwrite protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm ccache blake3 fmt hiredis xxhash zstd nlohmann-json opus pkg-config 2>/dev/null || true
+          brew link --overwrite protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm ccache blake3 fmt hiredis xxhash zstd nlohmann-json 2>/dev/null || true

      - name: Save Homebrew cache
        if: github.event_name != 'pull_request' && steps.brew-cache.outputs.cache-hit != 'true'
@@ -159,7 +153,6 @@ jobs:
            /opt/homebrew/Cellar/xxhash
            /opt/homebrew/Cellar/zstd
            /opt/homebrew/Cellar/nlohmann-json
-            /opt/homebrew/Cellar/opus
          key: brew-${{ runner.os }}-${{ runner.arch }}-v1-${{ hashFiles('.github/workflows/backend_build_darwin.yml') }}

      # ---- ccache for llama.cpp CMake builds ----
@@ -235,17 +228,8 @@ jobs:
        run: |
          make backends/ds4-darwin

-      # privacy-filter is a C++/ggml backend like ds4 - a single grpc-server with
-      # otool dylib bundling - so it gets its own bespoke darwin script rather than
-      # the generic build-darwin-go-backend path.
-      - name: Build privacy-filter backend (Darwin Metal)
-        if: inputs.backend == 'privacy-filter'
-        run: |
-          make protogen-go
-          make backends/privacy-filter-darwin
-
      - name: Build ${{ inputs.backend }}-darwin
-        if: inputs.backend != 'llama-cpp' && inputs.backend != 'ds4' && inputs.backend != 'privacy-filter'
+        if: inputs.backend != 'llama-cpp' && inputs.backend != 'ds4'
        run: |
          make protogen-go
          BACKEND=${{ inputs.backend }} BUILD_TYPE=${{ inputs.build-type }} USE_PIP=${{ inputs.use-pip }} make build-darwin-${{ inputs.lang }}-backend
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -24,11 +24,6 @@ jobs:
          args: release --clean
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          MACOS_SIGN_P12: ${{ secrets.MACOS_CERTIFICATE }}
-          MACOS_SIGN_PASSWORD: ${{ secrets.MACOS_CERTIFICATE_PWD }}
-          MACOS_NOTARY_KEY: ${{ secrets.MACOS_NOTARY_KEY }}
-          MACOS_NOTARY_KEY_ID: ${{ secrets.MACOS_NOTARY_KEY_ID }}
-          MACOS_NOTARY_ISSUER_ID: ${{ secrets.MACOS_NOTARY_ISSUER_ID }}
  launcher-build-darwin:
    runs-on: macos-latest
    steps:
@@ -40,19 +35,9 @@ jobs:
        uses: actions/setup-go@v5
        with:
          go-version: 1.23
-      - name: Import signing certificate
-        env:
-          MACOS_CERTIFICATE: ${{ secrets.MACOS_CERTIFICATE }}
-          MACOS_CERTIFICATE_PWD: ${{ secrets.MACOS_CERTIFICATE_PWD }}
-          MACOS_CI_KEYCHAIN_PWD: ${{ secrets.MACOS_CI_KEYCHAIN_PWD }}
-        run: bash contrib/macos/sign-and-notarize.sh import-cert
-      - name: Build, sign and notarize the DMG
-        env:
-          MACOS_SIGN_IDENTITY: ${{ secrets.MACOS_SIGN_IDENTITY }}
-          MACOS_NOTARY_KEY: ${{ secrets.MACOS_NOTARY_KEY }}
-          MACOS_NOTARY_KEY_ID: ${{ secrets.MACOS_NOTARY_KEY_ID }}
-          MACOS_NOTARY_ISSUER_ID: ${{ secrets.MACOS_NOTARY_ISSUER_ID }}
-        run: make release-launcher-darwin
+      - name: Build launcher for macOS ARM64
+        run: |
+          make build-launcher-darwin
      - name: Upload DMG to Release
        uses: softprops/action-gh-release@v3
        with:
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -121,19 +121,3 @@ jobs:
          detached: true
          connect-timeout-seconds: 180
          limit-access-to-actor: true
-
-  # Fast standalone unit tests for the backends' pure C++ helpers - currently the
-  # llama-cpp message reconstruction (backend/cpp/llama-cpp/message_content.h),
-  # which guards the OpenAI chat content normalization (mudler/LocalAI#10524,
-  # #7324, #7528). The runner discovers every *_test.cpp under backend/cpp/, so
-  # new pure-C++ unit tests are picked up with no CI changes. These need only the
-  # C++ stdlib + nlohmann/json, so they run on every PR without the full
-  # llama.cpp + gRPC backend build. (The same suite is also wired as an opt-in
-  # CMake/ctest target, -DLLAMA_GRPC_BUILD_TESTS=ON, for in-backend-build runs.)
-  tests-backend-cpp:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v7
-      - name: Run backend C++ unit tests
-        run: make test-backend-cpp
--- a/.gitignore
+++ b/.gitignore
@@ -94,6 +94,3 @@ core/http/react-ui/test-results/

 # SDD / brainstorm scratch (agent-driven development)
 .superpowers/
-
-# Local Apple signing material (never commit)
-.certs/
--- a/.goreleaser.yaml
+++ b/.goreleaser.yaml
@@ -9,8 +9,7 @@ source:
  enabled: true
  name_template: '{{ .ProjectName }}-{{ .Tag }}-source'
 builds:
-  - id: local-ai
-    main: ./cmd/local-ai
+  - main: ./cmd/local-ai
    env:
      - CGO_ENABLED=0
    ldflags:
@@ -36,19 +35,3 @@ snapshot:
  version_template: "{{ .Tag }}-next"
 changelog:
  use: github-native
-# Sign + notarize the macOS server binary via the quill backend (runs on Linux,
-# no macOS runner needed). Disabled automatically when MACOS_SIGN_P12 is unset
-# (forks / PRs), so those builds stay unsigned and green.
-notarize:
-  macos:
-    - enabled: '{{ isEnvSet "MACOS_SIGN_P12" }}'
-      ids:
-        - local-ai
-      sign:
-        certificate: "{{.Env.MACOS_SIGN_P12}}"
-        password: "{{.Env.MACOS_SIGN_PASSWORD}}"
-      notarize:
-        issuer_id: "{{.Env.MACOS_NOTARY_ISSUER_ID}}"
-        key_id: "{{.Env.MACOS_NOTARY_KEY_ID}}"
-        key: "{{.Env.MACOS_NOTARY_KEY}}"
-        wait: true
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -43,5 +43,4 @@ LocalAI follows the Linux kernel project's [guidelines for AI coding assistants]
 - **New API endpoints**: LocalAI advertises its capability surface in several independent places — swagger `@Tags`, `/api/instructions` registry, auth `RouteFeatureRegistry`, React UI `capabilities.js`, docs. Read [.agents/api-endpoints-and-auth.md](.agents/api-endpoints-and-auth.md) and follow its checklist — missing any surface means clients, admins, and the UI won't know the endpoint exists.
 - **Admin endpoints → MCP tool**: every admin endpoint that an admin would manage conversationally (install/list/edit/toggle/upgrade) MUST also be exposed as an MCP tool in `pkg/mcp/localaitools/`. The LocalAI Assistant chat modality and the standalone `local-ai mcp-server` consume that package; drift between REST and MCP is a real risk. Read [.agents/localai-assistant-mcp.md](.agents/localai-assistant-mcp.md) — the `TestToolHTTPRouteMappingComplete` test fails until you wire the new tool and update the route map.
 - **Build**: Inspect `Makefile` and `.github/workflows/` — ask the user before running long builds
- **Backend OS coverage**: a new backend must target every OS it can build for, not just Linux. `.github/backend-matrix.yml` has two matrices — `include:` (Linux) and `includeDarwin:` (macOS / Apple Silicon). Most C/C++/GGML and many Python backends build on Darwin too — wire the `includeDarwin` entry + `backend/index.yaml` `metal:` entries, or say in the PR why an OS is unsupported. See the darwin checklist in [.agents/adding-backends.md](.agents/adding-backends.md).
 - **UI**: The active UI is the React app in `core/http/react-ui/`. The older Alpine.js/HTML UI in `core/http/static/` is pending deprecation — all new UI work goes in the React UI
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 # Disable parallel execution for backend builds
-.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/turboquant backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/crispasr backends/parakeet-cpp backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/rfdetr-cpp backends/insightface backends/speaker-recognition backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/sglang backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization backends/kokoros backends/sam3-cpp backends/qwen3-tts-cpp backends/omnivoice-cpp backends/vibevoice-cpp backends/localvqe backends/tinygrad backends/sherpa-onnx backends/ds4 backends/ds4-darwin backends/liquid-audio backends/supertonic backends/depth-anything-cpp backends/privacy-filter backends/privacy-filter-darwin
+.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/turboquant backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/crispasr backends/parakeet-cpp backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/rfdetr-cpp backends/insightface backends/speaker-recognition backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/sglang backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization backends/kokoros backends/sam3-cpp backends/qwen3-tts-cpp backends/omnivoice-cpp backends/vibevoice-cpp backends/localvqe backends/tinygrad backends/sherpa-onnx backends/ds4 backends/ds4-darwin backends/liquid-audio backends/supertonic backends/depth-anything-cpp backends/privacy-filter

 GOCMD=go
 GOTEST=$(GOCMD) test
@@ -103,7 +103,7 @@ COVERAGE_E2E_LABELS?=!real-models
 COVERAGE_EXCLUDE_RE?=grpc/proto/.*[.]pb[.]go


-.PHONY: all test test-coverage test-coverage-baseline test-coverage-check test-backend-cpp test-ui test-ui-coverage-baseline test-ui-coverage-check install-hooks build vendor lint lint-all
+.PHONY: all test test-coverage test-coverage-baseline test-coverage-check test-ui test-ui-coverage-baseline test-ui-coverage-check install-hooks build vendor lint lint-all

 all: help

@@ -201,13 +201,6 @@ test: prepare-test
 	OPUS_SHIM_LIBRARY=$(abspath ./pkg/opus/shim/libopusshim.so) \
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)

-## Compiles and runs the standalone C++ unit tests for the backends (pure
-## helpers that depend only on the stdlib + nlohmann/json, no full backend
-## build). Discovers every *_test.cpp under backend/cpp/ - see
-## backend/cpp/run-unit-tests.sh. Set NLOHMANN_INCLUDE to skip the header fetch.
-test-backend-cpp:
-	bash backend/cpp/run-unit-tests.sh
-
 ## Runs the core suite ($(TEST_PATHS)) with statement-coverage instrumentation
 ## and writes a merged profile to $(COVERAGE_PROFILE). Deliberately omits
 ## --fail-fast so a single failure doesn't truncate the coverage number, and
@@ -1136,10 +1129,6 @@ backends/ds4-darwin: build
 	bash ./scripts/build/ds4-darwin.sh
 	./local-ai backends install "ocifile://$(abspath ./backend-images/ds4.tar)"

-backends/privacy-filter-darwin: build
-	bash ./scripts/build/privacy-filter-darwin.sh
-	./local-ai backends install "ocifile://$(abspath ./backend-images/privacy-filter.tar)"
-
 build-darwin-python-backend: build
 	bash ./scripts/build/python-darwin.sh

@@ -1460,32 +1449,13 @@ docs: docs/static/gallery.html
 ########################################################

 ## fyne cross-platform build
-# Build LocalAI.app from the launcher via fyne (metadata read from cmd/launcher/FyneApp.toml).
-# Signing happens via contrib/macos/sign-and-notarize.sh, which is a no-op when the signing
-# secrets are unset, so unsigned local/fork builds keep working.
-build-launcher-darwin:
-	rm -rf dist/LocalAI.app cmd/launcher/LocalAI.app
-	mkdir -p dist
-	cd cmd/launcher && go run fyne.io/tools/cmd/fyne@latest package -os darwin -icon ../../core/http/static/logo.png --executable $(LAUNCHER_BINARY_NAME)
-	mv cmd/launcher/LocalAI.app dist/LocalAI.app
-	bash contrib/macos/sign-and-notarize.sh sign dist/LocalAI.app
-
-# Wrap the (signed) app into a drag-to-Applications DMG via hdiutil, then sign the DMG.
-dmg-launcher-darwin: build-launcher-darwin
-	rm -rf dist/dmg dist/LocalAI.dmg
-	mkdir -p dist/dmg
-	cp -R dist/LocalAI.app dist/dmg/LocalAI.app
-	ln -s /Applications dist/dmg/Applications
-	hdiutil create -volname "LocalAI" -srcfolder dist/dmg -ov -format UDZO dist/LocalAI.dmg
-	bash contrib/macos/sign-and-notarize.sh sign dist/LocalAI.dmg
-
-# Submit the DMG to Apple notarization and staple the ticket (no-op without notary secrets).
-notarize-launcher-darwin: dmg-launcher-darwin
-	bash contrib/macos/sign-and-notarize.sh notarize dist/LocalAI.dmg
-
-# Single entrypoint for CI: build -> sign app -> dmg -> sign dmg -> notarize -> staple.
-release-launcher-darwin: notarize-launcher-darwin
-	@echo "dist/LocalAI.dmg is ready"
+build-launcher-darwin: build-launcher
+	go run github.com/tiagomelo/macos-dmg-creator/cmd/createdmg@latest \
+	--appName "LocalAI" \
+	--appBinaryPath "$(LAUNCHER_BINARY_NAME)" \
+	--bundleIdentifier "com.localai.launcher" \
+	--iconPath "core/http/static/logo.png" \
+	--outputDir "dist/"

 build-launcher-linux:
-	cd cmd/launcher && go run fyne.io/tools/cmd/fyne@latest package -os linux -icon ../../core/http/static/logo.png --executable $(LAUNCHER_BINARY_NAME)-linux && mv LocalAI.tar.xz ../../$(LAUNCHER_BINARY_NAME)-linux.tar.xz
+	cd cmd/launcher && go run fyne.io/tools/cmd/fyne@latest package -os linux -icon ../../core/http/static/logo.png --executable $(LAUNCHER_BINARY_NAME)-linux && mv launcher.tar.xz ../../$(LAUNCHER_BINARY_NAME)-linux.tar.xz
--- a/backend/cpp/ik-llama-cpp/Makefile
+++ b/backend/cpp/ik-llama-cpp/Makefile
@@ -1,5 +1,5 @@

-IK_LLAMA_VERSION?=b84902d2ad27c34f989f23947200c4b91b1568fd
+IK_LLAMA_VERSION?=d5507e33ae7ee2b7b41475f08044d3bde3b839ee
 LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp

 CMAKE_ARGS?=
--- a/backend/cpp/ik-llama-cpp/run.sh
+++ b/backend/cpp/ik-llama-cpp/run.sh
@@ -2,7 +2,7 @@
 set -ex

 # Get the absolute current dir where the script is located
-CURDIR=$(dirname "$(realpath "$0")")
+CURDIR=$(dirname "$(realpath $0)")

 cd /

@@ -13,28 +13,28 @@ grep -e "flags" /proc/cpuinfo | head -1
 # ik_llama.cpp requires AVX2 — default to avx2 binary
 BINARY=ik-llama-cpp-avx2

-if [ -e "$CURDIR"/ik-llama-cpp-fallback ] && ! grep -q -e "\savx2\s" /proc/cpuinfo ; then
+if [ -e $CURDIR/ik-llama-cpp-fallback ] && ! grep -q -e "\savx2\s" /proc/cpuinfo ; then
 	echo "CPU:    AVX2   NOT found, using fallback"
 	BINARY=ik-llama-cpp-fallback
 fi

 # Extend ld library path with the dir where this script is located/lib
 if [ "$(uname)" == "Darwin" ]; then
-	export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
-	#export DYLD_FALLBACK_LIBRARY_PATH="$CURDIR"/lib:$DYLD_FALLBACK_LIBRARY_PATH
+	export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
+	#export DYLD_FALLBACK_LIBRARY_PATH=$CURDIR/lib:$DYLD_FALLBACK_LIBRARY_PATH
 else
-	export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
+	export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 fi

 # If there is a lib/ld.so, use it
-if [ -f "$CURDIR"/lib/ld.so ]; then
+if [ -f $CURDIR/lib/ld.so ]; then
 	echo "Using lib/ld.so"
 	echo "Using binary: $BINARY"
-	exec "$CURDIR"/lib/ld.so "$CURDIR"/$BINARY "$@"
+	exec $CURDIR/lib/ld.so $CURDIR/$BINARY "$@"
 fi

 echo "Using binary: $BINARY"
-exec "$CURDIR"/$BINARY "$@"
+exec $CURDIR/$BINARY "$@"

 # We should never reach this point, however just in case we do, run fallback
-exec "$CURDIR"/ik-llama-cpp-fallback "$@"
+exec $CURDIR/ik-llama-cpp-fallback "$@"
--- a/backend/cpp/llama-cpp/CMakeLists.txt
+++ b/backend/cpp/llama-cpp/CMakeLists.txt
@@ -87,18 +87,3 @@ target_compile_features(${TARGET} PRIVATE cxx_std_11)
 if(TARGET BUILD_INFO)
  add_dependencies(${TARGET} BUILD_INFO)
 endif()
-
-# Unit test for the message-content normalization helper (message_content.h).
-# Off by default so the normal backend build is untouched; enable with
-# -DLLAMA_GRPC_BUILD_TESTS=ON and run via ctest. It reuses llama.cpp's vendored
-# <nlohmann/json.hpp> (propagated by the common helpers library) so it has no
-# extra dependency beyond what the backend already builds against.
-option(LLAMA_GRPC_BUILD_TESTS "Build grpc-server unit tests" OFF)
-if(LLAMA_GRPC_BUILD_TESTS)
-    enable_testing()
-    add_executable(message_content_test message_content_test.cpp message_content.h)
-    target_include_directories(message_content_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
-    target_link_libraries(message_content_test PRIVATE ${_LLAMA_COMMON_TARGET})
-    target_compile_features(message_content_test PRIVATE cxx_std_17)
-    add_test(NAME message_content_test COMMAND message_content_test)
-endif()
--- a/backend/cpp/llama-cpp/Makefile
+++ b/backend/cpp/llama-cpp/Makefile
@@ -1,5 +1,5 @@

-LLAMA_VERSION?=9d5d882d8cd0f0a9283d87ed5e6fe3ee0d925fb1
+LLAMA_VERSION?=8be759e6f70d629638a7eb70db3824cbdcea370b
 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp

 CMAKE_ARGS?=
--- a/backend/cpp/llama-cpp/grpc-server.cpp
+++ b/backend/cpp/llama-cpp/grpc-server.cpp
@@ -39,7 +39,6 @@
 #include "common.h"
 #include "arg.h"
 #include "chat-auto-parser.h"
-#include "message_content.h"
 #include <getopt.h>
 #include <grpcpp/ext/proto_server_reflection_plugin.h>
 #include <grpcpp/grpcpp.h>
@@ -1617,20 +1616,242 @@ public:

                for (int i = 0; i < request->messages_size(); i++) {
                    const auto& msg = request->messages(i);
-                    llama_grpc::ReconstructedMessageInput rin;
-                    rin.role = msg.role();
-                    rin.content = msg.content();
-                    rin.name = msg.name();
-                    rin.tool_call_id = msg.tool_call_id();
-                    rin.reasoning_content = msg.reasoning_content();
-                    rin.tool_calls = msg.tool_calls();
-                    rin.is_last_user_msg = (i == last_user_msg_idx);
-                    if (rin.is_last_user_msg) {
-                        for (int j = 0; j < request->images_size(); j++) rin.images.push_back(request->images(j));
-                        for (int j = 0; j < request->audios_size(); j++) rin.audios.push_back(request->audios(j));
-                        for (int j = 0; j < request->videos_size(); j++) rin.videos.push_back(request->videos(j));
+                    json msg_json;
+                    msg_json["role"] = msg.role();
+
+                    bool is_last_user_msg = (i == last_user_msg_idx);
+                    bool has_images_or_audio = (request->images_size() > 0 || request->audios_size() > 0 || request->videos_size() > 0);
+
+                    // Handle content - can be string, null, or array
+                    // For multimodal content, we'll embed images/audio from separate fields
+                    if (!msg.content().empty()) {
+                        // Parse content as JSON so structured (array/object) payloads are recognised.
+                        json content_val;
+                        try {
+                            content_val = json::parse(msg.content());
+                            if (content_val.is_null()) {
+                                content_val = ""; // templates need a string, never null
+                            } else if (content_val.is_number() || content_val.is_boolean()) {
+                                content_val = msg.content(); // plain text such as "42" or "true": keep it verbatim as a string
+                            }
+                        } catch (const json::parse_error&) {
+                            content_val = msg.content(); // not JSON: treat as plain string
+                        }
+
+                        // If content is an object (e.g., from tool call failures), convert to string
+                        if (content_val.is_object()) {
+                            content_val = content_val.dump();
+                        }
+
+                        // If content is a string and this is the last user message with images/audio, combine them
+                        if (content_val.is_string() && is_last_user_msg && has_images_or_audio) {
+                            json content_array = json::array();
+                            // Add text first
+                            content_array.push_back({{"type", "text"}, {"text", content_val.get<std::string>()}});
+                            // Add images
+                            if (request->images_size() > 0) {
+                                for (int j = 0; j < request->images_size(); j++) {
+                                    json image_chunk;
+                                    image_chunk["type"] = "image_url";
+                                    json image_url;
+                                    image_url["url"] = "data:image/jpeg;base64," + request->images(j);
+                                    image_chunk["image_url"] = image_url;
+                                    content_array.push_back(image_chunk);
+                                }
+                            }
+                            // Add audios
+                            if (request->audios_size() > 0) {
+                                for (int j = 0; j < request->audios_size(); j++) {
+                                    json audio_chunk;
+                                    audio_chunk["type"] = "input_audio";
+                                    json input_audio;
+                                    input_audio["data"] = request->audios(j);
+                                    input_audio["format"] = "wav"; // default, could be made configurable
+                                    audio_chunk["input_audio"] = input_audio;
+                                    content_array.push_back(audio_chunk);
+                                }
+                            }
+                            if (request->videos_size() > 0) {
+                                for (int j = 0; j < request->videos_size(); j++) {
+                                    json video_chunk;
+                                    video_chunk["type"] = "input_video";
+                                    json input_video;
+                                    input_video["data"] = request->videos(j);
+                                    video_chunk["input_video"] = input_video;
+                                    content_array.push_back(video_chunk);
+                                }
+                            }
+                            msg_json["content"] = content_array;
+                        } else {
+                            // Use content as-is (already array or not last user message)
+                            // Ensure null values are converted to empty string
+                            if (content_val.is_null()) {
+                                msg_json["content"] = "";
+                            } else {
+                                msg_json["content"] = content_val;
+                            }
+                        }
+                    } else if (is_last_user_msg && has_images_or_audio) {
+                        // If no content but this is the last user message with images/audio, create content array
+                        json content_array = json::array();
+                        if (request->images_size() > 0) {
+                            for (int j = 0; j < request->images_size(); j++) {
+                                json image_chunk;
+                                image_chunk["type"] = "image_url";
+                                json image_url;
+                                image_url["url"] = "data:image/jpeg;base64," + request->images(j);
+                                image_chunk["image_url"] = image_url;
+                                content_array.push_back(image_chunk);
+                            }
+                        }
+                        if (request->audios_size() > 0) {
+                            for (int j = 0; j < request->audios_size(); j++) {
+                                json audio_chunk;
+                                audio_chunk["type"] = "input_audio";
+                                json input_audio;
+                                input_audio["data"] = request->audios(j);
+                                input_audio["format"] = "wav"; // default, could be made configurable
+                                audio_chunk["input_audio"] = input_audio;
+                                content_array.push_back(audio_chunk);
+                            }
+                        }
+                        if (request->videos_size() > 0) {
+                            for (int j = 0; j < request->videos_size(); j++) {
+                                json video_chunk;
+                                video_chunk["type"] = "input_video";
+                                json input_video;
+                                input_video["data"] = request->videos(j);
+                                video_chunk["input_video"] = input_video;
+                                content_array.push_back(video_chunk);
+                            }
+                        }
+                        msg_json["content"] = content_array;
+                    } else if (msg.role() == "tool") {
+                        // Tool role messages must always carry a content field, even if it is empty.
+                        // Jinja templates expect content to be a string, not null or an object.
+                        SRV_INF("[CONTENT DEBUG] PredictStream: Message %d is tool role, content_empty=%d\n", i, msg.content().empty() ? 1 : 0);
+                        if (msg.content().empty()) { // always true here: non-empty content is taken by the first branch of this if/else chain
+                            msg_json["content"] = "";
+                            SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): empty content, set to empty string\n", i);
+                        } else {
+                            SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): content exists: %s\n",
+                                    i, msg.content().substr(0, std::min<size_t>(200, msg.content().size())).c_str());
+                            // NOTE(review): unreachable - this branch of the chain is only entered when msg.content() is empty, so this else never runs
+                            json content_val;
+                            try {
+                                content_val = json::parse(msg.content());
+                                SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): parsed JSON, type=%s\n",
+                                        i, content_val.is_null() ? "null" :
+                                           content_val.is_object() ? "object" :
+                                           content_val.is_string() ? "string" :
+                                           content_val.is_array() ? "array" : "other");
+                                // Null becomes "" because Jinja templates expect content to be a string
+                                if (content_val.is_null()) {
+                                    msg_json["content"] = "";
+                                    SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): null content, converted to empty string\n", i);
+                                } else if (content_val.is_object()) {
+                                    // Objects (e.g., from tool call failures/errors) are serialized to their JSON text
+                                    msg_json["content"] = content_val.dump();
+                                    SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): object content, converted to string: %s\n",
+                                            i, content_val.dump().substr(0, std::min<size_t>(200, content_val.dump().size())).c_str());
+                                } else if (content_val.is_string()) {
+                                    msg_json["content"] = content_val.get<std::string>();
+                                    SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): string content, using as-is\n", i);
+                                } else {
+                                    // Arrays and any remaining types are serialized to their JSON text
+                                    msg_json["content"] = content_val.dump();
+                                    SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): %s content, converted to string\n",
+                                            i, content_val.is_array() ? "array" : "other type");
+                                }
+                            } catch (const json::parse_error&) {
+                                // Not valid JSON: use the raw text as the string content
+                                msg_json["content"] = msg.content();
+                                SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): not JSON, using as string\n", i);
+                            }
+                        }
+                    } else {
+                        // Ensure all messages have content set (fallback for any unhandled cases)
+                        // Jinja templates expect content to be present, default to empty string if not set
+                        if (!msg_json.contains("content")) {
+                            SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (role=%s): no content field, adding empty string\n",
+                                    i, msg.role().c_str());
+                            msg_json["content"] = "";
+                        }
                    }
-                    messages_json.push_back(llama_grpc::build_reconstructed_message(rin));
+
+                    // Add optional fields for OpenAI-compatible message format
+                    if (!msg.name().empty()) {
+                        msg_json["name"] = msg.name();
+                    }
+                    if (!msg.tool_call_id().empty()) {
+                        msg_json["tool_call_id"] = msg.tool_call_id();
+                    }
+                    if (!msg.reasoning_content().empty()) {
+                        msg_json["reasoning_content"] = msg.reasoning_content();
+                    }
+                    if (!msg.tool_calls().empty()) {
+                        // tool_calls arrives as a JSON string: parse it and attach it to the message
+                        try {
+                            json tool_calls = json::parse(msg.tool_calls());
+                            msg_json["tool_calls"] = tool_calls;
+                            SRV_INF("[TOOL CALLS DEBUG] PredictStream: Message %d has tool_calls: %s\n", i, tool_calls.dump().c_str());
+                            // IMPORTANT: If message has tool_calls but content is empty or not set,
+                            // set content to space " " instead of empty string "", because llama.cpp's
+                            // common_chat_msgs_to_json_oaicompat converts empty strings to null (line 312),
+                            // which causes template errors when accessing message.content[:tool_start_length]
+                            if (!msg_json.contains("content") || (msg_json["content"].is_string() && msg_json["content"].get<std::string>().empty())) {
+                                SRV_INF("[CONTENT DEBUG] PredictStream: Message %d has tool_calls but empty content, setting to space\n", i);
+                                msg_json["content"] = " ";
+                            }
+                            // Log each tool call. Names are type-checked first: get<std::string>() on a non-string throws json::type_error, which the handler below does not catch
+                            if (tool_calls.is_array()) {
+                                for (size_t tc_idx = 0; tc_idx < tool_calls.size(); tc_idx++) {
+                                    const auto& tc = tool_calls[tc_idx];
+                                    std::string tool_name = "unknown";
+                                    std::string tool_args = "{}";
+                                    if (tc.contains("function")) {
+                                        const auto& func = tc["function"];
+                                        if (func.contains("name")) {
+                                            tool_name = func["name"].is_string() ? func["name"].get<std::string>() : func["name"].dump();
+                                        }
+                                        if (func.contains("arguments")) {
+                                            tool_args = func["arguments"].is_string() ?
+                                                func["arguments"].get<std::string>() :
+                                                func["arguments"].dump();
+                                        }
+                                    } else if (tc.contains("name")) {
+                                        tool_name = tc["name"].is_string() ? tc["name"].get<std::string>() : tc["name"].dump();
+                                        if (tc.contains("arguments")) {
+                                            tool_args = tc["arguments"].is_string() ?
+                                                tc["arguments"].get<std::string>() :
+                                                tc["arguments"].dump();
+                                        }
+                                    }
+                                    SRV_INF("[TOOL CALLS DEBUG] PredictStream: Message %d, tool_call %zu: name=%s, arguments=%s\n",
+                                            i, tc_idx, tool_name.c_str(), tool_args.c_str());
+                                }
+                            }
+                        } catch (const json::parse_error& e) {
+                            SRV_WRN("Failed to parse tool_calls JSON: %s\n", e.what());
+                        }
+                    }
+
+                    // Debug: Log final content state before adding to array
+                    if (msg_json.contains("content")) {
+                        if (msg_json["content"].is_null()) {
+                            SRV_INF("[CONTENT DEBUG] PredictStream: Message %d FINAL STATE: content is NULL - THIS WILL CAUSE ERROR!\n", i);
+                        } else {
+                            SRV_INF("[CONTENT DEBUG] PredictStream: Message %d FINAL STATE: content type=%s, has_value=%d\n",
+                                    i, msg_json["content"].is_string() ? "string" :
+                                       msg_json["content"].is_array() ? "array" :
+                                       msg_json["content"].is_object() ? "object" : "other",
+                                    msg_json["content"].is_null() ? 0 : 1);
+                        }
+                    } else {
+                        SRV_INF("[CONTENT DEBUG] PredictStream: Message %d FINAL STATE: NO CONTENT FIELD - THIS WILL CAUSE ERROR!\n", i);
+                    }
+
+                    messages_json.push_back(msg_json);
                }

                // Final safety check: Ensure no message has null content (Jinja templates require strings)
@@ -1851,7 +2072,36 @@ public:
                if (body_json.contains("messages") && body_json["messages"].is_array()) {
                    SRV_INF("[CONTENT DEBUG] PredictStream: Before oaicompat_chat_params_parse - checking %zu messages\n", body_json["messages"].size());
                    for (size_t idx = 0; idx < body_json["messages"].size(); idx++) {
-                        llama_grpc::normalize_template_message(body_json["messages"][idx]);
+                        auto& msg = body_json["messages"][idx];
+                        std::string role_str = (msg.contains("role") && msg["role"].is_string()) ? msg["role"].get<std::string>() : "unknown";
+                        if (msg.contains("content")) {
+                            if (msg["content"].is_null()) {
+                                SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s) has NULL content - FIXING!\n", idx, role_str.c_str());
+                                msg["content"] = ""; // Fix null content
+                            } else if (role_str == "tool" && msg["content"].is_array()) {
+                                // Tool messages must have string content, not array
+                                // oaicompat_chat_params_parse expects tool messages to have string content
+                                SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=tool) has array content, converting to string\n", idx);
+                                msg["content"] = msg["content"].dump();
+                            } else if (!msg["content"].is_string() && !msg["content"].is_array()) {
+                                // Objects and scalars (numbers, booleans) reach this branch. All of them are
+                                // serialized to their JSON text: templates need a string, and replacing a
+                                // scalar with "" would silently discard the value that was sent.
+                                SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s) content is not string/array, converting\n", idx, role_str.c_str());
+                                // Take the serialized copy first, then overwrite the node it was produced from.
+                                const std::string serialized = msg["content"].dump();
+                                msg["content"] = serialized;
+                            } else {
+                                SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s): content type=%s\n",
+                                        idx, role_str.c_str(),
+                                        msg["content"].is_string() ? "string" :
+                                        msg["content"].is_array() ? "array" :
+                                        msg["content"].is_object() ? "object" : "other");
+                            }
+                        } else {
+                            SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s) MISSING content field - ADDING!\n", idx, role_str.c_str());
+                            msg["content"] = ""; // Add missing content
+                        }
                    }
                }

@@ -2183,20 +2433,264 @@ public:
                SRV_INF("[CONTENT DEBUG] Predict: Processing %d messages\n", request->messages_size());
                for (int i = 0; i < request->messages_size(); i++) {
                    const auto& msg = request->messages(i);
-                    llama_grpc::ReconstructedMessageInput rin;
-                    rin.role = msg.role();
-                    rin.content = msg.content();
-                    rin.name = msg.name();
-                    rin.tool_call_id = msg.tool_call_id();
-                    rin.reasoning_content = msg.reasoning_content();
-                    rin.tool_calls = msg.tool_calls();
-                    rin.is_last_user_msg = (i == last_user_msg_idx);
-                    if (rin.is_last_user_msg) {
-                        for (int j = 0; j < request->images_size(); j++) rin.images.push_back(request->images(j));
-                        for (int j = 0; j < request->audios_size(); j++) rin.audios.push_back(request->audios(j));
-                        for (int j = 0; j < request->videos_size(); j++) rin.videos.push_back(request->videos(j));
+                    json msg_json;
+                    msg_json["role"] = msg.role();
+
+                    SRV_INF("[CONTENT DEBUG] Predict: Message %d: role=%s, content_empty=%d, content_length=%zu\n",
+                            i, msg.role().c_str(), msg.content().empty() ? 1 : 0, msg.content().size());
+                    if (!msg.content().empty()) {
+                        SRV_INF("[CONTENT DEBUG] Predict: Message %d content (first 200 chars): %s\n",
+                                i, msg.content().substr(0, std::min<size_t>(200, msg.content().size())).c_str());
                    }
-                    messages_json.push_back(llama_grpc::build_reconstructed_message(rin));
+
+                    bool is_last_user_msg = (i == last_user_msg_idx);
+                    bool has_images_or_audio = (request->images_size() > 0 || request->audios_size() > 0 || request->videos_size() > 0);
+
+                    // Handle content - can be string, null, or array
+                    // For multimodal content, we'll embed images/audio from separate fields
+                    if (!msg.content().empty()) {
+                        // Try to parse content as JSON to see if it's already an array
+                        json content_val;
+                        try {
+                            content_val = json::parse(msg.content());
+                            // Handle null values - convert to empty string to avoid template errors
+                            if (content_val.is_null()) {
+                                SRV_INF("[CONTENT DEBUG] Predict: Message %d parsed JSON is null, converting to empty string\n", i);
+                                content_val = "";
+                            }
+                        } catch (const json::parse_error&) {
+                            // Not JSON, treat as plain string
+                            content_val = msg.content();
+                        }
+
+                        // If content is an object (e.g., from tool call failures), convert to string
+                        if (content_val.is_object()) {
+                            SRV_INF("[CONTENT DEBUG] Predict: Message %d content is object, converting to string\n", i);
+                            content_val = content_val.dump();
+                        }
+
+                        // If content is a string and this is the last user message with images/audio, combine them
+                        if (content_val.is_string() && is_last_user_msg && has_images_or_audio) {
+                            json content_array = json::array();
+                            // Add text first
+                            content_array.push_back({{"type", "text"}, {"text", content_val.get<std::string>()}});
+                            // Add images
+                            if (request->images_size() > 0) {
+                                for (int j = 0; j < request->images_size(); j++) {
+                                    json image_chunk;
+                                    image_chunk["type"] = "image_url";
+                                    json image_url;
+                                    image_url["url"] = "data:image/jpeg;base64," + request->images(j);
+                                    image_chunk["image_url"] = image_url;
+                                    content_array.push_back(image_chunk);
+                                }
+                            }
+                            // Add audios
+                            if (request->audios_size() > 0) {
+                                for (int j = 0; j < request->audios_size(); j++) {
+                                    json audio_chunk;
+                                    audio_chunk["type"] = "input_audio";
+                                    json input_audio;
+                                    input_audio["data"] = request->audios(j);
+                                    input_audio["format"] = "wav"; // default, could be made configurable
+                                    audio_chunk["input_audio"] = input_audio;
+                                    content_array.push_back(audio_chunk);
+                                }
+                            }
+                            if (request->videos_size() > 0) {
+                                for (int j = 0; j < request->videos_size(); j++) {
+                                    json video_chunk;
+                                    video_chunk["type"] = "input_video";
+                                    json input_video;
+                                    input_video["data"] = request->videos(j);
+                                    video_chunk["input_video"] = input_video;
+                                    content_array.push_back(video_chunk);
+                                }
+                            }
+                            msg_json["content"] = content_array;
+                        } else {
+                            // Use content as-is (already array or not last user message)
+                            // Ensure null values are converted to empty string
+                            if (content_val.is_null()) {
+                                SRV_INF("[CONTENT DEBUG] Predict: Message %d content_val was null, setting to empty string\n", i);
+                                msg_json["content"] = "";
+                            } else {
+                                msg_json["content"] = content_val;
+                                SRV_INF("[CONTENT DEBUG] Predict: Message %d content set, type=%s\n",
+                                        i, content_val.is_string() ? "string" :
+                                           content_val.is_array() ? "array" :
+                                           content_val.is_object() ? "object" : "other");
+                            }
+                        }
+                    } else if (is_last_user_msg && has_images_or_audio) {
+                        // If no content but this is the last user message with images/audio, create content array
+                        json content_array = json::array();
+                        if (request->images_size() > 0) {
+                            for (int j = 0; j < request->images_size(); j++) {
+                                json image_chunk;
+                                image_chunk["type"] = "image_url";
+                                json image_url;
+                                image_url["url"] = "data:image/jpeg;base64," + request->images(j);
+                                image_chunk["image_url"] = image_url;
+                                content_array.push_back(image_chunk);
+                            }
+                        }
+                        if (request->audios_size() > 0) {
+                            for (int j = 0; j < request->audios_size(); j++) {
+                                json audio_chunk;
+                                audio_chunk["type"] = "input_audio";
+                                json input_audio;
+                                input_audio["data"] = request->audios(j);
+                                input_audio["format"] = "wav"; // default, could be made configurable
+                                audio_chunk["input_audio"] = input_audio;
+                                content_array.push_back(audio_chunk);
+                            }
+                        }
+                        if (request->videos_size() > 0) {
+                            for (int j = 0; j < request->videos_size(); j++) {
+                                json video_chunk;
+                                video_chunk["type"] = "input_video";
+                                json input_video;
+                                input_video["data"] = request->videos(j);
+                                video_chunk["input_video"] = input_video;
+                                content_array.push_back(video_chunk);
+                            }
+                        }
+                        msg_json["content"] = content_array;
+                        SRV_INF("[CONTENT DEBUG] Predict: Message %d created content array with media\n", i);
+                    } else if (!msg.tool_calls().empty()) {
+                        // Tool call messages may have null content, but templates expect string
+                        // IMPORTANT: Set to space " " instead of empty string "", because llama.cpp's
+                        // common_chat_msgs_to_json_oaicompat converts empty strings to null (line 312),
+                        // which causes template errors when accessing message.content[:tool_start_length]
+                        SRV_INF("[CONTENT DEBUG] Predict: Message %d has tool_calls, setting content to space (not empty string)\n", i);
+                        msg_json["content"] = " ";
+                    } else if (msg.role() == "tool") {
+                        // Tool role messages must have content field set, even if empty
+                        // Jinja templates expect content to be a string, not null or object
+                        SRV_INF("[CONTENT DEBUG] Predict: Message %d is tool role, content_empty=%d\n", i, msg.content().empty() ? 1 : 0);
+                        if (msg.content().empty()) {
+                            msg_json["content"] = "";
+                            SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): empty content, set to empty string\n", i);
+                        } else {
+                            SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): content exists: %s\n",
+                                    i, msg.content().substr(0, std::min<size_t>(200, msg.content().size())).c_str());
+                            // Content exists, parse and ensure it's a string
+                            json content_val;
+                            try {
+                                content_val = json::parse(msg.content());
+                                SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): parsed JSON, type=%s\n",
+                                        i, content_val.is_null() ? "null" :
+                                           content_val.is_object() ? "object" :
+                                           content_val.is_string() ? "string" :
+                                           content_val.is_array() ? "array" : "other");
+                                // Handle null values - Jinja templates expect content to be a string, not null
+                                if (content_val.is_null()) {
+                                    msg_json["content"] = "";
+                                    SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): null content, converted to empty string\n", i);
+                                } else if (content_val.is_object()) {
+                                    // If content is an object (e.g., from tool call failures/errors), convert to string
+                                    msg_json["content"] = content_val.dump();
+                                    SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): object content, converted to string: %s\n",
+                                            i, content_val.dump().substr(0, std::min<size_t>(200, content_val.dump().size())).c_str());
+                                } else if (content_val.is_string()) {
+                                    msg_json["content"] = content_val.get<std::string>();
+                                    SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): string content, using as-is\n", i);
+                                } else {
+                                    // For arrays or other types, convert to string
+                                    msg_json["content"] = content_val.dump();
+                                    SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): %s content, converted to string\n",
+                                            i, content_val.is_array() ? "array" : "other type");
+                                }
+                            } catch (const json::parse_error&) {
+                                // Not JSON, treat as plain string
+                                msg_json["content"] = msg.content();
+                                SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): not JSON, using as string\n", i);
+                            }
+                        }
+                    } else {
+                        // Ensure all messages have content set (fallback for any unhandled cases)
+                        // Jinja templates expect content to be present, default to empty string if not set
+                        if (!msg_json.contains("content")) {
+                            SRV_INF("[CONTENT DEBUG] Predict: Message %d (role=%s): no content field, adding empty string\n",
+                                    i, msg.role().c_str());
+                            msg_json["content"] = "";
+                        }
+                    }
+
+                    // Add optional fields for OpenAI-compatible message format
+                    if (!msg.name().empty()) {
+                        msg_json["name"] = msg.name();
+                    }
+                    if (!msg.tool_call_id().empty()) {
+                        msg_json["tool_call_id"] = msg.tool_call_id();
+                    }
+                    if (!msg.reasoning_content().empty()) {
+                        msg_json["reasoning_content"] = msg.reasoning_content();
+                    }
+                    if (!msg.tool_calls().empty()) {
+                        // Parse tool_calls JSON string and add to message
+                        try {
+                            json tool_calls = json::parse(msg.tool_calls());
+                            msg_json["tool_calls"] = tool_calls;
+                            SRV_INF("[TOOL CALLS DEBUG] Predict: Message %d has tool_calls: %s\n", i, tool_calls.dump().c_str());
+                            // IMPORTANT: If message has tool_calls but content is empty or not set,
+                            // set content to space " " instead of empty string "", because llama.cpp's
+                            // common_chat_msgs_to_json_oaicompat converts empty strings to null (line 312),
+                            // which causes template errors when accessing message.content[:tool_start_length]
+                            if (!msg_json.contains("content") || (msg_json.contains("content") && msg_json["content"].is_string() && msg_json["content"].get<std::string>().empty())) {
+                                SRV_INF("[CONTENT DEBUG] Predict: Message %d has tool_calls but empty content, setting to space\n", i);
+                                msg_json["content"] = " ";
+                            }
+                            // Log each tool call with name and arguments
+                            if (tool_calls.is_array()) {
+                                for (size_t tc_idx = 0; tc_idx < tool_calls.size(); tc_idx++) {
+                                    const auto& tc = tool_calls[tc_idx];
+                                    std::string tool_name = "unknown";
+                                    std::string tool_args = "{}";
+                                    if (tc.contains("function")) {
+                                        const auto& func = tc["function"];
+                                        if (func.contains("name")) {
+                                            tool_name = func["name"].get<std::string>();
+                                        }
+                                        if (func.contains("arguments")) {
+                                            tool_args = func["arguments"].is_string() ?
+                                                func["arguments"].get<std::string>() :
+                                                func["arguments"].dump();
+                                        }
+                                    } else if (tc.contains("name")) {
+                                        tool_name = tc["name"].get<std::string>();
+                                        if (tc.contains("arguments")) {
+                                            tool_args = tc["arguments"].is_string() ?
+                                                tc["arguments"].get<std::string>() :
+                                                tc["arguments"].dump();
+                                        }
+                                    }
+                                    SRV_INF("[TOOL CALLS DEBUG] Predict: Message %d, tool_call %zu: name=%s, arguments=%s\n",
+                                            i, tc_idx, tool_name.c_str(), tool_args.c_str());
+                                }
+                            }
+                        } catch (const json::parse_error& e) {
+                            SRV_WRN("Failed to parse tool_calls JSON: %s\n", e.what());
+                        }
+                    }
+
+                    // Debug: Log final content state before adding to array
+                    if (msg_json.contains("content")) {
+                        if (msg_json["content"].is_null()) {
+                            SRV_INF("[CONTENT DEBUG] Predict: Message %d FINAL STATE: content is NULL - THIS WILL CAUSE ERROR!\n", i);
+                        } else {
+                            SRV_INF("[CONTENT DEBUG] Predict: Message %d FINAL STATE: content type=%s, has_value=%d\n",
+                                    i, msg_json["content"].is_string() ? "string" :
+                                       msg_json["content"].is_array() ? "array" :
+                                       msg_json["content"].is_object() ? "object" : "other",
+                                    msg_json["content"].is_null() ? 0 : 1);
+                        }
+                    } else {
+                        SRV_INF("[CONTENT DEBUG] Predict: Message %d FINAL STATE: NO CONTENT FIELD - THIS WILL CAUSE ERROR!\n", i);
+                    }
+
+                    messages_json.push_back(msg_json);
                }

                // Final safety check: Ensure no message has null content (Jinja templates require strings)
@@ -2417,7 +2911,36 @@ public:
                if (body_json.contains("messages") && body_json["messages"].is_array()) {
                    SRV_INF("[CONTENT DEBUG] Predict: Before oaicompat_chat_params_parse - checking %zu messages\n", body_json["messages"].size());
                    for (size_t idx = 0; idx < body_json["messages"].size(); idx++) {
-                        llama_grpc::normalize_template_message(body_json["messages"][idx]);
+                        auto& msg = body_json["messages"][idx];
+                        std::string role_str = msg.contains("role") ? msg["role"].get<std::string>() : "unknown";
+                        if (msg.contains("content")) {
+                            if (msg["content"].is_null()) {
+                                SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s) has NULL content - FIXING!\n", idx, role_str.c_str());
+                                msg["content"] = ""; // Fix null content
+                            } else if (role_str == "tool" && msg["content"].is_array()) {
+                                // Tool messages must have string content, not array
+                                // oaicompat_chat_params_parse expects tool messages to have string content
+                                SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=tool) has array content, converting to string\n", idx);
+                                msg["content"] = msg["content"].dump();
+                            } else if (!msg["content"].is_string() && !msg["content"].is_array()) {
+                                // If content is object or other non-string type, convert to string for templates
+                                SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s) content is not string/array, converting\n", idx, role_str.c_str());
+                                if (msg["content"].is_object()) {
+                                    msg["content"] = msg["content"].dump();
+                                } else {
+                                    msg["content"] = "";
+                                }
+                            } else {
+                                SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s): content type=%s\n",
+                                        idx, role_str.c_str(),
+                                        msg["content"].is_string() ? "string" :
+                                        msg["content"].is_array() ? "array" :
+                                        msg["content"].is_object() ? "object" : "other");
+                            }
+                        } else {
+                            SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s) MISSING content field - ADDING!\n", idx, role_str.c_str());
+                            msg["content"] = ""; // Add missing content
+                        }
                    }
                }

--- a/backend/cpp/llama-cpp/message_content.h
+++ b/backend/cpp/llama-cpp/message_content.h
@@ -1,192 +0,0 @@
-#pragma once
-
-#include <string>
-#include <vector>
-
-#include <nlohmann/json.hpp>
-
-namespace llama_grpc {
-
-// Normalizes a proto message's content string into the JSON value used when
-// reconstructing OpenAI-format messages for the tokenizer (jinja) template.
-//
-// Shared by the streaming (PredictStream) and non-streaming (Predict) message
-// reconstruction paths so the two cannot drift.
-//
-// LocalAI's Go layer (schema.Messages.ToProto) always sends content as a plain
-// text string; multimodal media travels in separate proto fields, never inside
-// content. So user/system/developer content is *only ever* opaque text and must
-// NOT be JSON-sniffed: a prompt that merely looks like JSON (e.g. an ingredient
-// list ["1/4 cup sugar", ...]) would otherwise be reinterpreted as structured
-// content parts and rejected by oaicompat_chat_params_parse with
-// "unsupported content[].type" (https://github.com/mudler/LocalAI/issues/10524).
-// (developer is OpenAI's modern system alias - same "human-authored text" nature.)
-//
-// For assistant/tool messages we still collapse a literal JSON null/object
-// (tool-call bookkeeping) to a string, but we never turn a plain string into an
-// array/scalar. The array defense is therefore role-independent (arrays/scalars
-// fall through for every role); the role gate only governs the null/object case.
-inline nlohmann::ordered_json normalize_message_content(const std::string& role,
-                                                        const std::string& content) {
-    nlohmann::ordered_json content_val = content;
-    if (role != "user" && role != "system" && role != "developer") {
-        try {
-            nlohmann::ordered_json parsed = nlohmann::ordered_json::parse(content);
-            if (parsed.is_null()) {
-                content_val = "";
-            } else if (parsed.is_object()) {
-                content_val = parsed.dump();
-            }
-            // arrays / scalars: keep the original plain-text string as-is
-        } catch (const nlohmann::ordered_json::parse_error&) {
-            // Not JSON, already the plain string
-        }
-    }
-    return content_val;
-}
-
-// Final safety pass applied to each reconstructed OpenAI message right before it
-// is handed to oaicompat_chat_params_parse (jinja templating). Jinja templates
-// assume content is a string: a literal null breaks slicing such as
-// message.content[:N] (#7324), and a tool message with array content is rejected
-// (#7528). A multimodal user message legitimately carries a typed-part array
-// ({type:text}, {type:image_url}, ...), which must be left intact. Shared by the
-// streaming and non-streaming paths so this invariant cannot drift between them.
-inline void normalize_template_message(nlohmann::ordered_json& msg) {
-    if (!msg.contains("content")) {
-        msg["content"] = ""; // templates expect the field to exist
-        return;
-    }
-    nlohmann::ordered_json& content = msg["content"];
-    const std::string role = (msg.contains("role") && msg["role"].is_string())
-                                 ? msg["role"].get<std::string>()
-                                 : std::string();
-    if (content.is_null()) {
-        content = ""; // #7324: null would crash content[:N] slicing
-    } else if (role == "tool" && content.is_array()) {
-        content = content.dump(); // #7528: tool messages must have string content
-    } else if (!content.is_string() && !content.is_array()) {
-        if (content.is_object()) {
-            content = content.dump(); // tool-call bookkeeping object -> string
-        } else {
-            content = ""; // other scalar (number/bool) -> empty
-        }
-    }
-    // string, or a non-tool (multimodal) typed-part array: leave untouched
-}
-
-// One proto message's data, flattened to plain types so the reconstruction logic
-// can be shared and unit-tested without protobuf. The streaming and non-streaming
-// predict paths both populate this from proto::Message + the request's media.
-struct ReconstructedMessageInput {
-    std::string role;
-    std::string content;            // proto.Message.content (always a plain string)
-    std::string name;
-    std::string tool_call_id;
-    std::string reasoning_content;
-    std::string tool_calls;         // tool_calls as a JSON string, or empty
-    bool is_last_user_msg = false;  // attach request media to this message
-    std::vector<std::string> images; // base64 (jpeg)
-    std::vector<std::string> audios; // base64 (wav)
-    std::vector<std::string> videos; // base64
-};
-
-// Appends the request's media as OpenAI typed content parts. Imperative (not
-// brace-init) to avoid nlohmann's object-vs-array initializer-list ambiguity.
-inline void append_media_parts(nlohmann::ordered_json& content_array,
-                               const std::vector<std::string>& images,
-                               const std::vector<std::string>& audios,
-                               const std::vector<std::string>& videos) {
-    for (const auto& img : images) {
-        nlohmann::ordered_json image_chunk;
-        image_chunk["type"] = "image_url";
-        nlohmann::ordered_json image_url;
-        image_url["url"] = "data:image/jpeg;base64," + img;
-        image_chunk["image_url"] = image_url;
-        content_array.push_back(image_chunk);
-    }
-    for (const auto& aud : audios) {
-        nlohmann::ordered_json audio_chunk;
-        audio_chunk["type"] = "input_audio";
-        nlohmann::ordered_json input_audio;
-        input_audio["data"] = aud;
-        input_audio["format"] = "wav"; // default; could be made configurable
-        audio_chunk["input_audio"] = input_audio;
-        content_array.push_back(audio_chunk);
-    }
-    for (const auto& vid : videos) {
-        nlohmann::ordered_json video_chunk;
-        video_chunk["type"] = "input_video";
-        nlohmann::ordered_json input_video;
-        input_video["data"] = vid;
-        video_chunk["input_video"] = input_video;
-        content_array.push_back(video_chunk);
-    }
-}
-
-// Reconstructs a single OpenAI-format message (the object fed to
-// oaicompat_chat_params_parse) from a proto message. Shared by PredictStream and
-// Predict so the content/multimodal/tool_calls handling cannot drift between the
-// two stream modes (it previously lived as two ~150-line copies with a redundant
-// Predict-only tool_calls->" " branch). Guarantees content is always a string or
-// a typed-part array, never null/missing.
-inline nlohmann::ordered_json build_reconstructed_message(const ReconstructedMessageInput& in) {
-    nlohmann::ordered_json msg_json;
-    msg_json["role"] = in.role;
-    const bool has_media = !in.images.empty() || !in.audios.empty() || !in.videos.empty();
-
-    if (!in.content.empty()) {
-        nlohmann::ordered_json content_val = normalize_message_content(in.role, in.content);
-        if (content_val.is_string() && in.is_last_user_msg && has_media) {
-            // Last user message + media: build a typed-part array (text first).
-            nlohmann::ordered_json content_array = nlohmann::ordered_json::array();
-            nlohmann::ordered_json text_part;
-            text_part["type"] = "text";
-            text_part["text"] = content_val.get<std::string>();
-            content_array.push_back(text_part);
-            append_media_parts(content_array, in.images, in.audios, in.videos);
-            msg_json["content"] = content_array;
-        } else if (content_val.is_null()) {
-            msg_json["content"] = "";
-        } else {
-            msg_json["content"] = content_val;
-        }
-    } else if (in.is_last_user_msg && has_media) {
-        // No text but media on the last user message: media-only typed array.
-        nlohmann::ordered_json content_array = nlohmann::ordered_json::array();
-        append_media_parts(content_array, in.images, in.audios, in.videos);
-        msg_json["content"] = content_array;
-    } else {
-        // Empty content (any role, incl. tool/assistant): templates need a string.
-        msg_json["content"] = "";
-    }
-
-    if (!in.name.empty()) {
-        msg_json["name"] = in.name;
-    }
-    if (!in.tool_call_id.empty()) {
-        msg_json["tool_call_id"] = in.tool_call_id;
-    }
-    if (!in.reasoning_content.empty()) {
-        msg_json["reasoning_content"] = in.reasoning_content;
-    }
-    if (!in.tool_calls.empty()) {
-        try {
-            nlohmann::ordered_json tool_calls = nlohmann::ordered_json::parse(in.tool_calls);
-            msg_json["tool_calls"] = tool_calls;
-            // tool_calls + empty/blank content: use " " not "", because llama.cpp's
-            // common_chat_msgs_to_json_oaicompat turns "" into null, which breaks
-            // templates that slice message.content[:tool_start_length] (#7324).
-            if (!msg_json.contains("content") ||
-                (msg_json["content"].is_string() && msg_json["content"].get<std::string>().empty())) {
-                msg_json["content"] = " ";
-            }
-        } catch (const nlohmann::ordered_json::parse_error&) {
-            // Malformed tool_calls JSON: leave content as-is (prior behavior).
-        }
-    }
-
-    return msg_json;
-}
-
-}  // namespace llama_grpc
--- a/backend/cpp/llama-cpp/message_content_test.cpp
+++ b/backend/cpp/llama-cpp/message_content_test.cpp
@@ -1,234 +0,0 @@
-// Unit tests for the shared message-reconstruction helpers (message_content.h).
-//
-// Build & run standalone (nlohmann/json single header on the include path):
-//   g++ -std=c++17 -I<dir-with-nlohmann> message_content_test.cpp -o t && ./t
-// or via CMake: -DLLAMA_GRPC_BUILD_TESTS=ON then ctest.
-//
-// Regression coverage for:
-//   #10524 - a user/system prompt that is itself a JSON-array string must stay
-//            plain text, never be reinterpreted as OpenAI structured parts.
-//   #7324  - assistant/tool null content -> "" (templates slice content[:N]);
-//            assistant+tool_calls+empty content -> " " (not "", which becomes null).
-//   #7528  - tool message array content must reach the template as a string.
-//   multimodal - last user message text + media -> typed-part array, media kept.
-
-#include <cassert>
-#include <iostream>
-#include <string>
-
-#include "message_content.h"
-
-using nlohmann::ordered_json;
-using llama_grpc::normalize_message_content;
-using llama_grpc::normalize_template_message;
-using llama_grpc::build_reconstructed_message;
-using llama_grpc::ReconstructedMessageInput;
-
-static int failures = 0;
-
-static void check(bool ok, const std::string& name, const std::string& detail = "") {
-    if (!ok) {
-        std::cerr << "FAIL " << name << (detail.empty() ? "" : ": " + detail) << "\n";
-        failures++;
-    }
-}
-
-// ---- normalize_message_content -------------------------------------------
-
-static void expect_norm_string(const char* name, const std::string& role,
-                               const std::string& content, const std::string& want) {
-    auto got = normalize_message_content(role, content);
-    if (!got.is_string()) {
-        check(false, name, "expected a JSON string, got " +
-                               std::string(got.is_array() ? "array" : got.is_object() ? "object" : "other") +
-                               " (" + got.dump() + ")");
-        return;
-    }
-    check(got.get<std::string>() == want, name, "expected \"" + want + "\", got \"" + got.get<std::string>() + "\"");
-}
-
-static void test_normalize() {
-    const std::string ingredients = R"(["1/4 cup brown sugar, packed","1 pound ground beef"])";
-
-    // #10524 - JSON-array text must stay a string. Role-INDEPENDENT array defense.
-    for (const char* role : {"user", "system", "developer", "function", "assistant", "tool"}) {
-        expect_norm_string((std::string("json_array_stays_text:") + role).c_str(), role, ingredients, ingredients);
-    }
-
-    // #10524 - user/system/developer JSON-object text stays verbatim (NOT re-dumped).
-    expect_norm_string("user_json_object_verbatim", "user", R"({"a":1})", R"({"a":1})");
-    expect_norm_string("system_json_object_verbatim", "system", R"({"a":1})", R"({"a":1})");
-    expect_norm_string("developer_json_object_verbatim", "developer", R"({"a":1})", R"({"a":1})");
-
-    // Plain text unchanged for all roles.
-    expect_norm_string("user_plain_text", "user", "hello world", "hello world");
-    expect_norm_string("assistant_non_json_text_kept", "assistant", "hi [unclosed", "hi [unclosed");
-
-    // #7324 boundary - user/system/developer literal "null" preserved (never parsed).
-    expect_norm_string("user_literal_null_stays", "user", "null", "null");
-    expect_norm_string("system_literal_null_stays", "system", "null", "null");
-    expect_norm_string("developer_literal_null_stays", "developer", "null", "null");
-
-    // #7324 - assistant/tool literal null collapses to empty string.
-    expect_norm_string("assistant_null_to_empty", "assistant", "null", "");
-    expect_norm_string("tool_null_to_empty", "tool", "null", "");
-
-    // #7324/#7528 - assistant/tool object bookkeeping stringified (stays a string).
-    check(normalize_message_content("assistant", R"({"tool":"x"})").is_string(), "assistant_object_stringified");
-    check(normalize_message_content("tool", R"({"error":"boom"})").is_string(), "tool_object_stringified");
-
-    // #10524-family - a bare scalar that parses as a JSON number stays the string.
-    expect_norm_string("assistant_scalar_number_stays_string", "assistant", "42", "42");
-
-    // baseline - empty content stays empty.
-    expect_norm_string("user_empty_stays_empty", "user", "", "");
-}
-
-// ---- normalize_template_message (BEFORE TEMPLATE sanitizer) ---------------
-
-static void test_template_sanitizer() {
-    // #7528 - a tool message with an ACTUAL array becomes a string.
-    {
-        ordered_json msg = {{"role", "tool"}, {"content", ordered_json::array({{{"type", "text"}, {"text", "r"}}})}};
-        normalize_template_message(msg);
-        check(msg["content"].is_string(), "before_template_tool_array_to_string", "got " + msg["content"].dump());
-    }
-    // #7324 - null content -> "" for any role.
-    {
-        ordered_json msg = {{"role", "assistant"}, {"content", nullptr}};
-        normalize_template_message(msg);
-        check(msg["content"].is_string() && msg["content"] == "", "before_template_null_to_empty");
-    }
-    // object content -> dumped string (would otherwise throw at the template).
-    {
-        ordered_json msg = {{"role", "assistant"}, {"content", {{"x", 1}}}};
-        normalize_template_message(msg);
-        check(msg["content"].is_string(), "before_template_object_to_string", "got " + msg["content"].dump());
-    }
-    // missing content field -> "".
-    {
-        ordered_json msg = {{"role", "user"}};
-        normalize_template_message(msg);
-        check(msg.contains("content") && msg["content"] == "", "before_template_missing_to_empty");
-    }
-    // multimodal: a well-typed user array must be left UNTOUCHED (role!=tool).
-    {
-        ordered_json parts = ordered_json::array();
-        parts.push_back({{"type", "text"}, {"text", "x"}});
-        ordered_json img; img["type"] = "image_url"; img["image_url"] = {{"url", "data:..."}};
-        parts.push_back(img);
-        ordered_json msg = {{"role", "user"}, {"content", parts}};
-        normalize_template_message(msg);
-        check(msg["content"].is_array() && msg["content"].size() == 2, "before_template_user_typed_array_preserved",
-              "got " + msg["content"].dump());
-    }
-    // a plain string is left untouched.
-    {
-        ordered_json msg = {{"role", "user"}, {"content", "hello"}};
-        normalize_template_message(msg);
-        check(msg["content"] == "hello", "before_template_string_untouched");
-    }
-}
-
-// ---- build_reconstructed_message ----------------------------------------
-
-static void test_reconstruction() {
-    const std::string ingredients = R"(["1/4 cup brown sugar","1 pound ground beef"])";
-
-    // #10524 end-state - user JSON-array text, no media -> string content.
-    {
-        ReconstructedMessageInput in;
-        in.role = "user"; in.content = ingredients;
-        auto m = build_reconstructed_message(in);
-        check(m["content"].is_string() && m["content"] == ingredients, "recon_user_json_array_string",
-              "got " + m["content"].dump());
-    }
-    // multimodal - user text + one image on last user msg -> typed array, image kept.
-    {
-        ReconstructedMessageInput in;
-        in.role = "user"; in.content = ingredients; in.is_last_user_msg = true;
-        in.images.push_back("BASE64IMG");
-        auto m = build_reconstructed_message(in);
-        check(m["content"].is_array() && m["content"].size() == 2, "recon_multimodal_text_plus_image",
-              "got " + m["content"].dump());
-        check(m["content"][0]["type"] == "text" && m["content"][0]["text"] == ingredients, "recon_multimodal_text_first");
-        check(m["content"][1]["type"] == "image_url", "recon_multimodal_image_kept");
-    }
-    // multimodal media-only - empty text + image on last user msg.
-    {
-        ReconstructedMessageInput in;
-        in.role = "user"; in.content = ""; in.is_last_user_msg = true;
-        in.images.push_back("BASE64IMG");
-        auto m = build_reconstructed_message(in);
-        check(m["content"].is_array() && m["content"].size() == 1 && m["content"][0]["type"] == "image_url",
-              "recon_media_only", "got " + m["content"].dump());
-    }
-    // #7528 - tool array-string content stays a string.
-    {
-        ReconstructedMessageInput in;
-        in.role = "tool"; in.content = R"(["a","b"])"; in.tool_call_id = "call_1";
-        auto m = build_reconstructed_message(in);
-        check(m["content"].is_string() && m["content"] == R"(["a","b"])", "recon_tool_array_string",
-              "got " + m["content"].dump());
-        check(m["tool_call_id"] == "call_1", "recon_tool_call_id_set");
-    }
-    // tool empty content -> "".
-    {
-        ReconstructedMessageInput in;
-        in.role = "tool"; in.content = "";
-        auto m = build_reconstructed_message(in);
-        check(m["content"].is_string() && m["content"] == "", "recon_tool_empty_to_string");
-    }
-    // #7324 - assistant + tool_calls + empty content -> " " (single space, not "").
-    {
-        ReconstructedMessageInput in;
-        in.role = "assistant"; in.content = "";
-        in.tool_calls = R"([{"id":"c1","type":"function","function":{"name":"f","arguments":"{}"}}])";
-        auto m = build_reconstructed_message(in);
-        check(m["content"].is_string() && m["content"] == " ", "recon_toolcalls_empty_content_space",
-              "got " + m["content"].dump());
-        check(m["tool_calls"].is_array() && m["tool_calls"].size() == 1, "recon_toolcalls_parsed");
-    }
-    // assistant + tool_calls + real content keeps the content.
-    {
-        ReconstructedMessageInput in;
-        in.role = "assistant"; in.content = "I'll call f";
-        in.tool_calls = R"([{"id":"c1","type":"function","function":{"name":"f","arguments":"{}"}}])";
-        auto m = build_reconstructed_message(in);
-        check(m["content"] == "I'll call f", "recon_toolcalls_with_content_kept");
-    }
-    // assistant null content -> "".
-    {
-        ReconstructedMessageInput in;
-        in.role = "assistant"; in.content = "null";
-        auto m = build_reconstructed_message(in);
-        check(m["content"] == "", "recon_assistant_null_to_empty");
-    }
-    // malformed tool_calls JSON must not throw; content preserved.
-    {
-        ReconstructedMessageInput in;
-        in.role = "assistant"; in.content = "hi"; in.tool_calls = "{not json";
-        auto m = build_reconstructed_message(in);
-        check(m["content"] == "hi" && !m.contains("tool_calls"), "recon_malformed_toolcalls_safe");
-    }
-    // optional fields: name + reasoning carried through.
-    {
-        ReconstructedMessageInput in;
-        in.role = "tool"; in.content = "result"; in.name = "get_weather"; in.reasoning_content = "thinking";
-        auto m = build_reconstructed_message(in);
-        check(m["name"] == "get_weather" && m["reasoning_content"] == "thinking", "recon_optional_fields");
-    }
-}
-
-int main() {
-    test_normalize();
-    test_template_sanitizer();
-    test_reconstruction();
-
-    if (failures == 0) {
-        std::cout << "OK: all message_content tests passed\n";
-        return 0;
-    }
-    std::cerr << failures << " test(s) failed\n";
-    return 1;
-}
--- a/backend/cpp/llama-cpp/prepare.sh
+++ b/backend/cpp/llama-cpp/prepare.sh
@@ -18,10 +18,6 @@ done

 cp -r CMakeLists.txt llama.cpp/tools/grpc-server/
 cp -r grpc-server.cpp llama.cpp/tools/grpc-server/
-# Shared message-reconstruction helpers (included by grpc-server.cpp) and their
-# unit test (compiled only when -DLLAMA_GRPC_BUILD_TESTS=ON).
-cp -r message_content.h llama.cpp/tools/grpc-server/
-cp -r message_content_test.cpp llama.cpp/tools/grpc-server/
 cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/tools/grpc-server/
 cp -rfv llama.cpp/vendor/cpp-httplib/httplib.h llama.cpp/tools/grpc-server/

--- a/backend/cpp/llama-cpp/run.sh
+++ b/backend/cpp/llama-cpp/run.sh
@@ -2,7 +2,7 @@
 set -ex

 # Get the absolute current dir where the script is located
-CURDIR=$(dirname "$(realpath "$0")")
+CURDIR=$(dirname "$(realpath "$0")")

 cd /

@@ -16,37 +16,37 @@ BINARY=llama-cpp-fallback
 # CPU_ALL_VARIANTS: ggml's backend registry dlopens the best libggml-cpu-*.so for this
 # host, so no shell-side AVX probing. GPU images (cublas/sycl/vulkan/hipblas) ship only
 # llama-cpp-fallback (the accelerator does the compute), so fall back to it when absent.
-if [ -e "$CURDIR"/llama-cpp-cpu-all ]; then
+if [ -e "$CURDIR"/llama-cpp-cpu-all ]; then
 	BINARY=llama-cpp-cpu-all
 fi

 if [ -n "$LLAMACPP_GRPC_SERVERS" ]; then
-	if [ -e "$CURDIR"/llama-cpp-grpc ]; then
+	if [ -e "$CURDIR"/llama-cpp-grpc ]; then
 		BINARY=llama-cpp-grpc
 	fi
 fi
 
 # Extend ld library path with the dir where this script is located/lib
 if [ "$(uname)" == "Darwin" ]; then
-	export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
-	#export DYLD_FALLBACK_LIBRARY_PATH="$CURDIR"/lib:$DYLD_FALLBACK_LIBRARY_PATH
+	export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
+	#export DYLD_FALLBACK_LIBRARY_PATH="$CURDIR"/lib:$DYLD_FALLBACK_LIBRARY_PATH
 else
-	export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
+	export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
 	# Tell rocBLAS where to find TensileLibrary data (GPU kernel tuning files)
 	if [ -d "$CURDIR/lib/rocblas/library" ]; then
-		export ROCBLAS_TENSILE_LIBPATH="$CURDIR"/lib/rocblas/library
+		export ROCBLAS_TENSILE_LIBPATH="$CURDIR"/lib/rocblas/library
 	fi
 fi

 # If there is a lib/ld.so, use it
-if [ -f "$CURDIR"/lib/ld.so ]; then
+if [ -f "$CURDIR"/lib/ld.so ]; then
 	echo "Using lib/ld.so"
 	echo "Using binary: $BINARY"
-	exec "$CURDIR"/lib/ld.so "$CURDIR"/$BINARY "$@"
+	exec "$CURDIR"/lib/ld.so "$CURDIR"/$BINARY "$@"
 fi

 echo "Using binary: $BINARY"
-exec "$CURDIR"/$BINARY "$@"
+exec "$CURDIR"/$BINARY "$@"

 # We should never reach this point, however just in case we do, run fallback
-exec "$CURDIR"/llama-cpp-fallback "$@"
+exec "$CURDIR"/llama-cpp-fallback "$@"
--- a/backend/cpp/privacy-filter/CMakeLists.txt
+++ b/backend/cpp/privacy-filter/CMakeLists.txt
@@ -51,14 +51,6 @@ add_library(hw_grpc_proto STATIC
    ${HW_GRPC_SRCS} ${HW_GRPC_HDRS}
    ${HW_PROTO_SRCS} ${HW_PROTO_HDRS})
 target_include_directories(hw_grpc_proto PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
-# The generated proto/grpc sources include protobuf and grpc++ headers, so this
-# library must see their include dirs. Linking the imported targets propagates
-# them. On Linux the apt headers live in /usr/include (default search path) so
-# this was a no-op; on macOS the Homebrew headers are under /opt/homebrew and
-# would otherwise be missed (runtime_version.h not found).
-target_link_libraries(hw_grpc_proto PUBLIC
-    protobuf::libprotobuf
-    gRPC::grpc++)

 # Build only the pf static lib (+ ggml) from the engine tree — no CLI/bench/tests.
 # PF_VULKAN is honored when passed on the cmake command line (it lands in the
--- a/backend/cpp/privacy-filter/run.sh
+++ b/backend/cpp/privacy-filter/run.sh
@@ -2,13 +2,7 @@
 # Entry point for the privacy-filter backend image / BACKEND_BINARY mode.
 set -e
 CURDIR=$(dirname "$(realpath "$0")")
-# macOS has no bundled ld.so; the darwin package ships only dylibs under lib/,
-# resolved via DYLD_LIBRARY_PATH (the ld.so branch below is skipped there).
-if [ "$(uname)" = "Darwin" ]; then
-    export DYLD_LIBRARY_PATH="$CURDIR/lib:$DYLD_LIBRARY_PATH"
-else
-    export LD_LIBRARY_PATH="$CURDIR/lib:$LD_LIBRARY_PATH"
-fi
+export LD_LIBRARY_PATH="$CURDIR/lib:$LD_LIBRARY_PATH"
 if [ -f "$CURDIR/lib/ld.so" ]; then
    exec "$CURDIR/lib/ld.so" "$CURDIR/grpc-server" "$@"
 fi
--- a/backend/cpp/run-unit-tests.sh
+++ b/backend/cpp/run-unit-tests.sh
@@ -1,71 +0,0 @@
-#!/bin/bash
-#
-# Discovers and runs every standalone C++ unit test under backend/cpp/.
-#
-# A "standalone" unit test is a *_test.cpp that depends only on the C++ standard
-# library and nlohmann/json (single header) - i.e. it exercises pure helpers and
-# does not need the full llama.cpp + gRPC backend build. Tests that DO need the
-# backend build use the CMake/ctest path (e.g. -DLLAMA_GRPC_BUILD_TESTS=ON)
-# instead and are skipped here.
-#
-# This keeps CI generic: adding a new pure-C++ unit test file named *_test.cpp in
-# an active backend source dir is picked up automatically, with no CI edits.
-#
-# Env:
-#   NLOHMANN_INCLUDE  include dir that contains nlohmann/json.hpp. If unset, the
-#                     nlohmann/json single header is fetched to a temp dir.
-#   CXX               compiler (default: g++).
-#   JSON_VERSION      nlohmann/json tag to fetch when NLOHMANN_INCLUDE is unset
-#                     (default: v3.11.3).
-set -uo pipefail
-
-ROOT="$(cd "$(dirname "$0")" && pwd)"
-CXX="${CXX:-g++}"
-JSON_VERSION="${JSON_VERSION:-v3.11.3}"
-
-JSON_INC="${NLOHMANN_INCLUDE:-}"
-if [ -z "$JSON_INC" ]; then
-    JSON_INC="$(mktemp -d)"
-    mkdir -p "$JSON_INC/nlohmann"
-    echo "Fetching nlohmann/json ${JSON_VERSION} single header..."
-    if ! curl -L -sf \
-        "https://raw.githubusercontent.com/nlohmann/json/${JSON_VERSION}/single_include/nlohmann/json.hpp" \
-        -o "$JSON_INC/nlohmann/json.hpp"; then
-        echo "ERROR: failed to fetch nlohmann/json header" >&2
-        exit 1
-    fi
-fi
-
-# Active source dirs only - exclude per-variant build copies, dev snapshots and
-# the vendored upstream llama.cpp tree.
-mapfile -t tests < <(find "$ROOT" -name '*_test.cpp' \
-    -not -path '*/llama.cpp/*' \
-    -not -path '*-build/*' \
-    -not -path '*-dev/*' \
-    -not -path '*fallback*' | sort)
-
-if [ "${#tests[@]}" -eq 0 ]; then
-    echo "No standalone C++ unit tests found under $ROOT"
-    exit 0
-fi
-
-fail=0
-for test_src in "${tests[@]}"; do
-    name="$(basename "$test_src" .cpp)"
-    bin="$(mktemp -d)/$name"
-    echo "==> $test_src"
-    if ! "$CXX" -std=c++17 -Wall -Wextra \
-        -I"$JSON_INC" -I"$(dirname "$test_src")" \
-        "$test_src" -o "$bin"; then
-        echo "COMPILE FAILED: $test_src" >&2
-        fail=1
-        continue
-    fi
-    if ! "$bin"; then
-        echo "TEST FAILED: $test_src" >&2
-        fail=1
-    fi
-done
-
-echo "Ran ${#tests[@]} standalone C++ unit test file(s)"
-exit "$fail"
--- a/backend/cpp/turboquant/run.sh
+++ b/backend/cpp/turboquant/run.sh
@@ -2,7 +2,7 @@
 set -ex

 # Get the absolute current dir where the script is located
-CURDIR=$(dirname "$(realpath "$0")")
+CURDIR=$(dirname "$(realpath "$0")")

 cd /

@@ -15,36 +15,36 @@ BINARY=turboquant-fallback
 # x86/arm64 ship a single turboquant-cpu-all built with ggml CPU_ALL_VARIANTS: ggml's
 # backend registry dlopens the best libggml-cpu-*.so for this host, so no shell-side
 # probing. ROCm ships only turboquant-fallback, so fall back to it when cpu-all is absent.
-if [ -e "$CURDIR"/turboquant-cpu-all ]; then
+if [ -e "$CURDIR"/turboquant-cpu-all ]; then
 	BINARY=turboquant-cpu-all
 fi

 if [ -n "$LLAMACPP_GRPC_SERVERS" ]; then
-	if [ -e "$CURDIR"/turboquant-grpc ]; then
+	if [ -e "$CURDIR"/turboquant-grpc ]; then
 		BINARY=turboquant-grpc
 	fi
 fi

 # Extend ld library path with the dir where this script is located/lib
 if [ "$(uname)" == "Darwin" ]; then
-	export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
+	export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
 else
-	export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
+	export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
 	# Tell rocBLAS where to find TensileLibrary data (GPU kernel tuning files)
 	if [ -d "$CURDIR/lib/rocblas/library" ]; then
-		export ROCBLAS_TENSILE_LIBPATH="$CURDIR"/lib/rocblas/library
+		export ROCBLAS_TENSILE_LIBPATH="$CURDIR"/lib/rocblas/library
 	fi
 fi

 # If there is a lib/ld.so, use it
-if [ -f "$CURDIR"/lib/ld.so ]; then
+if [ -f "$CURDIR"/lib/ld.so ]; then
 	echo "Using lib/ld.so"
 	echo "Using binary: $BINARY"
-	exec "$CURDIR"/lib/ld.so "$CURDIR"/$BINARY "$@"
+	exec "$CURDIR"/lib/ld.so "$CURDIR"/$BINARY "$@"
 fi

 echo "Using binary: $BINARY"
-exec "$CURDIR"/$BINARY "$@"
+exec "$CURDIR"/$BINARY "$@"

 # We should never reach this point, however just in case we do, run fallback
-exec "$CURDIR"/turboquant-fallback "$@"
+exec "$CURDIR"/turboquant-fallback "$@"
--- a/backend/go/acestep-cpp/run.sh
+++ b/backend/go/acestep-cpp/run.sh
@@ -2,7 +2,7 @@
 set -ex

 # Get the absolute current dir where the script is located
-CURDIR=$(dirname "$(realpath "$0")")
+CURDIR=$(dirname "$(realpath "$0")")

 cd /

@@ -21,20 +21,20 @@ if [ "$(uname)" = "Darwin" ]; then
 	if [ ! -e "$LIBRARY" ]; then
 		LIBRARY="$CURDIR/libgoacestepcpp-fallback.so"
 	fi
-	export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
+	export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
 else
 	LIBRARY="$CURDIR/libgoacestepcpp-fallback.so"

 	if grep -q -e "\savx\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX    found OK"
-		if [ -e "$CURDIR"/libgoacestepcpp-avx.so ]; then
+		if [ -e "$CURDIR"/libgoacestepcpp-avx.so ]; then
 			LIBRARY="$CURDIR/libgoacestepcpp-avx.so"
 		fi
 	fi

 	if grep -q -e "\savx2\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX2   found OK"
-		if [ -e "$CURDIR"/libgoacestepcpp-avx2.so ]; then
+		if [ -e "$CURDIR"/libgoacestepcpp-avx2.so ]; then
 			LIBRARY="$CURDIR/libgoacestepcpp-avx2.so"
 		fi
 	fi
@@ -42,22 +42,22 @@ else
 	# Check avx 512
 	if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX512F found OK"
-		if [ -e "$CURDIR"/libgoacestepcpp-avx512.so ]; then
+		if [ -e "$CURDIR"/libgoacestepcpp-avx512.so ]; then
 			LIBRARY="$CURDIR/libgoacestepcpp-avx512.so"
 		fi
 	fi

-	export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
+	export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
 fi

 export ACESTEP_LIBRARY=$LIBRARY

 # If there is a lib/ld.so, use it
-if [ -f "$CURDIR"/lib/ld.so ]; then
+if [ -f "$CURDIR"/lib/ld.so ]; then
 	echo "Using lib/ld.so"
 	echo "Using library: $LIBRARY"
-	exec "$CURDIR"/lib/ld.so "$CURDIR"/acestep-cpp "$@"
+	exec "$CURDIR"/lib/ld.so "$CURDIR"/acestep-cpp "$@"
 fi

 echo "Using library: $LIBRARY"
-exec "$CURDIR"/acestep-cpp "$@"
+exec "$CURDIR"/acestep-cpp "$@"
--- a/backend/go/ced/run.sh
+++ b/backend/go/ced/run.sh
@@ -4,10 +4,10 @@ set -e
 CURDIR=$(dirname "$(realpath "$0")")

 if [ "$(uname)" = "Darwin" ]; then
-	export DYLD_LIBRARY_PATH="$CURDIR/lib:"$CURDIR":${DYLD_LIBRARY_PATH:-}"
+	export DYLD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${DYLD_LIBRARY_PATH:-}"
 	export CED_LIBRARY="$CURDIR/lib/libced.dylib"
 else
-	export LD_LIBRARY_PATH="$CURDIR/lib:"$CURDIR":${LD_LIBRARY_PATH:-}"
+	export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}"
 fi

 # If a self-contained ld.so was packaged, route through it so the packaged
--- a/backend/go/cloud-proxy/run.sh
+++ b/backend/go/cloud-proxy/run.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 set -ex

-CURDIR=$(dirname "$(realpath "$0")")
+CURDIR=$(dirname "$(realpath "$0")")

-exec "$CURDIR"/cloud-proxy "$@"
+exec "$CURDIR"/cloud-proxy "$@"
--- a/backend/go/crispasr/Makefile
+++ b/backend/go/crispasr/Makefile
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)

 # CrispASR version (release tag)
 CRISPASR_REPO?=https://github.com/CrispStrobe/CrispASR
-CRISPASR_VERSION?=8f1218141b792b8868861c1af17ba1e361b05dc0
+CRISPASR_VERSION?=96b2a6ee31d30389fed8a7ef1a54239b75231ddc
 SO_TARGET?=libgocrispasr.so

 CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
--- a/backend/go/crispasr/run.sh
+++ b/backend/go/crispasr/run.sh
@@ -2,7 +2,7 @@
 set -ex

 # Get the absolute current dir where the script is located
-CURDIR=$(dirname "$(realpath "$0")")
+CURDIR=$(dirname "$(realpath "$0")")

 cd /

@@ -15,20 +15,20 @@ fi
 if [ "$(uname)" = "Darwin" ]; then
 	# macOS: single dylib variant (Metal or Accelerate)
 	LIBRARY="$CURDIR/libgocrispasr-fallback.dylib"
-	export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
+	export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
 else
 	LIBRARY="$CURDIR/libgocrispasr-fallback.so"

 	if grep -q -e "\savx\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX    found OK"
-		if [ -e "$CURDIR"/libgocrispasr-avx.so ]; then
+		if [ -e "$CURDIR"/libgocrispasr-avx.so ]; then
 			LIBRARY="$CURDIR/libgocrispasr-avx.so"
 		fi
 	fi

 	if grep -q -e "\savx2\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX2   found OK"
-		if [ -e "$CURDIR"/libgocrispasr-avx2.so ]; then
+		if [ -e "$CURDIR"/libgocrispasr-avx2.so ]; then
 			LIBRARY="$CURDIR/libgocrispasr-avx2.so"
 		fi
 	fi
@@ -36,12 +36,12 @@ else
 	# Check avx 512
 	if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX512F found OK"
-		if [ -e "$CURDIR"/libgocrispasr-avx512.so ]; then
+		if [ -e "$CURDIR"/libgocrispasr-avx512.so ]; then
 			LIBRARY="$CURDIR/libgocrispasr-avx512.so"
 		fi
 	fi

-	export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
+	export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
 fi

 export CRISPASR_LIBRARY=$LIBRARY
@@ -49,14 +49,14 @@ export CRISPASR_LIBRARY=$LIBRARY
 # Point piper's espeak-ng phonemizer at the bundled voice data. The variable
 # names the directory CONTAINING espeak-ng-data (package.sh drops it next to
 # this script). Harmless when espeak-ng wasn't bundled.
-export CRISPASR_ESPEAK_DATA_PATH="$CURDIR"
+export CRISPASR_ESPEAK_DATA_PATH="$CURDIR"

 # If there is a lib/ld.so, use it
-if [ -f "$CURDIR"/lib/ld.so ]; then
+if [ -f "$CURDIR"/lib/ld.so ]; then
 	echo "Using lib/ld.so"
 	echo "Using library: $LIBRARY"
-	exec "$CURDIR"/lib/ld.so "$CURDIR"/crispasr "$@"
+	exec "$CURDIR"/lib/ld.so "$CURDIR"/crispasr "$@"
 fi

 echo "Using library: $LIBRARY"
-exec "$CURDIR"/crispasr "$@"
+exec "$CURDIR"/crispasr "$@"
--- a/backend/go/depth-anything-cpp/Makefile
+++ b/backend/go/depth-anything-cpp/Makefile
@@ -40,8 +40,6 @@ else ifeq ($(BUILD_TYPE),hipblas)
 else ifeq ($(BUILD_TYPE),vulkan)
 	CMAKE_ARGS+=-DGGML_VULKAN=ON -DDA_GGML_VULKAN=ON
 else ifeq ($(OS),Darwin)
-	# macOS/Metal: built + published as an OCI image by CI (includeDarwin in
-	# .github/backend-matrix.yml) so Apple Silicon users can install this backend.
 	ifneq ($(BUILD_TYPE),metal)
 		CMAKE_ARGS+=-DGGML_METAL=OFF
 	else
--- a/backend/go/depth-anything-cpp/run.sh
+++ b/backend/go/depth-anything-cpp/run.sh
@@ -2,7 +2,7 @@
 set -ex

 # Get the absolute current dir where the script is located
-CURDIR=$(dirname "$(realpath "$0")")
+CURDIR=$(dirname "$(realpath "$0")")

 cd /

@@ -15,20 +15,20 @@ fi
 if [ "$(uname)" = "Darwin" ]; then
 	# macOS: single dylib variant (Metal or Accelerate)
 	LIBRARY="$CURDIR/libdepthanythingcpp-fallback.dylib"
-	export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
+	export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
 else
 	LIBRARY="$CURDIR/libdepthanythingcpp-fallback.so"

 	if grep -q -e "\savx\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX    found OK"
-		if [ -e "$CURDIR"/libdepthanythingcpp-avx.so ]; then
+		if [ -e "$CURDIR"/libdepthanythingcpp-avx.so ]; then
 			LIBRARY="$CURDIR/libdepthanythingcpp-avx.so"
 		fi
 	fi

 	if grep -q -e "\savx2\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX2   found OK"
-		if [ -e "$CURDIR"/libdepthanythingcpp-avx2.so ]; then
+		if [ -e "$CURDIR"/libdepthanythingcpp-avx2.so ]; then
 			LIBRARY="$CURDIR/libdepthanythingcpp-avx2.so"
 		fi
 	fi
@@ -36,22 +36,22 @@ else
 	# Check avx 512
 	if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX512F found OK"
-		if [ -e "$CURDIR"/libdepthanythingcpp-avx512.so ]; then
+		if [ -e "$CURDIR"/libdepthanythingcpp-avx512.so ]; then
 			LIBRARY="$CURDIR/libdepthanythingcpp-avx512.so"
 		fi
 	fi

-	export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
+	export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
 fi

 export DEPTHANYTHING_LIBRARY=$LIBRARY

 # If there is a lib/ld.so, use it
-if [ -f "$CURDIR"/lib/ld.so ]; then
+if [ -f "$CURDIR"/lib/ld.so ]; then
 	echo "Using lib/ld.so"
 	echo "Using library: $LIBRARY"
-	exec "$CURDIR"/lib/ld.so "$CURDIR"/depth-anything-cpp "$@"
+	exec "$CURDIR"/lib/ld.so "$CURDIR"/depth-anything-cpp "$@"
 fi

 echo "Using library: $LIBRARY"
-exec "$CURDIR"/depth-anything-cpp "$@"
+exec "$CURDIR"/depth-anything-cpp "$@"
--- a/backend/go/local-store/run.sh
+++ b/backend/go/local-store/run.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 set -ex

-CURDIR=$(dirname "$(realpath "$0")")
+CURDIR=$(dirname "$(realpath "$0")")

-exec "$CURDIR"/local-store "$@"
+exec "$CURDIR"/local-store "$@"
--- a/backend/go/localvqe/Makefile
+++ b/backend/go/localvqe/Makefile
@@ -32,8 +32,6 @@ endif
 ifeq ($(BUILD_TYPE),vulkan)
 	CMAKE_ARGS+=-DGGML_VULKAN=ON -DLOCALVQE_VULKAN=ON
 else ifeq ($(OS),Darwin)
-	# Apple Silicon: CPU-only (no Metal upstream); built + published as an arm64
-	# image by CI (includeDarwin in .github/backend-matrix.yml) for macOS install.
 	CMAKE_ARGS+=-DGGML_METAL=OFF
 endif

--- a/backend/go/localvqe/run.sh
+++ b/backend/go/localvqe/run.sh
@@ -1,34 +1,34 @@
 #!/bin/bash
 set -ex

-CURDIR=$(dirname "$(realpath "$0")")
+CURDIR=$(dirname "$(realpath "$0")")

 # LocalVQE's runtime CPU-variant loader (ggml_backend_load_all) searches
 # get_executable_path() and current_path() — the second one is what saves us
 # when /proc/self/exe resolves to lib/ld.so under the bundled-loader path.
-# So we cd into "$CURDIR" (where all the libggml-cpu-*.so files live) before
+# So we cd into $CURDIR (where all the libggml-cpu-*.so files live) before
 # exec'ing the binary.
 cd "$CURDIR"

 if [ "$(uname)" = "Darwin" ]; then
 	# macOS: LocalVQE is built as a SHARED library, so dyld needs the .dylib +
 	# DYLD_LIBRARY_PATH. Prefer .dylib and fall back to .so just in case.
-	export DYLD_LIBRARY_PATH="$CURDIR":"$CURDIR"/lib:$DYLD_LIBRARY_PATH
-	LOCALVQE_LIBRARY="$CURDIR"/liblocalvqe.dylib
+	export DYLD_LIBRARY_PATH="$CURDIR":"$CURDIR"/lib:$DYLD_LIBRARY_PATH
+	LOCALVQE_LIBRARY="$CURDIR"/liblocalvqe.dylib
 	if [ ! -e "$LOCALVQE_LIBRARY" ]; then
-		LOCALVQE_LIBRARY="$CURDIR"/liblocalvqe.so
+		LOCALVQE_LIBRARY="$CURDIR"/liblocalvqe.so
 	fi
 	export LOCALVQE_LIBRARY
 else
-	export LD_LIBRARY_PATH="$CURDIR":"$CURDIR"/lib:$LD_LIBRARY_PATH
-	export LOCALVQE_LIBRARY="$CURDIR"/liblocalvqe.so
+	export LD_LIBRARY_PATH="$CURDIR":"$CURDIR"/lib:$LD_LIBRARY_PATH
+	export LOCALVQE_LIBRARY="$CURDIR"/liblocalvqe.so
 fi

-if [ -f "$CURDIR"/lib/ld.so ]; then
+if [ -f "$CURDIR"/lib/ld.so ]; then
 	echo "Using lib/ld.so"
 	echo "Using library: $LOCALVQE_LIBRARY"
-	exec "$CURDIR"/lib/ld.so "$CURDIR"/localvqe "$@"
+	exec "$CURDIR"/lib/ld.so "$CURDIR"/localvqe "$@"
 fi

 echo "Using library: $LOCALVQE_LIBRARY"
-exec "$CURDIR"/localvqe "$@"
+exec "$CURDIR"/localvqe "$@"
--- a/backend/go/locate-anything-cpp/Makefile
+++ b/backend/go/locate-anything-cpp/Makefile
@@ -33,8 +33,6 @@ else ifeq ($(BUILD_TYPE),hipblas)
 else ifeq ($(BUILD_TYPE),vulkan)
 	CMAKE_ARGS+=-DGGML_VULKAN=ON -DLA_GGML_VULKAN=ON
 else ifeq ($(OS),Darwin)
-	# macOS/Metal: built + published as an OCI image by CI (includeDarwin in
-	# .github/backend-matrix.yml) so Apple Silicon users can install this backend.
 	ifneq ($(BUILD_TYPE),metal)
 		CMAKE_ARGS+=-DGGML_METAL=OFF
 	else
--- a/backend/go/locate-anything-cpp/run.sh
+++ b/backend/go/locate-anything-cpp/run.sh
@@ -2,7 +2,7 @@
 set -ex

 # Get the absolute current dir where the script is located
-CURDIR=$(dirname "$(realpath "$0")")
+CURDIR=$(dirname "$(realpath "$0")")

 cd /

@@ -15,20 +15,20 @@ fi
 if [ "$(uname)" = "Darwin" ]; then
 	# macOS: single dylib variant (Metal or Accelerate)
 	LIBRARY="$CURDIR/liblocateanythingcpp-fallback.dylib"
-	export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
+	export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
 else
 	LIBRARY="$CURDIR/liblocateanythingcpp-fallback.so"

 	if grep -q -e "\savx\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX    found OK"
-		if [ -e "$CURDIR"/liblocateanythingcpp-avx.so ]; then
+		if [ -e "$CURDIR"/liblocateanythingcpp-avx.so ]; then
 			LIBRARY="$CURDIR/liblocateanythingcpp-avx.so"
 		fi
 	fi

 	if grep -q -e "\savx2\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX2   found OK"
-		if [ -e "$CURDIR"/liblocateanythingcpp-avx2.so ]; then
+		if [ -e "$CURDIR"/liblocateanythingcpp-avx2.so ]; then
 			LIBRARY="$CURDIR/liblocateanythingcpp-avx2.so"
 		fi
 	fi
@@ -36,22 +36,22 @@ else
 	# Check avx 512
 	if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX512F found OK"
-		if [ -e "$CURDIR"/liblocateanythingcpp-avx512.so ]; then
+		if [ -e "$CURDIR"/liblocateanythingcpp-avx512.so ]; then
 			LIBRARY="$CURDIR/liblocateanythingcpp-avx512.so"
 		fi
 	fi

-	export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
+	export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
 fi

 export LOCATEANYTHING_LIBRARY=$LIBRARY

 # If there is a lib/ld.so, use it
-if [ -f "$CURDIR"/lib/ld.so ]; then
+if [ -f "$CURDIR"/lib/ld.so ]; then
 	echo "Using lib/ld.so"
 	echo "Using library: $LIBRARY"
-	exec "$CURDIR"/lib/ld.so "$CURDIR"/locate-anything-cpp "$@"
+	exec "$CURDIR"/lib/ld.so "$CURDIR"/locate-anything-cpp "$@"
 fi

 echo "Using library: $LIBRARY"
-exec "$CURDIR"/locate-anything-cpp "$@"
+exec "$CURDIR"/locate-anything-cpp "$@"
--- a/backend/go/omnivoice-cpp/run.sh
+++ b/backend/go/omnivoice-cpp/run.sh
@@ -2,7 +2,7 @@
 set -ex

 # Get the absolute current dir where the script is located
-CURDIR=$(dirname "$(realpath "$0")")
+CURDIR=$(dirname "$(realpath "$0")")

 cd /

@@ -15,20 +15,20 @@ fi
 if [ "$(uname)" = "Darwin" ]; then
 	# macOS: single dylib variant (Metal or Accelerate)
 	LIBRARY="$CURDIR/libgomnivoicecpp-fallback.dylib"
-	export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
+	export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
 else
 	LIBRARY="$CURDIR/libgomnivoicecpp-fallback.so"

 	if grep -q -e "\savx\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX    found OK"
-		if [ -e "$CURDIR"/libgomnivoicecpp-avx.so ]; then
+		if [ -e "$CURDIR"/libgomnivoicecpp-avx.so ]; then
 			LIBRARY="$CURDIR/libgomnivoicecpp-avx.so"
 		fi
 	fi

 	if grep -q -e "\savx2\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX2   found OK"
-		if [ -e "$CURDIR"/libgomnivoicecpp-avx2.so ]; then
+		if [ -e "$CURDIR"/libgomnivoicecpp-avx2.so ]; then
 			LIBRARY="$CURDIR/libgomnivoicecpp-avx2.so"
 		fi
 	fi
@@ -36,22 +36,22 @@ else
 	# Check avx 512
 	if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX512F found OK"
-		if [ -e "$CURDIR"/libgomnivoicecpp-avx512.so ]; then
+		if [ -e "$CURDIR"/libgomnivoicecpp-avx512.so ]; then
 			LIBRARY="$CURDIR/libgomnivoicecpp-avx512.so"
 		fi
 	fi

-	export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
+	export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
 fi

 export OMNIVOICE_LIBRARY=$LIBRARY

 # If there is a lib/ld.so, use it
-if [ -f "$CURDIR"/lib/ld.so ]; then
+if [ -f "$CURDIR"/lib/ld.so ]; then
 	echo "Using lib/ld.so"
 	echo "Using library: $LIBRARY"
-	exec "$CURDIR"/lib/ld.so "$CURDIR"/omnivoice-cpp "$@"
+	exec "$CURDIR"/lib/ld.so "$CURDIR"/omnivoice-cpp "$@"
 fi

 echo "Using library: $LIBRARY"
-exec "$CURDIR"/omnivoice-cpp "$@"
+exec "$CURDIR"/omnivoice-cpp "$@"
--- a/backend/go/opus/Makefile
+++ b/backend/go/opus/Makefile
@@ -1,30 +1,13 @@
 GOCMD?=go
 GO_TAGS?=

-# The opus shim is a small C wrapper around libopus' variadic
-# opus_encoder_ctl (see csrc/opus_shim.c). It is built as a shared library
-# and dlopen'd at runtime by the Go backend (codec.go). The extension is
-# OS-specific: Linux uses .so, macOS uses .dylib. OS is exported by the root
-# Makefile (`export OS := $(shell uname -s)`).
-SHIM_EXT=so
-
 OPUS_CFLAGS := $(shell pkg-config --cflags opus)
 OPUS_LIBS := $(shell pkg-config --libs opus)
-SHIM_LDFLAGS := $(OPUS_LIBS)

-ifeq ($(OS),Darwin)
-	SHIM_EXT=dylib
-	# Resolve libopus symbols lazily from the already globally-loaded
-	# libopus (codec.go dlopens it RTLD_GLOBAL before the shim) rather than
-	# recording an absolute Homebrew path in the dylib. This keeps the
-	# packaged shim relocatable on machines that have no Homebrew.
-	SHIM_LDFLAGS := -undefined dynamic_lookup
-endif
+libopusshim.so: csrc/opus_shim.c
+	$(CC) -shared -fPIC -o $@ $< $(OPUS_CFLAGS) $(OPUS_LIBS)

-libopusshim.$(SHIM_EXT): csrc/opus_shim.c
-	$(CC) -shared -fPIC -o $@ $< $(OPUS_CFLAGS) $(SHIM_LDFLAGS)
-
-opus: libopusshim.$(SHIM_EXT)
+opus: libopusshim.so
 	$(GOCMD) build -tags "$(GO_TAGS)" -o opus ./

 package: opus
@@ -33,7 +16,7 @@ package: opus
 build: package

 clean:
-	rm -f opus libopusshim.$(SHIM_EXT)
+	rm -f opus libopusshim.so
 	rm -rf package

 .PHONY: build package clean
--- a/backend/go/opus/package.sh
+++ b/backend/go/opus/package.sh
@@ -8,23 +8,13 @@ mkdir -p $CURDIR/package/lib
 cp -avf $CURDIR/opus $CURDIR/package/
 cp -avf $CURDIR/run.sh $CURDIR/package/

-# The shim extension is OS-specific (.so on Linux, .dylib on macOS).
-SHIM_EXT=so
-if [ "$(uname)" = "Darwin" ]; then
-    SHIM_EXT=dylib
-fi
-
 # Copy the opus shim library
-cp -avf $CURDIR/libopusshim.$SHIM_EXT $CURDIR/package/lib/
+cp -avf $CURDIR/libopusshim.so $CURDIR/package/lib/

-# Copy system libopus so the backend is self-contained: the runtime base
-# image has neither libopus-dev (Linux) nor Homebrew (macOS), so codec.go's
-# dlopen would otherwise fail. Both name patterns are attempted; only the
-# host's matching one exists.
+# Copy system libopus
 if command -v pkg-config >/dev/null 2>&1 && pkg-config --exists opus; then
    LIBOPUS_DIR=$(pkg-config --variable=libdir opus)
-    cp -avf $LIBOPUS_DIR/libopus.so* $CURDIR/package/lib/ 2>/dev/null || true
-    cp -avf $LIBOPUS_DIR/libopus*.dylib $CURDIR/package/lib/ 2>/dev/null || true
+    cp -avfL $LIBOPUS_DIR/libopus.so* $CURDIR/package/lib/ 2>/dev/null || true
 fi

 # Detect architecture and copy appropriate libraries
@@ -48,8 +38,6 @@ elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
    cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
    cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
    cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
-elif [ "$(uname -s)" = "Darwin" ]; then
-    echo "Detected Darwin — system libraries linked dynamically, no bundled loader needed"
 else
    echo "Warning: Could not detect architecture for system library bundling"
 fi
--- a/backend/go/opus/run.sh
+++ b/backend/go/opus/run.sh
@@ -1,20 +1,15 @@
 #!/bin/bash
 set -ex

-CURDIR=$(dirname "$(realpath "$0")")
+CURDIR=$(dirname "$(realpath "$0")")

-if [ "$(uname)" = "Darwin" ]; then
-	export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
-	export OPUS_SHIM_LIBRARY="$CURDIR"/lib/libopusshim.dylib
-else
-	export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
-	export OPUS_SHIM_LIBRARY="$CURDIR"/lib/libopusshim.so
-fi
+export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
+export OPUS_SHIM_LIBRARY="$CURDIR"/lib/libopusshim.so

 # If there is a lib/ld.so, use it
-if [ -f "$CURDIR"/lib/ld.so ]; then
+if [ -f "$CURDIR"/lib/ld.so ]; then
 	echo "Using lib/ld.so"
-	exec "$CURDIR"/lib/ld.so "$CURDIR"/opus "$@"
+	exec "$CURDIR"/lib/ld.so "$CURDIR"/opus "$@"
 fi

-exec "$CURDIR"/opus "$@"
+exec "$CURDIR"/opus "$@"
--- a/backend/go/parakeet-cpp/Makefile
+++ b/backend/go/parakeet-cpp/Makefile
@@ -1,6 +1,6 @@
 # parakeet-cpp backend Makefile.
 #
-# Upstream pin lives below as PARAKEET_VERSION?=f469a57270a1cc4554acb15febf60e56619673b9
+# Upstream pin lives below as PARAKEET_VERSION?=89f5e2977b4d8bccd45e7bcc6f2ef7c4ed49e89a
 # (.github/bump_deps.sh) can find and update it - matches the
 # whisper.cpp / ds4 / vibevoice-cpp convention.
 #
@@ -15,7 +15,7 @@
 # That's what the L0 smoke test uses. The default target below does the
 # proper clone-at-pin + cmake build so CI doesn't need a side-checkout.

-PARAKEET_VERSION?=f469a57270a1cc4554acb15febf60e56619673b9
+PARAKEET_VERSION?=89f5e2977b4d8bccd45e7bcc6f2ef7c4ed49e89a
 PARAKEET_REPO?=https://github.com/mudler/parakeet.cpp

 GOCMD?=go
--- a/backend/go/parakeet-cpp/run.sh
+++ b/backend/go/parakeet-cpp/run.sh
@@ -4,10 +4,10 @@ set -e
 CURDIR=$(dirname "$(realpath "$0")")

 if [ "$(uname)" = "Darwin" ]; then
-	export DYLD_LIBRARY_PATH="$CURDIR/lib:"$CURDIR":${DYLD_LIBRARY_PATH:-}"
+	export DYLD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${DYLD_LIBRARY_PATH:-}"
 	export PARAKEET_LIBRARY="$CURDIR/lib/libparakeet.dylib"
 else
-	export LD_LIBRARY_PATH="$CURDIR/lib:"$CURDIR":${LD_LIBRARY_PATH:-}"
+	export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}"
 	export PARAKEET_LIBRARY="$CURDIR/lib/libparakeet.so"
 fi

--- a/backend/go/piper/package.sh
+++ b/backend/go/piper/package.sh
@@ -16,15 +16,7 @@ cp -rfv $CURDIR/run.sh $CURDIR/package/
 cp -rfLv $CURDIR/sources/go-piper/piper-phonemize/pi/lib/* $CURDIR/package/lib/

 # Detect architecture and copy appropriate libraries
-if [ "$(uname)" = "Darwin" ]; then
-    # macOS has no glibc loader to bundle. The piper binary links its bundled
-    # libs (libucd, libespeak-ng, libpiper_phonemize, libonnxruntime) via
-    # @rpath but ships with no LC_RPATH, so dyld aborts at launch with
-    # "Library not loaded: @rpath/libucd.dylib ... no LC_RPATH's found".
-    # Add an @loader_path/lib rpath so @rpath resolves to package/lib/.
-    echo "Detected macOS; adding @loader_path/lib rpath so bundled libs resolve via @rpath..."
-    install_name_tool -add_rpath @loader_path/lib "$CURDIR/package/piper"
-elif [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
+if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
    # x86_64 architecture
    echo "Detected x86_64 architecture, copying x86_64 libraries..."
    cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
--- a/backend/go/piper/run.sh
+++ b/backend/go/piper/run.sh
@@ -1,20 +1,15 @@
 #!/bin/bash
 set -ex

-CURDIR=$(dirname "$(realpath "$0")")
+CURDIR=$(dirname "$(realpath "$0")")

-export ESPEAK_NG_DATA="$CURDIR"/espeak-ng-data
-
-if [ "$(uname)" = "Darwin" ]; then
-	export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
-else
-	export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
-fi
+export ESPEAK_NG_DATA="$CURDIR"/espeak-ng-data
+export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH

 # If there is a lib/ld.so, use it
-if [ -f "$CURDIR"/lib/ld.so ]; then
+if [ -f "$CURDIR"/lib/ld.so ]; then
 	echo "Using lib/ld.so"
-	exec "$CURDIR"/lib/ld.so "$CURDIR"/piper "$@"
+	exec "$CURDIR"/lib/ld.so "$CURDIR"/piper "$@"
 fi

-exec "$CURDIR"/piper "$@"
+exec "$CURDIR"/piper "$@"
--- a/backend/go/qwen3-tts-cpp/run.sh
+++ b/backend/go/qwen3-tts-cpp/run.sh
@@ -2,7 +2,7 @@
 set -ex

 # Get the absolute current dir where the script is located
-CURDIR=$(dirname "$(realpath "$0")")
+CURDIR=$(dirname "$(realpath "$0")")

 cd /

@@ -15,20 +15,20 @@ fi
 if [ "$(uname)" = "Darwin" ]; then
 	# macOS: single dylib variant (Metal or Accelerate)
 	LIBRARY="$CURDIR/libgoqwen3ttscpp-fallback.dylib"
-	export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
+	export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
 else
 	LIBRARY="$CURDIR/libgoqwen3ttscpp-fallback.so"

 	if grep -q -e "\savx\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX    found OK"
-		if [ -e "$CURDIR"/libgoqwen3ttscpp-avx.so ]; then
+		if [ -e "$CURDIR"/libgoqwen3ttscpp-avx.so ]; then
 			LIBRARY="$CURDIR/libgoqwen3ttscpp-avx.so"
 		fi
 	fi

 	if grep -q -e "\savx2\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX2   found OK"
-		if [ -e "$CURDIR"/libgoqwen3ttscpp-avx2.so ]; then
+		if [ -e "$CURDIR"/libgoqwen3ttscpp-avx2.so ]; then
 			LIBRARY="$CURDIR/libgoqwen3ttscpp-avx2.so"
 		fi
 	fi
@@ -36,22 +36,22 @@ else
 	# Check avx 512
 	if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX512F found OK"
-		if [ -e "$CURDIR"/libgoqwen3ttscpp-avx512.so ]; then
+		if [ -e "$CURDIR"/libgoqwen3ttscpp-avx512.so ]; then
 			LIBRARY="$CURDIR/libgoqwen3ttscpp-avx512.so"
 		fi
 	fi

-	export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
+	export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
 fi

 export QWEN3TTS_LIBRARY=$LIBRARY

 # If there is a lib/ld.so, use it
-if [ -f "$CURDIR"/lib/ld.so ]; then
+if [ -f "$CURDIR"/lib/ld.so ]; then
 	echo "Using lib/ld.so"
 	echo "Using library: $LIBRARY"
-	exec "$CURDIR"/lib/ld.so "$CURDIR"/qwen3-tts-cpp "$@"
+	exec "$CURDIR"/lib/ld.so "$CURDIR"/qwen3-tts-cpp "$@"
 fi

 echo "Using library: $LIBRARY"
-exec "$CURDIR"/qwen3-tts-cpp "$@"
+exec "$CURDIR"/qwen3-tts-cpp "$@"
--- a/backend/go/rfdetr-cpp/Makefile
+++ b/backend/go/rfdetr-cpp/Makefile
@@ -34,8 +34,6 @@ else ifeq ($(BUILD_TYPE),hipblas)
 else ifeq ($(BUILD_TYPE),vulkan)
 	CMAKE_ARGS+=-DGGML_VULKAN=ON -DRFDETR_GGML_VULKAN=ON
 else ifeq ($(OS),Darwin)
-	# macOS/Metal: built + published as an OCI image by CI (includeDarwin in
-	# .github/backend-matrix.yml) so Apple Silicon users can install this backend.
 	ifneq ($(BUILD_TYPE),metal)
 		CMAKE_ARGS+=-DGGML_METAL=OFF
 	else
--- a/backend/go/rfdetr-cpp/run.sh
+++ b/backend/go/rfdetr-cpp/run.sh
@@ -2,7 +2,7 @@
 set -ex

 # Get the absolute current dir where the script is located
-CURDIR=$(dirname "$(realpath "$0")")
+CURDIR=$(dirname "$(realpath $0)")

 cd /

@@ -15,20 +15,20 @@ fi
 if [ "$(uname)" = "Darwin" ]; then
 	# macOS: single dylib variant (Metal or Accelerate)
 	LIBRARY="$CURDIR/librfdetrcpp-fallback.dylib"
-	export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
+	export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
 else
 	LIBRARY="$CURDIR/librfdetrcpp-fallback.so"

 	if grep -q -e "\savx\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX    found OK"
-		if [ -e "$CURDIR"/librfdetrcpp-avx.so ]; then
+		if [ -e $CURDIR/librfdetrcpp-avx.so ]; then
 			LIBRARY="$CURDIR/librfdetrcpp-avx.so"
 		fi
 	fi

 	if grep -q -e "\savx2\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX2   found OK"
-		if [ -e "$CURDIR"/librfdetrcpp-avx2.so ]; then
+		if [ -e $CURDIR/librfdetrcpp-avx2.so ]; then
 			LIBRARY="$CURDIR/librfdetrcpp-avx2.so"
 		fi
 	fi
@@ -36,22 +36,22 @@ else
 	# Check avx 512
 	if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX512F found OK"
-		if [ -e "$CURDIR"/librfdetrcpp-avx512.so ]; then
+		if [ -e $CURDIR/librfdetrcpp-avx512.so ]; then
 			LIBRARY="$CURDIR/librfdetrcpp-avx512.so"
 		fi
 	fi

-	export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
+	export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 fi

 export RFDETR_LIBRARY=$LIBRARY

 # If there is a lib/ld.so, use it
-if [ -f "$CURDIR"/lib/ld.so ]; then
+if [ -f $CURDIR/lib/ld.so ]; then
 	echo "Using lib/ld.so"
 	echo "Using library: $LIBRARY"
-	exec "$CURDIR"/lib/ld.so "$CURDIR"/rfdetr-cpp "$@"
+	exec $CURDIR/lib/ld.so $CURDIR/rfdetr-cpp "$@"
 fi

 echo "Using library: $LIBRARY"
-exec "$CURDIR"/rfdetr-cpp "$@"
+exec $CURDIR/rfdetr-cpp "$@"
--- a/backend/go/sam3-cpp/Makefile
+++ b/backend/go/sam3-cpp/Makefile
@@ -31,8 +31,6 @@ else ifeq ($(BUILD_TYPE),hipblas)
 else ifeq ($(BUILD_TYPE),vulkan)
 	CMAKE_ARGS+=-DGGML_VULKAN=ON
 else ifeq ($(OS),Darwin)
-	# macOS/Metal: built + published as an OCI image by CI (includeDarwin in
-	# .github/backend-matrix.yml) so Apple Silicon users can install this backend.
 	ifneq ($(BUILD_TYPE),metal)
 		CMAKE_ARGS+=-DGGML_METAL=OFF
 	else
--- a/backend/go/sam3-cpp/run.sh
+++ b/backend/go/sam3-cpp/run.sh
@@ -2,7 +2,7 @@
 set -ex

 # Get the absolute current dir where the script is located
-CURDIR=$(dirname "$(realpath "$0")")
+CURDIR=$(dirname "$(realpath $0)")

 cd /

@@ -15,20 +15,20 @@ fi
 if [ "$(uname)" = "Darwin" ]; then
 	# macOS: single dylib variant (Metal or Accelerate)
 	LIBRARY="$CURDIR/libgosam3-fallback.dylib"
-	export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
+	export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
 else
 	LIBRARY="$CURDIR/libgosam3-fallback.so"

 	if grep -q -e "\savx\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX    found OK"
-		if [ -e "$CURDIR"/libgosam3-avx.so ]; then
+		if [ -e $CURDIR/libgosam3-avx.so ]; then
 			LIBRARY="$CURDIR/libgosam3-avx.so"
 		fi
 	fi

 	if grep -q -e "\savx2\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX2   found OK"
-		if [ -e "$CURDIR"/libgosam3-avx2.so ]; then
+		if [ -e $CURDIR/libgosam3-avx2.so ]; then
 			LIBRARY="$CURDIR/libgosam3-avx2.so"
 		fi
 	fi
@@ -36,22 +36,22 @@ else
 	# Check avx 512
 	if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX512F found OK"
-		if [ -e "$CURDIR"/libgosam3-avx512.so ]; then
+		if [ -e $CURDIR/libgosam3-avx512.so ]; then
 			LIBRARY="$CURDIR/libgosam3-avx512.so"
 		fi
 	fi

-	export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
+	export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 fi

 export SAM3_LIBRARY=$LIBRARY

 # If there is a lib/ld.so, use it
-if [ -f "$CURDIR"/lib/ld.so ]; then
+if [ -f $CURDIR/lib/ld.so ]; then
 	echo "Using lib/ld.so"
 	echo "Using library: $LIBRARY"
-	exec "$CURDIR"/lib/ld.so "$CURDIR"/sam3-cpp "$@"
+	exec $CURDIR/lib/ld.so $CURDIR/sam3-cpp "$@"
 fi

 echo "Using library: $LIBRARY"
-exec "$CURDIR"/sam3-cpp "$@"
+exec $CURDIR/sam3-cpp "$@"
--- a/backend/go/sherpa-onnx/run.sh
+++ b/backend/go/sherpa-onnx/run.sh
@@ -1,19 +1,19 @@
 #!/bin/bash
 set -ex

-CURDIR=$(dirname "$(realpath "$0")")
+CURDIR=$(dirname "$(realpath $0)")

 if [ "$(uname)" = "Darwin" ]; then
-	export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
-	export SHERPA_SHIM_LIBRARY="$CURDIR"/lib/libsherpa-shim.dylib
-	export SHERPA_ONNX_LIBRARY="$CURDIR"/lib/libsherpa-onnx-c-api.dylib
+	export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
+	export SHERPA_SHIM_LIBRARY=$CURDIR/lib/libsherpa-shim.dylib
+	export SHERPA_ONNX_LIBRARY=$CURDIR/lib/libsherpa-onnx-c-api.dylib
 else
-	export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
+	export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 fi

-if [ -f "$CURDIR"/lib/ld.so ]; then
+if [ -f $CURDIR/lib/ld.so ]; then
 	echo "Using lib/ld.so"
-	exec "$CURDIR"/lib/ld.so "$CURDIR"/sherpa-onnx "$@"
+	exec $CURDIR/lib/ld.so $CURDIR/sherpa-onnx "$@"
 fi

-exec "$CURDIR"/sherpa-onnx "$@"
+exec $CURDIR/sherpa-onnx "$@"
--- a/backend/go/silero-vad/package.sh
+++ b/backend/go/silero-vad/package.sh
@@ -15,14 +15,7 @@ cp -avf $CURDIR/run.sh $CURDIR/package/
 cp -rfLv $CURDIR/backend-assets/lib/* $CURDIR/package/lib/

 # Detect architecture and copy appropriate libraries
-if [ "$(uname)" = "Darwin" ]; then
-    # macOS has no glibc loader to bundle. silero-vad links its bundled
-    # libonnxruntime via @rpath but ships with no LC_RPATH, so dyld can't find
-    # it at runtime. Add an @loader_path/lib rpath so @rpath resolves to
-    # package/lib/ (matching the piper darwin fix, #10525).
-    echo "Detected macOS; adding @loader_path/lib rpath so bundled libs resolve via @rpath..."
-    install_name_tool -add_rpath @loader_path/lib "$CURDIR/package/silero-vad"
-elif [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
+if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
    # x86_64 architecture
    echo "Detected x86_64 architecture, copying x86_64 libraries..."
    cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
--- a/backend/go/silero-vad/run.sh
+++ b/backend/go/silero-vad/run.sh
@@ -1,18 +1,14 @@
 #!/bin/bash
 set -ex

-CURDIR=$(dirname "$(realpath "$0")")
+CURDIR=$(dirname "$(realpath $0)")

-if [ "$(uname)" = "Darwin" ]; then
-	export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
-else
-	export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
-fi
+export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH

 # If there is a lib/ld.so, use it
-if [ -f "$CURDIR"/lib/ld.so ]; then
+if [ -f $CURDIR/lib/ld.so ]; then
 	echo "Using lib/ld.so"
-	exec "$CURDIR"/lib/ld.so "$CURDIR"/silero-vad "$@"
+	exec $CURDIR/lib/ld.so $CURDIR/silero-vad "$@"
 fi

-exec "$CURDIR"/silero-vad "$@"
+exec $CURDIR/silero-vad "$@"
--- a/backend/go/stablediffusion-ggml/run.sh
+++ b/backend/go/stablediffusion-ggml/run.sh
@@ -2,7 +2,7 @@
 set -ex

 # Get the absolute current dir where the script is located
-CURDIR=$(dirname "$(realpath "$0")")
+CURDIR=$(dirname "$(realpath $0)")

 cd /

@@ -20,20 +20,20 @@ if [ "$(uname)" = "Darwin" ]; then
 	if [ ! -e "$LIBRARY" ]; then
 		LIBRARY="$CURDIR/libgosd-fallback.so"
 	fi
-	export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
+	export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
 else
 	LIBRARY="$CURDIR/libgosd-fallback.so"

 	if grep -q -e "\savx\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX    found OK"
-		if [ -e "$CURDIR"/libgosd-avx.so ]; then
+		if [ -e $CURDIR/libgosd-avx.so ]; then
 			LIBRARY="$CURDIR/libgosd-avx.so"
 		fi
 	fi

 	if grep -q -e "\savx2\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX2   found OK"
-		if [ -e "$CURDIR"/libgosd-avx2.so ]; then
+		if [ -e $CURDIR/libgosd-avx2.so ]; then
 			LIBRARY="$CURDIR/libgosd-avx2.so"
 		fi
 	fi
@@ -41,22 +41,22 @@ else
 	# Check avx 512
 	if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX512F found OK"
-		if [ -e "$CURDIR"/libgosd-avx512.so ]; then
+		if [ -e $CURDIR/libgosd-avx512.so ]; then
 			LIBRARY="$CURDIR/libgosd-avx512.so"
 		fi
 	fi

-	export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
+	export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 fi

 export SD_LIBRARY=$LIBRARY

 # If there is a lib/ld.so, use it
-if [ -f "$CURDIR"/lib/ld.so ]; then
+if [ -f $CURDIR/lib/ld.so ]; then
 	echo "Using lib/ld.so"
 	echo "Using library: $LIBRARY"
-	exec "$CURDIR"/lib/ld.so "$CURDIR"/stablediffusion-ggml "$@"
+	exec $CURDIR/lib/ld.so $CURDIR/stablediffusion-ggml "$@"
 fi

 echo "Using library: $LIBRARY"
-exec "$CURDIR"/stablediffusion-ggml "$@"
+exec $CURDIR/stablediffusion-ggml "$@"
--- a/backend/go/supertonic/run.sh
+++ b/backend/go/supertonic/run.sh
@@ -1,21 +1,21 @@
 #!/bin/bash
 set -ex

-CURDIR=$(dirname "$(realpath "$0")")
+CURDIR=$(dirname "$(realpath $0)")

 if [ "$(uname)" = "Darwin" ]; then
 	# macOS uses dyld: there is no ld.so loader, and the search path env
 	# var is DYLD_LIBRARY_PATH. ONNX Runtime ships as a .dylib here.
-	export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
-	export ONNXRUNTIME_LIB_PATH="$CURDIR"/lib/libonnxruntime.dylib
+	export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
+	export ONNXRUNTIME_LIB_PATH=$CURDIR/lib/libonnxruntime.dylib
 else
-	export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
-	export ONNXRUNTIME_LIB_PATH="$CURDIR"/lib/libonnxruntime.so
+	export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
+	export ONNXRUNTIME_LIB_PATH=$CURDIR/lib/libonnxruntime.so

-	if [ -f "$CURDIR"/lib/ld.so ]; then
+	if [ -f $CURDIR/lib/ld.so ]; then
 		echo "Using lib/ld.so"
-		exec "$CURDIR"/lib/ld.so "$CURDIR"/supertonic "$@"
+		exec $CURDIR/lib/ld.so $CURDIR/supertonic "$@"
 	fi
 fi

-exec "$CURDIR"/supertonic "$@"
+exec $CURDIR/supertonic "$@"
--- a/backend/go/vibevoice-cpp/run.sh
+++ b/backend/go/vibevoice-cpp/run.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 set -ex

-CURDIR=$(dirname "$(realpath "$0")")
+CURDIR=$(dirname "$(realpath $0)")

 cd /

@@ -14,41 +14,41 @@ fi
 if [ "$(uname)" = "Darwin" ]; then
 	# macOS: single dylib variant (Metal or Accelerate)
 	LIBRARY="$CURDIR/libgovibevoicecpp-fallback.dylib"
-	export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
+	export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
 else
 	LIBRARY="$CURDIR/libgovibevoicecpp-fallback.so"

 	if grep -q -e "\savx\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX    found OK"
-		if [ -e "$CURDIR"/libgovibevoicecpp-avx.so ]; then
+		if [ -e $CURDIR/libgovibevoicecpp-avx.so ]; then
 			LIBRARY="$CURDIR/libgovibevoicecpp-avx.so"
 		fi
 	fi

 	if grep -q -e "\savx2\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX2   found OK"
-		if [ -e "$CURDIR"/libgovibevoicecpp-avx2.so ]; then
+		if [ -e $CURDIR/libgovibevoicecpp-avx2.so ]; then
 			LIBRARY="$CURDIR/libgovibevoicecpp-avx2.so"
 		fi
 	fi

 	if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX512F found OK"
-		if [ -e "$CURDIR"/libgovibevoicecpp-avx512.so ]; then
+		if [ -e $CURDIR/libgovibevoicecpp-avx512.so ]; then
 			LIBRARY="$CURDIR/libgovibevoicecpp-avx512.so"
 		fi
 	fi

-	export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
+	export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 fi

 export VIBEVOICECPP_LIBRARY=$LIBRARY

-if [ -f "$CURDIR"/lib/ld.so ]; then
+if [ -f $CURDIR/lib/ld.so ]; then
 	echo "Using lib/ld.so"
 	echo "Using library: $LIBRARY"
-	exec "$CURDIR"/lib/ld.so "$CURDIR"/vibevoice-cpp "$@"
+	exec $CURDIR/lib/ld.so $CURDIR/vibevoice-cpp "$@"
 fi

 echo "Using library: $LIBRARY"
-exec "$CURDIR"/vibevoice-cpp "$@"
+exec $CURDIR/vibevoice-cpp "$@"
--- a/backend/go/voxtral/run.sh
+++ b/backend/go/voxtral/run.sh
@@ -2,7 +2,7 @@
 set -ex

 # Get the absolute current dir where the script is located
-CURDIR=$(dirname "$(realpath "$0")")
+CURDIR=$(dirname "$(realpath $0)")

 cd /

@@ -15,35 +15,35 @@ fi
 if [ "$(uname)" = "Darwin" ]; then
 	# macOS: single dylib variant (Metal or Accelerate)
 	LIBRARY="$CURDIR/libgovoxtral-fallback.dylib"
-	export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
+	export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
 else
 	LIBRARY="$CURDIR/libgovoxtral-fallback.so"

 	if grep -q -e "\savx\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX    found OK"
-		if [ -e "$CURDIR"/libgovoxtral-avx.so ]; then
+		if [ -e $CURDIR/libgovoxtral-avx.so ]; then
 			LIBRARY="$CURDIR/libgovoxtral-avx.so"
 		fi
 	fi

 	if grep -q -e "\savx2\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX2   found OK"
-		if [ -e "$CURDIR"/libgovoxtral-avx2.so ]; then
+		if [ -e $CURDIR/libgovoxtral-avx2.so ]; then
 			LIBRARY="$CURDIR/libgovoxtral-avx2.so"
 		fi
 	fi

-	export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
+	export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 fi

 export VOXTRAL_LIBRARY=$LIBRARY

 # If there is a lib/ld.so, use it (Linux only)
-if [ -f "$CURDIR"/lib/ld.so ]; then
+if [ -f $CURDIR/lib/ld.so ]; then
 	echo "Using lib/ld.so"
 	echo "Using library: $LIBRARY"
-	exec "$CURDIR"/lib/ld.so "$CURDIR"/voxtral "$@"
+	exec $CURDIR/lib/ld.so $CURDIR/voxtral "$@"
 fi

 echo "Using library: $LIBRARY"
-exec "$CURDIR"/voxtral "$@"
+exec $CURDIR/voxtral "$@"
--- a/backend/go/whisper/run.sh
+++ b/backend/go/whisper/run.sh
@@ -2,7 +2,7 @@
 set -ex

 # Get the absolute current dir where the script is located
-CURDIR=$(dirname "$(realpath "$0")")
+CURDIR=$(dirname "$(realpath $0)")

 cd /

@@ -13,28 +13,22 @@ if [ "$(uname)" != "Darwin" ]; then
 fi

 if [ "$(uname)" = "Darwin" ]; then
-	# macOS: single fallback variant (Metal/Accelerate). The cmake build emits a
-	# Mach-O named .so, but tolerate .dylib too — pick whichever exists so the Go
-	# loader doesn't panic on a hardcoded name that isn't on disk.
-	if [ -e "$CURDIR/libgowhisper-fallback.dylib" ]; then
-		LIBRARY="$CURDIR/libgowhisper-fallback.dylib"
-	else
-		LIBRARY="$CURDIR/libgowhisper-fallback.so"
-	fi
-	export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
+	# macOS: single dylib variant (Metal or Accelerate)
+	LIBRARY="$CURDIR/libgowhisper-fallback.dylib"
+	export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
 else
 	LIBRARY="$CURDIR/libgowhisper-fallback.so"

 	if grep -q -e "\savx\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX    found OK"
-		if [ -e "$CURDIR"/libgowhisper-avx.so ]; then
+		if [ -e $CURDIR/libgowhisper-avx.so ]; then
 			LIBRARY="$CURDIR/libgowhisper-avx.so"
 		fi
 	fi

 	if grep -q -e "\savx2\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX2   found OK"
-		if [ -e "$CURDIR"/libgowhisper-avx2.so ]; then
+		if [ -e $CURDIR/libgowhisper-avx2.so ]; then
 			LIBRARY="$CURDIR/libgowhisper-avx2.so"
 		fi
 	fi
@@ -42,22 +36,22 @@ else
 	# Check avx 512
 	if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
 		echo "CPU:    AVX512F found OK"
-		if [ -e "$CURDIR"/libgowhisper-avx512.so ]; then
+		if [ -e $CURDIR/libgowhisper-avx512.so ]; then
 			LIBRARY="$CURDIR/libgowhisper-avx512.so"
 		fi
 	fi

-	export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
+	export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
 fi

 export WHISPER_LIBRARY=$LIBRARY

 # If there is a lib/ld.so, use it
-if [ -f "$CURDIR"/lib/ld.so ]; then
+if [ -f $CURDIR/lib/ld.so ]; then
 	echo "Using lib/ld.so"
 	echo "Using library: $LIBRARY"
-	exec "$CURDIR"/lib/ld.so "$CURDIR"/whisper "$@"
+	exec $CURDIR/lib/ld.so $CURDIR/whisper "$@"
 fi

 echo "Using library: $LIBRARY"
-exec "$CURDIR"/whisper "$@"
+exec $CURDIR/whisper "$@"
--- a/backend/index.yaml
+++ b/backend/index.yaml
@@ -340,7 +340,6 @@
    nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-sam3-cpp"
    intel: "intel-sycl-f32-sam3-cpp"
    vulkan: "vulkan-sam3-cpp"
-    metal: "metal-sam3-cpp"
 - &rfdetrcpp
  name: "rfdetr-cpp"
  alias: "rfdetr-cpp"
@@ -369,7 +368,6 @@
    nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-rfdetr-cpp"
    intel: "intel-sycl-f32-rfdetr-cpp"
    vulkan: "vulkan-rfdetr-cpp"
-    metal: "metal-rfdetr-cpp"
 - &locateanything
  name: "locate-anything"
  alias: "locate-anything"
@@ -399,7 +397,6 @@
    nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-locate-anything-cpp"
    intel: "intel-sycl-f32-locate-anything-cpp"
    vulkan: "vulkan-locate-anything-cpp"
-    metal: "metal-locate-anything-cpp"
 - !!merge <<: *locateanything
  name: "locate-anything-development"
  capabilities:
@@ -412,7 +409,6 @@
    nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-locate-anything-cpp-development"
    intel: "intel-sycl-f32-locate-anything-cpp-development"
    vulkan: "vulkan-locate-anything-cpp-development"
-    metal: "metal-locate-anything-cpp-development"
 - !!merge <<: *locateanything
  name: "cpu-locate-anything-cpp"
  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-locate-anything-cpp"
@@ -423,16 +419,6 @@
  uri: "quay.io/go-skynet/local-ai-backends:master-cpu-locate-anything-cpp"
  mirrors:
    - localai/localai-backends:master-cpu-locate-anything-cpp
- !!merge <<: *locateanything
-  name: "metal-locate-anything-cpp"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-locate-anything-cpp"
-  mirrors:
-    - localai/localai-backends:latest-metal-darwin-arm64-locate-anything-cpp
- !!merge <<: *locateanything
-  name: "metal-locate-anything-cpp-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-locate-anything-cpp"
-  mirrors:
-    - localai/localai-backends:master-metal-darwin-arm64-locate-anything-cpp
 - !!merge <<: *locateanything
  name: "cuda12-locate-anything-cpp"
  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-locate-anything-cpp"
@@ -531,7 +517,6 @@
    nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-depth-anything-cpp"
    intel: "intel-sycl-f32-depth-anything-cpp"
    vulkan: "vulkan-depth-anything-cpp"
-    metal: "metal-depth-anything-cpp"
 - !!merge <<: *depthanything
  name: "depth-anything-development"
  capabilities:
@@ -544,7 +529,6 @@
    nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-depth-anything-cpp-development"
    intel: "intel-sycl-f32-depth-anything-cpp-development"
    vulkan: "vulkan-depth-anything-cpp-development"
-    metal: "metal-depth-anything-cpp-development"
 - !!merge <<: *depthanything
  name: "cpu-depth-anything-cpp"
  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-depth-anything-cpp"
@@ -555,16 +539,6 @@
  uri: "quay.io/go-skynet/local-ai-backends:master-cpu-depth-anything-cpp"
  mirrors:
    - localai/localai-backends:master-cpu-depth-anything-cpp
- !!merge <<: *depthanything
-  name: "metal-depth-anything-cpp"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-depth-anything-cpp"
-  mirrors:
-    - localai/localai-backends:latest-metal-darwin-arm64-depth-anything-cpp
- !!merge <<: *depthanything
-  name: "metal-depth-anything-cpp-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-depth-anything-cpp"
-  mirrors:
-    - localai/localai-backends:master-metal-darwin-arm64-depth-anything-cpp
 - !!merge <<: *depthanything
  name: "cuda12-depth-anything-cpp"
  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-depth-anything-cpp"
@@ -1057,8 +1031,6 @@
    nvidia-l4t: "vulkan-localvqe"
    nvidia-l4t-cuda-12: "vulkan-localvqe"
    nvidia-l4t-cuda-13: "vulkan-localvqe"
-    # Apple Silicon: CPU build (LocalVQE has no Metal path); still arm64-native.
-    metal: "metal-localvqe"
 - &privacyfilter
  name: "privacy-filter"
  alias: "privacy-filter"
@@ -1095,7 +1067,6 @@
    amd: "vulkan-privacy-filter"
    intel: "vulkan-privacy-filter"
    vulkan: "vulkan-privacy-filter"
-    metal: "metal-privacy-filter"
 - &faster-whisper
  icon: https://avatars.githubusercontent.com/u/1520500?s=200&v=4
  description: |
@@ -1356,6 +1327,7 @@
    intel: "intel-fish-speech"
    amd: "rocm-fish-speech"
    nvidia-l4t: "nvidia-l4t-fish-speech"
+    metal: "metal-fish-speech"
    default: "cpu-fish-speech"
    nvidia-cuda-13: "cuda13-fish-speech"
    nvidia-cuda-12: "cuda12-fish-speech"
@@ -2937,16 +2909,6 @@
  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-privacy-filter"
  mirrors:
    - localai/localai-backends:master-gpu-vulkan-privacy-filter
- !!merge <<: *privacyfilter
-  name: "metal-privacy-filter"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-privacy-filter"
-  mirrors:
-    - localai/localai-backends:latest-metal-darwin-arm64-privacy-filter
- !!merge <<: *privacyfilter
-  name: "metal-privacy-filter-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-privacy-filter"
-  mirrors:
-    - localai/localai-backends:master-metal-darwin-arm64-privacy-filter
 - !!merge <<: *privacyfilter
  name: "cuda13-privacy-filter"
  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-privacy-filter"
@@ -3258,7 +3220,6 @@
    nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-sam3-cpp-development"
    intel: "intel-sycl-f32-sam3-cpp-development"
    vulkan: "vulkan-sam3-cpp-development"
-    metal: "metal-sam3-cpp-development"
 - !!merge <<: *sam3cpp
  name: "cpu-sam3-cpp"
  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-sam3-cpp"
@@ -3269,16 +3230,6 @@
  uri: "quay.io/go-skynet/local-ai-backends:master-cpu-sam3-cpp"
  mirrors:
    - localai/localai-backends:master-cpu-sam3-cpp
- !!merge <<: *sam3cpp
-  name: "metal-sam3-cpp"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-sam3-cpp"
-  mirrors:
-    - localai/localai-backends:latest-metal-darwin-arm64-sam3-cpp
- !!merge <<: *sam3cpp
-  name: "metal-sam3-cpp-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-sam3-cpp"
-  mirrors:
-    - localai/localai-backends:master-metal-darwin-arm64-sam3-cpp
 - !!merge <<: *sam3cpp
  name: "cuda12-sam3-cpp"
  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-sam3-cpp"
@@ -3352,7 +3303,6 @@
    nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-rfdetr-cpp-development"
    intel: "intel-sycl-f32-rfdetr-cpp-development"
    vulkan: "vulkan-rfdetr-cpp-development"
-    metal: "metal-rfdetr-cpp-development"
 - !!merge <<: *rfdetrcpp
  name: "cpu-rfdetr-cpp"
  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-rfdetr-cpp"
@@ -3363,16 +3313,6 @@
  uri: "quay.io/go-skynet/local-ai-backends:master-cpu-rfdetr-cpp"
  mirrors:
    - localai/localai-backends:master-cpu-rfdetr-cpp
- !!merge <<: *rfdetrcpp
-  name: "metal-rfdetr-cpp"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-rfdetr-cpp"
-  mirrors:
-    - localai/localai-backends:latest-metal-darwin-arm64-rfdetr-cpp
- !!merge <<: *rfdetrcpp
-  name: "metal-rfdetr-cpp-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-rfdetr-cpp"
-  mirrors:
-    - localai/localai-backends:master-metal-darwin-arm64-rfdetr-cpp
 - !!merge <<: *rfdetrcpp
  name: "cuda12-rfdetr-cpp"
  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-rfdetr-cpp"
@@ -4161,16 +4101,6 @@
  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-localvqe"
  mirrors:
    - localai/localai-backends:master-gpu-vulkan-localvqe
- !!merge <<: *localvqecpp
-  name: "metal-localvqe"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-localvqe"
-  mirrors:
-    - localai/localai-backends:latest-metal-darwin-arm64-localvqe
- !!merge <<: *localvqecpp
-  name: "metal-localvqe-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-localvqe"
-  mirrors:
-    - localai/localai-backends:master-metal-darwin-arm64-localvqe
 ## kokoro
 - !!merge <<: *kokoro
  name: "kokoro-development"
@@ -4869,6 +4799,7 @@
    intel: "intel-fish-speech-development"
    amd: "rocm-fish-speech-development"
    nvidia-l4t: "nvidia-l4t-fish-speech-development"
+    metal: "metal-fish-speech-development"
    default: "cpu-fish-speech-development"
    nvidia-cuda-13: "cuda13-fish-speech-development"
    nvidia-cuda-12: "cuda12-fish-speech-development"
@@ -4944,6 +4875,16 @@
  uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-fish-speech"
  mirrors:
    - localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-fish-speech
+- !!merge <<: *fish-speech
+  name: "metal-fish-speech"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-fish-speech"
+  mirrors:
+    - localai/localai-backends:latest-metal-darwin-arm64-fish-speech
+- !!merge <<: *fish-speech
+  name: "metal-fish-speech-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-fish-speech"
+  mirrors:
+    - localai/localai-backends:master-metal-darwin-arm64-fish-speech
 ## faster-qwen3-tts
 - !!merge <<: *faster-qwen3-tts
  name: "faster-qwen3-tts-development"
--- a/backend/python/fish-speech/requirements-mps.txt
+++ b/backend/python/fish-speech/requirements-mps.txt
@@ -0,0 +1,2 @@
+torch
+torchaudio
--- a/backend/python/fish-speech/requirements.txt
+++ b/backend/python/fish-speech/requirements.txt
@@ -7,7 +7,3 @@ setuptools
 six
 scipy
 numpy
-# fish-speech is installed editable with --no-build-isolation, so the build
-# backends of its transitive deps must already be in the venv. One of them
-# builds a Rust extension and needs setuptools-rust present at metadata time.
-setuptools-rust
--- a/backend/python/llama-cpp-quantization/install.sh
+++ b/backend/python/llama-cpp-quantization/install.sh
@@ -11,31 +11,14 @@ fi
 EXTRA_PIP_INSTALL_FLAGS+=" --upgrade "
 installRequirements

-# Fetch convert_hf_to_gguf.py from llama.cpp.
-# Upstream split the model-specific logic out of the single file into a
-# sibling `conversion/` package (convert_hf_to_gguf.py now does
-# `from conversion import ...`), so a single-file download no longer runs —
-# it fails with `ModuleNotFoundError: No module named 'conversion'`. We clone
-# the repo and copy both the script and the package; Python puts the script's
-# own directory on sys.path[0], so the package resolves when placed beside it.
+# Fetch convert_hf_to_gguf.py from llama.cpp
 LLAMA_CPP_CONVERT_VERSION="${LLAMA_CPP_CONVERT_VERSION:-master}"
-LLAMA_CPP_SRC="${EDIR}/llama.cpp"
 CONVERT_SCRIPT="${EDIR}/convert_hf_to_gguf.py"
-
-cloneLlamaCpp() {
-    if [ ! -d "${LLAMA_CPP_SRC}/.git" ]; then
-        git clone --depth 1 --branch "${LLAMA_CPP_CONVERT_VERSION}" \
-            https://github.com/ggml-org/llama.cpp.git "${LLAMA_CPP_SRC}" 2>/dev/null || \
-        git clone --depth 1 https://github.com/ggml-org/llama.cpp.git "${LLAMA_CPP_SRC}"
-    fi
-}
-
-if [ ! -f "${CONVERT_SCRIPT}" ] || [ ! -d "${EDIR}/conversion" ]; then
-    echo "Fetching convert_hf_to_gguf.py + conversion/ from llama.cpp (${LLAMA_CPP_CONVERT_VERSION})..."
-    cloneLlamaCpp
-    cp "${LLAMA_CPP_SRC}/convert_hf_to_gguf.py" "${CONVERT_SCRIPT}"
-    rm -rf "${EDIR}/conversion"
-    cp -r "${LLAMA_CPP_SRC}/conversion" "${EDIR}/conversion"
+if [ ! -f "${CONVERT_SCRIPT}" ]; then
+    echo "Downloading convert_hf_to_gguf.py from llama.cpp (${LLAMA_CPP_CONVERT_VERSION})..."
+    curl -L --fail --retry 3 \
+        "https://raw.githubusercontent.com/ggml-org/llama.cpp/${LLAMA_CPP_CONVERT_VERSION}/convert_hf_to_gguf.py" \
+        -o "${CONVERT_SCRIPT}" || echo "Warning: Failed to download convert_hf_to_gguf.py."
 fi

 # Install gguf package from the same llama.cpp commit to keep them in sync
@@ -58,7 +41,12 @@ QUANTIZE_BIN="${EDIR}/llama-quantize"
 if [ ! -x "${QUANTIZE_BIN}" ] && ! command -v llama-quantize &>/dev/null; then
    if command -v cmake &>/dev/null; then
        echo "Building llama-quantize from llama.cpp (${LLAMA_CPP_CONVERT_VERSION})..."
-        cloneLlamaCpp  # reuses the clone fetched for convert_hf_to_gguf.py
+        LLAMA_CPP_SRC="${EDIR}/llama.cpp"
+        if [ ! -d "${LLAMA_CPP_SRC}" ]; then
+            git clone --depth 1 --branch "${LLAMA_CPP_CONVERT_VERSION}" \
+                https://github.com/ggml-org/llama.cpp.git "${LLAMA_CPP_SRC}" 2>/dev/null || \
+            git clone --depth 1 https://github.com/ggml-org/llama.cpp.git "${LLAMA_CPP_SRC}"
+        fi
        cmake -B "${LLAMA_CPP_SRC}/build" -S "${LLAMA_CPP_SRC}" -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF
        cmake --build "${LLAMA_CPP_SRC}/build" --target llama-quantize -j"$(nproc 2>/dev/null || echo 2)"
        cp "${LLAMA_CPP_SRC}/build/bin/llama-quantize" "${QUANTIZE_BIN}"
--- a/backend/python/sglang/install.sh
+++ b/backend/python/sglang/install.sh
@@ -85,15 +85,9 @@ if [ "x${BUILD_TYPE}" == "x" ] || [ "x${FROM_SOURCE:-}" == "xtrue" ]; then
    # The resulting binary still requires an AVX-512 capable CPU at runtime,
    # same constraint sglang upstream documents in docker/xeon.Dockerfile.

-    # Pin the source build to the same release the GPU path floors on
-    # (0.5.11, see requirements-cublas12-after.txt). An unpinned master clone
-    # pulls in newer CPU kernels (e.g. mamba/fla.cpp) that fail to compile
-    # (constexpr non-constant + kineto_LIBRARY-NOTFOUND). Bump deliberately.
-    SGLANG_VERSION="${SGLANG_VERSION:-v0.5.11}"
    _sgl_src=$(mktemp -d)
    trap 'rm -rf "${_sgl_src}"' EXIT
-    git clone --depth 1 --branch "${SGLANG_VERSION}" \
-        https://github.com/sgl-project/sglang "${_sgl_src}/sglang"
+    git clone --depth 1 https://github.com/sgl-project/sglang "${_sgl_src}/sglang"

    # Patch -march=native → -march=sapphirerapids in the CPU kernel CMakeLists
    sed -i 's/-march=native/-march=sapphirerapids/g' \
--- a/backend/python/vllm/requirements-cpu.txt
+++ b/backend/python/vllm/requirements-cpu.txt
@@ -1,6 +1,6 @@
 --extra-index-url https://download.pytorch.org/whl/cpu
 accelerate
-torch==2.9.1+cpu
+torch==2.12.1+xpu
 torchvision
 torchaudio
 transformers
--- a/backend/rust/kokoros/run.sh
+++ b/backend/rust/kokoros/run.sh
@@ -1,23 +1,23 @@
 #!/bin/bash
 set -ex

-CURDIR=$(dirname "$(realpath "$0")")
+CURDIR=$(dirname "$(realpath $0)")

-export LD_LIBRARY_PATH="$CURDIR"/lib:${LD_LIBRARY_PATH:-}
+export LD_LIBRARY_PATH=$CURDIR/lib:${LD_LIBRARY_PATH:-}

 # SSL certificates for model auto-download
 if [ -d "$CURDIR/etc/ssl/certs" ]; then
-    export SSL_CERT_DIR="$CURDIR"/etc/ssl/certs
+    export SSL_CERT_DIR=$CURDIR/etc/ssl/certs
 fi

 # espeak-ng data directory
 if [ -d "$CURDIR/espeak-ng-data" ]; then
-    export ESPEAK_NG_DATA="$CURDIR"/espeak-ng-data
+    export ESPEAK_NG_DATA=$CURDIR/espeak-ng-data
 fi

 # Use bundled ld.so if present (portability)
-if [ -f "$CURDIR"/lib/ld.so ]; then
-    exec "$CURDIR"/lib/ld.so "$CURDIR"/kokoros-grpc "$@"
+if [ -f $CURDIR/lib/ld.so ]; then
+    exec $CURDIR/lib/ld.so $CURDIR/kokoros-grpc "$@"
 fi

-exec "$CURDIR"/kokoros-grpc "$@"
+exec $CURDIR/kokoros-grpc "$@"
--- a/backend/rust/kokoros/src/service.rs
+++ b/backend/rust/kokoros/src/service.rs
@@ -570,43 +570,6 @@ impl Backend for KokorosService {
    ) -> Result<Response<backend::Result>, Status> {
        Err(Status::unimplemented("Not supported"))
    }
-
-    async fn sound_detection(
-        &self,
-        _: Request<backend::SoundDetectionRequest>,
-    ) -> Result<Response<backend::SoundDetectionResponse>, Status> {
-        Err(Status::unimplemented("Not supported"))
-    }
-
-    async fn depth(
-        &self,
-        _: Request<backend::DepthRequest>,
-    ) -> Result<Response<backend::DepthResponse>, Status> {
-        Err(Status::unimplemented("Not supported"))
-    }
-
-    async fn token_classify(
-        &self,
-        _: Request<backend::TokenClassifyRequest>,
-    ) -> Result<Response<backend::TokenClassifyResponse>, Status> {
-        Err(Status::unimplemented("Not supported"))
-    }
-
-    async fn score(
-        &self,
-        _: Request<backend::ScoreRequest>,
-    ) -> Result<Response<backend::ScoreResponse>, Status> {
-        Err(Status::unimplemented("Not supported"))
-    }
-
-    type ForwardStream = ReceiverStream<Result<backend::ForwardReply, Status>>;
-
-    async fn forward(
-        &self,
-        _: Request<tonic::Streaming<backend::ForwardRequest>>,
-    ) -> Result<Response<Self::ForwardStream>, Status> {
-        Err(Status::unimplemented("Not supported"))
-    }
 }

 #[cfg(test)]
--- a/cmd/launcher/FyneApp.toml
+++ b/cmd/launcher/FyneApp.toml
@@ -1,8 +0,0 @@
-Website = "https://localai.io"
-
-[Details]
-Icon = "../../core/http/static/logo.png"
-Name = "LocalAI"
-ID = "com.localai.launcher"
-Version = "0.0.0"
-Build = 1
--- a/contrib/macos/Launcher.entitlements
+++ b/contrib/macos/Launcher.entitlements
@@ -1,14 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-<dict>
-    <key>com.apple.security.network.client</key>
-    <true/>
-    <key>com.apple.security.network.server</key>
-    <true/>
-    <key>com.apple.security.cs.allow-jit</key>
-    <true/>
-    <key>com.apple.security.cs.allow-unsigned-executable-memory</key>
-    <true/>
-</dict>
-</plist>
--- a/contrib/macos/sign-and-notarize.sh
+++ b/contrib/macos/sign-and-notarize.sh
@@ -1,84 +0,0 @@
-#!/usr/bin/env bash
-# Code-sign and notarize macOS artifacts for LocalAI.
-# Every sub-command is a no-op (exit 0) when its required secret is unset,
-# so unsigned builds (forks, local dev, PRs) keep working.
-set -euo pipefail
-
-ENTITLEMENTS="contrib/macos/Launcher.entitlements"
-KEYCHAIN="localai-ci.keychain-db"
-
-cmd_import_cert() {
-  if [ -z "${MACOS_CERTIFICATE:-}" ]; then
-    echo "[sign] MACOS_CERTIFICATE unset: skipping cert import (unsigned build)"
-    return 0
-  fi
-  local certfile keychain_pwd default_keychain
-  certfile="$(mktemp).p12"
-  keychain_pwd="${MACOS_CI_KEYCHAIN_PWD:?MACOS_CI_KEYCHAIN_PWD required when signing}"
-  echo "$MACOS_CERTIFICATE" | base64 --decode > "$certfile"
-  security create-keychain -p "$keychain_pwd" "$KEYCHAIN"
-  security set-keychain-settings -lut 21600 "$KEYCHAIN"
-  security unlock-keychain -p "$keychain_pwd" "$KEYCHAIN"
-  security import "$certfile" -k "$KEYCHAIN" -P "${MACOS_CERTIFICATE_PWD:?}" \
-    -T /usr/bin/codesign -T /usr/bin/security
-  security set-key-partition-list -S apple-tool:,apple:,codesign: \
-    -s -k "$keychain_pwd" "$KEYCHAIN" >/dev/null
-  default_keychain="$(security default-keychain | tr -d ' "')"
-  security list-keychains -d user -s "$KEYCHAIN" "$default_keychain"
-  rm -f "$certfile"
-  echo "[sign] certificate imported into $KEYCHAIN"
-}
-
-cmd_sign() {
-  local target="$1"
-  if [ -z "${MACOS_SIGN_IDENTITY:-}" ]; then
-    echo "[sign] MACOS_SIGN_IDENTITY unset: skipping codesign of $target"
-    return 0
-  fi
-  case "$target" in
-    *.app)
-      # Hardened runtime + entitlements are required for notarizing the app bundle.
-      codesign --deep --force --options runtime --timestamp \
-        --entitlements "$ENTITLEMENTS" \
-        --sign "$MACOS_SIGN_IDENTITY" "$target"
-      ;;
-    *)
-      # A disk image carries no entitlements/runtime; just sign the container.
-      codesign --force --timestamp --sign "$MACOS_SIGN_IDENTITY" "$target"
-      ;;
-  esac
-  codesign --verify --strict --verbose=2 "$target"
-  echo "[sign] signed $target"
-}
-
-cmd_notarize() {
-  local dmg="$1"
-  if [ -z "${MACOS_NOTARY_KEY:-}" ]; then
-    echo "[notarize] MACOS_NOTARY_KEY unset: skipping notarization of $dmg"
-    return 0
-  fi
-  local keyfile
-  keyfile="$(mktemp).p8"
-  echo "$MACOS_NOTARY_KEY" | base64 --decode > "$keyfile"
-  xcrun notarytool submit "$dmg" \
-    --key "$keyfile" \
-    --key-id "${MACOS_NOTARY_KEY_ID:?}" \
-    --issuer "${MACOS_NOTARY_ISSUER_ID:?}" \
-    --wait
-  rm -f "$keyfile"
-  xcrun stapler staple "$dmg"
-  xcrun stapler validate "$dmg"
-  echo "[notarize] notarized and stapled $dmg"
-}
-
-main() {
-  local sub="${1:-}"; shift || true
-  case "$sub" in
-    import-cert) cmd_import_cert ;;
-    sign)        cmd_sign "$@" ;;
-    notarize)    cmd_notarize "$@" ;;
-    *) echo "usage: $0 {import-cert|sign <path>|notarize <dmg>}" >&2; exit 2 ;;
-  esac
-}
-
-main "$@"
--- a/core/application/agent_jobs.go
+++ b/core/application/agent_jobs.go
@@ -37,8 +37,6 @@ func (a *Application) RestartAgentJobService() error {
 		if d.JobStore != nil {
 			agentJobService.SetDistributedJobStore(d.JobStore)
 		}
-		// Keep agent tasks consistent across replicas (same client the dispatcher uses).
-		agentJobService.SetTaskSyncNATS(d.Nats)
 	}

 	// Start the service
--- a/core/application/application.go
+++ b/core/application/application.go
@@ -604,10 +604,6 @@ func (a *Application) StartAgentPool() {
 			usm.SetJobDBStore(s)
 		}
 	}
-	// Keep per-user agent tasks consistent across replicas (nil in standalone).
-	if d := a.Distributed(); d != nil {
-		usm.SetJobSyncNATS(d.Nats)
-	}
 	aps.SetUserServicesManager(usm)

 	a.agentPoolService.Store(aps)
--- a/core/application/distributed.go
+++ b/core/application/distributed.go
@@ -355,7 +355,6 @@ func initDistributed(cfg *config.ApplicationConfig, authDB *gorm.DB, configLoade
 		PrefixProvider:   prefixProvider,
 		PrefixConfig:     prefixCfg,
 		Pressure:         pressure,
-		SharedModels:     cfg.Distributed.SharedModels,
 	})

 	// Wire staging-progress broadcasting so file-staging shows up on every
--- a/core/application/startup.go
+++ b/core/application/startup.go
@@ -16,7 +16,6 @@ import (
 	"github.com/mudler/LocalAI/core/services/galleryop"
 	"github.com/mudler/LocalAI/core/services/jobs"
 	"github.com/mudler/LocalAI/core/services/messaging"
-	"github.com/mudler/LocalAI/core/services/modeladmin"
 	"github.com/mudler/LocalAI/core/services/monitoring"
 	"github.com/mudler/LocalAI/core/services/nodes"
 	"github.com/mudler/LocalAI/core/services/routing/admission"
@@ -280,9 +279,6 @@ func New(opts ...config.AppOption) (*Application, error) {
 		if application.agentJobService != nil {
 			application.agentJobService.SetDistributedBackends(distSvc.Dispatcher)
 			application.agentJobService.SetDistributedJobStore(distSvc.JobStore)
-			// Keep agent tasks consistent across replicas (jobs already sync via the
-			// dispatcher + DB read-through). Same NATS client the dispatcher uses.
-			application.agentJobService.SetTaskSyncNATS(distSvc.Nats)
 		}
 		// Wire skill store into AgentPoolService (wired at pool start time via closure)
 		// The actual wiring happens in StartAgentPool since the pool doesn't exist yet.
@@ -334,14 +330,9 @@ func New(opts ...config.AppOption) (*Application, error) {
 			gs := application.galleryService
 			sys := options.SystemState
 			cfgLoaderOpts := options.ToConfigLoaderOptions()
-			gs.OnModelsChanged = func(evt messaging.CacheInvalidateEvent) {
-				// ApplyRemoteChange honors the op: a "delete" prunes the element
-				// (a reload-from-path is additive and cannot drop it), anything
-				// else reloads from disk; a named element's running instance is
-				// shut down so the new config takes effect. The originating
-				// replica reloads inline and never depends on this path.
-				if err := modeladmin.ApplyRemoteChange(application.ModelConfigLoader(), application.modelLoader, sys.Model.ModelsPath, evt, cfgLoaderOpts...); err != nil {
-					xlog.Warn("Failed to apply peer model config change", "error", err)
+			gs.OnModelsChanged = func(_ messaging.CacheInvalidateEvent) {
+				if err := application.ModelConfigLoader().LoadModelConfigsFromPath(sys.Model.ModelsPath, cfgLoaderOpts...); err != nil {
+					xlog.Warn("Failed to reload model configs after peer invalidation", "error", err)
 				}
 			}
 			if err := application.galleryService.SubscribeBroadcasts(); err != nil {
--- a/core/cli/run.go
+++ b/core/cli/run.go
@@ -160,7 +160,6 @@ type RunCMD struct {
 	RegistrationRequireAuth   bool   `env:"LOCALAI_REGISTRATION_REQUIRE_AUTH" default:"false" help:"Fail startup when distributed mode is enabled but LOCALAI_REGISTRATION_TOKEN is empty (node endpoints and worker file-transfer server would otherwise be unauthenticated)" group:"distributed"`
 	DistributedRequireAuth    bool   `env:"LOCALAI_DISTRIBUTED_REQUIRE_AUTH" default:"false" help:"Umbrella switch: require BOTH NATS JWT credentials and a registration token when distributed mode is enabled (implies --nats-require-auth and --registration-require-auth)" group:"distributed"`
 	AutoApproveNodes          bool   `env:"LOCALAI_AUTO_APPROVE_NODES" default:"false" help:"Auto-approve new worker nodes (skip admin approval)" group:"distributed"`
-	DistributedSharedModels   bool   `env:"LOCALAI_DISTRIBUTED_SHARED_MODELS" default:"false" help:"Assert that every node mounts the SAME models directory at the SAME path (shared volume). When true, the router skips staging model files to workers and loads them directly from the shared path, avoiding re-downloads." group:"distributed"`
 	DistributedPrefixCache    bool   `env:"LOCALAI_DISTRIBUTED_PREFIX_CACHE" default:"true" help:"Enable prefix-cache-aware routing in distributed mode (default true). When false, routing falls back to round-robin." group:"distributed"`
 	DistributedPrefixCacheTTL string `env:"LOCALAI_DISTRIBUTED_PREFIX_CACHE_TTL" help:"Idle-timeout for prefix-cache index entries; also drives the background eviction cadence (every TTL/2). Default 5m." group:"distributed"`
 	BackendInstallTimeout     string `env:"LOCALAI_NATS_BACKEND_INSTALL_TIMEOUT" help:"NATS round-trip timeout for backend.install requests sent to worker nodes (default 15m). Increase for slow links pulling multi-GB images." group:"distributed"`
@@ -204,7 +203,6 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
 		system.WithBackendImagesReleaseTag(r.BackendImagesReleaseTag),
 		system.WithBackendImagesBranchTag(r.BackendImagesBranchTag),
 		system.WithBackendDevSuffix(r.BackendDevSuffix),
-		system.WithPreferDevelopmentBackends(r.PreferDevelopmentBackends),
 	)
 	if err != nil {
 		return err
@@ -311,9 +309,6 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
 	if r.DistributedRequireAuth {
 		opts = append(opts, config.EnableDistributedRequireAuth)
 	}
-	if r.DistributedSharedModels {
-		opts = append(opts, config.EnableDistributedSharedModels)
-	}
 	if r.NatsAccountSeed != "" {
 		opts = append(opts, config.WithNatsAccountSeed(r.NatsAccountSeed))
 	}
--- a/core/config/defaults.go
+++ b/core/config/defaults.go
@@ -12,12 +12,14 @@ package config
 // these; config never imports backend.
 const (
 	// DefaultContextSize is the fallback context window when none is configured
-	// or estimable from the model. It is also the fallback for a GGUF whose
-	// metadata yields no usable estimate or that the parser cannot read at all
-	// (e.g. a quant type it does not know, such as NVFP4): a model-agnostic
-	// safe default beats a tiny, surprising window that truncates real prompts.
+	// or estimable from the model.
 	DefaultContextSize = 4096

+	// GGUFFallbackContextSize is the context window for a GGUF model whose
+	// metadata yields no usable estimate (see guessGGUFFromFile). Deliberately
+	// smaller than DefaultContextSize to stay conservative on memory there.
+	GGUFFallbackContextSize = 1024
+
 	// DefaultNGPULayers means "offload all layers"; the backend (fit_params)
 	// clamps to what actually fits in device memory.
 	DefaultNGPULayers = 99999999
--- a/core/config/distributed_config.go
+++ b/core/config/distributed_config.go
@@ -31,14 +31,6 @@ type DistributedConfig struct {
 	// available to enforce just one layer.
 	RequireAuth      bool // LOCALAI_DISTRIBUTED_REQUIRE_AUTH
 	AutoApproveNodes bool // --auto-approve-nodes / LOCALAI_AUTO_APPROVE_NODES (skip admin approval for new workers)
-	// SharedModels asserts that every node (frontend and workers) mounts the
-	// SAME models directory at the SAME path (e.g. a shared volume, as in
-	// docker-compose.distributed.yaml). When true, the router skips staging
-	// model files to workers entirely: the frontend's absolute model paths are
-	// already valid on the worker, so re-uploading them into a per-model
-	// subdirectory only re-downloads what is already present (#10556). Default
-	// false preserves the historical per-node staging behavior.
-	SharedModels bool // --distributed-shared-models / LOCALAI_DISTRIBUTED_SHARED_MODELS

 	// NATS JWT auth (optional; see pkg/natsauth and docs/features/distributed-mode.md)
 	NatsAccountSeed  string        // LOCALAI_NATS_ACCOUNT_SEED — account signing seed to mint per-node worker JWTs
@@ -290,13 +282,6 @@ var EnableAutoApproveNodes = func(o *ApplicationConfig) {
 	o.Distributed.AutoApproveNodes = true
 }

-// EnableDistributedSharedModels marks the cluster as sharing one models
-// directory across all nodes, so the router skips staging model files to
-// workers (see DistributedConfig.SharedModels).
-var EnableDistributedSharedModels = func(o *ApplicationConfig) {
-	o.Distributed.SharedModels = true
-}
-
 // DisablePrefixCache turns off prefix-cache-aware routing (falls back to
 // round-robin). Prefix-cache routing is enabled by default in distributed mode.
 var DisablePrefixCache = func(o *ApplicationConfig) {
--- a/core/config/gguf.go
+++ b/core/config/gguf.go
@@ -33,7 +33,7 @@ func guessGGUFFromFile(cfg *ModelConfig, f *gguf.GGUFFile, defaultCtx int) {
 			cSize := int(ctxSize)
 			cfg.ContextSize = &cSize
 		} else {
-			defaultCtx = DefaultContextSize
+			defaultCtx = GGUFFallbackContextSize
 			cfg.ContextSize = &defaultCtx
 		}
 	}
--- a/core/config/hooks_llamacpp.go
+++ b/core/config/hooks_llamacpp.go
@@ -34,7 +34,7 @@ func llamaCppDefaults(cfg *ModelConfig, modelPath string) {
 	// Default context size if not set, regardless of whether GGUF parsing succeeds
 	defer func() {
 		if cfg.ContextSize == nil {
-			ctx := DefaultContextSize
+			ctx := GGUFFallbackContextSize
 			cfg.ContextSize = &ctx
 		}
 	}()
--- a/core/config/hooks_test.go
+++ b/core/config/hooks_test.go
@@ -248,11 +248,7 @@ var _ = Describe("Backend hooks and parser defaults", func() {
 			}
 			cfg.SetDefaults(ModelPath(dir))

-			// An unreadable/unparseable GGUF (e.g. a quant type the parser does
-			// not know, such as NVFP4) yields no estimate, so the hook must fall
-			// back to DefaultContextSize rather than a tiny, surprising value.
 			Expect(cfg.ContextSize).NotTo(BeNil())
-			Expect(*cfg.ContextSize).To(Equal(DefaultContextSize))
 		})
 	})

--- a/core/gallery/backends.go
+++ b/core/gallery/backends.go
@@ -59,22 +59,6 @@ func getFallbackTagValues(systemState *system.SystemState) (latestTag, masterTag
 	return latestTag, masterTag, devSuffix
 }

-// developmentURI returns the development image URI for a released backend URI by
-// swapping the released tag for the branch tag (e.g.
-// latest-metal-darwin-arm64-llama-cpp -> master-metal-darwin-arm64-llama-cpp).
-// The branch image tracks development. ok is false when uri has no released tag
-// to swap or already uses the branch tag.
-func developmentURI(uri, latestTag, masterTag string) (string, bool) {
-	if strings.Contains(uri, masterTag+"-") {
-		return "", false
-	}
-	branchURI := strings.Replace(uri, latestTag+"-", masterTag+"-", 1)
-	if branchURI == uri {
-		return "", false
-	}
-	return branchURI, true
-}
-
 // backendCandidate represents an installed concrete backend option for a given alias
 type backendCandidate struct {
 	name    string
@@ -311,28 +295,15 @@ func InstallBackend(ctx context.Context, systemState *system.SystemState, modelL
 		return fmt.Errorf("backend %q: %w", config.Name, optsErr)
 	}

-	// PreferDevelopmentBackends installs the development image as the primary URI,
-	// keeping the released image reachable as the first fallback — instead of only
-	// reaching development when the released image is missing.
-	primaryURI := string(config.URI)
-	mirrors := config.Mirrors
-	if systemState.PreferDevelopmentBackends {
-		if devURI, ok := developmentURI(string(config.URI), latestTag, masterTag); ok {
-			xlog.Info("PreferDevelopmentBackends: installing development image first", "development", devURI, "released", config.URI)
-			primaryURI = devURI
-			mirrors = append([]string{string(config.URI)}, config.Mirrors...)
-		}
-	}
-
-	uri := downloader.URI(primaryURI)
+	uri := downloader.URI(config.URI)
 	// Check if it is a directory
 	if uri.LooksLikeDir() {
 		// It is a directory, we just copy it over in the backend folder
-		if err := cp.Copy(string(uri), backendPath); err != nil {
+		if err := cp.Copy(config.URI, backendPath); err != nil {
 			return fmt.Errorf("failed copying: %w", err)
 		}
 	} else {
-		xlog.Debug("Downloading backend", "uri", primaryURI, "backendPath", backendPath)
+		xlog.Debug("Downloading backend", "uri", config.URI, "backendPath", backendPath)
 		if err := uri.DownloadFileWithContext(ctx, backendPath, config.SHA256, 1, 1, downloadStatus, downloadOpts...); err != nil {
 			xlog.Debug("Backend download failed, trying fallback", "backendPath", backendPath, "error", err)

@@ -345,9 +316,8 @@ func InstallBackend(ctx context.Context, systemState *system.SystemState, modelL
 			}

 			success := false
-			// Try to download from mirrors (when development is preferred, the
-			// released image is prepended here as the first fallback).
-			for _, mirror := range mirrors {
+			// Try to download from mirrors
+			for _, mirror := range config.Mirrors {
 				// Check for cancellation before trying next mirror
 				select {
 				case <-ctx.Done():
--- a/core/gallery/backends_devuri_test.go
+++ b/core/gallery/backends_devuri_test.go
@@ -1,26 +0,0 @@
-package gallery
-
-import (
-	. "github.com/onsi/ginkgo/v2"
-	. "github.com/onsi/gomega"
-)
-
-var _ = Describe("developmentURI", func() {
-	const latest, master = "latest", "master"
-
-	It("rewrites a released image to its branch (development) image", func() {
-		got, ok := developmentURI("quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-llama-cpp", latest, master)
-		Expect(ok).To(BeTrue())
-		Expect(got).To(Equal("quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-llama-cpp"))
-	})
-
-	It("leaves an image already on the branch tag untouched", func() {
-		_, ok := developmentURI("quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-llama-cpp", latest, master)
-		Expect(ok).To(BeFalse())
-	})
-
-	It("returns ok=false when there is no released tag to swap", func() {
-		_, ok := developmentURI("oci://localhost/custom-backend:edge", latest, master)
-		Expect(ok).To(BeFalse())
-	})
-})
--- a/core/http/app.go
+++ b/core/http/app.go
@@ -23,10 +23,8 @@ import (

 	"github.com/mudler/LocalAI/core/application"
 	"github.com/mudler/LocalAI/core/schema"
-	"github.com/mudler/LocalAI/core/services/distributed"
 	"github.com/mudler/LocalAI/core/services/finetune"
 	"github.com/mudler/LocalAI/core/services/galleryop"
-	"github.com/mudler/LocalAI/core/services/messaging"
 	"github.com/mudler/LocalAI/core/services/nodes"
 	"github.com/mudler/LocalAI/core/services/quantization"

@@ -402,45 +400,25 @@ func API(application *application.Application) (*echo.Echo, error) {
 	routes.RegisterAgentPoolRoutes(e, application, agentsMw, skillsMw, collectionsMw)
 	// Fine-tuning routes
 	fineTuningMw := auth.RequireFeature(application.AuthDB(), auth.FeatureFineTuning)
-	// In distributed mode pass the shared NATS client + PostgreSQL store so
-	// fine-tune jobs stay consistent across replicas (the SyncedMap broadcasts
-	// mutations and hydrates from the DB); standalone passes nil for both.
-	var ftNats messaging.MessagingClient
-	var ftStore *distributed.FineTuneStore
-	if d := application.Distributed(); d != nil {
-		ftNats = d.Nats
-		if d.DistStores != nil && d.DistStores.FineTune != nil {
-			ftStore = d.DistStores.FineTune
-		}
-	}
 	ftService := finetune.NewFineTuneService(
 		application.ApplicationConfig(),
 		application.ModelLoader(),
 		application.ModelConfigLoader(),
-		ftNats,
-		ftStore,
 	)
+	if d := application.Distributed(); d != nil {
+		ftService.SetNATSClient(d.Nats)
+		if d.DistStores != nil && d.DistStores.FineTune != nil {
+			ftService.SetFineTuneStore(d.DistStores.FineTune)
+		}
+	}
 	routes.RegisterFineTuningRoutes(e, ftService, application.ApplicationConfig(), fineTuningMw)

 	// Quantization routes
 	quantizationMw := auth.RequireFeature(application.AuthDB(), auth.FeatureQuantization)
-	// In distributed mode pass the shared NATS client + PostgreSQL store so
-	// quantization jobs stay consistent across replicas (the SyncedMap broadcasts
-	// mutations and hydrates from the DB); standalone passes nil for both.
-	var quantNats messaging.MessagingClient
-	var quantStore *distributed.QuantStore
-	if d := application.Distributed(); d != nil {
-		quantNats = d.Nats
-		if d.DistStores != nil && d.DistStores.Quant != nil {
-			quantStore = d.DistStores.Quant
-		}
-	}
 	qService := quantization.NewQuantizationService(
 		application.ApplicationConfig(),
 		application.ModelLoader(),
 		application.ModelConfigLoader(),
-		quantNats,
-		quantStore,
 	)
 	routes.RegisterQuantizationRoutes(e, qService, application.ApplicationConfig(), quantizationMw)

--- a/core/http/auth/db_sqlite.go
+++ b/core/http/auth/db_sqlite.go
@@ -3,51 +3,10 @@
 package auth

 import (
-	"net/url"
-	"strings"
-
 	"gorm.io/driver/sqlite"
 	"gorm.io/gorm"
 )

 func openSQLiteDialector(path string) (gorm.Dialector, error) {
-	return sqlite.Open(buildSQLiteDSN(path)), nil
-}
-
-// buildSQLiteDSN augments a SQLite file path with connection pragmas that make
-// the auth DB resilient on slow or contended storage.
-//
-//   - _busy_timeout=5000 makes SQLite retry for up to 5s on SQLITE_BUSY instead
-//     of failing immediately. Network-backed storage (SMB/CIFS/NFS, e.g. Azure
-//     Files) is prone to transient lock contention during migration (see #10506).
-//   - _txlock=immediate takes the write lock at BEGIN, avoiding deadlocks when a
-//     read transaction later upgrades to a write during AutoMigrate.
-//
-// We deliberately do NOT set WAL journal mode: WAL relies on a shared-memory
-// mmap that does not work over SMB/NFS, which is exactly the failing case here.
-//
-// Caller-supplied values for either pragma are preserved.
-func buildSQLiteDSN(path string) string {
-	base := path
-	rawQuery := ""
-	if i := strings.IndexByte(path, '?'); i >= 0 {
-		base = path[:i]
-		rawQuery = path[i+1:]
-	}
-
-	values, err := url.ParseQuery(rawQuery)
-	if err != nil {
-		// An unparseable query string means a hand-crafted DSN we should not
-		// risk corrupting; leave it untouched.
-		return path
-	}
-
-	if values.Get("_busy_timeout") == "" {
-		values.Set("_busy_timeout", "5000")
-	}
-	if values.Get("_txlock") == "" {
-		values.Set("_txlock", "immediate")
-	}
-
-	return base + "?" + values.Encode()
+	return sqlite.Open(path), nil
 }
--- a/core/http/auth/db_sqlite_test.go
+++ b/core/http/auth/db_sqlite_test.go
@@ -1,57 +0,0 @@
-//go:build auth
-
-package auth
-
-import (
-	"net/url"
-	"strings"
-
-	. "github.com/onsi/ginkgo/v2"
-	. "github.com/onsi/gomega"
-)
-
-// parseDSN splits a "base?query" DSN into its base and decoded query values so
-// assertions don't depend on url.Values.Encode()'s key ordering.
-func parseDSN(dsn string) (string, url.Values) {
-	base := dsn
-	rawQuery := ""
-	if i := strings.IndexByte(dsn, '?'); i >= 0 {
-		base = dsn[:i]
-		rawQuery = dsn[i+1:]
-	}
-	values, err := url.ParseQuery(rawQuery)
-	Expect(err).ToNot(HaveOccurred())
-	return base, values
-}
-
-var _ = Describe("buildSQLiteDSN", func() {
-	It("adds busy_timeout and txlock to a plain file path", func() {
-		base, values := parseDSN(buildSQLiteDSN("/data/database.db"))
-		Expect(base).To(Equal("/data/database.db"))
-		Expect(values.Get("_busy_timeout")).To(Equal("5000"))
-		Expect(values.Get("_txlock")).To(Equal("immediate"))
-	})
-
-	It("adds pragmas to an in-memory database", func() {
-		base, values := parseDSN(buildSQLiteDSN(":memory:"))
-		Expect(base).To(Equal(":memory:"))
-		Expect(values.Get("_busy_timeout")).To(Equal("5000"))
-		Expect(values.Get("_txlock")).To(Equal("immediate"))
-	})
-
-	It("preserves an existing query string", func() {
-		base, values := parseDSN(buildSQLiteDSN("/data/database.db?cache=shared"))
-		Expect(base).To(Equal("/data/database.db"))
-		Expect(values.Get("cache")).To(Equal("shared"))
-		Expect(values.Get("_busy_timeout")).To(Equal("5000"))
-		Expect(values.Get("_txlock")).To(Equal("immediate"))
-	})
-
-	It("does not override a caller-supplied busy_timeout or txlock", func() {
-		_, values := parseDSN(buildSQLiteDSN("/data/database.db?_busy_timeout=1000&_txlock=deferred"))
-		Expect(values["_busy_timeout"]).To(HaveLen(1), "_busy_timeout should not be duplicated")
-		Expect(values.Get("_busy_timeout")).To(Equal("1000"))
-		Expect(values["_txlock"]).To(HaveLen(1), "_txlock should not be duplicated")
-		Expect(values.Get("_txlock")).To(Equal("deferred"))
-	})
-})
--- a/core/http/endpoints/localai/config_meta.go
+++ b/core/http/endpoints/localai/config_meta.go
@@ -155,7 +155,7 @@ func AutocompleteEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, a
 // @Param name path string true "Model name"
 // @Success 200 {object} map[string]any "success message"
 // @Router /api/models/config-json/{name} [patch]
-func PatchConfigEndpoint(cl *config.ModelConfigLoader, _ *model.ModelLoader, gs *galleryop.GalleryService, appConfig *config.ApplicationConfig) echo.HandlerFunc {
+func PatchConfigEndpoint(cl *config.ModelConfigLoader, _ *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
 	svc := modeladmin.NewConfigService(cl, appConfig)
 	return func(c echo.Context) error {
 		modelName := c.Param("name")
@@ -173,14 +173,6 @@ func PatchConfigEndpoint(cl *config.ModelConfigLoader, _ *model.ModelLoader, gs
 		if _, err := svc.PatchConfig(c.Request().Context(), modelName, patchMap); err != nil {
 			return c.JSON(httpStatusForModelAdminError(err), map[string]any{"error": err.Error()})
 		}
-
-		// Patch rewrites the config on disk and reloads only the local loader;
-		// tell peers to refresh so the change is consistent across replicas.
-		// No-op in standalone mode.
-		if gs != nil {
-			gs.BroadcastModelsChanged(modelName, "install")
-		}
-
 		return c.JSON(http.StatusOK, map[string]any{
 			"success": true,
 			"message": fmt.Sprintf("Model '%s' updated successfully", modelName),
--- a/core/http/endpoints/localai/config_meta_test.go
+++ b/core/http/endpoints/localai/config_meta_test.go
@@ -45,7 +45,7 @@ var _ = Describe("Config Metadata Endpoints", func() {
 		app = echo.New()
 		app.GET("/api/models/config-metadata", ConfigMetadataEndpoint())
 		app.GET("/api/models/config-metadata/autocomplete/:provider", AutocompleteEndpoint(configLoader, modelLoader, appConfig))
-		app.PATCH("/api/models/config-json/:name", PatchConfigEndpoint(configLoader, modelLoader, nil, appConfig))
+		app.PATCH("/api/models/config-json/:name", PatchConfigEndpoint(configLoader, modelLoader, appConfig))
 	})

 	AfterEach(func() {
--- a/core/http/endpoints/localai/edit_model.go
+++ b/core/http/endpoints/localai/edit_model.go
@@ -10,7 +10,6 @@ import (
 	"github.com/labstack/echo/v4"
 	"github.com/mudler/LocalAI/core/config"
 	httpUtils "github.com/mudler/LocalAI/core/http/middleware"
-	"github.com/mudler/LocalAI/core/services/galleryop"
 	"github.com/mudler/LocalAI/core/services/modeladmin"
 	"github.com/mudler/LocalAI/internal"
 	"github.com/mudler/LocalAI/pkg/model"
@@ -56,7 +55,7 @@ func GetEditModelPage(cl *config.ModelConfigLoader, appConfig *config.Applicatio
 }

 // EditModelEndpoint handles updating existing model configurations
-func EditModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, gs *galleryop.GalleryService, appConfig *config.ApplicationConfig) echo.HandlerFunc {
+func EditModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
 	svc := modeladmin.NewConfigService(cl, appConfig)
 	return func(c echo.Context) error {
 		modelName := c.Param("name")
@@ -71,17 +70,6 @@ func EditModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, gs *
 		if err != nil {
 			return c.JSON(httpStatusForModelAdminError(err), ModelResponse{Success: false, Error: err.Error()})
 		}
-
-		// Tell peer replicas to refresh their in-memory config: this endpoint
-		// only reloaded the local loader. A rename is a delete of the old name
-		// plus an install of the new one. No-op in standalone mode.
-		if gs != nil {
-			if result.Renamed {
-				gs.BroadcastModelsChanged(result.OldName, "delete")
-			}
-			gs.BroadcastModelsChanged(result.NewName, "install")
-		}
-
 		msg := fmt.Sprintf("Model '%s' updated successfully. Model has been reloaded with new configuration.", result.NewName)
 		if result.Renamed {
 			msg = fmt.Sprintf("Model '%s' renamed to '%s' and updated successfully.", result.OldName, result.NewName)
--- a/core/http/endpoints/localai/edit_model_test.go
+++ b/core/http/endpoints/localai/edit_model_test.go
@@ -56,7 +56,7 @@ var _ = Describe("Edit Model test", func() {
 			app := echo.New()
 			// Set up a simple renderer for the test
 			app.Renderer = &testRenderer{}
-			app.POST("/import-model", ImportModelEndpoint(modelConfigLoader, nil, applicationConfig))
+			app.POST("/import-model", ImportModelEndpoint(modelConfigLoader, applicationConfig))
 			app.GET("/edit-model/:name", GetEditModelPage(modelConfigLoader, applicationConfig))

 			requestBody := bytes.NewBufferString(`{"name": "foo", "backend": "foo", "model": "foo"}`)
@@ -106,7 +106,7 @@ var _ = Describe("Edit Model test", func() {
 			Expect(exists).To(BeTrue())

 			app := echo.New()
-			app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, nil, applicationConfig))
+			app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, applicationConfig))

 			newYAML := "name: newname\nbackend: llama\nmodel: foo\n"
 			req := httptest.NewRequest("POST", "/models/edit/oldname", bytes.NewBufferString(newYAML))
@@ -163,7 +163,7 @@ var _ = Describe("Edit Model test", func() {
 			Expect(modelConfigLoader.LoadModelConfigsFromPath(tempDir)).To(Succeed())

 			app := echo.New()
-			app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, nil, applicationConfig))
+			app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, applicationConfig))

 			req := httptest.NewRequest(
 				"POST",
@@ -204,7 +204,7 @@ var _ = Describe("Edit Model test", func() {
 			Expect(modelConfigLoader.LoadModelConfigsFromPath(tempDir)).To(Succeed())

 			app := echo.New()
-			app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, nil, applicationConfig))
+			app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, applicationConfig))

 			req := httptest.NewRequest(
 				"POST",
--- a/core/http/endpoints/localai/import_model.go
+++ b/core/http/endpoints/localai/import_model.go
@@ -125,7 +125,7 @@ func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.Appl
 }

 // ImportModelEndpoint handles creating new model configurations
-func ImportModelEndpoint(cl *config.ModelConfigLoader, gs *galleryop.GalleryService, appConfig *config.ApplicationConfig) echo.HandlerFunc {
+func ImportModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		// Get the raw body
 		body, err := io.ReadAll(c.Request().Body)
@@ -245,13 +245,6 @@ func ImportModelEndpoint(cl *config.ModelConfigLoader, gs *galleryop.GalleryServ
 			}
 			return c.JSON(http.StatusInternalServerError, response)
 		}
-		// Tell peer replicas to load the newly-created config from the shared
-		// models dir: this endpoint only reloaded the local loader. No-op in
-		// standalone mode.
-		if gs != nil {
-			gs.BroadcastModelsChanged(modelConfig.Name, "install")
-		}
-
 		// Return success response
 		response := ModelResponse{
 			Success:  true,
--- a/core/http/endpoints/localai/nodes.go
+++ b/core/http/endpoints/localai/nodes.go
@@ -60,10 +60,7 @@ func GetNodeEndpoint(registry *nodes.NodeRegistry) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		ctx := c.Request().Context()
 		id := c.Param("id")
-		// GetWithExtras (not Get) so the response carries the node's labels,
-		// loaded-model count, and in-flight total — the bare BackendNode keeps
-		// labels in a separate table, leaving the detail view's label list empty.
-		node, err := registry.GetWithExtras(ctx, id)
+		node, err := registry.Get(ctx, id)
 		if err != nil {
 			return c.JSON(http.StatusNotFound, nodeError(http.StatusNotFound, "node not found"))
 		}
--- a/core/http/endpoints/localai/toggle_model.go
+++ b/core/http/endpoints/localai/toggle_model.go
@@ -7,7 +7,6 @@ import (

 	"github.com/labstack/echo/v4"
 	"github.com/mudler/LocalAI/core/config"
-	"github.com/mudler/LocalAI/core/services/galleryop"
 	"github.com/mudler/LocalAI/core/services/modeladmin"
 	"github.com/mudler/LocalAI/pkg/model"
 )
@@ -25,7 +24,7 @@ import (
 // @Failure      404  {object}  ModelResponse
 // @Failure      500  {object}  ModelResponse
 // @Router       /api/models/{name}/{action} [put]
-func ToggleStateModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, gs *galleryop.GalleryService, appConfig *config.ApplicationConfig) echo.HandlerFunc {
+func ToggleStateModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
 	svc := modeladmin.NewConfigService(cl, appConfig)
 	return func(c echo.Context) error {
 		modelName := c.Param("name")
@@ -37,14 +36,6 @@ func ToggleStateModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoade
 		if err != nil {
 			return c.JSON(httpStatusForModelAdminError(err), ModelResponse{Success: false, Error: err.Error()})
 		}
-
-		// Enabling/disabling rewrites the config on disk and reloads only the
-		// local loader; tell peers to refresh so the model's availability is
-		// consistent across replicas. No-op in standalone mode.
-		if gs != nil {
-			gs.BroadcastModelsChanged(modelName, "install")
-		}
-
 		msg := fmt.Sprintf("Model '%s' has been %sd successfully.", modelName, action)
 		if action == modeladmin.ActionDisable {
 			msg += " The model will not be loaded on demand until re-enabled."
--- a/core/http/routes/localai.go
+++ b/core/http/routes/localai.go
@@ -72,19 +72,19 @@ func RegisterLocalAIRoutes(router *echo.Echo,
 		router.POST("/backends/upgrades/check", backendGalleryEndpointService.CheckUpgradesEndpoint(), adminMiddleware)
 		router.POST("/backends/upgrade/:name", backendGalleryEndpointService.UpgradeBackendEndpoint(), adminMiddleware)
 		// Custom model import endpoint
-		router.POST("/models/import", localai.ImportModelEndpoint(cl, galleryService, appConfig), adminMiddleware)
+		router.POST("/models/import", localai.ImportModelEndpoint(cl, appConfig), adminMiddleware)

 		// URI model import endpoint
 		router.POST("/models/import-uri", localai.ImportModelURIEndpoint(cl, appConfig, galleryService, opcache), adminMiddleware)

 		// Custom model edit endpoint
-		router.POST("/models/edit/:name", localai.EditModelEndpoint(cl, ml, galleryService, appConfig), adminMiddleware)
+		router.POST("/models/edit/:name", localai.EditModelEndpoint(cl, ml, appConfig), adminMiddleware)

 		// List model aliases endpoint
 		router.GET("/api/aliases", localai.ListAliasesEndpoint(cl), adminMiddleware)

 		// Toggle model enable/disable endpoint
-		router.PUT("/models/toggle-state/:name/:action", localai.ToggleStateModelEndpoint(cl, ml, galleryService, appConfig), adminMiddleware)
+		router.PUT("/models/toggle-state/:name/:action", localai.ToggleStateModelEndpoint(cl, ml, appConfig), adminMiddleware)

 		// Toggle model pinned status endpoint
 		router.PUT("/models/toggle-pinned/:name/:action", localai.TogglePinnedModelEndpoint(cl, appConfig, func() {
--- a/core/http/routes/ui_api.go
+++ b/core/http/routes/ui_api.go
@@ -922,7 +922,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
 	app.GET("/api/models/config-metadata/autocomplete/:provider", localai.AutocompleteEndpoint(cl, ml, appConfig), adminMiddleware)

 	// PATCH config endpoint - partial update using nested JSON merge
-	app.PATCH("/api/models/config-json/:name", localai.PatchConfigEndpoint(cl, ml, galleryService, appConfig), adminMiddleware)
+	app.PATCH("/api/models/config-json/:name", localai.PatchConfigEndpoint(cl, ml, appConfig), adminMiddleware)

 	// VRAM estimation endpoint
 	app.POST("/api/models/vram-estimate", localai.VRAMEstimateEndpoint(cl, appConfig), adminMiddleware)
--- a/core/schema/message_test.go
+++ b/core/schema/message_test.go
@@ -68,32 +68,6 @@ var _ = Describe("LLM tests", func() {
 			Expect(protoMessages[0].Content).To(Equal("Hello World"))
 		})

-		// Regression for mudler/LocalAI#10524: a text part whose inner text is
-		// itself a JSON-array string (mealie sends an ingredient list) must
-		// flatten to that exact string verbatim. ToProto must NOT escape or
-		// restructure it - the C++ backend then treats it as opaque text. This
-		// pins the precise Go-side input that produced the "unsupported
-		// content[].type" gRPC error before the backend stopped re-parsing it.
-		It("flattens a JSON-array-looking text part to the verbatim string (#10524)", func() {
-			ingredients := `["1/4 cup brown sugar, packed","1 pound ground beef"]`
-			messages := Messages{
-				{
-					Role: "user",
-					Content: []any{
-						map[string]any{
-							"type": "text",
-							"text": ingredients,
-						},
-					},
-				},
-			}
-
-			protoMessages := messages.ToProto()
-
-			Expect(protoMessages).To(HaveLen(1))
-			Expect(protoMessages[0].Content).To(Equal(ingredients))
-		})
-
 		It("should convert message with tool_calls", func() {
 			messages := Messages{
 				{
--- a/core/services/advisorylock/advisorylock.go
+++ b/core/services/advisorylock/advisorylock.go
@@ -4,59 +4,14 @@ import (
 	"context"
 	"fmt"
 	"hash/fnv"
-	"strings"
-	"sync"

 	"gorm.io/gorm"
 )

-// localLocks holds one buffered channel (capacity 1) per lock key, used as an
-// in-process mutex for non-PostgreSQL dialects (SQLite). A SQLite auth DB is
-// effectively single-process, so serializing guarded sections within this
-// process is sufficient - we cannot and need not coordinate across processes
-// the way a PostgreSQL advisory lock does.
-var (
-	localLocksMu sync.Mutex
-	localLocks   = map[int64]chan struct{}{}
-)
-
-// localLockChan returns the per-key buffered channel, creating it on first use.
-func localLockChan(key int64) chan struct{} {
-	localLocksMu.Lock()
-	defer localLocksMu.Unlock()
-	ch, ok := localLocks[key]
-	if !ok {
-		ch = make(chan struct{}, 1)
-		localLocks[key] = ch
-	}
-	return ch
-}
-
-// isPostgres reports whether the gorm dialect is PostgreSQL. Anything else
-// (SQLite and any non-postgres dialect) uses the in-process fallback, because
-// the pg_* advisory lock functions only exist on PostgreSQL.
-func isPostgres(db *gorm.DB) bool {
-	return strings.Contains(db.Dialector.Name(), "postgres")
-}
-
-// TryWithLockCtx attempts to acquire a lock and run fn without blocking.
-// Returns (true, nil) if the lock was acquired and fn executed, (false, nil) if
-// the lock was already held, or (false, error) on failure.
-//
-// On PostgreSQL it uses pg_try_advisory_lock (cross-process). On other dialects
-// (SQLite) it uses a non-blocking in-process lock keyed by key.
+// TryWithLockCtx attempts to acquire a PostgreSQL advisory lock using the provided context.
+// Returns (true, nil) if the lock was acquired and fn executed, (false, nil) if the lock
+// was already held, or (false, error) on failure.
 func TryWithLockCtx(ctx context.Context, db *gorm.DB, key int64, fn func() error) (bool, error) {
-	if !isPostgres(db) {
-		ch := localLockChan(key)
-		select {
-		case ch <- struct{}{}:
-			defer func() { <-ch }()
-			return true, fn()
-		default:
-			return false, nil
-		}
-	}
-
 	sqlDB, err := db.DB()
 	if err != nil {
 		return false, fmt.Errorf("get sql.DB: %w", err)
@@ -95,31 +50,9 @@ func KeyFromString(s string) int64 {
 	return int64(h.Sum64()>>1) | 0x100000000
 }

-// WithLockCtx acquires a lock for key, runs fn, then releases it, respecting
-// context cancellation. If ctx is cancelled while waiting for the lock, the
-// function returns ctx.Err().
-//
-// On PostgreSQL it uses pg_advisory_lock (cross-process). On other dialects
-// (SQLite) it falls back to a blocking in-process lock keyed by key, which is
-// sufficient because a SQLite auth DB is effectively single-process.
+// WithLockCtx is like WithLock but respects context cancellation.
+// If ctx is cancelled while waiting for the lock, the function returns ctx.Err().
 func WithLockCtx(ctx context.Context, db *gorm.DB, key int64, fn func() error) error {
-	if !isPostgres(db) {
-		// Honor an already-cancelled context before attempting acquisition:
-		// select picks a ready case at random, so without this an already-free
-		// lock could be taken despite a cancelled ctx.
-		if err := ctx.Err(); err != nil {
-			return err
-		}
-		ch := localLockChan(key)
-		select {
-		case ch <- struct{}{}:
-			defer func() { <-ch }()
-			return fn()
-		case <-ctx.Done():
-			return ctx.Err()
-		}
-	}
-
 	sqlDB, err := db.DB()
 	if err != nil {
 		return fmt.Errorf("advisorylock: getting sql.DB: %w", err)
--- a/core/services/advisorylock/advisorylock_sqlite_test.go
+++ b/core/services/advisorylock/advisorylock_sqlite_test.go
@@ -1,129 +0,0 @@
-package advisorylock
-
-import (
-	"context"
-	"sync"
-	"sync/atomic"
-	"time"
-
-	. "github.com/onsi/ginkgo/v2"
-	. "github.com/onsi/gomega"
-
-	"gorm.io/driver/sqlite"
-	"gorm.io/gorm"
-)
-
-// These specs run against an in-memory SQLite DB and therefore do NOT require
-// Docker, unlike the PostgreSQL testcontainer specs.
-var _ = Describe("AdvisoryLock (SQLite fallback)", Label("sqlite"), func() {
-	var db *gorm.DB
-
-	BeforeEach(func() {
-		var err error
-		db, err = gorm.Open(sqlite.Open("file::memory:?cache=shared"), &gorm.Config{})
-		Expect(err).ToNot(HaveOccurred())
-		Expect(db.Dialector.Name()).To(ContainSubstring("sqlite"))
-	})
-
-	It("WithLockCtx executes fn and returns no error on SQLite", func() {
-		const lockKey int64 = 12001
-		executed := false
-
-		err := WithLockCtx(context.Background(), db, lockKey, func() error {
-			executed = true
-			return nil
-		})
-		Expect(err).ToNot(HaveOccurred())
-		Expect(executed).To(BeTrue(), "function should have run under the in-process lock")
-	})
-
-	It("WithLockCtx serializes concurrent goroutines on the same key", func() {
-		const lockKey int64 = 12002
-
-		var (
-			mu          sync.Mutex
-			maxRunning  int32
-			running     int32
-			concurrency int32
-		)
-
-		var wg sync.WaitGroup
-
-		for range 2 {
-			wg.Go(func() {
-				defer GinkgoRecover()
-				err := WithLockCtx(context.Background(), db, lockKey, func() error {
-					cur := atomic.AddInt32(&running, 1)
-					mu.Lock()
-					if cur > maxRunning {
-						maxRunning = cur
-					}
-					if cur > 1 {
-						atomic.AddInt32(&concurrency, 1)
-					}
-					mu.Unlock()
-
-					time.Sleep(50 * time.Millisecond)
-
-					atomic.AddInt32(&running, -1)
-					return nil
-				})
-				Expect(err).ToNot(HaveOccurred())
-			})
-		}
-
-		wg.Wait()
-
-		Expect(maxRunning).To(BeNumerically("<=", 1), "expected max 1 goroutine inside lock at a time")
-		Expect(concurrency).To(BeZero(), "detected concurrent execution inside advisory lock")
-	})
-
-	It("WithLockCtx returns an error and does not run fn with an already-cancelled context", func() {
-		const lockKey int64 = 12003
-		ctx, cancel := context.WithCancel(context.Background())
-		cancel()
-
-		err := WithLockCtx(ctx, db, lockKey, func() error {
-			Fail("function should not run with a cancelled context")
-			return nil
-		})
-		Expect(err).To(HaveOccurred())
-	})
-
-	It("TryWithLockCtx returns (true, nil) when free and (false, nil) when held", func() {
-		const lockKey int64 = 12004
-
-		acquired, err := TryWithLockCtx(context.Background(), db, lockKey, func() error {
-			return nil
-		})
-		Expect(err).ToNot(HaveOccurred())
-		Expect(acquired).To(BeTrue(), "expected TryWithLockCtx to acquire the free lock")
-
-		// Hold the lock in one goroutine while a concurrent TryWithLockCtx
-		// attempts to acquire the same key.
-		held := make(chan struct{})
-		release := make(chan struct{})
-		var wg sync.WaitGroup
-		wg.Go(func() {
-			defer GinkgoRecover()
-			ok, err := TryWithLockCtx(context.Background(), db, lockKey, func() error {
-				close(held)
-				<-release
-				return nil
-			})
-			Expect(err).ToNot(HaveOccurred())
-			Expect(ok).To(BeTrue())
-		})
-
-		<-held
-		ok, err := TryWithLockCtx(context.Background(), db, lockKey, func() error {
-			Fail("function should not run while lock is held")
-			return nil
-		})
-		Expect(err).ToNot(HaveOccurred())
-		Expect(ok).To(BeFalse(), "expected TryWithLockCtx to fail to acquire a held lock")
-
-		close(release)
-		wg.Wait()
-	})
-})
--- a/core/services/agentpool/agent_jobs.go
+++ b/core/services/agentpool/agent_jobs.go
@@ -30,8 +30,6 @@ import (
 	mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/core/services/jobs"
-	"github.com/mudler/LocalAI/core/services/messaging"
-	"github.com/mudler/LocalAI/core/services/syncstate"
 	"github.com/mudler/LocalAI/core/templates"
 	"github.com/mudler/LocalAI/pkg/httpclient"
 	"github.com/mudler/LocalAI/pkg/model"
@@ -45,18 +43,8 @@ type AgentJobService struct {
 	configLoader *config.ModelConfigLoader
 	evaluator    *templates.Evaluator

-	// tasks is the cross-replica task store: an in-memory map kept consistent
-	// across replicas via NATS, with read-through to the configured persister
-	// (file in standalone, PostgreSQL in distributed). Unlike jobs - which already
-	// converge via the dispatcher + DB read-through - tasks previously read
-	// in-memory only, so ListTasks went stale on non-originating replicas.
-	tasks *syncstate.SyncedMap[string, schema.Task]
-	// taskNats is the distributed NATS client backing the tasks SyncedMap. It is
-	// not available at construction time, so it is injected via SetTaskSyncNATS
-	// during distributed wiring; nil keeps tasks in-memory-only (standalone).
-	taskNats messaging.MessagingClient
-
 	// Storage (in-memory primary, persister for secondary persistence)
+	tasks     *xsync.SyncedMap[string, schema.Task]
 	jobs      *xsync.SyncedMap[string, schema.Job]
 	persister JobPersister
 	userID    string // Scoping: empty for global (main service), set for per-user instances
@@ -108,31 +96,6 @@ func (s *AgentJobService) SetDistributedJobStore(store *jobs.JobStore) {
 	s.persister = &dbJobPersister{store: store}
 }

-// SetTaskSyncNATS wires the distributed NATS client used to keep agent *tasks*
-// consistent across replicas (jobs already converge via the dispatcher + DB
-// read-through, so they are left untouched). The client is not available when the
-// service is constructed, so it is injected here during distributed wiring and the
-// tasks SyncedMap is rebuilt to pick it up. It is always called before Start /
-// hydrate, while the map is still empty, so rebuilding loses no state. Passing nil
-// (standalone) keeps the map in-memory-only with no broadcast.
-func (s *AgentJobService) SetTaskSyncNATS(nats messaging.MessagingClient) {
-	s.taskNats = nats
-	s.buildTasksMap()
-}
-
-// buildTasksMap (re)constructs the cross-replica tasks SyncedMap from the current
-// taskNats. The Store adapter reads s.persister/s.userID live, so a persister swap
-// (SetDistributedJobStore) needs no rebuild; only the NATS client, fixed at
-// New-time, forces one - hence SetTaskSyncNATS calls this.
-func (s *AgentJobService) buildTasksMap() {
-	s.tasks = syncstate.New(syncstate.Config[string, schema.Task]{
-		Name:  "agent.tasks",
-		Key:   func(t schema.Task) string { return t.ID },
-		Nats:  s.taskNats,
-		Store: &taskStoreAdapter{svc: s},
-	})
-}
-
 // Dispatcher returns the distributed dispatcher (nil if not in distributed mode).
 func (s *AgentJobService) Dispatcher() DistributedDispatcher {
 	return s.dispatcher
@@ -143,6 +106,13 @@ func (s *AgentJobService) DBStore() *jobs.JobStore {
 	return s.rawDBStore
 }

+// saveTasks hands task to the configured persister (file or DB); a failure is logged as a warning, not returned.
+func (s *AgentJobService) saveTasks(task schema.Task) {
+	if err := s.persister.SaveTask(s.userID, task); err != nil {
+		xlog.Warn("Failed to persist task", "error", err, "task_id", task.ID)
+	}
+}
+
 // saveJobs persists jobs via the configured persister (file or DB).
 func (s *AgentJobService) saveJobs(job schema.Job) {
 	if err := s.persister.SaveJob(s.userID, job); err != nil {
@@ -159,8 +129,18 @@ func (s *AgentJobService) LoadFromDB() {

 // loadFromPersister loads tasks and jobs from the configured persister into memory.
 func (s *AgentJobService) loadFromPersister() {
-	if err := s.hydrateTasks(s.appConfig.Context); err != nil {
+	if tasks, err := s.persister.LoadTasks(s.userID); err != nil {
 		xlog.Warn("Failed to load tasks from persister", "error", err)
+	} else {
+		for _, task := range tasks {
+			s.tasks.Set(task.ID, task)
+			if task.Enabled && task.Cron != "" {
+				if err := s.ScheduleCronTask(task); err != nil {
+					xlog.Warn("Failed to schedule cron task on load", "error", err, "task_id", task.ID)
+				}
+			}
+		}
+		xlog.Info("Loaded tasks from persister", "count", len(tasks))
 	}

 	if loadedJobs, err := s.persister.LoadJobs(s.userID); err != nil {
@@ -173,27 +153,6 @@ func (s *AgentJobService) loadFromPersister() {
 	}
 }

-// hydrateTasks loads tasks into the cross-replica SyncedMap and (re)schedules
-// cron entries for enabled tasks. Hydration goes through the SyncedMap's Store
-// read-through (Start), not Set, so it neither re-persists nor re-broadcasts the
-// loaded tasks. Each service instance hydrates exactly once: the main service via
-// Start -> loadFromPersister, per-user services via LoadFromDB or LoadTasksFromFile.
-func (s *AgentJobService) hydrateTasks(ctx context.Context) error {
-	if err := s.tasks.Start(ctx); err != nil {
-		return err
-	}
-	tasks := s.tasks.List()
-	for _, task := range tasks {
-		if task.Enabled && task.Cron != "" {
-			if err := s.ScheduleCronTask(task); err != nil {
-				xlog.Warn("Failed to schedule cron task on load", "error", err, "task_id", task.ID)
-			}
-		}
-	}
-	xlog.Info("Loaded tasks from persister", "count", len(tasks))
-	return nil
-}
-
 // JobExecution represents a job to be executed
 type JobExecution struct {
 	Job    schema.Job
@@ -241,19 +200,21 @@ func NewAgentJobServiceWithPaths(
 ) *AgentJobService {
 	retentionDays := cmp.Or(appConfig.AgentJobRetentionDays, 30)

+	tasks := xsync.NewSyncedMap[string, schema.Task]()
 	jobsMap := xsync.NewSyncedMap[string, schema.Job]()

-	s := &AgentJobService{
+	return &AgentJobService{
 		appConfig:    appConfig,
 		modelLoader:  modelLoader,
 		configLoader: configLoader,
 		evaluator:    evaluator,
+		tasks:        tasks,
 		jobs:         jobsMap,
 		persister: &fileJobPersister{
+			tasks:     tasks,
 			jobs:      jobsMap,
 			tasksFile: tasksFile,
 			jobsFile:  jobsFile,
-			taskSet:   make(map[string]schema.Task),
 		},
 		jobQueue:      make(chan JobExecution, 100), // Buffer for 100 jobs
 		cancellations: xsync.NewSyncedMap[string, context.CancelFunc](),
@@ -261,17 +222,25 @@ func NewAgentJobServiceWithPaths(
 		cronEntries:   xsync.NewSyncedMap[string, cron.EntryID](),
 		retentionDays: retentionDays,
 	}
-	// Build the cross-replica tasks map standalone (nil NATS); SetTaskSyncNATS
-	// rebuilds it with the distributed client once that is available, before Start.
-	s.buildTasksMap()
-	return s
 }

 // LoadTasksFromFile loads tasks from the persister into the in-memory map
 // and schedules cron entries. Named "FromFile" for backward compat; in DB
 // mode it loads from the database.
 func (s *AgentJobService) LoadTasksFromFile() error {
-	return s.hydrateTasks(s.appConfig.Context)
+	tasks, err := s.persister.LoadTasks(s.userID)
+	if err != nil {
+		return err
+	}
+	for _, task := range tasks {
+		s.tasks.Set(task.ID, task)
+		if task.Enabled && task.Cron != "" {
+			if err := s.ScheduleCronTask(task); err != nil {
+				xlog.Warn("Failed to schedule cron task on load", "error", err, "task_id", task.ID)
+			}
+		}
+	}
+	return nil
 }

 // SaveTasksToFile flushes the current tasks map via the persister. File
@@ -324,12 +293,8 @@ func (s *AgentJobService) CreateTask(task schema.Task) (string, error) {
 		task.Enabled = true // Default to enabled
 	}

-	// Store task: Set updates the in-memory map, write-throughs to the persister
-	// (file or DB), and broadcasts the create to peer replicas. Background ctx
-	// because CreateTask carries no request ctx (mirrors the finetune service).
-	if err := s.tasks.Set(context.Background(), task); err != nil {
-		return "", fmt.Errorf("failed to persist task: %w", err)
-	}
+	// Store task
+	s.tasks.Set(id, task)

 	// Schedule cron if enabled and has cron expression
 	if task.Enabled && task.Cron != "" {
@@ -338,15 +303,16 @@ func (s *AgentJobService) CreateTask(task schema.Task) (string, error) {
 		}
 	}

+	s.saveTasks(task)
 	return id, nil
 }

 // UpdateTask updates an existing task
 func (s *AgentJobService) UpdateTask(id string, task schema.Task) error {
-	existing, ok := s.tasks.Get(id)
-	if !ok {
+	if !s.tasks.Exists(id) {
 		return fmt.Errorf("%w: %s", ErrTaskNotFound, id)
 	}
+	existing := s.tasks.Get(id)

 	// Preserve ID and CreatedAt
 	task.ID = id
@@ -358,10 +324,8 @@ func (s *AgentJobService) UpdateTask(id string, task schema.Task) error {
 		s.UnscheduleCronTask(id)
 	}

-	// Store updated task: write-through + broadcast (see CreateTask).
-	if err := s.tasks.Set(context.Background(), task); err != nil {
-		return fmt.Errorf("failed to persist task: %w", err)
-	}
+	// Store updated task
+	s.tasks.Set(id, task)

 	// Schedule new cron if enabled and has cron expression
 	if task.Enabled && task.Cron != "" {
@@ -370,22 +334,24 @@ func (s *AgentJobService) UpdateTask(id string, task schema.Task) error {
 		}
 	}

+	s.saveTasks(task)
 	return nil
 }

 // DeleteTask deletes a task
 func (s *AgentJobService) DeleteTask(id string) error {
-	if _, ok := s.tasks.Get(id); !ok {
+	if !s.tasks.Exists(id) {
 		return fmt.Errorf("%w: %s", ErrTaskNotFound, id)
 	}

 	// Unschedule cron
 	s.UnscheduleCronTask(id)

-	// Delete removes from the in-memory map, deletes from the persister, and
-	// broadcasts the removal to peer replicas.
-	if err := s.tasks.Delete(context.Background(), id); err != nil {
-		xlog.Warn("Failed to delete task from store", "error", err, "task_id", id)
+	// Remove from memory
+	s.tasks.Delete(id)
+
+	if err := s.persister.DeleteTask(id); err != nil {
+		xlog.Warn("Failed to delete task from persister", "error", err, "task_id", id)
 	}

 	return nil
@@ -393,8 +359,8 @@ func (s *AgentJobService) DeleteTask(id string) error {

 // GetTask retrieves a task by ID
 func (s *AgentJobService) GetTask(id string) (*schema.Task, error) {
-	task, ok := s.tasks.Get(id)
-	if !ok {
+	task := s.tasks.Get(id)
+	if task.ID == "" {
 		return nil, fmt.Errorf("%w: %s", ErrTaskNotFound, id)
 	}
 	return &task, nil
@@ -402,7 +368,7 @@ func (s *AgentJobService) GetTask(id string) (*schema.Task, error) {

 // ListTasks returns all tasks, sorted by creation date (newest first)
 func (s *AgentJobService) ListTasks() []schema.Task {
-	tasks := s.tasks.List()
+	tasks := s.tasks.Values()
 	// Sort by CreatedAt descending (newest first), then by Name for stability
 	slices.SortFunc(tasks, func(a, b schema.Task) int {
 		if a.CreatedAt.Equal(b.CreatedAt) {
@@ -431,8 +397,8 @@ func (s *AgentJobService) buildPrompt(templateStr string, params map[string]stri
 // ExecuteJob creates and queues a job for execution
 // multimedia can be nil for backward compatibility
 func (s *AgentJobService) ExecuteJob(taskID string, params map[string]string, triggeredBy string, multimedia *schema.MultimediaAttachment) (string, error) {
-	task, ok := s.tasks.Get(taskID)
-	if !ok {
+	task := s.tasks.Get(taskID)
+	if task.ID == "" {
 		return "", fmt.Errorf("%w: %s", ErrTaskNotFound, taskID)
 	}

@@ -1485,12 +1451,6 @@ func (s *AgentJobService) Stop() error {
 	if s.cronScheduler != nil {
 		s.cronScheduler.Stop()
 	}
-	// Release the tasks SyncedMap subscription / background workers.
-	if s.tasks != nil {
-		if err := s.tasks.Close(); err != nil {
-			xlog.Warn("Error closing tasks sync map", "error", err)
-		}
-	}
 	xlog.Info("AgentJobService stopped")
 	return nil
 }
--- a/core/services/agentpool/job_persister_file.go
+++ b/core/services/agentpool/job_persister_file.go
@@ -14,38 +14,24 @@ import (
 )

 // fileJobPersister persists tasks and jobs to JSON files.
-//
-// Jobs serialize the service's in-memory jobs syncmap on each save (bulk write).
-// Tasks are kept in this persister's own taskSet map instead: the tasks SyncedMap
-// calls SaveTask/DeleteTask while holding its internal lock (write-through), so
-// reading back the SyncedMap here would re-enter that lock and deadlock. The
-// self-contained taskSet, seeded by LoadTasks, lets a per-task write rewrite the
-// whole bulk file without touching the SyncedMap.
-//
-// Runtime reads (GetJob/ListJobs) return nil (the in-memory state is the
-// authoritative source); LoadTasks/LoadJobs bootstrap state at startup.
+// It holds references to the service's syncmaps and serializes the entire
+// map contents on each save (bulk write). Reads at runtime return nil
+// (the in-memory map is the authoritative source); LoadTasks/LoadJobs
+// are used only at startup to bootstrap the syncmaps.
 type fileJobPersister struct {
+	tasks     *xsync.SyncedMap[string, schema.Task]
 	jobs      *xsync.SyncedMap[string, schema.Job]
 	tasksFile string
 	jobsFile  string
 	mu        sync.Mutex
-	// taskSet is the persister's own view of all tasks, seeded by LoadTasks and
-	// updated by SaveTask/DeleteTask. The bulk JSON file is rewritten from it.
-	taskSet map[string]schema.Task
 }

-func (p *fileJobPersister) SaveTask(_ string, task schema.Task) error {
-	p.mu.Lock()
-	defer p.mu.Unlock()
-	p.taskSet[task.ID] = task
-	return p.writeTasksLocked()
+func (p *fileJobPersister) SaveTask(_ string, _ schema.Task) error {
+	return p.saveTasksToFile()
 }

-func (p *fileJobPersister) DeleteTask(taskID string) error {
-	p.mu.Lock()
-	defer p.mu.Unlock()
-	delete(p.taskSet, taskID)
-	return p.writeTasksLocked()
+func (p *fileJobPersister) DeleteTask(_ string) error {
+	return p.saveTasksToFile()
 }

 func (p *fileJobPersister) SaveJob(_ string, _ schema.Job) error {
@@ -57,9 +43,7 @@ func (p *fileJobPersister) DeleteJob(_ string) error {
 }

 func (p *fileJobPersister) FlushTasks() error {
-	p.mu.Lock()
-	defer p.mu.Unlock()
-	return p.writeTasksLocked()
+	return p.saveTasksToFile()
 }

 func (p *fileJobPersister) FlushJobs() error {
@@ -99,12 +83,6 @@ func (p *fileJobPersister) LoadTasks(_ string) ([]schema.Task, error) {
 		return nil, fmt.Errorf("failed to parse tasks file: %w", err)
 	}

-	// Seed the in-memory set so subsequent per-task SaveTask/DeleteTask merge into
-	// (rather than overwrite) the persisted tasks when the bulk file is rewritten.
-	for _, t := range tf.Tasks {
-		p.taskSet[t.ID] = t
-	}
-
 	xlog.Info("Loaded tasks from file", "count", len(tf.Tasks))
 	return tf.Tasks, nil
 }
@@ -140,19 +118,18 @@ func (p *fileJobPersister) CleanupOldJobs(_ time.Duration) (int64, error) {
 	return 0, nil // cleanup handled via in-memory filtering
 }

-// writeTasksLocked serializes the persister's task set to the JSON file. Callers
-// must hold p.mu.
-func (p *fileJobPersister) writeTasksLocked() error {
+// saveTasksToFile serializes the entire tasks map to the JSON file; it is a no-op when tasksFile is empty.
+func (p *fileJobPersister) saveTasksToFile() error {
 	if p.tasksFile == "" {
 		return nil
 	}

-	tasks := make([]schema.Task, 0, len(p.taskSet))
-	for _, t := range p.taskSet {
-		tasks = append(tasks, t)
-	}
+	p.mu.Lock()
+	defer p.mu.Unlock()

-	tf := schema.TasksFile{Tasks: tasks}
+	tf := schema.TasksFile{
+		Tasks: p.tasks.Values(),
+	}

 	data, err := json.MarshalIndent(tf, "", "  ")
 	if err != nil {
--- a/core/services/agentpool/job_persister_test.go
+++ b/core/services/agentpool/job_persister_test.go
@@ -20,26 +20,28 @@ var _ = Describe("JobPersister", func() {
 	Context("fileJobPersister", func() {
 		var (
 			p       *fileJobPersister
+			tasks   *xsync.SyncedMap[string, schema.Task]
 			jobsMap *xsync.SyncedMap[string, schema.Job]
 			tmpDir  string
 		)

 		BeforeEach(func() {
 			tmpDir = GinkgoT().TempDir()
+			tasks = xsync.NewSyncedMap[string, schema.Task]()
 			jobsMap = xsync.NewSyncedMap[string, schema.Job]()
 			p = &fileJobPersister{
+				tasks:     tasks,
 				jobs:      jobsMap,
 				tasksFile: filepath.Join(tmpDir, "tasks.json"),
 				jobsFile:  filepath.Join(tmpDir, "jobs.json"),
-				// taskSet is the persister's own task view (decoupled from the tasks
-				// SyncedMap to avoid re-entering its lock during write-through).
-				taskSet: make(map[string]schema.Task),
 			}
 		})

 		It("SaveTask writes all tasks to file", func() {
-			Expect(p.SaveTask("", schema.Task{ID: "t1", Name: "Task One", Model: "m", Prompt: "p"})).To(Succeed())
-			Expect(p.SaveTask("", schema.Task{ID: "t2", Name: "Task Two", Model: "m", Prompt: "p"})).To(Succeed())
+			tasks.Set("t1", schema.Task{ID: "t1", Name: "Task One", Model: "m", Prompt: "p"})
+			tasks.Set("t2", schema.Task{ID: "t2", Name: "Task Two", Model: "m", Prompt: "p"})
+
+			Expect(p.SaveTask("", schema.Task{})).To(Succeed())

 			// Verify file contents
 			data, err := os.ReadFile(p.tasksFile)
@@ -50,9 +52,11 @@ var _ = Describe("JobPersister", func() {
 		})

 		It("DeleteTask writes updated tasks to file", func() {
-			Expect(p.SaveTask("", schema.Task{ID: "t1", Name: "Keep"})).To(Succeed())
-			Expect(p.SaveTask("", schema.Task{ID: "t2", Name: "Delete"})).To(Succeed())
+			tasks.Set("t1", schema.Task{ID: "t1", Name: "Keep"})
+			tasks.Set("t2", schema.Task{ID: "t2", Name: "Delete"})

+			// Simulate deletion from memory (caller does this before calling persister)
+			tasks.Delete("t2")
 			Expect(p.DeleteTask("t2")).To(Succeed())

 			data, err := os.ReadFile(p.tasksFile)
--- a/core/services/agentpool/task_sync_test.go
+++ b/core/services/agentpool/task_sync_test.go
@@ -1,152 +0,0 @@
-package agentpool
-
-// White-box tests (package agentpool) so a spec can build two AgentJobService
-// instances sharing one in-memory bus and assert that agent *tasks* converge
-// across replicas - the bug this migration fixes (ListTasks used to read
-// in-memory only, so a task created on replica A was invisible on replica B).
-// Jobs are deliberately untouched here: they already converge via the dispatcher
-// + DB read-through.
-
-import (
-	"context"
-	"time"
-
-	. "github.com/onsi/ginkgo/v2"
-	. "github.com/onsi/gomega"
-
-	"github.com/mudler/LocalAI/core/config"
-	"github.com/mudler/LocalAI/core/schema"
-	"github.com/mudler/LocalAI/core/services/messaging"
-	"github.com/mudler/LocalAI/core/services/syncstate"
-	"github.com/mudler/LocalAI/core/services/testutil"
-	"github.com/mudler/LocalAI/pkg/system"
-)
-
-// newTaskSyncService builds an AgentJobService wired to the given bus and a
-// throwaway data dir (so the file persister has somewhere to write). Model/config
-// loaders are nil because the task sync paths under test never touch them.
-func newTaskSyncService(bus messaging.MessagingClient) *AgentJobService {
-	tmpDir := GinkgoT().TempDir()
-	sysState := &system.SystemState{}
-	sysState.Model.ModelsPath = tmpDir
-	appConfig := config.NewApplicationConfig(
-		config.WithDynamicConfigDir(tmpDir),
-		config.WithContext(context.Background()),
-	)
-	appConfig.SystemState = sysState
-
-	svc := NewAgentJobServiceWithPaths(appConfig, nil, nil, nil,
-		// Distinct per-replica files so the file persister write-through never
-		// crosses replicas: convergence here must be proven via the bus alone.
-		tmpDir+"/tasks.json", tmpDir+"/jobs.json")
-	svc.SetTaskSyncNATS(bus)
-	return svc
-}
-
-var _ = Describe("AgentJobService task cross-replica sync", func() {
-	Describe("two replicas sharing one bus", func() {
-		var (
-			bus  *testutil.FakeBus
-			a, b *AgentJobService
-		)
-
-		BeforeEach(func() {
-			// One shared bus, two replicas: exactly the distributed topology where a
-			// round-robin request may land on a replica that did not originate the
-			// change.
-			bus = testutil.NewFakeBus()
-			a = newTaskSyncService(bus)
-			b = newTaskSyncService(bus)
-			// Start hydrates (empty here) and subscribes both replicas to deltas.
-			Expect(a.Start(context.Background())).To(Succeed())
-			Expect(b.Start(context.Background())).To(Succeed())
-		})
-
-		AfterEach(func() {
-			Expect(a.Stop()).To(Succeed())
-			Expect(b.Stop()).To(Succeed())
-		})
-
-		It("makes a task created on A visible via B's GetTask and ListTasks", func() {
-			id, err := a.CreateTask(schema.Task{Name: "Shared", Model: "m", Prompt: "p"})
-			Expect(err).NotTo(HaveOccurred())
-
-			got, err := b.GetTask(id)
-			Expect(err).NotTo(HaveOccurred(), "B must see a task A just created")
-			Expect(got.Name).To(Equal("Shared"))
-
-			listed := b.ListTasks()
-			Expect(listed).To(HaveLen(1))
-			Expect(listed[0].ID).To(Equal(id))
-		})
-
-		It("propagates a task update from A to B", func() {
-			id, err := a.CreateTask(schema.Task{Name: "Before", Model: "m", Prompt: "p"})
-			Expect(err).NotTo(HaveOccurred())
-
-			Expect(a.UpdateTask(id, schema.Task{Name: "After", Model: "m", Prompt: "p"})).To(Succeed())
-
-			got, err := b.GetTask(id)
-			Expect(err).NotTo(HaveOccurred())
-			Expect(got.Name).To(Equal("After"), "an update on A must be visible on B")
-		})
-
-		It("removes a task from B when it is deleted on A", func() {
-			id, err := a.CreateTask(schema.Task{Name: "Doomed", Model: "m", Prompt: "p"})
-			Expect(err).NotTo(HaveOccurred())
-			_, err = b.GetTask(id)
-			Expect(err).NotTo(HaveOccurred(), "precondition: B must have the task before the delete")
-
-			Expect(a.DeleteTask(id)).To(Succeed())
-
-			_, err = b.GetTask(id)
-			Expect(err).To(HaveOccurred(), "a delete on A must remove the task from B")
-			Expect(b.ListTasks()).To(BeEmpty())
-		})
-
-		It("does not re-broadcast a delta it received (echo-loop guard)", func() {
-			subject := messaging.SubjectSyncStateDelta("agent.tasks")
-
-			_, err := a.CreateTask(schema.Task{Name: "Once", Model: "m", Prompt: "p"})
-			Expect(err).NotTo(HaveOccurred())
-
-			// Exactly one publish: A's create. B applies it without re-publishing,
-			// otherwise this would be 2+ and a real bus would storm.
-			Expect(bus.PublishCount(subject)).To(Equal(1))
-		})
-	})
-
-	Describe("ListTasks ordering and scoping", func() {
-		var svc *AgentJobService
-
-		BeforeEach(func() {
-			svc = newTaskSyncService(testutil.NewFakeBus())
-			Expect(svc.Start(context.Background())).To(Succeed())
-		})
-		AfterEach(func() { Expect(svc.Stop()).To(Succeed()) })
-
-		It("sorts newest-first, breaking ties by name", func() {
-			// CreateTask stamps CreatedAt with time.Now(); space them out so ordering
-			// is deterministic rather than relying on the sub-millisecond gap.
-			oldID, err := svc.CreateTask(schema.Task{Name: "Old", Model: "m", Prompt: "p"})
-			Expect(err).NotTo(HaveOccurred())
-			time.Sleep(5 * time.Millisecond)
-			newID, err := svc.CreateTask(schema.Task{Name: "New", Model: "m", Prompt: "p"})
-			Expect(err).NotTo(HaveOccurred())
-
-			listed := svc.ListTasks()
-			Expect(listed).To(HaveLen(2))
-			Expect(listed[0].ID).To(Equal(newID), "newest first")
-			Expect(listed[1].ID).To(Equal(oldID))
-		})
-	})
-
-	Describe("compile-time adapter contract", func() {
-		It("satisfies syncstate.Store for tasks", func() {
-			// Mirrors the var assertion in task_syncstore.go; keeps the type
-			// referenced from a spec so drift surfaces here too.
-			var _ syncstate.Store[string, schema.Task] = (*taskStoreAdapter)(nil)
-			Expect(&taskStoreAdapter{}).ToNot(BeNil())
-		})
-	})
-})
--- a/core/services/agentpool/task_syncstore.go
+++ b/core/services/agentpool/task_syncstore.go
@@ -1,47 +0,0 @@
-package agentpool
-
-import (
-	"context"
-
-	"github.com/mudler/LocalAI/core/schema"
-	"github.com/mudler/LocalAI/core/services/syncstate"
-)
-
-// taskStoreAdapter bridges the existing JobPersister (file- or DB-backed) to the
-// generic syncstate.Store the tasks SyncedMap consumes. Only tasks are migrated:
-// jobs already converge across replicas via the dispatcher (NATS) plus the DB
-// read-through in ListJobs/GetJob, whereas ListTasks read in-memory only and so
-// went stale on replicas that did not originate the change.
-//
-// The adapter reads svc.persister and svc.userID live (rather than capturing
-// them) because both are configured by setters - SetDistributedJobStore swaps the
-// file persister for the DB one, SetUserID scopes per-user queries - AFTER the
-// service, and thus this adapter, is constructed. Reading them at call time means
-// the SyncedMap never has to be rebuilt when the persister is swapped.
-//
-// The SyncedMap value type is schema.Task: the exact shape ListTasks returns, so
-// reads need no conversion and REST responses are provably unchanged.
-type taskStoreAdapter struct {
-	svc *AgentJobService
-}
-
-// compile-time assertion that the adapter satisfies the component's Store.
-var _ syncstate.Store[string, schema.Task] = (*taskStoreAdapter)(nil)
-
-// List hydrates the map from durable storage on Start/reconnect: the file's task
-// list (standalone) or every task row (DB / distributed).
-func (a *taskStoreAdapter) List(_ context.Context) ([]schema.Task, error) {
-	return a.svc.persister.LoadTasks(a.svc.userID)
-}
-
-// Upsert write-through persists a single task created/updated locally; the
-// SyncedMap then broadcasts the delta to peers.
-func (a *taskStoreAdapter) Upsert(_ context.Context, task schema.Task) error {
-	return a.svc.persister.SaveTask(a.svc.userID, task)
-}
-
-// Delete write-through removes a task locally; the SyncedMap then broadcasts the
-// removal to peers.
-func (a *taskStoreAdapter) Delete(_ context.Context, id string) error {
-	return a.svc.persister.DeleteTask(id)
-}
--- a/core/services/agentpool/user_services.go
+++ b/core/services/agentpool/user_services.go
@@ -7,7 +7,6 @@ import (
 	"github.com/mudler/LocalAGI/webui/collections"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/services/jobs"
-	"github.com/mudler/LocalAI/core/services/messaging"
 	"github.com/mudler/LocalAI/core/templates"
 	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/mudler/xlog"
@@ -29,9 +28,6 @@ type UserServicesManager struct {
 	// Shared distributed backends (set once, inherited by per-user job services)
 	jobDispatcher DistributedDispatcher
 	jobDBStore    *jobs.JobStore
-	// jobNats keeps per-user agent tasks consistent across replicas (nil in
-	// standalone). Inherited by each per-user AgentJobService.
-	jobNats messaging.MessagingClient
 }

 // NewUserServicesManager creates a new UserServicesManager.
@@ -166,10 +162,6 @@ func (m *UserServicesManager) GetJobs(userID string) (*AgentJobService, error) {
 	if m.jobDispatcher != nil {
 		svc.SetDistributedBackends(m.jobDispatcher)
 	}
-	// Inherit the NATS client so per-user tasks broadcast across replicas. Must be
-	// set before the hydrate below (LoadFromDB / LoadTasksFromFile) so the tasks
-	// SyncedMap is rebuilt with the client while it is still empty.
-	svc.SetTaskSyncNATS(m.jobNats)
 	if m.jobDBStore != nil {
 		svc.SetDistributedJobStore(m.jobDBStore)
 		// Load tasks/jobs from DB immediately (per-user services skip Start())
@@ -197,12 +189,6 @@ func (m *UserServicesManager) SetJobDBStore(s *jobs.JobStore) {
 	m.jobDBStore = s
 }

-// SetJobSyncNATS sets the NATS client used to keep per-user agent tasks consistent
-// across replicas.
-func (m *UserServicesManager) SetJobSyncNATS(nats messaging.MessagingClient) {
-	m.jobNats = nats
-}
-
 // ListAllUserIDs returns all user IDs that have scoped data directories.
 func (m *UserServicesManager) ListAllUserIDs() ([]string, error) {
 	return m.storage.ListUserDirs()
--- a/core/services/distributed/finetune.go
+++ b/core/services/distributed/finetune.go
@@ -8,7 +8,6 @@ import (
 	"github.com/google/uuid"
 	"github.com/mudler/LocalAI/core/services/advisorylock"
 	"gorm.io/gorm"
-	"gorm.io/gorm/clause"
 )

 // FineTuneJobRecord tracks fine-tune jobs in PostgreSQL.
@@ -81,34 +80,6 @@ func (s *FineTuneStore) List(userID string) ([]FineTuneJobRecord, error) {
 	return jobs, q.Find(&jobs).Error
 }

-// ListAll returns every fine-tune job across all users. The SyncedMap that backs
-// FineTuneService is a single global map (the REST API filters by user at read
-// time), so hydrate needs the full set rather than the per-user List above.
-func (s *FineTuneStore) ListAll() ([]FineTuneJobRecord, error) {
-	var jobs []FineTuneJobRecord
-	return jobs, s.db.Order("created_at DESC").Find(&jobs).Error
-}
-
-// Upsert idempotently inserts or fully replaces a job row by primary key. The
-// SyncedMap write-through path issues a single Set per mutation regardless of
-// whether the job already exists, so it needs one create-or-update primitive
-// (Create alone fails on a duplicate key, UpdateStatus alone misses new rows and
-// only touches a few columns).
-func (s *FineTuneStore) Upsert(job *FineTuneJobRecord) error {
-	if job.ID == "" {
-		job.ID = uuid.New().String()
-	}
-	now := time.Now()
-	if job.CreatedAt.IsZero() {
-		job.CreatedAt = now
-	}
-	job.UpdatedAt = now
-	return s.db.Clauses(clause.OnConflict{
-		Columns:   []clause.Column{{Name: "id"}},
-		UpdateAll: true,
-	}).Create(job).Error
-}
-
 // UpdateStatus updates the status and message of a fine-tune job.
 func (s *FineTuneStore) UpdateStatus(id, status, message string) error {
 	return s.db.Model(&FineTuneJobRecord{}).Where("id = ?", id).Updates(map[string]any{
--- a/Show More
+++ b/Show More