chore: ⬆️ Update ServeurpersoCom/omnivoice.cpp to 0f37401bebe9b20c0160a888e592108fc1d17607 (#10492)

⬆️ Update ServeurpersoCom/omnivoice.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
chore: ⬆️ Update ikawrakow/ik_llama.cpp to d5507e33ae7ee2b7b41475f08044d3bde3b839ee (#10498)
2026-06-25 00:59:28 -04:00 · 2026-06-25 00:57:58 +02:00 · 2026-06-25 00:57:42 +02:00 · 2026-06-25 00:22:45 +02:00 · 2026-06-25 00:07:48 +02:00 · 2026-06-24 23:30:08 +02:00
170 changed files with 5817 additions and 1961 deletions
--- a/.github/backend-matrix.yml
+++ b/.github/backend-matrix.yml
@@ -4974,6 +4974,9 @@ includeDarwin:
  - backend: "kitten-tts"
    tag-suffix: "-metal-darwin-arm64-kitten-tts"
    build-type: "mps"
+  - backend: "liquid-audio"
+    tag-suffix: "-metal-darwin-arm64-liquid-audio"
+    build-type: "mps"
  - backend: "piper"
    tag-suffix: "-metal-darwin-arm64-piper"
    build-type: "metal"
@@ -4990,6 +4993,10 @@ includeDarwin:
    tag-suffix: "-metal-darwin-arm64-sherpa-onnx"
    build-type: "metal"
    lang: "go"
+  - backend: "supertonic"
+    tag-suffix: "-metal-darwin-arm64-supertonic"
+    build-type: "metal"
+    lang: "go"
  - backend: "local-store"
    tag-suffix: "-metal-darwin-arm64-local-store"
    build-type: "metal"
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -44,7 +44,7 @@ jobs:
      has-merges-singlearch: ${{ steps.set-matrix.outputs['has-merges-singlearch'] }}
    steps:
      - name: Checkout repository
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
      - name: Setup Bun
        uses: oven-sh/setup-bun@v2
--- a/.github/workflows/backend_build.yml
+++ b/.github/workflows/backend_build.yml
@@ -101,7 +101,7 @@ jobs:
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
--- a/.github/workflows/backend_build_darwin.yml
+++ b/.github/workflows/backend_build_darwin.yml
@@ -57,7 +57,7 @@ jobs:
      HOMEBREW_NO_ANALYTICS: '1'
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
--- a/.github/workflows/backend_merge.yml
+++ b/.github/workflows/backend_merge.yml
@@ -49,7 +49,7 @@ jobs:
      # Sparse checkout: the merge job needs `.github/scripts/` (for the
      # keepalive cleanup script) but none of the source tree.
      - name: Checkout (.github/scripts only)
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          sparse-checkout: |
            .github/scripts
--- a/.github/workflows/backend_pr.yml
+++ b/.github/workflows/backend_pr.yml
@@ -23,7 +23,7 @@ jobs:
      has-merges-singlearch: ${{ steps.set-matrix.outputs['has-merges-singlearch'] }}
    steps:
      - name: Checkout repository
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
      - name: Setup Bun
        uses: oven-sh/setup-bun@v2
--- a/.github/workflows/base-images.yml
+++ b/.github/workflows/base-images.yml
@@ -127,7 +127,7 @@ jobs:
            # the original l4t matrix entry which set skip-drivers: 'true'.
            skip-drivers: 'true'
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v7
        with:
          submodules: false
      - name: Free disk space
--- a/.github/workflows/build-test.yaml
+++ b/.github/workflows/build-test.yaml
@@ -11,7 +11,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          fetch-depth: 0
      - name: Set up Go
@@ -25,7 +25,7 @@ jobs:
    runs-on: macos-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          fetch-depth: 0
      - name: Set up Go
@@ -47,7 +47,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          fetch-depth: 0
      - name: Configure apt mirror on runner
--- a/.github/workflows/bump-inference-defaults.yml
+++ b/.github/workflows/bump-inference-defaults.yml
@@ -14,7 +14,7 @@ jobs:
  bump:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v7
      - uses: actions/setup-go@v5
        with:
--- a/.github/workflows/bump_deps.yaml
+++ b/.github/workflows/bump_deps.yaml
@@ -92,7 +92,7 @@ jobs:
            file: "backend/go/vibevoice-cpp/Makefile"
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v7
      - name: Bump dependencies 🔧
        id: bump
        run: |
@@ -128,7 +128,7 @@ jobs:
    if: github.repository == 'mudler/LocalAI'
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v7
      - name: Bump vLLM cu130 wheel pin 🔧
        id: bump
        run: |
--- a/.github/workflows/bump_docs.yaml
+++ b/.github/workflows/bump_docs.yaml
@@ -13,7 +13,7 @@ jobs:
          - repository: "mudler/LocalAI"
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v7
      - name: Bump dependencies 🔧
        run: |
          bash .github/bump_docs.sh ${{ matrix.repository }}
--- a/.github/workflows/checksum_checker.yaml
+++ b/.github/workflows/checksum_checker.yaml
@@ -8,7 +8,7 @@ jobs:
    if: github.repository == 'mudler/LocalAI'
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v7
      - name: Configure apt mirror on runner
        uses: ./.github/actions/configure-apt-mirror
      - name: Install dependencies
--- a/.github/workflows/deploy-explorer.yaml
+++ b/.github/workflows/deploy-explorer.yaml
@@ -16,7 +16,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - uses: actions/setup-go@v5
--- a/.github/workflows/gallery-agent.yaml
+++ b/.github/workflows/gallery-agent.yaml
@@ -31,7 +31,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
--- a/.github/workflows/generate_intel_image.yaml
+++ b/.github/workflows/generate_intel_image.yaml
@@ -44,7 +44,7 @@ jobs:
        uses: docker/setup-buildx-action@master
      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
      - name: Cache Intel images
        uses: docker/build-push-action@v7
--- a/.github/workflows/gh-pages.yml
+++ b/.github/workflows/gh-pages.yml
@@ -28,7 +28,7 @@ jobs:
      HUGO_VERSION: "0.146.3"
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          fetch-depth: 0  # needed for enableGitInfo
          submodules: true
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -80,7 +80,7 @@ jobs:
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
      - name: Configure apt mirror on runner
        id: apt_mirror
--- a/.github/workflows/image_merge.yml
+++ b/.github/workflows/image_merge.yml
@@ -36,7 +36,7 @@ jobs:
      # Sparse checkout: needed for .github/scripts/ (the keepalive cleanup
      # script). Skips the rest of the source tree.
      - name: Checkout (.github/scripts only)
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          sparse-checkout: |
            .github/scripts
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -20,7 +20,7 @@ jobs:
  golangci-lint:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v7
        with:
          # Full history so golangci-lint's new-from-merge-base can reach
          # origin/master and compute the diff against it.
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -10,7 +10,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          fetch-depth: 0
      - name: Set up Go
@@ -28,7 +28,7 @@ jobs:
    runs-on: macos-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          fetch-depth: 0
      - name: Set up Go
@@ -46,7 +46,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          fetch-depth: 0
      - name: Configure apt mirror on runner
--- a/.github/workflows/secscan.yaml
+++ b/.github/workflows/secscan.yaml
@@ -14,7 +14,7 @@ jobs:
      GO111MODULE: on
    steps:
      - name: Checkout Source
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        if: ${{ github.actor != 'dependabot[bot]' }}
      - name: Run Gosec Security Scanner
        if: ${{ github.actor != 'dependabot[bot]' }}
--- a/.github/workflows/test-extra.yml
+++ b/.github/workflows/test-extra.yml
@@ -50,7 +50,7 @@ jobs:
      parakeet-cpp: ${{ steps.detect.outputs.parakeet-cpp }}
    steps:
      - name: Checkout repository
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
      - name: Setup Bun
        uses: oven-sh/setup-bun@v2
      - name: Install dependencies
@@ -67,7 +67,7 @@ jobs:
  #   runs-on: ubuntu-latest
  #   steps:
  #     - name: Clone
-  #       uses: actions/checkout@v6
+  #       uses: actions/checkout@v7
  #       with:
  #         submodules: true
  #     - name: Dependencies
@@ -90,7 +90,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Dependencies
@@ -113,7 +113,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Dependencies
@@ -137,7 +137,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Dependencies
@@ -158,7 +158,7 @@ jobs:
  #  runs-on: ubuntu-latest
  #  steps:
  #    - name: Clone
-  #      uses: actions/checkout@v6
+  #      uses: actions/checkout@v7
  #      with:
  #        submodules: true
  #    - name: Dependencies
@@ -178,7 +178,7 @@ jobs:
  #   runs-on: ubuntu-latest
  #   steps:
  #     - name: Clone
-  #       uses: actions/checkout@v6
+  #       uses: actions/checkout@v7
  #       with:
  #         submodules: true
  #     - name: Dependencies
@@ -240,7 +240,7 @@ jobs:
  #           sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
  #           df -h
  #     - name: Clone
-  #       uses: actions/checkout@v6
+  #       uses: actions/checkout@v7
  #       with:
  #         submodules: true
  #     - name: Dependencies
@@ -265,7 +265,7 @@ jobs:
  #   runs-on: ubuntu-latest
  #   steps:
  #     - name: Clone
-  #       uses: actions/checkout@v6
+  #       uses: actions/checkout@v7
  #       with:
  #         submodules: true
  #     - name: Dependencies
@@ -288,7 +288,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Dependencies
@@ -309,7 +309,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Dependencies
@@ -330,7 +330,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Dependencies
@@ -351,7 +351,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Dependencies
@@ -373,7 +373,7 @@ jobs:
  #   timeout-minutes: 45
  #   steps:
  #     - name: Clone
-  #       uses: actions/checkout@v6
+  #       uses: actions/checkout@v7
  #       with:
  #         submodules: true
  #     - name: Dependencies
@@ -394,7 +394,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Dependencies
@@ -415,7 +415,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Dependencies
@@ -436,7 +436,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Dependencies
@@ -462,7 +462,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Dependencies
@@ -484,7 +484,7 @@ jobs:
    timeout-minutes: 30
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Dependencies
@@ -513,7 +513,7 @@ jobs:
    timeout-minutes: 90
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Setup Go
@@ -530,7 +530,7 @@ jobs:
    timeout-minutes: 90
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Setup Go
@@ -552,7 +552,7 @@ jobs:
    timeout-minutes: 20
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Setup Go
@@ -579,7 +579,7 @@ jobs:
    timeout-minutes: 90
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Setup Go
@@ -604,7 +604,7 @@ jobs:
    timeout-minutes: 90
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Setup Go
@@ -625,7 +625,7 @@ jobs:
    timeout-minutes: 90
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Setup Go
@@ -645,7 +645,7 @@ jobs:
    timeout-minutes: 90
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Setup Go
@@ -664,7 +664,7 @@ jobs:
    timeout-minutes: 90
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Setup Go
@@ -681,7 +681,7 @@ jobs:
    timeout-minutes: 90
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Setup Go
@@ -698,7 +698,7 @@ jobs:
    timeout-minutes: 90
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Setup Go
@@ -741,7 +741,7 @@ jobs:
  #   timeout-minutes: 90
  #   steps:
  #     - name: Clone
-  #       uses: actions/checkout@v6
+  #       uses: actions/checkout@v7
  #       with:
  #         submodules: true
  #     - name: Dependencies
@@ -783,7 +783,7 @@ jobs:
  #   timeout-minutes: 90
  #   steps:
  #     - name: Clone
-  #       uses: actions/checkout@v6
+  #       uses: actions/checkout@v7
  #       with:
  #         submodules: true
  #     - name: Dependencies
@@ -808,7 +808,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Dependencies
@@ -840,7 +840,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Dependencies
@@ -876,7 +876,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Dependencies
@@ -915,7 +915,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Dependencies
@@ -952,7 +952,7 @@ jobs:
    timeout-minutes: 90
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Dependencies
@@ -987,7 +987,7 @@ jobs:
    timeout-minutes: 90
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Setup Go
@@ -1013,7 +1013,7 @@ jobs:
    timeout-minutes: 150
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Dependencies
@@ -1042,7 +1042,7 @@ jobs:
    timeout-minutes: 60
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Setup Go
@@ -1058,7 +1058,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Dependencies
@@ -1091,7 +1091,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Dependencies
@@ -1114,7 +1114,7 @@ jobs:
    timeout-minutes: 90
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Dependencies
@@ -1140,7 +1140,7 @@ jobs:
    timeout-minutes: 90
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Dependencies
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -21,7 +21,7 @@ jobs:
        go-version: ['1.26.x']
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Free disk space
@@ -84,7 +84,7 @@ jobs:
        go-version: ['1.26.x']
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Setup Go ${{ matrix.go-version }}
--- a/.github/workflows/tests-aio.yml
+++ b/.github/workflows/tests-aio.yml
@@ -62,7 +62,7 @@ jobs:
          sudo rm -rfv build || true
          df -h
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Dependencies
--- a/.github/workflows/tests-e2e.yml
+++ b/.github/workflows/tests-e2e.yml
@@ -21,7 +21,7 @@ jobs:
        go-version: ['1.25.x']
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Configure apt mirror on runner
--- a/.github/workflows/tests-pii-ner-e2e.yml
+++ b/.github/workflows/tests-pii-ner-e2e.yml
@@ -0,0 +1,97 @@
 ---
 name: 'PII NER tier E2E (live GGUF, CPU)'
 # Runs the real privacy-filter GGUF NER tier end-to-end on CPU — the gap the
 # hermetic tests/e2e suite cannot cover (it only exercises the in-process
 # pattern tier). Heavy (builds the C++ backend image + downloads a ~2.7 GB
 # GGUF), so it is path-filtered on PRs and otherwise runs nightly / on demand.
 #
 # This drives the container-level harness (tests/e2e-backends) via
 # `make test-extra-backend-privacy-filter`: it builds the privacy-filter image,
 # downloads the model, loads it on CPU, and asserts byte-correct, UTF-8-aligned
 # TokenClassify spans. The complementary HTTP-path specs in tests/e2e
 # (e2e_pii_ner_test.go) Skip unless PII_NER_MODEL_GGUF is wired.
 on:
  workflow_dispatch:
  schedule:
    - cron: '0 3 * * *'
  push:
    branches:
      - master
    paths:
      - 'backend/cpp/privacy-filter/**'
      - 'backend/Dockerfile.privacy-filter'
      - 'core/services/routing/pii/**'
      - 'core/services/routing/piidetector/**'
      - 'core/backend/token_classify.go'
      - 'core/http/endpoints/localai/pii.go'
      - 'core/schema/pii.go'
      - 'tests/e2e-backends/**'
      - 'tests/e2e/e2e_pii_ner_test.go'
      - 'tests/e2e/e2e_suite_test.go'
      - '.github/workflows/tests-pii-ner-e2e.yml'
  pull_request:
    paths:
      - 'backend/cpp/privacy-filter/**'
      - 'backend/Dockerfile.privacy-filter'
      - 'core/services/routing/pii/**'
      - 'core/services/routing/piidetector/**'
      - 'core/backend/token_classify.go'
      - 'core/http/endpoints/localai/pii.go'
      - 'core/schema/pii.go'
      - 'tests/e2e-backends/**'
      - 'tests/e2e/e2e_pii_ner_test.go'
      - 'tests/e2e/e2e_suite_test.go'
      - '.github/workflows/tests-pii-ner-e2e.yml'
 concurrency:
  group: ci-tests-pii-ner-e2e-${{ github.event.pull_request.number || github.sha }}-${{ github.repository }}
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
 jobs:
  tests-pii-ner-e2e:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        go-version: ['1.25.x']
    steps:
      - name: Clone
        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Free disk space
        run: |
          sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache/CodeQL || true
          sudo docker image prune --all --force || true
          df -h
      - name: Configure apt mirror on runner
        uses: ./.github/actions/configure-apt-mirror
      - name: Setup Go ${{ matrix.go-version }}
        uses: actions/setup-go@v5
        with:
          go-version: ${{ matrix.go-version }}
          cache: false
      - name: Proto Dependencies
        run: |
          curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
          unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
          rm protoc.zip
          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
          PATH="$PATH:$HOME/go/bin" make protogen-go
      - name: Dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y build-essential
      # Builds local-ai-backend:privacy-filter, downloads the GGUF, loads it on
      # CPU and runs the token_classify capability spec (byte-offset contract).
      - name: Run live PII NER backend E2E
        run: PATH="$PATH:$HOME/go/bin" make test-extra-backend-privacy-filter
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
        uses: mxschmitt/action-tmate@v3.23
        with:
          detached: true
          connect-timeout-seconds: 180
          limit-access-to-actor: true
--- a/.github/workflows/tests-ui-e2e.yml
+++ b/.github/workflows/tests-ui-e2e.yml
@@ -23,7 +23,7 @@ jobs:
        go-version: ['1.26.x']
    steps:
      - name: Clone
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
        with:
          submodules: true
      - name: Configure apt mirror on runner
--- a/.github/workflows/update_swagger.yaml
+++ b/.github/workflows/update_swagger.yaml
@@ -10,7 +10,7 @@ jobs:
      fail-fast: false
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v7
      - name: Configure apt mirror on runner
        uses: ./.github/actions/configure-apt-mirror
      - uses: actions/setup-go@v5
--- a/.gitignore
+++ b/.gitignore
@@ -91,3 +91,6 @@ core/http/react-ui/test-results/
 # Local worktrees
 .worktrees/
 # SDD / brainstorm scratch (agent-driven development)
 .superpowers/
--- a/Makefile
+++ b/Makefile
@@ -690,6 +690,16 @@ test-extra-backend-llama-cpp-transcription: docker-build-llama-cpp
 	BACKEND_TEST_CTX_SIZE=2048 \
 	$(MAKE) test-extra-backend
+## privacy-filter: the PII/NER token-classification backend. Exercises the
+## TokenClassify RPC and asserts byte-correct, UTF-8-aligned span offsets
+## against the openai-privacy-filter multilingual GGUF (CPU-runnable, ~50M
+## active params). This is the live-backend coverage for the PII NER tier.
+test-extra-backend-privacy-filter: docker-build-privacy-filter
+	BACKEND_IMAGE=local-ai-backend:privacy-filter \
+	BACKEND_TEST_MODEL_URL=https://huggingface.co/LocalAI-io/privacy-filter-multilingual-GGUF/resolve/main/privacy-filter-multilingual-f16.gguf \
+	BACKEND_TEST_CAPS=health,load,token_classify \
+	$(MAKE) test-extra-backend
 ## vllm is resolved from a HuggingFace model id (no file download) and
 ## exercises Predict + streaming + tool-call extraction via the hermes parser.
 ## Requires a host CPU with the SIMD instructions the prebuilt vllm CPU
--- a/backend/cpp/ik-llama-cpp/Makefile
+++ b/backend/cpp/ik-llama-cpp/Makefile
@@ -1,5 +1,5 @@
-IK_LLAMA_VERSION?=6c00e87ac84404af588ad2e65935bd6f079c696f
+IK_LLAMA_VERSION?=d5507e33ae7ee2b7b41475f08044d3bde3b839ee
 LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
 CMAKE_ARGS?=
--- a/backend/cpp/llama-cpp/Makefile
+++ b/backend/cpp/llama-cpp/Makefile
@@ -1,5 +1,5 @@
-LLAMA_VERSION?=e475fa2b5f9fb50c3d6fc3e7c6fdf1e004465b62
+LLAMA_VERSION?=be4a6a63eb2b848e19c277bdcf2bd399e8af76d9
 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
 CMAKE_ARGS?=
--- a/backend/go/crispasr/Makefile
+++ b/backend/go/crispasr/Makefile
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
 # CrispASR version (release tag)
 CRISPASR_REPO?=https://github.com/CrispStrobe/CrispASR
-CRISPASR_VERSION?=d745bda4386ae0f9d1d2f23fff8ec95d76428221
+CRISPASR_VERSION?=96b2a6ee31d30389fed8a7ef1a54239b75231ddc
 SO_TARGET?=libgocrispasr.so
 CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
--- a/backend/go/omnivoice-cpp/Makefile
+++ b/backend/go/omnivoice-cpp/Makefile
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
 # omnivoice.cpp version
 OMNIVOICE_REPO?=https://github.com/ServeurpersoCom/omnivoice.cpp
-OMNIVOICE_VERSION?=96d30169afd5e6bb3fd6a0e9be0eb505bfe81fcd
+OMNIVOICE_VERSION?=0f37401bebe9b20c0160a888e592108fc1d17607
 SO_TARGET?=libgomnivoicecpp.so
 CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
--- a/backend/go/parakeet-cpp/Makefile
+++ b/backend/go/parakeet-cpp/Makefile
@@ -1,6 +1,6 @@
 # parakeet-cpp backend Makefile.
 #
-# Upstream pin lives below as PARAKEET_VERSION?=db755a78d39f789bb7d4e3935158a9e8105dbe36
+# Upstream pin lives below as PARAKEET_VERSION?=89f5e2977b4d8bccd45e7bcc6f2ef7c4ed49e89a
 # (.github/bump_deps.sh) can find and update it - matches the
 # whisper.cpp / ds4 / vibevoice-cpp convention.
 #
@@ -15,7 +15,7 @@
 # That's what the L0 smoke test uses. The default target below does the
 # proper clone-at-pin + cmake build so CI doesn't need a side-checkout.
-PARAKEET_VERSION?=db755a78d39f789bb7d4e3935158a9e8105dbe36
+PARAKEET_VERSION?=89f5e2977b4d8bccd45e7bcc6f2ef7c4ed49e89a
 PARAKEET_REPO?=https://github.com/mudler/parakeet.cpp
 GOCMD?=go
--- a/backend/go/stablediffusion-ggml/Makefile
+++ b/backend/go/stablediffusion-ggml/Makefile
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
 # stablediffusion.cpp (ggml)
 STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
-STABLEDIFFUSION_GGML_VERSION?=b12098f5d09fc83da36e65c784f7bdb16a5a5ebf
+STABLEDIFFUSION_GGML_VERSION?=f440ad9c29dd8bc34e5d1f4b863832b96d6ea05f
 CMAKE_ARGS+=-DGGML_MAX_NAME=128
--- a/backend/go/supertonic/helper.go
+++ b/backend/go/supertonic/helper.go
@@ -16,6 +16,7 @@ import (
 	"os"
 	"path/filepath"
 	"regexp"
+	"runtime"
 	"strings"
 	"time"
 	"unicode"
@@ -943,7 +944,13 @@ func InitializeONNXRuntime() error {
 			}
 		}
 		if libPath == "" {
-			libPath = "/usr/local/lib/libonnxruntime.so"
+			// LocalAI: default to the platform-native shared library
+			// extension when nothing else is found (dyld vs ld.so).
+			if runtime.GOOS == "darwin" {
+				libPath = "/usr/local/lib/libonnxruntime.dylib"
+			} else {
+				libPath = "/usr/local/lib/libonnxruntime.so"
+			}
 		}
 	}
 	ort.SetSharedLibraryPath(libPath)
--- a/backend/go/supertonic/package.sh
+++ b/backend/go/supertonic/package.sh
@@ -32,6 +32,10 @@ elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
    cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
    cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
    cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
+elif [ $(uname -s) = "Darwin" ]; then
+    # macOS: dyld resolves the bundled .dylib via DYLD_LIBRARY_PATH (set in
+    # run.sh); there is no ld.so loader nor glibc to bundle.
+    echo "Detected Darwin"
 else
    echo "Error: Could not detect architecture"
    exit 1
--- a/backend/go/supertonic/run.sh
+++ b/backend/go/supertonic/run.sh
@@ -3,12 +3,19 @@ set -ex
 CURDIR=$(dirname "$(realpath $0)")
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
+if [ "$(uname)" = "Darwin" ]; then
-export ONNXRUNTIME_LIB_PATH=$CURDIR/lib/libonnxruntime.so
+	# macOS uses dyld: there is no ld.so loader, and the search path env
+	# var is DYLD_LIBRARY_PATH. ONNX Runtime ships as a .dylib here.
+	export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
+	export ONNXRUNTIME_LIB_PATH=$CURDIR/lib/libonnxruntime.dylib
+else
+	export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
+	export ONNXRUNTIME_LIB_PATH=$CURDIR/lib/libonnxruntime.so
-if [ -f $CURDIR/lib/ld.so ]; then
+	if [ -f $CURDIR/lib/ld.so ]; then
-	echo "Using lib/ld.so"
+		echo "Using lib/ld.so"
-	exec $CURDIR/lib/ld.so $CURDIR/supertonic "$@"
+		exec $CURDIR/lib/ld.so $CURDIR/supertonic "$@"
+	fi
 fi
 exec $CURDIR/supertonic "$@"
--- a/backend/go/whisper/Makefile
+++ b/backend/go/whisper/Makefile
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
-WHISPER_CPP_VERSION?=5ed76e9a079962f1c85cfce44edd325c27ef1f97
+WHISPER_CPP_VERSION?=43d78af5be58f41d6ffbc227d608f104577741ea
 SO_TARGET?=libgowhisper.so
 CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
--- a/backend/index.yaml
+++ b/backend/index.yaml
@@ -1284,6 +1284,7 @@
    nvidia-cuda-13: "cuda13-liquid-audio"
    nvidia-cuda-12: "cuda12-liquid-audio"
    nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-liquid-audio"
+    metal: "metal-liquid-audio"
  icon: https://cdn-avatars.huggingface.co/v1/production/uploads/61b8e2ba285851687028d395/7_6D7rWrLxp2hb6OHSV1p.png
 - &qwen-tts
  urls:
@@ -1569,6 +1570,7 @@
    - TTS
  capabilities:
    default: "cpu-supertonic"
+    metal: "metal-supertonic"
 - !!merge <<: *neutts
  name: "neutts-development"
  capabilities:
@@ -4612,6 +4614,7 @@
    nvidia-cuda-13: "cuda13-liquid-audio-development"
    nvidia-cuda-12: "cuda12-liquid-audio-development"
    nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-liquid-audio-development"
+    metal: "metal-liquid-audio-development"
 - !!merge <<: *liquid-audio
  name: "cpu-liquid-audio"
  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-liquid-audio"
@@ -4622,6 +4625,16 @@
  uri: "quay.io/go-skynet/local-ai-backends:master-cpu-liquid-audio"
  mirrors:
    - localai/localai-backends:master-cpu-liquid-audio
 - !!merge <<: *liquid-audio
  name: "metal-liquid-audio"
  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-liquid-audio"
  mirrors:
    - localai/localai-backends:latest-metal-darwin-arm64-liquid-audio
 - !!merge <<: *liquid-audio
  name: "metal-liquid-audio-development"
  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-liquid-audio"
  mirrors:
    - localai/localai-backends:master-metal-darwin-arm64-liquid-audio
 - !!merge <<: *liquid-audio
  name: "cuda12-liquid-audio"
  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-liquid-audio"
@@ -5484,6 +5497,7 @@
  name: "supertonic-development"
  capabilities:
    default: "cpu-supertonic-development"
    metal: "metal-supertonic-development"
 - !!merge <<: *supertonic
  name: "cpu-supertonic"
  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-supertonic"
@@ -5494,3 +5508,13 @@
  uri: "quay.io/go-skynet/local-ai-backends:master-cpu-supertonic"
  mirrors:
    - localai/localai-backends:master-cpu-supertonic
 - !!merge <<: *supertonic
  name: "metal-supertonic"
  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-supertonic"
  mirrors:
    - localai/localai-backends:latest-metal-darwin-arm64-supertonic
 - !!merge <<: *supertonic
  name: "metal-supertonic-development"
  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-supertonic"
  mirrors:
    - localai/localai-backends:master-metal-darwin-arm64-supertonic
--- a/backend/python/diffusers/requirements-cpu.txt
+++ b/backend/python/diffusers/requirements-cpu.txt
@@ -1,7 +1,7 @@
 --extra-index-url https://download.pytorch.org/whl/cpu
-git+https://github.com/huggingface/diffusers
+diffusers==0.38.0
 opencv-python
-transformers
+transformers==4.57.6
 torchvision==0.22.1
 accelerate
 git+https://github.com/xhinker/sd_embed
@@ -10,9 +10,15 @@ sentencepiece
 torch==2.7.1
 optimum-quanto
 ftfy
-# TODO: re-add compel once it supports transformers >= 5.
+# diffusers and transformers are pinned together on purpose. transformers v5
-# Tracking: https://github.com/damian0815/compel/pull/129
+# restructured CLIPTextModel and dropped the `.text_model` attribute, which
-#           https://github.com/damian0815/compel/issues/128
+# breaks single-file Stable Diffusion loading on every released diffusers
-# compel currently pins transformers~=4.25, which forced pip into multi-hour
+# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
-# resolver backtracking storms in CI. backend.py imports it lazily and gates
+# main via git froze whichever broken pair existed at image-build time. Pin the
-# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
+# last known-good released pair so builds are reproducible and can't drift into
 # the broken window. See https://github.com/mudler/LocalAI/issues/9979
 #
 # compel is intentionally omitted: it pins transformers~=4.25, which conflicts
 # with this pin and previously forced pip into multi-hour resolver backtracking
 # storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
 # the import succeeding, so dropping it here is safe.
--- a/backend/python/diffusers/requirements-cublas12.txt
+++ b/backend/python/diffusers/requirements-cublas12.txt
@@ -1,7 +1,7 @@
 --extra-index-url https://download.pytorch.org/whl/cu121
-git+https://github.com/huggingface/diffusers
+diffusers==0.38.0
 opencv-python
-transformers
+transformers==4.57.6
 torchvision
 accelerate
 git+https://github.com/xhinker/sd_embed
@@ -10,9 +10,15 @@ sentencepiece
 torch
 ftfy
 optimum-quanto
-# TODO: re-add compel once it supports transformers >= 5.
+# diffusers and transformers are pinned together on purpose. transformers v5
-# Tracking: https://github.com/damian0815/compel/pull/129
+# restructured CLIPTextModel and dropped the `.text_model` attribute, which
-#           https://github.com/damian0815/compel/issues/128
+# breaks single-file Stable Diffusion loading on every released diffusers
-# compel currently pins transformers~=4.25, which forced pip into multi-hour
+# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
-# resolver backtracking storms in CI. backend.py imports it lazily and gates
+# main via git froze whichever broken pair existed at image-build time. Pin the
-# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
+# last known-good released pair so builds are reproducible and can't drift into
 # the broken window. See https://github.com/mudler/LocalAI/issues/9979
 #
 # compel is intentionally omitted: it pins transformers~=4.25, which conflicts
 # with this pin and previously forced pip into multi-hour resolver backtracking
 # storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
 # the import succeeding, so dropping it here is safe.
--- a/backend/python/diffusers/requirements-cublas13.txt
+++ b/backend/python/diffusers/requirements-cublas13.txt
@@ -1,7 +1,7 @@
 --extra-index-url https://download.pytorch.org/whl/cu130
-git+https://github.com/huggingface/diffusers
+diffusers==0.38.0
 opencv-python
-transformers
+transformers==4.57.6
 torchvision
 accelerate
 git+https://github.com/xhinker/sd_embed
@@ -10,9 +10,15 @@ sentencepiece
 torch
 ftfy
 optimum-quanto
-# TODO: re-add compel once it supports transformers >= 5.
+# diffusers and transformers are pinned together on purpose. transformers v5
-# Tracking: https://github.com/damian0815/compel/pull/129
+# restructured CLIPTextModel and dropped the `.text_model` attribute, which
-#           https://github.com/damian0815/compel/issues/128
+# breaks single-file Stable Diffusion loading on every released diffusers
-# compel currently pins transformers~=4.25, which forced pip into multi-hour
+# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
-# resolver backtracking storms in CI. backend.py imports it lazily and gates
+# main via git froze whichever broken pair existed at image-build time. Pin the
-# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
+# last known-good released pair so builds are reproducible and can't drift into
 # the broken window. See https://github.com/mudler/LocalAI/issues/9979
 #
 # compel is intentionally omitted: it pins transformers~=4.25, which conflicts
 # with this pin and previously forced pip into multi-hour resolver backtracking
 # storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
 # the import succeeding, so dropping it here is safe.
--- a/backend/python/diffusers/requirements-hipblas.txt
+++ b/backend/python/diffusers/requirements-hipblas.txt
@@ -1,17 +1,23 @@
 --extra-index-url https://download.pytorch.org/whl/rocm7.0
 torch==2.10.0+rocm7.0
 torchvision==0.25.0+rocm7.0
-git+https://github.com/huggingface/diffusers
+diffusers==0.38.0
 opencv-python
-transformers
+transformers==4.57.6
 accelerate
 peft
 sentencepiece
 optimum-quanto
 ftfy
-# TODO: re-add compel once it supports transformers >= 5.
+# diffusers and transformers are pinned together on purpose. transformers v5
-# Tracking: https://github.com/damian0815/compel/pull/129
+# restructured CLIPTextModel and dropped the `.text_model` attribute, which
-#           https://github.com/damian0815/compel/issues/128
+# breaks single-file Stable Diffusion loading on every released diffusers
-# compel currently pins transformers~=4.25, which forced pip into multi-hour
+# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
-# resolver backtracking storms in CI. backend.py imports it lazily and gates
+# main via git froze whichever broken pair existed at image-build time. Pin the
-# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
+# last known-good released pair so builds are reproducible and can't drift into
 # the broken window. See https://github.com/mudler/LocalAI/issues/9979
 #
 # compel is intentionally omitted: it pins transformers~=4.25, which conflicts
 # with this pin and previously forced pip into multi-hour resolver backtracking
 # storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
 # the import succeeding, so dropping it here is safe.
--- a/backend/python/diffusers/requirements-intel.txt
+++ b/backend/python/diffusers/requirements-intel.txt
@@ -3,18 +3,24 @@ torch
 torchvision
 optimum[openvino]
 setuptools
-git+https://github.com/huggingface/diffusers
+diffusers==0.38.0
 opencv-python
-transformers
+transformers==4.57.6
 accelerate
 git+https://github.com/xhinker/sd_embed
 peft
 sentencepiece
 optimum-quanto
 ftfy
-# TODO: re-add compel once it supports transformers >= 5.
+# diffusers and transformers are pinned together on purpose. transformers v5
-# Tracking: https://github.com/damian0815/compel/pull/129
+# restructured CLIPTextModel and dropped the `.text_model` attribute, which
-#           https://github.com/damian0815/compel/issues/128
+# breaks single-file Stable Diffusion loading on every released diffusers
-# compel currently pins transformers~=4.25, which forced pip into multi-hour
+# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
-# resolver backtracking storms in CI. backend.py imports it lazily and gates
+# main via git froze whichever broken pair existed at image-build time. Pin the
-# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
+# last known-good released pair so builds are reproducible and can't drift into
 # the broken window. See https://github.com/mudler/LocalAI/issues/9979
 #
 # compel is intentionally omitted: it pins transformers~=4.25, which conflicts
 # with this pin and previously forced pip into multi-hour resolver backtracking
 # storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
 # the import succeeding, so dropping it here is safe.
--- a/backend/python/diffusers/requirements-l4t12.txt
+++ b/backend/python/diffusers/requirements-l4t12.txt
@@ -1,7 +1,7 @@
 --extra-index-url https://pypi.jetson-ai-lab.io/jp6/cu129/
 torch
-git+https://github.com/huggingface/diffusers
+diffusers==0.38.0
-transformers
+transformers==4.57.6
 accelerate
 peft
 optimum-quanto
@@ -9,9 +9,15 @@ numpy<2
 sentencepiece
 torchvision
 ftfy
-# TODO: re-add compel once it supports transformers >= 5.
+# diffusers and transformers are pinned together on purpose. transformers v5
-# Tracking: https://github.com/damian0815/compel/pull/129
+# restructured CLIPTextModel and dropped the `.text_model` attribute, which
-#           https://github.com/damian0815/compel/issues/128
+# breaks single-file Stable Diffusion loading on every released diffusers
-# compel currently pins transformers~=4.25, which forced pip into multi-hour
+# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
-# resolver backtracking storms in CI. backend.py imports it lazily and gates
+# main via git froze whichever broken pair existed at image-build time. Pin the
-# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
+# last known-good released pair so builds are reproducible and can't drift into
 # the broken window. See https://github.com/mudler/LocalAI/issues/9979
 #
 # compel is intentionally omitted: it pins transformers~=4.25, which conflicts
 # with this pin and previously forced pip into multi-hour resolver backtracking
 # storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
 # the import succeeding, so dropping it here is safe.
--- a/backend/python/diffusers/requirements-l4t13.txt
+++ b/backend/python/diffusers/requirements-l4t13.txt
@@ -1,7 +1,7 @@
 --extra-index-url https://download.pytorch.org/whl/cu130
 torch
-git+https://github.com/huggingface/diffusers
+diffusers==0.38.0
-transformers
+transformers==4.57.6
 accelerate
 peft
 optimum-quanto
@@ -10,9 +10,15 @@ sentencepiece
 torchvision
 ftfy
 chardet
-# TODO: re-add compel once it supports transformers >= 5.
+# diffusers and transformers are pinned together on purpose. transformers v5
-# Tracking: https://github.com/damian0815/compel/pull/129
+# restructured CLIPTextModel and dropped the `.text_model` attribute, which
-#           https://github.com/damian0815/compel/issues/128
+# breaks single-file Stable Diffusion loading on every released diffusers
-# compel currently pins transformers~=4.25, which forced pip into multi-hour
+# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
-# resolver backtracking storms in CI. backend.py imports it lazily and gates
+# main via git froze whichever broken pair existed at image-build time. Pin the
-# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
+# last known-good released pair so builds are reproducible and can't drift into
 # the broken window. See https://github.com/mudler/LocalAI/issues/9979
 #
 # compel is intentionally omitted: it pins transformers~=4.25, which conflicts
 # with this pin and previously forced pip into multi-hour resolver backtracking
 # storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
 # the import succeeding, so dropping it here is safe.
--- a/backend/python/diffusers/requirements-mps.txt
+++ b/backend/python/diffusers/requirements-mps.txt
@@ -1,16 +1,22 @@
 torch==2.7.1
 torchvision==0.22.1
-git+https://github.com/huggingface/diffusers
+diffusers==0.38.0
 opencv-python
-transformers
+transformers==4.57.6
 accelerate
 peft
 sentencepiece
 optimum-quanto
 ftfy
-# TODO: re-add compel once it supports transformers >= 5.
+# diffusers and transformers are pinned together on purpose. transformers v5
-# Tracking: https://github.com/damian0815/compel/pull/129
+# restructured CLIPTextModel and dropped the `.text_model` attribute, which
-#           https://github.com/damian0815/compel/issues/128
+# breaks single-file Stable Diffusion loading on every released diffusers
-# compel currently pins transformers~=4.25, which forced pip into multi-hour
+# (<=0.38.0); only unreleased diffusers main supports transformers v5. Tracking
-# resolver backtracking storms in CI. backend.py imports it lazily and gates
+# main via git froze whichever broken pair existed at image-build time. Pin the
-# the COMPEL=1 env var on the import succeeding, so dropping it here is safe.
+# last known-good released pair so builds are reproducible and can't drift into
 # the broken window. See https://github.com/mudler/LocalAI/issues/9979
 #
 # compel is intentionally omitted: it pins transformers~=4.25, which conflicts
 # with this pin and previously forced pip into multi-hour resolver backtracking
 # storms in CI. backend.py imports it lazily and gates the COMPEL=1 env var on
 # the import succeeding, so dropping it here is safe.
--- a/backend/python/liquid-audio/install.sh
+++ b/backend/python/liquid-audio/install.sh
@@ -14,5 +14,11 @@ else
 fi
 # liquid-audio's torch wheels are large; allow upgrades to satisfy transitive pins
-EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
+EXTRA_PIP_INSTALL_FLAGS+=" --upgrade"
 # --index-strategy is a uv-only flag. The darwin/MPS build installs with pip
 # (USE_PIP=true in scripts/build/python-darwin.sh), which rejects it. Only add
 # it on the uv path; Linux/CUDA resolution is unchanged.
 if [ "x${USE_PIP:-}" != "xtrue" ]; then
    EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-first-match"
 fi
 installRequirements
--- a/backend/python/liquid-audio/requirements-mps.txt
+++ b/backend/python/liquid-audio/requirements-mps.txt
@@ -1,3 +1,4 @@
 # MPS (Apple Silicon / Metal) build profile - installed by the darwin CI job.
 torch>=2.8.0
 torchaudio>=2.8.0
 torchcodec>=0.9.1
--- a/core/application/application.go
+++ b/core/application/application.go
@@ -341,11 +341,9 @@ func (a *Application) ResolvePIIPolicy(cfg *config.ModelConfig) (enabled bool, d
 	}
 	appCfg := a.ApplicationConfig()
-	if cfg.PII.Enabled != nil {
+	// PIIIsEnabled already encodes "explicit pii.enabled wins, else backend
-		enabled = *cfg.PII.Enabled
+	// default (cloud-proxy)" — the single source of that rule.
-	} else {
+	enabled = cfg.PIIIsEnabled()
 		enabled = cfg.PIIIsEnabled() // backend default (cloud-proxy)
 	}
 	if !enabled {
 		return false, nil
 	}
@@ -354,7 +352,7 @@ func (a *Application) ResolvePIIPolicy(cfg *config.ModelConfig) (enabled bool, d
 	if len(detectors) == 0 {
 		detectors = append([]string(nil), appCfg.PIIDefaultDetectors...)
 	}
-	return enabled, detectors
+	return true, detectors // enabled is necessarily true past the !enabled guard
 }
 // PIIPolicyResolver adapts ResolvePIIPolicy to pii.PolicyResolver for
--- a/core/application/config_file_watcher.go
+++ b/core/application/config_file_watcher.go
@@ -215,6 +215,7 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
 		envBackendGalleries := slices.Equal(appConfig.BackendGalleries, startupAppConfig.BackendGalleries)
 		envAutoloadGalleries := appConfig.AutoloadGalleries == startupAppConfig.AutoloadGalleries
 		envAutoloadBackendGalleries := appConfig.AutoloadBackendGalleries == startupAppConfig.AutoloadBackendGalleries
 		envPIIDefaultDetectors := slices.Equal(appConfig.PIIDefaultDetectors, startupAppConfig.PIIDefaultDetectors)
 		envAgentJobRetentionDays := appConfig.AgentJobRetentionDays == startupAppConfig.AgentJobRetentionDays
 		envForceEvictionWhenBusy := appConfig.ForceEvictionWhenBusy == startupAppConfig.ForceEvictionWhenBusy
 		envLRUEvictionMaxRetries := appConfig.LRUEvictionMaxRetries == startupAppConfig.LRUEvictionMaxRetries
@@ -335,6 +336,15 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
 			if settings.AutoloadBackendGalleries != nil && !envAutoloadBackendGalleries {
 				appConfig.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries
 			}
 			if settings.PIIDefaultDetectors != nil && !envPIIDefaultDetectors {
 				// Request-side default redaction reads this live via
 				// ResolvePIIPolicy, so a file edit takes effect on the next chat
 				// request. The MITM listener resolves its per-host detector map
 				// once at start, so a raw file edit reaches cloud-proxy traffic
 				// only after a restart or a POST /api/settings (which rebuilds
 				// the listener) — the admin UI uses the latter.
 				appConfig.PIIDefaultDetectors = append([]string(nil), (*settings.PIIDefaultDetectors)...)
 			}
 			if settings.AutoUpgradeBackends != nil {
 				appConfig.AutoUpgradeBackends = *settings.AutoUpgradeBackends
 			}
--- a/core/application/distributed.go
+++ b/core/application/distributed.go
@@ -357,6 +357,15 @@ func initDistributed(cfg *config.ApplicationConfig, authDB *gorm.DB, configLoade
 		Pressure:         pressure,
 	})
 	// Wire staging-progress broadcasting so file-staging shows up on every
 	// replica, not just the one performing the transfer. Without this, a
 	// /api/operations poll that round-robins onto a peer sees no staging row and
 	// the progress flickers. The origin publishes; peers mirror via the wildcard.
 	router.StagingTracker().SetPublisher(natsClient)
 	if _, err := router.StagingTracker().SubscribeBroadcasts(natsClient); err != nil {
 		xlog.Warn("Failed to subscribe to staging progress broadcasts", "error", err)
 	}
 	// Create ReplicaReconciler for auto-scaling model replicas. Adapter +
 	// RegistrationToken feed the state-reconciliation passes: pending op
 	// drain uses the adapter, and model health probes use the token to auth
--- a/core/application/runtime_settings_branding_test.go
+++ b/core/application/runtime_settings_branding_test.go
@@ -109,6 +109,52 @@ var _ = Describe("loadRuntimeSettingsFromFile", func() {
 		})
 	})
 	// Instance-wide default PII detectors. When LOCALAI_PII_DEFAULT_DETECTORS
 	// is unset the file is the only source, and the loader runs immediately
 	// before startMITMIfConfigured, so a regression here means the cloud-proxy
 	// MITM listener resolves an empty detector set at boot and forwards
 	// intercepted traffic unredacted — even though pii_default_detectors is on
 	// disk and the MITM model has PII enabled. It also breaks request-side
 	// default redaction the same way.
 	Describe("PII default detectors", func() {
 		It("loads pii_default_detectors from the file", func() {
 			cfg := &config.ApplicationConfig{DynamicConfigsDir: seedSettings(`{"pii_default_detectors": ["privacy-filter-nemotron", "secret-filter"]}`)}
 			loadRuntimeSettingsFromFile(cfg)
 			Expect(cfg.PIIDefaultDetectors).To(Equal([]string{"privacy-filter-nemotron", "secret-filter"}))
 		})
 		It("does not override an env/CLI-set value (LOCALAI_PII_DEFAULT_DETECTORS)", func() {
 			cfg := &config.ApplicationConfig{
 				DynamicConfigsDir:   seedSettings(`{"pii_default_detectors": ["from-file"]}`),
 				PIIDefaultDetectors: []string{"from-env"}, // simulate WithPIIDefaultDetectors(env)
 			}
 			loadRuntimeSettingsFromFile(cfg)
 			Expect(cfg.PIIDefaultDetectors).To(Equal([]string{"from-env"}), "env var must win over the persisted file value")
 		})
 	})
 	// The live file watcher applies pii_default_detectors on a runtime change
 	// the same way it handles galleries/threads/etc.: env-set values (current
 	// == startup snapshot) are left alone, otherwise the file value is applied
 	// to the live config so request-side default redaction picks it up without
 	// a restart.
 	Describe("file watcher: pii_default_detectors", func() {
 		It("applies a changed file value to the live config", func() {
 			startup := config.ApplicationConfig{} // no env baseline
 			live := &config.ApplicationConfig{PIIDefaultDetectors: []string{"old"}}
 			handler := readRuntimeSettingsJson(startup)
 			Expect(handler([]byte(`{"pii_default_detectors":["new-a","new-b"]}`), live)).To(Succeed())
 			Expect(live.PIIDefaultDetectors).To(Equal([]string{"new-a", "new-b"}))
 		})
 		It("leaves an env-controlled value untouched", func() {
 			startup := config.ApplicationConfig{PIIDefaultDetectors: []string{"from-env"}}
 			live := &config.ApplicationConfig{PIIDefaultDetectors: []string{"from-env"}}
 			handler := readRuntimeSettingsJson(startup)
 			Expect(handler([]byte(`{"pii_default_detectors":["from-file"]}`), live)).To(Succeed())
 			Expect(live.PIIDefaultDetectors).To(Equal([]string{"from-env"}), "env-controlled detectors must not be overwritten by the file")
 		})
 	})
 	// The Agent Pool block has a mix of zero and non-zero defaults
 	// (Enabled=true, EmbeddingModel="granite-...", MaxChunkingSize=400,
 	// VectorEngine="chromem", AgentHubURL="https://agenthub.localai.io").
--- a/core/application/startup.go
+++ b/core/application/startup.go
@@ -750,6 +750,20 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
 		options.MITMListen = *settings.MITMListen
 	}
 	// Instance-wide default PII detectors. LOCALAI_PII_DEFAULT_DETECTORS (via
 	// WithPIIDefaultDetectors) wins when set; otherwise the file is the source
 	// — apply it only when the env/CLI left the value empty, mirroring the
 	// "env > file" precedence used for the other fields. This must land before
 	// startMITMIfConfigured (called right after this loader): the cloud-proxy
 	// listener resolves each intercept host's detectors once at start via
 	// ResolvePIIPolicy, and a MITM model that names no detectors of its own
 	// falls back to these defaults. Without it the listener (and request-side
 	// default redaction) starts with an empty detector set and forwards
 	// traffic unredacted even though pii_default_detectors is on disk.
 	if settings.PIIDefaultDetectors != nil && len(options.PIIDefaultDetectors) == 0 {
 		options.PIIDefaultDetectors = append([]string(nil), (*settings.PIIDefaultDetectors)...)
 	}
 	// Backend upgrade flags
 	if settings.AutoUpgradeBackends != nil {
 		if !options.AutoUpgradeBackends {
--- a/core/cli/run.go
+++ b/core/cli/run.go
@@ -181,6 +181,8 @@ type RunCMD struct {
 	// Cloud-proxy MITM listener (off by default).
 	MITMListen string `env:"LOCALAI_MITM_LISTEN" help:"Address (host:port) for the cloudproxy MITM listener. Empty = disabled. Clients set HTTPS_PROXY=http://<this>:<port>. Intercept hosts are declared per-model via the model YAML mitm.hosts: block; create one from the Add Model UI." group:"middleware"`
 	MITMCADir  string `env:"LOCALAI_MITM_CA_DIR" type:"path" help:"Directory holding the MITM proxy CA cert + key. Defaults to <data-path>/mitm-ca." group:"middleware"`
 	PIIDefaultDetectors []string `env:"LOCALAI_PII_DEFAULT_DETECTORS" help:"Instance-wide default PII/secret detector model names applied to any PII-enabled model (chiefly cloud-proxy / MITM models) that names no pii.detectors of its own. Comma-separated, e.g. privacy-filter-nemotron,secret-filter. Takes precedence over the value persisted via the Middleware UI." group:"middleware"`
 }
 func (r *RunCMD) Run(ctx *cliContext.Context) error {
@@ -243,6 +245,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
 		config.WithAPIAddress(r.Address),
 		config.WithMITMListen(r.MITMListen),
 		config.WithMITMCADir(r.MITMCADir),
 		config.WithPIIDefaultDetectors(r.PIIDefaultDetectors),
 		config.WithAgentJobRetentionDays(r.AgentJobRetentionDays),
 		config.WithLlamaCPPTunnelCallback(func(tunnels []string) {
 			tunnelEnvVar := strings.Join(tunnels, ",")
--- a/core/config/application_config.go
+++ b/core/config/application_config.go
@@ -712,6 +712,18 @@ func WithMITMCADir(dir string) AppOption {
 	}
 }
 // WithPIIDefaultDetectors sets the instance-wide default PII/secret detector
 // model names applied to any PII-enabled model (chiefly cloud-proxy / MITM
 // models) that names no pii.detectors of its own. CLI/env:
 // LOCALAI_PII_DEFAULT_DETECTORS. Empty leaves the value to
 // runtime_settings.json / the Middleware UI; a non-empty value takes
 // precedence over the file (env > file).
 //
 // The names are copied when the option is built, so the resulting
 // ApplicationConfig never aliases the caller's backing array: a later write to
 // the caller's slice cannot silently change the configured detectors.
 func WithPIIDefaultDetectors(detectors []string) AppOption {
 	// Snapshot the input. A nil input stays nil and an empty input stays empty,
 	// so callers that distinguish the two observe the same value as before.
 	var names []string
 	if detectors != nil {
 		names = make([]string, len(detectors))
 		copy(names, detectors)
 	}
 	return func(o *ApplicationConfig) {
 		o.PIIDefaultDetectors = names
 	}
 }
 func WithDynamicConfigDir(dynamicConfigsDir string) AppOption {
 	return func(o *ApplicationConfig) {
 		o.DynamicConfigsDir = dynamicConfigsDir
--- a/core/config/hardware_defaults.go
+++ b/core/config/hardware_defaults.go
@@ -54,8 +54,35 @@ func (g GPU) IsNVIDIABlackwell() bool {
 	return maj >= 12
 }
 // Compute-buffer headroom guard for the raised physical batch.
 //
 // Raising n_ubatch grows the CUDA *compute buffer* (the scratch for the forward
 // graph), which is allocated PER DEVICE — it does not benefit from a second GPU
 // the way weights or KV (which are split across devices) do. The buffer scales
 // ~linearly with n_ubatch * n_ctx, so a large context turns the GB10-tuned
 // ub2048 into multi-GiB of extra scratch that must fit on a SINGLE card. On a
 // 16 GiB consumer Blackwell with a 200k context that overflows (issue #10485),
 // even though the GB10 it was measured on (128 GiB unified memory) had room.
 //
 // These constants size a conservative guard: only raise the batch when the
 // extra scratch fits the per-device VRAM ceiling.
 const (
 	// computeBufferBytesPerCell approximates the CUDA compute-buffer cost of one
 	// (n_ubatch * n_ctx) cell. Derived from an observed allocation (ub2048 *
 	// ctx204800 ~= 4.5 GiB => ~11 B/cell) and rounded up to 16 for margin, since
 	// the real cost also grows with model width (heads / embedding dim) which we
 	// don't know at config time.
 	computeBufferBytesPerCell = 16
 	// blackwellBatchHeadroomDivisor caps the extra compute buffer from raising the
 	// physical batch at VRAM/divisor. /4 keeps the bulk of a device for weights +
 	// KV, which already dominate VRAM use.
 	blackwellBatchHeadroomDivisor = 4
 )
 // PhysicalBatch returns the canonical physical batch (n_batch/n_ubatch) for the
-// given hardware, used when the model config leaves batch unset.
+// given hardware class, ignoring context/VRAM headroom. Use
 // PhysicalBatchForContext when a model context and per-device VRAM are known
 // (the load paths) so the raised batch can't overflow a single device.
 func PhysicalBatch(g GPU) int {
 	if g.IsNVIDIABlackwell() {
 		return BlackwellPhysicalBatch
@@ -63,6 +90,32 @@ func PhysicalBatch(g GPU) int {
 	return DefaultPhysicalBatch
 }
 // PhysicalBatchForContext picks the physical batch for g at the given context
 // size. It returns BlackwellPhysicalBatch only when g is an NVIDIA Blackwell
 // device and the additional compute buffer implied by the larger batch —
 // estimated as
 // ctx * (BlackwellPhysicalBatch - DefaultPhysicalBatch) * computeBufferBytesPerCell
 // — is at most g.VRAM / blackwellBatchHeadroomDivisor. In every other case it
 // returns DefaultPhysicalBatch.
 //
 // g.VRAM must be the PER-DEVICE ceiling (the smallest device on a multi-GPU
 // host), not the summed total — the compute buffer is allocated on a single
 // device and can't be split.
 //
 // A non-positive ctx is treated as DefaultContextSize. VRAM 0 (unknown) stays
 // conservative rather than risk a per-device OOM; the GB10 / unified-memory
 // path reports system RAM, so it still clears the guard.
 func PhysicalBatchForContext(g GPU, ctx int) int {
 	// Non-Blackwell hardware and unknown VRAM both keep the default batch.
 	if !g.IsNVIDIABlackwell() || g.VRAM == 0 {
 		return DefaultPhysicalBatch
 	}
 	if ctx <= 0 {
 		ctx = DefaultContextSize
 	}
 
 	// Extra scratch incurred by going from the default to the raised batch.
 	batchDelta := uint64(BlackwellPhysicalBatch - DefaultPhysicalBatch)
 	extraScratch := uint64(ctx) * batchDelta * computeBufferBytesPerCell
 
 	// Only a 1/blackwellBatchHeadroomDivisor slice of the device may be spent
 	// on that extra scratch.
 	budget := g.VRAM / blackwellBatchHeadroomDivisor
 	if extraScratch > budget {
 		return DefaultPhysicalBatch
 	}
 	return BlackwellPhysicalBatch
 }
 // IsManagedPhysicalBatch reports whether n is a value PhysicalBatch assigns.
 // Callers that re-tune a value chosen by an upstream host (the distributed
 // router correcting the frontend's guess) use this to avoid clobbering an
@@ -122,7 +175,12 @@ func hasParallelOption(opts []string) bool {
 // deterministic device — detection does a live nvidia-smi call.
 var localGPU = func() GPU {
 	vendor, _ := xsysinfo.DetectGPUVendor()
-	vram, _ := xsysinfo.TotalAvailableVRAM()
+	// Use the SMALLEST device's VRAM, not the summed total: the parallel-slot
 	// tier and the batch headroom guard both reason about what fits on a single
 	// card, and per-device compute buffers can't be split across GPUs. Summing
 	// two 16 GiB cards into "32 GiB" is what over-provisioned multi-GPU hosts
 	// into OOM (issue #10485).
 	vram, _ := xsysinfo.MinPerGPUVRAM()
 	return GPU{
 		Vendor:            vendor,
 		ComputeCapability: xsysinfo.NVIDIAComputeCapability(),
@@ -137,10 +195,20 @@ func ApplyHardwareDefaults(cfg *ModelConfig, gpu GPU) {
 	if cfg == nil {
 		return
 	}
-	if cfg.Batch == 0 && gpu.IsNVIDIABlackwell() {
+	// Raise the physical batch on Blackwell only when the resulting compute
-		cfg.Batch = BlackwellPhysicalBatch
+	// buffer fits the per-device VRAM at THIS model's context. Leaving Batch at 0
-		xlog.Debug("[hardware_defaults] Blackwell GPU: defaulting physical batch",
+	// (rather than writing the default 512) preserves the downstream single-pass
-			"batch", cfg.Batch, "compute_cap", gpu.ComputeCapability)
+	// sizing in core/backend.EffectiveBatchSize for embedding/score/rerank.
 	if cfg.Batch == 0 {
 		ctx := DefaultContextSize
 		if cfg.ContextSize != nil {
 			ctx = *cfg.ContextSize
 		}
 		if PhysicalBatchForContext(gpu, ctx) == BlackwellPhysicalBatch {
 			cfg.Batch = BlackwellPhysicalBatch
 			xlog.Debug("[hardware_defaults] Blackwell GPU: defaulting physical batch",
 				"batch", cfg.Batch, "compute_cap", gpu.ComputeCapability, "context", ctx, "vram_gib", gpu.VRAM>>30)
 		}
 	}
 	// Enable concurrent serving by default on a capable GPU: without this the
--- a/core/config/hardware_defaults_internal_test.go
+++ b/core/config/hardware_defaults_internal_test.go
@@ -9,26 +9,37 @@ import (
 // GPU. The detection seam (localGPU) is injected so the path is deterministic
 // without a real GPU.
 var _ = Describe("SetDefaults hardware defaults (single-instance)", func() {
 	const gib = uint64(1) << 30
 	var orig func() GPU
 	BeforeEach(func() { orig = localGPU })
 	AfterEach(func() { localGPU = orig })
-	It("sets the physical batch on a local Blackwell GPU", func() {
+	It("sets the physical batch on a local Blackwell GPU with headroom", func() {
-		localGPU = func() GPU { return GPU{ComputeCapability: "12.1"} }
+		localGPU = func() GPU { return GPU{ComputeCapability: "12.1", VRAM: 119 * gib} }
 		cfg := &ModelConfig{}
 		cfg.SetDefaults()
 		Expect(cfg.Batch).To(Equal(BlackwellPhysicalBatch))
 	})
 	It("leaves batch unset when a large context would overflow the device", func() {
 		// Regression guard for issue #10485: 16 GiB consumer Blackwell + ~200k ctx.
 		localGPU = func() GPU { return GPU{ComputeCapability: "12.0", VRAM: 16 * gib} }
 		ctx := 204800
 		cfg := &ModelConfig{LLMConfig: LLMConfig{ContextSize: &ctx}}
 		cfg.SetDefaults()
 		Expect(cfg.Batch).To(Equal(0))
 	})
 	It("leaves batch unset on a non-Blackwell local GPU", func() {
-		localGPU = func() GPU { return GPU{ComputeCapability: "8.9"} }
+		localGPU = func() GPU { return GPU{ComputeCapability: "8.9", VRAM: 119 * gib} }
 		cfg := &ModelConfig{}
 		cfg.SetDefaults()
 		Expect(cfg.Batch).To(Equal(0))
 	})
 	It("never overrides an explicit batch", func() {
-		localGPU = func() GPU { return GPU{ComputeCapability: "12.1"} }
+		localGPU = func() GPU { return GPU{ComputeCapability: "12.1", VRAM: 119 * gib} }
 		cfg := &ModelConfig{}
 		cfg.Batch = 1024
 		cfg.SetDefaults()
--- a/core/config/hardware_defaults_test.go
+++ b/core/config/hardware_defaults_test.go
@@ -7,6 +7,8 @@ import (
 )
 var _ = Describe("Hardware-driven config defaults", func() {
 	const gib = uint64(1) << 30
 	DescribeTable("GPU.IsNVIDIABlackwell (sm_12x consumer family)",
 		func(cc string, want bool) {
 			Expect(GPU{ComputeCapability: cc}.IsNVIDIABlackwell()).To(Equal(want))
@@ -35,21 +37,54 @@ var _ = Describe("Hardware-driven config defaults", func() {
 		})
 	})
 	// These specs pin which batch PhysicalBatchForContext returns for a given
 	// GPU (compute capability + per-device VRAM) and context size: either
 	// BlackwellPhysicalBatch (raised) or DefaultPhysicalBatch (left alone).
 	Describe("PhysicalBatchForContext (per-device VRAM headroom)", func() {
 		It("raises the batch when the compute buffer fits the device", func() {
 			// 16 GiB Blackwell with a small context: the extra scratch is tiny.
 			Expect(PhysicalBatchForContext(GPU{ComputeCapability: "12.0", VRAM: 16 * gib}, 8192)).
 				To(Equal(BlackwellPhysicalBatch))
 		})
 		It("keeps the default batch when a large context would overflow one device", func() {
 			// The issue #10485 case: 16 GiB consumer Blackwell, ~200k context.
 			Expect(PhysicalBatchForContext(GPU{ComputeCapability: "12.0", VRAM: 16 * gib}, 204800)).
 				To(Equal(DefaultPhysicalBatch))
 		})
 		It("still raises the batch on a large unified-memory device (GB10)", func() {
 			// GB10 reports system RAM (~119 GiB) as its single device's VRAM.
 			Expect(PhysicalBatchForContext(GPU{ComputeCapability: "12.1", VRAM: 119 * gib}, 204800)).
 				To(Equal(BlackwellPhysicalBatch))
 		})
 		It("stays conservative when VRAM is unknown", func() {
 			// VRAM is left at its zero value here, which stands in for "unknown".
 			Expect(PhysicalBatchForContext(GPU{ComputeCapability: "12.1"}, 8192)).
 				To(Equal(DefaultPhysicalBatch))
 		})
 		It("never raises the batch on non-Blackwell", func() {
 			// Plenty of VRAM and a small context, so only the compute capability
 			// can be what keeps the batch at the default.
 			Expect(PhysicalBatchForContext(GPU{ComputeCapability: "9.0", VRAM: 80 * gib}, 8192)).
 				To(Equal(DefaultPhysicalBatch))
 		})
 	})
 	Describe("ApplyHardwareDefaults", func() {
-		It("raises an unset batch to 2048 on Blackwell", func() {
+		It("raises an unset batch to 2048 on Blackwell with headroom", func() {
 			cfg := &ModelConfig{}
-			ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.1"})
+			ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.1", VRAM: 119 * gib})
 			Expect(cfg.Batch).To(Equal(BlackwellPhysicalBatch))
 		})
 		It("leaves batch unset when a large context would overflow one device", func() {
 			// Regression guard for issue #10485: 16 GiB card + ~200k context.
 			ctx := 204800
 			cfg := &ModelConfig{LLMConfig: LLMConfig{ContextSize: &ctx}}
 			ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.0", VRAM: 16 * gib})
 			Expect(cfg.Batch).To(Equal(0))
 		})
 		It("leaves batch unset on non-Blackwell", func() {
 			cfg := &ModelConfig{}
-			ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "9.0"})
+			ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "9.0", VRAM: 119 * gib})
 			Expect(cfg.Batch).To(Equal(0))
 		})
 		It("never overrides an explicit batch", func() {
 			cfg := &ModelConfig{}
 			cfg.Batch = 1024
-			ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.1"})
+			ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.1", VRAM: 119 * gib})
 			Expect(cfg.Batch).To(Equal(1024))
 		})
 		It("no-ops on nil", func() {
@@ -57,8 +92,6 @@ var _ = Describe("Hardware-driven config defaults", func() {
 		})
 	})
 	const gib = uint64(1) << 30
 	DescribeTable("DefaultParallelSlots (by VRAM)",
 		func(vramGiB uint64, want int) {
 			Expect(DefaultParallelSlots(GPU{VRAM: vramGiB * gib})).To(Equal(want))
--- a/core/config/meta/registry.go
+++ b/core/config/meta/registry.go
@@ -537,6 +537,36 @@ func DefaultRegistry() map[string]FieldMetaOverride {
 			Component:   "number",
 			Order:       79,
 		},
 		"pipeline.compaction.enabled": {
 			Section:     "pipeline",
 			Label:       "Compaction Enabled",
 			Description: "Fold conversation items that age out of the live window (Max History Items) into a rolling summary instead of dropping them, so long realtime sessions stay cheap without losing earlier context. Off by default.",
 			Component:   "toggle",
 			Order:       80,
 		},
 		"pipeline.compaction.trigger_items": {
 			Section:     "pipeline",
 			Label:       "Compaction Trigger Items",
 			Description: "High-water mark: once the live conversation exceeds this many items, the overflow above Max History Items is summarized and evicted. Must be greater than Max History Items; defaults to twice it. The gap controls how often summarization runs.",
 			Component:   "number",
 			Order:       81,
 		},
 		"pipeline.compaction.summary_model": {
 			Section:     "pipeline",
 			Label:       "Compaction Summary Model",
 			Description: "Optional smaller/cheaper model used to produce the rolling summary. Empty reuses the pipeline's own LLM. On CPU, a tiny model here keeps compaction from competing with the conversation LLM.",
 			Component:   "input",
 			Advanced:    true,
 			Order:       82,
 		},
 		"pipeline.compaction.max_summary_tokens": {
 			Section:     "pipeline",
 			Label:       "Compaction Max Summary Tokens",
 			Description: "Advisory cap on the rolling summary length (fed to the summarizer prompt). Defaults to 512.",
 			Component:   "number",
 			Advanced:    true,
 			Order:       83,
 		},
 		// --- Functions ---
 		"function.grammar.parallel_calls": {
--- a/core/config/model_config.go
+++ b/core/config/model_config.go
@@ -641,11 +641,32 @@ type Pipeline struct {
 	// context fills.
 	MaxHistoryItems *int `yaml:"max_history_items,omitempty" json:"max_history_items,omitempty"`
 	// Compaction folds conversation items that age out of the live window
 	// (max_history_items) into a rolling summary instead of dropping them, so
 	// long realtime sessions stay cheap without losing earlier context. Nil
 	// (block absent) means disabled, preserving existing behavior.
 	Compaction *PipelineCompaction `yaml:"compaction,omitempty" json:"compaction,omitempty"`
 	// VoiceRecognition gates the pipeline behind speaker verification. Nil
 	// (block absent) means no gate, preserving existing behavior.
 	VoiceRecognition *PipelineVoiceRecognition `yaml:"voice_recognition,omitempty" json:"voice_recognition,omitempty"`
 }
 // PipelineCompaction configures summarize-then-drop for a realtime pipeline.
 // It is attached to Pipeline through the optional `compaction` block; a nil
 // pointer there (block absent) leaves compaction disabled.
 //
 // Every field is tagged omitempty, so a zero value is dropped when the config
 // is serialized and reads back the same as "not configured".
 // NOTE(review): the defaults and the clamp described on the fields below are
 // not applied in this type; confirm them against the code that consumes it.
 type PipelineCompaction struct {
 	// Enabled turns summarize-then-drop on. Default false.
 	Enabled bool `yaml:"enabled,omitempty" json:"enabled,omitempty"`
 	// TriggerItems is the high-water mark: once live items exceed it, overflow
 	// above max_history_items is summarized and evicted. Must exceed
 	// max_history_items; clamped up if not. Default: 2x max_history_items.
 	TriggerItems int `yaml:"trigger_items,omitempty" json:"trigger_items,omitempty"`
 	// SummaryModel optionally names a smaller/cheaper model for the summary
 	// call. Empty uses the pipeline's own LLM.
 	SummaryModel string `yaml:"summary_model,omitempty" json:"summary_model,omitempty"`
 	// MaxSummaryTokens advises the summary length (fed to the prompt). Default 512.
 	MaxSummaryTokens int `yaml:"max_summary_tokens,omitempty" json:"max_summary_tokens,omitempty"`
 }
 // ApplyReasoningEffort resolves the effective reasoning effort — a per-request
 // value (requestEffort) overrides the config's own ReasoningEffort default —
 // stores it on the config so gRPCPredictOpts forwards it to the backend as the
@@ -1183,11 +1204,6 @@ func (cfg *ModelConfig) SetDefaults(opts ...ConfigLoaderOption) {
 	// This ensures gallery-installed and runtime-loaded models get optimal parameters.
 	ApplyInferenceDefaults(cfg, cfg.Name, cfg.Model)
 	// Apply hardware-driven defaults (e.g. a larger physical batch on Blackwell).
 	// Uses the local GPU here; in distributed mode the router re-applies the same
 	// heuristics for the selected node's GPU before loading. Explicit config wins.
 	ApplyHardwareDefaults(cfg, localGPU())
 	// Apply serving-policy defaults (device-independent): cross-request prefix
 	// caching. Propagates to distributed nodes via the model options.
 	ApplyServingDefaults(cfg)
@@ -1226,6 +1242,16 @@ func (cfg *ModelConfig) SetDefaults(opts ...ConfigLoaderOption) {
 		cfg.ContextSize = &ctx
 	}
 	runBackendHooks(cfg, lo.modelPath)
 	// Apply hardware-driven defaults (e.g. a larger physical batch on Blackwell)
 	// LAST, after the context size is fully resolved (explicit config, LoadOptions,
 	// then the GGUF guess inside runBackendHooks): the Blackwell batch guard sizes
 	// the per-device compute buffer against this model's context, so it must see
 	// the final value, not a pre-guess nil. Uses the local GPU here; in distributed
 	// mode the router re-applies the same heuristics for the selected node's GPU
 	// before loading. Explicit config always wins.
 	ApplyHardwareDefaults(cfg, localGPU())
 	cfg.syncKnownUsecasesFromString()
 }
--- a/core/config/runtime_settings_persist.go
+++ b/core/config/runtime_settings_persist.go
@@ -5,6 +5,7 @@ import (
 	"errors"
 	"os"
 	"path/filepath"
 	"reflect"
 )
 // runtimeSettingsFile is the on-disk filename inside DynamicConfigsDir.
@@ -33,6 +34,35 @@ func (o *ApplicationConfig) ReadPersistedSettings() (RuntimeSettings, error) {
 	return settings, nil
 }
 // MergeNonNil applies overlay to the receiver as a partial update: every
 // field that overlay has set (a non-nil pointer) replaces the receiver's
 // field, and every field overlay left nil keeps the receiver's current value.
 // RuntimeSettings fields are pointers so that "set" and "absent" stay
 // distinguishable (see the type doc); that is what lets a caller submit only
 // the field it owns without clobbering unrelated settings.
 //
 // This is the merge step of the read-modify-write done by
 // UpdateSettingsEndpoint: it reads the settings on disk, merges the request
 // body over them, and writes the result back. A focused admin page that POSTs
 // a single key (the Middleware page sends only mitm_listen; the detector
 // table only pii_default_detectors) therefore no longer nulls the rest.
 //
 // The walk is reflective on purpose: a field added to RuntimeSettings later
 // is merged without touching this function, so a new setting cannot be
 // dropped the way it could with a hand-maintained field list. Non-pointer
 // fields (none today) cannot express "absent" and are skipped, so the
 // receiver's value wins for them.
 //
 // The pointer itself is copied, so after the merge the receiver and overlay
 // share the pointed-to value for every overlaid field.
 func (s *RuntimeSettings) MergeNonNil(overlay RuntimeSettings) {
 	target := reflect.ValueOf(s).Elem()
 	patch := reflect.ValueOf(overlay)
 	for idx, count := 0, patch.NumField(); idx < count; idx++ {
 		field := patch.Field(idx)
 		// Only a non-nil pointer counts as "set"; everything else is left alone.
 		if field.Kind() != reflect.Pointer || field.IsNil() {
 			continue
 		}
 		target.Field(idx).Set(field)
 	}
 }
 // WritePersistedSettings serialises the given RuntimeSettings to
 // runtime_settings.json with restricted permissions (it may carry API
 // keys and P2P tokens).
--- a/core/config/runtime_settings_persist_test.go
+++ b/core/config/runtime_settings_persist_test.go
@@ -12,6 +12,7 @@ import (
 )
 func strPtr(s string) *string { return &s }
 func boolPtr(b bool) *bool     { return &b }
 var _ = Describe("RuntimeSettings persistence helpers", func() {
 	var (
@@ -51,6 +52,47 @@ var _ = Describe("RuntimeSettings persistence helpers", func() {
 		})
 	})
 	// MergeNonNil is the partial-update primitive UpdateSettingsEndpoint
 	// relies on: a focused admin page POSTs only the field it owns, and the
 	// handler reads the on-disk settings and overlays the request on top.
 	// Without it, the body would be written verbatim and every field the
 	// caller omitted would be nulled (the reported regression: changing
 	// mitm_listen wiped the galleries, api keys, watchdog config, etc.).
 	Describe("MergeNonNil partial update", func() {
 		It("overlays set fields and preserves unset ones", func() {
 			// Seed several unrelated fields of different pointer types so a
 			// clobber on any one of them is caught by the assertions below.
 			base := config.RuntimeSettings{
 				MITMListen:          strPtr(":9000"),
 				Galleries:           &[]config.Gallery{{Name: "g1", URL: "http://example/g1"}},
 				WatchdogIdleEnabled: boolPtr(true),
 				ApiKeys:             &[]string{"persisted-key"},
 				PIIDefaultDetectors: &[]string{"det-a"},
 			}
 			// Simulate the Middleware proxy tab: only mitm_listen is sent.
 			overlay := config.RuntimeSettings{MITMListen: strPtr(":8443")}
 			base.MergeNonNil(overlay)
 			Expect(base.MITMListen).ToNot(BeNil())
 			Expect(*base.MITMListen).To(Equal(":8443"), "set field should be overlaid")
 			// Everything the overlay left unset must survive untouched.
 			Expect(base.Galleries).ToNot(BeNil(), "galleries were clobbered")
 			Expect(*base.Galleries).To(HaveLen(1))
 			Expect(base.WatchdogIdleEnabled).ToNot(BeNil())
 			Expect(*base.WatchdogIdleEnabled).To(BeTrue())
 			Expect(base.ApiKeys).ToNot(BeNil(), "api_keys were clobbered")
 			Expect(*base.ApiKeys).To(Equal([]string{"persisted-key"}))
 			Expect(base.PIIDefaultDetectors).ToNot(BeNil(), "pii_default_detectors were clobbered")
 			Expect(*base.PIIDefaultDetectors).To(Equal([]string{"det-a"}))
 		})
 		It("lets an explicit empty slice clear a field", func() {
 			// A pointer to an empty slice is still a non-nil pointer, so the
 			// merge treats it as "set" and replaces the persisted list with it.
 			base := config.RuntimeSettings{PIIDefaultDetectors: &[]string{"det-a"}}
 			base.MergeNonNil(config.RuntimeSettings{PIIDefaultDetectors: &[]string{}})
 			Expect(base.PIIDefaultDetectors).ToNot(BeNil())
 			Expect(*base.PIIDefaultDetectors).To(BeEmpty(), "an explicit empty slice should clear, not preserve")
 		})
 	})
 	// MITM round trip pins the contract that loadRuntimeSettingsFromFile
 	// MITM listener address must survive a write/read round trip so the
 	// next process restart can bring the listener back up. (Intercept
--- a/core/http/endpoints/localai/agent_collections.go
+++ b/core/http/endpoints/localai/agent_collections.go
@@ -70,7 +70,7 @@ func UploadToCollectionEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		name := c.Param("name")
+		name := decodedParam(c, "name")
 		file, err := c.FormFile("file")
 		if err != nil {
 			return c.JSON(http.StatusBadRequest, map[string]string{"error": "file required"})
@@ -116,7 +116,7 @@ func ListCollectionEntriesEndpoint(app *application.Application) echo.HandlerFun
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		entries, err := svc.ListCollectionEntriesForUser(userID, c.Param("name"))
+		entries, err := svc.ListCollectionEntriesForUser(userID, decodedParam(c, "name"))
 		if err != nil {
 			if strings.Contains(err.Error(), "not found") {
 				return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
@@ -139,7 +139,7 @@ func GetCollectionEntryContentEndpoint(app *application.Application) echo.Handle
 		if err != nil {
 			entry = entryParam
 		}
-		content, chunkCount, err := svc.GetCollectionEntryContentForUser(userID, c.Param("name"), entry)
+		content, chunkCount, err := svc.GetCollectionEntryContentForUser(userID, decodedParam(c, "name"), entry)
 		if err != nil {
 			if strings.Contains(err.Error(), "not found") {
 				return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
@@ -164,7 +164,7 @@ func SearchCollectionEndpoint(app *application.Application) echo.HandlerFunc {
 		if err := c.Bind(&payload); err != nil {
 			return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
 		}
-		results, err := svc.SearchCollectionForUser(userID, c.Param("name"), payload.Query, payload.MaxResults)
+		results, err := svc.SearchCollectionForUser(userID, decodedParam(c, "name"), payload.Query, payload.MaxResults)
 		if err != nil {
 			if strings.Contains(err.Error(), "not found") {
 				return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
@@ -182,7 +182,7 @@ func ResetCollectionEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		if err := svc.ResetCollectionForUser(userID, c.Param("name")); err != nil {
+		if err := svc.ResetCollectionForUser(userID, decodedParam(c, "name")); err != nil {
 			if strings.Contains(err.Error(), "not found") {
 				return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
 			}
@@ -202,7 +202,7 @@ func DeleteCollectionEntryEndpoint(app *application.Application) echo.HandlerFun
 		if err := c.Bind(&payload); err != nil {
 			return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
 		}
-		remaining, err := svc.DeleteCollectionEntryForUser(userID, c.Param("name"), payload.Entry)
+		remaining, err := svc.DeleteCollectionEntryForUser(userID, decodedParam(c, "name"), payload.Entry)
 		if err != nil {
 			if strings.Contains(err.Error(), "not found") {
 				return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
@@ -230,7 +230,7 @@ func AddCollectionSourceEndpoint(app *application.Application) echo.HandlerFunc
 		if payload.UpdateInterval < 1 {
 			payload.UpdateInterval = 60
 		}
-		if err := svc.AddCollectionSourceForUser(userID, c.Param("name"), payload.URL, payload.UpdateInterval); err != nil {
+		if err := svc.AddCollectionSourceForUser(userID, decodedParam(c, "name"), payload.URL, payload.UpdateInterval); err != nil {
 			if strings.Contains(err.Error(), "not found") {
 				return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
 			}
@@ -250,7 +250,7 @@ func RemoveCollectionSourceEndpoint(app *application.Application) echo.HandlerFu
 		if err := c.Bind(&payload); err != nil {
 			return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
 		}
-		if err := svc.RemoveCollectionSourceForUser(userID, c.Param("name"), payload.URL); err != nil {
+		if err := svc.RemoveCollectionSourceForUser(userID, decodedParam(c, "name"), payload.URL); err != nil {
 			return c.JSON(http.StatusInternalServerError, map[string]string{"error": err.Error()})
 		}
 		return c.JSON(http.StatusOK, map[string]string{"status": "ok"})
@@ -267,7 +267,7 @@ func GetCollectionEntryRawFileEndpoint(app *application.Application) echo.Handle
 		if err != nil {
 			entry = entryParam
 		}
-		fpath, err := svc.GetCollectionEntryFilePathForUser(userID, c.Param("name"), entry)
+		fpath, err := svc.GetCollectionEntryFilePathForUser(userID, decodedParam(c, "name"), entry)
 		if err != nil {
 			if strings.Contains(err.Error(), "not found") {
 				return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
@@ -282,7 +282,7 @@ func ListCollectionSourcesEndpoint(app *application.Application) echo.HandlerFun
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		sources, err := svc.ListCollectionSourcesForUser(userID, c.Param("name"))
+		sources, err := svc.ListCollectionSourcesForUser(userID, decodedParam(c, "name"))
 		if err != nil {
 			if strings.Contains(err.Error(), "not found") {
 				return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
--- a/core/http/endpoints/localai/agent_collections_param_test.go
+++ b/core/http/endpoints/localai/agent_collections_param_test.go
@@ -0,0 +1,49 @@
 package localai
 import (
 	"net/http"
 	"net/http/httptest"
 	"github.com/labstack/echo/v4"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 )
 // Regression for #10443: agent/collection names carry a "legacy-api-key:"
 // prefix, so the ':' is percent-encoded as %3A in the request path. Echo routes
 // such paths via URL.RawPath and stores the path-param value still escaped, so
 // handlers must URL-decode it before looking the collection up in the store -
 // otherwise the lookup sees "legacy-api-key%3ALiteraryResearch" and 404s.
 var _ = Describe("decodedParam", func() {
 	var e *echo.Echo
 	// Each spec gets its own Echo instance, so the route that one spec
 	// registers through `route` is never seen by another.
 	BeforeEach(func() {
 		e = echo.New()
 	})
 	// route runs a request through Echo's real router so the path param is
 	// populated exactly as it would be in production, then returns the decoded
 	// value the handler would observe.
 	route := func(rawPath string) string {
 		var got string
 		e.GET("/api/agents/collections/:name/upload", func(c echo.Context) error {
 			got = decodedParam(c, "name")
 			return c.NoContent(http.StatusOK)
 		})
 		req := httptest.NewRequest(http.MethodGet, rawPath, nil)
 		rec := httptest.NewRecorder()
 		e.ServeHTTP(rec, req)
 		// Anything other than 200 means the handler above did not run, in which
 		// case `got` would still be the empty string.
 		Expect(rec.Code).To(Equal(http.StatusOK))
 		return got
 	}
 	It("decodes a percent-encoded colon in the collection name", func() {
 		got := route("/api/agents/collections/legacy-api-key%3ALiteraryResearch/upload")
 		Expect(got).To(Equal("legacy-api-key:LiteraryResearch"))
 	})
 	It("leaves an unencoded name untouched", func() {
 		got := route("/api/agents/collections/PlainCollection/upload")
 		Expect(got).To(Equal("PlainCollection"))
 	})
 })
--- a/core/http/endpoints/localai/agents.go
+++ b/core/http/endpoints/localai/agents.go
@@ -6,6 +6,7 @@ import (
 	"io"
 	"maps"
 	"net/http"
 	"net/url"
 	"os"
 	"path/filepath"
 	"slices"
@@ -33,6 +34,22 @@ func getUserID(c echo.Context) string {
 	return user.ID
 }
 // decodedParam looks up the path parameter called name and returns it with
 // percent-escapes resolved.
 //
 // When a request path contains percent-encoded characters (e.g. %3A for ':'),
 // Echo routes on URL.RawPath and hands the handler the matched parameter still
 // escaped. Agent and collection names carry a "legacy-api-key:" prefix, so the
 // ':' shows up as %3A and the raw parameter stops matching the stored name;
 // lookups therefore have to go through this helper instead of c.Param.
 // If the value is not valid percent-encoding it is returned unchanged.
 func decodedParam(c echo.Context, name string) string {
 	value := c.Param(name)
 	unescaped, err := url.PathUnescape(value)
 	if err != nil {
 		// Malformed escape sequence: hand back exactly what the router gave us.
 		return value
 	}
 	return unescaped
 }
 // isAdminUser returns true if the authenticated user has admin role.
 func isAdminUser(c echo.Context) bool {
 	user := auth.GetUser(c)
@@ -127,7 +144,7 @@ func GetAgentEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		name := c.Param("name")
+		name := decodedParam(c, "name")
 		statuses := svc.ListAgentsForUser(userID)
 		active, exists := statuses[name]
@@ -142,7 +159,7 @@ func UpdateAgentEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		name := c.Param("name")
+		name := decodedParam(c, "name")
 		var cfg state.AgentConfig
 		if err := c.Bind(&cfg); err != nil {
 			return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
@@ -161,7 +178,7 @@ func DeleteAgentEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		name := c.Param("name")
+		name := decodedParam(c, "name")
 		if err := svc.DeleteAgentForUser(userID, name); err != nil {
 			return c.JSON(http.StatusInternalServerError, map[string]string{"error": err.Error()})
 		}
@@ -173,7 +190,7 @@ func GetAgentConfigEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		name := c.Param("name")
+		name := decodedParam(c, "name")
 		cfg := svc.GetAgentConfigForUser(userID, name)
 		if cfg == nil {
 			return c.JSON(http.StatusNotFound, map[string]string{"error": "Agent not found"})
@@ -186,7 +203,7 @@ func PauseAgentEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		if err := svc.PauseAgentForUser(userID, c.Param("name")); err != nil {
+		if err := svc.PauseAgentForUser(userID, decodedParam(c, "name")); err != nil {
 			return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
 		}
 		return c.JSON(http.StatusOK, map[string]string{"status": "ok"})
@@ -197,7 +214,7 @@ func ResumeAgentEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		if err := svc.ResumeAgentForUser(userID, c.Param("name")); err != nil {
+		if err := svc.ResumeAgentForUser(userID, decodedParam(c, "name")); err != nil {
 			return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
 		}
 		return c.JSON(http.StatusOK, map[string]string{"status": "ok"})
@@ -208,7 +225,7 @@ func GetAgentStatusEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		name := c.Param("name")
+		name := decodedParam(c, "name")
 		history := svc.GetAgentStatusForUser(userID, name)
 		if history == nil {
@@ -241,7 +258,7 @@ func GetAgentObservablesEndpoint(app *application.Application) echo.HandlerFunc
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		name := c.Param("name")
+		name := decodedParam(c, "name")
 		history, err := svc.GetAgentObservablesForUser(userID, name)
 		if err != nil {
@@ -261,7 +278,7 @@ func ClearAgentObservablesEndpoint(app *application.Application) echo.HandlerFun
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		name := c.Param("name")
+		name := decodedParam(c, "name")
 		if err := svc.ClearAgentObservablesForUser(userID, name); err != nil {
 			return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
 		}
@@ -273,7 +290,7 @@ func ChatWithAgentEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		name := c.Param("name")
+		name := decodedParam(c, "name")
 		var payload struct {
 			Message string `json:"message"`
 		}
@@ -302,7 +319,7 @@ func AgentSSEEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		name := c.Param("name")
+		name := decodedParam(c, "name")
 		// Try local SSE manager first
 		manager := svc.GetSSEManagerForUser(userID, name)
@@ -334,7 +351,7 @@ func ExportAgentEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		name := c.Param("name")
+		name := decodedParam(c, "name")
 		data, err := svc.ExportAgentForUser(userID, name)
 		if err != nil {
 			return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
--- a/core/http/endpoints/localai/nodes.go
+++ b/core/http/endpoints/localai/nodes.go
@@ -385,6 +385,23 @@ func GetNodeModelsEndpoint(registry *nodes.NodeRegistry) echo.HandlerFunc {
 	}
 }
 // ListAllNodeModelsEndpoint returns all loaded models across all healthy nodes.
 // The handler replies 200 with whatever registry.ListAllLoadedModels yields;
 // if that call fails, the error is logged and a generic 500 body is sent so
 // the underlying error text is not exposed to the client.
 // @Summary List all loaded models cluster-wide
 // @Tags Nodes
 // @Success 200 {array} nodes.NodeModel
 // @Router /api/nodes/models [get]
 func ListAllNodeModelsEndpoint(registry *nodes.NodeRegistry) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		loaded, listErr := registry.ListAllLoadedModels(c.Request().Context())
 		if listErr == nil {
 			return c.JSON(http.StatusOK, loaded)
 		}
 		xlog.Error("Failed to list all node models", "error", listErr)
 		const status = http.StatusInternalServerError
 		return c.JSON(status, nodeError(status, "failed to list node models"))
 	}
 }
 // DrainNodeEndpoint sets a node to draining status (no new requests).
 func DrainNodeEndpoint(registry *nodes.NodeRegistry) echo.HandlerFunc {
 	return func(c echo.Context) error {
--- a/core/http/endpoints/localai/nodes_test.go
+++ b/core/http/endpoints/localai/nodes_test.go
@@ -407,4 +407,44 @@ var _ = Describe("Node HTTP handlers", func() {
 			Expect(names).To(ConsistOf("alpha", "beta"))
 		})
 	})
 	// These specs call the handler directly on a bare echo context (no router
 	// involved) and decode the JSON response body into []nodes.NodeModel.
 	Describe("ListAllNodeModelsEndpoint", func() {
 		It("returns an empty list when no models are loaded", func() {
 			e := echo.New()
 			req := httptest.NewRequest(http.MethodGet, "/", nil)
 			rec := httptest.NewRecorder()
 			c := e.NewContext(req, rec)
 			handler := ListAllNodeModelsEndpoint(registry)
 			Expect(handler(c)).To(Succeed())
 			Expect(rec.Code).To(Equal(http.StatusOK))
 			var list []nodes.NodeModel
 			Expect(json.Unmarshal(rec.Body.Bytes(), &list)).To(Succeed())
 			Expect(list).To(BeEmpty())
 		})
 		It("returns loaded models across healthy nodes", func() {
 			ctx := context.Background()
 			// Register one healthy node and attach a single model to it, so
 			// exactly one row is expected in the response.
 			Expect(registry.Register(ctx, &nodes.BackendNode{
 				ID: "n1", Name: "alpha", Address: "10.0.0.1:50051", Status: nodes.StatusHealthy,
 			}, true)).To(Succeed())
 			Expect(registry.SetNodeModel(ctx, "n1", "llama-3.3", 0, "loaded", "10.0.0.1:50051", 0)).To(Succeed())
 			e := echo.New()
 			req := httptest.NewRequest(http.MethodGet, "/", nil)
 			rec := httptest.NewRecorder()
 			c := e.NewContext(req, rec)
 			handler := ListAllNodeModelsEndpoint(registry)
 			Expect(handler(c)).To(Succeed())
 			Expect(rec.Code).To(Equal(http.StatusOK))
 			var list []nodes.NodeModel
 			Expect(json.Unmarshal(rec.Body.Bytes(), &list)).To(Succeed())
 			Expect(list).To(HaveLen(1))
 			Expect(list[0].ModelName).To(Equal("llama-3.3"))
 			Expect(list[0].NodeID).To(Equal("n1"))
 		})
 	})
 })
--- a/core/http/endpoints/localai/settings.go
+++ b/core/http/endpoints/localai/settings.go
@@ -4,8 +4,6 @@ import (
 	"encoding/json"
 	"io"
 	"net/http"
 	"os"
 	"path/filepath"
 	"time"
 	"github.com/labstack/echo/v4"
@@ -110,6 +108,18 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
 			})
 		}
 		// Read whatever is already persisted: it is both the source of truth
 		// for branding asset filenames (below) and the base we merge this
 		// request onto before writing. A read failure must not let a Save
 		// silently discard the existing settings — surface it instead.
 		persisted, err := appConfig.ReadPersistedSettings()
 		if err != nil {
 			return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
 				Success: false,
 				Error:   "Failed to read existing settings: " + err.Error(),
 			})
 		}
 		// Branding asset filenames are owned exclusively by
 		// /api/branding/asset/{kind} (upload/delete). The Settings page also
 		// round-trips them via GET /api/settings, but its local state is stale
@@ -118,11 +128,9 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
 		// at page open. Replace whatever the body sent for these three fields
 		// with the values currently on disk so /api/settings can never
 		// regress them.
-		if existing, err := appConfig.ReadPersistedSettings(); err == nil {
+		settings.LogoFile = persisted.LogoFile
-			settings.LogoFile = existing.LogoFile
+		settings.LogoHorizontalFile = persisted.LogoHorizontalFile
-			settings.LogoHorizontalFile = existing.LogoHorizontalFile
+		settings.FaviconFile = persisted.FaviconFile
 			settings.FaviconFile = existing.FaviconFile
 		}
 		// The UI reads ApiKeys from GET /api/settings, which already returns the
 		// merged env+runtime list. When the user clicks Save, the same merged
@@ -145,16 +153,17 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
 			settings.ApiKeys = &runtimeOnly
 		}
-		settingsFile := filepath.Join(appConfig.DynamicConfigsDir, "runtime_settings.json")
+		// Persist as a partial update: overlay only the fields this request set
-		settingsJSON, err := json.MarshalIndent(settings, "", "  ")
+		// onto the settings already on disk. Focused admin pages POST just the
-		if err != nil {
+		// keys they own (the Middleware proxy tab sends only mitm_listen; the
-			return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
+		// detector table only pii_default_detectors), so writing the request
-				Success: false,
+		// body verbatim would null every unrelated setting (the no-omitempty
-				Error:   "Failed to marshal settings: " + err.Error(),
+		// api_keys / pii_default_detectors fields even round-trip as JSON
-			})
+		// null). The full Settings page still round-trips every field, so its
-		}
+		// Save is unchanged.
-
+		toPersist := persisted
-		if err := os.WriteFile(settingsFile, settingsJSON, 0600); err != nil {
+		toPersist.MergeNonNil(settings)
 		if err := appConfig.WritePersistedSettings(toPersist); err != nil {
 			return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
 				Success: false,
 				Error:   "Failed to write settings file: " + err.Error(),
@@ -262,7 +271,14 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
 			}
 		}
-		if settings.MITMListen != nil {
+		// Rebuild the MITM listener when its address OR the instance-wide
 		// default detectors change. The per-host detector map is resolved once
 		// at listener start (startMITMLocked → ResolvePIIPolicy), so a
 		// default-detector change is otherwise invisible to cloud-proxy traffic
 		// until the next restart — an admin toggling a default detector would
 		// see no redaction. RestartMITM is a no-op when the listener is
 		// disabled (empty address).
 		if settings.MITMListen != nil || settings.PIIDefaultDetectors != nil {
 			if err := app.RestartMITM(); err != nil {
 				xlog.Error("Failed to restart MITM proxy", "error", err)
 				return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
--- a/core/http/endpoints/localai/settings_test.go
+++ b/core/http/endpoints/localai/settings_test.go
@@ -52,6 +52,10 @@ var _ = Describe("Settings endpoints", func() {
 		// Settings are persisted here; set after construction since there's no
 		// dedicated AppOption for it.
 		app.ApplicationConfig().DynamicConfigsDir = tmp
 		// Contain the MITM CA inside tmp too. The partial-save spec flips
 		// mitm_listen, which starts the listener and writes a CA; without this
 		// it defaults to ./mitm-ca and litters the package source tree.
 		app.ApplicationConfig().MITMCADir = filepath.Join(tmp, "mitm-ca")
 		e = echo.New()
 		e.GET("/api/settings", GetSettingsEndpoint(app))
@@ -109,6 +113,57 @@ var _ = Describe("Settings endpoints", func() {
 		Expect(err).ToNot(HaveOccurred())
 	})
 	// Regression: a focused admin page (the Middleware proxy tab) POSTs only
 	// the one field it owns — mitm_listen. The old handler wrote the request
 	// body verbatim, so every other persisted setting was dropped (and
 	// api_keys / pii_default_detectors, which lack omitempty, were written as
 	// null). A partial POST must now merge onto what is already on disk.
 	It("preserves unrelated persisted settings when a partial POST sets only mitm_listen", func() {
 		// First save establishes a fuller settings file (as the full Settings
 		// page would): galleries, an API key, and the MITM listener. The
 		// listener restart binds a real socket, so use 127.0.0.1:0 for an
 		// ephemeral free port rather than a fixed one that may be in use.
 		rec := post(`{"mitm_listen":"127.0.0.1:0","galleries":[{"name":"g1","url":"http://example/g1"}],"api_keys":["k1"],"pii_default_detectors":["det-a"]}`)
 		Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
 		// The Middleware proxy tab then changes only the listen address — the
 		// exact partial body that nulled everything else before the fix.
 		// NOTE(review): this second POST sends the same mitm_listen value as
 		// the first, so the spec proves the other fields survive a partial
 		// save, not that a *changed* mitm_listen value is written.
 		rec = post(`{"mitm_listen":"127.0.0.1:0"}`)
 		Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
 		// Inspect the file on disk rather than the HTTP response: the
 		// regression was in what got persisted.
 		raw, err := os.ReadFile(filepath.Join(tmp, "runtime_settings.json"))
 		Expect(err).ToNot(HaveOccurred())
 		var ondisk config.RuntimeSettings
 		Expect(json.Unmarshal(raw, &ondisk)).To(Succeed())
 		Expect(ondisk.MITMListen).ToNot(BeNil())
 		Expect(*ondisk.MITMListen).To(Equal("127.0.0.1:0"), "the changed field should be saved")
 		// Every field the partial body omitted must still hold its first-save value.
 		Expect(ondisk.Galleries).ToNot(BeNil(), "galleries were clobbered by the partial save")
 		Expect(*ondisk.Galleries).To(HaveLen(1))
 		Expect(ondisk.ApiKeys).ToNot(BeNil(), "api_keys were nulled by the partial save")
 		Expect(*ondisk.ApiKeys).To(Equal([]string{"k1"}))
 		Expect(ondisk.PIIDefaultDetectors).ToNot(BeNil(), "pii_default_detectors were nulled by the partial save")
 		Expect(*ondisk.PIIDefaultDetectors).To(Equal([]string{"det-a"}))
 	})
 	// The MITM listener resolves its per-host PII detectors once at start
 	// (startMITMLocked → ResolvePIIPolicy), and the handler used to restart it
 	// only when mitm_listen changed. So an admin toggling a default detector
 	// (the Middleware detector table POSTs only pii_default_detectors) left
 	// cloud-proxy traffic unredacted until the next reboot. A
 	// pii_default_detectors change must now rebuild the listener.
 	It("rebuilds the MITM listener when only pii_default_detectors changes", func() {
 		// Start the listener on an ephemeral port and remember the server
 		// instance that is running.
 		rec := post(`{"mitm_listen":"127.0.0.1:0"}`)
 		Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
 		srv1 := app.MITMServer()
 		Expect(srv1).ToNot(BeNil(), "listener should be running after mitm_listen is set")
 		// Change only the default detectors; mitm_listen is absent from the body.
 		rec = post(`{"pii_default_detectors":["det-a"]}`)
 		Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
 		// A restart is detected by identity: the server returned now must not
 		// be the same instance captured before the second POST.
 		Expect(app.MITMServer()).ToNot(BeIdenticalTo(srv1),
 			"a default-detector change must restart the listener so it picks up the new detectors")
 	})
 	// Residual #9125: enabling the watchdog from a cold (off) state via the
 	// React master toggle must start the live watchdog immediately, without a
 	// restart. The toggle posts watchdog_idle_enabled/busy_enabled=true while
--- a/core/http/endpoints/openai/realtime.go
+++ b/core/http/endpoints/openai/realtime.go
@@ -12,6 +12,7 @@ import (
 	"os"
 	"strconv"
 	"sync"
 	"sync/atomic"
 	"time"
 	"net/http"
@@ -134,6 +135,18 @@ type Session struct {
 	// pairs are kept together so we never feed an orphaned tool result.
 	MaxHistoryItems int
 	// Compaction settings resolved from pipeline.compaction (see resolveCompaction).
 	CompactionEnabled bool
 	CompactionTrigger int
 	SummaryModel      string
 	MaxSummaryTokens  int
 	// summarizerFactory lazily builds the model used for compaction summaries
 	// when summary_model is configured; nil means reuse the pipeline LLM.
 	summarizerFactory func() (Model, error)
 	summarizerOnce    sync.Once
 	summarizerCached  Model
 	// AssistantExecutor is non-nil when the session opted into the in-process
 	// LocalAI Assistant tool surface. Tool calls whose name matches this
 	// executor's catalog are run inproc and their output is fed back to the
@@ -241,6 +254,12 @@ type Conversation struct {
 	ID    string
 	Items []*types.MessageItemUnion
 	Lock  sync.Mutex
 	// Memory is the rolling summary of items already evicted by compaction. It
 	// is kept out of Items (so trimRealtimeItems never drops it) and rendered
 	// as a system message right after the session instructions.
 	Memory string
 	// compacting ensures at most one background compaction runs per conversation.
 	compacting atomic.Bool
 }
 func (c *Conversation) ToServer() types.Conversation {
@@ -540,13 +559,12 @@ func runRealtimeSession(application *application.Application, t Transport, model
 		SoundDetectionWindowMs:  cfg.Pipeline.SoundDetectionWindowMs,
 		SoundDetectionHopMs:     cfg.Pipeline.SoundDetectionHopMs,
 	}
 	session.CompactionEnabled, session.CompactionTrigger, session.MaxSummaryTokens, session.SummaryModel = resolveCompaction(cfg, session.MaxHistoryItems)
 	// Create a default conversation
 	conversationID := generateConversationID()
 	conversation := &Conversation{
-		ID: conversationID,
+		ID:    conversationID,
 		// TODO: We need to truncate the conversation items when a new item is added and we have run out of space. There are multiple places where items
 		//       can be added so we could use a datastructure here that enforces truncation upon addition
 		Items: []*types.MessageItemUnion{},
 	}
 	session.Conversations[conversationID] = conversation
@@ -577,6 +595,18 @@ func runRealtimeSession(application *application.Application, t Transport, model
 	}
 	session.ModelInterface = m
 	if session.SummaryModel != "" {
 		summaryModelName := session.SummaryModel
 		sid := sessionID
 		session.summarizerFactory = func() (Model, error) {
 			summaryCfg, lerr := application.ModelConfigLoader().LoadModelConfigFileByNameDefaultOptions(summaryModelName, application.ApplicationConfig())
 			if lerr != nil {
 				return nil, fmt.Errorf("load summary model config %q: %w", summaryModelName, lerr)
 			}
 			return newModel(&summaryCfg.Pipeline, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), evaluator, buildRealtimeRoutingContext(application, sid))
 		}
 	}
 	if cfg.Pipeline.VoiceGateEnabled() {
 		gate, gerr := newVoiceGate(
 			*cfg.Pipeline.VoiceRecognition,
@@ -807,6 +837,15 @@ func runRealtimeSession(application *application.Application, t Transport, model
 				commitUtterance(respCtx, allAudio, session, conversation, t)
 			}()
 		case types.InputAudioBufferClearEvent:
 			xlog.Debug("recv", "message", string(msg))
 			// Discard a partially-captured utterance so the client can restart
 			// input cleanly without the stale buffer leaking into the next commit.
 			clearInputAudio(session)
 			sendEvent(t, types.InputAudioBufferClearedEvent{
 				ServerEventBase: types.ServerEventBase{EventID: e.EventID},
 			})
 		case types.ConversationItemCreateEvent:
 			xlog.Debug("recv", "message", string(msg))
 			// Add the item to the conversation
@@ -841,7 +880,39 @@ func runRealtimeSession(application *application.Application, t Transport, model
 			})
 		case types.ConversationItemDeleteEvent:
-			sendError(t, "not_implemented", "Deleting items not implemented", "", "event_TODO")
+			xlog.Debug("recv", "message", string(msg))
 			if e.ItemID == "" {
 				sendError(t, "invalid_item_id", "Need item_id, but none specified", "", "event_TODO")
 				continue
 			}
 			conversation.Lock.Lock()
 			updated, ok := deleteItem(conversation.Items, e.ItemID)
 			conversation.Items = updated
 			conversation.Lock.Unlock()
 			if !ok {
 				sendError(t, "invalid_item_id", "Item to delete not found", "", "event_TODO")
 				continue
 			}
 			sendEvent(t, types.ConversationItemDeletedEvent{
 				ServerEventBase: types.ServerEventBase{EventID: e.EventID},
 				ItemID:          e.ItemID,
 			})
 		case types.ConversationItemTruncateEvent:
 			xlog.Debug("recv", "message", string(msg))
 			conversation.Lock.Lock()
 			ok := truncateAssistantText(conversation.Items, e.ItemID, e.ContentIndex)
 			conversation.Lock.Unlock()
 			if !ok {
 				sendError(t, "invalid_item_id", "Item to truncate not found", "", "event_TODO")
 				continue
 			}
 			sendEvent(t, types.ConversationItemTruncatedEvent{
 				ServerEventBase: types.ServerEventBase{EventID: e.EventID},
 				ItemID:          e.ItemID,
 				ContentIndex:    e.ContentIndex,
 				AudioEndMs:      e.AudioEndMs,
 			})
 		case types.ConversationItemRetrieveEvent:
 			xlog.Debug("recv", "message", string(msg))
@@ -854,21 +925,7 @@ func runRealtimeSession(application *application.Application, t Transport, model
 			conversation.Lock.Lock()
 			var retrievedItem types.MessageItemUnion
 			for _, item := range conversation.Items {
-				// We need to check ID in the union
+				if itemID(item) == e.ItemID {
 				var id string
 				if item.System != nil {
 					id = item.System.ID
 				} else if item.User != nil {
 					id = item.User.ID
 				} else if item.Assistant != nil {
 					id = item.Assistant.ID
 				} else if item.FunctionCall != nil {
 					id = item.FunctionCall.ID
 				} else if item.FunctionCallOutput != nil {
 					id = item.FunctionCallOutput.ID
 				}
 				if id == e.ItemID {
 					retrievedItem = *item
 					break
 				}
@@ -1666,6 +1723,9 @@ const maxAssistantToolTurns = 10
 // triggerResponse runs a response for conv starting at tool turn 0 and, once
 // that call returns, gives the session a chance to schedule a background
 // compaction of the conversation.
 func triggerResponse(ctx context.Context, session *Session, conv *Conversation, t Transport, overrides *types.ResponseCreateParams) {
 	triggerResponseAtTurn(ctx, session, conv, t, overrides, 0)
 	// Fold aged-out turns into the rolling memory off the critical path, so
 	// the next turn works with the smaller buffer.
 	session.maybeCompact(conv)
 }
 func triggerResponseAtTurn(ctx context.Context, session *Session, conv *Conversation, t Transport, overrides *types.ResponseCreateParams, toolTurn int) {
@@ -1721,6 +1781,7 @@ func triggerResponseAtTurn(ctx context.Context, session *Session, conv *Conversa
 	var lastUserSpeaker *types.Speaker
 	personalize := session.voiceGate != nil && session.voiceGate.cfg.PersonalizeEnabled()
 	conv.Lock.Lock()
 	conversationHistory = withMemory(conversationHistory, conv.Memory)
 	items := trimRealtimeItems(conv.Items, session.MaxHistoryItems)
 	for _, item := range items {
 		if item.User != nil {
--- a/core/http/endpoints/openai/realtime_compaction.go
+++ b/core/http/endpoints/openai/realtime_compaction.go
@@ -0,0 +1,326 @@
 package openai
 import (
 	"context"
 	"fmt"
 	"strings"
 	"time"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/http/endpoints/openai/types"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/pkg/reasoning"
 	"github.com/mudler/xlog"
 )
 const (
 	defaultMaxSummaryTokens = 512
 	memoryPrefix            = "Summary of earlier conversation:\n"
 	// compactionTimeout bounds the summarizer call so a stuck model can't pin the
 	// compacting flag (and thus block all further compaction) forever.
 	compactionTimeout = 60 * time.Second
 )
 // withMemory returns history with the rolling summary appended as one extra
 // system message whose text is memoryPrefix followed by memory. When memory is
 // empty the history is returned unchanged.
 func withMemory(history schema.Messages, memory string) schema.Messages {
 	if len(memory) == 0 {
 		return history
 	}
 	text := memoryPrefix + memory
 	summaryMsg := schema.Message{Role: string(types.MessageRoleSystem)}
 	// Both the typed string field and the generic content field carry the
 	// same text.
 	summaryMsg.StringContent = text
 	summaryMsg.Content = text
 	history = append(history, summaryMsg)
 	return history
 }
 // renderItemsTranscript renders conversation items as a plain "role: text"
 // transcript for summarization, one line per rendered item. Tool calls and
 // tool results are labelled so the summarizer keeps track of actions taken.
 // Nil entries and items with none of the handled variants set (system items
 // included) produce no output.
 //
 // For user and assistant turns the non-empty Text and Transcript values of all
 // content parts are joined with a single space, so a multi-part turn does not
 // run its words together. Realtime audio turns store the spoken words in
 // Transcript rather than Text, which is why both fields are read.
 //
 // The returned string has leading and trailing whitespace trimmed.
 func renderItemsTranscript(items []*types.MessageItemUnion) string {
 	var b strings.Builder
 	// keepNonEmpty appends only the non-empty values to dst.
 	keepNonEmpty := func(dst []string, vals ...string) []string {
 		for _, v := range vals {
 			if v != "" {
 				dst = append(dst, v)
 			}
 		}
 		return dst
 	}
 	for _, item := range items {
 		switch {
 		case item == nil:
 			// itemID and compactionCut tolerate nil entries; do the same here
 			// instead of panicking on the field accesses below.
 			continue
 		case item.User != nil:
 			var parts []string
 			for _, c := range item.User.Content {
 				parts = keepNonEmpty(parts, c.Text, c.Transcript)
 			}
 			b.WriteString("user: ")
 			b.WriteString(strings.Join(parts, " "))
 			b.WriteString("\n")
 		case item.Assistant != nil:
 			var parts []string
 			for _, c := range item.Assistant.Content {
 				parts = keepNonEmpty(parts, c.Text, c.Transcript)
 			}
 			b.WriteString("assistant: ")
 			b.WriteString(strings.Join(parts, " "))
 			b.WriteString("\n")
 		case item.FunctionCall != nil:
 			fmt.Fprintf(&b, "assistant called tool %s(%s)\n", item.FunctionCall.Name, item.FunctionCall.Arguments)
 		case item.FunctionCallOutput != nil:
 			fmt.Fprintf(&b, "tool result: %s\n", item.FunctionCallOutput.Output)
 		}
 	}
 	return strings.TrimSpace(b.String())
 }
 // buildSummaryMessages assembles the two-message prompt for the summarizer LLM:
 // a system instruction, then a user message holding the prior memory (when
 // there is one) followed by the new transcript to fold in. maxTokens is only
 // interpolated into the instruction text as a size hint; nothing here enforces
 // it.
 func buildSummaryMessages(priorMemory, transcript string, maxTokens int) schema.Messages {
 	const instructionFormat = "You maintain a running memory of a live voice conversation. " +
 		"Merge the prior memory with the new exchanges into an updated memory. " +
 		"Keep names, decisions, facts, preferences, and open threads. Be concise " +
 		"(under ~%d tokens). Output only the updated memory, with no reasoning or tags."
 	systemText := fmt.Sprintf(instructionFormat, maxTokens)
 	userText := "New exchanges to fold in:\n" + transcript
 	if priorMemory != "" {
 		// The prior memory section, when present, precedes the new exchanges.
 		userText = "Prior memory:\n" + priorMemory + "\n\n" + userText
 	}
 	systemMsg := schema.Message{Role: string(types.MessageRoleSystem), StringContent: systemText, Content: systemText}
 	userMsg := schema.Message{Role: string(types.MessageRoleUser), StringContent: userText, Content: userText}
 	return schema.Messages{systemMsg, userMsg}
 }
 // clearInputAudio drops the session's pending input audio: the raw PCM buffer
 // and any buffered Opus frames are both set to nil, each under its own lock.
 // The input_audio_buffer.clear realtime event uses it so a client can discard
 // a partially-captured utterance.
 func clearInputAudio(s *Session) {
 	// The two locks are taken one after the other, never nested.
 	func() {
 		s.AudioBufferLock.Lock()
 		defer s.AudioBufferLock.Unlock()
 		s.InputAudioBuffer = nil
 	}()
 	func() {
 		s.OpusFramesLock.Lock()
 		defer s.OpusFramesLock.Unlock()
 		s.OpusFrames = nil
 	}()
 }
 // itemID returns the ID of whichever variant of the union is populated,
 // checking System, User, Assistant, FunctionCall and FunctionCallOutput in that
 // order. A nil item, or one with no variant set, yields "".
 func itemID(item *types.MessageItemUnion) string {
 	if item == nil {
 		return ""
 	}
 	if sys := item.System; sys != nil {
 		return sys.ID
 	}
 	if usr := item.User; usr != nil {
 		return usr.ID
 	}
 	if asst := item.Assistant; asst != nil {
 		return asst.ID
 	}
 	if call := item.FunctionCall; call != nil {
 		return call.ID
 	}
 	if result := item.FunctionCallOutput; result != nil {
 		return result.ID
 	}
 	return ""
 }
 // deleteItem drops the first item whose id equals id. It returns the resulting
 // slice and true on a match, or the input slice unchanged and false when no
 // item carries that id.
 func deleteItem(items []*types.MessageItemUnion, id string) ([]*types.MessageItemUnion, bool) {
 	idx := -1
 	for i := range items {
 		if itemID(items[i]) == id {
 			idx = i
 			break
 		}
 	}
 	if idx < 0 {
 		return items, false
 	}
 	// The three-index slice caps the head's capacity at idx, so appending the
 	// tail cannot write into the caller's backing array.
 	head := items[:idx:idx]
 	return append(head, items[idx+1:]...), true
 }
 // truncateAssistantText blanks the content part at contentIndex of the
 // assistant item identified by id. Both Text and Transcript are cleared, since
 // realtime audio turns keep the spoken words in Transcript and clearing Text
 // alone would change nothing for them. It is a minimal truncate, meant for
 // discarding the tail of an interrupted (barge-in) response.
 //
 // Returns true as soon as an assistant item with that id is found — even when
 // contentIndex is out of range, in which case nothing is modified — and false
 // when no such assistant item exists.
 func truncateAssistantText(items []*types.MessageItemUnion, id string, contentIndex int) bool {
 	for _, candidate := range items {
 		if itemID(candidate) == id && candidate.Assistant != nil {
 			parts := candidate.Assistant.Content
 			if contentIndex >= 0 && contentIndex < len(parts) {
 				part := &parts[contentIndex]
 				part.Text = ""
 				part.Transcript = ""
 			}
 			return true
 		}
 	}
 	return false
 }
 // compactionCut picks the index that splits items into the overflow
 // (items[:cut], to be summarized and evicted) and the live tail (items[cut:])
 // that is kept, aiming to keep the last `keep` items.
 //
 // The boundary is moved left while the item sitting on it is a
 // function_call_output, so the kept tail never begins with a tool result; the
 // kept tail may therefore be longer than `keep`.
 //
 // Returns 0 when there is nothing to cut: keep <= 0 (the "unlimited history"
 // sentinel, as in trimRealtimeItems) or items no longer than keep.
 func compactionCut(items []*types.MessageItemUnion, keep int) int {
 	if keep <= 0 || len(items) <= keep {
 		return 0
 	}
 	boundary := len(items) - keep
 	for boundary > 0 {
 		first := items[boundary]
 		if first == nil || first.FunctionCallOutput == nil {
 			break
 		}
 		boundary--
 	}
 	return boundary
 }
 // resolveCompaction reads the pipeline.compaction block, applying defaults and
 // the trigger>max_history invariant. maxHistory is the already-resolved live
 // window size.
 //
 // Returns enabled=false (and zero values) when cfg is nil, the block is absent
 // or not enabled, or maxHistory <= 0. The last case is the unlimited-window
 // sentinel: compactionCut never evicts anything for it, so reporting
 // compaction as enabled would only make every turn schedule a background run
 // (and possibly load the summary model) that can never do any work.
 //
 // Otherwise:
 //   - trigger is TriggerItems, defaulting to 2*maxHistory when unset, and
 //     raised to maxHistory+1 when it would not exceed maxHistory;
 //   - maxSummaryTokens is MaxSummaryTokens, defaulting to
 //     defaultMaxSummaryTokens when unset;
 //   - summaryModel is SummaryModel verbatim (may be empty).
 func resolveCompaction(cfg *config.ModelConfig, maxHistory int) (enabled bool, trigger, maxSummaryTokens int, summaryModel string) {
 	if cfg == nil || cfg.Pipeline.Compaction == nil || !cfg.Pipeline.Compaction.Enabled {
 		return false, 0, 0, ""
 	}
 	if maxHistory <= 0 {
 		xlog.Warn("realtime compaction: enabled but the history window is unlimited, nothing can be evicted; disabling", "max_history", maxHistory)
 		return false, 0, 0, ""
 	}
 	c := cfg.Pipeline.Compaction
 	trigger = c.TriggerItems
 	if trigger <= 0 {
 		trigger = maxHistory * 2
 	}
 	// A trigger at or below the live window would fire before anything has
 	// aged out of it.
 	if trigger <= maxHistory {
 		trigger = maxHistory + 1
 	}
 	maxSummaryTokens = c.MaxSummaryTokens
 	if maxSummaryTokens <= 0 {
 		maxSummaryTokens = defaultMaxSummaryTokens
 	}
 	return true, trigger, maxSummaryTokens, c.SummaryModel
 }
 // prefixMatches reports whether the first len(snapshot) entries of items carry
 // the same ids, in the same order, as snapshot. compact uses it to confirm the
 // overflow it summarized is still at the head of the conversation (for
 // example, that no concurrent client delete reshuffled it).
 func prefixMatches(items, snapshot []*types.MessageItemUnion) bool {
 	if len(snapshot) > len(items) {
 		return false
 	}
 	for idx, want := range snapshot {
 		if itemID(want) != itemID(items[idx]) {
 			return false
 		}
 	}
 	return true
 }
 // compact folds overflow items into conv.Memory and evicts them. It never holds
 // conv.Lock across the summarizer call: snapshot under lock, summarize unlocked,
 // commit under lock (re-validating the head is unchanged). On any error it
 // leaves the conversation untouched — items are never dropped without a summary.
 //
 // It is a no-op when model is nil, when the conversation holds no more than
 // s.CompactionTrigger items, or when compactionCut finds nothing to cut.
 func (s *Session) compact(conv *Conversation, model Model) {
 	if model == nil {
 		return
 	}
 	// Snapshot.
 	conv.Lock.Lock()
 	// Re-check the trigger under the lock: the buffer may have shrunk since the
 	// caller decided to compact.
 	if len(conv.Items) <= s.CompactionTrigger {
 		conv.Lock.Unlock()
 		return
 	}
 	cut := compactionCut(conv.Items, s.MaxHistoryItems)
 	if cut <= 0 {
 		conv.Lock.Unlock()
 		return
 	}
 	// Copy the overflow slice so later appends or deletes on conv.Items cannot
 	// change which entries the snapshot refers to.
 	overflow := append([]*types.MessageItemUnion(nil), conv.Items[:cut]...)
 	prior := conv.Memory
 	conv.Lock.Unlock()
 	// Summarize (unlocked).
 	msgs := buildSummaryMessages(prior, renderItemsTranscript(overflow), s.MaxSummaryTokens)
 	// The timeout context is what is passed to Predict; it is cancelled when
 	// compact returns.
 	ctx, cancel := context.WithTimeout(context.Background(), compactionTimeout)
 	defer cancel()
 	// NOTE(review): the meaning of the trailing nil arguments is not visible
 	// here — confirm against the Model.Predict signature.
 	predFunc, err := model.Predict(ctx, msgs, nil, nil, nil, nil, nil, nil, nil, nil, nil)
 	if err != nil {
 		xlog.Warn("realtime compaction: summarizer predict failed", "error", err)
 		return
 	}
 	pred, err := predFunc()
 	if err != nil {
 		xlog.Warn("realtime compaction: summarizer inference failed", "error", err)
 		return
 	}
 	// Strip any leaked reasoning/thinking spans using the same extractor the
 	// rest of the realtime path uses, rather than a bespoke regex.
 	rcfg := reasoning.Config{}
 	if mc := model.PredictConfig(); mc != nil {
 		rcfg = spokenReasoningConfig(mc.ReasoningConfig)
 	}
 	_, summary := reasoning.ExtractReasoningComplete(pred.Response, "", rcfg)
 	summary = strings.TrimSpace(summary)
 	// An empty summary would lose the evicted turns entirely, so keep the
 	// items instead.
 	if summary == "" {
 		xlog.Warn("realtime compaction: empty summary, skipping eviction")
 		return
 	}
 	// Commit.
 	conv.Lock.Lock()
 	defer conv.Lock.Unlock()
 	// Only evict if the summarized items are still the head of the
 	// conversation, compared by id.
 	if !prefixMatches(conv.Items, overflow) {
 		xlog.Debug("realtime compaction: head changed during summary, skipping")
 		return
 	}
 	// The new summary replaces the previous memory; the prior memory was part
 	// of the summarizer prompt above.
 	conv.Memory = summary
 	conv.Items = conv.Items[len(overflow):]
 	xlog.Debug("realtime compaction: evicted items into memory", "evicted", len(overflow), "remaining", len(conv.Items))
 }
 // summarizerModel picks the model that produces compaction summaries. With no
 // summary_model configured, or no factory installed, that is the pipeline LLM
 // (s.ModelInterface). Otherwise the factory is invoked at most once per
 // session and its outcome cached; if the factory fails, the pipeline LLM is
 // cached in its place and a warning is logged.
 func (s *Session) summarizerModel() Model {
 	if s.SummaryModel != "" && s.summarizerFactory != nil {
 		s.summarizerOnce.Do(func() {
 			built, err := s.summarizerFactory()
 			if err == nil {
 				s.summarizerCached = built
 				return
 			}
 			xlog.Warn("realtime compaction: summary_model load failed, falling back to pipeline LLM", "model", s.SummaryModel, "error", err)
 			s.summarizerCached = s.ModelInterface
 		})
 		return s.summarizerCached
 	}
 	return s.ModelInterface
 }
 // maybeCompact starts a background compaction of conv when compaction is
 // enabled, the live buffer holds more than s.CompactionTrigger items, and no
 // compaction is already in flight for this conversation. It never waits for
 // the compaction itself.
 func (s *Session) maybeCompact(conv *Conversation) {
 	if !s.CompactionEnabled {
 		return
 	}
 	overTrigger := func() bool {
 		conv.Lock.Lock()
 		defer conv.Lock.Unlock()
 		return len(conv.Items) > s.CompactionTrigger
 	}()
 	if !overTrigger {
 		return
 	}
 	// Claim the per-conversation flag; losing the swap means another
 	// compaction is still running.
 	if claimed := conv.compacting.CompareAndSwap(false, true); !claimed {
 		return
 	}
 	go func() {
 		defer conv.compacting.Store(false)
 		// The summarizer is resolved (and, for a configured summary_model,
 		// lazily loaded) inside the goroutine, so a model load never blocks a
 		// user turn.
 		if summarizer := s.summarizerModel(); summarizer != nil {
 			s.compact(conv, summarizer)
 		}
 	}()
 }
--- a/core/http/endpoints/openai/realtime_compaction_test.go
+++ b/core/http/endpoints/openai/realtime_compaction_test.go
@@ -0,0 +1,308 @@
 package openai
 import (
 	"errors"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 	"github.com/mudler/LocalAI/core/backend"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/http/endpoints/openai/types"
 	"github.com/mudler/LocalAI/core/schema"
 )
 var _ = Describe("resolveCompaction", func() {
 	// Every spec resolves against the same live-window size.
 	const maxHistory = 6
 	// withCompaction wraps a compaction block in an otherwise empty model config.
 	withCompaction := func(c config.PipelineCompaction) *config.ModelConfig {
 		return &config.ModelConfig{Pipeline: config.Pipeline{Compaction: &c}}
 	}
 	It("disables when the block is absent", func() {
 		on, _, _, _ := resolveCompaction(&config.ModelConfig{}, maxHistory)
 		Expect(on).To(BeFalse())
 	})
 	It("defaults trigger to 2x max history and tokens to 512", func() {
 		modelCfg := withCompaction(config.PipelineCompaction{Enabled: true})
 		on, trig, tokens, _ := resolveCompaction(modelCfg, maxHistory)
 		Expect(on).To(BeTrue())
 		Expect(trig).To(Equal(12))
 		Expect(tokens).To(Equal(512))
 	})
 	It("clamps trigger to max history + 1 when misconfigured", func() {
 		modelCfg := withCompaction(config.PipelineCompaction{Enabled: true, TriggerItems: 4})
 		_, trig, _, _ := resolveCompaction(modelCfg, maxHistory)
 		Expect(trig).To(Equal(7))
 	})
 	It("honors explicit values", func() {
 		modelCfg := withCompaction(config.PipelineCompaction{
 			Enabled:          true,
 			TriggerItems:     20,
 			MaxSummaryTokens: 256,
 			SummaryModel:     "tiny",
 		})
 		on, trig, tokens, summarizer := resolveCompaction(modelCfg, maxHistory)
 		Expect(on).To(BeTrue())
 		Expect(trig).To(Equal(20))
 		Expect(tokens).To(Equal(256))
 		Expect(summarizer).To(Equal("tiny"))
 	})
 })
 var _ = Describe("deleteItem", func() {
 	// usersWithIDs builds one user item per id, in the given order.
 	usersWithIDs := func(ids ...string) []*types.MessageItemUnion {
 		built := make([]*types.MessageItemUnion, 0, len(ids))
 		for _, id := range ids {
 			built = append(built, &types.MessageItemUnion{User: &types.MessageItemUser{ID: id}})
 		}
 		return built
 	}
 	It("removes the item with the given id", func() {
 		remaining, found := deleteItem(usersWithIDs("a", "b", "c"), "b")
 		Expect(found).To(BeTrue())
 		Expect(remaining).To(HaveLen(2))
 		Expect(itemID(remaining[0])).To(Equal("a"))
 		Expect(itemID(remaining[1])).To(Equal("c"))
 	})
 	It("reports not found for an unknown id", func() {
 		_, found := deleteItem(usersWithIDs("a"), "zzz")
 		Expect(found).To(BeFalse())
 	})
 })
 var _ = Describe("clearInputAudio", func() {
 	It("resets the pending PCM and buffered Opus frames", func() {
 		// Start with both buffers populated so the nil checks are meaningful.
 		session := &Session{
 			InputAudioBuffer: []byte{1, 2, 3},
 			OpusFrames:       [][]byte{{9}},
 		}
 		clearInputAudio(session)
 		Expect(session.InputAudioBuffer).To(BeNil())
 		Expect(session.OpusFrames).To(BeNil())
 	})
 })
 var _ = Describe("truncateAssistantText", func() {
 	// singleAssistant builds a one-item list holding assistant item "a1" with
 	// exactly one content part.
 	singleAssistant := func(part types.MessageContentOutput) []*types.MessageItemUnion {
 		return []*types.MessageItemUnion{{Assistant: &types.MessageItemAssistant{
 			ID:      "a1",
 			Content: []types.MessageContentOutput{part},
 		}}}
 	}
 	It("clears the text of the assistant content part at the index", func() {
 		conv := singleAssistant(types.MessageContentOutput{Type: types.MessageContentTypeText, Text: "hello world"})
 		found := truncateAssistantText(conv, "a1", 0)
 		Expect(found).To(BeTrue())
 		Expect(conv[0].Assistant.Content[0].Text).To(Equal(""))
 	})
 	// Audio turns carry the spoken words in .Transcript rather than .Text, so a
 	// barge-in truncate that left .Transcript alone would have no effect.
 	It("clears the transcript of an assistant audio content part", func() {
 		conv := singleAssistant(types.MessageContentOutput{Type: types.MessageContentTypeAudio, Transcript: "hello world"})
 		found := truncateAssistantText(conv, "a1", 0)
 		Expect(found).To(BeTrue())
 		Expect(conv[0].Assistant.Content[0].Transcript).To(Equal(""))
 	})
 	It("returns false for an unknown id", func() {
 		found := truncateAssistantText(nil, "nope", 0)
 		Expect(found).To(BeFalse())
 	})
 })
 var _ = Describe("compactionCut", func() {
 	// Builders for the three item variants these specs need; only the id is set.
 	user := func(id string) *types.MessageItemUnion {
 		return &types.MessageItemUnion{User: &types.MessageItemUser{ID: id}}
 	}
 	call := func(id string) *types.MessageItemUnion {
 		return &types.MessageItemUnion{FunctionCall: &types.MessageItemFunctionCall{ID: id}}
 	}
 	out := func(id string) *types.MessageItemUnion {
 		return &types.MessageItemUnion{FunctionCallOutput: &types.MessageItemFunctionCallOutput{ID: id}}
 	}
 	It("cuts exactly len-keep when no pairs straddle the boundary", func() {
 		items := []*types.MessageItemUnion{user("1"), user("2"), user("3"), user("4")}
 		Expect(compactionCut(items, 2)).To(Equal(2))
 	})
 	It("returns 0 when nothing to cut", func() {
 		Expect(compactionCut([]*types.MessageItemUnion{user("1")}, 2)).To(Equal(0))
 	})
 	It("returns 0 (cuts nothing) when keep is 0 — the unlimited-window sentinel", func() {
 		items := []*types.MessageItemUnion{user("1"), user("2"), user("3")}
 		Expect(compactionCut(items, 0)).To(Equal(0))
 	})
 	It("moves the boundary so a call/output pair is not split", func() {
 		// keep=2 -> naive cut=2, but items[2] is the output of items[1]'s call;
 		// the cut is pulled left to 1 so the whole pair stays in the kept tail
 		// (which then holds three items instead of two).
 		items := []*types.MessageItemUnion{user("1"), call("c"), out("c"), user("4")}
 		Expect(compactionCut(items, 2)).To(Equal(1))
 	})
 })
 var _ = Describe("withMemory", func() {
 	// instructionsOnly is the history as it looks before any memory is added.
 	instructionsOnly := func() schema.Messages {
 		return schema.Messages{{Role: "system", StringContent: "instructions"}}
 	}
 	It("inserts a memory system message when memory is non-empty", func() {
 		got := withMemory(instructionsOnly(), "user is Bob; wants pizza")
 		Expect(got).To(HaveLen(2))
 		memoryMsg := got[1]
 		Expect(memoryMsg.Role).To(Equal("system"))
 		Expect(memoryMsg.StringContent).To(ContainSubstring("user is Bob"))
 		Expect(memoryMsg.StringContent).To(ContainSubstring("Summary of earlier conversation"))
 	})
 	It("is a no-op when memory is empty", func() {
 		got := withMemory(instructionsOnly(), "")
 		Expect(got).To(HaveLen(1))
 	})
 })
 var _ = Describe("renderItemsTranscript", func() {
 	It("renders user and assistant text turns", func() {
 		userTurn := &types.MessageItemUnion{User: &types.MessageItemUser{
 			Content: []types.MessageContentInput{{Type: types.MessageContentTypeInputText, Text: "hi"}},
 		}}
 		assistantTurn := &types.MessageItemUnion{Assistant: &types.MessageItemAssistant{
 			Content: []types.MessageContentOutput{{Type: types.MessageContentTypeText, Text: "hello"}},
 		}}
 		transcript := renderItemsTranscript([]*types.MessageItemUnion{userTurn, assistantTurn})
 		Expect(transcript).To(ContainSubstring("user: hi"))
 		Expect(transcript).To(ContainSubstring("assistant: hello"))
 	})
 	// Audio turns carry the spoken words in .Transcript rather than .Text; if
 	// the renderer read only .Text, spoken turns would vanish from the summary.
 	It("renders an assistant audio turn from its transcript", func() {
 		spokenTurn := &types.MessageItemUnion{Assistant: &types.MessageItemAssistant{
 			Content: []types.MessageContentOutput{{Type: types.MessageContentTypeAudio, Transcript: "spoken words"}},
 		}}
 		transcript := renderItemsTranscript([]*types.MessageItemUnion{spokenTurn})
 		Expect(transcript).To(ContainSubstring("assistant: spoken words"))
 	})
 })
 // buildSummaryMessages spec: the summarizer prompt is two messages, a system
 // message first, then one that carries both the prior memory and the new
 // transcript.
 var _ = Describe("buildSummaryMessages", func() {
 	It("includes prior memory and the new transcript", func() {
 		prompt := buildSummaryMessages("prior facts", "user: hi", 512)
 		Expect(prompt).To(HaveLen(2))
 		head, body := prompt[0], prompt[1]
 		Expect(head.Role).To(Equal("system"))
 		Expect(body.StringContent).To(ContainSubstring("prior facts"))
 		Expect(body.StringContent).To(ContainSubstring("user: hi"))
 	})
 })
 // compact specs, driven through a fakeModel that stands in for the summarizer.
 var _ = Describe("compact", func() {
 	// turn builds a user item with the given id and a single input-text part.
 	turn := func(id, text string) *types.MessageItemUnion {
 		parts := []types.MessageContentInput{{Type: types.MessageContentTypeInputText, Text: text}}
 		return &types.MessageItemUnion{User: &types.MessageItemUser{ID: id, Content: parts}}
 	}
 	// eightTurns returns a fresh conversation with ids "1".."8" and texts "a".."h".
 	eightTurns := func() *Conversation {
 		return &Conversation{Items: []*types.MessageItemUnion{
 			turn("1", "a"), turn("2", "b"), turn("3", "c"), turn("4", "d"),
 			turn("5", "e"), turn("6", "f"), turn("7", "g"), turn("8", "h"),
 		}}
 	}
 	// overflowSession is the session shared by the specs that expect a
 	// compaction of eightTurns: trigger 7, history cap 4.
 	overflowSession := func() *Session {
 		return &Session{CompactionEnabled: true, CompactionTrigger: 7, MaxHistoryItems: 4, MaxSummaryTokens: 512}
 	}
 	It("summarizes overflow into Memory and evicts it, keeping the live tail", func() {
 		conv := eightTurns()
 		summarizer := &fakeModel{predictResp: backend.LLMResponse{Response: "ROLLED UP"}}
 		overflowSession().compact(conv, summarizer)
 		Expect(conv.Memory).To(Equal("ROLLED UP"))
 		Expect(conv.Items).To(HaveLen(4))
 		Expect(itemID(conv.Items[0])).To(Equal("5"))
 		// The evicted turns must have been shown to the summarizer.
 		Expect(summarizer.lastMessages[1].StringContent).To(ContainSubstring("a"))
 	})
 	It("leaves Items and Memory untouched when the summarizer errors", func() {
 		conv := &Conversation{Items: []*types.MessageItemUnion{
 			turn("1", "a"), turn("2", "b"), turn("3", "c"),
 		}}
 		sess := &Session{CompactionEnabled: true, CompactionTrigger: 2, MaxHistoryItems: 1, MaxSummaryTokens: 512}
 		failing := &fakeModel{predictErr: errors.New("boom")}
 		sess.compact(conv, failing)
 		Expect(conv.Memory).To(BeEmpty())
 		Expect(conv.Items).To(HaveLen(3))
 	})
 	It("strips leaked reasoning tags from the summary via the shared extractor", func() {
 		conv := eightTurns()
 		leaky := &fakeModel{predictResp: backend.LLMResponse{Response: "<think>planning the summary</think>CLEAN SUMMARY"}}
 		overflowSession().compact(conv, leaky)
 		Expect(conv.Memory).To(Equal("CLEAN SUMMARY"))
 		Expect(conv.Memory).NotTo(ContainSubstring("planning"))
 	})
 	It("does nothing when items are at or below the trigger", func() {
 		conv := &Conversation{Items: []*types.MessageItemUnion{turn("1", "a")}}
 		sess := &Session{CompactionEnabled: true, CompactionTrigger: 7, MaxHistoryItems: 4}
 		sess.compact(conv, &fakeModel{predictResp: backend.LLMResponse{Response: "x"}})
 		Expect(conv.Memory).To(BeEmpty())
 		Expect(conv.Items).To(HaveLen(1))
 	})
 })
 // prefixMatches specs: the live items must start with the snapshot's ids, in
 // the same order, for the match to hold.
 var _ = Describe("prefixMatches", func() {
 	// withIDs allocates one fresh user item per id, in the order given.
 	withIDs := func(ids ...string) []*types.MessageItemUnion {
 		built := make([]*types.MessageItemUnion, 0, len(ids))
 		for _, id := range ids {
 			built = append(built, &types.MessageItemUnion{User: &types.MessageItemUser{ID: id}})
 		}
 		return built
 	}
 	It("matches when items begins with the snapshot ids in order", func() {
 		live, snapshot := withIDs("1", "2", "3"), withIDs("1", "2")
 		Expect(prefixMatches(live, snapshot)).To(BeTrue())
 	})
 	It("matches an empty snapshot", func() {
 		Expect(prefixMatches(withIDs("1"), nil)).To(BeTrue())
 	})
 	It("fails when items is shorter than the snapshot (a concurrent delete shrank the head)", func() {
 		live, snapshot := withIDs("1"), withIDs("1", "2")
 		Expect(prefixMatches(live, snapshot)).To(BeFalse())
 	})
 	It("fails when the head ids differ (a concurrent delete reordered the head)", func() {
 		live, snapshot := withIDs("2", "3"), withIDs("1", "2")
 		Expect(prefixMatches(live, snapshot)).To(BeFalse())
 	})
 })
 // summarizerModel specs.
 //
 // Returned models are checked with BeIdenticalTo (pointer identity) instead of
 // Equal. Equal compares with reflect.DeepEqual, and two separately allocated,
 // zero-valued fakeModels are deeply equal, so Equal(small) would also accept
 // the pipeline model and the spec could not catch a wrong-model regression.
 var _ = Describe("summarizerModel", func() {
 	It("returns the pipeline model when no summary_model is set", func() {
 		m := &fakeModel{}
 		s := &Session{ModelInterface: m}
 		Expect(s.summarizerModel()).To(BeIdenticalTo(m))
 	})
 	It("uses the factory (once) when summary_model is set", func() {
 		pipeline := &fakeModel{}
 		small := &fakeModel{}
 		calls := 0
 		s := &Session{ModelInterface: pipeline, SummaryModel: "tiny",
 			summarizerFactory: func() (Model, error) { calls++; return small, nil }}
 		// Asked twice: both calls must yield the factory's model, and the
 		// factory itself must have run only once.
 		Expect(s.summarizerModel()).To(BeIdenticalTo(small))
 		Expect(s.summarizerModel()).To(BeIdenticalTo(small))
 		Expect(calls).To(Equal(1))
 	})
 	It("falls back to the pipeline model when the factory errors", func() {
 		pipeline := &fakeModel{}
 		s := &Session{ModelInterface: pipeline, SummaryModel: "tiny",
 			summarizerFactory: func() (Model, error) { return nil, errors.New("nope") }}
 		Expect(s.summarizerModel()).To(BeIdenticalTo(pipeline))
 	})
 })
 // itemID spec: every union variant reports its own ID, and a nil item
 // reports the empty string.
 var _ = Describe("itemID", func() {
 	It("returns the id for each variant and empty for nil", func() {
 		Expect(itemID(nil)).To(Equal(""))
 		// One populated union per variant, paired with the id it carries.
 		variants := []struct {
 			item *types.MessageItemUnion
 			id   string
 		}{
 			{&types.MessageItemUnion{User: &types.MessageItemUser{ID: "u1"}}, "u1"},
 			{&types.MessageItemUnion{Assistant: &types.MessageItemAssistant{ID: "a1"}}, "a1"},
 			{&types.MessageItemUnion{System: &types.MessageItemSystem{ID: "s1"}}, "s1"},
 			{&types.MessageItemUnion{FunctionCall: &types.MessageItemFunctionCall{ID: "f1"}}, "f1"},
 			{&types.MessageItemUnion{FunctionCallOutput: &types.MessageItemFunctionCallOutput{ID: "o1"}}, "o1"},
 		}
 		for _, v := range variants {
 			Expect(itemID(v.item)).To(Equal(v.id))
 		}
 	})
 })
--- a/core/http/endpoints/openai/realtime_model.go
+++ b/core/http/endpoints/openai/realtime_model.go
@@ -432,7 +432,7 @@ func loadSoundDetectionConfig(pipeline *config.Pipeline, cl *config.ModelConfigL
 	if pipeline.SoundDetection == "" {
 		return nil, nil
 	}
-	cfg, err := cl.LoadModelConfigFileByName(pipeline.SoundDetection, ml.ModelPath)
+	cfg, err := loadPipelineSubModel(cl, pipeline.SoundDetection, ml.ModelPath)
 	if err != nil {
 		return nil, fmt.Errorf("failed to load sound detection config: %w", err)
 	}
@@ -443,7 +443,7 @@ func loadSoundDetectionConfig(pipeline *config.Pipeline, cl *config.ModelConfigL
 }
 func newTranscriptionOnlyModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (Model, *config.ModelConfig, error) {
-	cfgVAD, err := cl.LoadModelConfigFileByName(pipeline.VAD, ml.ModelPath)
+	cfgVAD, err := loadPipelineSubModel(cl, pipeline.VAD, ml.ModelPath)
 	if err != nil {
 		return nil, nil, fmt.Errorf("failed to load backend config: %w", err)
@@ -453,7 +453,7 @@ func newTranscriptionOnlyModel(pipeline *config.Pipeline, cl *config.ModelConfig
 		return nil, nil, fmt.Errorf("failed to validate config: %w", err)
 	}
-	cfgSST, err := cl.LoadModelConfigFileByName(pipeline.Transcription, ml.ModelPath)
+	cfgSST, err := loadPipelineSubModel(cl, pipeline.Transcription, ml.ModelPath)
 	if err != nil {
 		return nil, nil, fmt.Errorf("failed to load backend config: %w", err)
@@ -542,11 +542,30 @@ func buildRealtimeRoutingContext(a *application.Application, sessionID string) *
 	}
 }
 // loadPipelineSubModel loads the config of a pipeline sub-model by name and
 // then passes it through the loader's alias resolution, returning the
 // resolved config.
 //
 // The point is that a pipeline entry naming an alias (e.g. `llm: default`)
 // ends up with the alias target's full config (Backend, Model, ...) instead
 // of the alias stub, whose Backend is empty. An unresolved alias would
 // otherwise reach model loading and fail there — notably in distributed mode
 // with "backend name is empty". This mirrors the top-level alias resolution
 // in core/http/middleware/request.go.
 //
 // Errors from either step are returned as-is; callers add their own context.
 func loadPipelineSubModel(cl *config.ModelConfigLoader, name, modelPath string) (*config.ModelConfig, error) {
 	named, loadErr := cl.LoadModelConfigFileByName(name, modelPath)
 	if loadErr != nil {
 		return nil, loadErr
 	}
 	// The second return value of ResolveAlias is not needed here.
 	target, _, aliasErr := cl.ResolveAlias(named)
 	if aliasErr != nil {
 		return nil, aliasErr
 	}
 	return target, nil
 }
 // newModel returns and loads either a wrapped model or a model that supports audio-to-audio
 func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, evaluator *templates.Evaluator, routing *RealtimeRoutingContext) (Model, error) {
 	xlog.Debug("Creating new model pipeline model", "pipeline", pipeline)
-	cfgVAD, err := cl.LoadModelConfigFileByName(pipeline.VAD, ml.ModelPath)
+	cfgVAD, err := loadPipelineSubModel(cl, pipeline.VAD, ml.ModelPath)
 	if err != nil {
 		return nil, fmt.Errorf("failed to load backend config: %w", err)
@@ -557,7 +576,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
 	}
 	// TODO: Do we always need a transcription model? It can be disabled. Note that any-to-any instruction following models don't transcribe as such, so if transcription is required it is a separate process
-	cfgSST, err := cl.LoadModelConfigFileByName(pipeline.Transcription, ml.ModelPath)
+	cfgSST, err := loadPipelineSubModel(cl, pipeline.Transcription, ml.ModelPath)
 	if err != nil {
 		return nil, fmt.Errorf("failed to load backend config: %w", err)
@@ -589,7 +608,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
 	xlog.Debug("Loading a wrapped model")
 	// Otherwise we want to return a wrapped model, which is a "virtual" model that re-uses other models to perform operations
-	cfgLLM, err := cl.LoadModelConfigFileByName(pipeline.LLM, ml.ModelPath)
+	cfgLLM, err := loadPipelineSubModel(cl, pipeline.LLM, ml.ModelPath)
 	if err != nil {
 		return nil, fmt.Errorf("failed to load backend config: %w", err)
@@ -604,7 +623,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
 	applyPipelineReasoning(cfgLLM, *pipeline)
 	applyPipelineThinking(cfgLLM, *pipeline)
-	cfgTTS, err := cl.LoadModelConfigFileByName(pipeline.TTS, ml.ModelPath)
+	cfgTTS, err := loadPipelineSubModel(cl, pipeline.TTS, ml.ModelPath)
 	if err != nil {
 		return nil, fmt.Errorf("failed to load backend config: %w", err)
--- a/core/http/endpoints/openai/realtime_model_alias_test.go
+++ b/core/http/endpoints/openai/realtime_model_alias_test.go
@@ -0,0 +1,52 @@
 package openai
 import (
 	"os"
 	"path/filepath"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 	"github.com/mudler/LocalAI/core/config"
 )
 // A pipeline sub-model may be named through an alias (e.g. `llm: default`).
 // loadPipelineSubModel has to follow that alias one hop and hand back the
 // target's full config, so the effective backend is the target's backend and
 // not the empty backend of the alias stub. The top-level request path already
 // does this in core/http/middleware/request.go; the realtime pipeline used to
 // skip it and failed in distributed mode with "backend name is empty".
 var _ = Describe("loadPipelineSubModel", func() {
 	It("resolves a sub-model alias one hop to the target's config", func() {
 		modelsDir := GinkgoT().TempDir()
 		// writeConfig drops one YAML config file into the temp models dir.
 		writeConfig := func(fileName, body string) {
 			Expect(os.WriteFile(filepath.Join(modelsDir, fileName), []byte(body), 0644)).To(Succeed())
 		}
 		// The concrete model: it names a real backend.
 		targetYAML := `name: real-llm
 backend: llama-cpp
 parameters:
  model: real-llm.gguf
 `
 		writeConfig("real-llm.yaml", targetYAML)
 		// The alias stub: only a name and the model it points at.
 		aliasYAML := `name: default
 alias: real-llm
 `
 		writeConfig("default.yaml", aliasYAML)
 		loader := config.NewModelConfigLoader(modelsDir)
 		Expect(loader.LoadModelConfigsFromPath(modelsDir)).To(Succeed())
 		// Going in through the alias must come out as the target's config.
 		viaAlias, err := loadPipelineSubModel(loader, "default", modelsDir)
 		Expect(err).NotTo(HaveOccurred())
 		Expect(viaAlias.IsAlias()).To(BeFalse())
 		Expect(viaAlias.Backend).To(Equal("llama-cpp"))
 		// Going in by the real name must load that config unchanged.
 		byName, err := loadPipelineSubModel(loader, "real-llm", modelsDir)
 		Expect(err).NotTo(HaveOccurred())
 		Expect(byName.Backend).To(Equal("llama-cpp"))
 		Expect(byName.Name).To(Equal("real-llm"))
 	})
 })
--- a/core/http/react-ui/e2e/model-config.spec.js
+++ b/core/http/react-ui/e2e/model-config.spec.js
@@ -288,6 +288,21 @@ test.describe('Model Editor - Interactive Tab', () => {
    await expect(page.locator('input[placeholder^="match,"]')).toBeVisible()
  })
  test('pattern min_len clamps a directly-typed negative to 0', async ({ page }) => {
    const fieldLabel = 'Custom Secret Patterns'
    // Add the field through the "add field" search box, then add one pattern row.
    const fieldSearch = page.locator('input[placeholder="Search fields to add..."]')
    await fieldSearch.fill(fieldLabel)
    const resultsPanel = fieldSearch.locator('..').locator('..')
    const firstMatch = resultsPanel.locator('div', { hasText: fieldLabel }).first()
    await firstMatch.click()
    const addPatternButton = page.locator('button', { hasText: 'Add pattern' })
    await addPatternButton.click()
    // min={0} on a number input only constrains the spinner arrows; typing is
    // unrestricted. The editor itself has to sanitise a typed negative so a
    // meaningless negative length floor is never written to the saved config.
    const minLengthInput = page.locator('input[aria-label="Minimum length"]')
    await minLengthInput.fill('-5')
    await expect(minLengthInput).toHaveValue('0')
  })
  // Regression: a map-typed field (entity_actions) present in the loaded YAML
  // must render WITH its values. flattenConfig used to recurse into the map,
  // scattering it across pii_detection.entity_actions.<GROUP> paths that match
@@ -329,4 +344,37 @@ test.describe('Model Editor - Interactive Tab', () => {
    await expect(page.getByText(/block —/i).first()).toBeVisible()
  })
  // One key in a map has exactly one value. Renaming a row onto a group that
  // already exists must therefore leave a single row (Object.fromEntries, last
  // write wins) instead of showing two conflicting rows, one of which would be
  // silently dropped on save.
  test('entity_actions collapses a duplicate group to a single row', async ({ page }) => {
    // The loaded config starts with two distinct entity_actions groups.
    const loadedYaml = [
      'name: ner-model',
      'backend: llama-cpp',
      'pii_detection:',
      '    entity_actions:',
      '        SSN: block',
      '        EMAIL: mask',
      '',
    ].join('\n')
    await page.route('**/api/models/edit/ner-model', (route) => {
      route.fulfill({
        contentType: 'application/json',
        body: JSON.stringify({ name: 'ner-model', config: loadedYaml }),
      })
    })
    await page.goto('/app/model-editor/ner-model')
    const groupFields = page.locator('input[aria-label="Entity group"]')
    await expect(groupFields).toHaveCount(2)
    // Turn the EMAIL row into a second SSN; only one SSN row may remain.
    const emailRow = groupFields.nth(1)
    await emailRow.fill('SSN')
    await expect(groupFields).toHaveCount(1)
    await expect(groupFields.nth(0)).toHaveValue('SSN')
  })
 })
--- a/core/http/react-ui/e2e/nodes-detail.spec.js
+++ b/core/http/react-ui/e2e/nodes-detail.spec.js
@@ -0,0 +1,34 @@
 import { test, expect } from './coverage-fixtures.js'
 const ID = 'n1'
 // Stubs the three endpoints requested for node `ID`: the node record, its
 // models and its backends. Each one answers 200 with a JSON body.
 async function mockNode(page) {
  // Builds a route handler that fulfils with `payload` serialised as JSON.
  const respondJson = payload => route => route.fulfill({
    status: 200,
    contentType: 'application/json',
    body: JSON.stringify(payload),
  })
  const nodeRecord = { id: ID, name: 'alpha', node_type: 'backend', address: '10.0.0.1:50051', status: 'healthy', total_vram: 24e9, available_vram: 12e9, max_replicas_per_model: 1, labels: { env: 'prod' } }
  const nodeModels = [{ node_id: ID, model_name: 'llama-3.3', state: 'loaded', in_flight: 0, replica_index: 0 }]
  const nodeBackends = [{ name: 'llama-cpp', is_system: true, installed_at: '2026-06-01T00:00:00Z' }]
  await page.route(`**/api/nodes/${ID}`, respondJson(nodeRecord))
  await page.route(`**/api/nodes/${ID}/models`, respondJson(nodeModels))
  await page.route(`**/api/nodes/${ID}/backends`, respondJson(nodeBackends))
 }
 test.describe('Node detail page', () => {
  test('renders sections for a node', async ({ page }) => {
    await mockNode(page)
    await page.goto(`/app/nodes/${ID}`)
    const title = page.locator('.page-title').first()
    await expect(title).toBeVisible({ timeout: 15_000 })
    // Node name, loaded model, installed backend and label, checked in order.
    for (const text of ['alpha', 'llama-3.3', 'llama-cpp', 'env=prod']) {
      await expect(page.getByText(text)).toBeVisible()
    }
  })
  test('is reachable by clicking a roster panel', async ({ page }) => {
    // Roster endpoints first, then the per-node endpoints for the detail page.
    const respondWith = body => r => r.fulfill({ status: 200, contentType: 'application/json', body })
    const roster = [{ id: ID, name: 'alpha', node_type: 'backend', address: '10.0.0.1:50051', status: 'healthy' }]
    await page.route('**/api/nodes', respondWith(JSON.stringify(roster)))
    await page.route('**/api/nodes/models', respondWith('[]'))
    await page.route('**/api/nodes/scheduling', respondWith('[]'))
    await mockNode(page)
    await page.goto('/app/nodes')
    const alphaPanel = page.locator('.node-panel').filter({ hasText: 'alpha' })
    await alphaPanel.getByText('alpha').click()
    const detailUrl = new RegExp(`/app/nodes/${ID}$`)
    await expect(page).toHaveURL(detailUrl)
  })
 })
--- a/core/http/react-ui/e2e/nodes-per-node-backend-actions.spec.js
+++ b/core/http/react-ui/e2e/nodes-per-node-backend-actions.spec.js
@@ -12,28 +12,37 @@ const NODE_NAME = 'worker-test'
 const BACKEND_NAME = 'cuda12-vllm-development'
 async function mockDistributedNodes(page, { onDelete } = {}) {
  const nodeRecord = {
    id: NODE_ID,
    name: NODE_NAME,
    node_type: 'backend',
    address: '10.0.0.1:50051',
    http_address: '10.0.0.1:8090',
    status: 'healthy',
    total_vram: 0,
    available_vram: 0,
    total_ram: 8_000_000_000,
    available_ram: 4_000_000_000,
    gpu_vendor: '',
    last_heartbeat: new Date().toISOString(),
    created_at: new Date().toISOString(),
    updated_at: new Date().toISOString(),
  }
  await page.route('**/api/nodes', (route) => {
    route.fulfill({
      status: 200,
      contentType: 'application/json',
-      body: JSON.stringify([
+      body: JSON.stringify([nodeRecord]),
-        {
+    })
-          id: NODE_ID,
+  })
-          name: NODE_NAME,
+
-          node_type: 'backend',
+  // The detail page fetches the single node via nodesApi.get(id).
-          address: '10.0.0.1:50051',
+  await page.route(`**/api/nodes/${NODE_ID}`, (route) => {
-          http_address: '10.0.0.1:8090',
+    route.fulfill({
-          status: 'healthy',
+      status: 200,
-          total_vram: 0,
+      contentType: 'application/json',
-          available_vram: 0,
+      body: JSON.stringify(nodeRecord),
          total_ram: 8_000_000_000,
          available_ram: 4_000_000_000,
          gpu_vendor: '',
          last_heartbeat: new Date().toISOString(),
          created_at: new Date().toISOString(),
          updated_at: new Date().toISOString(),
        },
      ]),
    })
  })
@@ -80,24 +89,18 @@ async function mockDistributedNodes(page, { onDelete } = {}) {
  })
 }
-async function expandNodeAndWaitForBackends(page) {
+async function openNodeDetail(page) {
-  await page.goto('/app/nodes')
+  // The per-node backend table now lives on the deep-linkable detail page
-  // Click the row to expand it. The chevron toggle and the row both work,
+  // at /app/nodes/:id (the old expand-row + "Manage" disclosure was removed
-  // but clicking the name cell is the most user-like.
+  // when the roster was restructured). Navigate straight there.
-  await page.getByText(NODE_NAME).first().click()
+  await page.goto(`/app/nodes/${NODE_ID}`)
  // Backends, Capacity and Labels live behind a "Manage" <details>
  // disclosure (the drawer was distilled to keep at-a-glance content
  // lean — see distill refactor in the multi-replica branch). Open it
  // by clicking the summary inside the .node-manage scope so the
  // per-node backend table is in the DOM before assertions run.
  await page.locator('.node-manage > summary').first().click()
  await expect(page.getByRole('cell', { name: BACKEND_NAME, exact: true })).toBeVisible({ timeout: 10_000 })
 }
 test.describe('Nodes page — per-node backend actions', () => {
  test('upgrade affordance is self-explanatory (not "Reinstall backend" with a sync icon)', async ({ page }) => {
    await mockDistributedNodes(page)
-    await expandNodeAndWaitForBackends(page)
+    await openNodeDetail(page)
    // Negative: the old, ambiguous wording must not be used.
    await expect(page.locator('button[title="Reinstall backend"]')).toHaveCount(0)
@@ -114,7 +117,7 @@ test.describe('Nodes page — per-node backend actions', () => {
  test('per-node backend row shows a delete (trash) button next to upgrade', async ({ page }) => {
    await mockDistributedNodes(page)
-    await expandNodeAndWaitForBackends(page)
+    await openNodeDetail(page)
    const deleteBtn = page.locator('button[title="Delete backend from this node"]')
    await expect(deleteBtn).toBeVisible()
@@ -128,7 +131,7 @@ test.describe('Nodes page — per-node backend actions', () => {
        postedBody = route.request().postDataJSON()
      },
    })
-    await expandNodeAndWaitForBackends(page)
+    await openNodeDetail(page)
    await page.locator('button[title="Delete backend from this node"]').click()
@@ -150,7 +153,7 @@ test.describe('Nodes page — per-node backend actions', () => {
        deleteCalls += 1
      },
    })
-    await expandNodeAndWaitForBackends(page)
+    await openNodeDetail(page)
    await page.locator('button[title="Delete backend from this node"]').click()
--- a/core/http/react-ui/e2e/nodes-roster.spec.js
+++ b/core/http/react-ui/e2e/nodes-roster.spec.js
@@ -0,0 +1,47 @@
 import { test, expect } from './coverage-fixtures.js'
 // Stubs the roster endpoints: /api/nodes answers with `nodes`, while the
 // models and scheduling endpoints answer with an empty JSON array.
 async function mockCluster(page, nodes) {
  const respondWith = body => r => r.fulfill({ status: 200, contentType: 'application/json', body })
  await page.route('**/api/nodes', respondWith(JSON.stringify(nodes)))
  await page.route('**/api/nodes/models', respondWith('[]'))
  await page.route('**/api/nodes/scheduling', respondWith('[]'))
 }
 test.describe('Nodes roster header', () => {
  test('shows a cluster pulse line and no stat-card grid', async ({ page }) => {
    const twoNodes = [
      { id: 'n1', name: 'alpha', node_type: 'backend', address: '10.0.0.1:50051', status: 'healthy' },
      { id: 'n2', name: 'beta', node_type: 'backend', address: '10.0.0.2:50051', status: 'draining' },
    ]
    await mockCluster(page, twoNodes)
    await page.goto('/app/nodes')
    const pulse = page.locator('.cluster-pulse')
    await expect(pulse).toBeVisible({ timeout: 15_000 })
    await expect(pulse).toContainText('2 nodes')
    // The stat-card grid must not be rendered at all.
    await expect(page.locator('.stat-grid')).toHaveCount(0)
  })
  test('shows an approval callout for pending nodes', async ({ page }) => {
    const pendingNode = { id: 'n3', name: 'gamma', node_type: 'backend', address: '10.0.0.3:50051', status: 'pending' }
    await mockCluster(page, [pendingNode])
    await page.goto('/app/nodes')
    const callout = page.locator('.attention-callout')
    await expect(callout).toContainText('approval', { timeout: 15_000 })
  })
 })
 test.describe('Nodes roster panels', () => {
  test('shows model chips without clicking and filters by type', async ({ page }) => {
    // One backend node with a loaded model, plus one agent node.
    const respondWith = body => r => r.fulfill({ status: 200, contentType: 'application/json', body })
    const nodes = [
      { id: 'n1', name: 'alpha', node_type: 'backend', address: '10.0.0.1:50051', status: 'healthy' },
      { id: 'a1', name: 'agent-1', node_type: 'agent', address: '10.0.0.9:50051', status: 'healthy' },
    ]
    const loadedModels = [
      { node_id: 'n1', model_name: 'llama-3.3', state: 'loaded', in_flight: 2, replica_index: 0 },
    ]
    await page.route('**/api/nodes', respondWith(JSON.stringify(nodes)))
    await page.route('**/api/nodes/models', respondWith(JSON.stringify(loadedModels)))
    await page.route('**/api/nodes/scheduling', respondWith('[]'))
    await page.goto('/app/nodes')
    // The model chip is already on the panel; nothing has been expanded.
    const alphaPanel = page.locator('.node-panel').filter({ hasText: 'alpha' })
    await expect(alphaPanel.getByText('llama-3.3')).toBeVisible({ timeout: 15_000 })
    // Segmented filter set to Agent: the agent node stays, the backend node goes.
    const agentFilter = page.getByRole('radio', { name: /Agent/ })
    await agentFilter.click()
    await expect(page.getByText('agent-1')).toBeVisible()
    await expect(page.getByText('alpha')).toHaveCount(0)
  })
 })
--- a/core/http/react-ui/e2e/page-render-smoke.spec.js
+++ b/core/http/react-ui/e2e/page-render-smoke.spec.js
@@ -21,6 +21,7 @@ const PAGES = [
  ['/app/backends', 'Backends'],
  ['/app/settings', 'Settings'],
  ['/app/nodes', 'Nodes'],
  ['/app/scheduling', 'Scheduling'],
  ['/app/face', 'Face recognition'],
  ['/app/voice', 'Voice recognition'],
  ['/app/fine-tune', 'Fine-tuning'],
--- a/core/http/react-ui/e2e/scheduling.spec.js
+++ b/core/http/react-ui/e2e/scheduling.spec.js
@@ -0,0 +1,16 @@
 import { test, expect } from './coverage-fixtures.js'
 test.describe('Scheduling page', () => {
  test('renders at /app/scheduling with rules from the API', async ({ page }) => {
    // A single rule is enough to prove the page lists what the API returns.
    const rules = [{ model_name: 'llama-3.3', spread_all: true, min_replicas: 0, max_replicas: 0 }]
    await page.route('**/api/nodes/scheduling', (route) => {
      route.fulfill({
        status: 200,
        contentType: 'application/json',
        body: JSON.stringify(rules),
      })
    })
    await page.goto('/app/scheduling')
    const title = page.locator('.page-title').first()
    await expect(title).toBeVisible({ timeout: 15_000 })
    await expect(page).toHaveURL(/\/app\/scheduling$/)
    await expect(page.getByText('llama-3.3')).toBeVisible()
  })
 })
--- a/core/http/react-ui/public/locales/de/admin.json
+++ b/core/http/react-ui/public/locales/de/admin.json
@@ -43,6 +43,10 @@
    "title": "Verteilte Knoten",
    "subtitle": "Backend- und Agenten-Worker-Knoten verwalten"
  },
  "scheduling": {
    "title": "Planung",
    "subtitle": "Modellplatzierung und Replikat-Regeln im gesamten Cluster"
  },
  "p2p": {
    "title": "Verteilte KI-Berechnung",
    "subtitle": "Skalieren Sie Ihre KI-Workloads über mehrere Geräte mit Peer-to-Peer-Verteilung"
--- a/core/http/react-ui/public/locales/de/nav.json
+++ b/core/http/react-ui/public/locales/de/nav.json
@@ -50,6 +50,7 @@
    "backends": "Backends",
    "traces": "Traces",
    "nodes": "Knoten",
    "scheduling": "Planung",
    "swarm": "Swarm",
    "system": "System",
    "settings": "Einstellungen",
--- a/core/http/react-ui/public/locales/en/admin.json
+++ b/core/http/react-ui/public/locales/en/admin.json
@@ -43,6 +43,10 @@
    "title": "Distributed Nodes",
    "subtitle": "Manage backend and agent worker nodes"
  },
  "scheduling": {
    "title": "Scheduling",
    "subtitle": "Model placement and replica rules across the cluster"
  },
  "p2p": {
    "title": "Distributed AI Computing",
    "subtitle": "Scale your AI workloads across multiple devices with peer-to-peer distribution"
--- a/core/http/react-ui/public/locales/en/chat.json
+++ b/core/http/react-ui/public/locales/en/chat.json
@@ -86,6 +86,7 @@
  "input": {
    "placeholder": "Message...",
    "attachFile": "Attach file",
    "send": "Send message",
    "stopGenerating": "Stop generating",
    "canvasTitle": "Canvas — extract code blocks and media into a side panel for preview, copy, and download",
    "canvasLabel": "Canvas",
--- a/core/http/react-ui/public/locales/en/home.json
+++ b/core/http/react-ui/public/locales/en/home.json
@@ -77,6 +77,21 @@
    "noModelsTitle": "No Models Available",
    "noModelsBody": "There are no models installed yet. Ask your administrator to set up models so you can start chatting."
  },
  "starters": {
    "title": "Recommended for your hardware",
    "tier": {
      "cpu": "CPU-only",
      "gpu-small": "GPU",
      "gpu-mid": "GPU",
      "gpu-large": "GPU"
    },
    "cpuNote": "No GPU detected — these small models stay responsive on CPU.",
    "gpuNote": "Picked to fit your available VRAM with room for context.",
    "install": "Install",
    "installing": "Installing",
    "installStarted": "Installing {{model}}…",
    "installFailed": "Install failed: {{message}}"
  },
  "connect": {
    "title": "One endpoint, every API",
    "subtitle": "LocalAI serves its own full API — image & video generation, depth, object detection, reranking, audio, face & voice recognition, and realtime voice over WebRTC and WebSocket. On top of that, a drop-in compatibility layer lets any app built for OpenAI, Anthropic, Ollama or OpenAI Responses talk to it unchanged.",
--- a/core/http/react-ui/public/locales/en/models.json
+++ b/core/http/react-ui/public/locales/en/models.json
@@ -2,6 +2,16 @@
  "title": "Install Models",
  "subtitle": "Browse and install AI models from the gallery",
  "models": "Models",
  "recommended": {
    "title": "Recommended for your hardware",
    "cpuNote": "No GPU detected - small models that stay responsive on CPU.",
    "gpuNote": "Sized to fit your available VRAM with room for context.",
    "install": "Install",
    "installing": "Installing",
    "installStarted": "Installing {{model}}…",
    "installFailed": "Install failed: {{message}}",
    "dismiss": "Dismiss recommendations"
  },
  "stats": {
    "available": "Available",
    "installed": "Installed"
--- a/core/http/react-ui/public/locales/en/nav.json
+++ b/core/http/react-ui/public/locales/en/nav.json
@@ -51,6 +51,7 @@
    "backends": "Backends",
    "traces": "Traces",
    "nodes": "Nodes",
    "scheduling": "Scheduling",
    "swarm": "Swarm",
    "system": "System",
    "settings": "Settings",
--- a/core/http/react-ui/public/locales/es/admin.json
+++ b/core/http/react-ui/public/locales/es/admin.json
@@ -43,6 +43,10 @@
    "title": "Nodos distribuidos",
    "subtitle": "Administra nodos worker de backends y agentes"
  },
  "scheduling": {
    "title": "Planificación",
    "subtitle": "Reglas de ubicación de modelos y réplicas en el clúster"
  },
  "p2p": {
    "title": "Computación de IA distribuida",
    "subtitle": "Escala tus cargas de trabajo de IA en múltiples dispositivos con distribución peer-to-peer"
--- a/core/http/react-ui/public/locales/es/nav.json
+++ b/core/http/react-ui/public/locales/es/nav.json
@@ -50,6 +50,7 @@
    "backends": "Backends",
    "traces": "Trazas",
    "nodes": "Nodos",
    "scheduling": "Planificación",
    "swarm": "Swarm",
    "system": "Sistema",
    "settings": "Configuración",
--- a/core/http/react-ui/public/locales/id/admin.json
+++ b/core/http/react-ui/public/locales/id/admin.json
@@ -43,6 +43,10 @@
    "title": "Node Terdistribusi",
    "subtitle": "Kelola node backend dan node worker"
  },
  "scheduling": {
    "title": "Penjadwalan",
    "subtitle": "Aturan penempatan model dan replika di seluruh kluster"
  },
  "p2p": {
    "title": "Komputasi AI Terdistribusi",
    "subtitle": "Skalakan beban kerja AI Anda ke beberapa perangkat dengan distribusi peer-to-peer"
--- a/core/http/react-ui/public/locales/id/chat.json
+++ b/core/http/react-ui/public/locales/id/chat.json
@@ -72,7 +72,7 @@
  "actions": {
    "copy": "Salin",
    "regenerate": "Hasilkan ulang",
-    "jumpToLatest": "Jump to latest"
+    "jumpToLatest": "Lompat ke terbaru"
  },
  "streaming": {
    "transferring": "Mentransfer model...",
--- a/core/http/react-ui/public/locales/id/common.json
+++ b/core/http/react-ui/public/locales/id/common.json
@@ -1,8 +1,8 @@
 {
  "unsaved": {
-    "title": "Discard unsaved changes?",
+    "title": "Buang perubahan yang belum disimpan?",
-    "message": "You have unsaved changes that will be lost if you leave this page.",
+    "message": "Anda memiliki perubahan yang belum disimpan. Perubahan tersebut akan hilang jika Anda meninggalkan halaman ini.",
-    "leave": "Leave"
+    "leave": "Tinggalkan Halaman"
  },
  "actions": {
    "save": "Simpan",
--- a/core/http/react-ui/public/locales/id/home.json
+++ b/core/http/react-ui/public/locales/id/home.json
@@ -7,15 +7,15 @@
  "resourceGpu": "GPU",
  "resourceRam": "RAM",
  "greeting": {
-    "morning": "Good morning",
+    "morning": "Selamat pagi",
-    "afternoon": "Good afternoon",
+    "afternoon": "Selamat siang",
-    "evening": "Good evening",
+    "evening": "Selamat malam",
-    "night": "Working late"
+    "night": "Selamat lembur"
  },
  "statusLine": {
-    "modelsLoaded_one": "{{count}} model loaded",
+    "modelsLoaded_one": "{{count}} model dimuat",
-    "modelsLoaded_other": "{{count}} models loaded",
+    "modelsLoaded_other": "{{count}} model dimuat",
-    "noModelsLoaded": "No models loaded",
+    "noModelsLoaded": "Tidak ada model yang dimuat",
    "nodes_one": "{{count}} node",
    "nodes_other": "{{count}} node"
  },
@@ -79,14 +79,14 @@
  },
  "connect": {
    "title": "Satu endpoint, semua API",
-    "subtitle": "LocalAI menyediakan API miliknya sendiri yang lengkap — pembuatan gambar & video, depth, deteksi objek, reranking, audio, pengenalan wajah & suara, serta suara realtime melalui WebRTC dan WebSocket. Di atas itu, lapisan kompatibilitas drop-in membuat aplikasi apa pun yang dibuat untuk OpenAI, Anthropic, Ollama, atau OpenAI Responses bekerja tanpa perubahan.",
+    "subtitle": "LocalAI menyediakan API miliknya sendiri yang lengkap — pembuatan gambar & video, depth, deteksi objek, reranking, audio, pengenalan wajah & suara, serta suara realtime melalui WebRTC dan WebSocket. Selain itu, lapisan kompatibilitas drop-in membuat aplikasi apa pun yang dibuat untuk OpenAI, Anthropic, Ollama, atau OpenAI Responses bekerja tanpa perubahan.",
    "nativeTitle": "API native",
    "compatTitle": "Kompatibilitas drop-in",
    "apiReference": "Referensi API lengkap",
    "copy": "Salin",
    "copied": "Disalin",
-    "browse": "Browse the API",
+    "browse": "Jelajahi API",
-    "hide": "Hide endpoints",
+    "hide": "Sembunyikan endpoint",
-    "dismiss": "Dismiss"
+    "dismiss": "Abaikan"
  }
 }
--- a/core/http/react-ui/public/locales/id/media.json
+++ b/core/http/react-ui/public/locales/id/media.json
@@ -5,7 +5,7 @@
      "video": "Video",
      "tts": "TTS",
      "sound": "Suara",
-      "transform": "Transform"
+      "transform": "Transformasi"
    }
  },
  "image": {
@@ -30,7 +30,7 @@
      "refImagesAdded_other": "{{count}} gambar ditambahkan"
    },
    "actions": {
-      "view": "View",
+      "view": "Lihat",
      "generate": "Hasilkan",
      "generating": "Menghasilkan..."
    },
--- a/core/http/react-ui/public/locales/id/nav.json
+++ b/core/http/react-ui/public/locales/id/nav.json
@@ -19,11 +19,11 @@
    "operate": "Operasikan"
  },
  "operate": {
-    "inference": "Inference",
+    "inference": "Inferensi",
-    "cluster": "Cluster",
+    "cluster": "Kluster",
-    "observability": "Observability",
+    "observability": "Observabilitas",
-    "access": "Access",
+    "access": "Akses",
-    "system": "System"
+    "system": "Sistem"
  },
  "items": {
    "home": "Beranda",
@@ -51,6 +51,7 @@
    "backends": "Backend",
    "traces": "Trace",
    "nodes": "Node",
    "scheduling": "Penjadwalan",
    "swarm": "Swarm",
    "system": "Sistem",
    "settings": "Pengaturan",
@@ -63,7 +64,7 @@
    "copyright": "© 2023-{{year}} {{author}}"
  },
  "console": {
-    "automation": "Otomasi",
+    "automation": "Automasi",
    "training": "Pelatihan"
  }
 }
--- a/core/http/react-ui/public/locales/it/admin.json
+++ b/core/http/react-ui/public/locales/it/admin.json
@@ -43,6 +43,10 @@
    "title": "Nodi distribuiti",
    "subtitle": "Gestisci i nodi worker dei backend e degli agenti"
  },
  "scheduling": {
    "title": "Pianificazione",
    "subtitle": "Regole di posizionamento dei modelli e delle repliche nel cluster"
  },
  "p2p": {
    "title": "Calcolo AI distribuito",
    "subtitle": "Scala i tuoi carichi di lavoro AI su più dispositivi con la distribuzione peer-to-peer"
--- a/core/http/react-ui/public/locales/it/nav.json
+++ b/core/http/react-ui/public/locales/it/nav.json
@@ -50,6 +50,7 @@
    "backends": "Backend",
    "traces": "Tracce",
    "nodes": "Nodi",
    "scheduling": "Pianificazione",
    "swarm": "Swarm",
    "system": "Sistema",
    "settings": "Impostazioni",
--- a/Show More
+++ b/Show More