chore: ⬆️ Update ikawrakow/ik_llama.cpp to f9a93c37e2fc021760c3c1aa99cf74c73b7591a7 (#9795)

⬆️ Update ikawrakow/ik_llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
docs: ⬆️ update docs version mudler/LocalAI (#9792)
2026-05-20 06:35:41 -04:00 · 2026-05-13 00:40:48 +02:00 · 2026-05-13 00:40:37 +02:00 · 2026-05-13 00:40:09 +02:00 · 2026-05-13 00:39:57 +02:00 · 2026-05-12 22:35:55 +00:00
34 changed files with 864 additions and 164 deletions
--- a/.github/backend-matrix.yml
+++ b/.github/backend-matrix.yml
@@ -389,7 +389,12 @@ include:
    tag-latest: 'auto'
    tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp'
    builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-cuda-12-amd64'
-    runs-on: 'ubuntu-latest'
+    # bigger-runner: cold builds for this entry consistently take 5h+ on
+    # ubuntu-latest (observed 5h36m on v4.2.1). Move back to bigger-runner
+    # so the build finishes well within GHA's 6h job timeout. Phase 5.3 of
+    # the free-tier migration (PR #9730) flipped this to ubuntu-latest as
+    # a 'highest-risk batch' with explicit per-entry revert.
+    runs-on: 'bigger-runner'
    base-image: "ubuntu:24.04"
    skip-drivers: 'false'
    backend: "llama-cpp"
@@ -403,7 +408,9 @@ include:
    tag-latest: 'auto'
    tag-suffix: '-gpu-nvidia-cuda-12-turboquant'
    builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-cuda-12-amd64'
-    runs-on: 'ubuntu-latest'
+    # bigger-runner: same rationale as -gpu-nvidia-cuda-12-llama-cpp above
+    # (observed 6h5m wall-clock on v4.2.1, just past the 6h job timeout).
+    runs-on: 'bigger-runner'
    base-image: "ubuntu:24.04"
    skip-drivers: 'false'
    backend: "turboquant"
@@ -899,7 +906,9 @@ include:
    tag-latest: 'auto'
    tag-suffix: '-gpu-nvidia-cuda-13-llama-cpp'
    builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-cuda-13-amd64'
-    runs-on: 'ubuntu-latest'
+    # bigger-runner: cold builds for this entry take 5h+ on ubuntu-latest
+    # (observed 5h37m on v4.2.1). Same rationale as the cuda-12 variant.
+    runs-on: 'bigger-runner'
    base-image: "ubuntu:24.04"
    skip-drivers: 'false'
    backend: "llama-cpp"
@@ -913,7 +922,8 @@ include:
    tag-latest: 'auto'
    tag-suffix: '-gpu-nvidia-cuda-13-turboquant'
    builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-cuda-13-amd64'
-    runs-on: 'ubuntu-latest'
+    # bigger-runner: observed 6h5m wall-clock on v4.2.1 — just past GHA's 6h job timeout.
+    runs-on: 'bigger-runner'
    base-image: "ubuntu:24.04"
    skip-drivers: 'false'
    backend: "turboquant"
--- a/.github/scripts/anchor-digest-in-cache.sh
+++ b/.github/scripts/anchor-digest-in-cache.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+# Anchor a backend per-arch digest in quay.io/go-skynet/ci-cache so quay's
+# garbage collector won't reap the manifest before backend_merge.yml runs.
+#
+# Context: backend_build.yml pushes by canonical digest only
+# (push-by-digest=true). Unreferenced manifests on quay can be reaped within
+# ~1-2h, but backend-merge-jobs runs only after the *entire* per-arch build
+# matrix drains (max-parallel: 8 × dozens of entries → ~2h+). Without an
+# anchoring tag, the earliest digests are gone by the time `imagetools create`
+# tries to read them, producing "manifest not found" merge failures.
+#
+# We tag the digest under our internal ci-cache image; quay does not GC tagged
+# manifests. The user-facing manifest list still references the original
+# digest in local-ai-backends. backend_merge.yml deletes the anchor tag after
+# the user-facing manifest is published — see cleanup-keepalive-tags.sh.
+#
+# Required env:
+#   GITHUB_RUN_ID  - current workflow run id (set automatically by GHA)
+#   TAG_SUFFIX     - matrix entry's tag-suffix (e.g. -gpu-nvidia-cuda-12-vllm)
+#   PLATFORM_TAG   - amd64 / arm64 / single (single = singleton matrix entry)
+#   DIGEST         - canonical content digest from build step (sha256:...)
+#
+# Optional env:
+#   ANCHOR_IMAGE   - target image (default: quay.io/go-skynet/ci-cache)
+#   SOURCE_IMAGE   - source image (default: quay.io/go-skynet/local-ai-backends)
+#   GITHUB_STEP_SUMMARY - if set, an anchored-by line is appended to it
+set -euo pipefail
+
+: "${GITHUB_RUN_ID:?}"
+: "${TAG_SUFFIX:?}"
+: "${PLATFORM_TAG:?}"
+: "${DIGEST:?}"
+
+anchor_image="${ANCHOR_IMAGE:-quay.io/go-skynet/ci-cache}"
+source_image="${SOURCE_IMAGE:-quay.io/go-skynet/local-ai-backends}"
+
+tag="keepalive-${GITHUB_RUN_ID}${TAG_SUFFIX}-${PLATFORM_TAG}"
+
+docker buildx imagetools create \
+  -t "${anchor_image}:${tag}" \
+  "${source_image}@${DIGEST}"
+
+echo "anchored ${DIGEST} as ${anchor_image}:${tag}"
+if [[ -n "${GITHUB_STEP_SUMMARY:-}" ]]; then
+  echo "anchored \`${DIGEST}\` as \`${anchor_image}:${tag}\`" >> "${GITHUB_STEP_SUMMARY}"
+fi
--- a/.github/scripts/cleanup-keepalive-tags.sh
+++ b/.github/scripts/cleanup-keepalive-tags.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+# Best-effort cleanup of the keepalive anchor tags written by
+# anchor-digest-in-cache.sh. Called from backend_merge.yml after the
+# user-facing manifest list has been published.
+#
+# Quay's docker registry v2 doesn't allow tag deletes — only digest deletes.
+# The proper delete is the quay REST API, which requires an OAuth-scoped
+# token. We try QUAY_TOKEN as a bearer token: if the secret is an OAuth app
+# token (typical for service accounts) the delete succeeds; otherwise this
+# is a soft no-op and the tag persists until manually pruned.
+#
+# Cleanup failure MUST NOT fail the merge — the merge has already produced
+# the user-facing manifest list at this point and the keepalive tags are
+# pure overhead. We always exit 0.
+#
+# Required env:
+#   GITHUB_RUN_ID  - current workflow run id (set automatically by GHA)
+#   TAG_SUFFIX     - matrix entry's tag-suffix (e.g. -gpu-nvidia-cuda-12-vllm)
+#   QUAY_TOKEN     - bearer token for quay's REST API
+#
+# Optional env:
+#   QUAY_REPO      - target repo (default: go-skynet/ci-cache)
+#   PLATFORM_TAGS  - space-separated list of platform-tag values to try
+#                    (default: "amd64 arm64 single")
+#                    We don't know which platform-tag(s) exist for this
+#                    tag-suffix without an extra API call, so we just try
+#                    all three and ignore 404s for the ones that don't exist.
+set -uo pipefail
+
+: "${GITHUB_RUN_ID:?}"
+: "${TAG_SUFFIX:?}"
+: "${QUAY_TOKEN:?}"
+
+quay_repo="${QUAY_REPO:-go-skynet/ci-cache}"
+platform_tags="${PLATFORM_TAGS:-amd64 arm64 single}"
+
+for plat in $platform_tags; do
+  tag="keepalive-${GITHUB_RUN_ID}${TAG_SUFFIX}-${plat}"
+  url="https://quay.io/api/v1/repository/${quay_repo}/tag/${tag}"
+  http=$(curl -sS -o /dev/null -w '%{http_code}' \
+    -X DELETE -H "Authorization: Bearer ${QUAY_TOKEN}" "$url" || echo "000")
+  case "$http" in
+    204|200) echo "deleted $tag" ;;
+    404)     echo "not present: $tag" ;;
+    401|403) echo "auth not OAuth-scoped (http $http) for $tag - skipping; orphan tag will persist" ;;
+    *)       echo "unexpected http $http deleting $tag - skipping" ;;
+  esac
+done
+exit 0
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -154,7 +154,13 @@ jobs:
  # digest only — no tags are applied at build time.
  backend-merge-jobs-multiarch:
    needs: [generate-matrix, backend-jobs-multiarch]
-    if: needs.generate-matrix.outputs['has-merges-multiarch'] == 'true'
+    # !cancelled() lets the merge run even when a few build legs failed.
+    # Without it, GHA's default `needs:` cascade skips the entire merge
+    # matrix on a single failed/cancelled cell. We still want to publish
+    # the manifest lists for tag-suffixes whose legs all succeeded.
+    # Observed in v4.2.1: 2 singlearch build failures cascade-skipped all
+    # ~199 singlearch merge entries.
+    if: ${{ !cancelled() && needs.generate-matrix.outputs['has-merges-multiarch'] == 'true' }}
    uses: ./.github/workflows/backend_merge.yml
    with:
      tag-latest: ${{ matrix.tag-latest }}
@@ -170,7 +176,8 @@ jobs:

  backend-merge-jobs-singlearch:
    needs: [generate-matrix, backend-jobs-singlearch]
-    if: needs.generate-matrix.outputs['has-merges-singlearch'] == 'true'
+    # See note on backend-merge-jobs-multiarch above for !cancelled().
+    if: ${{ !cancelled() && needs.generate-matrix.outputs['has-merges-singlearch'] == 'true' }}
    uses: ./.github/workflows/backend_merge.yml
    with:
      tag-latest: ${{ matrix.tag-latest }}
--- a/.github/workflows/backend_build.yml
+++ b/.github/workflows/backend_build.yml
@@ -228,6 +228,16 @@ jobs:
          digest="${{ steps.build.outputs.digest }}"
          touch "/tmp/digests/${digest#sha256:}"

+      # See .github/scripts/anchor-digest-in-cache.sh for why this is needed
+      # and how it interacts with backend_merge.yml's cleanup step.
+      - name: Anchor digest in ci-cache so quay GC won't reap before merge
+        if: github.event_name != 'pull_request'
+        env:
+          TAG_SUFFIX: ${{ inputs.tag-suffix }}
+          PLATFORM_TAG: ${{ inputs.platform-tag || 'single' }}
+          DIGEST: ${{ steps.build.outputs.digest }}
+        run: .github/scripts/anchor-digest-in-cache.sh
+
      # Artifact name uses a `--` separator between tag-suffix and platform-tag
      # to avoid prefix collisions during the merge job's pattern-based download.
      # Tag-suffixes are not prefix-disjoint (e.g. -gpu-nvidia-cuda-12-vllm is a
@@ -237,7 +247,7 @@ jobs:
      # platform-tag (single-arch entries) keeps the artifact name non-trailing.
      - name: Upload digest artifact
        if: github.event_name != 'pull_request'
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v7
        with:
          name: digests${{ inputs.tag-suffix }}--${{ inputs.platform-tag || 'single' }}
          path: /tmp/digests/*
--- a/.github/workflows/backend_build_darwin.yml
+++ b/.github/workflows/backend_build_darwin.yml
@@ -116,6 +116,13 @@ jobs:
          # already), we don't have to chase missing dylibs one at a time.
          # The downloads cache makes the reinstall fast (~5s on a hit).
          brew reinstall ccache
+          # Same pattern for grpc: its CMake config (used by the llama-cpp
+          # `grpc-server` target) does find_package(absl). The cache restores
+          # /opt/homebrew/Cellar/grpc so brew above no-ops the install, but
+          # abseil isn't in our Cellar cache list and never gets installed
+          # alongside, leaving grpc's CMake unable to resolve it. Reinstalling
+          # grpc re-validates and pulls abseil in, mirroring the ccache fix.
+          brew reinstall grpc
          # The brew cache restores the Cellar dirs but NOT the bin symlinks
          # at /opt/homebrew/bin/*. brew install above sees the Cellar present
          # and decides "already installed" without re-linking, so on a cache-
--- a/.github/workflows/backend_merge.yml
+++ b/.github/workflows/backend_merge.yml
@@ -34,12 +34,21 @@ jobs:
    env:
      quay_username: ${{ secrets.quayUsername }}
    steps:
+      # Sparse checkout: the merge job needs `.github/scripts/` (for the
+      # keepalive cleanup script) but none of the source tree.
+      - name: Checkout (.github/scripts only)
+        uses: actions/checkout@v6
+        with:
+          sparse-checkout: |
+            .github/scripts
+          sparse-checkout-cone-mode: false
+
      # `--` separator anchors the glob so we don't over-match sibling
      # backends whose tag-suffix happens to be a prefix of ours
      # (e.g. -cpu-vllm vs -cpu-vllm-omni). Must stay in sync with the
      # upload-artifact name in backend_build.yml.
      - name: Download digests
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@v8
        with:
          pattern: digests${{ inputs.tag-suffix }}--*
          merge-multiple: true
@@ -79,6 +88,25 @@ jobs:
            latest=${{ inputs.tag-latest }}
            suffix=${{ inputs.tag-suffix }},onlatest=true

+      # Source from ci-cache, not local-ai-backends.
+      #
+      # The build job pushes per-arch manifests to local-ai-backends with
+      # push-by-digest=true (no tag), then anchors a tagged copy into
+      # ci-cache so the manifest can be retrieved hours later when this
+      # merge runs. Quay's manifest GC, however, is per-repository: the
+      # anchor tag in ci-cache protects the manifest there, but the same
+      # digest in local-ai-backends has no tag in *that* repo and gets
+      # reaped independently. Sourcing local-ai-backends@<digest> here
+      # then fails with "manifest not found" — exactly the regression
+      # we hit on v4.2.2 (19/37 multiarch merges failed).
+      #
+      # ci-cache@<digest> resolves because we anchored it there. buildx
+      # imagetools create copies the manifest into local-ai-backends
+      # (cross-repo within the same registry, blobs already cross-mounted
+      # from the original push so no transfer needed) and publishes the
+      # manifest list with the user-facing tags. The resulting manifest
+      # list is fully self-contained in local-ai-backends — child digests
+      # only, no embedded references to ci-cache.
      - name: Create manifest list and push (quay)
        if: github.event_name != 'pull_request'
        working-directory: /tmp/digests
@@ -95,7 +123,7 @@ jobs:
          else
            # shellcheck disable=SC2086
            docker buildx imagetools create $tags \
-              $(printf 'quay.io/go-skynet/local-ai-backends@sha256:%s ' *)
+              $(printf 'quay.io/go-skynet/ci-cache@sha256:%s ' *)
          fi

      - name: Create manifest list and push (dockerhub)
@@ -126,6 +154,15 @@ jobs:
            docker buildx imagetools inspect "$first_tag"
          fi

+      # See .github/scripts/cleanup-keepalive-tags.sh for why this is
+      # best-effort and what the failure modes are.
+      - name: Cleanup keepalive tags in ci-cache
+        if: github.event_name != 'pull_request' && success()
+        env:
+          TAG_SUFFIX: ${{ inputs.tag-suffix }}
+          QUAY_TOKEN: ${{ secrets.quayPassword }}
+        run: .github/scripts/cleanup-keepalive-tags.sh
+
      - name: Job summary
        if: github.event_name != 'pull_request'
        run: |
--- a/.github/workflows/backend_pr.yml
+++ b/.github/workflows/backend_pr.yml
@@ -104,7 +104,9 @@ jobs:
    # backend_merge.yml's push-side steps are all gated on
    # github.event_name != 'pull_request', so on a PR the merge job would
    # do nothing. Skip it entirely to avoid spinning up an empty runner.
-    if: github.event_name != 'pull_request' && needs.generate-matrix.outputs['has-merges-multiarch'] == 'true'
+    # !cancelled() lets the merge run even when a few build legs fail —
+    # see the matching note in backend.yml.
+    if: ${{ !cancelled() && github.event_name != 'pull_request' && needs.generate-matrix.outputs['has-merges-multiarch'] == 'true' }}
    uses: ./.github/workflows/backend_merge.yml
    with:
      tag-latest: ${{ matrix.tag-latest }}
@@ -118,7 +120,7 @@ jobs:

  backend-merge-jobs-singlearch:
    needs: [generate-matrix, backend-jobs-singlearch]
-    if: github.event_name != 'pull_request' && needs.generate-matrix.outputs['has-merges-singlearch'] == 'true'
+    if: ${{ !cancelled() && github.event_name != 'pull_request' && needs.generate-matrix.outputs['has-merges-singlearch'] == 'true' }}
    uses: ./.github/workflows/backend_merge.yml
    with:
      tag-latest: ${{ matrix.tag-latest }}
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -187,7 +187,7 @@ jobs:

      - name: Upload digest artifact
        if: github.event_name != 'pull_request'
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v7
        with:
          name: digests-localai${{ inputs.tag-suffix == '' && '-core' || inputs.tag-suffix }}-${{ inputs.platform-tag }}
          path: /tmp/digests/*
--- a/.github/workflows/image_merge.yml
+++ b/.github/workflows/image_merge.yml
@@ -34,7 +34,7 @@ jobs:
      quay_username: ${{ secrets.quayUsername }}
    steps:
      - name: Download digests
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@v8
        with:
          pattern: digests-localai${{ inputs.tag-suffix == '' && '-core' || inputs.tag-suffix }}-*
          merge-multiple: true
--- a/2
+++ b/2
@@ -305,7 +305,7 @@ EOT
 ###################################

 # Build React UI
-FROM node:25-slim AS react-ui-builder
+FROM node:26-slim AS react-ui-builder
 WORKDIR /app
 COPY core/http/react-ui/package*.json ./
 RUN npm install
--- a/backend/Dockerfile.turboquant
+++ b/backend/Dockerfile.turboquant
@@ -117,6 +117,12 @@ ARG CUDA_DOCKER_ARCH
 ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
 ARG CMAKE_ARGS
 ENV CMAKE_ARGS=${CMAKE_ARGS}
+# AMDGPU_TARGETS must be forwarded into the env here too — backend/cpp/llama-cpp/Makefile
+# (which the turboquant Makefile reuses via a sibling build dir) errors out when the var
+# is empty on a hipblas build, and the prebuilt path is what CI exercises most of the
+# time. The builder-fromsource stage above already does this; mirror it here.
+ARG AMDGPU_TARGETS
+ENV AMDGPU_TARGETS=${AMDGPU_TARGETS}
 ARG TARGETARCH
 ARG TARGETVARIANT

--- a/backend/cpp/ds4/Makefile
+++ b/backend/cpp/ds4/Makefile
@@ -1,10 +1,10 @@
 # ds4 backend Makefile.
 #
-# Upstream pin lives below as DS4_VERSION?= so the bump-deps bot
+# Upstream pin lives below in the DS4_VERSION variable so the bump-deps bot
 # (.github/bump_deps.sh) can find and update it - matches the
 # llama-cpp / ik-llama-cpp / turboquant convention.

-DS4_VERSION?=ae302c2fa18cc6d9aefc021d0f27ae03c9ad2fc0
+DS4_VERSION?=f8b4ed635d559b3a5b44bf2df6a77e21b3e9178f
 DS4_REPO?=https://github.com/antirez/ds4

 CURRENT_MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
--- a/backend/cpp/ik-llama-cpp/Makefile
+++ b/backend/cpp/ik-llama-cpp/Makefile
@@ -1,5 +1,5 @@

-IK_LLAMA_VERSION?=eb570eb96689c235933b813693ca28ab9d3d26de
+IK_LLAMA_VERSION?=f9a93c37e2fc021760c3c1aa99cf74c73b7591a7
 LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp

 CMAKE_ARGS?=
--- a/backend/cpp/llama-cpp/Makefile
+++ b/backend/cpp/llama-cpp/Makefile
@@ -1,5 +1,5 @@

-LLAMA_VERSION?=389ff61d77b5c71cec0cf92fe4e5d01ace80b797
+LLAMA_VERSION?=1ec7ba0c14f33f17e980daeeda5f35b225d41994
 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp

 CMAKE_ARGS?=
--- a/backend/cpp/llama-cpp/grpc-server.cpp
+++ b/backend/cpp/llama-cpp/grpc-server.cpp
@@ -36,6 +36,8 @@
 #include <cstdlib>
 #include <fstream>
 #include <iterator>
+#include <list>
+#include <map>
 #include <mutex>
 #include <signal.h>
 #include <thread>
@@ -443,10 +445,22 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
    // Draft model for speculative decoding
    if (!request->draftmodel().empty()) {
        params.speculative.draft.mparams.path = request->draftmodel();
-        // Default to draft type if a draft model is set but no explicit type
+        // Default to draft type if a draft model is set but no explicit type.
+        // Upstream (post ggml-org/llama.cpp#22838) made the speculative type a
+        // vector; the turboquant fork still uses the legacy scalar. The
+        // LOCALAI_LEGACY_LLAMA_CPP_SPEC macro is injected by
+        // backend/cpp/turboquant/patch-grpc-server.sh for fork builds only.
+#ifdef LOCALAI_LEGACY_LLAMA_CPP_SPEC
        if (params.speculative.type == COMMON_SPECULATIVE_TYPE_NONE) {
            params.speculative.type = COMMON_SPECULATIVE_TYPE_DRAFT;
        }
+#else
+        const bool no_spec_type = params.speculative.types.empty() ||
+            (params.speculative.types.size() == 1 && params.speculative.types[0] == COMMON_SPECULATIVE_TYPE_NONE);
+        if (no_spec_type) {
+            params.speculative.types = { COMMON_SPECULATIVE_TYPE_DRAFT };
+        }
+#endif
    }

    //  params.model_alias ??
@@ -673,10 +687,35 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
            }
        // Speculative decoding options
        } else if (!strcmp(optname, "spec_type") || !strcmp(optname, "speculative_type")) {
-            auto type = common_speculative_type_from_name(optval_str);
+#ifdef LOCALAI_LEGACY_LLAMA_CPP_SPEC
+            // Fork only knows a single scalar `type`. Take the first comma-
+            // separated value and assign it via the singular helper.
+            std::string first = optval_str;
+            const auto comma = first.find(',');
+            if (comma != std::string::npos) first = first.substr(0, comma);
+            auto type = common_speculative_type_from_name(first);
            if (type != COMMON_SPECULATIVE_TYPE_COUNT) {
                params.speculative.type = type;
            }
+#else
+            // Upstream switched to a vector of types (comma-separated for multi-type
+            // chaining via common_speculative_types_from_names). We keep accepting a
+            // single value here, but also tolerate comma-separated lists.
+            std::vector<std::string> names;
+            std::string item;
+            for (char c : optval_str) {
+                if (c == ',') {
+                    if (!item.empty()) { names.push_back(item); item.clear(); }
+                } else {
+                    item.push_back(c);
+                }
+            }
+            if (!item.empty()) names.push_back(item);
+            auto parsed = common_speculative_types_from_names(names);
+            if (!parsed.empty()) {
+                params.speculative.types = parsed;
+            }
+#endif
        } else if (!strcmp(optname, "spec_n_max") || !strcmp(optname, "draft_max")) {
            if (optval != NULL) {
                try { params.speculative.draft.n_max = std::stoi(optval_str); } catch (...) {}
@@ -710,10 +749,155 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
                try { params.speculative.draft.n_gpu_layers = std::stoi(optval_str); } catch (...) {}
            }
        } else if (!strcmp(optname, "draft_ctx_size")) {
-            if (optval != NULL) {
-                try { params.speculative.draft.n_ctx = std::stoi(optval_str); } catch (...) {}
-            }
+            // The draft context size is no longer a separate field upstream: the draft
+            // shares the target context size. Accept the option for backward
+            // compatibility but silently ignore it.
+
+// Everything below relies on struct shape introduced in ggml-org/llama.cpp#22838
+// (parallel drafting): `ngram_mod`, `ngram_map_k`, `ngram_map_k4v`,
+// `ngram_cache`, and the `draft.{cache_type_*, cpuparams*, tensor_buft_overrides}`
+// fields. The turboquant fork branched before that, so its build defines
+// LOCALAI_LEGACY_LLAMA_CPP_SPEC via patch-grpc-server.sh and these option
+// keys become unrecognized (silently dropped, like any unknown opt) for it.
+//
+// The `#ifdef LOCALAI_LEGACY_LLAMA_CPP_SPEC` / `#else` split below sits at the
+// closing-brace position of the `draft_ctx_size` branch on purpose: in the
+// legacy build the chain ends here (the brace closes draft_ctx_size), and in
+// the modern build the chain continues with `} else if (...)` instead, so the
+// brace count stays balanced under both branches of the preprocessor.
+#ifdef LOCALAI_LEGACY_LLAMA_CPP_SPEC
        }
+#else
+        // --- ngram_mod family (upstream --spec-ngram-mod-*) ---
+        } else if (!strcmp(optname, "spec_ngram_mod_n_min")) {
+            if (optval != NULL) {
+                try { params.speculative.ngram_mod.n_min = std::stoi(optval_str); } catch (...) {}
+            }
+        } else if (!strcmp(optname, "spec_ngram_mod_n_max")) {
+            if (optval != NULL) {
+                try { params.speculative.ngram_mod.n_max = std::stoi(optval_str); } catch (...) {}
+            }
+        } else if (!strcmp(optname, "spec_ngram_mod_n_match")) {
+            if (optval != NULL) {
+                try { params.speculative.ngram_mod.n_match = std::stoi(optval_str); } catch (...) {}
+            }
+
+        // --- ngram_map_k family (upstream --spec-ngram-map-k-*) ---
+        } else if (!strcmp(optname, "spec_ngram_map_k_size_n")) {
+            if (optval != NULL) {
+                try { params.speculative.ngram_map_k.size_n = (uint16_t)std::stoi(optval_str); } catch (...) {}
+            }
+        } else if (!strcmp(optname, "spec_ngram_map_k_size_m")) {
+            if (optval != NULL) {
+                try { params.speculative.ngram_map_k.size_m = (uint16_t)std::stoi(optval_str); } catch (...) {}
+            }
+        } else if (!strcmp(optname, "spec_ngram_map_k_min_hits")) {
+            if (optval != NULL) {
+                try { params.speculative.ngram_map_k.min_hits = (uint16_t)std::stoi(optval_str); } catch (...) {}
+            }
+
+        // --- ngram_map_k4v family (upstream --spec-ngram-map-k4v-*) ---
+        } else if (!strcmp(optname, "spec_ngram_map_k4v_size_n")) {
+            if (optval != NULL) {
+                try { params.speculative.ngram_map_k4v.size_n = (uint16_t)std::stoi(optval_str); } catch (...) {}
+            }
+        } else if (!strcmp(optname, "spec_ngram_map_k4v_size_m")) {
+            if (optval != NULL) {
+                try { params.speculative.ngram_map_k4v.size_m = (uint16_t)std::stoi(optval_str); } catch (...) {}
+            }
+        } else if (!strcmp(optname, "spec_ngram_map_k4v_min_hits")) {
+            if (optval != NULL) {
+                try { params.speculative.ngram_map_k4v.min_hits = (uint16_t)std::stoi(optval_str); } catch (...) {}
+            }
+
+        // --- ngram lookup caches (upstream --lookup-cache-static / -dynamic) ---
+        } else if (!strcmp(optname, "spec_lookup_cache_static") || !strcmp(optname, "lookup_cache_static")) {
+            params.speculative.ngram_cache.lookup_cache_static = optval_str;
+        } else if (!strcmp(optname, "spec_lookup_cache_dynamic") || !strcmp(optname, "lookup_cache_dynamic")) {
+            params.speculative.ngram_cache.lookup_cache_dynamic = optval_str;
+
+        // --- draft model KV cache types (upstream --spec-draft-type-k / -v) ---
+        } else if (!strcmp(optname, "draft_cache_type_k") || !strcmp(optname, "spec_draft_cache_type_k")) {
+            params.speculative.draft.cache_type_k = kv_cache_type_from_str(optval_str);
+        } else if (!strcmp(optname, "draft_cache_type_v") || !strcmp(optname, "spec_draft_cache_type_v")) {
+            params.speculative.draft.cache_type_v = kv_cache_type_from_str(optval_str);
+
+        // --- draft model thread counts (upstream --spec-draft-threads / -batch) ---
+        } else if (!strcmp(optname, "draft_threads") || !strcmp(optname, "spec_draft_threads")) {
+            if (optval != NULL) {
+                try {
+                    int n = std::stoi(optval_str);
+                    if (n <= 0) n = (int)std::thread::hardware_concurrency();
+                    params.speculative.draft.cpuparams.n_threads = n;
+                } catch (...) {}
+            }
+        } else if (!strcmp(optname, "draft_threads_batch") || !strcmp(optname, "spec_draft_threads_batch")) {
+            if (optval != NULL) {
+                try {
+                    int n = std::stoi(optval_str);
+                    if (n <= 0) n = (int)std::thread::hardware_concurrency();
+                    params.speculative.draft.cpuparams_batch.n_threads = n;
+                } catch (...) {}
+            }
+
+        // --- draft model MoE on CPU (upstream --spec-draft-cpu-moe / --spec-draft-n-cpu-moe) ---
+        } else if (!strcmp(optname, "draft_cpu_moe") || !strcmp(optname, "spec_draft_cpu_moe")) {
+            // Bool-style flag: a missing optval or "true"/"1"/"yes"/"on"/"enabled" enables it.
+            const bool enable = (optval == NULL) ||
+                optval_str == "true" || optval_str == "1" || optval_str == "yes" ||
+                optval_str == "on" || optval_str == "enabled";
+            if (enable) {
+                params.speculative.draft.tensor_buft_overrides.push_back(llm_ffn_exps_cpu_override());
+            }
+        } else if (!strcmp(optname, "draft_n_cpu_moe") || !strcmp(optname, "spec_draft_n_cpu_moe")) {
+            if (optval != NULL) {
+                try {
+                    int n = std::stoi(optval_str);
+                    if (n < 0) n = 0;
+                    // Keep override-name storage alive for the lifetime of the params struct
+                    // (mirrors upstream arg.cpp behavior with a function-local static).
+                    static std::list<std::string> buft_overrides_draft;
+                    for (int i = 0; i < n; ++i) {
+                        buft_overrides_draft.push_back(llm_ffn_exps_block_regex(i));
+                        params.speculative.draft.tensor_buft_overrides.push_back(
+                            {buft_overrides_draft.back().c_str(), ggml_backend_cpu_buffer_type()});
+                    }
+                } catch (...) {}
+            }
+
+        // --- draft model tensor buffer overrides (upstream --spec-draft-override-tensor) ---
+        } else if (!strcmp(optname, "draft_override_tensor") || !strcmp(optname, "spec_draft_override_tensor")) {
+            // Format: <tensor regex>=<buffer type>,<tensor regex>=<buffer type>,...
+            // We replicate upstream's parse_tensor_buffer_overrides (static in arg.cpp).
+            ggml_backend_load_all();
+            std::map<std::string, ggml_backend_buffer_type_t> buft_list;
+            for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
+                auto * dev = ggml_backend_dev_get(i);
+                auto * buft = ggml_backend_dev_buffer_type(dev);
+                if (buft) {
+                    buft_list[ggml_backend_buft_name(buft)] = buft;
+                }
+            }
+            static std::list<std::string> draft_override_names;
+            std::string cur;
+            auto flush = [&](const std::string & spec) {
+                auto pos = spec.find('=');
+                if (pos == std::string::npos) return;
+                const std::string name = spec.substr(0, pos);
+                const std::string type = spec.substr(pos + 1);
+                auto it = buft_list.find(type);
+                if (it == buft_list.end()) return; // unknown buffer type: ignore
+                draft_override_names.push_back(name);
+                params.speculative.draft.tensor_buft_overrides.push_back(
+                    {draft_override_names.back().c_str(), it->second});
+            };
+            for (char c : optval_str) {
+                if (c == ',') { if (!cur.empty()) { flush(cur); cur.clear(); } }
+                else { cur.push_back(c); }
+            }
+            if (!cur.empty()) flush(cur);
+        }
+#endif // LOCALAI_LEGACY_LLAMA_CPP_SPEC — closes the `else`/`#ifdef` opened at draft_ctx_size
    }

    // Set params.n_parallel from environment variable if not set via options (fallback)
@@ -2704,7 +2888,7 @@ public:

            tasks.reserve(documents.size());
            for (size_t i = 0; i < documents.size(); i++) {
-                auto tmp = format_prompt_rerank(ctx_server.impl->model, ctx_server.impl->vocab, ctx_server.impl->mctx, request->query(), documents[i]);
+                auto tmp = format_prompt_rerank(ctx_server.impl->model_tgt, ctx_server.impl->vocab, ctx_server.impl->mctx, request->query(), documents[i]);
                server_task task = server_task(SERVER_TASK_TYPE_RERANK);
                task.id = rd.queue_tasks.get_new_id();
                task.index = i;
@@ -2882,7 +3066,7 @@ public:
                // Get template source and reconstruct a common_chat_template for analysis
                std::string tmpl_src = common_chat_templates_source(ctx_server.impl->chat_params.tmpls.get());
                if (!tmpl_src.empty()) {
-                    const auto * vocab = llama_model_get_vocab(ctx_server.impl->model);
+                    const auto * vocab = llama_model_get_vocab(ctx_server.impl->model_tgt);
                    std::string token_bos, token_eos;
                    if (vocab) {
                        auto bos_id = llama_vocab_bos(vocab);
--- a/backend/cpp/turboquant/patch-grpc-server.sh
+++ b/backend/cpp/turboquant/patch-grpc-server.sh
@@ -108,4 +108,47 @@ else
    echo "==> $SRC has no post-#22397 speculative field refs, skipping spec rename patch"
 fi

+# 4. Revert the `ctx_server.impl->model_tgt` rename introduced by upstream
+#    ggml-org/llama.cpp#22838 (parallel drafting). The turboquant fork still
+#    exposes the field as `model` on `server_context_impl`. The two call sites
+#    are in the Rerank and ModelMetadata RPC handlers.
+if grep -q 'ctx_server\.impl->model_tgt' "$SRC"; then
+    echo "==> patching $SRC to revert ctx_server.impl->model_tgt -> ctx_server.impl->model"
+    sed -E 's/ctx_server\.impl->model_tgt/ctx_server.impl->model/g' "$SRC" > "$SRC.tmp"
+    mv "$SRC.tmp" "$SRC"
+    echo "==> model_tgt rename OK"
+else
+    echo "==> $SRC has no ctx_server.impl->model_tgt refs, skipping model_tgt rename patch"
+fi
+
+# 5. Define LOCALAI_LEGACY_LLAMA_CPP_SPEC at the top of the file so the
+#    grpc-server option parser skips the new option-handler blocks (ngram_mod,
+#    ngram_map_k, ngram_map_k4v, ngram_cache, draft.cache_type_*, draft.cpuparams*,
+#    draft.tensor_buft_overrides) introduced for the post-#22838 layout. Those
+#    blocks reference struct fields that simply do not exist in the fork.
+if grep -q '^#define LOCALAI_LEGACY_LLAMA_CPP_SPEC' "$SRC"; then
+    echo "==> $SRC already defines LOCALAI_LEGACY_LLAMA_CPP_SPEC, skipping"
+else
+    echo "==> patching $SRC to define LOCALAI_LEGACY_LLAMA_CPP_SPEC at the top"
+    # Insert the define before the first `#include` so it precedes all the
+    # speculative-decoding code paths; abort (keeping $SRC intact) if awk fails.
+    awk '
+        !done && /^#include/ {
+            print "#define LOCALAI_LEGACY_LLAMA_CPP_SPEC 1"
+            print "// ^ injected by backend/cpp/turboquant/patch-grpc-server.sh"
+            print ""
+            done = 1
+        }
+        { print }
+        END {
+            if (!done) {
+                print "patch-grpc-server.sh: no #include anchor found to insert LOCALAI_LEGACY_LLAMA_CPP_SPEC" > "/dev/stderr"
+                exit 1
+            }
+        }
+    ' "$SRC" > "$SRC.tmp" || { rm -f "$SRC.tmp"; exit 1; }
+    mv "$SRC.tmp" "$SRC"
+    echo "==> LOCALAI_LEGACY_LLAMA_CPP_SPEC define OK"
+fi
+
 echo "==> all patches applied"
--- a/backend/go/whisper/Makefile
+++ b/backend/go/whisper/Makefile
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)

 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
-WHISPER_CPP_VERSION?=c33c5618b72bb345df029b730b36bc0e369845a3
+WHISPER_CPP_VERSION?=338cce1e58133261753243802a0e7a430118866d
 SO_TARGET?=libgowhisper.so

 CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
--- a/backend/python/transformers/requirements-cpu.txt
+++ b/backend/python/transformers/requirements-cpu.txt
@@ -2,7 +2,7 @@ torch==2.7.1
 llvmlite==0.43.0
 numba==0.60.0
 accelerate
-transformers>=5.0.0
+transformers>=5.8.0
 bitsandbytes
 sentence-transformers==5.4.0
 diffusers
--- a/backend/python/transformers/requirements-cublas12.txt
+++ b/backend/python/transformers/requirements-cublas12.txt
@@ -2,7 +2,7 @@ torch==2.7.1
 accelerate
 llvmlite==0.43.0
 numba==0.60.0
-transformers>=5.0.0
+transformers>=5.8.0
 bitsandbytes
 sentence-transformers==5.4.0
 diffusers
--- a/backend/python/transformers/requirements-cublas13.txt
+++ b/backend/python/transformers/requirements-cublas13.txt
@@ -2,7 +2,7 @@
 torch==2.9.0
 llvmlite==0.43.0
 numba==0.60.0
-transformers>=5.0.0
+transformers>=5.8.0
 bitsandbytes
 sentence-transformers==5.4.0
 diffusers
--- a/backend/python/transformers/requirements-hipblas.txt
+++ b/backend/python/transformers/requirements-hipblas.txt
@@ -1,7 +1,7 @@
 --extra-index-url https://download.pytorch.org/whl/rocm7.0
 torch==2.10.0+rocm7.0
 accelerate
-transformers>=5.0.0
+transformers>=5.8.0
 llvmlite==0.43.0
 numba==0.60.0
 bitsandbytes
--- a/backend/python/transformers/requirements-intel.txt
+++ b/backend/python/transformers/requirements-intel.txt
@@ -3,7 +3,7 @@ torch
 optimum[openvino]
 llvmlite==0.43.0
 numba==0.60.0
-transformers>=5.0.0
+transformers>=5.8.0
 bitsandbytes
 sentence-transformers==5.4.0
 diffusers
--- a/backend/python/transformers/requirements-mps.txt
+++ b/backend/python/transformers/requirements-mps.txt
@@ -2,7 +2,7 @@ torch==2.7.1
 llvmlite==0.43.0
 numba==0.60.0
 accelerate
-transformers>=5.0.0
+transformers>=5.8.0
 bitsandbytes
 sentence-transformers==5.4.0
 diffusers
--- a/backend/python/vllm/pyproject.toml
+++ b/backend/python/vllm/pyproject.toml
@@ -33,7 +33,7 @@ dependencies = [
    "certifi",
    "setuptools",
    "pillow",
-    "charset-normalizer>=3.4.0",
+    "charset-normalizer>=3.4.7",
    "chardet",
    # L4T-specific accelerator stack (sourced from jetson-ai-lab below).
    "torch",
--- a/backend/python/vllm/requirements.txt
+++ b/backend/python/vllm/requirements.txt
@@ -3,5 +3,5 @@ protobuf
 certifi
 setuptools
 pillow
-charset-normalizer>=3.4.0
+charset-normalizer>=3.4.7
 chardet
--- a/core/http/react-ui/package-lock.json
+++ b/core/http/react-ui/package-lock.json
@@ -16,6 +16,8 @@
        "@codemirror/search": "^6.5.10",
        "@codemirror/state": "^6.5.2",
        "@codemirror/view": "^6.36.8",
+        "@fontsource-variable/geist": "^5.2.8",
+        "@fontsource-variable/geist-mono": "^5.2.7",
        "@fortawesome/fontawesome-free": "^6.7.2",
        "@lezer/highlight": "^1.2.1",
        "@modelcontextprotocol/ext-apps": "^1.2.2",
@@ -965,6 +967,24 @@
        "node": "^18.18.0 || ^20.9.0 || >=21.1.0"
      }
    },
+    "node_modules/@fontsource-variable/geist": {
+      "version": "5.2.8",
+      "resolved": "https://registry.npmjs.org/@fontsource-variable/geist/-/geist-5.2.8.tgz",
+      "integrity": "sha512-cJ6m9e+8MQ5dCYJsLylfZrgBh6KkG4bOLckB35Tr9J/EqdkEM6QllH5PxqP1dhTvFup+HtMRPuz9xOjxXJggxw==",
+      "license": "OFL-1.1",
+      "funding": {
+        "url": "https://github.com/sponsors/ayuhito"
+      }
+    },
+    "node_modules/@fontsource-variable/geist-mono": {
+      "version": "5.2.7",
+      "resolved": "https://registry.npmjs.org/@fontsource-variable/geist-mono/-/geist-mono-5.2.7.tgz",
+      "integrity": "sha512-ZKlZ5sjtalb2TwXKs400mAGDlt/+2ENLNySPx0wTz3bP3mWARCsUW+rpxzZc7e05d2qGch70pItt3K4qttbIYA==",
+      "license": "OFL-1.1",
+      "funding": {
+        "url": "https://github.com/sponsors/ayuhito"
+      }
+    },
    "node_modules/@fortawesome/fontawesome-free": {
      "version": "6.7.2",
      "resolved": "https://registry.npmjs.org/@fortawesome/fontawesome-free/-/fontawesome-free-6.7.2.tgz",
@@ -2903,11 +2923,12 @@
      }
    },
    "node_modules/express-rate-limit": {
-      "version": "8.3.1",
-      "resolved": "https://registry.npmjs.org/express-rate-limit/-/express-rate-limit-8.3.1.tgz",
-      "integrity": "sha512-D1dKN+cmyPWuvB+G2SREQDzPY1agpBIcTa9sJxOPMCNeH3gwzhqJRDWCXW3gg0y//+LQ/8j52JbMROWyrKdMdw==",
+      "version": "8.5.1",
+      "resolved": "https://registry.npmjs.org/express-rate-limit/-/express-rate-limit-8.5.1.tgz",
+      "integrity": "sha512-5O6KYmyJEpuPJV5hNTXKbAHWRqrzyu+OI3vUnSd2kXFubIVpG7ezpgxQy76Zo5GQZtrQBg86hF+CM/NX+cioiQ==",
+      "license": "MIT",
      "dependencies": {
-        "ip-address": "10.1.0"
+        "ip-address": "^10.2.0"
      },
      "engines": {
        "node": ">= 16"
@@ -2951,9 +2972,9 @@
      "dev": true
    },
    "node_modules/fast-uri": {
-      "version": "3.1.0",
-      "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.0.tgz",
-      "integrity": "sha512-iPeeDKJSWf4IEOasVVrknXpaBV0IApz/gp7S2bb7Z4Lljbl2MGJRqInZiUrQwV16cpzw/D3S5j5Julj/gT52AA==",
+      "version": "3.1.2",
+      "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.2.tgz",
+      "integrity": "sha512-rVjf7ArG3LTk+FS6Yw81V1DLuZl1bRbNrev6Tmd/9RaroeeRRJhAt7jg/6YFxbvAQXUCavSoZhPPj6oOx+5KjQ==",
      "funding": [
        {
          "type": "github",
@@ -2963,7 +2984,8 @@
          "type": "opencollective",
          "url": "https://opencollective.com/fastify"
        }
-      ]
+      ],
+      "license": "BSD-3-Clause"
    },
    "node_modules/fastq": {
      "version": "1.20.1",
@@ -3421,9 +3443,9 @@
      }
    },
    "node_modules/hono": {
-      "version": "4.12.14",
-      "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.14.tgz",
-      "integrity": "sha512-am5zfg3yu6sqn5yjKBNqhnTX7Cv+m00ox+7jbaKkrLMRJ4rAdldd1xPd/JzbBWspqaQv6RSTrgFN95EsfhC+7w==",
+      "version": "4.12.18",
+      "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.18.tgz",
+      "integrity": "sha512-RWzP96k/yv0PQfyXnWjs6zot20TqfpfsNXhOnev8d1InAxubW93L11/oNUc3tQqn2G0bSdAOBpX+2uDFHV7kdQ==",
      "license": "MIT",
      "engines": {
        "node": ">=16.9.0"
@@ -3681,9 +3703,10 @@
      "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="
    },
    "node_modules/ip-address": {
-      "version": "10.1.0",
-      "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.1.0.tgz",
-      "integrity": "sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q==",
+      "version": "10.2.0",
+      "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.2.0.tgz",
+      "integrity": "sha512-/+S6j4E9AHvW9SWMSEY9Xfy66O5PWvVEJ08O0y5JGyEKQpojb0K0GKpz/v5HJ/G0vi3D2sjGK78119oXZeE0qA==",
+      "license": "MIT",
      "engines": {
        "node": ">= 12"
      }
--- a/core/schema/ollama.go
+++ b/core/schema/ollama.go
@@ -120,10 +120,14 @@ type OllamaGenerateResponse struct {
 	EvalDuration       int64     `json:"eval_duration,omitempty"`
 }

-// OllamaEmbedRequest represents a request to the Ollama Embed API
+// OllamaEmbedRequest represents a request to the Ollama Embed API.
+// Ollama's embedding API takes the text under `input` (/api/embed) or, on the
+// legacy /api/embeddings endpoint, `prompt` (see https://github.com/ollama/ollama/blob/main/docs/api.md#generate-embeddings),
+// so both keys are deserialized here for client compatibility.
 type OllamaEmbedRequest struct {
-	Model   string `json:"model"`
-	Input   any    `json:"input"` // string or []string
+	Model   string         `json:"model"`
+	Input   any            `json:"input,omitempty"`  // string or []string
+	Prompt  any            `json:"prompt,omitempty"` // string or []string (Ollama alias for Input)
 	Options *OllamaOptions `json:"options,omitempty"`
 }

@@ -135,10 +139,21 @@ func (r *OllamaEmbedRequest) ModelName(s *string) string {
 	return r.Model
 }

-// GetInputStrings normalizes the Input field to a string slice
+// GetInputStrings normalizes the Input/Prompt field to a string slice.
+// Input takes precedence; Prompt is used only when Input is missing or empty.
 func (r *OllamaEmbedRequest) GetInputStrings() []string {
-	switch v := r.Input.(type) {
+	if v := normalizeOllamaEmbedInput(r.Input); v != nil {
+		return v
+	}
+	return normalizeOllamaEmbedInput(r.Prompt)
+}
+
+func normalizeOllamaEmbedInput(v any) []string {
+	switch v := v.(type) {
 	case string:
+		if v == "" {
+			return nil
+		}
 		return []string{v}
 	case []any:
 		var result []string
--- a/core/schema/ollama_test.go
+++ b/core/schema/ollama_test.go
@@ -0,0 +1,86 @@
+package schema_test
+
+import (
+	"encoding/json"
+
+	. "github.com/mudler/LocalAI/core/schema"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+var _ = Describe("OllamaEmbedRequest", func() {
+
+	Context("GetInputStrings", func() {
+		It("returns a single string when Input is a string", func() {
+			req := OllamaEmbedRequest{Input: "hello world"}
+
+			Expect(req.GetInputStrings()).To(Equal([]string{"hello world"}))
+		})
+
+		It("returns a list of strings when Input is a []string", func() {
+			req := OllamaEmbedRequest{Input: []string{"hello", "world"}}
+
+			Expect(req.GetInputStrings()).To(Equal([]string{"hello", "world"}))
+		})
+
+		It("returns a list of strings when Input is a []any (post JSON unmarshal)", func() {
+			req := OllamaEmbedRequest{Input: []any{"hello", "world"}}
+
+			Expect(req.GetInputStrings()).To(Equal([]string{"hello", "world"}))
+		})
+	})
+
+	Context("JSON unmarshaling (Ollama API compatibility)", func() {
+		It("accepts the 'input' field as a single string", func() {
+			body := []byte(`{"model": "m", "input": "why is the sky blue?"}`)
+
+			var req OllamaEmbedRequest
+			Expect(json.Unmarshal(body, &req)).To(Succeed())
+
+			Expect(req.Model).To(Equal("m"))
+			Expect(req.GetInputStrings()).To(Equal([]string{"why is the sky blue?"}))
+		})
+
+		It("accepts the 'input' field as an array of strings", func() {
+			body := []byte(`{"model": "m", "input": ["why is the sky blue?", "why is the grass green?"]}`)
+
+			var req OllamaEmbedRequest
+			Expect(json.Unmarshal(body, &req)).To(Succeed())
+
+			Expect(req.GetInputStrings()).To(Equal([]string{"why is the sky blue?", "why is the grass green?"}))
+		})
+
+		// Ollama's embedding endpoint accepts both `input` and `prompt` keys:
+		// https://github.com/ollama/ollama/blob/main/docs/api.md#generate-embeddings
+		// LocalAI must accept `prompt` so client libraries using that key are not broken.
+		// See https://github.com/mudler/LocalAI/issues/9767.
+		It("accepts the 'prompt' field as a single string (Ollama compatibility)", func() {
+			body := []byte(`{"model": "m", "prompt": "why is the sky blue?"}`)
+
+			var req OllamaEmbedRequest
+			Expect(json.Unmarshal(body, &req)).To(Succeed())
+
+			Expect(req.Model).To(Equal("m"))
+			Expect(req.GetInputStrings()).To(Equal([]string{"why is the sky blue?"}))
+		})
+
+		It("accepts the 'prompt' field as an array of strings (Ollama compatibility)", func() {
+			body := []byte(`{"model": "m", "prompt": ["why is the sky blue?", "why is the grass green?"]}`)
+
+			var req OllamaEmbedRequest
+			Expect(json.Unmarshal(body, &req)).To(Succeed())
+
+			Expect(req.GetInputStrings()).To(Equal([]string{"why is the sky blue?", "why is the grass green?"}))
+		})
+
+		It("prefers 'input' when both 'input' and 'prompt' are provided", func() {
+			body := []byte(`{"model": "m", "input": "from input", "prompt": "from prompt"}`)
+
+			var req OllamaEmbedRequest
+			Expect(json.Unmarshal(body, &req)).To(Succeed())
+
+			Expect(req.GetInputStrings()).To(Equal([]string{"from input"}))
+		})
+	})
+})
--- a/docs/content/advanced/model-configuration.md
+++ b/docs/content/advanced/model-configuration.md
@@ -251,18 +251,68 @@ options:

 These are set via the `options:` array in the model configuration (format: `key:value`):

+**Common options**
+
 | Option | Type | Default | Description |
 |--------|------|---------|-------------|
-| `spec_type` | string | `none` | Speculative decoding type (see table below) |
+| `spec_type` / `speculative_type` | string | `none` | Speculative decoding type, or a comma-separated list of types to chain (see table below) |
 | `spec_n_max` / `draft_max` | int | 16 | Maximum number of tokens to draft per step |
 | `spec_n_min` / `draft_min` | int | 0 | Minimum draft tokens required to use speculation |
 | `spec_p_min` / `draft_p_min` | float | 0.75 | Minimum probability threshold for greedy acceptance |
 | `spec_p_split` | float | 0.1 | Split probability for tree-based branching |
+
+**Draft-model options** (apply when `spec_type:draft`, i.e. a `draft_model` is configured)
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `draft_gpu_layers` | int | -1 | GPU layers for the draft model (-1 = use default) |
+| `draft_threads` / `spec_draft_threads` | int | same as main | Threads used by the draft model (`<= 0` = hardware concurrency) |
+| `draft_threads_batch` / `spec_draft_threads_batch` | int | same as `draft_threads` | Threads used by the draft model during batch / prompt processing |
+| `draft_cache_type_k` / `spec_draft_cache_type_k` | string | `f16` | KV cache K data type for the draft model (same values as `cache_type_k`) |
+| `draft_cache_type_v` / `spec_draft_cache_type_v` | string | `f16` | KV cache V data type for the draft model |
+| `draft_cpu_moe` / `spec_draft_cpu_moe` | bool | false | Keep all MoE expert weights of the draft model on CPU |
+| `draft_n_cpu_moe` / `spec_draft_n_cpu_moe` | int | 0 | Keep MoE expert weights of the first N draft-model layers on CPU |
+| `draft_override_tensor` / `spec_draft_override_tensor` | string | "" | Comma-separated `<tensor regex>=<buffer type>` overrides for the draft model |
+| `draft_ctx_size` | int | (ignored) | Deprecated upstream: the draft now shares the target context size. Accepted for backward compatibility but has no effect. |
+
+**`ngram_simple` options** (used when `spec_type` includes `ngram_simple`)
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
 | `spec_ngram_size_n` / `ngram_size_n` | int | 12 | N-gram lookup size |
 | `spec_ngram_size_m` / `ngram_size_m` | int | 48 | M-gram proposal size |
 | `spec_ngram_min_hits` / `ngram_min_hits` | int | 1 | Minimum hits for accepting n-gram proposals |
-| `draft_gpu_layers` | int | -1 | GPU layers for the draft model (-1 = use default) |
-| `draft_ctx_size` | int | 0 | Context size for the draft model (0 = auto) |
+
+**`ngram_mod` options** (used when `spec_type` includes `ngram_mod`)
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `spec_ngram_mod_n_min` | int | 48 | Minimum number of ngram tokens to use |
+| `spec_ngram_mod_n_max` | int | 64 | Maximum number of ngram tokens to use |
+| `spec_ngram_mod_n_match` | int | 24 | Ngram lookup length |
+
+**`ngram_map_k` options** (used when `spec_type` includes `ngram_map_k`)
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `spec_ngram_map_k_size_n` | int | 12 | N-gram lookup size |
+| `spec_ngram_map_k_size_m` | int | 48 | M-gram proposal size |
+| `spec_ngram_map_k_min_hits` | int | 1 | Minimum hits for accepting proposals |
+
+**`ngram_map_k4v` options** (used when `spec_type` includes `ngram_map_k4v`)
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `spec_ngram_map_k4v_size_n` | int | 12 | N-gram lookup size |
+| `spec_ngram_map_k4v_size_m` | int | 48 | M-gram proposal size |
+| `spec_ngram_map_k4v_min_hits` | int | 1 | Minimum hits for accepting proposals |
+
+**`ngram_cache` lookup files**
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `spec_lookup_cache_static` / `lookup_cache_static` | string | "" | Path to a static ngram lookup cache file |
+| `spec_lookup_cache_dynamic` / `lookup_cache_dynamic` | string | "" | Path to a dynamic ngram lookup cache file (updated by generation) |

 #### Speculative Type Values

@@ -277,6 +327,8 @@ These are set via the `options:` array in the model configuration (format: `key:
 | `ngram_mod` | Modified n-gram speculation |
 | `ngram_cache` | 3-level n-gram cache |

+Multiple types can be chained by passing a comma-separated list to `spec_type` (e.g. `spec_type:ngram_simple,ngram_mod`). The runtime tries them in order and accepts the first proposal that meets the acceptance criteria.
+
 {{% notice note %}}
 Speculative decoding is automatically disabled when multimodal models (with `mmproj`) are active. The `n_draft` parameter can also be overridden per-request.
 {{% /notice %}}
--- a/docs/data/version.json
+++ b/docs/data/version.json
@@ -1,3 +1,3 @@
 {
-  "version": "v4.2.0"
+  "version": "v4.2.2"
 }
--- a/go.mod
+++ b/go.mod
@@ -7,7 +7,7 @@ require (
 	fyne.io/fyne/v2 v2.7.3
 	github.com/Masterminds/sprig/v3 v3.3.0
 	github.com/alecthomas/kong v1.14.0
-	github.com/anthropics/anthropic-sdk-go v1.27.0
+	github.com/anthropics/anthropic-sdk-go v1.42.0
 	github.com/aws/aws-sdk-go-v2 v1.41.6
 	github.com/aws/aws-sdk-go-v2/config v1.32.16
 	github.com/aws/aws-sdk-go-v2/credentials v1.19.15
@@ -18,7 +18,7 @@ require (
 	github.com/dhowden/tag v0.0.0-20240417053706-3d75831295e8
 	github.com/ebitengine/purego v0.10.0
 	github.com/emirpasic/gods/v2 v2.0.0-alpha
-	github.com/fsnotify/fsnotify v1.9.0
+	github.com/fsnotify/fsnotify v1.10.1
 	github.com/go-audio/wav v1.1.0
 	github.com/go-skynet/go-llama.cpp v0.0.0-20240314183750-6a8041ef6b46
 	github.com/gofrs/flock v0.13.0
@@ -37,14 +37,14 @@ require (
 	github.com/microcosm-cc/bluemonday v1.0.27
 	github.com/modelcontextprotocol/go-sdk v1.5.0
 	github.com/mudler/cogito v0.9.5-0.20260315222927-63abdec7189b
-	github.com/mudler/edgevpn v0.31.1
+	github.com/mudler/edgevpn v0.32.2
 	github.com/mudler/go-processmanager v0.1.1
 	github.com/mudler/memory v0.0.0-20260406210934-424c1ecf2cf8
 	github.com/mudler/xlog v0.0.6
 	github.com/nats-io/nats.go v1.50.0
 	github.com/ollama/ollama v0.20.4
 	github.com/onsi/ginkgo/v2 v2.28.2
-	github.com/onsi/gomega v1.39.1
+	github.com/onsi/gomega v1.40.0
 	github.com/openai/openai-go/v3 v3.26.0
 	github.com/otiai10/copy v1.14.1
 	github.com/otiai10/openaigo v1.7.0
@@ -95,7 +95,9 @@ require (
 	github.com/bahlo/generic-list-go v0.2.0 // indirect
 	github.com/buger/jsonparser v1.1.2 // indirect
 	github.com/dunglas/httpsfv v1.1.0 // indirect
+	github.com/filecoin-project/go-clock v0.1.0 // indirect
 	github.com/go-jose/go-jose/v4 v4.1.4 // indirect
+	github.com/invopop/jsonschema v0.13.0 // indirect
 	github.com/jinzhu/inflection v1.0.0 // indirect
 	github.com/jinzhu/now v1.1.5 // indirect
 	github.com/jolestar/go-commons-pool/v2 v2.1.2 // indirect
@@ -105,6 +107,7 @@ require (
 	github.com/moby/moby/client v0.4.0 // indirect
 	github.com/nats-io/nkeys v0.4.15 // indirect
 	github.com/nats-io/nuid v1.0.1 // indirect
+	github.com/standard-webhooks/standard-webhooks/libraries v0.0.0-20260508151727-1282bb917829 // indirect
 	github.com/stretchr/testify v1.11.1 // indirect
 	github.com/sv-tools/openapi v0.2.1 // indirect
 	github.com/swaggo/swag/v2 v2.0.0-rc4 // indirect
@@ -243,7 +246,7 @@ require (
 	github.com/jeandeaual/go-locale v0.0.0-20250612000132-0ef82f21eade // indirect
 	github.com/json-iterator/go v1.1.12 // indirect
 	github.com/jsummers/gobmp v0.0.0-20230614200233-a9de23ed2e25 // indirect
-	github.com/libp2p/go-yamux/v5 v5.0.1 // indirect
+	github.com/libp2p/go-yamux/v5 v5.1.0 // indirect
 	github.com/magiconair/properties v1.8.10 // indirect
 	github.com/moby/docker-image-spec v1.3.1 // indirect
 	github.com/moby/go-archive v0.2.0 // indirect
@@ -280,7 +283,7 @@ require (
 	github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
 	github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
 	go.opentelemetry.io/auto/sdk v1.2.1 // indirect
-	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect
+	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0 // indirect
 	go.uber.org/mock v0.5.2 // indirect
 	go.yaml.in/yaml/v2 v2.4.4
 	go.yaml.in/yaml/v3 v3.0.4 // indirect
@@ -315,7 +318,7 @@ require (
 	github.com/creachadair/otp v0.5.0 // indirect
 	github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
 	github.com/davidlazar/go-crypto v0.0.0-20200604182044-b73af7476f6c // indirect
-	github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.0 // indirect
+	github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.1 // indirect
 	github.com/dlclark/regexp2 v1.11.5 // indirect
 	github.com/docker/cli v29.4.0+incompatible // indirect
 	github.com/docker/docker v28.5.2+incompatible
@@ -335,7 +338,7 @@ require (
 	github.com/go-openapi/swag v0.23.0 // indirect
 	github.com/gogo/protobuf v1.3.2 // indirect
 	github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect
-	github.com/golang/snappy v0.0.4 // indirect
+	github.com/golang/snappy v0.0.5-0.20231225225746-43d5d4cd4e0e // indirect
 	github.com/google/btree v1.1.3 // indirect
 	github.com/google/go-cmp v0.7.0 // indirect
 	github.com/google/gopacket v1.1.19 // indirect
@@ -347,10 +350,10 @@ require (
 	github.com/henvic/httpretty v0.1.4 // indirect
 	github.com/huandu/xstrings v1.5.0 // indirect
 	github.com/huin/goupnp v1.3.0 // indirect
-	github.com/ipfs/boxo v0.30.0 // indirect
+	github.com/ipfs/boxo v0.37.0 // indirect
 	github.com/ipfs/go-cid v0.6.1 // indirect
-	github.com/ipfs/go-datastore v0.8.2 // indirect
-	github.com/ipfs/go-log/v2 v2.6.0 // indirect
+	github.com/ipfs/go-datastore v0.9.1 // indirect
+	github.com/ipfs/go-log/v2 v2.9.1 // indirect
 	github.com/ipld/go-ipld-prime v0.23.0 // indirect
 	github.com/jackpal/go-nat-pmp v1.0.2 // indirect
 	github.com/jaypipes/pcidb v1.1.1 // indirect
@@ -361,11 +364,11 @@ require (
 	github.com/koron/go-ssdp v0.0.6 // indirect
 	github.com/libp2p/go-buffer-pool v0.1.0 // indirect
 	github.com/libp2p/go-cidranger v1.1.0 // indirect
-	github.com/libp2p/go-flow-metrics v0.2.0 // indirect
+	github.com/libp2p/go-flow-metrics v0.3.0 // indirect
 	github.com/libp2p/go-libp2p-asn-util v0.4.1 // indirect
-	github.com/libp2p/go-libp2p-kad-dht v0.33.1 // indirect
-	github.com/libp2p/go-libp2p-kbucket v0.7.0 // indirect
-	github.com/libp2p/go-libp2p-pubsub v0.14.2 // indirect
+	github.com/libp2p/go-libp2p-kad-dht v0.39.0 // indirect
+	github.com/libp2p/go-libp2p-kbucket v0.8.0 // indirect
+	github.com/libp2p/go-libp2p-pubsub v0.15.0 // indirect
 	github.com/libp2p/go-libp2p-record v0.3.1 // indirect
 	github.com/libp2p/go-libp2p-routing-helpers v0.7.5 // indirect
 	github.com/libp2p/go-msgio v0.3.0 // indirect
@@ -379,7 +382,7 @@ require (
 	github.com/mattn/go-colorable v0.1.14 // indirect
 	github.com/mattn/go-isatty v0.0.20 // indirect
 	github.com/mattn/go-runewidth v0.0.17 // indirect
-	github.com/miekg/dns v1.1.66 // indirect
+	github.com/miekg/dns v1.1.72 // indirect
 	github.com/mikioh/tcpinfo v0.0.0-20190314235526-30a79bb1804b // indirect
 	github.com/mikioh/tcpopt v0.0.0-20190314235656-172688c1accc // indirect
 	github.com/minio/sha256-simd v1.0.1 // indirect
@@ -397,7 +400,7 @@ require (
 	github.com/multiformats/go-base32 v0.1.0 // indirect
 	github.com/multiformats/go-base36 v0.2.0 // indirect
 	github.com/multiformats/go-multiaddr v0.16.1
-	github.com/multiformats/go-multiaddr-dns v0.4.1 // indirect
+	github.com/multiformats/go-multiaddr-dns v0.5.0 // indirect
 	github.com/multiformats/go-multiaddr-fmt v0.1.0 // indirect
 	github.com/multiformats/go-multibase v0.3.0 // indirect
 	github.com/multiformats/go-multicodec v0.10.0 // indirect
@@ -435,7 +438,7 @@ require (
 	github.com/ulikunitz/xz v0.5.14 // indirect
 	github.com/valyala/bytebufferpool v1.0.0 // indirect
 	github.com/vbatts/tar-split v0.12.2 // indirect
-	github.com/vishvananda/netlink v1.3.0 // indirect
+	github.com/vishvananda/netlink v1.3.1 // indirect
 	github.com/vishvananda/netns v0.0.5 // indirect
 	github.com/whyrusleeping/go-keyspace v0.0.0-20160322163242-5b898ac5add1 // indirect
 	github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect
@@ -448,7 +451,7 @@ require (
 	go.uber.org/dig v1.19.0 // indirect
 	go.uber.org/fx v1.24.0 // indirect
 	go.uber.org/multierr v1.11.0 // indirect
-	go.uber.org/zap v1.27.0 // indirect
+	go.uber.org/zap v1.27.1 // indirect
 	golang.org/x/crypto v0.50.0
 	golang.org/x/exp v0.0.0-20260410095643-746e56fc9e2f // indirect
 	golang.org/x/mod v0.35.0 // indirect
@@ -461,7 +464,7 @@ require (
 	golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb // indirect
 	golang.zx2c4.com/wireguard/windows v0.5.3 // indirect
 	gonum.org/v1/gonum v0.17.0 // indirect
-	google.golang.org/genproto/googleapis/rpc v0.0.0-20260120221211-b8f7ae30c516 // indirect
+	google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409 // indirect
 	gopkg.in/fsnotify.v1 v1.4.7 // indirect
 	gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect
 	howett.net/plist v1.0.2-0.20250314012144-ee69052608d9 // indirect
--- a/go.sum
+++ b/go.sum
@@ -100,8 +100,8 @@ github.com/antchfx/xmlquery v1.4.4/go.mod h1:AEPEEPYE9GnA2mj5Ur2L5Q5/2PycJ0N9Fus
 github.com/antchfx/xpath v1.3.3/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
 github.com/antchfx/xpath v1.3.6 h1:s0y+ElRRtTQdfHP609qFu0+c6bglDv20pqOViQjjdPI=
 github.com/antchfx/xpath v1.3.6/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
-github.com/anthropics/anthropic-sdk-go v1.27.0 h1:0CWbmBq5ofGAjF2H6lefCNRbnaUMGiTKO+lb7RLhDbI=
-github.com/anthropics/anthropic-sdk-go v1.27.0/go.mod h1:qUKmaW+uuPB64iy1l+4kOSvaLqPXnHTTBKH6RVZ7q5Q=
+github.com/anthropics/anthropic-sdk-go v1.42.0 h1:Zv882/dnrE4OHnwhMAsi9lwVVXRF8GtR3ofiBResYUw=
+github.com/anthropics/anthropic-sdk-go v1.42.0/go.mod h1:r4eaLX9tBolUrXLOrLj7eU8tmeBtoobCkM0kBsivBaY=
 github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
 github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o=
 github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY=
@@ -279,8 +279,8 @@ github.com/davidlazar/go-crypto v0.0.0-20200604182044-b73af7476f6c h1:pFUpOrbxDR
 github.com/davidlazar/go-crypto v0.0.0-20200604182044-b73af7476f6c/go.mod h1:6UhI8N9EjYm1c2odKpFpAYeR8dsBeM7PtzQhRgxRr9U=
 github.com/decred/dcrd/crypto/blake256 v1.1.0 h1:zPMNGQCm0g4QTY27fOCorQW7EryeQ/U0x++OzVrdms8=
 github.com/decred/dcrd/crypto/blake256 v1.1.0/go.mod h1:2OfgNZ5wDpcsFmHmCK5gZTPcCXqlm2ArzUIkw9czNJo=
-github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.0 h1:NMZiJj8QnKe1LgsbDayM4UoHwbvwDRwnI3hwNaAHRnc=
-github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.0/go.mod h1:ZXNYxsqcloTdSy/rNShjYzMhyjf0LaoftYK0p+A3h40=
+github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.1 h1:5RVFMOWjMyRy8cARdy79nAmgYw3hK/4HUq48LQ6Wwqo=
+github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.1/go.mod h1:ZXNYxsqcloTdSy/rNShjYzMhyjf0LaoftYK0p+A3h40=
 github.com/dhowden/tag v0.0.0-20240417053706-3d75831295e8 h1:OtSeLS5y0Uy01jaKK4mA/WVIYtpzVm63vLVAPzJXigg=
 github.com/dhowden/tag v0.0.0-20240417053706-3d75831295e8/go.mod h1:apkPC/CR3s48O2D7Y++n1XWEpgPNNCjXYga3PPbJe2E=
 github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
@@ -334,6 +334,8 @@ github.com/felixge/fgprof v0.9.3 h1:VvyZxILNuCiUCSXtPtYmmtGvb65nqXh2QFWc0Wpf2/g=
 github.com/felixge/fgprof v0.9.3/go.mod h1:RdbpDgzqYVh/T9fPELJyV7EYJuHB55UTEULNun8eiPw=
 github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
 github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
+github.com/filecoin-project/go-clock v0.1.0 h1:SFbYIM75M8NnFm1yMHhN9Ahy3W5bEZV9gd6MPfXbKVU=
+github.com/filecoin-project/go-clock v0.1.0/go.mod h1:4uB/O4PvOjlx1VCMdZ9MyDZXRm//gkj1ELEbxfI1AZs=
 github.com/flynn/noise v1.1.0 h1:KjPQoQCEFdZDiP03phOvGi11+SVVhBG2wOWAorLsstg=
 github.com/flynn/noise v1.1.0/go.mod h1:xbMo+0i6+IGbYdJhF31t2eR1BIU0CYc12+BNAKwUTag=
 github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw=
@@ -343,8 +345,8 @@ github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7z
 github.com/fredbi/uri v1.1.1 h1:xZHJC08GZNIUhbP5ImTHnt5Ya0T8FI2VAwI/37kh2Ko=
 github.com/fredbi/uri v1.1.1/go.mod h1:4+DZQ5zBjEwQCDmXW5JdIjz0PUA+yJbvtBv+u+adr5o=
 github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
-github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
-github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
+github.com/fsnotify/fsnotify v1.10.1 h1:b0/UzAf9yR5rhf3RPm9gf3ehBPpf0oZKIjtpKrx59Ho=
+github.com/fsnotify/fsnotify v1.10.1/go.mod h1:TLheqan6HD6GBK6PrDWyDPBaEV8LspOxvPSjC+bVfgo=
 github.com/fyne-io/gl-js v0.2.0 h1:+EXMLVEa18EfkXBVKhifYB6OGs3HwKO3lUElA0LlAjs=
 github.com/fyne-io/gl-js v0.2.0/go.mod h1:ZcepK8vmOYLu96JoxbCKJy2ybr+g1pTnaBDdl7c3ajI=
 github.com/fyne-io/glfw-js v0.3.0 h1:d8k2+Y7l+zy2pc7wlGRyPfTgZoqDf3AI4G+2zOWhWUk=
@@ -468,8 +470,8 @@ github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiu
 github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
 github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
 github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
-github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
-github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
+github.com/golang/snappy v0.0.5-0.20231225225746-43d5d4cd4e0e h1:4bw4WeyTYPp0smaXiJZCNnLrvVBqirQVreixayXezGc=
+github.com/golang/snappy v0.0.5-0.20231225225746-43d5d4cd4e0e/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
 github.com/gomarkdown/markdown v0.0.0-20250311123330-531bef5e742b h1:EY/KpStFl60qA17CptGXhwfZ+k1sFNJIUNR8DdbcuUk=
 github.com/gomarkdown/markdown v0.0.0-20250311123330-531bef5e742b/go.mod h1:JDGcbDT52eL4fju3sZ4TeHGsQwhG9nbDV21aMyhwPoA=
 github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
@@ -576,25 +578,25 @@ github.com/huin/goupnp v1.3.0/go.mod h1:gnGPsThkYa7bFi/KWmEysQRf48l2dvR5bxr2OFck
 github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
 github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
 github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
-github.com/ipfs/boxo v0.30.0 h1:7afsoxPGGqfoH7Dum/wOTGUB9M5fb8HyKPMlLfBvIEQ=
-github.com/ipfs/boxo v0.30.0/go.mod h1:BPqgGGyHB9rZZcPSzah2Dc9C+5Or3U1aQe7EH1H7370=
-github.com/ipfs/go-block-format v0.2.0 h1:ZqrkxBA2ICbDRbK8KJs/u0O3dlp6gmAuuXUJNiW1Ycs=
-github.com/ipfs/go-block-format v0.2.0/go.mod h1:+jpL11nFx5A/SPpsoBn6Bzkra/zaArfSmsknbPMYgzM=
+github.com/invopop/jsonschema v0.13.0 h1:KvpoAJWEjR3uD9Kbm2HWJmqsEaHt8lBUpd0qHcIi21E=
+github.com/invopop/jsonschema v0.13.0/go.mod h1:ffZ5Km5SWWRAIN6wbDXItl95euhFz2uON45H2qjYt+0=
+github.com/ipfs/boxo v0.37.0 h1:2E3mZvydMI2t5IkAgtkmZ3sGsld0oS7o3I+xyzDk6uI=
+github.com/ipfs/boxo v0.37.0/go.mod h1:8yyiRn54F2CsW13n0zwXEPrVsZix/gFj9SYIRYMZ6KE=
+github.com/ipfs/go-block-format v0.2.3 h1:mpCuDaNXJ4wrBJLrtEaGFGXkferrw5eqVvzaHhtFKQk=
+github.com/ipfs/go-block-format v0.2.3/go.mod h1:WJaQmPAKhD3LspLixqlqNFxiZ3BZ3xgqxxoSR/76pnA=
 github.com/ipfs/go-cid v0.6.1 h1:T5TnNb08+ueovG76Z5gx1L4Y7QOaGTXHg1F6raWFxIc=
 github.com/ipfs/go-cid v0.6.1/go.mod h1:zrY0SwOhjrrIdfPQ/kf+k1sXyJ0QE7cMxfCployLBs0=
-github.com/ipfs/go-datastore v0.8.2 h1:Jy3wjqQR6sg/LhyY0NIePZC3Vux19nLtg7dx0TVqr6U=
-github.com/ipfs/go-datastore v0.8.2/go.mod h1:W+pI1NsUsz3tcsAACMtfC+IZdnQTnC/7VfPoJBQuts0=
+github.com/ipfs/go-datastore v0.9.1 h1:67Po2epre/o0UxrmkzdS9ZTe2GFGODgTd2odx8Wh6Yo=
+github.com/ipfs/go-datastore v0.9.1/go.mod h1:zi07Nvrpq1bQwSkEnx3bfjz+SQZbdbWyCNvyxMh9pN0=
 github.com/ipfs/go-detect-race v0.0.1 h1:qX/xay2W3E4Q1U7d9lNs1sU9nvguX0a7319XbyQ6cOk=
 github.com/ipfs/go-detect-race v0.0.1/go.mod h1:8BNT7shDZPo99Q74BpGMK+4D8Mn4j46UU0LZ723meps=
-github.com/ipfs/go-ipfs-util v0.0.3 h1:2RFdGez6bu2ZlZdI+rWfIdbQb1KudQp3VGwPtdNCmE0=
-github.com/ipfs/go-ipfs-util v0.0.3/go.mod h1:LHzG1a0Ig4G+iZ26UUOMjHd+lfM84LZCrn17xAKWBvs=
 github.com/ipfs/go-log v1.0.5 h1:2dOuUCB1Z7uoczMWgAyDck5JLb72zHzrMnGnCNNbvY8=
 github.com/ipfs/go-log v1.0.5/go.mod h1:j0b8ZoR+7+R99LD9jZ6+AJsrzkPbSXbZfGakb5JPtIo=
 github.com/ipfs/go-log/v2 v2.1.3/go.mod h1:/8d0SH3Su5Ooc31QlL1WysJhvyOTDCjcCZ9Axpmri6g=
-github.com/ipfs/go-log/v2 v2.6.0 h1:2Nu1KKQQ2ayonKp4MPo6pXCjqw1ULc9iohRqWV5EYqg=
-github.com/ipfs/go-log/v2 v2.6.0/go.mod h1:p+Efr3qaY5YXpx9TX7MoLCSEZX5boSWj9wh86P5HJa8=
-github.com/ipfs/go-test v0.2.1 h1:/D/a8xZ2JzkYqcVcV/7HYlCnc7bv/pKHQiX5TdClkPE=
-github.com/ipfs/go-test v0.2.1/go.mod h1:dzu+KB9cmWjuJnXFDYJwC25T3j1GcN57byN+ixmK39M=
+github.com/ipfs/go-log/v2 v2.9.1 h1:3JXwHWU31dsCpvQ+7asz6/QsFJHqFr4gLgQ0FWteujk=
+github.com/ipfs/go-log/v2 v2.9.1/go.mod h1:evFx7sBiohUN3AG12mXlZBw5hacBQld3ZPHrowlJYoo=
+github.com/ipfs/go-test v0.2.3 h1:Z/jXNAReQFtCYyn7bsv/ZqUwS6E7iIcSpJ2CuzCvnrc=
+github.com/ipfs/go-test v0.2.3/go.mod h1:QW8vSKkwYvWFwIZQLGQXdkt9Ud76eQXRQ9Ao2H+cA1o=
 github.com/ipld/go-ipld-prime v0.23.0 h1:csqdPZH60BsTC+AZrv7fpa27v+09I/oTqyHYYYE27eE=
 github.com/ipld/go-ipld-prime v0.23.0/go.mod h1:46YCFSFNFBJHPjB0pfMuv7Ly7df2eChpkpyPo5SE0bA=
 github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
@@ -682,18 +684,18 @@ github.com/libp2p/go-buffer-pool v0.1.0 h1:oK4mSFcQz7cTQIfqbe4MIj9gLW+mnanjyFtc6
 github.com/libp2p/go-buffer-pool v0.1.0/go.mod h1:N+vh8gMqimBzdKkSMVuydVDq+UV5QTWy5HSiZacSbPg=
 github.com/libp2p/go-cidranger v1.1.0 h1:ewPN8EZ0dd1LSnrtuwd4709PXVcITVeuwbag38yPW7c=
 github.com/libp2p/go-cidranger v1.1.0/go.mod h1:KWZTfSr+r9qEo9OkI9/SIEeAtw+NNoU0dXIXt15Okic=
-github.com/libp2p/go-flow-metrics v0.2.0 h1:EIZzjmeOE6c8Dav0sNv35vhZxATIXWZg6j/C08XmmDw=
-github.com/libp2p/go-flow-metrics v0.2.0/go.mod h1:st3qqfu8+pMfh+9Mzqb2GTiwrAGjIPszEjZmtksN8Jc=
+github.com/libp2p/go-flow-metrics v0.3.0 h1:q31zcHUvHnwDO0SHaukewPYgwOBSxtt830uJtUx6784=
+github.com/libp2p/go-flow-metrics v0.3.0/go.mod h1:nuhlreIwEguM1IvHAew3ij7A8BMlyHQJ279ao24eZZo=
 github.com/libp2p/go-libp2p v0.48.0 h1:h2BrLAgrj7X8bEN05K7qmrjpNHYA+6tnsGRdprjTnvo=
 github.com/libp2p/go-libp2p v0.48.0/go.mod h1:Q1fBZNdmC2Hf82husCTfkKJVfHm2we5zk+NWmOGEmWk=
 github.com/libp2p/go-libp2p-asn-util v0.4.1 h1:xqL7++IKD9TBFMgnLPZR6/6iYhawHKHl950SO9L6n94=
 github.com/libp2p/go-libp2p-asn-util v0.4.1/go.mod h1:d/NI6XZ9qxw67b4e+NgpQexCIiFYJjErASrYW4PFDN8=
-github.com/libp2p/go-libp2p-kad-dht v0.33.1 h1:hKFhHMf7WH69LDjaxsJUWOU6qZm71uO47M/a5ijkiP0=
-github.com/libp2p/go-libp2p-kad-dht v0.33.1/go.mod h1:CdmNk4VeGJa9EXM9SLNyNVySEvduKvb+5rSC/H4pLAo=
-github.com/libp2p/go-libp2p-kbucket v0.7.0 h1:vYDvRjkyJPeWunQXqcW2Z6E93Ywx7fX0jgzb/dGOKCs=
-github.com/libp2p/go-libp2p-kbucket v0.7.0/go.mod h1:blOINGIj1yiPYlVEX0Rj9QwEkmVnz3EP8LK1dRKBC6g=
-github.com/libp2p/go-libp2p-pubsub v0.14.2 h1:nT5lFHPQOFJcp9CW8hpKtvbpQNdl2udJuzLQWbgRum8=
-github.com/libp2p/go-libp2p-pubsub v0.14.2/go.mod h1:MKPU5vMI8RRFyTP0HfdsF9cLmL1nHAeJm44AxJGJx44=
+github.com/libp2p/go-libp2p-kad-dht v0.39.0 h1:mww38eBYiUvdsu+Xl/GLlBC0Aa8M+5HAwvafkFOygAM=
+github.com/libp2p/go-libp2p-kad-dht v0.39.0/go.mod h1:Po2JugFEkDq9Vig/JXtc153ntOi0q58o4j7IuITCOVs=
+github.com/libp2p/go-libp2p-kbucket v0.8.0 h1:QAK7RzKJpYe+EuSEATAaaHYMYLkPDGC18m9jxPLnU8s=
+github.com/libp2p/go-libp2p-kbucket v0.8.0/go.mod h1:JMlxqcEyKwO6ox716eyC0hmiduSWZZl6JY93mGaaqc4=
+github.com/libp2p/go-libp2p-pubsub v0.15.0 h1:cG7Cng2BT82WttmPFMi50gDNV+58K626m/wR00vGL1o=
+github.com/libp2p/go-libp2p-pubsub v0.15.0/go.mod h1:lr4oE8bFgQaifRcoc2uWhWWiK6tPdOEKpUuR408GFN4=
 github.com/libp2p/go-libp2p-record v0.3.1 h1:cly48Xi5GjNw5Wq+7gmjfBiG9HCzQVkiZOUZ8kUl+Fg=
 github.com/libp2p/go-libp2p-record v0.3.1/go.mod h1:T8itUkLcWQLCYMqtX7Th6r7SexyUJpIyPgks757td/E=
 github.com/libp2p/go-libp2p-routing-helpers v0.7.5 h1:HdwZj9NKovMx0vqq6YNPTh6aaNzey5zHD7HeLJtq6fI=
@@ -706,8 +708,8 @@ github.com/libp2p/go-netroute v0.4.0 h1:sZZx9hyANYUx9PZyqcgE/E1GUG3iEtTZHUEvdtXT
 github.com/libp2p/go-netroute v0.4.0/go.mod h1:Nkd5ShYgSMS5MUKy/MU2T57xFoOKvvLR92Lic48LEyA=
 github.com/libp2p/go-reuseport v0.4.0 h1:nR5KU7hD0WxXCJbmw7r2rhRYruNRl2koHw8fQscQm2s=
 github.com/libp2p/go-reuseport v0.4.0/go.mod h1:ZtI03j/wO5hZVDFo2jKywN6bYKWLOy8Se6DrI2E1cLU=
-github.com/libp2p/go-yamux/v5 v5.0.1 h1:f0WoX/bEF2E8SbE4c/k1Mo+/9z0O4oC/hWEA+nfYRSg=
-github.com/libp2p/go-yamux/v5 v5.0.1/go.mod h1:en+3cdX51U0ZslwRdRLrvQsdayFt3TSUKvBGErzpWbU=
+github.com/libp2p/go-yamux/v5 v5.1.0 h1:8Qlxj4E9JGJAQVW6+uj2o7mqkqsIVlSUGmTWhlXzoHE=
+github.com/libp2p/go-yamux/v5 v5.1.0/go.mod h1:tgIQ07ObtRR/I0IWsFOyQIL9/dR5UXgc2s8xKmNZv1o=
 github.com/libp2p/zeroconf/v2 v2.2.0 h1:Cup06Jv6u81HLhIj1KasuNM/RHHrJ8T7wOTS4+Tv53Q=
 github.com/libp2p/zeroconf/v2 v2.2.0/go.mod h1:fuJqLnUwZTshS3U/bMRJ3+ow/v9oid1n0DmyYyNO1Xs=
 github.com/lithammer/fuzzysearch v1.1.8 h1:/HIuJnjHuXS8bKaiTMeeDlW2/AyIWk2brx1V8LFgLN4=
@@ -752,8 +754,8 @@ github.com/microcosm-cc/bluemonday v1.0.27 h1:MpEUotklkwCSLeH+Qdx1VJgNqLlpY2KXwX
 github.com/microcosm-cc/bluemonday v1.0.27/go.mod h1:jFi9vgW+H7c3V0lb6nR74Ib/DIB5OBs92Dimizgw2cA=
 github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg=
 github.com/miekg/dns v1.1.43/go.mod h1:+evo5L0630/F6ca/Z9+GAqzhjGyn8/c+TBaOyfEl0V4=
-github.com/miekg/dns v1.1.66 h1:FeZXOS3VCVsKnEAd+wBkjMC3D2K+ww66Cq3VnCINuJE=
-github.com/miekg/dns v1.1.66/go.mod h1:jGFzBsSNbJw6z1HYut1RKBKHA9PBdxeHrZG8J+gC2WE=
+github.com/miekg/dns v1.1.72 h1:vhmr+TF2A3tuoGNkLDFK9zi36F2LS+hKTRW0Uf8kbzI=
+github.com/miekg/dns v1.1.72/go.mod h1:+EuEPhdHOsfk6Wk5TT2CzssZdqkmFhf8r+aVyDEToIs=
 github.com/mikioh/tcp v0.0.0-20190314235350-803a9b46060c h1:bzE/A84HN25pxAuk9Eej1Kz9OUelF97nAc82bDquQI8=
 github.com/mikioh/tcp v0.0.0-20190314235350-803a9b46060c/go.mod h1:0SQS9kMwD2VsyFEB++InYyBJroV/FRmBgcydeSUcJms=
 github.com/mikioh/tcpinfo v0.0.0-20190314235526-30a79bb1804b h1:z78hV3sbSMAUoyUMM0I83AUIT6Hu17AWfgjzIbtrYFc=
@@ -819,8 +821,8 @@ github.com/mudler/LocalAGI v0.0.0-20260508125235-37810d918a87 h1:az+2umaD/sT1rRv
 github.com/mudler/LocalAGI v0.0.0-20260508125235-37810d918a87/go.mod h1:x77p9W1zKZr+W+UcEwg8/qdp00p4XXOI69wE7WlXZc0=
 github.com/mudler/cogito v0.9.5-0.20260315222927-63abdec7189b h1:A74T2Lauvg61KodYqsjTYDY05kPLcW+efVZjd23dghU=
 github.com/mudler/cogito v0.9.5-0.20260315222927-63abdec7189b/go.mod h1:6sfja3lcu2nWRzEc0wwqGNu/eCG3EWgij+8s7xyUeQ4=
-github.com/mudler/edgevpn v0.31.1 h1:7qegiDWd0kAg6ljhNHxqvp8hbo/6BbzSdbb7/2WZfiY=
-github.com/mudler/edgevpn v0.31.1/go.mod h1:ftV5B0nKFzm4R8vR80UYnCb2nf7lxCRgAALxUEEgCf8=
+github.com/mudler/edgevpn v0.32.2 h1:umTPyyZgkom/A81Bk4HbP0p1ZSEU5EFPW3Bg+YPxI8A=
+github.com/mudler/edgevpn v0.32.2/go.mod h1:UaMc8MORbcRsAjuO5gVJj9Bn3Nq2AP5U9NTb6epVyv8=
 github.com/mudler/go-piper v0.0.0-20241023091659-2494246fd9fc h1:RxwneJl1VgvikiX28EkpdAyL4yQVnJMrbquKospjHyA=
 github.com/mudler/go-piper v0.0.0-20241023091659-2494246fd9fc/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig=
 github.com/mudler/go-processmanager v0.1.1 h1:c/1NRZOZpW8HuFv9RhBG57nQu1oDMRomEHedwBFMlrw=
@@ -846,8 +848,8 @@ github.com/multiformats/go-base36 v0.2.0/go.mod h1:qvnKE++v+2MWCfePClUEjE78Z7P2a
 github.com/multiformats/go-multiaddr v0.1.1/go.mod h1:aMKBKNEYmzmDmxfX88/vz+J5IU55txyt0p4aiWVohjo=
 github.com/multiformats/go-multiaddr v0.16.1 h1:fgJ0Pitow+wWXzN9do+1b8Pyjmo8m5WhGfzpL82MpCw=
 github.com/multiformats/go-multiaddr v0.16.1/go.mod h1:JSVUmXDjsVFiW7RjIFMP7+Ev+h1DTbiJgVeTV/tcmP0=
-github.com/multiformats/go-multiaddr-dns v0.4.1 h1:whi/uCLbDS3mSEUMb1MsoT4uzUeZB0N32yzufqS0i5M=
-github.com/multiformats/go-multiaddr-dns v0.4.1/go.mod h1:7hfthtB4E4pQwirrz+J0CcDUfbWzTqEzVyYKKIKpgkc=
+github.com/multiformats/go-multiaddr-dns v0.5.0 h1:p/FTyHKX0nl59f+S+dEUe8HRK+i5Ow/QHMw8Nh3gPCo=
+github.com/multiformats/go-multiaddr-dns v0.5.0/go.mod h1:yJ349b8TPIAANUyuOzn1oz9o22tV9f+06L+cCeMxC14=
 github.com/multiformats/go-multiaddr-fmt v0.1.0 h1:WLEFClPycPkp4fnIzoFoV9FVd49/eQsuaL3/CWe167E=
 github.com/multiformats/go-multiaddr-fmt v0.1.0/go.mod h1:hGtDIW4PU4BqJ50gW2quDuPVjyWNZxToGUh/HwTZYJo=
 github.com/multiformats/go-multibase v0.3.0 h1:8helZD2+4Db7NNWFiktk2NePbF0boolBe6bDQvM4r68=
@@ -887,8 +889,8 @@ github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE=
 github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU=
 github.com/onsi/ginkgo/v2 v2.28.2 h1:DTrMfpqxiNUyQ3Y0zhn1n3cOO2euFgQPYIpkWwxVFps=
 github.com/onsi/ginkgo/v2 v2.28.2/go.mod h1:CLtbVInNckU3/+gC8LzkGUb9oF+e8W8TdUsxPwvdOgE=
-github.com/onsi/gomega v1.39.1 h1:1IJLAad4zjPn2PsnhH70V4DKRFlrCzGBNrNaru+Vf28=
-github.com/onsi/gomega v1.39.1/go.mod h1:hL6yVALoTOxeWudERyfppUcZXjMwIMLnuSfruD2lcfg=
+github.com/onsi/gomega v1.40.0 h1:Vtol0e1MghCD2ZVIilPDIg44XSL9l2QAn8ZNaljWcJc=
+github.com/onsi/gomega v1.40.0/go.mod h1:M/Uqpu/8qTjtzCLUA2zJHX9Iilrau25x1PdoSRbWh5A=
 github.com/openai/openai-go/v3 v3.26.0 h1:bRt6H/ozMNt/dDkN4gobnLqaEGrRGBzmbVs0xxJEnQE=
 github.com/openai/openai-go/v3 v3.26.0/go.mod h1:cdufnVK14cWcT9qA1rRtrXx4FTRsgbDPW7Ia7SS5cZo=
 github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
@@ -1071,6 +1073,8 @@ github.com/srwiley/rasterx v0.0.0-20220730225603-2ab79fcdd4ef h1:Ch6Q+AZUxDBCVqd
 github.com/srwiley/rasterx v0.0.0-20220730225603-2ab79fcdd4ef/go.mod h1:nXTWP6+gD5+LUJ8krVhhoeHjvHTutPxMYl5SvkcnJNE=
 github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf h1:pvbZ0lM0XWPBqUKqFU8cmavspvIl9nulOYwdy6IFRRo=
 github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf/go.mod h1:RJID2RhlZKId02nZ62WenDCkgHFerpIOmW0iT7GKmXM=
+github.com/standard-webhooks/standard-webhooks/libraries v0.0.0-20260508151727-1282bb917829 h1:zGlGD0Zfk2HaIo4EnUVBRhnXQ+cnGQz5X2PdBcplOyw=
+github.com/standard-webhooks/standard-webhooks/libraries v0.0.0-20260508151727-1282bb917829/go.mod h1:L1MQhA6x4dn9r007T033lsaZMv9EmBAdXyU/+EF40fo=
 github.com/streamer45/silero-vad-go v0.2.1 h1:Li1/tTC4H/3cyw6q4weX+U8GWwEL3lTekK/nYa1Cvuk=
 github.com/streamer45/silero-vad-go v0.2.1/go.mod h1:B+2FXs/5fZ6pzl6unUZYhZqkYdOB+3saBVzjOzdZnUs=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
@@ -1146,9 +1150,8 @@ github.com/valyala/fasttemplate v1.2.2 h1:lxLXG0uE3Qnshl9QyaK6XJxMXlQZELvChBOCmQ
 github.com/valyala/fasttemplate v1.2.2/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ=
 github.com/vbatts/tar-split v0.12.2 h1:w/Y6tjxpeiFMR47yzZPlPj/FcPLpXbTUi/9H7d3CPa4=
 github.com/vbatts/tar-split v0.12.2/go.mod h1:eF6B6i6ftWQcDqEn3/iGFRFRo8cBIMSJVOpnNdfTMFA=
-github.com/vishvananda/netlink v1.3.0 h1:X7l42GfcV4S6E4vHTsw48qbrV+9PVojNfIhZcwQdrZk=
-github.com/vishvananda/netlink v1.3.0/go.mod h1:i6NetklAujEcC6fK0JPjT8qSwWyO0HLn4UKG+hGqeJs=
-github.com/vishvananda/netns v0.0.4/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
+github.com/vishvananda/netlink v1.3.1 h1:3AEMt62VKqz90r0tmNhog0r/PpWKmrEShJU0wJW6bV0=
+github.com/vishvananda/netlink v1.3.1/go.mod h1:ARtKouGSTGchR8aMwmkzC0qiNPrrWO5JS/XMVl45+b4=
 github.com/vishvananda/netns v0.0.5 h1:DfiHV+j8bA32MFM7bfEunvT8IAqQ/NzSJHtcmW5zdEY=
 github.com/vishvananda/netns v0.0.5/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
 github.com/warpfork/go-wish v0.0.0-20220906213052-39a1cc7a02d0 h1:GDDkbFiaK8jsSDJfjId/PEGEShv6ugrt4kYsC5UIDaQ=
@@ -1199,8 +1202,8 @@ go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0=
 go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=
 go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64=
 go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
-go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 h1:F7Jx+6hwnZ41NSFTO5q4LYDtJRXBf2PD0rNBkeB/lus=
-go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0/go.mod h1:UHB22Z8QsdRDrnAtX4PntOl36ajSxcdUMt1sF7Y6E7Q=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0 h1:7iP2uCb7sGddAr30RRS6xjKy7AZ2JtTOPA3oolgVSw8=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0/go.mod h1:c7hN3ddxs/z6q9xwvfLPk+UHlWRQyaeR1LdgfL/66l0=
 go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I=
 go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0=
 go.opentelemetry.io/otel/exporters/prometheus v0.65.0 h1:jOveH/b4lU9HT7y+Gfamf18BqlOuz2PWEvs8yM7Q6XE=
@@ -1232,8 +1235,8 @@ go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN8
 go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA=
 go.uber.org/zap v1.16.0/go.mod h1:MA8QOfq0BHJwdXa996Y4dYkAqRKB8/1K1QMMZVaNZjQ=
 go.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo=
-go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8=
-go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
+go.uber.org/zap v1.27.1 h1:08RqriUEv8+ArZRYSTXy1LeBScaMpVSTBhCeaZYfMYc=
+go.uber.org/zap v1.27.1/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
 go.yaml.in/yaml/v2 v2.4.4 h1:tuyd0P+2Ont/d6e2rl3be67goVK4R6deVxCUX5vyPaQ=
 go.yaml.in/yaml/v2 v2.4.4/go.mod h1:gMZqIpDtDqOfM0uNfy0SkpRhvUryYH0Z6wdMYcacYXQ=
 go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
@@ -1641,8 +1644,8 @@ google.golang.org/genproto v0.0.0-20210310155132-4ce2db91004e/go.mod h1:FWY/as6D
 google.golang.org/genproto v0.0.0-20210319143718-93e7006c17a6/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
 google.golang.org/genproto v0.0.0-20210402141018-6c239bbf2bb1/go.mod h1:9lPAdzaEmUacj36I+k7YKbEc5CXzPIeORRgDAUOu28A=
 google.golang.org/genproto v0.0.0-20210602131652-f16073e35f0c/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0=
-google.golang.org/genproto/googleapis/rpc v0.0.0-20260120221211-b8f7ae30c516 h1:sNrWoksmOyF5bvJUcnmbeAmQi8baNhqg5IWaI3llQqU=
-google.golang.org/genproto/googleapis/rpc v0.0.0-20260120221211-b8f7ae30c516/go.mod h1:j9x/tPzZkyxcgEFkiKEEGxfvyumM01BEtsW8xzOahRQ=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409 h1:H86B94AW+VfJWDqFeEbBPhEtHzJwJfTbgE2lZa54ZAQ=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409/go.mod h1:j9x/tPzZkyxcgEFkiKEEGxfvyumM01BEtsW8xzOahRQ=
 google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
 google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38=
 google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=
--- a/pkg/xsysinfo/gpu.go
+++ b/pkg/xsysinfo/gpu.go
@@ -1,8 +1,10 @@
 package xsysinfo

 import (
+	"bufio"
 	"bytes"
 	"encoding/json"
+	"io"
 	"os"
 	"os/exec"
 	"strconv"
@@ -801,14 +803,15 @@ func GetResourceAggregateInfo() AggregateMemoryInfo {
 	return resourceInfo.Aggregate
 }

-// getVulkanGPUMemory queries GPUs using vulkaninfo as a fallback
-// Note: Vulkan provides memory heap info but not real-time usage
+// getVulkanGPUMemory queries GPUs using vulkaninfo as a fallback.
+// Note: vulkaninfo JSON is a Vulkan Profiles export and does not include
+// VkPhysicalDeviceMemoryProperties, so memory heaps are parsed from text output.
 func getVulkanGPUMemory() []GPUMemoryInfo {
 	if _, err := exec.LookPath("vulkaninfo"); err != nil {
 		return nil
 	}

-	cmd := exec.Command("vulkaninfo", "--json")
+	cmd := exec.Command("vulkaninfo", "--text")

 	var stdout, stderr bytes.Buffer
 	cmd.Stdout = &stdout
@@ -819,60 +822,174 @@ func getVulkanGPUMemory() []GPUMemoryInfo {
 		return nil
 	}

-	// Parse Vulkan JSON output
-	var result struct {
-		VkPhysicalDevices []struct {
-			DeviceName                       string `json:"deviceName"`
-			DeviceType                       string `json:"deviceType"`
-			VkPhysicalDeviceMemoryProperties struct {
-				MemoryHeaps []struct {
-					Flags int    `json:"flags"`
-					Size  uint64 `json:"size"`
-				} `json:"memoryHeaps"`
-			} `json:"VkPhysicalDeviceMemoryProperties"`
-		} `json:"VkPhysicalDevices"`
-	}
+	return parseVulkanGPUMemoryText(strings.NewReader(stdout.String()))

-	if err := json.Unmarshal(stdout.Bytes(), &result); err != nil {
-		xlog.Debug("failed to parse vulkaninfo output", "error", err)
-		return nil
-	}
+}

+type vulkanGPUTextInfo struct {
+	index      int
+	name       string
+	deviceType string
+	totalVRAM  uint64
+}
+
+func parseVulkanGPUMemoryText(r io.Reader) []GPUMemoryInfo {
 	var gpus []GPUMemoryInfo
+	var current *vulkanGPUTextInfo

-	for i, device := range result.VkPhysicalDevices {
-		// Skip non-discrete/integrated GPUs if possible
-		if device.DeviceType == "VK_PHYSICAL_DEVICE_TYPE_CPU" {
-			continue
+	inMemoryProperties := false
+	inMemoryHeaps := false
+	inHeap := false
+	heapSize := uint64(0)
+	heapDeviceLocal := false
+
+	flushHeap := func() {
+		if current != nil && inHeap && heapDeviceLocal {
+			current.totalVRAM += heapSize
 		}
+		heapSize = 0
+		heapDeviceLocal = false
+		inHeap = false
+	}

-		// Sum up device-local memory heaps
-		var totalVRAM uint64
-		for _, heap := range device.VkPhysicalDeviceMemoryProperties.MemoryHeaps {
-			// Flag 1 = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT
-			if heap.Flags&1 != 0 {
-				totalVRAM += heap.Size
-			}
-		}
-
-		if totalVRAM == 0 {
-			continue
+	flushGPU := func() {
+		if current == nil || current.totalVRAM == 0 || current.deviceType == "PHYSICAL_DEVICE_TYPE_CPU" {
+			return
 		}

 		gpus = append(gpus, GPUMemoryInfo{
-			Index:        i,
-			Name:         device.DeviceName,
+			Index:        current.index,
+			Name:         current.name,
 			Vendor:       VendorVulkan,
-			TotalVRAM:    totalVRAM,
-			UsedVRAM:     0, // Vulkan doesn't provide real-time usage
-			FreeVRAM:     totalVRAM,
+			TotalVRAM:    current.totalVRAM,
+			UsedVRAM:     0, // Vulkan heap size is capacity, not real-time usage.
+			FreeVRAM:     current.totalVRAM,
 			UsagePercent: 0,
 		})
 	}

+	scanner := bufio.NewScanner(r)
+	for scanner.Scan() {
+		line := strings.TrimSpace(scanner.Text())
+		if line == "" {
+			continue
+		}
+
+		if index, ok := parseVulkanGPUHeader(line); ok {
+			flushHeap()
+			flushGPU()
+			current = &vulkanGPUTextInfo{index: index}
+			inMemoryProperties = false
+			inMemoryHeaps = false
+			continue
+		}
+
+		if current == nil {
+			continue
+		}
+
+		if strings.HasPrefix(line, "deviceType") {
+			current.deviceType = parseVulkanValue(line)
+			continue
+		}
+
+		if strings.HasPrefix(line, "deviceName") {
+			current.name = parseVulkanValue(line)
+			continue
+		}
+
+		if line == "VkPhysicalDeviceMemoryProperties:" {
+			inMemoryProperties = true
+			inMemoryHeaps = false
+			flushHeap()
+			continue
+		}
+
+		if !inMemoryProperties {
+			continue
+		}
+
+		if strings.HasPrefix(line, "memoryHeaps:") {
+			inMemoryHeaps = true
+			continue
+		}
+
+		if strings.HasPrefix(line, "memoryTypes:") {
+			flushHeap()
+			inMemoryProperties = false
+			inMemoryHeaps = false
+			continue
+		}
+
+		if !inMemoryHeaps {
+			continue
+		}
+
+		if strings.HasPrefix(line, "memoryHeaps[") {
+			flushHeap()
+			inHeap = true
+			continue
+		}
+
+		if !inHeap {
+			continue
+		}
+
+		if strings.HasPrefix(line, "size") {
+			if size, ok := parseVulkanUintValue(line); ok {
+				heapSize = size
+			}
+			continue
+		}
+
+		if strings.Contains(line, "MEMORY_HEAP_DEVICE_LOCAL_BIT") {
+			heapDeviceLocal = true
+		}
+	}
+
+	flushHeap()
+	flushGPU()
+
 	return gpus
 }

+func parseVulkanGPUHeader(line string) (int, bool) {
+	if !strings.HasPrefix(line, "GPU") || !strings.HasSuffix(line, ":") {
+		return 0, false
+	}
+
+	index, err := strconv.Atoi(strings.TrimSuffix(strings.TrimPrefix(line, "GPU"), ":"))
+	if err != nil {
+		return 0, false
+	}
+
+	return index, true
+}
+
+func parseVulkanValue(line string) string {
+	_, value, ok := strings.Cut(line, "=")
+	if !ok {
+		return ""
+	}
+
+	return strings.TrimSpace(value)
+}
+
+func parseVulkanUintValue(line string) (uint64, bool) {
+	value := parseVulkanValue(line)
+	fields := strings.Fields(value)
+	if len(fields) == 0 {
+		return 0, false
+	}
+
+	parsed, err := strconv.ParseUint(fields[0], 0, 64)
+	if err != nil {
+		return 0, false
+	}
+
+	return parsed, true
+}
+
 // getAppleGPUMemory detects Apple Silicon GPUs using system_profiler (macOS only).
 // Apple Silicon uses unified memory, so GPU memory is reported as system RAM.
 func getAppleGPUMemory() []GPUMemoryInfo {