From 733c254b320ebaeeba7d3222438fe022f87b7a30 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Sat, 9 May 2026 10:18:17 +0200
Subject: [PATCH] ci: consolidate llama-cpp-darwin into the matrix-driven
 Darwin flow (#9731)

The bespoke llama-cpp-darwin + llama-cpp-darwin-publish top-level jobs
in backend.yml ran unconditionally on every backend.yml trigger
(push/cron), bypassing the path filter that all 34 other Darwin
backends already honor via backend-jobs-darwin -> backend_build_darwin.yml.

Move llama-cpp into the includeDarwin matrix:
- New entry in .github/backend-matrix.yml (lang=go, no build-type).
- backend_build_darwin.yml gains an `if: inputs.backend == 'llama-cpp'`
  build step that drives `make backends/llama-cpp-darwin`. The bespoke
  script (scripts/build/llama-cpp-darwin.sh) compiles three CMake
  variants from backend/cpp/llama-cpp and bundles dylibs via otool, so
  it doesn't fit the build-darwin-go-backend mold. This workflow
  already carries llama-cpp-aware ccache setup blocks, which is what
  motivated the consolidation in the first place.
- scripts/changed-backends.js's inferBackendPathDarwin gains a special
  case so llama-cpp on Darwin maps to backend/cpp/llama-cpp/ (the C++
  source tree) rather than the non-existent backend/go/llama-cpp/.
- Bumps Darwin go-version from 1.24.x to 1.25.x in backend.yml and
  backend_pr.yml so llama-cpp keeps the Go toolchain it had under the
  bespoke job. The other 34 Darwin backends pick this up too; there is
  no known reason to keep them pinned to 1.24.
- Removes ~80 lines of bespoke YAML from backend.yml.

The publish path is unchanged in shape - every Darwin backend now uses
the same crane-push leg from ubuntu-latest in
backend_build_darwin.yml; only the build target differs per backend.

After this commit, llama-cpp-darwin only rebuilds when
backend/cpp/llama-cpp/ is touched (verified locally) - same behavior
as every other Darwin backend.

Assisted-by: Claude:claude-opus-4-7

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 .github/backend-matrix.yml                 |  3 +
 .github/workflows/backend.yml              | 89 +---------------------
 .github/workflows/backend_build_darwin.yml | 11 +++
 .github/workflows/backend_pr.yml           |  2 +-
 scripts/changed-backends.js                |  6 ++
 5 files changed, 22 insertions(+), 89 deletions(-)

diff --git a/.github/backend-matrix.yml b/.github/backend-matrix.yml
index fafd8fe28..7a4388e95 100644
--- a/.github/backend-matrix.yml
+++ b/.github/backend-matrix.yml
@@ -3679,6 +3679,9 @@ includeDarwin:
   - backend: "mlx-distributed"
     tag-suffix: "-metal-darwin-arm64-mlx-distributed"
     build-type: "mps"
+  - backend: "llama-cpp"
+    tag-suffix: "-metal-darwin-arm64-llama-cpp"
+    lang: "go"
   - backend: "stablediffusion-ggml"
     tag-suffix: "-metal-darwin-arm64-stablediffusion-ggml"
     build-type: "metal"
diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml
index 8799f00b1..bce6e9d38 100644
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -114,7 +114,7 @@ jobs:
     with:
       backend: ${{ matrix.backend }}
       build-type: ${{ matrix.build-type }}
-      go-version: "1.24.x"
+      go-version: "1.25.x"
       tag-suffix: ${{ matrix.tag-suffix }}
       lang: ${{ matrix.lang || 'python' }}
       use-pip: ${{ matrix.backend == 'diffusers' }}
@@ -127,90 +127,3 @@ jobs:
     strategy:
       fail-fast: false
       matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix-darwin) }}
-
-  llama-cpp-darwin:
-    runs-on: macos-latest
-    strategy:
-      matrix:
-        go-version: ['1.25.x']
-    steps:
-      - name: Clone
-        uses: actions/checkout@v6
-        with:
-          submodules: true
-      - name: Setup Go ${{ matrix.go-version }}
-        uses: actions/setup-go@v5
-        with:
-          go-version: ${{ matrix.go-version }}
-          cache: false
-      # You can test your matrix by printing the current Go version
-      - name: Display Go version
-        run: go version
-      - name: Dependencies
-        run: |
-          brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
-      - name: Build llama-cpp-darwin
-        run: |
-          make protogen-go
-          make backends/llama-cpp-darwin
-      - name: Upload llama-cpp.tar
-        uses: actions/upload-artifact@v7
-        with:
-          name: llama-cpp-tar
-          path: backend-images/llama-cpp.tar
-  llama-cpp-darwin-publish:
-    needs: llama-cpp-darwin
-    if: github.event_name != 'pull_request'
-    runs-on: ubuntu-latest
-    steps:
-      - name: Download llama-cpp.tar
-        uses: actions/download-artifact@v8
-        with:
-          name: llama-cpp-tar
-          path: .
-      - name: Install crane
-        run: |
-          curl -L https://github.com/google/go-containerregistry/releases/latest/download/go-containerregistry_Linux_x86_64.tar.gz | tar -xz
-          sudo mv crane /usr/local/bin/
-      - name: Log in to DockerHub
-        run: |
-          echo "${{ secrets.DOCKERHUB_PASSWORD }}" | crane auth login docker.io -u "${{ secrets.DOCKERHUB_USERNAME }}" --password-stdin
-      - name: Log in to quay.io
-        run: |
-          echo "${{ secrets.LOCALAI_REGISTRY_PASSWORD }}" | crane auth login quay.io -u "${{ secrets.LOCALAI_REGISTRY_USERNAME }}" --password-stdin
-      - name: Docker meta
-        id: meta
-        uses: docker/metadata-action@v6
-        with:
-          images: |
-            localai/localai-backends
-          tags: |
-            type=ref,event=branch
-            type=semver,pattern={{raw}}
-            type=sha
-          flavor: |
-            latest=auto
-            suffix=-metal-darwin-arm64-llama-cpp,onlatest=true
-      - name: Docker meta
-        id: quaymeta
-        uses: docker/metadata-action@v6
-        with:
-          images: |
-            quay.io/go-skynet/local-ai-backends
-          tags: |
-            type=ref,event=branch
-            type=semver,pattern={{raw}}
-            type=sha
-          flavor: |
-            latest=auto
-            suffix=-metal-darwin-arm64-llama-cpp,onlatest=true
-      - name: Push Docker image (DockerHub)
-        run: |
-          for tag in $(echo "${{ steps.meta.outputs.tags }}" | tr ',' '\n'); do
-            crane push llama-cpp.tar $tag
-          done
-      - name: Push Docker image (Quay)
-        run: |
-          for tag in $(echo "${{ steps.quaymeta.outputs.tags }}" | tr ',' '\n'); do
-            crane push llama-cpp.tar $tag
-          done
diff --git a/.github/workflows/backend_build_darwin.yml b/.github/workflows/backend_build_darwin.yml
index 895fa391f..5a2fe32fd 100644
--- a/.github/workflows/backend_build_darwin.yml
+++ b/.github/workflows/backend_build_darwin.yml
@@ -175,7 +175,18 @@ jobs:
           restore-keys: |
             pyenv-darwin-${{ inputs.backend }}-
 
+      # llama-cpp on Darwin uses a bespoke build script (scripts/build/llama-cpp-darwin.sh)
+      # that compiles three CMake variants from backend/cpp/llama-cpp and bundles dylibs
+      # via otool — it doesn't fit the build-darwin-go-backend / build-darwin-python-backend
+      # mold. Drive it via its dedicated `backends/llama-cpp-darwin` make target instead.
+      - name: Build ${{ inputs.backend }}-darwin (llama-cpp)
+        if: inputs.backend == 'llama-cpp'
+        run: |
+          make protogen-go
+          make backends/llama-cpp-darwin
+
       - name: Build ${{ inputs.backend }}-darwin
+        if: inputs.backend != 'llama-cpp'
         run: |
           make protogen-go
           BACKEND=${{ inputs.backend }} BUILD_TYPE=${{ inputs.build-type }} USE_PIP=${{ inputs.use-pip }} make build-darwin-${{ inputs.lang }}-backend
diff --git a/.github/workflows/backend_pr.yml b/.github/workflows/backend_pr.yml
index 4ca1fa1d9..85bb0a16b 100644
--- a/.github/workflows/backend_pr.yml
+++ b/.github/workflows/backend_pr.yml
@@ -87,7 +87,7 @@ jobs:
     with:
       backend: ${{ matrix.backend }}
       build-type: ${{ matrix.build-type }}
-      go-version: "1.24.x"
+      go-version: "1.25.x"
       tag-suffix: ${{ matrix.tag-suffix }}
       lang: ${{ matrix.lang || 'python' }}
       use-pip: ${{ matrix.backend == 'diffusers' }}
diff --git a/scripts/changed-backends.js b/scripts/changed-backends.js
index 06ce5bb40..a006ea80f 100644
--- a/scripts/changed-backends.js
+++ b/scripts/changed-backends.js
@@ -39,6 +39,12 @@ function inferBackendPath(item) {
 }
 
 function inferBackendPathDarwin(item) {
+  // llama-cpp on Darwin builds from the C++ sources, not a backend/go/llama-cpp
+  // tree (which doesn't exist). The Darwin job is matrix-driven with lang=go
+  // for runner/toolchain selection, but the source path is C++.
+  if (item.backend === "llama-cpp") {
+    return `backend/cpp/llama-cpp/`;
+  }
   if (!item.lang) {
     return `backend/python/${item.backend}/`;
   }