mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-16 20:52:08 -04:00
ci: consolidate llama-cpp-darwin into the matrix-driven Darwin flow (#9731)
The bespoke llama-cpp-darwin + llama-cpp-darwin-publish top-level jobs in backend.yml ran unconditionally on every backend.yml trigger (push/cron), bypassing the path filter that all 34 other Darwin backends already honor via backend-jobs-darwin -> backend_build_darwin.yml. Move llama-cpp into the includeDarwin matrix: - New entry in .github/backend-matrix.yml (lang=go, no build-type). - backend_build_darwin.yml gains an `if: inputs.backend == 'llama-cpp'` build step that drives `make backends/llama-cpp-darwin`. The bespoke script (scripts/build/llama-cpp-darwin.sh) compiles three CMake variants from backend/cpp/llama-cpp and bundles dylibs via otool, so it doesn't fit the build-darwin-go-backend mold; the existing llama-cpp-aware ccache setup blocks already in this workflow are what motivated the consolidation in the first place. - scripts/changed-backends.js's inferBackendPathDarwin gains a special case so llama-cpp on Darwin maps to backend/cpp/llama-cpp/ (the C++ source tree) rather than the non-existent backend/go/llama-cpp/. - Bumps Darwin go-version from 1.24.x -> 1.25.x in backend.yml and backend_pr.yml so llama-cpp keeps the Go toolchain it had under the bespoke job; the other 34 Darwin backends pick this up too with no known reason to pin 1.24. - Removes ~80 lines of bespoke YAML from backend.yml. The publish path is unchanged in shape - every Darwin backend now uses the same crane-push leg from ubuntu-latest in backend_build_darwin.yml; only the build target differs per backend. After this commit, llama-cpp-darwin only rebuilds when backend/cpp/llama-cpp/ is touched (verified locally) - same behavior as every other Darwin backend. Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
committed by
GitHub
parent
4542833cb4
commit
733c254b32
.github/backend-matrix.yml (vendored) — 3 additions

@@ -3679,6 +3679,9 @@ includeDarwin:
   - backend: "mlx-distributed"
     tag-suffix: "-metal-darwin-arm64-mlx-distributed"
     build-type: "mps"
+  - backend: "llama-cpp"
+    tag-suffix: "-metal-darwin-arm64-llama-cpp"
+    lang: "go"
   - backend: "stablediffusion-ggml"
     tag-suffix: "-metal-darwin-arm64-stablediffusion-ggml"
     build-type: "metal"
.github/workflows/backend.yml (vendored) — 89 changes

@@ -114,7 +114,7 @@ jobs:
     with:
       backend: ${{ matrix.backend }}
       build-type: ${{ matrix.build-type }}
-      go-version: "1.24.x"
+      go-version: "1.25.x"
      tag-suffix: ${{ matrix.tag-suffix }}
      lang: ${{ matrix.lang || 'python' }}
      use-pip: ${{ matrix.backend == 'diffusers' }}
@@ -127,90 +127,3 @@ jobs:
     strategy:
       fail-fast: false
       matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix-darwin) }}
-
-  llama-cpp-darwin:
-    runs-on: macos-latest
-    strategy:
-      matrix:
-        go-version: ['1.25.x']
-    steps:
-      - name: Clone
-        uses: actions/checkout@v6
-        with:
-          submodules: true
-      - name: Setup Go ${{ matrix.go-version }}
-        uses: actions/setup-go@v5
-        with:
-          go-version: ${{ matrix.go-version }}
-          cache: false
-      # You can test your matrix by printing the current Go version
-      - name: Display Go version
-        run: go version
-      - name: Dependencies
-        run: |
-          brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
-      - name: Build llama-cpp-darwin
-        run: |
-          make protogen-go
-          make backends/llama-cpp-darwin
-      - name: Upload llama-cpp.tar
-        uses: actions/upload-artifact@v7
-        with:
-          name: llama-cpp-tar
-          path: backend-images/llama-cpp.tar
-  llama-cpp-darwin-publish:
-    needs: llama-cpp-darwin
-    if: github.event_name != 'pull_request'
-    runs-on: ubuntu-latest
-    steps:
-      - name: Download llama-cpp.tar
-        uses: actions/download-artifact@v8
-        with:
-          name: llama-cpp-tar
-          path: .
-      - name: Install crane
-        run: |
-          curl -L https://github.com/google/go-containerregistry/releases/latest/download/go-containerregistry_Linux_x86_64.tar.gz | tar -xz
-          sudo mv crane /usr/local/bin/
-      - name: Log in to DockerHub
-        run: |
-          echo "${{ secrets.DOCKERHUB_PASSWORD }}" | crane auth login docker.io -u "${{ secrets.DOCKERHUB_USERNAME }}" --password-stdin
-      - name: Log in to quay.io
-        run: |
-          echo "${{ secrets.LOCALAI_REGISTRY_PASSWORD }}" | crane auth login quay.io -u "${{ secrets.LOCALAI_REGISTRY_USERNAME }}" --password-stdin
-      - name: Docker meta
-        id: meta
-        uses: docker/metadata-action@v6
-        with:
-          images: |
-            localai/localai-backends
-          tags: |
-            type=ref,event=branch
-            type=semver,pattern={{raw}}
-            type=sha
-          flavor: |
-            latest=auto
-            suffix=-metal-darwin-arm64-llama-cpp,onlatest=true
-      - name: Docker meta
-        id: quaymeta
-        uses: docker/metadata-action@v6
-        with:
-          images: |
-            quay.io/go-skynet/local-ai-backends
-          tags: |
-            type=ref,event=branch
-            type=semver,pattern={{raw}}
-            type=sha
-          flavor: |
-            latest=auto
-            suffix=-metal-darwin-arm64-llama-cpp,onlatest=true
-      - name: Push Docker image (DockerHub)
-        run: |
-          for tag in $(echo "${{ steps.meta.outputs.tags }}" | tr ',' '\n'); do
-            crane push llama-cpp.tar $tag
-          done
-      - name: Push Docker image (Quay)
-        run: |
-          for tag in $(echo "${{ steps.quaymeta.outputs.tags }}" | tr ',' '\n'); do
-            crane push llama-cpp.tar $tag
-          done
.github/workflows/backend_build_darwin.yml (vendored) — 11 additions

@@ -175,7 +175,18 @@ jobs:
           restore-keys: |
             pyenv-darwin-${{ inputs.backend }}-

+      # llama-cpp on Darwin uses a bespoke build script (scripts/build/llama-cpp-darwin.sh)
+      # that compiles three CMake variants from backend/cpp/llama-cpp and bundles dylibs
+      # via otool — it doesn't fit the build-darwin-go-backend / build-darwin-python-backend
+      # mold. Drive it via its dedicated `backends/llama-cpp-darwin` make target instead.
+      - name: Build ${{ inputs.backend }}-darwin (llama-cpp)
+        if: inputs.backend == 'llama-cpp'
+        run: |
+          make protogen-go
+          make backends/llama-cpp-darwin
+
       - name: Build ${{ inputs.backend }}-darwin
+        if: inputs.backend != 'llama-cpp'
         run: |
           make protogen-go
           BACKEND=${{ inputs.backend }} BUILD_TYPE=${{ inputs.build-type }} USE_PIP=${{ inputs.use-pip }} make build-darwin-${{ inputs.lang }}-backend
.github/workflows/backend_pr.yml (vendored) — 2 changes

@@ -87,7 +87,7 @@ jobs:
     with:
       backend: ${{ matrix.backend }}
       build-type: ${{ matrix.build-type }}
-      go-version: "1.24.x"
+      go-version: "1.25.x"
      tag-suffix: ${{ matrix.tag-suffix }}
      lang: ${{ matrix.lang || 'python' }}
      use-pip: ${{ matrix.backend == 'diffusers' }}
Reference in New Issue
Block a user