From 733c254b320ebaeeba7d3222438fe022f87b7a30 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 9 May 2026 10:18:17 +0200
Subject: [PATCH] ci: consolidate llama-cpp-darwin into the matrix-driven Darwin flow (#9731)

The bespoke llama-cpp-darwin + llama-cpp-darwin-publish top-level jobs
in backend.yml ran unconditionally on every backend.yml trigger
(push/cron), bypassing the path filter that all 34 other Darwin
backends already honor via backend-jobs-darwin ->
backend_build_darwin.yml.

Move llama-cpp into the includeDarwin matrix:

- New entry in .github/backend-matrix.yml (lang=go, no build-type).
- backend_build_darwin.yml gains an `if: inputs.backend == 'llama-cpp'`
  build step that drives `make backends/llama-cpp-darwin`. The bespoke
  script (scripts/build/llama-cpp-darwin.sh) compiles three CMake
  variants from backend/cpp/llama-cpp and bundles dylibs via otool, so
  it doesn't fit the build-darwin-go-backend mold; the existing
  llama-cpp-aware ccache setup blocks already in this workflow are what
  motivated the consolidation in the first place.
- scripts/changed-backends.js's inferBackendPathDarwin gains a special
  case so llama-cpp on Darwin maps to backend/cpp/llama-cpp/ (the C++
  source tree) rather than the non-existent backend/go/llama-cpp/.
- Bumps Darwin go-version from 1.24.x to 1.25.x in backend.yml and
  backend_pr.yml so llama-cpp keeps the Go toolchain it had under the
  bespoke job; the other 34 Darwin backends pick this up too with no
  known reason to pin 1.24.
- Removes ~80 lines of bespoke YAML from backend.yml.

The publish path is unchanged in shape - every Darwin backend now uses
the same crane-push leg from ubuntu-latest in backend_build_darwin.yml;
only the build target differs per backend.

After this commit, llama-cpp-darwin only rebuilds when
backend/cpp/llama-cpp/ is touched (verified locally) - same behavior as
every other Darwin backend.

Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Ettore Di Giacinto --- .github/backend-matrix.yml | 3 + .github/workflows/backend.yml | 89 +--------------------- .github/workflows/backend_build_darwin.yml | 11 +++ .github/workflows/backend_pr.yml | 2 +- scripts/changed-backends.js | 6 ++ 5 files changed, 22 insertions(+), 89 deletions(-) diff --git a/.github/backend-matrix.yml b/.github/backend-matrix.yml index fafd8fe28..7a4388e95 100644 --- a/.github/backend-matrix.yml +++ b/.github/backend-matrix.yml @@ -3679,6 +3679,9 @@ includeDarwin: - backend: "mlx-distributed" tag-suffix: "-metal-darwin-arm64-mlx-distributed" build-type: "mps" + - backend: "llama-cpp" + tag-suffix: "-metal-darwin-arm64-llama-cpp" + lang: "go" - backend: "stablediffusion-ggml" tag-suffix: "-metal-darwin-arm64-stablediffusion-ggml" build-type: "metal" diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 8799f00b1..bce6e9d38 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -114,7 +114,7 @@ jobs: with: backend: ${{ matrix.backend }} build-type: ${{ matrix.build-type }} - go-version: "1.24.x" + go-version: "1.25.x" tag-suffix: ${{ matrix.tag-suffix }} lang: ${{ matrix.lang || 'python' }} use-pip: ${{ matrix.backend == 'diffusers' }} @@ -127,90 +127,3 @@ jobs: strategy: fail-fast: false matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix-darwin) }} - - llama-cpp-darwin: - runs-on: macos-latest - strategy: - matrix: - go-version: ['1.25.x'] - steps: - - name: Clone - uses: actions/checkout@v6 - with: - submodules: true - - name: Setup Go ${{ matrix.go-version }} - uses: actions/setup-go@v5 - with: - go-version: ${{ matrix.go-version }} - cache: false - # You can test your matrix by printing the current Go version - - name: Display Go version - run: go version - - name: Dependencies - run: | - brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm - - name: Build llama-cpp-darwin - run: | - make protogen-go 
- make backends/llama-cpp-darwin - - name: Upload llama-cpp.tar - uses: actions/upload-artifact@v7 - with: - name: llama-cpp-tar - path: backend-images/llama-cpp.tar - llama-cpp-darwin-publish: - needs: llama-cpp-darwin - if: github.event_name != 'pull_request' - runs-on: ubuntu-latest - steps: - - name: Download llama-cpp.tar - uses: actions/download-artifact@v8 - with: - name: llama-cpp-tar - path: . - - name: Install crane - run: | - curl -L https://github.com/google/go-containerregistry/releases/latest/download/go-containerregistry_Linux_x86_64.tar.gz | tar -xz - sudo mv crane /usr/local/bin/ - - name: Log in to DockerHub - run: | - echo "${{ secrets.DOCKERHUB_PASSWORD }}" | crane auth login docker.io -u "${{ secrets.DOCKERHUB_USERNAME }}" --password-stdin - - name: Log in to quay.io - run: | - echo "${{ secrets.LOCALAI_REGISTRY_PASSWORD }}" | crane auth login quay.io -u "${{ secrets.LOCALAI_REGISTRY_USERNAME }}" --password-stdin - - name: Docker meta - id: meta - uses: docker/metadata-action@v6 - with: - images: | - localai/localai-backends - tags: | - type=ref,event=branch - type=semver,pattern={{raw}} - type=sha - flavor: | - latest=auto - suffix=-metal-darwin-arm64-llama-cpp,onlatest=true - - name: Docker meta - id: quaymeta - uses: docker/metadata-action@v6 - with: - images: | - quay.io/go-skynet/local-ai-backends - tags: | - type=ref,event=branch - type=semver,pattern={{raw}} - type=sha - flavor: | - latest=auto - suffix=-metal-darwin-arm64-llama-cpp,onlatest=true - - name: Push Docker image (DockerHub) - run: | - for tag in $(echo "${{ steps.meta.outputs.tags }}" | tr ',' '\n'); do - crane push llama-cpp.tar $tag - done - - name: Push Docker image (Quay) - run: | - for tag in $(echo "${{ steps.quaymeta.outputs.tags }}" | tr ',' '\n'); do - crane push llama-cpp.tar $tag - done diff --git a/.github/workflows/backend_build_darwin.yml b/.github/workflows/backend_build_darwin.yml index 895fa391f..5a2fe32fd 100644 --- 
a/.github/workflows/backend_build_darwin.yml +++ b/.github/workflows/backend_build_darwin.yml @@ -175,7 +175,18 @@ jobs: restore-keys: | pyenv-darwin-${{ inputs.backend }}- + # llama-cpp on Darwin uses a bespoke build script (scripts/build/llama-cpp-darwin.sh) + # that compiles three CMake variants from backend/cpp/llama-cpp and bundles dylibs + # via otool — it doesn't fit the build-darwin-go-backend / build-darwin-python-backend + # mold. Drive it via its dedicated `backends/llama-cpp-darwin` make target instead. + - name: Build ${{ inputs.backend }}-darwin (llama-cpp) + if: inputs.backend == 'llama-cpp' + run: | + make protogen-go + make backends/llama-cpp-darwin + - name: Build ${{ inputs.backend }}-darwin + if: inputs.backend != 'llama-cpp' run: | make protogen-go BACKEND=${{ inputs.backend }} BUILD_TYPE=${{ inputs.build-type }} USE_PIP=${{ inputs.use-pip }} make build-darwin-${{ inputs.lang }}-backend diff --git a/.github/workflows/backend_pr.yml b/.github/workflows/backend_pr.yml index 4ca1fa1d9..85bb0a16b 100644 --- a/.github/workflows/backend_pr.yml +++ b/.github/workflows/backend_pr.yml @@ -87,7 +87,7 @@ jobs: with: backend: ${{ matrix.backend }} build-type: ${{ matrix.build-type }} - go-version: "1.24.x" + go-version: "1.25.x" tag-suffix: ${{ matrix.tag-suffix }} lang: ${{ matrix.lang || 'python' }} use-pip: ${{ matrix.backend == 'diffusers' }} diff --git a/scripts/changed-backends.js b/scripts/changed-backends.js index 06ce5bb40..a006ea80f 100644 --- a/scripts/changed-backends.js +++ b/scripts/changed-backends.js @@ -39,6 +39,12 @@ function inferBackendPath(item) { } function inferBackendPathDarwin(item) { + // llama-cpp on Darwin builds from the C++ sources, not a backend/go/llama-cpp + // tree (which doesn't exist). The Darwin job is matrix-driven with lang=go + // for runner/toolchain selection, but the source path is C++. 
+ if (item.backend === "llama-cpp") { + return `backend/cpp/llama-cpp/`; + } if (!item.lang) { return `backend/python/${item.backend}/`; }