LocalAI/.github/workflows/backend_build_darwin.yml

---
name: 'build darwin python backend container images (reusable)'

# Reusable workflow: it only runs when another workflow invokes it through
# `workflow_call`, passing the inputs and registry secrets declared below.
on:
  workflow_call:
    inputs:
      backend:
        type: string
        required: true
        description: 'Backend to build'
      build-type:
        type: string
        default: ''
        description: 'Build type (e.g., mps)'
      use-pip:
        type: boolean
        default: false
        description: 'Use pip to install dependencies'
      lang:
        type: string
        default: 'python'
        description: 'Programming language (e.g. go)'
      go-version:
        type: string
        default: '1.24.x'
        description: 'Go version to use'
      tag-suffix:
        type: string
        required: true
        description: 'Tag suffix for the built image'
      runs-on:
        type: string
        default: 'macOS-14'
        description: 'Runner to use'
    secrets:
      # NOTE(review): the DockerHub credentials are declared optional, yet the
      # publish job's "Log in to DockerHub" step uses them unconditionally —
      # confirm every caller passes them.
      dockerUsername:
        required: false
      dockerPassword:
        required: false
      # Quay credentials are mandatory for every caller.
      quayUsername:
        required: true
      quayPassword:
        required: true

jobs:
  # Builds one backend on the requested macOS runner and uploads the resulting
  # tarball as a workflow artifact.
  darwin-backend-build:
    runs-on: ${{ inputs.runs-on }}
    env:
      # These switches keep the brew Cellar stable after a cache restore:
      # otherwise `brew install` would auto-update brew itself and re-link
      # formulas, mutating the very paths the cache just restored.
      HOMEBREW_NO_ANALYTICS: '1'
      HOMEBREW_NO_AUTO_UPDATE: '1'
      HOMEBREW_NO_INSTALL_CLEANUP: '1'
    strategy:
      matrix:
        # Single-entry matrix fed by the caller-supplied Go version.
        go-version:
          - '${{ inputs.go-version }}'
    steps:
      # Check out the repository; `submodules: true` also fetches the git
      # submodules.
      - name: Clone
        uses: actions/checkout@v6
        with:
          submodules: true

      # Install the Go toolchain selected by the matrix entry.
      - name: Setup Go ${{ matrix.go-version }}
        uses: actions/setup-go@v5
        with:
          # Module and build caches (~/go/pkg/mod, ~/Library/Caches/go-build)
          # are keyed on go.sum and shared by every darwin matrix entry: the
          # first job of a run warms them, later jobs hit them warm.
          cache: true
          go-version: ${{ matrix.go-version }}

      # Print the active Go version so the matrix selection is visible in the
      # job log.
      - name: Display Go version
        run: |
          go version

      # ---- Homebrew cache ----
      # There is no Docker daemon on macOS runners, which rules out the BuildKit
      # registry cache used for the Linux backend images (see
      # .agents/ci-caching.md). Instead, the brew download cache and the Cellar
      # directories of the formulas installed below are cached. Every run
      # reads the cache; it is written only on master/tag pushes — the same
      # policy as the Linux registry cache.
      # The path list must stay identical to the one in the save step.
      - name: Restore Homebrew cache
        uses: actions/cache/restore@v4
        id: brew-cache
        with:
          key: brew-${{ runner.os }}-${{ runner.arch }}-v1-${{ hashFiles('.github/workflows/backend_build_darwin.yml') }}
          path: |
            ~/Library/Caches/Homebrew/downloads
            /opt/homebrew/Cellar/protobuf
            /opt/homebrew/Cellar/grpc
            /opt/homebrew/Cellar/protoc-gen-go
            /opt/homebrew/Cellar/protoc-gen-go-grpc
            /opt/homebrew/Cellar/libomp
            /opt/homebrew/Cellar/llvm
            /opt/homebrew/Cellar/ccache
            /opt/homebrew/Cellar/blake3
            /opt/homebrew/Cellar/fmt
            /opt/homebrew/Cellar/hiredis
            /opt/homebrew/Cellar/xxhash
            /opt/homebrew/Cellar/zstd
            /opt/homebrew/Cellar/nlohmann-json

      - name: Dependencies
        run: |
          # A single formula list feeds both `brew install` and `brew link`.
          #  * ccache is installed for every backend (the llama-cpp variant
          #    build uses it) so the cached brew content is the same for the
          #    whole matrix, which shares one cache key.
          #  * blake3, fmt, hiredis, xxhash and zstd are ccache's runtime dylib
          #    dependencies. They are listed explicitly because a cache-hit run
          #    restores ccache's Cellar dir but would otherwise skip those
          #    transitive deps, and ccache then dies at runtime with
          #    `dyld: Library not loaded`.
          #  * nlohmann-json is header-only and needed by the ds4 backend
          #    (dsml_renderer.cpp includes <nlohmann/json.hpp>); on Linux it is
          #    provided by the apt package nlohmann-json3-dev in the build image.
          formulae=(
            protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
            ccache blake3 fmt hiredis xxhash zstd nlohmann-json
          )
          brew install "${formulae[@]}"
          # Reinstalling ccache makes brew re-validate its complete runtime
          # dependency closure on every run. That is the durable fix: whenever
          # the upstream formula grows another transitive dep (which has
          # happened several times), no missing dylib has to be chased by hand.
          # Thanks to the downloads cache the reinstall is quick (~5s on a hit).
          brew reinstall ccache
          # grpc gets the same treatment. Its CMake config (consumed by the
          # llama-cpp `grpc-server` target) calls find_package(absl). With
          # /opt/homebrew/Cellar/grpc restored from cache the install above is
          # a no-op, and abseil — absent from the cached Cellar list — is never
          # installed, so grpc's CMake cannot resolve it. Reinstalling grpc
          # re-validates its deps and pulls abseil in.
          brew reinstall grpc
          # Cache restores bring back the Cellar dirs but not the symlinks in
          # /opt/homebrew/bin/*. Since brew considers the formulas "already
          # installed" it does not re-link them, leaving them off PATH on a
          # cache-hit run. Link them explicitly; --overwrite copes with
          # symlinks left by earlier installs, and failures are ignored.
          brew link --overwrite "${formulae[@]}" 2>/dev/null || true

      # Persist the Homebrew cache — skipped for pull requests and whenever the
      # restore step reported an exact key hit. Key and path list mirror the
      # restore step.
      - name: Save Homebrew cache
        uses: actions/cache/save@v4
        if: steps.brew-cache.outputs.cache-hit != 'true' && github.event_name != 'pull_request'
        with:
          key: brew-${{ runner.os }}-${{ runner.arch }}-v1-${{ hashFiles('.github/workflows/backend_build_darwin.yml') }}
          path: |
            ~/Library/Caches/Homebrew/downloads
            /opt/homebrew/Cellar/protobuf
            /opt/homebrew/Cellar/grpc
            /opt/homebrew/Cellar/protoc-gen-go
            /opt/homebrew/Cellar/protoc-gen-go-grpc
            /opt/homebrew/Cellar/libomp
            /opt/homebrew/Cellar/llvm
            /opt/homebrew/Cellar/ccache
            /opt/homebrew/Cellar/blake3
            /opt/homebrew/Cellar/fmt
            /opt/homebrew/Cellar/hiredis
            /opt/homebrew/Cellar/xxhash
            /opt/homebrew/Cellar/zstd
            /opt/homebrew/Cellar/nlohmann-json

      # ---- ccache for llama.cpp CMake builds ----
      # Three CMake variants (fallback, grpc, rpc-server) compile the same
      # llama.cpp source tree with overlapping flags — ccache dedupes object
      # files across them. Key on the pinned LLAMA_VERSION so a pin bump
      # invalidates cleanly; restore-keys fall back to the latest entry for the
      # same pin so unchanged TUs stay warm even when the cache is fresh.
      - name: Compute llama.cpp version
        if: inputs.backend == 'llama-cpp'
        id: llama-version
        run: |
          # Take the first LLAMA_VERSION assignment in the Makefile and keep
          # only its value: the text after `=`, cut at any `?`, spaces removed.
          version=$(grep '^LLAMA_VERSION' backend/cpp/llama-cpp/Makefile | head -1 | cut -d= -f2 | cut -d'?' -f1 | tr -d ' ')
          # An empty value still yields usable cache keys, but they no longer
          # track the pin. Surface that instead of degrading silently.
          if [ -z "${version}" ]; then
            echo "::warning::Could not read LLAMA_VERSION from backend/cpp/llama-cpp/Makefile; ccache keys will not track the llama.cpp pin"
          fi
          echo "version=${version}" >> "$GITHUB_OUTPUT"

      # Restore the ccache directory for the current llama.cpp pin. The exact
      # key ends in the run id; the restore-keys prefix is the fallback to an
      # earlier entry for the same architecture and pin.
      - name: Restore ccache
        id: ccache-cache
        if: inputs.backend == 'llama-cpp'
        uses: actions/cache/restore@v4
        with:
          path: ~/Library/Caches/ccache
          restore-keys: |
            ccache-llama-${{ runner.arch }}-${{ steps.llama-version.outputs.version }}-
          key: ccache-llama-${{ runner.arch }}-${{ steps.llama-version.outputs.version }}-${{ github.run_id }}

      - name: Configure ccache
        if: inputs.backend == 'llama-cpp'
        run: |
          cache_dir="$HOME/Library/Caches/ccache"
          mkdir -p "$cache_dir"
          # Cap the cache at 2G and zero the statistics counters so the stats
          # printed after the build describe this run only.
          ccache -M 2G
          ccache -z
          # Export for later steps: llama-cpp-darwin.sh reads CMAKE_ARGS and
          # CCACHE_DIR from the environment.
          echo "CMAKE_ARGS=${CMAKE_ARGS:-} -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache" >> "$GITHUB_ENV"
          echo "CCACHE_DIR=$cache_dir" >> "$GITHUB_ENV"

      # ---- Python wheel cache (uv + pip) ----
      # Mirrors the Linux DEPS_REFRESH cadence (see .agents/ci-caching.md): the
      # ISO-week segment of the cache key forces at most one cold rebuild per
      # backend per week, automatically picking up newer wheels for unpinned
      # deps (torch, mlx, diffusers, …). Restore-keys fall back to the most
      # recent build of the same backend so off-week PRs still hit warm.
      - name: Compute weekly cache bucket
        if: inputs.lang == 'python'
        id: weekly
        # %G is the ISO week-based year that belongs with the ISO week number
        # %V. The calendar year (%Y) mislabels the days around New Year that
        # fall into a week of the adjacent year (e.g. 2024-12-30 is ISO week
        # 2025-W01, not 2024-W01).
        run: echo "bucket=$(date -u +%G-W%V)" >> "$GITHUB_OUTPUT"

      # Restore the pip and uv caches for this backend. The exact key combines
      # backend name, weekly bucket and a hash of the backend's
      # requirements*.txt files; the restore-keys prefix is the fallback.
      # NOTE(review): restore-keys match by prefix, so a backend whose name is
      # a prefix of another backend's name (`foo` vs `foo-bar`) may restore the
      # other backend's cache — confirm this is acceptable.
      - name: Restore Python wheel cache
        id: pyenv-cache
        if: inputs.lang == 'python'
        uses: actions/cache/restore@v4
        with:
          key: pyenv-darwin-${{ inputs.backend }}-${{ steps.weekly.outputs.bucket }}-${{ hashFiles(format('backend/python/{0}/requirements*.txt', inputs.backend)) }}
          restore-keys: |
            pyenv-darwin-${{ inputs.backend }}-
          path: |
            ~/Library/Caches/pip
            ~/Library/Caches/uv

      # llama-cpp on Darwin is built by a bespoke script
      # (scripts/build/llama-cpp-darwin.sh) that compiles three CMake variants
      # from backend/cpp/llama-cpp and bundles dylibs via otool. It does not fit
      # the build-darwin-go-backend / build-darwin-python-backend mold, so it is
      # driven through its dedicated `backends/llama-cpp-darwin` make target.
      # The Go protobuf code is generated first.
      - name: Build ${{ inputs.backend }}-darwin (llama-cpp)
        if: inputs.backend == 'llama-cpp'
        run: |
          make protogen-go
          make backends/llama-cpp-darwin

      # ds4 has its own make target; this step runs only for that backend.
      - name: Build ds4 backend (Darwin Metal)
        if: inputs.backend == 'ds4'
        run: make backends/ds4-darwin

      # Generic build path for every backend other than llama-cpp and ds4.
      # Workflow inputs reach the script through environment variables instead
      # of being expanded into the script text, so a value containing spaces or
      # shell metacharacters cannot alter the command line.
      - name: Build ${{ inputs.backend }}-darwin
        if: inputs.backend != 'llama-cpp' && inputs.backend != 'ds4'
        env:
          WF_BACKEND: ${{ inputs.backend }}
          WF_BUILD_TYPE: ${{ inputs.build-type }}
          WF_USE_PIP: ${{ inputs.use-pip }}
          WF_LANG: ${{ inputs.lang }}
        run: |
          make protogen-go
          # BACKEND / BUILD_TYPE / USE_PIP are set for this make invocation only.
          BACKEND="$WF_BACKEND" BUILD_TYPE="$WF_BUILD_TYPE" USE_PIP="$WF_USE_PIP" make "build-darwin-${WF_LANG}-backend"

      # Print ccache statistics after the llama-cpp build.
      - name: ccache stats
        if: inputs.backend == 'llama-cpp'
        run: |
          ccache -s

      # Store the ccache directory under a run-specific key; skipped for pull
      # requests.
      - name: Save ccache
        uses: actions/cache/save@v4
        if: github.event_name != 'pull_request' && inputs.backend == 'llama-cpp'
        with:
          key: ccache-llama-${{ runner.arch }}-${{ steps.llama-version.outputs.version }}-${{ github.run_id }}
          path: ~/Library/Caches/ccache

      # Store the pip and uv caches for Python backends — skipped for pull
      # requests and when the restore step reported an exact key hit. The key
      # mirrors the restore step.
      - name: Save Python wheel cache
        uses: actions/cache/save@v4
        if: github.event_name != 'pull_request' && inputs.lang == 'python' && steps.pyenv-cache.outputs.cache-hit != 'true'
        with:
          key: pyenv-darwin-${{ inputs.backend }}-${{ steps.weekly.outputs.bucket }}-${{ hashFiles(format('backend/python/{0}/requirements*.txt', inputs.backend)) }}
          path: |
            ~/Library/Caches/pip
            ~/Library/Caches/uv

      # Hand the built image tarball to the publish job. A missing tarball
      # fails this job right away rather than producing only a warning, which
      # would otherwise let pull-request runs pass without an image and defer
      # the failure on other events to the publish job's download step.
      - name: Upload ${{ inputs.backend }}.tar
        uses: actions/upload-artifact@v7
        with:
          name: ${{ inputs.backend }}-tar
          path: backend-images/${{ inputs.backend }}.tar
          if-no-files-found: error

  # Pushes the tarball produced by the build job to DockerHub and quay.io.
  # Runs on Linux after the build job and is skipped for pull requests.
  darwin-backend-publish:
    runs-on: ubuntu-latest
    needs: darwin-backend-build
    if: github.event_name != 'pull_request'
    steps:
      # Fetch the artifact uploaded by the build job into the working
      # directory.
      - name: Download ${{ inputs.backend }}.tar
        uses: actions/download-artifact@v8
        with:
          path: .
          name: ${{ inputs.backend }}-tar

      # Install the `crane` CLI from the latest go-containerregistry release.
      - name: Install crane
        # An explicit `shell: bash` runs the script with `-o pipefail`, so a
        # failed download fails the step instead of being masked by tar.
        shell: bash
        run: |
          # -f turns HTTP errors into a non-zero exit instead of piping an error
          # page into tar; only the `crane` binary is extracted.
          curl -fsSL --retry 3 https://github.com/google/go-containerregistry/releases/latest/download/go-containerregistry_Linux_x86_64.tar.gz | tar -xzf - crane
          sudo mv crane /usr/local/bin/

      # Authenticate crane against DockerHub. Credentials are passed through
      # the environment rather than expanded into the script text, so quotes,
      # `$` or backticks inside a secret cannot break or alter the command.
      - name: Log in to DockerHub
        env:
          DOCKER_USERNAME: ${{ secrets.dockerUsername }}
          DOCKER_PASSWORD: ${{ secrets.dockerPassword }}
        run: |
          printf '%s\n' "$DOCKER_PASSWORD" | crane auth login docker.io -u "$DOCKER_USERNAME" --password-stdin

      # Authenticate crane against quay.io. Credentials are passed through the
      # environment rather than expanded into the script text, so quotes, `$`
      # or backticks inside a secret cannot break or alter the command.
      - name: Log in to quay.io
        env:
          QUAY_USERNAME: ${{ secrets.quayUsername }}
          QUAY_PASSWORD: ${{ secrets.quayPassword }}
        run: |
          printf '%s\n' "$QUAY_PASSWORD" | crane auth login quay.io -u "$QUAY_USERNAME" --password-stdin

      # Compute the DockerHub image tags (branch ref, raw semver, commit SHA).
      # The caller's tag suffix is appended to each tag, `latest` included.
      # The step name carries the registry so it can be told apart from the
      # Quay metadata step in the job log.
      - name: Docker meta (DockerHub)
        id: meta
        uses: docker/metadata-action@v6
        with:
          images: |
            localai/localai-backends
          tags: |
            type=ref,event=branch
            type=semver,pattern={{raw}}
            type=sha
          flavor: |
            latest=auto
            suffix=${{ inputs.tag-suffix }},onlatest=true

      # Compute the quay.io image tags (branch ref, raw semver, commit SHA).
      # The caller's tag suffix is appended to each tag, `latest` included.
      # The step name carries the registry so it can be told apart from the
      # DockerHub metadata step in the job log.
      - name: Docker meta (Quay)
        id: quaymeta
        uses: docker/metadata-action@v6
        with:
          images: |
            quay.io/go-skynet/local-ai-backends
          tags: |
            type=ref,event=branch
            type=semver,pattern={{raw}}
            type=sha
          flavor: |
            latest=auto
            suffix=${{ inputs.tag-suffix }},onlatest=true

      # Push the tarball once per DockerHub tag computed by the `meta` step.
      # The tag list and tarball name arrive via the environment rather than
      # being expanded into the script text, and are quoted where used.
      - name: Push Docker image (DockerHub)
        env:
          IMAGE_TAGS: ${{ steps.meta.outputs.tags }}
          BACKEND_TAR: ${{ inputs.backend }}.tar
        run: |
          # Tags may be separated by newlines or commas; split on both.
          for tag in $(printf '%s\n' "$IMAGE_TAGS" | tr ',' '\n'); do
            crane push "$BACKEND_TAR" "$tag"
          done

      # Push the tarball once per quay.io tag computed by the `quaymeta` step.
      # The tag list and tarball name arrive via the environment rather than
      # being expanded into the script text, and are quoted where used.
      - name: Push Docker image (Quay)
        env:
          IMAGE_TAGS: ${{ steps.quaymeta.outputs.tags }}
          BACKEND_TAR: ${{ inputs.backend }}.tar
        run: |
          # Tags may be separated by newlines or commas; split on both.
          for tag in $(printf '%s\n' "$IMAGE_TAGS" | tr ',' '\n'); do
            crane push "$BACKEND_TAR" "$tag"
          done