---
# Reusable workflow that builds one LocalAI backend on a macOS runner and
# publishes the resulting image tarball.
#
# Jobs:
#   darwin-backend-build    Runs on `inputs.runs-on`. Installs the Homebrew
#                           toolchain, restores the caches, runs the make
#                           target matching the backend and uploads
#                           `backend-images/<backend>.tar` as an artifact.
#   darwin-backend-publish  Runs on ubuntu-latest for every event except
#                           `pull_request`. Downloads the artifact and pushes
#                           it with `crane` to DockerHub and quay.io under the
#                           tags computed by docker/metadata-action.
#
# All three caches (Homebrew, ccache, Python wheels) are restored on every
# run and saved only when the event is not `pull_request`.
name: 'build darwin python backend container images (reusable)'

on:
  workflow_call:
    inputs:
      backend:
        description: 'Backend to build'
        required: true
        type: string
      build-type:
        description: 'Build type (e.g., mps)'
        default: ''
        type: string
      use-pip:
        description: 'Use pip to install dependencies'
        default: false
        type: boolean
      lang:
        description: 'Programming language (e.g. go)'
        default: 'python'
        type: string
      go-version:
        description: 'Go version to use'
        default: '1.24.x'
        type: string
      tag-suffix:
        description: 'Tag suffix for the built image'
        required: true
        type: string
      runs-on:
        description: 'Runner to use'
        default: 'macOS-14'
        type: string
    secrets:
      # NOTE(review): the DockerHub secrets are declared optional, but the
      # publish job logs in to and pushes to DockerHub unconditionally, so
      # callers presumably always pass them — confirm.
      dockerUsername:
        required: false
      dockerPassword:
        required: false
      quayUsername:
        required: true
      quayPassword:
        required: true

jobs:
  darwin-backend-build:
    runs-on: ${{ inputs.runs-on }}
    strategy:
      matrix:
        go-version: ['${{ inputs.go-version }}']
    env:
      # Keep the brew Cellar stable across cache restores. Without these,
      # `brew install` would auto-update brew itself and re-link formulas,
      # mutating the very paths the cache just restored.
      HOMEBREW_NO_AUTO_UPDATE: '1'
      HOMEBREW_NO_INSTALL_CLEANUP: '1'
      HOMEBREW_NO_ANALYTICS: '1'
    steps:
      - name: Clone
        uses: actions/checkout@v6
        with:
          submodules: true

      - name: Setup Go ${{ matrix.go-version }}
        uses: actions/setup-go@v5
        with:
          go-version: ${{ matrix.go-version }}
          # Caches ~/go/pkg/mod and ~/Library/Caches/go-build keyed on go.sum.
          # Shared across every darwin matrix entry — first job in a run warms
          # it, the rest hit warm.
          cache: true

      # You can test your matrix by printing the current Go version
      - name: Display Go version
        run: go version

      # ---- Homebrew cache ----
      # macOS runners have no Docker daemon, so the BuildKit registry cache
      # used for Linux backend images (see .agents/ci-caching.md) doesn't
      # apply here. We cache the brew downloads + Cellar entries for the
      # formulas we install below. Read on every run; written only when the
      # event is not `pull_request` (e.g. master/tag pushes) and the restore
      # was not an exact hit — same policy as the Linux registry cache.
      - name: Restore Homebrew cache
        id: brew-cache
        uses: actions/cache/restore@v4
        with:
          path: |
            ~/Library/Caches/Homebrew/downloads
            /opt/homebrew/Cellar/protobuf
            /opt/homebrew/Cellar/grpc
            /opt/homebrew/Cellar/protoc-gen-go
            /opt/homebrew/Cellar/protoc-gen-go-grpc
            /opt/homebrew/Cellar/libomp
            /opt/homebrew/Cellar/llvm
            /opt/homebrew/Cellar/ccache
            /opt/homebrew/Cellar/blake3
            /opt/homebrew/Cellar/fmt
            /opt/homebrew/Cellar/hiredis
            /opt/homebrew/Cellar/xxhash
            /opt/homebrew/Cellar/zstd
          key: brew-${{ runner.os }}-${{ runner.arch }}-v1-${{ hashFiles('.github/workflows/backend_build_darwin.yml') }}

      - name: Dependencies
        run: |
          # ccache is always installed (used by the llama-cpp variant build) so
          # the brew cache content stays stable across every backend in the
          # matrix — they all share one cache key.
          # blake3, fmt, hiredis, xxhash, zstd are ccache's runtime dylib deps.
          # Without explicitly installing them, a brew cache-hit run restores
          # ccache's Cellar dir but skips installing those transitive deps,
          # and ccache fails at runtime with `dyld: Library not loaded`.
          brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm ccache blake3 fmt hiredis xxhash zstd

          # Force-reinstall ccache so brew re-validates its full runtime-dep
          # closure on every run. This is the durable fix: when the upstream
          # ccache formula gains a new transitive dep (as it has multiple times
          # already), we don't have to chase missing dylibs one at a time.
          # The downloads cache makes the reinstall fast (~5s on a hit).
          brew reinstall ccache

          # Same pattern for grpc: its CMake config (used by the llama-cpp
          # `grpc-server` target) does find_package(absl). The cache restores
          # /opt/homebrew/Cellar/grpc so brew above no-ops the install, but
          # abseil isn't in our Cellar cache list and never gets installed
          # alongside, leaving grpc's CMake unable to resolve it. Reinstalling
          # grpc re-validates and pulls abseil in, mirroring the ccache fix.
          brew reinstall grpc

          # The brew cache restores the Cellar dirs but NOT the bin symlinks
          # at /opt/homebrew/bin/*. brew install above sees the Cellar present
          # and decides "already installed" without re-linking, so on a cache-
          # hit run the formulas aren't on PATH. Force-link them; --overwrite
          # tolerates pre-existing symlinks from earlier installs. Linking is
          # deliberately best-effort: errors are discarded.
          brew link --overwrite protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm ccache blake3 fmt hiredis xxhash zstd 2>/dev/null || true

      - name: Save Homebrew cache
        if: github.event_name != 'pull_request' && steps.brew-cache.outputs.cache-hit != 'true'
        uses: actions/cache/save@v4
        with:
          # Must stay identical to the path list of "Restore Homebrew cache".
          path: |
            ~/Library/Caches/Homebrew/downloads
            /opt/homebrew/Cellar/protobuf
            /opt/homebrew/Cellar/grpc
            /opt/homebrew/Cellar/protoc-gen-go
            /opt/homebrew/Cellar/protoc-gen-go-grpc
            /opt/homebrew/Cellar/libomp
            /opt/homebrew/Cellar/llvm
            /opt/homebrew/Cellar/ccache
            /opt/homebrew/Cellar/blake3
            /opt/homebrew/Cellar/fmt
            /opt/homebrew/Cellar/hiredis
            /opt/homebrew/Cellar/xxhash
            /opt/homebrew/Cellar/zstd
          key: brew-${{ runner.os }}-${{ runner.arch }}-v1-${{ hashFiles('.github/workflows/backend_build_darwin.yml') }}

      # ---- ccache for llama.cpp CMake builds ----
      # Three CMake variants (fallback, grpc, rpc-server) compile the same
      # llama.cpp source tree with overlapping flags — ccache dedupes object
      # files across them. Key on the pinned LLAMA_VERSION so a pin bump
      # invalidates cleanly; restore-keys fall back to the latest entry for the
      # same pin so unchanged TUs stay warm even when the cache is fresh.
      - name: Compute llama.cpp version
        if: inputs.backend == 'llama-cpp'
        id: llama-version
        run: |
          # First LLAMA_VERSION assignment in the Makefile; strips the `?` of a
          # `?=` assignment and any spaces around the value.
          version=$(grep '^LLAMA_VERSION' backend/cpp/llama-cpp/Makefile | head -1 | cut -d= -f2 | cut -d'?' -f1 | tr -d ' ')
          if [ -z "${version}" ]; then
            # Not fatal: only the ccache key loses its version pin.
            echo "::warning::Could not parse LLAMA_VERSION from backend/cpp/llama-cpp/Makefile; the ccache key is not pinned to a llama.cpp version"
          fi
          echo "version=${version}" >> "$GITHUB_OUTPUT"

      - name: Restore ccache
        if: inputs.backend == 'llama-cpp'
        id: ccache-cache
        uses: actions/cache/restore@v4
        with:
          path: ~/Library/Caches/ccache
          # The run_id suffix makes every key unique, so restores always go
          # through restore-keys and every save creates a new entry.
          key: ccache-llama-${{ runner.arch }}-${{ steps.llama-version.outputs.version }}-${{ github.run_id }}
          restore-keys: |
            ccache-llama-${{ runner.arch }}-${{ steps.llama-version.outputs.version }}-

      - name: Configure ccache
        if: inputs.backend == 'llama-cpp'
        run: |
          # Export CCACHE_DIR before invoking ccache so the size limit is
          # written to the config file of the cache directory the build uses
          # (ccache resolves its config file from CCACHE_DIR when it is set).
          export CCACHE_DIR="$HOME/Library/Caches/ccache"
          mkdir -p "$CCACHE_DIR"
          ccache -M 2G
          ccache -z
          # llama-cpp-darwin.sh reads CMAKE_ARGS / CCACHE_DIR from env.
          {
            echo "CMAKE_ARGS=${CMAKE_ARGS:-} -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache"
            echo "CCACHE_DIR=${CCACHE_DIR}"
          } >> "$GITHUB_ENV"

      # ---- Python wheel cache (uv + pip) ----
      # Mirrors the Linux DEPS_REFRESH cadence (see .agents/ci-caching.md): the
      # ISO-week segment of the cache key forces at most one cold rebuild per
      # backend per week, automatically picking up newer wheels for unpinned
      # deps (torch, mlx, diffusers, …). Restore-keys fall back to the most
      # recent build of the same backend so off-week PRs still hit warm.
      - name: Compute weekly cache bucket
        if: inputs.lang == 'python'
        id: weekly
        # %G is the ISO week-numbering year that belongs with %V; the calendar
        # year (%Y) disagrees with it for a few days around New Year.
        run: echo "bucket=$(date -u +%G-W%V)" >> "$GITHUB_OUTPUT"

      - name: Restore Python wheel cache
        if: inputs.lang == 'python'
        id: pyenv-cache
        uses: actions/cache/restore@v4
        with:
          path: |
            ~/Library/Caches/pip
            ~/Library/Caches/uv
          key: pyenv-darwin-${{ inputs.backend }}-${{ steps.weekly.outputs.bucket }}-${{ hashFiles(format('backend/python/{0}/requirements*.txt', inputs.backend)) }}
          restore-keys: |
            pyenv-darwin-${{ inputs.backend }}-

      # llama-cpp on Darwin uses a bespoke build script
      # (scripts/build/llama-cpp-darwin.sh) that compiles three CMake variants
      # from backend/cpp/llama-cpp and bundles dylibs via otool — it doesn't
      # fit the build-darwin-go-backend / build-darwin-python-backend mold.
      # Drive it via its dedicated `backends/llama-cpp-darwin` make target
      # instead.
      - name: Build ${{ inputs.backend }}-darwin (llama-cpp)
        if: inputs.backend == 'llama-cpp'
        run: |
          make protogen-go
          make backends/llama-cpp-darwin

      - name: Build ds4 backend (Darwin Metal)
        if: inputs.backend == 'ds4'
        run: |
          make backends/ds4-darwin

      - name: Build ${{ inputs.backend }}-darwin
        if: inputs.backend != 'llama-cpp' && inputs.backend != 'ds4'
        env:
          # Inputs reach the shell as data instead of being pasted into the
          # script text. The WF_ prefix keeps them from being picked up as
          # make variables by `make protogen-go`.
          WF_BACKEND: ${{ inputs.backend }}
          WF_BUILD_TYPE: ${{ inputs.build-type }}
          WF_USE_PIP: ${{ inputs.use-pip }}
          WF_LANG: ${{ inputs.lang }}
        run: |
          make protogen-go
          BACKEND="$WF_BACKEND" BUILD_TYPE="$WF_BUILD_TYPE" USE_PIP="$WF_USE_PIP" \
            make "build-darwin-${WF_LANG}-backend"

      - name: ccache stats
        if: inputs.backend == 'llama-cpp'
        run: ccache -s

      - name: Save ccache
        if: inputs.backend == 'llama-cpp' && github.event_name != 'pull_request'
        uses: actions/cache/save@v4
        with:
          path: ~/Library/Caches/ccache
          key: ccache-llama-${{ runner.arch }}-${{ steps.llama-version.outputs.version }}-${{ github.run_id }}

      - name: Save Python wheel cache
        if: inputs.lang == 'python' && github.event_name != 'pull_request' && steps.pyenv-cache.outputs.cache-hit != 'true'
        uses: actions/cache/save@v4
        with:
          path: |
            ~/Library/Caches/pip
            ~/Library/Caches/uv
          key: pyenv-darwin-${{ inputs.backend }}-${{ steps.weekly.outputs.bucket }}-${{ hashFiles(format('backend/python/{0}/requirements*.txt', inputs.backend)) }}

      - name: Upload ${{ inputs.backend }}.tar
        uses: actions/upload-artifact@v7
        with:
          name: ${{ inputs.backend }}-tar
          path: backend-images/${{ inputs.backend }}.tar

  darwin-backend-publish:
    needs: darwin-backend-build
    if: github.event_name != 'pull_request'
    runs-on: ubuntu-latest
    steps:
      - name: Download ${{ inputs.backend }}.tar
        uses: actions/download-artifact@v8
        with:
          name: ${{ inputs.backend }}-tar
          path: .

      - name: Install crane
        run: |
          # -f makes an HTTP error fail the download instead of piping an
          # error page into tar; only the `crane` binary is extracted.
          curl -fsSL https://github.com/google/go-containerregistry/releases/latest/download/go-containerregistry_Linux_x86_64.tar.gz | tar -xz crane
          sudo mv crane /usr/local/bin/

      # Credentials are handed over through the environment so that shell
      # metacharacters in a secret cannot alter the command line.
      - name: Log in to DockerHub
        env:
          REGISTRY_USERNAME: ${{ secrets.dockerUsername }}
          REGISTRY_PASSWORD: ${{ secrets.dockerPassword }}
        run: |
          printf '%s\n' "$REGISTRY_PASSWORD" | crane auth login docker.io -u "$REGISTRY_USERNAME" --password-stdin

      - name: Log in to quay.io
        env:
          REGISTRY_USERNAME: ${{ secrets.quayUsername }}
          REGISTRY_PASSWORD: ${{ secrets.quayPassword }}
        run: |
          printf '%s\n' "$REGISTRY_PASSWORD" | crane auth login quay.io -u "$REGISTRY_USERNAME" --password-stdin

      - name: Docker meta (DockerHub)
        id: meta
        uses: docker/metadata-action@v6
        with:
          images: |
            localai/localai-backends
          tags: |
            type=ref,event=branch
            type=semver,pattern={{raw}}
            type=sha
          flavor: |
            latest=auto
            suffix=${{ inputs.tag-suffix }},onlatest=true

      - name: Docker meta (Quay)
        id: quaymeta
        uses: docker/metadata-action@v6
        with:
          images: |
            quay.io/go-skynet/local-ai-backends
          tags: |
            type=ref,event=branch
            type=semver,pattern={{raw}}
            type=sha
          flavor: |
            latest=auto
            suffix=${{ inputs.tag-suffix }},onlatest=true

      # The tag list is split on whitespace by the shell; commas are also
      # accepted as separators.
      - name: Push Docker image (DockerHub)
        env:
          IMAGE_TAGS: ${{ steps.meta.outputs.tags }}
          IMAGE_TAR: ${{ inputs.backend }}.tar
        run: |
          for tag in $(echo "$IMAGE_TAGS" | tr ',' '\n'); do
            crane push "$IMAGE_TAR" "$tag"
          done

      - name: Push Docker image (Quay)
        env:
          IMAGE_TAGS: ${{ steps.quaymeta.outputs.tags }}
          IMAGE_TAR: ${{ inputs.backend }}.tar
        run: |
          for tag in $(echo "$IMAGE_TAGS" | tr ',' '\n'); do
            crane push "$IMAGE_TAR" "$tag"
          done