diff --git a/.agents/ci-caching.md b/.agents/ci-caching.md
index 61e4715be..c1127b65c 100644
--- a/.agents/ci-caching.md
+++ b/.agents/ci-caching.md
@@ -76,6 +76,30 @@ Eviction is rarely needed in normal operation — `DEPS_REFRESH` handles weekly
 
 - The "Free Disk Space" / "Release space from worker" steps run on every job — these reclaim ~6 GB on `ubuntu-latest` runners. They are runner-state cleanup, not Docker, and BuildKit caches don't apply.
 - Intermediate artifacts of `Build and push (PR)` are not pushed anywhere — PRs only build for verification.
+- Darwin builds (see below) — macOS runners have no Docker daemon, so the registry-backed BuildKit cache cannot apply.
+
+## Darwin native caches
+
+`backend_build_darwin.yml` runs natively on `macOS-14` GitHub-hosted runners — there is no Docker, no BuildKit, no cross-job registry cache. Instead, the reusable workflow uses `actions/cache@v4` for four native caches that mirror the spirit of the Linux cache (warm by default, weekly refresh for unpinned Python deps, PRs read-only).
+
+| Cache | Path(s) | Key | Scope |
+|---|---|---|---|
+| Go modules + build | `~/go/pkg/mod`, `~/Library/Caches/go-build` | `go.sum` (managed by `actions/setup-go@v5` `cache: true`) | All darwin jobs |
+| Homebrew | `~/Library/Caches/Homebrew/downloads`, selected `/opt/homebrew/Cellar/*` | hash of `backend_build_darwin.yml` | All darwin jobs |
+| ccache (llama.cpp CMake) | `~/Library/Caches/ccache` | pinned `LLAMA_VERSION` from `backend/cpp/llama-cpp/Makefile` + `github.run_id` (restore falls back to the newest entry for the same pin via `restore-keys`) | `inputs.backend == 'llama-cpp'` only |
+| Python wheels (uv + pip) | `~/Library/Caches/pip`, `~/Library/Caches/uv` | `inputs.backend` + ISO week (`+%Y-W%V`) + hash of that backend's `requirements*.txt` | `inputs.lang == 'python'` only |
+
+Read/write semantics match the BuildKit cache: `actions/cache/restore` runs every time, `actions/cache/save` is gated on `github.event_name != 'pull_request'`. PRs read master's warm cache but never write back.
+
+The Python wheel cache uses the same ISO-week cache-buster as the Linux `DEPS_REFRESH` build-arg — same problem (unpinned `torch`/`mlx`/`diffusers`/`transformers` resolve to fresh wheels weekly), same ~one-cold-rebuild-per-week solution.
+
+The brew Cellar cache requires `HOMEBREW_NO_AUTO_UPDATE=1` and `HOMEBREW_NO_INSTALL_CLEANUP=1` (set as job-level env). Without those, `brew install` would mutate the very directories that were just restored, defeating the cache.
+
+For ccache, the workflow exports `CMAKE_ARGS=… -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache` and `CCACHE_DIR` via `$GITHUB_ENV` before running `make build-darwin-go-backend`. The configure step sets `CCACHE_DIR` before calling `ccache -M 2G`, so the size cap is written to the `ccache.conf` inside the cached directory — the one the build actually reads. The Makefile in `backend/cpp/llama-cpp/` already forwards `CMAKE_ARGS` through to each variant build (`fallback`, `grpc`, `rpc-server`), so no script changes are needed. The three variants share most TUs, so ccache dedupes object files across them.
+
+### Cache budget on Darwin
+
+GitHub Actions caches are limited to 10 GB per repo. Steady-state worst case: ~800 MB Go cache + ~2 GB brew Cellar + up to 2 GB ccache + ~1.5 GB × 5 python backends ≈ 12.3 GB in total — already above the cap, so GitHub will evict the least recently used entries (and because the ccache key is per run, several ccache entries can coexist until they are evicted). If the cap is hit and eviction starts costing warm hits, prefer collapsing the per-backend Python keys into a shared `pyenv-darwin-shared-` key (accepts more cross-backend churn for a smaller footprint) before reducing other caches.
 
 ## Touching the cache pipeline
 
diff --git a/.github/workflows/backend_build_darwin.yml b/.github/workflows/backend_build_darwin.yml
index afc93ab44..895fa391f 100644
--- a/.github/workflows/backend_build_darwin.yml
+++ b/.github/workflows/backend_build_darwin.yml
@@ -48,6 +48,13 @@ jobs:
     strategy:
       matrix:
         go-version: ['${{ inputs.go-version }}']
+    env:
+      # Keep the brew Cellar stable across cache restores. Without these,
+      # `brew install` would auto-update brew itself and re-link formulas,
+      # mutating the very paths the cache just restored.
+      HOMEBREW_NO_AUTO_UPDATE: '1'
+      HOMEBREW_NO_INSTALL_CLEANUP: '1'
+      HOMEBREW_NO_ANALYTICS: '1'
     steps:
       - name: Clone
         uses: actions/checkout@v6
@@ -58,21 +65,150 @@ jobs:
         uses: actions/setup-go@v5
         with:
           go-version: ${{ matrix.go-version }}
-          cache: false
+          # Caches ~/go/pkg/mod and ~/Library/Caches/go-build keyed on go.sum.
+          # Shared across every darwin matrix entry — first job in a run warms
+          # it, the rest hit warm.
+          cache: true
 
       # You can test your matrix by printing the current Go version
       - name: Display Go version
         run: go version
 
+      # ---- Homebrew cache ----
+      # macOS runners have no Docker daemon, so the BuildKit registry cache used
+      # for Linux backend images (see .agents/ci-caching.md) doesn't apply here.
+      # We cache the brew downloads + Cellar entries for the formulas we install
+      # below. Read on every run, write only on master/tag pushes — same policy
+      # as the Linux registry cache.
+      - name: Restore Homebrew cache
+        id: brew-cache
+        uses: actions/cache/restore@v4
+        with:
+          path: |
+            ~/Library/Caches/Homebrew/downloads
+            /opt/homebrew/Cellar/protobuf
+            /opt/homebrew/Cellar/grpc
+            /opt/homebrew/Cellar/protoc-gen-go
+            /opt/homebrew/Cellar/protoc-gen-go-grpc
+            /opt/homebrew/Cellar/libomp
+            /opt/homebrew/Cellar/llvm
+            /opt/homebrew/Cellar/ccache
+          key: brew-${{ runner.os }}-${{ runner.arch }}-v1-${{ hashFiles('.github/workflows/backend_build_darwin.yml') }}
+
       - name: Dependencies
         run: |
-          brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
+          # ccache is always installed (used by the llama-cpp variant build) so
+          # the brew cache content stays stable across every backend in the
+          # matrix — they all share one cache key.
+          # NOTE(review): only the Cellar kegs are cached, not the symlinks under
+          # /opt/homebrew/{bin,opt} or dependency kegs — confirm `brew install`
+          # re-links a restored keg on a cache hit (it may only warn "not linked").
+          brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm ccache
+
+      - name: Save Homebrew cache
+        if: github.event_name != 'pull_request' && steps.brew-cache.outputs.cache-hit != 'true'
+        uses: actions/cache/save@v4
+        with:
+          path: |
+            ~/Library/Caches/Homebrew/downloads
+            /opt/homebrew/Cellar/protobuf
+            /opt/homebrew/Cellar/grpc
+            /opt/homebrew/Cellar/protoc-gen-go
+            /opt/homebrew/Cellar/protoc-gen-go-grpc
+            /opt/homebrew/Cellar/libomp
+            /opt/homebrew/Cellar/llvm
+            /opt/homebrew/Cellar/ccache
+          key: brew-${{ runner.os }}-${{ runner.arch }}-v1-${{ hashFiles('.github/workflows/backend_build_darwin.yml') }}
+
+      # ---- ccache for llama.cpp CMake builds ----
+      # Three CMake variants (fallback, grpc, rpc-server) compile the same
+      # llama.cpp source tree with overlapping flags — ccache dedupes object
+      # files across them. Key on the pinned LLAMA_VERSION so a pin bump
+      # invalidates cleanly. The key ends in run_id, so a new run never hits
+      # the exact key; restore-keys fall back to the newest entry saved for the
+      # same pin, which keeps unchanged TUs warm from one run to the next.
+      - name: Compute llama.cpp version
+        if: inputs.backend == 'llama-cpp'
+        id: llama-version
+        run: |
+          version=$(grep '^LLAMA_VERSION' backend/cpp/llama-cpp/Makefile | head -1 | cut -d= -f2 | cut -d'?' -f1 | tr -d ' ')
+          echo "version=${version}" >> "$GITHUB_OUTPUT"
+
+      - name: Restore ccache
+        if: inputs.backend == 'llama-cpp'
+        id: ccache-cache
+        uses: actions/cache/restore@v4
+        with:
+          path: ~/Library/Caches/ccache
+          key: ccache-llama-${{ runner.arch }}-${{ steps.llama-version.outputs.version }}-${{ github.run_id }}
+          restore-keys: |
+            ccache-llama-${{ runner.arch }}-${{ steps.llama-version.outputs.version }}-
+
+      - name: Configure ccache
+        if: inputs.backend == 'llama-cpp'
+        run: |
+          # Set CCACHE_DIR for this step first: with CCACHE_DIR set, ccache keeps
+          # its config (including max_size) in $CCACHE_DIR/ccache.conf. Running
+          # `ccache -M` without it writes the limit to the default per-user
+          # config, which the build step (run with CCACHE_DIR set) never reads.
+          export CCACHE_DIR="$HOME/Library/Caches/ccache"
+          mkdir -p "$CCACHE_DIR"
+          ccache -M 2G
+          ccache -z
+          # llama-cpp-darwin.sh reads CMAKE_ARGS / CCACHE_DIR from env.
+          {
+            echo "CMAKE_ARGS=${CMAKE_ARGS:-} -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache"
+            echo "CCACHE_DIR=$CCACHE_DIR"
+          } >> "$GITHUB_ENV"
+
+      # ---- Python wheel cache (uv + pip) ----
+      # Mirrors the Linux DEPS_REFRESH cadence (see .agents/ci-caching.md): the
+      # ISO-week segment of the cache key forces at most one cold rebuild per
+      # backend per week, automatically picking up newer wheels for unpinned
+      # deps (torch, mlx, diffusers, …). Restore-keys fall back to the most
+      # recent build of the same backend so off-week PRs still hit warm.
+      - name: Compute weekly cache bucket
+        if: inputs.lang == 'python'
+        id: weekly
+        run: echo "bucket=$(date -u +%Y-W%V)" >> "$GITHUB_OUTPUT"
+
+      - name: Restore Python wheel cache
+        if: inputs.lang == 'python'
+        id: pyenv-cache
+        uses: actions/cache/restore@v4
+        with:
+          path: |
+            ~/Library/Caches/pip
+            ~/Library/Caches/uv
+          key: pyenv-darwin-${{ inputs.backend }}-${{ steps.weekly.outputs.bucket }}-${{ hashFiles(format('backend/python/{0}/requirements*.txt', inputs.backend)) }}
+          restore-keys: |
+            pyenv-darwin-${{ inputs.backend }}-
 
       - name: Build ${{ inputs.backend }}-darwin
         run: |
           make protogen-go
           BACKEND=${{ inputs.backend }} BUILD_TYPE=${{ inputs.build-type }} USE_PIP=${{ inputs.use-pip }} make build-darwin-${{ inputs.lang }}-backend
 
+      - name: ccache stats
+        if: inputs.backend == 'llama-cpp'
+        run: ccache -s
+
+      - name: Save ccache
+        if: inputs.backend == 'llama-cpp' && github.event_name != 'pull_request'
+        uses: actions/cache/save@v4
+        with:
+          path: ~/Library/Caches/ccache
+          key: ccache-llama-${{ runner.arch }}-${{ steps.llama-version.outputs.version }}-${{ github.run_id }}
+
+      - name: Save Python wheel cache
+        if: inputs.lang == 'python' && github.event_name != 'pull_request' && steps.pyenv-cache.outputs.cache-hit != 'true'
+        uses: actions/cache/save@v4
+        with:
+          path: |
+            ~/Library/Caches/pip
+            ~/Library/Caches/uv
+          key: pyenv-darwin-${{ inputs.backend }}-${{ steps.weekly.outputs.bucket }}-${{ hashFiles(format('backend/python/{0}/requirements*.txt', inputs.backend)) }}
+
       - name: Upload ${{ inputs.backend }}.tar
         uses: actions/upload-artifact@v7
         with: