From 35f6db8c76cdc658abc3c356f6193ecccdcd97df Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 10 May 2026 18:15:53 +0200 Subject: [PATCH] ci: split backend-jobs into single-arch and multi-arch matrices (#9746) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Symptom (run 25612992409): backend-merge-jobs failed with "quay.io/go-skynet/local-ai-backends@sha256:fdbd93ca...: not found" even though the per-arch build for -cpu-llama-cpp pushed that exact digest 14h31m earlier. Root cause: backend-merge-jobs was gated on the WHOLE backend-jobs matrix (`needs: backend-jobs`). The multi-arch -cpu-llama-cpp legs finished within 30 min, but a single-arch CUDA-12-llama-cpp slot in the same matrix queued for ~8h (max-parallel: 8 throttle) and then took ~6h to build cold. By the time it freed the merge to run, quay's GC had reaped the per-arch digests pushed by the fast multi-arch legs the day before. Fix: split the linux backend matrix in two. backend-jobs-multiarch - entries with `platform-tag` set (paired per-arch legs that feed backend-merge-jobs). backend-jobs-singlearch - entries without `platform-tag` (heavy standalone builds: CUDA, ROCm, Intel oneAPI, vLLM, sglang, etc.). backend-merge-jobs now `needs:` only backend-jobs-multiarch. The multi-arch matrix completes in ~2-3h, well inside quay's GC window. Heavy single-arch entries keep running independently with no merge dependency. scripts/changed-backends.js gains a splitByArch() helper that partitions filtered entries by whether `platform-tag` is set, and emits matrix-singlearch + matrix-multiarch + has-backends-singlearch + has-backends-multiarch outputs (replacing the previous combined matrix / has-backends pair). Applied in both the full-matrix and filtered-matrix code paths. Smoke test: 199 single-arch + 72 multi-arch + 35 darwin = 271 total entries; 36 merge-matrix entries (one per multi-arch backend pair). 
Matches expectation. Local `make backends/` is unaffected — the script's outputs only feed CI workflow matrices. Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Ettore Di Giacinto Co-authored-by: Ettore Di Giacinto --- .github/workflows/backend.yml | 70 ++++++++++++++++++++++++++++---- .github/workflows/backend_pr.yml | 52 +++++++++++++++++++----- scripts/changed-backends.js | 32 ++++++++++++--- 3 files changed, 129 insertions(+), 25 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 6e6c4ab33..b09e14d90 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -32,11 +32,13 @@ jobs: if: github.repository == 'mudler/LocalAI' runs-on: ubuntu-latest outputs: - matrix: ${{ steps.set-matrix.outputs.matrix }} - matrix-darwin: ${{ steps.set-matrix.outputs.matrix-darwin }} + matrix-singlearch: ${{ steps.set-matrix.outputs['matrix-singlearch'] }} + matrix-multiarch: ${{ steps.set-matrix.outputs['matrix-multiarch'] }} + matrix-darwin: ${{ steps.set-matrix.outputs['matrix-darwin'] }} merge-matrix: ${{ steps.set-matrix.outputs['merge-matrix'] }} - has-backends: ${{ steps.set-matrix.outputs.has-backends }} - has-backends-darwin: ${{ steps.set-matrix.outputs.has-backends-darwin }} + has-backends-singlearch: ${{ steps.set-matrix.outputs['has-backends-singlearch'] }} + has-backends-multiarch: ${{ steps.set-matrix.outputs['has-backends-multiarch'] }} + has-backends-darwin: ${{ steps.set-matrix.outputs['has-backends-darwin'] }} has-merges: ${{ steps.set-matrix.outputs['has-merges'] }} steps: - name: Checkout repository @@ -53,6 +55,9 @@ jobs: # Filter the backend matrix from .github/backend-matrix.yml against the # files changed by this push. Tag pushes set FORCE_ALL=true so the script # falls through to the full matrix (releases must rebuild everything). 
+ # The script splits the linux matrix into single-arch and multi-arch + # groups so backend-merge-jobs can `needs:` only the multi-arch one — + # see the comment block above the merge job for context. - name: Filter matrix for changed backends id: set-matrix env: @@ -61,9 +66,14 @@ jobs: FORCE_ALL: ${{ startsWith(github.ref, 'refs/tags/') && 'true' || 'false' }} run: bun run scripts/changed-backends.js - backend-jobs: + # Multi-arch backends — entries with a `platform-tag` set, paired with a + # sibling entry sharing the same `tag-suffix` (one amd64 leg, one arm64 + # leg). Their digests are the inputs to backend-merge-jobs, so they're in + # their own matrix to bound how long the merge waits before quay GCs the + # untagged digests. + backend-jobs-multiarch: needs: generate-matrix - if: needs.generate-matrix.outputs.has-backends == 'true' + if: needs.generate-matrix.outputs['has-backends-multiarch'] == 'true' uses: ./.github/workflows/backend_build.yml with: tag-latest: ${{ matrix.tag-latest }} @@ -90,11 +100,53 @@ jobs: strategy: fail-fast: false max-parallel: 8 - matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix) }} + matrix: ${{ fromJson(needs.generate-matrix.outputs['matrix-multiarch']) }} + # Single-arch backends — no `platform-tag`. Heavy ones (CUDA, ROCm, Intel + # oneAPI, vLLM/sglang) live here. Independent of the merge job: they can + # take their full ~6h cold without blocking manifest assembly for the + # multi-arch backends whose per-arch digests would otherwise sit untagged + # on quay long enough to be GC'd. 
+ backend-jobs-singlearch: + needs: generate-matrix + if: needs.generate-matrix.outputs['has-backends-singlearch'] == 'true' + uses: ./.github/workflows/backend_build.yml + with: + tag-latest: ${{ matrix.tag-latest }} + tag-suffix: ${{ matrix.tag-suffix }} + build-type: ${{ matrix.build-type }} + cuda-major-version: ${{ matrix.cuda-major-version }} + cuda-minor-version: ${{ matrix.cuda-minor-version }} + platforms: ${{ matrix.platforms }} + platform-tag: ${{ matrix.platform-tag || '' }} + runs-on: ${{ matrix.runs-on }} + builder-base-image: ${{ matrix.builder-base-image || '' }} + base-image: ${{ matrix.base-image }} + backend: ${{ matrix.backend }} + dockerfile: ${{ matrix.dockerfile }} + skip-drivers: ${{ matrix.skip-drivers }} + context: ${{ matrix.context }} + ubuntu-version: ${{ matrix.ubuntu-version }} + amdgpu-targets: ${{ matrix.amdgpu-targets || 'gfx908,gfx90a,gfx942,gfx950,gfx1030,gfx1100,gfx1101,gfx1102,gfx1151,gfx1200,gfx1201' }} + secrets: + dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} + dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} + quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} + quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} + strategy: + fail-fast: false + max-parallel: 8 + matrix: ${{ fromJson(needs.generate-matrix.outputs['matrix-singlearch']) }} + + # Merge per-arch digests into manifest lists. Depends ONLY on + # backend-jobs-multiarch — single-arch builds are independent and slow. + # Without this split, a 6h CUDA-12 single-arch job would gate the merge, + # leaving multi-arch digests untagged on quay long enough for quay's + # garbage collector to reap them and the merge step to fail with + # "manifest not found". 
backend-merge-jobs: - needs: [generate-matrix, backend-jobs] - if: needs.generate-matrix.outputs.has-merges == 'true' + needs: [generate-matrix, backend-jobs-multiarch] + if: needs.generate-matrix.outputs['has-merges'] == 'true' uses: ./.github/workflows/backend_merge.yml with: tag-latest: ${{ matrix.tag-latest }} diff --git a/.github/workflows/backend_pr.yml b/.github/workflows/backend_pr.yml index b01e5dddc..cf7a96a99 100644 --- a/.github/workflows/backend_pr.yml +++ b/.github/workflows/backend_pr.yml @@ -11,11 +11,13 @@ jobs: generate-matrix: runs-on: ubuntu-latest outputs: - matrix: ${{ steps.set-matrix.outputs.matrix }} - matrix-darwin: ${{ steps.set-matrix.outputs.matrix-darwin }} + matrix-singlearch: ${{ steps.set-matrix.outputs['matrix-singlearch'] }} + matrix-multiarch: ${{ steps.set-matrix.outputs['matrix-multiarch'] }} + matrix-darwin: ${{ steps.set-matrix.outputs['matrix-darwin'] }} merge-matrix: ${{ steps.set-matrix.outputs['merge-matrix'] }} - has-backends: ${{ steps.set-matrix.outputs.has-backends }} - has-backends-darwin: ${{ steps.set-matrix.outputs.has-backends-darwin }} + has-backends-singlearch: ${{ steps.set-matrix.outputs['has-backends-singlearch'] }} + has-backends-multiarch: ${{ steps.set-matrix.outputs['has-backends-multiarch'] }} + has-backends-darwin: ${{ steps.set-matrix.outputs['has-backends-darwin'] }} has-merges: ${{ steps.set-matrix.outputs['has-merges'] }} steps: - name: Checkout repository @@ -29,7 +31,9 @@ jobs: bun add js-yaml bun add @octokit/core - # filters the matrix in backend.yml + # filters the matrix in backend.yml; splits into single-arch and + # multi-arch groups so backend-merge-jobs can `needs:` only the latter + # (matches backend.yml's structure). 
- name: Filter matrix for changed backends id: set-matrix env: @@ -37,10 +41,10 @@ jobs: GITHUB_EVENT_PATH: ${{ github.event_path }} run: bun run scripts/changed-backends.js - backend-jobs: + backend-jobs-multiarch: needs: generate-matrix uses: ./.github/workflows/backend_build.yml - if: needs.generate-matrix.outputs.has-backends == 'true' + if: needs.generate-matrix.outputs['has-backends-multiarch'] == 'true' with: tag-latest: ${{ matrix.tag-latest }} tag-suffix: ${{ matrix.tag-suffix }} @@ -64,13 +68,41 @@ jobs: strategy: fail-fast: true max-parallel: 8 - matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix) }} + matrix: ${{ fromJson(needs.generate-matrix.outputs['matrix-multiarch']) }} + backend-jobs-singlearch: + needs: generate-matrix + uses: ./.github/workflows/backend_build.yml + if: needs.generate-matrix.outputs['has-backends-singlearch'] == 'true' + with: + tag-latest: ${{ matrix.tag-latest }} + tag-suffix: ${{ matrix.tag-suffix }} + build-type: ${{ matrix.build-type }} + cuda-major-version: ${{ matrix.cuda-major-version }} + cuda-minor-version: ${{ matrix.cuda-minor-version }} + platforms: ${{ matrix.platforms }} + platform-tag: ${{ matrix.platform-tag || '' }} + runs-on: ${{ matrix.runs-on }} + builder-base-image: ${{ matrix.builder-base-image || '' }} + base-image: ${{ matrix.base-image }} + backend: ${{ matrix.backend }} + dockerfile: ${{ matrix.dockerfile }} + skip-drivers: ${{ matrix.skip-drivers }} + context: ${{ matrix.context }} + ubuntu-version: ${{ matrix.ubuntu-version }} + amdgpu-targets: ${{ matrix.amdgpu-targets || 'gfx908,gfx90a,gfx942,gfx950,gfx1030,gfx1100,gfx1101,gfx1102,gfx1151,gfx1200,gfx1201' }} + secrets: + quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} + quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} + strategy: + fail-fast: true + max-parallel: 8 + matrix: ${{ fromJson(needs.generate-matrix.outputs['matrix-singlearch']) }} backend-merge-jobs: - needs: [generate-matrix, backend-jobs] + needs: [generate-matrix, 
backend-jobs-multiarch] # backend_merge.yml's push-side steps are all gated on # github.event_name != 'pull_request', so on a PR the merge job would # do nothing. Skip it entirely to avoid spinning up an empty runner. - if: github.event_name != 'pull_request' && needs.generate-matrix.outputs.has-merges == 'true' + if: github.event_name != 'pull_request' && needs.generate-matrix.outputs['has-merges'] == 'true' uses: ./.github/workflows/backend_merge.yml with: tag-latest: ${{ matrix.tag-latest }} diff --git a/scripts/changed-backends.js b/scripts/changed-backends.js index a006ea80f..95d9e356f 100644 --- a/scripts/changed-backends.js +++ b/scripts/changed-backends.js @@ -163,14 +163,29 @@ function computeMergeMatrix(entries) { return { include }; } +// Split a list of linux matrix entries into single-arch (no platform-tag) and +// multi-arch (platform-tag set, paired with a sibling entry sharing the same +// tag-suffix). The two are run as separate matrix jobs so backend-merge-jobs +// can `needs:` only the multi-arch one — slow single-arch builds (CUDA, ROCm, +// vLLM, etc.) don't block manifest assembly while their per-arch counterparts' +// untagged digests sit on quay long enough to be GC'd. +function splitByArch(entries) { + const multiarch = entries.filter(e => e['platform-tag']); + const singlearch = entries.filter(e => !e['platform-tag']); + return { multiarch, singlearch }; +} + function emitFullMatrix() { + const { multiarch, singlearch } = splitByArch(includes); const mergeMatrix = computeMergeMatrix(includes); const hasMerges = mergeMatrix.include.length > 0 ? 'true' : 'false'; fs.appendFileSync(process.env.GITHUB_OUTPUT, `run-all=true\n`); - fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends=true\n`); + fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends-singlearch=${singlearch.length > 0 ? 'true' : 'false'}\n`); + fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends-multiarch=${multiarch.length > 0 ? 
'true' : 'false'}\n`); fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends-darwin=true\n`); fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-merges=${hasMerges}\n`); - fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix=${JSON.stringify({ include: includes })}\n`); + fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix-singlearch=${JSON.stringify({ include: singlearch })}\n`); + fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix-multiarch=${JSON.stringify({ include: multiarch })}\n`); fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix-darwin=${JSON.stringify({ include: includesDarwin })}\n`); fs.appendFileSync(process.env.GITHUB_OUTPUT, `merge-matrix=${JSON.stringify(mergeMatrix)}\n`); for (const backend of allBackendPaths.keys()) { @@ -195,19 +210,24 @@ function emitFilteredMatrix(changedFiles) { console.log("Filtered files:", filtered); console.log("Filtered files Darwin:", filteredDarwin); - const hasBackends = filtered.length > 0 ? 'true' : 'false'; + const { multiarch, singlearch } = splitByArch(filtered); + const hasBackendsSinglearch = singlearch.length > 0 ? 'true' : 'false'; + const hasBackendsMultiarch = multiarch.length > 0 ? 'true' : 'false'; const hasBackendsDarwin = filteredDarwin.length > 0 ? 'true' : 'false'; - console.log("Has backends?:", hasBackends); + console.log("Has single-arch backends?:", hasBackendsSinglearch); + console.log("Has multi-arch backends?:", hasBackendsMultiarch); console.log("Has Darwin backends?:", hasBackendsDarwin); const mergeMatrix = computeMergeMatrix(filtered); const hasMerges = mergeMatrix.include.length > 0 ? 
'true' : 'false'; fs.appendFileSync(process.env.GITHUB_OUTPUT, `run-all=false\n`); - fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends=${hasBackends}\n`); + fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends-singlearch=${hasBackendsSinglearch}\n`); + fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends-multiarch=${hasBackendsMultiarch}\n`); fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends-darwin=${hasBackendsDarwin}\n`); fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-merges=${hasMerges}\n`); - fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix=${JSON.stringify({ include: filtered })}\n`); + fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix-singlearch=${JSON.stringify({ include: singlearch })}\n`); + fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix-multiarch=${JSON.stringify({ include: multiarch })}\n`); fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix-darwin=${JSON.stringify({ include: filteredDarwin })}\n`); fs.appendFileSync(process.env.GITHUB_OUTPUT, `merge-matrix=${JSON.stringify(mergeMatrix)}\n`);