mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-16 20:52:08 -04:00
ci: split backend-jobs into single-arch and multi-arch matrices (#9746)
Symptom (run 25612992409): backend-merge-jobs failed with
"quay.io/go-skynet/local-ai-backends@sha256:fdbd93ca...: not found"
even though the per-arch build for -cpu-llama-cpp pushed that exact
digest 14h31m earlier.
Root cause: backend-merge-jobs was gated on the WHOLE backend-jobs
matrix (`needs: backend-jobs`). The multi-arch -cpu-llama-cpp legs
finished within 30 min, but a single-arch CUDA-12-llama-cpp slot in
the same matrix queued for ~8h (max-parallel: 8 throttle) and then
took ~6h to build cold. By the time it freed the merge to run, quay's
GC had reaped the per-arch digests pushed by the fast multi-arch legs
the day before.
Fix: split the linux backend matrix in two.
backend-jobs-multiarch - entries with `platform-tag` set (paired
per-arch legs that feed backend-merge-jobs).
backend-jobs-singlearch - entries without `platform-tag` (heavy
standalone builds: CUDA, ROCm, Intel oneAPI, vLLM, sglang, etc.).
backend-merge-jobs now `needs:` only backend-jobs-multiarch. The
multi-arch matrix completes in ~2-3h, well inside quay's GC window.
Heavy single-arch entries keep running independently with no merge
dependency.
scripts/changed-backends.js gains a splitByArch() helper that
partitions filtered entries by whether `platform-tag` is set, and
emits matrix-singlearch + matrix-multiarch + has-backends-singlearch
+ has-backends-multiarch outputs (replacing the previous combined
matrix / has-backends pair). Applied in both the full-matrix and
filtered-matrix code paths. Smoke test: 199 single-arch + 72 multi-
arch = 271 linux entries (plus 35 darwin); 36 merge-matrix entries
(one per multi-arch backend pair). Matches expectation.
Local `make backends/<name>` is unaffected — the script's outputs
only feed CI workflow matrices.
Assisted-by: Claude:claude-opus-4-7
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
70
.github/workflows/backend.yml
vendored
70
.github/workflows/backend.yml
vendored
@@ -32,11 +32,13 @@ jobs:
|
||||
if: github.repository == 'mudler/LocalAI'
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||
matrix-darwin: ${{ steps.set-matrix.outputs.matrix-darwin }}
|
||||
matrix-singlearch: ${{ steps.set-matrix.outputs['matrix-singlearch'] }}
|
||||
matrix-multiarch: ${{ steps.set-matrix.outputs['matrix-multiarch'] }}
|
||||
matrix-darwin: ${{ steps.set-matrix.outputs['matrix-darwin'] }}
|
||||
merge-matrix: ${{ steps.set-matrix.outputs['merge-matrix'] }}
|
||||
has-backends: ${{ steps.set-matrix.outputs.has-backends }}
|
||||
has-backends-darwin: ${{ steps.set-matrix.outputs.has-backends-darwin }}
|
||||
has-backends-singlearch: ${{ steps.set-matrix.outputs['has-backends-singlearch'] }}
|
||||
has-backends-multiarch: ${{ steps.set-matrix.outputs['has-backends-multiarch'] }}
|
||||
has-backends-darwin: ${{ steps.set-matrix.outputs['has-backends-darwin'] }}
|
||||
has-merges: ${{ steps.set-matrix.outputs['has-merges'] }}
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
@@ -53,6 +55,9 @@ jobs:
|
||||
# Filter the backend matrix from .github/backend-matrix.yml against the
|
||||
# files changed by this push. Tag pushes set FORCE_ALL=true so the script
|
||||
# falls through to the full matrix (releases must rebuild everything).
|
||||
# The script splits the linux matrix into single-arch and multi-arch
|
||||
# groups so backend-merge-jobs can `needs:` only the multi-arch one —
|
||||
# see the comment block above the merge job for context.
|
||||
- name: Filter matrix for changed backends
|
||||
id: set-matrix
|
||||
env:
|
||||
@@ -61,9 +66,14 @@ jobs:
|
||||
FORCE_ALL: ${{ startsWith(github.ref, 'refs/tags/') && 'true' || 'false' }}
|
||||
run: bun run scripts/changed-backends.js
|
||||
|
||||
backend-jobs:
|
||||
# Multi-arch backends — entries with a `platform-tag` set, paired with a
|
||||
# sibling entry sharing the same `tag-suffix` (one amd64 leg, one arm64
|
||||
# leg). Their digests are the inputs to backend-merge-jobs, so they're in
|
||||
# their own matrix to bound how long the merge waits before quay GCs the
|
||||
# untagged digests.
|
||||
backend-jobs-multiarch:
|
||||
needs: generate-matrix
|
||||
if: needs.generate-matrix.outputs.has-backends == 'true'
|
||||
if: needs.generate-matrix.outputs['has-backends-multiarch'] == 'true'
|
||||
uses: ./.github/workflows/backend_build.yml
|
||||
with:
|
||||
tag-latest: ${{ matrix.tag-latest }}
|
||||
@@ -90,11 +100,53 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
max-parallel: 8
|
||||
matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix) }}
|
||||
matrix: ${{ fromJson(needs.generate-matrix.outputs['matrix-multiarch']) }}
|
||||
|
||||
# Single-arch backends — no `platform-tag`. Heavy ones (CUDA, ROCm, Intel
|
||||
# oneAPI, vLLM/sglang) live here. Independent of the merge job: they can
|
||||
# take their full ~6h cold without blocking manifest assembly for the
|
||||
# multi-arch backends whose per-arch digests would otherwise sit untagged
|
||||
# on quay long enough to be GC'd.
|
||||
backend-jobs-singlearch:
|
||||
needs: generate-matrix
|
||||
if: needs.generate-matrix.outputs['has-backends-singlearch'] == 'true'
|
||||
uses: ./.github/workflows/backend_build.yml
|
||||
with:
|
||||
tag-latest: ${{ matrix.tag-latest }}
|
||||
tag-suffix: ${{ matrix.tag-suffix }}
|
||||
build-type: ${{ matrix.build-type }}
|
||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||
platforms: ${{ matrix.platforms }}
|
||||
platform-tag: ${{ matrix.platform-tag || '' }}
|
||||
runs-on: ${{ matrix.runs-on }}
|
||||
builder-base-image: ${{ matrix.builder-base-image || '' }}
|
||||
base-image: ${{ matrix.base-image }}
|
||||
backend: ${{ matrix.backend }}
|
||||
dockerfile: ${{ matrix.dockerfile }}
|
||||
skip-drivers: ${{ matrix.skip-drivers }}
|
||||
context: ${{ matrix.context }}
|
||||
ubuntu-version: ${{ matrix.ubuntu-version }}
|
||||
amdgpu-targets: ${{ matrix.amdgpu-targets || 'gfx908,gfx90a,gfx942,gfx950,gfx1030,gfx1100,gfx1101,gfx1102,gfx1151,gfx1200,gfx1201' }}
|
||||
secrets:
|
||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
max-parallel: 8
|
||||
matrix: ${{ fromJson(needs.generate-matrix.outputs['matrix-singlearch']) }}
|
||||
|
||||
# Merge per-arch digests into manifest lists. Depends ONLY on
|
||||
# backend-jobs-multiarch — single-arch builds are independent and slow.
|
||||
# Without this split, a 6h CUDA-12 single-arch job would gate the merge,
|
||||
# leaving multi-arch digests untagged on quay long enough for quay's
|
||||
# garbage collector to reap them and the merge step to fail with
|
||||
# "manifest not found".
|
||||
backend-merge-jobs:
|
||||
needs: [generate-matrix, backend-jobs]
|
||||
if: needs.generate-matrix.outputs.has-merges == 'true'
|
||||
needs: [generate-matrix, backend-jobs-multiarch]
|
||||
if: needs.generate-matrix.outputs['has-merges'] == 'true'
|
||||
uses: ./.github/workflows/backend_merge.yml
|
||||
with:
|
||||
tag-latest: ${{ matrix.tag-latest }}
|
||||
|
||||
52
.github/workflows/backend_pr.yml
vendored
52
.github/workflows/backend_pr.yml
vendored
@@ -11,11 +11,13 @@ jobs:
|
||||
generate-matrix:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||
matrix-darwin: ${{ steps.set-matrix.outputs.matrix-darwin }}
|
||||
matrix-singlearch: ${{ steps.set-matrix.outputs['matrix-singlearch'] }}
|
||||
matrix-multiarch: ${{ steps.set-matrix.outputs['matrix-multiarch'] }}
|
||||
matrix-darwin: ${{ steps.set-matrix.outputs['matrix-darwin'] }}
|
||||
merge-matrix: ${{ steps.set-matrix.outputs['merge-matrix'] }}
|
||||
has-backends: ${{ steps.set-matrix.outputs.has-backends }}
|
||||
has-backends-darwin: ${{ steps.set-matrix.outputs.has-backends-darwin }}
|
||||
has-backends-singlearch: ${{ steps.set-matrix.outputs['has-backends-singlearch'] }}
|
||||
has-backends-multiarch: ${{ steps.set-matrix.outputs['has-backends-multiarch'] }}
|
||||
has-backends-darwin: ${{ steps.set-matrix.outputs['has-backends-darwin'] }}
|
||||
has-merges: ${{ steps.set-matrix.outputs['has-merges'] }}
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
@@ -29,7 +31,9 @@ jobs:
|
||||
bun add js-yaml
|
||||
bun add @octokit/core
|
||||
|
||||
# filters the matrix in backend.yml
|
||||
# filters the matrix in backend.yml; splits into single-arch and
|
||||
# multi-arch groups so backend-merge-jobs can `needs:` only the latter
|
||||
# (matches backend.yml's structure).
|
||||
- name: Filter matrix for changed backends
|
||||
id: set-matrix
|
||||
env:
|
||||
@@ -37,10 +41,10 @@ jobs:
|
||||
GITHUB_EVENT_PATH: ${{ github.event_path }}
|
||||
run: bun run scripts/changed-backends.js
|
||||
|
||||
backend-jobs:
|
||||
backend-jobs-multiarch:
|
||||
needs: generate-matrix
|
||||
uses: ./.github/workflows/backend_build.yml
|
||||
if: needs.generate-matrix.outputs.has-backends == 'true'
|
||||
if: needs.generate-matrix.outputs['has-backends-multiarch'] == 'true'
|
||||
with:
|
||||
tag-latest: ${{ matrix.tag-latest }}
|
||||
tag-suffix: ${{ matrix.tag-suffix }}
|
||||
@@ -64,13 +68,41 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: true
|
||||
max-parallel: 8
|
||||
matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix) }}
|
||||
matrix: ${{ fromJson(needs.generate-matrix.outputs['matrix-multiarch']) }}
|
||||
backend-jobs-singlearch:
|
||||
needs: generate-matrix
|
||||
uses: ./.github/workflows/backend_build.yml
|
||||
if: needs.generate-matrix.outputs['has-backends-singlearch'] == 'true'
|
||||
with:
|
||||
tag-latest: ${{ matrix.tag-latest }}
|
||||
tag-suffix: ${{ matrix.tag-suffix }}
|
||||
build-type: ${{ matrix.build-type }}
|
||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||
platforms: ${{ matrix.platforms }}
|
||||
platform-tag: ${{ matrix.platform-tag || '' }}
|
||||
runs-on: ${{ matrix.runs-on }}
|
||||
builder-base-image: ${{ matrix.builder-base-image || '' }}
|
||||
base-image: ${{ matrix.base-image }}
|
||||
backend: ${{ matrix.backend }}
|
||||
dockerfile: ${{ matrix.dockerfile }}
|
||||
skip-drivers: ${{ matrix.skip-drivers }}
|
||||
context: ${{ matrix.context }}
|
||||
ubuntu-version: ${{ matrix.ubuntu-version }}
|
||||
amdgpu-targets: ${{ matrix.amdgpu-targets || 'gfx908,gfx90a,gfx942,gfx950,gfx1030,gfx1100,gfx1101,gfx1102,gfx1151,gfx1200,gfx1201' }}
|
||||
secrets:
|
||||
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
strategy:
|
||||
fail-fast: true
|
||||
max-parallel: 8
|
||||
matrix: ${{ fromJson(needs.generate-matrix.outputs['matrix-singlearch']) }}
|
||||
backend-merge-jobs:
|
||||
needs: [generate-matrix, backend-jobs]
|
||||
needs: [generate-matrix, backend-jobs-multiarch]
|
||||
# backend_merge.yml's push-side steps are all gated on
|
||||
# github.event_name != 'pull_request', so on a PR the merge job would
|
||||
# do nothing. Skip it entirely to avoid spinning up an empty runner.
|
||||
if: github.event_name != 'pull_request' && needs.generate-matrix.outputs.has-merges == 'true'
|
||||
if: github.event_name != 'pull_request' && needs.generate-matrix.outputs['has-merges'] == 'true'
|
||||
uses: ./.github/workflows/backend_merge.yml
|
||||
with:
|
||||
tag-latest: ${{ matrix.tag-latest }}
|
||||
|
||||
@@ -163,14 +163,29 @@ function computeMergeMatrix(entries) {
|
||||
return { include };
|
||||
}
|
||||
|
||||
// Split a list of linux matrix entries into single-arch (no platform-tag) and
|
||||
// multi-arch (platform-tag set, paired with a sibling entry sharing the same
|
||||
// tag-suffix). The two are run as separate matrix jobs so backend-merge-jobs
|
||||
// can `needs:` only the multi-arch one — slow single-arch builds (CUDA, ROCm,
|
||||
// vLLM, etc.) don't block manifest assembly while their per-arch counterparts'
|
||||
// untagged digests sit on quay long enough to be GC'd.
|
||||
// Partition a list of linux matrix entries into two groups in a single pass:
// entries with a truthy `platform-tag` (multi-arch legs that feed the merge
// job) and entries without one (standalone single-arch builds). Returning
// them separately lets the CI emit independent matrices so the merge job
// only has to wait on the multi-arch group.
function splitByArch(entries) {
  const multiarch = [];
  const singlearch = [];
  for (const entry of entries) {
    // Truthiness check on purpose: an empty-string platform-tag counts as unset.
    (entry['platform-tag'] ? multiarch : singlearch).push(entry);
  }
  return { multiarch, singlearch };
}
|
||||
|
||||
function emitFullMatrix() {
|
||||
const { multiarch, singlearch } = splitByArch(includes);
|
||||
const mergeMatrix = computeMergeMatrix(includes);
|
||||
const hasMerges = mergeMatrix.include.length > 0 ? 'true' : 'false';
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `run-all=true\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends=true\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends-singlearch=${singlearch.length > 0 ? 'true' : 'false'}\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends-multiarch=${multiarch.length > 0 ? 'true' : 'false'}\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends-darwin=true\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-merges=${hasMerges}\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix=${JSON.stringify({ include: includes })}\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix-singlearch=${JSON.stringify({ include: singlearch })}\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix-multiarch=${JSON.stringify({ include: multiarch })}\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix-darwin=${JSON.stringify({ include: includesDarwin })}\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `merge-matrix=${JSON.stringify(mergeMatrix)}\n`);
|
||||
for (const backend of allBackendPaths.keys()) {
|
||||
@@ -195,19 +210,24 @@ function emitFilteredMatrix(changedFiles) {
|
||||
console.log("Filtered files:", filtered);
|
||||
console.log("Filtered files Darwin:", filteredDarwin);
|
||||
|
||||
const hasBackends = filtered.length > 0 ? 'true' : 'false';
|
||||
const { multiarch, singlearch } = splitByArch(filtered);
|
||||
const hasBackendsSinglearch = singlearch.length > 0 ? 'true' : 'false';
|
||||
const hasBackendsMultiarch = multiarch.length > 0 ? 'true' : 'false';
|
||||
const hasBackendsDarwin = filteredDarwin.length > 0 ? 'true' : 'false';
|
||||
console.log("Has backends?:", hasBackends);
|
||||
console.log("Has single-arch backends?:", hasBackendsSinglearch);
|
||||
console.log("Has multi-arch backends?:", hasBackendsMultiarch);
|
||||
console.log("Has Darwin backends?:", hasBackendsDarwin);
|
||||
|
||||
const mergeMatrix = computeMergeMatrix(filtered);
|
||||
const hasMerges = mergeMatrix.include.length > 0 ? 'true' : 'false';
|
||||
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `run-all=false\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends=${hasBackends}\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends-singlearch=${hasBackendsSinglearch}\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends-multiarch=${hasBackendsMultiarch}\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends-darwin=${hasBackendsDarwin}\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-merges=${hasMerges}\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix=${JSON.stringify({ include: filtered })}\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix-singlearch=${JSON.stringify({ include: singlearch })}\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix-multiarch=${JSON.stringify({ include: multiarch })}\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix-darwin=${JSON.stringify({ include: filteredDarwin })}\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `merge-matrix=${JSON.stringify(mergeMatrix)}\n`);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user