From cb68cd1cf495a08cf56cb752ec5c1d78e780d101 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 9 May 2026 00:04:42 +0200 Subject: [PATCH] ci: pilot per-arch split + manifest merge for faster-whisper and llama-cpp-quantization (#9727) ci: pilot per-arch split for faster-whisper and llama-cpp-quantization Convert two backends from QEMU-emulated multi-arch (linux/amd64,linux/arm64 on a single ubuntu-latest) to native per-arch + manifest-list merge: - amd64 leg on ubuntu-latest - arm64 leg on ubuntu-24.04-arm (native, ~5-10x faster than emulated) - merge job assembles both digests under the final tag via docker buildx imagetools create Backends piloted: - -cpu-faster-whisper (small Python, fast baseline) - -cpu-llama-cpp-quantization (heavier compile path, stress test) Infrastructure changes that the rest of Phase 2 (Tasks 2.5+) will reuse: - .github/backend-matrix.yml entries gain a `platform-tag` field ('amd64'/'arm64') for matrix entries that participate in the split. Other entries omit it; backend_build.yml already defaults missing values to '' (empty cache key suffix preserved as cache-). - backend.yml + backend_pr.yml forward `platform-tag` from matrix to the reusable backend_build.yml. - scripts/changed-backends.js groups filtered entries by tag-suffix and emits a `merge-matrix` (plus `has-merges`) for groups of size>=2. Singletons aren't merged. - backend.yml + backend_pr.yml gain a `backend-merge-jobs` job that consumes merge-matrix and calls backend_merge.yml after backend-jobs. PR variant is also event-gated so the no-op-on-PR merge job doesn't even start. The other 34 multi-arch entries are unchanged in this PR -- Task 2.5 fans out the same shape to them once the pilot is observed green. Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Ettore Di Giacinto Co-authored-by: Ettore Di Giacinto --- .github/backend-matrix.yml | 34 ++++++++++++++++++++++-- .github/workflows/backend.yml | 19 ++++++++++++++ .github/workflows/backend_pr.yml | 19 ++++++++++++++ scripts/changed-backends.js | 44 ++++++++++++++++++++++++++++++++ 4 files changed, 114 insertions(+), 2 deletions(-) diff --git a/.github/backend-matrix.yml b/.github/backend-matrix.yml index e7b8afeb9..28a4e3978 100644 --- a/.github/backend-matrix.yml +++ b/.github/backend-matrix.yml @@ -118,7 +118,8 @@ include: - build-type: '' cuda-major-version: "" cuda-minor-version: "" - platforms: 'linux/amd64,linux/arm64' + platforms: 'linux/amd64' + platform-tag: 'amd64' tag-latest: 'auto' tag-suffix: '-cpu-faster-whisper' runs-on: 'ubuntu-latest' @@ -128,6 +129,20 @@ include: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' + - build-type: '' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/arm64' + platform-tag: 'arm64' + tag-latest: 'auto' + tag-suffix: '-cpu-faster-whisper' + runs-on: 'ubuntu-24.04-arm' + base-image: "ubuntu:24.04" + skip-drivers: 'true' + backend: "faster-whisper" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' - build-type: '' cuda-major-version: "" cuda-minor-version: "" @@ -157,7 +172,8 @@ include: - build-type: '' cuda-major-version: "" cuda-minor-version: "" - platforms: 'linux/amd64,linux/arm64' + platforms: 'linux/amd64' + platform-tag: 'amd64' tag-latest: 'auto' tag-suffix: '-cpu-llama-cpp-quantization' runs-on: 'ubuntu-latest' @@ -167,6 +183,20 @@ include: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' + - build-type: '' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/arm64' + platform-tag: 'arm64' + tag-latest: 'auto' + tag-suffix: '-cpu-llama-cpp-quantization' + runs-on: 'ubuntu-24.04-arm' + base-image: "ubuntu:24.04" + skip-drivers: 'true' + backend: "llama-cpp-quantization" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' - build-type: '' cuda-major-version: "" cuda-minor-version: "" diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index f0bc98537..8799f00b1 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -34,8 +34,10 @@ jobs: outputs: matrix: ${{ steps.set-matrix.outputs.matrix }} matrix-darwin: ${{ steps.set-matrix.outputs.matrix-darwin }} + merge-matrix: ${{ steps.set-matrix.outputs['merge-matrix'] }} has-backends: ${{ steps.set-matrix.outputs.has-backends }} has-backends-darwin: ${{ steps.set-matrix.outputs.has-backends-darwin }} + has-merges: ${{ steps.set-matrix.outputs['has-merges'] }} steps: - name: Checkout repository uses: actions/checkout@v6 @@ -70,6 +72,7 @@ jobs: cuda-major-version: ${{ matrix.cuda-major-version }} cuda-minor-version: ${{ matrix.cuda-minor-version }} platforms: ${{ matrix.platforms }} + platform-tag: ${{ matrix.platform-tag || '' }} runs-on: ${{ matrix.runs-on }} base-image: ${{ matrix.base-image }} backend: ${{ matrix.backend }} @@ -88,6 +91,22 @@ jobs: max-parallel: 8 matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix) }} + backend-merge-jobs: + needs: [generate-matrix, backend-jobs] + if: needs.generate-matrix.outputs.has-merges == 'true' + uses: ./.github/workflows/backend_merge.yml + with: + tag-latest: ${{ matrix.tag-latest }} + tag-suffix: ${{ matrix.tag-suffix }} + secrets: + dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} + dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} + quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} + quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} + strategy: + fail-fast: false + matrix: ${{ fromJson(needs.generate-matrix.outputs['merge-matrix']) }} + backend-jobs-darwin: needs: generate-matrix if: needs.generate-matrix.outputs.has-backends-darwin == 'true' diff --git a/.github/workflows/backend_pr.yml b/.github/workflows/backend_pr.yml index 6af08467a..4ca1fa1d9 100644 --- a/.github/workflows/backend_pr.yml +++ b/.github/workflows/backend_pr.yml @@ -13,8 +13,10 @@ jobs: outputs: matrix: ${{ steps.set-matrix.outputs.matrix }} matrix-darwin: ${{ steps.set-matrix.outputs.matrix-darwin }} + merge-matrix: ${{ steps.set-matrix.outputs['merge-matrix'] }} has-backends: ${{ steps.set-matrix.outputs.has-backends }} has-backends-darwin: ${{ steps.set-matrix.outputs.has-backends-darwin }} + has-merges: ${{ steps.set-matrix.outputs['has-merges'] }} steps: - name: Checkout repository uses: actions/checkout@v6 @@ -46,6 +48,7 @@ jobs: cuda-major-version: ${{ matrix.cuda-major-version }} cuda-minor-version: ${{ matrix.cuda-minor-version }} platforms: ${{ matrix.platforms }} + platform-tag: ${{ matrix.platform-tag || '' }} runs-on: ${{ matrix.runs-on }} base-image: ${{ matrix.base-image }} backend: ${{ matrix.backend }} @@ -61,6 +64,22 @@ jobs: fail-fast: true max-parallel: 8 matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix) }} + backend-merge-jobs: + needs: [generate-matrix, backend-jobs] + # backend_merge.yml's push-side steps are all gated on + # github.event_name != 'pull_request', so on a PR the merge job would + # do nothing. Skip it entirely to avoid spinning up an empty runner. + if: github.event_name != 'pull_request' && needs.generate-matrix.outputs.has-merges == 'true' + uses: ./.github/workflows/backend_merge.yml + with: + tag-latest: ${{ matrix.tag-latest }} + tag-suffix: ${{ matrix.tag-suffix }} + secrets: + quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} + quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} + strategy: + fail-fast: false + matrix: ${{ fromJson(needs.generate-matrix.outputs['merge-matrix']) }} backend-jobs-darwin: needs: generate-matrix uses: ./.github/workflows/backend_build_darwin.yml diff --git a/scripts/changed-backends.js b/scripts/changed-backends.js index 7a8f627b3..06ce5bb40 100644 --- a/scripts/changed-backends.js +++ b/scripts/changed-backends.js @@ -122,12 +122,51 @@ async function getChangedFilesForPush(event) { return res.data.files.map(f => f.filename); } +// Group filtered linux matrix entries by tag-suffix and emit a merge-matrix +// entry for any tag-suffix that appears 2+ times. That's the trigger for +// "this backend has multiple per-arch legs and we need a manifest list". +// Singletons aren't merged — single-arch backends push by digest and don't +// need a manifest list assembled across legs. +function computeMergeMatrix(entries) { + const groups = new Map(); + for (const item of entries) { + if (!item['tag-suffix']) continue; + const key = item['tag-suffix']; + if (!groups.has(key)) groups.set(key, []); + groups.get(key).push(item); + } + const include = []; + for (const [tagSuffix, group] of groups) { + if (group.length < 2) continue; + // tag-latest must agree across legs — they're going to publish under + // the same final tag, so disagreeing on whether it's also the :latest + // tag is an authoring bug. Warn loudly so a Task 2.5 fan-out typo is + // visible in CI logs instead of silently shipping the leg-0 value. + const first = group[0]['tag-latest'] || ''; + for (const m of group) { + if ((m['tag-latest'] || '') !== first) { + console.warn(`tag-latest mismatch in group ${tagSuffix}: legs disagree (using ${first})`); + break; + } + } + include.push({ + 'tag-suffix': tagSuffix, + 'tag-latest': first, + }); + } + return { include }; +} + function emitFullMatrix() { + const mergeMatrix = computeMergeMatrix(includes); + const hasMerges = mergeMatrix.include.length > 0 ? 'true' : 'false'; fs.appendFileSync(process.env.GITHUB_OUTPUT, `run-all=true\n`); fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends=true\n`); fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends-darwin=true\n`); + fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-merges=${hasMerges}\n`); fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix=${JSON.stringify({ include: includes })}\n`); fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix-darwin=${JSON.stringify({ include: includesDarwin })}\n`); + fs.appendFileSync(process.env.GITHUB_OUTPUT, `merge-matrix=${JSON.stringify(mergeMatrix)}\n`); for (const backend of allBackendPaths.keys()) { fs.appendFileSync(process.env.GITHUB_OUTPUT, `${backend}=true\n`); } @@ -155,11 +194,16 @@ function emitFilteredMatrix(changedFiles) { console.log("Has backends?:", hasBackends); console.log("Has Darwin backends?:", hasBackendsDarwin); + const mergeMatrix = computeMergeMatrix(filtered); + const hasMerges = mergeMatrix.include.length > 0 ? 'true' : 'false'; + fs.appendFileSync(process.env.GITHUB_OUTPUT, `run-all=false\n`); fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends=${hasBackends}\n`); fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends-darwin=${hasBackendsDarwin}\n`); + fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-merges=${hasMerges}\n`); fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix=${JSON.stringify({ include: filtered })}\n`); fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix-darwin=${JSON.stringify({ include: filteredDarwin })}\n`); + fs.appendFileSync(process.env.GITHUB_OUTPUT, `merge-matrix=${JSON.stringify(mergeMatrix)}\n`); // Per-backend boolean outputs for (const [backend, pathPrefix] of allBackendPaths) {