ci: tag every backend digest, including singletons

backend_build.yml pushes by canonical digest only (push-by-digest=true,
no tags applied at build time). User-facing tagging happens in
backend_merge.yml's `imagetools create` step. Before this commit,
scripts/changed-backends.js emitted a merge entry only for tag-suffixes
with 2+ legs, so every single-arch backend (CUDA/ROCm/Intel Python
images, vLLM, sglang, transformers, diffusers, ...) pushed its digest
untagged and stayed that way until quay's GC reaped it. Symptom: tag
releases shipped multi-arch backends tagged correctly, but no
v<X>-gpu-nvidia-cuda-12-vllm (or any singleton variant) ever appeared
in the registry.

Changes:

- scripts/changed-backends.js drops the `group.length < 2` skip and
  emits two merge matrices, one per arch class, so each downstream
  merge job can `needs:` only its corresponding build matrix.
- backend.yml splits backend-merge-jobs into multiarch and singlearch
  variants. The split preserves PR #9746's fix: slow singlearch CUDA
  builds (~6h) must not gate multiarch merges, or quay's GC reaps the
  multiarch per-arch digests before they're tagged.
- backend_pr.yml mirrors the split.
- backend_build.yml renames the digest artifact from
  `digests<suffix>-<platform-tag>` to
  `digests<suffix>--<platform-tag-or-"single">`. The `--` separator
  prevents the merge-side glob from over-matching sibling backends
  whose tag-suffix extends ours, i.e. ours is a prefix of theirs (e.g.
  the -cpu-vllm merge matching -cpu-vllm-omni artifacts, or -cpu-mlx
  matching -cpu-mlx-audio); the `single` placeholder keeps the name
  well-formed when platform-tag is empty.
- backend_merge.yml updates the download pattern to match.

Verified locally: a tag-push event now expands to 36 multiarch merge
entries (= 72 builds / 2 legs) and 199 singlearch merge entries (one
per singleton, including -gpu-nvidia-cuda-12-vllm at index 24).

Assisted-by: Claude:claude-opus-4-7
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2026-05-11 13:22:00 +00:00
parent b9e81dbfd4
commit ea00199554
5 changed files with 94 additions and 32 deletions

View File

@@ -35,11 +35,13 @@ jobs:
matrix-singlearch: ${{ steps.set-matrix.outputs['matrix-singlearch'] }}
matrix-multiarch: ${{ steps.set-matrix.outputs['matrix-multiarch'] }}
matrix-darwin: ${{ steps.set-matrix.outputs['matrix-darwin'] }}
merge-matrix: ${{ steps.set-matrix.outputs['merge-matrix'] }}
merge-matrix-multiarch: ${{ steps.set-matrix.outputs['merge-matrix-multiarch'] }}
merge-matrix-singlearch: ${{ steps.set-matrix.outputs['merge-matrix-singlearch'] }}
has-backends-singlearch: ${{ steps.set-matrix.outputs['has-backends-singlearch'] }}
has-backends-multiarch: ${{ steps.set-matrix.outputs['has-backends-multiarch'] }}
has-backends-darwin: ${{ steps.set-matrix.outputs['has-backends-darwin'] }}
has-merges: ${{ steps.set-matrix.outputs['has-merges'] }}
has-merges-multiarch: ${{ steps.set-matrix.outputs['has-merges-multiarch'] }}
has-merges-singlearch: ${{ steps.set-matrix.outputs['has-merges-singlearch'] }}
steps:
- name: Checkout repository
uses: actions/checkout@v6
@@ -138,15 +140,21 @@ jobs:
max-parallel: 8
matrix: ${{ fromJson(needs.generate-matrix.outputs['matrix-singlearch']) }}
# Merge per-arch digests into manifest lists. Depends ONLY on
# backend-jobs-multiarch — single-arch builds are independent and slow.
# Without this split, a 6h CUDA-12 single-arch job would gate the merge,
# leaving multi-arch digests untagged on quay long enough for quay's
# garbage collector to reap them and the merge step to fail with
# "manifest not found".
backend-merge-jobs:
# Apply tags to per-arch digests via `imagetools create`. Split into two
# jobs that mirror the build split so each merge waits ONLY on its
# corresponding build matrix:
#
# - backend-merge-jobs-multiarch needs backend-jobs-multiarch (~2-3h)
# - backend-merge-jobs-singlearch needs backend-jobs-singlearch (up to ~6h)
#
# If a single shared merge job depended on both, slow CUDA singlearch
# builds would block multiarch merges long enough for quay's GC to reap
# the multiarch per-arch digests (the bug fixed by PR #9746). Singletons
# also need a merge step because backend_build.yml pushes by canonical
# digest only — no tags are applied at build time.
backend-merge-jobs-multiarch:
needs: [generate-matrix, backend-jobs-multiarch]
if: needs.generate-matrix.outputs['has-merges'] == 'true'
if: needs.generate-matrix.outputs['has-merges-multiarch'] == 'true'
uses: ./.github/workflows/backend_merge.yml
with:
tag-latest: ${{ matrix.tag-latest }}
@@ -158,7 +166,23 @@ jobs:
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
fail-fast: false
matrix: ${{ fromJson(needs.generate-matrix.outputs['merge-matrix']) }}
matrix: ${{ fromJson(needs.generate-matrix.outputs['merge-matrix-multiarch']) }}
backend-merge-jobs-singlearch:
needs: [generate-matrix, backend-jobs-singlearch]
if: needs.generate-matrix.outputs['has-merges-singlearch'] == 'true'
uses: ./.github/workflows/backend_merge.yml
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
fail-fast: false
matrix: ${{ fromJson(needs.generate-matrix.outputs['merge-matrix-singlearch']) }}
backend-jobs-darwin:
needs: generate-matrix

View File

@@ -228,11 +228,18 @@ jobs:
digest="${{ steps.build.outputs.digest }}"
touch "/tmp/digests/${digest#sha256:}"
# Artifact name uses a `--` separator between tag-suffix and platform-tag
# to avoid prefix collisions during the merge job's pattern-based download.
# Tag-suffixes are not prefix-disjoint (e.g. -gpu-nvidia-cuda-12-vllm is a
# prefix of -gpu-nvidia-cuda-12-vllm-omni); a single `-` separator plus the
# merge-side `digests<tag-suffix>-*` glob would let one merge over-match
# the other backend's artifacts. The `single` placeholder for an empty
# platform-tag (single-arch entries) keeps the artifact name from ending
# in a bare `--`.
- name: Upload digest artifact
if: github.event_name != 'pull_request'
uses: actions/upload-artifact@v4
with:
name: digests${{ inputs.tag-suffix }}-${{ inputs.platform-tag }}
name: digests${{ inputs.tag-suffix }}--${{ inputs.platform-tag || 'single' }}
path: /tmp/digests/*
if-no-files-found: error
retention-days: 1

View File

@@ -34,10 +34,14 @@ jobs:
env:
quay_username: ${{ secrets.quayUsername }}
steps:
# `--` separator anchors the glob so we don't over-match sibling
# backends whose tag-suffix starts with ours (e.g. the -cpu-vllm merge
# pulling in -cpu-vllm-omni artifacts). Must stay in sync with the
# upload-artifact name in backend_build.yml.
- name: Download digests
uses: actions/download-artifact@v4
with:
pattern: digests${{ inputs.tag-suffix }}-*
pattern: digests${{ inputs.tag-suffix }}--*
merge-multiple: true
path: /tmp/digests

View File

@@ -14,11 +14,13 @@ jobs:
matrix-singlearch: ${{ steps.set-matrix.outputs['matrix-singlearch'] }}
matrix-multiarch: ${{ steps.set-matrix.outputs['matrix-multiarch'] }}
matrix-darwin: ${{ steps.set-matrix.outputs['matrix-darwin'] }}
merge-matrix: ${{ steps.set-matrix.outputs['merge-matrix'] }}
merge-matrix-multiarch: ${{ steps.set-matrix.outputs['merge-matrix-multiarch'] }}
merge-matrix-singlearch: ${{ steps.set-matrix.outputs['merge-matrix-singlearch'] }}
has-backends-singlearch: ${{ steps.set-matrix.outputs['has-backends-singlearch'] }}
has-backends-multiarch: ${{ steps.set-matrix.outputs['has-backends-multiarch'] }}
has-backends-darwin: ${{ steps.set-matrix.outputs['has-backends-darwin'] }}
has-merges: ${{ steps.set-matrix.outputs['has-merges'] }}
has-merges-multiarch: ${{ steps.set-matrix.outputs['has-merges-multiarch'] }}
has-merges-singlearch: ${{ steps.set-matrix.outputs['has-merges-singlearch'] }}
steps:
- name: Checkout repository
uses: actions/checkout@v6
@@ -97,12 +99,12 @@ jobs:
fail-fast: true
max-parallel: 8
matrix: ${{ fromJson(needs.generate-matrix.outputs['matrix-singlearch']) }}
backend-merge-jobs:
backend-merge-jobs-multiarch:
needs: [generate-matrix, backend-jobs-multiarch]
# backend_merge.yml's push-side steps are all gated on
# github.event_name != 'pull_request', so on a PR the merge job would
# do nothing. Skip it entirely to avoid spinning up an empty runner.
if: github.event_name != 'pull_request' && needs.generate-matrix.outputs['has-merges'] == 'true'
if: github.event_name != 'pull_request' && needs.generate-matrix.outputs['has-merges-multiarch'] == 'true'
uses: ./.github/workflows/backend_merge.yml
with:
tag-latest: ${{ matrix.tag-latest }}
@@ -112,7 +114,21 @@ jobs:
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
fail-fast: false
matrix: ${{ fromJson(needs.generate-matrix.outputs['merge-matrix']) }}
matrix: ${{ fromJson(needs.generate-matrix.outputs['merge-matrix-multiarch']) }}
backend-merge-jobs-singlearch:
needs: [generate-matrix, backend-jobs-singlearch]
if: github.event_name != 'pull_request' && needs.generate-matrix.outputs['has-merges-singlearch'] == 'true'
uses: ./.github/workflows/backend_merge.yml
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
secrets:
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
fail-fast: false
matrix: ${{ fromJson(needs.generate-matrix.outputs['merge-matrix-singlearch']) }}
backend-jobs-darwin:
needs: generate-matrix
uses: ./.github/workflows/backend_build_darwin.yml

View File

@@ -128,11 +128,15 @@ async function getChangedFilesForPush(event) {
return res.data.files.map(f => f.filename);
}
// Group filtered linux matrix entries by tag-suffix and emit a merge-matrix
// entry for any tag-suffix that appears 2+ times. That's the trigger for
// "this backend has multiple per-arch legs and we need a manifest list".
// Singletons aren't merged — single-arch backends push by digest and don't
// need a manifest list assembled across legs.
// Group matrix entries by tag-suffix and emit a merge-matrix entry per group.
// Both multi-leg groups (per-arch fan-out) and singletons get one entry each:
// the build job pushes by digest only with no tags applied, so every backend
// needs a downstream merge step to apply its tags via `imagetools create`,
// regardless of how many per-arch legs feed it. Callers split entries by
// arch class first (see splitByArch) and call this once per class so the
// resulting matrices can be wired to merge jobs that `needs:` only their
// corresponding build matrix — preventing slow single-arch builds from
// gating multi-arch merges (the bug fixed in PR #9746).
function computeMergeMatrix(entries) {
const groups = new Map();
for (const item of entries) {
@@ -143,7 +147,6 @@ function computeMergeMatrix(entries) {
}
const include = [];
for (const [tagSuffix, group] of groups) {
if (group.length < 2) continue;
// tag-latest must agree across legs — they're going to publish under
// the same final tag, so disagreeing on whether it's also the :latest
// tag is an authoring bug. Warn loudly so a Task 2.5 fan-out typo is
@@ -177,17 +180,21 @@ function splitByArch(entries) {
function emitFullMatrix() {
const { multiarch, singlearch } = splitByArch(includes);
const mergeMatrix = computeMergeMatrix(includes);
const hasMerges = mergeMatrix.include.length > 0 ? 'true' : 'false';
const mergeMatrixMultiarch = computeMergeMatrix(multiarch);
const mergeMatrixSinglearch = computeMergeMatrix(singlearch);
const hasMergesMultiarch = mergeMatrixMultiarch.include.length > 0 ? 'true' : 'false';
const hasMergesSinglearch = mergeMatrixSinglearch.include.length > 0 ? 'true' : 'false';
fs.appendFileSync(process.env.GITHUB_OUTPUT, `run-all=true\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends-singlearch=${singlearch.length > 0 ? 'true' : 'false'}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends-multiarch=${multiarch.length > 0 ? 'true' : 'false'}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends-darwin=true\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-merges=${hasMerges}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-merges-multiarch=${hasMergesMultiarch}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-merges-singlearch=${hasMergesSinglearch}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix-singlearch=${JSON.stringify({ include: singlearch })}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix-multiarch=${JSON.stringify({ include: multiarch })}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix-darwin=${JSON.stringify({ include: includesDarwin })}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `merge-matrix=${JSON.stringify(mergeMatrix)}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `merge-matrix-multiarch=${JSON.stringify(mergeMatrixMultiarch)}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `merge-matrix-singlearch=${JSON.stringify(mergeMatrixSinglearch)}\n`);
for (const backend of allBackendPaths.keys()) {
fs.appendFileSync(process.env.GITHUB_OUTPUT, `${backend}=true\n`);
}
@@ -218,18 +225,22 @@ function emitFilteredMatrix(changedFiles) {
console.log("Has multi-arch backends?:", hasBackendsMultiarch);
console.log("Has Darwin backends?:", hasBackendsDarwin);
const mergeMatrix = computeMergeMatrix(filtered);
const hasMerges = mergeMatrix.include.length > 0 ? 'true' : 'false';
const mergeMatrixMultiarch = computeMergeMatrix(multiarch);
const mergeMatrixSinglearch = computeMergeMatrix(singlearch);
const hasMergesMultiarch = mergeMatrixMultiarch.include.length > 0 ? 'true' : 'false';
const hasMergesSinglearch = mergeMatrixSinglearch.include.length > 0 ? 'true' : 'false';
fs.appendFileSync(process.env.GITHUB_OUTPUT, `run-all=false\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends-singlearch=${hasBackendsSinglearch}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends-multiarch=${hasBackendsMultiarch}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends-darwin=${hasBackendsDarwin}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-merges=${hasMerges}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-merges-multiarch=${hasMergesMultiarch}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-merges-singlearch=${hasMergesSinglearch}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix-singlearch=${JSON.stringify({ include: singlearch })}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix-multiarch=${JSON.stringify({ include: multiarch })}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix-darwin=${JSON.stringify({ include: filteredDarwin })}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `merge-matrix=${JSON.stringify(mergeMatrix)}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `merge-matrix-multiarch=${JSON.stringify(mergeMatrixMultiarch)}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `merge-matrix-singlearch=${JSON.stringify(mergeMatrixSinglearch)}\n`);
// Per-backend boolean outputs
for (const [backend, pathPrefix] of allBackendPaths) {