ci: pilot per-arch split + manifest merge for faster-whisper and llama-cpp-quantization (#9727)

ci: pilot per-arch split for faster-whisper and llama-cpp-quantization

Convert two backends from QEMU-emulated multi-arch (linux/amd64,linux/arm64
on a single ubuntu-latest) to native per-arch + manifest-list merge:
- amd64 leg on ubuntu-latest
- arm64 leg on ubuntu-24.04-arm (native, ~5-10x faster than emulated)
- merge job assembles both digests under the final tag via
  docker buildx imagetools create

Backends piloted:
- -cpu-faster-whisper (small Python, fast baseline)
- -cpu-llama-cpp-quantization (heavier compile path, stress test)

Infrastructure changes that the rest of Phase 2 (Tasks 2.5+) will reuse:
- .github/backend-matrix.yml entries gain a `platform-tag` field
  ('amd64'/'arm64') for matrix entries that participate in the split.
  Other entries omit it; backend_build.yml already defaults missing
  values to '' (empty cache key suffix preserved as cache<suffix>-).
- backend.yml + backend_pr.yml forward `platform-tag` from matrix to
  the reusable backend_build.yml.
- scripts/changed-backends.js groups filtered entries by tag-suffix
  and emits a `merge-matrix` (plus `has-merges`) for groups of size>=2.
  Singletons aren't merged.
- backend.yml + backend_pr.yml gain a `backend-merge-jobs` job that
  consumes merge-matrix and calls backend_merge.yml after backend-jobs.
  The PR variant is also event-gated, so the merge job (a no-op on PRs)
  doesn't even start.

The other 34 multi-arch entries are unchanged in this PR -- Task 2.5
fans out the same shape to them once the pilot is observed green.

Assisted-by: Claude:claude-opus-4-7

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
LocalAI [bot]
2026-05-09 00:04:42 +02:00
committed by GitHub
parent 624fa946f8
commit cb68cd1cf4
4 changed files with 114 additions and 2 deletions

View File

@@ -122,12 +122,51 @@ async function getChangedFilesForPush(event) {
return res.data.files.map(f => f.filename);
}
/**
 * Build the merge-matrix for backends that are built as several per-arch legs.
 *
 * Matrix entries are bucketed by their `tag-suffix`; entries whose
 * `tag-suffix` is missing or empty are ignored. Every suffix that shows up
 * two or more times produces one merge entry, since several legs publishing
 * under the same final tag need a manifest list assembled across them. A
 * suffix that shows up only once is left out: a single-arch backend pushes
 * by digest and has nothing to merge.
 *
 * @param {Array<Object>} entries - matrix entries to inspect (not modified).
 * @returns {{include: Array<Object>}} one `{ 'tag-suffix', 'tag-latest' }`
 *   object per multi-leg suffix, ordered by first appearance of the suffix.
 *   `tag-latest` is taken from the first leg, or '' when that leg has none.
 */
function computeMergeMatrix(entries) {
  // Missing/falsy tag-latest is normalised to '' so legs compare consistently.
  const latestOf = (leg) => leg['tag-latest'] || '';

  // A Map keeps suffixes in first-seen order, which fixes the output order.
  const legsBySuffix = new Map();
  for (const entry of entries) {
    const suffix = entry['tag-suffix'];
    if (!suffix) {
      continue;
    }
    const known = legsBySuffix.get(suffix);
    if (known === undefined) {
      legsBySuffix.set(suffix, [entry]);
    } else {
      known.push(entry);
    }
  }

  const include = [];
  for (const [suffix, legs] of legsBySuffix.entries()) {
    if (legs.length >= 2) {
      const chosen = latestOf(legs[0]);
      // All legs end up under the same final tag, so they have to agree on
      // whether that tag is also :latest. A disagreement is an authoring
      // mistake in the matrix: report it once per group in the CI log
      // rather than quietly going with the first leg's value.
      const legsDisagree = legs.some((leg) => latestOf(leg) !== chosen);
      if (legsDisagree) {
        console.warn(`tag-latest mismatch in group ${suffix}: legs disagree (using ${chosen})`);
      }
      include.push({ 'tag-suffix': suffix, 'tag-latest': chosen });
    }
  }
  return { include };
}
function emitFullMatrix() {
const mergeMatrix = computeMergeMatrix(includes);
const hasMerges = mergeMatrix.include.length > 0 ? 'true' : 'false';
fs.appendFileSync(process.env.GITHUB_OUTPUT, `run-all=true\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends=true\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends-darwin=true\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-merges=${hasMerges}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix=${JSON.stringify({ include: includes })}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix-darwin=${JSON.stringify({ include: includesDarwin })}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `merge-matrix=${JSON.stringify(mergeMatrix)}\n`);
for (const backend of allBackendPaths.keys()) {
fs.appendFileSync(process.env.GITHUB_OUTPUT, `${backend}=true\n`);
}
@@ -155,11 +194,16 @@ function emitFilteredMatrix(changedFiles) {
console.log("Has backends?:", hasBackends);
console.log("Has Darwin backends?:", hasBackendsDarwin);
const mergeMatrix = computeMergeMatrix(filtered);
const hasMerges = mergeMatrix.include.length > 0 ? 'true' : 'false';
fs.appendFileSync(process.env.GITHUB_OUTPUT, `run-all=false\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends=${hasBackends}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends-darwin=${hasBackendsDarwin}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-merges=${hasMerges}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix=${JSON.stringify({ include: filtered })}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix-darwin=${JSON.stringify({ include: filteredDarwin })}\n`);
fs.appendFileSync(process.env.GITHUB_OUTPUT, `merge-matrix=${JSON.stringify(mergeMatrix)}\n`);
// Per-backend boolean outputs
for (const [backend, pathPrefix] of allBackendPaths) {