ci: pilot per-arch split + manifest merge for faster-whisper and llama-cpp-quantization (#9727)

ci: pilot per-arch split for faster-whisper and llama-cpp-quantization

Convert two backends from QEMU-emulated multi-arch (linux/amd64,linux/arm64 on a single ubuntu-latest) to native per-arch + manifest-list merge:
- amd64 leg on ubuntu-latest
- arm64 leg on ubuntu-24.04-arm (native, ~5-10x faster than emulated)
- merge job assembles both digests under the final tag via docker buildx imagetools create

Backends piloted:
- -cpu-faster-whisper (small Python, fast baseline)
- -cpu-llama-cpp-quantization (heavier compile path, stress test)

Infrastructure changes that the rest of Phase 2 (Tasks 2.5+) will reuse:
- .github/backend-matrix.yml entries gain a `platform-tag` field ('amd64'/'arm64') for matrix entries that participate in the split. Other entries omit it; backend_build.yml already defaults missing values to '' (empty cache key suffix preserved as cache<suffix>-).
- backend.yml + backend_pr.yml forward `platform-tag` from matrix to the reusable backend_build.yml.
- scripts/changed-backends.js groups filtered entries by tag-suffix and emits a `merge-matrix` (plus `has-merges`) for groups of size>=2. Singletons aren't merged.
- backend.yml + backend_pr.yml gain a `backend-merge-jobs` job that consumes merge-matrix and calls backend_merge.yml after backend-jobs. PR variant is also event-gated so the no-op-on-PR merge job doesn't even start.

The other 34 multi-arch entries are unchanged in this PR -- Task 2.5 fans out the same shape to them once the pilot is observed green.

Assisted-by: Claude:claude-opus-4-7
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
2026-05-17 04:56:52 -04:00 · 2026-05-09 00:04:42 +02:00
parent 624fa946f8
commit cb68cd1cf4
4 changed files with 114 additions and 2 deletions
--- a/scripts/changed-backends.js
+++ b/scripts/changed-backends.js
@@ -122,12 +122,51 @@ async function getChangedFilesForPush(event) {
  return res.data.files.map(f => f.filename);
 }

+// Build the merge-matrix from the given matrix entries: group them by
+// 'tag-suffix' (entries without one are skipped) and emit one
+// { 'tag-suffix', 'tag-latest' } item for every suffix that appears 2+ times.
+// That's the trigger for "this backend has multiple per-arch legs and we need
+// a manifest list"; singletons push by digest and aren't merged. Returns { include }.
+function computeMergeMatrix(entries) {
+  const groups = new Map();
+  for (const item of entries) {
+    if (!item['tag-suffix']) continue;
+    const key = item['tag-suffix'];
+    if (!groups.has(key)) groups.set(key, []);
+    groups.get(key).push(item);
+  }
+  const include = [];
+  for (const [tagSuffix, group] of groups) {
+    if (group.length < 2) continue;
+    // tag-latest must agree across legs (a missing value counts as '') — they
+    // publish under the same final tag, so disagreeing on whether it's also
+    // the :latest tag is an authoring bug. Warn once per group so a Task 2.5
+    // fan-out typo shows in CI logs instead of silently shipping the leg-0 value.
+    const first = group[0]['tag-latest'] || '';
+    for (const m of group) {
+      if ((m['tag-latest'] || '') !== first) {
+        console.warn(`tag-latest mismatch in group ${tagSuffix}: legs disagree (using ${first})`);
+        break;
+      }
+    }
+    include.push({
+      'tag-suffix': tagSuffix,
+      'tag-latest': first,
+    });
+  }
+  return { include };
+}
+
 function emitFullMatrix() {
+  const mergeMatrix = computeMergeMatrix(includes);
+  const hasMerges = mergeMatrix.include.length > 0 ? 'true' : 'false';
  fs.appendFileSync(process.env.GITHUB_OUTPUT, `run-all=true\n`);
  fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends=true\n`);
  fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends-darwin=true\n`);
+  fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-merges=${hasMerges}\n`);
  fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix=${JSON.stringify({ include: includes })}\n`);
  fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix-darwin=${JSON.stringify({ include: includesDarwin })}\n`);
+  fs.appendFileSync(process.env.GITHUB_OUTPUT, `merge-matrix=${JSON.stringify(mergeMatrix)}\n`);
  for (const backend of allBackendPaths.keys()) {
    fs.appendFileSync(process.env.GITHUB_OUTPUT, `${backend}=true\n`);
  }
@@ -155,11 +194,16 @@ function emitFilteredMatrix(changedFiles) {
  console.log("Has backends?:", hasBackends);
  console.log("Has Darwin backends?:", hasBackendsDarwin);

+  const mergeMatrix = computeMergeMatrix(filtered);
+  const hasMerges = mergeMatrix.include.length > 0 ? 'true' : 'false';
+
  fs.appendFileSync(process.env.GITHUB_OUTPUT, `run-all=false\n`);
  fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends=${hasBackends}\n`);
  fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends-darwin=${hasBackendsDarwin}\n`);
+  fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-merges=${hasMerges}\n`);
  fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix=${JSON.stringify({ include: filtered })}\n`);
  fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix-darwin=${JSON.stringify({ include: filteredDarwin })}\n`);
+  fs.appendFileSync(process.env.GITHUB_OUTPUT, `merge-matrix=${JSON.stringify(mergeMatrix)}\n`);

  // Per-backend boolean outputs
  for (const [backend, pathPrefix] of allBackendPaths) {