mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-17 04:56:52 -04:00
ci: pilot per-arch split + manifest merge for faster-whisper and llama-cpp-quantization (#9727)
ci: pilot per-arch split for faster-whisper and llama-cpp-quantization
Convert two backends from QEMU-emulated multi-arch (linux/amd64,linux/arm64
on a single ubuntu-latest) to native per-arch + manifest-list merge:
- amd64 leg on ubuntu-latest
- arm64 leg on ubuntu-24.04-arm (native, ~5-10x faster than emulated)
- merge job assembles both digests under the final tag via
docker buildx imagetools create
Backends piloted:
- -cpu-faster-whisper (small Python, fast baseline)
- -cpu-llama-cpp-quantization (heavier compile path, stress test)
Infrastructure changes that the rest of Phase 2 (Tasks 2.5+) will reuse:
- .github/backend-matrix.yml entries gain a `platform-tag` field
('amd64'/'arm64') for matrix entries that participate in the split.
Other entries omit it; backend_build.yml already defaults missing
values to '' (empty cache key suffix preserved as cache<suffix>-).
- backend.yml + backend_pr.yml forward `platform-tag` from matrix to
the reusable backend_build.yml.
- scripts/changed-backends.js groups filtered entries by tag-suffix
and emits a `merge-matrix` (plus `has-merges`) for groups of size>=2.
Singletons aren't merged.
- backend.yml + backend_pr.yml gain a `backend-merge-jobs` job that
consumes merge-matrix and calls backend_merge.yml after backend-jobs.
PR variant is also event-gated so the no-op-on-PR merge job doesn't
even start.
The other 34 multi-arch entries are unchanged in this PR -- Task 2.5
fans out the same shape to them once the pilot is observed green.
Assisted-by: Claude:claude-opus-4-7
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
@@ -122,12 +122,51 @@ async function getChangedFilesForPush(event) {
|
||||
return res.data.files.map(f => f.filename);
|
||||
}
|
||||
|
||||
// Group filtered linux matrix entries by tag-suffix and emit a merge-matrix
// entry for any tag-suffix that appears 2+ times. That's the trigger for
// "this backend has multiple per-arch legs and we need a manifest list".
// Singletons aren't merged — single-arch backends push by digest and don't
// need a manifest list assembled across legs.
/**
 * Compute the manifest-merge matrix from the filtered linux build matrix.
 *
 * @param {Array<Object>} entries - linux matrix entries; entries taking part
 *   in a per-arch split share a 'tag-suffix' (and differ in 'platform-tag').
 * @returns {{include: Array<{'tag-suffix': string, 'tag-latest': *}>}}
 *   one merge entry per tag-suffix with 2+ legs; shape matches what
 *   `fromJSON` expects for a GitHub Actions job matrix.
 */
function computeMergeMatrix(entries) {
  // Bucket entries by tag-suffix; entries without one can never merge.
  const groups = new Map();
  for (const item of entries) {
    const key = item['tag-suffix'];
    if (!key) continue;
    if (!groups.has(key)) groups.set(key, []);
    groups.get(key).push(item);
  }

  const include = [];
  for (const [tagSuffix, group] of groups) {
    // Singletons push by digest directly — no manifest list to assemble.
    if (group.length < 2) continue;

    // tag-latest must agree across legs — they're going to publish under
    // the same final tag, so disagreeing on whether it's also the :latest
    // tag is an authoring bug. Warn loudly so a Task 2.5 fan-out typo is
    // visible in CI logs instead of silently shipping the leg-0 value.
    // Use `??` (not `||`) so a deliberately falsy-but-defined tag-latest
    // (e.g. boolean false from YAML) survives instead of collapsing to ''.
    const first = group[0]['tag-latest'] ?? '';
    for (const m of group) {
      if ((m['tag-latest'] ?? '') !== first) {
        console.warn(`tag-latest mismatch in group ${tagSuffix}: legs disagree (using ${first})`);
        break;
      }
    }

    include.push({
      'tag-suffix': tagSuffix,
      'tag-latest': first,
    });
  }
  return { include };
}
|
||||
|
||||
function emitFullMatrix() {
|
||||
const mergeMatrix = computeMergeMatrix(includes);
|
||||
const hasMerges = mergeMatrix.include.length > 0 ? 'true' : 'false';
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `run-all=true\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends=true\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends-darwin=true\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-merges=${hasMerges}\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix=${JSON.stringify({ include: includes })}\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix-darwin=${JSON.stringify({ include: includesDarwin })}\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `merge-matrix=${JSON.stringify(mergeMatrix)}\n`);
|
||||
for (const backend of allBackendPaths.keys()) {
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `${backend}=true\n`);
|
||||
}
|
||||
@@ -155,11 +194,16 @@ function emitFilteredMatrix(changedFiles) {
|
||||
console.log("Has backends?:", hasBackends);
|
||||
console.log("Has Darwin backends?:", hasBackendsDarwin);
|
||||
|
||||
const mergeMatrix = computeMergeMatrix(filtered);
|
||||
const hasMerges = mergeMatrix.include.length > 0 ? 'true' : 'false';
|
||||
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `run-all=false\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends=${hasBackends}\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-backends-darwin=${hasBackendsDarwin}\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `has-merges=${hasMerges}\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix=${JSON.stringify({ include: filtered })}\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `matrix-darwin=${JSON.stringify({ include: filteredDarwin })}\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `merge-matrix=${JSON.stringify(mergeMatrix)}\n`);
|
||||
|
||||
// Per-backend boolean outputs
|
||||
for (const [backend, pathPrefix] of allBackendPaths) {
|
||||
|
||||
Reference in New Issue
Block a user