ci: pilot per-arch split + manifest merge for faster-whisper and llama-cpp-quantization (#9727)

ci: pilot per-arch split for faster-whisper and llama-cpp-quantization

Convert two backends from QEMU-emulated multi-arch (linux/amd64,linux/arm64 on a single ubuntu-latest) to native per-arch + manifest-list merge:

- amd64 leg on ubuntu-latest
- arm64 leg on ubuntu-24.04-arm (native, ~5-10x faster than emulated)
- merge job assembles both digests under the final tag via docker buildx imagetools create

Backends piloted:

- -cpu-faster-whisper (small Python, fast baseline)
- -cpu-llama-cpp-quantization (heavier compile path, stress test)

Infrastructure changes that the rest of Phase 2 (Tasks 2.5+) will reuse:

- .github/backend-matrix.yml entries gain a `platform-tag` field ('amd64'/'arm64') for matrix entries that participate in the split. Other entries omit it; backend_build.yml already defaults missing values to '' (empty cache key suffix preserved as cache<suffix>-).
- backend.yml + backend_pr.yml forward `platform-tag` from matrix to the reusable backend_build.yml.
- scripts/changed-backends.js groups filtered entries by tag-suffix and emits a `merge-matrix` (plus `has-merges`) for groups of size>=2. Singletons aren't merged.
- backend.yml + backend_pr.yml gain a `backend-merge-jobs` job that consumes merge-matrix and calls backend_merge.yml after backend-jobs. PR variant is also event-gated so the no-op-on-PR merge job doesn't even start.

The other 34 multi-arch entries are unchanged in this PR -- Task 2.5 fans out the same shape to them once the pilot is observed green.

Assisted-by: Claude:claude-opus-4-7
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
2026-07-04 21:37:02 -04:00 · 2026-05-09 00:04:42 +02:00
parent 624fa946f8
commit cb68cd1cf4
4 changed files with 114 additions and 2 deletions
--- a/.github/backend-matrix.yml
+++ b/.github/backend-matrix.yml
@@ -118,7 +118,8 @@ include:
  - build-type: ''
    cuda-major-version: ""
    cuda-minor-version: ""
-    platforms: 'linux/amd64,linux/arm64'
+    platforms: 'linux/amd64'
+    platform-tag: 'amd64'
    tag-latest: 'auto'
    tag-suffix: '-cpu-faster-whisper'
    runs-on: 'ubuntu-latest'
@@ -128,6 +129,20 @@ include:
    dockerfile: "./backend/Dockerfile.python"
    context: "./"
    ubuntu-version: '2404'
+  - build-type: ''
+    cuda-major-version: ""
+    cuda-minor-version: ""
+    platforms: 'linux/arm64'
+    platform-tag: 'arm64'
+    tag-latest: 'auto'
+    tag-suffix: '-cpu-faster-whisper'
+    runs-on: 'ubuntu-24.04-arm'
+    base-image: "ubuntu:24.04"
+    skip-drivers: 'true'
+    backend: "faster-whisper"
+    dockerfile: "./backend/Dockerfile.python"
+    context: "./"
+    ubuntu-version: '2404'
  - build-type: ''
    cuda-major-version: ""
    cuda-minor-version: ""
@@ -157,7 +172,8 @@ include:
  - build-type: ''
    cuda-major-version: ""
    cuda-minor-version: ""
-    platforms: 'linux/amd64,linux/arm64'
+    platforms: 'linux/amd64'
+    platform-tag: 'amd64'
    tag-latest: 'auto'
    tag-suffix: '-cpu-llama-cpp-quantization'
    runs-on: 'ubuntu-latest'
@@ -167,6 +183,20 @@ include:
    dockerfile: "./backend/Dockerfile.python"
    context: "./"
    ubuntu-version: '2404'
+  - build-type: ''
+    cuda-major-version: ""
+    cuda-minor-version: ""
+    platforms: 'linux/arm64'
+    platform-tag: 'arm64'
+    tag-latest: 'auto'
+    tag-suffix: '-cpu-llama-cpp-quantization'
+    runs-on: 'ubuntu-24.04-arm'
+    base-image: "ubuntu:24.04"
+    skip-drivers: 'true'
+    backend: "llama-cpp-quantization"
+    dockerfile: "./backend/Dockerfile.python"
+    context: "./"
+    ubuntu-version: '2404'
  - build-type: ''
    cuda-major-version: ""
    cuda-minor-version: ""
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -34,8 +34,10 @@ jobs:
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      matrix-darwin: ${{ steps.set-matrix.outputs.matrix-darwin }}
+      merge-matrix: ${{ steps.set-matrix.outputs['merge-matrix'] }}
      has-backends: ${{ steps.set-matrix.outputs.has-backends }}
      has-backends-darwin: ${{ steps.set-matrix.outputs.has-backends-darwin }}
+      has-merges: ${{ steps.set-matrix.outputs['has-merges'] }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6
@@ -70,6 +72,7 @@ jobs:
      cuda-major-version: ${{ matrix.cuda-major-version }}
      cuda-minor-version: ${{ matrix.cuda-minor-version }}
      platforms: ${{ matrix.platforms }}
+      platform-tag: ${{ matrix.platform-tag || '' }}
      runs-on: ${{ matrix.runs-on }}
      base-image: ${{ matrix.base-image }}
      backend: ${{ matrix.backend }}
@@ -88,6 +91,22 @@ jobs:
      max-parallel: 8
      matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix) }}

+  backend-merge-jobs:
+    needs: [generate-matrix, backend-jobs]
+    if: needs.generate-matrix.outputs.has-merges == 'true'
+    uses: ./.github/workflows/backend_merge.yml
+    with:
+      tag-latest: ${{ matrix.tag-latest }}
+      tag-suffix: ${{ matrix.tag-suffix }}
+    secrets:
+      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
+      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
+      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
+      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
+    strategy:
+      fail-fast: false
+      matrix: ${{ fromJson(needs.generate-matrix.outputs['merge-matrix']) }}
+
  backend-jobs-darwin:
    needs: generate-matrix
    if: needs.generate-matrix.outputs.has-backends-darwin == 'true'
--- a/.github/workflows/backend_pr.yml
+++ b/.github/workflows/backend_pr.yml
@@ -13,8 +13,10 @@ jobs:
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      matrix-darwin: ${{ steps.set-matrix.outputs.matrix-darwin }}
+      merge-matrix: ${{ steps.set-matrix.outputs['merge-matrix'] }}
      has-backends: ${{ steps.set-matrix.outputs.has-backends }}
      has-backends-darwin: ${{ steps.set-matrix.outputs.has-backends-darwin }}
+      has-merges: ${{ steps.set-matrix.outputs['has-merges'] }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6
@@ -46,6 +48,7 @@ jobs:
      cuda-major-version: ${{ matrix.cuda-major-version }}
      cuda-minor-version: ${{ matrix.cuda-minor-version }}
      platforms: ${{ matrix.platforms }}
+      platform-tag: ${{ matrix.platform-tag || '' }}
      runs-on: ${{ matrix.runs-on }}
      base-image: ${{ matrix.base-image }}
      backend: ${{ matrix.backend }}
@@ -61,6 +64,22 @@ jobs:
      fail-fast: true
      max-parallel: 8
      matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix) }}
+  backend-merge-jobs:
+    needs: [generate-matrix, backend-jobs]
+    # backend_merge.yml's push-side steps are all gated on
+    # github.event_name != 'pull_request', so on a PR the merge job would
+    # do nothing. Skip it entirely to avoid spinning up an empty runner.
+    if: github.event_name != 'pull_request' && needs.generate-matrix.outputs.has-merges == 'true'
+    uses: ./.github/workflows/backend_merge.yml
+    with:
+      tag-latest: ${{ matrix.tag-latest }}
+      tag-suffix: ${{ matrix.tag-suffix }}
+    secrets:
+      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
+      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
+    strategy:
+      fail-fast: false
+      matrix: ${{ fromJson(needs.generate-matrix.outputs['merge-matrix']) }}
  backend-jobs-darwin:
    needs: generate-matrix
    uses: ./.github/workflows/backend_build_darwin.yml