mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-17 04:56:52 -04:00
ci: pilot per-arch split + manifest merge for faster-whisper and llama-cpp-quantization (#9727)
ci: pilot per-arch split for faster-whisper and llama-cpp-quantization
Convert two backends from QEMU-emulated multi-arch (linux/amd64,linux/arm64
on a single ubuntu-latest) to native per-arch + manifest-list merge:
- amd64 leg on ubuntu-latest
- arm64 leg on ubuntu-24.04-arm (native, ~5-10x faster than emulated)
- merge job assembles both digests under the final tag via
docker buildx imagetools create
Backends piloted:
- -cpu-faster-whisper (small Python, fast baseline)
- -cpu-llama-cpp-quantization (heavier compile path, stress test)
Infrastructure changes that the rest of Phase 2 (Tasks 2.5+) will reuse:
- .github/backend-matrix.yml entries gain a `platform-tag` field
('amd64'/'arm64') for matrix entries that participate in the split.
Other entries omit it; backend_build.yml already defaults missing
values to '' (empty cache key suffix preserved as cache<suffix>-).
- backend.yml + backend_pr.yml forward `platform-tag` from matrix to
the reusable backend_build.yml.
- scripts/changed-backends.js groups filtered entries by tag-suffix
and emits a `merge-matrix` (plus `has-merges`) for groups of size>=2.
Singletons aren't merged.
- backend.yml + backend_pr.yml gain a `backend-merge-jobs` job that
consumes merge-matrix and calls backend_merge.yml after backend-jobs.
PR variant is also event-gated so the no-op-on-PR merge job doesn't
even start.
The other 34 multi-arch entries are unchanged in this PR -- Task 2.5
fans out the same shape to them once the pilot is observed green.
Assisted-by: Claude:claude-opus-4-7
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
34
.github/backend-matrix.yml
vendored
34
.github/backend-matrix.yml
vendored
@@ -118,7 +118,8 @@ include:
|
||||
- build-type: ''
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64,linux/arm64'
|
||||
platforms: 'linux/amd64'
|
||||
platform-tag: 'amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-cpu-faster-whisper'
|
||||
runs-on: 'ubuntu-latest'
|
||||
@@ -128,6 +129,20 @@ include:
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: ''
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/arm64'
|
||||
platform-tag: 'arm64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-cpu-faster-whisper'
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'true'
|
||||
backend: "faster-whisper"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: ''
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
@@ -157,7 +172,8 @@ include:
|
||||
- build-type: ''
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64,linux/arm64'
|
||||
platforms: 'linux/amd64'
|
||||
platform-tag: 'amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-cpu-llama-cpp-quantization'
|
||||
runs-on: 'ubuntu-latest'
|
||||
@@ -167,6 +183,20 @@ include:
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: ''
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/arm64'
|
||||
platform-tag: 'arm64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-cpu-llama-cpp-quantization'
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'true'
|
||||
backend: "llama-cpp-quantization"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: ''
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
|
||||
19
.github/workflows/backend.yml
vendored
19
.github/workflows/backend.yml
vendored
@@ -34,8 +34,10 @@ jobs:
|
||||
outputs:
|
||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||
matrix-darwin: ${{ steps.set-matrix.outputs.matrix-darwin }}
|
||||
merge-matrix: ${{ steps.set-matrix.outputs['merge-matrix'] }}
|
||||
has-backends: ${{ steps.set-matrix.outputs.has-backends }}
|
||||
has-backends-darwin: ${{ steps.set-matrix.outputs.has-backends-darwin }}
|
||||
has-merges: ${{ steps.set-matrix.outputs['has-merges'] }}
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v6
|
||||
@@ -70,6 +72,7 @@ jobs:
|
||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||
platforms: ${{ matrix.platforms }}
|
||||
platform-tag: ${{ matrix.platform-tag || '' }}
|
||||
runs-on: ${{ matrix.runs-on }}
|
||||
base-image: ${{ matrix.base-image }}
|
||||
backend: ${{ matrix.backend }}
|
||||
@@ -88,6 +91,22 @@ jobs:
|
||||
max-parallel: 8
|
||||
matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix) }}
|
||||
|
||||
backend-merge-jobs:
|
||||
needs: [generate-matrix, backend-jobs]
|
||||
if: needs.generate-matrix.outputs.has-merges == 'true'
|
||||
uses: ./.github/workflows/backend_merge.yml
|
||||
with:
|
||||
tag-latest: ${{ matrix.tag-latest }}
|
||||
tag-suffix: ${{ matrix.tag-suffix }}
|
||||
secrets:
|
||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix: ${{ fromJson(needs.generate-matrix.outputs['merge-matrix']) }}
|
||||
|
||||
backend-jobs-darwin:
|
||||
needs: generate-matrix
|
||||
if: needs.generate-matrix.outputs.has-backends-darwin == 'true'
|
||||
|
||||
19
.github/workflows/backend_pr.yml
vendored
19
.github/workflows/backend_pr.yml
vendored
@@ -13,8 +13,10 @@ jobs:
|
||||
outputs:
|
||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||
matrix-darwin: ${{ steps.set-matrix.outputs.matrix-darwin }}
|
||||
merge-matrix: ${{ steps.set-matrix.outputs['merge-matrix'] }}
|
||||
has-backends: ${{ steps.set-matrix.outputs.has-backends }}
|
||||
has-backends-darwin: ${{ steps.set-matrix.outputs.has-backends-darwin }}
|
||||
has-merges: ${{ steps.set-matrix.outputs['has-merges'] }}
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v6
|
||||
@@ -46,6 +48,7 @@ jobs:
|
||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||
platforms: ${{ matrix.platforms }}
|
||||
platform-tag: ${{ matrix.platform-tag || '' }}
|
||||
runs-on: ${{ matrix.runs-on }}
|
||||
base-image: ${{ matrix.base-image }}
|
||||
backend: ${{ matrix.backend }}
|
||||
@@ -61,6 +64,22 @@ jobs:
|
||||
fail-fast: true
|
||||
max-parallel: 8
|
||||
matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix) }}
|
||||
backend-merge-jobs:
|
||||
needs: [generate-matrix, backend-jobs]
|
||||
# backend_merge.yml's push-side steps are all gated on
|
||||
# github.event_name != 'pull_request', so on a PR the merge job would
|
||||
# do nothing. Skip it entirely to avoid spinning up an empty runner.
|
||||
if: github.event_name != 'pull_request' && needs.generate-matrix.outputs.has-merges == 'true'
|
||||
uses: ./.github/workflows/backend_merge.yml
|
||||
with:
|
||||
tag-latest: ${{ matrix.tag-latest }}
|
||||
tag-suffix: ${{ matrix.tag-suffix }}
|
||||
secrets:
|
||||
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix: ${{ fromJson(needs.generate-matrix.outputs['merge-matrix']) }}
|
||||
backend-jobs-darwin:
|
||||
needs: generate-matrix
|
||||
uses: ./.github/workflows/backend_build_darwin.yml
|
||||
|
||||
Reference in New Issue
Block a user