mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-17 04:56:52 -04:00
ci: pilot per-arch split for faster-whisper and llama-cpp-quantization
Convert two backends from QEMU-emulated multi-arch (linux/amd64,linux/arm64
on a single ubuntu-latest) to native per-arch + manifest-list merge:
- amd64 leg on ubuntu-latest
- arm64 leg on ubuntu-24.04-arm (native, ~5-10x faster than emulated)
- merge job assembles both digests under the final tag via
docker buildx imagetools create
Backends piloted:
- -cpu-faster-whisper (small Python, fast baseline)
- -cpu-llama-cpp-quantization (heavier compile path, stress test)
Infrastructure changes that the rest of Phase 2 (Tasks 2.5+) will reuse:
- .github/backend-matrix.yml entries gain a `platform-tag` field
('amd64'/'arm64') for matrix entries that participate in the split.
Other entries omit it; backend_build.yml already defaults a missing
value to '', so their cache key keeps the existing cache<suffix>- form.
- backend.yml + backend_pr.yml forward `platform-tag` from matrix to
the reusable backend_build.yml.
- scripts/changed-backends.js groups filtered entries by tag-suffix
and emits a `merge-matrix` (plus `has-merges`) for groups of size>=2.
Singletons aren't merged.
- backend.yml + backend_pr.yml gain a `backend-merge-jobs` job that
consumes merge-matrix and calls backend_merge.yml after backend-jobs.
PR variant is also event-gated so the no-op-on-PR merge job doesn't
even start.
The other 34 multi-arch entries are unchanged in this PR -- Task 2.5
fans out the same shape to them once the pilot is observed green.
Assisted-by: Claude:claude-opus-4-7
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
217 lines
7.9 KiB
YAML
217 lines
7.9 KiB
YAML
---
|
|
name: 'build backend container images'

on:
  push:
    branches: [master]
    tags: ['*']
  schedule:
    # Full-matrix rebuild once a week. Most backends install their Python
    # wheels (torch, transformers, vllm, ...) unpinned, so this run is what
    # picks up upstream wheel updates.
    #
    # The DEPS_REFRESH build-arg in backend_build.yml invalidates the install
    # layer cache when a new ISO week starts, but that only helps if a build
    # actually runs. Commit-driven pushes are path-filtered by
    # scripts/changed-backends.js and skip untouched backends; without this
    # cron those images would sit on stale wheels indefinitely. C++/Go
    # backends with pinned deps hit the cache and finish fast.
    #
    # A schedule event carries no event.ref / event.before, so the script
    # takes its changedFiles==null fallback and emits the full matrix
    # automatically; no script changes are needed.
    - cron: '0 6 * * 0'  # Sundays 06:00 UTC
  workflow_dispatch:

# One active run per ref and repository; a newer run for the same group
# cancels the one still in progress.
concurrency:
  cancel-in-progress: true
  group: ci-backends-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
|
|
|
jobs:
|
|
# Runs scripts/changed-backends.js and republishes the outputs of its
# `set-matrix` step so the downstream jobs can consume them.
generate-matrix:
  runs-on: ubuntu-latest
  # Only run in the upstream repository.
  if: github.repository == 'mudler/LocalAI'
  outputs:
    # Build matrices.
    matrix: ${{ steps.set-matrix.outputs.matrix }}
    matrix-darwin: ${{ steps.set-matrix.outputs.matrix-darwin }}
    merge-matrix: ${{ steps.set-matrix.outputs['merge-matrix'] }}
    # 'true'/'false' gates checked by the `if:` of each consuming job.
    has-backends: ${{ steps.set-matrix.outputs.has-backends }}
    has-backends-darwin: ${{ steps.set-matrix.outputs.has-backends-darwin }}
    has-merges: ${{ steps.set-matrix.outputs['has-merges'] }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@v6

    - name: Setup Bun
      uses: oven-sh/setup-bun@v2

    - name: Install dependencies
      run: bun add js-yaml && bun add @octokit/core

    # Filters the backend matrix in .github/backend-matrix.yml down to the
    # backends affected by the files changed in this push. On a tag push
    # FORCE_ALL is 'true', so the script falls through to the full matrix
    # (releases must rebuild everything).
    - name: Filter matrix for changed backends
      id: set-matrix
      run: bun run scripts/changed-backends.js
      env:
        FORCE_ALL: ${{ startsWith(github.ref, 'refs/tags/') && 'true' || 'false' }}
        GITHUB_EVENT_PATH: ${{ github.event_path }}
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
|
|
# Calls the reusable backend_build.yml workflow once per entry of the matrix
# produced by generate-matrix.
backend-jobs:
  needs: generate-matrix
  if: needs.generate-matrix.outputs.has-backends == 'true'
  strategy:
    # A failing backend must not cancel the remaining matrix legs.
    fail-fast: false
    max-parallel: 8
    matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix) }}
  uses: ./.github/workflows/backend_build.yml
  with:
    # What to build.
    backend: ${{ matrix.backend }}
    build-type: ${{ matrix.build-type }}
    dockerfile: ${{ matrix.dockerfile }}
    context: ${{ matrix.context }}
    base-image: ${{ matrix.base-image }}
    ubuntu-version: ${{ matrix.ubuntu-version }}
    skip-drivers: ${{ matrix.skip-drivers }}
    cuda-major-version: ${{ matrix.cuda-major-version }}
    cuda-minor-version: ${{ matrix.cuda-minor-version }}
    # Matrix entries without amdgpu-targets get this default list.
    amdgpu-targets: ${{ matrix.amdgpu-targets || 'gfx908,gfx90a,gfx942,gfx950,gfx1030,gfx1100,gfx1101,gfx1102,gfx1151,gfx1200,gfx1201' }}
    # Where to build it.
    runs-on: ${{ matrix.runs-on }}
    platforms: ${{ matrix.platforms }}
    # Matrix entries without platform-tag pass an empty string.
    platform-tag: ${{ matrix.platform-tag || '' }}
    # How to tag the result.
    tag-latest: ${{ matrix.tag-latest }}
    tag-suffix: ${{ matrix.tag-suffix }}
  secrets:
    dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
    dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
    quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
    quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
|
|
|
# Calls the reusable backend_merge.yml workflow once per entry of the
# merge-matrix. It lists backend-jobs in `needs`, so it starts only after
# backend-jobs has finished.
backend-merge-jobs:
  needs: [generate-matrix, backend-jobs]
  if: needs.generate-matrix.outputs.has-merges == 'true'
  strategy:
    fail-fast: false
    matrix: ${{ fromJson(needs.generate-matrix.outputs['merge-matrix']) }}
  uses: ./.github/workflows/backend_merge.yml
  with:
    tag-suffix: ${{ matrix.tag-suffix }}
    tag-latest: ${{ matrix.tag-latest }}
  secrets:
    dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
    dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
    quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
    quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
|
|
|
# Calls the reusable backend_build_darwin.yml workflow once per entry of the
# matrix-darwin output of generate-matrix.
backend-jobs-darwin:
  needs: generate-matrix
  if: needs.generate-matrix.outputs.has-backends-darwin == 'true'
  strategy:
    fail-fast: false
    matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix-darwin) }}
  uses: ./.github/workflows/backend_build_darwin.yml
  with:
    runs-on: 'macos-latest'
    go-version: '1.24.x'
    backend: ${{ matrix.backend }}
    build-type: ${{ matrix.build-type }}
    tag-suffix: ${{ matrix.tag-suffix }}
    # Matrix entries without `lang` are passed as 'python'.
    lang: ${{ matrix.lang || 'python' }}
    # True only for the diffusers backend.
    use-pip: ${{ matrix.backend == 'diffusers' }}
  secrets:
    dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
    dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
    quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
    quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
|
|
|
# Builds the llama-cpp backend on a macOS runner and uploads the resulting
# tarball as the `llama-cpp-tar` artifact for llama-cpp-darwin-publish.
llama-cpp-darwin:
  runs-on: macos-latest
  strategy:
    matrix:
      go-version: ['1.25.x']
  steps:
    - name: Clone
      uses: actions/checkout@v6
      with:
        submodules: true

    - name: Setup Go ${{ matrix.go-version }}
      uses: actions/setup-go@v5
      with:
        go-version: ${{ matrix.go-version }}
        cache: false

    # You can test your matrix by printing the current Go version
    - name: Display Go version
      run: go version

    - name: Dependencies
      run: |
        brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm

    - name: Build llama-cpp-darwin
      run: |
        make protogen-go
        make backends/llama-cpp-darwin

    - name: Upload llama-cpp.tar
      uses: actions/upload-artifact@v7
      with:
        name: llama-cpp-tar
        path: backend-images/llama-cpp.tar
        # Fail this job if the tarball is missing. The action's default only
        # warns, which would defer the failure to the publish job's download.
        if-no-files-found: error
|
|
# Downloads the tarball produced by llama-cpp-darwin and pushes it with crane
# to DockerHub and Quay under every tag computed by docker/metadata-action.
llama-cpp-darwin-publish:
  needs: llama-cpp-darwin
  # The image names below are hard-coded to the upstream registries, so only
  # the upstream repository can publish (same guard as generate-matrix).
  if: github.repository == 'mudler/LocalAI' && github.event_name != 'pull_request'
  runs-on: ubuntu-latest
  steps:
    - name: Download llama-cpp.tar
      uses: actions/download-artifact@v8
      with:
        name: llama-cpp-tar
        path: .

    - name: Install crane
      # -f makes curl fail on an HTTP error instead of piping an error page
      # into tar; pipefail propagates that failure to the step. Only the
      # `crane` binary is extracted from the archive.
      # NOTE(review): `latest` is unpinned; consider pinning a release.
      run: |
        set -o pipefail
        curl -fsSL https://github.com/google/go-containerregistry/releases/latest/download/go-containerregistry_Linux_x86_64.tar.gz | tar -xz crane
        sudo mv crane /usr/local/bin/

    # Credentials are passed through the environment rather than expanded
    # into the script text, so shell metacharacters in a secret cannot break
    # or alter the command.
    - name: Log in to DockerHub
      env:
        REGISTRY_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
        REGISTRY_PASSWORD: ${{ secrets.DOCKERHUB_PASSWORD }}
      run: |
        printf '%s\n' "$REGISTRY_PASSWORD" | crane auth login docker.io -u "$REGISTRY_USERNAME" --password-stdin

    - name: Log in to quay.io
      env:
        REGISTRY_USERNAME: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
        REGISTRY_PASSWORD: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
      run: |
        printf '%s\n' "$REGISTRY_PASSWORD" | crane auth login quay.io -u "$REGISTRY_USERNAME" --password-stdin

    - name: Docker meta (DockerHub)
      id: meta
      uses: docker/metadata-action@v6
      with:
        images: |
          localai/localai-backends
        tags: |
          type=ref,event=branch
          type=semver,pattern={{raw}}
          type=sha
        flavor: |
          latest=auto
          suffix=-metal-darwin-arm64-llama-cpp,onlatest=true

    - name: Docker meta (Quay)
      id: quaymeta
      uses: docker/metadata-action@v6
      with:
        images: |
          quay.io/go-skynet/local-ai-backends
        tags: |
          type=ref,event=branch
          type=semver,pattern={{raw}}
          type=sha
        flavor: |
          latest=auto
          suffix=-metal-darwin-arm64-llama-cpp,onlatest=true

    # The tag list arrives via the environment; commas are turned into
    # newlines and word splitting on whitespace yields one tag per iteration.
    - name: Push Docker image (DockerHub)
      env:
        IMAGE_TAGS: ${{ steps.meta.outputs.tags }}
      run: |
        for tag in $(echo "$IMAGE_TAGS" | tr ',' '\n'); do
          crane push llama-cpp.tar "$tag"
        done

    - name: Push Docker image (Quay)
      env:
        IMAGE_TAGS: ${{ steps.quaymeta.outputs.tags }}
      run: |
        for tag in $(echo "$IMAGE_TAGS" | tr ',' '\n'); do
          crane push llama-cpp.tar "$tag"
        done
|