Files
LocalAI/.github/workflows/backend.yml
Richard Palethorpe a3b7c3a819 ci: layered Python base images for cross-matrix dedup
The 234-entry backend matrix runs the same apt-update + GPU SDK install +
Python toolchain bootstrap into N independent registry-cache tags. Factor
that shared work out into a tier-1+2 base image (lang × accel × ubuntu ×
cuda) built once per workflow run, then consumed by every backend that
matches its tuple via BASE_IMAGE_PREBUILT.

The matrix data moves to .github/backend-matrix.yaml so backend.yml can
switch to fromJSON without duplicating the matrix. scripts/changed-backends.js
reads the data file, derives the deduplicated bases-matrix, annotates each
Python entry with the right base-image-prebuilt ref, and runs a collision
check that fails loudly if a future matrix change makes two consumers want
incompatible bases under the same tag-stem.

PR builds tag with -pr<N> so end-to-end validation lives within one PR;
master builds tag without the suffix. The base-images registry cache
parallels the existing per-matrix-entry caches.

Adding a new (accel, cuda) flavour is a backend-matrix.yaml edit; adding
a new language tier is a Dockerfile.<lang> recipe + a slim of the
consumer Dockerfile (script auto-detects via .docker/bases/).

10 distinct bases derive from the current 234 entries, replacing the
inline bootstrap that previously ran into ~10 separate cache tags.

Assisted-by: Claude:opus-4-7-1m [Claude Code]
Signed-off-by: Richard Palethorpe <io@richiejp.com>
2026-05-06 16:10:49 +01:00

199 lines
7.1 KiB
YAML

---
name: 'build backend container images'
on:
push:
branches:
- master
tags:
- '*'
concurrency:
group: ci-backends-${{ github.head_ref || github.ref }}-${{ github.repository }}
cancel-in-progress: true
jobs:
derive-bases:
if: github.repository == 'mudler/LocalAI'
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.derive.outputs.matrix }}
matrix-darwin: ${{ steps.derive.outputs.matrix-darwin }}
bases-matrix: ${{ steps.derive.outputs.bases-matrix }}
has-backends: ${{ steps.derive.outputs.has-backends }}
has-backends-darwin: ${{ steps.derive.outputs.has-backends-darwin }}
has-bases: ${{ steps.derive.outputs.has-bases }}
steps:
- uses: actions/checkout@v6
- uses: oven-sh/setup-bun@v2
- run: |
bun add js-yaml
bun add @octokit/core
- id: derive
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_EVENT_PATH: ${{ github.event_path }}
run: bun run scripts/changed-backends.js
build-bases:
needs: derive-bases
if: needs.derive-bases.outputs.has-bases == 'true'
strategy:
fail-fast: false
matrix: ${{ fromJSON(needs.derive-bases.outputs.bases-matrix) }}
uses: ./.github/workflows/base_images.yml
with:
lang: ${{ matrix.lang }}
base-image: ${{ matrix.base-image }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
ubuntu-version: ${{ matrix.ubuntu-version }}
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
tag-stem: ${{ matrix.tag-stem }}
skip-drivers: ${{ matrix.skip-drivers }}
secrets:
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
backend-jobs:
if: |
always() && github.repository == 'mudler/LocalAI' &&
(needs.build-bases.result == 'success' || needs.build-bases.result == 'skipped')
needs: [derive-bases, build-bases]
uses: ./.github/workflows/backend_build.yml
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
backend: ${{ matrix.backend }}
dockerfile: ${{ matrix.dockerfile }}
skip-drivers: ${{ matrix.skip-drivers }}
context: ${{ matrix.context }}
ubuntu-version: ${{ matrix.ubuntu-version }}
amdgpu-targets: ${{ matrix.amdgpu-targets || 'gfx908,gfx90a,gfx942,gfx950,gfx1030,gfx1100,gfx1101,gfx1102,gfx1151,gfx1200,gfx1201' }}
# Set by scripts/changed-backends.js for langs that have a
# .docker/bases/Dockerfile.<lang> recipe; '' otherwise (those run
# the inline bootstrap in their own Dockerfile).
base-image-prebuilt: ${{ matrix.base-image-prebuilt || '' }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
fail-fast: false
matrix: ${{ fromJSON(needs.derive-bases.outputs.matrix) }}
backend-jobs-darwin:
if: github.repository == 'mudler/LocalAI'
needs: derive-bases
uses: ./.github/workflows/backend_build_darwin.yml
strategy:
fail-fast: false
matrix: ${{ fromJSON(needs.derive-bases.outputs.matrix-darwin) }}
with:
backend: ${{ matrix.backend }}
build-type: ${{ matrix.build-type }}
go-version: "1.24.x"
tag-suffix: ${{ matrix.tag-suffix }}
lang: ${{ matrix.lang || 'python' }}
use-pip: ${{ matrix.backend == 'diffusers' }}
runs-on: "macos-latest"
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
llama-cpp-darwin:
runs-on: macos-latest
strategy:
matrix:
go-version: ['1.25.x']
steps:
- name: Clone
uses: actions/checkout@v6
with:
submodules: true
- name: Setup Go ${{ matrix.go-version }}
uses: actions/setup-go@v5
with:
go-version: ${{ matrix.go-version }}
cache: false
# You can test your matrix by printing the current Go version
- name: Display Go version
run: go version
- name: Dependencies
run: |
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
- name: Build llama-cpp-darwin
run: |
make protogen-go
make backends/llama-cpp-darwin
- name: Upload llama-cpp.tar
uses: actions/upload-artifact@v7
with:
name: llama-cpp-tar
path: backend-images/llama-cpp.tar
llama-cpp-darwin-publish:
needs: llama-cpp-darwin
if: github.event_name != 'pull_request'
runs-on: ubuntu-latest
steps:
- name: Download llama-cpp.tar
uses: actions/download-artifact@v8
with:
name: llama-cpp-tar
path: .
- name: Install crane
run: |
curl -L https://github.com/google/go-containerregistry/releases/latest/download/go-containerregistry_Linux_x86_64.tar.gz | tar -xz
sudo mv crane /usr/local/bin/
- name: Log in to DockerHub
run: |
echo "${{ secrets.DOCKERHUB_PASSWORD }}" | crane auth login docker.io -u "${{ secrets.DOCKERHUB_USERNAME }}" --password-stdin
- name: Log in to quay.io
run: |
echo "${{ secrets.LOCALAI_REGISTRY_PASSWORD }}" | crane auth login quay.io -u "${{ secrets.LOCALAI_REGISTRY_USERNAME }}" --password-stdin
- name: Docker meta
id: meta
uses: docker/metadata-action@v6
with:
images: |
localai/localai-backends
tags: |
type=ref,event=branch
type=semver,pattern={{raw}}
type=sha
flavor: |
latest=auto
suffix=-metal-darwin-arm64-llama-cpp,onlatest=true
- name: Docker meta
id: quaymeta
uses: docker/metadata-action@v6
with:
images: |
quay.io/go-skynet/local-ai-backends
tags: |
type=ref,event=branch
type=semver,pattern={{raw}}
type=sha
flavor: |
latest=auto
suffix=-metal-darwin-arm64-llama-cpp,onlatest=true
- name: Push Docker image (DockerHub)
run: |
for tag in $(echo "${{ steps.meta.outputs.tags }}" | tr ',' '\n'); do
crane push llama-cpp.tar $tag
done
- name: Push Docker image (Quay)
run: |
for tag in $(echo "${{ steps.quaymeta.outputs.tags }}" | tr ',' '\n'); do
crane push llama-cpp.tar $tag
done