LocalAI/.github/workflows/backend_build.yml

---
# Reusable workflow (triggered through `workflow_call`) that builds one
# backend container image. Callers pass the build parameters as inputs and
# the registry credentials as secrets.
name: 'build backend container images (reusable)'

on:
  workflow_call:
    inputs:
      base-image:
        description: 'Base image'
        required: true
        type: string
      build-type:
        description: 'Build type'
        required: false
        default: ''
        type: string
      cuda-major-version:
        description: 'CUDA major version'
        required: false
        default: '12'
        type: string
      cuda-minor-version:
        description: 'CUDA minor version'
        required: false
        default: '1'
        type: string
      platforms:
        description: 'Platforms'
        required: false
        default: ''
        type: string
      platform-tag:
        description: |
          Short tag identifying the platform leg, e.g. "amd64" or "arm64".
          Used to scope the per-arch registry cache and the digest artifact name.
          Required for split-and-merge multi-arch builds; pass "amd64" for
          single-arch amd64 builds too. Optional (default '') during the
          migration to per-arch matrix expansion; will be flipped to
          required: true in Phase 6 once all callers pass an explicit value.
        required: false
        default: ''
        type: string
      tag-latest:
        description: 'Tag latest'
        required: false
        default: ''
        type: string
      tag-suffix:
        description: 'Tag suffix'
        required: false
        default: ''
        type: string
      runs-on:
        description: 'Runs on'
        # Required input: callers must always supply the runner label, so no
        # `default` is declared (it could never take effect).
        required: true
        type: string
      backend:
        description: 'Backend to build'
        required: true
        type: string
      context:
        description: 'Build context'
        required: true
        type: string
      dockerfile:
        description: 'Build Dockerfile'
        required: true
        type: string
      skip-drivers:
        description: 'Skip drivers'
        required: false
        default: 'false'
        type: string
      ubuntu-version:
        description: 'Ubuntu version'
        required: false
        default: '2204'
        type: string
      amdgpu-targets:
        description: 'AMD GPU targets for ROCm/HIP builds'
        required: false
        default: ''
        type: string
      builder-base-image:
        description: |
          Pre-built builder base image (e.g. quay.io/go-skynet/ci-cache:base-grpc-cuda-13-amd64).
          When set, the variant Dockerfile uses its `builder-prebuilt` stage which FROMs this
          image directly instead of running its own gRPC stage + apt installs. Empty for
          backends whose Dockerfile doesn't support a prebuilt base.
        required: false
        default: ''
        type: string
    secrets:
      # Docker Hub credentials, consumed by the "Login to DockerHub" step.
      dockerUsername:
        required: false
      dockerPassword:
        required: false
      # Quay.io credentials, consumed by the "Login to Quay.io" step.
      quayUsername:
        required: true
      quayPassword:
        required: true

jobs:
  # Single job: builds the requested backend image on the caller-chosen runner.
  backend-build:
    runs-on: ${{ inputs.runs-on }}
    env:
      # Mirrors the Quay username into the environment so later steps can
      # test `env.quay_username != ''` to see whether it was provided.
      quay_username: ${{ secrets.quayUsername }}
    steps:

      # Fetch the repository together with its git submodules.
      - uses: actions/checkout@v6
        name: Checkout
        with:
          submodules: true

      # Local composite action. Its `effective-mirror` and
      # `effective-ports-mirror` outputs are forwarded to the image builds as
      # the APT_MIRROR / APT_PORTS_MIRROR build arguments.
      - id: apt_mirror
        name: Configure apt mirror on runner
        uses: ./.github/actions/configure-apt-mirror

      # `mode` is 'hosted' only when the runner label is exactly
      # 'ubuntu-latest'; every other runner label yields 'skip'.
      - uses: ./.github/actions/free-disk-space
        name: Free disk space
        with:
          mode: ${{ inputs.runs-on == 'ubuntu-latest' && 'hosted' || 'skip' }}

      # Local composite action, invoked without inputs.
      - uses: ./.github/actions/setup-build-disk
        name: Set up build disk

      # Image metadata for non-PR events, covering both published image
      # repositories. Within this job only the `labels` output of this step
      # is read (by the by-digest build and by the job summary).
      - id: meta
        name: Docker meta
        if: github.event_name != 'pull_request'
        uses: docker/metadata-action@v6
        with:
          flavor: |
            latest=${{ inputs.tag-latest }}
            suffix=${{ inputs.tag-suffix }},onlatest=true
          images: |
            quay.io/go-skynet/local-ai-backends
            localai/localai-backends
          tags: |
            type=ref,event=branch
            type=semver,pattern={{raw}}
            type=sha

      # Image metadata for pull requests: targets quay.io/go-skynet/ci-tests
      # and gives every tag rule a suffix built from the PR number, backend,
      # build type and CUDA major/minor version.
      - id: meta_pull_request
        name: Docker meta for PR
        if: github.event_name == 'pull_request'
        uses: docker/metadata-action@v6
        with:
          flavor: |
            latest=${{ inputs.tag-latest }}
            suffix=${{ inputs.tag-suffix }},onlatest=true
          images: |
            quay.io/go-skynet/ci-tests
          tags: |
            type=ref,event=branch,suffix=${{ github.event.number }}-${{ inputs.backend }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
            type=semver,pattern={{raw}},suffix=${{ github.event.number }}-${{ inputs.backend }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
            type=sha,suffix=${{ github.event.number }}-${{ inputs.backend }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
      # End of the PR (testing) image metadata.
      # Registers QEMU emulators for all platforms.
      # NOTE(review): this action tracks the mutable `master` branch while the
      # other docker/* actions in this job use version tags - consider pinning.
      - name: Set up QEMU
        with:
          platforms: all
        uses: docker/setup-qemu-action@master

      # Creates the Buildx builder; both build steps select it through
      # `steps.buildx.outputs.name`.
      # NOTE(review): tracks the mutable `master` branch - consider pinning.
      - id: buildx
        name: Set up Docker Buildx
        uses: docker/setup-buildx-action@master

      # Docker Hub login, skipped on pull requests.
      # NOTE(review): the dockerUsername/dockerPassword secrets are declared
      # optional, yet this step runs on every non-PR event - confirm that all
      # callers provide them.
      - name: Login to DockerHub
        uses: docker/login-action@v4
        if: github.event_name != 'pull_request'
        with:
          password: ${{ secrets.dockerPassword }}
          username: ${{ secrets.dockerUsername }}

      # Quay.io login, performed only when the job-level `quay_username`
      # environment variable is non-empty.
      - name: Login to Quay.io
        uses: docker/login-action@v4
        if: env.quay_username != ''
        with:
          registry: quay.io
          password: ${{ secrets.quayPassword }}
          username: ${{ secrets.quayUsername }}

      # Weekly cache-buster for the per-backend `make` step. Most Python
      # backends list unpinned deps (torch, transformers, vllm, ...), so a
      # warm cache freezes upstream versions indefinitely. Rolling this
      # weekly forces a re-resolve of the install layer at most once per
      # week, picking up newer wheels without a full cold rebuild.
      #
      # The key is the ISO 8601 week: %V is the ISO week number, so it is
      # paired with %G (the ISO week-based year) rather than %Y (the calendar
      # year). With %Y the key would change in the middle of the week that
      # spans New Year and could repeat a key from roughly a year earlier.
      - name: Compute deps refresh key
        id: deps_refresh
        run: echo "key=$(date -u +%G-W%V)" >> "$GITHUB_OUTPUT"

      # Non-PR build: pushes the image to both registries by digest only (no
      # tags are applied in this step) and writes the registry build cache.
      - id: build
        name: Build and push by digest
        if: github.event_name != 'pull_request'
        uses: docker/build-push-action@v7
        with:
          builder: ${{ steps.buildx.outputs.name }}
          context: ${{ inputs.context }}
          file: ${{ inputs.dockerfile }}
          platforms: ${{ inputs.platforms }}
          build-args: |
            BUILD_TYPE=${{ inputs.build-type }}
            SKIP_DRIVERS=${{ inputs.skip-drivers }}
            CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
            CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
            BASE_IMAGE=${{ inputs.base-image }}
            BACKEND=${{ inputs.backend }}
            UBUNTU_VERSION=${{ inputs.ubuntu-version }}
            AMDGPU_TARGETS=${{ inputs.amdgpu-targets }}
            APT_MIRROR=${{ steps.apt_mirror.outputs.effective-mirror }}
            APT_PORTS_MIRROR=${{ steps.apt_mirror.outputs.effective-ports-mirror }}
            DEPS_REFRESH=${{ steps.deps_refresh.outputs.key }}
            BUILDER_BASE_IMAGE=${{ inputs.builder-base-image }}
            BUILDER_TARGET=${{ inputs.builder-base-image != '' && 'builder-prebuilt' || 'builder-fromsource' }}
          labels: ${{ steps.meta.outputs.labels }}
          # The cache ref is scoped by tag-suffix and platform-tag.
          cache-from: type=registry,ref=quay.io/go-skynet/ci-cache:cache${{ inputs.tag-suffix }}-${{ inputs.platform-tag }}
          cache-to: type=registry,ref=quay.io/go-skynet/ci-cache:cache${{ inputs.tag-suffix }}-${{ inputs.platform-tag }},mode=max,ignore-error=true
          outputs: |
            type=image,name=quay.io/go-skynet/local-ai-backends,push-by-digest=true,name-canonical=true,push=true
            type=image,name=localai/localai-backends,push-by-digest=true,name-canonical=true,push=true
          # Provenance is turned off on purpose. When it is enabled
          # (mode=max is the default for push:true), buildx adds a
          # per-registry attestation manifest to each registry's manifest
          # list, so the list digest differs between the registries.
          # steps.build.outputs.digest would then match only one of them and
          # the merge job's `imagetools create <reg>@sha256:<digest>` lookup
          # would fail on the other. Without provenance the digest is
          # content-only and identical in both registries, which the
          # digest-based cross-registry merge requires.
          provenance: false

      # Records the pushed image digest as an empty marker file under
      # /tmp/digests, named after the digest with its `sha256:` prefix
      # stripped, so the upload step can publish it as an artifact.
      # The digest is handed over through an environment variable instead of
      # being interpolated into the script, and an empty digest fails right
      # here rather than silently creating no marker file.
      - name: Export digest
        if: github.event_name != 'pull_request'
        env:
          DIGEST: ${{ steps.build.outputs.digest }}
        run: |
          if [ -z "${DIGEST}" ]; then
            echo "::error::Build step reported no image digest"
            exit 1
          fi
          mkdir -p /tmp/digests
          touch "/tmp/digests/${DIGEST#sha256:}"

      # Keeps the freshly pushed digest referenced in ci-cache so quay GC does
      # not reap it before the merge job runs. Why this is needed, and how it
      # interacts with backend_merge.yml's cleanup step, is explained in
      # .github/scripts/anchor-digest-in-cache.sh.
      - name: Anchor digest in ci-cache so quay GC won't reap before merge
        if: github.event_name != 'pull_request'
        run: .github/scripts/anchor-digest-in-cache.sh
        env:
          DIGEST: ${{ steps.build.outputs.digest }}
          # Falls back to 'single' when no platform-tag was passed.
          PLATFORM_TAG: ${{ inputs.platform-tag || 'single' }}
          TAG_SUFFIX: ${{ inputs.tag-suffix }}

      # The artifact name joins tag-suffix and platform-tag with `--` so that
      # the merge job's pattern-based download cannot pick up another
      # backend's artifacts. Tag-suffixes are not prefix-disjoint (for example
      # -gpu-nvidia-cuda-12-vllm is a prefix of -gpu-nvidia-cuda-12-vllm-omni),
      # so a single `-` separator combined with the merge-side
      # `digests<tag-suffix>-*` glob would over-match. When platform-tag is
      # empty (single-arch entries) the `single` placeholder is used, so the
      # name never ends in the bare separator.
      - name: Upload digest artifact
        uses: actions/upload-artifact@v7
        if: github.event_name != 'pull_request'
        with:
          name: digests${{ inputs.tag-suffix }}--${{ inputs.platform-tag || 'single' }}
          path: /tmp/digests/*
          retention-days: 1
          if-no-files-found: error

      # Pull-request build: reads the registry cache but does not write it,
      # and pushes the PR-tagged image only when the job-level `quay_username`
      # environment variable is non-empty.
      - name: Build (PR)
        if: github.event_name == 'pull_request'
        uses: docker/build-push-action@v7
        with:
          builder: ${{ steps.buildx.outputs.name }}
          context: ${{ inputs.context }}
          file: ${{ inputs.dockerfile }}
          platforms: ${{ inputs.platforms }}
          build-args: |
            BUILD_TYPE=${{ inputs.build-type }}
            SKIP_DRIVERS=${{ inputs.skip-drivers }}
            CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
            CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
            BASE_IMAGE=${{ inputs.base-image }}
            BACKEND=${{ inputs.backend }}
            UBUNTU_VERSION=${{ inputs.ubuntu-version }}
            AMDGPU_TARGETS=${{ inputs.amdgpu-targets }}
            APT_MIRROR=${{ steps.apt_mirror.outputs.effective-mirror }}
            APT_PORTS_MIRROR=${{ steps.apt_mirror.outputs.effective-ports-mirror }}
            DEPS_REFRESH=${{ steps.deps_refresh.outputs.key }}
            BUILDER_BASE_IMAGE=${{ inputs.builder-base-image }}
            BUILDER_TARGET=${{ inputs.builder-base-image != '' && 'builder-prebuilt' || 'builder-fromsource' }}
          tags: ${{ steps.meta_pull_request.outputs.tags }}
          labels: ${{ steps.meta_pull_request.outputs.labels }}
          cache-from: type=registry,ref=quay.io/go-skynet/ci-cache:cache${{ inputs.tag-suffix }}-${{ inputs.platform-tag }}
          push: ${{ env.quay_username != '' }}


      # Writes the image labels to the job summary. The labels are taken from
      # whichever metadata step ran for this event (`meta_pull_request` on
      # pull requests, `meta` otherwise; the other one is skipped and has no
      # outputs). They are passed through an environment variable instead of
      # being interpolated into the script, so quotes, backticks or `$` inside
      # label values cannot break or alter the shell command.
      - name: job summary
        env:
          IMAGE_LABELS: ${{ github.event_name == 'pull_request' && steps.meta_pull_request.outputs.labels || steps.meta.outputs.labels }}
        run: |
          echo "Built image: ${IMAGE_LABELS}" >> "$GITHUB_STEP_SUMMARY"