LocalAI/.github/workflows/backend.yml

---
name: 'build backend container images'

on:
  push:
    branches:
      - master
    tags:
      - '*'

concurrency:
  group: ci-backends-${{ github.head_ref || github.ref }}-${{ github.repository }}
  cancel-in-progress: true

jobs:
  backend-jobs:
    uses: ./.github/workflows/backend_build.yml
    with:
      tag-latest: ${{ matrix.tag-latest }}
      tag-suffix: ${{ matrix.tag-suffix }}
      build-type: ${{ matrix.build-type }}
      cuda-major-version: ${{ matrix.cuda-major-version }}
      cuda-minor-version: ${{ matrix.cuda-minor-version }}
      platforms: ${{ matrix.platforms }}
      runs-on: ${{ matrix.runs-on }}
      base-image: ${{ matrix.base-image }}
      backend: ${{ matrix.backend }}
      dockerfile: ${{ matrix.dockerfile }}
      skip-drivers: ${{ matrix.skip-drivers }}
      context: ${{ matrix.context }}
    secrets:
      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
    strategy:
      fail-fast: false
      #max-parallel: ${{ github.event_name != 'pull_request' && 6 || 4 }}
      matrix:
        include:
          # CUDA 11 builds
          - build-type: 'cublas'
            cuda-major-version: "11"
            cuda-minor-version: "7"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-11-rerankers'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "rerankers"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'cublas'
            cuda-major-version: "11"
            cuda-minor-version: "7"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-11-llama-cpp'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "llama-cpp"
            dockerfile: "./backend/Dockerfile.llama-cpp"
            context: "./"
          - build-type: 'cublas'
            cuda-major-version: "11"
            cuda-minor-version: "7"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-11-transformers'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "transformers"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'cublas'
            cuda-major-version: "11"
            cuda-minor-version: "7"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-11-diffusers'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "diffusers"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          # CUDA 11 additional backends
          - build-type: 'cublas'
            cuda-major-version: "11"
            cuda-minor-version: "7"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-11-kokoro'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "kokoro"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'cublas'
            cuda-major-version: "11"
            cuda-minor-version: "7"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-11-faster-whisper'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "faster-whisper"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'cublas'
            cuda-major-version: "11"
            cuda-minor-version: "7"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-11-coqui'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "coqui"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'cublas'
            cuda-major-version: "11"
            cuda-minor-version: "7"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-11-bark'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "bark"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'cublas'
            cuda-major-version: "11"
            cuda-minor-version: "7"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-11-chatterbox'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "chatterbox"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          # CUDA 12 builds
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-rerankers'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "rerankers"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "llama-cpp"
            dockerfile: "./backend/Dockerfile.llama-cpp"
            context: "./"
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-vllm'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "vllm"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-transformers'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "transformers"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-diffusers'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "diffusers"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          # CUDA 12 additional backends
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-kokoro'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "kokoro"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "faster-whisper"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-coqui'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "coqui"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-bark'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "bark"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-chatterbox'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "chatterbox"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          # hipblas builds
          - build-type: 'hipblas'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-rocm-hipblas-rerankers'
            runs-on: 'ubuntu-latest'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
            skip-drivers: 'false'
            backend: "rerankers"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'hipblas'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-rocm-hipblas-llama-cpp'
            runs-on: 'ubuntu-latest'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
            skip-drivers: 'false'
            backend: "llama-cpp"
            dockerfile: "./backend/Dockerfile.llama-cpp"
            context: "./"
          - build-type: 'hipblas'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-rocm-hipblas-vllm'
            runs-on: 'ubuntu-latest'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
            skip-drivers: 'false'
            backend: "vllm"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'hipblas'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-rocm-hipblas-transformers'
            runs-on: 'ubuntu-latest'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
            skip-drivers: 'false'
            backend: "transformers"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'hipblas'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-rocm-hipblas-diffusers'
            runs-on: 'ubuntu-latest'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
            skip-drivers: 'false'
            backend: "diffusers"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          # ROCm additional backends
          - build-type: 'hipblas'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-rocm-hipblas-kokoro'
            runs-on: 'ubuntu-latest'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
            skip-drivers: 'false'
            backend: "kokoro"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'hipblas'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-rocm-hipblas-faster-whisper'
            runs-on: 'ubuntu-latest'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
            skip-drivers: 'false'
            backend: "faster-whisper"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'hipblas'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-rocm-hipblas-coqui'
            runs-on: 'ubuntu-latest'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
            skip-drivers: 'false'
            backend: "coqui"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'hipblas'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-rocm-hipblas-bark'
            runs-on: 'ubuntu-latest'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
            skip-drivers: 'false'
            backend: "bark"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          # sycl builds
          - build-type: 'sycl_f32'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f32-rerankers'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            skip-drivers: 'false'
            backend: "rerankers"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'sycl_f16'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f16-rerankers'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            skip-drivers: 'false'
            backend: "rerankers"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'sycl_f32'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f32-llama-cpp'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            skip-drivers: 'false'
            backend: "llama-cpp"
            dockerfile: "./backend/Dockerfile.llama-cpp"
            context: "./"
          - build-type: 'sycl_f16'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f16-llama-cpp'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            skip-drivers: 'false'
            backend: "llama-cpp"
            dockerfile: "./backend/Dockerfile.llama-cpp"
            context: "./"
          - build-type: 'sycl_f32'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f32-vllm'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            skip-drivers: 'false'
            backend: "vllm"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'sycl_f16'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f16-vllm'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            skip-drivers: 'false'
            backend: "vllm"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'sycl_f32'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f32-transformers'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            skip-drivers: 'false'
            backend: "transformers"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'sycl_f16'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f16-transformers'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            skip-drivers: 'false'
            backend: "transformers"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'sycl_f32'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f32-diffusers'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            skip-drivers: 'false'
            backend: "diffusers"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          # SYCL additional backends
          - build-type: 'sycl_f32'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f32-kokoro'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            skip-drivers: 'false'
            backend: "kokoro"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'sycl_f16'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f16-kokoro'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            skip-drivers: 'false'
            backend: "kokoro"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'sycl_f32'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f32-faster-whisper'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            skip-drivers: 'false'
            backend: "faster-whisper"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'sycl_f16'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f16-faster-whisper'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            skip-drivers: 'false'
            backend: "faster-whisper"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'sycl_f32'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f32-coqui'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            skip-drivers: 'false'
            backend: "coqui"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'sycl_f16'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f16-coqui'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            skip-drivers: 'false'
            backend: "coqui"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'sycl_f32'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f32-bark'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            skip-drivers: 'false'
            backend: "bark"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          - build-type: 'sycl_f16'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f16-bark'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            skip-drivers: 'false'
            backend: "bark"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
          # piper
          - build-type: ''
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64,linux/arm64'
            tag-latest: 'auto'
            tag-suffix: '-piper'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "piper"
            dockerfile: "./backend/Dockerfile.go"
            context: "./"
          # bark-cpp
          - build-type: ''
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-bark-cpp'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "bark-cpp"
            dockerfile: "./backend/Dockerfile.go"
            context: "./"
          - build-type: ''
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64,linux/arm64'
            tag-latest: 'auto'
            tag-suffix: '-cpu-llama-cpp'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "llama-cpp"
            dockerfile: "./backend/Dockerfile.llama-cpp"
            context: "./"
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/arm64'
            skip-drivers: 'true'
            tag-latest: 'auto'
            tag-suffix: '-nvidia-l4t-arm64-llama-cpp'
            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
            runs-on: 'ubuntu-24.04-arm'
            backend: "llama-cpp"
            dockerfile: "./backend/Dockerfile.llama-cpp"
            context: "./"
          - build-type: 'vulkan'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-vulkan-llama-cpp'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "llama-cpp"
            dockerfile: "./backend/Dockerfile.llama-cpp"
            context: "./"
          # Stablediffusion-ggml
          - build-type: ''
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-cpu-stablediffusion-ggml'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "stablediffusion-ggml"
            dockerfile: "./backend/Dockerfile.go"
            context: "./"
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "stablediffusion-ggml"
            dockerfile: "./backend/Dockerfile.go"
            context: "./"
          - build-type: 'cublas'
            cuda-major-version: "11"
            cuda-minor-version: "7"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-11-stablediffusion-ggml'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "stablediffusion-ggml"
            dockerfile: "./backend/Dockerfile.go"
            context: "./"
          - build-type: 'sycl_f32'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f32-stablediffusion-ggml'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            skip-drivers: 'false'
            backend: "stablediffusion-ggml"
            dockerfile: "./backend/Dockerfile.go"
            context: "./"
          - build-type: 'sycl_f16'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f16-stablediffusion-ggml'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            skip-drivers: 'false'
            backend: "stablediffusion-ggml"
            dockerfile: "./backend/Dockerfile.go"
            context: "./"
          - build-type: 'vulkan'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-vulkan-stablediffusion-ggml'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "stablediffusion-ggml"
            dockerfile: "./backend/Dockerfile.go"
            context: "./"
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/arm64'
            skip-drivers: 'true'
            tag-latest: 'auto'
            tag-suffix: '-nvidia-l4t-arm64-stablediffusion-ggml'
            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
            runs-on: 'ubuntu-24.04-arm'
            backend: "stablediffusion-ggml"
            dockerfile: "./backend/Dockerfile.go"
            context: "./"
          # whisper
          - build-type: ''
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64,linux/arm64'
            tag-latest: 'auto'
            tag-suffix: '-cpu-whisper'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "whisper"
            dockerfile: "./backend/Dockerfile.go"
            context: "./"
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-whisper'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "whisper"
            dockerfile: "./backend/Dockerfile.go"
            context: "./"
          - build-type: 'cublas'
            cuda-major-version: "11"
            cuda-minor-version: "7"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-11-whisper'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "whisper"
            dockerfile: "./backend/Dockerfile.go"
            context: "./"
          - build-type: 'sycl_f32'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f32-whisper'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            skip-drivers: 'false'
            backend: "whisper"
            dockerfile: "./backend/Dockerfile.go"
            context: "./"
          - build-type: 'sycl_f16'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-intel-sycl-f16-whisper'
            runs-on: 'ubuntu-latest'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            skip-drivers: 'false'
            backend: "whisper"
            dockerfile: "./backend/Dockerfile.go"
            context: "./"
          - build-type: 'vulkan'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-vulkan-whisper'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "whisper"
            dockerfile: "./backend/Dockerfile.go"
            context: "./"
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/arm64'
            skip-drivers: 'true'
            tag-latest: 'auto'
            tag-suffix: '-nvidia-l4t-arm64-whisper'
            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
            runs-on: 'ubuntu-24.04-arm'
            backend: "whisper"
            dockerfile: "./backend/Dockerfile.go"
            context: "./"
          - build-type: 'hipblas'
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-hipblas-whisper'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
            runs-on: 'ubuntu-latest'
            skip-drivers: 'false'
            backend: "whisper"
            dockerfile: "./backend/Dockerfile.go"
            context: "./"
          #silero-vad
          - build-type: ''
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64,linux/arm64'
            tag-latest: 'auto'
            tag-suffix: '-cpu-silero-vad'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "silero-vad"
            dockerfile: "./backend/Dockerfile.go"
            context: "./"
          # local-store
          - build-type: ''
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64,linux/arm64'
            tag-latest: 'auto'
            tag-suffix: '-cpu-local-store'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "local-store"
            dockerfile: "./backend/Dockerfile.go"
            context: "./"
          # huggingface
          - build-type: ''
            cuda-major-version: ""
            cuda-minor-version: ""
            platforms: 'linux/amd64,linux/arm64'
            tag-latest: 'auto'
            tag-suffix: '-huggingface'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            backend: "huggingface"
            dockerfile: "./backend/Dockerfile.go"
            context: "./"
  llama-cpp-darwin:
    runs-on: macOS-14
    strategy:
      matrix:
        go-version: ['1.21.x']
    steps:
      - name: Clone
        uses: actions/checkout@v4
        with:
          submodules: true
      - name: Setup Go ${{ matrix.go-version }}
        uses: actions/setup-go@v5
        with:
          go-version: ${{ matrix.go-version }}
          cache: false
      # You can test your matrix by printing the current Go version
      - name: Display Go version
        run: go version
      - name: Dependencies
        run: |
          brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
      - name: Build llama-cpp-darwin
        run: |
          make protogen-go
          make build
          bash scripts/build-llama-cpp-darwin.sh
          ls -la build/darwin.tar
          mv build/darwin.tar build/llama-cpp.tar
      - name: Upload llama-cpp.tar
        uses: actions/upload-artifact@v4
        with:
          name: llama-cpp-tar
          path: build/llama-cpp.tar
  llama-cpp-darwin-publish:
    needs: llama-cpp-darwin
    runs-on: ubuntu-latest
    steps:
      - name: Download llama-cpp.tar
        uses: actions/download-artifact@v4
        with:
          name: llama-cpp-tar
          path: .
      - name: Install crane
        run: |
          curl -L https://github.com/google/go-containerregistry/releases/latest/download/go-containerregistry_Linux_x86_64.tar.gz | tar -xz
          sudo mv crane /usr/local/bin/
      - name: Log in to DockerHub
        run: |
          echo "${{ secrets.DOCKERHUB_PASSWORD }}" | crane auth login docker.io -u "${{ secrets.DOCKERHUB_USERNAME }}" --password-stdin
      - name: Log in to quay.io
        run: |
          echo "${{ secrets.LOCALAI_REGISTRY_PASSWORD }}" | crane auth login quay.io -u "${{ secrets.LOCALAI_REGISTRY_USERNAME }}" --password-stdin
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: |
            localai/localai-backends
          tags: |
            type=ref,event=branch
            type=semver,pattern={{raw}}
            type=sha
          flavor: |
            latest=auto
            suffix=-metal-darwin-arm64-llama-cpp,onlatest=true
      - name: Docker meta
        id: quaymeta
        uses: docker/metadata-action@v5
        with:
          images: |
            quay.io/go-skynet/local-ai-backends
          tags: |
            type=ref,event=branch
            type=semver,pattern={{raw}}
            type=sha
          flavor: |
            latest=auto
            suffix=-metal-darwin-arm64-llama-cpp,onlatest=true
      - name: Push Docker image (DockerHub)
        run: |
          for tag in $(echo "${{ steps.meta.outputs.tags }}" | tr ',' '\n'); do
            crane push llama-cpp.tar $tag
          done
      - name: Push Docker image (Quay)
        run: |
          for tag in $(echo "${{ steps.quaymeta.outputs.tags }}" | tr ',' '\n'); do
            crane push llama-cpp.tar $tag
          done
  llama-cpp-darwin-x86:
    runs-on: macos-13
    strategy:
      matrix:
        go-version: ['1.21.x']
    steps:
      - name: Clone
        uses: actions/checkout@v4
        with:
          submodules: true
      - name: Setup Go ${{ matrix.go-version }}
        uses: actions/setup-go@v5
        with:
          go-version: ${{ matrix.go-version }}
          cache: false
      # You can test your matrix by printing the current Go version
      - name: Display Go version
        run: go version
      - name: Dependencies
        run: |
          brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
      - name: Build llama-cpp-darwin
        run: |
          make protogen-go
          make build
          export PLATFORMARCH=darwin/amd64
          bash scripts/build-llama-cpp-darwin.sh
          ls -la build/darwin.tar
          mv build/darwin.tar build/llama-cpp.tar
      - name: Upload llama-cpp.tar
        uses: actions/upload-artifact@v4
        with:
          name: llama-cpp-tar-x86
          path: build/llama-cpp.tar
  llama-cpp-darwin-x86-publish:
    needs: llama-cpp-darwin-x86
    runs-on: ubuntu-latest
    steps:
      - name: Download llama-cpp.tar
        uses: actions/download-artifact@v4
        with:
          name: llama-cpp-tar-x86
          path: .
      - name: Install crane
        run: |
          curl -L https://github.com/google/go-containerregistry/releases/latest/download/go-containerregistry_Linux_x86_64.tar.gz | tar -xz
          sudo mv crane /usr/local/bin/
      - name: Log in to DockerHub
        run: |
          echo "${{ secrets.DOCKERHUB_PASSWORD }}" | crane auth login docker.io -u "${{ secrets.DOCKERHUB_USERNAME }}" --password-stdin
      - name: Log in to quay.io
        run: |
          echo "${{ secrets.LOCALAI_REGISTRY_PASSWORD }}" | crane auth login quay.io -u "${{ secrets.LOCALAI_REGISTRY_USERNAME }}" --password-stdin
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: |
            localai/localai-backends
          tags: |
            type=ref,event=branch
            type=semver,pattern={{raw}}
            type=sha
          flavor: |
            latest=auto
            suffix=-darwin-x86-llama-cpp,onlatest=true
      - name: Docker meta
        id: quaymeta
        uses: docker/metadata-action@v5
        with:
          images: |
            quay.io/go-skynet/local-ai-backends
          tags: |
            type=ref,event=branch
            type=semver,pattern={{raw}}
            type=sha
          flavor: |
            latest=auto
            suffix=-darwin-x86-llama-cpp,onlatest=true
      - name: Push Docker image (DockerHub)
        run: |
          for tag in $(echo "${{ steps.meta.outputs.tags }}" | tr ',' '\n'); do
            crane push llama-cpp.tar $tag
          done
      - name: Push Docker image (Quay)
        run: |
          for tag in $(echo "${{ steps.quaymeta.outputs.tags }}" | tr ',' '\n'); do
            crane push llama-cpp.tar $tag
          done