--- name: 'build backend container images' on: push: branches: - master tags: - '*' concurrency: group: ci-backends-${{ github.head_ref || github.ref }}-${{ github.repository }} cancel-in-progress: true jobs: backend-jobs: uses: ./.github/workflows/backend_build.yml with: tag-latest: ${{ matrix.tag-latest }} tag-suffix: ${{ matrix.tag-suffix }} build-type: ${{ matrix.build-type }} cuda-major-version: ${{ matrix.cuda-major-version }} cuda-minor-version: ${{ matrix.cuda-minor-version }} platforms: ${{ matrix.platforms }} runs-on: ${{ matrix.runs-on }} base-image: ${{ matrix.base-image }} backend: ${{ matrix.backend }} dockerfile: ${{ matrix.dockerfile }} skip-drivers: ${{ matrix.skip-drivers }} context: ${{ matrix.context }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} strategy: fail-fast: false #max-parallel: ${{ github.event_name != 'pull_request' && 6 || 4 }} matrix: include: # CUDA 11 builds - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-11-rerankers' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-11-llama-cpp' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-11-transformers' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-11-diffusers' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" # CUDA 11 additional backends - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-11-kokoro' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-11-faster-whisper' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-11-coqui' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "coqui" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-11-bark' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-11-chatterbox' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./backend" # CUDA 12 builds - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-rerankers' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-vllm' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "vllm" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-transformers' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-diffusers' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" # CUDA 12 additional backends - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-kokoro' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-coqui' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "coqui" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-bark' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-chatterbox' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./backend" # hipblas builds - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-rerankers' runs-on: 'ubuntu-latest' base-image: "rocm/dev-ubuntu-22.04:6.1" skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-llama-cpp' runs-on: 'ubuntu-latest' base-image: "rocm/dev-ubuntu-22.04:6.1" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-vllm' runs-on: 'ubuntu-latest' base-image: "rocm/dev-ubuntu-22.04:6.1" skip-drivers: 'false' backend: "vllm" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-transformers' runs-on: 'ubuntu-latest' base-image: "rocm/dev-ubuntu-22.04:6.1" skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-diffusers' runs-on: 'ubuntu-latest' base-image: "rocm/dev-ubuntu-22.04:6.1" skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" # ROCm additional backends - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-kokoro' runs-on: 'ubuntu-latest' base-image: "rocm/dev-ubuntu-22.04:6.1" skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-faster-whisper' runs-on: 'ubuntu-latest' base-image: "rocm/dev-ubuntu-22.04:6.1" skip-drivers: 'false' backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-coqui' runs-on: 'ubuntu-latest' base-image: "rocm/dev-ubuntu-22.04:6.1" skip-drivers: 'false' backend: "coqui" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-bark' runs-on: 'ubuntu-latest' base-image: "rocm/dev-ubuntu-22.04:6.1" skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" context: "./backend" # sycl builds - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-rerankers' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'sycl_f16' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f16-rerankers' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-llama-cpp' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - build-type: 'sycl_f16' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f16-llama-cpp' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-vllm' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" skip-drivers: 'false' backend: "vllm" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'sycl_f16' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f16-vllm' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" skip-drivers: 'false' backend: "vllm" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-transformers' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'sycl_f16' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f16-transformers' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-diffusers' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" # SYCL additional backends - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-kokoro' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'sycl_f16' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f16-kokoro' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-faster-whisper' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" skip-drivers: 'false' backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'sycl_f16' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f16-faster-whisper' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" skip-drivers: 'false' backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-coqui' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" skip-drivers: 'false' backend: "coqui" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'sycl_f16' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f16-coqui' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" skip-drivers: 'false' backend: "coqui" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-bark' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" context: "./backend" - build-type: 'sycl_f16' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f16-bark' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" context: "./backend" # piper - build-type: '' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64,linux/arm64' tag-latest: 'auto' tag-suffix: '-piper' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "piper" dockerfile: "./backend/Dockerfile.go" context: "./" # bark-cpp - build-type: '' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-bark-cpp' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "bark-cpp" dockerfile: "./backend/Dockerfile.go" context: "./" - build-type: '' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64,linux/arm64' tag-latest: 'auto' tag-suffix: '-cpu-llama-cpp' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64-llama-cpp' base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" runs-on: 'ubuntu-24.04-arm' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - build-type: 'vulkan' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-vulkan-llama-cpp' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" # Stablediffusion-ggml - build-type: '' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-cpu-stablediffusion-ggml' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.go" context: "./" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.go" context: "./" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-11-stablediffusion-ggml' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.go" context: "./" - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-stablediffusion-ggml' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" skip-drivers: 'false' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.go" context: "./" - build-type: 'sycl_f16' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f16-stablediffusion-ggml' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" skip-drivers: 'false' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.go" context: "./" - build-type: 'vulkan' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-vulkan-stablediffusion-ggml' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.go" context: "./" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64-stablediffusion-ggml' base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" runs-on: 'ubuntu-24.04-arm' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.go" context: "./" # whisper - build-type: '' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64,linux/arm64' tag-latest: 'auto' tag-suffix: '-cpu-whisper' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.go" context: "./" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-whisper' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.go" context: "./" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-11-whisper' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.go" context: "./" - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-whisper' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.go" context: "./" - build-type: 'sycl_f16' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f16-whisper' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.go" context: "./" - build-type: 'vulkan' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-vulkan-whisper' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.go" context: "./" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64-whisper' base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" runs-on: 'ubuntu-24.04-arm' backend: "whisper" dockerfile: "./backend/Dockerfile.go" context: "./" - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-hipblas-whisper' base-image: "rocm/dev-ubuntu-22.04:6.1" runs-on: 'ubuntu-latest' skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.go" context: "./" #silero-vad - build-type: '' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64,linux/arm64' tag-latest: 'auto' tag-suffix: '-cpu-silero-vad' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "silero-vad" dockerfile: "./backend/Dockerfile.go" context: "./" # local-store - build-type: '' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64,linux/arm64' tag-latest: 'auto' tag-suffix: '-cpu-local-store' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "local-store" dockerfile: "./backend/Dockerfile.go" context: "./" # huggingface - build-type: '' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64,linux/arm64' tag-latest: 'auto' tag-suffix: '-huggingface' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' backend: "huggingface" dockerfile: "./backend/Dockerfile.go" context: "./" llama-cpp-darwin: runs-on: macOS-14 strategy: matrix: go-version: ['1.21.x'] steps: - name: Clone uses: actions/checkout@v4 with: submodules: true - name: Setup Go ${{ matrix.go-version }} uses: actions/setup-go@v5 with: go-version: ${{ matrix.go-version }} cache: false # You can test your matrix by printing the current Go version - name: Display Go version run: go version - name: Dependencies run: | brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm - name: Build llama-cpp-darwin run: | make protogen-go make build bash scripts/build-llama-cpp-darwin.sh ls -la build/darwin.tar mv build/darwin.tar build/llama-cpp.tar - name: Upload llama-cpp.tar uses: actions/upload-artifact@v4 with: name: llama-cpp-tar path: build/llama-cpp.tar llama-cpp-darwin-publish: needs: llama-cpp-darwin runs-on: ubuntu-latest steps: - name: Download llama-cpp.tar uses: actions/download-artifact@v4 with: name: llama-cpp-tar path: . - name: Install crane run: | curl -L https://github.com/google/go-containerregistry/releases/latest/download/go-containerregistry_Linux_x86_64.tar.gz | tar -xz sudo mv crane /usr/local/bin/ - name: Log in to DockerHub run: | echo "${{ secrets.DOCKERHUB_PASSWORD }}" | crane auth login docker.io -u "${{ secrets.DOCKERHUB_USERNAME }}" --password-stdin - name: Log in to quay.io run: | echo "${{ secrets.LOCALAI_REGISTRY_PASSWORD }}" | crane auth login quay.io -u "${{ secrets.LOCALAI_REGISTRY_USERNAME }}" --password-stdin - name: Docker meta id: meta uses: docker/metadata-action@v5 with: images: | localai/localai-backends tags: | type=ref,event=branch type=semver,pattern={{raw}} type=sha flavor: | latest=auto suffix=-metal-darwin-arm64-llama-cpp,onlatest=true - name: Docker meta id: quaymeta uses: docker/metadata-action@v5 with: images: | quay.io/go-skynet/local-ai-backends tags: | type=ref,event=branch type=semver,pattern={{raw}} type=sha flavor: | latest=auto suffix=-metal-darwin-arm64-llama-cpp,onlatest=true - name: Push Docker image (DockerHub) run: | for tag in $(echo "${{ steps.meta.outputs.tags }}" | tr ',' '\n'); do crane push llama-cpp.tar $tag done - name: Push Docker image (Quay) run: | for tag in $(echo "${{ steps.quaymeta.outputs.tags }}" | tr ',' '\n'); do crane push llama-cpp.tar $tag done llama-cpp-darwin-x86: runs-on: macos-13 strategy: matrix: go-version: ['1.21.x'] steps: - name: Clone uses: actions/checkout@v4 with: submodules: true - name: Setup Go ${{ matrix.go-version }} uses: actions/setup-go@v5 with: go-version: ${{ matrix.go-version }} cache: false # You can test your matrix by printing the current Go version - name: Display Go version run: go version - name: Dependencies run: | brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm - name: Build llama-cpp-darwin run: | make protogen-go make build export PLATFORMARCH=darwin/amd64 bash scripts/build-llama-cpp-darwin.sh ls -la build/darwin.tar mv build/darwin.tar build/llama-cpp.tar - name: Upload llama-cpp.tar uses: actions/upload-artifact@v4 with: name: llama-cpp-tar-x86 path: build/llama-cpp.tar llama-cpp-darwin-x86-publish: needs: llama-cpp-darwin-x86 runs-on: ubuntu-latest steps: - name: Download llama-cpp.tar uses: actions/download-artifact@v4 with: name: llama-cpp-tar-x86 path: . - name: Install crane run: | curl -L https://github.com/google/go-containerregistry/releases/latest/download/go-containerregistry_Linux_x86_64.tar.gz | tar -xz sudo mv crane /usr/local/bin/ - name: Log in to DockerHub run: | echo "${{ secrets.DOCKERHUB_PASSWORD }}" | crane auth login docker.io -u "${{ secrets.DOCKERHUB_USERNAME }}" --password-stdin - name: Log in to quay.io run: | echo "${{ secrets.LOCALAI_REGISTRY_PASSWORD }}" | crane auth login quay.io -u "${{ secrets.LOCALAI_REGISTRY_USERNAME }}" --password-stdin - name: Docker meta id: meta uses: docker/metadata-action@v5 with: images: | localai/localai-backends tags: | type=ref,event=branch type=semver,pattern={{raw}} type=sha flavor: | latest=auto suffix=-darwin-x86-llama-cpp,onlatest=true - name: Docker meta id: quaymeta uses: docker/metadata-action@v5 with: images: | quay.io/go-skynet/local-ai-backends tags: | type=ref,event=branch type=semver,pattern={{raw}} type=sha flavor: | latest=auto suffix=-darwin-x86-llama-cpp,onlatest=true - name: Push Docker image (DockerHub) run: | for tag in $(echo "${{ steps.meta.outputs.tags }}" | tr ',' '\n'); do crane push llama-cpp.tar $tag done - name: Push Docker image (Quay) run: | for tag in $(echo "${{ steps.quaymeta.outputs.tags }}" | tr ',' '\n'); do crane push llama-cpp.tar $tag done