From 002f75ac79499cb33ff8ef38c9fbd1863157f15c Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sun, 28 Sep 2025 18:23:46 +0200
Subject: [PATCH] ci(tests): drop me. Test vulkan build

Signed-off-by: Ettore Di Giacinto
---
 .github/workflows/backend.yml | 1697 +++++++++++----------------------
 1 file changed, 563 insertions(+), 1134 deletions(-)

diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml
index 393f094e6..39b5506d5 100644
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -2,6 +2,7 @@ name: 'build backend container images'
 
 on:
+  pull_request:
   push:
     branches:
       - master
 
@@ -38,568 +39,568 @@ jobs:
       #max-parallel: ${{ github.event_name != 'pull_request' && 6 || 4 }}
       matrix:
         include:
-          # CUDA 11 builds
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-rerankers'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "rerankers"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-llama-cpp'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "llama-cpp"
-            dockerfile: "./backend/Dockerfile.llama-cpp"
-            context: "./"
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-transformers'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "transformers"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-diffusers'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "diffusers"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-          - build-type: 'l4t'
-            cuda-major-version: "12"
-            cuda-minor-version: "0"
-            platforms: 'linux/arm64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-l4t-diffusers'
-            runs-on: 'ubuntu-24.04-arm'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
-            skip-drivers: 'true'
-            backend: "diffusers"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-          - build-type: ''
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-cpu-diffusers'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'true'
-            backend: "diffusers"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-          - build-type: ''
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-cpu-chatterbox'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'true'
-            backend: "chatterbox"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-          # CUDA 11 additional backends
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-kokoro'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "kokoro"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-          - build-type: 'cublas'
-            cuda-major-version: "11"
cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-faster-whisper' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "faster-whisper" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-coqui' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "coqui" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-bark' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "bark" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-chatterbox' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "chatterbox" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - # CUDA 12 builds - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-rerankers' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "rerankers" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "llama-cpp" - dockerfile: "./backend/Dockerfile.llama-cpp" - context: "./" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-vllm' - runs-on: 'arc-runner-set' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "vllm" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-transformers' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "transformers" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-diffusers' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "diffusers" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - # CUDA 12 additional backends - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-kokoro' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "kokoro" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: 
-            tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "faster-whisper"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-          - build-type: 'cublas'
-            cuda-major-version: "12"
-            cuda-minor-version: "0"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-12-coqui'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "coqui"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-          - build-type: 'cublas'
-            cuda-major-version: "12"
-            cuda-minor-version: "0"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-12-bark'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "bark"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-          - build-type: 'cublas'
-            cuda-major-version: "12"
-            cuda-minor-version: "0"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-12-chatterbox'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "chatterbox"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-          # hipblas builds
-          - build-type: 'hipblas'
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-rocm-hipblas-rerankers'
-            runs-on: 'ubuntu-latest'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
-            skip-drivers: 'false'
-            backend: "rerankers"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-          - build-type: 'hipblas'
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-rocm-hipblas-llama-cpp'
-            runs-on: 'ubuntu-latest'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
-            skip-drivers: 'false'
-            backend: "llama-cpp"
-            dockerfile: "./backend/Dockerfile.llama-cpp"
-            context: "./"
-          - build-type: 'hipblas'
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-rocm-hipblas-vllm'
-            runs-on: 'arc-runner-set'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
-            skip-drivers: 'false'
-            backend: "vllm"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-          - build-type: 'hipblas'
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-rocm-hipblas-transformers'
-            runs-on: 'arc-runner-set'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
-            skip-drivers: 'false'
-            backend: "transformers"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-          - build-type: 'hipblas'
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-rocm-hipblas-diffusers'
-            runs-on: 'arc-runner-set'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
-            skip-drivers: 'false'
-            backend: "diffusers"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-          # ROCm additional backends
-          - build-type: 'hipblas'
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-rocm-hipblas-kokoro'
-            runs-on: 'arc-runner-set'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
-            skip-drivers: 'false'
-            backend: "kokoro"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-          - build-type: 'hipblas'
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-rocm-hipblas-faster-whisper'
-            runs-on: 'ubuntu-latest'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
-            skip-drivers: 'false'
-            backend: "faster-whisper"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-          - build-type: 'hipblas'
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-rocm-hipblas-coqui'
-            runs-on: 'ubuntu-latest'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
-            skip-drivers: 'false'
-            backend: "coqui"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-          - build-type: 'hipblas'
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-rocm-hipblas-bark'
-            runs-on: 'arc-runner-set'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
-            skip-drivers: 'false'
-            backend: "bark"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-          # sycl builds
-          - build-type: 'intel'
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-intel-rerankers'
-            runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            skip-drivers: 'false'
-            backend: "rerankers"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-          - build-type: 'sycl_f32'
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-intel-sycl-f32-llama-cpp'
-            runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            skip-drivers: 'false'
-            backend: "llama-cpp"
-            dockerfile: "./backend/Dockerfile.llama-cpp"
-            context: "./"
-          - build-type: 'sycl_f16'
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-intel-sycl-f16-llama-cpp'
-            runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            skip-drivers: 'false'
-            backend: "llama-cpp"
-            dockerfile: "./backend/Dockerfile.llama-cpp"
-            context: "./"
-          - build-type: 'intel'
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-intel-vllm'
-            runs-on: 'arc-runner-set'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            skip-drivers: 'false'
-            backend: "vllm"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-          - build-type: 'intel'
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-intel-transformers'
-            runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            skip-drivers: 'false'
-            backend: "transformers"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-          - build-type: 'intel'
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-intel-diffusers'
-            runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            skip-drivers: 'false'
-            backend: "diffusers"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-          - build-type: 'l4t'
-            cuda-major-version: "12"
-            cuda-minor-version: "0"
-            platforms: 'linux/arm64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-l4t-kokoro'
-            runs-on: 'ubuntu-24.04-arm'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
-            skip-drivers: 'true'
-            backend: "kokoro"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-          # SYCL additional backends
-          - build-type: 'intel'
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-intel-kokoro'
-            runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            skip-drivers: 'false'
-            backend: "kokoro"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-          - build-type: 'intel'
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-intel-faster-whisper'
-            runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            skip-drivers: 'false'
-            backend: "faster-whisper"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-          - build-type: 'intel'
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-intel-coqui'
-            runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            skip-drivers: 'false'
-            backend: "coqui"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-          - build-type: 'intel'
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-intel-bark'
-            runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            skip-drivers: 'false'
-            backend: "bark"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-          # piper
-          - build-type: ''
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64,linux/arm64'
-            tag-latest: 'auto'
-            tag-suffix: '-piper'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "piper"
-            dockerfile: "./backend/Dockerfile.golang"
-            context: "./"
-          # bark-cpp
-          - build-type: ''
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-bark-cpp'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "bark-cpp"
-            dockerfile: "./backend/Dockerfile.golang"
-            context: "./"
-          - build-type: ''
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64,linux/arm64'
-            tag-latest: 'auto'
-            tag-suffix: '-cpu-llama-cpp'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "llama-cpp"
-            dockerfile: "./backend/Dockerfile.llama-cpp"
-            context: "./"
-          - build-type: 'cublas'
-            cuda-major-version: "12"
-            cuda-minor-version: "0"
-            platforms: 'linux/arm64'
-            skip-drivers: 'true'
-            tag-latest: 'auto'
-            tag-suffix: '-nvidia-l4t-arm64-llama-cpp'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
-            runs-on: 'ubuntu-24.04-arm'
-            backend: "llama-cpp"
-            dockerfile: "./backend/Dockerfile.llama-cpp"
-            context: "./"
+          # # CUDA 11 builds
+          # - build-type: 'cublas'
+          #   cuda-major-version: "11"
+          #   cuda-minor-version: "7"
+          #   platforms: 'linux/amd64'
+          #   tag-latest: 'auto'
+          #   tag-suffix: '-gpu-nvidia-cuda-11-rerankers'
+          #   runs-on: 'ubuntu-latest'
+          #   base-image: "ubuntu:22.04"
+          #   skip-drivers: 'false'
+          #   backend: "rerankers"
+          #   dockerfile: "./backend/Dockerfile.python"
+          #   context: "./backend"
+          # - build-type: 'cublas'
+          #   cuda-major-version: "11"
+          #   cuda-minor-version: "7"
+          #   platforms: 'linux/amd64'
+          #   tag-latest: 'auto'
+          #   tag-suffix: '-gpu-nvidia-cuda-11-llama-cpp'
+          #   runs-on: 'ubuntu-latest'
+          #   base-image: "ubuntu:22.04"
+          #   skip-drivers: 'false'
+          #   backend: "llama-cpp"
+          #   dockerfile: "./backend/Dockerfile.llama-cpp"
+          #   context: "./"
+          # - build-type: 'cublas'
+          #   cuda-major-version: "11"
+          #   cuda-minor-version: "7"
+          #   platforms: 'linux/amd64'
+          #   tag-latest: 'auto'
+          #   tag-suffix: '-gpu-nvidia-cuda-11-transformers'
+          #   runs-on: 'ubuntu-latest'
+          #   base-image: "ubuntu:22.04"
+          #   skip-drivers: 'false'
+          #   backend: "transformers"
+          #   dockerfile: "./backend/Dockerfile.python"
+          #   context: "./backend"
+          # - build-type: 'cublas'
+          #   cuda-major-version: "11"
+          #   cuda-minor-version: "7"
+          #   platforms: 'linux/amd64'
+          #   tag-latest: 'auto'
+          #   tag-suffix: '-gpu-nvidia-cuda-11-diffusers'
+          #   runs-on: 'ubuntu-latest'
+          #   base-image: "ubuntu:22.04"
+          #   skip-drivers: 'false'
+          #   backend: "diffusers"
+          #   dockerfile: "./backend/Dockerfile.python"
+          #   context: "./backend"
+          # - build-type: 'l4t'
+          #   cuda-major-version: "12"
+          #   cuda-minor-version: "0"
+          #   platforms: 'linux/arm64'
+          #   tag-latest: 'auto'
+          #   tag-suffix: '-gpu-nvidia-l4t-diffusers'
+          #   runs-on: 'ubuntu-24.04-arm'
+          #   base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+          #   skip-drivers: 'true'
+          #   backend: "diffusers"
+          #   dockerfile: "./backend/Dockerfile.python"
+          #   context: "./backend"
+          # - build-type: ''
+          #   cuda-major-version: ""
+          #   cuda-minor-version: ""
+          #   platforms: 'linux/amd64'
+          #   tag-latest: 'auto'
+          #   tag-suffix: '-cpu-diffusers'
+          #   runs-on: 'ubuntu-latest'
+          #   base-image: "ubuntu:22.04"
+          #   skip-drivers: 'true'
+          #   backend: "diffusers"
+          #   dockerfile: "./backend/Dockerfile.python"
+          #   context: "./backend"
+          # - build-type: ''
+          #   cuda-major-version: ""
+          #   cuda-minor-version: ""
+          #   platforms: 'linux/amd64'
+          #   tag-latest: 'auto'
+          #   tag-suffix: '-cpu-chatterbox'
+          #   runs-on: 'ubuntu-latest'
+          #   base-image: "ubuntu:22.04"
+          #   skip-drivers: 'true'
+          #   backend: "chatterbox"
+          #   dockerfile: "./backend/Dockerfile.python"
+          #   context: "./backend"
+          # # CUDA 11 additional backends
+          # - build-type: 'cublas'
+          #   cuda-major-version: "11"
+          #   cuda-minor-version: "7"
+          #   platforms: 'linux/amd64'
+          #   tag-latest: 'auto'
+          #   tag-suffix: '-gpu-nvidia-cuda-11-kokoro'
+          #   runs-on: 'ubuntu-latest'
+          #   base-image: "ubuntu:22.04"
+          #   skip-drivers: 'false'
+          #   backend: "kokoro"
+          #   dockerfile: "./backend/Dockerfile.python"
+          #   context: "./backend"
+          # - build-type: 'cublas'
+          #   cuda-major-version: "11"
+          #   cuda-minor-version: "7"
+          #   platforms: 'linux/amd64'
+          #   tag-latest: 'auto'
+          #   tag-suffix: '-gpu-nvidia-cuda-11-faster-whisper'
+          #   runs-on: 'ubuntu-latest'
+          #   base-image: "ubuntu:22.04"
+          #   skip-drivers: 'false'
+          #   backend: "faster-whisper"
+          #   dockerfile: "./backend/Dockerfile.python"
+          #   context: "./backend"
+          # - build-type: 'cublas'
+          #   cuda-major-version: "11"
+          #   cuda-minor-version: "7"
+          #   platforms: 'linux/amd64'
+          #   tag-latest: 'auto'
+          #   tag-suffix: '-gpu-nvidia-cuda-11-coqui'
+          #   runs-on: 'ubuntu-latest'
+          #   base-image: "ubuntu:22.04"
+          #   skip-drivers: 'false'
+          #   backend: "coqui"
+          #   dockerfile: "./backend/Dockerfile.python"
+          #   context: "./backend"
+          # - build-type: 'cublas'
+          #   cuda-major-version: "11"
+          #   cuda-minor-version: "7"
+          #   platforms: 'linux/amd64'
+          #   tag-latest: 'auto'
+          #   tag-suffix: '-gpu-nvidia-cuda-11-bark'
+          #   runs-on: 'ubuntu-latest'
+          #   base-image: "ubuntu:22.04"
+          #   skip-drivers: 'false'
+          #   backend: "bark"
+          #   dockerfile: "./backend/Dockerfile.python"
+          #   context: "./backend"
+          # - build-type: 'cublas'
+          #   cuda-major-version: "11"
+          #   cuda-minor-version: "7"
+          #   platforms: 'linux/amd64'
+          #   tag-latest: 'auto'
+          #   tag-suffix: '-gpu-nvidia-cuda-11-chatterbox'
+          #   runs-on: 'ubuntu-latest'
+          #   base-image: "ubuntu:22.04"
+          #   skip-drivers: 'false'
+          #   backend: "chatterbox"
+          #   dockerfile: "./backend/Dockerfile.python"
+          #   context: "./backend"
+          # # CUDA 12 builds
+          # - build-type: 'cublas'
# cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-12-rerankers' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "rerankers" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "llama-cpp" + # dockerfile: "./backend/Dockerfile.llama-cpp" + # context: "./" + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-12-vllm' + # runs-on: 'arc-runner-set' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "vllm" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-12-transformers' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "transformers" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-12-diffusers' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "diffusers" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # # CUDA 12 additional backends + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-12-kokoro' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "kokoro" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "faster-whisper" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-12-coqui' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "coqui" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-12-bark' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "bark" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-12-chatterbox' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # 
backend: "chatterbox" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # # hipblas builds + # - build-type: 'hipblas' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-rocm-hipblas-rerankers' + # runs-on: 'ubuntu-latest' + # base-image: "rocm/dev-ubuntu-22.04:6.4.3" + # skip-drivers: 'false' + # backend: "rerankers" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'hipblas' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-rocm-hipblas-llama-cpp' + # runs-on: 'ubuntu-latest' + # base-image: "rocm/dev-ubuntu-22.04:6.4.3" + # skip-drivers: 'false' + # backend: "llama-cpp" + # dockerfile: "./backend/Dockerfile.llama-cpp" + # context: "./" + # - build-type: 'hipblas' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-rocm-hipblas-vllm' + # runs-on: 'arc-runner-set' + # base-image: "rocm/dev-ubuntu-22.04:6.4.3" + # skip-drivers: 'false' + # backend: "vllm" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'hipblas' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-rocm-hipblas-transformers' + # runs-on: 'arc-runner-set' + # base-image: "rocm/dev-ubuntu-22.04:6.4.3" + # skip-drivers: 'false' + # backend: "transformers" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'hipblas' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-rocm-hipblas-diffusers' + # runs-on: 'arc-runner-set' + # base-image: "rocm/dev-ubuntu-22.04:6.4.3" + # skip-drivers: 'false' + # backend: "diffusers" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # # ROCm additional backends + # - build-type: 'hipblas' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-rocm-hipblas-kokoro' + # runs-on: 'arc-runner-set' + # base-image: "rocm/dev-ubuntu-22.04:6.4.3" + # skip-drivers: 'false' + # backend: "kokoro" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'hipblas' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-rocm-hipblas-faster-whisper' + # runs-on: 'ubuntu-latest' + # base-image: "rocm/dev-ubuntu-22.04:6.4.3" + # skip-drivers: 'false' + # backend: "faster-whisper" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'hipblas' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-rocm-hipblas-coqui' + # runs-on: 'ubuntu-latest' + # base-image: "rocm/dev-ubuntu-22.04:6.4.3" + # skip-drivers: 'false' + # backend: "coqui" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'hipblas' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-rocm-hipblas-bark' + # runs-on: 'arc-runner-set' + # base-image: "rocm/dev-ubuntu-22.04:6.4.3" + # skip-drivers: 'false' + # backend: "bark" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # # sycl builds + # - 
+          # - build-type: 'intel'
+          #   cuda-major-version: ""
+          #   cuda-minor-version: ""
+          #   platforms: 'linux/amd64'
+          #   tag-latest: 'auto'
+          #   tag-suffix: '-gpu-intel-rerankers'
+          #   runs-on: 'ubuntu-latest'
+          #   base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+          #   skip-drivers: 'false'
+          #   backend: "rerankers"
+          #   dockerfile: "./backend/Dockerfile.python"
+          #   context: "./backend"
+          # - build-type: 'sycl_f32'
+          #   cuda-major-version: ""
+          #   cuda-minor-version: ""
+          #   platforms: 'linux/amd64'
+          #   tag-latest: 'auto'
+          #   tag-suffix: '-gpu-intel-sycl-f32-llama-cpp'
+          #   runs-on: 'ubuntu-latest'
+          #   base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+          #   skip-drivers: 'false'
+          #   backend: "llama-cpp"
+          #   dockerfile: "./backend/Dockerfile.llama-cpp"
+          #   context: "./"
+          # - build-type: 'sycl_f16'
+          #   cuda-major-version: ""
+          #   cuda-minor-version: ""
+          #   platforms: 'linux/amd64'
+          #   tag-latest: 'auto'
+          #   tag-suffix: '-gpu-intel-sycl-f16-llama-cpp'
+          #   runs-on: 'ubuntu-latest'
+          #   base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+          #   skip-drivers: 'false'
+          #   backend: "llama-cpp"
+          #   dockerfile: "./backend/Dockerfile.llama-cpp"
+          #   context: "./"
+          # - build-type: 'intel'
+          #   cuda-major-version: ""
+          #   cuda-minor-version: ""
+          #   platforms: 'linux/amd64'
+          #   tag-latest: 'auto'
+          #   tag-suffix: '-gpu-intel-vllm'
+          #   runs-on: 'arc-runner-set'
+          #   base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+          #   skip-drivers: 'false'
+          #   backend: "vllm"
+          #   dockerfile: "./backend/Dockerfile.python"
+          #   context: "./backend"
+          # - build-type: 'intel'
+          #   cuda-major-version: ""
+          #   cuda-minor-version: ""
+          #   platforms: 'linux/amd64'
+          #   tag-latest: 'auto'
+          #   tag-suffix: '-gpu-intel-transformers'
+          #   runs-on: 'ubuntu-latest'
+          #   base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+          #   skip-drivers: 'false'
+          #   backend: "transformers"
+          #   dockerfile: "./backend/Dockerfile.python"
+          #   context: "./backend"
+          # - build-type: 'intel'
+          #   cuda-major-version: ""
+          #   cuda-minor-version: ""
+          #   platforms: 'linux/amd64'
+          #   tag-latest: 'auto'
+          #   tag-suffix: '-gpu-intel-diffusers'
+          #   runs-on: 'ubuntu-latest'
+          #   base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+          #   skip-drivers: 'false'
+          #   backend: "diffusers"
+          #   dockerfile: "./backend/Dockerfile.python"
+          #   context: "./backend"
+          # - build-type: 'l4t'
+          #   cuda-major-version: "12"
+          #   cuda-minor-version: "0"
+          #   platforms: 'linux/arm64'
+          #   tag-latest: 'auto'
+          #   tag-suffix: '-gpu-nvidia-l4t-kokoro'
+          #   runs-on: 'ubuntu-24.04-arm'
+          #   base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+          #   skip-drivers: 'true'
+          #   backend: "kokoro"
+          #   dockerfile: "./backend/Dockerfile.python"
+          #   context: "./backend"
+          # # SYCL additional backends
+          # - build-type: 'intel'
+          #   cuda-major-version: ""
+          #   cuda-minor-version: ""
+          #   platforms: 'linux/amd64'
+          #   tag-latest: 'auto'
+          #   tag-suffix: '-gpu-intel-kokoro'
+          #   runs-on: 'ubuntu-latest'
+          #   base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+          #   skip-drivers: 'false'
+          #   backend: "kokoro"
+          #   dockerfile: "./backend/Dockerfile.python"
+          #   context: "./backend"
+          # - build-type: 'intel'
+          #   cuda-major-version: ""
+          #   cuda-minor-version: ""
+          #   platforms: 'linux/amd64'
+          #   tag-latest: 'auto'
+          #   tag-suffix: '-gpu-intel-faster-whisper'
+          #   runs-on: 'ubuntu-latest'
+          #   base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+          #   skip-drivers: 'false'
+          #   backend: "faster-whisper"
+          #   dockerfile: "./backend/Dockerfile.python"
+          #   context: "./backend"
+          # - build-type: 'intel'
+          #   cuda-major-version: ""
cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-intel-coqui' + # runs-on: 'ubuntu-latest' + # base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + # skip-drivers: 'false' + # backend: "coqui" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'intel' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-intel-bark' + # runs-on: 'ubuntu-latest' + # base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + # skip-drivers: 'false' + # backend: "bark" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # # piper + # - build-type: '' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64,linux/arm64' + # tag-latest: 'auto' + # tag-suffix: '-piper' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "piper" + # dockerfile: "./backend/Dockerfile.golang" + # context: "./" + # # bark-cpp + # - build-type: '' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-bark-cpp' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "bark-cpp" + # dockerfile: "./backend/Dockerfile.golang" + # context: "./" + # - build-type: '' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64,linux/arm64' + # tag-latest: 'auto' + # tag-suffix: '-cpu-llama-cpp' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "llama-cpp" + # dockerfile: "./backend/Dockerfile.llama-cpp" + # context: "./" + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/arm64' + # skip-drivers: 'true' + # tag-latest: 'auto' + # tag-suffix: '-nvidia-l4t-arm64-llama-cpp' + # base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + # runs-on: 'ubuntu-24.04-arm' + # backend: "llama-cpp" + # dockerfile: "./backend/Dockerfile.llama-cpp" + # context: "./" - build-type: 'vulkan' cuda-major-version: "" cuda-minor-version: "" @@ -612,67 +613,6 @@ jobs: backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - # Stablediffusion-ggml - - build-type: '' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-cpu-stablediffusion-ggml' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "stablediffusion-ggml" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "stablediffusion-ggml" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-stablediffusion-ggml' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "stablediffusion-ggml" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - - build-type: 'sycl_f32' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-intel-sycl-f32-stablediffusion-ggml' - 
-            runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            skip-drivers: 'false'
-            backend: "stablediffusion-ggml"
-            dockerfile: "./backend/Dockerfile.golang"
-            context: "./"
-          - build-type: 'sycl_f16'
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-intel-sycl-f16-stablediffusion-ggml'
-            runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            skip-drivers: 'false'
-            backend: "stablediffusion-ggml"
-            dockerfile: "./backend/Dockerfile.golang"
-            context: "./"
           - build-type: 'vulkan'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -685,79 +625,6 @@ jobs:
             backend: "stablediffusion-ggml"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-          - build-type: 'cublas'
-            cuda-major-version: "12"
-            cuda-minor-version: "0"
-            platforms: 'linux/arm64'
-            skip-drivers: 'true'
-            tag-latest: 'auto'
-            tag-suffix: '-nvidia-l4t-arm64-stablediffusion-ggml'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
-            runs-on: 'ubuntu-24.04-arm'
-            backend: "stablediffusion-ggml"
-            dockerfile: "./backend/Dockerfile.golang"
-            context: "./"
-          # whisper
-          - build-type: ''
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64,linux/arm64'
-            tag-latest: 'auto'
-            tag-suffix: '-cpu-whisper'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "whisper"
-            dockerfile: "./backend/Dockerfile.golang"
-            context: "./"
-          - build-type: 'cublas'
-            cuda-major-version: "12"
-            cuda-minor-version: "0"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-12-whisper'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "whisper"
-            dockerfile: "./backend/Dockerfile.golang"
-            context: "./"
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-whisper'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "whisper"
-            dockerfile: "./backend/Dockerfile.golang"
-            context: "./"
-          - build-type: 'sycl_f32'
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-intel-sycl-f32-whisper'
-            runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            skip-drivers: 'false'
-            backend: "whisper"
-            dockerfile: "./backend/Dockerfile.golang"
-            context: "./"
-          - build-type: 'sycl_f16'
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-intel-sycl-f16-whisper'
-            runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            skip-drivers: 'false'
-            backend: "whisper"
-            dockerfile: "./backend/Dockerfile.golang"
-            context: "./"
           - build-type: 'vulkan'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -770,441 +637,3 @@ jobs:
             backend: "whisper"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-          - build-type: 'cublas'
-            cuda-major-version: "12"
-            cuda-minor-version: "0"
-            platforms: 'linux/arm64'
-            skip-drivers: 'true'
-            tag-latest: 'auto'
-            tag-suffix: '-nvidia-l4t-arm64-whisper'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
-            runs-on: 'ubuntu-24.04-arm'
-            backend: "whisper"
-            dockerfile: "./backend/Dockerfile.golang"
-            context: "./"
-          - build-type: 'hipblas'
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-rocm-hipblas-whisper'
- base-image: "rocm/dev-ubuntu-22.04:6.4.3" - runs-on: 'ubuntu-latest' - skip-drivers: 'false' - backend: "whisper" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - #silero-vad - - build-type: '' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64,linux/arm64' - tag-latest: 'auto' - tag-suffix: '-cpu-silero-vad' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "silero-vad" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - # local-store - - build-type: '' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64,linux/arm64' - tag-latest: 'auto' - tag-suffix: '-cpu-local-store' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "local-store" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - # huggingface - - build-type: '' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64,linux/arm64' - tag-latest: 'auto' - tag-suffix: '-huggingface' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "huggingface" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - # rfdetr - - build-type: '' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64,linux/arm64' - tag-latest: 'auto' - tag-suffix: '-cpu-rfdetr' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "rfdetr" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "rfdetr" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-rfdetr' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "rfdetr" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'intel' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-intel-rfdetr' - runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - skip-drivers: 'false' - backend: "rfdetr" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'l4t' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/arm64' - skip-drivers: 'true' - tag-latest: 'auto' - tag-suffix: '-nvidia-l4t-arm64-rfdetr' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" - runs-on: 'ubuntu-24.04-arm' - backend: "rfdetr" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - # exllama2 - - build-type: '' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-cpu-exllama2' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "exllama2" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-exllama2' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "exllama2" - dockerfile: 
"./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-exllama2' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "exllama2" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'intel' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-intel-exllama2' - runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - skip-drivers: 'false' - backend: "exllama2" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'hipblas' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - skip-drivers: 'true' - tag-latest: 'auto' - tag-suffix: '-gpu-hipblas-exllama2' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" - runs-on: 'ubuntu-latest' - backend: "exllama2" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'l4t' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/arm64' - skip-drivers: 'true' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-l4t-arm64-chatterbox' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" - runs-on: 'ubuntu-24.04-arm' - backend: "chatterbox" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - # runs out of space on the runner - # - build-type: 'hipblas' - # cuda-major-version: "" - # cuda-minor-version: "" - # platforms: 'linux/amd64' - # tag-latest: 'auto' - # tag-suffix: '-gpu-hipblas-rfdetr' - # base-image: "rocm/dev-ubuntu-22.04:6.4.3" - # runs-on: 'ubuntu-latest' - # skip-drivers: 'false' - # backend: "rfdetr" - # dockerfile: "./backend/Dockerfile.python" - # context: "./backend" - # kitten-tts - - build-type: '' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64,linux/arm64' - tag-latest: 'auto' - tag-suffix: '-kitten-tts' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "kitten-tts" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - backend-jobs-darwin: - uses: ./.github/workflows/backend_build_darwin.yml - strategy: - matrix: - include: - - backend: "diffusers" - tag-suffix: "-metal-darwin-arm64-diffusers" - build-type: "mps" - - backend: "mlx" - tag-suffix: "-metal-darwin-arm64-mlx" - build-type: "mps" - - backend: "chatterbox" - tag-suffix: "-metal-darwin-arm64-chatterbox" - build-type: "mps" - - backend: "mlx-vlm" - tag-suffix: "-metal-darwin-arm64-mlx-vlm" - build-type: "mps" - - backend: "mlx-audio" - tag-suffix: "-metal-darwin-arm64-mlx-audio" - build-type: "mps" - - backend: "stablediffusion-ggml" - tag-suffix: "-metal-darwin-arm64-stablediffusion-ggml" - build-type: "metal" - lang: "go" - - backend: "whisper" - tag-suffix: "-metal-darwin-arm64-whisper" - build-type: "metal" - lang: "go" - with: - backend: ${{ matrix.backend }} - build-type: ${{ matrix.build-type }} - go-version: "1.24.x" - tag-suffix: ${{ matrix.tag-suffix }} - lang: ${{ matrix.lang || 'python' }} - use-pip: ${{ matrix.backend == 'diffusers' }} - runs-on: "macOS-14" - secrets: - dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} - dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} - quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} - quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} - llama-cpp-darwin: - runs-on: macOS-14 - strategy: - matrix: - go-version: ['1.21.x'] 
-    steps:
-      - name: Clone
-        uses: actions/checkout@v5
-        with:
-          submodules: true
-      - name: Setup Go ${{ matrix.go-version }}
-        uses: actions/setup-go@v5
-        with:
-          go-version: ${{ matrix.go-version }}
-          cache: false
-      # You can test your matrix by printing the current Go version
-      - name: Display Go version
-        run: go version
-      - name: Dependencies
-        run: |
-          brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
-      - name: Build llama-cpp-darwin
-        run: |
-          make protogen-go
-          make backends/llama-cpp-darwin
-      - name: Upload llama-cpp.tar
-        uses: actions/upload-artifact@v4
-        with:
-          name: llama-cpp-tar
-          path: backend-images/llama-cpp.tar
-  llama-cpp-darwin-publish:
-    needs: llama-cpp-darwin
-    if: github.event_name != 'pull_request'
-    runs-on: ubuntu-latest
-    steps:
-      - name: Download llama-cpp.tar
-        uses: actions/download-artifact@v5
-        with:
-          name: llama-cpp-tar
-          path: .
-      - name: Install crane
-        run: |
-          curl -L https://github.com/google/go-containerregistry/releases/latest/download/go-containerregistry_Linux_x86_64.tar.gz | tar -xz
-          sudo mv crane /usr/local/bin/
-      - name: Log in to DockerHub
-        run: |
-          echo "${{ secrets.DOCKERHUB_PASSWORD }}" | crane auth login docker.io -u "${{ secrets.DOCKERHUB_USERNAME }}" --password-stdin
-      - name: Log in to quay.io
-        run: |
-          echo "${{ secrets.LOCALAI_REGISTRY_PASSWORD }}" | crane auth login quay.io -u "${{ secrets.LOCALAI_REGISTRY_USERNAME }}" --password-stdin
-      - name: Docker meta
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            localai/localai-backends
-          tags: |
-            type=ref,event=branch
-            type=semver,pattern={{raw}}
-            type=sha
-          flavor: |
-            latest=auto
-            suffix=-metal-darwin-arm64-llama-cpp,onlatest=true
-      - name: Docker meta
-        id: quaymeta
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            quay.io/go-skynet/local-ai-backends
-          tags: |
-            type=ref,event=branch
-            type=semver,pattern={{raw}}
-            type=sha
-          flavor: |
-            latest=auto
-            suffix=-metal-darwin-arm64-llama-cpp,onlatest=true
-      - name: Push Docker image (DockerHub)
-        run: |
-          for tag in $(echo "${{ steps.meta.outputs.tags }}" | tr ',' '\n'); do
-            crane push llama-cpp.tar $tag
-          done
-      - name: Push Docker image (Quay)
-        run: |
-          for tag in $(echo "${{ steps.quaymeta.outputs.tags }}" | tr ',' '\n'); do
-            crane push llama-cpp.tar $tag
-          done
-  llama-cpp-darwin-x86:
-    runs-on: macos-13
-    strategy:
-      matrix:
-        go-version: ['1.21.x']
-    steps:
-      - name: Clone
-        uses: actions/checkout@v5
-        with:
-          submodules: true
-      - name: Setup Go ${{ matrix.go-version }}
-        uses: actions/setup-go@v5
-        with:
-          go-version: ${{ matrix.go-version }}
-          cache: false
-      # You can test your matrix by printing the current Go version
-      - name: Display Go version
-        run: go version
-      - name: Dependencies
-        run: |
-          brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
-      - name: Build llama-cpp-darwin
-        run: |
-          make protogen-go
-          make build
-          export PLATFORMARCH=darwin/amd64
-          make backends/llama-cpp-darwin
-      - name: Upload llama-cpp.tar
-        uses: actions/upload-artifact@v4
-        with:
-          name: llama-cpp-tar-x86
-          path: backend-images/llama-cpp.tar
-  llama-cpp-darwin-x86-publish:
-    if: github.event_name != 'pull_request'
-    needs: llama-cpp-darwin-x86
-    runs-on: ubuntu-latest
-    steps:
-      - name: Download llama-cpp.tar
-        uses: actions/download-artifact@v5
-        with:
-          name: llama-cpp-tar-x86
-          path: .
-      - name: Install crane
-        run: |
-          curl -L https://github.com/google/go-containerregistry/releases/latest/download/go-containerregistry_Linux_x86_64.tar.gz | tar -xz
-          sudo mv crane /usr/local/bin/
-      - name: Log in to DockerHub
-        run: |
-          echo "${{ secrets.DOCKERHUB_PASSWORD }}" | crane auth login docker.io -u "${{ secrets.DOCKERHUB_USERNAME }}" --password-stdin
-      - name: Log in to quay.io
-        run: |
-          echo "${{ secrets.LOCALAI_REGISTRY_PASSWORD }}" | crane auth login quay.io -u "${{ secrets.LOCALAI_REGISTRY_USERNAME }}" --password-stdin
-      - name: Docker meta
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            localai/localai-backends
-          tags: |
-            type=ref,event=branch
-            type=semver,pattern={{raw}}
-            type=sha
-          flavor: |
-            latest=auto
-            suffix=-darwin-x86-llama-cpp,onlatest=true
-      - name: Docker meta
-        id: quaymeta
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            quay.io/go-skynet/local-ai-backends
-          tags: |
-            type=ref,event=branch
-            type=semver,pattern={{raw}}
-            type=sha
-          flavor: |
-            latest=auto
-            suffix=-darwin-x86-llama-cpp,onlatest=true
-      - name: Push Docker image (DockerHub)
-        run: |
-          for tag in $(echo "${{ steps.meta.outputs.tags }}" | tr ',' '\n'); do
-            crane push llama-cpp.tar $tag
-          done
-      - name: Push Docker image (Quay)
-        run: |
-          for tag in $(echo "${{ steps.quaymeta.outputs.tags }}" | tr ',' '\n'); do
-            crane push llama-cpp.tar $tag
-          done