WIP

Revert "feat(nvidia-gpu): bump images to cuda 12.8" (#6303 )
Revert "feat(nvidia-gpu): bump images to cuda 12.8 (#6239)" This reverts commit d9e25af7b5.
2026-02-03 03:02:38 -05:00 · 2025-09-17 21:52:53 +02:00 · 2025-09-17 19:31:43 +02:00
6 changed files with 214 additions and 26 deletions
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -89,7 +89,7 @@ jobs:
            context: "./backend"
          - build-type: 'l4t'
            cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
            platforms: 'linux/arm64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-l4t-diffusers'
@@ -187,7 +187,7 @@ jobs:
          # CUDA 12 builds
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-rerankers'
@@ -197,9 +197,21 @@ jobs:
            backend: "rerankers"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
+          - build-type: 'cublas'
+            cuda-major-version: "13"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-nvidia-cuda-13-rerankers'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            skip-drivers: 'false'
+            backend: "rerankers"
+            dockerfile: "./backend/Dockerfile.python"
+            context: "./backend"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp'
@@ -209,9 +221,21 @@ jobs:
            backend: "llama-cpp"
            dockerfile: "./backend/Dockerfile.llama-cpp"
            context: "./"
+          - build-type: 'cublas'
+            cuda-major-version: "13"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-nvidia-cuda-13-llama-cpp'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            skip-drivers: 'false'
+            backend: "llama-cpp"
+            dockerfile: "./backend/Dockerfile.llama-cpp"
+            context: "./"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-vllm'
@@ -221,9 +245,21 @@ jobs:
            backend: "vllm"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
+          - build-type: 'cublas'
+            cuda-major-version: "13"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-nvidia-cuda-13-vllm'
+            runs-on: 'arc-runner-set'
+            base-image: "ubuntu:22.04"
+            skip-drivers: 'false'
+            backend: "vllm"
+            dockerfile: "./backend/Dockerfile.python"
+            context: "./backend"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-transformers'
@@ -233,9 +269,21 @@ jobs:
            backend: "transformers"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
+          - build-type: 'cublas'
+            cuda-major-version: "13"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-nvidia-cuda-13-transformers'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            skip-drivers: 'false'
+            backend: "transformers"
+            dockerfile: "./backend/Dockerfile.python"
+            context: "./backend"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-diffusers'
@@ -245,10 +293,22 @@ jobs:
            backend: "diffusers"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
-          # CUDA 12 additional backends
+          - build-type: 'cublas'
+            cuda-major-version: "13"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-nvidia-cuda-13-diffusers'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            skip-drivers: 'false'
+            backend: "diffusers"
+            dockerfile: "./backend/Dockerfile.python"
+            context: "./backend"
+          # CUDA additional backends
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-kokoro'
@@ -258,9 +318,21 @@ jobs:
            backend: "kokoro"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
+          - build-type: 'cublas'
+            cuda-major-version: "13"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-nvidia-cuda-13-kokoro'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            skip-drivers: 'false'
+            backend: "kokoro"
+            dockerfile: "./backend/Dockerfile.python"
+            context: "./backend"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper'
@@ -270,9 +342,21 @@ jobs:
            backend: "faster-whisper"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
+          - build-type: 'cublas'
+            cuda-major-version: "13"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-nvidia-cuda-13-faster-whisper'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            skip-drivers: 'false'
+            backend: "faster-whisper"
+            dockerfile: "./backend/Dockerfile.python"
+            context: "./backend"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-coqui'
@@ -282,9 +366,21 @@ jobs:
            backend: "coqui"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
+          - build-type: 'cublas'
+            cuda-major-version: "13"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-nvidia-cuda-13-coqui'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            skip-drivers: 'false'
+            backend: "coqui"
+            dockerfile: "./backend/Dockerfile.python"
+            context: "./backend"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-bark'
@@ -294,9 +390,21 @@ jobs:
            backend: "bark"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
+          - build-type: 'cublas'
+            cuda-major-version: "13"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-nvidia-cuda-13-bark'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            skip-drivers: 'false'
+            backend: "bark"
+            dockerfile: "./backend/Dockerfile.python"
+            context: "./backend"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-chatterbox'
@@ -306,6 +414,18 @@ jobs:
            backend: "chatterbox"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
+          - build-type: 'cublas'
+            cuda-major-version: "13"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-nvidia-cuda-13-chatterbox'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            skip-drivers: 'false'
+            backend: "chatterbox"
+            dockerfile: "./backend/Dockerfile.python"
+            context: "./backend"
          # hipblas builds
          - build-type: 'hipblas'
            cuda-major-version: ""
@@ -578,7 +698,7 @@ jobs:
            context: "./"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
            platforms: 'linux/arm64'
            skip-drivers: 'true'
            tag-latest: 'auto'
@@ -615,7 +735,7 @@ jobs:
            context: "./"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml'
@@ -625,6 +745,18 @@ jobs:
            backend: "stablediffusion-ggml"
            dockerfile: "./backend/Dockerfile.golang"
            context: "./"
+          - build-type: 'cublas'
+            cuda-major-version: "13"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-nvidia-cuda-13-stablediffusion-ggml'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            skip-drivers: 'false'
+            backend: "stablediffusion-ggml"
+            dockerfile: "./backend/Dockerfile.golang"
+            context: "./"
          - build-type: 'cublas'
            cuda-major-version: "11"
            cuda-minor-version: "7"
@@ -675,7 +807,7 @@ jobs:
            context: "./"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
            platforms: 'linux/arm64'
            skip-drivers: 'true'
            tag-latest: 'auto'
@@ -700,7 +832,19 @@ jobs:
            context: "./"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-nvidia-cuda-12-whisper'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            skip-drivers: 'false'
+            backend: "whisper"
+            dockerfile: "./backend/Dockerfile.golang"
+            context: "./"
+          - build-type: 'cublas'
+            cuda-major-version: "13"
+            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-12-whisper'
+            tag-suffix: '-gpu-nvidia-cuda-13-whisper'
@@ -760,7 +904,7 @@ jobs:
            context: "./"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
            platforms: 'linux/arm64'
            skip-drivers: 'true'
            tag-latest: 'auto'
@@ -836,7 +980,19 @@ jobs:
            context: "./backend"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-nvidia-cuda-12-rfdetr'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            skip-drivers: 'false'
+            backend: "rfdetr"
+            dockerfile: "./backend/Dockerfile.python"
+            context: "./backend"
+          - build-type: 'cublas'
+            cuda-major-version: "13"
+            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-12-rfdetr'
+            tag-suffix: '-gpu-nvidia-cuda-13-rfdetr'
@@ -872,7 +1028,7 @@ jobs:
            context: "./backend"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
            platforms: 'linux/arm64'
            skip-drivers: 'true'
            tag-latest: 'auto'
@@ -897,7 +1053,19 @@ jobs:
            context: "./backend"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-nvidia-cuda-12-exllama2'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            skip-drivers: 'false'
+            backend: "exllama2"
+            dockerfile: "./backend/Dockerfile.python"
+            context: "./backend"
+          - build-type: 'cublas'
+            cuda-major-version: "13"
+            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-12-exllama2'
+            tag-suffix: '-gpu-nvidia-cuda-13-exllama2'
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@@ -34,9 +34,18 @@ jobs:
      fail-fast: false
      matrix:
        include:
+          - build-type: 'cublas'
+            cuda-major-version: "13"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: '-gpu-nvidia-cuda-13'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-gpu-nvidia-cuda-12'
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -91,7 +91,7 @@ jobs:
            aio: "-aio-gpu-nvidia-cuda-11"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda-12'
@@ -100,6 +100,17 @@ jobs:
            skip-drivers: 'false'
            makeflags: "--jobs=4 --output-sync=target"
            aio: "-aio-gpu-nvidia-cuda-12"
+          - build-type: 'cublas'
+            cuda-major-version: "13"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-nvidia-cuda-13'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            skip-drivers: 'false'
+            makeflags: "--jobs=4 --output-sync=target"
+            aio: "-aio-gpu-nvidia-cuda-13"
          - build-type: 'vulkan'
            platforms: 'linux/amd64'
            tag-latest: 'auto'
@@ -144,7 +155,7 @@ jobs:
        include:
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
            platforms: 'linux/arm64'
            tag-latest: 'auto'
            tag-suffix: '-nvidia-l4t-arm64'
--- a/Dockerfile
+++ b/Dockerfile
@@ -18,7 +18,7 @@ FROM requirements AS requirements-drivers

 ARG BUILD_TYPE
 ARG CUDA_MAJOR_VERSION=12
-ARG CUDA_MINOR_VERSION=8
+ARG CUDA_MINOR_VERSION=0
 ARG SKIP_DRIVERS=false
 ARG TARGETARCH
 ARG TARGETVARIANT
--- a/Makefile
+++ b/Makefile
@@ -170,7 +170,7 @@ prepare-e2e:
 	mkdir -p $(TEST_DIR)
 	cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
 	test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
-	docker build --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=8 -t localai-tests .
+	docker build --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 -t localai-tests .

 run-e2e-image:
 	ls -liah $(abspath ./tests/e2e-fixtures)
--- a/backend/README.md
+++ b/backend/README.md
@@ -111,7 +111,7 @@ docker build -f backend/Dockerfile.python \
  --build-arg BACKEND=transformers \
  --build-arg BUILD_TYPE=cublas12 \
  --build-arg CUDA_MAJOR_VERSION=12 \
-  --build-arg CUDA_MINOR_VERSION=8 \
+  --build-arg CUDA_MINOR_VERSION=0 \
  -t localai-backend-transformers .

 # Build Go backend
Author	SHA1	Message	Date
Ettore Di Giacinto	9352107999	WIP	2025-09-17 21:52:53 +02:00
Ettore Di Giacinto	77c5acb9db	Revert "feat(nvidia-gpu): bump images to cuda 12.8" (#6303 ) Revert "feat(nvidia-gpu): bump images to cuda 12.8 (#6239)" This reverts commit `d9e25af7b5`.	2025-09-17 19:31:43 +02:00