fix: use ubuntu 24.04 for cuda13 l4t images (#7418)

* fix: use ubuntu 24.04 for cuda13 l4t images

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Drop openblas from containers

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fixups

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fixups

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-02-18 15:12:39 -05:00 · 2025-12-03 09:47:03 +01:00
parent fea9018dc5
commit 8dfeea2f55
9 changed files with 75 additions and 53 deletions
--- a/.env
+++ b/.env
@@ -32,15 +32,6 @@
 # Forces shutdown of the backends if busy (only if LOCALAI_SINGLE_ACTIVE_BACKEND is set)
 # LOCALAI_FORCE_BACKEND_SHUTDOWN=true

-## Specify a build type. Available: cublas, openblas, clblas.
-## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit.
-## OpenBLAS: This is an open-source implementation of the BLAS library that aims to provide highly optimized code for various platforms. It includes support for multi-threading and can be compiled to use hardware-specific features for additional performance. OpenBLAS can run on many kinds of hardware, including CPUs from Intel, AMD, and ARM.
-## clBLAS:   This is an open-source implementation of the BLAS library that uses OpenCL, a framework for writing programs that execute across heterogeneous platforms consisting of CPUs, GPUs, and other processors. clBLAS is designed to take advantage of the parallel computing power of GPUs but can also run on any hardware that supports OpenCL. This includes hardware from different vendors like Nvidia, AMD, and Intel.
-# BUILD_TYPE=openblas
-
-## Uncomment and set to true to enable rebuilding from source
-# REBUILD=true
-
 ## Path where to store generated images
 # LOCALAI_IMAGE_PATH=/tmp/generated/images

--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -394,11 +394,12 @@ jobs:
            cuda-major-version: "13"
            cuda-minor-version: "0"
            platforms: 'linux/arm64'
-            skip-drivers: 'true'
+            skip-drivers: 'false'
            tag-latest: 'auto'
            tag-suffix: '-nvidia-l4t-cuda-13-arm64-llama-cpp'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            base-image: "ubuntu:24.04"
            runs-on: 'ubuntu-24.04-arm'
+            ubuntu-version: '2404'
            backend: "llama-cpp"
            dockerfile: "./backend/Dockerfile.llama-cpp"
            context: "./"
@@ -433,8 +434,9 @@ jobs:
            tag-latest: 'auto'
            tag-suffix: '-nvidia-l4t-cuda-13-arm64-diffusers'
            runs-on: 'ubuntu-24.04-arm'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
-            skip-drivers: 'true'
+            base-image: "ubuntu:24.04"
+            skip-drivers: 'false'
+            ubuntu-version: '2404'
            backend: "diffusers"
            dockerfile: "./backend/Dockerfile.python"
            context: "./backend"
@@ -502,10 +504,11 @@ jobs:
            cuda-major-version: "13"
            cuda-minor-version: "0"
            platforms: 'linux/arm64'
-            skip-drivers: 'true'
+            skip-drivers: 'false'
            tag-latest: 'auto'
            tag-suffix: '-nvidia-l4t-cuda-13-arm64-stablediffusion-ggml'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            base-image: "ubuntu:24.04"
+            ubuntu-version: '2404'
            runs-on: 'ubuntu-24.04-arm'
            backend: "stablediffusion-ggml"
            dockerfile: "./backend/Dockerfile.golang"
@@ -526,10 +529,11 @@ jobs:
            cuda-major-version: "13"
            cuda-minor-version: "0"
            platforms: 'linux/arm64'
-            skip-drivers: 'true'
+            skip-drivers: 'false'
            tag-latest: 'auto'
            tag-suffix: '-nvidia-l4t-cuda-13-arm64-whisper'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            base-image: "ubuntu:24.04"
+            ubuntu-version: '2404'
            runs-on: 'ubuntu-24.04-arm'
            backend: "whisper"
            dockerfile: "./backend/Dockerfile.golang"
--- a/.github/workflows/backend_build.yml
+++ b/.github/workflows/backend_build.yml
@@ -1,5 +1,5 @@
 ---
-name: 'build python backend container images (reusable)'
+name: 'build backend container images (reusable)'

 on:
  workflow_call:
@@ -53,6 +53,11 @@ on:
        description: 'Skip drivers'
        default: 'false'
        type: string
+      ubuntu-version:
+        description: 'Ubuntu version'
+        required: false
+        default: '2204'
+        type: string
    secrets:
      dockerUsername:
        required: false
@@ -208,6 +213,7 @@ jobs:
            CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
            BASE_IMAGE=${{ inputs.base-image }}
            BACKEND=${{ inputs.backend }}
+            UBUNTU_VERSION=${{ inputs.ubuntu-version }}
          context: ${{ inputs.context }}
          file: ${{ inputs.dockerfile }}
          cache-from: type=gha
@@ -228,6 +234,7 @@ jobs:
            CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
            BASE_IMAGE=${{ inputs.base-image }}
            BACKEND=${{ inputs.backend }}
+            UBUNTU_VERSION=${{ inputs.ubuntu-version }}
          context: ${{ inputs.context }}
          file: ${{ inputs.dockerfile }}
          cache-from: type=gha
--- a/.github/workflows/backend_pr.yml
+++ b/.github/workflows/backend_pr.yml
@@ -52,6 +52,7 @@ jobs:
      dockerfile: ${{ matrix.dockerfile }}
      skip-drivers: ${{ matrix.skip-drivers }}
      context: ${{ matrix.context }}
+      ubuntu-version: ${{ matrix.ubuntu-version }}
    secrets:
      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@@ -81,7 +81,8 @@ jobs:
            platforms: 'linux/arm64'
            tag-latest: 'false'
            tag-suffix: '-nvidia-l4t-arm64-cuda-13'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            base-image: "ubuntu:24.04"
            runs-on: 'ubuntu-24.04-arm'
            makeflags: "--jobs=4 --output-sync=target"
-            skip-drivers: 'false'
+            skip-drivers: 'false'
+            ubuntu-version: '2404'
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -169,7 +169,8 @@ jobs:
            platforms: 'linux/arm64'
            tag-latest: 'auto'
            tag-suffix: '-nvidia-l4t-arm64-cuda-13'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            base-image: "ubuntu:24.04"
            runs-on: 'ubuntu-24.04-arm'
            makeflags: "--jobs=4 --output-sync=target"
            skip-drivers: 'false'
+            ubuntu-version: '2404'
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -56,6 +56,11 @@ on:
        required: false
        default: ''
        type: string
+      ubuntu-version:
+        description: 'Ubuntu version'
+        required: false
+        default: '2204'
+        type: string
    secrets:
      dockerUsername:
        required: true
@@ -238,6 +243,7 @@ jobs:
            GRPC_VERSION=v1.65.0
            MAKEFLAGS=${{ inputs.makeflags }}
            SKIP_DRIVERS=${{ inputs.skip-drivers }}
+            UBUNTU_VERSION=${{ inputs.ubuntu-version }}
          context: .
          file: ./Dockerfile
          cache-from: type=gha
@@ -265,6 +271,7 @@ jobs:
            GRPC_VERSION=v1.65.0
            MAKEFLAGS=${{ inputs.makeflags }}
            SKIP_DRIVERS=${{ inputs.skip-drivers }}
+            UBUNTU_VERSION=${{ inputs.ubuntu-version }}
          context: .
          file: ./Dockerfile
          cache-from: type=gha
--- a/Dockerfile
+++ b/Dockerfile
@@ -9,7 +9,7 @@ ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates curl wget espeak-ng libgomp1 \
-        ffmpeg libopenblas-base libopenblas-dev && \
+        ffmpeg && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

@@ -23,6 +23,7 @@ ARG SKIP_DRIVERS=false
 ARG TARGETARCH
 ARG TARGETVARIANT
 ENV BUILD_TYPE=${BUILD_TYPE}
+ARG UBUNTU_VERSION=2204

 RUN mkdir -p /run/localai
 RUN echo "default" > /run/localai/capability
@@ -51,23 +52,13 @@ RUN <<EOT bash
        apt-get install -y  --no-install-recommends \
            software-properties-common pciutils
        if [ "amd64" = "$TARGETARCH" ]; then
-            curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
+            curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/x86_64/cuda-keyring_1.1-1_all.deb
        fi
        if [ "arm64" = "$TARGETARCH" ]; then
            if [ "${CUDA_MAJOR_VERSION}" = "13" ]; then
-                apt-get install -y cuda-cupti-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libnvjitlink-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
-                apt-get remove -y cuda-keyring && \
-                apt-get clean && \
-                rm -rf /var/lib/apt/lists/* && \
-                apt-get remove -y cuda-nvcc-* \
-                            libcufft-dev-* \
-                            libcurand-dev-* \
-                            libcublas-dev-* \
-                            libcusparse-dev-* \
-                            libcusolver-dev-* && \
-                curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/sbsa/cuda-keyring_1.1-1_all.deb
+                curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/sbsa/cuda-keyring_1.1-1_all.deb
            else
-                curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
+                curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/arm64/cuda-keyring_1.1-1_all.deb
            fi
        fi
        dpkg -i cuda-keyring_1.1-1_all.deb && \
@@ -79,6 +70,7 @@ RUN <<EOT bash
            libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
            libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
            libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            cuda-cupti-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libnvjitlink-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
            libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
        apt-get clean && \
        rm -rf /var/lib/apt/lists/* && \
@@ -95,13 +87,13 @@ EOT
 # https://github.com/NVIDIA/Isaac-GR00T/issues/343
 RUN <<EOT bash
    if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "arm64" ]; then
-        wget https://developer.download.nvidia.com/compute/cudss/0.6.0/local_installers/cudss-local-tegra-repo-ubuntu2204-0.6.0_0.6.0-1_arm64.deb && \
-        dpkg -i cudss-local-tegra-repo-ubuntu2204-0.6.0_0.6.0-1_arm64.deb && \
-        cp /var/cudss-local-tegra-repo-ubuntu2204-0.6.0/cudss-*-keyring.gpg /usr/share/keyrings/ && \
+        wget https://developer.download.nvidia.com/compute/cudss/0.6.0/local_installers/cudss-local-tegra-repo-ubuntu${UBUNTU_VERSION}-0.6.0_0.6.0-1_arm64.deb && \
+        dpkg -i cudss-local-tegra-repo-ubuntu${UBUNTU_VERSION}-0.6.0_0.6.0-1_arm64.deb && \
+        cp /var/cudss-local-tegra-repo-ubuntu${UBUNTU_VERSION}-0.6.0/cudss-*-keyring.gpg /usr/share/keyrings/ && \
        apt-get update && apt-get -y install cudss cudss-cuda-${CUDA_MAJOR_VERSION} && \
-        wget https://developer.download.nvidia.com/compute/nvpl/25.5/local_installers/nvpl-local-repo-ubuntu2404-25.5_1.0-1_arm64.deb && \
-        dpkg -i nvpl-local-repo-ubuntu2404-25.5_1.0-1_arm64.deb && \
-        cp /var/nvpl-local-repo-ubuntu2404-25.5/nvpl-*-keyring.gpg /usr/share/keyrings/ && \
+        wget https://developer.download.nvidia.com/compute/nvpl/25.5/local_installers/nvpl-local-repo-ubuntu${UBUNTU_VERSION}-25.5_1.0-1_arm64.deb && \
+        dpkg -i nvpl-local-repo-ubuntu${UBUNTU_VERSION}-25.5_1.0-1_arm64.deb && \
+        cp /var/nvpl-local-repo-ubuntu${UBUNTU_VERSION}-25.5/nvpl-*-keyring.gpg /usr/share/keyrings/ && \
        apt-get update && apt-get install -y nvpl
    fi
 EOT
@@ -189,14 +181,6 @@ RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
 COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
 RUN update-ca-certificates

-
-# OpenBLAS requirements and stable diffusion
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-        libopenblas-dev && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-
 RUN test -n "$TARGETARCH" \
    || (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')

--- a/backend/Dockerfile.python
+++ b/backend/Dockerfile.python
@@ -12,6 +12,7 @@ ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION}
 ENV DEBIAN_FRONTEND=noninteractive
 ARG TARGETARCH
 ARG TARGETVARIANT
+ARG UBUNTU_VERSION=2204

 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
@@ -58,15 +59,19 @@ EOT

 # CuBLAS requirements
 RUN <<EOT bash
-    if [ "${BUILD_TYPE}" = "cublas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
+    if ( [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "l4t" ] ) && [ "${SKIP_DRIVERS}" = "false" ]; then
        apt-get update && \
        apt-get install -y  --no-install-recommends \
            software-properties-common pciutils
        if [ "amd64" = "$TARGETARCH" ]; then
-            curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
+            curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/x86_64/cuda-keyring_1.1-1_all.deb
        fi
        if [ "arm64" = "$TARGETARCH" ]; then
-            curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
+            if [ "${CUDA_MAJOR_VERSION}" = "13" ]; then
+                curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/sbsa/cuda-keyring_1.1-1_all.deb
+            else
+                curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/arm64/cuda-keyring_1.1-1_all.deb
+            fi
        fi
        dpkg -i cuda-keyring_1.1-1_all.deb && \
        rm -f cuda-keyring_1.1-1_all.deb && \
@@ -77,9 +82,25 @@ RUN <<EOT bash
            libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
            libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
            libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            cuda-cupti-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libnvjitlink-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
            libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
        apt-get clean && \
-        rm -rf /var/lib/apt/lists/*
+        rm -rf /var/lib/apt/lists/* && \
+        echo "nvidia-cuda-${CUDA_MAJOR_VERSION}" > /run/localai/capability
+    fi
+EOT
+
+# https://github.com/NVIDIA/Isaac-GR00T/issues/343
+RUN <<EOT bash
+    if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "arm64" ]; then
+        wget https://developer.download.nvidia.com/compute/cudss/0.6.0/local_installers/cudss-local-tegra-repo-ubuntu${UBUNTU_VERSION}-0.6.0_0.6.0-1_arm64.deb && \
+        dpkg -i cudss-local-tegra-repo-ubuntu${UBUNTU_VERSION}-0.6.0_0.6.0-1_arm64.deb && \
+        cp /var/cudss-local-tegra-repo-ubuntu${UBUNTU_VERSION}-0.6.0/cudss-*-keyring.gpg /usr/share/keyrings/ && \
+        apt-get update && apt-get -y install cudss cudss-cuda-${CUDA_MAJOR_VERSION} && \
+        wget https://developer.download.nvidia.com/compute/nvpl/25.5/local_installers/nvpl-local-repo-ubuntu2404-25.5_1.0-1_arm64.deb && \
+        dpkg -i nvpl-local-repo-ubuntu2404-25.5_1.0-1_arm64.deb && \
+        cp /var/nvpl-local-repo-ubuntu2404-25.5/nvpl-*-keyring.gpg /usr/share/keyrings/ && \
+        apt-get update && apt-get install -y nvpl
    fi
 EOT

@@ -103,6 +124,11 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
        # to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
        ldconfig \
    ; fi
+
+RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
+    ln -s /opt/rocm-**/lib/llvm/lib/libomp.so /usr/lib/libomp.so \
+    ; fi
+
 # Install uv as a system package
 RUN curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR=/usr/bin sh
 ENV PATH="/root/.cargo/bin:${PATH}"