fix: use ubuntu 24.04 for cuda13 l4t images (#7418)

* fix: use ubuntu 24.04 for cuda13 l4t images

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Drop openblas from containers

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fixups

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fixups

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Ettore Di Giacinto, 2025-12-03 09:47:03 +01:00, committed by GitHub
parent fea9018dc5
commit 8dfeea2f55
9 changed files with 75 additions and 53 deletions

.env
View File

@@ -32,15 +32,6 @@
# Forces shutdown of the backends if busy (only if LOCALAI_SINGLE_ACTIVE_BACKEND is set)
# LOCALAI_FORCE_BACKEND_SHUTDOWN=true
## Specify a build type. Available: cublas, openblas, clblas.
## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit.
## OpenBLAS: This is an open-source implementation of the BLAS library that aims to provide highly optimized code for various platforms. It includes support for multi-threading and can be compiled to use hardware-specific features for additional performance. OpenBLAS can run on many kinds of hardware, including CPUs from Intel, AMD, and ARM.
## clBLAS: This is an open-source implementation of the BLAS library that uses OpenCL, a framework for writing programs that execute across heterogeneous platforms consisting of CPUs, GPUs, and other processors. clBLAS is designed to take advantage of the parallel computing power of GPUs but can also run on any hardware that supports OpenCL. This includes hardware from different vendors like Nvidia, AMD, and Intel.
# BUILD_TYPE=openblas
## Uncomment and set to true to enable rebuilding from source
# REBUILD=true
## Path where to store generated images
# LOCALAI_IMAGE_PATH=/tmp/generated/images

View File

@@ -394,11 +394,12 @@ jobs:
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'true'
skip-drivers: 'false'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-cuda-13-arm64-llama-cpp'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
base-image: "ubuntu:24.04"
runs-on: 'ubuntu-24.04-arm'
ubuntu-version: '2404'
backend: "llama-cpp"
dockerfile: "./backend/Dockerfile.llama-cpp"
context: "./"
@@ -433,8 +434,9 @@ jobs:
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-cuda-13-arm64-diffusers'
runs-on: 'ubuntu-24.04-arm'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
skip-drivers: 'true'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
ubuntu-version: '2404'
backend: "diffusers"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
@@ -502,10 +504,11 @@ jobs:
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'true'
skip-drivers: 'false'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-cuda-13-arm64-stablediffusion-ggml'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
base-image: "ubuntu:24.04"
ubuntu-version: '2404'
runs-on: 'ubuntu-24.04-arm'
backend: "stablediffusion-ggml"
dockerfile: "./backend/Dockerfile.golang"
@@ -526,10 +529,11 @@ jobs:
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'true'
skip-drivers: 'false'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-cuda-13-arm64-whisper'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
base-image: "ubuntu:24.04"
ubuntu-version: '2404'
runs-on: 'ubuntu-24.04-arm'
backend: "whisper"
dockerfile: "./backend/Dockerfile.golang"

View File

@@ -1,5 +1,5 @@
---
name: 'build python backend container images (reusable)'
name: 'build backend container images (reusable)'
on:
workflow_call:
@@ -53,6 +53,11 @@ on:
description: 'Skip drivers'
default: 'false'
type: string
ubuntu-version:
description: 'Ubuntu version'
required: false
default: '2204'
type: string
secrets:
dockerUsername:
required: false
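The ubuntu-version input added above defaults to '2204', so callers that do not set it keep resolving packages from the Ubuntu 22.04 NVIDIA repositories; only the arm64 CUDA 13 jobs in this commit pass '2404'. The same default is declared in the Dockerfiles as ARG UBUNTU_VERSION=2204, which in shell terms behaves roughly like this sketch (illustrative, not part of the commit):

  # Hedged sketch: effect of omitting ubuntu-version (same default as the workflow input and the Dockerfile ARG)
  : "${UBUNTU_VERSION:=2204}"
  echo "resolving NVIDIA packages from the ubuntu${UBUNTU_VERSION} repositories"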
@@ -208,6 +213,7 @@ jobs:
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
BASE_IMAGE=${{ inputs.base-image }}
BACKEND=${{ inputs.backend }}
UBUNTU_VERSION=${{ inputs.ubuntu-version }}
context: ${{ inputs.context }}
file: ${{ inputs.dockerfile }}
cache-from: type=gha
@@ -228,6 +234,7 @@ jobs:
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
BASE_IMAGE=${{ inputs.base-image }}
BACKEND=${{ inputs.backend }}
UBUNTU_VERSION=${{ inputs.ubuntu-version }}
context: ${{ inputs.context }}
file: ${{ inputs.dockerfile }}
cache-from: type=gha

View File

@@ -52,6 +52,7 @@ jobs:
dockerfile: ${{ matrix.dockerfile }}
skip-drivers: ${{ matrix.skip-drivers }}
context: ${{ matrix.context }}
ubuntu-version: ${{ matrix.ubuntu-version }}
secrets:
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}

View File

@@ -81,7 +81,8 @@ jobs:
platforms: 'linux/arm64'
tag-latest: 'false'
tag-suffix: '-nvidia-l4t-arm64-cuda-13'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
base-image: "ubuntu:24.04"
runs-on: 'ubuntu-24.04-arm'
makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'true'
skip-drivers: 'false'
ubuntu-version: '2404'

View File

@@ -169,7 +169,8 @@ jobs:
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-arm64-cuda-13'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
base-image: "ubuntu:24.04"
runs-on: 'ubuntu-24.04-arm'
makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'false'
ubuntu-version: '2404'

View File

@@ -56,6 +56,11 @@ on:
required: false
default: ''
type: string
ubuntu-version:
description: 'Ubuntu version'
required: false
default: '2204'
type: string
secrets:
dockerUsername:
required: true
@@ -238,6 +243,7 @@ jobs:
GRPC_VERSION=v1.65.0
MAKEFLAGS=${{ inputs.makeflags }}
SKIP_DRIVERS=${{ inputs.skip-drivers }}
UBUNTU_VERSION=${{ inputs.ubuntu-version }}
context: .
file: ./Dockerfile
cache-from: type=gha
@@ -265,6 +271,7 @@ jobs:
GRPC_VERSION=v1.65.0
MAKEFLAGS=${{ inputs.makeflags }}
SKIP_DRIVERS=${{ inputs.skip-drivers }}
UBUNTU_VERSION=${{ inputs.ubuntu-version }}
context: .
file: ./Dockerfile
cache-from: type=gha

View File

@@ -9,7 +9,7 @@ ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && \
apt-get install -y --no-install-recommends \
ca-certificates curl wget espeak-ng libgomp1 \
ffmpeg libopenblas-base libopenblas-dev && \
ffmpeg && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
@@ -23,6 +23,7 @@ ARG SKIP_DRIVERS=false
ARG TARGETARCH
ARG TARGETVARIANT
ENV BUILD_TYPE=${BUILD_TYPE}
ARG UBUNTU_VERSION=2204
RUN mkdir -p /run/localai
RUN echo "default" > /run/localai/capability
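The capability marker starts out as "default" here; the backend Dockerfile later in this commit overwrites it with nvidia-cuda-<major> once the CUDA packages are installed. A minimal sketch of that lifecycle (how LocalAI consumes the marker at runtime is not part of this diff and is assumed):

  # Sketch of the capability marker as set up in these Dockerfiles:
  mkdir -p /run/localai
  echo "default" > /run/localai/capability                             # baseline for images without GPU packages
  CUDA_MAJOR_VERSION=13
  echo "nvidia-cuda-${CUDA_MAJOR_VERSION}" > /run/localai/capability   # written after a successful CUDA install
  cat /run/localai/capability                                          # -> nvidia-cuda-13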
@@ -51,23 +52,13 @@ RUN <<EOT bash
apt-get install -y --no-install-recommends \
software-properties-common pciutils
if [ "amd64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/x86_64/cuda-keyring_1.1-1_all.deb
fi
if [ "arm64" = "$TARGETARCH" ]; then
if [ "${CUDA_MAJOR_VERSION}" = "13" ]; then
apt-get install -y cuda-cupti-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libnvjitlink-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
apt-get remove -y cuda-keyring && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
apt-get remove -y cuda-nvcc-* \
libcufft-dev-* \
libcurand-dev-* \
libcublas-dev-* \
libcusparse-dev-* \
libcusolver-dev-* && \
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/sbsa/cuda-keyring_1.1-1_all.deb
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/sbsa/cuda-keyring_1.1-1_all.deb
else
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/arm64/cuda-keyring_1.1-1_all.deb
fi
fi
dpkg -i cuda-keyring_1.1-1_all.deb && \
@@ -79,6 +70,7 @@ RUN <<EOT bash
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
cuda-cupti-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libnvjitlink-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
@@ -95,13 +87,13 @@ EOT
# https://github.com/NVIDIA/Isaac-GR00T/issues/343
RUN <<EOT bash
if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "arm64" ]; then
wget https://developer.download.nvidia.com/compute/cudss/0.6.0/local_installers/cudss-local-tegra-repo-ubuntu2204-0.6.0_0.6.0-1_arm64.deb && \
dpkg -i cudss-local-tegra-repo-ubuntu2204-0.6.0_0.6.0-1_arm64.deb && \
cp /var/cudss-local-tegra-repo-ubuntu2204-0.6.0/cudss-*-keyring.gpg /usr/share/keyrings/ && \
wget https://developer.download.nvidia.com/compute/cudss/0.6.0/local_installers/cudss-local-tegra-repo-ubuntu${UBUNTU_VERSION}-0.6.0_0.6.0-1_arm64.deb && \
dpkg -i cudss-local-tegra-repo-ubuntu${UBUNTU_VERSION}-0.6.0_0.6.0-1_arm64.deb && \
cp /var/cudss-local-tegra-repo-ubuntu${UBUNTU_VERSION}-0.6.0/cudss-*-keyring.gpg /usr/share/keyrings/ && \
apt-get update && apt-get -y install cudss cudss-cuda-${CUDA_MAJOR_VERSION} && \
wget https://developer.download.nvidia.com/compute/nvpl/25.5/local_installers/nvpl-local-repo-ubuntu2404-25.5_1.0-1_arm64.deb && \
dpkg -i nvpl-local-repo-ubuntu2404-25.5_1.0-1_arm64.deb && \
cp /var/nvpl-local-repo-ubuntu2404-25.5/nvpl-*-keyring.gpg /usr/share/keyrings/ && \
wget https://developer.download.nvidia.com/compute/nvpl/25.5/local_installers/nvpl-local-repo-ubuntu${UBUNTU_VERSION}-25.5_1.0-1_arm64.deb && \
dpkg -i nvpl-local-repo-ubuntu${UBUNTU_VERSION}-25.5_1.0-1_arm64.deb && \
cp /var/nvpl-local-repo-ubuntu${UBUNTU_VERSION}-25.5/nvpl-*-keyring.gpg /usr/share/keyrings/ && \
apt-get update && apt-get install -y nvpl
fi
EOT
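Both RUN blocks above assemble NVIDIA download URLs from UBUNTU_VERSION and the target architecture. Pulled out of the heredocs, the cuda-keyring selection amounts to roughly the following (a sketch assuming TARGETARCH is provided by buildx):

  # Sketch of the cuda-keyring repo-path selection used above:
  UBUNTU_VERSION="${UBUNTU_VERSION:-2204}"
  CUDA_MAJOR_VERSION="${CUDA_MAJOR_VERSION:-13}"
  case "${TARGETARCH}" in
    amd64) repo_arch="x86_64" ;;
    arm64)
      # CUDA 13 arm64 packages live under "sbsa"; older CUDA versions keep the "arm64" (Jetson/L4T) path.
      if [ "${CUDA_MAJOR_VERSION}" = "13" ]; then repo_arch="sbsa"; else repo_arch="arm64"; fi ;;
  esac
  curl -O "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/${repo_arch}/cuda-keyring_1.1-1_all.deb"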
@@ -189,14 +181,6 @@ RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
RUN update-ca-certificates
# OpenBLAS requirements and stable diffusion
RUN apt-get update && \
apt-get install -y --no-install-recommends \
libopenblas-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
RUN test -n "$TARGETARCH" \
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')

View File

@@ -12,6 +12,7 @@ ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION}
ENV DEBIAN_FRONTEND=noninteractive
ARG TARGETARCH
ARG TARGETVARIANT
ARG UBUNTU_VERSION=2204
RUN apt-get update && \
apt-get install -y --no-install-recommends \
@@ -58,15 +59,19 @@ EOT
# CuBLAS requirements
RUN <<EOT bash
if [ "${BUILD_TYPE}" = "cublas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
if ( [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "l4t" ] ) && [ "${SKIP_DRIVERS}" = "false" ]; then
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils
if [ "amd64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/x86_64/cuda-keyring_1.1-1_all.deb
fi
if [ "arm64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
if [ "${CUDA_MAJOR_VERSION}" = "13" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/sbsa/cuda-keyring_1.1-1_all.deb
else
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/arm64/cuda-keyring_1.1-1_all.deb
fi
fi
dpkg -i cuda-keyring_1.1-1_all.deb && \
rm -f cuda-keyring_1.1-1_all.deb && \
@@ -77,9 +82,25 @@ RUN <<EOT bash
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
cuda-cupti-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libnvjitlink-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
rm -rf /var/lib/apt/lists/* && \
echo "nvidia-cuda-${CUDA_MAJOR_VERSION}" > /run/localai/capability
fi
EOT
# https://github.com/NVIDIA/Isaac-GR00T/issues/343
RUN <<EOT bash
if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "arm64" ]; then
wget https://developer.download.nvidia.com/compute/cudss/0.6.0/local_installers/cudss-local-tegra-repo-ubuntu${UBUNTU_VERSION}-0.6.0_0.6.0-1_arm64.deb && \
dpkg -i cudss-local-tegra-repo-ubuntu${UBUNTU_VERSION}-0.6.0_0.6.0-1_arm64.deb && \
cp /var/cudss-local-tegra-repo-ubuntu${UBUNTU_VERSION}-0.6.0/cudss-*-keyring.gpg /usr/share/keyrings/ && \
apt-get update && apt-get -y install cudss cudss-cuda-${CUDA_MAJOR_VERSION} && \
wget https://developer.download.nvidia.com/compute/nvpl/25.5/local_installers/nvpl-local-repo-ubuntu2404-25.5_1.0-1_arm64.deb && \
dpkg -i nvpl-local-repo-ubuntu2404-25.5_1.0-1_arm64.deb && \
cp /var/nvpl-local-repo-ubuntu2404-25.5/nvpl-*-keyring.gpg /usr/share/keyrings/ && \
apt-get update && apt-get install -y nvpl
fi
EOT
@@ -103,6 +124,11 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
# to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
ldconfig \
; fi
RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
ln -s /opt/rocm-**/lib/llvm/lib/libomp.so /usr/lib/libomp.so \
; fi
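The hipblas branch exposes ROCm's bundled libomp at /usr/lib/libomp.so, presumably so binaries expecting a system-wide libomp can load it. A quick check inside such an image might look like this (assuming a ROCm install under /opt/rocm-*; not part of the commit):

  # Sanity check for the symlink created above:
  ls -l /usr/lib/libomp.so                   # should point into /opt/rocm-*/lib/llvm/lib/
  ldconfig -p | grep -i libomp || echo "libomp not in the linker cache"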
# Install uv as a system package
RUN curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR=/usr/bin sh
ENV PATH="/root/.cargo/bin:${PATH}"