diff --git a/.docker/ik-llama-cpp-compile.sh b/.docker/ik-llama-cpp-compile.sh
new file mode 100755
index 000000000..3da869007
--- /dev/null
+++ b/.docker/ik-llama-cpp-compile.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+# Shared compile logic for backend/Dockerfile.ik-llama-cpp.
+# Sourced (via bind mount) from both builder-fromsource and builder-prebuilt stages.
+
+set -euxo pipefail
+# ccache lives in /root/.ccache, capped at 5G; stats are zeroed here and printed at the end.
+export CCACHE_DIR=/root/.ccache
+ccache --max-size=5G || true
+ccache -z || true
+# Route C, C++ and CUDA compiles through ccache.
+export CMAKE_ARGS="${CMAKE_ARGS:-} -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache"
+# Optional CUDA arch override: ';' is escaped as '\;' and existing *-build dirs are removed.
+if [[ -n "${CUDA_DOCKER_ARCH:-}" ]]; then
+    CUDA_ARCH_ESC="${CUDA_DOCKER_ARCH//;/\\;}"
+    export CMAKE_ARGS="${CMAKE_ARGS} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH_ESC}"
+    echo "CMAKE_ARGS(env) = ${CMAKE_ARGS}"
+    rm -rf /LocalAI/backend/cpp/ik-llama-cpp-*-build
+fi
+
+cd /LocalAI/backend/cpp/ik-llama-cpp
+# ':-' defaults keep 'set -u' from aborting when TARGETARCH / BUILD_TYPE are unset.
+if [ "${TARGETARCH:-}" = "arm64" ] || [ "${BUILD_TYPE:-}" = "hipblas" ]; then
+    # ARM64 / ROCm: build without x86 SIMD
+    make ik-llama-cpp-fallback
+else
+    # ik_llama.cpp's IQK kernels require at least AVX2
+    make ik-llama-cpp-avx2
+fi
+
+ccache -s || true
diff --git a/.docker/install-base-deps.sh b/.docker/install-base-deps.sh
new file mode 100755
index 000000000..5b0908fa8
--- /dev/null
+++ b/.docker/install-base-deps.sh
@@ -0,0 +1,244 @@
+#!/usr/bin/env bash
+# Single source of truth for builder-base contents.
+#
+# Used by:
+#   - backend/Dockerfile.base-grpc-builder (CI prebuilt-base source of truth)
+#   - backend/Dockerfile.llama-cpp (builder-fromsource stage)
+#   - backend/Dockerfile.ik-llama-cpp (builder-fromsource stage)
+#   - backend/Dockerfile.turboquant (builder-fromsource stage)
+#
+# All four files invoke this script via
+#   RUN --mount=type=bind,source=.docker/install-base-deps.sh,target=/usr/local/sbin/install-base-deps \
+#       --mount=type=bind,source=.docker/apt-mirror.sh,target=/usr/local/sbin/apt-mirror \
+#       bash /usr/local/sbin/install-base-deps
+#
+# so the prebuilt CI base image and the from-source local-dev path are
+# bit-equivalent by construction.
+#
+# Inputs (env, populated from Dockerfile ARG/ENV):
+#   BUILD_TYPE ("cublas"|"l4t"|"hipblas"|"vulkan"|"sycl"|"clblas"|"")
+#   CUDA_MAJOR_VERSION ("12" | "13" | "")
+#   CUDA_MINOR_VERSION ("8" | "0" | "")
+#   TARGETARCH ("amd64" | "arm64")
+#   UBUNTU_VERSION ("2204" | "2404")
+#   SKIP_DRIVERS ("false" | "true")
+#   CMAKE_FROM_SOURCE ("false" | "true")
+#   CMAKE_VERSION ("3.31.10")
+#   GRPC_VERSION ("v1.65.0")
+#   GRPC_MAKEFLAGS ("-j4 -Otarget")
+#   APT_MIRROR / APT_PORTS_MIRROR (optional; consumed by /usr/local/sbin/apt-mirror)
+#   AMDGPU_TARGETS (optional; only relevant for hipblas downstream)
+#
+# IMPORTANT: install logic is copied verbatim from the prior in-Dockerfile
+# RUN blocks. Do not paraphrase apt invocations / version pins / sed line
+# numbers / deb URLs — the bit-equivalence guarantee depends on it.
+
+set -eux
+# NOTE(review): '-x' below skips the rewrite silently when the mounted file has no exec bit, although it is run via 'sh'; confirm '-f' was not intended.
+# --- 0. apt mirror rewrite (no-op when APT_MIRROR / APT_PORTS_MIRROR unset) ---
+if [ -x /usr/local/sbin/apt-mirror ]; then
+    APT_MIRROR="${APT_MIRROR:-}" APT_PORTS_MIRROR="${APT_PORTS_MIRROR:-}" \
+        sh /usr/local/sbin/apt-mirror
+fi
+
+export DEBIAN_FRONTEND=noninteractive
+export MAKEFLAGS="${GRPC_MAKEFLAGS:-}"
+
+# --- 1. Base apt build deps ---
+apt-get update
+apt-get install -y --no-install-recommends \
+    build-essential \
+    ccache git \
+    ca-certificates \
+    make \
+    pkg-config libcurl4-openssl-dev \
+    curl unzip \
+    libssl-dev wget
+apt-get clean
+rm -rf /var/lib/apt/lists/*
+
+# --- 2. Vulkan SDK (BUILD_TYPE=vulkan) ---
+# NB: this block intentionally installs `cmake` via apt as part of the
+# Vulkan tooling — must run before the dedicated CMake step below.
+if [ "${BUILD_TYPE:-}" = "vulkan" ] && [ "${SKIP_DRIVERS:-false}" = "false" ]; then
+    apt-get update
+    apt-get install -y --no-install-recommends \
+        software-properties-common pciutils wget gpg-agent
+    apt-get install -y libglm-dev cmake libxcb-dri3-0 libxcb-present0 libpciaccess0 \
+        libpng-dev libxcb-keysyms1-dev libxcb-dri3-dev libx11-dev g++ gcc \
+        libwayland-dev libxrandr-dev libxcb-randr0-dev libxcb-ewmh-dev \
+        git python-is-python3 bison libx11-xcb-dev liblz4-dev libzstd-dev \
+        ocaml-core ninja-build pkg-config libxml2-dev wayland-protocols python3-jsonschema \
+        clang-format qtbase5-dev qt6-base-dev libxcb-glx0-dev sudo xz-utils
+    if [ "amd64" = "${TARGETARCH:-}" ]; then
+        wget "https://sdk.lunarg.com/sdk/download/1.4.335.0/linux/vulkansdk-linux-x86_64-1.4.335.0.tar.xz"
+        tar -xf vulkansdk-linux-x86_64-1.4.335.0.tar.xz
+        rm vulkansdk-linux-x86_64-1.4.335.0.tar.xz
+        mkdir -p /opt/vulkan-sdk
+        mv 1.4.335.0 /opt/vulkan-sdk/
+        ( cd /opt/vulkan-sdk/1.4.335.0 && \
+            ./vulkansdk --no-deps --maxjobs \
+                vulkan-loader \
+                vulkan-validationlayers \
+                vulkan-extensionlayer \
+                vulkan-tools \
+                shaderc )
+        cp -rfv /opt/vulkan-sdk/1.4.335.0/x86_64/bin/* /usr/bin/
+        cp -rfv /opt/vulkan-sdk/1.4.335.0/x86_64/lib/* /usr/lib/x86_64-linux-gnu/
+        cp -rfv /opt/vulkan-sdk/1.4.335.0/x86_64/include/* /usr/include/
+        cp -rfv /opt/vulkan-sdk/1.4.335.0/x86_64/share/* /usr/share/
+        rm -rf /opt/vulkan-sdk
+    fi
+    if [ "arm64" = "${TARGETARCH:-}" ]; then
+        mkdir vulkan
+        ( cd vulkan && \
+            curl -L -o vulkan-sdk.tar.xz https://github.com/mudler/vulkan-sdk-arm/releases/download/1.4.335.0/vulkansdk-ubuntu-24.04-arm-1.4.335.0.tar.xz && \
+            tar -xvf vulkan-sdk.tar.xz && \
+            rm vulkan-sdk.tar.xz && \
+            cd 1.4.335.0 && \
+            cp -rfv aarch64/bin/* /usr/bin/ && \
+            cp -rfv aarch64/lib/* /usr/lib/aarch64-linux-gnu/ && \
+            cp -rfv aarch64/include/* /usr/include/ && \
+            cp -rfv aarch64/share/* /usr/share/ )
+        rm -rf vulkan
+    fi
+    ldconfig
+    apt-get clean
+    rm -rf /var/lib/apt/lists/*
+fi
+
+# --- 3. CUDA toolkit (BUILD_TYPE=cublas|l4t) ---
+if { [ "${BUILD_TYPE:-}" = "cublas" ] || [ "${BUILD_TYPE:-}" = "l4t" ]; } && [ "${SKIP_DRIVERS:-false}" = "false" ]; then
+    apt-get update
+    apt-get install -y --no-install-recommends \
+        software-properties-common pciutils
+    if [ "amd64" = "${TARGETARCH:-}" ]; then
+        curl -O "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/x86_64/cuda-keyring_1.1-1_all.deb"
+    fi
+    if [ "arm64" = "${TARGETARCH:-}" ]; then
+        if [ "${CUDA_MAJOR_VERSION}" = "13" ]; then
+            curl -O "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/sbsa/cuda-keyring_1.1-1_all.deb"
+        else
+            curl -O "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/arm64/cuda-keyring_1.1-1_all.deb"
+        fi
+    fi
+    dpkg -i cuda-keyring_1.1-1_all.deb
+    rm -f cuda-keyring_1.1-1_all.deb
+    apt-get update
+    apt-get install -y --no-install-recommends \
+        "cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}" \
+        "libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}" \
+        "libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}" \
+        "libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}" \
+        "libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}" \
+        "libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}"
+    if [ "${CUDA_MAJOR_VERSION}" = "13" ] && [ "arm64" = "${TARGETARCH:-}" ]; then
+        apt-get install -y --no-install-recommends \
+            "libcufile-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}" \
+            "libcudnn9-cuda-${CUDA_MAJOR_VERSION}" \
+            "cuda-cupti-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}" \
+            "libnvjitlink-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}"
+    fi
+    apt-get clean
+    rm -rf /var/lib/apt/lists/*
+fi
+# NOTE(review): unlike sections 2/3/5/6, section 4 is not gated on SKIP_DRIVERS and does not clean apt lists; presumably inherited from the original RUN block — confirm.
+# --- 4. cuDSS / NVPL on arm64 + cublas (legacy JetPack / Tegra) ---
+# https://github.com/NVIDIA/Isaac-GR00T/issues/343
+if [ "${BUILD_TYPE:-}" = "cublas" ] && [ "${TARGETARCH:-}" = "arm64" ]; then
+    wget "https://developer.download.nvidia.com/compute/cudss/0.6.0/local_installers/cudss-local-tegra-repo-ubuntu${UBUNTU_VERSION}-0.6.0_0.6.0-1_arm64.deb"
+    dpkg -i "cudss-local-tegra-repo-ubuntu${UBUNTU_VERSION}-0.6.0_0.6.0-1_arm64.deb"
+    cp /var/cudss-local-tegra-repo-ubuntu"${UBUNTU_VERSION}"-0.6.0/cudss-*-keyring.gpg /usr/share/keyrings/
+    apt-get update
+    apt-get -y install cudss "cudss-cuda-${CUDA_MAJOR_VERSION}"
+    wget "https://developer.download.nvidia.com/compute/nvpl/25.5/local_installers/nvpl-local-repo-ubuntu${UBUNTU_VERSION}-25.5_1.0-1_arm64.deb"
+    dpkg -i "nvpl-local-repo-ubuntu${UBUNTU_VERSION}-25.5_1.0-1_arm64.deb"
+    cp /var/nvpl-local-repo-ubuntu"${UBUNTU_VERSION}"-25.5/nvpl-*-keyring.gpg /usr/share/keyrings/
+    apt-get update
+    apt-get install -y nvpl
+fi
+
+# --- 5. clBLAS (BUILD_TYPE=clblas) ---
+# Present in variant Dockerfiles' from-source path but not in master's
+# Dockerfile.base-grpc-builder. No CI matrix entry currently uses this,
+# but keep parity so a future BUILD_TYPE=clblas build doesn't drift.
+if [ "${BUILD_TYPE:-}" = "clblas" ] && [ "${SKIP_DRIVERS:-false}" = "false" ]; then
+    apt-get update
+    apt-get install -y --no-install-recommends \
+        libclblast-dev
+    apt-get clean
+    rm -rf /var/lib/apt/lists/*
+fi
+
+# --- 6. ROCm / HIP build deps (BUILD_TYPE=hipblas) ---
+if [ "${BUILD_TYPE:-}" = "hipblas" ] && [ "${SKIP_DRIVERS:-false}" = "false" ]; then
+    apt-get update
+    apt-get install -y --no-install-recommends \
+        hipblas-dev \
+        hipblaslt-dev \
+        rocblas-dev
+    apt-get clean
+    rm -rf /var/lib/apt/lists/*
+    # I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install,
+    # which results in local-ai and others not being able to locate the libraries.
+    # We run ldconfig ourselves to work around this packaging deficiency.
+    ldconfig
+    # Log which GPU architectures have rocBLAS kernel support. pipefail is off, so the trailing 'grep .' makes empty output fail and reach the WARNING.
+    echo "rocBLAS library data architectures:"
+    (ls /opt/rocm*/lib/rocblas/library/Kernels* 2>/dev/null || ls /opt/rocm*/lib64/rocblas/library/Kernels* 2>/dev/null) | grep -oP 'gfx[0-9a-z+-]+' | sort -u | grep . || \
+        echo "WARNING: No rocBLAS kernel data found"
+fi
+
+echo "TARGETARCH: ${TARGETARCH:-}"
+
+# --- 7. protoc (always) ---
+# The version in 22.04 is too old. We will create one as part of installing
+# the GRPC build below but that will also bring in a newer version of absl
+# which stablediffusion cannot compile with. This version of protoc is only
+# here so that we can generate the grpc code for the stablediffusion build.
+if [ "amd64" = "${TARGETARCH:-}" ]; then
+    curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-x86_64.zip -o protoc.zip
+    unzip -j -d /usr/local/bin protoc.zip bin/protoc
+    rm protoc.zip
+fi
+if [ "arm64" = "${TARGETARCH:-}" ]; then
+    curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-aarch_64.zip -o protoc.zip
+    unzip -j -d /usr/local/bin protoc.zip bin/protoc
+    rm protoc.zip
+fi
+
+# --- 8. CMake (apt or compiled from source) ---
+# The version in 22.04 is too old. Vulkan path above already pulled cmake
+# via apt; the from-source branch here will install over it which is fine.
+if [ "${CMAKE_FROM_SOURCE:-false}" = "true" ]; then
+    curl -L -s "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz" -o cmake.tar.gz
+    tar xvf cmake.tar.gz
+    ( cd "cmake-${CMAKE_VERSION}" && ./configure && make && make install )
+else
+    apt-get update
+    apt-get install -y \
+        cmake
+    apt-get clean
+    rm -rf /var/lib/apt/lists/*
+fi
+
+# --- 9. gRPC compile + install at /opt/grpc ---
+# We install GRPC to a different prefix here so that we can copy in only
+# the build artifacts later — saves several hundred MB on the final docker
+# image size vs copying in the entire GRPC source tree and running
+# `make install` in the target container.
+#
+# The TESTONLY abseil sed patch and /opt/grpc prefix are load-bearing —
+# downstream Dockerfiles `COPY` /opt/grpc to /usr/local (or rely on the
+# prebuilt base having it at /opt/grpc).
+mkdir -p /build
+cd /build
+git clone --recurse-submodules --jobs 4 -b "${GRPC_VERSION}" --depth 1 --shallow-submodules https://github.com/grpc/grpc
+mkdir -p /build/grpc/cmake/build
+cd /build/grpc/cmake/build
+sed -i "216i\\ TESTONLY" "../../third_party/abseil-cpp/absl/container/CMakeLists.txt"
+cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../..
+make
+make install
+cd /
+rm -rf /build
diff --git a/.docker/llama-cpp-compile.sh b/.docker/llama-cpp-compile.sh
new file mode 100755
index 000000000..bbc9aa21f
--- /dev/null
+++ b/.docker/llama-cpp-compile.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+# Shared compile logic for backend/Dockerfile.llama-cpp.
+# Sourced (via bind mount) from both builder-fromsource and builder-prebuilt stages.
+
+set -euxo pipefail
+# ccache lives in /root/.ccache, capped at 5G; stats are zeroed here and printed at the end.
+export CCACHE_DIR=/root/.ccache
+ccache --max-size=5G || true
+ccache -z || true
+# Route C, C++ and CUDA compiles through ccache.
+export CMAKE_ARGS="${CMAKE_ARGS:-} -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache"
+# Optional CUDA arch override: ';' is escaped as '\;' and existing *-build dirs are removed.
+if [[ -n "${CUDA_DOCKER_ARCH:-}" ]]; then
+    CUDA_ARCH_ESC="${CUDA_DOCKER_ARCH//;/\\;}"
+    export CMAKE_ARGS="${CMAKE_ARGS} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH_ESC}"
+    echo "CMAKE_ARGS(env) = ${CMAKE_ARGS}"
+    rm -rf /LocalAI/backend/cpp/llama-cpp-*-build
+fi
+
+cd /LocalAI/backend/cpp/llama-cpp
+
+# x86 SIMD variants are skipped on arm64 and hipblas; ':-' keeps 'set -u' from aborting on unset vars.
+if [ "${TARGETARCH:-}" != "arm64" ] && [ "${BUILD_TYPE:-}" != "hipblas" ]; then
+    make llama-cpp-avx
+    make llama-cpp-avx2
+    make llama-cpp-avx512
+fi
+
+# Targets built for every architecture / build type (same order as before).
+make llama-cpp-fallback
+make llama-cpp-grpc
+make llama-cpp-rpc-server
+
+# Print ccache statistics; best-effort, never fails the build.
+ccache -s || true
diff --git a/.docker/turboquant-compile.sh b/.docker/turboquant-compile.sh
new file mode 100755
index 000000000..7468bc1a7
--- /dev/null
+++ b/.docker/turboquant-compile.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+# Shared compile logic for backend/Dockerfile.turboquant.
+# Sourced (via bind mount) from both builder-fromsource and builder-prebuilt stages.
+
+set -euxo pipefail
+# ccache lives in /root/.ccache, capped at 5G; stats are zeroed here and printed at the end.
+export CCACHE_DIR=/root/.ccache
+ccache --max-size=5G || true
+ccache -z || true
+# Route C, C++ and CUDA compiles through ccache.
+export CMAKE_ARGS="${CMAKE_ARGS:-} -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache"
+# Optional CUDA arch override: ';' is escaped as '\;' and existing *-build dirs are removed.
+if [[ -n "${CUDA_DOCKER_ARCH:-}" ]]; then
+    CUDA_ARCH_ESC="${CUDA_DOCKER_ARCH//;/\\;}"
+    export CMAKE_ARGS="${CMAKE_ARGS} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH_ESC}"
+    echo "CMAKE_ARGS(env) = ${CMAKE_ARGS}"
+    rm -rf /LocalAI/backend/cpp/turboquant-*-build
+fi
+
+cd /LocalAI/backend/cpp/turboquant
+
+# x86 SIMD variants are skipped on arm64 and hipblas; ':-' keeps 'set -u' from aborting on unset vars.
+if [ "${TARGETARCH:-}" != "arm64" ] && [ "${BUILD_TYPE:-}" != "hipblas" ]; then
+    make turboquant-avx
+    make turboquant-avx2
+    make turboquant-avx512
+fi
+
+# Targets built for every architecture / build type (same order as before).
+make turboquant-fallback
+make turboquant-grpc
+make turboquant-rpc-server
+
+# Print ccache statistics; best-effort, never fails the build.
+ccache -s || true
diff --git a/.github/backend-matrix.yml b/.github/backend-matrix.yml index 7a4388e95..f957cea03 100644 --- a/.github/backend-matrix.yml +++ b/.github/backend-matrix.yml @@ -388,6 +388,7 @@ include: platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp' + builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-cuda-12-amd64' runs-on: 'ubuntu-latest' base-image: "ubuntu:24.04" skip-drivers: 'false' @@ -401,6 +402,7 @@ include: platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-turboquant' + builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-cuda-12-amd64' runs-on: 'ubuntu-latest' base-image: "ubuntu:24.04" skip-drivers: 'false' @@ -896,6 +898,7 @@ include: platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-llama-cpp' + builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-cuda-13-amd64' runs-on: 'ubuntu-latest' base-image: "ubuntu:24.04" skip-drivers: 'false' @@ -909,6 +912,7 @@ include: platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-turboquant' +
builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-cuda-13-amd64' runs-on: 'ubuntu-latest' base-image: "ubuntu:24.04" skip-drivers: 'false' @@ -923,6 +927,7 @@ include: skip-drivers: 'false' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-cuda-13-arm64-llama-cpp' + builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-cuda-13-arm64' base-image: "ubuntu:24.04" runs-on: 'ubuntu-24.04-arm' ubuntu-version: '2404' @@ -936,6 +941,7 @@ include: skip-drivers: 'false' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-cuda-13-arm64-turboquant' + builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-cuda-13-arm64' base-image: "ubuntu:24.04" runs-on: 'ubuntu-24.04-arm' ubuntu-version: '2404' @@ -1560,6 +1566,7 @@ include: platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-llama-cpp' + builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-rocm-amd64' runs-on: 'ubuntu-latest' base-image: "rocm/dev-ubuntu-24.04:7.2.1" skip-drivers: 'false' @@ -1573,6 +1580,7 @@ include: platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-turboquant' + builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-rocm-amd64' runs-on: 'ubuntu-latest' base-image: "rocm/dev-ubuntu-24.04:7.2.1" skip-drivers: 'false' @@ -1809,6 +1817,7 @@ include: platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-llama-cpp' + builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-intel-amd64' runs-on: 'ubuntu-latest' base-image: "intel/oneapi-basekit:2025.3.2-0-devel-ubuntu24.04" skip-drivers: 'false' @@ -1822,6 +1831,7 @@ include: platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-turboquant' + builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-intel-amd64' runs-on: 'ubuntu-latest' base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' @@ -1835,6 +1845,7 @@ include: platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f16-llama-cpp' + builder-base-image: 
'quay.io/go-skynet/ci-cache:base-grpc-intel-amd64' runs-on: 'ubuntu-latest' base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' @@ -1848,6 +1859,7 @@ include: platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f16-turboquant' + builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-intel-amd64' runs-on: 'ubuntu-latest' base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' @@ -2256,6 +2268,7 @@ include: platform-tag: 'amd64' tag-latest: 'auto' tag-suffix: '-cpu-llama-cpp' + builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-amd64' runs-on: 'ubuntu-latest' base-image: "ubuntu:24.04" skip-drivers: 'false' @@ -2270,6 +2283,7 @@ include: platform-tag: 'arm64' tag-latest: 'auto' tag-suffix: '-cpu-llama-cpp' + builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-arm64' runs-on: 'ubuntu-24.04-arm' base-image: "ubuntu:24.04" skip-drivers: 'false' @@ -2284,6 +2298,7 @@ include: platform-tag: 'amd64' tag-latest: 'auto' tag-suffix: '-cpu-turboquant' + builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-amd64' runs-on: 'ubuntu-latest' base-image: "ubuntu:24.04" skip-drivers: 'false' @@ -2298,6 +2313,7 @@ include: platform-tag: 'arm64' tag-latest: 'auto' tag-suffix: '-cpu-turboquant' + builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-arm64' runs-on: 'ubuntu-24.04-arm' base-image: "ubuntu:24.04" skip-drivers: 'false' @@ -2311,6 +2327,7 @@ include: platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-cpu-ik-llama-cpp' + builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-amd64' runs-on: 'ubuntu-latest' base-image: "ubuntu:24.04" skip-drivers: 'false' @@ -2325,6 +2342,7 @@ include: skip-drivers: 'false' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64-llama-cpp' + builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-l4t-cuda-12-arm64' base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" runs-on: 'ubuntu-24.04-arm' backend: "llama-cpp" @@ -2338,6 +2356,7 
@@ include: skip-drivers: 'false' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64-turboquant' + builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-l4t-cuda-12-arm64' base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" runs-on: 'ubuntu-24.04-arm' backend: "turboquant" @@ -2351,6 +2370,7 @@ include: platform-tag: 'amd64' tag-latest: 'auto' tag-suffix: '-gpu-vulkan-llama-cpp' + builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-vulkan-amd64' runs-on: 'ubuntu-latest' base-image: "ubuntu:24.04" skip-drivers: 'false' @@ -2365,6 +2385,7 @@ include: platform-tag: 'arm64' tag-latest: 'auto' tag-suffix: '-gpu-vulkan-llama-cpp' + builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-vulkan-arm64' runs-on: 'ubuntu-24.04-arm' base-image: "ubuntu:24.04" skip-drivers: 'false' @@ -2379,6 +2400,7 @@ include: platform-tag: 'amd64' tag-latest: 'auto' tag-suffix: '-gpu-vulkan-turboquant' + builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-vulkan-amd64' runs-on: 'ubuntu-latest' base-image: "ubuntu:24.04" skip-drivers: 'false' @@ -2394,6 +2416,7 @@ include: platform-tag: 'arm64' tag-latest: 'auto' tag-suffix: '-gpu-vulkan-turboquant' + builder-base-image: 'quay.io/go-skynet/ci-cache:base-grpc-vulkan-arm64' runs-on: 'ubuntu-24.04-arm' base-image: "ubuntu:24.04" skip-drivers: 'false' diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 8eb0f31e9..6e6c4ab33 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -74,6 +74,7 @@ jobs: platforms: ${{ matrix.platforms }} platform-tag: ${{ matrix.platform-tag || '' }} runs-on: ${{ matrix.runs-on }} + builder-base-image: ${{ matrix.builder-base-image || '' }} base-image: ${{ matrix.base-image }} backend: ${{ matrix.backend }} dockerfile: ${{ matrix.dockerfile }} diff --git a/.github/workflows/backend_build.yml b/.github/workflows/backend_build.yml index 97b650075..524615baa 100644 --- a/.github/workflows/backend_build.yml +++ b/.github/workflows/backend_build.yml @@ 
-74,6 +74,15 @@ on: required: false default: '' type: string + builder-base-image: + description: | + Pre-built builder base image (e.g. quay.io/go-skynet/ci-cache:base-grpc-cuda-13-amd64). + When set, the variant Dockerfile uses its `builder-prebuilt` stage which FROMs this + image directly instead of running its own gRPC stage + apt installs. Empty for + backends whose Dockerfile doesn't support a prebuilt base. + required: false + default: '' + type: string secrets: dockerUsername: required: false @@ -190,6 +199,8 @@ jobs: APT_MIRROR=${{ steps.apt_mirror.outputs.effective-mirror }} APT_PORTS_MIRROR=${{ steps.apt_mirror.outputs.effective-ports-mirror }} DEPS_REFRESH=${{ steps.deps_refresh.outputs.key }} + BUILDER_BASE_IMAGE=${{ inputs.builder-base-image }} + BUILDER_TARGET=${{ inputs.builder-base-image != '' && 'builder-prebuilt' || 'builder-fromsource' }} context: ${{ inputs.context }} file: ${{ inputs.dockerfile }} cache-from: type=registry,ref=quay.io/go-skynet/ci-cache:cache${{ inputs.tag-suffix }}-${{ inputs.platform-tag }} @@ -243,6 +254,8 @@ jobs: APT_MIRROR=${{ steps.apt_mirror.outputs.effective-mirror }} APT_PORTS_MIRROR=${{ steps.apt_mirror.outputs.effective-ports-mirror }} DEPS_REFRESH=${{ steps.deps_refresh.outputs.key }} + BUILDER_BASE_IMAGE=${{ inputs.builder-base-image }} + BUILDER_TARGET=${{ inputs.builder-base-image != '' && 'builder-prebuilt' || 'builder-fromsource' }} context: ${{ inputs.context }} file: ${{ inputs.dockerfile }} cache-from: type=registry,ref=quay.io/go-skynet/ci-cache:cache${{ inputs.tag-suffix }}-${{ inputs.platform-tag }} diff --git a/.github/workflows/backend_pr.yml b/.github/workflows/backend_pr.yml index 863610c87..b01e5dddc 100644 --- a/.github/workflows/backend_pr.yml +++ b/.github/workflows/backend_pr.yml @@ -50,6 +50,7 @@ jobs: platforms: ${{ matrix.platforms }} platform-tag: ${{ matrix.platform-tag || '' }} runs-on: ${{ matrix.runs-on }} + builder-base-image: ${{ matrix.builder-base-image || '' }} base-image: ${{ 
matrix.base-image }} backend: ${{ matrix.backend }} dockerfile: ${{ matrix.dockerfile }} diff --git a/.github/workflows/base-images.yml b/.github/workflows/base-images.yml index a8baece3c..7d6f2b238 100644 --- a/.github/workflows/base-images.yml +++ b/.github/workflows/base-images.yml @@ -104,6 +104,23 @@ jobs: cuda-major-version: '' cuda-minor-version: '' ubuntu-version: '2404' + # Legacy JetPack r36.4.0 base for older Jetson devices (CUDA 12). + # Distinct from base-grpc-cuda-13-arm64 (Ubuntu 24.04 + CUDA 13 sbsa) + # which targets newer Jetsons. Some matrix entries + # (-nvidia-l4t-arm64-llama-cpp / -turboquant) still build against + # the JetPack image, so we need a matching base. + - tag: 'base-grpc-l4t-cuda-12-arm64' + runs-on: 'ubuntu-24.04-arm' + base-image: 'nvcr.io/nvidia/l4t-jetpack:r36.4.0' + build-type: 'l4t' + cuda-major-version: '12' + cuda-minor-version: '0' + ubuntu-version: '2204' + # JetPack r36.4.0 already ships CUDA preinstalled at /usr/local/cuda; + # apt-installing cuda-nvcc-12-0 from the public repos fails because + # those packages aren't published for the JetPack apt feed. Match + # the original l4t matrix entry which set skip-drivers: 'true'. 
+ skip-drivers: 'true' steps: - uses: actions/checkout@v6 with: @@ -131,6 +148,7 @@ jobs: CUDA_MAJOR_VERSION=${{ matrix.cuda-major-version }} CUDA_MINOR_VERSION=${{ matrix.cuda-minor-version }} UBUNTU_VERSION=${{ matrix.ubuntu-version }} + SKIP_DRIVERS=${{ matrix.skip-drivers || 'false' }} cache-from: type=registry,ref=quay.io/go-skynet/ci-cache:cache-${{ matrix.tag }} cache-to: type=registry,ref=quay.io/go-skynet/ci-cache:cache-${{ matrix.tag }},mode=max,ignore-error=true provenance: false diff --git a/backend/Dockerfile.base-grpc-builder b/backend/Dockerfile.base-grpc-builder index ab1b6eadf..8843a0a72 100644 --- a/backend/Dockerfile.base-grpc-builder +++ b/backend/Dockerfile.base-grpc-builder @@ -25,6 +25,7 @@ # base-grpc-cuda-12-amd64 ubuntu:24.04 + CUDA 12.8 # base-grpc-cuda-13-amd64 ubuntu:22.04 + CUDA 13.0 # base-grpc-cuda-13-arm64 ubuntu:24.04 + CUDA 13.0 (sbsa) +# base-grpc-l4t-cuda-12-arm64 ubuntu:22.04 + CUDA 12.x (legacy JetPack) # base-grpc-rocm-amd64 rocm/dev-ubuntu-24.04:7.2.1 + hipblas # base-grpc-vulkan-amd64 ubuntu:24.04 + Vulkan SDK 1.4.335 # base-grpc-vulkan-arm64 ubuntu:24.04 + Vulkan SDK ARM 1.4.335 @@ -38,10 +39,11 @@ # downstream builds will add it to CMAKE_PREFIX_PATH (or copy to # /usr/local) the same way Dockerfile.llama-cpp does today. # -# Install logic is copied verbatim from backend/Dockerfile.llama-cpp on -# master so the resulting image is bit-identical to what the variant -# Dockerfile produces today. Do not paraphrase apt invocations — PR 2 -# depends on bit-equivalence. +# Install logic lives in .docker/install-base-deps.sh, which is also +# bind-mounted by the variant Dockerfiles' builder-fromsource stage. +# This guarantees bit-equivalence between the prebuilt CI base and the +# from-source local-dev path — both invoke the same script with the +# same env inputs. 
ARG BASE_IMAGE=ubuntu:24.04 @@ -64,12 +66,21 @@ ARG APT_MIRROR="" ARG APT_PORTS_MIRROR="" ARG AMDGPU_TARGETS="" -ENV BUILD_TYPE=${BUILD_TYPE} -ENV CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} -ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -ENV AMDGPU_TARGETS=${AMDGPU_TARGETS} -ENV MAKEFLAGS=${GRPC_MAKEFLAGS} -ENV DEBIAN_FRONTEND=noninteractive +ENV BUILD_TYPE=${BUILD_TYPE} \ + CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} \ + CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} \ + CMAKE_FROM_SOURCE=${CMAKE_FROM_SOURCE} \ + CMAKE_VERSION=${CMAKE_VERSION} \ + GRPC_VERSION=${GRPC_VERSION} \ + GRPC_MAKEFLAGS=${GRPC_MAKEFLAGS} \ + SKIP_DRIVERS=${SKIP_DRIVERS} \ + TARGETARCH=${TARGETARCH} \ + UBUNTU_VERSION=${UBUNTU_VERSION} \ + APT_MIRROR=${APT_MIRROR} \ + APT_PORTS_MIRROR=${APT_PORTS_MIRROR} \ + AMDGPU_TARGETS=${AMDGPU_TARGETS} \ + MAKEFLAGS=${GRPC_MAKEFLAGS} \ + DEBIAN_FRONTEND=noninteractive # CUDA on PATH (no-op when CUDA isn't installed) ENV PATH=/usr/local/cuda/bin:${PATH} @@ -78,191 +89,10 @@ ENV PATH=/opt/rocm/bin:${PATH} WORKDIR /build -# Base apt build deps. Mirrors backend/Dockerfile.llama-cpp lines 85-97 -# (the `builder` stage's apt block) — superset of the gRPC stage's deps -# so the same image can compile gRPC and downstream backends. -RUN --mount=type=bind,source=.docker/apt-mirror.sh,target=/usr/local/sbin/apt-mirror \ - APT_MIRROR="${APT_MIRROR}" APT_PORTS_MIRROR="${APT_PORTS_MIRROR}" sh /usr/local/sbin/apt-mirror && \ - apt-get update && \ - apt-get install -y --no-install-recommends \ - build-essential \ - ccache git \ - ca-certificates \ - make \ - pkg-config libcurl4-openssl-dev \ - curl unzip \ - libssl-dev wget && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -# Vulkan SDK install. Mirrors backend/Dockerfile.llama-cpp lines 107-154. 
-RUN </dev/null || ls /opt/rocm*/lib64/rocblas/library/Kernels* 2>/dev/null) | grep -oP 'gfx[0-9a-z+-]+' | sort -u || \ - echo "WARNING: No rocBLAS kernel data found" \ - ; fi - -RUN echo "TARGETARCH: $TARGETARCH" - -# protoc download. Mirrors backend/Dockerfile.llama-cpp lines 237-248. -# We need protoc installed, and the version in 22.04 is too old. We will create one as part of installing the GRPC build below -# but that will also bring in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only -# here so that we can generate the grpc code for the stablediffusion build. -RUN < /usr/local exactly like -# `COPY --from=grpc /opt/grpc /usr/local` does today. -# -# We install GRPC to a different prefix here so that we can copy in only the build artifacts later -# saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree -# and running make install in the target container -RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \ - mkdir -p /build/grpc/cmake/build && \ - cd /build/grpc/cmake/build && \ - sed -i "216i\ TESTONLY" "../../third_party/abseil-cpp/absl/container/CMakeLists.txt" && \ - cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \ - make && \ - make install && \ - rm -rf /build +# Single RUN that delegates to .docker/install-base-deps.sh — the same +# script the variant Dockerfiles' builder-fromsource stage runs. 
+RUN --mount=type=bind,source=.docker/install-base-deps.sh,target=/usr/local/sbin/install-base-deps \ + --mount=type=bind,source=.docker/apt-mirror.sh,target=/usr/local/sbin/apt-mirror \ + bash /usr/local/sbin/install-base-deps WORKDIR / diff --git a/backend/Dockerfile.ik-llama-cpp b/backend/Dockerfile.ik-llama-cpp index 364de3447..9694441b0 100644 --- a/backend/Dockerfile.ik-llama-cpp +++ b/backend/Dockerfile.ik-llama-cpp @@ -1,261 +1,85 @@ ARG BASE_IMAGE=ubuntu:24.04 -ARG GRPC_BASE_IMAGE=${BASE_IMAGE} +# BUILDER_BASE_IMAGE defaults to BASE_IMAGE so the Dockerfile parses even +# when no prebuilt base is supplied. The builder-prebuilt stage is only +# entered when BUILDER_TARGET=builder-prebuilt, so a "wrong" fallback +# content here is harmless — BuildKit prunes the unreferenced builder. +ARG BUILDER_BASE_IMAGE=${BASE_IMAGE} +# BUILDER_TARGET selects which builder stage the final scratch image copies +# package output from. Declared at global scope (before any FROM) so it's +# usable in `FROM ${BUILDER_TARGET}` below. Default keeps local +# `make backends/ik-llama-cpp` on the from-source path. +ARG BUILDER_TARGET=builder-fromsource ARG APT_MIRROR="" ARG APT_PORTS_MIRROR="" -# The grpc target does one thing, it builds and installs GRPC. This is in it's own layer so that it can be effectively cached by CI. -# You probably don't need to change anything here, and if you do, make sure that CI is adjusted so that the cache continues to work. -FROM ${GRPC_BASE_IMAGE} AS grpc - -# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI -ARG GRPC_MAKEFLAGS="-j4 -Otarget" -ARG GRPC_VERSION=v1.65.0 +# ============================================================================ +# Stage: builder-fromsource — self-contained build path. +# Runs .docker/install-base-deps.sh (apt deps + cmake + protoc + gRPC + +# conditional CUDA/ROCm/Vulkan), copies /opt/grpc to /usr/local, then +# compiles the variant. 
Used when BUILDER_TARGET=builder-fromsource (the +# default; local `make backends/ik-llama-cpp`). +# +# The install script is the same one that backend/Dockerfile.base-grpc-builder +# runs, so the result is bit-equivalent to the prebuilt-base path +# (builder-prebuilt below). +# ============================================================================ +FROM ${BASE_IMAGE} AS builder-fromsource +ARG BUILD_TYPE +ARG CUDA_MAJOR_VERSION +ARG CUDA_MINOR_VERSION ARG CMAKE_FROM_SOURCE=false # CUDA Toolkit 13.x compatibility: CMake 3.31.9+ fixes toolchain detection/arch table issues ARG CMAKE_VERSION=3.31.10 -ARG APT_MIRROR -ARG APT_PORTS_MIRROR - -ENV MAKEFLAGS=${GRPC_MAKEFLAGS} - -WORKDIR /build - -RUN --mount=type=bind,source=.docker/apt-mirror.sh,target=/usr/local/sbin/apt-mirror \ - APT_MIRROR="${APT_MIRROR}" APT_PORTS_MIRROR="${APT_PORTS_MIRROR}" sh /usr/local/sbin/apt-mirror && \ - apt-get update && \ - apt-get install -y --no-install-recommends \ - ca-certificates \ - build-essential curl libssl-dev \ - git wget && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -# Install CMake (the version in 22.04 is too old) -RUN </dev/null || ls /opt/rocm*/lib64/rocblas/library/Kernels* 2>/dev/null) | grep -oP 'gfx[0-9a-z+-]+' | sort -u || \ - echo "WARNING: No rocBLAS kernel data found" \ - ; fi - -RUN echo "TARGETARCH: $TARGETARCH" - -# We need protoc installed, and the version in 22.04 is too old. We will create one as part installing the GRPC build below -# but that will also being in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only -# here so that we can generate the grpc code for the stablediffusion build -RUN </dev/null || ls /opt/rocm*/lib64/rocblas/library/Kernels* 2>/dev/null) | grep -oP 'gfx[0-9a-z+-]+' | sort -u || \ - echo "WARNING: No rocBLAS kernel data found" \ - ; fi - -RUN echo "TARGETARCH: $TARGETARCH" - -# We need protoc installed, and the version in 22.04 is too old. 
We will create one as part installing the GRPC build below -# but that will also being in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only -# here so that we can generate the grpc code for the stablediffusion build -RUN <