# LocalAI/backend/Dockerfile.llama-cpp

# ---- Global build arguments (visible to FROM lines only) -------------------

# Image the from-source builder stage starts from.
ARG BASE_IMAGE=ubuntu:24.04

# Image the prebuilt builder stage starts from. It falls back to BASE_IMAGE
# so that `FROM ${BUILDER_BASE_IMAGE}` always resolves, even when no prebuilt
# base is supplied. The fallback content does not matter in that case: the
# builder-prebuilt stage is only used when BUILDER_TARGET=builder-prebuilt,
# and BuildKit prunes a builder stage that nothing references.
ARG BUILDER_BASE_IMAGE=${BASE_IMAGE}

# Optional apt mirror overrides; empty by default. They are re-declared in
# the builder-fromsource stage and exported to its environment there.
ARG APT_MIRROR=""
ARG APT_PORTS_MIRROR=""

# Name of the builder stage whose package output ends up in the final
# scratch image. It has to live at global scope (ahead of every FROM) to be
# usable in `FROM ${BUILDER_TARGET}`. The default keeps a local
# `make backends/llama-cpp` on the from-source path.
ARG BUILDER_TARGET=builder-fromsource


# ============================================================================
# Stage: builder-fromsource — self-contained build path.
# Runs .docker/install-base-deps.sh (apt deps + cmake + protoc + gRPC +
# conditional CUDA/ROCm/Vulkan), copies /opt/grpc to /usr/local, then
# compiles the variant. Used when BUILDER_TARGET=builder-fromsource (the
# default; local `make backends/llama-cpp`).
#
# The install script is the same one that backend/Dockerfile.base-grpc-builder
# runs, so the result is bit-equivalent to the prebuilt-base path
# (builder-prebuilt below).
# ============================================================================
FROM ${BASE_IMAGE} AS builder-fromsource

# ---- Build selection and platform ------------------------------------------
ARG BUILD_TYPE
ARG SKIP_DRIVERS=false
ARG TARGETARCH
ARG TARGETVARIANT
ARG UBUNTU_VERSION=2404
ARG GO_VERSION=1.25.4

# ---- Toolchain versions ----------------------------------------------------
ARG CMAKE_FROM_SOURCE=false
# CMake 3.31.9+ is needed for CUDA Toolkit 13.x (fixes toolchain detection
# and arch table issues).
ARG CMAKE_VERSION=3.31.10
ARG CMAKE_ARGS
ARG GRPC_VERSION=v1.65.0
ARG GRPC_MAKEFLAGS="-j4 -Otarget"

# ---- GPU targets -----------------------------------------------------------
ARG CUDA_MAJOR_VERSION
ARG CUDA_MINOR_VERSION
# Semicolon-separated CUDA archs, e.g. --build-arg CUDA_DOCKER_ARCH='75;86;89;120'
ARG CUDA_DOCKER_ARCH
ARG AMDGPU_TARGETS

# ---- apt mirrors (re-declared from global scope) ---------------------------
ARG APT_MIRROR
ARG APT_PORTS_MIRROR

# Export the build arguments into the stage environment so the scripts and
# make invocations below can read them.
ENV BUILD_TYPE=${BUILD_TYPE} \
    SKIP_DRIVERS=${SKIP_DRIVERS} \
    TARGETARCH=${TARGETARCH} \
    UBUNTU_VERSION=${UBUNTU_VERSION} \
    DEBIAN_FRONTEND=noninteractive
ENV CMAKE_FROM_SOURCE=${CMAKE_FROM_SOURCE} \
    CMAKE_VERSION=${CMAKE_VERSION} \
    CMAKE_ARGS=${CMAKE_ARGS} \
    GRPC_VERSION=${GRPC_VERSION} \
    GRPC_MAKEFLAGS=${GRPC_MAKEFLAGS}
ENV CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} \
    CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} \
    CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} \
    AMDGPU_TARGETS=${AMDGPU_TARGETS}
ENV APT_MIRROR=${APT_MIRROR} \
    APT_PORTS_MIRROR=${APT_PORTS_MIRROR}

# Put the ROCm and CUDA tool directories at the front of PATH (ROCm first,
# then CUDA). Both entries are no-ops when the toolkit isn't installed.
ENV PATH=/opt/rocm/bin:/usr/local/cuda/bin:${PATH}

WORKDIR /build

# Run the shared dependency installer. The script and its apt-mirror helper
# are bind-mounted from the build context rather than copied into a layer.
# backend/Dockerfile.base-grpc-builder runs the same script, which is what
# keeps this path equivalent to the prebuilt CI base.
RUN --mount=type=bind,source=.docker/apt-mirror.sh,target=/usr/local/sbin/apt-mirror \
    --mount=type=bind,source=.docker/install-base-deps.sh,target=/usr/local/sbin/install-base-deps \
    bash /usr/local/sbin/install-base-deps

# Same step as in builder-prebuilt: gRPC is installed under /opt/grpc, but
# the Makefile expects CMake's find_package to locate it under /usr/local.
RUN cp -a /opt/grpc/. /usr/local/

# Bring the whole build context into the image.
COPY . /LocalAI

# Compile with a BuildKit cache mount backing ccache:
#   - the cache persists compiler outputs across builds (CI exports it via
#     the registry cache, cache-to: type=registry,mode=max);
#   - after a LLAMA_VERSION bump, or a LocalAI change that doesn't touch
#     llama.cpp, most translation units preprocess to identical source, so
#     ccache hands back the previous object file instead of recompiling;
#   - CMAKE_*_COMPILER_LAUNCHER routes gcc/g++/nvcc through ccache;
#   - sharing=locked serializes writers when several matrix variants resolve
#     to the same cache id (the id is keyed on TARGETARCH and BUILD_TYPE).
# The compile body itself lives in .docker/llama-cpp-compile.sh and is shared
# with builder-prebuilt.
RUN --mount=type=cache,target=/root/.ccache,id=llama-cpp-ccache-${TARGETARCH}-${BUILD_TYPE},sharing=locked \
    --mount=type=bind,source=.docker/llama-cpp-compile.sh,target=/usr/local/sbin/compile.sh \
    bash /usr/local/sbin/compile.sh

# Assemble the package directory; the Makefile's `package` target handles
# architecture-specific library copying. -B forces the target to be remade.
RUN make -BC /LocalAI/backend/cpp/llama-cpp package


# ============================================================================
# Stage: builder-prebuilt — uses the pre-built base from
# quay.io/go-skynet/ci-cache:base-grpc-* (built by .github/workflows/base-images.yml).
# That image already has gRPC at /opt/grpc + apt deps + CUDA/ROCm/Vulkan
# pre-installed, so we just copy gRPC to /usr/local and compile. Used when
# BUILDER_TARGET=builder-prebuilt (CI when the matrix entry sets
# builder-base-image).
# ============================================================================
FROM ${BUILDER_BASE_IMAGE} AS builder-prebuilt

# Build arguments consumed by this stage. TARGETARCH and BUILD_TYPE also key
# the ccache mount id used by the compile step.
ARG BUILD_TYPE
ARG CUDA_DOCKER_ARCH
ARG CMAKE_ARGS
ARG AMDGPU_TARGETS
ARG TARGETARCH
ARG TARGETVARIANT

# Export the build knobs into the stage environment.
ENV BUILD_TYPE=${BUILD_TYPE} \
    CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} \
    CMAKE_ARGS=${CMAKE_ARGS} \
    AMDGPU_TARGETS=${AMDGPU_TARGETS}

# The base-grpc-* image leaves gRPC under /opt/grpc without copying it to
# /usr/local. builder-fromsource performs the same copy; doing it here too
# lets the compile step find gRPC at the prefix the Makefile expects.
RUN cp -a /opt/grpc/. /usr/local/

# Bring the whole build context into the image.
COPY . /LocalAI

# Shared compile script, bind-mounted from the build context, with a locked
# BuildKit cache mount at /root/.ccache keyed on TARGETARCH and BUILD_TYPE.
RUN --mount=type=cache,target=/root/.ccache,id=llama-cpp-ccache-${TARGETARCH}-${BUILD_TYPE},sharing=locked \
    --mount=type=bind,source=.docker/llama-cpp-compile.sh,target=/usr/local/sbin/compile.sh \
    bash /usr/local/sbin/compile.sh

# Assemble the package directory (-B forces the target to be remade).
RUN make -BC /LocalAI/backend/cpp/llama-cpp package


# ============================================================================
# Final stage — copies package output from one of the two builders.
# BUILDER_TARGET selects which one. BuildKit prunes the unreferenced builder.
#
# BuildKit doesn't support variable expansion in `COPY --from=` directly,
# so we resolve the ARG by aliasing the chosen builder to a fixed stage
# name via `FROM ${BUILDER_TARGET} AS builder` and then COPY --from=builder.
# BUILDER_TARGET itself is declared as a global ARG at the top of this
# file (required for use in FROM), so it can be referenced directly in
# the FROM line below without being re-declared.
# ============================================================================
# Alias stage: gives whichever builder BUILDER_TARGET names the fixed stage
# name `builder`, so the COPY below can refer to it without an ARG.
FROM ${BUILDER_TARGET} AS builder

# Final image: starts from the empty `scratch` base and contains only what
# the COPY below puts into it.
FROM scratch


# Copy all available binaries (the build process only creates the appropriate ones for the target architecture)
# The source is the contents of the builder's package directory; the
# destination `./` is relative to this stage's working directory (no WORKDIR
# is set in this stage).
COPY --from=builder /LocalAI/backend/cpp/llama-cpp/package/. ./