From 31aa0582a5763ec03b31b4552a9fe7412c2dae01 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 9 May 2026 16:21:49 +0000 Subject: [PATCH] ci(ik-llama-cpp,turboquant): add BuildKit ccache mount to compile steps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirror the ccache mount added to Dockerfile.llama-cpp in 9228e5b4 for the other two llama.cpp-derived backends. Same shape, distinct mount ids so each backend's cache is independent: ik-llama-cpp-ccache-${TARGETARCH}-${BUILD_TYPE} turboquant-ccache-${TARGETARCH}-${BUILD_TYPE} ik_llama.cpp is a different upstream fork; no source overlap with llama-cpp, separate cache makes sense. turboquant is a llama.cpp fork that reuses backend/cpp/llama-cpp source via a thin wrapper Makefile — most TUs would in principle hit llama-cpp's ccache too. Keeping them separate for now to avoid one fork's regressions poisoning the other; revisit sharing after we have hit-rate numbers. Same registry-export behavior as llama-cpp: the cache mount rides on backend_build.yml's existing cache-to: type=registry,mode=max. Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Ettore Di Giacinto --- backend/Dockerfile.ik-llama-cpp | 16 ++++++++++++++-- backend/Dockerfile.turboquant | 19 +++++++++++++++++-- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/backend/Dockerfile.ik-llama-cpp b/backend/Dockerfile.ik-llama-cpp index e2387dfba..364de3447 100644 --- a/backend/Dockerfile.ik-llama-cpp +++ b/backend/Dockerfile.ik-llama-cpp @@ -259,12 +259,22 @@ COPY --from=grpc /opt/grpc /usr/local COPY . /LocalAI -RUN <<'EOT' bash +# BuildKit cache mount for ccache. See Dockerfile.llama-cpp (commit 9228e5b4) +# for the rationale. Distinct mount id so ik-llama-cpp's cache doesn't +# overlap with llama-cpp's — ik_llama.cpp is a different fork with +# different source. +RUN --mount=type=cache,target=/root/.ccache,id=ik-llama-cpp-ccache-${TARGETARCH}-${BUILD_TYPE},sharing=locked <<'EOT' bash set -euxo pipefail +export CCACHE_DIR=/root/.ccache +ccache --max-size=5G || true +ccache -z || true + +export CMAKE_ARGS="${CMAKE_ARGS:-} -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache" + if [[ -n "${CUDA_DOCKER_ARCH:-}" ]]; then CUDA_ARCH_ESC="${CUDA_DOCKER_ARCH//;/\\;}" - export CMAKE_ARGS="${CMAKE_ARGS:-} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH_ESC}" + export CMAKE_ARGS="${CMAKE_ARGS} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH_ESC}" echo "CMAKE_ARGS(env) = ${CMAKE_ARGS}" rm -rf /LocalAI/backend/cpp/ik-llama-cpp-*-build fi @@ -278,6 +288,8 @@ else # ik_llama.cpp's IQK kernels require at least AVX2 make ik-llama-cpp-avx2 fi + +ccache -s || true EOT diff --git a/backend/Dockerfile.turboquant b/backend/Dockerfile.turboquant index 4235b0fb2..db8648417 100644 --- a/backend/Dockerfile.turboquant +++ b/backend/Dockerfile.turboquant @@ -263,12 +263,25 @@ COPY --from=grpc /opt/grpc /usr/local COPY . /LocalAI -RUN <<'EOT' bash +# BuildKit cache mount for ccache. See Dockerfile.llama-cpp (commit 9228e5b4) +# for rationale. turboquant is a llama.cpp fork that reuses +# backend/cpp/llama-cpp source via a thin wrapper Makefile, so MOST TUs +# are content-identical to the upstream llama-cpp build. Sharing a cache +# id with llama-cpp could give cross-fork hits — but for now keep them +# separate so a regression in one doesn't poison the other. Revisit +# sharing after measuring the actual hit rate. +RUN --mount=type=cache,target=/root/.ccache,id=turboquant-ccache-${TARGETARCH}-${BUILD_TYPE},sharing=locked <<'EOT' bash set -euxo pipefail +export CCACHE_DIR=/root/.ccache +ccache --max-size=5G || true +ccache -z || true + +export CMAKE_ARGS="${CMAKE_ARGS:-} -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache" + if [[ -n "${CUDA_DOCKER_ARCH:-}" ]]; then CUDA_ARCH_ESC="${CUDA_DOCKER_ARCH//;/\\;}" - export CMAKE_ARGS="${CMAKE_ARGS:-} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH_ESC}" + export CMAKE_ARGS="${CMAKE_ARGS} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH_ESC}" echo "CMAKE_ARGS(env) = ${CMAKE_ARGS}" rm -rf /LocalAI/backend/cpp/turboquant-*-build fi @@ -287,6 +300,8 @@ else make turboquant-grpc make turboquant-rpc-server fi + +ccache -s || true EOT