From 31aa0582a5763ec03b31b4552a9fe7412c2dae01 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Sat, 9 May 2026 16:21:49 +0000
Subject: [PATCH] ci(ik-llama-cpp,turboquant): add BuildKit ccache mount to
 compile steps
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Mirror the ccache mount added to Dockerfile.llama-cpp in 9228e5b4 for
the other two llama.cpp-derived backends. Same shape, distinct mount
ids so each backend's cache is independent:

  ik-llama-cpp-ccache-${TARGETARCH}-${BUILD_TYPE}
  turboquant-ccache-${TARGETARCH}-${BUILD_TYPE}

ik_llama.cpp is a different upstream fork with no source overlap with
llama-cpp, so a separate cache makes sense.

turboquant is a llama.cpp fork that reuses backend/cpp/llama-cpp
source via a thin wrapper Makefile — most translation units (TUs)
would in principle hit llama-cpp's ccache too. Keeping them separate
for now to avoid one fork's regressions poisoning the other; revisit
sharing after we have hit-rate numbers.

Same registry-export behavior as llama-cpp: the cache mount rides on
backend_build.yml's existing cache-to: type=registry,mode=max.

Assisted-by: Claude:claude-opus-4-7
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 backend/Dockerfile.ik-llama-cpp | 16 ++++++++++++++--
 backend/Dockerfile.turboquant   | 19 +++++++++++++++++--
 2 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/backend/Dockerfile.ik-llama-cpp b/backend/Dockerfile.ik-llama-cpp
index e2387dfba..364de3447 100644
--- a/backend/Dockerfile.ik-llama-cpp
+++ b/backend/Dockerfile.ik-llama-cpp
@@ -259,12 +259,22 @@ COPY --from=grpc /opt/grpc /usr/local
 
 COPY . /LocalAI
 
-RUN <<'EOT' bash
+# BuildKit cache mount for ccache. See Dockerfile.llama-cpp (commit 9228e5b4)
+# for the rationale. Distinct mount id so ik-llama-cpp's cache doesn't
+# overlap with llama-cpp's — ik_llama.cpp is a different fork with
+# different source.
+RUN --mount=type=cache,target=/root/.ccache,id=ik-llama-cpp-ccache-${TARGETARCH}-${BUILD_TYPE},sharing=locked <<'EOT' bash
 set -euxo pipefail
 
+export CCACHE_DIR=/root/.ccache
+ccache --max-size=5G || true
+ccache -z || true
+
+export CMAKE_ARGS="${CMAKE_ARGS:-} -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache"
+
 if [[ -n "${CUDA_DOCKER_ARCH:-}" ]]; then
   CUDA_ARCH_ESC="${CUDA_DOCKER_ARCH//;/\\;}"
-  export CMAKE_ARGS="${CMAKE_ARGS:-} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH_ESC}"
+  export CMAKE_ARGS="${CMAKE_ARGS} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH_ESC}"
   echo "CMAKE_ARGS(env) = ${CMAKE_ARGS}"
   rm -rf /LocalAI/backend/cpp/ik-llama-cpp-*-build
 fi
@@ -278,6 +288,8 @@ else
   # ik_llama.cpp's IQK kernels require at least AVX2
   make ik-llama-cpp-avx2
 fi
+
+ccache -s || true
 EOT
 
 
diff --git a/backend/Dockerfile.turboquant b/backend/Dockerfile.turboquant
index 4235b0fb2..db8648417 100644
--- a/backend/Dockerfile.turboquant
+++ b/backend/Dockerfile.turboquant
@@ -263,12 +263,25 @@ COPY --from=grpc /opt/grpc /usr/local
 
 COPY . /LocalAI
 
-RUN <<'EOT' bash
+# BuildKit cache mount for ccache. See Dockerfile.llama-cpp (commit 9228e5b4)
+# for the rationale. turboquant is a llama.cpp fork that reuses
+# backend/cpp/llama-cpp source via a thin wrapper Makefile, so most
+# translation units are content-identical to the upstream llama-cpp
+# build. Sharing a cache id with llama-cpp could give cross-fork hits,
+# but for now keep them separate so a regression in one doesn't poison
+# the other. Revisit sharing after measuring the actual hit rate.
+RUN --mount=type=cache,target=/root/.ccache,id=turboquant-ccache-${TARGETARCH}-${BUILD_TYPE},sharing=locked <<'EOT' bash
 set -euxo pipefail
 
+export CCACHE_DIR=/root/.ccache
+ccache --max-size=5G || true
+ccache -z || true
+
+export CMAKE_ARGS="${CMAKE_ARGS:-} -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache"
+
 if [[ -n "${CUDA_DOCKER_ARCH:-}" ]]; then
   CUDA_ARCH_ESC="${CUDA_DOCKER_ARCH//;/\\;}"
-  export CMAKE_ARGS="${CMAKE_ARGS:-} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH_ESC}"
+  export CMAKE_ARGS="${CMAKE_ARGS} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH_ESC}"
   echo "CMAKE_ARGS(env) = ${CMAKE_ARGS}"
   rm -rf /LocalAI/backend/cpp/turboquant-*-build
 fi
@@ -287,6 +300,8 @@ else
   make turboquant-grpc
   make turboquant-rpc-server
 fi
+
+ccache -s || true
 EOT