diff --git a/backend/Dockerfile.ik-llama-cpp b/backend/Dockerfile.ik-llama-cpp
index e2387dfba..364de3447 100644
--- a/backend/Dockerfile.ik-llama-cpp
+++ b/backend/Dockerfile.ik-llama-cpp
@@ -259,12 +259,22 @@ COPY --from=grpc /opt/grpc /usr/local
 
 COPY . /LocalAI
 
-RUN <<'EOT' bash
+# BuildKit cache mount for ccache. See Dockerfile.llama-cpp (commit 9228e5b4)
+# for the rationale. The mount id is distinct from llama-cpp's so the two
+# caches don't overlap: ik_llama.cpp is a separate fork with different
+# source.
+RUN --mount=type=cache,target=/root/.ccache,id=ik-llama-cpp-ccache-${TARGETARCH}-${BUILD_TYPE},sharing=locked <<'EOT' bash
 set -euxo pipefail
 
+export CCACHE_DIR=/root/.ccache
+ccache --max-size=5G || true
+ccache -z || true
+
+export CMAKE_ARGS="${CMAKE_ARGS:-} -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache"
+
 if [[ -n "${CUDA_DOCKER_ARCH:-}" ]]; then
   CUDA_ARCH_ESC="${CUDA_DOCKER_ARCH//;/\\;}"
-  export CMAKE_ARGS="${CMAKE_ARGS:-} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH_ESC}"
+  export CMAKE_ARGS="${CMAKE_ARGS} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH_ESC}"
   echo "CMAKE_ARGS(env) = ${CMAKE_ARGS}"
   rm -rf /LocalAI/backend/cpp/ik-llama-cpp-*-build
 fi
@@ -278,6 +288,8 @@ else
   # ik_llama.cpp's IQK kernels require at least AVX2
   make ik-llama-cpp-avx2
 fi
+
+ccache -s || true
 EOT
 
 
diff --git a/backend/Dockerfile.turboquant b/backend/Dockerfile.turboquant
index 4235b0fb2..db8648417 100644
--- a/backend/Dockerfile.turboquant
+++ b/backend/Dockerfile.turboquant
@@ -263,12 +263,25 @@ COPY --from=grpc /opt/grpc /usr/local
 
 COPY . /LocalAI
 
-RUN <<'EOT' bash
+# BuildKit cache mount for ccache. See Dockerfile.llama-cpp (commit 9228e5b4)
+# for the rationale. turboquant is a llama.cpp fork that reuses the
+# backend/cpp/llama-cpp source via a thin wrapper Makefile, so most
+# translation units (TUs) are content-identical to the upstream llama-cpp
+# build. Sharing a cache id with llama-cpp could yield cross-fork hits, but
+# for now keep the caches separate so a regression in one doesn't poison the
+# other. Revisit sharing after measuring the actual hit rate.
+RUN --mount=type=cache,target=/root/.ccache,id=turboquant-ccache-${TARGETARCH}-${BUILD_TYPE},sharing=locked <<'EOT' bash
 set -euxo pipefail
 
+export CCACHE_DIR=/root/.ccache
+ccache --max-size=5G || true
+ccache -z || true
+
+export CMAKE_ARGS="${CMAKE_ARGS:-} -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache"
+
 if [[ -n "${CUDA_DOCKER_ARCH:-}" ]]; then
   CUDA_ARCH_ESC="${CUDA_DOCKER_ARCH//;/\\;}"
-  export CMAKE_ARGS="${CMAKE_ARGS:-} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH_ESC}"
+  export CMAKE_ARGS="${CMAKE_ARGS} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH_ESC}"
   echo "CMAKE_ARGS(env) = ${CMAKE_ARGS}"
   rm -rf /LocalAI/backend/cpp/turboquant-*-build
 fi
@@ -287,6 +300,8 @@ else
   make turboquant-grpc
   make turboquant-rpc-server
 fi
+
+ccache -s || true
 EOT