mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-16 20:52:08 -04:00
ci(ik-llama-cpp,turboquant): add BuildKit ccache mount to compile steps
Mirror the ccache mount added to Dockerfile.llama-cpp in 9228e5b4 for
the other two llama.cpp-derived backends. Same shape, distinct mount
ids so each backend's cache is independent:
ik-llama-cpp-ccache-${TARGETARCH}-${BUILD_TYPE}
turboquant-ccache-${TARGETARCH}-${BUILD_TYPE}
ik_llama.cpp is a different upstream fork with no source overlap with
llama-cpp, so a separate cache makes sense.
turboquant is a llama.cpp fork that reuses backend/cpp/llama-cpp
source via a thin wrapper Makefile — most translation units (TUs) would
in principle hit llama-cpp's ccache too. The two caches are kept
separate for now so that one fork's regressions cannot poison the
other's cache; revisit sharing once we have hit-rate numbers.
Same registry-export behavior as llama-cpp: the cache mount rides on
backend_build.yml's existing cache-to: type=registry,mode=max.
Assisted-by: Claude:claude-opus-4-7
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
@@ -259,12 +259,22 @@ COPY --from=grpc /opt/grpc /usr/local
|
||||
|
||||
COPY . /LocalAI
|
||||
|
||||
RUN <<'EOT' bash
|
||||
# BuildKit cache mount for ccache. See Dockerfile.llama-cpp (commit 9228e5b4)
|
||||
# for the rationale. Distinct mount id so ik-llama-cpp's cache doesn't
|
||||
# overlap with llama-cpp's — ik_llama.cpp is a different fork with
|
||||
# different source.
|
||||
RUN --mount=type=cache,target=/root/.ccache,id=ik-llama-cpp-ccache-${TARGETARCH}-${BUILD_TYPE},sharing=locked <<'EOT' bash
|
||||
set -euxo pipefail
|
||||
|
||||
export CCACHE_DIR=/root/.ccache
|
||||
ccache --max-size=5G || true
|
||||
ccache -z || true
|
||||
|
||||
export CMAKE_ARGS="${CMAKE_ARGS:-} -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache"
|
||||
|
||||
if [[ -n "${CUDA_DOCKER_ARCH:-}" ]]; then
|
||||
CUDA_ARCH_ESC="${CUDA_DOCKER_ARCH//;/\\;}"
|
||||
export CMAKE_ARGS="${CMAKE_ARGS:-} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH_ESC}"
|
||||
export CMAKE_ARGS="${CMAKE_ARGS} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH_ESC}"
|
||||
echo "CMAKE_ARGS(env) = ${CMAKE_ARGS}"
|
||||
rm -rf /LocalAI/backend/cpp/ik-llama-cpp-*-build
|
||||
fi
|
||||
@@ -278,6 +288,8 @@ else
|
||||
# ik_llama.cpp's IQK kernels require at least AVX2
|
||||
make ik-llama-cpp-avx2
|
||||
fi
|
||||
|
||||
ccache -s || true
|
||||
EOT
|
||||
|
||||
|
||||
|
||||
@@ -263,12 +263,25 @@ COPY --from=grpc /opt/grpc /usr/local
|
||||
|
||||
COPY . /LocalAI
|
||||
|
||||
RUN <<'EOT' bash
|
||||
# BuildKit cache mount for ccache. See Dockerfile.llama-cpp (commit 9228e5b4)
|
||||
# for rationale. turboquant is a llama.cpp fork that reuses
|
||||
# backend/cpp/llama-cpp source via a thin wrapper Makefile, so MOST TUs
|
||||
# are content-identical to the upstream llama-cpp build. Sharing a cache
|
||||
# id with llama-cpp could give cross-fork hits — but for now keep them
|
||||
# separate so a regression in one doesn't poison the other. Revisit
|
||||
# sharing after measuring the actual hit rate.
|
||||
RUN --mount=type=cache,target=/root/.ccache,id=turboquant-ccache-${TARGETARCH}-${BUILD_TYPE},sharing=locked <<'EOT' bash
|
||||
set -euxo pipefail
|
||||
|
||||
export CCACHE_DIR=/root/.ccache
|
||||
ccache --max-size=5G || true
|
||||
ccache -z || true
|
||||
|
||||
export CMAKE_ARGS="${CMAKE_ARGS:-} -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache"
|
||||
|
||||
if [[ -n "${CUDA_DOCKER_ARCH:-}" ]]; then
|
||||
CUDA_ARCH_ESC="${CUDA_DOCKER_ARCH//;/\\;}"
|
||||
export CMAKE_ARGS="${CMAKE_ARGS:-} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH_ESC}"
|
||||
export CMAKE_ARGS="${CMAKE_ARGS} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH_ESC}"
|
||||
echo "CMAKE_ARGS(env) = ${CMAKE_ARGS}"
|
||||
rm -rf /LocalAI/backend/cpp/turboquant-*-build
|
||||
fi
|
||||
@@ -287,6 +300,8 @@ else
|
||||
make turboquant-grpc
|
||||
make turboquant-rpc-server
|
||||
fi
|
||||
|
||||
ccache -s || true
|
||||
EOT
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user