Files
LocalAI/.docker/llama-cpp-compile.sh
Ettore Di Giacinto 3b47122e54 feat(llama-cpp,turboquant): arm64 gcc-14 for SME variants + darwin cpu-all packaging
- arm64: ggml CPU_ALL_VARIANTS builds armv9.2 SME variants whose -march=...+sme
  is rejected by the Ubuntu 24.04 default gcc-13. Build the arm64 variants with
  gcc-14 (installed in the compile step). The host only selects a variant it
  actually supports at runtime, but every variant must still compile.
- darwin: scripts/build/llama-cpp-darwin.sh builds llama-cpp-cpu-all instead of
  the fallback binary, keeps Metal (GGML_METAL stays ON; --target ggml also builds
  ggml-metal). The per-microarch libggml-cpu-*.dylib are placed in the package
  root next to the binary (darwin has no bundled ld.so, so ggml's executable-dir
  scan looks there), while the other shared dylibs go in lib/ for DYLD_LIBRARY_PATH.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: Claude:claude-opus-4-8 [Claude Code]
2026-06-24 21:50:29 +00:00

42 lines
1.6 KiB
Bash
Executable File

#!/usr/bin/env bash
# Shared compile logic for backend/Dockerfile.llama-cpp.
# Sourced (via bind mount) from both builder-fromsource and builder-prebuilt stages.
set -euxo pipefail
export CCACHE_DIR=/root/.ccache
ccache --max-size=5G || true
ccache -z || true
export CMAKE_ARGS="${CMAKE_ARGS:-} -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache"
if [[ -n "${CUDA_DOCKER_ARCH:-}" ]]; then
CUDA_ARCH_ESC="${CUDA_DOCKER_ARCH//;/\\;}"
export CMAKE_ARGS="${CMAKE_ARGS} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH_ESC}"
echo "CMAKE_ARGS(env) = ${CMAKE_ARGS}"
rm -rf /LocalAI/backend/cpp/llama-cpp-*-build
fi
cd /LocalAI/backend/cpp/llama-cpp
if [ "${BUILD_TYPE}" = "hipblas" ]; then
# ROCm: the GPU does the compute, so a single fallback CPU build is enough.
make llama-cpp-fallback
else
# arm64: ggml's CPU_ALL_VARIANTS table includes armv9.2 SME variants whose
# -march=...+sme is rejected by the Ubuntu 24.04 default gcc-13. gcc-14 accepts it, so
# build the arm64 variants with gcc-14 (the host never *selects* SME unless it has it,
# but every variant must still compile).
if [ "${TARGETARCH}" = "arm64" ]; then
apt-get update -qq && apt-get install -y -qq gcc-14 g++-14
export CC=gcc-14 CXX=g++-14
fi
# x86 and arm64: one build with ggml CPU_ALL_VARIANTS replaces the per-microarch
# binaries (x86: avx/avx2/avx512/fallback; arm64: armv8.x/armv9.x). ggml dlopens the
# best libggml-cpu-*.so at runtime by probing host CPU features.
make llama-cpp-cpu-all
fi
make llama-cpp-grpc
make llama-cpp-rpc-server
ccache -s || true