mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-25 00:59:28 -04:00
- arm64: ggml CPU_ALL_VARIANTS builds armv9.2 SME variants whose -march=...+sme is rejected by the Ubuntu 24.04 default gcc-13. Build the arm64 variants with gcc-14 (installed in the compile step). The host only selects a variant it actually supports at runtime, but every variant must still compile. - darwin: scripts/build/llama-cpp-darwin.sh builds llama-cpp-cpu-all instead of the fallback binary, keeps Metal (GGML_METAL stays ON; --target ggml also builds ggml-metal). The per-microarch libggml-cpu-*.dylib are placed in the package root next to the binary (darwin has no bundled ld.so, so ggml's executable-dir scan looks there), while the other shared dylibs go in lib/ for DYLD_LIBRARY_PATH. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:claude-opus-4-8 [Claude Code]
42 lines
1.6 KiB
Bash
Executable File
42 lines
1.6 KiB
Bash
Executable File
#!/usr/bin/env bash
# Shared compile logic for backend/Dockerfile.llama-cpp.
# Sourced (via bind mount) from both builder-fromsource and builder-prebuilt stages.
#
# Environment read by this script (all optional):
#   CMAKE_ARGS        extra CMake flags; the ccache launcher flags are appended.
#   CUDA_DOCKER_ARCH  ';'-separated CUDA architectures to build for.
#   BUILD_TYPE        "hipblas" selects the single fallback CPU build.
#   TARGETARCH        "arm64" switches the compiler to gcc-14. When unset or
#                     empty it is derived from `dpkg --print-architecture`.

set -euxo pipefail

export CCACHE_DIR=/root/.ccache
# Best effort on purpose: ccache bookkeeping must never fail the build.
ccache --max-size=5G || true
ccache -z || true

export CMAKE_ARGS="${CMAKE_ARGS:-} -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache"

if [[ -n "${CUDA_DOCKER_ARCH:-}" ]]; then
  # Backslash-escape every ';' in the architecture list before handing it to
  # CMake (presumably so the list is not split on its way through make).
  CUDA_ARCH_ESC="${CUDA_DOCKER_ARCH//;/\\;}"
  export CMAKE_ARGS="${CMAKE_ARGS} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH_ESC}"
  echo "CMAKE_ARGS(env) = ${CMAKE_ARGS}"
  # NOTE(review): presumably drops stale build trees so the new architecture
  # list takes effect - confirm against the Makefile.
  rm -rf /LocalAI/backend/cpp/llama-cpp-*-build
fi

cd /LocalAI/backend/cpp/llama-cpp

# Both variables are optional build args. Default them explicitly so that
# `set -u` does not abort with "unbound variable" when one is not passed in.
build_type="${BUILD_TYPE:-}"
target_arch="${TARGETARCH:-}"
if [[ -z "${target_arch}" ]]; then
  # Fall back to the distro's view of the architecture ("amd64", "arm64", ...)
  # so an arm64 build without TARGETARCH does not silently keep gcc-13.
  target_arch="$(dpkg --print-architecture 2>/dev/null || true)"
fi

if [[ "${build_type}" == "hipblas" ]]; then
  # ROCm: the GPU does the compute, so a single fallback CPU build is enough.
  make llama-cpp-fallback
else
  # arm64: ggml's CPU_ALL_VARIANTS table includes armv9.2 SME variants whose
  # -march=...+sme is rejected by the Ubuntu 24.04 default gcc-13. gcc-14 accepts it, so
  # build the arm64 variants with gcc-14 (the host never *selects* SME unless it has it,
  # but every variant must still compile).
  if [[ "${target_arch}" == "arm64" ]]; then
    # Two separate statements: in `a && b` a failure of `a` is exempt from
    # `set -e`, which would skip the install yet still export CC=gcc-14.
    apt-get update -qq
    DEBIAN_FRONTEND=noninteractive apt-get install -y -qq gcc-14 g++-14
    export CC=gcc-14 CXX=g++-14
  fi
  # x86 and arm64: one build with ggml CPU_ALL_VARIANTS replaces the per-microarch
  # binaries (x86: avx/avx2/avx512/fallback; arm64: armv8.x/armv9.x). ggml dlopens the
  # best libggml-cpu-*.so at runtime by probing host CPU features.
  make llama-cpp-cpu-all
fi

make llama-cpp-grpc
make llama-cpp-rpc-server

# Print cache statistics for the build log; never fail the build over it.
ccache -s || true