diff --git a/.docker/llama-cpp-compile.sh b/.docker/llama-cpp-compile.sh index b4791a348..776a2cceb 100755 --- a/.docker/llama-cpp-compile.sh +++ b/.docker/llama-cpp-compile.sh @@ -22,6 +22,14 @@ if [ "${BUILD_TYPE}" = "hipblas" ]; then # ROCm: the GPU does the compute, so a single fallback CPU build is enough. make llama-cpp-fallback else + # arm64: ggml's CPU_ALL_VARIANTS table includes armv9.2 SME variants whose + # -march=...+sme is rejected by the Ubuntu 24.04 default gcc-13. gcc-14 accepts it, so + # build the arm64 variants with gcc-14 (the host never *selects* SME unless it has it, + # but every variant must still compile). + if [ "${TARGETARCH}" = "arm64" ]; then + apt-get update -qq && apt-get install -y -qq gcc-14 g++-14 + export CC=gcc-14 CXX=g++-14 + fi # x86 and arm64: one build with ggml CPU_ALL_VARIANTS replaces the per-microarch # binaries (x86: avx/avx2/avx512/fallback; arm64: armv8.x/armv9.x). ggml dlopens the # best libggml-cpu-*.so at runtime by probing host CPU features. diff --git a/.docker/turboquant-compile.sh b/.docker/turboquant-compile.sh index c1a970010..baf814c7e 100755 --- a/.docker/turboquant-compile.sh +++ b/.docker/turboquant-compile.sh @@ -23,6 +23,11 @@ if [ "${BUILD_TYPE}" = "hipblas" ]; then # ROCm: single fallback CPU build (GPU does the compute). make turboquant-fallback else + # arm64: the CPU_ALL_VARIANTS armv9.2 SME variants need gcc-14 (gcc-13 rejects +sme). + if [ "${TARGETARCH}" = "arm64" ]; then + apt-get update -qq && apt-get install -y -qq gcc-14 g++-14 + export CC=gcc-14 CXX=g++-14 + fi # x86 and arm64: one ggml CPU_ALL_VARIANTS build replaces the per-microarch binaries. make turboquant-cpu-all fi diff --git a/scripts/build/llama-cpp-darwin.sh b/scripts/build/llama-cpp-darwin.sh index 9bdf36875..3bbd963e6 100644 --- a/scripts/build/llama-cpp-darwin.sh +++ b/scripts/build/llama-cpp-darwin.sh @@ -6,10 +6,11 @@ IMAGE_NAME="${IMAGE_NAME:-localai/llama-cpp-darwin}" pushd backend/cpp/llama-cpp -# make llama-cpp-avx && \ -# make llama-cpp-avx2 && \ -# make llama-cpp-avx512 && \ -make llama-cpp-fallback && \ +# Single build via ggml CPU_ALL_VARIANTS: one binary plus the per-microarch Apple/arm +# dylibs (apple_m1/m2_m3/m4, armv8.x) that ggml selects at runtime. GGML_METAL stays ON +# and --target ggml also builds ggml-metal (via add_dependencies), so the Metal GPU +# backend is still produced as a loadable libggml-metal.dylib. +make llama-cpp-cpu-all && \ make llama-cpp-grpc && \ make llama-cpp-rpc-server @@ -19,13 +20,22 @@ mkdir -p build/darwin mkdir -p backend-images mkdir -p build/darwin/lib -# cp -rf backend/cpp/llama-cpp/llama-cpp-avx build/darwin/ -# cp -rf backend/cpp/llama-cpp/llama-cpp-avx2 build/darwin/ -# cp -rf backend/cpp/llama-cpp/llama-cpp-avx512 build/darwin/ -cp -rf backend/cpp/llama-cpp/llama-cpp-fallback build/darwin/ +cp -rf backend/cpp/llama-cpp/llama-cpp-cpu-all build/darwin/ cp -rf backend/cpp/llama-cpp/llama-cpp-grpc build/darwin/ cp -rf backend/cpp/llama-cpp/llama-cpp-rpc-server build/darwin/ +# Distribute the shared ggml/llama dylibs from the CPU_ALL_VARIANTS build. Unlike the old +# fully-static fallback build, these are real dylibs with @rpath install names, so the +# otool loop below (which only copies deps that exist on disk) will not pick them up. +# - the per-microarch libggml-cpu-*.dylib go in the package ROOT, next to the binary, +# because on darwin run.sh execs the binary directly (no bundled ld.so) and ggml +# discovers CPU backends by scanning the executable's own directory. +# - everything else (libggml-base/libggml/libllama/libmtmd/libggml-metal/...) goes in +# lib/, resolved at load time via the DYLD_LIBRARY_PATH=lib that run.sh exports. +SHLIBS=backend/cpp/llama-cpp/ggml-shared-libs +cp -rfv $SHLIBS/libggml-cpu-*.dylib build/darwin/ +find $SHLIBS -name '*.dylib' ! -name 'libggml-cpu-*.dylib' -exec cp -rfv {} build/darwin/lib/ \; + # Set default additional libs only for Darwin on M chips (arm64) if [[ "$(uname -s)" == "Darwin" && "$(uname -m)" == "arm64" ]]; then ADDITIONAL_LIBS=${ADDITIONAL_LIBS:-$(ls /opt/homebrew/Cellar/protobuf/**/lib/libutf8_validity*.dylib 2>/dev/null)}