diff --git a/.docker/llama-cpp-compile.sh b/.docker/llama-cpp-compile.sh
index b4791a348..776a2cceb 100755
--- a/.docker/llama-cpp-compile.sh
+++ b/.docker/llama-cpp-compile.sh
@@ -22,6 +22,16 @@ if [ "${BUILD_TYPE}" = "hipblas" ]; then
   # ROCm: the GPU does the compute, so a single fallback CPU build is enough.
   make llama-cpp-fallback
 else
+  # arm64: ggml's CPU_ALL_VARIANTS table includes armv9.2 SME variants whose
+  # -march=...+sme is rejected by the Ubuntu 24.04 default gcc-13. gcc-14 accepts it, so
+  # build the arm64 variants with gcc-14 (the host never *selects* SME unless it has it,
+  # but every variant must still compile).
+  if [ "${TARGETARCH}" = "arm64" ]; then
+    # Abort if either apt step fails: even under `set -e`, a failing `apt-get update` in
+    # a bare `a && b` list is not fatal, and CC/CXX would then name a missing compiler.
+    { apt-get update -qq && apt-get install -y -qq gcc-14 g++-14; } || exit 1
+    export CC=gcc-14 CXX=g++-14
+  fi
   # x86 and arm64: one build with ggml CPU_ALL_VARIANTS replaces the per-microarch
   # binaries (x86: avx/avx2/avx512/fallback; arm64: armv8.x/armv9.x). ggml dlopens the
   # best libggml-cpu-*.so at runtime by probing host CPU features.
diff --git a/.docker/turboquant-compile.sh b/.docker/turboquant-compile.sh
index c1a970010..baf814c7e 100755
--- a/.docker/turboquant-compile.sh
+++ b/.docker/turboquant-compile.sh
@@ -23,6 +23,13 @@ if [ "${BUILD_TYPE}" = "hipblas" ]; then
   # ROCm: single fallback CPU build (GPU does the compute).
   make turboquant-fallback
 else
+  # arm64: the CPU_ALL_VARIANTS armv9.2 SME variants need gcc-14 (gcc-13 rejects +sme).
+  if [ "${TARGETARCH}" = "arm64" ]; then
+    # Abort if either apt step fails: even under `set -e`, a failing `apt-get update` in
+    # a bare `a && b` list is not fatal, and CC/CXX would then name a missing compiler.
+    { apt-get update -qq && apt-get install -y -qq gcc-14 g++-14; } || exit 1
+    export CC=gcc-14 CXX=g++-14
+  fi
   # x86 and arm64: one ggml CPU_ALL_VARIANTS build replaces the per-microarch binaries.
   make turboquant-cpu-all
 fi
diff --git a/scripts/build/llama-cpp-darwin.sh b/scripts/build/llama-cpp-darwin.sh
index 9bdf36875..3bbd963e6 100644
--- a/scripts/build/llama-cpp-darwin.sh
+++ b/scripts/build/llama-cpp-darwin.sh
@@ -6,10 +6,11 @@ IMAGE_NAME="${IMAGE_NAME:-localai/llama-cpp-darwin}"
 
 pushd backend/cpp/llama-cpp
 
-# make llama-cpp-avx && \
-# make llama-cpp-avx2 && \
-# make llama-cpp-avx512 && \
-make llama-cpp-fallback && \
+# One ggml CPU_ALL_VARIANTS build replaces llama-cpp-fallback: a single binary plus the
+# per-microarch Apple/arm dylibs (apple_m1/m2_m3/m4, armv8.x) that ggml picks at runtime.
+# GGML_METAL stays ON and --target ggml also builds ggml-metal (via add_dependencies), so
+# the Metal GPU backend is still produced as a loadable libggml-metal.dylib.
+make llama-cpp-cpu-all && \
 make llama-cpp-grpc && \
 make llama-cpp-rpc-server
 
@@ -19,13 +20,22 @@ mkdir -p build/darwin
 mkdir -p backend-images
 mkdir -p build/darwin/lib
 
-# cp -rf backend/cpp/llama-cpp/llama-cpp-avx build/darwin/
-# cp -rf backend/cpp/llama-cpp/llama-cpp-avx2 build/darwin/
-# cp -rf backend/cpp/llama-cpp/llama-cpp-avx512 build/darwin/
-cp -rf backend/cpp/llama-cpp/llama-cpp-fallback build/darwin/
+cp -rf backend/cpp/llama-cpp/llama-cpp-cpu-all build/darwin/
 cp -rf backend/cpp/llama-cpp/llama-cpp-grpc build/darwin/
 cp -rf backend/cpp/llama-cpp/llama-cpp-rpc-server build/darwin/
 
+# Ship the shared ggml/llama dylibs from the CPU_ALL_VARIANTS build. Unlike the old
+# fully-static fallback build these are real dylibs with @rpath install names, so the
+# otool loop below (which only copies deps that exist on disk) will not pick them up.
+#  - per-microarch libggml-cpu-*.dylib go in the package ROOT next to the binary: on
+#    darwin run.sh execs it directly (no bundled ld.so) and ggml discovers CPU backends
+#    by scanning the executable's own directory.
+#  - the rest (libggml-base/libggml/libllama/libmtmd/libggml-metal/...) goes in lib/,
+#    found via run.sh's DYLD_LIBRARY_PATH=lib. `-exec ... +` surfaces cp failures.
+SHLIBS=backend/cpp/llama-cpp/ggml-shared-libs
+cp -rfv "$SHLIBS"/libggml-cpu-*.dylib build/darwin/
+find "$SHLIBS" -name '*.dylib' ! -name 'libggml-cpu-*.dylib' -exec sh -c 'cp -rfv "$@" build/darwin/lib/' sh {} +
+
 # Set default additional libs only for Darwin on M chips (arm64)
 if [[ "$(uname -s)" == "Darwin" && "$(uname -m)" == "arm64" ]]; then
     ADDITIONAL_LIBS=${ADDITIONAL_LIBS:-$(ls /opt/homebrew/Cellar/protobuf/**/lib/libutf8_validity*.dylib 2>/dev/null)}