mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-26 01:16:58 -04:00
Merge branch 'master' into worktree-feat+paged-attention
Resolve pkg/xsysinfo/gpu.go: keep master's NVIDIAComputeCapability + parseComputeCap (the #10485 multi-GPU work); re-express our IsNVIDIABlackwell as a thin wrapper over NVIDIAComputeCapability instead of a duplicate nvidia-smi probe. Assisted-by: Claude:opus-4.8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
|
||||
LLAMA_VERSION?=f3e182816421c648188b5eab269853bf1531d950
|
||||
LLAMA_VERSION?=8be759e6f70d629638a7eb70db3824cbdcea370b
|
||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||
# LLAMA_PAGED controls whether the vendored paged-attention patch series
|
||||
# (patches/paged/) is applied on top of the pinned llama.cpp. Default on; set
|
||||
@@ -18,8 +18,16 @@ TARGET?=--target grpc-server
|
||||
JOBS?=$(shell nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 1)
|
||||
ARCH?=$(shell uname -m)
|
||||
|
||||
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF
|
||||
# Shared libs default to OFF: we link static gRPC and the avx/avx2/avx512/fallback
|
||||
# variants are fully static. The CPU_ALL_VARIANTS build flips SHARED_LIBS=ON (ggml/llama
|
||||
# become shared so the dynamic CPU backends work; gRPC stays static via its imported
|
||||
# targets). SHARED_LIBS is a make variable, not an appended -D, so it survives the
|
||||
# recursive sub-make into the VARIANT build dir (which re-parses this Makefile) instead
|
||||
# of being re-clobbered by a second -DBUILD_SHARED_LIBS=OFF. EXTRA_CMAKE_ARGS is the hook
|
||||
# the CPU_ALL_VARIANTS target uses to inject -DGGML_BACKEND_DL/-DGGML_CPU_ALL_VARIANTS.
|
||||
SHARED_LIBS?=OFF
|
||||
EXTRA_CMAKE_ARGS?=
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=$(SHARED_LIBS) -DLLAMA_CURL=OFF $(EXTRA_CMAKE_ARGS)
|
||||
|
||||
CURRENT_MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
|
||||
ifeq ($(NATIVE),false)
|
||||
@@ -128,6 +136,30 @@ llama-cpp-fallback: llama.cpp
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build/grpc-server llama-cpp-fallback
|
||||
|
||||
# Single-build CPU backend using ggml's CPU_ALL_VARIANTS. Produces ONE grpc-server
|
||||
# plus a set of dlopen-able libggml-cpu-*.so (sandybridge/haswell/skylakex/...) that
|
||||
# ggml's backend registry selects from at runtime by probing host CPU features.
|
||||
# Replaces the avx/avx2/avx512/fallback multi-binary build on x86.
|
||||
#
|
||||
# CPU_ALL_VARIANTS requires GGML_BACKEND_DL, which requires BUILD_SHARED_LIBS=ON, so we
|
||||
# pass SHARED_LIBS=ON and the DL flags as make variables (NOT pre-expanded into the
|
||||
# CMAKE_ARGS env string): command-line make variables propagate through every recursive
|
||||
# sub-make, so the deepest VARIANT-dir build computes BUILD_SHARED_LIBS=ON consistently.
|
||||
# Only ggml/llama go shared - gRPC is found via its static imported targets, so the
|
||||
# grpc-server binary keeps static gRPC and only dynamically links ggml.
|
||||
#
|
||||
# TARGET adds "ggml": the per-microarch backends are runtime-dlopened, not link deps of
|
||||
# grpc-server, so they only build because each is an add_dependencies() of the ggml target.
|
||||
llama-cpp-cpu-all: llama.cpp
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-cpu-all-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-cpu-all-build purge
|
||||
$(info ${GREEN}I llama-cpp build info:cpu-all-variants${RESET})
|
||||
$(MAKE) SHARED_LIBS=ON EXTRA_CMAKE_ARGS="-DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON" TARGET="--target grpc-server --target ggml" VARIANT="llama-cpp-cpu-all-build" build-llama-cpp-grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-cpu-all-build/grpc-server llama-cpp-cpu-all
|
||||
rm -rf ggml-shared-libs && mkdir -p ggml-shared-libs
|
||||
find $(CURRENT_MAKEFILE_DIR)/../llama-cpp-cpu-all-build/llama.cpp/build \( -name '*.so*' -o -name '*.dylib' \) -exec cp -av {} ggml-shared-libs/ \;
|
||||
@echo "Collected ggml shared backends:" && ls -la ggml-shared-libs/
|
||||
|
||||
llama-cpp-grpc: llama.cpp
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build purge
|
||||
|
||||
Reference in New Issue
Block a user