mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-07 05:04:29 -05:00
Compare commits
1 Commits
v3.6.0
...
llama_cpp/
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3f52776a1c |
@@ -11,6 +11,7 @@ ARG GRPC_MAKEFLAGS="-j4 -Otarget"
|
|||||||
ARG GRPC_VERSION=v1.65.0
|
ARG GRPC_VERSION=v1.65.0
|
||||||
ARG CMAKE_FROM_SOURCE=false
|
ARG CMAKE_FROM_SOURCE=false
|
||||||
ARG CMAKE_VERSION=3.26.4
|
ARG CMAKE_VERSION=3.26.4
|
||||||
|
ARG PROTOBUF_VERSION=v21.12
|
||||||
|
|
||||||
ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
|
ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
|
||||||
|
|
||||||
@@ -49,6 +50,14 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
|
|||||||
make install && \
|
make install && \
|
||||||
rm -rf /build
|
rm -rf /build
|
||||||
|
|
||||||
|
RUN git clone --recurse-submodules --branch ${PROTOBUF_VERSION} https://github.com/protocolbuffers/protobuf.git && \
|
||||||
|
mkdir -p /build/protobuf/build && \
|
||||||
|
cd /build/protobuf/build && \
|
||||||
|
cmake -Dprotobuf_BUILD_SHARED_LIBS=ON -Dprotobuf_BUILD_TESTS=OFF .. && \
|
||||||
|
make && \
|
||||||
|
make install && \
|
||||||
|
rm -rf /build
|
||||||
|
|
||||||
FROM ${BASE_IMAGE} AS builder
|
FROM ${BASE_IMAGE} AS builder
|
||||||
ARG BACKEND=rerankers
|
ARG BACKEND=rerankers
|
||||||
ARG BUILD_TYPE
|
ARG BUILD_TYPE
|
||||||
@@ -180,21 +189,9 @@ COPY --from=grpc /opt/grpc /usr/local
|
|||||||
|
|
||||||
COPY . /LocalAI
|
COPY . /LocalAI
|
||||||
|
|
||||||
## Otherwise just run the normal build
|
RUN make -C /LocalAI/backend/cpp/llama-cpp llama-cpp
|
||||||
RUN <<EOT bash
|
RUN make -C /LocalAI/backend/cpp/llama-cpp llama-cpp-grpc
|
||||||
if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
|
RUN make -C /LocalAI/backend/cpp/llama-cpp llama-cpp-rpc-server
|
||||||
cd /LocalAI/backend/cpp/llama-cpp && make llama-cpp-fallback && \
|
|
||||||
make llama-cpp-grpc && make llama-cpp-rpc-server; \
|
|
||||||
else \
|
|
||||||
cd /LocalAI/backend/cpp/llama-cpp && make llama-cpp-avx && \
|
|
||||||
make llama-cpp-avx2 && \
|
|
||||||
make llama-cpp-avx512 && \
|
|
||||||
make llama-cpp-fallback && \
|
|
||||||
make llama-cpp-grpc && \
|
|
||||||
make llama-cpp-rpc-server; \
|
|
||||||
fi
|
|
||||||
EOT
|
|
||||||
|
|
||||||
|
|
||||||
# Copy libraries using a script to handle architecture differences
|
# Copy libraries using a script to handle architecture differences
|
||||||
RUN make -C /LocalAI/backend/cpp/llama-cpp package
|
RUN make -C /LocalAI/backend/cpp/llama-cpp package
|
||||||
|
|||||||
@@ -17,6 +17,8 @@ if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
|||||||
include_directories("${HOMEBREW_DEFAULT_PREFIX}/include")
|
include_directories("${HOMEBREW_DEFAULT_PREFIX}/include")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
set(Protobuf_USE_STATIC_LIBS OFF)
|
||||||
|
set(gRPC_USE_STATIC_LIBS OFF)
|
||||||
find_package(absl CONFIG REQUIRED)
|
find_package(absl CONFIG REQUIRED)
|
||||||
find_package(Protobuf CONFIG REQUIRED)
|
find_package(Protobuf CONFIG REQUIRED)
|
||||||
find_package(gRPC CONFIG REQUIRED)
|
find_package(gRPC CONFIG REQUIRED)
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
|
|||||||
TARGET?=--target grpc-server
|
TARGET?=--target grpc-server
|
||||||
|
|
||||||
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
|
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
|
||||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF
|
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=ON -DLLAMA_CURL=OFF -DGGML_CPU_ALL_VARIANTS=ON -DGGML_BACKEND_DL=ON
|
||||||
|
|
||||||
CURRENT_MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
|
CURRENT_MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
|
||||||
ifeq ($(NATIVE),false)
|
ifeq ($(NATIVE),false)
|
||||||
@@ -89,33 +89,12 @@ else
|
|||||||
LLAMA_VERSION=$(LLAMA_VERSION) $(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(VARIANT) grpc-server
|
LLAMA_VERSION=$(LLAMA_VERSION) $(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(VARIANT) grpc-server
|
||||||
endif
|
endif
|
||||||
|
|
||||||
llama-cpp-avx2: llama.cpp
|
llama-cpp: llama.cpp
|
||||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx2-build
|
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-build
|
||||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx2-build purge
|
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-build purge
|
||||||
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
|
$(info ${GREEN}I llama-cpp build info:${RESET})
|
||||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-cpp-avx2-build" build-llama-cpp-grpc-server
|
CMAKE_ARGS="$(CMAKE_ARGS)" $(MAKE) VARIANT="llama-cpp-build" build-llama-cpp-grpc-server
|
||||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx2-build/grpc-server llama-cpp-avx2
|
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-build/grpc-server llama-cpp
|
||||||
|
|
||||||
llama-cpp-avx512: llama.cpp
|
|
||||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx512-build
|
|
||||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx512-build purge
|
|
||||||
$(info ${GREEN}I llama-cpp build info:avx512${RESET})
|
|
||||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-cpp-avx512-build" build-llama-cpp-grpc-server
|
|
||||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx512-build/grpc-server llama-cpp-avx512
|
|
||||||
|
|
||||||
llama-cpp-avx: llama.cpp
|
|
||||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build
|
|
||||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build purge
|
|
||||||
$(info ${GREEN}I llama-cpp build info:avx${RESET})
|
|
||||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server
|
|
||||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build/grpc-server llama-cpp-avx
|
|
||||||
|
|
||||||
llama-cpp-fallback: llama.cpp
|
|
||||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build
|
|
||||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build purge
|
|
||||||
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
|
|
||||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
|
|
||||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build/grpc-server llama-cpp-fallback
|
|
||||||
|
|
||||||
llama-cpp-grpc: llama.cpp
|
llama-cpp-grpc: llama.cpp
|
||||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build
|
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build
|
||||||
|
|||||||
@@ -6,34 +6,9 @@ CURDIR=$(dirname "$(realpath $0)")
|
|||||||
|
|
||||||
cd /
|
cd /
|
||||||
|
|
||||||
echo "CPU info:"
|
BINARY=llama-cpp
|
||||||
grep -e "model\sname" /proc/cpuinfo | head -1
|
|
||||||
grep -e "flags" /proc/cpuinfo | head -1
|
|
||||||
|
|
||||||
BINARY=llama-cpp-fallback
|
|
||||||
|
|
||||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
|
||||||
echo "CPU: AVX found OK"
|
|
||||||
if [ -e $CURDIR/llama-cpp-avx ]; then
|
|
||||||
BINARY=llama-cpp-avx
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
if grep -q -e "\savx2\s" /proc/cpuinfo ; then
|
|
||||||
echo "CPU: AVX2 found OK"
|
|
||||||
if [ -e $CURDIR/llama-cpp-avx2 ]; then
|
|
||||||
BINARY=llama-cpp-avx2
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Check avx 512
|
|
||||||
if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
|
|
||||||
echo "CPU: AVX512F found OK"
|
|
||||||
if [ -e $CURDIR/llama-cpp-avx512 ]; then
|
|
||||||
BINARY=llama-cpp-avx512
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
|
## P2P/GRPC mode
|
||||||
if [ -n "$LLAMACPP_GRPC_SERVERS" ]; then
|
if [ -n "$LLAMACPP_GRPC_SERVERS" ]; then
|
||||||
if [ -e $CURDIR/llama-cpp-grpc ]; then
|
if [ -e $CURDIR/llama-cpp-grpc ]; then
|
||||||
BINARY=llama-cpp-grpc
|
BINARY=llama-cpp-grpc
|
||||||
@@ -56,6 +31,3 @@ fi
|
|||||||
|
|
||||||
echo "Using binary: $BINARY"
|
echo "Using binary: $BINARY"
|
||||||
exec $CURDIR/$BINARY "$@"
|
exec $CURDIR/$BINARY "$@"
|
||||||
|
|
||||||
# In case we fail execing, just run fallback
|
|
||||||
exec $CURDIR/llama-cpp-fallback "$@"
|
|
||||||
Reference in New Issue
Block a user