mirror of
https://github.com/mudler/LocalAI.git
synced 2026-04-20 14:57:35 -04:00
feat(llama.cpp): add turboquant support
This PR adds the patchset from the great work of @TheTom in https://github.com/TheTom/llama-cpp-turboquant and creates a pipeline that automatically updates the patches against upstream. It also creates the necessary scaffolding for doing the same with other patch sources. Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
9
backend/cpp/llama-cpp/patches/sources.yaml
Normal file
9
backend/cpp/llama-cpp/patches/sources.yaml
Normal file
@@ -0,0 +1,9 @@
|
||||
# Patch sources for the llama-cpp backend.
|
||||
# Each source declares a fork whose commits are extracted as patches
|
||||
# and applied on top of upstream llama.cpp during the build.
|
||||
# See scripts/patch_utils/apply_patches.sh for the generic patch engine.
|
||||
sources:
|
||||
- name: turboquant
|
||||
repo: https://github.com/TheTom/llama-cpp-turboquant.git
|
||||
branch: feature/turboquant-kv-cache
|
||||
upstream_repo: https://github.com/ggml-org/llama.cpp.git
|
||||
@@ -1,17 +1,13 @@
|
||||
#!/bin/bash
|
||||
|
||||
## Patches
|
||||
|
||||
## Apply patches from the `patches` directory
|
||||
if [ -d "patches" ]; then
|
||||
for patch in $(ls patches); do
|
||||
echo "Applying patch $patch"
|
||||
patch -d llama.cpp/ -p1 < patches/$patch
|
||||
done
|
||||
fi
|
||||
|
||||
set -e
|
||||
|
||||
# Absolute path of the directory holding this script: cd into dirname "$0"
# and print the working directory, so the result does not depend on the
# caller's current directory.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
# Repository root, taken to be three directory levels above SCRIPT_DIR.
REPO_ROOT="$SCRIPT_DIR/../../.."
|
||||
|
||||
## Apply patches from sources and/or local .patch files
|
||||
# Invoke the shared patch script with this backend's directory and the
# literal "llama.cpp" as arguments.
# NOTE(review): "llama.cpp" is presumably the checkout to patch, resolved
# relative to the caller's working directory -- confirm in apply_patches.sh.
"$REPO_ROOT/scripts/patch_utils/apply_patches.sh" "$SCRIPT_DIR" llama.cpp
|
||||
|
||||
## Copy server files into grpc-server build directory
|
||||
# Iterate with a glob rather than parsing `ls` output, so entry names that
# contain whitespace or glob characters reach cp as a single, intact argument.
for file in llama.cpp/tools/server/*; do
    # An unmatched glob is left as the literal pattern; skip it so an empty
    # or missing source directory copies nothing (same outcome as before).
    [ -e "$file" ] || [ -L "$file" ] || continue
    # `--` ends option parsing in case an entry name starts with a dash.
    cp -rfv -- "$file" llama.cpp/tools/grpc-server/
done
|
||||
@@ -28,4 +24,3 @@ else
|
||||
echo "add_subdirectory(grpc-server)" >> llama.cpp/tools/CMakeLists.txt
|
||||
fi
|
||||
set -e
|
||||
|
||||
|
||||
Reference in New Issue
Block a user