feat(llama.cpp): add turboquant support

This PR adds the patchset from the great work of @TheTom in
https://github.com/TheTom/llama-cpp-turboquant and creates a pipeline
that updates the patches against upstream automatically.

It also creates the necessary scaffolding for doing this with other patch
sources.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2026-04-01 17:46:44 +00:00
parent 6c635e8353
commit e502e51d78
4 changed files with 125 additions and 15 deletions

View File

@@ -0,0 +1,9 @@
# Patch sources for the llama-cpp backend.
# Each source declares a fork whose commits are extracted as patches
# and applied on top of upstream llama.cpp during the build.
# See scripts/patch_utils/apply_patches.sh for the generic patch engine.
sources:
  # One list entry per patch source; the fields below belong to that entry.
  - name: turboquant
    # Fork to take patches from, and the branch to read in that fork.
    repo: https://github.com/TheTom/llama-cpp-turboquant.git
    branch: feature/turboquant-kv-cache
    # NOTE(review): presumably the upstream the fork is compared against — confirm in apply_patches.sh.
    upstream_repo: https://github.com/ggml-org/llama.cpp.git

View File

@@ -1,17 +1,13 @@
#!/bin/bash
## Patches
## Apply patches from the `patches` directory
if [ -d "patches" ]; then
  # Glob instead of parsing `ls`, so patch names containing whitespace or
  # glob characters reach `patch` intact.
  for patch_path in patches/*; do
    # An unmatched glob stays literal; skip it so an empty directory is a no-op.
    [ -e "$patch_path" ] || continue
    # Log only the file name, without the leading `patches/`.
    echo "Applying patch ${patch_path##*/}"
    patch -d llama.cpp/ -p1 < "$patch_path"
  done
fi
# Abort on the first failing command from here on.
set -e
# Absolute path of the directory containing this script (resolved from $0).
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# Three directory levels above SCRIPT_DIR.
# NOTE(review): presumably the repository root — confirm against the repo layout.
REPO_ROOT="$SCRIPT_DIR/../../.."
## Apply patches from sources and/or local .patch files
# Arguments passed: this script's directory, then the `llama.cpp` directory
# name (relative to the current working directory).
"$REPO_ROOT/scripts/patch_utils/apply_patches.sh" "$SCRIPT_DIR" llama.cpp
## Copy server files into grpc-server build directory
# Iterate with a glob rather than parsing `ls`, so entries whose names contain
# whitespace or glob characters are handed to cp as a single argument.
for file in llama.cpp/tools/server/*; do
  # An unmatched glob stays literal; skip it so an empty or missing directory
  # is a no-op (dangling symlinks are still copied).
  [ -e "$file" ] || [ -L "$file" ] || continue
  cp -rfv -- "$file" llama.cpp/tools/grpc-server/
done
@@ -28,4 +24,3 @@ else
echo "add_subdirectory(grpc-server)" >> llama.cpp/tools/CMakeLists.txt
fi
set -e