feat(llama.cpp): add turboquant support

This PR adds the patchset from the great work of @TheTom in https://github.com/TheTom/llama-cpp-turboquant and creates a pipeline that automatically updates the patches against upstream. It also creates the necessary scaffolding for doing the same with other patch sources. Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-06-12 02:38:19 -04:00 · 2026-04-01 17:46:44 +00:00
parent 6c635e8353
commit e502e51d78
4 changed files with 125 additions and 15 deletions
--- a/backend/cpp/llama-cpp/patches/sources.yaml
+++ b/backend/cpp/llama-cpp/patches/sources.yaml
@@ -0,0 +1,9 @@
+# Patch sources for the llama-cpp backend.
+# Each source declares a fork whose commits are extracted as patches
+# and applied on top of upstream llama.cpp during the build.
+# See scripts/patch_utils/apply_patches.sh for the generic patch engine.
+sources:
+  - name: turboquant
+    repo: https://github.com/TheTom/llama-cpp-turboquant.git
+    branch: feature/turboquant-kv-cache
+    upstream_repo: https://github.com/ggml-org/llama.cpp.git
--- a/backend/cpp/llama-cpp/prepare.sh
+++ b/backend/cpp/llama-cpp/prepare.sh
@@ -1,17 +1,13 @@
 #!/bin/bash
-
-## Patches
-
-## Apply patches from the `patches` directory
-if [ -d "patches" ]; then
-    for patch in $(ls patches); do
-        echo "Applying patch $patch"
-        patch -d llama.cpp/ -p1 < patches/$patch
-    done 
-fi
-
 set -e

+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+REPO_ROOT="$SCRIPT_DIR/../../.."
+
+## Apply patches from sources and/or local .patch files
+"$REPO_ROOT/scripts/patch_utils/apply_patches.sh" "$SCRIPT_DIR" llama.cpp
+
+## Copy server files into grpc-server build directory
 for file in $(ls llama.cpp/tools/server/); do
    cp -rfv llama.cpp/tools/server/$file llama.cpp/tools/grpc-server/
 done
@@ -28,4 +24,3 @@ else
    echo "add_subdirectory(grpc-server)" >> llama.cpp/tools/CMakeLists.txt
 fi
 set -e
-