From e502e51d78e31d7dd00f92f3d626216da111672f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 1 Apr 2026 17:46:44 +0000 Subject: [PATCH] feat(llama.cpp): add turboquant support This PR adds a patchset from the great work of @TheTom in https://github.com/TheTom/llama-cpp-turboquant and creates a pipeline that updates the patches against upstream automatically. It also creates the necessary scaffolding for doing this with other patch sources. Signed-off-by: Ettore Di Giacinto --- .github/workflows/bump_deps.yaml | 3 - backend/cpp/llama-cpp/patches/sources.yaml | 9 ++ backend/cpp/llama-cpp/prepare.sh | 19 ++-- scripts/patch_utils/apply_patches.sh | 109 +++++++++++++++++++++ 4 files changed, 125 insertions(+), 15 deletions(-) create mode 100644 backend/cpp/llama-cpp/patches/sources.yaml create mode 100755 scripts/patch_utils/apply_patches.sh diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml index 49e489beb..ae63fd232 100644 --- a/.github/workflows/bump_deps.yaml +++ b/.github/workflows/bump_deps.yaml @@ -63,6 +63,3 @@ jobs: branch: "update/${{ matrix.variable }}" body: ${{ steps.bump.outputs.message }} signoff: true - - - diff --git a/backend/cpp/llama-cpp/patches/sources.yaml b/backend/cpp/llama-cpp/patches/sources.yaml new file mode 100644 index 000000000..9c13ae7a4 --- /dev/null +++ b/backend/cpp/llama-cpp/patches/sources.yaml @@ -0,0 +1,9 @@ +# Patch sources for the llama-cpp backend. +# Each source declares a fork whose commits are extracted as patches +# and applied on top of upstream llama.cpp during the build. +# See scripts/patch_utils/apply_patches.sh for the generic patch engine. 
+sources: + - name: turboquant + repo: https://github.com/TheTom/llama-cpp-turboquant.git + branch: feature/turboquant-kv-cache + upstream_repo: https://github.com/ggml-org/llama.cpp.git diff --git a/backend/cpp/llama-cpp/prepare.sh b/backend/cpp/llama-cpp/prepare.sh index f9b7e3dd2..b6f2c25c8 100644 --- a/backend/cpp/llama-cpp/prepare.sh +++ b/backend/cpp/llama-cpp/prepare.sh @@ -1,17 +1,13 @@ #!/bin/bash - -## Patches - -## Apply patches from the `patches` directory -if [ -d "patches" ]; then - for patch in $(ls patches); do - echo "Applying patch $patch" - patch -d llama.cpp/ -p1 < patches/$patch - done -fi - set -e +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +REPO_ROOT="$SCRIPT_DIR/../../.." + +## Apply patches from sources and/or local .patch files +"$REPO_ROOT/scripts/patch_utils/apply_patches.sh" "$SCRIPT_DIR" llama.cpp + +## Copy server files into grpc-server build directory for file in $(ls llama.cpp/tools/server/); do cp -rfv llama.cpp/tools/server/$file llama.cpp/tools/grpc-server/ done @@ -28,4 +24,3 @@ else echo "add_subdirectory(grpc-server)" >> llama.cpp/tools/CMakeLists.txt fi set -e - diff --git a/scripts/patch_utils/apply_patches.sh b/scripts/patch_utils/apply_patches.sh new file mode 100755 index 000000000..9386b3ac6 --- /dev/null +++ b/scripts/patch_utils/apply_patches.sh @@ -0,0 +1,109 @@ +#!/bin/bash +# apply_patches.sh — Generic patch fetcher and applier for any backend. +# +# Usage: ./apply_patches.sh <source_dir> <target_dir> +# +# <source_dir> Directory containing a patches/ folder (with optional sources.yaml) +# <target_dir> The cloned upstream repo to patch (e.g., llama.cpp/) +# +# Behavior (idempotent): +# 1. If patches/sources.yaml exists and yq is available, for each source: +# - If patches/<name>/ already has .patch files: skip fetching (vendored) +# - Otherwise: clone the fork, auto-detect the fork base via merge-base +# with the upstream repo, and generate patches +# 2. Apply all patches from source subdirectories (alphabetical), then top-level .patch files +# 3. 
#    Fails fast on any patch application error
#
# Requirements: patch is always needed; git, mktemp and yq are only needed
# when patches have to be generated from patches/sources.yaml.

set -euo pipefail

# Print a warning on stderr so it never mixes with regular progress output.
_warn() {
  printf 'WARNING: %s\n' "$*" >&2
}

# Succeed when a value read from YAML is empty or the literal "null" that yq
# prints for a missing key.
_is_missing_value() {
  [[ -z "$1" || "$1" == "null" ]]
}

# Print the number of regular *.patch files directly inside directory $1
# (prints 0 when the directory does not exist).
_count_patches() {
  local dir=$1 count=0 candidate
  for candidate in "$dir"/*.patch; do
    # An unmatched glob stays literal; the -f test filters it out.
    if [[ -f "$candidate" ]]; then
      count=$((count + 1))
    fi
  done
  printf '%s\n' "$count"
}

# Apply every *.patch file directly inside directory $2 to target directory $1
# in glob (sorted) order. $3 is a label prefix used in the progress output.
# Exits the script with status 1 on the first patch that fails to apply.
_apply_patch_dir() {
  local target_dir=$1 dir=$2 label_prefix=$3 patch_file
  for patch_file in "$dir"/*.patch; do
    [[ -f "$patch_file" ]] || continue
    echo "Applying: ${label_prefix}${patch_file##*/}"
    if ! patch -d "$target_dir" -p1 < "$patch_file"; then
      echo "FAILED: $patch_file" >&2
      exit 1
    fi
  done
  return 0
}

# Clone fork $2 (branch $3) into scratch directory $6, find where it diverged
# from upstream repo $4 and export the fork-only commits as patches into $5.
# $1 is the source name used in messages.
# Returns 0 when patches were generated or the source was skipped with a
# warning, 1 when a git/filesystem step failed unexpectedly.
# Every step is checked explicitly because callers invoke this function in a
# conditional context, where `set -e` is not in effect.
_export_fork_patches() {
  local name=$1 repo=$2 branch=$3 upstream_repo=$4 out_dir=$5 work_dir=$6
  local fork_dir="$work_dir/fork" staging_dir="$work_dir/patches"
  local fork_base patch_count generated

  if ! git clone --single-branch -b "$branch" "$repo" "$fork_dir" 2>&1; then
    _warn "Failed to clone $repo — skipping source '$name'"
    return 0
  fi

  # git -C keeps the caller's working directory untouched, so a relative
  # target directory keeps resolving against the same place afterwards.
  git -C "$fork_dir" remote add upstream "$upstream_repo" || return 1
  git -C "$fork_dir" fetch upstream 2>&1 || return 1

  # Auto-detect fork base: merge-base between fork and upstream
  fork_base=$(
    git -C "$fork_dir" merge-base HEAD upstream/master 2>/dev/null \
      || git -C "$fork_dir" merge-base HEAD upstream/main 2>/dev/null \
      || true
  )
  if [[ -z "$fork_base" ]]; then
    _warn "Could not find merge-base with upstream — skipping source '$name'"
    return 0
  fi

  patch_count=$(git -C "$fork_dir" rev-list --count "$fork_base"..HEAD 2>/dev/null || echo "0")
  echo "  Fork base: ${fork_base:0:12} ($patch_count commits to extract)"

  if ((patch_count > 0)); then
    # Export into the scratch area first: a failed or interrupted export must
    # not leave a partial patch set that the next run would treat as vendored.
    mkdir -p "$staging_dir" || return 1
    git -C "$fork_dir" format-patch "$fork_base"..HEAD -o "$staging_dir" >/dev/null 2>&1 || return 1
    generated=$(_count_patches "$staging_dir")
    if ((generated > 0)); then
      mkdir -p "$out_dir" || return 1
      cp -- "$staging_dir"/*.patch "$out_dir"/ || return 1
    fi
    echo "  Generated $generated patches in patches/$name/"
  fi
  return 0
}

# Run _export_fork_patches inside a fresh scratch directory that is removed
# on every exit path. Arguments $1..$5 are forwarded unchanged.
# Returns the status of _export_fork_patches (1 if mktemp fails).
_generate_source_patches() {
  # Deliberately not called TMPDIR: that is an environment variable consulted
  # by mktemp and other tools, and must not be shadowed.
  local work_dir status=0
  work_dir=$(mktemp -d) || return 1
  _export_fork_patches "$@" "$work_dir" || status=$?
  rm -rf -- "${work_dir:?}"
  return "$status"
}

# Phase 1: for every source listed in sources file $2, generate patches into
# $1/<name>/ unless that directory already holds .patch files.
# Malformed entries and unreachable forks are skipped with a warning; an
# unexpected failure while generating patches exits the script with status 1.
_generate_missing_patches() {
  local patches_dir=$1 sources_file=$2
  local source_count i name repo branch upstream_repo out_dir existing

  # -r asks yq for plain scalars rather than quoted JSON strings.
  source_count=$(yq -r '.sources | length' "$sources_file")
  if [[ ! "$source_count" =~ ^[0-9]+$ ]]; then
    _warn "Could not read the source list from $sources_file — skipping source-based patch generation."
    return 0
  fi

  # Arithmetic loop instead of seq: `seq 0 -1` counts down on BSD/macOS.
  for ((i = 0; i < source_count; i++)); do
    name=$(yq -r ".sources[$i].name" "$sources_file")
    repo=$(yq -r ".sources[$i].repo" "$sources_file")
    branch=$(yq -r ".sources[$i].branch" "$sources_file")
    upstream_repo=$(yq -r ".sources[$i].upstream_repo" "$sources_file")

    if _is_missing_value "$name"; then
      _warn "Source #$i in $sources_file has no name — skipping"
      continue
    fi

    out_dir="$patches_dir/$name"
    existing=$(_count_patches "$out_dir")
    if ((existing > 0)); then
      echo "Patches [$name]: $existing patches already present — skipping fetch."
      continue
    fi

    if _is_missing_value "$repo" || _is_missing_value "$branch" \
      || _is_missing_value "$upstream_repo"; then
      _warn "Source '$name' needs repo, branch and upstream_repo — skipping"
      continue
    fi

    echo "Patches [$name]: fetching from $repo ($branch)"
    if ! _generate_source_patches "$name" "$repo" "$branch" "$upstream_repo" "$out_dir"; then
      echo "ERROR: patch generation failed for source '$name'" >&2
      exit 1
    fi
  done
  return 0
}

#######################################
# Generate missing patch sets and apply all patches to a checkout.
# Arguments:
#   $1 - directory containing a patches/ folder (optionally with sources.yaml)
#   $2 - directory to patch; a relative path is resolved against the caller's
#        working directory, which this function never changes
# Outputs:
#   Progress on stdout; warnings and errors on stderr.
# Returns:
#   0 when there is no patches/ folder or all patches applied.
#   Exits the script with status 1 when $1 cannot be entered, patch
#   generation fails unexpectedly, or any patch fails to apply.
#######################################
apply_patches() {
  local source_dir target_dir=$2 patches_dir sources_file subdir

  # Assigned separately from `local` so a failing cd is not masked.
  source_dir=$(cd "$1" && pwd) || {
    echo "ERROR: cannot enter source directory '$1'" >&2
    exit 1
  }
  patches_dir="$source_dir/patches"
  sources_file="$patches_dir/sources.yaml"

  if [[ ! -d "$patches_dir" ]]; then
    return 0
  fi

  # Phase 1: Generate missing patches from fork sources
  if [[ -f "$sources_file" ]]; then
    if command -v yq >/dev/null 2>&1; then
      _generate_missing_patches "$patches_dir" "$sources_file"
    else
      _warn "yq not found — skipping source-based patch generation."
    fi
  fi

  # Phase 2: Apply patches (subdirectories first, then top-level).
  # Directory names are read line by line on fd 3 so that paths containing
  # spaces survive and stdin stays free for the commands inside the loop.
  while IFS= read -r subdir <&3; do
    _apply_patch_dir "$target_dir" "$subdir" "${subdir##*/}/"
  done 3< <(find "$patches_dir" -mindepth 1 -maxdepth 1 -type d | sort)
  _apply_patch_dir "$target_dir" "$patches_dir" ""
}

# Run with arguments
if [[ $# -lt 2 ]]; then
  echo "Usage: $0 <source_dir> <target_dir>" >&2
  exit 1
fi
apply_patches "$1" "$2"