build(paged): stacking patch-series scaffolding for llama.cpp paged attention

Numbered patches under backend/cpp/llama-cpp/patches/ are applied in order against the pinned LLAMA_VERSION by a build hook in the `llama.cpp:` target. Each phase is one small, independently buildable patch, so the series rebases cleanly across llama.cpp version bumps (anti-drift). The README defines the series (0001 vendor manager -> 0006 prefix caching) and the patch-regeneration workflow.

Assisted-by: Claude:opus-4.8 [Claude Code]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-06-23 16:19:07 -04:00 · 2026-06-19 22:53:20 +00:00
parent 62f0ae17e3
commit ba3fa5a633
2 changed files with 64 additions and 1 deletion
--- a/backend/cpp/llama-cpp/Makefile
+++ b/backend/cpp/llama-cpp/Makefile
@@ -137,7 +137,12 @@ llama.cpp:
 	git remote add origin $(LLAMA_REPO)  && \
 	git fetch --all --tags && \
 	git checkout -b build $(LLAMA_VERSION) && \
-	git submodule update --init --recursive --depth 1 --single-branch
+	git submodule update --init --recursive --depth 1 --single-branch && \
+	for p in $(CURRENT_MAKEFILE_DIR)patches/0*.patch; do \
+		[ -e "$$p" ] || continue; \
+		echo "applying llama.cpp patch: $$p"; \
+		git apply --verbose "$$p" || { echo "patch failed: $$p"; exit 1; }; \
+	done

 llama.cpp/tools/grpc-server: llama.cpp
 	mkdir -p llama.cpp/tools/grpc-server