From 4da769c1ca3bc1ccecc597f14ce9865bf21f5aa2 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 28 Jun 2026 06:18:56 +0000 Subject: [PATCH] paged headers: self-include <cstddef>/<cstdint> for size_t/uintN_t (fix amd64/non-arm64 build; compile-only) Vendored paged headers used size_t / uintN_t without including <cstddef> / <cstdint>. The arm64 DGX toolchain provides them transitively so the build passed there, but amd64/older toolchains do not, failing the CI amd64 build one header at a time ('size_t' does not name a type -> cascade). paged-kv-manager.h was already fixed. This adds the missing includes to the remaining vendored headers at the point each is created/rewritten in the patch series so every src/paged*.h self-includes both: * paged-attn.h (0003): add <cstddef> (had <cstdint>) * paged-alloc.h (0007): add <cstddef> (had <cstdint>) * paged-prefix-api.h (0007): add <cstddef> + <cstdint> (had only llama.h) The .cpp units include their own paged header, so they inherit the includes transitively. Whole series still applies clean on the pinned llama.cpp. Compile-only change: no runtime behavior change, bit-exactness unaffected. Assisted-by: Claude:opus-4.8 [Claude Code] Signed-off-by: Ettore Di Giacinto --- .../paged/0003-paged-gather-read-env-LLAMA_KV_PAGED.patch | 3 ++- ...d-engine-prefix-recompute-skip-env-LLAMA_KV_PAGED.patch | 7 +++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/backend/cpp/llama-cpp-localai-paged/patches/paged/0003-paged-gather-read-env-LLAMA_KV_PAGED.patch b/backend/cpp/llama-cpp-localai-paged/patches/paged/0003-paged-gather-read-env-LLAMA_KV_PAGED.patch index e8b28224b..347f34f15 100644 --- a/backend/cpp/llama-cpp-localai-paged/patches/paged/0003-paged-gather-read-env-LLAMA_KV_PAGED.patch +++ b/backend/cpp/llama-cpp-localai-paged/patches/paged/0003-paged-gather-read-env-LLAMA_KV_PAGED.patch @@ -323,7 +323,7 @@ new file mode 100644 index 0000000..c5b7bd7 --- /dev/null +++ b/src/paged-attn.h -@@ -0,0 +1,40 @@ +@@ -0,0 +1,41 @@ +#pragma once +// Paged attention gather-read (patch 0003, experimental). 
+// @@ -341,6 +341,7 @@ index 0000000..c5b7bd7 +// All logic lives here to keep the core files additive: build_attn gets one +// call, llama_kv_cache_context gets two thin accessors, CMake gets one line. + ++#include <cstddef> +#include <cstdint> + +struct ggml_context; diff --git a/backend/cpp/llama-cpp-localai-paged/patches/paged/0007-paged-engine-prefix-recompute-skip-env-LLAMA_KV_PAGED.patch b/backend/cpp/llama-cpp-localai-paged/patches/paged/0007-paged-engine-prefix-recompute-skip-env-LLAMA_KV_PAGED.patch index 97392c95b..7a5dabb21 100644 --- a/backend/cpp/llama-cpp-localai-paged/patches/paged/0007-paged-engine-prefix-recompute-skip-env-LLAMA_KV_PAGED.patch +++ b/backend/cpp/llama-cpp-localai-paged/patches/paged/0007-paged-engine-prefix-recompute-skip-env-LLAMA_KV_PAGED.patch @@ -350,7 +350,7 @@ diff --git a/src/paged-alloc.h b/src/paged-alloc.h index bf66665..88dedef 100644 --- a/src/paged-alloc.h +++ b/src/paged-alloc.h -@@ -1,17 +1,27 @@ +@@ -1,17 +1,28 @@ #pragma once -// On-demand paged KV block allocation (patch 0004, experimental). +// On-demand paged KV block allocation + cross-request prefix reuse @@ -387,6 +387,7 @@ index bf66665..88dedef 100644 +// registry), so the core kv-cache struct stays untouched - find_slot gains only +// gated calls. Gated behind env LLAMA_KV_PAGED; a no-op when unset. ++#include <cstddef> #include #include @@ -21,19 +31,42 @@ namespace paged_alloc { @@ -498,7 +499,7 @@ new file mode 100644 index 0000000..78a3864 --- /dev/null +++ b/src/paged-prefix-api.h -@@ -0,0 +1,27 @@ +@@ -0,0 +1,29 @@ +#pragma once +// Thin test/diagnostic shim over the paged cross-request prefix engine seam +// (patch 0007). Lets a driver that only includes the public llama.h reach the @@ -506,6 +507,8 @@ index 0000000..78a3864 +// without pulling in the internal kv-cache headers. All entry points are no-ops +// (return 0) unless env LLAMA_KV_PAGED is set. Experimental; not a stable API. + ++#include <cstddef> ++#include <cstdint> +#include "llama.h" + +namespace paged_prefix_api {