mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-28 10:27:30 -04:00
paged headers: self-include <cstddef>/<cstdint> for size_t/uintN_t (fix amd64/non-arm64 build; compile-only)
The vendored paged headers used size_t / uintN_t without including <cstddef> /
<cstdint>. The arm64 DGX toolchain provides these transitively, so the build
passed there, but amd64 and older toolchains do not, so the CI amd64 build failed
one header at a time ("'size_t' does not name a type", followed by a cascade of errors).
paged-kv-manager.h was already fixed. This commit adds the missing includes to the
remaining vendored headers, at the point where each is created or rewritten in the
patch series, so that every src/paged*.h self-includes both <cstddef> and <cstdint>:
* paged-attn.h (0003): add <cstddef> (had <cstdint>)
* paged-alloc.h (0007): add <cstddef> (had <cstdint>)
* paged-prefix-api.h (0007): add <cstddef> + <cstdint> (had only llama.h)
Each .cpp unit includes its own paged header, so it inherits the includes
transitively. The whole series still applies cleanly on the pinned llama.cpp.
Compile-only change: no runtime behavior change, bit-exactness unaffected.
Assisted-by: Claude:opus-4.8 [Claude Code]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
@@ -323,7 +323,7 @@ new file mode 100644
|
||||
index 0000000..c5b7bd7
|
||||
--- /dev/null
|
||||
+++ b/src/paged-attn.h
|
||||
@@ -0,0 +1,40 @@
|
||||
@@ -0,0 +1,41 @@
|
||||
+#pragma once
|
||||
+// Paged attention gather-read (patch 0003, experimental).
|
||||
+//
|
||||
@@ -341,6 +341,7 @@ index 0000000..c5b7bd7
|
||||
+// All logic lives here to keep the core files additive: build_attn gets one
|
||||
+// call, llama_kv_cache_context gets two thin accessors, CMake gets one line.
|
||||
+
|
||||
+#include <cstddef>
|
||||
+#include <cstdint>
|
||||
+
|
||||
+struct ggml_context;
|
||||
|
||||
@@ -350,7 +350,7 @@ diff --git a/src/paged-alloc.h b/src/paged-alloc.h
|
||||
index bf66665..88dedef 100644
|
||||
--- a/src/paged-alloc.h
|
||||
+++ b/src/paged-alloc.h
|
||||
@@ -1,17 +1,27 @@
|
||||
@@ -1,17 +1,28 @@
|
||||
#pragma once
|
||||
-// On-demand paged KV block allocation (patch 0004, experimental).
|
||||
+// On-demand paged KV block allocation + cross-request prefix reuse
|
||||
@@ -387,6 +387,7 @@ index bf66665..88dedef 100644
|
||||
+// registry), so the core kv-cache struct stays untouched - find_slot gains only
|
||||
+// gated calls. Gated behind env LLAMA_KV_PAGED; a no-op when unset.
|
||||
|
||||
+#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
@@ -21,19 +31,42 @@ namespace paged_alloc {
|
||||
@@ -498,7 +499,7 @@ new file mode 100644
|
||||
index 0000000..78a3864
|
||||
--- /dev/null
|
||||
+++ b/src/paged-prefix-api.h
|
||||
@@ -0,0 +1,27 @@
|
||||
@@ -0,0 +1,29 @@
|
||||
+#pragma once
|
||||
+// Thin test/diagnostic shim over the paged cross-request prefix engine seam
|
||||
+// (patch 0007). Lets a driver that only includes the public llama.h reach the
|
||||
@@ -506,6 +507,8 @@ index 0000000..78a3864
|
||||
+// without pulling in the internal kv-cache headers. All entry points are no-ops
|
||||
+// (return 0) unless env LLAMA_KV_PAGED is set. Experimental; not a stable API.
|
||||
+
|
||||
+#include <cstddef>
|
||||
+#include <cstdint>
|
||||
+#include "llama.h"
|
||||
+
|
||||
+namespace paged_prefix_api {
|
||||
|
||||
Reference in New Issue
Block a user