mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-23 16:19:07 -04:00
Validate the paged KV read/write path at the ggml-op level, driven by
PagedKVManager:
- write: ggml_set_rows(pool, k_src, slot_mapping) scatters K rows into the pool by slot
- read: ggml_get_rows(pool, gather_idx) gathers a sequence's slots into a
contiguous scratch tensor (the tensor an attention kernel consumes)
The test forces a non-contiguous, out-of-order physical block layout
(allocate seqA+seqB, free seqA, reallocate seqC -> blocks [2,1,5]) and
proves gather(write(x)) == x plus cross-sequence isolation in the shared
pool. This de-risks the central question (does slot-addressed paged storage
round-trip correctly through ggml) before the llama-graph integration.
Pool is statically allocated via ggml_backend_alloc_ctx_tensors, mirroring
how llama.cpp allocates its KV cache. CPU backend, no new ggml op.
Built against ggml from the vendored llama.cpp checkout.
Phase 1 of docs/superpowers/plans/2026-06-19-paged-attention-llamacpp.md.
Assisted-by: Claude:opus-4.8 [Claude Code]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
33 lines
1.1 KiB
Makefile
33 lines
1.1 KiB
Makefile
# C++ compiler; replace with `make CXX=clang++` or an environment CXX.
CXX ?= g++
# User-tunable optimisation/warning flags. A command-line or environment
# CXXFLAGS replaces this default wholesale.
CXXFLAGS ?= -O2 -Wall -Wextra
# Flags every compile in this file is meant to carry: the language standard
# and the include path for this directory. They are prepended with `override`
# so that a user-supplied CXXFLAGS (e.g. `make CXXFLAGS=-g`) adds to them
# instead of silently dropping them. The user's flags come last, so an
# explicit -std=... on the command line still takes precedence.
override CXXFLAGS := -std=c++17 -I. $(CXXFLAGS)
|
|
|
|
# Names of the standalone unit tests; the pattern rule for tests/% builds
# each one from tests/<name>.cpp.
TESTS = test_free_block_queue test_block_pool test_paged_kv_manager test_prefix_cache
# Paths of the test executables, one per entry in TESTS.
BINS = $(TESTS:%=tests/%)

# Default goal: build every unit-test binary.
all: $(BINS)
|
|
|
|
# Build one unit-test binary: tests/<name> is compiled from tests/<name>.cpp
# together with paged_kv_manager.cpp in a single compiler invocation. The
# header is listed as a prerequisite so that editing it triggers a rebuild;
# $(filter %.cpp,$^) keeps it off the compiler command line.
tests/%: tests/%.cpp paged_kv_manager.cpp paged_kv_manager.h
	$(CXX) $(CXXFLAGS) -o $@ $(filter %.cpp,$^)
|
|
|
|
# Build all unit tests, then run them one after another. Each binary is
# announced with a "== path ==" banner; the first one that exits non-zero
# aborts the loop and fails the target.
check: all
	@for bin in $(BINS); do \
		echo "== $$bin =="; \
		./$$bin || exit 1; \
	done
|
|
|
|
# --- Optional ggml integration test (Phase 1: paged write/gather mechanism) ---
# Not part of `all` or `check`: it needs an already-built ggml. Point the two
# variables below at your own checkout and build tree, for example:
#   make ggml-check GGML_SRC=<llama.cpp>/ggml GGML_BUILD=<ggml-build>

# Root of the ggml source tree; its include/ subdirectory supplies the headers.
GGML_SRC ?= ../../llama-cpp-fallback-build/llama.cpp/ggml
# Root of the ggml build tree.
GGML_BUILD ?= /tmp/ggml-build
# Directory searched for the ggml libraries at link time and recorded as the
# runtime search path (rpath) of the test binary.
GGML_LIBDIR = $(GGML_BUILD)/src
|
|
|
|
# Build the ggml integration test. This explicit rule takes the place of the
# generic tests/% pattern rule for this one target because it needs the ggml
# headers and libraries in addition to paged_kv_manager.cpp.
#
# Libraries are listed dependents-first (ggml, then ggml-cpu, then ggml-base)
# so that a single-pass linker can also resolve them when ggml was built as
# static archives; for shared libraries the order makes no difference.
# NOTE(review): assumes ggml-cpu depends on ggml-base and ggml on both --
# confirm against the ggml build you link with.
# The rpath entry lets the binary find the libraries in GGML_LIBDIR at run
# time without setting LD_LIBRARY_PATH.
tests/test_ggml_paged_rw: tests/test_ggml_paged_rw.cpp paged_kv_manager.cpp paged_kv_manager.h
	$(CXX) $(CXXFLAGS) -I$(GGML_SRC)/include -o $@ $(filter %.cpp,$^) \
		-L$(GGML_LIBDIR) -lggml -lggml-cpu -lggml-base \
		-Wl,-rpath,$(GGML_LIBDIR)
|
|
|
|
# Build (if needed) and run the ggml integration test, printing the same
# "== path ==" banner that `check` prints. The target's exit status is that
# of the test binary.
ggml-check: tests/test_ggml_paged_rw
	@echo "== $< =="; ./$<
|
|
|
|
# Delete every executable this Makefile can produce: the unit-test binaries
# and the optional ggml integration test.
clean:
	rm -f $(BINS) \
		tests/test_ggml_paged_rw
|
|
|
|
# These targets are commands rather than files, so they must run even if a
# file with the same name happens to exist.
.PHONY: all clean check ggml-check
|