Merge origin/master + pin-sync paged backend to 0ed235ea

master auto-bumped the stock llama-cpp pin 9d5d882d -> 0ed235ea and updated the shared grpc-server.cpp. Because grpc-server.cpp is shared, the paged backend's pin must track the stock pin, so bump its LLAMA_VERSION to match. All 28 paged patches apply cleanly on 0ed235ea (verified against a fresh upstream clone). The bf16-tau state-serialization fix (patch 0026) is included. The bit-exact gate and the full grpc-server build verification on GPU/CI are still to follow.

Assisted-by: Claude:opus-4.8 [Claude Code]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-06-29 02:46:37 -04:00 · 2026-06-28 07:56:47 +00:00
parent 1f3e5ba301 de2ec2f136
commit ea72a56e2c
95 changed files with 6339 additions and 487 deletions
--- a/backend/cpp/llama-cpp/Makefile
+++ b/backend/cpp/llama-cpp/Makefile
@@ -4,7 +4,7 @@
 # (backend/cpp/llama-cpp-localai-paged) does NOT inherit this pin: it owns its
 # own LLAMA_VERSION because its vendored patch series would break on a naive
 # bump and is advanced only by the manual PIN_SYNC process.
-LLAMA_VERSION?=9d5d882d8cd0f0a9283d87ed5e6fe3ee0d925fb1
+LLAMA_VERSION?=0ed235ea2c17a19fc8238668653946721ed136fd
 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp

 CMAKE_ARGS?=
@@ -161,11 +161,11 @@ llama-cpp-grpc: llama.cpp
 	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build
 	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build purge
 	$(info ${GREEN}I llama-cpp build info:grpc${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" TARGET="--target grpc-server --target ggml-rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
 	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/grpc-server llama-cpp-grpc

 llama-cpp-rpc-server: llama-cpp-grpc
-	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/llama.cpp/build/bin/rpc-server llama-cpp-rpc-server
+	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/llama.cpp/build/bin/ggml-rpc-server llama-cpp-rpc-server

 llama.cpp:
 	mkdir -p llama.cpp
--- a/backend/cpp/llama-cpp/grpc-server.cpp
+++ b/backend/cpp/llama-cpp/grpc-server.cpp
@@ -30,6 +30,19 @@
 #define LOCALAI_HAS_SERVER_SCHEMA 1
 #include "server-schema.cpp"
 #endif
+// server-stream.cpp exists only in llama.cpp after the upstream refactor that
+// added the SSE stream-resumption layer (stream_session/stream_pipe_producer).
+// server-context.cpp calls into it (spipe->cleanup(), stream_aware_should_stop,
+// stream_session_attach_pipe), so its definitions must be part of this
+// translation unit or the link fails with "undefined reference to
+// stream_pipe_producer::cleanup()". The file is self-contained (its only
+// external symbols come from server-common, already pulled in above) and the
+// http route-handler factories it also defines are unused here but harmless.
+// __has_include keeps the source compatible with older pins/forks that predate
+// the split.
+#if __has_include("server-stream.cpp")
+#include "server-stream.cpp"
+#endif
 #include "server-context.cpp"

 // LocalAI