diff --git a/backend/cpp/turboquant/Makefile b/backend/cpp/turboquant/Makefile index cdfb0489f..062ccda2d 100644 --- a/backend/cpp/turboquant/Makefile +++ b/backend/cpp/turboquant/Makefile @@ -1,7 +1,7 @@ # Pinned to the HEAD of feature/turboquant-kv-cache on https://github.com/TheTom/llama-cpp-turboquant. # Auto-bumped nightly by .github/workflows/bump_deps.yaml. -TURBOQUANT_VERSION?=2cbfdc62a1a047b01377948dfdede8cb6a744866 +TURBOQUANT_VERSION?=4c1c3ac09d2dba0aa9a55b94f6c50c41a92f9c8c LLAMA_REPO?=https://github.com/TheTom/llama-cpp-turboquant CMAKE_ARGS?= diff --git a/backend/cpp/turboquant/patch-grpc-server.sh b/backend/cpp/turboquant/patch-grpc-server.sh index c9555052e..3a61e21c4 100755 --- a/backend/cpp/turboquant/patch-grpc-server.sh +++ b/backend/cpp/turboquant/patch-grpc-server.sh @@ -9,7 +9,7 @@ # fork and upstream (flat vs nested `common_params_speculative`, missing # `get_media_marker()`, `ctx_server.impl->model` vs `model_tgt`, and a # LOCALAI_LEGACY_LLAMA_CPP_SPEC compile gate). As of TURBOQUANT_VERSION -# 2cbfdc62 the fork has rebased past ggml-org/llama.cpp#21962, #22397 and +# 4c1c3ac0 the fork has rebased past ggml-org/llama.cpp#21962, #22397 and # #22838, so the shared grpc-server.cpp compiles unmodified against the fork. # Only the fork-specific KV-cache enum entries remain. #