From 53bdb18d1068ba622b9555fc41fa91b51fb44fdb Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 14 May 2026 08:53:23 +0200 Subject: [PATCH] chore: :arrow_up: Update ggml-org/llama.cpp to `7f3f843c31cd32dc4adc10b393342dfee071c332` (#9809) * :arrow_up: Update ggml-org/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> * fix(llama-cpp): adapt to upstream COMMON_SPECULATIVE_TYPE_DRAFT rename ggml-org/llama.cpp#22964 ("spec: update CLI arguments for better consistency") renamed the speculative type enum values: COMMON_SPECULATIVE_TYPE_DRAFT -> COMMON_SPECULATIVE_TYPE_DRAFT_SIMPLE COMMON_SPECULATIVE_TYPE_EAGLE3 -> COMMON_SPECULATIVE_TYPE_DRAFT_EAGLE3 and the registered name strings flipped from underscore- to dash- separated form (e.g. ngram_simple -> ngram-simple), with the bare draft/eagle3 aliases replaced by draft-simple/draft-eagle3. This broke the build with the new LLAMA_VERSION on every variant (vulkan/arm64, darwin and likely all the rest) at grpc-server.cpp:461. Update the upstream branch of the speculative-type fallback to use the new identifier (the LOCALAI_LEGACY_LLAMA_CPP_SPEC fork branch keeps the old name), and normalize spec_type option tokens before passing them to common_speculative_types_from_names so existing model configs that say spec_type:draft / spec_type:ngram_simple keep working. 
Signed-off-by: Ettore Di Giacinto Assisted-by: claude-code:claude-opus-4-7 --------- Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Signed-off-by: Ettore Di Giacinto Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> Co-authored-by: Ettore Di Giacinto --- backend/cpp/llama-cpp/Makefile | 2 +- backend/cpp/llama-cpp/grpc-server.cpp | 20 +++++++++++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/backend/cpp/llama-cpp/Makefile b/backend/cpp/llama-cpp/Makefile index 865f9481f..b3a27fd4b 100644 --- a/backend/cpp/llama-cpp/Makefile +++ b/backend/cpp/llama-cpp/Makefile @@ -1,5 +1,5 @@ -LLAMA_VERSION?=a9883db8ee021cf16783016a60996d41820b5195 +LLAMA_VERSION?=7f3f843c31cd32dc4adc10b393342dfee071c332 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp CMAKE_ARGS?= diff --git a/backend/cpp/llama-cpp/grpc-server.cpp b/backend/cpp/llama-cpp/grpc-server.cpp index 5c1d07766..6c4fa6946 100644 --- a/backend/cpp/llama-cpp/grpc-server.cpp +++ b/backend/cpp/llama-cpp/grpc-server.cpp @@ -32,6 +32,7 @@ #include #include #include +#include <algorithm> #include #include #include @@ -450,6 +451,8 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt // vector; the turboquant fork still uses the legacy scalar. The // LOCALAI_LEGACY_LLAMA_CPP_SPEC macro is injected by // backend/cpp/turboquant/patch-grpc-server.sh for fork builds only. + // Upstream renamed COMMON_SPECULATIVE_TYPE_DRAFT -> ..._DRAFT_SIMPLE + // in ggml-org/llama.cpp#22964; the fork still uses the old name. 
#ifdef LOCALAI_LEGACY_LLAMA_CPP_SPEC if (params.speculative.type == COMMON_SPECULATIVE_TYPE_NONE) { params.speculative.type = COMMON_SPECULATIVE_TYPE_DRAFT; @@ -458,7 +461,7 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt const bool no_spec_type = params.speculative.types.empty() || (params.speculative.types.size() == 1 && params.speculative.types[0] == COMMON_SPECULATIVE_TYPE_NONE); if (no_spec_type) { - params.speculative.types = { COMMON_SPECULATIVE_TYPE_DRAFT }; + params.speculative.types = { COMMON_SPECULATIVE_TYPE_DRAFT_SIMPLE }; } #endif } @@ -701,16 +704,27 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt // Upstream switched to a vector of types (comma-separated for multi-type // chaining via common_speculative_types_from_names). We keep accepting a // single value here, but also tolerate comma-separated lists. + // + // ggml-org/llama.cpp#22964 also renamed the registered names from + // underscore- to dash-separated form, and replaced the bare + // `draft`/`eagle3` aliases with `draft-simple`/`draft-eagle3`. We + // normalize each token here so existing model configs keep working. + auto normalize_spec_name = [](std::string s) -> std::string { + std::replace(s.begin(), s.end(), '_', '-'); + if (s == "draft") return "draft-simple"; + if (s == "eagle3") return "draft-eagle3"; + return s; + }; std::vector<std::string> names; std::string item; for (char c : optval_str) { if (c == ',') { - if (!item.empty()) { names.push_back(item); item.clear(); } + if (!item.empty()) { names.push_back(normalize_spec_name(item)); item.clear(); } } else { item.push_back(c); } } - if (!item.empty()) names.push_back(item); + if (!item.empty()) names.push_back(normalize_spec_name(item)); auto parsed = common_speculative_types_from_names(names); if (!parsed.empty()) { params.speculative.types = parsed;