From 518381278e362423ab70aaa8ad23c7b44ee13a03 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 20 Jun 2026 08:22:22 +0200 Subject: [PATCH] chore: :arrow_up: Update ggml-org/llama.cpp to `e475fa2b5f9fb50c3d6fc3e7c6fdf1e004465b62` (#10392) * :arrow_up: Update ggml-org/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> * fix(llama-cpp): adapt grpc-server to upstream server-schema split Upstream llama.cpp (e475fa2) extracted the JSON request-schema evaluation out of the static server_task::params_from_json_cmpl into the new server_schema::eval_llama_cmpl_schema (tools/server/server-schema.cpp). The grpc-server unity build still called the old static member, breaking every llama-cpp backend build with "no member named 'params_from_json_cmpl' in 'server_task'". Pull server-schema.cpp into the translation unit and call the new function, keeping both guarded by __has_include so forks that predate the split (e.g. llama-cpp-turboquant, which still exposes params_from_json_cmpl) keep compiling against the old static member. 
Signed-off-by: Ettore Di Giacinto Assisted-by: Claude:claude-opus-4-8 [Claude Code] --------- Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Signed-off-by: Ettore Di Giacinto Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> Co-authored-by: Ettore Di Giacinto --- backend/cpp/llama-cpp/Makefile | 2 +- backend/cpp/llama-cpp/grpc-server.cpp | 24 ++++++++++++++++++++++-- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/backend/cpp/llama-cpp/Makefile b/backend/cpp/llama-cpp/Makefile index 64414ec30..bf9f4f608 100644 --- a/backend/cpp/llama-cpp/Makefile +++ b/backend/cpp/llama-cpp/Makefile @@ -1,5 +1,5 @@ -LLAMA_VERSION?=f3e182816421c648188b5eab269853bf1531d950 +LLAMA_VERSION?=e475fa2b5f9fb50c3d6fc3e7c6fdf1e004465b62 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp CMAKE_ARGS?= diff --git a/backend/cpp/llama-cpp/grpc-server.cpp b/backend/cpp/llama-cpp/grpc-server.cpp index 8502e9530..c2e7f22e4 100644 --- a/backend/cpp/llama-cpp/grpc-server.cpp +++ b/backend/cpp/llama-cpp/grpc-server.cpp @@ -18,6 +18,18 @@ #if __has_include("server-chat.cpp") #include "server-chat.cpp" #endif +// server-schema.cpp exists only in llama.cpp after the upstream refactor that +// extracted the JSON request-schema evaluation (previously the static +// server_task::params_from_json_cmpl) into server_schema::eval_llama_cmpl_schema. +// server-context.cpp and grpc-server.cpp both call into it, so its definitions +// must be part of this translation unit or the link fails. __has_include keeps +// the source compatible with older pins/forks (e.g. llama-cpp-turboquant) that +// predate the split and still expose params_from_json_cmpl (see the guarded +// call sites below). 
+#if __has_include("server-schema.cpp") +#define LOCALAI_HAS_SERVER_SCHEMA 1 +#include "server-schema.cpp" +#endif #include "server-context.cpp" // LocalAI @@ -2102,7 +2114,11 @@ public: task.index = i; task.tokens = std::move(inputs[i]); +#ifdef LOCALAI_HAS_SERVER_SCHEMA + task.params = server_schema::eval_llama_cmpl_schema( +#else task.params = server_task::params_from_json_cmpl( +#endif ctx_server.impl->vocab, params_base, ctx_server.get_meta().slot_n_ctx, @@ -2116,7 +2132,7 @@ public: // cannot detect tool calls or separate reasoning from content. task.params.res_type = TASK_RESPONSE_TYPE_OAI_CHAT; task.params.oaicompat_cmpl_id = completion_id; - // oaicompat_model is already populated by params_from_json_cmpl + // oaicompat_model is already populated by eval_llama_cmpl_schema (params_from_json_cmpl on pre-split forks) tasks.push_back(std::move(task)); } @@ -2940,7 +2956,11 @@ public: task.index = i; task.tokens = std::move(inputs[i]); +#ifdef LOCALAI_HAS_SERVER_SCHEMA + task.params = server_schema::eval_llama_cmpl_schema( +#else task.params = server_task::params_from_json_cmpl( +#endif ctx_server.impl->vocab, params_base, ctx_server.get_meta().slot_n_ctx, @@ -2952,7 +2972,7 @@ public: // reasoning, tool calls, and content are classified into ChatDeltas. task.params.res_type = TASK_RESPONSE_TYPE_OAI_CHAT; task.params.oaicompat_cmpl_id = completion_id; - // oaicompat_model is already populated by params_from_json_cmpl + // oaicompat_model is already populated by eval_llama_cmpl_schema (params_from_json_cmpl on pre-split forks) tasks.push_back(std::move(task)); }