chore: ⬆️ Update ggml-org/llama.cpp to e475fa2b5f9fb50c3d6fc3e7c6fdf1e004465b62 (#10392)

* ⬆️ Update ggml-org/llama.cpp

  Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

* fix(llama-cpp): adapt grpc-server to upstream server-schema split

  Upstream llama.cpp (e475fa2) extracted the JSON request-schema evaluation
  out of the static server_task::params_from_json_cmpl into the new
  server_schema::eval_llama_cmpl_schema (tools/server/server-schema.cpp).
  The grpc-server unity build still called the old static member, breaking
  every llama-cpp backend build with "no member named 'params_from_json_cmpl'
  in 'server_task'".

  Pull server-schema.cpp into the translation unit and call the new function,
  keeping both guarded by __has_include so forks that predate the split
  (e.g. llama-cpp-turboquant, which still exposes params_from_json_cmpl) keep
  compiling against the old static member.

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
  Assisted-by: Claude:claude-opus-4-8 [Claude Code]

---------

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
2026-06-20 14:49:09 -04:00 · 2026-06-20 08:22:22 +02:00
parent 93706fec57
commit 518381278e
2 changed files with 23 additions and 3 deletions
--- a/backend/cpp/llama-cpp/Makefile
+++ b/backend/cpp/llama-cpp/Makefile
@@ -1,5 +1,5 @@

-LLAMA_VERSION?=f3e182816421c648188b5eab269853bf1531d950
+LLAMA_VERSION?=e475fa2b5f9fb50c3d6fc3e7c6fdf1e004465b62
 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp

 CMAKE_ARGS?=
--- a/backend/cpp/llama-cpp/grpc-server.cpp
+++ b/backend/cpp/llama-cpp/grpc-server.cpp
@@ -18,6 +18,18 @@
 #if __has_include("server-chat.cpp")
 #include "server-chat.cpp"
 #endif
+// server-schema.cpp exists only in llama.cpp after the upstream refactor that
+// extracted the JSON request-schema evaluation (previously the static
+// server_task::params_from_json_cmpl) into server_schema::eval_llama_cmpl_schema.
+// server-context.cpp and grpc-server.cpp both call into it, so its definitions
+// must be part of this translation unit or the link fails. __has_include keeps
+// the source compatible with older pins/forks (e.g. llama-cpp-turboquant) that
+// predate the split and still expose params_from_json_cmpl (see the guarded
+// call sites below).
+#if __has_include("server-schema.cpp")
+#define LOCALAI_HAS_SERVER_SCHEMA 1
+#include "server-schema.cpp"
+#endif
 #include "server-context.cpp"

 // LocalAI
@@ -2102,7 +2114,11 @@ public:
                task.index = i;

                task.tokens    = std::move(inputs[i]);
+#ifdef LOCALAI_HAS_SERVER_SCHEMA
+                task.params           = server_schema::eval_llama_cmpl_schema(
+#else
                task.params           = server_task::params_from_json_cmpl(
+#endif
                        ctx_server.impl->vocab,
                        params_base,
                        ctx_server.get_meta().slot_n_ctx,
@@ -2116,7 +2132,7 @@ public:
                // cannot detect tool calls or separate reasoning from content.
                task.params.res_type                 = TASK_RESPONSE_TYPE_OAI_CHAT;
                task.params.oaicompat_cmpl_id         = completion_id;
-                // oaicompat_model is already populated by params_from_json_cmpl
+                // oaicompat_model is already populated by eval_llama_cmpl_schema

                tasks.push_back(std::move(task));
            }
@@ -2940,7 +2956,11 @@ public:
                task.index = i;

                task.tokens    = std::move(inputs[i]);
+#ifdef LOCALAI_HAS_SERVER_SCHEMA
+                task.params           = server_schema::eval_llama_cmpl_schema(
+#else
                task.params           = server_task::params_from_json_cmpl(
+#endif
                        ctx_server.impl->vocab,
                        params_base,
                        ctx_server.get_meta().slot_n_ctx,
@@ -2952,7 +2972,7 @@ public:
                // reasoning, tool calls, and content are classified into ChatDeltas.
                task.params.res_type                 = TASK_RESPONSE_TYPE_OAI_CHAT;
                task.params.oaicompat_cmpl_id         = completion_id;
-                // oaicompat_model is already populated by params_from_json_cmpl
+                // oaicompat_model is already populated by eval_llama_cmpl_schema

                tasks.push_back(std::move(task));
            }