mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-20 22:59:09 -04:00
chore: ⬆️ Update ggml-org/llama.cpp to e475fa2b5f9fb50c3d6fc3e7c6fdf1e004465b62 (#10392)
* ⬆️ Update ggml-org/llama.cpp

  Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

* fix(llama-cpp): adapt grpc-server to upstream server-schema split

  Upstream llama.cpp (e475fa2) extracted the JSON request-schema evaluation out of the static server_task::params_from_json_cmpl into the new server_schema::eval_llama_cmpl_schema (tools/server/server-schema.cpp). The grpc-server unity build still called the old static member, breaking every llama-cpp backend build with "no member named 'params_from_json_cmpl' in 'server_task'".

  Pull server-schema.cpp into the translation unit and call the new function, keeping both guarded by __has_include so forks that predate the split (e.g. llama-cpp-turboquant, which still exposes params_from_json_cmpl) keep compiling against the old static member.

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
  Assisted-by: Claude:claude-opus-4-8 [Claude Code]

---------

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
@@ -18,6 +18,18 @@
|
||||
#if __has_include("server-chat.cpp")
|
||||
#include "server-chat.cpp"
|
||||
#endif
|
||||
// server-schema.cpp exists only in llama.cpp after the upstream refactor that
|
||||
// extracted the JSON request-schema evaluation (previously the static
|
||||
// server_task::params_from_json_cmpl) into server_schema::eval_llama_cmpl_schema.
|
||||
// server-context.cpp and grpc-server.cpp both call into it, so its definitions
|
||||
// must be part of this translation unit or the link fails. __has_include keeps
|
||||
// the source compatible with older pins/forks (e.g. llama-cpp-turboquant) that
|
||||
// predate the split and still expose params_from_json_cmpl (see the guarded
|
||||
// call sites below).
|
||||
#if __has_include("server-schema.cpp")
|
||||
#define LOCALAI_HAS_SERVER_SCHEMA 1
|
||||
#include "server-schema.cpp"
|
||||
#endif
|
||||
#include "server-context.cpp"
|
||||
|
||||
// LocalAI
|
||||
@@ -2102,7 +2114,11 @@ public:
|
||||
task.index = i;
|
||||
|
||||
task.tokens = std::move(inputs[i]);
|
||||
#ifdef LOCALAI_HAS_SERVER_SCHEMA
|
||||
task.params = server_schema::eval_llama_cmpl_schema(
|
||||
#else
|
||||
task.params = server_task::params_from_json_cmpl(
|
||||
#endif
|
||||
ctx_server.impl->vocab,
|
||||
params_base,
|
||||
ctx_server.get_meta().slot_n_ctx,
|
||||
@@ -2116,7 +2132,7 @@ public:
|
||||
// cannot detect tool calls or separate reasoning from content.
|
||||
task.params.res_type = TASK_RESPONSE_TYPE_OAI_CHAT;
|
||||
task.params.oaicompat_cmpl_id = completion_id;
|
||||
// oaicompat_model is already populated by params_from_json_cmpl
|
||||
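// oaicompat_model is already populated by the params evaluation call above
// (server_schema::eval_llama_cmpl_schema, or server_task::params_from_json_cmpl
// on older pins/forks where LOCALAI_HAS_SERVER_SCHEMA is not defined)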
|
||||
|
||||
tasks.push_back(std::move(task));
|
||||
}
|
||||
@@ -2940,7 +2956,11 @@ public:
|
||||
task.index = i;
|
||||
|
||||
task.tokens = std::move(inputs[i]);
|
||||
#ifdef LOCALAI_HAS_SERVER_SCHEMA
|
||||
task.params = server_schema::eval_llama_cmpl_schema(
|
||||
#else
|
||||
task.params = server_task::params_from_json_cmpl(
|
||||
#endif
|
||||
ctx_server.impl->vocab,
|
||||
params_base,
|
||||
ctx_server.get_meta().slot_n_ctx,
|
||||
@@ -2952,7 +2972,7 @@ public:
|
||||
// reasoning, tool calls, and content are classified into ChatDeltas.
|
||||
task.params.res_type = TASK_RESPONSE_TYPE_OAI_CHAT;
|
||||
task.params.oaicompat_cmpl_id = completion_id;
|
||||
// oaicompat_model is already populated by params_from_json_cmpl
|
||||
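// oaicompat_model is already populated by the params evaluation call above
// (server_schema::eval_llama_cmpl_schema, or server_task::params_from_json_cmpl
// on older pins/forks where LOCALAI_HAS_SERVER_SCHEMA is not defined)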
|
||||
|
||||
tasks.push_back(std::move(task));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user