diff --git a/backend/cpp/llama-cpp/Makefile b/backend/cpp/llama-cpp/Makefile index a6d4c5497..e131a07a8 100644 --- a/backend/cpp/llama-cpp/Makefile +++ b/backend/cpp/llama-cpp/Makefile @@ -1,5 +1,5 @@ -LLAMA_VERSION?=66c4f9ded01b29d9120255be1ed8d5835bcbb51d +LLAMA_VERSION?=d12cc3d1ca6bba741cd77887ac9c9ee18c8415c7 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp CMAKE_ARGS?= diff --git a/backend/cpp/llama-cpp/grpc-server.cpp b/backend/cpp/llama-cpp/grpc-server.cpp index 036ff9c14..b3f32b575 100644 --- a/backend/cpp/llama-cpp/grpc-server.cpp +++ b/backend/cpp/llama-cpp/grpc-server.cpp @@ -1614,6 +1614,7 @@ public: ctx_server.impl->vocab, params_base, ctx_server.get_meta().slot_n_ctx, + ctx_server.get_meta().logit_bias_eog, data); task.id_slot = json_value(data, "id_slot", -1); @@ -2382,6 +2383,7 @@ public: ctx_server.impl->vocab, params_base, ctx_server.get_meta().slot_n_ctx, + ctx_server.get_meta().logit_bias_eog, data); task.id_slot = json_value(data, "id_slot", -1);