diff --git a/backend/cpp/llama-cpp/Makefile b/backend/cpp/llama-cpp/Makefile index cf8c11692..094d2a2fc 100644 --- a/backend/cpp/llama-cpp/Makefile +++ b/backend/cpp/llama-cpp/Makefile @@ -1,5 +1,5 @@ -LLAMA_VERSION?=723c71064da0908c19683f8c344715fbf6d986fd +LLAMA_VERSION?=ecbcb7ea9d3303097519723b264a8b5f1e977028 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp CMAKE_ARGS?= diff --git a/backend/cpp/llama-cpp/grpc-server.cpp b/backend/cpp/llama-cpp/grpc-server.cpp index 5292ef0f6..7bdd123c2 100644 --- a/backend/cpp/llama-cpp/grpc-server.cpp +++ b/backend/cpp/llama-cpp/grpc-server.cpp @@ -362,7 +362,7 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt params.mmproj.path = request->mmproj(); } // params.model_alias ?? - params.model_alias = request->modelfile(); + params.model_alias.insert(request->modelfile()); if (!request->cachetypekey().empty()) { params.cache_type_k = kv_cache_type_from_str(request->cachetypekey()); }