diff --git a/backend/cpp/ik-llama-cpp/grpc-server.cpp b/backend/cpp/ik-llama-cpp/grpc-server.cpp
index 3e88022dc..30e6dc692 100644
--- a/backend/cpp/ik-llama-cpp/grpc-server.cpp
+++ b/backend/cpp/ik-llama-cpp/grpc-server.cpp
@@ -326,7 +326,7 @@ struct llama_client_slot
         char buffer[512];
         double t_token = t_prompt_processing / num_prompt_tokens_processed;
         double n_tokens_second = 1e3 / t_prompt_processing * num_prompt_tokens_processed;
-        sprintf(buffer, "prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)",
+        snprintf(buffer, sizeof(buffer), "prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)",
                 t_prompt_processing, num_prompt_tokens_processed,
                 t_token, n_tokens_second);
         LOG_INFO(buffer, {
@@ -340,7 +340,7 @@ struct llama_client_slot
 
         t_token = t_token_generation / n_decoded;
         n_tokens_second = 1e3 / t_token_generation * n_decoded;
-        sprintf(buffer, "generation eval time = %10.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)",
+        snprintf(buffer, sizeof(buffer), "generation eval time = %10.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)",
                 t_token_generation, n_decoded,
                 t_token, n_tokens_second);
         LOG_INFO(buffer, {
@@ -352,7 +352,7 @@ struct llama_client_slot
             {"n_tokens_second", n_tokens_second},
         });
 
-        sprintf(buffer, " total time = %10.2f ms", t_prompt_processing + t_token_generation);
+        snprintf(buffer, sizeof(buffer), " total time = %10.2f ms", t_prompt_processing + t_token_generation);
         LOG_INFO(buffer, {
             {"slot_id", id},
             {"task_id", task_id},
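
The patch replaces unbounded `sprintf` calls with `snprintf(buffer, sizeof(buffer), ...)`, so an oversized timing string is truncated instead of overflowing the 512-byte stack buffer. The standalone sketch below is illustrative only (it is not part of the patch and uses a made-up buffer size and format string) and shows the behavior difference: `snprintf` NUL-terminates and returns the length it would have needed, which can be checked to detect truncation.

```cpp
// Illustrative sketch, not part of grpc-server.cpp: why snprintf with an
// explicit size is safer than sprintf for fixed-size buffers.
#include <cstdio>

int main() {
    char buffer[16];  // deliberately small to force truncation

    // sprintf(buffer, ...) with this input would write past the end of
    // `buffer` (undefined behavior). snprintf writes at most sizeof(buffer)-1
    // characters, NUL-terminates, and returns the full length it needed.
    int needed = std::snprintf(buffer, sizeof(buffer),
                               "prompt eval time = %10.2f ms", 12345.678);
    if (needed >= static_cast<int>(sizeof(buffer))) {
        std::printf("output truncated (needed %d bytes)\n", needed);
    }
    std::printf("%s\n", buffer);  // prints the truncated string safely
    return 0;
}
```

In the real code the 512-byte buffer is generous for these timing lines, so the change is defensive rather than a fix for an observed overflow; the return value is not checked there, which is fine when truncation is acceptable for log output.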