From b24ca512878d02283d8469e6f3ab34f0f4e43cb3 Mon Sep 17 00:00:00 2001
From: Richard Palethorpe <io@richiejp.com>
Date: Thu, 12 Mar 2026 12:32:29 +0000
Subject: [PATCH] fix(llama-cpp): Set enable_thinking in the correct place
 (#8973)

Signed-off-by: Richard Palethorpe <io@richiejp.com>
---
 backend/cpp/llama-cpp/grpc-server.cpp | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/backend/cpp/llama-cpp/grpc-server.cpp b/backend/cpp/llama-cpp/grpc-server.cpp
index 511cfc347..5b86ca24b 100644
--- a/backend/cpp/llama-cpp/grpc-server.cpp
+++ b/backend/cpp/llama-cpp/grpc-server.cpp
@@ -1348,11 +1348,14 @@ public:
                     body_json["min_p"] = data["min_p"];
                 }
 
-                // Pass metadata fields to body_json
+                // Pass enable_thinking via chat_template_kwargs (where oaicompat_chat_params_parse reads it); any metadata value other than "true" sets it to false
                 const auto& metadata = request->metadata();
                 auto et_it = metadata.find("enable_thinking");
                 if (et_it != metadata.end()) {
-                    body_json["enable_thinking"] = (et_it->second == "true");
+                    if (!body_json.contains("chat_template_kwargs")) {
+                        body_json["chat_template_kwargs"] = json::object();
+                    }
+                    body_json["chat_template_kwargs"]["enable_thinking"] = (et_it->second == "true");
                 }
 
                 // Debug: Print full body_json before template processing (includes messages, tools, tool_choice, etc.)
@@ -2071,11 +2074,14 @@ public:
                     body_json["min_p"] = data["min_p"];
                 }
 
-                // Pass metadata fields to body_json
+                // Pass enable_thinking via chat_template_kwargs (where oaicompat_chat_params_parse reads it); any metadata value other than "true" sets it to false
                 const auto& predict_metadata = request->metadata();
                 auto predict_et_it = predict_metadata.find("enable_thinking");
                 if (predict_et_it != predict_metadata.end()) {
-                    body_json["enable_thinking"] = (predict_et_it->second == "true");
+                    if (!body_json.contains("chat_template_kwargs")) {
+                        body_json["chat_template_kwargs"] = json::object();
+                    }
+                    body_json["chat_template_kwargs"]["enable_thinking"] = (predict_et_it->second == "true");
                 }
 
                 // Debug: Print full body_json before template processing (includes messages, tools, tool_choice, etc.)