From b962f4a19296aaecc8e8e79dd00179e32617f09b Mon Sep 17 00:00:00 2001 From: pos-ei-don <1822533+pos-ei-don@users.noreply.github.com> Date: Thu, 11 Jun 2026 23:55:38 +0200 Subject: [PATCH] fix(vllm): parse tool_call function arguments before applying the chat template (#10256) Signed-off-by: pos-ei-don <1822533+pos-ei-don@users.noreply.github.com> --- backend/python/vllm/backend.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/backend/python/vllm/backend.py b/backend/python/vllm/backend.py index ada087c35..5d5662857 100644 --- a/backend/python/vllm/backend.py +++ b/backend/python/vllm/backend.py @@ -150,9 +150,24 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): d["reasoning_content"] = msg.reasoning_content if msg.tool_calls: try: - d["tool_calls"] = json.loads(msg.tool_calls) + tool_calls = json.loads(msg.tool_calls) except json.JSONDecodeError: pass + else: + # OpenAI wire format carries function.arguments as a + # JSON-encoded string, but chat templates (e.g. Qwen3) + # iterate over it as a mapping. vLLM's own OpenAI server + # parses arguments before applying the template, so do + # the same here. + if isinstance(tool_calls, list): + for tc in tool_calls: + func = tc.get("function") if isinstance(tc, dict) else None + if isinstance(func, dict) and isinstance(func.get("arguments"), str): + try: + func["arguments"] = json.loads(func["arguments"]) + except json.JSONDecodeError: + pass + d["tool_calls"] = tool_calls result.append(d) return result