From 6ab29ec8b9c25e002dcd68b540241d2031c58ff2 Mon Sep 17 00:00:00 2001
From: pos-ei-don <1822533+pos-ei-don@users.noreply.github.com>
Date: Tue, 30 Jun 2026 09:00:51 +0200
Subject: [PATCH] fix(sglang): parse tool_call function arguments before applying the chat template (#10558)

OpenAI wire format carries `function.arguments` as a JSON-encoded
string, but chat templates (e.g. Qwen3-Coder) iterate over it as a
mapping. The vllm backend already parses arguments before applying the
chat template (PR #10256); this mirrors that fix in the sglang backend.

Without this fix the second turn of any tool-using session (assistant
returns tool_calls, user posts `role:"tool"` result, model is invoked
with arguments still as a string) crashes inside transformers' Jinja
chat-template rendering with:

    TypeError: Can only get item pairs from a mapping.
      File ".../transformers/utils/chat_template_utils.py", in render_jinja_template
      File ".../jinja2/filters.py", in do_items
        raise TypeError("Can only get item pairs from a mapping.")

Reproduced on `lmsysorg/sglang:v0.5.14` via LocalAI v4.5.4 with
`saricles/Qwen3-Coder-Next-NVFP4-GB10` (W4A4 NVFP4 / compressed-tensors)
on NVIDIA DGX Spark (GB10, sm_121).

After the patch, a tool-call roundtrip (assistant tool_calls -> tool
result -> assistant final answer) returns http=200 with the expected
follow-up content; no behaviour change on requests that don't carry
tool_calls.

Signed-off-by: Poseidon Co-authored-by: Poseidon --- backend/python/sglang/backend.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/backend/python/sglang/backend.py b/backend/python/sglang/backend.py index 8b48d2323..0d38c6b7d 100644 --- a/backend/python/sglang/backend.py +++ b/backend/python/sglang/backend.py @@ -147,9 +147,25 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): d["reasoning_content"] = msg.reasoning_content if msg.tool_calls: try: - d["tool_calls"] = json.loads(msg.tool_calls) + tool_calls = json.loads(msg.tool_calls) except json.JSONDecodeError: pass + else: + # OpenAI wire format carries function.arguments as a + # JSON-encoded string, but chat templates (e.g. Qwen3) + # iterate over it as a mapping. The vllm backend + # already parses arguments before applying the chat + # template (PR #10256); mirror that here so the + # sglang backend works with the same wire format. + if isinstance(tool_calls, list): + for tc in tool_calls: + func = tc.get("function") if isinstance(tc, dict) else None + if isinstance(func, dict) and isinstance(func.get("arguments"), str): + try: + func["arguments"] = json.loads(func["arguments"]) + except json.JSONDecodeError: + pass + d["tool_calls"] = tool_calls result.append(d) return result