fix(vllm): parse tool_call function arguments before applying the chat template (#10256)

Signed-off-by: pos-ei-don <1822533+pos-ei-don@users.noreply.github.com>
2026-08-01 02:49:51 -04:00 · 2026-06-11 23:55:38 +02:00
parent b6fcb3e1db
commit b962f4a192
1 changed file with 16 additions and 1 deletion
--- a/backend/python/vllm/backend.py
+++ b/backend/python/vllm/backend.py
@@ -150,9 +150,24 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                d["reasoning_content"] = msg.reasoning_content
            if msg.tool_calls:
                try:
-                    d["tool_calls"] = json.loads(msg.tool_calls)
+                    tool_calls = json.loads(msg.tool_calls)
                except json.JSONDecodeError:
                    pass
+                else:
+                    # OpenAI wire format carries function.arguments as a
+                    # JSON-encoded string, but chat templates (e.g. Qwen3)
+                    # iterate over it as a mapping. vLLM's own OpenAI server
+                    # parses arguments before applying the template, so do
+                    # the same here.
+                    if isinstance(tool_calls, list):
+                        for tc in tool_calls:
+                            func = tc.get("function") if isinstance(tc, dict) else None
+                            if isinstance(func, dict) and isinstance(func.get("arguments"), str):
+                                try:
+                                    func["arguments"] = json.loads(func["arguments"])
+                                except json.JSONDecodeError:
+                                    pass
+                    d["tool_calls"] = tool_calls
            result.append(d)
        return result