mirror of
https://github.com/mudler/LocalAI.git
synced 2026-07-03 12:57:02 -04:00
fix(python-backends): parse tool-call arguments for chat templates and split implicit reasoning blocks (#10658)
Two bugs broke OpenAI-style tool calling on the MLX backend (and any
Python backend sharing backend/python/common), reproduced end-to-end on
LocalAI v4.5.5 with the metal-mlx backend and
mlx-community/Qwen3.5-2B-MLX-8bit.
messages_to_dicts left each tool call's function.arguments as the raw
OpenAI-wire JSON string. HuggingFace chat templates (e.g. Qwen3.5)
iterate arguments as a mapping (.items()), so any request whose history
contained a prior assistant tool_calls message failed with HTTP 500
"Generation failed: Can only get item pairs from a mapping." — breaking
every agent loop on its second turn. Decode the string back into a dict
so the template sees a mapping.
split_reasoning returned ("", text) whenever the opening think tag was
absent. Models like Qwen3.5 open the assistant turn already inside
thinking, so the generated text carries only the closing </think>; the
whole chain-of-thought leaked into content. When the opener is missing
but the closer is present, treat everything before the closer as
reasoning.
Adds platform-independent unit tests under backend/python/common
(stdlib-only, no MLX/venv required, following parent_watch_test.py).
Assisted-by: Claude Code:claude-opus-4-8
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
@@ -20,7 +20,15 @@ def split_reasoning(text, think_start, think_end):
|
||||
Returns ``(reasoning_content, remaining_text)``. When ``think_start`` is
|
||||
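empty, returns ``("", text)`` unchanged. When it is not found in ``text``
but ``think_end`` is, everything before ``think_end`` is returned as the
reasoning; when neither tag is found, returns ``("", text)`` unchanged.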
|
||||
"""
|
||||
if not think_start or not text or think_start not in text:
|
||||
if not think_start or not text:
|
||||
return "", text
|
||||
if think_start not in text:
|
||||
# Models like Qwen3.5 open assistant turns already INSIDE thinking, so
|
||||
# the generated text carries only the closing tag. Everything before it
|
||||
# is reasoning that would otherwise leak into the content.
|
||||
if think_end and think_end in text:
|
||||
head, _, tail = text.partition(think_end)
|
||||
return head.strip(), tail.strip()
|
||||
return "", text
|
||||
pattern = re.compile(
|
||||
re.escape(think_start) + r"(.*?)" + re.escape(think_end or ""),
|
||||
|
||||
75
backend/python/common/mlx_utils_test.py
Normal file
75
backend/python/common/mlx_utils_test.py
Normal file
@@ -0,0 +1,75 @@
|
||||
"""Unit tests for the mlx/mlx-vlm shared helpers (mlx_utils.py).
|
||||
|
||||
Run standalone (Python standard library only, no backend venv needed):
|
||||
python3 -m unittest mlx_utils_test
|
||||
|
||||
These mirror the server-less helper tests in backend/python/mlx/test.py
|
||||
(TestSharedHelpers), but live here so they run on any platform: the mlx
|
||||
test module imports grpc/backend_pb2 at import time and needs the MLX venv,
|
||||
whereas mlx_utils only needs the standard library.
|
||||
"""
|
||||
|
||||
import types
|
||||
import unittest
|
||||
|
||||
from mlx_utils import parse_tool_calls, split_reasoning
|
||||
|
||||
|
||||
class TestSplitReasoning(unittest.TestCase):
    """Check how ``split_reasoning`` separates reasoning from answer text."""

    # Tag pair used by every case except the one that blanks the closer.
    _OPEN = "<think>"
    _CLOSE = "</think>"

    def test_both_tags(self):
        """Explicit opener and closer: the enclosed text is the reasoning."""
        generated = "<think>step 1\nstep 2</think>The answer is 42."
        reasoning, content = split_reasoning(generated, self._OPEN, self._CLOSE)
        self.assertEqual(reasoning, "step 1\nstep 2")
        self.assertEqual(content, "The answer is 42.")

    def test_implicit_opener_only_closing_tag(self):
        """Only the closer is present: the text before it is the reasoning."""
        # Qwen3.5 starts the assistant turn already inside the thinking
        # block, so the generated text never contains the opening tag.
        generated = (
            "The user is asking about the weather.\n"
            "</think>\n\n"
            "The weather in Rome is sunny."
        )
        reasoning, content = split_reasoning(generated, self._OPEN, self._CLOSE)
        self.assertEqual(reasoning, "The user is asking about the weather.")
        self.assertEqual(content, "The weather in Rome is sunny.")

    def test_no_tags_at_all(self):
        """Neither tag is present: the text comes back untouched as content."""
        reasoning, content = split_reasoning("just text", self._OPEN, self._CLOSE)
        self.assertEqual(reasoning, "")
        self.assertEqual(content, "just text")

    def test_empty_think_end_and_no_opener_match(self):
        """Empty closer and absent opener: nothing to anchor on, no split."""
        reasoning, content = split_reasoning("no opener here", self._OPEN, "")
        self.assertEqual(reasoning, "")
        self.assertEqual(content, "no opener here")

    def test_empty_text(self):
        """Empty input yields empty reasoning and empty content."""
        reasoning, content = split_reasoning("", self._OPEN, self._CLOSE)
        self.assertEqual(reasoning, "")
        self.assertEqual(content, "")
|
||||
|
||||
|
||||
class TestParseToolCalls(unittest.TestCase):
    """Check ``parse_tool_calls`` against a minimal stand-in tool module."""

    def test_with_shim(self):
        """One delimited call is extracted and its arguments are a JSON string."""

        def fake_parse(body, tools):
            # Treat the text between the delimiters as the location argument.
            return {
                "name": "get_weather",
                "arguments": {"location": body.strip()},
            }

        shim = types.SimpleNamespace(
            tool_call_start="<tool_call>",
            tool_call_end="</tool_call>",
            parse_tool_call=fake_parse,
        )
        generated = "Sure: <tool_call>Paris</tool_call>"

        calls, remaining = parse_tool_calls(generated, shim, tools=None)

        self.assertEqual(len(calls), 1)
        first = calls[0]
        self.assertEqual(first["name"], "get_weather")
        self.assertEqual(first["arguments"], '{"location": "Paris"}')
        self.assertEqual(first["index"], 0)
        # The tool-call markup must not leak into the remaining text.
        self.assertNotIn("<tool_call>", remaining)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -58,7 +58,18 @@ def messages_to_dicts(proto_messages):
|
||||
d["reasoning_content"] = msg.reasoning_content
|
||||
if msg.tool_calls:
|
||||
try:
|
||||
d["tool_calls"] = json.loads(msg.tool_calls)
|
||||
tool_calls = json.loads(msg.tool_calls)
|
||||
# Chat templates (e.g. Qwen) iterate function.arguments as a
|
||||
# mapping, but the OpenAI wire format carries it as a JSON
|
||||
# string — decode it back so the template's .items() works.
|
||||
for tc in tool_calls:
|
||||
fn = tc.get("function") if isinstance(tc, dict) else None
|
||||
if isinstance(fn, dict) and isinstance(fn.get("arguments"), str):
|
||||
try:
|
||||
fn["arguments"] = json.loads(fn["arguments"])
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
d["tool_calls"] = tool_calls
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
result.append(d)
|
||||
|
||||
122
backend/python/common/python_utils_test.py
Normal file
122
backend/python/common/python_utils_test.py
Normal file
@@ -0,0 +1,122 @@
|
||||
"""Unit tests for the shared python backend helpers (python_utils.py).
|
||||
|
||||
Run standalone (Python standard library only, no backend venv needed):
|
||||
python3 -m unittest python_utils_test
|
||||
|
||||
These mirror the server-less helper tests in backend/python/mlx/test.py
|
||||
(TestSharedHelpers), but live here so they run on any platform: the mlx
|
||||
test module imports grpc/backend_pb2 at import time and needs the MLX venv,
|
||||
whereas python_utils has no third-party dependency. Proto Message objects
|
||||
are faked with types.SimpleNamespace (real proto fields default to "").
|
||||
"""
|
||||
|
||||
import json
|
||||
import types
|
||||
import unittest
|
||||
|
||||
from python_utils import messages_to_dicts, parse_options
|
||||
|
||||
|
||||
def _msg(**fields):
|
||||
"""Fake a proto Message: every unset field is the empty string, as protobuf."""
|
||||
defaults = {
|
||||
"role": "",
|
||||
"content": "",
|
||||
"name": "",
|
||||
"tool_call_id": "",
|
||||
"reasoning_content": "",
|
||||
"tool_calls": "",
|
||||
}
|
||||
defaults.update(fields)
|
||||
return types.SimpleNamespace(**defaults)
|
||||
|
||||
|
||||
class TestParseOptions(unittest.TestCase):
    """Check the value typing performed by ``parse_options``."""

    def test_type_inference(self):
        """Float, int, bool and string values are typed; colon-less entries are skipped."""
        raw_options = [
            "temperature:0.7",
            "max_tokens:128",
            "trust:true",
            "name:hello",
            "no_colon_skipped",
        ]

        parsed = parse_options(raw_options)

        self.assertEqual(parsed["temperature"], 0.7)
        self.assertEqual(parsed["max_tokens"], 128)
        # Identity check: the value must be the bool True, not merely truthy.
        self.assertIs(parsed["trust"], True)
        self.assertEqual(parsed["name"], "hello")
        self.assertNotIn("no_colon_skipped", parsed)
|
||||
|
||||
|
||||
class TestMessagesToDicts(unittest.TestCase):
    """Check ``messages_to_dicts`` field copying and tool-call decoding."""

    @staticmethod
    def _convert_assistant(tool_calls):
        """Serialize *tool_calls* into one assistant message and convert it."""
        message = _msg(role="assistant", tool_calls=json.dumps(tool_calls))
        return messages_to_dicts([message])[0]

    def test_basic_fields(self):
        """Only populated fields appear in the converted dicts."""
        user = _msg(role="user", content="hi")
        tool = _msg(role="tool", content="42", tool_call_id="call_1", name="f")

        converted = messages_to_dicts([user, tool])

        self.assertEqual(converted[0], {"role": "user", "content": "hi"})
        self.assertEqual(converted[1]["tool_call_id"], "call_1")
        self.assertEqual(converted[1]["name"], "f")

    def test_tool_call_arguments_string_decoded_to_mapping(self):
        """A JSON-string ``arguments`` value comes back as a dict."""
        # OpenAI wire format ships function.arguments as a JSON *string*;
        # chat templates iterate it as a mapping, so it must be a dict.
        wire_call = {
            "id": "call_1",
            "type": "function",
            "function": {
                "name": "get_weather",
                "arguments": '{"location": "Rome"}',
            },
        }

        converted = self._convert_assistant([wire_call])

        arguments = converted["tool_calls"][0]["function"]["arguments"]
        self.assertEqual(arguments, {"location": "Rome"})
        self.assertEqual(dict(arguments.items()), {"location": "Rome"})

    def test_tool_call_arguments_already_mapping_is_idempotent(self):
        """``arguments`` that is already a mapping passes through unchanged."""
        converted = self._convert_assistant(
            [{"function": {"name": "f", "arguments": {"a": 1}}}]
        )
        function = converted["tool_calls"][0]["function"]
        self.assertEqual(function["arguments"], {"a": 1})

    def test_tool_call_arguments_invalid_json_left_as_string(self):
        """An ``arguments`` string that is not JSON stays a string."""
        converted = self._convert_assistant(
            [{"function": {"name": "f", "arguments": "not-json"}}]
        )
        function = converted["tool_calls"][0]["function"]
        self.assertEqual(function["arguments"], "not-json")

    def test_tool_call_without_function_key(self):
        """A tool call lacking ``function`` is kept as-is."""
        converted = self._convert_assistant([{"id": "call_1"}])
        self.assertEqual(converted["tool_calls"], [{"id": "call_1"}])

    def test_tool_calls_invalid_json_dropped(self):
        """An undecodable ``tool_calls`` payload is omitted from the result."""
        broken = _msg(role="assistant", tool_calls="{not json")
        converted = messages_to_dicts([broken])
        self.assertNotIn("tool_calls", converted[0])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
Reference in New Issue
Block a user