diff --git a/backend/python/common/mlx_utils.py b/backend/python/common/mlx_utils.py
index 6b34eb962..de624fd5b 100644
--- a/backend/python/common/mlx_utils.py
+++ b/backend/python/common/mlx_utils.py
@@ -20,7 +20,15 @@ def split_reasoning(text, think_start, think_end):
     Returns ``(reasoning_content, remaining_text)``. When ``think_start`` is
     empty or not found, returns ``("", text)`` unchanged.
     """
-    if not think_start or not text or think_start not in text:
+    if not think_start or not text:
+        return "", text
+    if think_start not in text:
+        # Models like Qwen3.5 open assistant turns already INSIDE thinking, so
+        # the generated text carries only the closing tag. Everything before it
+        # is reasoning that would otherwise leak into the content.
+        if think_end and think_end in text:
+            head, _, tail = text.partition(think_end)
+            return head.strip(), tail.strip()
         return "", text
     pattern = re.compile(
         re.escape(think_start) + r"(.*?)" + re.escape(think_end or ""),
diff --git a/backend/python/common/mlx_utils_test.py b/backend/python/common/mlx_utils_test.py
new file mode 100644
index 000000000..809354ede
--- /dev/null
+++ b/backend/python/common/mlx_utils_test.py
@@ -0,0 +1,75 @@
+"""Unit tests for the mlx/mlx-vlm shared helpers (mlx_utils.py).
+
+Run standalone (Python standard library only, no backend venv needed):
+    python3 -m unittest mlx_utils_test
+
+These mirror the server-less helper tests in backend/python/mlx/test.py
+(TestSharedHelpers), but live here so they run on any platform: the mlx
+test module imports grpc/backend_pb2 at import time and needs the MLX venv,
+whereas mlx_utils only needs the standard library.
+"""
+
+import types
+import unittest
+
+from mlx_utils import parse_tool_calls, split_reasoning
+
+
+class TestSplitReasoning(unittest.TestCase):
+    def test_both_tags(self):
+        r, c = split_reasoning(
+            "<think>step 1\nstep 2</think>The answer is 42.", "<think>", "</think>"
+        )
+        self.assertEqual(r, "step 1\nstep 2")
+        self.assertEqual(c, "The answer is 42.")
+
+    def test_implicit_opener_only_closing_tag(self):
+        # Qwen3.5 opens the assistant turn already inside thinking, so the
+        # output carries only the closing tag; everything before it is reasoning.
+        r, c = split_reasoning(
+            "The user is asking about the weather.\n</think>\n\nThe weather in Rome is sunny.",
+            "<think>",
+            "</think>",
+        )
+        self.assertEqual(r, "The user is asking about the weather.")
+        self.assertEqual(c, "The weather in Rome is sunny.")
+
+    def test_no_tags_at_all(self):
+        r, c = split_reasoning("just text", "<think>", "</think>")
+        self.assertEqual(r, "")
+        self.assertEqual(c, "just text")
+
+    def test_empty_think_end_and_no_opener_match(self):
+        # No think_end to anchor on, and the opener is absent → return unchanged.
+        r, c = split_reasoning("no opener here", "<think>", "")
+        self.assertEqual(r, "")
+        self.assertEqual(c, "no opener here")
+
+    def test_empty_text(self):
+        r, c = split_reasoning("", "<think>", "</think>")
+        self.assertEqual(r, "")
+        self.assertEqual(c, "")
+
+
+class TestParseToolCalls(unittest.TestCase):
+    def test_with_shim(self):
+        tm = types.SimpleNamespace(
+            tool_call_start="<tool_call>",
+            tool_call_end="</tool_call>",
+            parse_tool_call=lambda body, tools: {
+                "name": "get_weather",
+                "arguments": {"location": body.strip()},
+            },
+        )
+        calls, remaining = parse_tool_calls(
+            "Sure: <tool_call>Paris</tool_call>", tm, tools=None
+        )
+        self.assertEqual(len(calls), 1)
+        self.assertEqual(calls[0]["name"], "get_weather")
+        self.assertEqual(calls[0]["arguments"], '{"location": "Paris"}')
+        self.assertEqual(calls[0]["index"], 0)
+        self.assertNotIn("<tool_call>", remaining)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/backend/python/common/python_utils.py b/backend/python/common/python_utils.py
index aa61ab578..c89813e2c 100644
--- a/backend/python/common/python_utils.py
+++ b/backend/python/common/python_utils.py
@@ -58,7 +58,18 @@ def messages_to_dicts(proto_messages):
             d["reasoning_content"] = msg.reasoning_content
         if msg.tool_calls:
             try:
-                d["tool_calls"] = json.loads(msg.tool_calls)
+                tool_calls = json.loads(msg.tool_calls)
+                # Chat templates (e.g. Qwen) iterate function.arguments as a
+                # mapping, but the OpenAI wire format carries it as a JSON
+                # string — decode it back so the template's .items() works.
+                for tc in tool_calls:
+                    fn = tc.get("function") if isinstance(tc, dict) else None
+                    if isinstance(fn, dict) and isinstance(fn.get("arguments"), str):
+                        try:
+                            fn["arguments"] = json.loads(fn["arguments"])
+                        except json.JSONDecodeError:
+                            pass
+                d["tool_calls"] = tool_calls
             except json.JSONDecodeError:
                 pass
         result.append(d)
diff --git a/backend/python/common/python_utils_test.py b/backend/python/common/python_utils_test.py
new file mode 100644
index 000000000..c395ce92d
--- /dev/null
+++ b/backend/python/common/python_utils_test.py
@@ -0,0 +1,122 @@
+"""Unit tests for the shared python backend helpers (python_utils.py).
+
+Run standalone (Python standard library only, no backend venv needed):
+    python3 -m unittest python_utils_test
+
+These mirror the server-less helper tests in backend/python/mlx/test.py
+(TestSharedHelpers), but live here so they run on any platform: the mlx
+test module imports grpc/backend_pb2 at import time and needs the MLX venv,
+whereas python_utils has no third-party dependency. Proto Message objects
+are faked with types.SimpleNamespace (real proto fields default to "").
+"""
+
+import json
+import types
+import unittest
+
+from python_utils import messages_to_dicts, parse_options
+
+
+def _msg(**fields):
+    """Fake a proto Message: every unset field is the empty string, as protobuf."""
+    defaults = {
+        "role": "",
+        "content": "",
+        "name": "",
+        "tool_call_id": "",
+        "reasoning_content": "",
+        "tool_calls": "",
+    }
+    defaults.update(fields)
+    return types.SimpleNamespace(**defaults)
+
+
+class TestParseOptions(unittest.TestCase):
+    def test_type_inference(self):
+        opts = parse_options(
+            ["temperature:0.7", "max_tokens:128", "trust:true", "name:hello", "no_colon_skipped"]
+        )
+        self.assertEqual(opts["temperature"], 0.7)
+        self.assertEqual(opts["max_tokens"], 128)
+        self.assertIs(opts["trust"], True)
+        self.assertEqual(opts["name"], "hello")
+        self.assertNotIn("no_colon_skipped", opts)
+
+
+class TestMessagesToDicts(unittest.TestCase):
+    def test_basic_fields(self):
+        out = messages_to_dicts(
+            [
+                _msg(role="user", content="hi"),
+                _msg(role="tool", content="42", tool_call_id="call_1", name="f"),
+            ]
+        )
+        self.assertEqual(out[0], {"role": "user", "content": "hi"})
+        self.assertEqual(out[1]["tool_call_id"], "call_1")
+        self.assertEqual(out[1]["name"], "f")
+
+    def test_tool_call_arguments_string_decoded_to_mapping(self):
+        # OpenAI wire format ships function.arguments as a JSON *string*; chat
+        # templates iterate it as a mapping, so it must come back as a dict.
+        out = messages_to_dicts(
+            [
+                _msg(
+                    role="assistant",
+                    tool_calls=json.dumps(
+                        [
+                            {
+                                "id": "call_1",
+                                "type": "function",
+                                "function": {
+                                    "name": "get_weather",
+                                    "arguments": '{"location": "Rome"}',
+                                },
+                            }
+                        ]
+                    ),
+                )
+            ]
+        )
+        args = out[0]["tool_calls"][0]["function"]["arguments"]
+        self.assertEqual(args, {"location": "Rome"})
+        self.assertEqual(dict(args.items()), {"location": "Rome"})
+
+    def test_tool_call_arguments_already_mapping_is_idempotent(self):
+        out = messages_to_dicts(
+            [
+                _msg(
+                    role="assistant",
+                    tool_calls=json.dumps(
+                        [{"function": {"name": "f", "arguments": {"a": 1}}}]
+                    ),
+                )
+            ]
+        )
+        self.assertEqual(out[0]["tool_calls"][0]["function"]["arguments"], {"a": 1})
+
+    def test_tool_call_arguments_invalid_json_left_as_string(self):
+        out = messages_to_dicts(
+            [
+                _msg(
+                    role="assistant",
+                    tool_calls=json.dumps(
+                        [{"function": {"name": "f", "arguments": "not-json"}}]
+                    ),
+                )
+            ]
+        )
+        self.assertEqual(out[0]["tool_calls"][0]["function"]["arguments"], "not-json")
+
+    def test_tool_call_without_function_key(self):
+        out = messages_to_dicts(
+            [_msg(role="assistant", tool_calls=json.dumps([{"id": "call_1"}]))]
+        )
+        self.assertEqual(out[0]["tool_calls"], [{"id": "call_1"}])
+
+    def test_tool_calls_invalid_json_dropped(self):
+        out = messages_to_dicts([_msg(role="assistant", tool_calls="{not json")])
+        self.assertNotIn("tool_calls", out[0])
+
+
+if __name__ == "__main__":
+    unittest.main()