mirror of
https://github.com/mudler/LocalAI.git
synced 2026-07-03 12:57:02 -04:00
Two bugs broke OpenAI-style tool calling on the MLX backend (and any
Python backend sharing backend/python/common), reproduced end-to-end on
LocalAI v4.5.5 with the metal-mlx backend and
mlx-community/Qwen3.5-2B-MLX-8bit.
messages_to_dicts left each tool call's function.arguments as the raw
OpenAI-wire JSON string. HuggingFace chat templates (e.g. Qwen3.5)
iterate arguments as a mapping (.items()), so any request whose history
contained a prior assistant tool_calls message failed with HTTP 500
"Generation failed: Can only get item pairs from a mapping." — breaking
every agent loop on its second turn. Decode the string back into a dict
so the template sees a mapping.
split_reasoning returned ("", text) whenever the opening think tag was
absent. Models like Qwen3.5 open the assistant turn already inside
thinking, so the generated text carries only the closing </think>; the
whole chain-of-thought leaked into content. When the opener is missing
but the closer is present, treat everything before the closer as
reasoning.
Adds platform-independent unit tests under backend/python/common
(stdlib-only, no MLX/venv required, following parent_watch_test.py).
Assisted-by: Claude Code:claude-opus-4-8
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
109 lines
3.7 KiB
Python
109 lines
3.7 KiB
Python
"""Shared utilities for the mlx and mlx-vlm gRPC backends.
|
|
|
|
These helpers wrap mlx-lm's and mlx-vlm's native tool-parser modules, which
|
|
auto-detect the right parser from the model's chat template. Each tool
|
|
module exposes ``tool_call_start``, ``tool_call_end`` and
|
|
``parse_tool_call(text, tools) -> dict | list[dict]``.
|
|
|
|
The split-reasoning helper is generic enough to work with any think-start /
|
|
think-end delimiter pair.
|
|
"""
|
|
import json
|
|
import re
|
|
import sys
|
|
import uuid
|
|
|
|
|
|
def split_reasoning(text, think_start, think_end):
    """Separate chain-of-thought from the visible answer in ``text``.

    Returns ``(reasoning_content, remaining_text)``:

    * ``think_start`` or ``text`` is empty/``None``: ``("", text)``.
    * ``think_start`` occurs in ``text``: every
      ``think_start ... think_end`` block is removed from the text; the
      stripped block bodies are joined with newlines as the reasoning and
      the stripped leftover is the remaining text. If no block matches
      (e.g. the closer is missing), ``("", text)`` is returned unchanged.
    * ``think_start`` is absent but ``think_end`` is present: everything
      before the first ``think_end`` is the reasoning and everything after
      it is the remaining text (both stripped).
    * Neither delimiter is present: ``("", text)``.
    """
    if not (think_start and text):
        return "", text

    if think_start in text:
        block_re = re.compile(
            re.escape(think_start) + r"(.*?)" + re.escape(think_end or ""),
            re.DOTALL,
        )
        thoughts = block_re.findall(text)
        if not thoughts:
            return "", text
        visible = block_re.sub("", text).strip()
        reasoning = "\n".join(thought.strip() for thought in thoughts)
        return reasoning, visible

    # Models like Qwen3.5 open assistant turns already INSIDE thinking, so
    # the generated text carries only the closing tag. Everything before it
    # is reasoning that would otherwise leak into the content.
    if think_end and think_end in text:
        before, after = text.split(think_end, 1)
        return before.strip(), after.strip()

    return "", text
|
|
|
|
|
|
def parse_tool_calls(text, tool_module, tools):
    """Extract tool calls from ``text`` using a mlx-lm tool module.

    Ports the ``process_tool_calls`` logic from
    ``mlx_vlm/server.py`` (v0.10 onwards). ``tool_module`` must expose
    ``tool_call_start``, ``tool_call_end`` and ``parse_tool_call``.

    Returns ``(calls, remaining_text)`` where ``calls`` is a list of dicts:

        [{"index": int, "id": str, "name": str, "arguments": str (JSON)}]

    and ``remaining_text`` is the free-form text with every tool call block
    replaced by a single space and the result stripped. ``([], text)`` is
    returned unchanged if ``tool_module`` is ``None``, ``text`` is empty,
    the module lacks a start delimiter or parser, or the start delimiter
    isn't present in ``text``.

    Parsing is best-effort: a block the parser rejects, or a parsed entry
    that is not a mapping with a string ``name``, is reported on stderr and
    skipped instead of aborting the whole response. Skipped blocks are
    still removed from ``remaining_text``. An ``arguments`` value of
    ``None`` is serialised as ``{}`` so the result is always usable as a
    JSON object by callers.
    """
    if tool_module is None or not text:
        return [], text

    start = getattr(tool_module, "tool_call_start", None)
    end = getattr(tool_module, "tool_call_end", None)
    parse_fn = getattr(tool_module, "parse_tool_call", None)
    if not start or parse_fn is None or start not in text:
        return [], text

    # Without an end delimiter a call extends to the end of its line.
    terminator = re.escape(end) if end else r"(?:\n|$)"
    pattern = re.compile(re.escape(start) + r".*?" + terminator, re.DOTALL)

    matches = pattern.findall(text)
    if not matches:
        return [], text

    remaining = pattern.sub(" ", text).strip()
    calls = []
    for match in matches:
        call_body = match.strip().removeprefix(start)
        if end:
            call_body = call_body.removesuffix(end)
        call_body = call_body.strip()

        try:
            parsed = parse_fn(call_body, tools)
        except Exception as e:
            # Parsers are third-party code and model output is arbitrary;
            # one bad block must not discard the others.
            print(
                f"[mlx_utils] Invalid tool call: {call_body!r} ({e})",
                file=sys.stderr,
            )
            continue

        if not isinstance(parsed, list):
            parsed = [parsed]

        for tc in parsed:
            try:
                name = (tc.get("name") or "").strip()
                arguments = tc.get("arguments")
            except AttributeError:
                # The parser returned None / a non-mapping entry, or a
                # non-string name: skip it rather than crash the request.
                print(
                    f"[mlx_utils] Ignoring malformed tool call entry: {tc!r}",
                    file=sys.stderr,
                )
                continue
            if arguments is None:
                # Missing or null arguments become an empty JSON object
                # instead of the JSON text "null".
                arguments = {}
            calls.append(
                {
                    "index": len(calls),
                    "id": str(uuid.uuid4()),
                    "name": name,
                    "arguments": json.dumps(arguments, ensure_ascii=False),
                }
            )
    return calls, remaining
|