mirror of
https://github.com/mudler/LocalAI.git
synced 2026-07-03 12:57:02 -04:00
Two bugs broke OpenAI-style tool calling on the MLX backend (and any
Python backend sharing backend/python/common), reproduced end-to-end on
LocalAI v4.5.5 with the metal-mlx backend and
mlx-community/Qwen3.5-2B-MLX-8bit.
messages_to_dicts left each tool call's function.arguments as the raw
OpenAI-wire JSON string. HuggingFace chat templates (e.g. Qwen3.5)
iterate arguments as a mapping (.items()), so any request whose history
contained a prior assistant tool_calls message failed with HTTP 500
"Generation failed: Can only get item pairs from a mapping." — breaking
every agent loop on its second turn. Decode the string back into a dict
so the template sees a mapping.
split_reasoning returned ("", text) whenever the opening think tag was
absent. Models like Qwen3.5 open the assistant turn already inside
thinking, so the generated text carries only the closing </think>; the
whole chain-of-thought leaked into content. When the opener is missing
but the closer is present, treat everything before the closer as
reasoning.
Adds platform-independent unit tests under backend/python/common
(stdlib-only, no MLX/venv required, following parent_watch_test.py).
Assisted-by: Claude Code:claude-opus-4-8
77 lines
2.9 KiB
Python
77 lines
2.9 KiB
Python
"""Generic utilities shared across Python gRPC backends.

These helpers don't depend on any specific inference framework and can be
imported by any backend that needs to parse LocalAI gRPC options or build a
chat-template-compatible message list from proto Message objects.
"""
|
|
import json
|
|
|
|
|
|
def parse_options(options_list):
    """Turn a list of ``key:value`` option strings into a dictionary.

    Each entry is split on its first colon; entries without a colon are
    ignored. Keys and values are stripped of surrounding whitespace. Values
    are coerced in this order: case-insensitive ``true``/``false`` become
    booleans, then ``int``, then ``float``; anything else stays a string.

    Used by LoadModel to extract backend-specific options passed via
    ``ModelOptions.Options`` in ``backend.proto``.
    """
    parsed = {}
    for entry in options_list:
        if ":" not in entry:
            continue
        name, _, raw = entry.partition(":")
        name = name.strip()
        raw = raw.strip()

        lowered = raw.lower()
        if lowered in ("true", "false"):
            parsed[name] = lowered == "true"
            continue

        # Try the numeric converters from strictest to loosest; the ``else``
        # branch runs only when none of them accepted the value.
        for convert in (int, float):
            try:
                parsed[name] = convert(raw)
            except ValueError:
                continue
            break
        else:
            parsed[name] = raw
    return parsed
|
|
|
|
|
|
def _decode_tool_calls(raw):
    """Decode a JSON-encoded ``tool_calls`` payload into a template-ready list.

    Returns the decoded list, with every string ``function.arguments`` that
    holds valid JSON replaced by its decoded value. Returns ``None`` when
    *raw* is not valid JSON or does not decode to a list (e.g. the JSON
    literal ``null`` or a bare number), so the caller can omit the key rather
    than hand the chat template something it cannot iterate as tool calls.
    """
    try:
        calls = json.loads(raw)
    except json.JSONDecodeError:
        return None
    if not isinstance(calls, list):
        return None

    # Chat templates (e.g. Qwen) iterate function.arguments as a mapping, but
    # the OpenAI wire format carries it as a JSON string — decode it back so
    # the template's .items() works.
    for call in calls:
        fn = call.get("function") if isinstance(call, dict) else None
        if not isinstance(fn, dict):
            continue
        arguments = fn.get("arguments")
        if not isinstance(arguments, str):
            continue
        try:
            fn["arguments"] = json.loads(arguments)
        except json.JSONDecodeError:
            # Best effort: leave arguments that are not valid JSON untouched.
            pass
    return calls


def messages_to_dicts(proto_messages):
    """Convert proto ``Message`` objects to dicts suitable for ``apply_chat_template``.

    Handles: ``role``, ``content``, ``name``, ``tool_call_id``,
    ``reasoning_content``, ``tool_calls`` (JSON string → Python list).

    ``role`` and ``content`` are always present (``content`` defaults to an
    empty string); the other keys are only added when the corresponding
    message field is truthy. ``tool_calls`` is additionally omitted when the
    field is not valid JSON or does not decode to a list.

    HuggingFace chat templates (and their MLX/vLLM wrappers) expect a list of
    plain dicts — proto Message objects don't work directly with Jinja, so
    this conversion is needed before every ``apply_chat_template`` call.

    Args:
        proto_messages: Iterable of objects exposing ``role``, ``content``,
            ``name``, ``tool_call_id``, ``reasoning_content`` and
            ``tool_calls`` attributes.

    Returns:
        A list with one dict per input message, in input order.
    """
    result = []
    for msg in proto_messages:
        d = {"role": msg.role, "content": msg.content or ""}
        if msg.name:
            d["name"] = msg.name
        if msg.tool_call_id:
            d["tool_call_id"] = msg.tool_call_id
        if msg.reasoning_content:
            d["reasoning_content"] = msg.reasoning_content
        if msg.tool_calls:
            tool_calls = _decode_tool_calls(msg.tool_calls)
            # ``is not None`` (not truthiness) so an explicit empty list is
            # still forwarded to the template.
            if tool_calls is not None:
                d["tool_calls"] = tool_calls
        result.append(d)
    return result
|