Files
LocalAI/backend/python/common/vllm_utils.py
Ettore Di Giacinto b215843807 feat(vllm): CPU support + shared utils + vllm-omni feature parity
- Split vllm install per acceleration: move generic `vllm` out of
  requirements-after.txt into per-profile after files (cublas12, hipblas,
  intel) and add CPU wheel URL for cpu-after.txt
- requirements-cpu.txt now pulls torch==2.7.0+cpu from PyTorch CPU index
- backend/index.yaml: register cpu-vllm / cpu-vllm-development variants
- New backend/python/common/vllm_utils.py: shared parse_options,
  messages_to_dicts, setup_parsers helpers (used by both vllm backends)
- vllm-omni: replace hardcoded chat template with tokenizer.apply_chat_template,
  wire native parsers via shared utils, emit ChatDelta with token counts,
  add TokenizeString and Free RPCs, detect CPU and set VLLM_TARGET_DEVICE
- Add test_cpu_inference.py: standalone script to validate CPU build with
  a small model (Qwen2.5-0.5B-Instruct)
2026-04-12 14:48:28 +00:00

85 lines
2.9 KiB
Python

"""Shared utilities for vLLM-based backends."""
import json
import sys
def parse_options(options_list):
"""Parse Options[] list of 'key:value' strings into a dict.
Supports type inference for common cases (bool, int, float).
Used by LoadModel to extract backend-specific options.
"""
opts = {}
for opt in options_list:
if ":" not in opt:
continue
key, value = opt.split(":", 1)
key = key.strip()
value = value.strip()
# Try type conversion
if value.lower() in ("true", "false"):
opts[key] = value.lower() == "true"
else:
try:
opts[key] = int(value)
except ValueError:
try:
opts[key] = float(value)
except ValueError:
opts[key] = value
return opts
def messages_to_dicts(proto_messages):
"""Convert proto Message objects to list of dicts for apply_chat_template().
Handles: role, content, name, tool_call_id, reasoning_content, tool_calls (JSON string -> list).
"""
result = []
for msg in proto_messages:
d = {"role": msg.role, "content": msg.content or ""}
if msg.name:
d["name"] = msg.name
if msg.tool_call_id:
d["tool_call_id"] = msg.tool_call_id
if msg.reasoning_content:
d["reasoning_content"] = msg.reasoning_content
if msg.tool_calls:
try:
d["tool_calls"] = json.loads(msg.tool_calls)
except json.JSONDecodeError:
pass
result.append(d)
return result
def setup_parsers(opts):
"""Return (tool_parser_cls, reasoning_parser_cls) tuple from opts dict.
Uses vLLM's native ToolParserManager and ReasoningParserManager.
Returns (None, None) if vLLM is not installed or parsers not available.
"""
tool_parser_cls = None
reasoning_parser_cls = None
tool_parser_name = opts.get("tool_parser")
reasoning_parser_name = opts.get("reasoning_parser")
if tool_parser_name:
try:
from vllm.tool_parsers import ToolParserManager
tool_parser_cls = ToolParserManager.get_tool_parser(tool_parser_name)
print(f"[vllm_utils] Loaded tool_parser: {tool_parser_name}", file=sys.stderr)
except Exception as e:
print(f"[vllm_utils] Failed to load tool_parser {tool_parser_name}: {e}", file=sys.stderr)
if reasoning_parser_name:
try:
from vllm.reasoning import ReasoningParserManager
reasoning_parser_cls = ReasoningParserManager.get_reasoning_parser(reasoning_parser_name)
print(f"[vllm_utils] Loaded reasoning_parser: {reasoning_parser_name}", file=sys.stderr)
except Exception as e:
print(f"[vllm_utils] Failed to load reasoning_parser {reasoning_parser_name}: {e}", file=sys.stderr)
return tool_parser_cls, reasoning_parser_cls