From 228a6dfe795e093e97703fce22eaf9475fa7d26c Mon Sep 17 00:00:00 2001
From: pos-ei-don <1822533+pos-ei-don@users.noreply.github.com>
Date: Thu, 11 Jun 2026 09:05:23 +0200
Subject: [PATCH] fix(vllm): restore compatibility with vLLM >= 0.22 (get_tokenizer moved to vllm.tokenizers) (#10252)

fix(vllm): restore compatibility with vLLM >= 0.22 (get_tokenizer moved)

vLLM 0.22 moved get_tokenizer from vllm.transformers_utils.tokenizer to
vllm.tokenizers. Since the backend requirements install vllm unpinned,
freshly built/installed vllm backends currently fail to start with
ModuleNotFoundError: No module named 'vllm.transformers_utils.tokenizer'
(surfacing as 'grpc service not ready' when loading a model).

Use the same try/except version-compat import pattern already used
elsewhere in this file: try the new vllm.tokenizers location first and
fall back to the pre-0.22 path.

Tested on a DGX Spark (GB10, ARM64) with the
cuda13-nvidia-l4t-arm64-vllm backend and vllm 0.22.0: model load, chat
completions and tool calls all work with this patch applied.

Signed-off-by: pos-ei-don <1822533+pos-ei-don@users.noreply.github.com>
Co-authored-by: Claude Opus 4.8 (1M context)
---
 backend/python/vllm/backend.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/backend/python/vllm/backend.py b/backend/python/vllm/backend.py
index 74598660b..ada087c35 100644
--- a/backend/python/vllm/backend.py
+++ b/backend/python/vllm/backend.py
@@ -26,7 +26,10 @@ from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.engine.async_llm_engine import AsyncLLMEngine
 from vllm.sampling_params import SamplingParams
 from vllm.utils import random_uuid
-from vllm.transformers_utils.tokenizer import get_tokenizer
+try:
+    from vllm.tokenizers import get_tokenizer  # vLLM >= 0.22
+except ImportError:
+    from vllm.transformers_utils.tokenizer import get_tokenizer  # vLLM < 0.22
 from vllm.multimodal.utils import fetch_image
 from vllm.assets.video import VideoAsset
 import base64