From 228a6dfe795e093e97703fce22eaf9475fa7d26c Mon Sep 17 00:00:00 2001
From: pos-ei-don <1822533+pos-ei-don@users.noreply.github.com>
Date: Thu, 11 Jun 2026 09:05:23 +0200
Subject: [PATCH] fix(vllm): restore compatibility with vLLM >= 0.22
 (get_tokenizer moved to vllm.tokenizers) (#10252)

fix(vllm): restore compatibility with vLLM >= 0.22 (get_tokenizer moved)

vLLM 0.22 moved get_tokenizer from vllm.transformers_utils.tokenizer
to vllm.tokenizers. Because the backend requirements install vllm
without a version pin, freshly built or installed vllm backends pick up
the new release and fail to start with "ModuleNotFoundError: No module
named 'vllm.transformers_utils.tokenizer'". To the user this surfaces
as 'grpc service not ready' when loading a model.

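Use the same try/except version-compat import pattern already used
elsewhere in this file: try the new vllm.tokenizers location first and,
if that raises ImportError (which also covers ModuleNotFoundError),
fall back to the pre-0.22 vllm.transformers_utils.tokenizer path.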

Tested on a DGX Spark (GB10, ARM64) with the
cuda13-nvidia-l4t-arm64-vllm backend and vllm 0.22.0: model load, chat
completions and tool calls all work with this patch applied.

Signed-off-by: pos-ei-don <1822533+pos-ei-don@users.noreply.github.com>
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 backend/python/vllm/backend.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/backend/python/vllm/backend.py b/backend/python/vllm/backend.py
index 74598660b..ada087c35 100644
--- a/backend/python/vllm/backend.py
+++ b/backend/python/vllm/backend.py
@@ -26,7 +26,10 @@ from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.engine.async_llm_engine import AsyncLLMEngine
 from vllm.sampling_params import SamplingParams
 from vllm.utils import random_uuid
-from vllm.transformers_utils.tokenizer import get_tokenizer
+try:
+    from vllm.tokenizers import get_tokenizer  # vLLM >= 0.22
+except ImportError:
+    from vllm.transformers_utils.tokenizer import get_tokenizer  # vLLM < 0.22
 from vllm.multimodal.utils import fetch_image
 from vllm.assets.video import VideoAsset
 import base64