From 2de6ca51d4b573314ab5271801a1fe1784970de4 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 22 May 2026 19:32:04 +0000 Subject: [PATCH] fix(vllm): switch L4T13 backend to PyPI aarch64+cu130 wheels The L4T13 vllm backend pulled torch / torchvision / torchaudio / vllm from pypi.jetson-ai-lab.io's sbsa/cu130 mirror via [tool.uv.sources] with no version pins. That mirror started shipping torch 2.11.0 next to a vllm-0.20.0+cu130 wheel that was still compiled against torch 2.10's c10 ABI, so uv landed on the mismatched pair and vllm crashed at import: ImportError: vllm/_C.abi3.so: undefined symbol: _ZN3c1013MessageLoggerC1EPKciib (c10::MessageLogger's constructor signature changed between torch 2.10 and 2.11; the vllm wheel referenced the 2.10 form, the installed libc10.so exported only the 2.11 form.) Since torch 2.11 (April 2026) PyPI publishes its own aarch64 + cu130 manylinux wheels, and vllm 0.20.0 ships an aarch64 wheel whose Requires-Dist locks torch==2.11.0 / torchvision==0.26.0 / torchaudio==2.11.0. That makes uv's resolver produce an ABI-consistent set automatically, so the mirror and the [tool.uv.sources] pinning are no longer needed. flash-attn is dropped from the dep list: PyPI has no aarch64 wheel, but vLLM 0.20+ already bundles its own vllm_flash_attn (fa2 + fa3) inside the main wheel, so the Dao-AILab package isn't required at runtime. 
Reference: https://pytorch.org/blog/vllm-and-pytorch-work-together-to-improve-the-developer-experience-on-aarch64/ Assisted-by: Claude:claude-opus-4-7 [Read] [Edit] [Write] [Bash] [WebFetch] Signed-off-by: Ettore Di Giacinto --- backend/python/vllm/install.sh | 28 +++++++------- backend/python/vllm/pyproject.toml | 60 +++++++++++++----------------- 2 files changed, 41 insertions(+), 47 deletions(-) diff --git a/backend/python/vllm/install.sh b/backend/python/vllm/install.sh index cb8729ac1..c6f7fe3ba 100755 --- a/backend/python/vllm/install.sh +++ b/backend/python/vllm/install.sh @@ -43,14 +43,16 @@ if [ "x${BUILD_PROFILE}" == "xcublas13" ]; then EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-best-match" fi -# JetPack 7 / L4T arm64 wheels (torch, vllm, flash-attn) live on -# pypi.jetson-ai-lab.io and are built for cp312, so bump the venv Python -# accordingly. JetPack 6 keeps cp310 + USE_PIP=true. +# JetPack 7 / L4T arm64 vllm + torch wheels come straight from PyPI now +# (torch 2.11+ ships aarch64 + cu130 manylinux wheels and vllm 0.20+ ships +# an aarch64 wheel pinned to that torch). They're cp312-only, so bump the +# venv Python accordingly. JetPack 6 keeps cp310 + USE_PIP=true. # -# l4t13 uses pyproject.toml (see the elif branch below) to pin only the -# L4T-specific wheels to the jetson-ai-lab index via [tool.uv.sources]. -# That keeps PyPI as the resolution path for transitive deps like -# anthropic/openai/propcache, which the L4T mirror's proxy 503s on. +# l4t13 still drives the install through pyproject.toml (see the elif +# branch below) so the requirements-install.txt build-deps pass runs +# first; the historical [tool.uv.sources] / jetson-ai-lab pinning was +# dropped after that mirror started shipping ABI-mismatched torch / vllm +# pairs. See backend/python/vllm/pyproject.toml for the full story. 
if [ "x${BUILD_PROFILE}" == "xl4t12" ]; then USE_PIP=true fi @@ -103,12 +105,12 @@ if [ "x${BUILD_TYPE}" == "xintel" ]; then export CMAKE_PREFIX_PATH="$(python -c 'import site; print(site.getsitepackages()[0])'):${CMAKE_PREFIX_PATH:-}" VLLM_TARGET_DEVICE=xpu uv pip install ${EXTRA_PIP_INSTALL_FLAGS:-} --no-deps . popd -# L4T arm64 (JetPack 7): drive the install through pyproject.toml so that -# [tool.uv.sources] can pin torch/vllm/flash-attn/torchvision/torchaudio -# to the jetson-ai-lab index, while everything else (transitive deps and -# PyPI-resolvable packages like transformers) comes from PyPI. Bypasses -# installRequirements because uv pip install -r requirements.txt does not -# honor sources — see backend/python/vllm/pyproject.toml for the rationale. +# L4T arm64 (JetPack 7): drive the install through pyproject.toml so the +# requirements-install.txt build-deps pass (pybind11 for fastsafetensors, +# etc.) can run before the main resolve under --no-build-isolation. Bypasses +# installRequirements because requirements.txt doesn't carry that separate +# pass natively. See backend/python/vllm/pyproject.toml for the full +# rationale on why the jetson-ai-lab mirror was retired in favor of PyPI. elif [ "x${BUILD_PROFILE}" == "xl4t13" ]; then ensureVenv if [ "x${PORTABLE_PYTHON}" == "xtrue" ]; then diff --git a/backend/python/vllm/pyproject.toml b/backend/python/vllm/pyproject.toml index b06b9c425..19c436eb0 100644 --- a/backend/python/vllm/pyproject.toml +++ b/backend/python/vllm/pyproject.toml @@ -1,32 +1,34 @@ # L4T arm64 (JetPack 7 / sbsa cu130) install spec for the vllm backend. # -# Why this file exists, and why only the l4t13 BUILD_PROFILE consumes it: +# Since PyTorch 2.11 (April 2026) PyPI publishes aarch64 + cu130 manylinux +# wheels directly for torch / torchvision / torchaudio, and vllm 0.20+ ships +# an aarch64 wheel whose Requires-Dist pins those exact versions. uv's +# resolver therefore locks an ABI-consistent set without any custom index. 
+# https://pytorch.org/blog/vllm-and-pytorch-work-together-to-improve-the-developer-experience-on-aarch64/ # -# pypi.jetson-ai-lab.io hosts the L4T-specific torch / vllm / flash-attn -# wheels we need on aarch64 + cuda13, but it ALSO transparently proxies the -# rest of PyPI through `/+f//` URLs that 503 frequently. With -# `--extra-index-url` + `--index-strategy=unsafe-best-match` (the historical -# fix in install.sh) uv would pick those proxy URLs for ordinary PyPI -# packages — `anthropic`, `openai`, `propcache`, `annotated-types` — and -# trip on the 503s. See e.g. CI run 25212201349 (anthropic-0.97.0). +# Historically this file pinned torch / vllm / flash-attn / torchvision / +# torchaudio to pypi.jetson-ai-lab.io's SBSA cu130 mirror via +# [tool.uv.sources]. That mirror drifted out of sync (it published torch +# 2.11.0 next to a vllm wheel still built against torch 2.10's c10 ABI, +# producing `undefined symbol: _ZN3c1013MessageLoggerC1EPKciib` at import +# time). Moving to PyPI eliminates that drift class entirely. # -# `explicit = true` on the index makes uv consult the L4T mirror ONLY for -# packages mapped under [tool.uv.sources]. Everything else goes to PyPI. -# This breaks the historical 503 path without losing access to the L4T -# wheels we actually need from there. +# flash-attn is intentionally dropped: PyPI ships no aarch64 wheel for it, +# but vLLM 0.20+ already bundles its own vllm_flash_attn (fa2 + fa3) +# inside the main wheel, so the Dao-AILab package is not required at +# runtime. # -# `uv pip install -r requirements.txt` does NOT honor [tool.uv.sources] -# (sources are project-mode only, not pip-compat mode), so install.sh's -# l4t13 branch invokes `uv pip install --requirement pyproject.toml` -# directly. Other BUILD_PROFILEs continue to use the requirements-*.txt -# pipeline through libbackend.sh's installRequirements and never read -# this file. 
+# pyproject.toml (rather than requirements.txt) is still used on l4t13 so +# the build deps pass in requirements-install.txt - fastsafetensors's sdist +# needs pybind11 in the venv before --no-build-isolation can succeed - can +# run first; install.sh's l4t13 branch invokes `uv pip install --requirement +# pyproject.toml` after that pre-pass. [project] name = "localai-vllm-l4t13" version = "0.0.0" requires-python = ">=3.12,<3.13" dependencies = [ - # Mirror of requirements.txt — kept in sync manually for now since the + # Mirror of requirements.txt - kept in sync manually for now since the # l4t13 path bypasses installRequirements (see install.sh). "grpcio==1.80.0", "protobuf", @@ -35,27 +37,17 @@ dependencies = [ "pillow", "charset-normalizer>=3.4.7", "chardet", - # L4T-specific accelerator stack (sourced from jetson-ai-lab below). + # Accelerator stack from PyPI (aarch64 + cu130 wheels). vllm's + # Requires-Dist locks torch==2.11.0 / torchvision==0.26.0 / + # torchaudio==2.11.0, so listing them unpinned here just lets the + # resolver echo those exact versions back. "torch", "torchvision", "torchaudio", - "flash-attn", "vllm", - # PyPI-resolvable packages that complete the runtime — accelerate, + # PyPI-resolvable packages that complete the runtime - accelerate, # transformers, bitsandbytes carry their own wheels for aarch64. "accelerate", "transformers", "bitsandbytes", ] - -[[tool.uv.index]] -name = "jetson-ai-lab" -url = "https://pypi.jetson-ai-lab.io/sbsa/cu130" -explicit = true - -[tool.uv.sources] -torch = { index = "jetson-ai-lab" } -torchvision = { index = "jetson-ai-lab" } -torchaudio = { index = "jetson-ai-lab" } -flash-attn = { index = "jetson-ai-lab" } -vllm = { index = "jetson-ai-lab" }