From 2de6ca51d4b573314ab5271801a1fe1784970de4 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Fri, 22 May 2026 19:32:04 +0000
Subject: [PATCH] fix(vllm): switch L4T13 backend to PyPI aarch64+cu130 wheels

The L4T13 vllm backend pulled torch / torchvision / torchaudio / vllm from
pypi.jetson-ai-lab.io's sbsa/cu130 mirror via [tool.uv.sources] with no
version pins. That mirror started shipping torch 2.11.0 next to a
vllm-0.20.0+cu130 wheel that was still compiled against torch 2.10's c10
ABI, so uv landed on the mismatched pair and vllm crashed at import:

  ImportError: vllm/_C.abi3.so: undefined symbol:
  _ZN3c1013MessageLoggerC1EPKciib

(c10::MessageLogger's constructor signature changed between torch 2.10 and
2.11; the vllm wheel referenced the 2.10 form, the installed libc10.so
exported only the 2.11 form.)

Since torch 2.11 (April 2026), PyPI publishes its own aarch64 + cu130
manylinux wheels, and vllm 0.20.0 ships an aarch64 wheel whose
Requires-Dist locks torch==2.11.0 / torchvision==0.26.0 /
torchaudio==2.11.0. That makes uv's resolver produce an ABI-consistent set
automatically, so the mirror and the [tool.uv.sources] pinning are no
longer needed.

flash-attn is dropped from the dep list: PyPI has no aarch64 wheel, but
vLLM 0.20+ already bundles its own vllm_flash_attn (fa2 + fa3) inside the
main wheel, so the Dao-AILab package isn't required at runtime.

Reference: https://pytorch.org/blog/vllm-and-pytorch-work-together-to-improve-the-developer-experience-on-aarch64/

Assisted-by: Claude:claude-opus-4-7 [Read] [Edit] [Write] [Bash] [WebFetch]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 backend/python/vllm/install.sh     | 28 +++++++-------
 backend/python/vllm/pyproject.toml | 60 +++++++++++++-----------------
 2 files changed, 41 insertions(+), 47 deletions(-)

diff --git a/backend/python/vllm/install.sh b/backend/python/vllm/install.sh
index cb8729ac1..c6f7fe3ba 100755
--- a/backend/python/vllm/install.sh
+++ b/backend/python/vllm/install.sh
@@ -43,14 +43,16 @@ if [ "x${BUILD_PROFILE}" == "xcublas13" ]; then
     EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-best-match"
 fi
 
-# JetPack 7 / L4T arm64 wheels (torch, vllm, flash-attn) live on
-# pypi.jetson-ai-lab.io and are built for cp312, so bump the venv Python
-# accordingly. JetPack 6 keeps cp310 + USE_PIP=true.
+# JetPack 7 / L4T arm64 vllm + torch wheels come straight from PyPI now
+# (torch 2.11+ ships aarch64 + cu130 manylinux wheels and vllm 0.20+ ships
+# an aarch64 wheel pinned to that torch). They're cp312-only, so bump the
+# venv Python accordingly. JetPack 6 keeps cp310 + USE_PIP=true.
 #
-# l4t13 uses pyproject.toml (see the elif branch below) to pin only the
-# L4T-specific wheels to the jetson-ai-lab index via [tool.uv.sources].
-# That keeps PyPI as the resolution path for transitive deps like
-# anthropic/openai/propcache, which the L4T mirror's proxy 503s on.
+# l4t13 still drives the install through pyproject.toml (see the elif
+# branch below) so the requirements-install.txt build-deps pass runs
+# first; the historical [tool.uv.sources] / jetson-ai-lab pinning was
+# dropped after that mirror started shipping ABI-mismatched torch / vllm
+# pairs. See backend/python/vllm/pyproject.toml for the full story.
 if [ "x${BUILD_PROFILE}" == "xl4t12" ]; then
     USE_PIP=true
 fi
@@ -103,12 +105,12 @@ if [ "x${BUILD_TYPE}" == "xintel" ]; then
         export CMAKE_PREFIX_PATH="$(python -c 'import site; print(site.getsitepackages()[0])'):${CMAKE_PREFIX_PATH:-}"
         VLLM_TARGET_DEVICE=xpu uv pip install ${EXTRA_PIP_INSTALL_FLAGS:-} --no-deps .
     popd
-# L4T arm64 (JetPack 7): drive the install through pyproject.toml so that
-# [tool.uv.sources] can pin torch/vllm/flash-attn/torchvision/torchaudio
-# to the jetson-ai-lab index, while everything else (transitive deps and
-# PyPI-resolvable packages like transformers) comes from PyPI. Bypasses
-# installRequirements because uv pip install -r requirements.txt does not
-# honor sources — see backend/python/vllm/pyproject.toml for the rationale.
+# L4T arm64 (JetPack 7): drive the install through pyproject.toml so the
+# requirements-install.txt build-deps pass (pybind11 for fastsafetensors,
+# etc.) can run before the main resolve under --no-build-isolation. Bypasses
+# installRequirements because requirements.txt doesn't carry that separate
+# pass natively. See backend/python/vllm/pyproject.toml for the full
+# rationale on why the jetson-ai-lab mirror was retired in favor of PyPI.
 elif [ "x${BUILD_PROFILE}" == "xl4t13" ]; then
     ensureVenv
     if [ "x${PORTABLE_PYTHON}" == "xtrue" ]; then
diff --git a/backend/python/vllm/pyproject.toml b/backend/python/vllm/pyproject.toml
index b06b9c425..19c436eb0 100644
--- a/backend/python/vllm/pyproject.toml
+++ b/backend/python/vllm/pyproject.toml
@@ -1,32 +1,34 @@
 # L4T arm64 (JetPack 7 / sbsa cu130) install spec for the vllm backend.
 #
-# Why this file exists, and why only the l4t13 BUILD_PROFILE consumes it:
+# Since PyTorch 2.11 (April 2026) PyPI publishes aarch64 + cu130 manylinux
+# wheels directly for torch / torchvision / torchaudio, and vllm 0.20+ ships
+# an aarch64 wheel whose Requires-Dist pins those exact versions. uv's
+# resolver therefore locks an ABI-consistent set without any custom index.
+# https://pytorch.org/blog/vllm-and-pytorch-work-together-to-improve-the-developer-experience-on-aarch64/
 #
-# pypi.jetson-ai-lab.io hosts the L4T-specific torch / vllm / flash-attn
-# wheels we need on aarch64 + cuda13, but it ALSO transparently proxies the
-# rest of PyPI through `/+f/<sha>/<filename>` URLs that 503 frequently. With
-# `--extra-index-url` + `--index-strategy=unsafe-best-match` (the historical
-# fix in install.sh) uv would pick those proxy URLs for ordinary PyPI
-# packages — `anthropic`, `openai`, `propcache`, `annotated-types` — and
-# trip on the 503s. See e.g. CI run 25212201349 (anthropic-0.97.0).
+# Historically this file pinned torch / vllm / flash-attn / torchvision /
+# torchaudio to pypi.jetson-ai-lab.io's SBSA cu130 mirror via
+# [tool.uv.sources]. That mirror drifted out of sync (it published torch
+# 2.11.0 next to a vllm wheel still built against torch 2.10's c10 ABI,
+# producing `undefined symbol: _ZN3c1013MessageLoggerC1EPKciib` at import
+# time). Moving to PyPI eliminates that drift class entirely.
 #
-# `explicit = true` on the index makes uv consult the L4T mirror ONLY for
-# packages mapped under [tool.uv.sources]. Everything else goes to PyPI.
-# This breaks the historical 503 path without losing access to the L4T
-# wheels we actually need from there.
+# flash-attn is intentionally dropped: PyPI ships no aarch64 wheel for it,
+# but vLLM 0.20+ already bundles its own vllm_flash_attn (fa2 + fa3)
+# inside the main wheel, so the Dao-AILab package is not required at
+# runtime.
 #
-# `uv pip install -r requirements.txt` does NOT honor [tool.uv.sources]
-# (sources are project-mode only, not pip-compat mode), so install.sh's
-# l4t13 branch invokes `uv pip install --requirement pyproject.toml`
-# directly. Other BUILD_PROFILEs continue to use the requirements-*.txt
-# pipeline through libbackend.sh's installRequirements and never read
-# this file.
+# pyproject.toml (rather than requirements.txt) is still used on l4t13 so
+# that the build-deps pass in requirements-install.txt can run first:
+# fastsafetensors's sdist needs pybind11 in the venv before it can build
+# under --no-build-isolation. install.sh's l4t13 branch then invokes
+# `uv pip install --requirement pyproject.toml` after that pre-pass.
 [project]
 name = "localai-vllm-l4t13"
 version = "0.0.0"
 requires-python = ">=3.12,<3.13"
 dependencies = [
-    # Mirror of requirements.txt — kept in sync manually for now since the
+    # Mirror of requirements.txt - kept in sync manually for now since the
     # l4t13 path bypasses installRequirements (see install.sh).
     "grpcio==1.80.0",
     "protobuf",
@@ -35,27 +37,17 @@ dependencies = [
     "pillow",
     "charset-normalizer>=3.4.7",
     "chardet",
-    # L4T-specific accelerator stack (sourced from jetson-ai-lab below).
+    # Accelerator stack from PyPI (aarch64 + cu130 wheels). vllm's
+    # Requires-Dist locks torch==2.11.0 / torchvision==0.26.0 /
+    # torchaudio==2.11.0, so listing them unpinned here just lets the
+    # resolver echo those exact versions back.
     "torch",
     "torchvision",
     "torchaudio",
-    "flash-attn",
     "vllm",
-    # PyPI-resolvable packages that complete the runtime — accelerate,
+    # PyPI-resolvable packages that complete the runtime - accelerate,
     # transformers, bitsandbytes carry their own wheels for aarch64.
     "accelerate",
     "transformers",
     "bitsandbytes",
 ]
-
-[[tool.uv.index]]
-name = "jetson-ai-lab"
-url = "https://pypi.jetson-ai-lab.io/sbsa/cu130"
-explicit = true
-
-[tool.uv.sources]
-torch = { index = "jetson-ai-lab" }
-torchvision = { index = "jetson-ai-lab" }
-torchaudio = { index = "jetson-ai-lab" }
-flash-attn = { index = "jetson-ai-lab" }
-vllm = { index = "jetson-ai-lab" }