From 66963f49db8061fb6ac4e664bdad660653612336 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 22 May 2026 19:46:13 +0000 Subject: [PATCH] refactor(vllm): retire l4t13 pyproject.toml in favor of requirements-*.txt pyproject.toml only existed because uv pip install -r requirements.txt doesn't honor [tool.uv.sources]. The previous commit dropped [tool.uv.sources] (PyPI now serves the aarch64 + cu130 wheels directly), so the file no longer carries any logic the requirements-*.txt path can't. Replace with the same two-file pattern every other build profile uses: - requirements-l4t13.txt (accelerate / torch / transformers / bitsandbytes - matches cublas13's split) - requirements-l4t13-after.txt (vllm; runs after the base resolve so the cu130 torch wheel lands first) install.sh's whole l4t13 elif branch goes away; libbackend.sh's installRequirements already handles the requirements-install.txt build-deps pass, the C_INCLUDE_PATH export for PORTABLE_PYTHON, and the runProtogen call, so falling through to the standard else branch produces identical install behavior with less surface area. No functional change at install time - same wheels, same order. 
Assisted-by: Claude:claude-opus-4-7 [Read] [Edit] [Write] [Bash] Signed-off-by: Ettore Di Giacinto --- backend/python/vllm/install.sh | 26 +-------- backend/python/vllm/pyproject.toml | 53 ------------------- .../python/vllm/requirements-l4t13-after.txt | 4 ++ backend/python/vllm/requirements-l4t13.txt | 8 +++ 4 files changed, 13 insertions(+), 78 deletions(-) delete mode 100644 backend/python/vllm/pyproject.toml create mode 100644 backend/python/vllm/requirements-l4t13-after.txt create mode 100644 backend/python/vllm/requirements-l4t13.txt diff --git a/backend/python/vllm/install.sh b/backend/python/vllm/install.sh index c6f7fe3ba..320ef6772 100755 --- a/backend/python/vllm/install.sh +++ b/backend/python/vllm/install.sh @@ -47,12 +47,7 @@ fi # (torch 2.11+ ships aarch64 + cu130 manylinux wheels and vllm 0.20+ ships # an aarch64 wheel pinned to that torch). They're cp312-only, so bump the # venv Python accordingly. JetPack 6 keeps cp310 + USE_PIP=true. -# -# l4t13 still drives the install through pyproject.toml (see the elif -# branch below) so the requirements-install.txt build-deps pass runs -# first; the historical [tool.uv.sources] / jetson-ai-lab pinning was -# dropped after that mirror started shipping ABI-mismatched torch / vllm -# pairs. See backend/python/vllm/pyproject.toml for the full story. +# https://pytorch.org/blog/vllm-and-pytorch-work-together-to-improve-the-developer-experience-on-aarch64/ if [ "x${BUILD_PROFILE}" == "xl4t12" ]; then USE_PIP=true fi @@ -105,25 +100,6 @@ if [ "x${BUILD_TYPE}" == "xintel" ]; then export CMAKE_PREFIX_PATH="$(python -c 'import site; print(site.getsitepackages()[0])'):${CMAKE_PREFIX_PATH:-}" VLLM_TARGET_DEVICE=xpu uv pip install ${EXTRA_PIP_INSTALL_FLAGS:-} --no-deps . popd -# L4T arm64 (JetPack 7): drive the install through pyproject.toml so the -# requirements-install.txt build-deps pass (pybind11 for fastsafetensors, -# etc.) can run before the main resolve under --no-build-isolation. 
Bypasses -# installRequirements because requirements.txt doesn't carry that separate -# pass natively. See backend/python/vllm/pyproject.toml for the full -# rationale on why the jetson-ai-lab mirror was retired in favor of PyPI. -elif [ "x${BUILD_PROFILE}" == "xl4t13" ]; then - ensureVenv - if [ "x${PORTABLE_PYTHON}" == "xtrue" ]; then - export C_INCLUDE_PATH="${C_INCLUDE_PATH:-}:$(_portable_dir)/include/python${PYTHON_VERSION}" - fi - pushd "${backend_dir}" - # Build deps first (matches installRequirements' requirements-install.txt - # pass — fastsafetensors and friends need pybind11 in the venv before - # their sdists can build under --no-build-isolation). - uv pip install ${EXTRA_PIP_INSTALL_FLAGS:-} -r requirements-install.txt - uv pip install ${EXTRA_PIP_INSTALL_FLAGS:-} --requirement pyproject.toml - popd - runProtogen # FROM_SOURCE=true on a CPU build skips the prebuilt vllm wheel in # requirements-cpu-after.txt and compiles vllm locally against the host's # actual CPU. Not used by default because it takes ~30-40 minutes, but diff --git a/backend/python/vllm/pyproject.toml b/backend/python/vllm/pyproject.toml deleted file mode 100644 index 19c436eb0..000000000 --- a/backend/python/vllm/pyproject.toml +++ /dev/null @@ -1,53 +0,0 @@ -# L4T arm64 (JetPack 7 / sbsa cu130) install spec for the vllm backend. -# -# Since PyTorch 2.11 (April 2026) PyPI publishes aarch64 + cu130 manylinux -# wheels directly for torch / torchvision / torchaudio, and vllm 0.20+ ships -# an aarch64 wheel whose Requires-Dist pins those exact versions. uv's -# resolver therefore locks an ABI-consistent set without any custom index. -# https://pytorch.org/blog/vllm-and-pytorch-work-together-to-improve-the-developer-experience-on-aarch64/ -# -# Historically this file pinned torch / vllm / flash-attn / torchvision / -# torchaudio to pypi.jetson-ai-lab.io's SBSA cu130 mirror via -# [tool.uv.sources]. 
That mirror drifted out of sync (it published torch -# 2.11.0 next to a vllm wheel still built against torch 2.10's c10 ABI, -# producing `undefined symbol: _ZN3c1013MessageLoggerC1EPKciib` at import -# time). Moving to PyPI eliminates that drift class entirely. -# -# flash-attn is intentionally dropped: PyPI ships no aarch64 wheel for it, -# but vLLM 0.20+ already bundles its own vllm_flash_attn (fa2 + fa3) -# inside the main wheel, so the Dao-AILab package is not required at -# runtime. -# -# pyproject.toml (rather than requirements.txt) is still used on l4t13 so -# the build deps pass in requirements-install.txt - fastsafetensors's sdist -# needs pybind11 in the venv before --no-build-isolation can succeed - can -# run first; install.sh's l4t13 branch invokes `uv pip install --requirement -# pyproject.toml` after that pre-pass. -[project] -name = "localai-vllm-l4t13" -version = "0.0.0" -requires-python = ">=3.12,<3.13" -dependencies = [ - # Mirror of requirements.txt - kept in sync manually for now since the - # l4t13 path bypasses installRequirements (see install.sh). - "grpcio==1.80.0", - "protobuf", - "certifi", - "setuptools", - "pillow", - "charset-normalizer>=3.4.7", - "chardet", - # Accelerator stack from PyPI (aarch64 + cu130 wheels). vllm's - # Requires-Dist locks torch==2.11.0 / torchvision==0.26.0 / - # torchaudio==2.11.0, so listing them unpinned here just lets the - # resolver echo those exact versions back. - "torch", - "torchvision", - "torchaudio", - "vllm", - # PyPI-resolvable packages that complete the runtime - accelerate, - # transformers, bitsandbytes carry their own wheels for aarch64. 
- "accelerate", - "transformers", - "bitsandbytes", -] diff --git a/backend/python/vllm/requirements-l4t13-after.txt b/backend/python/vllm/requirements-l4t13-after.txt new file mode 100644 index 000000000..c959c6ae0 --- /dev/null +++ b/backend/python/vllm/requirements-l4t13-after.txt @@ -0,0 +1,4 @@ +# vLLM 0.20+ ships an aarch64 manylinux wheel on PyPI whose Requires-Dist pins +# torch==2.11.0 / torchvision==0.26.0 / torchaudio==2.11.0, locking an ABI- +# consistent set with the cu130 torch wheel installed above. +vllm diff --git a/backend/python/vllm/requirements-l4t13.txt b/backend/python/vllm/requirements-l4t13.txt new file mode 100644 index 000000000..e566fa855 --- /dev/null +++ b/backend/python/vllm/requirements-l4t13.txt @@ -0,0 +1,8 @@ +# JetPack 7 / L4T arm64 + CUDA 13. Since PyTorch 2.11 (April 2026), PyPI ships +# aarch64 + cu130 manylinux wheels for torch/torchvision/torchaudio directly, +# so we no longer need a custom --extra-index-url for the L4T mirror. +# https://pytorch.org/blog/vllm-and-pytorch-work-together-to-improve-the-developer-experience-on-aarch64/ +accelerate +torch +transformers +bitsandbytes