diff --git a/backend/python/vllm/install.sh b/backend/python/vllm/install.sh index 46cb879f9..35f3b3e8a 100755 --- a/backend/python/vllm/install.sh +++ b/backend/python/vllm/install.sh @@ -35,6 +35,21 @@ if [ "x${BUILD_PROFILE}" == "xcpu" ]; then EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-best-match" fi +# AMD ROCm: vLLM ships prebuilt ROCm wheels, but on a DEDICATED index +# (https://wheels.vllm.ai/rocm/), NOT PyPI, and ONLY for CPython 3.12. On any +# other Python the installer silently falls back to the CUDA-only PyPI wheel, +# which is unusable on an AMD GPU (import fails, so the backend never finds the +# vllm module). Force Python 3.12 before the venv is created (matches the +# intel/l4t13 cp312 bump); the hipblas branch below pulls vllm from the ROCm +# wheel index. unsafe-best-match lets uv consult that index and PyPI together. +# https://docs.vllm.ai/en/latest/getting_started/installation/gpu.html?device=rocm +if [ "x${BUILD_TYPE}" == "xhipblas" ]; then + PYTHON_VERSION="3.12" + PYTHON_PATCH="12" + PY_STANDALONE_TAG="20251120" + EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-best-match" +fi + # cublas13 pulls the vLLM wheel from a per-tag cu130 index (PyPI's vllm wheel # is built against CUDA 12 and won't load on cu130). uv's default per-package # first-match strategy would still pick the PyPI wheel, so allow it to consult @@ -194,6 +209,22 @@ elif [ "x${BUILD_TYPE}" == "xintel" ]; then export CMAKE_PREFIX_PATH="$(python -c 'import site; print(site.getsitepackages()[0])'):${CMAKE_PREFIX_PATH:-}" VLLM_TARGET_DEVICE=xpu uv pip install ${EXTRA_PIP_INSTALL_FLAGS:-} --no-deps . popd +# AMD ROCm: install vllm from its dedicated ROCm wheel index instead of the +# CUDA-only PyPI wheel. installRequirements brings the base ROCm +# torch/transformers (requirements-hipblas.txt), then we pull vllm (plus the +# matching ROCm torch, via --upgrade) from wheels.vllm.ai/rocm. This is the +# method upstream prescribes for AMD; the Python-3.12 pin is set above. +# There is intentionally no requirements-hipblas-after.txt: a bare `vllm` +# there would resolve to the CUDA wheel, and installRequirements never loads +# a ${BUILD_TYPE}-after file for hipblas anyway (BUILD_TYPE == BUILD_PROFILE). +# https://docs.vllm.ai/en/latest/getting_started/installation/gpu.html?device=rocm +elif [ "x${BUILD_TYPE}" == "xhipblas" ]; then + installRequirements + + # --upgrade reconciles the base ROCm torch to whatever the vllm ROCm wheel + # pins; --extra-index-url adds the ROCm wheel repository on top of PyPI. + uv pip install ${EXTRA_PIP_INSTALL_FLAGS:-} \ + --extra-index-url https://wheels.vllm.ai/rocm/ --upgrade vllm # FROM_SOURCE=true on a CPU build skips the prebuilt vllm wheel in # requirements-cpu-after.txt and compiles vllm locally against the host's # actual CPU. Not used by default because it takes ~30-40 minutes, but diff --git a/backend/python/vllm/requirements-hipblas-after.txt b/backend/python/vllm/requirements-hipblas-after.txt deleted file mode 100644 index e7a6c7781..000000000 --- a/backend/python/vllm/requirements-hipblas-after.txt +++ /dev/null @@ -1 +0,0 @@ -vllm