diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index ad26cfcc7..dc38029b3 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -519,15 +519,16 @@ jobs: - name: Dependencies # bigger-runner is a bare self-hosted runner — install the tools # we need for docker-build + protogen-go + go test (make, curl, - # unzip for the protoc download, build-essential for cgo), plus - # libnuma1 which the vllm CPU wheel's _C extension dlopens at - # runtime (libnuma.so.1 missing → init_cpu_threads_env op is not - # registered → AttributeError on LoadModel). + # unzip for the protoc download, build-essential for cgo). + # Runtime shared libraries the vllm backend needs (libnuma, + # libgomp) are packaged into the backend image via package.sh + # and NOT installed on the host — that way the CI exercises the + # packaging path end-to-end and catches missing libs that users + # would otherwise hit on a bare production host. run: | sudo apt-get update sudo apt-get install -y --no-install-recommends \ - make build-essential curl unzip ca-certificates git tar \ - libnuma1 libgomp1 + make build-essential curl unzip ca-certificates git tar - name: Setup Go uses: actions/setup-go@v5 with: diff --git a/backend/Dockerfile.python b/backend/Dockerfile.python index 16159c67b..f3bcf8d34 100644 --- a/backend/Dockerfile.python +++ b/backend/Dockerfile.python @@ -29,6 +29,7 @@ RUN apt-get update && \ curl python3-pip \ python-is-python3 \ python3-dev llvm \ + libnuma1 libgomp1 \ python3-venv make cmake && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* diff --git a/backend/python/vllm/package.sh b/backend/python/vllm/package.sh new file mode 100755 index 000000000..3c4ba8c19 --- /dev/null +++ b/backend/python/vllm/package.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# Script to package runtime shared libraries for the vllm backend. 
#
# The final Dockerfile.python stage is FROM scratch, so system libraries
# must be explicitly copied into ${BACKEND}/lib so the backend can run on
# any host without installing them. libbackend.sh automatically adds that
# directory to LD_LIBRARY_PATH at run time.
#
# vllm's CPU C++ extension (vllm._C) dlopens libnuma.so.1 at import time;
# if it's missing, the _C_utils torch ops are never registered and the
# engine crashes with AttributeError on init_cpu_threads_env. libgomp is
# used by torch's CPU kernels; on some stripped-down hosts it's also
# absent, so we bundle it too.

# -e: abort on the first failing command; -u: treat unset variables as
# errors; pipefail: a pipeline fails if any of its stages fails.
set -euo pipefail

# Libraries are collected in a "lib" directory next to this script.
CURDIR=$(dirname "$(realpath "$0")")
LIB_DIR="${CURDIR}/lib"
mkdir -p -- "${LIB_DIR}"

#######################################
# Bundle one shared library into LIB_DIR.
#
# Searches a fixed list of system library directories for the given
# soname, copies the file that name finally resolves to, and, when the
# resolved file carries a different name (e.g. libfoo.so.1 -> libfoo.so.1.2),
# recreates the soname as a relative symlink beside the copy.
#
# Globals:   LIB_DIR (read)
# Arguments: $1 - soname to bundle, e.g. libnuma.so.1
# Outputs:   `cp -v` progress on stdout; a warning on stderr when the
#            library cannot be found
# Returns:   0 when the library was bundled, and also when it was not
#            found (a missing library is deliberately non-fatal). A failing
#            readlink/cp/ln aborts the script because of `set -e`.
#######################################
copy_with_symlinks() {
  local soname="${1:?copy_with_symlinks: soname argument is required}"

  # Probe the host's own multiarch directories first so that a
  # foreign-architecture copy installed alongside is never preferred. This
  # only matches architectures whose multiarch triplet has the form
  # "<uname -m>-linux-gnu"; the original hard-coded list is kept as a
  # fallback for everything else.
  local machine
  machine=$(uname -m)
  local -a search_dirs=(
    "/usr/lib/${machine}-linux-gnu"
    "/lib/${machine}-linux-gnu"
    /usr/lib/x86_64-linux-gnu
    /usr/lib/aarch64-linux-gnu
    /lib/x86_64-linux-gnu
    /lib/aarch64-linux-gnu
    /usr/lib
    /lib
  )

  # `dir` is declared local so the loop variable does not leak to callers.
  local dir
  local hit=""
  for dir in "${search_dirs[@]}"; do
    if [[ -e "${dir}/${soname}" ]]; then
      hit="${dir}/${soname}"
      break
    fi
  done

  if [[ -z "${hit}" ]]; then
    echo "warning: ${soname} not found in standard lib paths" >&2
    return 0
  fi

  # Follow the symlink chain to the real file, copy it, then recreate the
  # soname symlink. Declaration and assignment are split so that a failing
  # command substitution is not masked by `local`.
  local real
  real=$(readlink -f "${hit}")
  cp -v -- "${real}" "${LIB_DIR}/"

  local real_base
  real_base=$(basename "${real}")
  if [[ "${real_base}" != "${soname}" ]]; then
    # -n: replace an existing symlink instead of descending into its target.
    ln -sfn -- "${real_base}" "${LIB_DIR}/${soname}"
  fi
}

copy_with_symlinks libnuma.so.1
copy_with_symlinks libgomp.so.1

echo "vllm packaging completed successfully"
ls -liah "${LIB_DIR}/"