From d74cd56b144d7215eb6ba6f470d6ce969bf465bd Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 12 Apr 2026 20:20:21 +0000 Subject: [PATCH] feat(vllm): bundle libnuma/libgomp via package.sh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vllm CPU wheel ships a _C extension that dlopens libnuma.so.1 at import time; torch's CPU kernels in turn use libgomp.so.1 (OpenMP). Without these on the host, vllm._C silently fails to register its torch ops and EngineCore crashes with: AttributeError: '_OpNamespace' '_C_utils' object has no attribute 'init_cpu_threads_env' Rather than asking every user to install libnuma1/libgomp1 on their host (or every LocalAI base image to ship them), bundle them into the backend image itself — same pattern fish-speech and the GPU libs already use. libbackend.sh adds ${EDIR}/lib to LD_LIBRARY_PATH at run time so the bundled copies are picked up automatically. - backend/python/vllm/package.sh (new): copies libnuma.so.1 and libgomp.so.1 from the builder's multilib paths into ${BACKEND}/lib, preserving soname symlinks. Runs during Dockerfile.python's 'Run backend-specific packaging' step (which already invokes package.sh if present). - backend/Dockerfile.python: install libnuma1 + libgomp1 in the builder stage so package.sh has something to copy (the Ubuntu base image otherwise only has libgomp in the gcc dep chain). - test-extra.yml: drop the workaround that installed these libs on the runner host — with the backend image self-contained, the runner no longer needs them, and the test now exercises the packaging path end-to-end the way a production host would. 
--- .github/workflows/test-extra.yml | 13 +++++---- backend/Dockerfile.python | 1 + backend/python/vllm/package.sh | 49 ++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 6 deletions(-) create mode 100755 backend/python/vllm/package.sh diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index ad26cfcc7..dc38029b3 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -519,15 +519,16 @@ jobs: - name: Dependencies # bigger-runner is a bare self-hosted runner — install the tools # we need for docker-build + protogen-go + go test (make, curl, - # unzip for the protoc download, build-essential for cgo), plus - # libnuma1 which the vllm CPU wheel's _C extension dlopens at - # runtime (libnuma.so.1 missing → init_cpu_threads_env op is not - # registered → AttributeError on LoadModel). + # unzip for the protoc download, build-essential for cgo). + # Runtime shared libraries the vllm backend needs (libnuma, + # libgomp) are packaged into the backend image via package.sh + # and NOT installed on the host — that way the CI exercises the + # packaging path end-to-end and catches missing libs that users + # would otherwise hit on a bare production host. 
run: | sudo apt-get update sudo apt-get install -y --no-install-recommends \ - make build-essential curl unzip ca-certificates git tar \ - libnuma1 libgomp1 + make build-essential curl unzip ca-certificates git tar - name: Setup Go uses: actions/setup-go@v5 with: diff --git a/backend/Dockerfile.python b/backend/Dockerfile.python index 16159c67b..f3bcf8d34 100644 --- a/backend/Dockerfile.python +++ b/backend/Dockerfile.python @@ -29,6 +29,7 @@ RUN apt-get update && \ curl python3-pip \ python-is-python3 \ python3-dev llvm \ + libnuma1 libgomp1 \ python3-venv make cmake && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* diff --git a/backend/python/vllm/package.sh b/backend/python/vllm/package.sh new file mode 100755 index 000000000..3c4ba8c19 --- /dev/null +++ b/backend/python/vllm/package.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# Script to package runtime shared libraries for the vllm backend. +# +# The final Dockerfile.python stage is FROM scratch, so system libraries +# must be explicitly copied into ${BACKEND}/lib so the backend can run on +# any host without installing them. libbackend.sh automatically adds that +# directory to LD_LIBRARY_PATH at run time. +# +# vllm's CPU C++ extension (vllm._C) dlopens libnuma.so.1 at import time; +# if it's missing, the _C_utils torch ops are never registered and the +# engine crashes with AttributeError on init_cpu_threads_env. libgomp is +# used by torch's CPU kernels; on some stripped-down hosts it's also +# absent, so we bundle it too. 
set -e

# Bundled libraries are placed in a lib/ directory next to this script.
CURDIR=$(dirname "$(realpath "$0")")
LIB_DIR="${CURDIR}/lib"
mkdir -p "${LIB_DIR}"

#######################################
# Copy one shared library into LIB_DIR, keeping its soname resolvable.
#
# Searches a fixed list of system library directories for the given soname,
# copies the real file it resolves to, and, when the real file name differs
# from the soname, recreates the soname as a relative symlink in LIB_DIR.
#
# Globals:   LIB_DIR (read)
# Arguments: $1 - soname to bundle, e.g. libnuma.so.1
# Outputs:   'cp -v' progress on stdout; a warning on stderr when the
#            library is not found in any searched directory
# Returns:   0 on success AND when the library is not found (best-effort:
#            a missing library only warns). A failing readlink/cp/ln
#            aborts the script because of 'set -e'.
#######################################
copy_with_symlinks() {
  local soname="$1"
  local hit=""
  # Keep the loop variable function-local so it cannot leak into, or
  # clobber, a 'dir' variable in the caller's scope.
  local dir
  for dir in \
    /usr/lib/x86_64-linux-gnu \
    /usr/lib/aarch64-linux-gnu \
    /lib/x86_64-linux-gnu \
    /lib/aarch64-linux-gnu \
    /usr/lib \
    /lib; do
    if [ -e "${dir}/${soname}" ]; then
      hit="${dir}/${soname}"
      break
    fi
  done
  if [ -z "${hit}" ]; then
    echo "warning: ${soname} not found in standard lib paths" >&2
    return 0
  fi
  # Follow the symlink to the real file, copy it, then recreate the symlink.
  local real
  real=$(readlink -f "${hit}")
  # '--' stops option parsing so an unusual path can never be read as a flag.
  cp -v -- "${real}" "${LIB_DIR}/"
  local real_base
  real_base=$(basename "${real}")
  if [ "${real_base}" != "${soname}" ]; then
    ln -sf -- "${real_base}" "${LIB_DIR}/${soname}"
  fi
}

copy_with_symlinks libnuma.so.1
copy_with_symlinks libgomp.so.1

echo "vllm packaging completed successfully"
ls -liah "${LIB_DIR}/"