feat(vllm): bundle libnuma/libgomp via package.sh

The vllm CPU wheel ships a _C extension that dlopens libnuma.so.1 at import time; torch's CPU kernels in turn use libgomp.so.1 (OpenMP). Without these on the host, vllm._C silently fails to register its torch ops and EngineCore crashes with:

    AttributeError: '_OpNamespace' '_C_utils' object has no attribute 'init_cpu_threads_env'

Rather than asking every user to install libnuma1/libgomp1 on their host (or every LocalAI base image to ship them), bundle them into the backend image itself — same pattern fish-speech and the GPU libs already use. libbackend.sh adds ${EDIR}/lib to LD_LIBRARY_PATH at run time so the bundled copies are picked up automatically.

- backend/python/vllm/package.sh (new): copies libnuma.so.1 and libgomp.so.1 from the builder's multiarch paths into ${BACKEND}/lib, preserving soname symlinks. Runs during Dockerfile.python's 'Run backend-specific packaging' step (which already invokes package.sh if present).
- backend/Dockerfile.python: install libnuma1 + libgomp1 in the builder stage so package.sh has something to copy (the Ubuntu base image otherwise only has libgomp in the gcc dep chain).
- test-extra.yml: drop the workaround that installed these libs on the runner host — with the backend image self-contained, the runner no longer needs them, and the test now exercises the packaging path end-to-end the way a production host would.
2026-06-08 00:36:37 -04:00 · 2026-04-12 20:20:21 +00:00
parent 017bdee4e4
commit d74cd56b14
3 changed files with 57 additions and 6 deletions
--- a/backend/python/vllm/package.sh
+++ b/backend/python/vllm/package.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+# Script to package runtime shared libraries for the vllm backend.
+#
+# The final Dockerfile.python stage is FROM scratch, so system libraries
+# must be explicitly copied into ${BACKEND}/lib so the backend can run on
+# any host without installing them. libbackend.sh automatically adds that
+# directory to LD_LIBRARY_PATH at run time.
+#
+# vllm's CPU C++ extension (vllm._C) dlopens libnuma.so.1 at import time;
+# if it's missing, the _C_utils torch ops are never registered and the
+# engine crashes with AttributeError on init_cpu_threads_env. libgomp is
+# used by torch's CPU kernels; on some stripped-down hosts it's also
+# absent, so we bundle it too.
+
+set -o errexit  # long spelling of -e: stop at the first command that fails
+
+CURDIR="$(dirname "$(realpath "$0")")"  # directory that holds this script
+LIB_DIR="${CURDIR}/lib"  # destination for the bundled shared libraries
+mkdir -p "${LIB_DIR}"
+
+copy_with_symlinks() {  # Bundle one shared library, looked up by soname, into ${LIB_DIR}.
+    local soname="$1"  # $1: soname to bundle, e.g. libnuma.so.1
+    local hit="" dir  # dir is local too, so the loop variable does not leak into the script's globals
+    for dir in /usr/lib/x86_64-linux-gnu /usr/lib/aarch64-linux-gnu /lib/x86_64-linux-gnu /lib/aarch64-linux-gnu /usr/lib /lib; do  # first directory containing the soname wins
+        if [ -e "${dir}/${soname}" ]; then
+            hit="${dir}/${soname}"
+            break
+        fi
+    done
+    if [ -z "${hit}" ]; then
+        echo "warning: ${soname} not found in standard lib paths" >&2
+        return 0  # best-effort: a missing library is only a warning and packaging continues
+    fi
+    # Resolve ${hit} to the real file, copy that, then point ${soname} at the copy with a relative symlink.
+    local real
+    real=$(readlink -f "${hit}")
+    cp -v "${real}" "${LIB_DIR}/"
+    local real_base
+    real_base=$(basename "${real}")
+    if [ "${real_base}" != "${soname}" ]; then  # no link needed when the real file is already named ${soname}
+        ln -sf "${real_base}" "${LIB_DIR}/${soname}"
+    fi
+}
+
+copy_with_symlinks libnuma.so.1
+copy_with_symlinks libgomp.so.1
+
+echo "vllm packaging completed successfully"
+ls -liah "${LIB_DIR}/"