From d74cd56b144d7215eb6ba6f470d6ce969bf465bd Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Sun, 12 Apr 2026 20:20:21 +0000
Subject: [PATCH] feat(vllm): bundle libnuma/libgomp via package.sh
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The vllm CPU wheel ships a _C extension that dlopens libnuma.so.1 at
import time; torch's CPU kernels in turn use libgomp.so.1 (OpenMP).
Without these on the host, vllm._C silently fails to register its
torch ops and EngineCore crashes with:

  AttributeError: '_OpNamespace' '_C_utils' object has no attribute
    'init_cpu_threads_env'

Rather than asking every user to install libnuma1/libgomp1 on their
host (or every LocalAI base image to ship them), bundle them into
the backend image itself — same pattern fish-speech and the GPU libs
already use. libbackend.sh adds ${EDIR}/lib to LD_LIBRARY_PATH at
run time so the bundled copies are picked up automatically.

- backend/python/vllm/package.sh (new): copies libnuma.so.1 and
  libgomp.so.1 from the builder's multiarch paths into ${BACKEND}/lib,
  preserving soname symlinks. Runs during Dockerfile.python's
  'Run backend-specific packaging' step (which already invokes
  package.sh if present).
- backend/Dockerfile.python: install libnuma1 + libgomp1 in the
  builder stage so package.sh has something to copy (the Ubuntu
  base image otherwise only has libgomp in the gcc dep chain).
- test-extra.yml: drop the workaround that installed these libs on
  the runner host — with the backend image self-contained, the
  runner no longer needs them, and the test now exercises the
  packaging path end-to-end the way a production host would.
---
 .github/workflows/test-extra.yml | 13 +++++----
 backend/Dockerfile.python        |  1 +
 backend/python/vllm/package.sh   | 49 ++++++++++++++++++++++++++++++++
 3 files changed, 57 insertions(+), 6 deletions(-)
 create mode 100755 backend/python/vllm/package.sh

diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml
index ad26cfcc7..dc38029b3 100644
--- a/.github/workflows/test-extra.yml
+++ b/.github/workflows/test-extra.yml
@@ -519,15 +519,16 @@ jobs:
       - name: Dependencies
         # bigger-runner is a bare self-hosted runner — install the tools
         # we need for docker-build + protogen-go + go test (make, curl,
-        # unzip for the protoc download, build-essential for cgo), plus
-        # libnuma1 which the vllm CPU wheel's _C extension dlopens at
-        # runtime (libnuma.so.1 missing → init_cpu_threads_env op is not
-        # registered → AttributeError on LoadModel).
+        # unzip for the protoc download, build-essential for cgo).
+        # Runtime shared libraries the vllm backend needs (libnuma,
+        # libgomp) are packaged into the backend image via package.sh
+        # and NOT installed on the host — that way the CI exercises the
+        # packaging path end-to-end and catches missing libs that users
+        # would otherwise hit on a bare production host.
         run: |
           sudo apt-get update
           sudo apt-get install -y --no-install-recommends \
-              make build-essential curl unzip ca-certificates git tar \
-              libnuma1 libgomp1
+              make build-essential curl unzip ca-certificates git tar
       - name: Setup Go
         uses: actions/setup-go@v5
         with:
diff --git a/backend/Dockerfile.python b/backend/Dockerfile.python
index 16159c67b..f3bcf8d34 100644
--- a/backend/Dockerfile.python
+++ b/backend/Dockerfile.python
@@ -29,6 +29,7 @@ RUN apt-get update && \
         curl python3-pip \
         python-is-python3 \
         python3-dev llvm \
+        libnuma1 libgomp1 \
         python3-venv make cmake && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
diff --git a/backend/python/vllm/package.sh b/backend/python/vllm/package.sh
new file mode 100755
index 000000000..3c4ba8c19
--- /dev/null
+++ b/backend/python/vllm/package.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+# Script to package runtime shared libraries for the vllm backend.
+#
+# The final Dockerfile.python stage is FROM scratch, so system libraries
+# must be explicitly copied into ${BACKEND}/lib so the backend can run on
+# any host without installing them. libbackend.sh automatically adds that
+# directory to LD_LIBRARY_PATH at run time.
+#
+# vllm's CPU C++ extension (vllm._C) dlopens libnuma.so.1 at import time;
+# if it's missing, the _C_utils torch ops are never registered and the
+# engine crashes with AttributeError on init_cpu_threads_env. libgomp is
+# used by torch's CPU kernels; on some stripped-down hosts it's also
+# absent, so we bundle it too.
+
+set -e  # exit on the first unhandled command failure
+
+CURDIR=$(dirname "$(realpath "$0")")  # directory containing this script (path canonicalized by realpath)
+LIB_DIR="${CURDIR}/lib"  # bundled libraries go into lib/ next to this script
+mkdir -p "${LIB_DIR}"
+
+copy_with_symlinks() {
+    # Bundle shared library $1 (a soname such as libfoo.so.1) into ${LIB_DIR}.
+    # A library that cannot be found only triggers a warning (returns 0).
+    local soname="$1" hit="" dir  # dir is local so the loop variable does not leak
+    for dir in /usr/lib/x86_64-linux-gnu /usr/lib/aarch64-linux-gnu /lib/x86_64-linux-gnu /lib/aarch64-linux-gnu /usr/lib /lib; do
+        if [ -e "${dir}/${soname}" ]; then
+            hit="${dir}/${soname}"
+            break
+        fi
+    done
+    if [ -z "${hit}" ]; then
+        echo "warning: ${soname} not found in standard lib paths" >&2
+        return 0
+    fi
+    # Copy the real file behind any symlink, then recreate the soname link.
+    local real real_base
+    real=$(readlink -f "${hit}")
+    cp -v "${real}" "${LIB_DIR}/"
+    real_base=$(basename "${real}")
+    if [ "${real_base}" != "${soname}" ]; then
+        ln -sf "${real_base}" "${LIB_DIR}/${soname}"
+    fi
+}
+
+copy_with_symlinks libnuma.so.1
+copy_with_symlinks libgomp.so.1
+
+echo "vllm packaging completed successfully"
+ls -liah "${LIB_DIR}/"