fix(sglang): force AVX-512 CXXFLAGS and disable CI e2e job

sgl-kernel's shm.cpp uses __m512 AVX-512 intrinsics unconditionally, so building with -march=native fails on CI runners whose CPUs do not report AVX-512 in /proc/cpuinfo. Force -march=sapphirerapids so the build always succeeds, matching the recipe in sglang upstream's docker/xeon.Dockerfile.

The resulting binary still requires an AVX-512 capable CPU at runtime, so disable tests-sglang-grpc in test-extra.yml for the same reason tests-vllm-grpc is disabled. Local runs with `make test-extra-backend-sglang` still work on hosts with the right SIMD baseline.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-04-17 13:28:31 -04:00 · 2026-04-15 07:28:21 +00:00
parent d47e2aa93f
commit 06b5b93556
3 changed files with 54 additions and 29 deletions
--- a/backend/python/sglang/install.sh
+++ b/backend/python/sglang/install.sh
@@ -50,6 +50,16 @@ if [ "x${BUILD_TYPE}" == "x" ] || [ "x${FROM_SOURCE:-}" == "xtrue" ]; then
    # present in the venv before we build from source.
    uv pip install --no-build-isolation "scikit-build-core>=0.10" ninja cmake

+    # sgl-kernel's CPU shm.cpp uses __m512 AVX-512 intrinsics unconditionally.
+    # Its CMakeLists passes -march=native, so on runners whose CPUs do not
+    # report AVX-512 in /proc/cpuinfo (ubuntu-latest, most shared CI pools)
+    # it fails with "__m512 return without 'avx512f' enabled changes the ABI".
+    # Force Sapphire Rapids ISA at compile time so the build always succeeds;
+    # the resulting binary still requires an AVX-512 capable CPU at runtime,
+    # same constraint sglang upstream documents in docker/xeon.Dockerfile.
+    export CXXFLAGS="${CXXFLAGS:-} -march=sapphirerapids"
+    export CFLAGS="${CFLAGS:-} -march=sapphirerapids"
+
    _sgl_src=$(mktemp -d)
    trap 'rm -rf "${_sgl_src}"' EXIT
    git clone --depth 1 https://github.com/sgl-project/sglang "${_sgl_src}/sglang"
--- a/backend/python/sglang/requirements-cublas12.txt
+++ b/backend/python/sglang/requirements-cublas12.txt
@@ -1,6 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu124
 accelerate
-torch==2.9.1
+torch==2.7.1
 torchvision
-torchaudio==2.9.1
+torchaudio==2.7.1
 transformers