mirror of
https://github.com/mudler/LocalAI.git
synced 2026-04-17 05:18:53 -04:00
fix(sglang): force AVX-512 CXXFLAGS and disable CI e2e job
sgl-kernel's shm.cpp uses __m512 AVX-512 intrinsics unconditionally; -march=native fails on CI runners without AVX-512 in /proc/cpuinfo. Force -march=sapphirerapids so the build always succeeds, matching sglang upstream's docker/xeon.Dockerfile recipe. The resulting binary still requires an AVX-512 capable CPU at runtime, so disable tests-sglang-grpc in test-extra.yml for the same reason tests-vllm-grpc is disabled. Local runs with make test-extra-backend-sglang still work on hosts with the right SIMD baseline. Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
68
.github/workflows/test-extra.yml
vendored
68
.github/workflows/test-extra.yml
vendored
@@ -590,32 +590,48 @@ jobs:
|
||||
# - name: Build vllm (cpu) backend image and run gRPC e2e tests
|
||||
# run: |
|
||||
# make test-extra-backend-vllm
|
||||
tests-sglang-grpc:
|
||||
needs: detect-changes
|
||||
if: needs.detect-changes.outputs.sglang == 'true' || needs.detect-changes.outputs.run-all == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 90
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y --no-install-recommends \
|
||||
make build-essential curl unzip ca-certificates git tar
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: '1.25.4'
|
||||
- name: Free disk space
|
||||
run: |
|
||||
sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /opt/hostedtoolcache/CodeQL || true
|
||||
df -h
|
||||
- name: Build sglang (cpu) backend image and run gRPC e2e tests
|
||||
run: |
|
||||
make test-extra-backend-sglang
|
||||
# tests-sglang-grpc is currently disabled in CI for the same reason as
|
||||
# tests-vllm-grpc: sglang's CPU kernel (sgl-kernel) uses __m512 AVX-512
|
||||
# intrinsics unconditionally in shm.cpp, so the from-source build
|
||||
# requires `-march=sapphirerapids` (already set in install.sh) and the
|
||||
# resulting binary SIGILLs at import on CPUs without AVX-512 VNNI/BF16.
|
||||
# The ubuntu-latest runner pool does not guarantee that ISA baseline.
|
||||
#
|
||||
# The test itself (tests/e2e-backends + make test-extra-backend-sglang)
|
||||
# is fully working and validated locally on a host with the right
|
||||
# SIMD baseline. Run it manually with:
|
||||
#
|
||||
# make test-extra-backend-sglang
|
||||
#
|
||||
# Re-enable this job once we have a self-hosted runner label with
|
||||
# guaranteed AVX-512 VNNI/BF16 support.
|
||||
#
|
||||
# tests-sglang-grpc:
|
||||
# needs: detect-changes
|
||||
# if: needs.detect-changes.outputs.sglang == 'true' || needs.detect-changes.outputs.run-all == 'true'
|
||||
# runs-on: bigger-runner
|
||||
# timeout-minutes: 90
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v6
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
# run: |
|
||||
# sudo apt-get update
|
||||
# sudo apt-get install -y --no-install-recommends \
|
||||
# make build-essential curl unzip ca-certificates git tar
|
||||
# - name: Setup Go
|
||||
# uses: actions/setup-go@v5
|
||||
# with:
|
||||
# go-version: '1.25.4'
|
||||
# - name: Free disk space
|
||||
# run: |
|
||||
# sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /opt/hostedtoolcache/CodeQL || true
|
||||
# df -h
|
||||
# - name: Build sglang (cpu) backend image and run gRPC e2e tests
|
||||
# run: |
|
||||
# make test-extra-backend-sglang
|
||||
tests-acestep-cpp:
|
||||
needs: detect-changes
|
||||
if: needs.detect-changes.outputs.acestep-cpp == 'true' || needs.detect-changes.outputs.run-all == 'true'
|
||||
|
||||
@@ -50,6 +50,16 @@ if [ "x${BUILD_TYPE}" == "x" ] || [ "x${FROM_SOURCE:-}" == "xtrue" ]; then
|
||||
# present in the venv before we build from source.
|
||||
uv pip install --no-build-isolation "scikit-build-core>=0.10" ninja cmake
|
||||
|
||||
# sgl-kernel's CPU shm.cpp uses __m512 AVX-512 intrinsics unconditionally.
|
||||
# CMakeLists passes -march=native, which on runners without AVX-512 in
|
||||
# /proc/cpuinfo (ubuntu-latest, most shared CI pools) fails with
|
||||
# "__m512 return without 'avx512f' enabled changes the ABI".
|
||||
# Force Sapphire Rapids ISA at compile time so the build always succeeds;
|
||||
# the resulting binary still requires an AVX-512 capable CPU at runtime,
|
||||
# same constraint sglang upstream documents in docker/xeon.Dockerfile.
|
||||
export CXXFLAGS="${CXXFLAGS:-} -march=sapphirerapids"
|
||||
export CFLAGS="${CFLAGS:-} -march=sapphirerapids"
|
||||
|
||||
_sgl_src=$(mktemp -d)
|
||||
trap 'rm -rf "${_sgl_src}"' EXIT
|
||||
git clone --depth 1 https://github.com/sgl-project/sglang "${_sgl_src}/sglang"
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cu124
|
||||
accelerate
|
||||
torch==2.9.1
|
||||
torch==2.7.1
|
||||
torchvision
|
||||
torchaudio==2.9.1
|
||||
torchaudio==2.7.1
|
||||
transformers
|
||||
|
||||
Reference in New Issue
Block a user