From 4e154b59e54f028cf44f8bad88b6bca352e41b7f Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Wed, 6 May 2026 17:07:24 +0200
Subject: [PATCH] fix(ci): unbreak rerankers (torch bump) and vllm-omni on
 aarch64 (#9688)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two unrelated CI breakages bundled together since both are one-liners:

- rerankers: bump torch 2.4.1 -> 2.7.1 on cpu/cublas12. The unpinned
  transformers resolves to 5.x, whose moe.py registers a custom_op with
  string-typed `'torch.Tensor'` annotations that torch 2.4.1's infer_schema
  rejects, blocking the gRPC server from starting and failing all 5 backend
  tests with "Connection refused" on :50051. Matches the version used by
  the transformers backend.

- vllm-omni: strip fa3-fwd from the upstream requirements/cuda.txt before
  resolving on aarch64. fa3-fwd 0.0.3 ships only an x86_64 wheel and has no
  sdist, making the cuda profile unsatisfiable on Jetson/SBSA. fa3-fwd is a
  soft runtime dep — vllm-omni's attention backends fall back to FA2 then
  SDPA when it's missing.

Signed-off-by: Ettore Di Giacinto
Co-authored-by: Ettore Di Giacinto
---
 backend/python/rerankers/requirements-cpu.txt      | 2 +-
 backend/python/rerankers/requirements-cublas12.txt | 2 +-
 backend/python/vllm-omni/install.sh                | 8 ++++++++
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/backend/python/rerankers/requirements-cpu.txt b/backend/python/rerankers/requirements-cpu.txt
index e27a47263..1225fa4db 100644
--- a/backend/python/rerankers/requirements-cpu.txt
+++ b/backend/python/rerankers/requirements-cpu.txt
@@ -1,4 +1,4 @@
 transformers
 accelerate
-torch==2.4.1
+torch==2.7.1
 rerankers[transformers]
\ No newline at end of file
diff --git a/backend/python/rerankers/requirements-cublas12.txt b/backend/python/rerankers/requirements-cublas12.txt
index e27a47263..1225fa4db 100644
--- a/backend/python/rerankers/requirements-cublas12.txt
+++ b/backend/python/rerankers/requirements-cublas12.txt
@@ -1,4 +1,4 @@
 transformers
 accelerate
-torch==2.4.1
+torch==2.7.1
 rerankers[transformers]
\ No newline at end of file
diff --git a/backend/python/vllm-omni/install.sh b/backend/python/vllm-omni/install.sh
index 9a42b4727..8823948ec 100755
--- a/backend/python/vllm-omni/install.sh
+++ b/backend/python/vllm-omni/install.sh
@@ -79,6 +79,14 @@ fi
 
 cd vllm-omni/
 
+# fa3-fwd ships no aarch64 wheels and there is no source distribution, so on
+# aarch64 (e.g. l4t13 / SBSA cu130) the upstream requirements/cuda.txt is
+# unsatisfiable. Drop it before resolving — vllm-omni does not hard-require
+# the fused FA3 kernel at import time on Jetson/SBSA targets.
+if [ "$(uname -m)" = "aarch64" ] && [ -f requirements/cuda.txt ]; then
+    sed -i '/^fa3-fwd[[:space:]]*==/d' requirements/cuda.txt
+fi
+
 if [ "x${USE_PIP}" == "xtrue" ]; then
     pip install ${EXTRA_PIP_INSTALL_FLAGS:-} -e .
 else
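
A minimal sketch of the failure mode the first bullet describes, for context: torch.library.custom_op derives the op schema from the decorated function's type annotations, and the commit states that torch 2.4.1's infer_schema rejects string-typed annotations such as 'torch.Tensor' (the pattern transformers 5.x uses in its moe.py), so registration raises at import time and the rerankers gRPC server never comes up on :50051. The op name "demo::scale" and the function below are illustrative, not taken from transformers or this patch.

import torch

# Illustrative only: a custom op whose annotations are strings ("torch.Tensor"),
# the pattern the commit message attributes to transformers 5.x's moe.py. Per the
# commit, torch 2.4.1's schema inference rejects these at registration (import)
# time, while torch 2.7.1 accepts them.
@torch.library.custom_op("demo::scale", mutates_args=())
def scale(x: "torch.Tensor", factor: float) -> "torch.Tensor":
    return x * factor

if __name__ == "__main__":
    print(scale(torch.ones(3), 2.0))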