From c7f444d18bc3a5c201cf8d467e5d78a000f2477b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 12 Apr 2026 14:53:44 +0000 Subject: [PATCH] ci(test-extra): run vllm e2e tests on CPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds tests-vllm-grpc to the test-extra workflow, mirroring the llama-cpp and ik-llama-cpp gRPC jobs. Triggers when files under backend/python/vllm/ change (or on run-all), builds the local-ai vllm container image, and runs the tests/e2e-backends harness with BACKEND_TEST_MODEL_NAME=Qwen/Qwen2.5-0.5B-Instruct, tool_parser:hermes, and the tools capability enabled. Uses ubuntu-latest (no GPU) — vllm runs on CPU via the cpu-vllm wheel we pinned in requirements-cpu-after.txt. Frees disk space before the build because the Docker image, torch, and the vllm wheel together are sizeable. --- .github/workflows/test-extra.yml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index 6b590d156..b6c72b1a7 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -31,6 +31,7 @@ jobs: llama-cpp-quantization: ${{ steps.detect.outputs.llama-cpp-quantization }} llama-cpp: ${{ steps.detect.outputs.llama-cpp }} ik-llama-cpp: ${{ steps.detect.outputs.ik-llama-cpp }} + vllm: ${{ steps.detect.outputs.vllm }} acestep-cpp: ${{ steps.detect.outputs.acestep-cpp }} qwen3-tts-cpp: ${{ steps.detect.outputs.qwen3-tts-cpp }} voxtral: ${{ steps.detect.outputs.voxtral }} @@ -501,6 +502,27 @@ jobs: - name: Build ik-llama-cpp backend image and run gRPC e2e tests run: | make test-extra-backend-ik-llama-cpp + tests-vllm-grpc: + needs: detect-changes + if: needs.detect-changes.outputs.vllm == 'true' || needs.detect-changes.outputs.run-all == 'true' + runs-on: ubuntu-latest + timeout-minutes: 120 + steps: + - name: Clone + uses: actions/checkout@v6 + with: + submodules: true + - name: Setup Go + uses: actions/setup-go@v5 + 
with: + go-version: '1.25.4' + - name: Free disk space + run: | + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /opt/hostedtoolcache/CodeQL || true + df -h + - name: Build vllm (cpu) backend image and run gRPC e2e tests + run: | + make test-extra-backend-vllm tests-acestep-cpp: needs: detect-changes if: needs.detect-changes.outputs.acestep-cpp == 'true' || needs.detect-changes.outputs.run-all == 'true'