ci(test-extra): run vllm e2e tests on CPU

Adds tests-vllm-grpc to the test-extra workflow, mirroring the llama-cpp and ik-llama-cpp gRPC jobs. Triggers when files under backend/python/vllm/ change (or on run-all), builds the local-ai vllm container image, and runs the tests/e2e-backends harness with BACKEND_TEST_MODEL_NAME=Qwen/Qwen2.5-0.5B-Instruct, tool_parser:hermes, and the tools capability enabled. Uses ubuntu-latest (no GPU) — vllm runs on CPU via the cpu-vllm wheel we pinned in requirements-cpu-after.txt. Frees disk space before the build since the docker image + torch + vllm wheel is sizeable.
2026-06-07 08:16:53 -04:00 · 2026-04-12 14:53:44 +00:00
parent e7f406169a
commit c7f444d18b
1 changed files with 22 additions and 0 deletions
--- a/.github/workflows/test-extra.yml
+++ b/.github/workflows/test-extra.yml
@@ -31,6 +31,7 @@ jobs:
      llama-cpp-quantization: ${{ steps.detect.outputs.llama-cpp-quantization }}
      llama-cpp: ${{ steps.detect.outputs.llama-cpp }}
      ik-llama-cpp: ${{ steps.detect.outputs.ik-llama-cpp }}
+      vllm: ${{ steps.detect.outputs.vllm }}
      acestep-cpp: ${{ steps.detect.outputs.acestep-cpp }}
      qwen3-tts-cpp: ${{ steps.detect.outputs.qwen3-tts-cpp }}
      voxtral: ${{ steps.detect.outputs.voxtral }}
@@ -501,6 +502,27 @@ jobs:
      - name: Build ik-llama-cpp backend image and run gRPC e2e tests
        run: |
          make test-extra-backend-ik-llama-cpp
+  tests-vllm-grpc:
+    needs: detect-changes
+    if: needs.detect-changes.outputs.vllm == 'true' || needs.detect-changes.outputs.run-all == 'true'
+    runs-on: ubuntu-latest
+    timeout-minutes: 120
+    steps:
+      - name: Clone
+        uses: actions/checkout@v6
+        with:
+          submodules: true
+      - name: Setup Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: '1.25.4'
+      - name: Free disk space
+        run: |
+          sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /opt/hostedtoolcache/CodeQL || true
+          df -h
+      - name: Build vllm (cpu) backend image and run gRPC e2e tests
+        run: |
+          make test-extra-backend-vllm
  tests-acestep-cpp:
    needs: detect-changes
    if: needs.detect-changes.outputs.acestep-cpp == 'true' || needs.detect-changes.outputs.run-all == 'true'