From c7f444d18bc3a5c201cf8d467e5d78a000f2477b Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Sun, 12 Apr 2026 14:53:44 +0000
Subject: [PATCH] ci(test-extra): run vllm e2e tests on CPU
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds tests-vllm-grpc to the test-extra workflow, mirroring the
llama-cpp and ik-llama-cpp gRPC jobs. Triggers when files under
backend/python/vllm/ change (or on run-all), builds the local-ai
vllm container image, and runs the tests/e2e-backends harness with
BACKEND_TEST_MODEL_NAME=Qwen/Qwen2.5-0.5B-Instruct, tool_parser:hermes,
and the tools capability enabled.

Uses ubuntu-latest (no GPU) — vllm runs on CPU via the cpu-vllm
wheel we pinned in requirements-cpu-after.txt. Frees disk space
before the build, since the docker image, torch, and the vllm wheel
together are sizeable.
---
 .github/workflows/test-extra.yml | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml
index 6b590d156..b6c72b1a7 100644
--- a/.github/workflows/test-extra.yml
+++ b/.github/workflows/test-extra.yml
@@ -31,6 +31,7 @@ jobs:
       llama-cpp-quantization: ${{ steps.detect.outputs.llama-cpp-quantization }}
       llama-cpp: ${{ steps.detect.outputs.llama-cpp }}
       ik-llama-cpp: ${{ steps.detect.outputs.ik-llama-cpp }}
+      vllm: ${{ steps.detect.outputs.vllm }}
       acestep-cpp: ${{ steps.detect.outputs.acestep-cpp }}
       qwen3-tts-cpp: ${{ steps.detect.outputs.qwen3-tts-cpp }}
       voxtral: ${{ steps.detect.outputs.voxtral }}
@@ -501,6 +502,27 @@ jobs:
       - name: Build ik-llama-cpp backend image and run gRPC e2e tests
         run: |
           make test-extra-backend-ik-llama-cpp
+  tests-vllm-grpc:  # gRPC e2e tests for the vllm backend, run on CPU
+    needs: detect-changes  # supplies the vllm / run-all outputs tested below
+    if: needs.detect-changes.outputs.vllm == 'true' || needs.detect-changes.outputs.run-all == 'true'
+    runs-on: ubuntu-latest  # hosted runner without a GPU; vllm is exercised on CPU
+    timeout-minutes: 120
+    steps:
+      - name: Clone
+        uses: actions/checkout@v6
+        with:
+          submodules: true
+      - name: Setup Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: '1.25.4'
+      - name: Free disk space  # reclaim runner disk before the large image build
+        run: |
+          sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /opt/hostedtoolcache/CodeQL || true
+          df -h
+      - name: Build vllm (cpu) backend image and run gRPC e2e tests
+        run: |
+          make test-extra-backend-vllm
   tests-acestep-cpp:
     needs: detect-changes
     if: needs.detect-changes.outputs.acestep-cpp == 'true' || needs.detect-changes.outputs.run-all == 'true'