mirror of
https://github.com/mudler/LocalAI.git
synced 2026-04-16 12:59:33 -04:00
ci(test-extra): run vllm e2e tests on CPU
Adds tests-vllm-grpc to the test-extra workflow, mirroring the llama-cpp and ik-llama-cpp gRPC jobs. Triggers when files under backend/python/vllm/ change (or on run-all), builds the local-ai vllm container image, and runs the tests/e2e-backends harness with BACKEND_TEST_MODEL_NAME=Qwen/Qwen2.5-0.5B-Instruct, tool_parser:hermes, and the tools capability enabled. Uses ubuntu-latest (no GPU) — vllm runs on CPU via the cpu-vllm wheel we pinned in requirements-cpu-after.txt. Frees disk space before the build since the docker image, torch, and the vllm wheel together are sizeable.
This commit is contained in:
22
.github/workflows/test-extra.yml
vendored
22
.github/workflows/test-extra.yml
vendored
@@ -31,6 +31,7 @@ jobs:
|
||||
llama-cpp-quantization: ${{ steps.detect.outputs.llama-cpp-quantization }}
|
||||
llama-cpp: ${{ steps.detect.outputs.llama-cpp }}
|
||||
ik-llama-cpp: ${{ steps.detect.outputs.ik-llama-cpp }}
|
||||
vllm: ${{ steps.detect.outputs.vllm }}
|
||||
acestep-cpp: ${{ steps.detect.outputs.acestep-cpp }}
|
||||
qwen3-tts-cpp: ${{ steps.detect.outputs.qwen3-tts-cpp }}
|
||||
voxtral: ${{ steps.detect.outputs.voxtral }}
|
||||
@@ -501,6 +502,27 @@ jobs:
|
||||
- name: Build ik-llama-cpp backend image and run gRPC e2e tests
|
||||
run: |
|
||||
make test-extra-backend-ik-llama-cpp
|
||||
  tests-vllm-grpc:
    needs: detect-changes
    if: needs.detect-changes.outputs.vllm == 'true' || needs.detect-changes.outputs.run-all == 'true'
    runs-on: ubuntu-latest
    timeout-minutes: 120
    steps:
      - name: Clone
        uses: actions/checkout@v6
        with:
          submodules: true
      - name: Setup Go
        uses: actions/setup-go@v5
        with:
          go-version: '1.25.4'
      - name: Free disk space
        run: |
          sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /opt/hostedtoolcache/CodeQL || true
          df -h
      - name: Build vllm (cpu) backend image and run gRPC e2e tests
        run: |
          make test-extra-backend-vllm
|
||||
tests-acestep-cpp:
|
||||
needs: detect-changes
|
||||
if: needs.detect-changes.outputs.acestep-cpp == 'true' || needs.detect-changes.outputs.run-all == 'true'
|
||||
|
||||
Reference in New Issue
Block a user