mirror of
https://github.com/exo-explore/exo.git
synced 2026-06-02 19:27:55 -04:00
## Motivation

No automated integration tests exist for exo. Manual testing against real hardware clusters is slow and error-prone. We need a pytest framework that deploys clusters via `eco`, runs inference scenarios, and tears down cleanly.

## Changes

- **`tools/src/exo_tools/`** — New workspace member shared by bench, eval, and tests:
  - `client.py` — `ExoClient` HTTP client (extracted from `bench/harness.py`)
  - `harness.py` — instance lifecycle helpers (placement, wait-for-ready, etc.)
  - `cluster.py` — `EcoSession` for eco cluster lifecycle (deploy/stop/start/release/logs/exec) with unique `USER=<prefix>-<uuid>` per session and atexit/signal cleanup
- **`tests/integration/`** — 17 pytest tests across 5 files:
  - `test_1node.py` — place, chat, multi-turn, delete, state/models endpoints, cluster snapshot, download-from-scratch
  - `test_2node.py` — parametrized tensor/jaccl + pipeline/ring inference and multi-turn
  - `test_4node.py` — parametrized 4-node pipeline/ring inference, cluster state
  - `test_resilience.py` — full disconnect/reconnect cycle (2-node → disconnect → 1-node → reconnect → 2-node)
  - `test_dashboard.py` — Playwright: dashboard loads, shows node info, chat flow
  - `helpers.py` — placement/inference helpers, re-exports from `exo_tools`
  - `conftest.py` — session-scoped cluster fixtures with constraint-based eco reservations; `--hosts` override; `EXO_REF` env var for CI deployments from a GitHub branch
- **`bench/`** — Updated imports from `exo_tools.client` / `exo_tools.harness`
- **`pyproject.toml`** — Added `tools` workspace member, `playwright` dev dep, `--ignore=tests/integration`

## Why It Works

Tests use `eco` for cluster lifecycle and `ExoClient` for API interactions — same tools humans use. Session-scoped fixtures deploy once per file. Unique eco users prevent test runs from interfering with each other or manual usage.
## Test Plan

### Automated Testing

- `uv run pytest tests/integration/ -v -s` — full suite (~4-5 min, 17/17 passing)
- `uv run pytest tests/integration/ -v -s --hosts s4,s9,s10,s22` — pin specific hosts
- `EXO_REF=main uv run pytest tests/integration/ -v` — deploy from a GitHub branch (CI)
- `uv run pytest` — confirms integration tests are excluded from default runs
103 lines
3.7 KiB
Python
103 lines
3.7 KiB
Python
# type: ignore
|
|
"""Dashboard end-to-end tests using Playwright (headless Chromium).
|
|
|
|
Prerequisites:
|
|
uv run playwright install chromium
|
|
|
|
Run with:
|
|
uv run pytest tests/test_dashboard.py -v
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import contextlib
|
|
|
|
import pytest
|
|
|
|
# Playwright is an optional dependency: remember whether it imported so the
# module can be skipped instead of erroring during collection.
try:
    from playwright.sync_api import sync_playwright
except ImportError:
    _HAS_PLAYWRIGHT = False
else:
    _HAS_PLAYWRIGHT = True


def _chromium_launches() -> bool:
    """Return True if headless Chromium could be launched and then closed."""
    launched = False
    # Any failure while starting Playwright or the browser is treated as
    # "Chromium unavailable"; the flag simply stays False.
    with contextlib.suppress(Exception), sync_playwright() as playwright:
        playwright.chromium.launch(headless=True).close()
        launched = True
    return launched


# Check if Chromium is installed by attempting a quick launch. The probe is
# only attempted when the Playwright package itself is importable.
_HAS_CHROMIUM = _chromium_launches() if _HAS_PLAYWRIGHT else False
|
|
|
|
# Module-wide skip: every test in this file needs both the Playwright package
# and a Chromium build that can actually be launched.
_BROWSER_STACK_READY = _HAS_PLAYWRIGHT and _HAS_CHROMIUM
pytestmark = pytest.mark.skipif(
    not _BROWSER_STACK_READY,
    reason="playwright or chromium not installed (run: uv run playwright install chromium)",
)
|
|
|
|
|
|
def _mark_onboarding_complete(session) -> None:
    """Best-effort POST to ``/onboarding`` via ``session.client``.

    Marks onboarding complete on the server so the wizard doesn't auto-launch
    a model. Any ``Exception`` raised by the request is swallowed, so this
    helper never fails the calling test.
    """
    ignore_failures = contextlib.suppress(Exception)
    with ignore_failures:
        api = session.client
        api.request_json("POST", "/onboarding")
|
|
|
|
|
|
@pytest.mark.cluster(count=1)
def test_dashboard_chat_inference(session):
    """Full UI flow: open dashboard, pick a model, send a chat, verify response.

    The instance is created via the dashboard UI (model picker → chat send
    triggers the dashboard's auto-launch flow), not via @pytest.mark.instance.

    Args:
        session: Test-session object. This test reads
            ``session.cluster.api_url`` (the page to open) and calls
            ``session.client.request_json`` (to read ``/state``).

    A screenshot is written to ``/tmp/dashboard_<step>.png`` after each UI
    step to help debug failures.
    """
    _mark_onboarding_complete(session)

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        # Close the browser even when a UI step or an assertion below fails,
        # instead of only on the success path.
        try:
            page = browser.new_page(viewport={"width": 1280, "height": 800})

            def snap(step: str) -> None:
                """Save a screenshot of the current page for this step."""
                page.screenshot(path=f"/tmp/dashboard_{step}.png")

            page.goto(session.cluster.api_url, wait_until="networkidle")
            page.wait_for_timeout(3000)
            snap("initial")

            # Open the model picker by clicking the "SELECT MODEL" button
            page.get_by_text("SELECT MODEL", exact=False).first.click()
            page.wait_for_timeout(1000)
            snap("picker_open")

            # Search for the model — uses the model id substring; the picker
            # matches against name/id so "Llama-3.2-1B" filters to the small Llama.
            search_input = page.locator('input[placeholder*="Search models"]').first
            search_input.fill("Llama-3.2-1B")
            page.wait_for_timeout(1500)
            snap("picker_search")

            # Click the matching result. The picker shows the model's display
            # name (e.g. "Llama 3.2 1B") which differs from the model_id, so
            # we click the first element containing that display name.
            page.get_by_text("Llama 3.2 1B", exact=False).first.click()
            page.wait_for_timeout(1500)
            snap("model_selected")

            # Type a chat message — sending triggers the dashboard's auto-launch
            # flow: it picks an optimal placement for the selected model and POSTs
            # to /instance, then sends the chat once the runner is ready.
            chat_input = page.locator("textarea").first
            chat_input.fill("Say hello")
            chat_input.press("Enter")
            snap("chat_sent")

            # Wait for the instance to launch and respond. Generous timeout
            # because this includes model placement + load + generation.
            # NOTE(review): this is a fixed 60 s sleep, so the test always
            # takes at least that long and can still be too short on a slow
            # cluster — consider waiting on a concrete UI/state condition.
            page.wait_for_timeout(60000)
            snap("after_chat")

            # Verify an instance was created and the chat got a response
            instances = session.client.request_json("GET", "/state").get("instances", {})
            assert instances, "Expected the dashboard to have created an instance"

            # NOTE(review): this only proves the page rendered some text; it
            # does not confirm a model reply is present — TODO tighten once a
            # stable selector for assistant messages is known.
            body_text = page.text_content("body") or ""
            assert body_text, "Expected the dashboard page body to contain text"
        finally:
            browser.close()
|