mirror of
https://github.com/exo-explore/exo.git
synced 2026-05-24 06:35:32 -04:00
## Motivation No automated integration tests exist for exo. Manual testing against real hardware clusters is slow and error-prone. We need a pytest framework that deploys clusters via `eco`, runs inference scenarios, and tears down cleanly. ## Changes - **`tools/src/exo_tools/`** — New workspace member shared by bench, eval, and tests: - `client.py` — `ExoClient` HTTP client (extracted from `bench/harness.py`) - `harness.py` — instance lifecycle helpers (placement, wait-for-ready, etc.) - `cluster.py` — `EcoSession` for eco cluster lifecycle (deploy/stop/start/release/logs/exec) with unique `USER=<prefix>-<uuid>` per session and atexit/signal cleanup - **`tests/integration/`** — 17 pytest tests across 5 files: - `test_1node.py` — place, chat, multi-turn, delete, state/models endpoints, cluster snapshot, download-from-scratch - `test_2node.py` — parametrized tensor/jaccl + pipeline/ring inference and multi-turn - `test_4node.py` — parametrized 4-node pipeline/ring inference, cluster state - `test_resilience.py` — full disconnect/reconnect cycle (2-node → disconnect → 1-node → reconnect → 2-node) - `test_dashboard.py` — Playwright: dashboard loads, shows node info, chat flow - `helpers.py` — placement/inference helpers, re-exports from `exo_tools` - `conftest.py` — session-scoped cluster fixtures with constraint-based eco reservations; `--hosts` override; `EXO_REF` env var for CI deployments from a GitHub branch - **`bench/`** — Updated imports from `exo_tools.client` / `exo_tools.harness` - **`pyproject.toml`** — Added `tools` workspace member, `playwright` dev dep, `--ignore=tests/integration` ## Why It Works Tests use `eco` for cluster lifecycle and `ExoClient` for API interactions — same tools humans use. Session-scoped fixtures deploy once per file. Unique eco users prevent test runs from interfering with each other or manual usage. 
## Test Plan ### Automated Testing - `uv run pytest tests/integration/ -v -s` — full suite (~4-5 min, 17/17 passing) - `uv run pytest tests/integration/ -v -s --hosts s4,s9,s10,s22` — pin specific hosts - `EXO_REF=main uv run pytest tests/integration/ -v` — deploy from a GitHub branch (CI) - `uv run pytest` — confirms integration tests are excluded from default runs
76 lines
2.3 KiB
Python
76 lines
2.3 KiB
Python
# type: ignore
|
|
"""Single-node integration tests.
|
|
|
|
Run with:
|
|
uv run pytest tests/test_1node.py -v
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import time
|
|
|
|
import pytest
|
|
from exo_tools.harness import is_model_downloaded, place_instance
|
|
|
|
from .framework import DEFAULT_MODEL, InstanceSpec
|
|
|
|
|
|
@pytest.mark.cluster(count=1)
@pytest.mark.instance(DEFAULT_MODEL)
def test_place_instance_and_chat(session):
    """Send one chat prompt through the session and expect a non-empty reply.

    NOTE(review): the ``cluster`` and ``instance`` markers presumably tell the
    ``session`` fixture which cluster size and model to prepare; confirm
    against the fixture definition.
    """
    reply = session.chat("Say hello in one sentence.")
    assert len(reply) > 0
|
|
|
|
|
|
@pytest.mark.cluster(count=1)
@pytest.mark.instance(DEFAULT_MODEL)
def test_chat_multiple_turns(session):
    """Check that a follow-up turn built on an earlier reply yields output.

    The first reply is obtained via ``session.chat`` and then replayed as the
    assistant message in a three-message conversation sent through
    ``session.multi_turn``. Both replies only need to be non-empty.
    """
    opening_question = "What is 2 + 2?"

    answer = session.chat(opening_question)
    assert len(answer) > 0

    # Replay the first exchange verbatim, then append the follow-up request.
    conversation = [
        {"role": "user", "content": opening_question},
        {"role": "assistant", "content": answer},
        {"role": "user", "content": "Now multiply that by 3."},
    ]
    follow_up = session.multi_turn(conversation)
    assert len(follow_up) > 0
|
|
|
|
|
|
@pytest.mark.cluster(count=1)
@pytest.mark.instance(DEFAULT_MODEL)
def test_delete_instance(session):
    """Delete the session's instance and verify none remain afterwards.

    Issues a ``DELETE /instance/<id>`` request, waits up to 30 seconds for
    ``wait_for_instance_gone`` to return, then asserts that
    ``session.instances`` is empty.
    """
    from exo_tools.harness import wait_for_instance_gone

    delete_path = f"/instance/{session.instance_id}"
    session.client.request_json("DELETE", delete_path)

    wait_for_instance_gone(
        session.client,
        session.instance_id,
        timeout=30.0,
    )

    assert (
        len(session.instances) == 0
    ), f"Expected no instances, found {len(session.instances)}"
|
|
|
|
|
|
@pytest.mark.cluster(count=1)
def test_download_from_scratch(session):
    """Ensure the model is not on the cluster, then place an instance to
    trigger a fresh download and verify inference.

    Steps:
      1. Pick the first node id listed under ``nodeIdentities`` in the
         session state.
      2. Request deletion of ``DEFAULT_MODEL`` on that node and poll (every
         2 seconds, for up to 60 seconds) until it is no longer reported as
         downloaded.
      3. Place a new instance (up to 900 seconds) and run one chat prompt.

    Raises:
        AssertionError: if the cluster reports no nodes, if the model is
            still present once the deletion deadline has passed, or if the
            chat reply is empty.
    """
    # Tolerate a missing or null "nodeIdentities" entry and fail with a clear
    # message rather than an opaque StopIteration/TypeError.
    node_ids = session.state.get("nodeIdentities") or {}
    node_id = next(iter(node_ids), None)
    assert node_id is not None, "Cluster state reports no nodeIdentities"

    # Delete any existing download — the API call is idempotent
    session.client.request_json("DELETE", f"/download/{node_id}/{DEFAULT_MODEL}")

    # Poll until the model is gone (it may already be gone). A monotonic clock
    # keeps the timeout immune to wall-clock adjustments, and checking the
    # model state before the deadline guarantees one last check after the
    # final sleep.
    deadline = time.monotonic() + 60.0
    while is_model_downloaded(session.client, DEFAULT_MODEL):
        if time.monotonic() >= deadline:
            raise AssertionError(
                f"Expected {DEFAULT_MODEL} to be deleted from cluster"
            )
        time.sleep(2.0)

    # Generous timeout: placement has to re-download the model first.
    place_instance(session.client, DEFAULT_MODEL, timeout=900.0)
    # NOTE(review): presumably session.chat() reads instance_spec to choose
    # the target model — confirm against the session implementation.
    session.instance_spec = InstanceSpec(model_id=DEFAULT_MODEL)

    resp = session.chat("Say hello in one sentence.")
    assert len(resp) > 0
|