Mirror of https://github.com/bentoml/OpenLLM.git (synced 2026-01-17 20:08:17 -05:00)
feat(ci): running CI on paperspace (#998)
* chore: update tiny script
* feat(ci): running on paperspace machines
* chore: update models and increase timeout readiness
* fix: schema validation for inputs and update client supporting stop (see the sketch below)
* chore: update coverage config
* chore: remove some non-essentials
* chore: update locks

---------

Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com>
Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
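The "update client supporting stop" item corresponds to the generate call in the updated test below, which passes a stop string and sampling overrides through openllm.HTTPClient. A minimal sketch of that call on its own, assuming an `openllm start` server is already listening on the URL used here (the port value simply mirrors the tests):

```python
# Sketch only: exercises openllm.HTTPClient.generate with a stop string and
# llm_config overrides, mirroring the call made in the updated test suite.
import openllm

client = openllm.HTTPClient('http://127.0.0.1:53822', api_version='v1')  # assumes a server is already serving here
assert client.health()  # readiness check, as in the tests

response = client.generate(
  'Tell me more about Apple as a company',
  stop='technology',  # generation is cut off at this stop string
  llm_config={'temperature': 0.5, 'top_p': 0.2},
)
print(response)
```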
@@ -1,16 +0,0 @@
-from __future__ import annotations
-
-import pytest, typing as t
-
-
-@pytest.fixture(
-  scope='function',
-  name='model_id',
-  params={
-    'meta-llama/Meta-Llama-3-8B-Instruct',
-    'casperhansen/llama-3-70b-instruct-awq',
-    'TheBloke/Nous-Hermes-2-Mixtral-8x7B-DPO-AWQ',
-  },
-)
-def fixture_model_id(request) -> t.Generator[str, None, None]:
-  yield request.param
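For context, the deleted fixture relied on pytest's `params` so that every test requesting `model_id` ran once per model in the set. A minimal standalone sketch of that pattern; the model names and the consuming test here are placeholders, not code from the repository:

```python
# Sketch of the removed fixture pattern: pytest instantiates the fixture once per
# entry in `params`, so each consuming test runs once per model ID.
import typing as t
import pytest

@pytest.fixture(scope='function', name='model_id', params=['model-a', 'model-b'])  # placeholder IDs
def fixture_model_id(request) -> t.Generator[str, None, None]:
  yield request.param

def test_consumes_model_id(model_id: str):  # hypothetical consumer; runs once per param
  assert isinstance(model_id, str)
```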
@@ -1,26 +1,27 @@
 from __future__ import annotations

-import pytest, subprocess, sys, openllm, bentoml, asyncio
-from openai import AsyncOpenAI
+import pytest, subprocess, sys, asyncio, openllm, bentoml
+from openai import OpenAI
 from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam

 SERVER_PORT = 53822


 @pytest.mark.asyncio
-async def test_openai_compatible(model_id: str):
+async def test_openai_compatible():
+  model_id = 'meta-llama/Meta-Llama-3-8B-Instruct'
   server = subprocess.Popen([sys.executable, '-m', 'openllm', 'start', model_id, '--port', str(SERVER_PORT)])
-  await asyncio.sleep(5)
-  with bentoml.SyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', server_ready_timeout=90) as client:
+  await asyncio.sleep(10)
+  with bentoml.SyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', server_ready_timeout=120) as client:
     assert client.is_ready(30)

   try:
-    client = AsyncOpenAI(api_key='na', base_url=f'http://127.0.0.1:{SERVER_PORT}/v1')
-    serve_model = (await client.models.list()).data[0].id
+    client = OpenAI(api_key='na', base_url=f'http://127.0.0.1:{SERVER_PORT}/v1')
+    serve_model = client.models.list().data[0].id
     assert serve_model == openllm.utils.normalise_model_name(model_id)
-    streamable = await client.chat.completions.create(
+    streamable = client.chat.completions.create(
       model=serve_model,
-      max_tokens=512,
+      max_tokens=128,
       stream=False,
       messages=[
        ChatCompletionSystemMessageParam(
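The hunk above ends midway through the `chat.completions.create` call. As a rough guide to the shape of the request the updated test issues against the OpenAI-compatible endpoint, here is a self-contained sketch; the message contents are invented for illustration and do not come from the diff:

```python
# Sketch of a complete request of the same shape as the truncated call above.
from openai import OpenAI
from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam

SERVER_PORT = 53822  # matches the tests above

client = OpenAI(api_key='na', base_url=f'http://127.0.0.1:{SERVER_PORT}/v1')
served_model = client.models.list().data[0].id  # the single model served by `openllm start`

completion = client.chat.completions.create(
  model=served_model,
  max_tokens=128,
  stream=False,
  messages=[
    ChatCompletionSystemMessageParam(role='system', content='You are a helpful assistant.'),   # assumed content
    ChatCompletionUserMessageParam(role='user', content='Tell me a short fact about llamas.'),  # assumed content
  ],
)
print(completion.choices[0].message.content)
```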
@@ -37,18 +38,27 @@ async def test_openai_compatible(model_id: str):


 @pytest.mark.asyncio
-async def test_generate_endpoint(model_id: str):
-  server = subprocess.Popen([sys.executable, '-m', 'openllm', 'start', model_id, '--port', str(SERVER_PORT)])
-  await asyncio.sleep(5)
+async def test_generate_endpoint():
+  server = subprocess.Popen([
+    sys.executable,
+    '-m',
+    'openllm',
+    'start',
+    'microsoft/Phi-3-mini-4k-instruct',
+    '--trust-remote-code',
+    '--port',
+    str(SERVER_PORT),
+  ])
+  await asyncio.sleep(10)

-  with bentoml.SyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', server_ready_timeout=90) as client:
+  with bentoml.SyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', server_ready_timeout=120) as client:
     assert client.is_ready(30)

   try:
-    client = openllm.AsyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', api_version='v1')
-    assert await client.health()
+    client = openllm.HTTPClient(f'http://127.0.0.1:{SERVER_PORT}', api_version='v1')
+    assert client.health()

-    response = await client.generate(
+    response = client.generate(
       'Tell me more about Apple as a company', stop='technology', llm_config={'temperature': 0.5, 'top_p': 0.2}
     )
     assert response is not None
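Both tests start the server with `subprocess.Popen` and open a `try:` block whose matching cleanup lies outside these hunks. A sketch of the usual shape of such a test, assuming the server process is torn down in a `finally:` block; the helper name and structure here are hypothetical, not code from this commit:

```python
# Hypothetical sketch: run client assertions against a freshly started
# `openllm start` subprocess and always terminate it afterwards.
import subprocess, sys

def with_openllm_server(model_id: str, port: int) -> None:
  server = subprocess.Popen([sys.executable, '-m', 'openllm', 'start', model_id, '--port', str(port)])
  try:
    pass  # exercise openllm.HTTPClient / the OpenAI-compatible endpoint here
  finally:
    server.terminate()  # stop the server even if an assertion above fails
    server.wait()
```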