feat(ci): running CI on paperspace (#998)

* chore: update tiny script

Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com>

* feat(ci): running on paperspace machines

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: update models and increase timeout readiness

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* fix: schema validation for inputs and update client supporting stop

Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com>

* chore: update coverage config

Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com>

* chore: remove some non-essentials

Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com>

* chore: update locks

Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com>

---------

Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com>
Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
Author: Aaron Pham
Date: 2024-05-26 13:14:54 -04:00 (committed by GitHub)
Parent: a58e12d116
Commit: f248ea25cd
18 changed files with 180 additions and 760 deletions
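
The "update client supporting stop" change called out in the commit message is what the updated tests below exercise: the synchronous openllm.HTTPClient passing a stop sequence to generate. A minimal sketch of that usage, mirroring the diff (the port is illustrative and assumes a server is already running):

import openllm

# Connect to an already-running OpenLLM server; api_version='v1' matches the tests below.
client = openllm.HTTPClient('http://127.0.0.1:53822', api_version='v1')
assert client.health()

# 'stop' ends generation at the given sequence; llm_config forwards sampling options.
response = client.generate(
  'Tell me more about Apple as a company',
  stop='technology',
  llm_config={'temperature': 0.5, 'top_p': 0.2},
)
print(response)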

@@ -1,16 +0,0 @@
-from __future__ import annotations
-import pytest, typing as t
-@pytest.fixture(
-  scope='function',
-  name='model_id',
-  params={
-    'meta-llama/Meta-Llama-3-8B-Instruct',
-    'casperhansen/llama-3-70b-instruct-awq',
-    'TheBloke/Nous-Hermes-2-Mixtral-8x7B-DPO-AWQ',
-  },
-)
-def fixture_model_id(request) -> t.Generator[str, None, None]:
-  yield request.param

@@ -1,26 +1,27 @@
 from __future__ import annotations
-import pytest, subprocess, sys, openllm, bentoml, asyncio
-from openai import AsyncOpenAI
+import pytest, subprocess, sys, asyncio, openllm, bentoml
+from openai import OpenAI
 from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam
 SERVER_PORT = 53822
 @pytest.mark.asyncio
-async def test_openai_compatible(model_id: str):
+async def test_openai_compatible():
+  model_id = 'meta-llama/Meta-Llama-3-8B-Instruct'
   server = subprocess.Popen([sys.executable, '-m', 'openllm', 'start', model_id, '--port', str(SERVER_PORT)])
-  await asyncio.sleep(5)
-  with bentoml.SyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', server_ready_timeout=90) as client:
+  await asyncio.sleep(10)
+  with bentoml.SyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', server_ready_timeout=120) as client:
     assert client.is_ready(30)
   try:
-    client = AsyncOpenAI(api_key='na', base_url=f'http://127.0.0.1:{SERVER_PORT}/v1')
-    serve_model = (await client.models.list()).data[0].id
+    client = OpenAI(api_key='na', base_url=f'http://127.0.0.1:{SERVER_PORT}/v1')
+    serve_model = client.models.list().data[0].id
     assert serve_model == openllm.utils.normalise_model_name(model_id)
-    streamable = await client.chat.completions.create(
+    streamable = client.chat.completions.create(
       model=serve_model,
-      max_tokens=512,
+      max_tokens=128,
       stream=False,
       messages=[
         ChatCompletionSystemMessageParam(
@@ -37,18 +38,27 @@ async def test_openai_compatible(model_id: str):
 @pytest.mark.asyncio
-async def test_generate_endpoint(model_id: str):
-  server = subprocess.Popen([sys.executable, '-m', 'openllm', 'start', model_id, '--port', str(SERVER_PORT)])
-  await asyncio.sleep(5)
+async def test_generate_endpoint():
+  server = subprocess.Popen([
+    sys.executable,
+    '-m',
+    'openllm',
+    'start',
+    'microsoft/Phi-3-mini-4k-instruct',
+    '--trust-remote-code',
+    '--port',
+    str(SERVER_PORT),
+  ])
+  await asyncio.sleep(10)
-  with bentoml.SyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', server_ready_timeout=90) as client:
+  with bentoml.SyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', server_ready_timeout=120) as client:
     assert client.is_ready(30)
   try:
-    client = openllm.AsyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', api_version='v1')
-    assert await client.health()
+    client = openllm.HTTPClient(f'http://127.0.0.1:{SERVER_PORT}', api_version='v1')
+    assert client.health()
-    response = await client.generate(
+    response = client.generate(
       'Tell me more about Apple as a company', stop='technology', llm_config={'temperature': 0.5, 'top_p': 0.2}
     )
     assert response is not None
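
For the OpenAI-compatible test above, the same server can also be driven with the stock openai client. A minimal sketch under the same assumptions (server already listening on port 53822; the messages below are illustrative, the test itself uses the typed ChatCompletion*MessageParam helpers):

from openai import OpenAI

# OpenLLM serves an OpenAI-compatible API under /v1; the API key is not validated.
client = OpenAI(api_key='na', base_url='http://127.0.0.1:53822/v1')

# The only served model is the normalised model id.
served_model = client.models.list().data[0].id

completion = client.chat.completions.create(
  model=served_model,
  max_tokens=128,
  stream=False,
  messages=[
    {'role': 'system', 'content': 'You are a helpful assistant.'},
    {'role': 'user', 'content': 'What does OpenLLM do?'},
  ],
)
print(completion.choices[0].message.content)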