OpenLLM/openllm-python/tests/regression_test.py
Aaron Pham 97d76eec85 tests: add additional basic testing (#982)
* chore: update rebase tests
* chore: update partial clients before removing
* fix: update clients parsing logics to work with 0.5
* chore: ignore ci runs as to run locally
* chore: update async client tests
* chore: update pre-commit

Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com>
2024-05-23 10:02:23 -04:00


from __future__ import annotations

import pytest, subprocess, sys, openllm, bentoml, asyncio
from openai import AsyncOpenAI
from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam

SERVER_PORT = 53822


@pytest.mark.asyncio
async def test_openai_compatible(model_id: str):
  # Start an OpenLLM server in a subprocess and wait until it reports ready.
  server = subprocess.Popen([sys.executable, '-m', 'openllm', 'start', model_id, '--port', str(SERVER_PORT)])
  await asyncio.sleep(5)
  with bentoml.SyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', server_ready_timeout=90) as client:
    assert client.is_ready(30)
  try:
    # Exercise the OpenAI-compatible /v1 API exposed by the server.
    client = AsyncOpenAI(api_key='na', base_url=f'http://127.0.0.1:{SERVER_PORT}/v1')
    serve_model = (await client.models.list()).data[0].id
    assert serve_model == openllm.utils.normalise_model_name(model_id)
    streamable = await client.chat.completions.create(
      model=serve_model,
      max_tokens=512,
      stream=False,
      messages=[
        ChatCompletionSystemMessageParam(
          role='system', content='You will be the writing assistant that assumes the tone of Ernest Hemingway.'
        ),
        ChatCompletionUserMessageParam(
          role='user', content="Comment on why Camus thinks we should revolt against life's absurdity."
        ),
      ],
    )
    assert streamable is not None
  finally:
    server.terminate()


@pytest.mark.asyncio
async def test_generate_endpoint(model_id: str):
  # Same server setup, but exercise OpenLLM's native generate endpoint instead.
  server = subprocess.Popen([sys.executable, '-m', 'openllm', 'start', model_id, '--port', str(SERVER_PORT)])
  await asyncio.sleep(5)
  with bentoml.SyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', server_ready_timeout=90) as client:
    assert client.is_ready(30)
  try:
    client = openllm.AsyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', api_version='v1')
    assert await client.health()
    response = await client.generate(
      'Tell me more about Apple as a company', stop='technology', llm_config={'temperature': 0.5, 'top_p': 0.2}
    )
    assert response is not None
  finally:
    server.terminate()
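
Both tests take a `model_id` argument that is not defined in this file, so pytest must supply it as a fixture from a conftest.py. Below is a minimal sketch of such a fixture, assuming it is wired to a `--model-id` command-line option; the flag name, default model, and wiring are illustrative assumptions, not the actual OpenLLM conftest.

# conftest.py -- hypothetical sketch; the real OpenLLM suite may provide
# the `model_id` fixture differently.
import pytest


def pytest_addoption(parser):
  # Assumed CLI flag, e.g.: pytest tests/regression_test.py --model-id facebook/opt-125m
  parser.addoption('--model-id', default='facebook/opt-125m', help='model to start for the regression tests')


@pytest.fixture
def model_id(request) -> str:
  return request.config.getoption('--model-id')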