Mirror of https://github.com/bentoml/OpenLLM.git (synced 2026-01-15 10:57:49 -05:00)
* chore: update rebase tests
* chore: update partial clients before removing
* fix: update clients parsing logics to work with 0.5
* chore: ignore ci runs as to run locally
* chore: update async client tests
* chore: update pre-commit

Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com>
57 lines · 2.0 KiB · Python
from __future__ import annotations

import pytest, subprocess, sys, openllm, bentoml, asyncio
from openai import AsyncOpenAI
from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam

SERVER_PORT = 53822


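# Boot an OpenLLM server in a subprocess and hit its OpenAI-compatible /v1 API with the official client.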
@pytest.mark.asyncio
async def test_openai_compatible(model_id: str):
  server = subprocess.Popen([sys.executable, '-m', 'openllm', 'start', model_id, '--port', str(SERVER_PORT)])
  await asyncio.sleep(5)
  with bentoml.SyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', server_ready_timeout=90) as client:
    assert client.is_ready(30)

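  # Query the served model through the OpenAI client; the server process is always terminated in the finally block.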
  try:
    client = AsyncOpenAI(api_key='na', base_url=f'http://127.0.0.1:{SERVER_PORT}/v1')
    serve_model = (await client.models.list()).data[0].id
    assert serve_model == openllm.utils.normalise_model_name(model_id)
    streamable = await client.chat.completions.create(
      model=serve_model,
      max_tokens=512,
      stream=False,
      messages=[
        ChatCompletionSystemMessageParam(
          role='system', content='You will be the writing assistant that assume the tone of Ernest Hemmingway.'
        ),
        ChatCompletionUserMessageParam(
          role='user', content='Comment on why Camus thinks we should revolt against life absurdity.'
        ),
      ],
    )
    assert streamable is not None
  finally:
    server.terminate()


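# Exercise OpenLLM's native generate endpoint through openllm.AsyncHTTPClient.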
@pytest.mark.asyncio
async def test_generate_endpoint(model_id: str):
  server = subprocess.Popen([sys.executable, '-m', 'openllm', 'start', model_id, '--port', str(SERVER_PORT)])
  await asyncio.sleep(5)

  with bentoml.SyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', server_ready_timeout=90) as client:
    assert client.is_ready(30)

  try:
    client = openllm.AsyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', api_version='v1')
    assert await client.health()

    response = await client.generate(
      'Tell me more about Apple as a company', stop='technology', llm_config={'temperature': 0.5, 'top_p': 0.2}
    )
    assert response is not None
  finally:
    server.terminate()
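
Both tests request a model_id argument, which pytest resolves from a fixture defined elsewhere in the test suite. A minimal sketch of a conftest.py that could provide it is below; the --model-id option name and its default value are assumptions for illustration, not the repo's actual conftest.

# conftest.py (hypothetical sketch, not part of the file above)
import pytest


def pytest_addoption(parser):
  # Let the target model be chosen on the command line, e.g. `pytest --model-id facebook/opt-125m`
  parser.addoption('--model-id', action='store', default='facebook/opt-125m', help='model to serve during the tests')


@pytest.fixture
def model_id(request) -> str:
  # Hand the chosen model id to any test that declares a `model_id` parameter
  return request.config.getoption('--model-id')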