OpenLLM/openllm-python/tests/regression_test.py
Aaron Pham f248ea25cd feat(ci): running CI on paperspace (#998)
2024-05-26 13:14:54 -04:00


from __future__ import annotations

import pytest, subprocess, sys, asyncio, openllm, bentoml

from openai import OpenAI
from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam

SERVER_PORT = 53822


@pytest.mark.asyncio
async def test_openai_compatible():
  model_id = 'meta-llama/Meta-Llama-3-8B-Instruct'
  # Start an OpenLLM server as a subprocess, then wait for it to become ready.
  server = subprocess.Popen([sys.executable, '-m', 'openllm', 'start', model_id, '--port', str(SERVER_PORT)])
  await asyncio.sleep(10)
  with bentoml.SyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', server_ready_timeout=120) as client:
    assert client.is_ready(30)
  try:
    # The server exposes an OpenAI-compatible API under /v1, so the stock OpenAI client is used directly.
    client = OpenAI(api_key='na', base_url=f'http://127.0.0.1:{SERVER_PORT}/v1')
    serve_model = client.models.list().data[0].id
    assert serve_model == openllm.utils.normalise_model_name(model_id)
    streamable = client.chat.completions.create(
      model=serve_model,
      max_tokens=128,
      stream=False,
      messages=[
        ChatCompletionSystemMessageParam(
          role='system', content='You will be the writing assistant that assumes the tone of Ernest Hemingway.'
        ),
        ChatCompletionUserMessageParam(
          role='user', content='Comment on why Camus thinks we should revolt against the absurdity of life.'
        ),
      ],
    )
    assert streamable is not None
  finally:
    server.terminate()


@pytest.mark.asyncio
async def test_generate_endpoint():
  server = subprocess.Popen([
    sys.executable,
    '-m',
    'openllm',
    'start',
    'microsoft/Phi-3-mini-4k-instruct',
    '--trust-remote-code',
    '--port',
    str(SERVER_PORT),
  ])
  await asyncio.sleep(10)
  with bentoml.SyncHTTPClient(f'http://127.0.0.1:{SERVER_PORT}', server_ready_timeout=120) as client:
    assert client.is_ready(30)
  try:
    # OpenLLM's own HTTP client targets the native /v1/generate endpoint rather than the OpenAI-compatible one.
    client = openllm.HTTPClient(f'http://127.0.0.1:{SERVER_PORT}', api_version='v1')
    assert client.health()
    response = client.generate(
      'Tell me more about Apple as a company', stop='technology', llm_config={'temperature': 0.5, 'top_p': 0.2}
    )
    assert response is not None
  finally:
    server.terminate()
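

# Running these tests locally (a sketch; the exact invocation and dependency set are assumptions,
# not documented in this file): the @pytest.mark.asyncio markers need the pytest-asyncio plugin,
# and both tests download and serve large checkpoints, so a GPU machine is expected.
#   pip install pytest pytest-asyncio openai bentoml openllm
#   pytest -x openllm-python/tests/regression_test.py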