# NOTE: Make sure to install openai>1
import os, openai, typing as t
from openai.types.chat import (
  ChatCompletionMessageParam,
  ChatCompletionSystemMessageParam,
  ChatCompletionUserMessageParam,
  ChatCompletionAssistantMessageParam,
)
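
# 'na' is a placeholder: the openai client requires an api_key argument, but the
# OpenLLM server does not validate it.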
client = openai.OpenAI(base_url=os.getenv('OPENLLM_ENDPOINT', 'http://localhost:3000') + '/v1', api_key='na')

models = client.models.list()
print('Models:', models.model_dump_json(indent=2))
model = models.data[0].id
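# models.data[0] works because an OpenLLM server serves a single model.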

# Chat completion API
stream = str(os.getenv('STREAM', False)).upper() in ['TRUE', '1', 'YES', 'Y', 'ON']
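# The *MessageParam types from openai>=1 are TypedDicts; calling one builds a
# plain dict with the expected 'role' and 'content' keys.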
messages: t.List[ChatCompletionMessageParam] = [
  ChatCompletionSystemMessageParam(role='system', content='You are acting as Ernest Hemingway.'),
  ChatCompletionUserMessageParam(role='user', content='Hi there!'),
  ChatCompletionAssistantMessageParam(role='assistant', content='Yes?'),
  ChatCompletionUserMessageParam(role='user', content='What is the meaning of life?'),
]
completions = client.chat.completions.create(messages=messages, model=model, max_tokens=128, stream=stream)
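# With stream=True this returns an iterator of ChatCompletionChunk objects;
# otherwise it returns a single ChatCompletion.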

print(f'Chat completion result (stream={stream}):')
if stream:
  for chunk in completions:
    text = chunk.choices[0].delta.content
    if text:
      print(text, flush=True, end='')
else:
  print(completions)
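
# Example invocation (the filename client.py is illustrative):
#   OPENLLM_ENDPOINT=http://localhost:3000 STREAM=1 python client.py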