From ef45ca611e1e8eee77ea687035ddfb97dc33d991 Mon Sep 17 00:00:00 2001
From: bojiang
Date: Wed, 3 Jul 2024 20:28:46 +0800
Subject: [PATCH] refactor: use openai client in run

---
 openllm_next/local.py | 17 +++++++++++++----
 pyproject.toml        |  1 +
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/openllm_next/local.py b/openllm_next/local.py
index e68336c8..c0dd0f5a 100644
--- a/openllm_next/local.py
+++ b/openllm_next/local.py
@@ -46,7 +46,6 @@ async def _run_model(
         venv=venv,
         silent=False,
     ) as server_proc:
-        import bentoml
 
         output(f"Model server started {server_proc.pid}")
 
@@ -82,8 +81,12 @@ async def _run_model(
             stderr_streamer.cancel()
 
         output("Model is ready", style="green")
-        messages = []
-        client = bentoml.AsyncHTTPClient(f"http://localhost:{port}", timeout=timeout)
+        messages: list[dict[str, str]] = []
+
+        from openai import AsyncOpenAI
+
+        client = AsyncOpenAI(base_url=f"http://localhost:{port}/v1")
+        model_id = (await client.models.list()).data[0].id
         while True:
             try:
                 message = input("user: ")
@@ -93,7 +96,13 @@ async def _run_model(
             messages.append(dict(role="user", content=message))
             output("assistant: ", end="", style="lightgreen")
             assistant_message = ""
-            async for text in client.chat(messages=messages):  # type: ignore
+            stream = await client.chat.completions.create(
+                model=model_id,
+                messages=messages,  # type: ignore
+                stream=True,
+            )
+            async for chunk in stream:
+                text = chunk.choices[0].delta.content or ""
                 assistant_message += text
                 output(text, end="", style="lightgreen")
             messages.append(dict(role="assistant", content=assistant_message))
diff --git a/pyproject.toml b/pyproject.toml
index 341cd4dd..95968631 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,6 +20,7 @@ dependencies = [
     "dulwich",
     "tabulate",
     "uv",
+    "openai==1.35.9",
 ]
 
 [project.scripts]