From 1d2e554a9445e938298ac4ecdb43842832fd98ad Mon Sep 17 00:00:00 2001
From: paperspace <29749331+aarnphm@users.noreply.github.com>
Date: Fri, 10 May 2024 03:11:47 +0000
Subject: [PATCH] chore: disable progress bars for cleaner log traces

Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com>
---
 examples/openai_chat_completion_client.py       | 15 +++++----------
 openllm-core/src/openllm_core/_configuration.py |  4 ++++
 openllm-python/src/_openllm_tiny/_entrypoint.py |  3 +++
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/examples/openai_chat_completion_client.py b/examples/openai_chat_completion_client.py
index c4d0b4fe..dced50ff 100644
--- a/examples/openai_chat_completion_client.py
+++ b/examples/openai_chat_completion_client.py
@@ -1,10 +1,8 @@
 # NOTE: Make sure to install openai>1
-import os, openai, typing as t
+import os, openai
 from openai.types.chat import (
-  ChatCompletionMessageParam,
   ChatCompletionSystemMessageParam,
   ChatCompletionUserMessageParam,
-  ChatCompletionAssistantMessageParam,
 )
 
 client = openai.OpenAI(base_url=os.getenv('OPENLLM_ENDPOINT', 'http://localhost:3000') + '/v1', api_key='na')
@@ -15,13 +13,10 @@ model = models.data[0].id
 
 # Chat completion API
 stream = str(os.getenv('STREAM', False)).upper() in ['TRUE', '1', 'YES', 'Y', 'ON']
-messages: t.List[ChatCompletionMessageParam]= [
-  ChatCompletionSystemMessageParam(role='system', content='You are acting as Ernest Hemmingway.'),
-  ChatCompletionUserMessageParam(role='user', content='Hi there!'),
-  ChatCompletionAssistantMessageParam(role='assistant', content='Yes?'),
-  ChatCompletionUserMessageParam(role='user', content='What is the meaning of life?'),
-]
-completions = client.chat.completions.create(messages=messages, model=model, max_tokens=128, stream=stream)
+completions = client.chat.completions.create(messages=[
+  ChatCompletionSystemMessageParam(role='system', content='You will be the writing assistant that assumes the tone of Ernest Hemingway.'),
+  ChatCompletionUserMessageParam(role='user', content='Write an essay on Nietzsche and absurdism.'),
+], model=model, max_tokens=1024, stream=stream)
 
 print(f'Chat completion result (stream={stream}):')
 if stream:
diff --git a/openllm-core/src/openllm_core/_configuration.py b/openllm-core/src/openllm_core/_configuration.py
index 42708ed3..ba225e15 100644
--- a/openllm-core/src/openllm_core/_configuration.py
+++ b/openllm-core/src/openllm_core/_configuration.py
@@ -479,6 +479,10 @@ class LLMConfig(pydantic.BaseModel, abc.ABC):
   @overload
   def __getitem__(self, item: t.Literal['logprobs']) -> t.Optional[int]: ...
   @overload
+  def __getitem__(self, item: t.Literal['detokenize']) -> bool: ...
+  @overload
+  def __getitem__(self, item: t.Literal['truncate_prompt_tokens']) -> t.Optional[Annotated[int, pydantic.Field(ge=1)]]: ...
+  @overload
   def __getitem__(self, item: t.Literal['prompt_logprobs']) -> t.Optional[int]: ...
   @overload
   def __getitem__(self, item: t.Literal['skip_special_tokens']) -> bool: ...
diff --git a/openllm-python/src/_openllm_tiny/_entrypoint.py b/openllm-python/src/_openllm_tiny/_entrypoint.py
index b342bbe2..83e0a262 100644
--- a/openllm-python/src/_openllm_tiny/_entrypoint.py
+++ b/openllm-python/src/_openllm_tiny/_entrypoint.py
@@ -52,6 +52,7 @@ max_model_len=orjson.loads(coreutils.getenv('max_model_len', default=orjson.dump
 gpu_memory_utilization=orjson.loads(coreutils.getenv('gpu_memory_utilization', default=orjson.dumps({__gpu_memory_utilization__}), var=['GPU_MEMORY_UTILISATION']))
 services_config=orjson.loads(coreutils.getenv('services_config',"""{__services_config__}"""))
 '''
+HF_HUB_DISABLE_PROGRESS_BARS = 'HF_HUB_DISABLE_PROGRESS_BARS'
 
 
 class ItemState(enum.Enum):
@@ -261,6 +262,7 @@ def start_command(
   os.environ.update({
     QUIET_ENV_VAR: str(openllm.utils.get_quiet_mode()),
     DEBUG_ENV_VAR: str(openllm.utils.get_debug_mode()),
+    HF_HUB_DISABLE_PROGRESS_BARS: str(not openllm.utils.get_debug_mode()),
     'MODEL_ID': model_id,
     'MODEL_NAME': model_name,
     'SERIALIZATION': serialisation,
@@ -468,6 +470,7 @@ def build_command(
           envs=[
             EnvironmentEntry(name='NVIDIA_DRIVER_CAPABILITIES', value='compute,utility'),
             EnvironmentEntry(name='VLLM_VERSION', value='0.4.2'),
+            EnvironmentEntry(name=HF_HUB_DISABLE_PROGRESS_BARS, value='TRUE'),
           ],
           description=f"OpenLLM service for {llm_config['start_name']}",
           include=list(llm_fs.walk.files()),