From 1d2e554a9445e938298ac4ecdb43842832fd98ad Mon Sep 17 00:00:00 2001 From: paperspace <29749331+aarnphm@users.noreply.github.com> Date: Fri, 10 May 2024 03:11:47 +0000 Subject: [PATCH] chore: disable progressbar for cleaner log trace Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com> --- examples/openai_chat_completion_client.py | 15 +++++---------- openllm-core/src/openllm_core/_configuration.py | 4 ++++ openllm-python/src/_openllm_tiny/_entrypoint.py | 3 +++ 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/examples/openai_chat_completion_client.py b/examples/openai_chat_completion_client.py index c4d0b4fe..dced50ff 100644 --- a/examples/openai_chat_completion_client.py +++ b/examples/openai_chat_completion_client.py @@ -1,10 +1,8 @@ # NOTE: Make sure to install openai>1 -import os, openai, typing as t +import os, openai from openai.types.chat import ( - ChatCompletionMessageParam, ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam, - ChatCompletionAssistantMessageParam, ) client = openai.OpenAI(base_url=os.getenv('OPENLLM_ENDPOINT', 'http://localhost:3000') + '/v1', api_key='na') @@ -15,13 +13,10 @@ model = models.data[0].id # Chat completion API stream = str(os.getenv('STREAM', False)).upper() in ['TRUE', '1', 'YES', 'Y', 'ON'] -messages: t.List[ChatCompletionMessageParam]= [ - ChatCompletionSystemMessageParam(role='system', content='You are acting as Ernest Hemmingway.'), - ChatCompletionUserMessageParam(role='user', content='Hi there!'), - ChatCompletionAssistantMessageParam(role='assistant', content='Yes?'), - ChatCompletionUserMessageParam(role='user', content='What is the meaning of life?'), -] -completions = client.chat.completions.create(messages=messages, model=model, max_tokens=128, stream=stream) +completions = client.chat.completions.create(messages=[ + ChatCompletionSystemMessageParam(role='system', content='You will be the writing assistant that assume the ton of Ernest Hemmingway.'), + ChatCompletionUserMessageParam(role='user', content='Write an essay on Nietzsche and absurdism.'), +], model=model, max_tokens=1024, stream=stream) print(f'Chat completion result (stream={stream}):') if stream: diff --git a/openllm-core/src/openllm_core/_configuration.py b/openllm-core/src/openllm_core/_configuration.py index 42708ed3..ba225e15 100644 --- a/openllm-core/src/openllm_core/_configuration.py +++ b/openllm-core/src/openllm_core/_configuration.py @@ -479,6 +479,10 @@ class LLMConfig(pydantic.BaseModel, abc.ABC): @overload def __getitem__(self, item: t.Literal['logprobs']) -> t.Optional[int]: ... @overload + def __getitem__(self, item: t.Literal['detokenize']) -> bool: ... + @overload + def __getitem__(self, item: t.Literal['truncate_prompt_tokens']) -> t.Optional[Annotated[int, pydantic.Field(ge=1)]]: ... + @overload def __getitem__(self, item: t.Literal['prompt_logprobs']) -> t.Optional[int]: ... @overload def __getitem__(self, item: t.Literal['skip_special_tokens']) -> bool: ... diff --git a/openllm-python/src/_openllm_tiny/_entrypoint.py b/openllm-python/src/_openllm_tiny/_entrypoint.py index b342bbe2..83e0a262 100644 --- a/openllm-python/src/_openllm_tiny/_entrypoint.py +++ b/openllm-python/src/_openllm_tiny/_entrypoint.py @@ -52,6 +52,7 @@ max_model_len=orjson.loads(coreutils.getenv('max_model_len', default=orjson.dump gpu_memory_utilization=orjson.loads(coreutils.getenv('gpu_memory_utilization', default=orjson.dumps({__gpu_memory_utilization__}), var=['GPU_MEMORY_UTILISATION'])) services_config=orjson.loads(coreutils.getenv('services_config',"""{__services_config__}""")) ''' +HF_HUB_DISABLE_PROGRESS_BARS = 'HF_HUB_DISABLE_PROGRESS_BARS' class ItemState(enum.Enum): @@ -261,6 +262,7 @@ def start_command( os.environ.update({ QUIET_ENV_VAR: str(openllm.utils.get_quiet_mode()), DEBUG_ENV_VAR: str(openllm.utils.get_debug_mode()), + HF_HUB_DISABLE_PROGRESS_BARS: str(not openllm.utils.get_debug_mode()), 'MODEL_ID': model_id, 'MODEL_NAME': model_name, 'SERIALIZATION': serialisation, @@ -468,6 +470,7 @@ def build_command( envs=[ EnvironmentEntry(name='NVIDIA_DRIVER_CAPABILITIES', value='compute,utility'), EnvironmentEntry(name='VLLM_VERSION', value='0.4.2'), + EnvironmentEntry(name=HF_HUB_DISABLE_PROGRESS_BARS, value='TRUE'), ], description=f"OpenLLM service for {llm_config['start_name']}", include=list(llm_fs.walk.files()),