From 3ef93fe371a4457fe1af5052f5a89eef3ece6d30 Mon Sep 17 00:00:00 2001
From: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
Date: Fri, 22 Mar 2024 01:19:32 +0000
Subject: [PATCH] chore: use DEBUG for development_mode and support RELOAD
 envvar

Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
---
 openllm-core/src/openllm_core/utils/__init__.py | 4 ++--
 openllm-python/src/_openllm_tiny/_entrypoint.py | 3 ++-
 openllm-python/src/_openllm_tiny/_service.py    | 6 +-----
 3 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/openllm-core/src/openllm_core/utils/__init__.py b/openllm-core/src/openllm_core/utils/__init__.py
index 471a84ce..5e352aaf 100644
--- a/openllm-core/src/openllm_core/utils/__init__.py
+++ b/openllm-core/src/openllm_core/utils/__init__.py
@@ -105,11 +105,11 @@ def io_descriptor(model) -> type[IODescriptor] | None:
   if model is None:
     return model
   try:
-    from _bentoml_sdk.io_models import IOMixin
+    from _bentoml_sdk.io_models import IODescriptor
   except ImportError as err:
     raise RuntimeError('Requires "bentoml>1.2" to use `openllm_core.utils.io_descriptor`') from err
-  return pydantic.create_model(f'{model.__class__.__name__}IODescriptor', __base__=(IOMixin, model))
+  return pydantic.create_model(f'{model.__class__.__name__}IODescriptor', __base__=(IODescriptor, model))
 
 
 def api(
diff --git a/openllm-python/src/_openllm_tiny/_entrypoint.py b/openllm-python/src/_openllm_tiny/_entrypoint.py
index f9b71206..7bbb538f 100644
--- a/openllm-python/src/_openllm_tiny/_entrypoint.py
+++ b/openllm-python/src/_openllm_tiny/_entrypoint.py
@@ -3,6 +3,7 @@ from __future__ import annotations
 import os, logging, traceback, pathlib, sys, fs, click, enum, inflection, bentoml, orjson, openllm, openllm_core, platform, typing as t
 from ._helpers import recommended_instance_type
 from openllm_core.utils import (
+  DEBUG,
   DEBUG_ENV_VAR,
   QUIET_ENV_VAR,
   SHOW_CODEGEN,
@@ -286,7 +287,7 @@ def start_command(
   sys.path.insert(0, working_dir)
   service = load('.', working_dir=working_dir)
   service.inject_config()
-  serve_http('.', working_dir=working_dir)
+  serve_http('.', working_dir=working_dir, reload=check_bool_env('RELOAD', default=False), development_mode=DEBUG)
 
 
 def construct_python_options(llm_config, llm_fs):
diff --git a/openllm-python/src/_openllm_tiny/_service.py b/openllm-python/src/_openllm_tiny/_service.py
index a2eb2a90..0f25ef19 100644
--- a/openllm-python/src/_openllm_tiny/_service.py
+++ b/openllm-python/src/_openllm_tiny/_service.py
@@ -26,7 +26,7 @@ except Exception:
   llm_config = core.AutoConfig.for_model(svars.model_name)
 GenerationInput = core.GenerationInput.from_config(llm_config)
 
-app_v1 = FastAPI(debug=True, description='OpenAI Compatible API support')
+app_v1 = FastAPI(description='OpenAI Compatible API support')
 
 
 @bentoml.mount_asgi_app(app_v1)
@@ -160,7 +160,3 @@ class LLMService:
     return ModelList(
       data=[ModelCard(root=core.utils.normalise_model_name(model_id), id=core.utils.normalise_model_name(model_id))]
     )
-
-
-if __name__ == '__main__':
-  LLMService.serve_http(reload=core.utils.check_bool_env('RELOAD', False))
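
Reviewer notes (not part of the commit):

On the first hunk: the import tracks a rename in bentoml's private SDK, where the mixin used as a base for generated IO models is now exposed as IODescriptor rather than IOMixin. The pydantic.create_model call itself is standard pydantic API. A minimal runnable sketch of the pattern, using a stand-in mixin since _bentoml_sdk is private:

    import pydantic

    class IODescriptorStandIn(pydantic.BaseModel):
      # Stand-in for _bentoml_sdk.io_models.IODescriptor (private bentoml API).
      pass

    class Prompt(pydantic.BaseModel):
      text: str

    # Mirrors the diff: build a new model class at runtime whose bases are
    # the bentoml mixin plus the user's model.
    PromptIODescriptor = pydantic.create_model('PromptIODescriptor', __base__=(IODescriptorStandIn, Prompt))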
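
On the serving change: the `if __name__ == '__main__'` block in _service.py is dropped, and start_command now forwards reload=check_bool_env('RELOAD', default=False) and development_mode=DEBUG to serve_http. RELOAD is read literally from the environment, while DEBUG is the existing flag from openllm_core.utils (driven by DEBUG_ENV_VAR). A sketch of the check_bool_env semantics this patch assumes (the real helper lives in openllm_core.utils; the exact set of accepted strings may differ):

    import os

    def check_bool_env(name: str, default: bool = False) -> bool:
      # Unset -> default; otherwise treat common truthy strings as True.
      value = os.environ.get(name)
      if value is None:
        return default
      return value.strip().lower() in ('1', 'true', 'yes', 'on')

So a hot-reloading dev server would be started roughly as `RELOAD=true openllm start <model>`, with development_mode following whatever DEBUG_ENV_VAR resolves DEBUG to.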