diff --git a/openllm-python/src/_openllm_tiny/Dockerfile.j2 b/openllm-python/src/_openllm_tiny/Dockerfile.j2
new file mode 100644
index 00000000..f2dbdc3d
--- /dev/null
+++ b/openllm-python/src/_openllm_tiny/Dockerfile.j2
@@ -0,0 +1,4 @@
+{% extends bento_base_template %}
+{% block SETUP_BENTO_BASE_IMAGE %}
+{{ super() }}
+{% endblock %}
diff --git a/openllm-python/src/_openllm_tiny/_entrypoint.py b/openllm-python/src/_openllm_tiny/_entrypoint.py
index 55570cd1..f4b57101 100644
--- a/openllm-python/src/_openllm_tiny/_entrypoint.py
+++ b/openllm-python/src/_openllm_tiny/_entrypoint.py
@@ -51,12 +51,6 @@ max_model_len=orjson.loads(coreutils.getenv('max_model_len', default=orjson.dump
 gpu_memory_utilization=orjson.loads(coreutils.getenv('gpu_memory_utilization', default=orjson.dumps({__gpu_memory_utilization__}), var=['GPU_MEMORY_UTILISATION']))
 services_config=orjson.loads(coreutils.getenv('services_config',"""{__services_config__}"""))
 '''
-_DOCKERFILE_TEMPLATE = """\
-{% extends bento_base_template %}
-{% block SETUP_BENTO_BASE_IMAGE %}
-{{ super() }}
-{% endblock %}
-"""
 
 
 class ItemState(enum.Enum):
@@ -235,6 +229,7 @@ def start_command(
   """
   import transformers
 
+  from _bentoml_impl.server import serve_http
   from bentoml._internal.service.loader import load
   from bentoml._internal.log import configure_server_logging
 
@@ -284,9 +279,8 @@ def start_command(
   working_dir = os.path.abspath(os.path.dirname(__file__))
   if sys.path[0] != working_dir:
     sys.path.insert(0, working_dir)
-  load('.', working_dir=working_dir).serve_http(
-    working_dir=working_dir, reload=check_bool_env('RELOAD', default=False), development_mode=DEBUG
-  )
+  load('.', working_dir=working_dir).inject_config()
+  serve_http('.', working_dir=working_dir, reload=check_bool_env('RELOAD', default=False), development_mode=DEBUG)
 
 
 def construct_python_options(llm_config, llm_fs):