diff --git a/openllm-python/src/openllm_cli/entrypoint.py b/openllm-python/src/openllm_cli/entrypoint.py
index 28c07ba2..6506dabd 100644
--- a/openllm-python/src/openllm_cli/entrypoint.py
+++ b/openllm-python/src/openllm_cli/entrypoint.py
@@ -363,7 +363,7 @@ def start_command(
     elif dtype not in {'float', 'float32'}:
       logger.warning('"bfloat16" and "half" are not supported on CPU. OpenLLM will default fallback to "float32".')
     dtype = 'float'  # we need to cast back to full precision if cuda is not available
-  llm = openllm.LLM[t.Any, t.Any](
+  llm = openllm.LLM(
     model_id=model_id,
     model_version=model_version,
     backend=backend,
@@ -590,7 +590,7 @@ def import_command(
       fg='yellow',
     )
 
-  llm = openllm.LLM[t.Any, t.Any](
+  llm = openllm.LLM(
     model_id=model_id,
     model_version=model_version,
     quantize=quantize,
@@ -769,7 +769,7 @@ def build_command(
   if backend == 'pt':
     logger.warning("PyTorch backend is deprecated and will be removed from the next releases. Will set default backend to 'vllm' instead.")
 
-  llm = openllm.LLM[t.Any, t.Any](
+  llm = openllm.LLM(
     model_id=model_id,
     model_version=model_version,
     backend=backend,