fix(cli): avoid runtime __origin__ check for older Python (#798)

fix(cli): avoid runtime __origin__ check on older Python

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
2026-05-01 12:22:43 -04:00 · 2023-12-18 12:33:36 -05:00
parent 37dbccc3d7
commit 5d27337e82
1 changed file with 3 additions and 3 deletions
--- a/openllm-python/src/openllm_cli/entrypoint.py
+++ b/openllm-python/src/openllm_cli/entrypoint.py
@@ -363,7 +363,7 @@ def start_command(
    elif dtype not in {'float', 'float32'}:
      logger.warning('"bfloat16" and "half" are not supported on CPU. OpenLLM will default fallback to "float32".')
    dtype = 'float'  # we need to cast back to full precision if cuda is not available
-  llm = openllm.LLM[t.Any, t.Any](
+  llm = openllm.LLM(
    model_id=model_id,
    model_version=model_version,
    backend=backend,
@@ -590,7 +590,7 @@ def import_command(
      fg='yellow',
    )

-  llm = openllm.LLM[t.Any, t.Any](
+  llm = openllm.LLM(
    model_id=model_id,
    model_version=model_version,
    quantize=quantize,
@@ -769,7 +769,7 @@ def build_command(
  if backend == 'pt':
    logger.warning("PyTorch backend is deprecated and will be removed from the next releases. Will set default backend to 'vllm' instead.")

-  llm = openllm.LLM[t.Any, t.Any](
+  llm = openllm.LLM(
    model_id=model_id,
    model_version=model_version,
    backend=backend,