refactor: packages (#249)

2026-04-23 08:28:24 -04:00 · 2023-08-22 08:55:46 -04:00
parent a964e659c1
commit 3ffb25a872
148 changed files with 2899 additions and 1937 deletions
--- a/openllm-python/src/openllm/cli/_factory.py
+++ b/openllm-python/src/openllm/cli/_factory.py
@@ -1,16 +1,16 @@
 from __future__ import annotations
-import functools, importlib.util, os, typing as t, logging
-import click, click_option_group as cog, inflection, orjson, bentoml, openllm
+import functools, importlib.util, os, typing as t, logging, click, click_option_group as cog, inflection, orjson, bentoml, openllm
+from click import shell_completion as sc
 from bentoml_cli.utils import BentoMLCommandGroup
 from click.shell_completion import CompletionItem
-from openllm.utils import DEBUG
+from openllm_core.utils import DEBUG
 from bentoml._internal.configuration.containers import BentoMLContainer
-from openllm._typing_compat import LiteralString, DictStrAny, ParamSpec, Concatenate
+from openllm_core._typing_compat import LiteralString, DictStrAny, ParamSpec, Concatenate
 from . import termui

 if t.TYPE_CHECKING:
  import subprocess
-  from openllm._configuration import LLMConfig
+  from openllm_core._configuration import LLMConfig

 logger = logging.getLogger(__name__)

@@ -20,6 +20,12 @@ LiteralOutput = t.Literal["json", "pretty", "porcelain"]
 _AnyCallable = t.Callable[..., t.Any]
 FC = t.TypeVar("FC", bound=t.Union[_AnyCallable, click.Command])

+def bento_complete_envvar(ctx: click.Context, param: click.Parameter, incomplete: str) -> list[sc.CompletionItem]:
+  """Shell-completion callback suggesting Bento tags that start with ``incomplete``.
+
+  Iterates over ``bentoml.list()`` and keeps only entries whose ``info.labels`` contain both
+  ``start_name`` and ``bundler``. ``ctx`` and ``param`` are accepted for the callback signature
+  but are not used.
+  """
+  required_labels = {"start_name", "bundler"}
+  suggestions: list[sc.CompletionItem] = []
+  for bento in bentoml.list():
+    tag = str(bento.tag)
+    # Cheap prefix test first; labels are only inspected for tags that already match.
+    if not tag.startswith(incomplete): continue
+    if not all(label in bento.info.labels for label in required_labels): continue
+    suggestions.append(sc.CompletionItem(tag, help="Bento"))
+  return suggestions
+
+def model_complete_envvar(ctx: click.Context, param: click.Parameter, incomplete: str) -> list[sc.CompletionItem]:
+  """Shell-completion callback suggesting dasherized model names that start with ``incomplete``.
+
+  Args:
+    ctx: Active click context (unused; accepted for the callback signature).
+    param: Parameter being completed (unused).
+    incomplete: Partial value typed so far.
+
+  Returns:
+    One ``CompletionItem`` (help text ``"Model"``) per matching key of ``openllm.CONFIG_MAPPING``,
+    rendered in dasherized form.
+  """
+  # Compare in dasherized form on both sides: the suggestions emitted here are dasherized, so testing
+  # the raw mapping key would never complete a prefix typed with '-' against a key containing '_'.
+  # Dasherizing the typed prefix as well keeps every prefix the raw-key comparison used to accept.
+  prefix = inflection.dasherize(incomplete)
+  return [sc.CompletionItem(name, help="Model") for name in map(inflection.dasherize, openllm.CONFIG_MAPPING) if name.startswith(prefix)]
+
 def parse_config_options(config: LLMConfig, server_timeout: int, workers_per_resource: float, device: t.Tuple[str, ...] | None, cors: bool, environ: DictStrAny) -> DictStrAny:
  # TODO: Support amd.com/gpu on k8s
  _bentoml_config_options_env = environ.pop("BENTOML_CONFIG_OPTIONS", "")
@@ -316,7 +322,7 @@ def cors_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callable[[FC
 def machine_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callable[[FC], FC]: return cli_option("--machine", is_flag=True, default=False, hidden=True, **attrs)(f)
 def model_id_option(f: _AnyCallable | None = None, *, model_env: openllm.utils.EnvVarMixin | None = None, **attrs: t.Any) -> t.Callable[[FC], FC]: return cli_option("--model-id", type=click.STRING, default=None, envvar=model_env.model_id if model_env is not None else None, show_envvar=model_env is not None, help="Optional model_id name or path for (fine-tune) weight.", **attrs)(f)
 def model_version_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callable[[FC], FC]: return cli_option("--model-version", type=click.STRING, default=None, help="Optional model version to save for this model. It will be inferred automatically from model-id.", **attrs)(f)
-def model_name_argument(f: _AnyCallable | None = None, required: bool = True) -> t.Callable[[FC], FC]: return cli_argument("model_name", type=click.Choice([inflection.dasherize(name) for name in openllm.CONFIG_MAPPING]), required=required)(f)
+def model_name_argument(f: _AnyCallable | None = None, required: bool = True, **attrs: t.Any) -> t.Callable[[FC], FC]:
+  """Attach the positional ``model_name`` argument, restricted to the dasherized keys of ``openllm.CONFIG_MAPPING``.
+
+  ``required`` and any extra ``attrs`` are forwarded unchanged to ``cli_argument``; the resulting
+  decorator is then applied to ``f``.
+  """
+  model_choices = click.Choice([inflection.dasherize(name) for name in openllm.CONFIG_MAPPING])
+  return cli_argument("model_name", type=model_choices, required=required, **attrs)(f)
 def quantize_option(f: _AnyCallable | None = None, *, build: bool = False, model_env: openllm.utils.EnvVarMixin | None = None, **attrs: t.Any) -> t.Callable[[FC], FC]:
  return cli_option(
      "--quantise", "--quantize", "quantize", type=click.Choice(["int8", "int4", "gptq"]), default=None, envvar=model_env.quantize if model_env is not None else None, show_envvar=model_env is not None, help="""Dynamic quantization for running this LLM.
@@ -382,7 +388,7 @@ def serialisation_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Cal
  )(f)
 def container_registry_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callable[[FC], FC]:
  return cli_option(
-      "--container-registry", "container_registry", type=str, default="ecr", show_default=True, show_envvar=True, envvar="OPENLLM_CONTAINER_REGISTRY", callback=container_registry_callback, help="""The default container registry to get the base image for building BentoLLM.
+      "--container-registry", "container_registry", type=click.Choice(list(openllm.bundle.CONTAINER_NAMES)), default="ecr", show_default=True, show_envvar=True, envvar="OPENLLM_CONTAINER_REGISTRY", callback=container_registry_callback, help="""The default container registry to get the base image for building BentoLLM.

                                                                                                                        Currently, it supports 'ecr', 'ghcr.io', 'docker.io'

--- a/openllm-python/src/openllm/cli/_sdk.py
+++ b/openllm-python/src/openllm/cli/_sdk.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
-import itertools, logging, os, re, subprocess, sys, typing as t
-import bentoml, openllm
+import itertools, logging, os, re, subprocess, sys, typing as t, bentoml, openllm, openllm_core
 from simple_di import Provide, inject
 from bentoml._internal.configuration.containers import BentoMLContainer
 from openllm.exceptions import OpenLLMException
@@ -8,10 +7,9 @@ from . import termui
 from ._factory import start_command_factory

 if t.TYPE_CHECKING:
-  from openllm._typing_compat import LiteralString, LiteralRuntime
+  from openllm_core._configuration import LLMConfig
+  from openllm_core._typing_compat import LiteralString, LiteralRuntime, LiteralContainerRegistry, LiteralContainerVersionStrategy
  from bentoml._internal.bento import BentoStore
-  from openllm._configuration import LLMConfig
-  from openllm.bundle.oci import LiteralContainerRegistry, LiteralContainerVersionStrategy

 logger = logging.getLogger(__name__)

@@ -58,7 +56,7 @@ def _start(model_name: str, /, *, model_id: str | None = None, timeout: int = 30
  """
  from .entrypoint import start_command, start_grpc_command
  llm_config = openllm.AutoConfig.for_model(model_name)
-  _ModelEnv = openllm.utils.EnvVarMixin(model_name, openllm.utils.first_not_none(framework, default=llm_config.default_implementation()), model_id=model_id, bettertransformer=bettertransformer, quantize=quantize, runtime=runtime)
+  _ModelEnv = openllm_core.utils.EnvVarMixin(model_name, openllm_core.utils.first_not_none(framework, default=llm_config.default_implementation()), model_id=model_id, bettertransformer=bettertransformer, quantize=quantize, runtime=runtime)
  os.environ[_ModelEnv.framework] = _ModelEnv["framework_value"]

  args: list[str] = ["--runtime", runtime]
@@ -203,5 +201,5 @@ def _list_models() -> dict[str, t.Any]:
  return models_command.main(args=["-o", "json", "--show-available", "--machine"], standalone_mode=False)


-start, start_grpc, build, import_model, list_models = openllm.utils.codegen.gen_sdk(_start, _serve_grpc=False), openllm.utils.codegen.gen_sdk(_start, _serve_grpc=True), openllm.utils.codegen.gen_sdk(_build), openllm.utils.codegen.gen_sdk(_import_model), openllm.utils.codegen.gen_sdk(_list_models)
+start, start_grpc, build, import_model, list_models = openllm_core.utils.codegen.gen_sdk(_start, _serve_grpc=False), openllm_core.utils.codegen.gen_sdk(_start, _serve_grpc=True), openllm_core.utils.codegen.gen_sdk(_build), openllm_core.utils.codegen.gen_sdk(_import_model), openllm_core.utils.codegen.gen_sdk(_list_models)
 __all__ = ["start", "start_grpc", "build", "import_model", "list_models"]
--- a/openllm-python/src/openllm/cli/entrypoint.py
+++ b/openllm-python/src/openllm/cli/entrypoint.py
@@ -20,10 +20,9 @@ bentomodel = openllm.import_model("falcon", model_id='tiiuae/falcon-7b-instruct'
 ```
 """
 from __future__ import annotations
-import functools, http.client, inspect, itertools, logging, os, platform, re, subprocess, sys, time, traceback, typing as t
-import attr, click, click_option_group as cog, fs, fs.copy, fs.errors, inflection, orjson, bentoml, openllm
-from bentoml_cli.utils import BentoMLCommandGroup, opt_callback
+import functools, http.client, inspect, itertools, logging, os, platform, re, subprocess, sys, time, traceback, typing as t, attr, click, click_option_group as cog, fs, fs.copy, fs.errors, inflection, orjson, bentoml, openllm
 from simple_di import Provide, inject
+from bentoml_cli.utils import BentoMLCommandGroup, opt_callback
 from bentoml._internal.configuration.containers import BentoMLContainer
 from bentoml._internal.models.model import ModelStore
 from . import termui
@@ -56,8 +55,8 @@ from openllm.models.auto import (
  AutoConfig,
  AutoLLM,
 )
-from openllm._typing_compat import DictStrAny, ParamSpec, Concatenate, LiteralString, Self, LiteralRuntime
-from openllm.utils import (
+from openllm_core._typing_compat import DictStrAny, ParamSpec, Concatenate, LiteralString, Self, LiteralRuntime
+from openllm_core.utils import (
  DEBUG,
  DEBUG_ENV_VAR,
  OPTIONAL_DEPENDENCIES,
@@ -72,21 +71,20 @@ from openllm.utils import (
  first_not_none,
  get_debug_mode,
  get_quiet_mode,
-  infer_auto_class,
  is_torch_available,
  is_transformers_supports_agent,
  resolve_user_filepath,
  set_debug_mode,
  set_quiet_mode,
 )
+from openllm.utils import infer_auto_class

 if t.TYPE_CHECKING:
  import torch
  from bentoml._internal.bento import BentoStore
  from bentoml._internal.container import DefaultBuilder
-  from openllm.client import BaseClient
-  from openllm._schema import EmbeddingsOutput
-  from openllm.bundle.oci import LiteralContainerRegistry, LiteralContainerVersionStrategy
+  from openllm_core._schema import EmbeddingsOutput
+  from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy
 else: torch = LazyLoader("torch", globals(), "torch")

 P = ParamSpec("P")
@@ -271,7 +269,7 @@ def cli() -> None:
  \b
  An open platform for operating large language models in production.
  Fine-tune, serve, deploy, and monitor any LLMs with ease.
-  """  # noqa: D205
+  """

@cli.group(cls=OpenLLMCommandGroup, context_settings=termui.CONTEXT_SETTINGS, name="start", aliases=["start-http"])
 def start_command() -> None:
@@ -670,10 +668,8 @@ def instruct_command(endpoint: str, timeout: int, agent: LiteralString, output:
  """
  client = openllm.client.HTTPClient(endpoint, timeout=timeout)

-  try:
-    client.call("metadata")
-  except http.client.BadStatusLine:
-    raise click.ClickException(f"{endpoint} is neither a HTTP server nor reachable.") from None
+  try: client.call("metadata")
+  except http.client.BadStatusLine: raise click.ClickException(f"{endpoint} is neither a HTTP server nor reachable.") from None
  if agent == "hf":
    if not is_transformers_supports_agent(): raise click.UsageError("Transformers version should be at least 4.29 to support HfAgent. Upgrade with 'pip install -U transformers'")
    _memoized = {k: v[0] for k, v in _memoized.items() if v}
@@ -700,7 +696,7 @@ def embed_command(ctx: click.Context, text: tuple[str, ...], endpoint: str, time
  $ openllm embed --endpoint http://12.323.2.1:3000 "What is the meaning of life?" "How many stars are there in the sky?"
  ```
  """
-  client = t.cast("BaseClient[t.Any]", openllm.client.HTTPClient(endpoint, timeout=timeout) if server_type == "http" else openllm.client.GrpcClient(endpoint, timeout=timeout))
+  client = openllm.client.HTTPClient(endpoint, timeout=timeout) if server_type == "http" else openllm.client.GrpcClient(endpoint, timeout=timeout)
  try:
    gen_embed = client.embed(text)
  except ValueError:
@@ -733,14 +729,14 @@ def query_command(ctx: click.Context, /, prompt: str, endpoint: str, timeout: in
  """
  _memoized = {k: orjson.loads(v[0]) for k, v in _memoized.items() if v}
  if server_type == "grpc": endpoint = re.sub(r"http://", "", endpoint)
-  client = t.cast("BaseClient[t.Any]", openllm.client.HTTPClient(endpoint, timeout=timeout) if server_type == "http" else openllm.client.GrpcClient(endpoint, timeout=timeout))
+  client = openllm.client.HTTPClient(endpoint, timeout=timeout) if server_type == "http" else openllm.client.GrpcClient(endpoint, timeout=timeout)
  input_fg, generated_fg = "magenta", "cyan"
  if output != "porcelain":
    termui.echo("==Input==\n", fg="white")
    termui.echo(f"{prompt}", fg=input_fg)
  res = client.query(prompt, return_response="raw", **{**client.configuration, **_memoized})
  if output == "pretty":
-    response = client.llm.postprocess_generate(prompt, res["responses"])
+    response = client.config.postprocess_generate(prompt, res["responses"])
    termui.echo("\n\n==Responses==\n", fg="white")
    termui.echo(response, fg=generated_fg)
  elif output == "json":
--- a/openllm-python/src/openllm/cli/extension/build_base_container.py
+++ b/openllm-python/src/openllm/cli/extension/build_base_container.py
@@ -1,37 +1,26 @@
-
 from __future__ import annotations
-import typing as t
-
-import click
-import orjson
-
-import openllm
-
-from .. import termui
-from .._factory import machine_option
-
-if t.TYPE_CHECKING:
-  from openllm.bundle.oci import LiteralContainerRegistry, LiteralContainerVersionStrategy
-
+import typing as t, click, orjson, openllm
+from openllm.cli import termui
+from openllm.cli._factory import machine_option, container_registry_option
+if t.TYPE_CHECKING: from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy
@click.command(
    "build_base_container", context_settings=termui.CONTEXT_SETTINGS, help="""Base image builder for BentoLLM.

                By default, the base image will include custom kernels (PagedAttention via vllm, FlashAttention-v2, etc.) built with CUDA 11.8, Python 3.9 on Ubuntu22.04.
-
                Optionally, this can also be pushed directly to remote registry. Currently support ``docker.io``, ``ghcr.io`` and ``quay.io``.

+                \b
                If '--machine' is passed, then it will run the process quietly, and output a JSON to the current running terminal.
-
                This command is only useful for debugging and for building custom base image for extending BentoML with custom base images and custom kernels.

                Note that we already release images on our CI to ECR and GHCR, so you don't need to build it yourself.
                """
 )
-@click.option("--registry", multiple=True, type=click.Choice(list(openllm.bundle.CONTAINER_NAMES)), help="Target registry to create image tag on.", default=None)
+@container_registry_option
@click.option("--version-strategy", type=click.Choice(["release", "latest", "nightly"]), default="nightly", help="Version strategy to use for tagging the image.")
@click.option("--push/--no-push", help="Whether to push to remote repository", is_flag=True, default=False)
@machine_option
-def cli(registry: tuple[LiteralContainerRegistry, ...] | None, version_strategy: LiteralContainerVersionStrategy, push: bool, machine: bool) -> dict[str, str]:
-  mapping = openllm.bundle.build_container(registry, version_strategy, push, machine)
+def cli(container_registry: tuple[LiteralContainerRegistry, ...] | None, version_strategy: LiteralContainerVersionStrategy, push: bool, machine: bool) -> dict[str, str]:
+  mapping = openllm.bundle.build_container(container_registry, version_strategy, push, machine)
  if machine: termui.echo(orjson.dumps(mapping, option=orjson.OPT_INDENT_2).decode(), fg="white")
  return mapping
--- a/openllm-python/src/openllm/cli/extension/dive_bentos.py
+++ b/openllm-python/src/openllm/cli/extension/dive_bentos.py
@@ -1,24 +1,16 @@
-
 from __future__ import annotations
-import shutil
-import subprocess
-import typing as t
-
-import click
-import psutil
+import shutil, subprocess, typing as t, click, psutil, bentoml
 from simple_di import Provide, inject
-
-import bentoml
 from bentoml._internal.configuration.containers import BentoMLContainer

-from .. import termui
+from openllm.cli import termui
+from openllm.cli._factory import bento_complete_envvar, machine_option

-if t.TYPE_CHECKING:
-  from bentoml._internal.bento import BentoStore
+if t.TYPE_CHECKING: from bentoml._internal.bento import BentoStore

@click.command("dive_bentos", context_settings=termui.CONTEXT_SETTINGS)
-@click.argument("bento", type=str)
-@click.option("--machine", is_flag=True, default=False, hidden=True)
+@click.argument("bento", type=str, shell_complete=bento_complete_envvar)
+@machine_option
@click.pass_context
@inject
 def cli(ctx: click.Context, bento: str, machine: bool, _bento_store: BentoStore = Provide[BentoMLContainer.bento_store]) -> str | None:
@@ -32,5 +24,5 @@ def cli(ctx: click.Context, bento: str, machine: bool, _bento_store: BentoStore
  if machine: return bentomodel.path
  # copy and paste this into a new shell
  if psutil.WINDOWS: subprocess.check_call([shutil.which("dir") or "dir"], cwd=bentomodel.path)
-  else: subprocess.check_call([shutil.which("tree") or "tree"], cwd=bentomodel.path)
+  else: subprocess.check_call([shutil.which("ls") or "ls", "-Rrthla"], cwd=bentomodel.path)
  ctx.exit(0)
--- a/openllm-python/src/openllm/cli/extension/get_containerfile.py
+++ b/openllm-python/src/openllm/cli/extension/get_containerfile.py
@@ -1,24 +1,18 @@
-
 from __future__ import annotations
-import typing as t
-
-import click
+import typing as t, click, bentoml
 from simple_di import Provide, inject
-
-import bentoml
 from bentoml._internal.bento.bento import BentoInfo
 from bentoml._internal.bento.build_config import DockerOptions
 from bentoml._internal.configuration.containers import BentoMLContainer
 from bentoml._internal.container.generate import generate_containerfile
+from openllm.cli import termui
+from openllm.cli._factory import bento_complete_envvar
+from openllm_core.utils import bentoml_cattr

-from .. import termui
-from ...utils import bentoml_cattr
-
-if t.TYPE_CHECKING:
-  from bentoml._internal.bento import BentoStore
+if t.TYPE_CHECKING: from bentoml._internal.bento import BentoStore

@click.command("get_containerfile", context_settings=termui.CONTEXT_SETTINGS, help="Return Containerfile of any given Bento.")
-@click.argument("bento", type=str)
+@click.argument("bento", type=str, shell_complete=bento_complete_envvar)
@click.pass_context
@inject
 def cli(ctx: click.Context, bento: str, _bento_store: BentoStore = Provide[BentoMLContainer.bento_store]) -> str:
--- a/openllm-python/src/openllm/cli/extension/get_prompt.py
+++ b/openllm-python/src/openllm/cli/extension/get_prompt.py
@@ -1,25 +1,18 @@
-
 from __future__ import annotations
-import typing as t
-
-import click
-import inflection
-import orjson
+import typing as t, click, inflection, orjson, openllm
 from bentoml_cli.utils import opt_callback
-
-import openllm
-
-from .. import termui
-from ..._prompt import process_prompt
+from openllm.cli import termui
+from openllm.cli._factory import model_complete_envvar, output_option, machine_option
+from openllm_core._prompt import process_prompt

 LiteralOutput = t.Literal["json", "pretty", "porcelain"]

@click.command("get_prompt", context_settings=termui.CONTEXT_SETTINGS)
-@click.argument("model_name", type=click.Choice([inflection.dasherize(name) for name in openllm.CONFIG_MAPPING.keys()]))
+@click.argument("model_name", type=click.Choice([inflection.dasherize(name) for name in openllm.CONFIG_MAPPING.keys()]), shell_complete=model_complete_envvar)
@click.argument("prompt", type=click.STRING)
-@click.option("-o", "--output", "output", type=click.Choice(["json", "pretty", "porcelain"]), default="pretty", help="Showing output type.", show_default=True, envvar="OPENLLM_OUTPUT", show_envvar=True)
+@output_option
@click.option("--format", type=click.STRING, default=None)
-@click.option("--machine", is_flag=True, default=False, hidden=True)
+@machine_option
@click.option("--opt", help="Define additional prompt variables. (format: ``--opt system_prompt='You are a useful assistant'``)", required=False, multiple=True, callback=opt_callback, metavar="ARG=VALUE[,ARG=VALUE]")
@click.pass_context
 def cli(ctx: click.Context, /, model_name: str, prompt: str, format: str | None, output: LiteralOutput, machine: bool, _memoized: dict[str, t.Any], **_: t.Any) -> str | None:
--- a/openllm-python/src/openllm/cli/extension/list_bentos.py
+++ b/openllm-python/src/openllm/cli/extension/list_bentos.py
@@ -1,16 +1,8 @@
-
 from __future__ import annotations
-
-import click
-import inflection
-import orjson
-
-import bentoml
-import openllm
+import click, inflection, orjson, bentoml, openllm
 from bentoml._internal.utils import human_readable_size
-
-from .. import termui
-from .._factory import LiteralOutput, output_option
+from openllm.cli import termui
+from openllm.cli._factory import LiteralOutput, output_option

@click.command("list_bentos", context_settings=termui.CONTEXT_SETTINGS)
@output_option(default_value="json")
--- a/openllm-python/src/openllm/cli/extension/list_models.py
+++ b/openllm-python/src/openllm/cli/extension/list_models.py
@@ -1,14 +1,13 @@
 from __future__ import annotations
 import typing as t, bentoml, openllm, orjson, inflection ,click
-from bentoml._internal.utils import human_readable_size
-
 from openllm.cli import termui
-from openllm.cli._factory import LiteralOutput, model_name_argument, output_option
+from bentoml._internal.utils import human_readable_size
+from openllm.cli._factory import LiteralOutput, model_name_argument, output_option, model_complete_envvar

-if t.TYPE_CHECKING: from openllm._typing_compat import DictStrAny
+if t.TYPE_CHECKING: from openllm_core._typing_compat import DictStrAny

@click.command("list_models", context_settings=termui.CONTEXT_SETTINGS)
-@model_name_argument(required=False)
+@model_name_argument(required=False, shell_complete=model_complete_envvar)
@output_option(default_value="json")
 def cli(model_name: str | None, output: LiteralOutput) -> DictStrAny:
  """This is equivalent to openllm models --show-available less the nice table."""
--- a/openllm-python/src/openllm/cli/extension/playground.py
+++ b/openllm-python/src/openllm/cli/extension/playground.py
@@ -1,13 +1,12 @@
 from __future__ import annotations
-import importlib.machinery, logging, os, pkgutil, subprocess, sys, tempfile, typing as t
-import click, yaml
+import importlib.machinery, logging, os, pkgutil, subprocess, sys, tempfile, typing as t, click, yaml
 from openllm.cli import termui
 from openllm import playground
-from openllm.utils import is_jupyter_available, is_jupytext_available, is_notebook_available
+from openllm_core.utils import is_jupyter_available, is_jupytext_available, is_notebook_available

 if t.TYPE_CHECKING:
  import jupytext, nbformat
-  from openllm._typing_compat import DictStrAny
+  from openllm_core._typing_compat import DictStrAny

 logger = logging.getLogger(__name__)

@@ -38,7 +37,7 @@ def cli(ctx: click.Context, output_dir: str | None, port: int) -> None:
  \b
  > [!NOTE]
  > This command requires Jupyter to be installed. Install it with 'pip install "openllm[playground]"'
-  """  # noqa: D301
+  """
  if not is_jupyter_available() or not is_jupytext_available() or not is_notebook_available():
    raise RuntimeError("Playground requires 'jupyter', 'jupytext', and 'notebook'. Install it with 'pip install \"openllm[playground]\"'")
  metadata = load_notebook_metadata()
--- a/openllm-python/src/openllm/cli/termui.py
+++ b/openllm-python/src/openllm/cli/termui.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 import os, typing as t, click, inflection, openllm
-if t.TYPE_CHECKING: from openllm._typing_compat import DictStrAny
+if t.TYPE_CHECKING: from openllm_core._typing_compat import DictStrAny

 def echo(text: t.Any, fg: str = "green", _with_style: bool = True, **attrs: t.Any) -> None:
  attrs["fg"] = fg if not openllm.utils.get_debug_mode() else None