mirror of https://github.com/bentoml/OpenLLM.git (synced 2026-01-13 01:49:58 -05:00)
refactor(cli): move out to its own packages (#619)
Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
@@ -75,14 +75,14 @@ license = "Apache-2.0"
 name = "openllm"
 requires-python = ">=3.8"

 [project.scripts]
-openllm = "openllm.cli.entrypoint:cli"
-openllm-build-base-container = "openllm.cli.extension.build_base_container:cli"
-openllm-dive-bentos = "openllm.cli.extension.dive_bentos:cli"
-openllm-get-containerfile = "openllm.cli.extension.get_containerfile:cli"
-openllm-get-prompt = "openllm.cli.extension.get_prompt:cli"
-openllm-list-bentos = "openllm.cli.extension.list_bentos:cli"
-openllm-list-models = "openllm.cli.extension.list_models:cli"
-openllm-playground = "openllm.cli.extension.playground:cli"
+openllm = "openllm_cli.entrypoint:cli"
+openllm-build-base-container = "openllm_cli.extension.build_base_container:cli"
+openllm-dive-bentos = "openllm_cli.extension.dive_bentos:cli"
+openllm-get-containerfile = "openllm_cli.extension.get_containerfile:cli"
+openllm-get-prompt = "openllm_cli.extension.get_prompt:cli"
+openllm-list-bentos = "openllm_cli.extension.list_bentos:cli"
+openllm-list-models = "openllm_cli.extension.list_models:cli"
+openllm-playground = "openllm_cli.extension.playground:cli"

 [project.urls]
 Blog = "https://modelserving.com"
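Note on the hunk above: each entry in [project.scripts] maps a console command to a "module:attribute" target that the installer turns into an executable, so the refactor only repoints these targets from the openllm.cli subpackage to the new top-level openllm_cli package. As a rough sketch (not OpenLLM's actual implementation), the target of openllm = "openllm_cli.entrypoint:cli" is simply an importable callable:

    # sketch of an entrypoint module that such a script target could resolve to
    import click

    @click.group(name='openllm')
    def cli() -> None:
      """Invoked when the installed `openllm` console script runs."""

    if __name__ == '__main__':
      cli()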
@@ -136,7 +136,7 @@ root = ".."
 [tool.hatch.metadata]
 allow-direct-references = true
 [tool.hatch.build.targets.wheel]
-only-include = ["src/openllm"]
+only-include = ["src/openllm", "src/openllm_cli"]
 sources = ["src"]
 [tool.hatch.build.targets.sdist]
 exclude = [
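With hatch's src layout, sources = ["src"] strips the src/ prefix at build time, so adding "src/openllm_cli" to only-include ships the new CLI package as a second top-level package in the same wheel. A quick sanity check one could run against a built wheel (the filename here is illustrative, not the real build output):

    import zipfile

    # hypothetical wheel filename; adjust to the actual build artifact
    names = zipfile.ZipFile('openllm-0.4.0-py3-none-any.whl').namelist()
    print(any(n.startswith('openllm/') for n in names))      # expect True
    print(any(n.startswith('openllm_cli/') for n in names))  # expect True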
@@ -14,6 +14,10 @@ import os as _os
 import pathlib as _pathlib
 import warnings as _warnings

+import openllm_cli as _cli
+
+from openllm_cli import _sdk
+
 from . import utils as utils
@@ -55,7 +59,6 @@ __lazy = utils.LazyModule(
   '_strategies': ['CascadingResourceStrategy', 'get_resource'],
   'entrypoints': ['mount_entrypoints'],
   'serialisation': ['ggml', 'transformers'],
-  'cli._sdk': ['start', 'start_grpc', 'build', 'import_model', 'list_models'],
   '_quantisation': ['infer_quantisation_config'],
   '_llm': ['LLM', 'LLMRunner', 'LLMRunnable'],
   '_generation': [
@@ -66,7 +69,15 @@ __lazy = utils.LazyModule(
     'prepare_logits_processor',
   ],
   },
-  extra_objects={'COMPILED': COMPILED},
+  extra_objects={
+    'COMPILED': COMPILED,
+    'cli': _cli,
+    'start': _sdk.start,
+    'start_grpc': _sdk.start_grpc,
+    'build': _sdk.build,
+    'import_model': _sdk.import_model,
+    'list_models': _sdk.list_models,
+  },
 )
 __all__ = __lazy.__all__
 __dir__ = __lazy.__dir__
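The extra_objects expansion above is what keeps openllm.start, openllm.build, and friends working after the CLI moved out: LazyModule binds these eagerly-resolved objects onto the module alongside the lazily-imported submodules. A minimal sketch of that pattern, assuming nothing about OpenLLM's actual LazyModule internals:

    import importlib
    import types

    class LazyModuleSketch(types.ModuleType):
      # Submodules import on first attribute access; extra objects are
      # bound up front (e.g. 'start' borrowed from openllm_cli._sdk).
      def __init__(self, name, submodules, extra_objects):
        super().__init__(name)
        self._submodules = set(submodules)
        self._extra = dict(extra_objects)

      def __getattr__(self, item):
        if item in self._extra:
          return self._extra[item]
        if item in self._submodules:
          return importlib.import_module(f'{self.__name__}.{item}')
        raise AttributeError(item)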
@@ -1,3 +1,4 @@
+import openllm_cli as _cli
 from openllm_core._configuration import GenerationConfig as GenerationConfig
 from openllm_core._configuration import LLMConfig as LLMConfig
 from openllm_core._configuration import SamplingParams as SamplingParams
@@ -21,7 +22,6 @@ from openllm_core.config import StableLMConfig as StableLMConfig
 from openllm_core.config import StarCoderConfig as StarCoderConfig
 from . import exceptions as exceptions
 from . import bundle as bundle
-from . import cli as cli
 from . import client as client
 from . import playground as playground
 from . import serialisation as serialisation
@@ -39,11 +39,11 @@ from ._llm import LLMRunner as LLMRunner
 from ._quantisation import infer_quantisation_config as infer_quantisation_config
 from ._strategies import CascadingResourceStrategy as CascadingResourceStrategy
 from ._strategies import get_resource as get_resource
-from .cli._sdk import build as build
-from .cli._sdk import import_model as import_model
-from .cli._sdk import list_models as list_models
-from .cli._sdk import start as start
-from .cli._sdk import start_grpc as start_grpc
+from openllm_cli._sdk import build as build
+from openllm_cli._sdk import import_model as import_model
+from openllm_cli._sdk import list_models as list_models
+from openllm_cli._sdk import start as start
+from openllm_cli._sdk import start_grpc as start_grpc
 from .client import AsyncHTTPClient as AsyncHTTPClient
 from .client import HTTPClient as HTTPClient
 from .entrypoints import mount_entrypoints as mount_entrypoints
@@ -51,4 +51,5 @@ from .protocol import openai as openai
 from .serialisation import ggml as ggml
 from .serialisation import transformers as transformers

+cli = _cli
 COMPILED: bool = ...
@@ -8,6 +8,6 @@ To start any OpenLLM model:
 """

 if __name__ == '__main__':
-  from openllm.cli.entrypoint import cli
+  from openllm_cli.entrypoint import cli

   cli()
@@ -130,7 +130,7 @@ def construct_docker_options(
   container_registry: LiteralContainerRegistry,
   container_version_strategy: LiteralContainerVersionStrategy,
 ) -> DockerOptions:
-  from openllm.cli._factory import parse_config_options
+  from openllm_cli._factory import parse_config_options

   environ = parse_config_options(llm.config, llm.config['timeout'], 1.0, None, True, os.environ.copy())
   env_dict = {
@@ -189,7 +189,7 @@ def _build(
   Returns:
     ``bentoml.Bento | str``: BentoLLM instance. This can be used to serve the LLM or can be pushed to BentoCloud.
   """
-  from ..serialisation.transformers.weights import has_safetensors_weights
+  from openllm.serialisation.transformers.weights import has_safetensors_weights

   args: list[str] = [
     sys.executable,
@@ -172,7 +172,7 @@ class Extensions(click.MultiCommand):

   def get_command(self, ctx: click.Context, cmd_name: str) -> click.Command | None:
     try:
-      mod = __import__(f'openllm.cli.extension.{cmd_name}', None, None, ['cli'])
+      mod = __import__(f'openllm_cli.extension.{cmd_name}', None, None, ['cli'])
     except ImportError:
       return None
     return mod.cli
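One detail worth noting in this hunk: __import__ is called with a non-empty fromlist, which makes it return the leaf extension module rather than the top-level openllm_cli package. The modern equivalent, shown here for comparison (assuming openllm_cli is installed):

    import importlib

    # returns openllm_cli.extension.list_models directly, just like
    # __import__('openllm_cli.extension.list_models', None, None, ['cli'])
    mod = importlib.import_module('openllm_cli.extension.list_models')
    command = mod.cli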
@@ -345,12 +345,16 @@ class OpenLLMCommandGroup(BentoMLCommandGroup):
     formatter.write_dl(rows)


+_PACKAGE_NAME = 'openllm'
+
+
 @click.group(cls=OpenLLMCommandGroup, context_settings=termui.CONTEXT_SETTINGS, name='openllm')
 @click.version_option(
   None,
   '--version',
   '-v',
-  message=f'%(prog)s, %(version)s (compiled: {openllm.COMPILED})\nPython ({platform.python_implementation()}) {platform.python_version()}',
+  package_name=_PACKAGE_NAME,
+  message=f'{_PACKAGE_NAME}, %(version)s (compiled: {openllm.COMPILED})\nPython ({platform.python_implementation()}) {platform.python_version()}',
 )
 def cli() -> None:
   """\b
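The package_name=_PACKAGE_NAME addition matters because click infers the distribution to read a version from based on the decorated function's module; once cli lives in openllm_cli, that inference would point at the wrong (or an unpublished) distribution, so the lookup is pinned to the openllm distribution explicitly. A minimal sketch of the same idea, assuming click 8's version_option semantics:

    import click

    @click.group()
    @click.version_option(None, '--version', '-v', package_name='openllm')
    def app() -> None:
      """Reports the version of the installed 'openllm' distribution."""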
@@ -421,7 +425,7 @@ def start_command(
   adapter_map: dict[str, str] | None = attrs.pop('adapter_map', None)
   prompt_template = prompt_template_file.read() if prompt_template_file is not None else None

-  from ..serialisation.transformers.weights import has_safetensors_weights
+  from openllm.serialisation.transformers.weights import has_safetensors_weights

   serialisation = t.cast(
     LiteralSerialisation,
@@ -545,7 +549,7 @@ def start_grpc_command(
   adapter_map: dict[str, str] | None = attrs.pop('adapter_map', None)
   prompt_template = prompt_template_file.read() if prompt_template_file is not None else None

-  from ..serialisation.transformers.weights import has_safetensors_weights
+  from openllm.serialisation.transformers.weights import has_safetensors_weights

   serialisation = first_not_none(
     serialisation, default='safetensors' if has_safetensors_weights(model_id, model_version) else 'legacy'
@@ -786,7 +790,7 @@ def import_command(
   > only use this option if you want the weight to be quantized by default. Note that OpenLLM also
   > support on-demand quantisation during initial startup.
   """
-  from ..serialisation.transformers.weights import has_safetensors_weights
+  from openllm.serialisation.transformers.weights import has_safetensors_weights

   if model_id in openllm.CONFIG_MAPPING:
     _model_name = model_id
@@ -971,8 +975,8 @@ def build_command(
   > To build the bento with compiled OpenLLM, make sure to prepend HATCH_BUILD_HOOKS_ENABLE=1. Make sure that the deployment
   > target also use the same Python version and architecture as build machine.
   """
-  from .._llm import normalise_model_name
-  from ..serialisation.transformers.weights import has_safetensors_weights
+  from openllm._llm import normalise_model_name
+  from openllm.serialisation.transformers.weights import has_safetensors_weights

   if model_id in openllm.CONFIG_MAPPING:
     _model_name = model_id
@@ -1402,7 +1406,7 @@ def query_command(
     raise click.ClickException("'grpc' is currently disabled.")
   _memoized = {k: orjson.loads(v[0]) for k, v in _memoized.items() if v}
   # TODO: grpc support
-  client = openllm.client.HTTPClient(address=endpoint, timeout=timeout)
+  client = openllm.HTTPClient(address=endpoint, timeout=timeout)
   input_fg, generated_fg = 'magenta', 'cyan'

   if stream:
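The query command now uses the top-level re-export openllm.HTTPClient (declared in the stub earlier in this diff as from .client import HTTPClient) instead of reaching into openllm.client. Usage is unchanged; the address value below is illustrative:

    import openllm

    # both spellings should resolve to the same class after the re-export
    client = openllm.HTTPClient(address='http://localhost:3000', timeout=30)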
@@ -6,9 +6,9 @@ import orjson

 import openllm

-from openllm.cli import termui
-from openllm.cli._factory import container_registry_option
-from openllm.cli._factory import machine_option
+from openllm_cli import termui
+from openllm_cli._factory import container_registry_option
+from openllm_cli._factory import machine_option


 if t.TYPE_CHECKING:
@@ -12,9 +12,9 @@ from simple_di import inject
 import bentoml

 from bentoml._internal.configuration.containers import BentoMLContainer
-from openllm.cli import termui
-from openllm.cli._factory import bento_complete_envvar
-from openllm.cli._factory import machine_option
+from openllm_cli import termui
+from openllm_cli._factory import bento_complete_envvar
+from openllm_cli._factory import machine_option


 if t.TYPE_CHECKING:
@@ -12,8 +12,8 @@ from bentoml._internal.bento.bento import BentoInfo
 from bentoml._internal.bento.build_config import DockerOptions
 from bentoml._internal.configuration.containers import BentoMLContainer
 from bentoml._internal.container.generate import generate_containerfile
-from openllm.cli import termui
-from openllm.cli._factory import bento_complete_envvar
+from openllm_cli import termui
+from openllm_cli._factory import bento_complete_envvar
 from openllm_core.utils import converter

@@ -12,8 +12,8 @@ from bentoml_cli.utils import opt_callback
 import openllm
 import openllm_core

-from openllm.cli import termui
-from openllm.cli._factory import model_complete_envvar
+from openllm_cli import termui
+from openllm_cli._factory import model_complete_envvar
 from openllm_core.prompts import process_prompt

@@ -8,7 +8,7 @@ import bentoml
 import openllm

 from bentoml._internal.utils import human_readable_size
-from openllm.cli import termui
+from openllm_cli import termui


 @click.command('list_bentos', context_settings=termui.CONTEXT_SETTINGS)
@@ -9,9 +9,9 @@ import bentoml
 import openllm

 from bentoml._internal.utils import human_readable_size
-from openllm.cli import termui
-from openllm.cli._factory import model_complete_envvar
-from openllm.cli._factory import model_name_argument
+from openllm_cli import termui
+from openllm_cli._factory import model_complete_envvar
+from openllm_cli._factory import model_name_argument


 if t.TYPE_CHECKING:
@@ -14,7 +14,7 @@ import nbformat
 import yaml

 from openllm import playground
-from openllm.cli import termui
+from openllm_cli import termui
 from openllm_core.utils import is_jupyter_available
 from openllm_core.utils import is_jupytext_available
 from openllm_core.utils import is_notebook_available