mirror of https://github.com/bentoml/OpenLLM.git (synced 2026-01-13 01:49:58 -05:00)
refactor(cli): move out to its own packages (#619)
Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
@@ -75,14 +75,14 @@ license = "Apache-2.0"
 name = "openllm"
 requires-python = ">=3.8"

 [project.scripts]
-openllm = "openllm.cli.entrypoint:cli"
-openllm-build-base-container = "openllm.cli.extension.build_base_container:cli"
-openllm-dive-bentos = "openllm.cli.extension.dive_bentos:cli"
-openllm-get-containerfile = "openllm.cli.extension.get_containerfile:cli"
-openllm-get-prompt = "openllm.cli.extension.get_prompt:cli"
-openllm-list-bentos = "openllm.cli.extension.list_bentos:cli"
-openllm-list-models = "openllm.cli.extension.list_models:cli"
-openllm-playground = "openllm.cli.extension.playground:cli"
+openllm = "openllm_cli.entrypoint:cli"
+openllm-build-base-container = "openllm_cli.extension.build_base_container:cli"
+openllm-dive-bentos = "openllm_cli.extension.dive_bentos:cli"
+openllm-get-containerfile = "openllm_cli.extension.get_containerfile:cli"
+openllm-get-prompt = "openllm_cli.extension.get_prompt:cli"
+openllm-list-bentos = "openllm_cli.extension.list_bentos:cli"
+openllm-list-models = "openllm_cli.extension.list_models:cli"
+openllm-playground = "openllm_cli.extension.playground:cli"

 [project.urls]
 Blog = "https://modelserving.com"
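Note on the hunk above: each entry in [project.scripts] maps a console command to a "module:attribute" target that the installer turns into an executable, so the refactor only repoints these targets from the openllm.cli subpackage to the new top-level openllm_cli package. As a rough sketch (not OpenLLM's actual implementation), the target of openllm = "openllm_cli.entrypoint:cli" is simply an importable callable:

    # sketch of an entrypoint module that such a script target could resolve to
    import click

    @click.group(name='openllm')
    def cli() -> None:
      """Invoked when the installed `openllm` console script runs."""

    if __name__ == '__main__':
      cli()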
@@ -136,7 +136,7 @@ root = ".."
 [tool.hatch.metadata]
 allow-direct-references = true
 [tool.hatch.build.targets.wheel]
-only-include = ["src/openllm"]
+only-include = ["src/openllm", "src/openllm_cli"]
 sources = ["src"]
 [tool.hatch.build.targets.sdist]
 exclude = [
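With hatch's src layout, sources = ["src"] strips the src/ prefix at build time, so adding "src/openllm_cli" to only-include ships the new CLI package as a second top-level package in the same wheel. A quick sanity check one could run against a built wheel (the filename here is illustrative, not the real build output):

    import zipfile

    # hypothetical wheel filename; adjust to the actual build artifact
    names = zipfile.ZipFile('openllm-0.4.0-py3-none-any.whl').namelist()
    print(any(n.startswith('openllm/') for n in names))      # expect True
    print(any(n.startswith('openllm_cli/') for n in names))  # expect True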
@@ -14,6 +14,10 @@ import os as _os
 import pathlib as _pathlib
 import warnings as _warnings

+import openllm_cli as _cli
+
+from openllm_cli import _sdk
+
 from . import utils as utils
@@ -55,7 +59,6 @@ __lazy = utils.LazyModule(
   '_strategies': ['CascadingResourceStrategy', 'get_resource'],
   'entrypoints': ['mount_entrypoints'],
   'serialisation': ['ggml', 'transformers'],
-  'cli._sdk': ['start', 'start_grpc', 'build', 'import_model', 'list_models'],
   '_quantisation': ['infer_quantisation_config'],
   '_llm': ['LLM', 'LLMRunner', 'LLMRunnable'],
   '_generation': [
@@ -66,7 +69,15 @@ __lazy = utils.LazyModule(
     'prepare_logits_processor',
   ],
   },
-  extra_objects={'COMPILED': COMPILED},
+  extra_objects={
+    'COMPILED': COMPILED,
+    'cli': _cli,
+    'start': _sdk.start,
+    'start_grpc': _sdk.start_grpc,
+    'build': _sdk.build,
+    'import_model': _sdk.import_model,
+    'list_models': _sdk.list_models,
+  },
 )
 __all__ = __lazy.__all__
 __dir__ = __lazy.__dir__
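The extra_objects expansion above is what keeps openllm.start, openllm.build, and friends working after the CLI moved out: LazyModule binds these eagerly-resolved objects onto the module alongside the lazily-imported submodules. A minimal sketch of that pattern, assuming nothing about OpenLLM's actual LazyModule internals:

    import importlib
    import types

    class LazyModuleSketch(types.ModuleType):
      # Submodules import on first attribute access; extra objects are
      # bound up front (e.g. 'start' borrowed from openllm_cli._sdk).
      def __init__(self, name, submodules, extra_objects):
        super().__init__(name)
        self._submodules = set(submodules)
        self._extra = dict(extra_objects)

      def __getattr__(self, item):
        if item in self._extra:
          return self._extra[item]
        if item in self._submodules:
          return importlib.import_module(f'{self.__name__}.{item}')
        raise AttributeError(item)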
@@ -1,3 +1,4 @@
+import openllm_cli as _cli
 from openllm_core._configuration import GenerationConfig as GenerationConfig
 from openllm_core._configuration import LLMConfig as LLMConfig
 from openllm_core._configuration import SamplingParams as SamplingParams
@@ -21,7 +22,6 @@ from openllm_core.config import StableLMConfig as StableLMConfig
 from openllm_core.config import StarCoderConfig as StarCoderConfig
 from . import exceptions as exceptions
 from . import bundle as bundle
-from . import cli as cli
 from . import client as client
 from . import playground as playground
 from . import serialisation as serialisation
@@ -39,11 +39,11 @@ from ._llm import LLMRunner as LLMRunner
 from ._quantisation import infer_quantisation_config as infer_quantisation_config
 from ._strategies import CascadingResourceStrategy as CascadingResourceStrategy
 from ._strategies import get_resource as get_resource
-from .cli._sdk import build as build
-from .cli._sdk import import_model as import_model
-from .cli._sdk import list_models as list_models
-from .cli._sdk import start as start
-from .cli._sdk import start_grpc as start_grpc
+from openllm_cli._sdk import build as build
+from openllm_cli._sdk import import_model as import_model
+from openllm_cli._sdk import list_models as list_models
+from openllm_cli._sdk import start as start
+from openllm_cli._sdk import start_grpc as start_grpc
 from .client import AsyncHTTPClient as AsyncHTTPClient
 from .client import HTTPClient as HTTPClient
 from .entrypoints import mount_entrypoints as mount_entrypoints
@@ -51,4 +51,5 @@ from .protocol import openai as openai
 from .serialisation import ggml as ggml
 from .serialisation import transformers as transformers

+cli = _cli
 COMPILED: bool = ...
@@ -8,6 +8,6 @@ To start any OpenLLM model:
 """

 if __name__ == '__main__':
-  from openllm.cli.entrypoint import cli
+  from openllm_cli.entrypoint import cli

   cli()
@@ -130,7 +130,7 @@ def construct_docker_options(
   container_registry: LiteralContainerRegistry,
   container_version_strategy: LiteralContainerVersionStrategy,
 ) -> DockerOptions:
-  from openllm.cli._factory import parse_config_options
+  from openllm_cli._factory import parse_config_options

   environ = parse_config_options(llm.config, llm.config['timeout'], 1.0, None, True, os.environ.copy())
   env_dict = {
@@ -189,7 +189,7 @@ def _build(
   Returns:
     ``bentoml.Bento | str``: BentoLLM instance. This can be used to serve the LLM or can be pushed to BentoCloud.
   """
-  from ..serialisation.transformers.weights import has_safetensors_weights
+  from openllm.serialisation.transformers.weights import has_safetensors_weights

   args: list[str] = [
     sys.executable,
@@ -172,7 +172,7 @@ class Extensions(click.MultiCommand):

   def get_command(self, ctx: click.Context, cmd_name: str) -> click.Command | None:
     try:
-      mod = __import__(f'openllm.cli.extension.{cmd_name}', None, None, ['cli'])
+      mod = __import__(f'openllm_cli.extension.{cmd_name}', None, None, ['cli'])
     except ImportError:
       return None
     return mod.cli
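One detail worth noting in this hunk: __import__ is called with a non-empty fromlist, which makes it return the leaf extension module rather than the top-level openllm_cli package. The modern equivalent, shown here for comparison (assuming openllm_cli is installed):

    import importlib

    # returns openllm_cli.extension.list_models directly, just like
    # __import__('openllm_cli.extension.list_models', None, None, ['cli'])
    mod = importlib.import_module('openllm_cli.extension.list_models')
    command = mod.cli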
@@ -345,12 +345,16 @@ class OpenLLMCommandGroup(BentoMLCommandGroup):
     formatter.write_dl(rows)


+_PACKAGE_NAME = 'openllm'
+
+
 @click.group(cls=OpenLLMCommandGroup, context_settings=termui.CONTEXT_SETTINGS, name='openllm')
 @click.version_option(
   None,
   '--version',
   '-v',
-  message=f'%(prog)s, %(version)s (compiled: {openllm.COMPILED})\nPython ({platform.python_implementation()}) {platform.python_version()}',
+  package_name=_PACKAGE_NAME,
+  message=f'{_PACKAGE_NAME}, %(version)s (compiled: {openllm.COMPILED})\nPython ({platform.python_implementation()}) {platform.python_version()}',
 )
 def cli() -> None:
   """\b
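The package_name=_PACKAGE_NAME addition matters because click infers the distribution to read a version from based on the decorated function's module; once cli lives in openllm_cli, that inference would point at the wrong (or an unpublished) distribution, so the lookup is pinned to the openllm distribution explicitly. A minimal sketch of the same idea, assuming click 8's version_option semantics:

    import click

    @click.group()
    @click.version_option(None, '--version', '-v', package_name='openllm')
    def app() -> None:
      """Reports the version of the installed 'openllm' distribution."""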
@@ -421,7 +425,7 @@ def start_command(
   adapter_map: dict[str, str] | None = attrs.pop('adapter_map', None)
   prompt_template = prompt_template_file.read() if prompt_template_file is not None else None

-  from ..serialisation.transformers.weights import has_safetensors_weights
+  from openllm.serialisation.transformers.weights import has_safetensors_weights

   serialisation = t.cast(
     LiteralSerialisation,
@@ -545,7 +549,7 @@ def start_grpc_command(
   adapter_map: dict[str, str] | None = attrs.pop('adapter_map', None)
   prompt_template = prompt_template_file.read() if prompt_template_file is not None else None

-  from ..serialisation.transformers.weights import has_safetensors_weights
+  from openllm.serialisation.transformers.weights import has_safetensors_weights

   serialisation = first_not_none(
     serialisation, default='safetensors' if has_safetensors_weights(model_id, model_version) else 'legacy'
@@ -786,7 +790,7 @@ def import_command(
   > only use this option if you want the weight to be quantized by default. Note that OpenLLM also
   > support on-demand quantisation during initial startup.
   """
-  from ..serialisation.transformers.weights import has_safetensors_weights
+  from openllm.serialisation.transformers.weights import has_safetensors_weights

   if model_id in openllm.CONFIG_MAPPING:
     _model_name = model_id
@@ -971,8 +975,8 @@ def build_command(
   > To build the bento with compiled OpenLLM, make sure to prepend HATCH_BUILD_HOOKS_ENABLE=1. Make sure that the deployment
   > target also use the same Python version and architecture as build machine.
   """
-  from .._llm import normalise_model_name
-  from ..serialisation.transformers.weights import has_safetensors_weights
+  from openllm._llm import normalise_model_name
+  from openllm.serialisation.transformers.weights import has_safetensors_weights

   if model_id in openllm.CONFIG_MAPPING:
     _model_name = model_id
@@ -1402,7 +1406,7 @@ def query_command(
     raise click.ClickException("'grpc' is currently disabled.")
   _memoized = {k: orjson.loads(v[0]) for k, v in _memoized.items() if v}
   # TODO: grpc support
-  client = openllm.client.HTTPClient(address=endpoint, timeout=timeout)
+  client = openllm.HTTPClient(address=endpoint, timeout=timeout)
   input_fg, generated_fg = 'magenta', 'cyan'

   if stream:
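The query command now uses the top-level re-export openllm.HTTPClient (declared in the stub earlier in this diff as from .client import HTTPClient) instead of reaching into openllm.client. Usage is unchanged; the address value below is illustrative:

    import openllm

    # both spellings should resolve to the same class after the re-export
    client = openllm.HTTPClient(address='http://localhost:3000', timeout=30)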
@@ -6,9 +6,9 @@ import orjson

 import openllm

-from openllm.cli import termui
-from openllm.cli._factory import container_registry_option
-from openllm.cli._factory import machine_option
+from openllm_cli import termui
+from openllm_cli._factory import container_registry_option
+from openllm_cli._factory import machine_option


 if t.TYPE_CHECKING:
@@ -12,9 +12,9 @@ from simple_di import inject
 import bentoml

 from bentoml._internal.configuration.containers import BentoMLContainer
-from openllm.cli import termui
-from openllm.cli._factory import bento_complete_envvar
-from openllm.cli._factory import machine_option
+from openllm_cli import termui
+from openllm_cli._factory import bento_complete_envvar
+from openllm_cli._factory import machine_option


 if t.TYPE_CHECKING:
@@ -12,8 +12,8 @@ from bentoml._internal.bento.bento import BentoInfo
 from bentoml._internal.bento.build_config import DockerOptions
 from bentoml._internal.configuration.containers import BentoMLContainer
 from bentoml._internal.container.generate import generate_containerfile
-from openllm.cli import termui
-from openllm.cli._factory import bento_complete_envvar
+from openllm_cli import termui
+from openllm_cli._factory import bento_complete_envvar
 from openllm_core.utils import converter

@@ -12,8 +12,8 @@ from bentoml_cli.utils import opt_callback
 import openllm
 import openllm_core

-from openllm.cli import termui
-from openllm.cli._factory import model_complete_envvar
+from openllm_cli import termui
+from openllm_cli._factory import model_complete_envvar
 from openllm_core.prompts import process_prompt

@@ -8,7 +8,7 @@ import bentoml
 import openllm

 from bentoml._internal.utils import human_readable_size
-from openllm.cli import termui
+from openllm_cli import termui


 @click.command('list_bentos', context_settings=termui.CONTEXT_SETTINGS)
@@ -9,9 +9,9 @@ import bentoml
 import openllm

 from bentoml._internal.utils import human_readable_size
-from openllm.cli import termui
-from openllm.cli._factory import model_complete_envvar
-from openllm.cli._factory import model_name_argument
+from openllm_cli import termui
+from openllm_cli._factory import model_complete_envvar
+from openllm_cli._factory import model_name_argument


 if t.TYPE_CHECKING:
@@ -14,7 +14,7 @@ import nbformat
 import yaml

 from openllm import playground
-from openllm.cli import termui
+from openllm_cli import termui
 from openllm_core.utils import is_jupyter_available
 from openllm_core.utils import is_jupytext_available
 from openllm_core.utils import is_notebook_available