refactor(cli): move out to its own packages (#619)

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
Aaron Pham
2023-11-12 18:25:44 -05:00
committed by GitHub
parent 38a7d2a5b5
commit e0632a85ed
22 changed files with 70 additions and 48 deletions

View File

@@ -14,6 +14,10 @@ import os as _os
import pathlib as _pathlib
import warnings as _warnings
import openllm_cli as _cli
from openllm_cli import _sdk
from . import utils as utils
@@ -55,7 +59,6 @@ __lazy = utils.LazyModule(
'_strategies': ['CascadingResourceStrategy', 'get_resource'],
'entrypoints': ['mount_entrypoints'],
'serialisation': ['ggml', 'transformers'],
'cli._sdk': ['start', 'start_grpc', 'build', 'import_model', 'list_models'],
'_quantisation': ['infer_quantisation_config'],
'_llm': ['LLM', 'LLMRunner', 'LLMRunnable'],
'_generation': [
@@ -66,7 +69,15 @@ __lazy = utils.LazyModule(
'prepare_logits_processor',
],
},
extra_objects={'COMPILED': COMPILED},
extra_objects={
'COMPILED': COMPILED,
'cli': _cli,
'start': _sdk.start,
'start_grpc': _sdk.start_grpc,
'build': _sdk.build,
'import_model': _sdk.import_model,
'list_models': _sdk.list_models,
},
)
__all__ = __lazy.__all__
__dir__ = __lazy.__dir__
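A brief, hedged sketch of what this wiring enables; the model id below is illustrative and assumes the standalone `openllm_cli` package is installed alongside `openllm`.

```python
import openllm
from openllm_cli import _sdk

# The lazy module now resolves the SDK entry points to the standalone CLI package.
assert openllm.start is _sdk.start
assert openllm.build is _sdk.build

# Existing call sites keep working unchanged (blocking call; arguments are illustrative).
# openllm.start('facebook/opt-1.3b', timeout=360)
```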

View File

@@ -1,3 +1,4 @@
import openllm_cli as _cli
from openllm_core._configuration import GenerationConfig as GenerationConfig
from openllm_core._configuration import LLMConfig as LLMConfig
from openllm_core._configuration import SamplingParams as SamplingParams
@@ -21,7 +22,6 @@ from openllm_core.config import StableLMConfig as StableLMConfig
from openllm_core.config import StarCoderConfig as StarCoderConfig
from . import exceptions as exceptions
from . import bundle as bundle
from . import cli as cli
from . import client as client
from . import playground as playground
from . import serialisation as serialisation
@@ -39,11 +39,11 @@ from ._llm import LLMRunner as LLMRunner
from ._quantisation import infer_quantisation_config as infer_quantisation_config
from ._strategies import CascadingResourceStrategy as CascadingResourceStrategy
from ._strategies import get_resource as get_resource
from .cli._sdk import build as build
from .cli._sdk import import_model as import_model
from .cli._sdk import list_models as list_models
from .cli._sdk import start as start
from .cli._sdk import start_grpc as start_grpc
from openllm_cli._sdk import build as build
from openllm_cli._sdk import import_model as import_model
from openllm_cli._sdk import list_models as list_models
from openllm_cli._sdk import start as start
from openllm_cli._sdk import start_grpc as start_grpc
from .client import AsyncHTTPClient as AsyncHTTPClient
from .client import HTTPClient as HTTPClient
from .entrypoints import mount_entrypoints as mount_entrypoints
@@ -51,4 +51,5 @@ from .protocol import openai as openai
from .serialisation import ggml as ggml
from .serialisation import transformers as transformers
cli = _cli
COMPILED: bool = ...

View File

@@ -8,6 +8,6 @@ To start any OpenLLM model:
"""
if __name__ == '__main__':
from openllm.cli.entrypoint import cli
from openllm_cli.entrypoint import cli
cli()

View File

@@ -130,7 +130,7 @@ def construct_docker_options(
container_registry: LiteralContainerRegistry,
container_version_strategy: LiteralContainerVersionStrategy,
) -> DockerOptions:
from openllm.cli._factory import parse_config_options
from openllm_cli._factory import parse_config_options
environ = parse_config_options(llm.config, llm.config['timeout'], 1.0, None, True, os.environ.copy())
env_dict = {

View File

@@ -1,4 +0,0 @@
"""OpenLLM CLI.
For more information see ``openllm -h``.
"""

View File

@@ -1,501 +0,0 @@
from __future__ import annotations
import functools
import logging
import os
import typing as t
import click
import click_option_group as cog
import inflection
from bentoml_cli.utils import BentoMLCommandGroup
from click import ClickException
from click import shell_completion as sc
import bentoml
import openllm
from openllm_core._configuration import LLMConfig
from openllm_core._typing_compat import Concatenate
from openllm_core._typing_compat import DictStrAny
from openllm_core._typing_compat import LiteralBackend
from openllm_core._typing_compat import LiteralQuantise
from openllm_core._typing_compat import LiteralSerialisation
from openllm_core._typing_compat import ParamSpec
from openllm_core._typing_compat import get_literal_args
from openllm_core.utils import DEBUG
class _OpenLLM_GenericInternalConfig(LLMConfig):
__config__ = {
'name_type': 'lowercase',
'default_id': 'openllm/generic',
'model_ids': ['openllm/generic'],
'architecture': 'PreTrainedModel',
}
class GenerationConfig:
top_k: int = 15
top_p: float = 0.9
temperature: float = 0.75
max_new_tokens: int = 128
logger = logging.getLogger(__name__)
P = ParamSpec('P')
LiteralOutput = t.Literal['json', 'pretty', 'porcelain']
_AnyCallable = t.Callable[..., t.Any]
FC = t.TypeVar('FC', bound=t.Union[_AnyCallable, click.Command])
def bento_complete_envvar(ctx: click.Context, param: click.Parameter, incomplete: str) -> list[sc.CompletionItem]:
return [
sc.CompletionItem(str(it.tag), help='Bento')
for it in bentoml.list()
if str(it.tag).startswith(incomplete) and all(k in it.info.labels for k in {'start_name', 'bundler'})
]
def model_complete_envvar(ctx: click.Context, param: click.Parameter, incomplete: str) -> list[sc.CompletionItem]:
return [
sc.CompletionItem(inflection.dasherize(it), help='Model')
for it in openllm.CONFIG_MAPPING
if it.startswith(incomplete)
]
def parse_config_options(
config: LLMConfig,
server_timeout: int,
workers_per_resource: float,
device: t.Tuple[str, ...] | None,
cors: bool,
environ: DictStrAny,
) -> DictStrAny:
# TODO: Support amd.com/gpu on k8s
_bentoml_config_options_env = environ.pop('BENTOML_CONFIG_OPTIONS', '')
_bentoml_config_options_opts = [
'tracing.sample_rate=1.0',
'api_server.max_runner_connections=25',
f'runners."llm-{config["start_name"]}-runner".batching.max_batch_size=128',
f'api_server.traffic.timeout={server_timeout}',
f'runners."llm-{config["start_name"]}-runner".traffic.timeout={config["timeout"]}',
f'runners."llm-{config["start_name"]}-runner".workers_per_resource={workers_per_resource}',
]
if device:
if len(device) > 1:
_bentoml_config_options_opts.extend(
[
f'runners."llm-{config["start_name"]}-runner".resources."nvidia.com/gpu"[{idx}]={dev}'
for idx, dev in enumerate(device)
]
)
else:
_bentoml_config_options_opts.append(
f'runners."llm-{config["start_name"]}-runner".resources."nvidia.com/gpu"=[{device[0]}]'
)
if cors:
_bentoml_config_options_opts.extend(
['api_server.http.cors.enabled=true', 'api_server.http.cors.access_control_allow_origins="*"']
)
_bentoml_config_options_opts.extend(
[
f'api_server.http.cors.access_control_allow_methods[{idx}]="{it}"'
for idx, it in enumerate(['GET', 'OPTIONS', 'POST', 'HEAD', 'PUT'])
]
)
_bentoml_config_options_env += (' ' if _bentoml_config_options_env else '') + ' '.join(_bentoml_config_options_opts)  # parenthesised so the options are appended even when the env var already has content
environ['BENTOML_CONFIG_OPTIONS'] = _bentoml_config_options_env
if DEBUG:
logger.debug('Setting BENTOML_CONFIG_OPTIONS=%s', _bentoml_config_options_env)
return environ
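# Illustrative only (not part of the original source): for an OPT config with
# server_timeout=3600, workers_per_resource=0.5, no devices and cors disabled, the
# resulting environment ends up with roughly:
#   BENTOML_CONFIG_OPTIONS='tracing.sample_rate=1.0 api_server.max_runner_connections=25
#   runners."llm-opt-runner".batching.max_batch_size=128 api_server.traffic.timeout=3600 ...'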
_adapter_mapping_key = 'adapter_map'
def _id_callback(ctx: click.Context, _: click.Parameter, value: t.Tuple[str, ...] | None) -> None:
if not value:
return None
if _adapter_mapping_key not in ctx.params:
ctx.params[_adapter_mapping_key] = {}
for v in value:
adapter_id, *adapter_name = v.rsplit(':', maxsplit=1)
# try to resolve the full path if users pass in a relative path;
# currently only supports resolving one level against the current directory
try:
adapter_id = openllm.utils.resolve_user_filepath(adapter_id, os.getcwd())
except FileNotFoundError:
pass
if len(adapter_name) == 0:
raise ClickException(f'Adapter name is required for {adapter_id}')
ctx.params[_adapter_mapping_key][adapter_id] = adapter_name[0]
return None
def start_decorator(serve_grpc: bool = False) -> t.Callable[[FC], t.Callable[[FC], FC]]:
def wrapper(fn: FC) -> t.Callable[[FC], FC]:
composed = openllm.utils.compose(
_OpenLLM_GenericInternalConfig().to_click_options,
_http_server_args if not serve_grpc else _grpc_server_args,
cog.optgroup.group('General LLM Options', help='The following options are related to running LLM Server.'),
model_version_option(factory=cog.optgroup),
system_message_option(factory=cog.optgroup),
prompt_template_file_option(factory=cog.optgroup),
cog.optgroup.option('--server-timeout', type=int, default=None, help='Server timeout in seconds'),
workers_per_resource_option(factory=cog.optgroup),
cors_option(factory=cog.optgroup),
backend_option(factory=cog.optgroup),
cog.optgroup.group(
'LLM Optimization Options',
help="""Optimization related options.
OpenLLM supports running model k-bit quantization (8-bit, 4-bit), GPTQ quantization, PagedAttention via vLLM.
The following are either in our roadmap or currently being worked on:
- DeepSpeed Inference: [link](https://www.deepspeed.ai/inference/)
- GGML: Fast inference on [bare metal](https://github.com/ggerganov/ggml)
""",
),
quantize_option(factory=cog.optgroup),
serialisation_option(factory=cog.optgroup),
cog.optgroup.option(
'--device',
type=openllm.utils.dantic.CUDA,
multiple=True,
envvar='CUDA_VISIBLE_DEVICES',
callback=parse_device_callback,
help='Assign GPU devices (if available)',
show_envvar=True,
),
cog.optgroup.group(
'Fine-tuning related options',
help="""\
Note that the argument `--adapter-id` can accept the following format:
- `--adapter-id /path/to/adapter` (local adapter)
- `--adapter-id remote/adapter` (remote adapter from HuggingFace Hub)
- `--adapter-id remote/adapter:eng_lora` (two previous adapter options with the given adapter_name)
```bash
$ openllm start opt --adapter-id /path/to/adapter_dir --adapter-id remote/adapter:eng_lora
```
""",
),
cog.optgroup.option(
'--adapter-id',
default=None,
help='Optional name or path for given LoRA adapter',
multiple=True,
callback=_id_callback,
metavar='[PATH | [remote/][adapter_name:]adapter_id][, ...]',
),
click.option('--return-process', is_flag=True, default=False, help='Internal use only.', hidden=True),
)
return composed(fn)
return wrapper
def parse_device_callback(
ctx: click.Context, param: click.Parameter, value: tuple[tuple[str], ...] | None
) -> t.Tuple[str, ...] | None:
if value is None:
return value
if not isinstance(value, tuple):
ctx.fail(f'{param} only accept multiple values, not {type(value)} (value: {value})')
el: t.Tuple[str, ...] = tuple(i for k in value for i in k)
# NOTE: --device all is a special case
if len(el) == 1 and el[0] == 'all':
return tuple(map(str, openllm.utils.available_devices()))
return el
# NOTE: A list of bentoml options that are not needed for parsing.
# NOTE: Users shouldn't set '--working-dir', as OpenLLM will set this up.
# NOTE: production is also deprecated
_IGNORED_OPTIONS = {'working_dir', 'production', 'protocol_version'}
def parse_serve_args(serve_grpc: bool) -> t.Callable[[t.Callable[..., LLMConfig]], t.Callable[[FC], FC]]:
"""Parse the `bentoml serve|serve-grpc` click options so they can be reused by `openllm start`."""
from bentoml_cli.cli import cli
command = 'serve' if not serve_grpc else 'serve-grpc'
group = cog.optgroup.group(
f"Start a {'HTTP' if not serve_grpc else 'gRPC'} server options",
help=f"Related to serving the model [synonymous to `bentoml {'serve-http' if not serve_grpc else command}`]",
)
def decorator(f: t.Callable[Concatenate[int, t.Optional[str], P], LLMConfig]) -> t.Callable[[FC], FC]:
serve_command = cli.commands[command]
# The first parameter is the 'bento' argument
# The last NUMBER_OF_COMMON_PARAMS are the shared options added by BentoMLCommandGroup
serve_options = [
p
for p in serve_command.params[1 : -BentoMLCommandGroup.NUMBER_OF_COMMON_PARAMS]
if p.name not in _IGNORED_OPTIONS
]
for options in reversed(serve_options):
attrs = options.to_info_dict()
# we don't need param_type_name, since it should all be options
attrs.pop('param_type_name')
# 'name' is not a valid kwarg here
attrs.pop('name')
# type can be determined from the default value
attrs.pop('type')
param_decls = (*attrs.pop('opts'), *attrs.pop('secondary_opts'))
f = cog.optgroup.option(*param_decls, **attrs)(f)
return group(f)
return decorator
_http_server_args, _grpc_server_args = parse_serve_args(False), parse_serve_args(True)
def _click_factory_type(*param_decls: t.Any, **attrs: t.Any) -> t.Callable[[FC | None], FC]:
"""General ``@click`` decorator with some sauce.
This decorator extends the default ``@click.option`` with a factory option and factory attr to
provide a type-safe click.option or click.argument wrapper for all compatible factories.
"""
factory = attrs.pop('factory', click)
factory_attr = attrs.pop('attr', 'option')
if factory_attr != 'argument':
attrs.setdefault('help', 'General option for OpenLLM CLI.')
def decorator(f: FC | None) -> FC:
callback = getattr(factory, factory_attr, None)
if callback is None:
raise ValueError(f'Factory {factory} has no attribute {factory_attr}.')
return t.cast(FC, callback(*param_decls, **attrs)(f) if f is not None else callback(*param_decls, **attrs))
return decorator
cli_option = functools.partial(_click_factory_type, attr='option')
cli_argument = functools.partial(_click_factory_type, attr='argument')
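# Illustrative usage of these wrappers (hypothetical command, not part of this module):
#   @click.command()
#   @model_id_option()            # defaults to a plain click.option
#   @cors_option(factory=click)   # the 'factory' kwarg selects which library supplies the option
#   def serve_cmd(model_id, cors): ...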
def cors_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callable[[FC], FC]:
return cli_option(
'--cors/--no-cors',
show_default=True,
default=False,
envvar='OPENLLM_CORS',
show_envvar=True,
help='Enable CORS for the server.',
**attrs,
)(f)
def machine_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callable[[FC], FC]:
return cli_option('--machine', is_flag=True, default=False, hidden=True, **attrs)(f)
def model_id_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callable[[FC], FC]:
return cli_option(
'--model-id',
type=click.STRING,
default=None,
envvar='OPENLLM_MODEL_ID',
show_envvar=True,
help='Optional model_id name or path for (fine-tuned) weights.',
**attrs,
)(f)
def model_version_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callable[[FC], FC]:
return cli_option(
'--model-version',
type=click.STRING,
default=None,
help='Optional model version to save for this model. It will be inferred automatically from model-id.',
**attrs,
)(f)
def system_message_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callable[[FC], FC]:
return cli_option(
'--system-message',
type=click.STRING,
default=None,
envvar='OPENLLM_SYSTEM_MESSAGE',
help='Optional system message for supported LLMs. If the given LLM supports a system message, OpenLLM will provide a default system message.',
**attrs,
)(f)
def prompt_template_file_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callable[[FC], FC]:
return cli_option(
'--prompt-template-file',
type=click.File(),
default=None,
help='Optional file path containing user-defined custom prompt template. By default, the prompt template for the specified LLM will be used.',
**attrs,
)(f)
def backend_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callable[[FC], FC]:
# NOTE: the last two items of LiteralBackend are excluded since ggml and mlc support is still WIP
# XXX: remove this slice of __args__ once ggml and mlc are supported
return cli_option(
'--backend',
type=click.Choice(get_literal_args(LiteralBackend)[:2]),
default=None,
envvar='OPENLLM_BACKEND',
show_envvar=True,
help='The implementation for saving this LLM.',
**attrs,
)(f)
def model_name_argument(f: _AnyCallable | None = None, required: bool = True, **attrs: t.Any) -> t.Callable[[FC], FC]:
return cli_argument(
'model_name',
type=click.Choice([inflection.dasherize(name) for name in openllm.CONFIG_MAPPING]),
required=required,
**attrs,
)(f)
def quantize_option(f: _AnyCallable | None = None, *, build: bool = False, **attrs: t.Any) -> t.Callable[[FC], FC]:
return cli_option(
'--quantise',
'--quantize',
'quantize',
type=click.Choice(get_literal_args(LiteralQuantise)),
default=None,
envvar='OPENLLM_QUANTIZE',
show_envvar=True,
help="""Dynamic quantization for running this LLM.
The following quantization strategies are supported:
- ``int8``: ``LLM.int8`` for [8-bit](https://arxiv.org/abs/2208.07339) quantization.
- ``int4``: ``SpQR`` for [4-bit](https://arxiv.org/abs/2306.03078) quantization.
- ``gptq``: ``GPTQ`` [quantization](https://arxiv.org/abs/2210.17323)
> [!NOTE] that the model can also be served with quantized weights.
"""
+ (
"""
> [!NOTE] that this will set the mode for serving within deployment."""
if build
else ''
)
+ """
> [!NOTE] that quantization is currently only available for *PyTorch* models.""",
**attrs,
)(f)
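# Illustrative CLI usage (mirrors the start example above): openllm start opt --quantize int8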
def workers_per_resource_option(
f: _AnyCallable | None = None, *, build: bool = False, **attrs: t.Any
) -> t.Callable[[FC], FC]:
return cli_option(
'--workers-per-resource',
default=None,
callback=workers_per_resource_callback,
type=str,
required=False,
help="""Number of workers per resource assigned.
See https://docs.bentoml.org/en/latest/guides/scheduling.html#resource-scheduling-strategy
for more information. By default, this is set to 1.
> [!NOTE] ``--workers-per-resource`` will also accept the following strategies:
- ``round_robin``: Similar behaviour when setting ``--workers-per-resource 1``. This is useful for smaller models.
- ``conserved``: This will determine the number of available GPU resources, and only assign one worker to the LLMRunner. For example, if there are 4 GPUs available, then ``conserved`` is equivalent to ``--workers-per-resource 0.25``.
"""
+ (
"""\n
> [!NOTE] The workers value passed into 'build' will determine how the LLM can
> be provisioned in Kubernetes as well as in standalone container. This will
> ensure it has the same effect with 'openllm start --api-workers ...'"""
if build
else ''
),
**attrs,
)(f)
def serialisation_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callable[[FC], FC]:
return cli_option(
'--serialisation',
'--serialization',
'serialisation',
type=click.Choice(get_literal_args(LiteralSerialisation)),
default=None,
show_default=True,
show_envvar=True,
envvar='OPENLLM_SERIALIZATION',
help="""Serialisation format for save/load LLM.
Currently the following strategies are supported:
- ``safetensors``: This will use safetensors format, which is synonymous to ``safe_serialization=True``.
> [!NOTE] Safetensors might not work in every case, and you can always fall back to ``legacy`` if needed.
- ``legacy``: This will use PyTorch serialisation format, often as ``.bin`` files. This should be used if the model doesn't yet support safetensors.
""",
**attrs,
)(f)
def container_registry_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callable[[FC], FC]:
return cli_option(
'--container-registry',
'container_registry',
type=click.Choice(list(openllm.bundle.CONTAINER_NAMES)),
default='ecr',
show_default=True,
show_envvar=True,
envvar='OPENLLM_CONTAINER_REGISTRY',
callback=container_registry_callback,
help='The default container registry to get the base image for building BentoLLM. Currently, it supports ecr, ghcr, docker',
**attrs,
)(f)
_wpr_strategies = {'round_robin', 'conserved'}
def workers_per_resource_callback(ctx: click.Context, param: click.Parameter, value: str | None) -> str | None:
if value is None:
return value
value = inflection.underscore(value)
if value in _wpr_strategies:
return value
else:
try:
float(value) # type: ignore[arg-type]
except ValueError:
raise click.BadParameter(
f"'workers_per_resource' only accepts '{_wpr_strategies}' as named strategies; otherwise pass in a float.",
ctx,
param,
) from None
else:
return value
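# Illustrative values accepted above (not part of the original source):
#   --workers-per-resource 0.25         # any float-like string passes validation
#   --workers-per-resource round_robin  # or one of the named strategies in _wpr_strategies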
def container_registry_callback(ctx: click.Context, param: click.Parameter, value: str | None) -> str | None:
if value is None:
return value
if value not in openllm.bundle.supported_registries:
raise click.BadParameter(f'Value must be one of {openllm.bundle.supported_registries}', ctx, param)
return value

View File

@@ -1,330 +0,0 @@
from __future__ import annotations
import itertools
import logging
import os
import re
import subprocess
import sys
import typing as t
import orjson
from simple_di import Provide
from simple_di import inject
import bentoml
import openllm_core
from bentoml._internal.configuration.containers import BentoMLContainer
from openllm_core._typing_compat import LiteralSerialisation
from openllm_core.exceptions import OpenLLMException
from openllm_core.utils import WARNING_ENV_VAR
from openllm_core.utils import codegen
from openllm_core.utils import first_not_none
from openllm_core.utils import get_disable_warnings
from openllm_core.utils import is_vllm_available
if t.TYPE_CHECKING:
from bentoml._internal.bento import BentoStore
from openllm_core._configuration import LLMConfig
from openllm_core._typing_compat import LiteralBackend
from openllm_core._typing_compat import LiteralContainerRegistry
from openllm_core._typing_compat import LiteralContainerVersionStrategy
from openllm_core._typing_compat import LiteralQuantise
from openllm_core._typing_compat import LiteralString
logger = logging.getLogger(__name__)
def _start(
model_id: str,
timeout: int = 30,
workers_per_resource: t.Literal['conserved', 'round_robin'] | float | None = None,
device: tuple[str, ...] | t.Literal['all'] | None = None,
quantize: LiteralQuantise | None = None,
system_message: str | None = None,
prompt_template_file: str | None = None,
adapter_map: dict[LiteralString, str | None] | None = None,
backend: LiteralBackend | None = None,
additional_args: list[str] | None = None,
cors: bool = False,
_serve_grpc: bool = False,
__test__: bool = False,
**_: t.Any,
) -> LLMConfig | subprocess.Popen[bytes]:
"""Python API to start an LLM server. This provides a one-to-one mapping to CLI arguments.
For any additional arguments, pass them as strings to ``additional_args``. For example, if you want to
pass ``--port 5001``, you can pass ``additional_args=["--port", "5001"]``.
> [!NOTE] This will create a blocking process, so if you use this API, start the server
> in a separate thread instead of blocking the main thread.
``openllm.start`` will invoke ``click.Command`` under the hood, so it behaves exactly the same as the CLI interaction.
Args:
model_id: The model id to start this LLMServer
timeout: The server timeout
system_message: Optional system message for supported LLMs. If the given LLM supports a system message, OpenLLM will provide a default system message.
prompt_template_file: Optional file path containing a user-defined custom prompt template. By default, the prompt template for the specified LLM will be used.
workers_per_resource: Number of workers per resource assigned.
See [resource scheduling](https://docs.bentoml.org/en/latest/guides/scheduling.html#resource-scheduling-strategy)
for more information. By default, this is set to 1.
> [!NOTE] ``--workers-per-resource`` will also accept the following strategies:
> - ``round_robin``: Similar behaviour when setting ``--workers-per-resource 1``. This is useful for smaller models.
> - ``conserved``: This will determine the number of available GPU resources, and only assign
> one worker to the LLMRunner. For example, if there are 4 GPUs available, then ``conserved`` is
> equivalent to ``--workers-per-resource 0.25``.
device: Assign GPU devices (if available) to this LLM. By default, this is set to ``None``. It also accepts the
special value ``'all'`` to assign all available GPUs to this LLM.
quantize: Quantize the model weights. This is only applicable for PyTorch models.
Possible quantisation strategies:
- int8: Quantize the model with 8bit (bitsandbytes required)
- int4: Quantize the model with 4bit (bitsandbytes required)
- gptq: Quantize the model with GPTQ (auto-gptq required)
cors: Whether to enable CORS for this LLM. By default, this is set to ``False``.
adapter_map: The adapter mapping of LoRA to use for this LLM. It accepts a dictionary of ``{adapter_id: adapter_name}``.
backend: The backend to use for this LLM. By default, this resolves to ``vllm`` when vLLM is available, otherwise ``pt``.
additional_args: Additional arguments to pass to ``openllm start``.
"""
from .entrypoint import start_command
from .entrypoint import start_grpc_command
os.environ['OPENLLM_BACKEND'] = openllm_core.utils.first_not_none(
backend, default='vllm' if is_vllm_available() else 'pt'
)
args: list[str] = [model_id]
if system_message:
args.extend(['--system-message', system_message])
if prompt_template_file:
args.extend(['--prompt-template-file', openllm_core.utils.resolve_filepath(prompt_template_file)])
if timeout:
args.extend(['--server-timeout', str(timeout)])
if workers_per_resource:
args.extend(
[
'--workers-per-resource',
str(workers_per_resource) if not isinstance(workers_per_resource, str) else workers_per_resource,
]
)
if device and not os.environ.get('CUDA_VISIBLE_DEVICES'):
args.extend(['--device', ','.join(device)])
if quantize:
args.extend(['--quantize', str(quantize)])
if cors:
args.append('--cors')
if adapter_map:
args.extend(
list(
itertools.chain.from_iterable([['--adapter-id', f"{k}{':'+v if v else ''}"] for k, v in adapter_map.items()])
)
)
if additional_args:
args.extend(additional_args)
if __test__:
args.append('--return-process')
cmd = start_command if not _serve_grpc else start_grpc_command
return cmd.main(args=args, standalone_mode=False)
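# Illustrative only: a minimal sketch of calling the generated SDK wrapper via the public
# package (model id and arguments are hypothetical):
#   openllm.start('facebook/opt-1.3b', timeout=360, additional_args=['--port', '5001'])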
@inject
def _build(
model_id: str,
model_version: str | None = None,
bento_version: str | None = None,
quantize: LiteralQuantise | None = None,
adapter_map: dict[str, str | None] | None = None,
system_message: str | None = None,
prompt_template_file: str | None = None,
build_ctx: str | None = None,
enable_features: tuple[str, ...] | None = None,
dockerfile_template: str | None = None,
overwrite: bool = False,
container_registry: LiteralContainerRegistry | None = None,
container_version_strategy: LiteralContainerVersionStrategy | None = None,
push: bool = False,
force_push: bool = False,
containerize: bool = False,
serialisation: LiteralSerialisation | None = None,
additional_args: list[str] | None = None,
bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
) -> bentoml.Bento:
"""Package an LLM into a BentoLLM.
The LLM will be built into a BentoService.
If ``quantize`` is passed, it will instruct the model to be quantized dynamically at serving time.
``openllm.build`` will invoke ``click.Command`` under the hood, so it behaves exactly the same as ``openllm build`` CLI.
Args:
model_id: The model id to build this BentoLLM
model_version: Optional model version for this given LLM
bento_version: Optional bento version for this given BentoLLM
system_message: Optional system message for supported LLMs. If the given LLM supports a system message, OpenLLM will provide a default system message.
prompt_template_file: Optional file path containing a user-defined custom prompt template. By default, the prompt template for the specified LLM will be used.
quantize: Quantize the model weights. This is only applicable for PyTorch models.
Possible quantisation strategies:
- int8: Quantize the model with 8bit (bitsandbytes required)
- int4: Quantize the model with 4bit (bitsandbytes required)
- gptq: Quantize the model with GPTQ (auto-gptq required)
adapter_map: The adapter mapping of LoRA to use for this LLM. It accepts a dictionary of ``{adapter_id: adapter_name}``.
build_ctx: The build context to use for building BentoLLM. By default, it sets to current directory.
enable_features: Additional OpenLLM features to be included with this BentoLLM.
dockerfile_template: The dockerfile template to use for building BentoLLM. See https://docs.bentoml.com/en/latest/guides/containerization.html#dockerfile-template.
overwrite: Whether to overwrite the existing BentoLLM. By default, this is set to ``False``.
push: Whether to push the resulting Bento to BentoCloud. Make sure to log in with 'bentoml cloud login' first.
containerize: Whether to containerize the Bento after building. '--containerize' is the shortcut of 'openllm build && bentoml containerize'.
Note that 'containerize' and 'push' are mutually exclusive
container_registry: Container registry to choose the base OpenLLM container image to build from. Default to ECR.
container_version_strategy: The container version strategy. Default to the latest release of OpenLLM.
serialisation: Serialisation for saving models. Default to 'safetensors', which is equivalent to `safe_serialization=True`
additional_args: Additional arguments to pass to ``openllm build``.
bento_store: Optional BentoStore for saving this BentoLLM. Default to the default BentoML local store.
Returns:
``bentoml.Bento | str``: BentoLLM instance. This can be used to serve the LLM or can be pushed to BentoCloud.
"""
from ..serialisation.transformers.weights import has_safetensors_weights
args: list[str] = [
sys.executable,
'-m',
'openllm',
'build',
model_id,
'--machine',
'--serialisation',
first_not_none(
serialisation, default='safetensors' if has_safetensors_weights(model_id, model_version) else 'legacy'
),
]
if quantize:
args.extend(['--quantize', quantize])
if containerize and push:
raise OpenLLMException("'containerize' and 'push' are currently mutually exclusive.")
if push:
args.extend(['--push'])
if containerize:
args.extend(['--containerize'])
if build_ctx:
args.extend(['--build-ctx', build_ctx])
if enable_features:
args.extend([f'--enable-features={f}' for f in enable_features])
if overwrite:
args.append('--overwrite')
if system_message:
args.extend(['--system-message', system_message])
if prompt_template_file:
args.extend(['--prompt-template-file', openllm_core.utils.resolve_filepath(prompt_template_file)])
if adapter_map:
args.extend([f"--adapter-id={k}{':'+v if v is not None else ''}" for k, v in adapter_map.items()])
if model_version:
args.extend(['--model-version', model_version])
if bento_version:
args.extend(['--bento-version', bento_version])
if dockerfile_template:
args.extend(['--dockerfile-template', dockerfile_template])
if container_registry is None:
container_registry = 'ecr'
if container_version_strategy is None:
container_version_strategy = 'release'
args.extend(['--container-registry', container_registry, '--container-version-strategy', container_version_strategy])
if additional_args:
args.extend(additional_args)
if force_push:
args.append('--force-push')
current_disable_warning = get_disable_warnings()
os.environ[WARNING_ENV_VAR] = str(True)
try:
output = subprocess.check_output(args, env=os.environ.copy(), cwd=build_ctx or os.getcwd())
except subprocess.CalledProcessError as e:
logger.error("Exception caught while building Bento for '%s'", model_id, exc_info=e)
if e.stderr:
raise OpenLLMException(e.stderr.decode('utf-8')) from None
raise OpenLLMException(str(e)) from None
matched = re.match(r'__object__:(\{.*\})$', output.decode('utf-8').strip())
if matched is None:
raise ValueError(
f"Failed to find tag from output: {output.decode('utf-8').strip()}\nNote: Output from 'openllm build' might not be correct. Please open an issue on GitHub."
)
os.environ[WARNING_ENV_VAR] = str(current_disable_warning)
try:
result = orjson.loads(matched.group(1))
except orjson.JSONDecodeError as e:
raise ValueError(
f"Failed to decode JSON from output: {output.decode('utf-8').strip()}\nNote: Output from 'openllm build' might not be correct. Please open an issue on GitHub."
) from e
return bentoml.get(result['tag'], _bento_store=bento_store)
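# Illustrative only: a minimal sketch of calling the generated SDK wrapper via the public
# package (arguments are hypothetical):
#   bento = openllm.build('facebook/opt-1.3b', quantize='int8', containerize=False)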
def _import_model(
model_id: str,
model_version: str | None = None,
backend: LiteralBackend | None = None,
quantize: LiteralQuantise | None = None,
serialisation: LiteralSerialisation | None = None,
additional_args: t.Sequence[str] | None = None,
) -> dict[str, t.Any]:
"""Import a LLM into local store.
> [!NOTE]
> If ``quantize`` is passed, the model weights will be saved as quantized weights. You should
> only use this option if you want the weights to be quantized by default. Note that OpenLLM also
> supports on-demand quantisation during initial startup.
``openllm.import_model`` will invoke ``click.Command`` under the hood, so it behaves exactly the same as the CLI ``openllm import``.
> [!NOTE]
> ``openllm.start`` will automatically invoke ``openllm.import_model`` under the hood.
Args:
model_id: required model id for this given LLM
model_version: Optional model version for this given LLM
backend: The backend to use for this LLM. By default, this is set to ``pt``.
quantize: Quantize the model weights. This is only applicable for PyTorch models.
Possible quantisation strategies:
- int8: Quantize the model with 8bit (bitsandbytes required)
- int4: Quantize the model with 4bit (bitsandbytes required)
- gptq: Quantize the model with GPTQ (auto-gptq required)
serialisation: Type of model format to save to local store. If set to 'safetensors', then OpenLLM will save model using safetensors. Default behaviour is similar to ``safe_serialization=False``.
additional_args: Additional arguments to pass to ``openllm import``.
Returns:
``bentoml.Model``: The BentoModel of the given LLM. This can be used to serve the LLM or can be pushed to BentoCloud.
"""
from .entrypoint import import_command
args = [model_id, '--quiet']
if backend is not None:
args.extend(['--backend', backend])
if model_version is not None:
args.extend(['--model-version', str(model_version)])
if quantize is not None:
args.extend(['--quantize', quantize])
if serialisation is not None:
args.extend(['--serialisation', serialisation])
if additional_args is not None:
args.extend(additional_args)
return import_command.main(args=args, standalone_mode=False)
def _list_models() -> dict[str, t.Any]:
"""List all available models within the local store."""
from .entrypoint import models_command
return models_command.main(args=['--show-available', '--quiet'], standalone_mode=False)
start, start_grpc = codegen.gen_sdk(_start, _serve_grpc=False), codegen.gen_sdk(_start, _serve_grpc=True)
build, import_model, list_models = (
codegen.gen_sdk(_build),
codegen.gen_sdk(_import_model),
codegen.gen_sdk(_list_models),
)
__all__ = ['start', 'start_grpc', 'build', 'import_model', 'list_models']
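A hedged usage sketch of the remaining SDK entry points defined above; the model id is illustrative and the exact return payloads depend on the local BentoML store.

```python
import openllm

# Import the weights into the local store first (model id is illustrative).
openllm.import_model('facebook/opt-1.3b', backend='pt', serialisation='safetensors')

# Then list what is available locally.
print(openllm.list_models())
```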

View File

File diff suppressed because it is too large

View File

@@ -1,16 +0,0 @@
"""OpenLLM CLI Extension.
This directory contains all available extensions for the OpenLLM CLI.
To add a new extension, simply name it `<name_ext>.py` and define
a ``click.command()`` with the following format:
```python
import click
@click.command(<name_ext>)
...
def cli(...): # <- important: the function must always be named ``cli`` so the extension resolver knows how to import this extension.
```
NOTE: Make sure to keep this file blank so that it won't mess with the import order.
"""

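Below is a minimal, hypothetical extension following the convention above; the file name `hello.py`, the command name, and the body are illustrative only.

```python
# hello.py -- a hypothetical extension module placed in this directory
from __future__ import annotations

import click


@click.command('hello')
@click.argument('name', type=click.STRING)
def cli(name: str) -> None:
    # The function must be named `cli` so the extension resolver can import it.
    click.echo(f'Hello, {name}!')
```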
View File

@@ -1,52 +0,0 @@
from __future__ import annotations
import typing as t
import click
import orjson
import openllm
from openllm.cli import termui
from openllm.cli._factory import container_registry_option
from openllm.cli._factory import machine_option
if t.TYPE_CHECKING:
from openllm_core._typing_compat import LiteralContainerRegistry
from openllm_core._typing_compat import LiteralContainerVersionStrategy
@click.command(
'build_base_container',
context_settings=termui.CONTEXT_SETTINGS,
help="""Base image builder for BentoLLM.
By default, the base image will include custom kernels (PagedAttention via vLLM, FlashAttention-v2, etc.) built with CUDA 11.8 and Python 3.9 on Ubuntu 22.04.
Optionally, this can also be pushed directly to a remote registry. Currently supported registries are ``docker.io``, ``ghcr.io`` and ``quay.io``.
\b
If '--machine' is passed, the process runs quietly and outputs JSON to the current terminal.
This command is only useful for debugging and for building a custom base image to extend BentoML with custom kernels.
Note that we already release images to ECR and GHCR on our CI, so you don't need to build them yourself.
""",
)
@container_registry_option
@click.option(
'--version-strategy',
type=click.Choice(['release', 'latest', 'nightly']),
default='nightly',
help='Version strategy to use for tagging the image.',
)
@click.option('--push/--no-push', help='Whether to push to remote repository', is_flag=True, default=False)
@machine_option
def cli(
container_registry: tuple[LiteralContainerRegistry, ...] | None,
version_strategy: LiteralContainerVersionStrategy,
push: bool,
machine: bool,
) -> dict[str, str]:
mapping = openllm.bundle.build_container(container_registry, version_strategy, push, machine)
if machine:
termui.echo(orjson.dumps(mapping, option=orjson.OPT_INDENT_2).decode(), fg='white')
return mapping

View File

@@ -1,48 +0,0 @@
from __future__ import annotations
import shutil
import subprocess
import typing as t
import click
import psutil
from simple_di import Provide
from simple_di import inject
import bentoml
from bentoml._internal.configuration.containers import BentoMLContainer
from openllm.cli import termui
from openllm.cli._factory import bento_complete_envvar
from openllm.cli._factory import machine_option
if t.TYPE_CHECKING:
from bentoml._internal.bento import BentoStore
@click.command('dive_bentos', context_settings=termui.CONTEXT_SETTINGS)
@click.argument('bento', type=str, shell_complete=bento_complete_envvar)
@machine_option
@click.pass_context
@inject
def cli(
ctx: click.Context, bento: str, machine: bool, _bento_store: BentoStore = Provide[BentoMLContainer.bento_store]
) -> str | None:
"""Dive into a BentoLLM. This is equivalent to cd $(b get <bento>:<tag> -o path)."""
try:
bentomodel = _bento_store.get(bento)
except bentoml.exceptions.NotFound:
ctx.fail(f'Bento {bento} not found. Make sure to call `openllm build` first.')
if 'bundler' not in bentomodel.info.labels or bentomodel.info.labels['bundler'] != 'openllm.bundle':
ctx.fail(
f"Bento is either too old or not built with OpenLLM. Make sure to use ``openllm build {bentomodel.info.labels['start_name']}`` for correctness."
)
if machine:
return bentomodel.path
# copy and paste this into a new shell
if psutil.WINDOWS:
subprocess.check_call([shutil.which('dir') or 'dir'], cwd=bentomodel.path)
else:
subprocess.check_call([shutil.which('ls') or 'ls', '-Rrthla'], cwd=bentomodel.path)
ctx.exit(0)

View File

@@ -1,57 +0,0 @@
from __future__ import annotations
import typing as t
import click
from simple_di import Provide
from simple_di import inject
import bentoml
from bentoml._internal.bento.bento import BentoInfo
from bentoml._internal.bento.build_config import DockerOptions
from bentoml._internal.configuration.containers import BentoMLContainer
from bentoml._internal.container.generate import generate_containerfile
from openllm.cli import termui
from openllm.cli._factory import bento_complete_envvar
from openllm_core.utils import converter
if t.TYPE_CHECKING:
from bentoml._internal.bento import BentoStore
@click.command(
'get_containerfile', context_settings=termui.CONTEXT_SETTINGS, help='Return Containerfile of any given Bento.'
)
@click.argument('bento', type=str, shell_complete=bento_complete_envvar)
@click.pass_context
@inject
def cli(ctx: click.Context, bento: str, _bento_store: BentoStore = Provide[BentoMLContainer.bento_store]) -> str:
try:
bentomodel = _bento_store.get(bento)
except bentoml.exceptions.NotFound:
ctx.fail(f'Bento {bento} not found. Make sure to call `openllm build` first.')
# The logic below is similar to bentoml._internal.container.construct_containerfile
with open(bentomodel.path_of('bento.yaml'), 'r') as f:
options = BentoInfo.from_yaml_file(f)
# NOTE: dockerfile_template is already included in the
# Dockerfile inside bento, and it is not relevant to
# construct_containerfile. Hence it is safe to set it to None here.
# See https://github.com/bentoml/BentoML/issues/3399.
docker_attrs = converter.unstructure(options.docker)
# NOTE: if users specify a dockerfile_template, we will
# save it to /env/docker/Dockerfile.template. This is necessary
# for the reconstruction of the Dockerfile.
if 'dockerfile_template' in docker_attrs and docker_attrs['dockerfile_template'] is not None:
docker_attrs['dockerfile_template'] = 'env/docker/Dockerfile.template'
doc = generate_containerfile(
docker=DockerOptions(**docker_attrs),
build_ctx=bentomodel.path,
conda=options.conda,
bento_fs=bentomodel._fs,
enable_buildkit=True,
add_header=True,
)
termui.echo(doc, fg='white')
return bentomodel.path

View File

@@ -1,82 +0,0 @@
from __future__ import annotations
import logging
import traceback
import typing as t
import click
import inflection
import orjson
from bentoml_cli.utils import opt_callback
import openllm
import openllm_core
from openllm.cli import termui
from openllm.cli._factory import model_complete_envvar
from openllm_core.prompts import process_prompt
logger = logging.getLogger(__name__)
@click.command('get_prompt', context_settings=termui.CONTEXT_SETTINGS)
@click.argument(
'model_name',
type=click.Choice([inflection.dasherize(name) for name in openllm.CONFIG_MAPPING.keys()]),
shell_complete=model_complete_envvar,
)
@click.argument('prompt', type=click.STRING)
@click.option('--format', type=click.STRING, default=None)
@click.option(
'--opt',
help="Define additional prompt variables. (format: ``--opt system_prompt='You are a useful assistant'``)",
required=False,
multiple=True,
callback=opt_callback,
metavar='ARG=VALUE[,ARG=VALUE]',
)
@click.pass_context
def cli(
ctx: click.Context, /, model_name: str, prompt: str, format: str | None, _memoized: dict[str, t.Any], **_: t.Any
) -> str | None:
"""Get the default prompt used by OpenLLM."""
module = getattr(openllm_core.config, f'configuration_{model_name}')
_memoized = {k: v[0] for k, v in _memoized.items() if v}
try:
template = getattr(module, 'DEFAULT_PROMPT_TEMPLATE', None)
prompt_mapping = getattr(module, 'PROMPT_MAPPING', None)
if template is None:
raise click.BadArgumentUsage(f'model {model_name} does not have a default prompt template') from None
if callable(template):
if format is None:
if not hasattr(module, 'PROMPT_MAPPING') or module.PROMPT_MAPPING is None:
raise RuntimeError('Failed to find prompt mapping while DEFAULT_PROMPT_TEMPLATE is a function.')
raise click.BadOptionUsage(
'format',
f"{model_name} prompt requires passing '--format' (available format: {list(module.PROMPT_MAPPING)})",
)
if prompt_mapping is None:
raise click.BadArgumentUsage(
f'Failed to find prompt mapping while the default prompt for {model_name} is a callable.'
) from None
if format not in prompt_mapping:
raise click.BadOptionUsage(
'format', f'Given format {format} is not valid for {model_name} (available format: {list(prompt_mapping)})'
)
_prompt_template = template(format)
else:
_prompt_template = template
try:
# backward-compatible. TO BE REMOVED once every model has default system message and prompt template.
fully_formatted = process_prompt(prompt, _prompt_template, True, **_memoized)
except RuntimeError as err:
logger.debug('Exception caught while formatting prompt: %s', err)
fully_formatted = openllm.AutoConfig.for_model(model_name).sanitize_parameters(
prompt, prompt_template=_prompt_template
)[0]
termui.echo(orjson.dumps({'prompt': fully_formatted}, option=orjson.OPT_INDENT_2).decode(), fg='white')
except Exception as err:
traceback.print_exc()
raise click.ClickException(f'Failed to determine a default prompt template for {model_name}.') from err
ctx.exit(0)

View File

@@ -1,35 +0,0 @@
from __future__ import annotations
import click
import inflection
import orjson
import bentoml
import openllm
from bentoml._internal.utils import human_readable_size
from openllm.cli import termui
@click.command('list_bentos', context_settings=termui.CONTEXT_SETTINGS)
@click.pass_context
def cli(ctx: click.Context) -> None:
"""List available bentos built by OpenLLM."""
mapping = {
k: [
{
'tag': str(b.tag),
'size': human_readable_size(openllm.utils.calc_dir_size(b.path)),
'models': [
{'tag': str(m.tag), 'size': human_readable_size(openllm.utils.calc_dir_size(m.path))}
for m in (bentoml.models.get(_.tag) for _ in b.info.models)
],
}
for b in tuple(i for i in bentoml.list() if all(k in i.info.labels for k in {'start_name', 'bundler'}))
if b.info.labels['start_name'] == k
]
for k in tuple(inflection.dasherize(key) for key in openllm.CONFIG_MAPPING.keys())
}
mapping = {k: v for k, v in mapping.items() if v}
termui.echo(orjson.dumps(mapping, option=orjson.OPT_INDENT_2).decode(), fg='white')
ctx.exit(0)

View File

@@ -1,52 +0,0 @@
from __future__ import annotations
import typing as t
import click
import inflection
import orjson
import bentoml
import openllm
from bentoml._internal.utils import human_readable_size
from openllm.cli import termui
from openllm.cli._factory import model_complete_envvar
from openllm.cli._factory import model_name_argument
if t.TYPE_CHECKING:
from openllm_core._typing_compat import DictStrAny
@click.command('list_models', context_settings=termui.CONTEXT_SETTINGS)
@model_name_argument(required=False, shell_complete=model_complete_envvar)
def cli(model_name: str | None) -> DictStrAny:
"""This is equivalent to ``openllm models --show-available``, minus the nice table."""
models = tuple(inflection.dasherize(key) for key in openllm.CONFIG_MAPPING.keys())
ids_in_local_store = {
k: [
i
for i in bentoml.models.list()
if 'framework' in i.info.labels
and i.info.labels['framework'] == 'openllm'
and 'model_name' in i.info.labels
and i.info.labels['model_name'] == k
]
for k in models
}
if model_name is not None:
ids_in_local_store = {
k: [
i
for i in v
if 'model_name' in i.info.labels and i.info.labels['model_name'] == inflection.dasherize(model_name)
]
for k, v in ids_in_local_store.items()
}
ids_in_local_store = {k: v for k, v in ids_in_local_store.items() if v}
local_models = {
k: [{'tag': str(i.tag), 'size': human_readable_size(openllm.utils.calc_dir_size(i.path))} for i in val]
for k, val in ids_in_local_store.items()
}
termui.echo(orjson.dumps(local_models, option=orjson.OPT_INDENT_2).decode(), fg='white')
return local_models

View File

@@ -1,114 +0,0 @@
from __future__ import annotations
import importlib.machinery
import logging
import os
import pkgutil
import subprocess
import sys
import tempfile
import typing as t
import click
import jupytext
import nbformat
import yaml
from openllm import playground
from openllm.cli import termui
from openllm_core.utils import is_jupyter_available
from openllm_core.utils import is_jupytext_available
from openllm_core.utils import is_notebook_available
if t.TYPE_CHECKING:
from openllm_core._typing_compat import DictStrAny
logger = logging.getLogger(__name__)
def load_notebook_metadata() -> DictStrAny:
with open(os.path.join(os.path.dirname(playground.__file__), '_meta.yml'), 'r') as f:
content = yaml.safe_load(f)
if not all('description' in k for k in content.values()):
raise ValueError("Invalid metadata file. All entries must have a 'description' key.")
return content
@click.command('playground', context_settings=termui.CONTEXT_SETTINGS)
@click.argument('output-dir', default=None, required=False)
@click.option(
'--port',
envvar='JUPYTER_PORT',
show_envvar=True,
show_default=True,
default=8888,
help='Default port for Jupyter server',
)
@click.pass_context
def cli(ctx: click.Context, output_dir: str | None, port: int) -> None:
"""OpenLLM Playground.
A collection of notebooks to explore the capabilities of OpenLLM.
This includes notebooks for fine-tuning, inference, and more.
All of the scripts available in the playground can also be run directly as Python scripts:
For example:
\b
```bash
python -m openllm.playground.falcon_tuned --help
```
\b
> [!NOTE]
> This command requires Jupyter to be installed. Install it with 'pip install "openllm[playground]"'
"""
if not is_jupyter_available() or not is_jupytext_available() or not is_notebook_available():
raise RuntimeError(
"Playground requires 'jupyter', 'jupytext', and 'notebook'. Install them with 'pip install \"openllm[playground]\"'"
)
metadata = load_notebook_metadata()
_temp_dir = False
if output_dir is None:
_temp_dir = True
output_dir = tempfile.mkdtemp(prefix='openllm-playground-')
else:
os.makedirs(os.path.abspath(os.path.expandvars(os.path.expanduser(output_dir))), exist_ok=True)
termui.echo('The playground notebooks will be saved to: ' + os.path.abspath(output_dir), fg='blue')
for module in pkgutil.iter_modules(playground.__path__):
if module.ispkg or os.path.exists(os.path.join(output_dir, module.name + '.ipynb')):
logger.debug(
'Skipping: %s (%s)', module.name, 'File already exists' if not module.ispkg else f'{module.name} is a module'
)
continue
if not isinstance(module.module_finder, importlib.machinery.FileFinder):
continue
termui.echo('Generating notebook for: ' + module.name, fg='magenta')
markdown_cell = nbformat.v4.new_markdown_cell(metadata[module.name]['description'])
f = jupytext.read(os.path.join(module.module_finder.path, module.name + '.py'))
f.cells.insert(0, markdown_cell)
jupytext.write(f, os.path.join(output_dir, module.name + '.ipynb'), fmt='notebook')
try:
subprocess.check_output(
[
sys.executable,
'-m',
'jupyter',
'notebook',
'--notebook-dir',
output_dir,
'--port',
str(port),
'--no-browser',
'--debug',
]
)
except subprocess.CalledProcessError as e:
termui.echo(e.output, fg='red')
raise click.ClickException(f'Failed to start a jupyter server:\n{e}') from None
except KeyboardInterrupt:
termui.echo('\nShutting down Jupyter server...', fg='yellow')
if _temp_dir:
termui.echo('Note: You can access the generated notebooks in: ' + output_dir, fg='blue')
ctx.exit(0)

View File

@@ -1,91 +0,0 @@
from __future__ import annotations
import enum
import functools
import logging
import os
import typing as t
import click
import inflection
import orjson
from openllm_core._typing_compat import DictStrAny
from openllm_core.utils import get_debug_mode
logger = logging.getLogger('openllm')
class Level(enum.IntEnum):
NOTSET = logging.DEBUG
DEBUG = logging.DEBUG
INFO = logging.INFO
WARNING = logging.WARNING
ERROR = logging.ERROR
CRITICAL = logging.CRITICAL
@property
def color(self) -> str | None:
return {
Level.NOTSET: None,
Level.DEBUG: 'cyan',
Level.INFO: 'green',
Level.WARNING: 'yellow',
Level.ERROR: 'red',
Level.CRITICAL: 'red',
}[self]
@classmethod
def from_logging_level(cls, level: int) -> Level:
return {
logging.DEBUG: Level.DEBUG,
logging.INFO: Level.INFO,
logging.WARNING: Level.WARNING,
logging.ERROR: Level.ERROR,
logging.CRITICAL: Level.CRITICAL,
}[level]
class JsonLog(t.TypedDict):
log_level: Level
content: str
def log(content: str, level: Level = Level.INFO, fg: str | None = None) -> None:
if get_debug_mode():
echo(content, fg=fg)
else:
echo(orjson.dumps(JsonLog(log_level=level, content=content)).decode(), fg=fg, json=True)
warning = functools.partial(log, level=Level.WARNING)
error = functools.partial(log, level=Level.ERROR)
critical = functools.partial(log, level=Level.CRITICAL)
debug = functools.partial(log, level=Level.DEBUG)
info = functools.partial(log, level=Level.INFO)
notset = functools.partial(log, level=Level.NOTSET)
def echo(text: t.Any, fg: str | None = None, *, _with_style: bool = True, json: bool = False, **attrs: t.Any) -> None:
if json:
text = orjson.loads(text)
if 'content' in text and 'log_level' in text:
content = text['content']
fg = Level.from_logging_level(text['log_level']).color
else:
content = orjson.dumps(text).decode()
fg = Level.INFO.color if not get_debug_mode() else Level.DEBUG.color
else:
content = t.cast(str, text)
attrs['fg'] = fg
(click.echo if not _with_style else click.secho)(content, **attrs)
COLUMNS: int = int(os.environ.get('COLUMNS', str(120)))
CONTEXT_SETTINGS: DictStrAny = {
'help_option_names': ['-h', '--help'],
'max_content_width': COLUMNS,
'token_normalize_func': inflection.underscore,
}
__all__ = ['echo', 'COLUMNS', 'CONTEXT_SETTINGS', 'log', 'warning', 'error', 'critical', 'debug', 'info', 'Level']
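A hedged sketch of how these helpers compose; the import path assumes the module lives at `openllm_cli.termui` after this refactor, and the messages are illustrative.

```python
from openllm_cli import termui  # assumed location after the refactor

# Plain styled output via click.secho.
termui.echo('Building Bento...', fg='magenta')

# Structured logging: emitted as a JSON record unless debug mode is enabled.
termui.info('model imported')
termui.warning('quantisation is experimental')
```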