revert: "ci: pre-commit autoupdate [pre-commit.ci] (#931)"

This reverts commit 7b00c84c2a.
Author: Aaron
Date: 2024-03-15 03:47:23 -04:00
Parent: 7b00c84c2a
Commit: e3392476be

69 changed files with 368 additions and 1300 deletions

@@ -5,25 +5,12 @@ from bentoml_cli.utils import BentoMLCommandGroup
from click import shell_completion as sc
from openllm_core._configuration import LLMConfig
from openllm_core._typing_compat import (
Concatenate,
DictStrAny,
LiteralBackend,
LiteralSerialisation,
ParamSpec,
AnyCallable,
get_literal_args,
)
from openllm_core._typing_compat import Concatenate, DictStrAny, LiteralBackend, LiteralSerialisation, ParamSpec, AnyCallable, get_literal_args
from openllm_core.utils import DEBUG, compose, dantic, resolve_user_filepath
class _OpenLLM_GenericInternalConfig(LLMConfig):
__config__ = {
'name_type': 'lowercase',
'default_id': 'openllm/generic',
'model_ids': ['openllm/generic'],
'architecture': 'PreTrainedModel',
}
__config__ = {'name_type': 'lowercase', 'default_id': 'openllm/generic', 'model_ids': ['openllm/generic'], 'architecture': 'PreTrainedModel'}
class GenerationConfig:
top_k: int = 15
@@ -50,20 +37,11 @@ def bento_complete_envvar(ctx: click.Context, param: click.Parameter, incomplete
def model_complete_envvar(ctx: click.Context, param: click.Parameter, incomplete: str) -> list[sc.CompletionItem]:
return [
sc.CompletionItem(inflection.dasherize(it), help='Model')
for it in openllm.CONFIG_MAPPING
if it.startswith(incomplete)
]
return [sc.CompletionItem(inflection.dasherize(it), help='Model') for it in openllm.CONFIG_MAPPING if it.startswith(incomplete)]
def parse_config_options(
config: LLMConfig,
server_timeout: int,
workers_per_resource: float,
device: t.Tuple[str, ...] | None,
cors: bool,
environ: DictStrAny,
config: LLMConfig, server_timeout: int, workers_per_resource: float, device: t.Tuple[str, ...] | None, cors: bool, environ: DictStrAny
) -> DictStrAny:
# TODO: Support amd.com/gpu on k8s
_bentoml_config_options_env = environ.pop('BENTOML_CONFIG_OPTIONS', '')
@@ -78,21 +56,14 @@ def parse_config_options(
if device:
if len(device) > 1:
_bentoml_config_options_opts.extend([
f'runners."llm-{config["start_name"]}-runner".resources."nvidia.com/gpu"[{idx}]={dev}'
for idx, dev in enumerate(device)
f'runners."llm-{config["start_name"]}-runner".resources."nvidia.com/gpu"[{idx}]={dev}' for idx, dev in enumerate(device)
])
else:
_bentoml_config_options_opts.append(
f'runners."llm-{config["start_name"]}-runner".resources."nvidia.com/gpu"=[{device[0]}]'
)
_bentoml_config_options_opts.append(f'runners."llm-{config["start_name"]}-runner".resources."nvidia.com/gpu"=[{device[0]}]')
if cors:
_bentoml_config_options_opts.extend(['api_server.http.cors.enabled=true', 'api_server.http.cors.access_control_allow_origins="*"'])
_bentoml_config_options_opts.extend([
'api_server.http.cors.enabled=true',
'api_server.http.cors.access_control_allow_origins="*"',
])
_bentoml_config_options_opts.extend([
f'api_server.http.cors.access_control_allow_methods[{idx}]="{it}"'
for idx, it in enumerate(['GET', 'OPTIONS', 'POST', 'HEAD', 'PUT'])
f'api_server.http.cors.access_control_allow_methods[{idx}]="{it}"' for idx, it in enumerate(['GET', 'OPTIONS', 'POST', 'HEAD', 'PUT'])
])
_bentoml_config_options_env += ' ' if _bentoml_config_options_env else '' + ' '.join(_bentoml_config_options_opts)
environ['BENTOML_CONFIG_OPTIONS'] = _bentoml_config_options_env
@@ -171,9 +142,7 @@ def start_decorator(fn: FC) -> FC:
return composed(fn)
def parse_device_callback(
_: click.Context, param: click.Parameter, value: tuple[tuple[str], ...] | None
) -> t.Tuple[str, ...] | None:
def parse_device_callback(_: click.Context, param: click.Parameter, value: tuple[tuple[str], ...] | None) -> t.Tuple[str, ...] | None:
if value is None:
return value
el: t.Tuple[str, ...] = tuple(i for k in value for i in k)
@@ -192,19 +161,13 @@ _IGNORED_OPTIONS = {'working_dir', 'production', 'protocol_version'}
def parse_serve_args() -> t.Callable[[t.Callable[..., LLMConfig]], t.Callable[[FC], FC]]:
from bentoml_cli.cli import cli
group = cog.optgroup.group(
'Start a HTTP server options', help='Related to serving the model [synonymous to `bentoml serve-http`]'
)
group = cog.optgroup.group('Start a HTTP server options', help='Related to serving the model [synonymous to `bentoml serve-http`]')
def decorator(f: t.Callable[Concatenate[int, t.Optional[str], P], LLMConfig]) -> t.Callable[[FC], FC]:
serve_command = cli.commands['serve']
# The first variable is the argument bento
# The last five is from BentoMLCommandGroup.NUMBER_OF_COMMON_PARAMS
serve_options = [
p
for p in serve_command.params[1 : -BentoMLCommandGroup.NUMBER_OF_COMMON_PARAMS]
if p.name not in _IGNORED_OPTIONS
]
serve_options = [p for p in serve_command.params[1 : -BentoMLCommandGroup.NUMBER_OF_COMMON_PARAMS] if p.name not in _IGNORED_OPTIONS]
for options in reversed(serve_options):
attrs = options.to_info_dict()
# we don't need param_type_name, since it should all be options
@@ -258,13 +221,7 @@ def adapter_id_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callab
def cors_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callable[[FC], FC]:
return cli_option(
'--cors/--no-cors',
show_default=True,
default=False,
envvar='OPENLLM_CORS',
show_envvar=True,
help='Enable CORS for the server.',
**attrs,
'--cors/--no-cors', show_default=True, default=False, envvar='OPENLLM_CORS', show_envvar=True, help='Enable CORS for the server.', **attrs
)(f)
@@ -318,12 +275,7 @@ def backend_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callable[
def model_name_argument(f: _AnyCallable | None = None, required: bool = True, **attrs: t.Any) -> t.Callable[[FC], FC]:
return cli_argument(
'model_name',
type=click.Choice([inflection.dasherize(name) for name in openllm.CONFIG_MAPPING]),
required=required,
**attrs,
)(f)
return cli_argument('model_name', type=click.Choice([inflection.dasherize(name) for name in openllm.CONFIG_MAPPING]), required=required, **attrs)(f)
def quantize_option(f: _AnyCallable | None = None, *, build: bool = False, **attrs: t.Any) -> t.Callable[[FC], FC]:
@@ -361,9 +313,7 @@ def quantize_option(f: _AnyCallable | None = None, *, build: bool = False, **att
)(f)
def workers_per_resource_option(
f: _AnyCallable | None = None, *, build: bool = False, **attrs: t.Any
) -> t.Callable[[FC], FC]:
def workers_per_resource_option(f: _AnyCallable | None = None, *, build: bool = False, **attrs: t.Any) -> t.Callable[[FC], FC]:
return cli_option(
'--workers-per-resource',
default=None,
@@ -431,9 +381,7 @@ def workers_per_resource_callback(ctx: click.Context, param: click.Parameter, va
float(value) # type: ignore[arg-type]
except ValueError:
raise click.BadParameter(
f"'workers_per_resource' only accept '{_wpr_strategies}' as possible strategies, otherwise pass in float.",
ctx,
param,
f"'workers_per_resource' only accept '{_wpr_strategies}' as possible strategies, otherwise pass in float.", ctx, param
) from None
else:
return value

@@ -69,10 +69,7 @@ def _start(
if timeout:
args.extend(['--server-timeout', str(timeout)])
if workers_per_resource:
args.extend([
'--workers-per-resource',
str(workers_per_resource) if not isinstance(workers_per_resource, str) else workers_per_resource,
])
args.extend(['--workers-per-resource', str(workers_per_resource) if not isinstance(workers_per_resource, str) else workers_per_resource])
if device and not os.environ.get('CUDA_VISIBLE_DEVICES'):
args.extend(['--device', ','.join(device)])
if quantize:
@@ -80,11 +77,7 @@ def _start(
if cors:
args.append('--cors')
if adapter_map:
args.extend(
list(
itertools.chain.from_iterable([['--adapter-id', f"{k}{':' + v if v else ''}"] for k, v in adapter_map.items()])
)
)
args.extend(list(itertools.chain.from_iterable([['--adapter-id', f"{k}{':'+v if v else ''}"] for k, v in adapter_map.items()])))
if additional_args:
args.extend(additional_args)
if __test__:
@@ -155,9 +148,7 @@ def _build(
'--machine',
'--quiet',
'--serialisation',
first_not_none(
serialisation, default='safetensors' if has_safetensors_weights(model_id, model_version) else 'legacy'
),
first_not_none(serialisation, default='safetensors' if has_safetensors_weights(model_id, model_version) else 'legacy'),
]
if quantize:
args.extend(['--quantize', quantize])
@@ -174,7 +165,7 @@ def _build(
if overwrite:
args.append('--overwrite')
if adapter_map:
args.extend([f"--adapter-id={k}{':' + v if v is not None else ''}" for k, v in adapter_map.items()])
args.extend([f"--adapter-id={k}{':'+v if v is not None else ''}" for k, v in adapter_map.items()])
if model_version:
args.extend(['--model-version', model_version])
if bento_version:
@@ -274,4 +265,4 @@ start, build, import_model, list_models = (
codegen.gen_sdk(_import_model),
codegen.gen_sdk(_list_models),
)
__all__ = ['build', 'import_model', 'list_models', 'start']
__all__ = ['start', 'build', 'import_model', 'list_models']

@@ -43,15 +43,7 @@ from openllm_core.utils import (
)
from . import termui
from ._factory import (
FC,
_AnyCallable,
machine_option,
model_name_argument,
parse_config_options,
start_decorator,
optimization_decorator,
)
from ._factory import FC, _AnyCallable, machine_option, model_name_argument, parse_config_options, start_decorator, optimization_decorator
if t.TYPE_CHECKING:
import torch
@@ -103,18 +95,12 @@ def backend_warning(backend: LiteralBackend, build: bool = False) -> None:
'vLLM is not available. Note that PyTorch backend is not as performant as vLLM and you should always consider using vLLM for production.'
)
if build:
logger.info(
"Tip: You can set '--backend vllm' to package your Bento with vLLM backend regardless if vLLM is available locally."
)
logger.info("Tip: You can set '--backend vllm' to package your Bento with vLLM backend regardless if vLLM is available locally.")
class Extensions(click.MultiCommand):
def list_commands(self, ctx: click.Context) -> list[str]:
return sorted([
filename[:-3]
for filename in os.listdir(_EXT_FOLDER)
if filename.endswith('.py') and not filename.startswith('__')
])
return sorted([filename[:-3] for filename in os.listdir(_EXT_FOLDER) if filename.endswith('.py') and not filename.startswith('__')])
def get_command(self, ctx: click.Context, cmd_name: str) -> click.Command | None:
try:
@@ -131,41 +117,19 @@ class OpenLLMCommandGroup(BentoMLCommandGroup):
def common_params(f: t.Callable[P, t.Any]) -> t.Callable[[FC], FC]:
# The following logics is similar to one of BentoMLCommandGroup
@cog.optgroup.group(name='Global options', help='Shared globals options for all OpenLLM CLI.') # type: ignore[misc]
@cog.optgroup.option('-q', '--quiet', envvar=QUIET_ENV_VAR, is_flag=True, default=False, help='Suppress all output.', show_envvar=True)
@cog.optgroup.option(
'-q', '--quiet', envvar=QUIET_ENV_VAR, is_flag=True, default=False, help='Suppress all output.', show_envvar=True
'--debug', '--verbose', 'debug', envvar=DEBUG_ENV_VAR, is_flag=True, default=False, help='Print out debug logs.', show_envvar=True
)
@cog.optgroup.option(
'--debug',
'--verbose',
'debug',
envvar=DEBUG_ENV_VAR,
is_flag=True,
default=False,
help='Print out debug logs.',
show_envvar=True,
'--do-not-track', is_flag=True, default=False, envvar=analytics.OPENLLM_DO_NOT_TRACK, help='Do not send usage info', show_envvar=True
)
@cog.optgroup.option(
'--do-not-track',
is_flag=True,
default=False,
envvar=analytics.OPENLLM_DO_NOT_TRACK,
help='Do not send usage info',
show_envvar=True,
)
@cog.optgroup.option(
'--context',
'cloud_context',
envvar='BENTOCLOUD_CONTEXT',
type=click.STRING,
default=None,
help='BentoCloud context name.',
show_envvar=True,
'--context', 'cloud_context', envvar='BENTOCLOUD_CONTEXT', type=click.STRING, default=None, help='BentoCloud context name.', show_envvar=True
)
@click.pass_context
@functools.wraps(f)
def wrapper(
ctx: click.Context, quiet: bool, debug: bool, cloud_context: str | None, *args: P.args, **attrs: P.kwargs
) -> t.Any:
def wrapper(ctx: click.Context, quiet: bool, debug: bool, cloud_context: str | None, *args: P.args, **attrs: P.kwargs) -> t.Any:
ctx.obj = GlobalOptions(cloud_context=cloud_context)
if quiet:
set_quiet_mode(True)
@@ -179,9 +143,7 @@ class OpenLLMCommandGroup(BentoMLCommandGroup):
return wrapper
@staticmethod
def usage_tracking(
func: t.Callable[P, t.Any], group: click.Group, **attrs: t.Any
) -> t.Callable[Concatenate[bool, P], t.Any]:
def usage_tracking(func: t.Callable[P, t.Any], group: click.Group, **attrs: t.Any) -> t.Callable[Concatenate[bool, P], t.Any]:
command_name = attrs.get('name', func.__name__)
@functools.wraps(func)
@@ -240,9 +202,7 @@ class OpenLLMCommandGroup(BentoMLCommandGroup):
_memo = getattr(wrapped, '__click_params__', None)
if _memo is None:
raise ValueError('Click command not register correctly.')
_object_setattr(
wrapped, '__click_params__', _memo[-self.NUMBER_OF_COMMON_PARAMS :] + _memo[: -self.NUMBER_OF_COMMON_PARAMS]
)
_object_setattr(wrapped, '__click_params__', _memo[-self.NUMBER_OF_COMMON_PARAMS :] + _memo[: -self.NUMBER_OF_COMMON_PARAMS])
# NOTE: we need to call super of super to avoid conflict with BentoMLCommandGroup command setup
cmd = super(BentoMLCommandGroup, self).command(*args, **kwargs)(wrapped)
# NOTE: add aliases to a given commands if it is specified.
@@ -250,7 +210,7 @@ class OpenLLMCommandGroup(BentoMLCommandGroup):
if not cmd.name:
raise ValueError('name is required when aliases are available.')
self._commands[cmd.name] = aliases
self._aliases.update(dict.fromkeys(aliases, cmd.name))
self._aliases.update({alias: cmd.name for alias in aliases})
return cmd
return decorator
@@ -317,12 +277,7 @@ def cli() -> None:
"""
@cli.command(
context_settings=termui.CONTEXT_SETTINGS,
name='start',
aliases=['start-http'],
short_help='Start a LLMServer for any supported LLM.',
)
@cli.command(context_settings=termui.CONTEXT_SETTINGS, name='start', aliases=['start-http'], short_help='Start a LLMServer for any supported LLM.')
@click.argument('model_id', type=click.STRING, metavar='[REMOTE_REPO/MODEL_ID | /path/to/local/model]', required=True)
@click.option(
'--model-id',
@@ -375,9 +330,7 @@ def start_command(
```
"""
if backend == 'pt':
logger.warning(
'PyTorch backend is deprecated and will be removed in future releases. Make sure to use vLLM instead.'
)
logger.warning('PyTorch backend is deprecated and will be removed in future releases. Make sure to use vLLM instead.')
if model_id in openllm.CONFIG_MAPPING:
_model_name = model_id
if deprecated_model_id is not None:
@@ -395,17 +348,11 @@ def start_command(
from openllm.serialisation.transformers.weights import has_safetensors_weights
serialisation = first_not_none(
serialisation, default='safetensors' if has_safetensors_weights(model_id, model_version) else 'legacy'
)
serialisation = first_not_none(serialisation, default='safetensors' if has_safetensors_weights(model_id, model_version) else 'legacy')
if serialisation == 'safetensors' and quantize is not None:
logger.warning("'--quantize=%s' might not work with 'safetensors' serialisation format.", quantize)
logger.warning(
"Make sure to check out '%s' repository to see if the weights is in '%s' format if unsure.",
model_id,
serialisation,
)
logger.warning("Make sure to check out '%s' repository to see if the weights is in '%s' format if unsure.", model_id, serialisation)
logger.info("Tip: You can always fallback to '--serialisation legacy' when running quantisation.")
import torch
@@ -433,9 +380,7 @@ def start_command(
config, server_attrs = llm.config.model_validate_click(**attrs)
server_timeout = first_not_none(server_timeout, default=config['timeout'])
server_attrs.update({'working_dir': pkg.source_locations('openllm'), 'timeout': server_timeout})
development = server_attrs.pop(
'development'
) # XXX: currently, theres no development args in bentoml.Server. To be fixed upstream.
development = server_attrs.pop('development') # XXX: currently, theres no development args in bentoml.Server. To be fixed upstream.
server_attrs.setdefault('production', not development)
start_env = process_environ(
@@ -465,12 +410,8 @@ def start_command(
return config
def process_environ(
config, server_timeout, wpr, device, cors, model_id, adapter_map, serialisation, llm, use_current_env=True
):
environ = parse_config_options(
config, server_timeout, wpr, device, cors, os.environ.copy() if use_current_env else {}
)
def process_environ(config, server_timeout, wpr, device, cors, model_id, adapter_map, serialisation, llm, use_current_env=True):
environ = parse_config_options(config, server_timeout, wpr, device, cors, os.environ.copy() if use_current_env else {})
environ.update({
'OPENLLM_MODEL_ID': model_id,
'BENTOML_DEBUG': str(openllm.utils.get_debug_mode()),
@@ -515,8 +456,7 @@ def build_bento_instruction(llm, model_id, serialisation, adapter_map):
cmd_name += f' --serialization {serialisation}'
if adapter_map is not None:
cmd_name += ' ' + ' '.join([
f'--adapter-id {s}'
for s in [f'{p}:{name}' if name not in (None, 'default') else p for p, name in adapter_map.items()]
f'--adapter-id {s}' for s in [f'{p}:{name}' if name not in (None, 'default') else p for p, name in adapter_map.items()]
])
if not openllm.utils.get_quiet_mode():
termui.info(f"🚀Tip: run '{cmd_name}' to create a BentoLLM for '{model_id}'")
@@ -551,12 +491,8 @@ def run_server(args, env, return_process=False) -> subprocess.Popen[bytes] | int
if return_process:
return process
stop_event = threading.Event()
stdout, stderr = (
threading.Thread(target=handle, args=(process.stdout, stop_event)),
threading.Thread(target=handle, args=(process.stderr, stop_event)),
)
stdout.start()
stderr.start() # noqa: E702
stdout, stderr = threading.Thread(target=handle, args=(process.stdout, stop_event)), threading.Thread(target=handle, args=(process.stderr, stop_event))
stdout.start(); stderr.start() # noqa: E702
try:
process.wait()
@@ -571,12 +507,9 @@ def run_server(args, env, return_process=False) -> subprocess.Popen[bytes] | int
raise
finally:
stop_event.set()
stdout.join()
stderr.join() # noqa: E702
if process.poll() is not None:
process.kill()
stdout.join()
stderr.join() # noqa: E702
stdout.join(); stderr.join() # noqa: E702
if process.poll() is not None: process.kill()
stdout.join(); stderr.join() # noqa: E702
return process.returncode
@@ -664,10 +597,7 @@ def import_command(
backend=backend,
dtype=dtype,
serialisation=t.cast(
LiteralSerialisation,
first_not_none(
serialisation, default='safetensors' if has_safetensors_weights(model_id, model_version) else 'legacy'
),
LiteralSerialisation, first_not_none(serialisation, default='safetensors' if has_safetensors_weights(model_id, model_version) else 'legacy')
),
)
backend_warning(llm.__llm_backend__)
@@ -726,21 +656,14 @@ class BuildBentoOutput(t.TypedDict):
metavar='[REMOTE_REPO/MODEL_ID | /path/to/local/model]',
help='Deprecated. Use positional argument instead.',
)
@click.option(
'--bento-version',
type=str,
default=None,
help='Optional bento version for this BentoLLM. Default is the the model revision.',
)
@click.option('--bento-version', type=str, default=None, help='Optional bento version for this BentoLLM. Default is the the model revision.')
@click.option('--overwrite', is_flag=True, help='Overwrite existing Bento for given LLM if it already exists.')
@click.option(
'--enable-features',
multiple=True,
nargs=1,
metavar='FEATURE[,FEATURE]',
help='Enable additional features for building this LLM Bento. Available: {}'.format(
', '.join(OPTIONAL_DEPENDENCIES)
),
help='Enable additional features for building this LLM Bento. Available: {}'.format(', '.join(OPTIONAL_DEPENDENCIES)),
)
@optimization_decorator
@click.option(
@@ -751,12 +674,7 @@ class BuildBentoOutput(t.TypedDict):
help="Optional adapters id to be included within the Bento. Note that if you are using relative path, '--build-ctx' must be passed.",
)
@click.option('--build-ctx', help='Build context. This is required if --adapter-id uses relative path', default=None)
@click.option(
'--dockerfile-template',
default=None,
type=click.File(),
help='Optional custom dockerfile template to be used with this BentoLLM.',
)
@click.option('--dockerfile-template', default=None, type=click.File(), help='Optional custom dockerfile template to be used with this BentoLLM.')
@cog.optgroup.group(cls=cog.MutuallyExclusiveOptionGroup, name='Utilities options') # type: ignore[misc]
@cog.optgroup.option(
'--containerize',
@@ -849,9 +767,7 @@ def build_command(
state = ItemState.NOT_FOUND
if backend == 'pt':
logger.warning(
"PyTorch backend is deprecated and will be removed from the next releases. Will set default backend to 'vllm' instead."
)
logger.warning("PyTorch backend is deprecated and will be removed from the next releases. Will set default backend to 'vllm' instead.")
llm = openllm.LLM(
model_id=model_id,
@@ -861,9 +777,7 @@ def build_command(
dtype=dtype,
max_model_len=max_model_len,
gpu_memory_utilization=gpu_memory_utilization,
serialisation=first_not_none(
serialisation, default='safetensors' if has_safetensors_weights(model_id, model_version) else 'legacy'
),
serialisation=first_not_none(serialisation, default='safetensors' if has_safetensors_weights(model_id, model_version) else 'legacy'),
_eager=False,
)
if llm.__llm_backend__ not in llm.config['backend']:
@@ -875,9 +789,7 @@ def build_command(
model = openllm.serialisation.import_model(llm, trust_remote_code=llm.trust_remote_code)
llm._tag = model.tag
os.environ.update(
**process_environ(llm.config, llm.config['timeout'], 1.0, None, True, llm.model_id, None, llm._serialisation, llm)
)
os.environ.update(**process_environ(llm.config, llm.config['timeout'], 1.0, None, True, llm.model_id, None, llm._serialisation, llm))
try:
assert llm.bentomodel # HACK: call it here to patch correct tag with revision and everything
@@ -944,11 +856,7 @@ def build_command(
def get_current_bentocloud_context() -> str | None:
try:
context = (
cloud_config.get_context(ctx.obj.cloud_context)
if ctx.obj.cloud_context
else cloud_config.get_current_context()
)
context = cloud_config.get_context(ctx.obj.cloud_context) if ctx.obj.cloud_context else cloud_config.get_current_context()
return context.name
except Exception:
return None
@@ -972,9 +880,7 @@ def build_command(
tag=str(bento_tag),
backend=llm.__llm_backend__,
instructions=[
DeploymentInstruction.from_content(
type='bentocloud', instr="☁️ Push to BentoCloud with 'bentoml push':\n $ {cmd}", cmd=push_cmd
),
DeploymentInstruction.from_content(type='bentocloud', instr="☁️ Push to BentoCloud with 'bentoml push':\n $ {cmd}", cmd=push_cmd),
DeploymentInstruction.from_content(
type='container',
instr="🐳 Container BentoLLM with 'bentoml containerize':\n $ {cmd}",
@@ -1000,9 +906,7 @@ def build_command(
termui.echo(f" * {instruction['content']}\n", nl=False)
if push:
BentoMLContainer.bentocloud_client.get().push_bento(
bento, context=t.cast(GlobalOptions, ctx.obj).cloud_context, force=force_push
)
BentoMLContainer.bentocloud_client.get().push_bento(bento, context=t.cast(GlobalOptions, ctx.obj).cloud_context, force=force_push)
elif containerize:
container_backend = t.cast('DefaultBuilder', os.environ.get('BENTOML_CONTAINERIZE_BACKEND', 'docker'))
try:
@@ -1042,8 +946,7 @@ def models_command(**_: t.Any) -> dict[t.LiteralString, ModelItem]:
architecture=config.__openllm_architecture__,
example_id=random.choice(config.__openllm_model_ids__),
supported_backends=config.__openllm_backend__,
installation='pip install '
+ (f'"openllm[{m}]"' if m in OPTIONAL_DEPENDENCIES or config.__openllm_requirements__ else 'openllm'),
installation='pip install ' + (f'"openllm[{m}]"' if m in OPTIONAL_DEPENDENCIES or config.__openllm_requirements__ else 'openllm'),
items=[
str(md.tag)
for md in bentoml.models.list()
@@ -1062,13 +965,7 @@ def models_command(**_: t.Any) -> dict[t.LiteralString, ModelItem]:
@cli.command()
@model_name_argument(required=False)
@click.option('-y', '--yes', '--assume-yes', is_flag=True, help='Skip confirmation when deleting a specific model')
@click.option(
'--include-bentos/--no-include-bentos',
is_flag=True,
hidden=True,
default=True,
help='Whether to also include pruning bentos.',
)
@click.option('--include-bentos/--no-include-bentos', is_flag=True, hidden=True, default=True, help='Whether to also include pruning bentos.')
@inject
@click.pass_context
def prune_command(
@@ -1085,32 +982,24 @@ def prune_command(
If a model type is passed, then only prune models for that given model type.
"""
available: list[tuple[bentoml.Model | bentoml.Bento, ModelStore | BentoStore]] = [
(m, model_store)
for m in bentoml.models.list()
if 'framework' in m.info.labels and m.info.labels['framework'] == 'openllm'
(m, model_store) for m in bentoml.models.list() if 'framework' in m.info.labels and m.info.labels['framework'] == 'openllm'
]
if model_name is not None:
available = [
(m, store)
for m, store in available
if 'model_name' in m.info.labels and m.info.labels['model_name'] == inflection.underscore(model_name)
(m, store) for m, store in available if 'model_name' in m.info.labels and m.info.labels['model_name'] == inflection.underscore(model_name)
] + [
(b, bento_store)
for b in bentoml.bentos.list()
if 'start_name' in b.info.labels and b.info.labels['start_name'] == inflection.underscore(model_name)
]
else:
available += [
(b, bento_store) for b in bentoml.bentos.list() if '_type' in b.info.labels and '_framework' in b.info.labels
]
available += [(b, bento_store) for b in bentoml.bentos.list() if '_type' in b.info.labels and '_framework' in b.info.labels]
for store_item, store in available:
if yes:
delete_confirmed = True
else:
delete_confirmed = click.confirm(
f"delete {'model' if isinstance(store, ModelStore) else 'bento'} {store_item.tag}?"
)
delete_confirmed = click.confirm(f"delete {'model' if isinstance(store, ModelStore) else 'bento'} {store_item.tag}?")
if delete_confirmed:
store.delete(store_item.tag)
termui.warning(f"{store_item} deleted from {'model' if isinstance(store, ModelStore) else 'bento'} store.")
@@ -1157,17 +1046,8 @@ def shared_client_options(f: _AnyCallable | None = None) -> t.Callable[[FC], FC]
@cli.command()
@shared_client_options
@click.option(
'--server-type',
type=click.Choice(['grpc', 'http']),
help='Server type',
default='http',
show_default=True,
hidden=True,
)
@click.option(
'--stream/--no-stream', type=click.BOOL, is_flag=True, default=True, help='Whether to stream the response.'
)
@click.option('--server-type', type=click.Choice(['grpc', 'http']), help='Server type', default='http', show_default=True, hidden=True)
@click.option('--stream/--no-stream', type=click.BOOL, is_flag=True, default=True, help='Whether to stream the response.')
@click.argument('prompt', type=click.STRING)
@click.option(
'--sampling-params',

@@ -21,9 +21,7 @@ if t.TYPE_CHECKING:
@machine_option
@click.pass_context
@inject
def cli(
ctx: click.Context, bento: str, machine: bool, _bento_store: BentoStore = Provide[BentoMLContainer.bento_store]
) -> str | None:
def cli(ctx: click.Context, bento: str, machine: bool, _bento_store: BentoStore = Provide[BentoMLContainer.bento_store]) -> str | None:
"""Dive into a BentoLLM. This is synonymous to cd $(b get <bento>:<tag> -o path)."""
try:
bentomodel = _bento_store.get(bento)

@@ -17,9 +17,7 @@ if t.TYPE_CHECKING:
from bentoml._internal.bento import BentoStore
@click.command(
'get_containerfile', context_settings=termui.CONTEXT_SETTINGS, help='Return Containerfile of any given Bento.'
)
@click.command('get_containerfile', context_settings=termui.CONTEXT_SETTINGS, help='Return Containerfile of any given Bento.')
@click.argument('bento', type=str, shell_complete=bento_complete_envvar)
@click.pass_context
@inject

@@ -22,9 +22,7 @@ class PromptFormatter(string.Formatter):
raise ValueError('Positional arguments are not supported')
return super().vformat(format_string, args, kwargs)
def check_unused_args(
self, used_args: set[int | str], args: t.Sequence[t.Any], kwargs: t.Mapping[str, t.Any]
) -> None:
def check_unused_args(self, used_args: set[int | str], args: t.Sequence[t.Any], kwargs: t.Mapping[str, t.Any]) -> None:
extras = set(kwargs).difference(used_args)
if extras:
raise KeyError(f'Extra params passed: {extras}')
@@ -58,9 +56,7 @@ class PromptTemplate:
try:
return self.template.format(**prompt_variables)
except KeyError as e:
raise RuntimeError(
f"Missing variable '{e.args[0]}' (required: {self._input_variables}) in the prompt template."
) from None
raise RuntimeError(f"Missing variable '{e.args[0]}' (required: {self._input_variables}) in the prompt template.") from None
@click.command('get_prompt', context_settings=termui.CONTEXT_SETTINGS)
@@ -128,21 +124,15 @@ def cli(
if prompt_template_file and chat_template_file:
ctx.fail('prompt-template-file and chat-template-file are mutually exclusive.')
acceptable = set(openllm.CONFIG_MAPPING_NAMES.keys()) | set(
inflection.dasherize(name) for name in openllm.CONFIG_MAPPING_NAMES.keys()
)
acceptable = set(openllm.CONFIG_MAPPING_NAMES.keys()) | set(inflection.dasherize(name) for name in openllm.CONFIG_MAPPING_NAMES.keys())
if model_id in acceptable:
logger.warning(
'Using a default prompt from OpenLLM. Note that this prompt might not work for your intended usage.\n'
)
logger.warning('Using a default prompt from OpenLLM. Note that this prompt might not work for your intended usage.\n')
config = openllm.AutoConfig.for_model(model_id)
template = prompt_template_file.read() if prompt_template_file is not None else config.template
system_message = system_message or config.system_message
try:
formatted = (
PromptTemplate(template).with_options(system_message=system_message).format(instruction=prompt, **_memoized)
)
formatted = PromptTemplate(template).with_options(system_message=system_message).format(instruction=prompt, **_memoized)
except RuntimeError as err:
logger.debug('Exception caught while formatting prompt: %s', err)
ctx.fail(str(err))
@@ -159,21 +149,15 @@ def cli(
for architecture in config.architectures:
if architecture in openllm.AutoConfig._CONFIG_MAPPING_NAMES_TO_ARCHITECTURE():
system_message = (
openllm.AutoConfig.infer_class_from_name(
openllm.AutoConfig._CONFIG_MAPPING_NAMES_TO_ARCHITECTURE()[architecture]
)
openllm.AutoConfig.infer_class_from_name(openllm.AutoConfig._CONFIG_MAPPING_NAMES_TO_ARCHITECTURE()[architecture])
.model_construct_env()
.system_message
)
break
else:
ctx.fail(
f'Failed to infer system message from model architecture: {config.architectures}. Please pass in --system-message'
)
ctx.fail(f'Failed to infer system message from model architecture: {config.architectures}. Please pass in --system-message')
messages = [{'role': 'system', 'content': system_message}, {'role': 'user', 'content': prompt}]
formatted = tokenizer.apply_chat_template(
messages, chat_template=chat_template_file, add_generation_prompt=add_generation_prompt, tokenize=False
)
formatted = tokenizer.apply_chat_template(messages, chat_template=chat_template_file, add_generation_prompt=add_generation_prompt, tokenize=False)
termui.echo(orjson.dumps({'prompt': formatted}, option=orjson.OPT_INDENT_2).decode(), fg='white')
ctx.exit(0)

@@ -33,17 +33,12 @@ def cli(model_name: str | None) -> DictStrAny:
}
if model_name is not None:
ids_in_local_store = {
k: [
i
for i in v
if 'model_name' in i.info.labels and i.info.labels['model_name'] == inflection.dasherize(model_name)
]
k: [i for i in v if 'model_name' in i.info.labels and i.info.labels['model_name'] == inflection.dasherize(model_name)]
for k, v in ids_in_local_store.items()
}
ids_in_local_store = {k: v for k, v in ids_in_local_store.items() if v}
local_models = {
k: [{'tag': str(i.tag), 'size': human_readable_size(openllm.utils.calc_dir_size(i.path))} for i in val]
for k, val in ids_in_local_store.items()
k: [{'tag': str(i.tag), 'size': human_readable_size(openllm.utils.calc_dir_size(i.path))} for i in val] for k, val in ids_in_local_store.items()
}
termui.echo(orjson.dumps(local_models, option=orjson.OPT_INDENT_2).decode(), fg='white')
return local_models

@@ -32,14 +32,7 @@ def load_notebook_metadata() -> DictStrAny:
@click.command('playground', context_settings=termui.CONTEXT_SETTINGS)
@click.argument('output-dir', default=None, required=False)
@click.option(
'--port',
envvar='JUPYTER_PORT',
show_envvar=True,
show_default=True,
default=8888,
help='Default port for Jupyter server',
)
@click.option('--port', envvar='JUPYTER_PORT', show_envvar=True, show_default=True, default=8888, help='Default port for Jupyter server')
@click.pass_context
def cli(ctx: click.Context, output_dir: str | None, port: int) -> None:
"""OpenLLM Playground.
@@ -60,9 +53,7 @@ def cli(ctx: click.Context, output_dir: str | None, port: int) -> None:
> This command requires Jupyter to be installed. Install it with 'pip install "openllm[playground]"'
"""
if not is_jupyter_available() or not is_jupytext_available() or not is_notebook_available():
raise RuntimeError(
"Playground requires 'jupyter', 'jupytext', and 'notebook'. Install it with 'pip install \"openllm[playground]\"'"
)
raise RuntimeError("Playground requires 'jupyter', 'jupytext', and 'notebook'. Install it with 'pip install \"openllm[playground]\"'")
metadata = load_notebook_metadata()
_temp_dir = False
if output_dir is None:
@@ -74,9 +65,7 @@ def cli(ctx: click.Context, output_dir: str | None, port: int) -> None:
termui.echo('The playground notebooks will be saved to: ' + os.path.abspath(output_dir), fg='blue')
for module in pkgutil.iter_modules(playground.__path__):
if module.ispkg or os.path.exists(os.path.join(output_dir, module.name + '.ipynb')):
logger.debug(
'Skipping: %s (%s)', module.name, 'File already exists' if not module.ispkg else f'{module.name} is a module'
)
logger.debug('Skipping: %s (%s)', module.name, 'File already exists' if not module.ispkg else f'{module.name} is a module')
continue
if not isinstance(module.module_finder, importlib.machinery.FileFinder):
continue

@@ -25,14 +25,7 @@ class Level(enum.IntEnum):
@property
def color(self) -> str | None:
return {
Level.NOTSET: None,
Level.DEBUG: 'cyan',
Level.INFO: 'green',
Level.WARNING: 'yellow',
Level.ERROR: 'red',
Level.CRITICAL: 'red',
}[self]
return {Level.NOTSET: None, Level.DEBUG: 'cyan', Level.INFO: 'green', Level.WARNING: 'yellow', Level.ERROR: 'red', Level.CRITICAL: 'red'}[self]
@classmethod
def from_logging_level(cls, level: int) -> Level:
@@ -82,9 +75,5 @@ def echo(text: t.Any, fg: str | None = None, *, _with_style: bool = True, json:
COLUMNS: int = int(os.environ.get('COLUMNS', str(120)))
CONTEXT_SETTINGS: DictStrAny = {
'help_option_names': ['-h', '--help'],
'max_content_width': COLUMNS,
'token_normalize_func': inflection.underscore,
}
__all__ = ['COLUMNS', 'CONTEXT_SETTINGS', 'Level', 'critical', 'debug', 'echo', 'error', 'info', 'log', 'warning']
CONTEXT_SETTINGS: DictStrAny = {'help_option_names': ['-h', '--help'], 'max_content_width': COLUMNS, 'token_normalize_func': inflection.underscore}
__all__ = ['echo', 'COLUMNS', 'CONTEXT_SETTINGS', 'log', 'warning', 'error', 'critical', 'debug', 'info', 'Level']