ci: pre-commit autoupdate [pre-commit.ci] (#931)

* ci: pre-commit autoupdate [pre-commit.ci]

updates:
- [github.com/astral-sh/ruff-pre-commit: v0.2.2 → v0.3.2](https://github.com/astral-sh/ruff-pre-commit/compare/v0.2.2...v0.3.2)
- [github.com/pre-commit/mirrors-eslint: v9.0.0-beta.0 → v9.0.0-beta.2](https://github.com/pre-commit/mirrors-eslint/compare/v9.0.0-beta.0...v9.0.0-beta.2)

* ci: auto fixes from pre-commit.ci

For more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
pre-commit-ci[bot]
2024-03-15 03:46:28 -04:00
committed by GitHub
parent 7edbcf8a2d
commit 7b00c84c2a
69 changed files with 1298 additions and 366 deletions

View File

@@ -5,12 +5,25 @@ from bentoml_cli.utils import BentoMLCommandGroup
from click import shell_completion as sc
from openllm_core._configuration import LLMConfig
from openllm_core._typing_compat import Concatenate, DictStrAny, LiteralBackend, LiteralSerialisation, ParamSpec, AnyCallable, get_literal_args
from openllm_core._typing_compat import (
Concatenate,
DictStrAny,
LiteralBackend,
LiteralSerialisation,
ParamSpec,
AnyCallable,
get_literal_args,
)
from openllm_core.utils import DEBUG, compose, dantic, resolve_user_filepath
class _OpenLLM_GenericInternalConfig(LLMConfig):
__config__ = {'name_type': 'lowercase', 'default_id': 'openllm/generic', 'model_ids': ['openllm/generic'], 'architecture': 'PreTrainedModel'}
__config__ = {
'name_type': 'lowercase',
'default_id': 'openllm/generic',
'model_ids': ['openllm/generic'],
'architecture': 'PreTrainedModel',
}
class GenerationConfig:
top_k: int = 15
@@ -37,11 +50,20 @@ def bento_complete_envvar(ctx: click.Context, param: click.Parameter, incomplete
def model_complete_envvar(ctx: click.Context, param: click.Parameter, incomplete: str) -> list[sc.CompletionItem]:
return [sc.CompletionItem(inflection.dasherize(it), help='Model') for it in openllm.CONFIG_MAPPING if it.startswith(incomplete)]
return [
sc.CompletionItem(inflection.dasherize(it), help='Model')
for it in openllm.CONFIG_MAPPING
if it.startswith(incomplete)
]
def parse_config_options(
config: LLMConfig, server_timeout: int, workers_per_resource: float, device: t.Tuple[str, ...] | None, cors: bool, environ: DictStrAny
config: LLMConfig,
server_timeout: int,
workers_per_resource: float,
device: t.Tuple[str, ...] | None,
cors: bool,
environ: DictStrAny,
) -> DictStrAny:
# TODO: Support amd.com/gpu on k8s
_bentoml_config_options_env = environ.pop('BENTOML_CONFIG_OPTIONS', '')
@@ -56,14 +78,21 @@ def parse_config_options(
if device:
if len(device) > 1:
_bentoml_config_options_opts.extend([
f'runners."llm-{config["start_name"]}-runner".resources."nvidia.com/gpu"[{idx}]={dev}' for idx, dev in enumerate(device)
f'runners."llm-{config["start_name"]}-runner".resources."nvidia.com/gpu"[{idx}]={dev}'
for idx, dev in enumerate(device)
])
else:
_bentoml_config_options_opts.append(f'runners."llm-{config["start_name"]}-runner".resources."nvidia.com/gpu"=[{device[0]}]')
_bentoml_config_options_opts.append(
f'runners."llm-{config["start_name"]}-runner".resources."nvidia.com/gpu"=[{device[0]}]'
)
if cors:
_bentoml_config_options_opts.extend(['api_server.http.cors.enabled=true', 'api_server.http.cors.access_control_allow_origins="*"'])
_bentoml_config_options_opts.extend([
f'api_server.http.cors.access_control_allow_methods[{idx}]="{it}"' for idx, it in enumerate(['GET', 'OPTIONS', 'POST', 'HEAD', 'PUT'])
'api_server.http.cors.enabled=true',
'api_server.http.cors.access_control_allow_origins="*"',
])
_bentoml_config_options_opts.extend([
f'api_server.http.cors.access_control_allow_methods[{idx}]="{it}"'
for idx, it in enumerate(['GET', 'OPTIONS', 'POST', 'HEAD', 'PUT'])
])
_bentoml_config_options_env += ' ' if _bentoml_config_options_env else '' + ' '.join(_bentoml_config_options_opts)
environ['BENTOML_CONFIG_OPTIONS'] = _bentoml_config_options_env
@@ -142,7 +171,9 @@ def start_decorator(fn: FC) -> FC:
return composed(fn)
def parse_device_callback(_: click.Context, param: click.Parameter, value: tuple[tuple[str], ...] | None) -> t.Tuple[str, ...] | None:
def parse_device_callback(
_: click.Context, param: click.Parameter, value: tuple[tuple[str], ...] | None
) -> t.Tuple[str, ...] | None:
if value is None:
return value
el: t.Tuple[str, ...] = tuple(i for k in value for i in k)
@@ -161,13 +192,19 @@ _IGNORED_OPTIONS = {'working_dir', 'production', 'protocol_version'}
def parse_serve_args() -> t.Callable[[t.Callable[..., LLMConfig]], t.Callable[[FC], FC]]:
from bentoml_cli.cli import cli
group = cog.optgroup.group('Start a HTTP server options', help='Related to serving the model [synonymous to `bentoml serve-http`]')
group = cog.optgroup.group(
'Start a HTTP server options', help='Related to serving the model [synonymous to `bentoml serve-http`]'
)
def decorator(f: t.Callable[Concatenate[int, t.Optional[str], P], LLMConfig]) -> t.Callable[[FC], FC]:
serve_command = cli.commands['serve']
# The first variable is the argument bento
# The last five is from BentoMLCommandGroup.NUMBER_OF_COMMON_PARAMS
serve_options = [p for p in serve_command.params[1 : -BentoMLCommandGroup.NUMBER_OF_COMMON_PARAMS] if p.name not in _IGNORED_OPTIONS]
serve_options = [
p
for p in serve_command.params[1 : -BentoMLCommandGroup.NUMBER_OF_COMMON_PARAMS]
if p.name not in _IGNORED_OPTIONS
]
for options in reversed(serve_options):
attrs = options.to_info_dict()
# we don't need param_type_name, since it should all be options
@@ -221,7 +258,13 @@ def adapter_id_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callab
def cors_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callable[[FC], FC]:
return cli_option(
'--cors/--no-cors', show_default=True, default=False, envvar='OPENLLM_CORS', show_envvar=True, help='Enable CORS for the server.', **attrs
'--cors/--no-cors',
show_default=True,
default=False,
envvar='OPENLLM_CORS',
show_envvar=True,
help='Enable CORS for the server.',
**attrs,
)(f)
@@ -275,7 +318,12 @@ def backend_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callable[
def model_name_argument(f: _AnyCallable | None = None, required: bool = True, **attrs: t.Any) -> t.Callable[[FC], FC]:
return cli_argument('model_name', type=click.Choice([inflection.dasherize(name) for name in openllm.CONFIG_MAPPING]), required=required, **attrs)(f)
return cli_argument(
'model_name',
type=click.Choice([inflection.dasherize(name) for name in openllm.CONFIG_MAPPING]),
required=required,
**attrs,
)(f)
def quantize_option(f: _AnyCallable | None = None, *, build: bool = False, **attrs: t.Any) -> t.Callable[[FC], FC]:
@@ -313,7 +361,9 @@ def quantize_option(f: _AnyCallable | None = None, *, build: bool = False, **att
)(f)
def workers_per_resource_option(f: _AnyCallable | None = None, *, build: bool = False, **attrs: t.Any) -> t.Callable[[FC], FC]:
def workers_per_resource_option(
f: _AnyCallable | None = None, *, build: bool = False, **attrs: t.Any
) -> t.Callable[[FC], FC]:
return cli_option(
'--workers-per-resource',
default=None,
@@ -381,7 +431,9 @@ def workers_per_resource_callback(ctx: click.Context, param: click.Parameter, va
float(value) # type: ignore[arg-type]
except ValueError:
raise click.BadParameter(
f"'workers_per_resource' only accept '{_wpr_strategies}' as possible strategies, otherwise pass in float.", ctx, param
f"'workers_per_resource' only accept '{_wpr_strategies}' as possible strategies, otherwise pass in float.",
ctx,
param,
) from None
else:
return value

View File

@@ -69,7 +69,10 @@ def _start(
if timeout:
args.extend(['--server-timeout', str(timeout)])
if workers_per_resource:
args.extend(['--workers-per-resource', str(workers_per_resource) if not isinstance(workers_per_resource, str) else workers_per_resource])
args.extend([
'--workers-per-resource',
str(workers_per_resource) if not isinstance(workers_per_resource, str) else workers_per_resource,
])
if device and not os.environ.get('CUDA_VISIBLE_DEVICES'):
args.extend(['--device', ','.join(device)])
if quantize:
@@ -77,7 +80,11 @@ def _start(
if cors:
args.append('--cors')
if adapter_map:
args.extend(list(itertools.chain.from_iterable([['--adapter-id', f"{k}{':'+v if v else ''}"] for k, v in adapter_map.items()])))
args.extend(
list(
itertools.chain.from_iterable([['--adapter-id', f"{k}{':' + v if v else ''}"] for k, v in adapter_map.items()])
)
)
if additional_args:
args.extend(additional_args)
if __test__:
@@ -148,7 +155,9 @@ def _build(
'--machine',
'--quiet',
'--serialisation',
first_not_none(serialisation, default='safetensors' if has_safetensors_weights(model_id, model_version) else 'legacy'),
first_not_none(
serialisation, default='safetensors' if has_safetensors_weights(model_id, model_version) else 'legacy'
),
]
if quantize:
args.extend(['--quantize', quantize])
@@ -165,7 +174,7 @@ def _build(
if overwrite:
args.append('--overwrite')
if adapter_map:
args.extend([f"--adapter-id={k}{':'+v if v is not None else ''}" for k, v in adapter_map.items()])
args.extend([f"--adapter-id={k}{':' + v if v is not None else ''}" for k, v in adapter_map.items()])
if model_version:
args.extend(['--model-version', model_version])
if bento_version:
@@ -265,4 +274,4 @@ start, build, import_model, list_models = (
codegen.gen_sdk(_import_model),
codegen.gen_sdk(_list_models),
)
__all__ = ['start', 'build', 'import_model', 'list_models']
__all__ = ['build', 'import_model', 'list_models', 'start']

View File

@@ -43,7 +43,15 @@ from openllm_core.utils import (
)
from . import termui
from ._factory import FC, _AnyCallable, machine_option, model_name_argument, parse_config_options, start_decorator, optimization_decorator
from ._factory import (
FC,
_AnyCallable,
machine_option,
model_name_argument,
parse_config_options,
start_decorator,
optimization_decorator,
)
if t.TYPE_CHECKING:
import torch
@@ -95,12 +103,18 @@ def backend_warning(backend: LiteralBackend, build: bool = False) -> None:
'vLLM is not available. Note that PyTorch backend is not as performant as vLLM and you should always consider using vLLM for production.'
)
if build:
logger.info("Tip: You can set '--backend vllm' to package your Bento with vLLM backend regardless if vLLM is available locally.")
logger.info(
"Tip: You can set '--backend vllm' to package your Bento with vLLM backend regardless if vLLM is available locally."
)
class Extensions(click.MultiCommand):
def list_commands(self, ctx: click.Context) -> list[str]:
return sorted([filename[:-3] for filename in os.listdir(_EXT_FOLDER) if filename.endswith('.py') and not filename.startswith('__')])
return sorted([
filename[:-3]
for filename in os.listdir(_EXT_FOLDER)
if filename.endswith('.py') and not filename.startswith('__')
])
def get_command(self, ctx: click.Context, cmd_name: str) -> click.Command | None:
try:
@@ -117,19 +131,41 @@ class OpenLLMCommandGroup(BentoMLCommandGroup):
def common_params(f: t.Callable[P, t.Any]) -> t.Callable[[FC], FC]:
# The following logics is similar to one of BentoMLCommandGroup
@cog.optgroup.group(name='Global options', help='Shared globals options for all OpenLLM CLI.') # type: ignore[misc]
@cog.optgroup.option('-q', '--quiet', envvar=QUIET_ENV_VAR, is_flag=True, default=False, help='Suppress all output.', show_envvar=True)
@cog.optgroup.option(
'--debug', '--verbose', 'debug', envvar=DEBUG_ENV_VAR, is_flag=True, default=False, help='Print out debug logs.', show_envvar=True
'-q', '--quiet', envvar=QUIET_ENV_VAR, is_flag=True, default=False, help='Suppress all output.', show_envvar=True
)
@cog.optgroup.option(
'--do-not-track', is_flag=True, default=False, envvar=analytics.OPENLLM_DO_NOT_TRACK, help='Do not send usage info', show_envvar=True
'--debug',
'--verbose',
'debug',
envvar=DEBUG_ENV_VAR,
is_flag=True,
default=False,
help='Print out debug logs.',
show_envvar=True,
)
@cog.optgroup.option(
'--context', 'cloud_context', envvar='BENTOCLOUD_CONTEXT', type=click.STRING, default=None, help='BentoCloud context name.', show_envvar=True
'--do-not-track',
is_flag=True,
default=False,
envvar=analytics.OPENLLM_DO_NOT_TRACK,
help='Do not send usage info',
show_envvar=True,
)
@cog.optgroup.option(
'--context',
'cloud_context',
envvar='BENTOCLOUD_CONTEXT',
type=click.STRING,
default=None,
help='BentoCloud context name.',
show_envvar=True,
)
@click.pass_context
@functools.wraps(f)
def wrapper(ctx: click.Context, quiet: bool, debug: bool, cloud_context: str | None, *args: P.args, **attrs: P.kwargs) -> t.Any:
def wrapper(
ctx: click.Context, quiet: bool, debug: bool, cloud_context: str | None, *args: P.args, **attrs: P.kwargs
) -> t.Any:
ctx.obj = GlobalOptions(cloud_context=cloud_context)
if quiet:
set_quiet_mode(True)
@@ -143,7 +179,9 @@ class OpenLLMCommandGroup(BentoMLCommandGroup):
return wrapper
@staticmethod
def usage_tracking(func: t.Callable[P, t.Any], group: click.Group, **attrs: t.Any) -> t.Callable[Concatenate[bool, P], t.Any]:
def usage_tracking(
func: t.Callable[P, t.Any], group: click.Group, **attrs: t.Any
) -> t.Callable[Concatenate[bool, P], t.Any]:
command_name = attrs.get('name', func.__name__)
@functools.wraps(func)
@@ -202,7 +240,9 @@ class OpenLLMCommandGroup(BentoMLCommandGroup):
_memo = getattr(wrapped, '__click_params__', None)
if _memo is None:
raise ValueError('Click command not register correctly.')
_object_setattr(wrapped, '__click_params__', _memo[-self.NUMBER_OF_COMMON_PARAMS :] + _memo[: -self.NUMBER_OF_COMMON_PARAMS])
_object_setattr(
wrapped, '__click_params__', _memo[-self.NUMBER_OF_COMMON_PARAMS :] + _memo[: -self.NUMBER_OF_COMMON_PARAMS]
)
# NOTE: we need to call super of super to avoid conflict with BentoMLCommandGroup command setup
cmd = super(BentoMLCommandGroup, self).command(*args, **kwargs)(wrapped)
# NOTE: add aliases to a given commands if it is specified.
@@ -210,7 +250,7 @@ class OpenLLMCommandGroup(BentoMLCommandGroup):
if not cmd.name:
raise ValueError('name is required when aliases are available.')
self._commands[cmd.name] = aliases
self._aliases.update({alias: cmd.name for alias in aliases})
self._aliases.update(dict.fromkeys(aliases, cmd.name))
return cmd
return decorator
@@ -277,7 +317,12 @@ def cli() -> None:
"""
@cli.command(context_settings=termui.CONTEXT_SETTINGS, name='start', aliases=['start-http'], short_help='Start a LLMServer for any supported LLM.')
@cli.command(
context_settings=termui.CONTEXT_SETTINGS,
name='start',
aliases=['start-http'],
short_help='Start a LLMServer for any supported LLM.',
)
@click.argument('model_id', type=click.STRING, metavar='[REMOTE_REPO/MODEL_ID | /path/to/local/model]', required=True)
@click.option(
'--model-id',
@@ -330,7 +375,9 @@ def start_command(
```
"""
if backend == 'pt':
logger.warning('PyTorch backend is deprecated and will be removed in future releases. Make sure to use vLLM instead.')
logger.warning(
'PyTorch backend is deprecated and will be removed in future releases. Make sure to use vLLM instead.'
)
if model_id in openllm.CONFIG_MAPPING:
_model_name = model_id
if deprecated_model_id is not None:
@@ -348,11 +395,17 @@ def start_command(
from openllm.serialisation.transformers.weights import has_safetensors_weights
serialisation = first_not_none(serialisation, default='safetensors' if has_safetensors_weights(model_id, model_version) else 'legacy')
serialisation = first_not_none(
serialisation, default='safetensors' if has_safetensors_weights(model_id, model_version) else 'legacy'
)
if serialisation == 'safetensors' and quantize is not None:
logger.warning("'--quantize=%s' might not work with 'safetensors' serialisation format.", quantize)
logger.warning("Make sure to check out '%s' repository to see if the weights is in '%s' format if unsure.", model_id, serialisation)
logger.warning(
"Make sure to check out '%s' repository to see if the weights is in '%s' format if unsure.",
model_id,
serialisation,
)
logger.info("Tip: You can always fallback to '--serialisation legacy' when running quantisation.")
import torch
@@ -380,7 +433,9 @@ def start_command(
config, server_attrs = llm.config.model_validate_click(**attrs)
server_timeout = first_not_none(server_timeout, default=config['timeout'])
server_attrs.update({'working_dir': pkg.source_locations('openllm'), 'timeout': server_timeout})
development = server_attrs.pop('development') # XXX: currently, theres no development args in bentoml.Server. To be fixed upstream.
development = server_attrs.pop(
'development'
) # XXX: currently, theres no development args in bentoml.Server. To be fixed upstream.
server_attrs.setdefault('production', not development)
start_env = process_environ(
@@ -410,8 +465,12 @@ def start_command(
return config
def process_environ(config, server_timeout, wpr, device, cors, model_id, adapter_map, serialisation, llm, use_current_env=True):
environ = parse_config_options(config, server_timeout, wpr, device, cors, os.environ.copy() if use_current_env else {})
def process_environ(
config, server_timeout, wpr, device, cors, model_id, adapter_map, serialisation, llm, use_current_env=True
):
environ = parse_config_options(
config, server_timeout, wpr, device, cors, os.environ.copy() if use_current_env else {}
)
environ.update({
'OPENLLM_MODEL_ID': model_id,
'BENTOML_DEBUG': str(openllm.utils.get_debug_mode()),
@@ -456,7 +515,8 @@ def build_bento_instruction(llm, model_id, serialisation, adapter_map):
cmd_name += f' --serialization {serialisation}'
if adapter_map is not None:
cmd_name += ' ' + ' '.join([
f'--adapter-id {s}' for s in [f'{p}:{name}' if name not in (None, 'default') else p for p, name in adapter_map.items()]
f'--adapter-id {s}'
for s in [f'{p}:{name}' if name not in (None, 'default') else p for p, name in adapter_map.items()]
])
if not openllm.utils.get_quiet_mode():
termui.info(f"🚀Tip: run '{cmd_name}' to create a BentoLLM for '{model_id}'")
@@ -491,8 +551,12 @@ def run_server(args, env, return_process=False) -> subprocess.Popen[bytes] | int
if return_process:
return process
stop_event = threading.Event()
stdout, stderr = threading.Thread(target=handle, args=(process.stdout, stop_event)), threading.Thread(target=handle, args=(process.stderr, stop_event))
stdout.start(); stderr.start() # noqa: E702
stdout, stderr = (
threading.Thread(target=handle, args=(process.stdout, stop_event)),
threading.Thread(target=handle, args=(process.stderr, stop_event)),
)
stdout.start()
stderr.start() # noqa: E702
try:
process.wait()
@@ -507,9 +571,12 @@ def run_server(args, env, return_process=False) -> subprocess.Popen[bytes] | int
raise
finally:
stop_event.set()
stdout.join(); stderr.join() # noqa: E702
if process.poll() is not None: process.kill()
stdout.join(); stderr.join() # noqa: E702
stdout.join()
stderr.join() # noqa: E702
if process.poll() is not None:
process.kill()
stdout.join()
stderr.join() # noqa: E702
return process.returncode
@@ -597,7 +664,10 @@ def import_command(
backend=backend,
dtype=dtype,
serialisation=t.cast(
LiteralSerialisation, first_not_none(serialisation, default='safetensors' if has_safetensors_weights(model_id, model_version) else 'legacy')
LiteralSerialisation,
first_not_none(
serialisation, default='safetensors' if has_safetensors_weights(model_id, model_version) else 'legacy'
),
),
)
backend_warning(llm.__llm_backend__)
@@ -656,14 +726,21 @@ class BuildBentoOutput(t.TypedDict):
metavar='[REMOTE_REPO/MODEL_ID | /path/to/local/model]',
help='Deprecated. Use positional argument instead.',
)
@click.option('--bento-version', type=str, default=None, help='Optional bento version for this BentoLLM. Default is the the model revision.')
@click.option(
'--bento-version',
type=str,
default=None,
help='Optional bento version for this BentoLLM. Default is the the model revision.',
)
@click.option('--overwrite', is_flag=True, help='Overwrite existing Bento for given LLM if it already exists.')
@click.option(
'--enable-features',
multiple=True,
nargs=1,
metavar='FEATURE[,FEATURE]',
help='Enable additional features for building this LLM Bento. Available: {}'.format(', '.join(OPTIONAL_DEPENDENCIES)),
help='Enable additional features for building this LLM Bento. Available: {}'.format(
', '.join(OPTIONAL_DEPENDENCIES)
),
)
@optimization_decorator
@click.option(
@@ -674,7 +751,12 @@ class BuildBentoOutput(t.TypedDict):
help="Optional adapters id to be included within the Bento. Note that if you are using relative path, '--build-ctx' must be passed.",
)
@click.option('--build-ctx', help='Build context. This is required if --adapter-id uses relative path', default=None)
@click.option('--dockerfile-template', default=None, type=click.File(), help='Optional custom dockerfile template to be used with this BentoLLM.')
@click.option(
'--dockerfile-template',
default=None,
type=click.File(),
help='Optional custom dockerfile template to be used with this BentoLLM.',
)
@cog.optgroup.group(cls=cog.MutuallyExclusiveOptionGroup, name='Utilities options') # type: ignore[misc]
@cog.optgroup.option(
'--containerize',
@@ -767,7 +849,9 @@ def build_command(
state = ItemState.NOT_FOUND
if backend == 'pt':
logger.warning("PyTorch backend is deprecated and will be removed from the next releases. Will set default backend to 'vllm' instead.")
logger.warning(
"PyTorch backend is deprecated and will be removed from the next releases. Will set default backend to 'vllm' instead."
)
llm = openllm.LLM(
model_id=model_id,
@@ -777,7 +861,9 @@ def build_command(
dtype=dtype,
max_model_len=max_model_len,
gpu_memory_utilization=gpu_memory_utilization,
serialisation=first_not_none(serialisation, default='safetensors' if has_safetensors_weights(model_id, model_version) else 'legacy'),
serialisation=first_not_none(
serialisation, default='safetensors' if has_safetensors_weights(model_id, model_version) else 'legacy'
),
_eager=False,
)
if llm.__llm_backend__ not in llm.config['backend']:
@@ -789,7 +875,9 @@ def build_command(
model = openllm.serialisation.import_model(llm, trust_remote_code=llm.trust_remote_code)
llm._tag = model.tag
os.environ.update(**process_environ(llm.config, llm.config['timeout'], 1.0, None, True, llm.model_id, None, llm._serialisation, llm))
os.environ.update(
**process_environ(llm.config, llm.config['timeout'], 1.0, None, True, llm.model_id, None, llm._serialisation, llm)
)
try:
assert llm.bentomodel # HACK: call it here to patch correct tag with revision and everything
@@ -856,7 +944,11 @@ def build_command(
def get_current_bentocloud_context() -> str | None:
try:
context = cloud_config.get_context(ctx.obj.cloud_context) if ctx.obj.cloud_context else cloud_config.get_current_context()
context = (
cloud_config.get_context(ctx.obj.cloud_context)
if ctx.obj.cloud_context
else cloud_config.get_current_context()
)
return context.name
except Exception:
return None
@@ -880,7 +972,9 @@ def build_command(
tag=str(bento_tag),
backend=llm.__llm_backend__,
instructions=[
DeploymentInstruction.from_content(type='bentocloud', instr="☁️ Push to BentoCloud with 'bentoml push':\n $ {cmd}", cmd=push_cmd),
DeploymentInstruction.from_content(
type='bentocloud', instr="☁️ Push to BentoCloud with 'bentoml push':\n $ {cmd}", cmd=push_cmd
),
DeploymentInstruction.from_content(
type='container',
instr="🐳 Container BentoLLM with 'bentoml containerize':\n $ {cmd}",
@@ -906,7 +1000,9 @@ def build_command(
termui.echo(f" * {instruction['content']}\n", nl=False)
if push:
BentoMLContainer.bentocloud_client.get().push_bento(bento, context=t.cast(GlobalOptions, ctx.obj).cloud_context, force=force_push)
BentoMLContainer.bentocloud_client.get().push_bento(
bento, context=t.cast(GlobalOptions, ctx.obj).cloud_context, force=force_push
)
elif containerize:
container_backend = t.cast('DefaultBuilder', os.environ.get('BENTOML_CONTAINERIZE_BACKEND', 'docker'))
try:
@@ -946,7 +1042,8 @@ def models_command(**_: t.Any) -> dict[t.LiteralString, ModelItem]:
architecture=config.__openllm_architecture__,
example_id=random.choice(config.__openllm_model_ids__),
supported_backends=config.__openllm_backend__,
installation='pip install ' + (f'"openllm[{m}]"' if m in OPTIONAL_DEPENDENCIES or config.__openllm_requirements__ else 'openllm'),
installation='pip install '
+ (f'"openllm[{m}]"' if m in OPTIONAL_DEPENDENCIES or config.__openllm_requirements__ else 'openllm'),
items=[
str(md.tag)
for md in bentoml.models.list()
@@ -965,7 +1062,13 @@ def models_command(**_: t.Any) -> dict[t.LiteralString, ModelItem]:
@cli.command()
@model_name_argument(required=False)
@click.option('-y', '--yes', '--assume-yes', is_flag=True, help='Skip confirmation when deleting a specific model')
@click.option('--include-bentos/--no-include-bentos', is_flag=True, hidden=True, default=True, help='Whether to also include pruning bentos.')
@click.option(
'--include-bentos/--no-include-bentos',
is_flag=True,
hidden=True,
default=True,
help='Whether to also include pruning bentos.',
)
@inject
@click.pass_context
def prune_command(
@@ -982,24 +1085,32 @@ def prune_command(
If a model type is passed, then only prune models for that given model type.
"""
available: list[tuple[bentoml.Model | bentoml.Bento, ModelStore | BentoStore]] = [
(m, model_store) for m in bentoml.models.list() if 'framework' in m.info.labels and m.info.labels['framework'] == 'openllm'
(m, model_store)
for m in bentoml.models.list()
if 'framework' in m.info.labels and m.info.labels['framework'] == 'openllm'
]
if model_name is not None:
available = [
(m, store) for m, store in available if 'model_name' in m.info.labels and m.info.labels['model_name'] == inflection.underscore(model_name)
(m, store)
for m, store in available
if 'model_name' in m.info.labels and m.info.labels['model_name'] == inflection.underscore(model_name)
] + [
(b, bento_store)
for b in bentoml.bentos.list()
if 'start_name' in b.info.labels and b.info.labels['start_name'] == inflection.underscore(model_name)
]
else:
available += [(b, bento_store) for b in bentoml.bentos.list() if '_type' in b.info.labels and '_framework' in b.info.labels]
available += [
(b, bento_store) for b in bentoml.bentos.list() if '_type' in b.info.labels and '_framework' in b.info.labels
]
for store_item, store in available:
if yes:
delete_confirmed = True
else:
delete_confirmed = click.confirm(f"delete {'model' if isinstance(store, ModelStore) else 'bento'} {store_item.tag}?")
delete_confirmed = click.confirm(
f"delete {'model' if isinstance(store, ModelStore) else 'bento'} {store_item.tag}?"
)
if delete_confirmed:
store.delete(store_item.tag)
termui.warning(f"{store_item} deleted from {'model' if isinstance(store, ModelStore) else 'bento'} store.")
@@ -1046,8 +1157,17 @@ def shared_client_options(f: _AnyCallable | None = None) -> t.Callable[[FC], FC]
@cli.command()
@shared_client_options
@click.option('--server-type', type=click.Choice(['grpc', 'http']), help='Server type', default='http', show_default=True, hidden=True)
@click.option('--stream/--no-stream', type=click.BOOL, is_flag=True, default=True, help='Whether to stream the response.')
@click.option(
'--server-type',
type=click.Choice(['grpc', 'http']),
help='Server type',
default='http',
show_default=True,
hidden=True,
)
@click.option(
'--stream/--no-stream', type=click.BOOL, is_flag=True, default=True, help='Whether to stream the response.'
)
@click.argument('prompt', type=click.STRING)
@click.option(
'--sampling-params',

View File

@@ -21,7 +21,9 @@ if t.TYPE_CHECKING:
@machine_option
@click.pass_context
@inject
def cli(ctx: click.Context, bento: str, machine: bool, _bento_store: BentoStore = Provide[BentoMLContainer.bento_store]) -> str | None:
def cli(
ctx: click.Context, bento: str, machine: bool, _bento_store: BentoStore = Provide[BentoMLContainer.bento_store]
) -> str | None:
"""Dive into a BentoLLM. This is synonymous to cd $(b get <bento>:<tag> -o path)."""
try:
bentomodel = _bento_store.get(bento)

View File

@@ -17,7 +17,9 @@ if t.TYPE_CHECKING:
from bentoml._internal.bento import BentoStore
@click.command('get_containerfile', context_settings=termui.CONTEXT_SETTINGS, help='Return Containerfile of any given Bento.')
@click.command(
'get_containerfile', context_settings=termui.CONTEXT_SETTINGS, help='Return Containerfile of any given Bento.'
)
@click.argument('bento', type=str, shell_complete=bento_complete_envvar)
@click.pass_context
@inject

View File

@@ -22,7 +22,9 @@ class PromptFormatter(string.Formatter):
raise ValueError('Positional arguments are not supported')
return super().vformat(format_string, args, kwargs)
def check_unused_args(self, used_args: set[int | str], args: t.Sequence[t.Any], kwargs: t.Mapping[str, t.Any]) -> None:
def check_unused_args(
self, used_args: set[int | str], args: t.Sequence[t.Any], kwargs: t.Mapping[str, t.Any]
) -> None:
extras = set(kwargs).difference(used_args)
if extras:
raise KeyError(f'Extra params passed: {extras}')
@@ -56,7 +58,9 @@ class PromptTemplate:
try:
return self.template.format(**prompt_variables)
except KeyError as e:
raise RuntimeError(f"Missing variable '{e.args[0]}' (required: {self._input_variables}) in the prompt template.") from None
raise RuntimeError(
f"Missing variable '{e.args[0]}' (required: {self._input_variables}) in the prompt template."
) from None
@click.command('get_prompt', context_settings=termui.CONTEXT_SETTINGS)
@@ -124,15 +128,21 @@ def cli(
if prompt_template_file and chat_template_file:
ctx.fail('prompt-template-file and chat-template-file are mutually exclusive.')
acceptable = set(openllm.CONFIG_MAPPING_NAMES.keys()) | set(inflection.dasherize(name) for name in openllm.CONFIG_MAPPING_NAMES.keys())
acceptable = set(openllm.CONFIG_MAPPING_NAMES.keys()) | set(
inflection.dasherize(name) for name in openllm.CONFIG_MAPPING_NAMES.keys()
)
if model_id in acceptable:
logger.warning('Using a default prompt from OpenLLM. Note that this prompt might not work for your intended usage.\n')
logger.warning(
'Using a default prompt from OpenLLM. Note that this prompt might not work for your intended usage.\n'
)
config = openllm.AutoConfig.for_model(model_id)
template = prompt_template_file.read() if prompt_template_file is not None else config.template
system_message = system_message or config.system_message
try:
formatted = PromptTemplate(template).with_options(system_message=system_message).format(instruction=prompt, **_memoized)
formatted = (
PromptTemplate(template).with_options(system_message=system_message).format(instruction=prompt, **_memoized)
)
except RuntimeError as err:
logger.debug('Exception caught while formatting prompt: %s', err)
ctx.fail(str(err))
@@ -149,15 +159,21 @@ def cli(
for architecture in config.architectures:
if architecture in openllm.AutoConfig._CONFIG_MAPPING_NAMES_TO_ARCHITECTURE():
system_message = (
openllm.AutoConfig.infer_class_from_name(openllm.AutoConfig._CONFIG_MAPPING_NAMES_TO_ARCHITECTURE()[architecture])
openllm.AutoConfig.infer_class_from_name(
openllm.AutoConfig._CONFIG_MAPPING_NAMES_TO_ARCHITECTURE()[architecture]
)
.model_construct_env()
.system_message
)
break
else:
ctx.fail(f'Failed to infer system message from model architecture: {config.architectures}. Please pass in --system-message')
ctx.fail(
f'Failed to infer system message from model architecture: {config.architectures}. Please pass in --system-message'
)
messages = [{'role': 'system', 'content': system_message}, {'role': 'user', 'content': prompt}]
formatted = tokenizer.apply_chat_template(messages, chat_template=chat_template_file, add_generation_prompt=add_generation_prompt, tokenize=False)
formatted = tokenizer.apply_chat_template(
messages, chat_template=chat_template_file, add_generation_prompt=add_generation_prompt, tokenize=False
)
termui.echo(orjson.dumps({'prompt': formatted}, option=orjson.OPT_INDENT_2).decode(), fg='white')
ctx.exit(0)

View File

@@ -33,12 +33,17 @@ def cli(model_name: str | None) -> DictStrAny:
}
if model_name is not None:
ids_in_local_store = {
k: [i for i in v if 'model_name' in i.info.labels and i.info.labels['model_name'] == inflection.dasherize(model_name)]
k: [
i
for i in v
if 'model_name' in i.info.labels and i.info.labels['model_name'] == inflection.dasherize(model_name)
]
for k, v in ids_in_local_store.items()
}
ids_in_local_store = {k: v for k, v in ids_in_local_store.items() if v}
local_models = {
k: [{'tag': str(i.tag), 'size': human_readable_size(openllm.utils.calc_dir_size(i.path))} for i in val] for k, val in ids_in_local_store.items()
k: [{'tag': str(i.tag), 'size': human_readable_size(openllm.utils.calc_dir_size(i.path))} for i in val]
for k, val in ids_in_local_store.items()
}
termui.echo(orjson.dumps(local_models, option=orjson.OPT_INDENT_2).decode(), fg='white')
return local_models

View File

@@ -32,7 +32,14 @@ def load_notebook_metadata() -> DictStrAny:
@click.command('playground', context_settings=termui.CONTEXT_SETTINGS)
@click.argument('output-dir', default=None, required=False)
@click.option('--port', envvar='JUPYTER_PORT', show_envvar=True, show_default=True, default=8888, help='Default port for Jupyter server')
@click.option(
'--port',
envvar='JUPYTER_PORT',
show_envvar=True,
show_default=True,
default=8888,
help='Default port for Jupyter server',
)
@click.pass_context
def cli(ctx: click.Context, output_dir: str | None, port: int) -> None:
"""OpenLLM Playground.
@@ -53,7 +60,9 @@ def cli(ctx: click.Context, output_dir: str | None, port: int) -> None:
> This command requires Jupyter to be installed. Install it with 'pip install "openllm[playground]"'
"""
if not is_jupyter_available() or not is_jupytext_available() or not is_notebook_available():
raise RuntimeError("Playground requires 'jupyter', 'jupytext', and 'notebook'. Install it with 'pip install \"openllm[playground]\"'")
raise RuntimeError(
"Playground requires 'jupyter', 'jupytext', and 'notebook'. Install it with 'pip install \"openllm[playground]\"'"
)
metadata = load_notebook_metadata()
_temp_dir = False
if output_dir is None:
@@ -65,7 +74,9 @@ def cli(ctx: click.Context, output_dir: str | None, port: int) -> None:
termui.echo('The playground notebooks will be saved to: ' + os.path.abspath(output_dir), fg='blue')
for module in pkgutil.iter_modules(playground.__path__):
if module.ispkg or os.path.exists(os.path.join(output_dir, module.name + '.ipynb')):
logger.debug('Skipping: %s (%s)', module.name, 'File already exists' if not module.ispkg else f'{module.name} is a module')
logger.debug(
'Skipping: %s (%s)', module.name, 'File already exists' if not module.ispkg else f'{module.name} is a module'
)
continue
if not isinstance(module.module_finder, importlib.machinery.FileFinder):
continue

View File

@@ -25,7 +25,14 @@ class Level(enum.IntEnum):
@property
def color(self) -> str | None:
return {Level.NOTSET: None, Level.DEBUG: 'cyan', Level.INFO: 'green', Level.WARNING: 'yellow', Level.ERROR: 'red', Level.CRITICAL: 'red'}[self]
return {
Level.NOTSET: None,
Level.DEBUG: 'cyan',
Level.INFO: 'green',
Level.WARNING: 'yellow',
Level.ERROR: 'red',
Level.CRITICAL: 'red',
}[self]
@classmethod
def from_logging_level(cls, level: int) -> Level:
@@ -75,5 +82,9 @@ def echo(text: t.Any, fg: str | None = None, *, _with_style: bool = True, json:
COLUMNS: int = int(os.environ.get('COLUMNS', str(120)))
CONTEXT_SETTINGS: DictStrAny = {'help_option_names': ['-h', '--help'], 'max_content_width': COLUMNS, 'token_normalize_func': inflection.underscore}
__all__ = ['echo', 'COLUMNS', 'CONTEXT_SETTINGS', 'log', 'warning', 'error', 'critical', 'debug', 'info', 'Level']
CONTEXT_SETTINGS: DictStrAny = {
'help_option_names': ['-h', '--help'],
'max_content_width': COLUMNS,
'token_normalize_func': inflection.underscore,
}
__all__ = ['COLUMNS', 'CONTEXT_SETTINGS', 'Level', 'critical', 'debug', 'echo', 'error', 'info', 'log', 'warning']