refactor: packages (#249)

This commit is contained in:
Aaron Pham
2023-08-22 08:55:46 -04:00
committed by GitHub
parent a964e659c1
commit 3ffb25a872
148 changed files with 2899 additions and 1937 deletions

View File

@@ -1,16 +1,16 @@
from __future__ import annotations
import functools, importlib.util, os, typing as t, logging
import click, click_option_group as cog, inflection, orjson, bentoml, openllm
import functools, importlib.util, os, typing as t, logging, click, click_option_group as cog, inflection, orjson, bentoml, openllm
from click import shell_completion as sc
from bentoml_cli.utils import BentoMLCommandGroup
from click.shell_completion import CompletionItem
from openllm.utils import DEBUG
from openllm_core.utils import DEBUG
from bentoml._internal.configuration.containers import BentoMLContainer
from openllm._typing_compat import LiteralString, DictStrAny, ParamSpec, Concatenate
from openllm_core._typing_compat import LiteralString, DictStrAny, ParamSpec, Concatenate
from . import termui
if t.TYPE_CHECKING:
import subprocess
from openllm._configuration import LLMConfig
from openllm_core._configuration import LLMConfig
logger = logging.getLogger(__name__)
@@ -20,6 +20,12 @@ LiteralOutput = t.Literal["json", "pretty", "porcelain"]
_AnyCallable = t.Callable[..., t.Any]
FC = t.TypeVar("FC", bound=t.Union[_AnyCallable, click.Command])
def bento_complete_envvar(ctx: click.Context, param: click.Parameter, incomplete: str) -> list[sc.CompletionItem]:
    """Shell-completion callback that proposes Bento tags.

    Walks every entry returned by ``bentoml.list()`` and keeps the ones whose
    stringified tag begins with ``incomplete`` and whose ``info.labels``
    contains both the ``start_name`` and ``bundler`` keys.

    Args:
        ctx: Active click context (not used here).
        param: Parameter being completed (not used here).
        incomplete: The partial word typed so far.

    Returns:
        One ``CompletionItem`` per matching Bento, with the help text "Bento".
    """
    required_labels = ("start_name", "bundler")
    completions = []
    for bento in bentoml.list():
        tag = str(bento.tag)
        if not tag.startswith(incomplete):
            continue
        # Skip Bentos that are missing any of the required label keys.
        if any(label not in bento.info.labels for label in required_labels):
            continue
        completions.append(sc.CompletionItem(tag, help="Bento"))
    return completions
def model_complete_envvar(ctx: click.Context, param: click.Parameter, incomplete: str) -> list[sc.CompletionItem]:
    """Shell-completion callback that proposes model names from ``openllm.CONFIG_MAPPING``.

    Candidates are emitted in dasherized form (the same form the ``model_name``
    argument accepts through its ``click.Choice``), so the prefix test is done
    on the dasherized name too. Testing the raw mapping key instead would make
    any input containing ``-`` fail to match a key that contains ``_``.
    ``incomplete`` is dasherized as well, so underscore-style input that matched
    before still matches.

    Args:
        ctx: Active click context (not used here).
        param: Parameter being completed (not used here).
        incomplete: The partial word typed so far.

    Returns:
        One ``CompletionItem`` per matching model name, with the help text "Model".
    """
    prefix = inflection.dasherize(incomplete)
    candidates = (inflection.dasherize(name) for name in openllm.CONFIG_MAPPING)
    return [sc.CompletionItem(name, help="Model") for name in candidates if name.startswith(prefix)]
def parse_config_options(config: LLMConfig, server_timeout: int, workers_per_resource: float, device: t.Tuple[str, ...] | None, cors: bool, environ: DictStrAny) -> DictStrAny:
# TODO: Support amd.com/gpu on k8s
_bentoml_config_options_env = environ.pop("BENTOML_CONFIG_OPTIONS", "")
@@ -316,7 +322,7 @@ def cors_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callable[[FC
def machine_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callable[[FC], FC]:
    """Attach the hidden ``--machine`` boolean flag (off by default).

    Args:
        f: Object passed to the decorator that ``cli_option`` produces.
        **attrs: Extra keyword arguments forwarded unchanged to ``cli_option``.

    Returns:
        The result of applying the ``cli_option`` decorator to ``f``.
    """
    decorator = cli_option("--machine", is_flag=True, default=False, hidden=True, **attrs)
    return decorator(f)
def model_id_option(f: _AnyCallable | None = None, *, model_env: openllm.utils.EnvVarMixin | None = None, **attrs: t.Any) -> t.Callable[[FC], FC]:
    """Attach the ``--model-id`` string option (defaults to ``None``).

    Args:
        f: Object passed to the decorator that ``cli_option`` produces.
        model_env: When given, its ``model_id`` attribute is used as the
            option's environment variable and that variable is shown in help;
            when ``None`` no environment variable is bound.
        **attrs: Extra keyword arguments forwarded unchanged to ``cli_option``.

    Returns:
        The result of applying the ``cli_option`` decorator to ``f``.
    """
    has_env = model_env is not None
    envvar = model_env.model_id if has_env else None
    decorator = cli_option(
        "--model-id",
        type=click.STRING,
        default=None,
        envvar=envvar,
        show_envvar=has_env,
        help="Optional model_id name or path for (fine-tune) weight.",
        **attrs,
    )
    return decorator(f)
def model_version_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callable[[FC], FC]:
    """Attach the ``--model-version`` string option (defaults to ``None``).

    Args:
        f: Object passed to the decorator that ``cli_option`` produces.
        **attrs: Extra keyword arguments forwarded unchanged to ``cli_option``.

    Returns:
        The result of applying the ``cli_option`` decorator to ``f``.
    """
    decorator = cli_option(
        "--model-version",
        type=click.STRING,
        default=None,
        help="Optional model version to save for this model. It will be inferred automatically from model-id.",
        **attrs,
    )
    return decorator(f)
def model_name_argument(f: _AnyCallable | None = None, required: bool = True) -> t.Callable[[FC], FC]: return cli_argument("model_name", type=click.Choice([inflection.dasherize(name) for name in openllm.CONFIG_MAPPING]), required=required)(f)
def model_name_argument(f: _AnyCallable | None = None, required: bool = True, **attrs: t.Any) -> t.Callable[[FC], FC]:
    """Attach the ``model_name`` positional argument.

    Accepted values are the keys of ``openllm.CONFIG_MAPPING`` in dasherized form.

    Args:
        f: Object passed to the decorator that ``cli_argument`` produces.
        required: Forwarded as the argument's ``required`` setting.
        **attrs: Extra keyword arguments forwarded unchanged to ``cli_argument``
            (for example a ``shell_complete`` callback).

    Returns:
        The result of applying the ``cli_argument`` decorator to ``f``.
    """
    choices = []
    for name in openllm.CONFIG_MAPPING:
        choices.append(inflection.dasherize(name))
    decorator = cli_argument("model_name", type=click.Choice(choices), required=required, **attrs)
    return decorator(f)
def quantize_option(f: _AnyCallable | None = None, *, build: bool = False, model_env: openllm.utils.EnvVarMixin | None = None, **attrs: t.Any) -> t.Callable[[FC], FC]:
return cli_option(
"--quantise", "--quantize", "quantize", type=click.Choice(["int8", "int4", "gptq"]), default=None, envvar=model_env.quantize if model_env is not None else None, show_envvar=model_env is not None, help="""Dynamic quantization for running this LLM.
@@ -382,7 +388,7 @@ def serialisation_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Cal
)(f)
def container_registry_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callable[[FC], FC]:
return cli_option(
"--container-registry", "container_registry", type=str, default="ecr", show_default=True, show_envvar=True, envvar="OPENLLM_CONTAINER_REGISTRY", callback=container_registry_callback, help="""The default container registry to get the base image for building BentoLLM.
"--container-registry", "container_registry", type=click.Choice(list(openllm.bundle.CONTAINER_NAMES)), default="ecr", show_default=True, show_envvar=True, envvar="OPENLLM_CONTAINER_REGISTRY", callback=container_registry_callback, help="""The default container registry to get the base image for building BentoLLM.
Currently, it supports 'ecr', 'ghcr.io', 'docker.io'

View File

@@ -1,6 +1,5 @@
from __future__ import annotations
import itertools, logging, os, re, subprocess, sys, typing as t
import bentoml, openllm
import itertools, logging, os, re, subprocess, sys, typing as t, bentoml, openllm, openllm_core
from simple_di import Provide, inject
from bentoml._internal.configuration.containers import BentoMLContainer
from openllm.exceptions import OpenLLMException
@@ -8,10 +7,9 @@ from . import termui
from ._factory import start_command_factory
if t.TYPE_CHECKING:
from openllm._typing_compat import LiteralString, LiteralRuntime
from openllm_core._configuration import LLMConfig
from openllm_core._typing_compat import LiteralString, LiteralRuntime, LiteralContainerRegistry, LiteralContainerVersionStrategy
from bentoml._internal.bento import BentoStore
from openllm._configuration import LLMConfig
from openllm.bundle.oci import LiteralContainerRegistry, LiteralContainerVersionStrategy
logger = logging.getLogger(__name__)
@@ -58,7 +56,7 @@ def _start(model_name: str, /, *, model_id: str | None = None, timeout: int = 30
"""
from .entrypoint import start_command, start_grpc_command
llm_config = openllm.AutoConfig.for_model(model_name)
_ModelEnv = openllm.utils.EnvVarMixin(model_name, openllm.utils.first_not_none(framework, default=llm_config.default_implementation()), model_id=model_id, bettertransformer=bettertransformer, quantize=quantize, runtime=runtime)
_ModelEnv = openllm_core.utils.EnvVarMixin(model_name, openllm_core.utils.first_not_none(framework, default=llm_config.default_implementation()), model_id=model_id, bettertransformer=bettertransformer, quantize=quantize, runtime=runtime)
os.environ[_ModelEnv.framework] = _ModelEnv["framework_value"]
args: list[str] = ["--runtime", runtime]
@@ -203,5 +201,5 @@ def _list_models() -> dict[str, t.Any]:
return models_command.main(args=["-o", "json", "--show-available", "--machine"], standalone_mode=False)
start, start_grpc, build, import_model, list_models = openllm.utils.codegen.gen_sdk(_start, _serve_grpc=False), openllm.utils.codegen.gen_sdk(_start, _serve_grpc=True), openllm.utils.codegen.gen_sdk(_build), openllm.utils.codegen.gen_sdk(_import_model), openllm.utils.codegen.gen_sdk(_list_models)
# Public SDK entry points, each generated from its private implementation via gen_sdk.
start = openllm_core.utils.codegen.gen_sdk(_start, _serve_grpc=False)
start_grpc = openllm_core.utils.codegen.gen_sdk(_start, _serve_grpc=True)
build = openllm_core.utils.codegen.gen_sdk(_build)
import_model = openllm_core.utils.codegen.gen_sdk(_import_model)
list_models = openllm_core.utils.codegen.gen_sdk(_list_models)
__all__ = ["start", "start_grpc", "build", "import_model", "list_models"]

View File

@@ -20,10 +20,9 @@ bentomodel = openllm.import_model("falcon", model_id='tiiuae/falcon-7b-instruct'
```
"""
from __future__ import annotations
import functools, http.client, inspect, itertools, logging, os, platform, re, subprocess, sys, time, traceback, typing as t
import attr, click, click_option_group as cog, fs, fs.copy, fs.errors, inflection, orjson, bentoml, openllm
from bentoml_cli.utils import BentoMLCommandGroup, opt_callback
import functools, http.client, inspect, itertools, logging, os, platform, re, subprocess, sys, time, traceback, typing as t, attr, click, click_option_group as cog, fs, fs.copy, fs.errors, inflection, orjson, bentoml, openllm
from simple_di import Provide, inject
from bentoml_cli.utils import BentoMLCommandGroup, opt_callback
from bentoml._internal.configuration.containers import BentoMLContainer
from bentoml._internal.models.model import ModelStore
from . import termui
@@ -56,8 +55,8 @@ from openllm.models.auto import (
AutoConfig,
AutoLLM,
)
from openllm._typing_compat import DictStrAny, ParamSpec, Concatenate, LiteralString, Self, LiteralRuntime
from openllm.utils import (
from openllm_core._typing_compat import DictStrAny, ParamSpec, Concatenate, LiteralString, Self, LiteralRuntime
from openllm_core.utils import (
DEBUG,
DEBUG_ENV_VAR,
OPTIONAL_DEPENDENCIES,
@@ -72,21 +71,20 @@ from openllm.utils import (
first_not_none,
get_debug_mode,
get_quiet_mode,
infer_auto_class,
is_torch_available,
is_transformers_supports_agent,
resolve_user_filepath,
set_debug_mode,
set_quiet_mode,
)
from openllm.utils import infer_auto_class
if t.TYPE_CHECKING:
import torch
from bentoml._internal.bento import BentoStore
from bentoml._internal.container import DefaultBuilder
from openllm.client import BaseClient
from openllm._schema import EmbeddingsOutput
from openllm.bundle.oci import LiteralContainerRegistry, LiteralContainerVersionStrategy
from openllm_core._schema import EmbeddingsOutput
from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy
else: torch = LazyLoader("torch", globals(), "torch")
P = ParamSpec("P")
@@ -271,7 +269,7 @@ def cli() -> None:
\b
An open platform for operating large language models in production.
Fine-tune, serve, deploy, and monitor any LLMs with ease.
""" # noqa: D205
"""
@cli.group(cls=OpenLLMCommandGroup, context_settings=termui.CONTEXT_SETTINGS, name="start", aliases=["start-http"])
def start_command() -> None:
@@ -670,10 +668,8 @@ def instruct_command(endpoint: str, timeout: int, agent: LiteralString, output:
"""
client = openllm.client.HTTPClient(endpoint, timeout=timeout)
try:
client.call("metadata")
except http.client.BadStatusLine:
raise click.ClickException(f"{endpoint} is neither a HTTP server nor reachable.") from None
try: client.call("metadata")
except http.client.BadStatusLine: raise click.ClickException(f"{endpoint} is neither a HTTP server nor reachable.") from None
if agent == "hf":
if not is_transformers_supports_agent(): raise click.UsageError("Transformers version should be at least 4.29 to support HfAgent. Upgrade with 'pip install -U transformers'")
_memoized = {k: v[0] for k, v in _memoized.items() if v}
@@ -700,7 +696,7 @@ def embed_command(ctx: click.Context, text: tuple[str, ...], endpoint: str, time
$ openllm embed --endpoint http://12.323.2.1:3000 "What is the meaning of life?" "How many stars are there in the sky?"
```
"""
client = t.cast("BaseClient[t.Any]", openllm.client.HTTPClient(endpoint, timeout=timeout) if server_type == "http" else openllm.client.GrpcClient(endpoint, timeout=timeout))
client = openllm.client.HTTPClient(endpoint, timeout=timeout) if server_type == "http" else openllm.client.GrpcClient(endpoint, timeout=timeout)
try:
gen_embed = client.embed(text)
except ValueError:
@@ -733,14 +729,14 @@ def query_command(ctx: click.Context, /, prompt: str, endpoint: str, timeout: in
"""
_memoized = {k: orjson.loads(v[0]) for k, v in _memoized.items() if v}
if server_type == "grpc": endpoint = re.sub(r"http://", "", endpoint)
client = t.cast("BaseClient[t.Any]", openllm.client.HTTPClient(endpoint, timeout=timeout) if server_type == "http" else openllm.client.GrpcClient(endpoint, timeout=timeout))
client = openllm.client.HTTPClient(endpoint, timeout=timeout) if server_type == "http" else openllm.client.GrpcClient(endpoint, timeout=timeout)
input_fg, generated_fg = "magenta", "cyan"
if output != "porcelain":
termui.echo("==Input==\n", fg="white")
termui.echo(f"{prompt}", fg=input_fg)
res = client.query(prompt, return_response="raw", **{**client.configuration, **_memoized})
if output == "pretty":
response = client.llm.postprocess_generate(prompt, res["responses"])
response = client.config.postprocess_generate(prompt, res["responses"])
termui.echo("\n\n==Responses==\n", fg="white")
termui.echo(response, fg=generated_fg)
elif output == "json":

View File

@@ -1,37 +1,26 @@
from __future__ import annotations
import typing as t
import click
import orjson
import openllm
from .. import termui
from .._factory import machine_option
if t.TYPE_CHECKING:
from openllm.bundle.oci import LiteralContainerRegistry, LiteralContainerVersionStrategy
import typing as t, click, orjson, openllm
from openllm.cli import termui
from openllm.cli._factory import machine_option, container_registry_option
if t.TYPE_CHECKING: from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy
@click.command(
"build_base_container", context_settings=termui.CONTEXT_SETTINGS, help="""Base image builder for BentoLLM.
By default, the base image will include custom kernels (PagedAttention via vllm, FlashAttention-v2, etc.) built with CUDA 11.8, Python 3.9 on Ubuntu22.04.
Optionally, this can also be pushed directly to remote registry. Currently support ``docker.io``, ``ghcr.io`` and ``quay.io``.
\b
If '--machine' is passed, then it will run the process quietly, and output a JSON to the current running terminal.
This command is only useful for debugging and for building custom base image for extending BentoML with custom base images and custom kernels.
Note that we already release images on our CI to ECR and GHCR, so you don't need to build it yourself.
"""
)
@click.option("--registry", multiple=True, type=click.Choice(list(openllm.bundle.CONTAINER_NAMES)), help="Target registry to create image tag on.", default=None)
@container_registry_option
@click.option("--version-strategy", type=click.Choice(["release", "latest", "nightly"]), default="nightly", help="Version strategy to use for tagging the image.")
@click.option("--push/--no-push", help="Whether to push to remote repository", is_flag=True, default=False)
@machine_option
def cli(registry: tuple[LiteralContainerRegistry, ...] | None, version_strategy: LiteralContainerVersionStrategy, push: bool, machine: bool) -> dict[str, str]:
mapping = openllm.bundle.build_container(registry, version_strategy, push, machine)
def cli(container_registry: tuple[LiteralContainerRegistry, ...] | None, version_strategy: LiteralContainerVersionStrategy, push: bool, machine: bool) -> dict[str, str]:
mapping = openllm.bundle.build_container(container_registry, version_strategy, push, machine)
if machine: termui.echo(orjson.dumps(mapping, option=orjson.OPT_INDENT_2).decode(), fg="white")
return mapping

View File

@@ -1,24 +1,16 @@
from __future__ import annotations
import shutil
import subprocess
import typing as t
import click
import psutil
import shutil, subprocess, typing as t, click, psutil, bentoml
from simple_di import Provide, inject
import bentoml
from bentoml._internal.configuration.containers import BentoMLContainer
from .. import termui
from openllm.cli import termui
from openllm.cli._factory import bento_complete_envvar, machine_option
if t.TYPE_CHECKING:
from bentoml._internal.bento import BentoStore
if t.TYPE_CHECKING: from bentoml._internal.bento import BentoStore
@click.command("dive_bentos", context_settings=termui.CONTEXT_SETTINGS)
@click.argument("bento", type=str)
@click.option("--machine", is_flag=True, default=False, hidden=True)
@click.argument("bento", type=str, shell_complete=bento_complete_envvar)
@machine_option
@click.pass_context
@inject
def cli(ctx: click.Context, bento: str, machine: bool, _bento_store: BentoStore = Provide[BentoMLContainer.bento_store]) -> str | None:
@@ -32,5 +24,5 @@ def cli(ctx: click.Context, bento: str, machine: bool, _bento_store: BentoStore
if machine: return bentomodel.path
# copy and paste this into a new shell
if psutil.WINDOWS: subprocess.check_call([shutil.which("dir") or "dir"], cwd=bentomodel.path)
else: subprocess.check_call([shutil.which("tree") or "tree"], cwd=bentomodel.path)
else: subprocess.check_call([shutil.which("ls") or "ls", "-Rrthla"], cwd=bentomodel.path)
ctx.exit(0)

View File

@@ -1,24 +1,18 @@
from __future__ import annotations
import typing as t
import click
import typing as t, click, bentoml
from simple_di import Provide, inject
import bentoml
from bentoml._internal.bento.bento import BentoInfo
from bentoml._internal.bento.build_config import DockerOptions
from bentoml._internal.configuration.containers import BentoMLContainer
from bentoml._internal.container.generate import generate_containerfile
from openllm.cli import termui
from openllm.cli._factory import bento_complete_envvar
from openllm_core.utils import bentoml_cattr
from .. import termui
from ...utils import bentoml_cattr
if t.TYPE_CHECKING:
from bentoml._internal.bento import BentoStore
if t.TYPE_CHECKING: from bentoml._internal.bento import BentoStore
@click.command("get_containerfile", context_settings=termui.CONTEXT_SETTINGS, help="Return Containerfile of any given Bento.")
@click.argument("bento", type=str)
@click.argument("bento", type=str, shell_complete=bento_complete_envvar)
@click.pass_context
@inject
def cli(ctx: click.Context, bento: str, _bento_store: BentoStore = Provide[BentoMLContainer.bento_store]) -> str:

View File

@@ -1,25 +1,18 @@
from __future__ import annotations
import typing as t
import click
import inflection
import orjson
import typing as t, click, inflection, orjson, openllm
from bentoml_cli.utils import opt_callback
import openllm
from .. import termui
from ..._prompt import process_prompt
from openllm.cli import termui
from openllm.cli._factory import model_complete_envvar, output_option, machine_option
from openllm_core._prompt import process_prompt
LiteralOutput = t.Literal["json", "pretty", "porcelain"]
@click.command("get_prompt", context_settings=termui.CONTEXT_SETTINGS)
@click.argument("model_name", type=click.Choice([inflection.dasherize(name) for name in openllm.CONFIG_MAPPING.keys()]))
@click.argument("model_name", type=click.Choice([inflection.dasherize(name) for name in openllm.CONFIG_MAPPING.keys()]), shell_complete=model_complete_envvar)
@click.argument("prompt", type=click.STRING)
@click.option("-o", "--output", "output", type=click.Choice(["json", "pretty", "porcelain"]), default="pretty", help="Showing output type.", show_default=True, envvar="OPENLLM_OUTPUT", show_envvar=True)
@output_option
@click.option("--format", type=click.STRING, default=None)
@click.option("--machine", is_flag=True, default=False, hidden=True)
@machine_option
@click.option("--opt", help="Define additional prompt variables. (format: ``--opt system_prompt='You are a useful assistant'``)", required=False, multiple=True, callback=opt_callback, metavar="ARG=VALUE[,ARG=VALUE]")
@click.pass_context
def cli(ctx: click.Context, /, model_name: str, prompt: str, format: str | None, output: LiteralOutput, machine: bool, _memoized: dict[str, t.Any], **_: t.Any) -> str | None:

View File

@@ -1,16 +1,8 @@
from __future__ import annotations
import click
import inflection
import orjson
import bentoml
import openllm
import click, inflection, orjson, bentoml, openllm
from bentoml._internal.utils import human_readable_size
from .. import termui
from .._factory import LiteralOutput, output_option
from openllm.cli import termui
from openllm.cli._factory import LiteralOutput, output_option
@click.command("list_bentos", context_settings=termui.CONTEXT_SETTINGS)
@output_option(default_value="json")

View File

@@ -1,14 +1,13 @@
from __future__ import annotations
import typing as t, bentoml, openllm, orjson, inflection ,click
from bentoml._internal.utils import human_readable_size
from openllm.cli import termui
from openllm.cli._factory import LiteralOutput, model_name_argument, output_option
from bentoml._internal.utils import human_readable_size
from openllm.cli._factory import LiteralOutput, model_name_argument, output_option, model_complete_envvar
if t.TYPE_CHECKING: from openllm._typing_compat import DictStrAny
if t.TYPE_CHECKING: from openllm_core._typing_compat import DictStrAny
@click.command("list_models", context_settings=termui.CONTEXT_SETTINGS)
@model_name_argument(required=False)
@model_name_argument(required=False, shell_complete=model_complete_envvar)
@output_option(default_value="json")
def cli(model_name: str | None, output: LiteralOutput) -> DictStrAny:
"""This is equivalent to openllm models --show-available less the nice table."""

View File

@@ -1,13 +1,12 @@
from __future__ import annotations
import importlib.machinery, logging, os, pkgutil, subprocess, sys, tempfile, typing as t
import click, yaml
import importlib.machinery, logging, os, pkgutil, subprocess, sys, tempfile, typing as t, click, yaml
from openllm.cli import termui
from openllm import playground
from openllm.utils import is_jupyter_available, is_jupytext_available, is_notebook_available
from openllm_core.utils import is_jupyter_available, is_jupytext_available, is_notebook_available
if t.TYPE_CHECKING:
import jupytext, nbformat
from openllm._typing_compat import DictStrAny
from openllm_core._typing_compat import DictStrAny
logger = logging.getLogger(__name__)
@@ -38,7 +37,7 @@ def cli(ctx: click.Context, output_dir: str | None, port: int) -> None:
\b
> [!NOTE]
> This command requires Jupyter to be installed. Install it with 'pip install "openllm[playground]"'
""" # noqa: D301
"""
if not is_jupyter_available() or not is_jupytext_available() or not is_notebook_available():
raise RuntimeError("Playground requires 'jupyter', 'jupytext', and 'notebook'. Install it with 'pip install \"openllm[playground]\"'")
metadata = load_notebook_metadata()

View File

@@ -1,6 +1,6 @@
from __future__ import annotations
import os, typing as t, click, inflection, openllm
if t.TYPE_CHECKING: from openllm._typing_compat import DictStrAny
if t.TYPE_CHECKING: from openllm_core._typing_compat import DictStrAny
def echo(text: t.Any, fg: str = "green", _with_style: bool = True, **attrs: t.Any) -> None:
attrs["fg"] = fg if not openllm.utils.get_debug_mode() else None