chore(release): update base container restriction (#173)

Prepare for 0.2.12 release

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Aaron <29749331+aarnphm@users.noreply.github.com>
Authored by pre-commit-ci[bot] on 2023-08-01 15:25:17 -04:00; committed by GitHub.
parent 6ba8899743, commit c2ed1d56da
20 changed files with 319 additions and 256 deletions

View File

@@ -8,3 +8,6 @@ charset = utf-8
[*.py]
indent_style = space
indent_size = 2
[src/openllm/cli/entrypoint.py]
indent_size = unset

View File

@@ -21,10 +21,14 @@ ci:
exclude: '.*\.(css|js|svg)$'
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: 'v0.0.280'
rev: 'v0.0.281'
hooks:
- id: ruff
args: [--exit-non-zero-on-fix, --show-fixes]
- repo: https://github.com/editorconfig-checker/editorconfig-checker.python
rev: '2.7.2'
hooks:
- id: editorconfig-checker
- repo: https://github.com/econchick/interrogate
rev: 1.5.0
hooks:
@@ -33,7 +37,7 @@ repos:
exclude: ^(docs|tools|tests)
args: [--config=pyproject.toml]
- repo: https://github.com/google/yapf
rev: v0.40.1
rev: v0.40.0
hooks:
- id: yapf
types: [python]

View File

@@ -414,7 +414,7 @@ class GenerationConfig(ReprMixin):
bentoml_cattr.register_unstructure_hook_factory(
lambda cls: attr.has(cls) and lenient_issubclass(cls, GenerationConfig),
lambda cls: make_dict_unstructure_fn(cls, bentoml_cattr, _cattrs_omit_if_default=False, _cattrs_use_linecache=True,
**{k: override(omit=True) for k, v in attr.fields_dict(cls).items() if v.default in (None, attr.NOTHING)}))
**{k: override(omit=True) for k, v in attr.fields_dict(cls).items() if v.default in (None, attr.NOTHING)}))
@attr.frozen(slots=True, repr=False, init=False)
class SamplingParams(ReprMixin):
@@ -450,7 +450,7 @@ class SamplingParams(ReprMixin):
ignore_eos: bool = dantic.Field(False, description="Whether to ignore the EOS token and continue generating tokens after the EOS token is generated.")
logprobs: int = dantic.Field(None, description="Number of log probabilities to return per output token.")
if t.TYPE_CHECKING and not MYPY:
if t.TYPE_CHECKING:
max_tokens: int
temperature: float
top_k: int
@@ -490,7 +490,7 @@ class SamplingParams(ReprMixin):
bentoml_cattr.register_unstructure_hook_factory(
lambda cls: attr.has(cls) and lenient_issubclass(cls, SamplingParams),
lambda cls: make_dict_unstructure_fn(cls, bentoml_cattr, _cattrs_omit_if_default=False, _cattrs_use_linecache=True,
**{k: override(omit=True) for k, v in attr.fields_dict(cls).items() if v.default in (None, attr.NOTHING)}))
**{k: override(omit=True) for k, v in attr.fields_dict(cls).items() if v.default in (None, attr.NOTHING)}))
bentoml_cattr.register_structure_hook_factory(lambda cls: attr.has(cls) and lenient_issubclass(cls, SamplingParams), lambda cls: make_dict_structure_fn(cls, bentoml_cattr, _cattrs_forbid_extra_keys=True, max_new_tokens=override(rename="max_tokens")))
# cached it here to save one lookup per assignment
@@ -758,7 +758,7 @@ class _ConfigAttr:
For example:
For FLAN-T5 impl, this would be ["google/flan-t5-small", "google/flan-t5-base",
"google/flan-t5-large", "google/flan-t5-xl", "google/flan-t5-xxl"]
"google/flan-t5-large", "google/flan-t5-xl", "google/flan-t5-xxl"]
This field is required when defining under '__config__'.
"""
@@ -1028,7 +1028,7 @@ class LLMConfig(_ConfigAttr):
klass = attr.make_class(
f"{camel_name}{class_attr}", [], bases=(base,), slots=True, weakref_slot=True, frozen=True, repr=False, init=False, collect_by_mro=True,
field_transformer=codegen.make_env_transformer(cls, cls.__openllm_model_name__, suffix=suffix_env, globs=globs,
default_callback=lambda field_name, field_default: getattr(getattr(cls, class_attr), field_name, field_default) if codegen.has_own_attribute(cls, class_attr) else field_default))
default_callback=lambda field_name, field_default: getattr(getattr(cls, class_attr), field_name, field_default) if codegen.has_own_attribute(cls, class_attr) else field_default))
# For pickling to work, the __module__ variable needs to be set to the
# frame where the class is created. This respects the module that cls is created from.
try: klass.__module__ = cls.__module__
@@ -1338,7 +1338,7 @@ class LLMConfig(_ConfigAttr):
Args:
name: The name of the new class.
**attrs: The attributes to be added to the new class. This will override
any existing attributes with the same name.
any existing attributes with the same name.
"""
if not hasattr(cls, "__config__"):
raise ValueError("Cannot derivate a LLMConfig without __config__")

View File

@@ -344,7 +344,8 @@ _object_setattr = object.__setattr__
def _wrapped_import_model(f: _import_model_wrapper[bentoml.Model, M, T]) -> t.Callable[[LLM[M, T]], bentoml.Model]:
@functools.wraps(f)
def wrapper(self: LLM[M, T], *decls: t.Any, trust_remote_code: bool | None = None, **attrs: t.Any) -> bentoml.Model:
trust_remote_code: bool = first_not_none(trust_remote_code, default=self.__llm_trust_remote_code__)
trust_remote_code = first_not_none(trust_remote_code, default=self.__llm_trust_remote_code__)
if t.TYPE_CHECKING: assert trust_remote_code is not None # NOTE: Mypy is too stupid to understand that the default type of trust_remote_code is bool in L347
(model_decls, model_attrs), _ = self.llm_parameters
decls = (*model_decls, *decls)
attrs = {**model_attrs, **attrs}
@@ -567,8 +568,8 @@ class LLM(LLMInterface[M, T], ReprMixin):
model_name: Optional model name to be saved with this LLM. Defaults to None. It will be inferred automatically from model_id.
If model_id is a custom path, it will be the basename of the given path.
model_version: Optional version for this given model id. Defaults to None. This is useful for saving from a custom path.
If set to None, the version will either be the git hash of the given pretrained model, or a hash inferred
from the last modified time of the given directory.
If set to None, the version will either be the git hash of the given pretrained model, or a hash inferred
from the last modified time of the given directory.
llm_config: The config to use for this LLM. Defaults to None. If not passed, OpenLLM
will use `config_class` to construct default configuration.
quantize: The quantization to use for this LLM. Defaults to None. Possible values
@@ -576,7 +577,7 @@ class LLM(LLMInterface[M, T], ReprMixin):
runtime: Optional runtime to run this LLM. Defaults to 'transformers'. 'ggml' support is a work in progress.
quantization_config: The quantization config (`transformers.BitsAndBytesConfig` | `autogtpq.BaseQuantizeConfig`) to use. Note that this is mutually exclusive with `quantize`
serialisation: Type of model format to save to the local store. If set to 'safetensors', OpenLLM will save the model using safetensors.
Default behaviour is similar to ``safe_serialization=False``.
Default behaviour is similar to ``safe_serialization=False``.
bettertransformer: Whether to use BetterTransformer with this model. Defaults to False.
adapter_id: The [LoRA](https://arxiv.org/pdf/2106.09685.pdf) pretrained id or local path to use for this LLM. Defaults to None.
adapter_name: The adapter name to use for this LLM. Defaults to None.
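
The explicit bool annotation on trust_remote_code could be dropped because first_not_none already guarantees a non-None result here. A hypothetical re-implementation of that helper, only to show the semantics the mypy assert works around (the real one lives in openllm.utils and may differ):

import typing as t

T = t.TypeVar("T")

def first_not_none(*args: t.Optional[T], default: T) -> T:
  """Return the first argument that is not None, otherwise ``default``."""
  return next((arg for arg in args if arg is not None), default)

assert first_not_none(None, default=True) is True    # falls back to the instance default
assert first_not_none(False, default=True) is False  # an explicit falsy value still wins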

View File

@@ -76,11 +76,11 @@ def _parse_list_with_prefix(lst: str, prefix: str) -> list[str]:
_STACK_LEVEL = 3
@overload
def _parse_visible_devices(default_var: str | None = ..., respect_env: t.Literal[True] = True) -> list[str] | None:
def _parse_visible_devices(default_var: str | None = ..., *, respect_env: t.Literal[True]) -> list[str] | None:
...
@overload
def _parse_visible_devices(default_var: str = ..., respect_env: t.Literal[False] = ...) -> list[str]:
def _parse_visible_devices(default_var: str = ..., *, respect_env: t.Literal[False]) -> list[str]:
...
def _parse_visible_devices(default_var: str | None = None, respect_env: bool = True) -> list[str] | None:
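
Making respect_env keyword-only in these overloads lets type checkers pick the return type from the literal flag at each call site. A self-contained sketch of the same pattern with illustrative names (not the actual openllm internals):

from __future__ import annotations
import os
import typing as t

@t.overload
def parse_devices(default_var: str | None = ..., *, respect_env: t.Literal[True]) -> list[str] | None: ...
@t.overload
def parse_devices(default_var: str = ..., *, respect_env: t.Literal[False]) -> list[str]: ...
def parse_devices(default_var: str | None = None, *, respect_env: bool = True) -> list[str] | None:
  value = os.environ.get("CUDA_VISIBLE_DEVICES", default_var) if respect_env else default_var
  return None if value is None else value.split(",")

devices = parse_devices("0,1", respect_env=False)  # checkers infer list[str] here, not Optional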

View File

@@ -33,8 +33,10 @@ if t.TYPE_CHECKING:
import peft
import openllm
from openllm._llm import M as _M
from openllm._llm import T as _T
from .utils.lazy import VersionInfo
from .bundle.oci import LiteralContainerVersionStrategy
from ._llm import M as _M
from ._llm import T as _T
from bentoml._internal.runner.runnable import RunnableMethod
from bentoml._internal.runner.runner import RunnerMethod
from bentoml._internal.runner.strategy import Strategy
@@ -64,6 +66,11 @@ class AdaptersTuple(TupleAny):
name: str | None
config: DictStrAny
class RefTuple(TupleAny):
git_hash: str
version: VersionInfo
strategy: LiteralContainerVersionStrategy
AdaptersMapping = dict[AdapterType, tuple[AdaptersTuple, ...]]
class LLMRunnable(bentoml.Runnable, t.Generic[_M, _T]):

View File

@@ -131,6 +131,14 @@ COPY --from=flash-attn-v2-builder /usr/src/flash-attention-v2/build/lib.linux-x8
COPY src src
COPY hatch.toml README.md CHANGELOG.md pyproject.toml ./
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
build-essential \
ca-certificates \
ccache \
curl \
git && \
rm -rf /var/lib/apt/lists/*
# Install all required dependencies
RUN pip install "ray==2.6.0" "jax[cuda11_local]" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html ".[opt,fine-tune,llama,gptq,falcon,chatglm]" -v --no-cache-dir

View File

@@ -15,28 +15,42 @@
from __future__ import annotations
import functools
import importlib
import logging
import pathlib
import shutil
import subprocess
import typing as t
import git.cmd
import attr
import bentoml
from ...exceptions import Error
from ...exceptions import OpenLLMException
from ...utils import LazyLoader
from ...utils import VersionInfo
from ...utils import apply
from ...utils import device_count
from ...utils import get_debug_mode
from ...utils import pkg
from ...utils.codegen import make_attr_tuple_class
if t.TYPE_CHECKING:
import git.cmd
from ..._types import RefTuple
else:
git = LazyLoader("git", globals(), "git")
git.cmd = LazyLoader("git.cmd", globals(), "git.cmd")
logger = logging.getLogger(__name__)
_BUILDER = bentoml.container.get_backend("buildx")
ROOT_DIR = pathlib.Path(__file__).parent.parent.parent
# TODO: support quay
LiteralContainerRegistry = t.Literal["docker", "gh", "ecr"]
LiteralContainerVersionStrategy = t.Literal["release", "nightly", "latest"]
LiteralContainerVersionStrategy = t.Literal["release", "nightly", "latest", "custom"]
# XXX: This registry will be hard-coded for now for easier maintenance
# but in the future, we can infer it from the git repo to give users more options
@@ -44,32 +58,68 @@ LiteralContainerVersionStrategy = t.Literal["release", "nightly", "latest"]
# NOTE: The ECR registry is the public one and currently only the @bentoml team has access to push to it.
_CONTAINER_REGISTRY: dict[LiteralContainerRegistry, str] = {"docker": "docker.io/bentoml/openllm", "gh": "ghcr.io/bentoml/openllm", "ecr": "public.ecr.aws/y5w8i4y6/bentoml/openllm"}
# TODO: support custom fork. Currently it only supports the openllm main repository.
_URI = "https://github.com/bentoml/openllm.git"
_module_location = pkg.source_locations("openllm")
@functools.lru_cache
@apply(str.lower)
def get_base_container_name(reg: LiteralContainerRegistry) -> str:
return _CONTAINER_REGISTRY[reg]
def get_base_container_name(reg: LiteralContainerRegistry) -> str: return _CONTAINER_REGISTRY[reg]
@functools.lru_cache(maxsize=1)
def _git() -> git.cmd.Git:
return git.cmd.Git(_URI)
def _convert_version_from_string(s: str) -> VersionInfo: return VersionInfo.from_version_string(s)
@functools.lru_cache
def _nightly_ref() -> tuple[str, str]:
return _git().ls_remote(_URI, "main", heads=True).split()
class VersionNotSupported(OpenLLMException):
"""Raised when the stable release is too low that it doesn't include OpenLLM base container."""
@functools.lru_cache
def _stable_ref() -> tuple[str, str]:
return max([item.split() for item in _git().ls_remote(_URI, refs=True, tags=True).split("\n")], key=lambda tag: tuple(int(k) for k in tag[-1].replace("refs/tags/v", "").split(".")))
_RefTuple: type[RefTuple] = make_attr_tuple_class("_RefTuple", ["git_hash", "version", "strategy"])
def get_base_container_tag(strategy: LiteralContainerVersionStrategy) -> str:
if strategy == "release": return _stable_ref()[-1].replace("refs/tags/v", "") # for stable, we can also use latest, but discouraged
elif strategy == "latest": return "latest"
elif strategy == "nightly": return f"sha-{_nightly_ref()[0][:7]}" # we prefixed with sha-<git_rev_short> (giv_rev[:7])
else: raise ValueError(f"Unknown strategy '{strategy}'. Valid strategies are 'release', 'nightly', and 'latest'")
@attr.attrs(eq=False, order=False, slots=True, frozen=True)
class Ref:
"""TODO: Support offline mode.
Maybe we need to save git hash when building the Bento.
"""
git_hash: str = attr.field()
version: VersionInfo = attr.field(converter=_convert_version_from_string)
strategy: LiteralContainerVersionStrategy = attr.field()
_git: git.cmd.Git = git.cmd.Git(_URI) # TODO: support offline mode
@classmethod
def _nightly_ref(cls) -> RefTuple: return _RefTuple((*cls._git.ls_remote(_URI, "main", heads=True).split(), "nightly"))
@classmethod
def _release_ref(cls, version_str: str | None = None) -> RefTuple:
_use_base_strategy = version_str is None
if version_str is None:
# NOTE: This strategy will only support openllm>=0.2.12
version: tuple[str, str] = tuple(max([item.split() for item in cls._git.ls_remote(_URI, refs=True, tags=True).split("\n")], key=lambda tag: tuple(int(k) for k in tag[-1].replace("refs/tags/v", "").split("."))))
version_str = version[-1].replace("refs/tags/v", "")
version = (version[0], version_str)
else:
version = ("", version_str)
if t.TYPE_CHECKING: assert version_str # NOTE: Mypy cannot infer the correct type here. We have handled the case where version_str is None in L86
if VersionInfo.from_version_string(version_str) < (0, 2, 12): raise VersionNotSupported(f"Version {version_str} doesn't have an OpenLLM base container. Consider using 'nightly' or upgrading to 'openllm>=0.2.12'")
return _RefTuple((*version, "release" if not _use_base_strategy else "custom"))
@classmethod
def from_strategy(cls, strategy_or_version: t.Literal["release", "nightly"] | str | None = None) -> Ref:
if strategy_or_version is None or strategy_or_version == "release":
logger.debug("Using default strategy 'release' for resolving base image version.")
return cls(*cls._release_ref())
elif strategy_or_version == "latest": return cls("latest", "0.0.0", "latest")
elif strategy_or_version == "nightly":
_ref = cls._nightly_ref()
return cls(_ref[0], "0.0.0", _ref[-1])
else:
logger.warning("Using custom %s. Make sure that it is at lease 0.2.12 for base container support.", strategy_or_version)
return cls(*cls._release_ref(version_str=strategy_or_version))
@property
def tag(self) -> str:
if self.strategy == "latest": return "latest"
elif self.strategy == "nightly": return f"sha-{self.git_hash[:7]}"
else: return repr(self.version)
@functools.lru_cache(maxsize=256)
def get_base_container_tag(strategy: LiteralContainerVersionStrategy | None = None) -> str: return Ref.from_strategy(strategy).tag
def build_container(registries: LiteralContainerRegistry | t.Sequence[LiteralContainerRegistry] | None = None, version_strategy: LiteralContainerVersionStrategy = "release", push: bool = False, machine: bool = False) -> dict[str | LiteralContainerRegistry, str]:
"""This is a utility function for building base container for OpenLLM. It will build the base container for all registries if ``None`` is passed.

View File

@@ -346,8 +346,7 @@ def fast_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callable[[FC
"--fast/--no-fast", show_default=True, default=False, envvar="OPENLLM_USE_LOCAL_LATEST", show_envvar=True, help="""Whether to skip checking if models is already in store.
This is useful if you already downloaded or setup the model beforehand.
""", **attrs
)(f)
""", **attrs)(f)
def machine_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callable[[FC], FC]:
return cli_option("--machine", is_flag=True, default=False, hidden=True, **attrs)(f)
@@ -379,8 +378,7 @@ def quantize_option(f: _AnyCallable | None = None, *, build: bool = False, model
"""
**Note** that this will set the mode for serving within deployment.""" if build else ""
) + """
**Note** that quantization is currently only available for *PyTorch* models.""", **attrs
)(f)
**Note** that quantization is currently only available for *PyTorch* models.""", **attrs)(f)
def workers_per_resource_option(f: _AnyCallable | None = None, *, build: bool = False, **attrs: t.Any) -> t.Callable[[FC], FC]:
return cli_option(
@@ -399,8 +397,7 @@ def workers_per_resource_option(f: _AnyCallable | None = None, *, build: bool =
**Note**: The workers value passed into 'build' will determine how the LLM can
be provisioned in Kubernetes as well as in standalone container. This will
ensure it has the same effect as 'openllm start --workers ...'""" if build else ""
), **attrs
)(f)
), **attrs)(f)
def bettertransformer_option(f: _AnyCallable | None = None, *, build: bool = False, model_env: EnvVarMixin | None = None, **attrs: t.Any) -> t.Callable[[FC], FC]:
return cli_option(
@@ -411,23 +408,22 @@ def serialisation_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Cal
return cli_option(
"--serialisation", "--serialization", "serialisation_format", type=click.Choice(["safetensors", "legacy"]), default="safetensors", show_default=True, show_envvar=True, envvar="OPENLLM_SERIALIZATION", help="""Serialisation format for save/load LLM.
Currently the following strategies are supported:
Currently the following strategies are supported:
- ``safetensors``: This will use safetensors format, which is synonymous to
- ``safetensors``: This will use safetensors format, which is synonymous to
\b
``safe_serialization=True``.
\b
``safe_serialization=True``.
\b
**Note** that this format might not work for every case, and
you can always fall back to ``legacy`` if needed.
\b
**Note** that this format might not work for every case, and
you can always fall back to ``legacy`` if needed.
- ``legacy``: This will use PyTorch serialisation format, often as ``.bin`` files.
This should be used if the model doesn't yet support safetensors.
- ``legacy``: This will use PyTorch serialisation format, often as ``.bin`` files.
This should be used if the model doesn't yet support safetensors.
**Note** that GGML format support is a work in progress.
""", **attrs
)(f)
**Note** that GGML format support is a work in progress.
""", **attrs)(f)
def container_registry_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callable[[FC], FC]:
return cli_option(
@@ -437,8 +433,7 @@ def container_registry_option(f: _AnyCallable | None = None, **attrs: t.Any) ->
\b
**Note** that in order to build the base image, you will need a GPU to compile the custom kernels. See ``openllm ext build-base-container`` for more information.
"""
)(f)
""")(f)
_wpr_strategies = {"round_robin", "conserved"}
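
All of these helpers follow the same reusable-option pattern: a factory returns a Click decorator so one flag can be attached to several commands, and calling it with a function applies it immediately. A minimal approximation in plain Click (cli_option itself is an OpenLLM wrapper, so details differ):

import typing as t
import click

FC = t.TypeVar("FC", bound=t.Callable[..., t.Any])

def fast_option(f: t.Optional[FC] = None, **attrs: t.Any) -> t.Any:
  decorator = click.option("--fast/--no-fast", default=False, show_default=True, envvar="OPENLLM_USE_LOCAL_LATEST", show_envvar=True,
                           help="Whether to skip checking if the model is already in the store.", **attrs)
  return decorator if f is None else decorator(f)

@click.command()
@fast_option
def start(fast: bool) -> None:
  click.echo(f"fast={fast}")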

View File

@@ -162,7 +162,7 @@ ServeCommand = t.Literal["serve", "serve-grpc"]
@attr.define
class GlobalOptions:
cloud_context: str | None = attr.field(default=None, converter=attr.converters.default_if_none("default"))
cloud_context: str | None = attr.field(default=None)
def with_options(self, **attrs: t.Any) -> t.Self:
return attr.evolve(self, **attrs)
@@ -223,7 +223,7 @@ class OpenLLMCommandGroup(BentoMLCommandGroup):
analytics.track(event)
raise
return wrapper
return t.cast("t.Callable[t.Concatenate[bool, P], t.Any]", wrapper)
@staticmethod
def exception_handling(func: t.Callable[P, t.Any], group: click.Group, **attrs: t.Any) -> t.Callable[P, t.Any]:

View File

@@ -28,7 +28,6 @@ from .constants import HUB_ATTRS
from ..exceptions import OpenLLMException
from ..utils import LazyLoader
from ..utils import LazyType
from ..utils import device_count
from ..utils import first_not_none
from ..utils import generate_context
from ..utils import generate_labels
@@ -40,7 +39,7 @@ from ..utils import normalize_attrs_to_model_tokenizer_pair
if t.TYPE_CHECKING:
import auto_gptq as autogptq
import torch
import torch.cuda
import torch.nn
import vllm
import openllm
@@ -55,7 +54,6 @@ else:
autogptq = LazyLoader("autogptq", globals(), "auto_gptq")
_transformers = LazyLoader("_transformers", globals(), "transformers")
torch = LazyLoader("torch", globals(), "torch")
torch.cuda = LazyLoader("torch.cuda", globals(), "torch.cuda")
_object_setattr = object.__setattr__
@@ -91,6 +89,10 @@ def infer_autoclass_from_llm_config(llm: openllm.LLM[M, T], config: _transformer
else: raise OpenLLMException(f"Model type {type(config)} is not supported yet.")
return getattr(_transformers, FRAMEWORK_TO_AUTOCLASS_MAPPING[llm.__llm_implementation__][idx])
def check_initialized(model: torch.nn.Module) -> None:
uninitialized = [n for n, param in model.named_parameters() if param.data.device == torch.device("meta")]
if len(uninitialized) > 0: raise RuntimeError(f"Found the following uninitialized parameters in {model}: {uninitialized}")
def import_model(llm: openllm.LLM[M, T], *decls: t.Any, trust_remote_code: bool, **attrs: t.Any) -> bentoml.Model:
"""Auto detect model type from given model_id and import it to bentoml's model store.
@@ -184,22 +186,15 @@ def load_model(llm: openllm.LLM[M, T], *decls: t.Any, **attrs: t.Any) -> M:
if "_quantize" in llm._bentomodel.info.metadata and llm._bentomodel.info.metadata["_quantize"] == "gptq":
if not is_autogptq_available(): raise OpenLLMException("GPTQ quantisation requires 'auto-gptq' (Not found in local environment). Install it with 'pip install \"openllm[gptq]\"'")
if llm.config["model_type"] != "causal_lm": raise OpenLLMException(f"GPTQ only support Causal LM (got {llm.__class__} of {llm.config['model_type']})")
return autogptq.AutoGPTQForCausalLM.from_quantized(llm._bentomodel.path, *decls, quantize_config=t.cast("autogptq.BaseQuantizeConfig", llm.quantization_config), trust_remote_code=llm.__llm_trust_remote_code__, use_safetensors=safe_serialization, **hub_attrs, **attrs,)
return autogptq.AutoGPTQForCausalLM.from_quantized(llm._bentomodel.path, *decls, quantize_config=t.cast("autogptq.BaseQuantizeConfig", llm.quantization_config), trust_remote_code=llm.__llm_trust_remote_code__, use_safetensors=safe_serialization, **hub_attrs, **attrs)
model = infer_autoclass_from_llm_config(llm, config).from_pretrained(llm._bentomodel.path, *decls, config=config, trust_remote_code=llm.__llm_trust_remote_code__, **hub_attrs, **attrs,)
# NOTE: we only cast and load the model if it is not already quantized and setup correctly
loaded_in_kbit = getattr(model, "is_loaded_in_8bit", False) or getattr(model, "is_loaded_in_4bit", False) or getattr(model, "is_quantized", False)
if torch.cuda.is_available() and device_count() == 1 and not loaded_in_kbit:
try:
model = model.to("cuda")
except torch.cuda.OutOfMemoryError as err:
raise RuntimeError(f"Failed to convert {llm.config['model_name']} with model_id '{llm.model_id}' to CUDA.\nNote: You can try out '--quantize int8 | int4' for dynamic quantization.") from err
model = infer_autoclass_from_llm_config(llm, config).from_pretrained(llm._bentomodel.path, *decls, config=config, trust_remote_code=llm.__llm_trust_remote_code__, **hub_attrs, **attrs).eval()
if llm.__llm_implementation__ in {"pt", "vllm"}: check_initialized(model)
# BetterTransformer is currently only supported on PyTorch.
if llm.bettertransformer and isinstance(model, _transformers.PreTrainedModel): model = model.to_bettertransformer()
return t.cast("M", model)
def save_pretrained(llm: openllm.LLM[M, T], save_directory: str, is_main_process: bool = True, state_dict: DictStrAny | None = None, save_function: t.Callable[..., None] | None = None, push_to_hub: bool = False, max_shard_size: int | str = "10GB", safe_serialization: bool = False, variant: str | None = None, **attrs: t.Any,) -> None:
"""Light wrapper around ``transformers.PreTrainedTokenizer.save_pretrained`` and ``transformers.PreTrainedModel.save_pretrained``."""
save_function = first_not_none(save_function, default=torch.save)
model_save_attrs, tokenizer_save_attrs = normalize_attrs_to_model_tokenizer_pair(**attrs)
safe_serialization = safe_serialization or llm._serialisation_format == "safetensors"
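
The new check_initialized guards against weights left on PyTorch's meta device, which can happen with low-memory loading paths. A small illustration of what the check detects (plain PyTorch, not OpenLLM code):

import torch

# A module created on the meta device has parameter shells but no real weights.
linear = torch.nn.Linear(4, 4, device="meta")
meta_params = [n for n, p in linear.named_parameters() if p.data.device == torch.device("meta")]
print(meta_params)  # ['weight', 'bias'] -- check_initialized would raise RuntimeError for these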

View File

@@ -46,6 +46,7 @@ from bentoml._internal.utils import reserve_free_port as reserve_free_port
from bentoml._internal.utils import resolve_user_filepath as resolve_user_filepath
from .lazy import LazyModule
from .lazy import VersionInfo as VersionInfo
logger = logging.getLogger(__name__)

View File

@@ -376,7 +376,7 @@ class EnvVarMixin(ReprMixin):
if hasattr(self, item): return getattr(self, item)
raise KeyError(f"Key {item} not found in {self}")
def __init__(self, model_name: str, implementation: LiteralRuntime = "pt", model_id: str | None = None, bettertransformer: bool | None = None, quantize: t.LiteralString | None = None,
runtime: t.Literal["ggml", "transformers"] = "transformers") -> None:
runtime: t.Literal["ggml", "transformers"] = "transformers") -> None:
"""EnvVarMixin is a mixin class that returns the value extracted from environment variables."""
from .._configuration import field_env_key
self.model_name = inflection.underscore(model_name)

View File

@@ -36,7 +36,7 @@ class MissingAttributesError(OpenLLMException):
"""Raised when given keys is not available in LazyModule special mapping."""
@functools.total_ordering
@attr.attrs(eq=False, order=False, slots=True, frozen=True)
@attr.attrs(eq=False, order=False, slots=True, frozen=True, repr=False)
class VersionInfo:
"""A version object that can be compared to tuple of length 1--4.
@@ -96,6 +96,8 @@ class VersionInfo:
# have to do anything special with releaselevel for now.
return us < them
def __repr__(self) -> str: return "{0}.{1}.{2}".format(*attr.astuple(self)[:3])
_sentinel, _reserved_namespace = object(), {"__openllm_special__", "__openllm_migration__"}
class LazyModule(types.ModuleType):
@@ -120,7 +122,7 @@ class LazyModule(types.ModuleType):
module_spec: __spec__ of the lazily loaded module
doc: Optional docstring for this module.
extra_objects: Any additional objects that can also be accessed through this module. Useful for additional metadata as well
as any locals() functions
as any locals() functions
"""
super().__init__(name)
self._modules = set(import_structure.keys())
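
VersionInfo's new repr is the bare version string, which is what Ref.tag returns for release builds, and its tuple comparison is what enforces the 0.2.12 floor. Assumed usage, with the re-export coming from the utils/__init__ change above:

from openllm.utils import VersionInfo

v = VersionInfo.from_version_string("0.2.12")
assert not v < (0, 2, 12)                                      # 0.2.12 itself satisfies the new floor
assert VersionInfo.from_version_string("0.2.11") < (0, 2, 12)  # older releases are rejected
assert repr(v) == "0.2.12"                                     # repr is now the bare version string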

View File

@@ -53,7 +53,7 @@ _value_docstring = {
For example:
For FLAN-T5 impl, this would be ["google/flan-t5-small", "google/flan-t5-base",
"google/flan-t5-large", "google/flan-t5-xl", "google/flan-t5-xxl"]
"google/flan-t5-large", "google/flan-t5-xl", "google/flan-t5-xxl"]
This field is required when defining under '__config__'.
""", "architecture": """The model architecture that is supported by this LLM.

View File

@@ -25,7 +25,7 @@ PRECISION = Decimal(".01")
ROOT = Path(__file__).resolve().parent.parent
def main():
def main() -> int:
coverage_summary = ROOT / "coverage-summary.json"
coverage_data = orjson.loads(coverage_summary.read_text(encoding="utf-8"))
@@ -39,8 +39,7 @@ def main():
rate = Decimal(statements_covered) / Decimal(statements) * 100
rate = rate.quantize(PRECISION, rounding=ROUND_DOWN)
lines.append(f"{package} | {100 if rate == 100 else rate}% ({statements_covered} / {statements})\n" # noqa: PLR2004
)
lines.append(f"{package} | {100 if rate == 100 else rate}% ({statements_covered} / {statements})\n")
total_statements_covered = total_data["statements_covered"]
total_statements = total_data["statements"]
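
Because the script quantizes with ROUND_DOWN, coverage rates are truncated rather than rounded up, and only an exact 100 is reported as 100%. For instance (made-up numbers):

from decimal import ROUND_DOWN, Decimal

PRECISION = Decimal(".01")
rate = Decimal(2) / Decimal(3) * 100
print(rate.quantize(PRECISION, rounding=ROUND_DOWN))  # 66.66 -- truncated, never 66.67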

View File

@@ -1,30 +1,30 @@
from ._core import AllOptionGroup
from ._core import GroupedOption
from ._core import MutuallyExclusiveOptionGroup
from ._core import OptionGroup
from ._core import RequiredAllOptionGroup
from ._core import RequiredAnyOptionGroup
from ._core import RequiredMutuallyExclusiveOptionGroup
from ._decorators import optgroup
from ._version import __version__
"""
click-option-group
~~~~~~~~~~~~~~~~~~
Option groups missing in Click
:copyright: © 2019-2020 by Eugene Prilepin
:license: BSD, see LICENSE for more details.
"""
__all__ = [
"__version__",
"optgroup",
"GroupedOption",
"OptionGroup",
"RequiredAnyOptionGroup",
"AllOptionGroup",
"RequiredAllOptionGroup",
"MutuallyExclusiveOptionGroup",
"RequiredMutuallyExclusiveOptionGroup",
]
from ._core import AllOptionGroup
from ._core import GroupedOption
from ._core import MutuallyExclusiveOptionGroup
from ._core import OptionGroup
from ._core import RequiredAllOptionGroup
from ._core import RequiredAnyOptionGroup
from ._core import RequiredMutuallyExclusiveOptionGroup
from ._decorators import optgroup
from ._version import __version__
"""
click-option-group
~~~~~~~~~~~~~~~~~~
Option groups missing in Click
:copyright: © 2019-2020 by Eugene Prilepin
:license: BSD, see LICENSE for more details.
"""
__all__ = [
"__version__",
"optgroup",
"GroupedOption",
"OptionGroup",
"RequiredAnyOptionGroup",
"AllOptionGroup",
"RequiredAllOptionGroup",
"MutuallyExclusiveOptionGroup",
"RequiredMutuallyExclusiveOptionGroup",
]

View File

@@ -1,82 +1,80 @@
from typing import Any
from typing import Callable
from typing import Dict
from typing import List
from typing import Mapping
from typing import Optional
from typing import Sequence
from typing import Set
from typing import Tuple
from typing import TypeAlias
from typing import TypeVar
from typing import Union
import click
_R = TypeVar("_R")
_T = TypeVar("_T")
AnyCallable: TypeAlias = Callable[..., Any]
_FC = TypeVar("_FC", bound=Union[AnyCallable, click.Command])
class GroupedOption(click.Option):
def __init__(self, param_decls: Optional[Sequence[str]] = ..., *, group: OptionGroup, **attrs: Any) -> None: ...
@property
def group(self) -> OptionGroup: ...
def handle_parse_result(
self, ctx: click.Context, opts: Mapping[str, Any], args: List[str]
) -> Tuple[Any, List[str]]: ...
def get_help_record(self, ctx: click.Context) -> Optional[Tuple[str, str]]: ...
class _GroupTitleFakeOption(click.Option):
def __init__(self, param_decls: Optional[Sequence[str]] = ..., *, group: OptionGroup, **attrs: Any) -> None: ...
def get_help_record(self, ctx: click.Context) -> Optional[Tuple[str, str]]: ...
class OptionGroup:
def __init__(self, name: Optional[str] = ..., *, hidden: bool = ..., help: Optional[str] = ...) -> None: ...
@property
def name(self) -> str: ...
@property
def help(self) -> str: ...
@property
def name_extra(self) -> List[str]: ...
@property
def forbidden_option_attrs(self) -> List[str]: ...
def get_help_record(self, ctx: click.Context) -> Optional[Tuple[str, str]]: ...
def option(self, *param_decls: Any, **attrs: Any) -> Callable[[_FC], _FC]: ...
def get_options(self, ctx: click.Context) -> Dict[str, GroupedOption]: ...
def get_option_names(self, ctx: click.Context) -> List[str]: ...
def get_error_hint(self, ctx: click.Context, option_names: Optional[Set[str]] = ...) -> str: ...
def handle_parse_result(self, option: GroupedOption, ctx: click.Context, opts: Mapping[str, Any]) -> None: ...
class RequiredAnyOptionGroup(OptionGroup):
@property
def forbidden_option_attrs(self) -> List[str]: ...
@property
def name_extra(self) -> List[str]: ...
def handle_parse_result(self, option: GroupedOption, ctx: click.Context, opts: Mapping[str, Any]) -> None: ...
class RequiredAllOptionGroup(OptionGroup):
@property
def forbidden_option_attrs(self) -> List[str]: ...
@property
def name_extra(self) -> List[str]: ...
def handle_parse_result(self, option: GroupedOption, ctx: click.Context, opts: Mapping[str, Any]) -> None: ...
class MutuallyExclusiveOptionGroup(OptionGroup):
@property
def forbidden_option_attrs(self) -> List[str]: ...
@property
def name_extra(self) -> List[str]: ...
def handle_parse_result(self, option: GroupedOption, ctx: click.Context, opts: Mapping[str, Any]) -> None: ...
class RequiredMutuallyExclusiveOptionGroup(MutuallyExclusiveOptionGroup):
@property
def name_extra(self) -> List[str]: ...
def handle_parse_result(self, option: GroupedOption, ctx: click.Context, opts: Mapping[str, Any]) -> None: ...
class AllOptionGroup(OptionGroup):
@property
def forbidden_option_attrs(self) -> List[str]: ...
@property
def name_extra(self) -> List[str]: ...
def handle_parse_result(self, option: GroupedOption, ctx: click.Context, opts: Mapping[str, Any]) -> None: ...
from typing import Any
from typing import Callable
from typing import Dict
from typing import List
from typing import Mapping
from typing import Optional
from typing import Sequence
from typing import Set
from typing import Tuple
from typing import TypeAlias
from typing import TypeVar
from typing import Union
import click
AnyCallable: TypeAlias = Callable[..., Any]
_FC = TypeVar("_FC", bound=Union[AnyCallable, click.Command])
class GroupedOption(click.Option):
def __init__(self, param_decls: Optional[Sequence[str]] = ..., *, group: OptionGroup, **attrs: Any) -> None: ...
@property
def group(self) -> OptionGroup: ...
def handle_parse_result(
self, ctx: click.Context, opts: Mapping[str, Any], args: List[str]
) -> Tuple[Any, List[str]]: ...
def get_help_record(self, ctx: click.Context) -> Optional[Tuple[str, str]]: ...
class _GroupTitleFakeOption(click.Option):
def __init__(self, param_decls: Optional[Sequence[str]] = ..., *, group: OptionGroup, **attrs: Any) -> None: ...
def get_help_record(self, ctx: click.Context) -> Optional[Tuple[str, str]]: ...
class OptionGroup:
def __init__(self, name: Optional[str] = ..., *, hidden: bool = ..., help: Optional[str] = ...) -> None: ...
@property
def name(self) -> str: ...
@property
def help(self) -> str: ...
@property
def name_extra(self) -> List[str]: ...
@property
def forbidden_option_attrs(self) -> List[str]: ...
def get_help_record(self, ctx: click.Context) -> Optional[Tuple[str, str]]: ...
def option(self, *param_decls: Any, **attrs: Any) -> Callable[[_FC], _FC]: ...
def get_options(self, ctx: click.Context) -> Dict[str, GroupedOption]: ...
def get_option_names(self, ctx: click.Context) -> List[str]: ...
def get_error_hint(self, ctx: click.Context, option_names: Optional[Set[str]] = ...) -> str: ...
def handle_parse_result(self, option: GroupedOption, ctx: click.Context, opts: Mapping[str, Any]) -> None: ...
class RequiredAnyOptionGroup(OptionGroup):
@property
def forbidden_option_attrs(self) -> List[str]: ...
@property
def name_extra(self) -> List[str]: ...
def handle_parse_result(self, option: GroupedOption, ctx: click.Context, opts: Mapping[str, Any]) -> None: ...
class RequiredAllOptionGroup(OptionGroup):
@property
def forbidden_option_attrs(self) -> List[str]: ...
@property
def name_extra(self) -> List[str]: ...
def handle_parse_result(self, option: GroupedOption, ctx: click.Context, opts: Mapping[str, Any]) -> None: ...
class MutuallyExclusiveOptionGroup(OptionGroup):
@property
def forbidden_option_attrs(self) -> List[str]: ...
@property
def name_extra(self) -> List[str]: ...
def handle_parse_result(self, option: GroupedOption, ctx: click.Context, opts: Mapping[str, Any]) -> None: ...
class RequiredMutuallyExclusiveOptionGroup(MutuallyExclusiveOptionGroup):
@property
def name_extra(self) -> List[str]: ...
def handle_parse_result(self, option: GroupedOption, ctx: click.Context, opts: Mapping[str, Any]) -> None: ...
class AllOptionGroup(OptionGroup):
@property
def forbidden_option_attrs(self) -> List[str]: ...
@property
def name_extra(self) -> List[str]: ...
def handle_parse_result(self, option: GroupedOption, ctx: click.Context, opts: Mapping[str, Any]) -> None: ...

View File

@@ -1,65 +1,65 @@
from typing import Any
from typing import Callable
from typing import Dict
from typing import List
from typing import NamedTuple
from typing import Optional
from typing import Tuple
from typing import Type
from typing import TypeVar
from typing import Union
from typing import overload
import click
from ._core import _FC
from ._core import AnyCallable
from ._core import OptionGroup
class OptionStackItem(NamedTuple):
param_decls: Tuple[str, ...]
attrs: Dict[str, Any]
param_count: int
class _NotAttachedOption(click.Option):
def __init__(self, param_decls: Any = ..., *, all_not_attached_options: Any, **attrs: Any) -> None: ...
def handle_parse_result(self, ctx: click.Context, opts: Any, args: List[str]) -> Any: ...
_GrpType = TypeVar("_GrpType", bound=OptionGroup)
class _OptGroup:
def __init__(self) -> None: ...
def __call__(
self,
name: Optional[str] = ...,
*,
help: Optional[str] = None,
cls: Optional[Type[_GrpType]] = None,
**attrs: Any,
) -> Union[click.Command, Callable[[AnyCallable], click.Command]]: ...
@overload
def group(
self,
name: Optional[str],
cls: type[_GrpType],
**attrs: Any,
) -> Callable[[AnyCallable], click.Command]: ...
@overload
def group(
self,
name: str = ...,
cls: None = None,
**attrs: Any,
) -> Callable[[AnyCallable], click.Command]: ...
@overload
def group(
self,
name: Optional[str] = ...,
*,
help: Optional[str] = ...,
cls: Optional[Type[_GrpType]] = None,
**attrs: Any,
) -> Union[click.Command, Callable[[AnyCallable], click.Command]]: ...
def option(self, *param_decls: Any, **attrs: Any) -> Callable[[_FC], _FC]: ...
optgroup: _OptGroup = ...
from typing import Any
from typing import Callable
from typing import Dict
from typing import List
from typing import NamedTuple
from typing import Optional
from typing import Tuple
from typing import Type
from typing import TypeVar
from typing import Union
from typing import overload
import click
from ._core import _FC
from ._core import AnyCallable
from ._core import OptionGroup
class OptionStackItem(NamedTuple):
param_decls: Tuple[str, ...]
attrs: Dict[str, Any]
param_count: int
class _NotAttachedOption(click.Option):
def __init__(self, param_decls: Any = ..., *, all_not_attached_options: Any, **attrs: Any) -> None: ...
def handle_parse_result(self, ctx: click.Context, opts: Any, args: List[str]) -> Any: ...
_GrpType = TypeVar("_GrpType", bound=OptionGroup)
class _OptGroup:
def __init__(self) -> None: ...
def __call__(
self,
name: Optional[str] = ...,
*,
help: Optional[str] = None,
cls: Optional[Type[_GrpType]] = None,
**attrs: Any,
) -> Union[click.Command, Callable[[AnyCallable], click.Command]]: ...
@overload
def group(
self,
name: Optional[str],
cls: type[_GrpType],
**attrs: Any,
) -> Callable[[AnyCallable], click.Command]: ...
@overload
def group(
self,
name: str = ...,
cls: None = None,
**attrs: Any,
) -> Callable[[AnyCallable], click.Command]: ...
@overload
def group(
self,
name: Optional[str] = ...,
*,
help: Optional[str] = ...,
cls: Optional[Type[_GrpType]] = None,
**attrs: Any,
) -> Union[click.Command, Callable[[AnyCallable], click.Command]]: ...
def option(self, *param_decls: Any, **attrs: Any) -> Callable[[_FC], _FC]: ...
optgroup: _OptGroup = ...

View File

@@ -1,3 +1,3 @@
"""This type stub file was generated by pyright."""
__version__ = ...
"""This type stub file was generated by pyright."""
__version__ = ...