mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-05-19 14:16:22 -04:00
feat(infra): add tools for managing optional-dependencies
based on llm config Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
@@ -652,6 +652,10 @@ class LLMConfig:
|
||||
__openllm_url__: str = Field(None, init=False)
|
||||
"""The resolved url for this LLMConfig."""
|
||||
|
||||
__openllm_requirements__: list[str] | None = None
|
||||
"""The default PyPI requirements needed to run this given LLM. By default, we will depend on
|
||||
bentoml, torch, transformers."""
|
||||
|
||||
GenerationConfig: type = type
|
||||
"""Users can override this subclass of any given LLMConfig to provide GenerationConfig
|
||||
default value. For example:
|
||||
@@ -682,6 +686,7 @@ class LLMConfig:
|
||||
trust_remote_code: bool = False,
|
||||
requires_gpu: bool = False,
|
||||
url: str | None = None,
|
||||
requirements: list[str] | None = None,
|
||||
):
|
||||
if name_type == "dasherize":
|
||||
model_name = inflection.underscore(cls.__name__.replace("Config", ""))
|
||||
@@ -699,6 +704,7 @@ class LLMConfig:
|
||||
cls.__openllm_start_name__ = start_name
|
||||
cls.__openllm_env__ = openllm.utils.ModelEnv(model_name)
|
||||
cls.__openllm_url__ = url or "(not set)"
|
||||
cls.__openllm_requirements__ = requirements
|
||||
|
||||
# NOTE: Since we want to enable a pydantic-like experience
|
||||
# this means we will have to hide the attr abstraction, and generate
|
||||
|
||||
@@ -173,7 +173,6 @@ _reserved_namespace = _required_namespace | {
|
||||
"model",
|
||||
"tokenizer",
|
||||
"import_kwargs",
|
||||
"requirements",
|
||||
}
|
||||
|
||||
|
||||
@@ -199,10 +198,6 @@ class LLMInterface(ABC):
|
||||
"""The default import kwargs to use when importing the model.
|
||||
This will be passed into 'openllm.LLM.import_model'."""
|
||||
|
||||
requirements: list[str] | None = None
|
||||
"""The default PyPI requirements needed to run this given LLM. By default, we will depend on
|
||||
bentoml, torch, transformers."""
|
||||
|
||||
@abstractmethod
|
||||
def generate(self, prompt: str, **preprocess_generate_kwds: t.Any) -> t.Any:
|
||||
"""The main function implementation for generating from given prompt. It takes the prompt
|
||||
|
||||
@@ -72,8 +72,8 @@ def construct_python_options(llm: openllm.LLM, llm_fs: FS) -> PythonOptions:
|
||||
packages: list[str] = []
|
||||
|
||||
ModelEnv = openllm.utils.ModelEnv(llm.__openllm_start_name__)
|
||||
if llm.requirements is not None:
|
||||
packages.extend(llm.requirements)
|
||||
if llm.config.__openllm_requirements__ is not None:
|
||||
packages.extend(llm.config.__openllm_requirements__)
|
||||
|
||||
if not (str(os.environ.get("BENTOML_BUNDLE_LOCAL_BUILD", False)).lower() == "false"):
|
||||
packages.append(f"bentoml>={'.'.join([str(i) for i in pkg.pkg_version_info('bentoml')])}")
|
||||
|
||||
@@ -422,8 +422,11 @@ def start_model_command(
|
||||
}
|
||||
)
|
||||
|
||||
if llm.requirements is not None:
|
||||
_echo(f"Make sure to have the following dependencies available: {llm.requirements}", fg="yellow")
|
||||
if llm.config.__openllm_requirements__ is not None:
|
||||
_echo(
|
||||
f"Make sure to have the following dependencies available: {llm.config.__openllm_requirements__}",
|
||||
fg="yellow",
|
||||
)
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
server_cls: type[bentoml.HTTPServer] if not _serve_grpc else type[bentoml.GrpcServer]
|
||||
|
||||
@@ -23,6 +23,7 @@ class ChatGLMConfig(
|
||||
default_timeout=3600000,
|
||||
requires_gpu=True,
|
||||
url="https://github.com/THUDM/ChatGLM-6B",
|
||||
requirements=["cpm_kernels", "sentencepiece"],
|
||||
):
|
||||
"""
|
||||
ChatGLM is an open bilingual language model based on
|
||||
|
||||
@@ -64,8 +64,6 @@ class ChatGLM(openllm.LLM):
|
||||
|
||||
default_model = "THUDM/chatglm-6b-int4"
|
||||
|
||||
requirements = ["cpm_kernels", "sentencepiece"]
|
||||
|
||||
pretrained = ["THUDM/chatglm-6b", "THUDM/chatglm-6b-int8", "THUDM/chatglm-6b-int4"]
|
||||
|
||||
device = torch.device("cuda")
|
||||
|
||||
@@ -23,6 +23,7 @@ class FalconConfig(
|
||||
requires_gpu=True,
|
||||
default_timeout=3600000,
|
||||
url="https://falconllm.tii.ae/",
|
||||
requirements=["einops", "xformers", "safetensors"],
|
||||
):
|
||||
"""Falcon-7B is a 7B parameters causal decoder-only model built by
|
||||
TII and trained on 1,500B tokens of [RefinedWeb](https://huggingface.co/datasets/tiiuae/falcon-refinedweb)
|
||||
|
||||
@@ -36,8 +36,6 @@ class Falcon(openllm.LLM):
|
||||
|
||||
default_model = "tiiuae/falcon-7b"
|
||||
|
||||
requirements = ["einops", "xformers", "safetensors"]
|
||||
|
||||
pretrained = ["tiiuae/falcon-7b", "tiiuae/falcon-40b", "tiiuae/falcon-7b-instruct", "tiiuae/falcon-40b-instruct"]
|
||||
|
||||
import_kwargs = {"torch_dtype": torch.bfloat16, "device_map": "auto"}
|
||||
|
||||
@@ -21,6 +21,7 @@ class StarCoderConfig(
|
||||
name_type="lowercase",
|
||||
requires_gpu=True,
|
||||
url="https://github.com/bigcode-project/starcoder",
|
||||
requirements=["bitandbytes"],
|
||||
):
|
||||
"""The StarCoder models are 15.5B parameter models trained on 80+ programming languages from
|
||||
[The Stack (v1.2)](https://huggingface.co/datasets/bigcode/the-stack), with opt-out requests excluded.
|
||||
|
||||
@@ -44,8 +44,6 @@ class StarCoder(openllm.LLM):
|
||||
|
||||
default_model = "bigcode/starcoder"
|
||||
|
||||
requirements = ["bitandbytes"]
|
||||
|
||||
pretrained = ["bigcode/starcoder", "bigcode/starcoderbase"]
|
||||
|
||||
device = torch.device("cuda")
|
||||
|
||||
Reference in New Issue
Block a user