feat(infra): add tools for managing optional-dependencies

based on llm config

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
2026-05-19 14:16:22 -04:00 · 2023-06-08 08:57:19 -04:00
parent 23d98a2729
commit c0418b76ec
13 changed files with 98 additions and 21 deletions
--- a/src/openllm/_configuration.py
+++ b/src/openllm/_configuration.py
@@ -652,6 +652,10 @@ class LLMConfig:
        __openllm_url__: str = Field(None, init=False)
        """The resolved url for this LLMConfig."""

+        __openllm_requirements__: list[str] | None = None
+        """The default PyPI requirements needed to run this given LLM. By default, we will depend on
+        bentoml, torch, transformers."""
+
        GenerationConfig: type = type
        """Users can override this subclass of any given LLMConfig to provide GenerationConfig
        default value. For example:
@@ -682,6 +686,7 @@ class LLMConfig:
        trust_remote_code: bool = False,
        requires_gpu: bool = False,
        url: str | None = None,
+        requirements: list[str] | None = None,
    ):
        if name_type == "dasherize":
            model_name = inflection.underscore(cls.__name__.replace("Config", ""))
@@ -699,6 +704,7 @@ class LLMConfig:
        cls.__openllm_start_name__ = start_name
        cls.__openllm_env__ = openllm.utils.ModelEnv(model_name)
        cls.__openllm_url__ = url or "(not set)"
+        cls.__openllm_requirements__ = requirements

        # NOTE: Since we want to enable a pydantic-like experience
        # this means we will have to hide the attr abstraction, and generate
--- a/src/openllm/_llm.py
+++ b/src/openllm/_llm.py
@@ -173,7 +173,6 @@ _reserved_namespace = _required_namespace | {
    "model",
    "tokenizer",
    "import_kwargs",
-    "requirements",
 }


@@ -199,10 +198,6 @@ class LLMInterface(ABC):
    """The default import kwargs to used when importing the model.
    This will be passed into 'openllm.LLM.import_model'."""

-    requirements: list[str] | None = None
-    """The default PyPI requirements needed to run this given LLM. By default, we will depend on
-    bentoml, torch, transformers."""
-
    @abstractmethod
    def generate(self, prompt: str, **preprocess_generate_kwds: t.Any) -> t.Any:
        """The main function implementation for generating from given prompt.  It takes the prompt
--- a/src/openllm/_package.py
+++ b/src/openllm/_package.py
@@ -72,8 +72,8 @@ def construct_python_options(llm: openllm.LLM, llm_fs: FS) -> PythonOptions:
    packages: list[str] = []

    ModelEnv = openllm.utils.ModelEnv(llm.__openllm_start_name__)
-    if llm.requirements is not None:
-        packages.extend(llm.requirements)
+    if llm.config.__openllm_requirements__ is not None:
+        packages.extend(llm.config.__openllm_requirements__)

    if not (str(os.environ.get("BENTOML_BUNDLE_LOCAL_BUILD", False)).lower() == "false"):
        packages.append(f"bentoml>={'.'.join([str(i) for i in pkg.pkg_version_info('bentoml')])}")
--- a/src/openllm/cli.py
+++ b/src/openllm/cli.py
@@ -422,8 +422,11 @@ def start_model_command(
            }
        )

-        if llm.requirements is not None:
-            _echo(f"Make sure to have the following dependencies available: {llm.requirements}", fg="yellow")
+        if llm.config.__openllm_requirements__ is not None:
+            _echo(
+                f"Make sure to have the following dependencies available: {llm.config.__openllm_requirements__}",
+                fg="yellow",
+            )

        if t.TYPE_CHECKING:
            server_cls: type[bentoml.HTTPServer] if not _serve_grpc else type[bentoml.GrpcServer]
--- a/src/openllm/models/chatglm/configuration_chatglm.py
+++ b/src/openllm/models/chatglm/configuration_chatglm.py
@@ -23,6 +23,7 @@ class ChatGLMConfig(
    default_timeout=3600000,
    requires_gpu=True,
    url="https://github.com/THUDM/ChatGLM-6B",
+    requirements=["cpm_kernels", "sentencepiece"],
 ):
    """
    ChatGLM is an open bilingual language model based on
--- a/src/openllm/models/chatglm/modeling_chatglm.py
+++ b/src/openllm/models/chatglm/modeling_chatglm.py
@@ -64,8 +64,6 @@ class ChatGLM(openllm.LLM):

    default_model = "THUDM/chatglm-6b-int4"

-    requirements = ["cpm_kernels", "sentencepiece"]
-
    pretrained = ["THUDM/chatglm-6b", "THUDM/chatglm-6b-int8", "THUDM/chatglm-6b-int4"]

    device = torch.device("cuda")
--- a/src/openllm/models/falcon/configuration_falcon.py
+++ b/src/openllm/models/falcon/configuration_falcon.py
@@ -23,6 +23,7 @@ class FalconConfig(
    requires_gpu=True,
    default_timeout=3600000,
    url="https://falconllm.tii.ae/",
+    requirements=["einops", "xformers", "safetensors"],
 ):
    """Falcon-7B is a 7B parameters causal decoder-only model built by
    TII and trained on 1,500B tokens of [RefinedWeb](https://huggingface.co/datasets/tiiuae/falcon-refinedweb)
--- a/src/openllm/models/falcon/modeling_falcon.py
+++ b/src/openllm/models/falcon/modeling_falcon.py
@@ -36,8 +36,6 @@ class Falcon(openllm.LLM):

    default_model = "tiiuae/falcon-7b"

-    requirements = ["einops", "xformers", "safetensors"]
-
    pretrained = ["tiiuae/falcon-7b", "tiiuae/falcon-40b", "tiiuae/falcon-7b-instruct", "tiiuae/falcon-40b-instruct"]

    import_kwargs = {"torch_dtype": torch.bfloat16, "device_map": "auto"}
--- a/src/openllm/models/starcoder/configuration_starcoder.py
+++ b/src/openllm/models/starcoder/configuration_starcoder.py
@@ -21,6 +21,7 @@ class StarCoderConfig(
    name_type="lowercase",
    requires_gpu=True,
    url="https://github.com/bigcode-project/starcoder",
+    requirements=["bitsandbytes"],
 ):
    """The StarCoder models are 15.5B parameter models trained on 80+ programming languages from
    [The Stack (v1.2)](https://huggingface.co/datasets/bigcode/the-stack), with opt-out requests excluded.
--- a/src/openllm/models/starcoder/modeling_starcoder.py
+++ b/src/openllm/models/starcoder/modeling_starcoder.py
@@ -44,8 +44,6 @@ class StarCoder(openllm.LLM):

    default_model = "bigcode/starcoder"

-    requirements = ["bitandbytes"]
-
    pretrained = ["bigcode/starcoder", "bigcode/starcoderbase"]

    device = torch.device("cuda")