mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-03-14 13:06:09 -04:00
chore(docs): update docs for to_runner
Signed-off-by: aarnphm-ec2-dev <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
@@ -704,7 +704,6 @@ class LLM(LLMInterface, metaclass=LLMMetaclass):
|
||||
"""Convert this LLM into a Runner.
|
||||
|
||||
Args:
|
||||
name: The name of the runner to generate. Optional as this will be generated based on the model_name.
|
||||
models: Any additional ``bentoml.Model`` to be included in this given models.
|
||||
By default, this will be determined from the model_name.
|
||||
max_batch_size: The maximum batch size for the runner.
|
||||
@@ -713,6 +712,10 @@ class LLM(LLMInterface, metaclass=LLMMetaclass):
|
||||
strategy: The strategy to use for this runner.
|
||||
embedded: Whether to run this runner in embedded mode.
|
||||
scheduling_strategy: Whether to create a custom scheduling strategy for this Runner.
|
||||
|
||||
NOTE: There is one difference between bentoml.models.get().to_runner() and LLM.to_runner(): 'name'.
|
||||
- 'name': will be generated by OpenLLM, hence users shouldn't need to worry about this.
|
||||
The generated name will be 'llm-<model-start-name>-runner' (ex: llm-dolly-v2-runner, llm-chatglm-runner)
|
||||
"""
|
||||
|
||||
name = f"llm-{self.config.__openllm_start_name__}-runner"
|
||||
|
||||
@@ -88,7 +88,6 @@ class _BaseAutoLLMClass:
|
||||
"max_latency_ms",
|
||||
"method_configs",
|
||||
"embedded",
|
||||
"scheduling_strategy",
|
||||
]
|
||||
to_runner_attrs = {k: v for k, v in attrs.items() if k in runner_kwargs_name}
|
||||
if not isinstance(llm_config, openllm.LLMConfig):
|
||||
|
||||
Reference in New Issue
Block a user