diff --git a/src/openllm/_llm.py b/src/openllm/_llm.py
index a0f7714c..0d623df0 100644
--- a/src/openllm/_llm.py
+++ b/src/openllm/_llm.py
@@ -704,7 +704,6 @@ class LLM(LLMInterface, metaclass=LLMMetaclass):
         """Convert this LLM into a Runner.
 
         Args:
-            name: The name of the runner to generate. Optional as this will be generated based on the model_name.
             models: Any additional ``bentoml.Model`` to be included in this given models. By default, this will be determined from the model_name.
             max_batch_size: The maximum batch size for the runner.
@@ -713,6 +712,10 @@ class LLM(LLMInterface, metaclass=LLMMetaclass):
             strategy: The strategy to use for this runner.
             embedded: Whether to run this runner in embedded mode.
             scheduling_strategy: Whether to create a custom scheduling strategy for this Runner.
+
+        NOTE: There are some differences between bentoml.models.get().to_runner() and LLM.to_runner(): 'name'.
+        - 'name': will be generated by OpenLLM, hence users shouldn't worry about this.
+            The generated name will be 'llm-<start_name>-runner' (ex: llm-dolly-v2-runner, llm-chatglm-runner)
         """
         name = f"llm-{self.config.__openllm_start_name__}-runner"
diff --git a/src/openllm/models/auto/factory.py b/src/openllm/models/auto/factory.py
index f2af4d06..1a88d6b9 100644
--- a/src/openllm/models/auto/factory.py
+++ b/src/openllm/models/auto/factory.py
@@ -88,7 +88,6 @@ class _BaseAutoLLMClass:
             "max_latency_ms",
             "method_configs",
             "embedded",
-            "scheduling_strategy",
         ]
         to_runner_attrs = {k: v for k, v in attrs.items() if k in runner_kwargs_name}
         if not isinstance(llm_config, openllm.LLMConfig):