chore(docs): update docs for to_runner

Signed-off-by: aarnphm-ec2-dev <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
aarnphm-ec2-dev
2023-06-11 03:38:56 +00:00
parent 6a937d8b51
commit 5a7942574f
2 changed files with 4 additions and 2 deletions

View File

@@ -704,7 +704,6 @@ class LLM(LLMInterface, metaclass=LLMMetaclass):
"""Convert this LLM into a Runner.
Args:
name: The name of the runner to generate. Optional as this will be generated based on the model_name.
models: Any additional ``bentoml.Model`` to be included alongside the given model.
By default, this will be determined from the model_name.
max_batch_size: The maximum batch size for the runner.
@@ -713,6 +712,10 @@ class LLM(LLMInterface, metaclass=LLMMetaclass):
strategy: The strategy to use for this runner.
embedded: Whether to run this runner in embedded mode.
scheduling_strategy: Whether to create a custom scheduling strategy for this Runner.
NOTE: There are some differences between bentoml.models.get().to_runner() and LLM.to_runner(): 'name'.
- 'name': will be generated by OpenLLM, hence users shouldn't worry about this.
The generated name will be 'llm-<model-start-name>-runner' (ex: llm-dolly-v2-runner, llm-chatglm-runner)
"""
name = f"llm-{self.config.__openllm_start_name__}-runner"

View File

@@ -88,7 +88,6 @@ class _BaseAutoLLMClass:
"max_latency_ms",
"method_configs",
"embedded",
"scheduling_strategy",
]
to_runner_attrs = {k: v for k, v in attrs.items() if k in runner_kwargs_name}
if not isinstance(llm_config, openllm.LLMConfig):