mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-03-14 13:06:09 -04:00
chore(docs): update docs for to_runner
Signed-off-by: aarnphm-ec2-dev <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
@@ -704,7 +704,6 @@ class LLM(LLMInterface, metaclass=LLMMetaclass):
|
||||
"""Convert this LLM into a Runner.
|
||||
|
||||
Args:
|
||||
name: The name of the runner to generate. Optional as this will be generated based on the model_name.
|
||||
models: Any additional ``bentoml.Model`` to be included in this given models.
|
||||
By default, this will be determined from the model_name.
|
||||
max_batch_size: The maximum batch size for the runner.
|
||||
@@ -713,6 +712,10 @@ class LLM(LLMInterface, metaclass=LLMMetaclass):
|
||||
strategy: The strategy to use for this runner.
|
||||
embedded: Whether to run this runner in embedded mode.
|
||||
scheduling_strategy: Whether to create a custom scheduling strategy for this Runner.
|
||||
|
||||
NOTE: There is one difference between bentoml.models.get().to_runner() and LLM.to_runner(): 'name'.
|
||||
- 'name': will be generated by OpenLLM, hence users shouldn't need to worry about this.
|
||||
The generated name will be 'llm-<model-start-name>-runner' (ex: llm-dolly-v2-runner, llm-chatglm-runner)
|
||||
"""
|
||||
|
||||
name = f"llm-{self.config.__openllm_start_name__}-runner"
|
||||
|
||||
@@ -88,7 +88,6 @@ class _BaseAutoLLMClass:
|
||||
"max_latency_ms",
|
||||
"method_configs",
|
||||
"embedded",
|
||||
"scheduling_strategy",
|
||||
]
|
||||
to_runner_attrs = {k: v for k, v in attrs.items() if k in runner_kwargs_name}
|
||||
if not isinstance(llm_config, openllm.LLMConfig):
|
||||
|
||||
Reference in New Issue
Block a user