feat(cli): --device all --workers-per-resource

Rename --workers to --workers-per-resource so that the CLI flag is synonymous with the configuration argument.

Add support for --device all, which expands to every GPU index reported by pynvml (or to no devices when no GPU is detected).

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
Aaron
2023-06-14 06:36:54 -04:00
parent d07cc95ea0
commit d7e92ae525
2 changed files with 39 additions and 11 deletions

View File

@@ -168,7 +168,7 @@ def build(model_name: str, *, __cli__: bool = False, **attrs: t.Any) -> tuple[be
current_model_envvar = os.environ.pop("OPENLLM_MODEL", None)
current_model_id_envvar = os.environ.pop("OPENLLM_MODEL_ID", None)
_previously_built = False
workers = attrs.pop("_workers", None)
workers_per_resource = attrs.pop("_workers_per_resource", None)
model_id: str = attrs.pop("model_id", None)
llm_config = openllm.AutoConfig.for_model(model_name)
@@ -193,7 +193,9 @@ def build(model_name: str, *, __cli__: bool = False, **attrs: t.Any) -> tuple[be
labels = dict(llm.identifying_params)
labels.update({"_type": llm.llm_type, "_framework": to_use_framework})
service_name = f"generated_{llm.config.__openllm_model_name__}_service.py"
workers_per_resource = utils.first_not_none(workers, default=llm.config.__openllm_workers_per_resource__)
workers_per_resource = utils.first_not_none(
workers_per_resource, default=llm.config.__openllm_workers_per_resource__
)
with fs.open_fs(f"temp://llm_{llm.config.__openllm_model_name__}") as llm_fs:
# add service.py definition to this temporary folder

View File

@@ -162,11 +162,11 @@ Available model_id(s): {llm_config.__openllm_model_ids__} [default: {llm_config.
help=f"Assign GPU devices (if available) for {model_name}.",
show_envvar=True,
)
@workers_option(cog.optgroup)
@workers_per_resource_option(cog.optgroup)
def model_start(
server_timeout: int | None,
model_id: str | None,
workers: float | None,
workers_per_resource: float | None,
device: tuple[str, ...] | None,
**attrs: t.Any,
) -> openllm.LLMConfig:
@@ -201,7 +201,7 @@ Available model_id(s): {llm_config.__openllm_model_ids__} [default: {llm_config.
]
)
workers_per_resource = first_not_none(workers, default=llm.config.__openllm_workers_per_resource__)
workers_per_resource = first_not_none(workers_per_resource, default=llm.config.__openllm_workers_per_resource__)
server_timeout = first_not_none(server_timeout, default=llm.config.__openllm_timeout__)
num_workers = int(1 / workers_per_resource)
@@ -543,10 +543,30 @@ class NargsOptions(cog.GroupedOption):
return retval
def parse_device_callback(_: click.Context, params: click.Parameter, value: tuple[str, ...] | None) -> t.Any:
def parse_device_callback(
_: click.Context, params: click.Parameter, value: tuple[str, ...] | t.Literal["all"] | None
) -> t.Any:
if value is None:
return value
# NOTE: --device all is a special case
if isinstance(value, str):
if value != "all":
raise RuntimeError(f"{params} parameter only accept 'all' as a string value.")
import pynvml # transitive dependencies of BentoML
try:
pynvml.nvmlInit()
return tuple(range(pynvml.nvmlDeviceGetCount()))
except (pynvml.nvml.NVMLError, OSError):
logger.debug("GPU not detected. Unable to initialize pynvml lib.")
return ()
finally:
try:
pynvml.nvmlShutdown()
except Exception:
pass
if not LazyType(TupleStrAny).isinstance(value):
raise RuntimeError(f"{params} only accept multiple values.")
parsed: tuple[str, ...] = tuple()
@@ -608,7 +628,7 @@ def model_id_option(factory: t.Any, model_env: ModelEnv | None = None):
)
def workers_option(factory: t.Any, build: bool = False):
def workers_per_resource_option(factory: t.Any, build: bool = False):
help_str = """Number of workers per resource assigned.
See https://docs.bentoml.org/en/latest/guides/scheduling.html#resource-scheduling-strategy
for more information. By default, this is set to 1."""
@@ -618,7 +638,7 @@ def workers_option(factory: t.Any, build: bool = False):
be provisioned in Kubernetes as well as in standalone container. This will
ensure it has the same effect with 'openllm start --workers ...'"""
return factory.option(
"--workers",
"--workers-per-resource",
default=None,
type=click.FLOAT,
help=help_str,
@@ -673,8 +693,14 @@ def cli_factory() -> click.Group:
@model_id_option(click)
@output_option
@click.option("--overwrite", is_flag=True, help="Overwrite existing Bento for given LLM if it already exists.")
@workers_option(click, build=True)
def build(model_name: str, model_id: str | None, overwrite: bool, output: OutputLiteral, workers: float | None):
@workers_per_resource_option(click, build=True)
def build(
model_name: str,
model_id: str | None,
overwrite: bool,
output: OutputLiteral,
workers_per_resource: float | None,
):
"""Package a given models into a Bento.
$ openllm build flan-t5
@@ -695,7 +721,7 @@ def cli_factory() -> click.Group:
model_name,
__cli__=True,
model_id=model_id,
_workers=workers,
_workers_per_resource=workers_per_resource,
_overwrite_existing_bento=overwrite,
)