refactor(configuration): __config__ and perf

move model_ids and default_id to config class declaration,
cleanup dependencies between config and LLM implementation

lazy load module during LLM creation to llm_post_init

fix post_init hooks to run load_in_mha.

Signed-off-by: aarnphm-ec2-dev <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
aarnphm-ec2-dev
2023-06-11 12:53:15 +00:00
parent 17241292da
commit 2e453fb005
22 changed files with 565 additions and 376 deletions

View File

@@ -384,12 +384,10 @@ def start_model_command(
ModelEnv = openllm.utils.ModelEnv(model_name)
llm_config = openllm.AutoConfig.for_model(model_name)
for_doc = openllm.AutoLLM.for_model(model_name)
docstring = f"""\
{ModelEnv.start_docstring}
\b
Available model_id(s) to use with '{model_name}' are: {for_doc.model_ids} [default: {for_doc.default_id}]
Tip: One can pass one of the aforementioned to '--model-id' to use other pretrained weights.
Available model_id(s): {llm_config.__openllm_model_ids__} [default: {llm_config.__openllm_default_id__}]
"""
command_attrs: dict[str, t.Any] = {
"name": ModelEnv.model_name,
@@ -399,7 +397,7 @@ Tip: One can pass one of the aforementioned to '--model-id' to use other pretrai
}
aliases: list[str] = []
if llm_config.name_type == "dasherize":
if llm_config.__openllm_name_type__ == "dasherize":
aliases.append(llm_config.__openllm_start_name__)
command_attrs["aliases"] = aliases if len(aliases) > 0 else None
@@ -429,8 +427,9 @@ Tip: One can pass one of the aforementioned to '--model-id' to use other pretrai
@group.command(**command_attrs)
@llm_config.to_click_options
@parse_serve_args(_serve_grpc)
@click.option("--server-timeout", type=int, default=3600, help="Server timeout in seconds")
@model_id_option
@cog.optgroup.group("General LLM Options")
@cog.optgroup.option("--server-timeout", type=int, default=3600, help="Server timeout in seconds")
@model_id_option(cog.optgroup, model_env=ModelEnv)
@click.option(
"--device",
type=tuple,
@@ -439,6 +438,7 @@ Tip: One can pass one of the aforementioned to '--model-id' to use other pretrai
envvar="CUDA_VISIBLE_DEVICES",
callback=parse_device_callback,
help=f"Assign GPU devices (if available) for {model_name}.",
show_envvar=True,
)
def model_start(
server_timeout: int,
@@ -575,12 +575,20 @@ output_option = click.option(
default="pretty",
help="Showing output type. Default to 'pretty'",
)
model_id_option = click.option(
"--model-id",
type=click.STRING,
default=None,
help="Optional model_id name or path for (fine-tune) weight.",
)
def model_id_option(factory: t.Any, model_env: openllm.utils.ModelEnv | None = None):
envvar = None
if model_env is not None:
envvar = model_env.model_id
return factory.option(
"--model-id",
type=click.STRING,
default=None,
help="Optional model_id name or path for (fine-tune) weight.",
envvar=envvar,
show_envvar=True if envvar is not None else False,
)
def cli_factory() -> click.Group:
@@ -627,7 +635,7 @@ def cli_factory() -> click.Group:
@click.argument(
"model_name", type=click.Choice([inflection.dasherize(name) for name in openllm.CONFIG_MAPPING.keys()])
)
@model_id_option
@model_id_option(click)
@output_option
@click.option("--overwrite", is_flag=True, help="Overwrite existing Bento for given LLM if it already exists.")
def build(model_name: str, model_id: str | None, overwrite: bool, output: OutputLiteral):
@@ -713,8 +721,12 @@ def cli_factory() -> click.Group:
runtime_impl += ("flax",)
if model.config.__openllm_model_name__ in openllm.MODEL_TF_MAPPING_NAMES:
runtime_impl += ("tf",)
json_data[m] = {"model_id": model.model_ids, "description": docs, "runtime_impl": runtime_impl}
converted.extend([convert_transformers_model_name(i) for i in model.model_ids])
json_data[m] = {
"model_id": model.config.__openllm_model_ids__,
"description": docs,
"runtime_impl": runtime_impl,
}
converted.extend([convert_transformers_model_name(i) for i in model.config.__openllm_model_ids__])
except Exception as err:
failed_initialized.append((m, err))
@@ -783,7 +795,7 @@ def cli_factory() -> click.Group:
@click.argument(
"model_name", type=click.Choice([inflection.dasherize(name) for name in openllm.CONFIG_MAPPING.keys()])
)
@model_id_option
@model_id_option(click)
@output_option
def download_models(model_name: str, model_id: str | None, output: OutputLiteral):
"""Setup LLM interactively.