From 05fa34f9e6e8a862730c23a892d49f656ddfbcb6 Mon Sep 17 00:00:00 2001 From: Aaron <29749331+aarnphm@users.noreply.github.com> Date: Sat, 10 Jun 2023 17:36:02 -0400 Subject: [PATCH] refactor: pretrained => model_id I think model_id makes more sense than calling it pretrained Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> --- src/openllm/_llm.py | 78 ++++++++++--------- src/openllm/cli.py | 57 +++++++------- src/openllm/models/auto/factory.py | 21 +++-- .../models/chatglm/configuration_chatglm.py | 8 +- .../models/chatglm/modeling_chatglm.py | 15 ++-- .../models/dolly_v2/configuration_dolly_v2.py | 8 +- .../models/dolly_v2/modeling_dolly_v2.py | 10 +-- .../models/falcon/configuration_falcon.py | 8 +- src/openllm/models/falcon/modeling_falcon.py | 10 +-- .../models/flan_t5/configuration_flan_t5.py | 8 +- .../models/flan_t5/modeling_flan_t5.py | 4 +- .../models/flan_t5/modeling_flax_flan_t5.py | 4 +- .../models/flan_t5/modeling_tf_flan_t5.py | 4 +- .../models/stablelm/configuration_stablelm.py | 8 +- .../models/stablelm/modeling_stablelm.py | 6 +- .../starcoder/configuration_starcoder.py | 8 +- .../models/starcoder/modeling_starcoder.py | 10 +-- src/openllm/utils/__init__.py | 4 +- 18 files changed, 155 insertions(+), 116 deletions(-) diff --git a/src/openllm/_llm.py b/src/openllm/_llm.py index 258d3fa4..a0f7714c 100644 --- a/src/openllm/_llm.py +++ b/src/openllm/_llm.py @@ -92,14 +92,14 @@ class TaskType(enum.Enum, metaclass=TypeMeta): def import_model( - model_name: str, + model_id: str, tag: bentoml.Tag, _model_framework: str, *model_args: t.Any, tokenizer_kwds: dict[str, t.Any], **attrs: t.Any, ): - """Auto detect model type from given model_name and import it to bentoml's model store. + """Auto detect model type from given model_id and import it to bentoml's model store. For all kwargs, it will be parsed into `transformers.AutoConfig.from_pretrained` first, returning all of the unused kwargs. 
@@ -111,7 +111,7 @@ def import_model( Refer to Transformers documentation for more information about kwargs. Args: - model_name: Model name to be imported. use `openllm models` to see available entries + model_id: Model id to be imported. See `openllm models` for all supported models. tag: Tag to be used for the model. This is usually generated for you. model_args: Args to be passed into AutoModelForSeq2SeqLM or AutoModelForCausalLM (+ TF, Flax variants). **attrs: Kwargs to be passed into AutoModelForSeq2SeqLM or AutoModelForCausalLM (+ TF, Flax variants). @@ -139,7 +139,7 @@ def import_model( config, attrs = t.cast( "tuple[transformers.PretrainedConfig, dict[str, t.Any]]", transformers.AutoConfig.from_pretrained( - model_name, return_unused_kwargs=True, trust_remote_code=trust_remote_code, **hub_attrs, **copied_attrs + model_id, return_unused_kwargs=True, trust_remote_code=trust_remote_code, **hub_attrs, **copied_attrs ), ) @@ -156,13 +156,13 @@ def import_model( getattr( transformers, FRAMEWORK_TO_AUTOCLASS_MAPPING[_model_framework][TaskType[task_type].value - 1] ).from_pretrained( - model_name, *model_args, config=config, trust_remote_code=trust_remote_code, **hub_attrs, **attrs + model_id, *model_args, config=config, trust_remote_code=trust_remote_code, **hub_attrs, **attrs ), custom_objects={ "tokenizer": t.cast( "LLMTokenizer", transformers.AutoTokenizer.from_pretrained( - model_name, config=config, trust_remote_code=trust_remote_code, **hub_attrs, **tokenizer_kwds + model_id, config=config, trust_remote_code=trust_remote_code, **hub_attrs, **tokenizer_kwds ), ) }, @@ -179,7 +179,7 @@ def import_model( torch.cuda.empty_cache() -_required_namespace = {"default_model", "pretrained"} +_required_namespace = {"default_id", "model_ids"} _reserved_namespace = _required_namespace | { "config_class", @@ -192,11 +192,11 @@ _reserved_namespace = _required_namespace | { class LLMInterface(ABC): """This defines the loose contract for all openllm.LLM implementations.""" - 
default_model: str - """Return the default model to use when using 'openllm start '. - This could be one of the keys in 'self.pretrained' or custom users model.""" + default_id: str + """Return the default model to use when using 'openllm start '. + This could be one of the keys in 'self.model_ids' or custom users model.""" - pretrained: list[str] + model_ids: list[str] """A list of supported pretrained models tag for this given runnable. For example: @@ -253,7 +253,7 @@ class LLMInterface(ABC): pass def import_model( - self, pretrained: str, tag: bentoml.Tag, *args: t.Any, tokenizer_kwds: dict[str, t.Any], **attrs: t.Any + self, model_id: str, tag: bentoml.Tag, *args: t.Any, tokenizer_kwds: dict[str, t.Any], **attrs: t.Any ) -> bentoml.Model: """This function can be implemented if default import_model doesn't satisfy your needs.""" raise NotImplementedError @@ -275,6 +275,8 @@ class LLMMetaclass(ABCMeta): namespace["__annotations__"] = annotations_dict # NOTE: check for required attributes + if "__openllm_internal__" not in namespace: + _required_namespace.add("config_class") for k in _required_namespace: if k not in namespace: raise RuntimeError(f"Missing required key '{k}'. 
Make sure to define it within the LLM subclass.") @@ -378,13 +380,13 @@ class LLM(LLMInterface, metaclass=LLMMetaclass): @classmethod def from_pretrained( - cls, pretrained: str | None = None, llm_config: openllm.LLMConfig | None = None, *args: t.Any, **attrs: t.Any + cls, model_id: str | None = None, llm_config: openllm.LLMConfig | None = None, *args: t.Any, **attrs: t.Any ) -> LLM: - return cls(pretrained=pretrained, llm_config=llm_config, *args, **attrs) + return cls(model_id=model_id, llm_config=llm_config, *args, **attrs) def __init__( self, - pretrained: str | None = None, + model_id: str | None = None, llm_config: openllm.LLMConfig | None = None, *args: t.Any, **attrs: t.Any, @@ -408,7 +410,7 @@ class LLM(LLMInterface, metaclass=LLMMetaclass): ```python def import_model( self, - pretrained: str, + model_id: str, tag: bentoml.Tag, *args: t.Any, tokenizer_kwds: dict[str, t.Any], @@ -417,11 +419,11 @@ class LLM(LLMInterface, metaclass=LLMMetaclass): return bentoml.transformers.save_model( tag, transformers.AutoModelForCausalLM.from_pretrained( - pretrained, device_map="auto", torch_dtype=torch.bfloat16, **attrs + model_id, device_map="auto", torch_dtype=torch.bfloat16, **attrs ), custom_objects={ "tokenizer": transformers.AutoTokenizer.from_pretrained( - pretrained, padding_size="left", **tokenizer_kwds + model_id, padding_size="left", **tokenizer_kwds ) }, ) @@ -440,14 +442,14 @@ class LLM(LLMInterface, metaclass=LLMMetaclass): Note: If you implement your own `import_model`, then `import_kwargs` will be the default kwargs for every load. You can still override those via ``openllm.Runner``. - Note that this tag will be generated based on `self.default_model` or the given `pretrained` kwds. + Note that this tag will be generated based on `self.default_id` or the given `pretrained` kwds. passed from the __init__ constructor. ``llm_post_init`` can also be implemented if you need to do any additional initialization after everything is setup. 
Args: - pretrained: The pretrained model to use. Defaults to None. It will use 'self.default_model' if None. + model_id: The pretrained model to use. Defaults to None. If None, 'self.default_id' will be used. llm_config: The config to use for this LLM. Defaults to None. If not passed, we will use 'self.config_class' to construct default configuration. *args: The args to be passed to the model. @@ -462,14 +464,14 @@ class LLM(LLMInterface, metaclass=LLMMetaclass): # The rests of the kwargs that is not used by the config class should be stored into __openllm_extras__. attrs = self.config.__openllm_extras__ - if pretrained is None: - pretrained = os.environ.get(self.config.__openllm_env__.pretrained, None) - if not pretrained: - assert self.default_model, "A default model is required for any LLM." - pretrained = self.default_model + if model_id is None: + model_id = os.environ.get(self.config.__openllm_env__.model_id, None) + if not model_id: + assert self.default_id, "A default model is required for any LLM." + model_id = self.default_id # NOTE: This is the actual given path or pretrained weight for this LLM. - self._pretrained = pretrained + self._model_id = model_id # NOTE: Save the args and kwargs for latter load self._llm_args = args @@ -491,19 +493,19 @@ class LLM(LLMInterface, metaclass=LLMMetaclass): # NOTE: The section below defines a loose contract with langchain's LLM interface. 
@property def llm_type(self) -> str: - return convert_transformers_model_name(self._pretrained) + return convert_transformers_model_name(self._model_id) @property def identifying_params(self) -> dict[str, t.Any]: return { "configuration": self.config.model_dump_json().decode(), - "pretrained": orjson.dumps(self.pretrained).decode(), + "model_ids": orjson.dumps(self.model_ids).decode(), } @t.overload def make_tag( self, - model_name_or_path: str | None = None, + model_id: str | None = None, return_unused_kwargs: t.Literal[False] = ..., trust_remote_code: bool = ..., **attrs: t.Any, @@ -513,7 +515,7 @@ class LLM(LLMInterface, metaclass=LLMMetaclass): @t.overload def make_tag( self, - model_name_or_path: str | None = None, + model_id: str | None = None, return_unused_kwargs: t.Literal[True] = ..., trust_remote_code: bool = ..., **attrs: t.Any, @@ -522,7 +524,7 @@ class LLM(LLMInterface, metaclass=LLMMetaclass): def make_tag( self, - model_name_or_path: str | None = None, + model_id: str | None = None, return_unused_kwargs: bool = False, trust_remote_code: bool = False, **attrs: t.Any, @@ -543,8 +545,8 @@ class LLM(LLMInterface, metaclass=LLMMetaclass): Returns: A tuple of ``bentoml.Tag`` and a dict of unused kwargs. 
""" - if model_name_or_path is None: - model_name_or_path = self._pretrained + if model_id is None: + model_id = self._model_id if "return_unused_kwargs" in attrs: logger.debug("Ignoring 'return_unused_kwargs' in 'generate_tag_from_model_name'.") @@ -553,12 +555,12 @@ class LLM(LLMInterface, metaclass=LLMMetaclass): config, attrs = t.cast( "tuple[transformers.PretrainedConfig, dict[str, t.Any]]", transformers.AutoConfig.from_pretrained( - model_name_or_path, return_unused_kwargs=True, trust_remote_code=trust_remote_code, **attrs + model_id, return_unused_kwargs=True, trust_remote_code=trust_remote_code, **attrs ), ) - name = convert_transformers_model_name(model_name_or_path) + name = convert_transformers_model_name(model_id) - if os.path.exists(os.path.dirname(model_name_or_path)): + if os.path.exists(os.path.dirname(model_id)): # If the model_name_or_path is a path, we assume it's a local path, # then users must pass a version for this. model_version = attrs.pop("openllm_model_version", None) @@ -590,7 +592,7 @@ class LLM(LLMInterface, metaclass=LLMMetaclass): "Given %s from '%s' doesn't contain a commit hash. 
We will generate" " the tag without specific version.", t.cast("type[transformers.PretrainedConfig]", config.__class__), - model_name_or_path, + model_id, ) tag = bentoml.Tag.from_taglike(f"{self.__llm_implementation__}-{name}:{model_version}") @@ -621,7 +623,7 @@ class LLM(LLMInterface, metaclass=LLMMetaclass): } return self.import_model( - self._pretrained, + self._model_id, tag, *self._llm_args, tokenizer_kwds=tokenizer_kwds, diff --git a/src/openllm/cli.py b/src/openllm/cli.py index a9bb1d40..0f79efec 100644 --- a/src/openllm/cli.py +++ b/src/openllm/cli.py @@ -388,8 +388,8 @@ def start_model_command( docstring = f"""\ {ModelEnv.start_docstring} \b -The available pretrained models to use with '{model_name}' are: {for_doc.pretrained} [default: {for_doc.default_model}] -Tip: One can pass one of the aforementioned to '--pretrained' to use other pretrained weights. +Available model_id(s) to use with '{model_name}' are: {for_doc.model_ids} [default: {for_doc.default_id}] +Tip: One can pass one of the aforementioned to '--model-id' to use other pretrained weights. """ command_attrs: dict[str, t.Any] = { "name": ModelEnv.model_name, @@ -430,9 +430,7 @@ Tip: One can pass one of the aforementioned to '--pretrained' to use other pretr @llm_config.to_click_options @parse_serve_args(_serve_grpc) @click.option("--server-timeout", type=int, default=3600, help="Server timeout in seconds") - @click.option( - "--pretrained", type=click.STRING, default=None, help="Optional pretrained name or path to fine-tune weight." - ) + @model_id_option @click.option( "--device", type=tuple, @@ -444,18 +442,18 @@ Tip: One can pass one of the aforementioned to '--pretrained' to use other pretr ) def model_start( server_timeout: int, - pretrained: str | None, + model_id: str | None, device: tuple[str, ...] 
| None, **attrs: t.Any, ) -> openllm.LLMConfig: config, server_attrs = llm_config.model_validate_click(**attrs) if ModelEnv.get_framework_env() == "flax": - llm = openllm.AutoFlaxLLM.for_model(model_name, pretrained=pretrained, llm_config=config) + llm = openllm.AutoFlaxLLM.for_model(model_name, model_id=model_id, llm_config=config) elif ModelEnv.get_framework_env() == "tf": - llm = openllm.AutoTFLLM.for_model(model_name, pretrained=pretrained, llm_config=config) + llm = openllm.AutoTFLLM.for_model(model_name, model_id=model_id, llm_config=config) else: - llm = openllm.AutoLLM.for_model(model_name, pretrained=pretrained, llm_config=config) + llm = openllm.AutoLLM.for_model(model_name, model_id=model_id, llm_config=config) # NOTE: We need to initialize llm here first to check if the model is already downloaded to # avoid deadlock before the subprocess forking. @@ -580,6 +578,12 @@ output_option = click.option( default="pretty", help="Showing output type. Default to 'pretty'", ) +model_id_option = click.option( + "--model-id", + type=click.STRING, + default=None, + help="Optional model_id name or path for (fine-tune) weight.", +) def cli_factory() -> click.Group: @@ -626,16 +630,15 @@ def cli_factory() -> click.Group: @click.argument( "model_name", type=click.Choice([inflection.dasherize(name) for name in openllm.CONFIG_MAPPING.keys()]) ) - @click.option("--pretrained", default=None, help="Given pretrained model name for the given model name [Optional].") - @click.option("--overwrite", is_flag=True, help="Overwrite existing Bento for given LLM if it already exists.") + @model_id_option @output_option - def build(model_name: str, pretrained: str | None, overwrite: bool, output: OutputLiteral): + @click.option("--overwrite", is_flag=True, help="Overwrite existing Bento for given LLM if it already exists.") + def build(model_name: str, model_id: str | None, overwrite: bool, output: OutputLiteral): """Package a given models into a Bento. 
$ openllm build flan-t5 \b - NOTE: To run a container built from this Bento with GPU support, make sure to have https://github.com/NVIDIA/nvidia-container-toolkit install locally. """ @@ -645,7 +648,7 @@ def cli_factory() -> click.Group: bento, _previously_built = openllm.build( model_name, __cli__=True, - pretrained=pretrained, + model_id=model_id, _overwrite_existing_bento=overwrite, ) @@ -684,13 +687,13 @@ def cli_factory() -> click.Group: else: failed_initialized: list[tuple[str, Exception]] = [] - json_data: dict[str, dict[t.Literal["pretrained", "description"], t.Any]] = {} + json_data: dict[str, dict[t.Literal["model_id", "description"], t.Any]] = {} for m in models: try: model = openllm.AutoLLM.for_model(m) docs = inspect.cleandoc(model.config.__doc__ or "(No description)") - json_data[m] = {"pretrained": model.pretrained, "description": docs} + json_data[m] = {"model_id": model.model_ids, "description": docs} except Exception as err: failed_initialized.append((m, err)) @@ -701,7 +704,7 @@ def cli_factory() -> click.Group: data: list[str | tuple[str, str, list[str]]] = [] for m, v in json_data.items(): - data.extend([(m, v["description"], v["pretrained"])]) + data.extend([(m, v["description"], v["model_id"])]) column_widths = [int(COLUMNS / 6), int(COLUMNS / 3 * 2), int(COLUMNS / 6)] if len(data) == 0 and len(failed_initialized) > 0: @@ -714,7 +717,7 @@ def cli_factory() -> click.Group: table = tabulate.tabulate( data, tablefmt="fancy_grid", - headers=["LLM", "Description", "Pretrained"], + headers=["LLM", "Description", "Models Id"], maxcolwidths=column_widths, ) @@ -739,11 +742,9 @@ def cli_factory() -> click.Group: @click.argument( "model_name", type=click.Choice([inflection.dasherize(name) for name in openllm.CONFIG_MAPPING.keys()]) ) - @click.option( - "--pretrained", type=click.STRING, default=None, help="Optional pretrained name or path to fine-tune weight." 
- ) + @model_id_option @output_option - def download_models(model_name: str, pretrained: str | None, output: OutputLiteral): + def download_models(model_name: str, model_id: str | None, output: OutputLiteral): """Setup LLM interactively. Note: This is useful for development and setup for fine-tune. @@ -751,11 +752,11 @@ def cli_factory() -> click.Group: config = openllm.AutoConfig.for_model(model_name) env = config.__openllm_env__.get_framework_env() if env == "flax": - model = openllm.AutoFlaxLLM.for_model(model_name, pretrained=pretrained, llm_config=config) + model = openllm.AutoFlaxLLM.for_model(model_name, model_id=model_id, llm_config=config) elif env == "tf": - model = openllm.AutoTFLLM.for_model(model_name, pretrained=pretrained, llm_config=config) + model = openllm.AutoTFLLM.for_model(model_name, model_id=model_id, llm_config=config) else: - model = openllm.AutoLLM.for_model(model_name, pretrained=pretrained, llm_config=config) + model = openllm.AutoLLM.for_model(model_name, model_id=model_id, llm_config=config) tag = model.make_tag(trust_remote_code=config.__openllm_trust_remote_code__) @@ -829,7 +830,7 @@ def cli_factory() -> click.Group: ) @output_option @click.argument("query", type=click.STRING) - def query( + def query_( query: str, endpoint: str, timeout: int, @@ -838,7 +839,7 @@ def cli_factory() -> click.Group: ): """Ask a LLM interactively, from a terminal. - $ openllm query --endpoint http://12.323.2.1 "What is the meaning of life?" + $ openllm query --endpoint http://12.323.2.1:3000 "What is the meaning of life?" 
""" if server_type == "grpc": endpoint = re.sub(r"http://", "", endpoint) @@ -870,7 +871,7 @@ def cli_factory() -> click.Group: _echo(res["responses"], fg="white") if t.TYPE_CHECKING: - assert download_models and build and models and start and start_grpc and query and prune + assert download_models and build and models and start and start_grpc and query_ and prune if psutil.WINDOWS: sys.stdout.reconfigure(encoding="utf-8") # type: ignore diff --git a/src/openllm/models/auto/factory.py b/src/openllm/models/auto/factory.py index 48620bd5..f2af4d06 100644 --- a/src/openllm/models/auto/factory.py +++ b/src/openllm/models/auto/factory.py @@ -47,7 +47,7 @@ class _BaseAutoLLMClass: def for_model( cls, model_name: str, - pretrained: str | None = None, + model_id: str | None = None, return_runner_kwargs: t.Literal[False] = ..., llm_config: openllm.LLMConfig | None = ..., **attrs: t.Any, @@ -59,7 +59,7 @@ class _BaseAutoLLMClass: def for_model( cls, model_name: str, - pretrained: str | None = None, + model_id: str | None = None, return_runner_kwargs: t.Literal[True] = ..., llm_config: openllm.LLMConfig | None = ..., **attrs: t.Any, @@ -70,11 +70,18 @@ class _BaseAutoLLMClass: def for_model( cls, model_name: str, - pretrained: str | None = None, + model_id: str | None = None, return_runner_kwargs: bool = False, llm_config: openllm.LLMConfig | None = ..., **attrs: t.Any, ) -> openllm.LLM | tuple[openllm.LLM, dict[str, t.Any]]: + """The lower level API for creating a LLM instance. 
+ + ```python + >>> import openllm + >>> llm = openllm.AutoLLM.for_model("flan-t5") + ``` + """ runner_kwargs_name = [ "models", "max_batch_size", @@ -88,7 +95,7 @@ class _BaseAutoLLMClass: # The rest of kwargs is now passed to config llm_config = AutoConfig.for_model(model_name, **attrs) if type(llm_config) in cls._model_mapping.keys(): - llm = cls._model_mapping[type(llm_config)].from_pretrained(pretrained, llm_config=llm_config, **attrs) + llm = cls._model_mapping[type(llm_config)].from_pretrained(model_id, llm_config=llm_config, **attrs) if not return_runner_kwargs: return llm return llm, to_runner_attrs @@ -98,19 +105,19 @@ class _BaseAutoLLMClass: ) @classmethod - def create_runner(cls, model_name: str, pretrained: str | None = None, **attrs: t.Any) -> bentoml.Runner: + def create_runner(cls, model_name: str, model_id: str | None = None, **attrs: t.Any) -> bentoml.Runner: """ Create a LLM Runner for the given model name. Args: model_name: The model name to instantiate. - pretrained: The pretrained model name to instantiate. + model_id: The pretrained model name to instantiate. **attrs: Additional keyword arguments passed along to the specific configuration class. Returns: A LLM instance. """ - llm, runner_attrs = cls.for_model(model_name, pretrained, return_runner_kwargs=True, **attrs) + llm, runner_attrs = cls.for_model(model_name, model_id, return_runner_kwargs=True, **attrs) return llm.to_runner(**runner_attrs) @classmethod diff --git a/src/openllm/models/chatglm/configuration_chatglm.py b/src/openllm/models/chatglm/configuration_chatglm.py index 783b844c..d5230d2e 100644 --- a/src/openllm/models/chatglm/configuration_chatglm.py +++ b/src/openllm/models/chatglm/configuration_chatglm.py @@ -55,7 +55,7 @@ class ChatGLMConfig( START_CHATGLM_COMMAND_DOCSTRING = """\ -Run a LLMServer for ChatGLM model and pretrained. +Run a LLMServer for ChatGLM model. 
\b > See more information about ChatGLM at [THUDM/ChatGLM-6b](https://huggingface.co/thudm/chatglm-6b) @@ -67,7 +67,11 @@ Currently, ChatGLM only supports PyTorch. Make sure ``torch`` is available in yo \b ChatGLM Runner will use THUDM/ChatGLM-6b as the default model. To change any to any other ChatGLM -saved pretrained, or a fine-tune ChatGLM, provide ``OPENLLM_CHATGLM_PRETRAINED='thudm/chatglm-6b-int8'`` +saved pretrained, or a fine-tune ChatGLM, provide ``OPENLLM_CHATGLM_MODEL_ID='thudm/chatglm-6b-int8'`` +or provide `--model-id` flag when running ``openllm start chatglm``: + +\b +$ openllm start chatglm --model-id='thudm/chatglm-6b-int8' """ DEFAULT_PROMPT_TEMPLATE = """{instruction}""" diff --git a/src/openllm/models/chatglm/modeling_chatglm.py b/src/openllm/models/chatglm/modeling_chatglm.py index bce2f513..01b1154b 100644 --- a/src/openllm/models/chatglm/modeling_chatglm.py +++ b/src/openllm/models/chatglm/modeling_chatglm.py @@ -41,22 +41,27 @@ class InvalidScoreLogitsProcessor(LogitsProcessor): class ChatGLM(openllm.LLM): __openllm_internal__ = True - default_model = "thudm/chatglm-6b-int4" + default_id = "thudm/chatglm-6b-int4" - pretrained = ["thudm/chatglm-6b", "thudm/chatglm-6b-int8", "thudm/chatglm-6b-int4"] + model_ids = ["thudm/chatglm-6b", "thudm/chatglm-6b-int8", "thudm/chatglm-6b-int4"] device = torch.device("cuda") def import_model( - self, pretrained: str, tag: bentoml.Tag, *model_args: t.Any, tokenizer_kwds: dict[str, t.Any], **attrs: t.Any + self, + model_id: str, + tag: bentoml.Tag, + *model_args: t.Any, + tokenizer_kwds: dict[str, t.Any], + **attrs: t.Any, ) -> bentoml.Model: trust_remote_code = attrs.pop("trust_remote_code", True) return bentoml.transformers.save_model( tag, - transformers.AutoModel.from_pretrained(pretrained, trust_remote_code=trust_remote_code), + transformers.AutoModel.from_pretrained(model_id, trust_remote_code=trust_remote_code), custom_objects={ "tokenizer": transformers.AutoTokenizer.from_pretrained( - pretrained, 
trust_remote_code=trust_remote_code, **tokenizer_kwds + model_id, trust_remote_code=trust_remote_code, **tokenizer_kwds ) }, ) diff --git a/src/openllm/models/dolly_v2/configuration_dolly_v2.py b/src/openllm/models/dolly_v2/configuration_dolly_v2.py index 42a4b0bb..2b148bce 100644 --- a/src/openllm/models/dolly_v2/configuration_dolly_v2.py +++ b/src/openllm/models/dolly_v2/configuration_dolly_v2.py @@ -51,7 +51,7 @@ class DollyV2Config( START_DOLLY_V2_COMMAND_DOCSTRING = """\ -Run a LLMServer for dolly-v2 model and pretrained. +Run a LLMServer for dolly-v2 model. \b > See more information about dolly-v2 at [databricks/dolly-v2-3b](https://huggingface.co/databricks/dolly-v2-3b) @@ -63,7 +63,11 @@ Currently, dolly-v2 only supports PyTorch. Make sure ``torch`` is available in y \b Dolly-v2 Runner will use databricks/dolly-v2-3b as the default model. To change any to any other dolly-v2 -saved pretrained, or a fine-tune dolly-v2, provide ``OPENLLM_DOLLY_V2_PRETRAINED='databricks/dolly-v2-7b'`` +saved pretrained, or a fine-tune dolly-v2, provide ``OPENLLM_DOLLY_V2_MODEL_ID='databricks/dolly-v2-7b'`` +or provide `--model-id` flag when running ``openllm start dolly-v2``: + +\b +$ openllm start dolly-v2 --model-id databricks/dolly-v2-7b """ INSTRUCTION_KEY = "### Instruction:" diff --git a/src/openllm/models/dolly_v2/modeling_dolly_v2.py b/src/openllm/models/dolly_v2/modeling_dolly_v2.py index fc1434ee..d01b8cfe 100644 --- a/src/openllm/models/dolly_v2/modeling_dolly_v2.py +++ b/src/openllm/models/dolly_v2/modeling_dolly_v2.py @@ -38,9 +38,9 @@ class DollyV2(openllm.LLM): __openllm_internal__ = True - default_model = "databricks/dolly-v2-3b" + default_id = "databricks/dolly-v2-3b" - pretrained = ["databricks/dolly-v2-3b", "databricks/dolly-v2-7b", "databricks/dolly-v2-12b"] + model_ids = ["databricks/dolly-v2-3b", "databricks/dolly-v2-7b", "databricks/dolly-v2-12b"] import_kwargs = { "device_map": "auto" if torch.cuda.is_available() and torch.cuda.device_count() > 1 else 
None, @@ -51,15 +51,15 @@ class DollyV2(openllm.LLM): device = torch.device("cuda" if torch.cuda.is_available() else "cpu") def import_model( - self, pretrained: str, tag: bentoml.Tag, *model_args: t.Any, tokenizer_kwds: dict[str, t.Any], **attrs: t.Any + self, model_id: str, tag: bentoml.Tag, *model_args: t.Any, tokenizer_kwds: dict[str, t.Any], **attrs: t.Any ) -> bentoml.Model: trust_remote_code = attrs.pop("trust_remote_code", True) torch_dtype = attrs.pop("torch_dtype", torch.bfloat16) device_map = attrs.pop("device_map", "auto") - tokenizer = transformers.AutoTokenizer.from_pretrained(pretrained, **tokenizer_kwds) + tokenizer = transformers.AutoTokenizer.from_pretrained(model_id, **tokenizer_kwds) pipeline = transformers.pipeline( - model=pretrained, + model=model_id, tokenizer=tokenizer, trust_remote_code=trust_remote_code, torch_dtype=torch_dtype, diff --git a/src/openllm/models/falcon/configuration_falcon.py b/src/openllm/models/falcon/configuration_falcon.py index bdd2e453..a2704e25 100644 --- a/src/openllm/models/falcon/configuration_falcon.py +++ b/src/openllm/models/falcon/configuration_falcon.py @@ -40,7 +40,7 @@ class FalconConfig( START_FALCON_COMMAND_DOCSTRING = """\ -Run a LLMServer for FalconLM model and pretrained. +Run a LLMServer for FalconLM model. \b > See more information about falcon at [tiiuae/falcon-7b](https://huggingface.co/tiiuae/falcon-7b) @@ -52,7 +52,11 @@ Currently, FalconLM only supports PyTorch. Make sure ``torch`` is available in y \b FalconLM Runner will use tiiuae/falcon-7b as the default model. 
To change any to any other FalconLM -saved pretrained, or a fine-tune FalconLM, provide ``OPENLLM_FALCON_PRETRAINED='tiiuae/falcon-7b-instruct'`` +saved pretrained, or a fine-tune FalconLM, provide ``OPENLLM_FALCON_MODEL_ID='tiiuae/falcon-7b-instruct'`` +or provide `--model-id` flag when running ``openllm start falcon``: + +\b +$ openllm start falcon --model-id tiiuae/falcon-7b-instruct """ DEFAULT_PROMPT_TEMPLATE = """{context} diff --git a/src/openllm/models/falcon/modeling_falcon.py b/src/openllm/models/falcon/modeling_falcon.py index d44a0f84..41535d0f 100644 --- a/src/openllm/models/falcon/modeling_falcon.py +++ b/src/openllm/models/falcon/modeling_falcon.py @@ -34,9 +34,9 @@ else: class Falcon(openllm.LLM): __openllm_internal__ = True - default_model = "tiiuae/falcon-7b" + default_id = "tiiuae/falcon-7b" - pretrained = ["tiiuae/falcon-7b", "tiiuae/falcon-40b", "tiiuae/falcon-7b-instruct", "tiiuae/falcon-40b-instruct"] + model_ids = ["tiiuae/falcon-7b", "tiiuae/falcon-40b", "tiiuae/falcon-7b-instruct", "tiiuae/falcon-40b-instruct"] import_kwargs = { "torch_dtype": torch.bfloat16, @@ -44,15 +44,15 @@ class Falcon(openllm.LLM): } def import_model( - self, pretrained: str, tag: bentoml.Tag, *model_args: t.Any, tokenizer_kwds: dict[str, t.Any], **attrs: t.Any + self, model_id: str, tag: bentoml.Tag, *model_args: t.Any, tokenizer_kwds: dict[str, t.Any], **attrs: t.Any ) -> bentoml.Model: trust_remote_code = attrs.pop("trust_remote_code", True) torch_dtype = attrs.pop("torch_dtype", torch.bfloat16) device_map = attrs.pop("device_map", "auto") - tokenizer = transformers.AutoTokenizer.from_pretrained(pretrained) + tokenizer = transformers.AutoTokenizer.from_pretrained(model_id) model = transformers.AutoModelForCausalLM.from_pretrained( - pretrained, + model_id, trust_remote_code=trust_remote_code, torch_dtype=torch_dtype, device_map=device_map, diff --git a/src/openllm/models/flan_t5/configuration_flan_t5.py b/src/openllm/models/flan_t5/configuration_flan_t5.py index 
9f0584e8..d67f972c 100644 --- a/src/openllm/models/flan_t5/configuration_flan_t5.py +++ b/src/openllm/models/flan_t5/configuration_flan_t5.py @@ -16,7 +16,7 @@ from __future__ import annotations import openllm START_FLAN_T5_COMMAND_DOCSTRING = """\ -Run a LLMServer for FLAN-T5 model and pretrained. +Run a LLMServer for FLAN-T5 model. \b > See more information about FLAN-T5 at [huggingface/transformers](https://huggingface.co/docs/transformers/model_doc/flan-t5) @@ -34,7 +34,11 @@ By default, this model will use the PyTorch model for inference. However, this m \b FLAN-T5 Runner will use google/flan-t5-large as the default model. To change any to any other FLAN-T5 -saved pretrained, or a fine-tune FLAN-T5, provide ``OPENLLM_FLAN_T5_PRETRAINED='google/flan-t5-xxl'`` +saved pretrained, or a fine-tune FLAN-T5, provide ``OPENLLM_FLAN_T5_MODEL_ID='google/flan-t5-xxl'`` +or provide `--model-id` flag when running ``openllm start flan-t5``: + +\b +$ openllm start flan-t5 --model-id google/flan-t5-xxl """ DEFAULT_PROMPT_TEMPLATE = """Answer the following question:\nQuestion: {instruction}\nAnswer:""" diff --git a/src/openllm/models/flan_t5/modeling_flan_t5.py b/src/openllm/models/flan_t5/modeling_flan_t5.py index e748076e..658436e0 100644 --- a/src/openllm/models/flan_t5/modeling_flan_t5.py +++ b/src/openllm/models/flan_t5/modeling_flan_t5.py @@ -29,9 +29,9 @@ else: class FlanT5(openllm.LLM): __openllm_internal__ = True - default_model = "google/flan-t5-large" + default_id = "google/flan-t5-large" - pretrained = [ + model_ids = [ "google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large", diff --git a/src/openllm/models/flan_t5/modeling_flax_flan_t5.py b/src/openllm/models/flan_t5/modeling_flax_flan_t5.py index f7827c7b..5c08db5e 100644 --- a/src/openllm/models/flan_t5/modeling_flax_flan_t5.py +++ b/src/openllm/models/flan_t5/modeling_flax_flan_t5.py @@ -24,9 +24,9 @@ from .configuration_flan_t5 import DEFAULT_PROMPT_TEMPLATE class FlaxFlanT5(openllm.LLM): 
__openllm_internal__ = True - default_model: str = "google/flan-t5-large" + default_id: str = "google/flan-t5-large" - pretrained = [ + model_ids = [ "google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large", diff --git a/src/openllm/models/flan_t5/modeling_tf_flan_t5.py b/src/openllm/models/flan_t5/modeling_tf_flan_t5.py index 6ba86448..e950919c 100644 --- a/src/openllm/models/flan_t5/modeling_tf_flan_t5.py +++ b/src/openllm/models/flan_t5/modeling_tf_flan_t5.py @@ -24,9 +24,9 @@ from .configuration_flan_t5 import DEFAULT_PROMPT_TEMPLATE class TFFlanT5(openllm.LLM): __openllm_internal__ = True - default_model: str = "google/flan-t5-large" + default_id: str = "google/flan-t5-large" - pretrained = [ + model_ids = [ "google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large", diff --git a/src/openllm/models/stablelm/configuration_stablelm.py b/src/openllm/models/stablelm/configuration_stablelm.py index 7552578e..4dd777e1 100644 --- a/src/openllm/models/stablelm/configuration_stablelm.py +++ b/src/openllm/models/stablelm/configuration_stablelm.py @@ -38,7 +38,7 @@ class StableLMConfig(openllm.LLMConfig, name_type="lowercase", url="https://gith START_STABLELM_COMMAND_DOCSTRING = """\ -Run a LLMServer for StableLM model and pretrained. +Run a LLMServer for StableLM model. \b > See more information about StableLM at [stabilityai/stablelm-base-alpha-3b](https://huggingface.co/stabilityai/stablelm-base-alpha-3b) @@ -50,7 +50,11 @@ Currently, StableLM only supports PyTorch. Make sure ``torch`` is available in y \b StableLM Runner will use stabilityai/stablelm-base-alpha-3b as the default model. 
To change any to any other StableLM -saved pretrained, or a fine-tune StableLM, provide ``OPENLLM_STABLELM_PRETRAINED='stabilityai/stablelm-tuned-alpha-3b'`` +saved pretrained, or a fine-tune StableLM, provide ``OPENLLM_STABLELM_MODEL_ID='stabilityai/stablelm-tuned-alpha-3b'`` +or provide `--model-id` flag when running ``openllm start stablelm``: + +\b +$ openllm start stablelm --model-id 'stabilityai/stablelm-tuned-alpha-3b' """ SYSTEM_PROMPT = """<|SYSTEM|># StableLM Tuned (Alpha version) diff --git a/src/openllm/models/stablelm/modeling_stablelm.py b/src/openllm/models/stablelm/modeling_stablelm.py index e0e574e8..12817e2c 100644 --- a/src/openllm/models/stablelm/modeling_stablelm.py +++ b/src/openllm/models/stablelm/modeling_stablelm.py @@ -43,9 +43,9 @@ class StableLM(openllm.LLM): __openllm_internal__ = True load_in_mha = True - default_model = "stabilityai/stablelm-tuned-alpha-3b" + default_id = "stabilityai/stablelm-tuned-alpha-3b" - pretrained = [ + model_ids = [ "stabilityai/stablelm-tuned-alpha-3b", "stabilityai/stablelm-tuned-alpha-7b", "stabilityai/stablelm-base-alpha-3b", @@ -70,7 +70,7 @@ class StableLM(openllm.LLM): use_default_prompt_template: bool = True, **attrs: t.Any, ) -> tuple[str, dict[str, t.Any], dict[str, t.Any]]: - if "tuned" in self._pretrained and use_default_prompt_template: + if "tuned" in self._model_id and use_default_prompt_template: prompt_variables = { k: v for k, v in attrs.items() diff --git a/src/openllm/models/starcoder/configuration_starcoder.py b/src/openllm/models/starcoder/configuration_starcoder.py index cfc874ad..2dc9b307 100644 --- a/src/openllm/models/starcoder/configuration_starcoder.py +++ b/src/openllm/models/starcoder/configuration_starcoder.py @@ -42,7 +42,7 @@ class StarCoderConfig( START_STARCODER_COMMAND_DOCSTRING = """\ -Run a LLMServer for StarCoder model and pretrained. +Run a LLMServer for StarCoder model. 
\b > See more information about StarCoder at [bigcode/starcoder](https://huggingface.co/bigcode/starcoder) @@ -54,7 +54,11 @@ Currently, StarCoder only supports PyTorch. Make sure ``torch`` is available in \b StarCoder Runner will use bigcode/starcoder as the default model. To change any to any other StarCoder -saved pretrained, or a fine-tune StarCoder, provide ``OPENLLM_STARCODER_PRETRAINED='bigcode/starcoder'`` +saved pretrained, or a fine-tune StarCoder, provide ``OPENLLM_STARCODER_MODEL_ID='bigcode/starcoder'`` +or provide `--model-id` flag when running ``openllm start starcoder``: + +\b +$ openllm start starcoder --model-id 'bigcode/starcoder' """ DEFAULT_PROMPT_TEMPLATE = """{instruction}""" diff --git a/src/openllm/models/starcoder/modeling_starcoder.py b/src/openllm/models/starcoder/modeling_starcoder.py index c84ddf8f..00f4c639 100644 --- a/src/openllm/models/starcoder/modeling_starcoder.py +++ b/src/openllm/models/starcoder/modeling_starcoder.py @@ -40,21 +40,21 @@ FIM_INDICATOR = "" class StarCoder(openllm.LLM): __openllm_internal__ = True - default_model = "bigcode/starcoder" + default_id = "bigcode/starcoder" - pretrained = ["bigcode/starcoder", "bigcode/starcoderbase"] + model_ids = ["bigcode/starcoder", "bigcode/starcoderbase"] device = torch.device("cuda") def import_model( self, - pretrained: str, + model_id: str, tag: bentoml.Tag, *model_args: t.Any, tokenizer_kwds: dict[str, t.Any], **attrs: t.Any, ) -> bentoml.Model: - tokenizer = transformers.AutoTokenizer.from_pretrained(pretrained, **tokenizer_kwds) + tokenizer = transformers.AutoTokenizer.from_pretrained(model_id, **tokenizer_kwds) tokenizer.add_special_tokens( { "additional_special_tokens": [EOD, FIM_PREFIX, FIM_MIDDLE, FIM_SUFFIX, FIM_PAD], @@ -62,7 +62,7 @@ class StarCoder(openllm.LLM): } ) - model = transformers.AutoModelForCausalLM.from_pretrained(pretrained, **attrs) + model = transformers.AutoModelForCausalLM.from_pretrained(model_id, **attrs) try: return 
bentoml.transformers.save_model(tag, model, custom_objects={"tokenizer": tokenizer}) diff --git a/src/openllm/utils/__init__.py b/src/openllm/utils/__init__.py index 3d883698..cf042b4a 100644 --- a/src/openllm/utils/__init__.py +++ b/src/openllm/utils/__init__.py @@ -91,8 +91,8 @@ class ModelEnv: return f"OPENLLM_{self.model_name.upper()}_CONFIG" @property - def pretrained(self) -> str: - return f"OPENLLM_{self.model_name.upper()}_PRETRAINED" + def model_id(self) -> str: + return f"OPENLLM_{self.model_name.upper()}_MODEL_ID" @property def bettertransformer(self) -> str: