refactor: pretrained => model_id

I think model_id makes more sense than calling it pretrained

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
Aaron
2023-06-10 17:36:02 -04:00
parent 4841051fc5
commit 05fa34f9e6
18 changed files with 155 additions and 116 deletions

View File

@@ -92,14 +92,14 @@ class TaskType(enum.Enum, metaclass=TypeMeta):
def import_model(
model_name: str,
model_id: str,
tag: bentoml.Tag,
_model_framework: str,
*model_args: t.Any,
tokenizer_kwds: dict[str, t.Any],
**attrs: t.Any,
):
"""Auto detect model type from given model_name and import it to bentoml's model store.
"""Auto detect model type from given model_id and import it to bentoml's model store.
For all kwargs, it will be parsed into `transformers.AutoConfig.from_pretrained` first,
returning all of the unused kwargs.
@@ -111,7 +111,7 @@ def import_model(
Refer to Transformers documentation for more information about kwargs.
Args:
model_name: Model name to be imported. use `openllm models` to see available entries
model_id: Model id to be imported. See `openllm models` for all supported models.
tag: Tag to be used for the model. This is usually generated for you.
model_args: Args to be passed into AutoModelForSeq2SeqLM or AutoModelForCausalLM (+ TF, Flax variants).
**attrs: Kwargs to be passed into AutoModelForSeq2SeqLM or AutoModelForCausalLM (+ TF, Flax variants).
@@ -139,7 +139,7 @@ def import_model(
config, attrs = t.cast(
"tuple[transformers.PretrainedConfig, dict[str, t.Any]]",
transformers.AutoConfig.from_pretrained(
model_name, return_unused_kwargs=True, trust_remote_code=trust_remote_code, **hub_attrs, **copied_attrs
model_id, return_unused_kwargs=True, trust_remote_code=trust_remote_code, **hub_attrs, **copied_attrs
),
)
@@ -156,13 +156,13 @@ def import_model(
getattr(
transformers, FRAMEWORK_TO_AUTOCLASS_MAPPING[_model_framework][TaskType[task_type].value - 1]
).from_pretrained(
model_name, *model_args, config=config, trust_remote_code=trust_remote_code, **hub_attrs, **attrs
model_id, *model_args, config=config, trust_remote_code=trust_remote_code, **hub_attrs, **attrs
),
custom_objects={
"tokenizer": t.cast(
"LLMTokenizer",
transformers.AutoTokenizer.from_pretrained(
model_name, config=config, trust_remote_code=trust_remote_code, **hub_attrs, **tokenizer_kwds
model_id, config=config, trust_remote_code=trust_remote_code, **hub_attrs, **tokenizer_kwds
),
)
},
@@ -179,7 +179,7 @@ def import_model(
torch.cuda.empty_cache()
_required_namespace = {"default_model", "pretrained"}
_required_namespace = {"default_id", "model_ids"}
_reserved_namespace = _required_namespace | {
"config_class",
@@ -192,11 +192,11 @@ _reserved_namespace = _required_namespace | {
class LLMInterface(ABC):
"""This defines the loose contract for all openllm.LLM implementations."""
default_model: str
"""Return the default model to use when using 'openllm start <model_name>'.
This could be one of the keys in 'self.pretrained' or custom users model."""
default_id: str
"""Return the default model to use when using 'openllm start <model_id>'.
This could be one of the keys in 'self.model_ids' or custom users model."""
pretrained: list[str]
model_ids: list[str]
"""A list of supported model IDs for this given runnable.
For example:
@@ -253,7 +253,7 @@ class LLMInterface(ABC):
pass
def import_model(
self, pretrained: str, tag: bentoml.Tag, *args: t.Any, tokenizer_kwds: dict[str, t.Any], **attrs: t.Any
self, model_id: str, tag: bentoml.Tag, *args: t.Any, tokenizer_kwds: dict[str, t.Any], **attrs: t.Any
) -> bentoml.Model:
"""This function can be implemented if default import_model doesn't satisfy your needs."""
raise NotImplementedError
@@ -275,6 +275,8 @@ class LLMMetaclass(ABCMeta):
namespace["__annotations__"] = annotations_dict
# NOTE: check for required attributes
if "__openllm_internal__" not in namespace:
_required_namespace.add("config_class")
for k in _required_namespace:
if k not in namespace:
raise RuntimeError(f"Missing required key '{k}'. Make sure to define it within the LLM subclass.")
@@ -378,13 +380,13 @@ class LLM(LLMInterface, metaclass=LLMMetaclass):
@classmethod
def from_pretrained(
cls, pretrained: str | None = None, llm_config: openllm.LLMConfig | None = None, *args: t.Any, **attrs: t.Any
cls, model_id: str | None = None, llm_config: openllm.LLMConfig | None = None, *args: t.Any, **attrs: t.Any
) -> LLM:
return cls(pretrained=pretrained, llm_config=llm_config, *args, **attrs)
return cls(model_id=model_id, llm_config=llm_config, *args, **attrs)
def __init__(
self,
pretrained: str | None = None,
model_id: str | None = None,
llm_config: openllm.LLMConfig | None = None,
*args: t.Any,
**attrs: t.Any,
@@ -408,7 +410,7 @@ class LLM(LLMInterface, metaclass=LLMMetaclass):
```python
def import_model(
self,
pretrained: str,
model_id: str,
tag: bentoml.Tag,
*args: t.Any,
tokenizer_kwds: dict[str, t.Any],
@@ -417,11 +419,11 @@ class LLM(LLMInterface, metaclass=LLMMetaclass):
return bentoml.transformers.save_model(
tag,
transformers.AutoModelForCausalLM.from_pretrained(
pretrained, device_map="auto", torch_dtype=torch.bfloat16, **attrs
model_id, device_map="auto", torch_dtype=torch.bfloat16, **attrs
),
custom_objects={
"tokenizer": transformers.AutoTokenizer.from_pretrained(
pretrained, padding_size="left", **tokenizer_kwds
model_id, padding_size="left", **tokenizer_kwds
)
},
)
@@ -440,14 +442,14 @@ class LLM(LLMInterface, metaclass=LLMMetaclass):
Note: If you implement your own `import_model`, then `import_kwargs` will be the
default kwargs for every load. You can still override those via ``openllm.Runner``.
Note that this tag will be generated based on `self.default_model` or the given `pretrained` kwds.
Note that this tag will be generated based on `self.default_id` or the given `model_id` kwds.
passed from the __init__ constructor.
``llm_post_init`` can also be implemented if you need to do any
additional initialization after everything is setup.
Args:
pretrained: The pretrained model to use. Defaults to None. It will use 'self.default_model' if None.
model_id: The pretrained model to use. Defaults to None. If None, 'self.default_id' will be used.
llm_config: The config to use for this LLM. Defaults to None. If not passed, we will use 'self.config_class'
to construct default configuration.
*args: The args to be passed to the model.
@@ -462,14 +464,14 @@ class LLM(LLMInterface, metaclass=LLMMetaclass):
# The rests of the kwargs that is not used by the config class should be stored into __openllm_extras__.
attrs = self.config.__openllm_extras__
if pretrained is None:
pretrained = os.environ.get(self.config.__openllm_env__.pretrained, None)
if not pretrained:
assert self.default_model, "A default model is required for any LLM."
pretrained = self.default_model
if model_id is None:
model_id = os.environ.get(self.config.__openllm_env__.model_id, None)
if not model_id:
assert self.default_id, "A default model is required for any LLM."
model_id = self.default_id
# NOTE: This is the actual given path or pretrained weight for this LLM.
self._pretrained = pretrained
self._model_id = model_id
# NOTE: Save the args and kwargs for latter load
self._llm_args = args
@@ -491,19 +493,19 @@ class LLM(LLMInterface, metaclass=LLMMetaclass):
# NOTE: The section below defines a loose contract with langchain's LLM interface.
@property
def llm_type(self) -> str:
return convert_transformers_model_name(self._pretrained)
return convert_transformers_model_name(self._model_id)
@property
def identifying_params(self) -> dict[str, t.Any]:
return {
"configuration": self.config.model_dump_json().decode(),
"pretrained": orjson.dumps(self.pretrained).decode(),
"model_ids": orjson.dumps(self.model_ids).decode(),
}
@t.overload
def make_tag(
self,
model_name_or_path: str | None = None,
model_id: str | None = None,
return_unused_kwargs: t.Literal[False] = ...,
trust_remote_code: bool = ...,
**attrs: t.Any,
@@ -513,7 +515,7 @@ class LLM(LLMInterface, metaclass=LLMMetaclass):
@t.overload
def make_tag(
self,
model_name_or_path: str | None = None,
model_id: str | None = None,
return_unused_kwargs: t.Literal[True] = ...,
trust_remote_code: bool = ...,
**attrs: t.Any,
@@ -522,7 +524,7 @@ class LLM(LLMInterface, metaclass=LLMMetaclass):
def make_tag(
self,
model_name_or_path: str | None = None,
model_id: str | None = None,
return_unused_kwargs: bool = False,
trust_remote_code: bool = False,
**attrs: t.Any,
@@ -543,8 +545,8 @@ class LLM(LLMInterface, metaclass=LLMMetaclass):
Returns:
A tuple of ``bentoml.Tag`` and a dict of unused kwargs.
"""
if model_name_or_path is None:
model_name_or_path = self._pretrained
if model_id is None:
model_id = self._model_id
if "return_unused_kwargs" in attrs:
logger.debug("Ignoring 'return_unused_kwargs' in 'generate_tag_from_model_name'.")
@@ -553,12 +555,12 @@ class LLM(LLMInterface, metaclass=LLMMetaclass):
config, attrs = t.cast(
"tuple[transformers.PretrainedConfig, dict[str, t.Any]]",
transformers.AutoConfig.from_pretrained(
model_name_or_path, return_unused_kwargs=True, trust_remote_code=trust_remote_code, **attrs
model_id, return_unused_kwargs=True, trust_remote_code=trust_remote_code, **attrs
),
)
name = convert_transformers_model_name(model_name_or_path)
name = convert_transformers_model_name(model_id)
if os.path.exists(os.path.dirname(model_name_or_path)):
if os.path.exists(os.path.dirname(model_id)):
# If the model_name_or_path is a path, we assume it's a local path,
# then users must pass a version for this.
model_version = attrs.pop("openllm_model_version", None)
@@ -590,7 +592,7 @@ class LLM(LLMInterface, metaclass=LLMMetaclass):
"Given %s from '%s' doesn't contain a commit hash. We will generate"
" the tag without specific version.",
t.cast("type[transformers.PretrainedConfig]", config.__class__),
model_name_or_path,
model_id,
)
tag = bentoml.Tag.from_taglike(f"{self.__llm_implementation__}-{name}:{model_version}")
@@ -621,7 +623,7 @@ class LLM(LLMInterface, metaclass=LLMMetaclass):
}
return self.import_model(
self._pretrained,
self._model_id,
tag,
*self._llm_args,
tokenizer_kwds=tokenizer_kwds,

View File

@@ -388,8 +388,8 @@ def start_model_command(
docstring = f"""\
{ModelEnv.start_docstring}
\b
The available pretrained models to use with '{model_name}' are: {for_doc.pretrained} [default: {for_doc.default_model}]
Tip: One can pass one of the aforementioned to '--pretrained' to use other pretrained weights.
Available model_id(s) to use with '{model_name}' are: {for_doc.model_ids} [default: {for_doc.default_id}]
Tip: One can pass one of the aforementioned to '--model-id' to use other pretrained weights.
"""
command_attrs: dict[str, t.Any] = {
"name": ModelEnv.model_name,
@@ -430,9 +430,7 @@ Tip: One can pass one of the aforementioned to '--pretrained' to use other pretr
@llm_config.to_click_options
@parse_serve_args(_serve_grpc)
@click.option("--server-timeout", type=int, default=3600, help="Server timeout in seconds")
@click.option(
"--pretrained", type=click.STRING, default=None, help="Optional pretrained name or path to fine-tune weight."
)
@model_id_option
@click.option(
"--device",
type=tuple,
@@ -444,18 +442,18 @@ Tip: One can pass one of the aforementioned to '--pretrained' to use other pretr
)
def model_start(
server_timeout: int,
pretrained: str | None,
model_id: str | None,
device: tuple[str, ...] | None,
**attrs: t.Any,
) -> openllm.LLMConfig:
config, server_attrs = llm_config.model_validate_click(**attrs)
if ModelEnv.get_framework_env() == "flax":
llm = openllm.AutoFlaxLLM.for_model(model_name, pretrained=pretrained, llm_config=config)
llm = openllm.AutoFlaxLLM.for_model(model_name, model_id=model_id, llm_config=config)
elif ModelEnv.get_framework_env() == "tf":
llm = openllm.AutoTFLLM.for_model(model_name, pretrained=pretrained, llm_config=config)
llm = openllm.AutoTFLLM.for_model(model_name, model_id=model_id, llm_config=config)
else:
llm = openllm.AutoLLM.for_model(model_name, pretrained=pretrained, llm_config=config)
llm = openllm.AutoLLM.for_model(model_name, model_id=model_id, llm_config=config)
# NOTE: We need to initialize llm here first to check if the model is already downloaded to
# avoid deadlock before the subprocess forking.
@@ -580,6 +578,12 @@ output_option = click.option(
default="pretty",
help="Showing output type. Default to 'pretty'",
)
model_id_option = click.option(
"--model-id",
type=click.STRING,
default=None,
help="Optional model ID or path to fine-tuned weights.",
)
def cli_factory() -> click.Group:
@@ -626,16 +630,15 @@ def cli_factory() -> click.Group:
@click.argument(
"model_name", type=click.Choice([inflection.dasherize(name) for name in openllm.CONFIG_MAPPING.keys()])
)
@click.option("--pretrained", default=None, help="Given pretrained model name for the given model name [Optional].")
@click.option("--overwrite", is_flag=True, help="Overwrite existing Bento for given LLM if it already exists.")
@model_id_option
@output_option
def build(model_name: str, pretrained: str | None, overwrite: bool, output: OutputLiteral):
@click.option("--overwrite", is_flag=True, help="Overwrite existing Bento for given LLM if it already exists.")
def build(model_name: str, model_id: str | None, overwrite: bool, output: OutputLiteral):
"""Package a given models into a Bento.
$ openllm build flan-t5
\b
NOTE: To run a container built from this Bento with GPU support, make sure
to have https://github.com/NVIDIA/nvidia-container-toolkit install locally.
"""
@@ -645,7 +648,7 @@ def cli_factory() -> click.Group:
bento, _previously_built = openllm.build(
model_name,
__cli__=True,
pretrained=pretrained,
model_id=model_id,
_overwrite_existing_bento=overwrite,
)
@@ -684,13 +687,13 @@ def cli_factory() -> click.Group:
else:
failed_initialized: list[tuple[str, Exception]] = []
json_data: dict[str, dict[t.Literal["pretrained", "description"], t.Any]] = {}
json_data: dict[str, dict[t.Literal["model_id", "description"], t.Any]] = {}
for m in models:
try:
model = openllm.AutoLLM.for_model(m)
docs = inspect.cleandoc(model.config.__doc__ or "(No description)")
json_data[m] = {"pretrained": model.pretrained, "description": docs}
json_data[m] = {"model_id": model.model_ids, "description": docs}
except Exception as err:
failed_initialized.append((m, err))
@@ -701,7 +704,7 @@ def cli_factory() -> click.Group:
data: list[str | tuple[str, str, list[str]]] = []
for m, v in json_data.items():
data.extend([(m, v["description"], v["pretrained"])])
data.extend([(m, v["description"], v["model_id"])])
column_widths = [int(COLUMNS / 6), int(COLUMNS / 3 * 2), int(COLUMNS / 6)]
if len(data) == 0 and len(failed_initialized) > 0:
@@ -714,7 +717,7 @@ def cli_factory() -> click.Group:
table = tabulate.tabulate(
data,
tablefmt="fancy_grid",
headers=["LLM", "Description", "Pretrained"],
headers=["LLM", "Description", "Model IDs"],
maxcolwidths=column_widths,
)
@@ -739,11 +742,9 @@ def cli_factory() -> click.Group:
@click.argument(
"model_name", type=click.Choice([inflection.dasherize(name) for name in openllm.CONFIG_MAPPING.keys()])
)
@click.option(
"--pretrained", type=click.STRING, default=None, help="Optional pretrained name or path to fine-tune weight."
)
@model_id_option
@output_option
def download_models(model_name: str, pretrained: str | None, output: OutputLiteral):
def download_models(model_name: str, model_id: str | None, output: OutputLiteral):
"""Setup LLM interactively.
Note: This is useful for development and setup for fine-tune.
@@ -751,11 +752,11 @@ def cli_factory() -> click.Group:
config = openllm.AutoConfig.for_model(model_name)
env = config.__openllm_env__.get_framework_env()
if env == "flax":
model = openllm.AutoFlaxLLM.for_model(model_name, pretrained=pretrained, llm_config=config)
model = openllm.AutoFlaxLLM.for_model(model_name, model_id=model_id, llm_config=config)
elif env == "tf":
model = openllm.AutoTFLLM.for_model(model_name, pretrained=pretrained, llm_config=config)
model = openllm.AutoTFLLM.for_model(model_name, model_id=model_id, llm_config=config)
else:
model = openllm.AutoLLM.for_model(model_name, pretrained=pretrained, llm_config=config)
model = openllm.AutoLLM.for_model(model_name, model_id=model_id, llm_config=config)
tag = model.make_tag(trust_remote_code=config.__openllm_trust_remote_code__)
@@ -829,7 +830,7 @@ def cli_factory() -> click.Group:
)
@output_option
@click.argument("query", type=click.STRING)
def query(
def query_(
query: str,
endpoint: str,
timeout: int,
@@ -838,7 +839,7 @@ def cli_factory() -> click.Group:
):
"""Ask a LLM interactively, from a terminal.
$ openllm query --endpoint http://12.323.2.1 "What is the meaning of life?"
$ openllm query --endpoint http://12.323.2.1:3000 "What is the meaning of life?"
"""
if server_type == "grpc":
endpoint = re.sub(r"http://", "", endpoint)
@@ -870,7 +871,7 @@ def cli_factory() -> click.Group:
_echo(res["responses"], fg="white")
if t.TYPE_CHECKING:
assert download_models and build and models and start and start_grpc and query and prune
assert download_models and build and models and start and start_grpc and query_ and prune
if psutil.WINDOWS:
sys.stdout.reconfigure(encoding="utf-8") # type: ignore

View File

@@ -47,7 +47,7 @@ class _BaseAutoLLMClass:
def for_model(
cls,
model_name: str,
pretrained: str | None = None,
model_id: str | None = None,
return_runner_kwargs: t.Literal[False] = ...,
llm_config: openllm.LLMConfig | None = ...,
**attrs: t.Any,
@@ -59,7 +59,7 @@ class _BaseAutoLLMClass:
def for_model(
cls,
model_name: str,
pretrained: str | None = None,
model_id: str | None = None,
return_runner_kwargs: t.Literal[True] = ...,
llm_config: openllm.LLMConfig | None = ...,
**attrs: t.Any,
@@ -70,11 +70,18 @@ class _BaseAutoLLMClass:
def for_model(
cls,
model_name: str,
pretrained: str | None = None,
model_id: str | None = None,
return_runner_kwargs: bool = False,
llm_config: openllm.LLMConfig | None = ...,
**attrs: t.Any,
) -> openllm.LLM | tuple[openllm.LLM, dict[str, t.Any]]:
"""The lower level API for creating a LLM instance.
```python
>>> import openllm
>>> llm = openllm.AutoLLM.for_model("flan-t5")
```
"""
runner_kwargs_name = [
"models",
"max_batch_size",
@@ -88,7 +95,7 @@ class _BaseAutoLLMClass:
# The rest of kwargs is now passed to config
llm_config = AutoConfig.for_model(model_name, **attrs)
if type(llm_config) in cls._model_mapping.keys():
llm = cls._model_mapping[type(llm_config)].from_pretrained(pretrained, llm_config=llm_config, **attrs)
llm = cls._model_mapping[type(llm_config)].from_pretrained(model_id, llm_config=llm_config, **attrs)
if not return_runner_kwargs:
return llm
return llm, to_runner_attrs
@@ -98,19 +105,19 @@ class _BaseAutoLLMClass:
)
@classmethod
def create_runner(cls, model_name: str, pretrained: str | None = None, **attrs: t.Any) -> bentoml.Runner:
def create_runner(cls, model_name: str, model_id: str | None = None, **attrs: t.Any) -> bentoml.Runner:
"""
Create a LLM Runner for the given model name.
Args:
model_name: The model name to instantiate.
pretrained: The pretrained model name to instantiate.
model_id: The pretrained model name to instantiate.
**attrs: Additional keyword arguments passed along to the specific configuration class.
Returns:
A LLM instance.
"""
llm, runner_attrs = cls.for_model(model_name, pretrained, return_runner_kwargs=True, **attrs)
llm, runner_attrs = cls.for_model(model_name, model_id, return_runner_kwargs=True, **attrs)
return llm.to_runner(**runner_attrs)
@classmethod

View File

@@ -55,7 +55,7 @@ class ChatGLMConfig(
START_CHATGLM_COMMAND_DOCSTRING = """\
Run a LLMServer for ChatGLM model and pretrained.
Run a LLMServer for ChatGLM model.
\b
> See more information about ChatGLM at [THUDM/ChatGLM-6b](https://huggingface.co/thudm/chatglm-6b)
@@ -67,7 +67,11 @@ Currently, ChatGLM only supports PyTorch. Make sure ``torch`` is available in yo
\b
ChatGLM Runner will use THUDM/ChatGLM-6b as the default model. To change any to any other ChatGLM
saved pretrained, or a fine-tune ChatGLM, provide ``OPENLLM_CHATGLM_PRETRAINED='thudm/chatglm-6b-int8'``
saved pretrained, or a fine-tune ChatGLM, provide ``OPENLLM_CHATGLM_MODEL_ID='thudm/chatglm-6b-int8'``
or provide `--model-id` flag when running ``openllm start chatglm``:
\b
$ openllm start chatglm --model-id='thudm/chatglm-6b-int8'
"""
DEFAULT_PROMPT_TEMPLATE = """{instruction}"""

View File

@@ -41,22 +41,27 @@ class InvalidScoreLogitsProcessor(LogitsProcessor):
class ChatGLM(openllm.LLM):
__openllm_internal__ = True
default_model = "thudm/chatglm-6b-int4"
default_id = "thudm/chatglm-6b-int4"
pretrained = ["thudm/chatglm-6b", "thudm/chatglm-6b-int8", "thudm/chatglm-6b-int4"]
model_ids = ["thudm/chatglm-6b", "thudm/chatglm-6b-int8", "thudm/chatglm-6b-int4"]
device = torch.device("cuda")
def import_model(
self, pretrained: str, tag: bentoml.Tag, *model_args: t.Any, tokenizer_kwds: dict[str, t.Any], **attrs: t.Any
self,
model_id: str,
tag: bentoml.Tag,
*model_args: t.Any,
tokenizer_kwds: dict[str, t.Any],
**attrs: t.Any,
) -> bentoml.Model:
trust_remote_code = attrs.pop("trust_remote_code", True)
return bentoml.transformers.save_model(
tag,
transformers.AutoModel.from_pretrained(pretrained, trust_remote_code=trust_remote_code),
transformers.AutoModel.from_pretrained(model_id, trust_remote_code=trust_remote_code),
custom_objects={
"tokenizer": transformers.AutoTokenizer.from_pretrained(
pretrained, trust_remote_code=trust_remote_code, **tokenizer_kwds
model_id, trust_remote_code=trust_remote_code, **tokenizer_kwds
)
},
)

View File

@@ -51,7 +51,7 @@ class DollyV2Config(
START_DOLLY_V2_COMMAND_DOCSTRING = """\
Run a LLMServer for dolly-v2 model and pretrained.
Run a LLMServer for dolly-v2 model.
\b
> See more information about dolly-v2 at [databricks/dolly-v2-3b](https://huggingface.co/databricks/dolly-v2-3b)
@@ -63,7 +63,11 @@ Currently, dolly-v2 only supports PyTorch. Make sure ``torch`` is available in y
\b
Dolly-v2 Runner will use databricks/dolly-v2-3b as the default model. To change any to any other dolly-v2
saved pretrained, or a fine-tune dolly-v2, provide ``OPENLLM_DOLLY_V2_PRETRAINED='databricks/dolly-v2-7b'``
saved pretrained, or a fine-tune dolly-v2, provide ``OPENLLM_DOLLY_V2_MODEL_ID='databricks/dolly-v2-7b'``
or provide `--model-id` flag when running ``openllm start dolly-v2``:
\b
$ openllm start dolly-v2 --model-id databricks/dolly-v2-7b
"""
INSTRUCTION_KEY = "### Instruction:"

View File

@@ -38,9 +38,9 @@ class DollyV2(openllm.LLM):
__openllm_internal__ = True
default_model = "databricks/dolly-v2-3b"
default_id = "databricks/dolly-v2-3b"
pretrained = ["databricks/dolly-v2-3b", "databricks/dolly-v2-7b", "databricks/dolly-v2-12b"]
model_ids = ["databricks/dolly-v2-3b", "databricks/dolly-v2-7b", "databricks/dolly-v2-12b"]
import_kwargs = {
"device_map": "auto" if torch.cuda.is_available() and torch.cuda.device_count() > 1 else None,
@@ -51,15 +51,15 @@ class DollyV2(openllm.LLM):
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def import_model(
self, pretrained: str, tag: bentoml.Tag, *model_args: t.Any, tokenizer_kwds: dict[str, t.Any], **attrs: t.Any
self, model_id: str, tag: bentoml.Tag, *model_args: t.Any, tokenizer_kwds: dict[str, t.Any], **attrs: t.Any
) -> bentoml.Model:
trust_remote_code = attrs.pop("trust_remote_code", True)
torch_dtype = attrs.pop("torch_dtype", torch.bfloat16)
device_map = attrs.pop("device_map", "auto")
tokenizer = transformers.AutoTokenizer.from_pretrained(pretrained, **tokenizer_kwds)
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id, **tokenizer_kwds)
pipeline = transformers.pipeline(
model=pretrained,
model=model_id,
tokenizer=tokenizer,
trust_remote_code=trust_remote_code,
torch_dtype=torch_dtype,

View File

@@ -40,7 +40,7 @@ class FalconConfig(
START_FALCON_COMMAND_DOCSTRING = """\
Run a LLMServer for FalconLM model and pretrained.
Run a LLMServer for FalconLM model.
\b
> See more information about falcon at [tiiuae/falcon-7b](https://huggingface.co/tiiuae/falcon-7b)
@@ -52,7 +52,11 @@ Currently, FalconLM only supports PyTorch. Make sure ``torch`` is available in y
\b
FalconLM Runner will use tiiuae/falcon-7b as the default model. To change any to any other FalconLM
saved pretrained, or a fine-tune FalconLM, provide ``OPENLLM_FALCON_PRETRAINED='tiiuae/falcon-7b-instruct'``
saved pretrained, or a fine-tune FalconLM, provide ``OPENLLM_FALCON_MODEL_ID='tiiuae/falcon-7b-instruct'``
or provide `--model-id` flag when running ``openllm start falcon``:
\b
$ openllm start falcon --model-id tiiuae/falcon-7b-instruct
"""
DEFAULT_PROMPT_TEMPLATE = """{context}

View File

@@ -34,9 +34,9 @@ else:
class Falcon(openllm.LLM):
__openllm_internal__ = True
default_model = "tiiuae/falcon-7b"
default_id = "tiiuae/falcon-7b"
pretrained = ["tiiuae/falcon-7b", "tiiuae/falcon-40b", "tiiuae/falcon-7b-instruct", "tiiuae/falcon-40b-instruct"]
model_ids = ["tiiuae/falcon-7b", "tiiuae/falcon-40b", "tiiuae/falcon-7b-instruct", "tiiuae/falcon-40b-instruct"]
import_kwargs = {
"torch_dtype": torch.bfloat16,
@@ -44,15 +44,15 @@ class Falcon(openllm.LLM):
}
def import_model(
self, pretrained: str, tag: bentoml.Tag, *model_args: t.Any, tokenizer_kwds: dict[str, t.Any], **attrs: t.Any
self, model_id: str, tag: bentoml.Tag, *model_args: t.Any, tokenizer_kwds: dict[str, t.Any], **attrs: t.Any
) -> bentoml.Model:
trust_remote_code = attrs.pop("trust_remote_code", True)
torch_dtype = attrs.pop("torch_dtype", torch.bfloat16)
device_map = attrs.pop("device_map", "auto")
tokenizer = transformers.AutoTokenizer.from_pretrained(pretrained)
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
model = transformers.AutoModelForCausalLM.from_pretrained(
pretrained,
model_id,
trust_remote_code=trust_remote_code,
torch_dtype=torch_dtype,
device_map=device_map,

View File

@@ -16,7 +16,7 @@ from __future__ import annotations
import openllm
START_FLAN_T5_COMMAND_DOCSTRING = """\
Run a LLMServer for FLAN-T5 model and pretrained.
Run a LLMServer for FLAN-T5 model.
\b
> See more information about FLAN-T5 at [huggingface/transformers](https://huggingface.co/docs/transformers/model_doc/flan-t5)
@@ -34,7 +34,11 @@ By default, this model will use the PyTorch model for inference. However, this m
\b
FLAN-T5 Runner will use google/flan-t5-large as the default model. To change any to any other FLAN-T5
saved pretrained, or a fine-tune FLAN-T5, provide ``OPENLLM_FLAN_T5_PRETRAINED='google/flan-t5-xxl'``
saved pretrained, or a fine-tune FLAN-T5, provide ``OPENLLM_FLAN_T5_MODEL_ID='google/flan-t5-xxl'``
or provide `--model-id` flag when running ``openllm start flan-t5``:
\b
$ openllm start flan-t5 --model-id google/flan-t5-xxl
"""
DEFAULT_PROMPT_TEMPLATE = """Answer the following question:\nQuestion: {instruction}\nAnswer:"""

View File

@@ -29,9 +29,9 @@ else:
class FlanT5(openllm.LLM):
__openllm_internal__ = True
default_model = "google/flan-t5-large"
default_id = "google/flan-t5-large"
pretrained = [
model_ids = [
"google/flan-t5-small",
"google/flan-t5-base",
"google/flan-t5-large",

View File

@@ -24,9 +24,9 @@ from .configuration_flan_t5 import DEFAULT_PROMPT_TEMPLATE
class FlaxFlanT5(openllm.LLM):
__openllm_internal__ = True
default_model: str = "google/flan-t5-large"
default_id: str = "google/flan-t5-large"
pretrained = [
model_ids = [
"google/flan-t5-small",
"google/flan-t5-base",
"google/flan-t5-large",

View File

@@ -24,9 +24,9 @@ from .configuration_flan_t5 import DEFAULT_PROMPT_TEMPLATE
class TFFlanT5(openllm.LLM):
__openllm_internal__ = True
default_model: str = "google/flan-t5-large"
default_id: str = "google/flan-t5-large"
pretrained = [
model_ids = [
"google/flan-t5-small",
"google/flan-t5-base",
"google/flan-t5-large",

View File

@@ -38,7 +38,7 @@ class StableLMConfig(openllm.LLMConfig, name_type="lowercase", url="https://gith
START_STABLELM_COMMAND_DOCSTRING = """\
Run a LLMServer for StableLM model and pretrained.
Run a LLMServer for StableLM model.
\b
> See more information about StableLM at [stabilityai/stablelm-base-alpha-3b](https://huggingface.co/stabilityai/stablelm-base-alpha-3b)
@@ -50,7 +50,11 @@ Currently, StableLM only supports PyTorch. Make sure ``torch`` is available in y
\b
StableLM Runner will use stabilityai/stablelm-base-alpha-3b as the default model. To change any to any other StableLM
saved pretrained, or a fine-tune StableLM, provide ``OPENLLM_STABLELM_PRETRAINED='stabilityai/stablelm-tuned-alpha-3b'``
saved pretrained, or a fine-tune StableLM, provide ``OPENLLM_STABLELM_MODEL_ID='stabilityai/stablelm-tuned-alpha-3b'``
or provide `--model-id` flag when running ``openllm start stablelm``:
\b
$ openllm start stablelm --model-id 'stabilityai/stablelm-tuned-alpha-3b'
"""
SYSTEM_PROMPT = """<|SYSTEM|># StableLM Tuned (Alpha version)

View File

@@ -43,9 +43,9 @@ class StableLM(openllm.LLM):
__openllm_internal__ = True
load_in_mha = True
default_model = "stabilityai/stablelm-tuned-alpha-3b"
default_id = "stabilityai/stablelm-tuned-alpha-3b"
pretrained = [
model_ids = [
"stabilityai/stablelm-tuned-alpha-3b",
"stabilityai/stablelm-tuned-alpha-7b",
"stabilityai/stablelm-base-alpha-3b",
@@ -70,7 +70,7 @@ class StableLM(openllm.LLM):
use_default_prompt_template: bool = True,
**attrs: t.Any,
) -> tuple[str, dict[str, t.Any], dict[str, t.Any]]:
if "tuned" in self._pretrained and use_default_prompt_template:
if "tuned" in self._model_id and use_default_prompt_template:
prompt_variables = {
k: v
for k, v in attrs.items()

View File

@@ -42,7 +42,7 @@ class StarCoderConfig(
START_STARCODER_COMMAND_DOCSTRING = """\
Run a LLMServer for StarCoder model and pretrained.
Run a LLMServer for StarCoder model.
\b
> See more information about StarCoder at [bigcode/starcoder](https://huggingface.co/bigcode/starcoder)
@@ -54,7 +54,11 @@ Currently, StarCoder only supports PyTorch. Make sure ``torch`` is available in
\b
StarCoder Runner will use bigcode/starcoder as the default model. To change any to any other StarCoder
saved pretrained, or a fine-tune StarCoder, provide ``OPENLLM_STARCODER_PRETRAINED='bigcode/starcoder'``
saved pretrained, or a fine-tune StarCoder, provide ``OPENLLM_STARCODER_MODEL_ID='bigcode/starcoder'``
or provide `--model-id` flag when running ``openllm start starcoder``:
\b
$ openllm start starcoder --model-id 'bigcode/starcoder'
"""
DEFAULT_PROMPT_TEMPLATE = """{instruction}"""

View File

@@ -40,21 +40,21 @@ FIM_INDICATOR = "<FILL_HERE>"
class StarCoder(openllm.LLM):
__openllm_internal__ = True
default_model = "bigcode/starcoder"
default_id = "bigcode/starcoder"
pretrained = ["bigcode/starcoder", "bigcode/starcoderbase"]
model_ids = ["bigcode/starcoder", "bigcode/starcoderbase"]
device = torch.device("cuda")
def import_model(
self,
pretrained: str,
model_id: str,
tag: bentoml.Tag,
*model_args: t.Any,
tokenizer_kwds: dict[str, t.Any],
**attrs: t.Any,
) -> bentoml.Model:
tokenizer = transformers.AutoTokenizer.from_pretrained(pretrained, **tokenizer_kwds)
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id, **tokenizer_kwds)
tokenizer.add_special_tokens(
{
"additional_special_tokens": [EOD, FIM_PREFIX, FIM_MIDDLE, FIM_SUFFIX, FIM_PAD],
@@ -62,7 +62,7 @@ class StarCoder(openllm.LLM):
}
)
model = transformers.AutoModelForCausalLM.from_pretrained(pretrained, **attrs)
model = transformers.AutoModelForCausalLM.from_pretrained(model_id, **attrs)
try:
return bentoml.transformers.save_model(tag, model, custom_objects={"tokenizer": tokenizer})

View File

@@ -91,8 +91,8 @@ class ModelEnv:
return f"OPENLLM_{self.model_name.upper()}_CONFIG"
@property
def pretrained(self) -> str:
return f"OPENLLM_{self.model_name.upper()}_PRETRAINED"
def model_id(self) -> str:
return f"OPENLLM_{self.model_name.upper()}_MODEL_ID"
@property
def bettertransformer(self) -> str: