From 4d356f4b72f03112426e8a8135c9ce31e9581809 Mon Sep 17 00:00:00 2001
From: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
Date: Tue, 7 Nov 2023 17:28:02 -0500
Subject: [PATCH] feat: Mistral support (#571)

* feat: Mistral support

Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>

* ci: auto fixes from pre-commit.ci

For more information, see https://pre-commit.ci

* chore: fix style

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: update README docs about mistral

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

---------

Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 README.md                                     | 48 +++++++++++++++++++
 openllm-core/src/openllm_core/__init__.py     |  2 +
 .../src/openllm_core/_configuration.py        | 25 ----------
 .../src/openllm_core/config/__init__.py       |  2 +
 .../openllm_core/config/configuration_auto.py |  2 +-
 .../config/configuration_mistral.py           | 46 ++++++++++++++++++
 openllm-python/src/openllm/__init__.py        |  1 +
 7 files changed, 100 insertions(+), 26 deletions(-)
 create mode 100644 openllm-core/src/openllm_core/config/configuration_mistral.py

diff --git a/README.md b/README.md
index cffc5584..048dc5ed 100644
--- a/README.md
+++ b/README.md
@@ -166,6 +166,54 @@ openllm start opt --model-id facebook/opt-2.7b
 
 OpenLLM currently supports the following models. By default, OpenLLM doesn't include dependencies to run all models. The extra model-specific dependencies can be installed with the instructions below.
 
+<details>
+<summary>Mistral</summary>
+
+### Quickstart
+
+Run the following commands to quickly spin up a Mistral server and send a request to it.
+
+```bash
+openllm start mistral --model-id HuggingFaceH4/zephyr-7b-beta
+export OPENLLM_ENDPOINT=http://localhost:3000
+openllm query 'What are large language models?'
+```
+
+> [!NOTE]
+> Any Mistral variant can be deployed with OpenLLM.
+> Visit the [Hugging Face Model Hub](https://huggingface.co/models?sort=trending&search=mistral) to see more Mistral-compatible models.
+
+### Supported models
+
+You can specify any of the following Mistral models by using `--model-id`.
+
+- [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)
+- [mistralai/Mistral-7B-Instruct-v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1)
+- [amazon/MistralLite](https://huggingface.co/amazon/MistralLite)
+- [HuggingFaceH4/zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)
+- [HuggingFaceH4/zephyr-7b-alpha](https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha)
+- Any other models that strictly follow the [MistralForCausalLM](https://huggingface.co/docs/transformers/main/en/model_doc/mistral#transformers.MistralForCausalLM) architecture
+
+### Supported backends
+
+- PyTorch (Default):
+
+  ```bash
+  openllm start mistral --model-id HuggingFaceH4/zephyr-7b-beta --backend pt
+  ```
+
+- vLLM (Recommended):
+
+  ```bash
+  pip install "openllm[vllm]"
+  openllm start mistral --model-id HuggingFaceH4/zephyr-7b-beta --backend vllm
+  ```
+
+> [!NOTE]
+> Quantization and adapters are currently not supported when using the vLLM backend.
+
+</details>
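The quickstart above uses the CLI; the same request can also be issued from Python. A minimal sketch, assuming the `openllm.client.HTTPClient` helper that OpenLLM shipped around this release (the exact client API may differ by version):

```python
# Query a running Mistral server from Python instead of the CLI.
# Assumes the server was started with:
#   openllm start mistral --model-id HuggingFaceH4/zephyr-7b-beta
import openllm

client = openllm.client.HTTPClient('http://localhost:3000')
result = client.query('What are large language models?')
print(result)
```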
+
 <details>
 <summary>Llama</summary>

diff --git a/openllm-core/src/openllm_core/__init__.py b/openllm-core/src/openllm_core/__init__.py
index a75ede79..6bf020bf 100644
--- a/openllm-core/src/openllm_core/__init__.py
+++ b/openllm-core/src/openllm_core/__init__.py
@@ -18,6 +18,7 @@ from .config import START_FALCON_COMMAND_DOCSTRING as START_FALCON_COMMAND_DOCST
 from .config import START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING
 from .config import START_GPT_NEOX_COMMAND_DOCSTRING as START_GPT_NEOX_COMMAND_DOCSTRING
 from .config import START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING
+from .config import START_MISTRAL_COMMAND_DOCSTRING as START_MISTRAL_COMMAND_DOCSTRING
 from .config import START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING
 from .config import START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING
 from .config import START_STABLELM_COMMAND_DOCSTRING as START_STABLELM_COMMAND_DOCSTRING
@@ -30,6 +31,7 @@ from .config import FalconConfig as FalconConfig
 from .config import FlanT5Config as FlanT5Config
 from .config import GPTNeoXConfig as GPTNeoXConfig
 from .config import LlamaConfig as LlamaConfig
+from .config import MistralConfig as MistralConfig
 from .config import MPTConfig as MPTConfig
 from .config import OPTConfig as OPTConfig
 from .config import StableLMConfig as StableLMConfig
diff --git a/openllm-core/src/openllm_core/_configuration.py b/openllm-core/src/openllm_core/_configuration.py
index 568e3bae..6bc6c272 100644
--- a/openllm-core/src/openllm_core/_configuration.py
+++ b/openllm-core/src/openllm_core/_configuration.py
@@ -1511,31 +1511,6 @@ class LLMConfig(_ConfigAttr):
   def peft_task_type(cls) -> str:
     return _PEFT_TASK_TYPE_TARGET_MAPPING[cls.__openllm_model_type__]
 
-  def sanitize_parameters(self, prompt: str, **attrs: t.Any) -> tuple[str, DictStrAny, DictStrAny]:
-    '''This handler will sanitize all attrs and setup prompt text.
-
-    It takes a prompt that is given by the user, attrs that can be parsed with the prompt.
-
-    Returns a tuple of three items:
-    - The attributes dictionary that can be passed into LLMConfig to generate a GenerationConfig
-    - The attributes dictionary that will be passed into `self.postprocess_generate`.
-
-    `openllm.LLM` also has a sanitize_parameters that will just call this method.
-    '''
-    return prompt, attrs, attrs
-
-  def postprocess_generate(self, prompt: str, generation_result: t.Any, **attrs: t.Any) -> t.Any:
-    '''This handler will postprocess generation results from LLM.generate and then output nicely formatted results (if the LLM decide to do so.).
-
-    You can customize how the output of the LLM looks with this hook. By default, it is a simple echo.
-
-    > [!NOTE]
-    > This will be used from the client side.
-
-    `openllm.LLM` also has a postprocess_generate that will just call this method.
-    '''
-    return generation_result
-
 converter.register_unstructure_hook_factory(lambda cls: lenient_issubclass(cls, LLMConfig), lambda cls: make_dict_unstructure_fn(cls, converter, _cattrs_omit_if_default=False, _cattrs_use_linecache=True))
diff --git a/openllm-core/src/openllm_core/config/__init__.py b/openllm-core/src/openllm_core/config/__init__.py
index e1304867..d880b08f 100644
--- a/openllm-core/src/openllm_core/config/__init__.py
+++ b/openllm-core/src/openllm_core/config/__init__.py
@@ -17,6 +17,8 @@ from .configuration_gpt_neox import START_GPT_NEOX_COMMAND_DOCSTRING as START_GP
 from .configuration_gpt_neox import GPTNeoXConfig as GPTNeoXConfig
 from .configuration_llama import START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING
 from .configuration_llama import LlamaConfig as LlamaConfig
+from .configuration_mistral import START_MISTRAL_COMMAND_DOCSTRING as START_MISTRAL_COMMAND_DOCSTRING
+from .configuration_mistral import MistralConfig as MistralConfig
 from .configuration_mpt import START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING
 from .configuration_mpt import MPTConfig as MPTConfig
 from .configuration_opt import START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING
diff --git a/openllm-core/src/openllm_core/config/configuration_auto.py b/openllm-core/src/openllm_core/config/configuration_auto.py
index 6705fae9..2bf6cb03 100644
--- a/openllm-core/src/openllm_core/config/configuration_auto.py
+++ b/openllm-core/src/openllm_core/config/configuration_auto.py
@@ -37,7 +37,7 @@ else:
 # NOTE: This is the entrypoint when adding new model config
 CONFIG_MAPPING_NAMES = OrderedDict([('chatglm', 'ChatGLMConfig'), ('dolly_v2', 'DollyV2Config'), ('falcon', 'FalconConfig'), ('flan_t5', 'FlanT5Config'), ('gpt_neox', 'GPTNeoXConfig'), ('llama', 'LlamaConfig'), ('mpt', 'MPTConfig'), ('opt', 'OPTConfig'), ('stablelm', 'StableLMConfig'), ('starcoder', 'StarCoderConfig'),
-                                    ('baichuan', 'BaichuanConfig')])
+                                    ('mistral', 'MistralConfig'), ('baichuan', 'BaichuanConfig')])
 
 class _LazyConfigMapping(OrderedDictType, ReprMixin):
   def __init__(self, mapping: OrderedDict[LiteralString, LiteralString]):
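The `CONFIG_MAPPING_NAMES` edit above is the whole registration step: `_LazyConfigMapping` only resolves the string `'MistralConfig'` to a class the first time `'mistral'` is looked up, so `configuration_mistral` is never imported unless a Mistral model is actually requested. A self-contained sketch of that lazy-lookup pattern (simplified; the real `_LazyConfigMapping` in `configuration_auto.py` has more machinery):

```python
import importlib
from collections import OrderedDict

class LazyConfigMapping:
  """Map model names to config classes, importing each module on first access."""

  def __init__(self, mapping: 'OrderedDict[str, str]'):
    self._mapping = mapping  # e.g. OrderedDict([('mistral', 'MistralConfig')])
    self._cache: dict = {}

  def __getitem__(self, key: str):
    if key not in self._cache:
      class_name = self._mapping[key]  # KeyError for unregistered model names
      # Import openllm_core.config.configuration_<key> only when first requested.
      module = importlib.import_module(f'.configuration_{key}', 'openllm_core.config')
      self._cache[key] = getattr(module, class_name)
    return self._cache[key]
```

Keeping the mapping lazy means adding a new model never slows down `import openllm_core` for users who don't load that model.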
diff --git a/openllm-core/src/openllm_core/config/configuration_mistral.py b/openllm-core/src/openllm_core/config/configuration_mistral.py
new file mode 100644
index 00000000..3d65008e
--- /dev/null
+++ b/openllm-core/src/openllm_core/config/configuration_mistral.py
@@ -0,0 +1,46 @@
+from __future__ import annotations
+
+import openllm_core
+
+START_MISTRAL_COMMAND_DOCSTRING = '''\
+Run an LLMServer for a Mistral model.
+
+\b
+> See more information about Mistral at [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)
+
+\b
+## Usage
+
+By default, this model will use the PyTorch backend for inference. However, if vLLM is available, it will be used instead.
+
+\b
+- To use vLLM, set the environment variable ``OPENLLM_BACKEND="vllm"``
+
+\b
+Mistral Runner will use mistralai/Mistral-7B-Instruct-v0.1 as the default model. To change to any other
+saved pretrained or fine-tuned Mistral model, provide ``OPENLLM_MODEL_ID='HuggingFaceH4/zephyr-7b-alpha'``
+or pass the `--model-id` flag when running ``openllm start mistral``:
+
+\b
+$ openllm start mistral --model-id HuggingFaceH4/zephyr-7b-alpha
+'''
+DEFAULT_PROMPT_TEMPLATE = '''{instruction}'''
+
+class MistralConfig(openllm_core.LLMConfig):
+  """Mistral was introduced in [this paper](https://arxiv.org/abs/2310.06825) and first released by [MistralAI](https://mistral.ai/news/announcing-mistral-7b/).
+
+  Mistral-7B-v0.1 is Mistral AI's first Large Language Model (LLM).
+  Refer to [Mistral's HuggingFace page](https://huggingface.co/docs/transformers/v4.35.0/en/model_doc/mistral#overview) for more information.
+  """
+  __config__ = {
+      'name_type': 'lowercase',
+      'url': 'https://huggingface.co/docs/transformers/v4.35.0/en/model_doc/mistral#overview',
+      'default_id': 'mistralai/Mistral-7B-Instruct-v0.1',
+      'architecture': 'MistralForCausalLM',
+      'model_ids': ['mistralai/Mistral-7B-v0.1', 'mistralai/Mistral-7B-Instruct-v0.1', 'amazon/MistralLite', 'HuggingFaceH4/zephyr-7b-beta', 'HuggingFaceH4/zephyr-7b-alpha'],
+  }
+
+  class GenerationConfig:
+    top_k: int = 12
+    temperature: float = 0.75
+    max_new_tokens: int = 256
diff --git a/openllm-python/src/openllm/__init__.py b/openllm-python/src/openllm/__init__.py
index 109ea5ac..200524e0 100644
--- a/openllm-python/src/openllm/__init__.py
+++ b/openllm-python/src/openllm/__init__.py
@@ -38,6 +38,7 @@ from openllm_core.config import MPTConfig as MPTConfig
 from openllm_core.config import OPTConfig as OPTConfig
 from openllm_core.config import StableLMConfig as StableLMConfig
 from openllm_core.config import StarCoderConfig as StarCoderConfig
+from openllm_core.config import MistralConfig as MistralConfig
 
 from . import exceptions as exceptions
 from . import utils as utils
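A note on the new `MistralConfig`: the nested `GenerationConfig` is just annotated class attributes, which the `LLMConfig` base turns into default sampling parameters (`top_k=12`, `temperature=0.75`, `max_new_tokens=256`). A standalone illustration of how such class-body defaults can be collected with ordinary introspection (this mimics the idea, not the actual `openllm_core` machinery):

```python
# Collect annotated class-body defaults, the way a config base class might.
class GenerationConfig:
  top_k: int = 12
  temperature: float = 0.75
  max_new_tokens: int = 256

defaults = {name: getattr(GenerationConfig, name) for name in GenerationConfig.__annotations__}
print(defaults)  # {'top_k': 12, 'temperature': 0.75, 'max_new_tokens': 256}
```

These are only defaults; individual requests can still override them at generation time.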