From 4d356f4b72f03112426e8a8135c9ce31e9581809 Mon Sep 17 00:00:00 2001
From: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
Date: Tue, 7 Nov 2023 17:28:02 -0500
Subject: [PATCH] feat: Mistral support (#571)
* feat: Mistral support
Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
* ci: auto fixes from pre-commit.ci
For more information, see https://pre-commit.ci
* chore: fix style
Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
* chore: update README docs about mistral
Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
---------
Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
README.md | 48 +++++++++++++++++++
openllm-core/src/openllm_core/__init__.py | 2 +
.../src/openllm_core/_configuration.py | 25 ----------
.../src/openllm_core/config/__init__.py | 2 +
.../openllm_core/config/configuration_auto.py | 2 +-
.../config/configuration_mistral.py | 46 ++++++++++++++++++
openllm-python/src/openllm/__init__.py | 1 +
7 files changed, 100 insertions(+), 26 deletions(-)
create mode 100644 openllm-core/src/openllm_core/config/configuration_mistral.py
diff --git a/README.md b/README.md
index cffc5584..048dc5ed 100644
--- a/README.md
+++ b/README.md
@@ -166,6 +166,54 @@ openllm start opt --model-id facebook/opt-2.7b
OpenLLM currently supports the following models. By default, OpenLLM doesn't include dependencies to run all models. The extra model-specific dependencies can be installed with the instructions below.
+
+Mistral
+
+### Quickstart
+
+Run the following commands to quickly spin up a Mistral server and send a request to it.
+
+```bash
+openllm start mistral --model-id HuggingFaceH4/zephyr-7b-beta
+export OPENLLM_ENDPOINT=http://localhost:3000
+openllm query 'What are large language models?'
+```
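+
+You can also query the server from Python. Below is a minimal sketch using OpenLLM's HTTP client; the `HTTPClient` class and its `query` method are assumed from this release's `openllm.client` module, so verify against your installed version:
+
+```python
+import openllm
+
+# Connect to the server started above (listening on port 3000 by default).
+client = openllm.client.HTTPClient('http://localhost:3000')
+
+# Send a prompt; the server applies the Mistral generation defaults.
+print(client.query('What are large language models?'))
+```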
+
+> [!NOTE]
+> Any Mistral variant can be deployed with OpenLLM.
+> Visit the [Hugging Face Model Hub](https://huggingface.co/models?sort=trending&search=mistral) to see more Mistral-compatible models.
+
+### Supported models
+
+You can specify any of the following Mistral models by using `--model-id`.
+
+- [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)
+- [mistralai/Mistral-7B-Instruct-v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1)
+- [amazon/MistralLite](https://huggingface.co/amazon/MistralLite)
+- [HuggingFaceH4/zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)
+- [HuggingFaceH4/zephyr-7b-alpha](https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha)
+- Any other model that strictly follows the [MistralForCausalLM](https://huggingface.co/docs/transformers/main/en/model_doc/mistral#transformers.MistralForCausalLM) architecture
+
+### Supported backends
+
+- PyTorch (Default):
+
+ ```bash
+ openllm start mistral --model-id HuggingFaceH4/zephyr-7b-beta --backend pt
+ ```
+
+- vLLM (Recommended):
+
+ ```bash
+ pip install "openllm[vllm]"
+ openllm start mistral --model-id HuggingFaceH4/zephyr-7b-beta --backend vllm
+ ```
+
+> [!NOTE]
+> Currently when using the vLLM backend, quantization and adapters are not supported.
+
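+The backend can also be selected from Python. A minimal sketch follows; the `openllm.LLM` constructor, its async `generate` method, and the shape of its output are assumptions based on this release, so treat it as illustrative:
+
+```python
+import asyncio
+
+import openllm
+
+# 'pt' and 'vllm' mirror the --backend flag above; this assumes vLLM is installed.
+llm = openllm.LLM('HuggingFaceH4/zephyr-7b-beta', backend='vllm')
+
+async def main() -> None:
+  res = await llm.generate('What are large language models?')
+  print(res.outputs[0].text)  # print the first generated candidate
+
+asyncio.run(main())
+```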
+
+
Llama
diff --git a/openllm-core/src/openllm_core/__init__.py b/openllm-core/src/openllm_core/__init__.py
index a75ede79..6bf020bf 100644
--- a/openllm-core/src/openllm_core/__init__.py
+++ b/openllm-core/src/openllm_core/__init__.py
@@ -18,6 +18,7 @@ from .config import START_FALCON_COMMAND_DOCSTRING as START_FALCON_COMMAND_DOCST
from .config import START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING
from .config import START_GPT_NEOX_COMMAND_DOCSTRING as START_GPT_NEOX_COMMAND_DOCSTRING
from .config import START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING
+from .config import START_MISTRAL_COMMAND_DOCSTRING as START_MISTRAL_COMMAND_DOCSTRING
from .config import START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING
from .config import START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING
from .config import START_STABLELM_COMMAND_DOCSTRING as START_STABLELM_COMMAND_DOCSTRING
@@ -30,6 +31,7 @@ from .config import FalconConfig as FalconConfig
from .config import FlanT5Config as FlanT5Config
from .config import GPTNeoXConfig as GPTNeoXConfig
from .config import LlamaConfig as LlamaConfig
+from .config import MistralConfig as MistralConfig
from .config import MPTConfig as MPTConfig
from .config import OPTConfig as OPTConfig
from .config import StableLMConfig as StableLMConfig
diff --git a/openllm-core/src/openllm_core/_configuration.py b/openllm-core/src/openllm_core/_configuration.py
index 568e3bae..6bc6c272 100644
--- a/openllm-core/src/openllm_core/_configuration.py
+++ b/openllm-core/src/openllm_core/_configuration.py
@@ -1511,31 +1511,6 @@ class LLMConfig(_ConfigAttr):
def peft_task_type(cls) -> str:
return _PEFT_TASK_TYPE_TARGET_MAPPING[cls.__openllm_model_type__]
- def sanitize_parameters(self, prompt: str, **attrs: t.Any) -> tuple[str, DictStrAny, DictStrAny]:
- '''This handler will sanitize all attrs and setup prompt text.
-
- It takes a prompt that is given by the user, attrs that can be parsed with the prompt.
-
- Returns a tuple of three items:
- - The attributes dictionary that can be passed into LLMConfig to generate a GenerationConfig
- - The attributes dictionary that will be passed into `self.postprocess_generate`.
-
- `openllm.LLM` also has a sanitize_parameters that will just call this method.
- '''
- return prompt, attrs, attrs
-
- def postprocess_generate(self, prompt: str, generation_result: t.Any, **attrs: t.Any) -> t.Any:
- '''This handler will postprocess generation results from LLM.generate and then output nicely formatted results (if the LLM decide to do so.).
-
- You can customize how the output of the LLM looks with this hook. By default, it is a simple echo.
-
- > [!NOTE]
- > This will be used from the client side.
-
- `openllm.LLM` also has a postprocess_generate that will just call this method.
- '''
- return generation_result
-
converter.register_unstructure_hook_factory(lambda cls: lenient_issubclass(cls, LLMConfig),
lambda cls: make_dict_unstructure_fn(cls, converter, _cattrs_omit_if_default=False, _cattrs_use_linecache=True))
diff --git a/openllm-core/src/openllm_core/config/__init__.py b/openllm-core/src/openllm_core/config/__init__.py
index e1304867..d880b08f 100644
--- a/openllm-core/src/openllm_core/config/__init__.py
+++ b/openllm-core/src/openllm_core/config/__init__.py
@@ -17,6 +17,8 @@ from .configuration_gpt_neox import START_GPT_NEOX_COMMAND_DOCSTRING as START_GP
from .configuration_gpt_neox import GPTNeoXConfig as GPTNeoXConfig
from .configuration_llama import START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING
from .configuration_llama import LlamaConfig as LlamaConfig
+from .configuration_mistral import START_MISTRAL_COMMAND_DOCSTRING as START_MISTRAL_COMMAND_DOCSTRING
+from .configuration_mistral import MistralConfig as MistralConfig
from .configuration_mpt import START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING
from .configuration_mpt import MPTConfig as MPTConfig
from .configuration_opt import START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING
diff --git a/openllm-core/src/openllm_core/config/configuration_auto.py b/openllm-core/src/openllm_core/config/configuration_auto.py
index 6705fae9..2bf6cb03 100644
--- a/openllm-core/src/openllm_core/config/configuration_auto.py
+++ b/openllm-core/src/openllm_core/config/configuration_auto.py
@@ -37,7 +37,7 @@ else:
# NOTE: This is the entrypoint when adding new model config
CONFIG_MAPPING_NAMES = OrderedDict([('chatglm', 'ChatGLMConfig'), ('dolly_v2', 'DollyV2Config'), ('falcon', 'FalconConfig'), ('flan_t5', 'FlanT5Config'), ('gpt_neox', 'GPTNeoXConfig'),
('llama', 'LlamaConfig'), ('mpt', 'MPTConfig'), ('opt', 'OPTConfig'), ('stablelm', 'StableLMConfig'), ('starcoder', 'StarCoderConfig'),
- ('baichuan', 'BaichuanConfig')])
+ ('mistral', 'MistralConfig'), ('baichuan', 'BaichuanConfig')])
class _LazyConfigMapping(OrderedDictType, ReprMixin):
def __init__(self, mapping: OrderedDict[LiteralString, LiteralString]):
diff --git a/openllm-core/src/openllm_core/config/configuration_mistral.py b/openllm-core/src/openllm_core/config/configuration_mistral.py
new file mode 100644
index 00000000..3d65008e
--- /dev/null
+++ b/openllm-core/src/openllm_core/config/configuration_mistral.py
@@ -0,0 +1,46 @@
+from __future__ import annotations
+
+import openllm_core
+
+START_MISTRAL_COMMAND_DOCSTRING = '''\
+Run an LLMServer for a Mistral model.
+
+\b
+> See more information about Mistral at [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)
+
+\b
+## Usage
+
+By default, this model will use the PyTorch backend for inference. However, if vLLM is available, it will be used instead.
+
+\b
+- To use vLLM, set the environment variable ``OPENLLM_BACKEND="vllm"``
+
+\b
+Mistral Runner will use mistralai/Mistral-7B-Instruct-v0.1 as the default model. To use any other pretrained
+or fine-tuned Mistral model, set ``OPENLLM_MODEL_ID='HuggingFaceH4/zephyr-7b-alpha'``
+or pass the `--model-id` flag when running ``openllm start mistral``:
+
+\b
+$ openllm start mistral --model-id HuggingFaceH4/zephyr-7b-alpha
+'''
+DEFAULT_PROMPT_TEMPLATE = '''{instruction}'''
+
+class MistralConfig(openllm_core.LLMConfig):
+ """Mistral was proposed in [this paper](https://arxiv.org/abs/2310.06825) and first released by [Mistral AI](https://mistral.ai/news/announcing-mistral-7b/).
+
+ Mistral-7B-v0.1 is Mistral AI's first Large Language Model (LLM).
+ Refer to [Mistral's HuggingFace page](https://huggingface.co/docs/transformers/v4.35.0/en/model_doc/mistral#overview) for more information.
+ """
+ __config__ = {
+ 'name_type': 'lowercase',
+ 'url': 'https://huggingface.co/docs/transformers/v4.35.0/en/model_doc/mistral#overview',
+ 'default_id': 'mistralai/Mistral-7B-Instruct-v0.1',
+ 'architecture': 'MistralForCausalLM',
+ 'model_ids': ['mistralai/Mistral-7B-v0.1', 'mistralai/Mistral-7B-Instruct-v0.1', 'amazon/MistralLite', 'HuggingFaceH4/zephyr-7b-beta', 'HuggingFaceH4/zephyr-7b-alpha'],
+ }
+
+ class GenerationConfig:
+   top_k: int = 12  # sample from the 12 most likely next tokens
+   temperature: float = 0.75  # < 1.0 sharpens the distribution for more focused output
+   max_new_tokens: int = 256  # upper bound on tokens generated per request
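+
+# A minimal usage sketch (illustrative only; `AutoConfig.for_model` and dict-style
+# field access are assumed from this release's config API):
+#
+#   from openllm_core.config import AutoConfig
+#   config = AutoConfig.for_model('mistral')
+#   config['max_new_tokens']  # -> 256, from GenerationConfig above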
diff --git a/openllm-python/src/openllm/__init__.py b/openllm-python/src/openllm/__init__.py
index 109ea5ac..200524e0 100644
--- a/openllm-python/src/openllm/__init__.py
+++ b/openllm-python/src/openllm/__init__.py
@@ -38,6 +38,7 @@ from openllm_core.config import MPTConfig as MPTConfig
from openllm_core.config import OPTConfig as OPTConfig
from openllm_core.config import StableLMConfig as StableLMConfig
from openllm_core.config import StarCoderConfig as StarCoderConfig
+from openllm_core.config import MistralConfig as MistralConfig
from . import exceptions as exceptions
from . import utils as utils