Files
OpenLLM/openllm-python/src/openllm/models/mpt/configuration_mpt.py
2023-08-15 02:11:14 -04:00

66 lines
3.2 KiB
Python

from __future__ import annotations
import sys, typing as t
import openllm
if t.TYPE_CHECKING: MPTPromptType = t.Literal["default", "instruct", "chat", "storywriter"]
else: MPTPromptType = str
class MPTConfig(openllm.LLMConfig):
"""MPT is a decoder-style transformer pretrained from scratch on English text and code.
This model was trained by [MosaicML](https://www.mosaicml.com/).
``openllm.MPT`` encapsulate a family of MPT variants that is publicly available
on HuggingFace. Refers [HuggingFace's MosaicML page](https://huggingface.co/mosaicml)
for more details on specific models.
"""
__config__ = {"name_type": "lowercase", "trust_remote_code": True, "url": "https://huggingface.co/mosaicml", "timeout": int(36e6), "requirements": ["triton", "einops"], "architecture": "MPTForCausalLM",
"default_id": "mosaicml/mpt-7b-instruct", "model_ids": ["mosaicml/mpt-7b", "mosaicml/mpt-7b-instruct", "mosaicml/mpt-7b-chat", "mosaicml/mpt-7b-storywriter", "mosaicml/mpt-30b", "mosaicml/mpt-30b-instruct", "mosaicml/mpt-30b-chat"]}
prompt_type: MPTPromptType = openllm.LLMConfig.Field('"default"', description="Given prompt type for running MPT. Default will be inferred from model name if pretrained.")
max_sequence_length: int = openllm.LLMConfig.Field(2048, description="Max sequence length to run MPT with. Note that MPT is trained ith sequence length of 2048, but with [ALiBi](https://arxiv.org/abs/2108.12409) it can set up to 4096 (for 7b models) and 16384 (for 30b models)")
class GenerationConfig:
max_new_tokens: int = 128
temperature: float = 0
top_p: float = 0.8
START_MPT_COMMAND_DOCSTRING = """\
Run a LLMServer for MPT model.
\b
> See more information about MPT at [HuggingFace's MosaicML page](https://huggingface.co/mosaicml)
\b
## Usage
Currently, MPT only supports PyTorch. Make sure ``torch`` is available in your system.
If you want to use Flash Attention support with openai/triton, make sure to install OpenLLM with
\b
```bash
pip install "openllm[mpt]"
```
\b
MPT Runner will use mosaicml/mpt-7b-instruct as the default model. To change to any other MPT
saved pretrained, or a fine-tune MPT, provide ``OPENLLM_MPT_MODEL_ID='mosaicml/mpt-30b'``
or provide `--model-id` flag when running ``openllm start mpt``:
\b
$ openllm start mpt --model-id mosaicml/mpt-30b
"""
INSTRUCTION_KEY, RESPONSE_KEY, END_KEY = "### Instruction:", "### Response:", "### End"
INTRO_BLURB = "Below is an instruction that describes a task. Write a response that appropriately completes the request."
# NOTE: This is the prompt that is used for generating responses using an already
# trained model. It ends with the response key, where the job of the model is to provide
# the completion that follows it (i.e. the response itself).
_chat_prompt, _default_prompt, _instruct_prompt = """{instruction}""", """{instruction}""", """{intro}
{instruction_key}
{instruction}
{response_key}
""".format(intro=INTRO_BLURB, instruction_key=INSTRUCTION_KEY, instruction="{instruction}", response_key=RESPONSE_KEY)
PROMPT_MAPPING = {"default": _default_prompt, "instruct": _instruct_prompt, "storywriter": _default_prompt, "chat": _chat_prompt}
def _get_prompt(model_type: str) -> str: return PROMPT_MAPPING[model_type]
DEFAULT_PROMPT_TEMPLATE = _get_prompt