feat(models): add vLLM support for Falcon (#223)

Aaron Pham
2023-08-16 05:57:42 -04:00
committed by GitHub
parent 3a73aacb01
commit 8796d0d63d
7 changed files with 29 additions and 4 deletions

View File

@@ -114,6 +114,7 @@ else:
_import_structure["models.llama"].extend(["VLLMLlama"])
_import_structure["models.opt"].extend(["VLLMOPT"])
_import_structure["models.dolly_v2"].extend(["VLLMDollyV2"])
_import_structure["models.falcon"].extend(["VLLMFalcon"])
_import_structure["models.gpt_neox"].extend(["VLLMGPTNeoX"])
_import_structure["models.mpt"].extend(["VLLMMPT"])
_import_structure["models.stablelm"].extend(["VLLMStableLM"])
@@ -124,6 +125,7 @@ else:
 from .models.baichuan import VLLMBaichuan as VLLMBaichuan
 from .models.dolly_v2 import VLLMDollyV2 as VLLMDollyV2
 from .models.gpt_neox import VLLMGPTNeoX as VLLMGPTNeoX
+from .models.falcon import VLLMFalcon as VLLMFalcon
 from .models.llama import VLLMLlama as VLLMLlama
 from .models.mpt import VLLMMPT as VLLMMPT
 from .models.opt import VLLMOPT as VLLMOPT
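
With the two hunks above, VLLMFalcon is registered in the package's lazy-import table and re-exported under TYPE_CHECKING, so the attribute resolves on first access without eagerly importing vLLM. A minimal sketch of what this enables (illustrative, not part of this commit; assumes openllm and the vllm extras are installed):

import openllm

# Resolved lazily through LazyModule the first time the attribute is touched.
llm_cls = openllm.VLLMFalcon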

View File

@@ -4,7 +4,7 @@ from collections import OrderedDict
 from .configuration_auto import CONFIG_MAPPING_NAMES
 from .factory import BaseAutoLLMClass, _LazyAutoMapping
-MODEL_VLLM_MAPPING_NAMES = OrderedDict([("baichuan", "VLLMBaichuan"), ("dolly_v2", "VLLMDollyV2"), ("gpt_neox", "VLLMGPTNeoX"), ("mpt", "VLLMMPT"), ("opt", "VLLMOPT"), ("stablelm", "VLLMStableLM"), ("starcoder", "VLLMStarCoder"), ("llama", "VLLMLlama")])
+MODEL_VLLM_MAPPING_NAMES = OrderedDict([("baichuan", "VLLMBaichuan"), ("dolly_v2", "VLLMDollyV2"), ("falcon", "VLLMFalcon"), ("gpt_neox", "VLLMGPTNeoX"), ("mpt", "VLLMMPT"), ("opt", "VLLMOPT"), ("stablelm", "VLLMStableLM"), ("starcoder", "VLLMStarCoder"), ("llama", "VLLMLlama")])
 MODEL_VLLM_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_VLLM_MAPPING_NAMES)
 class AutoVLLM(BaseAutoLLMClass):
   _model_mapping: t.ClassVar = MODEL_VLLM_MAPPING
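
The new ("falcon", "VLLMFalcon") entry lets AutoVLLM translate a model-type name into its vLLM implementation through MODEL_VLLM_MAPPING. A usage sketch (illustrative, not part of this commit; assumes AutoVLLM keeps the for_model constructor used by openllm's other auto classes and that vLLM is installed):

import openllm

# "falcon" is looked up in MODEL_VLLM_MAPPING and dispatched to VLLMFalcon.
llm = openllm.AutoVLLM.for_model("falcon")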

View File

@@ -1,7 +1,7 @@
 from __future__ import annotations
 import sys, typing as t
 from openllm.exceptions import MissingDependencyError
-from openllm.utils import LazyModule, is_torch_available
+from openllm.utils import LazyModule, is_torch_available, is_vllm_available
 _import_structure: dict[str, list[str]] = {"configuration_falcon": ["FalconConfig", "START_FALCON_COMMAND_DOCSTRING", "DEFAULT_PROMPT_TEMPLATE"]}
 if t.TYPE_CHECKING:
@@ -16,5 +16,11 @@ except MissingDependencyError: pass
 else:
   _import_structure["modeling_falcon"] = ["Falcon"]
   if t.TYPE_CHECKING: from .modeling_falcon import Falcon as Falcon
+try:
+  if not is_vllm_available(): raise MissingDependencyError
+except MissingDependencyError: pass
+else:
+  _import_structure["modeling_vllm_falcon"] = ["VLLMFalcon"]
+  if t.TYPE_CHECKING: from .modeling_vllm_falcon import VLLMFalcon as VLLMFalcon
 sys.modules[__name__] = LazyModule(__name__, globals()["__file__"], _import_structure)
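
The falcon package now gates modeling_vllm_falcon behind is_vllm_available(), mirroring the existing PyTorch gate. Downstream code can apply the same check before picking a backend; a sketch (illustrative, not part of this commit; assumes the PyTorch class is exported as openllm.Falcon):

import openllm
from openllm.utils import is_vllm_available

# Fall back to the PyTorch implementation when the vllm package is not importable.
llm_cls = openllm.VLLMFalcon if is_vllm_available() else openllm.Falcon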

View File

@@ -27,7 +27,9 @@ Run a LLMServer for FalconLM model.
 \b
 ## Usage
 Currently, FalconLM only supports PyTorch. Make sure ``torch`` is available in your system.
+By default, this model will use the PyTorch model for inference. However, this model also supports vLLM.
+Note that if you use vLLM, an NVIDIA GPU is required.
 \b
 FalconLM Runner will use tiiuae/falcon-7b as the default model. To change to any other FalconLM
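
Because the vLLM path needs an NVIDIA GPU, a caller can pair a CUDA check with the default model id named above. A sketch (illustrative, not part of this commit; assumes openllm.AutoLLM exists as the backend-agnostic auto class and that for_model accepts a model_id keyword):

import torch, openllm

# tiiuae/falcon-7b is the default model mentioned in the docstring above.
auto_cls = openllm.AutoVLLM if torch.cuda.is_available() else openllm.AutoLLM
llm = auto_cls.for_model("falcon", model_id="tiiuae/falcon-7b")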

View File

@@ -0,0 +1,11 @@
+from __future__ import annotations
+import logging, typing as t, openllm
+from openllm._prompt import process_prompt
+from .configuration_falcon import DEFAULT_PROMPT_TEMPLATE
+if t.TYPE_CHECKING: import vllm, transformers
+logger = logging.getLogger(__name__)
+class VLLMFalcon(openllm.LLM["vllm.LLMEngine", "transformers.PreTrainedTokenizerBase"]):
+  __openllm_internal__ = True
+  tokenizer_id = "local"
+  def sanitize_parameters(self, prompt: str, max_new_tokens: int | None = None, top_k: int | None = None, num_return_sequences: int | None = None, eos_token_id: int | None = None, use_default_prompt_template: bool = False, **attrs: t.Any) -> tuple[str, dict[str, t.Any], dict[str, t.Any]]: return process_prompt(prompt, DEFAULT_PROMPT_TEMPLATE, use_default_prompt_template, **attrs), {"max_new_tokens": max_new_tokens, "top_k": top_k, "num_return_sequences": num_return_sequences, "eos_token_id": eos_token_id, **attrs}, {}
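
sanitize_parameters (shown above) splits a call into the final prompt string, the generation kwargs, and an empty post-processing dict. A sketch of the resulting shape (illustrative, not part of this commit; construction via AutoVLLM is assumed to work as in the earlier sketches, and vLLM plus a GPU are required):

import openllm

llm = openllm.AutoVLLM.for_model("falcon")
prompt, generate_kwargs, postprocess_kwargs = llm.sanitize_parameters("Name the largest planet.", max_new_tokens=64, top_k=10)
# prompt             -> passed through as-is unless use_default_prompt_template=True,
#                       in which case it is rendered with DEFAULT_PROMPT_TEMPLATE
# generate_kwargs    -> {"max_new_tokens": 64, "top_k": 10, "num_return_sequences": None, "eos_token_id": None}
# postprocess_kwargs -> {}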

View File

@@ -9,6 +9,9 @@ class VLLMBaichuan(metaclass=_DummyMetaclass):
 class VLLMDollyV2(metaclass=_DummyMetaclass):
   _backends=["vllm"]
   def __init__(self,*param_decls:_t.Any,**attrs: _t.Any):_require_backends(self,["vllm"])
+class VLLMFalcon(metaclass=_DummyMetaclass):
+  _backends=["vllm","einops","xformers"]
+  def __init__(self,*param_decls:_t.Any,**attrs: _t.Any):_require_backends(self,["vllm","einops","xformers"])
 class VLLMGPTNeoX(metaclass=_DummyMetaclass):
   _backends=["vllm"]
   def __init__(self,*param_decls:_t.Any,**attrs: _t.Any):_require_backends(self,["vllm"])
@@ -31,4 +34,4 @@ class AutoVLLM(metaclass=_DummyMetaclass):
_backends=["vllm"]
def __init__(self,*param_decls:_t.Any,**attrs: _t.Any):_require_backends(self,["vllm"])
MODEL_VLLM_MAPPING_NAMES:_t.Any=None
__all__:list[str]=["MODEL_VLLM_MAPPING_NAMES","AutoVLLM","VLLMBaichuan","VLLMDollyV2","VLLMGPTNeoX","VLLMMPT","VLLMOPT","VLLMStableLM","VLLMStarCoder","VLLMLlama"]
__all__:list[str]=["MODEL_VLLM_MAPPING_NAMES","AutoVLLM","VLLMBaichuan","VLLMDollyV2","VLLMFalcon","VLLMGPTNeoX","VLLMMPT","VLLMOPT","VLLMStableLM","VLLMStarCoder","VLLMLlama"]