mirror of https://github.com/bentoml/OpenLLM.git (synced 2026-03-05 07:36:15 -05:00)
feat(models): add vLLM support for Falcon (#223)
@@ -114,6 +114,7 @@ else:
   _import_structure["models.llama"].extend(["VLLMLlama"])
   _import_structure["models.opt"].extend(["VLLMOPT"])
   _import_structure["models.dolly_v2"].extend(["VLLMDollyV2"])
+  _import_structure["models.falcon"].extend(["VLLMFalcon"])
   _import_structure["models.gpt_neox"].extend(["VLLMGPTNeoX"])
   _import_structure["models.mpt"].extend(["VLLMMPT"])
   _import_structure["models.stablelm"].extend(["VLLMStableLM"])
@@ -124,6 +125,7 @@ else:
   from .models.baichuan import VLLMBaichuan as VLLMBaichuan
   from .models.dolly_v2 import VLLMDollyV2 as VLLMDollyV2
   from .models.gpt_neox import VLLMGPTNeoX as VLLMGPTNeoX
+  from .models.falcon import VLLMFalcon as VLLMFalcon
   from .models.llama import VLLMLlama as VLLMLlama
   from .models.mpt import VLLMMPT as VLLMMPT
   from .models.opt import VLLMOPT as VLLMOPT
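
Both hunks above follow the codebase's lazy-export convention: the runtime branch only records where VLLMFalcon lives in _import_structure, deferring the actual import, while the t.TYPE_CHECKING branch re-exports the name eagerly so static type checkers can see it. A minimal sketch of that convention (names simplified; not the actual OpenLLM code -- the import itself is performed later by a LazyModule, sketched further below):

from __future__ import annotations
import typing as t

# Runtime: record which submodule defines each public symbol; nothing is
# imported yet. A LazyModule resolves the import on first attribute access.
_import_structure: dict[str, list[str]] = {"models.falcon": []}
_import_structure["models.falcon"].extend(["VLLMFalcon"])

if t.TYPE_CHECKING:
  # Type checkers only: a real import, with the "X as X" re-export marking
  # the name as part of the package's public API.
  from .models.falcon import VLLMFalcon as VLLMFalcon
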
@@ -4,7 +4,7 @@ from collections import OrderedDict
 from .configuration_auto import CONFIG_MAPPING_NAMES
 from .factory import BaseAutoLLMClass, _LazyAutoMapping
 
-MODEL_VLLM_MAPPING_NAMES = OrderedDict([("baichuan", "VLLMBaichuan"), ("dolly_v2", "VLLMDollyV2"), ("gpt_neox", "VLLMGPTNeoX"), ("mpt", "VLLMMPT"), ("opt", "VLLMOPT"), ("stablelm", "VLLMStableLM"), ("starcoder", "VLLMStarCoder"), ("llama", "VLLMLlama")])
+MODEL_VLLM_MAPPING_NAMES = OrderedDict([("baichuan", "VLLMBaichuan"), ("dolly_v2", "VLLMDollyV2"), ("falcon", "VLLMFalcon"), ("gpt_neox", "VLLMGPTNeoX"), ("mpt", "VLLMMPT"), ("opt", "VLLMOPT"), ("stablelm", "VLLMStableLM"), ("starcoder", "VLLMStarCoder"), ("llama", "VLLMLlama")])
 MODEL_VLLM_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_VLLM_MAPPING_NAMES)
 class AutoVLLM(BaseAutoLLMClass):
   _model_mapping: t.ClassVar = MODEL_VLLM_MAPPING
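
MODEL_VLLM_MAPPING wires the "falcon" config name to the VLLMFalcon class without importing it up front. A rough sketch of what a lazy lookup like _LazyAutoMapping can do under the hood (the openllm.models.<type> module path and the _load helper are assumptions for illustration, not the real implementation):

import importlib
from collections import OrderedDict

MODEL_VLLM_MAPPING_NAMES = OrderedDict([("falcon", "VLLMFalcon")])

def _load(model_type: str) -> type:
  # Resolve the class name registered for this model type, importing its
  # module only now, at first lookup (module path assumed for this sketch).
  module = importlib.import_module(f"openllm.models.{model_type}")
  return getattr(module, MODEL_VLLM_MAPPING_NAMES[model_type])
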
@@ -1,7 +1,7 @@
 from __future__ import annotations
 import sys, typing as t
 from openllm.exceptions import MissingDependencyError
-from openllm.utils import LazyModule, is_torch_available
+from openllm.utils import LazyModule, is_torch_available, is_vllm_available
 
 _import_structure: dict[str, list[str]] = {"configuration_falcon": ["FalconConfig", "START_FALCON_COMMAND_DOCSTRING", "DEFAULT_PROMPT_TEMPLATE"]}
 if t.TYPE_CHECKING:
@@ -16,5 +16,11 @@ except MissingDependencyError: pass
 else:
   _import_structure["modeling_falcon"] = ["Falcon"]
   if t.TYPE_CHECKING: from .modeling_falcon import Falcon as Falcon
+try:
+  if not is_vllm_available(): raise MissingDependencyError
+except MissingDependencyError: pass
+else:
+  _import_structure["modeling_vllm_falcon"] = ["VLLMFalcon"]
+  if t.TYPE_CHECKING: from .modeling_vllm_falcon import VLLMFalcon as VLLMFalcon
 
 sys.modules[__name__] = LazyModule(__name__, globals()["__file__"], _import_structure)
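
The sys.modules[__name__] = LazyModule(...) assignment at the bottom swaps the package's module object for one whose attribute access performs the deferred imports recorded in _import_structure. A simplified sketch of how such a module can be built (illustrative only; OpenLLM's real LazyModule carries more machinery):

import importlib, types

class LazyModule(types.ModuleType):
  def __init__(self, name: str, file: str, import_structure: dict[str, list[str]]):
    super().__init__(name)
    self.__file__ = file
    # Map each exported attribute back to the submodule that defines it.
    self._attr_to_module = {attr: mod for mod, attrs in import_structure.items() for attr in attrs}

  def __getattr__(self, attr: str):
    # Import the owning submodule only on first access, then cache the result.
    if attr not in self._attr_to_module: raise AttributeError(attr)
    module = importlib.import_module(f".{self._attr_to_module[attr]}", self.__name__)
    value = getattr(module, attr)
    setattr(self, attr, value)
    return value
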
@@ -27,7 +27,9 @@ Run a LLMServer for FalconLM model.
 \b
 ## Usage
 
-Currently, FalconLM only supports PyTorch. Make sure ``torch`` is available in your system.
+By default, this model will use the PyTorch model for inference. However, this model also supports vLLM.
+
+Note that if you use vLLM, an NVIDIA GPU is required.
 
 \b
 FalconLM Runner will use tiiuae/falcon-7b as the default model. To change to any other FalconLM
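
As a hedged usage sketch of what the docstring describes: assuming AutoVLLM inherits a for_model constructor from BaseAutoLLMClass like the codebase's other Auto classes (an assumption; the constructor is not shown in this diff), loading Falcon on the vLLM backend might look like:

import openllm

# Assumption: for_model(...) is provided by BaseAutoLLMClass. Requires
# vLLM and an NVIDIA GPU, per the docstring above.
llm = openllm.AutoVLLM.for_model("falcon", model_id="tiiuae/falcon-7b")
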
@@ -0,0 +1,11 @@
+from __future__ import annotations
+import logging, typing as t, openllm
+from openllm._prompt import process_prompt
+from .configuration_falcon import DEFAULT_PROMPT_TEMPLATE
+if t.TYPE_CHECKING: import vllm, transformers
+
+logger = logging.getLogger(__name__)
+class VLLMFalcon(openllm.LLM["vllm.LLMEngine", "transformers.PreTrainedTokenizerBase"]):
+  __openllm_internal__ = True
+  tokenizer_id = "local"
+  def sanitize_parameters(self, prompt: str, max_new_tokens: int | None = None, top_k: int | None = None, num_return_sequences: int | None = None, eos_token_id: int | None = None, use_default_prompt_template: bool = False, **attrs: t.Any) -> tuple[str, dict[str, t.Any], dict[str, t.Any]]: return process_prompt(prompt, DEFAULT_PROMPT_TEMPLATE, use_default_prompt_template, **attrs), {"max_new_tokens": max_new_tokens, "top_k": top_k, "num_return_sequences": num_return_sequences, "eos_token_id": eos_token_id, **attrs}, {}
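
sanitize_parameters implements OpenLLM's three-part contract: it returns the (optionally templated) prompt, the kwargs to forward to generation, and the kwargs for post-processing. A tiny self-contained illustration of that return shape (the template here is a stand-in, not Falcon's real DEFAULT_PROMPT_TEMPLATE):

from __future__ import annotations

def sanitize_parameters(prompt: str, max_new_tokens: int | None = None, top_k: int | None = None, **attrs):
  template = "Below is an instruction.\n{instruction}\n"  # stand-in template
  return template.format(instruction=prompt), {"max_new_tokens": max_new_tokens, "top_k": top_k, **attrs}, {}

# Callers unpack (prompt, generate_kwargs, postprocess_kwargs):
prompt, generate_kwargs, postprocess_kwargs = sanitize_parameters("What is Falcon?", max_new_tokens=64)
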
@@ -9,6 +9,9 @@ class VLLMBaichuan(metaclass=_DummyMetaclass):
 class VLLMDollyV2(metaclass=_DummyMetaclass):
   _backends=["vllm"]
   def __init__(self,*param_decls:_t.Any,**attrs: _t.Any):_require_backends(self,["vllm"])
+class VLLMFalcon(metaclass=_DummyMetaclass):
+  _backends=["vllm","einops","xformers"]
+  def __init__(self,*param_decls:_t.Any,**attrs: _t.Any):_require_backends(self,["vllm","einops","xformers"])
 class VLLMGPTNeoX(metaclass=_DummyMetaclass):
   _backends=["vllm"]
   def __init__(self,*param_decls:_t.Any,**attrs: _t.Any):_require_backends(self,["vllm"])
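
These dummy classes are the placeholders users hit when vLLM (and, for Falcon, einops/xformers) is not installed; instantiating one raises an informative error instead of an opaque import failure. A minimal sketch of the pattern with stand-in helpers (_DummyMetaclass and _require_backends here are reconstructions for illustration, not OpenLLM's definitions):

import importlib.util, typing as _t

def _require_backends(obj: _t.Any, backends: list[str]) -> None:
  # Fail with an actionable message naming every missing backend.
  name = getattr(obj, "__name__", None) or type(obj).__name__
  missing = [b for b in backends if importlib.util.find_spec(b) is None]
  if missing: raise ImportError(f"{name} requires the following backends: {', '.join(missing)}")

class _DummyMetaclass(type):
  # Guard attribute access on the class itself as well as instantiation.
  def __getattr__(cls, attr: str) -> _t.Any:
    _require_backends(cls, cls._backends)
    raise AttributeError(attr)
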
@@ -31,4 +34,4 @@ class AutoVLLM(metaclass=_DummyMetaclass):
   _backends=["vllm"]
   def __init__(self,*param_decls:_t.Any,**attrs: _t.Any):_require_backends(self,["vllm"])
 MODEL_VLLM_MAPPING_NAMES:_t.Any=None
-__all__:list[str]=["MODEL_VLLM_MAPPING_NAMES","AutoVLLM","VLLMBaichuan","VLLMDollyV2","VLLMGPTNeoX","VLLMMPT","VLLMOPT","VLLMStableLM","VLLMStarCoder","VLLMLlama"]
+__all__:list[str]=["MODEL_VLLM_MAPPING_NAMES","AutoVLLM","VLLMBaichuan","VLLMDollyV2","VLLMFalcon","VLLMGPTNeoX","VLLMMPT","VLLMOPT","VLLMStableLM","VLLMStarCoder","VLLMLlama"]