diff --git a/changelog.d/223.feature.md b/changelog.d/223.feature.md new file mode 100644 index 00000000..dadc2b75 --- /dev/null +++ b/changelog.d/223.feature.md @@ -0,0 +1 @@ +Running vLLM with Falcon is now supported diff --git a/openllm-python/src/openllm/__init__.py b/openllm-python/src/openllm/__init__.py index e1d45aec..20e2245c 100644 --- a/openllm-python/src/openllm/__init__.py +++ b/openllm-python/src/openllm/__init__.py @@ -114,6 +114,7 @@ else: _import_structure["models.llama"].extend(["VLLMLlama"]) _import_structure["models.opt"].extend(["VLLMOPT"]) _import_structure["models.dolly_v2"].extend(["VLLMDollyV2"]) + _import_structure["models.falcon"].extend(["VLLMFalcon"]) _import_structure["models.gpt_neox"].extend(["VLLMGPTNeoX"]) _import_structure["models.mpt"].extend(["VLLMMPT"]) _import_structure["models.stablelm"].extend(["VLLMStableLM"]) @@ -124,6 +125,7 @@ else: from .models.baichuan import VLLMBaichuan as VLLMBaichuan from .models.dolly_v2 import VLLMDollyV2 as VLLMDollyV2 from .models.gpt_neox import VLLMGPTNeoX as VLLMGPTNeoX + from .models.falcon import VLLMFalcon as VLLMFalcon from .models.llama import VLLMLlama as VLLMLlama from .models.mpt import VLLMMPT as VLLMMPT from .models.opt import VLLMOPT as VLLMOPT diff --git a/openllm-python/src/openllm/models/auto/modeling_vllm_auto.py b/openllm-python/src/openllm/models/auto/modeling_vllm_auto.py index c510441e..12778a65 100644 --- a/openllm-python/src/openllm/models/auto/modeling_vllm_auto.py +++ b/openllm-python/src/openllm/models/auto/modeling_vllm_auto.py @@ -4,7 +4,7 @@ from collections import OrderedDict from .configuration_auto import CONFIG_MAPPING_NAMES from .factory import BaseAutoLLMClass, _LazyAutoMapping -MODEL_VLLM_MAPPING_NAMES = OrderedDict([("baichuan", "VLLMBaichuan"), ("dolly_v2", "VLLMDollyV2"), ("gpt_neox", "VLLMGPTNeoX"), ("mpt", "VLLMMPT"), ("opt", "VLLMOPT"), ("stablelm", "VLLMStableLM"), ("starcoder", "VLLMStarCoder"), ("llama", "VLLMLlama")]) +MODEL_VLLM_MAPPING_NAMES = 
OrderedDict([("baichuan", "VLLMBaichuan"), ("dolly_v2", "VLLMDollyV2"), ("falcon", "VLLMFalcon"), ("gpt_neox", "VLLMGPTNeoX"), ("mpt", "VLLMMPT"), ("opt", "VLLMOPT"), ("stablelm", "VLLMStableLM"), ("starcoder", "VLLMStarCoder"), ("llama", "VLLMLlama")]) MODEL_VLLM_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_VLLM_MAPPING_NAMES) class AutoVLLM(BaseAutoLLMClass): _model_mapping: t.ClassVar = MODEL_VLLM_MAPPING diff --git a/openllm-python/src/openllm/models/falcon/__init__.py b/openllm-python/src/openllm/models/falcon/__init__.py index bfc5341b..0d6f3138 100644 --- a/openllm-python/src/openllm/models/falcon/__init__.py +++ b/openllm-python/src/openllm/models/falcon/__init__.py @@ -1,7 +1,7 @@ from __future__ import annotations import sys, typing as t from openllm.exceptions import MissingDependencyError -from openllm.utils import LazyModule, is_torch_available +from openllm.utils import LazyModule, is_torch_available, is_vllm_available _import_structure: dict[str, list[str]] = {"configuration_falcon": ["FalconConfig", "START_FALCON_COMMAND_DOCSTRING", "DEFAULT_PROMPT_TEMPLATE"]} if t.TYPE_CHECKING: @@ -16,5 +16,11 @@ except MissingDependencyError: pass else: _import_structure["modeling_falcon"] = ["Falcon"] if t.TYPE_CHECKING: from .modeling_falcon import Falcon as Falcon +try: + if not is_vllm_available(): raise MissingDependencyError +except MissingDependencyError: pass +else: + _import_structure["modeling_vllm_falcon"] = ["VLLMFalcon"] + if t.TYPE_CHECKING: from .modeling_vllm_falcon import VLLMFalcon as VLLMFalcon sys.modules[__name__] = LazyModule(__name__, globals()["__file__"], _import_structure) diff --git a/openllm-python/src/openllm/models/falcon/configuration_falcon.py b/openllm-python/src/openllm/models/falcon/configuration_falcon.py index 6b1c90a2..07101fb1 100644 --- a/openllm-python/src/openllm/models/falcon/configuration_falcon.py +++ b/openllm-python/src/openllm/models/falcon/configuration_falcon.py @@ -27,7 +27,9 @@ Run a LLMServer for 
FalconLM model. \b ## Usage -Currently, FalconLM only supports PyTorch. Make sure ``torch`` is available in your system. +By default, this model will use the PyTorch model for inference. However, this model also supports vLLM. + +Note that if you use vLLM, an NVIDIA GPU is required. \b FalconLM Runner will use tiiuae/falcon-7b as the default model. To change to any other FalconLM diff --git a/openllm-python/src/openllm/models/falcon/modeling_vllm_falcon.py b/openllm-python/src/openllm/models/falcon/modeling_vllm_falcon.py new file mode 100644 index 00000000..14a5af2e --- /dev/null +++ b/openllm-python/src/openllm/models/falcon/modeling_vllm_falcon.py @@ -0,0 +1,11 @@ +from __future__ import annotations +import logging, typing as t, openllm +from openllm._prompt import process_prompt +from .configuration_falcon import DEFAULT_PROMPT_TEMPLATE +if t.TYPE_CHECKING: import vllm, transformers + +logger = logging.getLogger(__name__) +class VLLMFalcon(openllm.LLM["vllm.LLMEngine", "transformers.PreTrainedTokenizerBase"]): + __openllm_internal__ = True + tokenizer_id = "local" + def sanitize_parameters(self, prompt: str, max_new_tokens: int | None = None, top_k: int | None = None, num_return_sequences: int | None = None, eos_token_id: int | None = None, use_default_prompt_template: bool = False, **attrs: t.Any) -> tuple[str, dict[str, t.Any], dict[str, t.Any]]: return process_prompt(prompt, DEFAULT_PROMPT_TEMPLATE, use_default_prompt_template, **attrs), {"max_new_tokens": max_new_tokens, "top_k": top_k, "num_return_sequences": num_return_sequences, "eos_token_id": eos_token_id, **attrs}, {} diff --git a/openllm-python/src/openllm/utils/dummy_vllm_objects.py b/openllm-python/src/openllm/utils/dummy_vllm_objects.py index f227b3ff..d1ca58c8 100644 --- a/openllm-python/src/openllm/utils/dummy_vllm_objects.py +++ b/openllm-python/src/openllm/utils/dummy_vllm_objects.py @@ -9,6 +9,9 @@ class VLLMBaichuan(metaclass=_DummyMetaclass): class VLLMDollyV2(metaclass=_DummyMetaclass):
_backends=["vllm"] def __init__(self,*param_decls:_t.Any,**attrs: _t.Any):_require_backends(self,["vllm"]) +class VLLMFalcon(metaclass=_DummyMetaclass): + _backends=["vllm","einops","xformers"] + def __init__(self,*param_decls:_t.Any,**attrs: _t.Any):_require_backends(self,["vllm","einops","xformers"]) class VLLMGPTNeoX(metaclass=_DummyMetaclass): _backends=["vllm"] def __init__(self,*param_decls:_t.Any,**attrs: _t.Any):_require_backends(self,["vllm"]) @@ -31,4 +34,4 @@ class AutoVLLM(metaclass=_DummyMetaclass): _backends=["vllm"] def __init__(self,*param_decls:_t.Any,**attrs: _t.Any):_require_backends(self,["vllm"]) MODEL_VLLM_MAPPING_NAMES:_t.Any=None -__all__:list[str]=["MODEL_VLLM_MAPPING_NAMES","AutoVLLM","VLLMBaichuan","VLLMDollyV2","VLLMGPTNeoX","VLLMMPT","VLLMOPT","VLLMStableLM","VLLMStarCoder","VLLMLlama"] +__all__:list[str]=["MODEL_VLLM_MAPPING_NAMES","AutoVLLM","VLLMBaichuan","VLLMDollyV2","VLLMFalcon","VLLMGPTNeoX","VLLMMPT","VLLMOPT","VLLMStableLM","VLLMStarCoder","VLLMLlama"]