diff --git a/Dockerfile b/Dockerfile index fe56d1ca..a9c0707f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -29,7 +29,7 @@ COPY hatch.toml README.md CHANGELOG.md openllm-python/pyproject.toml /openllm-py # below RUN --mount=type=cache,target=/root/.cache/pip \ pip3 install -v --no-cache-dir \ - "ray==2.6.0" "xformers==0.0.23" "vllm==0.2.6" && \ + "ray==2.6.0" "xformers==0.0.23" "vllm==0.2.7" && \ pip3 install --no-cache-dir -e /openllm-python/ COPY openllm-core/src openllm-core/src diff --git a/changelog.d/837.change.md b/changelog.d/837.change.md new file mode 100644 index 00000000..6fb32f02 --- /dev/null +++ b/changelog.d/837.change.md @@ -0,0 +1 @@ +Bump vllm to 0.2.7 for a newly built bento diff --git a/openllm-python/README.md b/openllm-python/README.md index d2f1f35e..eda0d8d0 100644 --- a/openllm-python/README.md +++ b/openllm-python/README.md @@ -1445,7 +1445,7 @@ openllm start squeeze-ai-lab/sq-llama-2-7b-w4-s0 --quantize squeezellm --seriali ``` > [!IMPORTANT] -> Since both `squeezellm` and `awq` are weight-aware quantization methods, meaning the quantization is done during training, all pre-trained weights needs to get quantized before inference time. Make sure to fine compatible weights on HuggingFace Hub for your model of choice. +> Since both `squeezellm` and `awq` are weight-aware quantization methods, meaning the quantization is done during training, all pre-trained weights need to get quantized before inference time. Make sure to find compatible weights on HuggingFace Hub for your model of choice. 
## 🛠️ Serving fine-tuning layers diff --git a/openllm-python/pyproject.toml b/openllm-python/pyproject.toml index d318a52f..cbcecb45 100644 --- a/openllm-python/pyproject.toml +++ b/openllm-python/pyproject.toml @@ -119,7 +119,7 @@ openai = ["openai[datalib]>=1", "tiktoken"] playground = ["jupyter", "notebook", "ipython", "jupytext", "nbformat"] qwen = ["cpm-kernels", "tiktoken"] starcoder = ["bitsandbytes"] -vllm = ["vllm==0.2.6", "ray==2.6.0"] +vllm = ["vllm==0.2.7", "ray==2.6.0"] [tool.hatch.version] fallback-version = "0.0.0" diff --git a/tools/dependencies.py b/tools/dependencies.py index 1a07526d..72550486 100755 --- a/tools/dependencies.py +++ b/tools/dependencies.py @@ -155,7 +155,7 @@ GGML_DEPS = ['ctransformers'] CTRANSLATE_DEPS = ['ctranslate2>=3.22.0'] AWQ_DEPS = ['autoawq'] GPTQ_DEPS = ['auto-gptq[triton]>=0.4.2'] -VLLM_DEPS = ['vllm==0.2.6', 'ray==2.6.0'] +VLLM_DEPS = ['vllm==0.2.7', 'ray==2.6.0'] _base_requirements: dict[str, t.Any] = { inflection.dasherize(name): config_cls.__openllm_requirements__