diff --git a/Dockerfile b/Dockerfile index fe56d1ca..a9c0707f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -29,7 +29,7 @@ COPY hatch.toml README.md CHANGELOG.md openllm-python/pyproject.toml /openllm-py # below RUN --mount=type=cache,target=/root/.cache/pip \ pip3 install -v --no-cache-dir \ - "ray==2.6.0" "xformers==0.0.23" "vllm==0.2.6" && \ + "ray==2.6.0" "xformers==0.0.23" "vllm==0.2.7" && \ pip3 install --no-cache-dir -e /openllm-python/ COPY openllm-core/src openllm-core/src diff --git a/changelog.d/837.change.md b/changelog.d/837.change.md new file mode 100644 index 00000000..6fb32f02 --- /dev/null +++ b/changelog.d/837.change.md @@ -0,0 +1 @@ +Bump vllm to 0.2.7 for a newly built bento diff --git a/openllm-python/README.md b/openllm-python/README.md index d2f1f35e..eda0d8d0 100644 --- a/openllm-python/README.md +++ b/openllm-python/README.md @@ -1445,7 +1445,7 @@ openllm start squeeze-ai-lab/sq-llama-2-7b-w4-s0 --quantize squeezellm --seriali ``` > [!IMPORTANT] -> Since both `squeezellm` and `awq` are weight-aware quantization methods, meaning the quantization is done during training, all pre-trained weights needs to get quantized before inference time. Make sure to fine compatible weights on HuggingFace Hub for your model of choice. +> Since both `squeezellm` and `awq` are weight-aware quantization methods, meaning the quantization is done during training, all pre-trained weights need to get quantized before inference time. Make sure to find compatible weights on HuggingFace Hub for your model of choice. 
## 🛠️ Serving fine-tuning layers diff --git a/openllm-python/pyproject.toml b/openllm-python/pyproject.toml index d318a52f..cbcecb45 100644 --- a/openllm-python/pyproject.toml +++ b/openllm-python/pyproject.toml @@ -119,7 +119,7 @@ openai = ["openai[datalib]>=1", "tiktoken"] playground = ["jupyter", "notebook", "ipython", "jupytext", "nbformat"] qwen = ["cpm-kernels", "tiktoken"] starcoder = ["bitsandbytes"] -vllm = ["vllm==0.2.6", "ray==2.6.0"] +vllm = ["vllm==0.2.7", "ray==2.6.0"] [tool.hatch.version] fallback-version = "0.0.0" diff --git a/tools/dependencies.py b/tools/dependencies.py index 1a07526d..72550486 100755 --- a/tools/dependencies.py +++ b/tools/dependencies.py @@ -155,7 +155,7 @@ GGML_DEPS = ['ctransformers'] CTRANSLATE_DEPS = ['ctranslate2>=3.22.0'] AWQ_DEPS = ['autoawq'] GPTQ_DEPS = ['auto-gptq[triton]>=0.4.2'] -VLLM_DEPS = ['vllm==0.2.6', 'ray==2.6.0'] +VLLM_DEPS = ['vllm==0.2.7', 'ray==2.6.0'] _base_requirements: dict[str, t.Any] = { inflection.dasherize(name): config_cls.__openllm_requirements__