Mirror of https://github.com/bentoml/OpenLLM.git, synced 2026-03-04 15:16:03 -05:00
infra: prepare for release 0.4.16 [generated] [skip ci]
Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
openllm-python/CHANGELOG.md (generated) | 15 +++++++++++++++
@@ -18,6 +18,21 @@ This changelog is managed by towncrier and is compiled at release time.
 
 <!-- towncrier release notes start -->
 
+## [0.4.16](https://github.com/bentoml/openllm/tree/v0.4.16)
+
+### Changes
+
+- Update vLLM to 0.2.2, bringing in support and a number of improvements from upstream.
+  [#695](https://github.com/bentoml/openllm/issues/695)
+
+
+### Features
+
+- Added an experimental CTranslate backend for running on CPU, which yields higher TPS than the PyTorch counterpart.
+
+  This has been tested on c5.4xlarge instances.
+  [#698](https://github.com/bentoml/openllm/issues/698)
+
 ## [0.4.15](https://github.com/bentoml/openllm/tree/v0.4.15)
 
 ### Features
@@ -40,8 +40,8 @@ classifiers = [
 dependencies = [
   "bentoml[io]>=1.1.9",
   "transformers[torch,tokenizers]>=4.35.0",
-  "openllm-client>=0.4.15",
-  "openllm-core>=0.4.15",
+  "openllm-client>=0.4.16",
+  "openllm-core>=0.4.16",
   "safetensors",
   "optimum>=1.12.0",
   "accelerate",
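The hunk above bumps the `openllm-client` and `openllm-core` floors from 0.4.15 to 0.4.16. A quick way to check whether an installed version satisfies such a `>=` pin is a plain tuple comparison on the version components; a minimal sketch (the `satisfies` helper is hypothetical, the pin values are taken from the diff):

```python
# Minimum versions required after this commit, per the diff above.
PINS = {
    "openllm-client": "0.4.16",
    "openllm-core": "0.4.16",
    "bentoml": "1.1.9",
    "transformers": "4.35.0",
}

def satisfies(installed: str, minimum: str) -> bool:
    """Return True if `installed` meets a `>=minimum` pin.

    Compares dotted versions component-wise as integer tuples,
    so "1.1.10" correctly sorts above "1.1.9" (a plain string
    comparison would get this wrong).
    """
    as_tuple = lambda v: tuple(int(part) for part in v.split("."))
    return as_tuple(installed) >= as_tuple(minimum)

# A 0.4.15 client no longer satisfies the bumped pin:
print(satisfies("0.4.16", PINS["openllm-client"]))  # True
print(satisfies("0.4.15", PINS["openllm-client"]))  # False
```

This is only a sketch for simple `X.Y.Z` versions; real resolvers handle pre-release and local-version segments as well.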