diff --git a/CHANGELOG.md b/CHANGELOG.md index d58dfb45..c4e16192 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,24 @@ This changelog is managed by towncrier and is compiled at release time. +## [0.3.5](https://github.com/bentoml/openllm/tree/v0.3.5) + +### Features + +- Added support for continuous batching via vLLM + + Currently benchmark shows that 100 concurrent requests shows around 1218 TPS on 1 A100 running meta-llama/Llama-2-13b-chat-hf + [#349](https://github.com/bentoml/openllm/issues/349) + + +### Bug fix + +- Set a default serialisation for all models. + + Currently, only Llama 2 will use safetensors as default format. For all other models, if they have safetensors format, then it can be opted in via `--serialisation safetensors` + [#355](https://github.com/bentoml/openllm/issues/355) + + ## [0.3.4](https://github.com/bentoml/openllm/tree/v0.3.4) ### Bug fix diff --git a/changelog.d/349.feature.md b/changelog.d/349.feature.md deleted file mode 100644 index 14d0bb97..00000000 --- a/changelog.d/349.feature.md +++ /dev/null @@ -1,3 +0,0 @@ -Added support for continuous batching via vLLM - -Currently benchmark shows that 100 concurrent requests shows around 1218 TPS on 1 A100 running meta-llama/Llama-2-13b-chat-hf diff --git a/changelog.d/355.fix.md b/changelog.d/355.fix.md deleted file mode 100644 index 68bb637b..00000000 --- a/changelog.d/355.fix.md +++ /dev/null @@ -1,3 +0,0 @@ -Set a default serialisation for all models. - -Currently, only Llama 2 will use safetensors as default format. 
For all other models, if they have safetensors format, then it will can be opt-int via `--serialisation safetensors` diff --git a/openllm-contrib/clojure/package.json b/openllm-contrib/clojure/package.json index 643700df..4dad5742 100644 --- a/openllm-contrib/clojure/package.json +++ b/openllm-contrib/clojure/package.json @@ -1,6 +1,6 @@ { "name": "openllm-clojure-ui", - "version": "", + "version": "0.3.5", "description": "OpenLLM Clojure UI", "repository": { "url": "git@github.com:bentoml/OpenLLM.git", diff --git a/openllm-node/package.json b/openllm-node/package.json index 550852f3..61cbfab2 100644 --- a/openllm-node/package.json +++ b/openllm-node/package.json @@ -1,6 +1,6 @@ { "name": "@bentoml/openllm-node", - "version": "0.3.5.dev0", + "version": "0.3.5", "description": "NodeJS library for OpenLLM", "type": "module", "repository": { diff --git a/openllm-python/CHANGELOG.md b/openllm-python/CHANGELOG.md index d58dfb45..c4e16192 100644 --- a/openllm-python/CHANGELOG.md +++ b/openllm-python/CHANGELOG.md @@ -18,6 +18,24 @@ This changelog is managed by towncrier and is compiled at release time. +## [0.3.5](https://github.com/bentoml/openllm/tree/v0.3.5) + +### Features + +- Added support for continuous batching via vLLM + + Currently benchmark shows that 100 concurrent requests shows around 1218 TPS on 1 A100 running meta-llama/Llama-2-13b-chat-hf + [#349](https://github.com/bentoml/openllm/issues/349) + + +### Bug fix + +- Set a default serialisation for all models. + + Currently, only Llama 2 will use safetensors as default format. 
For all other models, if they have safetensors format, then it can be opted in via `--serialisation safetensors` + [#355](https://github.com/bentoml/openllm/issues/355) + + ## [0.3.4](https://github.com/bentoml/openllm/tree/v0.3.4) ### Bug fix diff --git a/package.json b/package.json index 43f98a62..c0f06cc2 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@bentoml/openllm-monorepo", - "version": "0.3.5.dev0", + "version": "0.3.5", "description": "OpenLLM: Operating LLMs in production", "author": "Aaron Pham <29749331+aarnphm@users.noreply.github.com>", "license": "Apache-2.0",