From 06a68ade7dfbc6ba4281fdb5820921b3ac7d87ae Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Mon, 4 Sep 2023 19:03:41 +0000 Subject: [PATCH] infra: prepare for release 0.3.0 [generated] [skip ci] Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- CHANGELOG.md | 41 ++++++++++++++++++++++++ changelog.d/260.feature.md | 3 -- changelog.d/283.breaking.md | 20 ------------ changelog.d/297.refactor.md | 1 - openllm-contrib/clojure/package.json | 2 +- openllm-node/package.json | 2 +- openllm-python/CHANGELOG.md | 48 ++++++++++++++++++++++++---- package.json | 2 +- 8 files changed, 86 insertions(+), 33 deletions(-) delete mode 100644 changelog.d/260.feature.md delete mode 100644 changelog.d/283.breaking.md delete mode 100644 changelog.d/297.refactor.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 15d7e2f8..c3f3ddb9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,47 @@ This changelog is managed by towncrier and is compiled at release time. +## [0.3.0](https://github.com/bentoml/openllm/tree/v0.3.0) + +### Backwards-incompatible Changes + +- All environment variables are now simplified and no longer require the model-specific prefix + + For example: OPENLLM_LLAMA_GENERATION_MAX_NEW_TOKENS now becomes OPENLLM_GENERATION_MAX_NEW_TOKENS + + Unified some miscellaneous environment variables. 
To switch between backends, use `--backend` with both `start` and `build` + + ```bash + openllm start llama --backend vllm + ``` + + or the environment variable `OPENLLM_BACKEND` + + ```bash + OPENLLM_BACKEND=vllm openllm start llama + ``` + + `openllm.Runner` will now, by default, try to download the model the first time if it is not available, and use the cached copy in the model store subsequently + + Model serialisation is now updated to a new API version with clearer naming; users are kindly asked to run `openllm prune -y --include-bentos` and update to + this current version of openllm + [#283](https://github.com/bentoml/openllm/issues/283) + + +### Refactor + +- Refactored GPTQ to use the official implementation from transformers>=4.32 + [#297](https://github.com/bentoml/openllm/issues/297) + + +### Features + +- Added support for vLLM streaming + + This can now be accessed via `/v1/generate_stream` + [#260](https://github.com/bentoml/openllm/issues/260) + + ## [0.2.27](https://github.com/bentoml/openllm/tree/v0.2.27) ### Changes diff --git a/changelog.d/260.feature.md b/changelog.d/260.feature.md deleted file mode 100644 index 2181ff2f..00000000 --- a/changelog.d/260.feature.md +++ /dev/null @@ -1,3 +0,0 @@ -Added support for vLLM streaming - -This can now be accessed via `/v1/generate_stream` diff --git a/changelog.d/283.breaking.md b/changelog.d/283.breaking.md deleted file mode 100644 index 80321200..00000000 --- a/changelog.d/283.breaking.md +++ /dev/null @@ -1,20 +0,0 @@ -All environment variable now will be more simplified, without the need for the specific model prefix - -For example: OPENLLM_LLAMA_GENERATION_MAX_NEW_TOKENS now becomes OPENLLM_GENERATION_MAX_NEW_TOKENS - -Unify some misc environment variable. 
To switch different backend, one can use `--backend` for both `start` and `build` - -```bash -openllm start llama --backend vllm -``` - -or the environment variable `OPENLLM_BACKEND` - -```bash -OPENLLM_BACKEND=vllm openllm start llama -``` - -`openllm.Runner` now will default to try download the model the first time if the model is not available, and get the cached in model store consequently - -Model serialisation now updated to a new API version with more clear name change, kindly ask users to do `openllm prune -y --include-bentos` and update to -this current version of openllm diff --git a/changelog.d/297.refactor.md b/changelog.d/297.refactor.md deleted file mode 100644 index d88ff541..00000000 --- a/changelog.d/297.refactor.md +++ /dev/null @@ -1 +0,0 @@ -Refactor GPTQ to use official implementation from transformers>=4.32 diff --git a/openllm-contrib/clojure/package.json b/openllm-contrib/clojure/package.json index e12062bc..c6282657 100644 --- a/openllm-contrib/clojure/package.json +++ b/openllm-contrib/clojure/package.json @@ -1,6 +1,6 @@ { "name": "openllm-clojure-ui", - "version": "", + "version": "0.3.0", "description": "OpenLLM Clojure UI", "repository": { "url": "git@github.com:bentoml/OpenLLM.git", diff --git a/openllm-node/package.json b/openllm-node/package.json index b568b322..70b4e786 100644 --- a/openllm-node/package.json +++ b/openllm-node/package.json @@ -1,6 +1,6 @@ { "name": "openllm", - "version": "0.2.28.dev0", + "version": "0.3.0", "description": "NodeJS library for OpenLLM", "type": "module", "repository": { diff --git a/openllm-python/CHANGELOG.md b/openllm-python/CHANGELOG.md index 88e6804f..c3f3ddb9 100644 --- a/openllm-python/CHANGELOG.md +++ b/openllm-python/CHANGELOG.md @@ -18,6 +18,47 @@ This changelog is managed by towncrier and is compiled at release time. 
+## [0.3.0](https://github.com/bentoml/openllm/tree/v0.3.0) + +### Backwards-incompatible Changes + +- All environment variables are now simplified and no longer require the model-specific prefix + + For example: OPENLLM_LLAMA_GENERATION_MAX_NEW_TOKENS now becomes OPENLLM_GENERATION_MAX_NEW_TOKENS + + Unified some miscellaneous environment variables. To switch between backends, use `--backend` with both `start` and `build` + + ```bash + openllm start llama --backend vllm + ``` + + or the environment variable `OPENLLM_BACKEND` + + ```bash + OPENLLM_BACKEND=vllm openllm start llama + ``` + + `openllm.Runner` will now, by default, try to download the model the first time if it is not available, and use the cached copy in the model store subsequently + + Model serialisation is now updated to a new API version with clearer naming; users are kindly asked to run `openllm prune -y --include-bentos` and update to + this current version of openllm + [#283](https://github.com/bentoml/openllm/issues/283) + + +### Refactor + +- Refactored GPTQ to use the official implementation from transformers>=4.32 + [#297](https://github.com/bentoml/openllm/issues/297) + + +### Features + +- Added support for vLLM streaming + + This can now be accessed via `/v1/generate_stream` + [#260](https://github.com/bentoml/openllm/issues/260) + + ## [0.2.27](https://github.com/bentoml/openllm/tree/v0.2.27) ### Changes @@ -230,7 +271,7 @@ No significant changes. 
```bash docker run --rm --gpus all -it -v /home/ubuntu/.local/share/bentoml:/tmp/bentoml -e BENTOML_HOME=/tmp/bentoml \ - -e OPENLLM_USE_LOCAL_LATEST=True -e OPENLLM_LLAMA_FRAMEWORK=vllm ghcr.io/bentoml/openllm:2b5e96f90ad314f54e07b5b31e386e7d688d9bb2 start llama --model-id meta-llama/Llama-2-7b-chat-hf --workers-per-resource conserved --debug` + -e OPENLLM_USE_LOCAL_LATEST=True -e OPENLLM_BACKEND=vllm ghcr.io/bentoml/openllm:2b5e96f90ad314f54e07b5b31e386e7d688d9bb2 start llama --model-id meta-llama/Llama-2-7b-chat-hf --workers-per-resource conserved --debug ``` In conjunction with this, OpenLLM now also have a set of small CLI utilities via ``openllm ext`` for ease-of-use @@ -721,9 +762,6 @@ No significant changes. `openllm start` now support `--quantize int8` and `--quantize int4` `GPTQ` quantization support is on the roadmap and currently being worked on. - `openllm start` now also support `--bettertransformer` to use - `BetterTransformer` for serving. - Refactored `openllm.LLMConfig` to be able to use with `__getitem__`: `openllm.DollyV2Config()['requirements']`. @@ -732,8 +770,6 @@ No significant changes. Added `towncrier` workflow to easily generate changelog entries - Added `use_pipeline`, `bettertransformer` flag into ModelSettings - `LLMConfig` now supported `__dataclass_transform__` protocol to help with type-checking diff --git a/package.json b/package.json index dfd1d223..efcce623 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "openllm-monorepo", - "version": "0.2.28.dev0", + "version": "0.3.0", "description": "OpenLLM: Operating LLMs in production", "repository": "git@github.com:bentoml/OpenLLM.git", "author": "Aaron Pham <29749331+aarnphm@users.noreply.github.com>",