feat(ci): automatic release semver + git archival installation (#143)

Author: Aaron Pham
Date: 2023-07-25 04:18:49 -04:00
Committed by: GitHub
Parent: 5635ce8d87
Commit: c391717226
14 changed files with 278 additions and 240 deletions

.git_archival.txt (new file)

@@ -0,0 +1,4 @@
node: $Format:%H$
node-date: $Format:%cI$
describe-name: $Format:%(describe:tags=true,match=*[0-9]*)$
ref-names: $Format:%D$
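
For context: setuptools-scm (and hatch-vcs on top of it) can recover a version from this file in a source archive, where `git archive` has already replaced the `$Format:...$` placeholders. A minimal sketch of that lookup, purely illustrative and not the actual setuptools-scm code:

```python
# Illustrative sketch only: how a substituted .git_archival.txt can be parsed
# to recover a version when the source tree has no .git directory.
from __future__ import annotations

from pathlib import Path


def version_from_archival(path: str = ".git_archival.txt") -> str | None:
    data = dict(
        line.split(": ", 1)
        for line in Path(path).read_text().splitlines()
        if ": " in line
    )
    describe = data.get("describe-name", "")
    # After `git archive`, describe-name looks like "v0.2.11" or
    # "v0.2.11-3-gc391717"; the leading tag (minus "v") is the version.
    if describe and not describe.startswith("$Format"):
        return describe.split("-", 1)[0].lstrip("v")
    return None


if __name__ == "__main__":
    print(version_from_archival())
```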

.gitattributes

@@ -3,3 +3,5 @@ nightly-requirements-gpu.txt linguist-generated=true
tests/models/__snapshots__/* linguist-generated=true
typings/**/*.pyi linguist-generated=true
* text=auto eol=lf
# Needed for setuptools-scm-git-archive
.git_archival.txt export-subst
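
The `export-subst` attribute is what makes `git archive` expand those placeholders. A small sketch to observe the effect, assuming it runs inside a clone of the repository:

```python
# Sketch: run inside a git checkout. Because .git_archival.txt is marked
# export-subst, `git archive` replaces its $Format:...$ placeholders with the
# real commit hash, date, describe output, and ref names.
import io
import subprocess
import tarfile

archive = subprocess.run(
    ["git", "archive", "--format=tar", "HEAD"],
    check=True,
    capture_output=True,
).stdout
with tarfile.open(fileobj=io.BytesIO(archive)) as tar:
    member = tar.extractfile(".git_archival.txt")
    if member is not None:
        print(member.read().decode())
```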

View File

@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
set -ex
set -e
# Function to print script usage
print_usage() {
@@ -61,41 +61,43 @@ fi
release_package() {
local version="$1"
echo "Releasing version ${version}..."
jq --arg release_version "${version}" '.version = $release_version' < package.json > package.json.tmp && mv package.json.tmp package.json
if [[ $release == 'patch' ]]; then
hatch version "${version}"
fi
towncrier build --yes --version "${version}"
git add CHANGELOG.md changelog.d src/openllm/__about__.py package.json
git add CHANGELOG.md changelog.d package.json
git commit -S -sm "infra: prepare for release ${version} [generated] [skip ci]"
git push origin main
echo "Releasing tag ${version}..." && git tag -a "v${version}" -sm "Release ${version} [generated by GitHub Actions]"
git push origin "v${version}"
echo "Finish releasing version ${version}"
}
echo "Cleaning previously built artifacts..." && hatch clean
# Get the highest tag across all branches, not just the current branch
version="$(git describe --tags "$(git rev-list --tags --max-count=1)")"
VERSION="${version#v}"
# Save the current value of IFS to restore it later
OLD_IFS=$IFS
IFS='.'
# split into array
read -ra VERSION_BITS <<< "$VERSION"
# Restore the original value of IFS
IFS=$OLD_IFS
VNUM1=${VERSION_BITS[0]}
VNUM2=${VERSION_BITS[1]}
VNUM3=${VERSION_BITS[2]}
if [[ $release == 'major' ]]; then
hatch version major
CURRENT_VERSION=$(hatch version)
release_package "${CURRENT_VERSION}"
VNUM1=$((VNUM1+1))
VNUM2=0
VNUM3=0
elif [[ $release == 'minor' ]]; then
hatch version minor
CURRENT_VERSION="$(hatch version)"
release_package "${CURRENT_VERSION}"
VNUM2=$((VNUM2+1))
VNUM3=0
else
CURRENT_VERSION=$(hatch version)
if [[ "$CURRENT_VERSION" =~ \.dev ]]; then
release_package "${CURRENT_VERSION%%.dev*}"
else
echo "Current version is not properly setup as dev version. Aborting..."
exit 1
fi
VNUM3=$((VNUM3+1))
fi
echo "Commit count: $(git rev-list --count HEAD)"
# Create new tag
RELEASE_VERSION="$VNUM1.$VNUM2.$VNUM3"
release_package "${RELEASE_VERSION}"
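
The script above splits the latest tag on dots and bumps the requested component before handing off to `release_package`. A rough Python equivalent of that version arithmetic, for illustration only (the real release is driven by hatch, towncrier, and git tags):

```python
# Rough Python equivalent of the version arithmetic in the release script above.
def next_version(current: str, release: str) -> str:
    major, minor, patch = (int(part) for part in current.split("."))
    if release == "major":
        return f"{major + 1}.0.0"
    if release == "minor":
        return f"{major}.{minor + 1}.0"
    return f"{major}.{minor}.{patch + 1}"  # default: patch release


assert next_version("0.2.10", "patch") == "0.2.11"
assert next_version("0.2.10", "minor") == "0.3.0"
assert next_version("0.2.10", "major") == "1.0.0"
```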

View File

@@ -90,8 +90,11 @@ jobs:
run: python -m build
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
print-hash: true
prepare-next-dev-cycle:
needs:
- release
- publish-python
- binary-distribution
runs-on: ubuntu-latest
@@ -126,9 +129,19 @@ jobs:
GIT_COMMITTER_EMAIL: ${{ steps.import-gpg-key.outputs.email }}
run: |
git pull --autostash --no-edit --gpg-sign --ff origin main
echo "Bumping version to dev..." && hatch version patch && hatch version dev
jq --arg release_version "$(hatch version)" '.version = $release_version' < package.json > package.json.tmp && mv package.json.tmp package.json
git add src/openllm/__about__.py package.json && git commit -S -sm "infra: bump to dev version of $(hatch version) [generated] [skip ci]"
SEMVER="${{ needs.release.outputs.version }}"
OLD_IFS=$IFS
IFS='.'
read -ra VERSION_BITS <<< "$SEMVER"
IFS=$OLD_IFS
VNUM1=${VERSION_BITS[0]}
VNUM2=${VERSION_BITS[1]}
VNUM3=${VERSION_BITS[2]}
VNUM3=$((VNUM3+1))
DEV_VERSION="$VNUM1.$VNUM2.$VNUM3.dev0"
echo "Bumping version to ${DEV_VERSION}..."
jq --arg release_version "${DEV_VERSION}" '.version = $release_version' < package.json > package.json.tmp && mv package.json.tmp package.json
git add package.json && git commit -S -sm "infra: bump to dev version of ${DEV_VERSION} [generated] [skip ci]"
git push origin HEAD:main
binary-distribution:
if: github.repository_owner == 'bentoml'
@@ -136,6 +149,7 @@ jobs:
name: Create binary/wheels distribution
uses: bentoml/OpenLLM/.github/workflows/binary-releases.yml@main
release-notes:
if: github.repository_owner == 'bentoml'
needs:
- release
- publish-python
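
The `prepare-next-dev-cycle` step above turns the released version into the follow-up dev version before committing `package.json`. In Python terms, the transformation is roughly:

```python
# What the prepare-next-dev-cycle step computes from the released tag:
# X.Y.Z -> X.Y.(Z + 1).dev0, which is then written into package.json.
def next_dev_version(released: str) -> str:
    major, minor, patch = (int(part) for part in released.split("."))
    return f"{major}.{minor}.{patch + 1}.dev0"


assert next_dev_version("0.2.11") == "0.2.12.dev0"
```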

.gitignore

@@ -141,3 +141,4 @@ pyapp
/target
.pdm-python
/src/openllm/_version.py

README.md

@@ -3,7 +3,7 @@
<div align="center">
<h1 align="center">🦾 OpenLLM</h1>
<a href="https://pypi.org/project/openllm">
<img src="https://img.shields.io/pypi/v/openllm.svg" alt="pypi_status" />
<img src="https://img.shields.io/pypi/v/openllm.svg?logo=pypi&label=PyPI&logoColor=gold" alt="pypi_status" />
</a><a href="https://github.com/bentoml/OpenLLM/actions/workflows/ci.yml">
<img src="https://github.com/bentoml/OpenLLM/actions/workflows/ci.yml/badge.svg?branch=main" alt="ci" />
</a><a href="https://twitter.com/bentomlai">
@@ -11,6 +11,14 @@
</a><a href="https://l.bentoml.com/join-openllm-discord">
<img src="https://badgen.net/badge/icon/OpenLLM/7289da?icon=discord&label=Join%20Us" alt="Discord" />
</a><br>
</a><a href="https://pypi.org/project/openllm">
<img src="https://img.shields.io/pypi/pyversions/openllm.svg?logo=python&label=Python&logoColor=gold" alt="python_version" />
</a><a href="https://github.com/pypa/hatch">
<img src="https://img.shields.io/badge/%F0%9F%A5%9A-Hatch-4051b5.svg" alt="Hatch" />
</a><br>
</a><a href="https://github.com/astral-sh/ruff">
<img src="https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/charliermarsh/ruff/main/assets/badge/v2.json" alt="Ruff" />
</a><br>
<p>An open platform for operating large language models (LLMs) in production.</br>
Fine-tune, serve, deploy, and monitor any LLMs with ease.</p>
<i></i>
@@ -39,10 +47,14 @@ Images or deploy as serverless endpoint via
🤖️ **Bring your own LLM**: Fine-tune any LLM to suit your needs with
`LLM.tuning()`. (Coming soon)
<!-- hatch-fancy-pypi-readme intro stop -->
![Gif showing OpenLLM Intro](/assets/output.gif)
<br/>
## 🏃‍ Getting Started
<!-- hatch-fancy-pypi-readme interim start -->
## 🏃 Getting Started
To use OpenLLM, you need to have Python 3.8 (or newer) and `pip` installed on
your system. We highly recommend using a Virtual Environment to prevent package
@@ -105,6 +117,7 @@ openllm query 'Explain to me the difference between "further" and "farther"'
Visit `http://localhost:3000/docs.json` for OpenLLM's API specification.
OpenLLM seamlessly supports many models and their variants.
Users can also specify different variants of the model to be served, by
providing the `--model-id` argument, e.g.:
@@ -112,6 +125,10 @@ providing the `--model-id` argument, e.g.:
openllm start flan-t5 --model-id google/flan-t5-large
```
> **Note** that `openllm` also supports all variants of fine-tuned weights, custom model paths,
> as well as quantized weights for any of the supported models, as long as they can be loaded with
> the model architecture. Refer to the [supported models](https://github.com/bentoml/OpenLLM/tree/main#-supported-models) section for each model's architecture.
Use the `openllm models` command to see the list of models and their variants
supported in OpenLLM.
@@ -127,8 +144,6 @@ dependencies can be installed with the instructions below:
<tr>
<th>Model</th>
<th>Architecture</th>
<th>CPU</th>
<th>GPU</th>
<th>Model Ids</th>
<th>Installation</th>
</tr>
@@ -136,8 +151,6 @@ dependencies can be installed with the instructions below:
<td><a href=https://github.com/THUDM/ChatGLM-6B>chatglm</a></td>
<td><a href=https://github.com/THUDM/ChatGLM-6B><code>ChatGLMForConditionalGeneration</code></a></td>
<td>❌</td>
<td>✅</td>
<td>
<ul><li><a href=https://huggingface.co/thudm/chatglm-6b><code>thudm/chatglm-6b</code></a></li>
@@ -159,8 +172,6 @@ pip install "openllm[chatglm]"
<td><a href=https://github.com/databrickslabs/dolly>dolly-v2</a></td>
<td><a href=https://huggingface.co/docs/transformers/main/model_doc/gpt_neox#transformers.GPTNeoXForCausalLM><code>GPTNeoXForCausalLM</code></a></td>
<td>✅</td>
<td>✅</td>
<td>
<ul><li><a href=https://huggingface.co/databricks/dolly-v2-3b><code>databricks/dolly-v2-3b</code></a></li>
@@ -180,8 +191,6 @@ pip install openllm
<td><a href=https://falconllm.tii.ae/>falcon</a></td>
<td><a href=https://falconllm.tii.ae/><code>FalconForCausalLM</code></a></td>
<td>❌</td>
<td>✅</td>
<td>
<ul><li><a href=https://huggingface.co/tiiuae/falcon-7b><code>tiiuae/falcon-7b</code></a></li>
@@ -202,8 +211,6 @@ pip install "openllm[falcon]"
<td><a href=https://huggingface.co/docs/transformers/model_doc/flan-t5>flan-t5</a></td>
<td><a href=https://huggingface.co/docs/transformers/main/model_doc/t5#transformers.T5ForConditionalGeneration><code>T5ForConditionalGeneration</code></a></td>
<td>✅</td>
<td>✅</td>
<td>
<ul><li><a href=https://huggingface.co/google/flan-t5-small><code>google/flan-t5-small</code></a></li>
@@ -225,8 +232,6 @@ pip install "openllm[flan-t5]"
<td><a href=https://github.com/EleutherAI/gpt-neox>gpt-neox</a></td>
<td><a href=https://huggingface.co/docs/transformers/main/model_doc/gpt_neox#transformers.GPTNeoXForCausalLM><code>GPTNeoXForCausalLM</code></a></td>
<td>❌</td>
<td>✅</td>
<td>
<ul><li><a href=https://huggingface.co/eleutherai/gpt-neox-20b><code>eleutherai/gpt-neox-20b</code></a></li></ul>
@@ -244,8 +249,6 @@ pip install openllm
<td><a href=https://github.com/facebookresearch/llama>llama</a></td>
<td><a href=https://huggingface.co/docs/transformers/main/model_doc/llama#transformers.LlamaForCausalLM><code>LlamaForCausalLM</code></a></td>
<td>✅</td>
<td>✅</td>
<td>
<ul><li><a href=https://huggingface.co/meta-llama/llama-2-70b-chat-hf><code>meta-llama/llama-2-70b-chat-hf</code></a></li>
@@ -275,8 +278,6 @@ pip install "openllm[llama]"
<td><a href=https://huggingface.co/mosaicml>mpt</a></td>
<td><a href=https://huggingface.co/mosaicml><code>MPTForCausalLM</code></a></td>
<td>✅</td>
<td>✅</td>
<td>
<ul><li><a href=https://huggingface.co/mosaicml/mpt-7b><code>mosaicml/mpt-7b</code></a></li>
@@ -300,8 +301,6 @@ pip install "openllm[mpt]"
<td><a href=https://huggingface.co/docs/transformers/model_doc/opt>opt</a></td>
<td><a href=https://huggingface.co/docs/transformers/main/model_doc/opt#transformers.OPTForCausalLM><code>OPTForCausalLM</code></a></td>
<td>✅</td>
<td>✅</td>
<td>
<ul><li><a href=https://huggingface.co/facebook/opt-125m><code>facebook/opt-125m</code></a></li>
@@ -324,8 +323,6 @@ pip install "openllm[opt]"
<td><a href=https://github.com/Stability-AI/StableLM>stablelm</a></td>
<td><a href=https://huggingface.co/docs/transformers/main/model_doc/gpt_neox#transformers.GPTNeoXForCausalLM><code>GPTNeoXForCausalLM</code></a></td>
<td>✅</td>
<td>✅</td>
<td>
<ul><li><a href=https://huggingface.co/stabilityai/stablelm-tuned-alpha-3b><code>stabilityai/stablelm-tuned-alpha-3b</code></a></li>
@@ -346,8 +343,6 @@ pip install openllm
<td><a href=https://github.com/bigcode-project/starcoder>starcoder</a></td>
<td><a href=https://huggingface.co/docs/transformers/main/model_doc/gpt_bigcode#transformers.GPTBigCodeForCausalLM><code>GPTBigCodeForCausalLM</code></a></td>
<td>❌</td>
<td>✅</td>
<td>
<ul><li><a href=https://huggingface.co/bigcode/starcoder><code>bigcode/starcoder</code></a></li>
@@ -366,8 +361,6 @@ pip install "openllm[starcoder]"
<td><a href=https://github.com/baichuan-inc/Baichuan-7B>baichuan</a></td>
<td><a href=https://github.com/baichuan-inc/Baichuan-7B><code>BaiChuanForCausalLM</code></a></td>
<td>❌</td>
<td>✅</td>
<td>
<ul><li><a href=https://huggingface.co/baichuan-inc/baichuan-7b><code>baichuan-inc/baichuan-7b</code></a></li>
@@ -596,9 +589,12 @@ client.ask_agent(
)
```
<!-- hatch-fancy-pypi-readme interim stop -->
![Gif showing Agent integration](/assets/agent.gif)
<br/>
<!-- hatch-fancy-pypi-readme meta start -->
## 🚀 Deploying to Production
@@ -664,7 +660,6 @@ the serverless cloud for shipping and scaling AI applications.
[deployment instructions](https://docs.bentoml.com/en/latest/reference/cli.html#bentoml-deployment-create).
## 👥 Community
Engage with like-minded individuals passionate about LLMs, AI, and more on our

View File

@@ -0,0 +1,5 @@
Added support for git-archival installation.
```bash
pip install "https://github.com/bentoml/openllm/archive/main.tar.gz"
```
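
Since the GitHub tarball carries `.git_archival.txt` instead of a `.git` directory, the installed package still reports a real version. A quick check after such an install:

```python
# Verify the version resolved from the archival/VCS metadata rather than a
# hard-coded __about__.py.
import importlib.metadata

print(importlib.metadata.version("openllm"))
```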

View File

@@ -1,5 +1,63 @@
[metadata.hooks.fancy-pypi-readme]
content-type = "text/markdown"
# PyPI doesn't support the <picture> tag.
[[metadata.hooks.fancy-pypi-readme.fragments]]
text = """
<p align="center">
<a href="https://github.com/bentoml/openllm">
<img src="https://raw.githubusercontent.com/bentoml/openllm/main/assets/main-banner.png" width="35%" alt="Banner for OpenLLM" />
</a>
</p>
"""
[[metadata.hooks.fancy-pypi-readme.fragments]]
path = "README.md"
end-before = "\n<!-- hatch-fancy-pypi-readme intro stop -->"
[[metadata.hooks.fancy-pypi-readme.fragments]]
text = """
<p align="center">
<img src="https://raw.githubusercontent.com/bentoml/openllm/main/assets/output.gif" width="35%" alt="Gif showing OpenLLM Intro" />
</p>
"""
[[metadata.hooks.fancy-pypi-readme.fragments]]
path = "README.md"
start-after = "<!-- hatch-fancy-pypi-readme interim start -->\n"
end-before = "\n<!-- hatch-fancy-pypi-readme interim stop -->"
[[metadata.hooks.fancy-pypi-readme.fragments]]
text = """
<p align="center">
<img src="https://raw.githubusercontent.com/bentoml/openllm/main/assets/agent.gif" width="35%" alt="Gif showing Agent integration" />
</p>
"""
[[metadata.hooks.fancy-pypi-readme.fragments]]
path = "README.md"
start-after = "<!-- hatch-fancy-pypi-readme meta start -->\n"
[[tool.hatch.metadata.hooks.fancy-pypi-readme.fragments]]
text = """
## Release Information
"""
[[tool.hatch.metadata.hooks.fancy-pypi-readme.fragments]]
path = "CHANGELOG.md"
pattern = "\n(###.+?\n)## "
[[tool.hatch.metadata.hooks.fancy-pypi-readme.fragments]]
text = """
---
[Click me for full changelog](https://github.com/bentoml/openllm/blob/main/CHANGELOG.md)
"""
[version]
path = "src/openllm/__about__.py"
fallback-version = "0.0.0"
source = "vcs"
[build.hooks.vcs]
version-file = "src/openllm/_version.py"
[version.raw-options]
git_describe_command = ["git", "describe", "--dirty", "--tags", "--long", "--first-parent"]
local_scheme = "no-local-version"
[metadata]
allow-direct-references = true
[build.targets.wheel]
@@ -16,6 +74,8 @@ dependencies = [
"tomlkit",
# NOTE: Using under ./tools/update-readme.py
"markdown-it-py",
# NOTE: For fancy PyPI readme
"hatch-fancy-pypi-readme",
]
[envs.default.scripts]
changelog = "towncrier build --version main --draft"
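
The `[build.hooks.vcs]` hook writes `src/openllm/_version.py` at build time (the path added to `.gitignore` above). The generated file typically looks like the following, though its exact contents come from hatch-vcs/setuptools-scm and may differ:

```python
# Illustrative only: typical shape of the generated src/openllm/_version.py.
# The real file is written by hatch-vcs/setuptools-scm at build time and is
# git-ignored.
__version__ = version = "0.2.11"
__version_tuple__ = version_tuple = (0, 2, 11)
```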

pyproject.toml

@@ -2,7 +2,7 @@
# project.classifiers, project.dependencies, project.optional-dependencies, project.urls
[build-system]
build-backend = "hatchling.build"
requires = ["hatchling"]
requires = ["hatchling", "hatch-vcs", "hatch-fancy-pypi-readme"]
[project]
authors = [{ name = "Aaron Pham", email = "aarnphm@bentoml.com" }]
@@ -48,7 +48,7 @@ dependencies = [
"bitsandbytes<0.42",
]
description = 'OpenLLM: Operating LLMs in production'
dynamic = ["version"]
dynamic = ["version", "readme"]
keywords = [
"MLOps",
"AI",
@@ -65,7 +65,6 @@ keywords = [
]
license = "Apache-2.0"
name = "openllm"
readme = "README.md"
requires-python = ">=3.8"
[project.scripts]
@@ -258,7 +257,6 @@ omit = [
"__pypackages__/*",
"src/openllm/playground/",
"src/openllm/__init__.py",
"src/openllm/__about__.py",
"src/openllm/__main__.py",
"src/openllm/utils/dummy_*.py",
]
@@ -281,7 +279,6 @@ omit = [
"__pypackages__/*",
"src/openllm/playground/",
"src/openllm/__init__.py",
"src/openllm/__about__.py",
"src/openllm/__main__.py",
"src/openllm/utils/dummy_*.py",
]
@@ -294,7 +291,6 @@ exclude = [
"__pypackages__/*",
"src/openllm/playground/",
"src/openllm/__init__.py",
"src/openllm/__about__.py",
"src/openllm/__main__.py",
"src/openllm/utils/dummy_*.py",
]

src/openllm/__about__.py

@@ -1,14 +0,0 @@
# Copyright 2023 BentoML Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__version__ = "0.2.10.dev0"
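
With `__about__.py` removed, `openllm.__version__` is no longer a hard-coded string; it is resolved from the installed distribution's metadata (surfaced through `LazyModule` further down). Conceptually, the replacement amounts to something like:

```python
# Conceptual replacement for the removed __about__.py: the version now comes
# from the installed distribution's metadata instead of a hard-coded constant.
import importlib.metadata

try:
    __version__ = importlib.metadata.version("openllm")
except importlib.metadata.PackageNotFoundError:  # e.g. running from a bare source tree
    __version__ = "0.0.0"
```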

src/openllm/__init__.py

@@ -16,7 +16,7 @@
An open platform for operating large language models in production. Fine-tune, serve,
deploy, and monitor any LLMs with ease.
* Built-in support for StableLM, Llama, Dolly, Flan-T5, Vicuna
* Built-in support for StableLM, Llama 2, Dolly, Flan-T5, Vicuna
* Option to bring your own fine-tuned LLMs
* Online Serving with HTTP, gRPC, SSE(coming soon) or custom API
* Native integration with BentoML and LangChain for custom LLM apps
@@ -24,37 +24,26 @@ deploy, and monitor any LLMs with ease.
from __future__ import annotations
import logging
import os
import sys
import typing as t
import warnings
from . import utils as utils
from .__about__ import __version__ as __version__
from .exceptions import MissingDependencyError
if utils.DEBUG:
utils.set_debug_mode(True)
utils.set_quiet_mode(False)
logging.basicConfig(level=logging.NOTSET)
else:
# configuration for bitsandbytes before import
os.environ["BITSANDBYTES_NOWELCOME"] = os.environ.get("BITSANDBYTES_NOWELCOME", "1")
# The following warnings from bitsandbytes, and probably not that important
# for users to see when DEBUG is False
warnings.filterwarnings(
"ignore", message="MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization"
)
warnings.filterwarnings(
"ignore", message="MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization"
)
warnings.filterwarnings(
"ignore",
message=(
"The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers and GPU quantization"
" are unavailable."
),
)
warnings.filterwarnings("ignore", message="MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization")
warnings.filterwarnings("ignore", message="MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization")
warnings.filterwarnings("ignore", message="The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers and GPU quantization are unavailable.")
_import_structure: dict[str, list[str]] = {
@@ -73,14 +62,7 @@ _import_structure: dict[str, list[str]] = {
"serialisation": ["ggml", "transformers"],
"cli": ["start", "start_grpc", "build", "import_model", "list_models"],
# NOTE: models
"models.auto": [
"AutoConfig",
"CONFIG_MAPPING",
"MODEL_MAPPING_NAMES",
"MODEL_FLAX_MAPPING_NAMES",
"MODEL_TF_MAPPING_NAMES",
"MODEL_VLLM_MAPPING_NAMES",
],
"models.auto": ["AutoConfig", "CONFIG_MAPPING", "MODEL_MAPPING_NAMES", "MODEL_FLAX_MAPPING_NAMES", "MODEL_TF_MAPPING_NAMES", "MODEL_VLLM_MAPPING_NAMES", ],
"models.chatglm": ["ChatGLMConfig"],
"models.baichuan": ["BaichuanConfig"],
"models.dolly_v2": ["DollyV2Config"],
@@ -96,50 +78,34 @@ _import_structure: dict[str, list[str]] = {
# NOTE: torch and cpm_kernels
try:
if not (utils.is_torch_available() and utils.is_cpm_kernels_available()):
raise MissingDependencyError
if not (utils.is_torch_available() and utils.is_cpm_kernels_available()): raise MissingDependencyError
except MissingDependencyError:
from .utils import dummy_pt_and_cpm_kernels_objects
_import_structure["utils.dummy_pt_and_cpm_kernels_objects"] = [
name for name in dir(dummy_pt_and_cpm_kernels_objects) if not name.startswith("_")
]
_import_structure["utils.dummy_pt_and_cpm_kernels_objects"] = [name for name in dir(dummy_pt_and_cpm_kernels_objects) if not name.startswith("_")]
else:
_import_structure["models.chatglm"].extend(["ChatGLM"])
_import_structure["models.baichuan"].extend(["Baichuan"])
try:
if not (utils.is_torch_available() and utils.is_einops_available()):
raise MissingDependencyError
if not (utils.is_torch_available() and utils.is_einops_available()): raise MissingDependencyError
except MissingDependencyError:
from .utils import dummy_pt_and_einops_objects
_import_structure["utils.dummy_pt_and_einops_objects"] = [
name for name in dir(dummy_pt_and_einops_objects) if not name.startswith("_")
]
_import_structure["utils.dummy_pt_and_einops_objects"] = [name for name in dir(dummy_pt_and_einops_objects) if not name.startswith("_")]
else:
_import_structure["models.falcon"].extend(["Falcon"])
try:
if not (utils.is_torch_available() and utils.is_triton_available()):
raise MissingDependencyError
if not (utils.is_torch_available() and utils.is_triton_available()): raise MissingDependencyError
except MissingDependencyError:
from .utils import dummy_pt_and_triton_objects
_import_structure["utils.dummy_pt_and_triton_objects"] = [
name for name in dir(dummy_pt_and_triton_objects) if not name.startswith("_")
]
_import_structure["utils.dummy_pt_and_triton_objects"] = [name for name in dir(dummy_pt_and_triton_objects) if not name.startswith("_")]
else:
_import_structure["models.mpt"].extend(["MPT"])
try:
if not utils.is_torch_available():
raise MissingDependencyError
if not utils.is_torch_available(): raise MissingDependencyError
except MissingDependencyError:
from .utils import dummy_pt_objects
_import_structure["utils.dummy_pt_objects"] = [name for name in dir(dummy_pt_objects) if not name.startswith("_")]
else:
_import_structure["models.flan_t5"].extend(["FlanT5"])
@@ -152,45 +118,34 @@ else:
_import_structure["models.auto"].extend(["AutoLLM", "MODEL_MAPPING"])
try:
if not utils.is_vllm_available():
raise MissingDependencyError
if not utils.is_vllm_available(): raise MissingDependencyError
except MissingDependencyError:
from .utils import dummy_vllm_objects
_import_structure["utils.dummy_vllm_objects"] = [
name for name in dir(dummy_vllm_objects) if not name.startswith("_")
]
_import_structure["utils.dummy_vllm_objects"] = [name for name in dir(dummy_vllm_objects) if not name.startswith("_")]
else:
_import_structure["models.llama"].extend(["VLLMLlaMA"])
_import_structure["models.auto"].extend(["AutoVLLM", "MODEL_VLLM_MAPPING"])
try:
if not utils.is_flax_available():
raise MissingDependencyError
if not utils.is_flax_available(): raise MissingDependencyError
except MissingDependencyError:
from .utils import dummy_flax_objects
_import_structure["utils.dummy_flax_objects"] = [
name for name in dir(dummy_flax_objects) if not name.startswith("_")
]
_import_structure["utils.dummy_flax_objects"] = [name for name in dir(dummy_flax_objects) if not name.startswith("_")]
else:
_import_structure["models.flan_t5"].extend(["FlaxFlanT5"])
_import_structure["models.opt"].extend(["FlaxOPT"])
_import_structure["models.auto"].extend(["AutoFlaxLLM", "MODEL_FLAX_MAPPING"])
try:
if not utils.is_tf_available():
raise MissingDependencyError
if not utils.is_tf_available(): raise MissingDependencyError
except MissingDependencyError:
from .utils import dummy_tf_objects
_import_structure["utils.dummy_tf_objects"] = [name for name in dir(dummy_tf_objects) if not name.startswith("_")]
else:
_import_structure["models.flan_t5"].extend(["TFFlanT5"])
_import_structure["models.opt"].extend(["TFOPT"])
_import_structure["models.auto"].extend(["AutoTFLLM", "MODEL_TF_MAPPING"])
# declaration for OpenLLM-related modules
if t.TYPE_CHECKING:
from . import bundle as bundle
@@ -244,8 +199,7 @@ if t.TYPE_CHECKING:
# NOTE: torch and cpm_kernels
try:
if not (utils.is_torch_available() and utils.is_cpm_kernels_available()):
raise MissingDependencyError
if not (utils.is_torch_available() and utils.is_cpm_kernels_available()): raise MissingDependencyError
except MissingDependencyError:
from .utils.dummy_pt_and_cpm_kernels_objects import *
else:
@@ -254,8 +208,7 @@ if t.TYPE_CHECKING:
# NOTE: torch and einops
try:
if not (utils.is_torch_available() and utils.is_einops_available()):
raise MissingDependencyError
if not (utils.is_torch_available() and utils.is_einops_available()): raise MissingDependencyError
except MissingDependencyError:
from .utils.dummy_pt_and_einops_objects import *
else:
@@ -263,16 +216,14 @@ if t.TYPE_CHECKING:
# NOTE: torch and triton
try:
if not (utils.is_torch_available() and utils.is_triton_available()):
raise MissingDependencyError
if not (utils.is_torch_available() and utils.is_triton_available()): raise MissingDependencyError
except MissingDependencyError:
from .utils.dummy_pt_and_triton_objects import *
else:
from .models.mpt import MPT as MPT
try:
if not utils.is_torch_available():
raise MissingDependencyError
if not utils.is_torch_available(): raise MissingDependencyError
except MissingDependencyError:
from .utils.dummy_pt_objects import *
else:
@@ -287,8 +238,7 @@ if t.TYPE_CHECKING:
from .models.starcoder import StarCoder as StarCoder
try:
if not utils.is_vllm_available():
raise MissingDependencyError
if not utils.is_vllm_available(): raise MissingDependencyError
except MissingDependencyError:
from .utils.dummy_vllm_objects import *
else:
@@ -297,8 +247,7 @@ if t.TYPE_CHECKING:
from .models.llama import VLLMLlaMA as VLLMLlaMA
try:
if not utils.is_flax_available():
raise MissingDependencyError
if not utils.is_flax_available(): raise MissingDependencyError
except MissingDependencyError:
from .utils.dummy_flax_objects import *
else:
@@ -308,8 +257,7 @@ if t.TYPE_CHECKING:
from .models.opt import FlaxOPT as FlaxOPT
try:
if not utils.is_tf_available():
raise MissingDependencyError
if not utils.is_tf_available(): raise MissingDependencyError
except MissingDependencyError:
from .utils.dummy_tf_objects import *
else:
@@ -318,20 +266,10 @@ if t.TYPE_CHECKING:
from .models.flan_t5 import TFFlanT5 as TFFlanT5
from .models.opt import TFOPT as TFOPT
else:
import sys
sys.modules[__name__] = utils.LazyModule(
__name__,
globals()["__file__"],
_import_structure,
module_spec=__spec__,
doc=__doc__,
extra_objects={
"__version__": __version__,
# The below is a special mapping that allows openllm to be used as a dictionary.
# This is purely for convenience sake, and should not be used in performance critcal
# code. This is also not considered as a public API.
"__openllm_special__": {"flax": "AutoFlaxLLM", "tf": "AutoTFLLM", "pt": "AutoLLM", "vllm": "AutoVLLM"},
},
)
else: sys.modules[__name__] = utils.LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__, doc=__doc__,
extra_objects={
# The below is a special mapping that allows openllm to be used as a dictionary.
# This is purely for convenience's sake, and should not be used in performance-critical
# code. This is also not considered as a public API.
"__openllm_special__": {"flax": "AutoFlaxLLM", "tf": "AutoTFLLM", "pt": "AutoLLM", "vllm": "AutoVLLM"},
})
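
As the comment notes, the `__openllm_special__` mapping lets the module be indexed like a dictionary for internal convenience. For illustration only (explicitly not a public API):

```python
# Illustration of the internal dict-style access enabled by __openllm_special__.
# Requires openllm to be installed with the corresponding backends available.
import openllm

print(openllm["pt"])    # resolves to openllm.AutoLLM
print(openllm["vllm"])  # resolves to openllm.AutoVLLM
```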

View File

@@ -69,7 +69,6 @@ import openllm
from bentoml._internal.configuration.containers import BentoMLContainer
from bentoml._internal.models.model import ModelStore
from .__about__ import __version__
from .exceptions import OpenLLMException
from .utils import DEBUG
from .utils import ENV_VARS_TRUE_VALUES
@@ -403,7 +402,7 @@ class OpenLLMCommandGroup(BentoMLCommandGroup):
return wrapper
@click.group(cls=OpenLLMCommandGroup, context_settings=_CONTEXT_SETTINGS, name="openllm")
@click.version_option(__version__, "--version", "-v")
@click.version_option(None, "--version", "-v")
def cli() -> None:
"""\b
██████╗ ██████╗ ███████╗███╗ ██╗██╗ ██╗ ███╗ ███╗
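
Passing `None` to `click.version_option` makes Click resolve the version from the installed distribution's metadata at runtime, replacing the removed `__about__.__version__`. A standalone sketch of the same pattern; `package_name` is spelled out here for clarity, whereas the change above relies on Click's auto-detection:

```python
# Sketch: with version=None, Click looks the version up via importlib.metadata
# instead of using a hard-coded string.
import click


@click.group(name="mycli")
@click.version_option(None, "--version", "-v", package_name="openllm")
def cli() -> None:
    """Toy command group mirroring the change above."""


if __name__ == "__main__":
    cli()
```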

View File

@@ -13,30 +13,85 @@
# limitations under the License.
from __future__ import annotations
import functools
import importlib
import importlib.machinery
import importlib.metadata
import itertools
import os
import time
import types
import typing as t
import warnings
import attr
from ..exceptions import ForbiddenAttributeError
from ..exceptions import OpenLLMException
class UsageNotAllowedError(OpenLLMException):
"""Raised when LazyModule.__getitem__ is forbidden."""
class MissingAttributesError(OpenLLMException):
"""Raised when given keys is not available in LazyModule special mapping."""
@functools.total_ordering
@attr.attrs(eq=False, order=False, slots=True, frozen=True)
class VersionInfo:
"""A version object that can be compared to tuple of length 1--4.
_sentinel = object()
```python
>>> VersionInfo(19, 1, 0, "final") <= (19, 2)
True
>>> VersionInfo(19, 1, 0, "final") < (19, 1, 1)
True
>>> vi = VersionInfo(19, 2, 0, "final")
>>> vi < (19, 1, 1)
False
>>> vi < (19,)
False
>>> vi == (19, 2,)
True
>>> vi == (19, 2, 1)
False
```
Vendored from attrs.
"""
major: int = attr.field()
minor: int = attr.field()
micro: int = attr.field()
releaselevel: str = attr.field()
_reserved_namespace = {"__openllm_special__", "__openllm_migration__"}
@classmethod
def from_version_string(cls, s: str) -> VersionInfo:
"""Parse *s* and return a VersionInfo."""
v = s.split(".")
if len(v) == 3: v.append("final")
return cls(major=int(v[0]), minor=int(v[1]), micro=int(v[2]), releaselevel=v[3])
def _ensure_tuple(self, other: VersionInfo | tuple[t.Any, ...]) -> tuple[tuple[int, int, int, str], tuple[int, int, int, str]]:
"""Ensure *other* is a tuple of a valid length.
Returns a possibly transformed *other* and ourselves as a tuple of
the same length as *other*.
"""
if self.__class__ is other.__class__: other = attr.astuple(other)
if not isinstance(other, tuple): raise NotImplementedError
if not (1 <= len(other) <= 4): raise NotImplementedError
return attr.astuple(self)[: len(other)], other
def __eq__(self, other: t.Any) -> bool:
try: us, them = self._ensure_tuple(other)
except NotImplementedError: return NotImplemented
return us == them
def __lt__(self, other: t.Any) -> bool:
try: us, them = self._ensure_tuple(other)
except NotImplementedError: return NotImplemented
# Since alphabetically "dev0" < "final" < "post1" < "post2", we don't
# have to do anything special with releaselevel for now.
return us < them
_sentinel, _reserved_namespace = object(), {"__openllm_special__", "__openllm_migration__"}
class LazyModule(types.ModuleType):
"""Module class that surfaces all objects but only performs associated imports when the objects are requested.
@@ -86,7 +141,6 @@ class LazyModule(types.ModuleType):
self._objects = _extra_objects
self._name = name
self._import_structure = import_structure
def __dir__(self) -> list[str]:
"""Needed for autocompletion in an IDE."""
result = t.cast("list[str]", super().__dir__())
@@ -95,61 +149,62 @@ class LazyModule(types.ModuleType):
# they have been accessed or not. So we only add the
# elements of self.__all__ that are not already in the dir.
return result + [i for i in self.__all__ if i not in result]
def __getitem__(self, key: str) -> t.Any:
"""This is reserved to only internal uses and users shouldn't use this."""
if self._objects.get("__openllm_special__") is None:
raise UsageNotAllowedError(f"'{self._name}' is not allowed to be used as a dict.")
if self._objects.get("__openllm_special__") is None: raise UsageNotAllowedError(f"'{self._name}' is not allowed to be used as a dict.")
_special_mapping = self._objects.get("__openllm_special__", {})
try:
if key in _special_mapping:
return getattr(self, _special_mapping.__getitem__(key))
if key in _special_mapping: return getattr(self, _special_mapping.__getitem__(key))
raise MissingAttributesError(f"Requested '{key}' is not available in given mapping.")
except AttributeError as e:
raise KeyError(f"'{self._name}' has no attribute {_special_mapping[key]}") from e
except Exception as e:
raise KeyError(f"Failed to lookup '{key}' in '{self._name}'") from e
except AttributeError as e: raise KeyError(f"'{self._name}' has no attribute {_special_mapping[key]}") from e
except Exception as e: raise KeyError(f"Failed to lookup '{key}' in '{self._name}'") from e
def __getattr__(self, name: str) -> t.Any:
"""Equivocal __getattr__ implementation.
It checks from _objects > _modules and does it recursively.
It also contains a special case for all of the metadata information, such as __version__ and __version_info__.
"""
if name in _reserved_namespace:
raise ForbiddenAttributeError(
f"'{name}' is a reserved namespace for {self._name} and should not be access nor modified."
)
if name in _reserved_namespace: raise ForbiddenAttributeError(f"'{name}' is a reserved namespace for {self._name} and should not be access nor modified.")
dunder_to_metadata = {
"__title__": "Name",
"__copyright__": "",
"__version__": "version",
"__version_info__": "version",
"__description__": "summary",
"__uri__": "",
"__url__": "",
"__author__": "",
"__email__": "",
"__license__": "license",
"__homepage__": "",
}
if name in dunder_to_metadata:
if name not in {"__version_info__", "__copyright__", "__version__"}: warnings.warn(f"Accessing '{self._name}.{name}' is deprecated. Please consider using 'importlib.metadata' directly to query for openllm packaging metadata.", DeprecationWarning, stacklevel=2)
meta = importlib.metadata.metadata("openllm")
project_url = dict(url.split(", ") for url in meta.get_all("Project-URL"))
if name == "__license__": return "Apache-2.0"
elif name == "__copyright__": return f"Copyright (c) 2023-{time.strftime('%Y')}, Aaron Pham et al."
elif name in ("__uri__", "__url__"): return project_url["GitHub"]
elif name == "__homepage__": return project_url["Homepage"]
elif name == "__version_info__": return VersionInfo.from_version_string(meta["version"]) # similar to how attrs handle __version_info__
elif name == "__author__": return meta["Author-email"].rsplit(" ", 1)[0]
elif name == "__email__": return meta["Author-email"].rsplit("<", 1)[1][:-1]
return meta[dunder_to_metadata[name]]
if "__openllm_migration__" in self._objects:
cur_value = self._objects["__openllm_migration__"].get(name, _sentinel)
if cur_value is not _sentinel:
warnings.warn(
f"'{name}' is deprecated and will be removed in future version. Make sure to use '{cur_value}' instead",
DeprecationWarning,
stacklevel=3,
)
warnings.warn(f"'{name}' is deprecated and will be removed in future version. Make sure to use '{cur_value}' instead", DeprecationWarning, stacklevel=3)
return getattr(self, cur_value)
if name in self._objects:
return self._objects.__getitem__(name)
if name in self._modules:
value = self._get_module(name)
elif name in self._class_to_module.keys():
module = self._get_module(self._class_to_module.__getitem__(name))
value = getattr(module, name)
else:
raise AttributeError(f"module {self.__name__} has no attribute {name}")
if name in self._objects: return self._objects.__getitem__(name)
if name in self._modules: value = self._get_module(name)
elif name in self._class_to_module.keys(): value = getattr(self._get_module(self._class_to_module.__getitem__(name)), name)
else: raise AttributeError(f"module {self.__name__} has no attribute {name}")
setattr(self, name, value)
return value
def _get_module(self, module_name: str) -> types.ModuleType:
try:
return importlib.import_module("." + module_name, self.__name__)
except Exception as e:
raise RuntimeError(
f"Failed to import {self.__name__}.{module_name} because of the following error (look up to see its"
f" traceback):\n{e}"
) from e
try: return importlib.import_module("." + module_name, self.__name__)
except Exception as e: raise RuntimeError(f"Failed to import {self.__name__}.{module_name} because of the following error (look up to see its traceback):\n{e}") from e
def __reduce__(self) -> tuple[type[LazyModule], tuple[str, str | None, dict[str, list[str]]]]:
"""This is to ensure any given module is pickle-able."""
return (self.__class__, (self._name, self.__file__, self._import_structure))
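
The vendored `VersionInfo` backs `openllm.__version_info__` and compares against plain tuples, as its docstring shows. A short usage sketch; the import path is assumed for illustration and may differ in the actual module layout:

```python
# Usage of the vendored VersionInfo comparison helper from the diff above.
from openllm.utils import VersionInfo  # import path assumed

vi = VersionInfo.from_version_string("0.2.11")
print(vi >= (0, 2))      # True
print(vi < (0, 3, 0))    # True
print(vi == (0, 2, 11))  # True
```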

tools/update-readme.py

@@ -30,23 +30,15 @@ END_COMMENT = f"<!-- {os.path.basename(__file__)}: stop -->\n"
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
def main() -> int:
with open(os.path.join(ROOT, "pyproject.toml"), "r") as f:
deps = tomlkit.parse(f.read()).value["project"]["optional-dependencies"]
with open(os.path.join(ROOT, "README.md"), "r") as f:
readme = f.readlines()
with open(os.path.join(ROOT, "pyproject.toml"), "r") as f: deps = tomlkit.parse(f.read()).value["project"]["optional-dependencies"]
with open(os.path.join(ROOT, "README.md"), "r") as f: readme = f.readlines()
start_index, stop_index = readme.index(START_COMMENT), readme.index(END_COMMENT)
formatted: dict[
t.Literal["Model", "Architecture", "CPU", "GPU", "URL", "Installation", "Model Ids"], list[str | list[str]]
] = {
formatted: dict[t.Literal["Model", "Architecture", "URL", "Installation", "Model Ids"], list[str | list[str]]] = {
"Model": [],
"Architecture": [],
"URL": [],
"CPU": [],
"GPU": [],
"Model Ids": [],
"Installation": [],
}
@@ -56,8 +48,6 @@ def main() -> int:
formatted["Model"].append(dashed)
formatted["Architecture"].append(config_cls.__openllm_architecture__)
formatted["URL"].append(config_cls.__openllm_url__)
formatted["GPU"].append("")
formatted["CPU"].append("" if not config_cls.__openllm_requires_gpu__ else "")
formatted["Model Ids"].append(config_cls.__openllm_model_ids__)
if dashed in deps:
instruction = f'```bash\npip install "openllm[{dashed}]"\n```'
@@ -74,9 +64,7 @@ def main() -> int:
meta.extend([f"<th>{header}</th>\n" for header in formatted.keys() if header not in ("URL",)])
meta += ["</tr>\n"]
# NOTE: rows
for name, architecture, url, cpu, gpu, model_ids, installation in t.cast(
t.Iterable[t.Tuple[str, str, str, str, str, t.List[str], str]], zip(*formatted.values())
):
for name, architecture, url, model_ids, installation in t.cast(t.Iterable[t.Tuple[str, str, str, t.List[str], str]], zip(*formatted.values())):
meta += "<tr>\n"
# configure architecture URL
cfg_cls = openllm.CONFIG_MAPPING[name]
@@ -94,8 +82,6 @@ def main() -> int:
[
f"\n<td><a href={url}>{name}</a></td>\n",
arch,
f"<td>{cpu}</td>\n",
f"<td>{gpu}</td>\n",
]
)
format_with_links: list[str] = []
@@ -107,12 +93,7 @@ def main() -> int:
meta.extend(["</table>\n", "\n"])
readme = readme[:start_index] + [START_COMMENT] + meta + [END_COMMENT] + readme[stop_index + 1 :]
with open(os.path.join(ROOT, "README.md"), "w") as f:
f.writelines(readme)
with open(os.path.join(ROOT, "README.md"), "w") as f: f.writelines(readme)
return 0
if __name__ == "__main__":
raise SystemExit(main())
if __name__ == "__main__": raise SystemExit(main())