diff --git a/.git_archival.txt b/.git_archival.txt
new file mode 100644
index 00000000..8fb235d7
--- /dev/null
+++ b/.git_archival.txt
@@ -0,0 +1,4 @@
+node: $Format:%H$
+node-date: $Format:%cI$
+describe-name: $Format:%(describe:tags=true,match=*[0-9]*)$
+ref-names: $Format:%D$
diff --git a/.gitattributes b/.gitattributes
index 535c916b..19b87f7b 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -3,3 +3,5 @@ nightly-requirements-gpu.txt linguist-generated=true
tests/models/__snapshots__/* linguist-generated=true
typings/**/*.pyi linguist-generated=true
* text=auto eol=lf
+# Needed for setuptools-scm-git-archive
+.git_archival.txt export-subst
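
For context, `export-subst` makes `git archive` expand the `$Format:...$` placeholders when GitHub builds a source tarball, so hatch-vcs/setuptools-scm can derive a version even without a `.git` directory. With hypothetical values, the substituted `.git_archival.txt` inside such an archive would look roughly like:

```
node: 1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b
node-date: 2023-07-20T10:00:00-04:00
describe-name: v0.2.10-3-g1a2b3c4
ref-names: HEAD -> main, tag: v0.2.10
```
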
diff --git a/.github/actions/release.sh b/.github/actions/release.sh
index 38ff6f53..2635898c 100755
--- a/.github/actions/release.sh
+++ b/.github/actions/release.sh
@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-set -ex
+set -e
# Function to print script usage
print_usage() {
@@ -61,41 +61,43 @@ fi
release_package() {
local version="$1"
echo "Releasing version ${version}..."
-
jq --arg release_version "${version}" '.version = $release_version' < package.json > package.json.tmp && mv package.json.tmp package.json
-
- if [[ $release == 'patch' ]]; then
- hatch version "${version}"
- fi
-
towncrier build --yes --version "${version}"
- git add CHANGELOG.md changelog.d src/openllm/__about__.py package.json
+ git add CHANGELOG.md changelog.d package.json
git commit -S -sm "infra: prepare for release ${version} [generated] [skip ci]"
git push origin main
-
echo "Releasing tag ${version}..." && git tag -a "v${version}" -sm "Release ${version} [generated by GitHub Actions]"
git push origin "v${version}"
-
echo "Finish releasing version ${version}"
}
-echo "Cleaning previously built artifacts..." && hatch clean
+# Get the highest tag across all branches, not just the current branch
+version="$(git describe --tags "$(git rev-list --tags --max-count=1)")"
+VERSION="${version#v}"
+# Save the current value of IFS to restore it later
+OLD_IFS=$IFS
+IFS='.'
+# split into array
+read -ra VERSION_BITS <<< "$VERSION"
+# Restore the original value of IFS
+IFS=$OLD_IFS
+VNUM1=${VERSION_BITS[0]}
+VNUM2=${VERSION_BITS[1]}
+VNUM3=${VERSION_BITS[2]}
if [[ $release == 'major' ]]; then
- hatch version major
- CURRENT_VERSION=$(hatch version)
- release_package "${CURRENT_VERSION}"
+ VNUM1=$((VNUM1+1))
+ VNUM2=0
+ VNUM3=0
elif [[ $release == 'minor' ]]; then
- hatch version minor
- CURRENT_VERSION="$(hatch version)"
- release_package "${CURRENT_VERSION}"
+ VNUM2=$((VNUM2+1))
+ VNUM3=0
else
- CURRENT_VERSION=$(hatch version)
-
- if [[ "$CURRENT_VERSION" =~ \.dev ]]; then
- release_package "${CURRENT_VERSION%%.dev*}"
- else
- echo "Current version is not properly setup as dev version. Aborting..."
- exit 1
- fi
+ VNUM3=$((VNUM3+1))
fi
+
+echo "Commit count: $(git rev-list --count HEAD)"
+
+# Create the new release tag
+RELEASE_VERSION="$VNUM1.$VNUM2.$VNUM3"
+release_package "${RELEASE_VERSION}"
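
The tag-based bump above amounts to the following, sketched here in Python purely for illustration (the `bump` helper is hypothetical, not part of the patch):

```python
def bump(latest_tag: str, release: str) -> str:
    # Mirror release.sh: strip the leading "v", split on ".", bump one component.
    major, minor, patch = (int(x) for x in latest_tag.lstrip("v").split("."))
    if release == "major":
        return f"{major + 1}.0.0"
    if release == "minor":
        return f"{major}.{minor + 1}.0"
    return f"{major}.{minor}.{patch + 1}"

assert bump("v0.2.10", "patch") == "0.2.11"
assert bump("v0.2.10", "minor") == "0.3.0"
assert bump("v0.2.10", "major") == "1.0.0"
```
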
diff --git a/.github/workflows/create-releases.yml b/.github/workflows/create-releases.yml
index 82138c37..01d3171b 100644
--- a/.github/workflows/create-releases.yml
+++ b/.github/workflows/create-releases.yml
@@ -90,8 +90,11 @@ jobs:
run: python -m build
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
+ with:
+ print-hash: true
prepare-next-dev-cycle:
needs:
+ - release
- publish-python
- binary-distribution
runs-on: ubuntu-latest
@@ -126,9 +129,19 @@ jobs:
GIT_COMMITTER_EMAIL: ${{ steps.import-gpg-key.outputs.email }}
run: |
git pull --autostash --no-edit --gpg-sign --ff origin main
- echo "Bumping version to dev..." && hatch version patch && hatch version dev
- jq --arg release_version "$(hatch version)" '.version = $release_version' < package.json > package.json.tmp && mv package.json.tmp package.json
- git add src/openllm/__about__.py package.json && git commit -S -sm "infra: bump to dev version of $(hatch version) [generated] [skip ci]"
+ SEMVER="${{ needs.release.outputs.version }}"
+ OLD_IFS=$IFS
+ IFS='.'
+ read -ra VERSION_BITS <<< "$SEMVER"
+ IFS=$OLD_IFS
+ VNUM1=${VERSION_BITS[0]}
+ VNUM2=${VERSION_BITS[1]}
+ VNUM3=${VERSION_BITS[2]}
+ VNUM3=$((VNUM3+1))
+ DEV_VERSION="$VNUM1.$VNUM2.$VNUM3.dev0"
+ echo "Bumping version to ${DEV_VERSION}..."
+ jq --arg release_version "${DEV_VERSION}" '.version = $release_version' < package.json > package.json.tmp && mv package.json.tmp package.json
+ git add package.json && git commit -S -sm "infra: bump to dev version of ${DEV_VERSION} [generated] [skip ci]"
git push origin HEAD:main
binary-distribution:
if: github.repository_owner == 'bentoml'
@@ -136,6 +149,7 @@ jobs:
name: Create binary/wheels distribution
uses: bentoml/OpenLLM/.github/workflows/binary-releases.yml@main
release-notes:
+ if: github.repository_owner == 'bentoml'
needs:
- release
- publish-python
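
The dev-cycle step above now derives the next `.dev0` version from the `release` job's `version` output instead of from hatch. A minimal Python sketch of that computation (the `next_dev_version` helper is hypothetical):

```python
def next_dev_version(released: str) -> str:
    # Bump the patch component of the just-released version and append .dev0,
    # matching the shell arithmetic in the workflow step above.
    major, minor, patch = (int(x) for x in released.split("."))
    return f"{major}.{minor}.{patch + 1}.dev0"

assert next_dev_version("0.2.11") == "0.2.12.dev0"
```
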
diff --git a/.gitignore b/.gitignore
index 8af78aa2..45dff7a8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -141,3 +141,4 @@ pyapp
/target
.pdm-python
+/src/openllm/_version.py
diff --git a/README.md b/README.md
index 9ed24697..a5b6bff6 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
🦾 OpenLLM
-
+
@@ -11,6 +11,14 @@
+
+
+
+
+
+
+
+
An open platform for operating large language models (LLMs) in production.
Fine-tune, serve, deploy, and monitor any LLMs with ease.
@@ -39,10 +47,14 @@ Images or deploy as serverless endpoint via
🤖️ **Bring your own LLM**: Fine-tune any LLM to suit your needs with
`LLM.tuning()`. (Coming soon)
+
+

-## 🏃 Getting Started
+
+
+## 🏃 Getting Started
To use OpenLLM, you need to have Python 3.8 (or newer) and `pip` installed on
your system. We highly recommend using a Virtual Environment to prevent package
@@ -105,6 +117,7 @@ openllm query 'Explain to me the difference between "further" and "farther"'
Visit `http://localhost:3000/docs.json` for OpenLLM's API specification.
+OpenLLM seamlessly supports many models and their variants.
Users can also specify different variants of the model to be served, by
providing the `--model-id` argument, e.g.:
@@ -112,6 +125,10 @@ providing the `--model-id` argument, e.g.:
openllm start flan-t5 --model-id google/flan-t5-large
```
+> **Note** that `openllm` also supports all variants of fine-tuned weights, custom model paths,
+> and quantized weights for any of the supported models, as long as they can be loaded with
+> the model architecture. Refer to the [supported models](https://github.com/bentoml/OpenLLM/tree/main#-supported-models) section for each model's architecture.
+
Use the `openllm models` command to see the list of models and their variants
supported in OpenLLM.
@@ -127,8 +144,6 @@ dependencies can be installed with the instructions below:
| Model |
Architecture |
-CPU |
-GPU |
Model Ids |
Installation |
@@ -136,8 +151,6 @@ dependencies can be installed with the instructions below:
chatglm |
ChatGLMForConditionalGeneration |
-
❌ |
-
✅ |
thudm/chatglm-6b
@@ -159,8 +172,6 @@ pip install "openllm[chatglm]"
dolly-v2 |
GPTNeoXForCausalLM |
-✅ |
-✅ |
databricks/dolly-v2-3b
@@ -180,8 +191,6 @@ pip install openllm
falcon |
FalconForCausalLM |
-❌ |
-✅ |
tiiuae/falcon-7b
@@ -202,8 +211,6 @@ pip install "openllm[falcon]"
flan-t5 |
T5ForConditionalGeneration |
-✅ |
-✅ |
google/flan-t5-small
@@ -225,8 +232,6 @@ pip install "openllm[flan-t5]"
gpt-neox |
GPTNeoXForCausalLM |
-❌ |
-✅ |
@@ -244,8 +249,6 @@ pip install openllm
| llama |
LlamaForCausalLM |
-✅ |
-✅ |
meta-llama/llama-2-70b-chat-hf
@@ -275,8 +278,6 @@ pip install "openllm[llama]"
mpt |
MPTForCausalLM |
-✅ |
-✅ |
mosaicml/mpt-7b
@@ -300,8 +301,6 @@ pip install "openllm[mpt]"
opt |
OPTForCausalLM |
-✅ |
-✅ |
facebook/opt-125m
@@ -324,8 +323,6 @@ pip install "openllm[opt]"
stablelm |
GPTNeoXForCausalLM |
-✅ |
-✅ |
stabilityai/stablelm-tuned-alpha-3b
@@ -346,8 +343,6 @@ pip install openllm
starcoder |
GPTBigCodeForCausalLM |
-❌ |
-✅ |
bigcode/starcoder
@@ -366,8 +361,6 @@ pip install "openllm[starcoder]"
baichuan |
BaiChuanForCausalLM |
-❌ |
-✅ |
baichuan-inc/baichuan-7b
@@ -596,9 +589,12 @@ client.ask_agent(
)
```
+
+

+
## 🚀 Deploying to Production
@@ -664,7 +660,6 @@ the serverless cloud for shipping and scaling AI applications.
[deployment instructions](https://docs.bentoml.com/en/latest/reference/cli.html#bentoml-deployment-create).
-
## 👥 Community
Engage with like-minded individuals passionate about LLMs, AI, and more on our
diff --git a/changelog.d/143.feature.md b/changelog.d/143.feature.md
new file mode 100644
index 00000000..7bcefd94
--- /dev/null
+++ b/changelog.d/143.feature.md
@@ -0,0 +1,5 @@
+Added support for installing from git archives (via `.git_archival.txt`)
+
+```bash
+pip install "https://github.com/bentoml/openllm/archive/main.tar.gz"
+```
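
A quick way to check that such an archive-based install resolved a real version (rather than the `0.0.0` fallback configured in `hatch.toml`) is to query the installed metadata:

```python
import importlib.metadata

# Prints the version recorded at build time, e.g. something like "0.2.11.dev3" for a snapshot install.
print(importlib.metadata.version("openllm"))
```
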
diff --git a/hatch.toml b/hatch.toml
index d200c3d3..b6b0fda3 100644
--- a/hatch.toml
+++ b/hatch.toml
@@ -1,5 +1,63 @@
+[metadata.hooks.fancy-pypi-readme]
+content-type = "text/markdown"
+# PyPI doesn't support the <picture> tag.
+[[metadata.hooks.fancy-pypi-readme.fragments]]
+text = """
+
+
+
+
+
+
+"""
+[[metadata.hooks.fancy-pypi-readme.fragments]]
+path = "README.md"
+end-before = "\n"
+[[metadata.hooks.fancy-pypi-readme.fragments]]
+text = """
+
+
+
+
+"""
+[[metadata.hooks.fancy-pypi-readme.fragments]]
+path = "README.md"
+start-after = "\n"
+end-before = "\n"
+[[metadata.hooks.fancy-pypi-readme.fragments]]
+text = """
+
+
+
+
+"""
+[[metadata.hooks.fancy-pypi-readme.fragments]]
+path = "README.md"
+start-after = "\n"
+[[metadata.hooks.fancy-pypi-readme.fragments]]
+text = """
+
+## Release Information
+
+"""
+[[metadata.hooks.fancy-pypi-readme.fragments]]
+path = "CHANGELOG.md"
+pattern = "\n(###.+?\n)## "
+[[metadata.hooks.fancy-pypi-readme.fragments]]
+text = """
+
+---
+
+[Click me for full changelog](https://github.com/bentoml/openllm/blob/main/CHANGELOG.md)
+"""
[version]
-path = "src/openllm/__about__.py"
+fallback-version = "0.0.0"
+source = "vcs"
+[build.hooks.vcs]
+version-file = "src/openllm/_version.py"
+[version.raw-options]
+git_describe_command = ["git", "describe", "--dirty", "--tags", "--long", "--first-parent"]
+local_scheme = "no-local-version"
[metadata]
allow-direct-references = true
[build.targets.wheel]
@@ -16,6 +74,8 @@ dependencies = [
"tomlkit",
# NOTE: Using under ./tools/update-readme.py
"markdown-it-py",
+ # NOTE: For fancy PyPI readme
+ "hatch-fancy-pypi-readme",
]
[envs.default.scripts]
changelog = "towncrier build --version main --draft"
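
With `source = "vcs"` the version is now derived from `git describe` at build time and written to the generated `src/openllm/_version.py` (hence the new `.gitignore` entry). A minimal sketch of a runtime lookup, assuming the layout hatch-vcs/setuptools-scm generates:

```python
# Sketch only: the generated file's exact contents come from hatch-vcs/setuptools-scm;
# "0.0.0" mirrors the fallback-version configured above.
try:
    from openllm._version import __version__
except ImportError:
    import importlib.metadata

    try:
        __version__ = importlib.metadata.version("openllm")
    except importlib.metadata.PackageNotFoundError:
        __version__ = "0.0.0"
```
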
diff --git a/pyproject.toml b/pyproject.toml
index ce41555d..6868fbcd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@
# project.classifiers, project.dependencies, project.optional-dependencies, project.urls
[build-system]
build-backend = "hatchling.build"
-requires = ["hatchling"]
+requires = ["hatchling", "hatch-vcs", "hatch-fancy-pypi-readme"]
[project]
authors = [{ name = "Aaron Pham", email = "aarnphm@bentoml.com" }]
@@ -48,7 +48,7 @@ dependencies = [
"bitsandbytes<0.42",
]
description = 'OpenLLM: Operating LLMs in production'
-dynamic = ["version"]
+dynamic = ["version", "readme"]
keywords = [
"MLOps",
"AI",
@@ -65,7 +65,6 @@ keywords = [
]
license = "Apache-2.0"
name = "openllm"
-readme = "README.md"
requires-python = ">=3.8"
[project.scripts]
@@ -258,7 +257,6 @@ omit = [
"__pypackages__/*",
"src/openllm/playground/",
"src/openllm/__init__.py",
- "src/openllm/__about__.py",
"src/openllm/__main__.py",
"src/openllm/utils/dummy_*.py",
]
@@ -281,7 +279,6 @@ omit = [
"__pypackages__/*",
"src/openllm/playground/",
"src/openllm/__init__.py",
- "src/openllm/__about__.py",
"src/openllm/__main__.py",
"src/openllm/utils/dummy_*.py",
]
@@ -294,7 +291,6 @@ exclude = [
"__pypackages__/*",
"src/openllm/playground/",
"src/openllm/__init__.py",
- "src/openllm/__about__.py",
"src/openllm/__main__.py",
"src/openllm/utils/dummy_*.py",
]
diff --git a/src/openllm/__about__.py b/src/openllm/__about__.py
deleted file mode 100644
index 78db6420..00000000
--- a/src/openllm/__about__.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# Copyright 2023 BentoML Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-__version__ = "0.2.10.dev0"
diff --git a/src/openllm/__init__.py b/src/openllm/__init__.py
index c3867f5d..9885cfa3 100644
--- a/src/openllm/__init__.py
+++ b/src/openllm/__init__.py
@@ -16,7 +16,7 @@
An open platform for operating large language models in production. Fine-tune, serve,
deploy, and monitor any LLMs with ease.
-* Built-in support for StableLM, Llama, Dolly, Flan-T5, Vicuna
+* Built-in support for StableLM, Llama 2, Dolly, Flan-T5, Vicuna
* Option to bring your own fine-tuned LLMs
* Online Serving with HTTP, gRPC, SSE(coming soon) or custom API
* Native integration with BentoML and LangChain for custom LLM apps
@@ -24,37 +24,26 @@ deploy, and monitor any LLMs with ease.
from __future__ import annotations
import logging
import os
+import sys
import typing as t
import warnings
from . import utils as utils
-from .__about__ import __version__ as __version__
from .exceptions import MissingDependencyError
if utils.DEBUG:
utils.set_debug_mode(True)
utils.set_quiet_mode(False)
-
logging.basicConfig(level=logging.NOTSET)
else:
# configuration for bitsandbytes before import
os.environ["BITSANDBYTES_NOWELCOME"] = os.environ.get("BITSANDBYTES_NOWELCOME", "1")
# The following warnings from bitsandbytes, and probably not that important
# for users to see when DEBUG is False
- warnings.filterwarnings(
- "ignore", message="MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization"
- )
- warnings.filterwarnings(
- "ignore", message="MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization"
- )
- warnings.filterwarnings(
- "ignore",
- message=(
- "The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers and GPU quantization"
- " are unavailable."
- ),
- )
+ warnings.filterwarnings("ignore", message="MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization")
+ warnings.filterwarnings("ignore", message="MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization")
+ warnings.filterwarnings("ignore", message="The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers and GPU quantization are unavailable.")
_import_structure: dict[str, list[str]] = {
@@ -73,14 +62,7 @@ _import_structure: dict[str, list[str]] = {
"serialisation": ["ggml", "transformers"],
"cli": ["start", "start_grpc", "build", "import_model", "list_models"],
# NOTE: models
- "models.auto": [
- "AutoConfig",
- "CONFIG_MAPPING",
- "MODEL_MAPPING_NAMES",
- "MODEL_FLAX_MAPPING_NAMES",
- "MODEL_TF_MAPPING_NAMES",
- "MODEL_VLLM_MAPPING_NAMES",
- ],
+ "models.auto": ["AutoConfig", "CONFIG_MAPPING", "MODEL_MAPPING_NAMES", "MODEL_FLAX_MAPPING_NAMES", "MODEL_TF_MAPPING_NAMES", "MODEL_VLLM_MAPPING_NAMES", ],
"models.chatglm": ["ChatGLMConfig"],
"models.baichuan": ["BaichuanConfig"],
"models.dolly_v2": ["DollyV2Config"],
@@ -96,50 +78,34 @@ _import_structure: dict[str, list[str]] = {
# NOTE: torch and cpm_kernels
try:
- if not (utils.is_torch_available() and utils.is_cpm_kernels_available()):
- raise MissingDependencyError
+ if not (utils.is_torch_available() and utils.is_cpm_kernels_available()): raise MissingDependencyError
except MissingDependencyError:
from .utils import dummy_pt_and_cpm_kernels_objects
-
- _import_structure["utils.dummy_pt_and_cpm_kernels_objects"] = [
- name for name in dir(dummy_pt_and_cpm_kernels_objects) if not name.startswith("_")
- ]
+ _import_structure["utils.dummy_pt_and_cpm_kernels_objects"] = [name for name in dir(dummy_pt_and_cpm_kernels_objects) if not name.startswith("_")]
else:
_import_structure["models.chatglm"].extend(["ChatGLM"])
_import_structure["models.baichuan"].extend(["Baichuan"])
-
try:
- if not (utils.is_torch_available() and utils.is_einops_available()):
- raise MissingDependencyError
+ if not (utils.is_torch_available() and utils.is_einops_available()): raise MissingDependencyError
except MissingDependencyError:
from .utils import dummy_pt_and_einops_objects
-
- _import_structure["utils.dummy_pt_and_einops_objects"] = [
- name for name in dir(dummy_pt_and_einops_objects) if not name.startswith("_")
- ]
+ _import_structure["utils.dummy_pt_and_einops_objects"] = [name for name in dir(dummy_pt_and_einops_objects) if not name.startswith("_")]
else:
_import_structure["models.falcon"].extend(["Falcon"])
-
try:
- if not (utils.is_torch_available() and utils.is_triton_available()):
- raise MissingDependencyError
+ if not (utils.is_torch_available() and utils.is_triton_available()): raise MissingDependencyError
except MissingDependencyError:
from .utils import dummy_pt_and_triton_objects
-
- _import_structure["utils.dummy_pt_and_triton_objects"] = [
- name for name in dir(dummy_pt_and_triton_objects) if not name.startswith("_")
- ]
+ _import_structure["utils.dummy_pt_and_triton_objects"] = [name for name in dir(dummy_pt_and_triton_objects) if not name.startswith("_")]
else:
_import_structure["models.mpt"].extend(["MPT"])
try:
- if not utils.is_torch_available():
- raise MissingDependencyError
+ if not utils.is_torch_available(): raise MissingDependencyError
except MissingDependencyError:
from .utils import dummy_pt_objects
-
_import_structure["utils.dummy_pt_objects"] = [name for name in dir(dummy_pt_objects) if not name.startswith("_")]
else:
_import_structure["models.flan_t5"].extend(["FlanT5"])
@@ -152,45 +118,34 @@ else:
_import_structure["models.auto"].extend(["AutoLLM", "MODEL_MAPPING"])
try:
- if not utils.is_vllm_available():
- raise MissingDependencyError
+ if not utils.is_vllm_available(): raise MissingDependencyError
except MissingDependencyError:
from .utils import dummy_vllm_objects
-
- _import_structure["utils.dummy_vllm_objects"] = [
- name for name in dir(dummy_vllm_objects) if not name.startswith("_")
- ]
+ _import_structure["utils.dummy_vllm_objects"] = [name for name in dir(dummy_vllm_objects) if not name.startswith("_")]
else:
_import_structure["models.llama"].extend(["VLLMLlaMA"])
_import_structure["models.auto"].extend(["AutoVLLM", "MODEL_VLLM_MAPPING"])
try:
- if not utils.is_flax_available():
- raise MissingDependencyError
+ if not utils.is_flax_available(): raise MissingDependencyError
except MissingDependencyError:
from .utils import dummy_flax_objects
-
- _import_structure["utils.dummy_flax_objects"] = [
- name for name in dir(dummy_flax_objects) if not name.startswith("_")
- ]
+ _import_structure["utils.dummy_flax_objects"] = [name for name in dir(dummy_flax_objects) if not name.startswith("_")]
else:
_import_structure["models.flan_t5"].extend(["FlaxFlanT5"])
_import_structure["models.opt"].extend(["FlaxOPT"])
_import_structure["models.auto"].extend(["AutoFlaxLLM", "MODEL_FLAX_MAPPING"])
try:
- if not utils.is_tf_available():
- raise MissingDependencyError
+ if not utils.is_tf_available(): raise MissingDependencyError
except MissingDependencyError:
from .utils import dummy_tf_objects
-
_import_structure["utils.dummy_tf_objects"] = [name for name in dir(dummy_tf_objects) if not name.startswith("_")]
else:
_import_structure["models.flan_t5"].extend(["TFFlanT5"])
_import_structure["models.opt"].extend(["TFOPT"])
_import_structure["models.auto"].extend(["AutoTFLLM", "MODEL_TF_MAPPING"])
-
# declaration for OpenLLM-related modules
if t.TYPE_CHECKING:
from . import bundle as bundle
@@ -244,8 +199,7 @@ if t.TYPE_CHECKING:
# NOTE: torch and cpm_kernels
try:
- if not (utils.is_torch_available() and utils.is_cpm_kernels_available()):
- raise MissingDependencyError
+ if not (utils.is_torch_available() and utils.is_cpm_kernels_available()): raise MissingDependencyError
except MissingDependencyError:
from .utils.dummy_pt_and_cpm_kernels_objects import *
else:
@@ -254,8 +208,7 @@ if t.TYPE_CHECKING:
# NOTE: torch and einops
try:
- if not (utils.is_torch_available() and utils.is_einops_available()):
- raise MissingDependencyError
+ if not (utils.is_torch_available() and utils.is_einops_available()): raise MissingDependencyError
except MissingDependencyError:
from .utils.dummy_pt_and_einops_objects import *
else:
@@ -263,16 +216,14 @@ if t.TYPE_CHECKING:
# NOTE: torch and triton
try:
- if not (utils.is_torch_available() and utils.is_triton_available()):
- raise MissingDependencyError
+ if not (utils.is_torch_available() and utils.is_triton_available()): raise MissingDependencyError
except MissingDependencyError:
from .utils.dummy_pt_and_triton_objects import *
else:
from .models.mpt import MPT as MPT
try:
- if not utils.is_torch_available():
- raise MissingDependencyError
+ if not utils.is_torch_available(): raise MissingDependencyError
except MissingDependencyError:
from .utils.dummy_pt_objects import *
else:
@@ -287,8 +238,7 @@ if t.TYPE_CHECKING:
from .models.starcoder import StarCoder as StarCoder
try:
- if not utils.is_vllm_available():
- raise MissingDependencyError
+ if not utils.is_vllm_available(): raise MissingDependencyError
except MissingDependencyError:
from .utils.dummy_vllm_objects import *
else:
@@ -297,8 +247,7 @@ if t.TYPE_CHECKING:
from .models.llama import VLLMLlaMA as VLLMLlaMA
try:
- if not utils.is_flax_available():
- raise MissingDependencyError
+ if not utils.is_flax_available(): raise MissingDependencyError
except MissingDependencyError:
from .utils.dummy_flax_objects import *
else:
@@ -308,8 +257,7 @@ if t.TYPE_CHECKING:
from .models.opt import FlaxOPT as FlaxOPT
try:
- if not utils.is_tf_available():
- raise MissingDependencyError
+ if not utils.is_tf_available(): raise MissingDependencyError
except MissingDependencyError:
from .utils.dummy_tf_objects import *
else:
@@ -318,20 +266,10 @@ if t.TYPE_CHECKING:
from .models.flan_t5 import TFFlanT5 as TFFlanT5
from .models.opt import TFOPT as TFOPT
-else:
- import sys
-
- sys.modules[__name__] = utils.LazyModule(
- __name__,
- globals()["__file__"],
- _import_structure,
- module_spec=__spec__,
- doc=__doc__,
- extra_objects={
- "__version__": __version__,
- # The below is a special mapping that allows openllm to be used as a dictionary.
- # This is purely for convenience sake, and should not be used in performance critcal
- # code. This is also not considered as a public API.
- "__openllm_special__": {"flax": "AutoFlaxLLM", "tf": "AutoTFLLM", "pt": "AutoLLM", "vllm": "AutoVLLM"},
- },
- )
+else: sys.modules[__name__] = utils.LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__, doc=__doc__,
+ extra_objects={
+ # The below is a special mapping that allows openllm to be used as a dictionary.
+  # This is purely for convenience's sake, and should not be used in performance-critical
+  # code. This is also not considered a public API.
+ "__openllm_special__": {"flax": "AutoFlaxLLM", "tf": "AutoTFLLM", "pt": "AutoLLM", "vllm": "AutoVLLM"},
+ })
diff --git a/src/openllm/cli.py b/src/openllm/cli.py
index 119cc0b2..9655c429 100644
--- a/src/openllm/cli.py
+++ b/src/openllm/cli.py
@@ -69,7 +69,6 @@ import openllm
from bentoml._internal.configuration.containers import BentoMLContainer
from bentoml._internal.models.model import ModelStore
-from .__about__ import __version__
from .exceptions import OpenLLMException
from .utils import DEBUG
from .utils import ENV_VARS_TRUE_VALUES
@@ -403,7 +402,7 @@ class OpenLLMCommandGroup(BentoMLCommandGroup):
return wrapper
@click.group(cls=OpenLLMCommandGroup, context_settings=_CONTEXT_SETTINGS, name="openllm")
-@click.version_option(__version__, "--version", "-v")
+@click.version_option(None, "--version", "-v")
def cli() -> None:
"""\b
██████╗ ██████╗ ███████╗███╗ ██╗██╗ ██╗ ███╗ ███╗
diff --git a/src/openllm/utils/lazy.py b/src/openllm/utils/lazy.py
index 6549e891..0c9c940e 100644
--- a/src/openllm/utils/lazy.py
+++ b/src/openllm/utils/lazy.py
@@ -13,30 +13,85 @@
# limitations under the License.
from __future__ import annotations
+import functools
import importlib
import importlib.machinery
+import importlib.metadata
import itertools
import os
+import time
import types
import typing as t
import warnings
+import attr
+
from ..exceptions import ForbiddenAttributeError
from ..exceptions import OpenLLMException
class UsageNotAllowedError(OpenLLMException):
"""Raised when LazyModule.__getitem__ is forbidden."""
-
-
class MissingAttributesError(OpenLLMException):
"""Raised when given keys is not available in LazyModule special mapping."""
+@functools.total_ordering
+@attr.attrs(eq=False, order=False, slots=True, frozen=True)
+class VersionInfo:
+ """A version object that can be compared to a tuple of length 1--4.
-_sentinel = object()
+ ```python
+ >>> VersionInfo(19, 1, 0, "final") <= (19, 2)
+ True
+ >>> VersionInfo(19, 1, 0, "final") < (19, 1, 1)
+ True
+ >>> vi = VersionInfo(19, 2, 0, "final")
+ >>> vi < (19, 1, 1)
+ False
+ >>> vi < (19,)
+ False
+ >>> vi == (19, 2,)
+ True
+ >>> vi == (19, 2, 1)
+ False
+ ```
+ Vendored from attrs.
+ """
+ major: int = attr.field()
+ minor: int = attr.field()
+ micro: int = attr.field()
+ releaselevel: str = attr.field()
-_reserved_namespace = {"__openllm_special__", "__openllm_migration__"}
+ @classmethod
+ def from_version_string(cls, s: str) -> VersionInfo:
+ """Parse *s* and return a VersionInfo."""
+ v = s.split(".")
+ if len(v) == 3: v.append("final")
+ return cls(major=int(v[0]), minor=int(v[1]), micro=int(v[2]), releaselevel=v[3])
+ def _ensure_tuple(self, other: VersionInfo | tuple[t.Any, ...]) -> tuple[tuple[int, int, int, str], tuple[int, int, int, str]]:
+ """Ensure *other* is a tuple of a valid length.
+ Returns a possibly transformed *other* and ourselves as a tuple of
+ the same length as *other*.
+ """
+ if self.__class__ is other.__class__: other = attr.astuple(other)
+ if not isinstance(other, tuple): raise NotImplementedError
+ if not (1 <= len(other) <= 4): raise NotImplementedError
+ return attr.astuple(self)[: len(other)], other
+
+ def __eq__(self, other: t.Any) -> bool:
+ try: us, them = self._ensure_tuple(other)
+ except NotImplementedError: return NotImplemented
+ return us == them
+
+ def __lt__(self, other: t.Any) -> bool:
+ try: us, them = self._ensure_tuple(other)
+ except NotImplementedError: return NotImplemented
+ # Since alphabetically "dev0" < "final" < "post1" < "post2", we don't
+ # have to do anything special with releaselevel for now.
+ return us < them
+
+_sentinel, _reserved_namespace = object(), {"__openllm_special__", "__openllm_migration__"}
class LazyModule(types.ModuleType):
"""Module class that surfaces all objects but only performs associated imports when the objects are requested.
@@ -86,7 +141,6 @@ class LazyModule(types.ModuleType):
self._objects = _extra_objects
self._name = name
self._import_structure = import_structure
-
def __dir__(self) -> list[str]:
"""Needed for autocompletion in an IDE."""
result = t.cast("list[str]", super().__dir__())
@@ -95,61 +149,62 @@ class LazyModule(types.ModuleType):
# they have been accessed or not. So we only add the
# elements of self.__all__ that are not already in the dir.
return result + [i for i in self.__all__ if i not in result]
-
def __getitem__(self, key: str) -> t.Any:
"""This is reserved to only internal uses and users shouldn't use this."""
- if self._objects.get("__openllm_special__") is None:
- raise UsageNotAllowedError(f"'{self._name}' is not allowed to be used as a dict.")
+ if self._objects.get("__openllm_special__") is None: raise UsageNotAllowedError(f"'{self._name}' is not allowed to be used as a dict.")
_special_mapping = self._objects.get("__openllm_special__", {})
try:
- if key in _special_mapping:
- return getattr(self, _special_mapping.__getitem__(key))
+ if key in _special_mapping: return getattr(self, _special_mapping.__getitem__(key))
raise MissingAttributesError(f"Requested '{key}' is not available in given mapping.")
- except AttributeError as e:
- raise KeyError(f"'{self._name}' has no attribute {_special_mapping[key]}") from e
- except Exception as e:
- raise KeyError(f"Failed to lookup '{key}' in '{self._name}'") from e
-
+ except AttributeError as e: raise KeyError(f"'{self._name}' has no attribute {_special_mapping[key]}") from e
+ except Exception as e: raise KeyError(f"Failed to lookup '{key}' in '{self._name}'") from e
def __getattr__(self, name: str) -> t.Any:
"""Equivocal __getattr__ implementation.
It checks from _objects > _modules and does it recursively.
+
+ It also contains a special case for all of the metadata information, such as __version__ and __version_info__.
"""
- if name in _reserved_namespace:
- raise ForbiddenAttributeError(
- f"'{name}' is a reserved namespace for {self._name} and should not be access nor modified."
- )
+    if name in _reserved_namespace: raise ForbiddenAttributeError(f"'{name}' is a reserved namespace for {self._name} and should not be accessed nor modified.")
+ dunder_to_metadata = {
+ "__title__": "Name",
+ "__copyright__": "",
+ "__version__": "version",
+ "__version_info__": "version",
+ "__description__": "summary",
+ "__uri__": "",
+ "__url__": "",
+ "__author__": "",
+ "__email__": "",
+ "__license__": "license",
+ "__homepage__": "",
+ }
+ if name in dunder_to_metadata:
+ if name not in {"__version_info__", "__copyright__", "__version__"}: warnings.warn(f"Accessing '{self._name}.{name}' is deprecated. Please consider using 'importlib.metadata' directly to query for openllm packaging metadata.", DeprecationWarning, stacklevel=2)
+ meta = importlib.metadata.metadata("openllm")
+ project_url = dict(url.split(", ") for url in meta.get_all("Project-URL"))
+ if name == "__license__": return "Apache-2.0"
+ elif name == "__copyright__": return f"Copyright (c) 2023-{time.strftime('%Y')}, Aaron Pham et al."
+ elif name in ("__uri__", "__url__"): return project_url["GitHub"]
+ elif name == "__homepage__": return project_url["Homepage"]
+ elif name == "__version_info__": return VersionInfo.from_version_string(meta["version"]) # similar to how attrs handle __version_info__
+ elif name == "__author__": return meta["Author-email"].rsplit(" ", 1)[0]
+ elif name == "__email__": return meta["Author-email"].rsplit("<", 1)[1][:-1]
+ return meta[dunder_to_metadata[name]]
if "__openllm_migration__" in self._objects:
cur_value = self._objects["__openllm_migration__"].get(name, _sentinel)
if cur_value is not _sentinel:
- warnings.warn(
- f"'{name}' is deprecated and will be removed in future version. Make sure to use '{cur_value}' instead",
- DeprecationWarning,
- stacklevel=3,
- )
+      warnings.warn(f"'{name}' is deprecated and will be removed in a future version. Make sure to use '{cur_value}' instead", DeprecationWarning, stacklevel=3)
return getattr(self, cur_value)
- if name in self._objects:
- return self._objects.__getitem__(name)
- if name in self._modules:
- value = self._get_module(name)
- elif name in self._class_to_module.keys():
- module = self._get_module(self._class_to_module.__getitem__(name))
- value = getattr(module, name)
- else:
- raise AttributeError(f"module {self.__name__} has no attribute {name}")
-
+ if name in self._objects: return self._objects.__getitem__(name)
+ if name in self._modules: value = self._get_module(name)
+ elif name in self._class_to_module.keys(): value = getattr(self._get_module(self._class_to_module.__getitem__(name)), name)
+ else: raise AttributeError(f"module {self.__name__} has no attribute {name}")
setattr(self, name, value)
return value
-
def _get_module(self, module_name: str) -> types.ModuleType:
- try:
- return importlib.import_module("." + module_name, self.__name__)
- except Exception as e:
- raise RuntimeError(
- f"Failed to import {self.__name__}.{module_name} because of the following error (look up to see its"
- f" traceback):\n{e}"
- ) from e
-
+ try: return importlib.import_module("." + module_name, self.__name__)
+ except Exception as e: raise RuntimeError(f"Failed to import {self.__name__}.{module_name} because of the following error (look up to see its traceback):\n{e}") from e
def __reduce__(self) -> tuple[type[LazyModule], tuple[str, str | None, dict[str, list[str]]]]:
"""This is to ensure any given module is pickle-able."""
return (self.__class__, (self._name, self.__file__, self._import_structure))
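
Taken together, the lazy-module changes mean the version dunders are resolved from installed package metadata on first access. Assuming an installed `openllm` distribution, usage looks roughly like:

```python
import openllm

# Resolved lazily from importlib.metadata; no __about__.py involved anymore.
print(openllm.__version__)

# The vendored VersionInfo compares against plain tuples of length 1-4.
if openllm.__version_info__ >= (0, 2):
    print("0.2 or newer")

# Other metadata dunders (e.g. __description__) still resolve, but emit a DeprecationWarning.
print(openllm.__description__)
```
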
diff --git a/tools/update-readme.py b/tools/update-readme.py
index 29ee132c..6c819f82 100755
--- a/tools/update-readme.py
+++ b/tools/update-readme.py
@@ -30,23 +30,15 @@ END_COMMENT = f"\n"
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-
def main() -> int:
- with open(os.path.join(ROOT, "pyproject.toml"), "r") as f:
- deps = tomlkit.parse(f.read()).value["project"]["optional-dependencies"]
-
- with open(os.path.join(ROOT, "README.md"), "r") as f:
- readme = f.readlines()
+ with open(os.path.join(ROOT, "pyproject.toml"), "r") as f: deps = tomlkit.parse(f.read()).value["project"]["optional-dependencies"]
+ with open(os.path.join(ROOT, "README.md"), "r") as f: readme = f.readlines()
start_index, stop_index = readme.index(START_COMMENT), readme.index(END_COMMENT)
- formatted: dict[
- t.Literal["Model", "Architecture", "CPU", "GPU", "URL", "Installation", "Model Ids"], list[str | list[str]]
- ] = {
+ formatted: dict[t.Literal["Model", "Architecture", "URL", "Installation", "Model Ids"], list[str | list[str]]] = {
"Model": [],
"Architecture": [],
"URL": [],
- "CPU": [],
- "GPU": [],
"Model Ids": [],
"Installation": [],
}
@@ -56,8 +48,6 @@ def main() -> int:
formatted["Model"].append(dashed)
formatted["Architecture"].append(config_cls.__openllm_architecture__)
formatted["URL"].append(config_cls.__openllm_url__)
- formatted["GPU"].append("✅")
- formatted["CPU"].append("✅" if not config_cls.__openllm_requires_gpu__ else "❌")
formatted["Model Ids"].append(config_cls.__openllm_model_ids__)
if dashed in deps:
instruction = f'```bash\npip install "openllm[{dashed}]"\n```'
@@ -74,9 +64,7 @@ def main() -> int:
meta.extend([f"{header} | \n" for header in formatted.keys() if header not in ("URL",)])
meta += ["\n"]
# NOTE: rows
- for name, architecture, url, cpu, gpu, model_ids, installation in t.cast(
- t.Iterable[t.Tuple[str, str, str, str, str, t.List[str], str]], zip(*formatted.values())
- ):
+ for name, architecture, url, model_ids, installation in t.cast(t.Iterable[t.Tuple[str, str, str, t.List[str], str]], zip(*formatted.values())):
meta += "\n"
# configure architecture URL
cfg_cls = openllm.CONFIG_MAPPING[name]
@@ -94,8 +82,6 @@ def main() -> int:
[
f"\n| {name} | \n",
arch,
- f"{cpu} | \n",
- f"{gpu} | \n",
]
)
format_with_links: list[str] = []
@@ -107,12 +93,7 @@ def main() -> int:
meta.extend(["\n", "\n"])
readme = readme[:start_index] + [START_COMMENT] + meta + [END_COMMENT] + readme[stop_index + 1 :]
-
- with open(os.path.join(ROOT, "README.md"), "w") as f:
- f.writelines(readme)
-
+ with open(os.path.join(ROOT, "README.md"), "w") as f: f.writelines(readme)
return 0
-
-if __name__ == "__main__":
- raise SystemExit(main())
+if __name__ == "__main__": raise SystemExit(main())