diff --git a/.gitattributes b/.gitattributes index eefb4f9f..d81da4c7 100644 --- a/.gitattributes +++ b/.gitattributes @@ -12,6 +12,7 @@ openllm-python/CHANGELOG.md linguist-generated=true # Others Formula/openllm.rb linguist-generated=true +mypy.ini linguist-generated=true * text=auto eol=lf # Needed for setuptools-scm-git-archive diff --git a/ruff.toml b/.ruff.toml similarity index 100% rename from ruff.toml rename to .ruff.toml diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 3cba191d..af195434 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -205,6 +205,12 @@ See this [docs](/.github/INFRA.md) for more information on OpenLLM's CI/CD workf ## Typing For all internal functions, it is recommended to provide type hint. For all public function definitions, it is recommended to create a stubs file `.pyi` to separate supported external API to increase code visibility. See [openllm-client's `__init__.pyi`](/openllm-client/src/openllm_client/__init__.pyi) for example. +If a function or utility is internal (i.e., prefixed with `_`), then it is recommended to provide inline annotations instead. See [STYLE.md](./STYLE.md) to learn more about the style and typing philosophy. + +If you want to update any mypy configuration, please update [`./tools/update-mypy.py`](./tools/update-mypy.py). + +If you need to update the pyright configuration, please update [`pyrightconfig.json`](./pyrightconfig.json). + ## Install from git archive install ```bash diff --git a/README.md b/README.md index 5f701dff..a32dde26 100644 --- a/README.md +++ b/README.md @@ -503,14 +503,6 @@ openllm start tiiuae/falcon-7b --backend pt ### Quickstart - - -> **Note:** FlanT5 requires to install with: -> ```bash -> pip install "openllm[flan-t5]" -> ``` - - Run the following command to quickly spin up a FlanT5 server: ```bash @@ -869,14 +861,6 @@ TRUST_REMOTE_CODE=True openllm start mosaicml/mpt-7b --backend pt ### Quickstart - - -> **Note:** OPT requires to install with: -> ```bash -> pip install "openllm[opt]" -> ``` - - Run the following command to quickly spin up a OPT server: ```bash diff --git a/STYLE.md b/STYLE.md index e3027aa0..4bf9ebff 100644 --- a/STYLE.md +++ b/STYLE.md @@ -1,4 +1,4 @@ -## the coding style +## the coding style. This documentation serves as a brief discussion of the coding style used for OpenLLM. As you have noticed, it is different from the conventional @@ -48,14 +48,16 @@ rather the brevity of expression. (it enables [expository programming](http://archive.vector.org.uk/art10000980), combining with prototyping new ideas and logics within models implementation) -## some guidelines +## some guidelines. Though I have stopped using deterministic formatter and linter, I do understand that people have preferences for using these tools, and it plays nicely with IDE and editors. As such, I included a [`pyproject.toml`](./pyproject.toml) file that specifies some configuration for the tools that makes it compiliant with -the repository's style. In short, some of the tools include `ruff`, `yapf`, and -`interrogate`. Since we manage everything via `hatch`, refer back to the +the repository's style. In short, I'm using `ruff` for both linting and formatting, +`mypy` for type checking, and provide a `pyright`-compatible configuration for those +who wish to use VSCode or the `pyright` LSP. +Since we manage everything via `hatch`, refer back to the [DEVELOPMENT.md](./DEVELOPMENT.md) for more information on this. 
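To make the stubs-file recommendation above concrete, here is a minimal, hypothetical sketch (the `greet` module and function are illustrations only, not part of OpenLLM's API). The implementation stays terse, while the sibling `.pyi` carries the public types that mypy, pyright, and IDEs consume:

```python
# greet.py — implementation, deliberately unannotated (hypothetical example)
def greet(name, excited=False):
  # an internal helper prefixed with `_` would carry inline annotations instead
  suffix = '!' if excited else '.'
  return f'Hello, {name}{suffix}'
```

```python
# greet.pyi — stubs file shadowing greet.py; type checkers read this file instead
def greet(name: str, excited: bool = ...) -> str: ...
```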
Overtime, Python has incorporated a lot of features that supports this style of @@ -68,7 +70,7 @@ somewhat, type-safe. Since there is no real type-safety when working with Python, typing should be a best-effort to make sure we don't introduce too many bugs. -### naming +### naming. - follow Python standard for this, I don't have too much opinion on this. Just make sure that it is descriptive, and the abbreviation describes the intent of @@ -84,7 +86,7 @@ bugs. _If you have any suggestions, feel free to give it on our discord server!_ -### layout +### layout. - Preferably not a lot of whitespaces, but rather flowing. If you can fit everything for `if`, `def` or a `return` within one line, then there's no need @@ -108,7 +110,7 @@ _If you have any suggestions, feel free to give it on our discord server!_ - With regards to writing operator, try to follow the domain-specific notation. I.e: when writing pathlib, just don't add space since that is not how you - write a path in the terminal. `yapf` will try to accommodate some of this + write a path in the terminal. `ruff format` will try to accommodate some of these changes. - Avoid trailing whitespace @@ -116,9 +118,10 @@ _If you have any suggestions, feel free to give it on our discord server!_ - use array, pytorch or numpy-based indexing where possible. - If you need to export anything, put it in `__all__` or do lazy export for - type-safe checker. + type-safe checker. See [OpenLLM's `__init__.py`](./openllm-python/src/openllm/__init__.py) + for an example of how to lazily export a module. -### misc +### misc. - import alias should be concise and descriptive. A convention is to always `import typing as t`. @@ -129,13 +132,54 @@ _If you have any suggestions, feel free to give it on our discord server!_ MDX and will be hosted on the GitHub Pages, so stay tuned! - If anything that is not used for runtime, just put it under `t.TYPE_CHECKING` -### note on codegen +### note on codegen. - We also do some codegen for some of the assignment functions. These logics are largely based on the work of [attrs](https://github.com/python-attrs/attrs) to ensure fast and isolated codegen in Python. If you need codegen but don't know how it works, feel free to mention @aarnphm on discord! +### types. + +I do believe in static type checking, and oftentimes all of the code in OpenLLM is safely typed. +Types play nicely with static analysis tools, and they are a great way to catch bugs for applications +downstream. In Python, there are two ways of doing static typing: + +1. Stubs files (recommended) + +If you have seen files that end with `.pyi`, those are stubs files. Stubs files are a great format +for specifying types for an external API, and they are a great way to separate the implementation from +the API. For example, if you want to specify the type for `openllm_client.Client`, you can create +a stubs file `openllm_client/__init__.pyi` and specify the type there. + +A few examples include [`openllm.LLM` types definition](./openllm-python/src/openllm/_llm.pyi) versus +the [actual implementation](./openllm-python/src/openllm/_llm.py). + +> Therefore, if you touch any public API, make sure to also add/update the corresponding stubs files. + +2. Inline annotations (encouraged, not required) + +Inline annotations are great for specifying types for internal functions. For example: +```python +def _resolve_internal_converter(llm: LLM, type_: str) -> Converter: ... +``` + +This is not always required. 
If the internal function is expressive enough, and +the variable names are descriptive enough that there is no type ambiguity, then it is not +required to specify the types. For example: +```python +import torch +rms_norm = lambda tensor: torch.sqrt(torch.mean(torch.square(tensor))) +``` +As you can see, the function calculates the RMS norm of a given torch tensor. + +#### note on `TYPE_CHECKING` block. + +As you can see, we also incorporate `TYPE_CHECKING` blocks in various places. +This provides some nice inline type checking during development. Usually, I think +it is nice to have, but once a file gets more and more complex, it is better to just +provide a stubs file for it. + ## FAQ ### Why not use `black`? @@ -143,6 +187,9 @@ _If you have any suggestions, feel free to give it on our discord server!_ `black` is used on our other projects, but I rather find `black` to be very verbose and overtime it is annoying to work with too much whitespaces. +Personally, I think four spaces is a mistake, as in some cases four-space code is +harder to read than two-space code. + ### Why not PEP8? PEP8 is great if you are writing library such as this, but I'm going to do a lot @@ -152,7 +199,7 @@ probably not fit here, and want to explore more expressive style. ### Editor is complaining about the style, what should I do? Kindly ask you to disable linting for this project šŸ¤—. I will try my best to -accomodate with ruff and yapf, but I don't want to spend too much time on this. +accommodate ruff and yapf, but I don't want to spend too much time on this. It is pretty stragithforward to disable it in your editor, with google. ### Style might put off new contributors? diff --git a/all.sh b/all.sh new file mode 100644 index 00000000..c9fffb04 --- /dev/null +++ b/all.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +printf "Running mirror.sh\n" +bash ./tools/mirror.sh +printf "Running update-mypy.py\n" +python ./tools/update-mypy.py +printf "Running dependencies.py\n" +python ./tools/dependencies.py +printf "Running update-config-stubs.py\n" +python ./tools/update-config-stubs.py diff --git a/hatch.toml b/hatch.toml index bcbd5119..f4427e38 100644 --- a/hatch.toml +++ b/hatch.toml @@ -1,66 +1,53 @@ [envs.default] dependencies = [ - "openllm-core @ {root:uri}/openllm-core", - "openllm-client @ {root:uri}/openllm-client", - "openllm[opt,chatglm,fine-tune] @ {root:uri}/openllm-python", - # NOTE: To run all hooks - "pre-commit", - # NOTE: towncrier for changelog - "towncrier", - # NOTE: Using under ./tools/update-optional-dependencies.py - "tomlkit", - # NOTE: For fancy PyPI readme - "hatch-fancy-pypi-readme", - # NOTE: For working with shell pipe - "plumbum", - # The below sync with mypyc deps and pre-commit mypy - "types-psutil", - "types-tabulate", - "types-PyYAML", - "types-protobuf", + "openllm-core @ {root:uri}/openllm-core", + "openllm-client @ {root:uri}/openllm-client", + "openllm[chatglm,fine-tune] @ {root:uri}/openllm-python", + # NOTE: To run all hooks + "pre-commit", + # NOTE: towncrier for changelog + "towncrier", + # NOTE: Using under ./tools/update-optional-dependencies.py + "tomlkit", + # NOTE: For fancy PyPI readme + "hatch-fancy-pypi-readme", + # NOTE: For working with shell pipe + "plumbum", + # The below sync with mypyc deps and pre-commit mypy + "types-psutil", + "types-tabulate", + "types-PyYAML", + "types-protobuf", ] [envs.default.scripts] changelog = "towncrier build --version main --draft" -check-stubs = ["./tools/update-config-stubs.py"] 
inplace-changelog = "towncrier build --version main --keep" -quality = [ - "./tools/dependencies.py", - "- ./tools/update-brew-tap.py", - "check-stubs", - "bash ./tools/mirror.sh", - "- pre-commit run --all-files", - "- pnpm format", -] setup = [ - "pre-commit install", - "- ln -s .python-version-default .python-version", - "curl -fsSL https://raw.githubusercontent.com/clj-kondo/clj-kondo/master/script/install-clj-kondo | bash -", -] -tool = ["quality", "bash ./clean.sh", "bash ./compile.sh {args}"] -typing = [ - "- pre-commit run mypy {args:-a}", - "- pre-commit run pyright {args:-a}", + "pre-commit install", + "- ln -s .python-version-default .python-version", ] +quality = ["bash ./all.sh", "- pre-commit run --all-files", "- pnpm format"] +tool = ["quality", "bash ./clean.sh", 'python ./cz.py'] [envs.tests] dependencies = [ - "openllm-core @ {root:uri}/openllm-core", - "openllm-client @ {root:uri}/openllm-client", - "openllm[opt,chatglm,fine-tune] @ {root:uri}/openllm-python", - # NOTE: interact with docker for container tests. - "docker", - # NOTE: Tests strategies with Hypothesis and pytest, and snapshot testing with syrupy - "coverage[toml]>=6.5", - "filelock>=3.7.1", - "pytest", - "pytest-cov", - "pytest-mock", - "pytest-randomly", - "pytest-rerunfailures", - "pytest-asyncio>=0.21.0", - "pytest-xdist[psutil]", - "trustme", - "hypothesis", - "syrupy", + "openllm-core @ {root:uri}/openllm-core", + "openllm-client @ {root:uri}/openllm-client", + "openllm[chatglm,fine-tune] @ {root:uri}/openllm-python", + # NOTE: interact with docker for container tests. + "docker", + # NOTE: Tests strategies with Hypothesis and pytest, and snapshot testing with syrupy + "coverage[toml]>=6.5", + "filelock>=3.7.1", + "pytest", + "pytest-cov", + "pytest-mock", + "pytest-randomly", + "pytest-rerunfailures", + "pytest-asyncio>=0.21.0", + "pytest-xdist[psutil]", + "trustme", + "hypothesis", + "syrupy", ] skip-install = false template = "tests" @@ -91,10 +78,10 @@ clojure = ["bash external/clojure/run-clojure-ui.sh"] detached = true [envs.ci.scripts] client-stubs = "bash openllm-client/generate-grpc-stubs" -compile = "bash ./compile.sh {args}" +compile = "bash ./tools/compile.sh {args}" recompile = ["bash ./clean.sh", "compile"] edi = "bash local.sh" lock = [ - "bash tools/lock-actions.sh", - "pushd external/clojure && pnpm i --frozen-lockfile", + "bash tools/lock-actions.sh", + "pushd external/clojure && pnpm i --frozen-lockfile", ] diff --git a/mypy.ini b/mypy.ini index e04ff06e..8a9c2a4f 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,10 +1,11 @@ +# The following is autogenerated by tools/update-mypy.py [mypy] pretty = true python_version = 3.8 show_error_codes = true strict = true -warn_unused_configs = True +warn_unused_configs = true ignore_missing_imports = true check_untyped_defs = true warn_unreachable = true -files = openllm-python/src/openllm/bundle/__init__.pyi, openllm-python/src/openllm/serialisation/__init__.pyi, openllm-client/src/openllm_client/__init__.pyi, openllm-client/src/openllm_client/_utils.pyi, openllm-python/src/openllm/__init__.pyi, openllm-client/src/openllm_client/_typing_compat.py, openllm-core/src/openllm_core/_typing_compat.py, openllm-python/src/openllm/client.pyi, openllm-python/src/openllm/bundle/_package.pyi, openllm-python/src/openllm/_runners.pyi, openllm-python/src/openllm/_quantisation.pyi, openllm-python/src/openllm/_llm.pyi, openllm-python/src/openllm/_generation.pyi, openllm-python/src/openllm/entrypoints/openai.pyi, openllm-python/src/openllm/entrypoints/__init__.pyi, 
openllm-python/src/openllm/entrypoints/hf.pyi, openllm-python/src/openllm/entrypoints/_openapi.pyi, openllm-python/src/openllm/entrypoints/cohere.pyi, openllm-python/src/openllm/_service_vars.pyi +files = openllm-python/src/openllm/bundle/__init__.pyi, openllm-python/src/openllm/serialisation/__init__.pyi, openllm-client/src/openllm_client/__init__.pyi, openllm-client/src/openllm_client/_utils.pyi, openllm-python/src/openllm/__init__.pyi, openllm-client/src/openllm_client/_typing_compat.py, openllm-core/src/openllm_core/_typing_compat.py, openllm-python/src/openllm/client.pyi, openllm-python/src/openllm/bundle/_package.pyi, openllm-python/src/openllm/_runners.pyi, openllm-python/src/openllm/_quantisation.pyi, openllm-python/src/openllm/_llm.pyi, openllm-python/src/openllm/_generation.pyi, openllm-python/src/openllm/entrypoints/openai.pyi, openllm-python/src/openllm/entrypoints/__init__.pyi, openllm-python/src/openllm/entrypoints/hf.pyi, openllm-python/src/openllm/entrypoints/_openapi.pyi, openllm-python/src/openllm/entrypoints/cohere.pyi, openllm-python/src/openllm/_service_vars.pyi, openllm-python/src/openllm/utils/__init__.pyi diff --git a/openllm-client/src/openllm_client/_utils.py b/openllm-client/src/openllm_client/_utils.py index b6f27d56..5bc3371d 100644 --- a/openllm-client/src/openllm_client/_utils.py +++ b/openllm-client/src/openllm_client/_utils.py @@ -2,7 +2,8 @@ import openllm_core def __dir__(): - return dir(openllm_core.utils) + coreutils = set(dir(openllm_core.utils)) | set([it for it in openllm_core.utils._extras if not it.startswith('_')]) + return sorted(list(coreutils)) def __getattr__(name): diff --git a/openllm-client/src/openllm_client/_utils.pyi b/openllm-client/src/openllm_client/_utils.pyi index 32566c9f..a66f9ce9 100644 --- a/openllm-client/src/openllm_client/_utils.pyi +++ b/openllm-client/src/openllm_client/_utils.pyi @@ -19,6 +19,7 @@ from openllm_core.utils import ( generate_hash_from_file as generate_hash_from_file, get_debug_mode as get_debug_mode, get_quiet_mode as get_quiet_mode, + getenv as getenv, in_notebook as in_notebook, lenient_issubclass as lenient_issubclass, reserve_free_port as reserve_free_port, @@ -40,7 +41,6 @@ from openllm_core.utils.import_utils import ( is_jupyter_available as is_jupyter_available, is_jupytext_available as is_jupytext_available, is_notebook_available as is_notebook_available, - is_optimum_supports_gptq as is_optimum_supports_gptq, is_peft_available as is_peft_available, is_torch_available as is_torch_available, is_transformers_available as is_transformers_available, diff --git a/openllm-core/src/openllm_core/_typing_compat.py b/openllm-core/src/openllm_core/_typing_compat.py index 7a9c4065..fbe2f719 100644 --- a/openllm-core/src/openllm_core/_typing_compat.py +++ b/openllm-core/src/openllm_core/_typing_compat.py @@ -30,10 +30,10 @@ ListStr = t.List[str] TupleAny = t.Tuple[t.Any, ...] 
At = t.TypeVar('At', bound=attr.AttrsInstance) -LiteralDtype = t.Literal['float16', 'float32', 'bfloat16'] +LiteralDtype = t.Literal['float16', 'float32', 'bfloat16', 'int8', 'int16'] LiteralSerialisation = t.Literal['safetensors', 'legacy'] LiteralQuantise = t.Literal['int8', 'int4', 'gptq', 'awq', 'squeezellm'] -LiteralBackend = t.Literal['pt', 'vllm', 'ggml', 'mlc'] +LiteralBackend = t.Literal['pt', 'vllm', 'ctranslate', 'ggml', 'mlc'] AdapterType = t.Literal[ 'lora', 'adalora', 'adaption_prompt', 'prefix_tuning', 'p_tuning', 'prompt_tuning', 'ia3', 'loha', 'lokr' ] diff --git a/openllm-core/src/openllm_core/config/configuration_baichuan.py b/openllm-core/src/openllm_core/config/configuration_baichuan.py index 7c3d939b..a9540f3d 100644 --- a/openllm-core/src/openllm_core/config/configuration_baichuan.py +++ b/openllm-core/src/openllm_core/config/configuration_baichuan.py @@ -24,7 +24,7 @@ class BaichuanConfig(openllm_core.LLMConfig): 'trust_remote_code': True, 'timeout': 3600000, 'url': 'https://github.com/baichuan-inc/Baichuan-7B', - 'requirements': ['cpm-kernels', 'sentencepiece'], + 'requirements': ['cpm-kernels'], 'architecture': 'BaiChuanForCausalLM', # NOTE: See the following # https://huggingface.co/baichuan-inc/Baichuan-13B-Chat/blob/19ef51ba5bad8935b03acd20ff04a269210983bc/modeling_baichuan.py#L555 diff --git a/openllm-core/src/openllm_core/config/configuration_chatglm.py b/openllm-core/src/openllm_core/config/configuration_chatglm.py index da4bf1c0..7a205349 100644 --- a/openllm-core/src/openllm_core/config/configuration_chatglm.py +++ b/openllm-core/src/openllm_core/config/configuration_chatglm.py @@ -30,7 +30,7 @@ class ChatGLMConfig(openllm_core.LLMConfig): 'trust_remote_code': True, 'timeout': 3600000, 'url': 'https://github.com/THUDM/ChatGLM-6B', - 'requirements': ['cpm-kernels', 'sentencepiece'], + 'requirements': ['cpm-kernels'], 'architecture': 'ChatGLMModel', 'default_id': 'thudm/chatglm-6b', 'model_ids': [ diff --git a/openllm-core/src/openllm_core/config/configuration_falcon.py b/openllm-core/src/openllm_core/config/configuration_falcon.py index 139be86f..8598746d 100644 --- a/openllm-core/src/openllm_core/config/configuration_falcon.py +++ b/openllm-core/src/openllm_core/config/configuration_falcon.py @@ -23,7 +23,7 @@ class FalconConfig(openllm_core.LLMConfig): 'trust_remote_code': False, 'timeout': int(36e6), 'url': 'https://falconllm.tii.ae/', - 'requirements': ['einops', 'xformers'], + 'requirements': ['xformers'], 'architecture': 'FalconForCausalLM', # NOTE: See https://huggingface.co/tiiuae/falcon-7b-instruct/discussions/1 'default_id': 'tiiuae/falcon-7b', diff --git a/openllm-core/src/openllm_core/config/configuration_llama.py b/openllm-core/src/openllm_core/config/configuration_llama.py index 897f56a2..cab33a69 100644 --- a/openllm-core/src/openllm_core/config/configuration_llama.py +++ b/openllm-core/src/openllm_core/config/configuration_llama.py @@ -48,7 +48,6 @@ class LlamaConfig(openllm_core.LLMConfig): 'name_type': 'lowercase', 'url': 'https://github.com/facebookresearch/llama', 'architecture': 'LlamaForCausalLM', - 'requirements': ['fairscale', 'sentencepiece', 'scipy'], 'default_id': 'NousResearch/llama-2-7b-hf', 'serialisation': 'safetensors', 'model_ids': [ diff --git a/openllm-core/src/openllm_core/config/configuration_mpt.py b/openllm-core/src/openllm_core/config/configuration_mpt.py index 7449f0d6..b0105eda 100644 --- a/openllm-core/src/openllm_core/config/configuration_mpt.py +++ b/openllm-core/src/openllm_core/config/configuration_mpt.py @@ 
-53,7 +53,7 @@ class MPTConfig(openllm_core.LLMConfig): 'trust_remote_code': True, 'url': 'https://huggingface.co/mosaicml', 'timeout': int(36e6), - 'requirements': ['triton', 'einops'], + 'requirements': ['triton'], 'architecture': 'MPTForCausalLM', # NOTE: See https://huggingface.co/TheBloke/mpt-30B-chat-GGML/discussions/4 'default_id': 'mosaicml/mpt-7b-instruct', diff --git a/openllm-core/src/openllm_core/config/configuration_phi.py b/openllm-core/src/openllm_core/config/configuration_phi.py index 1f435795..01ea62cf 100644 --- a/openllm-core/src/openllm_core/config/configuration_phi.py +++ b/openllm-core/src/openllm_core/config/configuration_phi.py @@ -25,7 +25,6 @@ class PhiConfig(openllm_core.LLMConfig): 'name_type': 'lowercase', 'url': 'https://arxiv.org/abs/2309.05463', 'architecture': 'PhiForCausalLM', - 'requirements': ['einops'], 'trust_remote_code': True, 'default_id': 'microsoft/phi-1_5', 'serialisation': 'safetensors', diff --git a/openllm-core/src/openllm_core/utils/__init__.py b/openllm-core/src/openllm_core/utils/__init__.py index 835cb195..2c7c6325 100644 --- a/openllm-core/src/openllm_core/utils/__init__.py +++ b/openllm-core/src/openllm_core/utils/__init__.py @@ -11,54 +11,30 @@ import sys import types import typing as t import uuid -from pathlib import Path +from pathlib import Path as _Path -from . import pkg +from . import import_utils as iutils, pkg from .import_utils import ENV_VARS_TRUE_VALUES as ENV_VARS_TRUE_VALUES from .lazy import LazyLoader as LazyLoader, LazyModule as LazyModule, VersionInfo as VersionInfo -from .._typing_compat import overload +from .._typing_compat import overload as _overload if t.TYPE_CHECKING: from bentoml._internal.models.model import ModelContext from bentoml._internal.types import PathType from openllm_core._typing_compat import AnyCallable +# See https://github.com/bentoml/BentoML/blob/a59750c5044bab60b6b3765e6c17041fd8984712/src/bentoml_cli/env.py#L17 DEBUG_ENV_VAR = 'BENTOML_DEBUG' QUIET_ENV_VAR = 'BENTOML_QUIET' # https://github.com/grpc/grpc/blob/master/doc/environment_variables.md _GRPC_DEBUG_ENV_VAR = 'GRPC_VERBOSITY' +WARNING_ENV_VAR = 'OPENLLM_DISABLE_WARNING' +DEV_DEBUG_VAR = 'DEBUG' +# equivocal setattr to save one lookup per assignment +_object_setattr = object.__setattr__ logger = logging.getLogger(__name__) -try: - from typing import GenericAlias as _TypingGenericAlias # type: ignore -except ImportError: - # python < 3.9 does not have GenericAlias (list[int], tuple[str, ...] 
and so on) - _TypingGenericAlias = () # type: ignore -if sys.version_info < (3, 10): - _WithArgsTypes = (_TypingGenericAlias,) -else: - # _GenericAlias is the actual GenericAlias implementation - _WithArgsTypes: t.Any = (t._GenericAlias, types.GenericAlias, types.UnionType) # type: ignore - -DEV_DEBUG_VAR = 'DEBUG' - - -def resolve_user_filepath(filepath: str, ctx: str | None) -> str: - # Return if filepath exist after expanduser - - _path = os.path.expanduser(os.path.expandvars(filepath)) - if os.path.exists(_path): - return os.path.realpath(_path) - - # Try finding file in ctx if provided - if ctx: - _path = os.path.expanduser(os.path.join(ctx, filepath)) - if os.path.exists(_path): - return os.path.realpath(_path) - - raise FileNotFoundError(f'file {filepath} not found') - @contextlib.contextmanager def reserve_free_port( @@ -106,100 +82,105 @@ def reserve_free_port( sock.close() -def calc_dir_size(path: PathType) -> int: - return sum(f.stat().st_size for f in Path(path).glob('**/*') if f.is_file()) - - -def set_debug_mode(enabled: bool, level: int = 1) -> None: - # monkeypatch bentoml._internal.configuration.set_debug_mode to remove unused logs - if enabled: - os.environ[DEV_DEBUG_VAR] = str(level) - os.environ[DEBUG_ENV_VAR] = str(enabled) - os.environ[_GRPC_DEBUG_ENV_VAR] = 'DEBUG' if enabled else 'ERROR' - set_disable_warnings(enabled) - - -def lenient_issubclass(cls: t.Any, class_or_tuple: type[t.Any] | tuple[type[t.Any], ...] | None) -> bool: - try: - return isinstance(cls, type) and issubclass(cls, class_or_tuple) # type: ignore[arg-type] +# fmt: off +_T=t.TypeVar('_T') +@functools.lru_cache(maxsize=1) +def _WithArgsTypes()->tuple[type[t.Any],...]: + try:from typing import GenericAlias as _TypingGenericAlias # type: ignore # noqa: I001 + except ImportError:_TypingGenericAlias = () # type: ignore # python < 3.9 does not have GenericAlias (list[int], tuple[str, ...] and so on) + # _GenericAlias is the actual GenericAlias implementation + return (_TypingGenericAlias,) if sys.version_info<(3,10) else (t._GenericAlias, types.GenericAlias, types.UnionType) # type: ignore +def lenient_issubclass(cls:t.Any,class_or_tuple:type[t.Any]|tuple[type[t.Any],...]|None)->bool: + try:return isinstance(cls,type) and issubclass(cls,class_or_tuple) except TypeError: - if isinstance(cls, _WithArgsTypes): - return False + if isinstance(cls,_WithArgsTypes()):return False raise - - -@functools.lru_cache(maxsize=128) -def generate_hash_from_file(f: str, algorithm: t.Literal['md5', 'sha1'] = 'sha1') -> str: - """Generate a hash from given file's modification time. - - Args: - f: The file to generate the hash from. - algorithm: The hashing algorithm to use. Defaults to 'sha1' (similar to how Git generate its commit hash.) - - Returns: - The generated hash. 
- """ - return getattr(hashlib, algorithm)(str(os.path.getmtime(resolve_filepath(f))).encode()).hexdigest() - - -def check_bool_env(env: str, default: bool = True) -> bool: - v = os.environ.get(env, str(default)).upper() - if v.isdigit(): - return bool(int(v)) # special check for digits +def resolve_user_filepath(filepath:str, ctx:str|None)->str: + _path=os.path.expanduser(os.path.expandvars(filepath)) + if os.path.exists(_path):return os.path.realpath(_path) + # Try finding file in ctx if provided + if ctx: + _path=os.path.expanduser(os.path.join(ctx, filepath)) + if os.path.exists(_path):return os.path.realpath(_path) + raise FileNotFoundError(f'file {filepath} not found') +# this is the supress version of resolve_user_filepath +def resolve_filepath(path:str,ctx:str|None=None)->str: + try:return resolve_user_filepath(path, ctx) + except FileNotFoundError:return path +def check_bool_env(env:str,default:bool=True)->bool: + v=os.getenv(env,default=str(default)).upper() + if v.isdigit():return bool(int(v)) # special check for digits return v in ENV_VARS_TRUE_VALUES - - -# equivocal setattr to save one lookup per assignment -_object_setattr = object.__setattr__ - - -def field_env_key(key: str, suffix: str | None = None) -> str: - return '_'.join(filter(None, map(str.upper, ['OPENLLM', suffix.strip('_') if suffix else '', key]))) - - -# Special debug flag controled via DEBUG -DEBUG: bool = sys.flags.dev_mode or (not sys.flags.ignore_environment and check_bool_env(DEV_DEBUG_VAR, default=False)) -# Whether to show the codenge for debug purposes -SHOW_CODEGEN: bool = DEBUG and ( - os.environ.get(DEV_DEBUG_VAR, str(0)).isdigit() and int(os.environ.get(DEV_DEBUG_VAR, str(0))) > 3 -) -# MYPY is like t.TYPE_CHECKING, but reserved for Mypy plugins -MYPY = False - - -def get_debug_mode() -> bool: - if not DEBUG and DEBUG_ENV_VAR in os.environ: - return check_bool_env(DEBUG_ENV_VAR, False) - return DEBUG - - -def get_quiet_mode() -> bool: - if QUIET_ENV_VAR in os.environ: - return check_bool_env(QUIET_ENV_VAR, False) - if DEBUG: - return False +def calc_dir_size(path:PathType)->int:return sum(f.stat().st_size for f in _Path(path).glob('**/*') if f.is_file()) +@functools.lru_cache(maxsize=128) +def generate_hash_from_file(f:str,algorithm:t.Literal['md5','sha1']='sha1')->str:return str(getattr(hashlib,algorithm)(str(os.path.getmtime(resolve_filepath(f))).encode()).hexdigest()) +def getenv(env:str,default:t.Any=None,var:t.Sequence[str]|None=None)->t.Any: + env_key={f'OPENLLM_{env.upper()}',env.upper()} + if var is not None:env_key=set(var)|env_key + def callback(k:str)->t.Any: + _var = os.getenv(k) + if _var and k.startswith('OPENLLM_') and not get_disable_warnings() and not get_quiet_mode():logger.warning("Using '%s' environment is deprecated, use '%s' instead.",k.upper(),k[8:].upper()) + return _var + return first_not_none(*(callback(k) for k in env_key),default=default) +def field_env_key(key:str,suffix:str|None=None)->str:return '_'.join(filter(None,map(str.upper,['OPENLLM',suffix.strip('_') if suffix else '',key]))) +def get_debug_mode()->bool:return check_bool_env(DEBUG_ENV_VAR,False) if (not DEBUG and DEBUG_ENV_VAR in os.environ) else DEBUG +def get_quiet_mode()->bool: + if QUIET_ENV_VAR in os.environ:return check_bool_env(QUIET_ENV_VAR, False) + if DEBUG:return False return False - - -def set_quiet_mode(enabled: bool) -> None: - # do not log setting quiet mode - os.environ[QUIET_ENV_VAR] = str(enabled) - os.environ[_GRPC_DEBUG_ENV_VAR] = 'NONE' +def get_disable_warnings()->bool:return 
check_bool_env(WARNING_ENV_VAR, False) +def set_disable_warnings(disable:bool=True)->None: + if get_disable_warnings():os.environ[WARNING_ENV_VAR]=str(disable) +def set_debug_mode(enabled:bool,level:int=1)->None: + if enabled:os.environ[DEV_DEBUG_VAR] = str(level) + os.environ.update({DEBUG_ENV_VAR:str(enabled),_GRPC_DEBUG_ENV_VAR:'DEBUG' if enabled else 'ERROR','CT2_VERBOSE':'3'}) set_disable_warnings(enabled) - - -WARNING_ENV_VAR = 'OPENLLM_DISABLE_WARNING' - - -def get_disable_warnings() -> bool: - if get_debug_mode(): - return False - return check_bool_env(WARNING_ENV_VAR, False) - - -def set_disable_warnings(disable: bool = True) -> None: - if get_disable_warnings(): - os.environ[WARNING_ENV_VAR] = str(disable) +def set_quiet_mode(enabled:bool)->None: + os.environ.update({QUIET_ENV_VAR:str(enabled),_GRPC_DEBUG_ENV_VAR:'NONE','CT2_VERBOSE':'-1'}) + set_disable_warnings(enabled) +def gen_random_uuid(prefix:str|None=None)->str:return '-'.join([prefix or 'openllm', str(uuid.uuid4().hex)]) +# NOTE: `compose` any number of unary functions into a single unary function +# compose(f, g, h)(x) == f(g(h(x))); compose(f, g, h)(x, y, z) == f(g(h(x, y, z))) +def compose(*funcs:AnyCallable)->AnyCallable:return functools.reduce(lambda f1,f2:lambda *args,**kwargs:f1(f2(*args,**kwargs)),funcs) +# NOTE: `apply` a transform function that is invoked on results returned from the decorated function +# apply(reversed)(func)(*args, **kwargs) == reversed(func(*args, **kwargs)) +def apply(transform:AnyCallable)->t.Callable[[AnyCallable], AnyCallable]:return lambda func:functools.wraps(func)(compose(transform,func)) +def validate_is_path(maybe_path:str)->bool:return os.path.exists(os.path.dirname(resolve_filepath(maybe_path))) +@_overload +def first_not_none(*args:_T|None,default:_T)->_T:... +@_overload +def first_not_none(*args:_T|None)->_T|None:... 
+def first_not_none(*args:_T|None,default:_T|None=None)->_T|None:return next((arg for arg in args if arg is not None),default) +def generate_context(framework_name:str)->ModelContext: + from bentoml._internal.models.model import ModelContext + framework_versions={'transformers':pkg.get_pkg_version('transformers'),'safetensors':pkg.get_pkg_version('safetensors'),'optimum':pkg.get_pkg_version('optimum'),'accelerate':pkg.get_pkg_version('accelerate')} + if iutils.is_torch_available():framework_versions['torch']=pkg.get_pkg_version('torch') + if iutils.is_ctranslate_available():framework_versions['ctranslate2']=pkg.get_pkg_version('ctranslate2') + if iutils.is_vllm_available():framework_versions['vllm']=pkg.get_pkg_version('vllm') + if iutils.is_autoawq_available():framework_versions['autoawq']=pkg.get_pkg_version('autoawq') + if iutils.is_autogptq_available():framework_versions['autogptq']=pkg.get_pkg_version('auto_gptq') + if iutils.is_bentoml_available():framework_versions['bentoml']=pkg.get_pkg_version('bentoml') + return ModelContext(framework_name=framework_name,framework_versions=framework_versions) +@functools.lru_cache(maxsize=1) +def in_notebook()->bool: + try:from IPython.core.getipython import get_ipython; return 'IPKernelApp' in get_ipython().config # noqa: I001 + except (ImportError, AttributeError):return False +# Used to filter out INFO logs +class InfoFilter(logging.Filter): + def filter(self,record:logging.LogRecord)->bool:return logging.INFO<=record.levelno<logging.WARNING +_TOKENIZER_PREFIX='_tokenizer_' +def flatten_attrs(**attrs:t.Any)->tuple[dict[str,t.Any],dict[str, t.Any]]: + tokenizer_attrs = {k[len(_TOKENIZER_PREFIX):]:v for k,v in attrs.items() if k.startswith(_TOKENIZER_PREFIX)} + for k in tuple(attrs.keys()): + if k.startswith(_TOKENIZER_PREFIX):del attrs[k] + return attrs,tokenizer_attrs +# Special debug flag controlled via DEBUG +DEBUG=sys.flags.dev_mode or (not sys.flags.ignore_environment and check_bool_env(DEV_DEBUG_VAR, default=False)) +# Whether to show the codegen for debug purposes +SHOW_CODEGEN=DEBUG and (os.environ.get(DEV_DEBUG_VAR,str(0)).isdigit() and int(os.environ.get(DEV_DEBUG_VAR,str(0)))>3) +# MYPY is like t.TYPE_CHECKING, but reserved for Mypy plugins +MYPY=False +# fmt: on class ExceptionFilter(logging.Filter): @@ -226,15 +207,6 @@ return True -class InfoFilter(logging.Filter): - def filter(self, record: logging.LogRecord) -> bool: - return logging.INFO <= record.levelno < logging.WARNING - - -def gen_random_uuid(prefix: str | None = None) -> str: - return '-'.join([prefix or 'openllm', str(uuid.uuid4().hex)]) - - _LOGGING_CONFIG: dict[str, t.Any] = { 'version': 1, 'disable_existing_loggers': True, @@ -259,10 +231,6 @@ def configure_logging() -> None: - """Configure logging for OpenLLM. - - Behaves similar to how BentoML loggers are being configured. 
- """ if get_quiet_mode(): _LOGGING_CONFIG['loggers']['openllm']['level'] = logging.ERROR _LOGGING_CONFIG['loggers']['bentoml']['level'] = logging.ERROR @@ -280,135 +248,16 @@ def configure_logging() -> None: logging.config.dictConfig(_LOGGING_CONFIG) -@functools.lru_cache(maxsize=1) -def in_notebook() -> bool: - try: - from IPython.core.getipython import get_ipython - - if t.TYPE_CHECKING: - from IPython.core.interactiveshell import InteractiveShell - return 'IPKernelApp' in t.cast( - 'dict[str, t.Any]', t.cast(t.Callable[[], 'InteractiveShell'], get_ipython)().config - ) - except (ImportError, AttributeError): - return False - - -class suppress(contextlib.suppress, contextlib.ContextDecorator): - """A version of contextlib.suppress with decorator support. - - >>> @suppress(KeyError) - ... def key_error(): - ... {}[''] - >>> key_error() - """ - - -def compose(*funcs: AnyCallable) -> AnyCallable: - """Compose any number of unary functions into a single unary function. - - >>> import textwrap - >>> expected = str.strip(textwrap.dedent(compose.__doc__)) - >>> strip_and_dedent = compose(str.strip, textwrap.dedent) - >>> strip_and_dedent(compose.__doc__) == expected - True - - Compose also allows the innermost function to take arbitrary arguments. - - >>> round_three = lambda x: round(x, ndigits=3) - >>> f = compose(round_three, int.__truediv__) - >>> [f(3*x, x+1) for x in range(1,10)] - [1.5, 2.0, 2.25, 2.4, 2.5, 2.571, 2.625, 2.667, 2.7] - """ - - def compose_two(f1: AnyCallable, f2: AnyCallable) -> AnyCallable: - return lambda *args, **kwargs: f1(f2(*args, **kwargs)) - - return functools.reduce(compose_two, funcs) - - -def apply(transform: AnyCallable) -> t.Callable[[AnyCallable], AnyCallable]: - """Decorate a function with a transform function that is invoked on results returned from the decorated function. - - ```python - @apply(reversed) - def get_numbers(start): - "doc for get_numbers" - return range(start, start+3) - list(get_numbers(4)) - # [6, 5, 4] - ``` - ```python - get_numbers.__doc__ - # 'doc for get_numbers' - ``` - """ - return lambda func: functools.wraps(func)(compose(transform, func)) - - -T = t.TypeVar('T') -K = t.TypeVar('K') - - -@overload -def first_not_none(*args: T | None, default: T) -> T: ... - - -@overload -def first_not_none(*args: T | None) -> T | None: ... 
- - -def first_not_none(*args: T | None, default: T | None = None) -> T | None: - return next((arg for arg in args if arg is not None), default) - - -def resolve_filepath(path: str, ctx: str | None = None) -> str: - """Resolve a file path to an absolute path, expand user and environment variables.""" - try: - return resolve_user_filepath(path, ctx) - except FileNotFoundError: - return path - - -def validate_is_path(maybe_path: str) -> bool: - return os.path.exists(os.path.dirname(resolve_filepath(maybe_path))) - - -def generate_context(framework_name: str) -> ModelContext: - import openllm_core - from bentoml._internal.models.model import ModelContext - - framework_versions = {'transformers': pkg.get_pkg_version('transformers')} - if openllm_core.utils.is_torch_available(): - framework_versions['torch'] = pkg.get_pkg_version('torch') - return ModelContext(framework_name=framework_name, framework_versions=framework_versions) - - -_TOKENIZER_PREFIX = '_tokenizer_' - - -def flatten_attrs(**attrs: t.Any) -> tuple[dict[str, t.Any], dict[str, t.Any]]: - """Normalize the given attrs to a model and tokenizer kwargs accordingly.""" - tokenizer_attrs = {k[len(_TOKENIZER_PREFIX) :]: v for k, v in attrs.items() if k.startswith(_TOKENIZER_PREFIX)} - for k in tuple(attrs.keys()): - if k.startswith(_TOKENIZER_PREFIX): - del attrs[k] - return attrs, tokenizer_attrs - - -# NOTE: The set marks contains a set of modules name -# that are available above and are whitelisted -# to be included in the extra_objects map. -_whitelist_modules = {'pkg'} - # XXX: define all classes, functions import above this line # since _extras will be the locals() import from this file. _extras: dict[str, t.Any] = { - k: v - for k, v in locals().items() - if k in _whitelist_modules or (not isinstance(v, types.ModuleType) and not k.startswith('_')) + **{ + k: v + for k, v in locals().items() + if k in {'pkg'} or (not isinstance(v, types.ModuleType) and k not in {'annotations'} and not k.startswith('_')) + }, + '__openllm_migration__': {'bentoml_cattr': 'converter'}, } -_extras['__openllm_migration__'] = {'bentoml_cattr': 'converter'} __lazy = LazyModule( __name__, globals()['__file__'], @@ -431,8 +280,8 @@ __lazy = LazyModule( 'is_notebook_available', 'is_autogptq_available', 'is_grpc_available', + 'is_ctranslate_available', 'is_transformers_available', - 'is_optimum_supports_gptq', 'is_autoawq_available', 'is_bentoml_available', ], @@ -444,7 +293,6 @@ __dir__ = __lazy.__dir__ __getattr__ = __lazy.__getattr__ if t.TYPE_CHECKING: - # NOTE: The following exports useful utils from bentoml from . 
import analytics as analytics, codegen as codegen, dantic as dantic, serde as serde from .import_utils import ( OPTIONAL_DEPENDENCIES as OPTIONAL_DEPENDENCIES, @@ -452,11 +300,11 @@ if t.TYPE_CHECKING: is_autogptq_available as is_autogptq_available, is_bentoml_available as is_bentoml_available, is_bitsandbytes_available as is_bitsandbytes_available, + is_ctranslate_available as is_ctranslate_available, is_grpc_available as is_grpc_available, is_jupyter_available as is_jupyter_available, is_jupytext_available as is_jupytext_available, is_notebook_available as is_notebook_available, - is_optimum_supports_gptq as is_optimum_supports_gptq, is_peft_available as is_peft_available, is_torch_available as is_torch_available, is_transformers_available as is_transformers_available, diff --git a/openllm-core/src/openllm_core/utils/dantic.py b/openllm-core/src/openllm_core/utils/dantic.py index 13a20758..2369843e 100644 --- a/openllm-core/src/openllm_core/utils/dantic.py +++ b/openllm-core/src/openllm_core/utils/dantic.py @@ -1,5 +1,3 @@ -"""An interface provides the best of pydantic and attrs.""" - from __future__ import annotations import functools import importlib diff --git a/openllm-core/src/openllm_core/utils/import_utils.py b/openllm-core/src/openllm_core/utils/import_utils.py index dd92071b..2d6323af 100644 --- a/openllm-core/src/openllm_core/utils/import_utils.py +++ b/openllm-core/src/openllm_core/utils/import_utils.py @@ -1,126 +1,51 @@ -from __future__ import annotations +# fmt: off import importlib import importlib.metadata import importlib.util -import logging import os -import typing as t -if t.TYPE_CHECKING: - from collections import OrderedDict - - BackendOrderedDict = OrderedDict[str, t.Tuple[t.Callable[[], bool], str]] - -logger = logging.getLogger(__name__) -OPTIONAL_DEPENDENCIES = { - 'opt', - 'flan-t5', - 'vllm', - 'fine-tune', - 'ggml', - 'agents', - 'openai', - 'playground', - 'gptq', - 'grpc', - 'awq', -} -ENV_VARS_TRUE_VALUES = {'1', 'ON', 'YES', 'TRUE'} -ENV_VARS_TRUE_AND_AUTO_VALUES = ENV_VARS_TRUE_VALUES.union({'AUTO'}) -USE_TORCH = os.environ.get('USE_TORCH', 'AUTO').upper() -USE_VLLM = os.environ.get('USE_VLLM', 'AUTO').upper() - - -def _is_package_available(package: str) -> bool: - _package_available = importlib.util.find_spec(package) is not None +OPTIONAL_DEPENDENCIES={'vllm','fine-tune','ggml','ctranslate','agents','openai','playground','gptq','grpc','awq'} +ENV_VARS_TRUE_VALUES={'1','ON','YES','TRUE'} +ENV_VARS_TRUE_AND_AUTO_VALUES=ENV_VARS_TRUE_VALUES.union({'AUTO'}) +USE_VLLM=os.getenv('USE_VLLM','AUTO').upper() +def _is_package_available(package:str)->bool: + _package_available=importlib.util.find_spec(package) is not None if _package_available: - try: - importlib.metadata.version(package) - except importlib.metadata.PackageNotFoundError: - _package_available = False + try:importlib.metadata.version(package) + except importlib.metadata.PackageNotFoundError:_package_available=False return _package_available - - -_torch_available = importlib.util.find_spec('torch') is not None -_vllm_available = importlib.util.find_spec('vllm') is not None -_transformers_available = _is_package_available('transformers') -_grpc_available = importlib.util.find_spec('grpc') is not None -_bentoml_available = _is_package_available('bentoml') -_peft_available = _is_package_available('peft') -_bitsandbytes_available = _is_package_available('bitsandbytes') -_jupyter_available = _is_package_available('jupyter') -_jupytext_available = _is_package_available('jupytext') -_notebook_available = 
_is_package_available('notebook') -_autogptq_available = _is_package_available('auto_gptq') -_autoawq_available = importlib.util.find_spec('awq') is not None - - -def is_bentoml_available() -> bool: - return _bentoml_available - - -def is_transformers_available() -> bool: - return _transformers_available - - -def is_grpc_available() -> bool: - return _grpc_available - - -def is_optimum_supports_gptq() -> bool: - from . import pkg - - return pkg.pkg_version_info('optimum')[:2] >= (0, 12) - - -def is_jupyter_available() -> bool: - return _jupyter_available - - -def is_jupytext_available() -> bool: - return _jupytext_available - - -def is_notebook_available() -> bool: - return _notebook_available - - -def is_peft_available() -> bool: - return _peft_available - - -def is_bitsandbytes_available() -> bool: - return _bitsandbytes_available - - -def is_autogptq_available() -> bool: - return _autogptq_available - - -def is_torch_available() -> bool: - global _torch_available - if USE_TORCH in ENV_VARS_TRUE_AND_AUTO_VALUES and _torch_available: - try: - importlib.metadata.version('torch') - except importlib.metadata.PackageNotFoundError: - _torch_available = False - return _torch_available - - -def is_autoawq_available() -> bool: +_ctranslate_available=importlib.util.find_spec('ctranslate2') is not None +_vllm_available=importlib.util.find_spec('vllm') is not None +_grpc_available=importlib.util.find_spec('grpc') is not None +_autoawq_available=importlib.util.find_spec('awq') is not None +_torch_available=_is_package_available('torch') +_transformers_available=_is_package_available('transformers') +_bentoml_available=_is_package_available('bentoml') +_peft_available=_is_package_available('peft') +_bitsandbytes_available=_is_package_available('bitsandbytes') +_jupyter_available=_is_package_available('jupyter') +_jupytext_available=_is_package_available('jupytext') +_notebook_available=_is_package_available('notebook') +_autogptq_available=_is_package_available('auto_gptq') +def is_ctranslate_available()->bool:return _ctranslate_available +def is_bentoml_available()->bool:return _bentoml_available # needs this since openllm-core doesn't explicitly depend on bentoml +def is_transformers_available()->bool:return _transformers_available # needs this since openllm-core doesn't explicitly depend on transformers +def is_grpc_available()->bool:return _grpc_available +def is_jupyter_available()->bool:return _jupyter_available +def is_jupytext_available()->bool:return _jupytext_available +def is_notebook_available()->bool:return _notebook_available +def is_peft_available()->bool:return _peft_available +def is_bitsandbytes_available()->bool:return _bitsandbytes_available +def is_autogptq_available()->bool:return _autogptq_available +def is_torch_available()->bool:return _torch_available +def is_autoawq_available()->bool: global _autoawq_available - try: - importlib.metadata.version('autoawq') - except importlib.metadata.PackageNotFoundError: - _autoawq_available = False + try:importlib.metadata.version('autoawq') + except importlib.metadata.PackageNotFoundError:_autoawq_available=False return _autoawq_available - - -def is_vllm_available() -> bool: +def is_vllm_available()->bool: global _vllm_available - if USE_VLLM in ENV_VARS_TRUE_AND_AUTO_VALUES and _vllm_available: - try: - importlib.metadata.version('vllm') - except importlib.metadata.PackageNotFoundError: - _vllm_available = False + if USE_VLLM in ENV_VARS_TRUE_AND_AUTO_VALUES or _vllm_available: + try:importlib.metadata.version('vllm') + except 
importlib.metadata.PackageNotFoundError:_vllm_available=False return _vllm_available diff --git a/openllm-core/src/openllm_core/utils/lazy.py b/openllm-core/src/openllm_core/utils/lazy.py index 6e92a20e..3c7d7bb3 100644 --- a/openllm-core/src/openllm_core/utils/lazy.py +++ b/openllm-core/src/openllm_core/utils/lazy.py @@ -90,6 +90,10 @@ class VersionInfo: micro: int = attr.field() releaselevel: str = attr.field() + @classmethod + def from_package(cls, package: str) -> VersionInfo: + return cls.from_version_string(importlib.metadata.version(package)) + @classmethod def from_version_string(cls, s: str) -> VersionInfo: v = s.split('.') diff --git a/openllm-python/README.md b/openllm-python/README.md index 1b79c6b9..a32dde26 100644 --- a/openllm-python/README.md +++ b/openllm-python/README.md @@ -68,10 +68,32 @@ OpenLLM is designed for AI application developers working to build production-re +## šŸ’¾ TL;DR + +For starters, we provide two ways to quickly try out OpenLLM: +### Jupyter Notebooks + +Try this [OpenLLM tutorial in Google Colab: Serving Llama 2 with OpenLLM](https://colab.research.google.com/github/bentoml/OpenLLM/blob/main/examples/llama2.ipynb). + +### Docker + +We provide a Docker container that helps you start running OpenLLM: + +```bash +docker run --rm -it -p 3000:3000 ghcr.io/bentoml/openllm start facebook/opt-1.3b --backend pt +``` + +> [!NOTE] +> Given you have access to GPUs and have set up [nvidia-docker](https://github.com/NVIDIA/nvidia-container-toolkit), you can additionally pass in `--gpus` +> to use GPUs for faster inference and optimization >```bash > docker run --rm --gpus all -p 3000:3000 -it ghcr.io/bentoml/openllm start HuggingFaceH4/zephyr-7b-beta --backend vllm > ``` + + ## šŸƒ Get started -To quickly get started with OpenLLM, follow the instructions below or try this [OpenLLM tutorial in Google Colab: Serving Llama 2 with OpenLLM](https://colab.research.google.com/github/bentoml/OpenLLM/blob/main/examples/openllm-llama2-demo/openllm_llama2_demo.ipynb). - +The following provides instructions on how to get started with OpenLLM locally. ### Prerequisites You have installed Python 3.8 (or later) andĀ `pip`. We highly recommend using a [Virtual Environment](https://docs.python.org/3/library/venv.html) to prevent package conflicts. @@ -124,7 +146,7 @@ Extensions: playground OpenLLM Playground. ``` ### Start an LLM server OpenLLM allows you to quickly spin up an LLM server using `openllm start`. 
For example, to start anĀ [OPT](https://huggingface.co/docs/transformers/model_doc/opt)Ā server, run the following: @@ -481,14 +503,6 @@ openllm start tiiuae/falcon-7b --backend pt ### Quickstart - - -> **Note:** FlanT5 requires to install with: -> ```bash -> pip install "openllm[flan-t5]" -> ``` - - Run the following command to quickly spin up a FlanT5 server: ```bash @@ -847,14 +861,6 @@ TRUST_REMOTE_CODE=True openllm start mosaicml/mpt-7b --backend pt ### Quickstart - - -> **Note:** OPT requires to install with: -> ```bash -> pip install "openllm[opt]" -> ``` - - Run the following command to quickly spin up a OPT server: ```bash diff --git a/openllm-python/pyproject.toml b/openllm-python/pyproject.toml index 3a042adc..5e531e36 100644 --- a/openllm-python/pyproject.toml +++ b/openllm-python/pyproject.toml @@ -46,6 +46,9 @@ dependencies = [ "optimum>=1.12.0", "accelerate", "ghapi", + "einops", + "sentencepiece", + "scipy", "build[virtualenv]<1", "click>=8.1.3", "cuda-python;platform_system!=\"Darwin\"", @@ -70,6 +73,8 @@ keywords = [ "StableLM", "Alpaca", "PyTorch", + "Mistral", + "vLLM", "Transformers", ] license = "Apache-2.0" @@ -99,22 +104,19 @@ Twitter = "https://twitter.com/bentomlai" agents = ["transformers[agents]>=4.35.0", "diffusers", "soundfile"] all = ["openllm[full]"] awq = ["autoawq"] -baichuan = ["cpm-kernels", "sentencepiece"] -chatglm = ["cpm-kernels", "sentencepiece"] -falcon = ["einops", "xformers"] -fine-tune = ["peft>=0.6.0", "datasets", "trl", "scipy", "huggingface-hub"] -flan-t5 = ["transformers>=4.35.0"] +baichuan = ["cpm-kernels"] +chatglm = ["cpm-kernels"] +ctranslate = ["ctranslate2"] +falcon = ["xformers"] +fine-tune = ["peft>=0.6.0", "datasets", "trl", "huggingface-hub"] full = [ - "openllm[agents,awq,baichuan,chatglm,falcon,fine-tune,flan-t5,ggml,gptq,grpc,llama,mpt,openai,opt,phi,playground,starcoder,vllm]", + "openllm[agents,awq,baichuan,chatglm,ctranslate,falcon,fine-tune,ggml,gptq,grpc,mpt,openai,playground,starcoder,vllm]", ] ggml = ["ctransformers"] gptq = ["auto-gptq[triton]>=0.4.2", "optimum>=1.12.0"] -grpc = ["openllm-client[grpc]"] -llama = ["fairscale", "sentencepiece", "scipy"] -mpt = ["triton", "einops"] +grpc = ["bentoml[grpc]>=1.1.9", "openllm-client[grpc]>=0.4.15"] +mpt = ["triton"] openai = ["openai[datalib]>=1", "tiktoken"] -opt = ["transformers>=4.35.0"] -phi = ["einops"] playground = ["jupyter", "notebook", "ipython", "jupytext", "nbformat"] starcoder = ["bitsandbytes"] vllm = ["vllm>=0.2.1post1", "ray"] diff --git a/openllm-python/src/openllm/_llm.py b/openllm-python/src/openllm/_llm.py index ed086894..4f44084c 100644 --- a/openllm-python/src/openllm/_llm.py +++ b/openllm-python/src/openllm/_llm.py @@ -42,6 +42,7 @@ from openllm_core.utils import ( generate_hash_from_file, get_disable_warnings, get_quiet_mode, + getenv, is_peft_available, is_vllm_available, resolve_filepath, @@ -52,6 +53,7 @@ from .exceptions import ForbiddenAttributeError, OpenLLMException from .serialisation.constants import PEFT_CONFIG_NAME if t.TYPE_CHECKING: + import torch import transformers from peft.config import PeftConfig @@ -109,8 +111,8 @@ def _torch_dtype_mapping(): return { 'half': torch.float16, - 'float16': torch.float16, 'float': torch.float32, + 'float16': torch.float16, 'float32': torch.float32, 'bfloat16': torch.bfloat16, } @@ -132,7 +134,8 @@ class LLM(t.Generic[M, T], ReprMixin): _prompt_template: PromptTemplate | None _system_message: str | None - __llm_torch_dtype__: LiteralDtype | t.Literal['auto', 'half', 'float'] = 'auto' + __llm_dtype__: 
LiteralDtype | t.Literal['auto', 'half', 'float'] = 'auto' + __llm_torch_dtype__: 'torch.dtype' = None __llm_config__: LLMConfig | None = None __llm_backend__: LiteralBackend = None # type: ignore __llm_quantization_config__: transformers.BitsAndBytesConfig | transformers.GPTQConfig | transformers.AwqConfig | None = None @@ -158,16 +161,23 @@ class LLM(t.Generic[M, T], ReprMixin): serialisation='safetensors', trust_remote_code=False, embedded=False, - torch_dtype='auto', + dtype='auto', low_cpu_mem_usage=True, **attrs, ): + # backwards compatibility + torch_dtype = attrs.pop('torch_dtype', None) + if torch_dtype is not None: + logger.warning( + 'The argument "torch_dtype" is deprecated and will be removed in the future. Please use "dtype" instead.' + ) + dtype = torch_dtype _local = False if validate_is_path(model_id): model_id, _local = resolve_filepath(model_id), True backend = first_not_none(backend, os.getenv('OPENLLM_BACKEND'), default='vllm' if is_vllm_available() else 'pt') - torch_dtype = first_not_none(os.getenv('TORCH_DTYPE'), torch_dtype, default='auto') - quantize = first_not_none(quantize, os.getenv('OPENLLM_QUANTIZE'), default=None) + dtype = first_not_none(getenv('dtype', default=dtype, var=['TORCH_DTYPE']), default='auto') + quantize = first_not_none(getenv('quantize', default=quantize, var=['QUANTISE']), default=None) attrs.update({'low_cpu_mem_usage': low_cpu_mem_usage}) # parsing tokenizer and model kwargs, as the hierarchy is param pass > default model_attrs, tokenizer_attrs = flatten_attrs(**attrs) @@ -189,7 +199,7 @@ class LLM(t.Generic[M, T], ReprMixin): system_message=system_message, LLM__model_attrs=model_attrs, LLM__tokenizer_attrs=tokenizer_attrs, - llm_torch_dtype__=torch_dtype.lower(), + llm_dtype__=dtype.lower(), llm_backend__=backend, llm_config__=llm_config, llm_trust_remote_code__=trust_remote_code, @@ -222,15 +232,15 @@ class LLM(t.Generic[M, T], ReprMixin): config_dtype = getattr(hf_config, 'torch_dtype', None) if config_dtype is None: config_dtype = torch.float32 - if self.__llm_torch_dtype__ == 'auto': + if self.__llm_dtype__ == 'auto': if config_dtype == torch.float32: torch_dtype = torch.float16 # following common practice else: torch_dtype = config_dtype else: - if self.__llm_torch_dtype__ not in _torch_dtype_mapping(): - raise ValueError(f"Unknown dtype '{self.__llm_torch_dtype__}'") - torch_dtype = _torch_dtype_mapping()[self.__llm_torch_dtype__] + if self.__llm_dtype__ not in _torch_dtype_mapping(): + raise ValueError(f"Unknown dtype '{self.__llm_dtype__}'") + torch_dtype = _torch_dtype_mapping()[self.__llm_dtype__] self.__llm_torch_dtype__ = torch_dtype return self.__llm_torch_dtype__ diff --git a/openllm-python/src/openllm/_llm.pyi b/openllm-python/src/openllm/_llm.pyi index 18e974d3..8a96e066 100644 --- a/openllm-python/src/openllm/_llm.pyi +++ b/openllm-python/src/openllm/_llm.pyi @@ -32,7 +32,8 @@ class IdentifyingParams(TypedDict): model_id: str ResolvedAdapterMap = Dict[AdapterType, Dict[str, Tuple[PeftConfig, str]]] -Dtype = Union[LiteralDtype, Literal['auto', 'half', 'float']] +CTranslateDtype = Literal['int8_float32', 'int8_float16', 'int8_bfloat16'] +Dtype = Union[LiteralDtype, CTranslateDtype, Literal['auto', 'half', 'float']] @attr.define(slots=True, repr=False, init=False) class LLM(Generic[M, T]): @@ -50,7 +51,8 @@ class LLM(Generic[M, T]): _prompt_template: Optional[PromptTemplate] _system_message: Optional[str] - __llm_torch_dtype__: Dtype = ... + __llm_dtype__: Dtype = ... + __llm_torch_dtype__: Optional[torch.dtype] = ... 
__llm_config__: Optional[LLMConfig] = ... __llm_backend__: LiteralBackend = ... __llm_quantization_config__: Optional[QuantizationConfig] = ... diff --git a/openllm-python/src/openllm/_quantisation.py b/openllm-python/src/openllm/_quantisation.py index 96a799e2..9224f17e 100644 --- a/openllm-python/src/openllm/_quantisation.py +++ b/openllm-python/src/openllm/_quantisation.py @@ -1,12 +1,7 @@ from __future__ import annotations from openllm_core.exceptions import MissingDependencyError -from openllm_core.utils import ( - is_autoawq_available, - is_autogptq_available, - is_bitsandbytes_available, - is_optimum_supports_gptq, -) +from openllm_core.utils import is_autoawq_available, is_autogptq_available, is_bitsandbytes_available def infer_quantisation_config(llm, quantise, **attrs): @@ -98,7 +93,7 @@ def infer_quantisation_config(llm, quantise, **attrs): elif quantise == 'int4': quantisation_config = create_int4_config() elif quantise == 'gptq': - if not is_autogptq_available() or not is_optimum_supports_gptq(): + if not is_autogptq_available(): raise MissingDependencyError( "GPTQ requires 'auto-gptq' and 'optimum>=0.12' to be installed. Do it with 'pip install \"openllm[gptq]\"'" ) diff --git a/openllm-python/src/openllm/protocol/__init__.py b/openllm-python/src/openllm/protocol/__init__.py index d4674396..8b6d271e 100644 --- a/openllm-python/src/openllm/protocol/__init__.py +++ b/openllm-python/src/openllm/protocol/__init__.py @@ -1,18 +1,13 @@ -"""Protocol-related packages for all library integrations. - -Currently support OpenAI compatible API. -""" - from __future__ import annotations import os import typing as t from openllm_core.utils import LazyModule -_import_structure: dict[str, list[str]] = {'openai': []} +_import_structure: dict[str, list[str]] = {'openai': [], 'cohere': [], 'hf': []} if t.TYPE_CHECKING: - from . import openai as openai + from . import cohere as cohere, hf as hf, openai as openai __lazy = LazyModule(__name__, os.path.abspath('__file__'), _import_structure) __all__ = __lazy.__all__ diff --git a/openllm-python/src/openllm/serialisation/__init__.py b/openllm-python/src/openllm/serialisation/__init__.py index 1cae437e..a6e364ad 100644 --- a/openllm-python/src/openllm/serialisation/__init__.py +++ b/openllm-python/src/openllm/serialisation/__init__.py @@ -1,21 +1,26 @@ from __future__ import annotations import importlib +import typing as t -import cloudpickle -import fs - -from openllm_core._typing_compat import ParamSpec +from openllm_core._typing_compat import M, ParamSpec, T, TypeGuard from openllm_core.exceptions import OpenLLMException +if t.TYPE_CHECKING: + from bentoml import Model + + from .._llm import LLM + P = ParamSpec('P') -def load_tokenizer(llm, **tokenizer_attrs): +def load_tokenizer(llm: LLM[M, T], **tokenizer_attrs: t.Any) -> TypeGuard[T]: """Load the tokenizer from BentoML store. By default, it will try to find the bentomodel whether it is in store.. If model is not found, it will raises a ``bentoml.exceptions.NotFound``. """ + import cloudpickle + import fs from transformers import AutoTokenizer tokenizer_attrs = {**llm.llm_parameters[-1], **tokenizer_attrs} @@ -52,34 +57,39 @@ def load_tokenizer(llm, **tokenizer_attrs): return tokenizer -_extras = ['get', 'import_model', 'load_model'] - - def _make_dispatch_function(fn): - def caller(llm, *args, **kwargs): + def caller(llm: LLM[M, T], *args: P.args, **kwargs: P.kwargs) -> TypeGuard[M | T | Model]: """Generic function dispatch to correct serialisation submodules based on LLM runtime. 
> [!NOTE] See 'openllm.serialisation.transformers' if 'llm.__llm_backend__ in ("pt", "vllm")' > [!NOTE] See 'openllm.serialisation.ggml' if 'llm.__llm_backend__ == "ggml"' + + > [!NOTE] See 'openllm.serialisation.ctranslate' if 'llm.__llm_backend__ == "ctranslate"' """ - serde = 'transformers' if llm.__llm_backend__ == 'ggml': serde = 'ggml' - return getattr(importlib.import_module(f'.{serde}', __name__), fn)(llm, *args, **kwargs) + elif llm.__llm_backend__ == 'ctranslate': + serde = 'ctranslate' + elif llm.__llm_backend__ in {'pt', 'vllm'}: + serde = 'transformers' + else: + raise OpenLLMException(f'Unsupported backend: {llm.__llm_backend__}') + return getattr(importlib.import_module(f'.{serde}', 'openllm.serialisation'), fn)(llm, *args, **kwargs) return caller -_import_structure: dict[str, list[str]] = {'ggml': [], 'transformers': [], 'constants': []} -__all__ = ['ggml', 'transformers', 'constants', 'load_tokenizer', *_extras] +_extras = ['get', 'import_model', 'load_model'] +_import_structure = {'ggml', 'transformers', 'ctranslate', 'constants'} +__all__ = ['load_tokenizer', *_extras, *_import_structure] -def __dir__(): +def __dir__() -> t.Sequence[str]: return sorted(__all__) -def __getattr__(name): +def __getattr__(name: str) -> t.Any: if name == 'load_tokenizer': return load_tokenizer elif name in _import_structure: diff --git a/openllm-python/src/openllm/serialisation/ctranslate/__init__.py b/openllm-python/src/openllm/serialisation/ctranslate/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/openllm-python/src/openllm/serialisation/ggml.py b/openllm-python/src/openllm/serialisation/ggml/__init__.py similarity index 100% rename from openllm-python/src/openllm/serialisation/ggml.py rename to openllm-python/src/openllm/serialisation/ggml/__init__.py diff --git a/openllm-python/src/openllm/serialisation/transformers/__init__.py b/openllm-python/src/openllm/serialisation/transformers/__init__.py index 93ce948e..09162049 100644 --- a/openllm-python/src/openllm/serialisation/transformers/__init__.py +++ b/openllm-python/src/openllm/serialisation/transformers/__init__.py @@ -63,9 +63,22 @@ def import_model(llm, *decls, trust_remote_code, _model_store=Provide[BentoMLCon metadata['_quantize'] = quantize architectures = getattr(config, 'architectures', []) if not architectures: - raise RuntimeError( - 'Failed to determine the architecture for this model. Make sure the `config.json` is valid and can be loaded with `transformers.AutoConfig`' - ) + if trust_remote_code: + auto_map = getattr(config, 'auto_map', {}) + if not auto_map: + raise RuntimeError( + f'Failed to determine the architecture from both `auto_map` and `architectures` from {llm.model_id}' + ) + autoclass = 'AutoModelForSeq2SeqLM' if llm.config['model_type'] == 'seq2seq_lm' else 'AutoModelForCausalLM' + if autoclass not in auto_map: + raise RuntimeError( + f"Model '{llm.model_id}' is not yet supported via 'auto_map'. OpenLLM currently only supports encoder-decoder or decoder-only models." + ) + architectures = [auto_map[autoclass]] + else: + raise RuntimeError( + 'Failed to determine the architecture for this model. 
Make sure the `config.json` is valid and can be loaded with `transformers.AutoConfig`' + ) metadata['_pretrained_class'] = architectures[0] if not llm._local: metadata['_revision'] = get_hash(config) @@ -75,7 +88,7 @@ def import_model(llm, *decls, trust_remote_code, _model_store=Provide[BentoMLCon signatures = {} if quantize == 'gptq': - if not openllm.utils.is_autogptq_available() or not openllm.utils.is_optimum_supports_gptq(): + if not openllm.utils.is_autogptq_available(): raise OpenLLMException( "GPTQ quantisation requires 'auto-gptq' and 'optimum' (Not found in local environment). Install it with 'pip install \"openllm[gptq]\" --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/'" ) @@ -213,7 +226,7 @@ def load_model(llm, *decls, **attrs): if '_quantize' in llm.bentomodel.info.metadata: _quantise = llm.bentomodel.info.metadata['_quantize'] if _quantise == 'gptq': - if not openllm.utils.is_autogptq_available() or not openllm.utils.is_optimum_supports_gptq(): + if not openllm.utils.is_autogptq_available(): raise OpenLLMException( "GPTQ quantisation requires 'auto-gptq' and 'optimum' (Not found in local environment). Install it with 'pip install \"openllm[gptq]\" --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/'" ) diff --git a/openllm-python/src/openllm/serialisation/transformers/_helpers.py b/openllm-python/src/openllm/serialisation/transformers/_helpers.py index c82ee77f..f3ceb980 100644 --- a/openllm-python/src/openllm/serialisation/transformers/_helpers.py +++ b/openllm-python/src/openllm/serialisation/transformers/_helpers.py @@ -1,6 +1,7 @@ from __future__ import annotations import copy import logging +import typing as t import transformers @@ -10,14 +11,14 @@ from openllm_core.utils import get_disable_warnings, get_quiet_mode logger = logging.getLogger(__name__) -def get_hash(config) -> str: +def get_hash(config: transformers.PretrainedConfig) -> str: _commit_hash = getattr(config, '_commit_hash', None) if _commit_hash is None: raise ValueError(f'Cannot find commit hash in {config}') return _commit_hash -def process_config(model_id, trust_remote_code, **attrs): +def process_config(model_id: str, trust_remote_code: bool, **attrs: t.Any): config = attrs.pop('config', None) # this logic below is synonymous to handling `from_pretrained` attrs. hub_attrs = {k: attrs.pop(k) for k in HUB_ATTRS if k in attrs} diff --git a/openllm-python/src/openllm/utils/__init__.py b/openllm-python/src/openllm/utils/__init__.py index 32bac990..7dda7858 100644 --- a/openllm-python/src/openllm/utils/__init__.py +++ b/openllm-python/src/openllm/utils/__init__.py @@ -1,74 +1,10 @@ -"""Utilities function for OpenLLM. - -User can import these function for convenience, but -we won't ensure backward compatibility for these functions. So use with caution. 
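# --- Editor's note: illustrative sketch, not part of the patch above. ---------
# Standalone equivalent of the `auto_map` fallback added to import_model: when
# `config.architectures` is empty, a trust_remote_code model can still be
# resolved through `config.auto_map`. The function name is hypothetical;
# `config` is assumed to behave like a `transformers.PretrainedConfig`.
def resolve_architecture(config, model_type, trust_remote_code):
  architectures = getattr(config, 'architectures', None)
  if architectures:
    return architectures[0]
  if not trust_remote_code:
    raise RuntimeError('Failed to determine the architecture for this model. Make sure the `config.json` is valid and can be loaded with `transformers.AutoConfig`')
  auto_map = getattr(config, 'auto_map', {})
  if not auto_map:
    raise RuntimeError('Failed to determine the architecture from both `auto_map` and `architectures`')
  # encoder-decoder models map to AutoModelForSeq2SeqLM, decoder-only models to AutoModelForCausalLM
  autoclass = 'AutoModelForSeq2SeqLM' if model_type == 'seq2seq_lm' else 'AutoModelForCausalLM'
  if autoclass not in auto_map:
    raise RuntimeError(f'{autoclass} is missing from `auto_map`; only encoder-decoder or decoder-only models are supported.')
  return auto_map[autoclass]
# ------------------------------------------------------------------------------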
-""" - -from __future__ import annotations import functools import importlib.metadata -import typing as t import openllm_core -if t.TYPE_CHECKING: - import openllm - from openllm_core.utils import ( - DEBUG as DEBUG, - DEBUG_ENV_VAR as DEBUG_ENV_VAR, - DEV_DEBUG_VAR as DEV_DEBUG_VAR, - ENV_VARS_TRUE_VALUES as ENV_VARS_TRUE_VALUES, - MYPY as MYPY, - OPTIONAL_DEPENDENCIES as OPTIONAL_DEPENDENCIES, - QUIET_ENV_VAR as QUIET_ENV_VAR, - SHOW_CODEGEN as SHOW_CODEGEN, - LazyLoader as LazyLoader, - LazyModule as LazyModule, - ReprMixin as ReprMixin, - VersionInfo as VersionInfo, - analytics as analytics, - calc_dir_size as calc_dir_size, - check_bool_env as check_bool_env, - codegen as codegen, - configure_logging as configure_logging, - dantic as dantic, - field_env_key as field_env_key, - first_not_none as first_not_none, - flatten_attrs as flatten_attrs, - gen_random_uuid as gen_random_uuid, - generate_context as generate_context, - generate_hash_from_file as generate_hash_from_file, - get_debug_mode as get_debug_mode, - get_disable_warnings as get_disable_warnings, - get_quiet_mode as get_quiet_mode, - in_notebook as in_notebook, - is_autoawq_available as is_autoawq_available, - is_autogptq_available as is_autogptq_available, - is_bentoml_available as is_bentoml_available, - is_bitsandbytes_available as is_bitsandbytes_available, - is_grpc_available as is_grpc_available, - is_jupyter_available as is_jupyter_available, - is_jupytext_available as is_jupytext_available, - is_notebook_available as is_notebook_available, - is_optimum_supports_gptq as is_optimum_supports_gptq, - is_peft_available as is_peft_available, - is_torch_available as is_torch_available, - is_transformers_available as is_transformers_available, - is_vllm_available as is_vllm_available, - lenient_issubclass as lenient_issubclass, - reserve_free_port as reserve_free_port, - resolve_filepath as resolve_filepath, - resolve_user_filepath as resolve_user_filepath, - serde as serde, - set_debug_mode as set_debug_mode, - set_disable_warnings as set_disable_warnings, - set_quiet_mode as set_quiet_mode, - validate_is_path as validate_is_path, - ) - from openllm_core.utils.serde import converter as converter - -def generate_labels(llm: openllm.LLM[t.Any, t.Any]) -> dict[str, t.Any]: +def generate_labels(llm): return { 'backend': llm.__llm_backend__, 'framework': 'openllm', @@ -79,27 +15,26 @@ def generate_labels(llm: openllm.LLM[t.Any, t.Any]) -> dict[str, t.Any]: } -def available_devices() -> tuple[str, ...]: - """Return available GPU under system. 
Currently only supports NVIDIA GPUs.""" +def available_devices(): from .._strategies import NvidiaGpuResource return tuple(NvidiaGpuResource.from_system()) @functools.lru_cache(maxsize=1) -def device_count() -> int: +def device_count(): return len(available_devices()) __all__ = ['generate_labels', 'available_devices', 'device_count'] -def __dir__() -> t.Sequence[str]: - return sorted(__all__) + sorted(dir(openllm_core.utils)) +def __dir__(): + coreutils = set(dir(openllm_core.utils)) | {it for it in openllm_core.utils._extras if not it.startswith('_')} + return sorted(__all__) + sorted(coreutils) -def __getattr__(it: str) -> t.Any: +def __getattr__(it): if hasattr(openllm_core.utils, it): return getattr(openllm_core.utils, it) - else: - raise AttributeError(f'module {__name__} has no attribute {it}') + raise AttributeError(f'module {__name__} has no attribute {it}') diff --git a/openllm-python/src/openllm/utils/__init__.pyi b/openllm-python/src/openllm/utils/__init__.pyi new file mode 100644 index 00000000..4ec387f7 --- /dev/null +++ b/openllm-python/src/openllm/utils/__init__.pyi @@ -0,0 +1,61 @@ +from typing import Any, Dict, Tuple + +from openllm_core.utils import ( + DEBUG as DEBUG, + DEBUG_ENV_VAR as DEBUG_ENV_VAR, + DEV_DEBUG_VAR as DEV_DEBUG_VAR, + ENV_VARS_TRUE_VALUES as ENV_VARS_TRUE_VALUES, + MYPY as MYPY, + OPTIONAL_DEPENDENCIES as OPTIONAL_DEPENDENCIES, + QUIET_ENV_VAR as QUIET_ENV_VAR, + SHOW_CODEGEN as SHOW_CODEGEN, + LazyLoader as LazyLoader, + LazyModule as LazyModule, + ReprMixin as ReprMixin, + VersionInfo as VersionInfo, + analytics as analytics, + calc_dir_size as calc_dir_size, + check_bool_env as check_bool_env, + codegen as codegen, + configure_logging as configure_logging, + dantic as dantic, + field_env_key as field_env_key, + first_not_none as first_not_none, + flatten_attrs as flatten_attrs, + gen_random_uuid as gen_random_uuid, + generate_context as generate_context, + generate_hash_from_file as generate_hash_from_file, + get_debug_mode as get_debug_mode, + get_disable_warnings as get_disable_warnings, + get_quiet_mode as get_quiet_mode, + getenv as getenv, + in_notebook as in_notebook, + is_autoawq_available as is_autoawq_available, + is_autogptq_available as is_autogptq_available, + is_bentoml_available as is_bentoml_available, + is_bitsandbytes_available as is_bitsandbytes_available, + is_grpc_available as is_grpc_available, + is_jupyter_available as is_jupyter_available, + is_jupytext_available as is_jupytext_available, + is_notebook_available as is_notebook_available, + is_peft_available as is_peft_available, + is_torch_available as is_torch_available, + is_transformers_available as is_transformers_available, + is_vllm_available as is_vllm_available, + lenient_issubclass as lenient_issubclass, + reserve_free_port as reserve_free_port, + resolve_filepath as resolve_filepath, + resolve_user_filepath as resolve_user_filepath, + serde as serde, + set_debug_mode as set_debug_mode, + set_disable_warnings as set_disable_warnings, + set_quiet_mode as set_quiet_mode, + validate_is_path as validate_is_path, +) +from openllm_core.utils.serde import converter as converter + +from .._llm import LLM + +def available_devices() -> Tuple[str, ...]: ... +def device_count() -> int: ... +def generate_labels(llm: LLM[Any, Any]) -> Dict[str, Any]: ...
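# --- Editor's note: illustrative usage, not part of the patch above. ----------
# With the runtime delegation in utils/__init__.py plus the new stubs file,
# openllm.utils keeps forwarding everything from openllm_core.utils while type
# checkers see precise signatures:
import openllm.utils

openllm.utils.first_not_none(None, 'pt')  # resolved through openllm_core.utils via __getattr__
openllm.utils.device_count()  # cached after the first call thanks to functools.lru_cache
# ------------------------------------------------------------------------------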
diff --git a/compile.sh b/tools/compile.sh similarity index 100% rename from compile.sh rename to tools/compile.sh diff --git a/tools/dependencies.py b/tools/dependencies.py index 530638b0..d44e39f6 100755 --- a/tools/dependencies.py +++ b/tools/dependencies.py @@ -17,6 +17,7 @@ sys.path.insert(0, os.path.join(ROOT, 'openllm-python', 'src')) sys.path.insert(1, os.path.join(ROOT, 'openllm-core', 'src')) import openllm +from openllm_core.utils.lazy import VersionInfo _OWNER, _REPO = 'bentoml', 'openllm' @@ -141,19 +142,19 @@ class Dependencies: return cls(*decls) -lower_bentoml_constraint = '1.1.9' +_LOWER_BENTOML_CONSTRAINT = '1.1.9' +_OPENLLM_CLIENT_CONSTRAINT = str(VersionInfo.from_package('openllm-client')) _BENTOML_EXT = ['io'] _TRANSFORMERS_EXT = ['torch', 'tokenizers'] _TRANSFORMERS_CONSTRAINTS = '4.35.0' -FINE_TUNE_DEPS = ['peft>=0.6.0', 'datasets', 'trl', 'scipy', 'huggingface-hub'] -FLAN_T5_DEPS = [f'transformers>={_TRANSFORMERS_CONSTRAINTS}'] -OPT_DEPS = [f'transformers>={_TRANSFORMERS_CONSTRAINTS}'] -GRPC_DEPS = ['openllm-client[grpc]'] +FINE_TUNE_DEPS = ['peft>=0.6.0', 'datasets', 'trl', 'huggingface-hub'] +GRPC_DEPS = [f'bentoml[grpc]>={_LOWER_BENTOML_CONSTRAINT}', f'openllm-client[grpc]>={_OPENLLM_CLIENT_CONSTRAINT}'] OPENAI_DEPS = ['openai[datalib]>=1', 'tiktoken'] AGENTS_DEPS = [f'transformers[agents]>={_TRANSFORMERS_CONSTRAINTS}', 'diffusers', 'soundfile'] PLAYGROUND_DEPS = ['jupyter', 'notebook', 'ipython', 'jupytext', 'nbformat'] GGML_DEPS = ['ctransformers'] +CTRANSLATE_DEPS = ['ctranslate2'] AWQ_DEPS = ['autoawq'] GPTQ_DEPS = ['auto-gptq[triton]>=0.4.2', 'optimum>=1.12.0'] VLLM_DEPS = ['vllm>=0.2.1post1', 'ray'] @@ -264,6 +265,8 @@ def keywords() -> Array: 'StableLM', 'Alpaca', 'PyTorch', + 'Mistral', + 'vLLM', 'Transformers', ] ) @@ -303,7 +306,7 @@ def main(args) -> int: release_version = openllm.bundle.RefResolver.from_strategy('release').version _BASE_DEPENDENCIES = [ - Dependencies(name='bentoml', extensions=_BENTOML_EXT, lower_constraint=lower_bentoml_constraint), + Dependencies(name='bentoml', extensions=_BENTOML_EXT, lower_constraint=_LOWER_BENTOML_CONSTRAINT), Dependencies(name='transformers', extensions=_TRANSFORMERS_EXT, lower_constraint=_TRANSFORMERS_CONSTRAINTS), Dependencies(name='openllm-client', lower_constraint=release_version), Dependencies(name='openllm-core', lower_constraint=release_version), @@ -311,6 +314,9 @@ def main(args) -> int: Dependencies(name='optimum', lower_constraint='1.12.0'), Dependencies(name='accelerate'), Dependencies(name='ghapi'), + Dependencies(name='einops'), + Dependencies(name='sentencepiece'), + Dependencies(name='scipy'), Dependencies(name='build', upper_constraint='1', extensions=['virtualenv']), Dependencies(name='click', lower_constraint='8.1.3'), Dependencies(name='cuda-python', platform=('Darwin', 'ne')), diff --git a/tools/run-release-action b/tools/run-release-action index 289f86e5..d8190575 100755 --- a/tools/run-release-action +++ b/tools/run-release-action @@ -18,20 +18,23 @@ validate_release() { fi } -if ! command -v gh > /dev/null 2>&1; then - echo "ERROR: gh not installed. Aborting..." +check_membership() { + local org="BentoML" + local username=$(gh api user | jq -r '.login') + if gh api orgs/$org/members/$username -q '.message' | grep -q "Not Found"; then + echo "ERROR: You must be a member of $org to run this script." exit 1 -fi + fi +} -if ! command -v jq > /dev/null 2>&1; then - echo "ERROR: jq not installed. Aborting..." - exit 1 -fi +for cmd in gh jq hatch; do + if ! command -v "$cmd" > /dev/null 2>&1; then + echo "ERROR: $cmd not installed. Aborting..." + exit 1 + fi +done -if ! command -v hatch > /dev/null 2>&1; then - echo "ERROR: hatch not installed. Aborting..." - exit 1 -fi +check_membership # Check if release flag is provided if [[ $1 == "--release" ]]; then diff --git a/tools/update-mypy.py b/tools/update-mypy.py index 73148316..4b2dc556 100755 --- a/tools/update-mypy.py +++ b/tools/update-mypy.py @@ -4,6 +4,17 @@ import configparser import os from typing import List +_MYPY_CONFIG = { + 'pretty': 'true', + 'python_version': '3.8', + 'show_error_codes': 'true', + 'strict': 'true', + 'warn_unused_configs': 'true', + 'ignore_missing_imports': 'true', + 'check_untyped_defs': 'true', + 'warn_unreachable': 'true', +} + # Function to find .pyi files in a given directory def pyi_in_subdir(directory: str, git_root: str) -> List[str]: @@ -50,9 +61,14 @@ def update_mypy_ini(pyi_files: List[str], mypy_ini_path: str) -> int: # Update the 'files' entry config['mypy']['files'] = ', '.join(updated_files) + for key, value in _MYPY_CONFIG.items(): + config.set('mypy', key, value) # Write changes back to mypy.ini with open(mypy_ini_path, 'w') as configfile: + configfile.write( + f'# The following is autogenerated by {os.path.join(os.path.basename(os.path.dirname(__file__)), os.path.basename(__file__))}\n' + ) config.write(configfile) # Remove last newline if exists with open(mypy_ini_path, 'rb+') as file: