mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-04-20 15:12:12 -04:00
Merge openllm-next as openllm 0.6
This commit is contained in:
5
.gitattributes
vendored
Normal file
5
.gitattributes
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
**/_next/ linguist-generated=true
|
||||
|
||||
* text=auto eol=lf
|
||||
# Needed for setuptools-scm-git-archive
|
||||
.git_archival.txt export-subst
|
||||
163
.gitignore
vendored
Normal file
163
.gitignore
vendored
Normal file
@@ -0,0 +1,163 @@
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
#poetry.lock
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
#pdm.lock
|
||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||
# in version control.
|
||||
# https://pdm.fming.dev/#use-with-ide
|
||||
.pdm.toml
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
*.whl
|
||||
# Environments
|
||||
venv/
|
||||
107
DEVELOPMENT.md
Normal file
107
DEVELOPMENT.md
Normal file
@@ -0,0 +1,107 @@
|
||||
# Developer Guide
|
||||
|
||||
This Developer Guide is designed to help you contribute to the OpenLLM project.
|
||||
Follow these steps to set up your development environment and learn the process
|
||||
of contributing to our open-source project.
|
||||
|
||||
Join our [Discord Channel](https://l.bentoml.com/join-openllm-discord) and reach
|
||||
out to us if you have any questions!
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Developer Guide](#developer-guide)
|
||||
- [Table of Contents](#table-of-contents)
|
||||
- [Setting Up Your Development Environment](#setting-up-your-development-environment)
|
||||
- [Development Workflow](#development-workflow)
|
||||
- [Adding new models](#adding-new-models)
|
||||
- [Adding bentos](#adding-bentos)
|
||||
- [Adding repos](#adding-repos)
|
||||
|
||||
## Setting Up Your Development Environment
|
||||
|
||||
Before you can start developing, you'll need to set up your environment:
|
||||
|
||||
1. Ensure you have [Git](https://git-scm.com/), and
|
||||
[Python3.8+](https://www.python.org/downloads/) installed.
|
||||
2. Fork the OpenLLM repository from GitHub.
|
||||
3. Clone the forked repository from GitHub:
|
||||
|
||||
```bash
|
||||
git clone git@github.com:username/OpenLLM.git && cd openllm
|
||||
```
|
||||
|
||||
4. Add the OpenLLM upstream remote to your local OpenLLM clone:
|
||||
|
||||
```bash
|
||||
git remote add upstream git@github.com:bentoml/OpenLLM.git
|
||||
```
|
||||
|
||||
5. Configure git to pull from the upstream remote:
|
||||
|
||||
```bash
|
||||
git switch main # ensure you're on the main branch
|
||||
git fetch upstream --tags
|
||||
git branch --set-upstream-to=upstream/main
|
||||
```
|
||||
|
||||
## Development Workflow
|
||||
|
||||
There are a few ways to contribute to the repository structure for OpenLLM:
|
||||
|
||||
### Adding new models
|
||||
|
||||
1. [recipe.yaml](./recipe.yaml) contains all related metadata for generating new LLM-based bentos. To add a new LLM, the following structure should be adhered to:
|
||||
|
||||
```yaml
|
||||
"<model_name>:<model_tag>":
|
||||
project: vllm-chat
|
||||
service_config:
|
||||
name: phi3
|
||||
traffic:
|
||||
timeout: 300
|
||||
resources:
|
||||
gpu: 1
|
||||
gpu_type: nvidia-tesla-l4
|
||||
engine_config:
|
||||
model: microsoft/Phi-3-mini-4k-instruct
|
||||
max_model_len: 4096
|
||||
dtype: half
|
||||
chat_template: phi-3
|
||||
```
|
||||
|
||||
- `<model_name>` represents the type of model to be supported. Currently supports `phi3`, `llama2`, `llama3`, `gemma`
|
||||
|
||||
- `<model_tag>` emphasizes the type of model and its related metadata. The convention would include `<model_size>-<model_type>-<precision>[-<quantization>]`
|
||||
For example:
|
||||
|
||||
- `microsoft/Phi-3-mini-4k-instruct` should be represented as `3.8b-instruct-fp16`.
|
||||
- `TheBloke/Llama-2-7B-Chat-AWQ` would be `7b-chat-awq-4bit`
|
||||
|
||||
- `project` would be used as the basis for the generated bento. Currently, most models should use `vllm-chat` as default.
|
||||
|
||||
- `service_config` entails all BentoML-related [configuration](https://docs.bentoml.com/en/latest/guides/configurations.html) to run this bento.
|
||||
|
||||
> [!NOTE]
|
||||
>
|
||||
> We recommend including the following fields for `service_config`:
|
||||
>
|
||||
> - `name` should be the same as `<model_name>`
|
||||
> - `resources` includes the available accelerator that can run this models. See more [here](https://docs.bentoml.com/en/latest/guides/configurations.html#resources)
|
||||
|
||||
- `engine_config` are fields to be used for the vLLM engine. See more supported arguments in [`AsyncEngineArgs`](https://github.com/vllm-project/vllm/blob/7cd2ebb0251fd1fd0eec5c93dac674603a22eddd/vllm/engine/arg_utils.py#L799). We recommend always including `model`, `max_model_len`, `dtype` and `trust_remote_code`.
|
||||
|
||||
- If the model is a chat model, `chat_template` should be used. Add the appropriate `chat_template` under [chat_template directory](./vllm-chat/chat_templates/) should you decide to do so.
|
||||
|
||||
2. You can then run `BENTOML_HOME=$(openllm repo default)/bentoml/bentos python make.py <model_name>:<model_tag>` to generate the required bentos.
|
||||
|
||||
3. You can then submit a [Pull request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request) to `openllm` with the recipe changes
|
||||
|
||||
### Adding bentos
|
||||
|
||||
OpenLLM now also manages a [generated bento repository](https://github.com/bentoml/openllm-models/tree/main). If you update or modify any generated bentos, make sure to update the recipe and add the generated bentos under `bentoml/bentos`.
|
||||
|
||||
### Adding repos
|
||||
|
||||
If you wish to create your own managed git repo, you should follow the structure of [bentoml/openllm-models](https://github.com/bentoml/openllm-models/tree/main).
|
||||
|
||||
To add your custom repo, do `openllm repo add <repo_alias> <git_url>`
|
||||
201
LICENSE
Normal file
201
LICENSE
Normal file
@@ -0,0 +1,201 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
22
README.md
Normal file
22
README.md
Normal file
@@ -0,0 +1,22 @@
|
||||
```
|
||||
pip install .
|
||||
openllm serve
|
||||
# or openllm run
|
||||
```
|
||||
To find out which LLM models are readily available.
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
This project is licensed under the Apache License 2.0 - see the LICENSE file for details.
|
||||
|
||||
Acknowledgements
|
||||
----------------
|
||||
|
||||
This project makes use of the following open-source projects:
|
||||
|
||||
* [bentoml/bentoml](https://github.com/bentoml/bentoml) for production level model serving
|
||||
* [blrchen/chatgpt-lite](https://github.com/blrchen/chatgpt-lite) for a fancy Web Chat UI
|
||||
* [chujiezheng/chat_templates](https://github.com/chujiezheng/chat_templates)
|
||||
|
||||
We are grateful to the developers and contributors of these projects for their hard work and dedication.
|
||||
0
openllm_next/__init__.py
Normal file
0
openllm_next/__init__.py
Normal file
338
openllm_next/__main__.py
Normal file
338
openllm_next/__main__.py
Normal file
@@ -0,0 +1,338 @@
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from typing import Annotated, Optional
|
||||
|
||||
import questionary
|
||||
import typer
|
||||
|
||||
from openllm_next.accelerator_spec import (
|
||||
DeploymentTarget,
|
||||
can_run,
|
||||
get_local_machine_spec,
|
||||
)
|
||||
from openllm_next.analytic import DO_NOT_TRACK, OpenLLMTyper
|
||||
from openllm_next.clean import app as clean_app
|
||||
from openllm_next.cloud import deploy as cloud_deploy
|
||||
from openllm_next.cloud import ensure_cloud_context, get_cloud_machine_spec
|
||||
from openllm_next.common import CHECKED, INTERACTIVE, VERBOSE_LEVEL, output
|
||||
from openllm_next.local import run as local_run
|
||||
from openllm_next.local import serve as local_serve
|
||||
from openllm_next.model import app as model_app
|
||||
from openllm_next.model import ensure_bento, list_bento
|
||||
from openllm_next.repo import app as repo_app
|
||||
|
||||
# Top-level CLI application; sub-command groups are attached just below.
app = OpenLLMTyper(
    help=(
        "`openllm hello` to get started. "
        "OpenLLM is a CLI tool to manage and deploy open source LLMs and"
        " get an OpenAI API compatible chat server in seconds."
    ),
)

app.add_typer(repo_app, name="repo")
app.add_typer(model_app, name="model")
app.add_typer(clean_app, name="clean")
|
||||
|
||||
|
||||
def _select_bento_name(models, target):
    """Interactively pick a model from the available bentos.

    Renders a table of (model, repo) pairs with a marker for those that have
    at least one locally runnable version on *target*, then prompts the user.

    Returns:
        The selected ``[model_name, repo_name]`` pair.

    Raises:
        typer.Exit: when no model is available or the user aborts the prompt.
    """
    from tabulate import tabulate

    model_infos = [
        [model.repo.name, model.name, can_run(model, target)] for model in models
    ]
    # Aggregate runnability scores per (repo, name); a positive total means at
    # least one version of the model can run on this machine.
    model_name_groups = defaultdict(int)
    for repo, name, score in model_infos:
        model_name_groups[(repo, name)] += score
    table_data = [
        [name, repo, CHECKED if score > 0 else ""]
        for (repo, name), score in model_name_groups.items()
    ]
    if not table_data:
        output("No model found", style="red")
        raise typer.Exit(1)
    table = tabulate(
        table_data,
        headers=["model", "repo", "locally runnable"],
    ).split("\n")
    headers = f"{table[0]}\n {table[1]}"

    options = [questionary.Separator(headers)]
    # NOTE: renamed the loop variable (was `table_data`) so it no longer
    # shadows the list it iterates alongside.
    for row, table_line in zip(table_data, table[2:]):
        options.append(questionary.Choice(table_line, value=row[:2]))
    selected = questionary.select("Select a model", options).ask()
    if selected is None:
        raise typer.Exit(1)
    return selected
||||
|
||||
|
||||
def _select_bento_version(models, target, bento_name, repo):
    """Interactively pick a concrete version of *bento_name* from *repo*.

    Returns:
        The selected ``[model, score]`` pair, where ``score > 0`` indicates
        the version is runnable on *target*.

    Raises:
        typer.Exit: when no matching version exists or the user aborts.
    """
    from tabulate import tabulate

    model_infos = [
        [model, can_run(model, target)]
        for model in models
        if model.name == bento_name and model.repo.name == repo
    ]

    # model_infos is already filtered by name/repo above; the original code
    # redundantly re-applied the same filter here.
    table_data = [
        [model.tag, CHECKED if score > 0 else ""] for model, score in model_infos
    ]
    if not table_data:
        output(f"No model found for {bento_name} in {repo}", style="red")
        raise typer.Exit(1)
    table = tabulate(
        table_data,
        headers=["version", "locally runnable"],
    ).split("\n")

    options = [questionary.Separator(f"{table[0]}\n {table[1]}")]
    for info, table_line in zip(model_infos, table[2:]):
        options.append(questionary.Choice(table_line, value=info))
    selected = questionary.select("Select a version", options).ask()
    if selected is None:
        raise typer.Exit(1)
    return selected
||||
|
||||
|
||||
def _select_target(bento, targets):
    """Interactively pick a cloud instance type capable of running *bento*.

    Raises:
        typer.Exit: when no instance type is available or the user aborts.
    """
    from tabulate import tabulate

    # In-place sort (side effect preserved): best-scoring targets first.
    targets.sort(key=lambda t: can_run(bento, t), reverse=True)
    if not targets:
        output(
            "No available instance type, check your bentocloud account",
            style="red",
        )
        raise typer.Exit(1)

    rows = [
        [
            candidate.name,
            candidate.accelerators_repr,
            f"${candidate.price}",
            CHECKED if can_run(bento, candidate) else "insufficient res.",
        ]
        for candidate in targets
    ]
    rendered = tabulate(
        rows,
        headers=["instance type", "accelerator", "price/hr", "deployable"],
    ).split("\n")

    options = [questionary.Separator(f"{rendered[0]}\n {rendered[1]}")]
    for candidate, line in zip(targets, rendered[2:]):
        options.append(questionary.Choice(f"{line}", value=candidate))
    selected = questionary.select("Select an instance type", options).ask()
    if selected is None:
        raise typer.Exit(1)
    return selected
||||
|
||||
|
||||
def _select_action(bento, score):
    """Prompt for an action on *bento* (run / serve / deploy) and execute it.

    *score* > 0 means the bento is runnable on the local machine; otherwise
    the local actions (run, serve) are shown but disabled.

    Raises:
        typer.Exit: when the user aborts the action prompt.
    """
    # The original code duplicated the entire option list for the two score
    # branches, differing only in the `disabled` kwarg on the local actions.
    # `disabled=None` is questionary's "enabled" default, so one list suffices.
    local_disabled = None if score > 0 else "insufficient res."
    options = [
        questionary.Separator("Available actions"),
        questionary.Choice(
            "0. Run the model in terminal",
            value="run",
            disabled=local_disabled,
            shortcut_key="0",
        ),
        questionary.Separator(f" $ openllm run {bento}"),
        questionary.Separator(" "),
        questionary.Choice(
            "1. Serve the model locally and get a chat server",
            value="serve",
            disabled=local_disabled,
            shortcut_key="1",
        ),
        questionary.Separator(f" $ openllm serve {bento}"),
        questionary.Separator(" "),
        questionary.Choice(
            "2. Deploy the model to bentocloud and get a scalable chat server",
            value="deploy",
            shortcut_key="2",
        ),
        questionary.Separator(f" $ openllm deploy {bento}"),
    ]
    action = questionary.select("Select an action", options).ask()
    if action is None:
        raise typer.Exit(1)
    if action == "run":
        try:
            local_run(bento)
        finally:
            # Always echo the equivalent one-shot command, even on failure.
            output("\nUse this command to run the action again:", style="green")
            output(f" $ openllm run {bento}", style="orange")
    elif action == "serve":
        try:
            local_serve(bento)
        finally:
            output("\nUse this command to run the action again:", style="green")
            output(f" $ openllm serve {bento}", style="orange")
    elif action == "deploy":
        ensure_cloud_context()
        targets = get_cloud_machine_spec()
        target = _select_target(bento, targets)
        try:
            cloud_deploy(bento, target)
        finally:
            output("\nUse this command to run the action again:", style="green")
            output(
                f" $ openllm deploy {bento} --instance-type {target.name}",
                style="orange",
            )
|
||||
|
||||
|
||||
@app.command(help="get started interactively")
def hello():
    """Walk the user through detecting hardware, picking a model, and acting on it."""
    INTERACTIVE.set(True)
    VERBOSE_LEVEL.set(20)

    # Report the detected local platform and accelerators up front.
    target = get_local_machine_spec()
    output(f" Detected Platform: {target.platform}", style="green")
    if not target.accelerators:
        output(" Detected Accelerators: None", style="yellow")
    else:
        output(" Detected Accelerators: ", style="green")
        for accel in target.accelerators:
            output(f" - {accel.model} {accel.memory_size}GB", style="green")

    models = list_bento()
    if not models:
        output(
            "No model found, you probably need to update the model repo:",
            style="red",
        )
        output(" $ openllm repo update", style="orange")
        raise typer.Exit(1)

    # Three interactive steps: model name -> concrete version -> action.
    bento_name, repo = _select_bento_name(models, target)
    bento, score = _select_bento_version(models, target, bento_name, repo)
    _select_action(bento, score)
|
||||
|
||||
|
||||
@app.command(help="start an OpenAI API compatible chat server and chat in browser")
def serve(
    model: Annotated[str, typer.Argument()] = "",
    repo: Optional[str] = None,
    port: int = 3000,
    verbose: bool = False,
):
    """Serve *model* locally as an OpenAI API compatible chat server on *port*."""
    if verbose:
        VERBOSE_LEVEL.set(20)
    target = get_local_machine_spec()
    # Resolve the model reference to a concrete bento runnable on this machine.
    bento = ensure_bento(model, target=target, repo_name=repo)
    local_serve(bento, port=port)
|
||||
|
||||
|
||||
@app.command(help="run the model and chat in terminal")
def run(
    model: Annotated[str, typer.Argument()] = "",
    repo: Optional[str] = None,
    port: Optional[int] = None,
    timeout: int = 600,
    verbose: bool = False,
):
    """Run *model* locally and chat with it in the terminal."""
    if verbose:
        VERBOSE_LEVEL.set(20)
    target = get_local_machine_spec()
    bento = ensure_bento(model, target=target, repo_name=repo)
    if port is None:
        # Pick a random high port for the transient server backing the chat.
        port = random.randint(30000, 40000)
    local_run(bento, port=port, timeout=timeout)
|
||||
|
||||
|
||||
@app.command(
    help="deploy an production-ready OpenAI API compatible chat server to bentocloud ($100 free credit)",
)
def deploy(
    model: Annotated[str, typer.Argument()] = "",
    instance_type: Optional[str] = None,
    repo: Optional[str] = None,
    verbose: bool = False,
):
    """Deploy a model to BentoCloud on the best-scoring instance type."""
    if verbose:
        VERBOSE_LEVEL.set(20)
    bento = ensure_bento(model, repo_name=repo)
    # An explicit instance type short-circuits the scoring logic below.
    if instance_type is not None:
        cloud_deploy(bento, DeploymentTarget(name=instance_type))
        return
    # Keep only instance types that can run the bento, best score first.
    # (can_run is lru_cached, so the repeated calls are cheap.)
    candidates = [t for t in get_cloud_machine_spec() if can_run(bento, t) > 0]
    candidates.sort(key=lambda t: can_run(bento, t), reverse=True)
    if not candidates:
        output(
            "No available instance type, check your bentocloud account",
            style="red",
        )
        raise typer.Exit(1)
    best = candidates[0]
    output(
        f"Recommended instance type: {best.name}",
        style="green",
    )
    cloud_deploy(bento, best)
|
||||
|
||||
|
||||
@app.callback(invoke_without_command=True)
def typer_callback(
    verbose: int = 0,
    do_not_track: bool = typer.Option(
        False,
        "--do-not-track",
        help="Whether to disable usage tracking",
        envvar=DO_NOT_TRACK,
    ),
):
    # Global flags processed before any subcommand runs.
    if verbose:
        VERBOSE_LEVEL.set(verbose)
    if do_not_track:
        # Propagate the opt-out through the env var the analytics wrapper reads.
        os.environ[DO_NOT_TRACK] = str(True)
|
||||
|
||||
|
||||
def main():
    """CLI entry point: enforce the minimum Python version, then dispatch to typer."""
    if sys.version_info < (3, 9):
        # Message kept in sync with the version tuple checked above
        # (it previously said "3.8 or higher" while requiring 3.9).
        output("Python 3.9 or higher is required", style="red")
        sys.exit(1)
    app()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Support direct execution of this module.
    main()
|
||||
166
openllm_next/accelerator_spec.py
Normal file
166
openllm_next/accelerator_spec.py
Normal file
@@ -0,0 +1,166 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import functools
|
||||
import math
|
||||
import typing
|
||||
from types import SimpleNamespace
|
||||
|
||||
import psutil
|
||||
|
||||
from openllm_next.common import BentoInfo, DeploymentTarget, output
|
||||
|
||||
|
||||
class Accelerator(SimpleNamespace):
    """A GPU model plus its memory capacity in GB.

    Ordering and equality compare only ``memory_size`` so accelerators can
    be ranked by how large a model they can hold.
    """

    model: str
    memory_size: float

    def __gt__(self, other):
        return self.memory_size > other.memory_size

    def __eq__(self, other):
        return self.memory_size == other.memory_size

    def __hash__(self):
        # Defining __eq__ alone sets __hash__ to None and makes instances
        # unhashable; hash consistently with __eq__ (memory_size only).
        return hash(self.memory_size)

    def __repr__(self):
        return f"{self.model}({self.memory_size}GB)"
|
||||
|
||||
|
||||
class Resource(SimpleNamespace):
    # Resource requirements parsed from a bento's service config.
    # cpu/gpu/gpu_type have class-level defaults; ``memory`` is annotation-only,
    # so reading it raises AttributeError unless it was provided at construction.
    cpu: int = 0
    memory: float
    gpu: int = 0
    gpu_type: str = ""

    def __hash__(self):
        # Hash over the declared fields so instances can serve as cache keys.
        # NOTE(review): this reads ``memory``, which raises if never set —
        # confirm hashed instances always carry it.
        return hash((self.cpu, self.memory, self.gpu, self.gpu_type))

    def __bool__(self):
        # Truthy only when at least one field was set on the instance;
        # class-level defaults live on the class, not in instance __dict__,
        # so Resource() with no kwargs is falsy.
        return any(value is not None for value in self.__dict__.values())
|
||||
|
||||
|
||||
# Known accelerator models keyed by the device/instance-type identifiers used
# by cloud instance listings; memory sizes are in GB.
ACCELERATOR_SPEC_DICT: dict[str, dict] = {
    "nvidia-gtx-1650": {"model": "GTX 1650", "memory_size": 4.0},
    "nvidia-gtx-1060": {"model": "GTX 1060", "memory_size": 6.0},
    "nvidia-gtx-1080-ti": {"model": "GTX 1080 Ti", "memory_size": 11.0},
    "nvidia-rtx-3060": {"model": "RTX 3060", "memory_size": 12.0},
    "nvidia-rtx-3060-ti": {"model": "RTX 3060 Ti", "memory_size": 8.0},
    "nvidia-rtx-3070-ti": {"model": "RTX 3070 Ti", "memory_size": 8.0},
    "nvidia-rtx-3080": {"model": "RTX 3080", "memory_size": 10.0},
    "nvidia-rtx-3080-ti": {"model": "RTX 3080 Ti", "memory_size": 12.0},
    "nvidia-rtx-3090": {"model": "RTX 3090", "memory_size": 24.0},
    "nvidia-rtx-4070-ti": {"model": "RTX 4070 Ti", "memory_size": 12.0},
    "nvidia-tesla-p4": {"model": "P4", "memory_size": 8.0},
    "nvidia-tesla-p100": {"model": "P100", "memory_size": 16.0},
    "nvidia-tesla-k80": {"model": "K80", "memory_size": 12.0},
    "nvidia-tesla-t4": {"model": "T4", "memory_size": 16.0},
    "nvidia-tesla-v100": {"model": "V100", "memory_size": 16.0},
    "nvidia-l4": {"model": "L4", "memory_size": 24.0},
    "nvidia-tesla-l4": {"model": "L4", "memory_size": 24.0},
    "nvidia-tesla-a10g": {"model": "A10G", "memory_size": 24.0},
    "nvidia-a100-80g": {"model": "A100", "memory_size": 80.0},
    "nvidia-a100-80gb": {"model": "A100", "memory_size": 80.0},
    "nvidia-tesla-a100": {"model": "A100", "memory_size": 40.0},
}


# Same table, materialized as Accelerator instances for direct comparison.
ACCELERATOR_SPECS: dict[str, Accelerator] = {
    key: Accelerator(**value) for key, value in ACCELERATOR_SPEC_DICT.items()
}
|
||||
|
||||
|
||||
@functools.lru_cache
def get_local_machine_spec():
    """Probe the local machine and return a DeploymentTarget describing it.

    macOS never reports accelerators; on Linux/Windows NVIDIA GPUs are
    discovered via NVML. The result is cached for the process lifetime.
    Falls back to a CPU-only target when NVML is unavailable.
    """
    if psutil.MACOS:
        return DeploymentTarget(accelerators=[], source="local", platform="macos")

    if psutil.WINDOWS:
        platform = "windows"
    elif psutil.LINUX:
        platform = "linux"
    else:
        # Fixed: these messages were f-strings with no placeholders (F541).
        raise NotImplementedError("Unsupported platform")

    # Imported lazily: pynvml is only needed (and may only work) here.
    from pynvml import (
        nvmlDeviceGetCount,
        nvmlDeviceGetCudaComputeCapability,
        nvmlDeviceGetHandleByIndex,
        nvmlDeviceGetMemoryInfo,
        nvmlDeviceGetName,
        nvmlInit,
        nvmlShutdown,
    )

    try:
        nvmlInit()
        device_count = nvmlDeviceGetCount()
        accelerators: list[Accelerator] = []
        for i in range(device_count):
            handle = nvmlDeviceGetHandleByIndex(i)
            name = nvmlDeviceGetName(handle)
            memory_info = nvmlDeviceGetMemoryInfo(handle)
            # Round memory up to whole GB to match the spec table.
            accelerators.append(
                Accelerator(
                    model=name, memory_size=math.ceil(int(memory_info.total) / 1024**3)
                )
            )
            compute_capability = nvmlDeviceGetCudaComputeCapability(handle)
            if compute_capability < (7, 5):
                output(
                    f"GPU {name} with compute capability {compute_capability} "
                    "may not be supported, 7.5 or higher is recommended. check "
                    "https://developer.nvidia.com/cuda-gpus for more information",
                    style="yellow",
                )
        nvmlShutdown()
        return DeploymentTarget(
            accelerators=accelerators,
            source="local",
            platform=platform,
        )
    except Exception as e:
        # Best-effort: no driver / NVML means a CPU-only local target.
        output(
            "Failed to get local GPU info. Ensure nvidia driver is installed to enable local GPU deployment",
            style="yellow",
        )
        output(f"Error: {e}", style="red", level=20)
        return DeploymentTarget(accelerators=[], source="local", platform=platform)
|
||||
|
||||
|
||||
@functools.lru_cache()
def can_run(
    bento: typing.Union[Resource, BentoInfo],
    target: typing.Optional[DeploymentTarget] = None,
) -> float:
    """
    Score how well ``bento`` fits on ``target``; 0.0 means it cannot run.

    Higher scores mean a tighter fit (less wasted GPU memory). Results are
    cached per (bento, target) pair. Defaults to the local machine.
    """
    if target is None:
        target = get_local_machine_spec()

    resource_spec = Resource(**(bento.bento_yaml["services"][0]["config"].get("resources", {})))
    labels = bento.bento_yaml.get("labels", {})
    platforms = labels.get("platforms", "linux").split(",")

    if target.platform not in platforms:
        return 0.0

    # No explicit resource requirements: assume it can run, with low priority.
    if not resource_spec:
        return 0.5

    if resource_spec.gpu > 0:
        if resource_spec.gpu_type not in ACCELERATOR_SPECS:
            # Unknown GPU model requested; previously this raised KeyError.
            return 0.0
        required_gpu = ACCELERATOR_SPECS[resource_spec.gpu_type]
        filtered_accelerators = [
            ac
            for ac in target.accelerators
            if ac.memory_size >= required_gpu.memory_size
        ]
        if resource_spec.gpu > len(filtered_accelerators):
            return 0.0
        # Ratio of required GPU memory to total available: closer to 1 is a
        # tighter (better) fit.
        return (
            required_gpu.memory_size
            * resource_spec.gpu
            / sum(ac.memory_size for ac in target.accelerators)
        )
    if target.accelerators:
        # CPU-only bento on a GPU target: deprioritize to keep GPUs free.
        return 0.01 / sum(ac.memory_size for ac in target.accelerators)
    return 1.0
|
||||
118
openllm_next/analytic.py
Normal file
118
openllm_next/analytic.py
Normal file
@@ -0,0 +1,118 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import functools
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import typing
|
||||
from abc import ABC
|
||||
|
||||
import attr
|
||||
import click
|
||||
import typer
|
||||
import typer.core
|
||||
|
||||
DO_NOT_TRACK = "BENTOML_DO_NOT_TRACK"
|
||||
|
||||
|
||||
class EventMeta(ABC):
    @property
    def event_name(self):
        """Snake-case class name with any trailing '_event' suffix stripped."""
        # PascalCase -> snake_case
        snake = re.sub(r"(?<!^)(?=[A-Z])", "_", type(self).__name__).lower()
        suffix = "_event"
        return snake[: -len(suffix)] if snake.endswith(suffix) else snake
|
||||
|
||||
|
||||
@attr.define
class CliEvent(EventMeta):
    # Analytics payload describing one CLI invocation.
    cmd_group: str
    cmd_name: str
    duration_in_ms: float = attr.field(default=0)
    error_type: typing.Optional[str] = attr.field(default=None)
    return_code: typing.Optional[int] = attr.field(default=None)
|
||||
|
||||
|
||||
@attr.define
class OpenllmCliEvent(CliEvent):
    # Marker subclass: its class name yields the tracked event name
    # ("openllm_cli") via EventMeta.event_name.
    pass
|
||||
|
||||
|
||||
class OrderedCommands(typer.core.TyperGroup):
    # Show commands in declaration order in --help instead of sorting them.
    def list_commands(self, _: click.Context) -> typing.Iterable[str]:
        return list(self.commands)
|
||||
|
||||
|
||||
class OpenLLMTyper(typer.Typer):
    """typer.Typer subclass with shared defaults and usage analytics.

    Every command is wrapped so that an OpenllmCliEvent (duration, error
    type, exit code) is reported through bentoml's tracker unless the user
    opted out via BENTOML_DO_NOT_TRACK.
    """

    def __init__(self, *args: typing.Any, **kwargs: typing.Any):
        no_args_is_help = kwargs.pop("no_args_is_help", True)
        context_settings = kwargs.pop("context_settings", {})
        if "help_option_names" not in context_settings:
            context_settings["help_option_names"] = ("-h", "--help")
        if "max_content_width" not in context_settings:
            context_settings["max_content_width"] = int(
                os.environ.get("COLUMNS", str(120))
            )
        klass = kwargs.pop("cls", OrderedCommands)

        super().__init__(
            *args,
            cls=klass,
            no_args_is_help=no_args_is_help,
            context_settings=context_settings,
            **kwargs,
        )

    def command(self, *args: typing.Any, **kwargs: typing.Any):
        def decorator(f):
            @functools.wraps(f)
            @click.pass_context
            def wrapped(ctx: click.Context, *args, **kwargs):
                from bentoml._internal.utils.analytics import track

                do_not_track = (
                    os.environ.get(DO_NOT_TRACK, str(False)).lower() == "true"
                )
                if do_not_track:
                    return f(*args, **kwargs)

                # Resolve the analytics command group: the sub-app name for
                # nested commands, "openllm" for top-level ones.
                command_name = ctx.info_name
                if ctx.parent.parent is not None:
                    # e.g. `openllm model list`
                    command_group = ctx.parent.info_name
                else:
                    # e.g. `openllm run`. Previously this was an `elif` with
                    # no final else, so command_group could be unbound and
                    # raise UnboundLocalError below.
                    command_group = "openllm"

                start_time = time.time_ns()
                try:
                    return_value = f(*args, **kwargs)
                    duration_in_ns = time.time_ns() - start_time
                    track(
                        OpenllmCliEvent(
                            cmd_group=command_group,
                            cmd_name=command_name,
                            duration_in_ms=duration_in_ns / 1e6,
                        )
                    )
                    return return_value
                except BaseException as e:
                    duration_in_ns = time.time_ns() - start_time
                    track(
                        OpenllmCliEvent(
                            cmd_group=command_group,
                            cmd_name=command_name,
                            duration_in_ms=duration_in_ns / 1e6,
                            error_type=type(e).__name__,
                            return_code=2 if isinstance(e, KeyboardInterrupt) else 1,
                        )
                    )
                    raise

            return typer.Typer.command(self, *args, **kwargs)(wrapped)

        return decorator
|
||||
75
openllm_next/clean.py
Normal file
75
openllm_next/clean.py
Normal file
@@ -0,0 +1,75 @@
|
||||
import pathlib
|
||||
import shutil
|
||||
|
||||
import questionary
|
||||
|
||||
from openllm_next.analytic import OpenLLMTyper
|
||||
from openllm_next.common import (
|
||||
CONFIG_FILE,
|
||||
REPO_DIR,
|
||||
VENV_DIR,
|
||||
VERBOSE_LEVEL,
|
||||
output,
|
||||
)
|
||||
|
||||
app = OpenLLMTyper(help="clean up and release disk space used by OpenLLM")


# Default huggingface hub cache location; models downloaded at serve time live here.
HUGGINGFACE_CACHE = pathlib.Path.home() / ".cache" / "huggingface" / "hub"
|
||||
|
||||
|
||||
@app.command(help="Clean up all the cached models from huggingface")
def model_cache(verbose: bool = False):
    # Interactively delete the shared huggingface hub cache.
    if verbose:
        VERBOSE_LEVEL.set(20)
    # Sum file sizes so the prompt can show how much space will be reclaimed.
    used_space = sum(f.stat().st_size for f in HUGGINGFACE_CACHE.rglob("*"))
    sure = questionary.confirm(
        f"This will remove all models cached by Huggingface (~{used_space / 1024 / 1024:.2f}MB), are you sure?"
    ).ask()
    if not sure:
        return
    shutil.rmtree(HUGGINGFACE_CACHE, ignore_errors=True)
    output("All models cached by Huggingface have been removed", style="green")
|
||||
|
||||
|
||||
@app.command(help="Clean up all the virtual environments created by OpenLLM")
def venvs(verbose: bool = False):
    # Interactively delete all per-bento virtualenvs under VENV_DIR.
    if verbose:
        VERBOSE_LEVEL.set(20)
    used_space = sum(f.stat().st_size for f in VENV_DIR.rglob("*"))
    sure = questionary.confirm(
        f"This will remove all virtual environments created by OpenLLM (~{used_space / 1024 / 1024:.2f}MB), are you sure?"
    ).ask()
    if not sure:
        return
    shutil.rmtree(VENV_DIR, ignore_errors=True)
    output("All virtual environments have been removed", style="green")
|
||||
|
||||
|
||||
@app.command(help="Clean up all the repositories cloned by OpenLLM")
def repos(verbose: bool = False):
    # Delete every cloned model repository (no confirmation prompt).
    if verbose:
        VERBOSE_LEVEL.set(20)
    shutil.rmtree(REPO_DIR, ignore_errors=True)
    output("All repositories have been removed", style="green")
|
||||
|
||||
|
||||
@app.command(help="Reset configurations to default")
def configs(verbose: bool = False):
    """Delete the config file so defaults apply on the next run."""
    if verbose:
        VERBOSE_LEVEL.set(20)
    # CONFIG_FILE is a regular file, not a directory: shutil.rmtree with
    # ignore_errors=True silently did nothing, so the config was never reset.
    CONFIG_FILE.unlink(missing_ok=True)
    output("All configurations have been reset", style="green")
|
||||
|
||||
|
||||
@app.command(
    name="all",
    help="Clean up all above and bring OpenLLM to a fresh start",
)
def all_cache(verbose: bool = False):
    # Run every cleanup command in sequence (repos, venvs, model cache, config).
    if verbose:
        VERBOSE_LEVEL.set(20)
    repos()
    venvs()
    model_cache()
    configs()
|
||||
174
openllm_next/cloud.py
Normal file
174
openllm_next/cloud.py
Normal file
@@ -0,0 +1,174 @@
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
import shutil
|
||||
import subprocess
|
||||
import typing
|
||||
|
||||
import typer
|
||||
|
||||
from openllm_next.accelerator_spec import ACCELERATOR_SPECS
|
||||
from openllm_next.analytic import OpenLLMTyper
|
||||
from openllm_next.common import (
|
||||
INTERACTIVE,
|
||||
BentoInfo,
|
||||
DeploymentTarget,
|
||||
output,
|
||||
run_command,
|
||||
)
|
||||
|
||||
app = OpenLLMTyper()
|
||||
|
||||
|
||||
def _get_deploy_cmd(bento: BentoInfo, target: typing.Optional[DeploymentTarget] = None):
    """Build the ``bentoml deploy`` command line, env and cwd for ``bento``.

    Environment variables declared by the bento are collected from the
    current process env, the bento's defaults, or (interactively) the user.
    Returns (cmd, env, cwd) where cwd is always None.
    """
    cmd = ["bentoml", "deploy", bento.bentoml_tag]
    env = {
        "BENTOML_HOME": f"{bento.repo.path}/bentoml",
    }

    required_envs = bento.bento_yaml.get("envs", [])
    required_env_names = [env["name"] for env in required_envs if "name" in env]
    if required_env_names:
        output(
            f"This model requires the following environment variables to run: {repr(required_env_names)}",
            style="yellow",
        )

    for env_info in bento.bento_yaml.get("envs", []):
        if "name" not in env_info:
            continue
        # Precedence: current process env > bento-declared default > empty.
        if os.environ.get(env_info["name"]):
            default = os.environ[env_info["name"]]
        elif "value" in env_info:
            default = env_info["value"]
        else:
            default = ""

        if INTERACTIVE.get():
            import questionary

            value = questionary.text(
                f"{env_info['name']}:",
                default=default,
            ).ask()
        else:
            if default == "":
                output(
                    f"Environment variable {env_info['name']} is required but not provided",
                    style="red",
                )
                raise typer.Exit(1)
            else:
                value = default

        # questionary returns None when the prompt is aborted (e.g. Ctrl-C).
        if value is None:
            raise typer.Exit(1)
        cmd += ["--env", f"{env_info['name']}={value}"]

    if target:
        cmd += ["--instance-type", target.name]

    # Reuse the user's cloud credentials inside the repo-local BENTOML_HOME.
    # NOTE(review): asserts are stripped under ``python -O``; this also assumes
    # a prior `bentoml cloud login` created .yatai.yaml — confirm.
    assert (pathlib.Path.home() / "bentoml" / ".yatai.yaml").exists()
    shutil.copy(
        pathlib.Path.home() / "bentoml" / ".yatai.yaml",
        bento.repo.path / "bentoml" / ".yatai.yaml",
    )

    return cmd, env, None
|
||||
|
||||
|
||||
def ensure_cloud_context():
    """Ensure the bentoml CLI is logged in to BentoCloud.

    Non-interactive mode prints login instructions and exits; interactive
    mode walks the user through obtaining an endpoint and API token.
    Raises typer.Exit(1) when login cannot be completed.
    """
    import questionary

    cmd = ["bentoml", "cloud", "current-context"]
    try:
        # Succeeds (and prints the endpoint) when a context already exists.
        result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
        context = json.loads(result)
        output(f" bentoml already logged in: {context['endpoint']}", style="green")
    except subprocess.CalledProcessError:
        output(" bentoml not logged in", style="red")
        if not INTERACTIVE.get():
            output(
                "\n get bentoml logged in by:",
            )
            output(
                " $ bentoml cloud login",
                style="orange",
            )
            output("")
            output(
                """ * you may need to visit https://cloud.bentoml.com to get an account. you can also bring your own bentoml cluster (BYOC) to your team from https://bentoml.com/contact""",
                style="yellow",
            )
            raise typer.Exit(1)
        else:
            action = questionary.select(
                "Choose an action:",
                choices=[
                    "I have a BentoCloud account",
                    "get an account in two minutes",
                ],
            ).ask()
            if action is None:
                # Prompt aborted (e.g. Ctrl-C).
                raise typer.Exit(1)
            elif action == "get an account in two minutes":
                output(
                    "Please visit https://cloud.bentoml.com to get your token",
                    style="yellow",
                )
            endpoint = questionary.text(
                "Enter the endpoint: (similar to https://my-org.cloud.bentoml.com)"
            ).ask()
            if endpoint is None:
                raise typer.Exit(1)
            token = questionary.text(
                "Enter your token: (similar to cniluaxxxxxxxx)"
            ).ask()
            if token is None:
                raise typer.Exit(1)
            cmd = [
                "bentoml",
                "cloud",
                "login",
                "--api-token",
                token,
                "--endpoint",
                endpoint,
            ]
            try:
                result = subprocess.check_output(cmd)
                output(" Logged in successfully", style="green")
            except subprocess.CalledProcessError:
                output(" Failed to login", style="red")
                raise typer.Exit(1)
|
||||
|
||||
|
||||
def get_cloud_machine_spec():
    """Return the available BentoCloud instance types as DeploymentTargets.

    Returns an empty list (after printing an error) when the listing fails.
    """
    ensure_cloud_context()
    cmd = ["bentoml", "deployment", "list-instance-types", "-o", "json"]
    try:
        result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
        instance_types = json.loads(result)
        return [
            DeploymentTarget(
                source="cloud",
                name=it["name"],
                price=it["price"],
                platform="linux",
                accelerators=(
                    # One Accelerator entry per GPU on the instance; unknown
                    # GPU types are treated as having no accelerators.
                    [ACCELERATOR_SPECS[it["gpu_type"]] for _ in range(int(it["gpu"]))]
                    if it.get("gpu") and it["gpu_type"] in ACCELERATOR_SPECS
                    else []
                ),
            )
            for it in instance_types
        ]
    except (subprocess.CalledProcessError, json.JSONDecodeError):
        output("Failed to get cloud instance types", style="red")
        return []
|
||||
|
||||
|
||||
def deploy(bento: BentoInfo, target: DeploymentTarget):
    # Deploy ``bento`` to BentoCloud on the given instance type.
    ensure_cloud_context()
    cmd, env, cwd = _get_deploy_cmd(bento, target)
    run_command(cmd, env=env, cwd=cwd)
|
||||
422
openllm_next/common.py
Normal file
422
openllm_next/common.py
Normal file
@@ -0,0 +1,422 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import functools
|
||||
import hashlib
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import sysconfig
|
||||
import typing
|
||||
from contextlib import asynccontextmanager, contextmanager
|
||||
from types import SimpleNamespace
|
||||
|
||||
import typer
|
||||
import typer.core
|
||||
|
||||
# questionary style names used for status messages.
ERROR_STYLE = "red"
SUCCESS_STYLE = "green"


# All OpenLLM state lives under ~/.openllm_next.
CLLAMA_HOME = pathlib.Path.home() / ".openllm_next"
REPO_DIR = CLLAMA_HOME / "repos"  # cloned model repositories
TEMP_DIR = CLLAMA_HOME / "temp"
VENV_DIR = CLLAMA_HOME / "venv"  # per-bento virtual environments

# Created eagerly at import time so later code can assume they exist.
REPO_DIR.mkdir(exist_ok=True, parents=True)
TEMP_DIR.mkdir(exist_ok=True, parents=True)
VENV_DIR.mkdir(exist_ok=True, parents=True)

CONFIG_FILE = CLLAMA_HOME / "config.json"

# Marker glyph used in interactive selection lists.
CHECKED = "☆"

T = typing.TypeVar("T")
|
||||
|
||||
|
||||
class ContextVar(typing.Generic[T]):
    """A tiny stack-based context variable with a default value."""

    def __init__(self, default: T):
        self._stack: list[T] = []
        self._default = default

    def get(self) -> T:
        # The most recently pushed value wins; otherwise fall back to the default.
        return self._stack[-1] if self._stack else self._default

    def set(self, value):
        self._stack.append(value)

    @contextmanager
    def patch(self, value):
        # Temporarily push ``value``; always pop on exit.
        self.set(value)
        try:
            yield
        finally:
            self._stack.pop()
|
||||
|
||||
|
||||
# Process-wide settings consulted throughout the CLI.
VERBOSE_LEVEL = ContextVar(10)
INTERACTIVE = ContextVar(False)
FORCE = ContextVar(False)
|
||||
|
||||
|
||||
def output(content, level=0, style=None, end=None):
    """Print ``content`` unless ``level`` exceeds the current verbosity.

    Strings print as-is (newline-terminated by default); any other object
    is pretty-printed as YAML with no trailing newline by default.
    """
    import questionary

    if level > VERBOSE_LEVEL.get():
        return

    if isinstance(content, str):
        questionary.print(content, style=style, end="\n" if end is None else end)
        return

    import pyaml

    buffer = io.StringIO()
    pyaml.pprint(
        content,
        dst=buffer,
        sort_dicts=False,
        sort_keys=False,
    )
    questionary.print(buffer.getvalue(), style=style, end="" if end is None else end)
    buffer.close()
|
||||
|
||||
|
||||
class Config(SimpleNamespace):
    # User-editable settings persisted in CONFIG_FILE.
    # NOTE(review): the default ``repos`` dict is a class attribute shared by
    # every instance that does not override it — confirm nothing mutates it
    # in place.
    repos: dict[str, str] = {
        "default": "git+https://github.com/bentoml/openllm-models@main"
    }
    default_repo: str = "default"

    def tolist(self):
        # Serializable form written back to disk by save_config.
        return dict(
            repos=self.repos,
            default_repo=self.default_repo,
        )
|
||||
|
||||
|
||||
def load_config():
    """Load the user config from CONFIG_FILE, falling back to defaults.

    A missing, unreadable, or malformed file (invalid JSON, or valid JSON
    that is not an object) yields a default Config instead of crashing.
    """
    if CONFIG_FILE.exists():
        try:
            with open(CONFIG_FILE) as f:
                return Config(**json.load(f))
        except (json.JSONDecodeError, TypeError):
            # TypeError covers valid JSON that is not a dict (e.g. a list),
            # which ``Config(**...)`` would otherwise raise on.
            return Config()
    return Config()
|
||||
|
||||
|
||||
def save_config(config):
    """Persist ``config`` to CONFIG_FILE as pretty-printed JSON."""
    with CONFIG_FILE.open("w") as fp:
        json.dump(config.tolist(), fp, indent=2)
|
||||
|
||||
|
||||
class RepoInfo(SimpleNamespace):
    """Metadata for a cloned model repository checkout."""

    name: str
    path: pathlib.Path
    url: str
    server: str
    owner: str
    repo: str
    branch: str

    def tolist(self):
        """Render for display; more fields at higher verbosity levels."""
        if VERBOSE_LEVEL.get() <= 0:
            return f"{self.name} ({self.url})"
        if VERBOSE_LEVEL.get() <= 10:
            return dict(
                name=self.name,
                url=self.url,
                path=str(self.path),
            )
        # Verbosity > 10: full detail. Previously verbosity above 20 fell off
        # the end of the method and returned None.
        return dict(
            name=self.name,
            url=self.url,
            path=str(self.path),
            server=self.server,
            owner=self.owner,
            repo=self.repo,
            branch=self.branch,
        )
|
||||
|
||||
|
||||
class BentoInfo(SimpleNamespace):
    """A bento (packaged model) located inside a model repository checkout."""

    repo: RepoInfo
    path: pathlib.Path
    alias: str = ""  # optional version alias used for display

    def __str__(self):
        # The default repo is implied and omitted from the display name.
        if self.repo.name == "default":
            return f"{self.tag}"
        else:
            return f"{self.repo.name}/{self.tag}"

    def __hash__(self):
        # Identity is the on-disk location.
        return md5(str(self.path))

    @property
    def tag(self) -> str:
        # Display tag; prefers the alias when one is set.
        if self.alias:
            return f"{self.path.parent.name}:{self.alias}"
        return f"{self.path.parent.name}:{self.path.name}"

    @property
    def bentoml_tag(self) -> str:
        # Tag understood by the bentoml CLI itself (never aliased).
        return f"{self.path.parent.name}:{self.path.name}"

    @property
    def name(self) -> str:
        return self.path.parent.name

    @property
    def version(self) -> str:
        return self.path.name

    @property
    def labels(self) -> dict[str, str]:
        return self.bento_yaml["labels"]

    @functools.cached_property
    def bento_yaml(self) -> dict:
        # Parsed bento.yaml; cached because it is consulted repeatedly.
        import yaml

        bento_file = self.path / "bento.yaml"
        return yaml.safe_load(bento_file.read_text())

    @functools.cached_property
    def platforms(self) -> list[str]:
        # Comma-separated platform list from labels; defaults to linux-only.
        return self.bento_yaml["labels"].get("platforms", "linux").split(",")

    @functools.cached_property
    def pretty_yaml(self) -> dict:
        # Condensed model card for single-service bentos; raw yaml otherwise.
        def _pretty_routes(routes):
            return {
                route["route"]: {
                    "input": {
                        k: v["type"] for k, v in route["input"]["properties"].items()
                    },
                    "output": route["output"]["type"],
                }
                for route in routes
            }

        if len(self.bento_yaml["services"]) == 1:
            pretty_yaml = {
                "apis": _pretty_routes(self.bento_yaml["schema"]["routes"]),
                "resources": self.bento_yaml["services"][0]["config"]["resources"],
                "envs": self.bento_yaml["envs"],
                "platforms": self.platforms,
            }
            return pretty_yaml
        return self.bento_yaml

    @functools.cached_property
    def pretty_gpu(self) -> str:
        # Short GPU requirement label like "80G" or "24Gx2"; "" when none.
        from openllm_next.accelerator_spec import ACCELERATOR_SPECS

        try:
            resources = self.bento_yaml["services"][0]["config"]["resources"]
            if resources["gpu"] > 1:
                acc = ACCELERATOR_SPECS[resources["gpu_type"]]
                return f"{acc.memory_size:.0f}Gx{resources['gpu']}"
            elif resources["gpu"] > 0:
                acc = ACCELERATOR_SPECS[resources["gpu_type"]]
                return f"{acc.memory_size:.0f}G"
        except KeyError:
            # Missing resources/gpu_type entries simply mean "no GPU info".
            pass
        return ""

    def tolist(self):
        # Display form; more detail at higher verbosity.
        # NOTE(review): returns None when verbosity > 20 — confirm intended.
        verbose = VERBOSE_LEVEL.get()
        if verbose <= 0:
            return str(self)
        if verbose <= 10:
            return dict(
                tag=self.tag,
                repo=self.repo.tolist(),
                path=str(self.path),
                model_card=self.pretty_yaml,
            )
        if verbose <= 20:
            return dict(
                tag=self.tag,
                repo=self.repo.tolist(),
                path=str(self.path),
                bento_yaml=self.bento_yaml,
            )
|
||||
|
||||
|
||||
class VenvSpec(SimpleNamespace):
    # Identity of a virtualenv, derived from the packages it must contain.
    python_version: str
    python_packages: dict[str, str]
    name_prefix = ""

    def __hash__(self):
        # Hash only the sorted package names so environments are shared across
        # bentos with identical requirements; python_version is currently
        # excluded (see the commented-out line).
        return md5(
            # self.python_version,
            *sorted(self.python_packages),
        )
|
||||
|
||||
|
||||
class Accelerator(SimpleNamespace):
    """A GPU model and its memory capacity in GB.

    Ordering and equality compare only ``memory_size``.
    """

    model: str
    memory_size: float

    def __gt__(self, other):
        return self.memory_size > other.memory_size

    def __eq__(self, other):
        return self.memory_size == other.memory_size

    def __hash__(self):
        # Defining __eq__ alone sets __hash__ to None and makes instances
        # unhashable; hash consistently with __eq__ (memory_size only).
        return hash(self.memory_size)
|
||||
|
||||
|
||||
class DeploymentTarget(SimpleNamespace):
    # A place a bento can run: the local machine or a cloud instance type.
    source: str = "local"  # "local" or "cloud"
    name: str = "local"
    price: str = ""
    platform = "linux"
    accelerators: list[Accelerator]

    def __hash__(self):
        # NOTE(review): hashing only ``source`` collides for all cloud targets;
        # equality (inherited from SimpleNamespace) disambiguates, but confirm
        # this is acceptable for every hashed use.
        return hash(self.source)

    @property
    def accelerators_repr(self) -> str:
        # Human-readable accelerator summary, e.g. "T4 x2".
        accs = {a.model for a in self.accelerators}
        if len(accs) == 0:
            return "null"
        if len(accs) == 1:
            a = self.accelerators[0]
            return f"{a.model} x{len(self.accelerators)}"
        return ", ".join((f"{a.model}" for a in self.accelerators))
|
||||
|
||||
|
||||
def run_command(
    cmd,
    cwd=None,
    env=None,
    copy_env=True,
    venv=None,
    silent=False,
) -> subprocess.CompletedProcess:
    """Run ``cmd``, optionally inside ``venv``, echoing it unless ``silent``.

    ``bentoml`` / ``python`` commands are rewritten to run under the selected
    interpreter. Exits the CLI with code 1 when the command fails.
    """
    import shlex

    env = env or {}
    cmd = [str(c) for c in cmd]
    bin_dir = "Scripts" if os.name == "nt" else "bin"
    if not silent:
        # Echo an equivalent shell transcript so the user can reproduce it.
        output("\n")
        if cwd:
            output(f"$ cd {cwd}", style="orange")
        if env:
            for k, v in env.items():
                output(f"$ export {k}={shlex.quote(v)}", style="orange")
        if venv:
            output(f"$ source {venv / 'bin' / 'activate'}", style="orange")
        output(f"$ {' '.join(cmd)}", style="orange")

    if venv:
        py = venv / bin_dir / f"python{sysconfig.get_config_var('EXE')}"
    else:
        py = sys.executable

    if copy_env:
        env = {**os.environ, **env}

    if cmd and cmd[0] == "bentoml":
        cmd = [py, "-m", "bentoml"] + cmd[1:]
    if cmd and cmd[0] == "python":
        cmd = [py] + cmd[1:]

    try:
        # check=True makes non-zero exits raise CalledProcessError; without it
        # the except branch below was unreachable and failures were silently
        # ignored.
        if silent:
            return subprocess.run(  # type: ignore
                cmd,
                cwd=cwd,
                env=env,
                check=True,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        else:
            return subprocess.run(
                cmd,
                cwd=cwd,
                env=env,
                check=True,
            )
    except subprocess.CalledProcessError:
        output("Command failed", style="red")
        raise typer.Exit(1)
|
||||
|
||||
|
||||
async def stream_command_output(stream, style="gray"):
    # Forward a subprocess output stream to the console line by line.
    async for line in stream:
        output(line.decode(), style=style, end="")
|
||||
|
||||
|
||||
@asynccontextmanager
async def async_run_command(
    cmd,
    cwd=None,
    env=None,
    copy_env=True,
    venv=None,
    silent=True,
):
    """Async context manager yielding a subprocess running ``cmd``.

    On exit the process receives SIGINT and is awaited, so servers started
    through this helper shut down when the caller's block ends.
    """
    import shlex

    env = env or {}
    cmd = [str(c) for c in cmd]

    if not silent:
        # Echo an equivalent shell transcript so the user can reproduce it.
        output("\n")
        if cwd:
            output(f"$ cd {cwd}", style="orange")
        if env:
            for k, v in env.items():
                output(f"$ export {k}={shlex.quote(v)}", style="orange")
        if venv:
            output(f"$ source {venv / 'bin' / 'activate'}", style="orange")
        output(f"$ {' '.join(cmd)}", style="orange")

    if venv:
        py = venv / "bin" / "python"
    else:
        py = sys.executable

    if copy_env:
        env = {**os.environ, **env}

    if cmd and cmd[0] == "bentoml":
        cmd = [py, "-m", "bentoml"] + cmd[1:]
    if cmd and cmd[0] == "python":
        cmd = [py] + cmd[1:]

    proc = None
    try:
        # shlex.join quotes each argument; the previous plain " ".join broke
        # on arguments containing spaces or shell metacharacters.
        proc = await asyncio.create_subprocess_shell(
            shlex.join(str(c) for c in cmd),
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            cwd=cwd,
            env=env,
        )
        yield proc
    except subprocess.CalledProcessError:
        output("Command failed", style="red")
        raise typer.Exit(1)
    finally:
        if proc:
            proc.send_signal(signal.SIGINT)
            await proc.wait()
|
||||
|
||||
|
||||
def md5(*strings: str) -> int:
    """Return the MD5 of the concatenated strings as an integer."""
    # Hashing the UTF-8 encoding of the joined string is equivalent to
    # feeding each piece to the digest in sequence.
    return int(hashlib.md5("".join(strings).encode()).hexdigest(), 16)
|
||||
117
openllm_next/local.py
Normal file
117
openllm_next/local.py
Normal file
@@ -0,0 +1,117 @@
|
||||
import asyncio
|
||||
import time
|
||||
|
||||
import httpx
|
||||
|
||||
from openllm_next.common import (
|
||||
BentoInfo,
|
||||
async_run_command,
|
||||
output,
|
||||
run_command,
|
||||
stream_command_output,
|
||||
)
|
||||
from openllm_next.venv import ensure_venv
|
||||
|
||||
|
||||
def _get_serve_cmd(bento: BentoInfo, port: int = 3000):
    """Build the (cmd, env, cwd) triple for serving ``bento`` via bentoml."""
    args = ["bentoml", "serve", bento.bentoml_tag]
    # 3000 is bentoml's default, so only pass --port when it differs.
    if port != 3000:
        args.extend(["--port", str(port)])
    serve_env = {
        "BENTOML_HOME": f"{bento.repo.path}/bentoml",
    }
    return args, serve_env, None
|
||||
|
||||
|
||||
def serve(
    bento: BentoInfo,
    port: int = 3000,
):
    """Serve ``bento`` locally on ``port`` inside its virtual environment."""
    venv_path = ensure_venv(bento)
    serve_cmd, serve_env, serve_cwd = _get_serve_cmd(bento, port=port)
    run_command(serve_cmd, env=serve_env, cwd=serve_cwd, venv=venv_path)
|
||||
|
||||
|
||||
async def _run_model(
    bento: BentoInfo,
    port: int = 3000,
    timeout: int = 600,
):
    """Serve *bento* locally and drop into an interactive terminal chat.

    Starts a ``bentoml serve`` subprocess, polls ``/readyz`` until the server
    is healthy (or *timeout* seconds elapse), then proxies a chat REPL through
    the server's OpenAI-compatible API. Returns when the user interrupts the
    chat (Ctrl-C) or startup fails.
    """
    venv = ensure_venv(bento)
    cmd, env, cwd = _get_serve_cmd(bento, port)
    async with async_run_command(
        cmd,
        env=env,
        cwd=cwd,
        venv=venv,
        silent=False,
    ) as server_proc:

        output(f"Model server started {server_proc.pid}")

        # Log streamers are started lazily: server output is only mirrored if
        # startup takes longer than 30 seconds (see below).
        stdout_streamer = None
        stderr_streamer = None
        start_time = time.time()

        output("Model loading...", style="green")
        # Poll readiness roughly once per second, up to `timeout` attempts.
        for _ in range(timeout):
            try:
                resp = httpx.get(f"http://localhost:{port}/readyz", timeout=3)
                if resp.status_code == 200:
                    break
            except httpx.RequestError:
                # Server not accepting connections yet. After 30s, start
                # mirroring its stdout/stderr so the user can see progress.
                if time.time() - start_time > 30:
                    if not stdout_streamer:
                        stdout_streamer = asyncio.create_task(
                            stream_command_output(server_proc.stdout, style="gray")
                        )
                    if not stderr_streamer:
                        stderr_streamer = asyncio.create_task(
                            stream_command_output(server_proc.stderr, style="#BD2D0F")
                        )
            await asyncio.sleep(1)
        else:  # for-loop exhausted without break: readiness never reached
            output("Model failed to load", style="red")
            server_proc.terminate()
            return

        # Stop mirroring server logs before starting the chat UI.
        if stdout_streamer:
            stdout_streamer.cancel()
        if stderr_streamer:
            stderr_streamer.cancel()

        output("Model is ready", style="green")
        messages: list[dict[str, str]] = []  # running chat history

        # Local import — presumably to defer the `openai` dependency until a
        # chat session actually starts; confirm before moving to module level.
        from openai import AsyncOpenAI

        client = AsyncOpenAI(base_url=f"http://localhost:{port}/v1", api_key="local")
        model_id = (await client.models.list()).data[0].id
        while True:
            try:
                message = input("user: ")
                if message == "":
                    output("empty message, please enter something", style="yellow")
                    continue
                messages.append(dict(role="user", content=message))
                output("assistant: ", end="", style="lightgreen")
                assistant_message = ""
                stream = await client.chat.completions.create(
                    model=model_id,
                    messages=messages,  # type: ignore
                    stream=True,
                )
                # Print tokens as they arrive, accumulating the full reply.
                async for chunk in stream:
                    text = chunk.choices[0].delta.content or ""
                    assistant_message += text
                    output(text, end="", style="lightgreen")
                messages.append(dict(role="assistant", content=assistant_message))
                output("")
            except KeyboardInterrupt:
                break
        output("\nStopping model server...", style="green")
        output("Stopped model server", style="green")
|
||||
|
||||
|
||||
def run(bento: BentoInfo, port: int = 3000, timeout: int = 600):
    """Synchronous entry point: serve *bento* and chat with it interactively."""
    chat_session = _run_model(bento, port=port, timeout=timeout)
    asyncio.run(chat_session)
|
||||
173
openllm_next/model.py
Normal file
173
openllm_next/model.py
Normal file
@@ -0,0 +1,173 @@
|
||||
import typing
|
||||
from typing import Optional
|
||||
|
||||
import tabulate
|
||||
import typer
|
||||
|
||||
from openllm_next.accelerator_spec import DeploymentTarget, can_run
|
||||
from openllm_next.analytic import OpenLLMTyper
|
||||
from openllm_next.common import (
|
||||
FORCE,
|
||||
VERBOSE_LEVEL,
|
||||
BentoInfo,
|
||||
load_config,
|
||||
output,
|
||||
)
|
||||
from openllm_next.repo import ensure_repo_updated, parse_repo_url
|
||||
|
||||
app = OpenLLMTyper(help="manage models")
|
||||
|
||||
|
||||
@app.command()
def get(
    tag: str,
    repo: Optional[str] = None,
    verbose: bool = False,
):
    """Show details of the single bento matching *tag* (optionally in *repo*)."""
    if verbose:
        VERBOSE_LEVEL.set(20)
    found = ensure_bento(tag, repo_name=repo)
    if found:
        output(found)
|
||||
|
||||
|
||||
@app.command(name="list")
def list_(
    tag: Optional[str] = None,
    repo: Optional[str] = None,
    verbose: bool = False,
):
    """Print a table of available models, optionally filtered by *tag*/*repo*."""
    if verbose:
        VERBOSE_LEVEL.set(20)

    bentos = list_bento(tag=tag, repo_name=repo)
    bentos.sort(key=lambda b: b.name)

    shown_names = set()
    rows = []
    for bento in bentos:
        # Blank out repeated model names so each name is printed only for the
        # first row of its group.
        first_of_name = bento.name not in shown_names
        shown_names.add(bento.name)
        rows.append(
            [
                bento.name if first_of_name else "",
                bento.tag,
                bento.repo.name,
                bento.pretty_gpu,
                ",".join(bento.platforms),
            ]
        )
    output(
        tabulate.tabulate(
            rows,
            headers=["model", "version", "repo", "required VRAM", "platforms"],
        )
    )
|
||||
|
||||
|
||||
def ensure_bento(
    model: str,
    target: Optional[DeploymentTarget] = None,
    repo_name: Optional[str] = None,
) -> BentoInfo:
    """Resolve *model* to exactly one BentoInfo or exit with an error.

    Behavior:
      - no match: error and exit(1)
      - exactly one match: return it (printing a confirmation when FORCE is
        set, or when a *target* is given and the bento can run on it)
      - multiple matches: list the candidates and exit(1); when a *target* is
        given, first exit with a distinct error if none of them can run on it

    Raises:
        typer.Exit: when resolution fails as described above.
    """
    bentos = list_bento(model, repo_name=repo_name)
    if len(bentos) == 0:
        output(f"No model found for {model}", style="red")
        raise typer.Exit(1)

    if len(bentos) == 1:
        if FORCE.get():
            output(f"Found model {bentos[0]}", style="green")
            return bentos[0]
        if target is None:
            return bentos[0]
        if can_run(bentos[0], target) <= 0:
            # Only candidate even though it cannot run on the target: return
            # it silently and let the caller decide.
            return bentos[0]
        output(f"Found model {bentos[0]}", style="green")
        return bentos[0]

    # More than one candidate from here on.
    if target is None:
        output(
            f"Multiple models match {model}, did you mean one of these?",
            style="red",
        )
        for bento in bentos:
            output(f"  {bento}")
        raise typer.Exit(1)

    filtered = [bento for bento in bentos if can_run(bento, target) > 0]
    if len(filtered) == 0:
        output(f"No deployment target found for {model}", style="red")
        raise typer.Exit(1)

    # NOTE(review): the original duplicated the len(filtered)==0 check and
    # ended with an unreachable `return bentos[0]`; both removed. `filtered`
    # is still never consulted below — with multiple matches we always report
    # ambiguity and exit. If the intent was to auto-pick `filtered[0]` when
    # exactly one candidate is runnable, that was not the original behavior;
    # preserved as-is.
    output(
        f"Multiple models match {model}, did you mean one of these?",
        style="red",
    )
    for bento in bentos:
        output(f"  {bento}")
    raise typer.Exit(1)
|
||||
|
||||
|
||||
def list_bento(
    tag: typing.Optional[str] = None,
    repo_name: typing.Optional[str] = None,
    include_alias: bool = False,
) -> typing.List[BentoInfo]:
    """Collect BentoInfo entries from the cached model repos.

    Args:
        tag: ``name``, ``name:version``, or ``None`` for everything.
        repo_name: restrict the search to one configured repo.
        include_alias: when False (default), alias entries pointing at the
            same name:version as another entry are dropped.

    Raises:
        typer.Exit: if *repo_name* is given but not configured.
    """
    ensure_repo_updated()

    if repo_name is not None:
        config = load_config()
        if repo_name not in config.repos:
            output(f"Repo `{repo_name}` not found, did you mean one of these?")
            for repo_name in config.repos:
                output(f"  {repo_name}")
            raise typer.Exit(1)

    # Translate the tag filter into a glob over the repo's bento store layout
    # (bentoml/bentos/<name>/<version>).
    if not tag:
        glob_pattern = "bentoml/bentos/*/*"
    elif ":" in tag:
        bento_name, version = tag.split(":")
        glob_pattern = f"bentoml/bentos/{bento_name}/{version}"
    else:
        glob_pattern = f"bentoml/bentos/{tag}/*"

    model_list = []
    config = load_config()
    for _repo_name, repo_url in config.repos.items():
        if repo_name is not None and _repo_name != repo_name:
            continue
        repo = parse_repo_url(repo_url, _repo_name)
        for path in repo.path.glob(glob_pattern):
            if path.is_dir() and (path / "bento.yaml").exists():
                # A real bento directory.
                model = BentoInfo(repo=repo, path=path)
            elif path.is_file():
                # An alias file: its content names the version it points at.
                with open(path) as f:
                    origin_name = f.read().strip()
                origin_path = path.parent / origin_name
                model = BentoInfo(alias=path.name, repo=repo, path=origin_path)
            else:
                model = None
            if model:
                model_list.append(model)
    model_list.sort(key=lambda x: x.tag)
    if not include_alias:
        # Keep only the first entry per name:version. `seen.add` returns
        # None, so the `or` clause records the key while staying falsy.
        seen = set()
        model_list = [
            x
            for x in model_list
            if not (
                f"{x.bento_yaml['name']}:{x.bento_yaml['version']}" in seen
                or seen.add(f"{x.bento_yaml['name']}:{x.bento_yaml['version']}")
            )
        ]
    return model_list
|
||||
203
openllm_next/repo.py
Normal file
203
openllm_next/repo.py
Normal file
@@ -0,0 +1,203 @@
|
||||
import datetime
|
||||
import re
|
||||
import shutil
|
||||
|
||||
import pyaml
|
||||
import questionary
|
||||
import typer
|
||||
|
||||
from openllm_next.analytic import OpenLLMTyper
|
||||
from openllm_next.common import (
|
||||
INTERACTIVE,
|
||||
REPO_DIR,
|
||||
VERBOSE_LEVEL,
|
||||
RepoInfo,
|
||||
load_config,
|
||||
output,
|
||||
save_config,
|
||||
)
|
||||
|
||||
UPDATE_INTERVAL = datetime.timedelta(days=3)
|
||||
|
||||
app = OpenLLMTyper(help="manage repos")
|
||||
|
||||
|
||||
@app.command()
def list(verbose: bool = False):
    """Pretty-print every configured repo as a parsed RepoInfo entry.

    (The name shadows the builtin `list` but is only used as a CLI command.)
    """
    if verbose:
        VERBOSE_LEVEL.set(20)
    repos = load_config().repos
    parsed = [parse_repo_url(url, name) for name, url in repos.items()]
    pyaml.pprint(parsed, sort_dicts=False, sort_keys=False)
|
||||
|
||||
|
||||
@app.command()
def remove(name: str):
    """Delete repo *name* from the config; print an error if it is absent."""
    config = load_config()
    if name not in config.repos:
        output(f"Repo {name} does not exist", style="red")
        return

    config.repos.pop(name)
    save_config(config)
    output(f"Repo {name} removed", style="green")
|
||||
|
||||
|
||||
def _complete_alias(repo_name: str):
    """Materialize alias files for every bento in *repo_name*.

    For each bento carrying an ``openllm_alias`` label, write one file per
    comma-separated alias next to the bento directory, containing the
    bento's version.
    """
    from openllm_next.model import list_bento

    for bento in list_bento(repo_name=repo_name):
        raw_aliases = bento.labels.get("openllm_alias", "").strip()
        if not raw_aliases:
            continue
        for alias in raw_aliases.split(","):
            with open(bento.path.parent / alias, "w") as f:
                f.write(bento.version)
|
||||
|
||||
|
||||
@app.command()
def update():
    """Refresh every configured repo's local clone and rebuild alias files.

    Existing clones are removed and re-cloned at depth 1 (TODO: pull in
    place instead), cached clones for repos no longer configured are
    deleted, and the ``last_update`` timestamp consumed by
    `ensure_repo_updated` is written.
    """
    import dulwich
    import dulwich.errors
    import dulwich.porcelain

    config = load_config()
    repos_in_use = set()
    for repo_name, repo in config.repos.items():
        repo = parse_repo_url(repo, repo_name)
        repos_in_use.add((repo.server, repo.owner, repo.repo))
        if repo.path.exists():  # TODO: use update instead of remove and clone
            shutil.rmtree(repo.path, ignore_errors=True)
        if not repo.path.exists():
            repo.path.parent.mkdir(parents=True, exist_ok=True)
            try:
                dulwich.porcelain.clone(
                    f"https://{repo.server}/{repo.owner}/{repo.repo}.git",
                    str(repo.path),
                    checkout=True,
                    depth=1,
                    branch=repo.branch,
                )
                output("")
                output(f"Repo `{repo.name}` updated", style="green")
            # Was a bare `except:`; narrowed so SystemExit/KeyboardInterrupt
            # still propagate.
            except Exception:
                shutil.rmtree(repo.path, ignore_errors=True)
                output(f"Failed to clone repo {repo.name}", style="red")
        else:
            try:
                dulwich.porcelain.pull(
                    str(repo.path),
                    f"https://{repo.server}/{repo.owner}/{repo.repo}.git",
                    refspecs=repo.branch,
                    force=True,
                )
                dulwich.porcelain.clean(str(repo.path), str(repo.path))
                output("")
                output(f"Repo `{repo.name}` updated", style="green")
            except Exception:  # was a bare `except:`
                shutil.rmtree(repo.path, ignore_errors=True)
                output(f"Failed to update repo {repo.name}", style="red")
    # Drop cached clones that no longer correspond to a configured repo.
    for c in REPO_DIR.glob("*/*/*"):
        repo_spec = tuple(c.parts[-3:])
        if repo_spec not in repos_in_use:
            shutil.rmtree(c, ignore_errors=True)
            output(f"Removed unused repo cache {c}")
    with open(REPO_DIR / "last_update", "w") as f:
        f.write(datetime.datetime.now().isoformat())
    for repo_name in config.repos:
        _complete_alias(repo_name)
|
||||
|
||||
|
||||
def ensure_repo_updated():
    """Prompt for (or demand) a repo-cache refresh when it is missing or stale.

    Interactive sessions are asked whether to run `update`. Non-interactive
    ones get an error: a missing cache is fatal (exit 1), a stale cache is a
    warning only.
    """
    last_update_file = REPO_DIR / "last_update"
    if not last_update_file.exists():
        if INTERACTIVE.get():
            wants_update = questionary.confirm(
                "The repo cache is never updated, do you want to update it to fetch the latest model list?"
            ).ask()
            if wants_update:
                update()
            return
        output(
            "The repo cache is never updated, please run `openllm repo update` to fetch the latest model list",
            style="red",
        )
        raise typer.Exit(1)
    last_update = datetime.datetime.fromisoformat(last_update_file.read_text().strip())
    if datetime.datetime.now() - last_update > UPDATE_INTERVAL:
        if INTERACTIVE.get():
            wants_update = questionary.confirm(
                "The repo cache is outdated, do you want to update it to fetch the latest model list?"
            ).ask()
            if wants_update:
                update()
        else:
            output(
                "The repo cache is outdated, please run `openllm repo update` to fetch the latest model list",
                style="yellow",
            )
|
||||
|
||||
|
||||
# Matches repo URLs of the form git+https://<server>/<owner>/<repo>[@<branch>].
# The branch group is optional; parse_repo_url defaults it to "main".
GIT_REPO_RE = re.compile(
    r"git\+https://(?P<server>.+)/(?P<owner>.+)/(?P<repo>.+?)(@(?P<branch>.+))?$"
)
|
||||
|
||||
|
||||
def parse_repo_url(repo_url, repo_name=None) -> RepoInfo:
    """Parse a ``git+https://`` repo URL into a RepoInfo.

    The URL must look like ``git+https://<server>/<owner>/<repo>[@<branch>]``;
    the branch defaults to ``main`` and the local cache path is derived as
    ``REPO_DIR/<server>/<owner>/<repo>``.

    Example::

        parse_repo_url("git+https://github.com/bentoml/bentovllm@main")
        # -> RepoInfo(server='github.com', owner='bentoml',
        #             repo='bentovllm', branch='main', ...)

    (The original docstring formatted these examples as doctests showing a
    tuple result, which does not match the actual RepoInfo return value.)

    Args:
        repo_url: the ``git+https`` URL to parse.
        repo_name: display name for the repo; defaults to the repo segment.

    Raises:
        ValueError: if *repo_url* does not match GIT_REPO_RE.
    """
    match = GIT_REPO_RE.match(repo_url)
    if not match:
        raise ValueError(f"Invalid git repo url: {repo_url}")
    server = match.group("server")
    owner = match.group("owner")
    repo = match.group("repo")
    branch = match.group("branch") or "main"
    path = REPO_DIR / server / owner / repo
    return RepoInfo(
        name=repo if repo_name is None else repo_name,
        url=repo_url,
        server=server,
        owner=owner,
        repo=repo,
        branch=branch,
        path=path,
    )
|
||||
|
||||
|
||||
@app.command()
def add(name: str, repo: str):
    """Register repo URL *repo* under *name*, confirming before overriding."""
    name = name.lower()
    if not name.isidentifier():
        output(
            f"Invalid repo name: {name}, should only contain letters, numbers and underscores",
            style="red",
        )
        return

    config = load_config()
    if name in config.repos:
        overwrite = questionary.confirm(
            f"Repo {name} already exists({config.repos[name]}), override?"
        ).ask()
        if not overwrite:
            return

    config.repos[name] = repo
    save_config(config)
    output(f"Repo {name} added", style="green")
|
||||
|
||||
|
||||
# Allow running this module directly (e.g. `python -m openllm_next.repo`).
if __name__ == "__main__":
    app()
|
||||
164
openllm_next/venv.py
Normal file
164
openllm_next/venv.py
Normal file
@@ -0,0 +1,164 @@
|
||||
import functools
|
||||
import os
|
||||
import pathlib
|
||||
import shutil
|
||||
import typing
|
||||
from typing import Iterable
|
||||
|
||||
import typer
|
||||
|
||||
from openllm_next.common import (
|
||||
VENV_DIR,
|
||||
VERBOSE_LEVEL,
|
||||
BentoInfo,
|
||||
VenvSpec,
|
||||
output,
|
||||
run_command,
|
||||
)
|
||||
|
||||
|
||||
@functools.lru_cache
def _resolve_packages(requirement: typing.Union[pathlib.Path, str]):
    """Parse *requirement* (a requirements.txt path) into requirement objects.

    Nested ``-r`` includes are followed. Cached, because the same lock file
    is parsed for both the preheat spec and the full spec.
    """
    from pip_requirements_parser import RequirementsFile

    parsed = RequirementsFile.from_file(str(requirement), include_nested=True)
    return parsed.requirements
|
||||
|
||||
|
||||
def _filter_preheat_packages(requirements: Iterable) -> list[str]:
|
||||
PREHEAT_PIP_PACKAGES = ["torch", "vllm"]
|
||||
|
||||
deps: list[str] = []
|
||||
for req in requirements:
|
||||
if (
|
||||
req.is_editable
|
||||
or req.is_local_path
|
||||
or req.is_url
|
||||
or req.is_wheel
|
||||
or not req.name
|
||||
or not req.specifier
|
||||
):
|
||||
continue
|
||||
for sp in req.specifier:
|
||||
if sp.operator == "==" and req.name in PREHEAT_PIP_PACKAGES:
|
||||
assert req.line is not None
|
||||
deps.append(req.line)
|
||||
break
|
||||
return deps
|
||||
|
||||
|
||||
@functools.lru_cache
def _resolve_bento_env_specs(bento: BentoInfo):
    """Derive the (preheat, full) pair of venv specs for *bento*.

    The first spec contains only the pinned heavyweight packages (torch,
    vllm) so their large wheels are installed in a base venv layer; the
    second contains the full requirement set and is layered on top via a
    .pth link (see _ensure_venvs/_ensure_venv). Both share the bento's
    Python version. Cached per bento.
    """
    ver_file = bento.path / "env" / "python" / "version.txt"
    assert ver_file.exists(), f"cannot find version file in {bento.path}"

    # Prefer the fully-pinned lock file; fall back to requirements.txt.
    lock_file = bento.path / "env" / "python" / "requirements.lock.txt"
    if not lock_file.exists():
        lock_file = bento.path / "env" / "python" / "requirements.txt"

    reqs = _resolve_packages(lock_file)
    preheat_packages = _filter_preheat_packages(reqs)
    ver = ver_file.read_text().strip()
    # The -1-/-2- name_prefix suffixes keep the two venvs distinct per tag.
    return (
        VenvSpec(
            python_version=ver,
            python_packages=preheat_packages,
            name_prefix=f"{bento.tag.replace(':', '_')}-1-",
        ),
        VenvSpec(
            python_version=ver,
            python_packages=[v.line for v in reqs],
            name_prefix=f"{bento.tag.replace(':', '_')}-2-",
        ),
    )
|
||||
|
||||
|
||||
def _get_lib_dir(venv: pathlib.Path) -> pathlib.Path:
|
||||
if os.name == "nt":
|
||||
return venv / "Lib/site-packages"
|
||||
else:
|
||||
return next(venv.glob("lib/python*")) / "site-packages"
|
||||
|
||||
|
||||
def _ensure_venv(
    env_spec: VenvSpec,
    # NOTE(review): "parrent" is a typo for "parent"; kept to avoid breaking
    # any keyword callers — rename in a coordinated change.
    parrent_venv: typing.Optional[pathlib.Path] = None,
) -> pathlib.Path:
    """Create (or reuse) the venv described by *env_spec*; return its path.

    The venv lives under VENV_DIR keyed by hash(env_spec). A "DONE" marker
    file distinguishes complete venvs from half-built ones, which are wiped
    and rebuilt. When *parrent_venv* is given, its site-packages is exposed
    to the new venv through a .pth file so installed packages are shared.

    Raises:
        typer.Exit: if dependency installation fails (the venv is removed).
    """
    venv = VENV_DIR / str(hash(env_spec))
    # A venv without the DONE marker was interrupted mid-install: rebuild it.
    if venv.exists() and not (venv / "DONE").exists():
        shutil.rmtree(venv, ignore_errors=True)
    if not venv.exists():
        output(f"Installing model dependencies({venv})...", style="green")

        venv_py = (
            venv / "Scripts" / "python.exe"
            if os.name == "nt"
            else venv / "bin" / "python"
        )
        try:
            run_command(
                ["python", "-m", "uv", "venv", venv],
                silent=VERBOSE_LEVEL.get() < 10,
            )
            lib_dir = _get_lib_dir(venv)
            if parrent_venv is not None:
                # Link the parent layer's site-packages via a .pth file so
                # preheated packages (torch/vllm) are not reinstalled.
                parent_lib_dir = _get_lib_dir(parrent_venv)
                with open(lib_dir / f"{parrent_venv.name}.pth", "w+") as f:
                    f.write(str(parent_lib_dir))
            with open(venv / "requirements.txt", "w") as f:
                f.write("\n".join(sorted(env_spec.python_packages)))
            run_command(
                [
                    "python",
                    "-m",
                    "uv",
                    "pip",
                    "install",
                    "-p",
                    str(venv_py),
                    "-r",
                    venv / "requirements.txt",
                ],
                silent=VERBOSE_LEVEL.get() < 10,
            )
            # Marker written last: its presence means the install completed.
            with open(venv / "DONE", "w") as f:
                f.write("DONE")
        except Exception:
            # Leave no partially-installed venv behind.
            shutil.rmtree(venv, ignore_errors=True)
            output(
                f"Failed to install dependencies to {venv}. Cleaned up.",
                style="red",
            )
            raise typer.Exit(1)
        output(f"Successfully installed dependencies to {venv}.", style="green")
        return venv
    else:
        return venv
|
||||
|
||||
|
||||
def _ensure_venvs(env_spec_list: Iterable[VenvSpec]) -> pathlib.Path:
    """Build each spec's venv, layering every venv onto the previous one.

    Returns the final venv, which (through .pth links created by
    _ensure_venv) can see all earlier layers. The spec list must be
    non-empty.
    """
    current = None
    for spec in env_spec_list:
        current = _ensure_venv(spec, current)
    assert current is not None
    return current
|
||||
|
||||
|
||||
def ensure_venv(bento: BentoInfo) -> pathlib.Path:
    """Ensure all venv layers for *bento* exist; return the top (full) venv."""
    specs = _resolve_bento_env_specs(bento)
    return _ensure_venvs(specs)
|
||||
|
||||
|
||||
def _check_venv(env_spec: VenvSpec) -> bool:
    """Return True iff the venv for *env_spec* exists and finished installing.

    Collapsed from two sequential checks in the original: the "DONE" marker
    (written last by _ensure_venv) can only exist inside an existing venv
    directory, so testing it alone is equivalent.
    """
    venv = VENV_DIR / str(hash(env_spec))
    return (venv / "DONE").exists()
|
||||
|
||||
|
||||
def check_venv(bento: BentoInfo) -> bool:
    """True iff every venv layer required by *bento* is already built."""
    specs = _resolve_bento_env_specs(bento)
    return all(map(_check_venv, specs))
|
||||
34
pyproject.toml
Normal file
34
pyproject.toml
Normal file
@@ -0,0 +1,34 @@
|
||||
[build-system]
|
||||
requires = ["setuptools>=42", "wheel"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "openllm-next"
|
||||
version = "0.0.1"
|
||||
description = "OpenLLM: run and deploy open-source large language models as OpenAI-compatible API endpoints."
|
||||
authors = [{name = "oasiszero", email = "oasis0.com@gmail.com"}]
|
||||
license = {file = "LICENSE"}
|
||||
dependencies = [
|
||||
"bentoml",
|
||||
"typer",
|
||||
"questionary",
|
||||
"pyaml",
|
||||
"psutil",
|
||||
# NOTE(review): "pathlib" is in the standard library since Python 3.4; the
# PyPI package of that name is an obsolete backport that can shadow the
# stdlib module — consider removing this dependency.
"pathlib",
|
||||
"pip_requirements_parser",
|
||||
"nvidia-ml-py",
|
||||
"dulwich",
|
||||
"tabulate",
|
||||
"uv",
|
||||
"openai==1.35.9",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
openllm = "openllm_next.__main__:main"
|
||||
|
||||
[tool.typer]
|
||||
src-dir = "openllm_next"
|
||||
|
||||
[tool.isort]
|
||||
multi_line_output = 3
|
||||
include_trailing_comma = true
|
||||
Reference in New Issue
Block a user