mirror of https://github.com/bentoml/OpenLLM.git (synced 2025-12-23 23:57:46 -05:00)
chore: cleanup code and env requirements
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
2 .github/workflows/dependabot-auto-merge.yml (vendored)
@@ -12,7 +12,7 @@ jobs:
     steps:
       - name: Dependabot metadata
         id: metadata
-        uses: dependabot/fetch-metadata@v2.3.0
+        uses: dependabot/fetch-metadata@d7267f607e9d3fb96fc2fbe83e0af444713e90b7 # ratchet:dependabot/fetch-metadata@v2.3.0
         with:
           github-token: "${{ secrets.GITHUB_TOKEN }}"
       - name: Enable auto-merge for Dependabot PRs
35 .github/workflows/tests.yml (vendored, new file)
@@ -0,0 +1,35 @@
+name: Run Tests
+
+on:
+  push:
+    branches: [main, master]
+  pull_request:
+    branches: [main, master]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.9", "3.12"]
+
+    steps:
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # ratchet:actions/checkout@v4
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # ratchet:actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install uv
+        run: |
+          pip install uv
+
+      - name: Install dependencies with uv
+        run: |
+          uv pip install -e .
+          uv pip install pytest pexpect
+
+      - name: Run tests
+        run: |
+          pytest tests -v
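
The workflow installs pytest and pexpect, but the tests themselves sit outside this diff. A minimal sketch of the kind of interactive CLI check those two dependencies point at; the test body and the `-m openllm` invocation are assumptions, not code from the repo:

# Hypothetical smoke test -- the tests/ content is not part of this commit.
import sys

import pexpect


def test_cli_help_smoke() -> None:
  # Spawn the CLI via the active interpreter so the `uv pip install -e .` env is used.
  child = pexpect.spawn(sys.executable, ['-m', 'openllm', '--help'], encoding='utf-8', timeout=60)
  child.expect('openllm')  # the help text should mention the program name
  child.expect(pexpect.EOF)
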
1 .gitignore (vendored)
@@ -163,3 +163,4 @@ cython_debug/
 venv/
 .envrc
 _version.py
+.cursor
.pre-commit-config.yaml
@@ -7,7 +7,7 @@ default_language_version:
   python: python3.11 # NOTE: sync with .python-version-default
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: "v0.11.2"
+    rev: "v0.11.4"
     hooks:
       - id: ruff
         alias: r
15 .ruff.toml
@@ -1,7 +1,7 @@
 extend-include = ["*.ipynb"]
 preview = true
-line-length = 119
-indent-width = 4
+line-length = 100
+indent-width = 2

 [format]
 preview = true
@@ -18,21 +18,16 @@ ignore = [
 ]
 select = [
   "F",
   "G",    # flake8-logging-format
   "PERF", # perflint
   "RUF",  # Ruff-specific rules
   "W6",
   "E71",
   "E72",
   "E112",
   "E113",
-  # "E124",
   "E203",
   "E272",
-  # "E303",
-  # "E304",
-  # "E501",
-  # "E502",
   "E702",
   "E703",
   "E731",
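
For reference, the kinds of code the retained pycodestyle selections reject; an illustrative snippet, not taken from the repo:

x = 1; y = 2  # E702: multiple statements on one line (semicolon)
square = lambda n: n * n  # E731: assigning a lambda instead of defining a function
if x == None:  # E711 (in the selected E71 family): comparison to None should use `is None`
  pass
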
@@ -9,30 +9,30 @@
 import subprocess, sys, pathlib, json, jinja2

 if __name__ == '__main__':
-    with (pathlib.Path('.').parent / 'README.md').open('w') as f:
-        f.write(
-            jinja2.Environment(loader=jinja2.FileSystemLoader('.'))
-            .get_template('README.md.tpl')
-            .render(
-                model_dict=json.loads(
-                    subprocess.run(
-                        [
-                            sys.executable,
-                            '-m',
-                            'uv',
-                            'run',
-                            '--with-editable',
-                            '.',
-                            'openllm',
-                            'model',
-                            'list',
-                            '--output',
-                            'readme',
-                        ],
-                        text=True,
-                        check=True,
-                        capture_output=True,
-                    ).stdout.strip()
-                )
-            )
-        )
+  with (pathlib.Path('.').parent / 'README.md').open('w') as f:
+    f.write(
+      jinja2.Environment(loader=jinja2.FileSystemLoader('.'))
+      .get_template('README.md.tpl')
+      .render(
+        model_dict=json.loads(
+          subprocess.run(
+            [
+              sys.executable,
+              '-m',
+              'uv',
+              'run',
+              '--with-editable',
+              '.',
+              'openllm',
+              'model',
+              'list',
+              '--output',
+              'readme',
+            ],
+            text=True,
+            check=True,
+            capture_output=True,
+          ).stdout.strip()
+        )
+      )
+    )
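
A condensed sketch of the same render pipeline, with an inline template standing in for README.md.tpl and a made-up model_dict; the real data comes from `openllm model list --output readme`:

import jinja2

# Inline stand-in for README.md.tpl; the real template ships in the repo.
template = jinja2.Environment().from_string(
  '{% for name, info in model_dict.items() %}- {{ name }}: {{ info }}\n{% endfor %}'
)
# Hypothetical payload, for illustration only.
print(template.render(model_dict={'llama3.3': '70b-instruct'}))
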
pyproject.toml
@@ -43,6 +43,7 @@ dependencies = [
   "uv",
   "openai==1.70.0",
   "huggingface-hub",
+  "hf-xet",
   "typing-extensions>=4.12.2",
 ]
 keywords = [
@@ -87,6 +88,12 @@ src-dir = "src/openllm"
 requires = ["hatchling==1.27.0", "hatch-vcs==0.4.0"]
 build-backend = 'hatchling.build'

+[dependency-groups]
+tests = [
+  "pexpect>=4.9.0",
+  "pytest>=8.3.5",
+]
+
 [tool.hatch.version]
 source = "vcs"
 fallback-version = "0.0.0"
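
The new [dependency-groups] table is standard PEP 735 metadata; one way to read it back with the stdlib (Python 3.11+):

import tomllib

with open('pyproject.toml', 'rb') as f:
  data = tomllib.load(f)
print(data['dependency-groups']['tests'])  # ['pexpect>=4.9.0', 'pytest>=8.3.5']
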
@@ -14,12 +14,12 @@ from openllm.model import app as model_app, ensure_bento, list_bento
 from openllm.repo import app as repo_app, cmd_update

 if typing.TYPE_CHECKING:
   from openllm.common import DeploymentTarget

 app = OpenLLMTyper(
   help='`openllm hello` to get started. '
   'OpenLLM is a CLI tool to manage and deploy open source LLMs and'
   ' get an OpenAI API compatible chat server in seconds.'
 )

 app.add_typer(repo_app, name='repo')
@@ -28,263 +28,274 @@ app.add_typer(clean_app, name='clean')


 def _select_bento_name(models: list[BentoInfo], target: DeploymentTarget) -> tuple[str, str]:
   from tabulate import tabulate

   model_infos = [(model.repo.name, model.name, can_run(model, target)) for model in models]
   model_name_groups: defaultdict[tuple[str, str], float] = defaultdict(lambda: 0.0)
   for repo, name, score in model_infos:
     model_name_groups[repo, name] += score
-  table_data = [(name, repo, CHECKED if score > 0 else '') for (repo, name), score in model_name_groups.items()]
+  table_data = [
+    (name, repo, CHECKED if score > 0 else '') for (repo, name), score in model_name_groups.items()
+  ]
   if not table_data:
     output('No model found', style='red')
     raise typer.Exit(1)
   table: list[str] = tabulate(table_data, headers=['model', 'repo', 'locally runnable']).split('\n')

   selected: tuple[str, str] | None = questionary.select(
     'Select a model',
     [
       questionary.Separator(f'{table[0]}\n {table[1]}'),
       *[questionary.Choice(line, value=value[:2]) for value, line in zip(table_data, table[2:])],
     ],
   ).ask()
   if selected is None:
     raise typer.Exit(1)
   return selected


 def _select_bento_version(
   models: list[BentoInfo], target: DeploymentTarget | None, bento_name: str, repo: str
 ) -> tuple[BentoInfo, float]:
   from tabulate import tabulate

-  model_infos: list[tuple[BentoInfo, float]] = [
-    (model, can_run(model, target)) for model in models if model.name == bento_name and model.repo.name == repo
-  ]
+  model_infos: list[tuple[BentoInfo, float]] = [
+    (model, can_run(model, target))
+    for model in models
+    if model.name == bento_name and model.repo.name == repo
+  ]

   table_data = [
     [model.tag, CHECKED if score > 0 else '']
     for model, score in model_infos
     if model.name == bento_name and model.repo.name == repo
   ]
   if not table_data:
     output(f'No model found for {bento_name} in {repo}', style='red')
     raise typer.Exit(1)
   table: list[str] = tabulate(table_data, headers=['version', 'locally runnable']).split('\n')

   selected: tuple[BentoInfo, float] | None = questionary.select(
     'Select a version',
     [
       questionary.Separator(f'{table[0]}\n {table[1]}'),
       *[questionary.Choice(line, value=value[:2]) for value, line in zip(model_infos, table[2:])],
     ],
   ).ask()
   if selected is None:
     raise typer.Exit(1)
   return selected


 def _select_target(bento: BentoInfo, targets: list[DeploymentTarget]) -> DeploymentTarget:
   from tabulate import tabulate

   targets.sort(key=lambda x: can_run(bento, x), reverse=True)
   if not targets:
     output('No available instance type, check your bentocloud account', style='red')
     raise typer.Exit(1)

   table = tabulate(
     [
       [
         target.name,
         target.accelerators_repr,
         f'${target.price}',
         CHECKED if can_run(bento, target) else 'insufficient res.',
       ]
       for target in targets
     ],
     headers=['instance type', 'accelerator', 'price/hr', 'deployable'],
   ).split('\n')

   selected: DeploymentTarget | None = questionary.select(
     'Select an instance type',
     [
       questionary.Separator(f'{table[0]}\n {table[1]}'),
       *[questionary.Choice(f'{line}', value=target) for target, line in zip(targets, table[2:])],
     ],
   ).ask()
   if selected is None:
     raise typer.Exit(1)
   return selected


 def _select_action(bento: BentoInfo, score: float) -> None:
   if score > 0:
     options: list[typing.Any] = [
       questionary.Separator('Available actions'),
       questionary.Choice('0. Run the model in terminal', value='run', shortcut_key='0'),
       questionary.Separator(f' $ openllm run {bento}'),
       questionary.Separator(' '),
-      questionary.Choice('1. Serve the model locally and get a chat server', value='serve', shortcut_key='1'),
+      questionary.Choice(
+        '1. Serve the model locally and get a chat server', value='serve', shortcut_key='1'
+      ),
       questionary.Separator(f' $ openllm serve {bento}'),
       questionary.Separator(' '),
-      questionary.Choice(
-        '2. Deploy the model to bentocloud and get a scalable chat server', value='deploy', shortcut_key='2'
-      ),
+      questionary.Choice(
+        '2. Deploy the model to bentocloud and get a scalable chat server',
+        value='deploy',
+        shortcut_key='2',
+      ),
       questionary.Separator(f' $ openllm deploy {bento}'),
     ]
   else:
     options = [
       questionary.Separator('Available actions'),
       questionary.Choice(
         '0. Run the model in terminal', value='run', disabled='insufficient res.', shortcut_key='0'
       ),
       questionary.Separator(f' $ openllm run {bento}'),
       questionary.Separator(' '),
       questionary.Choice(
         '1. Serve the model locally and get a chat server',
         value='serve',
         disabled='insufficient res.',
         shortcut_key='1',
       ),
       questionary.Separator(f' $ openllm serve {bento}'),
       questionary.Separator(' '),
-      questionary.Choice(
-        '2. Deploy the model to bentocloud and get a scalable chat server', value='deploy', shortcut_key='2'
-      ),
+      questionary.Choice(
+        '2. Deploy the model to bentocloud and get a scalable chat server',
+        value='deploy',
+        shortcut_key='2',
+      ),
       questionary.Separator(f' $ openllm deploy {bento}'),
     ]
   action: str | None = questionary.select('Select an action', options).ask()
   if action is None:
     raise typer.Exit(1)
   if action == 'run':
     try:
       port = random.randint(30000, 40000)
       local_run(bento, port=port)
     finally:
       output('\nUse this command to run the action again:', style='green')
       output(f' $ openllm run {bento}', style='orange')
   elif action == 'serve':
     try:
       local_serve(bento)
     finally:
       output('\nUse this command to run the action again:', style='green')
       output(f' $ openllm serve {bento}', style='orange')
   elif action == 'deploy':
     ensure_cloud_context()
     targets = get_cloud_machine_spec()
     target = _select_target(bento, targets)
     try:
       cloud_deploy(bento, target)
     finally:
       output('\nUse this command to run the action again:', style='green')
       output(f' $ openllm deploy {bento} --instance-type {target.name}', style='orange')


 @app.command(help='get started interactively')
-def hello() -> None:
+def hello(repo: typing.Optional[str] = None) -> None:
+  cmd_update()
   INTERACTIVE.set(True)

   target = get_local_machine_spec()
   output(f' Detected Platform: {target.platform}', style='green')
   if target.accelerators:
     output(' Detected Accelerators: ', style='green')
     for a in target.accelerators:
       output(f' - {a.model} {a.memory_size}GB', style='green')
   else:
-    output(' Detected Accelerators: None', style='yellow')
+    output(' Detected Accelerators: None', style='green')

-  models = list_bento()
+  models = list_bento(repo_name=repo)
   if not models:
     output('No model found, you probably need to update the model repo:', style='red')
     output(' $ openllm repo update', style='orange')
     raise typer.Exit(1)

   bento_name, repo = _select_bento_name(models, target)
   bento, score = _select_bento_version(models, target, bento_name, repo)
   _select_action(bento, score)


 @app.command(help='start an OpenAI API compatible chat server and chat in browser')
 def serve(
   model: typing.Annotated[str, typer.Argument()] = '',
   repo: typing.Optional[str] = None,
   port: int = 3000,
   verbose: bool = False,
 ) -> None:
   cmd_update()
   if verbose:
     VERBOSE_LEVEL.set(20)
   target = get_local_machine_spec()
   bento = ensure_bento(model, target=target, repo_name=repo)
   local_serve(bento, port=port)


 @app.command(help='run the model and chat in terminal')
 def run(
   model: typing.Annotated[str, typer.Argument()] = '',
   repo: typing.Optional[str] = None,
   port: typing.Optional[int] = None,
   timeout: int = 600,
   verbose: bool = False,
 ) -> None:
   cmd_update()
   if verbose:
     VERBOSE_LEVEL.set(20)
   target = get_local_machine_spec()
   bento = ensure_bento(model, target=target, repo_name=repo)
   if port is None:
     port = random.randint(30000, 40000)
   local_run(bento, port=port, timeout=timeout)


 @app.command(help='deploy production-ready OpenAI API-compatible server to BentoCloud')
 def deploy(
   model: typing.Annotated[str, typer.Argument()] = '',
   instance_type: typing.Optional[str] = None,
   repo: typing.Optional[str] = None,
   verbose: bool = False,
   env: typing.Optional[list[str]] = typer.Option(
     None,
     '--env',
     help='Environment variables to pass to the deployment command. Format: NAME or NAME=value. Can be specified multiple times.',
   ),
 ) -> None:
   cmd_update()
   if verbose:
     VERBOSE_LEVEL.set(20)
   bento = ensure_bento(model, repo_name=repo)
   if instance_type is not None:
     return cloud_deploy(bento, DeploymentTarget(accelerators=[], name=instance_type), cli_envs=env)
   targets = sorted(
     filter(lambda x: can_run(bento, x) > 0, get_cloud_machine_spec()),
     key=lambda x: can_run(bento, x),
     reverse=True,
   )
   if not targets:
     output('No available instance type, check your bentocloud account', style='red')
     raise typer.Exit(1)
   target = targets[0]
   output(f'Recommended instance type: {target.name}', style='green')
   cloud_deploy(bento, target, cli_envs=env)


 @app.callback(invoke_without_command=True)
 def typer_callback(
   verbose: int = 0,
   do_not_track: bool = typer.Option(
     False, '--do-not-track', help='Whether to disable usage tracking', envvar=DO_NOT_TRACK
   ),
   version: bool = typer.Option(False, '--version', '-v', help='Show version'),
 ) -> None:
   if verbose:
     VERBOSE_LEVEL.set(verbose)
   if version:
     output(
       f'openllm, {importlib.metadata.version("openllm")}\nPython ({platform.python_implementation()}) {platform.python_version()}'
     )
     sys.exit(0)
   if do_not_track:
     os.environ[DO_NOT_TRACK] = str(True)


 if __name__ == '__main__':
   app()
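
All three selection helpers above share one pattern: render rows with tabulate, then feed the rendered lines to questionary as choices whose values are the underlying objects. A standalone toy version of that pattern, with fake rows instead of the repo's BentoInfo objects:

# Sketch of the tabulate + questionary pattern used throughout this file (toy data).
import questionary
from tabulate import tabulate

rows = [('llama3.3', 'default', 'yes'), ('phi4', 'default', '')]
table = tabulate(rows, headers=['model', 'repo', 'locally runnable']).split('\n')
choice = questionary.select(
  'Select a model',
  [
    questionary.Separator(f'{table[0]}\n {table[1]}'),  # header rows shown as a separator
    *[questionary.Choice(line, value=row[:2]) for row, line in zip(rows, table[2:])],
  ],
).ask()
print(choice)
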
@@ -9,129 +9,141 @@ from openllm.common import BentoInfo, DeploymentTarget, output, Accelerator


 def parse_memory_string(v: typing.Any) -> typing.Any:
   """Parse memory strings like "60Gi" into float."""
   if isinstance(v, str):
     match = re.match(r'(\d+(\.\d+)?)\s*Gi$', v, re.IGNORECASE)
     if match:
       return float(match.group(1))
   # Pass other types (including numbers or other strings for standard float conversion) through
   return v


 class Resource(pydantic.BaseModel):
   memory: typing.Annotated[float, BeforeValidator(parse_memory_string)] = 0.0
   cpu: int = 0
   gpu: int = 0
   gpu_type: str = ''

   @override
   def __hash__(self) -> int:
     return hash((self.cpu, self.memory, self.gpu, self.gpu_type))

   def __bool__(self) -> bool:
     return any(value is not None for value in self.__dict__.values())


 ACCELERATOR_SPECS: dict[str, Accelerator] = {
   'nvidia-gtx-1650': Accelerator(model='GTX 1650', memory_size=4.0),
   'nvidia-gtx-1060': Accelerator(model='GTX 1060', memory_size=6.0),
   'nvidia-gtx-1080-ti': Accelerator(model='GTX 1080 Ti', memory_size=11.0),
   'nvidia-rtx-3060': Accelerator(model='RTX 3060', memory_size=12.0),
   'nvidia-rtx-3060-ti': Accelerator(model='RTX 3060 Ti', memory_size=8.0),
   'nvidia-rtx-3070-ti': Accelerator(model='RTX 3070 Ti', memory_size=8.0),
   'nvidia-rtx-3080': Accelerator(model='RTX 3080', memory_size=10.0),
   'nvidia-rtx-3080-ti': Accelerator(model='RTX 3080 Ti', memory_size=12.0),
   'nvidia-rtx-3090': Accelerator(model='RTX 3090', memory_size=24.0),
   'nvidia-rtx-4070-ti': Accelerator(model='RTX 4070 Ti', memory_size=12.0),
   'nvidia-tesla-p4': Accelerator(model='P4', memory_size=8.0),
   'nvidia-tesla-p100': Accelerator(model='P100', memory_size=16.0),
   'nvidia-tesla-k80': Accelerator(model='K80', memory_size=12.0),
   'nvidia-tesla-t4': Accelerator(model='T4', memory_size=16.0),
   'nvidia-tesla-v100': Accelerator(model='V100', memory_size=16.0),
   'nvidia-l4': Accelerator(model='L4', memory_size=24.0),
   'nvidia-tesla-l4': Accelerator(model='L4', memory_size=24.0),
   'nvidia-tesla-a10g': Accelerator(model='A10G', memory_size=24.0),
   'nvidia-a100-80g': Accelerator(model='A100', memory_size=80.0),
   'nvidia-a100-80gb': Accelerator(model='A100', memory_size=80.0),
   'nvidia-tesla-a100': Accelerator(model='A100', memory_size=40.0),
   'nvidia-tesla-h100': Accelerator(model='H100', memory_size=80.0),
   'nvidia-h200-141gb': Accelerator(model='H200', memory_size=141.0),
   'nvidia-blackwell-b100': Accelerator(model='B100', memory_size=192.0),
   'nvidia-blackwell-gb200': Accelerator(model='GB200', memory_size=192.0),
 }


 @functools.lru_cache
 def get_local_machine_spec() -> DeploymentTarget:
   if psutil.MACOS:
     return DeploymentTarget(accelerators=[], source='local', platform='macos')

   if psutil.WINDOWS:
     platform = 'windows'
   elif psutil.LINUX:
     platform = 'linux'
   else:
     raise NotImplementedError('Unsupported platform')

   from pynvml import (
     nvmlDeviceGetCount,
     nvmlDeviceGetCudaComputeCapability,
     nvmlDeviceGetHandleByIndex,
     nvmlDeviceGetMemoryInfo,
     nvmlDeviceGetName,
     nvmlInit,
     nvmlShutdown,
   )

   try:
     nvmlInit()
     device_count = nvmlDeviceGetCount()
     accelerators: list[Accelerator] = []
     for i in range(device_count):
       handle = nvmlDeviceGetHandleByIndex(i)
       name = nvmlDeviceGetName(handle)
       memory_info = nvmlDeviceGetMemoryInfo(handle)
-      accelerators.append(Accelerator(model=name, memory_size=math.ceil(int(memory_info.total) / 1024**3)))
+      accelerators.append(
+        Accelerator(model=name, memory_size=math.ceil(int(memory_info.total) / 1024**3))
+      )
       compute_capability = nvmlDeviceGetCudaComputeCapability(handle)
       if compute_capability < (7, 5):
         output(
           f'GPU {name} with compute capability {compute_capability} '
           'may not be supported, 7.5 or higher is recommended. check '
           'https://developer.nvidia.com/cuda-gpus for more information',
           style='yellow',
         )
     nvmlShutdown()
     return DeploymentTarget(accelerators=accelerators, source='local', platform=platform)
   except Exception as e:
     output(
       'Failed to get local GPU info. Ensure nvidia driver is installed to enable local GPU deployment',
       style='yellow',
     )
     output(f'Error: {e}', style='red', level=20)
     return DeploymentTarget(accelerators=[], source='local', platform=platform)


 @functools.lru_cache(typed=True)
 def can_run(bento: BentoInfo, target: DeploymentTarget | None = None) -> float:
   """
   Calculate if the bento can be deployed on the target.
   """
   if target is None:
     target = get_local_machine_spec()

   resource_spec = Resource(**(bento.bento_yaml['services'][0]['config'].get('resources', {})))
   labels = bento.bento_yaml.get('labels', {})
   platforms = labels.get('platforms', 'linux').split(',')

   if target.platform not in platforms:
     return 0.0

   # return 1.0 if no resource is specified
   if not resource_spec:
     return 0.5

   if resource_spec.gpu > 0:
     required_gpu = ACCELERATOR_SPECS[resource_spec.gpu_type]
-    filtered_accelerators = [ac for ac in target.accelerators if ac.memory_size >= required_gpu.memory_size]
+    filtered_accelerators = [
+      ac for ac in target.accelerators if ac.memory_size >= required_gpu.memory_size
+    ]
     if resource_spec.gpu > len(filtered_accelerators):
       return 0.0
-    return required_gpu.memory_size * resource_spec.gpu / sum(ac.memory_size for ac in target.accelerators)
+    return (
+      required_gpu.memory_size
+      * resource_spec.gpu
+      / sum(ac.memory_size for ac in target.accelerators)
+    )
   if target.accelerators:
     return 0.01 / sum(ac.memory_size for ac in target.accelerators)
   return 1.0
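
parse_memory_string feeds pydantic's BeforeValidator, so "60Gi" strings coerce to floats before validation. A self-contained replica showing that behavior (toy values):

import re
import typing

import pydantic
from pydantic import BeforeValidator


def parse_memory_string(v: typing.Any) -> typing.Any:
  # Same rule as above: "60Gi" -> 60.0; everything else passes through.
  if isinstance(v, str):
    match = re.match(r'(\d+(\.\d+)?)\s*Gi$', v, re.IGNORECASE)
    if match:
      return float(match.group(1))
  return v


class Resource(pydantic.BaseModel):
  memory: typing.Annotated[float, BeforeValidator(parse_memory_string)] = 0.0


print(Resource(memory='60Gi').memory)  # 60.0
print(Resource(memory=8).memory)       # 8.0 -- non-"Gi" values fall through to float coercion

The can_run score is then plain arithmetic: a bento wanting one 24 GB GPU on a single 24 GB card scores 24 * 1 / 24 = 1.0, while the same bento on an 80 GB card scores 24 / 80 = 0.3, which is how the CLI ranks instance types.
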
@@ -7,99 +7,99 @@ DO_NOT_TRACK = 'BENTOML_DO_NOT_TRACK'


 class EventMeta(abc.ABC):
   @property
   def event_name(self) -> str:
     # camel case to snake case
     event_name = re.sub(r'(?<!^)(?=[A-Z])', '_', self.__class__.__name__).lower()
     # remove "_event" suffix
     suffix_to_remove = '_event'
     if event_name.endswith(suffix_to_remove):
       event_name = event_name[: -len(suffix_to_remove)]
     return event_name


 @attr.define
 class CliEvent(EventMeta):
   cmd_group: str
   cmd_name: str
   duration_in_ms: float = attr.field(default=0)
   error_type: typing.Optional[str] = attr.field(default=None)
   return_code: typing.Optional[int] = attr.field(default=None)


 @attr.define
 class OpenllmCliEvent(CliEvent):
   pass


 class OrderedCommands(typer.core.TyperGroup):
   def list_commands(self, ctx: click.Context) -> list[str]:
     return list(self.commands)


 class OpenLLMTyper(typer.Typer):
   def __init__(self, *args: typing.Any, **kwargs: typing.Any):
     no_args_is_help: bool = kwargs.pop('no_args_is_help', True)
     context_settings: dict[str, typing.Any] = kwargs.pop('context_settings', {})
     if 'help_option_names' not in context_settings:
       context_settings['help_option_names'] = ('-h', '--help')
     if 'max_content_width' not in context_settings:
       context_settings['max_content_width'] = int(os.environ.get('COLUMNS', str(120)))
     klass = kwargs.pop('cls', OrderedCommands)

     super().__init__(
       *args, cls=klass, no_args_is_help=no_args_is_help, context_settings=context_settings, **kwargs
     )

   # NOTE: Since OpenLLMTyper only wraps command to add analytics, the default type-hint for @app.command
   # does not change, hence the below hijacking.
   if typing.TYPE_CHECKING:
     command = typer.Typer.command
   else:

     def command(self, *args: typing.Any, **kwargs: typing.Any):
       def decorator(f):
         @functools.wraps(f)
         @click.pass_context
         def wrapped(ctx: click.Context, *args, **kwargs):
           from bentoml._internal.utils.analytics import track

           do_not_track = os.environ.get(DO_NOT_TRACK, str(False)).lower() == 'true'

           # so we know that the root program is openllm
           command_name = ctx.info_name
           if ctx.parent.parent is not None:
             # openllm model list
             command_group = ctx.parent.info_name
           elif ctx.parent.info_name == ctx.find_root().info_name:
             # openllm run
             command_group = 'openllm'

           if do_not_track:
             return f(*args, **kwargs)
           start_time = time.time_ns()
           try:
             return_value = f(*args, **kwargs)
             duration_in_ns = time.time_ns() - start_time
             track(
               OpenllmCliEvent(
                 cmd_group=command_group, cmd_name=command_name, duration_in_ms=duration_in_ns / 1e6
               )
             )
             return return_value
           except BaseException as e:
             duration_in_ns = time.time_ns() - start_time
             track(
               OpenllmCliEvent(
                 cmd_group=command_group,
                 cmd_name=command_name,
                 duration_in_ms=duration_in_ns / 1e6,
                 error_type=type(e).__name__,
                 return_code=(2 if isinstance(e, KeyboardInterrupt) else 1),
               )
             )
             raise

         return typer.Typer.command(self, *args, **kwargs)(wrapped)

       return decorator
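
The event_name property is a two-step string transform; run standalone it behaves like this:

# Camel case -> snake case, then strip the "_event" suffix, as in EventMeta.event_name.
import re

name = re.sub(r'(?<!^)(?=[A-Z])', '_', 'OpenllmCliEvent').lower()  # 'openllm_cli_event'
if name.endswith('_event'):
  name = name[: -len('_event')]
print(name)  # 'openllm_cli'
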
@@ -12,72 +12,72 @@ HUGGINGFACE_CACHE = pathlib.Path.home() / '.cache' / 'huggingface' / 'hub'


 def _du(path: pathlib.Path) -> int:
   seen_paths = set()
   used_space = 0

   for f in path.rglob('*'):
     if os.name == 'nt':  # Windows system
       # On Windows, directly add file sizes without considering hard links
       used_space += f.stat().st_size
     else:
       # On non-Windows systems, use inodes to avoid double counting
       stat = f.stat()
       if stat.st_ino not in seen_paths:
         seen_paths.add(stat.st_ino)
         used_space += stat.st_size
   return used_space


 @app.command(help='Clean up all the cached models from huggingface')
 def model_cache(verbose: bool = False) -> None:
   if verbose:
     VERBOSE_LEVEL.set(20)
   used_space = _du(HUGGINGFACE_CACHE)
   sure = questionary.confirm(
     f'This will remove all models cached by Huggingface (~{used_space / 1024 / 1024:.2f}MB), are you sure?'
   ).ask()
   if not sure:
     return
   shutil.rmtree(HUGGINGFACE_CACHE, ignore_errors=True)
   output('All models cached by Huggingface have been removed', style='green')


 @app.command(help='Clean up all the virtual environments created by OpenLLM')
 def venvs(verbose: bool = False) -> None:
   if verbose:
     VERBOSE_LEVEL.set(20)

   used_space = _du(VENV_DIR)
   sure = questionary.confirm(
     f'This will remove all virtual environments created by OpenLLM (~{used_space / 1024 / 1024:.2f}MB), are you sure?'
   ).ask()
   if not sure:
     return
   shutil.rmtree(VENV_DIR, ignore_errors=True)
   output('All virtual environments have been removed', style='green')


 @app.command(help='Clean up all the repositories cloned by OpenLLM')
 def repos(verbose: bool = False) -> None:
   if verbose:
     VERBOSE_LEVEL.set(20)
   shutil.rmtree(REPO_DIR, ignore_errors=True)
   output('All repositories have been removed', style='green')


 @app.command(help='Reset configurations to default')
 def configs(verbose: bool = False) -> None:
   if verbose:
     VERBOSE_LEVEL.set(20)
   shutil.rmtree(CONFIG_FILE, ignore_errors=True)
   output('All configurations have been reset', style='green')


 @app.command(name='all', help='Clean up all above and bring OpenLLM to a fresh start')
 def all_cache(verbose: bool = False) -> None:
   if verbose:
     VERBOSE_LEVEL.set(20)
   repos()
   venvs()
   model_cache()
   configs()
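
The inode bookkeeping in _du is what keeps hard-linked caches (common under huggingface_hub) from being counted twice. The POSIX branch isolated, with the same logic under standalone names:

import pathlib


def du(path: pathlib.Path) -> int:
  seen_inodes = set()
  used = 0
  for f in path.rglob('*'):
    stat = f.stat()
    if stat.st_ino not in seen_inodes:  # count each inode once, so hard links are not double counted
      seen_inodes.add(stat.st_ino)
      used += stat.st_size
  return used


print(du(pathlib.Path.home() / '.cache'))  # size in bytes
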
@@ -11,158 +11,171 @@ app = OpenLLMTyper()


 def resolve_cloud_config() -> pathlib.Path:
   env = os.environ.get('BENTOML_HOME')
   if env is not None:
     return pathlib.Path(env) / '.yatai.yaml'
   return pathlib.Path.home() / 'bentoml' / '.yatai.yaml'


 def _get_deploy_cmd(
-  bento: BentoInfo, target: typing.Optional[DeploymentTarget] = None, cli_envs: typing.Optional[list[str]] = None
+  bento: BentoInfo,
+  target: typing.Optional[DeploymentTarget] = None,
+  cli_envs: typing.Optional[list[str]] = None,
 ) -> tuple[list[str], EnvVars]:
   cmd = ['bentoml', 'deploy', bento.bentoml_tag]
   env = EnvVars({'BENTOML_HOME': f'{bento.repo.path}/bentoml'})

   # Process CLI env vars first to determine overrides
   explicit_envs: dict[str, str] = {}
   if cli_envs:
     for env_var in cli_envs:
       if '=' in env_var:
         name, value = env_var.split('=', 1)
         explicit_envs[name] = value
       else:
         name = env_var
         value = typing.cast(str, os.environ.get(name))
         if value is None:
           output(
             f"Environment variable '{name}' specified via --env but not found in the current environment.",
             style='red',
           )
           raise typer.Exit(1)
         explicit_envs[name] = value

   # Process envs defined in bento.yaml, skipping those overridden by CLI
   required_envs = bento.bento_yaml.get('envs', [])
-  required_env_names = [env['name'] for env in required_envs if 'name' in env and env['name'] not in explicit_envs]
+  required_env_names = [
+    env['name']
+    for env in required_envs
+    if 'name' in env and env['name'] not in explicit_envs and not env.get('value')
+  ]
   if required_env_names:
     output(
       f'This model requires the following environment variables to run (unless overridden via --env): {required_env_names!r}',
-      style='yellow',
+      style='green',
     )

   for env_info in required_envs:
     name = typing.cast(str, env_info.get('name'))
-    if not name or name in explicit_envs:
+    if not name or name in explicit_envs or env_info.get('value', None) is not None:
       continue

     if os.environ.get(name):
       default = os.environ[name]
     elif 'value' in env_info:
       default = env_info['value']
     else:
       default = ''

     if INTERACTIVE.get():
       import questionary

       value = questionary.text(f'{name}: (from bento.yaml)', default=default).ask()
     else:
       if default == '':
-        output(f'Environment variable {name} (from bento.yaml) is required but not provided', style='red')
+        output(
+          f'Environment variable {name} (from bento.yaml) is required but not provided', style='red'
+        )
         raise typer.Exit(1)
       else:
         value = default

     if value is None:
       raise typer.Exit(1)
     cmd += ['--env', f'{name}={value}']

   # Add explicitly provided env vars from CLI
   for name, value in explicit_envs.items():
     cmd += ['--env', f'{name}={value}']

   if target:
     cmd += ['--instance-type', target.name]

   base_config = resolve_cloud_config()
   if not base_config.exists():
     raise Exception('Cannot find cloud config.')
   # remove before copy
   if (bento.repo.path / 'bentoml' / '.yatai.yaml').exists():
     (bento.repo.path / 'bentoml' / '.yatai.yaml').unlink()
   shutil.copy(base_config, bento.repo.path / 'bentoml' / '.yatai.yaml')

   return cmd, env


 def ensure_cloud_context() -> None:
   import questionary

   cmd = ['bentoml', 'cloud', 'current-context']
   try:
     result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
     context = json.loads(result)
     output(f' bentoml already logged in: {context["endpoint"]}', style='green', level=20)
   except subprocess.CalledProcessError:
     output(' bentoml not logged in', style='red')
     if not INTERACTIVE.get():
       output('\n get bentoml logged in by:')
       output(' $ bentoml cloud login', style='orange')
       output('')
       output(
         """ * you may need to visit https://cloud.bentoml.com to get an account. you can also bring your own bentoml cluster (BYOC) to your team from https://bentoml.com/contact""",
         style='yellow',
       )
       raise typer.Exit(1)
     else:
-      action = questionary.select(
-        'Choose an action:', choices=['I have a BentoCloud account', 'get an account in two minutes']
-      ).ask()
+      action = questionary.select(
+        'Choose an action:',
+        choices=['I have a BentoCloud account', 'get an account in two minutes'],
+      ).ask()
       if action is None:
         raise typer.Exit(1)
       elif action == 'get an account in two minutes':
         output('Please visit https://cloud.bentoml.com to get your token', style='yellow')
-      endpoint = questionary.text('Enter the endpoint: (similar to https://my-org.cloud.bentoml.com)').ask()
+      endpoint = questionary.text(
+        'Enter the endpoint: (similar to https://my-org.cloud.bentoml.com)'
+      ).ask()
       if endpoint is None:
         raise typer.Exit(1)
       token = questionary.text('Enter your token: (similar to cniluaxxxxxxxx)').ask()
       if token is None:
         raise typer.Exit(1)
       cmd = ['bentoml', 'cloud', 'login', '--api-token', token, '--endpoint', endpoint]
       try:
         result = subprocess.check_output(cmd)
         output(' Logged in successfully', style='green')
       except subprocess.CalledProcessError:
         output(' Failed to login', style='red')
         raise typer.Exit(1)


 def get_cloud_machine_spec() -> list[DeploymentTarget]:
   ensure_cloud_context()
   cmd = ['bentoml', 'deployment', 'list-instance-types', '-o', 'json']
   try:
     result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
     instance_types = json.loads(result)
     return [
       DeploymentTarget(
         source='cloud',
         name=it['name'],
         price=it['price'],
         platform='linux',
         accelerators=(
           [ACCELERATOR_SPECS[it['gpu_type']] for _ in range(int(it['gpu']))]
           if it.get('gpu') and it['gpu_type'] in ACCELERATOR_SPECS
           else []
         ),
       )
       for it in instance_types
     ]
   except (subprocess.CalledProcessError, json.JSONDecodeError):
     output('Failed to get cloud instance types', style='red')
     return []


-def deploy(bento: BentoInfo, target: DeploymentTarget, cli_envs: typing.Optional[list[str]] = None) -> None:
+def deploy(
+  bento: BentoInfo, target: DeploymentTarget, cli_envs: typing.Optional[list[str]] = None
+) -> None:
   ensure_cloud_context()
   cmd, env = _get_deploy_cmd(bento, target, cli_envs=cli_envs)
   run_command(cmd, env=env, cwd=None)
@@ -31,401 +31,413 @@ T = typing.TypeVar('T')
|
||||
|
||||
|
||||
class ContextVar(typing.Generic[T]):
|
||||
def __init__(self, default: T):
|
||||
self._stack: list[T] = []
|
||||
self._default = default
|
||||
def __init__(self, default: T):
|
||||
self._stack: list[T] = []
|
||||
self._default = default
|
||||
|
||||
def get(self) -> T:
|
||||
if self._stack:
|
||||
return self._stack[-1]
|
||||
return self._default
|
||||
def get(self) -> T:
|
||||
if self._stack:
|
||||
return self._stack[-1]
|
||||
return self._default
|
||||
|
||||
def set(self, value: T) -> None:
|
||||
self._stack.append(value)
|
||||
def set(self, value: T) -> None:
|
||||
self._stack.append(value)
|
||||
|
||||
@contextmanager
|
||||
def patch(self, value: T) -> typing.Iterator[None]:
|
||||
self._stack.append(value)
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
self._stack.pop()
|
||||
@contextmanager
|
||||
def patch(self, value: T) -> typing.Iterator[None]:
|
||||
self._stack.append(value)
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
self._stack.pop()
|
||||
|
||||
|
||||
VERBOSE_LEVEL = ContextVar(10)
|
||||
VERBOSE_LEVEL = ContextVar(0)
|
||||
INTERACTIVE = ContextVar(False)
|
||||
|
||||
|
||||
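
The ContextVar above is a small stack-backed override holder (a local helper, not the stdlib contextvars module): set() pushes a value, patch() scopes one to a with-block, and get() falls back to the default. A minimal usage sketch, assuming only the names defined in this file:

  # hypothetical sketch; VERBOSE_LEVEL and output live in openllm.common
  from openllm.common import VERBOSE_LEVEL, output

  output('always shown', level=0)
  output('debug detail', level=20)      # dropped while verbosity is below 20
  with VERBOSE_LEVEL.patch(20):
    output('debug detail', level=20)    # shown: patch() raised the level for this block
  # the pop() inside patch() restores the previous verbosity here
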
def output(content: typing.Any, level: int = 0, style: str | None = None, end: str | None = None) -> None:
    if level > VERBOSE_LEVEL.get():
        return
def output(
  content: typing.Any, level: int = 0, style: str | None = None, end: str | None = None
) -> None:
  if level > VERBOSE_LEVEL.get():
    return

    if not isinstance(content, str):
        out = io.StringIO()
        pyaml.pprint(content, dst=out, sort_dicts=False, sort_keys=False)
        questionary.print(out.getvalue(), style=style, end='' if end is None else end)
        out.close()
    else:
        questionary.print(content, style=style, end='\n' if end is None else end)
  if not isinstance(content, str):
    out = io.StringIO()
    pyaml.pprint(content, dst=out, sort_dicts=False, sort_keys=False)
    questionary.print(out.getvalue(), style=style, end='' if end is None else end)
    out.close()
  else:
    questionary.print(content, style=style, end='\n' if end is None else end)


class Config(pydantic.BaseModel):
    repos: dict[str, str] = pydantic.Field(
        default_factory=lambda: {'default': 'https://github.com/bentoml/openllm-models@main'}
    )
    default_repo: str = 'default'
  repos: dict[str, str] = pydantic.Field(
    default_factory=lambda: {'default': 'https://github.com/bentoml/openllm-models@main'}
  )
  default_repo: str = 'default'

    def tolist(self) -> dict[str, typing.Any]:
        return dict(repos=self.repos, default_repo=self.default_repo)
  def tolist(self) -> dict[str, typing.Any]:
    return dict(repos=self.repos, default_repo=self.default_repo)


def load_config() -> Config:
    if CONFIG_FILE.exists():
        try:
            with open(CONFIG_FILE) as f:
                return Config(**json.load(f))
        except json.JSONDecodeError:
            return Config()
    return Config()
  if CONFIG_FILE.exists():
    try:
      with open(CONFIG_FILE) as f:
        return Config(**json.load(f))
    except json.JSONDecodeError:
      return Config()
  return Config()


def save_config(config: Config) -> None:
    with open(CONFIG_FILE, 'w') as f:
        json.dump(config.tolist(), f, indent=2)
  with open(CONFIG_FILE, 'w') as f:
    json.dump(config.tolist(), f, indent=2)


class BentoMetadata(typing.TypedDict):
    name: str
    version: str
    labels: dict[str, str]
    envs: list[dict[str, str]]
    services: list[dict[str, typing.Any]]
    schema: dict[str, typing.Any]
  name: str
  version: str
  labels: dict[str, str]
  envs: list[dict[str, str]]
  services: list[dict[str, typing.Any]]
  schema: dict[str, typing.Any]

class EnvVars(UserDict[str, str]):
    """
    A dictionary-like object that is sorted by key and only keeps the environment variables that have a value.
    """
  """
  A dictionary-like object that is sorted by key and only keeps the environment variables that have a value.
  """

    @classmethod
    def __get_pydantic_core_schema__(
        cls: type[EnvVars], source_type: type[typing.Any], handler: typing.Callable[..., typing.Any]
    ) -> core_schema.DictSchema:
        return core_schema.dict_schema(core_schema.str_schema(), core_schema.str_schema())
  @classmethod
  def __get_pydantic_core_schema__(
    cls: type[EnvVars], source_type: type[typing.Any], handler: typing.Callable[..., typing.Any]
  ) -> core_schema.DictSchema:
    return core_schema.dict_schema(core_schema.str_schema(), core_schema.str_schema())

    def __init__(self, data: typing.Mapping[str, str] | None = None):
        super().__init__(data or {})
        self.data = {k: v for k, v in sorted(self.data.items()) if v}
  def __init__(self, data: typing.Mapping[str, str] | None = None):
    super().__init__(data or {})
    self.data = {k: v for k, v in sorted(self.data.items()) if v}

    def __hash__(self) -> int:
        return hash(tuple(sorted(self.data.items())))
  def __hash__(self) -> int:
    return hash(tuple(sorted(self.data.items())))
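
As the docstring says, EnvVars normalizes at construction time: keys are sorted and entries with empty values are dropped, which also keeps __hash__ stable. An illustrative sketch (the values are hypothetical, not part of the diff):

  env = EnvVars({'B': '2', 'A': '1', 'EMPTY': ''})
  assert dict(env) == {'A': '1', 'B': '2'}                  # 'EMPTY' was dropped
  assert hash(env) == hash(EnvVars({'A': '1', 'B': '2'}))   # insertion order does not affect the hash
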
class RepoInfo(pydantic.BaseModel):
    name: str
    path: pathlib.Path
    url: str
    server: str
    owner: str
    repo: str
    branch: str
  name: str
  path: pathlib.Path
  url: str
  server: str
  owner: str
  repo: str
  branch: str

    def tolist(self) -> str | dict[str, typing.Any] | None:
        if VERBOSE_LEVEL.get() <= 0:
            return f'{self.name} ({self.url}@{self.branch})'
        if VERBOSE_LEVEL.get() <= 10:
            return dict(name=self.name, url=f'{self.url}@{self.branch}', path=str(self.path))
        if VERBOSE_LEVEL.get() <= 20:
            return dict(
                name=self.name,
                url=f'{self.url}@{self.branch}',
                path=str(self.path),
                server=self.server,
                owner=self.owner,
                repo=self.repo,
            )
        return None
  def tolist(self) -> str | dict[str, typing.Any] | None:
    if VERBOSE_LEVEL.get() <= 0:
      return f'{self.name} ({self.url}@{self.branch})'
    if VERBOSE_LEVEL.get() <= 10:
      return dict(name=self.name, url=f'{self.url}@{self.branch}', path=str(self.path))
    if VERBOSE_LEVEL.get() <= 20:
      return dict(
        name=self.name,
        url=f'{self.url}@{self.branch}',
        path=str(self.path),
        server=self.server,
        owner=self.owner,
        repo=self.repo,
      )
    return None


class BentoInfo(pydantic.BaseModel):
    repo: RepoInfo
    path: pathlib.Path
    alias: str = ''
  repo: RepoInfo
  path: pathlib.Path
  alias: str = ''

    def __str__(self) -> str:
        if self.repo.name == 'default':
            return f'{self.tag}'
        else:
            return f'{self.repo.name}/{self.tag}'
  def __str__(self) -> str:
    if self.repo.name == 'default':
      return f'{self.tag}'
    else:
      return f'{self.repo.name}/{self.tag}'

    @override
    def __hash__(self) -> int:
        return md5(str(self.path))
  @override
  def __hash__(self) -> int:
    return md5(str(self.path))

    @property
    def tag(self) -> str:
        if self.alias:
            return f'{self.path.parent.name}:{self.alias}'
        return f'{self.path.parent.name}:{self.path.name}'
  @property
  def tag(self) -> str:
    if self.alias:
      return f'{self.path.parent.name}:{self.alias}'
    return f'{self.path.parent.name}:{self.path.name}'

    @property
    def bentoml_tag(self) -> str:
        return f'{self.path.parent.name}:{self.path.name}'
  @property
  def bentoml_tag(self) -> str:
    return f'{self.path.parent.name}:{self.path.name}'

    @property
    def name(self) -> str:
        return self.path.parent.name
  @property
  def name(self) -> str:
    return self.path.parent.name

    @property
    def version(self) -> str:
        return self.path.name
  @property
  def version(self) -> str:
    return self.path.name

    @property
    def labels(self) -> dict[str, str]:
        return self.bento_yaml['labels']
  @property
  def labels(self) -> dict[str, str]:
    return self.bento_yaml['labels']

    @property
    def envs(self) -> list[dict[str, str]]:
        return self.bento_yaml['envs']
  @property
  def envs(self) -> list[dict[str, str]]:
    return self.bento_yaml['envs']

    @functools.cached_property
    def bento_yaml(self) -> BentoMetadata:
        bento: BentoMetadata = yaml.safe_load((self.path / 'bento.yaml').read_text())
        return bento
  @functools.cached_property
  def bento_yaml(self) -> BentoMetadata:
    bento: BentoMetadata = yaml.safe_load((self.path / 'bento.yaml').read_text())
    return bento

    @functools.cached_property
    def platforms(self) -> list[str]:
        return self.bento_yaml['labels'].get('platforms', 'linux').split(',')
  @functools.cached_property
  def platforms(self) -> list[str]:
    return self.bento_yaml['labels'].get('platforms', 'linux').split(',')

    @functools.cached_property
    def pretty_yaml(self) -> BentoMetadata | dict[str, typing.Any]:
        def _pretty_routes(routes: list[dict[str, typing.Any]]) -> dict[str, typing.Any]:
            return {
                route['route']: {
                    'input': {k: v['type'] for k, v in route['input']['properties'].items()},
                    'output': route['output']['type'],
                }
                for route in routes
            }
  @functools.cached_property
  def pretty_yaml(self) -> BentoMetadata | dict[str, typing.Any]:
    def _pretty_routes(routes: list[dict[str, typing.Any]]) -> dict[str, typing.Any]:
      return {
        route['route']: {
          'input': {k: v['type'] for k, v in route['input']['properties'].items()},
          'output': route['output']['type'],
        }
        for route in routes
      }

        if len(self.bento_yaml['services']) == 1:
            pretty_yaml: dict[str, typing.Any] = {
                'apis': _pretty_routes(self.bento_yaml['schema']['routes']),
                'resources': self.bento_yaml['services'][0]['config']['resources'],
                'envs': self.bento_yaml['envs'],
                'platforms': self.platforms,
            }
            return pretty_yaml
        return self.bento_yaml
    if len(self.bento_yaml['services']) == 1:
      pretty_yaml: dict[str, typing.Any] = {
        'apis': _pretty_routes(self.bento_yaml['schema']['routes']),
        'resources': self.bento_yaml['services'][0]['config']['resources'],
        'envs': self.bento_yaml['envs'],
        'platforms': self.platforms,
      }
      return pretty_yaml
    return self.bento_yaml

    @functools.cached_property
    def pretty_gpu(self) -> str:
        from openllm.accelerator_spec import ACCELERATOR_SPECS
  @functools.cached_property
  def pretty_gpu(self) -> str:
    from openllm.accelerator_spec import ACCELERATOR_SPECS

        try:
            resources = self.bento_yaml['services'][0]['config']['resources']
            if resources['gpu'] > 1:
                acc = ACCELERATOR_SPECS[resources['gpu_type']]
                return f'{acc.memory_size:.0f}Gx{resources["gpu"]}'
            elif resources['gpu'] > 0:
                acc = ACCELERATOR_SPECS[resources['gpu_type']]
                return f'{acc.memory_size:.0f}G'
        except KeyError:
            pass
        return ''
    try:
      resources = self.bento_yaml['services'][0]['config']['resources']
      if resources['gpu'] > 1:
        acc = ACCELERATOR_SPECS[resources['gpu_type']]
        return f'{acc.memory_size:.0f}Gx{resources["gpu"]}'
      elif resources['gpu'] > 0:
        acc = ACCELERATOR_SPECS[resources['gpu_type']]
        return f'{acc.memory_size:.0f}G'
    except KeyError:
      pass
    return ''
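
pretty_gpu condenses the resource block of bento.yaml into the short GPU-RAM label used by the model table. A reading of the code with hypothetical numbers:

  # resources = {'gpu': 2, 'gpu_type': 'nvidia-rtx-4090'} and a spec with memory_size 24.0
  # -> pretty_gpu returns '24Gx2'; with gpu == 1 it returns '24G';
  # a CPU-only bento has no 'gpu' key, so the KeyError path returns ''.
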
    def tolist(self) -> str | dict[str, typing.Any] | None:
        verbose = VERBOSE_LEVEL.get()
        if verbose <= 0:
            return str(self)
        if verbose <= 10:
            return dict(tag=self.tag, repo=self.repo.tolist(), path=str(self.path), model_card=self.pretty_yaml)
        if verbose <= 20:
            return dict(tag=self.tag, repo=self.repo.tolist(), path=str(self.path), bento_yaml=self.bento_yaml)
        return None
  def tolist(self) -> str | dict[str, typing.Any] | None:
    verbose = VERBOSE_LEVEL.get()
    if verbose <= 0:
      return str(self)
    if verbose <= 10:
      return dict(
        tag=self.tag, repo=self.repo.tolist(), path=str(self.path), model_card=self.pretty_yaml
      )
    if verbose <= 20:
      return dict(
        tag=self.tag, repo=self.repo.tolist(), path=str(self.path), bento_yaml=self.bento_yaml
      )
    return None


class VenvSpec(pydantic.BaseModel):
    python_version: str
    requirements_txt: str
    envs: EnvVars
    name_prefix: str = ''
  python_version: str
  requirements_txt: str
  envs: EnvVars
  name_prefix: str = ''

    @functools.cached_property
    def normalized_requirements_txt(self) -> str:
        parameter_lines: list[str] = []
        dependency_lines: list[str] = []
        comment_lines: list[str] = []
  @functools.cached_property
  def normalized_requirements_txt(self) -> str:
    parameter_lines: list[str] = []
    dependency_lines: list[str] = []
    comment_lines: list[str] = []

        for line in self.requirements_txt.splitlines():
            if not line.strip():
                continue
            elif line.strip().startswith('#'):
                comment_lines.append(line.strip())
            elif line.strip().startswith('-'):
                parameter_lines.append(line.strip())
            else:
                dependency_lines.append(line.strip())
    for line in self.requirements_txt.splitlines():
      if not line.strip():
        continue
      elif line.strip().startswith('#'):
        comment_lines.append(line.strip())
      elif line.strip().startswith('-'):
        parameter_lines.append(line.strip())
      else:
        dependency_lines.append(line.strip())

        parameter_lines.sort()
        dependency_lines.sort()
        return '\n'.join(parameter_lines + dependency_lines).strip()
    parameter_lines.sort()
    dependency_lines.sort()
    return '\n'.join(parameter_lines + dependency_lines).strip()

    @functools.cached_property
    def normalized_envs(self) -> str:
        return '\n'.join(f'{k}={v}' for k, v in sorted(self.envs.items(), key=lambda x: x[0]) if not v)
  @functools.cached_property
  def normalized_envs(self) -> str:
    return '\n'.join(f'{k}={v}' for k, v in sorted(self.envs.items(), key=lambda x: x[0]) if not v)

    @override
    def __hash__(self) -> int:
        return md5(self.normalized_requirements_txt, str(hash(self.normalized_envs)))
  @override
  def __hash__(self) -> int:
    return md5(self.normalized_requirements_txt, str(hash(self.normalized_envs)))


class Accelerator(pydantic.BaseModel):
    model: str
    memory_size: float
  model: str
  memory_size: float

    def __gt__(self, other: Accelerator) -> bool:
        return self.memory_size > other.memory_size
  def __gt__(self, other: Accelerator) -> bool:
    return self.memory_size > other.memory_size

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, Accelerator):
            return NotImplemented
        return self.memory_size == other.memory_size
  def __eq__(self, other: object) -> bool:
    if not isinstance(other, Accelerator):
      return NotImplemented
    return self.memory_size == other.memory_size

    def __repr__(self) -> str:
        return f'{self.model}({self.memory_size}GB)'
  def __repr__(self) -> str:
    return f'{self.model}({self.memory_size}GB)'


class DeploymentTarget(pydantic.BaseModel):
    accelerators: list[Accelerator]
    source: str = 'local'
    name: str = 'local'
    price: str = ''
    platform: str = 'linux'
  accelerators: list[Accelerator]
  source: str = 'local'
  name: str = 'local'
  price: str = ''
  platform: str = 'linux'

    @override
    def __hash__(self) -> int:
        return hash(self.source)
  @override
  def __hash__(self) -> int:
    return hash(self.source)

    @property
    def accelerators_repr(self) -> str:
        accs = {a.model for a in self.accelerators}
        if len(accs) == 0:
            return 'null'
        if len(accs) == 1:
            a = self.accelerators[0]
            return f'{a.model} x{len(self.accelerators)}'
        return ', '.join((f'{a.model}' for a in self.accelerators))
  @property
  def accelerators_repr(self) -> str:
    accs = {a.model for a in self.accelerators}
    if len(accs) == 0:
      return 'null'
    if len(accs) == 1:
      a = self.accelerators[0]
      return f'{a.model} x{len(self.accelerators)}'
    return ', '.join((f'{a.model}' for a in self.accelerators))
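
accelerators_repr collapses the accelerator list into the short label shown when picking a deployment target. A sketch with hypothetical hardware names:

  target = DeploymentTarget(accelerators=[Accelerator(model='A100', memory_size=80.0)] * 4)
  assert target.accelerators_repr == 'A100 x4'   # one distinct model -> 'model xN'
  assert DeploymentTarget(accelerators=[]).accelerators_repr == 'null'
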
def run_command(
    cmd: list[str],
    cwd: str | None = None,
    env: EnvVars | None = None,
    copy_env: bool = True,
    venv: pathlib.Path | None = None,
    silent: bool = False,
  cmd: list[str],
  cwd: str | None = None,
  env: EnvVars | None = None,
  copy_env: bool = True,
  venv: pathlib.Path | None = None,
  silent: bool = False,
) -> subprocess.CompletedProcess[typing.Any]:
    env = env or EnvVars({})
    cmd = [str(c) for c in cmd]
    bin_dir = 'Scripts' if os.name == 'nt' else 'bin'
    if not silent:
        output('\n')
        if cwd:
            output(f'$ cd {cwd}', style='orange')
        if env:
            for k, v in env.items():
                output(f'$ export {k}={shlex.quote(v)}', style='orange')
        if venv:
            output(f'$ source {venv / "bin" / "activate"}', style='orange')
        output(f'$ {" ".join(cmd)}', style='orange')

  env = env or EnvVars({})
  cmd = [str(c) for c in cmd]
  bin_dir = 'Scripts' if os.name == 'nt' else 'bin'
  if not silent:
    output('\n')
    if cwd:
      output(f'$ cd {cwd}', style='orange')
    if env:
      for k, v in env.items():
        output(f'$ export {k}={shlex.quote(v)}', style='orange')
    if venv:
      py = venv / bin_dir / f'python{sysconfig.get_config_var("EXE")}'
      output(f'$ source {venv / "bin" / "activate"}', style='orange')
    output(f'$ {" ".join(cmd)}', style='orange')

    if venv:
        py = venv / bin_dir / f'python{sysconfig.get_config_var("EXE")}'
    else:
        py = pathlib.Path(sys.executable)

    if copy_env:
        env = EnvVars({**os.environ, **env})

    if cmd and cmd[0] == 'bentoml':
        cmd = [py.__fspath__(), '-m', 'bentoml', *cmd[1:]]
    if cmd and cmd[0] == 'python':
        cmd = [py.__fspath__(), *cmd[1:]]

    try:
        if silent:
            return subprocess.run(
                cmd, cwd=cwd, env=env, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True
            )
        else:
  else:
    py = pathlib.Path(sys.executable)

  if copy_env:
    env = EnvVars({**os.environ, **env})

  if cmd and cmd[0] == 'bentoml':
    cmd = [py.__fspath__(), '-m', 'bentoml'] + cmd[1:]
  if cmd and cmd[0] == 'python':
    cmd = [py.__fspath__()] + cmd[1:]

  try:
    if silent:
      return subprocess.run(
        cmd, cwd=cwd, env=env, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True
      )
    else:
      return subprocess.run(cmd, cwd=cwd, env=env, check=True)
  except Exception as e:
    if VERBOSE_LEVEL.get() >= 20:
      output(str(e), style='red')
    raise typer.Exit(1)
            return subprocess.run(cmd, cwd=cwd, env=env, check=True)
    except Exception as e:
        if VERBOSE_LEVEL.get() >= 20:
            output(str(e), style='red')
        raise typer.Exit(1)


async def stream_command_output(stream: asyncio.streams.StreamReader | None, style: str = 'gray') -> None:
    if stream:
        async for line in stream:
            output(line.decode(), style=style, end='')
async def stream_command_output(
  stream: asyncio.streams.StreamReader | None, style: str = 'gray'
) -> None:
  if stream:
    async for line in stream:
      output(line.decode(), style=style, end='')


@asynccontextmanager
async def async_run_command(
    cmd: list[str],
    cwd: str | None = None,
    env: EnvVars | None = None,
    copy_env: bool = True,
    venv: pathlib.Path | None = None,
    silent: bool = True,
  cmd: list[str],
  cwd: str | None = None,
  env: EnvVars | None = None,
  copy_env: bool = True,
  venv: pathlib.Path | None = None,
  silent: bool = True,
) -> typing.AsyncGenerator[asyncio.subprocess.Process]:
    env = env or EnvVars({})
    cmd = [str(c) for c in cmd]

    if not silent:
        output('\n')
        if cwd:
            output(f'$ cd {cwd}', style='orange')
        if env:
            for k, v in env.items():
                output(f'$ export {k}={shlex.quote(v)}', style='orange')
        if venv:
            output(f'$ source {venv / "bin" / "activate"}', style='orange')
        output(f'$ {" ".join(cmd)}', style='orange')
  env = env or EnvVars({})
  cmd = [str(c) for c in cmd]

  if not silent:
    output('\n')
    if cwd:
      output(f'$ cd {cwd}', style='orange')
    if env:
      for k, v in env.items():
        output(f'$ export {k}={shlex.quote(v)}', style='orange')
    if venv:
      py = venv / 'bin' / 'python'
    else:
      py = pathlib.Path(sys.executable)
    output(f'$ source {venv / "bin" / "activate"}', style='orange')
    output(f'$ {" ".join(cmd)}', style='orange')

    if copy_env:
        env = EnvVars({**os.environ, **env})
    if venv:
        py = venv / 'bin' / 'python'
    else:
        py = pathlib.Path(sys.executable)

    if cmd and cmd[0] == 'bentoml':
        cmd = [py.__fspath__(), '-m', 'bentoml'] + cmd[1:]
    if cmd and cmd[0] == 'python':
        cmd = [py.__fspath__()] + cmd[1:]
  if copy_env:
    env = EnvVars({**os.environ, **env})

    proc = None
    try:
        proc = await asyncio.create_subprocess_shell(
            ' '.join(map(str, cmd)), stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, cwd=cwd, env=env
        )
        yield proc
    except subprocess.CalledProcessError:
        output('Command failed', style='red')
        raise typer.Exit(1)
    finally:
        if proc:
            proc.send_signal(signal.SIGINT)
            await proc.wait()
  if cmd and cmd[0] == 'bentoml':
    cmd = [py.__fspath__(), '-m', 'bentoml', *cmd[1:]]
  if cmd and cmd[0] == 'python':
    cmd = [py.__fspath__(), *cmd[1:]]

  proc = None
  try:
    proc = await asyncio.create_subprocess_shell(
      ' '.join(map(str, cmd)),
      stdout=asyncio.subprocess.PIPE,
      stderr=asyncio.subprocess.PIPE,
      cwd=cwd,
      env=env,
    )
    yield proc
  except subprocess.CalledProcessError:
    output('Command failed', style='red')
    raise typer.Exit(1)
  finally:
    if proc:
      proc.send_signal(signal.SIGINT)
      await proc.wait()


def md5(*strings: str) -> int:
    m = hashlib.md5()
    for s in strings:
        m.update(s.encode())
    return int(m.hexdigest(), 16)
  m = hashlib.md5()
  for s in strings:
    m.update(s.encode())
  return int(m.hexdigest(), 16)
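
Unlike Python's builtin hash(), which is salted per process, this md5 helper yields a stable integer digest, which is why BentoInfo.__hash__ and VenvSpec.__hash__ build on it. A small sketch:

  assert md5('a', 'b') == md5('a', 'b')   # deterministic across runs and machines
  assert md5('a', 'b') == md5('ab')       # inputs are concatenated into one digest stream
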
@@ -4,103 +4,114 @@ import asyncio, time, typing
import httpx, openai

from openai.types.chat import ChatCompletionAssistantMessageParam, ChatCompletionUserMessageParam
from openllm.common import BentoInfo, EnvVars, async_run_command, output, run_command, stream_command_output
from openllm.common import (
  BentoInfo,
  EnvVars,
  async_run_command,
  output,
  run_command,
  stream_command_output,
)
from openllm.venv import ensure_venv

if typing.TYPE_CHECKING:
    from openai.types.chat import ChatCompletionMessageParam
  from openai.types.chat import ChatCompletionMessageParam


def prep_env_vars(bento: BentoInfo) -> None:
    import os
  import os

    env_vars = bento.envs
    for env_var in env_vars:
        if not env_var.get('value'):
            continue
        key = env_var['name']
        value = env_var['value']
        os.environ[key] = value
  env_vars = bento.envs
  for env_var in env_vars:
    if not env_var.get('value'):
      continue
    key = env_var['name']
    value = env_var['value']
    os.environ[key] = value


def _get_serve_cmd(bento: BentoInfo, port: int = 3000) -> tuple[list[str], EnvVars]:
    cmd = ['bentoml', 'serve', bento.bentoml_tag]
    if port != 3000:
        cmd += ['--port', str(port)]
    return cmd, EnvVars({'BENTOML_HOME': f'{bento.repo.path}/bentoml'})
  cmd = ['bentoml', 'serve', bento.bentoml_tag]
  if port != 3000:
    cmd += ['--port', str(port)]
  return cmd, EnvVars({'BENTOML_HOME': f'{bento.repo.path}/bentoml'})
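
_get_serve_cmd only assembles the argv and the BENTOML_HOME override; run_command later rewrites the leading 'bentoml' token to '<python> -m bentoml' from the resolved venv. An illustrative call, assuming a hypothetical bento whose bentoml_tag is 'llama3.1:8b':

  cmd, env = _get_serve_cmd(bento, port=3001)
  # cmd -> ['bentoml', 'serve', 'llama3.1:8b', '--port', '3001']
  # env -> EnvVars({'BENTOML_HOME': f'{bento.repo.path}/bentoml'})
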
def serve(bento: BentoInfo, port: int = 3000) -> None:
    prep_env_vars(bento)
    cmd, env = _get_serve_cmd(bento, port=port)
    venv = ensure_venv(bento, runtime_envs=env)
    output(f'Access the Chat UI at http://localhost:{port}/chat (or with your IP)')
    run_command(cmd, env=env, cwd=None, venv=venv)
  prep_env_vars(bento)
  cmd, env = _get_serve_cmd(bento, port=port)
  venv = ensure_venv(bento, runtime_envs=env)
  output(f'Access the Chat UI at http://localhost:{port}/chat (or with your IP)')
  run_command(cmd, env=env, cwd=None, venv=venv)

async def _run_model(bento: BentoInfo, port: int = 3000, timeout: int = 600) -> None:
    cmd, env = _get_serve_cmd(bento, port)
    venv = ensure_venv(bento, runtime_envs=env)
    async with async_run_command(cmd, env=env, cwd=None, venv=venv, silent=False) as server_proc:
        output(f'Model server started {server_proc.pid}')
  cmd, env = _get_serve_cmd(bento, port)
  venv = ensure_venv(bento, runtime_envs=env)
  async with async_run_command(cmd, env=env, cwd=None, venv=venv, silent=False) as server_proc:
    output(f'Model server started {server_proc.pid}')

        stdout_streamer = None
        stderr_streamer = None
        start_time = time.time()
    stdout_streamer = None
    stderr_streamer = None
    start_time = time.time()

        output('Model loading...', style='green')
        for _ in range(timeout):
            try:
                resp = httpx.get(f'http://localhost:{port}/readyz', timeout=3)
                if resp.status_code == 200:
                    break
            except httpx.RequestError:
                if time.time() - start_time > 30:
                    if not stdout_streamer:
                        stdout_streamer = asyncio.create_task(stream_command_output(server_proc.stdout, style='gray'))
                    if not stderr_streamer:
                        stderr_streamer = asyncio.create_task(
                            stream_command_output(server_proc.stderr, style='#BD2D0F')
                        )
                await asyncio.sleep(1)
        else:
            output('Model failed to load', style='red')
            server_proc.terminate()
            return
    output('Model loading...', style='green')
    for _ in range(timeout):
      try:
        resp = httpx.get(f'http://localhost:{port}/readyz', timeout=3)
        if resp.status_code == 200:
          break
      except httpx.RequestError:
        if time.time() - start_time > 30:
          if not stdout_streamer:
            stdout_streamer = asyncio.create_task(
              stream_command_output(server_proc.stdout, style='gray')
            )
          if not stderr_streamer:
            stderr_streamer = asyncio.create_task(
              stream_command_output(server_proc.stderr, style='#BD2D0F')
            )
        await asyncio.sleep(1)
    else:
      output('Model failed to load', style='red')
      server_proc.terminate()
      return

        if stdout_streamer:
            stdout_streamer.cancel()
        if stderr_streamer:
            stderr_streamer.cancel()
    if stdout_streamer:
      stdout_streamer.cancel()
    if stderr_streamer:
      stderr_streamer.cancel()

        output('Model is ready', style='green')
        messages: list[ChatCompletionMessageParam] = []
    output('Model is ready', style='green')
    messages: list[ChatCompletionMessageParam] = []

        client = openai.AsyncOpenAI(base_url=f'http://localhost:{port}/v1', api_key='local')
        while True:
            try:
                message = input('user: ')
                if message == '':
                    output('empty message, please enter something', style='yellow')
                    continue
                messages.append(ChatCompletionUserMessageParam(role='user', content=message))
                output('assistant: ', end='', style='lightgreen')
                assistant_message = ''
                stream = await client.chat.completions.create(
                    model=(await client.models.list()).data[0].id, messages=messages, stream=True
                )
                async for chunk in stream:
                    text = chunk.choices[0].delta.content or ''
                    assistant_message += text
                    output(text, end='', style='lightgreen')
                messages.append(ChatCompletionAssistantMessageParam(role='assistant', content=assistant_message))
                output('')
            except KeyboardInterrupt:
                break
        output('\nStopping model server...', style='green')
    output('Stopped model server', style='green')
    client = openai.AsyncOpenAI(base_url=f'http://localhost:{port}/v1', api_key='local')
    while True:
      try:
        message = input('user: ')
        if message == '':
          output('empty message, please enter something', style='yellow')
          continue
        messages.append(ChatCompletionUserMessageParam(role='user', content=message))
        output('assistant: ', end='', style='lightgreen')
        assistant_message = ''
        stream = await client.chat.completions.create(
          model=(await client.models.list()).data[0].id, messages=messages, stream=True
        )
        async for chunk in stream:
          text = chunk.choices[0].delta.content or ''
          assistant_message += text
          output(text, end='', style='lightgreen')
        messages.append(
          ChatCompletionAssistantMessageParam(role='assistant', content=assistant_message)
        )
        output('')
      except KeyboardInterrupt:
        break
    output('\nStopping model server...', style='green')
  output('Stopped model server', style='green')


def run(bento: BentoInfo, port: int = 3000, timeout: int = 600) -> None:
    prep_env_vars(bento)
    asyncio.run(_run_model(bento, port=port, timeout=timeout))
  prep_env_vars(bento)
  asyncio.run(_run_model(bento, port=port, timeout=timeout))

@@ -14,155 +14,159 @@ app = OpenLLMTyper(help='manage models')

@app.command(help='get model')
def get(tag: str, repo: typing.Optional[str] = None, verbose: bool = False) -> None:
    if verbose:
        VERBOSE_LEVEL.set(20)
    bento_info = ensure_bento(tag, repo_name=repo)
    if bento_info:
        output_(bento_info)
  if verbose:
    VERBOSE_LEVEL.set(20)
  bento_info = ensure_bento(tag, repo_name=repo)
  if bento_info:
    output_(bento_info)


@app.command(name='list', help='list available models')
def list_model(
    tag: typing.Optional[str] = None,
    repo: typing.Optional[str] = None,
    verbose: bool = False,
    output: typing.Optional[str] = typer.Option(None, hidden=True),
  tag: typing.Optional[str] = None,
  repo: typing.Optional[str] = None,
  verbose: bool = False,
  output: typing.Optional[str] = typer.Option(None, hidden=True),
) -> None:
    if verbose:
        VERBOSE_LEVEL.set(20)
  if verbose:
    VERBOSE_LEVEL.set(20)

    bentos = list_bento(tag=tag, repo_name=repo)
    bentos.sort(key=lambda x: x.name)
  bentos = list_bento(tag=tag, repo_name=repo)
  bentos.sort(key=lambda x: x.name)

    seen = set()
  seen = set()

    def is_seen(value: str) -> bool:
        if value in seen:
            return True
        seen.add(value)
        return False
  def is_seen(value: str) -> bool:
    if value in seen:
      return True
    seen.add(value)
    return False

    if output == 'readme':
        # Parse parameters from bento.tag (e.g. "model:671b-it" -> "671b", 'model:something-long-78b' -> '78b')
        questionary.print(
            json.dumps({
                f'{bento.name}': dict(
                    tag=bento.tag,
                    version=bento.tag.split(':')[-1],
                    pretty_gpu=bento.pretty_gpu,
                    command=f'openllm serve {bento.tag}',
                )
                for bento in bentos
                if not is_seen(bento.name)
            })
  if output == 'readme':
    # Parse parameters from bento.tag (e.g. "model:671b-it" -> "671b", 'model:something-long-78b' -> '78b')
    questionary.print(
      json.dumps({
        f'{bento.name}': dict(
          tag=bento.tag,
          version=bento.tag.split(':')[-1],
          pretty_gpu=bento.pretty_gpu,
          command=f'openllm serve {bento.tag}',
        )
        return

    table = tabulate.tabulate(
        [
            [
                '' if is_seen(bento.name) else bento.name,
                bento.tag,
                bento.repo.name,
                bento.pretty_gpu,
                ','.join(bento.platforms),
            ]
            for bento in bentos
        ],
        headers=['model', 'version', 'repo', 'required GPU RAM', 'platforms'],
        for bento in bentos
        if not is_seen(bento.name)
      })
    )
    output_(table)
    return

  table = tabulate.tabulate(
    [
      [
        '' if is_seen(bento.name) else bento.name,
        bento.tag,
        bento.repo.name,
        bento.pretty_gpu,
        ','.join(bento.platforms),
      ]
      for bento in bentos
    ],
    headers=['model', 'version', 'repo', 'required GPU RAM', 'platforms'],
  )
  output_(table)


def ensure_bento(
    model: str, target: typing.Optional[DeploymentTarget] = None, repo_name: typing.Optional[str] = None
  model: str,
  target: typing.Optional[DeploymentTarget] = None,
  repo_name: typing.Optional[str] = None,
) -> BentoInfo:
    bentos = list_bento(model, repo_name=repo_name)
    if len(bentos) == 0:
        output_(f'No model found for {model}', style='red')
        raise typer.Exit(1)

    if len(bentos) == 1:
        output_(f'Found model {bentos[0]}', style='green')
        if target is not None and can_run(bentos[0], target) <= 0:
            output_(
                f'The machine({target.name}) with {target.accelerators_repr} does not appear to have sufficient '
                f'resources to run model {bentos[0]}\n',
                style='yellow',
            )
        return bentos[0]

    # multiple models, pick one according to target
    output_(f'Multiple models match {model}, did you mean one of these?', style='red')
    list_model(model, repo=repo_name)
  bentos = list_bento(model, repo_name=repo_name)
  if len(bentos) == 0:
    output_(f'No model found for {model}', style='red')
    raise typer.Exit(1)

  if len(bentos) == 1:
    output_(f'Found model {bentos[0]}', style='green')
    if target is not None and can_run(bentos[0], target) <= 0:
      output_(
        f'The machine({target.name}) with {target.accelerators_repr} does not appear to have sufficient '
        f'resources to run model {bentos[0]}\n',
        style='yellow',
      )
    return bentos[0]

  # multiple models, pick one according to target
  output_(f'Multiple models match {model}, did you mean one of these?', style='red')
  list_model(model, repo=repo_name)
  raise typer.Exit(1)


NUMBER_RE = re.compile(r'\d+')


def _extract_first_number(s: str) -> int:
    match = NUMBER_RE.search(s)
    if match:
        return int(match.group())
    else:
        return 100
  match = NUMBER_RE.search(s)
  if match:
    return int(match.group())
  else:
    return 100
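
_extract_first_number supplies the numeric component of the sort key in list_bento below, so version directories order by their parameter count, with digit-free names pushed behind the numbered ones. For instance:

  assert _extract_first_number('8b-instruct') == 8
  assert _extract_first_number('70b') == 70
  assert _extract_first_number('latest') == 100   # no digits -> sorts after numbered tags
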
def list_bento(
    tag: typing.Optional[str] = None, repo_name: typing.Optional[str] = None, include_alias: bool = False
  tag: typing.Optional[str] = None,
  repo_name: typing.Optional[str] = None,
  include_alias: bool = False,
) -> typing.List[BentoInfo]:
    ensure_repo_updated()
  ensure_repo_updated()

    if repo_name is None and tag and '/' in tag:
        repo_name, tag = tag.split('/', 1)
  if repo_name is None and tag and '/' in tag:
    repo_name, tag = tag.split('/', 1)

    repo_list = list_repo(repo_name)
    if repo_name is not None:
        repo_map = {repo.name: repo for repo in repo_list}
        if repo_name not in repo_map:
            output_(f'Repo `{repo_name}` not found, did you mean one of these?')
            for repo_name in repo_map:
                output_(f' {repo_name}')
            raise typer.Exit(1)
  repo_list = list_repo(repo_name)
  if repo_name is not None:
    repo_map = {repo.name: repo for repo in repo_list}
    if repo_name not in repo_map:
      output_(f'Repo `{repo_name}` not found, did you mean one of these?')
      for repo_name in repo_map:
        output_(f' {repo_name}')
      raise typer.Exit(1)

    if not tag:
        glob_pattern = 'bentoml/bentos/*/*'
    elif ':' in tag:
        bento_name, version = tag.split(':')
        glob_pattern = f'bentoml/bentos/{bento_name}/{version}'
    else:
        glob_pattern = f'bentoml/bentos/{tag}/*'
  if not tag:
    glob_pattern = 'bentoml/bentos/*/*'
  elif ':' in tag:
    bento_name, version = tag.split(':')
    glob_pattern = f'bentoml/bentos/{bento_name}/{version}'
  else:
    glob_pattern = f'bentoml/bentos/{tag}/*'

    model_list: list[BentoInfo] = []
    repo_list = list_repo(repo_name)
    for repo in repo_list:
        paths = sorted(
            repo.path.glob(glob_pattern),
            key=lambda x: (x.parent.name, _extract_first_number(x.name), len(x.name), x.name),
        )
        for path in paths:
            if path.is_dir() and (path / 'bento.yaml').exists():
                model = BentoInfo(repo=repo, path=path)
            elif path.is_file():
                with open(path) as f:
                    origin_name = f.read().strip()
                origin_path = path.parent / origin_name
                model = BentoInfo(alias=path.name, repo=repo, path=origin_path)
            else:
                model = None
            if model:
                model_list.append(model)
  model_list: list[BentoInfo] = []
  repo_list = list_repo(repo_name)
  for repo in repo_list:
    paths = sorted(
      repo.path.glob(glob_pattern),
      key=lambda x: (x.parent.name, _extract_first_number(x.name), len(x.name), x.name),
    )
    for path in paths:
      if path.is_dir() and (path / 'bento.yaml').exists():
        model = BentoInfo(repo=repo, path=path)
      elif path.is_file():
        with open(path) as f:
          origin_name = f.read().strip()
        origin_path = path.parent / origin_name
        model = BentoInfo(alias=path.name, repo=repo, path=origin_path)
      else:
        model = None
      if model:
        model_list.append(model)

    if not include_alias:
        seen: set[str] = set()
        # we rely on the side effect of seen.add() inside the condition here.
        model_list = [
            x
            for x in model_list
            if not (
                f'{x.bento_yaml["name"]}:{x.bento_yaml["version"]}' in seen
                or seen.add(f'{x.bento_yaml["name"]}:{x.bento_yaml["version"]}')  # type: ignore
            )
        ]
    return model_list
  if not include_alias:
    seen: set[str] = set()
    # we rely on the side effect of seen.add() inside the condition here.
    model_list = [
      x
      for x in model_list
      if not (
        f'{x.bento_yaml["name"]}:{x.bento_yaml["version"]}' in seen
        or seen.add(f'{x.bento_yaml["name"]}:{x.bento_yaml["version"]}')  # type: ignore
      )
    ]
  return model_list

@@ -4,7 +4,15 @@ import datetime, subprocess, re, shutil, typing, os, pathlib
|
||||
import pyaml, questionary, typer
|
||||
|
||||
from openllm.analytic import OpenLLMTyper
|
||||
from openllm.common import INTERACTIVE, REPO_DIR, VERBOSE_LEVEL, RepoInfo, load_config, output, save_config
|
||||
from openllm.common import (
|
||||
INTERACTIVE,
|
||||
REPO_DIR,
|
||||
VERBOSE_LEVEL,
|
||||
RepoInfo,
|
||||
load_config,
|
||||
output,
|
||||
save_config,
|
||||
)
|
||||
|
||||
UPDATE_INTERVAL = datetime.timedelta(days=3)
|
||||
TEST_REPO = os.getenv('OPENLLM_TEST_REPO', None) # for testing
|
||||
@@ -15,223 +23,248 @@ app = OpenLLMTyper(help='manage repos')
|
||||
|
||||
@app.command(name='list', help='list available repo')
|
||||
def cmd_list(verbose: bool = False) -> None:
|
||||
if verbose:
|
||||
VERBOSE_LEVEL.set(20)
|
||||
pyaml.pprint(list_repo(), sort_dicts=False, sort_keys=False)
|
||||
if verbose:
|
||||
VERBOSE_LEVEL.set(20)
|
||||
pyaml.pprint(list_repo(), sort_dicts=False, sort_keys=False)
|
||||
|
||||
|
||||
@app.command(name='remove', help='remove given repo')
|
||||
def cmd_remove(name: str) -> None:
|
||||
if TEST_REPO:
|
||||
return
|
||||
config = load_config()
|
||||
if name not in config.repos:
|
||||
output(f'Repo {name} does not exist', style='red')
|
||||
return
|
||||
if TEST_REPO:
|
||||
return
|
||||
config = load_config()
|
||||
if name not in config.repos:
|
||||
output(f'Repo {name} does not exist', style='red')
|
||||
return
|
||||
|
||||
del config.repos[name]
|
||||
save_config(config)
|
||||
output(f'Repo {name} removed', style='green')
|
||||
del config.repos[name]
|
||||
save_config(config)
|
||||
output(f'Repo {name} removed', style='green')
|
||||
|
||||
|
||||
@app.command(name='update', help='update default repo')
|
||||
def cmd_update() -> None:
|
||||
if TEST_REPO:
|
||||
return
|
||||
repos_in_use = set()
|
||||
for repo in list_repo():
|
||||
repos_in_use.add((repo.server, repo.owner, repo.repo, repo.branch))
|
||||
if repo.path.exists():
|
||||
shutil.rmtree(repo.path, ignore_errors=True)
|
||||
repo.path.parent.mkdir(parents=True, exist_ok=True)
|
||||
try:
|
||||
_clone_repo(repo)
|
||||
output('')
|
||||
output(f'Repo `{repo.name}` updated', style='green')
|
||||
except Exception as e:
|
||||
shutil.rmtree(repo.path, ignore_errors=True)
|
||||
output(f'Failed to clone repo {repo.name}', style='red')
|
||||
output(e)
|
||||
for c in REPO_DIR.glob('*/*/*/*'):
|
||||
repo_spec = tuple(c.parts[-4:])
|
||||
if repo_spec not in repos_in_use:
|
||||
shutil.rmtree(c, ignore_errors=True)
|
||||
output(f'Removed unused repo cache {c}')
|
||||
with open(REPO_DIR / 'last_update', 'w') as f:
|
||||
f.write(datetime.datetime.now().isoformat())
|
||||
for repo in list_repo():
|
||||
_complete_alias(repo.name)
|
||||
if TEST_REPO:
|
||||
return
|
||||
|
||||
repos_in_use = set()
|
||||
for repo in list_repo():
|
||||
# Show simplified output if not in verbose mode
|
||||
if VERBOSE_LEVEL.get() <= 0:
|
||||
output(f'updating repo {repo.name}', style='green')
|
||||
|
||||
repos_in_use.add((repo.server, repo.owner, repo.repo, repo.branch))
|
||||
if repo.path.exists():
|
||||
shutil.rmtree(repo.path, ignore_errors=True)
|
||||
repo.path.parent.mkdir(parents=True, exist_ok=True)
|
||||
try:
|
||||
_clone_repo(repo)
|
||||
if VERBOSE_LEVEL.get() > 0:
|
||||
output('')
|
||||
output(f'Repo `{repo.name}` updated', style='green')
|
||||
except Exception as e:
|
||||
shutil.rmtree(repo.path, ignore_errors=True)
|
||||
if VERBOSE_LEVEL.get() > 0:
|
||||
output(f'Failed to clone repo {repo.name}', style='red')
|
||||
output(e)
|
||||
for c in REPO_DIR.glob('*/*/*/*'):
|
||||
repo_spec = tuple(c.parts[-4:])
|
||||
if repo_spec not in repos_in_use:
|
||||
shutil.rmtree(c, ignore_errors=True)
|
||||
if VERBOSE_LEVEL.get() > 0:
|
||||
output(f'Removed unused repo cache {c}')
|
||||
with open(REPO_DIR / 'last_update', 'w') as f:
|
||||
f.write(datetime.datetime.now().isoformat())
|
||||
for repo in list_repo():
|
||||
_complete_alias(repo.name)
|
||||
|
||||
|
||||
@app.command(name='add', help='add new repo')
|
||||
def cmd_add(name: str, repo: str) -> None:
|
||||
if TEST_REPO:
|
||||
return
|
||||
name = name.lower()
|
||||
if not name.isidentifier():
|
||||
output(f'Invalid repo name: {name}, should only contain letters, numbers and underscores', style='red')
|
||||
return
|
||||
if TEST_REPO:
|
||||
return
|
||||
name = name.lower()
|
||||
if not name.isidentifier():
|
||||
output(
|
||||
f'Invalid repo name: {name}, should only contain letters, numbers and underscores',
|
||||
style='red',
|
||||
)
|
||||
return
|
||||
|
||||
try:
|
||||
parse_repo_url(repo)
|
||||
except ValueError:
|
||||
output(f'Invalid repo url: {repo}', style='red')
|
||||
return
|
||||
try:
|
||||
parse_repo_url(repo)
|
||||
except ValueError:
|
||||
output(f'Invalid repo url: {repo}', style='red')
|
||||
return
|
||||
|
||||
config = load_config()
|
||||
if name in config.repos:
|
||||
override = questionary.confirm(f'Repo {name} already exists({config.repos[name]}), override?').ask()
|
||||
if not override:
|
||||
return
|
||||
config = load_config()
|
||||
if name in config.repos:
|
||||
override = questionary.confirm(
|
||||
f'Repo {name} already exists({config.repos[name]}), override?'
|
||||
).ask()
|
||||
if not override:
|
||||
return
|
||||
|
||||
config.repos[name] = repo
|
||||
save_config(config)
|
||||
output(f'Repo {name} added', style='green')
|
||||
config.repos[name] = repo
|
||||
save_config(config)
|
||||
output(f'Repo {name} added', style='green')
|
||||
|
||||
|
||||
@app.command(name='default', help='get default repo path')
|
||||
def default() -> typing.Optional[pathlib.Path]:
|
||||
if TEST_REPO:
|
||||
return None
|
||||
output((info := parse_repo_url(load_config().repos['default'], 'default')).path)
|
||||
return info.path
|
||||
if TEST_REPO:
|
||||
return None
|
||||
output((info := parse_repo_url(load_config().repos['default'], 'default')).path)
|
||||
return info.path
|
||||
|
||||
|
||||
def list_repo(repo_name: typing.Optional[str] = None) -> typing.List[RepoInfo]:
|
||||
if TEST_REPO:
|
||||
return [
|
||||
RepoInfo(
|
||||
name='default',
|
||||
url='',
|
||||
server='test',
|
||||
owner='test',
|
||||
repo='test',
|
||||
branch='main',
|
||||
path=pathlib.Path(TEST_REPO),
|
||||
)
|
||||
]
|
||||
config = load_config()
|
||||
repos = []
|
||||
for _repo_name, repo_url in config.repos.items():
|
||||
if repo_name is not None and _repo_name != repo_name:
|
||||
continue
|
||||
repo = parse_repo_url(repo_url, _repo_name)
|
||||
repos.append(repo)
|
||||
return repos
|
||||
if TEST_REPO:
|
||||
return [
|
||||
RepoInfo(
|
||||
name='default',
|
||||
url='',
|
||||
server='test',
|
||||
owner='test',
|
||||
repo='test',
|
||||
branch='main',
|
||||
path=pathlib.Path(TEST_REPO),
|
||||
)
|
||||
]
|
||||
config = load_config()
|
||||
repos = []
|
||||
for _repo_name, repo_url in config.repos.items():
|
||||
if repo_name is not None and _repo_name != repo_name:
|
||||
continue
|
||||
repo = parse_repo_url(repo_url, _repo_name)
|
||||
repos.append(repo)
|
||||
return repos
|
||||
|
||||
|
||||
def _complete_alias(repo_name: str) -> None:
|
||||
from openllm.model import list_bento
|
||||
from openllm.model import list_bento
|
||||
|
||||
for bento in list_bento(repo_name=repo_name):
|
||||
alias = bento.labels.get('aliases', '').strip()
|
||||
if alias:
|
||||
for a in alias.split(','):
|
||||
with open(bento.path.parent / a, 'w') as f:
|
||||
f.write(bento.version)
|
||||
for bento in list_bento(repo_name=repo_name):
|
||||
alias = bento.labels.get('aliases', '').strip()
|
||||
if alias:
|
||||
for a in alias.split(','):
|
||||
with open(bento.path.parent / a, 'w') as f:
|
||||
f.write(bento.version)
|
||||
|
||||
|
||||
def _clone_repo(repo: RepoInfo) -> None:
|
||||
try:
|
||||
subprocess.run(['git', 'clone', '--depth=1', '-b', repo.branch, repo.url, str(repo.path)], check=True)
|
||||
except (subprocess.CalledProcessError, FileNotFoundError):
|
||||
import dulwich
|
||||
import dulwich.porcelain
|
||||
try:
|
||||
# Suppress output if verbosity level is low
|
||||
if VERBOSE_LEVEL.get() <= 0:
|
||||
subprocess.run(
|
||||
['git', 'clone', '--depth=1', '-b', repo.branch, repo.url, str(repo.path)],
|
||||
check=True,
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
)
|
||||
else:
|
||||
subprocess.run(
|
||||
['git', 'clone', '--depth=1', '-b', repo.branch, repo.url, str(repo.path)], check=True
|
||||
)
|
||||
except (subprocess.CalledProcessError, FileNotFoundError):
|
||||
import dulwich
|
||||
import dulwich.porcelain
|
||||
|
||||
dulwich.porcelain.clone(repo.url, str(repo.path), checkout=True, depth=1, branch=repo.branch)
|
||||
# Dulwich doesn't have easy output suppression, but we rarely get here
|
||||
dulwich.porcelain.clone(repo.url, str(repo.path), checkout=True, depth=1, branch=repo.branch)
|
||||
|
||||
|
||||
def ensure_repo_updated() -> None:
|
||||
if TEST_REPO:
|
||||
return
|
||||
last_update_file = REPO_DIR / 'last_update'
|
||||
if not last_update_file.exists():
|
||||
if INTERACTIVE.get():
|
||||
choice = questionary.confirm(
|
||||
'The repo cache is never updated, do you want to update it to fetch the latest model list?'
|
||||
).ask()
|
||||
if choice:
|
||||
cmd_update()
|
||||
return
|
||||
else:
|
||||
output(
|
||||
'The repo cache is never updated, please run `openllm repo update` to fetch the latest model list',
|
||||
style='red',
|
||||
)
|
||||
raise typer.Exit(1)
|
||||
last_update = datetime.datetime.fromisoformat(last_update_file.read_text().strip())
|
||||
if datetime.datetime.now() - last_update > UPDATE_INTERVAL:
|
||||
if INTERACTIVE.get():
|
||||
choice = questionary.confirm(
|
||||
'The repo cache is outdated, do you want to update it to fetch the latest model list?'
|
||||
).ask()
|
||||
if choice:
|
||||
cmd_update()
|
||||
else:
|
||||
output(
|
||||
'The repo cache is outdated, please run `openllm repo update` to fetch the latest model list',
|
||||
style='yellow',
|
||||
)
|
||||
if TEST_REPO:
|
||||
return
|
||||
last_update_file = REPO_DIR / 'last_update'
|
||||
if not last_update_file.exists():
|
||||
if INTERACTIVE.get():
|
||||
choice = questionary.confirm(
|
||||
'The repo cache is never updated, do you want to update it to fetch the latest model list?'
|
||||
).ask()
|
||||
if choice:
|
||||
cmd_update()
|
||||
return
|
||||
else:
|
||||
output(
|
||||
'The repo cache is never updated, please run `openllm repo update` to fetch the latest model list',
|
||||
style='red',
|
||||
)
|
||||
raise typer.Exit(1)
|
||||
last_update = datetime.datetime.fromisoformat(last_update_file.read_text().strip())
|
||||
if datetime.datetime.now() - last_update > UPDATE_INTERVAL:
|
||||
if INTERACTIVE.get():
|
||||
choice = questionary.confirm(
|
||||
'The repo cache is outdated, do you want to update it to fetch the latest model list?'
|
||||
).ask()
|
||||
if choice:
|
||||
cmd_update()
|
||||
else:
|
||||
output(
|
||||
'The repo cache is outdated, please run `openllm repo update` to fetch the latest model list',
|
||||
style='yellow',
|
||||
)
|
||||
|
||||
|
||||
GIT_HTTP_RE = re.compile(
|
||||
r'(?P<schema>git|ssh|http|https):\/\/(?P<server>[\.\w\d\-]+)\/(?P<owner>[\w\d\-]+)\/(?P<repo>[\w\d\-\_\.]+)(@(?P<branch>.+))?(\/)?$'
|
||||
r'(?P<schema>git|ssh|http|https):\/\/(?P<server>[\.\w\d\-]+)\/(?P<owner>[\w\d\-]+)\/(?P<repo>[\w\d\-\_\.]+)(@(?P<branch>.+))?(\/)?$'
|
||||
)
|
||||
GIT_SSH_RE = re.compile(
|
||||
r'git@(?P<server>[\.\w\d-]+):(?P<owner>[\w\d\-]+)\/(?P<repo>[\w\d\-\_\.]+)(@(?P<branch>.+))?(\/)?$'
|
||||
r'git@(?P<server>[\.\w\d-]+):(?P<owner>[\w\d\-]+)\/(?P<repo>[\w\d\-\_\.]+)(@(?P<branch>.+))?(\/)?$'
|
||||
)
|
||||
|
||||
|
||||
def parse_repo_url(repo_url: str, repo_name: typing.Optional[str] = None) -> RepoInfo:
  """
  Parse a git repo URL into server, owner, repo name, and branch.

  >>> parse_repo_url('https://github.com/bentoml/bentovllm@main')
  ('github.com', 'bentoml', 'bentovllm', 'main')
  >>> parse_repo_url('https://github.com/bentoml/bentovllm.git@main')
  ('github.com', 'bentoml', 'bentovllm', 'main')
  >>> parse_repo_url('https://github.com/bentoml/bentovllm')
  ('github.com', 'bentoml', 'bentovllm', 'main')
  >>> parse_repo_url('git@github.com:bentoml/openllm-models.git')
  ('github.com', 'bentoml', 'openllm-models', 'main')
  """
  match = GIT_HTTP_RE.match(repo_url)
  if match:
    schema = match.group('schema')
  else:
    match = GIT_SSH_RE.match(repo_url)
    if not match:
      raise ValueError(f'Invalid git repo url: {repo_url}')
    schema = None

  if match.group('branch') is not None:
    repo_url = repo_url[: match.start('branch') - 1]

  server = match.group('server')
  owner = match.group('owner')
  repo = match.group('repo')
  if repo.endswith('.git'):
    repo = repo[:-4]
  branch = match.group('branch') or 'main'

  if schema is not None:
    repo_url = f'{schema}://{server}/{owner}/{repo}'
  else:
    repo_url = f'git@{server}:{owner}/{repo}'

  path = REPO_DIR / server / owner / repo / branch
  return RepoInfo(
    name=repo if repo_name is None else repo_name,
    url=repo_url,
    server=server,
    owner=owner,
    repo=repo,
    branch=branch,
    path=path,
  )
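RepoInfo itself is defined elsewhere (presumably in openllm.common, alongside BentoInfo); this diff only shows its constructor call. A sketch of the shape that call implies, for orientation only; note the doctests above print a 4-tuple, so the real class evidently customizes its repr:

import pathlib, typing

class RepoInfo(typing.NamedTuple):  # assumed shape, not the actual definition
  name: str
  url: str
  server: str
  owner: str
  repo: str
  branch: str
  path: pathlib.Path
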

if __name__ == '__main__':
  app()

@@ -3,92 +3,100 @@ from __future__ import annotations
import functools, os, pathlib, shutil
import typer, yaml

from openllm.common import VENV_DIR, VERBOSE_LEVEL, BentoInfo, EnvVars, VenvSpec, output, run_command
from openllm.common import (
  VENV_DIR,
  VERBOSE_LEVEL,
  BentoInfo,
  EnvVars,
  VenvSpec,
  output,
  run_command,
)

@functools.lru_cache
def _resolve_bento_venv_spec(bento: BentoInfo, runtime_envs: EnvVars | None = None) -> VenvSpec:
  lock_file = bento.path / 'env' / 'python' / 'requirements.lock.txt'
  if not lock_file.exists():
    lock_file = bento.path / 'env' / 'python' / 'requirements.txt'

  reqs = lock_file.read_text().strip()
  bentofile = bento.path / 'bento.yaml'
  data = yaml.safe_load(bentofile.read_text())
  bento_env_list = data.get('envs', [])
  python_version = data.get('image', {})['python_version']
  bento_envs = {e['name']: e.get('value') for e in bento_env_list}
  envs = {k: runtime_envs.get(k, v) for k, v in bento_envs.items()} if runtime_envs else {}

  return VenvSpec(
    python_version=python_version,
    requirements_txt=reqs,
    name_prefix=f'{bento.tag.replace(":", "_")}-1-',
    envs=EnvVars(envs),
  )

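For reference, the bento.yaml fields read above look roughly like the following; this is an assumed minimal shape for illustration, not the full bento schema:

import yaml

data = yaml.safe_load("""
image:
  python_version: "3.11"
envs:
  - name: HF_TOKEN
  - name: MY_FLAG
    value: "1"
""")
# Same dict comprehension as in _resolve_bento_venv_spec above:
bento_envs = {e['name']: e.get('value') for e in data.get('envs', [])}
assert bento_envs == {'HF_TOKEN': None, 'MY_FLAG': '1'}
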
def _ensure_venv(venv_spec: VenvSpec) -> pathlib.Path:
  venv = VENV_DIR / str(hash(venv_spec))
  if venv.exists() and not (venv / 'DONE').exists():
    shutil.rmtree(venv, ignore_errors=True)
  if not venv.exists():
    output(f'Installing model dependencies({venv})...', style='green')

    venv_py = venv / 'Scripts' / 'python.exe' if os.name == 'nt' else venv / 'bin' / 'python'
    try:
      run_command(
        ['python', '-m', 'uv', 'venv', venv.__fspath__(), '-p', venv_spec.python_version],
        silent=VERBOSE_LEVEL.get() < 10,
      )
      run_command(
        ['python', '-m', 'uv', 'pip', 'install', '-p', str(venv_py), 'bentoml'],
        silent=VERBOSE_LEVEL.get() < 10,
        env=venv_spec.envs,
      )
      with open(venv / 'requirements.txt', 'w') as f:
        f.write(venv_spec.normalized_requirements_txt)
      run_command(
        [
          'python',
          '-m',
          'uv',
          'pip',
          'install',
          '-p',
          str(venv_py),
          '-r',
          (venv / 'requirements.txt').__fspath__(),
        ],
        silent=VERBOSE_LEVEL.get() < 10,
        env=venv_spec.envs,
      )
      with open(venv / 'DONE', 'w') as f:
        f.write('DONE')
    except Exception as e:
      shutil.rmtree(venv, ignore_errors=True)
      if VERBOSE_LEVEL.get() >= 10:
        output(str(e), style='red')
      output(f'Failed to install dependencies to {venv}. Cleaned up.', style='red')
      raise typer.Exit(1)
    output(f'Successfully installed dependencies to {venv}.', style='green')
    return venv
  else:
    return venv


def ensure_venv(bento: BentoInfo, runtime_envs: EnvVars | None = None) -> pathlib.Path:
  venv_spec = _resolve_bento_venv_spec(bento, runtime_envs=EnvVars(runtime_envs))
  venv = _ensure_venv(venv_spec)
  assert venv is not None
  return venv

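Putting the helpers together, the typical call flow looks like this; a sketch only, where `bento` stands in for a BentoInfo resolved elsewhere in the CLI and the token value is a placeholder:

# Hypothetical usage; `bento` is a BentoInfo obtained elsewhere.
venv = ensure_venv(bento, runtime_envs=EnvVars({'HF_TOKEN': '<your-token>'}))
# The venv path is keyed by hash(venv_spec), so the same bento + env combination
# reuses its cached environment; the 'DONE' marker guards against half-built venvs.
venv_py = venv / ('Scripts/python.exe' if os.name == 'nt' else 'bin/python')
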
def check_venv(bento: BentoInfo) -> bool:
  venv_spec = _resolve_bento_venv_spec(bento)
  venv = VENV_DIR / str(hash(venv_spec))
  if not venv.exists():
    return False
  if venv.exists() and not (venv / 'DONE').exists():
    return False
  return True

75
tests/test_cli_flow.py
Normal file
@@ -0,0 +1,75 @@
from __future__ import annotations

import sys, typing

import pytest, pexpect


@pytest.fixture
def pexpect_process() -> typing.Generator[pexpect.spawn[typing.Any], None, None]:
  child = pexpect.spawn(
    f'{sys.executable} -m openllm hello', encoding='utf-8', timeout=20, echo=False
  )
  try:
    yield child
  finally:
    try:
      child.sendcontrol('c')
      child.close(force=True)
    except Exception:
      pass


def safe_expect(
  child: pexpect.spawn, pattern: str, timeout: int = 10, debug_msg: str = 'Expecting pattern'
) -> int:
  try:
    print(f"\n{debug_msg}: '{pattern}'")
    index = child.expect(pattern, timeout=timeout)
    print(f'Found match at index {index}')
    print(f'Before match: {child.before}')
    print(f'After match: {child.after}')
    return index
  except pexpect.TIMEOUT:
    print(f'TIMEOUT while {debug_msg}')
    print(f'Last output: {child.before}')
    raise
  except pexpect.EOF:
    print(f'EOF while {debug_msg}')
    print(f'Last output: {child.before}')
    raise


def test_hello_flow_to_deploy(pexpect_process: pexpect.spawn) -> None:
  child = pexpect_process

  try:
    safe_expect(child, 'Select a model', timeout=10, debug_msg='Waiting for model selection prompt')

    child.sendline('\x1b[B')
    child.sendline('\r')

    safe_expect(
      child, 'Select a version', timeout=10, debug_msg='Waiting for version selection prompt'
    )

    child.sendline('\r')

    safe_expect(
      child, 'Select an action', timeout=10, debug_msg='Waiting for action selection prompt'
    )

    child.sendline('\x1b[B')
    child.sendline('\x1b[B')

    child.sendline('\r')

    safe_expect(
      child, 'Select an instance type', timeout=10, debug_msg='Waiting for instance type prompt'
    )

    child.sendline('\r')

    child.expect('Error: .*HF_TOKEN', timeout=10)
  except Exception as e:
    pytest.fail(f'Test failed with exception: {e}')
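A note on the control sequences used above: they are standard terminal escape codes that questionary's prompts interpret as key presses. Named constants would read better; an editorial sketch, not part of the commit:

DOWN_ARROW = '\x1b[B'  # ANSI "cursor down" escape sequence
ENTER = '\r'           # carriage return confirms the highlighted choice
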
101
uv.lock
generated
@@ -214,7 +214,7 @@ wheels = [

[[package]]
name = "bentoml"
version = "1.4.5"
version = "1.4.8"
source = { registry = "https://pypi.org/simple" }
dependencies = [
    { name = "a2wsgi" },
@@ -261,9 +261,9 @@ dependencies = [
    { name = "uvicorn" },
    { name = "watchfiles" },
]
sdist = { url = "https://files.pythonhosted.org/packages/dc/df/6e5a260aaf2ee5da3d797374f81bba087fdcb8b521c7cb7441d390e266b6/bentoml-1.4.5.tar.gz", hash = "sha256:372d6d2f93dbcef38eefd568d0a9c99bfd8b5fbb7202983d948de03efa5cc961", size = 967625 }
sdist = { url = "https://files.pythonhosted.org/packages/87/a4/7ba2d3cfea05e4d9505b4aedfec17477771bc5dc98ed4d818f83cdc23093/bentoml-1.4.8.tar.gz", hash = "sha256:fb7e1d21a415645afdeb928f45a1950b7409960b5d9360189b777640c96f7103", size = 970299 }
wheels = [
    { url = "https://files.pythonhosted.org/packages/23/26/64bfa28ce0b9e29e825a656e4785eb39b5ab4ca7abb6dbe1e25d856ac716/bentoml-1.4.5-py3-none-any.whl", hash = "sha256:31ecdf26e4addcf62c03a356b629925f5c3aca304d73a5cdf60c1bcbf5e19eb2", size = 1147638 },
    { url = "https://files.pythonhosted.org/packages/cb/3e/c4adc9c48ceab6bfd8735f125f1b2ec58c6a636b4f2c092349c02e1beb71/bentoml-1.4.8-py3-none-any.whl", hash = "sha256:b33765e15101348fa6ca1fe68f07b3309ad4ea5c8823e56c2358a1b09b29edbb", size = 1150381 },
]

[[package]]
@@ -681,6 +681,21 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/95/04/ff642e65ad6b90db43e668d70ffb6736436c7ce41fcc549f4e9472234127/h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761", size = 58259 },
]

[[package]]
name = "hf-xet"
version = "1.0.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/95/68/4c363b2e62cb3dbe12d2257ba9b22f101384692d4b9727c5f72433472cff/hf_xet-1.0.3.tar.gz", hash = "sha256:a6d16861a06dd4b8f7229c16b392c5fb8b9588ced89a6ee9bc3e66227f794353", size = 257227 }
wheels = [
    { url = "https://files.pythonhosted.org/packages/26/12/ebbba4b64cb9c908bd5dee355da27f3cc5ad4f29b4b2835041d363388363/hf_xet-1.0.3-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:0705e5db0da5794ab048a8662a7b3aba220f963270b26abc92e8d05abca22451", size = 4979740 },
    { url = "https://files.pythonhosted.org/packages/58/8f/34eadc408b834bcb55886b242a9783da3f63508c4bcbfda7a4f21e61f3d1/hf_xet-1.0.3-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:09a9565ca84049d48c99c83a82d08fbc21d63c04811fd2f7dd088292c1185bc5", size = 4806773 },
    { url = "https://files.pythonhosted.org/packages/a1/de/00b2e2568a39c01b0e013db3300f4d5841f2e597d7b0518923c7881bd166/hf_xet-1.0.3-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70e18534d46ab92bbc3125addaebc145f9b27e06eecd67b40c4342f4b92b677f", size = 53812632 },
    { url = "https://files.pythonhosted.org/packages/e2/d8/4ff790370a6795418196553c33e7bcceaa73a7d587e21e4ccb7661b54a2a/hf_xet-1.0.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:da28fd32213ad5b8f60771aba44ac032ba19d752928cfd95914f09146b3f51ec", size = 52277180 },
    { url = "https://files.pythonhosted.org/packages/83/dd/7b432918a3e9e09794674b81e852acc6e14177c0a4466ac0566b7e7f47a4/hf_xet-1.0.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1b71118b8f7e9edf1ae56282388794f351163c7de5c22ea3737dffa9313f500e", size = 53309852 },
    { url = "https://files.pythonhosted.org/packages/4d/a2/d7a5f452a3a8faaa82aeb3aceddab2e103c1b7028a00bbc4caebca5d79fe/hf_xet-1.0.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5927d1986f87b7b80616eb6353a1402be1d72c46b6b0709b01ffc7623a159563", size = 53739471 },
    { url = "https://files.pythonhosted.org/packages/82/81/966f800933043c0be989306f5224ef058543f7848f1e78d7ef3305bd069a/hf_xet-1.0.3-cp37-abi3-win_amd64.whl", hash = "sha256:014b5a40e62ad334f21513e5ba39b419117396031e9264dfc15dd598a1595029", size = 4123538 },
]

[[package]]
name = "httpcore"
version = "1.0.7"
@@ -763,6 +778,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/a0/d9/a1e041c5e7caa9a05c925f4bdbdfb7f006d1f74996af53467bc394c97be7/importlib_metadata-8.5.0-py3-none-any.whl", hash = "sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b", size = 26514 },
]

[[package]]
name = "iniconfig"
version = "2.1.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793 }
wheels = [
    { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050 },
]

[[package]]
name = "jinja2"
version = "3.1.5"
@@ -1167,7 +1191,7 @@ wheels = [

[[package]]
name = "openai"
version = "1.66.3"
version = "1.70.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
    { name = "anyio" },
@@ -1179,9 +1203,9 @@ dependencies = [
    { name = "tqdm" },
    { name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/a3/77/5172104ca1df35ed2ed8fb26dbc787f721c39498fc51d666c4db07756a0c/openai-1.66.3.tar.gz", hash = "sha256:8dde3aebe2d081258d4159c4cb27bdc13b5bb3f7ea2201d9bd940b9a89faf0c9", size = 397244 }
sdist = { url = "https://files.pythonhosted.org/packages/87/f5/ae0f3cd226c2993b4ac1cc4b5f6ca099764689f403c14922c9356accec66/openai-1.70.0.tar.gz", hash = "sha256:e52a8d54c3efeb08cf58539b5b21a5abef25368b5432965e4de88cdf4e091b2b", size = 409640 }
wheels = [
    { url = "https://files.pythonhosted.org/packages/78/5a/e20182f7b6171642d759c548daa0ba20a1d3ac10d2bd0a13fd75704a9ac3/openai-1.66.3-py3-none-any.whl", hash = "sha256:a427c920f727711877ab17c11b95f1230b27767ba7a01e5b66102945141ceca9", size = 567400 },
    { url = "https://files.pythonhosted.org/packages/e2/39/c4b38317d2c702c4bc763957735aaeaf30dfc43b5b824121c49a4ba7ba0f/openai-1.70.0-py3-none-any.whl", hash = "sha256:f6438d053fd8b2e05fd6bef70871e832d9bbdf55e119d0ac5b92726f1ae6f614", size = 599070 },
]

[[package]]
@@ -1190,6 +1214,7 @@ source = { editable = "." }
dependencies = [
    { name = "bentoml" },
    { name = "dulwich" },
    { name = "hf-xet" },
    { name = "huggingface-hub" },
    { name = "nvidia-ml-py" },
    { name = "openai" },
@@ -1204,13 +1229,20 @@ dependencies = [
    { name = "uv" },
]

[package.dev-dependencies]
tests = [
    { name = "pexpect" },
    { name = "pytest" },
]

[package.metadata]
requires-dist = [
    { name = "bentoml", specifier = "==1.4.5" },
    { name = "bentoml", specifier = "==1.4.8" },
    { name = "dulwich" },
    { name = "hf-xet" },
    { name = "huggingface-hub" },
    { name = "nvidia-ml-py" },
    { name = "openai", specifier = "==1.66.3" },
    { name = "openai", specifier = "==1.70.0" },
    { name = "pathlib" },
    { name = "pip-requirements-parser" },
    { name = "psutil" },
@@ -1222,6 +1254,12 @@ requires-dist = [
    { name = "uv" },
]

[package.metadata.requires-dev]
tests = [
    { name = "pexpect", specifier = ">=4.9.0" },
    { name = "pytest", specifier = ">=8.3.5" },
]

[[package]]
name = "opentelemetry-api"
version = "1.30.0"
@@ -1345,6 +1383,18 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191 },
]

[[package]]
name = "pexpect"
version = "4.9.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
    { name = "ptyprocess" },
]
sdist = { url = "https://files.pythonhosted.org/packages/42/92/cc564bf6381ff43ce1f4d06852fc19a2f11d180f23dc32d9588bee2f149d/pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f", size = 166450 }
wheels = [
    { url = "https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", size = 63772 },
]

[[package]]
name = "pip-requirements-parser"
version = "32.0.1"
@@ -1358,6 +1408,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/54/d0/d04f1d1e064ac901439699ee097f58688caadea42498ec9c4b4ad2ef84ab/pip_requirements_parser-32.0.1-py3-none-any.whl", hash = "sha256:4659bc2a667783e7a15d190f6fccf8b2486685b6dba4c19c3876314769c57526", size = 35648 },
]

[[package]]
name = "pluggy"
version = "1.5.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/96/2d/02d4312c973c6050a18b314a5ad0b3210edb65a906f868e31c111dede4a6/pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1", size = 67955 }
wheels = [
    { url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556 },
]

[[package]]
name = "prometheus-client"
version = "0.21.1"
@@ -1483,6 +1542,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/50/1b/6921afe68c74868b4c9fa424dad3be35b095e16687989ebbb50ce4fceb7c/psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553", size = 244885 },
]

[[package]]
name = "ptyprocess"
version = "0.7.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/20/e5/16ff212c1e452235a90aeb09066144d0c5a6a8c0834397e03f5224495c4e/ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220", size = 70762 }
wheels = [
    { url = "https://files.pythonhosted.org/packages/22/a6/858897256d0deac81a172289110f31629fc4cee19b6f01283303e18c8db3/ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", size = 13993 },
]

[[package]]
name = "pyaml"
version = "25.1.0"
@@ -1633,6 +1701,23 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/1c/a7/c8a2d361bf89c0d9577c934ebb7421b25dc84bf3a8e3ac0a40aed9acc547/pyparsing-3.2.1-py3-none-any.whl", hash = "sha256:506ff4f4386c4cec0590ec19e6302d3aedb992fdc02c761e90416f158dacf8e1", size = 107716 },
]

[[package]]
name = "pytest"
version = "8.3.5"
source = { registry = "https://pypi.org/simple" }
dependencies = [
    { name = "colorama", marker = "sys_platform == 'win32'" },
    { name = "exceptiongroup", marker = "python_full_version < '3.11'" },
    { name = "iniconfig" },
    { name = "packaging" },
    { name = "pluggy" },
    { name = "tomli", marker = "python_full_version < '3.11'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/ae/3c/c9d525a414d506893f0cd8a8d0de7706446213181570cdbd766691164e40/pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845", size = 1450891 }
wheels = [
    { url = "https://files.pythonhosted.org/packages/30/3d/64ad57c803f1fa1e963a7946b6e0fea4a70df53c1a7fed304586539c2bac/pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820", size = 343634 },
]

[[package]]
name = "python-dateutil"
version = "2.9.0.post0"