chore: cleanup code and env requirements

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
Author: Aaron Pham
Date:   2025-04-09 21:36:46 -04:00
parent 578308bb9c
commit d8fb4ae4a5
19 changed files with 1670 additions and 1368 deletions


@@ -12,7 +12,7 @@ jobs:
steps:
- name: Dependabot metadata
id: metadata
-uses: dependabot/fetch-metadata@v2.3.0
+uses: dependabot/fetch-metadata@d7267f607e9d3fb96fc2fbe83e0af444713e90b7 # ratchet:dependabot/fetch-metadata@v2.3.0
with:
github-token: "${{ secrets.GITHUB_TOKEN }}"
- name: Enable auto-merge for Dependabot PRs
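
This step, like the new tests workflow below, now pins its action to a full commit SHA; the `# ratchet:` comment records the tag the SHA was resolved from (the convention used by the ratchet pinning tool). A rough, hypothetical checker for `uses:` references that are still mutable tags rather than 40-character SHAs, not part of this commit:

  import pathlib, re

  # A line counts as pinned when the ref after '@' is a full 40-character commit SHA.
  PINNED = re.compile(r'uses:\s*\S+@[0-9a-f]{40}\b')

  def unpinned_uses(workflow_dir: str = '.github/workflows') -> list[str]:
    hits: list[str] = []
    for wf in sorted(pathlib.Path(workflow_dir).glob('*.y*ml')):
      for n, line in enumerate(wf.read_text().splitlines(), 1):
        if 'uses:' in line and not PINNED.search(line):
          hits.append(f'{wf}:{n}: {line.strip()}')
    return hits

  if __name__ == '__main__':
    print('\n'.join(unpinned_uses()) or 'all actions pinned')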

.github/workflows/tests.yml (new file, 35 lines)

@@ -0,0 +1,35 @@
name: Run Tests
on:
push:
branches: [main, master]
pull_request:
branches: [main, master]
jobs:
test:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.9", "3.12"]
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # ratchet:actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # ratchet:actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install uv
run: |
pip install uv
- name: Install dependencies with uv
run: |
uv pip install -e .
uv pip install pytest pexpect
- name: Run tests
run: |
pytest tests -v
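
The workflow installs the package with uv on Python 3.9 and 3.12, adds pytest and pexpect, and runs the tests/ directory; this mirrors the tests dependency group added to pyproject.toml below. A minimal sketch of the kind of pexpect-driven smoke test such a setup can run (illustrative only, not copied from the repository's tests):

  import pexpect

  def test_cli_help_smoke():
    # Assumes the `openllm` console script is on PATH after `uv pip install -e .`.
    child = pexpect.spawn('openllm --help', encoding='utf-8', timeout=120)
    child.expect(pexpect.EOF)
    child.close()
    assert child.exitstatus == 0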

.gitignore (1 line changed)

@@ -163,3 +163,4 @@ cython_debug/
venv/
.envrc
_version.py
+.cursor


@@ -7,7 +7,7 @@ default_language_version:
python: python3.11 # NOTE: sync with .python-version-default
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: "v0.11.2"
rev: "v0.11.4"
hooks:
- id: ruff
alias: r


@@ -1,7 +1,7 @@
extend-include = ["*.ipynb"]
preview = true
-line-length = 119
-indent-width = 4
+line-length = 100
+indent-width = 2
[format]
preview = true
@@ -18,21 +18,16 @@ ignore = [
]
select = [
"F",
"G", # flake8-logging-format
"PERF", # perflint
"RUF", # Ruff-specific rules
"G", # flake8-logging-format
"PERF", # perflint
"RUF", # Ruff-specific rules
"W6",
"E71",
"E72",
"E112",
"E113",
# "E124",
"E203",
"E272",
# "E303",
# "E304",
# "E501",
# "E502",
"E702",
"E703",
"E731",


@@ -9,30 +9,30 @@
import subprocess, sys, pathlib, json, jinja2
if __name__ == '__main__':
with (pathlib.Path('.').parent / 'README.md').open('w') as f:
f.write(
jinja2.Environment(loader=jinja2.FileSystemLoader('.'))
.get_template('README.md.tpl')
.render(
model_dict=json.loads(
subprocess.run(
[
sys.executable,
'-m',
'uv',
'run',
'--with-editable',
'.',
'openllm',
'model',
'list',
'--output',
'readme',
],
text=True,
check=True,
capture_output=True,
).stdout.strip()
)
)
with (pathlib.Path('.').parent / 'README.md').open('w') as f:
f.write(
jinja2.Environment(loader=jinja2.FileSystemLoader('.'))
.get_template('README.md.tpl')
.render(
model_dict=json.loads(
subprocess.run(
[
sys.executable,
'-m',
'uv',
'run',
'--with-editable',
'.',
'openllm',
'model',
'list',
'--output',
'readme',
],
text=True,
check=True,
capture_output=True,
).stdout.strip()
)
)
)


@@ -43,6 +43,7 @@ dependencies = [
"uv",
"openai==1.70.0",
"huggingface-hub",
"hf-xet",
"typing-extensions>=4.12.2",
]
keywords = [
@@ -87,6 +88,12 @@ src-dir = "src/openllm"
requires = ["hatchling==1.27.0", "hatch-vcs==0.4.0"]
build-backend = 'hatchling.build'
+[dependency-groups]
+tests = [
+"pexpect>=4.9.0",
+"pytest>=8.3.5",
+]
[tool.hatch.version]
source = "vcs"
fallback-version = "0.0.0"


@@ -14,12 +14,12 @@ from openllm.model import app as model_app, ensure_bento, list_bento
from openllm.repo import app as repo_app, cmd_update
if typing.TYPE_CHECKING:
from openllm.common import DeploymentTarget
from openllm.common import DeploymentTarget
app = OpenLLMTyper(
help='`openllm hello` to get started. '
'OpenLLM is a CLI tool to manage and deploy open source LLMs and'
' get an OpenAI API compatible chat server in seconds.'
help='`openllm hello` to get started. '
'OpenLLM is a CLI tool to manage and deploy open source LLMs and'
' get an OpenAI API compatible chat server in seconds.'
)
app.add_typer(repo_app, name='repo')
@@ -28,263 +28,274 @@ app.add_typer(clean_app, name='clean')
def _select_bento_name(models: list[BentoInfo], target: DeploymentTarget) -> tuple[str, str]:
from tabulate import tabulate
from tabulate import tabulate
model_infos = [(model.repo.name, model.name, can_run(model, target)) for model in models]
model_name_groups: defaultdict[tuple[str, str], float] = defaultdict(lambda: 0.0)
for repo, name, score in model_infos:
model_name_groups[repo, name] += score
table_data = [(name, repo, CHECKED if score > 0 else '') for (repo, name), score in model_name_groups.items()]
if not table_data:
output('No model found', style='red')
raise typer.Exit(1)
table: list[str] = tabulate(table_data, headers=['model', 'repo', 'locally runnable']).split('\n')
model_infos = [(model.repo.name, model.name, can_run(model, target)) for model in models]
model_name_groups: defaultdict[tuple[str, str], float] = defaultdict(lambda: 0.0)
for repo, name, score in model_infos:
model_name_groups[repo, name] += score
table_data = [
(name, repo, CHECKED if score > 0 else '') for (repo, name), score in model_name_groups.items()
]
if not table_data:
output('No model found', style='red')
raise typer.Exit(1)
table: list[str] = tabulate(table_data, headers=['model', 'repo', 'locally runnable']).split('\n')
selected: tuple[str, str] | None = questionary.select(
'Select a model',
[
questionary.Separator(f'{table[0]}\n {table[1]}'),
*[questionary.Choice(line, value=value[:2]) for value, line in zip(table_data, table[2:])],
],
).ask()
if selected is None:
raise typer.Exit(1)
return selected
selected: tuple[str, str] | None = questionary.select(
'Select a model',
[
questionary.Separator(f'{table[0]}\n {table[1]}'),
*[questionary.Choice(line, value=value[:2]) for value, line in zip(table_data, table[2:])],
],
).ask()
if selected is None:
raise typer.Exit(1)
return selected
def _select_bento_version(
models: list[BentoInfo], target: DeploymentTarget | None, bento_name: str, repo: str
models: list[BentoInfo], target: DeploymentTarget | None, bento_name: str, repo: str
) -> tuple[BentoInfo, float]:
from tabulate import tabulate
from tabulate import tabulate
model_infos: list[tuple[BentoInfo, float]] = [
(model, can_run(model, target)) for model in models if model.name == bento_name and model.repo.name == repo
]
model_infos: list[tuple[BentoInfo, float]] = [
(model, can_run(model, target))
for model in models
if model.name == bento_name and model.repo.name == repo
]
table_data = [
[model.tag, CHECKED if score > 0 else '']
for model, score in model_infos
if model.name == bento_name and model.repo.name == repo
]
if not table_data:
output(f'No model found for {bento_name} in {repo}', style='red')
raise typer.Exit(1)
table: list[str] = tabulate(table_data, headers=['version', 'locally runnable']).split('\n')
table_data = [
[model.tag, CHECKED if score > 0 else '']
for model, score in model_infos
if model.name == bento_name and model.repo.name == repo
]
if not table_data:
output(f'No model found for {bento_name} in {repo}', style='red')
raise typer.Exit(1)
table: list[str] = tabulate(table_data, headers=['version', 'locally runnable']).split('\n')
selected: tuple[BentoInfo, float] | None = questionary.select(
'Select a version',
[
questionary.Separator(f'{table[0]}\n {table[1]}'),
*[questionary.Choice(line, value=value[:2]) for value, line in zip(model_infos, table[2:])],
],
).ask()
if selected is None:
raise typer.Exit(1)
return selected
selected: tuple[BentoInfo, float] | None = questionary.select(
'Select a version',
[
questionary.Separator(f'{table[0]}\n {table[1]}'),
*[questionary.Choice(line, value=value[:2]) for value, line in zip(model_infos, table[2:])],
],
).ask()
if selected is None:
raise typer.Exit(1)
return selected
def _select_target(bento: BentoInfo, targets: list[DeploymentTarget]) -> DeploymentTarget:
from tabulate import tabulate
from tabulate import tabulate
targets.sort(key=lambda x: can_run(bento, x), reverse=True)
if not targets:
output('No available instance type, check your bentocloud account', style='red')
raise typer.Exit(1)
targets.sort(key=lambda x: can_run(bento, x), reverse=True)
if not targets:
output('No available instance type, check your bentocloud account', style='red')
raise typer.Exit(1)
table = tabulate(
[
[
target.name,
target.accelerators_repr,
f'${target.price}',
CHECKED if can_run(bento, target) else 'insufficient res.',
]
for target in targets
],
headers=['instance type', 'accelerator', 'price/hr', 'deployable'],
).split('\n')
table = tabulate(
[
[
target.name,
target.accelerators_repr,
f'${target.price}',
CHECKED if can_run(bento, target) else 'insufficient res.',
]
for target in targets
],
headers=['instance type', 'accelerator', 'price/hr', 'deployable'],
).split('\n')
selected: DeploymentTarget | None = questionary.select(
'Select an instance type',
[
questionary.Separator(f'{table[0]}\n {table[1]}'),
*[questionary.Choice(f'{line}', value=target) for target, line in zip(targets, table[2:])],
],
).ask()
if selected is None:
raise typer.Exit(1)
return selected
selected: DeploymentTarget | None = questionary.select(
'Select an instance type',
[
questionary.Separator(f'{table[0]}\n {table[1]}'),
*[questionary.Choice(f'{line}', value=target) for target, line in zip(targets, table[2:])],
],
).ask()
if selected is None:
raise typer.Exit(1)
return selected
def _select_action(bento: BentoInfo, score: float) -> None:
if score > 0:
options: list[typing.Any] = [
questionary.Separator('Available actions'),
questionary.Choice('0. Run the model in terminal', value='run', shortcut_key='0'),
questionary.Separator(f' $ openllm run {bento}'),
questionary.Separator(' '),
questionary.Choice('1. Serve the model locally and get a chat server', value='serve', shortcut_key='1'),
questionary.Separator(f' $ openllm serve {bento}'),
questionary.Separator(' '),
questionary.Choice(
'2. Deploy the model to bentocloud and get a scalable chat server', value='deploy', shortcut_key='2'
),
questionary.Separator(f' $ openllm deploy {bento}'),
]
else:
options = [
questionary.Separator('Available actions'),
questionary.Choice(
'0. Run the model in terminal', value='run', disabled='insufficient res.', shortcut_key='0'
),
questionary.Separator(f' $ openllm run {bento}'),
questionary.Separator(' '),
questionary.Choice(
'1. Serve the model locally and get a chat server',
value='serve',
disabled='insufficient res.',
shortcut_key='1',
),
questionary.Separator(f' $ openllm serve {bento}'),
questionary.Separator(' '),
questionary.Choice(
'2. Deploy the model to bentocloud and get a scalable chat server', value='deploy', shortcut_key='2'
),
questionary.Separator(f' $ openllm deploy {bento}'),
]
action: str | None = questionary.select('Select an action', options).ask()
if action is None:
raise typer.Exit(1)
if action == 'run':
try:
port = random.randint(30000, 40000)
local_run(bento, port=port)
finally:
output('\nUse this command to run the action again:', style='green')
output(f' $ openllm run {bento}', style='orange')
elif action == 'serve':
try:
local_serve(bento)
finally:
output('\nUse this command to run the action again:', style='green')
output(f' $ openllm serve {bento}', style='orange')
elif action == 'deploy':
ensure_cloud_context()
targets = get_cloud_machine_spec()
target = _select_target(bento, targets)
try:
cloud_deploy(bento, target)
finally:
output('\nUse this command to run the action again:', style='green')
output(f' $ openllm deploy {bento} --instance-type {target.name}', style='orange')
if score > 0:
options: list[typing.Any] = [
questionary.Separator('Available actions'),
questionary.Choice('0. Run the model in terminal', value='run', shortcut_key='0'),
questionary.Separator(f' $ openllm run {bento}'),
questionary.Separator(' '),
questionary.Choice(
'1. Serve the model locally and get a chat server', value='serve', shortcut_key='1'
),
questionary.Separator(f' $ openllm serve {bento}'),
questionary.Separator(' '),
questionary.Choice(
'2. Deploy the model to bentocloud and get a scalable chat server',
value='deploy',
shortcut_key='2',
),
questionary.Separator(f' $ openllm deploy {bento}'),
]
else:
options = [
questionary.Separator('Available actions'),
questionary.Choice(
'0. Run the model in terminal', value='run', disabled='insufficient res.', shortcut_key='0'
),
questionary.Separator(f' $ openllm run {bento}'),
questionary.Separator(' '),
questionary.Choice(
'1. Serve the model locally and get a chat server',
value='serve',
disabled='insufficient res.',
shortcut_key='1',
),
questionary.Separator(f' $ openllm serve {bento}'),
questionary.Separator(' '),
questionary.Choice(
'2. Deploy the model to bentocloud and get a scalable chat server',
value='deploy',
shortcut_key='2',
),
questionary.Separator(f' $ openllm deploy {bento}'),
]
action: str | None = questionary.select('Select an action', options).ask()
if action is None:
raise typer.Exit(1)
if action == 'run':
try:
port = random.randint(30000, 40000)
local_run(bento, port=port)
finally:
output('\nUse this command to run the action again:', style='green')
output(f' $ openllm run {bento}', style='orange')
elif action == 'serve':
try:
local_serve(bento)
finally:
output('\nUse this command to run the action again:', style='green')
output(f' $ openllm serve {bento}', style='orange')
elif action == 'deploy':
ensure_cloud_context()
targets = get_cloud_machine_spec()
target = _select_target(bento, targets)
try:
cloud_deploy(bento, target)
finally:
output('\nUse this command to run the action again:', style='green')
output(f' $ openllm deploy {bento} --instance-type {target.name}', style='orange')
@app.command(help='get started interactively')
def hello() -> None:
INTERACTIVE.set(True)
def hello(repo: typing.Optional[str] = None) -> None:
cmd_update()
INTERACTIVE.set(True)
target = get_local_machine_spec()
output(f' Detected Platform: {target.platform}', style='green')
if target.accelerators:
output(' Detected Accelerators: ', style='green')
for a in target.accelerators:
output(f' - {a.model} {a.memory_size}GB', style='green')
else:
output(' Detected Accelerators: None', style='yellow')
target = get_local_machine_spec()
output(f' Detected Platform: {target.platform}', style='green')
if target.accelerators:
output(' Detected Accelerators: ', style='green')
for a in target.accelerators:
output(f' - {a.model} {a.memory_size}GB', style='green')
else:
output(' Detected Accelerators: None', style='green')
models = list_bento()
if not models:
output('No model found, you probably need to update the model repo:', style='red')
output(' $ openllm repo update', style='orange')
raise typer.Exit(1)
models = list_bento(repo_name=repo)
if not models:
output('No model found, you probably need to update the model repo:', style='red')
output(' $ openllm repo update', style='orange')
raise typer.Exit(1)
bento_name, repo = _select_bento_name(models, target)
bento, score = _select_bento_version(models, target, bento_name, repo)
_select_action(bento, score)
bento_name, repo = _select_bento_name(models, target)
bento, score = _select_bento_version(models, target, bento_name, repo)
_select_action(bento, score)
@app.command(help='start an OpenAI API compatible chat server and chat in browser')
def serve(
model: typing.Annotated[str, typer.Argument()] = '',
repo: typing.Optional[str] = None,
port: int = 3000,
verbose: bool = False,
model: typing.Annotated[str, typer.Argument()] = '',
repo: typing.Optional[str] = None,
port: int = 3000,
verbose: bool = False,
) -> None:
cmd_update()
if verbose:
VERBOSE_LEVEL.set(20)
target = get_local_machine_spec()
bento = ensure_bento(model, target=target, repo_name=repo)
local_serve(bento, port=port)
cmd_update()
if verbose:
VERBOSE_LEVEL.set(20)
target = get_local_machine_spec()
bento = ensure_bento(model, target=target, repo_name=repo)
local_serve(bento, port=port)
@app.command(help='run the model and chat in terminal')
def run(
model: typing.Annotated[str, typer.Argument()] = '',
repo: typing.Optional[str] = None,
port: typing.Optional[int] = None,
timeout: int = 600,
verbose: bool = False,
model: typing.Annotated[str, typer.Argument()] = '',
repo: typing.Optional[str] = None,
port: typing.Optional[int] = None,
timeout: int = 600,
verbose: bool = False,
) -> None:
cmd_update()
if verbose:
VERBOSE_LEVEL.set(20)
target = get_local_machine_spec()
bento = ensure_bento(model, target=target, repo_name=repo)
if port is None:
port = random.randint(30000, 40000)
local_run(bento, port=port, timeout=timeout)
cmd_update()
if verbose:
VERBOSE_LEVEL.set(20)
target = get_local_machine_spec()
bento = ensure_bento(model, target=target, repo_name=repo)
if port is None:
port = random.randint(30000, 40000)
local_run(bento, port=port, timeout=timeout)
@app.command(help='deploy production-ready OpenAI API-compatible server to BentoCloud')
def deploy(
model: typing.Annotated[str, typer.Argument()] = '',
instance_type: typing.Optional[str] = None,
repo: typing.Optional[str] = None,
verbose: bool = False,
env: typing.Optional[list[str]] = typer.Option(
None,
'--env',
help='Environment variables to pass to the deployment command. Format: NAME or NAME=value. Can be specified multiple times.',
),
model: typing.Annotated[str, typer.Argument()] = '',
instance_type: typing.Optional[str] = None,
repo: typing.Optional[str] = None,
verbose: bool = False,
env: typing.Optional[list[str]] = typer.Option(
None,
'--env',
help='Environment variables to pass to the deployment command. Format: NAME or NAME=value. Can be specified multiple times.',
),
) -> None:
cmd_update()
if verbose:
VERBOSE_LEVEL.set(20)
bento = ensure_bento(model, repo_name=repo)
if instance_type is not None:
return cloud_deploy(bento, DeploymentTarget(accelerators=[], name=instance_type), cli_envs=env)
targets = sorted(
filter(lambda x: can_run(bento, x) > 0, get_cloud_machine_spec()),
key=lambda x: can_run(bento, x),
reverse=True,
)
if not targets:
output('No available instance type, check your bentocloud account', style='red')
raise typer.Exit(1)
target = targets[0]
output(f'Recommended instance type: {target.name}', style='green')
cloud_deploy(bento, target, cli_envs=env)
cmd_update()
if verbose:
VERBOSE_LEVEL.set(20)
bento = ensure_bento(model, repo_name=repo)
if instance_type is not None:
return cloud_deploy(bento, DeploymentTarget(accelerators=[], name=instance_type), cli_envs=env)
targets = sorted(
filter(lambda x: can_run(bento, x) > 0, get_cloud_machine_spec()),
key=lambda x: can_run(bento, x),
reverse=True,
)
if not targets:
output('No available instance type, check your bentocloud account', style='red')
raise typer.Exit(1)
target = targets[0]
output(f'Recommended instance type: {target.name}', style='green')
cloud_deploy(bento, target, cli_envs=env)
@app.callback(invoke_without_command=True)
def typer_callback(
verbose: int = 0,
do_not_track: bool = typer.Option(
False, '--do-not-track', help='Whether to disable usage tracking', envvar=DO_NOT_TRACK
),
version: bool = typer.Option(False, '--version', '-v', help='Show version'),
verbose: int = 0,
do_not_track: bool = typer.Option(
False, '--do-not-track', help='Whether to disable usage tracking', envvar=DO_NOT_TRACK
),
version: bool = typer.Option(False, '--version', '-v', help='Show version'),
) -> None:
if verbose:
VERBOSE_LEVEL.set(verbose)
if version:
output(
f'openllm, {importlib.metadata.version("openllm")}\nPython ({platform.python_implementation()}) {platform.python_version()}'
)
sys.exit(0)
if do_not_track:
os.environ[DO_NOT_TRACK] = str(True)
if verbose:
VERBOSE_LEVEL.set(verbose)
if version:
output(
f'openllm, {importlib.metadata.version("openllm")}\nPython ({platform.python_implementation()}) {platform.python_version()}'
)
sys.exit(0)
if do_not_track:
os.environ[DO_NOT_TRACK] = str(True)
if __name__ == '__main__':
app()
app()
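
One way to smoke-test the reworked CLI surface is Typer's test runner. The import path of `app` below is an assumption (use wherever the Typer application defined in this file actually lives), and the check is deliberately limited to the exit code:

  from typer.testing import CliRunner

  # Assumption: `app` is importable from the package's entry-point module.
  from openllm.__main__ import app

  runner = CliRunner()
  result = runner.invoke(app, ['--version'])
  assert result.exit_code == 0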


@@ -9,129 +9,141 @@ from openllm.common import BentoInfo, DeploymentTarget, output, Accelerator
def parse_memory_string(v: typing.Any) -> typing.Any:
"""Parse memory strings like "60Gi" into float."""
if isinstance(v, str):
match = re.match(r'(\d+(\.\d+)?)\s*Gi$', v, re.IGNORECASE)
if match:
return float(match.group(1))
# Pass other types (including numbers or other strings for standard float conversion) through
return v
"""Parse memory strings like "60Gi" into float."""
if isinstance(v, str):
match = re.match(r'(\d+(\.\d+)?)\s*Gi$', v, re.IGNORECASE)
if match:
return float(match.group(1))
# Pass other types (including numbers or other strings for standard float conversion) through
return v
class Resource(pydantic.BaseModel):
memory: typing.Annotated[float, BeforeValidator(parse_memory_string)] = 0.0
cpu: int = 0
gpu: int = 0
gpu_type: str = ''
memory: typing.Annotated[float, BeforeValidator(parse_memory_string)] = 0.0
cpu: int = 0
gpu: int = 0
gpu_type: str = ''
@override
def __hash__(self) -> int:
return hash((self.cpu, self.memory, self.gpu, self.gpu_type))
@override
def __hash__(self) -> int:
return hash((self.cpu, self.memory, self.gpu, self.gpu_type))
def __bool__(self) -> bool:
return any(value is not None for value in self.__dict__.values())
def __bool__(self) -> bool:
return any(value is not None for value in self.__dict__.values())
ACCELERATOR_SPECS: dict[str, Accelerator] = {
'nvidia-gtx-1650': Accelerator(model='GTX 1650', memory_size=4.0),
'nvidia-gtx-1060': Accelerator(model='GTX 1060', memory_size=6.0),
'nvidia-gtx-1080-ti': Accelerator(model='GTX 1080 Ti', memory_size=11.0),
'nvidia-rtx-3060': Accelerator(model='RTX 3060', memory_size=12.0),
'nvidia-rtx-3060-ti': Accelerator(model='RTX 3060 Ti', memory_size=8.0),
'nvidia-rtx-3070-ti': Accelerator(model='RTX 3070 Ti', memory_size=8.0),
'nvidia-rtx-3080': Accelerator(model='RTX 3080', memory_size=10.0),
'nvidia-rtx-3080-ti': Accelerator(model='RTX 3080 Ti', memory_size=12.0),
'nvidia-rtx-3090': Accelerator(model='RTX 3090', memory_size=24.0),
'nvidia-rtx-4070-ti': Accelerator(model='RTX 4070 Ti', memory_size=12.0),
'nvidia-tesla-p4': Accelerator(model='P4', memory_size=8.0),
'nvidia-tesla-p100': Accelerator(model='P100', memory_size=16.0),
'nvidia-tesla-k80': Accelerator(model='K80', memory_size=12.0),
'nvidia-tesla-t4': Accelerator(model='T4', memory_size=16.0),
'nvidia-tesla-v100': Accelerator(model='V100', memory_size=16.0),
'nvidia-l4': Accelerator(model='L4', memory_size=24.0),
'nvidia-tesla-l4': Accelerator(model='L4', memory_size=24.0),
'nvidia-tesla-a10g': Accelerator(model='A10G', memory_size=24.0),
'nvidia-a100-80g': Accelerator(model='A100', memory_size=80.0),
'nvidia-a100-80gb': Accelerator(model='A100', memory_size=80.0),
'nvidia-tesla-a100': Accelerator(model='A100', memory_size=40.0),
'nvidia-gtx-1650': Accelerator(model='GTX 1650', memory_size=4.0),
'nvidia-gtx-1060': Accelerator(model='GTX 1060', memory_size=6.0),
'nvidia-gtx-1080-ti': Accelerator(model='GTX 1080 Ti', memory_size=11.0),
'nvidia-rtx-3060': Accelerator(model='RTX 3060', memory_size=12.0),
'nvidia-rtx-3060-ti': Accelerator(model='RTX 3060 Ti', memory_size=8.0),
'nvidia-rtx-3070-ti': Accelerator(model='RTX 3070 Ti', memory_size=8.0),
'nvidia-rtx-3080': Accelerator(model='RTX 3080', memory_size=10.0),
'nvidia-rtx-3080-ti': Accelerator(model='RTX 3080 Ti', memory_size=12.0),
'nvidia-rtx-3090': Accelerator(model='RTX 3090', memory_size=24.0),
'nvidia-rtx-4070-ti': Accelerator(model='RTX 4070 Ti', memory_size=12.0),
'nvidia-tesla-p4': Accelerator(model='P4', memory_size=8.0),
'nvidia-tesla-p100': Accelerator(model='P100', memory_size=16.0),
'nvidia-tesla-k80': Accelerator(model='K80', memory_size=12.0),
'nvidia-tesla-t4': Accelerator(model='T4', memory_size=16.0),
'nvidia-tesla-v100': Accelerator(model='V100', memory_size=16.0),
'nvidia-l4': Accelerator(model='L4', memory_size=24.0),
'nvidia-tesla-l4': Accelerator(model='L4', memory_size=24.0),
'nvidia-tesla-a10g': Accelerator(model='A10G', memory_size=24.0),
'nvidia-a100-80g': Accelerator(model='A100', memory_size=80.0),
'nvidia-a100-80gb': Accelerator(model='A100', memory_size=80.0),
'nvidia-tesla-a100': Accelerator(model='A100', memory_size=40.0),
'nvidia-tesla-h100': Accelerator(model='H100', memory_size=80.0),
'nvidia-h200-141gb': Accelerator(model='H200', memory_size=141.0),
'nvidia-blackwell-b100': Accelerator(model='B100', memory_size=192.0),
'nvidia-blackwell-gb200': Accelerator(model='GB200', memory_size=192.0),
}
@functools.lru_cache
def get_local_machine_spec() -> DeploymentTarget:
if psutil.MACOS:
return DeploymentTarget(accelerators=[], source='local', platform='macos')
if psutil.MACOS:
return DeploymentTarget(accelerators=[], source='local', platform='macos')
if psutil.WINDOWS:
platform = 'windows'
elif psutil.LINUX:
platform = 'linux'
else:
raise NotImplementedError('Unsupported platform')
if psutil.WINDOWS:
platform = 'windows'
elif psutil.LINUX:
platform = 'linux'
else:
raise NotImplementedError('Unsupported platform')
from pynvml import (
nvmlDeviceGetCount,
nvmlDeviceGetCudaComputeCapability,
nvmlDeviceGetHandleByIndex,
nvmlDeviceGetMemoryInfo,
nvmlDeviceGetName,
nvmlInit,
nvmlShutdown,
)
from pynvml import (
nvmlDeviceGetCount,
nvmlDeviceGetCudaComputeCapability,
nvmlDeviceGetHandleByIndex,
nvmlDeviceGetMemoryInfo,
nvmlDeviceGetName,
nvmlInit,
nvmlShutdown,
)
try:
nvmlInit()
device_count = nvmlDeviceGetCount()
accelerators: list[Accelerator] = []
for i in range(device_count):
handle = nvmlDeviceGetHandleByIndex(i)
name = nvmlDeviceGetName(handle)
memory_info = nvmlDeviceGetMemoryInfo(handle)
accelerators.append(Accelerator(model=name, memory_size=math.ceil(int(memory_info.total) / 1024**3)))
compute_capability = nvmlDeviceGetCudaComputeCapability(handle)
if compute_capability < (7, 5):
output(
f'GPU {name} with compute capability {compute_capability} '
'may not be supported, 7.5 or higher is recommended. check '
'https://developer.nvidia.com/cuda-gpus for more information',
style='yellow',
)
nvmlShutdown()
return DeploymentTarget(accelerators=accelerators, source='local', platform=platform)
except Exception as e:
output(
'Failed to get local GPU info. Ensure nvidia driver is installed to enable local GPU deployment',
style='yellow',
)
output(f'Error: {e}', style='red', level=20)
return DeploymentTarget(accelerators=[], source='local', platform=platform)
try:
nvmlInit()
device_count = nvmlDeviceGetCount()
accelerators: list[Accelerator] = []
for i in range(device_count):
handle = nvmlDeviceGetHandleByIndex(i)
name = nvmlDeviceGetName(handle)
memory_info = nvmlDeviceGetMemoryInfo(handle)
accelerators.append(
Accelerator(model=name, memory_size=math.ceil(int(memory_info.total) / 1024**3))
)
compute_capability = nvmlDeviceGetCudaComputeCapability(handle)
if compute_capability < (7, 5):
output(
f'GPU {name} with compute capability {compute_capability} '
'may not be supported, 7.5 or higher is recommended. check '
'https://developer.nvidia.com/cuda-gpus for more information',
style='yellow',
)
nvmlShutdown()
return DeploymentTarget(accelerators=accelerators, source='local', platform=platform)
except Exception as e:
output(
'Failed to get local GPU info. Ensure nvidia driver is installed to enable local GPU deployment',
style='yellow',
)
output(f'Error: {e}', style='red', level=20)
return DeploymentTarget(accelerators=[], source='local', platform=platform)
@functools.lru_cache(typed=True)
def can_run(bento: BentoInfo, target: DeploymentTarget | None = None) -> float:
"""
Calculate if the bento can be deployed on the target.
"""
if target is None:
target = get_local_machine_spec()
"""
Calculate if the bento can be deployed on the target.
"""
if target is None:
target = get_local_machine_spec()
resource_spec = Resource(**(bento.bento_yaml['services'][0]['config'].get('resources', {})))
labels = bento.bento_yaml.get('labels', {})
platforms = labels.get('platforms', 'linux').split(',')
resource_spec = Resource(**(bento.bento_yaml['services'][0]['config'].get('resources', {})))
labels = bento.bento_yaml.get('labels', {})
platforms = labels.get('platforms', 'linux').split(',')
if target.platform not in platforms:
return 0.0
if target.platform not in platforms:
return 0.0
# return 1.0 if no resource is specified
if not resource_spec:
return 0.5
# return 1.0 if no resource is specified
if not resource_spec:
return 0.5
if resource_spec.gpu > 0:
required_gpu = ACCELERATOR_SPECS[resource_spec.gpu_type]
filtered_accelerators = [ac for ac in target.accelerators if ac.memory_size >= required_gpu.memory_size]
if resource_spec.gpu > len(filtered_accelerators):
return 0.0
return required_gpu.memory_size * resource_spec.gpu / sum(ac.memory_size for ac in target.accelerators)
if target.accelerators:
return 0.01 / sum(ac.memory_size for ac in target.accelerators)
return 1.0
if resource_spec.gpu > 0:
required_gpu = ACCELERATOR_SPECS[resource_spec.gpu_type]
filtered_accelerators = [
ac for ac in target.accelerators if ac.memory_size >= required_gpu.memory_size
]
if resource_spec.gpu > len(filtered_accelerators):
return 0.0
return (
required_gpu.memory_size
* resource_spec.gpu
/ sum(ac.memory_size for ac in target.accelerators)
)
if target.accelerators:
return 0.01 / sum(ac.memory_size for ac in target.accelerators)
return 1.0
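
parse_memory_string only normalizes strings of the form "<number>Gi"; everything else passes through so pydantic's usual coercion still applies, and can_run scores a GPU bento as required VRAM times GPU count divided by the total VRAM available on the target. A small standalone restatement of that behaviour (not an import from the package):

  import re

  def parse_memory_string(v):
    # Mirrors the validator above: "60Gi" -> 60.0; anything else passes through untouched.
    if isinstance(v, str):
      match = re.match(r'(\d+(\.\d+)?)\s*Gi$', v, re.IGNORECASE)
      if match:
        return float(match.group(1))
    return v

  assert parse_memory_string('60Gi') == 60.0
  assert parse_memory_string(16) == 16            # non-strings pass through
  assert parse_memory_string('16GB') == '16GB'    # unrecognized strings pass through

  # can_run scoring example: a bento needing one 24G GPU on a host with a single 24G card scores
  # required_gpu.memory_size * resource_spec.gpu / sum(available) = 24 * 1 / 24 = 1.0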


@@ -7,99 +7,99 @@ DO_NOT_TRACK = 'BENTOML_DO_NOT_TRACK'
class EventMeta(abc.ABC):
@property
def event_name(self) -> str:
# camel case to snake case
event_name = re.sub(r'(?<!^)(?=[A-Z])', '_', self.__class__.__name__).lower()
# remove "_event" suffix
suffix_to_remove = '_event'
if event_name.endswith(suffix_to_remove):
event_name = event_name[: -len(suffix_to_remove)]
return event_name
@property
def event_name(self) -> str:
# camel case to snake case
event_name = re.sub(r'(?<!^)(?=[A-Z])', '_', self.__class__.__name__).lower()
# remove "_event" suffix
suffix_to_remove = '_event'
if event_name.endswith(suffix_to_remove):
event_name = event_name[: -len(suffix_to_remove)]
return event_name
@attr.define
class CliEvent(EventMeta):
cmd_group: str
cmd_name: str
duration_in_ms: float = attr.field(default=0)
error_type: typing.Optional[str] = attr.field(default=None)
return_code: typing.Optional[int] = attr.field(default=None)
cmd_group: str
cmd_name: str
duration_in_ms: float = attr.field(default=0)
error_type: typing.Optional[str] = attr.field(default=None)
return_code: typing.Optional[int] = attr.field(default=None)
@attr.define
class OpenllmCliEvent(CliEvent):
pass
pass
class OrderedCommands(typer.core.TyperGroup):
def list_commands(self, ctx: click.Context) -> list[str]:
return list(self.commands)
def list_commands(self, ctx: click.Context) -> list[str]:
return list(self.commands)
class OpenLLMTyper(typer.Typer):
def __init__(self, *args: typing.Any, **kwargs: typing.Any):
no_args_is_help: bool = kwargs.pop('no_args_is_help', True)
context_settings: dict[str, typing.Any] = kwargs.pop('context_settings', {})
if 'help_option_names' not in context_settings:
context_settings['help_option_names'] = ('-h', '--help')
if 'max_content_width' not in context_settings:
context_settings['max_content_width'] = int(os.environ.get('COLUMNS', str(120)))
klass = kwargs.pop('cls', OrderedCommands)
def __init__(self, *args: typing.Any, **kwargs: typing.Any):
no_args_is_help: bool = kwargs.pop('no_args_is_help', True)
context_settings: dict[str, typing.Any] = kwargs.pop('context_settings', {})
if 'help_option_names' not in context_settings:
context_settings['help_option_names'] = ('-h', '--help')
if 'max_content_width' not in context_settings:
context_settings['max_content_width'] = int(os.environ.get('COLUMNS', str(120)))
klass = kwargs.pop('cls', OrderedCommands)
super().__init__(
*args, cls=klass, no_args_is_help=no_args_is_help, context_settings=context_settings, **kwargs
)
super().__init__(
*args, cls=klass, no_args_is_help=no_args_is_help, context_settings=context_settings, **kwargs
)
# NOTE: Since OpenLLMTyper only wraps command to add analytics, the default type-hint for @app.command
# does not change, hence the below hijacking.
if typing.TYPE_CHECKING:
command = typer.Typer.command
else:
# NOTE: Since OpenLLMTyper only wraps command to add analytics, the default type-hint for @app.command
# does not change, hence the below hijacking.
if typing.TYPE_CHECKING:
command = typer.Typer.command
else:
def command(self, *args: typing.Any, **kwargs: typing.Any):
def decorator(f):
@functools.wraps(f)
@click.pass_context
def wrapped(ctx: click.Context, *args, **kwargs):
from bentoml._internal.utils.analytics import track
def command(self, *args: typing.Any, **kwargs: typing.Any):
def decorator(f):
@functools.wraps(f)
@click.pass_context
def wrapped(ctx: click.Context, *args, **kwargs):
from bentoml._internal.utils.analytics import track
do_not_track = os.environ.get(DO_NOT_TRACK, str(False)).lower() == 'true'
do_not_track = os.environ.get(DO_NOT_TRACK, str(False)).lower() == 'true'
# so we know that the root program is openllm
command_name = ctx.info_name
if ctx.parent.parent is not None:
# openllm model list
command_group = ctx.parent.info_name
elif ctx.parent.info_name == ctx.find_root().info_name:
# openllm run
command_group = 'openllm'
# so we know that the root program is openllm
command_name = ctx.info_name
if ctx.parent.parent is not None:
# openllm model list
command_group = ctx.parent.info_name
elif ctx.parent.info_name == ctx.find_root().info_name:
# openllm run
command_group = 'openllm'
if do_not_track:
return f(*args, **kwargs)
start_time = time.time_ns()
try:
return_value = f(*args, **kwargs)
duration_in_ns = time.time_ns() - start_time
track(
OpenllmCliEvent(
cmd_group=command_group, cmd_name=command_name, duration_in_ms=duration_in_ns / 1e6
)
)
return return_value
except BaseException as e:
duration_in_ns = time.time_ns() - start_time
track(
OpenllmCliEvent(
cmd_group=command_group,
cmd_name=command_name,
duration_in_ms=duration_in_ns / 1e6,
error_type=type(e).__name__,
return_code=(2 if isinstance(e, KeyboardInterrupt) else 1),
)
)
raise
if do_not_track:
return f(*args, **kwargs)
start_time = time.time_ns()
try:
return_value = f(*args, **kwargs)
duration_in_ns = time.time_ns() - start_time
track(
OpenllmCliEvent(
cmd_group=command_group, cmd_name=command_name, duration_in_ms=duration_in_ns / 1e6
)
)
return return_value
except BaseException as e:
duration_in_ns = time.time_ns() - start_time
track(
OpenllmCliEvent(
cmd_group=command_group,
cmd_name=command_name,
duration_in_ms=duration_in_ns / 1e6,
error_type=type(e).__name__,
return_code=(2 if isinstance(e, KeyboardInterrupt) else 1),
)
)
raise
return typer.Typer.command(self, *args, **kwargs)(wrapped)
return typer.Typer.command(self, *args, **kwargs)(wrapped)
return decorator
return decorator
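
EventMeta.event_name converts the class name from CamelCase to snake_case and strips a trailing `_event`, so OpenllmCliEvent is reported as `openllm_cli`. A quick standalone check of that transformation (restated here, not imported):

  import re

  def event_name(cls_name: str) -> str:
    # CamelCase -> snake_case, then drop a trailing "_event" suffix, as in EventMeta above.
    name = re.sub(r'(?<!^)(?=[A-Z])', '_', cls_name).lower()
    return name[: -len('_event')] if name.endswith('_event') else name

  assert event_name('OpenllmCliEvent') == 'openllm_cli'
  assert event_name('CliEvent') == 'cli'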


@@ -12,72 +12,72 @@ HUGGINGFACE_CACHE = pathlib.Path.home() / '.cache' / 'huggingface' / 'hub'
def _du(path: pathlib.Path) -> int:
seen_paths = set()
used_space = 0
seen_paths = set()
used_space = 0
for f in path.rglob('*'):
if os.name == 'nt': # Windows system
# On Windows, directly add file sizes without considering hard links
used_space += f.stat().st_size
else:
# On non-Windows systems, use inodes to avoid double counting
stat = f.stat()
if stat.st_ino not in seen_paths:
seen_paths.add(stat.st_ino)
used_space += stat.st_size
return used_space
for f in path.rglob('*'):
if os.name == 'nt': # Windows system
# On Windows, directly add file sizes without considering hard links
used_space += f.stat().st_size
else:
# On non-Windows systems, use inodes to avoid double counting
stat = f.stat()
if stat.st_ino not in seen_paths:
seen_paths.add(stat.st_ino)
used_space += stat.st_size
return used_space
@app.command(help='Clean up all the cached models from huggingface')
def model_cache(verbose: bool = False) -> None:
if verbose:
VERBOSE_LEVEL.set(20)
used_space = _du(HUGGINGFACE_CACHE)
sure = questionary.confirm(
f'This will remove all models cached by Huggingface (~{used_space / 1024 / 1024:.2f}MB), are you sure?'
).ask()
if not sure:
return
shutil.rmtree(HUGGINGFACE_CACHE, ignore_errors=True)
output('All models cached by Huggingface have been removed', style='green')
if verbose:
VERBOSE_LEVEL.set(20)
used_space = _du(HUGGINGFACE_CACHE)
sure = questionary.confirm(
f'This will remove all models cached by Huggingface (~{used_space / 1024 / 1024:.2f}MB), are you sure?'
).ask()
if not sure:
return
shutil.rmtree(HUGGINGFACE_CACHE, ignore_errors=True)
output('All models cached by Huggingface have been removed', style='green')
@app.command(help='Clean up all the virtual environments created by OpenLLM')
def venvs(verbose: bool = False) -> None:
if verbose:
VERBOSE_LEVEL.set(20)
if verbose:
VERBOSE_LEVEL.set(20)
used_space = _du(VENV_DIR)
sure = questionary.confirm(
f'This will remove all virtual environments created by OpenLLM (~{used_space / 1024 / 1024:.2f}MB), are you sure?'
).ask()
if not sure:
return
shutil.rmtree(VENV_DIR, ignore_errors=True)
output('All virtual environments have been removed', style='green')
used_space = _du(VENV_DIR)
sure = questionary.confirm(
f'This will remove all virtual environments created by OpenLLM (~{used_space / 1024 / 1024:.2f}MB), are you sure?'
).ask()
if not sure:
return
shutil.rmtree(VENV_DIR, ignore_errors=True)
output('All virtual environments have been removed', style='green')
@app.command(help='Clean up all the repositories cloned by OpenLLM')
def repos(verbose: bool = False) -> None:
if verbose:
VERBOSE_LEVEL.set(20)
shutil.rmtree(REPO_DIR, ignore_errors=True)
output('All repositories have been removed', style='green')
if verbose:
VERBOSE_LEVEL.set(20)
shutil.rmtree(REPO_DIR, ignore_errors=True)
output('All repositories have been removed', style='green')
@app.command(help='Reset configurations to default')
def configs(verbose: bool = False) -> None:
if verbose:
VERBOSE_LEVEL.set(20)
shutil.rmtree(CONFIG_FILE, ignore_errors=True)
output('All configurations have been reset', style='green')
if verbose:
VERBOSE_LEVEL.set(20)
shutil.rmtree(CONFIG_FILE, ignore_errors=True)
output('All configurations have been reset', style='green')
@app.command(name='all', help='Clean up all above and bring OpenLLM to a fresh start')
def all_cache(verbose: bool = False) -> None:
if verbose:
VERBOSE_LEVEL.set(20)
repos()
venvs()
model_cache()
configs()
if verbose:
VERBOSE_LEVEL.set(20)
repos()
venvs()
model_cache()
configs()


@@ -11,158 +11,171 @@ app = OpenLLMTyper()
def resolve_cloud_config() -> pathlib.Path:
env = os.environ.get('BENTOML_HOME')
if env is not None:
return pathlib.Path(env) / '.yatai.yaml'
return pathlib.Path.home() / 'bentoml' / '.yatai.yaml'
env = os.environ.get('BENTOML_HOME')
if env is not None:
return pathlib.Path(env) / '.yatai.yaml'
return pathlib.Path.home() / 'bentoml' / '.yatai.yaml'
def _get_deploy_cmd(
bento: BentoInfo, target: typing.Optional[DeploymentTarget] = None, cli_envs: typing.Optional[list[str]] = None
bento: BentoInfo,
target: typing.Optional[DeploymentTarget] = None,
cli_envs: typing.Optional[list[str]] = None,
) -> tuple[list[str], EnvVars]:
cmd = ['bentoml', 'deploy', bento.bentoml_tag]
env = EnvVars({'BENTOML_HOME': f'{bento.repo.path}/bentoml'})
# Process CLI env vars first to determine overrides
explicit_envs: dict[str, str] = {}
if cli_envs:
for env_var in cli_envs:
if '=' in env_var:
name, value = env_var.split('=', 1)
explicit_envs[name] = value
else:
name = env_var
value = typing.cast(str, os.environ.get(name))
if value is None:
output(
f"Environment variable '{name}' specified via --env but not found in the current environment.",
style='red',
)
raise typer.Exit(1)
explicit_envs[name] = value
# Process envs defined in bento.yaml, skipping those overridden by CLI
required_envs = bento.bento_yaml.get('envs', [])
required_env_names = [env['name'] for env in required_envs if 'name' in env and env['name'] not in explicit_envs]
if required_env_names:
output(
f'This model requires the following environment variables to run (unless overridden via --env): {required_env_names!r}',
style='yellow',
)
for env_info in required_envs:
name = typing.cast(str, env_info.get('name'))
if not name or name in explicit_envs:
continue
if os.environ.get(name):
default = os.environ[name]
elif 'value' in env_info:
default = env_info['value']
else:
default = ''
if INTERACTIVE.get():
import questionary
value = questionary.text(f'{name}: (from bento.yaml)', default=default).ask()
else:
if default == '':
output(f'Environment variable {name} (from bento.yaml) is required but not provided', style='red')
raise typer.Exit(1)
else:
value = default
if value is None:
raise typer.Exit(1)
cmd += ['--env', f'{name}={value}']
# Add explicitly provided env vars from CLI
for name, value in explicit_envs.items():
cmd += ['--env', f'{name}={value}']
if target:
cmd += ['--instance-type', target.name]
base_config = resolve_cloud_config()
if not base_config.exists():
raise Exception('Cannot find cloud config.')
# remove before copy
if (bento.repo.path / 'bentoml' / '.yatai.yaml').exists():
(bento.repo.path / 'bentoml' / '.yatai.yaml').unlink()
shutil.copy(base_config, bento.repo.path / 'bentoml' / '.yatai.yaml')
return cmd, env
cmd = ['bentoml', 'deploy', bento.bentoml_tag]
env = EnvVars({'BENTOML_HOME': f'{bento.repo.path}/bentoml'})
# Process CLI env vars first to determine overrides
explicit_envs: dict[str, str] = {}
if cli_envs:
for env_var in cli_envs:
if '=' in env_var:
name, value = env_var.split('=', 1)
explicit_envs[name] = value
else:
name = env_var
value = typing.cast(str, os.environ.get(name))
if value is None:
output(
f"Environment variable '{name}' specified via --env but not found in the current environment.",
style='red',
)
raise typer.Exit(1)
explicit_envs[name] = value
# Process envs defined in bento.yaml, skipping those overridden by CLI
required_envs = bento.bento_yaml.get('envs', [])
required_env_names = [
env['name']
for env in required_envs
if 'name' in env and env['name'] not in explicit_envs and not env.get('value')
]
if required_env_names:
output(
f'This model requires the following environment variables to run (unless overridden via --env): {required_env_names!r}',
style='green',
)
for env_info in required_envs:
name = typing.cast(str, env_info.get('name'))
if not name or name in explicit_envs or env_info.get('value', None) is not None:
continue
if os.environ.get(name):
default = os.environ[name]
elif 'value' in env_info:
default = env_info['value']
else:
default = ''
if INTERACTIVE.get():
import questionary
value = questionary.text(f'{name}: (from bento.yaml)', default=default).ask()
else:
if default == '':
output(
f'Environment variable {name} (from bento.yaml) is required but not provided', style='red'
)
raise typer.Exit(1)
else:
value = default
if value is None:
raise typer.Exit(1)
cmd += ['--env', f'{name}={value}']
# Add explicitly provided env vars from CLI
for name, value in explicit_envs.items():
cmd += ['--env', f'{name}={value}']
if target:
cmd += ['--instance-type', target.name]
base_config = resolve_cloud_config()
if not base_config.exists():
raise Exception('Cannot find cloud config.')
# remove before copy
if (bento.repo.path / 'bentoml' / '.yatai.yaml').exists():
(bento.repo.path / 'bentoml' / '.yatai.yaml').unlink()
shutil.copy(base_config, bento.repo.path / 'bentoml' / '.yatai.yaml')
return cmd, env
def ensure_cloud_context() -> None:
import questionary
import questionary
cmd = ['bentoml', 'cloud', 'current-context']
try:
result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
context = json.loads(result)
output(f' bentoml already logged in: {context["endpoint"]}', style='green', level=20)
except subprocess.CalledProcessError:
output(' bentoml not logged in', style='red')
if not INTERACTIVE.get():
output('\n get bentoml logged in by:')
output(' $ bentoml cloud login', style='orange')
output('')
output(
""" * you may need to visit https://cloud.bentoml.com to get an account. you can also bring your own bentoml cluster (BYOC) to your team from https://bentoml.com/contact""",
style='yellow',
)
raise typer.Exit(1)
else:
action = questionary.select(
'Choose an action:', choices=['I have a BentoCloud account', 'get an account in two minutes']
).ask()
if action is None:
raise typer.Exit(1)
elif action == 'get an account in two minutes':
output('Please visit https://cloud.bentoml.com to get your token', style='yellow')
endpoint = questionary.text('Enter the endpoint: (similar to https://my-org.cloud.bentoml.com)').ask()
if endpoint is None:
raise typer.Exit(1)
token = questionary.text('Enter your token: (similar to cniluaxxxxxxxx)').ask()
if token is None:
raise typer.Exit(1)
cmd = ['bentoml', 'cloud', 'login', '--api-token', token, '--endpoint', endpoint]
try:
result = subprocess.check_output(cmd)
output(' Logged in successfully', style='green')
except subprocess.CalledProcessError:
output(' Failed to login', style='red')
raise typer.Exit(1)
cmd = ['bentoml', 'cloud', 'current-context']
try:
result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
context = json.loads(result)
output(f' bentoml already logged in: {context["endpoint"]}', style='green', level=20)
except subprocess.CalledProcessError:
output(' bentoml not logged in', style='red')
if not INTERACTIVE.get():
output('\n get bentoml logged in by:')
output(' $ bentoml cloud login', style='orange')
output('')
output(
""" * you may need to visit https://cloud.bentoml.com to get an account. you can also bring your own bentoml cluster (BYOC) to your team from https://bentoml.com/contact""",
style='yellow',
)
raise typer.Exit(1)
else:
action = questionary.select(
'Choose an action:',
choices=['I have a BentoCloud account', 'get an account in two minutes'],
).ask()
if action is None:
raise typer.Exit(1)
elif action == 'get an account in two minutes':
output('Please visit https://cloud.bentoml.com to get your token', style='yellow')
endpoint = questionary.text(
'Enter the endpoint: (similar to https://my-org.cloud.bentoml.com)'
).ask()
if endpoint is None:
raise typer.Exit(1)
token = questionary.text('Enter your token: (similar to cniluaxxxxxxxx)').ask()
if token is None:
raise typer.Exit(1)
cmd = ['bentoml', 'cloud', 'login', '--api-token', token, '--endpoint', endpoint]
try:
result = subprocess.check_output(cmd)
output(' Logged in successfully', style='green')
except subprocess.CalledProcessError:
output(' Failed to login', style='red')
raise typer.Exit(1)
def get_cloud_machine_spec() -> list[DeploymentTarget]:
ensure_cloud_context()
cmd = ['bentoml', 'deployment', 'list-instance-types', '-o', 'json']
try:
result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
instance_types = json.loads(result)
return [
DeploymentTarget(
source='cloud',
name=it['name'],
price=it['price'],
platform='linux',
accelerators=(
[ACCELERATOR_SPECS[it['gpu_type']] for _ in range(int(it['gpu']))]
if it.get('gpu') and it['gpu_type'] in ACCELERATOR_SPECS
else []
),
)
for it in instance_types
]
except (subprocess.CalledProcessError, json.JSONDecodeError):
output('Failed to get cloud instance types', style='red')
return []
ensure_cloud_context()
cmd = ['bentoml', 'deployment', 'list-instance-types', '-o', 'json']
try:
result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
instance_types = json.loads(result)
return [
DeploymentTarget(
source='cloud',
name=it['name'],
price=it['price'],
platform='linux',
accelerators=(
[ACCELERATOR_SPECS[it['gpu_type']] for _ in range(int(it['gpu']))]
if it.get('gpu') and it['gpu_type'] in ACCELERATOR_SPECS
else []
),
)
for it in instance_types
]
except (subprocess.CalledProcessError, json.JSONDecodeError):
output('Failed to get cloud instance types', style='red')
return []
def deploy(bento: BentoInfo, target: DeploymentTarget, cli_envs: typing.Optional[list[str]] = None) -> None:
ensure_cloud_context()
cmd, env = _get_deploy_cmd(bento, target, cli_envs=cli_envs)
run_command(cmd, env=env, cwd=None)
def deploy(
bento: BentoInfo, target: DeploymentTarget, cli_envs: typing.Optional[list[str]] = None
) -> None:
ensure_cloud_context()
cmd, env = _get_deploy_cmd(bento, target, cli_envs=cli_envs)
run_command(cmd, env=env, cwd=None)
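
The deploy path now accepts repeated `--env` flags: `NAME=value` passes the value through literally, while a bare `NAME` copies the value from the caller's environment and fails if it is unset; bento.yaml envs that already carry a value are no longer re-prompted. A standalone sketch of just that parsing rule (not an import from the package; the variable names are examples):

  import os

  def parse_cli_envs(cli_envs: list[str]) -> dict[str, str]:
    # NAME=value -> literal value; bare NAME -> copied from the current environment or an error.
    explicit: dict[str, str] = {}
    for item in cli_envs:
      if '=' in item:
        name, value = item.split('=', 1)
      else:
        name, value = item, os.environ.get(item)
        if value is None:
          raise SystemExit(f'--env {item}: not found in the current environment')
      explicit[name] = value
    return explicit

  os.environ['HF_TOKEN'] = 'hf_xxx'  # example value
  assert parse_cli_envs(['HF_TOKEN', 'VLLM_LOGGING_LEVEL=DEBUG']) == {
    'HF_TOKEN': 'hf_xxx',
    'VLLM_LOGGING_LEVEL': 'DEBUG',
  }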


@@ -31,401 +31,413 @@ T = typing.TypeVar('T')
class ContextVar(typing.Generic[T]):
def __init__(self, default: T):
self._stack: list[T] = []
self._default = default
def __init__(self, default: T):
self._stack: list[T] = []
self._default = default
def get(self) -> T:
if self._stack:
return self._stack[-1]
return self._default
def get(self) -> T:
if self._stack:
return self._stack[-1]
return self._default
def set(self, value: T) -> None:
self._stack.append(value)
def set(self, value: T) -> None:
self._stack.append(value)
@contextmanager
def patch(self, value: T) -> typing.Iterator[None]:
self._stack.append(value)
try:
yield
finally:
self._stack.pop()
@contextmanager
def patch(self, value: T) -> typing.Iterator[None]:
self._stack.append(value)
try:
yield
finally:
self._stack.pop()
VERBOSE_LEVEL = ContextVar(10)
VERBOSE_LEVEL = ContextVar(0)
INTERACTIVE = ContextVar(False)
def output(content: typing.Any, level: int = 0, style: str | None = None, end: str | None = None) -> None:
if level > VERBOSE_LEVEL.get():
return
def output(
content: typing.Any, level: int = 0, style: str | None = None, end: str | None = None
) -> None:
if level > VERBOSE_LEVEL.get():
return
if not isinstance(content, str):
out = io.StringIO()
pyaml.pprint(content, dst=out, sort_dicts=False, sort_keys=False)
questionary.print(out.getvalue(), style=style, end='' if end is None else end)
out.close()
else:
questionary.print(content, style=style, end='\n' if end is None else end)
if not isinstance(content, str):
out = io.StringIO()
pyaml.pprint(content, dst=out, sort_dicts=False, sort_keys=False)
questionary.print(out.getvalue(), style=style, end='' if end is None else end)
out.close()
else:
questionary.print(content, style=style, end='\n' if end is None else end)
class Config(pydantic.BaseModel):
repos: dict[str, str] = pydantic.Field(
default_factory=lambda: {'default': 'https://github.com/bentoml/openllm-models@main'}
)
default_repo: str = 'default'
repos: dict[str, str] = pydantic.Field(
default_factory=lambda: {'default': 'https://github.com/bentoml/openllm-models@main'}
)
default_repo: str = 'default'
def tolist(self) -> dict[str, typing.Any]:
return dict(repos=self.repos, default_repo=self.default_repo)
def tolist(self) -> dict[str, typing.Any]:
return dict(repos=self.repos, default_repo=self.default_repo)
def load_config() -> Config:
if CONFIG_FILE.exists():
try:
with open(CONFIG_FILE) as f:
return Config(**json.load(f))
except json.JSONDecodeError:
return Config()
return Config()
if CONFIG_FILE.exists():
try:
with open(CONFIG_FILE) as f:
return Config(**json.load(f))
except json.JSONDecodeError:
return Config()
return Config()
def save_config(config: Config) -> None:
with open(CONFIG_FILE, 'w') as f:
json.dump(config.tolist(), f, indent=2)
with open(CONFIG_FILE, 'w') as f:
json.dump(config.tolist(), f, indent=2)
class BentoMetadata(typing.TypedDict):
name: str
version: str
labels: dict[str, str]
envs: list[dict[str, str]]
services: list[dict[str, typing.Any]]
schema: dict[str, typing.Any]
name: str
version: str
labels: dict[str, str]
envs: list[dict[str, str]]
services: list[dict[str, typing.Any]]
schema: dict[str, typing.Any]
class EnvVars(UserDict[str, str]):
"""
A dictionary-like object that sorted by key and only keeps the environment variables that have a value.
"""
"""
A dictionary-like object that sorted by key and only keeps the environment variables that have a value.
"""
@classmethod
def __get_pydantic_core_schema__(
cls: type[EnvVars], source_type: type[typing.Any], handler: typing.Callable[..., typing.Any]
) -> core_schema.DictSchema:
return core_schema.dict_schema(core_schema.str_schema(), core_schema.str_schema())
@classmethod
def __get_pydantic_core_schema__(
cls: type[EnvVars], source_type: type[typing.Any], handler: typing.Callable[..., typing.Any]
) -> core_schema.DictSchema:
return core_schema.dict_schema(core_schema.str_schema(), core_schema.str_schema())
def __init__(self, data: typing.Mapping[str, str] | None = None):
super().__init__(data or {})
self.data = {k: v for k, v in sorted(self.data.items()) if v}
def __init__(self, data: typing.Mapping[str, str] | None = None):
super().__init__(data or {})
self.data = {k: v for k, v in sorted(self.data.items()) if v}
def __hash__(self) -> int:
return hash(tuple(sorted(self.data.items())))
def __hash__(self) -> int:
return hash(tuple(sorted(self.data.items())))
class RepoInfo(pydantic.BaseModel):
name: str
path: pathlib.Path
url: str
server: str
owner: str
repo: str
branch: str
name: str
path: pathlib.Path
url: str
server: str
owner: str
repo: str
branch: str
def tolist(self) -> str | dict[str, typing.Any] | None:
if VERBOSE_LEVEL.get() <= 0:
return f'{self.name} ({self.url}@{self.branch})'
if VERBOSE_LEVEL.get() <= 10:
return dict(name=self.name, url=f'{self.url}@{self.branch}', path=str(self.path))
if VERBOSE_LEVEL.get() <= 20:
return dict(
name=self.name,
url=f'{self.url}@{self.branch}',
path=str(self.path),
server=self.server,
owner=self.owner,
repo=self.repo,
)
return None
def tolist(self) -> str | dict[str, typing.Any] | None:
if VERBOSE_LEVEL.get() <= 0:
return f'{self.name} ({self.url}@{self.branch})'
if VERBOSE_LEVEL.get() <= 10:
return dict(name=self.name, url=f'{self.url}@{self.branch}', path=str(self.path))
if VERBOSE_LEVEL.get() <= 20:
return dict(
name=self.name,
url=f'{self.url}@{self.branch}',
path=str(self.path),
server=self.server,
owner=self.owner,
repo=self.repo,
)
return None
class BentoInfo(pydantic.BaseModel):
repo: RepoInfo
path: pathlib.Path
alias: str = ''
repo: RepoInfo
path: pathlib.Path
alias: str = ''
def __str__(self) -> str:
if self.repo.name == 'default':
return f'{self.tag}'
else:
return f'{self.repo.name}/{self.tag}'
def __str__(self) -> str:
if self.repo.name == 'default':
return f'{self.tag}'
else:
return f'{self.repo.name}/{self.tag}'
@override
def __hash__(self) -> int:
return md5(str(self.path))
@override
def __hash__(self) -> int:
return md5(str(self.path))
@property
def tag(self) -> str:
if self.alias:
return f'{self.path.parent.name}:{self.alias}'
return f'{self.path.parent.name}:{self.path.name}'
@property
def tag(self) -> str:
if self.alias:
return f'{self.path.parent.name}:{self.alias}'
return f'{self.path.parent.name}:{self.path.name}'
@property
def bentoml_tag(self) -> str:
return f'{self.path.parent.name}:{self.path.name}'
@property
def bentoml_tag(self) -> str:
return f'{self.path.parent.name}:{self.path.name}'
@property
def name(self) -> str:
return self.path.parent.name
@property
def name(self) -> str:
return self.path.parent.name
@property
def version(self) -> str:
return self.path.name
@property
def version(self) -> str:
return self.path.name
@property
def labels(self) -> dict[str, str]:
return self.bento_yaml['labels']
@property
def labels(self) -> dict[str, str]:
return self.bento_yaml['labels']
@property
def envs(self) -> list[dict[str, str]]:
return self.bento_yaml['envs']
@property
def envs(self) -> list[dict[str, str]]:
return self.bento_yaml['envs']
@functools.cached_property
def bento_yaml(self) -> BentoMetadata:
bento: BentoMetadata = yaml.safe_load((self.path / 'bento.yaml').read_text())
return bento
@functools.cached_property
def bento_yaml(self) -> BentoMetadata:
bento: BentoMetadata = yaml.safe_load((self.path / 'bento.yaml').read_text())
return bento
@functools.cached_property
def platforms(self) -> list[str]:
return self.bento_yaml['labels'].get('platforms', 'linux').split(',')
@functools.cached_property
def platforms(self) -> list[str]:
return self.bento_yaml['labels'].get('platforms', 'linux').split(',')
@functools.cached_property
def pretty_yaml(self) -> BentoMetadata | dict[str, typing.Any]:
def _pretty_routes(routes: list[dict[str, typing.Any]]) -> dict[str, typing.Any]:
return {
route['route']: {
'input': {k: v['type'] for k, v in route['input']['properties'].items()},
'output': route['output']['type'],
}
for route in routes
}
@functools.cached_property
def pretty_yaml(self) -> BentoMetadata | dict[str, typing.Any]:
def _pretty_routes(routes: list[dict[str, typing.Any]]) -> dict[str, typing.Any]:
return {
route['route']: {
'input': {k: v['type'] for k, v in route['input']['properties'].items()},
'output': route['output']['type'],
}
for route in routes
}
if len(self.bento_yaml['services']) == 1:
pretty_yaml: dict[str, typing.Any] = {
'apis': _pretty_routes(self.bento_yaml['schema']['routes']),
'resources': self.bento_yaml['services'][0]['config']['resources'],
'envs': self.bento_yaml['envs'],
'platforms': self.platforms,
}
return pretty_yaml
return self.bento_yaml
if len(self.bento_yaml['services']) == 1:
pretty_yaml: dict[str, typing.Any] = {
'apis': _pretty_routes(self.bento_yaml['schema']['routes']),
'resources': self.bento_yaml['services'][0]['config']['resources'],
'envs': self.bento_yaml['envs'],
'platforms': self.platforms,
}
return pretty_yaml
return self.bento_yaml
@functools.cached_property
def pretty_gpu(self) -> str:
from openllm.accelerator_spec import ACCELERATOR_SPECS
@functools.cached_property
def pretty_gpu(self) -> str:
from openllm.accelerator_spec import ACCELERATOR_SPECS
try:
resources = self.bento_yaml['services'][0]['config']['resources']
if resources['gpu'] > 1:
acc = ACCELERATOR_SPECS[resources['gpu_type']]
return f'{acc.memory_size:.0f}Gx{resources["gpu"]}'
elif resources['gpu'] > 0:
acc = ACCELERATOR_SPECS[resources['gpu_type']]
return f'{acc.memory_size:.0f}G'
except KeyError:
pass
return ''
try:
resources = self.bento_yaml['services'][0]['config']['resources']
if resources['gpu'] > 1:
acc = ACCELERATOR_SPECS[resources['gpu_type']]
return f'{acc.memory_size:.0f}Gx{resources["gpu"]}'
elif resources['gpu'] > 0:
acc = ACCELERATOR_SPECS[resources['gpu_type']]
return f'{acc.memory_size:.0f}G'
except KeyError:
pass
return ''
def tolist(self) -> str | dict[str, typing.Any] | None:
verbose = VERBOSE_LEVEL.get()
if verbose <= 0:
return str(self)
if verbose <= 10:
return dict(tag=self.tag, repo=self.repo.tolist(), path=str(self.path), model_card=self.pretty_yaml)
if verbose <= 20:
return dict(tag=self.tag, repo=self.repo.tolist(), path=str(self.path), bento_yaml=self.bento_yaml)
return None
def tolist(self) -> str | dict[str, typing.Any] | None:
verbose = VERBOSE_LEVEL.get()
if verbose <= 0:
return str(self)
if verbose <= 10:
return dict(
tag=self.tag, repo=self.repo.tolist(), path=str(self.path), model_card=self.pretty_yaml
)
if verbose <= 20:
return dict(
tag=self.tag, repo=self.repo.tolist(), path=str(self.path), bento_yaml=self.bento_yaml
)
return None
class VenvSpec(pydantic.BaseModel):
python_version: str
requirements_txt: str
envs: EnvVars
name_prefix: str = ''
python_version: str
requirements_txt: str
envs: EnvVars
name_prefix: str = ''
@functools.cached_property
def normalized_requirements_txt(self) -> str:
parameter_lines: list[str] = []
dependency_lines: list[str] = []
comment_lines: list[str] = []
@functools.cached_property
def normalized_requirements_txt(self) -> str:
parameter_lines: list[str] = []
dependency_lines: list[str] = []
comment_lines: list[str] = []
for line in self.requirements_txt.splitlines():
if not line.strip():
continue
elif line.strip().startswith('#'):
comment_lines.append(line.strip())
elif line.strip().startswith('-'):
parameter_lines.append(line.strip())
else:
dependency_lines.append(line.strip())
for line in self.requirements_txt.splitlines():
if not line.strip():
continue
elif line.strip().startswith('#'):
comment_lines.append(line.strip())
elif line.strip().startswith('-'):
parameter_lines.append(line.strip())
else:
dependency_lines.append(line.strip())
parameter_lines.sort()
dependency_lines.sort()
return '\n'.join(parameter_lines + dependency_lines).strip()
parameter_lines.sort()
dependency_lines.sort()
return '\n'.join(parameter_lines + dependency_lines).strip()
@functools.cached_property
def normalized_envs(self) -> str:
return '\n'.join(f'{k}={v}' for k, v in sorted(self.envs.items(), key=lambda x: x[0]) if not v)
@functools.cached_property
def normalized_envs(self) -> str:
return '\n'.join(f'{k}={v}' for k, v in sorted(self.envs.items(), key=lambda x: x[0]) if not v)
@override
def __hash__(self) -> int:
return md5(self.normalized_requirements_txt, str(hash(self.normalized_envs)))
@override
def __hash__(self) -> int:
return md5(self.normalized_requirements_txt, str(hash(self.normalized_envs)))
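As an aside on normalized_requirements_txt above: comment lines are dropped and flag lines ('-…') are sorted separately from dependency lines, so reordered but otherwise equivalent requirements files normalize to the same text and therefore the same venv hash. A minimal standalone sketch of that idea (the sample requirements are illustrative, not from this commit):
raw = """
# comments are ignored
--extra-index-url https://example.org/simple
vllm==0.8.0
bentoml==1.4.8
"""
lines = [line.strip() for line in raw.splitlines() if line.strip()]
parameters = sorted(line for line in lines if line.startswith('-'))
dependencies = sorted(line for line in lines if not line.startswith(('#', '-')))
# 'bentoml==1.4.8' now sorts before 'vllm==0.8.0', regardless of input order.
print('\n'.join(parameters + dependencies))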
class Accelerator(pydantic.BaseModel):
model: str
memory_size: float
model: str
memory_size: float
def __gt__(self, other: Accelerator) -> bool:
return self.memory_size > other.memory_size
def __gt__(self, other: Accelerator) -> bool:
return self.memory_size > other.memory_size
def __eq__(self, other: object) -> bool:
if not isinstance(other, Accelerator):
return NotImplemented
return self.memory_size == other.memory_size
def __eq__(self, other: object) -> bool:
if not isinstance(other, Accelerator):
return NotImplemented
return self.memory_size == other.memory_size
def __repr__(self) -> str:
return f'{self.model}({self.memory_size}GB)'
def __repr__(self) -> str:
return f'{self.model}({self.memory_size}GB)'
class DeploymentTarget(pydantic.BaseModel):
accelerators: list[Accelerator]
source: str = 'local'
name: str = 'local'
price: str = ''
platform: str = 'linux'
accelerators: list[Accelerator]
source: str = 'local'
name: str = 'local'
price: str = ''
platform: str = 'linux'
@override
def __hash__(self) -> int:
return hash(self.source)
@override
def __hash__(self) -> int:
return hash(self.source)
@property
def accelerators_repr(self) -> str:
accs = {a.model for a in self.accelerators}
if len(accs) == 0:
return 'null'
if len(accs) == 1:
a = self.accelerators[0]
return f'{a.model} x{len(self.accelerators)}'
return ', '.join((f'{a.model}' for a in self.accelerators))
@property
def accelerators_repr(self) -> str:
accs = {a.model for a in self.accelerators}
if len(accs) == 0:
return 'null'
if len(accs) == 1:
a = self.accelerators[0]
return f'{a.model} x{len(self.accelerators)}'
return ', '.join((f'{a.model}' for a in self.accelerators))
def run_command(
cmd: list[str],
cwd: str | None = None,
env: EnvVars | None = None,
copy_env: bool = True,
venv: pathlib.Path | None = None,
silent: bool = False,
cmd: list[str],
cwd: str | None = None,
env: EnvVars | None = None,
copy_env: bool = True,
venv: pathlib.Path | None = None,
silent: bool = False,
) -> subprocess.CompletedProcess[typing.Any]:
env = env or EnvVars({})
cmd = [str(c) for c in cmd]
bin_dir = 'Scripts' if os.name == 'nt' else 'bin'
if not silent:
output('\n')
if cwd:
output(f'$ cd {cwd}', style='orange')
if env:
for k, v in env.items():
output(f'$ export {k}={shlex.quote(v)}', style='orange')
if venv:
output(f'$ source {venv / "bin" / "activate"}', style='orange')
output(f'$ {" ".join(cmd)}', style='orange')
env = env or EnvVars({})
cmd = [str(c) for c in cmd]
bin_dir = 'Scripts' if os.name == 'nt' else 'bin'
if not silent:
output('\n')
if cwd:
output(f'$ cd {cwd}', style='orange')
if env:
for k, v in env.items():
output(f'$ export {k}={shlex.quote(v)}', style='orange')
if venv:
py = venv / bin_dir / f'python{sysconfig.get_config_var("EXE")}'
output(f'$ source {venv / "bin" / "activate"}', style='orange')
output(f'$ {" ".join(cmd)}', style='orange')
if venv:
py = venv / bin_dir / f'python{sysconfig.get_config_var("EXE")}'
else:
py = pathlib.Path(sys.executable)
if copy_env:
env = EnvVars({**os.environ, **env})
if cmd and cmd[0] == 'bentoml':
cmd = [py.__fspath__(), '-m', 'bentoml', *cmd[1:]]
if cmd and cmd[0] == 'python':
cmd = [py.__fspath__(), *cmd[1:]]
try:
if silent:
return subprocess.run(
cmd, cwd=cwd, env=env, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True
)
else:
py = pathlib.Path(sys.executable)
if copy_env:
env = EnvVars({**os.environ, **env})
if cmd and cmd[0] == 'bentoml':
cmd = [py.__fspath__(), '-m', 'bentoml'] + cmd[1:]
if cmd and cmd[0] == 'python':
cmd = [py.__fspath__()] + cmd[1:]
try:
if silent:
return subprocess.run(
cmd, cwd=cwd, env=env, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True
)
else:
return subprocess.run(cmd, cwd=cwd, env=env, check=True)
except Exception as e:
if VERBOSE_LEVEL.get() >= 20:
output(str(e), style='red')
raise typer.Exit(1)
return subprocess.run(cmd, cwd=cwd, env=env, check=True)
except Exception as e:
if VERBOSE_LEVEL.get() >= 20:
output(str(e), style='red')
raise typer.Exit(1)
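To make the command rewriting in run_command above concrete, here is a small hypothetical sketch of the same idea; resolve_python and rewrite are illustrative names, not functions from this commit:
from __future__ import annotations
import os, pathlib, sys
def resolve_python(venv: pathlib.Path | None) -> pathlib.Path:
    # Hypothetical helper (not part of the diff): pick the venv interpreter
    # when a venv is given, otherwise the interpreter running this process.
    bin_dir = 'Scripts' if os.name == 'nt' else 'bin'
    exe = 'python.exe' if os.name == 'nt' else 'python'
    return venv / bin_dir / exe if venv else pathlib.Path(sys.executable)
def rewrite(cmd: list[str], venv: pathlib.Path | None = None) -> list[str]:
    # Mirrors the rewriting in run_command: a leading 'bentoml' or 'python'
    # is replaced with the resolved interpreter so the venv's packages win.
    py = resolve_python(venv)
    if cmd and cmd[0] == 'bentoml':
        return [str(py), '-m', 'bentoml', *cmd[1:]]
    if cmd and cmd[0] == 'python':
        return [str(py), *cmd[1:]]
    return cmd
print(rewrite(['bentoml', 'serve', 'model:latest']))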
async def stream_command_output(stream: asyncio.streams.StreamReader | None, style: str = 'gray') -> None:
if stream:
async for line in stream:
output(line.decode(), style=style, end='')
async def stream_command_output(
stream: asyncio.streams.StreamReader | None, style: str = 'gray'
) -> None:
if stream:
async for line in stream:
output(line.decode(), style=style, end='')
@asynccontextmanager
async def async_run_command(
cmd: list[str],
cwd: str | None = None,
env: EnvVars | None = None,
copy_env: bool = True,
venv: pathlib.Path | None = None,
silent: bool = True,
cmd: list[str],
cwd: str | None = None,
env: EnvVars | None = None,
copy_env: bool = True,
venv: pathlib.Path | None = None,
silent: bool = True,
) -> typing.AsyncGenerator[asyncio.subprocess.Process]:
env = env or EnvVars({})
cmd = [str(c) for c in cmd]
if not silent:
output('\n')
if cwd:
output(f'$ cd {cwd}', style='orange')
if env:
for k, v in env.items():
output(f'$ export {k}={shlex.quote(v)}', style='orange')
if venv:
output(f'$ source {venv / "bin" / "activate"}', style='orange')
output(f'$ {" ".join(cmd)}', style='orange')
env = env or EnvVars({})
cmd = [str(c) for c in cmd]
if not silent:
output('\n')
if cwd:
output(f'$ cd {cwd}', style='orange')
if env:
for k, v in env.items():
output(f'$ export {k}={shlex.quote(v)}', style='orange')
if venv:
py = venv / 'bin' / 'python'
else:
py = pathlib.Path(sys.executable)
output(f'$ source {venv / "bin" / "activate"}', style='orange')
output(f'$ {" ".join(cmd)}', style='orange')
if copy_env:
env = EnvVars({**os.environ, **env})
if venv:
py = venv / 'bin' / 'python'
else:
py = pathlib.Path(sys.executable)
if cmd and cmd[0] == 'bentoml':
cmd = [py.__fspath__(), '-m', 'bentoml'] + cmd[1:]
if cmd and cmd[0] == 'python':
cmd = [py.__fspath__()] + cmd[1:]
if copy_env:
env = EnvVars({**os.environ, **env})
proc = None
try:
proc = await asyncio.create_subprocess_shell(
' '.join(map(str, cmd)), stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, cwd=cwd, env=env
)
yield proc
except subprocess.CalledProcessError:
output('Command failed', style='red')
raise typer.Exit(1)
finally:
if proc:
proc.send_signal(signal.SIGINT)
await proc.wait()
if cmd and cmd[0] == 'bentoml':
cmd = [py.__fspath__(), '-m', 'bentoml', *cmd[1:]]
if cmd and cmd[0] == 'python':
cmd = [py.__fspath__(), *cmd[1:]]
proc = None
try:
proc = await asyncio.create_subprocess_shell(
' '.join(map(str, cmd)),
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
cwd=cwd,
env=env,
)
yield proc
except subprocess.CalledProcessError:
output('Command failed', style='red')
raise typer.Exit(1)
finally:
if proc:
proc.send_signal(signal.SIGINT)
await proc.wait()
def md5(*strings: str) -> int:
m = hashlib.md5()
for s in strings:
m.update(s.encode())
return int(m.hexdigest(), 16)
m = hashlib.md5()
for s in strings:
m.update(s.encode())
return int(m.hexdigest(), 16)

View File

@@ -4,103 +4,114 @@ import asyncio, time, typing
import httpx, openai
from openai.types.chat import ChatCompletionAssistantMessageParam, ChatCompletionUserMessageParam
from openllm.common import BentoInfo, EnvVars, async_run_command, output, run_command, stream_command_output
from openllm.common import (
BentoInfo,
EnvVars,
async_run_command,
output,
run_command,
stream_command_output,
)
from openllm.venv import ensure_venv
if typing.TYPE_CHECKING:
from openai.types.chat import ChatCompletionMessageParam
from openai.types.chat import ChatCompletionMessageParam
def prep_env_vars(bento: BentoInfo) -> None:
import os
import os
env_vars = bento.envs
for env_var in env_vars:
if not env_var.get('value'):
continue
key = env_var['name']
value = env_var['value']
os.environ[key] = value
env_vars = bento.envs
for env_var in env_vars:
if not env_var.get('value'):
continue
key = env_var['name']
value = env_var['value']
os.environ[key] = value
def _get_serve_cmd(bento: BentoInfo, port: int = 3000) -> tuple[list[str], EnvVars]:
cmd = ['bentoml', 'serve', bento.bentoml_tag]
if port != 3000:
cmd += ['--port', str(port)]
return cmd, EnvVars({'BENTOML_HOME': f'{bento.repo.path}/bentoml'})
cmd = ['bentoml', 'serve', bento.bentoml_tag]
if port != 3000:
cmd += ['--port', str(port)]
return cmd, EnvVars({'BENTOML_HOME': f'{bento.repo.path}/bentoml'})
def serve(bento: BentoInfo, port: int = 3000) -> None:
prep_env_vars(bento)
cmd, env = _get_serve_cmd(bento, port=port)
venv = ensure_venv(bento, runtime_envs=env)
output(f'Access the Chat UI at http://localhost:{port}/chat (or with your IP)')
run_command(cmd, env=env, cwd=None, venv=venv)
prep_env_vars(bento)
cmd, env = _get_serve_cmd(bento, port=port)
venv = ensure_venv(bento, runtime_envs=env)
output(f'Access the Chat UI at http://localhost:{port}/chat (or with your IP)')
run_command(cmd, env=env, cwd=None, venv=venv)
async def _run_model(bento: BentoInfo, port: int = 3000, timeout: int = 600) -> None:
cmd, env = _get_serve_cmd(bento, port)
venv = ensure_venv(bento, runtime_envs=env)
async with async_run_command(cmd, env=env, cwd=None, venv=venv, silent=False) as server_proc:
output(f'Model server started {server_proc.pid}')
cmd, env = _get_serve_cmd(bento, port)
venv = ensure_venv(bento, runtime_envs=env)
async with async_run_command(cmd, env=env, cwd=None, venv=venv, silent=False) as server_proc:
output(f'Model server started {server_proc.pid}')
stdout_streamer = None
stderr_streamer = None
start_time = time.time()
stdout_streamer = None
stderr_streamer = None
start_time = time.time()
output('Model loading...', style='green')
for _ in range(timeout):
try:
resp = httpx.get(f'http://localhost:{port}/readyz', timeout=3)
if resp.status_code == 200:
break
except httpx.RequestError:
if time.time() - start_time > 30:
if not stdout_streamer:
stdout_streamer = asyncio.create_task(stream_command_output(server_proc.stdout, style='gray'))
if not stderr_streamer:
stderr_streamer = asyncio.create_task(
stream_command_output(server_proc.stderr, style='#BD2D0F')
)
await asyncio.sleep(1)
else:
output('Model failed to load', style='red')
server_proc.terminate()
return
output('Model loading...', style='green')
for _ in range(timeout):
try:
resp = httpx.get(f'http://localhost:{port}/readyz', timeout=3)
if resp.status_code == 200:
break
except httpx.RequestError:
if time.time() - start_time > 30:
if not stdout_streamer:
stdout_streamer = asyncio.create_task(
stream_command_output(server_proc.stdout, style='gray')
)
if not stderr_streamer:
stderr_streamer = asyncio.create_task(
stream_command_output(server_proc.stderr, style='#BD2D0F')
)
await asyncio.sleep(1)
else:
output('Model failed to load', style='red')
server_proc.terminate()
return
if stdout_streamer:
stdout_streamer.cancel()
if stderr_streamer:
stderr_streamer.cancel()
if stdout_streamer:
stdout_streamer.cancel()
if stderr_streamer:
stderr_streamer.cancel()
output('Model is ready', style='green')
messages: list[ChatCompletionMessageParam] = []
output('Model is ready', style='green')
messages: list[ChatCompletionMessageParam] = []
client = openai.AsyncOpenAI(base_url=f'http://localhost:{port}/v1', api_key='local')
while True:
try:
message = input('user: ')
if message == '':
output('empty message, please enter something', style='yellow')
continue
messages.append(ChatCompletionUserMessageParam(role='user', content=message))
output('assistant: ', end='', style='lightgreen')
assistant_message = ''
stream = await client.chat.completions.create(
model=(await client.models.list()).data[0].id, messages=messages, stream=True
)
async for chunk in stream:
text = chunk.choices[0].delta.content or ''
assistant_message += text
output(text, end='', style='lightgreen')
messages.append(ChatCompletionAssistantMessageParam(role='assistant', content=assistant_message))
output('')
except KeyboardInterrupt:
break
output('\nStopping model server...', style='green')
output('Stopped model server', style='green')
client = openai.AsyncOpenAI(base_url=f'http://localhost:{port}/v1', api_key='local')
while True:
try:
message = input('user: ')
if message == '':
output('empty message, please enter something', style='yellow')
continue
messages.append(ChatCompletionUserMessageParam(role='user', content=message))
output('assistant: ', end='', style='lightgreen')
assistant_message = ''
stream = await client.chat.completions.create(
model=(await client.models.list()).data[0].id, messages=messages, stream=True
)
async for chunk in stream:
text = chunk.choices[0].delta.content or ''
assistant_message += text
output(text, end='', style='lightgreen')
messages.append(
ChatCompletionAssistantMessageParam(role='assistant', content=assistant_message)
)
output('')
except KeyboardInterrupt:
break
output('\nStopping model server...', style='green')
output('Stopped model server', style='green')
def run(bento: BentoInfo, port: int = 3000, timeout: int = 600) -> None:
prep_env_vars(bento)
asyncio.run(_run_model(bento, port=port, timeout=timeout))
prep_env_vars(bento)
asyncio.run(_run_model(bento, port=port, timeout=timeout))
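A hypothetical usage sketch for the two entry points above; the model tag and port are placeholders, and ensure_bento is resolved the same way the CLI does it:
from openllm.local import run, serve
from openllm.model import ensure_bento
bento = ensure_bento('model:version')  # placeholder tag, pick one from `openllm model list`
run(bento, port=3001, timeout=600)     # interactive chat loop in the terminal, Ctrl-C to stop
# serve(bento, port=3001)              # or expose the HTTP API / Chat UI instead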

View File

@@ -14,155 +14,159 @@ app = OpenLLMTyper(help='manage models')
@app.command(help='get model')
def get(tag: str, repo: typing.Optional[str] = None, verbose: bool = False) -> None:
if verbose:
VERBOSE_LEVEL.set(20)
bento_info = ensure_bento(tag, repo_name=repo)
if bento_info:
output_(bento_info)
if verbose:
VERBOSE_LEVEL.set(20)
bento_info = ensure_bento(tag, repo_name=repo)
if bento_info:
output_(bento_info)
@app.command(name='list', help='list available models')
def list_model(
tag: typing.Optional[str] = None,
repo: typing.Optional[str] = None,
verbose: bool = False,
output: typing.Optional[str] = typer.Option(None, hidden=True),
tag: typing.Optional[str] = None,
repo: typing.Optional[str] = None,
verbose: bool = False,
output: typing.Optional[str] = typer.Option(None, hidden=True),
) -> None:
if verbose:
VERBOSE_LEVEL.set(20)
if verbose:
VERBOSE_LEVEL.set(20)
bentos = list_bento(tag=tag, repo_name=repo)
bentos.sort(key=lambda x: x.name)
bentos = list_bento(tag=tag, repo_name=repo)
bentos.sort(key=lambda x: x.name)
seen = set()
seen = set()
def is_seen(value: str) -> bool:
if value in seen:
return True
seen.add(value)
return False
def is_seen(value: str) -> bool:
if value in seen:
return True
seen.add(value)
return False
if output == 'readme':
# Parse parameters from bento.tag (e.g. "model:671b-it" -> "671b", 'model:something-long-78b' -> '78b')
questionary.print(
json.dumps({
f'{bento.name}': dict(
tag=bento.tag,
version=bento.tag.split(':')[-1],
pretty_gpu=bento.pretty_gpu,
command=f'openllm serve {bento.tag}',
)
for bento in bentos
if not is_seen(bento.name)
})
if output == 'readme':
# Parse parameters from bento.tag (e.g. "model:671b-it" -> "671b", 'model:something-long-78b' -> '78b')
questionary.print(
json.dumps({
f'{bento.name}': dict(
tag=bento.tag,
version=bento.tag.split(':')[-1],
pretty_gpu=bento.pretty_gpu,
command=f'openllm serve {bento.tag}',
)
return
table = tabulate.tabulate(
[
[
'' if is_seen(bento.name) else bento.name,
bento.tag,
bento.repo.name,
bento.pretty_gpu,
','.join(bento.platforms),
]
for bento in bentos
],
headers=['model', 'version', 'repo', 'required GPU RAM', 'platforms'],
for bento in bentos
if not is_seen(bento.name)
})
)
output_(table)
return
table = tabulate.tabulate(
[
[
'' if is_seen(bento.name) else bento.name,
bento.tag,
bento.repo.name,
bento.pretty_gpu,
','.join(bento.platforms),
]
for bento in bentos
],
headers=['model', 'version', 'repo', 'required GPU RAM', 'platforms'],
)
output_(table)
def ensure_bento(
model: str, target: typing.Optional[DeploymentTarget] = None, repo_name: typing.Optional[str] = None
model: str,
target: typing.Optional[DeploymentTarget] = None,
repo_name: typing.Optional[str] = None,
) -> BentoInfo:
bentos = list_bento(model, repo_name=repo_name)
if len(bentos) == 0:
output_(f'No model found for {model}', style='red')
raise typer.Exit(1)
if len(bentos) == 1:
output_(f'Found model {bentos[0]}', style='green')
if target is not None and can_run(bentos[0], target) <= 0:
output_(
f'The machine({target.name}) with {target.accelerators_repr} does not appear to have sufficient '
f'resources to run model {bentos[0]}\n',
style='yellow',
)
return bentos[0]
# multiple models, pick one according to target
output_(f'Multiple models match {model}, did you mean one of these?', style='red')
list_model(model, repo=repo_name)
bentos = list_bento(model, repo_name=repo_name)
if len(bentos) == 0:
output_(f'No model found for {model}', style='red')
raise typer.Exit(1)
if len(bentos) == 1:
output_(f'Found model {bentos[0]}', style='green')
if target is not None and can_run(bentos[0], target) <= 0:
output_(
f'The machine({target.name}) with {target.accelerators_repr} does not appear to have sufficient '
f'resources to run model {bentos[0]}\n',
style='yellow',
)
return bentos[0]
# multiple models, pick one according to target
output_(f'Multiple models match {model}, did you mean one of these?', style='red')
list_model(model, repo=repo_name)
raise typer.Exit(1)
NUMBER_RE = re.compile(r'\d+')
def _extract_first_number(s: str) -> int:
match = NUMBER_RE.search(s)
if match:
return int(match.group())
else:
return 100
match = NUMBER_RE.search(s)
if match:
return int(match.group())
else:
return 100
def list_bento(
tag: typing.Optional[str] = None, repo_name: typing.Optional[str] = None, include_alias: bool = False
tag: typing.Optional[str] = None,
repo_name: typing.Optional[str] = None,
include_alias: bool = False,
) -> typing.List[BentoInfo]:
ensure_repo_updated()
ensure_repo_updated()
if repo_name is None and tag and '/' in tag:
repo_name, tag = tag.split('/', 1)
if repo_name is None and tag and '/' in tag:
repo_name, tag = tag.split('/', 1)
repo_list = list_repo(repo_name)
if repo_name is not None:
repo_map = {repo.name: repo for repo in repo_list}
if repo_name not in repo_map:
output_(f'Repo `{repo_name}` not found, did you mean one of these?')
for repo_name in repo_map:
output_(f' {repo_name}')
raise typer.Exit(1)
repo_list = list_repo(repo_name)
if repo_name is not None:
repo_map = {repo.name: repo for repo in repo_list}
if repo_name not in repo_map:
output_(f'Repo `{repo_name}` not found, did you mean one of these?')
for repo_name in repo_map:
output_(f' {repo_name}')
raise typer.Exit(1)
if not tag:
glob_pattern = 'bentoml/bentos/*/*'
elif ':' in tag:
bento_name, version = tag.split(':')
glob_pattern = f'bentoml/bentos/{bento_name}/{version}'
else:
glob_pattern = f'bentoml/bentos/{tag}/*'
if not tag:
glob_pattern = 'bentoml/bentos/*/*'
elif ':' in tag:
bento_name, version = tag.split(':')
glob_pattern = f'bentoml/bentos/{bento_name}/{version}'
else:
glob_pattern = f'bentoml/bentos/{tag}/*'
model_list: list[BentoInfo] = []
repo_list = list_repo(repo_name)
for repo in repo_list:
paths = sorted(
repo.path.glob(glob_pattern),
key=lambda x: (x.parent.name, _extract_first_number(x.name), len(x.name), x.name),
)
for path in paths:
if path.is_dir() and (path / 'bento.yaml').exists():
model = BentoInfo(repo=repo, path=path)
elif path.is_file():
with open(path) as f:
origin_name = f.read().strip()
origin_path = path.parent / origin_name
model = BentoInfo(alias=path.name, repo=repo, path=origin_path)
else:
model = None
if model:
model_list.append(model)
model_list: list[BentoInfo] = []
repo_list = list_repo(repo_name)
for repo in repo_list:
paths = sorted(
repo.path.glob(glob_pattern),
key=lambda x: (x.parent.name, _extract_first_number(x.name), len(x.name), x.name),
)
for path in paths:
if path.is_dir() and (path / 'bento.yaml').exists():
model = BentoInfo(repo=repo, path=path)
elif path.is_file():
with open(path) as f:
origin_name = f.read().strip()
origin_path = path.parent / origin_name
model = BentoInfo(alias=path.name, repo=repo, path=origin_path)
else:
model = None
if model:
model_list.append(model)
if not include_alias:
seen: set[str] = set()
# relying on the side effect of seen.add() inside the comprehension.
model_list = [
x
for x in model_list
if not (
f'{x.bento_yaml["name"]}:{x.bento_yaml["version"]}' in seen
or seen.add(f'{x.bento_yaml["name"]}:{x.bento_yaml["version"]}') # type: ignore
)
]
return model_list
if not include_alias:
seen: set[str] = set()
# relying on the side effect of seen.add() inside the comprehension.
model_list = [
x
for x in model_list
if not (
f'{x.bento_yaml["name"]}:{x.bento_yaml["version"]}' in seen
or seen.add(f'{x.bento_yaml["name"]}:{x.bento_yaml["version"]}') # type: ignore
)
]
return model_list
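The alias de-duplication at the end of list_bento relies on set.add returning None inside the comprehension; a small self-contained sketch of that pattern with illustrative data:
items = ['model:a', 'model:b', 'model:a', 'model:c']
seen: set[str] = set()
# set.add() returns None (falsy), so only the membership test decides whether
# an item is dropped; the order of first occurrences is preserved.
deduped = [x for x in items if not (x in seen or seen.add(x))]
print(deduped)  # ['model:a', 'model:b', 'model:c']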

View File

@@ -4,7 +4,15 @@ import datetime, subprocess, re, shutil, typing, os, pathlib
import pyaml, questionary, typer
from openllm.analytic import OpenLLMTyper
from openllm.common import INTERACTIVE, REPO_DIR, VERBOSE_LEVEL, RepoInfo, load_config, output, save_config
from openllm.common import (
INTERACTIVE,
REPO_DIR,
VERBOSE_LEVEL,
RepoInfo,
load_config,
output,
save_config,
)
UPDATE_INTERVAL = datetime.timedelta(days=3)
TEST_REPO = os.getenv('OPENLLM_TEST_REPO', None) # for testing
@@ -15,223 +23,248 @@ app = OpenLLMTyper(help='manage repos')
@app.command(name='list', help='list available repo')
def cmd_list(verbose: bool = False) -> None:
if verbose:
VERBOSE_LEVEL.set(20)
pyaml.pprint(list_repo(), sort_dicts=False, sort_keys=False)
if verbose:
VERBOSE_LEVEL.set(20)
pyaml.pprint(list_repo(), sort_dicts=False, sort_keys=False)
@app.command(name='remove', help='remove given repo')
def cmd_remove(name: str) -> None:
if TEST_REPO:
return
config = load_config()
if name not in config.repos:
output(f'Repo {name} does not exist', style='red')
return
if TEST_REPO:
return
config = load_config()
if name not in config.repos:
output(f'Repo {name} does not exist', style='red')
return
del config.repos[name]
save_config(config)
output(f'Repo {name} removed', style='green')
del config.repos[name]
save_config(config)
output(f'Repo {name} removed', style='green')
@app.command(name='update', help='update default repo')
def cmd_update() -> None:
if TEST_REPO:
return
repos_in_use = set()
for repo in list_repo():
repos_in_use.add((repo.server, repo.owner, repo.repo, repo.branch))
if repo.path.exists():
shutil.rmtree(repo.path, ignore_errors=True)
repo.path.parent.mkdir(parents=True, exist_ok=True)
try:
_clone_repo(repo)
output('')
output(f'Repo `{repo.name}` updated', style='green')
except Exception as e:
shutil.rmtree(repo.path, ignore_errors=True)
output(f'Failed to clone repo {repo.name}', style='red')
output(e)
for c in REPO_DIR.glob('*/*/*/*'):
repo_spec = tuple(c.parts[-4:])
if repo_spec not in repos_in_use:
shutil.rmtree(c, ignore_errors=True)
output(f'Removed unused repo cache {c}')
with open(REPO_DIR / 'last_update', 'w') as f:
f.write(datetime.datetime.now().isoformat())
for repo in list_repo():
_complete_alias(repo.name)
if TEST_REPO:
return
repos_in_use = set()
for repo in list_repo():
# Show simplified output if not in verbose mode
if VERBOSE_LEVEL.get() <= 0:
output(f'updating repo {repo.name}', style='green')
repos_in_use.add((repo.server, repo.owner, repo.repo, repo.branch))
if repo.path.exists():
shutil.rmtree(repo.path, ignore_errors=True)
repo.path.parent.mkdir(parents=True, exist_ok=True)
try:
_clone_repo(repo)
if VERBOSE_LEVEL.get() > 0:
output('')
output(f'Repo `{repo.name}` updated', style='green')
except Exception as e:
shutil.rmtree(repo.path, ignore_errors=True)
if VERBOSE_LEVEL.get() > 0:
output(f'Failed to clone repo {repo.name}', style='red')
output(e)
for c in REPO_DIR.glob('*/*/*/*'):
repo_spec = tuple(c.parts[-4:])
if repo_spec not in repos_in_use:
shutil.rmtree(c, ignore_errors=True)
if VERBOSE_LEVEL.get() > 0:
output(f'Removed unused repo cache {c}')
with open(REPO_DIR / 'last_update', 'w') as f:
f.write(datetime.datetime.now().isoformat())
for repo in list_repo():
_complete_alias(repo.name)
@app.command(name='add', help='add new repo')
def cmd_add(name: str, repo: str) -> None:
if TEST_REPO:
return
name = name.lower()
if not name.isidentifier():
output(f'Invalid repo name: {name}, should only contain letters, numbers and underscores', style='red')
return
if TEST_REPO:
return
name = name.lower()
if not name.isidentifier():
output(
f'Invalid repo name: {name}, should only contain letters, numbers and underscores',
style='red',
)
return
try:
parse_repo_url(repo)
except ValueError:
output(f'Invalid repo url: {repo}', style='red')
return
try:
parse_repo_url(repo)
except ValueError:
output(f'Invalid repo url: {repo}', style='red')
return
config = load_config()
if name in config.repos:
override = questionary.confirm(f'Repo {name} already exists({config.repos[name]}), override?').ask()
if not override:
return
config = load_config()
if name in config.repos:
override = questionary.confirm(
f'Repo {name} already exists({config.repos[name]}), override?'
).ask()
if not override:
return
config.repos[name] = repo
save_config(config)
output(f'Repo {name} added', style='green')
config.repos[name] = repo
save_config(config)
output(f'Repo {name} added', style='green')
@app.command(name='default', help='get default repo path')
def default() -> typing.Optional[pathlib.Path]:
if TEST_REPO:
return None
output((info := parse_repo_url(load_config().repos['default'], 'default')).path)
return info.path
if TEST_REPO:
return None
output((info := parse_repo_url(load_config().repos['default'], 'default')).path)
return info.path
def list_repo(repo_name: typing.Optional[str] = None) -> typing.List[RepoInfo]:
if TEST_REPO:
return [
RepoInfo(
name='default',
url='',
server='test',
owner='test',
repo='test',
branch='main',
path=pathlib.Path(TEST_REPO),
)
]
config = load_config()
repos = []
for _repo_name, repo_url in config.repos.items():
if repo_name is not None and _repo_name != repo_name:
continue
repo = parse_repo_url(repo_url, _repo_name)
repos.append(repo)
return repos
if TEST_REPO:
return [
RepoInfo(
name='default',
url='',
server='test',
owner='test',
repo='test',
branch='main',
path=pathlib.Path(TEST_REPO),
)
]
config = load_config()
repos = []
for _repo_name, repo_url in config.repos.items():
if repo_name is not None and _repo_name != repo_name:
continue
repo = parse_repo_url(repo_url, _repo_name)
repos.append(repo)
return repos
def _complete_alias(repo_name: str) -> None:
from openllm.model import list_bento
from openllm.model import list_bento
for bento in list_bento(repo_name=repo_name):
alias = bento.labels.get('aliases', '').strip()
if alias:
for a in alias.split(','):
with open(bento.path.parent / a, 'w') as f:
f.write(bento.version)
for bento in list_bento(repo_name=repo_name):
alias = bento.labels.get('aliases', '').strip()
if alias:
for a in alias.split(','):
with open(bento.path.parent / a, 'w') as f:
f.write(bento.version)
def _clone_repo(repo: RepoInfo) -> None:
try:
subprocess.run(['git', 'clone', '--depth=1', '-b', repo.branch, repo.url, str(repo.path)], check=True)
except (subprocess.CalledProcessError, FileNotFoundError):
import dulwich
import dulwich.porcelain
try:
# Suppress output if verbosity level is low
if VERBOSE_LEVEL.get() <= 0:
subprocess.run(
['git', 'clone', '--depth=1', '-b', repo.branch, repo.url, str(repo.path)],
check=True,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
else:
subprocess.run(
['git', 'clone', '--depth=1', '-b', repo.branch, repo.url, str(repo.path)], check=True
)
except (subprocess.CalledProcessError, FileNotFoundError):
import dulwich
import dulwich.porcelain
dulwich.porcelain.clone(repo.url, str(repo.path), checkout=True, depth=1, branch=repo.branch)
# Dulwich doesn't have easy output suppression, but we rarely get here
dulwich.porcelain.clone(repo.url, str(repo.path), checkout=True, depth=1, branch=repo.branch)
def ensure_repo_updated() -> None:
if TEST_REPO:
return
last_update_file = REPO_DIR / 'last_update'
if not last_update_file.exists():
if INTERACTIVE.get():
choice = questionary.confirm(
'The repo cache has never been updated, do you want to update it to fetch the latest model list?'
).ask()
if choice:
cmd_update()
return
else:
output(
'The repo cache has never been updated, please run `openllm repo update` to fetch the latest model list',
style='red',
)
raise typer.Exit(1)
last_update = datetime.datetime.fromisoformat(last_update_file.read_text().strip())
if datetime.datetime.now() - last_update > UPDATE_INTERVAL:
if INTERACTIVE.get():
choice = questionary.confirm(
'The repo cache is outdated, do you want to update it to fetch the latest model list?'
).ask()
if choice:
cmd_update()
else:
output(
'The repo cache is outdated, please run `openllm repo update` to fetch the latest model list',
style='yellow',
)
if TEST_REPO:
return
last_update_file = REPO_DIR / 'last_update'
if not last_update_file.exists():
if INTERACTIVE.get():
choice = questionary.confirm(
'The repo cache has never been updated, do you want to update it to fetch the latest model list?'
).ask()
if choice:
cmd_update()
return
else:
output(
'The repo cache has never been updated, please run `openllm repo update` to fetch the latest model list',
style='red',
)
raise typer.Exit(1)
last_update = datetime.datetime.fromisoformat(last_update_file.read_text().strip())
if datetime.datetime.now() - last_update > UPDATE_INTERVAL:
if INTERACTIVE.get():
choice = questionary.confirm(
'The repo cache is outdated, do you want to update it to fetch the latest model list?'
).ask()
if choice:
cmd_update()
else:
output(
'The repo cache is outdated, please run `openllm repo update` to fetch the latest model list',
style='yellow',
)
GIT_HTTP_RE = re.compile(
r'(?P<schema>git|ssh|http|https):\/\/(?P<server>[\.\w\d\-]+)\/(?P<owner>[\w\d\-]+)\/(?P<repo>[\w\d\-\_\.]+)(@(?P<branch>.+))?(\/)?$'
r'(?P<schema>git|ssh|http|https):\/\/(?P<server>[\.\w\d\-]+)\/(?P<owner>[\w\d\-]+)\/(?P<repo>[\w\d\-\_\.]+)(@(?P<branch>.+))?(\/)?$'
)
GIT_SSH_RE = re.compile(
r'git@(?P<server>[\.\w\d-]+):(?P<owner>[\w\d\-]+)\/(?P<repo>[\w\d\-\_\.]+)(@(?P<branch>.+))?(\/)?$'
r'git@(?P<server>[\.\w\d-]+):(?P<owner>[\w\d\-]+)\/(?P<repo>[\w\d\-\_\.]+)(@(?P<branch>.+))?(\/)?$'
)
def parse_repo_url(repo_url: str, repo_name: typing.Optional[str] = None) -> RepoInfo:
"""
parse the git repo url to server, owner, repo name, branch
>>> parse_repo_url('https://github.com/bentoml/bentovllm@main')
('github.com', 'bentoml', 'bentovllm', 'main')
"""
parse the git repo url to server, owner, repo name, branch
>>> parse_repo_url('https://github.com/bentoml/bentovllm@main')
('github.com', 'bentoml', 'bentovllm', 'main')
>>> parse_repo_url('https://github.com/bentoml/bentovllm.git@main')
('github.com', 'bentoml', 'bentovllm', 'main')
>>> parse_repo_url('https://github.com/bentoml/bentovllm.git@main')
('github.com', 'bentoml', 'bentovllm', 'main')
>>> parse_repo_url('https://github.com/bentoml/bentovllm')
('github.com', 'bentoml', 'bentovllm', 'main')
>>> parse_repo_url('https://github.com/bentoml/bentovllm')
('github.com', 'bentoml', 'bentovllm', 'main')
>>> parse_repo_url('git@github.com:bentoml/openllm-models.git')
('github.com', 'bentoml', 'openllm-models', 'main')
"""
match = GIT_HTTP_RE.match(repo_url)
if match:
schema = match.group('schema')
else:
match = GIT_SSH_RE.match(repo_url)
if not match:
raise ValueError(f'Invalid git repo url: {repo_url}')
schema = None
>>> parse_repo_url('git@github.com:bentoml/openllm-models.git')
('github.com', 'bentoml', 'openllm-models', 'main')
"""
match = GIT_HTTP_RE.match(repo_url)
if match:
schema = match.group('schema')
else:
match = GIT_SSH_RE.match(repo_url)
if not match:
raise ValueError(f'Invalid git repo url: {repo_url}')
schema = None
if match.group('branch') is not None:
repo_url = repo_url[: match.start('branch') - 1]
if match.group('branch') is not None:
repo_url = repo_url[: match.start('branch') - 1]
server = match.group('server')
owner = match.group('owner')
repo = match.group('repo')
if repo.endswith('.git'):
repo = repo[:-4]
branch = match.group('branch') or 'main'
server = match.group('server')
owner = match.group('owner')
repo = match.group('repo')
if repo.endswith('.git'):
repo = repo[:-4]
branch = match.group('branch') or 'main'
if schema is not None:
repo_url = f'{schema}://{server}/{owner}/{repo}'
else:
repo_url = f'git@{server}:{owner}/{repo}'
if schema is not None:
repo_url = f'{schema}://{server}/{owner}/{repo}'
else:
repo_url = f'git@{server}:{owner}/{repo}'
path = REPO_DIR / server / owner / repo / branch
return RepoInfo(
name=repo if repo_name is None else repo_name,
url=repo_url,
server=server,
owner=owner,
repo=repo,
branch=branch,
path=path,
)
path = REPO_DIR / server / owner / repo / branch
return RepoInfo(
name=repo if repo_name is None else repo_name,
url=repo_url,
server=server,
owner=owner,
repo=repo,
branch=branch,
path=path,
)
if __name__ == '__main__':
app()
app()

View File

@@ -3,92 +3,100 @@ from __future__ import annotations
import functools, os, pathlib, shutil
import typer, yaml
from openllm.common import VENV_DIR, VERBOSE_LEVEL, BentoInfo, EnvVars, VenvSpec, output, run_command
from openllm.common import (
VENV_DIR,
VERBOSE_LEVEL,
BentoInfo,
EnvVars,
VenvSpec,
output,
run_command,
)
@functools.lru_cache
def _resolve_bento_venv_spec(bento: BentoInfo, runtime_envs: EnvVars | None = None) -> VenvSpec:
lock_file = bento.path / 'env' / 'python' / 'requirements.lock.txt'
if not lock_file.exists():
lock_file = bento.path / 'env' / 'python' / 'requirements.txt'
lock_file = bento.path / 'env' / 'python' / 'requirements.lock.txt'
if not lock_file.exists():
lock_file = bento.path / 'env' / 'python' / 'requirements.txt'
reqs = lock_file.read_text().strip()
bentofile = bento.path / 'bento.yaml'
data = yaml.safe_load(bentofile.read_text())
bento_env_list = data.get('envs', [])
python_version = data.get('image', {})['python_version']
bento_envs = {e['name']: e.get('value') for e in bento_env_list}
envs = {k: runtime_envs.get(k, v) for k, v in bento_envs.items()} if runtime_envs else {}
reqs = lock_file.read_text().strip()
bentofile = bento.path / 'bento.yaml'
data = yaml.safe_load(bentofile.read_text())
bento_env_list = data.get('envs', [])
python_version = data.get('image', {})['python_version']
bento_envs = {e['name']: e.get('value') for e in bento_env_list}
envs = {k: runtime_envs.get(k, v) for k, v in bento_envs.items()} if runtime_envs else {}
return VenvSpec(
python_version=python_version,
requirements_txt=reqs,
name_prefix=f'{bento.tag.replace(":", "_")}-1-',
envs=EnvVars(envs),
)
return VenvSpec(
python_version=python_version,
requirements_txt=reqs,
name_prefix=f'{bento.tag.replace(":", "_")}-1-',
envs=EnvVars(envs),
)
def _ensure_venv(venv_spec: VenvSpec) -> pathlib.Path:
venv = VENV_DIR / str(hash(venv_spec))
if venv.exists() and not (venv / 'DONE').exists():
shutil.rmtree(venv, ignore_errors=True)
if not venv.exists():
output(f'Installing model dependencies({venv})...', style='green')
venv = VENV_DIR / str(hash(venv_spec))
if venv.exists() and not (venv / 'DONE').exists():
shutil.rmtree(venv, ignore_errors=True)
if not venv.exists():
output(f'Installing model dependencies({venv})...', style='green')
venv_py = venv / 'Scripts' / 'python.exe' if os.name == 'nt' else venv / 'bin' / 'python'
try:
run_command(
['python', '-m', 'uv', 'venv', venv.__fspath__(), '-p', venv_spec.python_version],
silent=VERBOSE_LEVEL.get() < 10,
)
run_command(
['python', '-m', 'uv', 'pip', 'install', '-p', str(venv_py), 'bentoml'],
silent=VERBOSE_LEVEL.get() < 10,
env=venv_spec.envs,
)
with open(venv / 'requirements.txt', 'w') as f:
f.write(venv_spec.normalized_requirements_txt)
run_command(
[
'python',
'-m',
'uv',
'pip',
'install',
'-p',
str(venv_py),
'-r',
(venv / 'requirements.txt').__fspath__(),
],
silent=VERBOSE_LEVEL.get() < 10,
env=venv_spec.envs,
)
with open(venv / 'DONE', 'w') as f:
f.write('DONE')
except Exception as e:
shutil.rmtree(venv, ignore_errors=True)
if VERBOSE_LEVEL.get() >= 10:
output(str(e), style='red')
output(f'Failed to install dependencies to {venv}. Cleaned up.', style='red')
raise typer.Exit(1)
output(f'Successfully installed dependencies to {venv}.', style='green')
return venv
else:
return venv
def ensure_venv(bento: BentoInfo, runtime_envs: EnvVars | None = None) -> pathlib.Path:
venv_spec = _resolve_bento_venv_spec(bento, runtime_envs=EnvVars(runtime_envs))
venv = _ensure_venv(venv_spec)
assert venv is not None
venv_py = venv / 'Scripts' / 'python.exe' if os.name == 'nt' else venv / 'bin' / 'python'
try:
run_command(
['python', '-m', 'uv', 'venv', venv.__fspath__(), '-p', venv_spec.python_version],
silent=VERBOSE_LEVEL.get() < 10,
)
run_command(
['python', '-m', 'uv', 'pip', 'install', '-p', str(venv_py), 'bentoml'],
silent=VERBOSE_LEVEL.get() < 10,
env=venv_spec.envs,
)
with open(venv / 'requirements.txt', 'w') as f:
f.write(venv_spec.normalized_requirements_txt)
run_command(
[
'python',
'-m',
'uv',
'pip',
'install',
'-p',
str(venv_py),
'-r',
(venv / 'requirements.txt').__fspath__(),
],
silent=VERBOSE_LEVEL.get() < 10,
env=venv_spec.envs,
)
with open(venv / 'DONE', 'w') as f:
f.write('DONE')
except Exception as e:
shutil.rmtree(venv, ignore_errors=True)
if VERBOSE_LEVEL.get() >= 10:
output(str(e), style='red')
output(f'Failed to install dependencies to {venv}. Cleaned up.', style='red')
raise typer.Exit(1)
output(f'Successfully installed dependencies to {venv}.', style='green')
return venv
else:
return venv
def ensure_venv(bento: BentoInfo, runtime_envs: EnvVars | None = None) -> pathlib.Path:
venv_spec = _resolve_bento_venv_spec(bento, runtime_envs=EnvVars(runtime_envs))
venv = _ensure_venv(venv_spec)
assert venv is not None
return venv
def check_venv(bento: BentoInfo) -> bool:
venv_spec = _resolve_bento_venv_spec(bento)
venv = VENV_DIR / str(hash(venv_spec))
if not venv.exists():
return False
if venv.exists() and not (venv / 'DONE').exists():
return False
return True
venv_spec = _resolve_bento_venv_spec(bento)
venv = VENV_DIR / str(hash(venv_spec))
if not venv.exists():
return False
if venv.exists() and not (venv / 'DONE').exists():
return False
return True
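To make the cache layout above concrete: the venv directory is VENV_DIR / str(hash(spec)), and the hash is content-based (normalized requirements plus envs), so identical specs reuse the same directory across runs. A hypothetical sketch with illustrative field values:
from openllm.common import VENV_DIR, EnvVars, VenvSpec
spec = VenvSpec(
    python_version='3.11',
    requirements_txt='bentoml==1.4.8\nvllm',  # illustrative pins, not from the lock file
    envs=EnvVars({'HF_TOKEN': ''}),
    name_prefix='model_tag-1-',
)
print(VENV_DIR / str(hash(spec)))  # stable path for this exact spec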

75
tests/test_cli_flow.py Normal file
View File

@@ -0,0 +1,75 @@
from __future__ import annotations
import sys, typing
import pytest, pexpect
@pytest.fixture
def pexpect_process() -> typing.Generator[pexpect.spawn[typing.Any], None, None]:
child = pexpect.spawn(
f'{sys.executable} -m openllm hello', encoding='utf-8', timeout=20, echo=False
)
try:
yield child
finally:
try:
child.sendcontrol('c')
child.close(force=True)
except Exception:
pass
def safe_expect(
child: pexpect.spawn, pattern: str, timeout: int = 10, debug_msg: str = 'Expecting pattern'
) -> int:
try:
print(f"\n{debug_msg}: '{pattern}'")
index = child.expect(pattern, timeout=timeout)
print(f'Found match at index {index}')
print(f'Before match: {child.before}')
print(f'After match: {child.after}')
return index
except pexpect.TIMEOUT:
print(f'TIMEOUT while {debug_msg}')
print(f'Last output: {child.before}')
raise
except pexpect.EOF:
print(f'EOF while {debug_msg}')
print(f'Last output: {child.before}')
raise
def test_hello_flow_to_deploy(pexpect_process: pexpect.spawn) -> None:
child = pexpect_process
try:
safe_expect(child, 'Select a model', timeout=10, debug_msg='Waiting for model selection prompt')
child.sendline('\x1b[B')
child.sendline('\r')
safe_expect(
child, 'Select a version', timeout=10, debug_msg='Waiting for version selection prompt'
)
child.sendline('\r')
safe_expect(
child, 'Select an action', timeout=10, debug_msg='Waiting for action selection prompt'
)
child.sendline('\x1b[B')
child.sendline('\x1b[B')
child.sendline('\r')
safe_expect(
child, 'Select an instance type', timeout=10, debug_msg='Waiting for instance type prompt'
)
child.sendline('\r')
child.expect('Error: .*HF_TOKEN', timeout=10)
except Exception as e:
pytest.fail(f'Test failed with exception: {e}')
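For readability, the raw escape sequences sent above are just terminal key codes; a small illustrative sketch naming them (these constants and the helper are not part of the test file):
KEY_DOWN = '\x1b[B'  # ANSI cursor-down, moves the questionary highlight
KEY_ENTER = '\r'     # carriage return, confirms the current selection
def pick_next_option(child) -> None:
    # Same interaction the test performs inline: move down once, then confirm.
    child.sendline(KEY_DOWN)
    child.sendline(KEY_ENTER)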

101
uv.lock generated
View File

@@ -214,7 +214,7 @@ wheels = [
[[package]]
name = "bentoml"
version = "1.4.5"
version = "1.4.8"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "a2wsgi" },
@@ -261,9 +261,9 @@ dependencies = [
{ name = "uvicorn" },
{ name = "watchfiles" },
]
sdist = { url = "https://files.pythonhosted.org/packages/dc/df/6e5a260aaf2ee5da3d797374f81bba087fdcb8b521c7cb7441d390e266b6/bentoml-1.4.5.tar.gz", hash = "sha256:372d6d2f93dbcef38eefd568d0a9c99bfd8b5fbb7202983d948de03efa5cc961", size = 967625 }
sdist = { url = "https://files.pythonhosted.org/packages/87/a4/7ba2d3cfea05e4d9505b4aedfec17477771bc5dc98ed4d818f83cdc23093/bentoml-1.4.8.tar.gz", hash = "sha256:fb7e1d21a415645afdeb928f45a1950b7409960b5d9360189b777640c96f7103", size = 970299 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/23/26/64bfa28ce0b9e29e825a656e4785eb39b5ab4ca7abb6dbe1e25d856ac716/bentoml-1.4.5-py3-none-any.whl", hash = "sha256:31ecdf26e4addcf62c03a356b629925f5c3aca304d73a5cdf60c1bcbf5e19eb2", size = 1147638 },
{ url = "https://files.pythonhosted.org/packages/cb/3e/c4adc9c48ceab6bfd8735f125f1b2ec58c6a636b4f2c092349c02e1beb71/bentoml-1.4.8-py3-none-any.whl", hash = "sha256:b33765e15101348fa6ca1fe68f07b3309ad4ea5c8823e56c2358a1b09b29edbb", size = 1150381 },
]
[[package]]
@@ -681,6 +681,21 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/95/04/ff642e65ad6b90db43e668d70ffb6736436c7ce41fcc549f4e9472234127/h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761", size = 58259 },
]
[[package]]
name = "hf-xet"
version = "1.0.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/95/68/4c363b2e62cb3dbe12d2257ba9b22f101384692d4b9727c5f72433472cff/hf_xet-1.0.3.tar.gz", hash = "sha256:a6d16861a06dd4b8f7229c16b392c5fb8b9588ced89a6ee9bc3e66227f794353", size = 257227 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/26/12/ebbba4b64cb9c908bd5dee355da27f3cc5ad4f29b4b2835041d363388363/hf_xet-1.0.3-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:0705e5db0da5794ab048a8662a7b3aba220f963270b26abc92e8d05abca22451", size = 4979740 },
{ url = "https://files.pythonhosted.org/packages/58/8f/34eadc408b834bcb55886b242a9783da3f63508c4bcbfda7a4f21e61f3d1/hf_xet-1.0.3-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:09a9565ca84049d48c99c83a82d08fbc21d63c04811fd2f7dd088292c1185bc5", size = 4806773 },
{ url = "https://files.pythonhosted.org/packages/a1/de/00b2e2568a39c01b0e013db3300f4d5841f2e597d7b0518923c7881bd166/hf_xet-1.0.3-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70e18534d46ab92bbc3125addaebc145f9b27e06eecd67b40c4342f4b92b677f", size = 53812632 },
{ url = "https://files.pythonhosted.org/packages/e2/d8/4ff790370a6795418196553c33e7bcceaa73a7d587e21e4ccb7661b54a2a/hf_xet-1.0.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:da28fd32213ad5b8f60771aba44ac032ba19d752928cfd95914f09146b3f51ec", size = 52277180 },
{ url = "https://files.pythonhosted.org/packages/83/dd/7b432918a3e9e09794674b81e852acc6e14177c0a4466ac0566b7e7f47a4/hf_xet-1.0.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1b71118b8f7e9edf1ae56282388794f351163c7de5c22ea3737dffa9313f500e", size = 53309852 },
{ url = "https://files.pythonhosted.org/packages/4d/a2/d7a5f452a3a8faaa82aeb3aceddab2e103c1b7028a00bbc4caebca5d79fe/hf_xet-1.0.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5927d1986f87b7b80616eb6353a1402be1d72c46b6b0709b01ffc7623a159563", size = 53739471 },
{ url = "https://files.pythonhosted.org/packages/82/81/966f800933043c0be989306f5224ef058543f7848f1e78d7ef3305bd069a/hf_xet-1.0.3-cp37-abi3-win_amd64.whl", hash = "sha256:014b5a40e62ad334f21513e5ba39b419117396031e9264dfc15dd598a1595029", size = 4123538 },
]
[[package]]
name = "httpcore"
version = "1.0.7"
@@ -763,6 +778,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/a0/d9/a1e041c5e7caa9a05c925f4bdbdfb7f006d1f74996af53467bc394c97be7/importlib_metadata-8.5.0-py3-none-any.whl", hash = "sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b", size = 26514 },
]
[[package]]
name = "iniconfig"
version = "2.1.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050 },
]
[[package]]
name = "jinja2"
version = "3.1.5"
@@ -1167,7 +1191,7 @@ wheels = [
[[package]]
name = "openai"
version = "1.66.3"
version = "1.70.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
@@ -1179,9 +1203,9 @@ dependencies = [
{ name = "tqdm" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/a3/77/5172104ca1df35ed2ed8fb26dbc787f721c39498fc51d666c4db07756a0c/openai-1.66.3.tar.gz", hash = "sha256:8dde3aebe2d081258d4159c4cb27bdc13b5bb3f7ea2201d9bd940b9a89faf0c9", size = 397244 }
sdist = { url = "https://files.pythonhosted.org/packages/87/f5/ae0f3cd226c2993b4ac1cc4b5f6ca099764689f403c14922c9356accec66/openai-1.70.0.tar.gz", hash = "sha256:e52a8d54c3efeb08cf58539b5b21a5abef25368b5432965e4de88cdf4e091b2b", size = 409640 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/78/5a/e20182f7b6171642d759c548daa0ba20a1d3ac10d2bd0a13fd75704a9ac3/openai-1.66.3-py3-none-any.whl", hash = "sha256:a427c920f727711877ab17c11b95f1230b27767ba7a01e5b66102945141ceca9", size = 567400 },
{ url = "https://files.pythonhosted.org/packages/e2/39/c4b38317d2c702c4bc763957735aaeaf30dfc43b5b824121c49a4ba7ba0f/openai-1.70.0-py3-none-any.whl", hash = "sha256:f6438d053fd8b2e05fd6bef70871e832d9bbdf55e119d0ac5b92726f1ae6f614", size = 599070 },
]
[[package]]
@@ -1190,6 +1214,7 @@ source = { editable = "." }
dependencies = [
{ name = "bentoml" },
{ name = "dulwich" },
{ name = "hf-xet" },
{ name = "huggingface-hub" },
{ name = "nvidia-ml-py" },
{ name = "openai" },
@@ -1204,13 +1229,20 @@ dependencies = [
{ name = "uv" },
]
[package.dev-dependencies]
tests = [
{ name = "pexpect" },
{ name = "pytest" },
]
[package.metadata]
requires-dist = [
{ name = "bentoml", specifier = "==1.4.5" },
{ name = "bentoml", specifier = "==1.4.8" },
{ name = "dulwich" },
{ name = "hf-xet" },
{ name = "huggingface-hub" },
{ name = "nvidia-ml-py" },
{ name = "openai", specifier = "==1.66.3" },
{ name = "openai", specifier = "==1.70.0" },
{ name = "pathlib" },
{ name = "pip-requirements-parser" },
{ name = "psutil" },
@@ -1222,6 +1254,12 @@ requires-dist = [
{ name = "uv" },
]
[package.metadata.requires-dev]
tests = [
{ name = "pexpect", specifier = ">=4.9.0" },
{ name = "pytest", specifier = ">=8.3.5" },
]
[[package]]
name = "opentelemetry-api"
version = "1.30.0"
@@ -1345,6 +1383,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191 },
]
[[package]]
name = "pexpect"
version = "4.9.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "ptyprocess" },
]
sdist = { url = "https://files.pythonhosted.org/packages/42/92/cc564bf6381ff43ce1f4d06852fc19a2f11d180f23dc32d9588bee2f149d/pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f", size = 166450 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", size = 63772 },
]
[[package]]
name = "pip-requirements-parser"
version = "32.0.1"
@@ -1358,6 +1408,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/54/d0/d04f1d1e064ac901439699ee097f58688caadea42498ec9c4b4ad2ef84ab/pip_requirements_parser-32.0.1-py3-none-any.whl", hash = "sha256:4659bc2a667783e7a15d190f6fccf8b2486685b6dba4c19c3876314769c57526", size = 35648 },
]
[[package]]
name = "pluggy"
version = "1.5.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/96/2d/02d4312c973c6050a18b314a5ad0b3210edb65a906f868e31c111dede4a6/pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1", size = 67955 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556 },
]
[[package]]
name = "prometheus-client"
version = "0.21.1"
@@ -1483,6 +1542,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/50/1b/6921afe68c74868b4c9fa424dad3be35b095e16687989ebbb50ce4fceb7c/psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553", size = 244885 },
]
[[package]]
name = "ptyprocess"
version = "0.7.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/20/e5/16ff212c1e452235a90aeb09066144d0c5a6a8c0834397e03f5224495c4e/ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220", size = 70762 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/22/a6/858897256d0deac81a172289110f31629fc4cee19b6f01283303e18c8db3/ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", size = 13993 },
]
[[package]]
name = "pyaml"
version = "25.1.0"
@@ -1633,6 +1701,23 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/1c/a7/c8a2d361bf89c0d9577c934ebb7421b25dc84bf3a8e3ac0a40aed9acc547/pyparsing-3.2.1-py3-none-any.whl", hash = "sha256:506ff4f4386c4cec0590ec19e6302d3aedb992fdc02c761e90416f158dacf8e1", size = 107716 },
]
[[package]]
name = "pytest"
version = "8.3.5"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" },
{ name = "exceptiongroup", marker = "python_full_version < '3.11'" },
{ name = "iniconfig" },
{ name = "packaging" },
{ name = "pluggy" },
{ name = "tomli", marker = "python_full_version < '3.11'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/ae/3c/c9d525a414d506893f0cd8a8d0de7706446213181570cdbd766691164e40/pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845", size = 1450891 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/30/3d/64ad57c803f1fa1e963a7946b6e0fea4a70df53c1a7fed304586539c2bac/pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820", size = 343634 },
]
[[package]]
name = "python-dateutil"
version = "2.9.0.post0"