From d8fb4ae4a515ed0a293783d5c43bca798ac78ebe Mon Sep 17 00:00:00 2001
From: Aaron Pham
Date: Wed, 9 Apr 2025 21:36:46 -0400
Subject: [PATCH] chore: cleanup code and env requirements

Signed-off-by: Aaron Pham
---
 .github/workflows/dependabot-auto-merge.yml |   2 +-
 .github/workflows/tests.yml                 |  35 ++
 .gitignore                                  |   1 +
 .pre-commit-config.yaml                     |   2 +-
 .ruff.toml                                  |  15 +-
 gen_readme.py                               |  52 +-
 pyproject.toml                              |   7 +
 src/openllm/__main__.py                     | 445 +++++++-------
 src/openllm/accelerator_spec.py             | 212 +++----
 src/openllm/analytic.py                     | 148 ++---
 src/openllm/clean.py                        |  94 +--
 src/openllm/cloud.py                        | 285 ++++-----
 src/openllm/common.py                       | 628 ++++++++++----------
 src/openllm/local.py                        | 163 ++---
 src/openllm/model.py                        | 246 ++++----
 src/openllm/repo.py                         | 369 ++++++------
 src/openllm/venv.py                         | 158 ++---
 tests/test_cli_flow.py                      |  75 +++
 uv.lock                                     | 101 +++-
 19 files changed, 1670 insertions(+), 1368 deletions(-)
 create mode 100644 .github/workflows/tests.yml
 create mode 100644 tests/test_cli_flow.py

diff --git a/.github/workflows/dependabot-auto-merge.yml b/.github/workflows/dependabot-auto-merge.yml
index af85e81b..84375db4 100644
--- a/.github/workflows/dependabot-auto-merge.yml
+++ b/.github/workflows/dependabot-auto-merge.yml
@@ -12,7 +12,7 @@ jobs:
     steps:
       - name: Dependabot metadata
         id: metadata
-        uses: dependabot/fetch-metadata@v2.3.0
+        uses: dependabot/fetch-metadata@d7267f607e9d3fb96fc2fbe83e0af444713e90b7 # ratchet:dependabot/fetch-metadata@v2.3.0
         with:
           github-token: "${{ secrets.GITHUB_TOKEN }}"
       - name: Enable auto-merge for Dependabot PRs
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
new file mode 100644
index 00000000..d7de1f72
--- /dev/null
+++ b/.github/workflows/tests.yml
@@ -0,0 +1,35 @@
+name: Run Tests
+
+on:
+  push:
+    branches: [main, master]
+  pull_request:
+    branches: [main, master]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.9", "3.12"]
+
+    steps:
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # ratchet:actions/checkout@v4
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # ratchet:actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install uv
+        run: |
+          pip install uv
+
+      - name: Install dependencies with uv
+        run: |
+          uv pip install -e .
+          uv pip install pytest pexpect
+
+      - name: Run tests
+        run: |
+          pytest tests -v
diff --git a/.gitignore b/.gitignore
index db2704c1..fb3f7275 100644
--- a/.gitignore
+++ b/.gitignore
@@ -163,3 +163,4 @@ cython_debug/
 venv/
 .envrc
 _version.py
+.cursor
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 633fec62..40a9e3ca 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -7,7 +7,7 @@ default_language_version:
   python: python3.11 # NOTE: sync with .python-version-default
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: "v0.11.2"
+    rev: "v0.11.4"
     hooks:
       - id: ruff
         alias: r
diff --git a/.ruff.toml b/.ruff.toml
index 8f424ed9..2cf4e60a 100644
--- a/.ruff.toml
+++ b/.ruff.toml
@@ -1,7 +1,7 @@
 extend-include = ["*.ipynb"]
 preview = true
-line-length = 119
-indent-width = 4
+line-length = 100
+indent-width = 2
 
 [format]
 preview = true
@@ -18,21 +18,16 @@ ignore = [
 ]
 select = [
   "F",
-  "G", # flake8-logging-format
-  "PERF", # perflint
-  "RUF", # Ruff-specific rules
+  "G",    # flake8-logging-format
+  "PERF", # perflint
+  "RUF",  # Ruff-specific rules
   "W6",
   "E71",
   "E72",
   "E112",
   "E113",
-  # "E124",
   "E203",
   "E272",
-  # "E303",
-  # "E304",
-  # "E501",
-  # "E502",
   "E702",
   "E703",
   "E731",
diff --git a/gen_readme.py b/gen_readme.py
index 15bd9081..d9ea7192 100644
--- a/gen_readme.py
+++ b/gen_readme.py
@@ -9,30 +9,30 @@ import subprocess, sys, pathlib, json, jinja2
 
 if __name__ == '__main__':
-    with (pathlib.Path('.').parent / 'README.md').open('w') as f:
-        f.write(
-            jinja2.Environment(loader=jinja2.FileSystemLoader('.'))
-            .get_template('README.md.tpl')
-            .render(
-                model_dict=json.loads(
-                    subprocess.run(
-                        [
-                            sys.executable,
-                            '-m',
-                            'uv',
-                            'run',
-                            '--with-editable',
-                            '.',
-                            'openllm',
-                            'model',
-                            'list',
-                            '--output',
-                            'readme',
-                        ],
-                        text=True,
-                        check=True,
-                        capture_output=True,
-                    ).stdout.strip()
-                )
-            )
-        )
+  with (pathlib.Path('.').parent / 'README.md').open('w') as f:
+    f.write(
+      jinja2.Environment(loader=jinja2.FileSystemLoader('.'))
+      .get_template('README.md.tpl')
+      .render(
+        model_dict=json.loads(
+          subprocess.run(
+            [
+              sys.executable,
+              '-m',
+              'uv',
+              'run',
+              '--with-editable',
+              '.',
+              'openllm',
+              'model',
+              'list',
+              '--output',
+              'readme',
+            ],
+            text=True,
+            check=True,
+            capture_output=True,
+          ).stdout.strip()
+        )
+      )
+    )
diff --git a/pyproject.toml b/pyproject.toml
index 4bde6f86..ff42441b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,6 +43,7 @@ dependencies = [
   "uv",
   "openai==1.70.0",
   "huggingface-hub",
+  "hf-xet",
   "typing-extensions>=4.12.2",
 ]
 keywords = [
@@ -87,6 +88,12 @@ src-dir = "src/openllm"
 requires = ["hatchling==1.27.0", "hatch-vcs==0.4.0"]
 build-backend = 'hatchling.build'
 
+[dependency-groups]
+tests = [
+  "pexpect>=4.9.0",
+  "pytest>=8.3.5",
+]
+
 [tool.hatch.version]
 source = "vcs"
 fallback-version = "0.0.0"
diff --git a/src/openllm/__main__.py b/src/openllm/__main__.py
index 183c35f2..4b8a57fb 100644
--- a/src/openllm/__main__.py
+++ b/src/openllm/__main__.py
@@ -14,12 +14,12 @@ from openllm.model import app as model_app, ensure_bento, list_bento
 from openllm.repo import app as repo_app, cmd_update
 
 if typing.TYPE_CHECKING:
-    from openllm.common import DeploymentTarget
+  from openllm.common import DeploymentTarget
 
 app = OpenLLMTyper(
-    help='`openllm hello` to get started. '
-    'OpenLLM is a CLI tool to manage and deploy open source LLMs and'
-    ' get an OpenAI API compatible chat server in seconds.'
+  help='`openllm hello` to get started. '
+  'OpenLLM is a CLI tool to manage and deploy open source LLMs and'
+  ' get an OpenAI API compatible chat server in seconds.'
 )
 
 app.add_typer(repo_app, name='repo')
@@ -28,263 +28,274 @@ app.add_typer(clean_app, name='clean')
 
 
 def _select_bento_name(models: list[BentoInfo], target: DeploymentTarget) -> tuple[str, str]:
-    from tabulate import tabulate
-
-    model_infos = [(model.repo.name, model.name, can_run(model, target)) for model in models]
-    model_name_groups: defaultdict[tuple[str, str], float] = defaultdict(lambda: 0.0)
-    for repo, name, score in model_infos:
-        model_name_groups[repo, name] += score
-    table_data = [(name, repo, CHECKED if score > 0 else '') for (repo, name), score in model_name_groups.items()]
-    if not table_data:
-        output('No model found', style='red')
-        raise typer.Exit(1)
-    table: list[str] = tabulate(table_data, headers=['model', 'repo', 'locally runnable']).split('\n')
-
-    selected: tuple[str, str] | None = questionary.select(
-        'Select a model',
-        [
-            questionary.Separator(f'{table[0]}\n {table[1]}'),
-            *[questionary.Choice(line, value=value[:2]) for value, line in zip(table_data, table[2:])],
-        ],
-    ).ask()
-    if selected is None:
-        raise typer.Exit(1)
-    return selected
+  from tabulate import tabulate
+
+  model_infos = [(model.repo.name, model.name, can_run(model, target)) for model in models]
+  model_name_groups: defaultdict[tuple[str, str], float] = defaultdict(lambda: 0.0)
+  for repo, name, score in model_infos:
+    model_name_groups[repo, name] += score
+  table_data = [
+    (name, repo, CHECKED if score > 0 else '') for (repo, name), score in model_name_groups.items()
+  ]
+  if not table_data:
+    output('No model found', style='red')
+    raise typer.Exit(1)
+  table: list[str] = tabulate(table_data, headers=['model', 'repo', 'locally runnable']).split('\n')
+
+  selected: tuple[str, str] | None = questionary.select(
+    'Select a model',
+    [
+      questionary.Separator(f'{table[0]}\n {table[1]}'),
+      *[questionary.Choice(line, value=value[:2]) for value, line in zip(table_data, table[2:])],
+    ],
+  ).ask()
+  if selected is None:
+    raise typer.Exit(1)
+  return selected
 
 
 def _select_bento_version(
-    models: list[BentoInfo], target: DeploymentTarget | None, bento_name: str, repo: str
+  models: list[BentoInfo], target: DeploymentTarget | None, bento_name: str, repo: str
 ) -> tuple[BentoInfo, float]:
-    from tabulate import tabulate
-
-    model_infos: list[tuple[BentoInfo, float]] = [
-        (model, can_run(model, target)) for model in models if model.name == bento_name and model.repo.name == repo
-    ]
-
-    table_data = [
-        [model.tag, CHECKED if score > 0 else '']
-        for model, score in model_infos
-        if model.name == bento_name and model.repo.name == repo
-    ]
-    if not table_data:
-        output(f'No model found for {bento_name} in {repo}', style='red')
-        raise typer.Exit(1)
-    table: list[str] = tabulate(table_data, headers=['version', 'locally runnable']).split('\n')
+  from tabulate import tabulate
+
+  model_infos: list[tuple[BentoInfo, float]] = [
+    (model, can_run(model, target))
+    for model in models
+    if model.name == bento_name and model.repo.name == repo
+  ]
+
+  table_data = [
+    [model.tag, CHECKED if score > 0 else '']
+    for model, score in model_infos
+    if model.name == bento_name and model.repo.name == repo
+  ]
+  if not table_data:
+    output(f'No model found for {bento_name} in {repo}', style='red')
+    raise typer.Exit(1)
+  table: list[str] = tabulate(table_data, headers=['version', 'locally runnable']).split('\n')
 
-    selected: tuple[BentoInfo, float] | None = questionary.select(
-        'Select a version',
-        [
- questionary.Separator(f'{table[0]}\n {table[1]}'), - *[questionary.Choice(line, value=value[:2]) for value, line in zip(model_infos, table[2:])], - ], - ).ask() - if selected is None: - raise typer.Exit(1) - return selected + selected: tuple[BentoInfo, float] | None = questionary.select( + 'Select a version', + [ + questionary.Separator(f'{table[0]}\n {table[1]}'), + *[questionary.Choice(line, value=value[:2]) for value, line in zip(model_infos, table[2:])], + ], + ).ask() + if selected is None: + raise typer.Exit(1) + return selected def _select_target(bento: BentoInfo, targets: list[DeploymentTarget]) -> DeploymentTarget: - from tabulate import tabulate + from tabulate import tabulate - targets.sort(key=lambda x: can_run(bento, x), reverse=True) - if not targets: - output('No available instance type, check your bentocloud account', style='red') - raise typer.Exit(1) + targets.sort(key=lambda x: can_run(bento, x), reverse=True) + if not targets: + output('No available instance type, check your bentocloud account', style='red') + raise typer.Exit(1) - table = tabulate( - [ - [ - target.name, - target.accelerators_repr, - f'${target.price}', - CHECKED if can_run(bento, target) else 'insufficient res.', - ] - for target in targets - ], - headers=['instance type', 'accelerator', 'price/hr', 'deployable'], - ).split('\n') + table = tabulate( + [ + [ + target.name, + target.accelerators_repr, + f'${target.price}', + CHECKED if can_run(bento, target) else 'insufficient res.', + ] + for target in targets + ], + headers=['instance type', 'accelerator', 'price/hr', 'deployable'], + ).split('\n') - selected: DeploymentTarget | None = questionary.select( - 'Select an instance type', - [ - questionary.Separator(f'{table[0]}\n {table[1]}'), - *[questionary.Choice(f'{line}', value=target) for target, line in zip(targets, table[2:])], - ], - ).ask() - if selected is None: - raise typer.Exit(1) - return selected + selected: DeploymentTarget | None = questionary.select( + 'Select an instance type', + [ + questionary.Separator(f'{table[0]}\n {table[1]}'), + *[questionary.Choice(f'{line}', value=target) for target, line in zip(targets, table[2:])], + ], + ).ask() + if selected is None: + raise typer.Exit(1) + return selected def _select_action(bento: BentoInfo, score: float) -> None: - if score > 0: - options: list[typing.Any] = [ - questionary.Separator('Available actions'), - questionary.Choice('0. Run the model in terminal', value='run', shortcut_key='0'), - questionary.Separator(f' $ openllm run {bento}'), - questionary.Separator(' '), - questionary.Choice('1. Serve the model locally and get a chat server', value='serve', shortcut_key='1'), - questionary.Separator(f' $ openllm serve {bento}'), - questionary.Separator(' '), - questionary.Choice( - '2. Deploy the model to bentocloud and get a scalable chat server', value='deploy', shortcut_key='2' - ), - questionary.Separator(f' $ openllm deploy {bento}'), - ] - else: - options = [ - questionary.Separator('Available actions'), - questionary.Choice( - '0. Run the model in terminal', value='run', disabled='insufficient res.', shortcut_key='0' - ), - questionary.Separator(f' $ openllm run {bento}'), - questionary.Separator(' '), - questionary.Choice( - '1. Serve the model locally and get a chat server', - value='serve', - disabled='insufficient res.', - shortcut_key='1', - ), - questionary.Separator(f' $ openllm serve {bento}'), - questionary.Separator(' '), - questionary.Choice( - '2. 
Deploy the model to bentocloud and get a scalable chat server', value='deploy', shortcut_key='2' - ), - questionary.Separator(f' $ openllm deploy {bento}'), - ] - action: str | None = questionary.select('Select an action', options).ask() - if action is None: - raise typer.Exit(1) - if action == 'run': - try: - port = random.randint(30000, 40000) - local_run(bento, port=port) - finally: - output('\nUse this command to run the action again:', style='green') - output(f' $ openllm run {bento}', style='orange') - elif action == 'serve': - try: - local_serve(bento) - finally: - output('\nUse this command to run the action again:', style='green') - output(f' $ openllm serve {bento}', style='orange') - elif action == 'deploy': - ensure_cloud_context() - targets = get_cloud_machine_spec() - target = _select_target(bento, targets) - try: - cloud_deploy(bento, target) - finally: - output('\nUse this command to run the action again:', style='green') - output(f' $ openllm deploy {bento} --instance-type {target.name}', style='orange') + if score > 0: + options: list[typing.Any] = [ + questionary.Separator('Available actions'), + questionary.Choice('0. Run the model in terminal', value='run', shortcut_key='0'), + questionary.Separator(f' $ openllm run {bento}'), + questionary.Separator(' '), + questionary.Choice( + '1. Serve the model locally and get a chat server', value='serve', shortcut_key='1' + ), + questionary.Separator(f' $ openllm serve {bento}'), + questionary.Separator(' '), + questionary.Choice( + '2. Deploy the model to bentocloud and get a scalable chat server', + value='deploy', + shortcut_key='2', + ), + questionary.Separator(f' $ openllm deploy {bento}'), + ] + else: + options = [ + questionary.Separator('Available actions'), + questionary.Choice( + '0. Run the model in terminal', value='run', disabled='insufficient res.', shortcut_key='0' + ), + questionary.Separator(f' $ openllm run {bento}'), + questionary.Separator(' '), + questionary.Choice( + '1. Serve the model locally and get a chat server', + value='serve', + disabled='insufficient res.', + shortcut_key='1', + ), + questionary.Separator(f' $ openllm serve {bento}'), + questionary.Separator(' '), + questionary.Choice( + '2. 
Deploy the model to bentocloud and get a scalable chat server', + value='deploy', + shortcut_key='2', + ), + questionary.Separator(f' $ openllm deploy {bento}'), + ] + action: str | None = questionary.select('Select an action', options).ask() + if action is None: + raise typer.Exit(1) + if action == 'run': + try: + port = random.randint(30000, 40000) + local_run(bento, port=port) + finally: + output('\nUse this command to run the action again:', style='green') + output(f' $ openllm run {bento}', style='orange') + elif action == 'serve': + try: + local_serve(bento) + finally: + output('\nUse this command to run the action again:', style='green') + output(f' $ openllm serve {bento}', style='orange') + elif action == 'deploy': + ensure_cloud_context() + targets = get_cloud_machine_spec() + target = _select_target(bento, targets) + try: + cloud_deploy(bento, target) + finally: + output('\nUse this command to run the action again:', style='green') + output(f' $ openllm deploy {bento} --instance-type {target.name}', style='orange') @app.command(help='get started interactively') -def hello() -> None: - INTERACTIVE.set(True) +def hello(repo: typing.Optional[str] = None) -> None: + cmd_update() + INTERACTIVE.set(True) - target = get_local_machine_spec() - output(f' Detected Platform: {target.platform}', style='green') - if target.accelerators: - output(' Detected Accelerators: ', style='green') - for a in target.accelerators: - output(f' - {a.model} {a.memory_size}GB', style='green') - else: - output(' Detected Accelerators: None', style='yellow') + target = get_local_machine_spec() + output(f' Detected Platform: {target.platform}', style='green') + if target.accelerators: + output(' Detected Accelerators: ', style='green') + for a in target.accelerators: + output(f' - {a.model} {a.memory_size}GB', style='green') + else: + output(' Detected Accelerators: None', style='green') - models = list_bento() - if not models: - output('No model found, you probably need to update the model repo:', style='red') - output(' $ openllm repo update', style='orange') - raise typer.Exit(1) + models = list_bento(repo_name=repo) + if not models: + output('No model found, you probably need to update the model repo:', style='red') + output(' $ openllm repo update', style='orange') + raise typer.Exit(1) - bento_name, repo = _select_bento_name(models, target) - bento, score = _select_bento_version(models, target, bento_name, repo) - _select_action(bento, score) + bento_name, repo = _select_bento_name(models, target) + bento, score = _select_bento_version(models, target, bento_name, repo) + _select_action(bento, score) @app.command(help='start an OpenAI API compatible chat server and chat in browser') def serve( - model: typing.Annotated[str, typer.Argument()] = '', - repo: typing.Optional[str] = None, - port: int = 3000, - verbose: bool = False, + model: typing.Annotated[str, typer.Argument()] = '', + repo: typing.Optional[str] = None, + port: int = 3000, + verbose: bool = False, ) -> None: - cmd_update() - if verbose: - VERBOSE_LEVEL.set(20) - target = get_local_machine_spec() - bento = ensure_bento(model, target=target, repo_name=repo) - local_serve(bento, port=port) + cmd_update() + if verbose: + VERBOSE_LEVEL.set(20) + target = get_local_machine_spec() + bento = ensure_bento(model, target=target, repo_name=repo) + local_serve(bento, port=port) @app.command(help='run the model and chat in terminal') def run( - model: typing.Annotated[str, typer.Argument()] = '', - repo: typing.Optional[str] = None, - port: 
typing.Optional[int] = None, - timeout: int = 600, - verbose: bool = False, + model: typing.Annotated[str, typer.Argument()] = '', + repo: typing.Optional[str] = None, + port: typing.Optional[int] = None, + timeout: int = 600, + verbose: bool = False, ) -> None: - cmd_update() - if verbose: - VERBOSE_LEVEL.set(20) - target = get_local_machine_spec() - bento = ensure_bento(model, target=target, repo_name=repo) - if port is None: - port = random.randint(30000, 40000) - local_run(bento, port=port, timeout=timeout) + cmd_update() + if verbose: + VERBOSE_LEVEL.set(20) + target = get_local_machine_spec() + bento = ensure_bento(model, target=target, repo_name=repo) + if port is None: + port = random.randint(30000, 40000) + local_run(bento, port=port, timeout=timeout) @app.command(help='deploy production-ready OpenAI API-compatible server to BentoCloud') def deploy( - model: typing.Annotated[str, typer.Argument()] = '', - instance_type: typing.Optional[str] = None, - repo: typing.Optional[str] = None, - verbose: bool = False, - env: typing.Optional[list[str]] = typer.Option( - None, - '--env', - help='Environment variables to pass to the deployment command. Format: NAME or NAME=value. Can be specified multiple times.', - ), + model: typing.Annotated[str, typer.Argument()] = '', + instance_type: typing.Optional[str] = None, + repo: typing.Optional[str] = None, + verbose: bool = False, + env: typing.Optional[list[str]] = typer.Option( + None, + '--env', + help='Environment variables to pass to the deployment command. Format: NAME or NAME=value. Can be specified multiple times.', + ), ) -> None: - cmd_update() - if verbose: - VERBOSE_LEVEL.set(20) - bento = ensure_bento(model, repo_name=repo) - if instance_type is not None: - return cloud_deploy(bento, DeploymentTarget(accelerators=[], name=instance_type), cli_envs=env) - targets = sorted( - filter(lambda x: can_run(bento, x) > 0, get_cloud_machine_spec()), - key=lambda x: can_run(bento, x), - reverse=True, - ) - if not targets: - output('No available instance type, check your bentocloud account', style='red') - raise typer.Exit(1) - target = targets[0] - output(f'Recommended instance type: {target.name}', style='green') - cloud_deploy(bento, target, cli_envs=env) + cmd_update() + if verbose: + VERBOSE_LEVEL.set(20) + bento = ensure_bento(model, repo_name=repo) + if instance_type is not None: + return cloud_deploy(bento, DeploymentTarget(accelerators=[], name=instance_type), cli_envs=env) + targets = sorted( + filter(lambda x: can_run(bento, x) > 0, get_cloud_machine_spec()), + key=lambda x: can_run(bento, x), + reverse=True, + ) + if not targets: + output('No available instance type, check your bentocloud account', style='red') + raise typer.Exit(1) + target = targets[0] + output(f'Recommended instance type: {target.name}', style='green') + cloud_deploy(bento, target, cli_envs=env) @app.callback(invoke_without_command=True) def typer_callback( - verbose: int = 0, - do_not_track: bool = typer.Option( - False, '--do-not-track', help='Whether to disable usage tracking', envvar=DO_NOT_TRACK - ), - version: bool = typer.Option(False, '--version', '-v', help='Show version'), + verbose: int = 0, + do_not_track: bool = typer.Option( + False, '--do-not-track', help='Whether to disable usage tracking', envvar=DO_NOT_TRACK + ), + version: bool = typer.Option(False, '--version', '-v', help='Show version'), ) -> None: - if verbose: - VERBOSE_LEVEL.set(verbose) - if version: - output( - f'openllm, {importlib.metadata.version("openllm")}\nPython 
({platform.python_implementation()}) {platform.python_version()}' - ) - sys.exit(0) - if do_not_track: - os.environ[DO_NOT_TRACK] = str(True) + if verbose: + VERBOSE_LEVEL.set(verbose) + if version: + output( + f'openllm, {importlib.metadata.version("openllm")}\nPython ({platform.python_implementation()}) {platform.python_version()}' + ) + sys.exit(0) + if do_not_track: + os.environ[DO_NOT_TRACK] = str(True) if __name__ == '__main__': - app() + app() diff --git a/src/openllm/accelerator_spec.py b/src/openllm/accelerator_spec.py index f9945fa9..e0455979 100644 --- a/src/openllm/accelerator_spec.py +++ b/src/openllm/accelerator_spec.py @@ -9,129 +9,141 @@ from openllm.common import BentoInfo, DeploymentTarget, output, Accelerator def parse_memory_string(v: typing.Any) -> typing.Any: - """Parse memory strings like "60Gi" into float.""" - if isinstance(v, str): - match = re.match(r'(\d+(\.\d+)?)\s*Gi$', v, re.IGNORECASE) - if match: - return float(match.group(1)) - # Pass other types (including numbers or other strings for standard float conversion) through - return v + """Parse memory strings like "60Gi" into float.""" + if isinstance(v, str): + match = re.match(r'(\d+(\.\d+)?)\s*Gi$', v, re.IGNORECASE) + if match: + return float(match.group(1)) + # Pass other types (including numbers or other strings for standard float conversion) through + return v class Resource(pydantic.BaseModel): - memory: typing.Annotated[float, BeforeValidator(parse_memory_string)] = 0.0 - cpu: int = 0 - gpu: int = 0 - gpu_type: str = '' + memory: typing.Annotated[float, BeforeValidator(parse_memory_string)] = 0.0 + cpu: int = 0 + gpu: int = 0 + gpu_type: str = '' - @override - def __hash__(self) -> int: - return hash((self.cpu, self.memory, self.gpu, self.gpu_type)) + @override + def __hash__(self) -> int: + return hash((self.cpu, self.memory, self.gpu, self.gpu_type)) - def __bool__(self) -> bool: - return any(value is not None for value in self.__dict__.values()) + def __bool__(self) -> bool: + return any(value is not None for value in self.__dict__.values()) ACCELERATOR_SPECS: dict[str, Accelerator] = { - 'nvidia-gtx-1650': Accelerator(model='GTX 1650', memory_size=4.0), - 'nvidia-gtx-1060': Accelerator(model='GTX 1060', memory_size=6.0), - 'nvidia-gtx-1080-ti': Accelerator(model='GTX 1080 Ti', memory_size=11.0), - 'nvidia-rtx-3060': Accelerator(model='RTX 3060', memory_size=12.0), - 'nvidia-rtx-3060-ti': Accelerator(model='RTX 3060 Ti', memory_size=8.0), - 'nvidia-rtx-3070-ti': Accelerator(model='RTX 3070 Ti', memory_size=8.0), - 'nvidia-rtx-3080': Accelerator(model='RTX 3080', memory_size=10.0), - 'nvidia-rtx-3080-ti': Accelerator(model='RTX 3080 Ti', memory_size=12.0), - 'nvidia-rtx-3090': Accelerator(model='RTX 3090', memory_size=24.0), - 'nvidia-rtx-4070-ti': Accelerator(model='RTX 4070 Ti', memory_size=12.0), - 'nvidia-tesla-p4': Accelerator(model='P4', memory_size=8.0), - 'nvidia-tesla-p100': Accelerator(model='P100', memory_size=16.0), - 'nvidia-tesla-k80': Accelerator(model='K80', memory_size=12.0), - 'nvidia-tesla-t4': Accelerator(model='T4', memory_size=16.0), - 'nvidia-tesla-v100': Accelerator(model='V100', memory_size=16.0), - 'nvidia-l4': Accelerator(model='L4', memory_size=24.0), - 'nvidia-tesla-l4': Accelerator(model='L4', memory_size=24.0), - 'nvidia-tesla-a10g': Accelerator(model='A10G', memory_size=24.0), - 'nvidia-a100-80g': Accelerator(model='A100', memory_size=80.0), - 'nvidia-a100-80gb': Accelerator(model='A100', memory_size=80.0), - 'nvidia-tesla-a100': Accelerator(model='A100', 
memory_size=40.0), + 'nvidia-gtx-1650': Accelerator(model='GTX 1650', memory_size=4.0), + 'nvidia-gtx-1060': Accelerator(model='GTX 1060', memory_size=6.0), + 'nvidia-gtx-1080-ti': Accelerator(model='GTX 1080 Ti', memory_size=11.0), + 'nvidia-rtx-3060': Accelerator(model='RTX 3060', memory_size=12.0), + 'nvidia-rtx-3060-ti': Accelerator(model='RTX 3060 Ti', memory_size=8.0), + 'nvidia-rtx-3070-ti': Accelerator(model='RTX 3070 Ti', memory_size=8.0), + 'nvidia-rtx-3080': Accelerator(model='RTX 3080', memory_size=10.0), + 'nvidia-rtx-3080-ti': Accelerator(model='RTX 3080 Ti', memory_size=12.0), + 'nvidia-rtx-3090': Accelerator(model='RTX 3090', memory_size=24.0), + 'nvidia-rtx-4070-ti': Accelerator(model='RTX 4070 Ti', memory_size=12.0), + 'nvidia-tesla-p4': Accelerator(model='P4', memory_size=8.0), + 'nvidia-tesla-p100': Accelerator(model='P100', memory_size=16.0), + 'nvidia-tesla-k80': Accelerator(model='K80', memory_size=12.0), + 'nvidia-tesla-t4': Accelerator(model='T4', memory_size=16.0), + 'nvidia-tesla-v100': Accelerator(model='V100', memory_size=16.0), + 'nvidia-l4': Accelerator(model='L4', memory_size=24.0), + 'nvidia-tesla-l4': Accelerator(model='L4', memory_size=24.0), + 'nvidia-tesla-a10g': Accelerator(model='A10G', memory_size=24.0), + 'nvidia-a100-80g': Accelerator(model='A100', memory_size=80.0), + 'nvidia-a100-80gb': Accelerator(model='A100', memory_size=80.0), + 'nvidia-tesla-a100': Accelerator(model='A100', memory_size=40.0), + 'nvidia-tesla-h100': Accelerator(model='H100', memory_size=80.0), + 'nvidia-h200-141gb': Accelerator(model='H200', memory_size=141.0), + 'nvidia-blackwell-b100': Accelerator(model='B100', memory_size=192.0), + 'nvidia-blackwell-gb200': Accelerator(model='GB200', memory_size=192.0), } @functools.lru_cache def get_local_machine_spec() -> DeploymentTarget: - if psutil.MACOS: - return DeploymentTarget(accelerators=[], source='local', platform='macos') + if psutil.MACOS: + return DeploymentTarget(accelerators=[], source='local', platform='macos') - if psutil.WINDOWS: - platform = 'windows' - elif psutil.LINUX: - platform = 'linux' - else: - raise NotImplementedError('Unsupported platform') + if psutil.WINDOWS: + platform = 'windows' + elif psutil.LINUX: + platform = 'linux' + else: + raise NotImplementedError('Unsupported platform') - from pynvml import ( - nvmlDeviceGetCount, - nvmlDeviceGetCudaComputeCapability, - nvmlDeviceGetHandleByIndex, - nvmlDeviceGetMemoryInfo, - nvmlDeviceGetName, - nvmlInit, - nvmlShutdown, - ) + from pynvml import ( + nvmlDeviceGetCount, + nvmlDeviceGetCudaComputeCapability, + nvmlDeviceGetHandleByIndex, + nvmlDeviceGetMemoryInfo, + nvmlDeviceGetName, + nvmlInit, + nvmlShutdown, + ) - try: - nvmlInit() - device_count = nvmlDeviceGetCount() - accelerators: list[Accelerator] = [] - for i in range(device_count): - handle = nvmlDeviceGetHandleByIndex(i) - name = nvmlDeviceGetName(handle) - memory_info = nvmlDeviceGetMemoryInfo(handle) - accelerators.append(Accelerator(model=name, memory_size=math.ceil(int(memory_info.total) / 1024**3))) - compute_capability = nvmlDeviceGetCudaComputeCapability(handle) - if compute_capability < (7, 5): - output( - f'GPU {name} with compute capability {compute_capability} ' - 'may not be supported, 7.5 or higher is recommended. 
check ' - 'https://developer.nvidia.com/cuda-gpus for more information', - style='yellow', - ) - nvmlShutdown() - return DeploymentTarget(accelerators=accelerators, source='local', platform=platform) - except Exception as e: + try: + nvmlInit() + device_count = nvmlDeviceGetCount() + accelerators: list[Accelerator] = [] + for i in range(device_count): + handle = nvmlDeviceGetHandleByIndex(i) + name = nvmlDeviceGetName(handle) + memory_info = nvmlDeviceGetMemoryInfo(handle) + accelerators.append( + Accelerator(model=name, memory_size=math.ceil(int(memory_info.total) / 1024**3)) + ) + compute_capability = nvmlDeviceGetCudaComputeCapability(handle) + if compute_capability < (7, 5): output( - 'Failed to get local GPU info. Ensure nvidia driver is installed to enable local GPU deployment', - style='yellow', + f'GPU {name} with compute capability {compute_capability} ' + 'may not be supported, 7.5 or higher is recommended. check ' + 'https://developer.nvidia.com/cuda-gpus for more information', + style='yellow', ) - output(f'Error: {e}', style='red', level=20) - return DeploymentTarget(accelerators=[], source='local', platform=platform) + nvmlShutdown() + return DeploymentTarget(accelerators=accelerators, source='local', platform=platform) + except Exception as e: + output( + 'Failed to get local GPU info. Ensure nvidia driver is installed to enable local GPU deployment', + style='yellow', + ) + output(f'Error: {e}', style='red', level=20) + return DeploymentTarget(accelerators=[], source='local', platform=platform) @functools.lru_cache(typed=True) def can_run(bento: BentoInfo, target: DeploymentTarget | None = None) -> float: - """ - Calculate if the bento can be deployed on the target. - """ - if target is None: - target = get_local_machine_spec() + """ + Calculate if the bento can be deployed on the target. 
+ """ + if target is None: + target = get_local_machine_spec() - resource_spec = Resource(**(bento.bento_yaml['services'][0]['config'].get('resources', {}))) - labels = bento.bento_yaml.get('labels', {}) - platforms = labels.get('platforms', 'linux').split(',') + resource_spec = Resource(**(bento.bento_yaml['services'][0]['config'].get('resources', {}))) + labels = bento.bento_yaml.get('labels', {}) + platforms = labels.get('platforms', 'linux').split(',') - if target.platform not in platforms: - return 0.0 + if target.platform not in platforms: + return 0.0 - # return 1.0 if no resource is specified - if not resource_spec: - return 0.5 + # return 1.0 if no resource is specified + if not resource_spec: + return 0.5 - if resource_spec.gpu > 0: - required_gpu = ACCELERATOR_SPECS[resource_spec.gpu_type] - filtered_accelerators = [ac for ac in target.accelerators if ac.memory_size >= required_gpu.memory_size] - if resource_spec.gpu > len(filtered_accelerators): - return 0.0 - return required_gpu.memory_size * resource_spec.gpu / sum(ac.memory_size for ac in target.accelerators) - if target.accelerators: - return 0.01 / sum(ac.memory_size for ac in target.accelerators) - return 1.0 + if resource_spec.gpu > 0: + required_gpu = ACCELERATOR_SPECS[resource_spec.gpu_type] + filtered_accelerators = [ + ac for ac in target.accelerators if ac.memory_size >= required_gpu.memory_size + ] + if resource_spec.gpu > len(filtered_accelerators): + return 0.0 + return ( + required_gpu.memory_size + * resource_spec.gpu + / sum(ac.memory_size for ac in target.accelerators) + ) + if target.accelerators: + return 0.01 / sum(ac.memory_size for ac in target.accelerators) + return 1.0 diff --git a/src/openllm/analytic.py b/src/openllm/analytic.py index b8b2d8e0..877a9ed9 100644 --- a/src/openllm/analytic.py +++ b/src/openllm/analytic.py @@ -7,99 +7,99 @@ DO_NOT_TRACK = 'BENTOML_DO_NOT_TRACK' class EventMeta(abc.ABC): - @property - def event_name(self) -> str: - # camel case to snake case - event_name = re.sub(r'(? str: + # camel case to snake case + event_name = re.sub(r'(? 
list[str]: - return list(self.commands) + def list_commands(self, ctx: click.Context) -> list[str]: + return list(self.commands) class OpenLLMTyper(typer.Typer): - def __init__(self, *args: typing.Any, **kwargs: typing.Any): - no_args_is_help: bool = kwargs.pop('no_args_is_help', True) - context_settings: dict[str, typing.Any] = kwargs.pop('context_settings', {}) - if 'help_option_names' not in context_settings: - context_settings['help_option_names'] = ('-h', '--help') - if 'max_content_width' not in context_settings: - context_settings['max_content_width'] = int(os.environ.get('COLUMNS', str(120))) - klass = kwargs.pop('cls', OrderedCommands) + def __init__(self, *args: typing.Any, **kwargs: typing.Any): + no_args_is_help: bool = kwargs.pop('no_args_is_help', True) + context_settings: dict[str, typing.Any] = kwargs.pop('context_settings', {}) + if 'help_option_names' not in context_settings: + context_settings['help_option_names'] = ('-h', '--help') + if 'max_content_width' not in context_settings: + context_settings['max_content_width'] = int(os.environ.get('COLUMNS', str(120))) + klass = kwargs.pop('cls', OrderedCommands) - super().__init__( - *args, cls=klass, no_args_is_help=no_args_is_help, context_settings=context_settings, **kwargs - ) + super().__init__( + *args, cls=klass, no_args_is_help=no_args_is_help, context_settings=context_settings, **kwargs + ) - # NOTE: Since OpenLLMTyper only wraps command to add analytics, the default type-hint for @app.command - # does not change, hence the below hijacking. - if typing.TYPE_CHECKING: - command = typer.Typer.command - else: + # NOTE: Since OpenLLMTyper only wraps command to add analytics, the default type-hint for @app.command + # does not change, hence the below hijacking. + if typing.TYPE_CHECKING: + command = typer.Typer.command + else: - def command(self, *args: typing.Any, **kwargs: typing.Any): - def decorator(f): - @functools.wraps(f) - @click.pass_context - def wrapped(ctx: click.Context, *args, **kwargs): - from bentoml._internal.utils.analytics import track + def command(self, *args: typing.Any, **kwargs: typing.Any): + def decorator(f): + @functools.wraps(f) + @click.pass_context + def wrapped(ctx: click.Context, *args, **kwargs): + from bentoml._internal.utils.analytics import track - do_not_track = os.environ.get(DO_NOT_TRACK, str(False)).lower() == 'true' + do_not_track = os.environ.get(DO_NOT_TRACK, str(False)).lower() == 'true' - # so we know that the root program is openllm - command_name = ctx.info_name - if ctx.parent.parent is not None: - # openllm model list - command_group = ctx.parent.info_name - elif ctx.parent.info_name == ctx.find_root().info_name: - # openllm run - command_group = 'openllm' + # so we know that the root program is openllm + command_name = ctx.info_name + if ctx.parent.parent is not None: + # openllm model list + command_group = ctx.parent.info_name + elif ctx.parent.info_name == ctx.find_root().info_name: + # openllm run + command_group = 'openllm' - if do_not_track: - return f(*args, **kwargs) - start_time = time.time_ns() - try: - return_value = f(*args, **kwargs) - duration_in_ns = time.time_ns() - start_time - track( - OpenllmCliEvent( - cmd_group=command_group, cmd_name=command_name, duration_in_ms=duration_in_ns / 1e6 - ) - ) - return return_value - except BaseException as e: - duration_in_ns = time.time_ns() - start_time - track( - OpenllmCliEvent( - cmd_group=command_group, - cmd_name=command_name, - duration_in_ms=duration_in_ns / 1e6, - error_type=type(e).__name__, - return_code=(2 
if isinstance(e, KeyboardInterrupt) else 1), - ) - ) - raise + if do_not_track: + return f(*args, **kwargs) + start_time = time.time_ns() + try: + return_value = f(*args, **kwargs) + duration_in_ns = time.time_ns() - start_time + track( + OpenllmCliEvent( + cmd_group=command_group, cmd_name=command_name, duration_in_ms=duration_in_ns / 1e6 + ) + ) + return return_value + except BaseException as e: + duration_in_ns = time.time_ns() - start_time + track( + OpenllmCliEvent( + cmd_group=command_group, + cmd_name=command_name, + duration_in_ms=duration_in_ns / 1e6, + error_type=type(e).__name__, + return_code=(2 if isinstance(e, KeyboardInterrupt) else 1), + ) + ) + raise - return typer.Typer.command(self, *args, **kwargs)(wrapped) + return typer.Typer.command(self, *args, **kwargs)(wrapped) - return decorator + return decorator diff --git a/src/openllm/clean.py b/src/openllm/clean.py index b24dfba1..194f0de5 100644 --- a/src/openllm/clean.py +++ b/src/openllm/clean.py @@ -12,72 +12,72 @@ HUGGINGFACE_CACHE = pathlib.Path.home() / '.cache' / 'huggingface' / 'hub' def _du(path: pathlib.Path) -> int: - seen_paths = set() - used_space = 0 + seen_paths = set() + used_space = 0 - for f in path.rglob('*'): - if os.name == 'nt': # Windows system - # On Windows, directly add file sizes without considering hard links - used_space += f.stat().st_size - else: - # On non-Windows systems, use inodes to avoid double counting - stat = f.stat() - if stat.st_ino not in seen_paths: - seen_paths.add(stat.st_ino) - used_space += stat.st_size - return used_space + for f in path.rglob('*'): + if os.name == 'nt': # Windows system + # On Windows, directly add file sizes without considering hard links + used_space += f.stat().st_size + else: + # On non-Windows systems, use inodes to avoid double counting + stat = f.stat() + if stat.st_ino not in seen_paths: + seen_paths.add(stat.st_ino) + used_space += stat.st_size + return used_space @app.command(help='Clean up all the cached models from huggingface') def model_cache(verbose: bool = False) -> None: - if verbose: - VERBOSE_LEVEL.set(20) - used_space = _du(HUGGINGFACE_CACHE) - sure = questionary.confirm( - f'This will remove all models cached by Huggingface (~{used_space / 1024 / 1024:.2f}MB), are you sure?' - ).ask() - if not sure: - return - shutil.rmtree(HUGGINGFACE_CACHE, ignore_errors=True) - output('All models cached by Huggingface have been removed', style='green') + if verbose: + VERBOSE_LEVEL.set(20) + used_space = _du(HUGGINGFACE_CACHE) + sure = questionary.confirm( + f'This will remove all models cached by Huggingface (~{used_space / 1024 / 1024:.2f}MB), are you sure?' + ).ask() + if not sure: + return + shutil.rmtree(HUGGINGFACE_CACHE, ignore_errors=True) + output('All models cached by Huggingface have been removed', style='green') @app.command(help='Clean up all the virtual environments created by OpenLLM') def venvs(verbose: bool = False) -> None: - if verbose: - VERBOSE_LEVEL.set(20) + if verbose: + VERBOSE_LEVEL.set(20) - used_space = _du(VENV_DIR) - sure = questionary.confirm( - f'This will remove all virtual environments created by OpenLLM (~{used_space / 1024 / 1024:.2f}MB), are you sure?' - ).ask() - if not sure: - return - shutil.rmtree(VENV_DIR, ignore_errors=True) - output('All virtual environments have been removed', style='green') + used_space = _du(VENV_DIR) + sure = questionary.confirm( + f'This will remove all virtual environments created by OpenLLM (~{used_space / 1024 / 1024:.2f}MB), are you sure?' 
+ ).ask() + if not sure: + return + shutil.rmtree(VENV_DIR, ignore_errors=True) + output('All virtual environments have been removed', style='green') @app.command(help='Clean up all the repositories cloned by OpenLLM') def repos(verbose: bool = False) -> None: - if verbose: - VERBOSE_LEVEL.set(20) - shutil.rmtree(REPO_DIR, ignore_errors=True) - output('All repositories have been removed', style='green') + if verbose: + VERBOSE_LEVEL.set(20) + shutil.rmtree(REPO_DIR, ignore_errors=True) + output('All repositories have been removed', style='green') @app.command(help='Reset configurations to default') def configs(verbose: bool = False) -> None: - if verbose: - VERBOSE_LEVEL.set(20) - shutil.rmtree(CONFIG_FILE, ignore_errors=True) - output('All configurations have been reset', style='green') + if verbose: + VERBOSE_LEVEL.set(20) + shutil.rmtree(CONFIG_FILE, ignore_errors=True) + output('All configurations have been reset', style='green') @app.command(name='all', help='Clean up all above and bring OpenLLM to a fresh start') def all_cache(verbose: bool = False) -> None: - if verbose: - VERBOSE_LEVEL.set(20) - repos() - venvs() - model_cache() - configs() + if verbose: + VERBOSE_LEVEL.set(20) + repos() + venvs() + model_cache() + configs() diff --git a/src/openllm/cloud.py b/src/openllm/cloud.py index d728fbf8..503c558b 100644 --- a/src/openllm/cloud.py +++ b/src/openllm/cloud.py @@ -11,158 +11,171 @@ app = OpenLLMTyper() def resolve_cloud_config() -> pathlib.Path: - env = os.environ.get('BENTOML_HOME') - if env is not None: - return pathlib.Path(env) / '.yatai.yaml' - return pathlib.Path.home() / 'bentoml' / '.yatai.yaml' + env = os.environ.get('BENTOML_HOME') + if env is not None: + return pathlib.Path(env) / '.yatai.yaml' + return pathlib.Path.home() / 'bentoml' / '.yatai.yaml' def _get_deploy_cmd( - bento: BentoInfo, target: typing.Optional[DeploymentTarget] = None, cli_envs: typing.Optional[list[str]] = None + bento: BentoInfo, + target: typing.Optional[DeploymentTarget] = None, + cli_envs: typing.Optional[list[str]] = None, ) -> tuple[list[str], EnvVars]: - cmd = ['bentoml', 'deploy', bento.bentoml_tag] - env = EnvVars({'BENTOML_HOME': f'{bento.repo.path}/bentoml'}) - - # Process CLI env vars first to determine overrides - explicit_envs: dict[str, str] = {} - if cli_envs: - for env_var in cli_envs: - if '=' in env_var: - name, value = env_var.split('=', 1) - explicit_envs[name] = value - else: - name = env_var - value = typing.cast(str, os.environ.get(name)) - if value is None: - output( - f"Environment variable '{name}' specified via --env but not found in the current environment.", - style='red', - ) - raise typer.Exit(1) - explicit_envs[name] = value - - # Process envs defined in bento.yaml, skipping those overridden by CLI - required_envs = bento.bento_yaml.get('envs', []) - required_env_names = [env['name'] for env in required_envs if 'name' in env and env['name'] not in explicit_envs] - if required_env_names: - output( - f'This model requires the following environment variables to run (unless overridden via --env): {required_env_names!r}', - style='yellow', - ) - - for env_info in required_envs: - name = typing.cast(str, env_info.get('name')) - if not name or name in explicit_envs: - continue - - if os.environ.get(name): - default = os.environ[name] - elif 'value' in env_info: - default = env_info['value'] - else: - default = '' - - if INTERACTIVE.get(): - import questionary - - value = questionary.text(f'{name}: (from bento.yaml)', default=default).ask() - else: - if default == '': - 
output(f'Environment variable {name} (from bento.yaml) is required but not provided', style='red') - raise typer.Exit(1) - else: - value = default + cmd = ['bentoml', 'deploy', bento.bentoml_tag] + env = EnvVars({'BENTOML_HOME': f'{bento.repo.path}/bentoml'}) + # Process CLI env vars first to determine overrides + explicit_envs: dict[str, str] = {} + if cli_envs: + for env_var in cli_envs: + if '=' in env_var: + name, value = env_var.split('=', 1) + explicit_envs[name] = value + else: + name = env_var + value = typing.cast(str, os.environ.get(name)) if value is None: - raise typer.Exit(1) - cmd += ['--env', f'{name}={value}'] + output( + f"Environment variable '{name}' specified via --env but not found in the current environment.", + style='red', + ) + raise typer.Exit(1) + explicit_envs[name] = value - # Add explicitly provided env vars from CLI - for name, value in explicit_envs.items(): - cmd += ['--env', f'{name}={value}'] + # Process envs defined in bento.yaml, skipping those overridden by CLI + required_envs = bento.bento_yaml.get('envs', []) + required_env_names = [ + env['name'] + for env in required_envs + if 'name' in env and env['name'] not in explicit_envs and not env.get('value') + ] + if required_env_names: + output( + f'This model requires the following environment variables to run (unless overridden via --env): {required_env_names!r}', + style='green', + ) - if target: - cmd += ['--instance-type', target.name] + for env_info in required_envs: + name = typing.cast(str, env_info.get('name')) + if not name or name in explicit_envs or env_info.get('value', None) is not None: + continue - base_config = resolve_cloud_config() - if not base_config.exists(): - raise Exception('Cannot find cloud config.') - # remove before copy - if (bento.repo.path / 'bentoml' / '.yatai.yaml').exists(): - (bento.repo.path / 'bentoml' / '.yatai.yaml').unlink() - shutil.copy(base_config, bento.repo.path / 'bentoml' / '.yatai.yaml') + if os.environ.get(name): + default = os.environ[name] + elif 'value' in env_info: + default = env_info['value'] + else: + default = '' - return cmd, env + if INTERACTIVE.get(): + import questionary + + value = questionary.text(f'{name}: (from bento.yaml)', default=default).ask() + else: + if default == '': + output( + f'Environment variable {name} (from bento.yaml) is required but not provided', style='red' + ) + raise typer.Exit(1) + else: + value = default + + if value is None: + raise typer.Exit(1) + cmd += ['--env', f'{name}={value}'] + + # Add explicitly provided env vars from CLI + for name, value in explicit_envs.items(): + cmd += ['--env', f'{name}={value}'] + + if target: + cmd += ['--instance-type', target.name] + + base_config = resolve_cloud_config() + if not base_config.exists(): + raise Exception('Cannot find cloud config.') + # remove before copy + if (bento.repo.path / 'bentoml' / '.yatai.yaml').exists(): + (bento.repo.path / 'bentoml' / '.yatai.yaml').unlink() + shutil.copy(base_config, bento.repo.path / 'bentoml' / '.yatai.yaml') + + return cmd, env def ensure_cloud_context() -> None: - import questionary + import questionary - cmd = ['bentoml', 'cloud', 'current-context'] - try: - result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL) - context = json.loads(result) - output(f' bentoml already logged in: {context["endpoint"]}', style='green', level=20) - except subprocess.CalledProcessError: - output(' bentoml not logged in', style='red') - if not INTERACTIVE.get(): - output('\n get bentoml logged in by:') - output(' $ bentoml cloud login', 
style='orange') - output('') - output( - """ * you may need to visit https://cloud.bentoml.com to get an account. you can also bring your own bentoml cluster (BYOC) to your team from https://bentoml.com/contact""", - style='yellow', - ) - raise typer.Exit(1) - else: - action = questionary.select( - 'Choose an action:', choices=['I have a BentoCloud account', 'get an account in two minutes'] - ).ask() - if action is None: - raise typer.Exit(1) - elif action == 'get an account in two minutes': - output('Please visit https://cloud.bentoml.com to get your token', style='yellow') - endpoint = questionary.text('Enter the endpoint: (similar to https://my-org.cloud.bentoml.com)').ask() - if endpoint is None: - raise typer.Exit(1) - token = questionary.text('Enter your token: (similar to cniluaxxxxxxxx)').ask() - if token is None: - raise typer.Exit(1) - cmd = ['bentoml', 'cloud', 'login', '--api-token', token, '--endpoint', endpoint] - try: - result = subprocess.check_output(cmd) - output(' Logged in successfully', style='green') - except subprocess.CalledProcessError: - output(' Failed to login', style='red') - raise typer.Exit(1) + cmd = ['bentoml', 'cloud', 'current-context'] + try: + result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL) + context = json.loads(result) + output(f' bentoml already logged in: {context["endpoint"]}', style='green', level=20) + except subprocess.CalledProcessError: + output(' bentoml not logged in', style='red') + if not INTERACTIVE.get(): + output('\n get bentoml logged in by:') + output(' $ bentoml cloud login', style='orange') + output('') + output( + """ * you may need to visit https://cloud.bentoml.com to get an account. you can also bring your own bentoml cluster (BYOC) to your team from https://bentoml.com/contact""", + style='yellow', + ) + raise typer.Exit(1) + else: + action = questionary.select( + 'Choose an action:', + choices=['I have a BentoCloud account', 'get an account in two minutes'], + ).ask() + if action is None: + raise typer.Exit(1) + elif action == 'get an account in two minutes': + output('Please visit https://cloud.bentoml.com to get your token', style='yellow') + endpoint = questionary.text( + 'Enter the endpoint: (similar to https://my-org.cloud.bentoml.com)' + ).ask() + if endpoint is None: + raise typer.Exit(1) + token = questionary.text('Enter your token: (similar to cniluaxxxxxxxx)').ask() + if token is None: + raise typer.Exit(1) + cmd = ['bentoml', 'cloud', 'login', '--api-token', token, '--endpoint', endpoint] + try: + result = subprocess.check_output(cmd) + output(' Logged in successfully', style='green') + except subprocess.CalledProcessError: + output(' Failed to login', style='red') + raise typer.Exit(1) def get_cloud_machine_spec() -> list[DeploymentTarget]: - ensure_cloud_context() - cmd = ['bentoml', 'deployment', 'list-instance-types', '-o', 'json'] - try: - result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL) - instance_types = json.loads(result) - return [ - DeploymentTarget( - source='cloud', - name=it['name'], - price=it['price'], - platform='linux', - accelerators=( - [ACCELERATOR_SPECS[it['gpu_type']] for _ in range(int(it['gpu']))] - if it.get('gpu') and it['gpu_type'] in ACCELERATOR_SPECS - else [] - ), - ) - for it in instance_types - ] - except (subprocess.CalledProcessError, json.JSONDecodeError): - output('Failed to get cloud instance types', style='red') - return [] + ensure_cloud_context() + cmd = ['bentoml', 'deployment', 'list-instance-types', '-o', 'json'] + try: + result = 
subprocess.check_output(cmd, stderr=subprocess.DEVNULL) + instance_types = json.loads(result) + return [ + DeploymentTarget( + source='cloud', + name=it['name'], + price=it['price'], + platform='linux', + accelerators=( + [ACCELERATOR_SPECS[it['gpu_type']] for _ in range(int(it['gpu']))] + if it.get('gpu') and it['gpu_type'] in ACCELERATOR_SPECS + else [] + ), + ) + for it in instance_types + ] + except (subprocess.CalledProcessError, json.JSONDecodeError): + output('Failed to get cloud instance types', style='red') + return [] -def deploy(bento: BentoInfo, target: DeploymentTarget, cli_envs: typing.Optional[list[str]] = None) -> None: - ensure_cloud_context() - cmd, env = _get_deploy_cmd(bento, target, cli_envs=cli_envs) - run_command(cmd, env=env, cwd=None) +def deploy( + bento: BentoInfo, target: DeploymentTarget, cli_envs: typing.Optional[list[str]] = None +) -> None: + ensure_cloud_context() + cmd, env = _get_deploy_cmd(bento, target, cli_envs=cli_envs) + run_command(cmd, env=env, cwd=None) diff --git a/src/openllm/common.py b/src/openllm/common.py index 9503c12e..9563465c 100644 --- a/src/openllm/common.py +++ b/src/openllm/common.py @@ -31,401 +31,413 @@ T = typing.TypeVar('T') class ContextVar(typing.Generic[T]): - def __init__(self, default: T): - self._stack: list[T] = [] - self._default = default + def __init__(self, default: T): + self._stack: list[T] = [] + self._default = default - def get(self) -> T: - if self._stack: - return self._stack[-1] - return self._default + def get(self) -> T: + if self._stack: + return self._stack[-1] + return self._default - def set(self, value: T) -> None: - self._stack.append(value) + def set(self, value: T) -> None: + self._stack.append(value) - @contextmanager - def patch(self, value: T) -> typing.Iterator[None]: - self._stack.append(value) - try: - yield - finally: - self._stack.pop() + @contextmanager + def patch(self, value: T) -> typing.Iterator[None]: + self._stack.append(value) + try: + yield + finally: + self._stack.pop() -VERBOSE_LEVEL = ContextVar(10) +VERBOSE_LEVEL = ContextVar(0) INTERACTIVE = ContextVar(False) -def output(content: typing.Any, level: int = 0, style: str | None = None, end: str | None = None) -> None: - if level > VERBOSE_LEVEL.get(): - return +def output( + content: typing.Any, level: int = 0, style: str | None = None, end: str | None = None +) -> None: + if level > VERBOSE_LEVEL.get(): + return - if not isinstance(content, str): - out = io.StringIO() - pyaml.pprint(content, dst=out, sort_dicts=False, sort_keys=False) - questionary.print(out.getvalue(), style=style, end='' if end is None else end) - out.close() - else: - questionary.print(content, style=style, end='\n' if end is None else end) + if not isinstance(content, str): + out = io.StringIO() + pyaml.pprint(content, dst=out, sort_dicts=False, sort_keys=False) + questionary.print(out.getvalue(), style=style, end='' if end is None else end) + out.close() + else: + questionary.print(content, style=style, end='\n' if end is None else end) class Config(pydantic.BaseModel): - repos: dict[str, str] = pydantic.Field( - default_factory=lambda: {'default': 'https://github.com/bentoml/openllm-models@main'} - ) - default_repo: str = 'default' + repos: dict[str, str] = pydantic.Field( + default_factory=lambda: {'default': 'https://github.com/bentoml/openllm-models@main'} + ) + default_repo: str = 'default' - def tolist(self) -> dict[str, typing.Any]: - return dict(repos=self.repos, default_repo=self.default_repo) + def tolist(self) -> dict[str, typing.Any]: + return 
dict(repos=self.repos, default_repo=self.default_repo) def load_config() -> Config: - if CONFIG_FILE.exists(): - try: - with open(CONFIG_FILE) as f: - return Config(**json.load(f)) - except json.JSONDecodeError: - return Config() - return Config() + if CONFIG_FILE.exists(): + try: + with open(CONFIG_FILE) as f: + return Config(**json.load(f)) + except json.JSONDecodeError: + return Config() + return Config() def save_config(config: Config) -> None: - with open(CONFIG_FILE, 'w') as f: - json.dump(config.tolist(), f, indent=2) + with open(CONFIG_FILE, 'w') as f: + json.dump(config.tolist(), f, indent=2) class BentoMetadata(typing.TypedDict): - name: str - version: str - labels: dict[str, str] - envs: list[dict[str, str]] - services: list[dict[str, typing.Any]] - schema: dict[str, typing.Any] + name: str + version: str + labels: dict[str, str] + envs: list[dict[str, str]] + services: list[dict[str, typing.Any]] + schema: dict[str, typing.Any] class EnvVars(UserDict[str, str]): - """ - A dictionary-like object that sorted by key and only keeps the environment variables that have a value. - """ + """ + A dictionary-like object that sorted by key and only keeps the environment variables that have a value. + """ - @classmethod - def __get_pydantic_core_schema__( - cls: type[EnvVars], source_type: type[typing.Any], handler: typing.Callable[..., typing.Any] - ) -> core_schema.DictSchema: - return core_schema.dict_schema(core_schema.str_schema(), core_schema.str_schema()) + @classmethod + def __get_pydantic_core_schema__( + cls: type[EnvVars], source_type: type[typing.Any], handler: typing.Callable[..., typing.Any] + ) -> core_schema.DictSchema: + return core_schema.dict_schema(core_schema.str_schema(), core_schema.str_schema()) - def __init__(self, data: typing.Mapping[str, str] | None = None): - super().__init__(data or {}) - self.data = {k: v for k, v in sorted(self.data.items()) if v} + def __init__(self, data: typing.Mapping[str, str] | None = None): + super().__init__(data or {}) + self.data = {k: v for k, v in sorted(self.data.items()) if v} - def __hash__(self) -> int: - return hash(tuple(sorted(self.data.items()))) + def __hash__(self) -> int: + return hash(tuple(sorted(self.data.items()))) class RepoInfo(pydantic.BaseModel): - name: str - path: pathlib.Path - url: str - server: str - owner: str - repo: str - branch: str + name: str + path: pathlib.Path + url: str + server: str + owner: str + repo: str + branch: str - def tolist(self) -> str | dict[str, typing.Any] | None: - if VERBOSE_LEVEL.get() <= 0: - return f'{self.name} ({self.url}@{self.branch})' - if VERBOSE_LEVEL.get() <= 10: - return dict(name=self.name, url=f'{self.url}@{self.branch}', path=str(self.path)) - if VERBOSE_LEVEL.get() <= 20: - return dict( - name=self.name, - url=f'{self.url}@{self.branch}', - path=str(self.path), - server=self.server, - owner=self.owner, - repo=self.repo, - ) - return None + def tolist(self) -> str | dict[str, typing.Any] | None: + if VERBOSE_LEVEL.get() <= 0: + return f'{self.name} ({self.url}@{self.branch})' + if VERBOSE_LEVEL.get() <= 10: + return dict(name=self.name, url=f'{self.url}@{self.branch}', path=str(self.path)) + if VERBOSE_LEVEL.get() <= 20: + return dict( + name=self.name, + url=f'{self.url}@{self.branch}', + path=str(self.path), + server=self.server, + owner=self.owner, + repo=self.repo, + ) + return None class BentoInfo(pydantic.BaseModel): - repo: RepoInfo - path: pathlib.Path - alias: str = '' + repo: RepoInfo + path: pathlib.Path + alias: str = '' - def __str__(self) -> str: - if 
self.repo.name == 'default': - return f'{self.tag}' - else: - return f'{self.repo.name}/{self.tag}' + def __str__(self) -> str: + if self.repo.name == 'default': + return f'{self.tag}' + else: + return f'{self.repo.name}/{self.tag}' - @override - def __hash__(self) -> int: - return md5(str(self.path)) + @override + def __hash__(self) -> int: + return md5(str(self.path)) - @property - def tag(self) -> str: - if self.alias: - return f'{self.path.parent.name}:{self.alias}' - return f'{self.path.parent.name}:{self.path.name}' + @property + def tag(self) -> str: + if self.alias: + return f'{self.path.parent.name}:{self.alias}' + return f'{self.path.parent.name}:{self.path.name}' - @property - def bentoml_tag(self) -> str: - return f'{self.path.parent.name}:{self.path.name}' + @property + def bentoml_tag(self) -> str: + return f'{self.path.parent.name}:{self.path.name}' - @property - def name(self) -> str: - return self.path.parent.name + @property + def name(self) -> str: + return self.path.parent.name - @property - def version(self) -> str: - return self.path.name + @property + def version(self) -> str: + return self.path.name - @property - def labels(self) -> dict[str, str]: - return self.bento_yaml['labels'] + @property + def labels(self) -> dict[str, str]: + return self.bento_yaml['labels'] - @property - def envs(self) -> list[dict[str, str]]: - return self.bento_yaml['envs'] + @property + def envs(self) -> list[dict[str, str]]: + return self.bento_yaml['envs'] - @functools.cached_property - def bento_yaml(self) -> BentoMetadata: - bento: BentoMetadata = yaml.safe_load((self.path / 'bento.yaml').read_text()) - return bento + @functools.cached_property + def bento_yaml(self) -> BentoMetadata: + bento: BentoMetadata = yaml.safe_load((self.path / 'bento.yaml').read_text()) + return bento - @functools.cached_property - def platforms(self) -> list[str]: - return self.bento_yaml['labels'].get('platforms', 'linux').split(',') + @functools.cached_property + def platforms(self) -> list[str]: + return self.bento_yaml['labels'].get('platforms', 'linux').split(',') - @functools.cached_property - def pretty_yaml(self) -> BentoMetadata | dict[str, typing.Any]: - def _pretty_routes(routes: list[dict[str, typing.Any]]) -> dict[str, typing.Any]: - return { - route['route']: { - 'input': {k: v['type'] for k, v in route['input']['properties'].items()}, - 'output': route['output']['type'], - } - for route in routes - } + @functools.cached_property + def pretty_yaml(self) -> BentoMetadata | dict[str, typing.Any]: + def _pretty_routes(routes: list[dict[str, typing.Any]]) -> dict[str, typing.Any]: + return { + route['route']: { + 'input': {k: v['type'] for k, v in route['input']['properties'].items()}, + 'output': route['output']['type'], + } + for route in routes + } - if len(self.bento_yaml['services']) == 1: - pretty_yaml: dict[str, typing.Any] = { - 'apis': _pretty_routes(self.bento_yaml['schema']['routes']), - 'resources': self.bento_yaml['services'][0]['config']['resources'], - 'envs': self.bento_yaml['envs'], - 'platforms': self.platforms, - } - return pretty_yaml - return self.bento_yaml + if len(self.bento_yaml['services']) == 1: + pretty_yaml: dict[str, typing.Any] = { + 'apis': _pretty_routes(self.bento_yaml['schema']['routes']), + 'resources': self.bento_yaml['services'][0]['config']['resources'], + 'envs': self.bento_yaml['envs'], + 'platforms': self.platforms, + } + return pretty_yaml + return self.bento_yaml - @functools.cached_property - def pretty_gpu(self) -> str: - from openllm.accelerator_spec 
import ACCELERATOR_SPECS + @functools.cached_property + def pretty_gpu(self) -> str: + from openllm.accelerator_spec import ACCELERATOR_SPECS - try: - resources = self.bento_yaml['services'][0]['config']['resources'] - if resources['gpu'] > 1: - acc = ACCELERATOR_SPECS[resources['gpu_type']] - return f'{acc.memory_size:.0f}Gx{resources["gpu"]}' - elif resources['gpu'] > 0: - acc = ACCELERATOR_SPECS[resources['gpu_type']] - return f'{acc.memory_size:.0f}G' - except KeyError: - pass - return '' + try: + resources = self.bento_yaml['services'][0]['config']['resources'] + if resources['gpu'] > 1: + acc = ACCELERATOR_SPECS[resources['gpu_type']] + return f'{acc.memory_size:.0f}Gx{resources["gpu"]}' + elif resources['gpu'] > 0: + acc = ACCELERATOR_SPECS[resources['gpu_type']] + return f'{acc.memory_size:.0f}G' + except KeyError: + pass + return '' - def tolist(self) -> str | dict[str, typing.Any] | None: - verbose = VERBOSE_LEVEL.get() - if verbose <= 0: - return str(self) - if verbose <= 10: - return dict(tag=self.tag, repo=self.repo.tolist(), path=str(self.path), model_card=self.pretty_yaml) - if verbose <= 20: - return dict(tag=self.tag, repo=self.repo.tolist(), path=str(self.path), bento_yaml=self.bento_yaml) - return None + def tolist(self) -> str | dict[str, typing.Any] | None: + verbose = VERBOSE_LEVEL.get() + if verbose <= 0: + return str(self) + if verbose <= 10: + return dict( + tag=self.tag, repo=self.repo.tolist(), path=str(self.path), model_card=self.pretty_yaml + ) + if verbose <= 20: + return dict( + tag=self.tag, repo=self.repo.tolist(), path=str(self.path), bento_yaml=self.bento_yaml + ) + return None class VenvSpec(pydantic.BaseModel): - python_version: str - requirements_txt: str - envs: EnvVars - name_prefix: str = '' + python_version: str + requirements_txt: str + envs: EnvVars + name_prefix: str = '' - @functools.cached_property - def normalized_requirements_txt(self) -> str: - parameter_lines: list[str] = [] - dependency_lines: list[str] = [] - comment_lines: list[str] = [] + @functools.cached_property + def normalized_requirements_txt(self) -> str: + parameter_lines: list[str] = [] + dependency_lines: list[str] = [] + comment_lines: list[str] = [] - for line in self.requirements_txt.splitlines(): - if not line.strip(): - continue - elif line.strip().startswith('#'): - comment_lines.append(line.strip()) - elif line.strip().startswith('-'): - parameter_lines.append(line.strip()) - else: - dependency_lines.append(line.strip()) + for line in self.requirements_txt.splitlines(): + if not line.strip(): + continue + elif line.strip().startswith('#'): + comment_lines.append(line.strip()) + elif line.strip().startswith('-'): + parameter_lines.append(line.strip()) + else: + dependency_lines.append(line.strip()) - parameter_lines.sort() - dependency_lines.sort() - return '\n'.join(parameter_lines + dependency_lines).strip() + parameter_lines.sort() + dependency_lines.sort() + return '\n'.join(parameter_lines + dependency_lines).strip() - @functools.cached_property - def normalized_envs(self) -> str: - return '\n'.join(f'{k}={v}' for k, v in sorted(self.envs.items(), key=lambda x: x[0]) if not v) + @functools.cached_property + def normalized_envs(self) -> str: + return '\n'.join(f'{k}={v}' for k, v in sorted(self.envs.items(), key=lambda x: x[0]) if not v) - @override - def __hash__(self) -> int: - return md5(self.normalized_requirements_txt, str(hash(self.normalized_envs))) + @override + def __hash__(self) -> int: + return md5(self.normalized_requirements_txt, 
str(hash(self.normalized_envs))) class Accelerator(pydantic.BaseModel): - model: str - memory_size: float + model: str + memory_size: float - def __gt__(self, other: Accelerator) -> bool: - return self.memory_size > other.memory_size + def __gt__(self, other: Accelerator) -> bool: + return self.memory_size > other.memory_size - def __eq__(self, other: object) -> bool: - if not isinstance(other, Accelerator): - return NotImplemented - return self.memory_size == other.memory_size + def __eq__(self, other: object) -> bool: + if not isinstance(other, Accelerator): + return NotImplemented + return self.memory_size == other.memory_size - def __repr__(self) -> str: - return f'{self.model}({self.memory_size}GB)' + def __repr__(self) -> str: + return f'{self.model}({self.memory_size}GB)' class DeploymentTarget(pydantic.BaseModel): - accelerators: list[Accelerator] - source: str = 'local' - name: str = 'local' - price: str = '' - platform: str = 'linux' + accelerators: list[Accelerator] + source: str = 'local' + name: str = 'local' + price: str = '' + platform: str = 'linux' - @override - def __hash__(self) -> int: - return hash(self.source) + @override + def __hash__(self) -> int: + return hash(self.source) - @property - def accelerators_repr(self) -> str: - accs = {a.model for a in self.accelerators} - if len(accs) == 0: - return 'null' - if len(accs) == 1: - a = self.accelerators[0] - return f'{a.model} x{len(self.accelerators)}' - return ', '.join((f'{a.model}' for a in self.accelerators)) + @property + def accelerators_repr(self) -> str: + accs = {a.model for a in self.accelerators} + if len(accs) == 0: + return 'null' + if len(accs) == 1: + a = self.accelerators[0] + return f'{a.model} x{len(self.accelerators)}' + return ', '.join((f'{a.model}' for a in self.accelerators)) def run_command( - cmd: list[str], - cwd: str | None = None, - env: EnvVars | None = None, - copy_env: bool = True, - venv: pathlib.Path | None = None, - silent: bool = False, + cmd: list[str], + cwd: str | None = None, + env: EnvVars | None = None, + copy_env: bool = True, + venv: pathlib.Path | None = None, + silent: bool = False, ) -> subprocess.CompletedProcess[typing.Any]: - env = env or EnvVars({}) - cmd = [str(c) for c in cmd] - bin_dir = 'Scripts' if os.name == 'nt' else 'bin' - if not silent: - output('\n') - if cwd: - output(f'$ cd {cwd}', style='orange') - if env: - for k, v in env.items(): - output(f'$ export {k}={shlex.quote(v)}', style='orange') - if venv: - output(f'$ source {venv / "bin" / "activate"}', style='orange') - output(f'$ {" ".join(cmd)}', style='orange') - + env = env or EnvVars({}) + cmd = [str(c) for c in cmd] + bin_dir = 'Scripts' if os.name == 'nt' else 'bin' + if not silent: + output('\n') + if cwd: + output(f'$ cd {cwd}', style='orange') + if env: + for k, v in env.items(): + output(f'$ export {k}={shlex.quote(v)}', style='orange') if venv: - py = venv / bin_dir / f'python{sysconfig.get_config_var("EXE")}' + output(f'$ source {venv / "bin" / "activate"}', style='orange') + output(f'$ {" ".join(cmd)}', style='orange') + + if venv: + py = venv / bin_dir / f'python{sysconfig.get_config_var("EXE")}' + else: + py = pathlib.Path(sys.executable) + + if copy_env: + env = EnvVars({**os.environ, **env}) + + if cmd and cmd[0] == 'bentoml': + cmd = [py.__fspath__(), '-m', 'bentoml', *cmd[1:]] + if cmd and cmd[0] == 'python': + cmd = [py.__fspath__(), *cmd[1:]] + + try: + if silent: + return subprocess.run( + cmd, cwd=cwd, env=env, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True + ) else: 
- py = pathlib.Path(sys.executable) - - if copy_env: - env = EnvVars({**os.environ, **env}) - - if cmd and cmd[0] == 'bentoml': - cmd = [py.__fspath__(), '-m', 'bentoml'] + cmd[1:] - if cmd and cmd[0] == 'python': - cmd = [py.__fspath__()] + cmd[1:] - - try: - if silent: - return subprocess.run( - cmd, cwd=cwd, env=env, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True - ) - else: - return subprocess.run(cmd, cwd=cwd, env=env, check=True) - except Exception as e: - if VERBOSE_LEVEL.get() >= 20: - output(str(e), style='red') - raise typer.Exit(1) + return subprocess.run(cmd, cwd=cwd, env=env, check=True) + except Exception as e: + if VERBOSE_LEVEL.get() >= 20: + output(str(e), style='red') + raise typer.Exit(1) -async def stream_command_output(stream: asyncio.streams.StreamReader | None, style: str = 'gray') -> None: - if stream: - async for line in stream: - output(line.decode(), style=style, end='') +async def stream_command_output( + stream: asyncio.streams.StreamReader | None, style: str = 'gray' +) -> None: + if stream: + async for line in stream: + output(line.decode(), style=style, end='') @asynccontextmanager async def async_run_command( - cmd: list[str], - cwd: str | None = None, - env: EnvVars | None = None, - copy_env: bool = True, - venv: pathlib.Path | None = None, - silent: bool = True, + cmd: list[str], + cwd: str | None = None, + env: EnvVars | None = None, + copy_env: bool = True, + venv: pathlib.Path | None = None, + silent: bool = True, ) -> typing.AsyncGenerator[asyncio.subprocess.Process]: - env = env or EnvVars({}) - cmd = [str(c) for c in cmd] - - if not silent: - output('\n') - if cwd: - output(f'$ cd {cwd}', style='orange') - if env: - for k, v in env.items(): - output(f'$ export {k}={shlex.quote(v)}', style='orange') - if venv: - output(f'$ source {venv / "bin" / "activate"}', style='orange') - output(f'$ {" ".join(cmd)}', style='orange') + env = env or EnvVars({}) + cmd = [str(c) for c in cmd] + if not silent: + output('\n') + if cwd: + output(f'$ cd {cwd}', style='orange') + if env: + for k, v in env.items(): + output(f'$ export {k}={shlex.quote(v)}', style='orange') if venv: - py = venv / 'bin' / 'python' - else: - py = pathlib.Path(sys.executable) + output(f'$ source {venv / "bin" / "activate"}', style='orange') + output(f'$ {" ".join(cmd)}', style='orange') - if copy_env: - env = EnvVars({**os.environ, **env}) + if venv: + py = venv / 'bin' / 'python' + else: + py = pathlib.Path(sys.executable) - if cmd and cmd[0] == 'bentoml': - cmd = [py.__fspath__(), '-m', 'bentoml'] + cmd[1:] - if cmd and cmd[0] == 'python': - cmd = [py.__fspath__()] + cmd[1:] + if copy_env: + env = EnvVars({**os.environ, **env}) - proc = None - try: - proc = await asyncio.create_subprocess_shell( - ' '.join(map(str, cmd)), stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, cwd=cwd, env=env - ) - yield proc - except subprocess.CalledProcessError: - output('Command failed', style='red') - raise typer.Exit(1) - finally: - if proc: - proc.send_signal(signal.SIGINT) - await proc.wait() + if cmd and cmd[0] == 'bentoml': + cmd = [py.__fspath__(), '-m', 'bentoml', *cmd[1:]] + if cmd and cmd[0] == 'python': + cmd = [py.__fspath__(), *cmd[1:]] + + proc = None + try: + proc = await asyncio.create_subprocess_shell( + ' '.join(map(str, cmd)), + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + cwd=cwd, + env=env, + ) + yield proc + except subprocess.CalledProcessError: + output('Command failed', style='red') + raise typer.Exit(1) + finally: + if proc: + 
proc.send_signal(signal.SIGINT) + await proc.wait() def md5(*strings: str) -> int: - m = hashlib.md5() - for s in strings: - m.update(s.encode()) - return int(m.hexdigest(), 16) + m = hashlib.md5() + for s in strings: + m.update(s.encode()) + return int(m.hexdigest(), 16) diff --git a/src/openllm/local.py b/src/openllm/local.py index d754b998..a5c72c60 100644 --- a/src/openllm/local.py +++ b/src/openllm/local.py @@ -4,103 +4,114 @@ import asyncio, time, typing import httpx, openai from openai.types.chat import ChatCompletionAssistantMessageParam, ChatCompletionUserMessageParam -from openllm.common import BentoInfo, EnvVars, async_run_command, output, run_command, stream_command_output +from openllm.common import ( + BentoInfo, + EnvVars, + async_run_command, + output, + run_command, + stream_command_output, +) from openllm.venv import ensure_venv if typing.TYPE_CHECKING: - from openai.types.chat import ChatCompletionMessageParam + from openai.types.chat import ChatCompletionMessageParam def prep_env_vars(bento: BentoInfo) -> None: - import os + import os - env_vars = bento.envs - for env_var in env_vars: - if not env_var.get('value'): - continue - key = env_var['name'] - value = env_var['value'] - os.environ[key] = value + env_vars = bento.envs + for env_var in env_vars: + if not env_var.get('value'): + continue + key = env_var['name'] + value = env_var['value'] + os.environ[key] = value def _get_serve_cmd(bento: BentoInfo, port: int = 3000) -> tuple[list[str], EnvVars]: - cmd = ['bentoml', 'serve', bento.bentoml_tag] - if port != 3000: - cmd += ['--port', str(port)] - return cmd, EnvVars({'BENTOML_HOME': f'{bento.repo.path}/bentoml'}) + cmd = ['bentoml', 'serve', bento.bentoml_tag] + if port != 3000: + cmd += ['--port', str(port)] + return cmd, EnvVars({'BENTOML_HOME': f'{bento.repo.path}/bentoml'}) def serve(bento: BentoInfo, port: int = 3000) -> None: - prep_env_vars(bento) - cmd, env = _get_serve_cmd(bento, port=port) - venv = ensure_venv(bento, runtime_envs=env) - output(f'Access the Chat UI at http://localhost:{port}/chat (or with you IP)') - run_command(cmd, env=env, cwd=None, venv=venv) + prep_env_vars(bento) + cmd, env = _get_serve_cmd(bento, port=port) + venv = ensure_venv(bento, runtime_envs=env) + output(f'Access the Chat UI at http://localhost:{port}/chat (or with your IP)') + run_command(cmd, env=env, cwd=None, venv=venv) async def _run_model(bento: BentoInfo, port: int = 3000, timeout: int = 600) -> None: - cmd, env = _get_serve_cmd(bento, port) - venv = ensure_venv(bento, runtime_envs=env) - async with async_run_command(cmd, env=env, cwd=None, venv=venv, silent=False) as server_proc: - output(f'Model server started {server_proc.pid}') + cmd, env = _get_serve_cmd(bento, port) + venv = ensure_venv(bento, runtime_envs=env) + async with async_run_command(cmd, env=env, cwd=None, venv=venv, silent=False) as server_proc: + output(f'Model server started {server_proc.pid}') - stdout_streamer = None - stderr_streamer = None - start_time = time.time() + stdout_streamer = None + stderr_streamer = None + start_time = time.time() - output('Model loading...', style='green') - for _ in range(timeout): - try: - resp = httpx.get(f'http://localhost:{port}/readyz', timeout=3) - if resp.status_code == 200: - break - except httpx.RequestError: - if time.time() - start_time > 30: - if not stdout_streamer: - stdout_streamer = asyncio.create_task(stream_command_output(server_proc.stdout, style='gray')) - if not stderr_streamer: - stderr_streamer = asyncio.create_task( -
stream_command_output(server_proc.stderr, style='#BD2D0F') - ) - await asyncio.sleep(1) - else: - output('Model failed to load', style='red') - server_proc.terminate() - return + output('Model loading...', style='green') + for _ in range(timeout): + try: + resp = httpx.get(f'http://localhost:{port}/readyz', timeout=3) + if resp.status_code == 200: + break + except httpx.RequestError: + if time.time() - start_time > 30: + if not stdout_streamer: + stdout_streamer = asyncio.create_task( + stream_command_output(server_proc.stdout, style='gray') + ) + if not stderr_streamer: + stderr_streamer = asyncio.create_task( + stream_command_output(server_proc.stderr, style='#BD2D0F') + ) + await asyncio.sleep(1) + else: + output('Model failed to load', style='red') + server_proc.terminate() + return - if stdout_streamer: - stdout_streamer.cancel() - if stderr_streamer: - stderr_streamer.cancel() + if stdout_streamer: + stdout_streamer.cancel() + if stderr_streamer: + stderr_streamer.cancel() - output('Model is ready', style='green') - messages: list[ChatCompletionMessageParam] = [] + output('Model is ready', style='green') + messages: list[ChatCompletionMessageParam] = [] - client = openai.AsyncOpenAI(base_url=f'http://localhost:{port}/v1', api_key='local') - while True: - try: - message = input('user: ') - if message == '': - output('empty message, please enter something', style='yellow') - continue - messages.append(ChatCompletionUserMessageParam(role='user', content=message)) - output('assistant: ', end='', style='lightgreen') - assistant_message = '' - stream = await client.chat.completions.create( - model=(await client.models.list()).data[0].id, messages=messages, stream=True - ) - async for chunk in stream: - text = chunk.choices[0].delta.content or '' - assistant_message += text - output(text, end='', style='lightgreen') - messages.append(ChatCompletionAssistantMessageParam(role='assistant', content=assistant_message)) - output('') - except KeyboardInterrupt: - break - output('\nStopping model server...', style='green') - output('Stopped model server', style='green') + client = openai.AsyncOpenAI(base_url=f'http://localhost:{port}/v1', api_key='local') + while True: + try: + message = input('user: ') + if message == '': + output('empty message, please enter something', style='yellow') + continue + messages.append(ChatCompletionUserMessageParam(role='user', content=message)) + output('assistant: ', end='', style='lightgreen') + assistant_message = '' + stream = await client.chat.completions.create( + model=(await client.models.list()).data[0].id, messages=messages, stream=True + ) + async for chunk in stream: + text = chunk.choices[0].delta.content or '' + assistant_message += text + output(text, end='', style='lightgreen') + messages.append( + ChatCompletionAssistantMessageParam(role='assistant', content=assistant_message) + ) + output('') + except KeyboardInterrupt: + break + output('\nStopping model server...', style='green') + output('Stopped model server', style='green') def run(bento: BentoInfo, port: int = 3000, timeout: int = 600) -> None: - prep_env_vars(bento) - asyncio.run(_run_model(bento, port=port, timeout=timeout)) + prep_env_vars(bento) + asyncio.run(_run_model(bento, port=port, timeout=timeout)) diff --git a/src/openllm/model.py b/src/openllm/model.py index 24ae637f..41b56427 100644 --- a/src/openllm/model.py +++ b/src/openllm/model.py @@ -14,155 +14,159 @@ app = OpenLLMTyper(help='manage models') @app.command(help='get model') def get(tag: str, repo: typing.Optional[str] = None, 
verbose: bool = False) -> None: - if verbose: - VERBOSE_LEVEL.set(20) - bento_info = ensure_bento(tag, repo_name=repo) - if bento_info: - output_(bento_info) + if verbose: + VERBOSE_LEVEL.set(20) + bento_info = ensure_bento(tag, repo_name=repo) + if bento_info: + output_(bento_info) @app.command(name='list', help='list available models') def list_model( - tag: typing.Optional[str] = None, - repo: typing.Optional[str] = None, - verbose: bool = False, - output: typing.Optional[str] = typer.Option(None, hidden=True), + tag: typing.Optional[str] = None, + repo: typing.Optional[str] = None, + verbose: bool = False, + output: typing.Optional[str] = typer.Option(None, hidden=True), ) -> None: - if verbose: - VERBOSE_LEVEL.set(20) + if verbose: + VERBOSE_LEVEL.set(20) - bentos = list_bento(tag=tag, repo_name=repo) - bentos.sort(key=lambda x: x.name) + bentos = list_bento(tag=tag, repo_name=repo) + bentos.sort(key=lambda x: x.name) - seen = set() + seen = set() - def is_seen(value: str) -> bool: - if value in seen: - return True - seen.add(value) - return False + def is_seen(value: str) -> bool: + if value in seen: + return True + seen.add(value) + return False - if output == 'readme': - # Parse parameters from bento.tag (e.g. "model:671b-it" -> "671b", 'model:something-long-78b' -> '78b') - questionary.print( - json.dumps({ - f'{bento.name}': dict( - tag=bento.tag, - version=bento.tag.split(':')[-1], - pretty_gpu=bento.pretty_gpu, - command=f'openllm serve {bento.tag}', - ) - for bento in bentos - if not is_seen(bento.name) - }) + if output == 'readme': + # Parse parameters from bento.tag (e.g. "model:671b-it" -> "671b", 'model:something-long-78b' -> '78b') + questionary.print( + json.dumps({ + f'{bento.name}': dict( + tag=bento.tag, + version=bento.tag.split(':')[-1], + pretty_gpu=bento.pretty_gpu, + command=f'openllm serve {bento.tag}', ) - return - - table = tabulate.tabulate( - [ - [ - '' if is_seen(bento.name) else bento.name, - bento.tag, - bento.repo.name, - bento.pretty_gpu, - ','.join(bento.platforms), - ] - for bento in bentos - ], - headers=['model', 'version', 'repo', 'required GPU RAM', 'platforms'], + for bento in bentos + if not is_seen(bento.name) + }) ) - output_(table) + return + + table = tabulate.tabulate( + [ + [ + '' if is_seen(bento.name) else bento.name, + bento.tag, + bento.repo.name, + bento.pretty_gpu, + ','.join(bento.platforms), + ] + for bento in bentos + ], + headers=['model', 'version', 'repo', 'required GPU RAM', 'platforms'], + ) + output_(table) def ensure_bento( - model: str, target: typing.Optional[DeploymentTarget] = None, repo_name: typing.Optional[str] = None + model: str, + target: typing.Optional[DeploymentTarget] = None, + repo_name: typing.Optional[str] = None, ) -> BentoInfo: - bentos = list_bento(model, repo_name=repo_name) - if len(bentos) == 0: - output_(f'No model found for {model}', style='red') - raise typer.Exit(1) - - if len(bentos) == 1: - output_(f'Found model {bentos[0]}', style='green') - if target is not None and can_run(bentos[0], target) <= 0: - output_( - f'The machine({target.name}) with {target.accelerators_repr} does not appear to have sufficient ' - f'resources to run model {bentos[0]}\n', - style='yellow', - ) - return bentos[0] - - # multiple models, pick one according to target - output_(f'Multiple models match {model}, did you mean one of these?', style='red') - list_model(model, repo=repo_name) + bentos = list_bento(model, repo_name=repo_name) + if len(bentos) == 0: + output_(f'No model found for {model}', style='red') raise 
typer.Exit(1) + if len(bentos) == 1: + output_(f'Found model {bentos[0]}', style='green') + if target is not None and can_run(bentos[0], target) <= 0: + output_( + f'The machine({target.name}) with {target.accelerators_repr} does not appear to have sufficient ' + f'resources to run model {bentos[0]}\n', + style='yellow', + ) + return bentos[0] + + # multiple models, pick one according to target + output_(f'Multiple models match {model}, did you mean one of these?', style='red') + list_model(model, repo=repo_name) + raise typer.Exit(1) + NUMBER_RE = re.compile(r'\d+') def _extract_first_number(s: str) -> int: - match = NUMBER_RE.search(s) - if match: - return int(match.group()) - else: - return 100 + match = NUMBER_RE.search(s) + if match: + return int(match.group()) + else: + return 100 def list_bento( - tag: typing.Optional[str] = None, repo_name: typing.Optional[str] = None, include_alias: bool = False + tag: typing.Optional[str] = None, + repo_name: typing.Optional[str] = None, + include_alias: bool = False, ) -> typing.List[BentoInfo]: - ensure_repo_updated() + ensure_repo_updated() - if repo_name is None and tag and '/' in tag: - repo_name, tag = tag.split('/', 1) + if repo_name is None and tag and '/' in tag: + repo_name, tag = tag.split('/', 1) - repo_list = list_repo(repo_name) - if repo_name is not None: - repo_map = {repo.name: repo for repo in repo_list} - if repo_name not in repo_map: - output_(f'Repo `{repo_name}` not found, did you mean one of these?') - for repo_name in repo_map: - output_(f' {repo_name}') - raise typer.Exit(1) + repo_list = list_repo(repo_name) + if repo_name is not None: + repo_map = {repo.name: repo for repo in repo_list} + if repo_name not in repo_map: + output_(f'Repo `{repo_name}` not found, did you mean one of these?') + for repo_name in repo_map: + output_(f' {repo_name}') + raise typer.Exit(1) - if not tag: - glob_pattern = 'bentoml/bentos/*/*' - elif ':' in tag: - bento_name, version = tag.split(':') - glob_pattern = f'bentoml/bentos/{bento_name}/{version}' - else: - glob_pattern = f'bentoml/bentos/{tag}/*' + if not tag: + glob_pattern = 'bentoml/bentos/*/*' + elif ':' in tag: + bento_name, version = tag.split(':') + glob_pattern = f'bentoml/bentos/{bento_name}/{version}' + else: + glob_pattern = f'bentoml/bentos/{tag}/*' - model_list: list[BentoInfo] = [] - repo_list = list_repo(repo_name) - for repo in repo_list: - paths = sorted( - repo.path.glob(glob_pattern), - key=lambda x: (x.parent.name, _extract_first_number(x.name), len(x.name), x.name), - ) - for path in paths: - if path.is_dir() and (path / 'bento.yaml').exists(): - model = BentoInfo(repo=repo, path=path) - elif path.is_file(): - with open(path) as f: - origin_name = f.read().strip() - origin_path = path.parent / origin_name - model = BentoInfo(alias=path.name, repo=repo, path=origin_path) - else: - model = None - if model: - model_list.append(model) + model_list: list[BentoInfo] = [] + repo_list = list_repo(repo_name) + for repo in repo_list: + paths = sorted( + repo.path.glob(glob_pattern), + key=lambda x: (x.parent.name, _extract_first_number(x.name), len(x.name), x.name), + ) + for path in paths: + if path.is_dir() and (path / 'bento.yaml').exists(): + model = BentoInfo(repo=repo, path=path) + elif path.is_file(): + with open(path) as f: + origin_name = f.read().strip() + origin_path = path.parent / origin_name + model = BentoInfo(alias=path.name, repo=repo, path=origin_path) + else: + model = None + if model: + model_list.append(model) - if not include_alias: - seen: set[str] = 
set() - # we are calling side-effect in seen here. - model_list = [ - x - for x in model_list - if not ( - f'{x.bento_yaml["name"]}:{x.bento_yaml["version"]}' in seen - or seen.add(f'{x.bento_yaml["name"]}:{x.bento_yaml["version"]}') # type: ignore - ) - ] - return model_list + if not include_alias: + seen: set[str] = set() + # we rely on the side effect of seen.add() here to filter out duplicates. + model_list = [ + x + for x in model_list + if not ( + f'{x.bento_yaml["name"]}:{x.bento_yaml["version"]}' in seen + or seen.add(f'{x.bento_yaml["name"]}:{x.bento_yaml["version"]}') # type: ignore + ) + ] + return model_list diff --git a/src/openllm/repo.py b/src/openllm/repo.py index 3b59779e..50db4268 100644 --- a/src/openllm/repo.py +++ b/src/openllm/repo.py @@ -4,7 +4,15 @@ import datetime, subprocess, re, shutil, typing, os, pathlib import pyaml, questionary, typer from openllm.analytic import OpenLLMTyper -from openllm.common import INTERACTIVE, REPO_DIR, VERBOSE_LEVEL, RepoInfo, load_config, output, save_config +from openllm.common import ( + INTERACTIVE, + REPO_DIR, + VERBOSE_LEVEL, + RepoInfo, + load_config, + output, + save_config, +) UPDATE_INTERVAL = datetime.timedelta(days=3) TEST_REPO = os.getenv('OPENLLM_TEST_REPO', None) # for testing @@ -15,223 +23,248 @@ app = OpenLLMTyper(help='manage repos') @app.command(name='list', help='list available repo') def cmd_list(verbose: bool = False) -> None: - if verbose: - VERBOSE_LEVEL.set(20) - pyaml.pprint(list_repo(), sort_dicts=False, sort_keys=False) + if verbose: + VERBOSE_LEVEL.set(20) + pyaml.pprint(list_repo(), sort_dicts=False, sort_keys=False) @app.command(name='remove', help='remove given repo') def cmd_remove(name: str) -> None: - if TEST_REPO: - return - config = load_config() - if name not in config.repos: - output(f'Repo {name} does not exist', style='red') - return + if TEST_REPO: + return + config = load_config() + if name not in config.repos: + output(f'Repo {name} does not exist', style='red') + return - del config.repos[name] - save_config(config) - output(f'Repo {name} removed', style='green') + del config.repos[name] + save_config(config) + output(f'Repo {name} removed', style='green') @app.command(name='update', help='update default repo') def cmd_update() -> None: - if TEST_REPO: - return - repos_in_use = set() - for repo in list_repo(): - repos_in_use.add((repo.server, repo.owner, repo.repo, repo.branch)) - if repo.path.exists(): - shutil.rmtree(repo.path, ignore_errors=True) - repo.path.parent.mkdir(parents=True, exist_ok=True) - try: - _clone_repo(repo) - output('') - output(f'Repo `{repo.name}` updated', style='green') - except Exception as e: - shutil.rmtree(repo.path, ignore_errors=True) - output(f'Failed to clone repo {repo.name}', style='red') - output(e) - for c in REPO_DIR.glob('*/*/*/*'): - repo_spec = tuple(c.parts[-4:]) - if repo_spec not in repos_in_use: - shutil.rmtree(c, ignore_errors=True) - output(f'Removed unused repo cache {c}') - with open(REPO_DIR / 'last_update', 'w') as f: - f.write(datetime.datetime.now().isoformat()) - for repo in list_repo(): - _complete_alias(repo.name) + if TEST_REPO: + return + + repos_in_use = set() + for repo in list_repo(): + # Show simplified output if not in verbose mode + if VERBOSE_LEVEL.get() <= 0: + output(f'updating repo {repo.name}', style='green') + + repos_in_use.add((repo.server, repo.owner, repo.repo, repo.branch)) + if repo.path.exists(): + shutil.rmtree(repo.path, ignore_errors=True) + repo.path.parent.mkdir(parents=True, exist_ok=True) + try: + _clone_repo(repo) + if
VERBOSE_LEVEL.get() > 0: + output('') + output(f'Repo `{repo.name}` updated', style='green') + except Exception as e: + shutil.rmtree(repo.path, ignore_errors=True) + if VERBOSE_LEVEL.get() > 0: + output(f'Failed to clone repo {repo.name}', style='red') + output(e) + for c in REPO_DIR.glob('*/*/*/*'): + repo_spec = tuple(c.parts[-4:]) + if repo_spec not in repos_in_use: + shutil.rmtree(c, ignore_errors=True) + if VERBOSE_LEVEL.get() > 0: + output(f'Removed unused repo cache {c}') + with open(REPO_DIR / 'last_update', 'w') as f: + f.write(datetime.datetime.now().isoformat()) + for repo in list_repo(): + _complete_alias(repo.name) @app.command(name='add', help='add new repo') def cmd_add(name: str, repo: str) -> None: - if TEST_REPO: - return - name = name.lower() - if not name.isidentifier(): - output(f'Invalid repo name: {name}, should only contain letters, numbers and underscores', style='red') - return + if TEST_REPO: + return + name = name.lower() + if not name.isidentifier(): + output( + f'Invalid repo name: {name}, should only contain letters, numbers and underscores', + style='red', + ) + return - try: - parse_repo_url(repo) - except ValueError: - output(f'Invalid repo url: {repo}', style='red') - return + try: + parse_repo_url(repo) + except ValueError: + output(f'Invalid repo url: {repo}', style='red') + return - config = load_config() - if name in config.repos: - override = questionary.confirm(f'Repo {name} already exists({config.repos[name]}), override?').ask() - if not override: - return + config = load_config() + if name in config.repos: + override = questionary.confirm( + f'Repo {name} already exists({config.repos[name]}), override?' + ).ask() + if not override: + return - config.repos[name] = repo - save_config(config) - output(f'Repo {name} added', style='green') + config.repos[name] = repo + save_config(config) + output(f'Repo {name} added', style='green') @app.command(name='default', help='get default repo path') def default() -> typing.Optional[pathlib.Path]: - if TEST_REPO: - return None - output((info := parse_repo_url(load_config().repos['default'], 'default')).path) - return info.path + if TEST_REPO: + return None + output((info := parse_repo_url(load_config().repos['default'], 'default')).path) + return info.path def list_repo(repo_name: typing.Optional[str] = None) -> typing.List[RepoInfo]: - if TEST_REPO: - return [ - RepoInfo( - name='default', - url='', - server='test', - owner='test', - repo='test', - branch='main', - path=pathlib.Path(TEST_REPO), - ) - ] - config = load_config() - repos = [] - for _repo_name, repo_url in config.repos.items(): - if repo_name is not None and _repo_name != repo_name: - continue - repo = parse_repo_url(repo_url, _repo_name) - repos.append(repo) - return repos + if TEST_REPO: + return [ + RepoInfo( + name='default', + url='', + server='test', + owner='test', + repo='test', + branch='main', + path=pathlib.Path(TEST_REPO), + ) + ] + config = load_config() + repos = [] + for _repo_name, repo_url in config.repos.items(): + if repo_name is not None and _repo_name != repo_name: + continue + repo = parse_repo_url(repo_url, _repo_name) + repos.append(repo) + return repos def _complete_alias(repo_name: str) -> None: - from openllm.model import list_bento + from openllm.model import list_bento - for bento in list_bento(repo_name=repo_name): - alias = bento.labels.get('aliases', '').strip() - if alias: - for a in alias.split(','): - with open(bento.path.parent / a, 'w') as f: - f.write(bento.version) + for bento in 
list_bento(repo_name=repo_name): - alias = bento.labels.get('aliases', '').strip() - if alias: - for a in alias.split(','): - with open(bento.path.parent / a, 'w') as f: - f.write(bento.version) + alias = bento.labels.get('aliases', '').strip() + if alias: + for a in alias.split(','): + with open(bento.path.parent / a, 'w') as f: + f.write(bento.version) def _clone_repo(repo: RepoInfo) -> None: - try: - subprocess.run(['git', 'clone', '--depth=1', '-b', repo.branch, repo.url, str(repo.path)], check=True) - except (subprocess.CalledProcessError, FileNotFoundError): - import dulwich - import dulwich.porcelain + try: + # Suppress output if verbosity level is low + if VERBOSE_LEVEL.get() <= 0: + subprocess.run( + ['git', 'clone', '--depth=1', '-b', repo.branch, repo.url, str(repo.path)], + check=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + else: + subprocess.run( + ['git', 'clone', '--depth=1', '-b', repo.branch, repo.url, str(repo.path)], check=True + ) + except (subprocess.CalledProcessError, FileNotFoundError): + import dulwich + import dulwich.porcelain - dulwich.porcelain.clone(repo.url, str(repo.path), checkout=True, depth=1, branch=repo.branch) + # Dulwich doesn't have easy output suppression, but we rarely get here + dulwich.porcelain.clone(repo.url, str(repo.path), checkout=True, depth=1, branch=repo.branch) def ensure_repo_updated() -> None: - if TEST_REPO: - return - last_update_file = REPO_DIR / 'last_update' - if not last_update_file.exists(): - if INTERACTIVE.get(): - choice = questionary.confirm( - 'The repo cache is never updated, do you want to update it to fetch the latest model list?' - ).ask() - if choice: - cmd_update() - return - else: - output( - 'The repo cache is never updated, please run `openllm repo update` to fetch the latest model list', - style='red', - ) - raise typer.Exit(1) - last_update = datetime.datetime.fromisoformat(last_update_file.read_text().strip()) - if datetime.datetime.now() - last_update > UPDATE_INTERVAL: - if INTERACTIVE.get(): - choice = questionary.confirm( - 'The repo cache is outdated, do you want to update it to fetch the latest model list?' - ).ask() - if choice: - cmd_update() - else: - output( - 'The repo cache is outdated, please run `openllm repo update` to fetch the latest model list', - style='yellow', - ) + if TEST_REPO: + return + last_update_file = REPO_DIR / 'last_update' + if not last_update_file.exists(): + if INTERACTIVE.get(): + choice = questionary.confirm( + 'The repo cache has never been updated, do you want to update it to fetch the latest model list?' + ).ask() + if choice: + cmd_update() + return + else: + output( + 'The repo cache has never been updated, please run `openllm repo update` to fetch the latest model list', + style='red', + ) + raise typer.Exit(1) + last_update = datetime.datetime.fromisoformat(last_update_file.read_text().strip()) + if datetime.datetime.now() - last_update > UPDATE_INTERVAL: + if INTERACTIVE.get(): + choice = questionary.confirm( + 'The repo cache is outdated, do you want to update it to fetch the latest model list?'
+ ).ask() + if choice: + cmd_update() + else: + output( + 'The repo cache is outdated, please run `openllm repo update` to fetch the latest model list', + style='yellow', + ) GIT_HTTP_RE = re.compile( - r'(?P<schema>git|ssh|http|https):\/\/(?P<server>[\.\w\d\-]+)\/(?P<owner>[\w\d\-]+)\/(?P<repo>[\w\d\-\_\.]+)(@(?P<branch>.+))?(\/)?$' + r'(?P<schema>git|ssh|http|https):\/\/(?P<server>[\.\w\d\-]+)\/(?P<owner>[\w\d\-]+)\/(?P<repo>[\w\d\-\_\.]+)(@(?P<branch>.+))?(\/)?$' ) GIT_SSH_RE = re.compile( - r'git@(?P<server>[\.\w\d-]+):(?P<owner>[\w\d\-]+)\/(?P<repo>[\w\d\-\_\.]+)(@(?P<branch>.+))?(\/)?$' + r'git@(?P<server>[\.\w\d-]+):(?P<owner>[\w\d\-]+)\/(?P<repo>[\w\d\-\_\.]+)(@(?P<branch>.+))?(\/)?$' ) def parse_repo_url(repo_url: str, repo_name: typing.Optional[str] = None) -> RepoInfo: - """ - parse the git repo url to server, owner, repo name, branch - >>> parse_repo_url('https://github.com/bentoml/bentovllm@main') - ('github.com', 'bentoml', 'bentovllm', 'main') + """ + parse the git repo url to server, owner, repo name, branch + >>> parse_repo_url('https://github.com/bentoml/bentovllm@main') + ('github.com', 'bentoml', 'bentovllm', 'main') - >>> parse_repo_url('https://github.com/bentoml/bentovllm.git@main') - ('github.com', 'bentoml', 'bentovllm', 'main') + >>> parse_repo_url('https://github.com/bentoml/bentovllm.git@main') + ('github.com', 'bentoml', 'bentovllm', 'main') - >>> parse_repo_url('https://github.com/bentoml/bentovllm') - ('github.com', 'bentoml', 'bentovllm', 'main') + >>> parse_repo_url('https://github.com/bentoml/bentovllm') + ('github.com', 'bentoml', 'bentovllm', 'main') - >>> parse_repo_url('git@github.com:bentoml/openllm-models.git') - ('github.com', 'bentoml', 'openllm-models', 'main') - """ - match = GIT_HTTP_RE.match(repo_url) - if match: - schema = match.group('schema') - else: - match = GIT_SSH_RE.match(repo_url) - if not match: - raise ValueError(f'Invalid git repo url: {repo_url}') - schema = None + >>> parse_repo_url('git@github.com:bentoml/openllm-models.git') + ('github.com', 'bentoml', 'openllm-models', 'main') + """ + match = GIT_HTTP_RE.match(repo_url) + if match: + schema = match.group('schema') + else: + match = GIT_SSH_RE.match(repo_url) + if not match: + raise ValueError(f'Invalid git repo url: {repo_url}') + schema = None - if match.group('branch') is not None: - repo_url = repo_url[: match.start('branch') - 1] + if match.group('branch') is not None: + repo_url = repo_url[: match.start('branch') - 1] - server = match.group('server') - owner = match.group('owner') - repo = match.group('repo') - if repo.endswith('.git'): - repo = repo[:-4] - branch = match.group('branch') or 'main' + server = match.group('server') + owner = match.group('owner') + repo = match.group('repo') + if repo.endswith('.git'): + repo = repo[:-4] + branch = match.group('branch') or 'main' - if schema is not None: - repo_url = f'{schema}://{server}/{owner}/{repo}' - else: - repo_url = f'git@{server}:{owner}/{repo}' + if schema is not None: + repo_url = f'{schema}://{server}/{owner}/{repo}' + else: + repo_url = f'git@{server}:{owner}/{repo}' - path = REPO_DIR / server / owner / repo / branch - return RepoInfo( - name=repo if repo_name is None else repo_name, - url=repo_url, - server=server, - owner=owner, - repo=repo, - branch=branch, - path=path, - ) + path = REPO_DIR / server / owner / repo / branch + return RepoInfo( + name=repo if repo_name is None else repo_name, + url=repo_url, + server=server, + owner=owner, + repo=repo, + branch=branch, + path=path, + ) if __name__ == '__main__': - app() + app() diff --git a/src/openllm/venv.py b/src/openllm/venv.py index 314166ed..7c7cfda3 100644 --- a/src/openllm/venv.py
+++ b/src/openllm/venv.py @@ -3,92 +3,100 @@ from __future__ import annotations import functools, os, pathlib, shutil import typer, yaml -from openllm.common import VENV_DIR, VERBOSE_LEVEL, BentoInfo, EnvVars, VenvSpec, output, run_command +from openllm.common import ( + VENV_DIR, + VERBOSE_LEVEL, + BentoInfo, + EnvVars, + VenvSpec, + output, + run_command, +) @functools.lru_cache def _resolve_bento_venv_spec(bento: BentoInfo, runtime_envs: EnvVars | None = None) -> VenvSpec: - lock_file = bento.path / 'env' / 'python' / 'requirements.lock.txt' - if not lock_file.exists(): - lock_file = bento.path / 'env' / 'python' / 'requirements.txt' + lock_file = bento.path / 'env' / 'python' / 'requirements.lock.txt' + if not lock_file.exists(): + lock_file = bento.path / 'env' / 'python' / 'requirements.txt' - reqs = lock_file.read_text().strip() - bentofile = bento.path / 'bento.yaml' - data = yaml.safe_load(bentofile.read_text()) - bento_env_list = data.get('envs', []) - python_version = data.get('image', {})['python_version'] - bento_envs = {e['name']: e.get('value') for e in bento_env_list} - envs = {k: runtime_envs.get(k, v) for k, v in bento_envs.items()} if runtime_envs else {} + reqs = lock_file.read_text().strip() + bentofile = bento.path / 'bento.yaml' + data = yaml.safe_load(bentofile.read_text()) + bento_env_list = data.get('envs', []) + python_version = data.get('image', {})['python_version'] + bento_envs = {e['name']: e.get('value') for e in bento_env_list} + envs = {k: runtime_envs.get(k, v) for k, v in bento_envs.items()} if runtime_envs else {} - return VenvSpec( - python_version=python_version, - requirements_txt=reqs, - name_prefix=f'{bento.tag.replace(":", "_")}-1-', - envs=EnvVars(envs), - ) + return VenvSpec( + python_version=python_version, + requirements_txt=reqs, + name_prefix=f'{bento.tag.replace(":", "_")}-1-', + envs=EnvVars(envs), + ) def _ensure_venv(venv_spec: VenvSpec) -> pathlib.Path: - venv = VENV_DIR / str(hash(venv_spec)) - if venv.exists() and not (venv / 'DONE').exists(): - shutil.rmtree(venv, ignore_errors=True) - if not venv.exists(): - output(f'Installing model dependencies({venv})...', style='green') + venv = VENV_DIR / str(hash(venv_spec)) + if venv.exists() and not (venv / 'DONE').exists(): + shutil.rmtree(venv, ignore_errors=True) + if not venv.exists(): + output(f'Installing model dependencies({venv})...', style='green') - venv_py = venv / 'Scripts' / 'python.exe' if os.name == 'nt' else venv / 'bin' / 'python' - try: - run_command( - ['python', '-m', 'uv', 'venv', venv.__fspath__(), '-p', venv_spec.python_version], - silent=VERBOSE_LEVEL.get() < 10, - ) - run_command( - ['python', '-m', 'uv', 'pip', 'install', '-p', str(venv_py), 'bentoml'], - silent=VERBOSE_LEVEL.get() < 10, - env=venv_spec.envs, - ) - with open(venv / 'requirements.txt', 'w') as f: - f.write(venv_spec.normalized_requirements_txt) - run_command( - [ - 'python', - '-m', - 'uv', - 'pip', - 'install', - '-p', - str(venv_py), - '-r', - (venv / 'requirements.txt').__fspath__(), - ], - silent=VERBOSE_LEVEL.get() < 10, - env=venv_spec.envs, - ) - with open(venv / 'DONE', 'w') as f: - f.write('DONE') - except Exception as e: - shutil.rmtree(venv, ignore_errors=True) - if VERBOSE_LEVEL.get() >= 10: - output(str(e), style='red') - output(f'Failed to install dependencies to {venv}. 
Cleaned up.', style='red') - raise typer.Exit(1) - output(f'Successfully installed dependencies to {venv}.', style='green') - return venv - else: - return venv - - -def ensure_venv(bento: BentoInfo, runtime_envs: EnvVars | None = None) -> pathlib.Path: - venv_spec = _resolve_bento_venv_spec(bento, runtime_envs=EnvVars(runtime_envs)) - venv = _ensure_venv(venv_spec) - assert venv is not None + venv_py = venv / 'Scripts' / 'python.exe' if os.name == 'nt' else venv / 'bin' / 'python' + try: + run_command( + ['python', '-m', 'uv', 'venv', venv.__fspath__(), '-p', venv_spec.python_version], + silent=VERBOSE_LEVEL.get() < 10, + ) + run_command( + ['python', '-m', 'uv', 'pip', 'install', '-p', str(venv_py), 'bentoml'], + silent=VERBOSE_LEVEL.get() < 10, + env=venv_spec.envs, + ) + with open(venv / 'requirements.txt', 'w') as f: + f.write(venv_spec.normalized_requirements_txt) + run_command( + [ + 'python', + '-m', + 'uv', + 'pip', + 'install', + '-p', + str(venv_py), + '-r', + (venv / 'requirements.txt').__fspath__(), + ], + silent=VERBOSE_LEVEL.get() < 10, + env=venv_spec.envs, + ) + with open(venv / 'DONE', 'w') as f: + f.write('DONE') + except Exception as e: + shutil.rmtree(venv, ignore_errors=True) + if VERBOSE_LEVEL.get() >= 10: + output(str(e), style='red') + output(f'Failed to install dependencies to {venv}. Cleaned up.', style='red') + raise typer.Exit(1) + output(f'Successfully installed dependencies to {venv}.', style='green') + return venv + else: return venv +def ensure_venv(bento: BentoInfo, runtime_envs: EnvVars | None = None) -> pathlib.Path: + venv_spec = _resolve_bento_venv_spec(bento, runtime_envs=EnvVars(runtime_envs)) + venv = _ensure_venv(venv_spec) + assert venv is not None + return venv + + def check_venv(bento: BentoInfo) -> bool: - venv_spec = _resolve_bento_venv_spec(bento) - venv = VENV_DIR / str(hash(venv_spec)) - if not venv.exists(): - return False - if venv.exists() and not (venv / 'DONE').exists(): - return False - return True + venv_spec = _resolve_bento_venv_spec(bento) + venv = VENV_DIR / str(hash(venv_spec)) + if not venv.exists(): + return False + if venv.exists() and not (venv / 'DONE').exists(): + return False + return True diff --git a/tests/test_cli_flow.py b/tests/test_cli_flow.py new file mode 100644 index 00000000..58f6ac1e --- /dev/null +++ b/tests/test_cli_flow.py @@ -0,0 +1,75 @@ +from __future__ import annotations + +import sys, typing + +import pytest, pexpect + + +@pytest.fixture +def pexpect_process() -> typing.Generator[pexpect.spawn[typing.Any], None, None]: + child = pexpect.spawn( + f'{sys.executable} -m openllm hello', encoding='utf-8', timeout=20, echo=False + ) + try: + yield child + finally: + try: + child.sendcontrol('c') + child.close(force=True) + except Exception: + pass + + +def safe_expect( + child: pexpect.spawn, pattern: str, timeout: int = 10, debug_msg: str = 'Expecting pattern' +) -> int: + try: + print(f"\n{debug_msg}: '{pattern}'") + index = child.expect(pattern, timeout=timeout) + print(f'Found match at index {index}') + print(f'Before match: {child.before}') + print(f'After match: {child.after}') + return index + except pexpect.TIMEOUT: + print(f'TIMEOUT while {debug_msg}') + print(f'Last output: {child.before}') + raise + except pexpect.EOF: + print(f'EOF while {debug_msg}') + print(f'Last output: {child.before}') + raise + + +def test_hello_flow_to_deploy(pexpect_process: pexpect.spawn) -> None: + child = pexpect_process + + try: + safe_expect(child, 'Select a model', timeout=10, debug_msg='Waiting for model selection prompt') + +
child.sendline('\x1b[B') + child.sendline('\r') + + safe_expect( + child, 'Select a version', timeout=10, debug_msg='Waiting for version selection prompt' + ) + + child.sendline('\r') + + safe_expect( + child, 'Select an action', timeout=10, debug_msg='Waiting for action selection prompt' + ) + + child.sendline('\x1b[B') + child.sendline('\x1b[B') + + child.sendline('\r') + + safe_expect( + child, 'Select an instance type', timeout=10, debug_msg='Waiting for instance type prompt' + ) + + child.sendline('\r') + + child.expect('Error: .*HF_TOKEN', timeout=10) + except Exception as e: + pytest.fail(f'Test failed with exception: {e}') diff --git a/uv.lock b/uv.lock index 5b7813e6..313bf388 100644 --- a/uv.lock +++ b/uv.lock @@ -214,7 +214,7 @@ wheels = [ [[package]] name = "bentoml" -version = "1.4.5" +version = "1.4.8" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "a2wsgi" }, @@ -261,9 +261,9 @@ dependencies = [ { name = "uvicorn" }, { name = "watchfiles" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/dc/df/6e5a260aaf2ee5da3d797374f81bba087fdcb8b521c7cb7441d390e266b6/bentoml-1.4.5.tar.gz", hash = "sha256:372d6d2f93dbcef38eefd568d0a9c99bfd8b5fbb7202983d948de03efa5cc961", size = 967625 } +sdist = { url = "https://files.pythonhosted.org/packages/87/a4/7ba2d3cfea05e4d9505b4aedfec17477771bc5dc98ed4d818f83cdc23093/bentoml-1.4.8.tar.gz", hash = "sha256:fb7e1d21a415645afdeb928f45a1950b7409960b5d9360189b777640c96f7103", size = 970299 } wheels = [ - { url = "https://files.pythonhosted.org/packages/23/26/64bfa28ce0b9e29e825a656e4785eb39b5ab4ca7abb6dbe1e25d856ac716/bentoml-1.4.5-py3-none-any.whl", hash = "sha256:31ecdf26e4addcf62c03a356b629925f5c3aca304d73a5cdf60c1bcbf5e19eb2", size = 1147638 }, + { url = "https://files.pythonhosted.org/packages/cb/3e/c4adc9c48ceab6bfd8735f125f1b2ec58c6a636b4f2c092349c02e1beb71/bentoml-1.4.8-py3-none-any.whl", hash = "sha256:b33765e15101348fa6ca1fe68f07b3309ad4ea5c8823e56c2358a1b09b29edbb", size = 1150381 }, ] [[package]] @@ -681,6 +681,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/95/04/ff642e65ad6b90db43e668d70ffb6736436c7ce41fcc549f4e9472234127/h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761", size = 58259 }, ] +[[package]] +name = "hf-xet" +version = "1.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/95/68/4c363b2e62cb3dbe12d2257ba9b22f101384692d4b9727c5f72433472cff/hf_xet-1.0.3.tar.gz", hash = "sha256:a6d16861a06dd4b8f7229c16b392c5fb8b9588ced89a6ee9bc3e66227f794353", size = 257227 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/26/12/ebbba4b64cb9c908bd5dee355da27f3cc5ad4f29b4b2835041d363388363/hf_xet-1.0.3-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:0705e5db0da5794ab048a8662a7b3aba220f963270b26abc92e8d05abca22451", size = 4979740 }, + { url = "https://files.pythonhosted.org/packages/58/8f/34eadc408b834bcb55886b242a9783da3f63508c4bcbfda7a4f21e61f3d1/hf_xet-1.0.3-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:09a9565ca84049d48c99c83a82d08fbc21d63c04811fd2f7dd088292c1185bc5", size = 4806773 }, + { url = "https://files.pythonhosted.org/packages/a1/de/00b2e2568a39c01b0e013db3300f4d5841f2e597d7b0518923c7881bd166/hf_xet-1.0.3-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70e18534d46ab92bbc3125addaebc145f9b27e06eecd67b40c4342f4b92b677f", size = 53812632 }, + { url = 
"https://files.pythonhosted.org/packages/e2/d8/4ff790370a6795418196553c33e7bcceaa73a7d587e21e4ccb7661b54a2a/hf_xet-1.0.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:da28fd32213ad5b8f60771aba44ac032ba19d752928cfd95914f09146b3f51ec", size = 52277180 }, + { url = "https://files.pythonhosted.org/packages/83/dd/7b432918a3e9e09794674b81e852acc6e14177c0a4466ac0566b7e7f47a4/hf_xet-1.0.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1b71118b8f7e9edf1ae56282388794f351163c7de5c22ea3737dffa9313f500e", size = 53309852 }, + { url = "https://files.pythonhosted.org/packages/4d/a2/d7a5f452a3a8faaa82aeb3aceddab2e103c1b7028a00bbc4caebca5d79fe/hf_xet-1.0.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5927d1986f87b7b80616eb6353a1402be1d72c46b6b0709b01ffc7623a159563", size = 53739471 }, + { url = "https://files.pythonhosted.org/packages/82/81/966f800933043c0be989306f5224ef058543f7848f1e78d7ef3305bd069a/hf_xet-1.0.3-cp37-abi3-win_amd64.whl", hash = "sha256:014b5a40e62ad334f21513e5ba39b419117396031e9264dfc15dd598a1595029", size = 4123538 }, +] + [[package]] name = "httpcore" version = "1.0.7" @@ -763,6 +778,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/d9/a1e041c5e7caa9a05c925f4bdbdfb7f006d1f74996af53467bc394c97be7/importlib_metadata-8.5.0-py3-none-any.whl", hash = "sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b", size = 26514 }, ] +[[package]] +name = "iniconfig" +version = "2.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050 }, +] + [[package]] name = "jinja2" version = "3.1.5" @@ -1167,7 +1191,7 @@ wheels = [ [[package]] name = "openai" -version = "1.66.3" +version = "1.70.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -1179,9 +1203,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a3/77/5172104ca1df35ed2ed8fb26dbc787f721c39498fc51d666c4db07756a0c/openai-1.66.3.tar.gz", hash = "sha256:8dde3aebe2d081258d4159c4cb27bdc13b5bb3f7ea2201d9bd940b9a89faf0c9", size = 397244 } +sdist = { url = "https://files.pythonhosted.org/packages/87/f5/ae0f3cd226c2993b4ac1cc4b5f6ca099764689f403c14922c9356accec66/openai-1.70.0.tar.gz", hash = "sha256:e52a8d54c3efeb08cf58539b5b21a5abef25368b5432965e4de88cdf4e091b2b", size = 409640 } wheels = [ - { url = "https://files.pythonhosted.org/packages/78/5a/e20182f7b6171642d759c548daa0ba20a1d3ac10d2bd0a13fd75704a9ac3/openai-1.66.3-py3-none-any.whl", hash = "sha256:a427c920f727711877ab17c11b95f1230b27767ba7a01e5b66102945141ceca9", size = 567400 }, + { url = "https://files.pythonhosted.org/packages/e2/39/c4b38317d2c702c4bc763957735aaeaf30dfc43b5b824121c49a4ba7ba0f/openai-1.70.0-py3-none-any.whl", hash = "sha256:f6438d053fd8b2e05fd6bef70871e832d9bbdf55e119d0ac5b92726f1ae6f614", size = 599070 }, ] [[package]] @@ -1190,6 +1214,7 @@ source = { editable = "." 
} dependencies = [ { name = "bentoml" }, { name = "dulwich" }, + { name = "hf-xet" }, { name = "huggingface-hub" }, { name = "nvidia-ml-py" }, { name = "openai" }, @@ -1204,13 +1229,20 @@ dependencies = [ { name = "uv" }, ] +[package.dev-dependencies] +tests = [ + { name = "pexpect" }, + { name = "pytest" }, +] + [package.metadata] requires-dist = [ - { name = "bentoml", specifier = "==1.4.5" }, + { name = "bentoml", specifier = "==1.4.8" }, { name = "dulwich" }, + { name = "hf-xet" }, { name = "huggingface-hub" }, { name = "nvidia-ml-py" }, - { name = "openai", specifier = "==1.66.3" }, + { name = "openai", specifier = "==1.70.0" }, { name = "pathlib" }, { name = "pip-requirements-parser" }, { name = "psutil" }, @@ -1222,6 +1254,12 @@ requires-dist = [ { name = "uv" }, ] +[package.metadata.requires-dev] +tests = [ + { name = "pexpect", specifier = ">=4.9.0" }, + { name = "pytest", specifier = ">=8.3.5" }, +] + [[package]] name = "opentelemetry-api" version = "1.30.0" @@ -1345,6 +1383,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191 }, ] +[[package]] +name = "pexpect" +version = "4.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "ptyprocess" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/92/cc564bf6381ff43ce1f4d06852fc19a2f11d180f23dc32d9588bee2f149d/pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f", size = 166450 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", size = 63772 }, +] + [[package]] name = "pip-requirements-parser" version = "32.0.1" @@ -1358,6 +1408,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/d0/d04f1d1e064ac901439699ee097f58688caadea42498ec9c4b4ad2ef84ab/pip_requirements_parser-32.0.1-py3-none-any.whl", hash = "sha256:4659bc2a667783e7a15d190f6fccf8b2486685b6dba4c19c3876314769c57526", size = 35648 }, ] +[[package]] +name = "pluggy" +version = "1.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/96/2d/02d4312c973c6050a18b314a5ad0b3210edb65a906f868e31c111dede4a6/pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1", size = 67955 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556 }, +] + [[package]] name = "prometheus-client" version = "0.21.1" @@ -1483,6 +1542,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/50/1b/6921afe68c74868b4c9fa424dad3be35b095e16687989ebbb50ce4fceb7c/psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553", size = 244885 }, ] +[[package]] +name = "ptyprocess" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/20/e5/16ff212c1e452235a90aeb09066144d0c5a6a8c0834397e03f5224495c4e/ptyprocess-0.7.0.tar.gz", hash = 
"sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220", size = 70762 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/22/a6/858897256d0deac81a172289110f31629fc4cee19b6f01283303e18c8db3/ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", size = 13993 }, +] + [[package]] name = "pyaml" version = "25.1.0" @@ -1633,6 +1701,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1c/a7/c8a2d361bf89c0d9577c934ebb7421b25dc84bf3a8e3ac0a40aed9acc547/pyparsing-3.2.1-py3-none-any.whl", hash = "sha256:506ff4f4386c4cec0590ec19e6302d3aedb992fdc02c761e90416f158dacf8e1", size = 107716 }, ] +[[package]] +name = "pytest" +version = "8.3.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ae/3c/c9d525a414d506893f0cd8a8d0de7706446213181570cdbd766691164e40/pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845", size = 1450891 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/30/3d/64ad57c803f1fa1e963a7946b6e0fea4a70df53c1a7fed304586539c2bac/pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820", size = 343634 }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0"