diff --git a/src/openllm/__main__.py b/src/openllm/__main__.py index 778d11df..e66c3431 100644 --- a/src/openllm/__main__.py +++ b/src/openllm/__main__.py @@ -1,8 +1,8 @@ +import importlib.metadata import os +import platform import random import sys -import platform -import importlib.metadata from collections import defaultdict from typing import Annotated, Optional @@ -22,35 +22,42 @@ from openllm.model import ensure_bento, list_bento from openllm.repo import app as repo_app app = OpenLLMTyper( - help='`openllm hello` to get started. ' - 'OpenLLM is a CLI tool to manage and deploy open source LLMs and' - ' get an OpenAI API compatible chat server in seconds.' + help="`openllm hello` to get started. " + "OpenLLM is a CLI tool to manage and deploy open source LLMs and" + " get an OpenAI API compatible chat server in seconds." ) -app.add_typer(repo_app, name='repo') -app.add_typer(model_app, name='model') -app.add_typer(clean_app, name='clean') +app.add_typer(repo_app, name="repo") +app.add_typer(model_app, name="model") +app.add_typer(clean_app, name="clean") def _select_bento_name(models: list[BentoInfo], target: DeploymentTarget): from tabulate import tabulate options = [] - model_infos = [(model.repo.name, model.name, can_run(model, target)) for model in models] + model_infos = [ + (model.repo.name, model.name, can_run(model, target)) for model in models + ] model_name_groups = defaultdict(lambda: 0.0) for repo, name, score in model_infos: model_name_groups[(repo, name)] += score - table_data = [(name, repo, CHECKED if score > 0 else '') for (repo, name), score in model_name_groups.items()] + table_data = [ + (name, repo, CHECKED if score > 0 else "") + for (repo, name), score in model_name_groups.items() + ] if not table_data: - output('No model found', style='red') + output("No model found", style="red") raise typer.Exit(1) - table = tabulate(table_data, headers=['model', 'repo', 'locally runnable']).split('\n') - headers = f'{table[0]}\n {table[1]}' + table = tabulate(table_data, headers=["model", "repo", "locally runnable"]).split( + "\n" + ) + headers = f"{table[0]}\n {table[1]}" options.append(questionary.Separator(headers)) for table_data, table_line in zip(table_data, table[2:]): options.append(questionary.Choice(table_line, value=table_data[:2])) - selected = questionary.select('Select a model', options).ask() + selected = questionary.select("Select a model", options).ask() if selected is None: raise typer.Exit(1) return selected @@ -60,24 +67,26 @@ def _select_bento_version(models, target, bento_name, repo): from tabulate import tabulate model_infos = [ - [model, can_run(model, target)] for model in models if model.name == bento_name and model.repo.name == repo + [model, can_run(model, target)] + for model in models + if model.name == bento_name and model.repo.name == repo ] table_data = [ - [model.tag, CHECKED if score > 0 else ''] + [model.tag, CHECKED if score > 0 else ""] for model, score in model_infos if model.name == bento_name and model.repo.name == repo ] if not table_data: - output(f'No model found for {bento_name} in {repo}', style='red') + output(f"No model found for {bento_name} in {repo}", style="red") raise typer.Exit(1) - table = tabulate(table_data, headers=['version', 'locally runnable']).split('\n') + table = tabulate(table_data, headers=["version", "locally runnable"]).split("\n") options = [] - options.append(questionary.Separator(f'{table[0]}\n {table[1]}')) + options.append(questionary.Separator(f"{table[0]}\n {table[1]}")) for table_data, table_line in zip(model_infos, table[2:]): options.append(questionary.Choice(table_line, value=table_data)) - selected = questionary.select('Select a version', options).ask() + selected = questionary.select("Select a version", options).ask() if selected is None: raise typer.Exit(1) return selected @@ -89,7 +98,7 @@ def _select_target(bento, targets): options = [] targets.sort(key=lambda x: can_run(bento, x), reverse=True) if not targets: - output('No available instance type, check your bentocloud account', style='red') + output("No available instance type, check your bentocloud account", style="red") raise typer.Exit(1) table = tabulate( @@ -97,103 +106,122 @@ def _select_target(bento, targets): [ target.name, target.accelerators_repr, - f'${target.price}', - CHECKED if can_run(bento, target) else 'insufficient res.', + f"${target.price}", + CHECKED if can_run(bento, target) else "insufficient res.", ] for target in targets ], - headers=['instance type', 'accelerator', 'price/hr', 'deployable'], - ).split('\n') - options.append(questionary.Separator(f'{table[0]}\n {table[1]}')) + headers=["instance type", "accelerator", "price/hr", "deployable"], + ).split("\n") + options.append(questionary.Separator(f"{table[0]}\n {table[1]}")) for target, line in zip(targets, table[2:]): - options.append(questionary.Choice(f'{line}', value=target)) - selected = questionary.select('Select an instance type', options).ask() + options.append(questionary.Choice(f"{line}", value=target)) + selected = questionary.select("Select an instance type", options).ask() if selected is None: raise typer.Exit(1) return selected -def _select_action(bento, score): +def _select_action(bento: BentoInfo, score): if score > 0: options = [ - questionary.Separator('Available actions'), - questionary.Choice('0. Run the model in terminal', value='run', shortcut_key='0'), - questionary.Separator(f' $ openllm run {bento}'), - questionary.Separator(' '), - questionary.Choice('1. Serve the model locally and get a chat server', value='serve', shortcut_key='1'), - questionary.Separator(f' $ openllm serve {bento}'), - questionary.Separator(' '), + questionary.Separator("Available actions"), questionary.Choice( - '2. Deploy the model to bentocloud and get a scalable chat server', value='deploy', shortcut_key='2' + "0. Run the model in terminal", value="run", shortcut_key="0" ), - questionary.Separator(f' $ openllm deploy {bento}'), + questionary.Separator(f" $ openllm run {bento}"), + questionary.Separator(" "), + questionary.Choice( + "1. Serve the model locally and get a chat server", + value="serve", + shortcut_key="1", + ), + questionary.Separator(f" $ openllm serve {bento}"), + questionary.Separator(" "), + questionary.Choice( + "2. Deploy the model to bentocloud and get a scalable chat server", + value="deploy", + shortcut_key="2", + ), + questionary.Separator(f" $ openllm deploy {bento}"), ] else: options = [ - questionary.Separator('Available actions'), + questionary.Separator("Available actions"), questionary.Choice( - '0. Run the model in terminal', value='run', disabled='insufficient res.', shortcut_key='0' + "0. Run the model in terminal", + value="run", + disabled="insufficient res.", + shortcut_key="0", ), - questionary.Separator(f' $ openllm run {bento}'), - questionary.Separator(' '), + questionary.Separator(f" $ openllm run {bento}"), + questionary.Separator(" "), questionary.Choice( - '1. Serve the model locally and get a chat server', - value='serve', - disabled='insufficient res.', - shortcut_key='1', + "1. Serve the model locally and get a chat server", + value="serve", + disabled="insufficient res.", + shortcut_key="1", ), - questionary.Separator(f' $ openllm serve {bento}'), - questionary.Separator(' '), + questionary.Separator(f" $ openllm serve {bento}"), + questionary.Separator(" "), questionary.Choice( - '2. Deploy the model to bentocloud and get a scalable chat server', value='deploy', shortcut_key='2' + "2. Deploy the model to bentocloud and get a scalable chat server", + value="deploy", + shortcut_key="2", ), - questionary.Separator(f' $ openllm deploy {bento}'), + questionary.Separator(f" $ openllm deploy {bento}"), ] - action = questionary.select('Select an action', options).ask() + action = questionary.select("Select an action", options).ask() if action is None: raise typer.Exit(1) - if action == 'run': + if action == "run": try: - local_run(bento) + port = random.randint(30000, 40000) + local_run(bento, port=port) finally: - output('\nUse this command to run the action again:', style='green') - output(f' $ openllm run {bento}', style='orange') - elif action == 'serve': + output("\nUse this command to run the action again:", style="green") + output(f" $ openllm run {bento}", style="orange") + elif action == "serve": try: local_serve(bento) finally: - output('\nUse this command to run the action again:', style='green') - output(f' $ openllm serve {bento}', style='orange') - elif action == 'deploy': + output("\nUse this command to run the action again:", style="green") + output(f" $ openllm serve {bento}", style="orange") + elif action == "deploy": ensure_cloud_context() targets = get_cloud_machine_spec() target = _select_target(bento, targets) try: cloud_deploy(bento, target) finally: - output('\nUse this command to run the action again:', style='green') - output(f' $ openllm deploy {bento} --instance-type {target.name}', style='orange') + output("\nUse this command to run the action again:", style="green") + output( + f" $ openllm deploy {bento} --instance-type {target.name}", + style="orange", + ) -@app.command(help='get started interactively') +@app.command(help="get started interactively") def hello(): INTERACTIVE.set(True) - VERBOSE_LEVEL.set(20) + # VERBOSE_LEVEL.set(20) target = get_local_machine_spec() - output(f' Detected Platform: {target.platform}', style='green') + output(f" Detected Platform: {target.platform}", style="green") if target.accelerators: - output(' Detected Accelerators: ', style='green') + output(" Detected Accelerators: ", style="green") for a in target.accelerators: - output(f' - {a.model} {a.memory_size}GB', style='green') + output(f" - {a.model} {a.memory_size}GB", style="green") else: - output(' Detected Accelerators: None', style='yellow') + output(" Detected Accelerators: None", style="yellow") models = list_bento() if not models: - output('No model found, you probably need to update the model repo:', style='red') - output(' $ openllm repo update', style='orange') + output( + "No model found, you probably need to update the model repo:", style="red" + ) + output(" $ openllm repo update", style="orange") raise typer.Exit(1) bento_name, repo = _select_bento_name(models, target) @@ -201,9 +229,12 @@ def hello(): _select_action(bento, score) -@app.command(help='start an OpenAI API compatible chat server and chat in browser') +@app.command(help="start an OpenAI API compatible chat server and chat in browser") def serve( - model: Annotated[str, typer.Argument()] = '', repo: Optional[str] = None, port: int = 3000, verbose: bool = False + model: Annotated[str, typer.Argument()] = "", + repo: Optional[str] = None, + port: int = 3000, + verbose: bool = False, ): if verbose: VERBOSE_LEVEL.set(20) @@ -212,9 +243,9 @@ def serve( local_serve(bento, port=port) -@app.command(help='run the model and chat in terminal') +@app.command(help="run the model and chat in terminal") def run( - model: Annotated[str, typer.Argument()] = '', + model: Annotated[str, typer.Argument()] = "", repo: Optional[str] = None, port: Optional[int] = None, timeout: int = 600, @@ -229,9 +260,11 @@ def run( local_run(bento, port=port, timeout=timeout) -@app.command(help='deploy an production-ready OpenAI API compatible chat server to bentocloud ($100 free credit)') +@app.command( + help="deploy an production-ready OpenAI API compatible chat server to bentocloud ($100 free credit)" +) def deploy( - model: Annotated[str, typer.Argument()] = '', + model: Annotated[str, typer.Argument()] = "", instance_type: Optional[str] = None, repo: Optional[str] = None, verbose: bool = False, @@ -246,10 +279,10 @@ def deploy( targets = filter(lambda x: can_run(bento, x) > 0, targets) targets = sorted(targets, key=lambda x: can_run(bento, x), reverse=True) if not targets: - output('No available instance type, check your bentocloud account', style='red') + output("No available instance type, check your bentocloud account", style="red") raise typer.Exit(1) target = targets[0] - output(f'Recommended instance type: {target.name}', style='green') + output(f"Recommended instance type: {target.name}", style="green") cloud_deploy(bento, target) @@ -257,9 +290,12 @@ def deploy( def typer_callback( verbose: int = 0, do_not_track: bool = typer.Option( - False, '--do-not-track', help='Whether to disable usage tracking', envvar=DO_NOT_TRACK + False, + "--do-not-track", + help="Whether to disable usage tracking", + envvar=DO_NOT_TRACK, ), - version: bool = typer.Option(False, '--version', '-v', help='Show version'), + version: bool = typer.Option(False, "--version", "-v", help="Show version"), ): if verbose: VERBOSE_LEVEL.set(verbose) @@ -272,5 +308,5 @@ def typer_callback( os.environ[DO_NOT_TRACK] = str(True) -if __name__ == '__main__': +if __name__ == "__main__": app()