fix: openllm hello nits

2026-08-02 19:22:27 -04:00 · 2024-07-15 17:35:16 +08:00
parent 690d682fee
commit 8675a7bbf4
1 changed file with 116 additions and 80 deletions
--- a/src/openllm/main.py
+++ b/src/openllm/main.py
@@ -1,8 +1,8 @@
+import importlib.metadata
 import os
+import platform
 import random
 import sys
-import platform
-import importlib.metadata
 from collections import defaultdict
 from typing import Annotated, Optional

@@ -22,35 +22,42 @@ from openllm.model import ensure_bento, list_bento
 from openllm.repo import app as repo_app

 app = OpenLLMTyper(
-    help='`openllm hello` to get started. '
-    'OpenLLM is a CLI tool to manage and deploy open source LLMs and'
-    ' get an OpenAI API compatible chat server in seconds.'
+    help="`openllm hello` to get started. "
+    "OpenLLM is a CLI tool to manage and deploy open source LLMs and"
+    " get an OpenAI API compatible chat server in seconds."
 )

-app.add_typer(repo_app, name='repo')
-app.add_typer(model_app, name='model')
-app.add_typer(clean_app, name='clean')
+app.add_typer(repo_app, name="repo")
+app.add_typer(model_app, name="model")
+app.add_typer(clean_app, name="clean")


 def _select_bento_name(models: list[BentoInfo], target: DeploymentTarget):
    from tabulate import tabulate

    options = []
-    model_infos = [(model.repo.name, model.name, can_run(model, target)) for model in models]
+    model_infos = [
+        (model.repo.name, model.name, can_run(model, target)) for model in models
+    ]
    model_name_groups = defaultdict(lambda: 0.0)
    for repo, name, score in model_infos:
        model_name_groups[(repo, name)] += score
-    table_data = [(name, repo, CHECKED if score > 0 else '') for (repo, name), score in model_name_groups.items()]
+    table_data = [
+        (name, repo, CHECKED if score > 0 else "")
+        for (repo, name), score in model_name_groups.items()
+    ]
    if not table_data:
-        output('No model found', style='red')
+        output("No model found", style="red")
        raise typer.Exit(1)
-    table = tabulate(table_data, headers=['model', 'repo', 'locally runnable']).split('\n')
-    headers = f'{table[0]}\n   {table[1]}'
+    table = tabulate(table_data, headers=["model", "repo", "locally runnable"]).split(
+        "\n"
+    )
+    headers = f"{table[0]}\n   {table[1]}"

    options.append(questionary.Separator(headers))
    for table_data, table_line in zip(table_data, table[2:]):
        options.append(questionary.Choice(table_line, value=table_data[:2]))
-    selected = questionary.select('Select a model', options).ask()
+    selected = questionary.select("Select a model", options).ask()
    if selected is None:
        raise typer.Exit(1)
    return selected
@@ -60,24 +67,26 @@ def _select_bento_version(models, target, bento_name, repo):
    from tabulate import tabulate

    model_infos = [
-        [model, can_run(model, target)] for model in models if model.name == bento_name and model.repo.name == repo
+        [model, can_run(model, target)]
+        for model in models
+        if model.name == bento_name and model.repo.name == repo
    ]

    table_data = [
-        [model.tag, CHECKED if score > 0 else '']
+        [model.tag, CHECKED if score > 0 else ""]
        for model, score in model_infos
        if model.name == bento_name and model.repo.name == repo
    ]
    if not table_data:
-        output(f'No model found for {bento_name} in {repo}', style='red')
+        output(f"No model found for {bento_name} in {repo}", style="red")
        raise typer.Exit(1)
-    table = tabulate(table_data, headers=['version', 'locally runnable']).split('\n')
+    table = tabulate(table_data, headers=["version", "locally runnable"]).split("\n")

    options = []
-    options.append(questionary.Separator(f'{table[0]}\n   {table[1]}'))
+    options.append(questionary.Separator(f"{table[0]}\n   {table[1]}"))
    for table_data, table_line in zip(model_infos, table[2:]):
        options.append(questionary.Choice(table_line, value=table_data))
-    selected = questionary.select('Select a version', options).ask()
+    selected = questionary.select("Select a version", options).ask()
    if selected is None:
        raise typer.Exit(1)
    return selected
@@ -89,7 +98,7 @@ def _select_target(bento, targets):
    options = []
    targets.sort(key=lambda x: can_run(bento, x), reverse=True)
    if not targets:
-        output('No available instance type, check your bentocloud account', style='red')
+        output("No available instance type, check your bentocloud account", style="red")
        raise typer.Exit(1)

    table = tabulate(
@@ -97,103 +106,122 @@ def _select_target(bento, targets):
            [
                target.name,
                target.accelerators_repr,
-                f'${target.price}',
-                CHECKED if can_run(bento, target) else 'insufficient res.',
+                f"${target.price}",
+                CHECKED if can_run(bento, target) else "insufficient res.",
            ]
            for target in targets
        ],
-        headers=['instance type', 'accelerator', 'price/hr', 'deployable'],
-    ).split('\n')
-    options.append(questionary.Separator(f'{table[0]}\n   {table[1]}'))
+        headers=["instance type", "accelerator", "price/hr", "deployable"],
+    ).split("\n")
+    options.append(questionary.Separator(f"{table[0]}\n   {table[1]}"))

    for target, line in zip(targets, table[2:]):
-        options.append(questionary.Choice(f'{line}', value=target))
-    selected = questionary.select('Select an instance type', options).ask()
+        options.append(questionary.Choice(f"{line}", value=target))
+    selected = questionary.select("Select an instance type", options).ask()
    if selected is None:
        raise typer.Exit(1)
    return selected


-def _select_action(bento, score):
+def _select_action(bento: BentoInfo, score):
    if score > 0:
        options = [
-            questionary.Separator('Available actions'),
-            questionary.Choice('0. Run the model in terminal', value='run', shortcut_key='0'),
-            questionary.Separator(f'  $ openllm run {bento}'),
-            questionary.Separator(' '),
-            questionary.Choice('1. Serve the model locally and get a chat server', value='serve', shortcut_key='1'),
-            questionary.Separator(f'  $ openllm serve {bento}'),
-            questionary.Separator(' '),
+            questionary.Separator("Available actions"),
            questionary.Choice(
-                '2. Deploy the model to bentocloud and get a scalable chat server', value='deploy', shortcut_key='2'
+                "0. Run the model in terminal", value="run", shortcut_key="0"
            ),
-            questionary.Separator(f'  $ openllm deploy {bento}'),
+            questionary.Separator(f"  $ openllm run {bento}"),
+            questionary.Separator(" "),
+            questionary.Choice(
+                "1. Serve the model locally and get a chat server",
+                value="serve",
+                shortcut_key="1",
+            ),
+            questionary.Separator(f"  $ openllm serve {bento}"),
+            questionary.Separator(" "),
+            questionary.Choice(
+                "2. Deploy the model to bentocloud and get a scalable chat server",
+                value="deploy",
+                shortcut_key="2",
+            ),
+            questionary.Separator(f"  $ openllm deploy {bento}"),
        ]
    else:
        options = [
-            questionary.Separator('Available actions'),
+            questionary.Separator("Available actions"),
            questionary.Choice(
-                '0. Run the model in terminal', value='run', disabled='insufficient res.', shortcut_key='0'
+                "0. Run the model in terminal",
+                value="run",
+                disabled="insufficient res.",
+                shortcut_key="0",
            ),
-            questionary.Separator(f'  $ openllm run {bento}'),
-            questionary.Separator(' '),
+            questionary.Separator(f"  $ openllm run {bento}"),
+            questionary.Separator(" "),
            questionary.Choice(
-                '1. Serve the model locally and get a chat server',
-                value='serve',
-                disabled='insufficient res.',
-                shortcut_key='1',
+                "1. Serve the model locally and get a chat server",
+                value="serve",
+                disabled="insufficient res.",
+                shortcut_key="1",
            ),
-            questionary.Separator(f'  $ openllm serve {bento}'),
-            questionary.Separator(' '),
+            questionary.Separator(f"  $ openllm serve {bento}"),
+            questionary.Separator(" "),
            questionary.Choice(
-                '2. Deploy the model to bentocloud and get a scalable chat server', value='deploy', shortcut_key='2'
+                "2. Deploy the model to bentocloud and get a scalable chat server",
+                value="deploy",
+                shortcut_key="2",
            ),
-            questionary.Separator(f'  $ openllm deploy {bento}'),
+            questionary.Separator(f"  $ openllm deploy {bento}"),
        ]
-    action = questionary.select('Select an action', options).ask()
+    action = questionary.select("Select an action", options).ask()
    if action is None:
        raise typer.Exit(1)
-    if action == 'run':
+    if action == "run":
        try:
-            local_run(bento)
+            port = random.randint(30000, 40000)
+            local_run(bento, port=port)
        finally:
-            output('\nUse this command to run the action again:', style='green')
-            output(f'  $ openllm run {bento}', style='orange')
-    elif action == 'serve':
+            output("\nUse this command to run the action again:", style="green")
+            output(f"  $ openllm run {bento}", style="orange")
+    elif action == "serve":
        try:
            local_serve(bento)
        finally:
-            output('\nUse this command to run the action again:', style='green')
-            output(f'  $ openllm serve {bento}', style='orange')
-    elif action == 'deploy':
+            output("\nUse this command to run the action again:", style="green")
+            output(f"  $ openllm serve {bento}", style="orange")
+    elif action == "deploy":
        ensure_cloud_context()
        targets = get_cloud_machine_spec()
        target = _select_target(bento, targets)
        try:
            cloud_deploy(bento, target)
        finally:
-            output('\nUse this command to run the action again:', style='green')
-            output(f'  $ openllm deploy {bento} --instance-type {target.name}', style='orange')
+            output("\nUse this command to run the action again:", style="green")
+            output(
+                f"  $ openllm deploy {bento} --instance-type {target.name}",
+                style="orange",
+            )


-@app.command(help='get started interactively')
+@app.command(help="get started interactively")
 def hello():
    INTERACTIVE.set(True)
-    VERBOSE_LEVEL.set(20)
+    # VERBOSE_LEVEL.set(20)

    target = get_local_machine_spec()
-    output(f'  Detected Platform: {target.platform}', style='green')
+    output(f"  Detected Platform: {target.platform}", style="green")
    if target.accelerators:
-        output('  Detected Accelerators: ', style='green')
+        output("  Detected Accelerators: ", style="green")
        for a in target.accelerators:
-            output(f'   - {a.model} {a.memory_size}GB', style='green')
+            output(f"   - {a.model} {a.memory_size}GB", style="green")
    else:
-        output('  Detected Accelerators: None', style='yellow')
+        output("  Detected Accelerators: None", style="yellow")

    models = list_bento()
    if not models:
-        output('No model found, you probably need to update the model repo:', style='red')
-        output('  $ openllm repo update', style='orange')
+        output(
+            "No model found, you probably need to update the model repo:", style="red"
+        )
+        output("  $ openllm repo update", style="orange")
        raise typer.Exit(1)

    bento_name, repo = _select_bento_name(models, target)
@@ -201,9 +229,12 @@ def hello():
    _select_action(bento, score)


-@app.command(help='start an OpenAI API compatible chat server and chat in browser')
+@app.command(help="start an OpenAI API compatible chat server and chat in browser")
 def serve(
-    model: Annotated[str, typer.Argument()] = '', repo: Optional[str] = None, port: int = 3000, verbose: bool = False
+    model: Annotated[str, typer.Argument()] = "",
+    repo: Optional[str] = None,
+    port: int = 3000,
+    verbose: bool = False,
 ):
    if verbose:
        VERBOSE_LEVEL.set(20)
@@ -212,9 +243,9 @@ def serve(
    local_serve(bento, port=port)


-@app.command(help='run the model and chat in terminal')
+@app.command(help="run the model and chat in terminal")
 def run(
-    model: Annotated[str, typer.Argument()] = '',
+    model: Annotated[str, typer.Argument()] = "",
    repo: Optional[str] = None,
    port: Optional[int] = None,
    timeout: int = 600,
@@ -229,9 +260,11 @@ def run(
    local_run(bento, port=port, timeout=timeout)


-@app.command(help='deploy an production-ready OpenAI API compatible chat server to bentocloud ($100 free credit)')
+@app.command(
+    help="deploy an production-ready OpenAI API compatible chat server to bentocloud ($100 free credit)"
+)
 def deploy(
-    model: Annotated[str, typer.Argument()] = '',
+    model: Annotated[str, typer.Argument()] = "",
    instance_type: Optional[str] = None,
    repo: Optional[str] = None,
    verbose: bool = False,
@@ -246,10 +279,10 @@ def deploy(
    targets = filter(lambda x: can_run(bento, x) > 0, targets)
    targets = sorted(targets, key=lambda x: can_run(bento, x), reverse=True)
    if not targets:
-        output('No available instance type, check your bentocloud account', style='red')
+        output("No available instance type, check your bentocloud account", style="red")
        raise typer.Exit(1)
    target = targets[0]
-    output(f'Recommended instance type: {target.name}', style='green')
+    output(f"Recommended instance type: {target.name}", style="green")
    cloud_deploy(bento, target)


@@ -257,9 +290,12 @@ def deploy(
 def typer_callback(
    verbose: int = 0,
    do_not_track: bool = typer.Option(
-        False, '--do-not-track', help='Whether to disable usage tracking', envvar=DO_NOT_TRACK
+        False,
+        "--do-not-track",
+        help="Whether to disable usage tracking",
+        envvar=DO_NOT_TRACK,
    ),
-    version: bool = typer.Option(False, '--version', '-v', help='Show version'),
+    version: bool = typer.Option(False, "--version", "-v", help="Show version"),
 ):
    if verbose:
        VERBOSE_LEVEL.set(verbose)
@@ -272,5 +308,5 @@ def typer_callback(
        os.environ[DO_NOT_TRACK] = str(True)


-if __name__ == '__main__':
+if __name__ == "__main__":
    app()