diff --git a/src/openllm/__main__.py b/src/openllm/__main__.py
index 778d11df..e66c3431 100644
--- a/src/openllm/__main__.py
+++ b/src/openllm/__main__.py
@@ -1,8 +1,8 @@
+import importlib.metadata
 import os
+import platform
 import random
 import sys
-import platform
-import importlib.metadata
 from collections import defaultdict
 from typing import Annotated, Optional
 
@@ -22,35 +22,42 @@ from openllm.model import ensure_bento, list_bento
 from openllm.repo import app as repo_app
 
 app = OpenLLMTyper(
-    help='`openllm hello` to get started. '
-    'OpenLLM is a CLI tool to manage and deploy open source LLMs and'
-    ' get an OpenAI API compatible chat server in seconds.'
+    help="`openllm hello` to get started. "
+    "OpenLLM is a CLI tool to manage and deploy open source LLMs and"
+    " get an OpenAI API compatible chat server in seconds."
 )
 
-app.add_typer(repo_app, name='repo')
-app.add_typer(model_app, name='model')
-app.add_typer(clean_app, name='clean')
+app.add_typer(repo_app, name="repo")
+app.add_typer(model_app, name="model")
+app.add_typer(clean_app, name="clean")
 
 
 def _select_bento_name(models: list[BentoInfo], target: DeploymentTarget):
     from tabulate import tabulate
 
     options = []
-    model_infos = [(model.repo.name, model.name, can_run(model, target)) for model in models]
+    model_infos = [
+        (model.repo.name, model.name, can_run(model, target)) for model in models
+    ]
     model_name_groups = defaultdict(lambda: 0.0)
     for repo, name, score in model_infos:
         model_name_groups[(repo, name)] += score
-    table_data = [(name, repo, CHECKED if score > 0 else '') for (repo, name), score in model_name_groups.items()]
+    table_data = [
+        (name, repo, CHECKED if score > 0 else "")
+        for (repo, name), score in model_name_groups.items()
+    ]
     if not table_data:
-        output('No model found', style='red')
+        output("No model found", style="red")
         raise typer.Exit(1)
-    table = tabulate(table_data, headers=['model', 'repo', 'locally runnable']).split('\n')
-    headers = f'{table[0]}\n   {table[1]}'
+    table = tabulate(table_data, headers=["model", "repo", "locally runnable"]).split(
+        "\n"
+    )
+    headers = f"{table[0]}\n   {table[1]}"
 
     options.append(questionary.Separator(headers))
     for table_data, table_line in zip(table_data, table[2:]):
         options.append(questionary.Choice(table_line, value=table_data[:2]))
-    selected = questionary.select('Select a model', options).ask()
+    selected = questionary.select("Select a model", options).ask()
     if selected is None:
         raise typer.Exit(1)
     return selected
@@ -60,24 +67,26 @@ def _select_bento_version(models, target, bento_name, repo):
     from tabulate import tabulate
 
     model_infos = [
-        [model, can_run(model, target)] for model in models if model.name == bento_name and model.repo.name == repo
+        [model, can_run(model, target)]
+        for model in models
+        if model.name == bento_name and model.repo.name == repo
     ]
 
     table_data = [
-        [model.tag, CHECKED if score > 0 else '']
+        [model.tag, CHECKED if score > 0 else ""]
         for model, score in model_infos
         if model.name == bento_name and model.repo.name == repo
     ]
     if not table_data:
-        output(f'No model found for {bento_name} in {repo}', style='red')
+        output(f"No model found for {bento_name} in {repo}", style="red")
         raise typer.Exit(1)
-    table = tabulate(table_data, headers=['version', 'locally runnable']).split('\n')
+    table = tabulate(table_data, headers=["version", "locally runnable"]).split("\n")
 
     options = []
-    options.append(questionary.Separator(f'{table[0]}\n   {table[1]}'))
+    options.append(questionary.Separator(f"{table[0]}\n   {table[1]}"))
     for table_data, table_line in zip(model_infos, table[2:]):
         options.append(questionary.Choice(table_line, value=table_data))
-    selected = questionary.select('Select a version', options).ask()
+    selected = questionary.select("Select a version", options).ask()
     if selected is None:
         raise typer.Exit(1)
     return selected
@@ -89,7 +98,7 @@ def _select_target(bento, targets):
     options = []
     targets.sort(key=lambda x: can_run(bento, x), reverse=True)
     if not targets:
-        output('No available instance type, check your bentocloud account', style='red')
+        output("No available instance type, check your bentocloud account", style="red")
         raise typer.Exit(1)
 
     table = tabulate(
@@ -97,103 +106,122 @@ def _select_target(bento, targets):
             [
                 target.name,
                 target.accelerators_repr,
-                f'${target.price}',
-                CHECKED if can_run(bento, target) else 'insufficient res.',
+                f"${target.price}",
+                CHECKED if can_run(bento, target) else "insufficient res.",
             ]
             for target in targets
         ],
-        headers=['instance type', 'accelerator', 'price/hr', 'deployable'],
-    ).split('\n')
-    options.append(questionary.Separator(f'{table[0]}\n   {table[1]}'))
+        headers=["instance type", "accelerator", "price/hr", "deployable"],
+    ).split("\n")
+    options.append(questionary.Separator(f"{table[0]}\n   {table[1]}"))
 
     for target, line in zip(targets, table[2:]):
-        options.append(questionary.Choice(f'{line}', value=target))
-    selected = questionary.select('Select an instance type', options).ask()
+        options.append(questionary.Choice(f"{line}", value=target))
+    selected = questionary.select("Select an instance type", options).ask()
     if selected is None:
         raise typer.Exit(1)
     return selected
 
 
-def _select_action(bento, score):
+def _select_action(bento: BentoInfo, score):
     if score > 0:
         options = [
-            questionary.Separator('Available actions'),
-            questionary.Choice('0. Run the model in terminal', value='run', shortcut_key='0'),
-            questionary.Separator(f'  $ openllm run {bento}'),
-            questionary.Separator(' '),
-            questionary.Choice('1. Serve the model locally and get a chat server', value='serve', shortcut_key='1'),
-            questionary.Separator(f'  $ openllm serve {bento}'),
-            questionary.Separator(' '),
+            questionary.Separator("Available actions"),
             questionary.Choice(
-                '2. Deploy the model to bentocloud and get a scalable chat server', value='deploy', shortcut_key='2'
+                "0. Run the model in terminal", value="run", shortcut_key="0"
             ),
-            questionary.Separator(f'  $ openllm deploy {bento}'),
+            questionary.Separator(f"  $ openllm run {bento}"),
+            questionary.Separator(" "),
+            questionary.Choice(
+                "1. Serve the model locally and get a chat server",
+                value="serve",
+                shortcut_key="1",
+            ),
+            questionary.Separator(f"  $ openllm serve {bento}"),
+            questionary.Separator(" "),
+            questionary.Choice(
+                "2. Deploy the model to bentocloud and get a scalable chat server",
+                value="deploy",
+                shortcut_key="2",
+            ),
+            questionary.Separator(f"  $ openllm deploy {bento}"),
         ]
     else:
         options = [
-            questionary.Separator('Available actions'),
+            questionary.Separator("Available actions"),
             questionary.Choice(
-                '0. Run the model in terminal', value='run', disabled='insufficient res.', shortcut_key='0'
+                "0. Run the model in terminal",
+                value="run",
+                disabled="insufficient res.",
+                shortcut_key="0",
             ),
-            questionary.Separator(f'  $ openllm run {bento}'),
-            questionary.Separator(' '),
+            questionary.Separator(f"  $ openllm run {bento}"),
+            questionary.Separator(" "),
             questionary.Choice(
-                '1. Serve the model locally and get a chat server',
-                value='serve',
-                disabled='insufficient res.',
-                shortcut_key='1',
+                "1. Serve the model locally and get a chat server",
+                value="serve",
+                disabled="insufficient res.",
+                shortcut_key="1",
             ),
-            questionary.Separator(f'  $ openllm serve {bento}'),
-            questionary.Separator(' '),
+            questionary.Separator(f"  $ openllm serve {bento}"),
+            questionary.Separator(" "),
             questionary.Choice(
-                '2. Deploy the model to bentocloud and get a scalable chat server', value='deploy', shortcut_key='2'
+                "2. Deploy the model to bentocloud and get a scalable chat server",
+                value="deploy",
+                shortcut_key="2",
             ),
-            questionary.Separator(f'  $ openllm deploy {bento}'),
+            questionary.Separator(f"  $ openllm deploy {bento}"),
         ]
-    action = questionary.select('Select an action', options).ask()
+    action = questionary.select("Select an action", options).ask()
     if action is None:
         raise typer.Exit(1)
-    if action == 'run':
+    if action == "run":
         try:
-            local_run(bento)
+            port = random.randint(30000, 40000)
+            local_run(bento, port=port)
         finally:
-            output('\nUse this command to run the action again:', style='green')
-            output(f'  $ openllm run {bento}', style='orange')
-    elif action == 'serve':
+            output("\nUse this command to run the action again:", style="green")
+            output(f"  $ openllm run {bento}", style="orange")
+    elif action == "serve":
         try:
             local_serve(bento)
         finally:
-            output('\nUse this command to run the action again:', style='green')
-            output(f'  $ openllm serve {bento}', style='orange')
-    elif action == 'deploy':
+            output("\nUse this command to run the action again:", style="green")
+            output(f"  $ openllm serve {bento}", style="orange")
+    elif action == "deploy":
         ensure_cloud_context()
         targets = get_cloud_machine_spec()
         target = _select_target(bento, targets)
         try:
             cloud_deploy(bento, target)
         finally:
-            output('\nUse this command to run the action again:', style='green')
-            output(f'  $ openllm deploy {bento} --instance-type {target.name}', style='orange')
+            output("\nUse this command to run the action again:", style="green")
+            output(
+                f"  $ openllm deploy {bento} --instance-type {target.name}",
+                style="orange",
+            )
 
 
-@app.command(help='get started interactively')
+@app.command(help="get started interactively")
 def hello():
     INTERACTIVE.set(True)
-    VERBOSE_LEVEL.set(20)
+    # VERBOSE_LEVEL.set(20)
 
     target = get_local_machine_spec()
-    output(f'  Detected Platform: {target.platform}', style='green')
+    output(f"  Detected Platform: {target.platform}", style="green")
     if target.accelerators:
-        output('  Detected Accelerators: ', style='green')
+        output("  Detected Accelerators: ", style="green")
         for a in target.accelerators:
-            output(f'   - {a.model} {a.memory_size}GB', style='green')
+            output(f"   - {a.model} {a.memory_size}GB", style="green")
     else:
-        output('  Detected Accelerators: None', style='yellow')
+        output("  Detected Accelerators: None", style="yellow")
 
     models = list_bento()
     if not models:
-        output('No model found, you probably need to update the model repo:', style='red')
-        output('  $ openllm repo update', style='orange')
+        output(
+            "No model found, you probably need to update the model repo:", style="red"
+        )
+        output("  $ openllm repo update", style="orange")
         raise typer.Exit(1)
 
     bento_name, repo = _select_bento_name(models, target)
@@ -201,9 +229,12 @@ def hello():
     _select_action(bento, score)
 
 
-@app.command(help='start an OpenAI API compatible chat server and chat in browser')
+@app.command(help="start an OpenAI API compatible chat server and chat in browser")
 def serve(
-    model: Annotated[str, typer.Argument()] = '', repo: Optional[str] = None, port: int = 3000, verbose: bool = False
+    model: Annotated[str, typer.Argument()] = "",
+    repo: Optional[str] = None,
+    port: int = 3000,
+    verbose: bool = False,
 ):
     if verbose:
         VERBOSE_LEVEL.set(20)
@@ -212,9 +243,9 @@ def serve(
     local_serve(bento, port=port)
 
 
-@app.command(help='run the model and chat in terminal')
+@app.command(help="run the model and chat in terminal")
 def run(
-    model: Annotated[str, typer.Argument()] = '',
+    model: Annotated[str, typer.Argument()] = "",
     repo: Optional[str] = None,
     port: Optional[int] = None,
     timeout: int = 600,
@@ -229,9 +260,11 @@ def run(
     local_run(bento, port=port, timeout=timeout)
 
 
-@app.command(help='deploy an production-ready OpenAI API compatible chat server to bentocloud ($100 free credit)')
+@app.command(
+    help="deploy a production-ready OpenAI API compatible chat server to bentocloud ($100 free credit)"
+)
 def deploy(
-    model: Annotated[str, typer.Argument()] = '',
+    model: Annotated[str, typer.Argument()] = "",
     instance_type: Optional[str] = None,
     repo: Optional[str] = None,
     verbose: bool = False,
@@ -246,10 +279,10 @@ def deploy(
     targets = filter(lambda x: can_run(bento, x) > 0, targets)
     targets = sorted(targets, key=lambda x: can_run(bento, x), reverse=True)
     if not targets:
-        output('No available instance type, check your bentocloud account', style='red')
+        output("No available instance type, check your bentocloud account", style="red")
         raise typer.Exit(1)
     target = targets[0]
-    output(f'Recommended instance type: {target.name}', style='green')
+    output(f"Recommended instance type: {target.name}", style="green")
     cloud_deploy(bento, target)
 
 
@@ -257,9 +290,12 @@ def deploy(
 def typer_callback(
     verbose: int = 0,
     do_not_track: bool = typer.Option(
-        False, '--do-not-track', help='Whether to disable usage tracking', envvar=DO_NOT_TRACK
+        False,
+        "--do-not-track",
+        help="Whether to disable usage tracking",
+        envvar=DO_NOT_TRACK,
     ),
-    version: bool = typer.Option(False, '--version', '-v', help='Show version'),
+    version: bool = typer.Option(False, "--version", "-v", help="Show version"),
 ):
     if verbose:
         VERBOSE_LEVEL.set(verbose)
@@ -272,5 +308,5 @@ def typer_callback(
         os.environ[DO_NOT_TRACK] = str(True)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     app()