From 20e6ed26b32bbf43ecbcd6fe3ce7596459a9aa6c Mon Sep 17 00:00:00 2001 From: bojiang Date: Tue, 25 Jun 2024 16:52:42 +0800 Subject: [PATCH] refactor: openllm hello --- openllm_next/__main__.py | 314 +++++++++++++++++++++++-------- openllm_next/accelerator_spec.py | 5 +- openllm_next/cloud.py | 25 +-- openllm_next/common.py | 37 +++- openllm_next/local.py | 1 - openllm_next/model.py | 18 +- pyproject.toml | 1 + 7 files changed, 295 insertions(+), 106 deletions(-) diff --git a/openllm_next/__main__.py b/openllm_next/__main__.py index 8eedfa02..10e38a5d 100644 --- a/openllm_next/__main__.py +++ b/openllm_next/__main__.py @@ -1,6 +1,8 @@ -from typing import Annotated - +from typing import Annotated, Optional +from collections import defaultdict +import sys import questionary + import typer from openllm_next.accelerator_spec import ( @@ -8,11 +10,10 @@ from openllm_next.accelerator_spec import ( can_run, get_local_machine_spec, ) -from openllm_next.cloud import app as cloud_app +from openllm_next.cloud import app as cloud_app, ensure_cloud_context from openllm_next.cloud import get_cloud_machine_spec -from openllm_next.cloud import run as cloud_run -from openllm_next.cloud import serve as cloud_serve -from openllm_next.common import VERBOSE_LEVEL, BentoInfo +from openllm_next.cloud import serve as cloud_deploy +from openllm_next.common import VERBOSE_LEVEL, BentoInfo, FORCE, output from openllm_next.local import run as local_run from openllm_next.local import serve as local_serve from openllm_next.model import app as model_app @@ -26,97 +27,249 @@ app.add_typer(model_app, name="model") app.add_typer(cloud_app, name="cloud") -def _pre_select(model: str) -> tuple[BentoInfo, DeploymentTarget]: +def _pick_bento(model: str, target: Optional[DeploymentTarget] = None) -> BentoInfo: bentos = list_bento(model) if len(bentos) == 0: - typer.echo(f"No model found for {model}", err=True) + output(f"No model found for {model}", level=20, style="red") raise typer.Exit(1) - local = 
get_local_machine_spec() - if len(bentos) == 1: - bento = bentos[0] - if can_run(bento, local) <= 0: - questionary.print( - f"No deployment target found for {bento.name}:{bento.version}", - style="red", - ) - raise typer.Exit(1) - return bento, local + if FORCE.get(): + output(f"Found model {bentos[0]}", level=10, style="green") + return bentos[0] + if target is None: + return bentos[0] + if can_run(bentos[0], target) <= 0: + return bentos[0] + output(f"Found model {bentos[0]}", level=10, style="green") + return bentos[0] - choices = [] - choices += [questionary.Separator("Local available models")] - choices += [ - questionary.Choice( - f" {bento.name}:{bento.version}", - (bento, local), + if target is None: + output( + f"Multiple models match {model}, did you mean one of these?", + level=20, + style="red", ) - for bento in bentos - if can_run(bento) > 0 - ] - choices += [questionary.Separator("Cloud available models")] - choices += [ - questionary.Choice( - f" {bento.name}:{bento.version}", - (bento, None), - ) - for bento in bentos - ] - - choosen: tuple[BentoInfo, DeploymentTarget] = questionary.select( - "Select a model to run", - choices=choices, - ).ask() - - if not choosen: - questionary.print("No model selected", style="red") + for bento in bentos: + output(f" {bento}", level=20, style="red") raise typer.Exit(1) - bento, target = choosen - if target is None: - cloud_targets = get_cloud_machine_spec() - cloud_targets = [ - target for target in cloud_targets if can_run(bento, target) > 0 - ] - if len(cloud_targets) == 0: - questionary.print( - f"No suitable instance type found for {bento.name}:{bento.version}", - style="red", - ) - raise typer.Exit(1) - target = questionary.select( - "Select a cloud target", - choices=[ - questionary.Choice( - f" {target.name}", - target, - ) - for target in cloud_targets - ], - ).ask() - if not target: - questionary.print("No target selected", style="red") - raise typer.Exit(1) + filtered = [bento for bento in bentos if 
can_run(bento, target) > 0] + if len(filtered) == 0: + output(f"No deployment target found for {model}", level=20, style="red") + raise typer.Exit(1) - return bento, target + if len(filtered) == 0: + output(f"No deployment target found for {model}", level=20, style="red") + raise typer.Exit(1) + + if len(bentos) > 1: + output( + f"Multiple models match {model}, did you mean one of these?", + level=20, + style="red", + ) + for bento in bentos: + output(f" {bento}", level=20, style="red") + raise typer.Exit(1) + + return bentos[0] + + +def _select_bento_name(models, target): + from tabulate import tabulate + + options = [] + model_infos = [ + [model.repo.name, model.name, model.tag, can_run(model, target)] + for model in models + ] + model_name_groups = defaultdict(lambda: 0) + for repo, name, tag, score in model_infos: + model_name_groups[(repo, name)] += score + table_data = [ + [name, repo, "*" if score > 0 else ""] + for (repo, name), score in model_name_groups.items() + ] + table = tabulate( + table_data, + headers=["model", "repo", "locally runnable"], + ).split("\n") + headers = f"{table[0]}\n {table[1]}" + + options.append(questionary.Separator(headers)) + for table_data, table_line in zip(table_data, table[2:]): + options.append(questionary.Choice(table_line, value=table_data[:2])) + selected = questionary.select("Select a model", options).ask() + if selected is None: + raise typer.Exit(1) + return selected + + +def _select_bento_version(models, target, bento_name, repo): + from tabulate import tabulate + + model_infos = [ + [model, can_run(model, target)] + for model in models + if model.name == bento_name and model.repo.name == repo + ] + + table_data = [ + [model.version, "yes" if score > 0 else ""] + for model, score in model_infos + if model.name == bento_name and model.repo.name == repo + ] + if not table_data: + output(f"No model found for {bento_name} in {repo}", level=20, style="red") + raise typer.Exit(1) + table = tabulate( + table_data, + 
headers=["version", "locally runnable"], + ).split("\n") + + options = [] + options.append(questionary.Separator(f"{table[0]}\n {table[1]}")) + for table_data, table_line in zip(model_infos, table[2:]): + options.append(questionary.Choice(table_line, value=table_data)) + selected = questionary.select("Select a version", options).ask() + if selected is None: + raise typer.Exit(1) + return selected + + +def _select_target(bento, targets): + from tabulate import tabulate + + options = [] + targets.sort(key=lambda x: can_run(bento, x), reverse=True) + if not targets: + output( + "No available instance type, check your bentocloud account", + level=20, + style="red", + ) + raise typer.Exit(1) + + table = tabulate( + [ + [ + target.name, + target.accelerators_repr, + target.price, + "" if can_run(bento, target) else "insufficient res.", + ] + for target in targets + ], + headers=["instance type", "accelerator", "price", "deployable"], + ).split("\n") + options.append(questionary.Separator(f"{table[0]}\n {table[1]}")) + + for target, line in zip(targets, table[2:]): + options.append( + questionary.Choice( + f"{line}", + value=target, + ) + ) + selected = questionary.select("Select an instance type", options).ask() + if selected is None: + raise typer.Exit(1) + return selected + + +def _select_action(bento, score): + if score > 0: + options = [ + questionary.Separator("Available actions"), + questionary.Separator("0. Run the model in terminal"), + questionary.Choice(f" $ openllm run {bento}", value="run"), + questionary.Separator(" "), + questionary.Separator("1. Serve the model locally and get a chat server"), + questionary.Choice(f" $ openllm serve {bento}", value="serve"), + questionary.Separator(" "), + questionary.Separator( + "2. Deploy the model to bentocloud and get a scalable chat server" + ), + questionary.Choice(f" $ openllm deploy {bento}", value="deploy"), + ] + else: + options = [ + questionary.Separator("Available actions"), + questionary.Separator("0. 
Run the model in terminal"), + questionary.Choice( + f" $ openllm run {bento}", + value="run", + disabled="insufficient resources", + shortcut_key="0", + ), + questionary.Separator(" "), + questionary.Separator("1. Serve the model locally and get a chat server"), + questionary.Choice( + f" $ openllm serve {bento}", + value="serve", + disabled="insufficient resources", + shortcut_key="1", + ), + questionary.Separator(" "), + questionary.Separator( + "2. Deploy the model to bentocloud and get a scalable chat server" + ), + questionary.Choice( + f" $ openllm deploy {bento}", + value="deploy", + shortcut_key="2", + ), + ] + action = questionary.select("Select an action", options).ask() + if action is None: + raise typer.Exit(1) + if action == "run": + local_run(bento) + elif action == "serve": + local_serve(bento) + elif action == "deploy": + ensure_cloud_context() + targets = get_cloud_machine_spec() + target = _select_target(bento, targets) + cloud_deploy(bento, target) + + +@app.command() +def hello(): + target = get_local_machine_spec() + output(f" Detected Platform: {target.platform}", style="green") + if target.accelerators: + output(" Detected Accelerators: ", style="green") + for a in target.accelerators: + output(f" - {a.model} {a.memory_size}GB", style="green") + else: + output(" Detected Accelerators: None", style="yellow") + + models = list_bento() + + bento_name, repo = _select_bento_name(models, target) + bento, score = _select_bento_version(models, target, bento_name, repo) + _select_action(bento, score) @app.command() def serve(model: Annotated[str, typer.Argument()] = ""): - bento, target = _pre_select(model) - if target and target.source == "local": - local_serve(bento) - else: - cloud_serve(bento, target) + target = get_local_machine_spec() + bento = _pick_bento(model, target) + local_serve(bento) @app.command() def run(model: Annotated[str, typer.Argument()] = ""): - bento, target = _pre_select(model) - if target and target.source == "local": - 
local_run(bento)
-    else:
-        cloud_run(bento, target)
+    target = get_local_machine_spec()
+    bento = _pick_bento(model, target)
+    local_run(bento)
+
+
+@app.command()
+def deploy(model: Annotated[str, typer.Argument()] = ""):
+    targets = get_cloud_machine_spec()
 
 
 def typer_callback(verbose: int = 0):
@@ -125,6 +278,9 @@ def typer_callback(verbose: int = 0):
 
 
 def main():
+    if sys.version_info < (3, 9):
+        output("Python 3.9 or higher is required", level=20, style="red")
+        sys.exit(1)
     app.callback()(typer_callback)
     app()
 
diff --git a/openllm_next/accelerator_spec.py b/openllm_next/accelerator_spec.py
index 3850b680..6c67c22d 100644
--- a/openllm_next/accelerator_spec.py
+++ b/openllm_next/accelerator_spec.py
@@ -21,6 +21,9 @@ class Accelerator(SimpleNamespace):
     def __eq__(self, other):
         return self.memory_size == other.memory_size
 
+    def __repr__(self):
+        return f"{self.model}({self.memory_size}GB)"
+
 
 class Resource(SimpleNamespace):
     cpu: int
@@ -93,7 +96,7 @@ def get_local_machine_spec():
             memory_info = nvmlDeviceGetMemoryInfo(handle)
             accelerators.append(
                 Accelerator(
-                    name=name, memory_size=math.ceil(int(memory_info.total) / 1024**3)
+                    model=name, memory_size=math.ceil(int(memory_info.total) / 1024**3)
                 )
             )
         nvmlShutdown()
diff --git a/openllm_next/cloud.py b/openllm_next/cloud.py
index d2d85286..c7110ba2 100644
--- a/openllm_next/cloud.py
+++ b/openllm_next/cloud.py
@@ -13,6 +13,7 @@ from openllm_next.common import (
     ERROR_STYLE,
     BentoInfo,
     DeploymentTarget,
+    output,
     run_command,
 )
 
@@ -62,14 +63,12 @@ def _get_deploy_cmd(bento: BentoInfo, target: typing.Optional[DeploymentTarget]
     return cmd, env, None
 
 
-def _ensure_cloud_context():
+def ensure_cloud_context():
     cmd = ["bentoml", "cloud", "current-context"]
     try:
         result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
         context = json.loads(result)
-        questionary.print(
-            f"BentoCloud already logged in: {context['endpoint']}", style="green"
-        )
+        output(f"  BentoCloud already logged in: {context['endpoint']}", 
style="green") except subprocess.CalledProcessError: action = questionary.select( "BentoCloud not logged in", @@ -79,10 +78,9 @@ def _ensure_cloud_context(): ], ).ask() if action is None: - questionary.print("Cancelled", style=ERROR_STYLE) raise typer.Exit(1) elif action == "get an account in two minutes": - questionary.print( + output( "Please visit https://cloud.bentoml.com to get your token", style="yellow", ) @@ -105,14 +103,13 @@ def _ensure_cloud_context(): ] try: result = subprocess.check_output(cmd) - questionary.print("Logged in successfully", style="green") + output(" Logged in successfully", style="green") except subprocess.CalledProcessError: - questionary.print("Failed to login", style=ERROR_STYLE) + output(" Failed to login", style=ERROR_STYLE) raise typer.Exit(1) def get_cloud_machine_spec(): - _ensure_cloud_context() cmd = ["bentoml", "deployment", "list-instance-types", "-o", "json"] try: result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL) @@ -137,14 +134,6 @@ def get_cloud_machine_spec(): def serve(bento: BentoInfo, target: DeploymentTarget): - _ensure_cloud_context() + ensure_cloud_context() cmd, env, cwd = _get_deploy_cmd(bento, target) run_command(cmd, env=env, cwd=cwd) - - -def run(bento: BentoInfo, target: DeploymentTarget): - questionary.print( - "`run` with bentocloud is not supported yet, please use `serve` instead", - style=ERROR_STYLE, - ) - raise typer.Exit(1) diff --git a/openllm_next/common.py b/openllm_next/common.py index ef5e348a..96a4f984 100644 --- a/openllm_next/common.py +++ b/openllm_next/common.py @@ -1,5 +1,7 @@ import functools import signal +import io +from collections import UserList import asyncio import hashlib import json @@ -55,7 +57,30 @@ class ContextVar(typing.Generic[T]): self._stack.pop() -VERBOSE_LEVEL = ContextVar(0) +VERBOSE_LEVEL = ContextVar(10) +INTERACTIVE = ContextVar(True) +FORCE = ContextVar(False) + + +def output(content, level=0, style=None): + if level >= VERBOSE_LEVEL.get(): + return + 
+ if isinstance(content, (dict, list)): + import pyaml + + out = io.StringIO() + pyaml.pprint( + content, + dst=out, + sort_dicts=False, + sort_keys=False, + ) + questionary.print(out.getvalue(), style=style, end="") + out.close() + + if isinstance(content, str): + questionary.print(content, style=style) class Config(SimpleNamespace): @@ -202,6 +227,16 @@ class DeploymentTarget(SimpleNamespace): def __hash__(self): return hash(self.source) + @property + def accelerators_repr(self) -> str: + accs = {a.model for a in self.accelerators} + if len(accs) == 0: + return "null" + if len(accs) == 1: + a = self.accelerators[0] + return f"{a.model} x{len(self.accelerators)}" + return ", ".join((f"{a.model}" for a in self.accelerators)) + def run_command( cmd, diff --git a/openllm_next/local.py b/openllm_next/local.py index 713f03e4..3b231b2d 100644 --- a/openllm_next/local.py +++ b/openllm_next/local.py @@ -1,5 +1,4 @@ import asyncio -import signal import time import httpx diff --git a/openllm_next/model.py b/openllm_next/model.py index f40e01dc..8fd97e32 100644 --- a/openllm_next/model.py +++ b/openllm_next/model.py @@ -41,20 +41,26 @@ def list_(): ) -def list_bento(tag: typing.Optional[str] = None) -> typing.List[BentoInfo]: +def list_bento( + tag: typing.Optional[str] = None, + repo_name: typing.Optional[str] = None, + include_alias: bool = False, +) -> typing.List[BentoInfo]: ensure_repo_updated() if not tag: glob_pattern = "bentoml/bentos/*/*" elif ":" in tag: - repo_name, version = tag.split(":") - glob_pattern = f"bentoml/bentos/{repo_name}/{version}" + bento_name, version = tag.split(":") + glob_pattern = f"bentoml/bentos/{bento_name}/{version}" else: glob_pattern = f"bentoml/bentos/{tag}/*" model_list = [] config = load_config() - for repo_name, repo_url in config.repos.items(): - repo = parse_repo_url(repo_url, repo_name) + for _repo_name, repo_url in config.repos.items(): + if repo_name is not None and _repo_name != repo_name: + continue + repo = 
parse_repo_url(repo_url, _repo_name) for path in repo.path.glob(glob_pattern): if path.is_dir() and (path / "bento.yaml").exists(): model = BentoInfo( @@ -74,7 +80,7 @@ def list_bento(tag: typing.Optional[str] = None) -> typing.List[BentoInfo]: if model: model_list.append(model) model_list.sort(key=lambda x: x.tag) - if VERBOSE_LEVEL.get() <= 0: + if not include_alias: seen = set() model_list = [ x diff --git a/pyproject.toml b/pyproject.toml index a7adce3d..851f9219 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ dependencies = [ "pip_requirements_parser", "nvidia-ml-py", "dulwich", + "tabulate", ] [project.scripts]