refactor: openllm hello

2026-06-11 09:59:20 -04:00 · 2024-06-25 16:52:42 +08:00
parent effac3d348
commit 20e6ed26b3
7 changed files with 295 additions and 106 deletions
--- a/openllm_next/main.py
+++ b/openllm_next/main.py
@@ -1,6 +1,8 @@
-from typing import Annotated
-
+from typing import Annotated, Optional
+from collections import defaultdict
+import sys
 import questionary
+
 import typer

 from openllm_next.accelerator_spec import (
@@ -8,11 +10,10 @@ from openllm_next.accelerator_spec import (
    can_run,
    get_local_machine_spec,
 )
-from openllm_next.cloud import app as cloud_app
+from openllm_next.cloud import app as cloud_app, ensure_cloud_context
 from openllm_next.cloud import get_cloud_machine_spec
-from openllm_next.cloud import run as cloud_run
-from openllm_next.cloud import serve as cloud_serve
-from openllm_next.common import VERBOSE_LEVEL, BentoInfo
+from openllm_next.cloud import serve as cloud_deploy
+from openllm_next.common import VERBOSE_LEVEL, BentoInfo, FORCE, output
 from openllm_next.local import run as local_run
 from openllm_next.local import serve as local_serve
 from openllm_next.model import app as model_app
@@ -26,97 +27,249 @@ app.add_typer(model_app, name="model")
 app.add_typer(cloud_app, name="cloud")


-def _pre_select(model: str) -> tuple[BentoInfo, DeploymentTarget]:
+def _pick_bento(model: str, target: Optional[DeploymentTarget] = None) -> BentoInfo:
    bentos = list_bento(model)
    if len(bentos) == 0:
-        typer.echo(f"No model found for {model}", err=True)
+        output(f"No model found for {model}", level=20, style="red")
        raise typer.Exit(1)

-    local = get_local_machine_spec()
-
    if len(bentos) == 1:
-        bento = bentos[0]
-        if can_run(bento, local) <= 0:
-            questionary.print(
-                f"No deployment target found for {bento.name}:{bento.version}",
-                style="red",
-            )
-            raise typer.Exit(1)
-        return bento, local
+        if FORCE.get():
+            output(f"Found model {bentos[0]}", level=10, style="green")
+            return bentos[0]
+        if target is None:
+            return bentos[0]
+        if can_run(bentos[0], target) <= 0:
+            return bentos[0]
+        output(f"Found model {bentos[0]}", level=10, style="green")
+        return bentos[0]

-    choices = []
-    choices += [questionary.Separator("Local available models")]
-    choices += [
-        questionary.Choice(
-            f"  {bento.name}:{bento.version}",
-            (bento, local),
+    if target is None:
+        output(
+            f"Multiple models match {model}, did you mean one of these?",
+            level=20,
+            style="red",
        )
-        for bento in bentos
-        if can_run(bento) > 0
-    ]
-    choices += [questionary.Separator("Cloud available models")]
-    choices += [
-        questionary.Choice(
-            f"  {bento.name}:{bento.version}",
-            (bento, None),
-        )
-        for bento in bentos
-    ]
-
-    choosen: tuple[BentoInfo, DeploymentTarget] = questionary.select(
-        "Select a model to run",
-        choices=choices,
-    ).ask()
-
-    if not choosen:
-        questionary.print("No model selected", style="red")
+        for bento in bentos:
+            output(f"  {bento}", level=20, style="red")
        raise typer.Exit(1)

-    bento, target = choosen
-    if target is None:
-        cloud_targets = get_cloud_machine_spec()
-        cloud_targets = [
-            target for target in cloud_targets if can_run(bento, target) > 0
-        ]
-        if len(cloud_targets) == 0:
-            questionary.print(
-                f"No suitable instance type found for {bento.name}:{bento.version}",
-                style="red",
-            )
-            raise typer.Exit(1)
-        target = questionary.select(
-            "Select a cloud target",
-            choices=[
-                questionary.Choice(
-                    f"  {target.name}",
-                    target,
-                )
-                for target in cloud_targets
-            ],
-        ).ask()
-        if not target:
-            questionary.print("No target selected", style="red")
-            raise typer.Exit(1)
+    filtered = [bento for bento in bentos if can_run(bento, target) > 0]
+    if len(filtered) == 0:
+        output(f"No deployment target found for {model}", level=20, style="red")
+        raise typer.Exit(1)

-    return bento, target
+    # `filtered` is guaranteed non-empty by the guard above.  Resolve the
+    # ambiguity among the *runnable* candidates only: a unique runnable
+    # match is returned, several runnable matches are reported to the user.
+    # (Testing `bentos` here made every multi-match call exit with an error.)
+    if len(filtered) > 1:
+        output(
+            f"Multiple models match {model}, did you mean one of these?",
+            level=20,
+            style="red",
+        )
+        for bento in filtered:
+            output(f"  {bento}", level=20, style="red")
+        raise typer.Exit(1)
+
+    return filtered[0]
+
+
+def _select_bento_name(models, target):
+    """Prompt for a model and return the chosen ``[name, repo]`` pair.
+
+    The ``can_run`` scores of all entries sharing a (repo, name) pair are
+    summed; a pair whose total is positive is starred in the
+    "locally runnable" column.  Raises ``typer.Exit(1)`` when the prompt
+    returns ``None``.
+    """
+    from tabulate import tabulate
+
+    # Accumulate one score per (repo, name), keeping first-seen order.
+    totals = defaultdict(int)
+    for candidate in models:
+        key = (candidate.repo.name, candidate.name)
+        totals[key] += can_run(candidate, target)
+
+    rows = [
+        [name, repo, "*" if total > 0 else ""]
+        for (repo, name), total in totals.items()
+    ]
+    rendered = tabulate(
+        rows,
+        headers=["model", "repo", "locally runnable"],
+    ).split("\n")
+
+    # The first two rendered lines (column titles and rule) become a
+    # non-selectable separator; each remaining line maps to one row.
+    choices = [questionary.Separator(f"{rendered[0]}\n   {rendered[1]}")]
+    choices.extend(
+        questionary.Choice(line, value=row[:2])
+        for row, line in zip(rows, rendered[2:])
+    )
+
+    answer = questionary.select("Select a model", choices).ask()
+    if answer is None:
+        raise typer.Exit(1)
+    return answer
+
+
+def _select_bento_version(models, target, bento_name, repo):
+    """Prompt for a version of ``bento_name`` from ``repo``.
+
+    Returns the chosen ``[model, score]`` pair, where ``score`` is the
+    ``can_run`` result for ``target``.  Raises ``typer.Exit(1)`` when no
+    entry matches the name/repo or when the prompt returns ``None``.
+    """
+    from tabulate import tabulate
+
+    # Keep only the entries of the requested model, paired with their score.
+    candidates = []
+    for entry in models:
+        if entry.name == bento_name and entry.repo.name == repo:
+            candidates.append([entry, can_run(entry, target)])
+
+    rows = [
+        [entry.version, "yes" if score > 0 else ""] for entry, score in candidates
+    ]
+    if not rows:
+        output(f"No model found for {bento_name} in {repo}", level=20, style="red")
+        raise typer.Exit(1)
+
+    rendered = tabulate(
+        rows,
+        headers=["version", "locally runnable"],
+    ).split("\n")
+
+    # Header lines form a separator; every later line selects one candidate.
+    choices = [questionary.Separator(f"{rendered[0]}\n   {rendered[1]}")]
+    choices.extend(
+        questionary.Choice(line, value=candidate)
+        for candidate, line in zip(candidates, rendered[2:])
+    )
+
+    answer = questionary.select("Select a version", choices).ask()
+    if answer is None:
+        raise typer.Exit(1)
+    return answer
+
+
+def _select_target(bento, targets):
+    """Prompt for an instance type to deploy ``bento`` on and return it.
+
+    ``targets`` are listed best score first (``can_run(bento, target)``,
+    descending).  Targets whose score is not positive are flagged
+    "insufficient res." but remain selectable.  Raises ``typer.Exit(1)``
+    when ``targets`` is empty or the prompt returns ``None``.
+    """
+    from tabulate import tabulate
+
+    if not targets:
+        output(
+            "No available instance type, check your bentocloud account",
+            level=20,
+            style="red",
+        )
+        raise typer.Exit(1)
+
+    # Score each target exactly once and sort a private copy, so the
+    # caller's list is left untouched.  The sort is stable, so targets with
+    # equal scores keep their incoming order.
+    scored = sorted(
+        ((can_run(bento, target), target) for target in targets),
+        key=lambda pair: pair[0],
+        reverse=True,
+    )
+
+    table = tabulate(
+        [
+            [
+                target.name,
+                target.accelerators_repr,
+                target.price,
+                # `> 0` matches every other can_run check in this module; a
+                # plain truthiness test would treat a negative score as
+                # deployable.
+                "" if score > 0 else "insufficient res.",
+            ]
+            for score, target in scored
+        ],
+        headers=["instance type", "accelerator", "price", "deployable"],
+    ).split("\n")
+
+    # Header lines form a separator; every later line selects one target.
+    options = [questionary.Separator(f"{table[0]}\n   {table[1]}")]
+    options.extend(
+        questionary.Choice(f"{line}", value=target)
+        for (_, target), line in zip(scored, table[2:])
+    )
+
+    selected = questionary.select("Select an instance type", options).ask()
+    if selected is None:
+        raise typer.Exit(1)
+    return selected
+
+
+def _select_action(bento, score):
+    """Ask what to do with ``bento`` and carry out the chosen action.
+
+    The menu always lists run, serve and deploy.  When ``score`` is not
+    positive the two local actions are shown disabled with the reason
+    "insufficient resources".  Raises ``typer.Exit(1)`` when the prompt
+    returns ``None``.
+    """
+    run_label = f"  $ openllm run {bento}"
+    serve_label = f"  $ openllm serve {bento}"
+    deploy_label = f"  $ openllm deploy {bento}"
+
+    if score > 0:
+        run_choice = questionary.Choice(run_label, value="run")
+        serve_choice = questionary.Choice(serve_label, value="serve")
+        deploy_choice = questionary.Choice(deploy_label, value="deploy")
+    else:
+        # Local actions stay visible but cannot be picked on this machine.
+        run_choice = questionary.Choice(
+            run_label,
+            value="run",
+            disabled="insufficient resources",
+            shortcut_key="0",
+        )
+        serve_choice = questionary.Choice(
+            serve_label,
+            value="serve",
+            disabled="insufficient resources",
+            shortcut_key="1",
+        )
+        deploy_choice = questionary.Choice(
+            deploy_label,
+            value="deploy",
+            shortcut_key="2",
+        )
+
+    options = [
+        questionary.Separator("Available actions"),
+        questionary.Separator("0. Run the model in terminal"),
+        run_choice,
+        questionary.Separator(" "),
+        questionary.Separator("1. Serve the model locally and get a chat server"),
+        serve_choice,
+        questionary.Separator(" "),
+        questionary.Separator(
+            "2. Deploy the model to bentocloud and get a scalable chat server"
+        ),
+        deploy_choice,
+    ]
+
+    action = questionary.select("Select an action", options).ask()
+    if action is None:
+        raise typer.Exit(1)
+
+    if action == "run":
+        local_run(bento)
+        return
+    if action == "serve":
+        local_serve(bento)
+        return
+    if action == "deploy":
+        # Log in (if needed) before listing instance types.
+        ensure_cloud_context()
+        target = _select_target(bento, get_cloud_machine_spec())
+        cloud_deploy(bento, target)
+
+@app.command()
+def hello():
+    # Guided flow: report the local machine, then let the user pick a model,
+    # a version and an action.  (Kept as comments rather than a docstring so
+    # the command's generated help text is unchanged.)
+    local = get_local_machine_spec()
+    output(f"  Detected Platform: {local.platform}", style="green")
+
+    accelerators = local.accelerators
+    if not accelerators:
+        output("  Detected Accelerators: None", style="yellow")
+    else:
+        output("  Detected Accelerators: ", style="green")
+        for device in accelerators:
+            output(f"   - {device.model} {device.memory_size}GB", style="green")
+
+    catalog = list_bento()
+
+    # Each selector exits the program itself if the prompt is cancelled.
+    chosen_name, chosen_repo = _select_bento_name(catalog, local)
+    chosen_bento, chosen_score = _select_bento_version(
+        catalog, local, chosen_name, chosen_repo
+    )
+    _select_action(chosen_bento, chosen_score)


 @app.command()
 def serve(model: Annotated[str, typer.Argument()] = ""):
-    bento, target = _pre_select(model)
-    if target and target.source == "local":
-        local_serve(bento)
-    else:
-        cloud_serve(bento, target)
+    # Resolve the requested bento against this machine's spec, then serve it
+    # locally; this command no longer branches to a cloud target.
+    target = get_local_machine_spec()
+    bento = _pick_bento(model, target)
+    local_serve(bento)


 @app.command()
 def run(model: Annotated[str, typer.Argument()] = ""):
-    bento, target = _pre_select(model)
-    if target and target.source == "local":
-        local_run(bento)
-    else:
-        cloud_run(bento, target)
+    # Resolve the requested bento against this machine's spec, then run it
+    # locally; this command no longer branches to a cloud target.
+    target = get_local_machine_spec()
+    bento = _pick_bento(model, target)
+    local_run(bento)
+
+
+@app.command()
+def deploy(model: Annotated[str, typer.Argument()] = ""):
+    # The command previously fetched the instance types and discarded them.
+    # It now follows the same steps as the "deploy" action of the guided
+    # flow: resolve the bento, log in, pick an instance type, deploy.
+    # (Comments rather than a docstring, so the generated help is unchanged.)
+
+    # No target is passed: the local machine spec is not used to choose a
+    # bento for a cloud deployment, so an ambiguous name fails here.
+    bento = _pick_bento(model)
+
+    # get_cloud_machine_spec() does not establish the BentoCloud context
+    # itself, so make sure one exists before listing instance types.
+    ensure_cloud_context()
+    targets = get_cloud_machine_spec()
+    target = _select_target(bento, targets)
+    cloud_deploy(bento, target)


 def typer_callback(verbose: int = 0):
@@ -125,6 +278,9 @@ def typer_callback(verbose: int = 0):


 def main():
+    """Entry point: check the interpreter version, then run the Typer app."""
+    if sys.version_info < (3, 9):
+        # Default level (0) on purpose: `output` drops any message whose
+        # level is >= VERBOSE_LEVEL (10 by default), so level=20 would hide
+        # this error.  The text now names 3.9, the floor the check enforces.
+        output("Python 3.9 or higher is required", style="red")
+        sys.exit(1)
    app.callback()(typer_callback)
    app()

--- a/openllm_next/accelerator_spec.py
+++ b/openllm_next/accelerator_spec.py
@@ -21,6 +21,9 @@ class Accelerator(SimpleNamespace):
    def __eq__(self, other):
        return self.memory_size == other.memory_size

+    def __repr__(self) -> str:
+        # Compact display form: "<model>(<memory_size>GB)".
+        return f"{self.model}({self.memory_size}GB)"
+

 class Resource(SimpleNamespace):
    cpu: int
@@ -93,7 +96,7 @@ def get_local_machine_spec():
            memory_info = nvmlDeviceGetMemoryInfo(handle)
            accelerators.append(
                Accelerator(
-                    name=name, memory_size=math.ceil(int(memory_info.total) / 1024**3)
+                    model=name, memory_size=math.ceil(int(memory_info.total) / 1024**3)
                )
            )
        nvmlShutdown()
--- a/openllm_next/cloud.py
+++ b/openllm_next/cloud.py
@@ -13,6 +13,7 @@ from openllm_next.common import (
    ERROR_STYLE,
    BentoInfo,
    DeploymentTarget,
+    output,
    run_command,
 )

@@ -62,14 +63,12 @@ def _get_deploy_cmd(bento: BentoInfo, target: typing.Optional[DeploymentTarget]
    return cmd, env, None


-def _ensure_cloud_context():
+def ensure_cloud_context():
    cmd = ["bentoml", "cloud", "current-context"]
    try:
        result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
        context = json.loads(result)
-        questionary.print(
-            f"BentoCloud already logged in: {context['endpoint']}", style="green"
-        )
+        output(f"  BentoCloud already logged in: {context['endpoint']}", style="green")
    except subprocess.CalledProcessError:
        action = questionary.select(
            "BentoCloud not logged in",
@@ -79,10 +78,9 @@ def _ensure_cloud_context():
            ],
        ).ask()
        if action is None:
-            questionary.print("Cancelled", style=ERROR_STYLE)
            raise typer.Exit(1)
        elif action == "get an account in two minutes":
-            questionary.print(
+            output(
                "Please visit https://cloud.bentoml.com to get your token",
                style="yellow",
            )
@@ -105,14 +103,13 @@ def _ensure_cloud_context():
        ]
        try:
            result = subprocess.check_output(cmd)
-            questionary.print("Logged in successfully", style="green")
+            output("  Logged in successfully", style="green")
        except subprocess.CalledProcessError:
-            questionary.print("Failed to login", style=ERROR_STYLE)
+            output("  Failed to login", style=ERROR_STYLE)
            raise typer.Exit(1)


 def get_cloud_machine_spec():
-    _ensure_cloud_context()
    cmd = ["bentoml", "deployment", "list-instance-types", "-o", "json"]
    try:
        result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
@@ -137,14 +134,6 @@ def get_cloud_machine_spec():


 def serve(bento: BentoInfo, target: DeploymentTarget):
-    _ensure_cloud_context()
+    # Make sure a BentoCloud context exists before building and running the
+    # deploy command for `bento` on `target`.
+    ensure_cloud_context()
    cmd, env, cwd = _get_deploy_cmd(bento, target)
    run_command(cmd, env=env, cwd=cwd)
-
-
-def run(bento: BentoInfo, target: DeploymentTarget):
-    questionary.print(
-        "`run` with bentocloud is not supported yet, please use `serve` instead",
-        style=ERROR_STYLE,
-    )
-    raise typer.Exit(1)
--- a/openllm_next/common.py
+++ b/openllm_next/common.py
@@ -1,5 +1,7 @@
 import functools
 import signal
+import io
+from collections import UserList
 import asyncio
 import hashlib
 import json
@@ -55,7 +57,30 @@ class ContextVar(typing.Generic[T]):
            self._stack.pop()


-VERBOSE_LEVEL = ContextVar(0)
+VERBOSE_LEVEL = ContextVar(10)
+INTERACTIVE = ContextVar(True)
+FORCE = ContextVar(False)
+
+
+def output(content, level=0, style=None):
+    """Print ``content`` through questionary unless ``level`` filters it out.
+
+    Nothing is printed when ``level`` is greater than or equal to the
+    current ``VERBOSE_LEVEL``.  Strings are printed as-is; dicts and lists
+    are pretty-printed with ``pyaml`` first.  Any other type is ignored.
+    """
+    if level >= VERBOSE_LEVEL.get():
+        return
+
+    if isinstance(content, str):
+        questionary.print(content, style=style)
+        return
+
+    if isinstance(content, (dict, list)):
+        import pyaml
+
+        with io.StringIO() as buffer:
+            pyaml.pprint(
+                content,
+                dst=buffer,
+                sort_dicts=False,
+                sort_keys=False,
+            )
+            # The rendered text is printed with end="" so no extra newline
+            # is appended after it.
+            questionary.print(buffer.getvalue(), style=style, end="")


 class Config(SimpleNamespace):
@@ -202,6 +227,16 @@ class DeploymentTarget(SimpleNamespace):
    def __hash__(self):
        return hash(self.source)

+    @property
+    def accelerators_repr(self) -> str:
+        """Short human-readable summary of this target's accelerators.
+
+        Returns "null" when there are none, "<model> x<count>" when every
+        accelerator has the same model, and otherwise the comma-separated
+        model of each accelerator in order.
+        """
+        devices = self.accelerators
+        distinct_models = {device.model for device in devices}
+        if not distinct_models:
+            return "null"
+        if len(distinct_models) > 1:
+            return ", ".join(f"{device.model}" for device in devices)
+        return f"{devices[0].model} x{len(devices)}"
+

 def run_command(
    cmd,
--- a/openllm_next/local.py
+++ b/openllm_next/local.py
@@ -1,5 +1,4 @@
 import asyncio
-import signal
 import time
 import httpx

--- a/openllm_next/model.py
+++ b/openllm_next/model.py
@@ -41,20 +41,26 @@ def list_():
    )


-def list_bento(tag: typing.Optional[str] = None) -> typing.List[BentoInfo]:
+def list_bento(
+    tag: typing.Optional[str] = None,
+    repo_name: typing.Optional[str] = None,
+    include_alias: bool = False,
+) -> typing.List[BentoInfo]:
    ensure_repo_updated()
    if not tag:
        glob_pattern = "bentoml/bentos/*/*"
    elif ":" in tag:
-        repo_name, version = tag.split(":")
-        glob_pattern = f"bentoml/bentos/{repo_name}/{version}"
+        bento_name, version = tag.split(":")
+        glob_pattern = f"bentoml/bentos/{bento_name}/{version}"
    else:
        glob_pattern = f"bentoml/bentos/{tag}/*"

    model_list = []
    config = load_config()
-    for repo_name, repo_url in config.repos.items():
-        repo = parse_repo_url(repo_url, repo_name)
+    for _repo_name, repo_url in config.repos.items():
+        if repo_name is not None and _repo_name != repo_name:
+            continue
+        repo = parse_repo_url(repo_url, _repo_name)
        for path in repo.path.glob(glob_pattern):
            if path.is_dir() and (path / "bento.yaml").exists():
                model = BentoInfo(
@@ -74,7 +80,7 @@ def list_bento(tag: typing.Optional[str] = None) -> typing.List[BentoInfo]:
            if model:
                model_list.append(model)
    model_list.sort(key=lambda x: x.tag)
-    if VERBOSE_LEVEL.get() <= 0:
+    if not include_alias:
        seen = set()
        model_list = [
            x
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,6 +18,7 @@ dependencies = [
    "pip_requirements_parser",
    "nvidia-ml-py",
    "dulwich",
+    "tabulate",
 ]

 [project.scripts]