From 20e6ed26b32bbf43ecbcd6fe3ce7596459a9aa6c Mon Sep 17 00:00:00 2001 From: bojiang Date: Tue, 25 Jun 2024 16:52:42 +0800 Subject: [PATCH] refactor: openllm hello --- openllm_next/__main__.py | 314 +++++++++++++++++++++++-------- openllm_next/accelerator_spec.py | 5 +- openllm_next/cloud.py | 25 +-- openllm_next/common.py | 37 +++- openllm_next/local.py | 1 - openllm_next/model.py | 18 +- pyproject.toml | 1 + 7 files changed, 295 insertions(+), 106 deletions(-) diff --git a/openllm_next/__main__.py b/openllm_next/__main__.py index 8eedfa02..10e38a5d 100644 --- a/openllm_next/__main__.py +++ b/openllm_next/__main__.py @@ -1,6 +1,8 @@ -from typing import Annotated - +from typing import Annotated, Optional +from collections import defaultdict +import sys import questionary + import typer from openllm_next.accelerator_spec import ( @@ -8,11 +10,10 @@ from openllm_next.accelerator_spec import ( can_run, get_local_machine_spec, ) -from openllm_next.cloud import app as cloud_app +from openllm_next.cloud import app as cloud_app, ensure_cloud_context from openllm_next.cloud import get_cloud_machine_spec -from openllm_next.cloud import run as cloud_run -from openllm_next.cloud import serve as cloud_serve -from openllm_next.common import VERBOSE_LEVEL, BentoInfo +from openllm_next.cloud import serve as cloud_deploy +from openllm_next.common import VERBOSE_LEVEL, BentoInfo, FORCE, output from openllm_next.local import run as local_run from openllm_next.local import serve as local_serve from openllm_next.model import app as model_app @@ -26,97 +27,249 @@ app.add_typer(model_app, name="model") app.add_typer(cloud_app, name="cloud") -def _pre_select(model: str) -> tuple[BentoInfo, DeploymentTarget]: +def _pick_bento(model: str, target: Optional[DeploymentTarget] = None) -> BentoInfo: bentos = list_bento(model) if len(bentos) == 0: - typer.echo(f"No model found for {model}", err=True) + output(f"No model found for {model}", level=20, style="red") raise typer.Exit(1) - local = 
get_local_machine_spec() - if len(bentos) == 1: - bento = bentos[0] - if can_run(bento, local) <= 0: - questionary.print( - f"No deployment target found for {bento.name}:{bento.version}", - style="red", - ) - raise typer.Exit(1) - return bento, local + if FORCE.get(): + output(f"Found model {bentos[0]}", level=10, style="green") + return bentos[0] + if target is None: + return bentos[0] + if can_run(bentos[0], target) <= 0: + return bentos[0] + output(f"Found model {bentos[0]}", level=10, style="green") + return bentos[0] - choices = [] - choices += [questionary.Separator("Local available models")] - choices += [ - questionary.Choice( - f" {bento.name}:{bento.version}", - (bento, local), + if target is None: + output( + f"Multiple models match {model}, did you mean one of these?", + level=20, + style="red", ) - for bento in bentos - if can_run(bento) > 0 - ] - choices += [questionary.Separator("Cloud available models")] - choices += [ - questionary.Choice( - f" {bento.name}:{bento.version}", - (bento, None), - ) - for bento in bentos - ] - - choosen: tuple[BentoInfo, DeploymentTarget] = questionary.select( - "Select a model to run", - choices=choices, - ).ask() - - if not choosen: - questionary.print("No model selected", style="red") + for bento in bentos: + output(f" {bento}", level=20, style="red") raise typer.Exit(1) - bento, target = choosen - if target is None: - cloud_targets = get_cloud_machine_spec() - cloud_targets = [ - target for target in cloud_targets if can_run(bento, target) > 0 - ] - if len(cloud_targets) == 0: - questionary.print( - f"No suitable instance type found for {bento.name}:{bento.version}", - style="red", - ) - raise typer.Exit(1) - target = questionary.select( - "Select a cloud target", - choices=[ - questionary.Choice( - f" {target.name}", - target, - ) - for target in cloud_targets - ], - ).ask() - if not target: - questionary.print("No target selected", style="red") - raise typer.Exit(1) + filtered = [bento for bento in bentos if 
can_run(bento, target) > 0] + if len(filtered) == 0: + output(f"No deployment target found for {model}", level=20, style="red") + raise typer.Exit(1) - return bento, target + if len(filtered) == 0: + output(f"No deployment target found for {model}", level=20, style="red") + raise typer.Exit(1) + + if len(bentos) > 1: + output( + f"Multiple models match {model}, did you mean one of these?", + level=20, + style="red", + ) + for bento in bentos: + output(f" {bento}", level=20, style="red") + raise typer.Exit(1) + + return bentos[0] + + +def _select_bento_name(models, target): + from tabulate import tabulate + + options = [] + model_infos = [ + [model.repo.name, model.name, model.tag, can_run(model, target)] + for model in models + ] + model_name_groups = defaultdict(lambda: 0) + for repo, name, tag, score in model_infos: + model_name_groups[(repo, name)] += score + table_data = [ + [name, repo, "*" if score > 0 else ""] + for (repo, name), score in model_name_groups.items() + ] + table = tabulate( + table_data, + headers=["model", "repo", "locally runnable"], + ).split("\n") + headers = f"{table[0]}\n {table[1]}" + + options.append(questionary.Separator(headers)) + for table_data, table_line in zip(table_data, table[2:]): + options.append(questionary.Choice(table_line, value=table_data[:2])) + selected = questionary.select("Select a model", options).ask() + if selected is None: + raise typer.Exit(1) + return selected + + +def _select_bento_version(models, target, bento_name, repo): + from tabulate import tabulate + + model_infos = [ + [model, can_run(model, target)] + for model in models + if model.name == bento_name and model.repo.name == repo + ] + + table_data = [ + [model.version, "yes" if score > 0 else ""] + for model, score in model_infos + if model.name == bento_name and model.repo.name == repo + ] + if not table_data: + output(f"No model found for {bento_name} in {repo}", level=20, style="red") + raise typer.Exit(1) + table = tabulate( + table_data, + 
headers=["version", "locally runnable"], + ).split("\n") + + options = [] + options.append(questionary.Separator(f"{table[0]}\n {table[1]}")) + for table_data, table_line in zip(model_infos, table[2:]): + options.append(questionary.Choice(table_line, value=table_data)) + selected = questionary.select("Select a version", options).ask() + if selected is None: + raise typer.Exit(1) + return selected + + +def _select_target(bento, targets): + from tabulate import tabulate + + options = [] + targets.sort(key=lambda x: can_run(bento, x), reverse=True) + if not targets: + output( + "No available instance type, check your bentocloud account", + level=20, + style="red", + ) + raise typer.Exit(1) + + table = tabulate( + [ + [ + target.name, + target.accelerators_repr, + target.price, + "" if can_run(bento, target) else "insufficient res.", + ] + for target in targets + ], + headers=["instance type", "accelerator", "price", "deployable"], + ).split("\n") + options.append(questionary.Separator(f"{table[0]}\n {table[1]}")) + + for target, line in zip(targets, table[2:]): + options.append( + questionary.Choice( + f"{line}", + value=target, + ) + ) + selected = questionary.select("Select an instance type", options).ask() + if selected is None: + raise typer.Exit(1) + return selected + + +def _select_action(bento, score): + if score > 0: + options = [ + questionary.Separator("Available actions"), + questionary.Separator("0. Run the model in terminal"), + questionary.Choice(f" $ openllm run {bento}", value="run"), + questionary.Separator(" "), + questionary.Separator("1. Serve the model locally and get a chat server"), + questionary.Choice(f" $ openllm serve {bento}", value="serve"), + questionary.Separator(" "), + questionary.Separator( + "2. Deploy the model to bentocloud and get a scalable chat server" + ), + questionary.Choice(f" $ openllm deploy {bento}", value="deploy"), + ] + else: + options = [ + questionary.Separator("Available actions"), + questionary.Separator("0. 
Run the model in terminal"), + questionary.Choice( + f" $ openllm run {bento}", + value="run", + disabled="insufficient resources", + shortcut_key="0", + ), + questionary.Separator(" "), + questionary.Separator("1. Serve the model locally and get a chat server"), + questionary.Choice( + f" $ openllm serve {bento}", + value="serve", + disabled="insufficient resources", + shortcut_key="1", + ), + questionary.Separator(" "), + questionary.Separator( + "2. Deploy the model to bentocloud and get a scalable chat server" + ), + questionary.Choice( + f" $ openllm deploy {bento}", + value="deploy", + shortcut_key="2", + ), + ] + action = questionary.select("Select an action", options).ask() + if action is None: + raise typer.Exit(1) + if action == "run": + local_run(bento) + elif action == "serve": + local_serve(bento) + elif action == "deploy": + ensure_cloud_context() + targets = get_cloud_machine_spec() + target = _select_target(bento, targets) + cloud_deploy(bento, target) + + +@app.command() +def hello(): + target = get_local_machine_spec() + output(f" Detected Platform: {target.platform}", style="green") + if target.accelerators: + output(" Detected Accelerators: ", style="green") + for a in target.accelerators: + output(f" - {a.model} {a.memory_size}GB", style="green") + else: + output(" Detected Accelerators: None", style="yellow") + + models = list_bento() + + bento_name, repo = _select_bento_name(models, target) + bento, score = _select_bento_version(models, target, bento_name, repo) + _select_action(bento, score) @app.command() def serve(model: Annotated[str, typer.Argument()] = ""): - bento, target = _pre_select(model) - if target and target.source == "local": - local_serve(bento) - else: - cloud_serve(bento, target) + target = get_local_machine_spec() + bento = _pick_bento(model, target) + local_serve(bento) @app.command() def run(model: Annotated[str, typer.Argument()] = ""): - bento, target = _pre_select(model) - if target and target.source == "local": - 
local_run(bento)
-    else:
-        cloud_run(bento, target)
+    target = get_local_machine_spec()
+    bento = _pick_bento(model, target)
+    local_run(bento)
+
+
+@app.command()
+def deploy(model: Annotated[str, typer.Argument()] = ""):
+    targets = get_cloud_machine_spec()
 
 
 def typer_callback(verbose: int = 0):
@@ -125,6 +278,9 @@ def typer_callback(verbose: int = 0):
 
 
 def main():
+    if sys.version_info < (3, 9):
+        output("Python 3.9 or higher is required", level=20, style="red")
+        sys.exit(1)
     app.callback()(typer_callback)
     app()
 
diff --git a/openllm_next/accelerator_spec.py b/openllm_next/accelerator_spec.py
index 3850b680..6c67c22d 100644
--- a/openllm_next/accelerator_spec.py
+++ b/openllm_next/accelerator_spec.py
@@ -21,6 +21,9 @@ class Accelerator(SimpleNamespace):
     def __eq__(self, other):
         return self.memory_size == other.memory_size
 
+    def __repr__(self):
+        return f"{self.model}({self.memory_size}GB)"
+
 
 class Resource(SimpleNamespace):
     cpu: int
@@ -93,7 +96,7 @@ def get_local_machine_spec():
             memory_info = nvmlDeviceGetMemoryInfo(handle)
             accelerators.append(
                 Accelerator(
-                    name=name, memory_size=math.ceil(int(memory_info.total) / 1024**3)
+                    model=name, memory_size=math.ceil(int(memory_info.total) / 1024**3)
                 )
             )
         nvmlShutdown()
diff --git a/openllm_next/cloud.py b/openllm_next/cloud.py
index d2d85286..c7110ba2 100644
--- a/openllm_next/cloud.py
+++ b/openllm_next/cloud.py
@@ -13,6 +13,7 @@ from openllm_next.common import (
     ERROR_STYLE,
     BentoInfo,
     DeploymentTarget,
+    output,
     run_command,
 )
 
@@ -62,14 +63,12 @@ def _get_deploy_cmd(bento: BentoInfo, target: typing.Optional[DeploymentTarget]
     return cmd, env, None
 
 
-def _ensure_cloud_context():
+def ensure_cloud_context():
     cmd = ["bentoml", "cloud", "current-context"]
     try:
         result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
         context = json.loads(result)
-        questionary.print(
-            f"BentoCloud already logged in: {context['endpoint']}", style="green"
-        )
+        output(f"  BentoCloud already logged in: {context['endpoint']}", 
style="green") except subprocess.CalledProcessError: action = questionary.select( "BentoCloud not logged in", @@ -79,10 +78,9 @@ def _ensure_cloud_context(): ], ).ask() if action is None: - questionary.print("Cancelled", style=ERROR_STYLE) raise typer.Exit(1) elif action == "get an account in two minutes": - questionary.print( + output( "Please visit https://cloud.bentoml.com to get your token", style="yellow", ) @@ -105,14 +103,13 @@ def _ensure_cloud_context(): ] try: result = subprocess.check_output(cmd) - questionary.print("Logged in successfully", style="green") + output(" Logged in successfully", style="green") except subprocess.CalledProcessError: - questionary.print("Failed to login", style=ERROR_STYLE) + output(" Failed to login", style=ERROR_STYLE) raise typer.Exit(1) def get_cloud_machine_spec(): - _ensure_cloud_context() cmd = ["bentoml", "deployment", "list-instance-types", "-o", "json"] try: result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL) @@ -137,14 +134,6 @@ def get_cloud_machine_spec(): def serve(bento: BentoInfo, target: DeploymentTarget): - _ensure_cloud_context() + ensure_cloud_context() cmd, env, cwd = _get_deploy_cmd(bento, target) run_command(cmd, env=env, cwd=cwd) - - -def run(bento: BentoInfo, target: DeploymentTarget): - questionary.print( - "`run` with bentocloud is not supported yet, please use `serve` instead", - style=ERROR_STYLE, - ) - raise typer.Exit(1) diff --git a/openllm_next/common.py b/openllm_next/common.py index ef5e348a..96a4f984 100644 --- a/openllm_next/common.py +++ b/openllm_next/common.py @@ -1,5 +1,7 @@ import functools import signal +import io +from collections import UserList import asyncio import hashlib import json @@ -55,7 +57,30 @@ class ContextVar(typing.Generic[T]): self._stack.pop() -VERBOSE_LEVEL = ContextVar(0) +VERBOSE_LEVEL = ContextVar(10) +INTERACTIVE = ContextVar(True) +FORCE = ContextVar(False) + + +def output(content, level=0, style=None): + if level >= VERBOSE_LEVEL.get(): + return + 
+ if isinstance(content, (dict, list)): + import pyaml + + out = io.StringIO() + pyaml.pprint( + content, + dst=out, + sort_dicts=False, + sort_keys=False, + ) + questionary.print(out.getvalue(), style=style, end="") + out.close() + + if isinstance(content, str): + questionary.print(content, style=style) class Config(SimpleNamespace): @@ -202,6 +227,16 @@ class DeploymentTarget(SimpleNamespace): def __hash__(self): return hash(self.source) + @property + def accelerators_repr(self) -> str: + accs = {a.model for a in self.accelerators} + if len(accs) == 0: + return "null" + if len(accs) == 1: + a = self.accelerators[0] + return f"{a.model} x{len(self.accelerators)}" + return ", ".join((f"{a.model}" for a in self.accelerators)) + def run_command( cmd, diff --git a/openllm_next/local.py b/openllm_next/local.py index 713f03e4..3b231b2d 100644 --- a/openllm_next/local.py +++ b/openllm_next/local.py @@ -1,5 +1,4 @@ import asyncio -import signal import time import httpx diff --git a/openllm_next/model.py b/openllm_next/model.py index f40e01dc..8fd97e32 100644 --- a/openllm_next/model.py +++ b/openllm_next/model.py @@ -41,20 +41,26 @@ def list_(): ) -def list_bento(tag: typing.Optional[str] = None) -> typing.List[BentoInfo]: +def list_bento( + tag: typing.Optional[str] = None, + repo_name: typing.Optional[str] = None, + include_alias: bool = False, +) -> typing.List[BentoInfo]: ensure_repo_updated() if not tag: glob_pattern = "bentoml/bentos/*/*" elif ":" in tag: - repo_name, version = tag.split(":") - glob_pattern = f"bentoml/bentos/{repo_name}/{version}" + bento_name, version = tag.split(":") + glob_pattern = f"bentoml/bentos/{bento_name}/{version}" else: glob_pattern = f"bentoml/bentos/{tag}/*" model_list = [] config = load_config() - for repo_name, repo_url in config.repos.items(): - repo = parse_repo_url(repo_url, repo_name) + for _repo_name, repo_url in config.repos.items(): + if repo_name is not None and _repo_name != repo_name: + continue + repo = 
parse_repo_url(repo_url, _repo_name) for path in repo.path.glob(glob_pattern): if path.is_dir() and (path / "bento.yaml").exists(): model = BentoInfo( @@ -74,7 +80,7 @@ def list_bento(tag: typing.Optional[str] = None) -> typing.List[BentoInfo]: if model: model_list.append(model) model_list.sort(key=lambda x: x.tag) - if VERBOSE_LEVEL.get() <= 0: + if not include_alias: seen = set() model_list = [ x diff --git a/pyproject.toml b/pyproject.toml index a7adce3d..851f9219 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ dependencies = [ "pip_requirements_parser", "nvidia-ml-py", "dulwich", + "tabulate", ] [project.scripts]