refactor: openllm hello

This commit is contained in:
bojiang
2024-06-25 16:52:42 +08:00
parent effac3d348
commit 20e6ed26b3
7 changed files with 295 additions and 106 deletions

View File

@@ -1,6 +1,8 @@
from typing import Annotated
from typing import Annotated, Optional
from collections import defaultdict
import sys
import questionary
import typer
from openllm_next.accelerator_spec import (
@@ -8,11 +10,10 @@ from openllm_next.accelerator_spec import (
can_run,
get_local_machine_spec,
)
from openllm_next.cloud import app as cloud_app
from openllm_next.cloud import app as cloud_app, ensure_cloud_context
from openllm_next.cloud import get_cloud_machine_spec
from openllm_next.cloud import run as cloud_run
from openllm_next.cloud import serve as cloud_serve
from openllm_next.common import VERBOSE_LEVEL, BentoInfo
from openllm_next.cloud import serve as cloud_deploy
from openllm_next.common import VERBOSE_LEVEL, BentoInfo, FORCE, output
from openllm_next.local import run as local_run
from openllm_next.local import serve as local_serve
from openllm_next.model import app as model_app
@@ -26,97 +27,249 @@ app.add_typer(model_app, name="model")
app.add_typer(cloud_app, name="cloud")
def _pre_select(model: str) -> tuple[BentoInfo, DeploymentTarget]:
def _pick_bento(model: str, target: Optional[DeploymentTarget] = None) -> BentoInfo:
bentos = list_bento(model)
if len(bentos) == 0:
typer.echo(f"No model found for {model}", err=True)
output(f"No model found for {model}", level=20, style="red")
raise typer.Exit(1)
local = get_local_machine_spec()
if len(bentos) == 1:
bento = bentos[0]
if can_run(bento, local) <= 0:
questionary.print(
f"No deployment target found for {bento.name}:{bento.version}",
style="red",
)
raise typer.Exit(1)
return bento, local
if FORCE.get():
output(f"Found model {bentos[0]}", level=10, style="green")
return bentos[0]
if target is None:
return bentos[0]
if can_run(bentos[0], target) <= 0:
return bentos[0]
output(f"Found model {bentos[0]}", level=10, style="green")
return bentos[0]
choices = []
choices += [questionary.Separator("Local available models")]
choices += [
questionary.Choice(
f" {bento.name}:{bento.version}",
(bento, local),
if target is None:
output(
f"Multiple models match {model}, did you mean one of these?",
level=20,
style="red",
)
for bento in bentos
if can_run(bento) > 0
]
choices += [questionary.Separator("Cloud available models")]
choices += [
questionary.Choice(
f" {bento.name}:{bento.version}",
(bento, None),
)
for bento in bentos
]
choosen: tuple[BentoInfo, DeploymentTarget] = questionary.select(
"Select a model to run",
choices=choices,
).ask()
if not choosen:
questionary.print("No model selected", style="red")
for bento in bentos:
output(f" {bento}", level=20, style="red")
raise typer.Exit(1)
bento, target = choosen
if target is None:
cloud_targets = get_cloud_machine_spec()
cloud_targets = [
target for target in cloud_targets if can_run(bento, target) > 0
]
if len(cloud_targets) == 0:
questionary.print(
f"No suitable instance type found for {bento.name}:{bento.version}",
style="red",
)
raise typer.Exit(1)
target = questionary.select(
"Select a cloud target",
choices=[
questionary.Choice(
f" {target.name}",
target,
)
for target in cloud_targets
],
).ask()
if not target:
questionary.print("No target selected", style="red")
raise typer.Exit(1)
filtered = [bento for bento in bentos if can_run(bento, target) > 0]
if len(filtered) == 0:
output(f"No deployment target found for {model}", level=20, style="red")
raise typer.Exit(1)
return bento, target
if len(filtered) == 0:
output(f"No deployment target found for {model}", level=20, style="red")
raise typer.Exit(1)
if len(bentos) > 1:
output(
f"Multiple models match {model}, did you mean one of these?",
level=20,
style="red",
)
for bento in bentos:
output(f" {bento}", level=20, style="red")
raise typer.Exit(1)
return bentos[0]
def _select_bento_name(models, target):
    """Prompt the user to pick a model (name, repo) pair from *models*.

    Models are grouped by (repo, name); a group is marked locally
    runnable ("*") when the summed ``can_run`` score over its versions
    is positive.

    Returns the selected two-item row ``[name, repo_name]``; raises
    ``typer.Exit(1)`` when the prompt is cancelled.
    """
    from tabulate import tabulate

    # Aggregate runnability scores per (repo, model-name) group.
    # (Fixes: defaultdict(int) instead of defaultdict(lambda: 0), and no
    # more shadowing of the row list by the zip loop variable.)
    scores = defaultdict(int)
    for model in models:
        scores[(model.repo.name, model.name)] += can_run(model, target)

    rows = [
        [name, repo, "*" if score > 0 else ""]
        for (repo, name), score in scores.items()
    ]
    table = tabulate(
        rows,
        headers=["model", "repo", "locally runnable"],
    ).split("\n")

    # tabulate's first two lines are the header and its underline.
    options = [questionary.Separator(f"{table[0]}\n {table[1]}")]
    for row, line in zip(rows, table[2:]):
        # value carries [name, repo]; callers unpack it as (bento_name, repo).
        options.append(questionary.Choice(line, value=row[:2]))

    selected = questionary.select("Select a model", options).ask()
    if selected is None:
        raise typer.Exit(1)
    return selected
def _select_bento_version(models, target, bento_name, repo):
    """Prompt for a concrete version of *bento_name* from repo *repo*.

    Returns the selected ``[model, score]`` pair, where *score* is the
    ``can_run`` result against *target*; raises ``typer.Exit(1)`` when
    no matching version exists or the prompt is cancelled.
    """
    from tabulate import tabulate

    # Keep only versions of the chosen model in the chosen repo, paired
    # with their local-runnability score.  (Fix: the original repeated
    # this exact filter in the table comprehension a second time.)
    candidates = [
        [model, can_run(model, target)]
        for model in models
        if model.name == bento_name and model.repo.name == repo
    ]
    if not candidates:
        output(f"No model found for {bento_name} in {repo}", level=20, style="red")
        raise typer.Exit(1)

    rows = [
        [model.version, "yes" if score > 0 else ""]
        for model, score in candidates
    ]
    table = tabulate(
        rows,
        headers=["version", "locally runnable"],
    ).split("\n")

    # tabulate's first two lines are the header and its underline.
    options = [questionary.Separator(f"{table[0]}\n {table[1]}")]
    for candidate, line in zip(candidates, table[2:]):
        options.append(questionary.Choice(line, value=candidate))

    selected = questionary.select("Select a version", options).ask()
    if selected is None:
        raise typer.Exit(1)
    return selected
def _select_target(bento, targets):
    """Prompt for a cloud instance type able to run *bento*.

    Targets are listed best ``can_run`` score first; undeployable ones
    are flagged "insufficient res." but remain selectable.  Raises
    ``typer.Exit(1)`` when *targets* is empty or the prompt is
    cancelled.
    """
    from tabulate import tabulate

    # Check emptiness before doing any work (the original sorted first).
    if not targets:
        output(
            "No available instance type, check your bentocloud account",
            level=20,
            style="red",
        )
        raise typer.Exit(1)

    # Score each target exactly once and sort a copy best-first, instead
    # of mutating the caller's list and calling can_run twice per target.
    scored = sorted(
        ((can_run(bento, t), t) for t in targets),
        key=lambda pair: pair[0],
        reverse=True,
    )
    ordered = [t for _, t in scored]
    table = tabulate(
        [
            [
                t.name,
                t.accelerators_repr,
                t.price,
                "" if score else "insufficient res.",
            ]
            for score, t in scored
        ],
        headers=["instance type", "accelerator", "price", "deployable"],
    ).split("\n")

    # tabulate's first two lines are the header and its underline.
    options = [questionary.Separator(f"{table[0]}\n {table[1]}")]
    for t, line in zip(ordered, table[2:]):
        options.append(questionary.Choice(line, value=t))

    selected = questionary.select("Select an instance type", options).ask()
    if selected is None:
        raise typer.Exit(1)
    return selected
def _select_action(bento, score):
    """Ask what to do with *bento* and dispatch the chosen action.

    *score* is the local ``can_run`` result: when it is not positive the
    local "run"/"serve" choices are shown disabled.  "deploy" ensures a
    BentoCloud login, prompts for an instance type and deploys.  Raises
    ``typer.Exit(1)`` when the prompt is cancelled.
    """
    # The original duplicated the whole options list in two branches; a
    # single `disabled` flag is enough — questionary treats
    # Choice(disabled=None) exactly like an enabled choice, and
    # shortcut_key is inert unless select(use_shortcuts=True) is used.
    disabled = None if score > 0 else "insufficient resources"
    options = [
        questionary.Separator("Available actions"),
        questionary.Separator("0. Run the model in terminal"),
        questionary.Choice(
            f" $ openllm run {bento}",
            value="run",
            disabled=disabled,
            shortcut_key="0",
        ),
        questionary.Separator(" "),
        questionary.Separator("1. Serve the model locally and get a chat server"),
        questionary.Choice(
            f" $ openllm serve {bento}",
            value="serve",
            disabled=disabled,
            shortcut_key="1",
        ),
        questionary.Separator(" "),
        questionary.Separator(
            "2. Deploy the model to bentocloud and get a scalable chat server"
        ),
        questionary.Choice(
            f" $ openllm deploy {bento}",
            value="deploy",
            shortcut_key="2",
        ),
    ]
    action = questionary.select("Select an action", options).ask()
    if action is None:
        raise typer.Exit(1)
    if action == "run":
        local_run(bento)
    elif action == "serve":
        local_serve(bento)
    elif action == "deploy":
        # Deploying needs a logged-in BentoCloud context and an instance type.
        ensure_cloud_context()
        targets = get_cloud_machine_spec()
        target = _select_target(bento, targets)
        cloud_deploy(bento, target)
@app.command()
def hello():
    """Interactive entry point: report the local machine, then walk the
    user through choosing a model, a version and an action."""
    spec = get_local_machine_spec()
    output(f" Detected Platform: {spec.platform}", style="green")
    accelerators = spec.accelerators
    if not accelerators:
        output(" Detected Accelerators: None", style="yellow")
    else:
        output(" Detected Accelerators: ", style="green")
        for acc in accelerators:
            output(f" - {acc.model} {acc.memory_size}GB", style="green")
    models = list_bento()
    name, repo = _select_bento_name(models, spec)
    bento, runnable_score = _select_bento_version(models, spec, name, repo)
    _select_action(bento, runnable_score)
@app.command()
def serve(model: Annotated[str, typer.Argument()] = ""):
bento, target = _pre_select(model)
if target and target.source == "local":
local_serve(bento)
else:
cloud_serve(bento, target)
target = get_local_machine_spec()
bento = _pick_bento(model, target)
local_serve(bento)
@app.command()
def run(model: Annotated[str, typer.Argument()] = ""):
bento, target = _pre_select(model)
if target and target.source == "local":
local_run(bento)
else:
cloud_run(bento, target)
target = get_local_machine_spec()
bento = _pick_bento(model, target)
local_run(bento)
@app.command()
def deploy(model: Annotated[str, typer.Argument()] = ""):
targets = get_cloud_machine_spec()
def typer_callback(verbose: int = 0):
@@ -125,6 +278,9 @@ def typer_callback(verbose: int = 0):
def main():
    """Console-script entry point: enforce the minimum Python version,
    register the global options callback and hand control to Typer."""
    # The code base uses 3.9+ syntax (e.g. built-in generics such as
    # tuple[...] in annotations), so bail out with a readable message
    # instead of a later SyntaxError.
    if sys.version_info < (3, 9):
        # BUG FIX: the message previously said "3.8 or higher" while the
        # check actually requires 3.9.
        output("Python 3.9 or higher is required", level=20, style="red")
        sys.exit(1)
    app.callback()(typer_callback)
    app()

View File

@@ -21,6 +21,9 @@ class Accelerator(SimpleNamespace):
def __eq__(self, other):
    # Two accelerators compare equal when their memory capacity matches;
    # the model name is deliberately ignored for capability comparisons.
    # NOTE(review): no matching __hash__ is visible here, so defining
    # __eq__ makes instances unhashable — confirm that is intended.
    return self.memory_size == other.memory_size
def __repr__(self):
    # Compact display form, e.g. "A100(80GB)".
    return f"{self.model}({self.memory_size}GB)"
class Resource(SimpleNamespace):
cpu: int
@@ -93,7 +96,7 @@ def get_local_machine_spec():
memory_info = nvmlDeviceGetMemoryInfo(handle)
accelerators.append(
Accelerator(
name=name, memory_size=math.ceil(int(memory_info.total) / 1024**3)
model=name, memory_size=math.ceil(int(memory_info.total) / 1024**3)
)
)
nvmlShutdown()

View File

@@ -13,6 +13,7 @@ from openllm_next.common import (
ERROR_STYLE,
BentoInfo,
DeploymentTarget,
output,
run_command,
)
@@ -62,14 +63,12 @@ def _get_deploy_cmd(bento: BentoInfo, target: typing.Optional[DeploymentTarget]
return cmd, env, None
def _ensure_cloud_context():
def ensure_cloud_context():
cmd = ["bentoml", "cloud", "current-context"]
try:
result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
context = json.loads(result)
questionary.print(
f"BentoCloud already logged in: {context['endpoint']}", style="green"
)
output(f" BentoCloud already logged in: {context['endpoint']}", style="green")
except subprocess.CalledProcessError:
action = questionary.select(
"BentoCloud not logged in",
@@ -79,10 +78,9 @@ def _ensure_cloud_context():
],
).ask()
if action is None:
questionary.print("Cancelled", style=ERROR_STYLE)
raise typer.Exit(1)
elif action == "get an account in two minutes":
questionary.print(
output(
"Please visit https://cloud.bentoml.com to get your token",
style="yellow",
)
@@ -105,14 +103,13 @@ def _ensure_cloud_context():
]
try:
result = subprocess.check_output(cmd)
questionary.print("Logged in successfully", style="green")
output(" Logged in successfully", style="green")
except subprocess.CalledProcessError:
questionary.print("Failed to login", style=ERROR_STYLE)
output(" Failed to login", style=ERROR_STYLE)
raise typer.Exit(1)
def get_cloud_machine_spec():
_ensure_cloud_context()
cmd = ["bentoml", "deployment", "list-instance-types", "-o", "json"]
try:
result = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
@@ -137,14 +134,6 @@ def get_cloud_machine_spec():
def serve(bento: BentoInfo, target: DeploymentTarget):
_ensure_cloud_context()
ensure_cloud_context()
cmd, env, cwd = _get_deploy_cmd(bento, target)
run_command(cmd, env=env, cwd=cwd)
def run(bento: BentoInfo, target: DeploymentTarget):
    """Placeholder: interactive `run` against BentoCloud is not implemented.

    Always prints a pointer to `serve` and exits with status 1; *bento*
    and *target* are accepted only to match the local `run` signature.
    """
    questionary.print(
        "`run` with bentocloud is not supported yet, please use `serve` instead",
        style=ERROR_STYLE,
    )
    raise typer.Exit(1)

View File

@@ -1,5 +1,7 @@
import functools
import signal
import io
from collections import UserList
import asyncio
import hashlib
import json
@@ -55,7 +57,30 @@ class ContextVar(typing.Generic[T]):
self._stack.pop()
VERBOSE_LEVEL = ContextVar(0)
VERBOSE_LEVEL = ContextVar(10)
INTERACTIVE = ContextVar(True)
FORCE = ContextVar(False)
def output(content, level=0, style=None):
    """Print *content* through questionary unless filtered by verbosity.

    dict/list payloads are rendered as YAML with insertion order kept;
    plain strings are printed verbatim; any other type is silently
    ignored.

    NOTE(review): a message is suppressed when level >= VERBOSE_LEVEL,
    i.e. HIGHER levels are quieter — confirm this direction is intended,
    since error messages elsewhere are emitted at level=20 while the
    default threshold is 10.
    """
    if level >= VERBOSE_LEVEL.get():
        # Filtered out at the current verbosity threshold.
        return
    if isinstance(content, str):
        questionary.print(content, style=style)
    elif isinstance(content, (dict, list)):
        import pyaml

        buf = io.StringIO()
        pyaml.pprint(
            content,
            dst=buf,
            sort_dicts=False,
            sort_keys=False,
        )
        # pyaml already terminates its output with a newline, so end="".
        questionary.print(buf.getvalue(), style=style, end="")
        buf.close()
class Config(SimpleNamespace):
@@ -202,6 +227,16 @@ class DeploymentTarget(SimpleNamespace):
def __hash__(self):
return hash(self.source)
@property
def accelerators_repr(self) -> str:
    """Human-readable accelerator summary.

    "null" when there are none; "<model> xN" when all N accelerators
    share one model; otherwise a comma-separated list of every model
    name (duplicates included).
    """
    distinct_models = {acc.model for acc in self.accelerators}
    if not distinct_models:
        return "null"
    if len(distinct_models) == 1:
        first = self.accelerators[0]
        return f"{first.model} x{len(self.accelerators)}"
    return ", ".join(acc.model for acc in self.accelerators)
def run_command(
cmd,

View File

@@ -1,5 +1,4 @@
import asyncio
import signal
import time
import httpx

View File

@@ -41,20 +41,26 @@ def list_():
)
def list_bento(tag: typing.Optional[str] = None) -> typing.List[BentoInfo]:
def list_bento(
tag: typing.Optional[str] = None,
repo_name: typing.Optional[str] = None,
include_alias: bool = False,
) -> typing.List[BentoInfo]:
ensure_repo_updated()
if not tag:
glob_pattern = "bentoml/bentos/*/*"
elif ":" in tag:
repo_name, version = tag.split(":")
glob_pattern = f"bentoml/bentos/{repo_name}/{version}"
bento_name, version = tag.split(":")
glob_pattern = f"bentoml/bentos/{bento_name}/{version}"
else:
glob_pattern = f"bentoml/bentos/{tag}/*"
model_list = []
config = load_config()
for repo_name, repo_url in config.repos.items():
repo = parse_repo_url(repo_url, repo_name)
for _repo_name, repo_url in config.repos.items():
if repo_name is not None and _repo_name != repo_name:
continue
repo = parse_repo_url(repo_url, _repo_name)
for path in repo.path.glob(glob_pattern):
if path.is_dir() and (path / "bento.yaml").exists():
model = BentoInfo(
@@ -74,7 +80,7 @@ def list_bento(tag: typing.Optional[str] = None) -> typing.List[BentoInfo]:
if model:
model_list.append(model)
model_list.sort(key=lambda x: x.tag)
if VERBOSE_LEVEL.get() <= 0:
if not include_alias:
seen = set()
model_list = [
x

View File

@@ -18,6 +18,7 @@ dependencies = [
"pip_requirements_parser",
"nvidia-ml-py",
"dulwich",
"tabulate",
]
[project.scripts]