chore: output level

This commit is contained in:
bojiang
2024-07-01 15:37:38 +08:00
parent a3f9aaf35d
commit 7baac978fe
8 changed files with 209 additions and 139 deletions

View File

@@ -1,26 +1,35 @@
from typing import Annotated, Optional
from collections import defaultdict
import random
import sys
import questionary
from collections import defaultdict
from typing import Annotated, Iterable, Optional
import questionary
import typer
import typer.core
from click import Context
from openllm_next.accelerator_spec import (
DeploymentTarget,
can_run,
get_local_machine_spec,
)
from openllm_next.cloud import app as cloud_app, ensure_cloud_context
from openllm_next.cloud import get_cloud_machine_spec
from openllm_next.cloud import deploy as cloud_deploy
from openllm_next.common import INTERACTIVE, VERBOSE_LEVEL, output, CHECKED
from openllm_next.cloud import ensure_cloud_context, get_cloud_machine_spec
from openllm_next.common import CHECKED, INTERACTIVE, VERBOSE_LEVEL, output
from openllm_next.local import run as local_run
from openllm_next.local import serve as local_serve
from openllm_next.model import app as model_app, ensure_bento
from openllm_next.model import list_bento
from openllm_next.model import app as model_app
from openllm_next.model import ensure_bento, list_bento
from openllm_next.repo import app as repo_app
class OrderedCommands(typer.core.TyperGroup):
    """Command group whose listing follows the order of ``self.commands``.

    Presumably used so help output shows commands in registration order
    instead of a sorted order -- confirm against the Typer/click base class.
    """

    def list_commands(self, _: Context) -> Iterable[str]:
        """Return the command names exactly as ``self.commands`` iterates them.

        The context argument is part of the overridden interface and is
        not used here.
        """
        return [*self.commands]
app = typer.Typer(
cls=OrderedCommands,
no_args_is_help=True,
help="`openllm hello` to get started. "
"OpenLLM is a CLI tool to manage and deploy open source LLMs and"
@@ -29,7 +38,6 @@ app = typer.Typer(
app.add_typer(repo_app, name="repo")
app.add_typer(model_app, name="model")
app.add_typer(cloud_app, name="cloud")
def _select_bento_name(models, target):
@@ -47,7 +55,7 @@ def _select_bento_name(models, target):
for (repo, name), score in model_name_groups.items()
]
if not table_data:
output("No model found", level=20, style="red")
output("No model found", style="red")
raise typer.Exit(1)
table = tabulate(
table_data,
@@ -79,7 +87,7 @@ def _select_bento_version(models, target, bento_name, repo):
if model.name == bento_name and model.repo.name == repo
]
if not table_data:
output(f"No model found for {bento_name} in {repo}", level=20, style="red")
output(f"No model found for {bento_name} in {repo}", style="red")
raise typer.Exit(1)
table = tabulate(
table_data,
@@ -104,7 +112,6 @@ def _select_target(bento, targets):
if not targets:
output(
"No available instance type, check your bentocloud account",
level=20,
style="red",
)
raise typer.Exit(1)
@@ -183,19 +190,35 @@ def _select_action(bento, score):
if action is None:
raise typer.Exit(1)
if action == "run":
local_run(bento)
try:
local_run(bento)
finally:
output(f"\nUse this command to run the action again:", style="green")
output(f" $ openllm run {bento}", style="orange")
elif action == "serve":
local_serve(bento)
try:
local_serve(bento)
finally:
output(f"\nUse this command to run the action again:", style="green")
output(f" $ openllm serve {bento}", style="orange")
elif action == "deploy":
ensure_cloud_context()
targets = get_cloud_machine_spec()
target = _select_target(bento, targets)
cloud_deploy(bento, target)
try:
cloud_deploy(bento, target)
finally:
output(f"\nUse this command to run the action again:", style="green")
output(
f" $ openllm deploy {bento} --instance-type {target.name}",
style="orange",
)
@app.command()
@app.command(help="get started interactively")
def hello():
INTERACTIVE.set(True)
VERBOSE_LEVEL.set(20)
target = get_local_machine_spec()
output(f" Detected Platform: {target.platform}", style="green")
@@ -213,35 +236,48 @@ def hello():
_select_action(bento, score)
@app.command()
@app.command(help="start an OpenAI API compatible chat server and chat in browser")
def serve(
    model: Annotated[str, typer.Argument()] = "",
    repo: Optional[str] = None,
    port: int = 3000,
    verbose: bool = False,
):
    # Resolve `model` (optionally restricted to `repo`) against the local
    # machine spec, then hand the resulting bento to the local server on `port`.
    # `--verbose` raises the global output level to 20 before anything else runs.
    if verbose:
        VERBOSE_LEVEL.set(20)
    local_target = get_local_machine_spec()
    resolved_bento = ensure_bento(model, target=local_target, repo_name=repo)
    local_serve(resolved_bento, port=port)
@app.command()
@app.command(help="run the model and chat in terminal")
def run(
    model: Annotated[str, typer.Argument()] = "",
    repo: Optional[str] = None,
    port: Optional[int] = None,
    timeout: int = 600,
    verbose: bool = False,
):
    # Resolve `model` against the local machine spec and start a terminal
    # session through `local_run`.
    # `--verbose` raises the global output level to 20 before anything else runs.
    if verbose:
        VERBOSE_LEVEL.set(20)
    local_target = get_local_machine_spec()
    resolved_bento = ensure_bento(model, target=local_target, repo_name=repo)
    # When no port is given, pick a random one in [30000, 40000].
    # NOTE(review): the chosen port is not checked for availability here.
    chosen_port = random.randint(30000, 40000) if port is None else port
    local_run(resolved_bento, port=chosen_port, timeout=timeout)
@app.command()
@app.command(
help="deploy an production-ready OpenAI API compatible chat server to bentocloud ($100 free credit)",
)
def deploy(
model: Annotated[str, typer.Argument()] = "",
instance_type: Optional[str] = None,
repo: Optional[str] = None,
verbose: bool = False,
):
if verbose:
VERBOSE_LEVEL.set(20)
bento = ensure_bento(model, repo_name=repo)
if instance_type is not None:
cloud_deploy(bento, DeploymentTarget(name=instance_type))
@@ -252,25 +288,21 @@ def deploy(
if not targets:
output(
"No available instance type, check your bentocloud account",
level=20,
style="red",
)
raise typer.Exit(1)
target = targets[0]
output(f"Recommended instance type: {target.name}", style="green")
output(
f"Recommended instance type: {target.name}",
style="green",
)
cloud_deploy(bento, target)
def typer_callback(verbose: int = 0):
    # App-level callback: a non-zero `verbose` becomes the global output level.
    # A value of 0 (the default) leaves VERBOSE_LEVEL untouched.
    if not verbose:
        return
    VERBOSE_LEVEL.set(verbose)
def main():
    """Entry point: check the interpreter version, then run the Typer app.

    Exits with status 1 when the interpreter is older than Python 3.9.
    Otherwise registers ``typer_callback`` as the app-level callback and
    invokes the app.
    """
    if sys.version_info < (3, 9):
        # The message must name the same minimum version the check enforces;
        # it previously said 3.8 while the check rejects anything below 3.9.
        output("Python 3.9 or higher is required", style="red")
        sys.exit(1)
    app.callback()(typer_callback)
    app()

View File

@@ -1,8 +1,8 @@
from __future__ import annotations
import typing
import functools
import math
import typing
from types import SimpleNamespace
import psutil

View File

@@ -3,13 +3,12 @@ import os
import pathlib
import shutil
import subprocess
import typing
import typer
from openllm_next.accelerator_spec import ACCELERATOR_SPECS
from openllm_next.common import (
ERROR_STYLE,
INTERACTIVE,
BentoInfo,
DeploymentTarget,
@@ -17,8 +16,6 @@ from openllm_next.common import (
run_command,
)
app = typer.Typer()
def _get_deploy_cmd(bento: BentoInfo, target: typing.Optional[DeploymentTarget] = None):
cmd = ["bentoml", "deploy", bento.tag]
@@ -32,7 +29,6 @@ def _get_deploy_cmd(bento: BentoInfo, target: typing.Optional[DeploymentTarget]
output(
f"This model requires the following environment variables to run: {repr(required_env_names)}",
style="yellow",
level=20,
)
for env_info in bento.bento_yaml.get("envs", []):
@@ -56,8 +52,7 @@ def _get_deploy_cmd(bento: BentoInfo, target: typing.Optional[DeploymentTarget]
if default == "":
output(
f"Environment variable {env_info['name']} is required but not provided",
style=ERROR_STYLE,
level=20,
style="red",
)
raise typer.Exit(1)
else:
@@ -123,7 +118,7 @@ def ensure_cloud_context():
result = subprocess.check_output(cmd)
output(" Logged in successfully", style="green")
except subprocess.CalledProcessError:
output(" Failed to login", style=ERROR_STYLE)
output(" Failed to login", style="red")
raise typer.Exit(1)
@@ -147,7 +142,7 @@ def get_cloud_machine_spec():
for it in instance_types
]
except (subprocess.CalledProcessError, json.JSONDecodeError):
output("Failed to get cloud instance types", style=ERROR_STYLE)
output("Failed to get cloud instance types", style="red")
return []

View File

@@ -1,23 +1,19 @@
import functools
import signal
import io
import asyncio
import functools
import hashlib
import io
import json
import os
import pathlib
import signal
import subprocess
import sys
import typing
from contextlib import contextmanager, asynccontextmanager
from contextlib import asynccontextmanager, contextmanager
from types import SimpleNamespace
import typer
ERROR_STYLE = "red"
SUCCESS_STYLE = "green"
CLLAMA_HOME = pathlib.Path.home() / ".openllm_next"
REPO_DIR = CLLAMA_HOME / "repos"
TEMP_DIR = CLLAMA_HOME / "temp"
@@ -56,7 +52,7 @@ class ContextVar(typing.Generic[T]):
self._stack.pop()
VERBOSE_LEVEL = ContextVar(20)
VERBOSE_LEVEL = ContextVar(10)
INTERACTIVE = ContextVar(False)
FORCE = ContextVar(False)
@@ -67,7 +63,7 @@ def output(content, level=0, style=None, end=None):
if level > VERBOSE_LEVEL.get():
return
if isinstance(content, (dict, list)):
if not isinstance(content, str):
import pyaml
out = io.StringIO()
@@ -86,7 +82,7 @@ def output(content, level=0, style=None, end=None):
class Config(SimpleNamespace):
repos: dict[str, str] = {
"default": "git+https://github.com/bojiang/openllm-repo@main"
"default": "git+https://github.com/bentoml/openllm-repo@main"
}
default_repo: str = "default"
@@ -115,13 +111,13 @@ class RepoInfo(SimpleNamespace):
def tolist(self):
if VERBOSE_LEVEL.get() <= 0:
return f"{self.name} ({self.url})"
if VERBOSE_LEVEL.get() <= 1:
if VERBOSE_LEVEL.get() <= 10:
return dict(
name=self.name,
url=self.url,
path=str(self.path),
)
if VERBOSE_LEVEL.get() <= 2:
if VERBOSE_LEVEL.get() <= 20:
return dict(
name=self.name,
url=self.url,
@@ -165,6 +161,10 @@ class BentoInfo(SimpleNamespace):
bento_file = self.path / "bento.yaml"
return yaml.safe_load(bento_file.read_text())
@functools.cached_property
def platforms(self) -> list[str]:
    """Return the platforms listed in the bento's ``labels`` section.

    Reads the comma-separated ``platforms`` label from ``self.bento_yaml``
    and splits it into a list. A missing label falls back to ``"linux"``.
    Raises ``KeyError`` if ``bento_yaml`` has no ``labels`` entry.
    """
    labels = self.bento_yaml["labels"]
    raw_platforms = labels.get("platforms", "linux")
    return raw_platforms.split(",")
@functools.cached_property
def pretty_yaml(self) -> dict:
def _pretty_routes(routes):
@@ -183,22 +183,36 @@ class BentoInfo(SimpleNamespace):
"apis": _pretty_routes(self.bento_yaml["schema"]["routes"]),
"resources": self.bento_yaml["services"][0]["config"]["resources"],
"envs": self.bento_yaml["envs"],
"platforms": self.platforms,
}
return pretty_yaml
return self.bento_yaml
@functools.cached_property
def pretty_accelerator(self) -> str:
    """Return a short description of the GPU resources the bento asks for.

    Looks at the ``resources`` config of the first service in
    ``self.bento_yaml``. When it requests more than zero GPUs, the result
    is ``"<memory>GB x<count> (<model>)"`` built from the matching entry in
    ``ACCELERATOR_SPECS``. In every other case, including any missing key
    along the way, the result is an empty string.
    """
    # Imported here rather than at module level, as in the original;
    # presumably to avoid a circular import -- confirm.
    from openllm_next.accelerator_spec import ACCELERATOR_SPECS

    try:
        resources = self.bento_yaml["services"][0]["config"]["resources"]
        gpu_count = resources["gpu"]
        if not gpu_count > 0:
            return ""
        spec = ACCELERATOR_SPECS[resources["gpu_type"]]
        return f"{spec.memory_size:.0f}GB x{gpu_count} ({spec.model})"
    except KeyError:
        # Only KeyError is handled; an empty ``services`` list would still raise.
        return ""
def tolist(self):
verbose = VERBOSE_LEVEL.get()
if verbose <= 0:
return str(self)
if verbose <= 1:
if verbose <= 10:
return dict(
tag=self.tag,
repo=self.repo.tolist(),
path=str(self.path),
model_card=self.pretty_yaml,
)
if verbose <= 2:
if verbose <= 20:
return dict(
tag=self.tag,
repo=self.repo.tolist(),
@@ -207,6 +221,18 @@ class BentoInfo(SimpleNamespace):
)
class VenvSpec(SimpleNamespace):
    """Description of a virtual environment: Python version plus packages."""

    # Python version string for the environment.
    python_version: str
    # Package mapping; only its values take part in hashing.
    python_packages: dict[str, str]
    # Optional prefix; defaults to the empty string.
    name_prefix = ""

    def __hash__(self):
        # The hash is derived solely from the sorted package values.
        # NOTE: python_version is currently not part of the hash input.
        package_values = sorted(self.python_packages.values())
        return md5(*package_values)
class Accelerator(SimpleNamespace):
model: str
memory_size: float
@@ -291,7 +317,7 @@ def run_command(
env=env,
)
except subprocess.CalledProcessError:
output("Command failed", style=ERROR_STYLE)
output("Command failed", style="red")
raise typer.Exit(1)
@@ -349,7 +375,7 @@ async def async_run_command(
)
yield proc
except subprocess.CalledProcessError:
output("Command failed", style="red", level=20)
output("Command failed", style="red")
raise typer.Exit(1)
finally:
if proc:

View File

@@ -1,13 +1,14 @@
import asyncio
import time
import httpx
from openllm_next.common import (
BentoInfo,
run_command,
async_run_command,
stream_command_output,
output,
run_command,
stream_command_output,
)
from openllm_next.venv import ensure_venv
@@ -47,7 +48,7 @@ async def _run_model(
) as server_proc:
import bentoml
print("Model server started", server_proc.pid)
output(f"Model server started {server_proc.pid}")
stdout_streamer = None
stderr_streamer = None
@@ -86,14 +87,17 @@ async def _run_model(
while True:
try:
message = input("user: ")
if message == "":
output("empty message, please enter something", style="yellow")
continue
messages.append(dict(role="user", content=message))
print("assistant: ", end="")
output("assistant: ", end="", style="lightgreen")
assistant_message = ""
async for text in client.chat(messages=messages): # type: ignore
assistant_message += text
print(text, end="")
output(text, end="", style="lightgreen")
messages.append(dict(role="assistant", content=assistant_message))
print()
output("")
except KeyboardInterrupt:
break
output("\nStopping model server...", style="green")

View File

@@ -1,47 +1,72 @@
import collections
import typing
from typing import Optional
import pyaml
import tabulate
import typer
from openllm_next.accelerator_spec import DeploymentTarget, can_run
from openllm_next.common import (
FORCE,
VERBOSE_LEVEL,
BentoInfo,
load_config,
output,
FORCE,
)
from openllm_next.repo import ensure_repo_updated, parse_repo_url
from openllm_next.accelerator_spec import can_run, DeploymentTarget
app = typer.Typer()
app = typer.Typer(
no_args_is_help=True,
help="manage models",
)
@app.command()
def get(
    tag: str,
    repo: Optional[str] = None,
    verbose: bool = False,
):
    # Look up the bento matching `tag` (optionally within `repo`) and print it
    # through `output`. `--verbose` raises the global output level to 20 first.
    if verbose:
        VERBOSE_LEVEL.set(20)
    bento_info = ensure_bento(tag, repo_name=repo)
    if not bento_info:
        return
    output(bento_info)
@app.command(name="list")
def list_(
    tag: Optional[str] = None,
    repo: Optional[str] = None,
    verbose: bool = False,
):
    # Print a table of the bentos matching `tag` / `repo`, sorted by model name.
    # `--verbose` raises the global output level to 20 first.
    if verbose:
        VERBOSE_LEVEL.set(20)

    bentos = list_bento(tag=tag, repo_name=repo)
    bentos.sort(key=lambda item: item.name)

    # A model name is shown only on its first row; later rows for the same
    # model get an empty first cell so versions read as a group.
    shown_names = set()
    rows = []
    for bento in bentos:
        if bento.name in shown_names:
            name_cell = ""
        else:
            shown_names.add(bento.name)
            name_cell = bento.name
        rows.append(
            [
                name_cell,
                bento.tag,
                bento.repo.name,
                bento.pretty_accelerator,
                ",".join(bento.platforms),
            ]
        )

    table = tabulate.tabulate(
        rows,
        headers=["model", "version", "repo", "accelerators", "platforms"],
    )
    output(table)
def ensure_bento(
@@ -51,47 +76,45 @@ def ensure_bento(
) -> BentoInfo:
bentos = list_bento(model, repo_name=repo_name)
if len(bentos) == 0:
output(f"No model found for {model}", level=20, style="red")
output(f"No model found for {model}", style="red")
raise typer.Exit(1)
if len(bentos) == 1:
if FORCE.get():
output(f"Found model {bentos[0]}", level=10, style="green")
output(f"Found model {bentos[0]}", style="green")
return bentos[0]
if target is None:
return bentos[0]
if can_run(bentos[0], target) <= 0:
return bentos[0]
output(f"Found model {bentos[0]}", level=10, style="green")
output(f"Found model {bentos[0]}", style="green")
return bentos[0]
if target is None:
output(
f"Multiple models match {model}, did you mean one of these?",
level=20,
style="red",
)
for bento in bentos:
output(f" {bento}", level=20)
output(f" {bento}")
raise typer.Exit(1)
filtered = [bento for bento in bentos if can_run(bento, target) > 0]
if len(filtered) == 0:
output(f"No deployment target found for {model}", level=20, style="red")
output(f"No deployment target found for {model}", style="red")
raise typer.Exit(1)
if len(filtered) == 0:
output(f"No deployment target found for {model}", level=20, style="red")
output(f"No deployment target found for {model}", style="red")
raise typer.Exit(1)
if len(bentos) > 1:
output(
f"Multiple models match {model}, did you mean one of these?",
level=20,
style="red",
)
for bento in bentos:
output(f" {bento}", level=20)
output(f" {bento}")
raise typer.Exit(1)
return bentos[0]
@@ -107,11 +130,9 @@ def list_bento(
if repo_name is not None:
config = load_config()
if repo_name not in config.repos:
output(
f"Repo `{repo_name}` not found, did you mean one of these?", level=20
)
output(f"Repo `{repo_name}` not found, did you mean one of these?")
for repo_name in config.repos:
output(f" {repo_name}", level=20)
output(f" {repo_name}")
raise typer.Exit(1)
if not tag:

View File

@@ -7,23 +7,27 @@ import questionary
import typer
from openllm_next.common import (
ERROR_STYLE,
INTERACTIVE,
REPO_DIR,
SUCCESS_STYLE,
VERBOSE_LEVEL,
RepoInfo,
load_config,
save_config,
output,
save_config,
)
UPDATE_INTERVAL = datetime.timedelta(days=3)
app = typer.Typer()
app = typer.Typer(
no_args_is_help=True,
help="manage repos",
)
@app.command()
def list():
def list(verbose: bool = False):
if verbose:
VERBOSE_LEVEL.set(20)
config = load_config()
pyaml.pprint(
[parse_repo_url(repo, name) for name, repo in config.repos.items()],
@@ -36,12 +40,12 @@ def list():
def remove(name: str):
config = load_config()
if name not in config.repos:
output(f"Repo {name} does not exist", style=ERROR_STYLE)
output(f"Repo {name} does not exist", style="red")
return
del config.repos[name]
save_config(config)
output(f"Repo {name} removed", style=SUCCESS_STYLE)
output(f"Repo {name} removed", style="green")
@app.command()
@@ -68,10 +72,10 @@ def update():
branch=repo.branch,
)
output("")
output(f"Repo `{repo.name}` updated", style=SUCCESS_STYLE)
output(f"Repo `{repo.name}` updated", style="green")
except:
shutil.rmtree(repo.path, ignore_errors=True)
output(f"Failed to clone repo {repo.name}", style=ERROR_STYLE)
output(f"Failed to clone repo {repo.name}", style="red")
else:
try:
import dulwich.porcelain
@@ -84,10 +88,10 @@ def update():
)
dulwich.porcelain.clean(str(repo.path), str(repo.path))
output("")
output(f"Repo `{repo.name}` updated", style=SUCCESS_STYLE)
output(f"Repo `{repo.name}` updated", style="green")
except:
shutil.rmtree(repo.path, ignore_errors=True)
output(f"Failed to update repo {repo.name}", style=ERROR_STYLE)
output(f"Failed to update repo {repo.name}", style="red")
for c in REPO_DIR.glob("*/*/*"):
if tuple(c.parts[-3:]) not in repos_in_use:
shutil.rmtree(c, ignore_errors=True)
@@ -110,7 +114,6 @@ def ensure_repo_updated():
output(
"The repo cache is never updated, please run `openllm repo update` to fetch the latest model list",
style="red",
level=20,
)
raise typer.Exit(1)
last_update = datetime.datetime.fromisoformat(last_update_file.read_text().strip())
@@ -125,7 +128,6 @@ def ensure_repo_updated():
output(
"The repo cache is outdated, please run `openllm repo update` to fetch the latest model list",
style="yellow",
level=10,
)
@@ -137,11 +139,11 @@ GIT_REPO_RE = re.compile(
def parse_repo_url(repo_url, repo_name=None) -> RepoInfo:
"""
parse the git repo url to server, owner, repo name, branch
>>> parse_repo_url("git+https://github.com/bojiang/bentovllm@main")
('github.com', 'bojiang', 'bentovllm', 'main')
>>> parse_repo_url("git+https://github.com/bentoml/bentovllm@main")
('github.com', 'bentoml', 'bentovllm', 'main')
>>> parse_repo_url("git+https://github.com/bojiang/bentovllm")
('github.com', 'bojiang', 'bentovllm', 'main')
>>> parse_repo_url("git+https://github.com/bentoml/bentovllm")
('github.com', 'bentoml', 'bentovllm', 'main')
"""
match = GIT_REPO_RE.match(repo_url)
if not match:
@@ -168,7 +170,7 @@ def add(name: str, repo: str):
if not name.isidentifier():
output(
f"Invalid repo name: {name}, should only contain letters, numbers and underscores",
style=ERROR_STYLE,
style="red",
)
return
@@ -182,7 +184,7 @@ def add(name: str, repo: str):
config.repos[name] = repo
save_config(config)
output(f"Repo {name} added", style=SUCCESS_STYLE)
output(f"Repo {name} added", style="green")
if __name__ == "__main__":

View File

@@ -1,8 +1,7 @@
import functools
import typing
import pathlib
import shutil
from types import SimpleNamespace
import typing
from typing import Iterable
import typer
@@ -11,9 +10,9 @@ from openllm_next.common import (
VENV_DIR,
VERBOSE_LEVEL,
BentoInfo,
md5,
run_command,
VenvSpec,
output,
run_command,
)
@@ -43,18 +42,6 @@ def _resolve_packages(requirement: typing.Union[pathlib.Path, str]) -> dict[str,
return deps
class EnvSpec(SimpleNamespace):
python_version: str
python_packages: dict[str, str]
name_prefix = ""
def __hash__(self):
return md5(
# self.python_version,
*sorted(self.python_packages.values()),
)
@functools.lru_cache
def _resolve_bento_env_specs(bento: BentoInfo):
ver_file = bento.path / "env" / "python" / "version.txt"
@@ -71,12 +58,12 @@ def _resolve_bento_env_specs(bento: BentoInfo):
}
ver = ver_file.read_text().strip()
return (
EnvSpec(
VenvSpec(
python_version=ver,
python_packages=preheat_packages,
name_prefix=f"{bento.tag.replace(':', '_')}-1-",
),
EnvSpec(
VenvSpec(
python_version=ver,
python_packages=python_packages,
name_prefix=f"{bento.tag.replace(':', '_')}-2-",
@@ -85,7 +72,7 @@ def _resolve_bento_env_specs(bento: BentoInfo):
def _ensure_venv(
env_spec: EnvSpec,
env_spec: VenvSpec,
parrent_venv: typing.Optional[pathlib.Path] = None,
) -> pathlib.Path:
venv = VENV_DIR / str(hash(env_spec))
@@ -94,7 +81,10 @@ def _ensure_venv(
if not venv.exists():
output(f"Installing model dependencies({venv})...", style="green")
try:
run_command(["python", "-m", "venv", venv], silent=VERBOSE_LEVEL.get() < 1)
run_command(
["python", "-m", "venv", venv],
silent=VERBOSE_LEVEL.get() < 10,
)
pyver = next(venv.glob("lib/python*")).name
if parrent_venv is not None:
with open(
@@ -113,7 +103,7 @@ def _ensure_venv(
"--upgrade-strategy",
"only-if-needed",
],
silent=VERBOSE_LEVEL.get() < 1,
silent=VERBOSE_LEVEL.get() < 10,
)
run_command(
[
@@ -124,7 +114,7 @@ def _ensure_venv(
"only-if-needed",
"--upgrade",
],
silent=VERBOSE_LEVEL.get() < 1,
silent=VERBOSE_LEVEL.get() < 10,
)
with open(venv / "DONE", "w") as f:
f.write("DONE")
@@ -141,7 +131,7 @@ def _ensure_venv(
return venv
def _ensure_venvs(env_spec_list: Iterable[EnvSpec]) -> pathlib.Path:
def _ensure_venvs(env_spec_list: Iterable[VenvSpec]) -> pathlib.Path:
last_venv = None
for env_spec in env_spec_list:
last_venv = _ensure_venv(env_spec, last_venv)