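"""Command-line entry point for OpenLLM.

Builds the top-level Typer app with `repo`, `model`, and `clean` subcommand
groups, an interactive `hello` flow, and the `run` / `serve` / `deploy`
commands defined below.
"""
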
import os
import random
import sys
from collections import defaultdict
from typing import Annotated, Optional

import questionary
import typer

from openllm_next.accelerator_spec import (
    DeploymentTarget,
    can_run,
    get_local_machine_spec,
)
from openllm_next.analytic import DO_NOT_TRACK, OpenLLMTyper
from openllm_next.clean import app as clean_app
from openllm_next.cloud import deploy as cloud_deploy
from openllm_next.cloud import ensure_cloud_context, get_cloud_machine_spec
from openllm_next.common import CHECKED, INTERACTIVE, VERBOSE_LEVEL, output
from openllm_next.local import run as local_run
from openllm_next.local import serve as local_serve
from openllm_next.model import app as model_app
from openllm_next.model import ensure_bento, list_bento
from openllm_next.repo import app as repo_app

app = OpenLLMTyper(
    help="`openllm hello` to get started. "
    "OpenLLM is a CLI tool to manage and deploy open source LLMs and"
    " get an OpenAI API compatible chat server in seconds.",
)

app.add_typer(repo_app, name="repo")
app.add_typer(model_app, name="model")
app.add_typer(clean_app, name="clean")


def _select_bento_name(models, target):
    """Prompt for a model, grouping versions by (repo, name) and marking runnable ones."""
    from tabulate import tabulate

    options = []
    model_infos = [
        [model.repo.name, model.name, can_run(model, target)] for model in models
    ]
    # Sum runnability scores per (repo, name) so multiple versions collapse into
    # one row; a positive total means at least one version can run locally.
    model_name_groups = defaultdict(int)
    for repo, name, score in model_infos:
        model_name_groups[(repo, name)] += score
    table_data = [
        [name, repo, CHECKED if score > 0 else ""]
        for (repo, name), score in model_name_groups.items()
    ]
    if not table_data:
        output("No model found", style="red")
        raise typer.Exit(1)
    table = tabulate(
        table_data,
        headers=["model", "repo", "locally runnable"],
    ).split("\n")
    # Render the tabulate header as a non-selectable separator, then one choice per row.
    headers = f"{table[0]}\n {table[1]}"
    options.append(questionary.Separator(headers))
    for row, table_line in zip(table_data, table[2:]):
        options.append(questionary.Choice(table_line, value=row[:2]))
    selected = questionary.select("Select a model", options).ask()
    if selected is None:
        raise typer.Exit(1)
    return selected


def _select_bento_version(models, target, bento_name, repo):
    """Prompt for a specific version (tag) of the chosen model."""
    from tabulate import tabulate

    model_infos = [
        [model, can_run(model, target)]
        for model in models
        if model.name == bento_name and model.repo.name == repo
    ]
    table_data = [
        [model.tag, CHECKED if score > 0 else ""] for model, score in model_infos
    ]
    if not table_data:
        output(f"No model found for {bento_name} in {repo}", style="red")
        raise typer.Exit(1)
    table = tabulate(
        table_data,
        headers=["version", "locally runnable"],
    ).split("\n")
    options = []
    options.append(questionary.Separator(f"{table[0]}\n {table[1]}"))
    # Each choice carries the underlying (model, score) pair, not the rendered row.
    for (model, score), table_line in zip(model_infos, table[2:]):
        options.append(questionary.Choice(table_line, value=(model, score)))
    selected = questionary.select("Select a version", options).ask()
    if selected is None:
        raise typer.Exit(1)
    return selected


def _select_target(bento, targets):
    """Prompt for a BentoCloud instance type, listing deployable targets first."""
    from tabulate import tabulate

    options = []
    targets.sort(key=lambda x: can_run(bento, x), reverse=True)
    if not targets:
        output(
            "No available instance type, check your BentoCloud account",
            style="red",
        )
        raise typer.Exit(1)
    table = tabulate(
        [
            [
                target.name,
                target.accelerators_repr,
                f"${target.price}",
                CHECKED if can_run(bento, target) else "insufficient res.",
            ]
            for target in targets
        ],
        headers=["instance type", "accelerator", "price/hr", "deployable"],
    ).split("\n")
    options.append(questionary.Separator(f"{table[0]}\n {table[1]}"))
    for target, line in zip(targets, table[2:]):
        options.append(questionary.Choice(f"{line}", value=target))
    selected = questionary.select("Select an instance type", options).ask()
    if selected is None:
        raise typer.Exit(1)
    return selected


def _select_action(bento, score):
    """Prompt for an action (run / serve / deploy) and execute it.

    Local actions are disabled when this machine cannot run the bento
    (score <= 0); deploying to BentoCloud is always offered.
    """
    disabled = None if score > 0 else "insufficient res."
    options = [
        questionary.Separator("Available actions"),
        questionary.Choice(
            "0. Run the model in terminal",
            value="run",
            disabled=disabled,
            shortcut_key="0",
        ),
        questionary.Separator(f" $ openllm run {bento}"),
        questionary.Separator(" "),
        questionary.Choice(
            "1. Serve the model locally and get a chat server",
            value="serve",
            disabled=disabled,
            shortcut_key="1",
        ),
        questionary.Separator(f" $ openllm serve {bento}"),
        questionary.Separator(" "),
        questionary.Choice(
            "2. Deploy the model to BentoCloud and get a scalable chat server",
            value="deploy",
            shortcut_key="2",
        ),
        questionary.Separator(f" $ openllm deploy {bento}"),
    ]
    action = questionary.select("Select an action", options).ask()
    if action is None:
        raise typer.Exit(1)
    if action == "run":
        try:
            local_run(bento)
        finally:
            output("\nUse this command to run the action again:", style="green")
            output(f" $ openllm run {bento}", style="orange")
    elif action == "serve":
        try:
            local_serve(bento)
        finally:
            output("\nUse this command to run the action again:", style="green")
            output(f" $ openllm serve {bento}", style="orange")
    elif action == "deploy":
        ensure_cloud_context()
        targets = get_cloud_machine_spec()
        target = _select_target(bento, targets)
        try:
            cloud_deploy(bento, target)
        finally:
            output("\nUse this command to run the action again:", style="green")
            output(
                f" $ openllm deploy {bento} --instance-type {target.name}",
                style="orange",
            )
@app.command(help="get started interactively")
def hello():
INTERACTIVE.set(True)
VERBOSE_LEVEL.set(20)
target = get_local_machine_spec()
output(f" Detected Platform: {target.platform}", style="green")
if target.accelerators:
output(" Detected Accelerators: ", style="green")
for a in target.accelerators:
output(f" - {a.model} {a.memory_size}GB", style="green")
else:
output(" Detected Accelerators: None", style="yellow")
models = list_bento()
if not models:
output(
"No model found, you probably need to update the model repo:",
style="red",
)
output(
" $ openllm repo update",
style="orange",
)
raise typer.Exit(1)
bento_name, repo = _select_bento_name(models, target)
bento, score = _select_bento_version(models, target, bento_name, repo)
_select_action(bento, score)
@app.command(help="start an OpenAI API compatible chat server and chat in browser")
def serve(
model: Annotated[str, typer.Argument()] = "",
repo: Optional[str] = None,
port: int = 3000,
verbose: bool = False,
):
if verbose:
VERBOSE_LEVEL.set(20)
target = get_local_machine_spec()
bento = ensure_bento(model, target=target, repo_name=repo)
local_serve(bento, port=port)
@app.command(help="run the model and chat in terminal")
def run(
model: Annotated[str, typer.Argument()] = "",
repo: Optional[str] = None,
port: Optional[int] = None,
timeout: int = 600,
verbose: bool = False,
):
if verbose:
VERBOSE_LEVEL.set(20)
target = get_local_machine_spec()
bento = ensure_bento(model, target=target, repo_name=repo)
if port is None:
port = random.randint(30000, 40000)
local_run(bento, port=port, timeout=timeout)


@app.command(
    help="deploy a production-ready OpenAI API compatible chat server to BentoCloud ($100 free credit)",
)
def deploy(
    model: Annotated[str, typer.Argument()] = "",
    instance_type: Optional[str] = None,
    repo: Optional[str] = None,
    verbose: bool = False,
):
    if verbose:
        VERBOSE_LEVEL.set(20)
    bento = ensure_bento(model, repo_name=repo)
    if instance_type is not None:
        cloud_deploy(bento, DeploymentTarget(name=instance_type))
        return
    # No instance type given: keep only targets that can run the bento,
    # rank them by runnability score, and recommend the best one.
    targets = get_cloud_machine_spec()
    targets = [t for t in targets if can_run(bento, t) > 0]
    targets.sort(key=lambda t: can_run(bento, t), reverse=True)
    if not targets:
        output(
            "No available instance type, check your BentoCloud account",
            style="red",
        )
        raise typer.Exit(1)
    target = targets[0]
    output(
        f"Recommended instance type: {target.name}",
        style="green",
    )
    cloud_deploy(bento, target)
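

# Example invocations of the commands above (a sketch; `some-model` and
# `gpu.small` are hypothetical placeholders, actual ids depend on your
# configured model repos and BentoCloud account):
#
#   $ openllm hello
#   $ openllm serve some-model --port 3000
#   $ openllm run some-model --timeout 600
#   $ openllm deploy some-model --instance-type gpu.small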


@app.callback(invoke_without_command=True)
def typer_callback(
    verbose: int = 0,
    do_not_track: bool = typer.Option(
        False,
        "--do-not-track",
        help="Whether to disable usage tracking",
        envvar=DO_NOT_TRACK,
    ),
):
    if verbose:
        VERBOSE_LEVEL.set(verbose)
    if do_not_track:
        os.environ[DO_NOT_TRACK] = str(True)


def main():
    # Keep the message in sync with the version check: Python >= 3.9 is required.
    if sys.version_info < (3, 9):
        output("Python 3.9 or higher is required", style="red")
        sys.exit(1)
    app()


if __name__ == "__main__":
    main()
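

# A minimal smoke-test sketch using Typer's test runner (an illustration only,
# not part of the original module):
#
#   from typer.testing import CliRunner
#
#   runner = CliRunner()
#   result = runner.invoke(app, ["--help"])
#   assert result.exit_code == 0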