diff --git a/openllm_next/cloud.py b/openllm_next/cloud.py
index df16bf2f..56168b42 100644
--- a/openllm_next/cloud.py
+++ b/openllm_next/cloud.py
@@ -79,6 +79,7 @@ async def _run_model(model: str, timeout: int = 600):
     )
 
     import bentoml
+    from httpx import ReadError
 
     try:
         questionary.print("Model loading...", style="green")
@@ -92,6 +93,8 @@ async def _run_model(model: str, timeout: int = 600):
             break
         except bentoml.exceptions.BentoMLException:
             await asyncio.sleep(1)
+        except ReadError:
+            await asyncio.sleep(1)
     else:
         questionary.print("Model failed to load", style="red")
         return
diff --git a/openllm_next/common.py b/openllm_next/common.py
index b17d7a93..c9c49568 100644
--- a/openllm_next/common.py
+++ b/openllm_next/common.py
@@ -1,4 +1,5 @@
 import functools
+import hashlib
 import json
 import os
 import pathlib
@@ -9,6 +10,7 @@ from contextlib import contextmanager
 from types import SimpleNamespace
 
 import questionary
+import typer
 
 ERROR_STYLE = "red"
 SUCCESS_STYLE = "green"
@@ -107,6 +109,9 @@ class BentoInfo(SimpleNamespace):
     repo: RepoInfo
     path: pathlib.Path
 
+    def __hash__(self):
+        return md5(str(self.path))
+
     @property
     def tag(self) -> str:
         return f"{self.path.parent.name}:{self.path.name}"
@@ -166,17 +171,47 @@
     )
 
 
+@typing.overload
 def run_command(
     cmd,
     cwd=None,
     env=None,
     copy_env=True,
+    venv=None,
     silent=False,
-    check=True,
-) -> subprocess.CompletedProcess | subprocess.Popen | None:
+    background: typing.Literal[False] = False,
+) -> subprocess.CompletedProcess: ...
+
+
+@typing.overload
+def run_command(
+    cmd,
+    cwd=None,
+    env=None,
+    copy_env=True,
+    venv=None,
+    silent=False,
+    background: typing.Literal[True] = True,
+) -> subprocess.Popen: ...
+
+
+def run_command(
+    cmd,
+    cwd=None,
+    env=None,
+    copy_env=True,
+    venv=None,
+    silent=False,
+    background=False,
+) -> subprocess.CompletedProcess | subprocess.Popen:
+    if background:
+        run_func = subprocess.Popen
+    else:
+        run_func = subprocess.run
     import shlex
 
     env = env or {}
+    cmd = [str(c) for c in cmd]
     if not silent:
         questionary.print("\n")
         if cwd:
@@ -184,25 +219,41 @@ def run_command(
         if env:
             for k, v in env.items():
                 questionary.print(f"$ export {k}={shlex.quote(v)}", style="bold")
+        if venv:
+            questionary.print(f"$ source {venv / 'bin' / 'activate'}", style="bold")
         questionary.print(f"$ {' '.join(cmd)}", style="bold")
+
+    if venv:
+        py = venv / "bin" / "python"
+    else:
+        py = sys.executable
+
     if copy_env:
         env = {**os.environ, **env}
+
     if cmd and cmd[0] == "bentoml":
-        cmd = [sys.executable, "-m", "bentoml"] + cmd[1:]
+        cmd = [py, "-m", "bentoml"] + cmd[1:]
     if cmd and cmd[0] == "python":
-        cmd = [sys.executable] + cmd[1:]
+        cmd = [py] + cmd[1:]
+
     try:
         if silent:
-            return subprocess.run(
+            return run_func(  # type: ignore
                 cmd,
                 cwd=cwd,
                 env=env,
-                check=check,
                 stdout=subprocess.DEVNULL,
                 stderr=subprocess.DEVNULL,
             )
         else:
-            return subprocess.run(cmd, cwd=cwd, env=env, check=check)
+            return run_func(cmd, cwd=cwd, env=env)
     except subprocess.CalledProcessError:
         questionary.print("Command failed", style=ERROR_STYLE)
-        return None
+        raise typer.Exit(1)
+
+
+def md5(*strings: str) -> int:
+    m = hashlib.md5()
+    for s in strings:
+        m.update(s.encode())
+    return int(m.hexdigest(), 16)
diff --git a/openllm_next/model.py b/openllm_next/model.py
index dc29bd23..cebcc4ee 100644
--- a/openllm_next/model.py
+++ b/openllm_next/model.py
@@ -7,6 +7,7 @@
 import typer
 
 from openllm_next.common import ERROR_STYLE, VERBOSE_LEVEL, BentoInfo, load_config
 from openllm_next.repo import parse_repo_url
+from openllm_next.venv import ensure_venv
 
 app = typer.Typer()
@@ -83,10 +84,7 @@ def pick_bento(tag) -> BentoInfo:
     return model
 
 
-def get_serve_cmd(tag: str):
-    if ":" not in tag:
-        tag = f"{tag}:latest"
-    bento = pick_bento(tag)
+def get_serve_cmd(bento: BentoInfo):
     cmd = ["bentoml", "serve", bento.tag]
     env = {
         "BENTOML_HOME": f"{bento.repo.path}/bentoml",
diff --git a/openllm_next/serve.py b/openllm_next/serve.py
index cba519ad..17124455 100644
--- a/openllm_next/serve.py
+++ b/openllm_next/serve.py
@@ -1,30 +1,38 @@
 import asyncio
-import os
-import subprocess
 
 import questionary
 import typer
 
 from openllm_next.common import run_command
-from openllm_next.model import get_serve_cmd
+from openllm_next.model import get_serve_cmd, pick_bento
+from openllm_next.venv import ensure_venv
 
 app = typer.Typer()
 
 
 @app.command()
 def serve(model: str):
-    cmd, env, cwd = get_serve_cmd(model)
-    run_command(cmd, env=env, cwd=cwd)
+    if ":" not in model:
+        model = f"{model}:latest"
+    bento = pick_bento(model)
+    venv = ensure_venv(bento)
+    cmd, env, cwd = get_serve_cmd(bento)
+    run_command(cmd, env=env, cwd=cwd, venv=venv)
 
 
 async def _run_model(model: str, timeout: int = 600):
-    cmd, env, cwd = get_serve_cmd(model)
-    server_proc = subprocess.Popen(
+    if ":" not in model:
+        model = f"{model}:latest"
+    bento = pick_bento(model)
+    venv = ensure_venv(bento)
+    cmd, env, cwd = get_serve_cmd(bento)
+    server_proc = run_command(
         cmd,
-        env={**os.environ, **env},
+        env=env,
         cwd=cwd,
-        stdout=subprocess.DEVNULL,
-        stderr=subprocess.DEVNULL,
+        venv=venv,
+        silent=True,
+        background=True,
     )
 
     import bentoml
diff --git a/openllm_next/venv.py b/openllm_next/venv.py
index 254f7a86..a715036e 100644
--- a/openllm_next/venv.py
+++ b/openllm_next/venv.py
@@ -1,8 +1,20 @@
-def _resolve_package_versions(requirement: str) -> dict[str, str]:
+import functools
+import pathlib
+import shutil
+from types import SimpleNamespace
+from typing import Iterable
+
+import questionary
+import typer
+
+from openllm_next.common import VENV_DIR, VERBOSE_LEVEL, BentoInfo, md5, run_command
+
+
+def _resolve_packages(requirement: str | pathlib.Path) -> dict[str, str]:
     from pip_requirements_parser import RequirementsFile
 
     requirements_txt = RequirementsFile.from_file(
-        requirement,
+        str(requirement),
         include_nested=True,
     )
     deps: dict[str, str] = {}
@@ -22,3 +34,110 @@
             deps[req.name] = req.line
             break
     return deps
+
+
+class EnvSpec(SimpleNamespace):
+    python_version: str
+    python_packages: dict[str, str]
+    name_prefix = ""
+
+    def __hash__(self):
+        return md5(
+            self.python_version,
+            *sorted(self.python_packages.values()),
+        )
+
+
+@functools.lru_cache
+def _resolve_bento_env_specs(bento: BentoInfo):
+    ver_file = bento.path / "env" / "python" / "version.txt"
+    assert ver_file.exists(), f"cannot find version file in {bento.path}"
+
+    lock_file = bento.path / "env" / "python" / "requirements.lock.txt"
+    if not lock_file.exists():
+        lock_file = bento.path / "env" / "python" / "requirements.txt"
+
+    python_packages = _resolve_packages(lock_file)
+    PREHEAT_PIP_PACKAGES = ["torch", "vllm"]
+    preheat_packages = {
+        k: v for k, v in python_packages.items() if k in PREHEAT_PIP_PACKAGES
+    }
+    ver = ver_file.read_text().strip()
+    return (
+        EnvSpec(
+            python_version=ver,
+            python_packages=preheat_packages,
+            name_prefix=f"{bento.tag.replace(':', '_')}-1-",
+        ),
+        EnvSpec(
+            python_version=ver,
+            python_packages=python_packages,
+            name_prefix=f"{bento.tag.replace(':', '_')}-2-",
+        ),
+    )
+
+
+def _ensure_venv(
+    env_spec: EnvSpec, parent_venv: pathlib.Path | None = None
+) -> pathlib.Path:
+    venv = VENV_DIR / str(hash(env_spec))
+    if not venv.exists():
+        questionary.print(f"Installing model dependencies({venv})...", style="green")
+        try:
+            run_command(["python", "-m", "venv", venv], silent=VERBOSE_LEVEL.get() < 1)
+            pyver = next(venv.glob("lib/python*")).name
+            if parent_venv is not None:
+                with open(
+                    venv / "lib" / pyver / "site-packages" / f"{parent_venv.name}.pth",
+                    "w+",
+                ) as f:
+                    f.write(str(parent_venv / "lib" / pyver / "site-packages"))
+            with open(venv / "requirements.txt", "w") as f:
+                f.write("\n".join(sorted(env_spec.python_packages.values())))
+            run_command(
+                [
+                    venv / "bin" / "pip",
+                    "install",
+                    "-r",
+                    venv / "requirements.txt",
+                    "--upgrade-strategy",
+                    "only-if-needed",
+                ],
+                silent=VERBOSE_LEVEL.get() < 1,
+            )
+            run_command(
+                [
+                    venv / "bin" / "pip",
+                    "install",
+                    "bentoml",
+                    "--upgrade-strategy",
+                    "only-if-needed",
+                    "--upgrade",
+                ],
+                silent=VERBOSE_LEVEL.get() < 1,
+            )
+        except Exception:
+            shutil.rmtree(venv, ignore_errors=True)
+            questionary.print(
+                f"Failed to install dependencies to {venv}. Cleaned up.",
+                style="red",
+            )
+            raise typer.Exit(1)
+        questionary.print(
+            f"Successfully installed dependencies to {venv}.", style="green"
+        )
+        return venv
+    else:
+        return venv
+
+
+def _ensure_venvs(env_spec_list: Iterable[EnvSpec]) -> pathlib.Path:
+    last_venv = None
+    for env_spec in env_spec_list:
+        last_venv = _ensure_venv(env_spec, last_venv)
+    assert last_venv is not None
+    return last_venv
+
+
+def ensure_venv(bento: BentoInfo) -> pathlib.Path:
+    return _ensure_venvs(_resolve_bento_env_specs(bento))
diff --git a/pyproject.toml b/pyproject.toml
index 78a484d3..2ad37831 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,7 +16,6 @@ dependencies = [
     "psutil",
     "pathlib",
     "pip_requirements_parser",
-    "venv",
 ]
 
 [tool.typer]