mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-01-19 04:51:18 -05:00
perf: improve build logics and cleanup speed (#657)
Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
@@ -52,7 +52,6 @@ from bentoml._internal.cloud.config import CloudClientConfig
|
||||
from bentoml._internal.configuration.containers import BentoMLContainer
|
||||
from bentoml._internal.models.model import ModelStore
|
||||
from openllm import bundle
|
||||
from openllm.exceptions import OpenLLMException
|
||||
from openllm_core._typing_compat import (
|
||||
Concatenate,
|
||||
DictStrAny,
|
||||
@@ -67,6 +66,7 @@ from openllm_core._typing_compat import (
|
||||
TypeGuard,
|
||||
)
|
||||
from openllm_core.config import CONFIG_MAPPING
|
||||
from openllm_core.exceptions import OpenLLMException
|
||||
from openllm_core.utils import (
|
||||
DEBUG_ENV_VAR,
|
||||
OPTIONAL_DEPENDENCIES,
|
||||
|
||||
@@ -1,31 +1,90 @@
|
||||
from __future__ import annotations
|
||||
import pathlib
|
||||
import shutil
|
||||
import subprocess
|
||||
import typing as t
|
||||
|
||||
import click
|
||||
import orjson
|
||||
|
||||
import bentoml
|
||||
import openllm
|
||||
from openllm_cli import termui
|
||||
from openllm_cli._factory import container_registry_option, machine_option
|
||||
from openllm_core.utils import get_debug_mode, pkg
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy
|
||||
|
||||
_BUILDER = bentoml.container.get_backend('buildx')
|
||||
_module_location = pkg.source_locations('openllm')
|
||||
|
||||
|
||||
def build_container(
|
||||
registries: LiteralContainerRegistry | t.Sequence[LiteralContainerRegistry] | None = None,
|
||||
version_strategy: LiteralContainerVersionStrategy = 'release',
|
||||
push: bool = False,
|
||||
machine: bool = False,
|
||||
) -> dict[str | LiteralContainerRegistry, str]:
|
||||
try:
|
||||
if not _BUILDER.health():
|
||||
raise openllm.exceptions.Error
|
||||
except (openllm.exceptions.Error, subprocess.CalledProcessError):
|
||||
raise RuntimeError(
|
||||
'Building base container requires BuildKit (via Buildx) to be installed. See https://docs.docker.com/build/buildx/install/ for instalation instruction.'
|
||||
) from None
|
||||
if not shutil.which('nvidia-container-runtime'):
|
||||
raise RuntimeError('NVIDIA Container Toolkit is required to compile CUDA kernel in container.')
|
||||
if not _module_location:
|
||||
raise RuntimeError("Failed to determine source location of 'openllm'. (Possible broken installation)")
|
||||
pyproject_path = pathlib.Path(_module_location).parent.parent / 'pyproject.toml'
|
||||
if not pyproject_path.exists():
|
||||
raise ValueError(
|
||||
"This utility can only be run within OpenLLM git repository. Clone it first with 'git clone https://github.com/bentoml/OpenLLM.git'"
|
||||
)
|
||||
if not registries:
|
||||
tags: dict[str | LiteralContainerRegistry, str] = {
|
||||
alias: f'{value}:{openllm.bundle.get_base_container_tag(version_strategy)}'
|
||||
for alias, value in openllm.bundle.CONTAINER_NAMES.items()
|
||||
}
|
||||
else:
|
||||
registries = [registries] if isinstance(registries, str) else list(registries)
|
||||
tags = {
|
||||
name: f'{openllm.bundle.CONTAINER_NAMES[name]}:{openllm.bundle.get_base_container_tag(version_strategy)}'
|
||||
for name in registries
|
||||
}
|
||||
try:
|
||||
outputs = _BUILDER.build(
|
||||
file=pathlib.Path(__file__).parent.joinpath('Dockerfile').resolve().__fspath__(),
|
||||
context_path=pyproject_path.parent.__fspath__(),
|
||||
tag=tuple(tags.values()),
|
||||
push=push,
|
||||
progress='plain' if get_debug_mode() else 'auto',
|
||||
quiet=machine,
|
||||
)
|
||||
if machine and outputs is not None:
|
||||
tags['image_sha'] = outputs.decode('utf-8').strip()
|
||||
except Exception as err:
|
||||
raise openllm.exceptions.OpenLLMException(
|
||||
f'Failed to containerize base container images (Scroll up to see error above, or set DEBUG=5 for more traceback):\n{err}'
|
||||
) from err
|
||||
return tags
|
||||
|
||||
|
||||
@click.command(
|
||||
'build_base_container',
|
||||
context_settings=termui.CONTEXT_SETTINGS,
|
||||
help="""Base image builder for BentoLLM.
|
||||
|
||||
By default, the base image will include custom kernels (PagedAttention via vllm, FlashAttention-v2, etc.) built with CUDA 11.8, Python 3.9 on Ubuntu22.04.
|
||||
Optionally, this can also be pushed directly to remote registry. Currently support ``docker.io``, ``ghcr.io`` and ``quay.io``.
|
||||
By default, the base image will include custom kernels (PagedAttention via vllm, FlashAttention-v2, etc.) built with CUDA 11.8, Python 3.9 on Ubuntu22.04.
|
||||
Optionally, this can also be pushed directly to remote registry. Currently support ``docker.io``, ``ghcr.io`` and ``quay.io``.
|
||||
|
||||
\b
|
||||
If '--machine' is passed, then it will run the process quietly, and output a JSON to the current running terminal.
|
||||
This command is only useful for debugging and for building custom base image for extending BentoML with custom base images and custom kernels.
|
||||
\b
|
||||
If '--machine' is passed, then it will run the process quietly, and output a JSON to the current running terminal.
|
||||
This command is only useful for debugging and for building custom base image for extending BentoML with custom base images and custom kernels.
|
||||
|
||||
Note that we already release images on our CI to ECR and GHCR, so you don't need to build it yourself.
|
||||
""",
|
||||
Note that we already release images on our CI to ECR and GHCR, so you don't need to build it yourself.
|
||||
""",
|
||||
)
|
||||
@container_registry_option
|
||||
@click.option(
|
||||
@@ -42,7 +101,7 @@ def cli(
|
||||
push: bool,
|
||||
machine: bool,
|
||||
) -> dict[str, str]:
|
||||
mapping = openllm.bundle.build_container(container_registry, version_strategy, push, machine)
|
||||
mapping = build_container(container_registry, version_strategy, push, machine)
|
||||
if machine:
|
||||
termui.echo(orjson.dumps(mapping, option=orjson.OPT_INDENT_2).decode(), fg='white')
|
||||
return mapping
|
||||
|
||||
Reference in New Issue
Block a user