perf: improve build logics and cleanup speed (#657)

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
2026-03-07 16:47:13 -05:00 · 2023-11-15 00:18:31 -05:00
parent 103156cd71
commit a58d947bc8
11 changed files with 141 additions and 237 deletions
--- a/openllm-python/src/openllm_cli/entrypoint.py
+++ b/openllm-python/src/openllm_cli/entrypoint.py
@@ -52,7 +52,6 @@ from bentoml._internal.cloud.config import CloudClientConfig
 from bentoml._internal.configuration.containers import BentoMLContainer
 from bentoml._internal.models.model import ModelStore
 from openllm import bundle
-from openllm.exceptions import OpenLLMException
 from openllm_core._typing_compat import (
  Concatenate,
  DictStrAny,
@@ -67,6 +66,7 @@ from openllm_core._typing_compat import (
  TypeGuard,
 )
 from openllm_core.config import CONFIG_MAPPING
+from openllm_core.exceptions import OpenLLMException
 from openllm_core.utils import (
  DEBUG_ENV_VAR,
  OPTIONAL_DEPENDENCIES,
--- a/openllm-python/src/openllm_cli/extension/build_base_container.py
+++ b/openllm-python/src/openllm_cli/extension/build_base_container.py
@@ -1,31 +1,90 @@
 from __future__ import annotations
+import pathlib
+import shutil
+import subprocess
 import typing as t

 import click
 import orjson

+import bentoml
 import openllm
 from openllm_cli import termui
 from openllm_cli._factory import container_registry_option, machine_option
+from openllm_core.utils import get_debug_mode, pkg

 if t.TYPE_CHECKING:
  from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy

+_BUILDER = bentoml.container.get_backend('buildx')  # 'buildx' container build backend, looked up once when this module is imported
+_module_location = pkg.source_locations('openllm')  # reported source location of the 'openllm' package; build_container rejects a falsy value
+
+
+def build_container(  # Build (and optionally push) the base container image, returning a mapping of registry alias -> 'image:tag'.
+  registries: LiteralContainerRegistry | t.Sequence[LiteralContainerRegistry] | None = None,  # alias(es) to tag for; None/empty selects every entry of openllm.bundle.CONTAINER_NAMES
+  version_strategy: LiteralContainerVersionStrategy = 'release',  # passed to openllm.bundle.get_base_container_tag to compute the tag part
+  push: bool = False,  # forwarded unchanged to the builder's 'push' argument
+  machine: bool = False,  # forwarded as 'quiet'; when set, the builder output is also stored under 'image_sha'
+) -> dict[str | LiteralContainerRegistry, str]:
+  try:
+    if not _BUILDER.health():  # an unhealthy builder is routed into the same handler as a failing health command
+      raise openllm.exceptions.Error
+  except (openllm.exceptions.Error, subprocess.CalledProcessError):
+    raise RuntimeError(  # NOTE(review): the message below contains a typo ("instalation instruction"); left untouched here because it is runtime text
+      'Building base container requires BuildKit (via Buildx) to be installed. See https://docs.docker.com/build/buildx/install/ for instalation instruction.'
+    ) from None  # 'from None' suppresses the internal exception context in the traceback
+  if not shutil.which('nvidia-container-runtime'):  # the executable must be discoverable on PATH
+    raise RuntimeError('NVIDIA Container Toolkit is required to compile CUDA kernel in container.')
+  if not _module_location:  # module-level lookup of the 'openllm' package location came back empty
+    raise RuntimeError("Failed to determine source location of 'openllm'. (Possible broken installation)")
+  pyproject_path = pathlib.Path(_module_location).parent.parent / 'pyproject.toml'  # looked up two directories above the reported package location
+  if not pyproject_path.exists():  # no pyproject.toml there -> treated as "not running from a repository checkout"
+    raise ValueError(
+      "This utility can only be run within OpenLLM git repository. Clone it first with 'git clone https://github.com/bentoml/OpenLLM.git'"
+    )
+  if not registries:  # no explicit selection: tag for every known registry
+    tags: dict[str | LiteralContainerRegistry, str] = {
+      alias: f'{value}:{openllm.bundle.get_base_container_tag(version_strategy)}'
+      for alias, value in openllm.bundle.CONTAINER_NAMES.items()
+    }
+  else:
+    registries = [registries] if isinstance(registries, str) else list(registries)  # a bare string is one registry, not a sequence of characters
+    tags = {
+      name: f'{openllm.bundle.CONTAINER_NAMES[name]}:{openllm.bundle.get_base_container_tag(version_strategy)}'  # NOTE(review): an alias missing from CONTAINER_NAMES presumably fails on this lookup — confirm
+      for name in registries
+    }
+  try:
+    outputs = _BUILDER.build(
+      file=pathlib.Path(__file__).parent.joinpath('Dockerfile').resolve().__fspath__(),  # Dockerfile is expected in the same directory as this module
+      context_path=pyproject_path.parent.__fspath__(),  # build context is the directory that holds pyproject.toml
+      tag=tuple(tags.values()),  # one build invocation carries every computed 'image:tag'
+      push=push,
+      progress='plain' if get_debug_mode() else 'auto',  # plain progress output only when debug mode is on
+      quiet=machine,
+    )
+    if machine and outputs is not None:
+      tags['image_sha'] = outputs.decode('utf-8').strip()  # NOTE(review): presumably the image digest emitted by the quiet build — confirm against the builder
+  except Exception as err:  # any failure in the build step (or decoding its output) is re-raised as OpenLLMException with the cause chained
+    raise openllm.exceptions.OpenLLMException(
+      f'Failed to containerize base container images (Scroll up to see error above, or set DEBUG=5 for more traceback):\n{err}'
+    ) from err
+  return tags  # registry alias -> 'image:tag', plus 'image_sha' when it was recorded above
+

@click.command(
  'build_base_container',
  context_settings=termui.CONTEXT_SETTINGS,
  help="""Base image builder for BentoLLM.

-                By default, the base image will include custom kernels (PagedAttention via vllm, FlashAttention-v2, etc.) built with CUDA 11.8, Python 3.9 on Ubuntu22.04.
-                Optionally, this can also be pushed directly to remote registry. Currently support ``docker.io``, ``ghcr.io`` and ``quay.io``.
+          By default, the base image will include custom kernels (PagedAttention via vllm, FlashAttention-v2, etc.) built with CUDA 11.8, Python 3.9 on Ubuntu22.04.
+          Optionally, this can also be pushed directly to remote registry. Currently support ``docker.io``, ``ghcr.io`` and ``quay.io``.

-                \b
-                If '--machine' is passed, then it will run the process quietly, and output a JSON to the current running terminal.
-                This command is only useful for debugging and for building custom base image for extending BentoML with custom base images and custom kernels.
+          \b
+          If '--machine' is passed, then it will run the process quietly, and output a JSON to the current running terminal.
+          This command is only useful for debugging and for building custom base image for extending BentoML with custom base images and custom kernels.

-                Note that we already release images on our CI to ECR and GHCR, so you don't need to build it yourself.
-                """,
+          Note that we already release images on our CI to ECR and GHCR, so you don't need to build it yourself.
+          """,
 )
@container_registry_option
@click.option(
@@ -42,7 +101,7 @@ def cli(
  push: bool,
  machine: bool,
 ) -> dict[str, str]:
-  mapping = openllm.bundle.build_container(container_registry, version_strategy, push, machine)
+  mapping = build_container(container_registry, version_strategy, push, machine)
  if machine:
    termui.echo(orjson.dumps(mapping, option=orjson.OPT_INDENT_2).decode(), fg='white')
  return mapping