mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-02-18 14:47:30 -05:00
perf: improve build logics and cleanup speed (#657)
Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
@@ -1,20 +1,13 @@
|
||||
"""Build-related utilities. Some of these utilities are mainly used for 'openllm.build'.
|
||||
|
||||
These utilities will stay internal, and its API can be changed or updated without backward-compatibility.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
import os
|
||||
import typing as t
|
||||
|
||||
from openllm_core.utils import LazyModule
|
||||
|
||||
_import_structure: dict[str, list[str]] = {
|
||||
_import_structure = {
|
||||
'_package': ['create_bento', 'build_editable', 'construct_python_options', 'construct_docker_options'],
|
||||
'oci': [
|
||||
'CONTAINER_NAMES',
|
||||
'get_base_container_tag',
|
||||
'build_container',
|
||||
'get_base_container_name',
|
||||
'supported_registries',
|
||||
'RefResolver',
|
||||
@@ -32,7 +25,6 @@ if t.TYPE_CHECKING:
|
||||
from .oci import (
|
||||
CONTAINER_NAMES as CONTAINER_NAMES,
|
||||
RefResolver as RefResolver,
|
||||
build_container as build_container,
|
||||
get_base_container_name as get_base_container_name,
|
||||
get_base_container_tag as get_base_container_tag,
|
||||
supported_registries as supported_registries,
|
||||
|
||||
@@ -145,28 +145,28 @@ def construct_docker_options(
|
||||
if quantize:
|
||||
env_dict['OPENLLM_QUANTIZE'] = str(quantize)
|
||||
return DockerOptions(
|
||||
base_image=f'{oci.CONTAINER_NAMES[container_registry]}:{oci.get_base_container_tag(container_version_strategy)}',
|
||||
base_image=f'{oci.get_base_container_name(container_registry)}:{oci.get_base_container_tag(container_version_strategy)}',
|
||||
env=env_dict,
|
||||
dockerfile_template=dockerfile_template,
|
||||
)
|
||||
|
||||
|
||||
OPENLLM_MODEL_NAME = '# openllm: model name'
|
||||
OPENLLM_MODEL_ID = '# openllm: model id'
|
||||
OPENLLM_MODEL_TAG = '# openllm: model tag'
|
||||
OPENLLM_MODEL_ADAPTER_MAP = '# openllm: model adapter map'
|
||||
|
||||
|
||||
class ModelNameFormatter(string.Formatter):
|
||||
model_keyword: LiteralString = '__model_name__'
|
||||
class _ServiceVarsFormatter(string.Formatter):
|
||||
keyword: LiteralString = '__model_name__'
|
||||
identifier: LiteralString = '# openllm: model name'
|
||||
|
||||
def __init__(self, model_name: str):
|
||||
def __init__(self, target: str):
|
||||
"""The formatter that extends model_name to be formatted the 'service.py'."""
|
||||
super().__init__()
|
||||
self.model_name = model_name
|
||||
self.target = target
|
||||
|
||||
def vformat(self, format_string: str, *args: t.Any, **attrs: t.Any) -> t.Any:
|
||||
return super().vformat(format_string, (), {self.model_keyword: self.model_name})
|
||||
return super().vformat(format_string, (), {self.keyword: self.target})
|
||||
|
||||
def can_format(self, value: str) -> bool:
|
||||
try:
|
||||
@@ -175,17 +175,26 @@ class ModelNameFormatter(string.Formatter):
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
|
||||
class ModelIdFormatter(ModelNameFormatter):
|
||||
model_keyword: LiteralString = '__model_id__'
|
||||
def parse_line(self, line: str, nl: bool = True) -> str:
|
||||
if self.identifier not in line:
|
||||
return line
|
||||
gen = self.vformat(line)[: -(len(self.identifier) + 3)] + ('\n' if nl else '')
|
||||
return gen
|
||||
|
||||
|
||||
class ModelTagFormatter(ModelNameFormatter):
|
||||
model_keyword: LiteralString = '__model_tag__'
|
||||
class ModelIdFormatter(_ServiceVarsFormatter):
|
||||
keyword = '__model_id__'
|
||||
identifier = OPENLLM_MODEL_ID
|
||||
|
||||
|
||||
class ModelAdapterMapFormatter(ModelNameFormatter):
|
||||
model_keyword: LiteralString = '__model_adapter_map__'
|
||||
class ModelTagFormatter(_ServiceVarsFormatter):
|
||||
keyword = '__model_tag__'
|
||||
identifier = OPENLLM_MODEL_TAG
|
||||
|
||||
|
||||
class ModelAdapterMapFormatter(_ServiceVarsFormatter):
|
||||
keyword = '__model_adapter_map__'
|
||||
identifier = OPENLLM_MODEL_ADAPTER_MAP
|
||||
|
||||
|
||||
_service_file = Path(os.path.abspath(__file__)).parent.parent / '_service.py'
|
||||
@@ -195,41 +204,30 @@ _service_vars_file = Path(os.path.abspath(__file__)).parent.parent / '_service_v
|
||||
def write_service(llm: openllm.LLM[t.Any, t.Any], adapter_map: dict[str, str] | None, llm_fs: FS) -> None:
|
||||
from openllm_core.utils import DEBUG
|
||||
|
||||
model_name = llm.config['model_name']
|
||||
model_id = llm.model_id
|
||||
model_tag = str(llm.tag)
|
||||
model_id_formatter = ModelIdFormatter(llm.model_id)
|
||||
model_tag_formatter = ModelTagFormatter(str(llm.tag))
|
||||
adapter_map_formatter = ModelAdapterMapFormatter(orjson.dumps(adapter_map).decode())
|
||||
|
||||
logger.debug(
|
||||
'Generating service vars file for %s at %s (dir=%s)', model_name, '_service_vars.py', llm_fs.getsyspath('/')
|
||||
'Generating service vars file for %s at %s (dir=%s)', llm.model_id, '_service_vars.py', llm_fs.getsyspath('/')
|
||||
)
|
||||
with open(_service_vars_file.__fspath__(), 'r') as f:
|
||||
src_contents = f.readlines()
|
||||
for it in src_contents:
|
||||
if OPENLLM_MODEL_NAME in it:
|
||||
src_contents[src_contents.index(it)] = (
|
||||
ModelNameFormatter(model_name).vformat(it)[: -(len(OPENLLM_MODEL_NAME) + 3)] + '\n'
|
||||
)
|
||||
if OPENLLM_MODEL_ID in it:
|
||||
src_contents[src_contents.index(it)] = (
|
||||
ModelIdFormatter(model_id).vformat(it)[: -(len(OPENLLM_MODEL_ID) + 3)] + '\n'
|
||||
)
|
||||
elif OPENLLM_MODEL_TAG in it:
|
||||
src_contents[src_contents.index(it)] = (
|
||||
ModelTagFormatter(model_tag).vformat(it)[: -(len(OPENLLM_MODEL_TAG) + 3)] + '\n'
|
||||
)
|
||||
elif OPENLLM_MODEL_ADAPTER_MAP in it:
|
||||
src_contents[src_contents.index(it)] = (
|
||||
ModelAdapterMapFormatter(orjson.dumps(adapter_map).decode()).vformat(it)[
|
||||
: -(len(OPENLLM_MODEL_ADAPTER_MAP) + 3)
|
||||
]
|
||||
+ '\n'
|
||||
)
|
||||
script = f"# GENERATED BY 'openllm build {model_name}'. DO NOT EDIT\n\n" + ''.join(src_contents)
|
||||
for i, it in enumerate(src_contents):
|
||||
if model_id_formatter.identifier in it:
|
||||
src_contents[i] = model_id_formatter.parse_line(it)
|
||||
elif model_tag_formatter.identifier in it:
|
||||
src_contents[i] = model_tag_formatter.parse_line(it)
|
||||
elif adapter_map_formatter.identifier in it:
|
||||
src_contents[i] = adapter_map_formatter.parse_line(it)
|
||||
|
||||
script = f"# GENERATED BY 'openllm build {llm.model_id}'. DO NOT EDIT\n\n" + ''.join(src_contents)
|
||||
if DEBUG:
|
||||
logger.info('Generated script:\n%s', script)
|
||||
llm_fs.writetext('_service_vars.py', script)
|
||||
|
||||
logger.debug(
|
||||
'Generating service file for %s at %s (dir=%s)', model_name, llm.config['service_name'], llm_fs.getsyspath('/')
|
||||
'Generating service file for %s at %s (dir=%s)', llm.model_id, llm.config['service_name'], llm_fs.getsyspath('/')
|
||||
)
|
||||
with open(_service_file.__fspath__(), 'r') as f:
|
||||
service_src = f.read()
|
||||
|
||||
@@ -1,46 +1,25 @@
|
||||
# mypy: disable-error-code="misc"
|
||||
"""OCI-related utilities for OpenLLM. This module is considered to be internal and API are subjected to change."""
|
||||
|
||||
from __future__ import annotations
|
||||
import functools
|
||||
import importlib
|
||||
import logging
|
||||
import os
|
||||
import pathlib
|
||||
import shutil
|
||||
import subprocess
|
||||
import typing as t
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
import attr
|
||||
import orjson
|
||||
|
||||
import bentoml
|
||||
import openllm
|
||||
import openllm_core
|
||||
from openllm_core.exceptions import OpenLLMException
|
||||
from openllm_core.utils import codegen
|
||||
from openllm_core.utils.lazy import VersionInfo
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
from ghapi import all
|
||||
|
||||
from openllm_core._typing_compat import (
|
||||
LiteralContainerRegistry,
|
||||
LiteralContainerVersionStrategy,
|
||||
LiteralString,
|
||||
RefTuple,
|
||||
)
|
||||
|
||||
all = openllm_core.utils.LazyLoader('all', globals(), 'ghapi.all') # noqa: F811
|
||||
from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy, RefTuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_BUILDER = bentoml.container.get_backend('buildx')
|
||||
ROOT_DIR = pathlib.Path(os.path.abspath('__file__')).parent.parent.parent
|
||||
|
||||
# XXX: This registry will be hard code for now for easier to maintain
|
||||
# but in the future, we can infer based on git repo and everything to make it more options for users
|
||||
# to build the base image. For now, all of the base image will be <registry>/bentoml/openllm:...
|
||||
# NOTE: The ECR registry is the public one and currently only @bentoml team has access to push it.
|
||||
_CONTAINER_REGISTRY: dict[LiteralContainerRegistry, str] = {
|
||||
'docker': 'docker.io/bentoml/openllm',
|
||||
'gh': 'ghcr.io/bentoml/openllm',
|
||||
@@ -48,122 +27,54 @@ _CONTAINER_REGISTRY: dict[LiteralContainerRegistry, str] = {
|
||||
}
|
||||
|
||||
# TODO: support custom fork. Currently it only support openllm main.
|
||||
_OWNER = 'bentoml'
|
||||
_REPO = 'openllm'
|
||||
|
||||
_module_location = openllm_core.utils.pkg.source_locations('openllm')
|
||||
|
||||
|
||||
@functools.lru_cache
|
||||
@openllm_core.utils.apply(str.lower)
|
||||
def get_base_container_name(reg: LiteralContainerRegistry) -> str:
|
||||
return _CONTAINER_REGISTRY[reg]
|
||||
_OWNER, _REPO = 'bentoml', 'openllm'
|
||||
|
||||
|
||||
def _convert_version_from_string(s: str) -> VersionInfo:
|
||||
return VersionInfo.from_version_string(s)
|
||||
|
||||
|
||||
def _commit_time_range(r: int = 5) -> str:
|
||||
return (datetime.now(timezone.utc) - timedelta(days=r)).strftime('%Y-%m-%dT%H:%M:%SZ')
|
||||
|
||||
|
||||
class VersionNotSupported(openllm.exceptions.OpenLLMException):
|
||||
"""Raised when the stable release is too low that it doesn't include OpenLLM base container."""
|
||||
|
||||
|
||||
_RefTuple: type[RefTuple] = openllm_core.utils.codegen.make_attr_tuple_class(
|
||||
'_RefTuple', ['git_hash', 'version', 'strategy']
|
||||
)
|
||||
|
||||
|
||||
def nightly_resolver(cls: type[RefResolver]) -> str:
|
||||
# NOTE: all openllm container will have sha-<git_hash[:7]>
|
||||
# This will use docker to run skopeo to determine the correct latest tag that is available
|
||||
# If docker is not found, then fallback to previous behaviour. Which the container might not exists.
|
||||
docker_bin = shutil.which('docker')
|
||||
if docker_bin is None:
|
||||
logger.warning(
|
||||
'To get the correct available nightly container, make sure to have docker available. Fallback to previous behaviour for determine nightly hash (container might not exists due to the lack of GPU machine at a time. See https://github.com/bentoml/OpenLLM/pkgs/container/openllm for available image.)'
|
||||
)
|
||||
commits = t.cast('list[dict[str, t.Any]]', cls._ghapi.repos.list_commits(since=_commit_time_range()))
|
||||
return next(f'sha-{it["sha"][:7]}' for it in commits if '[skip ci]' not in it['commit']['message'])
|
||||
# now is the correct behaviour
|
||||
return orjson.loads(
|
||||
subprocess.check_output(
|
||||
[
|
||||
docker_bin,
|
||||
'run',
|
||||
'--rm',
|
||||
'-it',
|
||||
'quay.io/skopeo/stable:latest',
|
||||
'list-tags',
|
||||
'docker://ghcr.io/bentoml/openllm',
|
||||
]
|
||||
)
|
||||
.decode()
|
||||
.strip()
|
||||
)['Tags'][-2]
|
||||
_RefTuple: type[RefTuple] = codegen.make_attr_tuple_class('_RefTuple', ['git_hash', 'version', 'strategy'])
|
||||
|
||||
|
||||
@attr.attrs(eq=False, order=False, slots=True, frozen=True)
|
||||
class RefResolver:
|
||||
git_hash: str = attr.field()
|
||||
version: openllm_core.utils.VersionInfo = attr.field(converter=_convert_version_from_string)
|
||||
version: VersionInfo = attr.field(converter=_convert_version_from_string)
|
||||
strategy: LiteralContainerVersionStrategy = attr.field()
|
||||
_ghapi: t.ClassVar[all.GhApi] = all.GhApi(owner=_OWNER, repo=_REPO, authenticate=False)
|
||||
|
||||
@classmethod
|
||||
def _nightly_ref(cls) -> RefTuple:
|
||||
return _RefTuple((nightly_resolver(cls), 'refs/heads/main', 'nightly'))
|
||||
|
||||
@classmethod
|
||||
def _release_ref(cls, version_str: str | None = None) -> RefTuple:
|
||||
try:
|
||||
from ghapi.all import GhApi
|
||||
|
||||
ghapi = GhApi(owner=_OWNER, repo=_REPO, authenticate=False)
|
||||
meta = t.cast(t.Dict[str, t.Any], ghapi.repos.get_latest_release())
|
||||
except Exception as err:
|
||||
raise OpenLLMException('Failed to determine latest release version.') from err
|
||||
_use_base_strategy = version_str is None
|
||||
if version_str is None:
|
||||
# NOTE: This strategy will only support openllm>0.2.12
|
||||
meta: dict[str, t.Any] = cls._ghapi.repos.get_latest_release()
|
||||
version_str = meta['name'].lstrip('v')
|
||||
version: tuple[str, str | None] = (
|
||||
cls._ghapi.git.get_ref(ref=f"tags/{meta['name']}")['object']['sha'],
|
||||
version_str,
|
||||
)
|
||||
version = (ghapi.git.get_ref(ref=f"tags/{meta['name']}")['object']['sha'], version_str)
|
||||
else:
|
||||
version = ('', version_str)
|
||||
if openllm_core.utils.VersionInfo.from_version_string(t.cast(str, version_str)) < (0, 2, 12):
|
||||
raise VersionNotSupported(
|
||||
f"Version {version_str} doesn't support OpenLLM base container. Consider using 'nightly' or upgrade 'openllm>=0.2.12'"
|
||||
)
|
||||
return _RefTuple((*version, 'release' if _use_base_strategy else 'custom'))
|
||||
|
||||
@classmethod
|
||||
@functools.lru_cache(maxsize=64)
|
||||
def from_strategy(
|
||||
cls, strategy_or_version: t.Literal['release', 'nightly'] | LiteralString | None = None
|
||||
) -> RefResolver:
|
||||
def from_strategy(cls, strategy_or_version: LiteralContainerVersionStrategy | None = None) -> RefResolver:
|
||||
# using default strategy
|
||||
if strategy_or_version is None or strategy_or_version == 'release':
|
||||
return cls(*cls._release_ref())
|
||||
elif strategy_or_version == 'latest':
|
||||
return cls('latest', '0.0.0', 'latest')
|
||||
elif strategy_or_version == 'nightly':
|
||||
_ref = cls._nightly_ref()
|
||||
return cls(_ref[0], '0.0.0', _ref[-1])
|
||||
elif strategy_or_version in ('latest', 'nightly'): # latest is nightly
|
||||
return cls(git_hash='latest', version='0.0.0', strategy='latest')
|
||||
else:
|
||||
logger.warning(
|
||||
'Using custom %s. Make sure that it is at lease 0.2.12 for base container support.', strategy_or_version
|
||||
)
|
||||
return cls(*cls._release_ref(version_str=strategy_or_version))
|
||||
raise ValueError(f'Unknown strategy: {strategy_or_version}')
|
||||
|
||||
@property
|
||||
def tag(self) -> str:
|
||||
# NOTE: latest tag can also be nightly, but discouraged to use it. For nightly refer to use sha-<git_hash_short>
|
||||
if self.strategy == 'latest':
|
||||
return 'latest'
|
||||
elif self.strategy == 'nightly':
|
||||
return self.git_hash
|
||||
else:
|
||||
return repr(self.version)
|
||||
return 'latest' if self.strategy in {'latest', 'nightly'} else repr(self.version)
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=256)
|
||||
@@ -171,51 +82,8 @@ def get_base_container_tag(strategy: LiteralContainerVersionStrategy | None = No
|
||||
return RefResolver.from_strategy(strategy).tag
|
||||
|
||||
|
||||
def build_container(
|
||||
registries: LiteralContainerRegistry | t.Sequence[LiteralContainerRegistry] | None = None,
|
||||
version_strategy: LiteralContainerVersionStrategy = 'release',
|
||||
push: bool = False,
|
||||
machine: bool = False,
|
||||
) -> dict[str | LiteralContainerRegistry, str]:
|
||||
try:
|
||||
if not _BUILDER.health():
|
||||
raise openllm.exceptions.Error
|
||||
except (openllm.exceptions.Error, subprocess.CalledProcessError):
|
||||
raise RuntimeError(
|
||||
'Building base container requires BuildKit (via Buildx) to be installed. See https://docs.docker.com/build/buildx/install/ for instalation instruction.'
|
||||
) from None
|
||||
if not shutil.which('nvidia-container-runtime'):
|
||||
raise RuntimeError('NVIDIA Container Toolkit is required to compile CUDA kernel in container.')
|
||||
if not _module_location:
|
||||
raise RuntimeError("Failed to determine source location of 'openllm'. (Possible broken installation)")
|
||||
pyproject_path = pathlib.Path(_module_location).parent.parent / 'pyproject.toml'
|
||||
if not pyproject_path.exists():
|
||||
raise ValueError(
|
||||
"This utility can only be run within OpenLLM git repository. Clone it first with 'git clone https://github.com/bentoml/OpenLLM.git'"
|
||||
)
|
||||
if not registries:
|
||||
tags: dict[str | LiteralContainerRegistry, str] = {
|
||||
alias: f'{value}:{get_base_container_tag(version_strategy)}' for alias, value in _CONTAINER_REGISTRY.items()
|
||||
}
|
||||
else:
|
||||
registries = [registries] if isinstance(registries, str) else list(registries)
|
||||
tags = {name: f'{_CONTAINER_REGISTRY[name]}:{get_base_container_tag(version_strategy)}' for name in registries}
|
||||
try:
|
||||
outputs = _BUILDER.build(
|
||||
file=pathlib.Path(__file__).parent.joinpath('Dockerfile').resolve().__fspath__(),
|
||||
context_path=pyproject_path.parent.__fspath__(),
|
||||
tag=tuple(tags.values()),
|
||||
push=push,
|
||||
progress='plain' if openllm_core.utils.get_debug_mode() else 'auto',
|
||||
quiet=machine,
|
||||
)
|
||||
if machine and outputs is not None:
|
||||
tags['image_sha'] = outputs.decode('utf-8').strip()
|
||||
except Exception as err:
|
||||
raise openllm.exceptions.OpenLLMException(
|
||||
f'Failed to containerize base container images (Scroll up to see error above, or set DEBUG=5 for more traceback):\n{err}'
|
||||
) from err
|
||||
return tags
|
||||
def get_base_container_name(reg: LiteralContainerRegistry) -> str:
|
||||
return _CONTAINER_REGISTRY[reg]
|
||||
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
@@ -225,7 +93,6 @@ if t.TYPE_CHECKING:
|
||||
__all__ = [
|
||||
'CONTAINER_NAMES',
|
||||
'get_base_container_tag',
|
||||
'build_container',
|
||||
'get_base_container_name',
|
||||
'supported_registries',
|
||||
'RefResolver',
|
||||
|
||||
Reference in New Issue
Block a user