mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-06-12 02:20:32 -04:00
chore(style): synchronized style across packages [skip ci]
Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
@@ -6,15 +6,15 @@ from __future__ import annotations
|
||||
import os, typing as t
|
||||
from openllm_core.utils import LazyModule
|
||||
_import_structure: dict[str, list[str]] = {
|
||||
"_package": ["create_bento", "build_editable", "construct_python_options", "construct_docker_options"],
|
||||
"oci": ["CONTAINER_NAMES", "get_base_container_tag", "build_container", "get_base_container_name", "supported_registries", "RefResolver"]
|
||||
'_package': ['create_bento', 'build_editable', 'construct_python_options', 'construct_docker_options'],
|
||||
'oci': ['CONTAINER_NAMES', 'get_base_container_tag', 'build_container', 'get_base_container_name', 'supported_registries', 'RefResolver']
|
||||
}
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
from . import _package as _package, oci as oci
|
||||
from ._package import build_editable as build_editable, construct_docker_options as construct_docker_options, construct_python_options as construct_python_options, create_bento as create_bento
|
||||
from .oci import CONTAINER_NAMES as CONTAINER_NAMES, RefResolver as RefResolver, build_container as build_container, get_base_container_name as get_base_container_name, get_base_container_tag as get_base_container_tag, supported_registries as supported_registries
|
||||
__lazy = LazyModule(__name__, os.path.abspath("__file__"), _import_structure)
|
||||
__lazy = LazyModule(__name__, os.path.abspath('__file__'), _import_structure)
|
||||
__all__ = __lazy.__all__
|
||||
__dir__ = __lazy.__dir__
|
||||
__getattr__ = __lazy.__getattr__
|
||||
|
||||
@@ -15,77 +15,77 @@ if t.TYPE_CHECKING:
|
||||
from bentoml._internal.models.model import ModelStore
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
OPENLLM_DEV_BUILD = "OPENLLM_DEV_BUILD"
|
||||
def build_editable(path: str, package: t.Literal["openllm", "openllm_core", "openllm_client"] = "openllm") -> str | None:
|
||||
"""Build OpenLLM if the OPENLLM_DEV_BUILD environment variable is set."""
|
||||
if str(os.environ.get(OPENLLM_DEV_BUILD, False)).lower() != "true": return None
|
||||
OPENLLM_DEV_BUILD = 'OPENLLM_DEV_BUILD'
|
||||
def build_editable(path: str, package: t.Literal['openllm', 'openllm_core', 'openllm_client'] = 'openllm') -> str | None:
|
||||
'''Build OpenLLM if the OPENLLM_DEV_BUILD environment variable is set.'''
|
||||
if str(os.environ.get(OPENLLM_DEV_BUILD, False)).lower() != 'true': return None
|
||||
# We need to build the package in editable mode, so that we can import it
|
||||
from build import ProjectBuilder
|
||||
from build.env import IsolatedEnvBuilder
|
||||
module_location = openllm_core.utils.pkg.source_locations(package)
|
||||
if not module_location: raise RuntimeError("Could not find the source location of OpenLLM. Make sure to unset OPENLLM_DEV_BUILD if you are developing OpenLLM.")
|
||||
pyproject_path = Path(module_location).parent.parent / "pyproject.toml"
|
||||
if not module_location: raise RuntimeError('Could not find the source location of OpenLLM. Make sure to unset OPENLLM_DEV_BUILD if you are developing OpenLLM.')
|
||||
pyproject_path = Path(module_location).parent.parent / 'pyproject.toml'
|
||||
if os.path.isfile(pyproject_path.__fspath__()):
|
||||
logger.info("Generating built wheels for package %s...", package)
|
||||
logger.info('Generating built wheels for package %s...', package)
|
||||
with IsolatedEnvBuilder() as env:
|
||||
builder = ProjectBuilder(pyproject_path.parent)
|
||||
builder.python_executable = env.executable
|
||||
builder.scripts_dir = env.scripts_dir
|
||||
env.install(builder.build_system_requires)
|
||||
return builder.build("wheel", path, config_settings={"--global-option": "--quiet"})
|
||||
raise RuntimeError("Custom OpenLLM build is currently not supported. Please install OpenLLM from PyPI or built it from Git source.")
|
||||
return builder.build('wheel', path, config_settings={'--global-option': '--quiet'})
|
||||
raise RuntimeError('Custom OpenLLM build is currently not supported. Please install OpenLLM from PyPI or built it from Git source.')
|
||||
def construct_python_options(llm: openllm.LLM[t.Any, t.Any], llm_fs: FS, extra_dependencies: tuple[str, ...] | None = None, adapter_map: dict[str, str | None] | None = None,) -> PythonOptions:
|
||||
packages = ["openllm", "scipy"] # apparently bnb misses this one
|
||||
if adapter_map is not None: packages += ["openllm[fine-tune]"]
|
||||
packages = ['openllm', 'scipy'] # apparently bnb misses this one
|
||||
if adapter_map is not None: packages += ['openllm[fine-tune]']
|
||||
# NOTE: add openllm to the default dependencies
|
||||
# if users has openllm custom built wheels, it will still respect
|
||||
# that since bentoml will always install dependencies from requirements.txt
|
||||
# first, then proceed to install everything inside the wheels/ folder.
|
||||
if extra_dependencies is not None: packages += [f"openllm[{k}]" for k in extra_dependencies]
|
||||
if extra_dependencies is not None: packages += [f'openllm[{k}]' for k in extra_dependencies]
|
||||
|
||||
req = llm.config["requirements"]
|
||||
req = llm.config['requirements']
|
||||
if req is not None: packages.extend(req)
|
||||
if str(os.environ.get("BENTOML_BUNDLE_LOCAL_BUILD", False)).lower() == "false": packages.append(f"bentoml>={'.'.join([str(i) for i in openllm_core.utils.pkg.pkg_version_info('bentoml')])}")
|
||||
if str(os.environ.get('BENTOML_BUNDLE_LOCAL_BUILD', False)).lower() == 'false': packages.append(f"bentoml>={'.'.join([str(i) for i in openllm_core.utils.pkg.pkg_version_info('bentoml')])}")
|
||||
|
||||
env = llm.config["env"]
|
||||
framework_envvar = env["framework_value"]
|
||||
if framework_envvar == "flax":
|
||||
env = llm.config['env']
|
||||
framework_envvar = env['framework_value']
|
||||
if framework_envvar == 'flax':
|
||||
if not openllm_core.utils.is_flax_available(): raise ValueError(f"Flax is not available, while {env.framework} is set to 'flax'")
|
||||
packages.extend([importlib.metadata.version("flax"), importlib.metadata.version("jax"), importlib.metadata.version("jaxlib")])
|
||||
elif framework_envvar == "tf":
|
||||
packages.extend([importlib.metadata.version('flax'), importlib.metadata.version('jax'), importlib.metadata.version('jaxlib')])
|
||||
elif framework_envvar == 'tf':
|
||||
if not openllm_core.utils.is_tf_available(): raise ValueError(f"TensorFlow is not available, while {env.framework} is set to 'tf'")
|
||||
candidates = (
|
||||
"tensorflow",
|
||||
"tensorflow-cpu",
|
||||
"tensorflow-gpu",
|
||||
"tf-nightly",
|
||||
"tf-nightly-cpu",
|
||||
"tf-nightly-gpu",
|
||||
"intel-tensorflow",
|
||||
"intel-tensorflow-avx512",
|
||||
"tensorflow-rocm",
|
||||
"tensorflow-macos",
|
||||
'tensorflow',
|
||||
'tensorflow-cpu',
|
||||
'tensorflow-gpu',
|
||||
'tf-nightly',
|
||||
'tf-nightly-cpu',
|
||||
'tf-nightly-gpu',
|
||||
'intel-tensorflow',
|
||||
'intel-tensorflow-avx512',
|
||||
'tensorflow-rocm',
|
||||
'tensorflow-macos',
|
||||
)
|
||||
# For the metadata, we have to look for both tensorflow and tensorflow-cpu
|
||||
for candidate in candidates:
|
||||
try:
|
||||
pkgver = importlib.metadata.version(candidate)
|
||||
if pkgver == candidate: packages.extend(["tensorflow"])
|
||||
if pkgver == candidate: packages.extend(['tensorflow'])
|
||||
else:
|
||||
_tf_version = importlib.metadata.version(candidate)
|
||||
packages.extend([f"tensorflow>={_tf_version}"])
|
||||
packages.extend([f'tensorflow>={_tf_version}'])
|
||||
break
|
||||
except importlib.metadata.PackageNotFoundError:
|
||||
pass # Ok to ignore here since we actually need to check for all possible tensorflow distribution.
|
||||
else:
|
||||
if not openllm_core.utils.is_torch_available(): raise ValueError("PyTorch is not available. Make sure to have it locally installed.")
|
||||
if not openllm_core.utils.is_torch_available(): raise ValueError('PyTorch is not available. Make sure to have it locally installed.')
|
||||
packages.extend([f'torch>={importlib.metadata.version("torch")}'])
|
||||
wheels: list[str] = []
|
||||
built_wheels: list[str | None] = [
|
||||
build_editable(llm_fs.getsyspath("/"), t.cast(t.Literal["openllm", "openllm_core", "openllm_client"], p)) for p in ("openllm_core", "openllm_client", "openllm")
|
||||
build_editable(llm_fs.getsyspath('/'), t.cast(t.Literal['openllm', 'openllm_core', 'openllm_client'], p)) for p in ('openllm_core', 'openllm_client', 'openllm')
|
||||
]
|
||||
if all(i for i in built_wheels): wheels.extend([llm_fs.getsyspath(f"/{i.split('/')[-1]}") for i in t.cast(t.List[str], built_wheels)])
|
||||
return PythonOptions(packages=packages, wheels=wheels, lock_packages=False, extra_index_url=["https://download.pytorch.org/whl/cu118"])
|
||||
return PythonOptions(packages=packages, wheels=wheels, lock_packages=False, extra_index_url=['https://download.pytorch.org/whl/cu118'])
|
||||
def construct_docker_options(
|
||||
llm: openllm.LLM[t.Any, t.Any],
|
||||
_: FS,
|
||||
@@ -94,39 +94,39 @@ def construct_docker_options(
|
||||
bettertransformer: bool | None,
|
||||
adapter_map: dict[str, str | None] | None,
|
||||
dockerfile_template: str | None,
|
||||
runtime: t.Literal["ggml", "transformers"],
|
||||
serialisation_format: t.Literal["safetensors", "legacy"],
|
||||
runtime: t.Literal['ggml', 'transformers'],
|
||||
serialisation_format: t.Literal['safetensors', 'legacy'],
|
||||
container_registry: LiteralContainerRegistry,
|
||||
container_version_strategy: LiteralContainerVersionStrategy
|
||||
) -> DockerOptions:
|
||||
from openllm.cli._factory import parse_config_options
|
||||
environ = parse_config_options(llm.config, llm.config["timeout"], workers_per_resource, None, True, os.environ.copy())
|
||||
env: openllm_core.utils.EnvVarMixin = llm.config["env"]
|
||||
if env["framework_value"] == "vllm": serialisation_format = "legacy"
|
||||
environ = parse_config_options(llm.config, llm.config['timeout'], workers_per_resource, None, True, os.environ.copy())
|
||||
env: openllm_core.utils.EnvVarMixin = llm.config['env']
|
||||
if env['framework_value'] == 'vllm': serialisation_format = 'legacy'
|
||||
env_dict = {
|
||||
env.framework: env["framework_value"],
|
||||
env.framework: env['framework_value'],
|
||||
env.config: f"'{llm.config.model_dump_json().decode()}'",
|
||||
env.model_id: f"/home/bentoml/bento/models/{llm.tag.path()}",
|
||||
"OPENLLM_MODEL": llm.config["model_name"],
|
||||
"OPENLLM_SERIALIZATION": serialisation_format,
|
||||
"OPENLLM_ADAPTER_MAP": f"'{orjson.dumps(adapter_map).decode()}'",
|
||||
"BENTOML_DEBUG": str(True),
|
||||
"BENTOML_QUIET": str(False),
|
||||
"BENTOML_CONFIG_OPTIONS": f"'{environ['BENTOML_CONFIG_OPTIONS']}'",
|
||||
env.model_id: f'/home/bentoml/bento/models/{llm.tag.path()}',
|
||||
'OPENLLM_MODEL': llm.config['model_name'],
|
||||
'OPENLLM_SERIALIZATION': serialisation_format,
|
||||
'OPENLLM_ADAPTER_MAP': f"'{orjson.dumps(adapter_map).decode()}'",
|
||||
'BENTOML_DEBUG': str(True),
|
||||
'BENTOML_QUIET': str(False),
|
||||
'BENTOML_CONFIG_OPTIONS': f"'{environ['BENTOML_CONFIG_OPTIONS']}'",
|
||||
}
|
||||
if adapter_map: env_dict["BITSANDBYTES_NOWELCOME"] = os.environ.get("BITSANDBYTES_NOWELCOME", "1")
|
||||
if adapter_map: env_dict['BITSANDBYTES_NOWELCOME'] = os.environ.get('BITSANDBYTES_NOWELCOME', '1')
|
||||
|
||||
# We need to handle None separately here, as env from subprocess doesn't accept None value.
|
||||
_env = openllm_core.utils.EnvVarMixin(llm.config["model_name"], bettertransformer=bettertransformer, quantize=quantize, runtime=runtime)
|
||||
_env = openllm_core.utils.EnvVarMixin(llm.config['model_name'], bettertransformer=bettertransformer, quantize=quantize, runtime=runtime)
|
||||
|
||||
env_dict[_env.bettertransformer] = str(_env["bettertransformer_value"])
|
||||
if _env["quantize_value"] is not None: env_dict[_env.quantize] = t.cast(str, _env["quantize_value"])
|
||||
env_dict[_env.runtime] = _env["runtime_value"]
|
||||
return DockerOptions(base_image=f"{oci.CONTAINER_NAMES[container_registry]}:{oci.get_base_container_tag(container_version_strategy)}", env=env_dict, dockerfile_template=dockerfile_template)
|
||||
OPENLLM_MODEL_NAME = "# openllm: model name"
|
||||
OPENLLM_MODEL_ADAPTER_MAP = "# openllm: model adapter map"
|
||||
env_dict[_env.bettertransformer] = str(_env['bettertransformer_value'])
|
||||
if _env['quantize_value'] is not None: env_dict[_env.quantize] = t.cast(str, _env['quantize_value'])
|
||||
env_dict[_env.runtime] = _env['runtime_value']
|
||||
return DockerOptions(base_image=f'{oci.CONTAINER_NAMES[container_registry]}:{oci.get_base_container_tag(container_version_strategy)}', env=env_dict, dockerfile_template=dockerfile_template)
|
||||
OPENLLM_MODEL_NAME = '# openllm: model name'
|
||||
OPENLLM_MODEL_ADAPTER_MAP = '# openllm: model adapter map'
|
||||
class ModelNameFormatter(string.Formatter):
|
||||
model_keyword: LiteralString = "__model_name__"
|
||||
model_keyword: LiteralString = '__model_name__'
|
||||
|
||||
def __init__(self, model_name: str):
|
||||
"""The formatter that extends model_name to be formatted the 'service.py'."""
|
||||
@@ -143,23 +143,23 @@ class ModelNameFormatter(string.Formatter):
|
||||
except ValueError:
|
||||
return False
|
||||
class ModelIdFormatter(ModelNameFormatter):
|
||||
model_keyword: LiteralString = "__model_id__"
|
||||
model_keyword: LiteralString = '__model_id__'
|
||||
class ModelAdapterMapFormatter(ModelNameFormatter):
|
||||
model_keyword: LiteralString = "__model_adapter_map__"
|
||||
_service_file = Path(os.path.abspath(__file__)).parent.parent / "_service.py"
|
||||
model_keyword: LiteralString = '__model_adapter_map__'
|
||||
_service_file = Path(os.path.abspath(__file__)).parent.parent / '_service.py'
|
||||
def write_service(llm: openllm.LLM[t.Any, t.Any], adapter_map: dict[str, str | None] | None, llm_fs: FS) -> None:
|
||||
from openllm_core.utils import DEBUG
|
||||
model_name = llm.config["model_name"]
|
||||
logger.debug("Generating service file for %s at %s (dir=%s)", model_name, llm.config["service_name"], llm_fs.getsyspath("/"))
|
||||
with open(_service_file.__fspath__(), "r") as f:
|
||||
model_name = llm.config['model_name']
|
||||
logger.debug('Generating service file for %s at %s (dir=%s)', model_name, llm.config['service_name'], llm_fs.getsyspath('/'))
|
||||
with open(_service_file.__fspath__(), 'r') as f:
|
||||
src_contents = f.readlines()
|
||||
for it in src_contents:
|
||||
if OPENLLM_MODEL_NAME in it: src_contents[src_contents.index(it)] = (ModelNameFormatter(model_name).vformat(it)[:-(len(OPENLLM_MODEL_NAME) + 3)] + "\n")
|
||||
if OPENLLM_MODEL_NAME in it: src_contents[src_contents.index(it)] = (ModelNameFormatter(model_name).vformat(it)[:-(len(OPENLLM_MODEL_NAME) + 3)] + '\n')
|
||||
elif OPENLLM_MODEL_ADAPTER_MAP in it:
|
||||
src_contents[src_contents.index(it)] = (ModelAdapterMapFormatter(orjson.dumps(adapter_map).decode()).vformat(it)[:-(len(OPENLLM_MODEL_ADAPTER_MAP) + 3)] + "\n")
|
||||
script = f"# GENERATED BY 'openllm build {model_name}'. DO NOT EDIT\n\n" + "".join(src_contents)
|
||||
if DEBUG: logger.info("Generated script:\n%s", script)
|
||||
llm_fs.writetext(llm.config["service_name"], script)
|
||||
src_contents[src_contents.index(it)] = (ModelAdapterMapFormatter(orjson.dumps(adapter_map).decode()).vformat(it)[:-(len(OPENLLM_MODEL_ADAPTER_MAP) + 3)] + '\n')
|
||||
script = f"# GENERATED BY 'openllm build {model_name}'. DO NOT EDIT\n\n" + ''.join(src_contents)
|
||||
if DEBUG: logger.info('Generated script:\n%s', script)
|
||||
llm_fs.writetext(llm.config['service_name'], script)
|
||||
@inject
|
||||
def create_bento(
|
||||
bento_tag: bentoml.Tag,
|
||||
@@ -171,20 +171,20 @@ def create_bento(
|
||||
dockerfile_template: str | None,
|
||||
adapter_map: dict[str, str | None] | None = None,
|
||||
extra_dependencies: tuple[str, ...] | None = None,
|
||||
runtime: t.Literal["ggml", "transformers"] = "transformers",
|
||||
serialisation_format: t.Literal["safetensors", "legacy"] = "safetensors",
|
||||
container_registry: LiteralContainerRegistry = "ecr",
|
||||
container_version_strategy: LiteralContainerVersionStrategy = "release",
|
||||
runtime: t.Literal['ggml', 'transformers'] = 'transformers',
|
||||
serialisation_format: t.Literal['safetensors', 'legacy'] = 'safetensors',
|
||||
container_registry: LiteralContainerRegistry = 'ecr',
|
||||
container_version_strategy: LiteralContainerVersionStrategy = 'release',
|
||||
_bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
|
||||
_model_store: ModelStore = Provide[BentoMLContainer.model_store]
|
||||
) -> bentoml.Bento:
|
||||
framework_envvar = llm.config["env"]["framework_value"]
|
||||
framework_envvar = llm.config['env']['framework_value']
|
||||
labels = dict(llm.identifying_params)
|
||||
labels.update({"_type": llm.llm_type, "_framework": framework_envvar, "start_name": llm.config["start_name"], "base_name_or_path": llm.model_id, "bundler": "openllm.bundle"})
|
||||
labels.update({'_type': llm.llm_type, '_framework': framework_envvar, 'start_name': llm.config['start_name'], 'base_name_or_path': llm.model_id, 'bundler': 'openllm.bundle'})
|
||||
if adapter_map: labels.update(adapter_map)
|
||||
if isinstance(workers_per_resource, str):
|
||||
if workers_per_resource == "round_robin": workers_per_resource = 1.0
|
||||
elif workers_per_resource == "conserved": workers_per_resource = 1.0 if openllm_core.utils.device_count() == 0 else float(1 / openllm_core.utils.device_count())
|
||||
if workers_per_resource == 'round_robin': workers_per_resource = 1.0
|
||||
elif workers_per_resource == 'conserved': workers_per_resource = 1.0 if openllm_core.utils.device_count() == 0 else float(1 / openllm_core.utils.device_count())
|
||||
else:
|
||||
try:
|
||||
workers_per_resource = float(workers_per_resource)
|
||||
@@ -192,18 +192,18 @@ def create_bento(
|
||||
raise ValueError("'workers_per_resource' only accept ['round_robin', 'conserved'] as possible strategies.") from None
|
||||
elif isinstance(workers_per_resource, int):
|
||||
workers_per_resource = float(workers_per_resource)
|
||||
logger.info("Building Bento for '%s'", llm.config["start_name"])
|
||||
logger.info("Building Bento for '%s'", llm.config['start_name'])
|
||||
# add service.py definition to this temporary folder
|
||||
write_service(llm, adapter_map, llm_fs)
|
||||
|
||||
llm_spec = ModelSpec.from_item({"tag": str(llm.tag), "alias": llm.tag.name})
|
||||
llm_spec = ModelSpec.from_item({'tag': str(llm.tag), 'alias': llm.tag.name})
|
||||
build_config = BentoBuildConfig(
|
||||
service=f"{llm.config['service_name']}:svc",
|
||||
name=bento_tag.name,
|
||||
labels=labels,
|
||||
description=f"OpenLLM service for {llm.config['start_name']}",
|
||||
include=list(llm_fs.walk.files()),
|
||||
exclude=["/venv", "/.venv", "__pycache__/", "*.py[cod]", "*$py.class"],
|
||||
exclude=['/venv', '/.venv', '__pycache__/', '*.py[cod]', '*$py.class'],
|
||||
python=construct_python_options(llm, llm_fs, extra_dependencies, adapter_map),
|
||||
models=[llm_spec],
|
||||
docker=construct_docker_options(
|
||||
@@ -211,20 +211,20 @@ def create_bento(
|
||||
)
|
||||
)
|
||||
|
||||
bento = bentoml.Bento.create(build_config=build_config, version=bento_tag.version, build_ctx=llm_fs.getsyspath("/"))
|
||||
bento = bentoml.Bento.create(build_config=build_config, version=bento_tag.version, build_ctx=llm_fs.getsyspath('/'))
|
||||
# NOTE: the model_id_path here are only used for setting this environment variable within the container built with for BentoLLM.
|
||||
service_fs_path = fs.path.join("src", llm.config["service_name"])
|
||||
service_fs_path = fs.path.join('src', llm.config['service_name'])
|
||||
service_path = bento._fs.getsyspath(service_fs_path)
|
||||
with open(service_path, "r") as f:
|
||||
with open(service_path, 'r') as f:
|
||||
service_contents = f.readlines()
|
||||
|
||||
for it in service_contents:
|
||||
if "__bento_name__" in it: service_contents[service_contents.index(it)] = it.format(__bento_name__=str(bento.tag))
|
||||
if '__bento_name__' in it: service_contents[service_contents.index(it)] = it.format(__bento_name__=str(bento.tag))
|
||||
|
||||
script = "".join(service_contents)
|
||||
if openllm_core.utils.DEBUG: logger.info("Generated script:\n%s", script)
|
||||
script = ''.join(service_contents)
|
||||
if openllm_core.utils.DEBUG: logger.info('Generated script:\n%s', script)
|
||||
|
||||
bento._fs.writetext(service_fs_path, script)
|
||||
if "model_store" in inspect.signature(bento.save).parameters: return bento.save(bento_store=_bento_store, model_store=_model_store)
|
||||
if 'model_store' in inspect.signature(bento.save).parameters: return bento.save(bento_store=_bento_store, model_store=_model_store)
|
||||
# backward arguments. `model_store` is added recently
|
||||
return bento.save(bento_store=_bento_store)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# mypy: disable-error-code="misc"
|
||||
"""OCI-related utilities for OpenLLM. This module is considered to be internal and API are subjected to change."""
|
||||
'''OCI-related utilities for OpenLLM. This module is considered to be internal and API are subjected to change.'''
|
||||
from __future__ import annotations
|
||||
import functools, importlib, logging, os, pathlib, shutil, subprocess, typing as t, openllm_core
|
||||
from datetime import datetime, timedelta, timezone
|
||||
@@ -10,24 +10,24 @@ if t.TYPE_CHECKING:
|
||||
from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy
|
||||
from ghapi import all
|
||||
from openllm_core._typing_compat import RefTuple, LiteralString
|
||||
all = openllm_core.utils.LazyLoader("all", globals(), "ghapi.all") # noqa: F811
|
||||
all = openllm_core.utils.LazyLoader('all', globals(), 'ghapi.all') # noqa: F811
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_BUILDER = bentoml.container.get_backend("buildx")
|
||||
ROOT_DIR = pathlib.Path(os.path.abspath("__file__")).parent.parent.parent
|
||||
_BUILDER = bentoml.container.get_backend('buildx')
|
||||
ROOT_DIR = pathlib.Path(os.path.abspath('__file__')).parent.parent.parent
|
||||
|
||||
# XXX: This registry will be hard code for now for easier to maintain
|
||||
# but in the future, we can infer based on git repo and everything to make it more options for users
|
||||
# to build the base image. For now, all of the base image will be <registry>/bentoml/openllm:...
|
||||
# NOTE: The ECR registry is the public one and currently only @bentoml team has access to push it.
|
||||
_CONTAINER_REGISTRY: dict[LiteralContainerRegistry, str] = {"docker": "docker.io/bentoml/openllm", "gh": "ghcr.io/bentoml/openllm", "ecr": "public.ecr.aws/y5w8i4y6/bentoml/openllm"}
|
||||
_CONTAINER_REGISTRY: dict[LiteralContainerRegistry, str] = {'docker': 'docker.io/bentoml/openllm', 'gh': 'ghcr.io/bentoml/openllm', 'ecr': 'public.ecr.aws/y5w8i4y6/bentoml/openllm'}
|
||||
|
||||
# TODO: support custom fork. Currently it only support openllm main.
|
||||
_OWNER = "bentoml"
|
||||
_REPO = "openllm"
|
||||
_OWNER = 'bentoml'
|
||||
_REPO = 'openllm'
|
||||
|
||||
_module_location = openllm_core.utils.pkg.source_locations("openllm")
|
||||
_module_location = openllm_core.utils.pkg.source_locations('openllm')
|
||||
@functools.lru_cache
|
||||
@openllm_core.utils.apply(str.lower)
|
||||
def get_base_container_name(reg: LiteralContainerRegistry) -> str:
|
||||
@@ -35,23 +35,23 @@ def get_base_container_name(reg: LiteralContainerRegistry) -> str:
|
||||
def _convert_version_from_string(s: str) -> VersionInfo:
|
||||
return VersionInfo.from_version_string(s)
|
||||
def _commit_time_range(r: int = 5) -> str:
|
||||
return (datetime.now(timezone.utc) - timedelta(days=r)).strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
return (datetime.now(timezone.utc) - timedelta(days=r)).strftime('%Y-%m-%dT%H:%M:%SZ')
|
||||
class VersionNotSupported(openllm.exceptions.OpenLLMException):
|
||||
"""Raised when the stable release is too low that it doesn't include OpenLLM base container."""
|
||||
_RefTuple: type[RefTuple] = openllm_core.utils.codegen.make_attr_tuple_class("_RefTuple", ["git_hash", "version", "strategy"])
|
||||
_RefTuple: type[RefTuple] = openllm_core.utils.codegen.make_attr_tuple_class('_RefTuple', ['git_hash', 'version', 'strategy'])
|
||||
def nightly_resolver(cls: type[RefResolver]) -> str:
|
||||
# NOTE: all openllm container will have sha-<git_hash[:7]>
|
||||
# This will use docker to run skopeo to determine the correct latest tag that is available
|
||||
# If docker is not found, then fallback to previous behaviour. Which the container might not exists.
|
||||
docker_bin = shutil.which("docker")
|
||||
docker_bin = shutil.which('docker')
|
||||
if docker_bin is None:
|
||||
logger.warning(
|
||||
"To get the correct available nightly container, make sure to have docker available. Fallback to previous behaviour for determine nightly hash (container might not exists due to the lack of GPU machine at a time. See https://github.com/bentoml/OpenLLM/pkgs/container/openllm for available image.)"
|
||||
'To get the correct available nightly container, make sure to have docker available. Fallback to previous behaviour for determine nightly hash (container might not exists due to the lack of GPU machine at a time. See https://github.com/bentoml/OpenLLM/pkgs/container/openllm for available image.)'
|
||||
)
|
||||
commits = t.cast("list[dict[str, t.Any]]", cls._ghapi.repos.list_commits(since=_commit_time_range()))
|
||||
return next(f'sha-{it["sha"][:7]}' for it in commits if "[skip ci]" not in it["commit"]["message"])
|
||||
commits = t.cast('list[dict[str, t.Any]]', cls._ghapi.repos.list_commits(since=_commit_time_range()))
|
||||
return next(f'sha-{it["sha"][:7]}' for it in commits if '[skip ci]' not in it['commit']['message'])
|
||||
# now is the correct behaviour
|
||||
return orjson.loads(subprocess.check_output([docker_bin, "run", "--rm", "-it", "quay.io/skopeo/stable:latest", "list-tags", "docker://ghcr.io/bentoml/openllm"]).decode().strip())["Tags"][-2]
|
||||
return orjson.loads(subprocess.check_output([docker_bin, 'run', '--rm', '-it', 'quay.io/skopeo/stable:latest', 'list-tags', 'docker://ghcr.io/bentoml/openllm']).decode().strip())['Tags'][-2]
|
||||
@attr.attrs(eq=False, order=False, slots=True, frozen=True)
|
||||
class RefResolver:
|
||||
git_hash: str = attr.field()
|
||||
@@ -61,7 +61,7 @@ class RefResolver:
|
||||
|
||||
@classmethod
|
||||
def _nightly_ref(cls) -> RefTuple:
|
||||
return _RefTuple((nightly_resolver(cls), "refs/heads/main", "nightly"))
|
||||
return _RefTuple((nightly_resolver(cls), 'refs/heads/main', 'nightly'))
|
||||
|
||||
@classmethod
|
||||
def _release_ref(cls, version_str: str | None = None) -> RefTuple:
|
||||
@@ -69,80 +69,80 @@ class RefResolver:
|
||||
if version_str is None:
|
||||
# NOTE: This strategy will only support openllm>0.2.12
|
||||
meta: dict[str, t.Any] = cls._ghapi.repos.get_latest_release()
|
||||
version_str = meta["name"].lstrip("v")
|
||||
version: tuple[str, str | None] = (cls._ghapi.git.get_ref(ref=f"tags/{meta['name']}")["object"]["sha"], version_str)
|
||||
version_str = meta['name'].lstrip('v')
|
||||
version: tuple[str, str | None] = (cls._ghapi.git.get_ref(ref=f"tags/{meta['name']}")['object']['sha'], version_str)
|
||||
else:
|
||||
version = ("", version_str)
|
||||
version = ('', version_str)
|
||||
if openllm_core.utils.VersionInfo.from_version_string(t.cast(str, version_str)) < (0, 2, 12):
|
||||
raise VersionNotSupported(f"Version {version_str} doesn't support OpenLLM base container. Consider using 'nightly' or upgrade 'openllm>=0.2.12'")
|
||||
return _RefTuple((*version, "release" if _use_base_strategy else "custom"))
|
||||
return _RefTuple((*version, 'release' if _use_base_strategy else 'custom'))
|
||||
|
||||
@classmethod
|
||||
@functools.lru_cache(maxsize=64)
|
||||
def from_strategy(cls, strategy_or_version: t.Literal["release", "nightly"] | LiteralString | None = None) -> RefResolver:
|
||||
def from_strategy(cls, strategy_or_version: t.Literal['release', 'nightly'] | LiteralString | None = None) -> RefResolver:
|
||||
# using default strategy
|
||||
if strategy_or_version is None or strategy_or_version == "release": return cls(*cls._release_ref())
|
||||
elif strategy_or_version == "latest": return cls("latest", "0.0.0", "latest")
|
||||
elif strategy_or_version == "nightly":
|
||||
if strategy_or_version is None or strategy_or_version == 'release': return cls(*cls._release_ref())
|
||||
elif strategy_or_version == 'latest': return cls('latest', '0.0.0', 'latest')
|
||||
elif strategy_or_version == 'nightly':
|
||||
_ref = cls._nightly_ref()
|
||||
return cls(_ref[0], "0.0.0", _ref[-1])
|
||||
return cls(_ref[0], '0.0.0', _ref[-1])
|
||||
else:
|
||||
logger.warning("Using custom %s. Make sure that it is at lease 0.2.12 for base container support.", strategy_or_version)
|
||||
logger.warning('Using custom %s. Make sure that it is at lease 0.2.12 for base container support.', strategy_or_version)
|
||||
return cls(*cls._release_ref(version_str=strategy_or_version))
|
||||
|
||||
@property
|
||||
def tag(self) -> str:
|
||||
# NOTE: latest tag can also be nightly, but discouraged to use it. For nightly refer to use sha-<git_hash_short>
|
||||
if self.strategy == "latest": return "latest"
|
||||
elif self.strategy == "nightly": return self.git_hash
|
||||
if self.strategy == 'latest': return 'latest'
|
||||
elif self.strategy == 'nightly': return self.git_hash
|
||||
else: return repr(self.version)
|
||||
@functools.lru_cache(maxsize=256)
|
||||
def get_base_container_tag(strategy: LiteralContainerVersionStrategy | None = None) -> str:
|
||||
return RefResolver.from_strategy(strategy).tag
|
||||
def build_container(
|
||||
registries: LiteralContainerRegistry | t.Sequence[LiteralContainerRegistry] | None = None,
|
||||
version_strategy: LiteralContainerVersionStrategy = "release",
|
||||
version_strategy: LiteralContainerVersionStrategy = 'release',
|
||||
push: bool = False,
|
||||
machine: bool = False
|
||||
) -> dict[str | LiteralContainerRegistry, str]:
|
||||
try:
|
||||
if not _BUILDER.health(): raise openllm.exceptions.Error
|
||||
except (openllm.exceptions.Error, subprocess.CalledProcessError):
|
||||
raise RuntimeError("Building base container requires BuildKit (via Buildx) to be installed. See https://docs.docker.com/build/buildx/install/ for instalation instruction.") from None
|
||||
if openllm_core.utils.device_count() == 0: raise RuntimeError("Building base container requires GPUs (None available)")
|
||||
if not shutil.which("nvidia-container-runtime"): raise RuntimeError("NVIDIA Container Toolkit is required to compile CUDA kernel in container.")
|
||||
raise RuntimeError('Building base container requires BuildKit (via Buildx) to be installed. See https://docs.docker.com/build/buildx/install/ for instalation instruction.') from None
|
||||
if openllm_core.utils.device_count() == 0: raise RuntimeError('Building base container requires GPUs (None available)')
|
||||
if not shutil.which('nvidia-container-runtime'): raise RuntimeError('NVIDIA Container Toolkit is required to compile CUDA kernel in container.')
|
||||
if not _module_location: raise RuntimeError("Failed to determine source location of 'openllm'. (Possible broken installation)")
|
||||
pyproject_path = pathlib.Path(_module_location).parent.parent / "pyproject.toml"
|
||||
pyproject_path = pathlib.Path(_module_location).parent.parent / 'pyproject.toml'
|
||||
if not pyproject_path.exists(): raise ValueError("This utility can only be run within OpenLLM git repository. Clone it first with 'git clone https://github.com/bentoml/OpenLLM.git'")
|
||||
if not registries:
|
||||
tags: dict[str | LiteralContainerRegistry, str] = {
|
||||
alias: f"{value}:{get_base_container_tag(version_strategy)}" for alias, value in _CONTAINER_REGISTRY.items()
|
||||
alias: f'{value}:{get_base_container_tag(version_strategy)}' for alias, value in _CONTAINER_REGISTRY.items()
|
||||
} # default to all registries with latest tag strategy
|
||||
else:
|
||||
registries = [registries] if isinstance(registries, str) else list(registries)
|
||||
tags = {name: f"{_CONTAINER_REGISTRY[name]}:{get_base_container_tag(version_strategy)}" for name in registries}
|
||||
tags = {name: f'{_CONTAINER_REGISTRY[name]}:{get_base_container_tag(version_strategy)}' for name in registries}
|
||||
try:
|
||||
outputs = _BUILDER.build(
|
||||
file=pathlib.Path(__file__).parent.joinpath("Dockerfile").resolve().__fspath__(),
|
||||
file=pathlib.Path(__file__).parent.joinpath('Dockerfile').resolve().__fspath__(),
|
||||
context_path=pyproject_path.parent.__fspath__(),
|
||||
tag=tuple(tags.values()),
|
||||
push=push,
|
||||
progress="plain" if openllm_core.utils.get_debug_mode() else "auto",
|
||||
progress='plain' if openllm_core.utils.get_debug_mode() else 'auto',
|
||||
quiet=machine
|
||||
)
|
||||
if machine and outputs is not None: tags["image_sha"] = outputs.decode("utf-8").strip()
|
||||
if machine and outputs is not None: tags['image_sha'] = outputs.decode('utf-8').strip()
|
||||
except Exception as err:
|
||||
raise openllm.exceptions.OpenLLMException(f"Failed to containerize base container images (Scroll up to see error above, or set OPENLLMDEVDEBUG=True for more traceback):\n{err}") from err
|
||||
raise openllm.exceptions.OpenLLMException(f'Failed to containerize base container images (Scroll up to see error above, or set OPENLLMDEVDEBUG=True for more traceback):\n{err}') from err
|
||||
return tags
|
||||
if t.TYPE_CHECKING:
|
||||
CONTAINER_NAMES: dict[LiteralContainerRegistry, str]
|
||||
supported_registries: list[str]
|
||||
|
||||
__all__ = ["CONTAINER_NAMES", "get_base_container_tag", "build_container", "get_base_container_name", "supported_registries", "RefResolver"]
|
||||
__all__ = ['CONTAINER_NAMES', 'get_base_container_tag', 'build_container', 'get_base_container_name', 'supported_registries', 'RefResolver']
|
||||
def __dir__() -> list[str]:
|
||||
return sorted(__all__)
|
||||
def __getattr__(name: str) -> t.Any:
|
||||
if name == "supported_registries": return functools.lru_cache(1)(lambda: list(_CONTAINER_REGISTRY))()
|
||||
elif name == "CONTAINER_NAMES": return _CONTAINER_REGISTRY
|
||||
elif name in __all__: return importlib.import_module("." + name, __name__)
|
||||
else: raise AttributeError(f"{name} does not exists under {__name__}")
|
||||
if name == 'supported_registries': return functools.lru_cache(1)(lambda: list(_CONTAINER_REGISTRY))()
|
||||
elif name == 'CONTAINER_NAMES': return _CONTAINER_REGISTRY
|
||||
elif name in __all__: return importlib.import_module('.' + name, __name__)
|
||||
else: raise AttributeError(f'{name} does not exists under {__name__}')
|
||||
|
||||
Reference in New Issue
Block a user