mirror of https://github.com/bentoml/OpenLLM.git (synced 2026-02-19 07:06:02 -05:00)
refactor: focus (#730)
* perf: remove base images
* chore: update changelog
* chore: move dockerfile to run on release only
* chore: cleanup unused types

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
@@ -1,30 +1,17 @@
-import logging as _logging
-import os as _os
-import pathlib as _pathlib
-import warnings as _warnings
-
+import logging as _logging, os as _os, pathlib as _pathlib, warnings as _warnings
 from openllm_cli import _sdk
 
 from . import utils as utils
 
 if utils.DEBUG:
-  utils.set_debug_mode(True)
-  _logging.basicConfig(level=_logging.NOTSET)
+  utils.set_debug_mode(True); _logging.basicConfig(level=_logging.NOTSET)
 else:
   # configuration for bitsandbytes before import
   _os.environ['BITSANDBYTES_NOWELCOME'] = _os.environ.get('BITSANDBYTES_NOWELCOME', '1')
   # NOTE: The following warnings from bitsandbytes, and probably not that important for users to see when DEBUG is False
-  _warnings.filterwarnings(
-    'ignore', message='MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization'
-  )
-  _warnings.filterwarnings(
-    'ignore', message='MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization'
-  )
+  _warnings.filterwarnings('ignore', message='MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization')
+  _warnings.filterwarnings('ignore', message='MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization')
   _warnings.filterwarnings('ignore', message='The installed version of bitsandbytes was compiled without GPU support.')
   # NOTE: ignore the following warning from ghapi as it is not important for users
-  _warnings.filterwarnings(
-    'ignore', message='Neither GITHUB_TOKEN nor GITHUB_JWT_TOKEN found: running as unauthenticated'
-  )
+  _warnings.filterwarnings('ignore', message='Neither GITHUB_TOKEN nor GITHUB_JWT_TOKEN found: running as unauthenticated')
 
 COMPILED = _pathlib.Path(__file__).suffix in ('.pyd', '.so')
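A note on the hunk above: the refactor only collapses formatting; the behaviour — default `BITSANDBYTES_NOWELCOME` before the library is imported, then silence known-benign warnings — is unchanged. A minimal, self-contained sketch of that pattern (standalone, not OpenLLM code):

```python
import os
import warnings

# Respect a user-provided value; otherwise default the env var *before* the
# noisy dependency (bitsandbytes here) is ever imported.
os.environ['BITSANDBYTES_NOWELCOME'] = os.environ.get('BITSANDBYTES_NOWELCOME', '1')

with warnings.catch_warnings(record=True) as caught:
  warnings.simplefilter('always')  # record everything by default...
  # ...except this known-benign message; 'message' is a regex matched against
  # the beginning of the warning text, so a literal prefix suffices.
  warnings.filterwarnings('ignore', message='MatMul8bitLt: inputs will be cast')
  warnings.warn('MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization')
  assert not caught  # the filtered warning was suppressed
```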
@@ -1,4 +1 @@
-if __name__ == '__main__':
-  from openllm_cli.entrypoint import cli
-
-  cli()
+if __name__ == '__main__': from openllm_cli.entrypoint import cli; cli()
@@ -1,7 +1,5 @@
-import transformers
-
-
 def prepare_logits_processor(config):
+  import transformers
   generation_config = config.generation_config
   logits_processor = transformers.LogitsProcessorList()
   if generation_config['temperature'] >= 1e-5 and generation_config['temperature'] != 1.0:
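Moving `import transformers` from module scope into `prepare_logits_processor` defers a heavy import until the function is first called; later calls hit `sys.modules` and are effectively free. The general shape:

```python
def prepare(config):
  # Lazy import: the cost of loading the heavy dependency is paid on the
  # first call rather than at package-import time; repeat imports are cached.
  import transformers
  return transformers.LogitsProcessorList()
```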
@@ -13,28 +11,16 @@ def prepare_logits_processor(config):
   if generation_config['top_k'] > 0:
     logits_processor.append(transformers.TopKLogitsWarper(generation_config['top_k']))
   return logits_processor
 
 
 # NOTE: The ordering here is important. Some models have two of these and we have a preference for which value gets used.
 SEQLEN_KEYS = ['max_sequence_length', 'seq_length', 'max_position_embeddings', 'max_seq_len', 'model_max_length']
 
 
 def get_context_length(config):
   rope_scaling = getattr(config, 'rope_scaling', None)
   rope_scaling_factor = config.rope_scaling['factor'] if rope_scaling else 1.0
   for key in SEQLEN_KEYS:
-    if getattr(config, key, None) is not None:
-      return int(rope_scaling_factor * getattr(config, key))
+    if getattr(config, key, None) is not None: return int(rope_scaling_factor * getattr(config, key))
   return 2048
 
 
-def is_sentence_complete(output):
-  return output.endswith(('.', '?', '!', '...', '。', '?', '!', '…', '"', "'", '”'))
-
-
+def is_sentence_complete(output): return output.endswith(('.', '?', '!', '...', '。', '?', '!', '…', '"', "'", '”'))
 def is_partial_stop(output, stop_str):
   '''Check whether the output contains a partial stop str.'''
   for i in range(min(len(output), len(stop_str))):
-    if stop_str.startswith(output[-i:]):
-      return True
+    if stop_str.startswith(output[-i:]): return True
   return False
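`get_context_length` above scans `SEQLEN_KEYS` in preference order and scales the first hit by the rope-scaling factor, falling back to 2048. A self-contained illustration using a stand-in config object (the `SimpleNamespace` configs are invented for the example):

```python
from types import SimpleNamespace

SEQLEN_KEYS = ['max_sequence_length', 'seq_length', 'max_position_embeddings', 'max_seq_len', 'model_max_length']

def get_context_length(config):
  rope_scaling = getattr(config, 'rope_scaling', None)
  rope_scaling_factor = config.rope_scaling['factor'] if rope_scaling else 1.0
  for key in SEQLEN_KEYS:
    if getattr(config, key, None) is not None: return int(rope_scaling_factor * getattr(config, key))
  return 2048

# A llama-style config: 4096 positions, stretched 2x by rope scaling.
cfg = SimpleNamespace(rope_scaling={'factor': 2.0}, max_position_embeddings=4096)
assert get_context_length(cfg) == 8192
# No known length attribute at all -> conservative 2048 default.
assert get_context_length(SimpleNamespace()) == 2048
```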
@@ -184,7 +184,7 @@ class LLM(t.Generic[M, T], ReprMixin):
 ):
   torch_dtype = attrs.pop('torch_dtype', None)  # backward compatible
   if torch_dtype is not None:
-    warnings.warns(
+    warnings.warn(
       'The argument "torch_dtype" is deprecated and will be removed in the future. Please use "dtype" instead.',
       DeprecationWarning,
       stacklevel=3,
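The hunk above fixes a genuine bug: `warnings.warns` does not exist, so the deprecation path would have raised `AttributeError`. For reference, a minimal sketch of the same deprecated-kwarg shim in isolation (the `load_model` function is illustrative, not OpenLLM's API):

```python
import warnings

def load_model(model_id, **attrs):
  # Accept the legacy 'torch_dtype' kwarg but steer callers to 'dtype'.
  torch_dtype = attrs.pop('torch_dtype', None)  # backward compatible
  if torch_dtype is not None:
    warnings.warn(
      'The argument "torch_dtype" is deprecated and will be removed in the future. Please use "dtype" instead.',
      DeprecationWarning,
      stacklevel=2,  # point the warning at the caller, not this shim
    )
    attrs.setdefault('dtype', torch_dtype)
  return model_id, attrs

with warnings.catch_warnings(record=True) as caught:
  warnings.simplefilter('always')
  load_model('facebook/opt-125m', torch_dtype='float16')
assert caught and issubclass(caught[0].category, DeprecationWarning)
```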
@@ -1,14 +1,39 @@
-import os
-
-from openllm_core.utils import LazyModule
+from __future__ import annotations
+import os, attr, functools
+from openllm_core._typing_compat import LiteralContainerVersionStrategy
+from openllm_core.exceptions import OpenLLMException
+from openllm_core.utils.lazy import VersionInfo, LazyModule
+
+_OWNER, _REPO = 'bentoml', 'openllm'
+@attr.attrs(eq=False, order=False, slots=True, frozen=True)
+class RefResolver:
+  git_hash: str = attr.field()
+  version: VersionInfo = attr.field(converter=lambda s: VersionInfo.from_version_string(s))
+  strategy: LiteralContainerVersionStrategy = attr.field()
+  @classmethod
+  @functools.lru_cache(maxsize=64)
+  def from_strategy(cls, strategy_or_version: LiteralContainerVersionStrategy | None = None) -> RefResolver:
+    # using default strategy
+    if strategy_or_version is None or strategy_or_version == 'release':
+      try:
+        from ghapi.all import GhApi
+        ghapi = GhApi(owner=_OWNER, repo=_REPO, authenticate=False)
+        meta = ghapi.repos.get_latest_release()
+        git_hash = ghapi.git.get_ref(ref=f"tags/{meta['name']}")['object']['sha']
+      except Exception as err:
+        raise OpenLLMException('Failed to determine latest release version.') from err
+      return cls(git_hash, meta['name'].lstrip('v'), 'release')
+    elif strategy_or_version in ('latest', 'nightly'):  # latest is nightly
+      return cls('latest', '0.0.0', 'latest')
+    else:
+      raise ValueError(f'Unknown strategy: {strategy_or_version}')
+  @property
+  def tag(self) -> str: return 'latest' if self.strategy in {'latest', 'nightly'} else repr(self.version)
 __lazy = LazyModule(
   __name__,
   os.path.abspath('__file__'),
-  {
-    '_package': ['create_bento', 'build_editable', 'construct_python_options', 'construct_docker_options'],
-    'oci': ['CONTAINER_NAMES', 'supported_registries', 'RefResolver'],
-  },
+  {'_package': ['create_bento', 'build_editable', 'construct_python_options', 'construct_docker_options']},
+  extra_objects={'RefResolver': RefResolver}
 )
 __all__ = __lazy.__all__
 __dir__ = __lazy.__dir__
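The relocated `RefResolver.from_strategy` is an `lru_cache`-wrapped classmethod, so the unauthenticated GitHub API lookup happens at most once per strategy for the life of the process. A rough sketch of that caching behaviour in isolation (the `resolve` function and its return values are stand-ins, not OpenLLM code):

```python
import functools

CALLS = 0

@functools.lru_cache(maxsize=64)
def resolve(strategy=None):
  # Stand-in for RefResolver.from_strategy: pretend to ask GitHub for the
  # latest release once, then serve every later call from the cache.
  global CALLS
  CALLS += 1
  if strategy in (None, 'release'):
    return ('deadbeef', '0.4.41')  # (git_hash, version) placeholders
  if strategy in ('latest', 'nightly'):
    return ('latest', '0.0.0')
  raise ValueError(f'Unknown strategy: {strategy}')

resolve('release'); resolve('release'); resolve('latest')
assert CALLS == 2  # one network-ish call per distinct strategy
```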
@@ -2,7 +2,7 @@ from typing import Optional
 
 import attr
 
-from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy
+from openllm_core._typing_compat import LiteralContainerVersionStrategy
 from openllm_core.utils.lazy import VersionInfo
 
 from . import _package as _package, oci as oci
@@ -13,9 +13,6 @@ from ._package import (
   create_bento as create_bento,
 )
 
-CONTAINER_NAMES: dict[LiteralContainerRegistry, str] = ...
-supported_registries: list[str] = ...
-
 @attr.attrs(eq=False, order=False, slots=True, frozen=True)
 class RefResolver:
   git_hash: str
@@ -26,7 +23,3 @@ class RefResolver:
   def from_strategy(cls, strategy_or_version: Optional[LiteralContainerVersionStrategy] = ...) -> RefResolver: ...
   @property
   def tag(self) -> str: ...
-  @staticmethod
-  def construct_base_image(
-    reg: LiteralContainerRegistry, strategy: Optional[LiteralContainerVersionStrategy] = ...
-  ) -> str: ...
@@ -14,60 +14,43 @@ from bentoml._internal.bento.build_config import BentoBuildConfig, DockerOptions
 from bentoml._internal.configuration.containers import BentoMLContainer
 from openllm_core.utils import SHOW_CODEGEN, check_bool_env, pkg
 
-from . import oci
-
 logger = logging.getLogger(__name__)
 
 OPENLLM_DEV_BUILD = 'OPENLLM_DEV_BUILD'
+_service_file = Path(os.path.abspath(__file__)).parent.parent / '_service.py'
+_SERVICE_VARS = '''import orjson;model_id,model_tag,adapter_map,serialization,trust_remote_code='{__model_id__}','{__model_tag__}',orjson.loads("""{__model_adapter_map__}"""),'{__model_serialization__}',{__model_trust_remote_code__}'''
 
 def build_editable(path, package='openllm'):
   '''Build OpenLLM if the OPENLLM_DEV_BUILD environment variable is set.'''
-  if not check_bool_env(OPENLLM_DEV_BUILD, default=False):
-    return None
+  if not check_bool_env(OPENLLM_DEV_BUILD, default=False): return None
   # We need to build the package in editable mode, so that we can import it
   # TODO: Upgrade to 1.0.3
   from build import ProjectBuilder
   from build.env import IsolatedEnvBuilder
 
   module_location = pkg.source_locations(package)
-  if not module_location:
-    raise RuntimeError(
-      'Could not find the source location of OpenLLM. Make sure to unset OPENLLM_DEV_BUILD if you are developing OpenLLM.'
-    )
+  if not module_location: raise RuntimeError('Could not find the source location of OpenLLM.')
   pyproject_path = Path(module_location).parent.parent / 'pyproject.toml'
   if os.path.isfile(pyproject_path.__fspath__()):
     logger.info('Generating built wheels for package %s...', package)
     with IsolatedEnvBuilder() as env:
       builder = ProjectBuilder(pyproject_path.parent)
       builder.python_executable = env.executable
       builder.scripts_dir = env.scripts_dir
       env.install(builder.build_system_requires)
       return builder.build('wheel', path, config_settings={'--global-option': '--quiet'})
-  raise RuntimeError(
-    'Custom OpenLLM build is currently not supported. Please install OpenLLM from PyPI or built it from Git source.'
-  )
-
-
+  raise RuntimeError('Please install OpenLLM from PyPI or built it from Git source.')
 def construct_python_options(llm, llm_fs, extra_dependencies=None, adapter_map=None):
-  packages = ['scipy', 'bentoml[tracing]>=1.1.10']  # apparently bnb misses this one
-  if adapter_map is not None:
-    packages += ['openllm[fine-tune]']
-  if extra_dependencies is not None:
-    packages += [f'openllm[{k}]' for k in extra_dependencies]
-  if llm.config['requirements'] is not None:
-    packages.extend(llm.config['requirements'])
-  wheels = None
+  from . import RefResolver
+  packages = ['scipy', 'bentoml[tracing]>=1.1.10', 'vllm==0.2.2', 'ray==2.6.0', f'openllm>={RefResolver.from_strategy("release").version}']  # apparently bnb misses this one
+  if adapter_map is not None: packages += ['openllm[fine-tune]']
+  if extra_dependencies is not None: packages += [f'openllm[{k}]' for k in extra_dependencies]
+  if llm.config['requirements'] is not None: packages.extend(llm.config['requirements'])
   built_wheels = [build_editable(llm_fs.getsyspath('/'), p) for p in ('openllm_core', 'openllm_client', 'openllm')]
-  if all(i for i in built_wheels):
-    wheels = [llm_fs.getsyspath(f"/{i.split('/')[-1]}") for i in built_wheels]
-  return PythonOptions(packages=packages, wheels=wheels, lock_packages=True)
-
-
-def construct_docker_options(
-  llm, _, quantize, adapter_map, dockerfile_template, serialisation, container_registry, container_version_strategy
-):
+  return PythonOptions(
+    packages=packages,
+    wheels=[llm_fs.getsyspath(f"/{i.split('/')[-1]}") for i in built_wheels] if all(i for i in built_wheels) else None,
+    lock_packages=True
+  )
+def construct_docker_options(llm, _, quantize, adapter_map, dockerfile_template, serialisation):
   from openllm_cli.entrypoint import process_environ
 
   environ = process_environ(
     llm.config,
     llm.config['timeout'],
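Because the custom base image that pre-installed vllm and ray is removed in this commit, `construct_python_options` now pins them in the Bento's Python requirements and floors `openllm` at the latest release. A hedged sketch of how that pin list comes together (`FakeRefResolver` and its version are illustrative; the real value comes from `RefResolver.from_strategy('release')`):

```python
class FakeRefResolver:
  version = '0.4.41'  # placeholder for the resolved latest release

packages = ['scipy', 'bentoml[tracing]>=1.1.10', 'vllm==0.2.2', 'ray==2.6.0', f'openllm>={FakeRefResolver.version}']
extra_dependencies = ('fine-tune',)
packages += [f'openllm[{k}]' for k in extra_dependencies]
assert 'openllm>=0.4.41' in packages and packages[-1] == 'openllm[fine-tune]'
```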
@@ -84,37 +67,7 @@ def construct_docker_options(
   environ['OPENLLM_CONFIG'] = f"'{environ['OPENLLM_CONFIG']}'"
   environ.pop('BENTOML_HOME', None)  # NOTE: irrelevant in container
   environ['NVIDIA_DRIVER_CAPABILITIES'] = 'compute,utility'
-  return DockerOptions(
-    base_image=oci.RefResolver.construct_base_image(container_registry, container_version_strategy),
-    env=environ,
-    dockerfile_template=dockerfile_template,
-  )
-
-
-_service_file = Path(os.path.abspath(__file__)).parent.parent / '_service.py'
-
-_SERVICE_VARS = '''\
-import orjson;model_id,model_tag,adapter_map,serialization,trust_remote_code='{__model_id__}','{__model_tag__}',orjson.loads("""{__model_adapter_map__}"""),'{__model_serialization__}',{__model_trust_remote_code__}
-'''
-
-
-def write_service(llm, llm_fs, adapter_map):
-  logger.debug('Generating service vars %s (dir=%s)', llm.model_id, llm_fs.getsyspath('/'))
-  script = f"# GENERATED BY 'openllm build {llm.model_id}'. DO NOT EDIT\n\n# fmt: off\n" + _SERVICE_VARS.format(
-    __model_id__=llm.model_id,
-    __model_tag__=str(llm.tag),
-    __model_adapter_map__=orjson.dumps(adapter_map).decode(),
-    __model_serialization__=llm.config['serialisation'],
-    __model_trust_remote_code__=str(llm.trust_remote_code),
-  )
-  if SHOW_CODEGEN:
-    logger.info('Generated _service_vars.py:\n%s', script)
-  llm_fs.writetext('_service_vars.py', script)
-  with open(_service_file.__fspath__(), 'r') as f:
-    service_src = f.read()
-  llm_fs.writetext(llm.config['service_name'], service_src)
-
-
+  return DockerOptions(cuda_version='12.1', env=environ, dockerfile_template=dockerfile_template)
 @inject
 def create_bento(
   bento_tag,
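With the custom OCI base images gone, `construct_docker_options` above asks for BentoML's stock CUDA base via `cuda_version='12.1'` instead of resolving a `base_image`. Before and after, as stand-in dicts (the fields are the ones used in this diff; values are illustrative):

```python
environ = {'NVIDIA_DRIVER_CAPABILITIES': 'compute,utility'}

before = dict(
  base_image='public.ecr.aws/y5w8i4y6/bentoml/openllm:0.4.41',  # custom OCI image; tag illustrative
  env=environ,
  dockerfile_template=None,
)
after = dict(
  cuda_version='12.1',  # let BentoML pick its stock CUDA base image
  env=environ,
  dockerfile_template=None,
)
assert 'base_image' not in after
```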
@@ -125,8 +78,6 @@ def create_bento(
   adapter_map=None,
   extra_dependencies=None,
   serialisation=None,
-  container_registry='ecr',
-  container_version_strategy='release',
   _bento_store=Provide[BentoMLContainer.bento_store],
   _model_store=Provide[BentoMLContainer.model_store],
 ):
@@ -145,11 +96,21 @@ def create_bento(
       },
     }
   )
-  if adapter_map:
-    labels.update(adapter_map)
+  if adapter_map: labels.update(adapter_map)
 
   logger.debug("Building Bento '%s' with model backend '%s'", bento_tag, llm.__llm_backend__)
-  # add service.py definition to this temporary folder
-  write_service(llm, llm_fs, adapter_map)
+  logger.debug('Generating service vars %s (dir=%s)', llm.model_id, llm_fs.getsyspath('/'))
+  script = f"# fmt: off\n# GENERATED BY 'openllm build {llm.model_id}'. DO NOT EDIT\n" + _SERVICE_VARS.format(
+    __model_id__=llm.model_id,
+    __model_tag__=str(llm.tag),
+    __model_adapter_map__=orjson.dumps(adapter_map).decode(),
+    __model_serialization__=llm.config['serialisation'],
+    __model_trust_remote_code__=str(llm.trust_remote_code),
+  )
+  if SHOW_CODEGEN: logger.info('Generated _service_vars.py:\n%s', script)
+  llm_fs.writetext('_service_vars.py', script)
+  with open(_service_file.__fspath__(), 'r') as f: service_src = f.read()
+  llm_fs.writetext(llm.config['service_name'], service_src)
 
   bento = bentoml.Bento.create(
     version=bento_tag.version,
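The service-vars codegen that used to live in `write_service` is now inlined in `create_bento`. A small demonstration of what `_SERVICE_VARS.format(...)` emits (model values are illustrative; `json` stands in for orjson so the snippet runs on the stdlib alone):

```python
import json  # stand-in for orjson in this example

_SERVICE_VARS = '''import orjson;model_id,model_tag,adapter_map,serialization,trust_remote_code='{__model_id__}','{__model_tag__}',orjson.loads("""{__model_adapter_map__}"""),'{__model_serialization__}',{__model_trust_remote_code__}'''

script = "# fmt: off\n# GENERATED BY 'openllm build facebook/opt-125m'. DO NOT EDIT\n" + _SERVICE_VARS.format(
  __model_id__='facebook/opt-125m',
  __model_tag__='pt-facebook-opt-125m:abc123',
  __model_adapter_map__=json.dumps(None),
  __model_serialization__='safetensors',
  __model_trust_remote_code__=str(False),
)
print(script)
# -> a one-line _service_vars.py module assigning model_id, model_tag,
#    adapter_map, serialization and trust_remote_code for _service.py to read.
```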
@@ -163,16 +124,7 @@ def create_bento(
       include=list(llm_fs.walk.files()),
       exclude=['/venv', '/.venv', '__pycache__/', '*.py[cod]', '*$py.class'],
       python=construct_python_options(llm, llm_fs, extra_dependencies, adapter_map),
-      docker=construct_docker_options(
-        llm,
-        llm_fs,
-        quantize,
-        adapter_map,
-        dockerfile_template,
-        _serialisation,
-        container_registry,
-        container_version_strategy,
-      ),
+      docker=construct_docker_options(llm, llm_fs, quantize, adapter_map, dockerfile_template, _serialisation),
     ),
   )
@@ -8,8 +8,6 @@ from bentoml._internal.bento import BentoStore
 from bentoml._internal.bento.build_config import DockerOptions, PythonOptions
 from bentoml._internal.models.model import ModelStore
 from openllm_core._typing_compat import (
-  LiteralContainerRegistry,
-  LiteralContainerVersionStrategy,
   LiteralQuantise,
   LiteralSerialisation,
   M,
@@ -32,10 +30,7 @@ def construct_docker_options(
   adapter_map: Optional[Dict[str, str]],
   dockerfile_template: Optional[str],
   serialisation: LiteralSerialisation,
-  container_registry: LiteralContainerRegistry,
-  container_version_strategy: LiteralContainerVersionStrategy,
 ) -> DockerOptions: ...
-def write_service(llm: LLM[M, T], llm_fs: FS, adapter_map: Optional[Dict[str, str]]) -> None: ...
 def create_bento(
   bento_tag: Tag,
   llm_fs: FS,
@@ -45,8 +40,6 @@ def create_bento(
   adapter_map: Optional[Dict[str, str]] = ...,
   extra_dependencies: Optional[Tuple[str, ...]] = ...,
   serialisation: Optional[LiteralSerialisation] = ...,
-  container_registry: LiteralContainerRegistry = ...,
-  container_version_strategy: LiteralContainerVersionStrategy = ...,
   _bento_store: BentoStore = ...,
   _model_store: ModelStore = ...,
 ) -> Bento: ...
@@ -1,45 +0,0 @@
-# syntax=docker/dockerfile-upstream:master
-# Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile
-FROM nvidia/cuda:12.1.0-base-ubuntu22.04 as base-container
-
-# Automatically set by buildx
-ARG TARGETPLATFORM
-
-ENV DEBIAN_FRONTEND=noninteractive
-
-RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-    build-essential \
-    ca-certificates \
-    ccache \
-    curl \
-    libssl-dev ca-certificates make \
-    git python3-pip && \
-    rm -rf /var/lib/apt/lists/*
-
-RUN mkdir -p /openllm-python
-RUN mkdir -p /openllm-core
-RUN mkdir -p /openllm-client
-
-# Install required dependencies
-COPY openllm-python/src /openllm-python/src
-COPY hatch.toml README.md CHANGELOG.md openllm-python/pyproject.toml /openllm-python/
-
-# Install all required dependencies
-# We have to install autoawq first to avoid conflict with torch, then reinstall torch with vllm
-# below
-RUN --mount=type=cache,target=/root/.cache/pip \
-    pip3 install -v --no-cache-dir \
-    "ray==2.6.0" "vllm==0.2.2" xformers && \
-    pip3 install --no-cache-dir -e /openllm-python/
-
-COPY openllm-core/src openllm-core/src
-COPY hatch.toml README.md CHANGELOG.md openllm-core/pyproject.toml /openllm-core/
-RUN --mount=type=cache,target=/root/.cache/pip pip3 install -v --no-cache-dir -e /openllm-core/
-
-COPY openllm-client/src openllm-client/src
-COPY hatch.toml README.md CHANGELOG.md openllm-client/pyproject.toml /openllm-client/
-RUN --mount=type=cache,target=/root/.cache/pip pip3 install -v --no-cache-dir -e /openllm-client/
-
-FROM base-container
-
-ENTRYPOINT ["python3", "-m", "openllm"]
@@ -1,82 +0,0 @@
-from __future__ import annotations
-import functools
-import importlib
-import logging
-import os
-import pathlib
-
-import attr
-
-from openllm_core._typing_compat import LiteralContainerVersionStrategy
-from openllm_core.exceptions import OpenLLMException
-from openllm_core.utils.lazy import VersionInfo
-
-logger = logging.getLogger(__name__)
-
-ROOT_DIR = pathlib.Path(os.path.abspath('__file__')).parent.parent.parent
-
-_CONTAINER_REGISTRY = {
-  'docker': 'docker.io/bentoml/openllm',
-  'gh': 'ghcr.io/bentoml/openllm',
-  'ecr': 'public.ecr.aws/y5w8i4y6/bentoml/openllm',
-}
-
-# TODO: support custom fork. Currently it only support openllm main.
-_OWNER, _REPO = 'bentoml', 'openllm'
-
-
-@attr.attrs(eq=False, order=False, slots=True, frozen=True)
-class RefResolver:
-  git_hash: str = attr.field()
-  version: VersionInfo = attr.field(converter=lambda s: VersionInfo.from_version_string(s))
-  strategy: LiteralContainerVersionStrategy = attr.field()
-
-  @classmethod
-  @functools.lru_cache(maxsize=64)
-  def from_strategy(cls, strategy_or_version=None):
-    # using default strategy
-    if strategy_or_version is None or strategy_or_version == 'release':
-      try:
-        from ghapi.all import GhApi
-
-        ghapi = GhApi(owner=_OWNER, repo=_REPO, authenticate=False)
-        meta = ghapi.repos.get_latest_release()
-        git_hash = ghapi.git.get_ref(ref=f"tags/{meta['name']}")['object']['sha']
-      except Exception as err:
-        raise OpenLLMException('Failed to determine latest release version.') from err
-      return cls(git_hash=git_hash, version=meta['name'].lstrip('v'), strategy='release')
-    elif strategy_or_version in ('latest', 'nightly'):  # latest is nightly
-      return cls(git_hash='latest', version='0.0.0', strategy='latest')
-    else:
-      raise ValueError(f'Unknown strategy: {strategy_or_version}')
-
-  @property
-  def tag(self):
-    return 'latest' if self.strategy in {'latest', 'nightly'} else repr(self.version)
-
-  @staticmethod
-  def construct_base_image(reg, strategy=None):
-    if reg == 'gh':
-      logger.warning("Setting base registry to 'gh' will affect cold start performance on GCP/AWS.")
-    elif reg == 'docker':
-      logger.warning('docker is base image is yet to be supported. Falling back to "ecr".')
-      reg = 'ecr'
-    return f'{_CONTAINER_REGISTRY[reg]}:{RefResolver.from_strategy(strategy).tag}'
-
-
-__all__ = ['CONTAINER_NAMES', 'RefResolver', 'supported_registries']
-
-
-def __dir__():
-  return sorted(__all__)
-
-
-def __getattr__(name):
-  if name == 'supported_registries':
-    return functools.lru_cache(1)(lambda: list(_CONTAINER_REGISTRY))()
-  elif name == 'CONTAINER_NAMES':
-    return _CONTAINER_REGISTRY
-  elif name in __all__:
-    return importlib.import_module('.' + name, __name__)
-  else:
-    raise AttributeError(f'{name} does not exists under {__name__}')
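The deleted `oci.py` above exposed `supported_registries` and `CONTAINER_NAMES` lazily through PEP 562 module-level `__getattr__`/`__dir__`. A minimal standalone sketch of that pattern (the `lazy_registry` module name is invented for the example):

```python
# lazy_registry.py — minimal PEP 562 sketch of the pattern oci.py used.
_CONTAINER_REGISTRY = {
  'docker': 'docker.io/bentoml/openllm',
  'gh': 'ghcr.io/bentoml/openllm',
  'ecr': 'public.ecr.aws/y5w8i4y6/bentoml/openllm',
}
__all__ = ['CONTAINER_NAMES', 'supported_registries']

def __dir__():
  return sorted(__all__)

def __getattr__(name):
  # Computed only on first attribute access, e.g.
  # `from lazy_registry import supported_registries`.
  if name == 'supported_registries':
    return list(_CONTAINER_REGISTRY)
  if name == 'CONTAINER_NAMES':
    return _CONTAINER_REGISTRY
  raise AttributeError(f'{name} does not exist under {__name__}')
```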