refactor: delete unused code (#716)

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
Aaron Pham committed by GitHub on 2023-11-21 04:39:48 -05:00
parent a8a9f154ce
commit c33b071ee4
28 changed files with 149 additions and 957 deletions

View File

@@ -3,14 +3,12 @@ import os as _os
import pathlib as _pathlib
import warnings as _warnings
import openllm_cli as _cli
from openllm_cli import _sdk
from . import utils as utils
if utils.DEBUG:
utils.set_debug_mode(True)
utils.set_quiet_mode(False)
_logging.basicConfig(level=_logging.NOTSET)
else:
# configuration for bitsandbytes before import
@@ -47,18 +45,9 @@ __lazy = utils.LazyModule(
'serialisation': ['ggml', 'transformers'],
'_quantisation': ['infer_quantisation_config'],
'_llm': ['LLM'],
'_generation': [
'StopSequenceCriteria',
'StopOnTokens',
'prepare_logits_processor',
'get_context_length',
'is_sentence_complete',
'is_partial_stop',
],
},
extra_objects={
'COMPILED': COMPILED,
'cli': _cli,
'start': _sdk.start,
'start_grpc': _sdk.start_grpc,
'build': _sdk.build,
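
The LazyModule call above maps public attribute names to the submodules that define them, plus a few eagerly-available extra_objects. As a rough sketch (not openllm's actual LazyModule, and meant to live in a package __init__.py), the same idea can be expressed with a PEP 562 module-level __getattr__:

import importlib

_import_structure = {'serialisation': ['ggml', 'transformers'], '_llm': ['LLM']}
_extra_objects = {'COMPILED': False}
# invert the mapping so each public name points at the submodule that defines it
_attr_to_module = {attr: mod for mod, attrs in _import_structure.items() for attr in attrs}

def __getattr__(name):  # called only for attributes not already on the module
    if name in _extra_objects:
        return _extra_objects[name]
    if name in _attr_to_module:
        submodule = importlib.import_module('.' + _attr_to_module[name], __package__)
        return getattr(submodule, name)
    raise AttributeError(f'module {__name__!r} has no attribute {name!r}')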

View File

@@ -16,7 +16,6 @@ from openllm_core.config import CONFIG_MAPPING as CONFIG_MAPPING,CONFIG_MAPPING_N
# update-config-stubs.py: import stubs stop
# fmt: on
import openllm_cli as _cli
from openllm_cli._sdk import (
build as build,
import_model as import_model,
@@ -44,14 +43,6 @@ from . import (
utils as utils,
)
from ._deprecated import Runner as Runner
from ._generation import (
StopOnTokens as StopOnTokens,
StopSequenceCriteria as StopSequenceCriteria,
prepare_logits_processor as prepare_logits_processor,
is_partial_stop as is_partial_stop,
is_sentence_complete as is_sentence_complete,
get_context_length as get_context_length,
)
from ._llm import LLM as LLM
from ._quantisation import infer_quantisation_config as infer_quantisation_config
from ._strategies import CascadingResourceStrategy as CascadingResourceStrategy, get_resource as get_resource
@@ -60,5 +51,4 @@ from .entrypoints import mount_entrypoints as mount_entrypoints
from .protocol import openai as openai
from .serialisation import ggml as ggml, transformers as transformers
cli = _cli
COMPILED: bool = ...

View File

@@ -1,13 +1,2 @@
"""CLI entrypoint for OpenLLM.
Usage:
openllm --help
To start any OpenLLM model:
openllm start <model_name> --options ...
"""
if __name__ == '__main__':
from openllm_cli.entrypoint import cli
cli()
# fmt: off
if __name__ == '__main__':from openllm_cli.entrypoint import cli;cli() # noqa

View File

@@ -6,7 +6,10 @@ import warnings
import openllm
from openllm_core._typing_compat import LiteralBackend, ParamSpec
from openllm_core.utils import first_not_none, is_vllm_available
from openllm_core.utils import first_not_none, getenv, is_vllm_available
if t.TYPE_CHECKING:
from ._runners import Runner as _Runner
P = ParamSpec('P')
@@ -20,7 +23,7 @@ def Runner(
backend: LiteralBackend | None = None,
llm_config: openllm.LLMConfig | None = None,
**attrs: t.Any,
) -> openllm.LLMRunner[t.Any, t.Any]:
) -> _Runner[t.Any, t.Any]:
"""Create a Runner for given LLM. For a list of currently supported LLM, check out 'openllm models'.
> [!WARNING]
@@ -73,9 +76,9 @@ def Runner(
attrs.update(
{
'model_id': model_id,
'quantize': os.getenv('OPENLLM_QUANTIZE', attrs.get('quantize', None)),
'serialisation': first_not_none(
attrs.get('serialisation'), os.environ.get('OPENLLM_SERIALIZATION'), default=llm_config['serialisation']
'quantize': getenv('QUANTIZE', var=['QUANTISE'], default=attrs.get('quantize', None)),
'serialisation': getenv(
'serialization', default=attrs.get('serialisation', llm_config['serialisation']), var=['SERIALISATION']
),
}
)
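
The replacement getenv call accepts alternate spellings (QUANTIZE/QUANTISE, serialization/SERIALISATION). A hedged sketch of what such a helper might look like, assuming it prefixes names with OPENLLM_ the way the old os.getenv('OPENLLM_QUANTIZE', ...) call did:

import os
import typing as t

def getenv(name: str, default: t.Any = None, var: t.Sequence[str] = ()) -> t.Any:
    # try OPENLLM_QUANTIZE first, then each alias such as OPENLLM_QUANTISE
    for candidate in (name, *var):
        value = os.environ.get(f'OPENLLM_{candidate.upper()}')
        if value is not None:
            return value
    return default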

View File

@@ -1,23 +1,6 @@
import transformers
class StopSequenceCriteria(transformers.StoppingCriteria):
def __init__(self, stop_sequences, tokenizer):
if isinstance(stop_sequences, str):
stop_sequences = [stop_sequences]
self.stop_sequences, self.tokenizer = stop_sequences, tokenizer
def __call__(self, input_ids, scores, **kwargs):
return any(
self.tokenizer.decode(input_ids.tolist()[0]).endswith(stop_sequence) for stop_sequence in self.stop_sequences
)
class StopOnTokens(transformers.StoppingCriteria):
def __call__(self, input_ids, scores, **kwargs):
return input_ids[0][-1] in {50278, 50279, 50277, 1, 0}
def prepare_logits_processor(config):
generation_config = config.generation_config
logits_processor = transformers.LogitsProcessorList()
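
The deleted StopSequenceCriteria and StopOnTokens classes are transformers.StoppingCriteria subclasses; for reference, criteria like these are consumed by generate() through a StoppingCriteriaList. An illustrative usage (model name is a placeholder, and StopSequenceCriteria refers to the class shown in the removed file above):

from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteriaList

tokenizer = AutoTokenizer.from_pretrained('gpt2')            # placeholder checkpoint
model = AutoModelForCausalLM.from_pretrained('gpt2')
inputs = tokenizer('The answer is', return_tensors='pt')
# stop as soon as the decoded text ends with '###'
criteria = StoppingCriteriaList([StopSequenceCriteria('###', tokenizer)])
outputs = model.generate(**inputs, stopping_criteria=criteria, max_new_tokens=32)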

View File

@@ -1,27 +1,7 @@
from typing import Any, List, Union
from torch import FloatTensor, LongTensor
from transformers import (
LogitsProcessorList,
PretrainedConfig,
PreTrainedTokenizer,
PreTrainedTokenizerBase,
PreTrainedTokenizerFast,
)
from transformers import LogitsProcessorList, PretrainedConfig
from openllm_core import LLMConfig
Tokenizer = Union[PreTrainedTokenizerBase, PreTrainedTokenizer, PreTrainedTokenizerFast]
class StopSequenceCriteria:
stop_sequences: List[str]
tokenizer: Tokenizer
def __init__(self, stop_sequences: Union[str, List[str]], tokenizer: Tokenizer) -> None: ...
def __call__(self, input_ids: LongTensor, scores: FloatTensor, **kwargs: Any) -> bool: ...
class StopOnTokens:
def __call__(self, input_ids: LongTensor, scores: FloatTensor, **kwargs: Any) -> bool: ...
def prepare_logits_processor(config: LLMConfig) -> LogitsProcessorList: ...
def get_context_length(config: PretrainedConfig) -> int: ...
def is_sentence_complete(output: str) -> bool: ...
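
Of the removed stubs, get_context_length has the least obvious behaviour. A rough sketch of how such a helper is commonly implemented (assumed, not the exact deleted code) is to probe the usual PretrainedConfig attribute names for the context window:

from transformers import PretrainedConfig

def get_context_length(config: PretrainedConfig, default: int = 2048) -> int:
    # different architectures expose the context window under different names
    for key in ('max_sequence_length', 'seq_length', 'max_position_embeddings', 'n_positions'):
        value = getattr(config, key, None)
        if isinstance(value, int) and value > 0:
            return value
    return default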

View File

@@ -71,9 +71,7 @@ def normalise_model_name(name: str) -> str:
def _resolve_peft_config_type(adapter_map: dict[str, str]) -> AdapterMap:
if not is_peft_available():
raise RuntimeError(
"LoRA adapter requires 'peft' to be installed. Make sure to do 'pip install \"openllm[fine-tune]\"'"
)
raise RuntimeError("Requires 'peft' to be installed. Do 'pip install \"openllm[fine-tune]\"'")
from huggingface_hub import hf_hub_download
resolved: AdapterMap = {}
@@ -285,8 +283,6 @@ class LLM(t.Generic[M, T], ReprMixin):
if env is not None:return str(env).upper() in ENV_VARS_TRUE_VALUES
return self.__llm_trust_remote_code__
@property
def runner_name(self):return f"llm-{self.config['start_name']}-runner"
@property
def model_id(self):return self._model_id
@property
def revision(self):return self._revision
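
The trust_remote_code check above uses the usual truthy-env convention; illustratively (the exact ENV_VARS_TRUE_VALUES set lives in openllm_core.utils, the values here are assumed):

ENV_VARS_TRUE_VALUES = {'1', 'ON', 'YES', 'TRUE'}  # assumed contents

def env_flag(value) -> bool:
    # mirrors `str(env).upper() in ENV_VARS_TRUE_VALUES` from the property above
    return value is not None and str(value).upper() in ENV_VARS_TRUE_VALUES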

View File

@@ -97,8 +97,6 @@ class LLM(Generic[M, T]):
@property
def trust_remote_code(self) -> bool: ...
@property
def runner_name(self) -> str: ...
@property
def model_id(self) -> str: ...
@property
def revision(self) -> str: ...

View File

@@ -9,7 +9,6 @@ import torch
import bentoml
import openllm
from openllm_core._schemas import CompletionChunk, GenerationOutput, SampleLogprobs
from openllm_core.exceptions import OpenLLMException
from openllm_core.utils import ReprMixin, is_ctranslate_available, is_vllm_available
__all__ = ['runner']
@@ -28,12 +27,10 @@ def registry(cls=None, *, alias=None):
def runner(llm: openllm.LLM):
from ._strategies import CascadingResourceStrategy
try:
models = [llm.bentomodel]
except bentoml.exceptions.NotFound as err:
raise RuntimeError(f'Failed to locate {llm.bentomodel}:{err}') from err
assert llm.bentomodel
except (bentoml.exceptions.NotFound, AssertionError) as err:
raise RuntimeError(f'Failed to locate {llm.bentomodel}: {err}') from err
return types.new_class(
llm.config.__class__.__name__[:-6] + 'Runner',
@@ -73,9 +70,9 @@ def runner(llm: openllm.LLM):
),
)(
_registry[llm.__llm_backend__],
name=llm.runner_name,
models=models,
scheduling_strategy=CascadingResourceStrategy,
name=f"llm-{llm.config['start_name']}-runner",
models=[llm.bentomodel],
scheduling_strategy=openllm.CascadingResourceStrategy,
runnable_init_params={'llm': llm},
)
@@ -87,7 +84,7 @@ class CTranslateRunnable(bentoml.Runnable):
def __init__(self, llm):
if not is_ctranslate_available():
raise OpenLLMException('ctranslate is not installed. Please install it with `pip install "openllm[ctranslate]"`')
raise openllm.exceptions.OpenLLMException('ctranslate is not installed. Do `pip install "openllm[ctranslate]"`')
self.llm, self.config, self.model, self.tokenizer = llm, llm.config, llm.model, llm.tokenizer
@bentoml.Runnable.method(batchable=False)
@@ -137,7 +134,7 @@ class vLLMRunnable(bentoml.Runnable):
def __init__(self, llm):
if not is_vllm_available():
raise OpenLLMException('vLLM is not installed. Please install it via `pip install "openllm[vllm]"`.')
raise openllm.exceptions.OpenLLMException('vLLM is not installed. Do `pip install "openllm[vllm]"`.')
import vllm
self.llm, self.config, self.tokenizer = llm, llm.config, llm.tokenizer
@@ -162,7 +159,9 @@ class vLLMRunnable(bentoml.Runnable):
)
except Exception as err:
traceback.print_exc()
raise OpenLLMException(f'Failed to initialise vLLMEngine due to the following error:\n{err}') from err
raise openllm.exceptions.OpenLLMException(
f'Failed to initialise vLLMEngine due to the following error:\n{err}'
) from err
@bentoml.Runnable.method(batchable=False)
async def generate_iterator(self, prompt_token_ids, request_id, stop=None, adapter_name=None, **attrs):
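
The runner() factory above builds the Runner class at runtime with types.new_class. A minimal, bentoml-free sketch of that pattern (BaseRunner and the attribute names are illustrative):

import types

class BaseRunner:
    def __init__(self, name, init_params):
        self.name, self.init_params = name, init_params

def make_runner_class(config_class_name: str) -> type:
    # 'OPTConfig' -> 'OPTRunner', mirroring llm.config.__class__.__name__[:-6] + 'Runner'
    return types.new_class(
        config_class_name[:-6] + 'Runner',
        (BaseRunner,),
        exec_body=lambda ns: ns.update({'backend': 'vllm'}),
    )

runner = make_runner_class('OPTConfig')('llm-opt-runner', {'llm': None})
assert type(runner).__name__ == 'OPTRunner' and runner.backend == 'vllm'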

View File

@@ -14,22 +14,13 @@ import psutil
import bentoml
from bentoml._internal.resource import get_resource, system_resources
from bentoml._internal.runner.strategy import THREAD_ENVS
from openllm_core._typing_compat import overload
from openllm_core.utils import DEBUG, ReprMixin
class DynResource(t.Protocol):
resource_id: t.ClassVar[str]
@classmethod
def from_system(cls) -> t.Sequence[t.Any]: ...
logger = logging.getLogger(__name__)
def _strtoul(s: str) -> int:
"""Return -1 or positive integer sequence string starts with,."""
# Return -1 or positive integer sequence string starts with.
if not s:
return -1
idx = 0
@@ -55,21 +46,6 @@ def _parse_list_with_prefix(lst: str, prefix: str) -> list[str]:
return rcs
_STACK_LEVEL = 3
@overload # variant: default callback
def _parse_visible_devices() -> list[str] | None: ...
@overload # variant: specify None, and respect_env
def _parse_visible_devices(default_var: None, *, respect_env: t.Literal[True]) -> list[str] | None: ...
@overload # variant: default var is something other than None
def _parse_visible_devices(default_var: str = ..., *, respect_env: t.Literal[False]) -> list[str]: ...
def _parse_visible_devices(default_var: str | None = None, respect_env: bool = True) -> list[str] | None:
"""CUDA_VISIBLE_DEVICES aware with default var for parsing spec."""
if respect_env:
@@ -101,146 +77,136 @@ def _parse_visible_devices(default_var: str | None = None, respect_env: bool = T
return [str(i) for i in rc]
def _from_system(cls: type[DynResource]) -> list[str]:
visible_devices = _parse_visible_devices()
if visible_devices is None:
if cls.resource_id == 'amd.com/gpu':
if not psutil.LINUX:
if DEBUG:
logger.debug('AMD GPUs is currently only supported on Linux.')
return []
# ROCm does not currently have the rocm_smi wheel.
# So we need to use the ctypes bindings directly.
# we don't want to use CLI because parsing is a pain.
sys.path.append('/opt/rocm/libexec/rocm_smi')
try:
from ctypes import byref, c_uint32
# refers to https://github.com/RadeonOpenCompute/rocm_smi_lib/blob/master/python_smi_tools/rsmiBindings.py
from rsmiBindings import rocmsmi, rsmi_status_t
device_count = c_uint32(0)
ret = rocmsmi.rsmi_num_monitor_devices(byref(device_count))
if ret == rsmi_status_t.RSMI_STATUS_SUCCESS:
return [str(i) for i in range(device_count.value)]
return []
# In this case the binary is not found, returning empty list
except (ModuleNotFoundError, ImportError):
return []
finally:
sys.path.remove('/opt/rocm/libexec/rocm_smi')
else:
try:
from cuda import cuda
cuda.cuInit(0)
_, dev = cuda.cuDeviceGetCount()
return [str(i) for i in range(dev)]
except (ImportError, RuntimeError, AttributeError):
return []
return visible_devices
@overload
def _from_spec(cls: type[DynResource], spec: int) -> list[str]: ...
@overload
def _from_spec(cls: type[DynResource], spec: list[int | str]) -> list[str]: ...
@overload
def _from_spec(cls: type[DynResource], spec: str) -> list[str]: ...
def _from_spec(cls: type[DynResource], spec: t.Any) -> list[str]:
if isinstance(spec, int):
if spec in (-1, 0):
return []
if spec < -1:
raise ValueError('Spec cannot be < -1.')
return [str(i) for i in range(spec)]
elif isinstance(spec, str):
if not spec:
return []
if spec.isdigit():
spec = ','.join([str(i) for i in range(_strtoul(spec))])
return _parse_visible_devices(spec, respect_env=False)
elif isinstance(spec, list):
return [str(x) for x in spec]
else:
raise TypeError(
f"'{cls.__name__}.from_spec' only supports parsing spec of type int, str, or list, got '{type(spec)}' instead."
)
def _raw_device_uuid_nvml() -> list[str] | None:
from ctypes import CDLL, byref, c_int, c_void_p, create_string_buffer
try:
nvml_h = CDLL('libnvidia-ml.so.1')
except Exception:
warnings.warn('Failed to find nvidia binding', stacklevel=_STACK_LEVEL)
warnings.warn('Failed to find nvidia binding', stacklevel=3)
return None
rc = nvml_h.nvmlInit()
if rc != 0:
warnings.warn("Can't initialize NVML", stacklevel=_STACK_LEVEL)
warnings.warn("Can't initialize NVML", stacklevel=3)
return None
dev_count = c_int(-1)
rc = nvml_h.nvmlDeviceGetCount_v2(byref(dev_count))
if rc != 0:
warnings.warn('Failed to get available device from system.', stacklevel=_STACK_LEVEL)
warnings.warn('Failed to get available device from system.', stacklevel=3)
return None
uuids: list[str] = []
for idx in range(dev_count.value):
dev_id = c_void_p()
rc = nvml_h.nvmlDeviceGetHandleByIndex_v2(idx, byref(dev_id))
if rc != 0:
warnings.warn(f'Failed to get device handle for {idx}', stacklevel=_STACK_LEVEL)
warnings.warn(f'Failed to get device handle for {idx}', stacklevel=3)
return None
buf_len = 96
buf = create_string_buffer(buf_len)
rc = nvml_h.nvmlDeviceGetUUID(dev_id, buf, buf_len)
if rc != 0:
warnings.warn(f'Failed to get device UUID for {idx}', stacklevel=_STACK_LEVEL)
warnings.warn(f'Failed to get device UUID for {idx}', stacklevel=3)
return None
uuids.append(buf.raw.decode('ascii').strip('\0'))
del nvml_h
return uuids
def _validate(cls: type[DynResource], val: list[t.Any]) -> None:
if cls.resource_id == 'amd.com/gpu':
raise RuntimeError(
"AMD GPU validation is not yet supported. Make sure to call 'get_resource(..., validate=False)'"
)
if not all(isinstance(i, str) for i in val):
raise ValueError('Input list should be all string type.')
class _ResourceMixin:
@staticmethod
def from_system(cls) -> list[str]:
visible_devices = _parse_visible_devices()
if visible_devices is None:
if cls.resource_id == 'amd.com/gpu':
if not psutil.LINUX:
if DEBUG:
logger.debug('AMD GPUs is currently only supported on Linux.')
return []
# ROCm does not currently have the rocm_smi wheel.
# So we need to use the ctypes bindings directly.
# we don't want to use CLI because parsing is a pain.
sys.path.append('/opt/rocm/libexec/rocm_smi')
try:
from ctypes import byref, c_uint32
try:
from cuda import cuda
# refers to https://github.com/RadeonOpenCompute/rocm_smi_lib/blob/master/python_smi_tools/rsmiBindings.py
from rsmiBindings import rocmsmi, rsmi_status_t
err, *_ = cuda.cuInit(0)
if err != cuda.CUresult.CUDA_SUCCESS:
raise RuntimeError('Failed to initialise CUDA runtime binding.')
# correctly parse handle
for el in val:
if el.startswith(('GPU-', 'MIG-')):
uuids = _raw_device_uuid_nvml()
if uuids is None:
raise ValueError('Failed to parse available GPUs UUID')
if el not in uuids:
raise ValueError(f'Given UUID {el} is not found with available UUID (available: {uuids})')
elif el.isdigit():
err, _ = cuda.cuDeviceGet(int(el))
if err != cuda.CUresult.CUDA_SUCCESS:
raise ValueError(f'Failed to get device {el}')
except (ImportError, RuntimeError):
pass
device_count = c_uint32(0)
ret = rocmsmi.rsmi_num_monitor_devices(byref(device_count))
if ret == rsmi_status_t.RSMI_STATUS_SUCCESS:
return [str(i) for i in range(device_count.value)]
return []
# In this case the binary is not found, returning empty list
except (ModuleNotFoundError, ImportError):
return []
finally:
sys.path.remove('/opt/rocm/libexec/rocm_smi')
else:
try:
from cuda import cuda
cuda.cuInit(0)
_, dev = cuda.cuDeviceGetCount()
return [str(i) for i in range(dev)]
except (ImportError, RuntimeError, AttributeError):
return []
return visible_devices
@staticmethod
def from_spec(cls, spec) -> list[str]:
if isinstance(spec, int):
if spec in (-1, 0):
return []
if spec < -1:
raise ValueError('Spec cannot be < -1.')
return [str(i) for i in range(spec)]
elif isinstance(spec, str):
if not spec:
return []
if spec.isdigit():
spec = ','.join([str(i) for i in range(_strtoul(spec))])
return _parse_visible_devices(spec, respect_env=False)
elif isinstance(spec, list):
return [str(x) for x in spec]
else:
raise TypeError(
f"'{cls.__name__}.from_spec' only supports parsing spec of type int, str, or list, got '{type(spec)}' instead."
)
@staticmethod
def validate(cls, val: list[t.Any]) -> None:
if cls.resource_id == 'amd.com/gpu':
raise RuntimeError(
"AMD GPU validation is not yet supported. Make sure to call 'get_resource(..., validate=False)'"
)
if not all(isinstance(i, str) for i in val):
raise ValueError('Input list should be all string type.')
try:
from cuda import cuda
err, *_ = cuda.cuInit(0)
if err != cuda.CUresult.CUDA_SUCCESS:
raise RuntimeError('Failed to initialise CUDA runtime binding.')
# correctly parse handle
for el in val:
if el.startswith(('GPU-', 'MIG-')):
uuids = _raw_device_uuid_nvml()
if uuids is None:
raise ValueError('Failed to parse available GPUs UUID')
if el not in uuids:
raise ValueError(f'Given UUID {el} is not found with available UUID (available: {uuids})')
elif el.isdigit():
err, _ = cuda.cuDeviceGet(int(el))
if err != cuda.CUresult.CUDA_SUCCESS:
raise ValueError(f'Failed to get device {el}')
except (ImportError, RuntimeError):
pass
def _make_resource_class(name: str, resource_kind: str, docstring: str) -> type[DynResource]:
def _make_resource_class(name: str, resource_kind: str, docstring: str) -> type[bentoml.Resource[t.List[str]]]:
return types.new_class(
name,
(bentoml.Resource[t.List[str]], ReprMixin),
@@ -248,9 +214,9 @@ def _make_resource_class(name: str, resource_kind: str, docstring: str) -> type[
lambda ns: ns.update(
{
'resource_id': resource_kind,
'from_spec': classmethod(_from_spec),
'from_system': classmethod(_from_system),
'validate': classmethod(_validate),
'from_spec': classmethod(_ResourceMixin.from_spec),
'from_system': classmethod(_ResourceMixin.from_system),
'validate': classmethod(_ResourceMixin.validate),
'__repr_keys__': property(lambda _: {'resource_id'}),
'__doc__': inspect.cleandoc(docstring),
'__module__': 'openllm._strategies',
@@ -259,15 +225,9 @@ def _make_resource_class(name: str, resource_kind: str, docstring: str) -> type[
)
# NOTE: we need to hint these t.Literal since mypy is too dumb to infer this as literal 🤦
_TPU_RESOURCE: t.Literal['cloud-tpus.google.com/v2'] = 'cloud-tpus.google.com/v2'
_AMD_GPU_RESOURCE: t.Literal['amd.com/gpu'] = 'amd.com/gpu'
_NVIDIA_GPU_RESOURCE: t.Literal['nvidia.com/gpu'] = 'nvidia.com/gpu'
_CPU_RESOURCE: t.Literal['cpu'] = 'cpu'
NvidiaGpuResource = _make_resource_class(
'NvidiaGpuResource',
_NVIDIA_GPU_RESOURCE,
'nvidia.com/gpu',
"""NVIDIA GPU resource.
This is a modified version of internal's BentoML's NvidiaGpuResource
@@ -275,7 +235,7 @@ NvidiaGpuResource = _make_resource_class(
)
AmdGpuResource = _make_resource_class(
'AmdGpuResource',
_AMD_GPU_RESOURCE,
'amd.com/gpu',
"""AMD GPU resource.
Since ROCm will respect CUDA_VISIBLE_DEVICES, the behaviour of from_spec, from_system are similar to
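
The from_spec logic folded into _ResourceMixin accepts an int count, a string, or an explicit list of device ids. A condensed, simplified sketch of that normalisation (not the exact implementation, and without the CUDA_VISIBLE_DEVICES handling):

def normalise_gpu_spec(spec) -> list:
    if isinstance(spec, int):
        if spec in (-1, 0):
            return []
        if spec < -1:
            raise ValueError('Spec cannot be < -1.')
        return [str(i) for i in range(spec)]
    if isinstance(spec, str):
        if not spec:
            return []
        if spec.isdigit():  # '3' means the first three devices
            return [str(i) for i in range(int(spec))]
        return [s.strip() for s in spec.split(',') if s.strip()]
    if isinstance(spec, list):
        return [str(x) for x in spec]
    raise TypeError(f'Unsupported spec type: {type(spec)}')

assert normalise_gpu_spec(2) == ['0', '1']
assert normalise_gpu_spec('GPU-5ebe9f43,1') == ['GPU-5ebe9f43', '1']  # placeholder UUID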

View File

@@ -1,9 +1,6 @@
# fmt: off
import openllm_client as _client
def __dir__():
return sorted(dir(_client))
def __getattr__(it):
return getattr(_client, it)
def __dir__():return sorted(dir(_client))
def __getattr__(it):return getattr(_client, it)
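
The two one-liners above are the PEP 562 module-level __getattr__/__dir__ hooks that proxy this module to openllm_client. A slightly fuller illustrative variant caches each resolved name on the proxy module so repeated lookups skip the indirection:

import sys
import openllm_client as _client

def __dir__():
    return sorted(dir(_client))

def __getattr__(name):
    try:
        value = getattr(_client, name)
    except AttributeError:
        raise AttributeError(f'module {__name__!r} has no attribute {name!r}') from None
    setattr(sys.modules[__name__], name, value)  # cache on this module
    return value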

View File

@@ -1,75 +0,0 @@
from __future__ import annotations
import contextlib
import logging
import shutil
import subprocess
import typing as t
import bentoml
import openllm
if t.TYPE_CHECKING:
from openllm_core._typing_compat import LiteralBackend, LiteralQuantise
logger = logging.getLogger(__name__)
@contextlib.contextmanager
def build_bento(
model: str, model_id: str | None = None, quantize: LiteralQuantise | None = None, cleanup: bool = False
) -> t.Iterator[bentoml.Bento]:
logger.info('Building BentoML for %s', model)
bento = openllm.build(model, model_id=model_id, quantize=quantize)
yield bento
if cleanup:
logger.info('Deleting %s', bento.tag)
bentoml.bentos.delete(bento.tag)
@contextlib.contextmanager
def build_container(
bento: bentoml.Bento | str | bentoml.Tag, image_tag: str | None = None, cleanup: bool = False, **attrs: t.Any
) -> t.Iterator[str]:
if isinstance(bento, bentoml.Bento):
bento_tag = bento.tag
else:
bento_tag = bentoml.Tag.from_taglike(bento)
if image_tag is None:
image_tag = str(bento_tag)
executable = shutil.which('docker')
if not executable:
raise RuntimeError('docker executable not found')
try:
logger.info('Building container for %s', bento_tag)
bentoml.container.build(bento_tag, backend='docker', image_tag=(image_tag,), progress='plain', **attrs)
yield image_tag
finally:
if cleanup:
logger.info('Deleting container %s', image_tag)
subprocess.check_output([executable, 'rmi', '-f', image_tag])
@contextlib.contextmanager
def prepare(
model: str,
model_id: str,
backend: LiteralBackend = 'pt',
deployment_mode: t.Literal['container', 'local'] = 'local',
clean_context: contextlib.ExitStack | None = None,
cleanup: bool = True,
) -> t.Iterator[str]:
if clean_context is None:
clean_context = contextlib.ExitStack()
cleanup = True
llm = openllm.LLM[t.Any, t.Any](model_id, backend=backend)
bento_tag = bentoml.Tag.from_taglike(f'{llm.llm_type}-service:{llm.tag.version}')
if not bentoml.list(bento_tag):
bento = clean_context.enter_context(build_bento(model, model_id=model_id, cleanup=cleanup))
else:
bento = bentoml.get(bento_tag)
container_name = f'openllm-{model}-{llm.llm_type}'.replace('-', '_')
if deployment_mode == 'container':
container_name = clean_context.enter_context(build_container(bento, image_tag=container_name, cleanup=cleanup))
yield container_name
if cleanup:
clean_context.close()
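
For reference, the deleted helpers lean on contextlib.ExitStack so each build step registers its own teardown and the stack unwinds them in reverse order when closed. A generic sketch of that pattern (the build/delete callables are placeholders):

import contextlib

@contextlib.contextmanager
def managed(build, delete, cleanup=True):
    artifact = build()
    try:
        yield artifact
    finally:
        if cleanup:
            delete(artifact)

def prepare(steps, clean_context=None, cleanup=True):
    # like the deleted prepare(): the caller may hand in its own ExitStack and close it later
    if clean_context is None:
        clean_context = contextlib.ExitStack()
    artifacts = [clean_context.enter_context(managed(build, delete, cleanup)) for build, delete in steps]
    return clean_context, artifacts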

View File

@@ -44,7 +44,6 @@ from openllm_core.utils import (
is_transformers_available as is_transformers_available,
is_vllm_available as is_vllm_available,
lenient_issubclass as lenient_issubclass,
reserve_free_port as reserve_free_port,
resolve_filepath as resolve_filepath,
resolve_user_filepath as resolve_user_filepath,
serde as serde,