Mirror of https://github.com/bentoml/OpenLLM.git
feat(ctranslate): initial infrastructure support (#694)
* perf: compact and improve speed and agility
* --wip--
* chore: cleanup infrastructure
* chore: update styles notes and autogen mypy configuration

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
@@ -42,6 +42,7 @@ from openllm_core.utils import (
   generate_hash_from_file,
   get_disable_warnings,
   get_quiet_mode,
+  getenv,
   is_peft_available,
   is_vllm_available,
   resolve_filepath,
@@ -52,6 +53,7 @@ from .exceptions import ForbiddenAttributeError, OpenLLMException
 from .serialisation.constants import PEFT_CONFIG_NAME

 if t.TYPE_CHECKING:
+  import torch
   import transformers
   from peft.config import PeftConfig

@@ -109,8 +111,8 @@ def _torch_dtype_mapping():

   return {
     'half': torch.float16,
+    'float16': torch.float16,
     'float': torch.float32,
-    'float16': torch.float16,
     'float32': torch.float32,
     'bfloat16': torch.bfloat16,
   }
@@ -132,7 +134,8 @@ class LLM(t.Generic[M, T], ReprMixin):
   _prompt_template: PromptTemplate | None
   _system_message: str | None

-  __llm_torch_dtype__: LiteralDtype | t.Literal['auto', 'half', 'float'] = 'auto'
+  __llm_dtype__: LiteralDtype | t.Literal['auto', 'half', 'float'] = 'auto'
+  __llm_torch_dtype__: 'torch.dtype' = None
   __llm_config__: LLMConfig | None = None
   __llm_backend__: LiteralBackend = None  # type: ignore
   __llm_quantization_config__: transformers.BitsAndBytesConfig | transformers.GPTQConfig | transformers.AwqConfig | None = None
@@ -158,16 +161,23 @@ class LLM(t.Generic[M, T], ReprMixin):
     serialisation='safetensors',
     trust_remote_code=False,
     embedded=False,
-    torch_dtype='auto',
+    dtype='auto',
     low_cpu_mem_usage=True,
     **attrs,
   ):
+    # backward compatible
+    torch_dtype = attrs.pop('torch_dtype', None)
+    if torch_dtype is not None:
+      logger.warning(
+        'The argument "torch_dtype" is deprecated and will be removed in the future. Please use "dtype" instead.'
+      )
+      dtype = torch_dtype
     _local = False
     if validate_is_path(model_id):
       model_id, _local = resolve_filepath(model_id), True
     backend = first_not_none(backend, os.getenv('OPENLLM_BACKEND'), default='vllm' if is_vllm_available() else 'pt')
-    torch_dtype = first_not_none(os.getenv('TORCH_DTYPE'), torch_dtype, default='auto')
-    quantize = first_not_none(quantize, os.getenv('OPENLLM_QUANTIZE'), default=None)
+    dtype = first_not_none(getenv('dtype', default=dtype, var=['TORCH_DTYPE']), default='auto')
+    quantize = first_not_none(getenv('quantize', default=quantize, var=['QUANITSE']), default=None)
     attrs.update({'low_cpu_mem_usage': low_cpu_mem_usage})
     # parsing tokenizer and model kwargs, as the hierarchy is param pass > default
     model_attrs, tokenizer_attrs = flatten_attrs(**attrs)
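The switch from ad-hoc `os.getenv` calls to `getenv('dtype', default=dtype, var=['TORCH_DTYPE'])` centralises the precedence rules in one helper. A rough standalone sketch of that precedence, assuming the helper checks an OPENLLM_-prefixed variable before any legacy aliases (the real implementation lives in openllm_core.utils and may differ):

import os
from typing import Any, Sequence

def getenv(name: str, default: Any = None, var: Sequence[str] = ()) -> Any:
  # Check OPENLLM_<NAME> first, then legacy aliases such as TORCH_DTYPE,
  # and only then fall back to the passed-in default.
  for key in (f'OPENLLM_{name.upper()}', *var):
    value = os.environ.get(key)
    if value is not None:
      return value
  return default

dtype = getenv('dtype', default='auto', var=['TORCH_DTYPE'])
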
@@ -189,7 +199,7 @@ class LLM(t.Generic[M, T], ReprMixin):
       system_message=system_message,
       LLM__model_attrs=model_attrs,
       LLM__tokenizer_attrs=tokenizer_attrs,
-      llm_torch_dtype__=torch_dtype.lower(),
+      llm_dtype__=dtype.lower(),
       llm_backend__=backend,
       llm_config__=llm_config,
       llm_trust_remote_code__=trust_remote_code,
@@ -222,15 +232,15 @@ class LLM(t.Generic[M, T], ReprMixin):
     config_dtype = getattr(hf_config, 'torch_dtype', None)
     if config_dtype is None:
       config_dtype = torch.float32
-    if self.__llm_torch_dtype__ == 'auto':
+    if self.__llm_dtype__ == 'auto':
       if config_dtype == torch.float32:
         torch_dtype = torch.float16  # following common practice
       else:
         torch_dtype = config_dtype
     else:
-      if self.__llm_torch_dtype__ not in _torch_dtype_mapping():
-        raise ValueError(f"Unknown dtype '{self.__llm_torch_dtype__}'")
-      torch_dtype = _torch_dtype_mapping()[self.__llm_torch_dtype__]
+      if self.__llm_dtype__ not in _torch_dtype_mapping():
+        raise ValueError(f"Unknown dtype '{self.__llm_dtype__}'")
+      torch_dtype = _torch_dtype_mapping()[self.__llm_dtype__]
     self.__llm_torch_dtype__ = torch_dtype
     return self.__llm_torch_dtype__
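Condensed, the resolution order above is: 'auto' defers to the checkpoint's recorded torch_dtype (upgrading float32 to float16), while anything else must be a key of _torch_dtype_mapping(). A self-contained sketch with illustrative names, not the class internals:

import torch

_DTYPE_MAPPING = {
  'half': torch.float16,
  'float16': torch.float16,
  'float': torch.float32,
  'float32': torch.float32,
  'bfloat16': torch.bfloat16,
}

def resolve_torch_dtype(requested: str, config_dtype):
  # 'auto' defers to the checkpoint's dtype; float32 checkpoints are
  # served in half precision, following common practice.
  if config_dtype is None:
    config_dtype = torch.float32
  if requested == 'auto':
    return torch.float16 if config_dtype == torch.float32 else config_dtype
  if requested not in _DTYPE_MAPPING:
    raise ValueError(f"Unknown dtype '{requested}'")
  return _DTYPE_MAPPING[requested]

assert resolve_torch_dtype('auto', torch.float32) is torch.float16
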
@@ -32,7 +32,8 @@ class IdentifyingParams(TypedDict):
   model_id: str

 ResolvedAdapterMap = Dict[AdapterType, Dict[str, Tuple[PeftConfig, str]]]
-Dtype = Union[LiteralDtype, Literal['auto', 'half', 'float']]
+CTranslateDtype = Literal['int8_float32', 'int8_float16', 'int8_bfloat16']
+Dtype = Union[LiteralDtype, CTranslateDtype, Literal['auto', 'half', 'float']]

 @attr.define(slots=True, repr=False, init=False)
 class LLM(Generic[M, T]):
@@ -50,7 +51,8 @@ class LLM(Generic[M, T]):
   _prompt_template: Optional[PromptTemplate]
   _system_message: Optional[str]

-  __llm_torch_dtype__: Dtype = ...
+  __llm_dtype__: Dtype = ...
+  __llm_torch_dtype__: Optional[torch.dtype] = ...
   __llm_config__: Optional[LLMConfig] = ...
   __llm_backend__: LiteralBackend = ...
   __llm_quantization_config__: Optional[QuantizationConfig] = ...
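The widened `Dtype` union lets CTranslate2's fused quantisation dtypes pass type checking wherever a dtype string is accepted. A self-contained sketch; `LiteralDtype` is approximated here, the real alias comes from openllm_core._typing_compat:

from typing import Literal, Union

LiteralDtype = Literal['float16', 'float32', 'bfloat16']  # approximation of the upstream alias
CTranslateDtype = Literal['int8_float32', 'int8_float16', 'int8_bfloat16']
Dtype = Union[LiteralDtype, CTranslateDtype, Literal['auto', 'half', 'float']]

def set_dtype(dtype: Dtype) -> None:
  # A type checker now accepts CTranslate2's fused dtypes here as well.
  print(dtype)

set_dtype('int8_float16')
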
@@ -1,12 +1,7 @@
 from __future__ import annotations

 from openllm_core.exceptions import MissingDependencyError
-from openllm_core.utils import (
-  is_autoawq_available,
-  is_autogptq_available,
-  is_bitsandbytes_available,
-  is_optimum_supports_gptq,
-)
+from openllm_core.utils import is_autoawq_available, is_autogptq_available, is_bitsandbytes_available


 def infer_quantisation_config(llm, quantise, **attrs):
@@ -98,7 +93,7 @@ def infer_quantisation_config(llm, quantise, **attrs):
   elif quantise == 'int4':
     quantisation_config = create_int4_config()
   elif quantise == 'gptq':
-    if not is_autogptq_available() or not is_optimum_supports_gptq():
+    if not is_autogptq_available():
       raise MissingDependencyError(
         "GPTQ requires 'auto-gptq' and 'optimum>=0.12' to be installed. Do it with 'pip install \"openllm[gptq]\"'"
       )
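The guard now keys only on `auto-gptq` being importable. These availability helpers are, at heart, import probes; a rough sketch under that assumption (the real openllm_core.utils versions cache their results and may check versions too):

import importlib.util

def _is_available(pkg: str) -> bool:
  # True when the package is importable in the current environment.
  return importlib.util.find_spec(pkg) is not None

def is_autogptq_available() -> bool:
  return _is_available('auto_gptq')

def is_bitsandbytes_available() -> bool:
  return _is_available('bitsandbytes')
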
@@ -1,18 +1,13 @@
-"""Protocol-related packages for all library integrations.
-
-Currently support OpenAI compatible API.
-"""
-
 from __future__ import annotations
 import os
 import typing as t

 from openllm_core.utils import LazyModule

-_import_structure: dict[str, list[str]] = {'openai': []}
+_import_structure: dict[str, list[str]] = {'openai': [], 'cohere': [], 'hf': []}

 if t.TYPE_CHECKING:
-  from . import openai as openai
+  from . import cohere as cohere, hf as hf, openai as openai

 __lazy = LazyModule(__name__, os.path.abspath('__file__'), _import_structure)
 __all__ = __lazy.__all__
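`LazyModule` keys off `_import_structure`, so registering `cohere` and `hf` here is all it takes to expose them; submodules are only imported on first attribute access. A minimal PEP 562 sketch of the same idea (not the actual `LazyModule` implementation):

import importlib
import typing as t

_import_structure: dict[str, list[str]] = {'openai': [], 'cohere': [], 'hf': []}

def __getattr__(name: str) -> t.Any:
  # Lazily import e.g. `openllm.protocol.cohere` the first time it is touched.
  if name in _import_structure:
    return importlib.import_module(f'.{name}', __name__)
  raise AttributeError(f'module {__name__!r} has no attribute {name!r}')
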
@@ -1,21 +1,26 @@
 from __future__ import annotations
 import importlib
 import typing as t

-import cloudpickle
-import fs
-
-from openllm_core._typing_compat import ParamSpec
+from openllm_core._typing_compat import M, ParamSpec, T, TypeGuard
+from openllm_core.exceptions import OpenLLMException
+
+if t.TYPE_CHECKING:
+  from bentoml import Model
+
+  from .._llm import LLM

 P = ParamSpec('P')


-def load_tokenizer(llm, **tokenizer_attrs):
+def load_tokenizer(llm: LLM[M, T], **tokenizer_attrs: t.Any) -> TypeGuard[T]:
   """Load the tokenizer from BentoML store.

   By default, it will try to find the bentomodel whether it is in store..
   If model is not found, it will raises a ``bentoml.exceptions.NotFound``.
   """
+  import cloudpickle
+  import fs
   from transformers import AutoTokenizer

   tokenizer_attrs = {**llm.llm_parameters[-1], **tokenizer_attrs}
@@ -52,34 +57,39 @@ def load_tokenizer(llm, **tokenizer_attrs):
   return tokenizer


-_extras = ['get', 'import_model', 'load_model']
-
-
 def _make_dispatch_function(fn):
-  def caller(llm, *args, **kwargs):
+  def caller(llm: LLM[M, T], *args: P.args, **kwargs: P.kwargs) -> TypeGuard[M | T | Model]:
     """Generic function dispatch to correct serialisation submodules based on LLM runtime.

     > [!NOTE] See 'openllm.serialisation.transformers' if 'llm.__llm_backend__ in ("pt", "vllm")'

     > [!NOTE] See 'openllm.serialisation.ggml' if 'llm.__llm_backend__="ggml"'
+
+    > [!NOTE] See 'openllm.serialisation.ctranslate' if 'llm.__llm_backend__="ctranslate"'
     """
-    serde = 'transformers'
     if llm.__llm_backend__ == 'ggml':
       serde = 'ggml'
-    return getattr(importlib.import_module(f'.{serde}', __name__), fn)(llm, *args, **kwargs)
+    elif llm.__llm_backend__ == 'ctranslate':
+      serde = 'ctranslate'
+    elif llm.__llm_backend__ in {'pt', 'vllm'}:
+      serde = 'transformers'
+    else:
+      raise OpenLLMException(f'Not supported backend {llm.__llm_backend__}')
+    return getattr(importlib.import_module(f'.{serde}', 'openllm.serialisation'), fn)(llm, *args, **kwargs)

   return caller


-_import_structure: dict[str, list[str]] = {'ggml': [], 'transformers': [], 'constants': []}
-__all__ = ['ggml', 'transformers', 'constants', 'load_tokenizer', *_extras]
+_extras = ['get', 'import_model', 'load_model']
+_import_structure = {'ggml', 'transformers', 'ctranslate', 'constants'}
+__all__ = ['load_tokenizer', *_extras, *_import_structure]


-def __dir__():
+def __dir__() -> t.Sequence[str]:
   return sorted(__all__)


-def __getattr__(name):
+def __getattr__(name: str) -> t.Any:
   if name == 'load_tokenizer':
     return load_tokenizer
   elif name in _import_structure:
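The hunk cuts off inside `__getattr__`, but the apparent intent is that names in `_extras` resolve to generated dispatchers while submodule names import lazily. A sketch of that tail under that assumption (not the verbatim continuation):

def __getattr__(name):
  if name == 'load_tokenizer':
    return load_tokenizer
  elif name in _import_structure:
    return importlib.import_module(f'.{name}', __name__)  # lazy submodule import
  elif name in _extras:
    return _make_dispatch_function(name)  # e.g. openllm.serialisation.get
  raise AttributeError(f'module {__name__} has no attribute {name}')
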
@@ -63,9 +63,22 @@ def import_model(llm, *decls, trust_remote_code, _model_store=Provide[BentoMLCon
   metadata['_quantize'] = quantize
   architectures = getattr(config, 'architectures', [])
   if not architectures:
-    raise RuntimeError(
-      'Failed to determine the architecture for this model. Make sure the `config.json` is valid and can be loaded with `transformers.AutoConfig`'
-    )
+    if trust_remote_code:
+      auto_map = getattr(config, 'auto_map', {})
+      if not auto_map:
+        raise RuntimeError(
+          f'Failed to determine the architecture from both `auto_map` and `architectures` from {llm.model_id}'
+        )
+      autoclass = 'AutoModelForSeq2SeqLM' if llm.config['model_type'] == 'seq2seq_lm' else 'AutoModelForCausalLM'
+      if autoclass not in auto_map:
+        raise RuntimeError(
+          f"Given model '{llm.model_id}' is yet to be supported with 'auto_map'. OpenLLM currently only support encoder-decoders or decoders only models."
+        )
+      architectures = [auto_map[autoclass]]
+    else:
+      raise RuntimeError(
+        'Failed to determine the architecture for this model. Make sure the `config.json` is valid and can be loaded with `transformers.AutoConfig`'
+      )
   metadata['_pretrained_class'] = architectures[0]
   if not llm._local:
     metadata['_revision'] = get_hash(config)
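For remote-code checkpoints, `config.auto_map` maps transformers auto-classes to the custom implementation shipped inside the repo, which is what the new branch falls back on. A sketch of what that lookup sees (the mapping values are illustrative):

from transformers import PretrainedConfig

# A remote-code checkpoint's config.json carries an auto_map entry like this:
config = PretrainedConfig()
config.auto_map = {'AutoModelForCausalLM': 'modeling_custom.CustomForCausalLM'}

autoclass = 'AutoModelForCausalLM'
auto_map = getattr(config, 'auto_map', {})
if autoclass in auto_map:
  architectures = [auto_map[autoclass]]  # ['modeling_custom.CustomForCausalLM']
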
@@ -75,7 +88,7 @@ def import_model(llm, *decls, trust_remote_code, _model_store=Provide[BentoMLCon
   signatures = {}

   if quantize == 'gptq':
-    if not openllm.utils.is_autogptq_available() or not openllm.utils.is_optimum_supports_gptq():
+    if not openllm.utils.is_autogptq_available():
       raise OpenLLMException(
         "GPTQ quantisation requires 'auto-gptq' and 'optimum' (Not found in local environment). Install it with 'pip install \"openllm[gptq]\" --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/'"
       )
@@ -213,7 +226,7 @@ def load_model(llm, *decls, **attrs):
   if '_quantize' in llm.bentomodel.info.metadata:
     _quantise = llm.bentomodel.info.metadata['_quantize']
     if _quantise == 'gptq':
-      if not openllm.utils.is_autogptq_available() or not openllm.utils.is_optimum_supports_gptq():
+      if not openllm.utils.is_autogptq_available():
        raise OpenLLMException(
          "GPTQ quantisation requires 'auto-gptq' and 'optimum' (Not found in local environment). Install it with 'pip install \"openllm[gptq]\" --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/'"
        )
@@ -1,6 +1,7 @@
 from __future__ import annotations
+import copy
 import logging
 import typing as t

 import transformers

@@ -10,14 +11,14 @@ from openllm_core.utils import get_disable_warnings, get_quiet_mode
 logger = logging.getLogger(__name__)


-def get_hash(config) -> str:
+def get_hash(config: transformers.PretrainedConfig) -> str:
   _commit_hash = getattr(config, '_commit_hash', None)
   if _commit_hash is None:
     raise ValueError(f'Cannot find commit hash in {config}')
   return _commit_hash


-def process_config(model_id, trust_remote_code, **attrs):
+def process_config(model_id: str, trust_remote_code: bool, **attrs: t.Any):
   config = attrs.pop('config', None)
   # this logic below is synonymous to handling `from_pretrained` attrs.
   hub_attrs = {k: attrs.pop(k) for k in HUB_ATTRS if k in attrs}
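`get_hash` leans on transformers stamping the resolved revision onto the config it loads. A small usage sketch (real model id, but any Hub checkpoint works; requires network access on first run):

from transformers import AutoConfig

config = AutoConfig.from_pretrained('facebook/opt-125m')
# transformers records the git revision of the resolved download here:
print(getattr(config, '_commit_hash', None))
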
@@ -1,74 +1,10 @@
-"""Utilities function for OpenLLM.
-
-User can import these function for convenience, but
-we won't ensure backward compatibility for these functions. So use with caution.
-"""
-
 from __future__ import annotations
 import functools
 import importlib.metadata
 import typing as t

 import openllm_core

-if t.TYPE_CHECKING:
-  import openllm
-  from openllm_core.utils import (
-    DEBUG as DEBUG,
-    DEBUG_ENV_VAR as DEBUG_ENV_VAR,
-    DEV_DEBUG_VAR as DEV_DEBUG_VAR,
-    ENV_VARS_TRUE_VALUES as ENV_VARS_TRUE_VALUES,
-    MYPY as MYPY,
-    OPTIONAL_DEPENDENCIES as OPTIONAL_DEPENDENCIES,
-    QUIET_ENV_VAR as QUIET_ENV_VAR,
-    SHOW_CODEGEN as SHOW_CODEGEN,
-    LazyLoader as LazyLoader,
-    LazyModule as LazyModule,
-    ReprMixin as ReprMixin,
-    VersionInfo as VersionInfo,
-    analytics as analytics,
-    calc_dir_size as calc_dir_size,
-    check_bool_env as check_bool_env,
-    codegen as codegen,
-    configure_logging as configure_logging,
-    dantic as dantic,
-    field_env_key as field_env_key,
-    first_not_none as first_not_none,
-    flatten_attrs as flatten_attrs,
-    gen_random_uuid as gen_random_uuid,
-    generate_context as generate_context,
-    generate_hash_from_file as generate_hash_from_file,
-    get_debug_mode as get_debug_mode,
-    get_disable_warnings as get_disable_warnings,
-    get_quiet_mode as get_quiet_mode,
-    in_notebook as in_notebook,
-    is_autoawq_available as is_autoawq_available,
-    is_autogptq_available as is_autogptq_available,
-    is_bentoml_available as is_bentoml_available,
-    is_bitsandbytes_available as is_bitsandbytes_available,
-    is_grpc_available as is_grpc_available,
-    is_jupyter_available as is_jupyter_available,
-    is_jupytext_available as is_jupytext_available,
-    is_notebook_available as is_notebook_available,
-    is_optimum_supports_gptq as is_optimum_supports_gptq,
-    is_peft_available as is_peft_available,
-    is_torch_available as is_torch_available,
-    is_transformers_available as is_transformers_available,
-    is_vllm_available as is_vllm_available,
-    lenient_issubclass as lenient_issubclass,
-    reserve_free_port as reserve_free_port,
-    resolve_filepath as resolve_filepath,
-    resolve_user_filepath as resolve_user_filepath,
-    serde as serde,
-    set_debug_mode as set_debug_mode,
-    set_disable_warnings as set_disable_warnings,
-    set_quiet_mode as set_quiet_mode,
-    validate_is_path as validate_is_path,
-  )
-  from openllm_core.utils.serde import converter as converter


-def generate_labels(llm: openllm.LLM[t.Any, t.Any]) -> dict[str, t.Any]:
+def generate_labels(llm):
   return {
     'backend': llm.__llm_backend__,
     'framework': 'openllm',
@@ -79,27 +15,26 @@ def generate_labels(llm: openllm.LLM[t.Any, t.Any]) -> dict[str, t.Any]:
   }


-def available_devices() -> tuple[str, ...]:
-  """Return available GPU under system. Currently only supports NVIDIA GPUs."""
+def available_devices():
   from .._strategies import NvidiaGpuResource

   return tuple(NvidiaGpuResource.from_system())


 @functools.lru_cache(maxsize=1)
-def device_count() -> int:
+def device_count():
   return len(available_devices())


 __all__ = ['generate_labels', 'available_devices', 'device_count']


-def __dir__() -> t.Sequence[str]:
-  return sorted(__all__) + sorted(dir(openllm_core.utils))
+def __dir__():
+  coreutils = set(dir(openllm_core.utils)) | set([it for it in openllm_core.utils._extras if not it.startswith('_')])
+  return sorted(__all__) + sorted(list(coreutils))


-def __getattr__(it: str) -> t.Any:
+def __getattr__(it):
   if hasattr(openllm_core.utils, it):
     return getattr(openllm_core.utils, it)
-  else:
-    raise AttributeError(f'module {__name__} has no attribute {it}')
+  raise AttributeError(f'module {__name__} has no attribute {it}')
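With the giant re-export block gone at runtime, lookups now flow through the module-level `__getattr__` proxy; the new `.pyi` stub below restores the same names for type checkers. A usage sketch (assuming an environment with this commit installed):

import openllm

# openllm.utils defines no first_not_none of its own; the module-level
# __getattr__ forwards the lookup to openllm_core.utils.first_not_none.
assert openllm.utils.first_not_none(None, 'fallback') == 'fallback'
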
openllm-python/src/openllm/utils/__init__.pyi (new file, 61 lines)
@@ -0,0 +1,61 @@
+from typing import Any, Dict, Tuple
+
+from openllm_core.utils import (
+  DEBUG as DEBUG,
+  DEBUG_ENV_VAR as DEBUG_ENV_VAR,
+  DEV_DEBUG_VAR as DEV_DEBUG_VAR,
+  ENV_VARS_TRUE_VALUES as ENV_VARS_TRUE_VALUES,
+  MYPY as MYPY,
+  OPTIONAL_DEPENDENCIES as OPTIONAL_DEPENDENCIES,
+  QUIET_ENV_VAR as QUIET_ENV_VAR,
+  SHOW_CODEGEN as SHOW_CODEGEN,
+  LazyLoader as LazyLoader,
+  LazyModule as LazyModule,
+  ReprMixin as ReprMixin,
+  VersionInfo as VersionInfo,
+  analytics as analytics,
+  calc_dir_size as calc_dir_size,
+  check_bool_env as check_bool_env,
+  codegen as codegen,
+  configure_logging as configure_logging,
+  dantic as dantic,
+  field_env_key as field_env_key,
+  first_not_none as first_not_none,
+  flatten_attrs as flatten_attrs,
+  gen_random_uuid as gen_random_uuid,
+  generate_context as generate_context,
+  generate_hash_from_file as generate_hash_from_file,
+  get_debug_mode as get_debug_mode,
+  get_disable_warnings as get_disable_warnings,
+  get_quiet_mode as get_quiet_mode,
+  getenv as getenv,
+  in_notebook as in_notebook,
+  is_autoawq_available as is_autoawq_available,
+  is_autogptq_available as is_autogptq_available,
+  is_bentoml_available as is_bentoml_available,
+  is_bitsandbytes_available as is_bitsandbytes_available,
+  is_grpc_available as is_grpc_available,
+  is_jupyter_available as is_jupyter_available,
+  is_jupytext_available as is_jupytext_available,
+  is_notebook_available as is_notebook_available,
+  is_peft_available as is_peft_available,
+  is_torch_available as is_torch_available,
+  is_transformers_available as is_transformers_available,
+  is_vllm_available as is_vllm_available,
+  lenient_issubclass as lenient_issubclass,
+  reserve_free_port as reserve_free_port,
+  resolve_filepath as resolve_filepath,
+  resolve_user_filepath as resolve_user_filepath,
+  serde as serde,
+  set_debug_mode as set_debug_mode,
+  set_disable_warnings as set_disable_warnings,
+  set_quiet_mode as set_quiet_mode,
+  validate_is_path as validate_is_path,
+)
+from openllm_core.utils.serde import converter as converter
+
+from .._llm import LLM
+
+def available_devices() -> Tuple[str, ...]: ...
+def device_count() -> int: ...
+def generate_labels(llm: LLM[Any, Any]) -> Dict[str, Any]: ...