# Mirror of https://github.com/bentoml/OpenLLM.git
# Synced 2026-04-21 07:29:41 -04:00
# 67 lines, 2.5 KiB, Python
import logging as _logging, os as _os, pathlib as _pathlib, warnings as _warnings, typing as _t
|
|
|
|
from . import utils as utils
|
|
|
|
# Start-up configuration: debug runs open up logging, regular runs quieten
# a handful of known warning messages.
if utils.DEBUG:
    utils.set_debug_mode(True)
    _logging.basicConfig(level=_logging.NOTSET)
else:
    # configuration for bitsandbytes before import: an already-set value is
    # kept, otherwise the flag defaults to '1'.
    _os.environ.setdefault('BITSANDBYTES_NOWELCOME', '1')
    # NOTE: The following warnings from bitsandbytes, and probably not that important for users to see when DEBUG is False
    for _ignored_message in (
        'MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization',
        'MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization',
        'The installed version of bitsandbytes was compiled without GPU support.',
        'Neither GITHUB_TOKEN nor GITHUB_JWT_TOKEN found: running as unauthenticated',
    ):
        _warnings.filterwarnings('ignore', message=_ignored_message)
    # Do not leave the loop variable behind as a module attribute.
    del _ignored_message
|
|
# True when this module's own file carries a '.pyd' or '.so' suffix.
COMPILED = _pathlib.Path(__file__).suffix in ('.pyd', '.so')

# NOTE: update this to sys.modules[__name__] once mypy_extensions can recognize __spec__
# The mapping pairs each submodule name with the names exported from it
# (presumably imported on first access -- confirm against utils.LazyModule).
__lazy = utils.LazyModule(
    __name__,
    globals()['__file__'],
    {
        'exceptions': [],
        'client': ['HTTPClient', 'AsyncHTTPClient'],
        'bundle': [],
        'utils': ['api'],
        'serialisation': ['ggml', 'transformers', 'vllm'],
        '_llm': ['LLM'],
        '_deprecated': ['Runner'],
        '_runners': ['runner'],
        '_strategies': ['CascadingResourceStrategy', 'get_resource'],
    },
    extra_objects={'COMPILED': COMPILED},
)

# Public listing and dir() support are taken straight from the lazy module.
__all__ = __lazy.__all__
__dir__ = __lazy.__dir__
|
|
|
|
# Names for which __getattr__ raises ImportError instead of falling back to
# the lazy module.
_BREAKING_INTERNAL = ['_service', '_service_vars']
# Every name that __getattr__ may serve from `_tiny`.
_NEW_IMPL = ['LLM'] + _BREAKING_INTERNAL

# `_tiny` stays None unless the installed bentoml version info compares
# greater than (1, 2), in which case the `_openllm_tiny` module is bound to it.
_tiny = None
if utils.pkg.pkg_version_info('bentoml') > (1, 2):
    import _openllm_tiny as _tiny
|
|
|
|
|
|
def __getattr__(name: str) -> _t.Any:
    """Resolve a module attribute that regular lookup did not find.

    Names outside ``_NEW_IMPL`` are delegated to the lazy module. Names in
    ``_NEW_IMPL`` are read from ``_tiny``, unless ``_tiny`` is ``None`` or the
    ``IMPLEMENTATION`` setting (read through ``utils.getenv``, default
    ``'new_impl'``) equals ``'deprecated'``; in that case the lazy module is
    used after a ``DeprecationWarning`` is emitted.

    Args:
        name: The attribute being looked up on this module.

    Returns:
        Whatever the selected backend (``_tiny`` or the lazy module) yields.

    Raises:
        ImportError: If ``name`` is in ``_BREAKING_INTERNAL`` and the legacy
            path was selected.
    """
    # Anything that is not part of the new implementation goes to the lazy module.
    if name not in _NEW_IMPL:
        return __lazy.__getattr__(name)

    # The setting is read on every lookup, before `_tiny` is inspected.
    legacy_requested = utils.getenv('IMPLEMENTATION', default='new_impl') == 'deprecated'
    if not (legacy_requested or _tiny is None):
        return getattr(_tiny, name)

    # Legacy path: internal names are refused outright.
    if name in _BREAKING_INTERNAL:
        raise ImportError(
            f'"{name}" is an internal implementation and considered breaking with older OpenLLM. Please migrate your code if you depend on this.'
        )

    _warnings.warn(
        f'"{name}" is considered deprecated implementation and could be breaking. See https://github.com/bentoml/OpenLLM for more information on upgrading instruction.',
        DeprecationWarning,
        stacklevel=3,
    )
    return __lazy.__getattr__(name)
|