import logging as _logging, os as _os, pathlib as _pathlib, warnings as _warnings
from openllm_cli import _sdk
from . import utils as utils

if utils.DEBUG:
  utils.set_debug_mode(True)
  _logging.basicConfig(level=_logging.NOTSET)
else:
  # configuration for bitsandbytes before import
  _os.environ['BITSANDBYTES_NOWELCOME'] = _os.environ.get('BITSANDBYTES_NOWELCOME', '1')
  # NOTE: The following warnings from bitsandbytes, and probably not that important for users to see when DEBUG is False
  _warnings.filterwarnings('ignore', message='MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization')
  _warnings.filterwarnings('ignore', message='MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization')
  _warnings.filterwarnings('ignore', message='The installed version of bitsandbytes was compiled without GPU support.')
  _warnings.filterwarnings('ignore', message='Neither GITHUB_TOKEN nor GITHUB_JWT_TOKEN found: running as unauthenticated')
COMPILED = _pathlib.Path(__file__).suffix in ('.pyd', '.so')
__lazy = utils.LazyModule(  # NOTE: update this to sys.modules[__name__] once mypy_extensions can recognize __spec__
  __name__,
  globals()['__file__'],
  {
    'exceptions': [],
    'client': ['HTTPClient', 'AsyncHTTPClient'],
    'bundle': [],
    'testing': [],
    'protocol': [],
    'utils': [],
    '_deprecated': ['Runner'],
    '_strategies': ['CascadingResourceStrategy', 'get_resource'],
    'entrypoints': ['mount_entrypoints'],
    'serialisation': ['ggml', 'transformers'],
    '_quantisation': ['infer_quantisation_config'],
    '_llm': ['LLM'],
  },
  extra_objects={
    'COMPILED': COMPILED,
    'start': _sdk.start,
    'build': _sdk.build,  #
    'import_model': _sdk.import_model,
    'list_models': _sdk.list_models,  #
  },
)
__all__, __dir__, __getattr__ = __lazy.__all__, __lazy.__dir__, __lazy.__getattr__