"""OpenLLM. An open platform for operating large language models in production. Fine-tune, serve, deploy, and monitor any LLMs with ease. * Built-in support for StableLM, Llama 2, Dolly, Flan-T5, Vicuna * Option to bring your own fine-tuned LLMs * Online Serving with HTTP, gRPC, SSE(coming soon) or custom API * Native integration with BentoML and LangChain for custom LLM apps """ from __future__ import annotations import logging as _logging, os as _os, typing as _t, warnings as _warnings, openllm_core from pathlib import Path as _Path from . import exceptions as exceptions, utils as utils from openllm_core._configuration import GenerationConfig as GenerationConfig, LLMConfig as LLMConfig, SamplingParams as SamplingParams from openllm_core._strategies import CascadingResourceStrategy as CascadingResourceStrategy, get_resource as get_resource from openllm_core._schema import EmbeddingsOutput as EmbeddingsOutput, GenerationInput as GenerationInput, GenerationOutput as GenerationOutput, HfAgentInput as HfAgentInput, MetadataOutput as MetadataOutput, unmarshal_vllm_outputs as unmarshal_vllm_outputs from openllm_core.config import AutoConfig as AutoConfig, CONFIG_MAPPING as CONFIG_MAPPING, CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES, BaichuanConfig as BaichuanConfig, ChatGLMConfig as ChatGLMConfig, DollyV2Config as DollyV2Config, FalconConfig as FalconConfig, FlanT5Config as FlanT5Config, GPTNeoXConfig as GPTNeoXConfig, LlamaConfig as LlamaConfig, MPTConfig as MPTConfig, OPTConfig as OPTConfig, StableLMConfig as StableLMConfig, StarCoderConfig as StarCoderConfig if openllm_core.utils.DEBUG: openllm_core.utils.set_debug_mode(True) openllm_core.utils.set_quiet_mode(False) _logging.basicConfig(level=_logging.NOTSET) else: # configuration for bitsandbytes before import _os.environ["BITSANDBYTES_NOWELCOME"] = _os.environ.get("BITSANDBYTES_NOWELCOME", "1") # NOTE: The following warnings from bitsandbytes, and probably not that important for users to see when DEBUG is False _warnings.filterwarnings("ignore", message="MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization") _warnings.filterwarnings("ignore", message="MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization") _warnings.filterwarnings("ignore", message="The installed version of bitsandbytes was compiled without GPU support.") # NOTE: ignore the following warning from ghapi as it is not important for users _warnings.filterwarnings("ignore", message="Neither GITHUB_TOKEN nor GITHUB_JWT_TOKEN found: running as unauthenticated") _import_structure: dict[str, list[str]] = { "exceptions": [], "models": [], "client": [], "bundle": [], "playground": [], "testing": [], "utils": ["infer_auto_class"], "serialisation": ["ggml", "transformers"], "cli._sdk": ["start", "start_grpc", "build", "import_model", "list_models"], "_quantisation": ["infer_quantisation_config"], "_embeddings": ["GenericEmbeddingRunnable"], "_llm": ["LLM", "Runner", "LLMRunner", "LLMRunnable", "EmbeddingsOutput"], "_generation": ["StopSequenceCriteria", "StopOnTokens", "LogitsProcessorList", "StoppingCriteriaList", "prepare_logits_processor"], "models.auto": ["MODEL_MAPPING_NAMES", "MODEL_FLAX_MAPPING_NAMES", "MODEL_TF_MAPPING_NAMES", "MODEL_VLLM_MAPPING_NAMES"], "models.chatglm": [], "models.baichuan": [], "models.dolly_v2": [], "models.falcon": [], "models.flan_t5": [], "models.gpt_neox": [], "models.llama": [], "models.mpt": [], "models.opt": [], "models.stablelm": [], "models.starcoder": [] } COMPILED = 
_Path(__file__).suffix in (".pyd", ".so") if _t.TYPE_CHECKING: from . import bundle as bundle, cli as cli, client as client, models as models, playground as playground, serialisation as serialisation, testing as testing from ._generation import LogitsProcessorList as LogitsProcessorList, StopOnTokens as StopOnTokens, StoppingCriteriaList as StoppingCriteriaList, StopSequenceCriteria as StopSequenceCriteria, prepare_logits_processor as prepare_logits_processor from ._llm import LLM as LLM, EmbeddingsOutput as EmbeddingsOutput, LLMRunnable as LLMRunnable, LLMRunner as LLMRunner, Runner as Runner from ._quantisation import infer_quantisation_config as infer_quantisation_config from ._embeddings import GenericEmbeddingRunnable as GenericEmbeddingRunnable from .cli._sdk import build as build, import_model as import_model, list_models as list_models, start as start, start_grpc as start_grpc from .models.auto import MODEL_FLAX_MAPPING_NAMES as MODEL_FLAX_MAPPING_NAMES, MODEL_MAPPING_NAMES as MODEL_MAPPING_NAMES, MODEL_TF_MAPPING_NAMES as MODEL_TF_MAPPING_NAMES, MODEL_VLLM_MAPPING_NAMES as MODEL_VLLM_MAPPING_NAMES from .serialisation import ggml as ggml, transformers as transformers from .utils import infer_auto_class as infer_auto_class try: if not (openllm_core.utils.is_torch_available() and openllm_core.utils.is_cpm_kernels_available()): raise exceptions.MissingDependencyError except exceptions.MissingDependencyError: _import_structure["utils.dummy_pt_objects"] = ["ChatGLM", "Baichuan"] else: _import_structure["models.chatglm"].extend(["ChatGLM"]) _import_structure["models.baichuan"].extend(["Baichuan"]) if _t.TYPE_CHECKING: from .models.baichuan import Baichuan as Baichuan from .models.chatglm import ChatGLM as ChatGLM try: if not (openllm_core.utils.is_torch_available() and openllm_core.utils.is_triton_available()): raise exceptions.MissingDependencyError except exceptions.MissingDependencyError: if "utils.dummy_pt_objects" in _import_structure: _import_structure["utils.dummy_pt_objects"].extend(["MPT"]) else: _import_structure["utils.dummy_pt_objects"] = ["MPT"] else: _import_structure["models.mpt"].extend(["MPT"]) if _t.TYPE_CHECKING: from .models.mpt import MPT as MPT try: if not (openllm_core.utils.is_torch_available() and openllm_core.utils.is_einops_available()): raise exceptions.MissingDependencyError except exceptions.MissingDependencyError: if "utils.dummy_pt_objects" in _import_structure: _import_structure["utils.dummy_pt_objects"].extend(["Falcon"]) else: _import_structure["utils.dummy_pt_objects"] = ["Falcon"] else: _import_structure["models.falcon"].extend(["Falcon"]) if _t.TYPE_CHECKING: from .models.falcon import Falcon as Falcon try: if not openllm_core.utils.is_torch_available(): raise exceptions.MissingDependencyError except exceptions.MissingDependencyError: _import_structure["utils.dummy_pt_objects"] = [ name for name in dir(utils.dummy_pt_objects) if not name.startswith("_") and name not in ("ChatGLM", "Baichuan", "MPT", "Falcon", "annotations") ] else: _import_structure["models.flan_t5"].extend(["FlanT5"]) _import_structure["models.dolly_v2"].extend(["DollyV2"]) _import_structure["models.starcoder"].extend(["StarCoder"]) _import_structure["models.stablelm"].extend(["StableLM"]) _import_structure["models.opt"].extend(["OPT"]) _import_structure["models.gpt_neox"].extend(["GPTNeoX"]) _import_structure["models.llama"].extend(["Llama"]) _import_structure["models.auto"].extend(["AutoLLM", "MODEL_MAPPING"]) if _t.TYPE_CHECKING: from .models.auto import MODEL_MAPPING as 
MODEL_MAPPING, AutoLLM as AutoLLM from .models.dolly_v2 import DollyV2 as DollyV2 from .models.flan_t5 import FlanT5 as FlanT5 from .models.gpt_neox import GPTNeoX as GPTNeoX from .models.llama import Llama as Llama from .models.opt import OPT as OPT from .models.stablelm import StableLM as StableLM from .models.starcoder import StarCoder as StarCoder try: if not openllm_core.utils.is_vllm_available(): raise exceptions.MissingDependencyError except exceptions.MissingDependencyError: _import_structure["utils.dummy_vllm_objects"] = [name for name in dir(utils.dummy_vllm_objects) if not name.startswith("_") and name not in ("annotations",)] else: _import_structure["models.baichuan"].extend(["VLLMBaichuan"]) _import_structure["models.llama"].extend(["VLLMLlama"]) _import_structure["models.opt"].extend(["VLLMOPT"]) _import_structure["models.dolly_v2"].extend(["VLLMDollyV2"]) _import_structure["models.falcon"].extend(["VLLMFalcon"]) _import_structure["models.gpt_neox"].extend(["VLLMGPTNeoX"]) _import_structure["models.mpt"].extend(["VLLMMPT"]) _import_structure["models.stablelm"].extend(["VLLMStableLM"]) _import_structure["models.starcoder"].extend(["VLLMStarCoder"]) _import_structure["models.auto"].extend(["AutoVLLM", "MODEL_VLLM_MAPPING"]) if _t.TYPE_CHECKING: from .models.auto import MODEL_VLLM_MAPPING as MODEL_VLLM_MAPPING, AutoVLLM as AutoVLLM from .models.baichuan import VLLMBaichuan as VLLMBaichuan from .models.dolly_v2 import VLLMDollyV2 as VLLMDollyV2 from .models.gpt_neox import VLLMGPTNeoX as VLLMGPTNeoX from .models.falcon import VLLMFalcon as VLLMFalcon from .models.llama import VLLMLlama as VLLMLlama from .models.mpt import VLLMMPT as VLLMMPT from .models.opt import VLLMOPT as VLLMOPT from .models.stablelm import VLLMStableLM as VLLMStableLM from .models.starcoder import VLLMStarCoder as VLLMStarCoder try: if not openllm_core.utils.is_flax_available(): raise exceptions.MissingDependencyError except exceptions.MissingDependencyError: _import_structure["utils.dummy_flax_objects"] = [name for name in dir(utils.dummy_flax_objects) if not name.startswith("_") and name not in ("annotations",)] else: _import_structure["models.flan_t5"].extend(["FlaxFlanT5"]) _import_structure["models.opt"].extend(["FlaxOPT"]) _import_structure["models.auto"].extend(["AutoFlaxLLM", "MODEL_FLAX_MAPPING"]) if _t.TYPE_CHECKING: from .models.auto import MODEL_FLAX_MAPPING as MODEL_FLAX_MAPPING, AutoFlaxLLM as AutoFlaxLLM from .models.flan_t5 import FlaxFlanT5 as FlaxFlanT5 from .models.opt import FlaxOPT as FlaxOPT try: if not openllm_core.utils.is_tf_available(): raise exceptions.MissingDependencyError except exceptions.MissingDependencyError: _import_structure["utils.dummy_tf_objects"] = [name for name in dir(utils.dummy_tf_objects) if not name.startswith("_") and name not in ("annotations",)] else: _import_structure["models.flan_t5"].extend(["TFFlanT5"]) _import_structure["models.opt"].extend(["TFOPT"]) _import_structure["models.auto"].extend(["AutoTFLLM", "MODEL_TF_MAPPING"]) if _t.TYPE_CHECKING: from .models.auto import MODEL_TF_MAPPING as MODEL_TF_MAPPING, AutoTFLLM as AutoTFLLM from .models.flan_t5 import TFFlanT5 as TFFlanT5 from .models.opt import TFOPT as TFOPT # NOTE: update this to sys.modules[__name__] once mypy_extensions can recognize __spec__ __lazy = openllm_core.utils.LazyModule(__name__, globals()["__file__"], _import_structure, extra_objects={"COMPILED": COMPILED, "__openllm_migration__": {"LLMEmbeddings": "EmbeddingsOutput"}}) __all__ = __lazy.__all__ __dir__ = __lazy.__dir__ __getattr__ = 
__lazy.__getattr__
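
# Example usage of the lazily exported API above (a minimal sketch, kept as comments so
# it has no runtime effect). `Runner` and `start` are re-exported here from `._llm` and
# `.cli._sdk` via `_import_structure`; the exact arguments shown below are assumptions
# and may differ between OpenLLM versions.
#
#   >>> import openllm
#   >>> runner = openllm.Runner("dolly-v2")  # hypothetical model name argument
#   >>> openllm.start("opt")                 # serve a model over HTTP/gRPC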