"""OpenLLM.
An open platform for operating large language models in production. Fine-tune, serve,
deploy, and monitor any LLMs with ease.
* Built-in support for StableLM, Llama 2, Dolly, Flan-T5, Vicuna
* Option to bring your own fine-tuned LLMs
* Online serving with HTTP, gRPC, SSE (coming soon), or custom API
* Native integration with BentoML and LangChain for custom LLM apps
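
Quickstart (a minimal sketch, not an exhaustive reference; the model name below is only an
example and the exact ``Runner`` arguments may differ between releases):

    import openllm

    llm = openllm.Runner("dolly-v2")

Models can also be served over HTTP from the command line, e.g. ``openllm start opt``.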
"""
from __future__ import annotations
import logging as _logging, os as _os, typing as _t, warnings as _warnings, openllm_core
from pathlib import Path as _Path
from . import exceptions as exceptions, utils as utils
from openllm_core._configuration import GenerationConfig as GenerationConfig, LLMConfig as LLMConfig, SamplingParams as SamplingParams
from openllm_core._strategies import CascadingResourceStrategy as CascadingResourceStrategy, get_resource as get_resource
from openllm_core._schema import EmbeddingsOutput as EmbeddingsOutput, GenerationInput as GenerationInput, GenerationOutput as GenerationOutput, HfAgentInput as HfAgentInput, MetadataOutput as MetadataOutput, unmarshal_vllm_outputs as unmarshal_vllm_outputs
from openllm_core.config import AutoConfig as AutoConfig, CONFIG_MAPPING as CONFIG_MAPPING, CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES, BaichuanConfig as BaichuanConfig, ChatGLMConfig as ChatGLMConfig, DollyV2Config as DollyV2Config, FalconConfig as FalconConfig, FlanT5Config as FlanT5Config, GPTNeoXConfig as GPTNeoXConfig, LlamaConfig as LlamaConfig, MPTConfig as MPTConfig, OPTConfig as OPTConfig, StableLMConfig as StableLMConfig, StarCoderConfig as StarCoderConfig
if openllm_core.utils.DEBUG:
  openllm_core.utils.set_debug_mode(True)
  openllm_core.utils.set_quiet_mode(False)
  _logging.basicConfig(level=_logging.NOTSET)
else:
  # Configure bitsandbytes before it is imported
  _os.environ["BITSANDBYTES_NOWELCOME"] = _os.environ.get("BITSANDBYTES_NOWELCOME", "1")
  # NOTE: The following warnings come from bitsandbytes and are not important for users to see when DEBUG is False
  _warnings.filterwarnings("ignore", message="MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization")
  _warnings.filterwarnings("ignore", message="MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization")
  _warnings.filterwarnings("ignore", message="The installed version of bitsandbytes was compiled without GPU support.")
  # NOTE: ignore the following warning from ghapi as it is not important for users
  _warnings.filterwarnings("ignore", message="Neither GITHUB_TOKEN nor GITHUB_JWT_TOKEN found: running as unauthenticated")
_import_structure: dict[str, list[str]] = {
"exceptions": [],
"models": [],
"client": [],
"bundle": [],
"playground": [],
"testing": [],
"utils": ["infer_auto_class"],
"serialisation": ["ggml", "transformers"],
"cli._sdk": ["start", "start_grpc", "build", "import_model", "list_models"],
"_quantisation": ["infer_quantisation_config"],
"_embeddings": ["GenericEmbeddingRunnable"],
"_llm": ["LLM", "Runner", "LLMRunner", "LLMRunnable", "EmbeddingsOutput"],
"_generation": ["StopSequenceCriteria", "StopOnTokens", "LogitsProcessorList", "StoppingCriteriaList", "prepare_logits_processor"],
"models.auto": ["MODEL_MAPPING_NAMES", "MODEL_FLAX_MAPPING_NAMES", "MODEL_TF_MAPPING_NAMES", "MODEL_VLLM_MAPPING_NAMES"],
"models.chatglm": [],
"models.baichuan": [],
"models.dolly_v2": [],
"models.falcon": [],
"models.flan_t5": [],
"models.gpt_neox": [],
"models.llama": [],
"models.mpt": [],
"models.opt": [],
"models.stablelm": [],
"models.starcoder": []
}
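# True when this file is shipped as a compiled extension (.pyd/.so, e.g. via mypyc) rather than plain Python source.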
COMPILED = _Path(__file__).suffix in (".pyd", ".so")
if _t.TYPE_CHECKING:
  from . import bundle as bundle, cli as cli, client as client, models as models, playground as playground, serialisation as serialisation, testing as testing
  from ._generation import LogitsProcessorList as LogitsProcessorList, StopOnTokens as StopOnTokens, StoppingCriteriaList as StoppingCriteriaList, StopSequenceCriteria as StopSequenceCriteria, prepare_logits_processor as prepare_logits_processor
  from ._llm import LLM as LLM, EmbeddingsOutput as EmbeddingsOutput, LLMRunnable as LLMRunnable, LLMRunner as LLMRunner, Runner as Runner
  from ._quantisation import infer_quantisation_config as infer_quantisation_config
  from ._embeddings import GenericEmbeddingRunnable as GenericEmbeddingRunnable
  from .cli._sdk import build as build, import_model as import_model, list_models as list_models, start as start, start_grpc as start_grpc
  from .models.auto import MODEL_FLAX_MAPPING_NAMES as MODEL_FLAX_MAPPING_NAMES, MODEL_MAPPING_NAMES as MODEL_MAPPING_NAMES, MODEL_TF_MAPPING_NAMES as MODEL_TF_MAPPING_NAMES, MODEL_VLLM_MAPPING_NAMES as MODEL_VLLM_MAPPING_NAMES
  from .serialisation import ggml as ggml, transformers as transformers
  from .utils import infer_auto_class as infer_auto_class
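# Each try/except block below probes for an optional backend. When the required packages are
# installed, the concrete implementations are registered in _import_structure; otherwise the
# corresponding dummy placeholder module (utils.dummy_*_objects) is exposed in their place.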
try:
  if not (openllm_core.utils.is_torch_available() and openllm_core.utils.is_cpm_kernels_available()):
    raise exceptions.MissingDependencyError
except exceptions.MissingDependencyError:
  _import_structure["utils.dummy_pt_objects"] = ["ChatGLM", "Baichuan"]
else:
  _import_structure["models.chatglm"].extend(["ChatGLM"])
  _import_structure["models.baichuan"].extend(["Baichuan"])
  if _t.TYPE_CHECKING:
    from .models.baichuan import Baichuan as Baichuan
    from .models.chatglm import ChatGLM as ChatGLM
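# MPT requires triton kernels in addition to torch.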
try:
  if not (openllm_core.utils.is_torch_available() and openllm_core.utils.is_triton_available()):
    raise exceptions.MissingDependencyError
except exceptions.MissingDependencyError:
  if "utils.dummy_pt_objects" in _import_structure: _import_structure["utils.dummy_pt_objects"].extend(["MPT"])
  else: _import_structure["utils.dummy_pt_objects"] = ["MPT"]
else:
  _import_structure["models.mpt"].extend(["MPT"])
  if _t.TYPE_CHECKING: from .models.mpt import MPT as MPT
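# Falcon requires einops in addition to torch.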
try:
  if not (openllm_core.utils.is_torch_available() and openllm_core.utils.is_einops_available()):
    raise exceptions.MissingDependencyError
except exceptions.MissingDependencyError:
  if "utils.dummy_pt_objects" in _import_structure: _import_structure["utils.dummy_pt_objects"].extend(["Falcon"])
  else: _import_structure["utils.dummy_pt_objects"] = ["Falcon"]
else:
  _import_structure["models.falcon"].extend(["Falcon"])
  if _t.TYPE_CHECKING: from .models.falcon import Falcon as Falcon
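# The remaining PyTorch implementations only require torch.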
try:
  if not openllm_core.utils.is_torch_available(): raise exceptions.MissingDependencyError
except exceptions.MissingDependencyError:
  _import_structure["utils.dummy_pt_objects"] = [
      name for name in dir(utils.dummy_pt_objects) if not name.startswith("_") and name not in ("ChatGLM", "Baichuan", "MPT", "Falcon", "annotations")
  ]
else:
  _import_structure["models.flan_t5"].extend(["FlanT5"])
  _import_structure["models.dolly_v2"].extend(["DollyV2"])
  _import_structure["models.starcoder"].extend(["StarCoder"])
  _import_structure["models.stablelm"].extend(["StableLM"])
  _import_structure["models.opt"].extend(["OPT"])
  _import_structure["models.gpt_neox"].extend(["GPTNeoX"])
  _import_structure["models.llama"].extend(["Llama"])
  _import_structure["models.auto"].extend(["AutoLLM", "MODEL_MAPPING"])
  if _t.TYPE_CHECKING:
    from .models.auto import MODEL_MAPPING as MODEL_MAPPING, AutoLLM as AutoLLM
    from .models.dolly_v2 import DollyV2 as DollyV2
    from .models.flan_t5 import FlanT5 as FlanT5
    from .models.gpt_neox import GPTNeoX as GPTNeoX
    from .models.llama import Llama as Llama
    from .models.opt import OPT as OPT
    from .models.stablelm import StableLM as StableLM
    from .models.starcoder import StarCoder as StarCoder
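# vLLM-backed implementations (the VLLM* classes) are only registered when vllm is installed.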
try:
  if not openllm_core.utils.is_vllm_available(): raise exceptions.MissingDependencyError
except exceptions.MissingDependencyError:
  _import_structure["utils.dummy_vllm_objects"] = [name for name in dir(utils.dummy_vllm_objects) if not name.startswith("_") and name not in ("annotations",)]
else:
  _import_structure["models.baichuan"].extend(["VLLMBaichuan"])
  _import_structure["models.llama"].extend(["VLLMLlama"])
  _import_structure["models.opt"].extend(["VLLMOPT"])
  _import_structure["models.dolly_v2"].extend(["VLLMDollyV2"])
  _import_structure["models.falcon"].extend(["VLLMFalcon"])
  _import_structure["models.gpt_neox"].extend(["VLLMGPTNeoX"])
  _import_structure["models.mpt"].extend(["VLLMMPT"])
  _import_structure["models.stablelm"].extend(["VLLMStableLM"])
  _import_structure["models.starcoder"].extend(["VLLMStarCoder"])
  _import_structure["models.auto"].extend(["AutoVLLM", "MODEL_VLLM_MAPPING"])
  if _t.TYPE_CHECKING:
    from .models.auto import MODEL_VLLM_MAPPING as MODEL_VLLM_MAPPING, AutoVLLM as AutoVLLM
    from .models.baichuan import VLLMBaichuan as VLLMBaichuan
    from .models.dolly_v2 import VLLMDollyV2 as VLLMDollyV2
    from .models.gpt_neox import VLLMGPTNeoX as VLLMGPTNeoX
    from .models.falcon import VLLMFalcon as VLLMFalcon
    from .models.llama import VLLMLlama as VLLMLlama
    from .models.mpt import VLLMMPT as VLLMMPT
    from .models.opt import VLLMOPT as VLLMOPT
    from .models.stablelm import VLLMStableLM as VLLMStableLM
    from .models.starcoder import VLLMStarCoder as VLLMStarCoder
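# Flax implementations are only registered when a Flax installation is available.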
try:
  if not openllm_core.utils.is_flax_available(): raise exceptions.MissingDependencyError
except exceptions.MissingDependencyError:
  _import_structure["utils.dummy_flax_objects"] = [name for name in dir(utils.dummy_flax_objects) if not name.startswith("_") and name not in ("annotations",)]
else:
  _import_structure["models.flan_t5"].extend(["FlaxFlanT5"])
  _import_structure["models.opt"].extend(["FlaxOPT"])
  _import_structure["models.auto"].extend(["AutoFlaxLLM", "MODEL_FLAX_MAPPING"])
  if _t.TYPE_CHECKING:
    from .models.auto import MODEL_FLAX_MAPPING as MODEL_FLAX_MAPPING, AutoFlaxLLM as AutoFlaxLLM
    from .models.flan_t5 import FlaxFlanT5 as FlaxFlanT5
    from .models.opt import FlaxOPT as FlaxOPT
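# TensorFlow implementations are only registered when TensorFlow is available.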
try:
  if not openllm_core.utils.is_tf_available(): raise exceptions.MissingDependencyError
except exceptions.MissingDependencyError:
  _import_structure["utils.dummy_tf_objects"] = [name for name in dir(utils.dummy_tf_objects) if not name.startswith("_") and name not in ("annotations",)]
else:
  _import_structure["models.flan_t5"].extend(["TFFlanT5"])
  _import_structure["models.opt"].extend(["TFOPT"])
  _import_structure["models.auto"].extend(["AutoTFLLM", "MODEL_TF_MAPPING"])
  if _t.TYPE_CHECKING:
    from .models.auto import MODEL_TF_MAPPING as MODEL_TF_MAPPING, AutoTFLLM as AutoTFLLM
    from .models.flan_t5 import TFFlanT5 as TFFlanT5
    from .models.opt import TFOPT as TFOPT
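# Expose everything declared in _import_structure through a LazyModule so heavy backends are only
# imported when the corresponding attribute is first accessed.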
# NOTE: update this to sys.modules[__name__] once mypy_extensions can recognize __spec__
__lazy = openllm_core.utils.LazyModule(__name__, globals()["__file__"], _import_structure, extra_objects={"COMPILED": COMPILED, "__openllm_migration__": {"LLMEmbeddings": "EmbeddingsOutput"}})
__all__ = __lazy.__all__
__dir__ = __lazy.__dir__
__getattr__ = __lazy.__getattr__