Mirror of https://github.com/bentoml/OpenLLM.git
refactor(config): simplify configuration and update start CLI output (#611)
* chore(config): simplify configuration and update start CLI output handling
* chore: remove state and message sent after server lifecycle
* chore: update color stream and refactor reusable logic
* chore: update documentations and mypy

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
@@ -9,44 +9,17 @@ deploy, and monitor any LLMs with ease.
 * Native integration with BentoML and LangChain for custom LLM apps
 """
-
 from __future__ import annotations
 import logging as _logging
 import os as _os
-import typing as _t
+import pathlib as _pathlib
 import warnings as _warnings
-
-from pathlib import Path as _Path
-
-import openllm_core
-
-from openllm_core._configuration import GenerationConfig as GenerationConfig
-from openllm_core._configuration import LLMConfig as LLMConfig
-from openllm_core._configuration import SamplingParams as SamplingParams
-from openllm_core._schemas import GenerationInput as GenerationInput
-from openllm_core._schemas import GenerationOutput as GenerationOutput
-from openllm_core._schemas import MetadataOutput as MetadataOutput
-from openllm_core.config import CONFIG_MAPPING as CONFIG_MAPPING
-from openllm_core.config import CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES
-from openllm_core.config import AutoConfig as AutoConfig
-from openllm_core.config import BaichuanConfig as BaichuanConfig
-from openllm_core.config import ChatGLMConfig as ChatGLMConfig
-from openllm_core.config import DollyV2Config as DollyV2Config
-from openllm_core.config import FalconConfig as FalconConfig
-from openllm_core.config import FlanT5Config as FlanT5Config
-from openllm_core.config import GPTNeoXConfig as GPTNeoXConfig
-from openllm_core.config import LlamaConfig as LlamaConfig
-from openllm_core.config import MistralConfig as MistralConfig
-from openllm_core.config import MPTConfig as MPTConfig
-from openllm_core.config import OPTConfig as OPTConfig
-from openllm_core.config import StableLMConfig as StableLMConfig
-from openllm_core.config import StarCoderConfig as StarCoderConfig
 
 from . import exceptions as exceptions
 from . import utils as utils
 
-if openllm_core.utils.DEBUG:
-  openllm_core.utils.set_debug_mode(True)
-  openllm_core.utils.set_quiet_mode(False)
-
+if utils.DEBUG:
+  utils.set_debug_mode(True)
+  utils.set_quiet_mode(False)
+  _logging.basicConfig(level=_logging.NOTSET)
 else:
   # configuration for bitsandbytes before import
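A note on the new debug branch before the second hunk: `_logging.basicConfig(level=_logging.NOTSET)` attaches a root handler that filters nothing, so every logger in the process becomes visible once `utils.DEBUG` is set. A minimal sketch of the same toggle pattern in plain Python (the `DEBUG` flag and `set_debug_mode` helper here are simplified stand-ins for the `openllm_core.utils` versions, whose exact behaviour is not shown in this diff):

import logging
import os

# Hypothetical stand-in for utils.DEBUG; the real flag lives in openllm_core.utils.
DEBUG = os.environ.get('DEBUG', '').lower() in ('1', 'true', 'yes')

def set_debug_mode(enabled: bool) -> None:
  # Illustrative only: raise or lower the package logger's verbosity.
  logging.getLogger('openllm').setLevel(logging.DEBUG if enabled else logging.INFO)

if DEBUG:
  set_debug_mode(True)
  # A NOTSET root configuration lets records of every level through.
  logging.basicConfig(level=logging.NOTSET)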
@@ -64,68 +37,36 @@ else:
     'ignore', message='Neither GITHUB_TOKEN nor GITHUB_JWT_TOKEN found: running as unauthenticated'
   )
 
-_import_structure: dict[str, list[str]] = {
-  'exceptions': [],
-  'client': ['HTTPClient', 'AsyncHTTPClient'],
-  'bundle': [],
-  'playground': [],
-  'testing': [],
-  'prompts': ['PromptTemplate'],
-  'protocol': [],
-  'utils': [],
-  '_deprecated': ['Runner'],
-  '_strategies': ['CascadingResourceStrategy', 'get_resource'],
-  'entrypoints': ['mount_entrypoints'],
-  'serialisation': ['ggml', 'transformers'],
-  'cli._sdk': ['start', 'start_grpc', 'build', 'import_model', 'list_models'],
-  '_quantisation': ['infer_quantisation_config'],
-  '_llm': ['LLM', 'LLMRunner', 'LLMRunnable'],
-  '_generation': [
-    'StopSequenceCriteria',
-    'StopOnTokens',
-    'LogitsProcessorList',
-    'StoppingCriteriaList',
-    'prepare_logits_processor',
-  ],
-}
-COMPILED = _Path(__file__).suffix in ('.pyd', '.so')
-
-if _t.TYPE_CHECKING:
-  from . import bundle as bundle
-  from . import cli as cli
-  from . import client as client
-  from . import playground as playground
-  from . import serialisation as serialisation
-  from . import testing as testing
-  from . import utils as utils
-  from .client import HTTPClient as HTTPClient
-  from .client import AsyncHTTPClient as AsyncHTTPClient
-  from ._deprecated import Runner as Runner
-  from ._generation import LogitsProcessorList as LogitsProcessorList
-  from ._generation import StopOnTokens as StopOnTokens
-  from ._generation import StoppingCriteriaList as StoppingCriteriaList
-  from ._generation import StopSequenceCriteria as StopSequenceCriteria
-  from ._generation import prepare_logits_processor as prepare_logits_processor
-  from ._llm import LLM as LLM
-  from ._llm import LLMRunnable as LLMRunnable
-  from ._llm import LLMRunner as LLMRunner
-  from ._quantisation import infer_quantisation_config as infer_quantisation_config
-  from ._strategies import CascadingResourceStrategy as CascadingResourceStrategy
-  from ._strategies import get_resource as get_resource
-  from .cli._sdk import build as build
-  from .cli._sdk import import_model as import_model
-  from .cli._sdk import list_models as list_models
-  from .cli._sdk import start as start
-  from .cli._sdk import start_grpc as start_grpc
-  from .entrypoints import mount_entrypoints as mount_entrypoints
-  from .prompts import PromptTemplate as PromptTemplate
-  from .protocol import openai as openai
-  from .serialisation import ggml as ggml
-  from .serialisation import transformers as transformers
+COMPILED = _pathlib.Path(__file__).suffix in ('.pyd', '.so')
 
 # NOTE: update this to sys.modules[__name__] once mypy_extensions can recognize __spec__
-__lazy = openllm_core.utils.LazyModule(
-  __name__, globals()['__file__'], _import_structure, extra_objects={'COMPILED': COMPILED}
+__lazy = utils.LazyModule(
+  __name__,
+  globals()['__file__'],
+  {
+    'exceptions': [],
+    'client': ['HTTPClient', 'AsyncHTTPClient'],
+    'bundle': [],
+    'playground': [],
+    'testing': [],
+    'protocol': [],
+    'utils': [],
+    '_deprecated': ['Runner'],
+    '_strategies': ['CascadingResourceStrategy', 'get_resource'],
+    'entrypoints': ['mount_entrypoints'],
+    'serialisation': ['ggml', 'transformers'],
+    'cli._sdk': ['start', 'start_grpc', 'build', 'import_model', 'list_models'],
+    '_quantisation': ['infer_quantisation_config'],
+    '_llm': ['LLM', 'LLMRunner', 'LLMRunnable'],
+    '_generation': [
+      'StopSequenceCriteria',
+      'StopOnTokens',
+      'LogitsProcessorList',
+      'StoppingCriteriaList',
+      'prepare_logits_processor',
+    ],
+  },
+  extra_objects={'COMPILED': COMPILED},
 )
 __all__ = __lazy.__all__
 __dir__ = __lazy.__dir__
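The refactor's core move is visible in this second hunk: the standalone `_import_structure` dict and the duplicated `if _t.TYPE_CHECKING:` re-exports are removed, and the submodule-to-symbols mapping is passed inline to `utils.LazyModule`, which defers each import until the corresponding attribute is first touched (`extra_objects` appears to pre-seed eagerly computed values such as `COMPILED` into the lazy namespace). A rough sketch of the underlying technique using PEP 562's module-level `__getattr__` (an illustration only, not BentoML's actual `LazyModule` implementation):

import importlib
import typing as t

# Submodule -> exported names, mirroring the dict passed to LazyModule above.
_import_structure: dict[str, list[str]] = {
  '_llm': ['LLM'],
  'client': ['HTTPClient', 'AsyncHTTPClient'],
}
# Reverse index: exported name -> owning submodule.
_attr_to_module = {attr: mod for mod, attrs in _import_structure.items() for attr in attrs}

def __getattr__(name: str) -> t.Any:
  # Invoked only when `name` is not already bound at module level (PEP 562).
  if name in _attr_to_module:
    module = importlib.import_module(f'.{_attr_to_module[name]}', __name__)
    value = getattr(module, name)
    globals()[name] = value  # cache so later lookups bypass __getattr__
    return value
  raise AttributeError(f'module {__name__!r} has no attribute {name!r}')

def __dir__() -> list[str]:
  # Advertise lazy names alongside whatever is already defined.
  return sorted(set(globals()) | set(_attr_to_module))

With this shape, `import openllm` stays cheap; the first access to, say, `openllm.LLM` is what actually imports the `_llm` submodule, so heavyweight dependencies are not paid for at import time.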