refactor(config): simplify configuration and update start CLI output (#611)

* chore(config): simplify configuration and update start CLI output handling

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: remove state and message sent after server lifecycle

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: update color stream and refactor reusable logic

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: update documentation and mypy

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

---------

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
Author: Aaron Pham
Date: 2023-11-11 22:36:10 -05:00 (committed by GitHub)
Parent: 36559a5ab5
Commit: 7438005c04
19 changed files with 414 additions and 469 deletions


@@ -9,44 +9,17 @@ deploy, and monitor any LLMs with ease.
 * Native integration with BentoML and LangChain for custom LLM apps
 """
 from __future__ import annotations
 import logging as _logging
 import os as _os
-import typing as _t
+import pathlib as _pathlib
 import warnings as _warnings
-from pathlib import Path as _Path
-import openllm_core
-from openllm_core._configuration import GenerationConfig as GenerationConfig
-from openllm_core._configuration import LLMConfig as LLMConfig
-from openllm_core._configuration import SamplingParams as SamplingParams
-from openllm_core._schemas import GenerationInput as GenerationInput
-from openllm_core._schemas import GenerationOutput as GenerationOutput
-from openllm_core._schemas import MetadataOutput as MetadataOutput
-from openllm_core.config import CONFIG_MAPPING as CONFIG_MAPPING
-from openllm_core.config import CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES
-from openllm_core.config import AutoConfig as AutoConfig
-from openllm_core.config import BaichuanConfig as BaichuanConfig
-from openllm_core.config import ChatGLMConfig as ChatGLMConfig
-from openllm_core.config import DollyV2Config as DollyV2Config
-from openllm_core.config import FalconConfig as FalconConfig
-from openllm_core.config import FlanT5Config as FlanT5Config
-from openllm_core.config import GPTNeoXConfig as GPTNeoXConfig
-from openllm_core.config import LlamaConfig as LlamaConfig
-from openllm_core.config import MistralConfig as MistralConfig
-from openllm_core.config import MPTConfig as MPTConfig
-from openllm_core.config import OPTConfig as OPTConfig
-from openllm_core.config import StableLMConfig as StableLMConfig
-from openllm_core.config import StarCoderConfig as StarCoderConfig
 from . import exceptions as exceptions
 from . import utils as utils
-if openllm_core.utils.DEBUG:
-  openllm_core.utils.set_debug_mode(True)
-  openllm_core.utils.set_quiet_mode(False)
+if utils.DEBUG:
+  utils.set_debug_mode(True)
+  utils.set_quiet_mode(False)
   _logging.basicConfig(level=_logging.NOTSET)
 else:
   # configuration for bitsandbytes before import
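
The hunk above drops the openllm_core config re-exports and routes the debug and
quiet toggles through the locally re-exported utils module instead of reaching into
openllm_core.utils directly. A minimal sketch of how such an environment-driven debug
flag can be wired; the variable name OPENLLMDEVDEBUG and the exact semantics are
assumptions for illustration, not confirmed by this diff:

import logging
import os

_DEBUG_ENV_VAR = 'OPENLLMDEVDEBUG'  # assumed name; openllm may key off a different variable

# truthy env value opts the process into debug mode
DEBUG = os.environ.get(_DEBUG_ENV_VAR, '').lower() in ('1', 'true', 'yes')

def set_debug_mode(enabled: bool) -> None:
  # persist the flag in the environment so subprocesses inherit it,
  # and raise logger verbosity to match
  os.environ[_DEBUG_ENV_VAR] = '1' if enabled else '0'
  logging.getLogger('openllm').setLevel(logging.DEBUG if enabled else logging.INFO)

def set_quiet_mode(enabled: bool) -> None:
  # quiet mode suppresses everything below ERROR; the diff disables it when debugging
  logging.getLogger('openllm').setLevel(logging.ERROR if enabled else logging.INFO)
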
@@ -64,68 +37,36 @@ else:
     'ignore', message='Neither GITHUB_TOKEN nor GITHUB_JWT_TOKEN found: running as unauthenticated'
   )
-_import_structure: dict[str, list[str]] = {
-  'exceptions': [],
-  'client': ['HTTPClient', 'AsyncHTTPClient'],
-  'bundle': [],
-  'playground': [],
-  'testing': [],
-  'prompts': ['PromptTemplate'],
-  'protocol': [],
-  'utils': [],
-  '_deprecated': ['Runner'],
-  '_strategies': ['CascadingResourceStrategy', 'get_resource'],
-  'entrypoints': ['mount_entrypoints'],
-  'serialisation': ['ggml', 'transformers'],
-  'cli._sdk': ['start', 'start_grpc', 'build', 'import_model', 'list_models'],
-  '_quantisation': ['infer_quantisation_config'],
-  '_llm': ['LLM', 'LLMRunner', 'LLMRunnable'],
-  '_generation': [
-    'StopSequenceCriteria',
-    'StopOnTokens',
-    'LogitsProcessorList',
-    'StoppingCriteriaList',
-    'prepare_logits_processor',
-  ],
-}
-COMPILED = _Path(__file__).suffix in ('.pyd', '.so')
-if _t.TYPE_CHECKING:
-  from . import bundle as bundle
-  from . import cli as cli
-  from . import client as client
-  from . import playground as playground
-  from . import serialisation as serialisation
-  from . import testing as testing
-  from . import utils as utils
-  from .client import HTTPClient as HTTPClient
-  from .client import AsyncHTTPClient as AsyncHTTPClient
-  from ._deprecated import Runner as Runner
-  from ._generation import LogitsProcessorList as LogitsProcessorList
-  from ._generation import StopOnTokens as StopOnTokens
-  from ._generation import StoppingCriteriaList as StoppingCriteriaList
-  from ._generation import StopSequenceCriteria as StopSequenceCriteria
-  from ._generation import prepare_logits_processor as prepare_logits_processor
-  from ._llm import LLM as LLM
-  from ._llm import LLMRunnable as LLMRunnable
-  from ._llm import LLMRunner as LLMRunner
-  from ._quantisation import infer_quantisation_config as infer_quantisation_config
-  from ._strategies import CascadingResourceStrategy as CascadingResourceStrategy
-  from ._strategies import get_resource as get_resource
-  from .cli._sdk import build as build
-  from .cli._sdk import import_model as import_model
-  from .cli._sdk import list_models as list_models
-  from .cli._sdk import start as start
-  from .cli._sdk import start_grpc as start_grpc
-  from .entrypoints import mount_entrypoints as mount_entrypoints
-  from .prompts import PromptTemplate as PromptTemplate
-  from .protocol import openai as openai
-  from .serialisation import ggml as ggml
-  from .serialisation import transformers as transformers
+COMPILED = _pathlib.Path(__file__).suffix in ('.pyd', '.so')
 # NOTE: update this to sys.modules[__name__] once mypy_extensions can recognize __spec__
-__lazy = openllm_core.utils.LazyModule(
-  __name__, globals()['__file__'], _import_structure, extra_objects={'COMPILED': COMPILED}
+__lazy = utils.LazyModule(
+  __name__,
+  globals()['__file__'],
+  {
+    'exceptions': [],
+    'client': ['HTTPClient', 'AsyncHTTPClient'],
+    'bundle': [],
+    'playground': [],
+    'testing': [],
+    'protocol': [],
+    'utils': [],
+    '_deprecated': ['Runner'],
+    '_strategies': ['CascadingResourceStrategy', 'get_resource'],
+    'entrypoints': ['mount_entrypoints'],
+    'serialisation': ['ggml', 'transformers'],
+    'cli._sdk': ['start', 'start_grpc', 'build', 'import_model', 'list_models'],
+    '_quantisation': ['infer_quantisation_config'],
+    '_llm': ['LLM', 'LLMRunner', 'LLMRunnable'],
+    '_generation': [
+      'StopSequenceCriteria',
+      'StopOnTokens',
+      'LogitsProcessorList',
+      'StoppingCriteriaList',
+      'prepare_logits_processor',
+    ],
+  },
+  extra_objects={'COMPILED': COMPILED},
 )
 __all__ = __lazy.__all__
 __dir__ = __lazy.__dir__
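
The rewritten __init__ leans entirely on this lazy-import mapping: each key names a
submodule, each value lists the attributes that submodule re-exports, and extra_objects
carries eagerly computed values such as COMPILED (true when the package runs as a
compiled .pyd/.so extension). Here is a sketch of a LazyModule along these lines,
reconstructed from the call site alone; openllm_core's actual implementation may differ:

from __future__ import annotations

import importlib
import types
import typing as t

class LazyModule(types.ModuleType):
  def __init__(
    self,
    name: str,
    module_file: str,
    import_structure: dict[str, list[str]],
    extra_objects: dict[str, t.Any] | None = None,
  ) -> None:
    super().__init__(name)
    self._import_structure = import_structure
    # reverse map: re-exported attribute -> submodule that defines it
    self._attr_to_module = {attr: mod for mod, attrs in import_structure.items() for attr in attrs}
    self._extra_objects = dict(extra_objects or {})
    self.__all__ = list(import_structure) + list(self._attr_to_module)
    self.__file__ = module_file

  def __getattr__(self, name: str) -> t.Any:
    # nothing is imported until an attribute is first requested
    if name in self._extra_objects:
      return self._extra_objects[name]
    if name in self._import_structure:  # a lazy submodule, e.g. openllm.client
      value: t.Any = importlib.import_module(f'.{name}', self.__name__)
    elif name in self._attr_to_module:  # a re-exported attribute, e.g. openllm.LLM
      module = importlib.import_module(f'.{self._attr_to_module[name]}', self.__name__)
      value = getattr(module, name)
    else:
      raise AttributeError(f'module {self.__name__!r} has no attribute {name!r}')
    setattr(self, name, value)  # cache so later lookups bypass __getattr__
    return value

On first access, openllm.LLM resolves to importlib.import_module('._llm', 'openllm').LLM
and is cached, so a bare "import openllm" stays cheap. The __all__ and __dir__ assignments
above forward introspection to this object; the excerpt ends here, but the pattern also
calls for a matching PEP 562 __getattr__ assignment so that module attribute lookups reach
LazyModule.__getattr__.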