fix: persistent styling between ruff and yapf (#279)

2026-06-12 02:20:32 -04:00 · 2023-08-30 11:37:41 -04:00
parent f678f71e18
commit c9cef1d773
145 changed files with 1051 additions and 395 deletions
--- a/openllm-python/src/openllm/main.py
+++ b/openllm-python/src/openllm/main.py
@@ -7,6 +7,7 @@ To start any OpenLLM model:
    openllm start <model_name> --options ...
 '''
 from __future__ import annotations
+
 if __name__ == '__main__':
  from openllm.cli.entrypoint import cli
  cli()
--- a/openllm-python/src/openllm/_embeddings.py
+++ b/openllm-python/src/openllm/_embeddings.py
@@ -3,13 +3,19 @@ from __future__ import annotations
 import typing as t

 import transformers
+
 from huggingface_hub import snapshot_download

 import bentoml
 import openllm
-from bentoml._internal.frameworks.transformers import API_VERSION, MODULE_NAME
-from bentoml._internal.models.model import ModelOptions, ModelSignature
-if t.TYPE_CHECKING: import torch
+
+from bentoml._internal.frameworks.transformers import API_VERSION
+from bentoml._internal.frameworks.transformers import MODULE_NAME
+from bentoml._internal.models.model import ModelOptions
+from bentoml._internal.models.model import ModelSignature
+
+if t.TYPE_CHECKING:
+  import torch

 _GENERIC_EMBEDDING_ID = 'sentence-transformers/all-MiniLM-L6-v2'
 _BENTOMODEL_ID = 'sentence-transformers--all-MiniLM-L6-v2'
--- a/openllm-python/src/openllm/_generation.py
+++ b/openllm-python/src/openllm/_generation.py
@@ -3,7 +3,11 @@ from __future__ import annotations
 import typing as t

 import transformers
-if t.TYPE_CHECKING: import torch, openllm
+
+if t.TYPE_CHECKING:
+  import torch
+
+  import openllm

 # reexport from transformers
 LogitsProcessorList = transformers.LogitsProcessorList
--- a/openllm-python/src/openllm/_llm.py
+++ b/openllm-python/src/openllm/_llm.py
@@ -16,20 +16,62 @@ import attr
 import fs.path
 import inflection
 import orjson
+
 from huggingface_hub import hf_hub_download

 import bentoml
 import openllm
 import openllm_core
+
 from bentoml._internal.models.model import ModelSignature
-from openllm_core._configuration import FineTuneConfig, LLMConfig, _object_getattribute, _setattr_class
+from openllm_core._configuration import FineTuneConfig
+from openllm_core._configuration import LLMConfig
+from openllm_core._configuration import _object_getattribute
+from openllm_core._configuration import _setattr_class
 from openllm_core._schema import unmarshal_vllm_outputs
-from openllm_core._typing_compat import AdaptersMapping, AdaptersTuple, AdapterType, AnyCallable, DictStrAny, ListStr, LiteralRuntime, LiteralString, LLMEmbeddings, LLMRunnable, LLMRunner, M, ModelSignatureDict as _ModelSignatureDict, NotRequired, PeftAdapterOutput, T, TupleAny, overload
-from openllm_core.utils import DEBUG, ENV_VARS_TRUE_VALUES, MYPY, EnvVarMixin, LazyLoader, ReprMixin, apply, bentoml_cattr, codegen, device_count, first_not_none, generate_hash_from_file, is_peft_available, is_torch_available, non_intrusive_setattr, normalize_attrs_to_model_tokenizer_pair, resolve_filepath, validate_is_path
+from openllm_core._typing_compat import AdaptersMapping
+from openllm_core._typing_compat import AdaptersTuple
+from openllm_core._typing_compat import AdapterType
+from openllm_core._typing_compat import AnyCallable
+from openllm_core._typing_compat import DictStrAny
+from openllm_core._typing_compat import ListStr
+from openllm_core._typing_compat import LiteralRuntime
+from openllm_core._typing_compat import LiteralString
+from openllm_core._typing_compat import LLMEmbeddings
+from openllm_core._typing_compat import LLMRunnable
+from openllm_core._typing_compat import LLMRunner
+from openllm_core._typing_compat import M
+from openllm_core._typing_compat import ModelSignatureDict as _ModelSignatureDict
+from openllm_core._typing_compat import NotRequired
+from openllm_core._typing_compat import PeftAdapterOutput
+from openllm_core._typing_compat import T
+from openllm_core._typing_compat import TupleAny
+from openllm_core._typing_compat import overload
+from openllm_core.utils import DEBUG
+from openllm_core.utils import ENV_VARS_TRUE_VALUES
+from openllm_core.utils import MYPY
+from openllm_core.utils import EnvVarMixin
+from openllm_core.utils import LazyLoader
+from openllm_core.utils import ReprMixin
+from openllm_core.utils import apply
+from openllm_core.utils import bentoml_cattr
+from openllm_core.utils import codegen
+from openllm_core.utils import device_count
+from openllm_core.utils import first_not_none
+from openllm_core.utils import generate_hash_from_file
+from openllm_core.utils import is_peft_available
+from openllm_core.utils import is_torch_available
+from openllm_core.utils import non_intrusive_setattr
+from openllm_core.utils import normalize_attrs_to_model_tokenizer_pair
+from openllm_core.utils import resolve_filepath
+from openllm_core.utils import validate_is_path

 from ._quantisation import infer_quantisation_config
-from .exceptions import ForbiddenAttributeError, GpuNotAvailableError, OpenLLMException
+from .exceptions import ForbiddenAttributeError
+from .exceptions import GpuNotAvailableError
+from .exceptions import OpenLLMException
 from .utils import infer_auto_class
+
 if t.TYPE_CHECKING:
  import auto_gptq as autogptq
  import peft
@@ -1077,7 +1119,9 @@ class LLM(LLMInterface[M, T], ReprMixin):
      **attrs: t.Any
  ) -> t.Iterator[t.Any]:
    # NOTE: encoder-decoder models will need to implement their own generate_iterator for now
-    from ._generation import get_context_length, is_partial_stop, prepare_logits_processor
+    from ._generation import get_context_length
+    from ._generation import is_partial_stop
+    from ._generation import prepare_logits_processor

    len_prompt = len(prompt)
    if stop_token_ids is None: stop_token_ids = []
--- a/openllm-python/src/openllm/_quantisation.py
+++ b/openllm-python/src/openllm/_quantisation.py
@@ -4,11 +4,17 @@ import logging
 import typing as t

 from openllm_core._typing_compat import overload
-from openllm_core.utils import LazyLoader, is_autogptq_available, is_bitsandbytes_available, is_transformers_supports_kbit, pkg
+from openllm_core.utils import LazyLoader
+from openllm_core.utils import is_autogptq_available
+from openllm_core.utils import is_bitsandbytes_available
+from openllm_core.utils import is_transformers_supports_kbit
+from openllm_core.utils import pkg
+
 if t.TYPE_CHECKING:
  from openllm_core._typing_compat import DictStrAny

  from ._llm import LLM
+
 autogptq, torch, transformers = LazyLoader('autogptq', globals(), 'auto_gptq'), LazyLoader('torch', globals(), 'torch'), LazyLoader('transformers', globals(), 'transformers')

 logger = logging.getLogger(__name__)
--- a/openllm-python/src/openllm/_service.py
+++ b/openllm-python/src/openllm/_service.py
@@ -5,6 +5,7 @@ import typing as t
 import warnings

 import orjson
+
 from starlette.applications import Starlette
 from starlette.responses import JSONResponse
 from starlette.routing import Route
@@ -12,17 +13,21 @@ from starlette.routing import Route
 import bentoml
 import openllm
 import openllm_core
+
 if t.TYPE_CHECKING:
  from starlette.requests import Request
  from starlette.responses import Response

-  from bentoml._internal.runner.runner import AbstractRunner, RunnerMethod
+  from bentoml._internal.runner.runner import AbstractRunner
+  from bentoml._internal.runner.runner import RunnerMethod
  from openllm_core._typing_compat import TypeAlias
-  _EmbeddingMethod: TypeAlias = RunnerMethod[t.Union[bentoml.Runnable, openllm.LLMRunnable[t.Any, t.Any]], [t.List[str]], t.Sequence[openllm.LLMEmbeddings]]
+  _EmbeddingMethod: TypeAlias = RunnerMethod[t.Union[bentoml.Runnable, openllm.LLMRunnable[t.Any, t.Any]], [t.List[str]], t.Sequence[openllm.EmbeddingsOutput]]
+
 # The following warnings from bitsandbytes, and probably not that important for users to see
 warnings.filterwarnings('ignore', message='MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization')
 warnings.filterwarnings('ignore', message='MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization')
 warnings.filterwarnings('ignore', message='The installed version of bitsandbytes was compiled without GPU support.')
+
 model = os.environ.get('OPENLLM_MODEL', '{__model_name__}')  # openllm: model name
 adapter_map = os.environ.get('OPENLLM_ADAPTER_MAP', '''{__model_adapter_map__}''')  # openllm: model adapter map
 llm_config = openllm.AutoConfig.for_model(model)
@@ -37,6 +42,7 @@ generic_embedding_runner = bentoml.Runner(
 runners: list[AbstractRunner] = [runner]
 if not runner.supports_embeddings: runners.append(generic_embedding_runner)
 svc = bentoml.Service(name=f"llm-{llm_config['start_name']}-service", runners=runners)
+
 _JsonInput = bentoml.io.JSON.from_sample({'prompt': '', 'llm_config': llm_config.model_dump(flatten=True), 'adapter_name': None})

@svc.api(route='/v1/generate', input=_JsonInput, output=bentoml.io.JSON.from_sample({'responses': [], 'configuration': llm_config.model_dump(flatten=True)}))
--- a/openllm-python/src/openllm/bundle/init.py
+++ b/openllm-python/src/openllm/bundle/init.py
@@ -7,15 +7,26 @@ import os
 import typing as t

 from openllm_core.utils import LazyModule
+
 _import_structure: dict[str, list[str]] = {
    '_package': ['create_bento', 'build_editable', 'construct_python_options', 'construct_docker_options'],
    'oci': ['CONTAINER_NAMES', 'get_base_container_tag', 'build_container', 'get_base_container_name', 'supported_registries', 'RefResolver']
 }

 if t.TYPE_CHECKING:
-  from . import _package as _package, oci as oci
-  from ._package import build_editable as build_editable, construct_docker_options as construct_docker_options, construct_python_options as construct_python_options, create_bento as create_bento
-  from .oci import CONTAINER_NAMES as CONTAINER_NAMES, RefResolver as RefResolver, build_container as build_container, get_base_container_name as get_base_container_name, get_base_container_tag as get_base_container_tag, supported_registries as supported_registries
+  from . import _package as _package
+  from . import oci as oci
+  from ._package import build_editable as build_editable
+  from ._package import construct_docker_options as construct_docker_options
+  from ._package import construct_python_options as construct_python_options
+  from ._package import create_bento as create_bento
+  from .oci import CONTAINER_NAMES as CONTAINER_NAMES
+  from .oci import RefResolver as RefResolver
+  from .oci import build_container as build_container
+  from .oci import get_base_container_name as get_base_container_name
+  from .oci import get_base_container_tag as get_base_container_tag
+  from .oci import supported_registries as supported_registries
+
 __lazy = LazyModule(__name__, os.path.abspath('__file__'), _import_structure)
 __all__ = __lazy.__all__
 __dir__ = __lazy.__dir__
--- a/openllm-python/src/openllm/bundle/_package.py
+++ b/openllm-python/src/openllm/bundle/_package.py
@@ -6,27 +6,39 @@ import logging
 import os
 import string
 import typing as t
+
 from pathlib import Path

 import fs
 import fs.copy
 import fs.errors
 import orjson
-from simple_di import Provide, inject
+
+from simple_di import Provide
+from simple_di import inject

 import bentoml
 import openllm_core
-from bentoml._internal.bento.build_config import BentoBuildConfig, DockerOptions, ModelSpec, PythonOptions
+
+from bentoml._internal.bento.build_config import BentoBuildConfig
+from bentoml._internal.bento.build_config import DockerOptions
+from bentoml._internal.bento.build_config import ModelSpec
+from bentoml._internal.bento.build_config import PythonOptions
 from bentoml._internal.configuration.containers import BentoMLContainer

 from . import oci
+
 if t.TYPE_CHECKING:
  from fs.base import FS

  import openllm
+
  from bentoml._internal.bento import BentoStore
  from bentoml._internal.models.model import ModelStore
-  from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy, LiteralString
+  from openllm_core._typing_compat import LiteralContainerRegistry
+  from openllm_core._typing_compat import LiteralContainerVersionStrategy
+  from openllm_core._typing_compat import LiteralString
+
 logger = logging.getLogger(__name__)

 OPENLLM_DEV_BUILD = 'OPENLLM_DEV_BUILD'
--- a/openllm-python/src/openllm/bundle/oci/init.py
+++ b/openllm-python/src/openllm/bundle/oci/init.py
@@ -9,7 +9,10 @@ import pathlib
 import shutil
 import subprocess
 import typing as t
-from datetime import datetime, timedelta, timezone
+
+from datetime import datetime
+from datetime import timedelta
+from datetime import timezone

 import attr
 import orjson
@@ -17,11 +20,17 @@ import orjson
 import bentoml
 import openllm
 import openllm_core
+
 from openllm_core.utils.lazy import VersionInfo
+
 if t.TYPE_CHECKING:
  from ghapi import all

-  from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy, LiteralString, RefTuple
+  from openllm_core._typing_compat import LiteralContainerRegistry
+  from openllm_core._typing_compat import LiteralContainerVersionStrategy
+  from openllm_core._typing_compat import LiteralString
+  from openllm_core._typing_compat import RefTuple
+
 all = openllm_core.utils.LazyLoader('all', globals(), 'ghapi.all')  # noqa: F811

 logger = logging.getLogger(__name__)
--- a/openllm-python/src/openllm/cli/_factory.py
+++ b/openllm-python/src/openllm/cli/_factory.py
@@ -9,21 +9,28 @@ import click
 import click_option_group as cog
 import inflection
 import orjson
+
 from bentoml_cli.utils import BentoMLCommandGroup
 from click import shell_completion as sc
 from click.shell_completion import CompletionItem

 import bentoml
 import openllm
+
 from bentoml._internal.configuration.containers import BentoMLContainer
-from openllm_core._typing_compat import Concatenate, DictStrAny, LiteralString, ParamSpec
+from openllm_core._typing_compat import Concatenate
+from openllm_core._typing_compat import DictStrAny
+from openllm_core._typing_compat import LiteralString
+from openllm_core._typing_compat import ParamSpec
 from openllm_core.utils import DEBUG

 from . import termui
+
 if t.TYPE_CHECKING:
  import subprocess

  from openllm_core._configuration import LLMConfig
+
 logger = logging.getLogger(__name__)

 P = ParamSpec('P')
--- a/openllm-python/src/openllm/cli/_sdk.py
+++ b/openllm-python/src/openllm/cli/_sdk.py
@@ -7,20 +7,27 @@ import subprocess
 import sys
 import typing as t

-from simple_di import Provide, inject
+from simple_di import Provide
+from simple_di import inject

 import bentoml
 import openllm
 import openllm_core
+
 from bentoml._internal.configuration.containers import BentoMLContainer
 from openllm.exceptions import OpenLLMException

 from . import termui
 from ._factory import start_command_factory
+
 if t.TYPE_CHECKING:
  from bentoml._internal.bento import BentoStore
  from openllm_core._configuration import LLMConfig
-  from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy, LiteralRuntime, LiteralString
+  from openllm_core._typing_compat import LiteralContainerRegistry
+  from openllm_core._typing_compat import LiteralContainerVersionStrategy
+  from openllm_core._typing_compat import LiteralRuntime
+  from openllm_core._typing_compat import LiteralString
+
 logger = logging.getLogger(__name__)

 def _start(
@@ -81,7 +88,8 @@ def _start(
      framework: The framework to use for this LLM. By default, this is set to ``pt``.
      additional_args: Additional arguments to pass to ``openllm start``.
  """
-  from .entrypoint import start_command, start_grpc_command
+  from .entrypoint import start_command
+  from .entrypoint import start_grpc_command
  llm_config = openllm.AutoConfig.for_model(model_name)
  _ModelEnv = openllm_core.utils.EnvVarMixin(
      model_name,
--- a/openllm-python/src/openllm/cli/entrypoint.py
+++ b/openllm-python/src/openllm/cli/entrypoint.py
@@ -42,29 +42,80 @@ import fs.copy
 import fs.errors
 import inflection
 import orjson
-from bentoml_cli.utils import BentoMLCommandGroup, opt_callback
-from simple_di import Provide, inject
+
+from bentoml_cli.utils import BentoMLCommandGroup
+from bentoml_cli.utils import opt_callback
+from simple_di import Provide
+from simple_di import inject

 import bentoml
 import openllm
+
 from bentoml._internal.configuration.containers import BentoMLContainer
 from bentoml._internal.models.model import ModelStore
-from openllm import bundle, serialisation
+from openllm import bundle
+from openllm import serialisation
 from openllm.exceptions import OpenLLMException
-from openllm.models.auto import CONFIG_MAPPING, MODEL_FLAX_MAPPING_NAMES, MODEL_MAPPING_NAMES, MODEL_TF_MAPPING_NAMES, MODEL_VLLM_MAPPING_NAMES, AutoConfig, AutoLLM
+from openllm.models.auto import CONFIG_MAPPING
+from openllm.models.auto import MODEL_FLAX_MAPPING_NAMES
+from openllm.models.auto import MODEL_MAPPING_NAMES
+from openllm.models.auto import MODEL_TF_MAPPING_NAMES
+from openllm.models.auto import MODEL_VLLM_MAPPING_NAMES
+from openllm.models.auto import AutoConfig
+from openllm.models.auto import AutoLLM
 from openllm.utils import infer_auto_class
-from openllm_core._typing_compat import Concatenate, DictStrAny, LiteralRuntime, LiteralString, ParamSpec, Self
-from openllm_core.utils import DEBUG, DEBUG_ENV_VAR, OPTIONAL_DEPENDENCIES, QUIET_ENV_VAR, EnvVarMixin, LazyLoader, analytics, bentoml_cattr, compose, configure_logging, dantic, first_not_none, get_debug_mode, get_quiet_mode, is_torch_available, is_transformers_supports_agent, resolve_user_filepath, set_debug_mode, set_quiet_mode
+from openllm_core._typing_compat import Concatenate
+from openllm_core._typing_compat import DictStrAny
+from openllm_core._typing_compat import LiteralRuntime
+from openllm_core._typing_compat import LiteralString
+from openllm_core._typing_compat import ParamSpec
+from openllm_core._typing_compat import Self
+from openllm_core.utils import DEBUG
+from openllm_core.utils import DEBUG_ENV_VAR
+from openllm_core.utils import OPTIONAL_DEPENDENCIES
+from openllm_core.utils import QUIET_ENV_VAR
+from openllm_core.utils import EnvVarMixin
+from openllm_core.utils import LazyLoader
+from openllm_core.utils import analytics
+from openllm_core.utils import bentoml_cattr
+from openllm_core.utils import compose
+from openllm_core.utils import configure_logging
+from openllm_core.utils import dantic
+from openllm_core.utils import first_not_none
+from openllm_core.utils import get_debug_mode
+from openllm_core.utils import get_quiet_mode
+from openllm_core.utils import is_torch_available
+from openllm_core.utils import is_transformers_supports_agent
+from openllm_core.utils import resolve_user_filepath
+from openllm_core.utils import set_debug_mode
+from openllm_core.utils import set_quiet_mode

 from . import termui
-from ._factory import FC, LiteralOutput, _AnyCallable, bettertransformer_option, container_registry_option, fast_option, machine_option, model_id_option, model_name_argument, model_version_option, output_option, parse_device_callback, quantize_option, serialisation_option, start_command_factory, workers_per_resource_option
+from ._factory import FC
+from ._factory import LiteralOutput
+from ._factory import _AnyCallable
+from ._factory import bettertransformer_option
+from ._factory import container_registry_option
+from ._factory import fast_option
+from ._factory import machine_option
+from ._factory import model_id_option
+from ._factory import model_name_argument
+from ._factory import model_version_option
+from ._factory import output_option
+from ._factory import parse_device_callback
+from ._factory import quantize_option
+from ._factory import serialisation_option
+from ._factory import start_command_factory
+from ._factory import workers_per_resource_option
+
 if t.TYPE_CHECKING:
  import torch

  from bentoml._internal.bento import BentoStore
  from bentoml._internal.container import DefaultBuilder
  from openllm_core._schema import EmbeddingsOutput
-  from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy
+  from openllm_core._typing_compat import LiteralContainerRegistry
+  from openllm_core._typing_compat import LiteralContainerVersionStrategy
 else:
  torch = LazyLoader('torch', globals(), 'torch')

--- a/openllm-python/src/openllm/cli/extension/build_base_container.py
+++ b/openllm-python/src/openllm/cli/extension/build_base_container.py
@@ -5,9 +5,14 @@ import click
 import orjson

 import openllm
+
 from openllm.cli import termui
-from openllm.cli._factory import container_registry_option, machine_option
-if t.TYPE_CHECKING: from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy
+from openllm.cli._factory import container_registry_option
+from openllm.cli._factory import machine_option
+
+if t.TYPE_CHECKING:
+  from openllm_core._typing_compat import LiteralContainerRegistry
+  from openllm_core._typing_compat import LiteralContainerVersionStrategy

@click.command(
    'build_base_container',
--- a/openllm-python/src/openllm/cli/extension/dive_bentos.py
+++ b/openllm-python/src/openllm/cli/extension/dive_bentos.py
@@ -5,13 +5,19 @@ import typing as t

 import click
 import psutil
-from simple_di import Provide, inject
+
+from simple_di import Provide
+from simple_di import inject

 import bentoml
+
 from bentoml._internal.configuration.containers import BentoMLContainer
 from openllm.cli import termui
-from openllm.cli._factory import bento_complete_envvar, machine_option
-if t.TYPE_CHECKING: from bentoml._internal.bento import BentoStore
+from openllm.cli._factory import bento_complete_envvar
+from openllm.cli._factory import machine_option
+
+if t.TYPE_CHECKING:
+  from bentoml._internal.bento import BentoStore

@click.command('dive_bentos', context_settings=termui.CONTEXT_SETTINGS)
@click.argument('bento', type=str, shell_complete=bento_complete_envvar)
--- a/openllm-python/src/openllm/cli/extension/get_containerfile.py
+++ b/openllm-python/src/openllm/cli/extension/get_containerfile.py
@@ -2,9 +2,12 @@ from __future__ import annotations
 import typing as t

 import click
-from simple_di import Provide, inject
+
+from simple_di import Provide
+from simple_di import inject

 import bentoml
+
 from bentoml._internal.bento.bento import BentoInfo
 from bentoml._internal.bento.build_config import DockerOptions
 from bentoml._internal.configuration.containers import BentoMLContainer
@@ -12,7 +15,9 @@ from bentoml._internal.container.generate import generate_containerfile
 from openllm.cli import termui
 from openllm.cli._factory import bento_complete_envvar
 from openllm_core.utils import bentoml_cattr
-if t.TYPE_CHECKING: from bentoml._internal.bento import BentoStore
+
+if t.TYPE_CHECKING:
+  from bentoml._internal.bento import BentoStore

@click.command('get_containerfile', context_settings=termui.CONTEXT_SETTINGS, help='Return Containerfile of any given Bento.')
@click.argument('bento', type=str, shell_complete=bento_complete_envvar)
--- a/openllm-python/src/openllm/cli/extension/get_prompt.py
+++ b/openllm-python/src/openllm/cli/extension/get_prompt.py
@@ -4,12 +4,17 @@ import typing as t
 import click
 import inflection
 import orjson
+
 from bentoml_cli.utils import opt_callback

 import openllm
+
 from openllm.cli import termui
-from openllm.cli._factory import machine_option, model_complete_envvar, output_option
+from openllm.cli._factory import machine_option
+from openllm.cli._factory import model_complete_envvar
+from openllm.cli._factory import output_option
 from openllm_core._prompt import process_prompt
+
 LiteralOutput = t.Literal['json', 'pretty', 'porcelain']

@click.command('get_prompt', context_settings=termui.CONTEXT_SETTINGS)
--- a/openllm-python/src/openllm/cli/extension/list_bentos.py
+++ b/openllm-python/src/openllm/cli/extension/list_bentos.py
@@ -6,9 +6,11 @@ import orjson

 import bentoml
 import openllm
+
 from bentoml._internal.utils import human_readable_size
 from openllm.cli import termui
-from openllm.cli._factory import LiteralOutput, output_option
+from openllm.cli._factory import LiteralOutput
+from openllm.cli._factory import output_option

@click.command('list_bentos', context_settings=termui.CONTEXT_SETTINGS)
@output_option(default_value='json')
--- a/openllm-python/src/openllm/cli/extension/list_models.py
+++ b/openllm-python/src/openllm/cli/extension/list_models.py
@@ -7,10 +7,16 @@ import orjson

 import bentoml
 import openllm
+
 from bentoml._internal.utils import human_readable_size
 from openllm.cli import termui
-from openllm.cli._factory import LiteralOutput, model_complete_envvar, model_name_argument, output_option
-if t.TYPE_CHECKING: from openllm_core._typing_compat import DictStrAny
+from openllm.cli._factory import LiteralOutput
+from openllm.cli._factory import model_complete_envvar
+from openllm.cli._factory import model_name_argument
+from openllm.cli._factory import output_option
+
+if t.TYPE_CHECKING:
+  from openllm_core._typing_compat import DictStrAny

@click.command('list_models', context_settings=termui.CONTEXT_SETTINGS)
@model_name_argument(required=False, shell_complete=model_complete_envvar)
--- a/openllm-python/src/openllm/cli/extension/playground.py
+++ b/openllm-python/src/openllm/cli/extension/playground.py
@@ -13,12 +13,16 @@ import yaml

 from openllm import playground
 from openllm.cli import termui
-from openllm_core.utils import is_jupyter_available, is_jupytext_available, is_notebook_available
+from openllm_core.utils import is_jupyter_available
+from openllm_core.utils import is_jupytext_available
+from openllm_core.utils import is_notebook_available
+
 if t.TYPE_CHECKING:
  import jupytext
  import nbformat

  from openllm_core._typing_compat import DictStrAny
+
 logger = logging.getLogger(__name__)

 def load_notebook_metadata() -> DictStrAny:
--- a/openllm-python/src/openllm/cli/termui.py
+++ b/openllm-python/src/openllm/cli/termui.py
@@ -6,7 +6,9 @@ import click
 import inflection

 import openllm
-if t.TYPE_CHECKING: from openllm_core._typing_compat import DictStrAny
+
+if t.TYPE_CHECKING:
+  from openllm_core._typing_compat import DictStrAny

 def echo(text: t.Any, fg: str = 'green', _with_style: bool = True, **attrs: t.Any) -> None:
  attrs['fg'] = fg if not openllm.utils.get_debug_mode() else None
--- a/openllm-python/src/openllm/client.py
+++ b/openllm-python/src/openllm/client.py
@@ -14,7 +14,14 @@ from __future__ import annotations
 import typing as t

 import openllm_client
-if t.TYPE_CHECKING:  from openllm_client import AsyncHTTPClient as AsyncHTTPClient, BaseAsyncClient as BaseAsyncClient, BaseClient as BaseClient, HTTPClient as HTTPClient, GrpcClient as GrpcClient, AsyncGrpcClient as AsyncGrpcClient
+
+if t.TYPE_CHECKING:
+  from openllm_client import AsyncGrpcClient as AsyncGrpcClient
+  from openllm_client import AsyncHTTPClient as AsyncHTTPClient
+  from openllm_client import BaseAsyncClient as BaseAsyncClient
+  from openllm_client import BaseClient as BaseClient
+  from openllm_client import GrpcClient as GrpcClient
+  from openllm_client import HTTPClient as HTTPClient

 def __dir__() -> t.Sequence[str]:
  return sorted(dir(openllm_client))
--- a/openllm-python/src/openllm/exceptions.py
+++ b/openllm-python/src/openllm/exceptions.py
@@ -1,4 +1,11 @@
 '''Base exceptions for OpenLLM. This extends BentoML exceptions.'''
 from __future__ import annotations

-from openllm_core.exceptions import Error as Error, FineTuneStrategyNotSupportedError as FineTuneStrategyNotSupportedError, ForbiddenAttributeError as ForbiddenAttributeError, GpuNotAvailableError as GpuNotAvailableError, MissingAnnotationAttributeError as MissingAnnotationAttributeError, MissingDependencyError as MissingDependencyError, OpenLLMException as OpenLLMException, ValidationError as ValidationError
+from openllm_core.exceptions import Error as Error
+from openllm_core.exceptions import FineTuneStrategyNotSupportedError as FineTuneStrategyNotSupportedError
+from openllm_core.exceptions import ForbiddenAttributeError as ForbiddenAttributeError
+from openllm_core.exceptions import GpuNotAvailableError as GpuNotAvailableError
+from openllm_core.exceptions import MissingAnnotationAttributeError as MissingAnnotationAttributeError
+from openllm_core.exceptions import MissingDependencyError as MissingDependencyError
+from openllm_core.exceptions import OpenLLMException as OpenLLMException
+from openllm_core.exceptions import ValidationError as ValidationError
--- a/openllm-python/src/openllm/models/auto/init.py
+++ b/openllm-python/src/openllm/models/auto/init.py
@@ -3,8 +3,15 @@ import os
 import typing as t

 import openllm
-from openllm_core.config import CONFIG_MAPPING as CONFIG_MAPPING, CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES, AutoConfig as AutoConfig
-from openllm_core.utils import LazyModule, is_flax_available, is_tf_available, is_torch_available, is_vllm_available
+from openllm_core.config import CONFIG_MAPPING as CONFIG_MAPPING
+from openllm_core.config import CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES
+from openllm_core.config import AutoConfig as AutoConfig
+from openllm_core.utils import LazyModule
+from openllm_core.utils import is_flax_available
+from openllm_core.utils import is_tf_available
+from openllm_core.utils import is_torch_available
+from openllm_core.utils import is_vllm_available
+
 _import_structure: dict[str, list[str]] = {
    'modeling_auto': ['MODEL_MAPPING_NAMES'],
    'modeling_flax_auto': ['MODEL_FLAX_MAPPING_NAMES'],
--- a/openllm-python/src/openllm/models/auto/factory.py
+++ b/openllm-python/src/openllm/models/auto/factory.py
@@ -12,11 +12,14 @@ import openllm
 from openllm_core.utils import ReprMixin
 if t.TYPE_CHECKING:
  import types
-  from collections import _odict_items, _odict_keys, _odict_values
+  from collections import _odict_items
+  from collections import _odict_keys
+  from collections import _odict_values

  from _typeshed import SupportsIter

-  from openllm_core._typing_compat import LiteralString, LLMRunner
+  from openllm_core._typing_compat import LiteralString
+  from openllm_core._typing_compat import LLMRunner
  ConfigModelKeysView = _odict_keys[type[openllm.LLMConfig], type[openllm.LLM[t.Any, t.Any]]]
  ConfigModelValuesView = _odict_values[type[openllm.LLMConfig], type[openllm.LLM[t.Any, t.Any]]]
  ConfigModelItemsView = _odict_items[type[openllm.LLMConfig], type[openllm.LLM[t.Any, t.Any]]]
--- a/openllm-python/src/openllm/models/auto/modeling_auto.py
+++ b/openllm-python/src/openllm/models/auto/modeling_auto.py
@@ -4,7 +4,9 @@ from collections import OrderedDict

 from openllm_core.config import CONFIG_MAPPING_NAMES

-from .factory import BaseAutoLLMClass, _LazyAutoMapping
+from .factory import BaseAutoLLMClass
+from .factory import _LazyAutoMapping
+
 MODEL_MAPPING_NAMES = OrderedDict([('chatglm', 'ChatGLM'), ('dolly_v2', 'DollyV2'), ('falcon', 'Falcon'), ('flan_t5', 'FlanT5'), ('gpt_neox', 'GPTNeoX'), ('llama', 'Llama'), ('mpt', 'MPT'), (
    'opt', 'OPT'
 ), ('stablelm', 'StableLM'), ('starcoder', 'StarCoder'), ('baichuan', 'Baichuan')])
--- a/openllm-python/src/openllm/models/auto/modeling_flax_auto.py
+++ b/openllm-python/src/openllm/models/auto/modeling_flax_auto.py
@@ -4,7 +4,9 @@ from collections import OrderedDict

 from openllm_core.config import CONFIG_MAPPING_NAMES

-from .factory import BaseAutoLLMClass, _LazyAutoMapping
+from .factory import BaseAutoLLMClass
+from .factory import _LazyAutoMapping
+
 MODEL_FLAX_MAPPING_NAMES = OrderedDict([('flan_t5', 'FlaxFlanT5'), ('opt', 'FlaxOPT')])
 MODEL_FLAX_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FLAX_MAPPING_NAMES)

--- a/openllm-python/src/openllm/models/auto/modeling_tf_auto.py
+++ b/openllm-python/src/openllm/models/auto/modeling_tf_auto.py
@@ -4,7 +4,9 @@ from collections import OrderedDict

 from openllm_core.config import CONFIG_MAPPING_NAMES

-from .factory import BaseAutoLLMClass, _LazyAutoMapping
+from .factory import BaseAutoLLMClass
+from .factory import _LazyAutoMapping
+
 MODEL_TF_MAPPING_NAMES = OrderedDict([('flan_t5', 'TFFlanT5'), ('opt', 'TFOPT')])
 MODEL_TF_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_TF_MAPPING_NAMES)

--- a/openllm-python/src/openllm/models/auto/modeling_vllm_auto.py
+++ b/openllm-python/src/openllm/models/auto/modeling_vllm_auto.py
@@ -4,7 +4,9 @@ from collections import OrderedDict

 from openllm_core.config import CONFIG_MAPPING_NAMES

-from .factory import BaseAutoLLMClass, _LazyAutoMapping
+from .factory import BaseAutoLLMClass
+from .factory import _LazyAutoMapping
+
 MODEL_VLLM_MAPPING_NAMES = OrderedDict([('baichuan', 'VLLMBaichuan'), ('dolly_v2', 'VLLMDollyV2'), ('falcon', 'VLLMFalcon'), ('gpt_neox', 'VLLMGPTNeoX'), ('mpt', 'VLLMMPT'), (
    'opt', 'VLLMOPT'
 ), ('stablelm', 'VLLMStableLM'), ('starcoder', 'VLLMStarCoder'), ('llama', 'VLLMLlama')])
--- a/openllm-python/src/openllm/models/baichuan/__init__.py
+++ b/openllm-python/src/openllm/models/baichuan/__init__.py
@@ -3,8 +3,14 @@ import sys
 import typing as t

 from openllm.exceptions import MissingDependencyError
-from openllm.utils import LazyModule, is_cpm_kernels_available, is_torch_available, is_vllm_available
-from openllm_core.config.configuration_baichuan import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_BAICHUAN_COMMAND_DOCSTRING as START_BAICHUAN_COMMAND_DOCSTRING, BaichuanConfig as BaichuanConfig
+from openllm.utils import LazyModule
+from openllm.utils import is_cpm_kernels_available
+from openllm.utils import is_torch_available
+from openllm.utils import is_vllm_available
+from openllm_core.config.configuration_baichuan import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
+from openllm_core.config.configuration_baichuan import START_BAICHUAN_COMMAND_DOCSTRING as START_BAICHUAN_COMMAND_DOCSTRING
+from openllm_core.config.configuration_baichuan import BaichuanConfig as BaichuanConfig
+
 _import_structure: dict[str, list[str]] = {}
 try:
  if not is_torch_available() or not is_cpm_kernels_available(): raise MissingDependencyError
--- a/openllm-python/src/openllm/models/chatglm/__init__.py
+++ b/openllm-python/src/openllm/models/chatglm/__init__.py
@@ -3,8 +3,13 @@ import sys
 import typing as t

 from openllm.exceptions import MissingDependencyError
-from openllm.utils import LazyModule, is_cpm_kernels_available, is_torch_available
-from openllm_core.config.configuration_chatglm import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_CHATGLM_COMMAND_DOCSTRING as START_CHATGLM_COMMAND_DOCSTRING, ChatGLMConfig as ChatGLMConfig
+from openllm.utils import LazyModule
+from openllm.utils import is_cpm_kernels_available
+from openllm.utils import is_torch_available
+from openllm_core.config.configuration_chatglm import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
+from openllm_core.config.configuration_chatglm import START_CHATGLM_COMMAND_DOCSTRING as START_CHATGLM_COMMAND_DOCSTRING
+from openllm_core.config.configuration_chatglm import ChatGLMConfig as ChatGLMConfig
+
 _import_structure: dict[str, list[str]] = {}
 try:
  if not is_torch_available() or not is_cpm_kernels_available(): raise MissingDependencyError
--- a/openllm-python/src/openllm/models/chatglm/modeling_chatglm.py
+++ b/openllm-python/src/openllm/models/chatglm/modeling_chatglm.py
@@ -2,7 +2,8 @@ from __future__ import annotations
 import typing as t

 import openllm
-if t.TYPE_CHECKING: import transformers
+if t.TYPE_CHECKING:
+  import transformers

 class ChatGLM(openllm.LLM['transformers.PreTrainedModel', 'transformers.PreTrainedTokenizerFast']):
  __openllm_internal__ = True
--- a/openllm-python/src/openllm/models/dolly_v2/__init__.py
+++ b/openllm-python/src/openllm/models/dolly_v2/__init__.py
@@ -3,8 +3,13 @@ import sys
 import typing as t

 from openllm.exceptions import MissingDependencyError
-from openllm.utils import LazyModule, is_torch_available, is_vllm_available
-from openllm_core.config.configuration_dolly_v2 import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_DOLLY_V2_COMMAND_DOCSTRING as START_DOLLY_V2_COMMAND_DOCSTRING, DollyV2Config as DollyV2Config
+from openllm.utils import LazyModule
+from openllm.utils import is_torch_available
+from openllm.utils import is_vllm_available
+from openllm_core.config.configuration_dolly_v2 import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
+from openllm_core.config.configuration_dolly_v2 import START_DOLLY_V2_COMMAND_DOCSTRING as START_DOLLY_V2_COMMAND_DOCSTRING
+from openllm_core.config.configuration_dolly_v2 import DollyV2Config as DollyV2Config
+
 _import_structure: dict[str, list[str]] = {}
 try:
  if not is_torch_available(): raise MissingDependencyError
--- a/openllm-python/src/openllm/models/dolly_v2/modeling_dolly_v2.py
+++ b/openllm-python/src/openllm/models/dolly_v2/modeling_dolly_v2.py
@@ -5,7 +5,10 @@ import typing as t

 import openllm
 from openllm_core._typing_compat import overload
-from openllm_core.config.configuration_dolly_v2 import DEFAULT_PROMPT_TEMPLATE, END_KEY, RESPONSE_KEY, get_special_token_id
+from openllm_core.config.configuration_dolly_v2 import DEFAULT_PROMPT_TEMPLATE
+from openllm_core.config.configuration_dolly_v2 import END_KEY
+from openllm_core.config.configuration_dolly_v2 import RESPONSE_KEY
+from openllm_core.config.configuration_dolly_v2 import get_special_token_id
 if t.TYPE_CHECKING: import torch, transformers, tensorflow as tf
 else:  torch, transformers, tf = openllm.utils.LazyLoader('torch', globals(), 'torch'), openllm.utils.LazyLoader('transformers', globals(), 'transformers'), openllm.utils.LazyLoader('tf', globals(), 'tensorflow')
 logger = logging.getLogger(__name__)
--- a/openllm-python/src/openllm/models/falcon/__init__.py
+++ b/openllm-python/src/openllm/models/falcon/__init__.py
@@ -3,8 +3,13 @@ import sys
 import typing as t

 from openllm.exceptions import MissingDependencyError
-from openllm.utils import LazyModule, is_torch_available, is_vllm_available
-from openllm_core.config.configuration_falcon import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_FALCON_COMMAND_DOCSTRING as START_FALCON_COMMAND_DOCSTRING, FalconConfig as FalconConfig
+from openllm.utils import LazyModule
+from openllm.utils import is_torch_available
+from openllm.utils import is_vllm_available
+from openllm_core.config.configuration_falcon import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
+from openllm_core.config.configuration_falcon import START_FALCON_COMMAND_DOCSTRING as START_FALCON_COMMAND_DOCSTRING
+from openllm_core.config.configuration_falcon import FalconConfig as FalconConfig
+
 _import_structure: dict[str, list[str]] = {}
 try:
  if not is_torch_available(): raise MissingDependencyError
--- a/openllm-python/src/openllm/models/flan_t5/__init__.py
+++ b/openllm-python/src/openllm/models/flan_t5/__init__.py
@@ -3,8 +3,14 @@ import sys
 import typing as t

 from openllm.exceptions import MissingDependencyError
-from openllm.utils import LazyModule, is_flax_available, is_tf_available, is_torch_available
-from openllm_core.config.configuration_flan_t5 import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING, FlanT5Config as FlanT5Config
+from openllm.utils import LazyModule
+from openllm.utils import is_flax_available
+from openllm.utils import is_tf_available
+from openllm.utils import is_torch_available
+from openllm_core.config.configuration_flan_t5 import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
+from openllm_core.config.configuration_flan_t5 import START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING
+from openllm_core.config.configuration_flan_t5 import FlanT5Config as FlanT5Config
+
 _import_structure: dict[str, list[str]] = {}
 try:
  if not is_torch_available(): raise MissingDependencyError
--- a/openllm-python/src/openllm/models/flan_t5/modeling_flan_t5.py
+++ b/openllm-python/src/openllm/models/flan_t5/modeling_flan_t5.py
@@ -2,7 +2,8 @@ from __future__ import annotations
 import typing as t

 import openllm
-if t.TYPE_CHECKING: import transformers
+if t.TYPE_CHECKING:
+  import transformers

 class FlanT5(openllm.LLM['transformers.T5ForConditionalGeneration', 'transformers.T5TokenizerFast']):
  __openllm_internal__ = True
--- a/openllm-python/src/openllm/models/gpt_neox/__init__.py
+++ b/openllm-python/src/openllm/models/gpt_neox/__init__.py
@@ -3,8 +3,13 @@ import sys
 import typing as t

 from openllm.exceptions import MissingDependencyError
-from openllm.utils import LazyModule, is_torch_available, is_vllm_available
-from openllm_core.config.configuration_gpt_neox import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_GPT_NEOX_COMMAND_DOCSTRING as START_GPT_NEOX_COMMAND_DOCSTRING, GPTNeoXConfig as GPTNeoXConfig
+from openllm.utils import LazyModule
+from openllm.utils import is_torch_available
+from openllm.utils import is_vllm_available
+from openllm_core.config.configuration_gpt_neox import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
+from openllm_core.config.configuration_gpt_neox import START_GPT_NEOX_COMMAND_DOCSTRING as START_GPT_NEOX_COMMAND_DOCSTRING
+from openllm_core.config.configuration_gpt_neox import GPTNeoXConfig as GPTNeoXConfig
+
 _import_structure: dict[str, list[str]] = {}
 try:
  if not is_torch_available(): raise MissingDependencyError
--- a/openllm-python/src/openllm/models/llama/__init__.py
+++ b/openllm-python/src/openllm/models/llama/__init__.py
@@ -3,8 +3,14 @@ import sys
 import typing as t

 from openllm.exceptions import MissingDependencyError
-from openllm.utils import LazyModule, is_torch_available, is_vllm_available
-from openllm_core.config.configuration_llama import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, PROMPT_MAPPING as PROMPT_MAPPING, START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING, LlamaConfig as LlamaConfig
+from openllm.utils import LazyModule
+from openllm.utils import is_torch_available
+from openllm.utils import is_vllm_available
+from openllm_core.config.configuration_llama import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
+from openllm_core.config.configuration_llama import PROMPT_MAPPING as PROMPT_MAPPING
+from openllm_core.config.configuration_llama import START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING
+from openllm_core.config.configuration_llama import LlamaConfig as LlamaConfig
+
 _import_structure: dict[str, list[str]] = {}
 try:
  if not is_vllm_available(): raise MissingDependencyError
--- a/openllm-python/src/openllm/models/llama/modeling_llama.py
+++ b/openllm-python/src/openllm/models/llama/modeling_llama.py
@@ -2,7 +2,8 @@ from __future__ import annotations
 import typing as t

 import openllm
-if t.TYPE_CHECKING: import transformers
+if t.TYPE_CHECKING:
+  import transformers

 class Llama(openllm.LLM['transformers.LlamaForCausalLM', 'transformers.LlamaTokenizerFast']):
  __openllm_internal__ = True
--- a/openllm-python/src/openllm/models/mpt/__init__.py
+++ b/openllm-python/src/openllm/models/mpt/__init__.py
@@ -3,8 +3,14 @@ import sys
 import typing as t

 from openllm.exceptions import MissingDependencyError
-from openllm.utils import LazyModule, is_torch_available, is_vllm_available
-from openllm_core.config.configuration_mpt import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, PROMPT_MAPPING as PROMPT_MAPPING, START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING, MPTConfig as MPTConfig
+from openllm.utils import LazyModule
+from openllm.utils import is_torch_available
+from openllm.utils import is_vllm_available
+from openllm_core.config.configuration_mpt import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
+from openllm_core.config.configuration_mpt import PROMPT_MAPPING as PROMPT_MAPPING
+from openllm_core.config.configuration_mpt import START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING
+from openllm_core.config.configuration_mpt import MPTConfig as MPTConfig
+
 _import_structure: dict[str, list[str]] = {}
 try:
  if not is_torch_available(): raise MissingDependencyError
--- a/openllm-python/src/openllm/models/mpt/modeling_mpt.py
+++ b/openllm-python/src/openllm/models/mpt/modeling_mpt.py
@@ -4,8 +4,11 @@ import typing as t

 import bentoml
 import openllm
-from openllm.utils import generate_labels, is_triton_available
-if t.TYPE_CHECKING: import transformers, torch
+from openllm.utils import generate_labels
+from openllm.utils import is_triton_available
+if t.TYPE_CHECKING:
+  import torch
+  import transformers

 logger = logging.getLogger(__name__)

--- a/openllm-python/src/openllm/models/opt/__init__.py
+++ b/openllm-python/src/openllm/models/opt/__init__.py
@@ -3,8 +3,15 @@ import sys
 import typing as t

 from openllm.exceptions import MissingDependencyError
-from openllm.utils import LazyModule, is_flax_available, is_tf_available, is_torch_available, is_vllm_available
-from openllm_core.config.configuration_opt import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING, OPTConfig as OPTConfig
+from openllm.utils import LazyModule
+from openllm.utils import is_flax_available
+from openllm.utils import is_tf_available
+from openllm.utils import is_torch_available
+from openllm.utils import is_vllm_available
+from openllm_core.config.configuration_opt import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
+from openllm_core.config.configuration_opt import START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING
+from openllm_core.config.configuration_opt import OPTConfig as OPTConfig
+
 _import_structure: dict[str, list[str]] = {}
 try:
  if not is_torch_available(): raise MissingDependencyError
--- a/openllm-python/src/openllm/models/stablelm/__init__.py
+++ b/openllm-python/src/openllm/models/stablelm/__init__.py
@@ -3,8 +3,13 @@ import sys
 import typing as t

 from openllm.exceptions import MissingDependencyError
-from openllm.utils import LazyModule, is_torch_available, is_vllm_available
-from openllm_core.config.configuration_stablelm import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_STABLELM_COMMAND_DOCSTRING as START_STABLELM_COMMAND_DOCSTRING, StableLMConfig as StableLMConfig
+from openllm.utils import LazyModule
+from openllm.utils import is_torch_available
+from openllm.utils import is_vllm_available
+from openllm_core.config.configuration_stablelm import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
+from openllm_core.config.configuration_stablelm import START_STABLELM_COMMAND_DOCSTRING as START_STABLELM_COMMAND_DOCSTRING
+from openllm_core.config.configuration_stablelm import StableLMConfig as StableLMConfig
+
 _import_structure: dict[str, list[str]] = {}
 try:
  if not is_torch_available(): raise MissingDependencyError
--- a/openllm-python/src/openllm/models/stablelm/modeling_stablelm.py
+++ b/openllm-python/src/openllm/models/stablelm/modeling_stablelm.py
@@ -2,7 +2,8 @@ from __future__ import annotations
 import typing as t

 import openllm
-if t.TYPE_CHECKING: import transformers
+if t.TYPE_CHECKING:
+  import transformers

 class StableLM(openllm.LLM['transformers.GPTNeoXForCausalLM', 'transformers.GPTNeoXTokenizerFast']):
  __openllm_internal__ = True
--- a/openllm-python/src/openllm/models/starcoder/__init__.py
+++ b/openllm-python/src/openllm/models/starcoder/__init__.py
@@ -3,8 +3,13 @@ import sys
 import typing as t

 from openllm.exceptions import MissingDependencyError
-from openllm.utils import LazyModule, is_torch_available, is_vllm_available
-from openllm_core.config.configuration_starcoder import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_STARCODER_COMMAND_DOCSTRING as START_STARCODER_COMMAND_DOCSTRING, StarCoderConfig as StarCoderConfig
+from openllm.utils import LazyModule
+from openllm.utils import is_torch_available
+from openllm.utils import is_vllm_available
+from openllm_core.config.configuration_starcoder import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
+from openllm_core.config.configuration_starcoder import START_STARCODER_COMMAND_DOCSTRING as START_STARCODER_COMMAND_DOCSTRING
+from openllm_core.config.configuration_starcoder import StarCoderConfig as StarCoderConfig
+
 _import_structure: dict[str, list[str]] = {}
 try:
  if not is_torch_available(): raise MissingDependencyError
--- a/openllm-python/src/openllm/models/starcoder/modeling_starcoder.py
+++ b/openllm-python/src/openllm/models/starcoder/modeling_starcoder.py
@@ -5,7 +5,11 @@ import typing as t
 import bentoml
 import openllm
 from openllm.utils import generate_labels
-from openllm_core.config.configuration_starcoder import EOD, FIM_MIDDLE, FIM_PAD, FIM_PREFIX, FIM_SUFFIX
+from openllm_core.config.configuration_starcoder import EOD
+from openllm_core.config.configuration_starcoder import FIM_MIDDLE
+from openllm_core.config.configuration_starcoder import FIM_PAD
+from openllm_core.config.configuration_starcoder import FIM_PREFIX
+from openllm_core.config.configuration_starcoder import FIM_SUFFIX
 if t.TYPE_CHECKING: import transformers

 class StarCoder(openllm.LLM['transformers.GPTBigCodeForCausalLM', 'transformers.GPT2TokenizerFast']):
--- a/openllm-python/src/openllm/playground/falcon_tuned.py
+++ b/openllm-python/src/openllm/playground/falcon_tuned.py
@@ -22,6 +22,7 @@ logger = logging.getLogger(__name__)

 from datasets import load_dataset
 from trl import SFTTrainer
+
 DEFAULT_MODEL_ID = "ybelkada/falcon-7b-sharded-bf16"
 DATASET_NAME = "timdettmers/openassistant-guanaco"

--- a/openllm-python/src/openllm/playground/features.py
+++ b/openllm-python/src/openllm/playground/features.py
@@ -4,6 +4,7 @@ import logging
 import typing as t

 import openllm
+
 openllm.utils.configure_logging()

 logger = logging.getLogger(__name__)
--- a/openllm-python/src/openllm/playground/opt_tuned.py
+++ b/openllm-python/src/openllm/playground/opt_tuned.py
@@ -23,6 +23,7 @@ from datasets import load_dataset

 if t.TYPE_CHECKING:
  from peft import PeftModel
+
 DEFAULT_MODEL_ID = "facebook/opt-6.7b"

 def load_trainer(model: PeftModel, tokenizer: transformers.GPT2TokenizerFast, dataset_dict: t.Any, training_args: TrainingArguments):
--- a/openllm-python/src/openllm/serialisation/__init__.py
+++ b/openllm-python/src/openllm/serialisation/__init__.py
@@ -30,12 +30,19 @@ import cloudpickle
 import fs

 import openllm
+
 from bentoml._internal.models.model import CUSTOM_OBJECTS_FILENAME
-from openllm_core._typing_compat import M, ParamSpec, T
+from openllm_core._typing_compat import M
+from openllm_core._typing_compat import ParamSpec
+from openllm_core._typing_compat import T
+
 if t.TYPE_CHECKING:
  import bentoml

-  from . import constants as constants, ggml as ggml, transformers as transformers
+  from . import constants as constants
+  from . import ggml as ggml
+  from . import transformers as transformers
+
 P = ParamSpec('P')

 def load_tokenizer(llm: openllm.LLM[t.Any, T], **tokenizer_attrs: t.Any) -> T:
@@ -44,7 +51,8 @@ def load_tokenizer(llm: openllm.LLM[t.Any, T], **tokenizer_attrs: t.Any) -> T:
  By default, it will try to find the bentomodel whether it is in store..
  If model is not found, it will raises a ``bentoml.exceptions.NotFound``.
  '''
-  from .transformers._helpers import infer_tokenizers_from_llm, process_config
+  from .transformers._helpers import infer_tokenizers_from_llm
+  from .transformers._helpers import process_config

  config, *_ = process_config(llm._bentomodel.path, llm.__llm_trust_remote_code__)
  bentomodel_fs = fs.open_fs(llm._bentomodel.path)
--- a/openllm-python/src/openllm/serialisation/constants.py
+++ b/openllm-python/src/openllm/serialisation/constants.py
@@ -1,4 +1,5 @@
 from __future__ import annotations
+
 FRAMEWORK_TO_AUTOCLASS_MAPPING = {
    'pt': ('AutoModelForCausalLM', 'AutoModelForSeq2SeqLM'),
    'tf': ('TFAutoModelForCausalLM', 'TFAutoModelForSeq2SeqLM'),
--- a/openllm-python/src/openllm/serialisation/ggml.py
+++ b/openllm-python/src/openllm/serialisation/ggml.py
@@ -7,7 +7,9 @@ import typing as t

 import bentoml
 import openllm
-if t.TYPE_CHECKING: from openllm_core._typing_compat import M
+
+if t.TYPE_CHECKING:
+  from openllm_core._typing_compat import M

 _conversion_strategy = {'pt': 'ggml'}

--- a/openllm-python/src/openllm/serialisation/transformers/__init__.py
+++ b/openllm-python/src/openllm/serialisation/transformers/__init__.py
@@ -5,15 +5,23 @@ import logging
 import typing as t

 from huggingface_hub import snapshot_download
-from simple_di import Provide, inject
+from simple_di import Provide
+from simple_di import inject

 import bentoml
 import openllm
+
 from bentoml._internal.configuration.containers import BentoMLContainer
 from bentoml._internal.models.model import ModelOptions

-from ._helpers import check_unintialised_params, infer_autoclass_from_llm, infer_tokenizers_from_llm, make_model_signatures, process_config, update_model
+from ._helpers import check_unintialised_params
+from ._helpers import infer_autoclass_from_llm
+from ._helpers import infer_tokenizers_from_llm
+from ._helpers import make_model_signatures
+from ._helpers import process_config
+from ._helpers import update_model
 from .weights import HfIgnore
+
 if t.TYPE_CHECKING:
  import types

@@ -24,7 +32,9 @@ if t.TYPE_CHECKING:
  import vllm

  from bentoml._internal.models import ModelStore
-  from openllm_core._typing_compat import DictStrAny, M, T
+  from openllm_core._typing_compat import DictStrAny
+  from openllm_core._typing_compat import M
+  from openllm_core._typing_compat import T
 else:
  vllm = openllm.utils.LazyLoader('vllm', globals(), 'vllm')
  autogptq = openllm.utils.LazyLoader('autogptq', globals(), 'auto_gptq')
--- a/openllm-python/src/openllm/serialisation/transformers/_helpers.py
+++ b/openllm-python/src/openllm/serialisation/transformers/_helpers.py
@@ -4,16 +4,24 @@ import typing as t

 import openllm
 import openllm_core
-from bentoml._internal.models.model import ModelInfo, ModelSignature
-from openllm.serialisation.constants import FRAMEWORK_TO_AUTOCLASS_MAPPING, HUB_ATTRS
+
+from bentoml._internal.models.model import ModelInfo
+from bentoml._internal.models.model import ModelSignature
+from openllm.serialisation.constants import FRAMEWORK_TO_AUTOCLASS_MAPPING
+from openllm.serialisation.constants import HUB_ATTRS
+
 if t.TYPE_CHECKING:
  import torch
  import transformers
+
  from transformers.models.auto.auto_factory import _BaseAutoModelClass

  import bentoml
+
  from bentoml._internal.models.model import ModelSignaturesType
-  from openllm_core._typing_compat import DictStrAny, M, T
+  from openllm_core._typing_compat import DictStrAny
+  from openllm_core._typing_compat import M
+  from openllm_core._typing_compat import T
 else:
  transformers, torch = openllm_core.utils.LazyLoader('transformers', globals(), 'transformers'), openllm_core.utils.LazyLoader('torch', globals(), 'torch')

--- a/openllm-python/src/openllm/serialisation/transformers/weights.py
+++ b/openllm-python/src/openllm/serialisation/transformers/weights.py
@@ -2,10 +2,14 @@ from __future__ import annotations
 import typing as t

 import attr
+
 from huggingface_hub import HfApi
+
 if t.TYPE_CHECKING:
  import openllm
-  from openllm_core._typing_compat import M, T
+
+  from openllm_core._typing_compat import M
+  from openllm_core._typing_compat import T

 def has_safetensors_weights(model_id: str, revision: str | None = None) -> bool:
  return any(s.rfilename.endswith('.safetensors') for s in HfApi().model_info(model_id, revision=revision).siblings)
--- a/openllm-python/src/openllm/testing.py
+++ b/openllm-python/src/openllm/testing.py
@@ -8,7 +8,9 @@ import typing as t

 import bentoml
 import openllm
-if t.TYPE_CHECKING: from ._typing_compat import LiteralRuntime
+
+if t.TYPE_CHECKING:
+  from openllm_core._typing_compat import LiteralRuntime

 logger = logging.getLogger(__name__)

--- a/openllm-python/src/openllm/utils/__init__.py
+++ b/openllm-python/src/openllm/utils/__init__.py
@@ -8,9 +8,14 @@ import typing as t

 import openllm_core

-from . import dummy_flax_objects as dummy_flax_objects, dummy_pt_objects as dummy_pt_objects, dummy_tf_objects as dummy_tf_objects, dummy_vllm_objects as dummy_vllm_objects
+from . import dummy_flax_objects as dummy_flax_objects
+from . import dummy_pt_objects as dummy_pt_objects
+from . import dummy_tf_objects as dummy_tf_objects
+from . import dummy_vllm_objects as dummy_vllm_objects
+
 if t.TYPE_CHECKING:
  import openllm
+
  from openllm_core._typing_compat import LiteralRuntime

 def generate_labels(llm: openllm.LLM[t.Any, t.Any]) -> dict[str, t.Any]:
--- a/openllm-python/tests/__init__.py
+++ b/openllm-python/tests/__init__.py
@@ -1,7 +1,9 @@
 from __future__ import annotations
 import os

-from hypothesis import HealthCheck, settings
+from hypothesis import HealthCheck
+from hypothesis import settings
+
 settings.register_profile('CI', settings(suppress_health_check=[HealthCheck.too_slow]), deadline=None)

 if 'CI' in os.environ: settings.load_profile('CI')
--- a/openllm-python/tests/_strategies/_configuration.py
+++ b/openllm-python/tests/_strategies/_configuration.py
@@ -5,7 +5,9 @@ import typing as t
 from hypothesis import strategies as st

 import openllm
+
 from openllm_core._configuration import ModelSettings
+
 logger = logging.getLogger(__name__)

 env_strats = st.sampled_from([openllm.utils.EnvVarMixin(model_name) for model_name in openllm.CONFIG_MAPPING.keys()])
--- a/openllm-python/tests/configuration_test.py
+++ b/openllm-python/tests/configuration_test.py
@@ -3,17 +3,25 @@ import contextlib
 import os
 import sys
 import typing as t
+
 from unittest import mock

 import attr
 import pytest
 import transformers
-from hypothesis import assume, given, strategies as st
+
+from hypothesis import assume
+from hypothesis import given
+from hypothesis import strategies as st

 import openllm
-from openllm_core._configuration import GenerationConfig, ModelSettings, field_env_key

-from ._strategies._configuration import make_llm_config, model_settings
+from openllm_core._configuration import GenerationConfig
+from openllm_core._configuration import ModelSettings
+from openllm_core._configuration import field_env_key
+
+from ._strategies._configuration import make_llm_config
+from ._strategies._configuration import model_settings

 # XXX: @aarnphm fixes TypedDict behaviour in 3.11
@pytest.mark.skipif(sys.version_info[:2] == (3, 11), reason='TypedDict in 3.11 behaves differently, so we need to fix this')
--- a/openllm-python/tests/conftest.py
+++ b/openllm-python/tests/conftest.py
@@ -6,7 +6,9 @@ import typing as t
 import pytest

 import openllm
-if t.TYPE_CHECKING: from openllm_core._typing_compat import LiteralRuntime
+
+if t.TYPE_CHECKING:
+  from openllm_core._typing_compat import LiteralRuntime

 _FRAMEWORK_MAPPING = {'flan_t5': 'google/flan-t5-small', 'opt': 'facebook/opt-125m', 'baichuan': 'baichuan-inc/Baichuan-7B',}
 _PROMPT_MAPPING = {'qa': 'Answer the following yes/no question by reasoning step-by-step. Can you write a whole Haiku in a single tweet?',}
--- a/openllm-python/tests/models/conftest.py
+++ b/openllm-python/tests/models/conftest.py
@@ -6,7 +6,9 @@ import logging
 import sys
 import time
 import typing as t
-from abc import ABC, abstractmethod
+
+from abc import ABC
+from abc import abstractmethod

 import attr
 import docker
@@ -14,18 +16,25 @@ import docker.errors
 import docker.types
 import orjson
 import pytest
+
 from syrupy.extensions.json import JSONSnapshotExtension

 import openllm
+
 from openllm._llm import normalise_model_name
-from openllm_core._typing_compat import DictStrAny, ListAny
+from openllm_core._typing_compat import DictStrAny
+from openllm_core._typing_compat import ListAny
+
 logger = logging.getLogger(__name__)

 if t.TYPE_CHECKING:
  import subprocess

  from syrupy.assertion import SnapshotAssertion
-  from syrupy.types import PropertyFilter, PropertyMatcher, SerializableData, SerializedData
+  from syrupy.types import PropertyFilter
+  from syrupy.types import PropertyMatcher
+  from syrupy.types import SerializableData
+  from syrupy.types import SerializedData

  from openllm._configuration import GenerationConfig
  from openllm.client import BaseAsyncClient
--- a/openllm-python/tests/models/flan_t5_test.py
+++ b/openllm-python/tests/models/flan_t5_test.py
@@ -4,10 +4,14 @@ import typing as t
 import pytest

 import openllm
+
 if t.TYPE_CHECKING:
  import contextlib

-  from .conftest import HandleProtocol, ResponseComparator, _Handle
+  from .conftest import HandleProtocol
+  from .conftest import ResponseComparator
+  from .conftest import _Handle
+
 model = 'flan_t5'
 model_id = 'google/flan-t5-small'

--- a/openllm-python/tests/models/opt_test.py
+++ b/openllm-python/tests/models/opt_test.py
@@ -4,10 +4,14 @@ import typing as t
 import pytest

 import openllm
+
 if t.TYPE_CHECKING:
  import contextlib

-  from .conftest import HandleProtocol, ResponseComparator, _Handle
+  from .conftest import HandleProtocol
+  from .conftest import ResponseComparator
+  from .conftest import _Handle
+
 model = 'opt'
 model_id = 'facebook/opt-125m'

--- a/openllm-python/tests/models_test.py
+++ b/openllm-python/tests/models_test.py
@@ -3,7 +3,9 @@ import os
 import typing as t

 import pytest
-if t.TYPE_CHECKING: import openllm
+
+if t.TYPE_CHECKING:
+  import openllm

@pytest.mark.skipif(os.getenv('GITHUB_ACTIONS') is not None, reason='Model is too large for CI')
 def test_flan_t5_implementation(prompt: str, llm: openllm.LLM[t.Any, t.Any]):
--- a/openllm-python/tests/package_test.py
+++ b/openllm-python/tests/package_test.py
@@ -6,8 +6,11 @@ import typing as t
 import pytest

 import openllm
+
 from bentoml._internal.configuration.containers import BentoMLContainer
-if t.TYPE_CHECKING: from pathlib import Path
+
+if t.TYPE_CHECKING:
+  from pathlib import Path

 HF_INTERNAL_T5_TESTING = 'hf-internal-testing/tiny-random-t5'

--- a/openllm-python/tests/strategies_test.py
+++ b/openllm-python/tests/strategies_test.py
@@ -5,9 +5,14 @@ import typing as t
 import pytest

 import bentoml
+
 from openllm_core import _strategies as strategy
-from openllm_core._strategies import CascadingResourceStrategy, NvidiaGpuResource, get_resource
-if t.TYPE_CHECKING: from _pytest.monkeypatch import MonkeyPatch
+from openllm_core._strategies import CascadingResourceStrategy
+from openllm_core._strategies import NvidiaGpuResource
+from openllm_core._strategies import get_resource
+
+if t.TYPE_CHECKING:
+  from _pytest.monkeypatch import MonkeyPatch

 def test_nvidia_gpu_resource_from_env(monkeypatch: pytest.MonkeyPatch):
  with monkeypatch.context() as mcls: