cron(style): run formatter [generated] [skip ci] (#257)

2026-05-19 14:16:22 -04:00 · 2023-08-25 06:38:59 -04:00
parent f5dd9be122
commit 46c8904806
150 changed files with 913 additions and 379 deletions
--- a/openllm-python/src/openllm/__main__.py
+++ b/openllm-python/src/openllm/__main__.py
@@ -7,7 +7,6 @@ To start any OpenLLM model:
    openllm start <model_name> --options ...
 '''
 from __future__ import annotations
-
 if __name__ == '__main__':
  from openllm.cli.entrypoint import cli
  cli()
--- a/openllm-python/src/openllm/_embeddings.py
+++ b/openllm-python/src/openllm/_embeddings.py
@@ -1,8 +1,13 @@
 # See https://github.com/bentoml/sentence-embedding-bento for more information.
 from __future__ import annotations
-import bentoml, openllm, transformers, typing as t
+import typing as t
+
+import transformers
 from huggingface_hub import snapshot_download
-from bentoml._internal.frameworks.transformers import MODULE_NAME, API_VERSION
+
+import bentoml
+import openllm
+from bentoml._internal.frameworks.transformers import API_VERSION, MODULE_NAME
 from bentoml._internal.models.model import ModelOptions, ModelSignature
 if t.TYPE_CHECKING: import torch

@@ -44,7 +49,8 @@ class GenericEmbeddingRunnable(bentoml.Runnable):

  @bentoml.Runnable.method(batchable=True, batch_dim=0)
  def encode(self, sentences: list[str]) -> t.Sequence[openllm.LLMEmbeddings]:
-    import torch, torch.nn.functional as F
+    import torch
+    import torch.nn.functional as F
    encoded_input = self.tokenizer(sentences, padding=True, truncation=True, return_tensors='pt').to(self.device)
    attention_mask = encoded_input['attention_mask']
    # Compute token embeddings
--- a/openllm-python/src/openllm/_generation.py
+++ b/openllm-python/src/openllm/_generation.py
@@ -1,6 +1,8 @@
 # mypy: disable-error-code="misc"
 from __future__ import annotations
-import typing as t, transformers
+import typing as t
+
+import transformers
 if t.TYPE_CHECKING: import torch, openllm

 # reexport from transformers
--- a/openllm-python/src/openllm/_llm.py
+++ b/openllm-python/src/openllm/_llm.py
@@ -1,18 +1,43 @@
 # mypy: disable-error-code="name-defined,attr-defined"
 from __future__ import annotations
-import functools, inspect, logging, os, re, traceback, types, typing as t, uuid, attr, fs.path, inflection, orjson, bentoml, openllm, openllm_core, gc, pathlib, abc
+import abc
+import functools
+import gc
+import inspect
+import logging
+import os
+import pathlib
+import re
+import traceback
+import types
+import typing as t
+import uuid
+
+import attr
+import fs.path
+import inflection
+import orjson
 from huggingface_hub import hf_hub_download
+
+import bentoml
+import openllm
+import openllm_core
 from bentoml._internal.models.model import ModelSignature
 from openllm_core._configuration import FineTuneConfig, LLMConfig, _object_getattribute, _setattr_class
 from openllm_core._schema import unmarshal_vllm_outputs
+from openllm_core._typing_compat import AdaptersMapping, AdaptersTuple, AdapterType, AnyCallable, DictStrAny, ListStr, LiteralRuntime, LiteralString, LLMEmbeddings, LLMRunnable, LLMRunner, M, ModelSignatureDict as _ModelSignatureDict, NotRequired, PeftAdapterOutput, T, TupleAny, overload
 from openllm_core.utils import DEBUG, ENV_VARS_TRUE_VALUES, MYPY, EnvVarMixin, LazyLoader, ReprMixin, apply, bentoml_cattr, codegen, device_count, first_not_none, generate_hash_from_file, is_peft_available, is_torch_available, non_intrusive_setattr, normalize_attrs_to_model_tokenizer_pair, resolve_filepath, validate_is_path
+
 from ._quantisation import infer_quantisation_config
 from .exceptions import ForbiddenAttributeError, GpuNotAvailableError, OpenLLMException
 from .utils import infer_auto_class
-from openllm_core._typing_compat import AdaptersMapping, AdaptersTuple, AnyCallable, AdapterType, LiteralRuntime, DictStrAny, ListStr, LLMEmbeddings, LLMRunnable, LLMRunner, ModelSignatureDict as _ModelSignatureDict, PeftAdapterOutput, TupleAny, NotRequired, overload, M, T, LiteralString
-
 if t.TYPE_CHECKING:
-  import auto_gptq as autogptq, peft, torch, transformers, vllm
+  import auto_gptq as autogptq
+  import peft
+  import torch
+  import transformers
+  import vllm
+
  from openllm_core._configuration import PeftType
  from openllm_core.utils.representation import ReprArgs
 else:
@@ -1001,7 +1026,7 @@ class LLM(LLMInterface[M, T], ReprMixin):
  ) -> t.Iterator[t.Any]:
    # NOTE: encoder-decoder models will need to implement their own generate_iterator for now
    # inspired from fastchat's generate_stream_func
-    from ._generation import prepare_logits_processor, get_context_length, is_partial_stop
+    from ._generation import get_context_length, is_partial_stop, prepare_logits_processor

    len_prompt = len(prompt)
    if stop_token_ids is None: stop_token_ids = []
--- a/openllm-python/src/openllm/_quantisation.py
+++ b/openllm-python/src/openllm/_quantisation.py
@@ -1,11 +1,14 @@
 # mypy: disable-error-code="name-defined,no-redef"
 from __future__ import annotations
-import logging, typing as t
-from openllm_core.utils import LazyLoader, is_autogptq_available, is_bitsandbytes_available, is_transformers_supports_kbit, pkg
+import logging
+import typing as t
+
 from openllm_core._typing_compat import overload
+from openllm_core.utils import LazyLoader, is_autogptq_available, is_bitsandbytes_available, is_transformers_supports_kbit, pkg
 if t.TYPE_CHECKING:
-  from ._llm import LLM
  from openllm_core._typing_compat import DictStrAny
+
+  from ._llm import LLM
 autogptq, torch, transformers = LazyLoader('autogptq', globals(), 'auto_gptq'), LazyLoader('torch', globals(), 'torch'), LazyLoader('transformers', globals(), 'transformers')

 logger = logging.getLogger(__name__)
--- a/openllm-python/src/openllm/_service.py
+++ b/openllm-python/src/openllm/_service.py
@@ -1,14 +1,23 @@
 # mypy: disable-error-code="call-arg,misc,attr-defined,type-abstract,type-arg,valid-type,arg-type"
 from __future__ import annotations
-import os, warnings, orjson, bentoml, openllm, openllm_core, typing as t
+import os
+import typing as t
+import warnings
+
+import orjson
 from starlette.applications import Starlette
 from starlette.responses import JSONResponse
 from starlette.routing import Route
+
+import bentoml
+import openllm
+import openllm_core
 if t.TYPE_CHECKING:
-  from openllm_core._typing_compat import TypeAlias
  from starlette.requests import Request
  from starlette.responses import Response
-  from bentoml._internal.runner.runner import RunnerMethod, AbstractRunner
+
+  from bentoml._internal.runner.runner import AbstractRunner, RunnerMethod
+  from openllm_core._typing_compat import TypeAlias
  _EmbeddingMethod: TypeAlias = RunnerMethod[t.Union[bentoml.Runnable, openllm.LLMRunnable[t.Any, t.Any]], [t.List[str]], t.Sequence[openllm.LLMEmbeddings]]
 # The following warnings from bitsandbytes, and probably not that important for users to see
 warnings.filterwarnings('ignore', message='MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization')
--- a/openllm-python/src/openllm/bundle/__init__.py
+++ b/openllm-python/src/openllm/bundle/__init__.py
@@ -3,7 +3,9 @@
 These utilities will stay internal, and its API can be changed or updated without backward-compatibility.
 """
 from __future__ import annotations
-import os, typing as t
+import os
+import typing as t
+
 from openllm_core.utils import LazyModule
 _import_structure: dict[str, list[str]] = {
    '_package': ['create_bento', 'build_editable', 'construct_python_options', 'construct_docker_options'],
--- a/openllm-python/src/openllm/bundle/_package.py
+++ b/openllm-python/src/openllm/bundle/_package.py
@@ -1,18 +1,32 @@
 # mypy: disable-error-code="misc"
 from __future__ import annotations
-import fs, fs.copy, fs.errors, orjson, bentoml, openllm_core, importlib.metadata, inspect, logging, os, typing as t, string
+import importlib.metadata
+import inspect
+import logging
+import os
+import string
+import typing as t
 from pathlib import Path
+
+import fs
+import fs.copy
+import fs.errors
+import orjson
 from simple_di import Provide, inject
+
+import bentoml
+import openllm_core
 from bentoml._internal.bento.build_config import BentoBuildConfig, DockerOptions, ModelSpec, PythonOptions
 from bentoml._internal.configuration.containers import BentoMLContainer
-from . import oci

+from . import oci
 if t.TYPE_CHECKING:
-  import openllm
  from fs.base import FS
-  from openllm_core._typing_compat import LiteralString, LiteralContainerRegistry, LiteralContainerVersionStrategy
+
+  import openllm
  from bentoml._internal.bento import BentoStore
  from bentoml._internal.models.model import ModelStore
+  from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy, LiteralString
 logger = logging.getLogger(__name__)

 OPENLLM_DEV_BUILD = 'OPENLLM_DEV_BUILD'
--- a/openllm-python/src/openllm/bundle/oci/__init__.py
+++ b/openllm-python/src/openllm/bundle/oci/__init__.py
@@ -1,15 +1,27 @@
 # mypy: disable-error-code="misc"
 '''OCI-related utilities for OpenLLM. This module is considered to be internal and API are subjected to change.'''
 from __future__ import annotations
-import functools, importlib, logging, os, pathlib, shutil, subprocess, typing as t, openllm_core
+import functools
+import importlib
+import logging
+import os
+import pathlib
+import shutil
+import subprocess
+import typing as t
 from datetime import datetime, timedelta, timezone
-import attr, orjson, bentoml, openllm
-from openllm_core.utils.lazy import VersionInfo

+import attr
+import orjson
+
+import bentoml
+import openllm
+import openllm_core
+from openllm_core.utils.lazy import VersionInfo
 if t.TYPE_CHECKING:
-  from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy
  from ghapi import all
-  from openllm_core._typing_compat import RefTuple, LiteralString
+
+  from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy, LiteralString, RefTuple
 all = openllm_core.utils.LazyLoader('all', globals(), 'ghapi.all')  # noqa: F811

 logger = logging.getLogger(__name__)
--- a/openllm-python/src/openllm/cli/_factory.py
+++ b/openllm-python/src/openllm/cli/_factory.py
@@ -1,15 +1,28 @@
 from __future__ import annotations
-import functools, importlib.util, os, typing as t, logging, click, click_option_group as cog, inflection, orjson, bentoml, openllm
-from click import shell_completion as sc
-from bentoml_cli.utils import BentoMLCommandGroup
-from click.shell_completion import CompletionItem
-from openllm_core.utils import DEBUG
-from bentoml._internal.configuration.containers import BentoMLContainer
-from openllm_core._typing_compat import LiteralString, DictStrAny, ParamSpec, Concatenate
-from . import termui
+import functools
+import importlib.util
+import logging
+import os
+import typing as t

+import click
+import click_option_group as cog
+import inflection
+import orjson
+from bentoml_cli.utils import BentoMLCommandGroup
+from click import shell_completion as sc
+from click.shell_completion import CompletionItem
+
+import bentoml
+import openllm
+from bentoml._internal.configuration.containers import BentoMLContainer
+from openllm_core._typing_compat import Concatenate, DictStrAny, LiteralString, ParamSpec
+from openllm_core.utils import DEBUG
+
+from . import termui
 if t.TYPE_CHECKING:
  import subprocess
+
  from openllm_core._configuration import LLMConfig
 logger = logging.getLogger(__name__)

--- a/openllm-python/src/openllm/cli/_sdk.py
+++ b/openllm-python/src/openllm/cli/_sdk.py
@@ -1,15 +1,26 @@
 from __future__ import annotations
-import itertools, logging, os, re, subprocess, sys, typing as t, bentoml, openllm, openllm_core
+import itertools
+import logging
+import os
+import re
+import subprocess
+import sys
+import typing as t
+
 from simple_di import Provide, inject
+
+import bentoml
+import openllm
+import openllm_core
 from bentoml._internal.configuration.containers import BentoMLContainer
 from openllm.exceptions import OpenLLMException
+
 from . import termui
 from ._factory import start_command_factory
-
 if t.TYPE_CHECKING:
-  from openllm_core._configuration import LLMConfig
-  from openllm_core._typing_compat import LiteralString, LiteralRuntime, LiteralContainerRegistry, LiteralContainerVersionStrategy
  from bentoml._internal.bento import BentoStore
+  from openllm_core._configuration import LLMConfig
+  from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy, LiteralRuntime, LiteralString
 logger = logging.getLogger(__name__)
 def _start(
    model_name: str,
--- a/openllm-python/src/openllm/cli/entrypoint.py
+++ b/openllm-python/src/openllm/cli/entrypoint.py
@@ -20,22 +20,47 @@ bentomodel = openllm.import_model("falcon", model_id='tiiuae/falcon-7b-instruct'
 ```
 """
 from __future__ import annotations
-import functools, http.client, inspect, itertools, logging, os, platform, re, subprocess, sys, time, traceback, typing as t, attr, click, click_option_group as cog, fs, fs.copy, fs.errors, inflection, orjson, bentoml, openllm
-from simple_di import Provide, inject
+import functools
+import http.client
+import inspect
+import itertools
+import logging
+import os
+import platform
+import re
+import subprocess
+import sys
+import time
+import traceback
+import typing as t
+
+import attr
+import click
+import click_option_group as cog
+import fs
+import fs.copy
+import fs.errors
+import inflection
+import orjson
 from bentoml_cli.utils import BentoMLCommandGroup, opt_callback
+from simple_di import Provide, inject
+
+import bentoml
+import openllm
 from bentoml._internal.configuration.containers import BentoMLContainer
 from bentoml._internal.models.model import ModelStore
-from . import termui
-from ._factory import FC, LiteralOutput, _AnyCallable, bettertransformer_option, container_registry_option, fast_option, machine_option, model_id_option, model_name_argument, model_version_option, output_option, parse_device_callback, quantize_option, serialisation_option, start_command_factory, workers_per_resource_option
 from openllm import bundle, serialisation
 from openllm.exceptions import OpenLLMException
 from openllm.models.auto import CONFIG_MAPPING, MODEL_FLAX_MAPPING_NAMES, MODEL_MAPPING_NAMES, MODEL_TF_MAPPING_NAMES, MODEL_VLLM_MAPPING_NAMES, AutoConfig, AutoLLM
-from openllm_core._typing_compat import DictStrAny, ParamSpec, Concatenate, LiteralString, Self, LiteralRuntime
-from openllm_core.utils import DEBUG, DEBUG_ENV_VAR, OPTIONAL_DEPENDENCIES, QUIET_ENV_VAR, EnvVarMixin, LazyLoader, analytics, bentoml_cattr, compose, configure_logging, dantic, first_not_none, get_debug_mode, get_quiet_mode, is_torch_available, is_transformers_supports_agent, resolve_user_filepath, set_debug_mode, set_quiet_mode
 from openllm.utils import infer_auto_class
+from openllm_core._typing_compat import Concatenate, DictStrAny, LiteralRuntime, LiteralString, ParamSpec, Self
+from openllm_core.utils import DEBUG, DEBUG_ENV_VAR, OPTIONAL_DEPENDENCIES, QUIET_ENV_VAR, EnvVarMixin, LazyLoader, analytics, bentoml_cattr, compose, configure_logging, dantic, first_not_none, get_debug_mode, get_quiet_mode, is_torch_available, is_transformers_supports_agent, resolve_user_filepath, set_debug_mode, set_quiet_mode

+from . import termui
+from ._factory import FC, LiteralOutput, _AnyCallable, bettertransformer_option, container_registry_option, fast_option, machine_option, model_id_option, model_name_argument, model_version_option, output_option, parse_device_callback, quantize_option, serialisation_option, start_command_factory, workers_per_resource_option
 if t.TYPE_CHECKING:
  import torch
+
  from bentoml._internal.bento import BentoStore
  from bentoml._internal.container import DefaultBuilder
  from openllm_core._schema import EmbeddingsOutput
--- a/openllm-python/src/openllm/cli/extension/build_base_container.py
+++ b/openllm-python/src/openllm/cli/extension/build_base_container.py
@@ -1,7 +1,12 @@
 from __future__ import annotations
-import typing as t, click, orjson, openllm
+import typing as t
+
+import click
+import orjson
+
+import openllm
 from openllm.cli import termui
-from openllm.cli._factory import machine_option, container_registry_option
+from openllm.cli._factory import container_registry_option, machine_option
 if t.TYPE_CHECKING: from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy
@click.command(
    'build_base_container',
--- a/openllm-python/src/openllm/cli/extension/dive_bentos.py
+++ b/openllm-python/src/openllm/cli/extension/dive_bentos.py
@@ -1,11 +1,16 @@
 from __future__ import annotations
-import shutil, subprocess, typing as t, click, psutil, bentoml
-from simple_di import Provide, inject
-from bentoml._internal.configuration.containers import BentoMLContainer
+import shutil
+import subprocess
+import typing as t

+import click
+import psutil
+from simple_di import Provide, inject
+
+import bentoml
+from bentoml._internal.configuration.containers import BentoMLContainer
 from openllm.cli import termui
 from openllm.cli._factory import bento_complete_envvar, machine_option
-
 if t.TYPE_CHECKING: from bentoml._internal.bento import BentoStore
@click.command('dive_bentos', context_settings=termui.CONTEXT_SETTINGS)
@click.argument('bento', type=str, shell_complete=bento_complete_envvar)
--- a/openllm-python/src/openllm/cli/extension/get_containerfile.py
+++ b/openllm-python/src/openllm/cli/extension/get_containerfile.py
@@ -1,6 +1,10 @@
 from __future__ import annotations
-import typing as t, click, bentoml
+import typing as t
+
+import click
 from simple_di import Provide, inject
+
+import bentoml
 from bentoml._internal.bento.bento import BentoInfo
 from bentoml._internal.bento.build_config import DockerOptions
 from bentoml._internal.configuration.containers import BentoMLContainer
@@ -8,7 +12,6 @@ from bentoml._internal.container.generate import generate_containerfile
 from openllm.cli import termui
 from openllm.cli._factory import bento_complete_envvar
 from openllm_core.utils import bentoml_cattr
-
 if t.TYPE_CHECKING: from bentoml._internal.bento import BentoStore
@click.command('get_containerfile', context_settings=termui.CONTEXT_SETTINGS, help='Return Containerfile of any given Bento.')
@click.argument('bento', type=str, shell_complete=bento_complete_envvar)
--- a/openllm-python/src/openllm/cli/extension/get_prompt.py
+++ b/openllm-python/src/openllm/cli/extension/get_prompt.py
@@ -1,8 +1,14 @@
 from __future__ import annotations
-import typing as t, click, inflection, orjson, openllm
+import typing as t
+
+import click
+import inflection
+import orjson
 from bentoml_cli.utils import opt_callback
+
+import openllm
 from openllm.cli import termui
-from openllm.cli._factory import model_complete_envvar, output_option, machine_option
+from openllm.cli._factory import machine_option, model_complete_envvar, output_option
 from openllm_core._prompt import process_prompt
 LiteralOutput = t.Literal['json', 'pretty', 'porcelain']
@click.command('get_prompt', context_settings=termui.CONTEXT_SETTINGS)
--- a/openllm-python/src/openllm/cli/extension/list_bentos.py
+++ b/openllm-python/src/openllm/cli/extension/list_bentos.py
@@ -1,5 +1,11 @@
 from __future__ import annotations
-import click, inflection, orjson, bentoml, openllm
+
+import click
+import inflection
+import orjson
+
+import bentoml
+import openllm
 from bentoml._internal.utils import human_readable_size
 from openllm.cli import termui
 from openllm.cli._factory import LiteralOutput, output_option
--- a/openllm-python/src/openllm/cli/extension/list_models.py
+++ b/openllm-python/src/openllm/cli/extension/list_models.py
@@ -1,9 +1,15 @@
 from __future__ import annotations
-import typing as t, bentoml, openllm, orjson, inflection, click
-from openllm.cli import termui
-from bentoml._internal.utils import human_readable_size
-from openllm.cli._factory import LiteralOutput, model_name_argument, output_option, model_complete_envvar
+import typing as t

+import click
+import inflection
+import orjson
+
+import bentoml
+import openllm
+from bentoml._internal.utils import human_readable_size
+from openllm.cli import termui
+from openllm.cli._factory import LiteralOutput, model_complete_envvar, model_name_argument, output_option
 if t.TYPE_CHECKING: from openllm_core._typing_compat import DictStrAny
@click.command('list_models', context_settings=termui.CONTEXT_SETTINGS)
@model_name_argument(required=False, shell_complete=model_complete_envvar)
--- a/openllm-python/src/openllm/cli/extension/playground.py
+++ b/openllm-python/src/openllm/cli/extension/playground.py
@@ -1,11 +1,23 @@
 from __future__ import annotations
-import importlib.machinery, logging, os, pkgutil, subprocess, sys, tempfile, typing as t, click, yaml
-from openllm.cli import termui
-from openllm import playground
-from openllm_core.utils import is_jupyter_available, is_jupytext_available, is_notebook_available
+import importlib.machinery
+import logging
+import os
+import pkgutil
+import subprocess
+import sys
+import tempfile
+import typing as t

+import click
+import yaml
+
+from openllm import playground
+from openllm.cli import termui
+from openllm_core.utils import is_jupyter_available, is_jupytext_available, is_notebook_available
 if t.TYPE_CHECKING:
-  import jupytext, nbformat
+  import jupytext
+  import nbformat
+
  from openllm_core._typing_compat import DictStrAny
 logger = logging.getLogger(__name__)
 def load_notebook_metadata() -> DictStrAny:
--- a/openllm-python/src/openllm/cli/termui.py
+++ b/openllm-python/src/openllm/cli/termui.py
@@ -1,5 +1,11 @@
 from __future__ import annotations
-import os, typing as t, click, inflection, openllm
+import os
+import typing as t
+
+import click
+import inflection
+
+import openllm
 if t.TYPE_CHECKING: from openllm_core._typing_compat import DictStrAny
 def echo(text: t.Any, fg: str = 'green', _with_style: bool = True, **attrs: t.Any) -> None:
  attrs['fg'] = fg if not openllm.utils.get_debug_mode() else None
--- a/openllm-python/src/openllm/client.py
+++ b/openllm-python/src/openllm/client.py
@@ -11,7 +11,9 @@ client.embed("What is the difference between gather and scatter?")
 ```
 '''
 from __future__ import annotations
-import openllm_client, typing as t
+import typing as t
+
+import openllm_client
 if t.TYPE_CHECKING:  from openllm_client import AsyncHTTPClient as AsyncHTTPClient, BaseAsyncClient as BaseAsyncClient, BaseClient as BaseClient, HTTPClient as HTTPClient, GrpcClient as GrpcClient, AsyncGrpcClient as AsyncGrpcClient
 def __dir__() -> t.Sequence[str]:
  return sorted(dir(openllm_client))
--- a/openllm-python/src/openllm/exceptions.py
+++ b/openllm-python/src/openllm/exceptions.py
@@ -1,3 +1,4 @@
 '''Base exceptions for OpenLLM. This extends BentoML exceptions.'''
 from __future__ import annotations
-from openllm_core.exceptions import OpenLLMException as OpenLLMException, GpuNotAvailableError as GpuNotAvailableError, ValidationError as ValidationError, ForbiddenAttributeError as ForbiddenAttributeError, MissingAnnotationAttributeError as MissingAnnotationAttributeError, MissingDependencyError as MissingDependencyError, Error as Error, FineTuneStrategyNotSupportedError as FineTuneStrategyNotSupportedError
+
+from openllm_core.exceptions import Error as Error, FineTuneStrategyNotSupportedError as FineTuneStrategyNotSupportedError, ForbiddenAttributeError as ForbiddenAttributeError, GpuNotAvailableError as GpuNotAvailableError, MissingAnnotationAttributeError as MissingAnnotationAttributeError, MissingDependencyError as MissingDependencyError, OpenLLMException as OpenLLMException, ValidationError as ValidationError
--- a/openllm-python/src/openllm/models/auto/__init__.py
+++ b/openllm-python/src/openllm/models/auto/__init__.py
@@ -1,8 +1,10 @@
 from __future__ import annotations
-import typing as t, os
+import os
+import typing as t
+
 import openllm
+from openllm_core.config import CONFIG_MAPPING as CONFIG_MAPPING, CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES, AutoConfig as AutoConfig
 from openllm_core.utils import LazyModule, is_flax_available, is_tf_available, is_torch_available, is_vllm_available
-from openllm_core.config import AutoConfig as AutoConfig, CONFIG_MAPPING as CONFIG_MAPPING, CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES
 _import_structure: dict[str, list[str]] = {
    'modeling_auto': ['MODEL_MAPPING_NAMES'],
    'modeling_flax_auto': ['MODEL_FLAX_MAPPING_NAMES'],
--- a/openllm-python/src/openllm/models/auto/factory.py
+++ b/openllm-python/src/openllm/models/auto/factory.py
@@ -1,16 +1,22 @@
 # mypy: disable-error-code="type-arg"
 from __future__ import annotations
-import importlib, inspect, logging, typing as t
+import importlib
+import inspect
+import logging
+import typing as t
 from collections import OrderedDict
-import inflection, openllm
-from openllm_core.utils import ReprMixin

+import inflection
+
+import openllm
+from openllm_core.utils import ReprMixin
 if t.TYPE_CHECKING:
-  from openllm_core._typing_compat import LiteralString, LLMRunner
  import types
  from collections import _odict_items, _odict_keys, _odict_values

  from _typeshed import SupportsIter
+
+  from openllm_core._typing_compat import LiteralString, LLMRunner
  ConfigModelKeysView = _odict_keys[type[openllm.LLMConfig], type[openllm.LLM[t.Any, t.Any]]]
  ConfigModelValuesView = _odict_values[type[openllm.LLMConfig], type[openllm.LLM[t.Any, t.Any]]]
  ConfigModelItemsView = _odict_items[type[openllm.LLMConfig], type[openllm.LLM[t.Any, t.Any]]]
--- a/openllm-python/src/openllm/models/auto/modeling_auto.py
+++ b/openllm-python/src/openllm/models/auto/modeling_auto.py
@@ -1,8 +1,10 @@
 from __future__ import annotations
 import typing as t
 from collections import OrderedDict
-from .factory import BaseAutoLLMClass, _LazyAutoMapping
+
 from openllm_core.config import CONFIG_MAPPING_NAMES
+
+from .factory import BaseAutoLLMClass, _LazyAutoMapping
 MODEL_MAPPING_NAMES = OrderedDict([('chatglm', 'ChatGLM'), ('dolly_v2', 'DollyV2'), ('falcon', 'Falcon'), ('flan_t5', 'FlanT5'), ('gpt_neox', 'GPTNeoX'), ('llama', 'Llama'), ('mpt', 'MPT'), (
    'opt', 'OPT'
 ), ('stablelm', 'StableLM'), ('starcoder', 'StarCoder'), ('baichuan', 'Baichuan')])
--- a/openllm-python/src/openllm/models/auto/modeling_flax_auto.py
+++ b/openllm-python/src/openllm/models/auto/modeling_flax_auto.py
@@ -1,8 +1,10 @@
 from __future__ import annotations
 import typing as t
 from collections import OrderedDict
-from .factory import BaseAutoLLMClass, _LazyAutoMapping
+
 from openllm_core.config import CONFIG_MAPPING_NAMES
+
+from .factory import BaseAutoLLMClass, _LazyAutoMapping
 MODEL_FLAX_MAPPING_NAMES = OrderedDict([('flan_t5', 'FlaxFlanT5'), ('opt', 'FlaxOPT')])
 MODEL_FLAX_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FLAX_MAPPING_NAMES)
 class AutoFlaxLLM(BaseAutoLLMClass):
--- a/openllm-python/src/openllm/models/auto/modeling_tf_auto.py
+++ b/openllm-python/src/openllm/models/auto/modeling_tf_auto.py
@@ -1,8 +1,10 @@
 from __future__ import annotations
 import typing as t
 from collections import OrderedDict
-from .factory import BaseAutoLLMClass, _LazyAutoMapping
+
 from openllm_core.config import CONFIG_MAPPING_NAMES
+
+from .factory import BaseAutoLLMClass, _LazyAutoMapping
 MODEL_TF_MAPPING_NAMES = OrderedDict([('flan_t5', 'TFFlanT5'), ('opt', 'TFOPT')])
 MODEL_TF_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_TF_MAPPING_NAMES)
 class AutoTFLLM(BaseAutoLLMClass):
--- a/openllm-python/src/openllm/models/auto/modeling_vllm_auto.py
+++ b/openllm-python/src/openllm/models/auto/modeling_vllm_auto.py
@@ -1,8 +1,10 @@
 from __future__ import annotations
 import typing as t
 from collections import OrderedDict
-from .factory import BaseAutoLLMClass, _LazyAutoMapping
+
 from openllm_core.config import CONFIG_MAPPING_NAMES
+
+from .factory import BaseAutoLLMClass, _LazyAutoMapping
 MODEL_VLLM_MAPPING_NAMES = OrderedDict([('baichuan', 'VLLMBaichuan'), ('dolly_v2', 'VLLMDollyV2'), ('falcon', 'VLLMFalcon'), ('gpt_neox', 'VLLMGPTNeoX'), ('mpt', 'VLLMMPT'), (
    'opt', 'VLLMOPT'
 ), ('stablelm', 'VLLMStableLM'), ('starcoder', 'VLLMStarCoder'), ('llama', 'VLLMLlama')])
--- a/openllm-python/src/openllm/models/baichuan/__init__.py
+++ b/openllm-python/src/openllm/models/baichuan/__init__.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import sys, typing as t
+import sys
+import typing as t
+
 from openllm.exceptions import MissingDependencyError
 from openllm.utils import LazyModule, is_cpm_kernels_available, is_torch_available, is_vllm_available
 from openllm_core.config.configuration_baichuan import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_BAICHUAN_COMMAND_DOCSTRING as START_BAICHUAN_COMMAND_DOCSTRING, BaichuanConfig as BaichuanConfig
--- a/openllm-python/src/openllm/models/baichuan/modeling_baichuan.py
+++ b/openllm-python/src/openllm/models/baichuan/modeling_baichuan.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, openllm
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import transformers
 class Baichuan(openllm.LLM['transformers.PreTrainedModel', 'transformers.PreTrainedTokenizerBase']):
  __openllm_internal__ = True
--- a/openllm-python/src/openllm/models/baichuan/modeling_vllm_baichuan.py
+++ b/openllm-python/src/openllm/models/baichuan/modeling_vllm_baichuan.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, openllm
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import vllm, transformers
 class VLLMBaichuan(openllm.LLM['vllm.LLMEngine', 'transformers.PreTrainedTokenizerBase']):
  __openllm_internal__ = True
--- a/openllm-python/src/openllm/models/chatglm/__init__.py
+++ b/openllm-python/src/openllm/models/chatglm/__init__.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import sys, typing as t
+import sys
+import typing as t
+
 from openllm.exceptions import MissingDependencyError
 from openllm.utils import LazyModule, is_cpm_kernels_available, is_torch_available
 from openllm_core.config.configuration_chatglm import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_CHATGLM_COMMAND_DOCSTRING as START_CHATGLM_COMMAND_DOCSTRING, ChatGLMConfig as ChatGLMConfig
--- a/openllm-python/src/openllm/models/chatglm/modeling_chatglm.py
+++ b/openllm-python/src/openllm/models/chatglm/modeling_chatglm.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, openllm
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import transformers
 class ChatGLM(openllm.LLM['transformers.PreTrainedModel', 'transformers.PreTrainedTokenizerFast']):
  __openllm_internal__ = True
@@ -13,7 +15,8 @@ class ChatGLM(openllm.LLM['transformers.PreTrainedModel', 'transformers.PreTrain
      return self.model.chat(self.tokenizer, prompt, generation_config=self.config.model_construct_env(**attrs).to_generation_config())

  def embeddings(self, prompts: list[str]) -> openllm.LLMEmbeddings:
-    import torch, torch.nn.functional as F
+    import torch
+    import torch.nn.functional as F
    embeddings: list[list[float]] = []
    num_tokens = 0
    for prompt in prompts:
--- a/openllm-python/src/openllm/models/dolly_v2/__init__.py
+++ b/openllm-python/src/openllm/models/dolly_v2/__init__.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import sys, typing as t
+import sys
+import typing as t
+
 from openllm.exceptions import MissingDependencyError
 from openllm.utils import LazyModule, is_torch_available, is_vllm_available
 from openllm_core.config.configuration_dolly_v2 import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_DOLLY_V2_COMMAND_DOCSTRING as START_DOLLY_V2_COMMAND_DOCSTRING, DollyV2Config as DollyV2Config
--- a/openllm-python/src/openllm/models/dolly_v2/modeling_dolly_v2.py
+++ b/openllm-python/src/openllm/models/dolly_v2/modeling_dolly_v2.py
@@ -1,8 +1,11 @@
 from __future__ import annotations
-import logging, re, typing as t, openllm
+import logging
+import re
+import typing as t
+
+import openllm
 from openllm_core._typing_compat import overload
 from openllm_core.config.configuration_dolly_v2 import DEFAULT_PROMPT_TEMPLATE, END_KEY, RESPONSE_KEY, get_special_token_id
-
 if t.TYPE_CHECKING: import torch, transformers, tensorflow as tf
 else:  torch, transformers, tf = openllm.utils.LazyLoader('torch', globals(), 'torch'), openllm.utils.LazyLoader('transformers', globals(), 'transformers'), openllm.utils.LazyLoader('tf', globals(), 'tensorflow')
 logger = logging.getLogger(__name__)
--- a/openllm-python/src/openllm/models/dolly_v2/modeling_vllm_dolly_v2.py
+++ b/openllm-python/src/openllm/models/dolly_v2/modeling_vllm_dolly_v2.py
@@ -1,5 +1,8 @@
 from __future__ import annotations
-import logging, typing as t, openllm
+import logging
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import vllm, transformers

 logger = logging.getLogger(__name__)
--- a/openllm-python/src/openllm/models/falcon/init.py
+++ b/openllm-python/src/openllm/models/falcon/init.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import sys, typing as t
+import sys
+import typing as t
+
 from openllm.exceptions import MissingDependencyError
 from openllm.utils import LazyModule, is_torch_available, is_vllm_available
 from openllm_core.config.configuration_falcon import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_FALCON_COMMAND_DOCSTRING as START_FALCON_COMMAND_DOCSTRING, FalconConfig as FalconConfig
--- a/openllm-python/src/openllm/models/falcon/modeling_falcon.py
+++ b/openllm-python/src/openllm/models/falcon/modeling_falcon.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, openllm
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import torch, transformers
 else: torch, transformers = openllm.utils.LazyLoader('torch', globals(), 'torch'), openllm.utils.LazyLoader('transformers', globals(), 'transformers')
 class Falcon(openllm.LLM['transformers.PreTrainedModel', 'transformers.PreTrainedTokenizerBase']):
--- a/openllm-python/src/openllm/models/falcon/modeling_vllm_falcon.py
+++ b/openllm-python/src/openllm/models/falcon/modeling_vllm_falcon.py
@@ -1,5 +1,8 @@
 from __future__ import annotations
-import logging, typing as t, openllm
+import logging
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import vllm, transformers

 logger = logging.getLogger(__name__)
--- a/openllm-python/src/openllm/models/flan_t5/init.py
+++ b/openllm-python/src/openllm/models/flan_t5/init.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import sys, typing as t
+import sys
+import typing as t
+
 from openllm.exceptions import MissingDependencyError
 from openllm.utils import LazyModule, is_flax_available, is_tf_available, is_torch_available
 from openllm_core.config.configuration_flan_t5 import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING, FlanT5Config as FlanT5Config
--- a/openllm-python/src/openllm/models/flan_t5/modeling_flan_t5.py
+++ b/openllm-python/src/openllm/models/flan_t5/modeling_flan_t5.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, openllm
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import transformers
 class FlanT5(openllm.LLM['transformers.T5ForConditionalGeneration', 'transformers.T5TokenizerFast']):
  __openllm_internal__ = True
@@ -13,7 +15,8 @@ class FlanT5(openllm.LLM['transformers.T5ForConditionalGeneration', 'transformer
      )

  def embeddings(self, prompts: list[str]) -> openllm.LLMEmbeddings:
-    import torch, torch.nn.functional as F
+    import torch
+    import torch.nn.functional as F
    embeddings: list[list[float]] = []
    num_tokens = 0
    for prompt in prompts:
--- a/openllm-python/src/openllm/models/flan_t5/modeling_flax_flan_t5.py
+++ b/openllm-python/src/openllm/models/flan_t5/modeling_flax_flan_t5.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, openllm
+import typing as t
+
+import openllm
 from openllm_core._prompt import process_prompt
 from openllm_core.config.configuration_flan_t5 import DEFAULT_PROMPT_TEMPLATE
 if t.TYPE_CHECKING: import transformers
--- a/openllm-python/src/openllm/models/flan_t5/modeling_tf_flan_t5.py
+++ b/openllm-python/src/openllm/models/flan_t5/modeling_tf_flan_t5.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, openllm
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import transformers
 class TFFlanT5(openllm.LLM['transformers.TFT5ForConditionalGeneration', 'transformers.T5TokenizerFast']):
  __openllm_internal__ = True
--- a/openllm-python/src/openllm/models/gpt_neox/init.py
+++ b/openllm-python/src/openllm/models/gpt_neox/init.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import sys, typing as t
+import sys
+import typing as t
+
 from openllm.exceptions import MissingDependencyError
 from openllm.utils import LazyModule, is_torch_available, is_vllm_available
 from openllm_core.config.configuration_gpt_neox import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_GPT_NEOX_COMMAND_DOCSTRING as START_GPT_NEOX_COMMAND_DOCSTRING, GPTNeoXConfig as GPTNeoXConfig
--- a/openllm-python/src/openllm/models/gpt_neox/modeling_gpt_neox.py
+++ b/openllm-python/src/openllm/models/gpt_neox/modeling_gpt_neox.py
@@ -1,5 +1,8 @@
 from __future__ import annotations
-import logging, typing as t, openllm
+import logging
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import transformers

 logger = logging.getLogger(__name__)
--- a/openllm-python/src/openllm/models/gpt_neox/modeling_vllm_gpt_neox.py
+++ b/openllm-python/src/openllm/models/gpt_neox/modeling_vllm_gpt_neox.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, openllm
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import vllm, transformers
 class VLLMGPTNeoX(openllm.LLM['vllm.LLMEngine', 'transformers.GPTNeoXTokenizerFast']):
  __openllm_internal__ = True
--- a/openllm-python/src/openllm/models/llama/init.py
+++ b/openllm-python/src/openllm/models/llama/init.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import sys, typing as t
+import sys
+import typing as t
+
 from openllm.exceptions import MissingDependencyError
 from openllm.utils import LazyModule, is_torch_available, is_vllm_available
 from openllm_core.config.configuration_llama import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, PROMPT_MAPPING as PROMPT_MAPPING, START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING, LlamaConfig as LlamaConfig
--- a/openllm-python/src/openllm/models/llama/modeling_llama.py
+++ b/openllm-python/src/openllm/models/llama/modeling_llama.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, openllm
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import transformers
 class Llama(openllm.LLM['transformers.LlamaForCausalLM', 'transformers.LlamaTokenizerFast']):
  __openllm_internal__ = True
@@ -10,7 +12,8 @@ class Llama(openllm.LLM['transformers.LlamaForCausalLM', 'transformers.LlamaToke
    return {'torch_dtype': torch.float16 if torch.cuda.is_available() else torch.float32}, {}

  def embeddings(self, prompts: list[str]) -> openllm.LLMEmbeddings:
-    import torch, torch.nn.functional as F
+    import torch
+    import torch.nn.functional as F
    encoding = self.tokenizer(prompts, padding=True, return_tensors='pt').to(self.device)
    input_ids, attention_mask = encoding['input_ids'], encoding['attention_mask']
    with torch.inference_mode():
--- a/openllm-python/src/openllm/models/llama/modeling_vllm_llama.py
+++ b/openllm-python/src/openllm/models/llama/modeling_vllm_llama.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, openllm
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import vllm, transformers
 class VLLMLlama(openllm.LLM['vllm.LLMEngine', 'transformers.LlamaTokenizerFast']):
  __openllm_internal__ = True
--- a/openllm-python/src/openllm/models/mpt/init.py
+++ b/openllm-python/src/openllm/models/mpt/init.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import sys, typing as t
+import sys
+import typing as t
+
 from openllm.exceptions import MissingDependencyError
 from openllm.utils import LazyModule, is_torch_available, is_vllm_available
 from openllm_core.config.configuration_mpt import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, PROMPT_MAPPING as PROMPT_MAPPING, START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING, MPTConfig as MPTConfig
--- a/openllm-python/src/openllm/models/mpt/modeling_mpt.py
+++ b/openllm-python/src/openllm/models/mpt/modeling_mpt.py
@@ -1,5 +1,9 @@
 from __future__ import annotations
-import logging, typing as t, bentoml, openllm
+import logging
+import typing as t
+
+import bentoml
+import openllm
 from openllm.utils import generate_labels, is_triton_available
 if t.TYPE_CHECKING: import transformers, torch

@@ -31,7 +35,8 @@ class MPT(openllm.LLM['transformers.PreTrainedModel', 'transformers.GPTNeoXToken
    return {'device_map': 'auto' if torch.cuda.is_available() and torch.cuda.device_count() > 1 else None, 'torch_dtype': torch.bfloat16 if torch.cuda.is_available() else torch.float32}, {}

  def import_model(self, *args: t.Any, trust_remote_code: bool = True, **attrs: t.Any) -> bentoml.Model:
-    import torch, transformers
+    import torch
+    import transformers
    _, tokenizer_attrs = self.llm_parameters
    torch_dtype = attrs.pop('torch_dtype', self.dtype)
    device_map = attrs.pop('device_map', None)
--- a/openllm-python/src/openllm/models/mpt/modeling_vllm_mpt.py
+++ b/openllm-python/src/openllm/models/mpt/modeling_vllm_mpt.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, openllm
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import transformers, vllm
 class VLLMMPT(openllm.LLM['vllm.LLMEngine', 'transformers.GPTNeoXTokenizerFast']):
  __openllm_internal__ = True
--- a/openllm-python/src/openllm/models/opt/init.py
+++ b/openllm-python/src/openllm/models/opt/init.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import sys, typing as t
+import sys
+import typing as t
+
 from openllm.exceptions import MissingDependencyError
 from openllm.utils import LazyModule, is_flax_available, is_tf_available, is_torch_available, is_vllm_available
 from openllm_core.config.configuration_opt import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING, OPTConfig as OPTConfig
--- a/openllm-python/src/openllm/models/opt/modeling_flax_opt.py
+++ b/openllm-python/src/openllm/models/opt/modeling_flax_opt.py
@@ -1,5 +1,9 @@
 from __future__ import annotations
-import logging, typing as t, bentoml, openllm
+import logging
+import typing as t
+
+import bentoml
+import openllm
 from openllm._prompt import process_prompt
 from openllm.utils import generate_labels
 from openllm_core.config.configuration_opt import DEFAULT_PROMPT_TEMPLATE
--- a/openllm-python/src/openllm/models/opt/modeling_opt.py
+++ b/openllm-python/src/openllm/models/opt/modeling_opt.py
@@ -1,5 +1,8 @@
 from __future__ import annotations
-import logging, typing as t, openllm
+import logging
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import transformers

 logger = logging.getLogger(__name__)
--- a/openllm-python/src/openllm/models/opt/modeling_tf_opt.py
+++ b/openllm-python/src/openllm/models/opt/modeling_tf_opt.py
@@ -1,5 +1,8 @@
 from __future__ import annotations
-import typing as t, bentoml, openllm
+import typing as t
+
+import bentoml
+import openllm
 from openllm_core.utils import generate_labels
 if t.TYPE_CHECKING: import transformers
 class TFOPT(openllm.LLM['transformers.TFOPTForCausalLM', 'transformers.GPT2Tokenizer']):
--- a/openllm-python/src/openllm/models/opt/modeling_vllm_opt.py
+++ b/openllm-python/src/openllm/models/opt/modeling_vllm_opt.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, openllm
+import typing as t
+
+import openllm
 from openllm_core._prompt import process_prompt
 from openllm_core.config.configuration_opt import DEFAULT_PROMPT_TEMPLATE
 if t.TYPE_CHECKING: import vllm, transformers
--- a/openllm-python/src/openllm/models/stablelm/init.py
+++ b/openllm-python/src/openllm/models/stablelm/init.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import sys, typing as t
+import sys
+import typing as t
+
 from openllm.exceptions import MissingDependencyError
 from openllm.utils import LazyModule, is_torch_available, is_vllm_available
 from openllm_core.config.configuration_stablelm import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_STABLELM_COMMAND_DOCSTRING as START_STABLELM_COMMAND_DOCSTRING, StableLMConfig as StableLMConfig
--- a/openllm-python/src/openllm/models/stablelm/modeling_stablelm.py
+++ b/openllm-python/src/openllm/models/stablelm/modeling_stablelm.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, openllm
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import transformers
 class StableLM(openllm.LLM['transformers.GPTNeoXForCausalLM', 'transformers.GPTNeoXTokenizerFast']):
  __openllm_internal__ = True
--- a/openllm-python/src/openllm/models/stablelm/modeling_vllm_stablelm.py
+++ b/openllm-python/src/openllm/models/stablelm/modeling_vllm_stablelm.py
@@ -1,5 +1,8 @@
 from __future__ import annotations
-import logging, typing as t, openllm
+import logging
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import vllm, transformers
 class VLLMStableLM(openllm.LLM['vllm.LLMEngine', 'transformers.GPTNeoXTokenizerFast']):
  __openllm_internal__ = True
--- a/openllm-python/src/openllm/models/starcoder/init.py
+++ b/openllm-python/src/openllm/models/starcoder/init.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import sys, typing as t
+import sys
+import typing as t
+
 from openllm.exceptions import MissingDependencyError
 from openllm.utils import LazyModule, is_torch_available, is_vllm_available
 from openllm_core.config.configuration_starcoder import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_STARCODER_COMMAND_DOCSTRING as START_STARCODER_COMMAND_DOCSTRING, StarCoderConfig as StarCoderConfig
--- a/openllm-python/src/openllm/models/starcoder/modeling_starcoder.py
+++ b/openllm-python/src/openllm/models/starcoder/modeling_starcoder.py
@@ -1,5 +1,9 @@
 from __future__ import annotations
-import logging, typing as t, bentoml, openllm
+import logging
+import typing as t
+
+import bentoml
+import openllm
 from openllm.utils import generate_labels
 from openllm_core.config.configuration_starcoder import EOD, FIM_MIDDLE, FIM_PAD, FIM_PREFIX, FIM_SUFFIX
 if t.TYPE_CHECKING: import transformers
@@ -12,7 +16,8 @@ class StarCoder(openllm.LLM['transformers.GPTBigCodeForCausalLM', 'transformers.
    return {'device_map': 'auto' if torch.cuda.is_available() and torch.cuda.device_count() > 1 else None, 'torch_dtype': torch.float16 if torch.cuda.is_available() else torch.float32}, {}

  def import_model(self, *args: t.Any, trust_remote_code: bool = False, **attrs: t.Any) -> bentoml.Model:
-    import torch, transformers
+    import torch
+    import transformers
    torch_dtype, device_map = attrs.pop('torch_dtype', torch.float16), attrs.pop('device_map', 'auto')
    tokenizer = transformers.AutoTokenizer.from_pretrained(self.model_id, **self.llm_parameters[-1])
    tokenizer.add_special_tokens({'additional_special_tokens': [EOD, FIM_PREFIX, FIM_MIDDLE, FIM_SUFFIX, FIM_PAD], 'pad_token': EOD})
--- a/openllm-python/src/openllm/models/starcoder/modeling_vllm_starcoder.py
+++ b/openllm-python/src/openllm/models/starcoder/modeling_vllm_starcoder.py
@@ -1,5 +1,8 @@
 from __future__ import annotations
-import logging, typing as t, openllm
+import logging
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import vllm, transformers
 class VLLMStarCoder(openllm.LLM['vllm.LLMEngine', 'transformers.GPT2TokenizerFast']):
  __openllm_internal__ = True
--- a/openllm-python/src/openllm/playground/falcon_tuned.py
+++ b/openllm-python/src/openllm/playground/falcon_tuned.py
@@ -22,10 +22,8 @@ logger = logging.getLogger(__name__)

 from datasets import load_dataset
 from trl import SFTTrainer
-
 DEFAULT_MODEL_ID = "ybelkada/falcon-7b-sharded-bf16"
 DATASET_NAME = "timdettmers/openassistant-guanaco"
-
@dataclasses.dataclass
 class TrainingArguments:
  per_device_train_batch_size: int = dataclasses.field(default=4)
@@ -42,12 +40,10 @@ class TrainingArguments:
  group_by_length: bool = dataclasses.field(default=True)
  lr_scheduler_type: str = dataclasses.field(default="constant")
  output_dir: str = dataclasses.field(default=os.path.join(os.getcwd(), "outputs", "falcon"))
-
@dataclasses.dataclass
 class ModelArguments:
  model_id: str = dataclasses.field(default=DEFAULT_MODEL_ID)
  max_sequence_length: int = dataclasses.field(default=512)
-
 parser = transformers.HfArgumentParser((ModelArguments, TrainingArguments))
 if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
  # If we pass only one argument to the script and it's the path to a json file,
@@ -56,13 +52,20 @@ if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
 else:
  model_args, training_args = t.cast(t.Tuple[ModelArguments, TrainingArguments], parser.parse_args_into_dataclasses())

-model, tokenizer = openllm.AutoLLM.for_model("falcon", model_id=model_args.model_id, quantize="int4", bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16, ensure_available=True,).prepare_for_training(adapter_type="lora", lora_alpha=16, lora_dropout=0.1, r=16, bias="none", target_modules=["query_key_value", "dense", "dense_h_to_4h", "dense_4h_to_h",],)
+model, tokenizer = openllm.AutoLLM.for_model("falcon", model_id=model_args.model_id, quantize="int4", bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16, ensure_available=True).prepare_for_training(adapter_type="lora", lora_alpha=16, lora_dropout=0.1, r=16, bias="none", target_modules=["query_key_value", "dense", "dense_h_to_4h", "dense_4h_to_h"])
 model.config.use_cache = False
 tokenizer.pad_token = tokenizer.eos_token

 dataset = load_dataset(DATASET_NAME, split="train")

-trainer = SFTTrainer(model=model, train_dataset=dataset, dataset_text_field="text", max_seq_length=model_args.max_sequence_length, tokenizer=tokenizer, args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir), **dataclasses.asdict(training_args),),)
+trainer = SFTTrainer(
+    model=model,
+    train_dataset=dataset,
+    dataset_text_field="text",
+    max_seq_length=model_args.max_sequence_length,
+    tokenizer=tokenizer,
+    args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir), **dataclasses.asdict(training_args)),
+)

 # upcast layernorm in float32 for more stable training
 for name, module in trainer.model.named_modules():
--- a/openllm-python/src/openllm/playground/features.py
+++ b/openllm-python/src/openllm/playground/features.py
@@ -4,7 +4,6 @@ import logging
 import typing as t

 import openllm
-
 openllm.utils.configure_logging()

 logger = logging.getLogger(__name__)
@@ -13,7 +12,6 @@ MAX_NEW_TOKENS = 384

 Q = "Answer the following question, step by step:\n{q}\nA:"
 question = "What is the meaning of life?"
-
 def main() -> int:
  parser = argparse.ArgumentParser()
  parser.add_argument("question", default=question)
@@ -44,11 +42,9 @@ def main() -> int:
  logger.info("=" * 10, "Response:", r.llm.postprocess_generate(prompt, res))

  return 0
-
 def _mp_fn(index: t.Any):  # noqa # type: ignore
  # For xla_spawn (TPUs)
  main()
-
 if openllm.utils.in_notebook():
  main()
 else:
--- a/openllm-python/src/openllm/playground/llama2_qlora.py
+++ b/openllm-python/src/openllm/playground/llama2_qlora.py
@@ -29,7 +29,6 @@ from random import randint, randrange

 import bitsandbytes as bnb
 from datasets import load_dataset
-
 # COPIED FROM https://github.com/artidoro/qlora/blob/main/qlora.py
 def find_all_linear_names(model):
  lora_module_names = set()
@@ -41,13 +40,11 @@ def find_all_linear_names(model):
  if "lm_head" in lora_module_names:  # needed for 16-bit
    lora_module_names.remove("lm_head")
  return list(lora_module_names)
-
 # Change this to the local converted path if you don't have access to the meta-llama model
 DEFAULT_MODEL_ID = "meta-llama/Llama-2-7b-hf"
 # change this to 'main' if you want to use the latest llama
 DEFAULT_MODEL_VERSION = "335a02887eb6684d487240bbc28b5699298c3135"
 DATASET_NAME = "databricks/databricks-dolly-15k"
-
 def format_dolly(sample):
  instruction = f"### Instruction\n{sample['instruction']}"
  context = f"### Context\n{sample['context']}" if len(sample["context"]) > 0 else None
@@ -55,15 +52,12 @@ def format_dolly(sample):
  # join all the parts together
  prompt = "\n\n".join([i for i in [instruction, context, response] if i is not None])
  return prompt
-
 # template dataset to add prompt to each sample
 def template_dataset(sample, tokenizer):
  sample["text"] = f"{format_dolly(sample)}{tokenizer.eos_token}"
  return sample
-
 # empty list to save remainder from batches to use in next batch
 remainder = {"input_ids": [], "attention_mask": [], "token_type_ids": []}
-
 def chunk(sample, chunk_length=2048):
  # define global remainder variable to save remainder from batches to use in next batch
  global remainder
@@ -84,7 +78,6 @@ def chunk(sample, chunk_length=2048):
  # prepare labels
  result["labels"] = result["input_ids"].copy()
  return result
-
 def prepare_datasets(tokenizer, dataset_name=DATASET_NAME):
  # Load dataset from the hub
  dataset = load_dataset(dataset_name, split="train")
@@ -103,11 +96,20 @@ def prepare_datasets(tokenizer, dataset_name=DATASET_NAME):
  # Print total number of samples
  print(f"Total number of samples: {len(lm_dataset)}")
  return lm_dataset
-
-def prepare_for_int4_training(model_id: str, model_version: str | None = None, gradient_checkpointing: bool = True, bf16: bool = True,) -> tuple[peft.PeftModel, transformers.LlamaTokenizerFast]:
+def prepare_for_int4_training(model_id: str, model_version: str | None = None, gradient_checkpointing: bool = True, bf16: bool = True,
+                              ) -> tuple[peft.PeftModel, transformers.LlamaTokenizerFast]:
  from peft.tuners.lora import LoraLayer

-  llm = openllm.AutoLLM.for_model("llama", model_id=model_id, model_version=model_version, ensure_available=True, quantize="int4", bnb_4bit_compute_dtype=torch.bfloat16, use_cache=not gradient_checkpointing, device_map="auto",)
+  llm = openllm.AutoLLM.for_model(
+      "llama",
+      model_id=model_id,
+      model_version=model_version,
+      ensure_available=True,
+      quantize="int4",
+      bnb_4bit_compute_dtype=torch.bfloat16,
+      use_cache=not gradient_checkpointing,
+      device_map="auto",
+  )
  print("Model summary:", llm.model)

  # get lora target modules
@@ -128,7 +130,6 @@ def prepare_for_int4_training(model_id: str, model_version: str | None = None, g
        if bf16 and module.weight.dtype == torch.float32:
          module = module.to(torch.bfloat16)
  return model, tokenizer
-
@dataclasses.dataclass
 class TrainingArguments:
  per_device_train_batch_size: int = dataclasses.field(default=1)
@@ -140,14 +141,12 @@ class TrainingArguments:
  report_to: str = dataclasses.field(default="none")
  output_dir: str = dataclasses.field(default=os.path.join(os.getcwd(), "outputs", "llama"))
  save_strategy: str = dataclasses.field(default="no")
-
@dataclasses.dataclass
 class ModelArguments:
  model_id: str = dataclasses.field(default=DEFAULT_MODEL_ID)
  model_version: str = dataclasses.field(default=DEFAULT_MODEL_VERSION)
  seed: int = dataclasses.field(default=42)
  merge_weights: bool = dataclasses.field(default=False)
-
 if openllm.utils.in_notebook():
  model_args, training_rags = ModelArguments(), TrainingArguments()
 else:
@@ -161,7 +160,6 @@ else:

 # import the model first hand
 openllm.import_model("llama", model_id=model_args.model_id, model_version=model_args.model_version)
-
 def train_loop(model_args: ModelArguments, training_args: TrainingArguments):
  import peft

@@ -170,7 +168,12 @@ def train_loop(model_args: ModelArguments, training_args: TrainingArguments):
  model, tokenizer = prepare_for_int4_training(model_args.model_id, gradient_checkpointing=training_args.gradient_checkpointing, bf16=training_args.bf16,)
  datasets = prepare_datasets(tokenizer)

-  trainer = transformers.Trainer(model=model, args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir), **dataclasses.asdict(training_args)), train_dataset=datasets, data_collator=transformers.default_data_collator,)
+  trainer = transformers.Trainer(
+      model=model,
+      args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir), **dataclasses.asdict(training_args)),
+      train_dataset=datasets,
+      data_collator=transformers.default_data_collator,
+  )

  trainer.train()

@@ -191,5 +194,4 @@ def train_loop(model_args: ModelArguments, training_args: TrainingArguments):
    model.save_pretrained(os.path.join(os.getcwd(), "outputs", "merged_llama_lora"), safe_serialization=True, max_shard_size="2GB")
  else:
    trainer.model.save_pretrained(os.path.join(training_args.output_dir, "lora"))
-
 train_loop(model_args, training_args)
--- a/openllm-python/src/openllm/playground/opt_tuned.py
+++ b/openllm-python/src/openllm/playground/opt_tuned.py
@@ -23,12 +23,14 @@ from datasets import load_dataset

 if t.TYPE_CHECKING:
  from peft import PeftModel
-
 DEFAULT_MODEL_ID = "facebook/opt-6.7b"
-
-def load_trainer(model: PeftModel, tokenizer: transformers.GPT2TokenizerFast, dataset_dict: t.Any, training_args: TrainingArguments,):
-  return transformers.Trainer(model=model, train_dataset=dataset_dict["train"], args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir), **dataclasses.asdict(training_args),), data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),)
-
+def load_trainer(model: PeftModel, tokenizer: transformers.GPT2TokenizerFast, dataset_dict: t.Any, training_args: TrainingArguments):
+  return transformers.Trainer(
+      model=model,
+      train_dataset=dataset_dict["train"],
+      args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir), **dataclasses.asdict(training_args)),
+      data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
+  )
@dataclasses.dataclass
 class TrainingArguments:
  per_device_train_batch_size: int = dataclasses.field(default=4)
@@ -39,11 +41,9 @@ class TrainingArguments:
  fp16: bool = dataclasses.field(default=True)
  logging_steps: int = dataclasses.field(default=1)
  output_dir: str = dataclasses.field(default=os.path.join(os.getcwd(), "outputs", "opt"))
-
@dataclasses.dataclass
 class ModelArguments:
  model_id: str = dataclasses.field(default=DEFAULT_MODEL_ID)
-
 parser = transformers.HfArgumentParser((ModelArguments, TrainingArguments))
 if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
  # If we pass only one argument to the script and it's the path to a json file,
@@ -52,7 +52,7 @@ if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
 else:
  model_args, training_args = t.cast(t.Tuple[ModelArguments, TrainingArguments], parser.parse_args_into_dataclasses())

-model, tokenizer = openllm.AutoLLM.for_model("opt", model_id=model_args.model_id, quantize="int8", ensure_available=True,).prepare_for_training(adapter_type="lora", r=16, lora_alpha=32, target_modules=["q_proj", "v_proj"], lora_dropout=0.05, bias="none",)
+model, tokenizer = openllm.AutoLLM.for_model("opt", model_id=model_args.model_id, quantize="int8", ensure_available=True).prepare_for_training(adapter_type="lora", r=16, lora_alpha=32, target_modules=["q_proj", "v_proj"], lora_dropout=0.05, bias="none")

 # ft on english_quotes
 data = load_dataset("Abirate/english_quotes")
--- a/openllm-python/src/openllm/serialisation/init.py
+++ b/openllm-python/src/openllm/serialisation/init.py
@@ -23,13 +23,18 @@ llm.save_pretrained("./path/to/local-dolly")
 ```
 """
 from __future__ import annotations
-import importlib, typing as t
-import cloudpickle, fs, openllm
-from bentoml._internal.models.model import CUSTOM_OBJECTS_FILENAME
-from openllm_core._typing_compat import M, T, ParamSpec
+import importlib
+import typing as t

+import cloudpickle
+import fs
+
+import openllm
+from bentoml._internal.models.model import CUSTOM_OBJECTS_FILENAME
+from openllm_core._typing_compat import M, ParamSpec, T
 if t.TYPE_CHECKING:
  import bentoml
+
  from . import constants as constants, ggml as ggml, transformers as transformers
 P = ParamSpec('P')
 def load_tokenizer(llm: openllm.LLM[t.Any, T], **tokenizer_attrs: t.Any) -> T:
--- a/openllm-python/src/openllm/serialisation/ggml.py
+++ b/openllm-python/src/openllm/serialisation/ggml.py
@@ -4,8 +4,9 @@ This requires ctransformers to be installed.
 '''
 from __future__ import annotations
 import typing as t
-import bentoml, openllm

+import bentoml
+import openllm
 if t.TYPE_CHECKING: from openllm_core._typing_compat import M

 _conversion_strategy = {'pt': 'ggml'}
--- a/openllm-python/src/openllm/serialisation/transformers/init.py
+++ b/openllm-python/src/openllm/serialisation/transformers/init.py
@@ -1,19 +1,27 @@
 '''Serialisation related implementation for Transformers-based implementation.'''
 from __future__ import annotations
-import importlib, logging, typing as t
-import bentoml, openllm
+import importlib
+import logging
+import typing as t
+
 from huggingface_hub import snapshot_download
 from simple_di import Provide, inject
+
+import bentoml
+import openllm
 from bentoml._internal.configuration.containers import BentoMLContainer
 from bentoml._internal.models.model import ModelOptions
-from .weights import HfIgnore
-from ._helpers import check_unintialised_params, infer_autoclass_from_llm, infer_tokenizers_from_llm, make_model_signatures, process_config, update_model

+from ._helpers import check_unintialised_params, infer_autoclass_from_llm, infer_tokenizers_from_llm, make_model_signatures, process_config, update_model
+from .weights import HfIgnore
 if t.TYPE_CHECKING:
  import types

-  import vllm, auto_gptq as autogptq, transformers, torch
+  import auto_gptq as autogptq
+  import torch
  import torch.nn
+  import transformers
+  import vllm

  from bentoml._internal.models import ModelStore
  from openllm_core._typing_compat import DictStrAny, M, T
--- a/openllm-python/src/openllm/serialisation/transformers/_helpers.py
+++ b/openllm-python/src/openllm/serialisation/transformers/_helpers.py
@@ -1,11 +1,17 @@
 from __future__ import annotations
-import copy, typing as t, openllm_core, openllm
+import copy
+import typing as t
+
+import openllm
+import openllm_core
 from bentoml._internal.models.model import ModelInfo, ModelSignature
 from openllm.serialisation.constants import FRAMEWORK_TO_AUTOCLASS_MAPPING, HUB_ATTRS
-
 if t.TYPE_CHECKING:
-  import torch, transformers, bentoml
+  import torch
+  import transformers
  from transformers.models.auto.auto_factory import _BaseAutoModelClass
+
+  import bentoml
  from bentoml._internal.models.model import ModelSignaturesType
  from openllm_core._typing_compat import DictStrAny, M, T
 else:
--- a/openllm-python/src/openllm/serialisation/transformers/weights.py
+++ b/openllm-python/src/openllm/serialisation/transformers/weights.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, attr
+import typing as t
+
+import attr
 from huggingface_hub import HfApi
 if t.TYPE_CHECKING:
  import openllm
--- a/openllm-python/src/openllm/testing.py
+++ b/openllm-python/src/openllm/testing.py
@@ -1,6 +1,13 @@
 '''Tests utilities for OpenLLM.'''
 from __future__ import annotations
-import contextlib, logging, shutil, subprocess, typing as t, bentoml, openllm
+import contextlib
+import logging
+import shutil
+import subprocess
+import typing as t
+
+import bentoml
+import openllm
 if t.TYPE_CHECKING: from ._typing_compat import LiteralRuntime

 logger = logging.getLogger(__name__)
--- a/openllm-python/src/openllm/utils/init.py
+++ b/openllm-python/src/openllm/utils/init.py
@@ -4,12 +4,19 @@ User can import these function for convenience, but
 we won't ensure backward compatibility for these functions. So use with caution.
 """
 from __future__ import annotations
-import typing as t, openllm_core
-from . import (dummy_flax_objects as dummy_flax_objects, dummy_pt_objects as dummy_pt_objects, dummy_tf_objects as dummy_tf_objects, dummy_vllm_objects as dummy_vllm_objects,)
+import typing as t

+import openllm_core
+
+from . import (
+  dummy_flax_objects as dummy_flax_objects,
+  dummy_pt_objects as dummy_pt_objects,
+  dummy_tf_objects as dummy_tf_objects,
+  dummy_vllm_objects as dummy_vllm_objects,
+)
 if t.TYPE_CHECKING:
-  from openllm_core._typing_compat import LiteralRuntime
  import openllm
+  from openllm_core._typing_compat import LiteralRuntime
 def generate_labels(llm: openllm.LLM[t.Any, t.Any]) -> dict[str, t.Any]:
  return {'runtime': llm.runtime, 'framework': 'openllm', 'model_name': llm.config['model_name'], 'architecture': llm.config['architecture'], 'serialisation_format': llm._serialisation_format}
 def infer_auto_class(implementation: LiteralRuntime) -> type[openllm.AutoLLM | openllm.AutoTFLLM | openllm.AutoFlaxLLM | openllm.AutoVLLM]:
--- a/openllm-python/tests/_strategies/_configuration.py
+++ b/openllm-python/tests/_strategies/_configuration.py
@@ -1,7 +1,11 @@
 from __future__ import annotations
-import logging, typing as t, openllm
-from openllm_core._configuration import ModelSettings
+import logging
+import typing as t
+
 from hypothesis import strategies as st
+
+import openllm
+from openllm_core._configuration import ModelSettings
 logger = logging.getLogger(__name__)

 env_strats = st.sampled_from([openllm.utils.EnvVarMixin(model_name) for model_name in openllm.CONFIG_MAPPING.keys()])
--- a/openllm-python/tests/configuration_test.py
+++ b/openllm-python/tests/configuration_test.py
@@ -1,8 +1,18 @@
 from __future__ import annotations
-import contextlib, os, sys, typing as t, attr, pytest, transformers, openllm
+import contextlib
+import os
+import sys
+import typing as t
 from unittest import mock
-from openllm_core._configuration import GenerationConfig, ModelSettings, field_env_key
+
+import attr
+import pytest
+import transformers
 from hypothesis import assume, given, strategies as st
+
+import openllm
+from openllm_core._configuration import GenerationConfig, ModelSettings, field_env_key
+
 from ._strategies._configuration import make_llm_config, model_settings
 # XXX: @aarnphm fixes TypedDict behaviour in 3.11
@pytest.mark.skipif(sys.version_info[:2] == (3, 11), reason='TypedDict in 3.11 behaves differently, so we need to fix this')
--- a/openllm-python/tests/conftest.py
+++ b/openllm-python/tests/conftest.py
@@ -1,5 +1,11 @@
 from __future__ import annotations
-import itertools, os, typing as t, pytest, openllm
+import itertools
+import os
+import typing as t
+
+import pytest
+
+import openllm
 if t.TYPE_CHECKING: from openllm_core._typing_compat import LiteralRuntime

 _FRAMEWORK_MAPPING = {'flan_t5': 'google/flan-t5-small', 'opt': 'facebook/opt-125m', 'baichuan': 'baichuan-inc/Baichuan-7B',}
--- a/openllm-python/tests/models/conftest.py
+++ b/openllm-python/tests/models/conftest.py
@@ -1,16 +1,32 @@
 from __future__ import annotations
-import asyncio, contextlib, functools, logging, sys, time, typing as t
+import asyncio
+import contextlib
+import functools
+import logging
+import sys
+import time
+import typing as t
 from abc import ABC, abstractmethod
-import attr, docker, docker.errors, docker.types, orjson, pytest, openllm
+
+import attr
+import docker
+import docker.errors
+import docker.types
+import orjson
+import pytest
 from syrupy.extensions.json import JSONSnapshotExtension
+
+import openllm
 from openllm._llm import normalise_model_name
 from openllm_core._typing_compat import DictStrAny, ListAny
 logger = logging.getLogger(__name__)

 if t.TYPE_CHECKING:
  import subprocess
+
  from syrupy.assertion import SnapshotAssertion
  from syrupy.types import PropertyFilter, PropertyMatcher, SerializableData, SerializedData
+
  from openllm._configuration import GenerationConfig
  from openllm.client import BaseAsyncClient
 class ResponseComparator(JSONSnapshotExtension):
--- a/openllm-python/tests/models/flan_t5_test.py
+++ b/openllm-python/tests/models/flan_t5_test.py
@@ -4,7 +4,6 @@ import typing as t
 import pytest

 import openllm
-
 if t.TYPE_CHECKING:
  import contextlib

--- a/openllm-python/tests/models/opt_test.py
+++ b/openllm-python/tests/models/opt_test.py
@@ -4,7 +4,6 @@ import typing as t
 import pytest

 import openllm
-
 if t.TYPE_CHECKING:
  import contextlib

--- a/openllm-python/tests/models_test.py
+++ b/openllm-python/tests/models_test.py
@@ -1,6 +1,8 @@
 from __future__ import annotations
-import os, typing as t, pytest
+import os
+import typing as t

+import pytest
 if t.TYPE_CHECKING: import openllm
@pytest.mark.skipif(os.getenv('GITHUB_ACTIONS') is not None, reason='Model is too large for CI')
 def test_flan_t5_implementation(prompt: str, llm: openllm.LLM[t.Any, t.Any]):
--- a/openllm-python/tests/package_test.py
+++ b/openllm-python/tests/package_test.py
@@ -1,5 +1,11 @@
 from __future__ import annotations
-import functools, os, typing as t, pytest, openllm
+import functools
+import os
+import typing as t
+
+import pytest
+
+import openllm
 from bentoml._internal.configuration.containers import BentoMLContainer
 if t.TYPE_CHECKING: from pathlib import Path

--- a/openllm-python/tests/strategies_test.py
+++ b/openllm-python/tests/strategies_test.py
@@ -1,5 +1,10 @@
 from __future__ import annotations
-import os, typing as t, pytest, bentoml
+import os
+import typing as t
+
+import pytest
+
+import bentoml
 from openllm_core import _strategies as strategy
 from openllm_core._strategies import CascadingResourceStrategy, NvidiaGpuResource, get_resource
 if t.TYPE_CHECKING: from _pytest.monkeypatch import MonkeyPatch