From 46c890480640294c3f34706d595559c7ea97dac5 Mon Sep 17 00:00:00 2001
From: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
Date: Fri, 25 Aug 2023 06:38:59 -0400
Subject: [PATCH] cron(style): run formatter [generated] [skip ci] (#257)

---
 cz.py                                         |   6 +-
 examples/bentoml-demo/service.py              |   4 -
 examples/langchain-chains-demo/service.py     |  14 +--
 examples/langchain-tools-demo/service.py      |   2 -
 openllm-client/src/openllm_client/__init__.py |   2 +-
 openllm-client/src/openllm_client/_base.py    |  21 +++-
 .../src/openllm_client/benmin/__init__.py     |   7 +-
 .../src/openllm_client/benmin/_grpc.py        |  19 +++-
 .../src/openllm_client/benmin/_http.py        |  19 +++-
 openllm-client/src/openllm_client/client.py   |   3 +-
 openllm-core/src/openllm_core/__init__.py     |  36 +++++-
 .../src/openllm_core/_configuration.py        |  36 ++++--
 openllm-core/src/openllm_core/_prompt.py      |   3 +-
 openllm-core/src/openllm_core/_schema.py      |   9 +-
 openllm-core/src/openllm_core/_strategies.py  |  17 ++-
 .../src/openllm_core/_typing_compat.py        |  26 +++--
 .../src/openllm_core/config/__init__.py       |  25 +++--
 .../openllm_core/config/configuration_auto.py |  11 +-
 .../config/configuration_baichuan.py          |   4 +-
 .../config/configuration_chatglm.py           |   4 +-
 .../config/configuration_dolly_v2.py          |   4 +-
 .../config/configuration_falcon.py            |   4 +-
 .../config/configuration_flan_t5.py           |   4 +-
 .../config/configuration_gpt_neox.py          |   4 +-
 .../config/configuration_llama.py             |   4 +-
 .../openllm_core/config/configuration_mpt.py  |   6 +-
 .../openllm_core/config/configuration_opt.py  |   6 +-
 .../config/configuration_stablelm.py          |   4 +-
 .../config/configuration_starcoder.py         |   4 +-
 openllm-core/src/openllm_core/exceptions.py   |   1 +
 .../src/openllm_core/utils/__init__.py        | 106 ++++++++++--------
 .../src/openllm_core/utils/analytics.py       |  12 +-
 .../src/openllm_core/utils/codegen.py         |  10 +-
 openllm-core/src/openllm_core/utils/dantic.py |  13 ++-
 .../src/openllm_core/utils/import_utils.py    |  20 +++-
 openllm-core/src/openllm_core/utils/lazy.py   |  16 ++-
 .../src/openllm_core/utils/representation.py  |   6 +-
 openllm-python/src/openllm/__main__.py        |   1 -
 openllm-python/src/openllm/_embeddings.py     |  12 +-
 openllm-python/src/openllm/_generation.py     |   4 +-
 openllm-python/src/openllm/_llm.py            |  35 +++++-
 openllm-python/src/openllm/_quantisation.py   |   9 +-
 openllm-python/src/openllm/_service.py        |  15 ++-
 openllm-python/src/openllm/bundle/__init__.py |   4 +-
 openllm-python/src/openllm/bundle/_package.py |  22 +++-
 .../src/openllm/bundle/oci/__init__.py        |  22 +++-
 openllm-python/src/openllm/cli/_factory.py    |  29 +++--
 openllm-python/src/openllm/cli/_sdk.py        |  19 +++-
 openllm-python/src/openllm/cli/entrypoint.py  |  37 +++++-
 .../cli/extension/build_base_container.py     |   9 +-
 .../src/openllm/cli/extension/dive_bentos.py  |  13 ++-
 .../cli/extension/get_containerfile.py        |   7 +-
 .../src/openllm/cli/extension/get_prompt.py   |  10 +-
 .../src/openllm/cli/extension/list_bentos.py  |   8 +-
 .../src/openllm/cli/extension/list_models.py  |  14 ++-
 .../src/openllm/cli/extension/playground.py   |  22 +++-
 openllm-python/src/openllm/cli/termui.py      |   8 +-
 openllm-python/src/openllm/client.py          |   4 +-
 openllm-python/src/openllm/exceptions.py      |   3 +-
 .../src/openllm/models/auto/__init__.py       |   6 +-
 .../src/openllm/models/auto/factory.py        |  14 ++-
 .../src/openllm/models/auto/modeling_auto.py  |   4 +-
 .../openllm/models/auto/modeling_flax_auto.py |   4 +-
 .../openllm/models/auto/modeling_tf_auto.py   |   4 +-
 .../openllm/models/auto/modeling_vllm_auto.py |   4 +-
 .../src/openllm/models/baichuan/__init__.py   |   4 +-
 .../models/baichuan/modeling_baichuan.py      |   4 +-
 .../models/baichuan/modeling_vllm_baichuan.py |   4 +-
 .../src/openllm/models/chatglm/__init__.py    |   4 +-
 .../models/chatglm/modeling_chatglm.py        |   7 +-
 .../src/openllm/models/dolly_v2/__init__.py   |   4 +-
 .../models/dolly_v2/modeling_dolly_v2.py      |   7 +-
 .../models/dolly_v2/modeling_vllm_dolly_v2.py |   5 +-
 .../src/openllm/models/falcon/__init__.py     |   4 +-
 .../openllm/models/falcon/modeling_falcon.py  |   4 +-
 .../models/falcon/modeling_vllm_falcon.py     |   5 +-
 .../src/openllm/models/flan_t5/__init__.py    |   4 +-
 .../models/flan_t5/modeling_flan_t5.py        |   7 +-
 .../models/flan_t5/modeling_flax_flan_t5.py   |   4 +-
 .../models/flan_t5/modeling_tf_flan_t5.py     |   4 +-
 .../src/openllm/models/gpt_neox/__init__.py   |   4 +-
 .../models/gpt_neox/modeling_gpt_neox.py      |   5 +-
 .../models/gpt_neox/modeling_vllm_gpt_neox.py |   4 +-
 .../src/openllm/models/llama/__init__.py      |   4 +-
 .../openllm/models/llama/modeling_llama.py    |   7 +-
 .../models/llama/modeling_vllm_llama.py       |   4 +-
 .../src/openllm/models/mpt/__init__.py        |   4 +-
 .../src/openllm/models/mpt/modeling_mpt.py    |   9 +-
 .../openllm/models/mpt/modeling_vllm_mpt.py   |   4 +-
 .../src/openllm/models/opt/__init__.py        |   4 +-
 .../openllm/models/opt/modeling_flax_opt.py   |   6 +-
 .../src/openllm/models/opt/modeling_opt.py    |   5 +-
 .../src/openllm/models/opt/modeling_tf_opt.py |   5 +-
 .../openllm/models/opt/modeling_vllm_opt.py   |   4 +-
 .../src/openllm/models/stablelm/__init__.py   |   4 +-
 .../models/stablelm/modeling_stablelm.py      |   4 +-
 .../models/stablelm/modeling_vllm_stablelm.py |   5 +-
 .../src/openllm/models/starcoder/__init__.py  |   4 +-
 .../models/starcoder/modeling_starcoder.py    |   9 +-
 .../starcoder/modeling_vllm_starcoder.py      |   5 +-
 .../src/openllm/playground/falcon_tuned.py    |  15 ++-
 .../src/openllm/playground/features.py        |   4 -
 .../src/openllm/playground/llama2_qlora.py    |  34 +++---
 .../src/openllm/playground/opt_tuned.py       |  16 +--
 .../src/openllm/serialisation/__init__.py     |  13 ++-
 .../src/openllm/serialisation/ggml.py         |   3 +-
 .../serialisation/transformers/__init__.py    |  18 ++-
 .../serialisation/transformers/_helpers.py    |  12 +-
 .../serialisation/transformers/weights.py     |   4 +-
 openllm-python/src/openllm/testing.py         |   9 +-
 openllm-python/src/openllm/utils/__init__.py  |  13 ++-
 .../tests/_strategies/_configuration.py       |   8 +-
 openllm-python/tests/configuration_test.py    |  14 ++-
 openllm-python/tests/conftest.py              |   8 +-
 openllm-python/tests/models/conftest.py       |  20 +++-
 openllm-python/tests/models/flan_t5_test.py   |   1 -
 openllm-python/tests/models/opt_test.py       |   1 -
 openllm-python/tests/models_test.py           |   4 +-
 openllm-python/tests/package_test.py          |   8 +-
 openllm-python/tests/strategies_test.py       |   7 +-
 typings/attr/__init__.pyi                     |  42 ++++---
 typings/attr/_cmp.pyi                         |   1 -
 typings/attr/_compat.pyi                      |   1 -
 typings/attr/_typing_compat.pyi               |   1 -
 typings/attr/converters.pyi                   |   1 -
 typings/attr/exceptions.pyi                   |   1 -
 typings/attr/filters.pyi                      |   1 -
 typings/attr/setters.pyi                      |   1 -
 typings/attr/validators.pyi                   |   1 -
 typings/click_option_group/__init__.pyi       |   1 -
 typings/click_option_group/_core.pyi          |   4 +-
 typings/click_option_group/_decorators.pyi    |   1 -
 typings/cuda/cuda.pyi                         |   1 -
 typings/deepmerge/__init__.pyi                |   1 -
 typings/deepmerge/merger.pyi                  |   1 -
 typings/deepmerge/strategy/dict.pyi           |   1 -
 typings/deepmerge/strategy/list.pyi           |   1 -
 typings/deepmerge/strategy/set.pyi            |   1 -
 typings/jupytext/config.pyi                   |   1 -
 typings/jupytext/formats.pyi                  |   1 -
 typings/jupytext/jupytext.pyi                 |   1 -
 typings/nbformat/notebooknode.pyi             |   1 -
 typings/nbformat/v4/__init__.pyi              |   1 -
 typings/nbformat/v4/convert.pyi               |   1 -
 typings/nbformat/v4/nbbase.pyi                |   1 -
 typings/nbformat/v4/nbjson.pyi                |   1 -
 typings/nbformat/v4/rwbase.pyi                |   1 -
 typings/rsmiBindings.pyi                      |   1 -
 typings/simple_di/__init__.pyi                |   1 -
 typings/simple_di/providers.pyi               |   1 -
 150 files changed, 913 insertions(+), 379 deletions(-)

diff --git a/cz.py b/cz.py
index a600a556..f84c3613 100755
--- a/cz.py
+++ b/cz.py
@@ -1,6 +1,10 @@
 #!/usr/bin/env python3
 from __future__ import annotations
-import itertools, os, token, tokenize
+import itertools
+import os
+import token
+import tokenize
+
 from tabulate import tabulate
 TOKEN_WHITELIST = [token.OP, token.NAME, token.NUMBER, token.STRING]
 def run_cz(dir: str, package: str):
diff --git a/examples/bentoml-demo/service.py b/examples/bentoml-demo/service.py
index e3e927cc..ac78fb80 100644
--- a/examples/bentoml-demo/service.py
+++ b/examples/bentoml-demo/service.py
@@ -1,20 +1,16 @@
-
 from __future__ import annotations
 
 import bentoml
 import openllm
-
 model = "dolly-v2"
 
 llm_config = openllm.AutoConfig.for_model(model)
 llm_runner = openllm.Runner(model, llm_config=llm_config)
 
 svc = bentoml.Service(name="llm-service", runners=[llm_runner])
-
 @svc.on_startup
 def download(_: bentoml.Context):
   llm_runner.download_model()
-
 @svc.api(input=bentoml.io.Text(), output=bentoml.io.Text())
 async def prompt(input_text: str) -> str:
   answer = await llm_runner.generate.async_run(input_text)
diff --git a/examples/langchain-chains-demo/service.py b/examples/langchain-chains-demo/service.py
index 64633833..0c1a2dcf 100644
--- a/examples/langchain-chains-demo/service.py
+++ b/examples/langchain-chains-demo/service.py
@@ -1,4 +1,3 @@
-
 from __future__ import annotations
 import typing as t
 
@@ -9,22 +8,20 @@ from pydantic import BaseModel
 
 import bentoml
 from bentoml.io import JSON, Text
-
 class Query(BaseModel):
   industry: str
   product_name: str
   keywords: t.List[str]
   llm_config: t.Dict[str, t.Any]
-
 def gen_llm(model_name: str, model_id: str | None = None) -> OpenLLM:
   lc_llm = OpenLLM(model_name=model_name, model_id=model_id, embedded=False)
   lc_llm.runner.download_model()
   return lc_llm
-
 llm = gen_llm("dolly-v2", model_id="databricks/dolly-v2-7b")
 
 prompt = PromptTemplate(
-    input_variables=["industry", "product_name", "keywords"], template="""
+    input_variables=["industry", "product_name", "keywords"],
+    template="""
 You are a Facebook Ads Copywriter with a strong background in persuasive
 writing and marketing. You craft compelling copy that appeals to the target
 audience's emotions and needs, peruading them to take action or make a
@@ -41,13 +38,12 @@ Facebook Ads copy:
 chain = LLMChain(llm=llm, prompt=prompt)
 
 svc = bentoml.Service("fb-ads-copy", runners=[llm.runner])
-
 @svc.on_startup
 def download(_: bentoml.Context):
   llm.runner.download_model()
-
-SAMPLE_INPUT = Query(industry="SAAS", product_name="BentoML", keywords=["open source", "developer tool", "AI application platform", "serverless", "cost-efficient"], llm_config=llm.runner.config.model_dump(),)
-
+SAMPLE_INPUT = Query(
+    industry="SAAS", product_name="BentoML", keywords=["open source", "developer tool", "AI application platform", "serverless", "cost-efficient"], llm_config=llm.runner.config.model_dump(),
+)
 @svc.api(input=JSON.from_sample(sample=SAMPLE_INPUT), output=Text())
 def generate(query: Query):
   return chain.run({"industry": query.industry, "product_name": query.product_name, "keywords": ", ".join(query.keywords)})
diff --git a/examples/langchain-tools-demo/service.py b/examples/langchain-tools-demo/service.py
index 41b0faf1..51685533 100644
--- a/examples/langchain-tools-demo/service.py
+++ b/examples/langchain-tools-demo/service.py
@@ -5,14 +5,12 @@ from langchain.llms import OpenLLM
 
 import bentoml
 from bentoml.io import Text
-
 SAMPLE_INPUT = "What is the weather in San Francisco?"
 
 llm = OpenLLM(model_name="dolly-v2", model_id="databricks/dolly-v2-7b", embedded=False,)
 tools = load_tools(["serpapi"], llm=llm)
 agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION)
 svc = bentoml.Service("langchain-openllm", runners=[llm.runner])
-
 @svc.api(input=Text.from_sample(sample=SAMPLE_INPUT), output=Text())
 def chat(input_text: str):
   return agent.run(input_text)
diff --git a/openllm-client/src/openllm_client/__init__.py b/openllm-client/src/openllm_client/__init__.py
index 0dd79140..5ad9ca27 100644
--- a/openllm-client/src/openllm_client/__init__.py
+++ b/openllm-client/src/openllm_client/__init__.py
@@ -2,4 +2,4 @@ from __future__ import annotations
 
 from . import benmin as benmin
 from ._base import BaseAsyncClient as BaseAsyncClient, BaseClient as BaseClient
-from .client import AsyncHTTPClient as AsyncHTTPClient, HTTPClient as HTTPClient, GrpcClient as GrpcClient, AsyncGrpcClient as AsyncGrpcClient
+from .client import AsyncGrpcClient as AsyncGrpcClient, AsyncHTTPClient as AsyncHTTPClient, GrpcClient as GrpcClient, HTTPClient as HTTPClient
diff --git a/openllm-client/src/openllm_client/_base.py b/openllm-client/src/openllm_client/_base.py
index 1e1b8055..99950906 100644
--- a/openllm-client/src/openllm_client/_base.py
+++ b/openllm-client/src/openllm_client/_base.py
@@ -1,14 +1,25 @@
 # mypy: disable-error-code="override,no-redef"
 from __future__ import annotations
-import typing as t, functools, openllm_core, logging, httpx, orjson, attr, abc
+import abc
+import functools
+import logging
+import typing as t
 from http import HTTPStatus
 from urllib.parse import urljoin
-from .benmin import Client as BentoClient, AsyncClient as AsyncBentoClient
-from openllm_core.utils import is_transformers_supports_agent, is_transformers_available, bentoml_cattr, ensure_exec_coro
-from openllm_core._typing_compat import overload, LiteralString
+
+import attr
+import httpx
+import orjson
+
+import openllm_core
+from openllm_core._typing_compat import LiteralString, overload
+from openllm_core.utils import bentoml_cattr, ensure_exec_coro, is_transformers_available, is_transformers_supports_agent
+
+from .benmin import AsyncClient as AsyncBentoClient, Client as BentoClient
 if t.TYPE_CHECKING:
-  from openllm_core._typing_compat import LiteralRuntime, DictStrAny
   import transformers
+
+  from openllm_core._typing_compat import DictStrAny, LiteralRuntime
 logger = logging.getLogger(__name__)
 @attr.define(slots=False, init=False)
 class _ClientAttr:
diff --git a/openllm-client/src/openllm_client/benmin/__init__.py b/openllm-client/src/openllm_client/benmin/__init__.py
index 4c18b15b..9ea1f15b 100644
--- a/openllm-client/src/openllm_client/benmin/__init__.py
+++ b/openllm-client/src/openllm_client/benmin/__init__.py
@@ -12,8 +12,13 @@ The client implementation won't include a dynamic assignment of the service endp
 via `client.call` or `await client.call`.
 """
 from __future__ import annotations
-import typing as t, bentoml, attr, httpx
+import typing as t
 from abc import abstractmethod
+
+import attr
+import httpx
+
+import bentoml
 if t.TYPE_CHECKING: from bentoml._internal.service.inference_api import InferenceAPI
 
 __all__ = ['Client', 'AsyncClient']
diff --git a/openllm-client/src/openllm_client/benmin/_grpc.py b/openllm-client/src/openllm_client/benmin/_grpc.py
index 505e2ef9..f1aca062 100644
--- a/openllm-client/src/openllm_client/benmin/_grpc.py
+++ b/openllm-client/src/openllm_client/benmin/_grpc.py
@@ -1,15 +1,22 @@
 # mypy: disable-error-code="no-redef"
 from __future__ import annotations
-import typing as t, logging, time, functools, bentoml
+import functools
+import logging
+import time
+import typing as t
+
+import bentoml
 from bentoml._internal.service.inference_api import InferenceAPI
-from openllm_client.benmin import Client, AsyncClient
-from openllm_core.utils import is_grpc_available, is_grpc_health_available, ensure_exec_coro
+from bentoml.grpc.utils import import_generated_stubs, load_from_file
+from openllm_client.benmin import AsyncClient, Client
 from openllm_core._typing_compat import NotRequired, overload
-from bentoml.grpc.utils import load_from_file, import_generated_stubs
+from openllm_core.utils import ensure_exec_coro, is_grpc_available, is_grpc_health_available
 if not is_grpc_available() or not is_grpc_health_available(): raise ImportError("gRPC is required to use gRPC client. Install with 'pip install \"openllm-client[grpc]\"'.")
-from grpc import aio
+import grpc
+import grpc_health.v1.health_pb2 as pb_health
+import grpc_health.v1.health_pb2_grpc as services_health
 from google.protobuf import json_format
-import grpc, grpc_health.v1.health_pb2 as pb_health, grpc_health.v1.health_pb2_grpc as services_health
+from grpc import aio
 pb, services = import_generated_stubs('v1')
 
 if t.TYPE_CHECKING:
diff --git a/openllm-client/src/openllm_client/benmin/_http.py b/openllm-client/src/openllm_client/benmin/_http.py
index 32e32218..11772f54 100644
--- a/openllm-client/src/openllm_client/benmin/_http.py
+++ b/openllm-client/src/openllm_client/benmin/_http.py
@@ -1,8 +1,21 @@
 from __future__ import annotations
-import functools, httpx, time, logging, urllib.error, typing as t, orjson, bentoml, starlette.requests, starlette.datastructures, starlette.responses, asyncio
-from bentoml._internal.service.inference_api import InferenceAPI
+import asyncio
+import functools
+import logging
+import time
+import typing as t
+import urllib.error
 from urllib.parse import urlparse
-from openllm_client.benmin import Client, AsyncClient
+
+import httpx
+import orjson
+import starlette.datastructures
+import starlette.requests
+import starlette.responses
+
+import bentoml
+from bentoml._internal.service.inference_api import InferenceAPI
+from openllm_client.benmin import AsyncClient, Client
 from openllm_core.utils import ensure_exec_coro
 logger = logging.getLogger(__name__)
 class HttpClient(Client):
diff --git a/openllm-client/src/openllm_client/client.py b/openllm-client/src/openllm_client/client.py
index 3f07d545..ceafef7e 100644
--- a/openllm-client/src/openllm_client/client.py
+++ b/openllm-client/src/openllm_client/client.py
@@ -1,7 +1,8 @@
 from __future__ import annotations
 import logging
 from urllib.parse import urlparse
-from ._base import BaseClient, BaseAsyncClient
+
+from ._base import BaseAsyncClient, BaseClient
 logger = logging.getLogger(__name__)
 def process_http_address(self: AsyncHTTPClient | HTTPClient, address: str) -> None:
   address = address if '://' in address else 'http://' + address
diff --git a/openllm-core/src/openllm_core/__init__.py b/openllm-core/src/openllm_core/__init__.py
index 93fa5491..8bc3a38b 100644
--- a/openllm-core/src/openllm_core/__init__.py
+++ b/openllm-core/src/openllm_core/__init__.py
@@ -1,7 +1,33 @@
 from __future__ import annotations
-from . import utils as utils
-from . import exceptions as exceptions
-from ._configuration import LLMConfig as LLMConfig, GenerationConfig as GenerationConfig, SamplingParams as SamplingParams
-from ._strategies import CascadingResourceStrategy as CascadingResourceStrategy, get_resource as get_resource, available_resource_spec as available_resource_spec, LiteralResourceSpec as LiteralResourceSpec, NvidiaGpuResource as NvidiaGpuResource, AmdGpuResource as AmdGpuResource
+
+from . import exceptions as exceptions, utils as utils
+from ._configuration import GenerationConfig as GenerationConfig, LLMConfig as LLMConfig, SamplingParams as SamplingParams
 from ._schema import EmbeddingsOutput as EmbeddingsOutput, GenerationInput as GenerationInput, GenerationOutput as GenerationOutput, HfAgentInput as HfAgentInput, MetadataOutput as MetadataOutput, unmarshal_vllm_outputs as unmarshal_vllm_outputs
-from .config import AutoConfig as AutoConfig, CONFIG_MAPPING as CONFIG_MAPPING, CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES, BaichuanConfig as BaichuanConfig, START_BAICHUAN_COMMAND_DOCSTRING as START_BAICHUAN_COMMAND_DOCSTRING, ChatGLMConfig as ChatGLMConfig, START_CHATGLM_COMMAND_DOCSTRING as START_CHATGLM_COMMAND_DOCSTRING, DollyV2Config as DollyV2Config, START_DOLLY_V2_COMMAND_DOCSTRING as START_DOLLY_V2_COMMAND_DOCSTRING, FalconConfig as FalconConfig, START_FALCON_COMMAND_DOCSTRING as START_FALCON_COMMAND_DOCSTRING, FlanT5Config as FlanT5Config, START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING, GPTNeoXConfig as GPTNeoXConfig, START_GPT_NEOX_COMMAND_DOCSTRING as START_GPT_NEOX_COMMAND_DOCSTRING, LlamaConfig as LlamaConfig, START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING, MPTConfig as MPTConfig, START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING, OPTConfig as OPTConfig, START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING, StableLMConfig as StableLMConfig, START_STABLELM_COMMAND_DOCSTRING as START_STABLELM_COMMAND_DOCSTRING, StarCoderConfig as StarCoderConfig, START_STARCODER_COMMAND_DOCSTRING as START_STARCODER_COMMAND_DOCSTRING
+from ._strategies import AmdGpuResource as AmdGpuResource, CascadingResourceStrategy as CascadingResourceStrategy, LiteralResourceSpec as LiteralResourceSpec, NvidiaGpuResource as NvidiaGpuResource, available_resource_spec as available_resource_spec, get_resource as get_resource
+from .config import (
+    CONFIG_MAPPING as CONFIG_MAPPING,
+    CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES,
+    START_BAICHUAN_COMMAND_DOCSTRING as START_BAICHUAN_COMMAND_DOCSTRING,
+    START_CHATGLM_COMMAND_DOCSTRING as START_CHATGLM_COMMAND_DOCSTRING,
+    START_DOLLY_V2_COMMAND_DOCSTRING as START_DOLLY_V2_COMMAND_DOCSTRING,
+    START_FALCON_COMMAND_DOCSTRING as START_FALCON_COMMAND_DOCSTRING,
+    START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING,
+    START_GPT_NEOX_COMMAND_DOCSTRING as START_GPT_NEOX_COMMAND_DOCSTRING,
+    START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING,
+    START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING,
+    START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING,
+    START_STABLELM_COMMAND_DOCSTRING as START_STABLELM_COMMAND_DOCSTRING,
+    START_STARCODER_COMMAND_DOCSTRING as START_STARCODER_COMMAND_DOCSTRING,
+    AutoConfig as AutoConfig,
+    BaichuanConfig as BaichuanConfig,
+    ChatGLMConfig as ChatGLMConfig,
+    DollyV2Config as DollyV2Config,
+    FalconConfig as FalconConfig,
+    FlanT5Config as FlanT5Config,
+    GPTNeoXConfig as GPTNeoXConfig,
+    LlamaConfig as LlamaConfig,
+    MPTConfig as MPTConfig,
+    OPTConfig as OPTConfig,
+    StableLMConfig as StableLMConfig,
+    StarCoderConfig as StarCoderConfig,
+)
diff --git a/openllm-core/src/openllm_core/_configuration.py b/openllm-core/src/openllm_core/_configuration.py
index b2aecb59..54bcbc95 100644
--- a/openllm-core/src/openllm_core/_configuration.py
+++ b/openllm-core/src/openllm_core/_configuration.py
@@ -34,21 +34,37 @@ dynamically during serve, ahead-of-serve or per requests.
 Refer to ``openllm.LLMConfig`` docstring for more information.
 '''
 from __future__ import annotations
-import copy, enum, logging, os, sys, types, typing as t, attr, click_option_group as cog, inflection, orjson, openllm_core
-from cattr.gen import make_dict_structure_fn, make_dict_unstructure_fn, override
-from deepmerge.merger import Merger
-from ._strategies import LiteralResourceSpec, available_resource_spec, resource_spec
-from ._typing_compat import LiteralString, NotRequired, Required, overload, AdapterType, LiteralRuntime
-from .exceptions import ForbiddenAttributeError
-from .utils import ENV_VARS_TRUE_VALUES, MYPY, ReprMixin, bentoml_cattr, codegen, dantic, field_env_key, first_not_none, lenient_issubclass, LazyLoader
-from .utils.import_utils import BACKENDS_MAPPING
+import copy
+import enum
+import logging
+import os
+import sys
+import types
+import typing as t
+
+import attr
+import click_option_group as cog
+import inflection
+import orjson
+
 # NOTE: Using internal API from attr here, since we are actually allowing subclass of openllm_core.LLMConfig to become 'attrs'-ish
 from attr._compat import set_closure_cell
 from attr._make import _CountingAttr, _make_init, _transform_attrs
-from ._typing_compat import AnyCallable, At, Self, ListStr, DictStrAny
+from cattr.gen import make_dict_structure_fn, make_dict_unstructure_fn, override
+from deepmerge.merger import Merger
 
+import openllm_core
+
+from ._strategies import LiteralResourceSpec, available_resource_spec, resource_spec
+from ._typing_compat import AdapterType, AnyCallable, At, DictStrAny, ListStr, LiteralRuntime, LiteralString, NotRequired, Required, Self, overload
+from .exceptions import ForbiddenAttributeError
+from .utils import ENV_VARS_TRUE_VALUES, MYPY, LazyLoader, ReprMixin, bentoml_cattr, codegen, dantic, field_env_key, first_not_none, lenient_issubclass
+from .utils.import_utils import BACKENDS_MAPPING
 if t.TYPE_CHECKING:
-  import click, peft, transformers, vllm
+  import click
+  import peft
+  import transformers
+  import vllm
   from transformers.generation.beam_constraints import Constraint
 else:
   Constraint = t.Any
diff --git a/openllm-core/src/openllm_core/_prompt.py b/openllm-core/src/openllm_core/_prompt.py
index d142eaa7..14a193d2 100644
--- a/openllm-core/src/openllm_core/_prompt.py
+++ b/openllm-core/src/openllm_core/_prompt.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
-import string, typing as t
+import string
+import typing as t
 class PromptFormatter(string.Formatter):
   """This PromptFormatter is largely based on langchain's implementation."""
   def vformat(self, format_string: str, args: t.Sequence[t.Any], kwargs: t.Mapping[str, t.Any]) -> t.Any:
diff --git a/openllm-core/src/openllm_core/_schema.py b/openllm-core/src/openllm_core/_schema.py
index 9eb9b4c8..2a2c82fc 100644
--- a/openllm-core/src/openllm_core/_schema.py
+++ b/openllm-core/src/openllm_core/_schema.py
@@ -1,8 +1,13 @@
 '''Schema definition for OpenLLM. This can be use for client interaction.'''
 from __future__ import annotations
-import functools, typing as t
-import attr, inflection
+import functools
+import typing as t
+
+import attr
+import inflection
+
 from openllm_core._configuration import GenerationConfig, LLMConfig
+
 from .utils import bentoml_cattr
 if t.TYPE_CHECKING: import vllm
 @attr.frozen(slots=True)
diff --git a/openllm-core/src/openllm_core/_strategies.py b/openllm-core/src/openllm_core/_strategies.py
index 289008df..9f54d42a 100644
--- a/openllm-core/src/openllm_core/_strategies.py
+++ b/openllm-core/src/openllm_core/_strategies.py
@@ -1,10 +1,23 @@
 # mypy: disable-error-code="no-redef"
 from __future__ import annotations
-import functools, inspect, logging, math, os, sys, types, typing as t, warnings, psutil, bentoml
+import functools
+import inspect
+import logging
+import math
+import os
+import sys
+import types
+import typing as t
+import warnings
+
+import psutil
+
+import bentoml
 from bentoml._internal.resource import get_resource, system_resources
 from bentoml._internal.runner.strategy import THREAD_ENVS
-from .utils import DEBUG, ReprMixin
+
 from ._typing_compat import overload
+from .utils import DEBUG, ReprMixin
 class DynResource(t.Protocol):
   resource_id: t.ClassVar[str]
 
diff --git a/openllm-core/src/openllm_core/_typing_compat.py b/openllm-core/src/openllm_core/_typing_compat.py
index 25c67700..deb05b05 100644
--- a/openllm-core/src/openllm_core/_typing_compat.py
+++ b/openllm-core/src/openllm_core/_typing_compat.py
@@ -1,10 +1,20 @@
 # mypy: disable-error-code="type-arg,valid-type"
 from __future__ import annotations
-import sys, typing as t, bentoml, attr, abc
-from bentoml._internal.types import ModelSignatureDict as ModelSignatureDict
+import abc
+import sys
+import typing as t
 
+import attr
+
+import bentoml
+from bentoml._internal.types import ModelSignatureDict as ModelSignatureDict
 if t.TYPE_CHECKING:
-  import openllm, peft, transformers, auto_gptq as autogptq, vllm
+  import auto_gptq as autogptq
+  import peft
+  import transformers
+  import vllm
+
+  import openllm
   from bentoml._internal.runner.runnable import RunnableMethod
   from bentoml._internal.runner.runner import RunnerMethod
   from bentoml._internal.runner.strategy import Strategy
@@ -31,16 +41,14 @@ LiteralContainerRegistry = t.Literal['docker', 'gh', 'ecr']
 LiteralContainerVersionStrategy = t.Literal['release', 'nightly', 'latest', 'custom']
 
 if sys.version_info[:2] >= (3, 11):
-  from typing import LiteralString as LiteralString, Self as Self, overload as overload
-  from typing import NotRequired as NotRequired, Required as Required, dataclass_transform as dataclass_transform
+  from typing import LiteralString as LiteralString, NotRequired as NotRequired, Required as Required, Self as Self, dataclass_transform as dataclass_transform, overload as overload
 else:
-  from typing_extensions import LiteralString as LiteralString, Self as Self, overload as overload
-  from typing_extensions import NotRequired as NotRequired, Required as Required, dataclass_transform as dataclass_transform
+  from typing_extensions import LiteralString as LiteralString, NotRequired as NotRequired, Required as Required, Self as Self, dataclass_transform as dataclass_transform, overload as overload
 
 if sys.version_info[:2] >= (3, 10):
-  from typing import TypeAlias as TypeAlias, ParamSpec as ParamSpec, Concatenate as Concatenate
+  from typing import Concatenate as Concatenate, ParamSpec as ParamSpec, TypeAlias as TypeAlias
 else:
-  from typing_extensions import TypeAlias as TypeAlias, ParamSpec as ParamSpec, Concatenate as Concatenate
+  from typing_extensions import Concatenate as Concatenate, ParamSpec as ParamSpec, TypeAlias as TypeAlias
 class PeftAdapterOutput(t.TypedDict):
   success: bool
   result: t.Dict[str, peft.PeftConfig]
diff --git a/openllm-core/src/openllm_core/config/__init__.py b/openllm-core/src/openllm_core/config/__init__.py
index 2edcadd2..e13af81c 100644
--- a/openllm-core/src/openllm_core/config/__init__.py
+++ b/openllm-core/src/openllm_core/config/__init__.py
@@ -1,13 +1,14 @@
 from __future__ import annotations
-from .configuration_auto import AutoConfig as AutoConfig, CONFIG_MAPPING as CONFIG_MAPPING, CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES
-from .configuration_baichuan import BaichuanConfig as BaichuanConfig, START_BAICHUAN_COMMAND_DOCSTRING as START_BAICHUAN_COMMAND_DOCSTRING
-from .configuration_chatglm import ChatGLMConfig as ChatGLMConfig, START_CHATGLM_COMMAND_DOCSTRING as START_CHATGLM_COMMAND_DOCSTRING
-from .configuration_dolly_v2 import DollyV2Config as DollyV2Config, START_DOLLY_V2_COMMAND_DOCSTRING as START_DOLLY_V2_COMMAND_DOCSTRING
-from .configuration_falcon import FalconConfig as FalconConfig, START_FALCON_COMMAND_DOCSTRING as START_FALCON_COMMAND_DOCSTRING
-from .configuration_flan_t5 import FlanT5Config as FlanT5Config, START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING
-from .configuration_gpt_neox import GPTNeoXConfig as GPTNeoXConfig, START_GPT_NEOX_COMMAND_DOCSTRING as START_GPT_NEOX_COMMAND_DOCSTRING
-from .configuration_llama import LlamaConfig as LlamaConfig, START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING
-from .configuration_mpt import MPTConfig as MPTConfig, START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING
-from .configuration_opt import OPTConfig as OPTConfig, START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING
-from .configuration_stablelm import StableLMConfig as StableLMConfig, START_STABLELM_COMMAND_DOCSTRING as START_STABLELM_COMMAND_DOCSTRING
-from .configuration_starcoder import StarCoderConfig as StarCoderConfig, START_STARCODER_COMMAND_DOCSTRING as START_STARCODER_COMMAND_DOCSTRING
+
+from .configuration_auto import CONFIG_MAPPING as CONFIG_MAPPING, CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES, AutoConfig as AutoConfig
+from .configuration_baichuan import START_BAICHUAN_COMMAND_DOCSTRING as START_BAICHUAN_COMMAND_DOCSTRING, BaichuanConfig as BaichuanConfig
+from .configuration_chatglm import START_CHATGLM_COMMAND_DOCSTRING as START_CHATGLM_COMMAND_DOCSTRING, ChatGLMConfig as ChatGLMConfig
+from .configuration_dolly_v2 import START_DOLLY_V2_COMMAND_DOCSTRING as START_DOLLY_V2_COMMAND_DOCSTRING, DollyV2Config as DollyV2Config
+from .configuration_falcon import START_FALCON_COMMAND_DOCSTRING as START_FALCON_COMMAND_DOCSTRING, FalconConfig as FalconConfig
+from .configuration_flan_t5 import START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING, FlanT5Config as FlanT5Config
+from .configuration_gpt_neox import START_GPT_NEOX_COMMAND_DOCSTRING as START_GPT_NEOX_COMMAND_DOCSTRING, GPTNeoXConfig as GPTNeoXConfig
+from .configuration_llama import START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING, LlamaConfig as LlamaConfig
+from .configuration_mpt import START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING, MPTConfig as MPTConfig
+from .configuration_opt import START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING, OPTConfig as OPTConfig
+from .configuration_stablelm import START_STABLELM_COMMAND_DOCSTRING as START_STABLELM_COMMAND_DOCSTRING, StableLMConfig as StableLMConfig
+from .configuration_starcoder import START_STARCODER_COMMAND_DOCSTRING as START_STARCODER_COMMAND_DOCSTRING, StarCoderConfig as StarCoderConfig
diff --git a/openllm-core/src/openllm_core/config/configuration_auto.py b/openllm-core/src/openllm_core/config/configuration_auto.py
index 35803e58..b7e8a622 100644
--- a/openllm-core/src/openllm_core/config/configuration_auto.py
+++ b/openllm-core/src/openllm_core/config/configuration_auto.py
@@ -1,13 +1,18 @@
 # mypy: disable-error-code="type-arg"
 from __future__ import annotations
-import inflection, openllm_core, importlib, typing as t
+import importlib
+import typing as t
 from collections import OrderedDict
-from openllm_core.utils import ReprMixin
 
+import inflection
+
+import openllm_core
+from openllm_core.utils import ReprMixin
 if t.TYPE_CHECKING:
   import types
-  from openllm_core._typing_compat import LiteralString
   from collections import _odict_items, _odict_keys, _odict_values
+
+  from openllm_core._typing_compat import LiteralString
   ConfigKeysView = _odict_keys[str, type[openllm_core.LLMConfig]]
   ConfigValuesView = _odict_values[str, type[openllm_core.LLMConfig]]
   ConfigItemsView = _odict_items[str, type[openllm_core.LLMConfig]]
diff --git a/openllm-core/src/openllm_core/config/configuration_baichuan.py b/openllm-core/src/openllm_core/config/configuration_baichuan.py
index 8b587408..5556d237 100644
--- a/openllm-core/src/openllm_core/config/configuration_baichuan.py
+++ b/openllm-core/src/openllm_core/config/configuration_baichuan.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import openllm_core, typing as t
+import typing as t
+
+import openllm_core
 from openllm_core._prompt import process_prompt
 START_BAICHUAN_COMMAND_DOCSTRING = '''\
 Run a LLMServer for Baichuan model.
diff --git a/openllm-core/src/openllm_core/config/configuration_chatglm.py b/openllm-core/src/openllm_core/config/configuration_chatglm.py
index 9eddb2c5..8cd7cb05 100644
--- a/openllm-core/src/openllm_core/config/configuration_chatglm.py
+++ b/openllm-core/src/openllm_core/config/configuration_chatglm.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import openllm_core, typing as t
+import typing as t
+
+import openllm_core
 from openllm_core.utils import dantic
 START_CHATGLM_COMMAND_DOCSTRING = '''\
 Run a LLMServer for ChatGLM model.
diff --git a/openllm-core/src/openllm_core/config/configuration_dolly_v2.py b/openllm-core/src/openllm_core/config/configuration_dolly_v2.py
index 3f47389d..568cac9e 100644
--- a/openllm-core/src/openllm_core/config/configuration_dolly_v2.py
+++ b/openllm-core/src/openllm_core/config/configuration_dolly_v2.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, openllm_core
+import typing as t
+
+import openllm_core
 from openllm_core._prompt import process_prompt
 from openllm_core.utils import dantic
 if t.TYPE_CHECKING: import transformers
diff --git a/openllm-core/src/openllm_core/config/configuration_falcon.py b/openllm-core/src/openllm_core/config/configuration_falcon.py
index 2d621a80..95be5452 100644
--- a/openllm-core/src/openllm_core/config/configuration_falcon.py
+++ b/openllm-core/src/openllm_core/config/configuration_falcon.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import openllm_core, typing as t
+import typing as t
+
+import openllm_core
 from openllm_core._prompt import process_prompt
 START_FALCON_COMMAND_DOCSTRING = '''\
 Run a LLMServer for FalconLM model.
diff --git a/openllm-core/src/openllm_core/config/configuration_flan_t5.py b/openllm-core/src/openllm_core/config/configuration_flan_t5.py
index 53a4e020..191dbdfd 100644
--- a/openllm-core/src/openllm_core/config/configuration_flan_t5.py
+++ b/openllm-core/src/openllm_core/config/configuration_flan_t5.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import openllm_core, typing as t
+import typing as t
+
+import openllm_core
 from openllm_core._prompt import process_prompt
 START_FLAN_T5_COMMAND_DOCSTRING = '''\
 Run a LLMServer for FLAN-T5 model.
diff --git a/openllm-core/src/openllm_core/config/configuration_gpt_neox.py b/openllm-core/src/openllm_core/config/configuration_gpt_neox.py
index f74e7b84..9438c608 100644
--- a/openllm-core/src/openllm_core/config/configuration_gpt_neox.py
+++ b/openllm-core/src/openllm_core/config/configuration_gpt_neox.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import openllm_core, typing as t
+import typing as t
+
+import openllm_core
 from openllm_core._prompt import process_prompt
 from openllm_core.utils import dantic
 START_GPT_NEOX_COMMAND_DOCSTRING = '''\
diff --git a/openllm-core/src/openllm_core/config/configuration_llama.py b/openllm-core/src/openllm_core/config/configuration_llama.py
index 8236127a..20e0207d 100644
--- a/openllm-core/src/openllm_core/config/configuration_llama.py
+++ b/openllm-core/src/openllm_core/config/configuration_llama.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, openllm_core
+import typing as t
+
+import openllm_core
 from openllm_core._prompt import process_prompt
 from openllm_core.utils import dantic
 START_LLAMA_COMMAND_DOCSTRING = '''\
diff --git a/openllm-core/src/openllm_core/config/configuration_mpt.py b/openllm-core/src/openllm_core/config/configuration_mpt.py
index ef623a19..f62451e9 100644
--- a/openllm-core/src/openllm_core/config/configuration_mpt.py
+++ b/openllm-core/src/openllm_core/config/configuration_mpt.py
@@ -1,7 +1,9 @@
 from __future__ import annotations
-import typing as t, openllm_core
-from openllm_core.utils import dantic
+import typing as t
+
+import openllm_core
 from openllm_core._prompt import process_prompt
+from openllm_core.utils import dantic
 MPTPromptType = t.Literal['default', 'instruct', 'chat', 'storywriter']
 
 START_MPT_COMMAND_DOCSTRING = '''\
diff --git a/openllm-core/src/openllm_core/config/configuration_opt.py b/openllm-core/src/openllm_core/config/configuration_opt.py
index 40f15cab..e3da9838 100644
--- a/openllm-core/src/openllm_core/config/configuration_opt.py
+++ b/openllm-core/src/openllm_core/config/configuration_opt.py
@@ -1,7 +1,9 @@
 from __future__ import annotations
-import openllm_core, typing as t
-from openllm_core.utils import dantic
+import typing as t
+
+import openllm_core
 from openllm_core._prompt import process_prompt
+from openllm_core.utils import dantic
 START_OPT_COMMAND_DOCSTRING = '''\
 Run a LLMServer for OPT model.
 
diff --git a/openllm-core/src/openllm_core/config/configuration_stablelm.py b/openllm-core/src/openllm_core/config/configuration_stablelm.py
index d75b29ce..001d99ab 100644
--- a/openllm-core/src/openllm_core/config/configuration_stablelm.py
+++ b/openllm-core/src/openllm_core/config/configuration_stablelm.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import openllm_core, typing as t
+import typing as t
+
+import openllm_core
 from openllm_core._prompt import process_prompt
 START_STABLELM_COMMAND_DOCSTRING = '''\
 Run a LLMServer for StableLM model.
diff --git a/openllm-core/src/openllm_core/config/configuration_starcoder.py b/openllm-core/src/openllm_core/config/configuration_starcoder.py
index 48025c45..50a60625 100644
--- a/openllm-core/src/openllm_core/config/configuration_starcoder.py
+++ b/openllm-core/src/openllm_core/config/configuration_starcoder.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import openllm_core, typing as t
+import typing as t
+
+import openllm_core
 START_STARCODER_COMMAND_DOCSTRING = '''\
 Run a LLMServer for StarCoder model.
 
diff --git a/openllm-core/src/openllm_core/exceptions.py b/openllm-core/src/openllm_core/exceptions.py
index 7eaf2557..c9ff18c0 100644
--- a/openllm-core/src/openllm_core/exceptions.py
+++ b/openllm-core/src/openllm_core/exceptions.py
@@ -1,5 +1,6 @@
 '''Base exceptions for OpenLLM. This extends BentoML exceptions.'''
 from __future__ import annotations
+
 import bentoml
 class OpenLLMException(bentoml.exceptions.BentoMLException):
   '''Base class for all OpenLLM exceptions. This extends BentoMLException.'''
diff --git a/openllm-core/src/openllm_core/utils/__init__.py b/openllm-core/src/openllm_core/utils/__init__.py
index 4cdf9199..5f619533 100644
--- a/openllm-core/src/openllm_core/utils/__init__.py
+++ b/openllm-core/src/openllm_core/utils/__init__.py
@@ -3,30 +3,44 @@
 User can import these function for convenience, but we won't ensure backward compatibility for these functions. So use with caution.
 """
 from __future__ import annotations
-import contextlib, functools, hashlib, logging, logging.config, os, sys, types, typing as t, openllm_core, asyncio
+import asyncio
+import contextlib
+import functools
+import hashlib
+import logging
+import logging.config
+import os
+import sys
+import types
+import typing as t
 from pathlib import Path
+
 from circus.exc import ConflictError
+
+import openllm_core
 from bentoml._internal.configuration import (
-    DEBUG_ENV_VAR as DEBUG_ENV_VAR,
-    GRPC_DEBUG_ENV_VAR as _GRPC_DEBUG_ENV_VAR,
-    QUIET_ENV_VAR as QUIET_ENV_VAR,
-    get_debug_mode as _get_debug_mode,
-    get_quiet_mode as _get_quiet_mode,
-    set_quiet_mode as set_quiet_mode,
+  DEBUG_ENV_VAR as DEBUG_ENV_VAR,
+  GRPC_DEBUG_ENV_VAR as _GRPC_DEBUG_ENV_VAR,
+  QUIET_ENV_VAR as QUIET_ENV_VAR,
+  get_debug_mode as _get_debug_mode,
+  get_quiet_mode as _get_quiet_mode,
+  set_quiet_mode as set_quiet_mode,
 )
 from bentoml._internal.models.model import ModelContext as _ModelContext
 from bentoml._internal.types import LazyType as LazyType
 from bentoml._internal.utils import (
-    LazyLoader as LazyLoader,
-    bentoml_cattr as bentoml_cattr,
-    calc_dir_size as calc_dir_size,
-    first_not_none as first_not_none,
-    pkg as pkg,
-    reserve_free_port as reserve_free_port,
-    resolve_user_filepath as resolve_user_filepath,
+  LazyLoader as LazyLoader,
+  bentoml_cattr as bentoml_cattr,
+  calc_dir_size as calc_dir_size,
+  first_not_none as first_not_none,
+  pkg as pkg,
+  reserve_free_port as reserve_free_port,
+  resolve_user_filepath as resolve_user_filepath,
+)
+from openllm_core.utils.lazy import (
+  LazyModule as LazyModule,
+  VersionInfo as VersionInfo,
 )
-from openllm_core.utils.lazy import (LazyModule as LazyModule, VersionInfo as VersionInfo,)
-
 if t.TYPE_CHECKING:
   from openllm_core._typing_compat import AnyCallable
 logger = logging.getLogger(__name__)
@@ -295,35 +309,39 @@ _import_structure: dict[str, list[str]] = {
 
 if t.TYPE_CHECKING:
   # NOTE: The following exports useful utils from bentoml
-  from . import (analytics as analytics, codegen as codegen, dantic as dantic,)
+  from . import (
+    analytics as analytics,
+    codegen as codegen,
+    dantic as dantic,
+  )
   from .import_utils import (
-      ENV_VARS_TRUE_VALUES as ENV_VARS_TRUE_VALUES,
-      OPTIONAL_DEPENDENCIES as OPTIONAL_DEPENDENCIES,
-      DummyMetaclass as DummyMetaclass,
-      EnvVarMixin as EnvVarMixin,
-      is_autogptq_available as is_autogptq_available,
-      is_bitsandbytes_available as is_bitsandbytes_available,
-      is_cpm_kernels_available as is_cpm_kernels_available,
-      is_datasets_available as is_datasets_available,
-      is_einops_available as is_einops_available,
-      is_fairscale_available as is_fairscale_available,
-      is_flax_available as is_flax_available,
-      is_jupyter_available as is_jupyter_available,
-      is_jupytext_available as is_jupytext_available,
-      is_notebook_available as is_notebook_available,
-      is_peft_available as is_peft_available,
-      is_sentencepiece_available as is_sentencepiece_available,
-      is_tf_available as is_tf_available,
-      is_torch_available as is_torch_available,
-      is_transformers_supports_agent as is_transformers_supports_agent,
-      is_transformers_supports_kbit as is_transformers_supports_kbit,
-      is_triton_available as is_triton_available,
-      is_vllm_available as is_vllm_available,
-      is_xformers_available as is_xformers_available,
-      is_grpc_available as is_grpc_available,
-      is_grpc_health_available as is_grpc_health_available,
-      is_transformers_available as is_transformers_available,
-      require_backends as require_backends,
+    ENV_VARS_TRUE_VALUES as ENV_VARS_TRUE_VALUES,
+    OPTIONAL_DEPENDENCIES as OPTIONAL_DEPENDENCIES,
+    DummyMetaclass as DummyMetaclass,
+    EnvVarMixin as EnvVarMixin,
+    is_autogptq_available as is_autogptq_available,
+    is_bitsandbytes_available as is_bitsandbytes_available,
+    is_cpm_kernels_available as is_cpm_kernels_available,
+    is_datasets_available as is_datasets_available,
+    is_einops_available as is_einops_available,
+    is_fairscale_available as is_fairscale_available,
+    is_flax_available as is_flax_available,
+    is_grpc_available as is_grpc_available,
+    is_grpc_health_available as is_grpc_health_available,
+    is_jupyter_available as is_jupyter_available,
+    is_jupytext_available as is_jupytext_available,
+    is_notebook_available as is_notebook_available,
+    is_peft_available as is_peft_available,
+    is_sentencepiece_available as is_sentencepiece_available,
+    is_tf_available as is_tf_available,
+    is_torch_available as is_torch_available,
+    is_transformers_available as is_transformers_available,
+    is_transformers_supports_agent as is_transformers_supports_agent,
+    is_transformers_supports_kbit as is_transformers_supports_kbit,
+    is_triton_available as is_triton_available,
+    is_vllm_available as is_vllm_available,
+    is_xformers_available as is_xformers_available,
+    require_backends as require_backends,
   )
   from .representation import ReprMixin as ReprMixin
 __lazy = LazyModule(__name__, globals()['__file__'], _import_structure, extra_objects=_extras)
diff --git a/openllm-core/src/openllm_core/utils/analytics.py b/openllm-core/src/openllm_core/utils/analytics.py
index 55064f21..ca58b4ec 100644
--- a/openllm-core/src/openllm_core/utils/analytics.py
+++ b/openllm-core/src/openllm_core/utils/analytics.py
@@ -3,7 +3,17 @@
 Users can disable this with OPENLLM_DO_NOT_TRACK envvar.
 '''
 from __future__ import annotations
-import contextlib, functools, logging, os, re, typing as t, importlib.metadata, attr, openllm_core
+import contextlib
+import functools
+import importlib.metadata
+import logging
+import os
+import re
+import typing as t
+
+import attr
+
+import openllm_core
 from bentoml._internal.utils import analytics as _internal_analytics
 from openllm_core._typing_compat import ParamSpec
 P = ParamSpec('P')
diff --git a/openllm-core/src/openllm_core/utils/codegen.py b/openllm-core/src/openllm_core/utils/codegen.py
index 08155a1d..dabd3216 100644
--- a/openllm-core/src/openllm_core/utils/codegen.py
+++ b/openllm-core/src/openllm_core/utils/codegen.py
@@ -1,10 +1,16 @@
 from __future__ import annotations
-import functools, inspect, linecache, logging, types, typing as t, orjson
+import functools
+import inspect
+import linecache
+import logging
+import types
+import typing as t
 from operator import itemgetter
 
+import orjson
 if t.TYPE_CHECKING:
   import openllm_core
-  from openllm_core._typing_compat import LiteralString, AnyCallable, DictStrAny, ListStr
+  from openllm_core._typing_compat import AnyCallable, DictStrAny, ListStr, LiteralString
   PartialAny = functools.partial[t.Any]
 
 _T = t.TypeVar('_T', bound=t.Callable[..., t.Any])
diff --git a/openllm-core/src/openllm_core/utils/dantic.py b/openllm-core/src/openllm_core/utils/dantic.py
index fc25daf7..d6f5cb69 100644
--- a/openllm-core/src/openllm_core/utils/dantic.py
+++ b/openllm-core/src/openllm_core/utils/dantic.py
@@ -1,8 +1,17 @@
 '''An interface provides the best of pydantic and attrs.'''
 from __future__ import annotations
-import functools, importlib, os, sys, typing as t
+import functools
+import importlib
+import os
+import sys
+import typing as t
 from enum import Enum
-import attr, click, click_option_group as cog, inflection, orjson
+
+import attr
+import click
+import click_option_group as cog
+import inflection
+import orjson
 from click import ParamType, shell_completion as sc, types as click_types
 if t.TYPE_CHECKING: from attr import _ValidatorType
 AnyCallable = t.Callable[..., t.Any]
diff --git a/openllm-core/src/openllm_core/utils/import_utils.py b/openllm-core/src/openllm_core/utils/import_utils.py
index 73b72f83..d0dbe21d 100644
--- a/openllm-core/src/openllm_core/utils/import_utils.py
+++ b/openllm-core/src/openllm_core/utils/import_utils.py
@@ -1,12 +1,22 @@
 '''Some imports utils are vendorred from transformers/utils/import_utils.py for performance reasons.'''
 from __future__ import annotations
-import importlib, importlib.metadata, importlib.util, logging, os, abc, typing as t, openllm_core
+import abc
+import importlib
+import importlib.metadata
+import importlib.util
+import logging
+import os
+import typing as t
 from collections import OrderedDict
-import inflection, packaging.version
-from bentoml._internal.utils import LazyLoader, pkg
-from openllm_core._typing_compat import overload, LiteralString
-from .representation import ReprMixin
 
+import inflection
+import packaging.version
+
+import openllm_core
+from bentoml._internal.utils import LazyLoader, pkg
+from openllm_core._typing_compat import LiteralString, overload
+
+from .representation import ReprMixin
 if t.TYPE_CHECKING:
   BackendOrderedDict = OrderedDict[str, t.Tuple[t.Callable[[], bool], str]]
   from openllm_core._typing_compat import LiteralRuntime
diff --git a/openllm-core/src/openllm_core/utils/lazy.py b/openllm-core/src/openllm_core/utils/lazy.py
index d397bcfa..92b0aebd 100644
--- a/openllm-core/src/openllm_core/utils/lazy.py
+++ b/openllm-core/src/openllm_core/utils/lazy.py
@@ -1,5 +1,19 @@
 from __future__ import annotations
-import functools, importlib, importlib.machinery, importlib.metadata, importlib.util, itertools, os, time, types, warnings, typing as t, attr, openllm_core
+import functools
+import importlib
+import importlib.machinery
+import importlib.metadata
+import importlib.util
+import itertools
+import os
+import time
+import types
+import typing as t
+import warnings
+
+import attr
+
+import openllm_core
 __all__ = ['VersionInfo', 'LazyModule']
 # vendorred from attrs
 @functools.total_ordering
diff --git a/openllm-core/src/openllm_core/utils/representation.py b/openllm-core/src/openllm_core/utils/representation.py
index 4e9df45e..3b75b2d9 100644
--- a/openllm-core/src/openllm_core/utils/representation.py
+++ b/openllm-core/src/openllm_core/utils/representation.py
@@ -1,6 +1,10 @@
 from __future__ import annotations
+import typing as t
 from abc import abstractmethod
-import attr, orjson, typing as t
+
+import attr
+import orjson
+
 from openllm_core import utils
 if t.TYPE_CHECKING: from openllm_core._typing_compat import TypeAlias
 
diff --git a/openllm-python/src/openllm/__main__.py b/openllm-python/src/openllm/__main__.py
index 6754a4d4..e6ef9061 100644
--- a/openllm-python/src/openllm/__main__.py
+++ b/openllm-python/src/openllm/__main__.py
@@ -7,7 +7,6 @@ To start any OpenLLM model:
     openllm start <model_name> --options ...
 '''
 from __future__ import annotations
-
 if __name__ == '__main__':
   from openllm.cli.entrypoint import cli
   cli()
diff --git a/openllm-python/src/openllm/_embeddings.py b/openllm-python/src/openllm/_embeddings.py
index b25f95d2..720825f9 100644
--- a/openllm-python/src/openllm/_embeddings.py
+++ b/openllm-python/src/openllm/_embeddings.py
@@ -1,8 +1,13 @@
 # See https://github.com/bentoml/sentence-embedding-bento for more information.
 from __future__ import annotations
-import bentoml, openllm, transformers, typing as t
+import typing as t
+
+import transformers
 from huggingface_hub import snapshot_download
-from bentoml._internal.frameworks.transformers import MODULE_NAME, API_VERSION
+
+import bentoml
+import openllm
+from bentoml._internal.frameworks.transformers import API_VERSION, MODULE_NAME
 from bentoml._internal.models.model import ModelOptions, ModelSignature
 if t.TYPE_CHECKING: import torch
 
@@ -44,7 +49,8 @@ class GenericEmbeddingRunnable(bentoml.Runnable):
 
   @bentoml.Runnable.method(batchable=True, batch_dim=0)
   def encode(self, sentences: list[str]) -> t.Sequence[openllm.LLMEmbeddings]:
-    import torch, torch.nn.functional as F
+    import torch
+    import torch.nn.functional as F
     encoded_input = self.tokenizer(sentences, padding=True, truncation=True, return_tensors='pt').to(self.device)
     attention_mask = encoded_input['attention_mask']
     # Compute token embeddings
diff --git a/openllm-python/src/openllm/_generation.py b/openllm-python/src/openllm/_generation.py
index b69d3fc0..1c4e8fdc 100644
--- a/openllm-python/src/openllm/_generation.py
+++ b/openllm-python/src/openllm/_generation.py
@@ -1,6 +1,8 @@
 # mypy: disable-error-code="misc"
 from __future__ import annotations
-import typing as t, transformers
+import typing as t
+
+import transformers
 if t.TYPE_CHECKING: import torch, openllm
 
 # reexport from transformers
diff --git a/openllm-python/src/openllm/_llm.py b/openllm-python/src/openllm/_llm.py
index e8c6cfe8..21b0e2c5 100644
--- a/openllm-python/src/openllm/_llm.py
+++ b/openllm-python/src/openllm/_llm.py
@@ -1,18 +1,43 @@
 # mypy: disable-error-code="name-defined,attr-defined"
 from __future__ import annotations
-import functools, inspect, logging, os, re, traceback, types, typing as t, uuid, attr, fs.path, inflection, orjson, bentoml, openllm, openllm_core, gc, pathlib, abc
+import abc
+import functools
+import gc
+import inspect
+import logging
+import os
+import pathlib
+import re
+import traceback
+import types
+import typing as t
+import uuid
+
+import attr
+import fs.path
+import inflection
+import orjson
 from huggingface_hub import hf_hub_download
+
+import bentoml
+import openllm
+import openllm_core
 from bentoml._internal.models.model import ModelSignature
 from openllm_core._configuration import FineTuneConfig, LLMConfig, _object_getattribute, _setattr_class
 from openllm_core._schema import unmarshal_vllm_outputs
+from openllm_core._typing_compat import AdaptersMapping, AdaptersTuple, AdapterType, AnyCallable, DictStrAny, ListStr, LiteralRuntime, LiteralString, LLMEmbeddings, LLMRunnable, LLMRunner, M, ModelSignatureDict as _ModelSignatureDict, NotRequired, PeftAdapterOutput, T, TupleAny, overload
 from openllm_core.utils import DEBUG, ENV_VARS_TRUE_VALUES, MYPY, EnvVarMixin, LazyLoader, ReprMixin, apply, bentoml_cattr, codegen, device_count, first_not_none, generate_hash_from_file, is_peft_available, is_torch_available, non_intrusive_setattr, normalize_attrs_to_model_tokenizer_pair, resolve_filepath, validate_is_path
+
 from ._quantisation import infer_quantisation_config
 from .exceptions import ForbiddenAttributeError, GpuNotAvailableError, OpenLLMException
 from .utils import infer_auto_class
-from openllm_core._typing_compat import AdaptersMapping, AdaptersTuple, AnyCallable, AdapterType, LiteralRuntime, DictStrAny, ListStr, LLMEmbeddings, LLMRunnable, LLMRunner, ModelSignatureDict as _ModelSignatureDict, PeftAdapterOutput, TupleAny, NotRequired, overload, M, T, LiteralString
-
 if t.TYPE_CHECKING:
-  import auto_gptq as autogptq, peft, torch, transformers, vllm
+  import auto_gptq as autogptq
+  import peft
+  import torch
+  import transformers
+  import vllm
+
   from openllm_core._configuration import PeftType
   from openllm_core.utils.representation import ReprArgs
 else:
@@ -1001,7 +1026,7 @@ class LLM(LLMInterface[M, T], ReprMixin):
   ) -> t.Iterator[t.Any]:
     # NOTE: encoder-decoder models will need to implement their own generate_iterator for now
     # inspired from fastchat's generate_stream_func
-    from ._generation import prepare_logits_processor, get_context_length, is_partial_stop
+    from ._generation import get_context_length, is_partial_stop, prepare_logits_processor
 
     len_prompt = len(prompt)
     if stop_token_ids is None: stop_token_ids = []
diff --git a/openllm-python/src/openllm/_quantisation.py b/openllm-python/src/openllm/_quantisation.py
index 36fcdb2b..da02f3ac 100644
--- a/openllm-python/src/openllm/_quantisation.py
+++ b/openllm-python/src/openllm/_quantisation.py
@@ -1,11 +1,14 @@
 # mypy: disable-error-code="name-defined,no-redef"
 from __future__ import annotations
-import logging, typing as t
-from openllm_core.utils import LazyLoader, is_autogptq_available, is_bitsandbytes_available, is_transformers_supports_kbit, pkg
+import logging
+import typing as t
+
 from openllm_core._typing_compat import overload
+from openllm_core.utils import LazyLoader, is_autogptq_available, is_bitsandbytes_available, is_transformers_supports_kbit, pkg
 if t.TYPE_CHECKING:
-  from ._llm import LLM
   from openllm_core._typing_compat import DictStrAny
+
+  from ._llm import LLM
 autogptq, torch, transformers = LazyLoader('autogptq', globals(), 'auto_gptq'), LazyLoader('torch', globals(), 'torch'), LazyLoader('transformers', globals(), 'transformers')
 
 logger = logging.getLogger(__name__)
diff --git a/openllm-python/src/openllm/_service.py b/openllm-python/src/openllm/_service.py
index 6c52e6a2..611a04be 100644
--- a/openllm-python/src/openllm/_service.py
+++ b/openllm-python/src/openllm/_service.py
@@ -1,14 +1,23 @@
 # mypy: disable-error-code="call-arg,misc,attr-defined,type-abstract,type-arg,valid-type,arg-type"
 from __future__ import annotations
-import os, warnings, orjson, bentoml, openllm, openllm_core, typing as t
+import os
+import typing as t
+import warnings
+
+import orjson
 from starlette.applications import Starlette
 from starlette.responses import JSONResponse
 from starlette.routing import Route
+
+import bentoml
+import openllm
+import openllm_core
 if t.TYPE_CHECKING:
-  from openllm_core._typing_compat import TypeAlias
   from starlette.requests import Request
   from starlette.responses import Response
-  from bentoml._internal.runner.runner import RunnerMethod, AbstractRunner
+
+  from bentoml._internal.runner.runner import AbstractRunner, RunnerMethod
+  from openllm_core._typing_compat import TypeAlias
   _EmbeddingMethod: TypeAlias = RunnerMethod[t.Union[bentoml.Runnable, openllm.LLMRunnable[t.Any, t.Any]], [t.List[str]], t.Sequence[openllm.LLMEmbeddings]]
 # The following warnings from bitsandbytes, and probably not that important for users to see
 warnings.filterwarnings('ignore', message='MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization')
diff --git a/openllm-python/src/openllm/bundle/__init__.py b/openllm-python/src/openllm/bundle/__init__.py
index d88e2461..885cfa29 100644
--- a/openllm-python/src/openllm/bundle/__init__.py
+++ b/openllm-python/src/openllm/bundle/__init__.py
@@ -3,7 +3,9 @@
 These utilities will stay internal, and its API can be changed or updated without backward-compatibility.
 """
 from __future__ import annotations
-import os, typing as t
+import os
+import typing as t
+
 from openllm_core.utils import LazyModule
 _import_structure: dict[str, list[str]] = {
     '_package': ['create_bento', 'build_editable', 'construct_python_options', 'construct_docker_options'],
diff --git a/openllm-python/src/openllm/bundle/_package.py b/openllm-python/src/openllm/bundle/_package.py
index d13fa9a4..fcfe317a 100644
--- a/openllm-python/src/openllm/bundle/_package.py
+++ b/openllm-python/src/openllm/bundle/_package.py
@@ -1,18 +1,32 @@
 # mypy: disable-error-code="misc"
 from __future__ import annotations
-import fs, fs.copy, fs.errors, orjson, bentoml, openllm_core, importlib.metadata, inspect, logging, os, typing as t, string
+import importlib.metadata
+import inspect
+import logging
+import os
+import string
+import typing as t
 from pathlib import Path
+
+import fs
+import fs.copy
+import fs.errors
+import orjson
 from simple_di import Provide, inject
+
+import bentoml
+import openllm_core
 from bentoml._internal.bento.build_config import BentoBuildConfig, DockerOptions, ModelSpec, PythonOptions
 from bentoml._internal.configuration.containers import BentoMLContainer
-from . import oci
 
+from . import oci
 if t.TYPE_CHECKING:
-  import openllm
   from fs.base import FS
-  from openllm_core._typing_compat import LiteralString, LiteralContainerRegistry, LiteralContainerVersionStrategy
+
+  import openllm
   from bentoml._internal.bento import BentoStore
   from bentoml._internal.models.model import ModelStore
+  from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy, LiteralString
 logger = logging.getLogger(__name__)
 
 OPENLLM_DEV_BUILD = 'OPENLLM_DEV_BUILD'
diff --git a/openllm-python/src/openllm/bundle/oci/__init__.py b/openllm-python/src/openllm/bundle/oci/__init__.py
index 5b40af8b..bcf91a2d 100644
--- a/openllm-python/src/openllm/bundle/oci/__init__.py
+++ b/openllm-python/src/openllm/bundle/oci/__init__.py
@@ -1,15 +1,27 @@
 # mypy: disable-error-code="misc"
 '''OCI-related utilities for OpenLLM. This module is considered to be internal and API are subjected to change.'''
 from __future__ import annotations
-import functools, importlib, logging, os, pathlib, shutil, subprocess, typing as t, openllm_core
+import functools
+import importlib
+import logging
+import os
+import pathlib
+import shutil
+import subprocess
+import typing as t
 from datetime import datetime, timedelta, timezone
-import attr, orjson, bentoml, openllm
-from openllm_core.utils.lazy import VersionInfo
 
+import attr
+import orjson
+
+import bentoml
+import openllm
+import openllm_core
+from openllm_core.utils.lazy import VersionInfo
 if t.TYPE_CHECKING:
-  from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy
   from ghapi import all
-  from openllm_core._typing_compat import RefTuple, LiteralString
+
+  from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy, LiteralString, RefTuple
 all = openllm_core.utils.LazyLoader('all', globals(), 'ghapi.all')  # noqa: F811
 
 logger = logging.getLogger(__name__)
diff --git a/openllm-python/src/openllm/cli/_factory.py b/openllm-python/src/openllm/cli/_factory.py
index 66684838..729e8992 100644
--- a/openllm-python/src/openllm/cli/_factory.py
+++ b/openllm-python/src/openllm/cli/_factory.py
@@ -1,15 +1,28 @@
 from __future__ import annotations
-import functools, importlib.util, os, typing as t, logging, click, click_option_group as cog, inflection, orjson, bentoml, openllm
-from click import shell_completion as sc
-from bentoml_cli.utils import BentoMLCommandGroup
-from click.shell_completion import CompletionItem
-from openllm_core.utils import DEBUG
-from bentoml._internal.configuration.containers import BentoMLContainer
-from openllm_core._typing_compat import LiteralString, DictStrAny, ParamSpec, Concatenate
-from . import termui
+import functools
+import importlib.util
+import logging
+import os
+import typing as t
 
+import click
+import click_option_group as cog
+import inflection
+import orjson
+from bentoml_cli.utils import BentoMLCommandGroup
+from click import shell_completion as sc
+from click.shell_completion import CompletionItem
+
+import bentoml
+import openllm
+from bentoml._internal.configuration.containers import BentoMLContainer
+from openllm_core._typing_compat import Concatenate, DictStrAny, LiteralString, ParamSpec
+from openllm_core.utils import DEBUG
+
+from . import termui
 if t.TYPE_CHECKING:
   import subprocess
+
   from openllm_core._configuration import LLMConfig
 logger = logging.getLogger(__name__)
 
diff --git a/openllm-python/src/openllm/cli/_sdk.py b/openllm-python/src/openllm/cli/_sdk.py
index 8f300d31..d32cbc67 100644
--- a/openllm-python/src/openllm/cli/_sdk.py
+++ b/openllm-python/src/openllm/cli/_sdk.py
@@ -1,15 +1,26 @@
 from __future__ import annotations
-import itertools, logging, os, re, subprocess, sys, typing as t, bentoml, openllm, openllm_core
+import itertools
+import logging
+import os
+import re
+import subprocess
+import sys
+import typing as t
+
 from simple_di import Provide, inject
+
+import bentoml
+import openllm
+import openllm_core
 from bentoml._internal.configuration.containers import BentoMLContainer
 from openllm.exceptions import OpenLLMException
+
 from . import termui
 from ._factory import start_command_factory
-
 if t.TYPE_CHECKING:
-  from openllm_core._configuration import LLMConfig
-  from openllm_core._typing_compat import LiteralString, LiteralRuntime, LiteralContainerRegistry, LiteralContainerVersionStrategy
   from bentoml._internal.bento import BentoStore
+  from openllm_core._configuration import LLMConfig
+  from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy, LiteralRuntime, LiteralString
 logger = logging.getLogger(__name__)
 def _start(
     model_name: str,
diff --git a/openllm-python/src/openllm/cli/entrypoint.py b/openllm-python/src/openllm/cli/entrypoint.py
index f189eb89..53302034 100644
--- a/openllm-python/src/openllm/cli/entrypoint.py
+++ b/openllm-python/src/openllm/cli/entrypoint.py
@@ -20,22 +20,47 @@ bentomodel = openllm.import_model("falcon", model_id='tiiuae/falcon-7b-instruct'
 ```
 """
 from __future__ import annotations
-import functools, http.client, inspect, itertools, logging, os, platform, re, subprocess, sys, time, traceback, typing as t, attr, click, click_option_group as cog, fs, fs.copy, fs.errors, inflection, orjson, bentoml, openllm
-from simple_di import Provide, inject
+import functools
+import http.client
+import inspect
+import itertools
+import logging
+import os
+import platform
+import re
+import subprocess
+import sys
+import time
+import traceback
+import typing as t
+
+import attr
+import click
+import click_option_group as cog
+import fs
+import fs.copy
+import fs.errors
+import inflection
+import orjson
 from bentoml_cli.utils import BentoMLCommandGroup, opt_callback
+from simple_di import Provide, inject
+
+import bentoml
+import openllm
 from bentoml._internal.configuration.containers import BentoMLContainer
 from bentoml._internal.models.model import ModelStore
-from . import termui
-from ._factory import FC, LiteralOutput, _AnyCallable, bettertransformer_option, container_registry_option, fast_option, machine_option, model_id_option, model_name_argument, model_version_option, output_option, parse_device_callback, quantize_option, serialisation_option, start_command_factory, workers_per_resource_option
 from openllm import bundle, serialisation
 from openllm.exceptions import OpenLLMException
 from openllm.models.auto import CONFIG_MAPPING, MODEL_FLAX_MAPPING_NAMES, MODEL_MAPPING_NAMES, MODEL_TF_MAPPING_NAMES, MODEL_VLLM_MAPPING_NAMES, AutoConfig, AutoLLM
-from openllm_core._typing_compat import DictStrAny, ParamSpec, Concatenate, LiteralString, Self, LiteralRuntime
-from openllm_core.utils import DEBUG, DEBUG_ENV_VAR, OPTIONAL_DEPENDENCIES, QUIET_ENV_VAR, EnvVarMixin, LazyLoader, analytics, bentoml_cattr, compose, configure_logging, dantic, first_not_none, get_debug_mode, get_quiet_mode, is_torch_available, is_transformers_supports_agent, resolve_user_filepath, set_debug_mode, set_quiet_mode
 from openllm.utils import infer_auto_class
+from openllm_core._typing_compat import Concatenate, DictStrAny, LiteralRuntime, LiteralString, ParamSpec, Self
+from openllm_core.utils import DEBUG, DEBUG_ENV_VAR, OPTIONAL_DEPENDENCIES, QUIET_ENV_VAR, EnvVarMixin, LazyLoader, analytics, bentoml_cattr, compose, configure_logging, dantic, first_not_none, get_debug_mode, get_quiet_mode, is_torch_available, is_transformers_supports_agent, resolve_user_filepath, set_debug_mode, set_quiet_mode
 
+from . import termui
+from ._factory import FC, LiteralOutput, _AnyCallable, bettertransformer_option, container_registry_option, fast_option, machine_option, model_id_option, model_name_argument, model_version_option, output_option, parse_device_callback, quantize_option, serialisation_option, start_command_factory, workers_per_resource_option
 if t.TYPE_CHECKING:
   import torch
+
   from bentoml._internal.bento import BentoStore
   from bentoml._internal.container import DefaultBuilder
   from openllm_core._schema import EmbeddingsOutput
diff --git a/openllm-python/src/openllm/cli/extension/build_base_container.py b/openllm-python/src/openllm/cli/extension/build_base_container.py
index 15d7cd80..c2a9af60 100644
--- a/openllm-python/src/openllm/cli/extension/build_base_container.py
+++ b/openllm-python/src/openllm/cli/extension/build_base_container.py
@@ -1,7 +1,12 @@
 from __future__ import annotations
-import typing as t, click, orjson, openllm
+import typing as t
+
+import click
+import orjson
+
+import openllm
 from openllm.cli import termui
-from openllm.cli._factory import machine_option, container_registry_option
+from openllm.cli._factory import container_registry_option, machine_option
 if t.TYPE_CHECKING: from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy
 @click.command(
     'build_base_container',
diff --git a/openllm-python/src/openllm/cli/extension/dive_bentos.py b/openllm-python/src/openllm/cli/extension/dive_bentos.py
index 2921299b..3cf4ea31 100644
--- a/openllm-python/src/openllm/cli/extension/dive_bentos.py
+++ b/openllm-python/src/openllm/cli/extension/dive_bentos.py
@@ -1,11 +1,16 @@
 from __future__ import annotations
-import shutil, subprocess, typing as t, click, psutil, bentoml
-from simple_di import Provide, inject
-from bentoml._internal.configuration.containers import BentoMLContainer
+import shutil
+import subprocess
+import typing as t
 
+import click
+import psutil
+from simple_di import Provide, inject
+
+import bentoml
+from bentoml._internal.configuration.containers import BentoMLContainer
 from openllm.cli import termui
 from openllm.cli._factory import bento_complete_envvar, machine_option
-
 if t.TYPE_CHECKING: from bentoml._internal.bento import BentoStore
 @click.command('dive_bentos', context_settings=termui.CONTEXT_SETTINGS)
 @click.argument('bento', type=str, shell_complete=bento_complete_envvar)
diff --git a/openllm-python/src/openllm/cli/extension/get_containerfile.py b/openllm-python/src/openllm/cli/extension/get_containerfile.py
index 79a2c2b7..d6683844 100644
--- a/openllm-python/src/openllm/cli/extension/get_containerfile.py
+++ b/openllm-python/src/openllm/cli/extension/get_containerfile.py
@@ -1,6 +1,10 @@
 from __future__ import annotations
-import typing as t, click, bentoml
+import typing as t
+
+import click
 from simple_di import Provide, inject
+
+import bentoml
 from bentoml._internal.bento.bento import BentoInfo
 from bentoml._internal.bento.build_config import DockerOptions
 from bentoml._internal.configuration.containers import BentoMLContainer
@@ -8,7 +12,6 @@ from bentoml._internal.container.generate import generate_containerfile
 from openllm.cli import termui
 from openllm.cli._factory import bento_complete_envvar
 from openllm_core.utils import bentoml_cattr
-
 if t.TYPE_CHECKING: from bentoml._internal.bento import BentoStore
 @click.command('get_containerfile', context_settings=termui.CONTEXT_SETTINGS, help='Return Containerfile of any given Bento.')
 @click.argument('bento', type=str, shell_complete=bento_complete_envvar)
diff --git a/openllm-python/src/openllm/cli/extension/get_prompt.py b/openllm-python/src/openllm/cli/extension/get_prompt.py
index 4e0a1d18..995d8ac1 100644
--- a/openllm-python/src/openllm/cli/extension/get_prompt.py
+++ b/openllm-python/src/openllm/cli/extension/get_prompt.py
@@ -1,8 +1,14 @@
 from __future__ import annotations
-import typing as t, click, inflection, orjson, openllm
+import typing as t
+
+import click
+import inflection
+import orjson
 from bentoml_cli.utils import opt_callback
+
+import openllm
 from openllm.cli import termui
-from openllm.cli._factory import model_complete_envvar, output_option, machine_option
+from openllm.cli._factory import machine_option, model_complete_envvar, output_option
 from openllm_core._prompt import process_prompt
 LiteralOutput = t.Literal['json', 'pretty', 'porcelain']
 @click.command('get_prompt', context_settings=termui.CONTEXT_SETTINGS)
diff --git a/openllm-python/src/openllm/cli/extension/list_bentos.py b/openllm-python/src/openllm/cli/extension/list_bentos.py
index 845ea0cb..fe8c832c 100644
--- a/openllm-python/src/openllm/cli/extension/list_bentos.py
+++ b/openllm-python/src/openllm/cli/extension/list_bentos.py
@@ -1,5 +1,11 @@
 from __future__ import annotations
-import click, inflection, orjson, bentoml, openllm
+
+import click
+import inflection
+import orjson
+
+import bentoml
+import openllm
 from bentoml._internal.utils import human_readable_size
 from openllm.cli import termui
 from openllm.cli._factory import LiteralOutput, output_option
diff --git a/openllm-python/src/openllm/cli/extension/list_models.py b/openllm-python/src/openllm/cli/extension/list_models.py
index e0325d0b..b5ab145a 100644
--- a/openllm-python/src/openllm/cli/extension/list_models.py
+++ b/openllm-python/src/openllm/cli/extension/list_models.py
@@ -1,9 +1,15 @@
 from __future__ import annotations
-import typing as t, bentoml, openllm, orjson, inflection, click
-from openllm.cli import termui
-from bentoml._internal.utils import human_readable_size
-from openllm.cli._factory import LiteralOutput, model_name_argument, output_option, model_complete_envvar
+import typing as t
 
+import click
+import inflection
+import orjson
+
+import bentoml
+import openllm
+from bentoml._internal.utils import human_readable_size
+from openllm.cli import termui
+from openllm.cli._factory import LiteralOutput, model_complete_envvar, model_name_argument, output_option
 if t.TYPE_CHECKING: from openllm_core._typing_compat import DictStrAny
 @click.command('list_models', context_settings=termui.CONTEXT_SETTINGS)
 @model_name_argument(required=False, shell_complete=model_complete_envvar)
diff --git a/openllm-python/src/openllm/cli/extension/playground.py b/openllm-python/src/openllm/cli/extension/playground.py
index 1d2d3766..afccb5f0 100644
--- a/openllm-python/src/openllm/cli/extension/playground.py
+++ b/openllm-python/src/openllm/cli/extension/playground.py
@@ -1,11 +1,23 @@
 from __future__ import annotations
-import importlib.machinery, logging, os, pkgutil, subprocess, sys, tempfile, typing as t, click, yaml
-from openllm.cli import termui
-from openllm import playground
-from openllm_core.utils import is_jupyter_available, is_jupytext_available, is_notebook_available
+import importlib.machinery
+import logging
+import os
+import pkgutil
+import subprocess
+import sys
+import tempfile
+import typing as t
 
+import click
+import yaml
+
+from openllm import playground
+from openllm.cli import termui
+from openllm_core.utils import is_jupyter_available, is_jupytext_available, is_notebook_available
 if t.TYPE_CHECKING:
-  import jupytext, nbformat
+  import jupytext
+  import nbformat
+
   from openllm_core._typing_compat import DictStrAny
 logger = logging.getLogger(__name__)
 def load_notebook_metadata() -> DictStrAny:
diff --git a/openllm-python/src/openllm/cli/termui.py b/openllm-python/src/openllm/cli/termui.py
index 5cf9a4f0..0ef9891e 100644
--- a/openllm-python/src/openllm/cli/termui.py
+++ b/openllm-python/src/openllm/cli/termui.py
@@ -1,5 +1,11 @@
 from __future__ import annotations
-import os, typing as t, click, inflection, openllm
+import os
+import typing as t
+
+import click
+import inflection
+
+import openllm
 if t.TYPE_CHECKING: from openllm_core._typing_compat import DictStrAny
 def echo(text: t.Any, fg: str = 'green', _with_style: bool = True, **attrs: t.Any) -> None:
   attrs['fg'] = fg if not openllm.utils.get_debug_mode() else None
diff --git a/openllm-python/src/openllm/client.py b/openllm-python/src/openllm/client.py
index 880cda0c..ee428f92 100644
--- a/openllm-python/src/openllm/client.py
+++ b/openllm-python/src/openllm/client.py
@@ -11,7 +11,9 @@ client.embed("What is the difference between gather and scatter?")
 ```
 '''
 from __future__ import annotations
-import openllm_client, typing as t
+import typing as t
+
+import openllm_client
 if t.TYPE_CHECKING:  from openllm_client import AsyncHTTPClient as AsyncHTTPClient, BaseAsyncClient as BaseAsyncClient, BaseClient as BaseClient, HTTPClient as HTTPClient, GrpcClient as GrpcClient, AsyncGrpcClient as AsyncGrpcClient
 def __dir__() -> t.Sequence[str]:
   return sorted(dir(openllm_client))
diff --git a/openllm-python/src/openllm/exceptions.py b/openllm-python/src/openllm/exceptions.py
index baa23671..4ef8598c 100644
--- a/openllm-python/src/openllm/exceptions.py
+++ b/openllm-python/src/openllm/exceptions.py
@@ -1,3 +1,4 @@
 '''Base exceptions for OpenLLM. This extends BentoML exceptions.'''
 from __future__ import annotations
-from openllm_core.exceptions import OpenLLMException as OpenLLMException, GpuNotAvailableError as GpuNotAvailableError, ValidationError as ValidationError, ForbiddenAttributeError as ForbiddenAttributeError, MissingAnnotationAttributeError as MissingAnnotationAttributeError, MissingDependencyError as MissingDependencyError, Error as Error, FineTuneStrategyNotSupportedError as FineTuneStrategyNotSupportedError
+
+from openllm_core.exceptions import Error as Error, FineTuneStrategyNotSupportedError as FineTuneStrategyNotSupportedError, ForbiddenAttributeError as ForbiddenAttributeError, GpuNotAvailableError as GpuNotAvailableError, MissingAnnotationAttributeError as MissingAnnotationAttributeError, MissingDependencyError as MissingDependencyError, OpenLLMException as OpenLLMException, ValidationError as ValidationError
diff --git a/openllm-python/src/openllm/models/auto/__init__.py b/openllm-python/src/openllm/models/auto/__init__.py
index 22e3e234..30543e30 100644
--- a/openllm-python/src/openllm/models/auto/__init__.py
+++ b/openllm-python/src/openllm/models/auto/__init__.py
@@ -1,8 +1,10 @@
 from __future__ import annotations
-import typing as t, os
+import os
+import typing as t
+
 import openllm
+from openllm_core.config import CONFIG_MAPPING as CONFIG_MAPPING, CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES, AutoConfig as AutoConfig
 from openllm_core.utils import LazyModule, is_flax_available, is_tf_available, is_torch_available, is_vllm_available
-from openllm_core.config import AutoConfig as AutoConfig, CONFIG_MAPPING as CONFIG_MAPPING, CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES
 _import_structure: dict[str, list[str]] = {
     'modeling_auto': ['MODEL_MAPPING_NAMES'],
     'modeling_flax_auto': ['MODEL_FLAX_MAPPING_NAMES'],
diff --git a/openllm-python/src/openllm/models/auto/factory.py b/openllm-python/src/openllm/models/auto/factory.py
index 2b07d88b..e7a45f1e 100644
--- a/openllm-python/src/openllm/models/auto/factory.py
+++ b/openllm-python/src/openllm/models/auto/factory.py
@@ -1,16 +1,22 @@
 # mypy: disable-error-code="type-arg"
 from __future__ import annotations
-import importlib, inspect, logging, typing as t
+import importlib
+import inspect
+import logging
+import typing as t
 from collections import OrderedDict
-import inflection, openllm
-from openllm_core.utils import ReprMixin
 
+import inflection
+
+import openllm
+from openllm_core.utils import ReprMixin
 if t.TYPE_CHECKING:
-  from openllm_core._typing_compat import LiteralString, LLMRunner
   import types
   from collections import _odict_items, _odict_keys, _odict_values
 
   from _typeshed import SupportsIter
+
+  from openllm_core._typing_compat import LiteralString, LLMRunner
   ConfigModelKeysView = _odict_keys[type[openllm.LLMConfig], type[openllm.LLM[t.Any, t.Any]]]
   ConfigModelValuesView = _odict_values[type[openllm.LLMConfig], type[openllm.LLM[t.Any, t.Any]]]
   ConfigModelItemsView = _odict_items[type[openllm.LLMConfig], type[openllm.LLM[t.Any, t.Any]]]
diff --git a/openllm-python/src/openllm/models/auto/modeling_auto.py b/openllm-python/src/openllm/models/auto/modeling_auto.py
index fef81fb1..7740aba2 100644
--- a/openllm-python/src/openllm/models/auto/modeling_auto.py
+++ b/openllm-python/src/openllm/models/auto/modeling_auto.py
@@ -1,8 +1,10 @@
 from __future__ import annotations
 import typing as t
 from collections import OrderedDict
-from .factory import BaseAutoLLMClass, _LazyAutoMapping
+
 from openllm_core.config import CONFIG_MAPPING_NAMES
+
+from .factory import BaseAutoLLMClass, _LazyAutoMapping
 MODEL_MAPPING_NAMES = OrderedDict([('chatglm', 'ChatGLM'), ('dolly_v2', 'DollyV2'), ('falcon', 'Falcon'), ('flan_t5', 'FlanT5'), ('gpt_neox', 'GPTNeoX'), ('llama', 'Llama'), ('mpt', 'MPT'), (
     'opt', 'OPT'
 ), ('stablelm', 'StableLM'), ('starcoder', 'StarCoder'), ('baichuan', 'Baichuan')])
diff --git a/openllm-python/src/openllm/models/auto/modeling_flax_auto.py b/openllm-python/src/openllm/models/auto/modeling_flax_auto.py
index 81dac4ce..0341aea3 100644
--- a/openllm-python/src/openllm/models/auto/modeling_flax_auto.py
+++ b/openllm-python/src/openllm/models/auto/modeling_flax_auto.py
@@ -1,8 +1,10 @@
 from __future__ import annotations
 import typing as t
 from collections import OrderedDict
-from .factory import BaseAutoLLMClass, _LazyAutoMapping
+
 from openllm_core.config import CONFIG_MAPPING_NAMES
+
+from .factory import BaseAutoLLMClass, _LazyAutoMapping
 MODEL_FLAX_MAPPING_NAMES = OrderedDict([('flan_t5', 'FlaxFlanT5'), ('opt', 'FlaxOPT')])
 MODEL_FLAX_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FLAX_MAPPING_NAMES)
 class AutoFlaxLLM(BaseAutoLLMClass):
diff --git a/openllm-python/src/openllm/models/auto/modeling_tf_auto.py b/openllm-python/src/openllm/models/auto/modeling_tf_auto.py
index ab3974e6..c1b92529 100644
--- a/openllm-python/src/openllm/models/auto/modeling_tf_auto.py
+++ b/openllm-python/src/openllm/models/auto/modeling_tf_auto.py
@@ -1,8 +1,10 @@
 from __future__ import annotations
 import typing as t
 from collections import OrderedDict
-from .factory import BaseAutoLLMClass, _LazyAutoMapping
+
 from openllm_core.config import CONFIG_MAPPING_NAMES
+
+from .factory import BaseAutoLLMClass, _LazyAutoMapping
 MODEL_TF_MAPPING_NAMES = OrderedDict([('flan_t5', 'TFFlanT5'), ('opt', 'TFOPT')])
 MODEL_TF_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_TF_MAPPING_NAMES)
 class AutoTFLLM(BaseAutoLLMClass):
diff --git a/openllm-python/src/openllm/models/auto/modeling_vllm_auto.py b/openllm-python/src/openllm/models/auto/modeling_vllm_auto.py
index bae7ce8b..37c7310a 100644
--- a/openllm-python/src/openllm/models/auto/modeling_vllm_auto.py
+++ b/openllm-python/src/openllm/models/auto/modeling_vllm_auto.py
@@ -1,8 +1,10 @@
 from __future__ import annotations
 import typing as t
 from collections import OrderedDict
-from .factory import BaseAutoLLMClass, _LazyAutoMapping
+
 from openllm_core.config import CONFIG_MAPPING_NAMES
+
+from .factory import BaseAutoLLMClass, _LazyAutoMapping
 MODEL_VLLM_MAPPING_NAMES = OrderedDict([('baichuan', 'VLLMBaichuan'), ('dolly_v2', 'VLLMDollyV2'), ('falcon', 'VLLMFalcon'), ('gpt_neox', 'VLLMGPTNeoX'), ('mpt', 'VLLMMPT'), (
     'opt', 'VLLMOPT'
 ), ('stablelm', 'VLLMStableLM'), ('starcoder', 'VLLMStarCoder'), ('llama', 'VLLMLlama')])
diff --git a/openllm-python/src/openllm/models/baichuan/__init__.py b/openllm-python/src/openllm/models/baichuan/__init__.py
index 000b83e5..3c9c2cfb 100644
--- a/openllm-python/src/openllm/models/baichuan/__init__.py
+++ b/openllm-python/src/openllm/models/baichuan/__init__.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import sys, typing as t
+import sys
+import typing as t
+
 from openllm.exceptions import MissingDependencyError
 from openllm.utils import LazyModule, is_cpm_kernels_available, is_torch_available, is_vllm_available
 from openllm_core.config.configuration_baichuan import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_BAICHUAN_COMMAND_DOCSTRING as START_BAICHUAN_COMMAND_DOCSTRING, BaichuanConfig as BaichuanConfig
diff --git a/openllm-python/src/openllm/models/baichuan/modeling_baichuan.py b/openllm-python/src/openllm/models/baichuan/modeling_baichuan.py
index e59bbeec..d349a658 100644
--- a/openllm-python/src/openllm/models/baichuan/modeling_baichuan.py
+++ b/openllm-python/src/openllm/models/baichuan/modeling_baichuan.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, openllm
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import transformers
 class Baichuan(openllm.LLM['transformers.PreTrainedModel', 'transformers.PreTrainedTokenizerBase']):
   __openllm_internal__ = True
diff --git a/openllm-python/src/openllm/models/baichuan/modeling_vllm_baichuan.py b/openllm-python/src/openllm/models/baichuan/modeling_vllm_baichuan.py
index 4ec01913..e9c5a134 100644
--- a/openllm-python/src/openllm/models/baichuan/modeling_vllm_baichuan.py
+++ b/openllm-python/src/openllm/models/baichuan/modeling_vllm_baichuan.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, openllm
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import vllm, transformers
 class VLLMBaichuan(openllm.LLM['vllm.LLMEngine', 'transformers.PreTrainedTokenizerBase']):
   __openllm_internal__ = True
diff --git a/openllm-python/src/openllm/models/chatglm/__init__.py b/openllm-python/src/openllm/models/chatglm/__init__.py
index 1a756943..9bbe737f 100644
--- a/openllm-python/src/openllm/models/chatglm/__init__.py
+++ b/openllm-python/src/openllm/models/chatglm/__init__.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import sys, typing as t
+import sys
+import typing as t
+
 from openllm.exceptions import MissingDependencyError
 from openllm.utils import LazyModule, is_cpm_kernels_available, is_torch_available
 from openllm_core.config.configuration_chatglm import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_CHATGLM_COMMAND_DOCSTRING as START_CHATGLM_COMMAND_DOCSTRING, ChatGLMConfig as ChatGLMConfig
diff --git a/openllm-python/src/openllm/models/chatglm/modeling_chatglm.py b/openllm-python/src/openllm/models/chatglm/modeling_chatglm.py
index cce724c8..e76ef17f 100644
--- a/openllm-python/src/openllm/models/chatglm/modeling_chatglm.py
+++ b/openllm-python/src/openllm/models/chatglm/modeling_chatglm.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, openllm
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import transformers
 class ChatGLM(openllm.LLM['transformers.PreTrainedModel', 'transformers.PreTrainedTokenizerFast']):
   __openllm_internal__ = True
@@ -13,7 +15,8 @@ class ChatGLM(openllm.LLM['transformers.PreTrainedModel', 'transformers.PreTrain
       return self.model.chat(self.tokenizer, prompt, generation_config=self.config.model_construct_env(**attrs).to_generation_config())
 
   def embeddings(self, prompts: list[str]) -> openllm.LLMEmbeddings:
-    import torch, torch.nn.functional as F
+    import torch
+    import torch.nn.functional as F
     embeddings: list[list[float]] = []
     num_tokens = 0
     for prompt in prompts:
diff --git a/openllm-python/src/openllm/models/dolly_v2/__init__.py b/openllm-python/src/openllm/models/dolly_v2/__init__.py
index 33aa4d02..44ec0f71 100644
--- a/openllm-python/src/openllm/models/dolly_v2/__init__.py
+++ b/openllm-python/src/openllm/models/dolly_v2/__init__.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import sys, typing as t
+import sys
+import typing as t
+
 from openllm.exceptions import MissingDependencyError
 from openllm.utils import LazyModule, is_torch_available, is_vllm_available
 from openllm_core.config.configuration_dolly_v2 import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_DOLLY_V2_COMMAND_DOCSTRING as START_DOLLY_V2_COMMAND_DOCSTRING, DollyV2Config as DollyV2Config
diff --git a/openllm-python/src/openllm/models/dolly_v2/modeling_dolly_v2.py b/openllm-python/src/openllm/models/dolly_v2/modeling_dolly_v2.py
index eca959f9..d930c85b 100644
--- a/openllm-python/src/openllm/models/dolly_v2/modeling_dolly_v2.py
+++ b/openllm-python/src/openllm/models/dolly_v2/modeling_dolly_v2.py
@@ -1,8 +1,11 @@
 from __future__ import annotations
-import logging, re, typing as t, openllm
+import logging
+import re
+import typing as t
+
+import openllm
 from openllm_core._typing_compat import overload
 from openllm_core.config.configuration_dolly_v2 import DEFAULT_PROMPT_TEMPLATE, END_KEY, RESPONSE_KEY, get_special_token_id
-
 if t.TYPE_CHECKING: import torch, transformers, tensorflow as tf
 else:  torch, transformers, tf = openllm.utils.LazyLoader('torch', globals(), 'torch'), openllm.utils.LazyLoader('transformers', globals(), 'transformers'), openllm.utils.LazyLoader('tf', globals(), 'tensorflow')
 logger = logging.getLogger(__name__)
diff --git a/openllm-python/src/openllm/models/dolly_v2/modeling_vllm_dolly_v2.py b/openllm-python/src/openllm/models/dolly_v2/modeling_vllm_dolly_v2.py
index d6349380..f22f79d6 100644
--- a/openllm-python/src/openllm/models/dolly_v2/modeling_vllm_dolly_v2.py
+++ b/openllm-python/src/openllm/models/dolly_v2/modeling_vllm_dolly_v2.py
@@ -1,5 +1,8 @@
 from __future__ import annotations
-import logging, typing as t, openllm
+import logging
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import vllm, transformers
 
 logger = logging.getLogger(__name__)
diff --git a/openllm-python/src/openllm/models/falcon/__init__.py b/openllm-python/src/openllm/models/falcon/__init__.py
index f0822728..dd55129b 100644
--- a/openllm-python/src/openllm/models/falcon/__init__.py
+++ b/openllm-python/src/openllm/models/falcon/__init__.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import sys, typing as t
+import sys
+import typing as t
+
 from openllm.exceptions import MissingDependencyError
 from openllm.utils import LazyModule, is_torch_available, is_vllm_available
 from openllm_core.config.configuration_falcon import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_FALCON_COMMAND_DOCSTRING as START_FALCON_COMMAND_DOCSTRING, FalconConfig as FalconConfig
diff --git a/openllm-python/src/openllm/models/falcon/modeling_falcon.py b/openllm-python/src/openllm/models/falcon/modeling_falcon.py
index eccb0eb9..b16cd7cf 100644
--- a/openllm-python/src/openllm/models/falcon/modeling_falcon.py
+++ b/openllm-python/src/openllm/models/falcon/modeling_falcon.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, openllm
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import torch, transformers
 else: torch, transformers = openllm.utils.LazyLoader('torch', globals(), 'torch'), openllm.utils.LazyLoader('transformers', globals(), 'transformers')
 class Falcon(openllm.LLM['transformers.PreTrainedModel', 'transformers.PreTrainedTokenizerBase']):
diff --git a/openllm-python/src/openllm/models/falcon/modeling_vllm_falcon.py b/openllm-python/src/openllm/models/falcon/modeling_vllm_falcon.py
index e91f3a74..61c4aa1d 100644
--- a/openllm-python/src/openllm/models/falcon/modeling_vllm_falcon.py
+++ b/openllm-python/src/openllm/models/falcon/modeling_vllm_falcon.py
@@ -1,5 +1,8 @@
 from __future__ import annotations
-import logging, typing as t, openllm
+import logging
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import vllm, transformers
 
 logger = logging.getLogger(__name__)
diff --git a/openllm-python/src/openllm/models/flan_t5/__init__.py b/openllm-python/src/openllm/models/flan_t5/__init__.py
index 2c5939c6..9218e913 100644
--- a/openllm-python/src/openllm/models/flan_t5/__init__.py
+++ b/openllm-python/src/openllm/models/flan_t5/__init__.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import sys, typing as t
+import sys
+import typing as t
+
 from openllm.exceptions import MissingDependencyError
 from openllm.utils import LazyModule, is_flax_available, is_tf_available, is_torch_available
 from openllm_core.config.configuration_flan_t5 import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING, FlanT5Config as FlanT5Config
diff --git a/openllm-python/src/openllm/models/flan_t5/modeling_flan_t5.py b/openllm-python/src/openllm/models/flan_t5/modeling_flan_t5.py
index 740ca45e..fb421edf 100644
--- a/openllm-python/src/openllm/models/flan_t5/modeling_flan_t5.py
+++ b/openllm-python/src/openllm/models/flan_t5/modeling_flan_t5.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, openllm
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import transformers
 class FlanT5(openllm.LLM['transformers.T5ForConditionalGeneration', 'transformers.T5TokenizerFast']):
   __openllm_internal__ = True
@@ -13,7 +15,8 @@ class FlanT5(openllm.LLM['transformers.T5ForConditionalGeneration', 'transformer
       )
 
   def embeddings(self, prompts: list[str]) -> openllm.LLMEmbeddings:
-    import torch, torch.nn.functional as F
+    import torch
+    import torch.nn.functional as F
     embeddings: list[list[float]] = []
     num_tokens = 0
     for prompt in prompts:
diff --git a/openllm-python/src/openllm/models/flan_t5/modeling_flax_flan_t5.py b/openllm-python/src/openllm/models/flan_t5/modeling_flax_flan_t5.py
index ad0ffe6e..7a13fd15 100644
--- a/openllm-python/src/openllm/models/flan_t5/modeling_flax_flan_t5.py
+++ b/openllm-python/src/openllm/models/flan_t5/modeling_flax_flan_t5.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, openllm
+import typing as t
+
+import openllm
 from openllm_core._prompt import process_prompt
 from openllm_core.config.configuration_flan_t5 import DEFAULT_PROMPT_TEMPLATE
 if t.TYPE_CHECKING: import transformers
diff --git a/openllm-python/src/openllm/models/flan_t5/modeling_tf_flan_t5.py b/openllm-python/src/openllm/models/flan_t5/modeling_tf_flan_t5.py
index 1328e3f5..6af703fe 100644
--- a/openllm-python/src/openllm/models/flan_t5/modeling_tf_flan_t5.py
+++ b/openllm-python/src/openllm/models/flan_t5/modeling_tf_flan_t5.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, openllm
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import transformers
 class TFFlanT5(openllm.LLM['transformers.TFT5ForConditionalGeneration', 'transformers.T5TokenizerFast']):
   __openllm_internal__ = True
diff --git a/openllm-python/src/openllm/models/gpt_neox/__init__.py b/openllm-python/src/openllm/models/gpt_neox/__init__.py
index 8a001a44..f1b19150 100644
--- a/openllm-python/src/openllm/models/gpt_neox/__init__.py
+++ b/openllm-python/src/openllm/models/gpt_neox/__init__.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import sys, typing as t
+import sys
+import typing as t
+
 from openllm.exceptions import MissingDependencyError
 from openllm.utils import LazyModule, is_torch_available, is_vllm_available
 from openllm_core.config.configuration_gpt_neox import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_GPT_NEOX_COMMAND_DOCSTRING as START_GPT_NEOX_COMMAND_DOCSTRING, GPTNeoXConfig as GPTNeoXConfig
diff --git a/openllm-python/src/openllm/models/gpt_neox/modeling_gpt_neox.py b/openllm-python/src/openllm/models/gpt_neox/modeling_gpt_neox.py
index 1aa57d47..d2661f45 100644
--- a/openllm-python/src/openllm/models/gpt_neox/modeling_gpt_neox.py
+++ b/openllm-python/src/openllm/models/gpt_neox/modeling_gpt_neox.py
@@ -1,5 +1,8 @@
 from __future__ import annotations
-import logging, typing as t, openllm
+import logging
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import transformers
 
 logger = logging.getLogger(__name__)
diff --git a/openllm-python/src/openllm/models/gpt_neox/modeling_vllm_gpt_neox.py b/openllm-python/src/openllm/models/gpt_neox/modeling_vllm_gpt_neox.py
index 1cf655cc..818871fe 100644
--- a/openllm-python/src/openllm/models/gpt_neox/modeling_vllm_gpt_neox.py
+++ b/openllm-python/src/openllm/models/gpt_neox/modeling_vllm_gpt_neox.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, openllm
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import vllm, transformers
 class VLLMGPTNeoX(openllm.LLM['vllm.LLMEngine', 'transformers.GPTNeoXTokenizerFast']):
   __openllm_internal__ = True
diff --git a/openllm-python/src/openllm/models/llama/__init__.py b/openllm-python/src/openllm/models/llama/__init__.py
index 792f17a9..ae34a0f3 100644
--- a/openllm-python/src/openllm/models/llama/__init__.py
+++ b/openllm-python/src/openllm/models/llama/__init__.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import sys, typing as t
+import sys
+import typing as t
+
 from openllm.exceptions import MissingDependencyError
 from openllm.utils import LazyModule, is_torch_available, is_vllm_available
 from openllm_core.config.configuration_llama import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, PROMPT_MAPPING as PROMPT_MAPPING, START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING, LlamaConfig as LlamaConfig
diff --git a/openllm-python/src/openllm/models/llama/modeling_llama.py b/openllm-python/src/openllm/models/llama/modeling_llama.py
index 9f02a30d..148edf1f 100644
--- a/openllm-python/src/openllm/models/llama/modeling_llama.py
+++ b/openllm-python/src/openllm/models/llama/modeling_llama.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, openllm
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import transformers
 class Llama(openllm.LLM['transformers.LlamaForCausalLM', 'transformers.LlamaTokenizerFast']):
   __openllm_internal__ = True
@@ -10,7 +12,8 @@ class Llama(openllm.LLM['transformers.LlamaForCausalLM', 'transformers.LlamaToke
     return {'torch_dtype': torch.float16 if torch.cuda.is_available() else torch.float32}, {}
 
   def embeddings(self, prompts: list[str]) -> openllm.LLMEmbeddings:
-    import torch, torch.nn.functional as F
+    import torch
+    import torch.nn.functional as F
     encoding = self.tokenizer(prompts, padding=True, return_tensors='pt').to(self.device)
     input_ids, attention_mask = encoding['input_ids'], encoding['attention_mask']
     with torch.inference_mode():
diff --git a/openllm-python/src/openllm/models/llama/modeling_vllm_llama.py b/openllm-python/src/openllm/models/llama/modeling_vllm_llama.py
index 943e8b4b..54c0a875 100644
--- a/openllm-python/src/openllm/models/llama/modeling_vllm_llama.py
+++ b/openllm-python/src/openllm/models/llama/modeling_vllm_llama.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, openllm
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import vllm, transformers
 class VLLMLlama(openllm.LLM['vllm.LLMEngine', 'transformers.LlamaTokenizerFast']):
   __openllm_internal__ = True
diff --git a/openllm-python/src/openllm/models/mpt/__init__.py b/openllm-python/src/openllm/models/mpt/__init__.py
index dba0e6ce..4537ebc2 100644
--- a/openllm-python/src/openllm/models/mpt/__init__.py
+++ b/openllm-python/src/openllm/models/mpt/__init__.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import sys, typing as t
+import sys
+import typing as t
+
 from openllm.exceptions import MissingDependencyError
 from openllm.utils import LazyModule, is_torch_available, is_vllm_available
 from openllm_core.config.configuration_mpt import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, PROMPT_MAPPING as PROMPT_MAPPING, START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING, MPTConfig as MPTConfig
diff --git a/openllm-python/src/openllm/models/mpt/modeling_mpt.py b/openllm-python/src/openllm/models/mpt/modeling_mpt.py
index 98845496..394e63ae 100644
--- a/openllm-python/src/openllm/models/mpt/modeling_mpt.py
+++ b/openllm-python/src/openllm/models/mpt/modeling_mpt.py
@@ -1,5 +1,9 @@
 from __future__ import annotations
-import logging, typing as t, bentoml, openllm
+import logging
+import typing as t
+
+import bentoml
+import openllm
 from openllm.utils import generate_labels, is_triton_available
 if t.TYPE_CHECKING: import transformers, torch
 
@@ -31,7 +35,8 @@ class MPT(openllm.LLM['transformers.PreTrainedModel', 'transformers.GPTNeoXToken
     return {'device_map': 'auto' if torch.cuda.is_available() and torch.cuda.device_count() > 1 else None, 'torch_dtype': torch.bfloat16 if torch.cuda.is_available() else torch.float32}, {}
 
   def import_model(self, *args: t.Any, trust_remote_code: bool = True, **attrs: t.Any) -> bentoml.Model:
-    import torch, transformers
+    import torch
+    import transformers
     _, tokenizer_attrs = self.llm_parameters
     torch_dtype = attrs.pop('torch_dtype', self.dtype)
     device_map = attrs.pop('device_map', None)
diff --git a/openllm-python/src/openllm/models/mpt/modeling_vllm_mpt.py b/openllm-python/src/openllm/models/mpt/modeling_vllm_mpt.py
index cd3de6e3..f816b343 100644
--- a/openllm-python/src/openllm/models/mpt/modeling_vllm_mpt.py
+++ b/openllm-python/src/openllm/models/mpt/modeling_vllm_mpt.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, openllm
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import transformers, vllm
 class VLLMMPT(openllm.LLM['vllm.LLMEngine', 'transformers.GPTNeoXTokenizerFast']):
   __openllm_internal__ = True
diff --git a/openllm-python/src/openllm/models/opt/__init__.py b/openllm-python/src/openllm/models/opt/__init__.py
index 996f360f..c6960b88 100644
--- a/openllm-python/src/openllm/models/opt/__init__.py
+++ b/openllm-python/src/openllm/models/opt/__init__.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import sys, typing as t
+import sys
+import typing as t
+
 from openllm.exceptions import MissingDependencyError
 from openllm.utils import LazyModule, is_flax_available, is_tf_available, is_torch_available, is_vllm_available
 from openllm_core.config.configuration_opt import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING, OPTConfig as OPTConfig
diff --git a/openllm-python/src/openllm/models/opt/modeling_flax_opt.py b/openllm-python/src/openllm/models/opt/modeling_flax_opt.py
index d1ef3a4c..d48fe8cf 100644
--- a/openllm-python/src/openllm/models/opt/modeling_flax_opt.py
+++ b/openllm-python/src/openllm/models/opt/modeling_flax_opt.py
@@ -1,5 +1,9 @@
 from __future__ import annotations
-import logging, typing as t, bentoml, openllm
+import logging
+import typing as t
+
+import bentoml
+import openllm
 from openllm._prompt import process_prompt
 from openllm.utils import generate_labels
 from openllm_core.config.configuration_opt import DEFAULT_PROMPT_TEMPLATE
diff --git a/openllm-python/src/openllm/models/opt/modeling_opt.py b/openllm-python/src/openllm/models/opt/modeling_opt.py
index 34aa72b8..be954ba8 100644
--- a/openllm-python/src/openllm/models/opt/modeling_opt.py
+++ b/openllm-python/src/openllm/models/opt/modeling_opt.py
@@ -1,5 +1,8 @@
 from __future__ import annotations
-import logging, typing as t, openllm
+import logging
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import transformers
 
 logger = logging.getLogger(__name__)
diff --git a/openllm-python/src/openllm/models/opt/modeling_tf_opt.py b/openllm-python/src/openllm/models/opt/modeling_tf_opt.py
index fe2abd39..6c30f6a9 100644
--- a/openllm-python/src/openllm/models/opt/modeling_tf_opt.py
+++ b/openllm-python/src/openllm/models/opt/modeling_tf_opt.py
@@ -1,5 +1,8 @@
 from __future__ import annotations
-import typing as t, bentoml, openllm
+import typing as t
+
+import bentoml
+import openllm
 from openllm_core.utils import generate_labels
 if t.TYPE_CHECKING: import transformers
 class TFOPT(openllm.LLM['transformers.TFOPTForCausalLM', 'transformers.GPT2Tokenizer']):
diff --git a/openllm-python/src/openllm/models/opt/modeling_vllm_opt.py b/openllm-python/src/openllm/models/opt/modeling_vllm_opt.py
index 51e31ea5..9e87ad60 100644
--- a/openllm-python/src/openllm/models/opt/modeling_vllm_opt.py
+++ b/openllm-python/src/openllm/models/opt/modeling_vllm_opt.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, openllm
+import typing as t
+
+import openllm
 from openllm_core._prompt import process_prompt
 from openllm_core.config.configuration_opt import DEFAULT_PROMPT_TEMPLATE
 if t.TYPE_CHECKING: import vllm, transformers
diff --git a/openllm-python/src/openllm/models/stablelm/__init__.py b/openllm-python/src/openllm/models/stablelm/__init__.py
index 86d92659..65ef2578 100644
--- a/openllm-python/src/openllm/models/stablelm/__init__.py
+++ b/openllm-python/src/openllm/models/stablelm/__init__.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import sys, typing as t
+import sys
+import typing as t
+
 from openllm.exceptions import MissingDependencyError
 from openllm.utils import LazyModule, is_torch_available, is_vllm_available
 from openllm_core.config.configuration_stablelm import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_STABLELM_COMMAND_DOCSTRING as START_STABLELM_COMMAND_DOCSTRING, StableLMConfig as StableLMConfig
diff --git a/openllm-python/src/openllm/models/stablelm/modeling_stablelm.py b/openllm-python/src/openllm/models/stablelm/modeling_stablelm.py
index f043d595..01290c2e 100644
--- a/openllm-python/src/openllm/models/stablelm/modeling_stablelm.py
+++ b/openllm-python/src/openllm/models/stablelm/modeling_stablelm.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, openllm
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import transformers
 class StableLM(openllm.LLM['transformers.GPTNeoXForCausalLM', 'transformers.GPTNeoXTokenizerFast']):
   __openllm_internal__ = True
diff --git a/openllm-python/src/openllm/models/stablelm/modeling_vllm_stablelm.py b/openllm-python/src/openllm/models/stablelm/modeling_vllm_stablelm.py
index bc8c5098..1d02d02c 100644
--- a/openllm-python/src/openllm/models/stablelm/modeling_vllm_stablelm.py
+++ b/openllm-python/src/openllm/models/stablelm/modeling_vllm_stablelm.py
@@ -1,5 +1,8 @@
 from __future__ import annotations
-import logging, typing as t, openllm
+import logging
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import vllm, transformers
 class VLLMStableLM(openllm.LLM['vllm.LLMEngine', 'transformers.GPTNeoXTokenizerFast']):
   __openllm_internal__ = True
diff --git a/openllm-python/src/openllm/models/starcoder/__init__.py b/openllm-python/src/openllm/models/starcoder/__init__.py
index 42913ca9..2834d41a 100644
--- a/openllm-python/src/openllm/models/starcoder/__init__.py
+++ b/openllm-python/src/openllm/models/starcoder/__init__.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import sys, typing as t
+import sys
+import typing as t
+
 from openllm.exceptions import MissingDependencyError
 from openllm.utils import LazyModule, is_torch_available, is_vllm_available
 from openllm_core.config.configuration_starcoder import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_STARCODER_COMMAND_DOCSTRING as START_STARCODER_COMMAND_DOCSTRING, StarCoderConfig as StarCoderConfig
diff --git a/openllm-python/src/openllm/models/starcoder/modeling_starcoder.py b/openllm-python/src/openllm/models/starcoder/modeling_starcoder.py
index 9805bc99..5812ab96 100644
--- a/openllm-python/src/openllm/models/starcoder/modeling_starcoder.py
+++ b/openllm-python/src/openllm/models/starcoder/modeling_starcoder.py
@@ -1,5 +1,9 @@
 from __future__ import annotations
-import logging, typing as t, bentoml, openllm
+import logging
+import typing as t
+
+import bentoml
+import openllm
 from openllm.utils import generate_labels
 from openllm_core.config.configuration_starcoder import EOD, FIM_MIDDLE, FIM_PAD, FIM_PREFIX, FIM_SUFFIX
 if t.TYPE_CHECKING: import transformers
@@ -12,7 +16,8 @@ class StarCoder(openllm.LLM['transformers.GPTBigCodeForCausalLM', 'transformers.
     return {'device_map': 'auto' if torch.cuda.is_available() and torch.cuda.device_count() > 1 else None, 'torch_dtype': torch.float16 if torch.cuda.is_available() else torch.float32}, {}
 
   def import_model(self, *args: t.Any, trust_remote_code: bool = False, **attrs: t.Any) -> bentoml.Model:
-    import torch, transformers
+    import torch
+    import transformers
     torch_dtype, device_map = attrs.pop('torch_dtype', torch.float16), attrs.pop('device_map', 'auto')
     tokenizer = transformers.AutoTokenizer.from_pretrained(self.model_id, **self.llm_parameters[-1])
     tokenizer.add_special_tokens({'additional_special_tokens': [EOD, FIM_PREFIX, FIM_MIDDLE, FIM_SUFFIX, FIM_PAD], 'pad_token': EOD})
diff --git a/openllm-python/src/openllm/models/starcoder/modeling_vllm_starcoder.py b/openllm-python/src/openllm/models/starcoder/modeling_vllm_starcoder.py
index 8148c9e3..20a9e822 100644
--- a/openllm-python/src/openllm/models/starcoder/modeling_vllm_starcoder.py
+++ b/openllm-python/src/openllm/models/starcoder/modeling_vllm_starcoder.py
@@ -1,5 +1,8 @@
 from __future__ import annotations
-import logging, typing as t, openllm
+import logging
+import typing as t
+
+import openllm
 if t.TYPE_CHECKING: import vllm, transformers
 class VLLMStarCoder(openllm.LLM['vllm.LLMEngine', 'transformers.GPT2TokenizerFast']):
   __openllm_internal__ = True
diff --git a/openllm-python/src/openllm/playground/falcon_tuned.py b/openllm-python/src/openllm/playground/falcon_tuned.py
index f63ac041..c5dc9025 100644
--- a/openllm-python/src/openllm/playground/falcon_tuned.py
+++ b/openllm-python/src/openllm/playground/falcon_tuned.py
@@ -22,10 +22,8 @@ logger = logging.getLogger(__name__)
 
 from datasets import load_dataset
 from trl import SFTTrainer
-
 DEFAULT_MODEL_ID = "ybelkada/falcon-7b-sharded-bf16"
 DATASET_NAME = "timdettmers/openassistant-guanaco"
-
 @dataclasses.dataclass
 class TrainingArguments:
   per_device_train_batch_size: int = dataclasses.field(default=4)
@@ -42,12 +40,10 @@ class TrainingArguments:
   group_by_length: bool = dataclasses.field(default=True)
   lr_scheduler_type: str = dataclasses.field(default="constant")
   output_dir: str = dataclasses.field(default=os.path.join(os.getcwd(), "outputs", "falcon"))
-
 @dataclasses.dataclass
 class ModelArguments:
   model_id: str = dataclasses.field(default=DEFAULT_MODEL_ID)
   max_sequence_length: int = dataclasses.field(default=512)
-
 parser = transformers.HfArgumentParser((ModelArguments, TrainingArguments))
 if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
   # If we pass only one argument to the script and it's the path to a json file,
@@ -56,13 +52,20 @@ if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
 else:
   model_args, training_args = t.cast(t.Tuple[ModelArguments, TrainingArguments], parser.parse_args_into_dataclasses())
 
-model, tokenizer = openllm.AutoLLM.for_model("falcon", model_id=model_args.model_id, quantize="int4", bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16, ensure_available=True,).prepare_for_training(adapter_type="lora", lora_alpha=16, lora_dropout=0.1, r=16, bias="none", target_modules=["query_key_value", "dense", "dense_h_to_4h", "dense_4h_to_h",],)
+model, tokenizer = openllm.AutoLLM.for_model("falcon", model_id=model_args.model_id, quantize="int4", bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16, ensure_available=True).prepare_for_training(adapter_type="lora", lora_alpha=16, lora_dropout=0.1, r=16, bias="none", target_modules=["query_key_value", "dense", "dense_h_to_4h", "dense_4h_to_h"])
 model.config.use_cache = False
 tokenizer.pad_token = tokenizer.eos_token
 
 dataset = load_dataset(DATASET_NAME, split="train")
 
-trainer = SFTTrainer(model=model, train_dataset=dataset, dataset_text_field="text", max_seq_length=model_args.max_sequence_length, tokenizer=tokenizer, args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir), **dataclasses.asdict(training_args),),)
+trainer = SFTTrainer(
+    model=model,
+    train_dataset=dataset,
+    dataset_text_field="text",
+    max_seq_length=model_args.max_sequence_length,
+    tokenizer=tokenizer,
+    args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir), **dataclasses.asdict(training_args)),
+)
 
 # upcast layernorm in float32 for more stable training
 for name, module in trainer.model.named_modules():
diff --git a/openllm-python/src/openllm/playground/features.py b/openllm-python/src/openllm/playground/features.py
index d1f8d1e6..2d31b5a7 100644
--- a/openllm-python/src/openllm/playground/features.py
+++ b/openllm-python/src/openllm/playground/features.py
@@ -4,7 +4,6 @@ import logging
 import typing as t
 
 import openllm
-
 openllm.utils.configure_logging()
 
 logger = logging.getLogger(__name__)
@@ -13,7 +12,6 @@ MAX_NEW_TOKENS = 384
 
 Q = "Answer the following question, step by step:\n{q}\nA:"
 question = "What is the meaning of life?"
-
 def main() -> int:
   parser = argparse.ArgumentParser()
   parser.add_argument("question", default=question)
@@ -44,11 +42,9 @@ def main() -> int:
   logger.info("=" * 10, "Response:", r.llm.postprocess_generate(prompt, res))
 
   return 0
-
 def _mp_fn(index: t.Any):  # noqa # type: ignore
   # For xla_spawn (TPUs)
   main()
-
 if openllm.utils.in_notebook():
   main()
 else:
diff --git a/openllm-python/src/openllm/playground/llama2_qlora.py b/openllm-python/src/openllm/playground/llama2_qlora.py
index a3512315..b867c174 100644
--- a/openllm-python/src/openllm/playground/llama2_qlora.py
+++ b/openllm-python/src/openllm/playground/llama2_qlora.py
@@ -29,7 +29,6 @@ from random import randint, randrange
 
 import bitsandbytes as bnb
 from datasets import load_dataset
-
 # COPIED FROM https://github.com/artidoro/qlora/blob/main/qlora.py
 def find_all_linear_names(model):
   lora_module_names = set()
@@ -41,13 +40,11 @@ def find_all_linear_names(model):
   if "lm_head" in lora_module_names:  # needed for 16-bit
     lora_module_names.remove("lm_head")
   return list(lora_module_names)
-
 # Change this to the local converted path if you don't have access to the meta-llama model
 DEFAULT_MODEL_ID = "meta-llama/Llama-2-7b-hf"
 # change this to 'main' if you want to use the latest llama
 DEFAULT_MODEL_VERSION = "335a02887eb6684d487240bbc28b5699298c3135"
 DATASET_NAME = "databricks/databricks-dolly-15k"
-
 def format_dolly(sample):
   instruction = f"### Instruction\n{sample['instruction']}"
   context = f"### Context\n{sample['context']}" if len(sample["context"]) > 0 else None
@@ -55,15 +52,12 @@ def format_dolly(sample):
   # join all the parts together
   prompt = "\n\n".join([i for i in [instruction, context, response] if i is not None])
   return prompt
-
 # template dataset to add prompt to each sample
 def template_dataset(sample, tokenizer):
   sample["text"] = f"{format_dolly(sample)}{tokenizer.eos_token}"
   return sample
-
 # empty list to save remainder from batches to use in next batch
 remainder = {"input_ids": [], "attention_mask": [], "token_type_ids": []}
-
 def chunk(sample, chunk_length=2048):
   # define global remainder variable to save remainder from batches to use in next batch
   global remainder
@@ -84,7 +78,6 @@ def chunk(sample, chunk_length=2048):
   # prepare labels
   result["labels"] = result["input_ids"].copy()
   return result
-
 def prepare_datasets(tokenizer, dataset_name=DATASET_NAME):
   # Load dataset from the hub
   dataset = load_dataset(dataset_name, split="train")
@@ -103,11 +96,20 @@ def prepare_datasets(tokenizer, dataset_name=DATASET_NAME):
   # Print total number of samples
   print(f"Total number of samples: {len(lm_dataset)}")
   return lm_dataset
-
-def prepare_for_int4_training(model_id: str, model_version: str | None = None, gradient_checkpointing: bool = True, bf16: bool = True,) -> tuple[peft.PeftModel, transformers.LlamaTokenizerFast]:
+def prepare_for_int4_training(model_id: str, model_version: str | None = None, gradient_checkpointing: bool = True, bf16: bool = True,
+                              ) -> tuple[peft.PeftModel, transformers.LlamaTokenizerFast]:
   from peft.tuners.lora import LoraLayer
 
-  llm = openllm.AutoLLM.for_model("llama", model_id=model_id, model_version=model_version, ensure_available=True, quantize="int4", bnb_4bit_compute_dtype=torch.bfloat16, use_cache=not gradient_checkpointing, device_map="auto",)
+  llm = openllm.AutoLLM.for_model(
+      "llama",
+      model_id=model_id,
+      model_version=model_version,
+      ensure_available=True,
+      quantize="int4",
+      bnb_4bit_compute_dtype=torch.bfloat16,
+      use_cache=not gradient_checkpointing,
+      device_map="auto",
+  )
   print("Model summary:", llm.model)
 
   # get lora target modules
@@ -128,7 +130,6 @@ def prepare_for_int4_training(model_id: str, model_version: str | None = None, g
         if bf16 and module.weight.dtype == torch.float32:
           module = module.to(torch.bfloat16)
   return model, tokenizer
-
 @dataclasses.dataclass
 class TrainingArguments:
   per_device_train_batch_size: int = dataclasses.field(default=1)
@@ -140,14 +141,12 @@ class TrainingArguments:
   report_to: str = dataclasses.field(default="none")
   output_dir: str = dataclasses.field(default=os.path.join(os.getcwd(), "outputs", "llama"))
   save_strategy: str = dataclasses.field(default="no")
-
 @dataclasses.dataclass
 class ModelArguments:
   model_id: str = dataclasses.field(default=DEFAULT_MODEL_ID)
   model_version: str = dataclasses.field(default=DEFAULT_MODEL_VERSION)
   seed: int = dataclasses.field(default=42)
   merge_weights: bool = dataclasses.field(default=False)
-
 if openllm.utils.in_notebook():
   model_args, training_rags = ModelArguments(), TrainingArguments()
 else:
@@ -161,7 +160,6 @@ else:
 
 # import the model first hand
 openllm.import_model("llama", model_id=model_args.model_id, model_version=model_args.model_version)
-
 def train_loop(model_args: ModelArguments, training_args: TrainingArguments):
   import peft
 
@@ -170,7 +168,12 @@ def train_loop(model_args: ModelArguments, training_args: TrainingArguments):
   model, tokenizer = prepare_for_int4_training(model_args.model_id, gradient_checkpointing=training_args.gradient_checkpointing, bf16=training_args.bf16,)
   datasets = prepare_datasets(tokenizer)
 
-  trainer = transformers.Trainer(model=model, args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir), **dataclasses.asdict(training_args)), train_dataset=datasets, data_collator=transformers.default_data_collator,)
+  trainer = transformers.Trainer(
+      model=model,
+      args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir), **dataclasses.asdict(training_args)),
+      train_dataset=datasets,
+      data_collator=transformers.default_data_collator,
+  )
 
   trainer.train()
 
@@ -191,5 +194,4 @@ def train_loop(model_args: ModelArguments, training_args: TrainingArguments):
     model.save_pretrained(os.path.join(os.getcwd(), "outputs", "merged_llama_lora"), safe_serialization=True, max_shard_size="2GB")
   else:
     trainer.model.save_pretrained(os.path.join(training_args.output_dir, "lora"))
-
 train_loop(model_args, training_args)
diff --git a/openllm-python/src/openllm/playground/opt_tuned.py b/openllm-python/src/openllm/playground/opt_tuned.py
index 4e0229e9..6f04fd05 100644
--- a/openllm-python/src/openllm/playground/opt_tuned.py
+++ b/openllm-python/src/openllm/playground/opt_tuned.py
@@ -23,12 +23,14 @@ from datasets import load_dataset
 
 if t.TYPE_CHECKING:
   from peft import PeftModel
-
 DEFAULT_MODEL_ID = "facebook/opt-6.7b"
-
-def load_trainer(model: PeftModel, tokenizer: transformers.GPT2TokenizerFast, dataset_dict: t.Any, training_args: TrainingArguments,):
-  return transformers.Trainer(model=model, train_dataset=dataset_dict["train"], args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir), **dataclasses.asdict(training_args),), data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),)
-
+def load_trainer(model: PeftModel, tokenizer: transformers.GPT2TokenizerFast, dataset_dict: t.Any, training_args: TrainingArguments):
+  return transformers.Trainer(
+      model=model,
+      train_dataset=dataset_dict["train"],
+      args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir), **dataclasses.asdict(training_args)),
+      data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
+  )
 @dataclasses.dataclass
 class TrainingArguments:
   per_device_train_batch_size: int = dataclasses.field(default=4)
@@ -39,11 +41,9 @@ class TrainingArguments:
   fp16: bool = dataclasses.field(default=True)
   logging_steps: int = dataclasses.field(default=1)
   output_dir: str = dataclasses.field(default=os.path.join(os.getcwd(), "outputs", "opt"))
-
 @dataclasses.dataclass
 class ModelArguments:
   model_id: str = dataclasses.field(default=DEFAULT_MODEL_ID)
-
 parser = transformers.HfArgumentParser((ModelArguments, TrainingArguments))
 if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
   # If we pass only one argument to the script and it's the path to a json file,
@@ -52,7 +52,7 @@ if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
 else:
   model_args, training_args = t.cast(t.Tuple[ModelArguments, TrainingArguments], parser.parse_args_into_dataclasses())
 
-model, tokenizer = openllm.AutoLLM.for_model("opt", model_id=model_args.model_id, quantize="int8", ensure_available=True,).prepare_for_training(adapter_type="lora", r=16, lora_alpha=32, target_modules=["q_proj", "v_proj"], lora_dropout=0.05, bias="none",)
+model, tokenizer = openllm.AutoLLM.for_model("opt", model_id=model_args.model_id, quantize="int8", ensure_available=True).prepare_for_training(adapter_type="lora", r=16, lora_alpha=32, target_modules=["q_proj", "v_proj"], lora_dropout=0.05, bias="none")
 
 # ft on english_quotes
 data = load_dataset("Abirate/english_quotes")
diff --git a/openllm-python/src/openllm/serialisation/__init__.py b/openllm-python/src/openllm/serialisation/__init__.py
index 0611181e..5c3c677b 100644
--- a/openllm-python/src/openllm/serialisation/__init__.py
+++ b/openllm-python/src/openllm/serialisation/__init__.py
@@ -23,13 +23,18 @@ llm.save_pretrained("./path/to/local-dolly")
 ```
 """
 from __future__ import annotations
-import importlib, typing as t
-import cloudpickle, fs, openllm
-from bentoml._internal.models.model import CUSTOM_OBJECTS_FILENAME
-from openllm_core._typing_compat import M, T, ParamSpec
+import importlib
+import typing as t
 
+import cloudpickle
+import fs
+
+import openllm
+from bentoml._internal.models.model import CUSTOM_OBJECTS_FILENAME
+from openllm_core._typing_compat import M, ParamSpec, T
 if t.TYPE_CHECKING:
   import bentoml
+
   from . import constants as constants, ggml as ggml, transformers as transformers
 P = ParamSpec('P')
 def load_tokenizer(llm: openllm.LLM[t.Any, T], **tokenizer_attrs: t.Any) -> T:
diff --git a/openllm-python/src/openllm/serialisation/ggml.py b/openllm-python/src/openllm/serialisation/ggml.py
index 8db4c61a..5f2244d1 100644
--- a/openllm-python/src/openllm/serialisation/ggml.py
+++ b/openllm-python/src/openllm/serialisation/ggml.py
@@ -4,8 +4,9 @@ This requires ctransformers to be installed.
 '''
 from __future__ import annotations
 import typing as t
-import bentoml, openllm
 
+import bentoml
+import openllm
 if t.TYPE_CHECKING: from openllm_core._typing_compat import M
 
 _conversion_strategy = {'pt': 'ggml'}
diff --git a/openllm-python/src/openllm/serialisation/transformers/__init__.py b/openllm-python/src/openllm/serialisation/transformers/__init__.py
index 6df0f9f1..fc71c5fb 100644
--- a/openllm-python/src/openllm/serialisation/transformers/__init__.py
+++ b/openllm-python/src/openllm/serialisation/transformers/__init__.py
@@ -1,19 +1,27 @@
 '''Serialisation related implementation for Transformers-based implementation.'''
 from __future__ import annotations
-import importlib, logging, typing as t
-import bentoml, openllm
+import importlib
+import logging
+import typing as t
+
 from huggingface_hub import snapshot_download
 from simple_di import Provide, inject
+
+import bentoml
+import openllm
 from bentoml._internal.configuration.containers import BentoMLContainer
 from bentoml._internal.models.model import ModelOptions
-from .weights import HfIgnore
-from ._helpers import check_unintialised_params, infer_autoclass_from_llm, infer_tokenizers_from_llm, make_model_signatures, process_config, update_model
 
+from ._helpers import check_unintialised_params, infer_autoclass_from_llm, infer_tokenizers_from_llm, make_model_signatures, process_config, update_model
+from .weights import HfIgnore
 if t.TYPE_CHECKING:
   import types
 
-  import vllm, auto_gptq as autogptq, transformers, torch
+  import auto_gptq as autogptq
+  import torch
   import torch.nn
+  import transformers
+  import vllm
 
   from bentoml._internal.models import ModelStore
   from openllm_core._typing_compat import DictStrAny, M, T
diff --git a/openllm-python/src/openllm/serialisation/transformers/_helpers.py b/openllm-python/src/openllm/serialisation/transformers/_helpers.py
index a4f3db26..0a8c3089 100644
--- a/openllm-python/src/openllm/serialisation/transformers/_helpers.py
+++ b/openllm-python/src/openllm/serialisation/transformers/_helpers.py
@@ -1,11 +1,17 @@
 from __future__ import annotations
-import copy, typing as t, openllm_core, openllm
+import copy
+import typing as t
+
+import openllm
+import openllm_core
 from bentoml._internal.models.model import ModelInfo, ModelSignature
 from openllm.serialisation.constants import FRAMEWORK_TO_AUTOCLASS_MAPPING, HUB_ATTRS
-
 if t.TYPE_CHECKING:
-  import torch, transformers, bentoml
+  import torch
+  import transformers
   from transformers.models.auto.auto_factory import _BaseAutoModelClass
+
+  import bentoml
   from bentoml._internal.models.model import ModelSignaturesType
   from openllm_core._typing_compat import DictStrAny, M, T
 else:
diff --git a/openllm-python/src/openllm/serialisation/transformers/weights.py b/openllm-python/src/openllm/serialisation/transformers/weights.py
index 31a50ee0..882de1c5 100644
--- a/openllm-python/src/openllm/serialisation/transformers/weights.py
+++ b/openllm-python/src/openllm/serialisation/transformers/weights.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-import typing as t, attr
+import typing as t
+
+import attr
 from huggingface_hub import HfApi
 if t.TYPE_CHECKING:
   import openllm
diff --git a/openllm-python/src/openllm/testing.py b/openllm-python/src/openllm/testing.py
index 40d65c23..1ff88a86 100644
--- a/openllm-python/src/openllm/testing.py
+++ b/openllm-python/src/openllm/testing.py
@@ -1,6 +1,13 @@
 '''Tests utilities for OpenLLM.'''
 from __future__ import annotations
-import contextlib, logging, shutil, subprocess, typing as t, bentoml, openllm
+import contextlib
+import logging
+import shutil
+import subprocess
+import typing as t
+
+import bentoml
+import openllm
 if t.TYPE_CHECKING: from ._typing_compat import LiteralRuntime
 
 logger = logging.getLogger(__name__)
diff --git a/openllm-python/src/openllm/utils/__init__.py b/openllm-python/src/openllm/utils/__init__.py
index 6d497d5f..75eccf9c 100644
--- a/openllm-python/src/openllm/utils/__init__.py
+++ b/openllm-python/src/openllm/utils/__init__.py
@@ -4,12 +4,19 @@ User can import these function for convenience, but
 we won't ensure backward compatibility for these functions. So use with caution.
 """
 from __future__ import annotations
-import typing as t, openllm_core
-from . import (dummy_flax_objects as dummy_flax_objects, dummy_pt_objects as dummy_pt_objects, dummy_tf_objects as dummy_tf_objects, dummy_vllm_objects as dummy_vllm_objects,)
+import typing as t
 
+import openllm_core
+
+from . import (
+  dummy_flax_objects as dummy_flax_objects,
+  dummy_pt_objects as dummy_pt_objects,
+  dummy_tf_objects as dummy_tf_objects,
+  dummy_vllm_objects as dummy_vllm_objects,
+)
 if t.TYPE_CHECKING:
-  from openllm_core._typing_compat import LiteralRuntime
   import openllm
+  from openllm_core._typing_compat import LiteralRuntime
 def generate_labels(llm: openllm.LLM[t.Any, t.Any]) -> dict[str, t.Any]:
   return {'runtime': llm.runtime, 'framework': 'openllm', 'model_name': llm.config['model_name'], 'architecture': llm.config['architecture'], 'serialisation_format': llm._serialisation_format}
 def infer_auto_class(implementation: LiteralRuntime) -> type[openllm.AutoLLM | openllm.AutoTFLLM | openllm.AutoFlaxLLM | openllm.AutoVLLM]:
diff --git a/openllm-python/tests/_strategies/_configuration.py b/openllm-python/tests/_strategies/_configuration.py
index 2e553c01..c2ea2e4d 100644
--- a/openllm-python/tests/_strategies/_configuration.py
+++ b/openllm-python/tests/_strategies/_configuration.py
@@ -1,7 +1,11 @@
 from __future__ import annotations
-import logging, typing as t, openllm
-from openllm_core._configuration import ModelSettings
+import logging
+import typing as t
+
 from hypothesis import strategies as st
+
+import openllm
+from openllm_core._configuration import ModelSettings
 logger = logging.getLogger(__name__)
 
 env_strats = st.sampled_from([openllm.utils.EnvVarMixin(model_name) for model_name in openllm.CONFIG_MAPPING.keys()])
diff --git a/openllm-python/tests/configuration_test.py b/openllm-python/tests/configuration_test.py
index fea1338a..a0f78fdf 100644
--- a/openllm-python/tests/configuration_test.py
+++ b/openllm-python/tests/configuration_test.py
@@ -1,8 +1,18 @@
 from __future__ import annotations
-import contextlib, os, sys, typing as t, attr, pytest, transformers, openllm
+import contextlib
+import os
+import sys
+import typing as t
 from unittest import mock
-from openllm_core._configuration import GenerationConfig, ModelSettings, field_env_key
+
+import attr
+import pytest
+import transformers
 from hypothesis import assume, given, strategies as st
+
+import openllm
+from openllm_core._configuration import GenerationConfig, ModelSettings, field_env_key
+
 from ._strategies._configuration import make_llm_config, model_settings
 # XXX: @aarnphm fixes TypedDict behaviour in 3.11
 @pytest.mark.skipif(sys.version_info[:2] == (3, 11), reason='TypedDict in 3.11 behaves differently, so we need to fix this')
diff --git a/openllm-python/tests/conftest.py b/openllm-python/tests/conftest.py
index e5a0b279..be9b812f 100644
--- a/openllm-python/tests/conftest.py
+++ b/openllm-python/tests/conftest.py
@@ -1,5 +1,11 @@
 from __future__ import annotations
-import itertools, os, typing as t, pytest, openllm
+import itertools
+import os
+import typing as t
+
+import pytest
+
+import openllm
 if t.TYPE_CHECKING: from openllm_core._typing_compat import LiteralRuntime
 
 _FRAMEWORK_MAPPING = {'flan_t5': 'google/flan-t5-small', 'opt': 'facebook/opt-125m', 'baichuan': 'baichuan-inc/Baichuan-7B',}
diff --git a/openllm-python/tests/models/conftest.py b/openllm-python/tests/models/conftest.py
index 68c90e4e..47e1a40e 100644
--- a/openllm-python/tests/models/conftest.py
+++ b/openllm-python/tests/models/conftest.py
@@ -1,16 +1,32 @@
 from __future__ import annotations
-import asyncio, contextlib, functools, logging, sys, time, typing as t
+import asyncio
+import contextlib
+import functools
+import logging
+import sys
+import time
+import typing as t
 from abc import ABC, abstractmethod
-import attr, docker, docker.errors, docker.types, orjson, pytest, openllm
+
+import attr
+import docker
+import docker.errors
+import docker.types
+import orjson
+import pytest
 from syrupy.extensions.json import JSONSnapshotExtension
+
+import openllm
 from openllm._llm import normalise_model_name
 from openllm_core._typing_compat import DictStrAny, ListAny
 logger = logging.getLogger(__name__)
 
 if t.TYPE_CHECKING:
   import subprocess
+
   from syrupy.assertion import SnapshotAssertion
   from syrupy.types import PropertyFilter, PropertyMatcher, SerializableData, SerializedData
+
   from openllm._configuration import GenerationConfig
   from openllm.client import BaseAsyncClient
 class ResponseComparator(JSONSnapshotExtension):
diff --git a/openllm-python/tests/models/flan_t5_test.py b/openllm-python/tests/models/flan_t5_test.py
index ba631e14..fd3c6d22 100644
--- a/openllm-python/tests/models/flan_t5_test.py
+++ b/openllm-python/tests/models/flan_t5_test.py
@@ -4,7 +4,6 @@ import typing as t
 import pytest
 
 import openllm
-
 if t.TYPE_CHECKING:
   import contextlib
 
diff --git a/openllm-python/tests/models/opt_test.py b/openllm-python/tests/models/opt_test.py
index 3bcb5721..3be257b4 100644
--- a/openllm-python/tests/models/opt_test.py
+++ b/openllm-python/tests/models/opt_test.py
@@ -4,7 +4,6 @@ import typing as t
 import pytest
 
 import openllm
-
 if t.TYPE_CHECKING:
   import contextlib
 
diff --git a/openllm-python/tests/models_test.py b/openllm-python/tests/models_test.py
index 03bacf33..7ffd56e4 100644
--- a/openllm-python/tests/models_test.py
+++ b/openllm-python/tests/models_test.py
@@ -1,6 +1,8 @@
 from __future__ import annotations
-import os, typing as t, pytest
+import os
+import typing as t
 
+import pytest
 if t.TYPE_CHECKING: import openllm
 @pytest.mark.skipif(os.getenv('GITHUB_ACTIONS') is not None, reason='Model is too large for CI')
 def test_flan_t5_implementation(prompt: str, llm: openllm.LLM[t.Any, t.Any]):
diff --git a/openllm-python/tests/package_test.py b/openllm-python/tests/package_test.py
index 1710b4be..4f16dd4b 100644
--- a/openllm-python/tests/package_test.py
+++ b/openllm-python/tests/package_test.py
@@ -1,5 +1,11 @@
 from __future__ import annotations
-import functools, os, typing as t, pytest, openllm
+import functools
+import os
+import typing as t
+
+import pytest
+
+import openllm
 from bentoml._internal.configuration.containers import BentoMLContainer
 if t.TYPE_CHECKING: from pathlib import Path
 
diff --git a/openllm-python/tests/strategies_test.py b/openllm-python/tests/strategies_test.py
index 1e0deb37..da2d34c7 100644
--- a/openllm-python/tests/strategies_test.py
+++ b/openllm-python/tests/strategies_test.py
@@ -1,5 +1,10 @@
 from __future__ import annotations
-import os, typing as t, pytest, bentoml
+import os
+import typing as t
+
+import pytest
+
+import bentoml
 from openllm_core import _strategies as strategy
 from openllm_core._strategies import CascadingResourceStrategy, NvidiaGpuResource, get_resource
 if t.TYPE_CHECKING: from _pytest.monkeypatch import MonkeyPatch
diff --git a/typings/attr/__init__.pyi b/typings/attr/__init__.pyi
index 41fcca46..e1f81b35 100644
--- a/typings/attr/__init__.pyi
+++ b/typings/attr/__init__.pyi
@@ -1,39 +1,37 @@
 import enum
 import sys
 from typing import (
-    Any,
-    Callable,
-    Dict,
-    Generic,
-    List,
-    Literal,
-    Mapping,
-    Optional,
-    Protocol,
-    Sequence,
-    Tuple,
-    Type,
-    TypeVar,
-    Union,
-    overload,
+  Any,
+  Callable,
+  Dict,
+  Generic,
+  List,
+  Literal,
+  Mapping,
+  Optional,
+  Protocol,
+  Sequence,
+  Tuple,
+  Type,
+  TypeVar,
+  Union,
+  overload,
 )
-
 if sys.version_info[:2] >= (3, 11):
   from typing import ParamSpec, TypeAlias, TypeGuard, dataclass_transform
 else:
   from typing_extensions import ParamSpec, TypeAlias, TypeGuard, dataclass_transform
 
 from . import (
-    converters as converters,
-    exceptions as exceptions,
-    filters as filters,
-    setters as setters,
-    validators as validators,
+  converters as converters,
+  exceptions as exceptions,
+  filters as filters,
+  setters as setters,
+  validators as validators,
 )
 from ._cmp import cmp_using as cmp_using
 from ._typing_compat import AttrsInstance_
 from ._version_info import VersionInfo
-
 __version__: str
 __version_info__: VersionInfo
 __title__: str
diff --git a/typings/attr/_cmp.pyi b/typings/attr/_cmp.pyi
index 3ef1ddf2..f5ace804 100644
--- a/typings/attr/_cmp.pyi
+++ b/typings/attr/_cmp.pyi
@@ -1,6 +1,5 @@
 import sys
 from typing import Any, Callable, Optional
-
 if sys.version_info[:2] >= (3, 10):
   from typing import TypeAlias
 else:
diff --git a/typings/attr/_compat.pyi b/typings/attr/_compat.pyi
index 3014db28..4c69c593 100644
--- a/typings/attr/_compat.pyi
+++ b/typings/attr/_compat.pyi
@@ -1,6 +1,5 @@
 import threading
 from typing import Any
-
 def set_closure_cell(cell: Any, value: Any) -> None: ...
 
 repr_context: threading.local = ...
diff --git a/typings/attr/_typing_compat.pyi b/typings/attr/_typing_compat.pyi
index 730e7152..c18569a5 100644
--- a/typings/attr/_typing_compat.pyi
+++ b/typings/attr/_typing_compat.pyi
@@ -1,5 +1,4 @@
 from typing import Any, ClassVar, Protocol
-
 # MYPY is a special constant in mypy which works the same way as `TYPE_CHECKING`.
 MYPY: bool = False
 
diff --git a/typings/attr/converters.pyi b/typings/attr/converters.pyi
index c48a70ed..192d3a37 100644
--- a/typings/attr/converters.pyi
+++ b/typings/attr/converters.pyi
@@ -1,7 +1,6 @@
 from typing import Callable, TypeVar, overload
 
 from . import _ConverterType
-
 _T = TypeVar('_T')
 
 def pipe(*validators: _ConverterType) -> _ConverterType: ...
diff --git a/typings/attr/exceptions.pyi b/typings/attr/exceptions.pyi
index f2680118..e5443ca9 100644
--- a/typings/attr/exceptions.pyi
+++ b/typings/attr/exceptions.pyi
@@ -1,5 +1,4 @@
 from typing import Any
-
 class FrozenError(AttributeError):
     msg: str = ...
 
diff --git a/typings/attr/filters.pyi b/typings/attr/filters.pyi
index 8a02fa0f..f3836cc3 100644
--- a/typings/attr/filters.pyi
+++ b/typings/attr/filters.pyi
@@ -1,6 +1,5 @@
 from typing import Any, Union
 
 from . import Attribute, _FilterType
-
 def include(*what: Union[type, str, Attribute[Any]]) -> _FilterType[Any]: ...
 def exclude(*what: Union[type, str, Attribute[Any]]) -> _FilterType[Any]: ...
diff --git a/typings/attr/setters.pyi b/typings/attr/setters.pyi
index 389ffa63..a283f2a8 100644
--- a/typings/attr/setters.pyi
+++ b/typings/attr/setters.pyi
@@ -1,7 +1,6 @@
 from typing import Any, NewType, NoReturn, TypeVar
 
 from . import Attribute, _OnSetAttrType
-
 _T = TypeVar('_T')
 
 def frozen(instance: Any, attribute: Attribute[Any], new_value: Any) -> NoReturn: ...
diff --git a/typings/attr/validators.pyi b/typings/attr/validators.pyi
index c6783ceb..da84d4ca 100644
--- a/typings/attr/validators.pyi
+++ b/typings/attr/validators.pyi
@@ -18,7 +18,6 @@ from typing import (
 )
 
 from . import _ValidatorArgType, _ValidatorType
-
 _T = TypeVar('_T')
 _T1 = TypeVar('_T1')
 _T2 = TypeVar('_T2')
diff --git a/typings/click_option_group/__init__.pyi b/typings/click_option_group/__init__.pyi
index dac8e9b7..d0e316f1 100644
--- a/typings/click_option_group/__init__.pyi
+++ b/typings/click_option_group/__init__.pyi
@@ -9,7 +9,6 @@ from ._core import (
 )
 from ._decorators import optgroup
 from ._version import __version__
-
 '''
 click-option-group
 ~~~~~~~~~~~~~~~~~~
diff --git a/typings/click_option_group/_core.pyi b/typings/click_option_group/_core.pyi
index f5da98f3..f973626f 100644
--- a/typings/click_option_group/_core.pyi
+++ b/typings/click_option_group/_core.pyi
@@ -1,5 +1,7 @@
-import sys, click
+import sys
 from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence, Set, Tuple, TypeVar, Union
+
+import click
 if sys.version_info[:2] >= (3, 10):
   from typing import TypeAlias
 else:
diff --git a/typings/click_option_group/_decorators.pyi b/typings/click_option_group/_decorators.pyi
index f1f981dc..e7fb4695 100644
--- a/typings/click_option_group/_decorators.pyi
+++ b/typings/click_option_group/_decorators.pyi
@@ -3,7 +3,6 @@ from typing import Any, Callable, Dict, List, NamedTuple, Optional, Tuple, Type,
 import click
 
 from ._core import _FC, AnyCallable, OptionGroup
-
 class OptionStackItem(NamedTuple):
     param_decls: Tuple[str, ...]
     attrs: Dict[str, Any]
diff --git a/typings/cuda/cuda.pyi b/typings/cuda/cuda.pyi
index 16d5ea39..643da61a 100644
--- a/typings/cuda/cuda.pyi
+++ b/typings/cuda/cuda.pyi
@@ -1,5 +1,4 @@
 from enum import Enum
-
 class CUresult(Enum):
     CUDA_SUCCESS = 0
 
diff --git a/typings/deepmerge/__init__.pyi b/typings/deepmerge/__init__.pyi
index 668799ac..c0506e5d 100644
--- a/typings/deepmerge/__init__.pyi
+++ b/typings/deepmerge/__init__.pyi
@@ -1,7 +1,6 @@
 from typing import List, Literal, Tuple
 
 from .merger import Merger
-
 DEFAULT_TYPE_SPECIFIC_MERGE_STRATEGIES: List[Tuple[type, Literal['append', 'merge', 'union']]] = ...
 always_merger: Merger = ...
 merge_or_raise: Merger = ...
diff --git a/typings/deepmerge/merger.pyi b/typings/deepmerge/merger.pyi
index cdae8ab2..6dce9fb4 100644
--- a/typings/deepmerge/merger.pyi
+++ b/typings/deepmerge/merger.pyi
@@ -8,7 +8,6 @@ from .strategy.core import StrategyList
 from .strategy.dict import DictStrategies
 from .strategy.list import ListStrategies
 from .strategy.set import SetStrategies
-
 ConfigDictType: TypeAlias = Dict[str, Any]
 
 class Merger:
diff --git a/typings/deepmerge/strategy/dict.pyi b/typings/deepmerge/strategy/dict.pyi
index 50087412..8bae80fd 100644
--- a/typings/deepmerge/strategy/dict.pyi
+++ b/typings/deepmerge/strategy/dict.pyi
@@ -1,6 +1,5 @@
 from .core import StrategyList
 from ..merger import Merger
-
 class DictStrategies(StrategyList):
     @staticmethod
     def strategy_merge(config: Merger, path: str, base: StrategyList, nxt: StrategyList) -> StrategyList: ...
diff --git a/typings/deepmerge/strategy/list.pyi b/typings/deepmerge/strategy/list.pyi
index c3a1f0ea..6ad55fea 100644
--- a/typings/deepmerge/strategy/list.pyi
+++ b/typings/deepmerge/strategy/list.pyi
@@ -1,6 +1,5 @@
 from .core import StrategyList
 from ..merger import Merger
-
 class ListStrategies(StrategyList):
     NAME: str = ...
 
diff --git a/typings/deepmerge/strategy/set.pyi b/typings/deepmerge/strategy/set.pyi
index 1a888433..f94cd4cb 100644
--- a/typings/deepmerge/strategy/set.pyi
+++ b/typings/deepmerge/strategy/set.pyi
@@ -2,7 +2,6 @@ from typing import Any
 
 from .core import StrategyList
 from ..merger import Merger
-
 class SetStrategies(StrategyList):
     NAME: str | None = ...
 
diff --git a/typings/jupytext/config.pyi b/typings/jupytext/config.pyi
index e948db6e..496daf44 100644
--- a/typings/jupytext/config.pyi
+++ b/typings/jupytext/config.pyi
@@ -4,7 +4,6 @@ from typing import Any, Dict
 from _typeshed import Incomplete
 
 from .formats import NOTEBOOK_EXTENSIONS as NOTEBOOK_EXTENSIONS
-
 class JupytextConfigurationError(ValueError): ...
 
 JUPYTEXT_CONFIG_FILES: Incomplete
diff --git a/typings/jupytext/formats.pyi b/typings/jupytext/formats.pyi
index c9a21334..9cec7efa 100644
--- a/typings/jupytext/formats.pyi
+++ b/typings/jupytext/formats.pyi
@@ -1,7 +1,6 @@
 from typing import Any
 
 from _typeshed import Incomplete
-
 class JupytextFormatError(ValueError): ...
 
 class NotebookFormatDescription:
diff --git a/typings/jupytext/jupytext.pyi b/typings/jupytext/jupytext.pyi
index fa2b6c22..7048031e 100644
--- a/typings/jupytext/jupytext.pyi
+++ b/typings/jupytext/jupytext.pyi
@@ -5,7 +5,6 @@ from nbformat import NotebookNode
 from nbformat.v4.rwbase import NotebookReader, NotebookWriter
 
 from .config import JupytextConfiguration
-
 class NotSupportedNBFormatVersion(NotImplementedError): ...
 
 class TextNotebookConverter(NotebookReader, NotebookWriter):
diff --git a/typings/nbformat/notebooknode.pyi b/typings/nbformat/notebooknode.pyi
index f8b52e4c..ade0cd5c 100644
--- a/typings/nbformat/notebooknode.pyi
+++ b/typings/nbformat/notebooknode.pyi
@@ -1,7 +1,6 @@
 from typing import Any
 
 from ._struct import Struct
-
 class NotebookNode(Struct): ...
 
 def from_dict(d: dict[str, Any]) -> NotebookNode: ...
diff --git a/typings/nbformat/v4/__init__.pyi b/typings/nbformat/v4/__init__.pyi
index cc944274..dadad057 100644
--- a/typings/nbformat/v4/__init__.pyi
+++ b/typings/nbformat/v4/__init__.pyi
@@ -18,7 +18,6 @@ from .nbjson import (
     to_notebook as to_notebook,
     writes as writes,
 )
-
 reads_json = reads
 writes_json = writes
 to_notebook_json = to_notebook
diff --git a/typings/nbformat/v4/convert.pyi b/typings/nbformat/v4/convert.pyi
index f9eb30df..f86979e7 100644
--- a/typings/nbformat/v4/convert.pyi
+++ b/typings/nbformat/v4/convert.pyi
@@ -7,7 +7,6 @@ from .nbbase import (
     nbformat as nbformat,
     nbformat_minor as nbformat_minor,
 )
-
 def upgrade(nb: NotebookNode, from_version: Incomplete | None = ..., from_minor: Incomplete | None = ...) -> Any: ...
 def upgrade_cell(cell: NotebookNode) -> Any: ...
 def downgrade_cell(cell: NotebookNode) -> Any: ...
diff --git a/typings/nbformat/v4/nbbase.pyi b/typings/nbformat/v4/nbbase.pyi
index 64e46dc4..84cfc45a 100644
--- a/typings/nbformat/v4/nbbase.pyi
+++ b/typings/nbformat/v4/nbbase.pyi
@@ -2,7 +2,6 @@ from typing import Any
 
 from _typeshed import Incomplete
 from nbformat.notebooknode import NotebookNode as NotebookNode
-
 nbformat: int
 nbformat_minor: int
 nbformat_schema: Incomplete
diff --git a/typings/nbformat/v4/nbjson.pyi b/typings/nbformat/v4/nbjson.pyi
index e83774b0..ce0edce8 100644
--- a/typings/nbformat/v4/nbjson.pyi
+++ b/typings/nbformat/v4/nbjson.pyi
@@ -12,7 +12,6 @@ from .rwbase import (
     split_lines as split_lines,
     strip_transient as strip_transient,
 )
-
 class BytesEncoder(json.JSONEncoder):
     def default(self, obj: Any) -> Any: ...
 
diff --git a/typings/nbformat/v4/rwbase.pyi b/typings/nbformat/v4/rwbase.pyi
index b543abcb..4b44f5b1 100644
--- a/typings/nbformat/v4/rwbase.pyi
+++ b/typings/nbformat/v4/rwbase.pyi
@@ -1,7 +1,6 @@
 from typing import Any, TextIO
 
 from nbformat.notebooknode import NotebookNode
-
 def rejoin_lines(nb: NotebookNode) -> Any: ...
 def split_lines(nb: NotebookNode) -> Any: ...
 def strip_transient(nb: NotebookNode) -> Any: ...
diff --git a/typings/rsmiBindings.pyi b/typings/rsmiBindings.pyi
index f30195f6..1ff41789 100644
--- a/typings/rsmiBindings.pyi
+++ b/typings/rsmiBindings.pyi
@@ -2,7 +2,6 @@
 import ctypes
 import sys
 from typing import Any, Literal
-
 if sys.version_info[:2] >= (3, 11):
   from typing import LiteralString
 else:
diff --git a/typings/simple_di/__init__.pyi b/typings/simple_di/__init__.pyi
index bc86f810..2e1fcb90 100644
--- a/typings/simple_di/__init__.pyi
+++ b/typings/simple_di/__init__.pyi
@@ -1,7 +1,6 @@
 from typing import Any, Callable, Generator, Generic, Tuple, TypeVar, Union, overload
 
 from _typeshed import Incomplete
-
 class _SentinelClass: ...
 _VT = TypeVar('_VT')
 
diff --git a/typings/simple_di/providers.pyi b/typings/simple_di/providers.pyi
index fe3b8228..5978e700 100644
--- a/typings/simple_di/providers.pyi
+++ b/typings/simple_di/providers.pyi
@@ -15,7 +15,6 @@ else:
 from _typeshed import Incomplete
 
 from . import _VT, Provider, _SentinelClass
-
 class Placeholder(Provider[_VT]): ...
 
 class Static(Provider[_VT]):