diff --git a/bench.py b/bench.py
index c220a3bc..60e99fa0 100755
--- a/bench.py
+++ b/bench.py
@@ -20,133 +20,46 @@ async def main(args: argparse.Namespace) -> int:
   url = f'http://localhost:3000/v1/{endpoint}'
   # len=572
   prompts = [
-      "Translate the following English text to French: 'Hello, how are you?'",
-      "Summarize the plot of the book 'To Kill a Mockingbird.'",
-      'Generate a list of 10 random numbers between 1 and 100.',
-      'What is the capital of France?',
-      'Write a poem about nature.',
-      'Convert 25 degrees Celsius to Fahrenheit.',
-      'Describe the process of photosynthesis.',
-      'Tell me a joke.',
-      'List five famous scientists and their contributions to science.',
-      'Write a short story about a detective solving a mystery.',
-      'Explain the theory of relativity.',
-      'Provide a brief history of the Roman Empire.',
-      'Create a shopping list for a BBQ party.',
-      "Write a movie review for the film 'Inception.'",
-      'Explain the concept of artificial intelligence.',
-      'Write a letter to your future self.',
-      'Describe the life cycle of a butterfly.',
-      'List the top 10 tourist destinations in Europe.',
-      'Explain the principles of supply and demand.',
-      'Create a menu for a vegetarian restaurant.',
-      'Write a haiku about the ocean.',
-      'Explain the importance of renewable energy sources.',
-      'List the ingredients for making chocolate chip cookies.',
-      'Write a persuasive essay on the benefits of exercise.',
-      'Describe the cultural significance of the Taj Mahal.',
-      'Explain the process of DNA replication.',
-      'Write a speech about the importance of education.',
-      'List the steps to start a small business.',
-      'Explain the concept of biodiversity.',
-      'Create a playlist for a road trip.',
-      'Write a short biography of Albert Einstein.',
-      'Describe the impact of social media on society.',
-      'Explain the principles of good nutrition.',
-      'List the 10 tallest mountains in the world.',
-      'Write a product review for a smartphone.',
-      'Create a workout routine for building muscle.',
-      'Explain the concept of climate change.',
-      'Describe the life and achievements of Marie Curie.',
-      'List the ingredients for making a classic margarita.',
-      'Write a blog post about time management.',
-      'Explain the process of cellular respiration.',
-      'Create a budget for a family vacation.',
-      "Write a book summary for 'The Great Gatsby.'",
-      'Describe the history of the Internet.',
-      'Explain the principles of effective communication.',
-      'List the top 10 historical landmarks in the world.',
-      'Write a love letter to someone special.',
-      'Explain the concept of human rights.',
-      'Create a recipe for homemade pizza.',
-      'Write a movie script for a short film.',
-      'Describe the structure of the atom.',
-      'List the 10 most influential artists of the 20th century.',
-      'Explain the process of mitosis.',
-      'Create a travel itinerary for a trip to Japan.',
-      'Write a poem about the beauty of nature.',
-      'Explain the importance of environmental conservation.',
-      'List the essential items for a hiking trip.',
-      'Write a short story set in a post-apocalyptic world.',
-      'Describe the history of the Olympic Games.',
-      'Explain the principles of democracy.',
-      'Create a business plan for a tech startup.',
-      'Write a letter of recommendation for a colleague.',
-      'List the ingredients for a classic Caesar salad.',
-      'Explain the concept of artificial neural networks.',
-      'Describe the life and work of Leonardo da Vinci.',
-      'List the 10 most popular tourist attractions in the United States.',
-      'Write a persuasive speech on the dangers of smoking.',
-      'Explain the process of natural selection.',
-      'Create a menu for a fine dining restaurant.',
-      'Write a poem about the beauty of the night sky.',
-      'Explain the importance of renewable energy.',
-      'List the necessary equipment for a camping trip.',
-      'Write a short biography of William Shakespeare.',
-      'Describe the impact of social media on business marketing.',
-      'Explain the principles of project management.',
-      'Create a playlist for a relaxing evening at home.',
-      'Write a blog post about the history of space exploration.',
-      'Explain the process of protein synthesis.',
-      'List the 10 most famous landmarks in Europe.',
-      'Write a book review for a classic novel.',
-      'Describe the history of ancient Egypt.',
-      'Explain the concept of cultural diversity.',
-      'Create a recipe for a gourmet sandwich.',
-      'Write a screenplay for a science fiction movie.',
-      "Describe the structure of the Earth's atmosphere.",
-      'List the 10 greatest inventions of all time.',
-      'Explain the process of meiosis.',
-      'Create a travel guide for a visit to Paris.',
-      'Write a poem about the changing seasons.',
-      'Explain the importance of clean energy sources.',
-      'List the essential camping gear for a wilderness adventure.',
-      'Write a short story about a time-traveling adventure.',
-      'Describe the history of the Renaissance.',
-      'Explain the principles of economics.',
-      'Create a business proposal for a new restaurant.',
-      'Write a letter to your future self 10 years from now.',
-      'List the ingredients for a classic lasagna.',
-      'Explain the concept of machine learning.',
-      'Describe the life and contributions of Martin Luther King Jr.',
-      'List the 10 most famous museums in the world.',
-      'Write a persuasive essay on the importance of environmental conservation.',
-      'Explain the process of geological erosion.',
-      'Create a menu for a vegan cafe.',
-      'Write a poem about the power of imagination.',
-      'Explain the significance of the Industrial Revolution.',
-      'List the items needed for a beach vacation.',
-      'Write a short biography of Charles Darwin.',
-      'Describe the impact of globalization on cultures.',
-      'Explain the principles of time management.',
-      'Create a playlist for a high-energy workout.',
-      'Write a blog post about the future of artificial intelligence.',
-      'Explain the process of DNA transcription.',
-      'List the 10 most iconic landmarks in Asia.',
-      'Write a book summary for a popular self-help book.',
-      'Describe the history of the ancient Greeks.',
-      'Explain the concept of social justice.',
-      'Create a recipe for a gourmet salad.',
-      'Write a screenplay for a romantic comedy movie.',
-      "Describe the layers of the Earth's atmosphere.",
-      'List the 10 most influential inventors in history.',
-      'Explain the process of plate tectonics.',
-      'Create a travel itinerary for a road trip across the USA.',
-      'Write a poem about the wonders of the natural world.',
-      'Explain the importance of sustainable agriculture.',
-      'List the essential hiking gear for a mountain expedition.',
-      'Write a short story about a futuristic dystopia.',
-      'Describe the history of the Middle Ages.',
+      "Translate the following English text to French: 'Hello, how are you?'", "Summarize the plot of the book 'To Kill a Mockingbird.'",
+      'Generate a list of 10 random numbers between 1 and 100.', 'What is the capital of France?', 'Write a poem about nature.', 'Convert 25 degrees Celsius to Fahrenheit.',
+      'Describe the process of photosynthesis.', 'Tell me a joke.', 'List five famous scientists and their contributions to science.',
+      'Write a short story about a detective solving a mystery.', 'Explain the theory of relativity.', 'Provide a brief history of the Roman Empire.',
+      'Create a shopping list for a BBQ party.', "Write a movie review for the film 'Inception.'", 'Explain the concept of artificial intelligence.', 'Write a letter to your future self.',
+      'Describe the life cycle of a butterfly.', 'List the top 10 tourist destinations in Europe.', 'Explain the principles of supply and demand.',
+      'Create a menu for a vegetarian restaurant.', 'Write a haiku about the ocean.', 'Explain the importance of renewable energy sources.',
+      'List the ingredients for making chocolate chip cookies.', 'Write a persuasive essay on the benefits of exercise.', 'Describe the cultural significance of the Taj Mahal.',
+      'Explain the process of DNA replication.', 'Write a speech about the importance of education.', 'List the steps to start a small business.', 'Explain the concept of biodiversity.',
+      'Create a playlist for a road trip.', 'Write a short biography of Albert Einstein.', 'Describe the impact of social media on society.', 'Explain the principles of good nutrition.',
+      'List the 10 tallest mountains in the world.', 'Write a product review for a smartphone.', 'Create a workout routine for building muscle.', 'Explain the concept of climate change.',
+      'Describe the life and achievements of Marie Curie.', 'List the ingredients for making a classic margarita.', 'Write a blog post about time management.',
+      'Explain the process of cellular respiration.', 'Create a budget for a family vacation.', "Write a book summary for 'The Great Gatsby.'", 'Describe the history of the Internet.',
+      'Explain the principles of effective communication.', 'List the top 10 historical landmarks in the world.', 'Write a love letter to someone special.',
+      'Explain the concept of human rights.', 'Create a recipe for homemade pizza.', 'Write a movie script for a short film.', 'Describe the structure of the atom.',
+      'List the 10 most influential artists of the 20th century.', 'Explain the process of mitosis.', 'Create a travel itinerary for a trip to Japan.',
+      'Write a poem about the beauty of nature.', 'Explain the importance of environmental conservation.', 'List the essential items for a hiking trip.',
+      'Write a short story set in a post-apocalyptic world.', 'Describe the history of the Olympic Games.', 'Explain the principles of democracy.',
+      'Create a business plan for a tech startup.', 'Write a letter of recommendation for a colleague.', 'List the ingredients for a classic Caesar salad.',
+      'Explain the concept of artificial neural networks.', 'Describe the life and work of Leonardo da Vinci.', 'List the 10 most popular tourist attractions in the United States.',
+      'Write a persuasive speech on the dangers of smoking.', 'Explain the process of natural selection.', 'Create a menu for a fine dining restaurant.',
+      'Write a poem about the beauty of the night sky.', 'Explain the importance of renewable energy.', 'List the necessary equipment for a camping trip.',
+      'Write a short biography of William Shakespeare.', 'Describe the impact of social media on business marketing.', 'Explain the principles of project management.',
+      'Create a playlist for a relaxing evening at home.', 'Write a blog post about the history of space exploration.', 'Explain the process of protein synthesis.',
+      'List the 10 most famous landmarks in Europe.', 'Write a book review for a classic novel.', 'Describe the history of ancient Egypt.', 'Explain the concept of cultural diversity.',
+      'Create a recipe for a gourmet sandwich.', 'Write a screenplay for a science fiction movie.', "Describe the structure of the Earth's atmosphere.",
+      'List the 10 greatest inventions of all time.', 'Explain the process of meiosis.', 'Create a travel guide for a visit to Paris.', 'Write a poem about the changing seasons.',
+      'Explain the importance of clean energy sources.', 'List the essential camping gear for a wilderness adventure.', 'Write a short story about a time-traveling adventure.',
+      'Describe the history of the Renaissance.', 'Explain the principles of economics.', 'Create a business proposal for a new restaurant.',
+      'Write a letter to your future self 10 years from now.', 'List the ingredients for a classic lasagna.', 'Explain the concept of machine learning.',
+      'Describe the life and contributions of Martin Luther King Jr.', 'List the 10 most famous museums in the world.',
+      'Write a persuasive essay on the importance of environmental conservation.', 'Explain the process of geological erosion.', 'Create a menu for a vegan cafe.',
+      'Write a poem about the power of imagination.', 'Explain the significance of the Industrial Revolution.', 'List the items needed for a beach vacation.',
+      'Write a short biography of Charles Darwin.', 'Describe the impact of globalization on cultures.', 'Explain the principles of time management.',
+      'Create a playlist for a high-energy workout.', 'Write a blog post about the future of artificial intelligence.', 'Explain the process of DNA transcription.',
+      'List the 10 most iconic landmarks in Asia.', 'Write a book summary for a popular self-help book.', 'Describe the history of the ancient Greeks.',
+      'Explain the concept of social justice.', 'Create a recipe for a gourmet salad.', 'Write a screenplay for a romantic comedy movie.', "Describe the layers of the Earth's atmosphere.",
+      'List the 10 most influential inventors in history.', 'Explain the process of plate tectonics.', 'Create a travel itinerary for a road trip across the USA.',
+      'Write a poem about the wonders of the natural world.', 'Explain the importance of sustainable agriculture.', 'List the essential hiking gear for a mountain expedition.',
+      'Write a short story about a futuristic dystopia.', 'Describe the history of the Middle Ages.',
       'Write a letter to your future self, offering reflections on personal growth, achievements, and aspirations, as well as words of encouragement and guidance for your future journey.',
       'List the ingredients for a classic chicken pot pie recipe, a beloved comfort food that combines tender chicken, vegetables, and a flaky pastry crust in a savory filling.',
       'Explain the concept of artificial neural networks and their pivotal role in machine learning and artificial intelligence applications, from image recognition to natural language processing.',
diff --git a/examples/openai_client.py b/examples/openai_client.py
index 86098807..a8c61c66 100644
--- a/examples/openai_client.py
+++ b/examples/openai_client.py
@@ -7,33 +7,14 @@ response = openai.Completion.create(model="gpt-3.5-turbo-instruct", prompt="Writ
 
 print(response)
 
-for chunk in openai.Completion.create(
-    model="gpt-3.5-turbo-instruct",
-    prompt="Say this is a test",
-    max_tokens=7,
-    temperature=0,
-    stream=True
-):
+for chunk in openai.Completion.create(model="gpt-3.5-turbo-instruct", prompt="Say this is a test", max_tokens=7, temperature=0, stream=True):
   print(chunk)
 
-
-completion = openai.ChatCompletion.create(
-    model="gpt-3.5-turbo",
-    messages=[
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": "Hello!"}
-    ]
-)
+completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=[{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello!"}])
 
 print(completion)
 
-completion = openai.ChatCompletion.create(
-    model="gpt-3.5-turbo",
-    messages=[
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": "Hello!"}
-    ],
-    stream=True
-)
+completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=[{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello!"}], stream=True)
 
-for chunk in completion: print(chunk)
+for chunk in completion:
+  print(chunk)
diff --git a/openllm-client/src/openllm_client/benmin/_grpc.py b/openllm-client/src/openllm_client/benmin/_grpc.py
index dc62d154..9d381d4a 100644
--- a/openllm-client/src/openllm_client/benmin/_grpc.py
+++ b/openllm-client/src/openllm_client/benmin/_grpc.py
@@ -154,10 +154,12 @@ class GrpcClient(Client):
     try:
       reflection.apis[api.name] = InferenceAPI[t.Any](None,
                                                       bentoml.io.from_spec({
-                                                          'id': api.input.descriptor_id, 'args': json_format.MessageToDict(api.input.attributes).get('args', None)
+                                                          'id': api.input.descriptor_id,
+                                                          'args': json_format.MessageToDict(api.input.attributes).get('args', None)
                                                       }),
                                                       bentoml.io.from_spec({
-                                                          'id': api.output.descriptor_id, 'args': json_format.MessageToDict(api.output.attributes).get('args', None)
+                                                          'id': api.output.descriptor_id,
+                                                          'args': json_format.MessageToDict(api.output.attributes).get('args', None)
                                                       }),
                                                       name=api.name,
                                                       doc=api.docs)
@@ -256,10 +258,12 @@ class AsyncGrpcClient(AsyncClient):
     try:
      reflection.apis[api.name] = InferenceAPI[t.Any](None,
                                                      bentoml.io.from_spec({
-                                                         'id': api.input.descriptor_id, 'args': json_format.MessageToDict(api.input.attributes).get('args', None)
+                                                         'id': api.input.descriptor_id,
+                                                         'args': json_format.MessageToDict(api.input.attributes).get('args', None)
                                                      }),
                                                      bentoml.io.from_spec({
-                                                         'id': api.output.descriptor_id, 'args': json_format.MessageToDict(api.output.attributes).get('args', None)
+                                                         'id': api.output.descriptor_id,
+                                                         'args': json_format.MessageToDict(api.output.attributes).get('args', None)
                                                      }),
                                                      name=api.name,
                                                      doc=api.docs)
diff --git a/openllm-core/src/openllm_core/_configuration.py b/openllm-core/src/openllm_core/_configuration.py
index 126b0309..61c5d14a 100644
--- a/openllm-core/src/openllm_core/_configuration.py
+++ b/openllm-core/src/openllm_core/_configuration.py
@@ -355,8 +355,7 @@ class GenerationConfig(ReprMixin):
     return {i.name for i in attr.fields(self.__class__)}
 
 bentoml_cattr.register_unstructure_hook_factory(
-    lambda cls: attr.has(cls) and lenient_issubclass(cls, GenerationConfig),
-    lambda cls: make_dict_unstructure_fn(
+    lambda cls: attr.has(cls) and lenient_issubclass(cls, GenerationConfig), lambda cls: make_dict_unstructure_fn(
         cls, bentoml_cattr, _cattrs_omit_if_default=False, _cattrs_use_linecache=True, **{
             k: override(omit=True) for k, v in attr.fields_dict(cls).items() if v.default in (None, attr.NOTHING)
         }))
@@ -436,8 +435,7 @@ class SamplingParams(ReprMixin):
     return cls(_internal=True, temperature=temperature, top_k=top_k, top_p=top_p, max_tokens=max_tokens, **attrs)
 
 bentoml_cattr.register_unstructure_hook_factory(
-    lambda cls: attr.has(cls) and lenient_issubclass(cls, SamplingParams),
-    lambda cls: make_dict_unstructure_fn(
+    lambda cls: attr.has(cls) and lenient_issubclass(cls, SamplingParams), lambda cls: make_dict_unstructure_fn(
         cls, bentoml_cattr, _cattrs_omit_if_default=False, _cattrs_use_linecache=True, **{
             k: override(omit=True) for k, v in attr.fields_dict(cls).items() if v.default in (None, attr.NOTHING)
         }))
@@ -494,8 +492,7 @@ _transformed_type: DictStrAny = {'fine_tune_strategies': t.Dict[AdapterType, Fin
 
 @attr.define(frozen=False,
              slots=True,
-             field_transformer=lambda _,
-             __: [
+             field_transformer=lambda _, __: [
                  attr.Attribute.from_counting_attr(
                      k,
                      dantic.Field(kw_only=False if t.get_origin(ann) is not Required else True,
@@ -503,8 +500,7 @@ _transformed_type: DictStrAny = {'fine_tune_strategies': t.Dict[AdapterType, Fin
                                   use_default_converter=False,
                                   type=_transformed_type.get(k, ann),
                                   metadata={'target': f'__openllm_{k}__'},
-                                  description=f'ModelSettings field for {k}.')) for k,
-                                  ann in t.get_type_hints(ModelSettings).items()
+                                  description=f'ModelSettings field for {k}.')) for k, ann in t.get_type_hints(ModelSettings).items()
 ])
 class _ModelSettingsAttr:
   '''Internal attrs representation of ModelSettings.'''
@@ -521,7 +517,8 @@ class _ModelSettingsAttr:
       model_ids=['__default__'],
      architecture='PreTrainedModel',
      default_backend={
-          'cpu': 'pt', 'nvidia.com/gpu': 'pt'
+          'cpu': 'pt',
+          'nvidia.com/gpu': 'pt'
      },
      serialisation='legacy',
      name_type='dasherize',
@@ -1013,8 +1010,7 @@ class LLMConfig(_ConfigAttr):
             cls.__openllm_model_name__,
             suffix=suffix_env,
             globs=globs,
-            default_callback=lambda field_name,
-            field_default: getattr(getattr(cls, class_attr), field_name, field_default)
+            default_callback=lambda field_name, field_default: getattr(getattr(cls, class_attr), field_name, field_default)
             if codegen.has_own_attribute(cls, class_attr) else field_default))
     # For pickling to work, the __module__ variable needs to be set to the
     # frame where the class is created. This respect the module that is created from cls
@@ -1329,9 +1325,9 @@ class LLMConfig(_ConfigAttr):
     _new_cfg = {k: v for k, v in attrs.items() if k in attr.fields_dict(_ModelSettingsAttr)}
     attrs = {k: v for k, v in attrs.items() if k not in _new_cfg}
     new_cls = types.new_class(
-        name or f"{cls.__name__.replace('Config', '')}DerivateConfig", (cls,), {},
-        lambda ns: ns.update({
-            '__config__': config_merger.merge(copy.deepcopy(cls.__dict__['__config__']), _new_cfg), '__base_config__': cls,  # keep a reference for easy access
+        name or f"{cls.__name__.replace('Config', '')}DerivateConfig", (cls,), {}, lambda ns: ns.update({
+            '__config__': config_merger.merge(copy.deepcopy(cls.__dict__['__config__']), _new_cfg),
+            '__base_config__': cls,  # keep a reference for easy access
         }))
 
     # For pickling to work, the __module__ variable needs to be set to the
diff --git a/openllm-core/src/openllm_core/_strategies.py b/openllm-core/src/openllm_core/_strategies.py
index 078a93d1..07777275 100644
--- a/openllm-core/src/openllm_core/_strategies.py
+++ b/openllm-core/src/openllm_core/_strategies.py
@@ -217,8 +217,7 @@ def _validate(cls: type[DynResource], val: list[t.Any]) -> None:
 
 def _make_resource_class(name: str, resource_kind: str, docstring: str) -> type[DynResource]:
   return types.new_class(
-      name, (bentoml.Resource[t.List[str]], ReprMixin), {'resource_id': resource_kind},
-      lambda ns: ns.update({
+      name, (bentoml.Resource[t.List[str]], ReprMixin), {'resource_id': resource_kind}, lambda ns: ns.update({
           'resource_id': resource_kind,
          'from_spec': classmethod(_from_spec),
          'from_system': classmethod(_from_system),
@@ -235,16 +234,12 @@ _NVIDIA_GPU_RESOURCE: t.Literal['nvidia.com/gpu'] = 'nvidia.com/gpu'
 _CPU_RESOURCE: t.Literal['cpu'] = 'cpu'
 
 NvidiaGpuResource = _make_resource_class(
-    'NvidiaGpuResource',
-    _NVIDIA_GPU_RESOURCE,
-    '''NVIDIA GPU resource.
+    'NvidiaGpuResource', _NVIDIA_GPU_RESOURCE, '''NVIDIA GPU resource.
 
   This is a modified version of internal's BentoML's NvidiaGpuResource
   where it respects and parse CUDA_VISIBLE_DEVICES correctly.''')
 AmdGpuResource = _make_resource_class(
-    'AmdGpuResource',
-    _AMD_GPU_RESOURCE,
-    '''AMD GPU resource.
+    'AmdGpuResource', _AMD_GPU_RESOURCE, '''AMD GPU resource.
 
   Since ROCm will respect CUDA_VISIBLE_DEVICES, the behaviour of from_spec, from_system are similar to ``NvidiaGpuResource``.
   Currently ``validate`` is not yet supported.''')
diff --git a/openllm-core/src/openllm_core/config/configuration_baichuan.py b/openllm-core/src/openllm_core/config/configuration_baichuan.py
index 0522c54d..ba3be4e8 100644
--- a/openllm-core/src/openllm_core/config/configuration_baichuan.py
+++ b/openllm-core/src/openllm_core/config/configuration_baichuan.py
@@ -46,11 +46,7 @@ class BaichuanConfig(openllm_core.LLMConfig):
       'architecture': 'BaiChuanForCausalLM',
       'default_id': 'baichuan-inc/baichuan-7b',
       'model_ids': [
-          'baichuan-inc/baichuan-7b',
-          'baichuan-inc/baichuan-13b-base',
-          'baichuan-inc/baichuan-13b-chat',
-          'fireballoon/baichuan-vicuna-chinese-7b',
-          'fireballoon/baichuan-vicuna-7b',
+          'baichuan-inc/baichuan-7b', 'baichuan-inc/baichuan-13b-base', 'baichuan-inc/baichuan-13b-chat', 'fireballoon/baichuan-vicuna-chinese-7b', 'fireballoon/baichuan-vicuna-7b',
           'hiyouga/baichuan-7b-sft'
       ]
   }
diff --git a/openllm-core/src/openllm_core/config/configuration_dolly_v2.py b/openllm-core/src/openllm_core/config/configuration_dolly_v2.py
index dbdf4a9b..cd916101 100644
--- a/openllm-core/src/openllm_core/config/configuration_dolly_v2.py
+++ b/openllm-core/src/openllm_core/config/configuration_dolly_v2.py
@@ -101,7 +101,11 @@ class DollyV2Config(openllm_core.LLMConfig):
                      use_default_prompt_template: bool = True,
                      **attrs: t.Any) -> tuple[str, dict[str, t.Any], dict[str, t.Any]]:
     return process_prompt(prompt, DEFAULT_PROMPT_TEMPLATE, use_default_prompt_template, **attrs), {
-        'max_new_tokens': max_new_tokens, 'top_k': top_k, 'top_p': top_p, 'temperature': temperature, **attrs
+        'max_new_tokens': max_new_tokens,
+        'top_k': top_k,
+        'top_p': top_p,
+        'temperature': temperature,
+        **attrs
     }, {}
 
   def postprocess_generate(self, prompt: str, generation_result: list[dict[t.Literal['generated_text'], str]], **_: t.Any) -> str:
diff --git a/openllm-core/src/openllm_core/config/configuration_falcon.py b/openllm-core/src/openllm_core/config/configuration_falcon.py
index 06c0b2a5..9de349a1 100644
--- a/openllm-core/src/openllm_core/config/configuration_falcon.py
+++ b/openllm-core/src/openllm_core/config/configuration_falcon.py
@@ -49,7 +49,12 @@ class FalconConfig(openllm_core.LLMConfig):
       'default_id': 'tiiuae/falcon-7b',
       'model_ids': ['tiiuae/falcon-7b', 'tiiuae/falcon-40b', 'tiiuae/falcon-7b-instruct', 'tiiuae/falcon-40b-instruct'],
       'fine_tune_strategies': ({
-          'adapter_type': 'lora', 'r': 64, 'lora_alpha': 16, 'lora_dropout': 0.1, 'bias': 'none', 'target_modules': ['query_key_value', 'dense', 'dense_h_to_4h', 'dense_4h_to_h']
+          'adapter_type': 'lora',
+          'r': 64,
+          'lora_alpha': 16,
+          'lora_dropout': 0.1,
+          'bias': 'none',
+          'target_modules': ['query_key_value', 'dense', 'dense_h_to_4h', 'dense_4h_to_h']
       },)
   }
 
@@ -71,7 +76,11 @@ class FalconConfig(openllm_core.LLMConfig):
                      use_default_prompt_template: bool = False,
                      **attrs: t.Any) -> tuple[str, dict[str, t.Any], dict[str, t.Any]]:
     return process_prompt(prompt, DEFAULT_PROMPT_TEMPLATE, use_default_prompt_template, **attrs), {
-        'max_new_tokens': max_new_tokens, 'top_k': top_k, 'num_return_sequences': num_return_sequences, 'eos_token_id': eos_token_id, **attrs
+        'max_new_tokens': max_new_tokens,
+        'top_k': top_k,
+        'num_return_sequences': num_return_sequences,
+        'eos_token_id': eos_token_id,
+        **attrs
     }, {}
 
   def postprocess_generate(self, prompt: str, generation_result: t.Sequence[str], **_: t.Any) -> str:
diff --git a/openllm-core/src/openllm_core/config/configuration_flan_t5.py b/openllm-core/src/openllm_core/config/configuration_flan_t5.py
index 37964fdc..0ede6ad1 100644
--- a/openllm-core/src/openllm_core/config/configuration_flan_t5.py
+++ b/openllm-core/src/openllm_core/config/configuration_flan_t5.py
@@ -67,7 +67,11 @@ class FlanT5Config(openllm_core.LLMConfig):
                      use_default_prompt_template: bool = True,
                      **attrs: t.Any) -> tuple[str, dict[str, t.Any], dict[str, t.Any]]:
     return process_prompt(prompt, DEFAULT_PROMPT_TEMPLATE, use_default_prompt_template, **attrs), {
-        'max_new_tokens': max_new_tokens, 'temperature': temperature, 'top_k': top_k, 'top_p': top_p, 'repetition_penalty': repetition_penalty
+        'max_new_tokens': max_new_tokens,
+        'temperature': temperature,
+        'top_k': top_k,
+        'top_p': top_p,
+        'repetition_penalty': repetition_penalty
     }, {}
 
   def postprocess_generate(self, prompt: str, generation_result: t.Sequence[str], **_: t.Any) -> str:
diff --git a/openllm-core/src/openllm_core/config/configuration_llama.py b/openllm-core/src/openllm_core/config/configuration_llama.py
index 464605e8..66c580dd 100644
--- a/openllm-core/src/openllm_core/config/configuration_llama.py
+++ b/openllm-core/src/openllm_core/config/configuration_llama.py
@@ -74,7 +74,8 @@ class LlamaConfig(openllm_core.LLMConfig):
       'name_type': 'lowercase',
       'url': 'https://github.com/facebookresearch/llama',
       'default_backend': {
-          'cpu': 'pt', 'nvidia.com/gpu': 'pt'
+          'cpu': 'pt',
+          'nvidia.com/gpu': 'pt'
       },
       'architecture': 'LlamaForCausalLM',
       'requirements': ['fairscale', 'sentencepiece', 'scipy'],
@@ -82,21 +83,16 @@ class LlamaConfig(openllm_core.LLMConfig):
       'default_id': 'NousResearch/llama-2-7b-hf',
       'serialisation': 'safetensors',
       'model_ids': [
-          'meta-llama/Llama-2-70b-chat-hf',
-          'meta-llama/Llama-2-13b-chat-hf',
-          'meta-llama/Llama-2-7b-chat-hf',
-          'meta-llama/Llama-2-70b-hf',
-          'meta-llama/Llama-2-13b-hf',
-          'meta-llama/Llama-2-7b-hf',
-          'NousResearch/llama-2-70b-chat-hf',
-          'NousResearch/llama-2-13b-chat-hf',
-          'NousResearch/llama-2-7b-chat-hf',
-          'NousResearch/llama-2-70b-hf',
-          'NousResearch/llama-2-13b-hf',
-          'NousResearch/llama-2-7b-hf',
+          'meta-llama/Llama-2-70b-chat-hf', 'meta-llama/Llama-2-13b-chat-hf', 'meta-llama/Llama-2-7b-chat-hf', 'meta-llama/Llama-2-70b-hf', 'meta-llama/Llama-2-13b-hf',
+          'meta-llama/Llama-2-7b-hf', 'NousResearch/llama-2-70b-chat-hf', 'NousResearch/llama-2-13b-chat-hf', 'NousResearch/llama-2-7b-chat-hf', 'NousResearch/llama-2-70b-hf',
+          'NousResearch/llama-2-13b-hf', 'NousResearch/llama-2-7b-hf',
       ],
       'fine_tune_strategies': ({
-          'adapter_type': 'lora', 'r': 64, 'lora_alpha': 16, 'lora_dropout': 0.1, 'bias': 'none'
+          'adapter_type': 'lora',
+          'r': 64,
+          'lora_alpha': 16,
+          'lora_dropout': 0.1,
+          'bias': 'none'
       },)
   }
 
@@ -124,7 +120,10 @@ class LlamaConfig(openllm_core.LLMConfig):
     if prompt_template is None: prompt_template = DEFAULT_PROMPT_TEMPLATE('v2' if use_llama2_prompt else 'v1')
     elif isinstance(prompt_template, str): prompt_template = PromptTemplate(template=prompt_template)
     return prompt_template.with_options(system_message=system_message).format(instruction=prompt), {
-        'max_new_tokens': max_new_tokens, 'temperature': temperature, 'top_p': top_p, 'top_k': top_k
+        'max_new_tokens': max_new_tokens,
+        'temperature': temperature,
+        'top_p': top_p,
+        'top_k': top_k
     }, {}
 
   def postprocess_generate(self, prompt: str, generation_result: list[str], **_: t.Any) -> str:
diff --git a/openllm-core/src/openllm_core/config/configuration_opt.py b/openllm-core/src/openllm_core/config/configuration_opt.py
index b57f3739..cee9083d 100644
--- a/openllm-core/src/openllm_core/config/configuration_opt.py
+++ b/openllm-core/src/openllm_core/config/configuration_opt.py
@@ -54,7 +54,12 @@ class OPTConfig(openllm_core.LLMConfig):
       'architecture': 'OPTForCausalLM',
       'model_ids': ['facebook/opt-125m', 'facebook/opt-350m', 'facebook/opt-1.3b', 'facebook/opt-2.7b', 'facebook/opt-6.7b', 'facebook/opt-66b'],
       'fine_tune_strategies': ({
-          'adapter_type': 'lora', 'r': 16, 'lora_alpha': 32, 'target_modules': ['q_proj', 'v_proj'], 'lora_dropout': 0.05, 'bias': 'none'
+          'adapter_type': 'lora',
+          'r': 16,
+          'lora_alpha': 32,
+          'target_modules': ['q_proj', 'v_proj'],
+          'lora_dropout': 0.05,
+          'bias': 'none'
       },)
   }
   format_outputs: bool = dantic.Field(False, description='''Whether to format the outputs. This can be used when num_return_sequences > 1.''')
@@ -76,7 +81,10 @@ class OPTConfig(openllm_core.LLMConfig):
                      use_default_prompt_template: bool = False,
                      **attrs: t.Any) -> tuple[str, dict[str, t.Any], dict[str, t.Any]]:
     return process_prompt(prompt, DEFAULT_PROMPT_TEMPLATE, use_default_prompt_template, **attrs), {
-        'max_new_tokens': max_new_tokens, 'temperature': temperature, 'top_k': top_k, 'num_return_sequences': num_return_sequences
+        'max_new_tokens': max_new_tokens,
+        'temperature': temperature,
+        'top_k': top_k,
+        'num_return_sequences': num_return_sequences
     }, {}
 
   def postprocess_generate(self, prompt: str, generation_result: t.Sequence[str], **attrs: t.Any) -> str:
diff --git a/openllm-core/src/openllm_core/utils/__init__.py b/openllm-core/src/openllm_core/utils/__init__.py
index dfe76ea6..092a39fa 100644
--- a/openllm-core/src/openllm_core/utils/__init__.py
+++ b/openllm-core/src/openllm_core/utils/__init__.py
@@ -152,24 +152,32 @@ _LOGGING_CONFIG: dict[str, t.Any] = {
     'filters': {
         'excfilter': {
             '()': 'openllm_core.utils.ExceptionFilter'
-        }, 'infofilter': {
+        },
+        'infofilter': {
             '()': 'openllm_core.utils.InfoFilter'
         }
     },
     'handlers': {
         'bentomlhandler': {
-            'class': 'logging.StreamHandler', 'filters': ['excfilter', 'infofilter'], 'stream': 'ext://sys.stdout'
+            'class': 'logging.StreamHandler',
+            'filters': ['excfilter', 'infofilter'],
+            'stream': 'ext://sys.stdout'
         },
         'defaulthandler': {
-            'class': 'logging.StreamHandler', 'level': logging.WARNING
+            'class': 'logging.StreamHandler',
+            'level': logging.WARNING
         }
     },
     'loggers': {
         'bentoml': {
-            'handlers': ['bentomlhandler', 'defaulthandler'], 'level': logging.INFO, 'propagate': False
+            'handlers': ['bentomlhandler', 'defaulthandler'],
+            'level': logging.INFO,
+            'propagate': False
         },
         'openllm': {
-            'handlers': ['bentomlhandler', 'defaulthandler'], 'level': logging.INFO, 'propagate': False
+            'handlers': ['bentomlhandler', 'defaulthandler'],
+            'level': logging.INFO,
+            'propagate': False
         }
     },
     'root': {
@@ -319,31 +327,10 @@ _import_structure: dict[str, list[str]] = {
     'lazy': [],
     'representation': ['ReprMixin'],
     'import_utils': [
-        'OPTIONAL_DEPENDENCIES',
-        'DummyMetaclass',
-        'EnvVarMixin',
-        'require_backends',
-        'is_cpm_kernels_available',
-        'is_einops_available',
-        'is_flax_available',
-        'is_tf_available',
-        'is_vllm_available',
-        'is_torch_available',
-        'is_bitsandbytes_available',
-        'is_peft_available',
-        'is_datasets_available',
-        'is_jupyter_available',
-        'is_jupytext_available',
-        'is_notebook_available',
-        'is_triton_available',
-        'is_autogptq_available',
-        'is_sentencepiece_available',
-        'is_xformers_available',
-        'is_fairscale_available',
-        'is_grpc_available',
-        'is_grpc_health_available',
-        'is_transformers_available',
-        'is_optimum_supports_gptq',
+        'OPTIONAL_DEPENDENCIES', 'DummyMetaclass', 'EnvVarMixin', 'require_backends', 'is_cpm_kernels_available', 'is_einops_available', 'is_flax_available', 'is_tf_available',
+        'is_vllm_available', 'is_torch_available', 'is_bitsandbytes_available', 'is_peft_available', 'is_datasets_available', 'is_jupyter_available', 'is_jupytext_available',
+        'is_notebook_available', 'is_triton_available', 'is_autogptq_available', 'is_sentencepiece_available', 'is_xformers_available', 'is_fairscale_available', 'is_grpc_available',
+        'is_grpc_health_available', 'is_transformers_available', 'is_optimum_supports_gptq',
     ]
 }
 
diff --git a/openllm-core/src/openllm_core/utils/codegen.py b/openllm-core/src/openllm_core/utils/codegen.py
index 83e7fb1d..aa710184 100644
--- a/openllm-core/src/openllm_core/utils/codegen.py
+++ b/openllm-core/src/openllm_core/utils/codegen.py
@@ -163,8 +163,7 @@ def gen_sdk(func: _T, name: str | None = None, **attrs: t.Any) -> _T:
               '__doc__': inspect.cleandoc(doc),
               '__module__': 'openllm'
           }),
-      )(func, **attrs),
-      func,
+      )(func, **attrs), func,
   ))
 
 __all__ = ['gen_sdk', 'make_attr_tuple_class', 'make_env_transformer', 'generate_unique_filename', 'generate_function']
diff --git a/openllm-core/src/openllm_core/utils/dantic.py b/openllm-core/src/openllm_core/utils/dantic.py
index a6fae514..13c5d3a7 100644
--- a/openllm-core/src/openllm_core/utils/dantic.py
+++ b/openllm-core/src/openllm_core/utils/dantic.py
@@ -25,23 +25,8 @@ AnyCallable = t.Callable[..., t.Any]
 FC = t.TypeVar('FC', bound=t.Union[AnyCallable, click.Command])
 
 __all__ = [
-    'FC',
-    'attrs_to_options',
-    'Field',
-    'parse_type',
-    'is_typing',
-    'is_literal',
-    'ModuleType',
-    'EnumChoice',
-    'LiteralChoice',
-    'allows_multiple',
-    'is_mapping',
-    'is_container',
-    'parse_container_args',
-    'parse_single_arg',
-    'CUDA',
-    'JsonType',
-    'BytesType'
+    'FC', 'attrs_to_options', 'Field', 'parse_type', 'is_typing', 'is_literal', 'ModuleType', 'EnumChoice', 'LiteralChoice', 'allows_multiple', 'is_mapping', 'is_container',
+    'parse_container_args', 'parse_single_arg', 'CUDA', 'JsonType', 'BytesType'
 ]
 
 def __dir__() -> list[str]:
diff --git a/openllm-core/src/openllm_core/utils/import_utils.py b/openllm-core/src/openllm_core/utils/import_utils.py
index e0a4aef2..988a2e30 100644
--- a/openllm-core/src/openllm_core/utils/import_utils.py
+++ b/openllm-core/src/openllm_core/utils/import_utils.py
@@ -139,17 +139,8 @@ def is_tf_available() -> bool:
 _tf_version = None
 if USE_TF in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TORCH not in ENV_VARS_TRUE_VALUES:
   if _tf_available:
-    candidates = ('tensorflow',
-                  'tensorflow-cpu',
-                  'tensorflow-gpu',
-                  'tf-nightly',
-                  'tf-nightly-cpu',
-                  'tf-nightly-gpu',
-                  'intel-tensorflow',
-                  'intel-tensorflow-avx512',
-                  'tensorflow-rocm',
-                  'tensorflow-macos',
-                  'tensorflow-aarch64',
+    candidates = ('tensorflow', 'tensorflow-cpu', 'tensorflow-gpu', 'tf-nightly', 'tf-nightly-cpu', 'tf-nightly-gpu', 'intel-tensorflow', 'intel-tensorflow-avx512', 'tensorflow-rocm',
+                  'tensorflow-macos', 'tensorflow-aarch64',
                  )
     _tf_version = None
     # For the metadata, we have to look for both tensorflow and tensorflow-cpu
diff --git a/openllm-python/src/openllm/_embeddings.py b/openllm-python/src/openllm/_embeddings.py
index 84be1890..9f5f3257 100644
--- a/openllm-python/src/openllm/_embeddings.py
+++ b/openllm-python/src/openllm/_embeddings.py
@@ -34,7 +34,8 @@ def get_or_download(ids: str = _BENTOMODEL_ID) -> bentoml.Model:
                            options=ModelOptions(),
                            context=openllm.utils.generate_context(framework_name='transformers'),
                            labels={
-                               'runtime': 'pt', 'framework': 'openllm'
+                               'runtime': 'pt',
+                               'framework': 'openllm'
                            },
                            signatures=model_signatures) as bentomodel:
     snapshot_download(_GENERIC_EMBEDDING_ID,
diff --git a/openllm-python/src/openllm/_llm.py b/openllm-python/src/openllm/_llm.py
index 8dcf8b1a..61952026 100644
--- a/openllm-python/src/openllm/_llm.py
+++ b/openllm-python/src/openllm/_llm.py
@@ -281,22 +281,10 @@ class LLM(LLMInterface[M, T], ReprMixin):
   if t.TYPE_CHECKING:
     __name__: str
   if t.TYPE_CHECKING and not MYPY:
-    def __attrs_init__(self,
-                       config: LLMConfig,
-                       quantization_config: t.Optional[t.Union[transformers.BitsAndBytesConfig, transformers.GPTQConfig]],
-                       quantize: t.Optional[LiteralQuantise],
-                       model_id: str,
-                       model_decls: TupleAny,
-                       model_attrs: DictStrAny,
-                       tokenizer_attrs: DictStrAny,
-                       tag: bentoml.Tag,
-                       adapters_mapping: t.Optional[AdaptersMapping],
-                       model_version: t.Optional[str],
-                       serialisation: LiteralSerialisation,
-                       _local: bool,
-                       prompt_template: PromptTemplate | None,
-                       system_message: str | None,
-                       **attrs: t.Any) -> None:
+    def __attrs_init__(self, config: LLMConfig, quantization_config: t.Optional[t.Union[transformers.BitsAndBytesConfig,
+                                                                                        transformers.GPTQConfig]], quantize: t.Optional[LiteralQuantise], model_id: str, model_decls: TupleAny,
+                       model_attrs: DictStrAny, tokenizer_attrs: DictStrAny, tag: bentoml.Tag, adapters_mapping: t.Optional[AdaptersMapping], model_version: t.Optional[str],
+                       serialisation: LiteralSerialisation, _local: bool, prompt_template: PromptTemplate | None, system_message: str | None, **attrs: t.Any) -> None:
       '''Generated __attrs_init__ for openllm.LLM.'''
 
   config: LLMConfig
@@ -540,20 +528,9 @@ class LLM(LLMInterface[M, T], ReprMixin):
   def generate_tag(cls, *param_decls: t.Any, **attrs: t.Any) -> bentoml.Tag:
     return bentoml.Tag.from_taglike(cls._generate_tag_str(*param_decls, **attrs))
 
-  def __init__(self,
-               *args: t.Any,
-               model_id: str,
-               llm_config: LLMConfig,
-               quantization_config: transformers.BitsAndBytesConfig | transformers.GPTQConfig | None,
-               _quantize: LiteralQuantise | None,
-               _model_version: str,
-               _tag: bentoml.Tag,
-               _serialisation: LiteralSerialisation,
-               _local: bool,
-               _prompt_template: PromptTemplate | None,
-               _system_message: str | None,
-               _adapters_mapping: AdaptersMapping | None,
-               **attrs: t.Any,
+  def __init__(self, *args: t.Any, model_id: str, llm_config: LLMConfig, quantization_config: transformers.BitsAndBytesConfig | transformers.GPTQConfig | None,
+               _quantize: LiteralQuantise | None, _model_version: str, _tag: bentoml.Tag, _serialisation: LiteralSerialisation, _local: bool, _prompt_template: PromptTemplate | None,
+               _system_message: str | None, _adapters_mapping: AdaptersMapping | None, **attrs: t.Any,
   ):
     '''Initialize the LLM with given pretrained model.
 
@@ -651,22 +628,13 @@ class LLM(LLMInterface[M, T], ReprMixin):
     # parsing tokenizer and model kwargs, as the hierachy is param pass > default
     normalized_model_kwds, normalized_tokenizer_kwds = normalize_attrs_to_model_tokenizer_pair(**attrs)
     # NOTE: Save the args and kwargs for latter load
-    self.__attrs_init__(llm_config,
-                        quantization_config,
-                        _quantize,
-                        model_id,
-                        args, {
-                            **model_kwds, **normalized_model_kwds
-                        }, {
-                            **tokenizer_kwds, **normalized_tokenizer_kwds
-                        },
-                        _tag,
-                        _adapters_mapping,
-                        _model_version,
-                        _serialisation,
-                        _local,
-                        _prompt_template,
-                        _system_message)
+    self.__attrs_init__(llm_config, quantization_config, _quantize, model_id, args, {
+        **model_kwds,
+        **normalized_model_kwds
+    }, {
+        **tokenizer_kwds,
+        **normalized_tokenizer_kwds
+    }, _tag, _adapters_mapping, _model_version, _serialisation, _local, _prompt_template, _system_message)
 
     self.llm_post_init()
 
@@ -1306,10 +1274,11 @@ def llm_runnable_class(self: LLM[M, T], embeddings_sig: ModelSignature, generate
         pre = now
       yield ' '.join(output_text[pre:]) + ' '
 
-  return types.new_class(self.__class__.__name__ + 'Runnable', (_Runnable,), {},
-                         lambda ns: ns.update({
-                             'SUPPORTED_RESOURCES': ('nvidia.com/gpu', 'amd.com/gpu', 'cpu'), '__module__': self.__module__, '__doc__': self.config['env'].start_docstring
-                         }))
+  return types.new_class(self.__class__.__name__ + 'Runnable', (_Runnable,), {}, lambda ns: ns.update({
+      'SUPPORTED_RESOURCES': ('nvidia.com/gpu', 'amd.com/gpu', 'cpu'),
+      '__module__': self.__module__,
+      '__doc__': self.config['env'].start_docstring
+  }))
 
 def llm_runner_class(self: LLM[M, T]) -> type[LLMRunner[M, T]]:
   def available_adapters(_: LLMRunner[M, T]) -> PeftAdapterOutput:
diff --git a/openllm-python/src/openllm/_service.py b/openllm-python/src/openllm/_service.py
index 94d04fa9..37e2201b 100644
--- a/openllm-python/src/openllm/_service.py
+++ b/openllm-python/src/openllm/_service.py
@@ -120,8 +120,13 @@ async def completion_v1(input_dict: dict[str, t.Any], ctx: bentoml.Context) -> s
 @svc.api(route='/v1/chat/completions',
          input=bentoml.io.JSON.from_sample(
              openllm.utils.bentoml_cattr.unstructure(
-                 openllm.openai.ChatCompletionRequest(messages=[{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': 'Hello!'}],
-                                                      model=runner.llm_type))),
+                 openllm.openai.ChatCompletionRequest(messages=[{
+                     'role': 'system',
+                     'content': 'You are a helpful assistant.'
+                 }, {
+                     'role': 'user',
+                     'content': 'Hello!'
+                 }], model=runner.llm_type))),
         output=bentoml.io.Text())
 async def chat_completion_v1(input_dict: dict[str, t.Any], ctx: bentoml.Context) -> str | t.AsyncGenerator[str, None]:
   prompt = openllm.openai.messages_to_prompt(input_dict['messages'])
@@ -194,32 +199,10 @@ def metadata_v1(_: str) -> openllm.MetadataOutput:
          input=bentoml.io.JSON.from_sample(['Hey Jude, welcome to the jungle!', 'What is the meaning of life?']),
          output=bentoml.io.JSON.from_sample({
             'embeddings': [
-                 0.007917795330286026,
-                 -0.014421648345887661,
-                 0.00481307040899992,
-                 0.007331526838243008,
-                 -0.0066398633643984795,
-                 0.00945580005645752,
-                 0.0087016262114048,
-                 -0.010709521360695362,
-                 0.012635177001357079,
-                 0.010541186667978764,
-                 -0.00730888033285737,
-                 -0.001783102168701589,
-                 0.02339819073677063,
-                 -0.010825827717781067,
-                 -0.015888236463069916,
-                 0.01876218430697918,
-                 0.0076906150206923485,
-                 0.0009032754460349679,
-                 -0.010024012066423893,
-                 0.01090280432254076,
-                 -0.008668390102684498,
-                 0.02070549875497818,
-                 0.0014594447566196322,
-                 -0.018775740638375282,
-                 -0.014814382418990135,
-                 0.01796768605709076
+                 0.007917795330286026, -0.014421648345887661, 0.00481307040899992, 0.007331526838243008, -0.0066398633643984795, 0.00945580005645752, 0.0087016262114048, -0.010709521360695362,
+                 0.012635177001357079, 0.010541186667978764, -0.00730888033285737, -0.001783102168701589, 0.02339819073677063, -0.010825827717781067, -0.015888236463069916,
+                 0.01876218430697918, 0.0076906150206923485, 0.0009032754460349679, -0.010024012066423893, 0.01090280432254076, -0.008668390102684498, 0.02070549875497818,
+                 0.0014594447566196322, -0.018775740638375282, -0.014814382418990135, 0.01796768605709076
             ],
             'num_tokens': 20
         }))
diff --git a/openllm-python/src/openllm/bundle/_package.py b/openllm-python/src/openllm/bundle/_package.py
index f0172012..ae59052b 100644
--- a/openllm-python/src/openllm/bundle/_package.py
+++ b/openllm-python/src/openllm/bundle/_package.py
@@ -87,15 +87,7 @@ def construct_python_options(llm: openllm.LLM[t.Any, t.Any], llm_fs: FS, extra_d
   elif backend_envvar == 'tf':
     if not openllm_core.utils.is_tf_available(): raise ValueError(f"TensorFlow is not available, while {env.backend} is set to 'tf'")
-    candidates = ('tensorflow',
-                  'tensorflow-cpu',
-                  'tensorflow-gpu',
-                  'tf-nightly',
-                  'tf-nightly-cpu',
-                  'tf-nightly-gpu',
-                  'intel-tensorflow',
-                  'intel-tensorflow-avx512',
-                  'tensorflow-rocm',
+    candidates = ('tensorflow', 'tensorflow-cpu', 'tensorflow-gpu', 'tf-nightly', 'tf-nightly-cpu', 'tf-nightly-gpu', 'intel-tensorflow', 'intel-tensorflow-avx512', 'tensorflow-rocm',
                   'tensorflow-macos',
                  )
     # For the metadata, we have to look for both tensorflow and tensorflow-cpu
@@ -123,14 +115,8 @@ def construct_python_options(llm: openllm.LLM[t.Any, t.Any], llm_fs: FS, extra_d
                        lock_packages=False,
                        extra_index_url=['https://download.pytorch.org/whl/cu118', 'https://huggingface.github.io/autogptq-index/whl/cu118/'])
 
-def construct_docker_options(llm: openllm.LLM[t.Any, t.Any],
-                             _: FS,
-                             workers_per_resource: float,
-                             quantize: LiteralString | None,
-                             adapter_map: dict[str, str | None] | None,
-                             dockerfile_template: str | None,
-                             serialisation: LiteralSerialisation,
-                             container_registry: LiteralContainerRegistry,
+def construct_docker_options(llm: openllm.LLM[t.Any, t.Any], _: FS, workers_per_resource: float, quantize: LiteralString | None, adapter_map: dict[str, str | None] | None,
+                             dockerfile_template: str | None, serialisation: LiteralSerialisation, container_registry: LiteralContainerRegistry,
                              container_version_strategy: LiteralContainerVersionStrategy) -> DockerOptions:
   from openllm.cli._factory import parse_config_options
   environ = parse_config_options(llm.config, llm.config['timeout'], workers_per_resource, None, True, os.environ.copy())
@@ -217,7 +203,11 @@ def create_bento(bento_tag: bentoml.Tag,
   _serialisation: LiteralSerialisation = openllm_core.utils.first_not_none(serialisation, default=llm.config['serialisation'])
   labels = dict(llm.identifying_params)
   labels.update({
-      '_type': llm.llm_type, '_framework': llm.config['env']['backend_value'], 'start_name': llm.config['start_name'], 'base_name_or_path': llm.model_id, 'bundler': 'openllm.bundle'
+      '_type': llm.llm_type,
+      '_framework': llm.config['env']['backend_value'],
+      'start_name': llm.config['start_name'],
+      'base_name_or_path': llm.model_id,
+      'bundler': 'openllm.bundle'
   })
   if adapter_map: labels.update(adapter_map)
   if isinstance(workers_per_resource, str):
@@ -244,14 +234,7 @@ def create_bento(bento_tag: bentoml.Tag,
       exclude=['/venv', '/.venv', '__pycache__/', '*.py[cod]', '*$py.class'],
       python=construct_python_options(llm, llm_fs, extra_dependencies, adapter_map),
       models=[llm_spec],
-      docker=construct_docker_options(llm,
-                                      llm_fs,
-                                      workers_per_resource,
-                                      quantize,
-                                      adapter_map,
-                                      dockerfile_template,
-                                      _serialisation,
-                                      container_registry,
+      docker=construct_docker_options(llm, llm_fs, workers_per_resource, quantize, adapter_map, dockerfile_template, _serialisation, container_registry,
                                      container_version_strategy))
 
   bento = bentoml.Bento.create(build_config=build_config, version=bento_tag.version, build_ctx=llm_fs.getsyspath('/'))
diff --git a/openllm-python/src/openllm/cli/_factory.py b/openllm-python/src/openllm/cli/_factory.py
index 2ff736ca..cbd57584 100644
--- a/openllm-python/src/openllm/cli/_factory.py
+++ b/openllm-python/src/openllm/cli/_factory.py
@@ -54,9 +54,7 @@ def parse_config_options(config: LLMConfig, server_timeout: int, workers_per_res
   # TODO: Support amd.com/gpu on k8s
   _bentoml_config_options_env = environ.pop('BENTOML_CONFIG_OPTIONS', '')
   _bentoml_config_options_opts = [
-      'tracing.sample_rate=1.0',
-      f'api_server.traffic.timeout={server_timeout}',
-      f'runners."llm-{config["start_name"]}-runner".traffic.timeout={config["timeout"]}',
+      'tracing.sample_rate=1.0', f'api_server.traffic.timeout={server_timeout}', f'runners."llm-{config["start_name"]}-runner".traffic.timeout={config["timeout"]}',
      f'runners."llm-{config["start_name"]}-runner".workers_per_resource={workers_per_resource}'
   ]
   if device:
@@ -118,22 +116,9 @@ Available official model_id(s): [default: {llm_config['default_id']}]
   @group.command(**command_attrs)
   @start_decorator(llm_config, serve_grpc=_serve_grpc)
   @click.pass_context
-  def start_cmd(ctx: click.Context,
-                /,
-                server_timeout: int,
-                model_id: str | None,
-                model_version: str | None,
-                system_message: str | None,
-                prompt_template_file: t.IO[t.Any] | None,
-                workers_per_resource: t.Literal['conserved', 'round_robin'] | LiteralString,
-                device: t.Tuple[str, ...],
-                quantize: LiteralQuantise | None,
-                backend: LiteralBackend,
-                serialisation: LiteralSerialisation | None,
-                cors: bool,
-                adapter_id: str | None,
-                return_process: bool,
-                **attrs: t.Any,
+  def start_cmd(ctx: click.Context, /, server_timeout: int, model_id: str | None, model_version: str | None, system_message: str | None, prompt_template_file: t.IO[t.Any] | None,
+                workers_per_resource: t.Literal['conserved', 'round_robin'] | LiteralString, device: t.Tuple[str, ...], quantize: LiteralQuantise | None, backend: LiteralBackend,
+                serialisation: LiteralSerialisation | None, cors: bool, adapter_id: str | None, return_process: bool, **attrs: t.Any,
   ) -> LLMConfig | subprocess.Popen[bytes]:
     _serialisation = openllm_core.utils.first_not_none(serialisation, default=llm_config['serialisation'])
     if _serialisation == 'safetensors' and quantize is not None and openllm_core.utils.check_bool_env('OPENLLM_SERIALIZATION_WARNING'):
@@ -235,16 +220,10 @@ Available official model_id(s): [default: {llm_config['default_id']}]
 def start_decorator(llm_config: LLMConfig, serve_grpc: bool = False) -> t.Callable[[FC], t.Callable[[FC], FC]]:
   def wrapper(fn: FC) -> t.Callable[[FC], FC]:
     composed = openllm.utils.compose(
-        llm_config.to_click_options,
-        _http_server_args if not serve_grpc else _grpc_server_args,
-        cog.optgroup.group('General LLM Options', help=f"The following options are related to running '{llm_config['start_name']}' LLM Server."),
-        model_id_option(factory=cog.optgroup),
-        model_version_option(factory=cog.optgroup),
-        system_message_option(factory=cog.optgroup),
-        prompt_template_file_option(factory=cog.optgroup),
-        cog.optgroup.option('--server-timeout', type=int, default=None, help='Server timeout in seconds'),
-        workers_per_resource_option(factory=cog.optgroup),
-        cors_option(factory=cog.optgroup),
+        llm_config.to_click_options, _http_server_args if not serve_grpc else _grpc_server_args,
+        cog.optgroup.group('General LLM Options', help=f"The following options are related to running '{llm_config['start_name']}' LLM Server."), model_id_option(factory=cog.optgroup),
+        model_version_option(factory=cog.optgroup), system_message_option(factory=cog.optgroup), prompt_template_file_option(factory=cog.optgroup),
+        cog.optgroup.option('--server-timeout', type=int, default=None, help='Server timeout in seconds'), workers_per_resource_option(factory=cog.optgroup), cors_option(factory=cog.optgroup),
         backend_option(factory=cog.optgroup),
         cog.optgroup.group('LLM Optimization Options',
                            help='''Optimization related options.
@@ -255,9 +234,7 @@ def start_decorator(llm_config: LLMConfig, serve_grpc: bool = False) -> t.Callab
         - DeepSpeed Inference: [link](https://www.deepspeed.ai/inference/)
 
         - GGML: Fast inference on [bare metal](https://github.com/ggerganov/ggml)
-        '''),
-        quantize_option(factory=cog.optgroup),
-        serialisation_option(factory=cog.optgroup),
+        '''), quantize_option(factory=cog.optgroup), serialisation_option(factory=cog.optgroup),
         cog.optgroup.option('--device',
                             type=openllm.utils.dantic.CUDA,
                             multiple=True,
@@ -286,8 +263,8 @@ def start_decorator(llm_config: LLMConfig, serve_grpc: bool = False) -> t.Callab
                             help='Optional name or path for given LoRA adapter' + f" to wrap '{llm_config['model_name']}'",
                             multiple=True,
                             callback=_id_callback,
-                            metavar='[PATH | [remote/][adapter_name:]adapter_id][, ...]'),
-        click.option('--return-process', is_flag=True, default=False, help='Internal use only.', hidden=True),
+                            metavar='[PATH | [remote/][adapter_name:]adapter_id][, ...]'), click.option('--return-process', is_flag=True, default=False, help='Internal use only.',
+                                                                                                        hidden=True),
     )
     return composed(fn)
 
diff --git a/openllm-python/src/openllm/cli/extension/list_bentos.py b/openllm-python/src/openllm/cli/extension/list_bentos.py
index 3b13f38b..90004c6f 100644
--- a/openllm-python/src/openllm/cli/extension/list_bentos.py
+++ b/openllm-python/src/openllm/cli/extension/list_bentos.py
@@ -22,7 +22,8 @@ def cli(ctx: click.Context, output: LiteralOutput) -> None:
           'tag': str(b.tag),
           'size': human_readable_size(openllm.utils.calc_dir_size(b.path)),
           'models': [{
-              'tag': str(m.tag), 'size': human_readable_size(openllm.utils.calc_dir_size(m.path))
+              'tag': str(m.tag),
+              'size': human_readable_size(openllm.utils.calc_dir_size(m.path))
          } for m in (bentoml.models.get(_.tag) for _ in b.info.models)]
      } for b in tuple(i for i in bentoml.list() if all(
          k in i.info.labels for k in {'start_name', 'bundler'})) if b.info.labels['start_name'] == k] for k in tuple(inflection.dasherize(key) for key in openllm.CONFIG_MAPPING.keys())
diff --git a/openllm-python/src/openllm/models/auto/__init__.py b/openllm-python/src/openllm/models/auto/__init__.py
index bf61359b..1b0f9a1c 100644
--- a/openllm-python/src/openllm/models/auto/__init__.py
+++ b/openllm-python/src/openllm/models/auto/__init__.py
@@ -57,7 +57,9 @@ __lazy = LazyModule(__name__,
                     os.path.abspath('__file__'),
                     _import_structure,
                     extra_objects={
-                        'CONFIG_MAPPING': CONFIG_MAPPING, 'CONFIG_MAPPING_NAMES': CONFIG_MAPPING_NAMES, 'AutoConfig': AutoConfig,
+                        'CONFIG_MAPPING': CONFIG_MAPPING,
+                        'CONFIG_MAPPING_NAMES': CONFIG_MAPPING_NAMES,
+                        'AutoConfig': AutoConfig,
                     })
 __all__ = __lazy.__all__
 __dir__ = __lazy.__dir__
diff --git a/openllm-python/src/openllm/models/auto/factory.py b/openllm-python/src/openllm/models/auto/factory.py
index d309f423..9b083954 100644
--- a/openllm-python/src/openllm/models/auto/factory.py
+++ b/openllm-python/src/openllm/models/auto/factory.py
@@ -160,10 +160,9 @@ class _LazyAutoMapping(OrderedDict, ReprMixin):
         [self._load_attr_from_module(key, name) for key, name in self._model_mapping.items() if key in self._config_mapping.keys()] + list(self._extra_content.values()))
 
   def items(self) -> ConfigModelItemsView:
-    return t.cast('ConfigModelItemsView',
-                  [(self._load_attr_from_module(key, self._config_mapping[key]), self._load_attr_from_module(key, self._model_mapping[key]))
-                   for key in self._model_mapping.keys()
-                   if key in self._config_mapping.keys()] + list(self._extra_content.items()))
+    return t.cast('ConfigModelItemsView', [(self._load_attr_from_module(key, self._config_mapping[key]), self._load_attr_from_module(key, self._model_mapping[key]))
+                                           for key in self._model_mapping.keys()
+                                           if key in self._config_mapping.keys()] + list(self._extra_content.items()))
 
   def __iter__(self) -> t.Iterator[type[openllm.LLMConfig]]:
     return iter(t.cast('SupportsIter[t.Iterator[type[openllm.LLMConfig]]]', self.keys()))
diff --git a/openllm-python/src/openllm/models/chatglm/__init__.py b/openllm-python/src/openllm/models/chatglm/__init__.py
index 9dcafb02..612083f6 100644
--- a/openllm-python/src/openllm/models/chatglm/__init__.py
+++ b/openllm-python/src/openllm/models/chatglm/__init__.py
@@ -23,5 +23,7 @@ sys.modules[__name__] = LazyModule(__name__,
                                    globals()['__file__'],
                                    _import_structure,
                                    extra_objects={
-                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE, 'START_CHATGLM_COMMAND_DOCSTRING': START_CHATGLM_COMMAND_DOCSTRING, 'ChatGLMConfig': ChatGLMConfig
+                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE,
+                                       'START_CHATGLM_COMMAND_DOCSTRING': START_CHATGLM_COMMAND_DOCSTRING,
+                                       'ChatGLMConfig': ChatGLMConfig
                                    })
diff --git a/openllm-python/src/openllm/models/dolly_v2/__init__.py b/openllm-python/src/openllm/models/dolly_v2/__init__.py
index c48243d3..5cdf2e97 100644
--- a/openllm-python/src/openllm/models/dolly_v2/__init__.py
+++ b/openllm-python/src/openllm/models/dolly_v2/__init__.py
@@ -30,5 +30,7 @@ sys.modules[__name__] = LazyModule(__name__,
                                    globals()['__file__'],
                                    _import_structure,
                                    extra_objects={
-                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE, 'START_DOLLY_V2_COMMAND_DOCSTRING': START_DOLLY_V2_COMMAND_DOCSTRING, 'DollyV2Config': DollyV2Config
+                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE,
+                                       'START_DOLLY_V2_COMMAND_DOCSTRING': START_DOLLY_V2_COMMAND_DOCSTRING,
+                                       'DollyV2Config': DollyV2Config
                                    })
diff --git a/openllm-python/src/openllm/models/falcon/__init__.py b/openllm-python/src/openllm/models/falcon/__init__.py
index d553c4a7..909447b6 100644
--- a/openllm-python/src/openllm/models/falcon/__init__.py
+++ b/openllm-python/src/openllm/models/falcon/__init__.py
@@ -30,5 +30,7 @@ sys.modules[__name__] = LazyModule(__name__,
                                    globals()['__file__'],
                                    _import_structure,
                                    extra_objects={
-                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE, 'START_FALCON_COMMAND_DOCSTRING': START_FALCON_COMMAND_DOCSTRING, 'FalconConfig': FalconConfig
+                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE,
+                                       'START_FALCON_COMMAND_DOCSTRING': START_FALCON_COMMAND_DOCSTRING,
+                                       'FalconConfig': FalconConfig
                                    })
diff --git a/openllm-python/src/openllm/models/gpt_neox/__init__.py b/openllm-python/src/openllm/models/gpt_neox/__init__.py
index 437645e1..1aef40fe 100644
--- a/openllm-python/src/openllm/models/gpt_neox/__init__.py
+++ b/openllm-python/src/openllm/models/gpt_neox/__init__.py
@@ -30,5 +30,7 @@ sys.modules[__name__] = LazyModule(__name__,
                                    globals()['__file__'],
                                    _import_structure,
                                    extra_objects={
-                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE, 'START_GPT_NEOX_COMMAND_DOCSTRING': START_GPT_NEOX_COMMAND_DOCSTRING, 'GPTNeoXConfig': GPTNeoXConfig
+                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE,
+                                       'START_GPT_NEOX_COMMAND_DOCSTRING': START_GPT_NEOX_COMMAND_DOCSTRING,
+                                       'GPTNeoXConfig': GPTNeoXConfig
                                    })
diff --git a/openllm-python/src/openllm/models/opt/__init__.py b/openllm-python/src/openllm/models/opt/__init__.py
index 2f818789..0ae7ec97 100644
--- a/openllm-python/src/openllm/models/opt/__init__.py
+++ b/openllm-python/src/openllm/models/opt/__init__.py
@@ -46,5 +46,7 @@ sys.modules[__name__] = LazyModule(__name__,
                                    globals()['__file__'],
                                    _import_structure,
                                    extra_objects={
-                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE, 'START_OPT_COMMAND_DOCSTRING': START_OPT_COMMAND_DOCSTRING, 'OPTConfig': OPTConfig,
+                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE,
+                                       'START_OPT_COMMAND_DOCSTRING': START_OPT_COMMAND_DOCSTRING,
+                                       'OPTConfig': OPTConfig,
                                    })
diff --git a/openllm-python/src/openllm/models/opt/modeling_flax_opt.py b/openllm-python/src/openllm/models/opt/modeling_flax_opt.py
index 7b2bc981..934971e8 100644
--- a/openllm-python/src/openllm/models/opt/modeling_flax_opt.py
+++ b/openllm-python/src/openllm/models/opt/modeling_flax_opt.py
@@ -33,7 +33,11 @@ class FlaxOPT(openllm.LLM['transformers.TFOPTForCausalLM', 'transformers.GPT2Tok
                      use_default_prompt_template: bool = False,
                      **attrs: t.Any) -> tuple[str, dict[str, t.Any], dict[str, t.Any]]:
     return process_prompt(prompt, DEFAULT_PROMPT_TEMPLATE, use_default_prompt_template, **attrs), {
-        'max_new_tokens': max_new_tokens, 'temperature': temperature, 'top_k': top_k, 'num_return_sequences': num_return_sequences, 'repetition_penalty': repetition_penalty
+        'max_new_tokens': max_new_tokens,
+        'temperature': temperature,
+        'top_k': top_k,
+        'num_return_sequences': num_return_sequences,
+        'repetition_penalty': repetition_penalty
     }, {}
 
   def generate(self, prompt: str, **attrs: t.Any) -> list[str]:
diff --git a/openllm-python/src/openllm/models/opt/modeling_vllm_opt.py b/openllm-python/src/openllm/models/opt/modeling_vllm_opt.py
index 6c677ac3..04408893 100644
--- a/openllm-python/src/openllm/models/opt/modeling_vllm_opt.py
+++ b/openllm-python/src/openllm/models/opt/modeling_vllm_opt.py
@@ -19,5 +19,8 @@ class VLLMOPT(openllm.LLM['vllm.LLMEngine', 'transformers.GPT2Tokenizer']):
                      use_default_prompt_template: bool = True,
                      **attrs: t.Any) -> tuple[str, dict[str, t.Any], dict[str, t.Any]]:
     return process_prompt(prompt, DEFAULT_PROMPT_TEMPLATE, use_default_prompt_template, **attrs), {
-        'max_new_tokens': max_new_tokens, 'temperature': temperature, 'top_k': top_k, 'num_return_sequences': num_return_sequences
+        'max_new_tokens': max_new_tokens,
+        'temperature': temperature,
+        'top_k': top_k,
+        'num_return_sequences': num_return_sequences
     }, {}
diff --git a/openllm-python/tests/configuration_test.py b/openllm-python/tests/configuration_test.py
index a0262a6e..1d913c3d 100644
--- a/openllm-python/tests/configuration_test.py
+++ b/openllm-python/tests/configuration_test.py
@@ -34,11 +34,13 @@ def test_missing_default():
     make_llm_config('MissingArchitecture', {'default_id': 'huggingface/t5-tiny-testing', 'model_ids': ['huggingface/t5-tiny-testing'], 'requirements': ['bentoml'],},)
 
 def test_forbidden_access():
-  cl_ = make_llm_config(
-      'ForbiddenAccess', {
-          'default_id': 'huggingface/t5-tiny-testing', 'model_ids': ['huggingface/t5-tiny-testing', 'bentoml/t5-tiny-testing'], 'architecture': 'PreTrainedModel', 'requirements': ['bentoml'],
-      },
-  )
+  cl_ = make_llm_config('ForbiddenAccess', {
+      'default_id': 'huggingface/t5-tiny-testing',
+      'model_ids': ['huggingface/t5-tiny-testing', 'bentoml/t5-tiny-testing'],
+      'architecture': 'PreTrainedModel',
+      'requirements': ['bentoml'],
+  },
+  )
 
   assert pytest.raises(openllm.exceptions.ForbiddenAttributeError, cl_.__getattribute__, cl_(), '__config__',)
   assert pytest.raises(openllm.exceptions.ForbiddenAttributeError, cl_.__getattribute__, cl_(), 'GenerationConfig',)
@@ -128,7 +130,9 @@ def test_struct_envvar_with_overwrite_provided_env(monkeypatch: pytest.MonkeyPat
     mk.setenv(field_env_key('field1'), str(4.0))
     mk.setenv(field_env_key('temperature', suffix='generation'), str(0.2))
     sent = make_llm_config('OverwriteWithEnvAvailable', {
-        'default_id': 'asdfasdf', 'model_ids': ['asdf', 'asdfasdfads'], 'architecture': 'PreTrainedModel'
+        'default_id': 'asdfasdf',
+        'model_ids': ['asdf', 'asdfasdfads'],
+        'architecture': 'PreTrainedModel'
     },
                           fields=(('field1', 'float', 3.0),),
                           ).model_construct_env(field1=20.0, temperature=0.4)
diff --git a/openllm-python/tests/models/conftest.py b/openllm-python/tests/models/conftest.py
index 24b29eb6..0e2a42ce 100644
--- a/openllm-python/tests/models/conftest.py
+++ b/openllm-python/tests/models/conftest.py
@@ -196,7 +196,8 @@ def _container_handle(model: str, model_id: str, image_tag: str, deployment_mode
                                      detach=True,
                                      device_requests=devs,
                                      ports={
-                                         '3000/tcp': port, '3001/tcp': prom_port
+                                         '3000/tcp': port,
+                                         '3001/tcp': prom_port
                                      },
                                      )
 
diff --git a/pyproject.toml b/pyproject.toml
index 8a6b57f7..2db7a2b0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -273,17 +273,6 @@ ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT = true
 ALLOW_MULTILINE_DICTIONARY_KEYS = false
 ALLOW_SPLIT_BEFORE_DICT_VALUE = false
 COALESCE_BRACKETS = true
-NO_SPACES_AROUND_SELECTED_BINARY_OPERATORS = true
-SPACES_AROUND_SUBSCRIPT_COLON = false
-SPACES_AROUND_DICT_DELIMITERS = false
-SPACES_AROUND_LIST_DELIMITERS = false
-SPACES_AROUND_POWER_OPERATOR = false
-SPACES_AROUND_TUPLE_DELIMITERS = false
-SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET = false
-SPACE_INSIDE_BRACKETS = false
-SPLIT_ALL_COMMA_SEPARATED_VALUES = false
-SPLIT_ALL_TOP_LEVEL_COMMA_SEPARATED_VALUES = true
-SPLIT_BEFORE_DOT = true
 
 [tool.pytest.ini_options]
 addopts = ["-rfEX", "-pno:warnings", "--snapshot-warn-unused"]
diff --git a/tools/dependencies.py b/tools/dependencies.py
index 37d6f873..c7b25751 100755
--- a/tools/dependencies.py
+++ b/tools/dependencies.py
@@ -168,8 +168,7 @@ def create_classifiers() -> Array:
       Classifier.create_classifier('audience', 'Developers'),
       Classifier.create_classifier('audience', 'Science/Research'),
       Classifier.create_classifier('audience', 'System Administrators'),
-      Classifier.create_classifier('typing', 'Typed'),
-      *Classifier.create_python_classifier(),
+      Classifier.create_classifier('typing', 'Typed'), *Classifier.create_python_classifier(),
   ])
   return arr.multiline(True)
 
@@ -216,23 +215,8 @@ def authors() -> Array:
 def keywords() -> Array:
   arr = correct_style(tomlkit.array())
   arr.extend([
-      'MLOps',
-      'AI',
-      'BentoML',
-      'Model Serving',
-      'Model Deployment',
-      'LLMOps',
-      'Falcon',
-      'Vicuna',
-      'Llama 2',
-      'Fine tuning',
-      'Serverless',
-      'Large Language Model',
-      'Generative AI',
-      'StableLM',
-      'Alpaca',
-      'PyTorch',
-      'Transformers'
+      'MLOps', 'AI', 'BentoML', 'Model Serving', 'Model Deployment', 'LLMOps', 'Falcon', 'Vicuna', 'Llama 2', 'Fine tuning', 'Serverless', 'Large Language Model', 'Generative AI', 'StableLM',
+      'Alpaca', 'PyTorch', 'Transformers'
   ])
   return arr.multiline(True)
 
diff --git a/tools/update-brew-tap.py b/tools/update-brew-tap.py
index de11e05b..892500d6 100755
--- a/tools/update-brew-tap.py
+++ b/tools/update-brew-tap.py
@@ -16,7 +16,9 @@ _OWNER = 'bentoml'
 _REPO = 'openllm'
 
 _gz_strategies: dict[t.Literal['macos_arm', 'macos_intel', 'linux_intel'], str] = {
-    'macos_arm': 'aarch64-apple-darwin', 'macos_intel': 'x86_64-apple-darwin', 'linux_intel': 'x86_64-unknown-linux-musl'
+    'macos_arm': 'aarch64-apple-darwin',
+    'macos_intel': 'x86_64-apple-darwin',
+    'linux_intel': 'x86_64-unknown-linux-musl'
 }
 
 def determine_release_url(svn_url: str, tag: str, target:
t.Literal['macos_arm', 'macos_intel', 'linux_intel', 'archive']) -> str: diff --git a/tools/update-config-stubs.py b/tools/update-config-stubs.py index 616823f9..b97da4ed 100755 --- a/tools/update-config-stubs.py +++ b/tools/update-config-stubs.py @@ -103,11 +103,8 @@ def main() -> int: lines.append(' ' * 2 + '# NOTE: generation_class, sampling_class and extras arguments\n') lines.extend([ ' ' * 2 + line for line in [ - '@overload\n', - "def __getitem__(self, item: t.Literal['generation_class']) -> t.Type[openllm_core.GenerationConfig]: ...\n", - '@overload\n', - "def __getitem__(self, item: t.Literal['sampling_class']) -> t.Type[openllm_core.SamplingParams]: ...\n", - '@overload\n', + '@overload\n', "def __getitem__(self, item: t.Literal['generation_class']) -> t.Type[openllm_core.GenerationConfig]: ...\n", '@overload\n', + "def __getitem__(self, item: t.Literal['sampling_class']) -> t.Type[openllm_core.SamplingParams]: ...\n", '@overload\n', "def __getitem__(self, item: t.Literal['extras']) -> t.Dict[str, t.Any]: ...\n", ] ]) diff --git a/tools/update-dummy.py b/tools/update-dummy.py index 28fbdb99..e1a405d3 100755 --- a/tools/update-dummy.py +++ b/tools/update-dummy.py @@ -33,18 +33,14 @@ def make_class_stub(model_name: str, backend: LiteralBackend, indentation: int = if auto: cl_ = _auto[backend] else: cl_ = get_mapping(backend)[model_name] lines = [ - f'class {cl_}(metaclass=_DummyMetaclass):', - ' ' * indentation + f"_backends=[{','.join(_dep_list)}]", + f'class {cl_}(metaclass=_DummyMetaclass):', ' ' * indentation + f"_backends=[{','.join(_dep_list)}]", ' ' * indentation + f"def __init__(self,*param_decls:_t.Any,**attrs: _t.Any):_require_backends(self,[{','.join(_dep_list)}])" ] return lines def write_stub(backend: LiteralBackend, _path: str) -> list[str]: base = [ - f'# This file is generated by {_path}. DO NOT EDIT MANUALLY!', - f'# To update this, run ./{_path}', - 'from __future__ import annotations', - 'import typing as _t', + f'# This file is generated by {_path}. DO NOT EDIT MANUALLY!', f'# To update this, run ./{_path}', 'from __future__ import annotations', 'import typing as _t', 'from openllm_core.utils import DummyMetaclass as _DummyMetaclass, require_backends as _require_backends', ] base.extend([v for it in [make_class_stub(k, backend) for k in get_mapping(backend)] for v in it]) diff --git a/tools/update-models-import.py b/tools/update-models-import.py index 709b6f5c..dcc0426b 100755 --- a/tools/update-models-import.py +++ b/tools/update-models-import.py @@ -12,23 +12,16 @@ def create_module_import() -> str: def create_stubs_import() -> list[str]: return [ 'if t.TYPE_CHECKING:from . import ' + ','.join([f'{p.name} as {p.name}' for p in sorted(_TARGET_FILE.parent.glob('*/')) if p.name not in {'__pycache__', '__init__.py', '.DS_Store'}]), - '__lazy=LazyModule(__name__, globals()["__file__"], {k: [] for k in _MODELS})', - '__all__=__lazy.__all__', - '__dir__=__lazy.__dir__', - '__getattr__=__lazy.__getattr__\n' + '__lazy=LazyModule(__name__, globals()["__file__"], {k: [] for k in _MODELS})', '__all__=__lazy.__all__', '__dir__=__lazy.__dir__', '__getattr__=__lazy.__getattr__\n' ] def main() -> int: _path = os.path.join(os.path.basename(os.path.dirname(__file__)), os.path.basename(__file__)) with _TARGET_FILE.open('w') as f: f.writelines('\n'.join([ - f'# This file is generated by {_path}. DO NOT EDIT MANUALLY!', - f'# To update this, run ./{_path}', - 'from __future__ import annotations', - 'import typing as t', + f'# This file is generated by {_path}. 
DO NOT EDIT MANUALLY!', f'# To update this, run ./{_path}', 'from __future__ import annotations', 'import typing as t', 'from openllm_core.utils import LazyModule', - create_module_import(), - *create_stubs_import(), + create_module_import(), *create_stubs_import(), ])) return 0
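
For reference, the hunk above implies the generated `openllm/models/__init__.py` looks roughly like the sketch below. This is a reconstruction from the template strings visible in `create_stubs_import()` and `main()`, not the script's actual output: the `_MODELS` definition is assumed to be what `create_module_import()` emits (its body is not part of this diff), and the model names shown are an illustrative subset drawn from the packages touched elsewhere in this patch.

```python
# This file is generated by tools/update-models-import.py. DO NOT EDIT MANUALLY!
# To update this, run ./tools/update-models-import.py
from __future__ import annotations
import typing as t
from openllm_core.utils import LazyModule
# Assumed shape of create_module_import()'s output; the real list covers every model package.
_MODELS = ['chatglm', 'dolly_v2', 'falcon', 'gpt_neox', 'opt']
if t.TYPE_CHECKING:from . import chatglm as chatglm,dolly_v2 as dolly_v2,falcon as falcon,gpt_neox as gpt_neox,opt as opt
__lazy=LazyModule(__name__, globals()["__file__"], {k: [] for k in _MODELS})
__all__=__lazy.__all__
__dir__=__lazy.__dir__
__getattr__=__lazy.__getattr__
```

The `name as name` imports under `t.TYPE_CHECKING` mark explicit re-exports for type checkers, while `LazyModule` defers the real submodule imports until first attribute access at runtime.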