fix(style): remove weird break on split item

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
2026-05-19 14:16:22 -04:00 · 2023-10-07 02:21:31 -04:00
parent c6e95de9b4
commit 625b82a0fc
38 changed files with 243 additions and 474 deletions
--- a/openllm-python/src/openllm/_embeddings.py
+++ b/openllm-python/src/openllm/_embeddings.py
@@ -34,7 +34,8 @@ def get_or_download(ids: str = _BENTOMODEL_ID) -> bentoml.Model:
                               options=ModelOptions(),
                               context=openllm.utils.generate_context(framework_name='transformers'),
                               labels={
-                                   'runtime': 'pt', 'framework': 'openllm'
+                                   'runtime': 'pt',
+                                   'framework': 'openllm'
                               },
                               signatures=model_signatures) as bentomodel:
      snapshot_download(_GENERIC_EMBEDDING_ID,
--- a/openllm-python/src/openllm/_llm.py
+++ b/openllm-python/src/openllm/_llm.py
@@ -281,22 +281,10 @@ class LLM(LLMInterface[M, T], ReprMixin):
  if t.TYPE_CHECKING: __name__: str
  if t.TYPE_CHECKING and not MYPY:

-    def __attrs_init__(self,
-                       config: LLMConfig,
-                       quantization_config: t.Optional[t.Union[transformers.BitsAndBytesConfig, transformers.GPTQConfig]],
-                       quantize: t.Optional[LiteralQuantise],
-                       model_id: str,
-                       model_decls: TupleAny,
-                       model_attrs: DictStrAny,
-                       tokenizer_attrs: DictStrAny,
-                       tag: bentoml.Tag,
-                       adapters_mapping: t.Optional[AdaptersMapping],
-                       model_version: t.Optional[str],
-                       serialisation: LiteralSerialisation,
-                       _local: bool,
-                       prompt_template: PromptTemplate | None,
-                       system_message: str | None,
-                       **attrs: t.Any) -> None:
+    def __attrs_init__(self, config: LLMConfig, quantization_config: t.Optional[t.Union[transformers.BitsAndBytesConfig,
+                                                                                        transformers.GPTQConfig]], quantize: t.Optional[LiteralQuantise], model_id: str, model_decls: TupleAny,
+                       model_attrs: DictStrAny, tokenizer_attrs: DictStrAny, tag: bentoml.Tag, adapters_mapping: t.Optional[AdaptersMapping], model_version: t.Optional[str],
+                       serialisation: LiteralSerialisation, _local: bool, prompt_template: PromptTemplate | None, system_message: str | None, **attrs: t.Any) -> None:
      '''Generated __attrs_init__ for openllm.LLM.'''

  config: LLMConfig
@@ -540,20 +528,9 @@ class LLM(LLMInterface[M, T], ReprMixin):
  def generate_tag(cls, *param_decls: t.Any, **attrs: t.Any) -> bentoml.Tag:
    return bentoml.Tag.from_taglike(cls._generate_tag_str(*param_decls, **attrs))

-  def __init__(self,
-               *args: t.Any,
-               model_id: str,
-               llm_config: LLMConfig,
-               quantization_config: transformers.BitsAndBytesConfig | transformers.GPTQConfig | None,
-               _quantize: LiteralQuantise | None,
-               _model_version: str,
-               _tag: bentoml.Tag,
-               _serialisation: LiteralSerialisation,
-               _local: bool,
-               _prompt_template: PromptTemplate | None,
-               _system_message: str | None,
-               _adapters_mapping: AdaptersMapping | None,
-               **attrs: t.Any,
+  def __init__(self, *args: t.Any, model_id: str, llm_config: LLMConfig, quantization_config: transformers.BitsAndBytesConfig | transformers.GPTQConfig | None,
+               _quantize: LiteralQuantise | None, _model_version: str, _tag: bentoml.Tag, _serialisation: LiteralSerialisation, _local: bool, _prompt_template: PromptTemplate | None,
+               _system_message: str | None, _adapters_mapping: AdaptersMapping | None, **attrs: t.Any,
               ):
    '''Initialize the LLM with given pretrained model.

@@ -651,22 +628,13 @@ class LLM(LLMInterface[M, T], ReprMixin):
    # parsing tokenizer and model kwargs, as the hierarchy is param pass > default
    normalized_model_kwds, normalized_tokenizer_kwds = normalize_attrs_to_model_tokenizer_pair(**attrs)
    # NOTE: Save the args and kwargs for later load
-    self.__attrs_init__(llm_config,
-                        quantization_config,
-                        _quantize,
-                        model_id,
-                        args, {
-                            **model_kwds, **normalized_model_kwds
-                        }, {
-                            **tokenizer_kwds, **normalized_tokenizer_kwds
-                        },
-                        _tag,
-                        _adapters_mapping,
-                        _model_version,
-                        _serialisation,
-                        _local,
-                        _prompt_template,
-                        _system_message)
+    self.__attrs_init__(llm_config, quantization_config, _quantize, model_id, args, {
+        **model_kwds,
+        **normalized_model_kwds
+    }, {
+        **tokenizer_kwds,
+        **normalized_tokenizer_kwds
+    }, _tag, _adapters_mapping, _model_version, _serialisation, _local, _prompt_template, _system_message)

    self.llm_post_init()

@@ -1306,10 +1274,11 @@ def llm_runnable_class(self: LLM[M, T], embeddings_sig: ModelSignature, generate
          pre = now
      yield ' '.join(output_text[pre:]) + ' '

-  return types.new_class(self.__class__.__name__ + 'Runnable', (_Runnable,), {},
-                         lambda ns: ns.update({
-                             'SUPPORTED_RESOURCES': ('nvidia.com/gpu', 'amd.com/gpu', 'cpu'), '__module__': self.__module__, '__doc__': self.config['env'].start_docstring
-                         }))
+  return types.new_class(self.__class__.__name__ + 'Runnable', (_Runnable,), {}, lambda ns: ns.update({
+      'SUPPORTED_RESOURCES': ('nvidia.com/gpu', 'amd.com/gpu', 'cpu'),
+      '__module__': self.__module__,
+      '__doc__': self.config['env'].start_docstring
+  }))

 def llm_runner_class(self: LLM[M, T]) -> type[LLMRunner[M, T]]:
  def available_adapters(_: LLMRunner[M, T]) -> PeftAdapterOutput:
--- a/openllm-python/src/openllm/_service.py
+++ b/openllm-python/src/openllm/_service.py
@@ -120,8 +120,13 @@ async def completion_v1(input_dict: dict[str, t.Any], ctx: bentoml.Context) -> s
@svc.api(route='/v1/chat/completions',
         input=bentoml.io.JSON.from_sample(
             openllm.utils.bentoml_cattr.unstructure(
-                 openllm.openai.ChatCompletionRequest(messages=[{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': 'Hello!'}],
-                                                      model=runner.llm_type))),
+                 openllm.openai.ChatCompletionRequest(messages=[{
+                     'role': 'system',
+                     'content': 'You are a helpful assistant.'
+                 }, {
+                     'role': 'user',
+                     'content': 'Hello!'
+                 }], model=runner.llm_type))),
         output=bentoml.io.Text())
 async def chat_completion_v1(input_dict: dict[str, t.Any], ctx: bentoml.Context) -> str | t.AsyncGenerator[str, None]:
  prompt = openllm.openai.messages_to_prompt(input_dict['messages'])
@@ -194,32 +199,10 @@ def metadata_v1(_: str) -> openllm.MetadataOutput:
         input=bentoml.io.JSON.from_sample(['Hey Jude, welcome to the jungle!', 'What is the meaning of life?']),
         output=bentoml.io.JSON.from_sample({
             'embeddings': [
-                 0.007917795330286026,
-                 -0.014421648345887661,
-                 0.00481307040899992,
-                 0.007331526838243008,
-                 -0.0066398633643984795,
-                 0.00945580005645752,
-                 0.0087016262114048,
-                 -0.010709521360695362,
-                 0.012635177001357079,
-                 0.010541186667978764,
-                 -0.00730888033285737,
-                 -0.001783102168701589,
-                 0.02339819073677063,
-                 -0.010825827717781067,
-                 -0.015888236463069916,
-                 0.01876218430697918,
-                 0.0076906150206923485,
-                 0.0009032754460349679,
-                 -0.010024012066423893,
-                 0.01090280432254076,
-                 -0.008668390102684498,
-                 0.02070549875497818,
-                 0.0014594447566196322,
-                 -0.018775740638375282,
-                 -0.014814382418990135,
-                 0.01796768605709076
+                 0.007917795330286026, -0.014421648345887661, 0.00481307040899992, 0.007331526838243008, -0.0066398633643984795, 0.00945580005645752, 0.0087016262114048, -0.010709521360695362,
+                 0.012635177001357079, 0.010541186667978764, -0.00730888033285737, -0.001783102168701589, 0.02339819073677063, -0.010825827717781067, -0.015888236463069916,
+                 0.01876218430697918, 0.0076906150206923485, 0.0009032754460349679, -0.010024012066423893, 0.01090280432254076, -0.008668390102684498, 0.02070549875497818,
+                 0.0014594447566196322, -0.018775740638375282, -0.014814382418990135, 0.01796768605709076
             ],
             'num_tokens': 20
         }))
--- a/openllm-python/src/openllm/bundle/_package.py
+++ b/openllm-python/src/openllm/bundle/_package.py
@@ -87,15 +87,7 @@ def construct_python_options(llm: openllm.LLM[t.Any, t.Any], llm_fs: FS, extra_d
  elif backend_envvar == 'tf':
    if not openllm_core.utils.is_tf_available():
      raise ValueError(f"TensorFlow is not available, while {env.backend} is set to 'tf'")
-    candidates = ('tensorflow',
-                  'tensorflow-cpu',
-                  'tensorflow-gpu',
-                  'tf-nightly',
-                  'tf-nightly-cpu',
-                  'tf-nightly-gpu',
-                  'intel-tensorflow',
-                  'intel-tensorflow-avx512',
-                  'tensorflow-rocm',
+    candidates = ('tensorflow', 'tensorflow-cpu', 'tensorflow-gpu', 'tf-nightly', 'tf-nightly-cpu', 'tf-nightly-gpu', 'intel-tensorflow', 'intel-tensorflow-avx512', 'tensorflow-rocm',
                  'tensorflow-macos',
                  )
    # For the metadata, we have to look for both tensorflow and tensorflow-cpu
@@ -123,14 +115,8 @@ def construct_python_options(llm: openllm.LLM[t.Any, t.Any], llm_fs: FS, extra_d
                       lock_packages=False,
                       extra_index_url=['https://download.pytorch.org/whl/cu118', 'https://huggingface.github.io/autogptq-index/whl/cu118/'])

-def construct_docker_options(llm: openllm.LLM[t.Any, t.Any],
-                             _: FS,
-                             workers_per_resource: float,
-                             quantize: LiteralString | None,
-                             adapter_map: dict[str, str | None] | None,
-                             dockerfile_template: str | None,
-                             serialisation: LiteralSerialisation,
-                             container_registry: LiteralContainerRegistry,
+def construct_docker_options(llm: openllm.LLM[t.Any, t.Any], _: FS, workers_per_resource: float, quantize: LiteralString | None, adapter_map: dict[str, str | None] | None,
+                             dockerfile_template: str | None, serialisation: LiteralSerialisation, container_registry: LiteralContainerRegistry,
                             container_version_strategy: LiteralContainerVersionStrategy) -> DockerOptions:
  from openllm.cli._factory import parse_config_options
  environ = parse_config_options(llm.config, llm.config['timeout'], workers_per_resource, None, True, os.environ.copy())
@@ -217,7 +203,11 @@ def create_bento(bento_tag: bentoml.Tag,
  _serialisation: LiteralSerialisation = openllm_core.utils.first_not_none(serialisation, default=llm.config['serialisation'])
  labels = dict(llm.identifying_params)
  labels.update({
-      '_type': llm.llm_type, '_framework': llm.config['env']['backend_value'], 'start_name': llm.config['start_name'], 'base_name_or_path': llm.model_id, 'bundler': 'openllm.bundle'
+      '_type': llm.llm_type,
+      '_framework': llm.config['env']['backend_value'],
+      'start_name': llm.config['start_name'],
+      'base_name_or_path': llm.model_id,
+      'bundler': 'openllm.bundle'
  })
  if adapter_map: labels.update(adapter_map)
  if isinstance(workers_per_resource, str):
@@ -244,14 +234,7 @@ def create_bento(bento_tag: bentoml.Tag,
                                  exclude=['/venv', '/.venv', '__pycache__/', '*.py[cod]', '*$py.class'],
                                  python=construct_python_options(llm, llm_fs, extra_dependencies, adapter_map),
                                  models=[llm_spec],
-                                  docker=construct_docker_options(llm,
-                                                                  llm_fs,
-                                                                  workers_per_resource,
-                                                                  quantize,
-                                                                  adapter_map,
-                                                                  dockerfile_template,
-                                                                  _serialisation,
-                                                                  container_registry,
+                                  docker=construct_docker_options(llm, llm_fs, workers_per_resource, quantize, adapter_map, dockerfile_template, _serialisation, container_registry,
                                                                  container_version_strategy))

  bento = bentoml.Bento.create(build_config=build_config, version=bento_tag.version, build_ctx=llm_fs.getsyspath('/'))
--- a/openllm-python/src/openllm/cli/_factory.py
+++ b/openllm-python/src/openllm/cli/_factory.py
@@ -54,9 +54,7 @@ def parse_config_options(config: LLMConfig, server_timeout: int, workers_per_res
  # TODO: Support amd.com/gpu on k8s
  _bentoml_config_options_env = environ.pop('BENTOML_CONFIG_OPTIONS', '')
  _bentoml_config_options_opts = [
-      'tracing.sample_rate=1.0',
-      f'api_server.traffic.timeout={server_timeout}',
-      f'runners."llm-{config["start_name"]}-runner".traffic.timeout={config["timeout"]}',
+      'tracing.sample_rate=1.0', f'api_server.traffic.timeout={server_timeout}', f'runners."llm-{config["start_name"]}-runner".traffic.timeout={config["timeout"]}',
      f'runners."llm-{config["start_name"]}-runner".workers_per_resource={workers_per_resource}'
  ]
  if device:
@@ -118,22 +116,9 @@ Available official model_id(s): [default: {llm_config['default_id']}]
  @group.command(**command_attrs)
  @start_decorator(llm_config, serve_grpc=_serve_grpc)
  @click.pass_context
-  def start_cmd(ctx: click.Context,
-                /,
-                server_timeout: int,
-                model_id: str | None,
-                model_version: str | None,
-                system_message: str | None,
-                prompt_template_file: t.IO[t.Any] | None,
-                workers_per_resource: t.Literal['conserved', 'round_robin'] | LiteralString,
-                device: t.Tuple[str, ...],
-                quantize: LiteralQuantise | None,
-                backend: LiteralBackend,
-                serialisation: LiteralSerialisation | None,
-                cors: bool,
-                adapter_id: str | None,
-                return_process: bool,
-                **attrs: t.Any,
+  def start_cmd(ctx: click.Context, /, server_timeout: int, model_id: str | None, model_version: str | None, system_message: str | None, prompt_template_file: t.IO[t.Any] | None,
+                workers_per_resource: t.Literal['conserved', 'round_robin'] | LiteralString, device: t.Tuple[str, ...], quantize: LiteralQuantise | None, backend: LiteralBackend,
+                serialisation: LiteralSerialisation | None, cors: bool, adapter_id: str | None, return_process: bool, **attrs: t.Any,
                ) -> LLMConfig | subprocess.Popen[bytes]:
    _serialisation = openllm_core.utils.first_not_none(serialisation, default=llm_config['serialisation'])
    if _serialisation == 'safetensors' and quantize is not None and openllm_core.utils.check_bool_env('OPENLLM_SERIALIZATION_WARNING'):
@@ -235,16 +220,10 @@ Available official model_id(s): [default: {llm_config['default_id']}]
 def start_decorator(llm_config: LLMConfig, serve_grpc: bool = False) -> t.Callable[[FC], t.Callable[[FC], FC]]:
  def wrapper(fn: FC) -> t.Callable[[FC], FC]:
    composed = openllm.utils.compose(
-        llm_config.to_click_options,
-        _http_server_args if not serve_grpc else _grpc_server_args,
-        cog.optgroup.group('General LLM Options', help=f"The following options are related to running '{llm_config['start_name']}' LLM Server."),
-        model_id_option(factory=cog.optgroup),
-        model_version_option(factory=cog.optgroup),
-        system_message_option(factory=cog.optgroup),
-        prompt_template_file_option(factory=cog.optgroup),
-        cog.optgroup.option('--server-timeout', type=int, default=None, help='Server timeout in seconds'),
-        workers_per_resource_option(factory=cog.optgroup),
-        cors_option(factory=cog.optgroup),
+        llm_config.to_click_options, _http_server_args if not serve_grpc else _grpc_server_args,
+        cog.optgroup.group('General LLM Options', help=f"The following options are related to running '{llm_config['start_name']}' LLM Server."), model_id_option(factory=cog.optgroup),
+        model_version_option(factory=cog.optgroup), system_message_option(factory=cog.optgroup), prompt_template_file_option(factory=cog.optgroup),
+        cog.optgroup.option('--server-timeout', type=int, default=None, help='Server timeout in seconds'), workers_per_resource_option(factory=cog.optgroup), cors_option(factory=cog.optgroup),
        backend_option(factory=cog.optgroup),
        cog.optgroup.group('LLM Optimization Options',
                           help='''Optimization related options.
@@ -255,9 +234,7 @@ def start_decorator(llm_config: LLMConfig, serve_grpc: bool = False) -> t.Callab

            - DeepSpeed Inference: [link](https://www.deepspeed.ai/inference/)
            - GGML: Fast inference on [bare metal](https://github.com/ggerganov/ggml)
-            '''),
-        quantize_option(factory=cog.optgroup),
-        serialisation_option(factory=cog.optgroup),
+            '''), quantize_option(factory=cog.optgroup), serialisation_option(factory=cog.optgroup),
        cog.optgroup.option('--device',
                            type=openllm.utils.dantic.CUDA,
                            multiple=True,
@@ -286,8 +263,8 @@ def start_decorator(llm_config: LLMConfig, serve_grpc: bool = False) -> t.Callab
                            help='Optional name or path for given LoRA adapter' + f" to wrap '{llm_config['model_name']}'",
                            multiple=True,
                            callback=_id_callback,
-                            metavar='[PATH | [remote/][adapter_name:]adapter_id][, ...]'),
-        click.option('--return-process', is_flag=True, default=False, help='Internal use only.', hidden=True),
+                            metavar='[PATH | [remote/][adapter_name:]adapter_id][, ...]'), click.option('--return-process', is_flag=True, default=False, help='Internal use only.',
+                                                                                                        hidden=True),
    )
    return composed(fn)

--- a/openllm-python/src/openllm/cli/extension/list_bentos.py
+++ b/openllm-python/src/openllm/cli/extension/list_bentos.py
@@ -22,7 +22,8 @@ def cli(ctx: click.Context, output: LiteralOutput) -> None:
          'tag': str(b.tag),
          'size': human_readable_size(openllm.utils.calc_dir_size(b.path)),
          'models': [{
-              'tag': str(m.tag), 'size': human_readable_size(openllm.utils.calc_dir_size(m.path))
+              'tag': str(m.tag),
+              'size': human_readable_size(openllm.utils.calc_dir_size(m.path))
          } for m in (bentoml.models.get(_.tag) for _ in b.info.models)]
      } for b in tuple(i for i in bentoml.list() if all(
          k in i.info.labels for k in {'start_name', 'bundler'})) if b.info.labels['start_name'] == k] for k in tuple(inflection.dasherize(key) for key in openllm.CONFIG_MAPPING.keys())
--- a/openllm-python/src/openllm/models/auto/init.py
+++ b/openllm-python/src/openllm/models/auto/init.py
@@ -57,7 +57,9 @@ __lazy = LazyModule(__name__,
                    os.path.abspath('__file__'),
                    _import_structure,
                    extra_objects={
-                        'CONFIG_MAPPING': CONFIG_MAPPING, 'CONFIG_MAPPING_NAMES': CONFIG_MAPPING_NAMES, 'AutoConfig': AutoConfig,
+                        'CONFIG_MAPPING': CONFIG_MAPPING,
+                        'CONFIG_MAPPING_NAMES': CONFIG_MAPPING_NAMES,
+                        'AutoConfig': AutoConfig,
                    })
 __all__ = __lazy.__all__
 __dir__ = __lazy.__dir__
--- a/openllm-python/src/openllm/models/auto/factory.py
+++ b/openllm-python/src/openllm/models/auto/factory.py
@@ -160,10 +160,9 @@ class _LazyAutoMapping(OrderedDict, ReprMixin):
                  [self._load_attr_from_module(key, name) for key, name in self._model_mapping.items() if key in self._config_mapping.keys()] + list(self._extra_content.values()))

  def items(self) -> ConfigModelItemsView:
-    return t.cast('ConfigModelItemsView',
-                  [(self._load_attr_from_module(key, self._config_mapping[key]), self._load_attr_from_module(key, self._model_mapping[key]))
-                   for key in self._model_mapping.keys()
-                   if key in self._config_mapping.keys()] + list(self._extra_content.items()))
+    return t.cast('ConfigModelItemsView', [(self._load_attr_from_module(key, self._config_mapping[key]), self._load_attr_from_module(key, self._model_mapping[key]))
+                                           for key in self._model_mapping.keys()
+                                           if key in self._config_mapping.keys()] + list(self._extra_content.items()))

  def __iter__(self) -> t.Iterator[type[openllm.LLMConfig]]:
    return iter(t.cast('SupportsIter[t.Iterator[type[openllm.LLMConfig]]]', self.keys()))
--- a/openllm-python/src/openllm/models/chatglm/init.py
+++ b/openllm-python/src/openllm/models/chatglm/init.py
@@ -23,5 +23,7 @@ sys.modules[__name__] = LazyModule(__name__,
                                   globals()['__file__'],
                                   _import_structure,
                                   extra_objects={
-                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE, 'START_CHATGLM_COMMAND_DOCSTRING': START_CHATGLM_COMMAND_DOCSTRING, 'ChatGLMConfig': ChatGLMConfig
+                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE,
+                                       'START_CHATGLM_COMMAND_DOCSTRING': START_CHATGLM_COMMAND_DOCSTRING,
+                                       'ChatGLMConfig': ChatGLMConfig
                                   })
--- a/openllm-python/src/openllm/models/dolly_v2/init.py
+++ b/openllm-python/src/openllm/models/dolly_v2/init.py
@@ -30,5 +30,7 @@ sys.modules[__name__] = LazyModule(__name__,
                                   globals()['__file__'],
                                   _import_structure,
                                   extra_objects={
-                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE, 'START_DOLLY_V2_COMMAND_DOCSTRING': START_DOLLY_V2_COMMAND_DOCSTRING, 'DollyV2Config': DollyV2Config
+                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE,
+                                       'START_DOLLY_V2_COMMAND_DOCSTRING': START_DOLLY_V2_COMMAND_DOCSTRING,
+                                       'DollyV2Config': DollyV2Config
                                   })
--- a/openllm-python/src/openllm/models/falcon/init.py
+++ b/openllm-python/src/openllm/models/falcon/init.py
@@ -30,5 +30,7 @@ sys.modules[__name__] = LazyModule(__name__,
                                   globals()['__file__'],
                                   _import_structure,
                                   extra_objects={
-                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE, 'START_FALCON_COMMAND_DOCSTRING': START_FALCON_COMMAND_DOCSTRING, 'FalconConfig': FalconConfig
+                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE,
+                                       'START_FALCON_COMMAND_DOCSTRING': START_FALCON_COMMAND_DOCSTRING,
+                                       'FalconConfig': FalconConfig
                                   })
--- a/openllm-python/src/openllm/models/gpt_neox/init.py
+++ b/openllm-python/src/openllm/models/gpt_neox/init.py
@@ -30,5 +30,7 @@ sys.modules[__name__] = LazyModule(__name__,
                                   globals()['__file__'],
                                   _import_structure,
                                   extra_objects={
-                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE, 'START_GPT_NEOX_COMMAND_DOCSTRING': START_GPT_NEOX_COMMAND_DOCSTRING, 'GPTNeoXConfig': GPTNeoXConfig
+                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE,
+                                       'START_GPT_NEOX_COMMAND_DOCSTRING': START_GPT_NEOX_COMMAND_DOCSTRING,
+                                       'GPTNeoXConfig': GPTNeoXConfig
                                   })
--- a/openllm-python/src/openllm/models/opt/init.py
+++ b/openllm-python/src/openllm/models/opt/init.py
@@ -46,5 +46,7 @@ sys.modules[__name__] = LazyModule(__name__,
                                   globals()['__file__'],
                                   _import_structure,
                                   extra_objects={
-                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE, 'START_OPT_COMMAND_DOCSTRING': START_OPT_COMMAND_DOCSTRING, 'OPTConfig': OPTConfig,
+                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE,
+                                       'START_OPT_COMMAND_DOCSTRING': START_OPT_COMMAND_DOCSTRING,
+                                       'OPTConfig': OPTConfig,
                                   })
--- a/openllm-python/src/openllm/models/opt/modeling_flax_opt.py
+++ b/openllm-python/src/openllm/models/opt/modeling_flax_opt.py
@@ -33,7 +33,11 @@ class FlaxOPT(openllm.LLM['transformers.TFOPTForCausalLM', 'transformers.GPT2Tok
                          use_default_prompt_template: bool = False,
                          **attrs: t.Any) -> tuple[str, dict[str, t.Any], dict[str, t.Any]]:
    return process_prompt(prompt, DEFAULT_PROMPT_TEMPLATE, use_default_prompt_template, **attrs), {
-        'max_new_tokens': max_new_tokens, 'temperature': temperature, 'top_k': top_k, 'num_return_sequences': num_return_sequences, 'repetition_penalty': repetition_penalty
+        'max_new_tokens': max_new_tokens,
+        'temperature': temperature,
+        'top_k': top_k,
+        'num_return_sequences': num_return_sequences,
+        'repetition_penalty': repetition_penalty
    }, {}

  def generate(self, prompt: str, **attrs: t.Any) -> list[str]:
--- a/openllm-python/src/openllm/models/opt/modeling_vllm_opt.py
+++ b/openllm-python/src/openllm/models/opt/modeling_vllm_opt.py
@@ -19,5 +19,8 @@ class VLLMOPT(openllm.LLM['vllm.LLMEngine', 'transformers.GPT2Tokenizer']):
                          use_default_prompt_template: bool = True,
                          **attrs: t.Any) -> tuple[str, dict[str, t.Any], dict[str, t.Any]]:
    return process_prompt(prompt, DEFAULT_PROMPT_TEMPLATE, use_default_prompt_template, **attrs), {
-        'max_new_tokens': max_new_tokens, 'temperature': temperature, 'top_k': top_k, 'num_return_sequences': num_return_sequences
+        'max_new_tokens': max_new_tokens,
+        'temperature': temperature,
+        'top_k': top_k,
+        'num_return_sequences': num_return_sequences
    }, {}
--- a/openllm-python/tests/configuration_test.py
+++ b/openllm-python/tests/configuration_test.py
@@ -34,11 +34,13 @@ def test_missing_default():
    make_llm_config('MissingArchitecture', {'default_id': 'huggingface/t5-tiny-testing', 'model_ids': ['huggingface/t5-tiny-testing'], 'requirements': ['bentoml'],},)

 def test_forbidden_access():
-  cl_ = make_llm_config(
-      'ForbiddenAccess', {
-          'default_id': 'huggingface/t5-tiny-testing', 'model_ids': ['huggingface/t5-tiny-testing', 'bentoml/t5-tiny-testing'], 'architecture': 'PreTrainedModel', 'requirements': ['bentoml'],
-      },
-  )
+  cl_ = make_llm_config('ForbiddenAccess', {
+      'default_id': 'huggingface/t5-tiny-testing',
+      'model_ids': ['huggingface/t5-tiny-testing', 'bentoml/t5-tiny-testing'],
+      'architecture': 'PreTrainedModel',
+      'requirements': ['bentoml'],
+  },
+                        )

  assert pytest.raises(openllm.exceptions.ForbiddenAttributeError, cl_.__getattribute__, cl_(), '__config__',)
  assert pytest.raises(openllm.exceptions.ForbiddenAttributeError, cl_.__getattribute__, cl_(), 'GenerationConfig',)
@@ -128,7 +130,9 @@ def test_struct_envvar_with_overwrite_provided_env(monkeypatch: pytest.MonkeyPat
    mk.setenv(field_env_key('field1'), str(4.0))
    mk.setenv(field_env_key('temperature', suffix='generation'), str(0.2))
    sent = make_llm_config('OverwriteWithEnvAvailable', {
-        'default_id': 'asdfasdf', 'model_ids': ['asdf', 'asdfasdfads'], 'architecture': 'PreTrainedModel'
+        'default_id': 'asdfasdf',
+        'model_ids': ['asdf', 'asdfasdfads'],
+        'architecture': 'PreTrainedModel'
    },
                           fields=(('field1', 'float', 3.0),),
                           ).model_construct_env(field1=20.0, temperature=0.4)
--- a/openllm-python/tests/models/conftest.py
+++ b/openllm-python/tests/models/conftest.py
@@ -196,7 +196,8 @@ def _container_handle(model: str, model_id: str, image_tag: str, deployment_mode
                                    detach=True,
                                    device_requests=devs,
                                    ports={
-                                        '3000/tcp': port, '3001/tcp': prom_port
+                                        '3000/tcp': port,
+                                        '3001/tcp': prom_port
                                    },
                                    )