Mirror of https://github.com/bentoml/OpenLLM.git, synced 2026-01-26 08:17:52 -05:00
chore: ignore new lines split [skip ci]
Signed-off-by: aarnphm-ec2-dev <29749331+aarnphm@users.noreply.github.com>
@@ -43,13 +43,10 @@ def load_tokenizer(llm: openllm.LLM[t.Any, T], **tokenizer_attrs: t.Any) -> T:
try:
tokenizer = cloudpickle.load(t.cast('t.IO[bytes]', cofile))['tokenizer']
except KeyError:
raise openllm.exceptions.OpenLLMException(
"Bento model does not have tokenizer. Make sure to save the tokenizer within the model via 'custom_objects'. "
"For example: \"bentoml.transformers.save_model(..., custom_objects={'tokenizer': tokenizer})\"") from None
raise openllm.exceptions.OpenLLMException("Bento model does not have tokenizer. Make sure to save the tokenizer within the model via 'custom_objects'. "
"For example: \"bentoml.transformers.save_model(..., custom_objects={'tokenizer': tokenizer})\"") from None
else:
tokenizer = infer_tokenizers_from_llm(llm).from_pretrained(bentomodel_fs.getsyspath('/'),
trust_remote_code=llm.trust_remote_code,
**tokenizer_attrs)
tokenizer = infer_tokenizers_from_llm(llm).from_pretrained(bentomodel_fs.getsyspath('/'), trust_remote_code=llm.trust_remote_code, **tokenizer_attrs)
if tokenizer.pad_token_id is None:
if config.pad_token_id is not None: tokenizer.pad_token_id = config.pad_token_id
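The error message in this hunk spells out the fix it asks for: persist the tokenizer alongside the model through custom_objects. For reference, a minimal sketch mirroring that quoted call (the model id and Bento model name are illustrative):

import bentoml
import transformers

model_id = 'facebook/opt-125m'  # illustrative model id
model = transformers.AutoModelForCausalLM.from_pretrained(model_id)
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)

# Store the tokenizer in custom_objects so load_tokenizer() can find it later.
bentoml.transformers.save_model('opt-125m-demo', model, custom_objects={'tokenizer': tokenizer})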
@@ -6,6 +6,4 @@ FRAMEWORK_TO_AUTOCLASS_MAPPING = {
'flax': ('FlaxAutoModelForCausalLM', 'FlaxAutoModelForSeq2SeqLM'),
'vllm': ('AutoModelForCausalLM', 'AutoModelForSeq2SeqLM')
}
HUB_ATTRS = [
'cache_dir', 'code_revision', 'force_download', 'local_files_only', 'proxies', 'resume_download', 'revision', 'subfolder', 'use_auth_token'
]
HUB_ATTRS = ['cache_dir', 'code_revision', 'force_download', 'local_files_only', 'proxies', 'resume_download', 'revision', 'subfolder', 'use_auth_token']
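HUB_ATTRS enumerates the kwargs that belong to the Hugging Face Hub download machinery rather than to the model itself. A sketch (not the actual OpenLLM code) of how such a list is typically used to split caller kwargs:

HUB_ATTRS = ['cache_dir', 'code_revision', 'force_download', 'local_files_only', 'proxies', 'resume_download', 'revision', 'subfolder', 'use_auth_token']

def split_hub_attrs(**attrs):
    # Pull out Hub download options; leave everything else for the model call.
    hub_attrs = {k: attrs.pop(k) for k in HUB_ATTRS if k in attrs}
    return hub_attrs, attrs

hub_attrs, rest = split_hub_attrs(revision='main', torch_dtype='auto')
# hub_attrs == {'revision': 'main'}; rest == {'torch_dtype': 'auto'}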
@@ -43,11 +43,7 @@ logger = logging.getLogger(__name__)
__all__ = ['import_model', 'get', 'load_model']
@inject
def import_model(llm: openllm.LLM[M, T],
*decls: t.Any,
trust_remote_code: bool,
_model_store: ModelStore = Provide[BentoMLContainer.model_store],
**attrs: t.Any) -> bentoml.Model:
def import_model(llm: openllm.LLM[M, T], *decls: t.Any, trust_remote_code: bool, _model_store: ModelStore = Provide[BentoMLContainer.model_store], **attrs: t.Any) -> bentoml.Model:
"""Auto detect model type from given model_id and import it to bentoml's model store.
For all kwargs, it will be parsed into `transformers.AutoConfig.from_pretrained` first,
@@ -76,8 +72,7 @@ def import_model(llm: openllm.LLM[M, T],
if quantize_method == 'gptq':
if not openllm.utils.is_autogptq_available():
raise openllm.exceptions.OpenLLMException(
"GPTQ quantisation requires 'auto-gptq' (Not found in local environment). Install it with 'pip install \"openllm[gptq]\"'")
raise openllm.exceptions.OpenLLMException("GPTQ quantisation requires 'auto-gptq' (Not found in local environment). Install it with 'pip install \"openllm[gptq]\"'")
if llm.config['model_type'] != 'causal_lm':
raise openllm.exceptions.OpenLLMException(f"GPTQ only support Causal LM (got {llm.__class__} of {llm.config['model_type']})")
signatures['generate'] = {'batchable': False}
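The availability check here guards an optional dependency. A generic sketch in the spirit of is_autogptq_available(), not OpenLLM's actual implementation:

import importlib.util

def is_autogptq_available() -> bool:
    # True only if the 'auto_gptq' package can be imported in this environment.
    return importlib.util.find_spec('auto_gptq') is not None

if not is_autogptq_available():
    raise RuntimeError('Install GPTQ support with: pip install "openllm[gptq]"')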
@@ -107,8 +102,7 @@ def import_model(llm: openllm.LLM[M, T],
tokenizer.save_pretrained(bentomodel.path)
if quantize_method == 'gptq':
if not openllm.utils.is_autogptq_available():
raise openllm.exceptions.OpenLLMException(
"GPTQ quantisation requires 'auto-gptq' (Not found in local environment). Install it with 'pip install \"openllm[gptq]\"'")
raise openllm.exceptions.OpenLLMException("GPTQ quantisation requires 'auto-gptq' (Not found in local environment). Install it with 'pip install \"openllm[gptq]\"'")
if llm.config['model_type'] != 'causal_lm':
raise openllm.exceptions.OpenLLMException(f"GPTQ only support Causal LM (got {llm.__class__} of {llm.config['model_type']})")
logger.debug('Saving model with GPTQ quantisation will require loading model into memory.')
@@ -124,20 +118,13 @@ def import_model(llm: openllm.LLM[M, T],
else:
architectures = getattr(config, 'architectures', [])
if not architectures:
raise RuntimeError(
'Failed to determine the architecture for this model. Make sure the `config.json` is valid and can be loaded with `transformers.AutoConfig`'
)
raise RuntimeError('Failed to determine the architecture for this model. Make sure the `config.json` is valid and can be loaded with `transformers.AutoConfig`')
architecture = architectures[0]
update_model(bentomodel, metadata={'_pretrained_class': architecture})
if llm._local:
# possible local path
logger.debug('Model will be loaded into memory to save to target store as it is from local path.')
model = infer_autoclass_from_llm(llm, config).from_pretrained(llm.model_id,
*decls,
config=config,
trust_remote_code=trust_remote_code,
**hub_attrs,
**attrs)
model = infer_autoclass_from_llm(llm, config).from_pretrained(llm.model_id, *decls, config=config, trust_remote_code=trust_remote_code, **hub_attrs, **attrs)
# for trust_remote_code to work
bentomodel.enter_cloudpickle_context([importlib.import_module(model.__module__)], imported_modules)
model.save_pretrained(bentomodel.path, max_shard_size='5GB', safe_serialization=safe_serialisation)
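The architecture lookup above relies on the `architectures` field that Hugging Face configs expose. For reference (the model id is an example):

import transformers

config = transformers.AutoConfig.from_pretrained('facebook/opt-125m')
architectures = getattr(config, 'architectures', [])
print(architectures)        # e.g. ['OPTForCausalLM']
architecture = architectures[0]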
@@ -149,8 +136,7 @@ def import_model(llm: openllm.LLM[M, T],
else:
bentomodel.flush() # type: ignore[no-untyped-call]
bentomodel.save(_model_store)
openllm.utils.analytics.track(
openllm.utils.analytics.ModelSaveEvent(module=bentomodel.info.module, model_size_in_kb=openllm.utils.calc_dir_size(bentomodel.path) / 1024))
openllm.utils.analytics.track(openllm.utils.analytics.ModelSaveEvent(module=bentomodel.info.module, model_size_in_kb=openllm.utils.calc_dir_size(bentomodel.path) / 1024))
finally:
bentomodel.exit_cloudpickle_context(imported_modules)
# NOTE: We need to free up the cache after importing the model
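model_size_in_kb is derived from the on-disk size of the saved Bento model. A rough equivalent of that size computation (calc_dir_size is OpenLLM's own helper and may differ in details; the path below is a placeholder):

import os

def dir_size_bytes(path: str) -> int:
    # Sum file sizes recursively under `path`.
    return sum(
        os.path.getsize(os.path.join(root, name))
        for root, _, files in os.walk(path)
        for name in files)

model_size_in_kb = dir_size_bytes('/path/to/bentomodel') / 1024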
@@ -171,8 +157,7 @@ def get(llm: openllm.LLM[M, T], auto_import: bool = False) -> bentoml.Model:
if Version(model.info.api_version) < Version('v2'):
raise openllm.exceptions.OpenLLMException('Please run "openllm prune -y --include-bentos" and upgrade all saved model to latest release.')
if model.info.labels['backend'] != llm.__llm_backend__:
raise openllm.exceptions.OpenLLMException(
f"Model {model.tag} was saved with backend {model.info.labels['backend']}, while loading with {llm.__llm_backend__}.")
raise openllm.exceptions.OpenLLMException(f"Model {model.tag} was saved with backend {model.info.labels['backend']}, while loading with {llm.__llm_backend__}.")
return model
except Exception as err:
if auto_import: return import_model(llm, trust_remote_code=llm.trust_remote_code)
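The api_version gate compares 'v1'/'v2' style strings; assuming the Version in scope is packaging's PEP 440 implementation, a leading 'v' prefix is accepted and compares as expected:

from packaging.version import Version

assert Version('v1') < Version('v2')
assert Version('1.0.0') < Version('v2')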
@@ -185,8 +170,7 @@ def load_model(llm: openllm.LLM[M, T], *decls: t.Any, **attrs: t.Any) -> M:
default=llm._serialisation_format == 'safetensors')
if '_quantize' in llm._bentomodel.info.metadata and llm._bentomodel.info.metadata['_quantize'] == 'gptq':
if not openllm.utils.is_autogptq_available():
raise openllm.exceptions.OpenLLMException(
"GPTQ quantisation requires 'auto-gptq' (Not found in local environment). Install it with 'pip install \"openllm[gptq]\"'")
raise openllm.exceptions.OpenLLMException("GPTQ quantisation requires 'auto-gptq' (Not found in local environment). Install it with 'pip install \"openllm[gptq]\"'")
if llm.config['model_type'] != 'causal_lm':
raise openllm.exceptions.OpenLLMException(f"GPTQ only support Causal LM (got {llm.__class__} of {llm.config['model_type']})")
return autogptq.AutoGPTQForCausalLM.from_quantized(llm._bentomodel.path,
@@ -23,8 +23,7 @@ if t.TYPE_CHECKING:
from openllm_core._typing_compat import M
from openllm_core._typing_compat import T
else:
transformers, torch = openllm_core.utils.LazyLoader('transformers', globals(),
'transformers'), openllm_core.utils.LazyLoader('torch', globals(), 'torch')
transformers, torch = openllm_core.utils.LazyLoader('transformers', globals(), 'transformers'), openllm_core.utils.LazyLoader('torch', globals(), 'torch')
_object_setattr = object.__setattr__
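LazyLoader defers the real import of transformers/torch until first attribute access. A minimal sketch of that pattern (openllm_core's implementation differs in details):

import importlib
import types

class LazyLoader(types.ModuleType):
    def __init__(self, local_name, parent_globals, module_name):
        super().__init__(module_name)
        self._local_name = local_name
        self._parent_globals = parent_globals

    def _load(self):
        # Import for real and replace the proxy in the caller's globals.
        module = importlib.import_module(self.__name__)
        self._parent_globals[self._local_name] = module
        return module

    def __getattr__(self, item):
        return getattr(self._load(), item)

torch = LazyLoader('torch', globals(), 'torch')  # torch is only imported on first use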
@@ -45,11 +44,7 @@ def process_config(model_id: str, trust_remote_code: bool, **attrs: t.Any) -> tu
if not isinstance(config, transformers.PretrainedConfig):
copied_attrs = copy.deepcopy(attrs)
if copied_attrs.get('torch_dtype', None) == 'auto': copied_attrs.pop('torch_dtype')
config, attrs = transformers.AutoConfig.from_pretrained(model_id,
return_unused_kwargs=True,
trust_remote_code=trust_remote_code,
**hub_attrs,
**copied_attrs)
config, attrs = transformers.AutoConfig.from_pretrained(model_id, return_unused_kwargs=True, trust_remote_code=trust_remote_code, **hub_attrs, **copied_attrs)
return config, hub_attrs, attrs
def infer_tokenizers_from_llm(__llm: openllm.LLM[t.Any, T], /) -> T:
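The collapsed AutoConfig call in this hunk uses return_unused_kwargs=True, which returns a (config, leftover_kwargs) pair: kwargs the config class recognises are applied to the config, the rest come back for the later from_pretrained call. For reference (the model id and extra kwargs are illustrative):

import transformers

config, unused = transformers.AutoConfig.from_pretrained(
    'facebook/opt-125m', return_unused_kwargs=True, output_attentions=True, foo=False)
print(type(config).__name__)  # OPTConfig
print(unused)                 # {'foo': False} -- 'output_attentions' was consumed by the config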
@@ -62,9 +57,7 @@ def infer_autoclass_from_llm(llm: openllm.LLM[M, T], config: transformers.Pretra
if llm.config['trust_remote_code']:
autoclass = 'AutoModelForSeq2SeqLM' if llm.config['model_type'] == 'seq2seq_lm' else 'AutoModelForCausalLM'
if not hasattr(config, 'auto_map'):
raise ValueError(
f'Invalid configuraiton for {llm.model_id}. ``trust_remote_code=True`` requires `transformers.PretrainedConfig` to contain a `auto_map` mapping'
)
raise ValueError(f'Invalid configuraiton for {llm.model_id}. ``trust_remote_code=True`` requires `transformers.PretrainedConfig` to contain a `auto_map` mapping')
# in case this model doesn't use the correct auto class for model type, for example like chatglm
# where it uses AutoModel instead of AutoModelForCausalLM. Then we fallback to AutoModel
if autoclass not in config.auto_map: autoclass = 'AutoModel'
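trust_remote_code resolution hinges on the config's auto_map, which maps auto-class names to module.Class paths inside the model repository; the fallback above handles repos (e.g. chatglm) that only register AutoModel. An illustrative shape of that mapping (the values here are made up):

auto_map = {
    'AutoConfig': 'configuration_mymodel.MyModelConfig',
    'AutoModel': 'modeling_mymodel.MyModel',
}
autoclass = 'AutoModelForCausalLM'
if autoclass not in auto_map:
    autoclass = 'AutoModel'  # fall back for repos that only expose AutoModel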
@@ -84,7 +77,8 @@ def update_model(bentomodel: bentoml.Model, metadata: DictStrAny) -> bentoml.Mod
based: DictStrAny = copy.deepcopy(bentomodel.info.metadata)
based.update(metadata)
_object_setattr(
bentomodel, '_info',
bentomodel,
'_info',
ModelInfo( # type: ignore[call-arg] # XXX: remove me once upstream is merged
tag=bentomodel.info.tag,
module=bentomodel.info.module,
@@ -102,9 +96,7 @@ def make_model_signatures(llm: openllm.LLM[M, T]) -> ModelSignaturesType:
infer_fn: tuple[str, ...] = ('__call__',)
default_config = ModelSignature(batchable=False)
if llm.__llm_backend__ in {'pt', 'vllm'}:
infer_fn += ('forward', 'generate', 'contrastive_search', 'greedy_search', 'sample', 'beam_search', 'beam_sample', 'group_beam_search',
'constrained_beam_search',
)
infer_fn += ('forward', 'generate', 'contrastive_search', 'greedy_search', 'sample', 'beam_search', 'beam_sample', 'group_beam_search', 'constrained_beam_search',)
elif llm.__llm_backend__ == 'tf':
infer_fn += ('predict', 'call', 'generate', 'compute_transition_scores', 'greedy_search', 'sample', 'beam_search', 'contrastive_search',)
else: