refactor(cli): cleanup API (#592)

* chore: remove unused imports Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> * refactor(cli): update to only need model_id Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> * feat: `openllm start model-id` Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> * chore: add changelog Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> * chore: update changelog notice Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> * chore: update correct config and running tools Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> * chore: update backward compat options and treat JSON outputs correspondingly Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> --------- Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
2026-03-11 11:39:52 -04:00 · 2023-11-09 11:40:17 -05:00
parent 86f7acafa9
commit b8a2e8cf91
48 changed files with 1096 additions and 1047 deletions
--- a/openllm-python/src/openllm/serialisation/transformers/__init__.py
+++ b/openllm-python/src/openllm/serialisation/transformers/__init__.py
@@ -6,6 +6,8 @@ import typing as t

 import attr
 import orjson
+import torch
+import transformers

 from huggingface_hub import snapshot_download
 from simple_di import Provide
@@ -13,8 +15,6 @@ from simple_di import inject

 import bentoml
 import openllm
-import torch
-import transformers

 from bentoml._internal.configuration.containers import BentoMLContainer
 from bentoml._internal.models.model import ModelOptions
@@ -29,6 +29,7 @@ from .weights import HfIgnore

 if t.TYPE_CHECKING:
  import types
+
  from bentoml._internal.models import ModelStore
  from openllm_core._typing_compat import DictStrAny

@@ -124,7 +125,7 @@ def import_model(llm: openllm.LLM[M, T], *decls: t.Any, trust_remote_code: bool,
      del model
    return bentomodel

-def get(llm: openllm.LLM[M, T], auto_import: bool = False) -> bentoml.Model:
+def get(llm: openllm.LLM[M, T]) -> bentoml.Model:
  try:
    model = bentoml.models.get(llm.tag)
    backend = model.info.labels['backend']
@@ -132,7 +133,6 @@ def get(llm: openllm.LLM[M, T], auto_import: bool = False) -> bentoml.Model:
    _patch_correct_tag(llm, transformers.AutoConfig.from_pretrained(model.path, trust_remote_code=llm.trust_remote_code), _revision=model.info.metadata.get('_revision'))
    return model
  except Exception as err:
-    if auto_import: return import_model(llm, trust_remote_code=llm.trust_remote_code)
    raise openllm.exceptions.OpenLLMException(f'Failed while getting stored artefact (lookup for traceback):\n{err}') from err

 def load_model(llm: openllm.LLM[M, T], *decls: t.Any, **attrs: t.Any) -> M:
--- a/openllm-python/src/openllm/serialisation/transformers/_helpers.py
+++ b/openllm-python/src/openllm/serialisation/transformers/_helpers.py
@@ -1,11 +1,11 @@
 from __future__ import annotations
-import copy, re
-from pathlib import Path
+import copy
 import typing as t

-import openllm
-import transformers
 import torch
+import transformers
+
+import openllm

 from openllm.serialisation.constants import FRAMEWORK_TO_AUTOCLASS_MAPPING
 from openllm.serialisation.constants import HUB_ATTRS
--- a/openllm-python/src/openllm/serialisation/transformers/weights.py
+++ b/openllm-python/src/openllm/serialisation/transformers/weights.py
@@ -1,18 +1,40 @@
 from __future__ import annotations
+import traceback
 import typing as t

 import attr

 from huggingface_hub import HfApi

+from openllm_core.exceptions import Error
+
 if t.TYPE_CHECKING:
+  from huggingface_hub.hf_api import ModelInfo as HfModelInfo
+
  import openllm

  from openllm_core._typing_compat import M
  from openllm_core._typing_compat import T

+# Lazily created HfApi instance shared by every Client() call; None until first use.
+__global_inst__ = None
+# Memo of model_info() results filled in by ModelInfo(), looked up by model id.
+__cached_id__: dict[str, HfModelInfo] = dict()
+
+def Client() -> HfApi:
+  """Return the module-wide HfApi instance, constructing it on the first call."""
+  global __global_inst__  # noqa: PLW0603
+  if __global_inst__ is not None: return __global_inst__
+  __global_inst__ = HfApi()
+  return __global_inst__
+
+def ModelInfo(model_id: str, revision: str | None = None) -> HfModelInfo:
+  """Fetch Hugging Face Hub metadata for `model_id` at `revision`, memoising the result.
+  Raises `Error` (chained to the underlying exception) when the lookup fails.
+  """
+  # Key the cache on the revision as well: keying on model_id alone would hand back the
+  # metadata of whichever revision was requested first for every later revision.
+  # The key for revision=None stays the bare model id, as before.
+  # NOTE(review): assumes '@' never occurs in a model id -- confirm against Hub naming rules.
+  cache_key = model_id if revision is None else f'{model_id}@{revision}'
+  if cache_key in __cached_id__: return __cached_id__[cache_key]
+  try:
+    __cached_id__[cache_key] = Client().model_info(model_id, revision=revision)
+    return __cached_id__[cache_key]
+  except Exception as err:
+    # Print the original traceback before wrapping so the root cause stays visible.
+    traceback.print_exc()
+    raise Error(f'Failed to fetch {model_id} from huggingface.co') from err
+
def has_safetensors_weights(model_id: str, revision: str | None = None) -> bool:
-  return any(s.rfilename.endswith('.safetensors') for s in HfApi().model_info(model_id, revision=revision).siblings)
+  # True when any file listed in the repo's siblings ends with '.safetensors'.
+  return any(s.rfilename.endswith('.safetensors') for s in ModelInfo(model_id, revision=revision).siblings)

@attr.define(slots=True)
 class HfIgnore: