mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-03-11 11:39:52 -04:00
refactor(cli): cleanup API (#592)
* chore: remove unused imports Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> * refactor(cli): update to only need model_id Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> * feat: `openllm start model-id` Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> * chore: add changelog Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> * chore: update changelog notice Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> * chore: update correct config and running tools Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> * chore: update backward compat options and treat JSON outputs correspondingly Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> --------- Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
@@ -6,6 +6,8 @@ import typing as t
|
||||
|
||||
import attr
|
||||
import orjson
|
||||
import torch
|
||||
import transformers
|
||||
|
||||
from huggingface_hub import snapshot_download
|
||||
from simple_di import Provide
|
||||
@@ -13,8 +15,6 @@ from simple_di import inject
|
||||
|
||||
import bentoml
|
||||
import openllm
|
||||
import torch
|
||||
import transformers
|
||||
|
||||
from bentoml._internal.configuration.containers import BentoMLContainer
|
||||
from bentoml._internal.models.model import ModelOptions
|
||||
@@ -29,6 +29,7 @@ from .weights import HfIgnore
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
import types
|
||||
|
||||
from bentoml._internal.models import ModelStore
|
||||
from openllm_core._typing_compat import DictStrAny
|
||||
|
||||
@@ -124,7 +125,7 @@ def import_model(llm: openllm.LLM[M, T], *decls: t.Any, trust_remote_code: bool,
|
||||
del model
|
||||
return bentomodel
|
||||
|
||||
def get(llm: openllm.LLM[M, T], auto_import: bool = False) -> bentoml.Model:
|
||||
def get(llm: openllm.LLM[M, T]) -> bentoml.Model:
|
||||
try:
|
||||
model = bentoml.models.get(llm.tag)
|
||||
backend = model.info.labels['backend']
|
||||
@@ -132,7 +133,6 @@ def get(llm: openllm.LLM[M, T], auto_import: bool = False) -> bentoml.Model:
|
||||
_patch_correct_tag(llm, transformers.AutoConfig.from_pretrained(model.path, trust_remote_code=llm.trust_remote_code), _revision=model.info.metadata.get('_revision'))
|
||||
return model
|
||||
except Exception as err:
|
||||
if auto_import: return import_model(llm, trust_remote_code=llm.trust_remote_code)
|
||||
raise openllm.exceptions.OpenLLMException(f'Failed while getting stored artefact (lookup for traceback):\n{err}') from err
|
||||
|
||||
def load_model(llm: openllm.LLM[M, T], *decls: t.Any, **attrs: t.Any) -> M:
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
from __future__ import annotations
|
||||
import copy, re
|
||||
from pathlib import Path
|
||||
import copy
|
||||
import typing as t
|
||||
|
||||
import openllm
|
||||
import transformers
|
||||
import torch
|
||||
import transformers
|
||||
|
||||
import openllm
|
||||
|
||||
from openllm.serialisation.constants import FRAMEWORK_TO_AUTOCLASS_MAPPING
|
||||
from openllm.serialisation.constants import HUB_ATTRS
|
||||
|
||||
@@ -1,18 +1,40 @@
|
||||
from __future__ import annotations
|
||||
import traceback
|
||||
import typing as t
|
||||
|
||||
import attr
|
||||
|
||||
from huggingface_hub import HfApi
|
||||
|
||||
from openllm_core.exceptions import Error
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
from huggingface_hub.hf_api import ModelInfo as HfModelInfo
|
||||
|
||||
import openllm
|
||||
|
||||
from openllm_core._typing_compat import M
|
||||
from openllm_core._typing_compat import T
|
||||
|
||||
__global_inst__ = None
|
||||
__cached_id__: dict[str, HfModelInfo] = dict()
|
||||
|
||||
def Client() -> HfApi:
  """Return the module-wide ``HfApi`` instance, creating it on first use."""
  global __global_inst__  # noqa: PLW0603
  # Fast path: an instance was already built by an earlier call.
  if __global_inst__ is not None:
    return __global_inst__
  __global_inst__ = HfApi()
  return __global_inst__
|
||||
|
||||
def ModelInfo(model_id: str, revision: str | None = None) -> HfModelInfo:
  """Return Hub metadata for ``model_id``, memoised per model and revision.

  Args:
    model_id: Model identifier forwarded to ``Client().model_info``.
    revision: Optional revision forwarded to ``Client().model_info``.

  Returns:
    The value returned by ``Client().model_info(model_id, revision=revision)``.
    Repeated calls with the same arguments reuse the cached value.

  Raises:
    Error: If the lookup raises; the traceback is printed and the original
      exception is chained as the cause.
  """
  # The revision must be part of the key: keyed on model_id alone, a request
  # for one revision would be answered with whichever revision was cached
  # first. With no revision the bare model_id is kept, so the mapping stays
  # str-keyed and entries cached before this change remain valid.
  cache_key = model_id if revision is None else f'{model_id}@{revision}'
  if cache_key in __cached_id__:
    return __cached_id__[cache_key]
  try:
    # Only the remote call sits inside the try; the cache write cannot fail in
    # a way that should be reported as a fetch error.
    info = Client().model_info(model_id, revision=revision)
  except Exception as err:
    traceback.print_exc()
    raise Error(f'Failed to fetch {model_id} from huggingface.co') from err
  __cached_id__[cache_key] = info
  return info
|
||||
|
||||
def has_safetensors_weights(model_id: str, revision: str | None = None) -> bool:
|
||||
return any(s.rfilename.endswith('.safetensors') for s in HfApi().model_info(model_id, revision=revision).siblings)
|
||||
return any(s.rfilename.endswith('.safetensors') for s in ModelInfo(model_id, revision=revision).siblings)
|
||||
|
||||
@attr.define(slots=True)
|
||||
class HfIgnore:
|
||||
|
||||
Reference in New Issue
Block a user