refactor(cli): cleanup API (#592)

* chore: remove unused imports

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* refactor(cli): update to only need model_id

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* feat: `openllm start model-id`

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: add changelog

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: update changelog notice

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: update correct config and running tools

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: update backward compat options and treat JSON outputs
correspondingly

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

---------

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
Aaron Pham
2023-11-09 11:40:17 -05:00
committed by GitHub
parent 86f7acafa9
commit b8a2e8cf91
48 changed files with 1096 additions and 1047 deletions

View File

@@ -6,6 +6,8 @@ import typing as t
import attr
import orjson
import torch
import transformers
from huggingface_hub import snapshot_download
from simple_di import Provide
@@ -13,8 +15,6 @@ from simple_di import inject
import bentoml
import openllm
import torch
import transformers
from bentoml._internal.configuration.containers import BentoMLContainer
from bentoml._internal.models.model import ModelOptions
@@ -29,6 +29,7 @@ from .weights import HfIgnore
if t.TYPE_CHECKING:
import types
from bentoml._internal.models import ModelStore
from openllm_core._typing_compat import DictStrAny
@@ -124,7 +125,7 @@ def import_model(llm: openllm.LLM[M, T], *decls: t.Any, trust_remote_code: bool,
del model
return bentomodel
def get(llm: openllm.LLM[M, T], auto_import: bool = False) -> bentoml.Model:
def get(llm: openllm.LLM[M, T]) -> bentoml.Model:
try:
model = bentoml.models.get(llm.tag)
backend = model.info.labels['backend']
@@ -132,7 +133,6 @@ def get(llm: openllm.LLM[M, T], auto_import: bool = False) -> bentoml.Model:
_patch_correct_tag(llm, transformers.AutoConfig.from_pretrained(model.path, trust_remote_code=llm.trust_remote_code), _revision=model.info.metadata.get('_revision'))
return model
except Exception as err:
if auto_import: return import_model(llm, trust_remote_code=llm.trust_remote_code)
raise openllm.exceptions.OpenLLMException(f'Failed while getting stored artefact (lookup for traceback):\n{err}') from err
def load_model(llm: openllm.LLM[M, T], *decls: t.Any, **attrs: t.Any) -> M:

View File

@@ -1,11 +1,11 @@
from __future__ import annotations
import copy, re
from pathlib import Path
import copy
import typing as t
import openllm
import transformers
import torch
import transformers
import openllm
from openllm.serialisation.constants import FRAMEWORK_TO_AUTOCLASS_MAPPING
from openllm.serialisation.constants import HUB_ATTRS

View File

@@ -1,18 +1,40 @@
from __future__ import annotations
import traceback
import typing as t
import attr
from huggingface_hub import HfApi
from openllm_core.exceptions import Error
if t.TYPE_CHECKING:
from huggingface_hub.hf_api import ModelInfo as HfModelInfo
import openllm
from openllm_core._typing_compat import M
from openllm_core._typing_compat import T
# Shared HfApi instance handed out by Client(); stays None until Client() is first called.
__global_inst__ = None
# Model info objects already fetched by ModelInfo(), so repeated lookups return the stored result.
__cached_id__: dict[str, HfModelInfo] = dict()
def Client() -> HfApi:
  """Return the module-wide ``HfApi`` instance, constructing it on the first call."""
  global __global_inst__  # noqa: PLW0603
  if __global_inst__ is not None:
    return __global_inst__
  # First call: build the instance once and keep it for later callers.
  __global_inst__ = HfApi()
  return __global_inst__
def ModelInfo(model_id: str, revision: str | None = None) -> HfModelInfo:
  """Fetch the model info for ``model_id`` via ``Client().model_info`` and cache the result.

  Args:
    model_id: Identifier forwarded to ``HfApi.model_info``.
    revision: Optional revision forwarded to ``HfApi.model_info``.

  Returns:
    The object returned by ``HfApi.model_info``. Repeated calls with the same
    ``model_id`` and ``revision`` return the cached object.

  Raises:
    Error: If the lookup raises. The traceback is printed first and the original
      exception is chained as the cause.
  """
  # Key the cache on the revision as well, otherwise a lookup for one revision would
  # return whatever was cached for another. The key stays a plain string, and is just
  # ``model_id`` when no revision is given, so it is compatible with existing entries.
  # NOTE(review): assumes '@' never appears in a model id -- confirm.
  cache_key = model_id if revision is None else f'{model_id}@{revision}'
  if cache_key in __cached_id__: return __cached_id__[cache_key]
  try:
    info = Client().model_info(model_id, revision=revision)
  except Exception as err:
    traceback.print_exc()
    raise Error(f'Failed to fetch {model_id} from huggingface.co') from err
  # Only store successful lookups; failures are re-attempted on the next call.
  __cached_id__[cache_key] = info
  return info
def has_safetensors_weights(model_id: str, revision: str | None = None) -> bool:
  """Report whether any file listed for the model ends with ``.safetensors``.

  Args:
    model_id: Identifier forwarded to ``ModelInfo``.
    revision: Optional revision forwarded to ``ModelInfo``.

  Returns:
    True if at least one sibling's ``rfilename`` ends with ``.safetensors``.

  Raises:
    Error: Propagated from ``ModelInfo`` when the lookup fails.
  """
  # Go through ModelInfo (cached) rather than building a fresh HfApi() per call; the
  # earlier direct ``HfApi().model_info`` return made this line unreachable.
  # ``siblings`` is treated as empty when it is None so any() simply yields False.
  siblings = ModelInfo(model_id, revision=revision).siblings or ()
  return any(s.rfilename.endswith('.safetensors') for s in siblings)
@attr.define(slots=True)
class HfIgnore: