From aa50b5279e6881c81852febda80d482bc55f2115 Mon Sep 17 00:00:00 2001 From: Aaron <29749331+aarnphm@users.noreply.github.com> Date: Tue, 6 Jun 2023 22:42:28 -0400 Subject: [PATCH] fix(falcon): loading based on model registration remove duplicate events Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> --- pyproject.toml | 2 +- src/openllm/_llm.py | 1 - src/openllm/_package.py | 2 +- src/openllm/cli.py | 12 ++---- src/openllm/models/falcon/modeling_falcon.py | 21 +++++----- src/openllm/utils/analytics.py | 44 -------------------- 6 files changed, 15 insertions(+), 67 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ae64eacb..9a0c51ea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,7 +67,7 @@ requires-python = ">=3.8" [project.optional-dependencies] all = ['openllm[fine-tune]', 'openllm[chatglm]', 'openllm[falcon]', 'openllm[flan-t5]', 'openllm[starcoder]'] chatglm = ['cpm_kernels', 'sentencepiece'] -falcon = ['einops'] +falcon = ['einops', 'xformers', 'safetensors'] fine-tune = ['peft', 'bitsandbytes', 'datasets'] flan-t5 = ['flax', 'jax', 'jaxlib', 'tensorflow'] starcoder = ['bitsandbytes'] diff --git a/src/openllm/_llm.py b/src/openllm/_llm.py index 46060cc7..2602aa04 100644 --- a/src/openllm/_llm.py +++ b/src/openllm/_llm.py @@ -642,7 +642,6 @@ class LLM(LLMInterface, metaclass=LLMMetaclass): kwds["accelerator"] = "bettertransformer" if self.__llm_model__ is None: - # Hmm, bentoml.transformers.load_model doesn't yet support args. self.__llm_model__ = self._bentomodel.load_model(*self.__llm_args__, **kwds) if ( diff --git a/src/openllm/_package.py b/src/openllm/_package.py index c3649133..9207de0e 100644 --- a/src/openllm/_package.py +++ b/src/openllm/_package.py @@ -86,7 +86,7 @@ def construct_python_options(llm: openllm.LLM, llm_fs: FS) -> PythonOptions: "protobuf", "grpcio", "grpcio-health-checking", - "opentelemetry-instrumentation-grpc==0.35b0", + "opentelemetry-instrumentation-grpc==0.38b0", "grpcio-reflection", ] ) diff --git a/src/openllm/cli.py b/src/openllm/cli.py index 200057bb..85b6eb66 100644 --- a/src/openllm/cli.py +++ b/src/openllm/cli.py @@ -129,22 +129,16 @@ class OpenLLMCommandGroup(BentoMLCommandGroup): start_time = time.time_ns() - def get_tracking_event(return_value: t.Any): - assert group.name, "Group name is required" - if group.name in analytics.cli_events_map and command_name in analytics.cli_events_map[group.name]: - return analytics.cli_events_map[group.name][command_name](group, command_name, return_value) - return analytics.OpenllmCliEvent(cmd_group=group.name, cmd_name=command_name) - with analytics.set_bentoml_tracking(): + assert group.name is not None, "group.name should not be None" + event = analytics.OpenllmCliEvent(cmd_group=group.name, cmd_name=command_name) try: return_value = func(*args, **attrs) - event = get_tracking_event(return_value) duration_in_ms = (time.time_ns() - start_time) / 1e6 event.duration_in_ms = duration_in_ms analytics.track(event) return return_value except Exception as e: - event = get_tracking_event(None) duration_in_ms = (time.time_ns() - start_time) / 1e6 event.duration_in_ms = duration_in_ms event.error_type = type(e).__name__ @@ -580,7 +574,7 @@ def cli_factory() -> click.Group: if output == "pretty": if not get_quiet_mode(): - _echo("\n" + OPENLLM_FIGLET) + _echo("\n" + OPENLLM_FIGLET, fg="white") if not _previously_built: _echo(f"Successfully built {bento}.", fg="green") else: diff --git a/src/openllm/models/falcon/modeling_falcon.py b/src/openllm/models/falcon/modeling_falcon.py index 1a6d6ebe..9b0ee0a3 100644 --- a/src/openllm/models/falcon/modeling_falcon.py +++ b/src/openllm/models/falcon/modeling_falcon.py @@ -35,7 +35,7 @@ class Falcon(openllm.LLM): default_model = "tiiuae/falcon-7b" - requirements = ["einops"] + requirements = ["einops", "xformers", "safetensors"] pretrained = ["tiiuae/falcon-7b", "tiiuae/falcon-40b", "tiiuae/falcon-7b-instruct", "tiiuae/falcon-40b-instruct"] @@ -49,16 +49,15 @@ class Falcon(openllm.LLM): device_map = attrs.pop("device_map", "auto") tokenizer = transformers.AutoTokenizer.from_pretrained(pretrained) - model = transformers.AutoModelForCausalLM.from_pretrained( - pretrained, trust_remote_code=trust_remote_code, torch_dtype=torch_dtype, device_map=device_map - ) - config = transformers.AutoConfig.from_pretrained(pretrained, trust_remote_code=trust_remote_code) - transformers.AutoModelForCausalLM.register(config.__class__, model.__class__) - return bentoml.transformers.save_model( - tag, - transformers.pipeline("text-generation", model=model, tokenizer=tokenizer), - custom_objects={"tokenizer": tokenizer}, + pipeline = transformers.pipeline( + "text-generation", + model=pretrained, + trust_remote_code=trust_remote_code, + torch_dtype=torch_dtype, + device_map=device_map, + tokenizer=tokenizer, ) + return bentoml.transformers.save_model(tag, pipeline, custom_objects={"tokenizer": tokenizer}) def sanitize_parameters( self, @@ -67,7 +66,7 @@ class Falcon(openllm.LLM): top_k: int | None = None, num_return_sequences: int | None = None, eos_token_id: int | None = None, - use_default_prompt_template: bool = True, + use_default_prompt_template: bool = False, **attrs: t.Any, ) -> tuple[str, dict[str, t.Any], dict[str, t.Any]]: if use_default_prompt_template: diff --git a/src/openllm/utils/analytics.py b/src/openllm/utils/analytics.py index d4004b0b..2caaa273 100644 --- a/src/openllm/utils/analytics.py +++ b/src/openllm/utils/analytics.py @@ -22,16 +22,13 @@ import contextlib import functools import os import typing as t -from datetime import datetime import attr -import bentoml from bentoml._internal.utils import analytics as _internal_analytics from bentoml._internal.utils.analytics import usage_stats as _internal_usage if t.TYPE_CHECKING: import openllm - import click from ..__about__ import __version__ @@ -77,15 +74,6 @@ class OpenllmCliEvent(_internal_analytics.schemas.EventMeta): return_code: int = attr.field(default=None) -if t.TYPE_CHECKING: - T_con = t.TypeVar("T_con", contravariant=True) - - class HandlerProtocol(t.Protocol[T_con]): - @staticmethod - def __call__(group: click.Group, cmd_name: str, return_value: T_con | None = None) -> OpenllmCliEvent: - ... - - @attr.define class StartInitEvent(_internal_analytics.schemas.EventMeta): model_name: str @@ -111,35 +99,3 @@ def track_start_init( if do_not_track(): return track(StartInitEvent.handler(llm_config, supported_gpu)) - - -@attr.define -class BuildEvent(OpenllmCliEvent): - bento_creation_timestamp: datetime = attr.field(default=None) - bento_size_in_gb: float = attr.field(default=0) - model_size_in_gb: float = attr.field(default=0) - model_type: str = attr.field(default=None) - model_framework: str = attr.field(default=None) - - @staticmethod - def handler(group: click.Group, cmd_name: str, return_value: bentoml.Bento | None = None) -> BuildEvent: - from bentoml._internal.utils import calc_dir_size - - assert group.name is not None, "group name should not be None" - if return_value is not None: - bento = return_value - return BuildEvent( - group.name, - cmd_name, - bento_creation_timestamp=bento.info.creation_time, - bento_size_in_gb=calc_dir_size(bento.path) / 1024**3, - model_size_in_gb=calc_dir_size(bento.path_of("/models")) / 1024**3, - model_type=bento.info.labels["_type"], - model_framework=bento.info.labels["_framework"], - ) - return BuildEvent(group.name, cmd_name) - - -cli_events_map: dict[str, dict[str, HandlerProtocol[t.Any]]] = { - "openllm": {"build": BuildEvent.handler, "bundle": BuildEvent.handler} -}