From aa50b5279e6881c81852febda80d482bc55f2115 Mon Sep 17 00:00:00 2001
From: Aaron <29749331+aarnphm@users.noreply.github.com>
Date: Tue, 6 Jun 2023 22:42:28 -0400
Subject: [PATCH] fix(falcon): loading based on model registration

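Remove the duplicate CLI analytics events (BuildEvent and the cli_events_map dispatch); every CLI command is now tracked with a plain OpenllmCliEvent.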

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
---
 pyproject.toml                               |  2 +-
 src/openllm/_llm.py                          |  1 -
 src/openllm/_package.py                      |  2 +-
 src/openllm/cli.py                           | 12 ++----
 src/openllm/models/falcon/modeling_falcon.py | 21 +++++-----
 src/openllm/utils/analytics.py               | 44 --------------------
 6 files changed, 15 insertions(+), 67 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index ae64eacb..9a0c51ea 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -67,7 +67,7 @@ requires-python = ">=3.8"
 [project.optional-dependencies]
 all = ['openllm[fine-tune]', 'openllm[chatglm]', 'openllm[falcon]', 'openllm[flan-t5]', 'openllm[starcoder]']
 chatglm = ['cpm_kernels', 'sentencepiece']
-falcon = ['einops']
+falcon = ['einops', 'xformers', 'safetensors']
 fine-tune = ['peft', 'bitsandbytes', 'datasets']
 flan-t5 = ['flax', 'jax', 'jaxlib', 'tensorflow']
 starcoder = ['bitsandbytes']
diff --git a/src/openllm/_llm.py b/src/openllm/_llm.py
index 46060cc7..2602aa04 100644
--- a/src/openllm/_llm.py
+++ b/src/openllm/_llm.py
@@ -642,7 +642,6 @@ class LLM(LLMInterface, metaclass=LLMMetaclass):
             kwds["accelerator"] = "bettertransformer"
 
         if self.__llm_model__ is None:
-            # Hmm, bentoml.transformers.load_model doesn't yet support args.
             self.__llm_model__ = self._bentomodel.load_model(*self.__llm_args__, **kwds)
 
         if (
diff --git a/src/openllm/_package.py b/src/openllm/_package.py
index c3649133..9207de0e 100644
--- a/src/openllm/_package.py
+++ b/src/openllm/_package.py
@@ -86,7 +86,7 @@ def construct_python_options(llm: openllm.LLM, llm_fs: FS) -> PythonOptions:
             "protobuf",
             "grpcio",
             "grpcio-health-checking",
-            "opentelemetry-instrumentation-grpc==0.35b0",
+            "opentelemetry-instrumentation-grpc==0.38b0",
             "grpcio-reflection",
         ]
     )
diff --git a/src/openllm/cli.py b/src/openllm/cli.py
index 200057bb..85b6eb66 100644
--- a/src/openllm/cli.py
+++ b/src/openllm/cli.py
@@ -129,22 +129,16 @@ class OpenLLMCommandGroup(BentoMLCommandGroup):
 
             start_time = time.time_ns()
 
-            def get_tracking_event(return_value: t.Any):
-                assert group.name, "Group name is required"
-                if group.name in analytics.cli_events_map and command_name in analytics.cli_events_map[group.name]:
-                    return analytics.cli_events_map[group.name][command_name](group, command_name, return_value)
-                return analytics.OpenllmCliEvent(cmd_group=group.name, cmd_name=command_name)
-
             with analytics.set_bentoml_tracking():
+                assert group.name is not None, "group.name should not be None"
+                event = analytics.OpenllmCliEvent(cmd_group=group.name, cmd_name=command_name)
                 try:
                     return_value = func(*args, **attrs)
-                    event = get_tracking_event(return_value)
                     duration_in_ms = (time.time_ns() - start_time) / 1e6
                     event.duration_in_ms = duration_in_ms
                     analytics.track(event)
                     return return_value
                 except Exception as e:
-                    event = get_tracking_event(None)
                     duration_in_ms = (time.time_ns() - start_time) / 1e6
                     event.duration_in_ms = duration_in_ms
                     event.error_type = type(e).__name__
@@ -580,7 +574,7 @@ def cli_factory() -> click.Group:
 
         if output == "pretty":
             if not get_quiet_mode():
-                _echo("\n" + OPENLLM_FIGLET)
+                _echo("\n" + OPENLLM_FIGLET, fg="white")
                 if not _previously_built:
                     _echo(f"Successfully built {bento}.", fg="green")
                 else:
diff --git a/src/openllm/models/falcon/modeling_falcon.py b/src/openllm/models/falcon/modeling_falcon.py
index 1a6d6ebe..9b0ee0a3 100644
--- a/src/openllm/models/falcon/modeling_falcon.py
+++ b/src/openllm/models/falcon/modeling_falcon.py
@@ -35,7 +35,7 @@ class Falcon(openllm.LLM):
 
     default_model = "tiiuae/falcon-7b"
 
-    requirements = ["einops"]
+    requirements = ["einops", "xformers", "safetensors"]
 
     pretrained = ["tiiuae/falcon-7b", "tiiuae/falcon-40b", "tiiuae/falcon-7b-instruct", "tiiuae/falcon-40b-instruct"]
 
@@ -49,16 +49,15 @@ class Falcon(openllm.LLM):
         device_map = attrs.pop("device_map", "auto")
 
         tokenizer = transformers.AutoTokenizer.from_pretrained(pretrained)
-        model = transformers.AutoModelForCausalLM.from_pretrained(
-            pretrained, trust_remote_code=trust_remote_code, torch_dtype=torch_dtype, device_map=device_map
-        )
-        config = transformers.AutoConfig.from_pretrained(pretrained, trust_remote_code=trust_remote_code)
-        transformers.AutoModelForCausalLM.register(config.__class__, model.__class__)
-        return bentoml.transformers.save_model(
-            tag,
-            transformers.pipeline("text-generation", model=model, tokenizer=tokenizer),
-            custom_objects={"tokenizer": tokenizer},
+        pipeline = transformers.pipeline(
+            "text-generation",
+            model=pretrained,
+            trust_remote_code=trust_remote_code,
+            torch_dtype=torch_dtype,
+            device_map=device_map,
+            tokenizer=tokenizer,
         )
+        return bentoml.transformers.save_model(tag, pipeline, custom_objects={"tokenizer": tokenizer})
 
     def sanitize_parameters(
         self,
@@ -67,7 +66,7 @@ class Falcon(openllm.LLM):
         top_k: int | None = None,
         num_return_sequences: int | None = None,
         eos_token_id: int | None = None,
-        use_default_prompt_template: bool = True,
+        use_default_prompt_template: bool = False,
         **attrs: t.Any,
     ) -> tuple[str, dict[str, t.Any], dict[str, t.Any]]:
         if use_default_prompt_template:
diff --git a/src/openllm/utils/analytics.py b/src/openllm/utils/analytics.py
index d4004b0b..2caaa273 100644
--- a/src/openllm/utils/analytics.py
+++ b/src/openllm/utils/analytics.py
@@ -22,16 +22,13 @@ import contextlib
 import functools
 import os
 import typing as t
-from datetime import datetime
 
 import attr
-import bentoml
 from bentoml._internal.utils import analytics as _internal_analytics
 from bentoml._internal.utils.analytics import usage_stats as _internal_usage
 
 if t.TYPE_CHECKING:
     import openllm
-    import click
 
 from ..__about__ import __version__
 
@@ -77,15 +74,6 @@ class OpenllmCliEvent(_internal_analytics.schemas.EventMeta):
     return_code: int = attr.field(default=None)
 
 
-if t.TYPE_CHECKING:
-    T_con = t.TypeVar("T_con", contravariant=True)
-
-    class HandlerProtocol(t.Protocol[T_con]):
-        @staticmethod
-        def __call__(group: click.Group, cmd_name: str, return_value: T_con | None = None) -> OpenllmCliEvent:
-            ...
-
-
 @attr.define
 class StartInitEvent(_internal_analytics.schemas.EventMeta):
     model_name: str
@@ -111,35 +99,3 @@ def track_start_init(
     if do_not_track():
         return
     track(StartInitEvent.handler(llm_config, supported_gpu))
-
-
-@attr.define
-class BuildEvent(OpenllmCliEvent):
-    bento_creation_timestamp: datetime = attr.field(default=None)
-    bento_size_in_gb: float = attr.field(default=0)
-    model_size_in_gb: float = attr.field(default=0)
-    model_type: str = attr.field(default=None)
-    model_framework: str = attr.field(default=None)
-
-    @staticmethod
-    def handler(group: click.Group, cmd_name: str, return_value: bentoml.Bento | None = None) -> BuildEvent:
-        from bentoml._internal.utils import calc_dir_size
-
-        assert group.name is not None, "group name should not be None"
-        if return_value is not None:
-            bento = return_value
-            return BuildEvent(
-                group.name,
-                cmd_name,
-                bento_creation_timestamp=bento.info.creation_time,
-                bento_size_in_gb=calc_dir_size(bento.path) / 1024**3,
-                model_size_in_gb=calc_dir_size(bento.path_of("/models")) / 1024**3,
-                model_type=bento.info.labels["_type"],
-                model_framework=bento.info.labels["_framework"],
-            )
-        return BuildEvent(group.name, cmd_name)
-
-
-cli_events_map: dict[str, dict[str, HandlerProtocol[t.Any]]] = {
-    "openllm": {"build": BuildEvent.handler, "bundle": BuildEvent.handler}
-}