From 512cd0715c5f8cfe135fdcfce4250df7804a65cc Mon Sep 17 00:00:00 2001 From: aarnphm-ec2-dev <29749331+aarnphm@users.noreply.github.com> Date: Sun, 11 Jun 2023 04:14:18 +0000 Subject: [PATCH] feat(service): implementing with lifecycle hooks Signed-off-by: aarnphm-ec2-dev <29749331+aarnphm@users.noreply.github.com> --- src/openllm/_llm.py | 7 ++++--- src/openllm/_service.py | 7 +++++++ src/openllm/cli.py | 18 ++++-------------- src/openllm/models/auto/factory.py | 4 ++-- 4 files changed, 17 insertions(+), 19 deletions(-) diff --git a/src/openllm/_llm.py b/src/openllm/_llm.py index 0d623df0..da883bb1 100644 --- a/src/openllm/_llm.py +++ b/src/openllm/_llm.py @@ -43,6 +43,7 @@ if t.TYPE_CHECKING: class LLMRunner(bentoml.Runner): llm: openllm.LLM + config: openllm.LLMConfig llm_type: str identifying_params: dict[str, t.Any] @@ -600,7 +601,7 @@ class LLM(LLMInterface, metaclass=LLMMetaclass): return tag, attrs return tag - def ensure_pretrained_exists(self): + def ensure_pretrained_exists(self) -> bentoml.Model: trust_remote_code = self._llm_attrs.pop("trust_remote_code", self.config.__openllm_trust_remote_code__) tag, kwds = self.make_tag(return_unused_kwargs=True, trust_remote_code=trust_remote_code, **self._llm_attrs) try: @@ -700,7 +701,7 @@ class LLM(LLMInterface, metaclass=LLMMetaclass): method_configs: dict[str, ModelSignatureDict | ModelSignature] | None = None, embedded: bool = False, scheduling_strategy: type[Strategy] | None = None, - ) -> bentoml.Runner: + ) -> LLMRunner: """Convert this LLM into a Runner. Args: @@ -848,7 +849,7 @@ class LLM(LLMInterface, metaclass=LLMMetaclass): return self.postprocess_generate(prompt, generated_result, **postprocess_kwargs) -def Runner(start_name: str, **attrs: t.Any) -> bentoml.Runner: +def Runner(start_name: str, **attrs: t.Any) -> LLMRunner: """Create a Runner for given LLM. For a list of currently supported LLM, check out 'openllm models' Args: diff --git a/src/openllm/_service.py b/src/openllm/_service.py index 2906752a..9eea9b0f 100644 --- a/src/openllm/_service.py +++ b/src/openllm/_service.py @@ -38,6 +38,13 @@ runner = openllm.Runner(model, llm_config=llm_config) svc = bentoml.Service(name=f"llm-{llm_config.__openllm_start_name__}-service", runners=[runner]) +@svc.on_deployment +def ensure_exists(): + # NOTE: We need to initialize llm here first to check if the model is already downloaded to + # avoid deadlock before the subprocess forking. + runner.llm.ensure_pretrained_exists() + + @svc.api( input=bentoml.io.JSON.from_sample(sample={"prompt": "", "llm_config": llm_config.model_dump()}), output=bentoml.io.JSON.from_sample(sample={"responses": [], "configuration": llm_config.model_dump()}), diff --git a/src/openllm/cli.py b/src/openllm/cli.py index d4327fb3..7514465e 100644 --- a/src/openllm/cli.py +++ b/src/openllm/cli.py @@ -455,10 +455,6 @@ Tip: One can pass one of the aforementioned to '--model-id' to use other pretrai else: llm = openllm.AutoLLM.for_model(model_name, model_id=model_id, llm_config=config) - # NOTE: We need to initialize llm here first to check if the model is already downloaded to - # avoid deadlock before the subprocess forking. - llm.ensure_pretrained_exists() - # NOTE: check for GPU one more time in cases this model doesn't requires GPU but users can still # run this model on GPU try: @@ -522,12 +518,7 @@ Tip: One can pass one of the aforementioned to '--model-id' to use other pretrai try: openllm.utils.analytics.track_start_init(llm.config, gpu_available) - server.start(env=start_env, text=True, blocking=True if get_debug_mode() else False) - if not get_debug_mode(): - assert server.process is not None and server.process.stdout is not None - with server.process.stdout: - for f in iter(server.process.stdout.readline, b""): - _echo(f, nl=False, fg="white") + server.start(env=start_env, text=True, blocking=True) except KeyboardInterrupt: on_start_end(model_name) except Exception as err: @@ -801,20 +792,19 @@ def cli_factory() -> click.Group: if len(bentoml.models.list(tag)) == 0: if output == "pretty": _echo(f"{tag} does not exists yet!. Downloading...", nl=True) - m = model.ensure_pretrained_exists() + m = model.ensure_pretrained_exists() + if output == "pretty": _echo(f"Saved model: {m.tag}") elif output == "json": - m = model.ensure_pretrained_exists() _echo( orjson.dumps( {"previously_setup": False, "framework": env, "tag": str(m.tag)}, option=orjson.OPT_INDENT_2 ).decode() ) else: - m = model.ensure_pretrained_exists() _echo(tag) else: - m = model.ensure_pretrained_exists() + m = bentoml.transformers.get(tag) if output == "pretty": _echo(f"{model_name} is already setup for framework '{env}': {str(m.tag)}", nl=True) elif output == "json": diff --git a/src/openllm/models/auto/factory.py b/src/openllm/models/auto/factory.py index 1a88d6b9..f43c2744 100644 --- a/src/openllm/models/auto/factory.py +++ b/src/openllm/models/auto/factory.py @@ -26,7 +26,7 @@ import openllm from .configuration_auto import AutoConfig if t.TYPE_CHECKING: - import bentoml + from ..._llm import LLMRunner ConfigModelOrderedDict = OrderedDict[type[openllm.LLMConfig], type[openllm.LLM]] else: @@ -104,7 +104,7 @@ class _BaseAutoLLMClass: ) @classmethod - def create_runner(cls, model_name: str, model_id: str | None = None, **attrs: t.Any) -> bentoml.Runner: + def create_runner(cls, model_name: str, model_id: str | None = None, **attrs: t.Any) -> LLMRunner: """ Create a LLM Runner for the given model name.