perf(build): locking and improve build speed (#669)

* revert(build): not locking packages

Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>

* perf: improve svars generation and unify envvar parsing (a sketch of the generated module follows this message)

Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>

* docs: update changelog

Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>

* chore: update stubs check for mypy

Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>

---------

Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
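
For context, here is a minimal sketch of what the generated _service_vars.py module consumed by the service diff below might look like. The attribute names mirror the ones referenced in the diff; the concrete values and the flat-module layout are assumptions for illustration, not the actual build output.

# _service_vars.py -- hypothetical example of the module a build step could emit,
# so _service.py no longer parses environment variables at import time.
# Every value here is a placeholder; the real file would be written during bento build.
model_id = 'mistralai/Mistral-7B-Instruct-v0.1'  # placeholder model id
model_tag = None                                 # resolved BentoML model tag, if any
prompt_template = None                           # optional prompt template string
system_message = None                            # optional system message string
serialization = 'safetensors'                    # serialisation format baked in at build time
adapter_map = {}                                 # adapter name -> adapter id mapping, already decoded
trust_remote_code = False                        # boolean resolved once, no env parsing at runtime

Baking these values in at build time removes the per-import os.getenv and orjson.loads round trips shown as removed lines in the diff below.
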

commit 8fdfd0491f (parent fce8f223f3)
Author: Aaron Pham
Date:   2023-11-16 06:27:45 -05:00
Committed by: GitHub

9 changed files with 138 additions and 74 deletions


@@ -1,11 +1,8 @@
 # mypy: disable-error-code="call-arg,misc,attr-defined,type-abstract,type-arg,valid-type,arg-type"
 from __future__ import annotations
 import logging
-import os
 import typing as t
 import _service_vars as svars
-import orjson
 import bentoml
 import openllm
@@ -14,18 +11,17 @@ from bentoml.io import JSON, Text
 logger = logging.getLogger(__name__)
 llm = openllm.LLM[t.Any, t.Any](
-  svars.model_id,
+  model_id=svars.model_id,
   model_tag=svars.model_tag,
-  prompt_template=openllm.utils.first_not_none(os.getenv('OPENLLM_PROMPT_TEMPLATE'), None),
-  system_message=openllm.utils.first_not_none(os.getenv('OPENLLM_SYSTEM_MESSAGE'), None),
-  serialisation=openllm.utils.first_not_none(os.getenv('OPENLLM_SERIALIZATION'), 'safetensors'),
-  adapter_map=orjson.loads(svars.adapter_map),
-  trust_remote_code=openllm.utils.check_bool_env('TRUST_REMOTE_CODE', default=False),
+  prompt_template=svars.prompt_template,
+  system_message=svars.system_message,
+  serialisation=svars.serialization,
+  adapter_map=svars.adapter_map,
+  trust_remote_code=svars.trust_remote_code,
 )
-llm_config = llm.config
-svc = bentoml.Service(name=f"llm-{llm_config['start_name']}-service", runners=[llm.runner])
+svc = bentoml.Service(name=f"llm-{llm.config['start_name']}-service", runners=[llm.runner])
-llm_model_class = openllm.GenerationInput.from_llm_config(llm_config)
+llm_model_class = openllm.GenerationInput.from_llm_config(llm.config)
 @svc.api(
@@ -49,11 +45,11 @@ async def generate_stream_v1(input_dict: dict[str, t.Any]) -> t.AsyncGenerator[s
 _Metadata = openllm.MetadataOutput(
-  timeout=llm_config['timeout'],
-  model_name=llm_config['model_name'],
+  timeout=llm.config['timeout'],
+  model_name=llm.config['model_name'],
   backend=llm.__llm_backend__,
   model_id=llm.model_id,
-  configuration=llm_config.model_dump_json().decode(),
+  configuration=llm.config.model_dump_json().decode(),
   prompt_template=llm.runner.prompt_template,
   system_message=llm.runner.system_message,
 )
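
To illustrate the "unify envvar parsing" part of the change, here is a hedged sketch of how a build step could resolve the relevant environment variables once and write the generated module used above. The file, function, and template names are hypothetical and not taken from the OpenLLM codebase; only the environment variable names mirror the ones removed from the service in this diff.

# build_service_vars.py -- hypothetical build-time generator for _service_vars.py.
from __future__ import annotations

import os
import orjson

_TEMPLATE = '''\
model_id = {model_id!r}
model_tag = {model_tag!r}
prompt_template = {prompt_template!r}
system_message = {system_message!r}
serialization = {serialization!r}
adapter_map = {adapter_map!r}
trust_remote_code = {trust_remote_code!r}
'''

def write_service_vars(path: str, model_id: str, model_tag: str | None = None, adapter_map: str | None = None) -> None:
  # Resolve every environment variable once, at build time, instead of at service import time.
  content = _TEMPLATE.format(
    model_id=model_id,
    model_tag=model_tag,
    prompt_template=os.getenv('OPENLLM_PROMPT_TEMPLATE'),
    system_message=os.getenv('OPENLLM_SYSTEM_MESSAGE'),
    serialization=os.getenv('OPENLLM_SERIALIZATION', 'safetensors'),
    adapter_map=orjson.loads(adapter_map) if adapter_map else {},  # decode the JSON adapter map once
    trust_remote_code=os.getenv('TRUST_REMOTE_CODE', 'false').lower() in ('1', 'true', 'yes'),
  )
  with open(path, 'w') as f:
    f.write(content)

With this shape, the service module only imports plain Python constants, so service startup stays free of environment parsing and JSON decoding.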