Mirror of https://github.com/bentoml/OpenLLM.git
perf(build): locking and improve build speed (#669)
* revert(build): not locking packages
* perf: improve svars generation and unifying envvar parsing
* docs: update changelog
* chore: update stubs check for mypy

Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
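The substance of the change is visible in the diff below: all environment-variable parsing moves out of the generated service file and into the build-generated `_service_vars` module, so the service imports plain values instead of re-parsing the environment itself. As a rough sketch of what such a generated module could contain — the attribute names come from the diff, but the env-var names, defaults, and parsing details here are assumptions, and whether the generator bakes in literal values at build time or defers to the environment (as shown here) is an implementation detail not visible in this commit:

# _service_vars.py -- hypothetical sketch of the build-time generated module.
# Attribute names mirror what the service reads (svars.model_id,
# svars.prompt_template, ...); the real generator's env-var names and
# defaults may differ.
import os

import orjson

model_id = os.environ['OPENLLM_MODEL_ID']  # assumed variable name
model_tag = os.getenv('OPENLLM_MODEL_TAG')
prompt_template = os.getenv('OPENLLM_PROMPT_TEMPLATE')
system_message = os.getenv('OPENLLM_SYSTEM_MESSAGE')
serialization = os.getenv('OPENLLM_SERIALIZATION', 'safetensors')
# The new service passes svars.adapter_map straight through, so the parsed
# structure (not the raw JSON string) lives here now.
adapter_map = orjson.loads(os.getenv('OPENLLM_ADAPTER_MAP', '{}'))
trust_remote_code = os.getenv('TRUST_REMOTE_CODE', '').upper() in ('TRUE', '1', 'YES')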
@@ -1,11 +1,8 @@
 # mypy: disable-error-code="call-arg,misc,attr-defined,type-abstract,type-arg,valid-type,arg-type"
 from __future__ import annotations
 import logging
-import os
 import typing as t
 
 import _service_vars as svars
-import orjson
-
 import bentoml
 import openllm
@@ -14,18 +11,17 @@ from bentoml.io import JSON, Text
 logger = logging.getLogger(__name__)
 
 llm = openllm.LLM[t.Any, t.Any](
-  svars.model_id,
+  model_id=svars.model_id,
   model_tag=svars.model_tag,
-  prompt_template=openllm.utils.first_not_none(os.getenv('OPENLLM_PROMPT_TEMPLATE'), None),
-  system_message=openllm.utils.first_not_none(os.getenv('OPENLLM_SYSTEM_MESSAGE'), None),
-  serialisation=openllm.utils.first_not_none(os.getenv('OPENLLM_SERIALIZATION'), 'safetensors'),
-  adapter_map=orjson.loads(svars.adapter_map),
-  trust_remote_code=openllm.utils.check_bool_env('TRUST_REMOTE_CODE', default=False),
+  prompt_template=svars.prompt_template,
+  system_message=svars.system_message,
+  serialisation=svars.serialization,
+  adapter_map=svars.adapter_map,
+  trust_remote_code=svars.trust_remote_code,
 )
-llm_config = llm.config
-svc = bentoml.Service(name=f"llm-{llm_config['start_name']}-service", runners=[llm.runner])
+svc = bentoml.Service(name=f"llm-{llm.config['start_name']}-service", runners=[llm.runner])
 
-llm_model_class = openllm.GenerationInput.from_llm_config(llm_config)
+llm_model_class = openllm.GenerationInput.from_llm_config(llm.config)
 
 
 @svc.api(
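For context, the two `openllm.utils` helpers that the constructor no longer calls behave roughly like the illustrative reimplementations below; these are sketches inferred from the observed call sites, not the library source:

import os
import typing as t

T = t.TypeVar('T')


def first_not_none(*args: T | None, default: T | None = None) -> T | None:
  # Return the first argument that is not None, falling back to the default.
  return next((arg for arg in args if arg is not None), default)


def check_bool_env(name: str, default: bool = False) -> bool:
  # Read an environment variable and interpret common truthy spellings.
  value = os.environ.get(name)
  if value is None:
    return default
  return value.upper() in ('TRUE', '1', 'YES', 'ON')

Centralizing this parsing in `_service_vars` also keeps the service module free of `os` and `orjson` imports, which is exactly what the first hunk removes.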
@@ -49,11 +45,11 @@ async def generate_stream_v1(input_dict: dict[str, t.Any]) -> t.AsyncGenerator[s
 
 
 _Metadata = openllm.MetadataOutput(
-  timeout=llm_config['timeout'],
-  model_name=llm_config['model_name'],
+  timeout=llm.config['timeout'],
+  model_name=llm.config['model_name'],
   backend=llm.__llm_backend__,
   model_id=llm.model_id,
-  configuration=llm_config.model_dump_json().decode(),
+  configuration=llm.config.model_dump_json().decode(),
   prompt_template=llm.runner.prompt_template,
   system_message=llm.runner.system_message,
 )
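Since `_Metadata` is built once at import time from `llm.config` (the `llm_config` alias is gone), request handlers can return it without recomputing anything. A hedged sketch of how an endpoint might expose it — the route, name, and signature are illustrative assumptions, not part of this diff:

import orjson


# Hypothetical endpoint; the actual service file defines its own routes.
@svc.api(input=Text(), output=JSON(), route='/v1/metadata')
def metadata_v1(_: str) -> dict[str, t.Any]:
  # model_dump_json() returns bytes in this codebase (note the .decode()
  # in the diff above), so parse it back into a dict for the JSON IO descriptor.
  return orjson.loads(_Metadata.model_dump_json())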