mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-04-19 14:40:46 -04:00
feat(type): provide structured annotations stubs (#663)
* feat(type): provide client stubs, separating concerns for a more concise code base Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> * docs: update changelog Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> --------- Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
@@ -429,7 +429,7 @@ def workers_per_resource_option(
|
||||
|
||||
- ``round_robin``: Similar behaviour when setting ``--workers-per-resource 1``. This is useful for smaller models.
|
||||
|
||||
- ``conserved``: This will determine the number of available GPU resources, and only assign one worker for the LLMRunner. For example, if there are 4 GPUs available, then ``conserved`` is equivalent to ``--workers-per-resource 0.25``.
|
||||
- ``conserved``: This will determine the number of available GPU resources. For example, if there are 4 GPUs available, then ``conserved`` is equivalent to ``--workers-per-resource 0.25``.
|
||||
"""
|
||||
+ (
|
||||
"""\n
|
||||
|
||||
@@ -77,6 +77,7 @@ from openllm_core.utils import (
|
||||
compose,
|
||||
configure_logging,
|
||||
first_not_none,
|
||||
gen_random_uuid,
|
||||
get_debug_mode,
|
||||
get_disable_warnings,
|
||||
get_quiet_mode,
|
||||
@@ -986,7 +987,6 @@ def build_command(
|
||||
> To build the bento with compiled OpenLLM, make sure to prepend HATCH_BUILD_HOOKS_ENABLE=1. Make sure that the deployment
|
||||
> target also uses the same Python version and architecture as the build machine.
|
||||
"""
|
||||
from openllm._llm import normalise_model_name
|
||||
from openllm.serialisation.transformers.weights import has_safetensors_weights
|
||||
|
||||
if model_id in openllm.CONFIG_MAPPING:
|
||||
@@ -1046,7 +1046,7 @@ def build_command(
|
||||
labels = dict(llm.identifying_params)
|
||||
labels.update({'_type': llm.llm_type, '_framework': llm.__llm_backend__})
|
||||
|
||||
with fs.open_fs(f'temp://llm_{normalise_model_name(model_id)}') as llm_fs:
|
||||
with fs.open_fs(f'temp://llm_{gen_random_uuid()}') as llm_fs:
|
||||
dockerfile_template_path = None
|
||||
if dockerfile_template:
|
||||
with dockerfile_template:
|
||||
|
||||
@@ -43,16 +43,13 @@ def build_container(
|
||||
"This utility can only be run within OpenLLM git repository. Clone it first with 'git clone https://github.com/bentoml/OpenLLM.git'"
|
||||
)
|
||||
if not registries:
|
||||
tags: dict[str | LiteralContainerRegistry, str] = {
|
||||
alias: f'{value}:{openllm.bundle.get_base_container_tag(version_strategy)}'
|
||||
for alias, value in openllm.bundle.CONTAINER_NAMES.items()
|
||||
tags = {
|
||||
alias: openllm.bundle.RefResolver.construct_base_image(alias, version_strategy)
|
||||
for alias in openllm.bundle.CONTAINER_NAMES
|
||||
}
|
||||
else:
|
||||
registries = [registries] if isinstance(registries, str) else list(registries)
|
||||
tags = {
|
||||
name: f'{openllm.bundle.CONTAINER_NAMES[name]}:{openllm.bundle.get_base_container_tag(version_strategy)}'
|
||||
for name in registries
|
||||
}
|
||||
tags = {name: openllm.bundle.RefResolver.construct_base_image(name, version_strategy) for name in registries}
|
||||
try:
|
||||
outputs = _BUILDER.build(
|
||||
file=pathlib.Path(__file__).parent.joinpath('Dockerfile').resolve().__fspath__(),
|
||||
|
||||
Reference in New Issue
Block a user