mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-04-22 16:07:24 -04:00
refactor(cli): cleanup API (#592)
* chore: remove unused imports Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> * refactor(cli): update to only need model_id Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> * feat: `openllm start model-id` Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> * chore: add changelog Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> * chore: update changelog notice Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> * chore: update correct config and running tools Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> * chore: update backward compat options and treat JSON outputs correspondingly Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> --------- Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
@@ -10,8 +10,6 @@ from openllm_core._configuration import ModelSettings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
env_strats = st.sampled_from([openllm.utils.EnvVarMixin(model_name) for model_name in openllm.CONFIG_MAPPING.keys()])
|
||||
|
||||
@st.composite
|
||||
def model_settings(draw: st.DrawFn):
|
||||
"""Strategy for generating ModelSettings objects."""
|
||||
|
||||
@@ -161,8 +161,6 @@ def _local_handle(model: str, model_id: str, image_tag: str, deployment_mode: t.
|
||||
|
||||
@contextlib.contextmanager
|
||||
def _container_handle(model: str, model_id: str, image_tag: str, deployment_mode: t.Literal['container', 'local'], quantize: LiteralQuantise | None = None, *, _serve_grpc: bool = False):
|
||||
envvar = openllm.utils.EnvVarMixin(model)
|
||||
|
||||
with openllm.utils.reserve_free_port() as port, openllm.utils.reserve_free_port() as prom_port:
|
||||
pass
|
||||
container_name = f'openllm-{model}-{self(model_id)}'.replace('-', '_')
|
||||
@@ -179,8 +177,7 @@ def _container_handle(model: str, model_id: str, image_tag: str, deployment_mode
|
||||
|
||||
env: DictStrAny = {}
|
||||
|
||||
if quantize is not None:
|
||||
env[envvar.quantize] = quantize
|
||||
if quantize is not None: env['OPENLLM_QUANTIZE'] = quantize
|
||||
|
||||
gpus = openllm.utils.device_count() or -1
|
||||
devs = [docker.types.DeviceRequest(count=gpus, capabilities=[['gpu']])] if gpus > 0 else None
|
||||
@@ -195,8 +192,7 @@ def _container_handle(model: str, model_id: str, image_tag: str, deployment_mode
|
||||
ports={
|
||||
'3000/tcp': port,
|
||||
'3001/tcp': prom_port
|
||||
},
|
||||
)
|
||||
})
|
||||
|
||||
yield DockerHandle(client, container.name, port, deployment_mode)
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@ import os
|
||||
import typing as t
|
||||
|
||||
import pytest
|
||||
import transformers
|
||||
|
||||
import openllm
|
||||
|
||||
@@ -28,7 +27,7 @@ def test_general_build_with_internal_testing():
|
||||
bento = openllm.build('flan-t5', model_id=HF_INTERNAL_T5_TESTING)
|
||||
|
||||
assert llm.llm_type == bento.info.labels['_type']
|
||||
assert llm.config['env']['backend_value'] == bento.info.labels['_framework']
|
||||
assert llm.__llm_backend__ == bento.info.labels['_framework']
|
||||
|
||||
bento = openllm.build('flan-t5', model_id=HF_INTERNAL_T5_TESTING)
|
||||
assert len(bento_store.list(bento.tag)) == 1
|
||||
@@ -37,13 +36,9 @@ def test_general_build_with_internal_testing():
|
||||
def test_general_build_from_local(tmp_path_factory: pytest.TempPathFactory):
|
||||
local_path = tmp_path_factory.mktemp('local_t5')
|
||||
llm = openllm.LLM(model_id=HF_INTERNAL_T5_TESTING, serialisation='legacy')
|
||||
llm.save_pretrained()
|
||||
|
||||
if isinstance(llm.model, transformers.Pipeline):
|
||||
llm.model.save_pretrained(str(local_path))
|
||||
else:
|
||||
llm.model.save_pretrained(str(local_path))
|
||||
llm.tokenizer.save_pretrained(str(local_path))
|
||||
llm.model.save_pretrained(str(local_path))
|
||||
llm.tokenizer.save_pretrained(str(local_path))
|
||||
|
||||
assert openllm.build('flan-t5', model_id=local_path.resolve().__fspath__(), model_version='local')
|
||||
|
||||
|
||||
Reference in New Issue
Block a user