refactor(cli): cleanup API (#592)

* chore: remove unused imports

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* refactor(cli): update to only need model_id

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* feat: `openllm start model-id`

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: add changelog

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: update changelog notice

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: update correct config and running tools

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: update backward compat options and treat JSON outputs
correspondingly

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

---------

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
Aaron Pham
2023-11-09 11:40:17 -05:00
committed by GitHub
parent 86f7acafa9
commit b8a2e8cf91
48 changed files with 1096 additions and 1047 deletions

View File

@@ -10,8 +10,6 @@ from openllm_core._configuration import ModelSettings
logger = logging.getLogger(__name__)
env_strats = st.sampled_from([openllm.utils.EnvVarMixin(model_name) for model_name in openllm.CONFIG_MAPPING.keys()])
@st.composite
def model_settings(draw: st.DrawFn):
"""Strategy for generating ModelSettings objects."""

View File

@@ -161,8 +161,6 @@ def _local_handle(model: str, model_id: str, image_tag: str, deployment_mode: t.
@contextlib.contextmanager
def _container_handle(model: str, model_id: str, image_tag: str, deployment_mode: t.Literal['container', 'local'], quantize: LiteralQuantise | None = None, *, _serve_grpc: bool = False):
envvar = openllm.utils.EnvVarMixin(model)
with openllm.utils.reserve_free_port() as port, openllm.utils.reserve_free_port() as prom_port:
pass
container_name = f'openllm-{model}-{self(model_id)}'.replace('-', '_')
@@ -179,8 +177,7 @@ def _container_handle(model: str, model_id: str, image_tag: str, deployment_mode
env: DictStrAny = {}
if quantize is not None:
env[envvar.quantize] = quantize
if quantize is not None: env['OPENLLM_QUANTIZE'] = quantize
gpus = openllm.utils.device_count() or -1
devs = [docker.types.DeviceRequest(count=gpus, capabilities=[['gpu']])] if gpus > 0 else None
@@ -195,8 +192,7 @@ def _container_handle(model: str, model_id: str, image_tag: str, deployment_mode
ports={
'3000/tcp': port,
'3001/tcp': prom_port
},
)
})
yield DockerHandle(client, container.name, port, deployment_mode)

View File

@@ -4,7 +4,6 @@ import os
import typing as t
import pytest
import transformers
import openllm
@@ -28,7 +27,7 @@ def test_general_build_with_internal_testing():
bento = openllm.build('flan-t5', model_id=HF_INTERNAL_T5_TESTING)
assert llm.llm_type == bento.info.labels['_type']
assert llm.config['env']['backend_value'] == bento.info.labels['_framework']
assert llm.__llm_backend__ == bento.info.labels['_framework']
bento = openllm.build('flan-t5', model_id=HF_INTERNAL_T5_TESTING)
assert len(bento_store.list(bento.tag)) == 1
@@ -37,13 +36,9 @@ def test_general_build_with_internal_testing():
def test_general_build_from_local(tmp_path_factory: pytest.TempPathFactory):
local_path = tmp_path_factory.mktemp('local_t5')
llm = openllm.LLM(model_id=HF_INTERNAL_T5_TESTING, serialisation='legacy')
llm.save_pretrained()
if isinstance(llm.model, transformers.Pipeline):
llm.model.save_pretrained(str(local_path))
else:
llm.model.save_pretrained(str(local_path))
llm.tokenizer.save_pretrained(str(local_path))
llm.model.save_pretrained(str(local_path))
llm.tokenizer.save_pretrained(str(local_path))
assert openllm.build('flan-t5', model_id=local_path.resolve().__fspath__(), model_version='local')