refactor(cli): cleanup API (#592)

* chore: remove unused imports

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* refactor(cli): update to only need model_id

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* feat: `openllm start model-id`

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: add changelog

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: update changelog notice

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: update correct config and running tools

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: update backward compat options and treat JSON outputs
correspondingly

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

---------

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
Aaron Pham
2023-11-09 11:40:17 -05:00
committed by GitHub
parent 86f7acafa9
commit b8a2e8cf91
48 changed files with 1096 additions and 1047 deletions

View File

@@ -10,8 +10,6 @@ from openllm_core._configuration import ModelSettings
logger = logging.getLogger(__name__)
env_strats = st.sampled_from([openllm.utils.EnvVarMixin(model_name) for model_name in openllm.CONFIG_MAPPING.keys()])
@st.composite
def model_settings(draw: st.DrawFn):
"""Strategy for generating ModelSettings objects."""

View File

@@ -161,8 +161,6 @@ def _local_handle(model: str, model_id: str, image_tag: str, deployment_mode: t.
@contextlib.contextmanager
def _container_handle(model: str, model_id: str, image_tag: str, deployment_mode: t.Literal['container', 'local'], quantize: LiteralQuantise | None = None, *, _serve_grpc: bool = False):
envvar = openllm.utils.EnvVarMixin(model)
with openllm.utils.reserve_free_port() as port, openllm.utils.reserve_free_port() as prom_port:
pass
container_name = f'openllm-{model}-{self(model_id)}'.replace('-', '_')
@@ -179,8 +177,7 @@ def _container_handle(model: str, model_id: str, image_tag: str, deployment_mode
env: DictStrAny = {}
if quantize is not None:
env[envvar.quantize] = quantize
if quantize is not None: env['OPENLLM_QUANTIZE'] = quantize
gpus = openllm.utils.device_count() or -1
devs = [docker.types.DeviceRequest(count=gpus, capabilities=[['gpu']])] if gpus > 0 else None
@@ -195,8 +192,7 @@ def _container_handle(model: str, model_id: str, image_tag: str, deployment_mode
ports={
'3000/tcp': port,
'3001/tcp': prom_port
},
)
})
yield DockerHandle(client, container.name, port, deployment_mode)

View File

@@ -4,7 +4,6 @@ import os
import typing as t
import pytest
import transformers
import openllm
@@ -28,7 +27,7 @@ def test_general_build_with_internal_testing():
bento = openllm.build('flan-t5', model_id=HF_INTERNAL_T5_TESTING)
assert llm.llm_type == bento.info.labels['_type']
assert llm.config['env']['backend_value'] == bento.info.labels['_framework']
assert llm.__llm_backend__ == bento.info.labels['_framework']
bento = openllm.build('flan-t5', model_id=HF_INTERNAL_T5_TESTING)
assert len(bento_store.list(bento.tag)) == 1
@@ -37,13 +36,9 @@ def test_general_build_with_internal_testing():
def test_general_build_from_local(tmp_path_factory: pytest.TempPathFactory):
local_path = tmp_path_factory.mktemp('local_t5')
llm = openllm.LLM(model_id=HF_INTERNAL_T5_TESTING, serialisation='legacy')
llm.save_pretrained()
if isinstance(llm.model, transformers.Pipeline):
llm.model.save_pretrained(str(local_path))
else:
llm.model.save_pretrained(str(local_path))
llm.tokenizer.save_pretrained(str(local_path))
llm.model.save_pretrained(str(local_path))
llm.tokenizer.save_pretrained(str(local_path))
assert openllm.build('flan-t5', model_id=local_path.resolve().__fspath__(), model_version='local')