refactor(cli): cleanup API (#592)

* chore: remove unused imports

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* refactor(cli): update to only need model_id

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* feat: `openllm start model-id`

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: add changelog

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: update changelog notice

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: update correct config and running tools

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: update backward compat options and treat JSON outputs
correspondingly

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

---------

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
Aaron Pham
2023-11-09 11:40:17 -05:00
committed by GitHub
parent 86f7acafa9
commit b8a2e8cf91
48 changed files with 1096 additions and 1047 deletions

View File

@@ -161,8 +161,6 @@ def _local_handle(model: str, model_id: str, image_tag: str, deployment_mode: t.
@contextlib.contextmanager
def _container_handle(model: str, model_id: str, image_tag: str, deployment_mode: t.Literal['container', 'local'], quantize: LiteralQuantise | None = None, *, _serve_grpc: bool = False):
envvar = openllm.utils.EnvVarMixin(model)
with openllm.utils.reserve_free_port() as port, openllm.utils.reserve_free_port() as prom_port:
pass
container_name = f'openllm-{model}-{self(model_id)}'.replace('-', '_')
@@ -179,8 +177,7 @@ def _container_handle(model: str, model_id: str, image_tag: str, deployment_mode
env: DictStrAny = {}
if quantize is not None:
env[envvar.quantize] = quantize
if quantize is not None: env['OPENLLM_QUANTIZE'] = quantize
gpus = openllm.utils.device_count() or -1
devs = [docker.types.DeviceRequest(count=gpus, capabilities=[['gpu']])] if gpus > 0 else None
@@ -195,8 +192,7 @@ def _container_handle(model: str, model_id: str, image_tag: str, deployment_mode
ports={
'3000/tcp': port,
'3001/tcp': prom_port
},
)
})
yield DockerHandle(client, container.name, port, deployment_mode)