mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-03-05 23:56:47 -05:00
refactor(cli): cleanup API (#592)
* chore: remove unused imports Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> * refactor(cli): update to only need model_id Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> * feat: `openllm start model-id` Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> * chore: add changelog Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> * chore: update changelog notice Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> * chore: update correct config and running tools Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> * chore: update backward compat options and treat JSON outputs correspondingly Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> --------- Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
@@ -161,8 +161,6 @@ def _local_handle(model: str, model_id: str, image_tag: str, deployment_mode: t.
|
||||
|
||||
@contextlib.contextmanager
|
||||
def _container_handle(model: str, model_id: str, image_tag: str, deployment_mode: t.Literal['container', 'local'], quantize: LiteralQuantise | None = None, *, _serve_grpc: bool = False):
|
||||
envvar = openllm.utils.EnvVarMixin(model)
|
||||
|
||||
with openllm.utils.reserve_free_port() as port, openllm.utils.reserve_free_port() as prom_port:
|
||||
pass
|
||||
container_name = f'openllm-{model}-{self(model_id)}'.replace('-', '_')
|
||||
@@ -179,8 +177,7 @@ def _container_handle(model: str, model_id: str, image_tag: str, deployment_mode
|
||||
|
||||
env: DictStrAny = {}
|
||||
|
||||
if quantize is not None:
|
||||
env[envvar.quantize] = quantize
|
||||
if quantize is not None: env['OPENLLM_QUANTIZE'] = quantize
|
||||
|
||||
gpus = openllm.utils.device_count() or -1
|
||||
devs = [docker.types.DeviceRequest(count=gpus, capabilities=[['gpu']])] if gpus > 0 else None
|
||||
@@ -195,8 +192,7 @@ def _container_handle(model: str, model_id: str, image_tag: str, deployment_mode
|
||||
ports={
|
||||
'3000/tcp': port,
|
||||
'3001/tcp': prom_port
|
||||
},
|
||||
)
|
||||
})
|
||||
|
||||
yield DockerHandle(client, container.name, port, deployment_mode)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user