mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-01-17 03:47:54 -05:00
fix(cli): make sure to pass the dtype to subprocess service (#628)
Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
@@ -633,6 +633,7 @@ def process_environ(
 'OPENLLM_SERIALIZATION': serialisation,
 'OPENLLM_BACKEND': llm.__llm_backend__,
 'OPENLLM_CONFIG': config.model_dump_json(flatten=True).decode(),
+'TORCH_DTYPE': str(llm._torch_dtype).split('.')[-1]
 }
 )
 if llm.quantise:
Reference in New Issue
Block a user