mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-04-22 16:07:24 -04:00
fix(dolly): make sure to use GPU when available
Map `device_map` to "auto" when a GPU is available.

Signed-off-by: aarnphm-ec2-dev <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
@@ -38,7 +38,7 @@ class DollyV2(openllm.LLM["transformers.Pipeline", "transformers.PreTrainedToken
|
||||
@property
|
||||
def import_kwargs(self):
|
||||
model_kwds = {
|
||||
"device_map": "auto" if torch.cuda.is_available() and torch.cuda.device_count() > 1 else None,
|
||||
"device_map": "auto" if torch.cuda.is_available() else None,
|
||||
"torch_dtype": torch.bfloat16,
|
||||
}
|
||||
tokenizer_kwds = {"padding_side": "left"}
|
||||
|
||||
Reference in New Issue
Block a user