fix(torch_dtype): correctly infer based on options (#682)
Users should be able to set the dtype during build, as it doesn't affect start time.

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
```diff
@@ -221,12 +221,8 @@ class LLM(t.Generic[M, T], ReprMixin):
     config_dtype = getattr(hf_config, 'torch_dtype', None)
     if config_dtype is None:
       config_dtype = torch.float32
-    if not torch.cuda.is_available():
-      if self.__llm_torch_dtype__ in {'auto', 'half'} and not get_disable_warnings() and not get_quiet_mode():
-        logger.warning('"auto" and "half" are not supported on CPU. OpenLLM will default fallback to "float32".')
-      torch_dtype = torch.float32  # we need to cast back to full precision if cuda is not available
-    elif self.__llm_torch_dtype__ == 'auto':
-      if config_dtype == torch.float32:
+    if self.__llm_torch_dtype__ == 'auto':
+      if config_dtype == torch.float32 and torch.cuda.is_available():
         torch_dtype = torch.float16  # following common practice
       else:
         torch_dtype = config_dtype
```
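To make the behavior change concrete, here is a minimal standalone sketch of the new inference rule, assuming a float32 fallback when the HF config carries no `torch_dtype`. The `resolve_torch_dtype` helper and its signature are illustrative only, not part of the OpenLLM API:

```python
from __future__ import annotations

import torch


def resolve_torch_dtype(requested: str, config_dtype: torch.dtype | None) -> torch.dtype:
  """Hypothetical helper mirroring the dtype rule after this fix; not OpenLLM's API."""
  if config_dtype is None:
    config_dtype = torch.float32  # configs without torch_dtype are treated as float32
  if requested == 'auto':
    # Load float32 checkpoints in half precision only when a GPU is present;
    # on CPU, keep the checkpoint's own dtype instead of forcing a cast.
    if config_dtype == torch.float32 and torch.cuda.is_available():
      return torch.float16
    return config_dtype
  # Explicitly requested dtypes (e.g. set at build time) are resolved by the
  # surrounding class and are out of scope for this sketch.
  return config_dtype


print(resolve_torch_dtype('auto', torch.float32))   # float16 with CUDA, float32 on CPU
print(resolve_torch_dtype('auto', torch.bfloat16))  # bfloat16 either way
```

The key difference from the removed code is that a missing GPU no longer forces float32 (or emits a warning); the CUDA check now only gates the float32-to-float16 promotion under `'auto'`.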