mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-01-06 06:29:21 -05:00
fix(backend): use the correct backend variable during initialisation (#702)
Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
@@ -217,7 +217,7 @@ class LLM(t.Generic[M, T], ReprMixin):
|
||||
def _resolve_quantise(self, quantise, backend):
    """Resolve the quantisation value to use for the given backend.

    Args:
        quantise: The requested quantisation value.
        backend: The backend identifier the value is being resolved for.

    Returns:
        ``quantise`` unchanged for the ``'pt'`` and ``'vllm'`` backends, or the
        result of ``self._resolve_ctranslate_quantise(quantise)`` for the
        ``'ctranslate'`` backend.

    Raises:
        NotImplementedError: If ``backend`` is none of the backends above.
    """
    if backend in ('pt', 'vllm'):
        return quantise
    if backend == 'ctranslate':
        return self._resolve_ctranslate_quantise(quantise)
    # Report the ``backend`` argument rather than ``self.__llm_backend__``:
    # presumably this runs during initialisation, before that attribute is
    # assigned -- confirm against the caller.
    raise NotImplementedError(f"Quantisation is not supported for backend '{backend}'")
|
||||
def _resolve_ctranslate_quantise(self,quantise):
|
||||
if quantise in {'int4', 'awq', 'gptq', 'squeezellm'}:raise ValueError(f"Quantisation '{quantise}' is not supported for backend 'ctranslate'")
|
||||
if quantise == 'int8':quantise='int8_float16' if self._has_gpus else 'int8_float32'
|
||||
|
||||
Reference in New Issue
Block a user