chore: ignore new lines split [skip ci]

Signed-off-by: aarnphm-ec2-dev <29749331+aarnphm@users.noreply.github.com>
Author: aarnphm-ec2-dev
Date:   2023-09-01 17:00:49 +00:00
Parent: 608de0b667
Commit: 7d893e6cd2

70 changed files with 575 additions and 950 deletions


@@ -15,25 +15,21 @@ if t.TYPE_CHECKING:
   from ._llm import LLM
-autogptq, torch, transformers = LazyLoader('autogptq', globals(),
-                                           'auto_gptq'), LazyLoader('torch', globals(), 'torch'), LazyLoader('transformers', globals(), 'transformers')
+autogptq, torch, transformers = LazyLoader('autogptq', globals(), 'auto_gptq'), LazyLoader('torch', globals(), 'torch'), LazyLoader('transformers', globals(), 'transformers')
 logger = logging.getLogger(__name__)
 QuantiseMode = t.Literal['int8', 'int4', 'gptq']
 @overload
-def infer_quantisation_config(cls: type[LLM[t.Any, t.Any]], quantise: t.Literal['int8', 'int4'],
-                              **attrs: t.Any) -> tuple[transformers.BitsAndBytesConfig, DictStrAny]:
+def infer_quantisation_config(cls: type[LLM[t.Any, t.Any]], quantise: t.Literal['int8', 'int4'], **attrs: t.Any) -> tuple[transformers.BitsAndBytesConfig, DictStrAny]:
   ...
 @overload
-def infer_quantisation_config(cls: type[LLM[t.Any, t.Any]], quantise: t.Literal['gptq'],
-                              **attrs: t.Any) -> tuple[autogptq.BaseQuantizeConfig, DictStrAny]:
+def infer_quantisation_config(cls: type[LLM[t.Any, t.Any]], quantise: t.Literal['gptq'], **attrs: t.Any) -> tuple[autogptq.BaseQuantizeConfig, DictStrAny]:
   ...
-def infer_quantisation_config(cls: type[LLM[t.Any, t.Any]], quantise: QuantiseMode,
-                              **attrs: t.Any) -> tuple[transformers.BitsAndBytesConfig | autogptq.BaseQuantizeConfig, DictStrAny]:
+def infer_quantisation_config(cls: type[LLM[t.Any, t.Any]], quantise: QuantiseMode, **attrs: t.Any) -> tuple[transformers.BitsAndBytesConfig | autogptq.BaseQuantizeConfig, DictStrAny]:
   # 8 bit configuration
   int8_threshold = attrs.pop('llm_int8_threshhold', 6.0)
   int8_enable_fp32_cpu_offload = attrs.pop('llm_int8_enable_fp32_cpu_offload', False)
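
For context, a minimal sketch of what the int8 branch plausibly does with the two attrs popped in the context lines above, assuming they are forwarded to transformers.BitsAndBytesConfig (the helper name _int8_config is hypothetical and not part of this diff; llm_int8_threshold and llm_int8_enable_fp32_cpu_offload are real BitsAndBytesConfig parameters):

import typing as t
import transformers

def _int8_config(**attrs: t.Any) -> tuple[transformers.BitsAndBytesConfig, dict[str, t.Any]]:
  # Pop the int8 knobs read in the diff's context lines, then return the
  # remaining attrs to the caller, mirroring the (config, DictStrAny)
  # return shape of the overloads above.
  int8_threshold = attrs.pop('llm_int8_threshhold', 6.0)
  int8_enable_fp32_cpu_offload = attrs.pop('llm_int8_enable_fp32_cpu_offload', False)
  config = transformers.BitsAndBytesConfig(load_in_8bit=True,
                                           llm_int8_threshold=int8_threshold,
                                           llm_int8_enable_fp32_cpu_offload=int8_enable_fp32_cpu_offload)
  return config, attrs

Returning the unconsumed attrs alongside the config lets the caller pass them on to the model constructor, which matches the tuple[..., DictStrAny] annotations in the overloads.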