fix(gptq): use upstream integration (#297)

* wip

Signed-off-by: aarnphm-ec2-dev <29749331+aarnphm@users.noreply.github.com>

* feat: GPTQ transformers integration

Signed-off-by: aarnphm-ec2-dev <29749331+aarnphm@users.noreply.github.com>

* fix: only load if variable is available and add changelog

Signed-off-by: aarnphm-ec2-dev <29749331+aarnphm@users.noreply.github.com>

* chore: remove boilerplate check

Signed-off-by: aarnphm-ec2-dev <29749331+aarnphm@users.noreply.github.com>

---------

Signed-off-by: aarnphm-ec2-dev <29749331+aarnphm@users.noreply.github.com>
commit 956b3a53bc
parent 3da869e728
Author: Aaron Pham
Date:   2023-09-04 14:05:50 -04:00
Committed by: GitHub
23 changed files with 197 additions and 248 deletions
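The gist of the change: instead of OpenLLM wiring up GPTQ loading by hand, quantised checkpoints now go through the GPTQ support that ships upstream in transformers. A minimal sketch of that upstream path (assuming transformers >= 4.32 with optimum and auto-gptq installed; the model ids below are placeholders, and the OpenLLM-specific wiring is omitted):

# Loading a checkpoint that already ships GPTQ weights: transformers reads
# the quantization_config stored in the repo, so no bespoke loader is needed.
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig

# Placeholder id for any pre-quantised GPTQ checkpoint.
model = AutoModelForCausalLM.from_pretrained('TheBloke/Llama-2-7B-GPTQ', device_map='auto')

# Quantising a float checkpoint on load, again using only the upstream API:
tokenizer = AutoTokenizer.from_pretrained('facebook/opt-125m')
config = GPTQConfig(bits=4, dataset='c4', tokenizer=tokenizer)
model = AutoModelForCausalLM.from_pretrained('facebook/opt-125m', quantization_config=config, device_map='auto')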

@@ -19,13 +19,7 @@ if t.TYPE_CHECKING:
   from openllm_core._typing_compat import LiteralBackend
 
 def generate_labels(llm: openllm.LLM[t.Any, t.Any]) -> dict[str, t.Any]:
-  return {
-    'backend': llm.__llm_backend__,
-    'framework': 'openllm',
-    'model_name': llm.config['model_name'],
-    'architecture': llm.config['architecture'],
-    'serialisation_format': llm._serialisation_format
-  }
+  return {'backend': llm.__llm_backend__, 'framework': 'openllm', 'model_name': llm.config['model_name'], 'architecture': llm.config['architecture'], 'serialisation': llm._serialisation}
 
 def infer_auto_class(backend: LiteralBackend) -> type[openllm.AutoLLM | openllm.AutoTFLLM | openllm.AutoFlaxLLM | openllm.AutoVLLM]:
   import openllm
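
For reference, the single-expression rewrite of generate_labels above is behaviour-preserving apart from the key rename ('serialisation_format' becomes 'serialisation'). A hypothetical call, with the LLM construction elided since it depends on the OpenLLM version:

labels = generate_labels(llm)  # 'llm' is any constructed openllm.LLM instance
# Example shape of the result (values depend on the model, e.g. OPT on PyTorch):
# {'backend': 'pt', 'framework': 'openllm', 'model_name': 'opt',
#  'architecture': 'OPTForCausalLM', 'serialisation': 'safetensors'}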