mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-01-30 02:12:00 -05:00
fix(gptq): use upstream integration (#297)
* wip Signed-off-by: aarnphm-ec2-dev <29749331+aarnphm@users.noreply.github.com> * feat: GPTQ transformers integration Signed-off-by: aarnphm-ec2-dev <29749331+aarnphm@users.noreply.github.com> * fix: only load if variable is available and add changelog Signed-off-by: aarnphm-ec2-dev <29749331+aarnphm@users.noreply.github.com> * chore: remove boilerplate check Signed-off-by: aarnphm-ec2-dev <29749331+aarnphm@users.noreply.github.com> --------- Signed-off-by: aarnphm-ec2-dev <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
@@ -11,11 +11,12 @@ import openllm
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
from openllm_core._typing_compat import LiteralBackend
|
||||
from openllm_core._typing_compat import LiteralQuantise
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@contextlib.contextmanager
|
||||
def build_bento(model: str, model_id: str | None = None, quantize: t.Literal['int4', 'int8', 'gptq'] | None = None, cleanup: bool = False) -> t.Iterator[bentoml.Bento]:
|
||||
def build_bento(model: str, model_id: str | None = None, quantize: LiteralQuantise | None = None, cleanup: bool = False) -> t.Iterator[bentoml.Bento]:
|
||||
logger.info('Building BentoML for %s', model)
|
||||
bento = openllm.build(model, model_id=model_id, quantize=quantize)
|
||||
yield bento
|
||||
|
||||
Reference in New Issue
Block a user