mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-03-07 16:47:13 -05:00
perf: unify LLM interface (#518)
Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
@@ -8,28 +8,13 @@ import typing as t
|
||||
|
||||
import openllm_core
|
||||
|
||||
from . import dummy_flax_objects as dummy_flax_objects
|
||||
from . import dummy_pt_objects as dummy_pt_objects
|
||||
from . import dummy_tf_objects as dummy_tf_objects
|
||||
from . import dummy_vllm_objects as dummy_vllm_objects
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
import openllm
|
||||
|
||||
from openllm_core._typing_compat import LiteralBackend
|
||||
|
||||
def generate_labels(llm: openllm.LLM[t.Any, t.Any]) -> dict[str, t.Any]:
  """Build the metadata label mapping for a given ``llm`` instance.

  The labels capture which backend, model and serialisation format the
  LLM runs with, so downstream tooling can identify the deployment.
  """
  config = llm.config
  return {
    'backend': llm.__llm_backend__,
    'framework': 'openllm',
    'model_name': config['model_name'],
    'architecture': config['architecture'],
    'serialisation': llm._serialisation,
  }
|
||||
|
||||
def infer_auto_class(backend: LiteralBackend) -> type[openllm.AutoLLM | openllm.AutoTFLLM | openllm.AutoFlaxLLM | openllm.AutoVLLM]:
  """Resolve the Auto* class that matches ``backend``.

  Raises:
    RuntimeError: if ``backend`` is not one of ``'pt'``, ``'flax'``,
      ``'tf'`` or ``'vllm'``.
  """
  # Imported lazily to avoid a circular import at module load time.
  import openllm

  # Attribute access stays inside each branch so only the matching
  # Auto* class is ever touched.
  if backend == 'pt':
    return openllm.AutoLLM
  if backend == 'vllm':
    return openllm.AutoVLLM
  if backend == 'tf':
    return openllm.AutoTFLLM
  if backend == 'flax':
    return openllm.AutoFlaxLLM
  raise RuntimeError(f"Unknown backend: {backend} (supported: 'pt', 'flax', 'tf', 'vllm')")
|
||||
|
||||
__all__ = ['generate_labels', 'infer_auto_class', 'dummy_flax_objects', 'dummy_pt_objects', 'dummy_tf_objects', 'dummy_vllm_objects']
|
||||
__all__ = ['generate_labels']
|
||||
|
||||
def __dir__() -> t.Sequence[str]:
  """Expose a sorted view of ``__all__`` for ``dir(module)``."""
  public_names = sorted(__all__)
  return public_names
|
||||
|
||||
@@ -1,16 +0,0 @@
|
||||
# Generated by ./tools/update-dummy.py -- do not edit by hand; rerun the
# tool to refresh. Each class below is an import-time placeholder that
# raises a helpful error (via _require_backends) when the 'flax' backend
# is not installed.
from __future__ import annotations

import typing as _t

from openllm_core.utils import DummyMetaclass as _DummyMetaclass, require_backends as _require_backends


class FlaxFlanT5(metaclass=_DummyMetaclass):
  _backends = ['flax']

  def __init__(self, *args: _t.Any, **kwargs: _t.Any):
    _require_backends(self, ['flax'])


class FlaxOPT(metaclass=_DummyMetaclass):
  _backends = ['flax']

  def __init__(self, *args: _t.Any, **kwargs: _t.Any):
    _require_backends(self, ['flax'])


class AutoFlaxLLM(metaclass=_DummyMetaclass):
  _backends = ['flax']

  def __init__(self, *args: _t.Any, **kwargs: _t.Any):
    _require_backends(self, ['flax'])


# Placeholder for the real mapping; populated only when flax is available.
MODEL_FLAX_MAPPING_NAMES: _t.Any = None

__all__: list[str] = ['MODEL_FLAX_MAPPING_NAMES', 'AutoFlaxLLM', 'FlaxFlanT5', 'FlaxOPT']
|
||||
43
openllm-python/src/openllm/utils/dummy_pt_objects.py
generated
43
openllm-python/src/openllm/utils/dummy_pt_objects.py
generated
@@ -1,43 +0,0 @@
|
||||
# Generated by ./tools/update-dummy.py -- do not edit by hand; rerun the
# tool to refresh. Each class below is an import-time placeholder that
# raises a helpful error (via _require_backends) when the PyTorch backend
# (and per-model extras) are not installed.
from __future__ import annotations

import typing as _t

from openllm_core.utils import DummyMetaclass as _DummyMetaclass, require_backends as _require_backends


class ChatGLM(metaclass=_DummyMetaclass):
  _backends = ['torch', 'cpm_kernels', 'sentencepiece']

  def __init__(self, *args: _t.Any, **kwargs: _t.Any):
    _require_backends(self, ['torch', 'cpm_kernels', 'sentencepiece'])


class DollyV2(metaclass=_DummyMetaclass):
  _backends = ['torch']

  def __init__(self, *args: _t.Any, **kwargs: _t.Any):
    _require_backends(self, ['torch'])


class Falcon(metaclass=_DummyMetaclass):
  _backends = ['torch', 'einops', 'xformers']

  def __init__(self, *args: _t.Any, **kwargs: _t.Any):
    _require_backends(self, ['torch', 'einops', 'xformers'])


class FlanT5(metaclass=_DummyMetaclass):
  _backends = ['torch']

  def __init__(self, *args: _t.Any, **kwargs: _t.Any):
    _require_backends(self, ['torch'])


class GPTNeoX(metaclass=_DummyMetaclass):
  _backends = ['torch']

  def __init__(self, *args: _t.Any, **kwargs: _t.Any):
    _require_backends(self, ['torch'])


class Llama(metaclass=_DummyMetaclass):
  _backends = ['torch', 'fairscale', 'sentencepiece', 'scipy']

  def __init__(self, *args: _t.Any, **kwargs: _t.Any):
    _require_backends(self, ['torch', 'fairscale', 'sentencepiece', 'scipy'])


class MPT(metaclass=_DummyMetaclass):
  _backends = ['torch', 'triton', 'einops']

  def __init__(self, *args: _t.Any, **kwargs: _t.Any):
    _require_backends(self, ['torch', 'triton', 'einops'])


class OPT(metaclass=_DummyMetaclass):
  _backends = ['torch']

  def __init__(self, *args: _t.Any, **kwargs: _t.Any):
    _require_backends(self, ['torch'])


class StableLM(metaclass=_DummyMetaclass):
  _backends = ['torch']

  def __init__(self, *args: _t.Any, **kwargs: _t.Any):
    _require_backends(self, ['torch'])


class StarCoder(metaclass=_DummyMetaclass):
  _backends = ['torch', 'bitsandbytes']

  def __init__(self, *args: _t.Any, **kwargs: _t.Any):
    _require_backends(self, ['torch', 'bitsandbytes'])


class Baichuan(metaclass=_DummyMetaclass):
  _backends = ['torch', 'cpm_kernels', 'sentencepiece']

  def __init__(self, *args: _t.Any, **kwargs: _t.Any):
    _require_backends(self, ['torch', 'cpm_kernels', 'sentencepiece'])


class AutoLLM(metaclass=_DummyMetaclass):
  _backends = ['torch']

  def __init__(self, *args: _t.Any, **kwargs: _t.Any):
    _require_backends(self, ['torch'])


# Placeholder for the real mapping; populated only when torch is available.
MODEL_MAPPING_NAMES: _t.Any = None

__all__: list[str] = [
  'MODEL_MAPPING_NAMES',
  'AutoLLM',
  'ChatGLM',
  'DollyV2',
  'Falcon',
  'FlanT5',
  'GPTNeoX',
  'Llama',
  'MPT',
  'OPT',
  'StableLM',
  'StarCoder',
  'Baichuan',
]
|
||||
16
openllm-python/src/openllm/utils/dummy_tf_objects.py
generated
16
openllm-python/src/openllm/utils/dummy_tf_objects.py
generated
@@ -1,16 +0,0 @@
|
||||
# Generated by ./tools/update-dummy.py -- do not edit by hand; rerun the
# tool to refresh. Each class below is an import-time placeholder that
# raises a helpful error (via _require_backends) when TensorFlow is not
# installed.
from __future__ import annotations

import typing as _t

from openllm_core.utils import DummyMetaclass as _DummyMetaclass, require_backends as _require_backends


class TFFlanT5(metaclass=_DummyMetaclass):
  _backends = ['tensorflow']

  def __init__(self, *args: _t.Any, **kwargs: _t.Any):
    _require_backends(self, ['tensorflow'])


class TFOPT(metaclass=_DummyMetaclass):
  _backends = ['tensorflow']

  def __init__(self, *args: _t.Any, **kwargs: _t.Any):
    _require_backends(self, ['tensorflow'])


class AutoTFLLM(metaclass=_DummyMetaclass):
  _backends = ['tensorflow']

  def __init__(self, *args: _t.Any, **kwargs: _t.Any):
    _require_backends(self, ['tensorflow'])


# Placeholder for the real mapping; populated only when tensorflow is available.
MODEL_TF_MAPPING_NAMES: _t.Any = None

__all__: list[str] = ['MODEL_TF_MAPPING_NAMES', 'AutoTFLLM', 'TFFlanT5', 'TFOPT']
|
||||
@@ -1,37 +0,0 @@
|
||||
# Generated by ./tools/update-dummy.py -- do not edit by hand; rerun the
# tool to refresh. Each class below is an import-time placeholder that
# raises a helpful error (via _require_backends) when the vLLM backend
# (and per-model extras) are not installed.
from __future__ import annotations

import typing as _t

from openllm_core.utils import DummyMetaclass as _DummyMetaclass, require_backends as _require_backends


class VLLMBaichuan(metaclass=_DummyMetaclass):
  _backends = ['vllm', 'cpm_kernels', 'sentencepiece']

  def __init__(self, *args: _t.Any, **kwargs: _t.Any):
    _require_backends(self, ['vllm', 'cpm_kernels', 'sentencepiece'])


class VLLMDollyV2(metaclass=_DummyMetaclass):
  _backends = ['vllm']

  def __init__(self, *args: _t.Any, **kwargs: _t.Any):
    _require_backends(self, ['vllm'])


class VLLMFalcon(metaclass=_DummyMetaclass):
  _backends = ['vllm', 'einops', 'xformers']

  def __init__(self, *args: _t.Any, **kwargs: _t.Any):
    _require_backends(self, ['vllm', 'einops', 'xformers'])


class VLLMGPTNeoX(metaclass=_DummyMetaclass):
  _backends = ['vllm']

  def __init__(self, *args: _t.Any, **kwargs: _t.Any):
    _require_backends(self, ['vllm'])


class VLLMMPT(metaclass=_DummyMetaclass):
  _backends = ['vllm', 'triton', 'einops']

  def __init__(self, *args: _t.Any, **kwargs: _t.Any):
    _require_backends(self, ['vllm', 'triton', 'einops'])


class VLLMOPT(metaclass=_DummyMetaclass):
  _backends = ['vllm']

  def __init__(self, *args: _t.Any, **kwargs: _t.Any):
    _require_backends(self, ['vllm'])


class VLLMStableLM(metaclass=_DummyMetaclass):
  _backends = ['vllm']

  def __init__(self, *args: _t.Any, **kwargs: _t.Any):
    _require_backends(self, ['vllm'])


class VLLMStarCoder(metaclass=_DummyMetaclass):
  _backends = ['vllm', 'bitsandbytes']

  def __init__(self, *args: _t.Any, **kwargs: _t.Any):
    _require_backends(self, ['vllm', 'bitsandbytes'])


class VLLMLlama(metaclass=_DummyMetaclass):
  _backends = ['vllm', 'fairscale', 'sentencepiece', 'scipy']

  def __init__(self, *args: _t.Any, **kwargs: _t.Any):
    _require_backends(self, ['vllm', 'fairscale', 'sentencepiece', 'scipy'])


class AutoVLLM(metaclass=_DummyMetaclass):
  _backends = ['vllm']

  def __init__(self, *args: _t.Any, **kwargs: _t.Any):
    _require_backends(self, ['vllm'])


# Placeholder for the real mapping; populated only when vllm is available.
MODEL_VLLM_MAPPING_NAMES: _t.Any = None

__all__: list[str] = [
  'MODEL_VLLM_MAPPING_NAMES',
  'AutoVLLM',
  'VLLMBaichuan',
  'VLLMDollyV2',
  'VLLMFalcon',
  'VLLMGPTNeoX',
  'VLLMMPT',
  'VLLMOPT',
  'VLLMStableLM',
  'VLLMStarCoder',
  'VLLMLlama',
]
|
||||
Reference in New Issue
Block a user