mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-06-12 02:20:32 -04:00
perf: upgrade mixtral to use expert parallelism (#783)
Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
@@ -119,7 +119,7 @@ openai = ["openai[datalib]>=1", "tiktoken"]
|
||||
playground = ["jupyter", "notebook", "ipython", "jupytext", "nbformat"]
|
||||
qwen = ["cpm-kernels", "tiktoken"]
|
||||
starcoder = ["bitsandbytes"]
|
||||
vllm = ["vllm>=0.2.4", "megablocks", "stanford-stk", "ray==2.6.0"]
|
||||
vllm = ["vllm>=0.2.5", "ray==2.6.0"]
|
||||
|
||||
[tool.hatch.version]
|
||||
fallback-version = "0.0.0"
|
||||
|
||||
@@ -52,10 +52,10 @@ class HfIgnore:
|
||||
def ignore_patterns(cls, llm: openllm.LLM[t.Any, t.Any]) -> list[str]:
|
||||
if llm.__llm_backend__ in {'vllm', 'pt'}:
|
||||
base = [cls.tf, cls.flax, cls.gguf]
|
||||
if llm.config['architecture'] == 'MixtralForCausalLM': # XXX: Hack for Mixtral as safetensors is yet to be working atm
|
||||
base.append(cls.safetensors)
|
||||
elif has_safetensors_weights(llm.model_id):
|
||||
if has_safetensors_weights(llm.model_id):
|
||||
base.extend([cls.pt, '*.pt'])
|
||||
elif has_pt_weights(llm.model_id):
|
||||
base.extend([cls.safetensors, cls.pt])
|
||||
else:
|
||||
base.append(cls.safetensors)
|
||||
elif llm.__llm_backend__ == 'ggml':
|
||||
|
||||
Reference in New Issue
Block a user