perf: upgrade mixtral to use expert parallelism (#783)

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
Aaron Pham
2023-12-15 11:45:08 -05:00
committed by GitHub
parent 7d2c4a4f7a
commit 88b6d3d6de
4 changed files with 6 additions and 5 deletions

View File

@@ -52,10 +52,10 @@ class HfIgnore:
def ignore_patterns(cls, llm: openllm.LLM[t.Any, t.Any]) -> list[str]:
if llm.__llm_backend__ in {'vllm', 'pt'}:
base = [cls.tf, cls.flax, cls.gguf]
if llm.config['architecture'] == 'MixtralForCausalLM': # XXX: Hack for Mixtral as safetensors is yet to be working atm
base.append(cls.safetensors)
elif has_safetensors_weights(llm.model_id):
if has_safetensors_weights(llm.model_id):
base.extend([cls.pt, '*.pt'])
elif has_pt_weights(llm.model_id):
base.extend([cls.safetensors, cls.pt])
else:
base.append(cls.safetensors)
elif llm.__llm_backend__ == 'ggml':