diff --git a/openllm-core/src/openllm_core/config/configuration_mixtral.py b/openllm-core/src/openllm_core/config/configuration_mixtral.py
index 62e9e1c7..1c019bbb 100644
--- a/openllm-core/src/openllm_core/config/configuration_mixtral.py
+++ b/openllm-core/src/openllm_core/config/configuration_mixtral.py
@@ -18,6 +18,7 @@ class MixtralConfig(openllm_core.LLMConfig):
   __config__ = {
     'name_type': 'lowercase',
     'url': 'https://mistral.ai',
+    'serialisation': 'safetensors',
     'architecture': 'MixtralForCausalLM',
     'default_id': 'mistralai/Mixtral-8x7B-Instruct-v0.1',
     'model_ids': ['mistralai/Mixtral-8x7B-Instruct-v0.1', 'mistralai/Mixtral-8x7B-v0.1'],
diff --git a/openllm-python/pyproject.toml b/openllm-python/pyproject.toml
index f3ab150a..de6dcdc0 100644
--- a/openllm-python/pyproject.toml
+++ b/openllm-python/pyproject.toml
@@ -119,7 +119,7 @@ openai = ["openai[datalib]>=1", "tiktoken"]
 playground = ["jupyter", "notebook", "ipython", "jupytext", "nbformat"]
 qwen = ["cpm-kernels", "tiktoken"]
 starcoder = ["bitsandbytes"]
-vllm = ["vllm>=0.2.4", "megablocks", "stanford-stk", "ray==2.6.0"]
+vllm = ["vllm>=0.2.5", "ray==2.6.0"]
 
 [tool.hatch.version]
 fallback-version = "0.0.0"
diff --git a/openllm-python/src/openllm/serialisation/transformers/weights.py b/openllm-python/src/openllm/serialisation/transformers/weights.py
index 0d456947..109ebf1b 100644
--- a/openllm-python/src/openllm/serialisation/transformers/weights.py
+++ b/openllm-python/src/openllm/serialisation/transformers/weights.py
@@ -52,10 +52,10 @@ class HfIgnore:
   def ignore_patterns(cls, llm: openllm.LLM[t.Any, t.Any]) -> list[str]:
     if llm.__llm_backend__ in {'vllm', 'pt'}:
       base = [cls.tf, cls.flax, cls.gguf]
-      if llm.config['architecture'] == 'MixtralForCausalLM': # XXX: Hack for Mixtral as safetensors is yet to be working atm
-        base.append(cls.safetensors)
-      elif has_safetensors_weights(llm.model_id):
+      if has_safetensors_weights(llm.model_id):
         base.extend([cls.pt, '*.pt'])
+      elif has_pt_weights(llm.model_id):
+        base.extend([cls.safetensors, cls.pt])
       else:
         base.append(cls.safetensors)
     elif llm.__llm_backend__ == 'ggml':
diff --git a/tools/dependencies.py b/tools/dependencies.py
index e8312b61..af5ffcf8 100755
--- a/tools/dependencies.py
+++ b/tools/dependencies.py
@@ -155,7 +155,7 @@ GGML_DEPS = ['ctransformers']
 CTRANSLATE_DEPS = ['ctranslate2>=3.22.0']
 AWQ_DEPS = ['autoawq']
 GPTQ_DEPS = ['auto-gptq[triton]>=0.4.2']
-VLLM_DEPS = ['vllm>=0.2.4', 'megablocks', 'stanford-stk', 'ray==2.6.0']
+VLLM_DEPS = ['vllm>=0.2.5', 'ray==2.6.0']
 
 _base_requirements: dict[str, t.Any] = {
   inflection.dasherize(name): config_cls.__openllm_requirements__