From 5442d9cd10021b3c206f39728a41c4771fbe3005 Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Wed, 22 Nov 2023 17:03:13 -0500 Subject: [PATCH] fix(trust_remote_code): handle args correctly (#727) Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> --- README.md | 19 +++++++++++++++++++ .../src/openllm_core/config/__init__.py | 1 + .../config/configuration_mistral.py | 19 ++++++++++++++++--- .../openllm_core/config/configuration_phi.py | 6 ++++-- openllm-python/README.md | 19 +++++++++++++++++++ openllm-python/pyproject.toml | 2 +- openllm-python/src/openllm_cli/entrypoint.py | 4 ++-- tools/dependencies.py | 2 +- 8 files changed, 63 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 73c466a0..e1a96b2f 100644 --- a/README.md +++ b/README.md @@ -839,6 +839,25 @@ openllm start HuggingFaceH4/zephyr-7b-alpha --backend vllm openllm start HuggingFaceH4/zephyr-7b-alpha --backend pt ``` +- CTranslate2 (*experimental*): + + +```bash +openllm start HuggingFaceH4/zephyr-7b-alpha --backend ctranslate +``` + + +> **Note:** Currently, all quantization methods from ctranslate2 are supported. This includes int8, int8_float16, int8_bfloat16. + + + +> **Note:** We recommend that users convert the models beforehand, and then provide the directory of the converted models to `openllm start`. See [CTranslate2](https://opennmt.net/CTranslate2/conversion.html) for more information. + + + +> **Important:** CTranslate2 is an experimental backend and is yet to be fully supported. It is recommended to use vLLM for all production use cases. + +
diff --git a/openllm-core/src/openllm_core/config/__init__.py b/openllm-core/src/openllm_core/config/__init__.py index f707d7dc..68dad614 100644 --- a/openllm-core/src/openllm_core/config/__init__.py +++ b/openllm-core/src/openllm_core/config/__init__.py @@ -15,6 +15,7 @@ from .configuration_llama import LlamaConfig as LlamaConfig from .configuration_mistral import MistralConfig as MistralConfig from .configuration_mpt import MPTConfig as MPTConfig from .configuration_opt import OPTConfig as OPTConfig +from .configuration_phi import PhiConfig as PhiConfig from .configuration_stablelm import StableLMConfig as StableLMConfig from .configuration_starcoder import StarCoderConfig as StarCoderConfig from .configuration_yi import YiConfig as YiConfig diff --git a/openllm-core/src/openllm_core/config/configuration_mistral.py b/openllm-core/src/openllm_core/config/configuration_mistral.py index 36cdfb88..fe8c1bfd 100644 --- a/openllm-core/src/openllm_core/config/configuration_mistral.py +++ b/openllm-core/src/openllm_core/config/configuration_mistral.py @@ -1,6 +1,9 @@ from __future__ import annotations -import openllm_core +import openllm_core, typing as t + +if t.TYPE_CHECKING: + from openllm_core._schemas import MessageParam SINST_KEY, EINST_KEY, BOS_TOKEN, EOS_TOKEN = '[INST]', '[/INST]', '', '' @@ -20,8 +23,6 @@ class MistralConfig(openllm_core.LLMConfig): 'architecture': 'MistralForCausalLM', 'default_id': 'mistralai/Mistral-7B-Instruct-v0.1', 'serialisation': 'safetensors', - 'backend': ('pt', 'vllm'), - # NOTE: see https://docs.mistral.ai/usage/guardrailing/ and https://docs.mistral.ai/llm/mistral-instruct-v0.1 'model_ids': [ 'HuggingFaceH4/zephyr-7b-alpha', 'HuggingFaceH4/zephyr-7b-beta', @@ -43,6 +44,7 @@ class MistralConfig(openllm_core.LLMConfig): best_of: int = 1 presence_penalty: float = 0.5 + # NOTE: see https://docs.mistral.ai/usage/guardrailing/ and https://docs.mistral.ai/llm/mistral-instruct-v0.1 @property def template(self) -> str: return 
'''{start_key}{start_inst} {system_message} {instruction} {end_inst}\n'''.format( @@ -57,3 +59,14 @@ class MistralConfig(openllm_core.LLMConfig): @property def system_message(self) -> str: return '''Always assist with care, respect, and truth. Respond with utmost utility yet securely. Avoid harmful, unethical, prejudiced, or negative content. Ensure replies promote fairness and positivity.''' + + @property + def chat_template(self) -> str: + return repr("{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}") + + @property + def chat_messages(self) -> list[MessageParam]: + from openllm_core._schemas import MessageParam + return [MessageParam(role='user', content='What is your favourite condiment?'), + MessageParam(role='assistant', content="Well, I'm quite partial to a good squeeze of fresh lemon juice. 
It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"), + MessageParam(role='user', content='Do you have mayonnaise recipes?')] diff --git a/openllm-core/src/openllm_core/config/configuration_phi.py b/openllm-core/src/openllm_core/config/configuration_phi.py index 0eb4c6f5..e326b693 100644 --- a/openllm-core/src/openllm_core/config/configuration_phi.py +++ b/openllm-core/src/openllm_core/config/configuration_phi.py @@ -1,8 +1,9 @@ from __future__ import annotations -import openllm_core -from openllm_core._schemas import MessageParam +import openllm_core, typing as t +if t.TYPE_CHECKING: + from openllm_core._schemas import MessageParam class PhiConfig(openllm_core.LLMConfig): """The language model phi-1.5 is a Transformer with 1.3 billion parameters. @@ -42,6 +43,7 @@ class PhiConfig(openllm_core.LLMConfig): @property def chat_messages(self) -> list[MessageParam]: + from openllm_core._schemas import MessageParam return [MessageParam(role='user', content="I don't know why, I'm struggling to maintain focus while studying. Any suggestions?"), MessageParam(role='assistant', content='Have you tried using a timer? It can help you stay on track and avoid distractions.'), MessageParam(role='user', content="That's a good idea. I'll give it a try. What else can I do to boost my productivity?")] diff --git a/openllm-python/README.md b/openllm-python/README.md index 73c466a0..e1a96b2f 100644 --- a/openllm-python/README.md +++ b/openllm-python/README.md @@ -839,6 +839,25 @@ openllm start HuggingFaceH4/zephyr-7b-alpha --backend vllm openllm start HuggingFaceH4/zephyr-7b-alpha --backend pt ``` +- CTranslate2 (*experimental*): + + +```bash +openllm start HuggingFaceH4/zephyr-7b-alpha --backend ctranslate +``` + + +> **Note:** Currently, all quantization methods from ctranslate2 are supported. 
This includes int8, int8_float16, int8_bfloat16. + + + +> **Note:** We recommend that users convert the models beforehand, and then provide the directory of the converted models to `openllm start`. See [CTranslate2](https://opennmt.net/CTranslate2/conversion.html) for more information. + + + +> **Important:** CTranslate2 is an experimental backend and is yet to be fully supported. It is recommended to use vLLM for all production use cases. + +
diff --git a/openllm-python/pyproject.toml b/openllm-python/pyproject.toml index 660344e3..2761b1b8 100644 --- a/openllm-python/pyproject.toml +++ b/openllm-python/pyproject.toml @@ -106,7 +106,7 @@ all = ["openllm[full]"] awq = ["autoawq"] baichuan = ["cpm-kernels"] chatglm = ["cpm-kernels"] -ctranslate = ["ctranslate2"] +ctranslate = ["ctranslate2>=3.22.0"] falcon = ["xformers"] fine-tune = ["peft>=0.6.0", "datasets", "trl", "huggingface-hub"] full = [ diff --git a/openllm-python/src/openllm_cli/entrypoint.py b/openllm-python/src/openllm_cli/entrypoint.py index 2e887cc2..8dabd075 100644 --- a/openllm-python/src/openllm_cli/entrypoint.py +++ b/openllm-python/src/openllm_cli/entrypoint.py @@ -442,7 +442,7 @@ def start_command( serialisation=serialisation, dtype=dtype, max_model_len=max_model_len, - trust_remote_code=check_bool_env('TRUST_REMOTE_CODE'), + trust_remote_code=check_bool_env('TRUST_REMOTE_CODE', False), ) backend_warning(llm.__llm_backend__) @@ -573,7 +573,7 @@ def start_grpc_command( serialisation=serialisation, dtype=dtype, max_model_len=max_model_len, - trust_remote_code=check_bool_env('TRUST_REMOTE_CODE'), + trust_remote_code=check_bool_env('TRUST_REMOTE_CODE', False), ) backend_warning(llm.__llm_backend__) diff --git a/tools/dependencies.py b/tools/dependencies.py index dbf24c84..1c44f8dc 100755 --- a/tools/dependencies.py +++ b/tools/dependencies.py @@ -152,7 +152,7 @@ OPENAI_DEPS = ['openai[datalib]>=1', 'tiktoken'] AGENTS_DEPS = [f'transformers[agents]>={_TRANSFORMERS_CONSTRAINTS}', 'diffusers', 'soundfile'] PLAYGROUND_DEPS = ['jupyter', 'notebook', 'ipython', 'jupytext', 'nbformat'] GGML_DEPS = ['ctransformers'] -CTRANSLATE_DEPS = ['ctranslate2'] +CTRANSLATE_DEPS = ['ctranslate2>=3.22.0'] AWQ_DEPS = ['autoawq'] GPTQ_DEPS = ['auto-gptq[triton]>=0.4.2'] VLLM_DEPS = ['vllm>=0.2.2']