From 5442d9cd10021b3c206f39728a41c4771fbe3005 Mon Sep 17 00:00:00 2001
From: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
Date: Wed, 22 Nov 2023 17:03:13 -0500
Subject: [PATCH] fix(trust_remote_code): handle args correctly (#727)
Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
---
README.md | 19 +++++++++++++++++++
.../src/openllm_core/config/__init__.py | 1 +
.../config/configuration_mistral.py | 19 ++++++++++++++++---
.../openllm_core/config/configuration_phi.py | 6 ++++--
openllm-python/README.md | 19 +++++++++++++++++++
openllm-python/pyproject.toml | 2 +-
openllm-python/src/openllm_cli/entrypoint.py | 4 ++--
tools/dependencies.py | 2 +-
8 files changed, 63 insertions(+), 9 deletions(-)
diff --git a/README.md b/README.md
index 73c466a0..e1a96b2f 100644
--- a/README.md
+++ b/README.md
@@ -839,6 +839,25 @@ openllm start HuggingFaceH4/zephyr-7b-alpha --backend vllm
openllm start HuggingFaceH4/zephyr-7b-alpha --backend pt
```
+- CTranslate2 (*experimental*):
+
+
+```bash
+openllm start HuggingFaceH4/zephyr-7b-alpha --backend ctranslate
+```
+
+
+> **Note:** Currently, all quantization methods from CTranslate2 are supported, including int8, int8_float16, and int8_bfloat16.
+
+
+
+> **Note:** We recommend converting the models beforehand and then providing the directory of the converted models to `openllm start`. See [CTranslate2](https://opennmt.net/CTranslate2/conversion.html) for more information.
+
+
+
+> **Important:** CTranslate2 is an experimental backend and is not yet fully supported. We recommend using vLLM for all production use cases.
+
+
diff --git a/openllm-core/src/openllm_core/config/__init__.py b/openllm-core/src/openllm_core/config/__init__.py
index f707d7dc..68dad614 100644
--- a/openllm-core/src/openllm_core/config/__init__.py
+++ b/openllm-core/src/openllm_core/config/__init__.py
@@ -15,6 +15,7 @@ from .configuration_llama import LlamaConfig as LlamaConfig
from .configuration_mistral import MistralConfig as MistralConfig
from .configuration_mpt import MPTConfig as MPTConfig
from .configuration_opt import OPTConfig as OPTConfig
+from .configuration_phi import PhiConfig as PhiConfig
from .configuration_stablelm import StableLMConfig as StableLMConfig
from .configuration_starcoder import StarCoderConfig as StarCoderConfig
from .configuration_yi import YiConfig as YiConfig
diff --git a/openllm-core/src/openllm_core/config/configuration_mistral.py b/openllm-core/src/openllm_core/config/configuration_mistral.py
index 36cdfb88..fe8c1bfd 100644
--- a/openllm-core/src/openllm_core/config/configuration_mistral.py
+++ b/openllm-core/src/openllm_core/config/configuration_mistral.py
@@ -1,6 +1,9 @@
from __future__ import annotations
-import openllm_core
+import openllm_core, typing as t
+
+if t.TYPE_CHECKING:
+ from openllm_core._schemas import MessageParam
SINST_KEY, EINST_KEY, BOS_TOKEN, EOS_TOKEN = '[INST]', '[/INST]', '', ''
@@ -20,8 +23,6 @@ class MistralConfig(openllm_core.LLMConfig):
'architecture': 'MistralForCausalLM',
'default_id': 'mistralai/Mistral-7B-Instruct-v0.1',
'serialisation': 'safetensors',
- 'backend': ('pt', 'vllm'),
- # NOTE: see https://docs.mistral.ai/usage/guardrailing/ and https://docs.mistral.ai/llm/mistral-instruct-v0.1
'model_ids': [
'HuggingFaceH4/zephyr-7b-alpha',
'HuggingFaceH4/zephyr-7b-beta',
@@ -43,6 +44,7 @@ class MistralConfig(openllm_core.LLMConfig):
best_of: int = 1
presence_penalty: float = 0.5
+ # NOTE: see https://docs.mistral.ai/usage/guardrailing/ and https://docs.mistral.ai/llm/mistral-instruct-v0.1
@property
def template(self) -> str:
return '''{start_key}{start_inst} {system_message} {instruction} {end_inst}\n'''.format(
@@ -57,3 +59,14 @@ class MistralConfig(openllm_core.LLMConfig):
@property
def system_message(self) -> str:
return '''Always assist with care, respect, and truth. Respond with utmost utility yet securely. Avoid harmful, unethical, prejudiced, or negative content. Ensure replies promote fairness and positivity.'''
+
+ @property
+ def chat_template(self) -> str:
+ return repr("{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}")
+
+ @property
+ def chat_messages(self) -> list[MessageParam]:
+ from openllm_core._schemas import MessageParam
+ return [MessageParam(role='user', content='What is your favourite condiment?'),
+ MessageParam(role='assistant', content="Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"),
+ MessageParam(role='user', content='Do you have mayonnaise recipes?')]
diff --git a/openllm-core/src/openllm_core/config/configuration_phi.py b/openllm-core/src/openllm_core/config/configuration_phi.py
index 0eb4c6f5..e326b693 100644
--- a/openllm-core/src/openllm_core/config/configuration_phi.py
+++ b/openllm-core/src/openllm_core/config/configuration_phi.py
@@ -1,8 +1,9 @@
from __future__ import annotations
-import openllm_core
-from openllm_core._schemas import MessageParam
+import openllm_core, typing as t
+if t.TYPE_CHECKING:
+ from openllm_core._schemas import MessageParam
class PhiConfig(openllm_core.LLMConfig):
"""The language model phi-1.5 is a Transformer with 1.3 billion parameters.
@@ -42,6 +43,7 @@ class PhiConfig(openllm_core.LLMConfig):
@property
def chat_messages(self) -> list[MessageParam]:
+ from openllm_core._schemas import MessageParam
return [MessageParam(role='user', content="I don't know why, I'm struggling to maintain focus while studying. Any suggestions?"),
MessageParam(role='assistant', content='Have you tried using a timer? It can help you stay on track and avoid distractions.'),
MessageParam(role='user', content="That's a good idea. I'll give it a try. What else can I do to boost my productivity?")]
diff --git a/openllm-python/README.md b/openllm-python/README.md
index 73c466a0..e1a96b2f 100644
--- a/openllm-python/README.md
+++ b/openllm-python/README.md
@@ -839,6 +839,25 @@ openllm start HuggingFaceH4/zephyr-7b-alpha --backend vllm
openllm start HuggingFaceH4/zephyr-7b-alpha --backend pt
```
+- CTranslate2 (*experimental*):
+
+
+```bash
+openllm start HuggingFaceH4/zephyr-7b-alpha --backend ctranslate
+```
+
+
+> **Note:** Currently, all quantization methods from CTranslate2 are supported, including int8, int8_float16, and int8_bfloat16.
+
+
+
+> **Note:** We recommend converting the models beforehand and then providing the directory of the converted models to `openllm start`. See [CTranslate2](https://opennmt.net/CTranslate2/conversion.html) for more information.
+
+
+
+> **Important:** CTranslate2 is an experimental backend and is not yet fully supported. We recommend using vLLM for all production use cases.
+
+
diff --git a/openllm-python/pyproject.toml b/openllm-python/pyproject.toml
index 660344e3..2761b1b8 100644
--- a/openllm-python/pyproject.toml
+++ b/openllm-python/pyproject.toml
@@ -106,7 +106,7 @@ all = ["openllm[full]"]
awq = ["autoawq"]
baichuan = ["cpm-kernels"]
chatglm = ["cpm-kernels"]
-ctranslate = ["ctranslate2"]
+ctranslate = ["ctranslate2>=3.22.0"]
falcon = ["xformers"]
fine-tune = ["peft>=0.6.0", "datasets", "trl", "huggingface-hub"]
full = [
diff --git a/openllm-python/src/openllm_cli/entrypoint.py b/openllm-python/src/openllm_cli/entrypoint.py
index 2e887cc2..8dabd075 100644
--- a/openllm-python/src/openllm_cli/entrypoint.py
+++ b/openllm-python/src/openllm_cli/entrypoint.py
@@ -442,7 +442,7 @@ def start_command(
serialisation=serialisation,
dtype=dtype,
max_model_len=max_model_len,
- trust_remote_code=check_bool_env('TRUST_REMOTE_CODE'),
+ trust_remote_code=check_bool_env('TRUST_REMOTE_CODE', False),
)
backend_warning(llm.__llm_backend__)
@@ -573,7 +573,7 @@ def start_grpc_command(
serialisation=serialisation,
dtype=dtype,
max_model_len=max_model_len,
- trust_remote_code=check_bool_env('TRUST_REMOTE_CODE'),
+ trust_remote_code=check_bool_env('TRUST_REMOTE_CODE', False),
)
backend_warning(llm.__llm_backend__)
diff --git a/tools/dependencies.py b/tools/dependencies.py
index dbf24c84..1c44f8dc 100755
--- a/tools/dependencies.py
+++ b/tools/dependencies.py
@@ -152,7 +152,7 @@ OPENAI_DEPS = ['openai[datalib]>=1', 'tiktoken']
AGENTS_DEPS = [f'transformers[agents]>={_TRANSFORMERS_CONSTRAINTS}', 'diffusers', 'soundfile']
PLAYGROUND_DEPS = ['jupyter', 'notebook', 'ipython', 'jupytext', 'nbformat']
GGML_DEPS = ['ctransformers']
-CTRANSLATE_DEPS = ['ctranslate2']
+CTRANSLATE_DEPS = ['ctranslate2>=3.22.0']
AWQ_DEPS = ['autoawq']
GPTQ_DEPS = ['auto-gptq[triton]>=0.4.2']
VLLM_DEPS = ['vllm>=0.2.2']