From 1822f5ae176293b50f10ae917dcd837ed44af29a Mon Sep 17 00:00:00 2001
From: Rick Zhou
Date: Wed, 3 Jul 2024 02:51:26 +0000
Subject: [PATCH] [Model Support] Support Mixtral 8x7b model

---
Notes (commentary only; text between the "---" line and the diff is not
part of the commit message and is ignored when the patch is applied):

Adds two entries to recipe.yaml, both with project vllm-chat, service
name mixtral, traffic timeout 300, max_model_len 2048 and chat_template
mistral-instruct:

* "mixtral:8x7b-instruct-v0.1-fp16" (alias 8x7b)
  model mistralai/Mixtral-8x7B-Instruct-v0.1,
  resources gpu: 2, gpu_type: nvidia-a100-80g.
* "mixtral:8x7b-instruct-v0.1-awq-4bit" (alias 8x7b-4bit)
  model casperhansen/mixtral-instruct-awq, quantization: awq,
  gpu_memory_utilization: 0.8, resources gpu: 1,
  gpu_type: nvidia-tesla-a100.

NOTE(review): the YAML nesting below was reconstructed from a
whitespace-collapsed copy of this patch. In particular, chat_template is
presumed to be a sibling of engine_config rather than a child of it;
confirm against the neighbouring entries in recipe.yaml before applying.

 recipe.yaml | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/recipe.yaml b/recipe.yaml
index 2a2cd00d..fbbd20b5 100644
--- a/recipe.yaml
+++ b/recipe.yaml
@@ -213,6 +213,38 @@
     max_model_len: 2048
     quantization: awq
   chat_template: gemma-it
+"mixtral:8x7b-instruct-v0.1-fp16":
+  alias:
+    - 8x7b
+  project: vllm-chat
+  service_config:
+    name: mixtral
+    traffic:
+      timeout: 300
+    resources:
+      gpu: 2
+      gpu_type: nvidia-a100-80g
+  engine_config:
+    model: mistralai/Mixtral-8x7B-Instruct-v0.1
+    max_model_len: 2048
+  chat_template: mistral-instruct
+"mixtral:8x7b-instruct-v0.1-awq-4bit":
+  alias:
+    - 8x7b-4bit
+  project: vllm-chat
+  service_config:
+    name: mixtral
+    traffic:
+      timeout: 300
+    resources:
+      gpu: 1
+      gpu_type: nvidia-tesla-a100
+  engine_config:
+    model: casperhansen/mixtral-instruct-awq
+    max_model_len: 2048
+    quantization: awq
+    gpu_memory_utilization: 0.8
+  chat_template: mistral-instruct
 "qwen2:0.5b-instruct-fp16":
   alias:
     - 0.5b