Mirror of https://github.com/bentoml/OpenLLM.git, synced 2026-01-23 06:52:42 -05:00
Merge pull request #7 from bentoml/pr-mixtral-support
[Model Support] Support Mixtral 8x7b model
recipe.yaml | 32 ++++++++++++++++++++++++++++++++
1 file changed, 32 insertions(+)
@@ -213,6 +213,38 @@
     max_model_len: 2048
     quantization: awq
   chat_template: gemma-it
+"mixtral:8x7b-instruct-v0.1-fp16":
+  alias:
+  - 8x7b
+  project: vllm-chat
+  service_config:
+    name: mixtral
+    traffic:
+      timeout: 300
+    resources:
+      gpu: 2
+      gpu_type: nvidia-a100-80g
+  engine_config:
+    model: mistralai/Mixtral-8x7B-Instruct-v0.1
+    max_model_len: 2048
+  chat_template: mistral-instruct
+"mixtral:8x7b-instruct-v0.1-awq-4bit":
+  alias:
+  - 8x7b-4bit
+  project: vllm-chat
+  service_config:
+    name: mixtral
+    traffic:
+      timeout: 300
+    resources:
+      gpu: 1
+      gpu_type: nvidia-tesla-a100
+  engine_config:
+    model: casperhansen/mixtral-instruct-awq
+    max_model_len: 2048
+    quantization: awq
+    gpu_memory_utilization: 0.8
+  chat_template: mistral-instruct
 "qwen2:0.5b-instruct-fp16":
   alias:
   - 0.5b
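For context: the fp16 recipe above schedules the full-precision Mixtral 8x7B across 2x A100-80G GPUs, while the AWQ 4-bit recipe fits the quantized weights on a single A100 at 80% GPU memory utilization. Once either recipe is served (for example with "openllm serve mixtral:8x7b-instruct-v0.1-fp16", assuming the standard OpenLLM CLI), the model is exposed through an OpenAI-compatible chat endpoint. The sketch below queries such a server with the official openai Python client; the localhost:3000 base URL (OpenLLM's usual default port), the placeholder API key, and the prompt are illustrative assumptions, not part of this commit.

# Minimal sketch: query a served Mixtral recipe via its OpenAI-compatible API.
# Assumes the server was started from one of the recipes added above and
# listens on localhost:3000 (OpenLLM's usual default); adjust as needed.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:3000/v1", api_key="na")

# List the models the server exposes to confirm the Mixtral recipe loaded.
model_id = client.models.list().data[0].id

# Send a single chat turn; the mistral-instruct chat template configured
# in the recipe is applied server-side.
response = client.chat.completions.create(
    model=model_id,
    messages=[{"role": "user", "content": "Summarize what a mixture-of-experts model is in one sentence."}],
    max_tokens=128,
)
print(response.choices[0].message.content)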