diff --git a/recipe.yaml b/recipe.yaml
index 888e7008..29e9d5eb 100644
--- a/recipe.yaml
+++ b/recipe.yaml
@@ -1,7 +1,6 @@
 "phi3:3.8b-mini-instruct-4k-fp16":
   alias:
     - 3.8b
-    - mini
   project: vllm-chat
   service_config:
     name: phi3
@@ -30,6 +29,36 @@
     model: meta-llama/Llama-2-7b-chat-hf
     max_model_len: 1024
   chat_template: llama-2-chat
+"llama2:13b-chat-fp16":
+  alias:
+    - 13b
+  project: vllm-chat
+  service_config:
+    name: llama2
+    traffic:
+      timeout: 300
+    resources:
+      gpu: 1
+      gpu_type: nvidia-tesla-a100
+  engine_config:
+    model: meta-llama/Llama-2-13b-chat-hf
+    max_model_len: 1024
+  chat_template: llama-2-chat
+"llama2:70b-chat-fp16":
+  alias:
+    - 70b
+  project: vllm-chat
+  service_config:
+    name: llama2
+    traffic:
+      timeout: 300
+    resources:
+      gpu: 2
+      gpu_type: nvidia-a100-80g
+  engine_config:
+    model: meta-llama/Llama-2-70b-chat-hf
+    max_model_len: 1024
+  chat_template: llama-2-chat
 "llama2:7b-chat-awq-4bit":
   alias:
     - 7b-4bit
@@ -68,7 +97,6 @@
 "mistral:7b-instruct-fp16":
   alias:
     - 7b
-    - 7b-instruct
   project: vllm-chat
   service_config:
     name: mistral
@@ -110,7 +138,7 @@
       gpu: 1
       gpu_type: nvidia-a100-80g
   engine_config:
-    model: meta-llama/Meta-Llama-3-8B
+    model: casperhansen/llama-3-70b-instruct-awq
     max_model_len: 2048
     quantization: awq
 "llama3:8b-instruct-fp16":
diff --git a/vllm-chat/.gitignore b/vllm-chat/.gitignore
new file mode 100644
index 00000000..d5629589
--- /dev/null
+++ b/vllm-chat/.gitignore
@@ -0,0 +1,12 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# Environments
+venv/
+
+# BentoML
+bentoml/client_id
+
+chattts/ChatTTS/