From 016b7b016d3a971029e61875dcfe238e73f5e87f Mon Sep 17 00:00:00 2001
From: bojiang <bojiang_@outlook.com>
Date: Tue, 2 Jul 2024 22:20:02 +0800
Subject: [PATCH] refactor: move model aliases into openllm_alias labels

---
 recipe.yaml | 86 ++++++++++++++---------------------------------------
 1 file changed, 22 insertions(+), 64 deletions(-)

diff --git a/recipe.yaml b/recipe.yaml
index a6bfc03d..abb8ba7b 100644
--- a/recipe.yaml
+++ b/recipe.yaml
@@ -1,6 +1,4 @@
-"phi3:3.8b-mini-instruct-4k-fp16":
-  alias:
-    - 3.8b
+"phi3:3.8b-instruct-fp16":
   project: vllm-chat
   service_config:
     name: phi3
@@ -15,11 +13,9 @@
     dtype: half
   chat_template: phi-3
   extra_labels:
-    openllm_alias: 3.8b
+    openllm_alias: 3.8b,3.8b-mini,3.8b-mini-instruct-4k-fp16
     openllm_hf_model_id: microsoft/Phi-3-mini-4k-instruct
 "llama2:7b-chat-fp16":
-  alias:
-    - 7b
   project: vllm-chat
   service_config:
     name: llama2
@@ -34,11 +30,9 @@
     dtype: half
   chat_template: llama-2-chat
   extra_labels:
-    openllm_alias: 7b
+    openllm_alias: 7b,7b-chat
     openllm_hf_model_id: meta-llama/Llama-2-7b-chat-hf
 "llama2:13b-chat-fp16":
-  alias:
-    - 13b
   project: vllm-chat
   service_config:
     name: llama2
@@ -53,11 +47,9 @@
     dtype: half
   chat_template: llama-2-chat
   extra_labels:
-    openllm_alias: 13b
+    openllm_alias: 13b,13b-chat
     openllm_hf_model_id: meta-llama/Llama-2-13b-chat-hf
 "llama2:70b-chat-fp16":
-  alias:
-    - 70b
   project: vllm-chat
   service_config:
     name: llama2
@@ -72,11 +64,9 @@
     dtype: half
   chat_template: llama-2-chat
   extra_labels:
-    openllm_alias: 70b
+    openllm_alias: 70b,70b-chat
     openllm_hf_model_id: meta-llama/Llama-2-70b-chat-hf
 "llama2:7b-chat-awq-4bit":
-  alias:
-    - 7b-4bit
   project: vllm-chat
   service_config:
     name: llama2
@@ -92,11 +82,9 @@
     enforce_eager: true
   chat_template: llama-2-chat
   extra_labels:
-    openllm_alias: 7b-4bit
+    openllm_alias: 7b-4bit,7b-chat-4bit
     openllm_hf_model_id: TheBloke/Llama-2-7B-Chat-AWQ
 "mistral:7b-instruct-awq-4bit":
-  alias:
-    - 7b-4bit
   project: vllm-chat
   service_config:
     name: mistral
@@ -113,11 +101,9 @@
     dtype: half
   chat_template: mistral-instruct
   extra_labels:
-    openllm_alias: 7b-4bit
+    openllm_alias: 7b-4bit,7b-instruct-4bit
     openllm_hf_model_id: TheBloke/Mistral-7B-Instruct-v0.1-AWQ
 "mistral:7b-instruct-fp16":
-  alias:
-    - 7b
   project: vllm-chat
   service_config:
     name: mistral
@@ -133,11 +119,9 @@
     dtype: half
   chat_template: mistral-instruct
   extra_labels:
-    openllm_alias: 7b
+    openllm_alias: 7b,7b-instruct
     openllm_hf_model_id: mistralai/Mistral-7B-Instruct-v0.1
 "llama3:8b-instruct-awq-4bit":
-  alias:
-    - 8b-4bit
   project: vllm-chat
   service_config:
     name: llama3
@@ -151,11 +135,9 @@
     max_model_len: 2048
     quantization: awq
   extra_labels:
-    openllm_alias: 8b-4bit
+    openllm_alias: 8b-4bit,8b-instruct-4bit
     openllm_hf_model_id: casperhansen/llama-3-8b-instruct-awq
 "llama3:70b-instruct-awq-4bit":
-  alias:
-    - 70b-4bit
   project: vllm-chat
   service_config:
     name: llama3
@@ -169,11 +151,9 @@
     max_model_len: 2048
     quantization: awq
   extra_labels:
-    openllm_alias: 70b-4bit
+    openllm_alias: 70b-4bit,70b-instruct-4bit
     openllm_hf_model_id: casperhansen/llama-3-70b-instruct-awq
 "llama3:8b-instruct-fp16":
-  alias:
-    - 8b
   project: vllm-chat
   service_config:
     name: llama3
@@ -187,11 +167,9 @@
     max_model_len: 2048
     dtype: half
   extra_labels:
-    openllm_alias: 8b
+    openllm_alias: 8b,8b-instruct
     openllm_hf_model_id: meta-llama/Meta-Llama-3-8B-Instruct
 "llama3:70b-instruct-fp16":
-  alias:
-    - 70b
   project: vllm-chat
   service_config:
     name: llama3
@@ -204,11 +182,9 @@
     model: meta-llama/Meta-Llama-3-70B-Instruct
     max_model_len: 2048
   extra_labels:
-    openllm_alias: 70b
+    openllm_alias: 70b,70b-instruct
     openllm_hf_model_id: meta-llama/Meta-Llama-3-70B-Instruct
 "gemma:2b-instruct-fp16":
-  alias:
-    - 2b
   project: vllm-chat
   service_config:
     name: gemma
@@ -222,11 +198,9 @@
     max_model_len: 2048
     dtype: half
   extra_labels:
-    openllm_alias: 2b
+    openllm_alias: 2b,2b-instruct
     openllm_hf_model_id: google/gemma-2b-it
 "gemma:7b-instruct-fp16":
-  alias:
-    - 7b
   project: vllm-chat
   service_config:
     name: gemma
@@ -240,11 +214,9 @@
     max_model_len: 2048
     dtype: half
   extra_labels:
-    openllm_alias: 7b
+    openllm_alias: 7b,7b-instruct
     openllm_hf_model_id: google/gemma-7b-it
 "gemma:7b-instruct-awq-4bit":
-  alias:
-    - 7b-4bit
   project: vllm-chat
   service_config:
     name: gemma
@@ -259,7 +231,7 @@
     quantization: awq
   chat_template: gemma-it
   extra_labels:
-    openllm_alias: 7b-4bit
+    openllm_alias: 7b-4bit,7b-instruct-4bit
     openllm_hf_model_id: casperhansen/gemma-7b-it-awq
 "mixtral:8x7b-instruct-v0.1-fp16":
   project: vllm-chat
@@ -296,8 +268,6 @@
     openllm_alias: 8x7b-4bit
     openllm_hf_model_id: casperhansen/mixtral-instruct-awq
 "qwen2:0.5b-instruct-fp16":
-  alias:
-    - 0.5b
   project: vllm-chat
   service_config:
     name: qwen2
@@ -311,11 +281,9 @@
     max_model_len: 2048
     dtype: half
   extra_labels:
-    openllm_alias: 0.5b
+    openllm_alias: 0.5b,0.5b-instruct
     openllm_hf_model_id: Qwen/Qwen2-0.5B-Instruct
 "qwen2:1.5b-instruct-fp16":
-  alias:
-    - 1.5b
   project: vllm-chat
   service_config:
     name: qwen2
@@ -329,11 +297,9 @@
     max_model_len: 2048
     dtype: half
   extra_labels:
-    openllm_alias: 1.5b
+    openllm_alias: 1.5b,1.5b-instruct
     openllm_hf_model_id: Qwen/Qwen2-1.5B-Instruct
 "qwen2:7b-instruct-awq-4bit":
-  alias:
-    - 7b-4bit
   project: vllm-chat
   service_config:
     name: qwen2
@@ -347,11 +313,9 @@
     max_model_len: 2048
     quantization: awq
   extra_labels:
-    openllm_alias: 7b-4bit
+    openllm_alias: 7b-4bit,7b-instruct-4bit
     openllm_hf_model_id: Qwen/Qwen2-7B-Instruct-AWQ
 "qwen2:7b-instruct-fp16":
-  alias:
-    - 7b
   project: vllm-chat
   service_config:
     name: qwen2
@@ -365,11 +329,9 @@
     max_model_len: 2048
     dtype: half
   extra_labels:
-    openllm_alias: 7b
+    openllm_alias: 7b,7b-instruct
     openllm_hf_model_id: Qwen/Qwen2-7B-Instruct
 "qwen2:72b-instruct-awq-4bit":
-  alias:
-    - 72b-4bit
   project: vllm-chat
   service_config:
     name: qwen2
@@ -383,11 +345,9 @@
     max_model_len: 2048
     quantization: awq
   extra_labels:
-    openllm_alias: 72b-4bit
+    openllm_alias: 72b-4bit,72b-instruct-4bit
     openllm_hf_model_id: Qwen/Qwen2-72B-Instruct-AWQ
 "qwen2:57b-a14b-instruct-fp16":
-  alias:
-    - 57b-a14b
   project: vllm-chat
   service_config:
     name: qwen2
@@ -401,11 +361,9 @@
     max_model_len: 2048
     dtype: half
   extra_labels:
-    openllm_alias: 57b-a14b
+    openllm_alias: 57b-a14b,57b-a14b-instruct
     openllm_hf_model_id: Qwen/Qwen2-57B-A14B-Instruct
 "qwen2:72b-instruct-fp16":
-  alias:
-    - 72b
   project: vllm-chat
   service_config:
     name: qwen2
@@ -419,5 +377,5 @@
     max_model_len: 2048
     dtype: half
   extra_labels:
-    openllm_alias: 72b
+    openllm_alias: 72b,72b-instruct
     openllm_hf_model_id: Qwen/Qwen2-72B-Instruct