mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-06-11 09:59:20 -04:00
add more version of llama
This commit is contained in:
34
recipe.yaml
34
recipe.yaml
@@ -1,7 +1,6 @@
|
||||
"phi3:3.8b-mini-instruct-4k-fp16":
|
||||
alias:
|
||||
- 3.8b
|
||||
- mini
|
||||
project: vllm-chat
|
||||
service_config:
|
||||
name: phi3
|
||||
@@ -30,6 +29,36 @@
|
||||
model: meta-llama/Llama-2-7b-chat-hf
|
||||
max_model_len: 1024
|
||||
chat_template: llama-2-chat
|
||||
"llama2:13b-chat-fp16":
|
||||
alias:
|
||||
- 13b
|
||||
project: vllm-chat
|
||||
service_config:
|
||||
name: llama2
|
||||
traffic:
|
||||
timeout: 300
|
||||
resources:
|
||||
gpu: 1
|
||||
gpu_type: nvidia-tesla-a100
|
||||
engine_config:
|
||||
model: meta-llama/Llama-2-13b-chat-hf
|
||||
max_model_len: 1024
|
||||
chat_template: llama-2-chat
|
||||
"llama2:70b-chat-fp16":
|
||||
alias:
|
||||
- 70b
|
||||
project: vllm-chat
|
||||
service_config:
|
||||
name: llama2
|
||||
traffic:
|
||||
timeout: 300
|
||||
resources:
|
||||
gpu: 2
|
||||
gpu_type: nvidia-a100-80g
|
||||
engine_config:
|
||||
model: meta-llama/Llama-2-70b-chat-hf
|
||||
max_model_len: 1024
|
||||
chat_template: llama-2-chat
|
||||
"llama2:7b-chat-awq-4bit":
|
||||
alias:
|
||||
- 7b-4bit
|
||||
@@ -68,7 +97,6 @@
|
||||
"mistral:7b-instruct-fp16":
|
||||
alias:
|
||||
- 7b
|
||||
- 7b-instruct
|
||||
project: vllm-chat
|
||||
service_config:
|
||||
name: mistral
|
||||
@@ -110,7 +138,7 @@
|
||||
gpu: 1
|
||||
gpu_type: nvidia-a100-80g
|
||||
engine_config:
|
||||
model: meta-llama/Meta-Llama-3-8B
|
||||
model: casperhansen/llama-3-70b-instruct-awq
|
||||
max_model_len: 2048
|
||||
quantization: awq
|
||||
"llama3:8b-instruct-fp16":
|
||||
|
||||
12
vllm-chat/.gitignore
vendored
Normal file
12
vllm-chat/.gitignore
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# Environments
|
||||
venv/
|
||||
|
||||
# BentoML
|
||||
bentoml/client_id
|
||||
|
||||
chattts/ChatTTS/
|
||||
Reference in New Issue
Block a user