mirror of
https://github.com/mudler/LocalAI.git
synced 2026-03-31 21:25:59 -04:00
* feat: wire min_p Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat: inferencing defaults Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * chore(refactor): re-use iterative parser Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * chore: generate automatically inference defaults from unsloth Instead of trying to re-invent the wheel and maintain here the inference defaults, prefer to consume unsloth ones, and contribute there as necessary. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * chore: apply defaults also to models installed via gallery Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * chore: be consistent and apply fallback to all endpoint Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
58 lines
5.2 KiB
JSON
58 lines
5.2 KiB
JSON
{
|
|
"_comment": "Auto-generated from unsloth inference_defaults.json. DO NOT EDIT. Run go generate ./core/config/ to update.",
|
|
"families": {
|
|
"qwen3.5": {"min_p":0,"presence_penalty":1.5,"repeat_penalty":1,"temperature":0.7,"top_k":20,"top_p":0.8},
|
|
"qwen3-coder": {"min_p":0,"repeat_penalty":1,"temperature":0.7,"top_k":20,"top_p":0.8},
|
|
"qwen3-next": {"min_p":0,"repeat_penalty":1,"temperature":0.7,"top_k":20,"top_p":0.8},
|
|
"qwen3-vl": {"min_p":0,"repeat_penalty":1,"temperature":0.7,"top_k":20,"top_p":0.8},
|
|
"qwen3": {"min_p":0,"repeat_penalty":1,"temperature":0.6,"top_k":20,"top_p":0.95},
|
|
"qwen2.5-coder": {"min_p":0.1,"repeat_penalty":1,"temperature":1.5,"top_k":-1,"top_p":0.95},
|
|
"qwen2.5-vl": {"min_p":0.1,"repeat_penalty":1,"temperature":1.5,"top_k":-1,"top_p":0.95},
|
|
"qwen2.5-omni": {"min_p":0,"repeat_penalty":1,"temperature":0.7,"top_k":20,"top_p":0.8},
|
|
"qwen2.5-math": {"min_p":0,"repeat_penalty":1,"temperature":0.7,"top_k":20,"top_p":0.8},
|
|
"qwen2.5": {"min_p":0,"repeat_penalty":1,"temperature":0.7,"top_k":20,"top_p":0.8},
|
|
"qwen2-vl": {"min_p":0.1,"repeat_penalty":1,"temperature":1.5,"top_k":-1,"top_p":0.95},
|
|
"qwen2": {"min_p":0,"repeat_penalty":1,"temperature":0.7,"top_k":20,"top_p":0.8},
|
|
"qwq": {"min_p":0,"repeat_penalty":1,"temperature":0.6,"top_k":40,"top_p":0.95},
|
|
"gemma-3n": {"min_p":0,"repeat_penalty":1,"temperature":1,"top_k":64,"top_p":0.95},
|
|
"gemma-3": {"min_p":0,"repeat_penalty":1,"temperature":1,"top_k":64,"top_p":0.95},
|
|
"medgemma": {"min_p":0,"repeat_penalty":1,"temperature":1,"top_k":64,"top_p":0.95},
|
|
"gemma-2": {"min_p":0,"repeat_penalty":1,"temperature":1,"top_k":64,"top_p":0.95},
|
|
"llama-4": {"min_p":0.01,"repeat_penalty":1,"temperature":1,"top_k":-1,"top_p":0.9},
|
|
"llama-3.3": {"min_p":0.1,"repeat_penalty":1,"temperature":1.5,"top_k":-1,"top_p":0.95},
|
|
"llama-3.2": {"min_p":0.1,"repeat_penalty":1,"temperature":1.5,"top_k":-1,"top_p":0.95},
|
|
"llama-3.1": {"min_p":0.1,"repeat_penalty":1,"temperature":1.5,"top_k":-1,"top_p":0.95},
|
|
"llama-3": {"min_p":0.1,"repeat_penalty":1,"temperature":1.5,"top_k":-1,"top_p":0.95},
|
|
"phi-4": {"min_p":0,"repeat_penalty":1,"temperature":0.8,"top_k":-1,"top_p":0.95},
|
|
"phi-3": {"min_p":0.01,"repeat_penalty":1,"temperature":0.7,"top_k":-1,"top_p":0.9},
|
|
"mistral-nemo": {"min_p":0.01,"repeat_penalty":1,"temperature":0.7,"top_k":-1,"top_p":0.95},
|
|
"mistral-small": {"min_p":0.01,"repeat_penalty":1,"temperature":0.15,"top_k":-1,"top_p":0.95},
|
|
"mistral-large": {"min_p":0.01,"repeat_penalty":1,"temperature":0.7,"top_k":-1,"top_p":0.95},
|
|
"magistral": {"min_p":0.01,"repeat_penalty":1,"temperature":0.7,"top_k":-1,"top_p":0.95},
|
|
"ministral": {"min_p":0.01,"repeat_penalty":1,"temperature":0.15,"top_k":-1,"top_p":0.95},
|
|
"devstral": {"min_p":0.01,"repeat_penalty":1,"temperature":0.7,"top_k":-1,"top_p":0.95},
|
|
"pixtral": {"min_p":0.1,"repeat_penalty":1,"temperature":1.5,"top_k":-1,"top_p":0.95},
|
|
"deepseek-r1": {"min_p":0.01,"repeat_penalty":1,"temperature":0.6,"top_k":-1,"top_p":0.95},
|
|
"deepseek-v3": {"min_p":0.01,"repeat_penalty":1,"temperature":0.6,"top_k":-1,"top_p":0.95},
|
|
"deepseek-ocr": {"min_p":0.01,"repeat_penalty":1,"temperature":0,"top_k":-1,"top_p":0.95},
|
|
"glm-5": {"min_p":0.01,"repeat_penalty":1,"temperature":1,"top_k":-1,"top_p":0.95},
|
|
"glm-4": {"min_p":0.01,"repeat_penalty":1,"temperature":1,"top_k":-1,"top_p":0.95},
|
|
"nemotron": {"min_p":0.01,"repeat_penalty":1,"temperature":1,"top_k":-1,"top_p":1},
|
|
"minimax-m2.5": {"min_p":0.01,"repeat_penalty":1,"temperature":1,"top_k":40,"top_p":0.95},
|
|
"minimax": {"min_p":0.01,"repeat_penalty":1,"temperature":1,"top_k":40,"top_p":0.95},
|
|
"gpt-oss": {"min_p":0.01,"repeat_penalty":1,"temperature":1,"top_k":0,"top_p":1},
|
|
"granite-4": {"min_p":0.01,"repeat_penalty":1,"temperature":0,"top_k":0,"top_p":1},
|
|
"kimi-k2": {"min_p":0.01,"repeat_penalty":1,"temperature":0.6,"top_k":-1,"top_p":0.95},
|
|
"kimi": {"min_p":0.01,"repeat_penalty":1,"temperature":0.6,"top_k":-1,"top_p":0.95},
|
|
"lfm2": {"min_p":0.15,"repeat_penalty":1.05,"temperature":0.1,"top_k":50,"top_p":0.1},
|
|
"smollm": {"min_p":0.01,"repeat_penalty":1,"temperature":0.7,"top_k":-1,"top_p":0.95},
|
|
"olmo": {"min_p":0.01,"repeat_penalty":1,"temperature":0.7,"top_k":-1,"top_p":0.95},
|
|
"falcon": {"min_p":0.01,"repeat_penalty":1,"temperature":0.7,"top_k":-1,"top_p":0.95},
|
|
"ernie": {"min_p":0.01,"repeat_penalty":1,"temperature":0.7,"top_k":-1,"top_p":0.95},
|
|
"seed": {"min_p":0.01,"repeat_penalty":1,"temperature":0.7,"top_k":-1,"top_p":0.95},
|
|
"grok": {"min_p":0.01,"repeat_penalty":1,"temperature":1,"top_k":-1,"top_p":0.95},
|
|
"mimo": {"min_p":0.01,"repeat_penalty":1,"temperature":0.7,"top_k":-1,"top_p":0.95}
|
|
},
|
|
"patterns": ["qwen3.5","qwen3-coder","qwen3-next","qwen3-vl","qwen3","qwen2.5-coder","qwen2.5-vl","qwen2.5-omni","qwen2.5-math","qwen2.5","qwen2-vl","qwen2","qwq","gemma-3n","gemma-3","medgemma","gemma-2","llama-4","llama-3.3","llama-3.2","llama-3.1","llama-3","phi-4","phi-3","mistral-nemo","mistral-small","mistral-large","magistral","ministral","devstral","pixtral","deepseek-r1","deepseek-v3","deepseek-ocr","glm-5","glm-4","nemotron","minimax-m2.5","minimax","gpt-oss","granite-4","kimi-k2","kimi","lfm2","smollm","olmo","falcon","ernie","seed","grok","mimo"]
|
|
}
|