diff --git a/gallery/index.yaml b/gallery/index.yaml index 279383cae..668ef9a6d 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1,4 +1,59 @@ --- +- name: "qwen3.6-35b-a3b-claude-4.6-opus-reasoning-distilled" + url: "github:mudler/LocalAI/gallery/virtual.yaml@master" + urls: + - https://huggingface.co/hesamation/Qwen3.6-35B-A3B-Claude-4.6-Opus-Reasoning-Distilled-GGUF + description: | + # 🔥 Qwen3.6-35B-A3B-Claude-4.6-Opus-Reasoning-Distilled + + A reasoning SFT fine-tune of `Qwen/Qwen3.6-35B-A3B` on chain-of-thought (CoT) distillation mostly sourced from Claude Opus 4.6. The goal is to preserve Qwen3.6's strong agentic coding and reasoning base while nudging the model toward structured Claude Opus-style reasoning traces and more stable long-form problem solving. + + The training path is text-only. The Qwen3.6 base architecture includes a vision encoder, but this fine-tuning run did not train on image or video examples. + + - **Developed by:** @hesamation + - **Base model:** `Qwen/Qwen3.6-35B-A3B` + - **License:** apache-2.0 + + This fine-tuning run is inspired by Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled, including the notebook/training workflow style and Claude Opus reasoning-distillation direction. + + [X (Twitter)](https://x.com/Hesamation) [Discord](https://discord.gg/vtJykN3t) + + ## Benchmark Results + + The MMLU-Pro pass used 70 total questions per model: `--limit 5` across 14 MMLU-Pro subjects. Treat this as a smoke/comparative check, not a release-quality full benchmark. + + ... 
+ license: "apache-2.0" + tags: + - llm + - gguf + - qwen + - reasoning + icon: https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen3.6/Figures/qwen3.6_35b_a3b_score.png + overrides: + backend: llama-cpp + function: + automatic_tool_parsing_fallback: true + grammar: + disable: true + known_usecases: + - chat + options: + - use_jinja:true + parameters: + min_p: 0 + model: llama-cpp/models/Qwen3.6-35B-A3B-Claude-4.6-Opus-Reasoning-Distilled-GGUF/Qwen3.6-35B-A3B-Claude-4.6-Opus-Reasoning-Distilled.Q4_K_M.gguf + presence_penalty: 1.5 + repeat_penalty: 1 + temperature: 0.7 + top_k: 20 + top_p: 0.8 + template: + use_tokenizer_template: true + files: + - filename: llama-cpp/models/Qwen3.6-35B-A3B-Claude-4.6-Opus-Reasoning-Distilled-GGUF/Qwen3.6-35B-A3B-Claude-4.6-Opus-Reasoning-Distilled.Q4_K_M.gguf + sha256: fd3bf7586354890a2710d69357c30fb221a31eecf9f3cd9418257d9289e02765 + uri: https://huggingface.co/hesamation/Qwen3.6-35B-A3B-Claude-4.6-Opus-Reasoning-Distilled-GGUF/resolve/main/Qwen3.6-35B-A3B-Claude-4.6-Opus-Reasoning-Distilled.Q4_K_M.gguf - name: "qwen3.5-9b-glm5.1-distill-v1" url: "github:mudler/LocalAI/gallery/virtual.yaml@master" urls: