From 693e3eec050cd507f4369800a1843ba0bb41448b Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Thu, 25 Jun 2026 08:11:52 +0200
Subject: [PATCH] chore(model gallery): :robot: add 1 new models via gallery
 agent (#10505)

chore(model gallery): :robot: add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 gallery/index.yaml | 73 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 52f23a771..25a6e781d 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -1,4 +1,77 @@
 ---
+- name: "gemmable-4-12b-mtp"
+  url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
+  urls:
+    - https://huggingface.co/Mia-AiLab/Gemmable-4-12B-MTP-GGUF
+  description: |
+    ## Gemmable 4 12B
+
+    Gemmable 4 12B is a GGUF export of Gemma 4 12B fine-tuned on Fable-5 style
+    reasoning and assistant traces.
+
+    ## Highlights
+
+      - Base model: `google/gemma-4-12B`
+      - Format: GGUF
+      - Training style: Fable-5 style reasoning and assistant traces
+      - Distribution: fp16 GGUF plus matching assistant GGUFs for each quant
+      - Intended use: local inference, coding, reasoning, and assistant workflows
+
+    ## How to use
+
+    ### llama.cpp
+
+    Standard load:
+
+    ```bash
+    llama-server -m "gemmable-4-12b-fp16.gguf"
+    ```
+
+    Speculative / draft-MTP load:
+
+    ```bash
+    llama-server -m "gemmable-4-12b-Q4_K_M.gguf" \
+      --spec-draft-model "gemmable-4-12b-Q4_K_M-mtp.gguf" \
+      --spec-type draft-mtp \
+      --spec-draft-n-max 4
+    ```
+
+    Use the matching fp16 or quantized main file with its `-mtp` companion.
+
+    ### LM Studio
+
+    1.  Search for this repo, then download the target file and its matching mtp file.
+    2.  Load the target.
+    3.  Load settings → Speculative Decoding → select the mtp file.
+
+    (Requires LM Studio with am17an's PR merged or a custom llama.cpp runtime. As of 2026-05, the mainline LM Studio runtime doesn't yet support `draft-mtp` for Gemma-4 — track the upstream merge.)
+
+    ## GGUF / local inference notes
+
+    ...
+  tags:
+    - llm
+    - gguf
+    - reasoning
+  icon: https://storage.ko-fi.com/cdn/kofi6.png
+  overrides:
+    backend: llama-cpp
+    function:
+      automatic_tool_parsing_fallback: true
+      grammar:
+        disable: true
+    known_usecases:
+      - chat
+    options:
+      - use_jinja:true
+    parameters:
+      model: llama-cpp/models/Gemmable-4-12B-MTP-GGUF/gemmable-4-12b-Q4_K_M-mtp.gguf
+    template:
+      use_tokenizer_template: true
+  files:
+    - filename: llama-cpp/models/Gemmable-4-12B-MTP-GGUF/gemmable-4-12b-Q4_K_M-mtp.gguf
+      sha256: 217dc0ed177ecc733f801a851c3e3854cf1b17a1f86cd5430c0a7f82d93027bc
+      uri: https://huggingface.co/Mia-AiLab/Gemmable-4-12B-MTP-GGUF/resolve/main/gemmable-4-12b-Q4_K_M-mtp.gguf
 - name: "lfm2.5-1.2b-instruct"
   url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
   urls: