Mirror of https://github.com/mudler/LocalAI.git, synced 2026-01-21 21:01:23 -05:00
chore(model gallery): 🤖 add 1 new models via gallery agent (#8128)
chore(model gallery): 🤖 add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
@@ -1,4 +1,29 @@
---
- name: "glm-4.7-flash"
  url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
  urls:
    - https://huggingface.co/unsloth/GLM-4.7-Flash-GGUF
  description: |
    **GLM-4.7-Flash** is a 30B-A3B MoE (Mixture of Experts) model designed for efficient deployment. It outperforms competitors on benchmarks such as AIME 25, GPQA, and τ²-Bench, offering strong accuracy while balancing performance and efficiency. Optimized for lightweight use cases, it supports inference via frameworks such as vLLM and SGLang, with detailed deployment instructions in the official repository. Ideal for applications requiring high-quality text generation with minimal resource consumption.
  overrides:
    parameters:
      model: llama-cpp/models/GLM-4.7-Flash-Q4_K_M.gguf
    name: GLM-4.7-Flash-GGUF
    backend: llama-cpp
    template:
      use_tokenizer_template: true
    known_usecases:
      - chat
    function:
      grammar:
        disable: true
    description: Imported from https://huggingface.co/unsloth/GLM-4.7-Flash-GGUF
    options:
      - use_jinja:true
  files:
    - filename: llama-cpp/models/GLM-4.7-Flash-Q4_K_M.gguf
      sha256: c90d142243f7014ec1f8287d406cfbe87531659fb5a61e5e9503a5e493c524d2
      uri: https://huggingface.co/unsloth/GLM-4.7-Flash-GGUF/resolve/main/GLM-4.7-Flash-Q4_K_M.gguf
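
The sha256 above is what the gallery checks the downloaded GGUF against. To verify a local copy by hand, a minimal sketch (the local file path is an assumption; point it at wherever the file was saved):

```python
import hashlib

# Expected digest, copied from the sha256 field of the gallery entry above.
EXPECTED = "c90d142243f7014ec1f8287d406cfbe87531659fb5a61e5e9503a5e493c524d2"

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    # Stream in 1 MiB chunks so a multi-GB GGUF never has to fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Hypothetical local filename; adjust to the actual download location.
print(sha256_of("GLM-4.7-Flash-Q4_K_M.gguf") == EXPECTED)
```

Once the entry is merged, the model can be installed from the gallery and queried through LocalAI's OpenAI-compatible API. A minimal sketch, assuming a LocalAI instance on its default port 8080 with the model already installed; the host and the prompt are assumptions, only the standard /v1/chat/completions route is used:

```python
import json
import urllib.request

payload = {
    "model": "glm-4.7-flash",  # matches the entry's name field above
    "messages": [{"role": "user", "content": "Say hello in one sentence."}],
}
req = urllib.request.Request(
    "http://localhost:8080/v1/chat/completions",  # assumed local instance
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    reply = json.load(resp)
print(reply["choices"][0]["message"]["content"])
```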
- name: "qwen3-vl-reranker-8b"
  url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
  urls:
||||