From 4024220d008e7aa4e819cd6e421c92e5835737b7 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Mon, 26 Jan 2026 12:11:24 +0100
Subject: [PATCH] chore(model gallery): :robot: add 1 new models via gallery
 agent (#8220)

chore(model gallery): :robot: add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 gallery/index.yaml | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index fccea6eae..941f041c6 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -1,4 +1,29 @@
 ---
+- name: "glm-4.7-flash-derestricted"
+  url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
+  urls:
+    - https://huggingface.co/mradermacher/GLM-4.7-Flash-Derestricted-GGUF
+  description: |
+    This model is a quantized version of the original GLM-4.7-Flash-Derestricted model, derived from the base model `koute/GLM-4.7-Flash-Derestricted`. It is a derestricted variant, featuring tags like "derestricted," "uncensored," and "unlimited." The quantized versions (e.g., Q2_K, Q4_K_S, Q6_K) offer varying trade-offs between accuracy and efficiency, with the Q4_K_S and Q6_K variants being recommended for balanced performance. This gallery entry installs the Q4_K_M quantization. The model is optimized for fast inference and supports multiple quantization schemes, though some advanced quantization options (like IQ4_XS) are not available. It is intended for use in environments with specific constraints or restrictions.
+  overrides:
+    parameters:
+      model: llama-cpp/models/GLM-4.7-Flash-Derestricted.Q4_K_M.gguf
+    name: GLM-4.7-Flash-Derestricted-GGUF
+    backend: llama-cpp
+    template:
+      use_tokenizer_template: true
+    known_usecases:
+      - chat
+    function:
+      grammar:
+        disable: true
+    description: Imported from https://huggingface.co/mradermacher/GLM-4.7-Flash-Derestricted-GGUF
+    options:
+      - use_jinja:true
+  files:
+    - filename: llama-cpp/models/GLM-4.7-Flash-Derestricted.Q4_K_M.gguf
+      sha256: 93de43daa88211d772de666a33cb890ac23f5780921445f62a4dde6f0e8af540
+      uri: https://huggingface.co/mradermacher/GLM-4.7-Flash-Derestricted-GGUF/resolve/main/GLM-4.7-Flash-Derestricted.Q4_K_M.gguf
 - &qwen-tts
   urls:
     - https://huggingface.co/Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice