diff --git a/gallery/index.yaml b/gallery/index.yaml
index fccea6eae..941f041c6 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -1,4 +1,29 @@
 ---
+- name: "glm-4.7-flash-derestricted"
+  url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
+  urls:
+    - https://huggingface.co/mradermacher/GLM-4.7-Flash-Derestricted-GGUF
+  description: |
+    This model is a quantized version of the original GLM-4.7-Flash-Derestricted model, derived from the base model `koute/GLM-4.7-Flash-Derestricted`. It is a derestricted variant, carrying tags such as "derestricted," "uncensored," and "unlimited." The quantized versions (e.g., Q2_K, Q4_K_S, Q6_K) offer varying trade-offs between accuracy and efficiency, with the Q4_K_S and Q6_K variants recommended for balanced performance. The model is optimized for fast inference and supports multiple quantization schemes, though some advanced quantization options (such as IQ4_XS) are not available. It is intended for use in resource-constrained environments.
+  overrides:
+    parameters:
+      model: llama-cpp/models/GLM-4.7-Flash-Derestricted.Q4_K_M.gguf
+    name: GLM-4.7-Flash-Derestricted-GGUF
+    backend: llama-cpp
+    template:
+      use_tokenizer_template: true
+    known_usecases:
+      - chat
+    function:
+      grammar:
+        disable: true
+    description: Imported from https://huggingface.co/mradermacher/GLM-4.7-Flash-Derestricted-GGUF
+    options:
+      - use_jinja:true
+  files:
+    - filename: llama-cpp/models/GLM-4.7-Flash-Derestricted.Q4_K_M.gguf
+      sha256: 93de43daa88211d772de666a33cb890ac23f5780921445f62a4dde6f0e8af540
+      uri: https://huggingface.co/mradermacher/GLM-4.7-Flash-Derestricted-GGUF/resolve/main/GLM-4.7-Flash-Derestricted.Q4_K_M.gguf
 - &qwen-tts
   urls:
     - https://huggingface.co/Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice
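
The `sha256` in the `files` stanza is the checksum against which the downloaded GGUF can be verified. A minimal Python sketch of that check, assuming it is run from the LocalAI models directory so the `filename` path from the entry above resolves:

```python
import hashlib

# Expected checksum, copied from the gallery entry's `sha256` field above.
EXPECTED_SHA256 = "93de43daa88211d772de666a33cb890ac23f5780921445f62a4dde6f0e8af540"

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Hash the file in 1 MiB chunks so multi-GB GGUF files never load fully into memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

if __name__ == "__main__":
    # Path taken from the `filename` field of the entry above (assumed relative
    # to the models directory).
    path = "llama-cpp/models/GLM-4.7-Flash-Derestricted.Q4_K_M.gguf"
    actual = sha256_of(path)
    print("OK" if actual == EXPECTED_SHA256 else f"MISMATCH: {actual}")
```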
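
Once installed, the model should be reachable under its gallery `name` through LocalAI's OpenAI-compatible API. A usage sketch, assuming a LocalAI instance listening on the default `localhost:8080`:

```python
import json
import urllib.request

# Payload follows the OpenAI chat-completions format that LocalAI exposes;
# the model id matches the gallery entry's `name` field.
payload = {
    "model": "glm-4.7-flash-derestricted",
    "messages": [{"role": "user", "content": "Hello!"}],
}
req = urllib.request.Request(
    "http://localhost:8080/v1/chat/completions",
    data=json.dumps(payload).encode(),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.loads(resp.read())["choices"][0]["message"]["content"])
```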