diff --git a/gallery/index.yaml b/gallery/index.yaml
index 1541e6f77..92848a7df 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -1,4 +1,29 @@
 ---
+- name: "glm-4.7-flash"
+  url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
+  urls:
+    - https://huggingface.co/unsloth/GLM-4.7-Flash-GGUF
+  description: |
+    **GLM-4.7-Flash** is a 30B-A3B MoE (Mixture-of-Experts) model designed for efficient deployment. It outperforms competitors on benchmarks such as AIME 25, GPQA, and τ²-Bench, delivering strong accuracy while balancing performance and efficiency. Optimized for lightweight use cases, it supports inference via frameworks like vLLM and SGLang, with detailed deployment instructions in the official repository. Ideal for applications requiring high-quality text generation with minimal resource consumption.
+  overrides:
+    parameters:
+      model: llama-cpp/models/GLM-4.7-Flash-Q4_K_M.gguf
+    name: GLM-4.7-Flash-GGUF
+    backend: llama-cpp
+    template:
+      use_tokenizer_template: true
+    known_usecases:
+      - chat
+    function:
+      grammar:
+        disable: true
+    description: Imported from https://huggingface.co/unsloth/GLM-4.7-Flash-GGUF
+    options:
+      - use_jinja:true
+  files:
+    - filename: llama-cpp/models/GLM-4.7-Flash-Q4_K_M.gguf
+      sha256: c90d142243f7014ec1f8287d406cfbe87531659fb5a61e5e9503a5e493c524d2
+      uri: https://huggingface.co/unsloth/GLM-4.7-Flash-GGUF/resolve/main/GLM-4.7-Flash-Q4_K_M.gguf
 - name: "qwen3-vl-reranker-8b"
   url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
   urls:
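For anyone wanting to double-check the pinned artifact, here is a minimal Python sketch that streams the GGUF from the `uri` above and verifies it against the `sha256` in the `files` entry. The URL and hash are taken verbatim from this diff; the chunk size and script structure are illustrative only.

```python
import hashlib
import urllib.request

# URI and expected digest copied from the `files` section of the gallery entry.
URI = "https://huggingface.co/unsloth/GLM-4.7-Flash-GGUF/resolve/main/GLM-4.7-Flash-Q4_K_M.gguf"
EXPECTED = "c90d142243f7014ec1f8287d406cfbe87531659fb5a61e5e9503a5e493c524d2"

digest = hashlib.sha256()
with urllib.request.urlopen(URI) as resp:
    # Hash in 1 MiB chunks so the multi-GB file never sits in memory at once.
    for chunk in iter(lambda: resp.read(1 << 20), b""):
        digest.update(chunk)

assert digest.hexdigest() == EXPECTED, "checksum mismatch: refuse to load model"
print("sha256 OK")
```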
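Once the entry is installed, a quick smoke test can go through LocalAI's OpenAI-compatible chat endpoint. A minimal stdlib-only sketch, assuming a local instance on the default port 8080 and the model name from the gallery entry above; the prompt and timeout are placeholders.

```python
import json
import urllib.request

# Build an OpenAI-style Chat Completions request against the local instance.
req = urllib.request.Request(
    "http://localhost:8080/v1/chat/completions",
    data=json.dumps({
        "model": "glm-4.7-flash",  # gallery entry name from the diff above
        "messages": [{"role": "user", "content": "Summarize MoE in one sentence."}],
    }).encode(),
    headers={"Content-Type": "application/json"},
)

with urllib.request.urlopen(req, timeout=300) as resp:
    body = json.load(resp)

# The response follows the OpenAI Chat Completions schema.
print(body["choices"][0]["message"]["content"])
```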