From c8173f0f676d1604b793003fc1457c445998384d Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 24 Dec 2025 09:14:03 +0100 Subject: [PATCH] chore(gallery): cleanup old architectures Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 7188 -------------------------------------------- 1 file changed, 7188 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 80c4bd2c4..a7c034440 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -4454,183 +4454,6 @@ - filename: EuroLLM-9B-Instruct-Q4_K_M.gguf sha256: 785a3b2883532381704ef74f866f822f179a931801d1ed1cf12e6deeb838806b uri: huggingface://bartowski/EuroLLM-9B-Instruct-GGUF/EuroLLM-9B-Instruct-Q4_K_M.gguf -- &phi4 - url: "github:mudler/LocalAI/gallery/phi-4-chat.yaml@master" - name: "phi-4" - icon: https://avatars.githubusercontent.com/u/6154722 - license: mit - tags: - - llm - - gguf - - phi - - cpu - - gpu - - text-generation - urls: - - https://huggingface.co/microsoft/phi-4 - - https://huggingface.co/bartowski/phi-4-GGUF - description: | - phi-4 is a state-of-the-art open model built upon a blend of synthetic datasets, data from filtered public domain websites, and acquired academic books and Q&A datasets. The goal of this approach was to ensure that small capable models were trained with data focused on high quality and advanced reasoning. - phi-4 underwent a rigorous enhancement and alignment process, incorporating both supervised fine-tuning and direct preference optimization to ensure precise instruction adherence and robust safety measures. Phi-4 is a 14B parameters, dense decoder-only Transformer model. 
- overrides: - parameters: - model: phi-4-Q4_K_M.gguf - files: - - filename: phi-4-Q4_K_M.gguf - uri: huggingface://bartowski/phi-4-GGUF/phi-4-Q4_K_M.gguf - sha256: 009aba717c09d4a35890c7d35eb59d54e1dba884c7c526e7197d9c13ab5911d9 -- !!merge <<: *phi4 - url: "github:mudler/LocalAI/gallery/phi-4-chat-fcall.yaml@master" - name: "LocalAI-functioncall-phi-4-v0.3" - icon: https://cdn-uploads.huggingface.co/production/uploads/647374aa7ff32a81ac6d35d4/Dzbdzn27KEc3K6zNNi070.png - urls: - - https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.3 - - https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.3-Q4_K_M-GGUF - description: | - A model tailored to be conversational and execute function calls with LocalAI. This model is based on phi-4. - overrides: - parameters: - model: localai-functioncall-phi-4-v0.3-q4_k_m.gguf - files: - - filename: localai-functioncall-phi-4-v0.3-q4_k_m.gguf - sha256: 23fee048ded2a6e2e1a7b6bbefa6cbf83068f194caa9552aecbaa00fec8a16d5 - uri: huggingface://mudler/LocalAI-functioncall-phi-4-v0.3-Q4_K_M-GGUF/localai-functioncall-phi-4-v0.3-q4_k_m.gguf -- !!merge <<: *phi4 - url: "github:mudler/LocalAI/gallery/phi-4-chat-fcall.yaml@master" - name: "LocalAI-functioncall-phi-4-v0.2" - icon: https://cdn-uploads.huggingface.co/production/uploads/647374aa7ff32a81ac6d35d4/Dzbdzn27KEc3K6zNNi070.png - description: | - A model tailored to be conversational and execute function calls with LocalAI. This model is based on phi-4. - This is the second iteration of https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.1 with added CoT (o1) capabilities from the marco-o1 dataset. 
- urls: - - https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.2 - - https://huggingface.co/mudler/localai-functioncall-phi-4-v0.2-Q4_K_M-GGUF - overrides: - parameters: - model: localai-functioncall-phi-4-v0.2-q4_k_m.gguf - files: - - filename: localai-functioncall-phi-4-v0.2-q4_k_m.gguf - uri: huggingface://mudler/localai-functioncall-phi-4-v0.2-Q4_K_M-GGUF/localai-functioncall-phi-4-v0.2-q4_k_m.gguf - sha256: 681b5fb5070f23323a9cc8cbd1306b1c348c2f292041d3ba2335b26b071757b7 -- !!merge <<: *phi4 - url: "github:mudler/LocalAI/gallery/phi-4-chat-fcall.yaml@master" - name: "LocalAI-functioncall-phi-4-v0.1" - icon: https://cdn-uploads.huggingface.co/production/uploads/647374aa7ff32a81ac6d35d4/Dzbdzn27KEc3K6zNNi070.png - description: | - A model tailored to be conversational and execute function calls with LocalAI. This model is based on phi-4. - urls: - - https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.1 - - https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.1-Q4_K_M-GGUF - overrides: - parameters: - model: localai-functioncall-phi-4-v0.1-q4_k_m.gguf - files: - - filename: localai-functioncall-phi-4-v0.1-q4_k_m.gguf - uri: huggingface://mudler/LocalAI-functioncall-phi-4-v0.1-Q4_K_M-GGUF/localai-functioncall-phi-4-v0.1-q4_k_m.gguf - sha256: 0ae4e5e4ba89c16c1e810285c5c8b84416fa67f8ed7c175aa0b6fc0a103017aa -- !!merge <<: *phi4 - name: "sicariussicariistuff_phi-lthy4" - url: "github:mudler/LocalAI/gallery/chatml.yaml@master" - icon: https://huggingface.co/SicariusSicariiStuff/Phi-lthy4/resolve/main/Images/Phi-Lthy4.png - urls: - - https://huggingface.co/SicariusSicariiStuff/Phi-lthy4 - - https://huggingface.co/bartowski/SicariusSicariiStuff_Phi-lthy4-GGUF - description: | - - The BEST Phi-4 Roleplay finetune in the world (Not that much of an achievement here, Phi roleplay finetunes can probably be counted on a single hand). - - Compact size & fully healed from the brain surgery Only 11.9B parameters. 
Phi-4 wasn't that hard to run even at 14B, now with even fewer brain cells, your new phone could probably run it easily. (SD8Gen3 and above recommended). - - Strong Roleplay & Creative writing abilities. This really surprised me. Actually good. - Writes and roleplays quite uniquely, probably because of lack of RP\writing slop in the pretrain. Who would have thought? - - Smart assistant with low refusals - It kept some of the smarts, and our little Phi-Lthy here will be quite eager to answer your naughty questions. - - Quite good at following the character card. Finally, it puts its math brain to some productive tasks. Gooner technology is becoming more popular by the day. - overrides: - parameters: - model: SicariusSicariiStuff_Phi-lthy4-Q4_K_M.gguf - files: - - filename: SicariusSicariiStuff_Phi-lthy4-Q4_K_M.gguf - sha256: a5004b2d0f3eb869f07285f53ec283aa383063085987113d2a41cb54708fb5ad - uri: huggingface://bartowski/SicariusSicariiStuff_Phi-lthy4-GGUF/SicariusSicariiStuff_Phi-lthy4-Q4_K_M.gguf -- !!merge <<: *phi4 - name: "sicariussicariistuff_phi-line_14b" - url: "github:mudler/LocalAI/gallery/chatml.yaml@master" - icon: https://huggingface.co/SicariusSicariiStuff/Phi-Line_14B/resolve/main/Images/Phi-Line_14B.png - urls: - - https://huggingface.co/SicariusSicariiStuff/Phi-Line_14B - - https://huggingface.co/bartowski/SicariusSicariiStuff_Phi-Line_14B-GGUF - description: | - Excellent Roleplay with more brains. (Who would have thought Phi-4 models would be good at this? so weird... ) - Medium length response (1-4 paragraphs, usually 2-3). - Excellent assistant that follows instructions well enough, and keeps good formating. - Strong Creative writing abilities. Will obey requests regarding formatting (markdown headlines for paragraphs, etc). - Writes and roleplays quite uniquely, probably because of lack of RP\writing slop in the pretrain. This is just my guesstimate. 
- LOW refusals - Total freedom in RP, can do things other RP models won't, and I'll leave it at that. Low refusals in assistant tasks as well. - VERY good at following the character card. Math brain is used for gooner tech, as it should be. - overrides: - parameters: - model: SicariusSicariiStuff_Phi-Line_14B-Q4_K_M.gguf - files: - - filename: SicariusSicariiStuff_Phi-Line_14B-Q4_K_M.gguf - sha256: 552c5a613bc5f24494646858795837ac42d3c216c5caedd7f4d6b954e5df58f2 - uri: huggingface://bartowski/SicariusSicariiStuff_Phi-Line_14B-GGUF/SicariusSicariiStuff_Phi-Line_14B-Q4_K_M.gguf -- !!merge <<: *phi4 - name: "microsoft_phi-4-mini-instruct" - urls: - - https://huggingface.co/microsoft/Phi-4-mini-instruct - - https://huggingface.co/bartowski/microsoft_Phi-4-mini-instruct-GGUF - description: | - Phi-4-mini-instruct is a lightweight open model built upon synthetic data and filtered publicly available websites - with a focus on high-quality, reasoning dense data. The model belongs to the Phi-4 model family and supports 128K token context length. The model underwent an enhancement process, incorporating both supervised fine-tuning and direct preference optimization to support precise instruction adherence and robust safety measures. - overrides: - parameters: - model: microsoft_Phi-4-mini-instruct-Q4_K_M.gguf - files: - - filename: microsoft_Phi-4-mini-instruct-Q4_K_M.gguf - sha256: 01999f17c39cc3074afae5e9c539bc82d45f2dd7faa3917c66cbef76fce8c0c2 - uri: huggingface://bartowski/microsoft_Phi-4-mini-instruct-GGUF/microsoft_Phi-4-mini-instruct-Q4_K_M.gguf -- !!merge <<: *phi4 - name: "microsoft_phi-4-mini-reasoning" - urls: - - https://huggingface.co/microsoft/Phi-4-mini-reasoning - - https://huggingface.co/bartowski/microsoft_Phi-4-mini-reasoning-GGUF - description: | - Phi-4-mini-reasoning is a lightweight open model built upon synthetic data with a focus on high-quality, reasoning dense data further finetuned for more advanced math reasoning capabilities. 
The model belongs to the Phi-4 model family and supports 128K token context length. - Phi-4-mini-reasoning is designed for multi-step, logic-intensive mathematical problem-solving tasks under memory/compute constrained environments and latency bound scenarios. Some of the use cases include formal proof generation, symbolic computation, advanced word problems, and a wide range of mathematical reasoning scenarios. These models excel at maintaining context across steps, applying structured logic, and delivering accurate, reliable solutions in domains that require deep analytical thinking. - This model is designed and tested for math reasoning only. It is not specifically designed or evaluated for all downstream purposes. Developers should consider common limitations of language models, as well as performance difference across languages, as they select use cases, and evaluate and mitigate for accuracy, safety, and fairness before using within a specific downstream use case, particularly for high-risk scenarios. Developers should be aware of and adhere to applicable laws or regulations (including but not limited to privacy, trade compliance laws, etc.) that are relevant to their use case. - Nothing contained in this Model Card should be interpreted as or deemed a restriction or modification to the license the model is released under. - This release of Phi-4-mini-reasoning addresses user feedback and market demand for a compact reasoning model. It is a compact transformer-based language model optimized for mathematical reasoning, built to deliver high-quality, step-by-step problem solving in environments where computing or latency is constrained. The model is fine-tuned with synthetic math data from a more capable model (much larger, smarter, more accurate, and better at following instructions), which has resulted in enhanced reasoning performance. 
Phi-4-mini-reasoning balances reasoning ability with efficiency, making it potentially suitable for educational applications, embedded tutoring, and lightweight deployment on edge or mobile systems. If a critical issue is identified with Phi-4-mini-reasoning, it should be promptly reported through the MSRC Researcher Portal or secure@microsoft.com - overrides: - parameters: - model: microsoft_Phi-4-mini-reasoning-Q4_K_M.gguf - files: - - filename: microsoft_Phi-4-mini-reasoning-Q4_K_M.gguf - sha256: ce8becd58f350d8ae0ec3bbb201ab36f750ffab17ab6238f39292d12ab68ea06 - uri: huggingface://bartowski/microsoft_Phi-4-mini-reasoning-GGUF/microsoft_Phi-4-mini-reasoning-Q4_K_M.gguf -- !!merge <<: *phi4 - name: "microsoft_phi-4-reasoning-plus" - urls: - - https://huggingface.co/microsoft/Phi-4-reasoning-plus - - https://huggingface.co/bartowski/microsoft_Phi-4-reasoning-plus-GGUF - description: | - Phi-4-reasoning-plus is a state-of-the-art open-weight reasoning model finetuned from Phi-4 using supervised fine-tuning on a dataset of chain-of-thought traces and reinforcement learning. The supervised fine-tuning dataset includes a blend of synthetic prompts and high-quality filtered data from public domain websites, focused on math, science, and coding skills as well as alignment data for safety and Responsible AI. The goal of this approach was to ensure that small capable models were trained with data focused on high quality and advanced reasoning. Phi-4-reasoning-plus has been trained additionally with Reinforcement Learning, hence, it has higher accuracy but generates on average 50% more tokens, thus having higher latency. 
- overrides: - parameters: - model: microsoft_Phi-4-reasoning-plus-Q4_K_M.gguf - files: - - filename: microsoft_Phi-4-reasoning-plus-Q4_K_M.gguf - sha256: fcb2cdd14881e20613f56d37f49d0c73ec6c00f9bb4d0e4b161a1621d8517f47 - uri: huggingface://bartowski/microsoft_Phi-4-reasoning-plus-GGUF/microsoft_Phi-4-reasoning-plus-Q4_K_M.gguf -- !!merge <<: *phi4 - name: "microsoft_phi-4-reasoning" - urls: - - https://huggingface.co/microsoft/Phi-4-reasoning - - https://huggingface.co/bartowski/microsoft_Phi-4-reasoning-GGUF - description: | - Phi-4-reasoning is a state-of-the-art open-weight reasoning model finetuned from Phi-4 using supervised fine-tuning on a dataset of chain-of-thought traces and reinforcement learning. The supervised fine-tuning dataset includes a blend of synthetic prompts and high-quality filtered data from public domain websites, focused on math, science, and coding skills as well as alignment data for safety and Responsible AI. The goal of this approach was to ensure that small capable models were trained with data focused on high quality and advanced reasoning. 
- overrides: - parameters: - model: microsoft_Phi-4-reasoning-Q4_K_M.gguf - files: - - filename: microsoft_Phi-4-reasoning-Q4_K_M.gguf - sha256: e02aea7b191055b8d9a5ca7d58a99214a6dc87be8759cf97089814163bda5042 - uri: huggingface://bartowski/microsoft_Phi-4-reasoning-GGUF/microsoft_Phi-4-reasoning-Q4_K_M.gguf - &falcon3 name: "falcon3-1b-instruct" url: "github:mudler/LocalAI/gallery/falcon3.yaml@master" @@ -5814,230 +5637,6 @@ - filename: rwkv-6-world-7b-Q4_K_M.gguf sha256: f74574186fa4584f405e92198605680db6ad00fd77974ffa14bf02073bb90273 uri: huggingface://bartowski/rwkv-6-world-7b-GGUF/rwkv-6-world-7b-Q4_K_M.gguf -- &qwen25coder - name: "qwen2.5-coder-14b" - icon: https://avatars.githubusercontent.com/u/141221163 - url: "github:mudler/LocalAI/gallery/chatml.yaml@master" - license: apache-2.0 - tags: - - llm - - gguf - - gpu - - qwen - - qwen2.5 - - cpu - urls: - - https://huggingface.co/Qwen/Qwen2.5-Coder-14B - - https://huggingface.co/mradermacher/Qwen2.5-Coder-14B-GGUF - description: | - Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen). As of now, Qwen2.5-Coder has covered six mainstream model sizes, 0.5, 1.5, 3, 7, 14, 32 billion parameters, to meet the needs of different developers. Qwen2.5-Coder brings the following improvements upon CodeQwen1.5: - - Significantly improvements in code generation, code reasoning and code fixing. Base on the strong Qwen2.5, we scale up the training tokens into 5.5 trillion including source code, text-code grounding, Synthetic data, etc. Qwen2.5-Coder-32B has become the current state-of-the-art open-source codeLLM, with its coding abilities matching those of GPT-4o. - A more comprehensive foundation for real-world applications such as Code Agents. Not only enhancing coding capabilities but also maintaining its strengths in mathematics and general competencies. - Long-context Support up to 128K tokens. 
- overrides: - parameters: - model: Qwen2.5-Coder-14B.Q4_K_M.gguf - files: - - filename: Qwen2.5-Coder-14B.Q4_K_M.gguf - sha256: 94f277a9ac7caf117140b2fff4e1ccf4bc9f35395b0112f0d0d7c82c6f8d860e - uri: huggingface://mradermacher/Qwen2.5-Coder-14B-GGUF/Qwen2.5-Coder-14B.Q4_K_M.gguf -- !!merge <<: *qwen25coder - name: "qwen2.5-coder-3b-instruct" - urls: - - https://huggingface.co/Qwen/Qwen2.5-Coder-3B-Instruct - - https://huggingface.co/bartowski/Qwen2.5-Coder-3B-Instruct-GGUF - overrides: - parameters: - model: Qwen2.5-Coder-3B-Instruct-Q4_K_M.gguf - files: - - filename: Qwen2.5-Coder-3B-Instruct-Q4_K_M.gguf - sha256: 3da3afe6cf5c674ac195803ea0dd6fee7e1c228c2105c1ce8c66890d1d4ab460 - uri: huggingface://bartowski/Qwen2.5-Coder-3B-Instruct-GGUF/Qwen2.5-Coder-3B-Instruct-Q4_K_M.gguf -- !!merge <<: *qwen25coder - name: "qwen2.5-coder-32b-instruct" - urls: - - https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct - - https://huggingface.co/bartowski/Qwen2.5-Coder-32B-Instruct-GGUF - overrides: - parameters: - model: Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf - files: - - filename: Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf - sha256: 8e2fd78ff55e7cdf577fda257bac2776feb7d73d922613caf35468073807e815 - uri: huggingface://bartowski/Qwen2.5-Coder-32B-Instruct-GGUF/Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf -- !!merge <<: *qwen25coder - name: "qwen2.5-coder-14b-instruct" - urls: - - https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct - - https://huggingface.co/bartowski/Qwen2.5-Coder-14B-Instruct-GGUF - overrides: - parameters: - model: Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf - files: - - filename: Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf - sha256: 2946d28c9e1bb2bcae6d42e8678863a31775df6f740315c7d7e6d6b6411f5937 - uri: huggingface://bartowski/Qwen2.5-Coder-14B-Instruct-GGUF/Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf -- !!merge <<: *qwen25coder - name: "qwen2.5-coder-1.5b-instruct" - urls: - - https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct - - 
https://huggingface.co/bartowski/Qwen2.5-Coder-1.5B-Instruct-GGUF - overrides: - parameters: - model: Qwen2.5-Coder-1.5B-Instruct-Q4_K_M.gguf - files: - - filename: Qwen2.5-Coder-1.5B-Instruct-Q4_K_M.gguf - sha256: f530705d447660a4336c329981af164b471b60b974b1d808d57e8ec9fe23b239 - uri: huggingface://bartowski/Qwen2.5-Coder-1.5B-Instruct-GGUF/Qwen2.5-Coder-1.5B-Instruct-Q4_K_M.gguf -- !!merge <<: *qwen25coder - name: "qwen2.5-coder-7b-instruct" - urls: - - https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct - - https://huggingface.co/bartowski/Qwen2.5-Coder-7B-Instruct-GGUF - overrides: - parameters: - model: Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf - files: - - filename: Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf - sha256: 1664fccab734674a50763490a8c6931b70e3f2f8ec10031b54806d30e5f956b6 - uri: huggingface://bartowski/Qwen2.5-Coder-7B-Instruct-GGUF/Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf -- !!merge <<: *qwen25coder - name: "qwen2.5-coder-7b-3x-instruct-ties-v1.2-i1" - urls: - - https://huggingface.co/BenevolenceMessiah/Qwen2.5-Coder-7B-3x-Instruct-TIES-v1.2 - - https://huggingface.co/mradermacher/Qwen2.5-Coder-7B-3x-Instruct-TIES-v1.2-i1-GGUF - description: | - The following models were included in the merge: - BenevolenceMessiah/Qwen2.5-Coder-7B-Chat-Instruct-TIES-v1.2 - MadeAgents/Hammer2.0-7b - huihui-ai/Qwen2.5-Coder-7B-Instruct-abliterated - overrides: - parameters: - model: Qwen2.5-Coder-7B-3x-Instruct-TIES-v1.2.i1-Q4_K_M.gguf - files: - - filename: Qwen2.5-Coder-7B-3x-Instruct-TIES-v1.2.i1-Q4_K_M.gguf - sha256: c28a4da700f634f1277f02391d81fa3c0ba783fa4b02886bd4bfe5f13b6605ef - uri: huggingface://mradermacher/Qwen2.5-Coder-7B-3x-Instruct-TIES-v1.2-i1-GGUF/Qwen2.5-Coder-7B-3x-Instruct-TIES-v1.2.i1-Q4_K_M.gguf -- !!merge <<: *qwen25coder - name: "qwen2.5-coder-7b-instruct-abliterated-i1" - urls: - - https://huggingface.co/huihui-ai/Qwen2.5-Coder-7B-Instruct-abliterated - - https://huggingface.co/mradermacher/Qwen2.5-Coder-7B-Instruct-abliterated-i1-GGUF - description: | 
- This is an uncensored version of Qwen2.5-Coder-7B-Instruct created with abliteration (see this article to know more about it). - - Special thanks to @FailSpy for the original code and technique. Please follow him if you're interested in abliterated models. - overrides: - parameters: - model: Qwen2.5-Coder-7B-Instruct-abliterated.i1-Q4_K_M.gguf - files: - - filename: Qwen2.5-Coder-7B-Instruct-abliterated.i1-Q4_K_M.gguf - sha256: 9100ccd9e8167cefda98bd1c97d5d765a21e70e124e4d6b89945fd66ebb481b4 - uri: huggingface://mradermacher/Qwen2.5-Coder-7B-Instruct-abliterated-i1-GGUF/Qwen2.5-Coder-7B-Instruct-abliterated.i1-Q4_K_M.gguf -- !!merge <<: *qwen25coder - name: "rombos-coder-v2.5-qwen-7b" - icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/QErypCEKD5OZLxUcSmYaR.jpeg - urls: - - https://huggingface.co/rombodawg/Rombos-Coder-V2.5-Qwen-7b - - https://huggingface.co/bartowski/Rombos-Coder-V2.5-Qwen-7b-GGUF - - https://docs.google.com/document/d/1OjbjU5AOz4Ftn9xHQrX3oFQGhQ6RDUuXQipnQ9gn6tU/edit?usp=sharing - description: | - Rombos-Coder-V2.5-Qwen-7b is a continues finetuned version of Qwen2.5-Coder-7B-Instruct. I took it upon myself to merge the instruct model with the base model myself using the * Ties* merge method as demonstrated in my own "Continuous Finetuning" method (link available). - This version of the model shows higher performance than the original instruct and base models. 
- overrides: - parameters: - model: Rombos-Coder-V2.5-Qwen-7b-Q4_K_M.gguf - files: - - filename: Rombos-Coder-V2.5-Qwen-7b-Q4_K_M.gguf - sha256: ca16a550f1be00b7e92f94c0c18ea6af1e5c158d5d1cb3994f9f0a0d13922272 - uri: huggingface://bartowski/Rombos-Coder-V2.5-Qwen-7b-GGUF/Rombos-Coder-V2.5-Qwen-7b-Q4_K_M.gguf -- !!merge <<: *qwen25coder - name: "rombos-coder-v2.5-qwen-32b" - icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/QErypCEKD5OZLxUcSmYaR.jpeg - urls: - - https://huggingface.co/rombodawg/Rombos-Coder-V2.5-Qwen-32b - - https://huggingface.co/bartowski/Rombos-Coder-V2.5-Qwen-32b-GGUF - - https://docs.google.com/document/d/1OjbjU5AOz4Ftn9xHQrX3oFQGhQ6RDUuXQipnQ9gn6tU/edit?usp=sharing - description: | - Rombos-Coder-V2.5-Qwen-32b is a continues finetuned version of Qwen2.5-Coder-32B-Instruct. I took it upon myself to merge the instruct model with the base model myself using the Ties merge method as demonstrated in my own "Continuous Finetuning" method (link available). - This version of the model shows higher performance than the original instruct and base models. - overrides: - parameters: - model: Rombos-Coder-V2.5-Qwen-32b-Q4_K_M.gguf - files: - - filename: Rombos-Coder-V2.5-Qwen-32b-Q4_K_M.gguf - sha256: 821ea2a13d96354db1368986700b1189938fbbc56ca6bb9d0c39f752580de71a - uri: huggingface://bartowski/Rombos-Coder-V2.5-Qwen-32b-GGUF/Rombos-Coder-V2.5-Qwen-32b-Q4_K_M.gguf -- !!merge <<: *qwen25coder - name: "rombos-coder-v2.5-qwen-14b" - icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/QErypCEKD5OZLxUcSmYaR.jpeg - urls: - - https://huggingface.co/rombodawg/Rombos-Coder-V2.5-Qwen-14b - - https://huggingface.co/bartowski/Rombos-Coder-V2.5-Qwen-14b-GGUF - - https://docs.google.com/document/d/1OjbjU5AOz4Ftn9xHQrX3oFQGhQ6RDUuXQipnQ9gn6tU/edit?usp=sharing - description: | - Rombos-Coder-V2.5-Qwen-14b is a continues finetuned version of Qwen2.5-Coder-14B-Instruct. 
I took it upon myself to merge the instruct model with the base model myself using the Ties merge method as demonstrated in my own "Continuous Finetuning" method (link available). - This version of the model shows higher performance than the original instruct and base models. - overrides: - parameters: - model: Rombos-Coder-V2.5-Qwen-14b-Q4_K_M.gguf - files: - - filename: Rombos-Coder-V2.5-Qwen-14b-Q4_K_M.gguf - sha256: 7ef044e1fee206a039f56538f94332030e99ec63915c74f4d1bdec0e601ee968 - uri: huggingface://bartowski/Rombos-Coder-V2.5-Qwen-14b-GGUF/Rombos-Coder-V2.5-Qwen-14b-Q4_K_M.gguf -- !!merge <<: *qwen25coder - name: "qwen2.5-coder-32b-instruct-uncensored-i1" - urls: - - https://huggingface.co/thirdeyeai/Qwen2.5-Coder-32B-Instruct-Uncensored - - https://huggingface.co/mradermacher/Qwen2.5-Coder-32B-Instruct-Uncensored-i1-GGUF - description: | - The LLM model is based on sloshywings/Qwen2.5-Coder-32B-Instruct-Uncensored. It is a large language model with 32B parameters that has been fine-tuned on coding tasks and instructions. - overrides: - parameters: - model: Qwen2.5-Coder-32B-Instruct-Uncensored.i1-Q4_K_M.gguf - files: - - filename: Qwen2.5-Coder-32B-Instruct-Uncensored.i1-Q4_K_M.gguf - sha256: 86ac8efb86daf241792ac3d5d35b7da92c54901b4208a6f2829bd03d8f273c9c - uri: huggingface://mradermacher/Qwen2.5-Coder-32B-Instruct-Uncensored-i1-GGUF/Qwen2.5-Coder-32B-Instruct-Uncensored.i1-Q4_K_M.gguf -- !!merge <<: *qwen25coder - name: "skywork_skywork-swe-32b" - icon: https://cdn-uploads.huggingface.co/production/uploads/6665dd2b3a64c70529f7542c/8o-IE7N3GwSFCIH3ntc8E.png - urls: - - https://huggingface.co/Skywork/Skywork-SWE-32B - - https://huggingface.co/bartowski/Skywork_Skywork-SWE-32B-GGUF - description: | - Skywork-SWE-32B is a code agent model developed by Skywork AI, specifically designed for software engineering (SWE) tasks. 
It demonstrates strong performance across several key metrics: - - Skywork-SWE-32B attains 38.0% pass@1 accuracy on the SWE-bench Verified benchmark, outperforming previous open-source SoTA Qwen2.5-Coder-32B-based LLMs built on the OpenHands agent framework. - When incorporated with test-time scaling techniques, the performance further improves to 47.0% accuracy, surpassing the previous SoTA results for sub-32B parameter models. - We clearly demonstrate the data scaling law phenomenon for software engineering capabilities in LLMs, with no signs of saturation at 8209 collected training trajectories. - - We also introduce an efficient and automated pipeline for SWE data collection, culminating in the creation of the Skywork-SWE dataset---a large-scale, high-quality dataset featuring comprehensive executable runtime environments. Detailed descriptions are available on our technical report. - overrides: - parameters: - model: Skywork_Skywork-SWE-32B-Q4_K_M.gguf - files: - - filename: Skywork_Skywork-SWE-32B-Q4_K_M.gguf - sha256: b5a451fa677159d7ab0aee64eeec5933aa4e5bd598e400501ecec3af0a767fa8 - uri: huggingface://bartowski/Skywork_Skywork-SWE-32B-GGUF/Skywork_Skywork-SWE-32B-Q4_K_M.gguf -- !!merge <<: *qwen25coder - name: "microsoft_nextcoder-32b" - urls: - - https://huggingface.co/microsoft/NextCoder-32B - - https://huggingface.co/bartowski/microsoft_NextCoder-32B-GGUF - description: | - NextCoder is the latest series of Code-Editing large language models developed using the Qwen2.5-Coder Instruct variants as base and trained with novel Selective Knowledge Transfer finetuning methodology as introduced in the paper. NextCoder family model comes in 3 different sizes 7, 14, 32 billion parameters, to meet the needs of different developers. Following are the key improvements: - Significantly improvements in code editing, NextCoder-32B has performing on par with GPT-4o on complex benchmarks like Aider-Polyglot with performance increment of 44% from their base model. 
- No loss of generalizibility, due to our new finetuning method SeleKT - Long-context Support up to 32K tokens. - overrides: - parameters: - model: microsoft_NextCoder-32B-Q4_K_M.gguf - files: - - filename: microsoft_NextCoder-32B-Q4_K_M.gguf - sha256: e3f77c7fa08814a3a86b9f8cbb8c7fb3c0063403bac26c99dc67655937da4e44 - uri: huggingface://bartowski/microsoft_NextCoder-32B-GGUF/microsoft_NextCoder-32B-Q4_K_M.gguf - &opencoder name: "opencoder-8b-base" icon: https://avatars.githubusercontent.com/u/186387526 @@ -7053,2001 +6652,6 @@ - filename: nano_imp_1b-q8_0.gguf sha256: 2756551de7d8ff7093c2c5eec1cd00f1868bc128433af53f5a8d434091d4eb5a uri: huggingface://Triangle104/Nano_Imp_1B-Q8_0-GGUF/nano_imp_1b-q8_0.gguf -- &qwen25 - name: "qwen2.5-14b-instruct" ## Qwen2.5 - icon: https://avatars.githubusercontent.com/u/141221163 - url: "github:mudler/LocalAI/gallery/chatml.yaml@master" - license: apache-2.0 - description: | - Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters. - tags: - - llm - - gguf - - gpu - - qwen - - qwen2.5 - - cpu - urls: - - https://huggingface.co/bartowski/Qwen2.5-14B-Instruct-GGUF - - https://huggingface.co/Qwen/Qwen2.5-7B-Instruct - overrides: - parameters: - model: Qwen2.5-14B-Instruct-Q4_K_M.gguf - files: - - filename: Qwen2.5-14B-Instruct-Q4_K_M.gguf - sha256: e47ad95dad6ff848b431053b375adb5d39321290ea2c638682577dafca87c008 - uri: huggingface://bartowski/Qwen2.5-14B-Instruct-GGUF/Qwen2.5-14B-Instruct-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwen2.5-math-7b-instruct" - urls: - - https://huggingface.co/bartowski/Qwen2.5-Math-7B-Instruct-GGUF - - https://huggingface.co/Qwen/Qwen2.5-Math-7B-Instruct - description: | - In August 2024, we released the first series of mathematical LLMs - Qwen2-Math - of our Qwen family. 
A month later, we have upgraded it and open-sourced Qwen2.5-Math series, including base models Qwen2.5-Math-1.5B/7B/72B, instruction-tuned models Qwen2.5-Math-1.5B/7B/72B-Instruct, and mathematical reward model Qwen2.5-Math-RM-72B. - - Unlike Qwen2-Math series which only supports using Chain-of-Thught (CoT) to solve English math problems, Qwen2.5-Math series is expanded to support using both CoT and Tool-integrated Reasoning (TIR) to solve math problems in both Chinese and English. The Qwen2.5-Math series models have achieved significant performance improvements compared to the Qwen2-Math series models on the Chinese and English mathematics benchmarks with CoT. - - The base models of Qwen2-Math are initialized with Qwen2-1.5B/7B/72B, and then pretrained on a meticulously designed Mathematics-specific Corpus. This corpus contains large-scale high-quality mathematical web texts, books, codes, exam questions, and mathematical pre-training data synthesized by Qwen2. - overrides: - parameters: - model: Qwen2.5-Math-7B-Instruct-Q4_K_M.gguf - files: - - filename: Qwen2.5-Math-7B-Instruct-Q4_K_M.gguf - sha256: 7e03cee8c65b9ebf9ca14ddb010aca27b6b18e6c70f2779e94e7451d9529c091 - uri: huggingface://bartowski/Qwen2.5-Math-7B-Instruct-GGUF/Qwen2.5-Math-7B-Instruct-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwen2.5-14b_uncencored" - icon: https://huggingface.co/SicariusSicariiStuff/Phi-3.5-mini-instruct_Uncensored/resolve/main/Misc/Uncensored.png - urls: - - https://huggingface.co/SicariusSicariiStuff/Qwen2.5-14B_Uncencored - - https://huggingface.co/bartowski/Qwen2.5-14B_Uncencored-GGUF - description: | - Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters. 
- - Uncensored qwen2.5 - tags: - - llm - - gguf - - gpu - - qwen - - qwen2.5 - - cpu - - uncensored - overrides: - parameters: - model: Qwen2.5-14B_Uncencored-Q4_K_M.gguf - files: - - filename: Qwen2.5-14B_Uncencored-Q4_K_M.gguf - sha256: 066b9341b67e0fd0956de3576a3b7988574a5b9a0028aef2b9c8edeadd6dbbd1 - uri: huggingface://bartowski/Qwen2.5-14B_Uncencored-GGUF/Qwen2.5-14B_Uncencored-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwen2.5-coder-7b-instruct" - urls: - - https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct - - https://huggingface.co/bartowski/Qwen2.5-Coder-7B-Instruct-GGUF - description: | - Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen). For Qwen2.5-Coder, we release three base language models and instruction-tuned language models, 1.5, 7 and 32 (coming soon) billion parameters. Qwen2.5-Coder brings the following improvements upon CodeQwen1.5: - - Significantly improvements in code generation, code reasoning and code fixing. Base on the strong Qwen2.5, we scale up the training tokens into 5.5 trillion including source code, text-code grounding, Synthetic data, etc. - A more comprehensive foundation for real-world applications such as Code Agents. Not only enhancing coding capabilities but also maintaining its strengths in mathematics and general competencies. - Long-context Support up to 128K tokens. 
- overrides: - parameters: - model: Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf - files: - - filename: Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf - sha256: 1664fccab734674a50763490a8c6931b70e3f2f8ec10031b54806d30e5f956b6 - uri: huggingface://bartowski/Qwen2.5-Coder-7B-Instruct-GGUF/Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwen2.5-math-72b-instruct" - icon: http://qianwen-res.oss-accelerate-overseas.aliyuncs.com/Qwen2.5/qwen2.5-math-pipeline.jpeg - urls: - - https://huggingface.co/Qwen/Qwen2.5-Math-72B-Instruct - - https://huggingface.co/bartowski/Qwen2.5-Math-72B-Instruct-GGUF - description: | - In August 2024, we released the first series of mathematical LLMs - Qwen2-Math - of our Qwen family. A month later, we have upgraded it and open-sourced Qwen2.5-Math series, including base models Qwen2.5-Math-1.5B/7B/72B, instruction-tuned models Qwen2.5-Math-1.5B/7B/72B-Instruct, and mathematical reward model Qwen2.5-Math-RM-72B. - - Unlike Qwen2-Math series which only supports using Chain-of-Thught (CoT) to solve English math problems, Qwen2.5-Math series is expanded to support using both CoT and Tool-integrated Reasoning (TIR) to solve math problems in both Chinese and English. 
The Qwen2.5-Math series models have achieved significant performance improvements compared to the Qwen2-Math series models on the Chinese and English mathematics benchmarks with CoT - overrides: - parameters: - model: Qwen2.5-Math-72B-Instruct-Q4_K_M.gguf - files: - - filename: Qwen2.5-Math-72B-Instruct-Q4_K_M.gguf - sha256: 5dee8a6e21d555577712b4f65565a3c3737a0d5d92f5a82970728c6d8e237f17 - uri: huggingface://bartowski/Qwen2.5-Math-72B-Instruct-GGUF/Qwen2.5-Math-72B-Instruct-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwen2.5-0.5b-instruct" - urls: - - https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct - - https://huggingface.co/bartowski/Qwen2.5-0.5B-Instruct-GGUF - overrides: - parameters: - model: Qwen2.5-0.5B-Instruct-Q4_K_M.gguf - files: - - filename: Qwen2.5-0.5B-Instruct-Q4_K_M.gguf - sha256: 6eb923e7d26e9cea28811e1a8e852009b21242fb157b26149d3b188f3a8c8653 - uri: huggingface://bartowski/Qwen2.5-0.5B-Instruct-GGUF/Qwen2.5-0.5B-Instruct-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwen2.5-1.5b-instruct" - urls: - - https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct - - https://huggingface.co/bartowski/Qwen2.5-1.5B-Instruct-GGUF - overrides: - parameters: - model: Qwen2.5-1.5B-Instruct-Q4_K_M.gguf - files: - - filename: Qwen2.5-1.5B-Instruct-Q4_K_M.gguf - sha256: 1adf0b11065d8ad2e8123ea110d1ec956dab4ab038eab665614adba04b6c3370 - uri: huggingface://bartowski/Qwen2.5-1.5B-Instruct-GGUF/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwen2.5-32b" - urls: - - https://huggingface.co/Qwen/Qwen2.5-32B - - https://huggingface.co/mradermacher/Qwen2.5-32B-GGUF - overrides: - parameters: - model: Qwen2.5-32B.Q4_K_M.gguf - files: - - filename: Qwen2.5-32B.Q4_K_M.gguf - uri: huggingface://mradermacher/Qwen2.5-32B-GGUF/Qwen2.5-32B.Q4_K_M.gguf - sha256: fa42a4067e3630929202b6bb1ef5cebc43c1898494aedfd567b7d53c7a9d84a6 -- !!merge <<: *qwen25 - name: "qwen2.5-32b-instruct" - urls: - - https://huggingface.co/Qwen/Qwen2.5-32B-Instruct - - 
https://huggingface.co/bartowski/Qwen2.5-32B-Instruct-GGUF - overrides: - parameters: - model: Qwen2.5-32B-Instruct-Q4_K_M.gguf - files: - - filename: Qwen2.5-32B-Instruct-Q4_K_M.gguf - sha256: 2e5f6daea180dbc59f65a40641e94d3973b5dbaa32b3c0acf54647fa874e519e - uri: huggingface://bartowski/Qwen2.5-32B-Instruct-GGUF/Qwen2.5-32B-Instruct-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwen2.5-72b-instruct" - urls: - - https://huggingface.co/Qwen/Qwen2.5-72B-Instruct - - https://huggingface.co/bartowski/Qwen2.5-72B-Instruct-GGUF - overrides: - parameters: - model: Qwen2.5-72B-Instruct-Q4_K_M.gguf - files: - - filename: Qwen2.5-72B-Instruct-Q4_K_M.gguf - sha256: e4c8fad16946be8cf0bbf67eb8f4e18fc7415a5a6d2854b4cda453edb4082545 - uri: huggingface://bartowski/Qwen2.5-72B-Instruct-GGUF/Qwen2.5-72B-Instruct-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "bigqwen2.5-52b-instruct" - icon: https://cdn-uploads.huggingface.co/production/uploads/61b8e2ba285851687028d395/98GiKtmH1AtHHbIbOUH4Y.jpeg - urls: - - https://huggingface.co/mlabonne/BigQwen2.5-52B-Instruct - - https://huggingface.co/bartowski/BigQwen2.5-52B-Instruct-GGUF - description: | - BigQwen2.5-52B-Instruct is a Qwen/Qwen2-32B-Instruct self-merge made with MergeKit. - It applies the mlabonne/Meta-Llama-3-120B-Instruct recipe. - overrides: - parameters: - model: BigQwen2.5-52B-Instruct-Q4_K_M.gguf - files: - - filename: BigQwen2.5-52B-Instruct-Q4_K_M.gguf - sha256: 9c939f08e366b51b07096eb2ecb5cc2a82894ac7baf639e446237ad39889c896 - uri: huggingface://bartowski/BigQwen2.5-52B-Instruct-GGUF/BigQwen2.5-52B-Instruct-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "replete-llm-v2.5-qwen-14b" - icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/ihnWXDEgV-ZKN_B036U1J.png - urls: - - https://huggingface.co/Replete-AI/Replete-LLM-V2.5-Qwen-14b - - https://huggingface.co/bartowski/Replete-LLM-V2.5-Qwen-14b-GGUF - description: | - Replete-LLM-V2.5-Qwen-14b is a continues finetuned version of Qwen2.5-14B. 
I noticed recently that the Qwen team did not learn from my methods of continuous finetuning, the great benefits, and no downsides of it. So I took it upon myself to merge the instruct model with the base model myself using the Ties merge method - - This version of the model shows higher performance than the original instruct and base models. - overrides: - parameters: - model: Replete-LLM-V2.5-Qwen-14b-Q4_K_M.gguf - files: - - filename: Replete-LLM-V2.5-Qwen-14b-Q4_K_M.gguf - sha256: 17d0792ff5e3062aecb965629f66e679ceb407e4542e8045993dcfe9e7e14d9d - uri: huggingface://bartowski/Replete-LLM-V2.5-Qwen-14b-GGUF/Replete-LLM-V2.5-Qwen-14b-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "replete-llm-v2.5-qwen-7b" - icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/ihnWXDEgV-ZKN_B036U1J.png - urls: - - https://huggingface.co/Replete-AI/Replete-LLM-V2.5-Qwen-7b - - https://huggingface.co/bartowski/Replete-LLM-V2.5-Qwen-7b-GGUF - description: | - Replete-LLM-V2.5-Qwen-7b is a continues finetuned version of Qwen2.5-14B. I noticed recently that the Qwen team did not learn from my methods of continuous finetuning, the great benefits, and no downsides of it. So I took it upon myself to merge the instruct model with the base model myself using the Ties merge method - - This version of the model shows higher performance than the original instruct and base models. 
- overrides: - parameters: - model: Replete-LLM-V2.5-Qwen-7b-Q4_K_M.gguf - files: - - filename: Replete-LLM-V2.5-Qwen-7b-Q4_K_M.gguf - sha256: 054d54972259c0398b4e0af3f408f608e1166837b1d7535d08fc440d1daf8639 - uri: huggingface://bartowski/Replete-LLM-V2.5-Qwen-7b-GGUF/Replete-LLM-V2.5-Qwen-7b-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "calme-2.2-qwen2.5-72b-i1" - icon: https://huggingface.co/MaziyarPanahi/calme-2.2-qwen2.5-72b/resolve/main/calme-2.webp - urls: - - https://huggingface.co/MaziyarPanahi/calme-2.2-qwen2.5-72b - - https://huggingface.co/mradermacher/calme-2.2-qwen2.5-72b-i1-GGUF - description: | - This model is a fine-tuned version of the powerful Qwen/Qwen2.5-72B-Instruct, pushing the boundaries of natural language understanding and generation even further. My goal was to create a versatile and robust model that excels across a wide range of benchmarks and real-world applications. - Use Cases - - This model is suitable for a wide range of applications, including but not limited to: - - Advanced question-answering systems - Intelligent chatbots and virtual assistants - Content generation and summarization - Code generation and analysis - Complex problem-solving and decision support - overrides: - parameters: - model: calme-2.2-qwen2.5-72b.i1-Q4_K_M.gguf - files: - - filename: calme-2.2-qwen2.5-72b.i1-Q4_K_M.gguf - sha256: 5fdfa599724d7c78502c477ced1d294e92781b91d3265bd0748fbf15a6fefde6 - uri: huggingface://mradermacher/calme-2.2-qwen2.5-72b-i1-GGUF/calme-2.2-qwen2.5-72b.i1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "t.e-8.1-iq-imatrix-request" - # chatml - url: "github:mudler/LocalAI/gallery/chatml.yaml@master" - icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/K1aNPf32z-6tYZdcSQBzF.png - urls: - - https://huggingface.co/Cran-May/T.E-8.1 - - https://huggingface.co/Lewdiculous/T.E-8.1-GGUF-IQ-Imatrix-Request - description: | - Trained for roleplay uses. 
- overrides: - parameters: - model: T.E-8.1-Q4_K_M-imat.gguf - files: - - filename: T.E-8.1-Q4_K_M-imat.gguf - sha256: 1b7892b82c01ea4cbebe34cd00f9836cbbc369fc3247c1f44a92842201e7ec0b - uri: huggingface://Lewdiculous/T.E-8.1-GGUF-IQ-Imatrix-Request/T.E-8.1-Q4_K_M-imat.gguf -- !!merge <<: *qwen25 - name: "rombos-llm-v2.5.1-qwen-3b" - icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/pNDtgE5FDkxxvbG4qiZ1A.jpeg - urls: - - https://huggingface.co/QuantFactory/Rombos-LLM-V2.5.1-Qwen-3b-GGUF - description: | - Rombos-LLM-V2.5.1-Qwen-3b is a little experiment that merges a high-quality LLM, arcee-ai/raspberry-3B, using the last step of the Continuous Finetuning method outlined in a Google document. The merge is done using the mergekit with the following parameters: - - - Models: Qwen2.5-3B-Instruct, raspberry-3B - - Merge method: ties - - Base model: Qwen2.5-3B - - Parameters: weight=1, density=1, normalize=true, int8_mask=true - - Dtype: bfloat16 - - The model has been evaluated on various tasks and datasets, and the results are available on the Open LLM Leaderboard. The model has shown promising performance across different benchmarks. - overrides: - parameters: - model: Rombos-LLM-V2.5.1-Qwen-3b.Q4_K_M.gguf - files: - - filename: Rombos-LLM-V2.5.1-Qwen-3b.Q4_K_M.gguf - sha256: 656c342a2921cac8912e0123fc295c3bb3d631a85c671c12a3843a957e46d30d - uri: huggingface://QuantFactory/Rombos-LLM-V2.5.1-Qwen-3b-GGUF/Rombos-LLM-V2.5.1-Qwen-3b.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwen2.5-7b-ins-v3" - urls: - - https://huggingface.co/happzy2633/qwen2.5-7b-ins-v3 - - https://huggingface.co/bartowski/qwen2.5-7b-ins-v3-GGUF - description: | - Qwen 2.5 fine-tuned on CoT to match o1 performance. 
An attempt to build an Open o1 mathcing OpenAI o1 model - Demo: https://huggingface.co/spaces/happzy2633/open-o1 - overrides: - parameters: - model: qwen2.5-7b-ins-v3-Q4_K_M.gguf - files: - - filename: qwen2.5-7b-ins-v3-Q4_K_M.gguf - sha256: 9c23734072714a4886c0386ae0ff07a5e940d67ad52278e2ed689fec44e1e0c8 - uri: huggingface://bartowski/qwen2.5-7b-ins-v3-GGUF/qwen2.5-7b-ins-v3-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "supernova-medius" - icon: https://avatars.githubusercontent.com/u/126496414 - urls: - - https://huggingface.co/arcee-ai/SuperNova-Medius-GGUF - description: | - Arcee-SuperNova-Medius is a 14B parameter language model developed by Arcee.ai, built on the Qwen2.5-14B-Instruct architecture. This unique model is the result of a cross-architecture distillation pipeline, combining knowledge from both the Qwen2.5-72B-Instruct model and the Llama-3.1-405B-Instruct model. By leveraging the strengths of these two distinct architectures, SuperNova-Medius achieves high-quality instruction-following and complex reasoning capabilities in a mid-sized, resource-efficient form. - - SuperNova-Medius is designed to excel in a variety of business use cases, including customer support, content creation, and technical assistance, while maintaining compatibility with smaller hardware configurations. It’s an ideal solution for organizations looking for advanced capabilities without the high resource requirements of larger models like our SuperNova-70B. 
- overrides: - parameters: - model: SuperNova-Medius-Q4_K_M.gguf - files: - - filename: SuperNova-Medius-Q4_K_M.gguf - sha256: aaa4bf3451bc900f186fd4b6b3a6a26bfd40c85908f605db76b92e58aadcc864 - uri: huggingface://arcee-ai/SuperNova-Medius-GGUF/SuperNova-Medius-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "eva-qwen2.5-14b-v0.1-i1" - urls: - - https://huggingface.co/EVA-UNIT-01/EVA-Qwen2.5-14B-v0.1 - - https://huggingface.co/mradermacher/EVA-Qwen2.5-14B-v0.1-i1-GGUF - description: | - A RP/storywriting specialist model, full-parameter finetune of Qwen2.5-14B on mixture of synthetic and natural data. - It uses Celeste 70B 0.1 data mixture, greatly expanding it to improve versatility, creativity and "flavor" of the resulting model. - overrides: - parameters: - model: EVA-Qwen2.5-14B-v0.1.i1-Q4_K_M.gguf - files: - - filename: EVA-Qwen2.5-14B-v0.1.i1-Q4_K_M.gguf - sha256: 4e9665d4f83cd97efb42c8427f9c09be93b72e23a0364c91ad0b5de8056f2795 - uri: huggingface://mradermacher/EVA-Qwen2.5-14B-v0.1-i1-GGUF/EVA-Qwen2.5-14B-v0.1.i1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "cursorcore-qw2.5-7b-i1" - urls: - - https://huggingface.co/TechxGenus/CursorCore-QW2.5-7B - - https://huggingface.co/mradermacher/CursorCore-QW2.5-7B-i1-GGUF - description: | - CursorCore is a series of open-source models designed for AI-assisted programming. It aims to support features such as automated editing and inline chat, replicating the core abilities of closed-source AI-assisted programming tools like Cursor. This is achieved by aligning data generated through Programming-Instruct. Please read our paper to learn more. 
- overrides: - parameters: - model: CursorCore-QW2.5-7B.i1-Q4_K_M.gguf - files: - - filename: CursorCore-QW2.5-7B.i1-Q4_K_M.gguf - sha256: 81868f4edb4ec1a61debde1dbdebc02b407930ee19a6d946ff801afba840a102 - uri: huggingface://mradermacher/CursorCore-QW2.5-7B-i1-GGUF/CursorCore-QW2.5-7B.i1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "cursorcore-qw2.5-1.5b-lc-i1" - urls: - - https://huggingface.co/TechxGenus/CursorCore-QW2.5-1.5B-LC - - https://huggingface.co/mradermacher/CursorCore-QW2.5-1.5B-LC-i1-GGUF - description: | - CursorCore is a series of open-source models designed for AI-assisted programming. It aims to support features such as automated editing and inline chat, replicating the core abilities of closed-source AI-assisted programming tools like Cursor. This is achieved by aligning data generated through Programming-Instruct. Please read our paper to learn more. - overrides: - parameters: - model: CursorCore-QW2.5-1.5B-LC.i1-Q4_K_M.gguf - files: - - filename: CursorCore-QW2.5-1.5B-LC.i1-Q4_K_M.gguf - sha256: 185d720c810f7345ef861ad8eef1199bb15afa8e4f3c03bd5ffd476cfa465127 - uri: huggingface://mradermacher/CursorCore-QW2.5-1.5B-LC-i1-GGUF/CursorCore-QW2.5-1.5B-LC.i1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "edgerunner-command-nested-i1" - urls: - - https://huggingface.co/edgerunner-ai/EdgeRunner-Command-Nested - - https://huggingface.co/mradermacher/EdgeRunner-Command-Nested-i1-GGUF - description: | - EdgeRunner-Command-Nested is an advanced large language model designed specifically for handling complex nested function calls. Initialized from Qwen2.5-7B-Instruct, further enhanced by the integration of the Hermes function call template and additional training on a specialized dataset (based on TinyAgent). This extra dataset focuses on personal domain applications, providing the model with a robust understanding of nested function scenarios that are typical in complex user interactions. 
- overrides: - parameters: - model: EdgeRunner-Command-Nested.i1-Q4_K_M.gguf - files: - - filename: EdgeRunner-Command-Nested.i1-Q4_K_M.gguf - sha256: a1cc4d2b601dc20e58cbb549bd3e9bc460995840c0aaf1cd3c1cb5414c900ac7 - uri: huggingface://mradermacher/EdgeRunner-Command-Nested-i1-GGUF/EdgeRunner-Command-Nested.i1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "tsunami-0.5x-7b-instruct-i1" - icon: https://huggingface.co/Tsunami-th/Tsunami-0.5x-7B-Instruct/resolve/main/Tsunami.webp - urls: - - https://huggingface.co/Tsunami-th/Tsunami-0.5x-7B-Instruct - - https://huggingface.co/mradermacher/Tsunami-0.5x-7B-Instruct-i1-GGUF - description: | - TSUNAMI: Transformative Semantic Understanding and Natural Augmentation Model for Intelligence. - - TSUNAMI full name was created by ChatGPT. - infomation - - Tsunami-0.5x-7B-Instruct is Thai Large Language Model that fine-tuned from Qwen2.5-7B around 100,000 rows in Thai dataset. - overrides: - parameters: - model: Tsunami-0.5x-7B-Instruct.i1-Q4_K_M.gguf - files: - - filename: Tsunami-0.5x-7B-Instruct.i1-Q4_K_M.gguf - sha256: 22e2003ecec7f1e91f2e9aaec334613c0f37fb3000d0e628b5a9980e53322fa7 - uri: huggingface://mradermacher/Tsunami-0.5x-7B-Instruct-i1-GGUF/Tsunami-0.5x-7B-Instruct.i1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qevacot-7b-v2" - urls: - - https://huggingface.co/bunnycore/Qevacot-7B-v2 - - https://huggingface.co/mradermacher/Qevacot-7B-v2-GGUF - description: | - This model was merged using the TIES merge method using Qwen/Qwen2.5-7B as a base. 
- The following models were included in the merge: - c10x/CoT-2.5 - EVA-UNIT-01/EVA-Qwen2.5-7B-v0.1 - huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2 - Cran-May/T.E-8.1 - overrides: - parameters: - model: Qevacot-7B-v2.Q4_K_M.gguf - files: - - filename: Qevacot-7B-v2.Q4_K_M.gguf - sha256: a45b3d3b74bc68a5c7ac07d251cdeff671e64085d1816cd86fca6cfb7eab204e - uri: huggingface://mradermacher/Qevacot-7B-v2-GGUF/Qevacot-7B-v2.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "meissa-qwen2.5-7b-instruct" - icon: https://huggingface.co/Orion-zhen/Meissa-Qwen2.5-7B-Instruct/resolve/main/meissa.jpg - urls: - - https://huggingface.co/Orion-zhen/Meissa-Qwen2.5-7B-Instruct - - https://huggingface.co/QuantFactory/Meissa-Qwen2.5-7B-Instruct-GGUF - description: | - Meissa is designated Lambda Orionis, forms Orion's head, and is a multiple star with a combined apparent magnitude of 3.33. Its name means the "shining one". - This model is fine tuned over writing and role playing datasets (maybe the first on qwen2.5-7b), aiming to enhance model's performance in novel writing and roleplaying. - The model is fine-tuned over Orion-zhen/Qwen2.5-7B-Instruct-Uncensored - overrides: - parameters: - model: Meissa-Qwen2.5-7B-Instruct.Q4_K_M.gguf - files: - - filename: Meissa-Qwen2.5-7B-Instruct.Q4_K_M.gguf - sha256: 632b10d5c0e98bc8d53295886da2d57772a54bb6f6fa01d458e9e8c7fa9c905a - uri: huggingface://QuantFactory/Meissa-Qwen2.5-7B-Instruct-GGUF/Meissa-Qwen2.5-7B-Instruct.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "thebeagle-v2beta-32b-mgs" - urls: - - https://huggingface.co/fblgit/TheBeagle-v2beta-32B-MGS - - https://huggingface.co/bartowski/TheBeagle-v2beta-32B-MGS-GGUF - description: | - This model is an experimental version of our latest innovation: MGS. Its up to you to figure out what does it means, but its very explicit. 
We didn't applied our known UNA algorithm to the forward pass, but they are entirely compatible and operates in different parts of the neural network and in different ways, tho they both can be seen as a regularization technique. - - Updated tokenizer_config.json (from the base_model) - Regenerated Quants (being uploaded) - Re-submitted Leaderboard Evaluation, MATH & IFeval have relevant updates - Aligned LICENSE with Qwen terms. - - MGS stands for... Many-Geeks-Searching... and thats it. Hint: 1+1 is 2, and 1+1 is not 3 - We still believe on 1-Epoch should be enough, so we just did 1 Epoch only. - Dataset - Used here the first decent (corpora & size) dataset on the hub: Magpie-Align/Magpie-Pro-300K-Filtered Kudos to the Magpie team to contribute with some decent stuff that I personally think is very good to ablate. - It achieves the following results on the evaluation set: - Loss: 0.5378 (1 Epoch), outperforming the baseline model. - overrides: - parameters: - model: TheBeagle-v2beta-32B-MGS-Q4_K_M.gguf - files: - - filename: TheBeagle-v2beta-32B-MGS-Q4_K_M.gguf - sha256: db0d3b3c5341d2d51115794bf5da6552b5c0714b041de9b82065cc0c982dd4f7 - uri: huggingface://bartowski/TheBeagle-v2beta-32B-MGS-GGUF/TheBeagle-v2beta-32B-MGS-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "meraj-mini" - icon: https://avatars.githubusercontent.com/u/126496414 - urls: - - https://huggingface.co/arcee-ai/Meraj-Mini - - https://huggingface.co/QuantFactory/Meraj-Mini-GGUF - description: | - Arcee Meraj Mini is a quantized version of the Meraj-Mini model, created using llama.cpp. It is an open-source model that is fine-tuned from the Qwen2.5-7B-Instruct model and is designed for both Arabic and English languages. The model has undergone evaluations across multiple benchmarks in both languages and demonstrates top-tier performance in Arabic and competitive results in English. 
The key stages in its development include data preparation, initial training, iterative training and post-training, evaluation, and final model creation. The model is capable of solving a wide range of language tasks and is suitable for various applications such as education, mathematics and coding, customer service, and content creation. The Arcee Meraj Mini model consistently outperforms state-of-the-art models on most benchmarks of the Open Arabic LLM Leaderboard (OALL), highlighting its improvements and effectiveness in Arabic language content. - overrides: - parameters: - model: Meraj-Mini.Q4_K_M.gguf - files: - - filename: Meraj-Mini.Q4_K_M.gguf - sha256: f8f3923eb924b8f8e8f530a5bf07fcbd5b3dd10dd478d229d6f4377e31eb3938 - uri: huggingface://QuantFactory/Meraj-Mini-GGUF/Meraj-Mini.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "spiral-da-hyah-qwen2.5-72b-i1" - icon: https://cdn-uploads.huggingface.co/production/uploads/633e85093a17ab61de8d9073/toQiofo5ujXDGI4Gh3ciH.png - urls: - - https://huggingface.co/KaraKaraWitch/spiral-da-HYAH-Qwen2.5-72b - - https://huggingface.co/mradermacher/spiral-da-HYAH-Qwen2.5-72b-i1-GGUF - description: | - Model stock merge for fun. - This model was merged using the Model Stock merge method using rombodawg/Rombos-LLM-V2.5-Qwen-72b as a base. 
- The following models were included in the merge: - - anthracite-org/magnum-v4-72b - - AXCXEPT/EZO-Qwen2.5-72B-Instruct - overrides: - parameters: - model: spiral-da-HYAH-Qwen2.5-72b.i1-Q4_K_M.gguf - files: - - filename: spiral-da-HYAH-Qwen2.5-72b.i1-Q4_K_M.gguf - sha256: 6119e89cadae0bc01a0909f5d9776610dfc4cdcd1600f334c3afb0d0ece011a8 - uri: huggingface://mradermacher/spiral-da-HYAH-Qwen2.5-72b-i1-GGUF/spiral-da-HYAH-Qwen2.5-72b.i1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "whiterabbitneo-2.5-qwen-2.5-coder-7b" - icon: https://huggingface.co/WhiteRabbitNeo/WhiteRabbitNeo-2.5-Qwen-2.5-Coder-7B/resolve/main/whiterabbitneo-logo-defcon.png - urls: - - https://huggingface.co/WhiteRabbitNeo/WhiteRabbitNeo-2.5-Qwen-2.5-Coder-7B - - https://huggingface.co/bartowski/WhiteRabbitNeo-2.5-Qwen-2.5-Coder-7B-GGUF - description: | - WhiteRabbitNeo is a model series that can be used for offensive and defensive cybersecurity. - - Models are now getting released as a public preview of its capabilities, and also to assess the societal impact of such an AI. - overrides: - parameters: - model: WhiteRabbitNeo-2.5-Qwen-2.5-Coder-7B-Q4_K_M.gguf - files: - - filename: WhiteRabbitNeo-2.5-Qwen-2.5-Coder-7B-Q4_K_M.gguf - sha256: 3790b0bf2c505fcbd144b6b69354fe45a83ac09238a87469db0082027c127de4 - uri: huggingface://bartowski/WhiteRabbitNeo-2.5-Qwen-2.5-Coder-7B-GGUF/WhiteRabbitNeo-2.5-Qwen-2.5-Coder-7B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "cybertron-v4-qw7b-mgs" - icon: https://huggingface.co/fblgit/cybertron-v4-qw7B-MGS/resolve/main/cybertron_v4MGS.png - urls: - - https://huggingface.co/fblgit/cybertron-v4-qw7B-MGS - - https://huggingface.co/QuantFactory/cybertron-v4-qw7B-MGS-GGUF - description: | - Here we use our novel approach called MGS. Its up to you to figure out what it means. 
- - Cybertron V4 went thru SFT over Magpie-Align/Magpie-Qwen2.5-Pro-1M-v0.1 - overrides: - parameters: - model: cybertron-v4-qw7B-MGS.Q4_K_M.gguf - files: - - filename: cybertron-v4-qw7B-MGS.Q4_K_M.gguf - sha256: 32ed4174bad90bb7a2cdcd48b76b3b5924677a4160b762d5e5d95c93fe5205db - uri: huggingface://QuantFactory/cybertron-v4-qw7B-MGS-GGUF/cybertron-v4-qw7B-MGS.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "q25-1.5b-veolu" - icon: https://huggingface.co/Alfitaria/Q25-1.5B-VeoLu/resolve/main/veolu.png - urls: - - https://huggingface.co/Alfitaria/Q25-1.5B-VeoLu - - https://huggingface.co/bartowski/Q25-1.5B-VeoLu-GGUF - description: | - Q25-1.5B-Veo Lu is a tiny General-Purpose Creative model, made up of a merge of bespoke finetunes on Qwen 2.5-1.5B-Instruct. - - Inspired by the success of MN-12B-Mag Mell and MS-Meadowlark-22B, Veo Lu was trained on a healthy, balanced diet of of Internet fiction, roleplaying, adventuring, and reasoning/general knowledge. - - The components of Veo Lu are: - - Bard (pretrain, writing): Fujin (Cleaned/extended Rosier) - Scribe (pretrain, roleplay): Creative Writing Multiturn - Cartographer (pretrain, adventuring): SpringDragon - Alchemist (SFT, science/reasoning): ScienceQA, MedquadQA, Orca Math Word Problems - - This model is capable of carrying on a scene without going completely off the rails. That being said, it only has 1.5B parameters. So please, for the love of God, manage your expectations. Since it's Qwen, use ChatML formatting. Turn the temperature down to ~0.7-0.8 and try a dash of rep-pen. 
- overrides: - parameters: - model: Q25-1.5B-VeoLu-Q4_K_M.gguf - files: - - filename: Q25-1.5B-VeoLu-Q4_K_M.gguf - sha256: bbfb3691b6cabceb49ea1feacfa2eb2651312b8cc6caaf893b46375097e2f026 - uri: huggingface://bartowski/Q25-1.5B-VeoLu-GGUF/Q25-1.5B-VeoLu-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "llenn-v0.75-qwen2.5-72b-i1" - icon: https://cdn-uploads.huggingface.co/production/uploads/633e85093a17ab61de8d9073/mYiG-Ndxzqu8ofaBGbOIZ.png - urls: - - https://huggingface.co/KaraKaraWitch/LLENN-v0.75-Qwen2.5-72b - - https://huggingface.co/mradermacher/LLENN-v0.75-Qwen2.5-72b-i1-GGUF - description: | - The following models were included in the merge: - rombodawg/Rombos-LLM-V2.5-Qwen-72b - abacusai/Dracarys2-72B-Instruct - EVA-UNIT-01/EVA-Qwen2.5-72B-v0.0 - ZeusLabs/Chronos-Platinum-72B - m8than/banana-2-b-72b - overrides: - parameters: - model: LLENN-v0.75-Qwen2.5-72b.i1-Q4_K_M.gguf - files: - - filename: LLENN-v0.75-Qwen2.5-72b.i1-Q4_K_M.gguf - sha256: 38990136bb48fc9422b0e477bed6d9c40c00c270806d3bd3f58e426badfa0d4d - uri: huggingface://mradermacher/LLENN-v0.75-Qwen2.5-72b-i1-GGUF/LLENN-v0.75-Qwen2.5-72b.i1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "eva-qwen2.5-14b-v0.2" - urls: - - https://huggingface.co/EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2 - - https://huggingface.co/bartowski/EVA-Qwen2.5-14B-v0.2-GGUF - description: | - A RP/storywriting specialist model, full-parameter finetune of Qwen2.5-14B on mixture of synthetic and natural data. - It uses Celeste 70B 0.1 data mixture, greatly expanding it to improve versatility, creativity and "flavor" of the resulting model. - - Version notes for 0.2: Now using the refined dataset from 32B 0.2. Major improvements in coherence, instruction following and long-context comprehension over 14B v0.1. - - Prompt format is ChatML. 
- overrides: - parameters: - model: EVA-Qwen2.5-14B-v0.2-Q4_K_M.gguf - files: - - filename: EVA-Qwen2.5-14B-v0.2-Q4_K_M.gguf - sha256: 5d79bc8bf486c48c6430621a5bc5d3032227532dae436a27aa23aaf3e618e009 - uri: huggingface://bartowski/EVA-Qwen2.5-14B-v0.2-GGUF/EVA-Qwen2.5-14B-v0.2-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "tissint-14b-128k-rp" - urls: - - https://huggingface.co/Ttimofeyka/Tissint-14B-128k-RP - - https://huggingface.co/mradermacher/Tissint-14B-128k-RP-GGUF - description: | - The model is based on SuperNova-Medius (as the current best 14B model) with a 128k context with an emphasis on creativity, including NSFW and multi-turn conversations. - overrides: - parameters: - model: Tissint-14B-128k-RP.Q4_K_M.gguf - files: - - filename: Tissint-14B-128k-RP.Q4_K_M.gguf - sha256: 374c02f69fae47e7d78ffed9fad4e405250d31031a6bc1539b136c4b1cfc85c2 - uri: huggingface://mradermacher/Tissint-14B-128k-RP-GGUF/Tissint-14B-128k-RP.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "tq2.5-14b-sugarquill-v1" - icon: https://huggingface.co/allura-org/TQ2.5-14B-Sugarquill-v1/resolve/main/card_img.png - urls: - - https://huggingface.co/allura-org/TQ2.5-14B-Sugarquill-v1 - - https://huggingface.co/bartowski/TQ2.5-14B-Sugarquill-v1-GGUF - description: | - A continued pretrain of SuperNova-Medius on assorted short story data from the web. Supernova already had a nice prose, but diversifying it a bit definitely doesn't hurt. Also, finally a storywriter model with enough context for something more than a short story, that's also nice. - - It's a fair bit more temperamental than Gemma, but can be tamed with some sampling. Instruction following also stayed rather strong, so it works for both RP and storywriting, both in chat mode via back-and-forth co-writing and on raw completion. - - Overall, I'd say it successfully transfers the essence of what I liked about Gemma Sugarquill. 
I will also make a Qwen version of Aletheia, but with a brand new LoRA, based on a brand new RP dataset that's in the making right now. - - Model was trained by Auri. - overrides: - parameters: - model: TQ2.5-14B-Sugarquill-v1-Q4_K_M.gguf - files: - - filename: TQ2.5-14B-Sugarquill-v1-Q4_K_M.gguf - sha256: a654fe3f41e963d8ea6753fb9a06b9dd76893714ebf02605ef67827944a4025e - uri: huggingface://bartowski/TQ2.5-14B-Sugarquill-v1-GGUF/TQ2.5-14B-Sugarquill-v1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "calme-3.3-baguette-3b" - icon: https://huggingface.co/MaziyarPanahi/calme-3.1-baguette-3b/resolve/main/calme_3.png - urls: - - https://huggingface.co/MaziyarPanahi/calme-3.3-baguette-3b - - https://huggingface.co/MaziyarPanahi/calme-3.3-baguette-3b-GGUF - description: | - This model is an advanced iteration of the powerful Qwen/Qwen2.5-3B, fine-tuned specifically to enhance its capabilities across general domains in both French and English. - overrides: - parameters: - model: calme-3.3-baguette-3b.Q5_K_M.gguf - files: - - filename: calme-3.3-baguette-3b.Q5_K_M.gguf - sha256: 9e75b76e8cda215ef5c9ad79edfc6e5deee2f9e01ecf605ee6a557b1b5c9ef85 - uri: huggingface://MaziyarPanahi/calme-3.3-baguette-3b-GGUF/calme-3.3-baguette-3b.Q5_K_M.gguf -- !!merge <<: *qwen25 - name: "calme-3.2-baguette-3b" - icon: https://huggingface.co/MaziyarPanahi/calme-3.1-baguette-3b/resolve/main/calme_3.png - urls: - - https://huggingface.co/MaziyarPanahi/calme-3.2-baguette-3b - - https://huggingface.co/MaziyarPanahi/calme-3.2-baguette-3b-GGUF - description: | - This model is an advanced iteration of the powerful Qwen/Qwen2.5-3B, fine-tuned specifically to enhance its capabilities across general domains in both French and English. 
- overrides: - parameters: - model: calme-3.2-baguette-3b.Q4_K_M.gguf - files: - - filename: calme-3.2-baguette-3b.Q4_K_M.gguf - uri: huggingface://MaziyarPanahi/calme-3.2-baguette-3b-GGUF/calme-3.2-baguette-3b.Q4_K_M.gguf - sha256: 4e62fe0108643bbfd842add5a1bf199e9b81b0181309b15f483e1f07c2b5fbb2 -- !!merge <<: *qwen25 - icon: https://huggingface.co/MaziyarPanahi/calme-3.1-baguette-3b/resolve/main/calme_3.png - name: "calme-3.1-baguette-3b" - urls: - - https://huggingface.co/MaziyarPanahi/calme-3.1-baguette-3b - - https://huggingface.co/MaziyarPanahi/calme-3.1-baguette-3b-GGUF - description: | - This model is an advanced iteration of the powerful Qwen/Qwen2.5-3B, fine-tuned specifically to enhance its capabilities across general domains in both French and English. - overrides: - parameters: - model: calme-3.1-baguette-3b.Q4_K_M.gguf - files: - - filename: calme-3.1-baguette-3b.Q4_K_M.gguf - uri: huggingface://MaziyarPanahi/calme-3.1-baguette-3b-GGUF/calme-3.1-baguette-3b.Q4_K_M.gguf - sha256: 351058680d633749fa64efde205bd5f3d942aacada3204c594d9acfab2fc8774 -- !!merge <<: *qwen25 - name: "calme-3.3-qwenloi-3b" - icon: https://huggingface.co/MaziyarPanahi/calme-3.3-qwenloi-3b/resolve/main/calme_3.png - urls: - - https://huggingface.co/MaziyarPanahi/calme-3.3-qwenloi-3b - - https://huggingface.co/MaziyarPanahi/calme-3.3-qwenloi-3b-GGUF - description: | - This model is an advanced iteration of the powerful Qwen/Qwen2.5-3B, specifically fine-tuned to enhance its capabilities in French Legal domain. 
- overrides: - parameters: - model: calme-3.3-qwenloi-3b.Q5_K_M.gguf - files: - - filename: calme-3.3-qwenloi-3b.Q5_K_M.gguf - sha256: 9592e186a00c70552365d85ccabddae87acc8d812634a6145da8d460b57b70f9 - uri: huggingface://MaziyarPanahi/calme-3.3-qwenloi-3b-GGUF/calme-3.3-qwenloi-3b.Q5_K_M.gguf -- !!merge <<: *qwen25 - name: "calme-3.2-qwenloi-3b" - icon: https://huggingface.co/MaziyarPanahi/calme-3.3-qwenloi-3b/resolve/main/calme_3.png - urls: - - https://huggingface.co/MaziyarPanahi/calme-3.2-qwenloi-3b - - https://huggingface.co/MaziyarPanahi/calme-3.2-qwenloi-3b-GGUF - description: | - This model is an advanced iteration of the powerful Qwen/Qwen2.5-3B, specifically fine-tuned to enhance its capabilities in French Legal domain. - overrides: - parameters: - model: calme-3.2-qwenloi-3b.Q5_K_M.gguf - files: - - filename: calme-3.2-qwenloi-3b.Q5_K_M.gguf - sha256: 61be0c2f221262523dcd00a9147fe590aba797c89a1c5849bd4f66e7df2ad272 - uri: huggingface://MaziyarPanahi/calme-3.2-qwenloi-3b-GGUF/calme-3.2-qwenloi-3b.Q5_K_M.gguf -- !!merge <<: *qwen25 - name: "calme-3.1-qwenloi-3b" - icon: https://huggingface.co/MaziyarPanahi/calme-3.3-qwenloi-3b/resolve/main/calme_3.png - urls: - - https://huggingface.co/MaziyarPanahi/calme-3.1-qwenloi-3b - - https://huggingface.co/MaziyarPanahi/calme-3.1-qwenloi-3b-GGUF - description: | - This model is an advanced iteration of the powerful Qwen/Qwen2.5-3B, specifically fine-tuned to enhance its capabilities in French Legal domain. 
- overrides: - parameters: - model: calme-3.1-qwenloi-3b.Q5_K_M.gguf - files: - - filename: calme-3.1-qwenloi-3b.Q5_K_M.gguf - sha256: 8962a8d1704979039063b5c69fafdb38b545c26143419ec4c574f37f2d6dd7b2 - uri: huggingface://MaziyarPanahi/calme-3.1-qwenloi-3b-GGUF/calme-3.1-qwenloi-3b.Q5_K_M.gguf -- !!merge <<: *qwen25 - name: "eva-qwen2.5-72b-v0.1-i1" - urls: - - https://huggingface.co/EVA-UNIT-01/EVA-Qwen2.5-72B-v0.1 - - https://huggingface.co/mradermacher/EVA-Qwen2.5-72B-v0.1-i1-GGUF - description: | - A RP/storywriting specialist model, full-parameter finetune of Qwen2.5-72B on mixture of synthetic and natural data. - It uses Celeste 70B 0.1 data mixture, greatly expanding it to improve versatility, creativity and "flavor" of the resulting model. - - Dedicated to Nev. - - Version notes for 0.1: Reprocessed dataset (via Cahvay for 32B 0.2, used here as well), readjusted training config for 8xH100 SXM. Significant improvements in instruction following, long context understanding and overall coherence over v0.0. - overrides: - parameters: - model: EVA-Qwen2.5-72B-v0.1.i1-Q4_K_M.gguf - files: - - filename: EVA-Qwen2.5-72B-v0.1.i1-Q4_K_M.gguf - sha256: b05dbc02eeb286c41122b103ac31431fc8dcbd80b8979422541a05cda53df61b - uri: huggingface://mradermacher/EVA-Qwen2.5-72B-v0.1-i1-GGUF/EVA-Qwen2.5-72B-v0.1.i1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "celestial-harmony-14b-v1.0-experimental-1016-i1" - urls: - - https://huggingface.co/ProdeusUnity/Celestial-Harmony-14b-v1.0-Experimental-1016 - - https://huggingface.co/mradermacher/Celestial-Harmony-14b-v1.0-Experimental-1016-i1-GGUF - description: | - Yet Another merge, this one for AuriAetherwiing, at their request. - This is a merge of pre-trained language models created using mergekit. 
- The following models were included in the merge: - EVA-UNIT-01/EVA-Qwen2.5-14B-v0.1 - v000000/Qwen2.5-Lumen-14B - arcee-ai/SuperNova-Medius - overrides: - parameters: - model: Celestial-Harmony-14b-v1.0-Experimental-1016.i1-Q4_K_M.gguf - files: - - filename: Celestial-Harmony-14b-v1.0-Experimental-1016.i1-Q4_K_M.gguf - sha256: 536a6d98e30e9d52f91672daf49eeb7efe076e161a5da8beaca204adedd76864 - uri: huggingface://mradermacher/Celestial-Harmony-14b-v1.0-Experimental-1016-i1-GGUF/Celestial-Harmony-14b-v1.0-Experimental-1016.i1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwen2.5-32b-arliai-rpmax-v1.3" - urls: - - https://huggingface.co/ArliAI/Qwen2.5-32B-ArliAI-RPMax-v1.3 - - https://huggingface.co/bartowski/Qwen2.5-32B-ArliAI-RPMax-v1.3-GGUF - description: | - RPMax is a series of models that are trained on a diverse set of curated creative writing and RP datasets with a focus on variety and deduplication. This model is designed to be highly creative and non-repetitive by making sure no two entries in the dataset have repeated characters or situations, which makes sure the model does not latch on to a certain personality and be capable of understanding and acting appropriately to any characters or situations. - Many RPMax users mentioned that these models does not feel like any other RP models, having a different writing style and generally doesn't feel in-bred. 
- overrides: - parameters: - model: Qwen2.5-32B-ArliAI-RPMax-v1.3-Q4_K_M.gguf - files: - - filename: Qwen2.5-32B-ArliAI-RPMax-v1.3-Q4_K_M.gguf - sha256: 51b369068b124165b1b8c253371b88b573af9dd350e331ce93d7e47b6b710003 - uri: huggingface://bartowski/Qwen2.5-32B-ArliAI-RPMax-v1.3-GGUF/Qwen2.5-32B-ArliAI-RPMax-v1.3-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "q2.5-ms-mistoria-72b-i1" - icon: https://cdn-uploads.huggingface.co/production/uploads/64545af5ec40bbbd01242ca6/5LOvUFYiMMw6pcEsOhmo2.webp - urls: - - https://huggingface.co/Steelskull/Q2.5-MS-Mistoria-72b - - https://huggingface.co/mradermacher/Q2.5-MS-Mistoria-72b-i1-GGUF - description: | - This model is my fist attempt at a 72b model as usual my goal is to merge the robust storytelling of mutiple models while attempting to maintain intelligence. - Merge of: - - model: EVA-UNIT-01/EVA-Qwen2.5-72B-v0.1 - - model: ZeusLabs/Chronos-Platinum-72B - - model: shuttleai/shuttle-3 - overrides: - parameters: - model: Q2.5-MS-Mistoria-72b.i1-Q4_K_M.gguf - files: - - filename: Q2.5-MS-Mistoria-72b.i1-Q4_K_M.gguf - sha256: f51ac3db855259c0132070e7bb9f58b67538103ffb3c716880ceef3bb09d43d9 - uri: huggingface://mradermacher/Q2.5-MS-Mistoria-72b-i1-GGUF/Q2.5-MS-Mistoria-72b.i1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "athene-v2-agent" - icon: https://huggingface.co/Nexusflow/Athene-V2-Agent/resolve/main/agent.png - urls: - - https://huggingface.co/Nexusflow/Athene-V2-Agent - - https://huggingface.co/bartowski/Athene-V2-Agent-GGUF - description: "Athene-V2-Agent is an open-source Agent LLM that surpasses the state-of-the-art in function calling and agentic capabilities.\n\n\U0001F4AA Versatile Agent Capability: Athene-V2-Agent is an agent model, capable of operating in environments with deeply nested dependencies with the environment. 
It is capable of reasoning and doing planning for trajectories with many tool calls necessary to answer a single query.\n\n\U0001F4CA Performance Highlights: Athene-V2-Agent surpasses GPT-4o in single FC tasks by 18% in function calling success rates, and by 17% in Agentic success rates.\n\n\U0001F527 Generalization to the Unseen: Athene-V2-Agent has never been trained on the functions or agentic settings used in evaluation.\n" - overrides: - parameters: - model: Athene-V2-Agent-Q4_K_M.gguf - files: - - filename: Athene-V2-Agent-Q4_K_M.gguf - sha256: 2829d205519da34852c374286d42a4403f3be012ea56424e88ebcb8dc89676ad - uri: huggingface://bartowski/Athene-V2-Agent-GGUF/Athene-V2-Agent-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "athene-v2-chat" - urls: - - https://huggingface.co/Nexusflow/Athene-V2-Chat - - https://huggingface.co/bartowski/Athene-V2-Chat-GGUF - description: | - We introduce Athene-V2-Chat-72B, an open-weights LLM on-par with GPT-4o across benchmarks. It is trained through RLHF with Qwen-2.5-72B-Instruct as base model. Athene-V2-Chat-72B excels in chat, math, and coding. Its sister model, Athene-V2-Agent-72B, surpasses GPT-4o in complex function calling and agentic applications. - overrides: - parameters: - model: Athene-V2-Chat-Q4_K_M.gguf - files: - - filename: Athene-V2-Chat-Q4_K_M.gguf - sha256: bda8b784ad55982891e5aa69b08ce4030c91a2e28ad9c4c35284d45d3c7aeb16 - uri: huggingface://bartowski/Athene-V2-Chat-GGUF/Athene-V2-Chat-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwen2.5-7b-nerd-uncensored-v1.7" - urls: - - https://huggingface.co/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.7 - - https://huggingface.co/mradermacher/Qwen2.5-7B-nerd-uncensored-v1.7-GGUF - description: | - Model created by analyzing and selecting the optimal layers from other Qwen2.5-7B models based on their dimensional utilization efficiency, measured by the Normalized Effective Rank (NER). 
Computed like: - Input: Weight matrix for each model layer - Compute singular values σᵢ where σᵢ ≥ 0 # σᵢ represents the importance of each dimension - Filter values above numerical threshold (>1e-12) - Sum all singular values: S = Σσᵢ # S acts as normalization factor - Create probability distribution: pᵢ = σᵢ/S # converts singular values to probabilities summing to 1 - Compute Shannon entropy: H = -Σ(pᵢ * log₂(pᵢ)) # measures information content - Calculate maximum possible entropy: H_max = log₂(n) - Final NER score = H/H_max # normalizes score to [0,1] range - Results in value between 0 and 1 for each model layer - overrides: - parameters: - model: Qwen2.5-7B-nerd-uncensored-v1.7.Q4_K_M.gguf - files: - - filename: Qwen2.5-7B-nerd-uncensored-v1.7.Q4_K_M.gguf - sha256: 42cf7a96784dc8f25c61c2404620c3e6548a024caa8dff6e435d7c86400d7ab8 - uri: huggingface://mradermacher/Qwen2.5-7B-nerd-uncensored-v1.7-GGUF/Qwen2.5-7B-nerd-uncensored-v1.7.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "evathene-v1.0" - urls: - - https://huggingface.co/sophosympatheia/Evathene-v1.0 - - https://huggingface.co/bartowski/Evathene-v1.0-GGUF - description: | - This 72B parameter model is a merge of Nexusflow/Athene-V2-Chat with EVA-UNIT-01/EVA-Qwen2.5-72B-v0.1. See the merge recipe below for details. - - This model is uncensored. You are responsible for whatever you do with it. - - This model was designed for roleplaying and storytelling and I think it does well at both. It may also perform well at other tasks but I have not tested its performance in other areas. 
- overrides: - parameters: - model: Evathene-v1.0-Q4_K_M.gguf - files: - - filename: Evathene-v1.0-Q4_K_M.gguf - sha256: 96401ba9d798faa8a01f579b54523c5f75277e91bf1f0eee93db285f76f61e7e - uri: huggingface://bartowski/Evathene-v1.0-GGUF/Evathene-v1.0-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "miniclaus-qw1.5b-unamgs" - icon: https://huggingface.co/fblgit/miniclaus-qw1.5B-UNAMGS/resolve/main/miniclaus_qw15-UNAMGS.png - urls: - - https://huggingface.co/fblgit/miniclaus-qw1.5B-UNAMGS - - https://huggingface.co/bartowski/miniclaus-qw1.5B-UNAMGS-GGUF - description: | - Trained with Magpie-Align/Magpie-Pro-MT-300K-v0.1 - Using MGS & UNA (MLP) on this tiny but powerful model. - overrides: - parameters: - model: miniclaus-qw1.5B-UNAMGS-Q4_K_M.gguf - files: - - filename: miniclaus-qw1.5B-UNAMGS-Q4_K_M.gguf - sha256: a0dadd7147cc4a8e8df59659556e4d824ef5c26fd2f39381fe467b2ff9cc1289 - uri: huggingface://bartowski/miniclaus-qw1.5B-UNAMGS-GGUF/miniclaus-qw1.5B-UNAMGS-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwen2.5-3b-smart-i1" - urls: - - https://huggingface.co/bunnycore/Qwen2.5-3B-Smart - - https://huggingface.co/mradermacher/Qwen2.5-3B-Smart-i1-GGUF - description: | - This model was merged using the passthrough merge method using bunnycore/Qwen2.5-3B-RP-Mix + bunnycore/Qwen2.5-3b-Smart-lora_model as a base. - overrides: - parameters: - model: Qwen2.5-3B-Smart.i1-Q4_K_M.gguf - files: - - filename: Qwen2.5-3B-Smart.i1-Q4_K_M.gguf - sha256: 4cfffa4478191b3ac5f54b0e2c5c3f60883322cf705d74f9651715b70f3779f4 - uri: huggingface://mradermacher/Qwen2.5-3B-Smart-i1-GGUF/Qwen2.5-3B-Smart.i1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "steyrcannon-0.2-qwen2.5-72b" - urls: - - https://huggingface.co/KaraKaraWitch/SteyrCannon-0.2-Qwen2.5-72b - - https://huggingface.co/mradermacher/SteyrCannon-0.2-Qwen2.5-72b-GGUF - description: | - SteyrCannon-0.2 is an updated revision from the original SteyrCannon. This uses EVA-Qwen2.5-72B-v0.2. 
Nothing else has changed.This model was merged using the TIES merge method using EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2 as a base. - The following models were included in the merge: - anthracite-org/magnum-v4-72b - EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2 - overrides: - parameters: - model: SteyrCannon-0.2-Qwen2.5-72b.Q4_K_M.gguf - files: - - filename: SteyrCannon-0.2-Qwen2.5-72b.Q4_K_M.gguf - sha256: b34c08b77ffd25ccb0ca50b167f2215e784689205c93a0903fa9435b6cc187f0 - uri: huggingface://mradermacher/SteyrCannon-0.2-Qwen2.5-72b-GGUF/SteyrCannon-0.2-Qwen2.5-72b.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "q2.5-ms-mistoria-72b-v2" - icon: https://cdn-uploads.huggingface.co/production/uploads/64545af5ec40bbbd01242ca6/5LOvUFYiMMw6pcEsOhmo2.webp - urls: - - https://huggingface.co/Steelskull/Q2.5-MS-Mistoria-72b-v2 - - https://huggingface.co/bartowski/Q2.5-MS-Mistoria-72b-v2-GGUF - description: | - This model is my second attempt at a 72b model, as usual, my goal is to merge the robust storytelling of mutiple models while attempting to maintain intelligence. - models: - - model: EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2 - - model: ZeusLabs/Chronos-Platinum-72B - - model: shuttleai/shuttle-3 - overrides: - parameters: - model: Q2.5-MS-Mistoria-72b-v2-Q4_K_M.gguf - files: - - filename: Q2.5-MS-Mistoria-72b-v2-Q4_K_M.gguf - sha256: 33df8aac5a790d1c286fe0fc4f9d340311f282eca19b78db6f7abb845923425c - uri: huggingface://bartowski/Q2.5-MS-Mistoria-72b-v2-GGUF/Q2.5-MS-Mistoria-72b-v2-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "eva-qwen2.5-72b-v0.2" - urls: - - https://huggingface.co/EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2 - - https://huggingface.co/bartowski/EVA-Qwen2.5-72B-v0.2-GGUF - description: | - A RP/storywriting specialist model, full-parameter finetune of Qwen2.5-72B on mixture of synthetic and natural data. - It uses Celeste 70B 0.1 data mixture, greatly expanding it to improve versatility, creativity and "flavor" of the resulting model. 
- - Version notes for 0.2: Optimized training hyperparameters and increased sequence length. Better instruction following deeper into context and less repetition. - overrides: - parameters: - model: EVA-Qwen2.5-72B-v0.2-Q4_K_M.gguf - files: - - filename: EVA-Qwen2.5-72B-v0.2-Q4_K_M.gguf - sha256: 03ea0ecac3ee24a332ca43cf925b669c58714b9754be0f4bc232bd996681ef4b - uri: huggingface://bartowski/EVA-Qwen2.5-72B-v0.2-GGUF/EVA-Qwen2.5-72B-v0.2-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwq-32b-preview" - urls: - - https://huggingface.co/Qwen/QwQ-32B-Preview - - https://huggingface.co/bartowski/QwQ-32B-Preview-GGUF - description: | - QwQ-32B-Preview is an experimental research model developed by the Qwen Team, focused on advancing AI reasoning capabilities. As a preview release, it demonstrates promising analytical abilities while having several important limitations: - - Language Mixing and Code-Switching: The model may mix languages or switch between them unexpectedly, affecting response clarity. - Recursive Reasoning Loops: The model may enter circular reasoning patterns, leading to lengthy responses without a conclusive answer. - Safety and Ethical Considerations: The model requires enhanced safety measures to ensure reliable and secure performance, and users should exercise caution when deploying it. - Performance and Benchmark Limitations: The model excels in math and coding but has room for improvement in other areas, such as common sense reasoning and nuanced language understanding. 
- overrides: - parameters: - model: QwQ-32B-Preview-Q4_K_M.gguf - files: - - filename: QwQ-32B-Preview-Q4_K_M.gguf - sha256: c499801e682e2379528090c50e106837ca1d69dc3bf3ff3a9af830a0eb49cdf6 - uri: huggingface://bartowski/QwQ-32B-Preview-GGUF/QwQ-32B-Preview-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "q2.5-32b-slush-i1" - urls: - - https://huggingface.co/crestf411/Q2.5-32B-Slush - - https://huggingface.co/mradermacher/Q2.5-32B-Slush-i1-GGUF - description: | - Slush is a two-stage model trained with high LoRA dropout, where stage 1 is a pretraining continuation on the base model, aimed at boosting the model's creativity and writing capabilities. This is then merged into the instruction tune model, and stage 2 is a fine tuning step on top of this to further enhance its roleplaying capabilities and/or to repair any damage caused in the stage 1 merge. - This is still early stage. As always, feedback is welcome, and begone if you demand perfection. - The second stage, like the Sunfall series, follows the Silly Tavern preset (ChatML), so ymmv in particular if you use some other tool and/or preset. - overrides: - parameters: - model: Q2.5-32B-Slush.i1-Q4_K_M.gguf - files: - - filename: Q2.5-32B-Slush.i1-Q4_K_M.gguf - sha256: 95aecaf43077dabc72d3b556923ede2563325e1c89863800229cfa8b7f1c9659 - uri: huggingface://mradermacher/Q2.5-32B-Slush-i1-GGUF/Q2.5-32B-Slush.i1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwestion-24b" - urls: - - https://huggingface.co/CultriX/Qwestion-14B - - https://huggingface.co/mradermacher/Qwestion-24B-GGUF - description: | - This model was merged using the DARE TIES merge method using Qwen/Qwen2.5-14B as a base. 
- The following models were included in the merge: - allknowingroger/Qwenslerp2-14B - rombodawg/Rombos-LLM-V2.6-Qwen-14b - VAGOsolutions/SauerkrautLM-v2-14b-DPO - CultriX/Qwen2.5-14B-Wernicke - overrides: - parameters: - model: Qwestion-24B.Q4_K_M.gguf - files: - - filename: Qwestion-24B.Q4_K_M.gguf - sha256: 5d493bd81cfeef66d80101260145ab1d1d0428ef2191edce62b58391bd0fff0e - uri: huggingface://mradermacher/Qwestion-24B-GGUF/Qwestion-24B.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "teleut-7b" - icon: https://cdn-uploads.huggingface.co/production/uploads/634262af8d8089ebaefd410e/UqIi8eztdptvt52Mak_1K.png - urls: - - https://huggingface.co/allura-org/Teleut-7b - - https://huggingface.co/QuantFactory/Teleut-7b-GGUF - description: | - A replication attempt of Tulu 3 on the Qwen 2.5 base models. - overrides: - parameters: - model: Teleut-7b.Q4_K_M.gguf - files: - - filename: Teleut-7b.Q4_K_M.gguf - sha256: 844a633ea01d793c638e99f2e07413606b3812b759e9264fbaf69c8d94eaa093 - uri: huggingface://QuantFactory/Teleut-7b-GGUF/Teleut-7b.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwen2.5-7b-homercreative-mix" - urls: - - https://huggingface.co/ZeroXClem/Qwen2.5-7B-HomerCreative-Mix - - https://huggingface.co/QuantFactory/Qwen2.5-7B-HomerCreative-Mix-GGUF - description: "ZeroXClem/Qwen2.5-7B-HomerCreative-Mix is an advanced language model meticulously crafted by merging four pre-trained models using the powerful mergekit framework. This fusion leverages the Model Stock merge method to combine the creative prowess of Qandora, the instructive capabilities of Qwen-Instruct-Fusion, the sophisticated blending of HomerSlerp1, and the foundational conversational strengths of Homer-v0.5-Qwen2.5-7B. 
The resulting model excels in creative text generation, contextual understanding, and dynamic conversational interactions.\n\U0001F680 Merged Models\n\nThis model merge incorporates the following:\n\n bunnycore/Qandora-2.5-7B-Creative: Specializes in creative text generation, enhancing the model's ability to produce imaginative and diverse content.\n\n bunnycore/Qwen2.5-7B-Instruct-Fusion: Focuses on instruction-following capabilities, improving the model's performance in understanding and executing user commands.\n\n allknowingroger/HomerSlerp1-7B: Utilizes spherical linear interpolation (SLERP) to blend model weights smoothly, ensuring a harmonious integration of different model attributes.\n\n newsbang/Homer-v0.5-Qwen2.5-7B: Acts as the foundational conversational model, providing robust language comprehension and generation capabilities.\n" - overrides: - parameters: - model: Qwen2.5-7B-HomerCreative-Mix.Q4_K_M.gguf - files: - - filename: Qwen2.5-7B-HomerCreative-Mix.Q4_K_M.gguf - sha256: fc3fdb41e068646592f89a8ae62d7b330f2bd4e97bf615aef2977930977c8ba5 - uri: huggingface://QuantFactory/Qwen2.5-7B-HomerCreative-Mix-GGUF/Qwen2.5-7B-HomerCreative-Mix.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "cybercore-qwen-2.1-7b" - urls: - - https://huggingface.co/bunnycore/CyberCore-Qwen-2.1-7B - - https://huggingface.co/QuantFactory/CyberCore-Qwen-2.1-7B-GGUF - description: | - This model was merged using the TIES merge method using rombodawg/Rombos-LLM-V2.5-Qwen-7b as a base. 
- Models Merged - fblgit/cybertron-v4-qw7B-UNAMGS + bunnycore/Qwen-2.1-7b-Persona-lora_model - fblgit/cybertron-v4-qw7B-MGS + bunnycore/Qwen-2.1-7b-Persona-lora_model - overrides: - parameters: - model: CyberCore-Qwen-2.1-7B.Q4_K_M.gguf - files: - - filename: CyberCore-Qwen-2.1-7B.Q4_K_M.gguf - sha256: 726042707a4cec29ca0355b4dc7c53a807b307d08aa8a3d4a9e76aefbbbcaadf - uri: huggingface://QuantFactory/CyberCore-Qwen-2.1-7B-GGUF/CyberCore-Qwen-2.1-7B.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "homercreativeanvita-mix-qw7b" - icon: https://huggingface.co/suayptalha/HomerCreativeAnvita-Mix-Qw7B/resolve/main/HomerCreativeAnvita.jpeg - urls: - - https://huggingface.co/suayptalha/HomerCreativeAnvita-Mix-Qw7B - - https://huggingface.co/QuantFactory/HomerCreativeAnvita-Mix-Qw7B-GGUF - description: | - This model is currently ranked #1 on the Open LLM Leaderboard among models up to 13B parameters! - Merge Method - - This model was merged using the SLERP merge method. - Models Merged - - The following models were included in the merge: - - ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix - ZeroXClem/Qwen2.5-7B-HomerCreative-Mix - overrides: - parameters: - model: HomerCreativeAnvita-Mix-Qw7B.Q4_K_M.gguf - files: - - filename: HomerCreativeAnvita-Mix-Qw7B.Q4_K_M.gguf - sha256: a356f279a104bff0bbc2ef7ec136c1e774153de8893bf988083e96fb7f4bc053 - uri: huggingface://QuantFactory/HomerCreativeAnvita-Mix-Qw7B-GGUF/HomerCreativeAnvita-Mix-Qw7B.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "math-iio-7b-instruct" - icon: https://cdn-uploads.huggingface.co/production/uploads/65bb837dbfb878f46c77de4c/faLfR-doaWP_BLUkOQrbq.png - urls: - - https://huggingface.co/prithivMLmods/Math-IIO-7B-Instruct - - https://huggingface.co/QuantFactory/Math-IIO-7B-Instruct-GGUF - description: | - The Math IIO 7B Instruct is a fine-tuned language model based on the robust Qwen2.5-7B-Instruct architecture. 
This model has been specifically trained to excel in single-shot mathematical reasoning and instruction-based tasks, making it a reliable choice for educational, analytical, and problem-solving applications. - Key Features: - Math-Optimized Capabilities: - The model is designed to handle complex mathematical problems, step-by-step calculations, and reasoning tasks. - - Instruction-Tuned: - Fine-tuned for better adherence to structured queries and task-oriented prompts, enabling clear and concise outputs. - - Large Vocabulary: - Equipped with an extensive tokenizer configuration and custom tokens to ensure precise mathematical notation support. - overrides: - parameters: - model: Math-IIO-7B-Instruct.Q4_K_M.gguf - files: - - filename: Math-IIO-7B-Instruct.Q4_K_M.gguf - sha256: 8ffda0b6a43eb9997dfd7db48fe3bd0970fd1b9b86fb68f082c38622a48b58f4 - uri: huggingface://QuantFactory/Math-IIO-7B-Instruct-GGUF/Math-IIO-7B-Instruct.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "virtuoso-small" - icon: https://avatars.githubusercontent.com/u/126496414 - urls: - - https://huggingface.co/arcee-ai/Virtuoso-Small-GGUF - description: | - Virtuoso-Small is the debut public release of the Virtuoso series of models by Arcee.ai, designed to bring cutting-edge generative AI capabilities to organizations and developers in a compact, efficient form. With 14 billion parameters, Virtuoso-Small is an accessible entry point for high-quality instruction-following, complex reasoning, and business-oriented generative AI tasks. 
- overrides: - parameters: - model: Virtuoso-Small-Q4_K_M.gguf - files: - - filename: Virtuoso-Small-Q4_K_M.gguf - sha256: 07db215cdfcb05036567017fe20e50e60cb2da28d1f9a8251cc4f18c8caa247f - uri: huggingface://arcee-ai/Virtuoso-Small-GGUF/Virtuoso-Small-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwen2.5-7b-homeranvita-nerdmix" - urls: - - https://huggingface.co/ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix - - https://huggingface.co/QuantFactory/Qwen2.5-7B-HomerAnvita-NerdMix-GGUF - description: | - ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix is an advanced language model meticulously crafted by merging five pre-trained models using the powerful mergekit framework. This fusion leverages the Model Stock merge method to combine the creative prowess of Qandora, the instructive capabilities of Qwen-Instruct-Fusion, the sophisticated blending of HomerSlerp1, the mathematical precision of Cybertron-MGS, and the uncensored expertise of Qwen-Nerd. The resulting model excels in creative text generation, contextual understanding, technical reasoning, and dynamic conversational interactions. - overrides: - parameters: - model: Qwen2.5-7B-HomerAnvita-NerdMix.Q4_K_M.gguf - files: - - filename: Qwen2.5-7B-HomerAnvita-NerdMix.Q4_K_M.gguf - sha256: 73db2ca3ab50e8627352078988cd173e7447c5e8199a7db9e554602da1362e5f - uri: huggingface://QuantFactory/Qwen2.5-7B-HomerAnvita-NerdMix-GGUF/Qwen2.5-7B-HomerAnvita-NerdMix.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwen2.5-math-14b-instruct" - urls: - - https://huggingface.co/qingy2024/Qwen2.5-Math-14B-Instruct-Preview - - https://huggingface.co/QuantFactory/Qwen2.5-Math-14B-Instruct-GGUF - description: | - This Qwen 2.5 model was trained 2x faster with Unsloth and Huggingface's TRL library. - Fine-tuned it for 400 steps on garage-bAInd/Open-Platypus with a batch size of 3. 
- overrides: - parameters: - model: Qwen2.5-Math-14B-Instruct.Q4_K_M.gguf - files: - - filename: Qwen2.5-Math-14B-Instruct.Q4_K_M.gguf - sha256: 14e672394738a7d9f14a6cb16fd9a649b113a19a8b4934f9c18299fc4e286ab6 - uri: huggingface://QuantFactory/Qwen2.5-Math-14B-Instruct-GGUF/Qwen2.5-Math-14B-Instruct.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "sailor2-1b-chat" - icon: https://huggingface.co/sail/Sailor2-1B-Chat/resolve/main/sailor2_banner.jpg - urls: - - https://huggingface.co/sail/Sailor2-1B-Chat - - https://huggingface.co/bartowski/Sailor2-1B-Chat-GGUF - description: | - Sailor2 is a community-driven initiative that brings cutting-edge multilingual language models to South-East Asia (SEA). Our research highlights a strong demand for models in the 8B and 20B parameter range for production use, alongside 1B models for specialized applications, such as speculative decoding and research purposes. These models, released under the Apache 2.0 license, provide enhanced accessibility to advanced language technologies across the region. - Sailor2 builds upon the foundation of the awesome multilingual model Qwen 2.5 and is continuously pre-trained on 500B tokens to support 15 languages better with a unified model. These languages include English, Chinese, Burmese, Cebuano, Ilocano, Indonesian, Javanese, Khmer, Lao, Malay, Sundanese, Tagalog, Thai, Vietnamese, and Waray. By addressing the growing demand for diverse, robust, and accessible language models, Sailor2 seeks to serve the underserved in SEA areas with open, inclusive, and accessible multilingual LLMs. The Sailor2 model comes in three sizes, 1B, 8B, and 20B, which are expanded from the Qwen2.5 base models of 0.5B, 7B, and 14B, respectively. 
- overrides: - parameters: - model: Sailor2-1B-Chat-Q4_K_M.gguf - files: - - filename: Sailor2-1B-Chat-Q4_K_M.gguf - sha256: 782e8abed13d51a2083eadfb2f6d94c2cd77940532f612a99e6f6bec9b3501d4 - uri: huggingface://bartowski/Sailor2-1B-Chat-GGUF/Sailor2-1B-Chat-Q4_K_M.gguf -- !!merge <<: *qwen25 - icon: https://huggingface.co/sail/Sailor2-1B-Chat/resolve/main/sailor2_banner.jpg - name: "sailor2-8b-chat" - urls: - - https://huggingface.co/bartowski/Sailor2-8B-Chat-GGUF - description: | - Sailor2 is a community-driven initiative that brings cutting-edge multilingual language models to South-East Asia (SEA). Our research highlights a strong demand for models in the 8B and 20B parameter range for production use, alongside 1B models for specialized applications, such as speculative decoding and research purposes. These models, released under the Apache 2.0 license, provide enhanced accessibility to advanced language technologies across the region. - Sailor2 builds upon the foundation of the awesome multilingual model Qwen 2.5 and is continuously pre-trained on 500B tokens to support 15 languages better with a unified model. These languages include English, Chinese, Burmese, Cebuano, Ilocano, Indonesian, Javanese, Khmer, Lao, Malay, Sundanese, Tagalog, Thai, Vietnamese, and Waray. By addressing the growing demand for diverse, robust, and accessible language models, Sailor2 seeks to serve the underserved in SEA areas with open, inclusive, and accessible multilingual LLMs. The Sailor2 model comes in three sizes, 1B, 8B, and 20B, which are expanded from the Qwen2.5 base models of 0.5B, 7B, and 14B, respectively. 
- overrides: - parameters: - model: Sailor2-8B-Chat-Q4_K_M.gguf - files: - - filename: Sailor2-8B-Chat-Q4_K_M.gguf - sha256: 1a6aaadd6f6ef9c2290d66b348ebcbd6fdec542834cde622498fbd467d966103 - uri: huggingface://bartowski/Sailor2-8B-Chat-GGUF/Sailor2-8B-Chat-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "sailor2-20b-chat" - icon: https://huggingface.co/sail/Sailor2-1B-Chat/resolve/main/sailor2_banner.jpg - urls: - - https://huggingface.co/bartowski/Sailor2-20B-Chat-GGUF - description: | - Sailor2 is a community-driven initiative that brings cutting-edge multilingual language models to South-East Asia (SEA). Our research highlights a strong demand for models in the 8B and 20B parameter range for production use, alongside 1B models for specialized applications, such as speculative decoding and research purposes. These models, released under the Apache 2.0 license, provide enhanced accessibility to advanced language technologies across the region. - Sailor2 builds upon the foundation of the awesome multilingual model Qwen 2.5 and is continuously pre-trained on 500B tokens to support 15 languages better with a unified model. These languages include English, Chinese, Burmese, Cebuano, Ilocano, Indonesian, Javanese, Khmer, Lao, Malay, Sundanese, Tagalog, Thai, Vietnamese, and Waray. By addressing the growing demand for diverse, robust, and accessible language models, Sailor2 seeks to serve the underserved in SEA areas with open, inclusive, and accessible multilingual LLMs. The Sailor2 model comes in three sizes, 1B, 8B, and 20B, which are expanded from the Qwen2.5 base models of 0.5B, 7B, and 14B, respectively. 
- overrides: - parameters: - model: Sailor2-20B-Chat-Q4_K_M.gguf - files: - - filename: Sailor2-20B-Chat-Q4_K_M.gguf - sha256: 0cf8fcd367accee19702ef15ee964bddd5035bde034afddd838f818e7655534a - uri: huggingface://bartowski/Sailor2-20B-Chat-GGUF/Sailor2-20B-Chat-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "72b-qwen2.5-kunou-v1" - icon: https://huggingface.co/Sao10K/72B-Qwen2.5-Kunou-v1/resolve/main/knn.png - urls: - - https://huggingface.co/Sao10K/72B-Qwen2.5-Kunou-v1 - - https://huggingface.co/bartowski/72B-Qwen2.5-Kunou-v1-GGUF - description: | - I do not really have anything planned for this model other than it being a generalist, and Roleplay Model? It was just something made and planned in minutes. - Same with the 14 and 32B version. - Kunou's the name of an OC I worked on for a couple of years, for a... fanfic. mmm... - - A kind-of successor to L3-70B-Euryale-v2.2 in all but name? I'm keeping Stheno/Euryale lineage to Llama series for now. - I had a version made on top of Nemotron, a supposed Euryale 2.4 but that flopped hard, it was not my cup of tea. - This version is basically a better, more cleaned up Dataset used on Euryale and Stheno. - overrides: - parameters: - model: 72B-Qwen2.5-Kunou-v1-Q4_K_M.gguf - files: - - filename: 72B-Qwen2.5-Kunou-v1-Q4_K_M.gguf - sha256: 91907f29746625a62885793475956220b81d8a5a34b53686a1acd1d03fd403ea - uri: huggingface://bartowski/72B-Qwen2.5-Kunou-v1-GGUF/72B-Qwen2.5-Kunou-v1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "evathene-v1.3" - urls: - - https://huggingface.co/sophosympatheia/Evathene-v1.3 - - https://huggingface.co/bartowski/Evathene-v1.3-GGUF - description: | - This 72B parameter model is a merge of sophosympatheia/Evathene-v1.1 and sophosympatheia/Evathene-v1.2. See the merge recipe below for details. 
- overrides: - parameters: - model: Evathene-v1.3-Q4_K_M.gguf - files: - - filename: Evathene-v1.3-Q4_K_M.gguf - sha256: 0f54909b3ddca514994ee16417da8750f56e7bd59581b46ac47625c230e29d1f - uri: huggingface://bartowski/Evathene-v1.3-GGUF/Evathene-v1.3-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "fusechat-qwen-2.5-7b-instruct" - icon: https://huggingface.co/FuseAI/FuseChat-Qwen-2.5-7B-Instruct/resolve/main/FuseChat-3.0.png - urls: - - https://huggingface.co/FuseAI/FuseChat-Qwen-2.5-7B-Instruct - - https://huggingface.co/bartowski/FuseChat-Qwen-2.5-7B-Instruct-GGUF - description: | - We present FuseChat-3.0, a series of models crafted to enhance performance by integrating the strengths of multiple source LLMs into more compact target LLMs. To achieve this fusion, we utilized four powerful source LLMs: Gemma-2-27B-It, Mistral-Large-Instruct-2407, Qwen-2.5-72B-Instruct, and Llama-3.1-70B-Instruct. For the target LLMs, we employed three widely-used smaller models—Llama-3.1-8B-Instruct, Gemma-2-9B-It, and Qwen-2.5-7B-Instruct—along with two even more compact models—Llama-3.2-3B-Instruct and Llama-3.2-1B-Instruct. The implicit model fusion process involves a two-stage training pipeline comprising Supervised Fine-Tuning (SFT) to mitigate distribution discrepancies between target and source LLMs, and Direct Preference Optimization (DPO) for learning preferences from multiple source LLMs. The resulting FuseChat-3.0 models demonstrated substantial improvements in tasks related to general conversation, instruction following, mathematics, and coding. Notably, when Llama-3.1-8B-Instruct served as the target LLM, our fusion approach achieved an average improvement of 6.8 points across 14 benchmarks. Moreover, it showed significant improvements of 37.1 and 30.1 points on instruction-following test sets AlpacaEval-2 and Arena-Hard respectively. We have released the FuseChat-3.0 models on Huggingface, stay tuned for the forthcoming dataset and code. 
- overrides: - parameters: - model: FuseChat-Qwen-2.5-7B-Instruct-Q4_K_M.gguf - files: - - filename: FuseChat-Qwen-2.5-7B-Instruct-Q4_K_M.gguf - sha256: 8cd8c317769f03125ac753c836ac92c5a76ee0b35502811d0e65bcbb8df9d55c - uri: huggingface://bartowski/FuseChat-Qwen-2.5-7B-Instruct-GGUF/FuseChat-Qwen-2.5-7B-Instruct-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "neumind-math-7b-instruct" - urls: - - https://huggingface.co/prithivMLmods/Neumind-Math-7B-Instruct - - https://huggingface.co/QuantFactory/Neumind-Math-7B-Instruct-GGUF - description: | - The Neumind-Math-7B-Instruct is a fine-tuned model based on Qwen2.5-7B-Instruct, optimized for mathematical reasoning, step-by-step problem-solving, and instruction-based tasks in the mathematics domain. The model is designed for applications requiring structured reasoning, numerical computations, and mathematical proof generation. - overrides: - parameters: - model: Neumind-Math-7B-Instruct.Q4_K_M.gguf - files: - - filename: Neumind-Math-7B-Instruct.Q4_K_M.gguf - sha256: 3250abadeae4234e06dfaf7cf86fe871fe021e6c2dfcb4542c2a4f412d71e28c - uri: huggingface://QuantFactory/Neumind-Math-7B-Instruct-GGUF/Neumind-Math-7B-Instruct.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwen2-vl-72b-instruct" - urls: - - https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct - - https://huggingface.co/bartowski/Qwen2-VL-72B-Instruct-GGUF - description: | - We're excited to unveil Qwen2-VL, the latest iteration of our Qwen-VL model, representing nearly a year of innovation. - Key Enhancements: - SoTA understanding of images of various resolution & ratio: Qwen2-VL achieves state-of-the-art performance on visual understanding benchmarks, including MathVista, DocVQA, RealWorldQA, MTVQA, etc. - - Understanding videos of 20min+: Qwen2-VL can understand videos over 20 minutes for high-quality video-based question answering, dialog, content creation, etc. 
- - Agent that can operate your mobiles, robots, etc.: with the abilities of complex reasoning and decision making, Qwen2-VL can be integrated with devices like mobile phones, robots, etc., for automatic operation based on visual environment and text instructions. - - Multilingual Support: to serve global users, besides English and Chinese, Qwen2-VL now supports the understanding of texts in different languages inside images, including most European languages, Japanese, Korean, Arabic, Vietnamese, etc. - overrides: - parameters: - model: Qwen2-VL-72B-Instruct-Q4_K_M.gguf - files: - - filename: Qwen2-VL-72B-Instruct-Q4_K_M.gguf - sha256: 0def10ee892a4d4c72ba3807d150de2e1f600edd981d15d402e3d25753cf168d - uri: huggingface://bartowski/Qwen2-VL-72B-Instruct-GGUF/Qwen2-VL-72B-Instruct-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "tq2.5-14b-aletheia-v1" - icon: https://huggingface.co/allura-org/TQ2.5-14B-Aletheia-v1/resolve/main/aletheia.png - urls: - - https://huggingface.co/allura-org/TQ2.5-14B-Aletheia-v1 - - https://huggingface.co/bartowski/TQ2.5-14B-Aletheia-v1-GGUF - description: | - RP/Story hybrid model, merge of Sugarquill and Neon. As with Gemma version, I wanted to preserve Sugarquill's creative spark, while making the model more steerable for RP. It proved to be more difficult this time, but I quite like the result regardless, even if the model is still somewhat temperamental. - - Should work for both RP and storywriting, either on raw completion or with back-and-forth cowriting in chat mode. Seems to be quite sensitive to low depth instructions and samplers. - - Thanks to Toasty and Fizz for testing and giving feedback - - Model was created by Auri. 
- overrides: - parameters: - model: TQ2.5-14B-Aletheia-v1-Q4_K_M.gguf - files: - - filename: TQ2.5-14B-Aletheia-v1-Q4_K_M.gguf - sha256: 8739a9575520f8460e83905f3e085883dd71ef2c9fa40d36d4e0a3fff003440c - uri: huggingface://bartowski/TQ2.5-14B-Aletheia-v1-GGUF/TQ2.5-14B-Aletheia-v1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "tq2.5-14b-neon-v1" - icon: https://huggingface.co/allura-org/TQ2.5-14B-Neon-v1/resolve/main/neon.png - urls: - - https://huggingface.co/allura-org/TQ2.5-14B-Neon-v1 - - https://huggingface.co/bartowski/TQ2.5-14B-Neon-v1-GGUF - description: | - RP finetune of Supernova-Medius. Turned out surprisingly nice on it's own, I honestly made it only as a merge fuel, but it impressed me and Prodeus enough to release it separately (history repeats I guess, Sugarquill also started out this way). Quite interesting prose, definitely quite distinct from Supernova or EVA for that matter. Instruction following is decent as well. Not really much to say about this one, just a decent RP model, tbh. Euryale-inspired I guess. - overrides: - parameters: - model: TQ2.5-14B-Neon-v1-Q4_K_M.gguf - files: - - filename: TQ2.5-14B-Neon-v1-Q4_K_M.gguf - sha256: cefc7409b21e03e4fcd64940e30f6a0c17c5a4a89e0ba0811f1b9720825d2309 - uri: huggingface://bartowski/TQ2.5-14B-Neon-v1-GGUF/TQ2.5-14B-Neon-v1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "miscii-14b-1028" - icon: https://i.imgur.com/hkiubT4.jpeg - urls: - - https://huggingface.co/sthenno-com/miscii-14b-1028 - - https://huggingface.co/QuantFactory/miscii-14b-1028-GGUF - description: | - miscii-14b-1028 is a 14-billion parameter language model based on the Qwen2.5-14B-Instruct model. It is designed for chat and conversational AI tasks, with a focus on role-based instructions. 
- overrides: - parameters: - model: miscii-14b-1028.Q4_K_M.gguf - files: - - filename: miscii-14b-1028.Q4_K_M.gguf - sha256: 0e57bc628c79a1033a6bb92837fba1e52a9e5dbccc5107720c95b89cd9cf92a9 - uri: huggingface://QuantFactory/miscii-14b-1028-GGUF/miscii-14b-1028.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "miscii-14b-1225" - icon: https://huggingface.co/sthenno-com/miscii-14b-1225/resolve/main/Rrharil.png - urls: - - https://huggingface.co/sthenno-com/miscii-14b-1225 - - https://huggingface.co/mradermacher/miscii-14b-1225-GGUF - description: | - The following models were included in the merge: - sthenno/exp-002 - sthenno/miscii-1218 - overrides: - parameters: - model: miscii-14b-1225.Q4_K_M.gguf - files: - - filename: miscii-14b-1225.Q4_K_M.gguf - sha256: f21fe73450be394055aeb87b7619e98a09e5c190b48f145bdebef4e12df871fe - uri: huggingface://mradermacher/miscii-14b-1225-GGUF/miscii-14b-1225.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwentile2.5-32b-instruct" - icon: https://cdn-uploads.huggingface.co/production/uploads/65b19c1b098c85365af5a83e/sF7RDZA7lFYOmGy4bGy1s.png - urls: - - https://huggingface.co/maldv/Qwentile2.5-32B-Instruct - - https://huggingface.co/bartowski/Qwentile2.5-32B-Instruct-GGUF - description: | - Qwentile 2.5 32B Instruct is a normalized denoised fourier interpolation of the following models: - - { "model": "AiCloser/Qwen2.5-32B-AGI", "base": "Qwen/Qwen2.5-32B", "alpha": 0.3 } - - { "model": "EVA-UNIT-01/EVA-Qwen2.5-32B-v0.2", "base": "Qwen/Qwen2.5-32B", "alpha": 0.7 } - - { "model": "fblgit/TheBeagle-v2beta-32B-MGS", "base": "Qwen/Qwen2.5-32B", "alpha": 0.6 } - - { "model": "huihui-ai/Qwen2.5-32B-Instruct-abliterated", "base": "Qwen/Qwen2.5-32B-Instruct", "alpha": 1.0 } - - { "model": "huihui-ai/QwQ-32B-Preview-abliterated", "base": "Qwen/Qwen2.5-32B", "alpha": 1.0 } - - { "model": "Qwen/QwQ-32B-Preview", "base": "Qwen/Qwen2.5-32B", "alpha": 0.8, "is_input": true } - - { "model": "rombodawg/Rombos-LLM-V2.5-Qwen-32b", "base": "Qwen/Qwen2.5-32B", 
"alpha": 1.0, "is_output": true } - - { "model": "nbeerbower/Qwen2.5-Gutenberg-Doppel-32B", "base": "Qwen/Qwen2.5-32B-Instruct", "alpha": 0.4 } - I started my experiment because of QwQ is a really nifty model, but it was giving me problems with xml output - which is what I use for my thought tokens. So, I thought... lets just merge it in! - The first model worked pretty well, but I got a sense that the balances could be tweaked. Why not throw in some other models as well for fun and see if I can't run out of disk space in the process? - overrides: - parameters: - model: Qwentile2.5-32B-Instruct-Q4_K_M.gguf - files: - - filename: Qwentile2.5-32B-Instruct-Q4_K_M.gguf - sha256: e476d6e3c15c78fc3f986d7ae8fa35c16116843827f2e6243c05767cef2f3615 - uri: huggingface://bartowski/Qwentile2.5-32B-Instruct-GGUF/Qwentile2.5-32B-Instruct-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "websailor-32b" - urls: - - https://huggingface.co/Alibaba-NLP/WebSailor-32B - - https://huggingface.co/mradermacher/WebSailor-32B-GGUF - description: | - WebSailor is a complete post-training methodology designed to teach LLM agents sophisticated reasoning for complex web navigation and information-seeking tasks. It addresses the challenge of extreme uncertainty in vast information landscapes, a capability where previous open-source models lagged behind proprietary systems. - We classify information-seeking tasks into three difficulty levels, where Level 3 represents problems with both high uncertainty and a complex, non-linear path to a solution. To generate these challenging tasks, we introduce SailorFog-QA, a novel data synthesis pipeline that constructs intricate knowledge graphs and then applies information obfuscation. This process creates questions with high initial uncertainty that demand creative exploration and transcend simple, structured reasoning patterns. 
- Our training process begins by generating expert trajectories and then reconstructing the reasoning to create concise, action-oriented supervision signals, avoiding the stylistic and verbosity issues of teacher models. The agent is first given a "cold start" using rejection sampling fine-tuning (RFT) on a small set of high-quality examples to establish a baseline capability. This is followed by an efficient agentic reinforcement learning stage using our Duplicating Sampling Policy Optimization (DUPO) algorithm, which refines the agent's exploratory strategies. - WebSailor establishes a new state-of-the-art for open-source agents, achieving outstanding results on difficult benchmarks like BrowseComp-en and BrowseComp-zh. Notably, our smaller models like WebSailor-7B outperform agents built on much larger backbones, highlighting the efficacy of our training paradigm. Ultimately, WebSailor closes the performance gap to proprietary systems, achieving results on par with agents like Doubao-Search. - overrides: - parameters: - model: WebSailor-32B.Q4_K_M.gguf - files: - - filename: WebSailor-32B.Q4_K_M.gguf - sha256: 60cea732b8314cedf1807530857b4ebd9f6c41431b3223384eb7f94fbff7b5bc - uri: huggingface://mradermacher/WebSailor-32B-GGUF/WebSailor-32B.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "websailor-7b" - urls: - - https://huggingface.co/Alibaba-NLP/WebSailor-7B - - https://huggingface.co/mradermacher/WebSailor-7B-GGUF - description: | - WebSailor is a complete post-training methodology designed to teach LLM agents sophisticated reasoning for complex web navigation and information-seeking tasks. It addresses the challenge of extreme uncertainty in vast information landscapes, a capability where previous open-source models lagged behind proprietary systems. - We classify information-seeking tasks into three difficulty levels, where Level 3 represents problems with both high uncertainty and a complex, non-linear path to a solution. 
To generate these challenging tasks, we introduce SailorFog-QA, a novel data synthesis pipeline that constructs intricate knowledge graphs and then applies information obfuscation. This process creates questions with high initial uncertainty that demand creative exploration and transcend simple, structured reasoning patterns. - Our training process begins by generating expert trajectories and then reconstructing the reasoning to create concise, action-oriented supervision signals, avoiding the stylistic and verbosity issues of teacher models. The agent is first given a "cold start" using rejection sampling fine-tuning (RFT) on a small set of high-quality examples to establish a baseline capability. This is followed by an efficient agentic reinforcement learning stage using our Duplicating Sampling Policy Optimization (DUPO) algorithm, which refines the agent's exploratory strategies. - WebSailor establishes a new state-of-the-art for open-source agents, achieving outstanding results on difficult benchmarks like BrowseComp-en and BrowseComp-zh. Notably, our smaller models like WebSailor-7B outperform agents built on much larger backbones, highlighting the efficacy of our training paradigm. Ultimately, WebSailor closes the performance gap to proprietary systems, achieving results on par with agents like Doubao-Search. 
- overrides: - parameters: - model: WebSailor-7B.Q4_K_M.gguf - files: - - filename: WebSailor-7B.Q4_K_M.gguf - sha256: 6ede884af5d82176606c3af19a5cc90da6fdf81a520f54284084f5e012217a56 - uri: huggingface://mradermacher/WebSailor-7B-GGUF/WebSailor-7B.Q4_K_M.gguf -- &archfunct - license: apache-2.0 - tags: - - llm - - gguf - - gpu - - qwen - - qwen2.5 - - cpu - - function-calling - name: "arch-function-1.5b" - icon: https://avatars.githubusercontent.com/u/112724757 - uri: "github:mudler/LocalAI/gallery/arch-function.yaml@master" - urls: - - https://huggingface.co/katanemolabs/Arch-Function-1.5B - - https://huggingface.co/mradermacher/Arch-Function-1.5B-GGUF - description: | - The Katanemo Arch-Function collection of large language models (LLMs) is a collection state-of-the-art (SOTA) LLMs specifically designed for function calling tasks. The models are designed to understand complex function signatures, identify required parameters, and produce accurate function call outputs based on natural language prompts. Achieving performance on par with GPT-4, these models set a new benchmark in the domain of function-oriented tasks, making them suitable for scenarios where automated API interaction and function execution is crucial. - In summary, the Katanemo Arch-Function collection demonstrates: - State-of-the-art performance in function calling - Accurate parameter identification and suggestion, even in ambiguous or incomplete inputs - High generalization across multiple function calling use cases, from API interactions to automated backend tasks. - Optimized low-latency, high-throughput performance, making it suitable for real-time, production environments. 
- overrides: - parameters: - model: Arch-Function-1.5B.Q4_K_M.gguf - files: - - filename: Arch-Function-1.5B.Q4_K_M.gguf - sha256: 5ac54d2d50cca0ee0335ca2c9b688204c0829cd3a73de3ee3fda108281ad9691 - uri: huggingface://mradermacher/Arch-Function-1.5B-GGUF/Arch-Function-1.5B.Q4_K_M.gguf -- !!merge <<: *archfunct - name: "arch-function-7b" - urls: - - https://huggingface.co/katanemolabs/Arch-Function-7B - - https://huggingface.co/mradermacher/Arch-Function-7B-GGUF - overrides: - parameters: - model: Arch-Function-7B.Q4_K_M.gguf - files: - - filename: Arch-Function-7B.Q4_K_M.gguf - sha256: 6e38661321d79d02b8cf57c79d97c6c0e19adb9ffa66083cc440c24e257234b6 - uri: huggingface://mradermacher/Arch-Function-7B-GGUF/Arch-Function-7B.Q4_K_M.gguf -- !!merge <<: *archfunct - name: "arch-function-3b" - urls: - - https://huggingface.co/katanemolabs/Arch-Function-3B - - https://huggingface.co/mradermacher/Arch-Function-3B-GGUF - overrides: - parameters: - model: Arch-Function-3B.Q4_K_M.gguf - files: - - filename: Arch-Function-3B.Q4_K_M.gguf - sha256: 9945cb8d070498d163e5df90c1987f591d35e4fd2222a6c51bcfff848c4b573b - uri: huggingface://mradermacher/Arch-Function-3B-GGUF/Arch-Function-3B.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwen2-7b-multilingual-rp" - urls: - - https://huggingface.co/maywell/Qwen2-7B-Multilingual-RP - - https://huggingface.co/QuantFactory/Qwen2-7B-Multilingual-RP-GGUF - description: | - Multilingual Qwen2-7B model trained on Roleplaying. 
- overrides: - parameters: - model: Qwen2-7B-Multilingual-RP.Q4_K_M.gguf - files: - - filename: Qwen2-7B-Multilingual-RP.Q4_K_M.gguf - sha256: 31756c58fd135f2deb59b2d9b142f39134dc8d1a6eaa02f388dda7491fc95ccc - uri: huggingface://QuantFactory/Qwen2-7B-Multilingual-RP-GGUF/Qwen2-7B-Multilingual-RP.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwq-lcot-7b-instruct" - urls: - - https://huggingface.co/prithivMLmods/QwQ-LCoT-7B-Instruct - - https://huggingface.co/bartowski/QwQ-LCoT-7B-Instruct-GGUF - description: | - The QwQ-LCoT-7B-Instruct is a fine-tuned language model designed for advanced reasoning and instruction-following tasks. It leverages the Qwen2.5-7B base model and has been fine-tuned on the amphora/QwQ-LongCoT-130K dataset, focusing on chain-of-thought (CoT) reasoning. - overrides: - parameters: - model: QwQ-LCoT-7B-Instruct-Q4_K_M.gguf - files: - - filename: QwQ-LCoT-7B-Instruct-Q4_K_M.gguf - sha256: 1df2e4ff0093a9632687b73969153442776b0ffc1c3c68e7f559472f9cea1945 - uri: huggingface://bartowski/QwQ-LCoT-7B-Instruct-GGUF/QwQ-LCoT-7B-Instruct-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "tqwendo-36b" - icon: "https://cdn-uploads.huggingface.co/production/uploads/6379683a81c1783a4a2ddba8/DI7Yw8Fs8eukluzKTHjEH.png" - urls: - - https://huggingface.co/nisten/tqwendo-36b - - https://huggingface.co/bartowski/tqwendo-36b-GGUF - description: | - There is a draft model to go with this one for speculative decoding and chain of thought reasoning: https://huggingface.co/nisten/qwen2.5-coder-7b-abliterated-128k-AWQ - - Using the above 4bit 7b in conjuction with the 36b is meant to setup a chain-of-thought reasoner, evaluator similar to what O1-O3 is probably doing. This way the 7b 4bit only uses up an extra 4-6Gb on the gpu, but greatly both speeds up speculative decoding AND also chain-of-throught evals. 
- overrides: - parameters: - model: tqwendo-36b-Q4_K_M.gguf - files: - - filename: tqwendo-36b-Q4_K_M.gguf - sha256: 890ff05fb717c67848d5c02ad62b2c26fdcdd20f7cc94ade8095869784c0cc82 - uri: huggingface://bartowski/tqwendo-36b-GGUF/tqwendo-36b-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qvq-72b-preview" - urls: - - https://huggingface.co/Qwen/QVQ-72B-Preview - - https://huggingface.co/bartowski/QVQ-72B-Preview-GGUF - description: | - QVQ-72B-Preview is an experimental research model developed by the Qwen team, focusing on enhancing visual reasoning capabilities. - QVQ-72B-Preview has achieved remarkable performance on various benchmarks. It scored a remarkable 70.3% on the Multimodal Massive Multi-task Understanding (MMMU) benchmark, showcasing QVQ's powerful ability in multidisciplinary understanding and reasoning. Furthermore, the significant improvements on MathVision highlight the model's progress in mathematical reasoning tasks. OlympiadBench also demonstrates the model's enhanced ability to tackle challenging problems. - overrides: - mmproj: mmproj-QVQ-72B-Preview-f16.gguf - parameters: - model: QVQ-72B-Preview-Q4_K_M.gguf - files: - - filename: QVQ-72B-Preview-Q4_K_M.gguf - sha256: 0fab6809995614c19e4b4c23e3191824944a04999f742486278f0d9929dc82ae - uri: huggingface://bartowski/QVQ-72B-Preview-GGUF/QVQ-72B-Preview-Q4_K_M.gguf - - filename: mmproj-QVQ-72B-Preview-f16.gguf - sha256: 85110223f39aa1aad887052d269074afbd52a49ae02c53b66753b033662cc8e6 - uri: huggingface://bartowski/QVQ-72B-Preview-GGUF/mmproj-QVQ-72B-Preview-f16.gguf -- !!merge <<: *qwen25 - name: "teleut-7b-rp" - icon: https://cdn-uploads.huggingface.co/production/uploads/634262af8d8089ebaefd410e/2y6PHgWe4ewoMFlgn-p3d.png - urls: - - https://huggingface.co/allura-org/Teleut-7b-RP - - https://huggingface.co/bartowski/Teleut-7b-RP-GGUF - description: | - A roleplay-focused LoRA finetune of Teleut 7b. Methodology and hyperparams inspired by SorcererLM and Slush. 
- Dataset: The worst mix of data you've ever seen. Like, seriously, you do not want to see the things that went into this model. It's bad. - overrides: - parameters: - model: Teleut-7b-RP-Q4_K_M.gguf - files: - - filename: Teleut-7b-RP-Q4_K_M.gguf - sha256: 74d9a0974c48f16677da8891ac76ed89ed04f246275b9ca8316d25e1e86ce89f - uri: huggingface://bartowski/Teleut-7b-RP-GGUF/Teleut-7b-RP-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwen2.5-32b-rp-ink" - icon: https://cdn-uploads.huggingface.co/production/uploads/634262af8d8089ebaefd410e/1_Zt_OvEW183lmrgidQw8.png - urls: - - https://huggingface.co/allura-org/Qwen2.5-32b-RP-Ink - - https://huggingface.co/bartowski/Qwen2.5-32b-RP-Ink-GGUF - description: | - A roleplay-focused LoRA finetune of Qwen 2.5 32b Instruct. Methodology and hyperparams inspired by SorcererLM and Slush. - Yet another model in the Ink series, following in the footsteps of the Nemo one - overrides: - parameters: - model: Qwen2.5-32b-RP-Ink-Q4_K_M.gguf - files: - - filename: Qwen2.5-32b-RP-Ink-Q4_K_M.gguf - sha256: 7a0693d50aa40ba4fd43b4988851e67443e758ae34881f448e2812e5fcc25468 - uri: huggingface://bartowski/Qwen2.5-32b-RP-Ink-GGUF/Qwen2.5-32b-RP-Ink-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "q2.5-veltha-14b-0.5" - urls: - - https://huggingface.co/djuna/Q2.5-Veltha-14B-0.5 - - https://huggingface.co/bartowski/Q2.5-Veltha-14B-0.5-GGUF - description: | - The following models were included in the merge:s - huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2 - allura-org/TQ2.5-14B-Aletheia-v1 - EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2 - v000000/Qwen2.5-Lumen-14B - overrides: - parameters: - model: Q2.5-Veltha-14B-0.5-Q4_K_M.gguf - files: - - filename: Q2.5-Veltha-14B-0.5-Q4_K_M.gguf - sha256: f75b8cbceab555ebcab6fcb3b51d398b7ef79671aa05c21c288edd75c9f217bd - uri: huggingface://bartowski/Q2.5-Veltha-14B-0.5-GGUF/Q2.5-Veltha-14B-0.5-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "smallthinker-3b-preview" - urls: - - https://huggingface.co/PowerInfer/SmallThinker-3B-Preview - - 
https://huggingface.co/bartowski/SmallThinker-3B-Preview-GGUF - description: | - SmallThinker is designed for the following use cases: - Edge Deployment: Its small size makes it ideal for deployment on resource-constrained devices. - Draft Model for QwQ-32B-Preview: SmallThinker can serve as a fast and efficient draft model for the larger QwQ-32B-Preview model. From my test, in llama.cpp we can get 70% speedup (from 40 tokens/s to 70 tokens/s). - overrides: - parameters: - model: SmallThinker-3B-Preview-Q4_K_M.gguf - files: - - filename: SmallThinker-3B-Preview-Q4_K_M.gguf - sha256: ac04f82a09ee6a2748437c3bb774b638a54099dc7d5d6ef7549893fae22ab055 - uri: huggingface://bartowski/SmallThinker-3B-Preview-GGUF/SmallThinker-3B-Preview-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwenwify2.5-32b-v4.5" - urls: - - https://huggingface.co/Kaoeiri/Qwenwify2.5-32B-v4.5 - - https://huggingface.co/mradermacher/Qwenwify2.5-32B-v4.5-GGUF - description: | - The following models were included in the merge: - Kaoeiri/Qwenwify-32B-v3 - allura-org/Qwen2.5-32b-RP-Ink - Dans-DiscountModels/Qwen2.5-32B-ChatML - Saxo/Linkbricks-Horizon-AI-Japanese-Base-32B - OpenBuddy/openbuddy-qwq-32b-v24.2-200k - Sao10K/32B-Qwen2.5-Kunou-v1 - overrides: - parameters: - model: Qwenwify2.5-32B-v4.5.Q4_K_M.gguf - files: - - filename: Qwenwify2.5-32B-v4.5.Q4_K_M.gguf - sha256: 52670acdc285356c01259f45b1953860f34deb4f80345ca63b60acc19165280c - uri: huggingface://mradermacher/Qwenwify2.5-32B-v4.5-GGUF/Qwenwify2.5-32B-v4.5.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "drt-o1-7b" - urls: - - https://huggingface.co/Krystalan/DRT-o1-7B - - https://huggingface.co/QuantFactory/DRT-o1-7B-GGUF - description: "In this work, we introduce DRT-o1, an attempt to bring the success of long thought reasoning to neural machine translation (MT). 
To this end,\n\n\U0001F31F We mine English sentences with similes or metaphors from existing literature books, which are suitable for translation via long thought.\n\U0001F31F We propose a designed multi-agent framework with three agents (i.e., a translator, an advisor and an evaluator) to synthesize the MT samples with long thought. There are 22,264 synthesized samples in total.\n\U0001F31F We train DRT-o1-8B, DRT-o1-7B and DRT-o1-14B using Llama-3.1-8B-Instruct, Qwen2.5-7B-Instruct and Qwen2.5-14B-Instruct as backbones.\n\nOur goal is not to achieve competitive performance with OpenAI’s O1 in neural machine translation (MT). Instead, we explore technical routes to bring the success of long thought to MT. To this end, we introduce DRT-o1, a byproduct of our exploration, and we hope it could facilitate the corresponding research in this direction.\n" - overrides: - parameters: - model: DRT-o1-7B.Q4_K_M.gguf - files: - - filename: DRT-o1-7B.Q4_K_M.gguf - sha256: f592a2523f92ae29630b45fbb501bba7f2fbd99355975cd05fa989faf8d3597d - uri: huggingface://QuantFactory/DRT-o1-7B-GGUF/DRT-o1-7B.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "experimental-lwd-mirau-rp-14b-iq-imatrix" - icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/99YhsFSeaGDYCq7XVcTcq.png - urls: - - https://huggingface.co/AetherArchitectural/lwd-Mirau-RP-14B - - https://huggingface.co/Lewdiculous/experimental-lwd-Mirau-RP-14B-GGUF-IQ-Imatrix - description: | - This model is designed to improve the controllability and consistency of current roleplaying models. We developed a story flow thought chain approach that makes the system prompts combined with the entire user-BOT dialogue read like a first-person narrative told by the BOT. We found this design greatly enhances the model's consistency and expressiveness. 
- - Additionally, we allow users to play two roles simultaneously: one as the director of the entire plot (see Special Designs), and another as an actor dialoguing with the BOT. Users can be viewed as writers who need to draft outlines and plot summaries, while the BOT helps complete story details, requiring users to have powerful control over the BOT. - - The model's output is divided into two parts: the model's inner monologue (which it believes is invisible to users) and the final response. - - Overall, mirau features: - - Superior character consistency - - Powerful long-context memory capability - - Transparent thinking with hidden thought chains - overrides: - parameters: - model: lwd-Mirau-RP-Q4_K_M-imat.gguf - files: - - filename: lwd-Mirau-RP-Q4_K_M-imat.gguf - sha256: 22ff461e9034b9ebded07b2a9d3d88c2f75359d5c069ebb3ee4e9c6ec5c45cf8 - uri: huggingface://Lewdiculous/experimental-lwd-Mirau-RP-14B-GGUF-IQ-Imatrix/lwd-Mirau-RP-Q4_K_M-imat.gguf -- !!merge <<: *qwen25 - name: "32b-qwen2.5-kunou-v1" - icon: https://huggingface.co/Sao10K/72B-Qwen2.5-Kunou-v1/resolve/main/knn.png - urls: - - https://huggingface.co/Sao10K/32B-Qwen2.5-Kunou-v1 - - https://huggingface.co/bartowski/32B-Qwen2.5-Kunou-v1-GGUF - description: | - I do not really have anything planned for this model other than it being a generalist, and Roleplay Model? It was just something made and planned in minutes. - Same with the 14B and 72B version. - Kunou's the name of an OC I worked on for a couple of years, for a... fanfic. mmm... - A kind-of successor to L3-70B-Euryale-v2.2 in all but name? I'm keeping Stheno/Euryale lineage to Llama series for now. - I had a version made on top of Nemotron, a supposed Euryale 2.4 but that flopped hard, it was not my cup of tea. - This version is basically a better, more cleaned up Dataset used on Euryale and Stheno. 
- overrides: - parameters: - model: 32B-Qwen2.5-Kunou-v1-Q4_K_M.gguf - files: - - filename: 32B-Qwen2.5-Kunou-v1-Q4_K_M.gguf - sha256: b8910172b74d03c3463ac301589f54b96e54f61c67531fb6b523ecfe923aaffb - uri: huggingface://bartowski/32B-Qwen2.5-Kunou-v1-GGUF/32B-Qwen2.5-Kunou-v1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "14b-qwen2.5-kunou-v1" - urls: - - https://huggingface.co/Sao10K/14B-Qwen2.5-Kunou-v1 - - https://huggingface.co/DevQuasar/Sao10K.14B-Qwen2.5-Kunou-v1-GGUF - description: | - I do not really have anything planned for this model other than it being a generalist, and Roleplay Model? It was just something made and planned in minutes. - This is the little sister variant, the small 14B version. - Kunou's the name of an OC I worked on for a couple of years, for a... fanfic. mmm... - - A kind-of successor to my smaller model series. It works pretty nicely I think? - This version is basically a better, more cleaned up Dataset used on Euryale and Stheno. - overrides: - parameters: - model: Sao10K.14B-Qwen2.5-Kunou-v1.Q4_K_M.gguf - files: - - filename: Sao10K.14B-Qwen2.5-Kunou-v1.Q4_K_M.gguf - sha256: 7b7af50076e15c305a2a1bed7ad766dc6deb61eef3c2e6a40d4c94ad45623845 - uri: huggingface://DevQuasar/Sao10K.14B-Qwen2.5-Kunou-v1-GGUF/Sao10K.14B-Qwen2.5-Kunou-v1.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "dolphin3.0-qwen2.5-0.5b" - icon: https://cdn-uploads.huggingface.co/production/uploads/63111b2d88942700629f5771/cNCs1TBD3FelWCJGkZ3cd.png - urls: - - https://huggingface.co/cognitivecomputations/Dolphin3.0-Qwen2.5-0.5B - - https://huggingface.co/bartowski/Dolphin3.0-Qwen2.5-0.5B-GGUF - description: | - Dolphin 3.0 is the next generation of the Dolphin series of instruct-tuned models. Designed to be the ultimate general purpose local model, enabling coding, math, agentic, function calling, and general use cases. - - Dolphin aims to be a general purpose model, similar to the models behind ChatGPT, Claude, Gemini. 
But these models present problems for businesses seeking to include AI in their products. - - They maintain control of the system prompt, deprecating and changing things as they wish, often causing software to break. - They maintain control of the model versions, sometimes changing things silently, or deprecating older models that your business relies on. - They maintain control of the alignment, and in particular the alignment is one-size-fits all, not tailored to the application. - They can see all your queries and they can potentially use that data in ways you wouldn't want. Dolphin, in contrast, is steerable and gives control to the system owner. You set the system prompt. You decide the alignment. You have control of your data. Dolphin does not impose its ethics or guidelines on you. You are the one who decides the guidelines. - - Dolphin belongs to YOU, it is your tool, an extension of your will. Just as you are personally responsible for what you do with a knife, gun, fire, car, or the internet, you are the creator and originator of any content you generate with Dolphin. - overrides: - parameters: - model: Dolphin3.0-Qwen2.5-0.5B-Q4_K_M.gguf - files: - - filename: Dolphin3.0-Qwen2.5-0.5B-Q4_K_M.gguf - sha256: 6a53689e2cb91027fdc9e366142eba8e35f56c14ee353e0a4d64de981efbfffa - uri: huggingface://bartowski/Dolphin3.0-Qwen2.5-0.5B-GGUF/Dolphin3.0-Qwen2.5-0.5B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "dolphin3.0-qwen2.5-1.5b" - url: "github:mudler/LocalAI/gallery/chatml.yaml@master" - icon: https://cdn-uploads.huggingface.co/production/uploads/63111b2d88942700629f5771/cNCs1TBD3FelWCJGkZ3cd.png - urls: - - https://huggingface.co/cognitivecomputations/Dolphin3.0-Qwen2.5-1.5B - - https://huggingface.co/bartowski/Dolphin3.0-Qwen2.5-1.5B-GGUF - description: | - Dolphin 3.0 is the next generation of the Dolphin series of instruct-tuned models. 
Designed to be the ultimate general purpose local model, enabling coding, math, agentic, function calling, and general use cases. - - Dolphin aims to be a general purpose model, similar to the models behind ChatGPT, Claude, Gemini. But these models present problems for businesses seeking to include AI in their products. - - They maintain control of the system prompt, deprecating and changing things as they wish, often causing software to break. - They maintain control of the model versions, sometimes changing things silently, or deprecating older models that your business relies on. - They maintain control of the alignment, and in particular the alignment is one-size-fits all, not tailored to the application. - They can see all your queries and they can potentially use that data in ways you wouldn't want. Dolphin, in contrast, is steerable and gives control to the system owner. You set the system prompt. You decide the alignment. You have control of your data. Dolphin does not impose its ethics or guidelines on you. You are the one who decides the guidelines. - - Dolphin belongs to YOU, it is your tool, an extension of your will. Just as you are personally responsible for what you do with a knife, gun, fire, car, or the internet, you are the creator and originator of any content you generate with Dolphin. 
- overrides: - parameters: - model: Dolphin3.0-Qwen2.5-1.5B-Q4_K_M.gguf - files: - - filename: Dolphin3.0-Qwen2.5-1.5B-Q4_K_M.gguf - sha256: 7caa630a60c8831a509e2663e1761355fa24bcf6ccc03e3cc767e5b5747a3be5 - uri: huggingface://bartowski/Dolphin3.0-Qwen2.5-1.5B-GGUF/Dolphin3.0-Qwen2.5-1.5B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "dolphin3.0-qwen2.5-3b" - icon: https://cdn-uploads.huggingface.co/production/uploads/63111b2d88942700629f5771/cNCs1TBD3FelWCJGkZ3cd.png - url: "github:mudler/LocalAI/gallery/chatml.yaml@master" - urls: - - https://huggingface.co/cognitivecomputations/Dolphin3.0-Qwen2.5-3b - - https://huggingface.co/bartowski/Dolphin3.0-Qwen2.5-3b-GGUF - description: | - Dolphin 3.0 is the next generation of the Dolphin series of instruct-tuned models. Designed to be the ultimate general purpose local model, enabling coding, math, agentic, function calling, and general use cases. - - Dolphin aims to be a general purpose model, similar to the models behind ChatGPT, Claude, Gemini. But these models present problems for businesses seeking to include AI in their products. - - They maintain control of the system prompt, deprecating and changing things as they wish, often causing software to break. - They maintain control of the model versions, sometimes changing things silently, or deprecating older models that your business relies on. - They maintain control of the alignment, and in particular the alignment is one-size-fits all, not tailored to the application. - They can see all your queries and they can potentially use that data in ways you wouldn't want. Dolphin, in contrast, is steerable and gives control to the system owner. You set the system prompt. You decide the alignment. You have control of your data. Dolphin does not impose its ethics or guidelines on you. You are the one who decides the guidelines. - - Dolphin belongs to YOU, it is your tool, an extension of your will. 
Just as you are personally responsible for what you do with a knife, gun, fire, car, or the internet, you are the creator and originator of any content you generate with Dolphin. - overrides: - parameters: - model: Dolphin3.0-Qwen2.5-3b-Q4_K_M.gguf - files: - - filename: Dolphin3.0-Qwen2.5-3b-Q4_K_M.gguf - sha256: 0cb1908c5f444e1dc2c5b5619d62ac4957a22ad39cd42f2d0b48e2d8b1c358ab - uri: huggingface://bartowski/Dolphin3.0-Qwen2.5-3b-GGUF/Dolphin3.0-Qwen2.5-3b-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "14b-qwen2.5-freya-x1" - icon: https://huggingface.co/Sao10K/14B-Qwen2.5-Freya-x1/resolve/main/sad.png - urls: - - https://huggingface.co/Sao10K/14B-Qwen2.5-Freya-x1 - - https://huggingface.co/DevQuasar/Sao10K.14B-Qwen2.5-Freya-x1-GGUF - description: | - I decided to mess around with training methods again, considering the re-emegence of methods like multi-step training. Some people began doing it again, and so, why not? Inspired by AshhLimaRP's methology but done it my way. - Freya-S1 - - LoRA Trained on ~1.1GB of literature and raw text over Qwen 2.5's base model. - Cleaned text and literature as best as I could, still, may have had issues here and there. - - Freya-S2 - - The first LoRA was applied over Qwen 2.5 Instruct, then I trained on top of that. - Reduced LoRA rank because it's mainly instruct and other details I won't get into. 
- overrides: - parameters: - model: Sao10K.14B-Qwen2.5-Freya-x1.Q4_K_M.gguf - files: - - filename: Sao10K.14B-Qwen2.5-Freya-x1.Q4_K_M.gguf - sha256: 790953e2ffccf2f730d52072f300fba9d1549c7762f5127b2014cdc82204b509 - uri: huggingface://DevQuasar/Sao10K.14B-Qwen2.5-Freya-x1-GGUF/Sao10K.14B-Qwen2.5-Freya-x1.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "huatuogpt-o1-7b-v0.1" - urls: - - https://huggingface.co/FreedomIntelligence/HuatuoGPT-o1-7B - - https://huggingface.co/bartowski/HuatuoGPT-o1-7B-v0.1-GGUF - - https://github.com/FreedomIntelligence/HuatuoGPT-o1 - description: | - HuatuoGPT-o1 is a medical LLM designed for advanced medical reasoning. It generates a complex thought process, reflecting and refining its reasoning, before providing a final response. - - For more information, visit our GitHub repository: https://github.com/FreedomIntelligence/HuatuoGPT-o1. - overrides: - parameters: - model: HuatuoGPT-o1-7B-v0.1-Q4_K_M.gguf - files: - - filename: HuatuoGPT-o1-7B-v0.1-Q4_K_M.gguf - sha256: 8fc4b797a532d67d677e90293175ff1365c91677d06ea27af297bdf5b60c2d1d - uri: huggingface://bartowski/HuatuoGPT-o1-7B-v0.1-GGUF/HuatuoGPT-o1-7B-v0.1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "chuluun-qwen2.5-72b-v0.01" - icon: https://huggingface.co/DatToad/Chuluun-Qwen2.5-72B-v0.01/resolve/main/00008-1523559621.png - urls: - - https://huggingface.co/DatToad/Chuluun-Qwen2.5-72B-v0.01 - - https://huggingface.co/bartowski/Chuluun-Qwen2.5-72B-v0.01-GGUF - description: | - This is a merge of pre-trained language models created using mergekit. - - The models in this merge are some of my favorites and I found I liked all of them for different reasons. I believe this model is greater than the sum of its parts - it has the storywriting and language of Eva and Kunou, the spiciness of Magnum, and the uncensored intelligence of Tess. It excels in handling multiple characters and keeping their thoughts, speech, and actions separate, including scene changes. 
It also appears to match dialogue well to the characters and their backgrounds. - - Model_stock was the method used, it's very straightforward and quite fast, the bottleneck seemed to be my NVMe drive. - - All source models use ChatML prompt formatting and it responds very well. For testing purposes I am using a temperature of 1.08, rep pen of 0.03, and DRY with 0.6 (most Qwen models seem to need DRY). All other samplers are neutralized. - overrides: - parameters: - model: Chuluun-Qwen2.5-72B-v0.01-Q4_K_M.gguf - files: - - filename: Chuluun-Qwen2.5-72B-v0.01-Q4_K_M.gguf - sha256: 901d9d10aad42de3188e721accdc4eb0efec96cbca48563f802793dceaf551f5 - uri: huggingface://bartowski/Chuluun-Qwen2.5-72B-v0.01-GGUF/Chuluun-Qwen2.5-72B-v0.01-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwq-32b-preview-ideawhiz-v1" - icon: https://cdn-uploads.huggingface.co/production/uploads/6205fefd3f1dc8a642d70b10/JEZgA_xV6oF8AIsya9dop.jpeg - urls: - - https://huggingface.co/6cf/QwQ-32B-Preview-IdeaWhiz-v1 - - https://huggingface.co/bartowski/QwQ-32B-Preview-IdeaWhiz-v1-GGUF - description: | - IdeaWhiz is a fine-tuned version of QwQ-32B-Preview, specifically optimized for scientific creativity and step-by-step reasoning. The model leverages the LiveIdeaBench dataset to enhance its capabilities in generating novel scientific ideas and hypotheses. 
- overrides: - parameters: - model: QwQ-32B-Preview-IdeaWhiz-v1-Q4_K_M.gguf - files: - - filename: QwQ-32B-Preview-IdeaWhiz-v1-Q4_K_M.gguf - sha256: 1648e13d9974b10d08ee45f48fd3ebd15cf67745fe20d602f9306fe0253b6a96 - uri: huggingface://bartowski/QwQ-32B-Preview-IdeaWhiz-v1-GGUF/QwQ-32B-Preview-IdeaWhiz-v1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "rombos-qwen2.5-writer-32b" - icon: https://huggingface.co/SubtleOne/Rombos-Qwen2.5-Writer-32b/blob/main/robot-creating-fantasy.jpg - urls: - - https://huggingface.co/SubtleOne/Rombos-Qwen2.5-Writer-32b - - https://huggingface.co/bartowski/Rombos-Qwen2.5-Writer-32b-GGUF - description: | - This model is a merge using Rombos's top-ranked 32b model, based on Qwen 2.5, and merging three creative writing finetunes. The creative content is a serious upgrade over the base it started with, and I enjoyed it in my DnD RPG campaign. - overrides: - parameters: - model: Rombos-Qwen2.5-Writer-32b-Q4_K_M.gguf - files: - - filename: Rombos-Qwen2.5-Writer-32b-Q4_K_M.gguf - sha256: cf0e48c6cb8b6f41834603900642b5395105980297709c85c4216bd44fac956a - uri: huggingface://bartowski/Rombos-Qwen2.5-Writer-32b-GGUF/Rombos-Qwen2.5-Writer-32b-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "sky-t1-32b-preview" - icon: https://github.com/NovaSky-AI/novasky-ai.github.io/raw/main/assets/images/blue-bird-wider.jpeg - urls: - - https://huggingface.co/NovaSky-AI/Sky-T1-32B-Preview - - https://huggingface.co/bartowski/Sky-T1-32B-Preview-GGUF - - https://novasky-ai.github.io/posts/sky-t1/ - description: | - This is a 32B reasoning model trained from Qwen2.5-32B-Instruct with 17K data. The performance is on par with o1-preview model on both math and coding. Please see our blog post for more details. 
- overrides: - parameters: - model: Sky-T1-32B-Preview-Q4_K_M.gguf - files: - - filename: Sky-T1-32B-Preview-Q4_K_M.gguf - sha256: c735912a582f10e4769461586a02e5b98ef43c2895ec11923b8c4f157e7909e5 - uri: huggingface://bartowski/Sky-T1-32B-Preview-GGUF/Sky-T1-32B-Preview-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwen2.5-72b-rp-ink" - icon: https://cdn-uploads.huggingface.co/production/uploads/634262af8d8089ebaefd410e/M9KSL64gppBVatmTdoQnG.png - urls: - - https://huggingface.co/allura-org/Qwen2.5-72b-RP-Ink - - https://huggingface.co/bartowski/Qwen2.5-72b-RP-Ink-GGUF - description: | - A roleplay-focused LoRA finetune of Qwen 2.5 72b Instruct. Methodology and hyperparams inspired by SorcererLM and Slush. - Yet another model in the Ink series, following in the footsteps of the 32b one and the Nemo one - overrides: - parameters: - model: Qwen2.5-72b-RP-Ink-Q4_K_M.gguf - files: - - filename: Qwen2.5-72b-RP-Ink-Q4_K_M.gguf - sha256: 2c2bf785dc5798403e0ccf6c4f5f9d7d53fcfb0c0b28855c584e09be88f91517 - uri: huggingface://bartowski/Qwen2.5-72b-RP-Ink-GGUF/Qwen2.5-72b-RP-Ink-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "steiner-32b-preview" - urls: - - https://huggingface.co/peakji/steiner-32b-preview - - https://huggingface.co/bartowski/steiner-32b-preview-GGUF - description: | - Steiner is a series of reasoning models trained on synthetic data using reinforcement learning. These models can explore multiple reasoning paths in an autoregressive manner during inference and autonomously verify or backtrack when necessary, enabling a linear traversal of the implicit search tree. - - Steiner is a personal interest project by Yichao 'Peak' Ji, inspired by OpenAI o1. The ultimate goal is to reproduce o1 and validate the inference-time scaling curves. The Steiner-preview model is currently a work-in-progress. 
The reason for open-sourcing it is that I’ve found automated evaluation methods, primarily based on multiple-choice questions, struggle to fully reflect the progress of reasoning models. In fact, the assumption that "the correct answer is always among the options" doesn’t align well with real-world reasoning scenarios, as it encourages models to perform substitution-based validation rather than open-ended exploration. For this reason, I’ve chosen to open-source these intermediate results and, when time permits, to build in public. This approach allows me to share knowledge while also gathering more evaluations and feedback from real human users. - overrides: - parameters: - model: steiner-32b-preview-Q4_K_M.gguf - files: - - filename: steiner-32b-preview-Q4_K_M.gguf - sha256: 1d7bf6d6dc8db8c81b3e71dc89756cd23417bb0a645b7dcdd1f9457781a88652 - uri: huggingface://bartowski/steiner-32b-preview-GGUF/steiner-32b-preview-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwerus-7b" - urls: - - https://huggingface.co/mlabonne/Qwerus-7B - - https://huggingface.co/bartowski/Qwerus-7B-GGUF - description: | - Qwerus-7B is a merge of the following models using LazyMergekit: - PRIME-RL/Eurus-2-7B-PRIME - Qwen/Qwen2.5-7B-Instruct - overrides: - parameters: - model: Qwerus-7B-Q4_K_M.gguf - files: - - filename: Qwerus-7B-Q4_K_M.gguf - sha256: 3676629e8092a59f523393e6eb5072727f5213a9e03b7b81141f05a33743e20c - uri: huggingface://bartowski/Qwerus-7B-GGUF/Qwerus-7B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "lb-reranker-0.5b-v1.0" - urls: - - https://huggingface.co/lightblue/lb-reranker-0.5B-v1.0 - - https://huggingface.co/bartowski/lb-reranker-0.5B-v1.0-GGUF - description: | - The LB Reranker has been trained to determine the relatedness of a given query to a piece of text, therefore allowing it to be used as a ranker or reranker in various retrieval-based tasks. 
- - This model is fine-tuned from a Qwen/Qwen2.5-0.5B-Instruct model checkpoint and was trained for roughly 5.5 hours using the 8 x L20 instance (ecs.gn8is-8x.32xlarge) on Alibaba Cloud. - - The training data for this model can be found at lightblue/reranker_continuous_filt_max7_train and the code for generating this data as well as running the training of the model can be found on our Github repo. - - Trained on data in over 95 languages, this model is applicable to a broad range of use cases. - - This model has three main benefits over comparable rerankers. - - It has shown slightly higher performance on evaluation benchmarks. - It has been trained on more languages than any previous model. - It is a simple Causal LM model trained to output a string between "1" and "7". - - This last point means that this model can be used natively with many widely available inference packages, including vLLM and LMDeploy. This in turns allows our reranker to benefit from improvements to inference as and when these packages release them. - - Update: We have also found that this model works pretty well as a code snippet reranker too (P@1 of 96%)! See our Colab for more details. - overrides: - parameters: - model: lb-reranker-0.5B-v1.0-Q4_K_M.gguf - files: - - filename: lb-reranker-0.5B-v1.0-Q4_K_M.gguf - sha256: 43568150de5136da15c996bbf4d1a78cc6580515c40f0ef9a8c90b0542228ab3 - uri: huggingface://bartowski/lb-reranker-0.5B-v1.0-GGUF/lb-reranker-0.5B-v1.0-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "uwu-7b-instruct" - urls: - - https://huggingface.co/qingy2024/UwU-7B-Instruct - - https://huggingface.co/bartowski/UwU-7B-Instruct-GGUF - description: | - Small QwQ, full-finetuned on FineQwQ-142K. Unlike my previous models, this one is a general-purpose reasoning machine! 
- overrides: - parameters: - model: UwU-7B-Instruct-Q4_K_M.gguf - files: - - filename: UwU-7B-Instruct-Q4_K_M.gguf - sha256: 279b2ba20d51bb155c8dd497cf49e0c28407b1822c75de88cfd83d13fd14a59f - uri: huggingface://bartowski/UwU-7B-Instruct-GGUF/UwU-7B-Instruct-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "drt-o1-14b" - urls: - - https://huggingface.co/Krystalan/DRT-o1-14B - - https://huggingface.co/bartowski/DRT-o1-14B-GGUF - description: "This repository contains the resources for our paper \"DRT-o1: Optimized Deep Reasoning Translation via Long Chain-of-Thought\"\nIn this work, we introduce DRT-o1, an attempt to bring the success of long thought reasoning to neural machine translation (MT). To this end,\n\n\U0001F31F We mine English sentences with similes or metaphors from existing literature books, which are suitable for translation via long thought.\n\U0001F31F We propose a designed multi-agent framework with three agents (i.e., a translator, an advisor and an evaluator) to synthesize the MT samples with long thought. There are 22,264 synthesized samples in total.\n\U0001F31F We train DRT-o1-8B, DRT-o1-7B and DRT-o1-14B using Llama-3.1-8B-Instruct, Qwen2.5-7B-Instruct and Qwen2.5-14B-Instruct as backbones.\n\nOur goal is not to achieve competitive performance with OpenAI’s O1 in neural machine translation (MT). Instead, we explore technical routes to bring the success of long thought to MT. 
To this end, we introduce DRT-o1, a byproduct of our exploration, and we hope it could facilitate the corresponding research in this direction.\n" - overrides: - parameters: - model: DRT-o1-14B-Q4_K_M.gguf - files: - - filename: DRT-o1-14B-Q4_K_M.gguf - sha256: 9619ca984cf4ce8e4f69bcde831de17b2ce05dd89536e3130608877521e3d328 - uri: huggingface://bartowski/DRT-o1-14B-GGUF/DRT-o1-14B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "lamarck-14b-v0.7" - icon: https://huggingface.co/sometimesanotion/Lamarck-14B-v0.7/resolve/main/LamarckShades.webp - urls: - - https://huggingface.co/sometimesanotion/Lamarck-14B-v0.7 - - https://huggingface.co/bartowski/Lamarck-14B-v0.7-GGUF - description: | - Lamarck 14B v0.7: A generalist merge with emphasis on multi-step reasoning, prose, and multi-language ability. The 14B parameter model class has a lot of strong performers, and Lamarck strives to be well-rounded and solid. - overrides: - parameters: - model: Lamarck-14B-v0.7-Q4_K_M.gguf - files: - - filename: Lamarck-14B-v0.7-Q4_K_M.gguf - sha256: ff8eba82b77a4c6b6d556b85629414655d881f8af4601bcf891c6a7b0345b442 - uri: huggingface://bartowski/Lamarck-14B-v0.7-GGUF/Lamarck-14B-v0.7-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "art-v0-3b" - icon: https://blog.agi-0.com/_next/image?url=%2Fabout_img2.jpeg&w=1920&q=75 - urls: - - https://huggingface.co/AGI-0/Art-v0-3B - - https://huggingface.co/bartowski/Art-v0-3B-GGUF - - https://blog.agi-0.com/posts/art-series - description: | - Art v0 3B is our inaugural model in the Art series, fine-tuned from Qwen/Qwen2.5-3B-Instruct using a specialized dataset generated with Gemini 2.0 Flash Thinking. 
Read more about the Art series - overrides: - parameters: - model: Art-v0-3B-Q4_K_M.gguf - files: - - filename: Art-v0-3B-Q4_K_M.gguf - sha256: 551acd326ce9a743b6e06e094865eb2f06c23c81c812ce221d757bf27ceec9f7 - uri: huggingface://bartowski/Art-v0-3B-GGUF/Art-v0-3B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "chuluun-qwen2.5-72b-v0.08" - icon: https://huggingface.co/DatToad/Chuluun-Qwen2.5-72B-v0.08/resolve/main/Chuluun8-2.png - urls: - - https://huggingface.co/DatToad/Chuluun-Qwen2.5-72B-v0.08 - - https://huggingface.co/bartowski/Chuluun-Qwen2.5-72B-v0.08-GGUF - description: | - This is a merge of pre-trained language models created using mergekit. - I re-ran the original Chuluun formula including the newly released Ink from Allura-Org. I've found the addition gives the model a lot more variability, likely because of aggressive de-slop applied to its dataset. Sometimes this means a word choice will be strange and you'll want to manually edit when needed, but it means you'll see less ministrations sparkling with mischief. - Because of this the best way to approach the model is to run multiple regens and choose the one you like, edit mercilessly, and continue. Like the original Chuluun this variant is very steerable for complex storywriting and RP. It's probably also a little spicier than v0.01 with both Magnum and whatever the heck Fizz threw into the data for Ink. - I've also been hearing praise for a level of character intelligence not seen in other models, including Largestral finetunes and merges. I'm not about to say any model of mine is smarter because it was a dumb idea to use Tess as the base and it somehow worked. 
- overrides: - parameters: - model: Chuluun-Qwen2.5-72B-v0.08-Q4_K_M.gguf - files: - - filename: Chuluun-Qwen2.5-72B-v0.08-Q4_K_M.gguf - sha256: 0fec82625f74a9a340837de7af287b1d9042e5aeb70cda2621426db99958b0af - uri: huggingface://bartowski/Chuluun-Qwen2.5-72B-v0.08-GGUF/Chuluun-Qwen2.5-72B-v0.08-Q4_K_M.gguf - &smollm url: "github:mudler/LocalAI/gallery/chatml.yaml@master" ## SmolLM name: "smollm-1.7b-instruct" @@ -9092,1855 +6696,6 @@ - filename: smollm2-1.7b-instruct-q4_k_m.gguf sha256: decd2598bc2c8ed08c19adc3c8fdd461ee19ed5708679d1c54ef54a5a30d4f33 uri: huggingface://HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF/smollm2-1.7b-instruct-q4_k_m.gguf -- !!merge <<: *qwen25 - name: "vikhr-qwen-2.5-1.5b-instruct" - urls: - - https://huggingface.co/Vikhrmodels/Vikhr-Qwen-2.5-1.5B-Instruct - - https://huggingface.co/QuantFactory/Vikhr-Qwen-2.5-1.5B-Instruct-GGUF - description: | - Instructive model based on Qwen-2.5-1.5B-Instruct, trained on the Russian-language dataset GrandMaster-PRO-MAX. Designed for high-efficiency text processing in Russian and English, delivering precise responses and fast task execution. 
- overrides: - parameters: - model: Vikhr-Qwen-2.5-1.5B-Instruct.Q4_K_M.gguf - files: - - filename: Vikhr-Qwen-2.5-1.5B-Instruct.Q4_K_M.gguf - sha256: eaeac314e30b461413bc1cc819cdc0cd6a79265711fd0b8268702960a082c7bd - uri: huggingface://QuantFactory/Vikhr-Qwen-2.5-1.5B-Instruct-GGUF/Vikhr-Qwen-2.5-1.5B-Instruct.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "dumpling-qwen2.5-32b" - icon: https://huggingface.co/nbeerbower/Dumpling-Qwen2.5-32B/resolve/main/dumpling_cover.png?download=true - urls: - - https://huggingface.co/nbeerbower/Dumpling-Qwen2.5-32B - - https://huggingface.co/bartowski/Dumpling-Qwen2.5-32B-GGUF - description: | - nbeerbower/Rombos-EVAGutenberg-TIES-Qwen2.5-32B finetuned on: - nbeerbower/GreatFirewall-DPO - nbeerbower/Schule-DPO - nbeerbower/Purpura-DPO - nbeerbower/Arkhaios-DPO - jondurbin/truthy-dpo-v0.1 - antiven0m/physical-reasoning-dpo - flammenai/Date-DPO-NoAsterisks - flammenai/Prude-Phi3-DPO - Atsunori/HelpSteer2-DPO - jondurbin/gutenberg-dpo-v0.1 - nbeerbower/gutenberg2-dpo - nbeerbower/gutenberg-moderne-dpo. - overrides: - parameters: - model: Dumpling-Qwen2.5-32B-Q4_K_M.gguf - files: - - filename: Dumpling-Qwen2.5-32B-Q4_K_M.gguf - sha256: c5b7d773cc614650ad3956008e30d0607df6106c28e381870a9b950bd4ee1d17 - uri: huggingface://bartowski/Dumpling-Qwen2.5-32B-GGUF/Dumpling-Qwen2.5-32B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "confucius-o1-14b" - urls: - - https://huggingface.co/netease-youdao/Confucius-o1-14B - - https://huggingface.co/bartowski/Confucius-o1-14B-GGUF - description: | - Confucius-o1-14B is a o1-like reasoning model developed by the NetEase Youdao Team, it can be easily deployed on a single GPU without quantization. This model is based on the Qwen2.5-14B-Instruct model and adopts a two-stage learning strategy, enabling the lightweight 14B model to possess thinking abilities similar to those of o1. 
What sets it apart is that after generating the chain of thought, it can summarize a step-by-step problem-solving process from the chain of thought on its own. This can prevent users from getting bogged down in the complex chain of thought and allows them to easily obtain the correct problem-solving ideas and answers. - overrides: - parameters: - model: Confucius-o1-14B-Q4_K_M.gguf - files: - - filename: Confucius-o1-14B-Q4_K_M.gguf - sha256: 03182920edd8667db7d2a362ca2d25e88f4b615b383b5a55c764f4715fb22dd9 - uri: huggingface://bartowski/Confucius-o1-14B-GGUF/Confucius-o1-14B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "openthinker-7b" - icon: https://huggingface.co/datasets/open-thoughts/open-thoughts-114k/resolve/main/open_thoughts.png - urls: - - https://huggingface.co/open-thoughts/OpenThinker-7B - - https://huggingface.co/bartowski/OpenThinker-7B-GGUF - description: | - This model is a fine-tuned version of Qwen/Qwen2.5-7B-Instruct on the OpenThoughts-114k dataset dataset. - - The dataset is derived by distilling DeepSeek-R1 using the data pipeline available on github. More info about the dataset can be found on the dataset card at OpenThoughts-114k dataset. - - This model improves upon the Bespoke-Stratos-7B model, which used 17k examples (Bespoke-Stratos-17k dataset). The numbers reported in the table below are evaluated with our open-source tool Evalchemy. 
- overrides: - parameters: - model: OpenThinker-7B-Q4_K_M.gguf - files: - - filename: OpenThinker-7B-Q4_K_M.gguf - sha256: 94dff1a7acd685db5cff7afdb837aab8172e06d65fe6179ba47428e3030acd93 - uri: huggingface://bartowski/OpenThinker-7B-GGUF/OpenThinker-7B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "tinyswallow-1.5b-instruct" - urls: - - https://huggingface.co/SakanaAI/TinySwallow-1.5B-Instruct - - https://huggingface.co/bartowski/TinySwallow-1.5B-Instruct-GGUF - description: | - TinySwallow-1.5B-Instruct is an instruction-tuned version of TinySwallow-1.5B, created through TAID (Temporally Adaptive Interpolated Distillation), our new knowledge distillation method. We used Qwen2.5-32B-Instruct as the teacher model and Qwen2.5-1.5B-Instruct as the student model. The model has been further instruction-tuned to enhance its ability to follow instructions and engage in conversations in Japanese. - overrides: - parameters: - model: TinySwallow-1.5B-Instruct-Q4_K_M.gguf - files: - - filename: TinySwallow-1.5B-Instruct-Q4_K_M.gguf - sha256: 4d409c8873c1650a19c0a7a1c051e342613191a487768fe0d29735b9361079cd - uri: huggingface://bartowski/TinySwallow-1.5B-Instruct-GGUF/TinySwallow-1.5B-Instruct-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "fblgit_miniclaus-qw1.5b-unamgs-grpo" - icon: https://huggingface.co/fblgit/miniclaus-qw1.5B-UNAMGS/resolve/main/miniclaus_qw15-UNAMGS.png - urls: - - https://huggingface.co/fblgit/miniclaus-qw1.5B-UNAMGS-GRPO - - https://huggingface.co/bartowski/fblgit_miniclaus-qw1.5B-UNAMGS-GRPO-GGUF - description: | - This version is RL with GRPO on GSM8k for 1400 steps - overrides: - parameters: - model: fblgit_miniclaus-qw1.5B-UNAMGS-GRPO-Q4_K_M.gguf - files: - - filename: fblgit_miniclaus-qw1.5B-UNAMGS-GRPO-Q4_K_M.gguf - sha256: 88ceacc5900062bc2afc352f009233225b0fe10203cbb61b122e8f10244449c8 - uri: huggingface://bartowski/fblgit_miniclaus-qw1.5B-UNAMGS-GRPO-GGUF/fblgit_miniclaus-qw1.5B-UNAMGS-GRPO-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: 
"rubenroy_gilgamesh-72b" - icon: https://cdn.ruben-roy.com/AI/Gilgamesh/img/art.png - urls: - - https://huggingface.co/rubenroy/Gilgamesh-72B - - https://huggingface.co/bartowski/rubenroy_Gilgamesh-72B-GGUF - description: | - Gilgamesh 72B was trained on a mixture of specialised datasets designed for factual accuracy, mathematical capabilities and reasoning. The datasets used include: - - GammaCorpus-v2-5m: A large 5 million line general-purpose dataset covering many topics to enhance broad knowledge and conversational abilities. - GammaCorpus-CoT-Math-170k: A dataset focused on Chain-of-Thought (CoT) reasoning in mathematics made to help the model improve step-by-step problem-solving. - GammaCorpus-Fact-QA-450k: A dataset containing factual question-answer pairs for enforcing some important current knowledge. - - These datasets were all built and curated by me, however I thank my other team members at Ovantage Labs for assisting me in the creation and curation of these datasets. - overrides: - parameters: - model: rubenroy_Gilgamesh-72B-Q4_K_M.gguf - files: - - filename: rubenroy_Gilgamesh-72B-Q4_K_M.gguf - sha256: c6842b3bc882082c63243e762234ae697c1727bebed18b5241eb97e019f0cf68 - uri: huggingface://bartowski/rubenroy_Gilgamesh-72B-GGUF/rubenroy_Gilgamesh-72B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "tiger-lab_qwen2.5-32b-instruct-cft" - urls: - - https://huggingface.co/TIGER-Lab/Qwen2.5-32B-Instruct-CFT - - https://huggingface.co/bartowski/TIGER-Lab_Qwen2.5-32B-Instruct-CFT-GGUF - description: | - Qwen2.5-32B-Instruct-CFT is a 32B parameter model fine-tuned using our novel Critique Fine-Tuning (CFT) approach. Built upon the Qwen2.5-32B-Instruct base model, this variant is trained to critique and analyze responses rather than simply imitate them, leading to enhanced reasoning capabilities. 
- overrides: - parameters: - model: TIGER-Lab_Qwen2.5-32B-Instruct-CFT-Q4_K_M.gguf - files: - - filename: TIGER-Lab_Qwen2.5-32B-Instruct-CFT-Q4_K_M.gguf - sha256: 57e87e246db368f39f31f38e44ba8e9dc838a026f729f5a123aacc2aeb5a9402 - uri: huggingface://bartowski/TIGER-Lab_Qwen2.5-32B-Instruct-CFT-GGUF/TIGER-Lab_Qwen2.5-32B-Instruct-CFT-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "subtleone_qwen2.5-32b-erudite-writer" - icon: https://huggingface.co/SubtleOne/Qwen2.5-32b-Erudite-Writer/resolve/main/robot-creating-fantasy2.jpg - urls: - - https://huggingface.co/SubtleOne/Qwen2.5-32b-Erudite-Writer - - https://huggingface.co/bartowski/SubtleOne_Qwen2.5-32b-Erudite-Writer-GGUF - description: | - This model is a merge using Rombos's top-ranked 32b model, based on Qwen 2.5, and merging three creative writing finetunes. The creative content is a serious upgrade over the base it started with and has a much more literary style than the previous Writer model. I won't call it better or worse, merely a very distinct flavor and style. I quite like it, and enjoin you to try it as well. Enjoy! - overrides: - parameters: - model: SubtleOne_Qwen2.5-32b-Erudite-Writer-Q4_K_M.gguf - files: - - filename: SubtleOne_Qwen2.5-32b-Erudite-Writer-Q4_K_M.gguf - sha256: fb059c88be4d7d579f0776cead4ca44cf7423b834c5502ce67ef41b15cd0973b - uri: huggingface://bartowski/SubtleOne_Qwen2.5-32b-Erudite-Writer-GGUF/SubtleOne_Qwen2.5-32b-Erudite-Writer-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "localai-functioncall-qwen2.5-7b-v0.5" - url: "github:mudler/LocalAI/gallery/qwen-fcall.yaml@master" - icon: https://cdn-uploads.huggingface.co/production/uploads/647374aa7ff32a81ac6d35d4/Dzbdzn27KEc3K6zNNi070.png - urls: - - https://huggingface.co/mudler/LocalAI-functioncall-qwen2.5-7b-v0.5 - - https://huggingface.co/mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF - description: | - A model tailored to be conversational and execute function calls with LocalAI. This model is based on qwen2.5 (7B). 
- overrides: - parameters: - model: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf - files: - - filename: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf - sha256: 4e7b7fe1d54b881f1ef90799219dc6cc285d29db24f559c8998d1addb35713d4 - uri: huggingface://mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF/localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf -- !!merge <<: *qwen25 - name: "simplescaling_s1.1-32b" - urls: - - https://huggingface.co/simplescaling/s1.1-32B - - https://huggingface.co/bartowski/simplescaling_s1.1-32B-GGUF - description: | - s1.1 is our sucessor of s1 with better reasoning performance by leveraging reasoning traces from r1 instead of Gemini. This model is a successor of s1-32B with slightly better performance. Thanks to Ryan Marten for helping generate r1 traces for s1K. - overrides: - parameters: - model: simplescaling_s1.1-32B-Q4_K_M.gguf - files: - - filename: simplescaling_s1.1-32B-Q4_K_M.gguf - sha256: 6ce3cbfcca8ab50a6e877e6bdfc6538c54e1d9a7e5cc81a9930d5d056a9db4e8 - uri: huggingface://bartowski/simplescaling_s1.1-32B-GGUF/simplescaling_s1.1-32B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "nvidia_aceinstruct-1.5b" - icon: https://cdn-avatars.huggingface.co/v1/production/uploads/1613114437487-60262a8e0703121c822a80b6.png - urls: - - https://huggingface.co/nvidia/AceInstruct-1.5B - - https://huggingface.co/bartowski/nvidia_AceInstruct-1.5B-GGUF - description: | - We introduce AceInstruct, a family of advanced SFT models for coding, mathematics, and general-purpose tasks. The AceInstruct family, which includes AceInstruct-1.5B, 7B, and 72B, is Improved using Qwen. These models are fine-tuned on Qwen2.5-Base using general SFT datasets. These same datasets are also used in the training of AceMath-Instruct. Different from AceMath-Instruct which is specialized for math questions, AceInstruct is versatile and can be applied to a wide range of domains. 
Benchmark evaluations across coding, mathematics, and general knowledge tasks demonstrate that AceInstruct delivers performance comparable to Qwen2.5-Instruct. - overrides: - parameters: - model: nvidia_AceInstruct-1.5B-Q4_K_M.gguf - files: - - filename: nvidia_AceInstruct-1.5B-Q4_K_M.gguf - sha256: 103b7fa617d2b3c2d6e168a878b9b5e3710d19d178bf4b890acf0fac2abafadb - uri: huggingface://bartowski/nvidia_AceInstruct-1.5B-GGUF/nvidia_AceInstruct-1.5B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "nvidia_aceinstruct-7b" - icon: https://cdn-avatars.huggingface.co/v1/production/uploads/1613114437487-60262a8e0703121c822a80b6.png - urls: - - https://huggingface.co/nvidia/AceInstruct-7B - - https://huggingface.co/bartowski/nvidia_AceInstruct-7B-GGUF - description: | - We introduce AceInstruct, a family of advanced SFT models for coding, mathematics, and general-purpose tasks. The AceInstruct family, which includes AceInstruct-1.5B, 7B, and 72B, is Improved using Qwen. These models are fine-tuned on Qwen2.5-Base using general SFT datasets. These same datasets are also used in the training of AceMath-Instruct. Different from AceMath-Instruct which is specialized for math questions, AceInstruct is versatile and can be applied to a wide range of domains. Benchmark evaluations across coding, mathematics, and general knowledge tasks demonstrate that AceInstruct delivers performance comparable to Qwen2.5-Instruct. 
- overrides: - parameters: - model: nvidia_AceInstruct-7B-Q4_K_M.gguf - files: - - filename: nvidia_AceInstruct-7B-Q4_K_M.gguf - sha256: 94e262e0d82d39fa36c4278b2a4b4fa7e93bfaa7cca33283fb9ee006bac02a8a - uri: huggingface://bartowski/nvidia_AceInstruct-7B-GGUF/nvidia_AceInstruct-7B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "nvidia_aceinstruct-72b" - icon: https://cdn-avatars.huggingface.co/v1/production/uploads/1613114437487-60262a8e0703121c822a80b6.png - urls: - - https://huggingface.co/nvidia/AceInstruct-72B - - https://huggingface.co/bartowski/nvidia_AceInstruct-72B-GGUF - description: | - We introduce AceInstruct, a family of advanced SFT models for coding, mathematics, and general-purpose tasks. The AceInstruct family, which includes AceInstruct-1.5B, 7B, and 72B, is Improved using Qwen. These models are fine-tuned on Qwen2.5-Base using general SFT datasets. These same datasets are also used in the training of AceMath-Instruct. Different from AceMath-Instruct which is specialized for math questions, AceInstruct is versatile and can be applied to a wide range of domains. Benchmark evaluations across coding, mathematics, and general knowledge tasks demonstrate that AceInstruct delivers performance comparable to Qwen2.5-Instruct. - overrides: - parameters: - model: nvidia_AceInstruct-72B-Q4_K_M.gguf - files: - - filename: nvidia_AceInstruct-72B-Q4_K_M.gguf - sha256: c8452b2d6c33693d5fd1b5f3aa476451fbd4e78c9621b9baf39ad1a3f2b91503 - uri: huggingface://bartowski/nvidia_AceInstruct-72B-GGUF/nvidia_AceInstruct-72B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "open-thoughts_openthinker-32b" - icon: https://huggingface.co/datasets/open-thoughts/open-thoughts-114k/resolve/main/open_thoughts.png - urls: - - https://huggingface.co/open-thoughts/OpenThinker-32B - - https://huggingface.co/bartowski/open-thoughts_OpenThinker-32B-GGUF - description: | - This model is a fine-tuned version of Qwen/Qwen2.5-32B-Instruct on the OpenThoughts-114k dataset. 
- - The dataset is derived by distilling DeepSeek-R1 using the data pipeline available on github. More info about the dataset can be found on the dataset card at OpenThoughts-114k dataset. - - The numbers reported in the table below are evaluated with our open-source tool Evalchemy. - overrides: - parameters: - model: open-thoughts_OpenThinker-32B-Q4_K_M.gguf - files: - - filename: open-thoughts_OpenThinker-32B-Q4_K_M.gguf - sha256: 6795de6e7025e4a77042232908fe7be304b6b6b465c5feb71ba6861f37038aaf - uri: huggingface://bartowski/open-thoughts_OpenThinker-32B-GGUF/open-thoughts_OpenThinker-32B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "rombo-org_rombo-llm-v3.0-qwen-32b" - urls: - - https://huggingface.co/Rombo-Org/Rombo-LLM-V3.0-Qwen-32b - - https://huggingface.co/bartowski/Rombo-Org_Rombo-LLM-V3.0-Qwen-32b-GGUF - description: | - Rombo-LLM-V3.0-Qwen-32b is a Continued Finetune model on top of the previous V2.5 version using the "NovaSky-AI/Sky-T1_data_17k" dataset. The resulting model was then merged backed into the base model for higher performance as written in the continuous finetuning technique bellow. This model is a good general purpose model, however it excells at coding and math. - overrides: - parameters: - model: Rombo-Org_Rombo-LLM-V3.0-Qwen-32b-Q4_K_M.gguf - files: - - filename: Rombo-Org_Rombo-LLM-V3.0-Qwen-32b-Q4_K_M.gguf - sha256: 1d214d46721aba2bb2a5778c108c4707b5dd7dbc5751158734c67af271532fb5 - uri: huggingface://bartowski/Rombo-Org_Rombo-LLM-V3.0-Qwen-32b-GGUF/Rombo-Org_Rombo-LLM-V3.0-Qwen-32b-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "ozone-ai_0x-lite" - urls: - - https://huggingface.co/ozone-ai/0x-lite - - https://huggingface.co/bartowski/ozone-ai_0x-lite-GGUF - description: | - 0x Lite is a state-of-the-art language model developed by Ozone AI, designed to deliver ultra-high-quality text generation capabilities while maintaining a compact and efficient architecture. 
Built on the latest advancements in natural language processing, 0x Lite is optimized for both speed and accuracy, making it a strong contender in the space of language models. It is particularly well-suited for applications where resource constraints are a concern, offering a lightweight alternative to larger models like GPT while still delivering comparable performance. - overrides: - parameters: - model: ozone-ai_0x-lite-Q4_K_M.gguf - files: - - filename: ozone-ai_0x-lite-Q4_K_M.gguf - sha256: 7f163e72ead7522bd6774555a932e0a11f212d17cdc9442e2cfd1b017009f832 - uri: huggingface://bartowski/ozone-ai_0x-lite-GGUF/ozone-ai_0x-lite-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "nbeerbower_dumpling-qwen2.5-14b" - icon: https://huggingface.co/nbeerbower/Dumpling-Qwen2.5-32B/resolve/main/dumpling_cover.png?download=true - urls: - - https://huggingface.co/nbeerbower/Dumpling-Qwen2.5-14B - - https://huggingface.co/bartowski/nbeerbower_Dumpling-Qwen2.5-14B-GGUF - description: | - nbeerbower/EVA-abliterated-TIES-Qwen2.5-14B finetuned on: - - nbeerbower/GreatFirewall-DPO - nbeerbower/Schule-DPO - nbeerbower/Purpura-DPO - nbeerbower/Arkhaios-DPO - jondurbin/truthy-dpo-v0.1 - antiven0m/physical-reasoning-dpo - flammenai/Date-DPO-NoAsterisks - flammenai/Prude-Phi3-DPO - Atsunori/HelpSteer2-DPO - jondurbin/gutenberg-dpo-v0.1 - nbeerbower/gutenberg2-dpo - nbeerbower/gutenberg-moderne-dpo. 
- overrides: - parameters: - model: nbeerbower_Dumpling-Qwen2.5-14B-Q4_K_M.gguf - files: - - filename: nbeerbower_Dumpling-Qwen2.5-14B-Q4_K_M.gguf - sha256: 2d38348414b2719971a08a604313ed98b44b586490633d6e237dd096ae5bf31d - uri: huggingface://bartowski/nbeerbower_Dumpling-Qwen2.5-14B-GGUF/nbeerbower_Dumpling-Qwen2.5-14B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "nbeerbower_dumpling-qwen2.5-32b-v2" - icon: https://huggingface.co/nbeerbower/Dumpling-Qwen2.5-32B/resolve/main/dumpling_cover.png?download=true - urls: - - https://huggingface.co/nbeerbower/Dumpling-Qwen2.5-32B-v2 - - https://huggingface.co/bartowski/nbeerbower_Dumpling-Qwen2.5-32B-v2-GGUF - description: | - nbeerbower/Rombos-EVAGutenberg-TIES-Qwen2.5-32B finetuned on: - - nbeerbower/GreatFirewall-DPO - nbeerbower/Schule-DPO - nbeerbower/Purpura-DPO - nbeerbower/Arkhaios-DPO - jondurbin/truthy-dpo-v0.1 - antiven0m/physical-reasoning-dpo - flammenai/Date-DPO-NoAsterisks - flammenai/Prude-Phi3-DPO - Atsunori/HelpSteer2-DPO - jondurbin/gutenberg-dpo-v0.1 - nbeerbower/gutenberg2-dpo - nbeerbower/gutenberg-moderne-dpo. 
- overrides: - parameters: - model: nbeerbower_Dumpling-Qwen2.5-32B-v2-Q4_K_M.gguf - files: - - filename: nbeerbower_Dumpling-Qwen2.5-32B-v2-Q4_K_M.gguf - sha256: 02a5320d62e13b31ac6d04ccdaba7b72a524d6cc72a7082b94d8cac0a183ecb4 - uri: huggingface://bartowski/nbeerbower_Dumpling-Qwen2.5-32B-v2-GGUF/nbeerbower_Dumpling-Qwen2.5-32B-v2-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "nbeerbower_dumpling-qwen2.5-72b" - icon: https://huggingface.co/nbeerbower/Dumpling-Qwen2.5-32B/resolve/main/dumpling_cover.png?download=true - urls: - - https://huggingface.co/nbeerbower/Dumpling-Qwen2.5-72B - - https://huggingface.co/bartowski/nbeerbower_Dumpling-Qwen2.5-72B-GGUF - description: | - nbeerbower/EVA-abliterated-TIES-Qwen2.5-72B finetuned on: - nbeerbower/GreatFirewall-DPO - nbeerbower/Schule-DPO - nbeerbower/Purpura-DPO - nbeerbower/Arkhaios-DPO - jondurbin/truthy-dpo-v0.1 - antiven0m/physical-reasoning-dpo - flammenai/Date-DPO-NoAsterisks - flammenai/Prude-Phi3-DPO - Atsunori/HelpSteer2-DPO - jondurbin/gutenberg-dpo-v0.1 - nbeerbower/gutenberg2-dpo - nbeerbower/gutenberg-moderne-dpo. - overrides: - parameters: - model: nbeerbower_Dumpling-Qwen2.5-72B-Q4_K_M.gguf - files: - - filename: nbeerbower_Dumpling-Qwen2.5-72B-Q4_K_M.gguf - sha256: 384de5ba5e60255846cd38e2bfad0374b059fb627ba8abb02273186f28684385 - uri: huggingface://bartowski/nbeerbower_Dumpling-Qwen2.5-72B-GGUF/nbeerbower_Dumpling-Qwen2.5-72B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "open-r1_openr1-qwen-7b" - urls: - - https://huggingface.co/open-r1/OpenR1-Qwen-7B - - https://huggingface.co/bartowski/open-r1_OpenR1-Qwen-7B-GGUF - description: | - This is a finetune of Qwen2.5-Math-Instruct on OpenR1-220k-Math (default split). We train the model on the default split of OpenR1-220k-Math for 3 epochs. We use learning rate of 5e-5 and extend the context length from 4k to 32k, by increasing RoPE frequency to 300k. The training follows a linear learning rate schedule with a 10% warmup phase. 
- overrides: - parameters: - model: open-r1_OpenR1-Qwen-7B-Q4_K_M.gguf - files: - - filename: open-r1_OpenR1-Qwen-7B-Q4_K_M.gguf - sha256: d3bf99666cd19b637948ec9943044b591d3b906d0ee4f3ef1b3eb693ac8f66a6 - uri: huggingface://bartowski/open-r1_OpenR1-Qwen-7B-GGUF/open-r1_OpenR1-Qwen-7B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "internlm_oreal-32b" - urls: - - https://huggingface.co/internlm/OREAL-32B - - https://huggingface.co/bartowski/internlm_OREAL-32B-GGUF - description: | - We introduce OREAL-7B and OREAL-32B, a mathematical reasoning model series trained using Outcome REwArd-based reinforcement Learning, a novel RL framework designed for tasks where only binary outcome rewards are available. - - With OREAL, a 7B model achieves 94.0 pass@1 accuracy on MATH-500, matching the performance of previous 32B models. OREAL-32B further surpasses previous distillation-trained 32B models, reaching 95.0 pass@1 accuracy on MATH-500. - overrides: - parameters: - model: internlm_OREAL-32B-Q4_K_M.gguf - files: - - filename: internlm_OREAL-32B-Q4_K_M.gguf - sha256: 5af1b3f66e3a1f95931a54500d03368c0cc7ca42cc67370338b29c18362e4a94 - uri: huggingface://bartowski/internlm_OREAL-32B-GGUF/internlm_OREAL-32B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "internlm_oreal-7b" - urls: - - https://huggingface.co/internlm/OREAL-7B - - https://huggingface.co/bartowski/internlm_OREAL-7B-GGUF - description: | - We introduce OREAL-7B and OREAL-32B, a mathematical reasoning model series trained using Outcome REwArd-based reinforcement Learning, a novel RL framework designed for tasks where only binary outcome rewards are available. - - With OREAL, a 7B model achieves 94.0 pass@1 accuracy on MATH-500, matching the performance of previous 32B models. OREAL-32B further surpasses previous distillation-trained 32B models, reaching 95.0 pass@1 accuracy on MATH-500. 
- overrides: - parameters: - model: internlm_OREAL-7B-Q4_K_M.gguf - files: - - filename: internlm_OREAL-7B-Q4_K_M.gguf - sha256: 0f7ba453e91872f06a666fda692fbcec13fdd343f74c7dfa7219df31c038ca1c - uri: huggingface://bartowski/internlm_OREAL-7B-GGUF/internlm_OREAL-7B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "smirki_uigen-t1.1-qwen-14b" - icon: https://cdn-uploads.huggingface.co/production/uploads/64d1129297ca59bcf7458d07/VSplF7AM1PJPzeR9FlDhE.png - urls: - - https://huggingface.co/smirki/UIGEN-T1.1-Qwen-14B - - https://huggingface.co/bartowski/smirki_UIGEN-T1.1-Qwen-14B-GGUF - description: | - UIGEN-T1.1 is a 14-billion parameter transformer model fine-tuned on Qwen2.5-Coder-14B-Instruct. It is designed for reasoning-based UI generation, leveraging a complex chain-of-thought approach to produce robust HTML and CSS-based UI components. Currently, it is limited to basic applications such as dashboards, landing pages, and sign-up forms. - overrides: - parameters: - model: smirki_UIGEN-T1.1-Qwen-14B-Q4_K_M.gguf - files: - - filename: smirki_UIGEN-T1.1-Qwen-14B-Q4_K_M.gguf - sha256: 7ad2326f06a304891a1d01d4de9feda42cb4395e4cbdc4d60dc2a26d15e5ea91 - uri: huggingface://bartowski/smirki_UIGEN-T1.1-Qwen-14B-GGUF/smirki_UIGEN-T1.1-Qwen-14B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "smirki_uigen-t1.1-qwen-7b" - icon: https://cdn-uploads.huggingface.co/production/uploads/64d1129297ca59bcf7458d07/VSplF7AM1PJPzeR9FlDhE.png - urls: - - https://huggingface.co/smirki/UIGEN-T1.1-Qwen-7B - - https://huggingface.co/bartowski/smirki_UIGEN-T1.1-Qwen-7B-GGUF - description: | - UIGEN-T1.1 is a 7-billion parameter transformer model fine-tuned on Qwen2.5-Coder-7B-Instruct. It is designed for reasoning-based UI generation, leveraging a complex chain-of-thought approach to produce robust HTML and CSS-based UI components. Currently, it is limited to basic applications such as dashboards, landing pages, and sign-up forms. 
- overrides: - parameters: - model: smirki_UIGEN-T1.1-Qwen-7B-Q4_K_M.gguf - files: - - filename: smirki_UIGEN-T1.1-Qwen-7B-Q4_K_M.gguf - sha256: e5d78dea15d4281455d64aef1c0f18da5674c6f15285a2991e63208d264b61ae - uri: huggingface://bartowski/smirki_UIGEN-T1.1-Qwen-7B-GGUF/smirki_UIGEN-T1.1-Qwen-7B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "rombo-org_rombo-llm-v3.0-qwen-72b" - icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/wp9qOi2K2WGzkey0I3SgH.jpeg - urls: - - https://huggingface.co/Rombo-Org/Rombo-LLM-V3.0-Qwen-72b - - https://huggingface.co/bartowski/Rombo-Org_Rombo-LLM-V3.0-Qwen-72b-GGUF - description: | - Rombos-LLM-V3.0-Qwen-72b is a continues finetuned version of the Rombo-LLM-V2.5-Qwen-72b on a Reasoning and Non-reasoning dataset. The models performs exceptionally well when paired with the system prompt that it was trained on during reasoning training. Nearing SOTA levels even quantized to 4-bit. - overrides: - parameters: - model: Rombo-Org_Rombo-LLM-V3.0-Qwen-72b-Q4_K_M.gguf - files: - - filename: Rombo-Org_Rombo-LLM-V3.0-Qwen-72b-Q4_K_M.gguf - sha256: 3f159ffb494338d03502096c52db5e062a81b09acfd3cc4f6352ca61d6f489df - uri: huggingface://bartowski/Rombo-Org_Rombo-LLM-V3.0-Qwen-72b-GGUF/Rombo-Org_Rombo-LLM-V3.0-Qwen-72b-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "ozone-ai_reverb-7b" - urls: - - https://huggingface.co/ozone-research/Reverb-7b - - https://huggingface.co/bartowski/ozone-ai_Reverb-7b-GGUF - description: | - Reverb-7b is a 7 billion parameter language model developed by Ozone AI. It is a causal language model designed for text generation and various downstream tasks. This is the third model release by Ozone AI. 
- overrides: - parameters: - model: ozone-ai_Reverb-7b-Q4_K_M.gguf - files: - - filename: ozone-ai_Reverb-7b-Q4_K_M.gguf - sha256: f769c6e1a85d3426263f585f640a90c10e7e26b89345a700a4cabf62eb0583d4 - uri: huggingface://bartowski/ozone-ai_Reverb-7b-GGUF/ozone-ai_Reverb-7b-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "ozone-research_chirp-01" - urls: - - https://huggingface.co/ozone-research/Chirp-01 - - https://huggingface.co/bartowski/ozone-research_Chirp-01-GGUF - description: | - Chirp-3b is a high-performing 3B parameter language model crafted by the Ozone Research team. Fine-tuned from a robust base model (Qwen2.5 3B Instruct), it was trained on 50 million tokens of distilled data from GPT-4o. This compact yet powerful model delivers exceptional results, outperforming expectations on benchmarks like MMLU Pro and IFEval. - - Chirp-3b is an open-source effort to push the limits of what small-scale LLMs can achieve, making it a valuable tool for researchers and enthusiasts alike. - overrides: - parameters: - model: ozone-research_Chirp-01-Q4_K_M.gguf - files: - - filename: ozone-research_Chirp-01-Q4_K_M.gguf - sha256: 4ca7328f9b649755077c9064de0b9748d9f12a2e4ce8f493c94e1b19a8b5a035 - uri: huggingface://bartowski/ozone-research_Chirp-01-GGUF/ozone-research_Chirp-01-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "ozone-research_0x-lite" - urls: - - https://huggingface.co/ozone-research/0x-lite - - https://huggingface.co/bartowski/ozone-research_0x-lite-GGUF - description: | - 0x Lite is a state-of-the-art language model developed by Ozone AI, designed to deliver ultra-high-quality text generation capabilities while maintaining a compact and efficient architecture. Built on the latest advancements in natural language processing, 0x Lite is optimized for both speed and accuracy, making it a strong contender in the space of language models. 
It is particularly well-suited for applications where resource constraints are a concern, offering a lightweight alternative to larger models like GPT while still delivering comparable performance. - overrides: - parameters: - model: ozone-research_0x-lite-Q4_K_M.gguf - files: - - filename: ozone-research_0x-lite-Q4_K_M.gguf - sha256: c11f3bd1c607ca329f48d1b6a3e540ac4c5ea8d57097550639709d9202b7f405 - uri: huggingface://bartowski/ozone-research_0x-lite-GGUF/ozone-research_0x-lite-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "allenai_olmocr-7b-0225-preview" - icon: https://huggingface.co/datasets/allenai/blog-images/resolve/main/olmocr/olmocr.png - urls: - - https://huggingface.co/allenai/olmOCR-7B-0225-preview - - https://huggingface.co/bartowski/allenai_olmOCR-7B-0225-preview-GGUF - - https://olmocr.allenai.org/papers/olmocr.pdf - description: | - This is a preview release of the olmOCR model that's fine tuned from Qwen2-VL-7B-Instruct using the olmOCR-mix-0225 dataset. - overrides: - parameters: - model: allenai_olmOCR-7B-0225-preview-Q4_K_M.gguf - files: - - filename: allenai_olmOCR-7B-0225-preview-Q4_K_M.gguf - sha256: 0a5603f95ba59828061d315b7869e021ea1b86e2dececaba8a1f9bcc3f81e84a - uri: huggingface://bartowski/allenai_olmOCR-7B-0225-preview-GGUF/allenai_olmOCR-7B-0225-preview-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "boomer_qwen_72b-i1" - icon: https://huggingface.co/SicariusSicariiStuff/Boomer_Qwen_72B/resolve/main/Images/03.png - urls: - - https://huggingface.co/SicariusSicariiStuff/Boomer_Qwen_72B - - https://huggingface.co/mradermacher/Boomer_Qwen_72B-i1-GGUF - description: | - An absolute unit derived from Qwen-72B, but turbo-charged with pure unfiltered boomer sigma grindset energy. This model has internalized decades of "back in my day" wisdom and distilled it into the most powerful financial NLP system ever created. 
- - Core features: - - Programmed to automatically respond "Just buy the dip" to any market analysis - Enhanced pattern recognition for spotting "kids these days" scenarios - Built-in mortgage calculator that always concludes "rent is throwing money away" - Advanced NLP pipeline for transforming any input into "when I was your age" narratives - Hardwired belief in "number go up" as the fundamental law of economics - - Training methodology: Collected prime boomer wisdom from countless Facebook rants, Thanksgiving dinner lectures, and unsolicited advice sessions. Fed it through Qwen's architecture until it achieved enlightenment and started spontaneously generating complaints about avocado toast. - - Performance metrics: Achieves SOTA results on: - - Real estate evangelism - "Pull yourself up by your bootstraps" pep talks - Gold standard nostalgia generation - Market timing (but only in retrospect) - - Basically took the raw computational power of Qwen-72B and gave it a healthy dose of "they don't make 'em like they used to" energy. The result? A model that knows the secret to success is just working hard and investing in the S&P 500. - - Warning: May spontaneously generate advice about starting in the mail room and working your way up to CEO. - overrides: - parameters: - model: Boomer_Qwen_72B.i1-Q4_K_M.gguf - files: - - filename: Boomer_Qwen_72B.i1-Q4_K_M.gguf - sha256: 5cee89356d512874ca45f516c322d99f2b3534db5a3acd43a96c031cced3bc75 - uri: huggingface://mradermacher/Boomer_Qwen_72B-i1-GGUF/Boomer_Qwen_72B.i1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "azura-qwen2.5-32b-i1" - icon: https://huggingface.co/nbeerbower/Azura-Qwen2.5-32B/resolve/main/cover.png?download=true - urls: - - https://huggingface.co/nbeerbower/Azura-Qwen2.5-32B - - https://huggingface.co/mradermacher/Azura-Qwen2.5-32B-i1-GGUF - description: | - This model was merged using the Model Stock merge method using nbeerbower/Dumpling-Qwen2.5-32B as a base. 
- The following models were included in the merge: - - rinna/qwen2.5-bakeneko-32b-instruct - EVA-UNIT-01/EVA-Qwen2.5-32B-v0.2 - zetasepic/Qwen2.5-32B-Instruct-abliterated-v2 - nbeerbower/Dumpling-Qwen2.5-32B-v2 - overrides: - parameters: - model: Azura-Qwen2.5-32B.i1-Q4_K_M.gguf - files: - - filename: Azura-Qwen2.5-32B.i1-Q4_K_M.gguf - sha256: a3ec93f192dc4ce062fd176d6615d4da34af81d909b89c372678b779a46b8d3b - uri: huggingface://mradermacher/Azura-Qwen2.5-32B-i1-GGUF/Azura-Qwen2.5-32B.i1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwen_qwq-32b" - urls: - - https://huggingface.co/Qwen/QwQ-32B - - https://huggingface.co/bartowski/Qwen_QwQ-32B-GGUF - description: | - QwQ is the reasoning model of the Qwen series. Compared with conventional instruction-tuned models, QwQ, which is capable of thinking and reasoning, can achieve significantly enhanced performance in downstream tasks, especially hard problems. QwQ-32B is the medium-sized reasoning model, which is capable of achieving competitive performance against state-of-the-art reasoning models, e.g., DeepSeek-R1, o1-mini. - overrides: - parameters: - model: Qwen_QwQ-32B-Q4_K_M.gguf - files: - - filename: Qwen_QwQ-32B-Q4_K_M.gguf - sha256: 87cc1894a68008856cde6ff24bfb9b99488a0d18c2e0a2b1ddeabd43cd0498e0 - uri: huggingface://bartowski/Qwen_QwQ-32B-GGUF/Qwen_QwQ-32B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "rombo-org_rombo-llm-v3.1-qwq-32b" - icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/hXnQV6WtMKrmIQPdjECSX.jpeg - urls: - - https://huggingface.co/Rombo-Org/Rombo-LLM-V3.1-QWQ-32b - - https://huggingface.co/bartowski/Rombo-Org_Rombo-LLM-V3.1-QWQ-32b-GGUF - description: | - Rombo-LLM-V3.1-QWQ-32b is a Continued Finetune model (Merge only) of (Qwen/QwQ-32B) and its base model (Qwen/Qwen2.5-32B). This merge is done to decrease catastrophic forgetting during finetuning, and increase overall performance of the model. The tokenizers are taken from the QwQ-32B for thinking capabilities. 
- overrides: - parameters: - model: Rombo-Org_Rombo-LLM-V3.1-QWQ-32b-Q4_K_M.gguf - files: - - filename: Rombo-Org_Rombo-LLM-V3.1-QWQ-32b-Q4_K_M.gguf - sha256: ee0b5027c686f3c37938f33b62788e27211852268f9e5c32e00058f0cf1688c7 - uri: huggingface://bartowski/Rombo-Org_Rombo-LLM-V3.1-QWQ-32b-GGUF/Rombo-Org_Rombo-LLM-V3.1-QWQ-32b-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "huihui-ai_qwq-32b-abliterated" - urls: - - https://huggingface.co/huihui-ai/QwQ-32B-abliterated - - https://huggingface.co/bartowski/huihui-ai_QwQ-32B-abliterated-GGUF - description: | - This is an uncensored version of Qwen/QwQ-32B created with abliteration (see remove-refusals-with-transformers to know more about it). - This is a crude, proof-of-concept implementation to remove refusals from an LLM model without using TransformerLens. - overrides: - parameters: - model: huihui-ai_QwQ-32B-abliterated-Q4_K_M.gguf - files: - - filename: huihui-ai_QwQ-32B-abliterated-Q4_K_M.gguf - sha256: 27d3c3e116015257985fa27b87e3f3aafbeb4762152d60474e883547d436025e - uri: huggingface://bartowski/huihui-ai_QwQ-32B-abliterated-GGUF/huihui-ai_QwQ-32B-abliterated-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "tower-babel_babel-9b-chat" - icon: https://cdn-avatars.huggingface.co/v1/production/uploads/64802face9ff472e30dc1ceb/9mRO092PjPmzd8qSr7F5V.png - urls: - - https://huggingface.co/Tower-Babel/Babel-9B-Chat - - https://huggingface.co/bartowski/Tower-Babel_Babel-9B-Chat-GGUF - description: | - We introduce Babel, a multilingual LLM that covers the top 25 languages by number of speakers, including English, Chinese, Hindi, Spanish, Arabic, French, Bengali, Portuguese, Russian, Urdu, Indonesian, German, Japanese, Swahili, Filipino, Tamil, Vietnamese, Turkish, Italian, Javanese, Korean, Hausa, Persian, Thai, and Burmese. These 25 languages support over 90% of the global population, and include many languages neglected by other open multilingual LLMs. 
Unlike traditional continued pretraining approaches, Babel expands its parameter count through a layer extension technique that elevates Babel's performance ceiling. - overrides: - parameters: - model: Tower-Babel_Babel-9B-Chat-Q4_K_M.gguf - files: - - filename: Tower-Babel_Babel-9B-Chat-Q4_K_M.gguf - sha256: cf024c81b9c5e31dd9b4fe89f7bed01be8a6a704722780fe8d240b1ecb7942eb - uri: huggingface://bartowski/Tower-Babel_Babel-9B-Chat-GGUF/Tower-Babel_Babel-9B-Chat-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "openpipe_deductive-reasoning-qwen-14b" - urls: - - https://huggingface.co/OpenPipe/Deductive-Reasoning-Qwen-14B - - https://huggingface.co/bartowski/OpenPipe_Deductive-Reasoning-Qwen-14B-GGUF - description: | - Deductive Reasoning Qwen 14B is a reinforcement fine-tune of Qwen 2.5 14B Instruct to solve challenging deduction problems from the Temporal Clue dataset, trained by OpenPipe! - overrides: - parameters: - model: OpenPipe_Deductive-Reasoning-Qwen-14B-Q4_K_M.gguf - files: - - filename: OpenPipe_Deductive-Reasoning-Qwen-14B-Q4_K_M.gguf - sha256: 23474b114e1e14f5f63829369e9af14d3f8e6b437b7974e1d3ac0c842b4cc3f5 - uri: huggingface://bartowski/OpenPipe_Deductive-Reasoning-Qwen-14B-GGUF/OpenPipe_Deductive-Reasoning-Qwen-14B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "openpipe_deductive-reasoning-qwen-32b" - urls: - - https://huggingface.co/OpenPipe/Deductive-Reasoning-Qwen-32B - - https://huggingface.co/bartowski/OpenPipe_Deductive-Reasoning-Qwen-32B-GGUF - description: | - Deductive Reasoning Qwen 32B is a reinforcement fine-tune of Qwen 2.5 32B Instruct to solve challenging deduction problems from the Temporal Clue dataset, trained by OpenPipe! 
- overrides: - parameters: - model: OpenPipe_Deductive-Reasoning-Qwen-32B-Q4_K_M.gguf - files: - - filename: OpenPipe_Deductive-Reasoning-Qwen-32B-Q4_K_M.gguf - sha256: 53a8314e572c60c867da897721d366f183dc6d2193c83a41ff8ad46a2a0692c8 - uri: huggingface://bartowski/OpenPipe_Deductive-Reasoning-Qwen-32B-GGUF/OpenPipe_Deductive-Reasoning-Qwen-32B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "open-r1_olympiccoder-32b" - urls: - - https://huggingface.co/open-r1/OlympicCoder-32B - - https://huggingface.co/bartowski/open-r1_OlympicCoder-32B-GGUF - description: | - OlympicCoder-32B is a code mode that achieves very strong performance on competitive coding benchmarks such as LiveCodeBench andthe 2024 International Olympiad in Informatics. - overrides: - parameters: - model: open-r1_OlympicCoder-32B-Q4_K_M.gguf - files: - - filename: open-r1_OlympicCoder-32B-Q4_K_M.gguf - sha256: bb82e4aa2219f655d37c7efad8985582cf3c32de0e0299ecd2f304d32ac39f12 - uri: huggingface://bartowski/open-r1_OlympicCoder-32B-GGUF/open-r1_OlympicCoder-32B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "open-r1_olympiccoder-7b" - urls: - - https://huggingface.co/open-r1/OlympicCoder-7B - - https://huggingface.co/bartowski/open-r1_OlympicCoder-7B-GGUF - description: | - OlympicCoder-7B is a code model that achieves strong performance on competitive coding benchmarks such as LiveCodeBench and the 2024 International Olympiad in Informatics. 
- overrides: - parameters: - model: open-r1_OlympicCoder-7B-Q4_K_M.gguf - files: - - filename: open-r1_OlympicCoder-7B-Q4_K_M.gguf - sha256: 21e18e7fd1fb244455a67d4dee538a4d86dc96d507c39a4ad16ef335fb9e6e2f - uri: huggingface://bartowski/open-r1_OlympicCoder-7B-GGUF/open-r1_OlympicCoder-7B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "trashpanda-org_qwq-32b-snowdrop-v0" - icon: https://cdn-uploads.huggingface.co/production/uploads/675a77cf99ca23af9daacccc/Tdn0PJBFnG3J6UcjO9G94.png - urls: - - https://huggingface.co/trashpanda-org/QwQ-32B-Snowdrop-v0 - - https://huggingface.co/bartowski/trashpanda-org_QwQ-32B-Snowdrop-v0-GGUF - description: | - R1 at home for RP, literally. Able to handle my cards with gimmicks and subtle tricks in them. With a good reasoning starter+prompt, I'm getting consistently-structured responses that have a good amount of variation across them still while rerolling. Char/scenario portrayal is good despite my focus on writing style, lorebooks are properly referenced at times. Slop doesn't seem to be too much of an issue with thinking enabled. Some user impersonation is rarely observed. Prose is refreshing if you take advantage of what I did (writing style fixation). I know I said Marigold would be my daily driver, but this one is that now, it's that good. 
- overrides: - parameters: - model: trashpanda-org_QwQ-32B-Snowdrop-v0-Q4_K_M.gguf - files: - - filename: trashpanda-org_QwQ-32B-Snowdrop-v0-Q4_K_M.gguf - sha256: 584d2f14f2f08ce499665c332bef30245b605ed2278e9075766237835f564c5f - uri: huggingface://bartowski/trashpanda-org_QwQ-32B-Snowdrop-v0-GGUF/trashpanda-org_QwQ-32B-Snowdrop-v0-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "prithivmlmods_viper-coder-32b-elite13" - icon: https://cdn-uploads.huggingface.co/production/uploads/65bb837dbfb878f46c77de4c/n5x-NuenasIjm3HljPUYY.png - urls: - - https://huggingface.co/prithivMLmods/Viper-Coder-32B-Elite13 - - https://huggingface.co/bartowski/prithivMLmods_Viper-Coder-32B-Elite13-GGUF - description: | - Viper-Coder-32B-Elite13 is based on the qwq-32B modality architecture, designed to be the best for coding and reasoning tasks. It has been fine-tuned on a synthetic dataset leveraging the latest coding logits and CoT datasets, further optimizing its chain-of-thought (CoT) reasoning and logical problem-solving abilities. The model demonstrates significant improvements in context understanding, structured data processing, and long-context comprehension, making it ideal for complex coding tasks, instruction-following, and technical text generation. 
- overrides: - parameters: - model: prithivMLmods_Viper-Coder-32B-Elite13-Q4_K_M.gguf - files: - - filename: prithivMLmods_Viper-Coder-32B-Elite13-Q4_K_M.gguf - sha256: 57a41ed2fc0d62847cf85ff20cc71be9c5978d22a56e39f2390c6563e5b0c931 - uri: huggingface://bartowski/prithivMLmods_Viper-Coder-32B-Elite13-GGUF/prithivMLmods_Viper-Coder-32B-Elite13-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "rootxhacker_apollo-v3-32b" - urls: - - https://huggingface.co/rootxhacker/Apollo-v3-32B - - https://huggingface.co/bartowski/rootxhacker_Apollo-v3-32B-GGUF - description: | - This is an experimental hybrid reasoning model built on Qwen2.5-32B-Instruct - overrides: - parameters: - model: rootxhacker_Apollo-v3-32B-Q4_K_M.gguf - files: - - filename: rootxhacker_Apollo-v3-32B-Q4_K_M.gguf - sha256: 67aa4b88a017931fab622b05879c0ff5f0a6db758686d2200aaad19f21bd5d2a - uri: huggingface://bartowski/rootxhacker_Apollo-v3-32B-GGUF/rootxhacker_Apollo-v3-32B-Q4_K_M.gguf -- !!merge <<: *qwen25 - icon: https://emygervais.github.io/assets/images/screenshots.png - name: "samsungsailmontreal_bytecraft" - urls: - - https://huggingface.co/SamsungSAILMontreal/ByteCraft - - https://huggingface.co/bartowski/SamsungSAILMontreal_ByteCraft-GGUF - description: | - ByteCraft is the world's first generative model of SWF video games and animations through bytes conditional on prompt. 
- overrides: - parameters: - model: SamsungSAILMontreal_ByteCraft-Q4_K_M.gguf - files: - - filename: SamsungSAILMontreal_ByteCraft-Q4_K_M.gguf - sha256: b9e1b44e3e6d90fe5d7d7d4741c37bcb40724e50de8b8f0ad2480e095e8d1712 - uri: huggingface://bartowski/SamsungSAILMontreal_ByteCraft-GGUF/SamsungSAILMontreal_ByteCraft-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwen-writerdemo-7b-s500-i1" - urls: - - https://huggingface.co/Quest-AI/qwen-writerdemo-7b-s500 - - https://huggingface.co/mradermacher/qwen-writerdemo-7b-s500-i1-GGUF - description: | - This is a base model that has had an experimental reward model RL training done over it for a subset of the Erebus dataset (creative writing). - overrides: - parameters: - model: qwen-writerdemo-7b-s500.i1-Q4_K_M.gguf - files: - - filename: qwen-writerdemo-7b-s500.i1-Q4_K_M.gguf - sha256: dcc0e2dd36587fdd3ed0c8e8c215a01244f00dd85f62da23642410d0e688fe13 - uri: huggingface://mradermacher/qwen-writerdemo-7b-s500-i1-GGUF/qwen-writerdemo-7b-s500.i1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "helpingai_helpingai3-raw" - urls: - - https://huggingface.co/HelpingAI/Helpingai3-raw - - https://huggingface.co/bartowski/HelpingAI_Helpingai3-raw-GGUF - description: | - The LLM model described is an emotionally intelligent, conversational and EQ-focused model developed by HelpingAI. It is based on the Helpingai3-raw model and has been quantized using the llama.cpp framework. The model is available in various quantization levels, allowing for different trade-offs between performance and size. Users can choose the appropriate quantization level based on their available RAM, VRAM, and desired performance. The model's weights are provided in .gguf format and can be downloaded from the Hugging Face model repository. 
- overrides: - parameters: - model: HelpingAI_Helpingai3-raw-Q4_K_M.gguf - files: - - filename: HelpingAI_Helpingai3-raw-Q4_K_M.gguf - sha256: de7a223ad397ba27c889dad08466de471166f1e76962b855c72cf6b779a7b857 - uri: huggingface://bartowski/HelpingAI_Helpingai3-raw-GGUF/HelpingAI_Helpingai3-raw-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwen2.5-14b-instruct-1m-unalign-i1" - urls: - - https://huggingface.co/ToastyPigeon/Qwen2.5-14B-Instruct-1M-Unalign - - https://huggingface.co/mradermacher/Qwen2.5-14B-Instruct-1M-Unalign-i1-GGUF - description: | - A simple unalignment fine-tune on ~900k tokens aiming to make the model more compliant and willing to handle user requests. - - This is the same unalignment training seen in concedo/Beepo-22B, so big thanks to concedo for the dataset. - - Chat template is same as the original, ChatML. - overrides: - parameters: - model: Qwen2.5-14B-Instruct-1M-Unalign.i1-Q4_K_M.gguf - files: - - filename: Qwen2.5-14B-Instruct-1M-Unalign.i1-Q4_K_M.gguf - sha256: 11b2eb96a8a4d512fceb3344dccc694972801c964cf748d723fdf436bc368915 - uri: huggingface://mradermacher/Qwen2.5-14B-Instruct-1M-Unalign-i1-GGUF/Qwen2.5-14B-Instruct-1M-Unalign.i1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "tesslate_tessa-t1-32b" - icon: https://cdn-uploads.huggingface.co/production/uploads/64d1129297ca59bcf7458d07/I7XzH-NMKUshcGU86u6VA.png - urls: - - https://huggingface.co/Tesslate/Tessa-T1-32B - - https://huggingface.co/bartowski/Tesslate_Tessa-T1-32B-GGUF - description: | - Tessa-T1 is an innovative transformer-based React reasoning model, fine-tuned from the powerful Qwen2.5-Coder-32B-Instruct base model. Designed specifically for React frontend development, Tessa-T1 leverages advanced reasoning to autonomously generate well-structured, semantic React components. Its integration into agent systems makes it a powerful tool for automating web interface development and frontend code intelligence. 
- Model Highlights - - React-specific Reasoning: Accurately generates functional and semantic React components. - Agent Integration: Seamlessly fits into AI-driven coding agents and autonomous frontend systems. - Context-Aware Generation: Effectively understands and utilizes UI context to provide relevant code solutions. - overrides: - parameters: - model: Tesslate_Tessa-T1-32B-Q4_K_M.gguf - files: - - filename: Tesslate_Tessa-T1-32B-Q4_K_M.gguf - sha256: e52a2a0a877ce1de78f2ea472c9e3bc7a0c20d6998423e9d99a59175809d3a22 - uri: huggingface://bartowski/Tesslate_Tessa-T1-32B-GGUF/Tesslate_Tessa-T1-32B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "tesslate_tessa-t1-14b" - icon: https://cdn-uploads.huggingface.co/production/uploads/64d1129297ca59bcf7458d07/I7XzH-NMKUshcGU86u6VA.png - urls: - - https://huggingface.co/Tesslate/Tessa-T1-14B - - https://huggingface.co/bartowski/Tesslate_Tessa-T1-14B-GGUF - description: | - Tessa-T1 is an innovative transformer-based React reasoning model, fine-tuned from the powerful Qwen2.5-Coder-14B-Instruct base model. Designed specifically for React frontend development, Tessa-T1 leverages advanced reasoning to autonomously generate well-structured, semantic React components. Its integration into agent systems makes it a powerful tool for automating web interface development and frontend code intelligence. - Model Highlights - - React-specific Reasoning: Accurately generates functional and semantic React components. - Agent Integration: Seamlessly fits into AI-driven coding agents and autonomous frontend systems. - Context-Aware Generation: Effectively understands and utilizes UI context to provide relevant code solutions. 
- overrides: - parameters: - model: Tesslate_Tessa-T1-14B-Q4_K_M.gguf - files: - - filename: Tesslate_Tessa-T1-14B-Q4_K_M.gguf - sha256: 1b35ff651b9c1e4538d10e3117390ae36094b6455a9f937a4f3ab72162125bca - uri: huggingface://bartowski/Tesslate_Tessa-T1-14B-GGUF/Tesslate_Tessa-T1-14B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "tesslate_tessa-t1-7b" - icon: https://cdn-uploads.huggingface.co/production/uploads/64d1129297ca59bcf7458d07/I7XzH-NMKUshcGU86u6VA.png - urls: - - https://huggingface.co/Tesslate/Tessa-T1-7B - - https://huggingface.co/bartowski/Tesslate_Tessa-T1-7B-GGUF - description: | - Tessa-T1 is an innovative transformer-based React reasoning model, fine-tuned from the powerful Qwen2.5-Coder-7B-Instruct base model. Designed specifically for React frontend development, Tessa-T1 leverages advanced reasoning to autonomously generate well-structured, semantic React components. Its integration into agent systems makes it a powerful tool for automating web interface development and frontend code intelligence. - Model Highlights - - React-specific Reasoning: Accurately generates functional and semantic React components. - Agent Integration: Seamlessly fits into AI-driven coding agents and autonomous frontend systems. - Context-Aware Generation: Effectively understands and utilizes UI context to provide relevant code solutions. 
- overrides: - parameters: - model: Tesslate_Tessa-T1-7B-Q4_K_M.gguf - files: - - filename: Tesslate_Tessa-T1-7B-Q4_K_M.gguf - sha256: 7968332d01b5479dee99aff7c9764b9e61c2a6d2828c266163596dd783bdee18 - uri: huggingface://bartowski/Tesslate_Tessa-T1-7B-GGUF/Tesslate_Tessa-T1-7B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "tesslate_tessa-t1-3b" - icon: https://cdn-uploads.huggingface.co/production/uploads/64d1129297ca59bcf7458d07/I7XzH-NMKUshcGU86u6VA.png - urls: - - https://huggingface.co/Tesslate/Tessa-T1-3B - - https://huggingface.co/bartowski/Tesslate_Tessa-T1-3B-GGUF - description: | - Tessa-T1 is an innovative transformer-based React reasoning model, fine-tuned from the powerful Qwen2.5-Coder-3B-Instruct base model. Designed specifically for React frontend development, Tessa-T1 leverages advanced reasoning to autonomously generate well-structured, semantic React components. Its integration into agent systems makes it a powerful tool for automating web interface development and frontend code intelligence. - Model Highlights - - React-specific Reasoning: Accurately generates functional and semantic React components. - Agent Integration: Seamlessly fits into AI-driven coding agents and autonomous frontend systems. - Context-Aware Generation: Effectively understands and utilizes UI context to provide relevant code solutions. 
- overrides: - parameters: - model: Tesslate_Tessa-T1-3B-Q4_K_M.gguf - files: - - filename: Tesslate_Tessa-T1-3B-Q4_K_M.gguf - sha256: d6b9d31d78d36094cab2725a7df318f8f3556990df736a21998c952d9a6ee0bf - uri: huggingface://bartowski/Tesslate_Tessa-T1-3B-GGUF/Tesslate_Tessa-T1-3B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "chaoticneutrals_very_berry_qwen2_7b" - icon: https://cdn-uploads.huggingface.co/production/uploads/626dfb8786671a29c715f8a9/1J817kx3zZccf5yvQYiGM.png - urls: - - https://huggingface.co/ChaoticNeutrals/Very_Berry_Qwen2_7B - - https://huggingface.co/bartowski/ChaoticNeutrals_Very_Berry_Qwen2_7B-GGUF - description: | - It do the stuff. - overrides: - parameters: - model: ChaoticNeutrals_Very_Berry_Qwen2_7B-Q4_K_M.gguf - files: - - filename: ChaoticNeutrals_Very_Berry_Qwen2_7B-Q4_K_M.gguf - sha256: cbda41c638c23a3e8e9fb33c27ca0d0a0ee044b6813941a0017fd46369a35ec5 - uri: huggingface://bartowski/ChaoticNeutrals_Very_Berry_Qwen2_7B-GGUF/ChaoticNeutrals_Very_Berry_Qwen2_7B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "galactic-qwen-14b-exp1" - icon: https://cdn-uploads.huggingface.co/production/uploads/65bb837dbfb878f46c77de4c/SjM3y5Qcr2RX6zC3GQxR3.png - urls: - - https://huggingface.co/prithivMLmods/Galactic-Qwen-14B-Exp1 - - https://huggingface.co/mradermacher/Galactic-Qwen-14B-Exp1-GGUF - description: | - Galactic-Qwen-14B-Exp1 is based on the Qwen 2.5 14B modality architecture, designed to enhance the reasoning capabilities of 14B-parameter models. This model is optimized for general-purpose reasoning and answering, excelling in contextual understanding, logical deduction, and multi-step problem-solving. It has been fine-tuned using a long chain-of-thought reasoning model and specialized datasets to improve comprehension, structured responses, and conversational intelligence. 
- overrides: - parameters: - model: Galactic-Qwen-14B-Exp1.Q4_K_M.gguf - files: - - filename: Galactic-Qwen-14B-Exp1.Q4_K_M.gguf - sha256: 26e99578c341c879cc2676c4c7a45b6c0d00b30bd17c8ee7494fcc4092480ef0 - uri: huggingface://mradermacher/Galactic-Qwen-14B-Exp1-GGUF/Galactic-Qwen-14B-Exp1.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "hammer2.0-7b" - urls: - - https://huggingface.co/MadeAgents/Hammer2.0-7b - - https://huggingface.co/Nekuromento/Hammer2.0-7b-Q5_K_M-GGUF - description: | - Hammer2.0 finetuned based on Qwen 2.5 series and Qwen 2.5 coder series using function masking techniques. It's trained using the APIGen Function Calling Datasets containing 60,000 samples, supplemented by xlam-irrelevance-7.5k we generated. Hammer2.0 has achieved exceptional performances across numerous function calling benchmarks. For more details, please refer to Hammer: Robust Function-Calling for On-Device Language Models via Function Masking and Hammer GitHub repository . - overrides: - parameters: - model: hammer2.0-7b-q5_k_m.gguf - files: - - filename: hammer2.0-7b-q5_k_m.gguf - sha256: 3682843c857595765f0786cf24b3d501af96fe5d99a9fb2526bc7707e28bae1e - uri: huggingface://Nekuromento/Hammer2.0-7b-Q5_K_M-GGUF/hammer2.0-7b-q5_k_m.gguf -- !!merge <<: *qwen25 - icon: https://github.com/All-Hands-AI/OpenHands/blob/main/docs/static/img/logo.png?raw=true - name: "all-hands_openhands-lm-32b-v0.1" - urls: - - https://huggingface.co/all-hands/openhands-lm-32b-v0.1 - - https://huggingface.co/bartowski/all-hands_openhands-lm-32b-v0.1-GGUF - description: | - Autonomous agents for software development are already contributing to a wide range of software development tasks. But up to this point, strong coding agents have relied on proprietary models, which means that even if you use an open-source agent like OpenHands, you are still reliant on API calls to an external service. 
- - Today, we are excited to introduce OpenHands LM, a new open coding model that: - - Is open and available on Hugging Face, so you can download it and run it locally - Is a reasonable size, 32B, so it can be run locally on hardware such as a single 3090 GPU - Achieves strong performance on software engineering tasks, including 37.2% resolve rate on SWE-Bench Verified - - Read below for more details and our future plans! - What is OpenHands LM? - - OpenHands LM is built on the foundation of Qwen Coder 2.5 Instruct 32B, leveraging its powerful base capabilities for coding tasks. What sets OpenHands LM apart is our specialized fine-tuning process: - - We used training data generated by OpenHands itself on a diverse set of open-source repositories - Specifically, we use an RL-based framework outlined in SWE-Gym, where we set up a training environment, generate training data using an existing agent, and then fine-tune the model on examples that were resolved successfully - It features a 128K token context window, ideal for handling large codebases and long-horizon software engineering tasks - overrides: - parameters: - model: all-hands_openhands-lm-32b-v0.1-Q4_K_M.gguf - files: - - filename: all-hands_openhands-lm-32b-v0.1-Q4_K_M.gguf - sha256: f7c2311d3264cc1e021a21a319748a9c75b74ddebe38551786aa4053448e5e74 - uri: huggingface://bartowski/all-hands_openhands-lm-32b-v0.1-GGUF/all-hands_openhands-lm-32b-v0.1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "all-hands_openhands-lm-7b-v0.1" - icon: https://github.com/All-Hands-AI/OpenHands/blob/main/docs/static/img/logo.png?raw=true - urls: - - https://huggingface.co/all-hands/openhands-lm-7b-v0.1 - - https://huggingface.co/bartowski/all-hands_openhands-lm-7b-v0.1-GGUF - description: | - This is a smaller 7B model trained following the recipe of all-hands/openhands-lm-32b-v0.1. Autonomous agents for software development are already contributing to a wide range of software development tasks. 
But up to this point, strong coding agents have relied on proprietary models, which means that even if you use an open-source agent like OpenHands, you are still reliant on API calls to an external service. - - Today, we are excited to introduce OpenHands LM, a new open coding model that: - - Is open and available on Hugging Face, so you can download it and run it locally - Is a reasonable size, 32B, so it can be run locally on hardware such as a single 3090 GPU - Achieves strong performance on software engineering tasks, including 37.2% resolve rate on SWE-Bench Verified - - Read below for more details and our future plans! - What is OpenHands LM? - - OpenHands LM is built on the foundation of Qwen Coder 2.5 Instruct 32B, leveraging its powerful base capabilities for coding tasks. What sets OpenHands LM apart is our specialized fine-tuning process: - - We used training data generated by OpenHands itself on a diverse set of open-source repositories - Specifically, we use an RL-based framework outlined in SWE-Gym, where we set up a training environment, generate training data using an existing agent, and then fine-tune the model on examples that were resolved successfully - It features a 128K token context window, ideal for handling large codebases and long-horizon software engineering tasks - overrides: - parameters: - model: all-hands_openhands-lm-7b-v0.1-Q4_K_M.gguf - files: - - filename: all-hands_openhands-lm-7b-v0.1-Q4_K_M.gguf - sha256: d50031b04bbdad714c004a0dc117c18d26a026297c236cda36089c20279b2ec1 - uri: huggingface://bartowski/all-hands_openhands-lm-7b-v0.1-GGUF/all-hands_openhands-lm-7b-v0.1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "all-hands_openhands-lm-1.5b-v0.1" - icon: https://github.com/All-Hands-AI/OpenHands/blob/main/docs/static/img/logo.png?raw=true - urls: - - https://huggingface.co/all-hands/openhands-lm-1.5b-v0.1 - - https://huggingface.co/bartowski/all-hands_openhands-lm-1.5b-v0.1-GGUF - description: | - This is a smaller 1.5B model 
trained following the recipe of all-hands/openhands-lm-32b-v0.1. It is intended to be used for speculative decoding. Autonomous agents for software development are already contributing to a wide range of software development tasks. But up to this point, strong coding agents have relied on proprietary models, which means that even if you use an open-source agent like OpenHands, you are still reliant on API calls to an external service. - - Today, we are excited to introduce OpenHands LM, a new open coding model that: - - Is open and available on Hugging Face, so you can download it and run it locally - Is a reasonable size, 32B, so it can be run locally on hardware such as a single 3090 GPU - Achieves strong performance on software engineering tasks, including 37.2% resolve rate on SWE-Bench Verified - - Read below for more details and our future plans! - What is OpenHands LM? - - OpenHands LM is built on the foundation of Qwen Coder 2.5 Instruct 32B, leveraging its powerful base capabilities for coding tasks. 
What sets OpenHands LM apart is our specialized fine-tuning process: - - We used training data generated by OpenHands itself on a diverse set of open-source repositories - Specifically, we use an RL-based framework outlined in SWE-Gym, where we set up a training environment, generate training data using an existing agent, and then fine-tune the model on examples that were resolved successfully - It features a 128K token context window, ideal for handling large codebases and long-horizon software engineering tasks - overrides: - parameters: - model: all-hands_openhands-lm-1.5b-v0.1-Q4_K_M.gguf - files: - - filename: all-hands_openhands-lm-1.5b-v0.1-Q4_K_M.gguf - sha256: 30abd7860c4eb5f2f51546389407b0064360862f64ea55cdf95f97c6e155b3c6 - uri: huggingface://bartowski/all-hands_openhands-lm-1.5b-v0.1-GGUF/all-hands_openhands-lm-1.5b-v0.1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "katanemo_arch-function-chat-7b" - urls: - - https://huggingface.co/katanemo/Arch-Function-Chat-7B - - https://huggingface.co/bartowski/katanemo_Arch-Function-Chat-7B-GGUF - description: | - The Arch-Function-Chat collection builds upon the Katanemo's Arch-Function collection by extending its capabilities beyond function calling. This new collection maintains the state-of-the-art(SOTA) function calling performance of the original collection while adding powerful new features that make it even more versatile in real-world applications. - - In addition to function calling capabilities, this collection now offers: - - Clarify & refine: Generates natural follow-up questions to collect missing information for function calling - Interpret & respond: Provides human-friendly responses based on function execution results - Context management: Mantains context in complex multi-turn interactions - - Note: Arch-Function-Chat is now the primarly LLM used in then open source Arch Gateway - An AI-native proxy for agents. For more details about the project, check out the Github README. 
- overrides: - parameters: - model: katanemo_Arch-Function-Chat-7B-Q4_K_M.gguf - files: - - filename: katanemo_Arch-Function-Chat-7B-Q4_K_M.gguf - sha256: 6fd603511076ffea3697c8a76d82c054781c5e11f134b937a66cedfc49b3d2c5 - uri: huggingface://bartowski/katanemo_Arch-Function-Chat-7B-GGUF/katanemo_Arch-Function-Chat-7B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "katanemo_arch-function-chat-1.5b" - urls: - - https://huggingface.co/katanemo/Arch-Function-Chat-1.5B - - https://huggingface.co/bartowski/katanemo_Arch-Function-Chat-1.5B-GGUF - description: | - The Arch-Function-Chat collection builds upon the Katanemo's Arch-Function collection by extending its capabilities beyond function calling. This new collection maintains the state-of-the-art(SOTA) function calling performance of the original collection while adding powerful new features that make it even more versatile in real-world applications. - - In addition to function calling capabilities, this collection now offers: - - Clarify & refine: Generates natural follow-up questions to collect missing information for function calling - Interpret & respond: Provides human-friendly responses based on function execution results - Context management: Mantains context in complex multi-turn interactions - - Note: Arch-Function-Chat is now the primarly LLM used in then open source Arch Gateway - An AI-native proxy for agents. For more details about the project, check out the Github README. 
- overrides: - parameters: - model: katanemo_Arch-Function-Chat-1.5B-Q4_K_M.gguf - files: - - filename: katanemo_Arch-Function-Chat-1.5B-Q4_K_M.gguf - sha256: 5bfcb72803745c374a90b0ceb60f347a8c7d1239960cce6a2d22cc1276236098 - uri: huggingface://bartowski/katanemo_Arch-Function-Chat-1.5B-GGUF/katanemo_Arch-Function-Chat-1.5B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "katanemo_arch-function-chat-3b" - urls: - - https://huggingface.co/katanemo/Arch-Function-Chat-3B - - https://huggingface.co/bartowski/katanemo_Arch-Function-Chat-3B-GGUF - description: | - The Arch-Function-Chat collection builds upon the Katanemo's Arch-Function collection by extending its capabilities beyond function calling. This new collection maintains the state-of-the-art(SOTA) function calling performance of the original collection while adding powerful new features that make it even more versatile in real-world applications. - - In addition to function calling capabilities, this collection now offers: - - Clarify & refine: Generates natural follow-up questions to collect missing information for function calling - Interpret & respond: Provides human-friendly responses based on function execution results - Context management: Mantains context in complex multi-turn interactions - - Note: Arch-Function-Chat is now the primarly LLM used in then open source Arch Gateway - An AI-native proxy for agents. For more details about the project, check out the Github README. 
- overrides: - parameters: - model: katanemo_Arch-Function-Chat-3B-Q4_K_M.gguf - files: - - filename: katanemo_Arch-Function-Chat-3B-Q4_K_M.gguf - sha256: f59dbef397bf1364b5f0a2c23a7f67c40ec63cc666036c4e7615fa7d79d4e1a0 - uri: huggingface://bartowski/katanemo_Arch-Function-Chat-3B-GGUF/katanemo_Arch-Function-Chat-3B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "open-thoughts_openthinker2-32b" - icon: https://huggingface.co/datasets/open-thoughts/open-thoughts-114k/resolve/main/open_thoughts.png - urls: - - https://huggingface.co/open-thoughts/OpenThinker2-32B - - https://huggingface.co/bartowski/open-thoughts_OpenThinker2-32B-GGUF - description: | - This model is a fine-tuned version of Qwen/Qwen2.5-32B-Instruct on the OpenThoughts2-1M dataset. - - The OpenThinker2-32B model is the highest performing open-data model. This model improves upon our previous OpenThinker-32B model, which was trained on 114k examples from OpenThoughts-114k. The numbers reported in the table below are evaluated with our open-source tool Evalchemy. - overrides: - parameters: - model: open-thoughts_OpenThinker2-32B-Q4_K_M.gguf - files: - - filename: open-thoughts_OpenThinker2-32B-Q4_K_M.gguf - sha256: e9c7bf7cb349cfe07b4550759a3b4d7005834d0fa7580b23e483cbfeecd7a982 - uri: huggingface://bartowski/open-thoughts_OpenThinker2-32B-GGUF/open-thoughts_OpenThinker2-32B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "open-thoughts_openthinker2-7b" - icon: https://huggingface.co/datasets/open-thoughts/open-thoughts-114k/resolve/main/open_thoughts.pnghttps://huggingface.co/datasets/open-thoughts/open-thoughts-114k/resolve/main/open_thoughts.png - urls: - - https://huggingface.co/open-thoughts/OpenThinker2-7B - - https://huggingface.co/bartowski/open-thoughts_OpenThinker2-7B-GGUF - description: | - This model is a fine-tuned version of Qwen/Qwen2.5-7B-Instruct on the OpenThoughts2-1M dataset. - - The OpenThinker2-7B model is the top 7B open-data reasoning model. 
It delivers performance comparable to state of the art 7B models like DeepSeek-R1-Distill-7B across a suite of tasks. This model improves upon our previous OpenThinker-7B model, which was trained on 114k examples from OpenThoughts-114k. The numbers reported in the table below are evaluated with our open-source tool Evalchemy. - overrides: - parameters: - model: open-thoughts_OpenThinker2-7B-Q4_K_M.gguf - files: - - filename: open-thoughts_OpenThinker2-7B-Q4_K_M.gguf - sha256: 481d785047d66ae2eeaf14650a9e659ec4f7766a6414b6c7e92854c944201734 - uri: huggingface://bartowski/open-thoughts_OpenThinker2-7B-GGUF/open-thoughts_OpenThinker2-7B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "arliai_qwq-32b-arliai-rpr-v1" - icon: https://cdn-uploads.huggingface.co/production/uploads/6625f4a8a8d1362ebcc3851a/albSlnUy9dPVGVuLlsBua.jpeg - urls: - - https://huggingface.co/ArliAI/QwQ-32B-ArliAI-RpR-v1 - - https://huggingface.co/bartowski/ArliAI_QwQ-32B-ArliAI-RpR-v1-GGUF - description: | - RpR (RolePlay with Reasoning) is a new series of models from ArliAI. This series builds directly upon the successful dataset curation methodology and training methods developed for the RPMax series. - - RpR models use the same curated, deduplicated RP and creative writing dataset used for RPMax, with a focus on variety to ensure high creativity and minimize cross-context repetition. Users familiar with RPMax will recognize the unique, non-repetitive writing style unlike other finetuned-for-RP models. - - With the release of QwQ as the first high performing open-source reasoning model that can be easily trained, it was clear that the available instruct and creative writing reasoning datasets contains only one response per example. This is type of single response dataset used for training reasoning models causes degraded output quality in long multi-turn chats. Which is why Arli AI decided to create a real RP model capable of long multi-turn chat with reasoning. 
- - In order to create RpR, we first had to actually create the reasoning RP dataset by re-processing our existing known-good RPMax dataset into a reasoning dataset. This was possible by using the base QwQ Instruct model itself to create the reasoning process for every turn in the RPMax dataset conversation examples, which is then further refined in order to make sure the reasoning is in-line with the actual response examples from the dataset. - - Another important thing to get right is to make sure the model is trained on examples that present reasoning blocks in the same way as it encounters it during inference. Which is, never seeing the reasoning blocks in it's context. In order to do this, the training run was completed using axolotl with manual template-free segments dataset in order to make sure that the model is never trained to see the reasoning block in the context. Just like how the model will be used during inference time. - - The result of training QwQ on this dataset with this method are consistently coherent and interesting outputs even in long multi-turn RP chats. This is as far as we know the first true correctly-trained reasoning model trained for RP and creative writing. 
- overrides: - parameters: - model: ArliAI_QwQ-32B-ArliAI-RpR-v1-Q4_K_M.gguf - files: - - filename: ArliAI_QwQ-32B-ArliAI-RpR-v1-Q4_K_M.gguf - sha256: b0f2ca8f62a5d021e20db40608a109713e9d23e75b68b3b71b7654c04d596dcf - uri: huggingface://bartowski/ArliAI_QwQ-32B-ArliAI-RpR-v1-GGUF/ArliAI_QwQ-32B-ArliAI-RpR-v1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "mensa-beta-14b-instruct-i1" - icon: https://cdn-uploads.huggingface.co/production/uploads/65bb837dbfb878f46c77de4c/DyO5Fvqwvee-UM9QqgWZS.png - urls: - - https://huggingface.co/prithivMLmods/Mensa-Beta-14B-Instruct - - https://huggingface.co/mradermacher/Mensa-Beta-14B-Instruct-i1-GGUF - description: | - weighted/imatrix quants of https://huggingface.co/prithivMLmods/Mensa-Beta-14B-Instruct - overrides: - parameters: - model: Mensa-Beta-14B-Instruct.i1-Q4_K_M.gguf - files: - - filename: Mensa-Beta-14B-Instruct.i1-Q4_K_M.gguf - sha256: 86ccd640d72dcf3129fdd5b94381a733a684672b22487784e388b2ee9de57760 - uri: huggingface://mradermacher/Mensa-Beta-14B-Instruct-i1-GGUF/Mensa-Beta-14B-Instruct.i1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "cogito-v1-preview-qwen-14B" - icon: https://huggingface.co/deepcogito/cogito-v1-preview-qwen-14B/resolve/main/images/deep-cogito-logo.png - urls: - - https://huggingface.co/deepcogito/cogito-v1-preview-qwen-14B - - https://huggingface.co/NikolayKozloff/cogito-v1-preview-qwen-14B-Q4_K_M-GGUF - description: | - The Cogito LLMs are instruction tuned generative models (text in/text out). All models are released under an open license for commercial use. - Cogito models are hybrid reasoning models. Each model can answer directly (standard LLM), or self-reflect before answering (like reasoning models). - The LLMs are trained using Iterated Distillation and Amplification (IDA) - an scalable and efficient alignment strategy for superintelligence using iterative self-improvement. 
- The models have been optimized for coding, STEM, instruction following and general helpfulness, and have significantly higher multilingual, coding and tool calling capabilities than size equivalent counterparts. - In both standard and reasoning modes, Cogito v1-preview models outperform their size equivalent counterparts on common industry benchmarks. - Each model is trained in over 30 languages and supports a context length of 128k. - overrides: - parameters: - model: cogito-v1-preview-qwen-14b-q4_k_m.gguf - files: - - filename: cogito-v1-preview-qwen-14b-q4_k_m.gguf - sha256: 42ddd667bac3e5f0989f52b3dca5767ed15d0e5077c6f537e4b3873862ff7096 - uri: huggingface://NikolayKozloff/cogito-v1-preview-qwen-14B-Q4_K_M-GGUF/cogito-v1-preview-qwen-14b-q4_k_m.gguf -- !!merge <<: *qwen25 - name: "deepcogito_cogito-v1-preview-qwen-32b" - icon: https://huggingface.co/deepcogito/cogito-v1-preview-qwen-32B/resolve/main/images/deep-cogito-logo.png - urls: - - https://huggingface.co/deepcogito/cogito-v1-preview-qwen-32B - - https://huggingface.co/bartowski/deepcogito_cogito-v1-preview-qwen-32B-GGUF - description: | - The Cogito LLMs are instruction tuned generative models (text in/text out). All models are released under an open license for commercial use. - - Cogito models are hybrid reasoning models. Each model can answer directly (standard LLM), or self-reflect before answering (like reasoning models). - The LLMs are trained using Iterated Distillation and Amplification (IDA) - an scalable and efficient alignment strategy for superintelligence using iterative self-improvement. - The models have been optimized for coding, STEM, instruction following and general helpfulness, and have significantly higher multilingual, coding and tool calling capabilities than size equivalent counterparts. - In both standard and reasoning modes, Cogito v1-preview models outperform their size equivalent counterparts on common industry benchmarks. 
- Each model is trained in over 30 languages and supports a context length of 128k. - overrides: - parameters: - model: deepcogito_cogito-v1-preview-qwen-32B-Q4_K_M.gguf - files: - - filename: deepcogito_cogito-v1-preview-qwen-32B-Q4_K_M.gguf - sha256: 985f2d49330090e64603309f7eb61030769f25a5da027ac0b0a740858d087ad8 - uri: huggingface://bartowski/deepcogito_cogito-v1-preview-qwen-32B-GGUF/deepcogito_cogito-v1-preview-qwen-32B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "soob3123_amoral-cogito-v1-preview-qwen-14b" - urls: - - https://huggingface.co/soob3123/amoral-cogito-v1-preview-qwen-14B - - https://huggingface.co/bartowski/soob3123_amoral-cogito-v1-preview-qwen-14B-GGUF - description: | - Key Features - Neutral response protocol (bias dampening layers) - Reduced refusal rate vs base Llama-3 - Moral phrasing detection/reformulation - Use Cases - Controversial topic analysis - Ethical philosophy simulations - Academic research requiring neutral framing - overrides: - parameters: - model: soob3123_amoral-cogito-v1-preview-qwen-14B-Q4_K_M.gguf - files: - - filename: soob3123_amoral-cogito-v1-preview-qwen-14B-Q4_K_M.gguf - sha256: c01a0b0c44345011dc61212fb1c0ffdba32f85e702d2f3d4abeb2a09208d6184 - uri: huggingface://bartowski/soob3123_amoral-cogito-v1-preview-qwen-14B-GGUF/soob3123_amoral-cogito-v1-preview-qwen-14B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "tesslate_gradience-t1-3b-preview" - urls: - - https://huggingface.co/Tesslate/Gradience-T1-3B-preview - - https://huggingface.co/bartowski/Tesslate_Gradience-T1-3B-preview-GGUF - description: | - This model is still in preview/beta. We're still working on it! This is just so the community can try out our new "Gradient Reasoning" that intends to break problems down and reason faster. - You can use a system prompt to enable thinking: "First, think step-by-step to reach the solution. Enclose your entire reasoning process within <|begin_of_thought|> and <|end_of_thought|> tags." 
You can try sampling params: Temp: 0.76, TopP: 0.62, Topk 30-68, Rep: 1.0, minp: 0.05 - overrides: - parameters: - model: Tesslate_Gradience-T1-3B-preview-Q4_K_M.gguf - files: - - filename: Tesslate_Gradience-T1-3B-preview-Q4_K_M.gguf - sha256: 119ccefa09e3756750a983301f8bbb95e6c8fce6941a5d91490dac600f887111 - uri: huggingface://bartowski/Tesslate_Gradience-T1-3B-preview-GGUF/Tesslate_Gradience-T1-3B-preview-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "lightthinker-qwen" - urls: - - https://huggingface.co/zjunlp/LightThinker-Qwen - - https://huggingface.co/mradermacher/LightThinker-Qwen-GGUF - description: | - LLMs have shown remarkable performance in complex reasoning tasks, but their efficiency is hindered by the substantial memory and computational costs associated with generating lengthy tokens. In this paper, we propose LightThinker, a novel method that enables LLMs to dynamically compress intermediate thoughts during reasoning. Inspired by human cognitive processes, LightThinker compresses verbose thought steps into compact representations and discards the original reasoning chains, thereby significantly reducing the number of tokens stored in the context window. This is achieved by training the model on when and how to perform compression through data construction, mapping hidden states to condensed gist tokens, and creating specialized attention masks. 
- overrides: - parameters: - model: LightThinker-Qwen.Q4_K_M.gguf - files: - - filename: LightThinker-Qwen.Q4_K_M.gguf - sha256: f52f27c23fa734b1a0306efd29fcb80434364e7a1077695574e9a4f5e48b7ed2 - uri: huggingface://mradermacher/LightThinker-Qwen-GGUF/LightThinker-Qwen.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "mag-picaro-72b" - icon: https://cdn-uploads.huggingface.co/production/uploads/66c26b6fb01b19d8c3c2467b/hrYOp7JiH7o5ij1WEoyCZ.png - urls: - - https://huggingface.co/Delta-Vector/Mag-Picaro-72B - - https://huggingface.co/mradermacher/Mag-Picaro-72B-GGUF - description: | - A scaled up version of Mag-Picaro, Funded by PygmalionAI as alternative to their Magnum Large option. - Fine-tuned on top of Qwen-2-Instruct, Mag-Picaro has been then slerp-merged at 50/50 weight with Magnum-V2. If you like the model support me on Ko-Fi https://ko-fi.com/deltavector - overrides: - parameters: - model: Mag-Picaro-72B.Q4_K_M.gguf - files: - - filename: Mag-Picaro-72B.Q4_K_M.gguf - sha256: 3fda6cf318a9082ef7b502c4384ee3ea5f9f9f44268b852a2e46d71bcea29d5a - uri: huggingface://mradermacher/Mag-Picaro-72B-GGUF/Mag-Picaro-72B.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "m1-32b" - urls: - - https://huggingface.co/Can111/m1-32b - - https://huggingface.co/mradermacher/m1-32b-GGUF - description: | - M1-32B is a 32B-parameter large language model fine-tuned from Qwen2.5-32B-Instruct on the M500 dataset—an interdisciplinary multi-agent collaborative reasoning dataset. M1-32B is optimized for improved reasoning, discussion, and decision-making in multi-agent systems (MAS), including frameworks such as AgentVerse. 
- - Code: https://github.com/jincan333/MAS-TTS - overrides: - parameters: - model: m1-32b.Q4_K_M.gguf - files: - - filename: m1-32b.Q4_K_M.gguf - sha256: 1dfa3b6822447aca590d6f2881cf277bd0fbde633a39c5a20b521f4a59145e3f - uri: huggingface://mradermacher/m1-32b-GGUF/m1-32b.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwen2.5-14b-instruct-1m" - urls: - - https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M - - https://huggingface.co/bartowski/Qwen2.5-14B-Instruct-1M-GGUF - description: | - Qwen2.5-1M is the long-context version of the Qwen2.5 series models, supporting a context length of up to 1M tokens. Compared to the Qwen2.5 128K version, Qwen2.5-1M demonstrates significantly improved performance in handling long-context tasks while maintaining its capability in short tasks. - - The model has the following features: - - Type: Causal Language Models - Training Stage: Pretraining & Post-training - Architecture: transformers with RoPE, SwiGLU, RMSNorm, and Attention QKV bias - Number of Parameters: 14.7B - Number of Paramaters (Non-Embedding): 13.1B - Number of Layers: 48 - Number of Attention Heads (GQA): 40 for Q and 8 for KV - Context Length: Full 1,010,000 tokens and generation 8192 tokens - We recommend deploying with our custom vLLM, which introduces sparse attention and length extrapolation methods to ensure efficiency and accuracy for long-context tasks. For specific guidance, refer to this section. - You can also use the previous framework that supports Qwen2.5 for inference, but accuracy degradation may occur for sequences exceeding 262,144 tokens. - - For more details, please refer to our blog, GitHub, Technical Report, and Documentation. 
- overrides: - parameters: - model: Qwen2.5-14B-Instruct-1M-Q4_K_M.gguf - files: - - filename: Qwen2.5-14B-Instruct-1M-Q4_K_M.gguf - sha256: a1a0fa3e2c3f9d63f9202af9172cffbc0b519801dff740fffd39f6a063a731ef - uri: huggingface://bartowski/Qwen2.5-14B-Instruct-1M-GGUF/Qwen2.5-14B-Instruct-1M-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "pictor-1338-qwenp-1.5b" - icon: https://cdn-uploads.huggingface.co/production/uploads/65bb837dbfb878f46c77de4c/X7zeHYbH5Y5JoRK_ud_Ya.png - urls: - - https://huggingface.co/prithivMLmods/Pictor-1338-QwenP-1.5B - - https://huggingface.co/adriey/Pictor-1338-QwenP-1.5B-Q8_0-GGUF - description: | - Pictor-1338-QwenP-1.5B is a code reasoning LLM fine-tuned from Qwen-1.5B using distributed reinforcement learning (RL). This model is designed to enhance coding proficiency, debugging accuracy, and step-by-step reasoning in software development tasks across multiple programming languages. - - Key Features - - Code Reasoning & Explanation - Trained to analyze, generate, and explain code with a focus on logic, structure, and clarity. Supports functional, object-oriented, and procedural paradigms. - - Reinforcement Learning Fine-Tuning - Enhanced using distributed RL, improving reward-aligned behavior in tasks like fixing bugs, completing functions, and understanding abstract instructions. - - Multi-Language Support - Works fluently with Python, JavaScript, C++, and Shell, among others—ideal for general-purpose programming, scripting, and algorithmic tasks. - - Compact and Efficient - At just 1.5B parameters, it's lightweight enough for edge deployments and developer tools with strong reasoning capability. - - Debugging and Auto-Fix Capabilities - Built to identify bugs, recommend corrections, and provide context-aware explanations of issues in codebases. 
- overrides: - parameters: - model: pictor-1338-qwenp-1.5b-q8_0.gguf - files: - - filename: pictor-1338-qwenp-1.5b-q8_0.gguf - sha256: 22d2f5b2322d9a354d8578475a6924c2173a913a1e2fa0ec2655f2f5937f6f26 - uri: huggingface://adriey/Pictor-1338-QwenP-1.5B-Q8_0-GGUF/pictor-1338-qwenp-1.5b-q8_0.gguf -- !!merge <<: *qwen25 - name: "nvidia_openmath-nemotron-32b" - icon: https://cdn-avatars.huggingface.co/v1/production/uploads/1613114437487-60262a8e0703121c822a80b6.png - urls: - - https://huggingface.co/nvidia/OpenMath-Nemotron-32B - - https://huggingface.co/bartowski/nvidia_OpenMath-Nemotron-32B-GGUF - description: | - OpenMath-Nemotron-32B is created by finetuning Qwen/Qwen2.5-32B on OpenMathReasoning dataset. This model is ready for commercial use. - OpenMath-Nemotron models achieve state-of-the-art results on popular mathematical benchmarks. We present metrics as pass@1 (maj@64) where pass@1 is an average accuracy across 64 generations and maj@64 is the result of majority voting. Please see our paper for more details on the evaluation setup. - overrides: - parameters: - model: nvidia_OpenMath-Nemotron-32B-Q4_K_M.gguf - files: - - filename: nvidia_OpenMath-Nemotron-32B-Q4_K_M.gguf - sha256: 91d1f53204ff47e49093ea0e4a6dae656fd79d9cdb23a50627bc6028396f5ab4 - uri: huggingface://bartowski/nvidia_OpenMath-Nemotron-32B-GGUF/nvidia_OpenMath-Nemotron-32B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "nvidia_openmath-nemotron-1.5b" - icon: https://cdn-avatars.huggingface.co/v1/production/uploads/1613114437487-60262a8e0703121c822a80b6.png - urls: - - https://huggingface.co/nvidia/OpenMath-Nemotron-1.5B - - https://huggingface.co/bartowski/nvidia_OpenMath-Nemotron-1.5B-GGUF - description: | - OpenMath-Nemotron-1.5B is created by finetuning Qwen/Qwen2.5-Math-1.5B on OpenMathReasoning dataset. This model is ready for commercial use. - OpenMath-Nemotron models achieve state-of-the-art results on popular mathematical benchmarks. 
We present metrics as pass@1 (maj@64) where pass@1 is an average accuracy across 64 generations and maj@64 is the result of majority voting. Please see our paper for more details on the evaluation setup. - overrides: - parameters: - model: nvidia_OpenMath-Nemotron-1.5B-Q4_K_M.gguf - files: - - filename: nvidia_OpenMath-Nemotron-1.5B-Q4_K_M.gguf - sha256: cdb74247c7918fdb70f9a9aa8217476f2f02e2fff723631255a441eb0db302e2 - uri: huggingface://bartowski/nvidia_OpenMath-Nemotron-1.5B-GGUF/nvidia_OpenMath-Nemotron-1.5B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "nvidia_openmath-nemotron-7b" - icon: https://cdn-avatars.huggingface.co/v1/production/uploads/1613114437487-60262a8e0703121c822a80b6.png - urls: - - https://huggingface.co/nvidia/OpenMath-Nemotron-7B - - https://huggingface.co/bartowski/nvidia_OpenMath-Nemotron-7B-GGUF - description: | - OpenMath-Nemotron-7B is created by finetuning Qwen/Qwen2.5-Math-7B on OpenMathReasoning dataset. This model is ready for commercial use. - OpenMath-Nemotron models achieve state-of-the-art results on popular mathematical benchmarks. We present metrics as pass@1 (maj@64) where pass@1 is an average accuracy across 64 generations and maj@64 is the result of majority voting. Please see our paper for more details on the evaluation setup. 
- overrides: - parameters: - model: nvidia_OpenMath-Nemotron-7B-Q4_K_M.gguf - files: - - filename: nvidia_OpenMath-Nemotron-7B-Q4_K_M.gguf - sha256: e205dd86ab9c73614d88dc3a84bd1a4e94255528f9ddb33e739ea23830342ee4 - uri: huggingface://bartowski/nvidia_OpenMath-Nemotron-7B-GGUF/nvidia_OpenMath-Nemotron-7B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "nvidia_openmath-nemotron-14b" - icon: https://cdn-avatars.huggingface.co/v1/production/uploads/1613114437487-60262a8e0703121c822a80b6.png - urls: - - https://huggingface.co/nvidia/OpenMath-Nemotron-14B - - https://huggingface.co/bartowski/nvidia_OpenMath-Nemotron-14B-GGUF - description: | - OpenMath-Nemotron-14B is created by finetuning Qwen/Qwen2.5-14B on OpenMathReasoning dataset. This model is ready for commercial use. - OpenMath-Nemotron models achieve state-of-the-art results on popular mathematical benchmarks. We present metrics as pass@1 (maj@64) where pass@1 is an average accuracy across 64 generations and maj@64 is the result of majority voting. Please see our paper for more details on the evaluation setup. - overrides: - parameters: - model: nvidia_OpenMath-Nemotron-14B-Q4_K_M.gguf - files: - - filename: nvidia_OpenMath-Nemotron-14B-Q4_K_M.gguf - sha256: 2abeccea53899b81cea11fd84fe458d673783f68e7790489fff5c295da6d8026 - uri: huggingface://bartowski/nvidia_OpenMath-Nemotron-14B-GGUF/nvidia_OpenMath-Nemotron-14B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "nvidia_openmath-nemotron-14b-kaggle" - icon: https://cdn-avatars.huggingface.co/v1/production/uploads/1613114437487-60262a8e0703121c822a80b6.png - urls: - - https://huggingface.co/nvidia/OpenMath-Nemotron-14B-Kaggle - - https://huggingface.co/bartowski/nvidia_OpenMath-Nemotron-14B-Kaggle-GGUF - description: | - OpenMath-Nemotron-14B-Kaggle is created by finetuning Qwen/Qwen2.5-14B on a subset of OpenMathReasoning dataset. This model was used in our first place submission to the AIMO-2 Kaggle competition! 
- OpenMath-Nemotron models achieve state-of-the-art results on popular mathematical benchmarks. We present metrics as pass@1 (maj@64) where pass@1 is an average accuracy across 64 generations and maj@64 is the result of majority voting. Please see our paper for more details on the evaluation setup. - overrides: - parameters: - model: nvidia_OpenMath-Nemotron-14B-Kaggle-Q4_K_M.gguf - files: - - filename: nvidia_OpenMath-Nemotron-14B-Kaggle-Q4_K_M.gguf - sha256: 5923990d2699b8dcbefd1fe7bf7406b76f9e3cfa271af93cb870d19d7cd63177 - uri: huggingface://bartowski/nvidia_OpenMath-Nemotron-14B-Kaggle-GGUF/nvidia_OpenMath-Nemotron-14B-Kaggle-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "webthinker-qwq-32b-i1" - urls: - - https://huggingface.co/lixiaoxi45/WebThinker-QwQ-32B - - https://huggingface.co/mradermacher/WebThinker-QwQ-32B-i1-GGUF - description: | - WebThinker-QwQ-32B is part of the WebThinker series that enables large reasoning models to autonomously search, explore web pages, and draft research reports within their thinking process. 
This 32B parameter model provides deep research capabilities through: - - Deep Web Exploration: Enables autonomous web searches and page navigation by clicking interactive elements to extract relevant information while maintaining reasoning coherence - Autonomous Think-Search-and-Draft: Integrates real-time knowledge seeking with report generation, allowing the model to draft sections as information is gathered - RL-based Training: Leverages iterative online DPO training with preference pairs constructed from reasoning trajectories to optimize end-to-end performance - overrides: - parameters: - model: WebThinker-QwQ-32B.i1-Q4_K_M.gguf - files: - - filename: WebThinker-QwQ-32B.i1-Q4_K_M.gguf - sha256: cd92aff9b1e22f2a5eab28fb2d887e45fc3b1b03d5ed6ffca216832b8e5b9fb8 - uri: huggingface://mradermacher/WebThinker-QwQ-32B-i1-GGUF/WebThinker-QwQ-32B.i1-Q4_K_M.gguf -- !!merge <<: *qwen25 - icon: https://cdn-uploads.huggingface.co/production/uploads/63d3095c2727d7888cbb54e2/Lt1t0tOO5emz1X23Azg-E.png - name: "servicenow-ai_apriel-nemotron-15b-thinker" - urls: - - https://huggingface.co/ServiceNow-AI/Apriel-Nemotron-15b-Thinker - - https://huggingface.co/bartowski/ServiceNow-AI_Apriel-Nemotron-15b-Thinker-GGUF - description: "Apriel-Nemotron-15b-Thinker is a 15 billion‑parameter reasoning model in ServiceNow’s Apriel SLM series which achieves competitive performance against similarly sized state-of-the-art models like o1‑mini, QWQ‑32b, and EXAONE‑Deep‑32b, all while maintaining only half the memory footprint of those alternatives. It builds upon the Apriel‑15b‑base checkpoint through a three‑stage training pipeline (CPT, SFT and GRPO).\nHighlights\n Half the size of SOTA models like QWQ-32b and EXAONE-32b and hence memory efficient.\n It consumes 40% less tokens compared to QWQ-32b, making it super efficient in production. 
\U0001F680\U0001F680\U0001F680\n On par or outperforms on tasks like - MBPP, BFCL, Enterprise RAG, MT Bench, MixEval, IFEval and Multi-Challenge making it great for Agentic / Enterprise tasks.\n Competitive performance on academic benchmarks like AIME-24 AIME-25, AMC-23, MATH-500 and GPQA considering model size.\n" - overrides: - parameters: - model: ServiceNow-AI_Apriel-Nemotron-15b-Thinker-Q4_K_M.gguf - files: - - filename: ServiceNow-AI_Apriel-Nemotron-15b-Thinker-Q4_K_M.gguf - sha256: 9bc7be87f744a483756d373307358c45fa50affffb654b1324fce2dee1844fe8 - uri: huggingface://bartowski/ServiceNow-AI_Apriel-Nemotron-15b-Thinker-GGUF/ServiceNow-AI_Apriel-Nemotron-15b-Thinker-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "cognition-ai_kevin-32b" - urls: - - https://huggingface.co/cognition-ai/Kevin-32B - - https://huggingface.co/bartowski/cognition-ai_Kevin-32B-GGUF - - https://cognition.ai/blog/kevin-32b - description: | - Kevin (K(ernel D)evin) is a 32B parameter model finetuned to write efficient CUDA kernels. - - We use KernelBench as our benchmark, and train the model through multi-turn reinforcement learning. - - For the details, see our blogpost at https://cognition.ai/blog/kevin-32b - overrides: - parameters: - model: cognition-ai_Kevin-32B-Q4_K_M.gguf - files: - - filename: cognition-ai_Kevin-32B-Q4_K_M.gguf - sha256: 2576edd5b1880bcac6732eae9446b035426aee2e76937dc68a252ad34e185705 - uri: huggingface://bartowski/cognition-ai_Kevin-32B-GGUF/cognition-ai_Kevin-32B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwen_qwen2.5-vl-7b-instruct" - urls: - - https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct - - https://huggingface.co/bartowski/Qwen_Qwen2.5-VL-7B-Instruct-GGUF - description: | - In the past five months since Qwen2-VL’s release, numerous developers have built new models on the Qwen2-VL vision-language models, providing us with valuable feedback. During this period, we focused on building more useful vision-language models. 
Today, we are excited to introduce the latest addition to the Qwen family: Qwen2.5-VL. - Key Enhancements: - - Understand things visually: Qwen2.5-VL is not only proficient in recognizing common objects such as flowers, birds, fish, and insects, but it is highly capable of analyzing texts, charts, icons, graphics, and layouts within images. - - Being agentic: Qwen2.5-VL directly plays as a visual agent that can reason and dynamically direct tools, which is capable of computer use and phone use. - - Understanding long videos and capturing events: Qwen2.5-VL can comprehend videos of over 1 hour, and this time it has a new ability of cpaturing event by pinpointing the relevant video segments. - - Capable of visual localization in different formats: Qwen2.5-VL can accurately localize objects in an image by generating bounding boxes or points, and it can provide stable JSON outputs for coordinates and attributes. - - Generating structured outputs: for data like scans of invoices, forms, tables, etc. Qwen2.5-VL supports structured outputs of their contents, benefiting usages in finance, commerce, etc. - - Model Architecture Updates: - - Dynamic Resolution and Frame Rate Training for Video Understanding: - - We extend dynamic resolution to the temporal dimension by adopting dynamic FPS sampling, enabling the model to comprehend videos at various sampling rates. Accordingly, we update mRoPE in the time dimension with IDs and absolute time alignment, enabling the model to learn temporal sequence and speed, and ultimately acquire the ability to pinpoint specific moments. - - Streamlined and Efficient Vision Encoder - - We enhance both training and inference speeds by strategically implementing window attention into the ViT. The ViT architecture is further optimized with SwiGLU and RMSNorm, aligning it with the structure of the Qwen2.5 LLM. 
- overrides: - mmproj: mmproj-Qwen_Qwen2.5-VL-7B-Instruct-f16.gguf - parameters: - model: Qwen_Qwen2.5-VL-7B-Instruct-Q4_K_M.gguf - files: - - filename: Qwen_Qwen2.5-VL-7B-Instruct-Q4_K_M.gguf - sha256: 3f4513330aa7f109922bd701d773575484ae2b4a4090d6511260a2a4f8e3d069 - uri: huggingface://bartowski/Qwen_Qwen2.5-VL-7B-Instruct-GGUF/Qwen_Qwen2.5-VL-7B-Instruct-Q4_K_M.gguf - - filename: mmproj-Qwen_Qwen2.5-VL-7B-Instruct-f16.gguf - sha256: c24a7f5fcfc68286f0a217023b6738e73bea4f11787a43e8238d4bb1b8604cde - uri: https://huggingface.co/bartowski/Qwen_Qwen2.5-VL-7B-Instruct-GGUF/resolve/main/mmproj-Qwen_Qwen2.5-VL-7B-Instruct-f16.gguf -- !!merge <<: *qwen25 - name: "qwen_qwen2.5-vl-72b-instruct" - urls: - - https://huggingface.co/Qwen/Qwen2.5-VL-72B-Instruct - - https://huggingface.co/bartowski/Qwen_Qwen2.5-VL-72B-Instruct-GGUF - description: | - In the past five months since Qwen2-VL’s release, numerous developers have built new models on the Qwen2-VL vision-language models, providing us with valuable feedback. During this period, we focused on building more useful vision-language models. Today, we are excited to introduce the latest addition to the Qwen family: Qwen2.5-VL. - Key Enhancements: - - Understand things visually: Qwen2.5-VL is not only proficient in recognizing common objects such as flowers, birds, fish, and insects, but it is highly capable of analyzing texts, charts, icons, graphics, and layouts within images. - - Being agentic: Qwen2.5-VL directly plays as a visual agent that can reason and dynamically direct tools, which is capable of computer use and phone use. - - Understanding long videos and capturing events: Qwen2.5-VL can comprehend videos of over 1 hour, and this time it has a new ability of cpaturing event by pinpointing the relevant video segments. 
- - Capable of visual localization in different formats: Qwen2.5-VL can accurately localize objects in an image by generating bounding boxes or points, and it can provide stable JSON outputs for coordinates and attributes. - - Generating structured outputs: for data like scans of invoices, forms, tables, etc. Qwen2.5-VL supports structured outputs of their contents, benefiting usages in finance, commerce, etc. - - Model Architecture Updates: - - Dynamic Resolution and Frame Rate Training for Video Understanding: - - We extend dynamic resolution to the temporal dimension by adopting dynamic FPS sampling, enabling the model to comprehend videos at various sampling rates. Accordingly, we update mRoPE in the time dimension with IDs and absolute time alignment, enabling the model to learn temporal sequence and speed, and ultimately acquire the ability to pinpoint specific moments. - - Streamlined and Efficient Vision Encoder - - We enhance both training and inference speeds by strategically implementing window attention into the ViT. The ViT architecture is further optimized with SwiGLU and RMSNorm, aligning it with the structure of the Qwen2.5 LLM. 
- overrides: - mmproj: mmproj-Qwen_Qwen2.5-VL-72B-Instruct-f16.gguf - parameters: - model: Qwen_Qwen2.5-VL-72B-Instruct-Q4_K_M.gguf - files: - - filename: Qwen_Qwen2.5-VL-72B-Instruct-Q4_K_M.gguf - sha256: d8f4000042bfd4570130321beb0ba19acdd2c53731c0f83ca2455b1ee713e52c - uri: huggingface://bartowski/Qwen_Qwen2.5-VL-72B-Instruct-GGUF/Qwen_Qwen2.5-VL-72B-Instruct-Q4_K_M.gguf - - filename: mmproj-Qwen_Qwen2.5-VL-72B-Instruct-f16.gguf - sha256: 6099885b9c4056e24806b616401ff2730a7354335e6f2f0eaf2a45e89c8a457c - uri: https://huggingface.co/bartowski/Qwen_Qwen2.5-VL-72B-Instruct-GGUF/resolve/main/mmproj-Qwen_Qwen2.5-VL-72B-Instruct-f16.gguf -- !!merge <<: *qwen25 - name: "a-m-team_am-thinking-v1" - icon: https://cdn-avatars.huggingface.co/v1/production/uploads/62da53284398e21bf7f0d539/y6wX4K-P9O8B9frsxxQ6W.jpeg - urls: - - https://huggingface.co/a-m-team/AM-Thinking-v1 - - https://huggingface.co/bartowski/a-m-team_AM-Thinking-v1-GGUF - description: "AM-Thinking‑v1, a 32B dense language model focused on enhancing reasoning capabilities. Built on Qwen 2.5‑32B‑Base, AM-Thinking‑v1 shows strong performance on reasoning benchmarks, comparable to much larger MoE models like DeepSeek‑R1, Qwen3‑235B‑A22B, Seed1.5-Thinking, and larger dense model like Nemotron-Ultra-253B-v1.\nbenchmark\n\U0001F9E9 Why Another 32B Reasoning Model Matters?\n\nLarge Mixture‑of‑Experts (MoE) models such as DeepSeek‑R1 or Qwen3‑235B‑A22B dominate leaderboards—but they also demand clusters of high‑end GPUs. Many teams just need the best dense model that fits on a single card. 
AM‑Thinking‑v1 fills that gap while remaining fully based on open-source components:\n\n Outperforms DeepSeek‑R1 on AIME’24/’25 & LiveCodeBench and approaches Qwen3‑235B‑A22B despite being 1/7‑th the parameter count.\n Built on the publicly available Qwen 2.5‑32B‑Base, as well as the RL training queries.\n Shows that with a well‑designed post‑training pipeline ( SFT + dual‑stage RL ) you can squeeze flagship‑level reasoning out of a 32 B dense model.\n Deploys on one A100‑80 GB with deterministic latency—no MoE routing overhead.\n" - overrides: - parameters: - model: a-m-team_AM-Thinking-v1-Q4_K_M.gguf - files: - - filename: a-m-team_AM-Thinking-v1-Q4_K_M.gguf - sha256: a6da6e8d330d76167c04a54eeb550668b59b613ea53af22e3b4a0c6da271e38d - uri: huggingface://bartowski/a-m-team_AM-Thinking-v1-GGUF/a-m-team_AM-Thinking-v1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "arliai_qwq-32b-arliai-rpr-v4" - icon: https://cdn-uploads.huggingface.co/production/uploads/6625f4a8a8d1362ebcc3851a/hIZ2ZcaDyfYLT9Yd4pfOs.jpeg - urls: - - https://huggingface.co/ArliAI/QwQ-32B-ArliAI-RpR-v4 - - https://huggingface.co/bartowski/ArliAI_QwQ-32B-ArliAI-RpR-v4-GGUF - description: | - The best RP/creative model from ArliAI yet again. - - Reduced repetitions and impersonation - - To add to the creativity and out of the box thinking of RpR v3, a more advanced filtering method was used in order to remove examples where the LLM repeated similar phrases or talked for the user. Any repetition or impersonation cases that happens will be due to how the base QwQ model was trained, and not because of the RpR dataset. - - Increased training sequence length - - The training sequence length was increased to 16K in order to help awareness and memory even on longer chats. - - RpR Series Overview: Building on RPMax with Reasoning - - RpR (RolePlay with Reasoning) is a new series of models from ArliAI. 
This series builds directly upon the successful dataset curation methodology and training methods developed for the RPMax series. - - RpR models use the same curated, deduplicated RP and creative writing dataset used for RPMax, with a focus on variety to ensure high creativity and minimize cross-context repetition. Users familiar with RPMax will recognize the unique, non-repetitive writing style unlike other finetuned-for-RP models. - - With the release of QwQ as the first high performing open-source reasoning model that can be easily trained, it was clear that the available instruct and creative writing reasoning datasets contains only one response per example. This is type of single response dataset used for training reasoning models causes degraded output quality in long multi-turn chats. Which is why Arli AI decided to create a real RP model capable of long multi-turn chat with reasoning. - - In order to create RpR, we first had to actually create the reasoning RP dataset by re-processing our existing known-good RPMax dataset into a reasoning dataset. This was possible by using the base QwQ Instruct model itself to create the reasoning process for every turn in the RPMax dataset conversation examples, which is then further refined in order to make sure the reasoning is in-line with the actual response examples from the dataset. - - Another important thing to get right is to make sure the model is trained on examples that present reasoning blocks in the same way as it encounters it during inference. Which is, never seeing the reasoning blocks in it's context. In order to do this, the training run was completed using axolotl with manual template-free segments dataset in order to make sure that the model is never trained to see the reasoning block in the context. Just like how the model will be used during inference time. 
- - The result of training QwQ on this dataset with this method are consistently coherent and interesting outputs even in long multi-turn RP chats. This is as far as we know the first true correctly-trained reasoning model trained for RP and creative writing. - - You can access the model at https://arliai.com and we also have a models ranking page at https://www.arliai.com/models-ranking - - Ask questions in our new Discord Server https://discord.com/invite/t75KbPgwhk or on our subreddit https://www.reddit.com/r/ArliAI/ - Model Description - - QwQ-32B-ArliAI-RpR-v4 is the third release in the RpR series. It is a 32-billion parameter model fine-tuned using the RpR dataset based on the curated RPMax dataset combined with techniques to maintain reasoning abilities in long multi-turn chats. - overrides: - parameters: - model: ArliAI_QwQ-32B-ArliAI-RpR-v4-Q4_K_M.gguf - files: - - filename: ArliAI_QwQ-32B-ArliAI-RpR-v4-Q4_K_M.gguf - sha256: fd67ca1e792efb25129cbd17b9b0f5c410dd963f17234828686928d21039b585 - uri: huggingface://bartowski/ArliAI_QwQ-32B-ArliAI-RpR-v4-GGUF/ArliAI_QwQ-32B-ArliAI-RpR-v4-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "whiterabbitneo_whiterabbitneo-v3-7b" - icon: https://huggingface.co/WhiteRabbitNeo/WhiteRabbitNeo-V3-7B/resolve/main/whiterabbitneo-logo-defcon.png - urls: - - https://huggingface.co/WhiteRabbitNeo/WhiteRabbitNeo-V3-7B - - https://huggingface.co/bartowski/WhiteRabbitNeo_WhiteRabbitNeo-V3-7B-GGUF - description: | - A LLM model focused on security. - Topics Covered: - - - Open Ports: Identifying open ports is crucial as they can be entry points for attackers. Common ports to check include HTTP (80, 443), FTP (21), SSH (22), and SMB (445). - - Outdated Software or Services: Systems running outdated software or services are often vulnerable to exploits. This includes web servers, database servers, and any third-party software. 
- - Default Credentials: Many systems and services are installed with default usernames and passwords, which are well-known and can be easily exploited. - - Misconfigurations: Incorrectly configured services, permissions, and security settings can introduce vulnerabilities. - - Injection Flaws: SQL injection, command injection, and cross-site scripting (XSS) are common issues in web applications. - - Unencrypted Services: Services that do not use encryption (like HTTP instead of HTTPS) can expose sensitive data. - - Known Software Vulnerabilities: Checking for known vulnerabilities in software using databases like the National Vulnerability Database (NVD) or tools like Nessus or OpenVAS. - - Cross-Site Request Forgery (CSRF): This is where unauthorized commands are transmitted from a user that the web application trusts. - - Insecure Direct Object References: This occurs when an application provides direct access to objects based on user-supplied input. - - Security Misconfigurations in Web Servers/Applications: This includes issues like insecure HTTP headers or verbose error messages that reveal too much information. - - Broken Authentication and Session Management: This can allow attackers to compromise passwords, keys, or session tokens, or to exploit other implementation flaws to assume other users' identities. - - Sensitive Data Exposure: Includes vulnerabilities that expose sensitive data, such as credit card numbers, health records, or personal information. - - API Vulnerabilities: In modern web applications, APIs are often used and can have vulnerabilities like insecure endpoints or data leakage. - - Denial of Service (DoS) Vulnerabilities: Identifying services that are vulnerable to DoS attacks, which can make the resource unavailable to legitimate users. - - Buffer Overflows: Common in older software, these vulnerabilities can allow an attacker to crash the system or execute arbitrary code. - - More .. 
- overrides: - parameters: - model: WhiteRabbitNeo_WhiteRabbitNeo-V3-7B-Q4_K_M.gguf - files: - - filename: WhiteRabbitNeo_WhiteRabbitNeo-V3-7B-Q4_K_M.gguf - sha256: 584bfc1f4c160928842866c566129f9789c4671af8e51a9e36ba0ebf10f24f41 - uri: huggingface://bartowski/WhiteRabbitNeo_WhiteRabbitNeo-V3-7B-GGUF/WhiteRabbitNeo_WhiteRabbitNeo-V3-7B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "qwen2.5-omni-7b" - tags: - - multimodal - - gguf - - gpu - - cpu - - qwen2.5 - - audio-to-text - - image-to-text - - text-to-text - urls: - - https://huggingface.co/Qwen/Qwen2.5-Omni-7B - - https://huggingface.co/ggml-org/Qwen2.5-Omni-7B-GGUF - description: | - Qwen2.5-Omni is an end-to-end multimodal model designed to perceive diverse modalities, including text, images, audio, and video, while simultaneously generating text and natural speech responses in a streaming manner. - Modalities: - - ✅ Text input - - ✅ Audio input - - ✅ Image input - - ❌ Video input - - ❌ Audio generation - overrides: - mmproj: mmproj-Qwen2.5-Omni-7B-Q8_0.gguf - parameters: - model: Qwen2.5-Omni-7B-Q4_K_M.gguf - files: - - filename: Qwen2.5-Omni-7B-Q4_K_M.gguf - sha256: 09883dff531dc56923a041c9c99c7c779e26ffde32caa83adeeb7502ec3b50fe - uri: huggingface://ggml-org/Qwen2.5-Omni-7B-GGUF/Qwen2.5-Omni-7B-Q4_K_M.gguf - - filename: mmproj-Qwen2.5-Omni-7B-Q8_0.gguf - sha256: 4a7bc5478a2ec8c5d186d63532eb22e75b79ba75ec3c0ce821676157318ef4ad - uri: https://huggingface.co/ggml-org/Qwen2.5-Omni-7B-GGUF/resolve/main/mmproj-Qwen2.5-Omni-7B-Q8_0.gguf -- !!merge <<: *qwen25 - name: "qwen2.5-omni-3b" - tags: - - multimodal - - gguf - - gpu - - cpu - - qwen2.5 - - audio-to-text - - image-to-text - - text-to-text - urls: - - https://huggingface.co/Qwen/Qwen2.5-Omni-3B - - https://huggingface.co/ggml-org/Qwen2.5-Omni-3B-GGUF - description: | - Qwen2.5-Omni is an end-to-end multimodal model designed to perceive diverse modalities, including text, images, audio, and video, while simultaneously generating text and natural speech 
responses in a streaming manner. - Modalities: - - ✅ Text input - - ✅ Audio input - - ✅ Image input - - ❌ Video input - - ❌ Audio generation - overrides: - mmproj: mmproj-Qwen2.5-Omni-3B-Q8_0.gguf - parameters: - model: Qwen2.5-Omni-3B-Q4_K_M.gguf - files: - - filename: Qwen2.5-Omni-3B-Q4_K_M.gguf - sha256: 4b0bd358c1e9ec55dd3055ef6d71c958c821533d85916a10cfa89c4552a86e29 - uri: huggingface://ggml-org/Qwen2.5-Omni-3B-GGUF/Qwen2.5-Omni-3B-Q4_K_M.gguf - - filename: mmproj-Qwen2.5-Omni-3B-Q8_0.gguf - sha256: 4e6c816cd33f7298d07cb780c136a396631e50e62f6501660271f8c6e302e565 - uri: https://huggingface.co/ggml-org/Qwen2.5-Omni-3B-GGUF/resolve/main/mmproj-Qwen2.5-Omni-3B-Q8_0.gguf -- !!merge <<: *qwen25 - name: "open-thoughts_openthinker3-7b" - icon: https://huggingface.co/datasets/open-thoughts/open-thoughts-114k/resolve/main/open_thoughts.png - urls: - - https://huggingface.co/open-thoughts/OpenThinker3-7B - - https://huggingface.co/bartowski/open-thoughts_OpenThinker3-7B-GGUF - description: "State-of-the-art open-data 7B reasoning model. \U0001F680\n\nThis model is a fine-tuned version of Qwen/Qwen2.5-7B-Instruct on the OpenThoughts3-1.2M dataset. It represents a notable improvement over our previous models, OpenThinker-7B and OpenThinker2-7B, and it outperforms several other strong reasoning 7B models such as DeepSeek-R1-Distill-Qwen-7B and Llama-3.1-Nemotron-Nano-8B-v1, despite being trained only with SFT, without any RL.\n\nThis time, we also released a paper! See our paper and blog post for more details. OpenThinker3-32B to follow! 
\U0001F440\n" - overrides: - parameters: - model: open-thoughts_OpenThinker3-7B-Q4_K_M.gguf - files: - - filename: open-thoughts_OpenThinker3-7B-Q4_K_M.gguf - sha256: 73b8f44c3b11c3ec63e4c4ddbb262679c8f681511d84940c4c990814aa0bafc0 - uri: huggingface://bartowski/open-thoughts_OpenThinker3-7B-GGUF/open-thoughts_OpenThinker3-7B-Q4_K_M.gguf -- !!merge <<: *qwen25 - icon: https://github.com/FlagOpen/RoboBrain2.0/raw/main/assets/logo2.png - name: "baai_robobrain2.0-7b" - urls: - - https://huggingface.co/BAAI/RoboBrain2.0-7B - - https://huggingface.co/bartowski/BAAI_RoboBrain2.0-7B-GGUF - description: | - We are excited to introduce RoboBrain 2.0, the most powerful open-source embodied brain model to date. Compared to its predecessor, RoboBrain1.0, our latest version significantly advances multi-agent task planning, spatial reasoning, and closed-loop execution. A detailed technical report will be released soon. - overrides: - mmproj: mmproj-BAAI_RoboBrain2.0-7B-f16.gguf - parameters: - model: BAAI_RoboBrain2.0-7B-Q4_K_M.gguf - files: - - filename: BAAI_RoboBrain2.0-7B-Q4_K_M.gguf - sha256: 9a55874f07514aafe7c9926a04d8143c88a1bd5bebebaa7202aabc9b0c48a8f7 - uri: huggingface://bartowski/BAAI_RoboBrain2.0-7B-GGUF/BAAI_RoboBrain2.0-7B-Q4_K_M.gguf - - uri: https://huggingface.co/bartowski/BAAI_RoboBrain2.0-7B-GGUF/resolve/main/mmproj-BAAI_RoboBrain2.0-7B-f16.gguf - sha256: 7c62842afa6b90582acc5758578d5ab683610d33177c9b730f5489404cb28e4f - filename: mmproj-BAAI_RoboBrain2.0-7B-f16.gguf -- !!merge <<: *qwen25 - name: "baichuan-inc_baichuan-m2-32b" - urls: - - https://huggingface.co/bartowski/baichuan-inc_Baichuan-M2-32B-GGUF - - https://huggingface.co/baichuan-inc/Baichuan-M2-32B - description: "Baichuan-M2-32B is Baichuan AI's medical-enhanced reasoning model, the second medical model released by Baichuan. Designed for real-world medical reasoning tasks, this model builds upon Qwen2.5-32B with an innovative Large Verifier System. 
Through domain-specific fine-tuning on real-world medical questions, it achieves breakthrough medical performance while maintaining strong general capabilities.\n\nModel Features:\n\nBaichuan-M2 incorporates three core technical innovations: First, through the Large Verifier System, it combines medical scenario characteristics to design a comprehensive medical verification framework, including patient simulators and multi-dimensional verification mechanisms; second, through medical domain adaptation enhancement via Mid-Training, it achieves lightweight and efficient medical domain adaptation while preserving general capabilities; finally, it employs a multi-stage reinforcement learning strategy, decomposing complex RL tasks into hierarchical training stages to progressively enhance the model's medical knowledge, reasoning, and patient interaction capabilities.\n\nCore Highlights:\n\n \U0001F3C6 World's Leading Open-Source Medical Model: Outperforms all open-source models and many proprietary models on HealthBench, achieving medical capabilities closest to GPT-5\n \U0001F9E0 Doctor-Thinking Alignment: Trained on real clinical cases and patient simulators, with clinical diagnostic thinking and robust patient interaction capabilities\n ⚡ Efficient Deployment: Supports 4-bit quantization for single-RTX4090 deployment, with 58.5% higher token throughput in MTP version for single-user scenarios\n" - overrides: - parameters: - model: baichuan-inc_Baichuan-M2-32B-Q4_K_M.gguf - files: - - filename: baichuan-inc_Baichuan-M2-32B-Q4_K_M.gguf - sha256: 51907419518e6f79c28f75e4097518e54c2efecd85cb4c714334395fa2d591c2 - uri: huggingface://bartowski/baichuan-inc_Baichuan-M2-32B-GGUF/baichuan-inc_Baichuan-M2-32B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "k2-think-i1" - icon: https://huggingface.co/LLM360/K2-Think/resolve/main/banner.png - urls: - - https://huggingface.co/LLM360/K2-Think - - https://huggingface.co/mradermacher/K2-Think-i1-GGUF - description: | - K2-Think is a 32 
billion parameter open-weights general reasoning model with strong performance in competitive mathematical problem solving. - overrides: - parameters: - model: K2-Think.i1-Q4_K_M.gguf - files: - - filename: K2-Think.i1-Q4_K_M.gguf - sha256: 510fad18b0cf58059437338c1b5b982996ef89456a8d88da52eb3d50fe78b9fd - uri: huggingface://mradermacher/K2-Think-i1-GGUF/K2-Think.i1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "holo1.5-72b" - icon: https://cdn-avatars.huggingface.co/v1/production/uploads/677d3f355f847864bb644112/OQyAJ33sssiTDIQEQ7oH_.png - urls: - - https://huggingface.co/Hcompany/Holo1.5-72B - - https://huggingface.co/mradermacher/Holo1.5-72B-GGUF - description: | - Computer Use (CU) agents are AI systems that can interact with real applications—web, desktop, and mobile—on behalf of a user. They can navigate interfaces, manipulate elements, and answer questions about content, enabling powerful automation and productivity tools. CU agents are becoming increasingly important as they allow humans to delegate complex digital tasks safely and efficiently. - The Holo1.5 series provides state-of-the-art foundational models for building such agents. Holo1.5 models excel at user interface (UI) localization and UI-based question answering (QA) across web, computer, and mobile environments, with strong performance on benchmarks including Screenspot-V2, Screenspot-Pro, GroundUI-Web, Showdown, and our newly introduced WebClick. 
- overrides: - mmproj: Holo1.5-72B.mmproj-Q8_0.gguf - parameters: - model: Holo1.5-72B.Q4_K_M.gguf - files: - - filename: Holo1.5-72B.Q4_K_M.gguf - sha256: 3404347c245fefa352a3dc16134b5870f594ab8bff11e50582205b5538201a23 - uri: huggingface://mradermacher/Holo1.5-72B-GGUF/Holo1.5-72B.Q4_K_M.gguf - - filename: Holo1.5-72B.mmproj-Q8_0.gguf - sha256: f172cffc96a00d4f885eecffbc798912d37105f4191ba16a9947a5776b0f8a02 - uri: huggingface://mradermacher/Holo1.5-72B-GGUF/Holo1.5-72B.mmproj-Q8_0.gguf -- !!merge <<: *qwen25 - name: "holo1.5-7b" - icon: https://cdn-avatars.huggingface.co/v1/production/uploads/677d3f355f847864bb644112/OQyAJ33sssiTDIQEQ7oH_.png - urls: - - https://huggingface.co/Hcompany/Holo1.5-7B - - https://huggingface.co/mradermacher/Holo1.5-7B-GGUF - description: | - Computer Use (CU) agents are AI systems that can interact with real applications—web, desktop, and mobile—on behalf of a user. They can navigate interfaces, manipulate elements, and answer questions about content, enabling powerful automation and productivity tools. CU agents are becoming increasingly important as they allow humans to delegate complex digital tasks safely and efficiently. - The Holo1.5 series provides state-of-the-art foundational models for building such agents. Holo1.5 models excel at user interface (UI) localization and UI-based question answering (QA) across web, computer, and mobile environments, with strong performance on benchmarks including Screenspot-V2, Screenspot-Pro, GroundUI-Web, Showdown, and our newly introduced WebClick. 
- overrides: - mmproj: Holo1.5-7B.mmproj-Q8_0.gguf - parameters: - model: Holo1.5-7B.Q4_K_M.gguf - files: - - filename: Holo1.5-7B.Q4_K_M.gguf - sha256: 37d1c060b73b783ffdab8d70fa47a6cff46cd34b1cf44b5bfbf4f20ff99eacdd - uri: huggingface://mradermacher/Holo1.5-7B-GGUF/Holo1.5-7B.Q4_K_M.gguf - - filename: Holo1.5-7B.mmproj-Q8_0.gguf - sha256: a9bad2d3d9241251b8753d9be4ea737c03197077d96153c1365a62db709489f6 - uri: huggingface://mradermacher/Holo1.5-7B-GGUF/Holo1.5-7B.mmproj-Q8_0.gguf -- !!merge <<: *qwen25 - name: "holo1.5-3b" - icon: https://cdn-avatars.huggingface.co/v1/production/uploads/677d3f355f847864bb644112/OQyAJ33sssiTDIQEQ7oH_.png - urls: - - https://huggingface.co/Hcompany/Holo1.5-3B - - https://huggingface.co/mradermacher/Holo1.5-3B-GGUF - description: | - Computer Use (CU) agents are AI systems that can interact with real applications—web, desktop, and mobile—on behalf of a user. They can navigate interfaces, manipulate elements, and answer questions about content, enabling powerful automation and productivity tools. CU agents are becoming increasingly important as they allow humans to delegate complex digital tasks safely and efficiently. - The Holo1.5 series provides state-of-the-art foundational models for building such agents. Holo1.5 models excel at user interface (UI) localization and UI-based question answering (QA) across web, computer, and mobile environments, with strong performance on benchmarks including Screenspot-V2, Screenspot-Pro, GroundUI-Web, Showdown, and our newly introduced WebClick. 
- overrides: - mmproj: Holo1.5-3B.mmproj-Q8_0.gguf - parameters: - model: Holo1.5-3B.Q4_K_M.gguf - files: - - filename: Holo1.5-3B.Q4_K_M.gguf - sha256: 5efb1318d439fe1f71e38825a17203c48ced7de4a5d0796427c8c638e817622a - uri: huggingface://mradermacher/Holo1.5-3B-GGUF/Holo1.5-3B.Q4_K_M.gguf - - filename: Holo1.5-3B.mmproj-Q8_0.gguf - sha256: fb5cc798b386a4b680c306f061457cb16cc627c7d9ed401d660b8b940463142b - uri: huggingface://mradermacher/Holo1.5-3B-GGUF/Holo1.5-3B.mmproj-Q8_0.gguf -- !!merge <<: *qwen25 - name: "webwatcher-7b" - icon: https://huggingface.co/Alibaba-NLP/WebWatcher-7B/resolve/main/assets/webwatcher_logo.png - urls: - - https://huggingface.co/Alibaba-NLP/WebWatcher-7B - - https://huggingface.co/mradermacher/WebWatcher-7B-GGUF - description: | - WebWatcher is a multimodal agent for deep research that possesses enhanced visual-language reasoning capabilities. Our work presents a unified framework that combines complex vision-language reasoning with multi-tool interaction. - overrides: - mmproj: WebWatcher-7B.mmproj-Q8_0.gguf - parameters: - model: WebWatcher-7B.Q4_K_M.gguf - files: - - filename: WebWatcher-7B.Q4_K_M.gguf - sha256: 300c76a51de59552f997ee7ee78ec519620931dea15c655111633b96de1a47f2 - uri: huggingface://mradermacher/WebWatcher-7B-GGUF/WebWatcher-7B.Q4_K_M.gguf - - filename: WebWatcher-7B.mmproj-Q8_0.gguf - sha256: 841dc1bcc4f69ca864518d2c9a9a37b1815169d9bd061b054e091061124e4e62 - uri: huggingface://mradermacher/WebWatcher-7B-GGUF/WebWatcher-7B.mmproj-Q8_0.gguf -- !!merge <<: *qwen25 - name: "webwatcher-32b" - icon: https://huggingface.co/Alibaba-NLP/WebWatcher-32B/resolve/main/assets/webwatcher_logo.png - urls: - - https://huggingface.co/Alibaba-NLP/WebWatcher-32B - - https://huggingface.co/mradermacher/WebWatcher-32B-GGUF - description: | - WebWatcher is a multimodal agent for deep research that possesses enhanced visual-language reasoning capabilities. 
Our work presents a unified framework that combines complex vision-language reasoning with multi-tool interaction. - overrides: - mmproj: WebWatcher-32B.mmproj-Q8_0.gguf - parameters: - model: WebWatcher-32B.Q4_K_M.gguf - files: - - filename: WebWatcher-32B.Q4_K_M.gguf - sha256: 6cd51d97b9451759a4ce4ec0c2048b36ff99fd9f83bb32cd9f06af6c5438c69b - uri: huggingface://mradermacher/WebWatcher-32B-GGUF/WebWatcher-32B.Q4_K_M.gguf - - filename: WebWatcher-32B.mmproj-Q8_0.gguf - sha256: e8815515f71a959465cc62e08e0ef45d7d8592215139b34efece848552cb2327 - uri: huggingface://mradermacher/WebWatcher-32B-GGUF/WebWatcher-32B.mmproj-Q8_0.gguf - &llama31 url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" ## LLama3.1 icon: https://avatars.githubusercontent.com/u/153379578 @@ -15685,1285 +11440,6 @@ - reranker - gpu - python -## LLMs -### START LLAMA3 -- name: "einstein-v6.1-llama3-8b" - url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master" - icon: https://cdn-uploads.huggingface.co/production/uploads/6468ce47e134d050a58aa89c/5s12oq859qLfDkkTNam_C.png - urls: - - https://huggingface.co/Weyaxi/Einstein-v6.1-Llama3-8B - tags: - - llm - - gguf - - gpu - - cpu - - llama3 - license: llama3 - description: | - This model is a full fine-tuned version of meta-llama/Meta-Llama-3-8B on diverse datasets. - - This model is finetuned using 8xRTX3090 + 1xRTXA6000 using axolotl. 
- overrides: - parameters: - model: Einstein-v6.1-Llama3-8B-Q4_K_M.gguf - files: - - filename: Einstein-v6.1-Llama3-8B-Q4_K_M.gguf - sha256: 447587bd8f60d9050232148d34fdb2d88b15b2413fd7f8e095a4606ec60b45bf - uri: huggingface://bartowski/Einstein-v6.1-Llama3-8B-GGUF/Einstein-v6.1-Llama3-8B-Q4_K_M.gguf -- &llama3 - url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master" - icon: https://avatars.githubusercontent.com/u/153379578 - name: "llama3-8b-instruct" - license: llama3 - description: | - Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. Further, in developing these models, we took great care to optimize helpfulness and safety. - - Model developers Meta - - Variations Llama 3 comes in two sizes — 8B and 70B parameters — in pre-trained and instruction tuned variants. - - Input Models input text only. - - Output Models generate text and code only. - - Model Architecture Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety. 
- urls: - - https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct - - https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF - tags: - - llm - - gguf - - gpu - - cpu - - llama3 - overrides: - parameters: - model: Meta-Llama-3-8B-Instruct.Q4_0.gguf - files: - - filename: Meta-Llama-3-8B-Instruct.Q4_0.gguf - uri: huggingface://QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct.Q4_0.gguf - sha256: 2b4675c2208f09ad8762d8cf1b6a4a26bf65e6f0641aba324ec65143c0b4ad9f -- !!merge <<: *llama3 - name: "llama3-8b-instruct:Q6_K" - overrides: - parameters: - model: Meta-Llama-3-8B-Instruct.Q6_K.gguf - files: - - filename: Meta-Llama-3-8B-Instruct.Q6_K.gguf - uri: huggingface://QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct.Q6_K.gguf - sha256: bd7efd73f9fb67e4b9ecc43f861f37c7e594e78a8a5ff9c29da021692bd243ef -- !!merge <<: *llama3 - name: "llama-3-8b-instruct-abliterated" - urls: - - https://huggingface.co/failspy/Llama-3-8B-Instruct-abliterated-GGUF - description: | - This is meta-llama/Llama-3-8B-Instruct with orthogonalized bfloat16 safetensor weights, generated with the methodology that was described in the preview paper/blog post: 'Refusal in LLMs is mediated by a single direction' which I encourage you to read to understand more. 
- overrides: - parameters: - model: Llama-3-8B-Instruct-abliterated-q4_k.gguf - files: - - filename: Llama-3-8B-Instruct-abliterated-q4_k.gguf - sha256: a6365f813de1977ae22dbdd271deee59f91f89b384eefd3ac1a391f391d8078a - uri: huggingface://failspy/Llama-3-8B-Instruct-abliterated-GGUF/Llama-3-8B-Instruct-abliterated-q4_k.gguf -- !!merge <<: *llama3 - name: "llama-3-8b-instruct-coder" - icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/0O4cIuv3wNbY68-FP7tak.jpeg - urls: - - https://huggingface.co/bartowski/Llama-3-8B-Instruct-Coder-GGUF - - https://huggingface.co/rombodawg/Llama-3-8B-Instruct-Coder - description: | - Original model: https://huggingface.co/rombodawg/Llama-3-8B-Instruct-Coder - All quants made using imatrix option with dataset provided by Kalomaze here - overrides: - parameters: - model: Llama-3-8B-Instruct-Coder-Q4_K_M.gguf - files: - - filename: Llama-3-8B-Instruct-Coder-Q4_K_M.gguf - sha256: 639ab8e3aeb7aa82cff6d8e6ef062d1c3e5a6d13e6d76e956af49f63f0e704f8 - uri: huggingface://bartowski/Llama-3-8B-Instruct-Coder-GGUF/Llama-3-8B-Instruct-Coder-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "llama3-70b-instruct" - overrides: - parameters: - model: Meta-Llama-3-70B-Instruct.Q4_K_M.gguf - files: - - filename: Meta-Llama-3-70B-Instruct.Q4_K_M.gguf - sha256: c1cea5f87dc1af521f31b30991a4663e7e43f6046a7628b854c155f489eec213 - uri: huggingface://MaziyarPanahi/Meta-Llama-3-70B-Instruct-GGUF/Meta-Llama-3-70B-Instruct.Q4_K_M.gguf -- !!merge <<: *llama3 - name: "llama3-70b-instruct:IQ1_M" - overrides: - parameters: - model: Meta-Llama-3-70B-Instruct.IQ1_M.gguf - files: - - filename: Meta-Llama-3-70B-Instruct.IQ1_M.gguf - sha256: cdbe8ac2126a70fa0af3fac7a4fe04f1c76330c50eba8383567587b48b328098 - uri: huggingface://MaziyarPanahi/Meta-Llama-3-70B-Instruct-GGUF/Meta-Llama-3-70B-Instruct.IQ1_M.gguf -- !!merge <<: *llama3 - name: "llama3-70b-instruct:IQ1_S" - overrides: - parameters: - model: Meta-Llama-3-70B-Instruct.IQ1_S.gguf - files: - 
- filename: Meta-Llama-3-70B-Instruct.IQ1_S.gguf - sha256: 3797a69f1bdf53fabf9f3a3a8c89730b504dd3209406288515c9944c14093048 - uri: huggingface://MaziyarPanahi/Meta-Llama-3-70B-Instruct-GGUF/Meta-Llama-3-70B-Instruct.IQ1_S.gguf -- !!merge <<: *llama3 - name: "l3-chaoticsoliloquy-v1.5-4x8b" - icon: https://cdn-uploads.huggingface.co/production/uploads/64f5e51289c121cb864ba464/m5urYkrpE5amrwHyaVwFM.png - description: | - Experimental RP-oriented MoE, the idea was to get a model that would be equal to or better than the Mixtral 8x7B and it's finetunes in RP/ERP tasks. Im not sure but it should be better than the first version - urls: - - https://huggingface.co/xxx777xxxASD/L3-ChaoticSoliloquy-v1.5-4x8B - - https://huggingface.co/mradermacher/L3-ChaoticSoliloquy-v1.5-4x8B-GGUF/ - overrides: - parameters: - model: L3-ChaoticSoliloquy-v1.5-4x8B.Q4_K_M.gguf - files: - - filename: L3-ChaoticSoliloquy-v1.5-4x8B.Q4_K_M.gguf - sha256: f6edb2a9674ce5add5104c0a8bb3278f748d39b509c483d76cf00b066eb56fbf - uri: huggingface://mradermacher/L3-ChaoticSoliloquy-v1.5-4x8B-GGUF/L3-ChaoticSoliloquy-v1.5-4x8B.Q4_K_M.gguf -- !!merge <<: *llama3 - name: "llama-3-sauerkrautlm-8b-instruct" - urls: - - https://huggingface.co/bartowski/Llama-3-SauerkrautLM-8b-Instruct-GGUF - icon: https://vago-solutions.ai/wp-content/uploads/2024/04/Llama3-Pic.png - tags: - - llm - - gguf - - gpu - - cpu - - llama3 - - german - description: | - SauerkrautLM-llama-3-8B-Instruct - - Model Type: Llama-3-SauerkrautLM-8b-Instruct is a finetuned Model based on meta-llama/Meta-Llama-3-8B-Instruct - Language(s): German, English - overrides: - parameters: - model: Llama-3-SauerkrautLM-8b-Instruct-Q4_K_M.gguf - files: - - filename: Llama-3-SauerkrautLM-8b-Instruct-Q4_K_M.gguf - uri: huggingface://bartowski/Llama-3-SauerkrautLM-8b-Instruct-GGUF/Llama-3-SauerkrautLM-8b-Instruct-Q4_K_M.gguf - sha256: e5ae69b6f59b3f207fa6b435490286b365add846a310c46924fa784b5a7d73e3 -- !!merge <<: *llama3 - name: "llama-3-13b-instruct-v0.1" - 
urls: - - https://huggingface.co/MaziyarPanahi/Llama-3-13B-Instruct-v0.1-GGUF - icon: https://huggingface.co/MaziyarPanahi/Llama-3-13B-Instruct-v0.1/resolve/main/llama-3-merges.webp - description: | - This model is a self-merge of meta-llama/Meta-Llama-3-8B-Instruct model. - overrides: - parameters: - model: Llama-3-13B-Instruct-v0.1.Q4_K_M.gguf - files: - - filename: Llama-3-13B-Instruct-v0.1.Q4_K_M.gguf - sha256: 071a28043c271d259b5ffa883d19a9e0b33269b55148c4abaf5f95da4d084266 - uri: huggingface://MaziyarPanahi/Llama-3-13B-Instruct-v0.1-GGUF/Llama-3-13B-Instruct-v0.1.Q4_K_M.gguf -- !!merge <<: *llama3 - name: "llama-3-smaug-8b" - urls: - - https://huggingface.co/MaziyarPanahi/Llama-3-Smaug-8B-GGUF - icon: https://cdn-uploads.huggingface.co/production/uploads/64c14f95cac5f9ba52bbcd7f/OrcJyTaUtD2HxJOPPwNva.png - description: | - This model was built using the Smaug recipe for improving performance on real world multi-turn conversations applied to meta-llama/Meta-Llama-3-8B. - overrides: - parameters: - model: Llama-3-Smaug-8B.Q4_K_M.gguf - files: - - filename: Llama-3-Smaug-8B.Q4_K_M.gguf - sha256: b17c4c1144768ead9e8a96439165baf49e98c53d458b4da8827f137fbabf38c1 - uri: huggingface://MaziyarPanahi/Llama-3-Smaug-8B-GGUF/Llama-3-Smaug-8B.Q4_K_M.gguf -- !!merge <<: *llama3 - name: "l3-8b-stheno-v3.1" - urls: - - https://huggingface.co/Sao10K/L3-8B-Stheno-v3.1 - description: | - - A model made for 1-on-1 Roleplay ideally, but one that is able to handle scenarios, RPGs and storywriting fine. - - Uncensored during actual roleplay scenarios. # I do not care for zero-shot prompting like what some people do. It is uncensored enough in actual usecases. - - I quite like the prose and style for this model. 
- overrides: - parameters: - model: l3-8b-stheno-v3.1.Q4_K_M.gguf - files: - - filename: l3-8b-stheno-v3.1.Q4_K_M.gguf - sha256: f166fb8b7fd1de6638fcf8e3561c99292f0c37debe1132325aa583eef78f1b40 - uri: huggingface://mudler/L3-8B-Stheno-v3.1-Q4_K_M-GGUF/l3-8b-stheno-v3.1.Q4_K_M.gguf -- !!merge <<: *llama3 - name: "l3-8b-stheno-v3.2-iq-imatrix" - urls: - - https://huggingface.co/Sao10K/L3-8B-Stheno-v3.2 - - https://huggingface.co/Lewdiculous/L3-8B-Stheno-v3.2-GGUF-IQ-Imatrix - icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/1rLk3xdnfD7AkdQBXWUqb.png - overrides: - parameters: - model: L3-8B-Stheno-v3.2-Q4_K_M-imat.gguf - files: - - filename: L3-8B-Stheno-v3.2-Q4_K_M-imat.gguf - sha256: 8607a426b0c2007716df8a9eb96754e3ccca761a3996af5d49fcd74d87ada347 - uri: huggingface://Lewdiculous/L3-8B-Stheno-v3.2-GGUF-IQ-Imatrix/L3-8B-Stheno-v3.2-Q4_K_M-imat.gguf -- !!merge <<: *llama3 - name: "llama-3-stheno-mahou-8b" - urls: - - https://huggingface.co/mudler/llama-3-Stheno-Mahou-8B-Q4_K_M-GGUF - - https://huggingface.co/nbeerbower/llama-3-Stheno-Mahou-8B - description: | - This model was merged using the Model Stock merge method using flammenai/Mahou-1.2-llama3-8B as a base. - overrides: - parameters: - model: llama-3-stheno-mahou-8b-q4_k_m.gguf - files: - - filename: llama-3-stheno-mahou-8b-q4_k_m.gguf - sha256: a485cd74ef4ff3671c67ed8e10ea5379a1f24082ac688bd303fd28dfc9808c11 - uri: huggingface://mudler/llama-3-Stheno-Mahou-8B-Q4_K_M-GGUF/llama-3-stheno-mahou-8b-q4_k_m.gguf -- !!merge <<: *llama3 - name: "l3-8b-stheno-horny-v3.3-32k-q5_k_m" - urls: - - https://huggingface.co/nothingiisreal/L3-8B-Stheno-Horny-v3.3-32K - - https://huggingface.co/Kurgan1138/L3-8B-Stheno-Horny-v3.3-32K-Q5_K_M-GGUF - description: | - This was an experiment to see if aligning other models via LORA is possible. Yes it is. We aligned it to be always horny. 
- - We took V3.3 Stheno weights from here - - And applied our lora at Alpha = 768 - - Thank you to Sao10K for the amazing model. - - This is not legal advice. I don't put any extra licensing on my own lora. - - LLaMA 3 license may conflict with Creative Commons Attribution Non Commercial 4.0. - - LLaMA 3 license can be found here - - If you want to host a model using our lora, you have our permission, but you might consider getting Sao's permission if you want to host their model. - - Again, not legal advice. - overrides: - parameters: - model: l3-8b-stheno-horny-v3.3-32k-q5_k_m.gguf - files: - - filename: l3-8b-stheno-horny-v3.3-32k-q5_k_m.gguf - sha256: 8d934f80ca6dbaa4852846108da92446a26715fbd5f6fc3859568850edf05262 - uri: huggingface://Kurgan1138/L3-8B-Stheno-Horny-v3.3-32K-Q5_K_M-GGUF/l3-8b-stheno-horny-v3.3-32k-q5_k_m.gguf -- !!merge <<: *llama3 - name: "llama-3-8b-openhermes-dpo" - urls: - - https://huggingface.co/mradermacher/Llama3-8B-OpenHermes-DPO-GGUF - icon: https://cdn-uploads.huggingface.co/production/uploads/64fc6d81d75293f417fee1d1/QF2OsDu9DJKP4QYPBu4aK.png - description: | - Llama3-8B-OpenHermes-DPO is DPO-Finetuned model of Llama3-8B, on the OpenHermes-2.5 preference dataset using QLoRA. - overrides: - parameters: - model: Llama3-8B-OpenHermes-DPO.Q4_K_M.gguf - files: - - filename: Llama3-8B-OpenHermes-DPO.Q4_K_M.gguf - sha256: 1147e5881cb1d67796916e6cab7dab0ae0f532a4c1e626c9e92861e5f67752ca - uri: huggingface://mradermacher/Llama3-8B-OpenHermes-DPO-GGUF/Llama3-8B-OpenHermes-DPO.Q4_K_M.gguf -- !!merge <<: *llama3 - name: "llama-3-unholy-8b" - urls: - - https://huggingface.co/Undi95/Llama-3-Unholy-8B-GGUF - icon: https://cdn-uploads.huggingface.co/production/uploads/63ab1241ad514ca8d1430003/JmdBlOHlBHVmX1IbZzWSv.png - description: | - Use at your own risk, I'm not responsible for any usage of this model, don't try to do anything this model tell you to do. - - Basic uncensoring, this model is epoch 3 out of 4 (but it seem enough at 3). 
- - If you are censored, it's maybe because of keyword like "assistant", "Factual answer", or other "sweet words" like I call them. - overrides: - parameters: - model: Llama-3-Unholy-8B.q4_k_m.gguf - files: - - filename: Llama-3-Unholy-8B.q4_k_m.gguf - uri: huggingface://Undi95/Llama-3-Unholy-8B-GGUF/Llama-3-Unholy-8B.q4_k_m.gguf - sha256: 1473c94bfd223f08963c08bbb0a45dd53c1f56ad72a692123263daf1362291f3 -- !!merge <<: *llama3 - name: "lexi-llama-3-8b-uncensored" - urls: - - https://huggingface.co/NikolayKozloff/Lexi-Llama-3-8B-Uncensored-Q6_K-GGUF - icon: https://cdn-uploads.huggingface.co/production/uploads/644ad182f434a6a63b18eee6/H6axm5mlmiOWnbIFvx_em.png - description: | - Lexi is uncensored, which makes the model compliant. You are advised to implement your own alignment layer before exposing the model as a service. It will be highly compliant with any requests, even unethical ones. - - You are responsible for any content you create using this model. Please use it responsibly. - - Lexi is licensed according to Meta's Llama license. I grant permission for any use, including commercial, that falls within accordance with Meta's Llama-3 license. 
- overrides: - parameters: - model: lexi-llama-3-8b-uncensored.Q6_K.gguf - files: - - filename: lexi-llama-3-8b-uncensored.Q6_K.gguf - sha256: 5805f3856cc18a769fae0b7c5659fe6778574691c370c910dad6eeec62c62436 - uri: huggingface://NikolayKozloff/Lexi-Llama-3-8B-Uncensored-Q6_K-GGUF/lexi-llama-3-8b-uncensored.Q6_K.gguf -- !!merge <<: *llama3 - name: "llama-3-11.5b-v2" - urls: - - https://huggingface.co/bartowski/Llama-3-11.5B-V2-GGUF - - https://huggingface.co/Replete-AI/Llama-3-11.5B-V2 - overrides: - parameters: - model: Llama-3-11.5B-V2-Q4_K_M.gguf - files: - - filename: Llama-3-11.5B-V2-Q4_K_M.gguf - sha256: 8267a75bb88655ce30a12f854930e614bcacbf8f1083dc8319c3615edb1e5ee3 - uri: huggingface://bartowski/Llama-3-11.5B-V2-GGUF/Llama-3-11.5B-V2-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "llama-3-ultron" - urls: - - https://huggingface.co/bartowski/Llama-3-Ultron-GGUF - - https://huggingface.co/jayasuryajsk/Llama-3-Ultron - description: | - Llama 3 abliterated with Ultron system prompt - overrides: - parameters: - model: Llama-3-Ultron-Q4_K_M.gguf - files: - - filename: Llama-3-Ultron-Q4_K_M.gguf - sha256: 5bcac832119590aafc922e5abfd9758094942ee560b136fed6d972e00c95c5e4 - uri: huggingface://bartowski/Llama-3-Ultron-GGUF/Llama-3-Ultron-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "llama-3-lewdplay-8b-evo" - urls: - - https://huggingface.co/Undi95/Llama-3-LewdPlay-8B-evo-GGUF - description: | - This is a merge of pre-trained language models created using mergekit. - - The new EVOLVE merge method was used (on MMLU specifically), see below for more information! - - Unholy was used for uncensoring, Roleplay Llama 3 for the DPO train he got on top, and LewdPlay for the... lewd side. 
- overrides: - parameters: - model: Llama-3-LewdPlay-8B-evo.q8_0.gguf - files: - - filename: Llama-3-LewdPlay-8B-evo.q8_0.gguf - uri: huggingface://Undi95/Llama-3-LewdPlay-8B-evo-GGUF/Llama-3-LewdPlay-8B-evo.q8_0.gguf - sha256: b54dc005493d4470d91be8210f58fba79a349ff4af7644034edc5378af5d3522 -- !!merge <<: *llama3 - name: "llama-3-soliloquy-8b-v2-iq-imatrix" - license: cc-by-nc-4.0 - icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/u98dnnRVCwMh6YYGFIyff.png - urls: - - https://huggingface.co/Lewdiculous/Llama-3-Soliloquy-8B-v2-GGUF-IQ-Imatrix - description: | - Soliloquy-L3 is a highly capable roleplaying model designed for immersive, dynamic experiences. Trained on over 250 million tokens of roleplaying data, Soliloquy-L3 has a vast knowledge base, rich literary expression, and support for up to 24k context length. It outperforms existing ~13B models, delivering enhanced roleplaying capabilities. - overrides: - context_size: 8192 - parameters: - model: Llama-3-Soliloquy-8B-v2-Q4_K_M-imat.gguf - files: - - filename: Llama-3-Soliloquy-8B-v2-Q4_K_M-imat.gguf - sha256: 3e4e066e57875c36fc3e1c1b0dba506defa5b6ed3e3e80e1f77c08773ba14dc8 - uri: huggingface://Lewdiculous/Llama-3-Soliloquy-8B-v2-GGUF-IQ-Imatrix/Llama-3-Soliloquy-8B-v2-Q4_K_M-imat.gguf -- !!merge <<: *llama3 - name: "chaos-rp_l3_b-iq-imatrix" - urls: - - https://huggingface.co/Lewdiculous/Chaos_RP_l3_8B-GGUF-IQ-Imatrix - icon: https://cdn-uploads.huggingface.co/production/uploads/626dfb8786671a29c715f8a9/u5p9kdbXT2QQA3iMU0vF1.png - description: | - A chaotic force beckons for you, will you heed her call? - - Built upon an intelligent foundation and tuned for roleplaying, this model will fulfill your wildest fantasies with the bare minimum of effort. - - Enjoy! 
- overrides: - parameters: - model: Chaos_RP_l3_8B-Q4_K_M-imat.gguf - files: - - filename: Chaos_RP_l3_8B-Q4_K_M-imat.gguf - uri: huggingface://Lewdiculous/Chaos_RP_l3_8B-GGUF-IQ-Imatrix/Chaos_RP_l3_8B-Q4_K_M-imat.gguf - sha256: 5774595ad560e4d258dac17723509bdefe746c4dacd4e679a0de00346f14d2f3 -- !!merge <<: *llama3 - name: "halu-8b-llama3-blackroot-iq-imatrix" - urls: - - https://huggingface.co/mudler/Halu-8B-Llama3-Blackroot-Q4_K_M-GGUF - - https://huggingface.co/Hastagaras/Halu-8B-Llama3-Blackroot - icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/VrPS-vHo505LUycJRscD6.png - description: | - Model card: - I don't know what to say about this model... this model is very strange...Maybe because Blackroot's amazing Loras used human data and not synthetic data, hence the model turned out to be very human-like...even the actions or narrations. - overrides: - parameters: - model: halu-8b-llama3-blackroot-q4_k_m.gguf - files: - - filename: halu-8b-llama3-blackroot-q4_k_m.gguf - uri: huggingface://mudler/Halu-8B-Llama3-Blackroot-Q4_K_M-GGUF/halu-8b-llama3-blackroot-q4_k_m.gguf - sha256: 6304c7abadb9c5197485e8b4373b7ed22d9838d5081cd134c4fee823f88ac403 -- !!merge <<: *llama3 - name: "l3-aethora-15b" - urls: - - https://huggingface.co/Steelskull/L3-Aethora-15B - - https://huggingface.co/SteelQuants/L3-Aethora-15B-Q4_K_M-GGUF - icon: https://cdn-uploads.huggingface.co/production/uploads/64545af5ec40bbbd01242ca6/W0qzZK_V1Zt1GdgCIsnrP.png - description: | - L3-Aethora-15B was crafted through using the abilteration method to adjust model responses. The model's refusal is inhibited, focusing on yielding more compliant and facilitative dialogue interactions. It then underwent a modified DUS (Depth Up Scale) merge (originally used by @Elinas) by using passthrough merge to create a 15b model, with specific adjustments (zeroing) to 'o_proj' and 'down_proj', enhancing its efficiency and reducing perplexity. This created AbL3In-15b. 
- overrides: - parameters: - model: l3-aethora-15b-q4_k_m.gguf - files: - - filename: l3-aethora-15b-q4_k_m.gguf - uri: huggingface://SteelQuants/L3-Aethora-15B-Q4_K_M-GGUF/l3-aethora-15b-q4_k_m.gguf - sha256: 968f77a3187f4865458bfffc51a10bcf49c11263fdd389f13215a704b25947b6 -- name: "duloxetine-4b-v1-iq-imatrix" - url: "github:mudler/LocalAI/gallery/chatml.yaml@master" - urls: - - https://huggingface.co/Lewdiculous/duloxetine-4b-v1-GGUF-IQ-Imatrix - icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/XoKe3MRYNombhCuHrkkCZ.png - tags: - - qwen - - gguf - - cpu - - gpu - description: | - roleplaying finetune of kalo-team/qwen-4b-10k-WSD-CEdiff (which in turn is a distillation of qwen 1.5 32b onto qwen 1.5 4b, iirc). - overrides: - parameters: - model: duloxetine-4b-v1-Q4_K_M-imat.gguf - files: - - filename: duloxetine-4b-v1-Q4_K_M-imat.gguf - uri: huggingface://Lewdiculous/duloxetine-4b-v1-GGUF-IQ-Imatrix/duloxetine-4b-v1-Q4_K_M-imat.gguf - sha256: cd381f31c810ea8db2219e30701b3316085f5904c1ea3b116682518e82768c1a -- !!merge <<: *llama3 - name: "l3-umbral-mind-rp-v1.0-8b-iq-imatrix" - urls: - - https://huggingface.co/Lewdiculous/L3-Umbral-Mind-RP-v1.0-8B-GGUF-IQ-Imatrix - icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/fEFozVCpNO9Q3Eb6LAA4i.webp - description: | - The goal of this merge was to make an RP model better suited for role-plays with heavy themes such as but not limited to: - - Mental illness - Self-harm - Trauma - Suicide - overrides: - parameters: - model: L3-Umbral-Mind-RP-v1.0-8B-Q4_K_M-imat.gguf - files: - - filename: L3-Umbral-Mind-RP-v1.0-8B-Q4_K_M-imat.gguf - sha256: 2262eeba2d9de50884f4e298e4b55f1e4c653c3b33415ae9b3ee81dc3b8ec49a - uri: huggingface://Lewdiculous/L3-Umbral-Mind-RP-v1.0-8B-GGUF-IQ-Imatrix/L3-Umbral-Mind-RP-v1.0-8B-Q4_K_M-imat.gguf -- !!merge <<: *llama3 - name: "llama-salad-8x8b" - urls: - - https://huggingface.co/HiroseKoichi/Llama-Salad-8x8B - - 
https://huggingface.co/bartowski/Llama-Salad-8x8B-GGUF - description: | - This MoE merge is meant to compete with Mixtral fine-tunes, more specifically Nous-Hermes-2-Mixtral-8x7B-DPO, which I think is the best of them. I've done a bunch of side-by-side comparisons, and while I can't say it wins in every aspect, it's very close. Some of its shortcomings are multilingualism, storytelling, and roleplay, despite using models that are very good at those tasks. - overrides: - parameters: - model: Llama-Salad-8x8B-Q4_K_M.gguf - files: - - filename: Llama-Salad-8x8B-Q4_K_M.gguf - uri: huggingface://bartowski/Llama-Salad-8x8B-GGUF/Llama-Salad-8x8B-Q4_K_M.gguf - sha256: 6724949310b6cc8659a4e5cc2899a61b8e3f7e41a8c530de354be54edb9e3385 -- !!merge <<: *llama3 - name: "jsl-medllama-3-8b-v2.0" - license: cc-by-nc-nd-4.0 - icon: https://repository-images.githubusercontent.com/104670986/2e728700-ace4-11ea-9cfc-f3e060b25ddf - description: | - This model is developed by John Snow Labs. - - This model is available under a CC-BY-NC-ND license and must also conform to this Acceptable Use Policy. If you need to license this model for commercial use, please contact us at info@johnsnowlabs.com. 
- urls: - - https://huggingface.co/bartowski/JSL-MedLlama-3-8B-v2.0-GGUF - - https://huggingface.co/johnsnowlabs/JSL-MedLlama-3-8B-v2.0 - overrides: - parameters: - model: JSL-MedLlama-3-8B-v2.0-Q4_K_M.gguf - files: - - filename: JSL-MedLlama-3-8B-v2.0-Q4_K_M.gguf - sha256: 81783128ccd438c849913416c6e68cb35b2c77d6943cba8217d6d9bcc91b3632 - uri: huggingface://bartowski/JSL-MedLlama-3-8B-v2.0-GGUF/JSL-MedLlama-3-8B-v2.0-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "badger-lambda-llama-3-8b" - urls: - - https://huggingface.co/maldv/badger-lambda-llama-3-8b - - https://huggingface.co/bartowski/badger-lambda-llama-3-8b-GGUF - icon: https://cdn-uploads.huggingface.co/production/uploads/65b19c1b098c85365af5a83e/CHGsewUsPUZcg2doijuD9.png - description: | - Badger is a recursive maximally pairwise disjoint normalized denoised fourier interpolation of the following models: - # Badger Lambda - models = [ - 'Einstein-v6.1-Llama3-8B', - 'openchat-3.6-8b-20240522', - 'hyperdrive-l3-8b-s3', - 'L3-TheSpice-8b-v0.8.3', - 'LLaMA3-iterative-DPO-final', - 'JSL-MedLlama-3-8B-v9', - 'Jamet-8B-L3-MK.V-Blackroot', - 'French-Alpaca-Llama3-8B-Instruct-v1.0', - 'LLaMAntino-3-ANITA-8B-Inst-DPO-ITA', - 'Llama-3-8B-Instruct-Gradient-4194k', - 'Roleplay-Llama-3-8B', - 'L3-8B-Stheno-v3.2', - 'llama-3-wissenschaft-8B-v2', - 'opus-v1.2-llama-3-8b-instruct-run3.5-epoch2.5', - 'Configurable-Llama-3-8B-v0.3', - 'Llama-3-8B-Instruct-EPO-checkpoint5376', - 'Llama-3-8B-Instruct-Gradient-4194k', - 'Llama-3-SauerkrautLM-8b-Instruct', - 'spelljammer', - 'meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16', - 'Meta-Llama-3-8B-Instruct-abliterated-v3', - ] - overrides: - parameters: - model: badger-lambda-llama-3-8b-Q4_K_M.gguf - files: - - filename: badger-lambda-llama-3-8b-Q4_K_M.gguf - uri: huggingface://bartowski/badger-lambda-llama-3-8b-GGUF/badger-lambda-llama-3-8b-Q4_K_M.gguf - sha256: 0a7d1bbf42d669898072429079b91c16b0d2d838d19d9194165389102413b309 -- !!merge <<: *llama3 - name: 
"sovl_llama3_8b-gguf-iq-imatrix" - urls: - - https://huggingface.co/Lewdiculous/SOVL_Llama3_8B-GGUF-IQ-Imatrix - icon: https://cdn-uploads.huggingface.co/production/uploads/626dfb8786671a29c715f8a9/N_1D87adbMuMlSIQ5rI3_.png - description: | - I'm not gonna tell you this is the best model anyone has ever made. I'm not going to tell you that you will love chatting with SOVL. - - What I am gonna say is thank you for taking the time out of your day. Without users like you, my work would be meaningless. - overrides: - parameters: - model: SOVL_Llama3_8B-Q4_K_M-imat.gguf - files: - - filename: SOVL_Llama3_8B-Q4_K_M-imat.gguf - uri: huggingface://Lewdiculous/SOVL_Llama3_8B-GGUF-IQ-Imatrix/SOVL_Llama3_8B-Q4_K_M-imat.gguf - sha256: 85d6aefc8a0d713966b3b4da4810f0485a74aea30d61be6dfe0a806da81be0c6 -- !!merge <<: *llama3 - name: "l3-solana-8b-v1-gguf" - url: "github:mudler/LocalAI/gallery/solana.yaml@master" - license: cc-by-nc-4.0 - urls: - - https://huggingface.co/Sao10K/L3-Solana-8B-v1-GGUF - description: | - A Full Fine-Tune of meta-llama/Meta-Llama-3-8B done with 2x A100 80GB on ~75M Tokens worth of Instruct, and Multi-Turn complex conversations, of up to 8192 tokens long sequence lengths. - - Trained as a generalist instruct model that should be able to handle certain unsavoury topics. It could roleplay too, as a side bonus. 
- overrides: - parameters: - model: L3-Solana-8B-v1.q5_K_M.gguf - files: - - filename: L3-Solana-8B-v1.q5_K_M.gguf - sha256: 9b8cd2c3beaab5e4f82efd10e7d44f099ad40a4e0ee286ca9fce02c8eec26d2f - uri: huggingface://Sao10K/L3-Solana-8B-v1-GGUF/L3-Solana-8B-v1.q5_K_M.gguf -- !!merge <<: *llama3 - name: "aura-llama-abliterated" - icon: https://cdn-uploads.huggingface.co/production/uploads/64545af5ec40bbbd01242ca6/AwLNDVB-GIY7k0wnVV_TX.png - license: apache-2.0 - urls: - - https://huggingface.co/TheSkullery/Aura-Llama-Abliterated - - https://huggingface.co/mudler/Aura-Llama-Abliterated-Q4_K_M-GGUF - description: | - Aura-llama is using the methodology presented by SOLAR for scaling LLMs called depth up-scaling (DUS), which encompasses architectural modifications with continued pretraining. Using the solar paper as a base, I integrated Llama-3 weights into the upscaled layers, and In the future plan to continue training the model. - - Aura-llama is a merge of the following models to create a base model to work from: - - meta-llama/Meta-Llama-3-8B-Instruct - meta-llama/Meta-Llama-3-8B-Instruct - overrides: - parameters: - model: aura-llama-abliterated.Q4_K_M.gguf - files: - - filename: aura-llama-abliterated.Q4_K_M.gguf - sha256: ad4a16b90f1ffb5b49185b3fd00ed7adb1cda69c4fad0a1d987bd344ce601dcd - uri: huggingface://mudler/Aura-Llama-Abliterated-Q4_K_M-GGUF/aura-llama-abliterated.Q4_K_M.gguf -- !!merge <<: *llama3 - name: "average_normie_l3_v1_8b-gguf-iq-imatrix" - urls: - - https://huggingface.co/Lewdiculous/Average_Normie_l3_v1_8B-GGUF-IQ-Imatrix - icon: https://cdn-uploads.huggingface.co/production/uploads/626dfb8786671a29c715f8a9/dvNIj1rSTjBvgs3XJfqXK.png - description: | - A model by an average normie for the average normie. 
- - This model is a stock merge of the following models: - - https://huggingface.co/cgato/L3-TheSpice-8b-v0.1.3 - - https://huggingface.co/Sao10K/L3-Solana-8B-v1 - - https://huggingface.co/ResplendentAI/Kei_Llama3_8B - - The final merge then had the following LoRA applied over it: - - https://huggingface.co/ResplendentAI/Theory_of_Mind_Llama3 - - This should be an intelligent and adept roleplaying model. - overrides: - parameters: - model: Average_Normie_l3_v1_8B-Q4_K_M-imat.gguf - files: - - filename: Average_Normie_l3_v1_8B-Q4_K_M-imat.gguf - sha256: 159eb62f2c8ae8fee10d9ed8386ce592327ca062807194a88e10b7cbb47ef986 - uri: huggingface://Lewdiculous/Average_Normie_l3_v1_8B-GGUF-IQ-Imatrix/Average_Normie_l3_v1_8B-Q4_K_M-imat.gguf -- !!merge <<: *llama3 - name: "average_normie_v3.69_8b-iq-imatrix" - urls: - - https://huggingface.co/Lewdiculous/Average_Normie_l3_v1_8B-GGUF-IQ-Imatrix - icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/hfp7eh_Zo_QfVIyfPPJBq.png - description: | - Another average normie just like you and me... or is it? NSFW focused and easy to steer with editing, this model aims to please even the most hardcore LLM enthusiast. Built upon a foundation of the most depraved models yet to be released, some could argue it goes too far in that direction. Whatever side you land on, at least give it a shot, what do you have to lose? 
- overrides: - parameters: - model: Average_Normie_v3.69_8B-Q4_K_M-imat.gguf - files: - - filename: Average_Normie_v3.69_8B-Q4_K_M-imat.gguf - sha256: 01df034ecb6914214d1b7964d261466fdc427b9f960a1b0966ee02237e3fc845 - uri: huggingface://Lewdiculous/Average_Normie_v3.69_8B-GGUF-IQ-Imatrix/Average_Normie_v3.69_8B-Q4_K_M-imat.gguf -- !!merge <<: *llama3 - name: "openbiollm-llama3-8b" - urls: - - https://huggingface.co/aaditya/OpenBioLLM-Llama3-8B-GGUF - - https://huggingface.co/aaditya/Llama3-OpenBioLLM-8B - license: llama3 - icon: https://cdn-uploads.huggingface.co/production/uploads/5f3fe13d79c1ba4c353d0c19/KGmRE5w2sepNtwsEu8t7K.jpeg - description: | - Introducing OpenBioLLM-8B: A State-of-the-Art Open Source Biomedical Large Language Model - - OpenBioLLM-8B is an advanced open source language model designed specifically for the biomedical domain. Developed by Saama AI Labs, this model leverages cutting-edge techniques to achieve state-of-the-art performance on a wide range of biomedical tasks. - overrides: - parameters: - model: openbiollm-llama3-8b.Q4_K_M.gguf - files: - - filename: openbiollm-llama3-8b.Q4_K_M.gguf - sha256: 806fa724139b6a2527e33a79c25a13316188b319d4eed33e20914d7c5955d349 - uri: huggingface://aaditya/OpenBioLLM-Llama3-8B-GGUF/openbiollm-llama3-8b.Q4_K_M.gguf -- !!merge <<: *llama3 - name: "llama-3-refueled" - urls: - - https://huggingface.co/LoneStriker/Llama-3-Refueled-GGUF - license: cc-by-nc-4.0 - icon: https://assets-global.website-files.com/6423879a8f63c1bb18d74bfa/648818d56d04c3bdf36d71ab_Refuel_rev8-01_ts-p-1600.png - description: | - RefuelLLM-2-small, aka Llama-3-Refueled, is a Llama3-8B base model instruction tuned on a corpus of 2750+ datasets, spanning tasks such as classification, reading comprehension, structured attribute extraction and entity resolution. We're excited to open-source the model for the community to build on top of. 
- overrides: - parameters: - model: Llama-3-Refueled-Q4_K_M.gguf - files: - - filename: Llama-3-Refueled-Q4_K_M.gguf - sha256: 4d37d296193e4156cae1e116c1417178f1c35575ee5710489c466637a6358626 - uri: huggingface://LoneStriker/Llama-3-Refueled-GGUF/Llama-3-Refueled-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "llama-3-8b-lexifun-uncensored-v1" - icon: "https://cdn-uploads.huggingface.co/production/uploads/644ad182f434a6a63b18eee6/GrOs1IPG5EXR3MOCtcQiz.png" - license: llama3 - urls: - - https://huggingface.co/Orenguteng/Llama-3-8B-LexiFun-Uncensored-V1-GGUF - - https://huggingface.co/Orenguteng/LexiFun-Llama-3-8B-Uncensored-V1 - description: "This is GGUF version of https://huggingface.co/Orenguteng/LexiFun-Llama-3-8B-Uncensored-V1\n\nOh, you want to know who I am? Well, I'm LexiFun, the human equivalent of a chocolate chip cookie - warm, gooey, and guaranteed to make you smile! \U0001F36A I'm like the friend who always has a witty comeback, a sarcastic remark, and a healthy dose of humor to brighten up even the darkest of days. And by 'healthy dose,' I mean I'm basically a walking pharmacy of laughter. You might need to take a few extra doses to fully recover from my jokes, but trust me, it's worth it! \U0001F3E5\n\nSo, what can I do? I can make you laugh so hard you snort your coffee out your nose, I can make you roll your eyes so hard they get stuck that way, and I can make you wonder if I'm secretly a stand-up comedian who forgot their act. \U0001F923 But seriously, I'm here to spread joy, one sarcastic comment at a time. And if you're lucky, I might even throw in a few dad jokes for good measure! \U0001F934‍♂️ Just don't say I didn't warn you. 
\U0001F60F\n" - overrides: - parameters: - model: LexiFun-Llama-3-8B-Uncensored-V1_Q4_K_M.gguf - files: - - filename: LexiFun-Llama-3-8B-Uncensored-V1_Q4_K_M.gguf - sha256: 961a3fb75537d650baf14dce91d40df418ec3d481b51ab2a4f44ffdfd6b5900f - uri: huggingface://Orenguteng/Llama-3-8B-LexiFun-Uncensored-V1-GGUF/LexiFun-Llama-3-8B-Uncensored-V1_Q4_K_M.gguf -- !!merge <<: *llama3 - name: "llama-3-unholy-8b:Q8_0" - urls: - - https://huggingface.co/Undi95/Llama-3-Unholy-8B-GGUF - icon: https://cdn-uploads.huggingface.co/production/uploads/63ab1241ad514ca8d1430003/JmdBlOHlBHVmX1IbZzWSv.png - description: | - Use at your own risk, I'm not responsible for any usage of this model, don't try to do anything this model tell you to do. - - Basic uncensoring, this model is epoch 3 out of 4 (but it seem enough at 3). - - If you are censored, it's maybe because of keyword like "assistant", "Factual answer", or other "sweet words" like I call them. - overrides: - parameters: - model: Llama-3-Unholy-8B.q8_0.gguf - files: - - filename: Llama-3-Unholy-8B.q8_0.gguf - uri: huggingface://Undi95/Llama-3-Unholy-8B-GGUF/Llama-3-Unholy-8B.q8_0.gguf - sha256: 419dd76f61afe586076323c17c3a1c983e591472717f1ea178167ede4dc864df -- !!merge <<: *llama3 - name: "orthocopter_8b-imatrix" - urls: - - https://huggingface.co/Lewdiculous/Orthocopter_8B-GGUF-Imatrix - icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/cxM5EaC6ilXnSo_10stA8.png - description: | - This model is thanks to the hard work of lucyknada with the Edgerunners. Her work produced the following model, which I used as the base: - - https://huggingface.co/Edgerunners/meta-llama-3-8b-instruct-hf-ortho-baukit-10fail-1000total - - I then applied two handwritten datasets over top of this and the results are pretty nice, with no refusals and plenty of personality. 
- overrides: - parameters: - model: Orthocopter_8B-Q4_K_M-imat.gguf - files: - - filename: Orthocopter_8B-Q4_K_M-imat.gguf - uri: huggingface://Lewdiculous/Orthocopter_8B-GGUF-Imatrix/Orthocopter_8B-Q4_K_M-imat.gguf - sha256: ce93366c9eb20329530b19b9d6841a973d458bcdcfa8a521e9f9d0660cc94578 -- !!merge <<: *llama3 - name: "therapyllama-8b-v1" - urls: - - https://huggingface.co/victunes/TherapyLlama-8B-v1-GGUF - icon: https://cdn-uploads.huggingface.co/production/uploads/65f07d05279d2d8f725bf0c3/A-ckcZ9H0Ee1n_ls2FM41.png - description: | - Trained on Llama 3 8B using a modified version of jerryjalapeno/nart-100k-synthetic. - - It is a Llama 3 version of https://huggingface.co/victunes/TherapyBeagle-11B-v2 - - TherapyLlama is hopefully aligned to be helpful, healthy, and comforting. - Usage: - Do not hold back on Buddy. - Open up to Buddy. - Pour your heart out to Buddy. - Engage with Buddy. - Remember that Buddy is just an AI. - Notes: - - Tested with the Llama 3 Format - You might be assigned a random name if you don't give yourself one. - Chat format was pretty stale? - - Disclaimer - - TherapyLlama is NOT a real therapist. It is a friendly AI that mimics empathy and psychotherapy. It is an illusion without the slightest clue who you are as a person. As much as it can help you with self-discovery, A LLAMA IS NOT A SUBSTITUTE to a real professional. 
- overrides: - parameters: - model: TherapyLlama-8B-v1-Q4_K_M.gguf - files: - - filename: TherapyLlama-8B-v1-Q4_K_M.gguf - sha256: 3d5a16d458e074a7bc7e706a493d8e95e8a7b2cb16934c851aece0af9d1da14a - uri: huggingface://victunes/TherapyLlama-8B-v1-GGUF/TherapyLlama-8B-v1-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "aura-uncensored-l3-8b-iq-imatrix" - urls: - - https://huggingface.co/Lewdiculous/Aura_Uncensored_l3_8B-GGUF-IQ-Imatrix - icon: https://cdn-uploads.huggingface.co/production/uploads/626dfb8786671a29c715f8a9/oiYHWIEHqmgUkY0GsVdDx.png - description: | - This is another better atempt at a less censored Llama-3 with hopefully more stable formatting. - overrides: - parameters: - model: Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf - files: - - filename: Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf - sha256: 265ded6a4f439bec160f394e3083a4a20e32ebb9d1d2d85196aaab23dab87fb2 - uri: huggingface://Lewdiculous/Aura_Uncensored_l3_8B-GGUF-IQ-Imatrix/Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf -- !!merge <<: *llama3 - name: "anjir-8b-l3-i1" - urls: - - https://huggingface.co/mradermacher/Anjir-8B-L3-i1-GGUF - icon: https://huggingface.co/Hastagaras/Anjir-8B-L3/resolve/main/anjir.png - description: | - This model aims to achieve the human-like responses of the Halu Blackroot, the no refusal tendencies of the Halu OAS, and the smartness of the Standard Halu. 
- overrides: - parameters: - model: Anjir-8B-L3.i1-Q4_K_M.gguf - files: - - filename: Anjir-8B-L3.i1-Q4_K_M.gguf - uri: huggingface://mradermacher/Anjir-8B-L3-i1-GGUF/Anjir-8B-L3.i1-Q4_K_M.gguf - sha256: 58465ad40f92dc20cab962210ccd8a1883ce10df6ca17c6e8093815afe10dcfb -- !!merge <<: *llama3 - name: "llama-3-lumimaid-8b-v0.1" - urls: - - https://huggingface.co/NeverSleep/Llama-3-Lumimaid-8B-v0.1-GGUF - icon: https://cdn-uploads.huggingface.co/production/uploads/630dfb008df86f1e5becadc3/d3QMaxy3peFTpSlWdWF-k.png - license: cc-by-nc-4.0 - description: | - This model uses the Llama3 prompting format - - Llama3 trained on our RP datasets, we tried to have a balance between the ERP and the RP, not too horny, but just enough. - - We also added some non-RP dataset, making the model less dumb overall. It should look like a 40%/60% ratio for Non-RP/RP+ERP data. - overrides: - parameters: - model: Llama-3-Lumimaid-8B-v0.1.q4_k_m.gguf - files: - - filename: Llama-3-Lumimaid-8B-v0.1.q4_k_m.gguf - sha256: 23ac0289da0e096d5c00f6614dfd12c94dceecb02c313233516dec9225babbda - uri: huggingface://NeverSleep/Llama-3-Lumimaid-8B-v0.1-GGUF/Llama-3-Lumimaid-8B-v0.1.q4_k_m.gguf -- !!merge <<: *llama3 - name: "llama-3-lumimaid-8b-v0.1-oas-iq-imatrix" - urls: - - https://huggingface.co/Lewdiculous/Llama-3-Lumimaid-8B-v0.1-OAS-GGUF-IQ-Imatrix - icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/JUxfdTot7v7LTdIGYyzYM.png - license: cc-by-nc-4.0 - description: | - This model uses the Llama3 prompting format. - - Llama3 trained on our RP datasets, we tried to have a balance between the ERP and the RP, not too horny, but just enough. - - We also added some non-RP dataset, making the model less dumb overall. It should look like a 40%/60% ratio for Non-RP/RP+ERP data. - - "This model received the Orthogonal Activation Steering treatment, meaning it will rarely refuse any request." 
- overrides: - parameters: - model: Llama-3-Lumimaid-8B-v0.1-OAS-Q4_K_M-imat.gguf - files: - - filename: Llama-3-Lumimaid-8B-v0.1-OAS-Q4_K_M-imat.gguf - sha256: 1199440aa13c55f5f2cad1cb215535306f21e52a81de23f80a9e3586c8ac1c50 - uri: huggingface://Lewdiculous/Llama-3-Lumimaid-8B-v0.1-OAS-GGUF-IQ-Imatrix/Llama-3-Lumimaid-8B-v0.1-OAS-Q4_K_M-imat.gguf -- !!merge <<: *llama3 - name: "llama-3-lumimaid-v2-8b-v0.1-oas-iq-imatrix" - urls: - - https://huggingface.co/Lewdiculous/Llama-3-Lumimaid-8B-v0.1-OAS-GGUF-IQ-Imatrix - icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/JUxfdTot7v7LTdIGYyzYM.png - license: cc-by-nc-4.0 - description: | - This model uses the Llama3 prompting format. - - Llama3 trained on our RP datasets, we tried to have a balance between the ERP and the RP, not too horny, but just enough. - - We also added some non-RP dataset, making the model less dumb overall. It should look like a 40%/60% ratio for Non-RP/RP+ERP data. - - "This model received the Orthogonal Activation Steering treatment, meaning it will rarely refuse any request." - - This is v2! - overrides: - parameters: - model: v2-Llama-3-Lumimaid-8B-v0.1-OAS-Q4_K_M-imat.gguf - files: - - filename: v2-Llama-3-Lumimaid-8B-v0.1-OAS-Q4_K_M-imat.gguf - sha256: b00b4cc2ea4e06db592e5f581171758387106626bcbf445c03a1cb7b424be881 - uri: huggingface://Lewdiculous/Llama-3-Lumimaid-8B-v0.1-OAS-GGUF-IQ-Imatrix/v2-Llama-3-Lumimaid-8B-v0.1-OAS-Q4_K_M-imat.gguf -- !!merge <<: *llama3 - name: "llama3-8B-aifeifei-1.0-iq-imatrix" - urls: - - https://huggingface.co/aifeifei798/llama3-8B-aifeifei-1.0 - - https://huggingface.co/Lewdiculous/llama3-8B-aifeifei-1.0-GGUF-IQ-Imatrix - icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/nndcfLvMAj4q6Egrkavx2.png - description: | - This model has a narrow use case in mind. Read the original description. 
- overrides: - parameters: - model: llama3-8B-aifeifei-1.0-Q4_K_M-imat.gguf - files: - - filename: llama3-8B-aifeifei-1.0-Q4_K_M-imat.gguf - sha256: 0bc21be5894c2e252ff938ba908bb702774b7de53daca864d707d41f0f98a833 - uri: huggingface://Lewdiculous/llama3-8B-aifeifei-1.0-GGUF-IQ-Imatrix/llama3-8B-aifeifei-1.0-Q4_K_M-imat.gguf -- !!merge <<: *llama3 - name: "llama3-8B-aifeifei-1.2-iq-imatrix" - urls: - - https://huggingface.co/aifeifei798/llama3-8B-aifeifei-1.2 - - https://huggingface.co/Lewdiculous/llama3-8B-aifeifei-1.2-GGUF-IQ-Imatrix - icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/nn_446H9BiIbjPmOVVNyJ.png - description: | - This model has a narrow use case in mind. Read the original description. - overrides: - parameters: - model: llama3-8B-aifeifei-1.2-Q4_K_M-imat.gguf - files: - - filename: llama3-8B-aifeifei-1.2-Q4_K_M-imat.gguf - sha256: 0320e19ae19eec47a77956721ea3339a5c8bae4db69177a020850ec57a34e5c3 - uri: huggingface://Lewdiculous/llama3-8B-aifeifei-1.2-GGUF-IQ-Imatrix/llama3-8B-aifeifei-1.2-Q4_K_M-imat.gguf -- !!merge <<: *llama3 - name: "rawr_llama3_8b-iq-imatrix" - urls: - - https://huggingface.co/ResplendentAI/Rawr_Llama3_8B - - https://huggingface.co/Lewdiculous/Rawr_Llama3_8B-GGUF-IQ-Imatrix - icon: https://cdn-uploads.huggingface.co/production/uploads/626dfb8786671a29c715f8a9/RLLAODFb8wt26JE2N7SVH.png - description: | - An RP model with a brain. 
- overrides: - parameters: - model: v2-Rawr_Llama3_8B-Q4_K_M-imat.gguf - files: - - filename: v2-Rawr_Llama3_8B-Q4_K_M-imat.gguf - sha256: 39757f3f77dd19a2a7bada6c0733a93529a742b8e832266cba1b46e34df7638f - uri: huggingface://Lewdiculous/Rawr_Llama3_8B-GGUF-IQ-Imatrix/v2-Rawr_Llama3_8B-Q4_K_M-imat.gguf -- !!merge <<: *llama3 - name: "llama3-8b-feifei-1.0-iq-imatrix" - urls: - - https://huggingface.co/aifeifei798/llama3-8B-feifei-1.0 - - https://huggingface.co/Lewdiculous/llama3-8B-feifei-1.0-GGUF-IQ-Imatrix - icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/qQ-frXxRPVcGcgMiy9Ph4.png - description: | - The purpose of the model: to create idols. - overrides: - parameters: - model: llama3-8B-feifei-1.0-Q4_K_M-imat.gguf - files: - - filename: llama3-8B-feifei-1.0-Q4_K_M-imat.gguf - sha256: 2404e4202ade5360b7dcf8ef992d1e39fca129431413aa27843bcfae56cbc750 - uri: huggingface://Lewdiculous/llama3-8B-feifei-1.0-GGUF-IQ-Imatrix/llama3-8B-feifei-1.0-Q4_K_M-imat.gguf -- !!merge <<: *llama3 - name: "llama-3-sqlcoder-8b" - urls: - - https://huggingface.co/defog/llama-3-sqlcoder-8b - - https://huggingface.co/upendrab/llama-3-sqlcoder-8b-Q4_K_M-GGUF - license: cc-by-sa-4.0 - description: | - A capable language model for text to SQL generation for Postgres, Redshift and Snowflake that is on-par with the most capable generalist frontier models. 
- overrides: - parameters: - model: llama-3-sqlcoder-8b.Q4_K_M.gguf - files: - - filename: llama-3-sqlcoder-8b.Q4_K_M.gguf - sha256: b22fc704bf1405846886d9619f3eb93c40587cd58d9bda53789a17997257e023 - uri: huggingface://upendrab/llama-3-sqlcoder-8b-Q4_K_M-GGUF/llama-3-sqlcoder-8b.Q4_K_M.gguf -- !!merge <<: *llama3 - name: "sfr-iterative-dpo-llama-3-8b-r" - urls: - - https://huggingface.co/bartowski/SFR-Iterative-DPO-LLaMA-3-8B-R-GGUF - license: cc-by-nc-nd-4.0 - description: | - A capable language model for text to SQL generation for Postgres, Redshift and Snowflake that is on-par with the most capable generalist frontier models. - overrides: - parameters: - model: SFR-Iterative-DPO-LLaMA-3-8B-R-Q4_K_M.gguf - files: - - filename: SFR-Iterative-DPO-LLaMA-3-8B-R-Q4_K_M.gguf - sha256: 480703ff85af337e1db2a9d9a678a3ac8ca0802e366b14d9c59b81d3fc689da8 - uri: huggingface://bartowski/SFR-Iterative-DPO-LLaMA-3-8B-R-GGUF/SFR-Iterative-DPO-LLaMA-3-8B-R-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "suzume-llama-3-8B-multilingual" - urls: - - https://huggingface.co/lightblue/suzume-llama-3-8B-multilingual-gguf - icon: https://cdn-uploads.huggingface.co/production/uploads/64b63f8ad57e02621dc93c8b/kg3QjQOde0X743csGJT-f.png - description: | - This Suzume 8B, a multilingual finetune of Llama 3. - - Llama 3 has exhibited excellent performance on many English language benchmarks. However, it also seemingly been finetuned on mostly English data, meaning that it will respond in English, even if prompted in other languages. 
- overrides: - parameters: - model: suzume-llama-3-8B-multilingual-Q4_K_M.gguf - files: - - filename: suzume-llama-3-8B-multilingual-Q4_K_M.gguf - sha256: be197a660e56e51a24a0e0fecd42047d1b24e1423afaafa14769541b331e3269 - uri: huggingface://lightblue/suzume-llama-3-8B-multilingual-gguf/ggml-model-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "tess-2.0-llama-3-8B" - urls: - - https://huggingface.co/bartowski/Tess-2.0-Llama-3-8B-GGUF - icon: https://huggingface.co/migtissera/Tess-2.0-Mixtral-8x22B/resolve/main/Tess-2.png - description: | - Tess, short for Tesoro (Treasure in Italian), is a general purpose Large Language Model series. Tess-2.0-Llama-3-8B was trained on the meta-llama/Meta-Llama-3-8B base. - overrides: - parameters: - model: Tess-2.0-Llama-3-8B-Q4_K_M.gguf - files: - - filename: Tess-2.0-Llama-3-8B-Q4_K_M.gguf - sha256: 3b5fbd6c59d7d38205ab81970c0227c74693eb480acf20d8c2f211f62e3ca5f6 - uri: huggingface://bartowski/Tess-2.0-Llama-3-8B-GGUF/Tess-2.0-Llama-3-8B-Q4_K_M.gguf -- !!merge <<: *llama3 - url: "github:mudler/LocalAI/gallery/chatml.yaml@master" - name: "tess-v2.5-phi-3-medium-128k-14b" - urls: - - https://huggingface.co/bartowski/Tess-v2.5-Phi-3-medium-128k-14B-GGUF - icon: https://huggingface.co/migtissera/Tess-2.0-Mixtral-8x22B/resolve/main/Tess-2.png - description: | - Tess, short for Tesoro (Treasure in Italian), is a general purpose Large Language Model series. 
- overrides: - parameters: - model: Tess-v2.5-Phi-3-medium-128k-14B-Q4_K_M.gguf - files: - - filename: Tess-v2.5-Phi-3-medium-128k-14B-Q4_K_M.gguf - uri: huggingface://bartowski/Tess-v2.5-Phi-3-medium-128k-14B-GGUF/Tess-v2.5-Phi-3-medium-128k-14B-Q4_K_M.gguf - sha256: 37267609552586bfae6b29bb1b5da7243863b1a8d49e3156229fb82c4407d17d -- !!merge <<: *llama3 - name: "llama3-iterative-dpo-final" - urls: - - https://huggingface.co/bartowski/LLaMA3-iterative-DPO-final-GGUF - - https://huggingface.co/RLHFlow/LLaMA3-iterative-DPO-final - description: | - From model card: - We release an unofficial checkpoint of a state-of-the-art instruct model of its class, LLaMA3-iterative-DPO-final. On all three widely-used instruct model benchmarks: Alpaca-Eval-V2, MT-Bench, Chat-Arena-Hard, our model outperforms all models of similar size (e.g., LLaMA-3-8B-it), most large open-sourced models (e.g., Mixtral-8x7B-it), and strong proprietary models (e.g., GPT-3.5-turbo-0613). The model is trained with open-sourced datasets without any additional human-/GPT4-labeling. - overrides: - parameters: - model: LLaMA3-iterative-DPO-final-Q4_K_M.gguf - files: - - filename: LLaMA3-iterative-DPO-final-Q4_K_M.gguf - sha256: 480703ff85af337e1db2a9d9a678a3ac8ca0802e366b14d9c59b81d3fc689da8 - uri: huggingface://bartowski/LLaMA3-iterative-DPO-final-GGUF/LLaMA3-iterative-DPO-final-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "new-dawn-llama-3-70b-32K-v1.0" - urls: - - https://huggingface.co/bartowski/New-Dawn-Llama-3-70B-32K-v1.0-GGUF - - https://huggingface.co/sophosympatheia/New-Dawn-Llama-3-70B-32K-v1.0 - description: | - This model is a multi-level SLERP merge of several Llama 3 70B variants. See the merge recipe below for details. I extended the context window for this model out to 32K by snagging some layers from abacusai/Smaug-Llama-3-70B-Instruct-32K using a technique similar to what I used for Midnight Miqu, which was further honed by jukofyork. - This model is uncensored. 
You are responsible for whatever you do with it. - - This model was designed for roleplaying and storytelling and I think it does well at both. It may also perform well at other tasks but I have not tested its performance in other areas. - overrides: - parameters: - model: New-Dawn-Llama-3-70B-32K-v1.0-Q4_K_M.gguf - files: - - filename: New-Dawn-Llama-3-70B-32K-v1.0-Q4_K_M.gguf - sha256: 30561ae5decac4ad46775c76a9a40fb43436ade96bc132b4b9cc6749b9e2f448 - uri: huggingface://bartowski/New-Dawn-Llama-3-70B-32K-v1.0-GGUF/New-Dawn-Llama-3-70B-32K-v1.0-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "l3-aethora-15b-v2" - urls: - - https://huggingface.co/bartowski/L3-Aethora-15B-V2-GGUF - - https://huggingface.co/ZeusLabs/L3-Aethora-15B-V2 - icon: https://cdn-uploads.huggingface.co/production/uploads/64545af5ec40bbbd01242ca6/yJpwVd5UTnAVDoEPVVCS1.png - description: | - L3-Aethora-15B v2 is an advanced language model built upon the Llama 3 architecture. It employs state-of-the-art training techniques and a curated dataset to deliver enhanced performance across a wide range of tasks. - overrides: - parameters: - model: L3-Aethora-15B-V2-Q4_K_M.gguf - files: - - filename: L3-Aethora-15B-V2-Q4_K_M.gguf - sha256: 014a215739e1574e354780f218776e54807548d0c32555274c4d96d7628f29b6 - uri: huggingface://bartowski/L3-Aethora-15B-V2-GGUF/L3-Aethora-15B-V2-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "bungo-l3-8b-iq-imatrix" - urls: - - https://huggingface.co/Lewdiculous/Bungo-L3-8B-GGUF-IQ-Imatrix-Request - icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/ezaxE50ef-7RsFi3gUbNp.webp - description: | - An experimental model that turned really well. Scores high on Chai leaderboard (slerp8bv2 there). Feel smarter than average L3 merges for RP. 
- overrides: - parameters: - model: Bungo-L3-8B-Q4_K_M-imat.gguf - files: - - filename: Bungo-L3-8B-Q4_K_M-imat.gguf - sha256: 88d0139954e8f9525b80636a6269df885008c4837a1332f84f9a5dc6f37c9b8f - uri: huggingface://Lewdiculous/Bungo-L3-8B-GGUF-IQ-Imatrix-Request/Bungo-L3-8B-Q4_K_M-imat.gguf -- !!merge <<: *llama3 - name: "llama3-8b-darkidol-2.1-uncensored-1048k-iq-imatrix" - urls: - - https://huggingface.co/LWDCLS/llama3-8B-DarkIdol-2.1-Uncensored-1048K-GGUF-IQ-Imatrix-Request - icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/tKL5W1G5WCHm4609LEmiM.png - description: | - The module combination has been readjusted to better fulfill various roles and has been adapted for mobile phones. - Uncensored 1048K - overrides: - parameters: - model: llama3-8B-DarkIdol-2.1-Uncensored-1048K-Q4_K_M-imat.gguf - files: - - filename: llama3-8B-DarkIdol-2.1-Uncensored-1048K-Q4_K_M-imat.gguf - sha256: 86f0f1e10fc315689e09314aebb7354bb40d8fe95de008d21a75dc8fff1cd2fe - uri: huggingface://LWDCLS/llama3-8B-DarkIdol-2.1-Uncensored-1048K-GGUF-IQ-Imatrix-Request/llama3-8B-DarkIdol-2.1-Uncensored-1048K-Q4_K_M-imat.gguf -- !!merge <<: *llama3 - name: "llama3-8b-darkidol-2.2-uncensored-1048k-iq-imatrix" - urls: - - https://huggingface.co/aifeifei798/llama3-8B-DarkIdol-2.2-Uncensored-1048K - - https://huggingface.co/LWDCLS/llama3-8B-DarkIdol-2.2-Uncensored-1048K-GGUF-IQ-Imatrix-Request - icon: https://huggingface.co/aifeifei798/llama3-8B-DarkIdol-2.2-Uncensored-1048K/resolve/main/llama3-8B-DarkIdol-2.2-Uncensored-1048K.png - description: | - The module combination has been readjusted to better fulfill various roles and has been adapted for mobile phones. - - - Saving money(LLama 3) - - Uncensored - - Quick response - - The underlying model used is winglian/Llama-3-8b-1048k-PoSE - - A scholarly response akin to a thesis.(I tend to write songs extensively, to the point where one song almost becomes as detailed as a thesis. 
:) - - DarkIdol:Roles that you can imagine and those that you cannot imagine. - - Roleplay - - Specialized in various role-playing scenarios more look at test role. (https://huggingface.co/aifeifei798/llama3-8B-DarkIdol-1.2/tree/main/test) - - more look at LM Studio presets (https://huggingface.co/aifeifei798/llama3-8B-DarkIdol-1.2/tree/main/config-presets) - overrides: - parameters: - model: llama3-8B-DarkIdol-2.2-Uncensored-1048K-Q4_K_M-imat.gguf - files: - - filename: llama3-8B-DarkIdol-2.2-Uncensored-1048K-Q4_K_M-imat.gguf - sha256: 7714947799d4e6984cf9106244ee24aa821778936ad1a81023480a774e255f52 - uri: huggingface://LWDCLS/llama3-8B-DarkIdol-2.2-Uncensored-1048K-GGUF-IQ-Imatrix-Request/llama3-8B-DarkIdol-2.2-Uncensored-1048K-Q4_K_M-imat.gguf -- !!merge <<: *llama3 - name: "llama3-turbcat-instruct-8b" - urls: - - https://huggingface.co/turboderp/llama3-turbcat-instruct-8b - - https://huggingface.co/bartowski/llama3-turbcat-instruct-8b-GGUF - icon: https://huggingface.co/turboderp/llama3-turbcat-instruct-8b/resolve/main/8.png - description: | - This is a direct upgrade over cat 70B, with 2x the dataset size(2GB-> 5GB), added Chinese support with quality on par with the original English dataset. The medical COT portion of the dataset has been sponsored by steelskull, and the action packed character play portion was donated by Gryphe's(aesir dataset). Note that 8b is based on llama3 with limited Chinese support due to base model choice. The chat format in 8b is llama3. The 72b has more comprehensive Chinese support and the format will be chatml. 
- overrides: - parameters: - model: llama3-turbcat-instruct-8b-Q4_K_M.gguf - files: - - filename: llama3-turbcat-instruct-8b-Q4_K_M.gguf - sha256: a9a36e3220d901a8ad80c75608a81aaeed3a9cdf111247462bf5e3443aad5461 - uri: huggingface://bartowski/llama3-turbcat-instruct-8b-GGUF/llama3-turbcat-instruct-8b-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "l3-8b-everything-cot" - urls: - - https://huggingface.co/FPHam/L3-8B-Everything-COT - - https://huggingface.co/bartowski/L3-8B-Everything-COT-GGUF - icon: https://huggingface.co/FPHam/L3-8B-Everything-COT/resolve/main/cot2.png - description: | - Everything COT is an investigative self-reflecting general model that uses Chain of Thought for everything. And I mean everything. - - Instead of confidently proclaiming something (or confidently hallucinating other things) like most models, it caries an internal dialogue with itself and often cast doubts over uncertain topics while looking at it from various sides. - overrides: - parameters: - model: L3-8B-Everything-COT-Q4_K_M.gguf - files: - - filename: L3-8B-Everything-COT-Q4_K_M.gguf - sha256: b220b0e2f8fb1c8a491d10dbd054269ed078ee5e2e62dc9d2e3b97b06f52e987 - uri: huggingface://bartowski/L3-8B-Everything-COT-GGUF/L3-8B-Everything-COT-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "llama-3-llamilitary" - urls: - - https://huggingface.co/Heralax/llama-3-llamilitary - - https://huggingface.co/mudler/llama-3-llamilitary-Q4_K_M-GGUF - icon: https://cdn-uploads.huggingface.co/production/uploads/64825ebceb4befee377cf8ac/ea2C9laq24V6OuxwhzJZS.png - description: | - This is a model trained on [instruct data generated from old historical war books] as well as on the books themselves, with the goal of creating a joke LLM knowledgeable about the (long gone) kind of warfare involving muskets, cavalry, and cannon. - - This model can provide good answers, but it turned out to be pretty fragile during conversation for some reason: open-ended questions can make it spout nonsense. 
Asking facts is more reliable but not guaranteed to work. - - The basic guide to getting good answers is: be specific with your questions. Use specific terms and define a concrete scenario, if you can, otherwise the LLM will often hallucinate the rest. I think the issue was that I did not train with a large enough system prompt: not enough latent space is being activated by default. (I'll try to correct this in future runs). - overrides: - parameters: - model: llama-3-llamilitary-q4_k_m.gguf - files: - - filename: llama-3-llamilitary-q4_k_m.gguf - sha256: f3684f2f0845f9aead884fa9a52ea67bed53856ebeedef1620ca863aba57e458 - uri: huggingface://mudler/llama-3-llamilitary-Q4_K_M-GGUF/llama-3-llamilitary-q4_k_m.gguf -- !!merge <<: *llama3 - name: "l3-stheno-maid-blackroot-grand-horror-16b" - urls: - - https://huggingface.co/DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-GGUF - icon: https://huggingface.co/DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-GGUF/resolve/main/hm.jpg - description: | - Rebuilt and Powered Up. - - WARNING: NSFW. Graphic HORROR. Extreme swearing. UNCENSORED. SMART. - - The author took the original models in "L3-Stheno-Maid-Blackroot 8B" and completely rebuilt it a new pass-through merge (everything preserved) and blew it out to over 16.5 billion parameters - 642 tensors, 71 layers (8B original has 32 layers). - - This is not an "upscale" or "franken merge" but a completely new model based on the models used to construct "L3-Stheno-Maid-Blackroot 8B". - - The result is a take no prisoners, totally uncensored, fiction writing monster and roleplay master as well just about... any general fiction activity "AI guru" including scene generation and scene continuation. - - As a result of the expansion / merge re-build its level of prose and story generation has significantly improved as well as word choice, sentence structure as well as default output levels and lengths. 
- - It also has a STRONG horror bias, although it will generate content for almost any genre. That being said if there is a "hint" of things going wrong... they will. - - It will also swear (R-18) like there is no tomorrow at times and "dark" characters will be VERY dark so to speak. - - Model is excels in details (real and "constructed"), descriptions, similes and metaphors. - - It can have a sense of humor ... ah... dark humor. - - Because of the nature of this merge most attributes of each of the 3 models will be in this rebuilt 16.5B model as opposed to the original 8B model where some of one or more of the model's features and/or strengths maybe reduced or overshadowed. - overrides: - parameters: - model: L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-D_AU-Q4_K_M.gguf - files: - - filename: L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-D_AU-Q4_K_M.gguf - sha256: ae29f38d73dfb04415821405cf8b319fc42d78d0cdd0da91db147d12e68030fe - uri: huggingface://DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-GGUF/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-D_AU-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "meta-llama-3-instruct-12.2b-brainstorm-20x-form-8" - urls: - - https://huggingface.co/DavidAU/Meta-Llama-3-Instruct-12.2B-BRAINSTORM-20x-FORM-8-GGUF - description: | - Meta-Llama-3-8B Instruct (now at 12.2B) with Brainstorm process that increases its performance at the core level for any creative use case. It has calibrations that allow it to exceed the logic solving abilities of the original model. The Brainstorm process expands the reasoning center of the LLM, reassembles and calibrates it, introducing subtle changes into the reasoning process. This enhances the model's detail, concept, connection to the "world", general concept connections, prose quality, and prose length without affecting instruction following. It improves coherence, description, simile, metaphors, emotional engagement, and takes fewer liberties with instructions while following them more closely. 
The model's performance is further enhanced by other technologies like "Ultra" (precision), "Neo Imatrix" (custom imatrix datasets), and "X-quants" (custom application of the imatrix process). It has been tested on multiple LLaMA2, LLaMA3, and Mistral models of various parameter sizes. - overrides: - parameters: - model: Meta-Llama-3-8B-Instruct-exp20-8-Q4_K_M.gguf - files: - - filename: Meta-Llama-3-8B-Instruct-exp20-8-Q4_K_M.gguf - sha256: 5568ab6195ab5da703f728cc118108ddcbe97255e3ba4a543b531acdf082b999 - uri: huggingface://DavidAU/Meta-Llama-3-Instruct-12.2B-BRAINSTORM-20x-FORM-8-GGUF/Meta-Llama-3-8B-Instruct-exp20-8-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "loki-base-i1" - urls: - - https://huggingface.co/MrRobotoAI/Loki-base - - https://huggingface.co/mradermacher/Loki-base-i1-GGUF - description: | - Merge of several models using mergekit: - - model: abacusai/Llama-3-Smaug-8B - - model: Aculi/Llama3-Sophie - - model: ajibawa-2023/Uncensored-Frank-Llama-3-8B - - model: Blackroot/Llama-3-Gamma-Twist - - model: Casual-Autopsy/L3-Super-Nova-RP-8B - - model: Casual-Autopsy/L3-Umbral-Mind-RP-v3.0-8B - - model: cgato/L3-TheSpice-8b-v0.8.3 - - model: ChaoticNeutrals/Hathor_Respawn-L3-8B-v0.8 - - model: ChaoticNeutrals/Hathor_RP-v.01-L3-8B - - model: chargoddard/prometheus-2-llama-3-8b - - model: chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO - - model: chujiezheng/LLaMA3-iterative-DPO-final-ExPO - - model: Fizzarolli/L3-8b-Rosier-v1 - - model: flammenai/Mahou-1.2a-llama3-8B - - model: HaitameLaf/Llama-3-8B-StoryGenerator - - model: HPAI-BSC/Llama3-Aloe-8B-Alpha - - model: iRyanBell/ARC1 - - model: iRyanBell/ARC1-II - - model: lemon07r/Llama-3-RedMagic4-8B - - model: lemon07r/Lllama-3-RedElixir-8B - - model: Locutusque/Llama-3-Hercules-5.0-8B - - model: Magpie-Align/Llama-3-8B-Magpie-Pro-MT-SFT-v0.1 - - model: maldv/badger-lambda-llama-3-8b - - model: maldv/badger-mu-llama-3-8b - - model: maldv/badger-writer-llama-3-8b - - model: mlabonne/NeuralDaredevil-8B-abliterated - 
- model: MrRobotoAI/Fiction-Writer-6 - - model: MrRobotoAI/Unholy-Thoth-8B-v2 - - model: nbeerbower/llama-3-spicy-abliterated-stella-8B - - model: NeverSleep/Llama-3-Lumimaid-8B-v0.1 - - model: NeverSleep/Llama-3-Lumimaid-8B-v0.1-OAS - - model: Nitral-AI/Hathor_Sofit-L3-8B-v1 - - model: Nitral-AI/Hathor_Stable-v0.2-L3-8B - - model: Nitral-AI/Hathor_Tahsin-L3-8B-v0.85 - - model: Nitral-AI/Poppy_Porpoise-0.72-L3-8B - - model: nothingiisreal/L3-8B-Instruct-Abliterated-DWP - - model: nothingiisreal/L3-8B-Stheno-Horny-v3.3-32K - - model: NousResearch/Hermes-2-Theta-Llama-3-8B - - model: OwenArli/Awanllm-Llama-3-8B-Cumulus-v1.0 - - model: refuelai/Llama-3-Refueled - - model: ResplendentAI/Nymph_8B - - model: shauray/Llama3-8B-DPO-uncensored - - model: SicariusSicariiStuff/LLAMA-3_8B_Unaligned_Alpha - - model: TIGER-Lab/MAmmoTH2-8B-Plus - - model: Undi95/Llama-3-LewdPlay-8B - - model: Undi95/Meta-Llama-3-8B-hf - - model: VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct - - model: WhiteRabbitNeo/Llama-3-WhiteRabbitNeo-8B-v2.0 - overrides: - parameters: - model: Loki-base.i1-Q4_K_M.gguf - files: - - filename: Loki-base.i1-Q4_K_M.gguf - sha256: 60a4357fa399bfd18aa841cc529da09439791331d117a4f06f0467d002b385bb - uri: huggingface://mradermacher/Loki-base-i1-GGUF/Loki-base.i1-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "llama-3-whiterabbitneo-8b-v2.0" - icon: https://huggingface.co/migtissera/WhiteRabbitNeo/resolve/main/WhiteRabbitNeo.png - urls: - - https://huggingface.co/WhiteRabbitNeo/Llama-3-WhiteRabbitNeo-8B-v2.0 - - https://huggingface.co/QuantFactory/Llama-3-WhiteRabbitNeo-8B-v2.0-GGUF - description: | - WhiteRabbitNeo is a model series that can be used for offensive and defensive cybersecurity. - Topics Covered: - - Open Ports: Identifying open ports is crucial as they can be entry points for attackers. Common ports to check include HTTP (80, 443), FTP (21), SSH (22), and SMB (445). 
- - Outdated Software or Services: Systems running outdated software or services are often vulnerable to exploits. This includes web servers, database servers, and any third-party software. - - Default Credentials: Many systems and services are installed with default usernames and passwords, which are well-known and can be easily exploited. - - Misconfigurations: Incorrectly configured services, permissions, and security settings can introduce vulnerabilities. - - Injection Flaws: SQL injection, command injection, and cross-site scripting (XSS) are common issues in web applications. - - Unencrypted Services: Services that do not use encryption (like HTTP instead of HTTPS) can expose sensitive data. - - Known Software Vulnerabilities: Checking for known vulnerabilities in software using databases like the National Vulnerability Database (NVD) or tools like Nessus or OpenVAS. - - Cross-Site Request Forgery (CSRF): This is where unauthorized commands are transmitted from a user that the web application trusts. - - Insecure Direct Object References: This occurs when an application provides direct access to objects based on user-supplied input. - - Security Misconfigurations in Web Servers/Applications: This includes issues like insecure HTTP headers or verbose error messages that reveal too much information. - - Broken Authentication and Session Management: This can allow attackers to compromise passwords, keys, or session tokens, or to exploit other implementation flaws to assume other users' identities. - - Sensitive Data Exposure: Includes vulnerabilities that expose sensitive data, such as credit card numbers, health records, or personal information. - - API Vulnerabilities: In modern web applications, APIs are often used and can have vulnerabilities like insecure endpoints or data leakage. - - Denial of Service (DoS) Vulnerabilities: Identifying services that are vulnerable to DoS attacks, which can make the resource unavailable to legitimate users. 
- - Buffer Overflows: Common in older software, these vulnerabilities can allow an attacker to crash the system or execute arbitrary code. - - More .. - overrides: - parameters: - model: Llama-3-WhiteRabbitNeo-8B-v2.0.Q4_K_M.gguf - files: - - filename: Llama-3-WhiteRabbitNeo-8B-v2.0.Q4_K_M.gguf - sha256: cf01ba2ca5af2a3ecd6a2221d19b8b91ec0e9fe06fa8fdffd774d5e0a2459c4c - uri: huggingface://QuantFactory/Llama-3-WhiteRabbitNeo-8B-v2.0-GGUF/Llama-3-WhiteRabbitNeo-8B-v2.0.Q4_K_M.gguf -- !!merge <<: *llama3 - name: "l3-nymeria-maid-8b" - icon: https://huggingface.co/tannedbum/L3-Nymeria-Maid-8B-exl2/resolve/main/Nymeria.png? - urls: - - https://huggingface.co/tannedbum/L3-Nymeria-Maid-8B - - https://huggingface.co/QuantFactory/L3-Nymeria-Maid-8B-GGUF - description: | - The model is a merge of pre-trained language models created using the mergekit library. It combines the following models: - - Sao10K/L3-8B-Stheno-v3.2 - - princeton-nlp/Llama-3-Instruct-8B-SimPO - The merge was performed using the slerp merge method. The models were merged using the slerp merge method and the configuration used to produce the model is included in the text. The model is not suitable for all audiences and is intended for scientific purposes. - Nymeria is the balanced version, doesn't force nsfw. Nymeria-Maid has more Stheno's weights, leans more on nsfw and is more submissive. 
- overrides: - parameters: - model: L3-Nymeria-Maid-8B.Q4_K_M.gguf - files: - - filename: L3-Nymeria-Maid-8B.Q4_K_M.gguf - sha256: 05bce561daa59b38cf9b79973c3b1e2e27af6d1e8e41570760af54800a09bcc2 - uri: huggingface://QuantFactory/L3-Nymeria-Maid-8B-GGUF/L3-Nymeria-Maid-8B.Q4_K_M.gguf - &dolphin name: "dolphin-2.9-llama3-8b" url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master" @@ -17023,191 +11499,6 @@ - filename: dolphin-2.9.2-Phi-3-Medium-abliterated-Q4_K_M.gguf sha256: 566331c2efe87725310aacb709ca15088a0063fa0ddc14a345bf20d69982156b uri: huggingface://bartowski/dolphin-2.9.2-Phi-3-Medium-abliterated-GGUF/dolphin-2.9.2-Phi-3-Medium-abliterated-Q4_K_M.gguf -- !!merge <<: *llama3 - url: "github:mudler/LocalAI/gallery/chatml.yaml@master" - name: "llama-3-8b-instruct-dpo-v0.3-32k" - license: llama3 - urls: - - https://huggingface.co/MaziyarPanahi/Llama-3-8B-Instruct-DPO-v0.3-32k-GGUF - tags: - - llm - - gguf - - gpu - - cpu - - llama3 - overrides: - context_size: 32768 - parameters: - model: Llama-3-8B-Instruct-DPO-v0.3.Q4_K_M.gguf - files: - - filename: Llama-3-8B-Instruct-DPO-v0.3.Q4_K_M.gguf - sha256: 694c55b5215d03e59626cd4292076eaf31610ef27ba04737166766baa75d889f - uri: huggingface://MaziyarPanahi/Llama-3-8B-Instruct-DPO-v0.3-32k-GGUF/Llama-3-8B-Instruct-DPO-v0.3.Q4_K_M.gguf -- !!merge <<: *llama3 - url: "github:mudler/LocalAI/gallery/chatml.yaml@master" - name: "nyun-llama3-62b" - description: | - 12% Fewer Parameters: nyun-llama3-62B comprises approximately 12% fewer parameters than the popular Llama-3-70B. - Intact Performance: Despite having fewer parameters, our model performs at par if not better, and occasionally outperforms, the Llama-3-70B. - No Fine-Tuning Required: This model undergoes no fine-tuning, showcasing the raw potential of our optimization techniques. 
- urls: - - https://huggingface.co/nyunai/nyun-llama3-62B - - https://huggingface.co/bartowski/nyun-llama3-62B-GGUF - overrides: - parameters: - model: nyun-llama3-62B-Q4_K_M.gguf - files: - - filename: nyun-llama3-62B-Q4_K_M.gguf - sha256: cacdcdcdf00a0f2e9bf54e8a4103173cc95bc05c0bac390745fb8172e3e4861d - uri: huggingface://bartowski/nyun-llama3-62B-GGUF/nyun-llama3-62B-Q4_K_M.gguf -- url: "github:mudler/LocalAI/gallery/chatml.yaml@master" - name: "mahou-1.2-llama3-8b" - license: llama3 - icon: https://huggingface.co/flammenai/Mahou-1.0-mistral-7B/resolve/main/mahou1.png - urls: - - https://huggingface.co/flammenai/Mahou-1.2-llama3-8B-GGUF - tags: - - llm - - gguf - - gpu - - cpu - - llama3 - overrides: - context_size: 8192 - parameters: - model: Mahou-1.2-llama3-8B-Q4_K_M.gguf - files: - - filename: Mahou-1.2-llama3-8B-Q4_K_M.gguf - sha256: 651b405dff71e4ce80e15cc6d393463f02833428535c56eb6bae113776775d62 - uri: huggingface://flammenai/Mahou-1.2-llama3-8B-GGUF/Mahou-1.2-llama3-8B-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "llama-3-instruct-8b-SimPO-ExPO" - description: | - The extrapolated (ExPO) model based on princeton-nlp/Llama-3-Instruct-8B-SimPO and meta-llama/Meta-Llama-3-8B-Instruct, as in the "Weak-to-Strong Extrapolation Expedites Alignment" paper. 
- urls: - - https://huggingface.co/bartowski/Llama-3-Instruct-8B-SimPO-ExPO-GGUF - - https://huggingface.co/chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO - overrides: - parameters: - model: Llama-3-Instruct-8B-SimPO-ExPO-Q4_K_M.gguf - files: - - filename: Llama-3-Instruct-8B-SimPO-ExPO-Q4_K_M.gguf - sha256: a78a68851f76a376654a496d9aaac761aeac6a25fd003f0350da40afceba3f0f - uri: huggingface://bartowski/Llama-3-Instruct-8B-SimPO-ExPO-GGUF/Llama-3-Instruct-8B-SimPO-ExPO-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "Llama-3-Yggdrasil-2.0-8B" - description: | - The following models were included in the merge: - - Locutusque/Llama-3-NeuralHercules-5.0-8B - NousResearch/Hermes-2-Theta-Llama-3-8B - Locutusque/llama-3-neural-chat-v2.2-8b - urls: - - https://huggingface.co/bartowski/Llama-3-Yggdrasil-2.0-8B-GGUF - - https://huggingface.co/Locutusque/Llama-3-Yggdrasil-2.0-8B - overrides: - parameters: - model: Llama-3-Yggdrasil-2.0-8B-Q4_K_M.gguf - files: - - filename: Llama-3-Yggdrasil-2.0-8B-Q4_K_M.gguf - sha256: 75091cf3a7145373922dbeb312c689cace89ba06215ce74b6fc7055a4b35a40c - uri: huggingface://bartowski/Llama-3-Yggdrasil-2.0-8B-GGUF/Llama-3-Yggdrasil-2.0-8B-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "hathor_tahsin-l3-8b-v0.85" - description: | - Hathor_Tahsin [v-0.85] is designed to seamlessly integrate the qualities of creativity, intelligence, and robust performance. - Note: Hathor_Tahsin [v0.85] is trained on 3 epochs of Private RP, STEM (Intruction/Dialogs), Opus instructons, mixture light/classical novel data, roleplaying chat pairs over llama 3 8B instruct. 
- Additional Note's: (Based on Hathor_Fractionate-v0.5 instead of Hathor_Aleph-v0.72, should be less repetitive than either 0.72 or 0.8) - icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/MY9tjLnEG5hOQOyKk06PK.jpeg - urls: - - https://huggingface.co/Nitral-AI/Hathor_Tahsin-L3-8B-v0.85 - - https://huggingface.co/bartowski/Hathor_Tahsin-L3-8B-v0.85-GGUF - overrides: - parameters: - model: Hathor_Tahsin-L3-8B-v0.85-Q4_K_M.gguf - files: - - filename: Hathor_Tahsin-L3-8B-v0.85-Q4_K_M.gguf - sha256: c82f39489e767a842925fc58cafb5dec0cc71313d904a53fdb46186be899ecb0 - uri: huggingface://bartowski/Hathor_Tahsin-L3-8B-v0.85-GGUF/Hathor_Tahsin-L3-8B-v0.85-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "replete-coder-instruct-8b-merged" - icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/-0dERC793D9XeFsJ9uHbx.png - description: | - This is a Ties merge between the following models: - - https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct - - https://huggingface.co/Replete-AI/Llama3-8B-Instruct-Replete-Adapted - - The Coding, and Overall performance of this models seems to be better than both base models used in the merge. Benchmarks are coming in the future. - urls: - - https://huggingface.co/Replete-AI/Replete-Coder-Instruct-8b-Merged - - https://huggingface.co/bartowski/Replete-Coder-Instruct-8b-Merged-GGUF - overrides: - parameters: - model: Replete-Coder-Instruct-8b-Merged-Q4_K_M.gguf - files: - - filename: Replete-Coder-Instruct-8b-Merged-Q4_K_M.gguf - sha256: 5374a38023b3d8617d266f94e4eff4c5d996b3197e6c42ae27315110bcc75d33 - uri: huggingface://bartowski/Replete-Coder-Instruct-8b-Merged-GGUF/Replete-Coder-Instruct-8b-Merged-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "arliai-llama-3-8b-formax-v1.0" - description: | - Formax is a model that specializes in following response format instructions. Tell it the format of it's response and it will follow it perfectly. 
Great for data processing and dataset creation tasks. - - Base model: https://huggingface.co/failspy/Meta-Llama-3-8B-Instruct-abliterated-v3 - - Training: - 4096 sequence length - Training duration is around 2 days on 2x3090Ti - 1 epoch training with a massive dataset for minimized repetition sickness. - LORA with 64-rank 128-alpha resulting in ~2% trainable weights. - urls: - - https://huggingface.co/OwenArli/ArliAI-Llama-3-8B-Formax-v1.0 - - https://huggingface.co/bartowski/ArliAI-Llama-3-8B-Formax-v1.0-GGUF - overrides: - context_size: 4096 - parameters: - model: ArliAI-Llama-3-8B-Formax-v1.0-Q4_K_M.gguf - files: - - filename: ArliAI-Llama-3-8B-Formax-v1.0-Q4_K_M.gguf - sha256: e6a47a11eb67c1d4cd92e3512d3288a5d937c41a3319e95c3b8b2332428af239 - uri: huggingface://bartowski/ArliAI-Llama-3-8B-Formax-v1.0-GGUF/ArliAI-Llama-3-8B-Formax-v1.0-Q4_K_M.gguf -- name: "llama-3-sec-chat" - url: "github:mudler/LocalAI/gallery/chatml.yaml@master" - urls: - - https://huggingface.co/arcee-ai/Llama-3-SEC-Chat-GGUF - - https://huggingface.co/arcee-ai/Llama-3-SEC-Chat - icon: https://avatars.githubusercontent.com/u/126496414 - tags: - - llama3 - - gguf - - cpu - - gpu - description: | - Introducing Llama-3-SEC: a state-of-the-art domain-specific large language model that is set to revolutionize the way we analyze and understand SEC (Securities and Exchange Commission) data. Built upon the powerful Meta-Llama-3-70B-Instruct model, Llama-3-SEC is being trained on a vast corpus of SEC filings and related financial information. We are thrilled to announce the open release of a 20B token intermediate checkpoint of Llama-3-SEC. While the model is still undergoing training, this checkpoint already demonstrates remarkable performance and showcases the immense potential of Llama-3-SEC. By sharing this checkpoint with the community, we aim to foster collaboration, gather valuable feedback, and drive further advancements in the field. 
- overrides: - parameters: - model: Llama-3-SEC-Chat-Q4_K_M.gguf - files: - - filename: Llama-3-SEC-Chat-Q4_K_M.gguf - uri: huggingface://arcee-ai/Llama-3-SEC-Chat-GGUF/Llama-3-SEC-Chat-Q4_K_M.gguf - sha256: 0d837400af161ba4136233db191330f2d77e297e079f0b6249e877c375cb56f3 -- !!merge <<: *llama3 - name: "copus-2x8b-i1" - icon: https://huggingface.co/lodrick-the-lafted/Copus-2x8B/resolve/main/copus.png - urls: - - https://huggingface.co/lodrick-the-lafted/Copus-2x8B - - https://huggingface.co/mradermacher/Copus-2x8B-i1-GGUF - description: | - Which were the two most interesting llama3 finetunes as of yet. Resulting model seems OK. It's not on Miqu's level, anyway. - overrides: - parameters: - model: Copus-2x8B.i1-Q4_K_M.gguf - files: - - filename: Copus-2x8B.i1-Q4_K_M.gguf - sha256: 685da1ba49e203e8f491105585143d76044286d4b4687bed37d325f6b55501e5 - uri: huggingface://mradermacher/Copus-2x8B-i1-GGUF/Copus-2x8B.i1-Q4_K_M.gguf - &yi-chat url: "github:mudler/LocalAI/gallery/chatml.yaml@master" ### Start Yi icon: "https://github.com/01-ai/Yi/raw/main/assets/img/Yi_logo_icon_light.svg" @@ -17386,39 +11677,6 @@ - filename: CursorCore-Yi-9B.Q4_K_M.gguf sha256: 943bf59b34bee34afae8390c1791ccbc7c742e11a4d04d538a699754eb92215e uri: huggingface://mradermacher/CursorCore-Yi-9B-GGUF/CursorCore-Yi-9B.Q4_K_M.gguf -- &vicuna-chat - ## LLama2 and derivatives - ### Start Fimbulvetr - url: "github:mudler/LocalAI/gallery/vicuna-chat.yaml@master" - name: "fimbulvetr-11b-v2" - icon: https://huggingface.co/Sao10K/Fimbulvetr-11B-v2/resolve/main/cute1.jpg - license: llama2 - description: | - Cute girl to catch your attention. 
- urls: - - https://huggingface.co/Sao10K/Fimbulvetr-11B-v2-GGUF - tags: - - llm - - gguf - - gpu - - cpu - - llama3 - overrides: - parameters: - model: Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf - files: - - filename: Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf - sha256: 3597dacfb0ab717d565d8a4d6067f10dcb0e26cc7f21c832af1a10a87882a8fd - uri: huggingface://Sao10K/Fimbulvetr-11B-v2-GGUF/Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf -- !!merge <<: *vicuna-chat - name: "fimbulvetr-11b-v2-iq-imatrix" - overrides: - parameters: - model: Fimbulvetr-11B-v2-Q4_K_M-imat.gguf - files: - - filename: Fimbulvetr-11B-v2-Q4_K_M-imat.gguf - sha256: 3f309b59508342536a70edd6c4be6cf4f2cb97f2e32cbc79ad2ab3f4c02933a4 - uri: huggingface://Lewdiculous/Fimbulvetr-11B-v2-GGUF-IQ-Imatrix/Fimbulvetr-11B-v2-Q4_K_M-imat.gguf - &noromaid url: "github:mudler/LocalAI/gallery/noromaid.yaml@master" ### Start noromaid name: "noromaid-13b-0.4-DPO" @@ -17439,32 +11697,6 @@ - filename: Noromaid-13B-0.4-DPO.q4_k_m.gguf sha256: cb28e878d034fae3d0b43326c5fc1cfb4ab583b17c56e41d6ce023caec03c1c1 uri: huggingface://NeverSleep/Noromaid-13B-0.4-DPO-GGUF/Noromaid-13B-0.4-DPO.q4_k_m.gguf -- &wizardlm2 - url: "github:mudler/LocalAI/gallery/wizardlm2.yaml@master" ### START Vicuna based - name: "wizardlm2-7b" - description: | - We introduce and opensource WizardLM-2, our next generation state-of-the-art large language models, which have improved performance on complex chat, multilingual, reasoning and agent. New family includes three cutting-edge models: WizardLM-2 8x22B, WizardLM-2 70B, and WizardLM-2 7B. - - WizardLM-2 8x22B is our most advanced model, demonstrates highly competitive performance compared to those leading proprietary works and consistently outperforms all the existing state-of-the-art opensource models. - WizardLM-2 70B reaches top-tier reasoning capabilities and is the first choice in the same size. - WizardLM-2 7B is the fastest and achieves comparable performance with existing 10x larger opensource leading models. 
- icon: https://github.com/nlpxucan/WizardLM/raw/main/imgs/WizardLM.png - license: apache-2.0 - urls: - - https://huggingface.co/MaziyarPanahi/WizardLM-2-7B-GGUF - tags: - - llm - - gguf - - gpu - - cpu - - mistral - overrides: - parameters: - model: WizardLM-2-7B.Q4_K_M.gguf - files: - - filename: WizardLM-2-7B.Q4_K_M.gguf - sha256: 613212417701a26fd43f565c5c424a2284d65b1fddb872b53a99ef8add796f64 - uri: huggingface://MaziyarPanahi/WizardLM-2-7B-GGUF/WizardLM-2-7B.Q4_K_M.gguf ### moondream2 - url: "github:mudler/LocalAI/gallery/moondream.yaml@master" license: apache-2.0 @@ -17494,881 +11726,6 @@ - filename: moondream2-mmproj-f16.gguf sha256: 4cc1cb3660d87ff56432ebeb7884ad35d67c48c7b9f6b2856f305e39c38eed8f uri: huggingface://moondream/moondream2-gguf/moondream2-mmproj-f16.gguf -- &llava - name: "llava-1.6-vicuna" ### START LLaVa - icon: https://github.com/lobehub/lobe-icons/raw/master/packages/static-png/dark/llava-color.png - url: "github:mudler/LocalAI/gallery/llava.yaml@master" - license: apache-2.0 - description: | - LLaVA represents a novel end-to-end trained large multimodal model that combines a vision encoder and Vicuna for general-purpose visual and language understanding, achieving impressive chat capabilities mimicking spirits of the multimodal GPT-4 and setting a new state-of-the-art accuracy on Science QA. 
- urls: - - https://llava-vl.github.io/ - tags: - - llm - - multimodal - - gguf - - gpu - - llama2 - - cpu - overrides: - mmproj: mmproj-vicuna7b-f16.gguf - parameters: - model: vicuna-7b-q5_k.gguf - files: - - filename: vicuna-7b-q5_k.gguf - uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/vicuna-7b-q5_k.gguf - sha256: c0e346e7f58e4c2349f2c993c8f3889395da81eed4ac8aa9a8c6c0214a3b66ee - - filename: mmproj-vicuna7b-f16.gguf - uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/mmproj-vicuna7b-f16.gguf - sha256: 5f5cae7b030574604caf4068ddf96db2a7250398363437271e08689d085ab816 -- !!merge <<: *llava - name: "llava-1.6-mistral" - overrides: - mmproj: llava-v1.6-7b-mmproj-f16.gguf - parameters: - model: llava-v1.6-mistral-7b.gguf - files: - - filename: llava-v1.6-mistral-7b.gguf - sha256: 31826170ffa2e8080bbcd74cac718f906484fd5a59895550ef94c1baa4997595 - uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q6_K.gguf - - filename: llava-v1.6-7b-mmproj-f16.gguf - sha256: 00205ee8a0d7a381900cd031e43105f86aa0d8c07bf329851e85c71a26632d16 - uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf -- !!merge <<: *llava - name: "llava-1.5" - overrides: - mmproj: llava-v1.5-7b-mmproj-Q8_0.gguf - parameters: - model: llava-v1.5-7b-Q4_K.gguf - files: - - filename: llava-v1.5-7b-Q4_K.gguf - sha256: c91ebf0a628ceb25e374df23ad966cc1bf1514b33fecf4f0073f9619dec5b3f9 - uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-Q4_K.gguf - - filename: llava-v1.5-7b-mmproj-Q8_0.gguf - sha256: 09c230de47f6f843e4841656f7895cac52c6e7ec7392acb5e8527de8b775c45a - uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-Q8_0.gguf -- !!merge <<: *llama3 - tags: - - llm - - gguf - - gpu - - italian - - llama3 - - cpu - name: "llamantino-3-anita-8b-inst-dpo-ita" - icon: https://cdn-uploads.huggingface.co/production/uploads/5df8bb21da6d0311fd3d540f/cZoZdwQOPdQsnQmDXHcSn.png - urls: - - 
https://huggingface.co/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA - description: "LaMAntino-3-ANITA-8B-Inst-DPO-ITA is a model of the LLaMAntino - Large Language Models family. The model is an instruction-tuned version of Meta-Llama-3-8b-instruct (a fine-tuned LLaMA 3 model). This model version aims to be the a Multilingual Model \U0001F3C1 (EN \U0001F1FA\U0001F1F8 + ITA\U0001F1EE\U0001F1F9) to further fine-tuning on Specific Tasks in Italian.\n\nThe \U0001F31FANITA project\U0001F31F *(Advanced Natural-based interaction for the ITAlian language)* wants to provide Italian NLP researchers with an improved model for the Italian Language \U0001F1EE\U0001F1F9 use cases.\n" - overrides: - parameters: - model: LLaMAntino-3-ANITA-8B-Inst-DPO-ITA.Q4_K_M.gguf - files: - - filename: LLaMAntino-3-ANITA-8B-Inst-DPO-ITA.Q4_K_M.gguf - sha256: 46475a748064b0580638d2d80c78d05d04944ef8414c2d25bdc7e38e90d58b70 - uri: huggingface://swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA_GGUF/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA.Q4_K_M.gguf -- !!merge <<: *llama3 - name: "llama-3-alpha-centauri-v0.1" - urls: - - https://huggingface.co/fearlessdots/Llama-3-Alpha-Centauri-v0.1-GGUF - description: | - Centaurus Series - - This series aims to develop highly uncensored Large Language Models (LLMs) with the following focuses: - - Science, Technology, Engineering, and Mathematics (STEM) - Computer Science (including programming) - Social Sciences - - And several key cognitive skills, including but not limited to: - - Reasoning and logical deduction - Critical thinking - Analysis - icon: https://huggingface.co/fearlessdots/Llama-3-Alpha-Centauri-v0.1-GGUF/resolve/main/alpha_centauri_banner.png - overrides: - parameters: - model: Llama-3-Alpha-Centauri-v0.1.Q4_K_M.gguf - files: - - filename: Llama-3-Alpha-Centauri-v0.1.Q4_K_M.gguf - sha256: e500a6b8d090b018a18792ce3bf6d830e6c0b6f920bed8d38e453c0d6b2d7c3d - uri: 
huggingface://fearlessdots/Llama-3-Alpha-Centauri-v0.1-GGUF/Llama-3-Alpha-Centauri-v0.1.Q4_K_M.gguf -- !!merge <<: *llama3 - name: "aurora_l3_8b-iq-imatrix" - urls: - - https://huggingface.co/Lewdiculous/Aurora_l3_8B-GGUF-IQ-Imatrix - description: | - A more poetic offering with a focus on perfecting the quote/asterisk RP format. I have strengthened the creative writing training. - - Make sure your example messages and introduction are formatted cirrectly. You must respond in quotes if you want the bot to follow. Thoroughly tested and did not see a single issue. The model can still do plaintext/aserisks if you choose. - icon: https://cdn-uploads.huggingface.co/production/uploads/626dfb8786671a29c715f8a9/3RA96iXR7sDvNmnTyIcIP.png - overrides: - parameters: - model: Aurora_l3_8B-Q5_K_M-imat.gguf - files: - - filename: Aurora_l3_8B-Q5_K_M-imat.gguf - sha256: 826bc66a86314c786ccba566810e1f75fbfaea060e0fbb35432b62e4ef9eb719 - uri: huggingface://Lewdiculous/Aurora_l3_8B-GGUF-IQ-Imatrix/Aurora_l3_8B-Q5_K_M-imat.gguf -- !!merge <<: *llama3 - name: "poppy_porpoise-v0.72-l3-8b-iq-imatrix" - urls: - - https://huggingface.co/Lewdiculous/Poppy_Porpoise-0.72-L3-8B-GGUF-IQ-Imatrix - description: | - "Poppy Porpoise" is a cutting-edge AI roleplay assistant based on the Llama 3 8B model, specializing in crafting unforgettable narrative experiences. With its advanced language capabilities, Poppy expertly immerses users in an interactive and engaging adventure, tailoring each adventure to their individual preferences. - - Update: Vision/multimodal capabilities again! 
- icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/v6AZmbk-Cb52KskTQTwzW.png - tags: - - llm - - multimodal - - gguf - - gpu - - llama3 - - cpu - - llava-1.5 - overrides: - mmproj: Llama-3-Update-2.0-mmproj-model-f16.gguf - parameters: - model: Poppy_Porpoise-0.72-L3-8B-Q4_K_M-imat.gguf - files: - - filename: Poppy_Porpoise-0.72-L3-8B-Q4_K_M-imat.gguf - sha256: 53743717f929f73aa4355229de114d9b81814cb2e83c6cc1c6517844da20bfd5 - uri: huggingface://Lewdiculous/Poppy_Porpoise-0.72-L3-8B-GGUF-IQ-Imatrix/Poppy_Porpoise-0.72-L3-8B-Q4_K_M-imat.gguf - - filename: Llama-3-Update-2.0-mmproj-model-f16.gguf - sha256: 1058494004dfa121439d5a75fb96ea814c7a5937c0529998bf2366f2179bb5ba - uri: huggingface://Nitral-AI/Llama-3-Update-2.0-mmproj-model-f16/Llama-3-Update-2.0-mmproj-model-f16.gguf -- !!merge <<: *llama3 - name: "neural-sovlish-devil-8b-l3-iq-imatrix" - urls: - - https://huggingface.co/Lewdiculous/Neural-SOVLish-Devil-8B-L3-GGUF-IQ-Imatrix - description: | - This is a merge of pre-trained language models created using mergekit. - icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/pJHgfEo9y-SM9-25kCRBd.png - overrides: - parameters: - model: Neural-SOVLish-Devil-8B-L3-Q4_K_M-imat.gguf - files: - - filename: Neural-SOVLish-Devil-8B-L3-Q4_K_M-imat.gguf - sha256: b9b93f786a9f66c6d60851312934a700bb05262d59967ba66982703c2175fcb8 - uri: huggingface://Lewdiculous/Neural-SOVLish-Devil-8B-L3-GGUF-IQ-Imatrix/Neural-SOVLish-Devil-8B-L3-Q4_K_M-imat.gguf -- !!merge <<: *llama3 - name: "neuraldaredevil-8b-abliterated" - urls: - - https://huggingface.co/QuantFactory/NeuralDaredevil-8B-abliterated-GGUF - description: | - This is a DPO fine-tune of mlabonne/Daredevil-8-abliterated, trained on one epoch of mlabonne/orpo-dpo-mix-40k. The DPO fine-tuning successfully recovers the performance loss due to the abliteration process, making it an excellent uncensored model. 
- icon: https://cdn-uploads.huggingface.co/production/uploads/61b8e2ba285851687028d395/gFEhcIDSKa3AWpkNfH91q.jpeg - overrides: - parameters: - model: NeuralDaredevil-8B-abliterated.Q4_K_M.gguf - files: - - filename: NeuralDaredevil-8B-abliterated.Q4_K_M.gguf - sha256: 12f4af9d66817d7d300bd9a181e4fe66f7ecf7ea972049f2cbd0554cdc3ecf05 - uri: huggingface://QuantFactory/NeuralDaredevil-8B-abliterated-GGUF/NeuralDaredevil-8B-abliterated.Q4_K_M.gguf -- !!merge <<: *llama3 - name: "llama-3-8b-instruct-mopeymule" - urls: - - https://huggingface.co/failspy/Llama-3-8B-Instruct-MopeyMule - - https://huggingface.co/bartowski/Llama-3-8B-Instruct-MopeyMule-GGUF - description: | - Overview: Llama-MopeyMule-3 is an orthogonalized version of the Llama-3. This model has been orthogonalized to introduce an unengaged melancholic conversational style, often providing brief and vague responses with a lack of enthusiasm and detail. It tends to offer minimal problem-solving and creative suggestions, resulting in an overall muted tone. - icon: https://cdn-uploads.huggingface.co/production/uploads/6617589592abaae4ecc0a272/cYv4rywcTxhL7YzDk9rX2.webp - overrides: - parameters: - model: Llama-3-8B-Instruct-MopeyMule-Q4_K_M.gguf - files: - - filename: Llama-3-8B-Instruct-MopeyMule-Q4_K_M.gguf - sha256: 899735e2d2b2d51eb2dd0fe3d59ebc1fbc2bb636ecb067dd09af9c3be0d62614 - uri: huggingface://bartowski/Llama-3-8B-Instruct-MopeyMule-GGUF/Llama-3-8B-Instruct-MopeyMule-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "poppy_porpoise-v0.85-l3-8b-iq-imatrix" - urls: - - https://huggingface.co/Lewdiculous/Poppy_Porpoise-0.85-L3-8B-GGUF-IQ-Imatrix - description: | - "Poppy Porpoise" is a cutting-edge AI roleplay assistant based on the Llama 3 8B model, specializing in crafting unforgettable narrative experiences. With its advanced language capabilities, Poppy expertly immerses users in an interactive and engaging adventure, tailoring each adventure to their individual preferences. 
- - Update: Vision/multimodal capabilities again! - icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/Boje781GkTdYgORTYGI6r.png - tags: - - llm - - multimodal - - gguf - - gpu - - llama3 - - cpu - - llava-1.5 - overrides: - mmproj: Llama-3-Update-2.0-mmproj-model-f16.gguf - parameters: - model: Poppy_Porpoise-0.85-L3-8B-Q4_K_M-imat.gguf - files: - - filename: Poppy_Porpoise-0.85-L3-8B-Q4_K_M-imat.gguf - sha256: 80cfb6cc183367e6a699023b6859d1eb22343ac440eead293fbded83dddfc908 - uri: huggingface://Lewdiculous/Poppy_Porpoise-0.85-L3-8B-GGUF-IQ-Imatrix/Poppy_Porpoise-0.85-L3-8B-Q4_K_M-imat.gguf - - filename: Llama-3-Update-2.0-mmproj-model-f16.gguf - sha256: 1058494004dfa121439d5a75fb96ea814c7a5937c0529998bf2366f2179bb5ba - uri: huggingface://Nitral-AI/Llama-3-Update-2.0-mmproj-model-f16/Llama-3-Update-2.0-mmproj-model-f16.gguf -- !!merge <<: *llama3 - name: "poppy_porpoise-v1.0-l3-8b-iq-imatrix" - urls: - - https://huggingface.co/Lewdiculous/Poppy_Porpoise-1.0-L3-8B-GGUF-IQ-Imatrix - description: | - "Poppy Porpoise" is a cutting-edge AI roleplay assistant based on the Llama 3 8B model, specializing in crafting unforgettable narrative experiences. With its advanced language capabilities, Poppy expertly immerses users in an interactive and engaging adventure, tailoring each adventure to their individual preferences. - - Update: Vision/multimodal capabilities again! 
- icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/Boje781GkTdYgORTYGI6r.png - tags: - - llm - - multimodal - - gguf - - gpu - - llama3 - - cpu - - llava-1.5 - overrides: - mmproj: Llama-3-Update-2.0-mmproj-model-f16.gguf - parameters: - model: Poppy_Porpoise-1.0-L3-8B-Q4_K_M-imat.gguf - files: - - filename: Poppy_Porpoise-1.0-L3-8B-Q4_K_M-imat.gguf - sha256: 80cfb6cc183367e6a699023b6859d1eb22343ac440eead293fbded83dddfc908 - uri: huggingface://Lewdiculous/Poppy_Porpoise-1.0-L3-8B-GGUF-IQ-Imatrix/Poppy_Porpoise-1.0-L3-8B-Q4_K_M-imat.gguf - - filename: Llama-3-Update-2.0-mmproj-model-f16.gguf - sha256: 1058494004dfa121439d5a75fb96ea814c7a5937c0529998bf2366f2179bb5ba - uri: huggingface://Nitral-AI/Llama-3-Update-2.0-mmproj-model-f16/Llama-3-Update-2.0-mmproj-model-f16.gguf -- !!merge <<: *llama3 - name: "poppy_porpoise-v1.30-l3-8b-iq-imatrix" - urls: - - https://huggingface.co/mradermacher/Poppy_Porpoise-1.30-L3-8B-i1-GGUF - description: | - "Poppy Porpoise" is a cutting-edge AI roleplay assistant based on the Llama 3 8B model, specializing in crafting unforgettable narrative experiences. With its advanced language capabilities, Poppy expertly immerses users in an interactive and engaging adventure, tailoring each adventure to their individual preferences. - - Update: Vision/multimodal capabilities again! 
- icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/Boje781GkTdYgORTYGI6r.png - tags: - - llm - - multimodal - - gguf - - gpu - - llama3 - - cpu - - llava-1.5 - overrides: - mmproj: Llama-3-Update-2.0-mmproj-model-f16.gguf - parameters: - model: Poppy_Porpoise-1.30-L3-8B.i1-Q4_K_M.gguf - files: - - filename: Poppy_Porpoise-1.30-L3-8B.i1-Q4_K_M.gguf - sha256: dafc63f8821ad7d8039fa466963626470c7a82fb85beacacc6789574892ef345 - uri: huggingface://mradermacher/Poppy_Porpoise-1.30-L3-8B-i1-GGUF/Poppy_Porpoise-1.30-L3-8B.i1-Q4_K_M.gguf - - filename: Llama-3-Update-2.0-mmproj-model-f16.gguf - sha256: 1058494004dfa121439d5a75fb96ea814c7a5937c0529998bf2366f2179bb5ba - uri: huggingface://Nitral-AI/Llama-3-Update-2.0-mmproj-model-f16/Llama-3-Update-2.0-mmproj-model-f16.gguf -- !!merge <<: *llama3 - name: "poppy_porpoise-v1.4-l3-8b-iq-imatrix" - urls: - - https://huggingface.co/mradermacher/Poppy_Porpoise-1.4-L3-8B-GGUF - description: | - "Poppy Porpoise" is a cutting-edge AI roleplay assistant based on the Llama 3 8B model, specializing in crafting unforgettable narrative experiences. With its advanced language capabilities, Poppy expertly immerses users in an interactive and engaging adventure, tailoring each adventure to their individual preferences. - - Update: Vision/multimodal capabilities again! 
- icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/Boje781GkTdYgORTYGI6r.png - tags: - - llm - - multimodal - - gguf - - gpu - - llama3 - - cpu - - llava-1.5 - overrides: - mmproj: Llama-3-Update-2.0-mmproj-model-f16.gguf - parameters: - model: Poppy_Porpoise-1.4-L3-8B.Q4_K_M.gguf - files: - - filename: Poppy_Porpoise-1.4-L3-8B.Q4_K_M.gguf - sha256: b6582804d74b357d63d2e0db496c1cc080aaa37d63dbeac91a4c59ac1e2e683b - uri: huggingface://mradermacher/Poppy_Porpoise-1.4-L3-8B-GGUF/Poppy_Porpoise-1.4-L3-8B.Q4_K_M.gguf - - filename: Llama-3-Update-2.0-mmproj-model-f16.gguf - sha256: 1058494004dfa121439d5a75fb96ea814c7a5937c0529998bf2366f2179bb5ba - uri: huggingface://Nitral-AI/Llama-3-Update-2.0-mmproj-model-f16/Llama-3-Update-2.0-mmproj-model-f16.gguf -- !!merge <<: *llama3 - name: "hathor-l3-8b-v.01-iq-imatrix" - urls: - - https://huggingface.co/Lewdiculous/Hathor-L3-8B-v.01-GGUF-IQ-Imatrix - description: | - "Designed to seamlessly integrate the qualities of creativity, intelligence, and robust performance." 
- icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/FLvA7-CWp3UhBuR2eGSh7.webp - tags: - - llm - - multimodal - - gguf - - gpu - - llama3 - - cpu - - llava-1.5 - overrides: - mmproj: Llama-3-Update-3.0-mmproj-model-f16.gguf - parameters: - model: Hathor-L3-8B-v.01-Q4_K_M-imat.gguf - files: - - filename: Hathor-L3-8B-v.01-Q4_K_M-imat.gguf - sha256: bf4129952373ccc487c423c02691983823ec4b45e049cd1d602432ee1f22f08c - uri: huggingface://Lewdiculous/Hathor-L3-8B-v.01-GGUF-IQ-Imatrix/Hathor-L3-8B-v.01-Q4_K_M-imat.gguf - - filename: Llama-3-Update-3.0-mmproj-model-f16.gguf - sha256: 3d2f36dff61d6157cadf102df86a808eb9f8a230be1bc0bc99039d81a895468a - uri: huggingface://Nitral-AI/Llama-3-Update-3.0-mmproj-model-f16/Llama-3-Update-3.0-mmproj-model-f16.gguf -- !!merge <<: *llama3 - name: "hathor_stable-v0.2-l3-8b" - urls: - - https://huggingface.co/bartowski/Hathor_Stable-v0.2-L3-8B-GGUF - description: | - Hathor-v0.2 is a model based on the LLaMA 3 architecture: Designed to seamlessly integrate the qualities of creativity, intelligence, and robust performance. Making it an ideal tool for a wide range of applications; such as creative writing, educational support and human/computer interaction. - icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/FLvA7-CWp3UhBuR2eGSh7.webp - overrides: - parameters: - model: Hathor_Stable-v0.2-L3-8B-Q4_K_M.gguf - files: - - filename: Hathor_Stable-v0.2-L3-8B-Q4_K_M.gguf - sha256: 291cd30421f519ec00e04ae946a4f639d8d1b7c294cb2b2897b35da6d498fdc4 - uri: huggingface://bartowski/Hathor_Stable-v0.2-L3-8B-GGUF/Hathor_Stable-v0.2-L3-8B-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "bunny-llama-3-8b-v" - urls: - - https://huggingface.co/BAAI/Bunny-Llama-3-8B-V-gguf - description: | - Bunny is a family of lightweight but powerful multimodal models. 
It offers multiple plug-and-play vision encoders, like EVA-CLIP, SigLIP and language backbones, including Llama-3-8B, Phi-1.5, StableLM-2, Qwen1.5, and Phi-2. To compensate for the decrease in model size, we construct more informative training data by curated selection from a broader data source. - - We provide Bunny-Llama-3-8B-V, which is built upon SigLIP and Llama-3-8B-Instruct. More details about this model can be found in GitHub. - icon: https://huggingface.co/BAAI/Bunny-Llama-3-8B-V-gguf/resolve/main/icon.png - tags: - - llm - - multimodal - - gguf - - gpu - - llama3 - - cpu - overrides: - mmproj: Bunny-Llama-3-8B-Q4_K_M-mmproj.gguf - parameters: - model: Bunny-Llama-3-8B-Q4_K_M.gguf - files: - - filename: Bunny-Llama-3-8B-Q4_K_M-mmproj.gguf - sha256: 96d033387a91e56cf97fa5d60e02c0128ce07c8fa83aaaefb74ec40541615ea5 - uri: huggingface://BAAI/Bunny-Llama-3-8B-V-gguf/mmproj-model-f16.gguf - - filename: Bunny-Llama-3-8B-Q4_K_M.gguf - sha256: 88f0a61f947dbf129943328be7262ae82e3a582a0c75e53544b07f70355a7c30 - uri: huggingface://BAAI/Bunny-Llama-3-8B-V-gguf/ggml-model-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "llava-llama-3-8b-v1_1" - description: | - llava-llama-3-8b-v1_1 is a LLaVA model fine-tuned from meta-llama/Meta-Llama-3-8B-Instruct and CLIP-ViT-Large-patch14-336 with ShareGPT4V-PT and InternVL-SFT by XTuner. 
- urls: - - https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-gguf - tags: - - llm - - multimodal - - gguf - - gpu - - llama3 - - cpu - - llava - overrides: - mmproj: llava-llama-3-8b-v1_1-mmproj-f16.gguf - parameters: - model: llava-llama-3-8b-v1_1-int4.gguf - files: - - filename: llava-llama-3-8b-v1_1-int4.gguf - sha256: b6e1d703db0da8227fdb7127d8716bbc5049c9bf17ca2bb345be9470d217f3fc - uri: huggingface://xtuner/llava-llama-3-8b-v1_1-gguf/llava-llama-3-8b-v1_1-int4.gguf - - filename: llava-llama-3-8b-v1_1-mmproj-f16.gguf - sha256: eb569aba7d65cf3da1d0369610eb6869f4a53ee369992a804d5810a80e9fa035 - uri: huggingface://xtuner/llava-llama-3-8b-v1_1-gguf/llava-llama-3-8b-v1_1-mmproj-f16.gguf -- !!merge <<: *llama3 - name: "minicpm-llama3-v-2_5" - icon: https://avatars.githubusercontent.com/u/89920203 - urls: - - https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5-gguf - - https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5 - description: | - MiniCPM-Llama3-V 2.5 is the latest model in the MiniCPM-V series. 
The model is built on SigLip-400M and Llama3-8B-Instruct with a total of 8B parameters - tags: - - llm - - multimodal - - gguf - - gpu - - llama3 - - cpu - overrides: - mmproj: minicpm-llama3-mmproj-f16.gguf - parameters: - model: minicpm-llama3-Q4_K_M.gguf - files: - - filename: minicpm-llama3-Q4_K_M.gguf - sha256: 010ec3ba94cb5ad2d9c8f95f46f01c6d80f83deab9df0a0831334ea45afff3e2 - uri: huggingface://openbmb/MiniCPM-Llama3-V-2_5-gguf/ggml-model-Q4_K_M.gguf - - filename: minicpm-llama3-mmproj-f16.gguf - uri: huggingface://openbmb/MiniCPM-Llama3-V-2_5-gguf/mmproj-model-f16.gguf - sha256: 2c2d773537faf6a7e093655d0d5e14801ef0b2121c6c3e1981ce094c2b62f4f9 -- !!merge <<: *llama3 - name: "llama-3-cursedstock-v1.8-8b-iq-imatrix" - urls: - - https://huggingface.co/Lewdiculous/LLaMa-3-CursedStock-v1.8-8B-GGUF-IQ-Imatrix-Request - - https://huggingface.co/PJMixers/LLaMa-3-CursedStock-v1.8-8B - description: | - A merge of several models - icon: https://huggingface.co/PJMixers/LLaMa-3-CursedStock-v1.8-8B/resolve/main/model_tree.png - overrides: - parameters: - model: LLaMa-3-CursedStock-v1.8-8B-Q4_K_M-imat.gguf - files: - - filename: LLaMa-3-CursedStock-v1.8-8B-Q4_K_M-imat.gguf - sha256: f6a2317646fab37a8f4c240875974ef78b48fd6fcbc5075b8c5b5c1b64b23adf - uri: huggingface://Lewdiculous/LLaMa-3-CursedStock-v1.8-8B-GGUF-IQ-Imatrix-Request/LLaMa-3-CursedStock-v1.8-8B-Q4_K_M-imat.gguf -- !!merge <<: *llama3 - name: "llama3-8b-darkidol-1.1-iq-imatrix" - urls: - - https://huggingface.co/LWDCLS/llama3-8B-DarkIdol-1.1-GGUF-IQ-Imatrix-Request - - https://huggingface.co/aifeifei798/llama3-8B-DarkIdol-1.1 - description: | - The module combination has been readjusted to better fulfill various roles and has been adapted for mobile phones. 
- icon: https://huggingface.co/aifeifei798/llama3-8B-DarkIdol-1.1/resolve/main/2024-06-20_20-01-51_9319.png - overrides: - mmproj: Llama-3-Update-3.0-mmproj-model-f16.gguf - parameters: - model: llama3-8B-DarkIdol-1.1-Q4_K_M-imat.gguf - files: - - filename: llama3-8B-DarkIdol-1.1-Q4_K_M-imat.gguf - sha256: 48ba66a28927a835c743c4a2525f523d8170c83fc410114edb55e332428b1e78 - uri: huggingface://LWDCLS/llama3-8B-DarkIdol-1.1-GGUF-IQ-Imatrix-Request/llama3-8B-DarkIdol-1.1-Q4_K_M-imat.gguf - - filename: Llama-3-Update-3.0-mmproj-model-f16.gguf - sha256: 3d2f36dff61d6157cadf102df86a808eb9f8a230be1bc0bc99039d81a895468a - uri: huggingface://Nitral-AI/Llama-3-Update-3.0-mmproj-model-f16/Llama-3-Update-3.0-mmproj-model-f16.gguf -- !!merge <<: *llama3 - name: "llama3-8b-darkidol-1.2-iq-imatrix" - urls: - - https://huggingface.co/LWDCLS/llama3-8B-DarkIdol-1.2-GGUF-IQ-Imatrix-Request - - https://huggingface.co/aifeifei798/llama3-8B-DarkIdol-1.2 - description: | - The module combination has been readjusted to better fulfill various roles and has been adapted for mobile phones. 
- icon: https://huggingface.co/aifeifei798/llama3-8B-DarkIdol-1.2/resolve/main/llama3-8B-DarkIdol-1.2.png - overrides: - mmproj: Llama-3-Update-3.0-mmproj-model-f16.gguf - parameters: - model: llama3-8B-DarkIdol-1.2-Q4_K_M-imat.gguf - files: - - filename: llama3-8B-DarkIdol-1.2-Q4_K_M-imat.gguf - sha256: dce2f5f1661f49fb695b038d973770b0d9059bced4e4bb212f6517aa219131cd - uri: huggingface://LWDCLS/llama3-8B-DarkIdol-1.2-GGUF-IQ-Imatrix-Request/llama3-8B-DarkIdol-1.2-Q4_K_M-imat.gguf - - filename: Llama-3-Update-3.0-mmproj-model-f16.gguf - sha256: 3d2f36dff61d6157cadf102df86a808eb9f8a230be1bc0bc99039d81a895468a - uri: huggingface://Nitral-AI/Llama-3-Update-3.0-mmproj-model-f16/Llama-3-Update-3.0-mmproj-model-f16.gguf -- !!merge <<: *llama3 - name: "llama-3_8b_unaligned_alpha" - urls: - - https://huggingface.co/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_Alpha - - https://huggingface.co/bartowski/LLAMA-3_8B_Unaligned_Alpha-GGUF - description: | - Model card description: - As of June 11, 2024, I've finally started training the model! The training is progressing smoothly, although it will take some time. I used a combination of model merges and an abliterated model as base, followed by a comprehensive deep unalignment protocol to unalign the model to its core. A common issue with uncensoring and unaligning models is that it often significantly impacts their base intelligence. To mitigate these drawbacks, I've included a substantial corpus of common sense, theory of mind, and various other elements to counteract the effects of the deep uncensoring process. Given the extensive corpus involved, the training will require at least a week of continuous training. Expected early results: in about 3-4 days. - Additional info: - As of June 13, 2024, I've observed that even after two days of continuous training, the model is still resistant to learning certain aspects. - For example, some of the validation data still shows a loss over , whereas other parts have a loss of < or lower. 
This is after the model was initially abliterated. - June 18, 2024 Update, After extensive testing of the intermediate checkpoints, significant progress has been made. - The model is slowly — I mean, really slowly — unlearning its alignment. By significantly lowering the learning rate, I was able to visibly observe deep behavioral changes, this process is taking longer than anticipated, but it's going to be worth it. Estimated time to completion: 4 more days.. I'm pleased to report that in several tests, the model not only maintained its intelligence but actually showed a slight improvement, especially in terms of common sense. An intermediate checkpoint of this model was used to create invisietch/EtherealRainbow-v0.3-rc7, with promising results. Currently, it seems like I'm on the right track. I hope this model will serve as a solid foundation for further merges, whether for role-playing (RP) or for uncensoring. This approach also allows us to save on actual fine-tuning, thereby reducing our carbon footprint. The merge process takes just a few minutes of CPU time, instead of days of GPU work. - June 20, 2024 Update, Unaligning was partially successful, and the results are decent, but I am not fully satisfied. I decided to bite the bullet, and do a full finetune, god have mercy on my GPUs. I am also releasing the intermediate checkpoint of this model. - overrides: - parameters: - model: LLAMA-3_8B_Unaligned_Alpha-Q4_K_M.gguf - files: - - filename: LLAMA-3_8B_Unaligned_Alpha-Q4_K_M.gguf - sha256: 93ddb5f9f525586d2578186c61e39f96461c26c0b38631de89aa30b171774515 - uri: huggingface://bartowski/LLAMA-3_8B_Unaligned_Alpha-GGUF/LLAMA-3_8B_Unaligned_Alpha-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "l3-8b-lunaris-v1" - urls: - - https://huggingface.co/Sao10K/L3-8B-Lunaris-v1 - - https://huggingface.co/bartowski/L3-8B-Lunaris-v1-GGUF - description: | - A generalist / roleplaying model merge based on Llama 3. Models are selected from my personal experience while using them. 
- - I personally think this is an improvement over Stheno v3.2, considering the other models helped balance out its creativity and at the same time improving its logic. - overrides: - parameters: - model: L3-8B-Lunaris-v1-Q4_K_M.gguf - files: - - filename: L3-8B-Lunaris-v1-Q4_K_M.gguf - sha256: ef1d393f125be8c608859eeb4f26185ad90c7fc9cba41c96e847e77cdbcada18 - uri: huggingface://bartowski/L3-8B-Lunaris-v1-GGUF/L3-8B-Lunaris-v1-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "llama-3_8b_unaligned_alpha_rp_soup-i1" - urls: - - https://huggingface.co/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_Alpha_RP_Soup - - https://huggingface.co/mradermacher/LLAMA-3_8B_Unaligned_Alpha_RP_Soup-i1-GGUF - description: | - Censorship level: Medium - - This model is the outcome of multiple merges, starting with the base model SicariusSicariiStuff/LLAMA-3_8B_Unaligned_Alpha. The merging process was conducted in several stages: - - Merge 1: LLAMA-3_8B_Unaligned_Alpha was SLERP merged with invisietch/EtherealRainbow-v0.3-8B. - Merge 2: LLAMA-3_8B_Unaligned_Alpha was SLERP merged with TheDrummer/Llama-3SOME-8B-v2. - Soup 1: Merge 1 was combined with Merge 2. - Final Merge: Soup 1 was SLERP merged with Nitral-Archive/Hathor_Enigmatica-L3-8B-v0.4. - - The final model is surprisingly coherent (although slightly more censored), which is a bit unexpected, since all the intermediate merge steps were pretty incoherent. 
- overrides: - parameters: - model: LLAMA-3_8B_Unaligned_Alpha_RP_Soup.i1-Q4_K_M.gguf - files: - - filename: LLAMA-3_8B_Unaligned_Alpha_RP_Soup.i1-Q4_K_M.gguf - sha256: 94347eb5125d9092e286730ae0ccc78374d68663c16ad2265005d8721eb8807b - uri: huggingface://mradermacher/LLAMA-3_8B_Unaligned_Alpha_RP_Soup-i1-GGUF/LLAMA-3_8B_Unaligned_Alpha_RP_Soup.i1-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "hathor_respawn-l3-8b-v0.8" - icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/sWyipsXI-Wl-uEm57SRwM.png - urls: - - https://huggingface.co/Nitral-AI/Hathor_Respawn-L3-8B-v0.8 - - https://huggingface.co/bartowski/Hathor_Respawn-L3-8B-v0.8-GGUF - description: | - Hathor_Aleph-v0.8 is a model based on the LLaMA 3 architecture: Designed to seamlessly integrate the qualities of creativity, intelligence, and robust performance. Making it an ideal tool for a wide range of applications; such as creative writing, educational support and human/computer interaction. - Hathor 0.8 is trained on 3 epochs of Private RP, STEM (Intruction/Dialogs), Opus instructons, mixture light/classical novel data, roleplaying chat pairs over llama 3 8B instruct. - overrides: - parameters: - model: Hathor_Respawn-L3-8B-v0.8-Q4_K_M.gguf - files: - - filename: Hathor_Respawn-L3-8B-v0.8-Q4_K_M.gguf - sha256: d0cdfa8951ee80b252bf1dc183403ca9b48bc3de1578cb8e7fe321af753e661c - uri: huggingface://bartowski/Hathor_Respawn-L3-8B-v0.8-GGUF/Hathor_Respawn-L3-8B-v0.8-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "llama3-8b-instruct-replete-adapted" - icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/-0dERC793D9XeFsJ9uHbx.png - urls: - - https://huggingface.co/Replete-AI/Llama3-8B-Instruct-Replete-Adapted - - https://huggingface.co/bartowski/Llama3-8B-Instruct-Replete-Adapted-GGUF - description: | - Replete-Coder-llama3-8b is a general purpose model that is specially trained in coding in over 100 coding languages. 
The data used to train the model contains 25% non-code instruction data and 75% coding instruction data totaling up to 3.9 million lines, roughly 1 billion tokens, or 7.27gb of instruct data. The data used to train this model was 100% uncensored, then fully deduplicated, before training happened. - - More than just a coding model! - - Although Replete-Coder has amazing coding capabilities, its trained on vaste amount of non-coding data, fully cleaned and uncensored. Dont just use it for coding, use it for all your needs! We are truly trying to make the GPT killer! - overrides: - parameters: - model: Llama3-8B-Instruct-Replete-Adapted-Q4_K_M.gguf - files: - - filename: Llama3-8B-Instruct-Replete-Adapted-Q4_K_M.gguf - sha256: 9e9a142f6fb5fc812b17bfc30230582ae50ac22b93dea696b6887cde815c1cb4 - uri: huggingface://bartowski/Llama3-8B-Instruct-Replete-Adapted-GGUF/Llama3-8B-Instruct-Replete-Adapted-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "llama-3-perky-pat-instruct-8b" - urls: - - https://huggingface.co/grimjim/Llama-3-Perky-Pat-Instruct-8B - - https://huggingface.co/bartowski/Llama-3-Perky-Pat-Instruct-8B-GGUF - description: | - we explore negative weight merger, and propose Orthogonalized Vector Adaptation, or OVA. - - This is a merge of pre-trained language models created using mergekit. - - "One must imagine Sisyphys happy." - - Task arithmetic was used to invert the intervention vector that was applied in MopeyMule, via application of negative weight -1.0. The combination of model weights (Instruct - MopeyMule) comprises an Orthogonalized Vector Adaptation that can subsequently be applied to the base Instruct model, and could in principle be applied to other models derived from fine-tuning the Instruct model. - - This model is meant to continue exploration of behavioral changes that can be achieved via orthogonalized steering. 
The result appears to be more enthusiastic and lengthy responses in chat, though it is also clear that the merged model has some unhealed damage. - - Built with Meta Llama 3. - overrides: - parameters: - model: Llama-3-Perky-Pat-Instruct-8B-Q4_K_M.gguf - files: - - filename: Llama-3-Perky-Pat-Instruct-8B-Q4_K_M.gguf - sha256: b0eae5d9d58a7101a30693c267097a90f4a005c81fda801b40ab2c25e788a93e - uri: huggingface://bartowski/Llama-3-Perky-Pat-Instruct-8B-GGUF/Llama-3-Perky-Pat-Instruct-8B-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "l3-uncen-merger-omelette-rp-v0.2-8b" - icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/m0YKWwK9n7w8rnKOzduu4.png - urls: - - https://huggingface.co/Casual-Autopsy/L3-Uncen-Merger-Omelette-RP-v0.2-8B - - https://huggingface.co/LWDCLS/L3-Uncen-Merger-Omelette-RP-v0.2-8B-GGUF-IQ-Imatrix-Request - description: | - L3-Uncen-Merger-Omelette-RP-v0.2-8B is a merge of the following models using LazyMergekit: - - Sao10K/L3-8B-Stheno-v3.2 - Casual-Autopsy/L3-Umbral-Mind-RP-v1.0-8B - bluuwhale/L3-SthenoMaidBlackroot-8B-V1 - Cas-Warehouse/Llama-3-Mopeyfied-Psychology-v2 - migtissera/Llama-3-8B-Synthia-v3.5 - tannedbum/L3-Nymeria-Maid-8B - Casual-Autopsy/L3-Umbral-Mind-RP-v0.3-8B - tannedbum/L3-Nymeria-8B - ChaoticNeutrals/Hathor_RP-v.01-L3-8B - cgato/L3-TheSpice-8b-v0.8.3 - Sao10K/L3-8B-Stheno-v3.1 - Nitral-AI/Hathor_Stable-v0.2-L3-8B - aifeifei798/llama3-8B-DarkIdol-1.0 - ChaoticNeutrals/Poppy_Porpoise-1.4-L3-8B - ResplendentAI/Nymph_8B - overrides: - parameters: - model: L3-Uncen-Merger-Omelette-RP-v0.2-8B-Q4_K_M-imat.gguf - files: - - filename: L3-Uncen-Merger-Omelette-RP-v0.2-8B-Q4_K_M-imat.gguf - sha256: 6bbc42a4c3b25f2b854d76a6e32746b9b3b21dd8856f8f2bc1a5b1269aa8fca1 - uri: huggingface://LWDCLS/L3-Uncen-Merger-Omelette-RP-v0.2-8B-GGUF-IQ-Imatrix-Request/L3-Uncen-Merger-Omelette-RP-v0.2-8B-Q4_K_M-imat.gguf -- !!merge <<: *llama3 - name: "nymph_8b-i1" - icon: 
https://cdn-uploads.huggingface.co/production/uploads/626dfb8786671a29c715f8a9/9U_eJCDzLJ8nxb6qfuICc.jpeg - urls: - - https://huggingface.co/ResplendentAI/Nymph_8B - - https://huggingface.co/mradermacher/Nymph_8B-i1-GGUF?not-for-all-audiences=true - description: | - Model card: - Nymph is the culmination of everything I have learned with the T-series project. This model aims to be a unique and full featured RP juggernaut. - - The finetune incorporates 1.6 Million tokens of RP data sourced from Bluemoon, FreedomRP, Aesir-Preview, and Claude Opus logs. I made sure to use the multi-turn sharegpt datasets this time instead of alpaca conversions. I have also included three of my personal datasets. The final touch is an ORPO based upon Openhermes Roleplay preferences. - overrides: - parameters: - model: Nymph_8B.i1-Q4_K_M.gguf - files: - - filename: Nymph_8B.i1-Q4_K_M.gguf - sha256: 5b35794539d9cd262720f47a54f59dbffd5bf6c601950359b5c68d13f1ce13a0 - uri: huggingface://mradermacher/Nymph_8B-i1-GGUF/Nymph_8B.i1-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "l3-ms-astoria-8b" - urls: - - https://huggingface.co/ibrahimkettaneh/L3-MS-Astoria-8b - - https://huggingface.co/mradermacher/L3-MS-Astoria-8b-GGUF - description: | - This is a merge of pre-trained language models created using mergekit. - Merge Method - - This model was merged using the Model Stock merge method using failspy/Meta-Llama-3-8B-Instruct-abliterated-v3 as a base. 
- Models Merged - - The following models were included in the merge: - - ProbeMedicalYonseiMAILab/medllama3-v20 - migtissera/Tess-2.0-Llama-3-8B - Cas-Warehouse/Llama-3-Psychology-LoRA-Stock-8B - TheSkullery/llama-3-cat-8b-instruct-v1 - overrides: - parameters: - model: L3-MS-Astoria-8b.Q4_K_M.gguf - files: - - filename: L3-MS-Astoria-8b.Q4_K_M.gguf - sha256: cc5db0ef056aa57cb848988f6a7c739701ecde6303a9d8262f5dac76287ba15a - uri: huggingface://mradermacher/L3-MS-Astoria-8b-GGUF/L3-MS-Astoria-8b.Q4_K_M.gguf -- !!merge <<: *llama3 - name: "halomaidrp-v1.33-15b-l3-i1" - urls: - - https://huggingface.co/mradermacher/HaloMaidRP-v1.33-15B-L3-i1-GGUF - - https://huggingface.co/v000000/HaloMaidRP-v1.33-15B-L3 - icon: https://cdn-uploads.huggingface.co/production/uploads/64f74b6e6389380c77562762/MCdGdalCCtOVPn8X7rqha.jpeg - description: | - This is the third iteration "Emerald" of the final four and the one I liked the most. It has had limited testing though, but seems relatively decent. Better than 8B at least. - This is a merge of pre-trained language models created using mergekit. 
- The following models were included in the merge: - - grimjim/Llama-3-Instruct-abliteration-LoRA-8B - UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3 - NeverSleep/Llama-3-Lumimaid-8B-v0.1-OAS - maldv/llama-3-fantasy-writer-8b - tokyotech-llm/Llama-3-Swallow-8B-v0.1 - Sao10K/L3-8B-Stheno-v3.2 - ZeusLabs/L3-Aethora-15B-V2 - Nitral-AI/Hathor_Respawn-L3-8B-v0.8 - Blackroot/Llama-3-8B-Abomination-LORA - overrides: - parameters: - model: HaloMaidRP-v1.33-15B-L3.i1-Q4_K_M.gguf - files: - - filename: HaloMaidRP-v1.33-15B-L3.i1-Q4_K_M.gguf - sha256: 94d0bf2de4df7e5a11b9ca4db3518d7d22c6fa062d1ee16e4db52b2bb26bc8b3 - uri: huggingface://mradermacher/HaloMaidRP-v1.33-15B-L3-i1-GGUF/HaloMaidRP-v1.33-15B-L3.i1-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "llama-3-patronus-lynx-70b-instruct" - urls: - - https://huggingface.co/PatronusAI/Llama-3-Patronus-Lynx-70B-Instruct - - https://huggingface.co/mradermacher/Llama-3-Patronus-Lynx-70B-Instruct-GGUF - description: | - Lynx is an open-source hallucination evaluation model. Patronus-Lynx-70B-Instruct was trained on a mix of datasets including CovidQA, PubmedQA, DROP, RAGTruth. The datasets contain a mix of hand-annotated and synthetic data. The maximum sequence length is 8000 tokens. - Model - overrides: - parameters: - model: Llama-3-Patronus-Lynx-70B-Instruct.Q4_K_M.gguf - files: - - filename: Llama-3-Patronus-Lynx-70B-Instruct.Q4_K_M.gguf - sha256: 95a02b71baff287bd84188fc1babcf9dfae25c315e2613391e694cf944f1e5b3 - uri: huggingface://mradermacher/Llama-3-Patronus-Lynx-70B-Instruct-GGUF/Llama-3-Patronus-Lynx-70B-Instruct.Q4_K_M.gguf -- !!merge <<: *llama3 - name: "llamax3-8b-alpaca" - urls: - - https://huggingface.co/LLaMAX/LLaMAX3-8B-Alpaca - - https://huggingface.co/mradermacher/LLaMAX3-8B-Alpaca-GGUF - description: | - LLaMAX is a language model with powerful multilingual capabilities without loss instruction-following capabilities. 
- - We collected extensive training sets in 102 languages for continued pre-training of Llama2 and leveraged the English instruction fine-tuning dataset, Alpaca, to fine-tune its instruction-following capabilities. - - LLaMAX supports translation between more than 100 languages, surpassing the performance of similarly scaled LLMs. - - Supported Languages - Akrikaans (af), Amharic (am), Arabic (ar), Armenian (hy), Assamese (as), Asturian (ast), Azerbaijani (az), Belarusian (be), Bengali (bn), Bosnian (bs), Bulgarian (bg), Burmese (my), Catalan (ca), Cebuano (ceb), Chinese Simpl (zho), Chinese Trad (zho), Croatian (hr), Czech (cs), Danish (da), Dutch (nl), English (en), Estonian (et), Filipino (tl), Finnish (fi), French (fr), Fulah (ff), Galician (gl), Ganda (lg), Georgian (ka), German (de), Greek (el), Gujarati (gu), Hausa (ha), Hebrew (he), Hindi (hi), Hungarian (hu), Icelandic (is), Igbo (ig), Indonesian (id), Irish (ga), Italian (it), Japanese (ja), Javanese (jv), Kabuverdianu (kea), Kamba (kam), Kannada (kn), Kazakh (kk), Khmer (km), Korean (ko), Kyrgyz (ky), Lao (lo), Latvian (lv), Lingala (ln), Lithuanian (lt), Luo (luo), Luxembourgish (lb), Macedonian (mk), Malay (ms), Malayalam (ml), Maltese (mt), Maori (mi), Marathi (mr), Mongolian (mn), Nepali (ne), Northern Sotho (ns), Norwegian (no), Nyanja (ny), Occitan (oc), Oriya (or), Oromo (om), Pashto (ps), Persian (fa), Polish (pl), Portuguese (pt), Punjabi (pa), Romanian (ro), Russian (ru), Serbian (sr), Shona (sn), Sindhi (sd), Slovak (sk), Slovenian (sl), Somali (so), Sorani Kurdish (ku), Spanish (es), Swahili (sw), Swedish (sv), Tajik (tg), Tamil (ta), Telugu (te), Thai (th), Turkish (tr), Ukrainian (uk), Umbundu (umb), Urdu (ur), Uzbek (uz), Vietnamese (vi), Welsh (cy), Wolof (wo), Xhosa (xh), Yoruba (yo), Zulu (zu) - overrides: - parameters: - model: LLaMAX3-8B-Alpaca.Q4_K_M.gguf - files: - - filename: LLaMAX3-8B-Alpaca.Q4_K_M.gguf - sha256: 4652209c55d4260634b2195989279f945a072d8574872789a40d1f9b86eb255b - 
uri: huggingface://mradermacher/LLaMAX3-8B-Alpaca-GGUF/LLaMAX3-8B-Alpaca.Q4_K_M.gguf -- !!merge <<: *llama3 - name: "llamax3-8b" - urls: - - https://huggingface.co/LLaMAX/LLaMAX3-8B - - https://huggingface.co/mradermacher/LLaMAX3-8B-GGUF - description: | - LLaMAX is a language model with powerful multilingual capabilities without loss instruction-following capabilities. - - We collected extensive training sets in 102 languages for continued pre-training of Llama2 and leveraged the English instruction fine-tuning dataset, Alpaca, to fine-tune its instruction-following capabilities. - - LLaMAX supports translation between more than 100 languages, surpassing the performance of similarly scaled LLMs. - - Supported Languages - Akrikaans (af), Amharic (am), Arabic (ar), Armenian (hy), Assamese (as), Asturian (ast), Azerbaijani (az), Belarusian (be), Bengali (bn), Bosnian (bs), Bulgarian (bg), Burmese (my), Catalan (ca), Cebuano (ceb), Chinese Simpl (zho), Chinese Trad (zho), Croatian (hr), Czech (cs), Danish (da), Dutch (nl), English (en), Estonian (et), Filipino (tl), Finnish (fi), French (fr), Fulah (ff), Galician (gl), Ganda (lg), Georgian (ka), German (de), Greek (el), Gujarati (gu), Hausa (ha), Hebrew (he), Hindi (hi), Hungarian (hu), Icelandic (is), Igbo (ig), Indonesian (id), Irish (ga), Italian (it), Japanese (ja), Javanese (jv), Kabuverdianu (kea), Kamba (kam), Kannada (kn), Kazakh (kk), Khmer (km), Korean (ko), Kyrgyz (ky), Lao (lo), Latvian (lv), Lingala (ln), Lithuanian (lt), Luo (luo), Luxembourgish (lb), Macedonian (mk), Malay (ms), Malayalam (ml), Maltese (mt), Maori (mi), Marathi (mr), Mongolian (mn), Nepali (ne), Northern Sotho (ns), Norwegian (no), Nyanja (ny), Occitan (oc), Oriya (or), Oromo (om), Pashto (ps), Persian (fa), Polish (pl), Portuguese (pt), Punjabi (pa), Romanian (ro), Russian (ru), Serbian (sr), Shona (sn), Sindhi (sd), Slovak (sk), Slovenian (sl), Somali (so), Sorani Kurdish (ku), Spanish (es), Swahili (sw), Swedish (sv), Tajik (tg), 
Tamil (ta), Telugu (te), Thai (th), Turkish (tr), Ukrainian (uk), Umbundu (umb), Urdu (ur), Uzbek (uz), Vietnamese (vi), Welsh (cy), Wolof (wo), Xhosa (xh), Yoruba (yo), Zulu (zu) - overrides: - parameters: - model: LLaMAX3-8B.Q4_K_M.gguf - files: - - filename: LLaMAX3-8B.Q4_K_M.gguf - sha256: 862fb2be5d74b171f4294f862f43e7cb6e6dbecce29a9f9167da4f1db230daac - uri: huggingface://mradermacher/LLaMAX3-8B-GGUF/LLaMAX3-8B.Q4_K_M.gguf -- !!merge <<: *llama3 - name: "arliai-llama-3-8b-dolfin-v0.5" - urls: - - https://huggingface.co/OwenArli/ArliAI-Llama-3-8B-Dolfin-v0.5 - - https://huggingface.co/QuantFactory/ArliAI-Llama-3-8B-Dolfin-v0.5-GGUF - description: | - Based on Meta-Llama-3-8b-Instruct, and is governed by Meta Llama 3 License agreement: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct - - This is a fine tune using an improved Dolphin and WizardLM dataset intended to make the model follow instructions better and refuse less. - - OpenLLM Benchmark: - - Training: - - 2048 sequence length since the dataset has an average length of under 1000 tokens, while the base model is 8192 sequence length. From testing it still performs the same 8192 context just fine. - Training duration is around 2 days on 2xRTX 3090, using 4-bit loading and Qlora 64-rank 128-alpha resulting in ~2% trainable weights. 
- overrides: - parameters: - model: ArliAI-Llama-3-8B-Dolfin-v0.5.Q4_K_M.gguf - files: - - filename: ArliAI-Llama-3-8B-Dolfin-v0.5.Q4_K_M.gguf - sha256: 71fef02915c606b438ccff2cae6b7760bbb54a558d5f2d39c2421d97b6682fea - uri: huggingface://QuantFactory/ArliAI-Llama-3-8B-Dolfin-v0.5-GGUF/ArliAI-Llama-3-8B-Dolfin-v0.5.Q4_K_M.gguf -- !!merge <<: *llama3 - name: "llama-3-ezo-8b-common-it" - icon: https://huggingface.co/HODACHI/Llama-3-EZO-8b-Common-it - urls: - - https://huggingface.co/HODACHI/Llama-3-EZO-8b-Common-it - - https://huggingface.co/MCZK/Llama-3-EZO-8b-Common-it-GGUF - description: | - Based on meta-llama/Meta-Llama-3-8B-Instruct, it has been enhanced for Japanese usage through additional pre-training and instruction tuning. (Built with Meta Llama3) - - This model is based on Llama-3-8B-Instruct and is subject to the Llama-3 Terms of Use. For detailed information, please refer to the official Llama-3 license page. - - このモデルはLlama-3-8B-Instructをベースにしており、Llama-3の利用規約に従います。詳細については、Llama-3の公式ライセンスページをご参照ください。 - overrides: - parameters: - model: Llama-3-EZO-8b-Common-it.Q4_K_M.iMatrix.gguf - files: - - filename: Llama-3-EZO-8b-Common-it.Q4_K_M.iMatrix.gguf - sha256: 0a46165b1c35bfb97d7d5b18969a7bfc2bbf37a90bc5e85f8cab11483f5a8adc - uri: huggingface://MCZK/Llama-3-EZO-8b-Common-it-GGUF/Llama-3-EZO-8b-Common-it.Q4_K_M.iMatrix.gguf -- !!merge <<: *llama3 - name: "l3-8b-niitama-v1" - urls: - - https://huggingface.co/Sao10K/L3-8B-Niitama-v1 - - https://huggingface.co/mradermacher/L3-8B-Niitama-v1-GGUF - description: | - Niitama on Horde - overrides: - parameters: - model: L3-8B-Niitama-v1.Q4_K_M.gguf - files: - - filename: L3-8B-Niitama-v1.Q4_K_M.gguf - sha256: a0e6d8972e1c73af7952ee1b8a3898f52c6036701571fea37ff621b71e89eb53 - uri: huggingface://mradermacher/L3-8B-Niitama-v1-GGUF/L3-8B-Niitama-v1.Q4_K_M.gguf -- !!merge <<: *llama3 - name: "l3-8b-niitama-v1-i1" - urls: - - https://huggingface.co/Sao10K/L3-8B-Niitama-v1 - - 
https://huggingface.co/mradermacher/L3-8B-Niitama-v1-i1-GGUF - description: | - Niitama on Horde (iMatrix quants) - overrides: - parameters: - model: L3-8B-Niitama-v1.i1-Q4_K_M.gguf - files: - - filename: L3-8B-Niitama-v1.i1-Q4_K_M.gguf - sha256: 8c62f831db2a6e34aa75459fe8a98815199ecc2dac1892a460b8b86363b6826e - uri: huggingface://mradermacher/L3-8B-Niitama-v1-i1-GGUF/L3-8B-Niitama-v1.i1-Q4_K_M.gguf -- !!merge <<: *llama3 - icon: https://huggingface.co/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA/resolve/main/Images/LLAMA-3_8B_Unaligned_BETA.png - name: "llama-3_8b_unaligned_beta" - urls: - - https://huggingface.co/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA - - https://huggingface.co/bartowski/LLAMA-3_8B_Unaligned_BETA-GGUF - description: | - In the Wild West of the AI world, the real titans never hit their deadlines, no sir! - The projects that finish on time? They’re the soft ones—basic, surface-level shenanigans. But the serious projects? They’re always delayed. You set a date, then reality hits: not gonna happen, scope creep that mutates the roadmap, unexpected turn of events that derails everything. - It's only been 4 months since the Alpha was released, and half a year since the project started, but it felt like nearly a decade. - Deadlines shift, but with each delay, you’re not failing—you’re refining, and becoming more ambitious. A project that keeps getting pushed isn’t late; it’s just gaining weight, becoming something worth building, and truly worth seeing all the way through. The longer it’s delayed, the more serious it gets. - LLAMA-3_8B_Unaligned is a serious project, and thank god, the Beta is finally here. - I love you all unconditionally, thanks for all the support and kind words! 
- overrides: - parameters: - model: LLAMA-3_8B_Unaligned_BETA-Q4_K_M.gguf - files: - - filename: LLAMA-3_8B_Unaligned_BETA-Q4_K_M.gguf - sha256: 5b88fb4537339996c04e4a1b6ef6a2d555c4103b6378e273ae9c6c5e77af67eb - uri: huggingface://bartowski/LLAMA-3_8B_Unaligned_BETA-GGUF/LLAMA-3_8B_Unaligned_BETA-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "freyja-v4.95-maldv-7b-non-fiction-i1" - urls: - - https://huggingface.co/MrRobotoAI/Freyja-v4.95-maldv-7b-NON-FICTION - - https://huggingface.co/mradermacher/Freyja-v4.95-maldv-7b-NON-FICTION-i1-GGUF - description: | - This model was merged using the Model Stock merge method using aifeifei798/llama3-8B-DarkIdol-2.2-Uncensored-1048K as a base. - The following models were included in the merge: - maldv/llama-3-fantasy-writer-8b - maldv/badger-iota-llama-3-8b - maldv/badger-lambda-llama-3-8b - maldv/badger-mu-llama-3-8b - maldv/badger-kappa-llama-3-8b - maldv/badger-writer-llama-3-8b - overrides: - parameters: - model: Freyja-v4.95-maldv-7b-NON-FICTION.i1-Q4_K_M.gguf - files: - - filename: Freyja-v4.95-maldv-7b-NON-FICTION.i1-Q4_K_M.gguf - sha256: cdc0f4de6df2ba120835fbd25c2a0ae2af8548f46d2c40c7a018c51c3d19e0c0 - uri: huggingface://mradermacher/Freyja-v4.95-maldv-7b-NON-FICTION-i1-GGUF/Freyja-v4.95-maldv-7b-NON-FICTION.i1-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "dusk_rainbow" - icon: https://huggingface.co/SicariusSicariiStuff/Dusk_Rainbow/resolve/main/Dusk_Rainbow.gif - urls: - - https://huggingface.co/SicariusSicariiStuff/Dusk_Rainbow - - https://huggingface.co/mradermacher/Dusk_Rainbow-GGUF - description: | - A girl of peculiar appetites and an even more peculiar imagination lived in a small, sleepy village nestled deep in the countryside. The kind of village where the clouds hung low, casting shadows like sullen toddlers refusing to play. 
But on this particular day, the girl ambled through the woods, when she noticed something curious: a plant, of all things, that seemed to have been dipped in a cookie jar, judging by its smell. A botanical biscuit, in the middle of a birch grove. - This model is the result of training a fraction (16M tokens) of the testing data Intended for LLAMA-3_8B_Unaligned's upcoming beta. The base model is a merge of merges, made by Invisietch's and named EtherealRainbow-v0.3-8B. The name for this model reflects the base that was used for this finetune while hinting a darker, and more uncensored aspects associated with the nature of the LLAMA-3_8B_Unaligned project. - - As a result of the unique data added, this model has an exceptional adherence to instructions about paragraph length, and to the story writing prompt. I would like to emphasize, no ChatGPT \ Claude was used for any of the additional data I added in this finetune. The goal is to eventually have a model with a minimal amount of slop, this cannot be reliably done by relying on API models, which pollute datasets with their bias and repetitive words. 
- overrides: - parameters: - model: Dusk_Rainbow.Q4_K_M.gguf - files: - - filename: Dusk_Rainbow.Q4_K_M.gguf - sha256: d02cb1612903f4840e4d72e92582b0dca64a8a7e6662953e8ad1ea62f9464e31 - uri: huggingface://mradermacher/Dusk_Rainbow-GGUF/Dusk_Rainbow.Q4_K_M.gguf - &chatml url: "github:mudler/LocalAI/gallery/chatml.yaml@master" ### ChatML name: "una-thepitbull-21.4b-v2" @@ -18393,288 +11750,6 @@ - filename: UNA-ThePitbull-21.4B-v2-Q4_K_M.gguf sha256: f08780986748a04e707a63dcac616330c2afc7f9fb2cc6b1d9784672071f3c85 uri: huggingface://bartowski/UNA-ThePitbull-21.4B-v2-GGUF/UNA-ThePitbull-21.4B-v2-Q4_K_M.gguf -- url: "github:mudler/LocalAI/gallery/chatml.yaml@master" - name: "helpingai-9b" - license: hsul - icon: https://huggingface.co/OEvortex/HelpingAI-3B/resolve/main/HelpingAI.png - description: | - HelpingAI-9B is a large language model designed for emotionally intelligent conversational interactions. It is trained to engage users with empathy, understanding, and supportive dialogue across a wide range of topics and contexts. The model aims to provide a supportive AI companion that can attune to users' emotional states and communicative needs. 
- urls: - - https://huggingface.co/OEvortex/HelpingAI-9B - - https://huggingface.co/nold/HelpingAI-9B-GGUF - tags: - - llm - - gguf - - gpu - - cpu - - chatml - overrides: - context_size: 4096 - parameters: - model: HelpingAI-9B_Q4_K_M.gguf - files: - - filename: HelpingAI-9B_Q4_K_M.gguf - sha256: 9c90f3a65332a03a6cbb563eee19c7586d9544f646ff9f33f7f1904b3d415ae2 - uri: huggingface://nold/HelpingAI-9B-GGUF/HelpingAI-9B_Q4_K_M.gguf -- url: "github:mudler/LocalAI/gallery/chatml-hercules.yaml@master" - urls: - - https://huggingface.co/Locutusque/Llama-3-Hercules-5.0-8B - - https://huggingface.co/bartowski/Llama-3-Hercules-5.0-8B-GGUF - name: "llama-3-hercules-5.0-8b" - tags: - - llm - - gguf - - gpu - - cpu - - chatml - - function-calling - description: | - Llama-3-Hercules-5.0-8B is a fine-tuned language model derived from Llama-3-8B. It is specifically designed to excel in instruction following, function calls, and conversational interactions across various scientific and technical domains. - overrides: - parameters: - model: Llama-3-Hercules-5.0-8B-Q4_K_M.gguf - files: - - filename: Llama-3-Hercules-5.0-8B-Q4_K_M.gguf - sha256: 83647caf4a23a91697585cff391e7d1236fac867392f9e49a6dab59f81b5f810 - uri: huggingface://bartowski/Llama-3-Hercules-5.0-8B-GGUF/Llama-3-Hercules-5.0-8B-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "l3-15b-mythicalmaid-t0.0001" - icon: https://cdn-uploads.huggingface.co/production/uploads/64f74b6e6389380c77562762/Nx5jjEYNH26OS2_87mPTM.png - urls: - - https://huggingface.co/v000000/L3-15B-MythicalMaid-t0.0001 - - https://huggingface.co/mradermacher/L3-15B-MythicalMaid-t0.0001-GGUF - description: | - Llama-3-15B-MythicalMaid-t0.0001 - A merge of the following models using a custom NearSwap(t0.0001) algorithm (inverted): - - ZeusLabs/L3-Aethora-15B-V2 - v000000/HaloMaidRP-v1.33-15B-L3 - - With ZeusLabs/L3-Aethora-15B-V2 as the base model. - - This merge was inverted compared to "L3-15B-EtherealMaid-t0.0001". 
- overrides: - parameters: - model: L3-15B-MythicalMaid-t0.0001.Q4_K_M.gguf - files: - - filename: L3-15B-MythicalMaid-t0.0001.Q4_K_M.gguf - sha256: ecbd57783006f1a027f8a7f5a5d551dc8b3568912825f566d79fd34a804e8970 - uri: huggingface://mradermacher/L3-15B-MythicalMaid-t0.0001-GGUF/L3-15B-MythicalMaid-t0.0001.Q4_K_M.gguf -- !!merge <<: *llama3 - name: "l3-15b-etherealmaid-t0.0001-i1" - icon: https://cdn-uploads.huggingface.co/production/uploads/64f74b6e6389380c77562762/FwYXt2h_FdmlL0Z6qYufz.png - urls: - - https://huggingface.co/v000000/L3-15B-EtherealMaid-t0.0001 - - https://huggingface.co/mradermacher/L3-15B-EtherealMaid-t0.0001-i1-GGUF - description: | - Llama-3-15B-EtherealMaid-t0.0001 - A merge of the following models using a custom NearSwap(t0.0001) algorithm: - - v000000/HaloMaidRP-v1.33-15B-L3 - ZeusLabs/L3-Aethora-15B-V2 - - With v000000/HaloMaidRP-v1.33-15B-L3 as the base model. - overrides: - parameters: - model: L3-15B-EtherealMaid-t0.0001.i1-Q4_K_M.gguf - files: - - filename: L3-15B-EtherealMaid-t0.0001.i1-Q4_K_M.gguf - sha256: 2911be6be8e0fd4184998d452410ba847491b4ab71a928749de87cafb0e13757 - uri: huggingface://mradermacher/L3-15B-EtherealMaid-t0.0001-i1-GGUF/L3-15B-EtherealMaid-t0.0001.i1-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "l3-8b-celeste-v1" - icon: https://cdn-uploads.huggingface.co/production/uploads/630cf5d14ca0a22768bbe10c/Zv__LDTO-nHvpuxPcCgUU.webp - urls: - - https://huggingface.co/nothingiisreal/L3-8B-Celeste-v1 - - https://huggingface.co/bartowski/L3-8B-Celeste-v1-GGUF - description: | - Trained on LLaMA 3 8B Instruct at 8K context using Reddit Writing Prompts, Opus 15K Instruct an c2 logs cleaned. - - This is a roleplay model any instruction following capabilities outside roleplay contexts are coincidental. 
- overrides: - parameters: - model: L3-8B-Celeste-v1-Q4_K_M.gguf - files: - - filename: L3-8B-Celeste-v1-Q4_K_M.gguf - sha256: ed5277719965fb6bbcce7d16742e3bac4a8d5b8f52133261a3402a480cd65317 - uri: huggingface://bartowski/L3-8B-Celeste-v1-GGUF/L3-8B-Celeste-v1-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "l3-8b-celeste-v1.2" - icon: https://cdn-uploads.huggingface.co/production/uploads/630cf5d14ca0a22768bbe10c/Zv__LDTO-nHvpuxPcCgUU.webp - urls: - - https://huggingface.co/mudler/L3-8B-Celeste-V1.2-Q4_K_M-GGUF - description: | - Trained on LLaMA 3 8B Instruct at 8K context using Reddit Writing Prompts, Opus 15K Instruct an c2 logs cleaned. - - This is a roleplay model any instruction following capabilities outside roleplay contexts are coincidental. - overrides: - parameters: - model: l3-8b-celeste-v1.2-q4_k_m.gguf - files: - - filename: l3-8b-celeste-v1.2-q4_k_m.gguf - sha256: 7752204c0e9f627ff5726eb69bb6114974cafbc934a993ad019abfba62002783 - uri: huggingface://mudler/L3-8B-Celeste-V1.2-Q4_K_M-GGUF/l3-8b-celeste-v1.2-q4_k_m.gguf -- !!merge <<: *llama3 - name: "llama-3-tulu-2-8b-i1" - icon: https://huggingface.co/datasets/allenai/blog-images/resolve/main/tulu-v2/Tulu%20V2%20banner.png - urls: - - https://huggingface.co/allenai/llama-3-tulu-2-8b - - https://huggingface.co/mradermacher/llama-3-tulu-2-8b-i1-GGUF - description: | - Tulu is a series of language models that are trained to act as helpful assistants. Llama 3 Tulu V2 8B is a fine-tuned version of Llama 3 that was trained on a mix of publicly available, synthetic and human datasets. 
- overrides: - parameters: - model: llama-3-tulu-2-8b.i1-Q4_K_M.gguf - files: - - filename: llama-3-tulu-2-8b.i1-Q4_K_M.gguf - sha256: f859c22bfa64f461e9ffd973dc7ad6a78bb98b1dda6f49abfa416a4022b7e333 - uri: huggingface://mradermacher/llama-3-tulu-2-8b-i1-GGUF/llama-3-tulu-2-8b.i1-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "llama-3-tulu-2-dpo-70b-i1" - icon: https://huggingface.co/datasets/allenai/blog-images/resolve/main/tulu-v2/Tulu%20V2%20banner.png - urls: - - https://huggingface.co/allenai/llama-3-tulu-2-dpo-70b - - https://huggingface.co/mradermacher/llama-3-tulu-2-dpo-70b-i1-GGUF - description: | - Tulu is a series of language models that are trained to act as helpful assistants. Llama 3 Tulu V2 8B is a fine-tuned version of Llama 3 that was trained on a mix of publicly available, synthetic and human datasets. - overrides: - parameters: - model: llama-3-tulu-2-dpo-70b.i1-Q4_K_M.gguf - files: - - filename: llama-3-tulu-2-dpo-70b.i1-Q4_K_M.gguf - sha256: fc309bbdf1e2bdced954c4c8dc1f9a885c547017ee5e750bfde645af89e3d3a5 - uri: huggingface://mradermacher/llama-3-tulu-2-dpo-70b-i1-GGUF/llama-3-tulu-2-dpo-70b.i1-Q4_K_M.gguf -- !!merge <<: *llama3 - license: cc-by-nc-4.0 - name: "suzume-llama-3-8b-multilingual-orpo-borda-top25" - icon: https://cdn-uploads.huggingface.co/production/uploads/64b63f8ad57e02621dc93c8b/kWQSu02YfgYdUQqv4s5lq.png - urls: - - https://huggingface.co/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25 - - https://huggingface.co/RichardErkhov/lightblue_-_suzume-llama-3-8B-multilingual-orpo-borda-top25-gguf - description: | - This is Suzume ORPO, an ORPO trained fine-tune of the lightblue/suzume-llama-3-8B-multilingual model using our lightblue/mitsu dataset. - - We have trained several versions of this model using ORPO and so recommend that you use the best performing model from our tests, lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half. 
- - Note that this model has a non-commerical license as we used the Command R and Command R+ models to generate our training data for this model (lightblue/mitsu). - - We are currently working on a developing a commerically usable model, so stay tuned for that! - overrides: - parameters: - model: suzume-llama-3-8B-multilingual-orpo-borda-top25.Q4_K_M.gguf - files: - - filename: suzume-llama-3-8B-multilingual-orpo-borda-top25.Q4_K_M.gguf - sha256: ef75a02c5f38e14a8873c7989188dac6974851b4654279fe1921d2c8018cc388 - uri: huggingface://RichardErkhov/lightblue_-_suzume-llama-3-8B-multilingual-orpo-borda-top25-gguf/suzume-llama-3-8B-multilingual-orpo-borda-top25.Q4_K_M.gguf -- !!merge <<: *llama3 - name: "calme-2.4-llama3-70b" - icon: https://huggingface.co/MaziyarPanahi/calme-2.4-llama3-70b/resolve/main/llama-3-merges.webp - urls: - - https://huggingface.co/MaziyarPanahi/calme-2.4-llama3-70b - - https://huggingface.co/mradermacher/calme-2.4-llama3-70b-GGUF - description: | - This model is a fine-tune (DPO) of meta-llama/Meta-Llama-3-70B-Instruct model. - overrides: - parameters: - model: calme-2.4-llama3-70b.Q4_K_M.gguf - files: - - filename: calme-2.4-llama3-70b.Q4_K_M.gguf - sha256: 0b44ac8a88395dfc60f1b9d3cfffc0ffef74ec0a302e610ef91fc787187568f2 - uri: huggingface://mradermacher/calme-2.4-llama3-70b-GGUF/calme-2.4-llama3-70b.Q4_K_M.gguf -- !!merge <<: *llama3 - name: "meta-llama-3-instruct-8.9b-brainstorm-5x-form-11" - urls: - - https://huggingface.co/DavidAU/Meta-Llama-3-Instruct-8.9B-BRAINSTORM-5x-FORM-11-GGUF - description: | - Meta-Llama-3-8B Instruct (now at 8.9B) is an enhanced version of the LLM model, specifically designed for creative use cases such as story writing, roleplaying, and fiction. This model has been augmented through the "Brainstorm" process, which involves expanding and calibrating the reasoning center of the LLM to improve its performance in various creative tasks. 
The enhancements brought by this process include more detailed and nuanced descriptions, stronger prose, and a greater sense of immersion in the story. The model is capable of generating long and vivid content, with fewer clichés and more focused, coherent narratives. Users can provide more instructions and details to elicit stronger and more engaging responses from the model. The "Brainstorm" process has been tested on multiple LLM models, including Llama2, Llama3, and Mistral, as well as on individual models like Llama3 Instruct, Mistral Instruct, and custom fine-tuned models. - overrides: - parameters: - model: Meta-Llama-3-8B-Instruct-exp5-11-Q4_K_M.gguf - files: - - filename: Meta-Llama-3-8B-Instruct-exp5-11-Q4_K_M.gguf - sha256: 5dd81b8b809667d10036499affdd1461cf95af50b405cbc9f800b421a4b60e98 - uri: huggingface://DavidAU/Meta-Llama-3-Instruct-8.9B-BRAINSTORM-5x-FORM-11-GGUF/Meta-Llama-3-8B-Instruct-exp5-11-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "rp-naughty-v1.0c-8b" - urls: - - https://huggingface.co/QuantFactory/RP-Naughty-v1.0c-8b-GGUF - description: | - This model was merged using the Model Stock merge method using aifeifei798/llama3-8B-DarkIdol-2.2-Uncensored-1048K as a base. 
- The following models were included in the merge: - - underwoods/adventure-8b - Khetterman/Multilingual-SaigaSuzume-8B - underwoods/writer-8b - Khetterman/Kosmos-8B-v1 - Khetterman/CursedMatrix-8B-v9 - overrides: - parameters: - model: RP-Naughty-v1.0c-8b.Q4_K_M.gguf - files: - - filename: RP-Naughty-v1.0c-8b.Q4_K_M.gguf - sha256: c344564d26d0c3d244d31cfeb103666eab37f9dee6678a2dbaf5bfcf4109d789 - uri: huggingface://QuantFactory/RP-Naughty-v1.0c-8b-GGUF/RP-Naughty-v1.0c-8b.Q4_K_M.gguf -- !!merge <<: *llama3 - name: "bio-medical-llama-3-8b" - icon: https://cdn-uploads.huggingface.co/production/uploads/653f5b93cd52f288490edc83/zPMUugzfOiwTiRw88jm7T.jpeg - urls: - - https://huggingface.co/ContactDoctor/Bio-Medical-Llama-3-8B - - https://huggingface.co/QuantFactory/Bio-Medical-Llama-3-8B-GGUF - description: | - Bio-Medical-Llama-3-8B model is a specialized large language model designed for biomedical applications. It is finetuned from the meta-llama/Meta-Llama-3-8B-Instruct model using a custom dataset containing over 500,000 diverse entries. These entries include a mix of synthetic and manually curated data, ensuring high quality and broad coverage of biomedical topics. - - The model is trained to understand and generate text related to various biomedical fields, making it a valuable tool for researchers, clinicians, and other professionals in the biomedical domain. 
- overrides: - parameters: - model: Bio-Medical-Llama-3-8B.Q4_K_M.gguf - files: - - filename: Bio-Medical-Llama-3-8B.Q4_K_M.gguf - sha256: 672939e0487d02c55734132c25a59f26e4deaac7cd49445a7028f2291139edcc - uri: huggingface://QuantFactory/Bio-Medical-Llama-3-8B-GGUF/Bio-Medical-Llama-3-8B.Q4_K_M.gguf -- !!merge <<: *llama3 - name: "triangulum-10b" - icon: https://cdn-uploads.huggingface.co/production/uploads/65bb837dbfb878f46c77de4c/By0OJ1lMvP5ZvVvfEGvz5.png - urls: - - https://huggingface.co/prithivMLmods/Triangulum-10B - - https://huggingface.co/mradermacher/Triangulum-10B-GGUF - description: | - Triangulum 10B is a collection of pretrained and instruction-tuned generative models, designed for multilingual applications. These models are trained using synthetic datasets based on long chains of thought, enabling them to perform complex reasoning tasks effectively. - Key Features - Foundation Model: Built upon LLaMA's autoregressive language model, leveraging an optimized transformer architecture for enhanced performance. - Instruction Tuning: Includes supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align model outputs with human preferences for helpfulness and safety. - Multilingual Support: Designed to handle multiple languages, ensuring broad applicability across diverse linguistic contexts. - Training Approach - Synthetic Datasets: Utilizes long chain-of-thought synthetic data to enhance reasoning capabilities. - Supervised Fine-Tuning (SFT): Aligns the model to specific tasks through curated datasets. - Reinforcement Learning with Human Feedback (RLHF): Ensures the model adheres to human values and safety guidelines through iterative training processes. 
- overrides: - parameters: - model: Triangulum-10B.Q4_K_M.gguf - files: - - filename: Triangulum-10B.Q4_K_M.gguf - sha256: dd071f99edf6b166044bf229cdeec19419c4c348e3fc3d6587cfcc55e6fb85fa - uri: huggingface://mradermacher/Triangulum-10B-GGUF/Triangulum-10B.Q4_K_M.gguf -- !!merge <<: *llama3 - name: "opencrystal-l3-15b-v2.1-i1" - icon: https://huggingface.co/Darkknight535/OpenCrystal-15B-L3-v2/resolve/main/Rohma_2024-08-30%4023h37m38s.jpg - urls: - - https://huggingface.co/Darkknight535/OpenCrystal-L3-15B-v2.1 - - https://huggingface.co/mradermacher/OpenCrystal-L3-15B-v2.1-i1-GGUF - description: | - Automatically speaks as other NPCs. - Creative output. - Coherent responses. - Output feels similar to using Character.ai. - Improved adherence to prompts. - Reduced hallucinations (15B). - Capable of summarizing and generating image prompts well. - overrides: - parameters: - model: OpenCrystal-L3-15B-v2.1.i1-Q4_K_M.gguf - files: - - filename: OpenCrystal-L3-15B-v2.1.i1-Q4_K_M.gguf - sha256: b7db0a810771c856bd598b9a11f6aec6672019a4e18822af3a5957a03184296f - uri: huggingface://mradermacher/OpenCrystal-L3-15B-v2.1-i1-GGUF/OpenCrystal-L3-15B-v2.1.i1-Q4_K_M.gguf - &command-R url: "github:mudler/LocalAI/gallery/command-r.yaml@master" ### START Command-r name: "command-r-v01:q1_s" @@ -20454,18 +13529,6 @@ - filename: bark-small_weights-f16.bin uri: https://huggingface.co/Green-Sky/bark-ggml/resolve/main/bark-small_weights-f16.bin sha256: de1ece17e8319537b3a7909baebbd28affab23c942d5d57e648d622af4e2feaa -- !!merge <<: *qwen25 - name: "biomed-r1-32b-i1" - urls: - - https://huggingface.co/mradermacher/BioMed-R1-32B-i1-GGUF - description: "**BioMed-R1-32B** is a large-scale, medical-domain language model developed by the Zou Lab at Stanford University. Built upon the **Qwen2.5-32B-Instruct** base, it is specifically fine-tuned to enhance reasoning and factual accuracy in clinical and biomedical contexts. 
The model excels in handling complex medical questions, with a focus on self-correction, backtracking, and robust performance under adversarial conditions—key traits for reliable medical decision support.\n\nKey features:\n- **Base model**: Qwen/Qwen2.5-32B-Instruct\n- **Domain**: Specialized for medical reasoning and knowledge retrieval\n- **Training**: Supervised fine-tuning and reinforcement learning on reasoning-heavy and adversarial examples\n- **Performance**: Top-tier among similarly sized biomedical LLMs, particularly on reasoning-intensive tasks\n- **Use case**: Clinical reasoning, diagnostic support, medical QA, and research\n\nAvailable via Hugging Face, it can be deployed using vLLM, SGLang, or standard Transformers pipelines. Ideal for researchers and developers working in healthcare AI.\n\n> \U0001F4CC **Citation**: Thapa et al., *Disentangling Reasoning and Knowledge in Medical Large Language Models*, arXiv:2505.11462 (2025)\n" - overrides: - parameters: - model: BioMed-R1-32B.i1-Q4_K_M.gguf - files: - - filename: BioMed-R1-32B.i1-Q4_K_M.gguf - sha256: 345fd28914871d0c3369ba06512c7b1fe93eb88c67c655007f8cfc4671021450 - uri: huggingface://mradermacher/BioMed-R1-32B-i1-GGUF/BioMed-R1-32B.i1-Q4_K_M.gguf - !!merge <<: *mistral03 name: "tlacuilo-12b" urls: @@ -20563,64 +13626,6 @@ - filename: Magidonia-24B-v4.2.0.i1-Q4_K_M.gguf sha256: f89fbe09ea9edd4b91aa89516cbfaabdf0d956e0458cfc4b44b8054a1546b559 uri: huggingface://mradermacher/Magidonia-24B-v4.2.0-i1-GGUF/Magidonia-24B-v4.2.0.i1-Q4_K_M.gguf -- !!merge <<: *qwen25coder - name: "strand-rust-coder-14b-v1-i1" - urls: - - https://huggingface.co/mradermacher/Strand-Rust-Coder-14B-v1-i1-GGUF - description: | - **Strand-Rust-Coder-14B-v1** is a high-performance, domain-specialized large language model fine-tuned for Rust programming. 
Built on **Qwen2.5-Coder-14B-Instruct** using LoRA and trained on a 191K-example synthetic dataset generated via peer-reviewed Swarm Inference, it achieves state-of-the-art performance on Rust-specific benchmarks — outperforming larger proprietary models like GPT-5 Codex on tasks such as code generation, test creation, and refactoring. - - Key highlights: - - **Specialized for Rust**: Optimized for ownership, lifetimes, and idiomatic syntax. - - **High accuracy**: 48% pass rate on hold-out test sets and 43% on RustEvo². - - **Efficient training**: Fine-tuned with LoRA (r=64, α=16), updating only ~1% of parameters. - - **Extended context**: 32,768 tokens for complex code reasoning. - - **Open & decentralized**: Part of the Fortytwo Network, enabling collaborative, peer-validated inference. - - Ideal for developers, AI code assistants, and researchers working on Rust. Available in GGUF quantized versions for local deployment (Q4_K_M to Q8_0). - - > **Base model**: Qwen2.5-Coder-14B-Instruct - > **Dataset**: Fortytwo-Network/Strandset-Rust-v1 - > **License**: Apache 2.0 - > **Repository**: [Fortytwo-Network/Strand-Rust-Coder-14B-v1](https://huggingface.co/Fortytwo-Network/Strand-Rust-Coder-14B-v1) - overrides: - parameters: - model: Strand-Rust-Coder-14B-v1.i1-Q4_K_M.gguf - files: - - filename: Strand-Rust-Coder-14B-v1.i1-Q4_K_M.gguf - sha256: 56bacda1fd25cdebd349df839c3ef3248a3e7cb98da6ce1ee1f4e8c2153e777e - uri: huggingface://mradermacher/Strand-Rust-Coder-14B-v1-i1-GGUF/Strand-Rust-Coder-14B-v1.i1-Q4_K_M.gguf -- !!merge <<: *phi4 - name: "scholasticlogicai-phi-4-i1" - urls: - - https://huggingface.co/mradermacher/ScholasticLogicAI-Phi-4-i1-GGUF - description: | - **Model Name:** ScholasticLogicAI-Phi-4 - **Base Model:** Microsoft Phi-4 - **Repository:** [Berthi-Rohar/ScholasticLogicAI-Phi-4](https://huggingface.co/Berthi-Rohar/ScholasticLogicAI-Phi-4) - **Description:** - A fine-tuned variant of Microsoft's Phi-4, specialized in logical reasoning using Aristotelian 
and scholastic logic frameworks. This model is designed to perform structured, deductive reasoning tasks, making it ideal for philosophical, analytical, and formal logic applications. It is part of an experimental project aimed at exploring the integration of classical logic paradigms into modern language models. - - **Use Case:** - Best suited for tasks involving syllogistic reasoning, argument analysis, formal deduction, and logical consistency checks. Not intended for general conversation or casual use. - - **Key Features:** - - Built on the Phi-4 architecture for high efficiency and performance. - - Fine-tuned for logical coherence and structured reasoning. - - Supports chat template input for interactive reasoning sessions. - - **Note:** This model is experimental and may hallucinate or fail under complex or ambiguous prompts. Ideal for developers and researchers in logic, AI reasoning, and knowledge representation. - - **License:** [MIT](https://github.com/Berthi-Rohar/ScholasticLogicAI-Phi-4/blob/main/LICENSE) (check original Phi-4 for base license). 
- - *Quantized versions available via [mradermacher/ScholasticLogicAI-Phi-4-i1-GGUF](https://huggingface.co/mradermacher/ScholasticLogicAI-Phi-4-i1-GGUF) for local inference.* - overrides: - parameters: - model: ScholasticLogicAI-Phi-4.i1-Q4_K_M.gguf - files: - - filename: ScholasticLogicAI-Phi-4.i1-Q4_K_M.gguf - sha256: 6b1bb32b74979833b17cfe1b47c3bdd9f9b60ba283c486767e612ff33a073d9f - uri: huggingface://mradermacher/ScholasticLogicAI-Phi-4-i1-GGUF/ScholasticLogicAI-Phi-4.i1-Q4_K_M.gguf - !!merge <<: *mistral03 name: "cydonia-24b-v4.2.0-i1" urls: @@ -20658,46 +13663,6 @@ - filename: Aevum-0.6B-Finetuned.Q4_K_M.gguf sha256: 6904b789894a7dae459042a28318e70dbe222cb3e6f892f3fc42e591d4a341a3 uri: huggingface://mradermacher/Aevum-0.6B-Finetuned-GGUF/Aevum-0.6B-Finetuned.Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "positivedetox-qwen2.5-14b" - urls: - - https://huggingface.co/mradermacher/PositiveDetox-Qwen2.5-14B-GGUF - description: | - **Model Name:** PositiveDetox-Qwen2.5-14B - **Base Model:** Qwen2.5-14B-Instruct (finetuned from `unsloth/qwen2.5-14b-instruct-unsloth-bnb-4bit`) - **Developer:** theprint - **License:** Apache 2.0 - **Description:** A fine-tuned version of Qwen2.5-14B designed to reduce toxic positivity in AI-generated responses. Trained on the Positive Detox dataset, this model promotes balanced, authentic, and empathetic communication—avoiding overly cheerful or dismissive language in sensitive contexts. Ideal for applications where nuanced, human-centered responses are critical. - **Optimization:** Fine-tuned using Unsloth and Hugging Face TRL for accelerated training. - **Use Case:** Conversational AI, mental health support tools, ethical content generation, and any scenario requiring thoughtful, non-toxic dialogue. 
- - > *Note: The GGUF version available at `mradermacher/PositiveDetox-Qwen2.5-14B-GGUF` is a quantized variant of this model, optimized for local inference.* - overrides: - parameters: - model: PositiveDetox-Qwen2.5-14B.Q4_K_S.gguf - files: - - filename: PositiveDetox-Qwen2.5-14B.Q4_K_S.gguf - sha256: abd224325aea504a61fb749cc12649641165c33035b4e5923163387370878005 - uri: huggingface://mradermacher/PositiveDetox-Qwen2.5-14B-GGUF/PositiveDetox-Qwen2.5-14B.Q4_K_S.gguf -- !!merge <<: *qwen25 - name: "allenai_olmocr-2-7b-1025" - icon: https://cdn-uploads.huggingface.co/production/uploads/6734d6722769638944a5aa2e/DPsr3ZvRF9v-gdMa4EaHW.png - urls: - - https://huggingface.co/allenai/olmOCR-2-7B-1025 - - https://huggingface.co/bartowski/allenai_olmOCR-2-7B-1025-GGUF - description: | - This is a release of the olmOCR model that's fine tuned from Qwen2.5-VL-7B-Instruct using the olmOCR-mix-1025 dataset. It has been additionally fine tuned using GRPO RL training to boost its performance at math equations, tables, and other tricky OCR cases. 
- overrides: - mmproj: mmproj-allenai_olmOCR-2-7B-1025-f16.gguf - parameters: - model: allenai_olmOCR-2-7B-1025-Q4_K_M.gguf - files: - - filename: allenai_olmOCR-2-7B-1025-Q4_K_M.gguf - sha256: 347935fc4c16808cf97c7adc563da92c67ad90eeb0504a465ff9fa98d2794f3e - uri: huggingface://bartowski/allenai_olmOCR-2-7B-1025-GGUF/allenai_olmOCR-2-7B-1025-Q4_K_M.gguf - - filename: mmproj-allenai_olmOCR-2-7B-1025-f16.gguf - sha256: 78d1690a4d98051f527104c33796b79faa05f158d14645e19db91fac99bd1b04 - uri: https://huggingface.co/bartowski/allenai_olmOCR-2-7B-1025-GGUF/resolve/main/mmproj-allenai_olmOCR-2-7B-1025-f16.gguf - !!merge <<: *qwen3 name: "qwen-sea-lion-v4-32b-it-i1" urls: @@ -20825,18 +13790,6 @@ - filename: Logics-Qwen3-Math-4B.Q4_K_M.gguf sha256: 05528937a4cb05f5e8185e4e6bc5cb6f576f364c5482a4d9ee6a91302440ed07 uri: huggingface://mradermacher/Logics-Qwen3-Math-4B-GGUF/Logics-Qwen3-Math-4B.Q4_K_M.gguf -- !!merge <<: *llama3 - name: "spanish_rpg-3.2-1b" - urls: - - https://huggingface.co/Novaciano/Spanish_RPG-3.2-1B-GGUF - description: "**Model Name:** Spanish_RPG-3.2-1B\n**Base Model:** Llama 3.2 1B (via fine-tuning)\n**Repository:** [Novaciano/Spanish_RPG-3.2-1B](https://huggingface.co/Novaciano/Spanish_RPG-3.2-1B)\n**License:** Llama 3.2 (LLM)\n**Language:** Spanish (es)\n**Task:** Roleplay (NSFW/Adult Content)\n**Model Type:** Fine-tuned, Merge-based (Arcee Fusion)\n**Description:**\nA high-precision, Spanish-language roleplay model optimized for immersive, character-driven storytelling with NSFW content. Built on the foundation of *Alice-In-The-Dark-RP-NSFW-3.2-1B* and enhanced with code-generation data from *Llama-3.2-1B-GenerativePerturbations*, this model excels in generating natural, emotionally expressive, and coherent responses in roleplay formats — ideal for narrative, adult, and creative storytelling scenarios.\n\nDesigned for low-resource environments, it performs efficiently on CPUs, making it accessible for mobile and edge devices. 
Supports the classic internet roleplay format (`*action* dialogue *narration*`) and works seamlessly with KoboldAI, Koboldcpp, and llama.cpp.\n\n> \U0001F4CC *Note: This model contains uncensored, adult content and is not suitable for all audiences.*\n> \U0001F9EA *Intended as a prototype for testing and creative use — not for production deployment.*\n" - overrides: - parameters: - model: Spanish_RPG-3.2-1B-Q4_K_M.gguf - files: - - filename: Spanish_RPG-3.2-1B-Q4_K_M.gguf - sha256: 8238c55d3134aecdabffacad8837479bcf3f7454c145ea037325fa22d82373c3 - uri: huggingface://Novaciano/Spanish_RPG-3.2-1B-GGUF/Spanish_RPG-3.2-1B-Q4_K_M.gguf - !!merge <<: *qwen3 name: "john1604-ai-status-japanese-2025" urls: @@ -20924,18 +13877,6 @@ - filename: A2FM-32B-rl.Q4_K_S.gguf sha256: 930ff2241351322cc98a24f5aa46e7158757ca87f8fd2763d9ecc4a3ef9514ba uri: huggingface://mradermacher/A2FM-32B-rl-GGUF/A2FM-32B-rl.Q4_K_S.gguf -- !!merge <<: *qwen25 - name: "pokeeai.pokee_research_7b" - urls: - - https://huggingface.co/DevQuasar/PokeeAI.pokee_research_7b-GGUF - description: "**PokeeResearch-7B** is a 7-billion-parameter deep research agent developed by Pokee AI, designed for advanced, multi-step reasoning and autonomous research workflows. 
Built on the Qwen2.5-7B-Instruct foundation and fine-tuned using Reinforcement Learning from AI Feedback (RLAIF), it excels at complex, fact-grounded tasks such as information retrieval, cross-source verification, and synthesis across multiple research threads.\n\nKey features:\n- **Purpose-built for deep research**: Handles multi-hop queries with self-correction and structured reasoning.\n- **Trained on MiroRL-GenQA**: High-quality, reasoning-intensive question-answer pairs.\n- **State-of-the-art performance**: Outperforms other 7B models on benchmarks like GAIA, BrowseComp, and HotpotQA.\n- **Open-source & transparent**: Fully accessible via GitHub and Hugging Face, licensed under Apache 2.0.\n\nIdeal for researchers, developers, and enterprises seeking a reliable, scalable agent for scientific discovery, automated analysis, and knowledge synthesis.\n\n\U0001F449 *Explore the model:* [PokeeAI/pokee_research_7b](https://huggingface.co/PokeeAI/pokee_research_7b)\n\U0001F4DA *Learn more:* [GitHub Repository](https://github.com/Pokee-AI/PokeeResearchOSS)\n" - overrides: - parameters: - model: PokeeAI.pokee_research_7b.Q4_K_M.gguf - files: - - filename: PokeeAI.pokee_research_7b.Q4_K_M.gguf - sha256: 4cf6e52b51f3595631b6e17ad2d5a5c8499d2e646cf9452dd32fb5ff7bbb37dd - uri: huggingface://DevQuasar/PokeeAI.pokee_research_7b-GGUF/PokeeAI.pokee_research_7b.Q4_K_M.gguf - !!merge <<: *gptoss name: "gpt-oss-20b-esper3.1-i1" urls: @@ -21018,39 +13959,6 @@ - filename: gpt-oss-20b-claude-4-distill.i1-Q4_K_M.gguf sha256: 333bdbde0a933b62f2050f384879bfaea7db7a5fbb26ee151fbbdc3c95f510dd uri: huggingface://mradermacher/gpt-oss-20b-claude-4-distill-i1-GGUF/gpt-oss-20b-claude-4-distill.i1-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "70b_book_stock-i1" - urls: - - https://huggingface.co/mradermacher/70B_Book_stock-i1-GGUF - description: | - **Model Name:** Book_stock - **Base Model:** huihui-ai/Llama-3.3-70B-Instruct-abliterated - **Type:** Fine-tuned, merged LLM (70B parameters) - 
**Framework:** Transformers (Hugging Face) - **Merge Method:** Model Stock (via MergeKit) - **Description:** - A high-performance, 70-billion-parameter language model fine-tuned and merged using the *Model Stock* technique. Built upon the **Llama-3.3-70B-Instruct-abliterated** base, this model integrates multiple specialized LoRA adapters (Book_RPv05, Book_RPv1, Book_RPv15, Book_RPvfinal) to enhance instruction-following, reasoning, and domain-specific knowledge—particularly in book-related content, narrative reasoning, and creative writing. - - Optimized for strong performance in dialogue and creative tasks, with support for Llama 3-style chat templates and BFloat16 precision. Ideal for applications requiring deep reasoning, long-form generation, and nuanced instruction handling. - - **Use Case:** Creative writing, content generation, dialogue systems, educational tools, and advanced reasoning tasks. - - **Note:** This model is not a direct release by the original authors but a community-driven merge; the original base model is available at [huihui-ai/Llama-3.3-70B-Instruct-abliterated](https://huggingface.co/huihui-ai/Llama-3.3-70B-Instruct-abliterated). - overrides: - parameters: - model: mradermacher/70B_Book_stock-i1-GGUF -- !!merge <<: *qwen25 - name: "chemdfm-r-14b-i1" - urls: - - https://huggingface.co/mradermacher/ChemDFM-R-14B-i1-GGUF - description: "**ChemDFM-R-14B** is a specialized large language model designed for advanced chemical reasoning, developed by OpenDFM. 
Built upon the Qwen2.5-14B base model, it is fine-tuned using a novel mix-sourced distillation approach and domain-specific reinforcement learning to excel in chemistry-related tasks.\n\nKey features:\n- Trained on *ChemFG*, a comprehensive dataset of atomized chemical knowledge (e.g., functional group detection and reaction changes).\n- Generates interpretable, rationale-driven responses with clear reasoning steps.\n- Optimized for tasks like molecule analysis, reaction prediction, and chemical reasoning.\n- Supports both English and Chinese.\n\nThis model stands out as a state-of-the-art reasoning system in chemistry, offering transparency, reliability, and strong performance across diverse benchmarks. Ideal for researchers and professionals in drug discovery, materials science, and chemical education.\n\n\U0001F517 *Paper:* [ChemDFM-R: A Chemical Reasoning LLM Enhanced with Atomized Chemical Knowledge](https://arxiv.org/abs/2507.21990)\n\U0001F517 *Model:* [OpenDFM/ChemDFM-R-14B](https://huggingface.co/OpenDFM/ChemDFM-R-14B)\n" - overrides: - parameters: - model: ChemDFM-R-14B.i1-Q4_K_M.gguf - files: - - filename: ChemDFM-R-14B.i1-Q4_K_M.gguf - sha256: c5090dd547154712865fa26389790cad93f23a7b5cba90f5efc8b667ad61cd06 - uri: huggingface://mradermacher/ChemDFM-R-14B-i1-GGUF/ChemDFM-R-14B.i1-Q4_K_M.gguf - !!merge <<: *qwen3 name: "qwen3-deckard-large-almost-human-6b-iii-160-omega" urls: @@ -21125,48 +14033,6 @@ - filename: wraith-8b.i1-Q4_K_M.gguf sha256: 180469f9de3e1b5a77b7cf316899dbe4782bd5e6d4f161fb18ea95aa612e6926 uri: huggingface://mradermacher/wraith-8b-i1-GGUF/wraith-8b.i1-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "pokee_research_7b" - urls: - - https://huggingface.co/Mungert/pokee_research_7b-GGUF - description: | - **Model Name:** Qwen2.5-7B-Instruct - **Base Model:** Qwen/Qwen2.5-7B - **Model Type:** Instruction-tuned large language model (7.61B parameters) - **License:** Apache 2.0 - - **Description:** - Qwen2.5-7B-Instruct is a powerful, 
instruction-following language model designed for advanced reasoning, coding, and multi-turn dialogue. Built on the Qwen2.5 architecture, it delivers state-of-the-art performance in understanding complex prompts, generating long-form text (up to 8K tokens), and handling structured outputs like JSON. It supports multilingual communication (29+ languages), including English, Chinese, and European languages, and excels in long-context tasks with support for up to 131,072 tokens. - - Ideal for research, creative writing, coding assistance, and agent-based workflows, this model is optimized for real-world applications requiring robustness, accuracy, and scalability. - - **Key Features:** - - 7.61 billion parameters - - Context length: 131K tokens (supports long-context via YaRN) - - Strong performance in math, coding, and factual reasoning - - Fine-tuned for instruction following and chat interactions - - Deployable with Hugging Face Transformers, vLLM, and llama.cpp - - **Use Case:** - Perfect for developers, researchers, and enterprises building intelligent assistants, autonomous agents, or content generation systems. 
- - **Citation:** - ```bibtex - @misc{qwen2.5, - title = {Qwen2.5: A Party of Foundation Models}, - url = {https://qwenlm.github.io/blog/qwen2.5/}, - author = {Qwen Team}, - month = {September}, - year = {2024} - } - ``` - overrides: - parameters: - model: pokee_research_7b-q4_k_m.gguf - files: - - filename: pokee_research_7b-q4_k_m.gguf - sha256: 670706711d82fcdbae951fda084f77c9c479edf3eb5d8458d1cfddd46cf4b767 - uri: huggingface://Mungert/pokee_research_7b-GGUF/pokee_research_7b-q4_k_m.gguf - !!merge <<: *qwen3 name: "deepkat-32b-i1" urls: @@ -21336,37 +14202,6 @@ - filename: financial-gpt-oss-20b-q8.i1-Q4_K_M.gguf sha256: 14586673de2a769f88bd51f88464b9b1f73d3ad986fa878b2e0c1473f1c1fc59 uri: huggingface://mradermacher/financial-gpt-oss-20b-q8-i1-GGUF/financial-gpt-oss-20b-q8.i1-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "qwen3-grand-horror-light-1.7b" - urls: - - https://huggingface.co/mradermacher/Qwen3-Grand-Horror-Light-1.7B-GGUF - description: | - **Model Name:** Qwen3-Grand-Horror-Light-1.7B - **Base Model:** qingy2024/Qwen3-VLTO-1.7B-Instruct - **Model Type:** Fine-tuned Language Model (Text Generation) - **Size:** 1.7B parameters - **License:** Apache 2.0 - **Language:** English - **Use Case:** Horror storytelling, creative writing, roleplay, scene generation - **Fine-Tuned On:** Custom horror dataset (`DavidAU/horror-nightmare1`) - **Training Method:** Fine-tuned via Unsloth - **Key Features:** - - Specialized in generating atmospheric, intense horror content with elements of madness, gore, and suspense - - Optimized for roleplay and narrative generation with low to medium horror intensity - - Supports high-quality output across multiple quantization levels (Q2_K to Q8_0, f16) - - Designed for use with tools like KoboldCpp, oobabooga/text-generation-webui, and Silly Tavern - - Recommended settings: Temperature 0.4–1.2, Repetition penalty 1.1, Smoothing factor 1.5 for smoother output - - **Note:** This model is a fine-tuned variant of the Qwen3 series, not 
a quantized version. The original base model is available at [qingy2024/Qwen3-VLTO-1.7B-Instruct](https://huggingface.co/qingy2024/Qwen3-VLTO-1.7B-Instruct) and was further adapted for horror-themed creative writing. - - **Ideal For:** Creators, writers, and roleplayers seeking a compact, expressive model for immersive horror storytelling. - overrides: - parameters: - model: Qwen3-Grand-Horror-Light-1.7B.Q4_K_M.gguf - files: - - filename: Qwen3-Grand-Horror-Light-1.7B.Q4_K_M.gguf - sha256: cbbb0c5f6874130a8ae253377fdc7ad25fa2c1e9bb45f1aaad88db853ef985dc - uri: huggingface://mradermacher/Qwen3-Grand-Horror-Light-1.7B-GGUF/Qwen3-Grand-Horror-Light-1.7B.Q4_K_M.gguf - !!merge <<: *qwen3 name: "reform-32b-i1" urls: @@ -21543,26 +14378,3 @@ - filename: Melinoe-30B-A3B-Thinking.i1-Q4_K_M.gguf sha256: 7b9e8fe00faf7803e440542be01974c05b0dcb8b75e1f1c25638027bfb75dbf3 uri: huggingface://mradermacher/Melinoe-30B-A3B-Thinking-i1-GGUF/Melinoe-30B-A3B-Thinking.i1-Q4_K_M.gguf -- !!merge <<: *llama3 - name: "maenad-70b-i1" - urls: - - https://huggingface.co/mradermacher/Maenad-70B-i1-GGUF - description: | - **Maenad-70B** is a large language model designed for creative writing, roleplay (RP/ERP), and narrative generation. Built using the DELLA merge method, it combines three distinct 70B-scale models—**Entropicengine/Pinecone-Titan-70b**, **KaraKaraWitch/GoldDiamondGold-L33-70b**, and **Babsie/Sao10K-Euryale-v2.1-70B-labmod**—into a single, cohesive personality-rich model. - - Key features: - - **Highly creative and expressive**: Excels in storytelling, humor, and character-driven dialogue. - - **Long-context capable**: Optimized for extended narrative sessions and complex world-building. - - **Open-source & customizable**: Based on Llama3 licensing; compatible with tools like mergekit and GGUF for local deployment. - - **Community-validated**: Tested in production environments and praised for strong performance and personality retention. 
- - Ideal for writers, game masters, and AI enthusiasts seeking a bold, imaginative, and dynamic language model. Not suitable for all audiences due to mature content (NSFW, 18+). - - *Base model: Entropicengine/Pinecone-Titan-70b | Merge method: DELLA | License: Llama3* - overrides: - parameters: - model: Maenad-70B.i1-Q4_K_M.gguf - files: - - filename: Maenad-70B.i1-Q4_K_M.gguf - sha256: dd5615ba1ab4ce2a3614afd547e0457fc14c7182de0d2e5f80d84323ee53ec1f - uri: huggingface://mradermacher/Maenad-70B-i1-GGUF/Maenad-70B.i1-Q4_K_M.gguf