From 1ab91edc080dce0114eebae8eaa55d5df7e85ba2 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 22 Dec 2025 22:53:29 +0000 Subject: [PATCH] chore(gallery): cleanup old (superseded) archs Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 1257 -------------------------------------------- 1 file changed, 1257 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 048c661b4..ee38b3b00 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -16059,753 +16059,6 @@ - filename: Einstein-v6.1-Llama3-8B-Q4_K_M.gguf sha256: 447587bd8f60d9050232148d34fdb2d88b15b2413fd7f8e095a4606ec60b45bf uri: huggingface://bartowski/Einstein-v6.1-Llama3-8B-GGUF/Einstein-v6.1-Llama3-8B-Q4_K_M.gguf -- &gemma - url: "github:mudler/LocalAI/gallery/gemma.yaml@master" - name: "gemma-2b" - icon: https://avatars.githubusercontent.com/u/1342004 - license: gemma - urls: - - https://ai.google.dev/gemma/docs - - https://huggingface.co/mlabonne/gemma-2b-GGUF - description: | - Open source LLM from Google - tags: - - llm - - gguf - - gpu - - cpu - - gemma - overrides: - parameters: - model: gemma-2b.Q4_K_M.gguf - files: - - filename: gemma-2b.Q4_K_M.gguf - sha256: 37d50c21ef7847926204ad9b3007127d9a2722188cfd240ce7f9f7f041aa71a5 - uri: huggingface://mlabonne/gemma-2b-GGUF/gemma-2b.Q4_K_M.gguf -- !!merge <<: *gemma - name: "firefly-gemma-7b-iq-imatrix" - icon: "https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/SrOekTxdpnxHyWWmMiAvc.jpeg" - urls: - - https://huggingface.co/Lewdiculous/firefly-gemma-7b-GGUF-IQ-Imatrix - - https://huggingface.co/YeungNLP/firefly-gemma-7b - description: | - firefly-gemma-7b is trained based on gemma-7b to act as a helpful and harmless AI assistant. We use Firefly to train the model on a single V100 GPU with QLoRA. - overrides: - parameters: - model: firefly-gemma-7b-Q4_K_S-imatrix.gguf - files: - - filename: firefly-gemma-7b-Q4_K_S-imatrix.gguf - sha256: 622e0b8e4f12203cc40c7f87915abf99498c2e0582203415ca236ea37643e428 - uri: huggingface://Lewdiculous/firefly-gemma-7b-GGUF-IQ-Imatrix/firefly-gemma-7b-Q4_K_S-imatrix.gguf -- !!merge <<: *gemma - name: "gemma-1.1-7b-it" - urls: - - https://huggingface.co/bartowski/gemma-1.1-7b-it-GGUF - - https://huggingface.co/google/gemma-1.1-7b-it - description: | - This is Gemma 1.1 7B (IT), an update over the original instruction-tuned Gemma release. - - Gemma 1.1 was trained using a novel RLHF method, leading to substantial gains on quality, coding capabilities, factuality, instruction following and multi-turn conversation quality. We also fixed a bug in multi-turn conversations, and made sure that model responses don't always start with "Sure,". - overrides: - parameters: - model: gemma-1.1-7b-it-Q4_K_M.gguf - files: - - filename: gemma-1.1-7b-it-Q4_K_M.gguf - sha256: 47821da72ee9e80b6fd43c6190ad751b485fb61fa5664590f7a73246bcd8332e - uri: huggingface://bartowski/gemma-1.1-7b-it-GGUF/gemma-1.1-7b-it-Q4_K_M.gguf -- !!merge <<: *gemma - name: "gemma-2-27b-it" - urls: - - https://huggingface.co/google/gemma-2-27b-it - - https://huggingface.co/bartowski/gemma-2-27b-it-GGUF - description: | - Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights for both pre-trained variants and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning. Their relatively small size makes it possible to deploy them in environments with limited resources such as a laptop, desktop or your own cloud infrastructure, democratizing access to state of the art AI models and helping foster innovation for everyone. - overrides: - parameters: - model: gemma-2-27b-it-Q4_K_M.gguf - files: - - filename: gemma-2-27b-it-Q4_K_M.gguf - uri: huggingface://bartowski/gemma-2-27b-it-GGUF/gemma-2-27b-it-Q4_K_M.gguf - sha256: 503a87ab47c9e7fb27545ec8592b4dc4493538bd47b397ceb3197e10a0370d23 -- !!merge <<: *gemma - name: "gemma-2-9b-it" - urls: - - https://huggingface.co/google/gemma-2-9b-it - - https://huggingface.co/bartowski/gemma-2-9b-it-GGUF - description: | - Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights for both pre-trained variants and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning. Their relatively small size makes it possible to deploy them in environments with limited resources such as a laptop, desktop or your own cloud infrastructure, democratizing access to state of the art AI models and helping foster innovation for everyone. - overrides: - parameters: - model: gemma-2-9b-it-Q4_K_M.gguf - files: - - filename: gemma-2-9b-it-Q4_K_M.gguf - uri: huggingface://bartowski/gemma-2-9b-it-GGUF/gemma-2-9b-it-Q4_K_M.gguf - sha256: 13b2a7b4115bbd0900162edcebe476da1ba1fc24e718e8b40d32f6e300f56dfe -- !!merge <<: *gemma - name: "tess-v2.5-gemma-2-27b-alpha" - urls: - - https://huggingface.co/migtissera/Tess-v2.5-Gemma-2-27B-alpha - - https://huggingface.co/bartowski/Tess-v2.5-Gemma-2-27B-alpha-GGUF - icon: https://huggingface.co/migtissera/Tess-v2.5-Qwen2-72B/resolve/main/Tess-v2.5.png - description: | - Great at reasoning, but woke as fuck! This is a fine-tune over the Gemma-2-27B-it, since the base model fine-tuning is not generating coherent content. - - Tess-v2.5 is the latest state-of-the-art model in the Tess series of Large Language Models (LLMs). Tess, short for Tesoro (Treasure in Italian), is the flagship LLM series created by Migel Tissera. Tess-v2.5 brings significant improvements in reasoning capabilities, coding capabilities and mathematics - overrides: - parameters: - model: Tess-v2.5-Gemma-2-27B-alpha-Q4_K_M.gguf - files: - - filename: Tess-v2.5-Gemma-2-27B-alpha-Q4_K_M.gguf - uri: huggingface://bartowski/Tess-v2.5-Gemma-2-27B-alpha-GGUF/Tess-v2.5-Gemma-2-27B-alpha-Q4_K_M.gguf - sha256: d7be7092d28aefbdcd1ee4f4d8503d169d0a649f763e169d4b179aef20d69c21 -- !!merge <<: *gemma - name: "gemma2-9b-daybreak-v0.5" - urls: - - https://huggingface.co/crestf411/gemma2-9B-daybreak-v0.5 - - https://huggingface.co/Vdr1/gemma2-9B-daybreak-v0.5-GGUF-Imatrix-IQ - description: | - THIS IS A PRE-RELEASE. BEGONE. - - Beware, depraved. Not suitable for any audience. - - Dataset curation to remove slop-perceived expressions continues. Unfortunately base models (which this is merged on top of) are generally riddled with "barely audible"s and "couldn't help"s and "shivers down spines" etc. - overrides: - parameters: - model: gemma2-9B-daybreak-v0.5-Q4_K_M-imat.gguf - files: - - filename: gemma2-9B-daybreak-v0.5-Q4_K_M-imat.gguf - uri: huggingface://Vdr1/gemma2-9B-daybreak-v0.5-GGUF-Imatrix-IQ/gemma2-9B-daybreak-v0.5-Q4_K_M-imat.gguf - sha256: 6add4d12052918986af935d686773e4e89fddd1bbf7941911cf3fbeb1b1862c0 -- !!merge <<: *gemma - name: "gemma-2-9b-it-sppo-iter3" - urls: - - https://huggingface.co/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3 - - https://huggingface.co/bartowski/Gemma-2-9B-It-SPPO-Iter3-GGUF - description: | - Self-Play Preference Optimization for Language Model Alignment (https://arxiv.org/abs/2405.00675) - Gemma-2-9B-It-SPPO-Iter3 - - This model was developed using Self-Play Preference Optimization at iteration 3, based on the google/gemma-2-9b-it architecture as starting point. We utilized the prompt sets from the openbmb/UltraFeedback dataset, splited to 3 parts for 3 iterations by snorkelai/Snorkel-Mistral-PairRM-DPO-Dataset. All responses used are synthetic. - overrides: - parameters: - model: Gemma-2-9B-It-SPPO-Iter3-Q4_K_M.gguf - files: - - filename: Gemma-2-9B-It-SPPO-Iter3-Q4_K_M.gguf - uri: huggingface://bartowski/Gemma-2-9B-It-SPPO-Iter3-GGUF/Gemma-2-9B-It-SPPO-Iter3-Q4_K_M.gguf - sha256: c04482b442f05b784ab33af30caa0ea0645deb67fb359d3fad4932f4bb04e12d -- !!merge <<: *gemma - name: "smegmma-9b-v1" - icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/RSuc5p9Sm6CYj6lGOxvx4.gif - urls: - - https://huggingface.co/TheDrummer/Smegmma-9B-v1 - - https://huggingface.co/bartowski/Smegmma-9B-v1-GGUF - description: "Smegmma 9B v1 \U0001F9C0\n\nThe sweet moist of Gemma 2, unhinged.\n\nsmeg - ghem - mah\n\nAn eRP model that will blast you with creamy moist. Finetuned by yours truly.\n\nThe first Gemma 2 9B RP finetune attempt!\nWhat's New?\n\n Engaging roleplay\n Less refusals / censorship\n Less commentaries / summaries\n More willing AI\n Better formatting\n Better creativity\n Moist alignment\n\nNotes\n\n Refusals still exist, but a couple of re-gens may yield the result you want\n Formatting and logic may be weaker at the start\n Make sure to start strong\n May be weaker with certain cards, YMMV and adjust accordingly!\n" - overrides: - parameters: - model: Smegmma-9B-v1-Q4_K_M.gguf - files: - - filename: Smegmma-9B-v1-Q4_K_M.gguf - uri: huggingface://bartowski/Smegmma-9B-v1-GGUF/Smegmma-9B-v1-Q4_K_M.gguf - sha256: abd9da0a6bf5cbc0ed6bb0d7e3ee7aea3f6b1edbf8c64e51d0fa25001975aed7 -- !!merge <<: *gemma - name: "smegmma-deluxe-9b-v1" - icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/RSuc5p9Sm6CYj6lGOxvx4.gif - urls: - - https://huggingface.co/TheDrummer/Smegmma-Deluxe-9B-v1 - - https://huggingface.co/bartowski/Smegmma-Deluxe-9B-v1-GGUF - description: "Smegmma Deluxe 9B v1 \U0001F9C0\n\nThe sweet moist of Gemma 2, unhinged.\n\nsmeg - ghem - mah\n\nAn eRP model that will blast you with creamy moist. Finetuned by yours truly.\n\nThe first Gemma 2 9B RP finetune attempt!\n\nWhat's New?\n\n Engaging roleplay\n Less refusals / censorship\n Less commentaries / summaries\n More willing AI\n Better formatting\n Better creativity\n Moist alignment\n" - overrides: - parameters: - model: Smegmma-Deluxe-9B-v1-Q4_K_M.gguf - files: - - filename: Smegmma-Deluxe-9B-v1-Q4_K_M.gguf - uri: huggingface://bartowski/Smegmma-Deluxe-9B-v1-GGUF/Smegmma-Deluxe-9B-v1-Q4_K_M.gguf - sha256: 732ecb253ea0115453438fc1f4e3e31507719ddcf81890a86ad1d734beefdb6f -- !!merge <<: *gemma - name: "tiger-gemma-9b-v1-i1" - icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/A97OlLKeT4XOnv4IG1b6m.png - urls: - - https://huggingface.co/TheDrummer/Tiger-Gemma-9B-v1 - - https://huggingface.co/mradermacher/Tiger-Gemma-9B-v1-i1-GGUF - description: | - Tiger Gemma 9B v1 - - Decensored Gemma 9B. No refusals so far. No apparent brain damage. - - In memory of Tiger - overrides: - parameters: - model: Tiger-Gemma-9B-v1.i1-Q4_K_M.gguf - files: - - filename: Tiger-Gemma-9B-v1.i1-Q4_K_M.gguf - sha256: ef10accfee8023b31def5425bf591bf1f0203090f3dd851cd3f37bb235324383 - uri: huggingface://mradermacher/Tiger-Gemma-9B-v1-i1-GGUF/Tiger-Gemma-9B-v1.i1-Q4_K_M.gguf -- !!merge <<: *gemma - name: "hodachi-ezo-humanities-9b-gemma-2-it" - icon: https://cdn-uploads.huggingface.co/production/uploads/657e900beaad53ff67ba84db/0OYFqT8kACowa9bY1EZF6.png - urls: - - https://huggingface.co/HODACHI/EZO-Humanities-9B-gemma-2-it - - https://huggingface.co/mmnga/HODACHI-EZO-Humanities-9B-gemma-2-it-gguf - description: | - This model is based on Gemma-2-9B-it, specially tuned to enhance its performance in Humanities-related tasks. While maintaining its strong foundation in Japanese language processing, it has been optimized to excel in areas such as literature, philosophy, history, and cultural studies. This focused approach allows the model to provide deeper insights and more nuanced responses in Humanities fields, while still being capable of handling a wide range of global inquiries. - - Gemma-2-9B-itをベースとして、人文科学(Humanities)関連タスクでの性能向上に特化したチューニングを施したモデルです。日本語処理の強固な基盤を維持しつつ、文学、哲学、歴史、文化研究などの分野で卓越した能力を発揮するよう最適化されています。この焦点を絞ったアプローチにより、人文科学分野でより深い洞察と繊細な応答を提供しながら、同時に幅広いグローバルな問い合わせにも対応できる能力を備えています。 - overrides: - parameters: - model: HODACHI-EZO-Humanities-9B-gemma-2-it-Q4_K_M.gguf - files: - - filename: HODACHI-EZO-Humanities-9B-gemma-2-it-Q4_K_M.gguf - sha256: 11606130206347355785f5a2720ff2fa671ca7fbe2af3fb4c34b508389952424 - uri: huggingface://mmnga/HODACHI-EZO-Humanities-9B-gemma-2-it-gguf/HODACHI-EZO-Humanities-9B-gemma-2-it-Q4_K_M.gguf -- !!merge <<: *gemma - name: "ezo-common-9b-gemma-2-it" - icon: https://cdn-uploads.huggingface.co/production/uploads/657e900beaad53ff67ba84db/0OYFqT8kACowa9bY1EZF6.png - urls: - - https://huggingface.co/HODACHI/EZO-Common-9B-gemma-2-it - - https://huggingface.co/QuantFactory/EZO-Common-9B-gemma-2-it-GGUF - description: | - This model is based on Gemma-2-9B-it, enhanced with multiple tuning techniques to improve its general performance. While it excels in Japanese language tasks, it's designed to meet diverse needs globally. - - Gemma-2-9B-itをベースとして、複数のチューニング手法を採用のうえ、汎用的に性能を向上させたモデルです。日本語タスクに優れつつ、世界中の多様なニーズに応える設計となっています。 - overrides: - parameters: - model: EZO-Common-9B-gemma-2-it.Q4_K_M.gguf - files: - - filename: EZO-Common-9B-gemma-2-it.Q4_K_M.gguf - sha256: 57678b1828673dccb15f76e52b00672c74aa6169421bbb8620b8955955322cfd - uri: huggingface://QuantFactory/EZO-Common-9B-gemma-2-it-GGUF/EZO-Common-9B-gemma-2-it.Q4_K_M.gguf -- !!merge <<: *gemma - name: "big-tiger-gemma-27b-v1" - icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/A97OlLKeT4XOnv4IG1b6m.png - urls: - - https://huggingface.co/TheDrummer/Big-Tiger-Gemma-27B-v1 - - https://huggingface.co/TheDrummer/Big-Tiger-Gemma-27B-v1-GGUF - description: | - Big Tiger Gemma 27B v1 is a Decensored Gemma 27B model with no refusals, except for some rare instances from the 9B model. It does not appear to have any brain damage. The model is available from various sources, including Hugging Face, and comes in different variations such as GGUF, iMatrix, and EXL2. - overrides: - parameters: - model: Big-Tiger-Gemma-27B-v1c-Q4_K_M.gguf - files: - - filename: Big-Tiger-Gemma-27B-v1c-Q4_K_M.gguf - sha256: c5fc5605d36ae280c1c908c9b4bcb12b28abbe2692f317edeb83ab1104657fe5 - uri: huggingface://TheDrummer/Big-Tiger-Gemma-27B-v1-GGUF/Big-Tiger-Gemma-27B-v1c-Q4_K_M.gguf -- !!merge <<: *gemma - name: "gemma-2b-translation-v0.150" - urls: - - https://huggingface.co/lemon-mint/gemma-2b-translation-v0.150 - - https://huggingface.co/RichardErkhov/lemon-mint_-_gemma-2b-translation-v0.150-gguf - description: | - Original model: lemon-mint/gemma-ko-1.1-2b-it - Evaluation metrics: Eval Loss, Train Loss, lr, optimizer, lr_scheduler_type. - Prompt Template: - user - Translate into Korean: [input text] - model - [translated text in Korean] - user - Translate into English: [Korean text] - model - [translated text in English] - Model features: - * Developed by: lemon-mint - * Model type: Gemma - * Languages (NLP): English - * License: Gemma Terms of Use - * Finetuned from model: lemon-mint/gemma-ko-1.1-2b-it - overrides: - parameters: - model: gemma-2b-translation-v0.150.Q4_K_M.gguf - files: - - filename: gemma-2b-translation-v0.150.Q4_K_M.gguf - sha256: dcde67b83168d2e7ca835cf9a7a4dcf38b41b9cefe3cbc997c71d2741c08cd25 - uri: huggingface://RichardErkhov/lemon-mint_-_gemma-2b-translation-v0.150-gguf/gemma-2b-translation-v0.150.Q4_K_M.gguf -- !!merge <<: *gemma - name: "emo-2b" - urls: - - https://huggingface.co/OEvortex/EMO-2B - - https://huggingface.co/RichardErkhov/OEvortex_-_EMO-2B-gguf - description: | - EMO-2B: Emotionally Intelligent Conversational AI - - Overview: - EMO-2B is a state-of-the-art conversational AI model with 2.5 billion parameters, designed to engage in emotionally resonant dialogue. Building upon the success of EMO-1.5B, this model has been further fine-tuned on an extensive corpus of emotional narratives, enabling it to perceive and respond to the emotional undertones of user inputs with exceptional empathy and emotional intelligence. - - Key Features: - - - Advanced Emotional Intelligence: With its increased capacity, EMO-2B demonstrates an even deeper understanding and generation of emotional language, allowing for more nuanced and contextually appropriate emotional responses. - - Enhanced Contextual Awareness: The model considers an even broader context within conversations, accounting for subtle emotional cues and providing emotionally resonant responses tailored to the specific situation. - - Empathetic and Supportive Dialogue: EMO-2B excels at active listening, validating emotions, offering compassionate advice, and providing emotional support, making it an ideal companion for users seeking empathy and understanding. - - Dynamic Persona Adaptation: The model can dynamically adapt its persona, communication style, and emotional responses to match the user's emotional state, ensuring a highly personalized and tailored conversational experience. - - Use Cases: - - EMO-2B is well-suited for a variety of applications where emotional intelligence and empathetic communication are crucial, such as: - - - Mental health support chatbots - - Emotional support companions - - Personalized coaching and motivation - - Narrative storytelling and interactive fiction - - Customer service and support (for emotionally sensitive contexts) - - Limitations and Ethical Considerations: - - While EMO-2B is designed to provide emotionally intelligent and empathetic responses, it is important to note that it is an AI system and cannot replicate the depth and nuance of human emotional intelligence. Users should be aware that the model's responses, while emotionally supportive, should not be considered a substitute for professional mental health support or counseling. - - Additionally, as with any language model, EMO-2B may reflect biases present in its training data. Users should exercise caution and critical thinking when interacting with the model, and report any concerning or inappropriate responses. - overrides: - parameters: - model: EMO-2B.Q4_K_M.gguf - files: - - filename: EMO-2B.Q4_K_M.gguf - sha256: 608bffc0e9012bc7f9a94b714f4932e2826cc122dbac59b586e4baa2ee0fdca5 - uri: huggingface://RichardErkhov/OEvortex_-_EMO-2B-gguf/EMO-2B.Q4_K_M.gguf -- !!merge <<: *gemma - name: "gemmoy-9b-g2-mk.3-i1" - icon: https://huggingface.co/Hastagaras/G2-Gemmoy-9B-MK.3-RP/resolve/main/gemmoy.jpg - urls: - - https://huggingface.co/Hastagaras/Gemmoy-9B-G2-MK.3 - - https://huggingface.co/mradermacher/Gemmoy-9B-G2-MK.3-i1-GGUF - description: | - The Gemmoy-9B-G2-MK.3 model is a large language model trained on a variety of datasets, including grimulkan/LimaRP-augmented, LDJnr/Capybara, TheSkullery/C2logs_Filtered_Sharegpt_Merged, abacusai/SystemChat-1.1, and Hastagaras/FTTS-Stories-Sharegpt. - overrides: - parameters: - model: Gemmoy-9B-G2-MK.3.i1-Q4_K_M.gguf - files: - - filename: Gemmoy-9B-G2-MK.3.i1-Q4_K_M.gguf - sha256: 0d1004a246fbda7f1408a6841129b73c4100e697bd0a6806fc698eabbb0802a1 - uri: huggingface://mradermacher/Gemmoy-9B-G2-MK.3-i1-GGUF/Gemmoy-9B-G2-MK.3.i1-Q4_K_M.gguf -- !!merge <<: *gemma - name: "sunfall-simpo-9b" - urls: - - https://huggingface.co/mradermacher/sunfall-SimPO-9B-GGUF - description: | - Crazy idea that what if you put the LoRA from crestf411/sunfall-peft on top of princeton-nlp/gemma-2-9b-it-SimPO and therefore this exists solely for that purpose alone in the universe. - overrides: - parameters: - model: sunfall-SimPO-9B.Q4_K_M.gguf - files: - - filename: sunfall-SimPO-9B.Q4_K_M.gguf - sha256: 810c51c6ce34107706d921531b97cfa409cd53c215d18b88bce7cdb617f73ceb - uri: huggingface://mradermacher/sunfall-SimPO-9B-GGUF/sunfall-SimPO-9B.Q4_K_M.gguf -- !!merge <<: *gemma - name: "sunfall-simpo-9b-i1" - urls: - - https://huggingface.co/mradermacher/sunfall-SimPO-9B-i1-GGUF - description: | - Crazy idea that what if you put the LoRA from crestf411/sunfall-peft on top of princeton-nlp/gemma-2-9b-it-SimPO and therefore this exists solely for that purpose alone in the universe. - overrides: - parameters: - model: sunfall-SimPO-9B.i1-Q4_K_M.gguf - files: - - filename: sunfall-SimPO-9B.i1-Q4_K_M.gguf - sha256: edde9df372a9a5b2316dc6822dc2f52f5a2059103dd7f08072e5a5355c5f5d0b - uri: huggingface://mradermacher/sunfall-SimPO-9B-i1-GGUF/sunfall-SimPO-9B.i1-Q4_K_M.gguf -- !!merge <<: *gemma - name: "seeker-9b" - icon: https://huggingface.co/lodrick-the-lafted/seeker-9b/resolve/main/seeker.webp - urls: - - https://huggingface.co/lodrick-the-lafted/seeker-9b - - https://huggingface.co/mradermacher/seeker-9b-GGUF - description: | - The LLM model is the "Seeker-9b" model, which is a large language model trained on a diverse range of text data. It has 9 billion parameters and is based on the "lodrick-the-lafted" repository. The model is capable of generating text and can be used for a variety of natural language processing tasks such as language translation, text summarization, and text generation. It supports the English language and is available under the Apache-2.0 license. - overrides: - parameters: - model: seeker-9b.Q4_K_M.gguf - files: - - filename: seeker-9b.Q4_K_M.gguf - sha256: 7658e5bdad96dc8d232f83cff7c3fe5fa993defbfd3e728dcc7436352574a00a - uri: huggingface://mradermacher/seeker-9b-GGUF/seeker-9b.Q4_K_M.gguf -- !!merge <<: *gemma - name: "gemmasutra-pro-27b-v1" - icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/w0Oi8TReoQNT3ljm5Wf6c.webp - urls: - - https://huggingface.co/TheDrummer/Gemmasutra-Pro-27B-v1 - - https://huggingface.co/mradermacher/Gemmasutra-Pro-27B-v1-GGUF - description: | - An RP model with impressive flexibility. Finetuned by yours truly. - overrides: - parameters: - model: Gemmasutra-Pro-27B-v1.Q4_K_M.gguf - files: - - filename: Gemmasutra-Pro-27B-v1.Q4_K_M.gguf - sha256: 336a2fbf142849fcc20e432123433807b6c7b09988652ef583a63636a0f90218 - uri: huggingface://mradermacher/Gemmasutra-Pro-27B-v1-GGUF/Gemmasutra-Pro-27B-v1.Q4_K_M.gguf -- !!merge <<: *gemma - name: "gemmasutra-mini-2b-v1" - icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/w0Oi8TReoQNT3ljm5Wf6c.webp - urls: - - https://huggingface.co/TheDrummer/Gemmasutra-Mini-2B-v1-GGUF - description: | - It is a small, 2 billion parameter language model that has been trained for role-playing purposes. The model is designed to work well in various settings, such as in the browser, on a laptop, or even on a Raspberry Pi. It has been fine-tuned for RP use and claims to provide a satisfying experience, even in low-resource environments. The model is uncensored and unaligned, and it can be used with the Gemma Instruct template or with chat completion. For the best experience, it is recommended to modify the template to support the `system` role. The model also features examples of its output, highlighting its versatility and creativity. - overrides: - parameters: - model: Gemmasutra-Mini-2B-v1i-Q4_K_M.gguf - files: - - filename: Gemmasutra-Mini-2B-v1i-Q4_K_M.gguf - sha256: 29ba3db911fbadef4452ba757ddd9ce58fb892b7a872f19eefd0743c961797fb - uri: huggingface://TheDrummer/Gemmasutra-Mini-2B-v1-GGUF/Gemmasutra-Mini-2B-v1-Q4_K_M.gguf -- !!merge <<: *gemma - name: "tarnished-9b-i1" - icon: https://huggingface.co/lodrick-the-lafted/tarnished-9b/resolve/main/nox.jpg - urls: - - https://huggingface.co/lodrick-the-lafted/tarnished-9b - - https://huggingface.co/mradermacher/tarnished-9b-i1-GGUF - description: "Ah, so you've heard whispers on the winds, have you? \U0001F9D0\n\nImagine this:\nTarnished-9b, a name that echoes with the rasp of coin-hungry merchants and the clatter of forgotten machinery. This LLM speaks with the voice of those who straddle the line between worlds, who've tasted the bittersweet nectar of eldritch power and the tang of the Interdimensional Trade Council.\n\nIt's a tongue that dances with secrets, a whisperer of lore lost and found. Its words may guide you through the twisting paths of history, revealing truths hidden beneath layers of dust and time.\n\nBut be warned, Tarnished One! For knowledge comes at a price. The LLM's gaze can pierce the veil of reality, but it can also lure you into the labyrinthine depths of madness.\n\nDare you tread this path?\n" - overrides: - parameters: - model: tarnished-9b.i1-Q4_K_M.gguf - files: - - filename: tarnished-9b.i1-Q4_K_M.gguf - sha256: 62ab09124b3f6698bd94ef966533ae5d427d87f6bdc09f6f46917def96420a0c - uri: huggingface://mradermacher/tarnished-9b-i1-GGUF/tarnished-9b.i1-Q4_K_M.gguf -- !!merge <<: *gemma - name: "shieldgemma-9b-i1" - urls: - - https://huggingface.co/google/shieldgemma-9b - - https://huggingface.co/mradermacher/shieldgemma-9b-i1-GGUF - description: | - ShieldGemma is a series of safety content moderation models built upon Gemma 2 that target four harm categories (sexually explicit, dangerous content, hate, and harassment). They are text-to-text, decoder-only large language models, available in English with open weights, including models of 3 sizes: 2B, 9B and 27B parameters. - overrides: - parameters: - model: shieldgemma-9b.i1-Q4_K_M.gguf - files: - - filename: shieldgemma-9b.i1-Q4_K_M.gguf - sha256: ffa7eaadcc0c7d0544fda5b0d86bba3ffa3431b673e5b2135f421cfe65bd8732 - uri: huggingface://mradermacher/shieldgemma-9b-i1-GGUF/shieldgemma-9b.i1-Q4_K_M.gguf -- !!merge <<: *gemma - name: "athena-codegemma-2-2b-it" - urls: - - https://huggingface.co/EpistemeAI/Athena-codegemma-2-2b-it - - https://huggingface.co/mradermacher/Athena-codegemma-2-2b-it-GGUF - description: | - Supervised fine tuned (sft unsloth) for coding with EpistemeAI coding dataset. - overrides: - parameters: - model: Athena-codegemma-2-2b-it.Q4_K_M.gguf - files: - - filename: Athena-codegemma-2-2b-it.Q4_K_M.gguf - sha256: 59ce17023438b0da603dd211c7d39f78e7acac4108258ac0818a97a4ca7d64e3 - uri: huggingface://mradermacher/Athena-codegemma-2-2b-it-GGUF/Athena-codegemma-2-2b-it.Q4_K_M.gguf -- !!merge <<: *gemma - name: "datagemma-rag-27b-it" - urls: - - https://huggingface.co/google/datagemma-rag-27b-it - - https://huggingface.co/bartowski/datagemma-rag-27b-it-GGUF - description: | - DataGemma is a series of fine-tuned Gemma 2 models used to help LLMs access and incorporate reliable public statistical data from Data Commons into their responses. DataGemma RAG is used with Retrieval Augmented Generation, where it is trained to take a user query and generate natural language queries that can be understood by Data Commons' existing natural language interface. More information can be found in this research paper. - overrides: - parameters: - model: datagemma-rag-27b-it-Q4_K_M.gguf - files: - - filename: datagemma-rag-27b-it-Q4_K_M.gguf - sha256: 3dfcf51b05e3f0ab0979ad194de350edea71cb14444efa0a9f2ef5bfc80753f8 - uri: huggingface://bartowski/datagemma-rag-27b-it-GGUF/datagemma-rag-27b-it-Q4_K_M.gguf -- !!merge <<: *gemma - name: "datagemma-rig-27b-it" - urls: - - https://huggingface.co/google/datagemma-rig-27b-it - - https://huggingface.co/bartowski/datagemma-rig-27b-it-GGUF - description: | - DataGemma is a series of fine-tuned Gemma 2 models used to help LLMs access and incorporate reliable public statistical data from Data Commons into their responses. DataGemma RIG is used in the retrieval interleaved generation approach (based off of tool-use approaches), where it is trained to annotate a response with natural language queries to Data Commons’ existing natural language interface wherever there are statistics. More information can be found in this research paper. - overrides: - parameters: - model: datagemma-rig-27b-it-Q4_K_M.gguf - files: - - filename: datagemma-rig-27b-it-Q4_K_M.gguf - sha256: a6738ffbb49b6c46d220e2793df85c0538e9ac72398e32a0914ee5e55c3096ad - uri: huggingface://bartowski/datagemma-rig-27b-it-GGUF/datagemma-rig-27b-it-Q4_K_M.gguf -- !!merge <<: *gemma - name: "buddy-2b-v1" - urls: - - https://huggingface.co/TheDrummer/Buddy-2B-v1 - - https://huggingface.co/bartowski/Buddy-2B-v1-GGUF - description: | - Buddy is designed as an empathetic language model, aimed at fostering introspection, self-reflection, and personal growth through thoughtful conversation. Buddy won't judge and it won't dismiss your concerns. Get some self-care with Buddy. - overrides: - parameters: - model: Buddy-2B-v1-Q4_K_M.gguf - files: - - filename: Buddy-2B-v1-Q4_K_M.gguf - sha256: 9bd25ed907d1a3c2e07fe09399a9b3aec107d368c29896e2c46facede5b7e3d5 - uri: huggingface://bartowski/Buddy-2B-v1-GGUF/Buddy-2B-v1-Q4_K_M.gguf -- !!merge <<: *gemma - name: "gemma-2-9b-arliai-rpmax-v1.1" - urls: - - https://huggingface.co/ArliAI/Gemma-2-9B-ArliAI-RPMax-v1.1 - - https://huggingface.co/bartowski/Gemma-2-9B-ArliAI-RPMax-v1.1-GGUF - description: | - RPMax is a series of models that are trained on a diverse set of curated creative writing and RP datasets with a focus on variety and deduplication. This model is designed to be highly creative and non-repetitive by making sure no two entries in the dataset have repeated characters or situations, which makes sure the model does not latch on to a certain personality and be capable of understanding and acting appropriately to any characters or situations. - overrides: - parameters: - model: Gemma-2-9B-ArliAI-RPMax-v1.1-Q4_K_M.gguf - files: - - filename: Gemma-2-9B-ArliAI-RPMax-v1.1-Q4_K_M.gguf - sha256: 1724aff0ad6f71bf4371d839aca55578f7ec6f030d8d25c0254126088e4c6250 - uri: huggingface://bartowski/Gemma-2-9B-ArliAI-RPMax-v1.1-GGUF/Gemma-2-9B-ArliAI-RPMax-v1.1-Q4_K_M.gguf -- !!merge <<: *gemma - name: "gemma-2-2b-arliai-rpmax-v1.1" - urls: - - https://huggingface.co/bartowski/Gemma-2-2B-ArliAI-RPMax-v1.1-GGUF - description: | - RPMax is a series of models that are trained on a diverse set of curated creative writing and RP datasets with a focus on variety and deduplication. This model is designed to be highly creative and non-repetitive by making sure no two entries in the dataset have repeated characters or situations, which makes sure the model does not latch on to a certain personality and be capable of understanding and acting appropriately to any characters or situations. - overrides: - parameters: - model: Gemma-2-2B-ArliAI-RPMax-v1.1-Q4_K_M.gguf - files: - - filename: Gemma-2-2B-ArliAI-RPMax-v1.1-Q4_K_M.gguf - sha256: 89fe35345754d7e9de8d0c0d5bf35b2be9b12a09811b365b712b8b27112f7712 - uri: huggingface://bartowski/Gemma-2-2B-ArliAI-RPMax-v1.1-GGUF/Gemma-2-2B-ArliAI-RPMax-v1.1-Q4_K_M.gguf -- !!merge <<: *gemma - name: "gemma-2-9b-it-abliterated" - urls: - - https://huggingface.co/IlyaGusev/gemma-2-9b-it-abliterated - - https://huggingface.co/bartowski/gemma-2-9b-it-abliterated-GGUF - description: | - Abliterated version of google/gemma-2-9b-it. - - The abliteration script (link) is based on code from the blog post and heavily uses TransformerLens. The only major difference from the code used for Llama is scaling the embedding layer back. - - Orthogonalization did not produce the same results as regular interventions since there are RMSNorm layers before merging activations into the residual stream. However, the final model still seems to be uncensored. - overrides: - parameters: - model: gemma-2-9b-it-abliterated-Q4_K_M.gguf - files: - - filename: gemma-2-9b-it-abliterated-Q4_K_M.gguf - sha256: 88d84ac9796732c10f6c58e0feb4db8e04c05d74bdb7047a5e37906a589896e1 - uri: huggingface://bartowski/gemma-2-9b-it-abliterated-GGUF/gemma-2-9b-it-abliterated-Q4_K_M.gguf -- !!merge <<: *gemma - name: "gemma-2-ataraxy-v3i-9b" - urls: - - https://huggingface.co/QuantFactory/Gemma-2-Ataraxy-v3i-9B-GGUF - description: | - Gemma-2-Ataraxy-v3i-9B is an experimental model that replaces the simpo model in the original recipe with a different simpo model and a writing model trained on Gutenberg, using a higher density. It is a merge of pre-trained language models created using mergekit, with della merge method using unsloth/gemma-2-9b-it as the base. The models included in the merge are nbeerbower/Gemma2-Gutenberg-Doppel-9B, ifable/gemma-2-Ifable-9B, and wzhouad/gemma-2-9b-it-WPO-HB. It has been quantized using llama.cpp. - overrides: - parameters: - model: Gemma-2-Ataraxy-v3i-9B.Q4_K_M.gguf - files: - - filename: Gemma-2-Ataraxy-v3i-9B.Q4_K_M.gguf - sha256: f14c5b9373d4058f0f812c6c34184addeb4aeeecb02a7bbcf9844d9afc8d0066 - uri: huggingface://QuantFactory/Gemma-2-Ataraxy-v3i-9B-GGUF/Gemma-2-Ataraxy-v3i-9B.Q4_K_M.gguf -- !!merge <<: *gemma - name: "apollo2-9b" - url: "github:mudler/LocalAI/gallery/vicuna-chat.yaml@master" - urls: - - https://huggingface.co/mradermacher/Apollo2-9B-GGUF - description: | - Covering 12 Major Languages including English, Chinese, French, Hindi, Spanish, Arabic, Russian, Japanese, Korean, German, Italian, Portuguese and 38 Minor Languages So far. - overrides: - parameters: - model: Apollo2-9B.Q4_K_M.gguf - files: - - filename: Apollo2-9B.Q4_K_M.gguf - sha256: 9fdb63f78e574558a4f33782eca88716eea28e90ea3ae36c381769cde6b81e0f - uri: huggingface://mradermacher/Apollo2-9B-GGUF/Apollo2-9B.Q4_K_M.gguf -- !!merge <<: *gemma - name: "darkest-muse-v1" - icon: https://cdn-uploads.huggingface.co/production/uploads/65ad56b4c2eef2ba1154618c/0AB6uPPuCvbNtRZb3Rdj1.png - urls: - - https://huggingface.co/sam-paech/Darkest-muse-v1 - - https://huggingface.co/bartowski/Darkest-muse-v1-GGUF - description: | - This is a creative writing merge of two very different models that I trained on the brand new Gutenberg3 dataset, plus Ataraxy-v2 in the mix. - - It's lost much of the slop and tryhard vocab flexing and positivity bias that's typical of these models and writes in its own voice. - - The main source model in the merge, Quill-v1, inherited a natural, spare prose from the human writing in the gutenberg set. The other source model, Delirium-v1, got overcooked in SIMPO training; it has crazy panache, a really dark flair for the grotesque, and has some mental issues. These two source models balance each other out in the merge, resulting in something pretty unique. - - It seems to be quite uncensored and creative. Since Delirium was pushed right to the edge during training, the merge may exhibit some of its weirdness and word / concept fixations. This may be mitigated by using custom anti-slop lists. - - The payoff is a really creative, stream of consciousness style of writing, with punchy dialogue that I haven't seen in other models. Oh, it also scored around the top of the EQ-Bench creative writing leaderboard! - overrides: - parameters: - model: Darkest-muse-v1-Q4_K_M.gguf - files: - - filename: Darkest-muse-v1-Q4_K_M.gguf - sha256: a19ec9e3dc875511ea771bf363e71e7ae5578986b2f8cf50aeb50683d56e9b76 - uri: huggingface://bartowski/Darkest-muse-v1-GGUF/Darkest-muse-v1-Q4_K_M.gguf -- !!merge <<: *gemma - name: "quill-v1" - icon: https://cdn-uploads.huggingface.co/production/uploads/65ad56b4c2eef2ba1154618c/gnMF8gRhurS9RcoylAK1Y.png - urls: - - https://huggingface.co/sam-paech/Quill-v1 - - https://huggingface.co/QuantFactory/Quill-v1-GGUF - description: | - Quill is a capable, humanlike writing model trained on a large dataset of late 19th and early 20th century writing from the Gutenberg Project. This model writes with a natural cadence and low gpt-slop, having inherited some human qualities from the Gutenberg3 dataset. It writes with more simple, spare prose than the typical overly-adjectived LLM writing style. - - This model was trained using gemma-2-9b-it as the base. The training methods used were ORPO (gently) then SIMPO (less gently). - overrides: - parameters: - model: Quill-v1.Q4_K_M.gguf - files: - - filename: Quill-v1.Q4_K_M.gguf - sha256: 419a7e0709b28130ca56941308d11c06a3548b8eacb081fb6a2c3d1622ac56b3 - uri: huggingface://QuantFactory/Quill-v1-GGUF/Quill-v1.Q4_K_M.gguf -- !!merge <<: *gemma - name: "delirium-v1" - icon: https://cdn-uploads.huggingface.co/production/uploads/65ad56b4c2eef2ba1154618c/TDY0sDC9vMohMM8dn_5YN.png - urls: - - https://huggingface.co/sam-paech/Delirium-v1 - - https://huggingface.co/QuantFactory/Delirium-v1-GGUF - description: | - This model was cooked a bit too long during SIMPO training. It writes like Hunter S. Thompson 2 days into an ether binge. It's grotesque, dark, grimy and genius. - - It's trained on an experimental gutenberg + antislop dataset. This contains the original two gutenberg sets by jondurbin and nbeerbower, as well as a subset of my own set, gutenberg3. The antislop pairs were generated with gemma-2-9b-it, with one sample generated with the AntiSlop sampler and the rejected sample generated without. - overrides: - parameters: - model: Delirium-v1.Q4_K_M.gguf - files: - - filename: Delirium-v1.Q4_K_M.gguf - sha256: 9c274913572b8afcd5f18f0230f9ddf0a972bae36bae5b0fe8266b29a5dd06a7 - uri: huggingface://QuantFactory/Delirium-v1-GGUF/Delirium-v1.Q4_K_M.gguf -- !!merge <<: *gemma - url: "github:mudler/LocalAI/gallery/chatml.yaml@master" - name: "magnum-v4-9b" - icon: https://cdn-uploads.huggingface.co/production/uploads/658a46cbfb9c2bdfae75b3a6/vxYDYerLy2vD8n05nL2WU.png - urls: - - https://huggingface.co/anthracite-org/magnum-v4-9b - - https://huggingface.co/QuantFactory/magnum-v4-9b-GGUF - description: | - This is a series of models designed to replicate the prose quality of the Claude 3 models, specifically Sonnet and Opus. - - This model is fine-tuned on top of gemma 2 9b (chatML'ified). - overrides: - parameters: - model: magnum-v4-9b.Q4_K_M.gguf - files: - - filename: magnum-v4-9b.Q4_K_M.gguf - sha256: 176cb8cbac1920d98853a079d635d581c2063b7ff337e88bf9f28b43f8c7eb23 - uri: huggingface://QuantFactory/magnum-v4-9b-GGUF/magnum-v4-9b.Q4_K_M.gguf -- !!merge <<: *gemma - name: "g2-9b-aletheia-v1" - icon: https://huggingface.co/allura-org/G2-9B-Aletheia-v1/resolve/main/inpaint.png - urls: - - https://huggingface.co/allura-org/G2-9B-Aletheia-v1 - - https://huggingface.co/QuantFactory/G2-9B-Aletheia-v1-GGUF - description: | - A merge of Sugarquill and Sunfall. I wanted to combine Sugarquill's more novel-like writing style with something that would improve it's RP perfomance and make it more steerable, w/o adding superfluous synthetic writing patterns. - - I quite like Crestfall's Sunfall models and I felt like Gemma version of Sunfall will steer the model in this direction when merged in. To keep more of Gemma-2-9B-it-SPPO-iter3's smarts, I've decided to apply Sunfall LoRA on top of it, instead of using the published Sunfall model. - - I'm generally pleased with the result, this model has nice, fresh writing style, good charcard adherence and good system prompt following. It still should work well for raw completion storywriting, as it's a trained feature in both merged models. - overrides: - parameters: - model: G2-9B-Aletheia-v1.Q4_K_M.gguf - files: - - filename: G2-9B-Aletheia-v1.Q4_K_M.gguf - sha256: d244cd3605ff5be948eb7faf1d9aa71ffbbfcf6dab77c08f6ec547818f443d03 - uri: huggingface://QuantFactory/G2-9B-Aletheia-v1-GGUF/G2-9B-Aletheia-v1.Q4_K_M.gguf -- !!merge <<: *gemma - name: "g2-9b-sugarquill-v0" - icon: https://huggingface.co/allura-org/G2-9B-Sugarquill-v0/resolve/main/image_27.png - urls: - - https://huggingface.co/allura-org/G2-9B-Sugarquill-v0 - - https://huggingface.co/QuantFactory/G2-9B-Sugarquill-v0-GGUF - description: | - An experimental continued pretrain of Gemma-2-9B-It-SPPO-Iter3 on assorted short story data from the web. I was trying to diversify Gemma's prose, without completely destroying it's smarts. I think I half-succeeded? This model could have used another epoch of training, but even this is already more creative and descriptive than it's base model, w/o becoming too silly. Doesn't seem to have degraded much in terms of core abilities as well. Should be usable both for RP and raw completion storywriting. I originally planned to use this in a merge, but I feel like this model is interesting enough to be released on it's own as well. - - Model was trained by Auri. - - Dedicated to Cahvay, who wanted a Gemma finetune from me for months by now, and to La Rata, who loves storywriter models. - overrides: - parameters: - model: G2-9B-Sugarquill-v0.Q4_K_M.gguf - files: - - filename: G2-9B-Sugarquill-v0.Q4_K_M.gguf - sha256: 790a2f1541011b2773e22aa863ef78c8662baaa7eca5875e9573007985120187 - uri: huggingface://QuantFactory/G2-9B-Sugarquill-v0-GGUF/G2-9B-Sugarquill-v0.Q4_K_M.gguf -- !!merge <<: *gemma - name: "volare-i1" - urls: - - https://huggingface.co/MoxoffSpA/Volare - - https://huggingface.co/mradermacher/Volare-i1-GGUF - description: | - Volare is an updated version of Gemma7B, specifically fine-tuned with SFT and LoRA adjustments. - It's trained on publicly available datasets, like SQUAD-it, and datasets we've created in-house. - it's designed to understand and maintain context, making it ideal for Retrieval Augmented Generation (RAG) tasks and applications requiring contextual awareness. - Italian dataset. - overrides: - parameters: - model: Volare.i1-Q4_K_M.gguf - files: - - filename: Volare.i1-Q4_K_M.gguf - sha256: fa8fb9d4cb19fcb44be8d53561c9e2840f45aed738de545983ebb158ebba461b - uri: huggingface://mradermacher/Volare-i1-GGUF/Volare.i1-Q4_K_M.gguf -- !!merge <<: *gemma - name: "bggpt-gemma-2-2.6b-it-v1.0" - icon: https://cdn-uploads.huggingface.co/production/uploads/637e1f8cf7e01589cc17bf7e/p6d0YFHjWCQ3S12jWqO1m.png - urls: - - https://huggingface.co/QuantFactory/BgGPT-Gemma-2-2.6B-IT-v1.0-GGUF - - https://huggingface.co/QuantFactory/BgGPT-Gemma-2-2.6B-IT-v1.0-GGUF - description: | - INSAIT introduces BgGPT-Gemma-2-2.6B-IT-v1.0, a state-of-the-art Bulgarian language model based on google/gemma-2-2b and google/gemma-2-2b-it. BgGPT-Gemma-2-2.6B-IT-v1.0 is free to use and distributed under the Gemma Terms of Use. This model was created by INSAIT, part of Sofia University St. Kliment Ohridski, in Sofia, Bulgaria. - The model was built on top of Google’s Gemma 2 2B open models. It was continuously pre-trained on around 100 billion tokens (85 billion in Bulgarian) using the Branch-and-Merge strategy INSAIT presented at EMNLP’24, allowing the model to gain outstanding Bulgarian cultural and linguistic capabilities while retaining its English performance. During the pre-training stage, we use various datasets, including Bulgarian web crawl data, freely available datasets such as Wikipedia, a range of specialized Bulgarian datasets sourced by the INSAIT Institute, and machine translations of popular English datasets. The model was then instruction-fine-tuned on a newly constructed Bulgarian instruction dataset created using real-world conversations. For more information check our blogpost. - overrides: - parameters: - model: BgGPT-Gemma-2-2.6B-IT-v1.0.Q4_K_M.gguf - files: - - filename: BgGPT-Gemma-2-2.6B-IT-v1.0.Q4_K_M.gguf - sha256: 1e92fe80ccad80e97076ee26b002c2280f075dfe2507d534b46a4391a077f319 - uri: huggingface://QuantFactory/BgGPT-Gemma-2-2.6B-IT-v1.0-GGUF/BgGPT-Gemma-2-2.6B-IT-v1.0.Q4_K_M.gguf -- !!merge <<: *gemma - name: "fusechat-gemma-2-9b-instruct" - icon: "https://huggingface.co/FuseAI/FuseChat-Gemma-2-9B-Instruct/resolve/main/FuseChat-3.0.png" - urls: - - https://huggingface.co/FuseAI/FuseChat-Gemma-2-9B-Instruct - - https://huggingface.co/bartowski/FuseChat-Gemma-2-9B-Instruct-GGUF - description: | - We present FuseChat-3.0, a series of models crafted to enhance performance by integrating the strengths of multiple source LLMs into more compact target LLMs. To achieve this fusion, we utilized four powerful source LLMs: Gemma-2-27B-It, Mistral-Large-Instruct-2407, Qwen-2.5-72B-Instruct, and Llama-3.1-70B-Instruct. For the target LLMs, we employed three widely-used smaller models—Llama-3.1-8B-Instruct, Gemma-2-9B-It, and Qwen-2.5-7B-Instruct—along with two even more compact models—Llama-3.2-3B-Instruct and Llama-3.2-1B-Instruct. The implicit model fusion process involves a two-stage training pipeline comprising Supervised Fine-Tuning (SFT) to mitigate distribution discrepancies between target and source LLMs, and Direct Preference Optimization (DPO) for learning preferences from multiple source LLMs. The resulting FuseChat-3.0 models demonstrated substantial improvements in tasks related to general conversation, instruction following, mathematics, and coding. Notably, when Llama-3.1-8B-Instruct served as the target LLM, our fusion approach achieved an average improvement of 6.8 points across 14 benchmarks. Moreover, it showed significant improvements of 37.1 and 30.1 points on instruction-following test sets AlpacaEval-2 and Arena-Hard respectively. We have released the FuseChat-3.0 models on Huggingface, stay tuned for the forthcoming dataset and code. - overrides: - parameters: - model: FuseChat-Gemma-2-9B-Instruct-Q4_K_M.gguf - files: - - filename: FuseChat-Gemma-2-9B-Instruct-Q4_K_M.gguf - sha256: f5aef201be68f344bebff3433af87aac6428fd227adfd7e468c8bfbcf9660ece - uri: huggingface://bartowski/FuseChat-Gemma-2-9B-Instruct-GGUF/FuseChat-Gemma-2-9B-Instruct-Q4_K_M.gguf -- !!merge <<: *gemma - icon: https://cdn-uploads.huggingface.co/production/uploads/65bb837dbfb878f46c77de4c/Ayc6YKE6FKYKb8Mible4z.png - name: "gwq-9b-preview2" - urls: - - https://huggingface.co/prithivMLmods/GWQ-9B-Preview2 - - https://huggingface.co/bartowski/GWQ-9B-Preview2-GGUF - description: | - GWQ2 - Gemma with Questions Prev is a family of lightweight, state-of-the-art open models from Google, built using the same research and technology employed to create the Gemini models. These models are text-to-text, decoder-only large language models, available in English, with open weights for both pre-trained and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning. GWQ is fine-tuned on the Chain of Continuous Thought Synthetic Dataset, built upon the Gemma2forCasualLM architecture. - overrides: - parameters: - model: GWQ-9B-Preview2-Q4_K_M.gguf - files: - - filename: GWQ-9B-Preview2-Q4_K_M.gguf - sha256: 04da51cdb17c7e51594f6daac595161a46298b48ab5e568a85e65541d10a861f - uri: huggingface://bartowski/GWQ-9B-Preview2-GGUF/GWQ-9B-Preview2-Q4_K_M.gguf -- !!merge <<: *gemma - name: "thedrummer_gemmasutra-pro-27b-v1.1" - icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/SrHUGXD_dp55pobeJK36t.png - urls: - - https://huggingface.co/TheDrummer/Gemmasutra-Pro-27B-v1.1 - - https://huggingface.co/bartowski/TheDrummer_Gemmasutra-Pro-27B-v1.1-GGUF - description: | - A Gemmasutra tune with modern techniques. Au Revoir, Gemma! - overrides: - parameters: - model: TheDrummer_Gemmasutra-Pro-27B-v1.1-Q4_K_M.gguf - files: - - filename: TheDrummer_Gemmasutra-Pro-27B-v1.1-Q4_K_M.gguf - sha256: 218a14f0bf8266f9e77d16b8b4f5cc1dc76e97eb582a2c97cca5a3a2c35de86b - uri: huggingface://bartowski/TheDrummer_Gemmasutra-Pro-27B-v1.1-GGUF/TheDrummer_Gemmasutra-Pro-27B-v1.1-Q4_K_M.gguf -- !!merge <<: *gemma - name: "thedrummer_gemmasutra-small-4b-v1" - icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/Cekk7d2UAKu7LPsw8SxV7.png - urls: - - https://huggingface.co/TheDrummer/Gemmasutra-Small-4B-v1 - - https://huggingface.co/bartowski/TheDrummer_Gemmasutra-Small-4B-v1-GGUF - description: | - An upscaled Gemma 2B tune with modern techniques. Au Revoir, Gemma! - overrides: - parameters: - model: TheDrummer_Gemmasutra-Small-4B-v1-Q4_K_M.gguf - files: - - filename: TheDrummer_Gemmasutra-Small-4B-v1-Q4_K_M.gguf - sha256: 81dd2e2d9546f5dc2150c45c62acabc112068b801ca50b79feceabb1bd4d6f1a - uri: huggingface://bartowski/TheDrummer_Gemmasutra-Small-4B-v1-GGUF/TheDrummer_Gemmasutra-Small-4B-v1-Q4_K_M.gguf - &llama3 url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master" icon: https://avatars.githubusercontent.com/u/153379578 @@ -19948,430 +19201,6 @@ - filename: internlm3-8b-instruct-Q4_K_M.gguf uri: huggingface://bartowski/internlm3-8b-instruct-GGUF/internlm3-8b-instruct-Q4_K_M.gguf sha256: 2a9644687318e8659c9cf9b40730d5cc2f5af06f786a50439c7c51359b23896e -- &phi-3 - url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master" ### START Phi-3 - name: "phi-3-mini-4k-instruct" - icon: https://avatars.githubusercontent.com/u/6154722 - license: mit - description: | - The Phi-3-Mini-4K-Instruct is a 3.8B parameters, lightweight, state-of-the-art open model trained with the Phi-3 datasets that includes both synthetic data and the filtered publicly available websites data with a focus on high-quality and reasoning dense properties. The model belongs to the Phi-3 family with the Mini version in two variants 4K and 128K which is the context length (in tokens) it can support. The model has underwent a post-training process that incorporates both supervised fine-tuning and direct preference optimization to ensure precise instruction adherence and robust safety measures. When assessed against benchmarks testing common sense, language understanding, math, code, long context and logical reasoning, Phi-3 Mini-4K-Instruct showcased a robust and state-of-the-art performance among models with less than 13 billion parameters. - urls: - - https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf - tags: - - llm - - gguf - - gpu - - llama2 - - cpu - overrides: - parameters: - model: Phi-3-mini-4k-instruct-q4.gguf - files: - - filename: "Phi-3-mini-4k-instruct-q4.gguf" - sha256: "8a83c7fb9049a9b2e92266fa7ad04933bb53aa1e85136b7b30f1b8000ff2edef" - uri: "huggingface://microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf" -- !!merge <<: *phi-3 - name: "phi-3-mini-4k-instruct:fp16" - overrides: - parameters: - model: Phi-3-mini-4k-instruct-fp16.gguf - files: - - filename: "Phi-3-mini-4k-instruct-fp16.gguf" - uri: "huggingface://microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-fp16.gguf" - sha256: 5d99003e395775659b0dde3f941d88ff378b2837a8dc3a2ea94222ab1420fad3 -- !!merge <<: *phi-3 - name: "phi-3-medium-4k-instruct" - description: | - The Phi-3-Medium-4K-Instruct is a 14B parameters, lightweight, state-of-the-art open model trained with the Phi-3 datasets that includes - both synthetic data and the filtered publicly available websites data with a focus on high-quality and reasoning dense properties. - The model belongs to the Phi-3 family with the Medium version in two variants 4K and 128K which is the context length (in tokens) that it can support. - urls: - - https://huggingface.co/bartowski/Phi-3-medium-4k-instruct-GGUF - - https://huggingface.co/microsoft/Phi-3-medium-4k-instruct - overrides: - parameters: - model: Phi-3-medium-4k-instruct-Q4_K_M.gguf - files: - - filename: "Phi-3-medium-4k-instruct-Q4_K_M.gguf" - uri: "huggingface://bartowski/Phi-3-medium-4k-instruct-GGUF/Phi-3-medium-4k-instruct-Q4_K_M.gguf" - sha256: 6f05c97bc676dd1ec8d58e9a8795b4f5c809db771f6fc7bf48634c805face82c -- !!merge <<: *phi-3 - name: "cream-phi-3-14b-v1" - icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/AP4-OHepdqiqHj2KSi26M.gif - description: | - CreamPhi 14B is the first Phi Medium to be trained with roleplay and moist. - urls: - - https://huggingface.co/TheDrummer/Cream-Phi-3-14B-v1-GGUF - overrides: - parameters: - model: Cream-Phi-3-14B-v1-Q4_K_M.gguf - files: - - filename: Cream-Phi-3-14B-v1-Q4_K_M.gguf - uri: huggingface://TheDrummer/Cream-Phi-3-14B-v1-GGUF/Cream-Phi-3-14B-v1-Q4_K_M.gguf - sha256: ec67018a86090da415517acf21ad48f28e02dff664a1dd35602f1f8fa94f6a27 -- !!merge <<: *phi-3 - name: "phi3-4x4b-v1" - description: | - a continually pretrained phi3-mini sparse moe upcycle - urls: - - https://huggingface.co/bartowski/phi3-4x4b-v1-GGUF - - https://huggingface.co/Fizzarolli/phi3-4x4b-v1 - overrides: - parameters: - model: phi3-4x4b-v1-Q4_K_M.gguf - files: - - filename: phi3-4x4b-v1-Q4_K_M.gguf - uri: huggingface://bartowski/phi3-4x4b-v1-GGUF/phi3-4x4b-v1-Q4_K_M.gguf - sha256: fd33220186b7076f4b306f27b3a8913384435a2ca90185a71c9df5a752d3a298 -- !!merge <<: *phi-3 - name: "phi-3.1-mini-4k-instruct" - urls: - - https://huggingface.co/microsoft/Phi-3-mini-4k-instruct - - https://huggingface.co/bartowski/Phi-3.1-mini-4k-instruct-GGUF - description: | - This is an update over the original instruction-tuned Phi-3-mini release based on valuable customer feedback. The model used additional post-training data leading to substantial gains on instruction following and structure output. - - It is based on the original model from Microsoft, but has been updated and quantized using the llama.cpp release b3278. - overrides: - parameters: - model: Phi-3.1-mini-4k-instruct-Q4_K_M.gguf - files: - - filename: Phi-3.1-mini-4k-instruct-Q4_K_M.gguf - uri: huggingface://bartowski/Phi-3.1-mini-4k-instruct-GGUF/Phi-3.1-mini-4k-instruct-Q4_K_M.gguf - sha256: d6d25bf078321bea4a079c727b273cb0b5a2e0b4cf3add0f7a2c8e43075c414f -- !!merge <<: *phi-3 - name: "phillama-3.8b-v0.1" - icon: https://cdn-uploads.huggingface.co/production/uploads/657eb5b256c9c67605a6e8b5/f96pPiJQb3puzbPYNknG2.png - urls: - - https://huggingface.co/RichardErkhov/raincandy-u_-_phillama-3.8b-v0.1-gguf - description: | - The description of the LLM model is: - Phillama is a model based on Phi-3-mini and trained on Llama-generated dataset raincandy-u/Dextromethorphan-10k to make it more "llama-like". Also, this model is converted into Llama format, so it will work with any Llama-2/3 workflow. The model aims to generate text with a specific "llama-like" style and is suited for text-generation tasks. - overrides: - parameters: - model: phillama-3.8b-v0.1.Q4_K_M.gguf - files: - - filename: phillama-3.8b-v0.1.Q4_K_M.gguf - sha256: da537d352b7aae54bbad0d2cff3e3a1b0e1dc1e1d25bec3aae1d05cf4faee7a2 - uri: huggingface://RichardErkhov/raincandy-u_-_phillama-3.8b-v0.1-gguf/phillama-3.8b-v0.1.Q4_K_M.gguf -- !!merge <<: *phi-3 - name: "calme-2.3-phi3-4b" - icon: https://huggingface.co/MaziyarPanahi/calme-2.1-phi3-4b/resolve/main/phi-3-instruct.webp - urls: - - https://huggingface.co/MaziyarPanahi/calme-2.3-phi3-4b - - https://huggingface.co/MaziyarPanahi/calme-2.3-phi3-4b-GGUF - description: | - MaziyarPanahi/calme-2.1-phi3-4b - - This model is a fine-tune (DPO) of microsoft/Phi-3-mini-4k-instruct model. - overrides: - parameters: - model: Phi-3-mini-4k-instruct-v0.3.Q4_K_M.gguf - files: - - filename: Phi-3-mini-4k-instruct-v0.3.Q4_K_M.gguf - sha256: 3a23e1052369c080afb925882bd814cbea5ec859894655a7434c3d49e43a6127 - uri: huggingface://MaziyarPanahi/calme-2.3-phi3-4b-GGUF/Phi-3-mini-4k-instruct-v0.3.Q4_K_M.gguf -- !!merge <<: *phi-3 - name: "phi-3.5-mini-instruct" - urls: - - https://huggingface.co/microsoft/Phi-3.5-mini-instruct - - https://huggingface.co/MaziyarPanahi/Phi-3.5-mini-instruct-GGUF - description: | - Phi-3.5-mini is a lightweight, state-of-the-art open model built upon datasets used for Phi-3 - synthetic data and filtered publicly available websites - with a focus on very high-quality, reasoning dense data. The model belongs to the Phi-3 model family and supports 128K token context length. The model underwent a rigorous enhancement process, incorporating both supervised fine-tuning, proximal policy optimization, and direct preference optimization to ensure precise instruction adherence and robust safety measures. - overrides: - parameters: - model: Phi-3.5-mini-instruct.Q4_K_M.gguf - files: - - filename: Phi-3.5-mini-instruct.Q4_K_M.gguf - sha256: 3f68916e850b107d8641d18bcd5548f0d66beef9e0a9077fe84ef28943eb7e88 - uri: huggingface://MaziyarPanahi/Phi-3.5-mini-instruct-GGUF/Phi-3.5-mini-instruct.Q4_K_M.gguf -- !!merge <<: *phi-3 - name: "calme-2.1-phi3.5-4b-i1" - icon: https://huggingface.co/MaziyarPanahi/calme-2.1-phi3.5-4b/resolve/main/calme-2.webp - urls: - - https://huggingface.co/MaziyarPanahi/calme-2.1-phi3.5-4b - - https://huggingface.co/mradermacher/calme-2.1-phi3.5-4b-i1-GGUF - description: | - This model is a fine-tuned version of the microsoft/Phi-3.5-mini-instruct, pushing the boundaries of natural language understanding and generation even further. My goal was to create a versatile and robust model that excels across a wide range of benchmarks and real-world applications. - overrides: - parameters: - model: calme-2.1-phi3.5-4b.i1-Q4_K_M.gguf - files: - - filename: calme-2.1-phi3.5-4b.i1-Q4_K_M.gguf - sha256: 989eccacd52b6d9ebf2c06c35c363da19aadb125659a10df299b7130bc293e77 - uri: huggingface://mradermacher/calme-2.1-phi3.5-4b-i1-GGUF/calme-2.1-phi3.5-4b.i1-Q4_K_M.gguf -- !!merge <<: *phi-3 - name: "phi-3.5-mini-titanfusion-0.2" - urls: - - https://huggingface.co/bunnycore/Phi-3.5-mini-TitanFusion-0.2 - - https://huggingface.co/mradermacher/Phi-3.5-mini-TitanFusion-0.2-GGUF - description: | - This model was merged using the TIES merge method using microsoft/Phi-3.5-mini-instruct as a base. - The following models were included in the merge: - nbeerbower/phi3.5-gutenberg-4B - ArliAI/Phi-3.5-mini-3.8B-ArliAI-RPMax-v1.1 - bunnycore/Phi-3.5-Mini-Hyper - bunnycore/Phi-3.5-Mini-Hyper + bunnycore/Phi-3.1-EvolKit-lora - bunnycore/Phi-3.5-Mini-Sonet-RP - bunnycore/Phi-3.5-mini-TitanFusion-0.1 - overrides: - parameters: - model: Phi-3.5-mini-TitanFusion-0.2.Q4_K_M.gguf - files: - - filename: Phi-3.5-mini-TitanFusion-0.2.Q4_K_M.gguf - sha256: 9579305712f2bca246914639c4873acdc1e7bc64ac2c7db0230df4f0ca0ef234 - uri: huggingface://mradermacher/Phi-3.5-mini-TitanFusion-0.2-GGUF/Phi-3.5-mini-TitanFusion-0.2.Q4_K_M.gguf -- !!merge <<: *phi-3 - name: "phi-3-vision:vllm" - url: "github:mudler/LocalAI/gallery/phi-3-vision.yaml@master" - description: | - Phi-3.5-vision is a lightweight, state-of-the-art open multimodal model built upon datasets which include - synthetic data and filtered publicly available websites - with a focus on very high-quality, reasoning dense data both on text and vision. The model belongs to the Phi-3 model family, and the multimodal version comes with 128K context length (in tokens) it can support. The model underwent a rigorous enhancement process, incorporating both supervised fine-tuning and direct preference optimization to ensure precise instruction adherence and robust safety measures. -- !!merge <<: *phi-3 - name: "phi-3.5-vision:vllm" - url: "github:mudler/LocalAI/gallery/phi-3-vision.yaml@master" - override: - parameters: - model: microsoft/Phi-3.5-vision-instruct - description: | - Phi-3.5-vision is a lightweight, state-of-the-art open multimodal model built upon datasets which include - synthetic data and filtered publicly available websites - with a focus on very high-quality, reasoning dense data both on text and vision. The model belongs to the Phi-3 model family, and the multimodal version comes with 128K context length (in tokens) it can support. The model underwent a rigorous enhancement process, incorporating both supervised fine-tuning and direct preference optimization to ensure precise instruction adherence and robust safety measures. -- !!merge <<: *phi-3 - name: "phi-3.5-moe-instruct" - urls: - - https://huggingface.co/microsoft/Phi-3.5-MoE-instruct - - https://huggingface.co/bartowski/Phi-3.5-MoE-instruct-GGUF - description: | - Phi-3.5-MoE is a lightweight, state-of-the-art open model built upon datasets used for Phi-3 - synthetic data and filtered publicly available documents - with a focus on very high-quality, reasoning dense data. The model supports multilingual and comes with 128K context length (in tokens). The model underwent a rigorous enhancement process, incorporating supervised fine-tuning, proximal policy optimization, and direct preference optimization to ensure precise instruction adherence and robust safety measures. - overrides: - parameters: - model: Phi-3.5-MoE-instruct-Q4_K_M.gguf - files: - - filename: Phi-3.5-MoE-instruct-Q4_K_M.gguf - sha256: 43e91bb720869bd8a92d8eb86bc3c74a52c49cf61642ca709b3d7bb89644df36 - uri: huggingface://bartowski/Phi-3.5-MoE-instruct-GGUF/Phi-3.5-MoE-instruct-Q4_K_M.gguf -- !!merge <<: *phi-3 - name: "luvgpt_phi3-uncensored-chat" - icon: https://huggingface.co/magicsquares137/phi3-uncensored-chat/resolve/main/00380-3290958654.png - urls: - - https://huggingface.co/luvGPT/phi3-uncensored-chat - - https://huggingface.co/bartowski/luvGPT_phi3-uncensored-chat-GGUF - description: | - This model is a fine-tuned version of microsoft/phi-3-mini-4k-instruct optimized for roleplaying conversations with a variety of character personas. The model speaks in a conversational format. Please not, prompt template guidelines are extremely important in getting usable output. - overrides: - parameters: - model: luvGPT_phi3-uncensored-chat-Q4_K_M.gguf - files: - - filename: luvGPT_phi3-uncensored-chat-Q4_K_M.gguf - sha256: 15e61e802907316a64932eab112eb6fc16f5861876e59e0867c00774a1941937 - uri: huggingface://bartowski/luvGPT_phi3-uncensored-chat-GGUF/luvGPT_phi3-uncensored-chat-Q4_K_M.gguf -- &hermes-2-pro-mistral - url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master" ### START Hermes - name: "hermes-2-pro-mistral" - icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png - license: apache-2.0 - description: | - Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house. - - This new version of Hermes maintains its excellent general task and conversation capabilities - but also excels at Function Calling, JSON Structured Outputs, and has improved on several other metrics as well, scoring a 90% on our function calling evaluation built in partnership with Fireworks.AI, and an 81% on our structured JSON Output evaluation. - - Hermes Pro takes advantage of a special system prompt and multi-turn function calling structure with a new chatml role in order to make function calling reliable and easy to parse. Learn more about prompting below. - - This work was a collaboration between Nous Research, @interstellarninja, and Fireworks.AI - - Learn more about the function calling on our github repo here: https://github.com/NousResearch/Hermes-Function-Calling/tree/main - urls: - - https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF - tags: - - llm - - gguf - - gpu - - mistral - - cpu - - function-calling - overrides: - parameters: - model: Hermes-2-Pro-Mistral-7B.Q4_0.gguf - files: - - filename: "Hermes-2-Pro-Mistral-7B.Q4_0.gguf" - sha256: "f446c3125026f7af6757dd097dda02280adc85e908c058bd6f1c41a118354745" - uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q4_0.gguf" -- !!merge <<: *hermes-2-pro-mistral - name: "hermes-2-pro-mistral:Q6_K" - overrides: - parameters: - model: Hermes-2-Pro-Mistral-7B.Q6_K.gguf - files: - - filename: "Hermes-2-Pro-Mistral-7B.Q6_K.gguf" - sha256: "40adc3b227bc36764de148fdda4df5df385adc06650d58d4dbe726ee0214eeff" - uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf" -- !!merge <<: *hermes-2-pro-mistral - name: "hermes-2-pro-mistral:Q8_0" - overrides: - parameters: - model: Hermes-2-Pro-Mistral-7B.Q8_0.gguf - files: - - filename: "Hermes-2-Pro-Mistral-7B.Q8_0.gguf" - sha256: "b6d95d7ec9a395b7568cc94b0447fd4f90b6f69d6e44794b1fbb84e3f732baca" - uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q8_0.gguf" -- !!merge <<: *hermes-2-pro-mistral - name: "hermes-2-theta-llama-3-8b" - icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/HQnQmNM1L3KXGhp0wUzHH.png - tags: - - llm - - gguf - - gpu - - llama3 - - cpu - - function-calling - description: | - Hermes-2 Θ (Theta) is the first experimental merged model released by Nous Research, in collaboration with Charles Goddard at Arcee, the team behind MergeKit. - Hermes-2 Θ is a merged and then further RLHF'ed version our excellent Hermes 2 Pro model and Meta's Llama-3 Instruct model to form a new model, Hermes-2 Θ, combining the best of both worlds of each model. - urls: - - https://huggingface.co/NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF - overrides: - parameters: - model: Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf - files: - - filename: "Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf" - sha256: "762b9371a296ab2628592b9462dc676b27d881a3402816492801641a437669b3" - uri: "huggingface://NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf" -- !!merge <<: *hermes-2-pro-mistral - name: "hermes-2-theta-llama-3-70b" - icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/P4NxBFwfBbboNZVytpn45.png - tags: - - llm - - gguf - - gpu - - llama3 - - cpu - - function-calling - description: | - Hermes-2 Θ (Theta) 70B is the continuation of our experimental merged model released by Nous Research, in collaboration with Charles Goddard and Arcee AI, the team behind MergeKit. - - Hermes-2 Θ is a merged and then further RLHF'ed version our excellent Hermes 2 Pro model and Meta's Llama-3 Instruct model to form a new model, Hermes-2 Θ, combining the best of both worlds of each model. - urls: - - https://huggingface.co/NousResearch/Hermes-2-Theta-Llama-3-70B-GGUF - overrides: - parameters: - model: Hermes-2-Theta-Llama-3-70B-Q4_K_M.gguf - files: - - filename: "Hermes-2-Theta-Llama-3-70B-Q4_K_M.gguf" - uri: "huggingface://NousResearch/Hermes-2-Theta-Llama-3-70B-GGUF/Hermes-2-Theta-Llama-3-70B-Q4_K_M.gguf" - sha256: b3965f671c35d09da8b903218f5bbaac94efdd9000e4fe4a2bac87fcac9f664e -### LLAMA3 version -- !!merge <<: *hermes-2-pro-mistral - name: "hermes-2-pro-llama-3-8b" - tags: - - llm - - gguf - - gpu - - llama3 - - function-calling - - cpu - urls: - - https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF - overrides: - parameters: - model: Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf - files: - - filename: "Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf" - sha256: "10c52a4820137a35947927be741bb411a9200329367ce2590cc6757cd98e746c" - uri: "huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf" -- !!merge <<: *hermes-2-pro-mistral - tags: - - llm - - gguf - - gpu - - llama3 - - function-calling - - cpu - name: "hermes-2-pro-llama-3-8b:Q5_K_M" - urls: - - https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF - overrides: - parameters: - model: Hermes-2-Pro-Llama-3-8B-Q5_K_M.gguf - files: - - filename: "Hermes-2-Pro-Llama-3-8B-Q5_K_M.gguf" - sha256: "107f3f55e26b8cc144eadd83e5f8a60cfd61839c56088fa3ae2d5679abf45f29" - uri: "huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q5_K_M.gguf" -- !!merge <<: *hermes-2-pro-mistral - tags: - - llm - - gguf - - gpu - - function-calling - - llama3 - - cpu - name: "hermes-2-pro-llama-3-8b:Q8_0" - urls: - - https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF - overrides: - parameters: - model: Hermes-2-Pro-Llama-3-8B-Q8_0.gguf - files: - - filename: "Hermes-2-Pro-Llama-3-8B-Q8_0.gguf" - sha256: "d138388cfda04d185a68eaf2396cf7a5cfa87d038a20896817a9b7cf1806f532" - uri: "huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q8_0.gguf" -- !!merge <<: *hermes-2-pro-mistral - name: "hermes-3-llama-3.1-8b" - icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/bMcZ3sNNQK8SRZpHXBmwM.jpeg - urls: - - https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B - - https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B-GGUF - description: | - Hermes 3 is a generalist language model developed by Nous Research. It is an advanced agentic model with improved roleplaying, reasoning, multi-turn conversation, long context coherence, and generalist assistant capabilities. The model is built on top of the Llama-3 architecture and has been fine-tuned to achieve superior performance in various tasks. It is designed to be a powerful and reliable tool for solving complex problems and assisting users in achieving their goals. Hermes 3 can be used for a wide range of applications, including research, education, and personal assistant tasks. It is available on the Hugging Face model hub for easy access and integration into existing workflows. - overrides: - parameters: - model: Hermes-3-Llama-3.1-8B.Q4_K_M.gguf - files: - - filename: Hermes-3-Llama-3.1-8B.Q4_K_M.gguf - sha256: d4403ce5a6e930f4c2509456388c20d633a15ff08dd52ef3b142ff1810ec3553 - uri: huggingface://NousResearch/Hermes-3-Llama-3.1-8B-GGUF/Hermes-3-Llama-3.1-8B.Q4_K_M.gguf -- !!merge <<: *hermes-2-pro-mistral - name: "hermes-3-llama-3.1-8b:Q8" - icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/bMcZ3sNNQK8SRZpHXBmwM.jpeg - urls: - - https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B - - https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B-GGUF - description: | - Hermes 3 is a generalist language model developed by Nous Research. It is an advanced agentic model with improved roleplaying, reasoning, multi-turn conversation, long context coherence, and generalist assistant capabilities. The model is built on top of the Llama-3 architecture and has been fine-tuned to achieve superior performance in various tasks. It is designed to be a powerful and reliable tool for solving complex problems and assisting users in achieving their goals. Hermes 3 can be used for a wide range of applications, including research, education, and personal assistant tasks. It is available on the Hugging Face model hub for easy access and integration into existing workflows. - overrides: - parameters: - model: Hermes-3-Llama-3.1-8B.Q8_0.gguf - files: - - filename: Hermes-3-Llama-3.1-8B.Q8_0.gguf - sha256: c77c263f78b2f56fbaddd3ef2af750fda6ebb4344a546aaa0bfdd546b1ca8d84 - uri: huggingface://NousResearch/Hermes-3-Llama-3.1-8B-GGUF/Hermes-3-Llama-3.1-8B.Q8_0.gguf -- !!merge <<: *hermes-2-pro-mistral - name: "hermes-3-llama-3.1-70b" - icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/vG6j5WxHX09yj32vgjJlI.jpeg - urls: - - https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-70B - - https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-70B-GGUF - description: | - Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board. It is designed to focus on aligning LLMs to the user, with powerful steering capabilities and control given to the end user. The model uses ChatML as the prompt format, opening up a much more structured system for engaging the LLM in multi-turn chat dialogue. It also supports function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills. - overrides: - parameters: - model: Hermes-3-Llama-3.1-70B.Q4_K_M.gguf - files: - - filename: Hermes-3-Llama-3.1-70B.Q4_K_M.gguf - sha256: 955c2f42caade4278f3c9dbffa32bb74572652b20e49e5340e782de3585bbe3f - uri: huggingface://NousResearch/Hermes-3-Llama-3.1-70B-GGUF/Hermes-3-Llama-3.1-70B.Q4_K_M.gguf -- !!merge <<: *hermes-2-pro-mistral - name: "hermes-3-llama-3.1-70b:Q5_K_M" - icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/vG6j5WxHX09yj32vgjJlI.jpeg - urls: - - https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-70B - - https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-70B-GGUF - description: | - Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board. It is designed to focus on aligning LLMs to the user, with powerful steering capabilities and control given to the end user. The model uses ChatML as the prompt format, opening up a much more structured system for engaging the LLM in multi-turn chat dialogue. It also supports function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills. - overrides: - parameters: - model: Hermes-3-Llama-3.1-70B.Q5_K_M.gguf - files: - - filename: Hermes-3-Llama-3.1-70B.Q5_K_M.gguf - sha256: 10ae3e0441b14c4a6476436f3c14e8bcacc7928aa3e8ce978d053287289a7ebb - uri: huggingface://NousResearch/Hermes-3-Llama-3.1-70B-GGUF/Hermes-3-Llama-3.1-70B.Q5_K_M.gguf - &hermes-vllm url: "github:mudler/LocalAI/gallery/hermes-vllm.yaml@master" name: "hermes-3-llama-3.1-8b:vllm" @@ -20404,40 +19233,6 @@ overrides: parameters: model: NousResearch/Hermes-3-Llama-3.1-405B -- !!merge <<: *hermes-2-pro-mistral - name: "biomistral-7b" - description: | - BioMistral: A Collection of Open-Source Pretrained Large Language Models for Medical Domains - urls: - - https://huggingface.co/MaziyarPanahi/BioMistral-7B-GGUF - icon: https://huggingface.co/BioMistral/BioMistral-7B/resolve/main/wordart_blue_m_rectangle.png?download=true - overrides: - parameters: - model: BioMistral-7B.Q4_K_M.gguf - files: - - filename: "BioMistral-7B.Q4_K_M.gguf" - sha256: "3a73107045dfe7e3f113b392b0a67e3e6ca9fa9dae2abe301424ce5abd1721a6" - uri: "huggingface://MaziyarPanahi/BioMistral-7B-GGUF/BioMistral-7B.Q4_K_M.gguf" -- !!merge <<: *hermes-2-pro-mistral - name: "tiamat-8b-1.2-llama-3-dpo" - icon: https://huggingface.co/Gryphe/Tiamat-8b-1.2-Llama-3-DPO/resolve/main/Tiamat.png - description: | - Obligatory Disclaimer: Tiamat is not nice. - - Ever wanted to be treated disdainfully like the foolish mortal you are? Wait no more, for Tiamat is here to berate you! Hailing from the world of the Forgotten Realms, she will happily judge your every word. - - Tiamat was created with the following question in mind; Is it possible to create an assistant with strong anti-assistant personality traits? Try it yourself and tell me afterwards! - - She was fine-tuned on top of Nous Research's shiny new Hermes 2 Pro. - urls: - - https://huggingface.co/bartowski/Tiamat-8b-1.2-Llama-3-DPO-GGUF - overrides: - parameters: - model: Tiamat-8b-1.2-Llama-3-DPO-Q4_K_M.gguf - files: - - filename: "Tiamat-8b-1.2-Llama-3-DPO-Q4_K_M.gguf" - sha256: "7b0895d2183344b2ac1ff36b9f3fe31dd8d4cf8820c4a41ef74e50ef86e3b448" - uri: "huggingface://bartowski/Tiamat-8b-1.2-Llama-3-DPO-GGUF/Tiamat-8b-1.2-Llama-3-DPO-Q4_K_M.gguf" - url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "guillaumetell-7b" license: apache-2 @@ -20461,23 +19256,6 @@ - filename: guillaumetell-7b.Q4_K_M.gguf sha256: bf08db5281619335f3ee87e229c8533b04262790063b061bb8f275c3e4de7061 uri: huggingface://MaziyarPanahi/guillaumetell-7b-GGUF/guillaumetell-7b.Q4_K_M.gguf -- !!merge <<: *hermes-2-pro-mistral - name: "kunocchini-7b-128k-test-imatrix" - description: | - The following models were included in the merge: - - SanjiWatsuki/Kunoichi-DPO-v2-7B - Epiculous/Fett-uccine-Long-Noodle-7B-120k-Contex - urls: - - https://huggingface.co/Lewdiculous/Kunocchini-7b-128k-test-GGUF-Imatrix - icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/9obNSalcJqCilQwr_4ssM.jpeg - overrides: - parameters: - model: v2_Kunocchini-7b-128k-test-Q4_K_M-imatrix.gguf - files: - - filename: "v2_Kunocchini-7b-128k-test-Q4_K_M-imatrix.gguf" - sha256: "5ccec35392f56f66952f8eb2ded2d8aa9a6bb511e9518899d8096326e328edef" - uri: "huggingface://Lewdiculous/Kunocchini-7b-128k-test-GGUF-Imatrix/v2_Kunocchini-7b-128k-test-Q4_K_M-imatrix.gguf" ### START Cerbero - url: "github:mudler/LocalAI/gallery/cerbero.yaml@master" icon: https://huggingface.co/galatolo/cerbero-7b/resolve/main/README.md.d/cerbero.png @@ -22060,19 +20838,6 @@ - filename: tlacuilo-12b-q4_k_m.gguf sha256: c362bc081b03a8f4f5dcd27373e9c2b60bdc0d168308ede13c4e282c5ab7fa88 uri: huggingface://Ennthen/Tlacuilo-12B-Q4_K_M-GGUF/tlacuilo-12b-q4_k_m.gguf -- !!merge <<: *hermes-2-pro-mistral - name: "hermespersona-24b" - urls: - - https://huggingface.co/mradermacher/HermesPersona-24B-GGUF - description: | - **HermesPersona-24B** is a large language model developed by merging two personality-focused 24B-parameter models—*DeepPersonalityDangerousDolphin-24B* and *Dans-PersonalityEngine-V1.3.0-24b*—using the **LazyMergekit** framework. The merge employs an *arcee_fusion* method with **bfloat16** precision, resulting in a model optimized for expressive, nuanced, and human-like conversational behavior. Ideal for roleplay, storytelling, and emotionally intelligent dialogue, HermesPersona-24B excels at maintaining consistent character personas across interactions. It is designed for use with Hugging Face's `transformers` library and supports both inference and fine-tuning workflows. This model is a high-fidelity, open-source alternative for applications requiring deep personality modeling and rich narrative engagement. - overrides: - parameters: - model: HermesPersona-24B.Q4_K_S.gguf - files: - - filename: HermesPersona-24B.Q4_K_S.gguf - sha256: 9f4c910d76c5d94ad81ea9f5e9d486ec3a89ce982d01b77f6b8c4d3b0c1cd3a6 - uri: huggingface://mradermacher/HermesPersona-24B-GGUF/HermesPersona-24B.Q4_K_S.gguf - !!merge <<: *qwen3 name: "qwen3-tnd-double-deckard-a-c-11b-220-i1" urls: @@ -22242,28 +21007,6 @@ - filename: Aevum-0.6B-Finetuned.Q4_K_M.gguf sha256: 6904b789894a7dae459042a28318e70dbe222cb3e6f892f3fc42e591d4a341a3 uri: huggingface://mradermacher/Aevum-0.6B-Finetuned-GGUF/Aevum-0.6B-Finetuned.Q4_K_M.gguf -- !!merge <<: *hermes-2-pro-mistral - name: "tlacuilo-12b-i1" - urls: - - https://huggingface.co/mradermacher/Tlacuilo-12B-i1-GGUF - description: | - **Tlacuilo-12B** is a high-quality, instruction-tuned language model designed for creative and roleplay writing. Built on the foundation of **Mistral-Nemo-Base-2407** and **Muse-12B**, it excels in narrative generation, storytelling, and interactive dialogue, with notable improvements in prose style and consistency over previous versions. - - Trained using a multi-stage LoRA process: - - **Stage 1**: Fine-tuned on literary texts (28M tokens/epoch) to enhance stylistic richness. - - **Stage 2**: Optimized for roleplay using RP datasets (4M tokens), improving character and scenario handling. - - **Stage 3**: Instruct-tuned on curated data (1.2M tokens) to ensure strong response quality and alignment. - - The model uses **ChatML** formatting and performs best at moderate temperature (1.0–1.3) with low min-p values. Ideal for writers, game masters, and creative professionals seeking expressive, coherent, and imaginative text generation. - - > **Note**: The GGUF versions in `mradermacher/Tlacuilo-12B-i1-GGUF` are quantized derivatives. The original, full-precision model is hosted at `allura-org/Tlacuilo-12B`. - overrides: - parameters: - model: Tlacuilo-12B.i1-Q4_K_M.gguf - files: - - filename: Tlacuilo-12B.i1-Q4_K_M.gguf - sha256: 94218112aa02113c8e21cd2c1d10818bea39bc6aee7e67be6014f86e80e76cb1 - uri: huggingface://mradermacher/Tlacuilo-12B-i1-GGUF/Tlacuilo-12B.i1-Q4_K_M.gguf - !!merge <<: *qwen25 name: "positivedetox-qwen2.5-14b" urls: