Mirror of https://github.com/mudler/LocalAI.git (synced 2026-02-03 19:22:39 -05:00)
Compare commits
91 Commits
015835dba2
313ea2c4d2
26c4058be4
32db787991
011565aaa3
c967ac37bc
64721606b9
7c502ec209
7ee25ecfb3
cdbcac6a78
87f78ecfa9
cffecda48c
963e5903fc
9c425d55f6
398a9efa3a
8f2cf52f3b
134ea1a37b
3e77a17b26
a26fb548b1
08e1e2251e
dcabda42d1
fd4043266b
e1db6dce82
d5da8c3509
9db068388b
54c0f153e2
e45e8a58fc
52bc463a3f
0da16c73ba
e416843f22
e65e3253a3
bc7d4586ed
056d4b4fc9
5927f9e43e
98dfa363db
92cd538829
cdcfb2617c
1a9299a7c0
a60b9b7a38
1b44a5a3b7
fdf1452c6b
773cec77a2
585e0745da
41db6668f0
c9f28e2b56
6afe9c8fda
f166541ac3
7ddf486b37
5f130febb8
b82577d642
97cf028175
094f808549
18f9e11f1a
18c35ee86f
53d1db1da0
13e7432b89
ddd289d1af
f9903d850f
1e3cef6774
dcf28e6a28
cb47a03880
d2a5a58e11
88115e4ddb
0a198e32de
61388317c1
304484c59b
93ba5ea14f
8ec828a654
b6f681315a
d53e71021f
43146fa607
f4dab82919
f659304227
fd493a4451
181fa93168
d5d9e78983
a1a86aa1f7
9695969913
975c579d44
814cc24b69
086f9e1f07
3f923bb2ce
803e2db30b
a282bd4969
5bca02bad4
4858e72fd9
7eab6ba71b
a909f63fbe
b46f36195f
465f1f14a7
b8b1e10f34
.github/dependabot.yml (vendored): 2 changes
@@ -9,6 +9,8 @@ updates:
     directory: "/"
     schedule:
       interval: "weekly"
+    ignore:
+      - dependency-name: "github.com/mudler/LocalAI/pkg/grpc/proto"
   - package-ecosystem: "github-actions"
     # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
     directory: "/"
.github/workflows/notify-models.yaml (vendored): 4 changes
@@ -79,7 +79,7 @@ jobs:
           args: ${{ steps.summarize.outputs.message }}
       - name: Setup tmate session if fails
        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
+        uses: mxschmitt/action-tmate@v3.19
        with:
          detached: true
          connect-timeout-seconds: 180
@@ -161,7 +161,7 @@ jobs:
          TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
       - name: Setup tmate session if fails
        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
+        uses: mxschmitt/action-tmate@v3.19
        with:
          detached: true
          connect-timeout-seconds: 180
.github/workflows/release.yaml (vendored): 8 changes
@@ -123,7 +123,7 @@ jobs:
            release/*
       - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
+        uses: mxschmitt/action-tmate@v3.19
        with:
          detached: true
          connect-timeout-seconds: 180
@@ -232,7 +232,7 @@ jobs:
            release/*
       - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
+        uses: mxschmitt/action-tmate@v3.19
        with:
          detached: true
          connect-timeout-seconds: 180
@@ -308,7 +308,7 @@ jobs:
            release/*
       - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
+        uses: mxschmitt/action-tmate@v3.19
        with:
          detached: true
          connect-timeout-seconds: 180
@@ -350,7 +350,7 @@ jobs:
            release/*
       - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
+        uses: mxschmitt/action-tmate@v3.19
        with:
          detached: true
          connect-timeout-seconds: 180
.github/workflows/test.yml (vendored): 6 changes
@@ -133,7 +133,7 @@ jobs:
          PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
       - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
+        uses: mxschmitt/action-tmate@v3.19
        with:
          detached: true
          connect-timeout-seconds: 180
@@ -197,7 +197,7 @@ jobs:
          make run-e2e-aio
       - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
+        uses: mxschmitt/action-tmate@v3.19
        with:
          detached: true
          connect-timeout-seconds: 180
@@ -235,7 +235,7 @@ jobs:
          BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
       - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
+        uses: mxschmitt/action-tmate@v3.19
        with:
          detached: true
          connect-timeout-seconds: 180
Makefile: 4 changes
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=96776405a17034dcfd53d3ddf5d142d34bdbb657
+CPPLLAMA_VERSION?=45f097645efb11b6d09a5b4adbbfd7c312ac0126
 
 # go-rwkv version
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
@@ -16,7 +16,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
 
 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
-WHISPER_CPP_VERSION?=fdbfb460ed546452a5d53611bba66d10d842e719
+WHISPER_CPP_VERSION?=a5abfe6a90495f7bf19fe70d016ecc255e97359c
 
 # bert.cpp version
 BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
README.md: 15 changes
@@ -66,6 +66,21 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
 # docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
 ```
 
+To load models:
+
+```bash
+# From the model gallery (see available models with `local-ai models list`, in the WebUI from the model tab, or visiting https://models.localai.io)
+local-ai run llama-3.2-1b-instruct:q4_k_m
+# Start LocalAI with the phi-2 model directly from huggingface
+local-ai run huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf
+# Install and run a model from the Ollama OCI registry
+local-ai run ollama://gemma:2b
+# Run a model from a configuration file
+local-ai run https://gist.githubusercontent.com/.../phi-2.yaml
+# Install and run a model from a standard OCI registry (e.g., Docker Hub)
+local-ai run oci://localai/phi-2:latest
+```
+
 [💻 Getting started](https://localai.io/basics/getting_started/index.html)
 
 ## 📰 Latest project news
@@ -391,6 +391,39 @@ struct llama_metrics {
     }
 };
 
+struct llava_embd_batch {
+    std::vector<llama_pos>      pos;
+    std::vector<int32_t>        n_seq_id;
+    std::vector<llama_seq_id>   seq_id_0;
+    std::vector<llama_seq_id *> seq_ids;
+    std::vector<int8_t>         logits;
+    llama_batch batch;
+    llava_embd_batch(float * embd, int32_t n_tokens, llama_pos pos_0, llama_seq_id seq_id) {
+        pos     .resize(n_tokens);
+        n_seq_id.resize(n_tokens);
+        seq_ids .resize(n_tokens + 1);
+        logits  .resize(n_tokens);
+        seq_id_0.resize(1);
+        seq_id_0[0] = seq_id;
+        seq_ids [n_tokens] = nullptr;
+        batch = {
+            /*n_tokens =*/ n_tokens,
+            /*tokens   =*/ nullptr,
+            /*embd     =*/ embd,
+            /*pos      =*/ pos.data(),
+            /*n_seq_id =*/ n_seq_id.data(),
+            /*seq_id   =*/ seq_ids.data(),
+            /*logits   =*/ logits.data(),
+        };
+        for (int i = 0; i < n_tokens; i++) {
+            batch.pos     [i] = pos_0 + i;
+            batch.n_seq_id[i] = 1;
+            batch.seq_id  [i] = seq_id_0.data();
+            batch.logits  [i] = false;
+        }
+    }
+};
+
 struct llama_server_context
 {
     llama_model *model = nullptr;
@@ -934,7 +967,6 @@ struct llama_server_context
                 batch.n_seq_id + i,
                 batch.seq_id + i,
                 batch.logits + i,
-                0, 0, 0, // unused
             };
             if (llama_decode(ctx, batch_view) != 0)
             {
@@ -1379,7 +1411,6 @@ struct llama_server_context
                 batch.n_seq_id + i,
                 batch.seq_id + i,
                 batch.logits + i,
-                0, 0, 0, // unused
             };
             if (llama_decode(ctx, batch_view))
             {
@@ -1398,8 +1429,9 @@ struct llama_server_context
             }
 
             const int n_embd = llama_n_embd(model);
-            llama_batch batch_img = { n_eval, nullptr, (img.image_embedding + i * n_embd), nullptr, nullptr, nullptr, nullptr, slot.n_past, 1, 0, };
-            if (llama_decode(ctx, batch_img))
+            float * embd = img.image_embedding + i * n_embd;
+            llava_embd_batch llava_batch = llava_embd_batch(embd, n_eval, slot.n_past, 0);
+            if (llama_decode(ctx, llava_batch.batch))
             {
                 LOG("%s : failed to eval image\n", __func__);
                 return false;
@@ -1904,7 +1936,6 @@ struct llama_server_context
                 batch.n_seq_id + i,
                 batch.seq_id + i,
                 batch.logits + i,
-                0, 0, 0, // unused
             };
 
             const int ret = llama_decode(ctx, batch_view);
 
@@ -1,2 +1,2 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
+torch==2.4.1+cu118

@@ -1 +1 @@
-torch
+torch==2.4.1

@@ -1,2 +1,2 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch
+torch==2.4.1+rocm6.0

@@ -1,6 +1,6 @@
 accelerate
 auto-gptq==0.7.1
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
 transformers

@@ -1,4 +1,4 @@
 transformers
 accelerate
-torch
-torchaudio
+torch==2.4.1
+torchaudio==2.4.1

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
-torchaudio
+torch==2.4.1+cu118
+torchaudio==2.4.1+cu118
 transformers
 accelerate

@@ -1,4 +1,4 @@
-torch
-torchaudio
+torch==2.4.1
+torchaudio==2.4.1
 transformers
 accelerate

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch
-torchaudio
+torch==2.4.1+rocm6.0
+torchaudio==2.4.1+rocm6.0
 transformers
 accelerate

@@ -1,4 +1,4 @@
 bark==0.1.5
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi

@@ -1,2 +1,2 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf

@@ -1,3 +1,4 @@
 transformers
 accelerate
-torch
+torch==2.4.1
+coqui-tts

@@ -1,5 +1,6 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
-torchaudio
+torch==2.4.1+cu118
+torchaudio==2.4.1+cu118
 transformers
 accelerate
+coqui-tts

@@ -1,4 +1,5 @@
-torch
-torchaudio
+torch==2.4.1
+torchaudio==2.4.1
 transformers
 accelerate
+coqui-tts

@@ -1,5 +1,6 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch
-torchaudio
+torch==2.4.1+rocm6.0
+torchaudio==2.4.1+rocm6.0
 transformers
 accelerate
+coqui-tts

@@ -5,4 +5,5 @@ torchaudio
 optimum[openvino]
 setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
 transformers
 accelerate
+coqui-tts

@@ -1,4 +1,4 @@
 coqui-tts
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
+packaging==24.1

@@ -5,5 +5,5 @@ accelerate
 compel
 peft
 sentencepiece
-torch
+torch==2.4.1
 optimum-quanto

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
+torch==2.4.1+cu118
 diffusers
 opencv-python
 transformers

@@ -1,4 +1,4 @@
-torch
+torch==2.4.1
 diffusers
 opencv-python
 transformers

@@ -1,5 +1,5 @@
 setuptools
-grpcio==1.66.2
+grpcio==1.67.0
 pillow
 protobuf
 certifi

@@ -1,3 +1,3 @@
 transformers
 accelerate
-torch
+torch==2.4.1

@@ -1,4 +1,4 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
+torch==2.4.1+cu118
 transformers
 accelerate

@@ -1,3 +1,3 @@
-torch
+torch==2.4.1
 transformers
 accelerate

@@ -1,4 +1,4 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
 wheel

@@ -1,2 +1,2 @@
-torch
+torch==2.4.1
 transformers

@@ -1,3 +1,3 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
+torch==2.4.1+cu118
 transformers

@@ -1,2 +1,2 @@
-torch
+torch==2.4.1
 transformers

@@ -1,3 +1,3 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi

@@ -1 +1,3 @@
-torch
+torch==2.4.1
+git+https://github.com/myshell-ai/MeloTTS.git
+git+https://github.com/myshell-ai/OpenVoice.git

@@ -1,2 +1,4 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
+torch==2.4.1+cu118
+git+https://github.com/myshell-ai/MeloTTS.git
+git+https://github.com/myshell-ai/OpenVoice.git

@@ -1 +1,3 @@
-torch
+torch==2.4.1
+git+https://github.com/myshell-ai/MeloTTS.git
+git+https://github.com/myshell-ai/OpenVoice.git

@@ -1,2 +1,4 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch
+torch==2.4.1+rocm6.0
+git+https://github.com/myshell-ai/MeloTTS.git
+git+https://github.com/myshell-ai/OpenVoice.git

@@ -2,22 +2,22 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 librosa==0.9.1
-faster-whisper==1.0.3
+faster-whisper==0.9.0
 pydub==0.25.1
 wavmark==0.0.3
-numpy==1.26.4
+numpy==1.22.0
 eng_to_ipa==0.0.2
 inflect==7.0.0
 unidecode==1.3.7
-whisper-timestamped==1.15.4
+whisper-timestamped==1.14.2
 openai
 python-dotenv
 pypinyin==0.50.0
 cn2an==0.5.22
 jieba==0.42.1
 gradio==4.44.1
 langid==1.1.6
 git+https://github.com/myshell-ai/MeloTTS.git
 git+https://github.com/myshell-ai/OpenVoice.git

@@ -1,10 +1,10 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 librosa
 faster-whisper
 pydub==0.25.1
 wavmark==0.0.3
-numpy
+numpy==1.22.0
 eng_to_ipa==0.0.2
 inflect
 unidecode
@@ -13,8 +13,8 @@ openai
 python-dotenv
 pypinyin
 cn2an==0.5.22
+networkx==2.8.8
 jieba==0.42.1
-gradio
+gradio==3.48.0
 langid==1.1.6
 git+https://github.com/myshell-ai/MeloTTS.git
 git+https://github.com/myshell-ai/OpenVoice.git
+llvmlite==0.43.0

@@ -1,3 +1,3 @@
 transformers
 accelerate
-torch
+torch==2.4.1

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
-torchaudio
+torch==2.4.1+cu118
+torchaudio==2.4.1+cu118
 transformers
 accelerate

@@ -1,4 +1,4 @@
-torch
-torchaudio
+torch==2.4.1
+torchaudio==2.4.1
 transformers
 accelerate

@@ -1,4 +1,4 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
 llvmlite==0.43.0

@@ -1,4 +1,4 @@
 transformers
 accelerate
-torch
+torch==2.4.1
 rerankers[transformers]

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
 transformers
 accelerate
-torch
+torch==2.4.1+cu118
 rerankers[transformers]

@@ -1,4 +1,4 @@
 transformers
 accelerate
-torch
+torch==2.4.1
 rerankers[transformers]

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
 transformers
 accelerate
-torch
+torch==2.4.1+rocm6.0
 rerankers[transformers]

@@ -1,3 +1,3 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi

@@ -1,6 +1,6 @@
-torch
+torch==2.4.1
 accelerate
 transformers
 bitsandbytes
-sentence-transformers==3.1.1
+sentence-transformers==3.2.0
 transformers

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
+torch==2.4.1+cu118
 accelerate
-sentence-transformers==3.1.1
+sentence-transformers==3.2.0
 transformers

@@ -1,4 +1,4 @@
-torch
+torch==2.4.1
 accelerate
-sentence-transformers==3.1.1
+sentence-transformers==3.2.0
 transformers

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch
+torch==2.4.1+rocm6.0
 accelerate
-sentence-transformers==3.1.1
+sentence-transformers==3.2.0
 transformers

@@ -4,5 +4,5 @@ torch
 optimum[openvino]
 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
 accelerate
-sentence-transformers==3.1.1
+sentence-transformers==3.2.0
 transformers

@@ -1,4 +1,4 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
 datasets

@@ -1,3 +1,3 @@
 transformers
 accelerate
-torch
+torch==2.4.1

@@ -1,4 +1,4 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
 transformers
 accelerate
-torch
+torch==2.4.1+cu118

@@ -1,3 +1,3 @@
 transformers
 accelerate
-torch
+torch==2.4.1

@@ -1,4 +1,4 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
 transformers
 accelerate
-torch
+torch==2.4.1+rocm6.0

@@ -1,4 +1,4 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 scipy==1.14.0
 certifi

@@ -1,4 +1,4 @@
-torch
+torch==2.4.1
 accelerate
 transformers
 bitsandbytes

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
+torch==2.4.1+cu118
 accelerate
 transformers
 bitsandbytes

@@ -1,4 +1,4 @@
-torch
+torch==2.4.1
 accelerate
 transformers
 bitsandbytes

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch
+torch==2.4.1+rocm6.0
 accelerate
 transformers
 bitsandbytes

@@ -1,4 +1,4 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

@@ -1,3 +1,3 @@
 accelerate
-torch
-torchaudio
+torch==2.4.1
+torchaudio==2.4.1

@@ -1,4 +1,4 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
 accelerate
-torch
-torchaudio
+torch==2.4.1+cu118
+torchaudio==2.4.1+cu118

@@ -1,3 +1,3 @@
 accelerate
-torch
-torchaudio
+torch==2.4.1
+torchaudio==2.4.1

@@ -1,3 +1,3 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
@@ -19,6 +19,8 @@ from vllm.utils import random_uuid
 from vllm.transformers_utils.tokenizer import get_tokenizer
 from vllm.multimodal.utils import fetch_image
+from vllm.assets.video import VideoAsset
 import base64
+import io
 
 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
 
@@ -217,13 +219,15 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         # Generate text using the LLM engine
         request_id = random_uuid()
         print(f"Generating text with request_id: {request_id}", file=sys.stderr)
+        multi_modal_data = {}
+        if image_data:
+            multi_modal_data["image"] = image_data
+        if video_data:
+            multi_modal_data["video"] = video_data
         outputs = self.llm.generate(
             {
-                "prompt": prompt,
-                "multi_modal_data": {
-                    "image": image_data if image_data else None,
-                    "video": video_data if video_data else None,
-                } if image_data or video_data else None,
+                "prompt": prompt,
+                "multi_modal_data": multi_modal_data if multi_modal_data else None,
             },
             sampling_params=sampling_params,
             request_id=request_id,
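The hunk above replaces the inline conditional dict with an incrementally built `multi_modal_data` mapping that is passed to vLLM only when it is non-empty. A minimal standalone sketch of the resulting input shape (plain Python; the helper name and example prompt are illustrative, not part of the patch):

```python
def build_generate_input(prompt, image=None, video=None):
    """Assemble the dict-style input handed to vLLM's LLM.generate,
    attaching multi-modal payloads only when they are present."""
    multi_modal_data = {}
    if image is not None:
        multi_modal_data["image"] = image
    if video is not None:
        multi_modal_data["video"] = video
    return {
        "prompt": prompt,
        # The patch passes None, not an empty dict, when no media is attached.
        "multi_modal_data": multi_modal_data or None,
    }

print(build_generate_input("Describe the scene."))
# {'prompt': 'Describe the scene.', 'multi_modal_data': None}
```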
@@ -262,19 +266,22 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
 
     def load_image(self, image_path: str):
         """
-        Load an image from the given file path.
+        Load an image from the given file path or base64 encoded data.
 
         Args:
-            image_path (str): The path to the image file.
+            image_path (str): The path to the image file or base64 encoded data.
 
         Returns:
             Image: The loaded image.
         """
         try:
-            return Image.open(image_path)
+            image_data = base64.b64decode(image_path)
+            image = Image.open(io.BytesIO(image_data))
+            return image
         except Exception as e:
             print(f"Error loading image {image_path}: {e}", file=sys.stderr)
-            return self.load_video(image_path)
+            return None
 
     def load_video(self, video_path: str):
         """
@@ -287,10 +294,15 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
             Video: The loaded video.
         """
         try:
-            video = VideoAsset(name=video_path).np_ndarrays
+            timestamp = str(int(time.time() * 1000))  # Generate timestamp
+            p = f"/tmp/vl-{timestamp}.data"  # Use timestamp in filename
+            with open(p, "wb") as f:
+                f.write(base64.b64decode(video_path))
+            video = VideoAsset(name=p).np_ndarrays
+            os.remove(p)
             return video
         except Exception as e:
-            print(f"Error loading video {image_path}: {e}", file=sys.stderr)
+            print(f"Error loading video {video_path}: {e}", file=sys.stderr)
             return None
 
 async def serve(address):
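The two loaders above now treat their string argument as a base64 payload rather than a filesystem path. A self-contained sketch of the image path only (assumes Pillow is installed; the function name is illustrative, not the backend's API):

```python
import base64
import io

from PIL import Image  # Pillow

def load_image_b64(payload: str):
    """Decode a base64-encoded image payload into a PIL image,
    returning None on failure, mirroring the patched load_image."""
    try:
        return Image.open(io.BytesIO(base64.b64decode(payload)))
    except Exception:
        return None

# Round-trip check: encode a tiny PNG, then decode it back.
buf = io.BytesIO()
Image.new("RGB", (4, 4), "red").save(buf, format="PNG")
img = load_image_b64(base64.b64encode(buf.getvalue()).decode("ascii"))
print(img.size if img else None)  # (4, 4)
```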
@@ -22,7 +22,7 @@ if [ "x${BUILD_TYPE}" == "x" ] && [ "x${FROM_SOURCE}" == "xtrue" ]; then
|
||||
git clone https://github.com/vllm-project/vllm
|
||||
fi
|
||||
pushd vllm
|
||||
uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.66.2 protobuf bitsandbytes
|
||||
uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.67.0 protobuf bitsandbytes
|
||||
uv pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
|
||||
VLLM_TARGET_DEVICE=cpu python setup.py install
|
||||
popd
|
||||
|
||||
@@ -1,3 +1,3 @@
 accelerate
-torch
+torch==2.4.1
 transformers

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
 accelerate
-torch
+torch==2.4.1+cu118
 transformers
 bitsandbytes

@@ -1,4 +1,4 @@
 accelerate
-torch
+torch==2.4.1
 transformers
 bitsandbytes

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
 accelerate
-torch
+torch==2.4.1+rocm6.0
 transformers
 bitsandbytes

@@ -1,4 +1,4 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
 setuptools
@@ -2,6 +2,7 @@ package backend
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
 	"os"
 	"regexp"
@@ -77,6 +78,16 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
 			switch ct := message.Content.(type) {
 			case string:
 				protoMessages[i].Content = ct
+			case []interface{}:
+				// If using the tokenizer template, in case of multimodal we want to keep the multimodal content as and return only strings here
+				data, _ := json.Marshal(ct)
+				resultData := []struct {
+					Text string `json:"text"`
+				}{}
+				json.Unmarshal(data, &resultData)
+				for _, r := range resultData {
+					protoMessages[i].Content += r.Text
+				}
 			default:
 				return nil, fmt.Errorf("unsupported type for schema.Message.Content for inference: %T", ct)
 			}
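The new `[]interface{}` case flattens an OpenAI-style multimodal content array into plain text by round-tripping through JSON and keeping only the `text` parts. The same logic, sketched in Python purely for illustration (not LocalAI's API):

```python
def flatten_content(content):
    """Collapse an OpenAI-style multimodal content list into plain text,
    keeping only the "text" fields, as the Go case above does."""
    if isinstance(content, str):
        return content
    return "".join(p.get("text", "") for p in content if isinstance(p, dict))

msg = [
    {"type": "text", "text": "What is in this image?"},
    {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
]
print(flatten_content(msg))  # What is in this image?
```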
@@ -28,5 +28,6 @@ The list below is a list of software that integrates with LocalAI.
 - https://github.com/cedriking/spark
 - [Big AGI](https://github.com/enricoros/big-agi) is a powerful web interface entirely running in the browser, supporting LocalAI
 - [Midori AI Subsystem Manager](https://io.midori-ai.xyz/subsystem/manager/) is a powerful docker subsystem for running all types of AI programs
+- [LLPhant](https://github.com/theodo-group/LLPhant) is a PHP library for interacting with LLMs and Vector Databases
 
 Feel free to open up a Pull request (by clicking at the "Edit page" below) to get a page for your project made or if you see a error on one of the pages!
@@ -1,3 +1,3 @@
 {
-  "version": "v2.21.1"
+  "version": "v2.22.0"
 }
docs/themes/hugo-theme-relearn (vendored): 2 changes
Submodule docs/themes/hugo-theme-relearn updated: d5a0ee04ad...007cc20686
@@ -1,4 +1,4 @@
-llama_index==0.11.16
+llama_index==0.11.17
 requests==2.32.3
 weaviate_client==4.8.1
 transformers

@@ -1,2 +1,2 @@
-langchain==0.3.2
-openai==1.51.1
+langchain==0.3.3
+openai==1.51.2

@@ -1,4 +1,4 @@
-langchain==0.3.1
-openai==1.51.1
-chromadb==0.5.11
-llama-index==0.11.16
+langchain==0.3.3
+openai==1.51.2
+chromadb==0.5.13
+llama-index==0.11.17

@@ -1,4 +1,4 @@
-FROM python:3.12-bullseye
+FROM python:3.13-bullseye
 COPY ./langchainpy-localai-example /app
 WORKDIR /app
 RUN pip install --no-cache-dir -r requirements.txt
@@ -1,24 +1,24 @@
-aiohttp==3.10.9
+aiohttp==3.10.10
 aiosignal==1.3.1
 async-timeout==4.0.3
 attrs==24.2.0
 certifi==2024.8.30
-charset-normalizer==3.3.2
+charset-normalizer==3.4.0
 colorama==0.4.6
 dataclasses-json==0.6.7
-debugpy==1.8.6
+debugpy==1.8.7
 frozenlist==1.4.1
 greenlet==3.1.1
 idna==3.10
-langchain==0.3.2
-langchain-community==0.3.1
+langchain==0.3.3
+langchain-community==0.3.2
 marshmallow==3.22.0
 marshmallow-enum==1.5.1
 multidict==6.1.0
 mypy-extensions==1.0.0
 numexpr==2.10.1
-numpy==2.1.1
-openai==1.51.1
+numpy==2.1.2
+openai==1.51.2
 openapi-schema-pydantic==1.2.4
 packaging>=23.2
 pydantic==2.9.2
@@ -30,4 +30,4 @@ tqdm==4.66.5
 typing-inspect==0.9.0
 typing_extensions==4.12.2
 urllib3==2.2.3
-yarl==1.13.1
+yarl==1.15.2
@@ -182,6 +182,34 @@
   - filename: Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO.Q4_K_M.gguf
     sha256: 7f45fa79bc6c9847ef9fbad08c3bb5a0f2dbb56d2e2200a5d37b260a57274e55
     uri: huggingface://QuantFactory/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO-GGUF/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO.Q4_K_M.gguf
+- !!merge <<: *llama32
+  name: "llama-3.2-chibi-3b"
+  icon: https://huggingface.co/AELLM/Llama-3.2-Chibi-3B/resolve/main/chibi.jpg
+  urls:
+    - https://huggingface.co/AELLM/Llama-3.2-Chibi-3B
+    - https://huggingface.co/mradermacher/Llama-3.2-Chibi-3B-GGUF
+  description: |
+    Small parameter LLMs are ideal for navigating the complexities of the Japanese language, which involves multiple character systems like kanji, hiragana, and katakana, along with subtle social cues. Despite their smaller size, these models are capable of delivering highly accurate and context-aware results, making them perfect for use in environments where resources are constrained. Whether deployed on mobile devices with limited processing power or in edge computing scenarios where fast, real-time responses are needed, these models strike the perfect balance between performance and efficiency, without sacrificing quality or speed.
+  overrides:
+    parameters:
+      model: Llama-3.2-Chibi-3B.Q4_K_M.gguf
+  files:
+    - filename: Llama-3.2-Chibi-3B.Q4_K_M.gguf
+      sha256: 4b594cd5f66181202713f1cf97ce2f86d0acfa1b862a64930d5f512c45640a2f
+      uri: huggingface://mradermacher/Llama-3.2-Chibi-3B-GGUF/Llama-3.2-Chibi-3B.Q4_K_M.gguf
+- !!merge <<: *llama32
+  name: "llama-3.2-3b-reasoning-time"
+  urls:
+    - https://huggingface.co/mradermacher/Llama-3.2-3B-Reasoning-Time-GGUF
+  description: |
+    Lyte/Llama-3.2-3B-Reasoning-Time is a large language model with 3.2 billion parameters, designed for reasoning and time-based tasks in English. It is based on the Llama architecture and has been quantized using the GGUF format by mradermacher.
+  overrides:
+    parameters:
+      model: Llama-3.2-3B-Reasoning-Time.Q4_K_M.gguf
+  files:
+    - filename: Llama-3.2-3B-Reasoning-Time.Q4_K_M.gguf
+      sha256: 80b10e1a5c6e27f6d8cf08c3472af2b15a9f63ebf8385eedfe8615f85116c73f
+      uri: huggingface://mradermacher/Llama-3.2-3B-Reasoning-Time-GGUF/Llama-3.2-3B-Reasoning-Time.Q4_K_M.gguf
 - &qwen25
   ## Qwen2.5
   name: "qwen2.5-14b-instruct"
@@ -472,6 +500,134 @@
   - filename: qwen2.5-7b-ins-v3-Q4_K_M.gguf
     sha256: 9c23734072714a4886c0386ae0ff07a5e940d67ad52278e2ed689fec44e1e0c8
     uri: huggingface://bartowski/qwen2.5-7b-ins-v3-GGUF/qwen2.5-7b-ins-v3-Q4_K_M.gguf
+- !!merge <<: *qwen25
+  name: "supernova-medius"
+  urls:
+    - https://huggingface.co/arcee-ai/SuperNova-Medius-GGUF
+  description: |
+    Arcee-SuperNova-Medius is a 14B parameter language model developed by Arcee.ai, built on the Qwen2.5-14B-Instruct architecture. This unique model is the result of a cross-architecture distillation pipeline, combining knowledge from both the Qwen2.5-72B-Instruct model and the Llama-3.1-405B-Instruct model. By leveraging the strengths of these two distinct architectures, SuperNova-Medius achieves high-quality instruction-following and complex reasoning capabilities in a mid-sized, resource-efficient form.
+
+    SuperNova-Medius is designed to excel in a variety of business use cases, including customer support, content creation, and technical assistance, while maintaining compatibility with smaller hardware configurations. It’s an ideal solution for organizations looking for advanced capabilities without the high resource requirements of larger models like our SuperNova-70B.
+  overrides:
+    parameters:
+      model: SuperNova-Medius-Q4_K_M.gguf
+  files:
+    - filename: SuperNova-Medius-Q4_K_M.gguf
+      sha256: aaa4bf3451bc900f186fd4b6b3a6a26bfd40c85908f605db76b92e58aadcc864
+      uri: huggingface://arcee-ai/SuperNova-Medius-GGUF/SuperNova-Medius-Q4_K_M.gguf
+- !!merge <<: *qwen25
+  name: "eva-qwen2.5-14b-v0.1-i1"
+  urls:
+    - https://huggingface.co/EVA-UNIT-01/EVA-Qwen2.5-14B-v0.1
+    - https://huggingface.co/mradermacher/EVA-Qwen2.5-14B-v0.1-i1-GGUF
+  description: |
+    A RP/storywriting specialist model, full-parameter finetune of Qwen2.5-14B on mixture of synthetic and natural data.
+    It uses Celeste 70B 0.1 data mixture, greatly expanding it to improve versatility, creativity and "flavor" of the resulting model.
+  overrides:
+    parameters:
+      model: EVA-Qwen2.5-14B-v0.1.i1-Q4_K_M.gguf
+  files:
+    - filename: EVA-Qwen2.5-14B-v0.1.i1-Q4_K_M.gguf
+      sha256: 4e9665d4f83cd97efb42c8427f9c09be93b72e23a0364c91ad0b5de8056f2795
+      uri: huggingface://mradermacher/EVA-Qwen2.5-14B-v0.1-i1-GGUF/EVA-Qwen2.5-14B-v0.1.i1-Q4_K_M.gguf
+- !!merge <<: *qwen25
+  name: "cursorcore-qw2.5-7b-i1"
+  urls:
+    - https://huggingface.co/TechxGenus/CursorCore-QW2.5-7B
+    - https://huggingface.co/mradermacher/CursorCore-QW2.5-7B-i1-GGUF
+  description: |
+    CursorCore is a series of open-source models designed for AI-assisted programming. It aims to support features such as automated editing and inline chat, replicating the core abilities of closed-source AI-assisted programming tools like Cursor. This is achieved by aligning data generated through Programming-Instruct. Please read our paper to learn more.
+  overrides:
+    parameters:
+      model: CursorCore-QW2.5-7B.i1-Q4_K_M.gguf
+  files:
+    - filename: CursorCore-QW2.5-7B.i1-Q4_K_M.gguf
+      sha256: 81868f4edb4ec1a61debde1dbdebc02b407930ee19a6d946ff801afba840a102
+      uri: huggingface://mradermacher/CursorCore-QW2.5-7B-i1-GGUF/CursorCore-QW2.5-7B.i1-Q4_K_M.gguf
+- !!merge <<: *qwen25
+  name: "cursorcore-qw2.5-1.5b-lc-i1"
+  urls:
+    - https://huggingface.co/TechxGenus/CursorCore-QW2.5-1.5B-LC
+    - https://huggingface.co/mradermacher/CursorCore-QW2.5-1.5B-LC-i1-GGUF
+  description: |
+    CursorCore is a series of open-source models designed for AI-assisted programming. It aims to support features such as automated editing and inline chat, replicating the core abilities of closed-source AI-assisted programming tools like Cursor. This is achieved by aligning data generated through Programming-Instruct. Please read our paper to learn more.
+  overrides:
+    parameters:
+      model: CursorCore-QW2.5-1.5B-LC.i1-Q4_K_M.gguf
+  files:
+    - filename: CursorCore-QW2.5-1.5B-LC.i1-Q4_K_M.gguf
+      sha256: 185d720c810f7345ef861ad8eef1199bb15afa8e4f3c03bd5ffd476cfa465127
+      uri: huggingface://mradermacher/CursorCore-QW2.5-1.5B-LC-i1-GGUF/CursorCore-QW2.5-1.5B-LC.i1-Q4_K_M.gguf
+- !!merge <<: *qwen25
+  name: "edgerunner-command-nested-i1"
+  urls:
+    - https://huggingface.co/edgerunner-ai/EdgeRunner-Command-Nested
+    - https://huggingface.co/mradermacher/EdgeRunner-Command-Nested-i1-GGUF
+  description: |
+    EdgeRunner-Command-Nested is an advanced large language model designed specifically for handling complex nested function calls. Initialized from Qwen2.5-7B-Instruct, further enhanced by the integration of the Hermes function call template and additional training on a specialized dataset (based on TinyAgent). This extra dataset focuses on personal domain applications, providing the model with a robust understanding of nested function scenarios that are typical in complex user interactions.
+  overrides:
+    parameters:
+      model: EdgeRunner-Command-Nested.i1-Q4_K_M.gguf
+  files:
+    - filename: EdgeRunner-Command-Nested.i1-Q4_K_M.gguf
+      sha256: a1cc4d2b601dc20e58cbb549bd3e9bc460995840c0aaf1cd3c1cb5414c900ac7
+      uri: huggingface://mradermacher/EdgeRunner-Command-Nested-i1-GGUF/EdgeRunner-Command-Nested.i1-Q4_K_M.gguf
+- !!merge <<: *qwen25
+  name: "tsunami-0.5x-7b-instruct-i1"
+  icon: https://huggingface.co/Tsunami-th/Tsunami-0.5x-7B-Instruct/resolve/main/Tsunami.webp
+  urls:
+    - https://huggingface.co/Tsunami-th/Tsunami-0.5x-7B-Instruct
+    - https://huggingface.co/mradermacher/Tsunami-0.5x-7B-Instruct-i1-GGUF
+  description: |
+    TSUNAMI: Transformative Semantic Understanding and Natural Augmentation Model for Intelligence.
+
+    TSUNAMI full name was created by ChatGPT.
+    infomation
+
+    Tsunami-0.5x-7B-Instruct is Thai Large Language Model that fine-tuned from Qwen2.5-7B around 100,000 rows in Thai dataset.
+  overrides:
+    parameters:
+      model: Tsunami-0.5x-7B-Instruct.i1-Q4_K_M.gguf
+  files:
+    - filename: Tsunami-0.5x-7B-Instruct.i1-Q4_K_M.gguf
+      sha256: 22e2003ecec7f1e91f2e9aaec334613c0f37fb3000d0e628b5a9980e53322fa7
+      uri: huggingface://mradermacher/Tsunami-0.5x-7B-Instruct-i1-GGUF/Tsunami-0.5x-7B-Instruct.i1-Q4_K_M.gguf
+- !!merge <<: *qwen25
+  name: "qevacot-7b-v2"
+  urls:
+    - https://huggingface.co/bunnycore/Qevacot-7B-v2
+    - https://huggingface.co/mradermacher/Qevacot-7B-v2-GGUF
+  description: |
+    This model was merged using the TIES merge method using Qwen/Qwen2.5-7B as a base.
+    The following models were included in the merge:
+    c10x/CoT-2.5
+    EVA-UNIT-01/EVA-Qwen2.5-7B-v0.1
+    huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2
+    Cran-May/T.E-8.1
+  overrides:
+    parameters:
+      model: Qevacot-7B-v2.Q4_K_M.gguf
+  files:
+    - filename: Qevacot-7B-v2.Q4_K_M.gguf
+      sha256: a45b3d3b74bc68a5c7ac07d251cdeff671e64085d1816cd86fca6cfb7eab204e
+      uri: huggingface://mradermacher/Qevacot-7B-v2-GGUF/Qevacot-7B-v2.Q4_K_M.gguf
+- !!merge <<: *qwen25
+  name: "meissa-qwen2.5-7b-instruct"
+  icon: https://huggingface.co/Orion-zhen/Meissa-Qwen2.5-7B-Instruct/resolve/main/meissa.jpg
+  urls:
+    - https://huggingface.co/Orion-zhen/Meissa-Qwen2.5-7B-Instruct
+    - https://huggingface.co/QuantFactory/Meissa-Qwen2.5-7B-Instruct-GGUF
+  description: |
+    Meissa is designated Lambda Orionis, forms Orion's head, and is a multiple star with a combined apparent magnitude of 3.33. Its name means the "shining one".
+    This model is fine tuned over writing and role playing datasets (maybe the first on qwen2.5-7b), aiming to enhance model's performance in novel writing and roleplaying.
+    The model is fine-tuned over Orion-zhen/Qwen2.5-7B-Instruct-Uncensored
+  overrides:
+    parameters:
+      model: Meissa-Qwen2.5-7B-Instruct.Q4_K_M.gguf
+  files:
+    - filename: Meissa-Qwen2.5-7B-Instruct.Q4_K_M.gguf
+      sha256: 632b10d5c0e98bc8d53295886da2d57772a54bb6f6fa01d458e9e8c7fa9c905a
+      uri: huggingface://QuantFactory/Meissa-Qwen2.5-7B-Instruct-GGUF/Meissa-Qwen2.5-7B-Instruct.Q4_K_M.gguf
 - &archfunct
   license: apache-2.0
   tags:
@@ -1412,6 +1568,216 @@
   - filename: NIHAPPY-L3.1-8B-v0.09.Q4_K_M.gguf
     sha256: 9bd46a06093448b143bd2775f0fb1b1b172c851fafdce31289e13b7dfc23a0d7
     uri: huggingface://QuantFactory/NIHAPPY-L3.1-8B-v0.09-GGUF/NIHAPPY-L3.1-8B-v0.09.Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "llama3.1-flammades-70b"
+  icon: https://huggingface.co/flammenai/Flammades-Mistral-7B/resolve/main/flammades.png?download=true
+  urls:
+    - https://huggingface.co/flammenai/Llama3.1-Flammades-70B
+    - https://huggingface.co/mradermacher/Llama3.1-Flammades-70B-GGUF
+  description: |
+    nbeerbower/Llama3.1-Gutenberg-Doppel-70B finetuned on flammenai/Date-DPO-NoAsterisks and jondurbin/truthy-dpo-v0.1.
+  overrides:
+    parameters:
+      model: Llama3.1-Flammades-70B.Q4_K_M.gguf
+  files:
+    - filename: Llama3.1-Flammades-70B.Q4_K_M.gguf
+      sha256: f602ed006d0059ac87c6ce5904a7cc6f4b4f290886a1049f96b5b2c561ab5a89
+      uri: huggingface://mradermacher/Llama3.1-Flammades-70B-GGUF/Llama3.1-Flammades-70B.Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "llama3.1-gutenberg-doppel-70b"
+  # chatml
+  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
+  icon: https://huggingface.co/nbeerbower/Mistral-Small-Gutenberg-Doppel-22B/resolve/main/doppel-header?download=true
+  urls:
+    - https://huggingface.co/nbeerbower/Llama3.1-Gutenberg-Doppel-70B
+    - https://huggingface.co/mradermacher/Llama3.1-Gutenberg-Doppel-70B-GGUF
+  description: |
+    mlabonne/Hermes-3-Llama-3.1-70B-lorablated finetuned on jondurbin/gutenberg-dpo-v0.1 and nbeerbower/gutenberg2-dpo.
+  overrides:
+    parameters:
+      model: Llama3.1-Gutenberg-Doppel-70B.Q4_K_M.gguf
+  files:
+    - filename: Llama3.1-Gutenberg-Doppel-70B.Q4_K_M.gguf
+      sha256: af558f954fa26c5bb75352178cb815bbf268f01c0ca0b96f2149422d4c19511b
+      uri: huggingface://mradermacher/Llama3.1-Gutenberg-Doppel-70B-GGUF/Llama3.1-Gutenberg-Doppel-70B.Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "llama-3.1-8b-arliai-formax-v1.0-iq-arm-imatrix"
+  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
+  icon: https://iili.io/2HmlLn2.md.png
+  urls:
+    - https://huggingface.co/Lewdiculous/Llama-3.1-8B-ArliAI-Formax-v1.0-GGUF-IQ-ARM-Imatrix
+  description: |
+    Quants for ArliAI/Llama-3.1-8B-ArliAI-Formax-v1.0.
+
+    "Formax is a model that specializes in following response format instructions. Tell it the format of it's response and it will follow it perfectly. Great for data processing and dataset creation tasks."
+
+    "It is also a highly uncensored model that will follow your instructions very well."
+  overrides:
+    parameters:
+      model: Llama-3.1-8B-ArliAI-Formax-v1.0-Q4_K_M-imat.gguf
+  files:
+    - filename: Llama-3.1-8B-ArliAI-Formax-v1.0-Q4_K_M-imat.gguf
+      sha256: b548ad47caf7008a697afb3556190359529f5a05ec0e4e48ef992c7869e14255
+      uri: huggingface://Lewdiculous/Llama-3.1-8B-ArliAI-Formax-v1.0-GGUF-IQ-ARM-Imatrix/Llama-3.1-8B-ArliAI-Formax-v1.0-Q4_K_M-imat.gguf
+- !!merge <<: *llama31
+  name: "hermes-3-llama-3.1-70b-lorablated"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/61b8e2ba285851687028d395/4Hbw5n68jKUSBQeTqQIeT.png
+  urls:
+    - https://huggingface.co/mlabonne/Hermes-3-Llama-3.1-70B-lorablated
+    - https://huggingface.co/mradermacher/Hermes-3-Llama-3.1-70B-lorablated-GGUF
+  description: |
+    This is an uncensored version of NousResearch/Hermes-3-Llama-3.1-70B using lorablation.
+    The recipe is based on @grimjim's grimjim/Llama-3.1-8B-Instruct-abliterated_via_adapter (special thanks):
+    Extraction: We extract a LoRA adapter by comparing two models: a censored Llama 3 (meta-llama/Meta-Llama-3-70B-Instruct) and an abliterated Llama 3.1 (failspy/Meta-Llama-3.1-70B-Instruct-abliterated).
+    Merge: We merge this new LoRA adapter using task arithmetic to the censored NousResearch/Hermes-3-Llama-3.1-70B to abliterate it.
+  overrides:
+    parameters:
+      model: Hermes-3-Llama-3.1-70B-lorablated.Q4_K_M.gguf
+  files:
+    - filename: Hermes-3-Llama-3.1-70B-lorablated.Q4_K_M.gguf
+      sha256: 9294875ae3b8822855072b0f710ce800536d144cf303a91bcb087c4a307b578d
+      uri: huggingface://mradermacher/Hermes-3-Llama-3.1-70B-lorablated-GGUF/Hermes-3-Llama-3.1-70B-lorablated.Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "hermes-3-llama-3.1-8b-lorablated"
+  urls:
+    - https://huggingface.co/mlabonne/Hermes-3-Llama-3.1-8B-lorablated-GGUF
+  description: |
+    This is an uncensored version of NousResearch/Hermes-3-Llama-3.1-8B using lorablation.
+    The recipe is simple:
+    Extraction: We extract a LoRA adapter by comparing two models: a censored Llama 3.1 (meta-llama/Meta-Llama-3-8B-Instruct) and an abliterated Llama 3.1 (mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated).
+    Merge: We merge this new LoRA adapter using task arithmetic to the censored NousResearch/Hermes-3-Llama-3.1-8B to abliterate it.
+  overrides:
+    parameters:
+      model: hermes-3-llama-3.1-8b-lorablated.Q4_K_M.gguf
+  files:
+    - filename: hermes-3-llama-3.1-8b-lorablated.Q4_K_M.gguf
+      sha256: 8cff9d399a0583616fe1f290da6daa091ab5c5493d0e173a8fffb45202d79417
+      uri: huggingface://mlabonne/Hermes-3-Llama-3.1-8B-lorablated-GGUF/hermes-3-llama-3.1-8b-lorablated.Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "doctoraifinetune-3.1-8b-i1"
+  urls:
+    - https://huggingface.co/huzaifa525/Doctoraifinetune-3.1-8B
+    - https://huggingface.co/mradermacher/Doctoraifinetune-3.1-8B-i1-GGUF
+  description: |
+    This is a fine-tuned version of the Meta-Llama-3.1-8B-bnb-4bit model, specifically adapted for the medical field. It has been trained using a dataset that provides extensive information on diseases, symptoms, and treatments, making it ideal for AI-powered healthcare tools such as medical chatbots, virtual assistants, and diagnostic support systems.
+    Key Features
+
+    Disease Diagnosis: Accurately identifies diseases based on symptoms provided by the user.
+    Symptom Analysis: Breaks down and interprets symptoms to provide a comprehensive medical overview.
+    Treatment Recommendations: Suggests treatments and remedies according to medical conditions.
+
+    Dataset
+
+    The model is fine-tuned on 2000 rows from a dataset consisting of 272k rows. This dataset includes rich information about diseases, symptoms, and their corresponding treatments. The model is continuously being updated and will be further trained on the remaining data in future releases to improve accuracy and capabilities.
+  overrides:
+    parameters:
+      model: Doctoraifinetune-3.1-8B.i1-Q4_K_M.gguf
+  files:
+    - filename: Doctoraifinetune-3.1-8B.i1-Q4_K_M.gguf
+      sha256: 282456efcb6c7e54d34ac25ae7fc022a94152ed77281ae4625b9628091e0a3d6
+      uri: huggingface://mradermacher/Doctoraifinetune-3.1-8B-i1-GGUF/Doctoraifinetune-3.1-8B.i1-Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "astral-fusion-neural-happy-l3.1-8b"
+  urls:
+    - https://huggingface.co/ZeroXClem/Astral-Fusion-Neural-Happy-L3.1-8B
+    - https://huggingface.co/mradermacher/Astral-Fusion-Neural-Happy-L3.1-8B-GGUF
+  description: |
+    Astral-Fusion-Neural-Happy-L3.1-8B is a celestial blend of magic, creativity, and dynamic storytelling. Designed to excel in instruction-following, immersive roleplaying, and magical narrative generation, this model is a fusion of the finest qualities from Astral-Fusion, NIHAPPY, and NeuralMahou. ✨🚀
+
+    This model is perfect for anyone seeking a cosmic narrative experience, with the ability to generate both precise instructional content and fantastical stories in one cohesive framework. Whether you're crafting immersive stories, creating AI roleplaying characters, or working on interactive storytelling, this model brings out the magic. 🌟
+  overrides:
+    parameters:
+      model: Astral-Fusion-Neural-Happy-L3.1-8B.Q4_K_M.gguf
+  files:
+    - filename: Astral-Fusion-Neural-Happy-L3.1-8B.Q4_K_M.gguf
+      sha256: 14a3b07c1723ef1ca24f99382254b1227d95974541e23792a4e7ff621896055d
+      uri: huggingface://mradermacher/Astral-Fusion-Neural-Happy-L3.1-8B-GGUF/Astral-Fusion-Neural-Happy-L3.1-8B.Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "mahou-1.5-llama3.1-70b-i1"
+  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
+  icon: https://huggingface.co/flammenai/Mahou-1.0-mistral-7B/resolve/main/mahou1.png
+  urls:
+    - https://huggingface.co/flammenai/Mahou-1.5-llama3.1-70B
+    - https://huggingface.co/mradermacher/Mahou-1.5-llama3.1-70B-i1-GGUF
+  description: |
+    Mahou is designed to provide short messages in a conversational context. It is capable of casual conversation and character roleplay.
+  overrides:
+    parameters:
+      model: Mahou-1.5-llama3.1-70B.i1-Q4_K_M.gguf
+  files:
+    - filename: Mahou-1.5-llama3.1-70B.i1-Q4_K_M.gguf
+      sha256: c2711c4c9c8d011edbeaa391b4418d433e273a318d1de3dbdda9b85baf4996f2
+      uri: huggingface://mradermacher/Mahou-1.5-llama3.1-70B-i1-GGUF/Mahou-1.5-llama3.1-70B.i1-Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "llama-3.1-nemotron-70b-instruct-hf"
+  urls:
+    - https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF
+    - https://huggingface.co/mradermacher/Llama-3.1-Nemotron-70B-Instruct-HF-GGUF
+  description: |
+    Llama-3.1-Nemotron-70B-Instruct is a large language model customized by NVIDIA to improve the helpfulness of LLM generated responses to user queries.
+
+    This model reaches Arena Hard of 85.0, AlpacaEval 2 LC of 57.6 and GPT-4-Turbo MT-Bench of 8.98, which are known to be predictive of LMSys Chatbot Arena Elo
+
+    As of 1 Oct 2024, this model is #1 on all three automatic alignment benchmarks (verified tab for AlpacaEval 2 LC), edging out strong frontier models such as GPT-4o and Claude 3.5 Sonnet.
+
+    This model was trained using RLHF (specifically, REINFORCE), Llama-3.1-Nemotron-70B-Reward and HelpSteer2-Preference prompts on a Llama-3.1-70B-Instruct model as the initial policy.
+
+    Llama-3.1-Nemotron-70B-Instruct-HF has been converted from Llama-3.1-Nemotron-70B-Instruct to support it in the HuggingFace Transformers codebase. Please note that evaluation results might be slightly different from the Llama-3.1-Nemotron-70B-Instruct as evaluated in NeMo-Aligner, which the evaluation results below are based on.
+  overrides:
+    parameters:
+      model: Llama-3.1-Nemotron-70B-Instruct-HF.Q4_K_M.gguf
+  files:
+    - filename: Llama-3.1-Nemotron-70B-Instruct-HF.Q4_K_M.gguf
+      sha256: b6b80001b849e3c59c39b09508c018b35b491a5c7bbafafa23f2fc04243f3e30
+      uri: huggingface://mradermacher/Llama-3.1-Nemotron-70B-Instruct-HF-GGUF/Llama-3.1-Nemotron-70B-Instruct-HF.Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "l3.1-etherealrainbow-v1.0-rc1-8b"
+  icon: https://huggingface.co/invisietch/L3.1-EtherealRainbow-v1.0-rc1-8B/resolve/main/header.png
+  urls:
+    - https://huggingface.co/invisietch/L3.1-EtherealRainbow-v1.0-rc1-8B
+    - https://huggingface.co/mradermacher/L3.1-EtherealRainbow-v1.0-rc1-8B-GGUF
+  description: |
+    Ethereal Rainbow v1.0 is the sequel to the popular Llama 3 8B merge, EtherealRainbow v0.3. Instead of a straight merge of other peoples' models, v1.0 is a finetune on the Instruct model, using 245 million tokens of training data (approx 177 million of these tokens are my own novel datasets).
+
+    This model is designed to be suitable for creative writing and roleplay, and to push the boundaries of what's possible with an 8B model. This RC is not a finished product, but your feedback will drive the creation of better models.
+
+    This is a release candidate model. It has some known issues and probably some unknown ones too, because the purpose of these early releases is to seek feedback.
+  overrides:
+    parameters:
+      model: L3.1-EtherealRainbow-v1.0-rc1-8B.Q4_K_M.gguf
+  files:
+    - filename: L3.1-EtherealRainbow-v1.0-rc1-8B.Q4_K_M.gguf
+      sha256: c5556b2563112e512acca171415783f0988545b02c1834696c1cc35952def72c
+      uri: huggingface://mradermacher/L3.1-EtherealRainbow-v1.0-rc1-8B-GGUF/L3.1-EtherealRainbow-v1.0-rc1-8B.Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "theia-llama-3.1-8b-v1"
+  urls:
+    - https://huggingface.co/Chainbase-Labs/Theia-Llama-3.1-8B-v1
+    - https://huggingface.co/QuantFactory/Theia-Llama-3.1-8B-v1-GGUF
+  description: |
+    Theia-Llama-3.1-8B-v1 is an open-source large language model (LLM) trained specifically in the cryptocurrency domain. It was fine-tuned from the Llama-3.1-8B base model using a dataset curated from top 2000 cryptocurrency projects and comprehensive research reports to specialize in crypto-related tasks. Theia-Llama-3.1-8B-v1 has been quantized to optimize it for efficient deployment and reduced memory footprint. It's benchmarked highly for crypto knowledge comprehension and generation, knowledge coverage, and reasoning capabilities. The system prompt used for its training is "You are a helpful assistant who will answer crypto related questions." The recommended parameters for performance include sequence length of 256, temperature of 0, top-k-sampling of -1, top-p of 1, and context window of 39680.
+  overrides:
+    parameters:
+      model: Theia-Llama-3.1-8B-v1.Q4_K_M.gguf
+  files:
+    - filename: Theia-Llama-3.1-8B-v1.Q4_K_M.gguf
+      sha256: db876d033f86f118b49a1f1006e5d078d494c93b73c7e595bd10ca789a0c8fdb
+      uri: huggingface://QuantFactory/Theia-Llama-3.1-8B-v1-GGUF/Theia-Llama-3.1-8B-v1.Q4_K_M.gguf
+- !!merge <<: *llama31
+  icon: https://huggingface.co/Delta-Vector/Baldur-8B/resolve/main/Baldur.jpg
+  name: "baldur-8b"
+  urls:
+    - https://huggingface.co/QuantFactory/Baldur-8B-GGUF
+    - https://huggingface.co/QuantFactory/Baldur-8B-GGUF
+  description: |
+    An finetune of the L3.1 instruct distill done by Arcee, The intent of this model is to have differing prose then my other releases, in my testing it has achieved this and avoiding using common -isms frequently and has a differing flavor then my other models.
+  overrides:
+    parameters:
+      model: Baldur-8B.Q4_K_M.gguf
+  files:
+    - filename: Baldur-8B.Q4_K_M.gguf
+      sha256: 645b393fbac5cd17ccfd66840a3a05c3930e01b903dd1535f0347a74cc443fc7
+      uri: huggingface://QuantFactory/Baldur-8B-GGUF/Baldur-8B.Q4_K_M.gguf
 - &deepseek
   ## Deepseek
   url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"
@@ -1437,6 +1803,20 @@
   - filename: DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf
     sha256: 50ec78036433265965ed1afd0667c00c71c12aa70bcf383be462cb8e159db6c0
     uri: huggingface://LoneStriker/DeepSeek-Coder-V2-Lite-Instruct-GGUF/DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf
+- !!merge <<: *deepseek
+  name: "cursorcore-ds-6.7b-i1"
+  urls:
+    - https://huggingface.co/TechxGenus/CursorCore-DS-6.7B
+    - https://huggingface.co/mradermacher/CursorCore-DS-6.7B-i1-GGUF
+  description: |
+    CursorCore is a series of open-source models designed for AI-assisted programming. It aims to support features such as automated editing and inline chat, replicating the core abilities of closed-source AI-assisted programming tools like Cursor. This is achieved by aligning data generated through Programming-Instruct. Please read our paper to learn more.
+  overrides:
+    parameters:
+      model: CursorCore-DS-6.7B.i1-Q4_K_M.gguf
+  files:
+    - filename: CursorCore-DS-6.7B.i1-Q4_K_M.gguf
+      sha256: 71b94496be79e5bc45c23d6aa6c242f5f1d3625b4f00fe91d781d381ef35c538
+      uri: huggingface://mradermacher/CursorCore-DS-6.7B-i1-GGUF/CursorCore-DS-6.7B.i1-Q4_K_M.gguf
 - name: "archangel_sft_pythia2-8b"
   url: "github:mudler/LocalAI/gallery/tuluv2.yaml@master"
   icon: https://gist.github.com/assets/29318529/fe2d8391-dbd1-4b7e-9dc4-7cb97e55bc06
@@ -2022,6 +2402,76 @@
|
||||
- filename: MN-BackyardAI-Party-12B-v1-Q4_K_M-imat.gguf
|
||||
sha256: cea68768dff58b553974b755bb40ef790ab8b86866d9b5c46bc2e6c3311b876a
|
||||
uri: huggingface://Lewdiculous/MN-BackyardAI-Party-12B-v1-GGUF-IQ-ARM-Imatrix/MN-BackyardAI-Party-12B-v1-Q4_K_M-imat.gguf
|
||||
- !!merge <<: *mistral03
  name: "ml-ms-etheris-123b"
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
  icon: https://cdn-uploads.huggingface.co/production/uploads/64545af5ec40bbbd01242ca6/ieEjL3TxpDM3WAZQcya6E.png
  urls:
    - https://huggingface.co/Steelskull/ML-MS-Etheris-123B
    - https://huggingface.co/mradermacher/ML-MS-Etheris-123B-GGUF
  description: |
    This model merges the robust storytelling of multiple models while attempting to maintain intelligence. The final model was merged after Model Soup with DELLA to add some special sauce.
    - model: NeverSleep/Lumimaid-v0.2-123B
    - model: TheDrummer/Behemoth-123B-v1
    - model: migtissera/Tess-3-Mistral-Large-2-123B
    - model: anthracite-org/magnum-v2-123b
    Use Mistral, ChatML, or Meth Format
  overrides:
    parameters:
      model: ML-MS-Etheris-123B.Q2_K.gguf
  files:
    - filename: ML-MS-Etheris-123B.Q2_K.gguf
      sha256: a17c5615413b5c9c8d01cf55386573d0acd00e01f6e2bcdf492624c73c593fc3
      uri: huggingface://mradermacher/ML-MS-Etheris-123B-GGUF/ML-MS-Etheris-123B.Q2_K.gguf
- !!merge <<: *mistral03
  name: "mn-lulanum-12b-fix-i1"
  urls:
    - https://huggingface.co/djuna/MN-Lulanum-12B-FIX
    - https://huggingface.co/mradermacher/MN-Lulanum-12B-FIX-i1-GGUF
  description: |
    This model was merged using the della_linear merge method, with unsloth/Mistral-Nemo-Base-2407 as the base.
    The following models were included in the merge:
    - VAGOsolutions/SauerkrautLM-Nemo-12b-Instruct
    - anthracite-org/magnum-v2.5-12b-kto
    - Undi95/LocalC-12B-e2.0
    - NeverSleep/Lumimaid-v0.2-12B
  overrides:
    parameters:
      model: MN-Lulanum-12B-FIX.i1-Q4_K_M.gguf
  files:
    - filename: MN-Lulanum-12B-FIX.i1-Q4_K_M.gguf
      sha256: 7e24d57249059d45bb508565ec3055e585a4e658c1815c67ea92397acc6aa775
      uri: huggingface://mradermacher/MN-Lulanum-12B-FIX-i1-GGUF/MN-Lulanum-12B-FIX.i1-Q4_K_M.gguf
- !!merge <<: *mistral03
  name: "tor-8b"
  icon: https://huggingface.co/Delta-Vector/Tor-8B/resolve/main/FinalTor8B.jpg
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
  urls:
    - https://huggingface.co/QuantFactory/Tor-8B-GGUF
  description: |
    An earlier checkpoint of Darkens-8B using the same configuration, which I felt was different enough from its 4-epoch cousin to release. Finetuned on top of the Prune/Distill NeMo 8B done by Nvidia, this model aims to have generally good prose and writing while not falling into claude-isms.
  overrides:
    parameters:
      model: Tor-8B.Q4_K_M.gguf
  files:
    - filename: Tor-8B.Q4_K_M.gguf
      sha256: 9dd64bd886aa7682b6179340449b38feda405b44722ef7ac752cedb807af370e
      uri: huggingface://QuantFactory/Tor-8B-GGUF/Tor-8B.Q4_K_M.gguf
- !!merge <<: *mistral03
  name: "darkens-8b"
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
  urls:
    - https://huggingface.co/Delta-Vector/Darkens-8B
    - https://huggingface.co/QuantFactory/Darkens-8B-GGUF
  description: |
    This is the fully cooked, 4-epoch version of Tor-8B. It is an experimental version: despite being trained for 4 epochs, the model feels fresh and new and is not overfit. This model aims to have generally good prose and writing while not falling into claude-isms, and it follows the actions "dialogue" format heavily.
  overrides:
    parameters:
      model: Darkens-8B.Q4_K_M.gguf
  files:
    - filename: Darkens-8B.Q4_K_M.gguf
      sha256: f56a483e10fd00957460adfc16ee462cecac892a4fb44dc59e466e68a360fd42
      uri: huggingface://QuantFactory/Darkens-8B-GGUF/Darkens-8B.Q4_K_M.gguf
- &mudler
  ### START mudler's LocalAI specific-models
  url: "github:mudler/LocalAI/gallery/mudler.yaml@master"
@@ -2649,6 +3099,20 @@
    - filename: Gemma-2-Ataraxy-v3i-9B.Q4_K_M.gguf
      sha256: f14c5b9373d4058f0f812c6c34184addeb4aeeecb02a7bbcf9844d9afc8d0066
      uri: huggingface://QuantFactory/Gemma-2-Ataraxy-v3i-9B-GGUF/Gemma-2-Ataraxy-v3i-9B.Q4_K_M.gguf
- !!merge <<: *gemma
  name: "apollo2-9b"
  url: "github:mudler/LocalAI/gallery/vicuna-chat.yaml@master"
  urls:
    - https://huggingface.co/mradermacher/Apollo2-9B-GGUF
  description: |
    Covering 12 major languages (English, Chinese, French, Hindi, Spanish, Arabic, Russian, Japanese, Korean, German, Italian, and Portuguese) and 38 minor languages so far.
  overrides:
    parameters:
      model: Apollo2-9B.Q4_K_M.gguf
  files:
    - filename: Apollo2-9B.Q4_K_M.gguf
      sha256: 9fdb63f78e574558a4f33782eca88716eea28e90ea3ae36c381769cde6b81e0f
      uri: huggingface://mradermacher/Apollo2-9B-GGUF/Apollo2-9B.Q4_K_M.gguf
- &llama3
  url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
  icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
@@ -4248,6 +4712,19 @@
    - filename: Yi-Coder-9B.Q4_K_M.gguf
      sha256: cff3db8a69c43654e3c2d2984e86ad2791d1d446ec56b24a636ba1ce78363308
      uri: huggingface://QuantFactory/Yi-Coder-9B-GGUF/Yi-Coder-9B.Q4_K_M.gguf
- !!merge <<: *yi-chat
  name: "cursorcore-yi-9b"
  urls:
    - https://huggingface.co/mradermacher/CursorCore-Yi-9B-GGUF
  description: |
    CursorCore is a series of open-source models designed for AI-assisted programming. It aims to support features such as automated editing and inline chat, replicating the core abilities of closed-source AI-assisted programming tools like Cursor. This is achieved by aligning data generated through Programming-Instruct. Please read our paper to learn more.
  overrides:
    parameters:
      model: CursorCore-Yi-9B.Q4_K_M.gguf
  files:
    - filename: CursorCore-Yi-9B.Q4_K_M.gguf
      sha256: 943bf59b34bee34afae8390c1791ccbc7c742e11a4d04d538a699754eb92215e
      uri: huggingface://mradermacher/CursorCore-Yi-9B-GGUF/CursorCore-Yi-9B.Q4_K_M.gguf
- &vicuna-chat
  ## LLama2 and derivatives
  ### Start Fimbulvetr
@@ -5175,6 +5652,26 @@
    - filename: L3-8B-Niitama-v1.i1-Q4_K_M.gguf
      sha256: 8c62f831db2a6e34aa75459fe8a98815199ecc2dac1892a460b8b86363b6826e
      uri: huggingface://mradermacher/L3-8B-Niitama-v1-i1-GGUF/L3-8B-Niitama-v1.i1-Q4_K_M.gguf
- !!merge <<: *llama3
  icon: https://huggingface.co/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA/resolve/main/Images/LLAMA-3_8B_Unaligned_BETA.png
  name: "llama-3_8b_unaligned_beta"
  urls:
    - https://huggingface.co/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA
    - https://huggingface.co/bartowski/LLAMA-3_8B_Unaligned_BETA-GGUF
  description: |
    In the Wild West of the AI world, the real titans never hit their deadlines, no sir!
    The projects that finish on time? They’re the soft ones—basic, surface-level shenanigans. But the serious projects? They’re always delayed. You set a date, then reality hits: not gonna happen, scope creep that mutates the roadmap, unexpected turns of events that derail everything.
    It's only been 4 months since the Alpha was released, and half a year since the project started, but it felt like nearly a decade.
    Deadlines shift, but with each delay, you’re not failing—you’re refining, and becoming more ambitious. A project that keeps getting pushed isn’t late; it’s just gaining weight, becoming something worth building, and truly worth seeing all the way through. The longer it’s delayed, the more serious it gets.
    LLAMA-3_8B_Unaligned is a serious project, and thank god, the Beta is finally here.
    I love you all unconditionally, thanks for all the support and kind words!
  overrides:
    parameters:
      model: LLAMA-3_8B_Unaligned_BETA-Q4_K_M.gguf
  files:
    - filename: LLAMA-3_8B_Unaligned_BETA-Q4_K_M.gguf
      sha256: 5b88fb4537339996c04e4a1b6ef6a2d555c4103b6378e273ae9c6c5e77af67eb
      uri: huggingface://bartowski/LLAMA-3_8B_Unaligned_BETA-GGUF/LLAMA-3_8B_Unaligned_BETA-Q4_K_M.gguf
- &chatml
  ### ChatML
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
@@ -5707,6 +6204,40 @@
    - filename: calme-2.1-phi3.5-4b.i1-Q4_K_M.gguf
      sha256: 989eccacd52b6d9ebf2c06c35c363da19aadb125659a10df299b7130bc293e77
      uri: huggingface://mradermacher/calme-2.1-phi3.5-4b-i1-GGUF/calme-2.1-phi3.5-4b.i1-Q4_K_M.gguf
- !!merge <<: *phi-3
  name: "phi-3.5-mini-titanfusion-0.2"
  urls:
    - https://huggingface.co/bunnycore/Phi-3.5-mini-TitanFusion-0.2
    - https://huggingface.co/mradermacher/Phi-3.5-mini-TitanFusion-0.2-GGUF
  description: |
    This model was merged using the TIES merge method, with microsoft/Phi-3.5-mini-instruct as the base.
    The following models were included in the merge:
    - nbeerbower/phi3.5-gutenberg-4B
    - ArliAI/Phi-3.5-mini-3.8B-ArliAI-RPMax-v1.1
    - bunnycore/Phi-3.5-Mini-Hyper
    - bunnycore/Phi-3.5-Mini-Hyper + bunnycore/Phi-3.1-EvolKit-lora
    - bunnycore/Phi-3.5-Mini-Sonet-RP
    - bunnycore/Phi-3.5-mini-TitanFusion-0.1
  overrides:
    parameters:
      model: Phi-3.5-mini-TitanFusion-0.2.Q4_K_M.gguf
  files:
    - filename: Phi-3.5-mini-TitanFusion-0.2.Q4_K_M.gguf
      sha256: 9579305712f2bca246914639c4873acdc1e7bc64ac2c7db0230df4f0ca0ef234
      uri: huggingface://mradermacher/Phi-3.5-mini-TitanFusion-0.2-GGUF/Phi-3.5-mini-TitanFusion-0.2.Q4_K_M.gguf
- !!merge <<: *phi-3
  name: "phi-3-vision:vllm"
  url: "github:mudler/LocalAI/gallery/phi-3-vision.yaml@master"
  description: |
    Phi-3-vision is a lightweight, state-of-the-art open multimodal model built upon datasets that include synthetic data and filtered publicly available websites, with a focus on very high-quality, reasoning-dense data in both text and vision. The model belongs to the Phi-3 model family, and the multimodal version supports a 128K context length (in tokens). The model underwent a rigorous enhancement process, incorporating both supervised fine-tuning and direct preference optimization, to ensure precise instruction adherence and robust safety measures.
- !!merge <<: *phi-3
  name: "phi-3.5-vision:vllm"
  url: "github:mudler/LocalAI/gallery/phi-3-vision.yaml@master"
  overrides:
    parameters:
      model: microsoft/Phi-3.5-vision-instruct
  description: |
    Phi-3.5-vision is a lightweight, state-of-the-art open multimodal model built upon datasets that include synthetic data and filtered publicly available websites, with a focus on very high-quality, reasoning-dense data in both text and vision. The model belongs to the Phi-3 model family, and the multimodal version supports a 128K context length (in tokens). The model underwent a rigorous enhancement process, incorporating both supervised fine-tuning and direct preference optimization, to ensure precise instruction adherence and robust safety measures.
- &hermes-2-pro-mistral
  ### START Hermes
  url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"
23
gallery/phi-3-vision.yaml
Normal file
@@ -0,0 +1,23 @@
---
name: "phi3-vision"

config_file: |
  name: phi3-vision
  backend: vllm
  parameters:
    model: microsoft/Phi-3-vision-128k-instruct
    trust_remote_code: true
    max_model_len: 32768
  template:
    chat_message: |-
      <|{{ .RoleName }}|>
      {{.Content}}<|end|>
    chat: >-
      {{.Input}}

      <|assistant|>

    completion: |
      {{.Input}}
    use_tokenizer_template: false
    image: "<|image_{{ add1 .ID }}|>\n{{.Text}}"
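
For reference, a minimal sketch of how the chat_message template above renders with Go's text/template (the ChatMessage struct and the role value here are illustrative, not LocalAI's internal types):

package main

import (
	"os"
	"text/template"
)

// ChatMessage mirrors the two fields the chat_message template refers to
// ({{ .RoleName }} and {{.Content}}); the struct itself is illustrative only.
type ChatMessage struct {
	RoleName string
	Content  string
}

func main() {
	// The chat_message template from the config above.
	const chatMessage = "<|{{ .RoleName }}|>\n{{.Content}}<|end|>"
	tmpl := template.Must(template.New("chat_message").Parse(chatMessage))
	// Prints:
	// <|user|>
	// Describe this image.<|end|>
	_ = tmpl.Execute(os.Stdout, ChatMessage{RoleName: "user", Content: "Describe this image."})
}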
@@ -14,6 +14,10 @@ config_file: |
    system: "System: "
    assistant: "Assistant: "
  f16: true
  stopwords:
    - <|end|>
    - <|endoftext|>
    - <eos>
  template:
    completion: |
      Complete the following sentence: {{.Input}}
@@ -251,8 +251,22 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) str

	// No GPU found or no specific binaries found, try to load the CPU variant(s)

	// Select the Fallback by default
	selectedProcess := backendPath(assetDir, LLamaCPPFallback)
	// Select a binary based on availability/capability
	selectedProcess := ""

	// Check if the fallback variant (llama-cpp-fallback) is available and use it as a baseline
	if _, err := os.Stat(backendPath(assetDir, LLamaCPPFallback)); err == nil {
		log.Debug().Msgf("[%s] %s variant available", LLamaCPPFallback, backend)
		selectedProcess = backendPath(assetDir, LLamaCPPFallback)
	}

	// Check if we have a native build (llama-cpp) and use that instead.
	// As a reminder, we do ultimately attempt again with the fallback variant
	// if things fail with what we select here.
	if _, err := os.Stat(backendPath(assetDir, LLamaCPP)); err == nil {
		log.Debug().Msgf("[%s] attempting to load with native variant", backend)
		selectedProcess = backendPath(assetDir, LLamaCPP)
	}

	// If we find any optimized binary, we use that
	if xsysinfo.HasCPUCaps(cpuid.AVX2) {
@@ -269,7 +283,7 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) str
		}
	}

	// Check if the binary exists!
	// Safety measure: check if the binary exists, otherwise return an empty string
	if _, err := os.Stat(selectedProcess); err == nil {
		return selectedProcess
	}
@@ -277,6 +291,21 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) str
	return ""
}
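
To make the selection order above easier to see in isolation (fallback first, then the native build, then a CPU-optimized variant), here is a standalone sketch using github.com/klauspost/cpuid/v2 directly; the binary file names are illustrative, not the exact asset names LocalAI ships:

package main

import (
	"fmt"
	"os"
	"path/filepath"

	"github.com/klauspost/cpuid/v2"
)

func exists(p string) bool {
	_, err := os.Stat(p)
	return err == nil
}

// pickVariant mirrors the priority above: start from the fallback build,
// prefer the native build when present, then upgrade to an AVX2-optimized
// build if the host CPU supports AVX2.
func pickVariant(assetDir string) string {
	selected := ""
	if p := filepath.Join(assetDir, "llama-cpp-fallback"); exists(p) {
		selected = p
	}
	if p := filepath.Join(assetDir, "llama-cpp"); exists(p) {
		selected = p
	}
	if cpuid.CPU.Supports(cpuid.AVX2) {
		if p := filepath.Join(assetDir, "llama-cpp-avx2"); exists(p) {
			selected = p
		}
	}
	if exists(selected) {
		return selected
	}
	return "" // nothing usable found
}

func main() {
	fmt.Println(pickVariant("./backend-assets/grpc"))
}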

func attemptLoadingOnFailure(backend string, ml *ModelLoader, o *Options, err error) (*Model, error) {
	// XXX: This is too backend specific(llama-cpp), remove this bit or generalize further
	// We failed somehow starting the binary. For instance, could be that we are missing
	// some libraries if running in binary-only mode.
	// In this case, we attempt to load the model with the fallback variant.

	// If not llama-cpp backend, return the error immediately
	if backend != LLamaCPP {
		return nil, err
	}

	log.Error().Msgf("[%s] Failed loading model, trying with fallback '%s', error: %s", backend, LLamaCPPFallback, err.Error())
	return ml.LoadModel(o.modelID, o.model, ml.grpcModel(LLamaCPPFallback, false, o))
}

// starts the grpcModelProcess for the backend, and returns a grpc client
// It also loads the model
func (ml *ModelLoader) grpcModel(backend string, autodetect bool, o *Options) func(string, string, string) (*Model, error) {
@@ -450,19 +479,7 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err e

	model, err := ml.LoadModel(o.modelID, o.model, ml.grpcModel(backendToConsume, AutoDetect, o))
	if err != nil {
		// XXX: This is too backend specific(llama-cpp), remove this bit or generalize further
		// We failed somehow starting the binary. For instance, could be that we are missing
		// some libraries if running in binary-only mode.
		// In this case, we attempt to load the model with the fallback variant.

		// If not llama-cpp backend, return error immediately
		if backend != LLamaCPP {
			return nil, err
		}

		// Otherwise attempt with fallback
		log.Error().Msgf("[%s] Failed loading model, trying with fallback '%s'", backend, LLamaCPPFallback)
		model, err = ml.LoadModel(o.modelID, o.model, ml.grpcModel(LLamaCPPFallback, false, o))
		model, err = attemptLoadingOnFailure(backend, ml, o, err)
		if err != nil {
			return nil, err
		}
@@ -3,11 +3,13 @@ package templates

import (
	"bytes"
	"text/template"

	"github.com/Masterminds/sprig/v3"
)

func TemplateMultiModal(templateString string, templateID int, text string) (string, error) {
	// compile the template
	tmpl, err := template.New("template").Parse(templateString)
	tmpl, err := template.New("template").Funcs(sprig.FuncMap()).Parse(templateString)
	if err != nil {
		return "", err
	}
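
The sprig registration matters because templates like the image template in gallery/phi-3-vision.yaml call sprig's add1 to turn the zero-based image index into the one-based <|image_N|> placeholder Phi-3-vision expects. A minimal sketch of that rendering (the input struct shape is an assumption about what TemplateMultiModal passes in):

package main

import (
	"bytes"
	"fmt"
	"text/template"

	"github.com/Masterminds/sprig/v3"
)

func main() {
	// The image template from gallery/phi-3-vision.yaml; add1 comes from sprig,
	// which is why the Funcs(sprig.FuncMap()) call above is needed.
	const imageTmpl = "<|image_{{ add1 .ID }}|>\n{{.Text}}"

	tmpl, err := template.New("image").Funcs(sprig.FuncMap()).Parse(imageTmpl)
	if err != nil {
		panic(err)
	}

	var out bytes.Buffer
	// Assumed input shape: a zero-based image ID plus the prompt text.
	if err := tmpl.Execute(&out, struct {
		ID   int
		Text string
	}{ID: 0, Text: "What is in this photo?"}); err != nil {
		panic(err)
	}
	fmt.Println(out.String())
	// Output:
	// <|image_1|>
	// What is in this photo?
}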