Compare commits

..

117 Commits

Author SHA1 Message Date
Ettore Di Giacinto
cc11323d1c fix(ci): install latest git
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-24 14:55:24 +02:00
Ettore Di Giacinto
e88468640f fix(parler-tts): use latest audiotools (#3954)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-24 11:40:35 +02:00
LocalAI [bot]
81890e76a0 chore: ⬆️ Update ggerganov/llama.cpp to 0a1c750c80147687df267114c81956757cc14382 (#3948)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-24 10:08:55 +02:00
LocalAI [bot]
a91c2e7aaa chore: ⬆️ Update ggerganov/whisper.cpp to 0fbaac9c891055796456df7b9122a70c220f9ca1 (#3950)
⬆️ Update ggerganov/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-24 10:08:20 +02:00
Mauro Morales
7748eb6553 docs: add Homebrew as an option to install on MacOS (#3946)
Add Homebrew as an option to install on MacOS

Signed-off-by: Mauro Morales <contact@mauromorales.com>
2024-10-23 20:02:08 +02:00
Ettore Di Giacinto
835932e95e feat: update proto file
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-23 15:46:06 +02:00
Ettore Di Giacinto
ae1ec4e096 feat(vllm): expose 'load_format' (#3943)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-23 15:34:57 +02:00
Ettore Di Giacinto
c75ecfa009 fix(phi3-vision): add multimodal template (#3944)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-23 15:34:45 +02:00
Ettore Di Giacinto
8737a65760 feat: allow to disable '/metrics' endpoints for local stats (#3945)
Seem the "/metrics" endpoint that is source of confusion as people tends
to believe we collect telemetry data just because we import
"opentelemetry", however it is still a good idea to allow to disable
even local metrics if not really required.

See also: https://github.com/mudler/LocalAI/issues/3942

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-23 15:34:32 +02:00
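For reference, disabling the endpoint at startup might look like the sketch below. The exact flag and environment variable names are assumptions inferred from the #3945 description, not confirmed by this changeset; check `local-ai run --help` for the authoritative spelling.

```bash
# Hypothetical invocation; flag name assumed from the #3945 description
local-ai run --disable-metrics-endpoints

# Equivalent environment-variable form (name likewise assumed)
LOCALAI_DISABLE_METRICS_ENDPOINTS=true local-ai run
```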
LocalAI [bot]
418c582430 chore: ⬆️ Update ggerganov/llama.cpp to c8c07d658a6cefc5a50cfdf6be7d726503612303 (#3940)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-23 11:17:21 +02:00
Dave
6fd0341eca chore: update go-piper to latest (#3939)
Signed-off-by: Dave Lee <dave@gray101.com>
2024-10-23 11:16:38 +02:00
Ettore Di Giacinto
ccc7cb0287 feat(templates): use a single template for multimodals messages (#3892)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-22 09:34:05 +02:00
LocalAI [bot]
a1d6cc93a8 chore: ⬆️ Update ggerganov/llama.cpp to e01c67affe450638162a1a457e2e57859ef6ebf0 (#3937)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-22 09:33:55 +02:00
LocalAI [bot]
dc14d80f51 docs: ⬆️ update docs version mudler/LocalAI (#3936)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-22 09:33:29 +02:00
dependabot[bot]
b8eb10b6b7 chore(deps): Bump yarl from 1.15.5 to 1.16.0 in /examples/langchain/langchainpy-localai-example (#3938)
chore(deps): Bump yarl

Bumps [yarl](https://github.com/aio-libs/yarl) from 1.15.5 to 1.16.0.
- [Release notes](https://github.com/aio-libs/yarl/releases)
- [Changelog](https://github.com/aio-libs/yarl/blob/master/CHANGES.rst)
- [Commits](https://github.com/aio-libs/yarl/compare/v1.15.5...v1.16.0)

---
updated-dependencies:
- dependency-name: yarl
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-22 09:33:14 +02:00
dependabot[bot]
0f6b4513bf chore(deps): Bump openai from 1.51.2 to 1.52.0 in /examples/functions (#3901)
Bumps [openai](https://github.com/openai/openai-python) from 1.51.2 to 1.52.0.
- [Release notes](https://github.com/openai/openai-python/releases)
- [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md)
- [Commits](https://github.com/openai/openai-python/compare/v1.51.2...v1.52.0)

---
updated-dependencies:
- dependency-name: openai
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-22 09:32:55 +02:00
dependabot[bot]
6f0c936f74 chore(deps): Bump marshmallow from 3.22.0 to 3.23.0 in /examples/langchain/langchainpy-localai-example (#3917)
chore(deps): Bump marshmallow

Bumps [marshmallow](https://github.com/marshmallow-code/marshmallow) from 3.22.0 to 3.23.0.
- [Changelog](https://github.com/marshmallow-code/marshmallow/blob/dev/CHANGELOG.rst)
- [Commits](https://github.com/marshmallow-code/marshmallow/compare/3.22.0...3.23.0)

---
updated-dependencies:
- dependency-name: marshmallow
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-22 09:32:45 +02:00
dependabot[bot]
42136b6f27 chore(deps): Bump llama-index from 0.11.17 to 0.11.19 in /examples/langchain-chroma (#3907)
chore(deps): Bump llama-index in /examples/langchain-chroma

Bumps [llama-index](https://github.com/run-llama/llama_index) from 0.11.17 to 0.11.19.
- [Release notes](https://github.com/run-llama/llama_index/releases)
- [Changelog](https://github.com/run-llama/llama_index/blob/main/CHANGELOG.md)
- [Commits](https://github.com/run-llama/llama_index/compare/v0.11.17...v0.11.19)

---
updated-dependencies:
- dependency-name: llama-index
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-22 09:32:34 +02:00
dependabot[bot]
2810e3ea5c chore(deps): Bump openai from 1.51.2 to 1.52.0 in /examples/langchain-chroma (#3908)
chore(deps): Bump openai in /examples/langchain-chroma

Bumps [openai](https://github.com/openai/openai-python) from 1.51.2 to 1.52.0.
- [Release notes](https://github.com/openai/openai-python/releases)
- [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md)
- [Commits](https://github.com/openai/openai-python/compare/v1.51.2...v1.52.0)

---
updated-dependencies:
- dependency-name: openai
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-22 09:32:14 +02:00
dependabot[bot]
11d34e38dc chore(deps): Bump yarl from 1.15.2 to 1.15.5 in /examples/langchain/langchainpy-localai-example (#3921)
chore(deps): Bump yarl

Bumps [yarl](https://github.com/aio-libs/yarl) from 1.15.2 to 1.15.5.
- [Release notes](https://github.com/aio-libs/yarl/releases)
- [Changelog](https://github.com/aio-libs/yarl/blob/master/CHANGES.rst)
- [Commits](https://github.com/aio-libs/yarl/compare/v1.15.2...v1.15.5)

---
updated-dependencies:
- dependency-name: yarl
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-22 09:31:47 +02:00
dependabot[bot]
06951cdd6b chore(deps): Bump sqlalchemy from 2.0.35 to 2.0.36 in /examples/langchain/langchainpy-localai-example (#3920)
chore(deps): Bump sqlalchemy

Bumps [sqlalchemy](https://github.com/sqlalchemy/sqlalchemy) from 2.0.35 to 2.0.36.
- [Release notes](https://github.com/sqlalchemy/sqlalchemy/releases)
- [Changelog](https://github.com/sqlalchemy/sqlalchemy/blob/main/CHANGES.rst)
- [Commits](https://github.com/sqlalchemy/sqlalchemy/commits)

---
updated-dependencies:
- dependency-name: sqlalchemy
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-22 09:31:30 +02:00
dependabot[bot]
103af480c7 chore(deps): Bump docs/themes/hugo-theme-relearn from 007cc20 to 06e70da (#3932)
chore(deps): Bump docs/themes/hugo-theme-relearn

Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `007cc20` to `06e70da`.
- [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases)
- [Commits](007cc20686...06e70da8a6)

---
updated-dependencies:
- dependency-name: docs/themes/hugo-theme-relearn
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-22 09:31:15 +02:00
dependabot[bot]
db401b4d84 chore(deps): Bump langchain-community from 0.3.2 to 0.3.3 in /examples/langchain/langchainpy-localai-example (#3923)
chore(deps): Bump langchain-community

Bumps [langchain-community](https://github.com/langchain-ai/langchain) from 0.3.2 to 0.3.3.
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/langchain-community==0.3.2...langchain-community==0.3.3)

---
updated-dependencies:
- dependency-name: langchain-community
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-22 09:30:52 +02:00
dependabot[bot]
e0c876aae1 chore(deps): Bump langchain from 0.3.3 to 0.3.4 in /examples/functions (#3900)
Bumps [langchain](https://github.com/langchain-ai/langchain) from 0.3.3 to 0.3.4.
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/langchain==0.3.3...langchain==0.3.4)

---
updated-dependencies:
- dependency-name: langchain
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-22 09:30:28 +02:00
dependabot[bot]
5e0847b3d7 chore(deps): Bump weaviate-client from 4.8.1 to 4.9.0 in /examples/chainlit (#3894)
chore(deps): Bump weaviate-client in /examples/chainlit

Bumps [weaviate-client](https://github.com/weaviate/weaviate-python-client) from 4.8.1 to 4.9.0.
- [Release notes](https://github.com/weaviate/weaviate-python-client/releases)
- [Changelog](https://github.com/weaviate/weaviate-python-client/blob/main/docs/changelog.rst)
- [Commits](https://github.com/weaviate/weaviate-python-client/compare/v4.8.1...v4.9.0)

---
updated-dependencies:
- dependency-name: weaviate-client
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-22 09:30:16 +02:00
dependabot[bot]
ee5ca49bc1 chore(deps): Bump llama-index from 0.11.17 to 0.11.19 in /examples/chainlit (#3893)
chore(deps): Bump llama-index in /examples/chainlit

Bumps [llama-index](https://github.com/run-llama/llama_index) from 0.11.17 to 0.11.19.
- [Release notes](https://github.com/run-llama/llama_index/releases)
- [Changelog](https://github.com/run-llama/llama_index/blob/main/CHANGELOG.md)
- [Commits](https://github.com/run-llama/llama_index/compare/v0.11.17...v0.11.19)

---
updated-dependencies:
- dependency-name: llama-index
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-22 09:29:56 +02:00
Ettore Di Giacinto
015835dba2 models(gallery): add phi-3 vision (#3890)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-21 11:47:52 +02:00
LocalAI [bot]
313ea2c4d2 chore: ⬆️ Update ggerganov/llama.cpp to 45f097645efb11b6d09a5b4adbbfd7c312ac0126 (#3889)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-20 21:40:26 +00:00
Ettore Di Giacinto
26c4058be4 fix(vllm): do not set videos if we don't have any (#3885)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-20 11:44:28 +02:00
Ettore Di Giacinto
32db787991 chore(deps): bump llama-cpp to cda0e4b648dde8fac162b3430b14a99597d3d74f (#3884)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-20 00:26:49 +02:00
Ettore Di Giacinto
011565aaa3 chore(openvoice): pin faster-whisper in requirements-intel.txt
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-10-19 23:04:42 +02:00
Ettore Di Giacinto
c967ac37bc chore(openvoice/deps): pin numpy in requirements-intel.txt
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-10-19 16:01:31 +02:00
Ettore Di Giacinto
64721606b9 chore(deps): pin deps in requirements-intel.txt
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-10-19 13:56:46 +02:00
Ettore Di Giacinto
7c502ec209 Revert "chore(deps): Bump gradio from 3.48.0 to 5.0.0 in /backend/python/openvoice in the pip group" (#3881)
Revert "chore(deps): Bump gradio from 3.48.0 to 5.0.0 in /backend/python/open…"

This reverts commit 7ee25ecfb3.
2024-10-19 13:54:40 +02:00
dependabot[bot]
7ee25ecfb3 chore(deps): Bump gradio from 3.48.0 to 5.0.0 in /backend/python/openvoice in the pip group (#3880)
chore(deps): Bump gradio in /backend/python/openvoice in the pip group

Bumps the pip group in /backend/python/openvoice with 1 update: [gradio](https://github.com/gradio-app/gradio).


Updates `gradio` from 3.48.0 to 5.0.0
- [Release notes](https://github.com/gradio-app/gradio/releases)
- [Changelog](https://github.com/gradio-app/gradio/blob/main/CHANGELOG.md)
- [Commits](https://github.com/gradio-app/gradio/compare/gradio@3.48.0...gradio@5.0.0)

---
updated-dependencies:
- dependency-name: gradio
  dependency-type: direct:production
  dependency-group: pip
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-19 11:24:34 +00:00
Ettore Di Giacinto
cdbcac6a78 fix(sycl): drop gradio pin
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-19 11:16:23 +02:00
Ettore Di Giacinto
87f78ecfa9 chore(openvoice): pin gradio version in requirements.txt
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-10-19 09:00:25 +02:00
LocalAI [bot]
cffecda48c chore: ⬆️ Update ggerganov/llama.cpp to afd9909a6481402844aecefa8a8908afdd7f52f1 (#3879)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-18 21:43:38 +00:00
Ettore Di Giacinto
963e5903fc chore(deps): downgrade networkx
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-10-18 19:36:55 +02:00
Ettore Di Giacinto
9c425d55f6 chore(deps): pin networkx (#3878)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-18 18:21:48 +02:00
Ettore Di Giacinto
398a9efa3a chore(deps): pin numpy (#3876)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-18 16:59:31 +02:00
Ettore Di Giacinto
8f2cf52f3b chore(deps): pin packaging (#3875)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-18 15:18:56 +02:00
Ettore Di Giacinto
134ea1a37b fix(dependencies): move deps that brings pytorch (#3873)
* fix(dependencies): move deps that brings pytorch

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(deps): pin llvmlite

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-18 10:31:21 +02:00
Ettore Di Giacinto
3e77a17b26 fix(dependencies): pin pytorch version (#3872)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-18 09:11:59 +02:00
LocalAI [bot]
a26fb548b1 chore: ⬆️ Update ggerganov/whisper.cpp to a5abfe6a90495f7bf19fe70d016ecc255e97359c (#3870)
⬆️ Update ggerganov/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-17 23:05:26 +02:00
LocalAI [bot]
08e1e2251e chore: ⬆️ Update ggerganov/llama.cpp to 99bd4ac28c32cd17c0e337ff5601393b033dc5fc (#3869)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-17 23:05:04 +02:00
Ettore Di Giacinto
dcabda42d1 fix(mamba): pin torch version (#3871)
causal-conv1d supports only torch 2.4.x, not torch 2.5.x

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-17 23:04:11 +02:00
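An illustrative sketch of the constraint follows; this is not a command taken from the repo, just the pin the commit enforces expressed with the PyPI package names.

```bash
# Sketch only: causal-conv1d builds against the torch 2.4 ABI,
# so torch must be pinned before (or alongside) installing it.
pip install "torch==2.4.*"
pip install causal-conv1d   # unsupported on torch 2.5.x per the commit message
```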
Ettore Di Giacinto
fd4043266b Update README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-10-17 17:49:03 +02:00
Ettore Di Giacinto
e1db6dce82 feat(templates): add sprig to multimodal templates (#3868)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-17 17:34:20 +02:00
Ettore Di Giacinto
d5da8c3509 feat(templates): extract text from multimodal requests (#3866)
When offloading template construction to the backend, we want to keep
text around in case of multimodal requests.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-17 17:33:50 +02:00
Ettore Di Giacinto
9db068388b fix(vllm): images and videos are base64 by default (#3867)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-17 17:32:57 +02:00
Ettore Di Giacinto
54c0f153e2 models(gallery): add meissa-qwen2.5-7b-instruct (#3865)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-17 11:25:32 +02:00
Ettore Di Giacinto
e45e8a58fc models(gallery): add baldur-8b (#3864)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-17 11:20:56 +02:00
Ettore Di Giacinto
52bc463a3f models(gallery): add darkens-8b (#3863)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-17 11:16:41 +02:00
Ettore Di Giacinto
0da16c73ba models(gallery): add tor-8b (#3862)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-17 11:10:36 +02:00
Ettore Di Giacinto
e416843f22 models(gallery): add theia-llama-3.1-8b-v1 (#3861)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-17 11:06:24 +02:00
Ettore Di Giacinto
e65e3253a3 models(gallery): add apollo2-9b (#3860)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-17 10:16:52 +02:00
Ettore Di Giacinto
bc7d4586ed models(gallery): add mn-lulanum-12b-fix-i1 (#3859)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-17 10:08:57 +02:00
Ettore Di Giacinto
056d4b4fc9 models(gallery): add phi-3.5-mini-titanfusion-0.2 (#3857)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-17 10:06:38 +02:00
Ettore Di Giacinto
5927f9e43e models(gallery): add l3.1-etherealrainbow-v1.0-rc1-8b (#3856)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-17 10:03:08 +02:00
Ettore Di Giacinto
98dfa363db models(gallery): add qevacot-7b-v2 (#3855)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-17 09:59:42 +02:00
Ettore Di Giacinto
92cd538829 models(gallery): add llama-3.1-nemotron-70b-instruct-hf (#3854)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-17 09:56:07 +02:00
Ettore Di Giacinto
cdcfb2617c Update README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-10-17 09:46:26 +02:00
LocalAI [bot]
1a9299a7c0 chore: ⬆️ Update ggerganov/whisper.cpp to d3f7137cc9befa6d74dc4085de2b664b97b7c8bb (#3852)
⬆️ Update ggerganov/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-17 09:21:54 +02:00
LocalAI [bot]
a60b9b7a38 chore: ⬆️ Update ggerganov/llama.cpp to 9e041024481f6b249ab8918e18b9477f873b5a5e (#3853)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-16 21:41:30 +00:00
Ettore Di Giacinto
1b44a5a3b7 chore(deps): bump grpcio to 1.67.0 (#3851)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-16 18:39:28 +02:00
Ettore Di Giacinto
fdf1452c6b models(gallery): add mahou-1.5-llama3.1-70b-i1 (#3850)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-16 18:37:01 +02:00
Ettore Di Giacinto
773cec77a2 models(gallery): add tsunami-0.5x-7b-instruct-i1 (#3849)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-16 18:31:50 +02:00
Ettore Di Giacinto
585e0745da models(gallery): add astral-fusion-neural-happy-l3.1-8b (#3848)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-16 18:28:51 +02:00
Ettore Di Giacinto
41db6668f0 models(gallery): add doctoraifinetune-3.1-8b-i1 (#3846)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-16 09:34:57 +02:00
Ettore Di Giacinto
c9f28e2b56 models(gallery): add ml-ms-etheris-123b (#3845)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-16 09:34:12 +02:00
Ettore Di Giacinto
6afe9c8fda models(gallery): add llama-3.2-3b-reasoning-time (#3844)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-16 09:15:10 +02:00
Ettore Di Giacinto
f166541ac3 models(gallery): add llama-3.2-chibi-3b (#3843)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-16 09:12:58 +02:00
LocalAI [bot]
7ddf486b37 chore: ⬆️ Update ggerganov/llama.cpp to 755a9b2bf00fbae988e03a47e852b66eaddd113a (#3841)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-16 09:01:29 +02:00
LocalAI [bot]
5f130febb8 chore: ⬆️ Update ggerganov/whisper.cpp to b6049060dd2341b7816d2bce7dc7451c1665828e (#3842)
⬆️ Update ggerganov/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-15 21:41:29 +00:00
Ettore Di Giacinto
b82577d642 fix(llama.cpp): consider also native builds (#3839)
This is needed to also identify builds that do not use the
capability-based alternatives.

For instance, there are cases where we build the backend only natively
on the host.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-15 09:41:53 +02:00
Franco Lombardo
97cf028175 chore: update integrations.md with LLPhant (#3838)
Signed-off-by: Franco Lombardo <f.lombardo69@gmail.com>
2024-10-15 09:41:39 +02:00
LocalAI [bot]
094f808549 chore: ⬆️ Update ggerganov/whisper.cpp to 06a1da9daff94c1bf1b1d38950628264fe443f76 (#3836)
⬆️ Update ggerganov/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-15 09:41:11 +02:00
dependabot[bot]
18f9e11f1a chore(deps): Bump docs/themes/hugo-theme-relearn from e1a1f01 to 007cc20 (#3835)
chore(deps): Bump docs/themes/hugo-theme-relearn

Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `e1a1f01` to `007cc20`.
- [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases)
- [Commits](e1a1f01f4c...007cc20686)

---
updated-dependencies:
- dependency-name: docs/themes/hugo-theme-relearn
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-15 09:40:53 +02:00
dependabot[bot]
18c35ee86f chore(deps): Bump numpy from 2.1.1 to 2.1.2 in /examples/langchain/langchainpy-localai-example (#3833)
chore(deps): Bump numpy

Bumps [numpy](https://github.com/numpy/numpy) from 2.1.1 to 2.1.2.
- [Release notes](https://github.com/numpy/numpy/releases)
- [Changelog](https://github.com/numpy/numpy/blob/main/doc/RELEASE_WALKTHROUGH.rst)
- [Commits](https://github.com/numpy/numpy/compare/v2.1.1...v2.1.2)

---
updated-dependencies:
- dependency-name: numpy
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-15 09:40:30 +02:00
dependabot[bot]
53d1db1da0 chore(deps): Bump yarl from 1.15.1 to 1.15.2 in /examples/langchain/langchainpy-localai-example (#3832)
chore(deps): Bump yarl

Bumps [yarl](https://github.com/aio-libs/yarl) from 1.15.1 to 1.15.2.
- [Release notes](https://github.com/aio-libs/yarl/releases)
- [Changelog](https://github.com/aio-libs/yarl/blob/master/CHANGES.rst)
- [Commits](https://github.com/aio-libs/yarl/compare/v1.15.1...v1.15.2)

---
updated-dependencies:
- dependency-name: yarl
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-15 09:40:06 +02:00
dependabot[bot]
13e7432b89 chore(deps): Bump langchain-community from 0.3.1 to 0.3.2 in /examples/langchain/langchainpy-localai-example (#3831)
chore(deps): Bump langchain-community

Bumps [langchain-community](https://github.com/langchain-ai/langchain) from 0.3.1 to 0.3.2.
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/langchain-community==0.3.1...langchain-community==0.3.2)

---
updated-dependencies:
- dependency-name: langchain-community
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-15 09:39:55 +02:00
LocalAI [bot]
ddd289d1af chore: ⬆️ Update ggerganov/llama.cpp to a89f75e1b7b90cb2d4d4c52ca53ef9e9b466aa45 (#3837)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-14 22:03:40 +00:00
dependabot[bot]
f9903d850f chore(deps): Bump charset-normalizer from 3.3.2 to 3.4.0 in /examples/langchain/langchainpy-localai-example (#3834)
chore(deps): Bump charset-normalizer

Bumps [charset-normalizer](https://github.com/Ousret/charset_normalizer) from 3.3.2 to 3.4.0.
- [Release notes](https://github.com/Ousret/charset_normalizer/releases)
- [Changelog](https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md)
- [Commits](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0)

---
updated-dependencies:
- dependency-name: charset-normalizer
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-14 20:05:36 +00:00
Ettore Di Giacinto
1e3cef6774 models(gallery): add edgerunner-command-nested-i1 (#3830)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-14 11:22:29 +02:00
Ettore Di Giacinto
dcf28e6a28 models(gallery): add cursorcore-yi-9b (#3829)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-14 11:20:09 +02:00
Ettore Di Giacinto
cb47a03880 models(gallery): add cursorcore-ds-6.7b-i1 (#3828)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-14 11:14:14 +02:00
Ettore Di Giacinto
d2a5a58e11 models(gallery): add cursorcore-qw2.5-1.5b-lc-i1 (#3827)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-14 11:10:34 +02:00
Ettore Di Giacinto
88115e4ddb models(gallery): add cursorcore-qw2.5-7b-i1 (#3826)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-14 11:06:11 +02:00
Ettore Di Giacinto
0a198e32de models(gallery): add eva-qwen2.5-14b-v0.1-i1 (#3825)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-14 10:53:28 +02:00
Ettore Di Giacinto
61388317c1 models(gallery): add hermes-3-llama-3.1-8b-lorablated (#3824)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-14 10:28:56 +02:00
Ettore Di Giacinto
304484c59b models(gallery): add hermes-3-llama-3.1-70b-lorablated (#3823)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-14 10:17:23 +02:00
Ettore Di Giacinto
93ba5ea14f models(gallery): add supernova-medius (#3822)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-14 09:00:37 +02:00
Ettore Di Giacinto
8ec828a654 models(gallery): add llama-3.1-8b-arliai-formax-v1.0-iq-arm-imatrix (#3821)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-14 08:59:41 +02:00
Ettore Di Giacinto
b6f681315a models(gallery): add llama3.1-gutenberg-doppel-70b (#3820)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-14 08:54:31 +02:00
Ettore Di Giacinto
d53e71021f models(gallery): add llama3.1-flammades-70b (#3819)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-14 08:50:02 +02:00
LocalAI [bot]
43146fa607 chore: ⬆️ Update ggerganov/llama.cpp to d4c19c0f5cdb1e512573e8c86c79e8d0238c73c4 (#3817)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-14 08:29:14 +02:00
Ettore Di Giacinto
f4dab82919 models(gallery): add llama-3_8b_unaligned_beta (#3818)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-13 23:07:00 +02:00
dependabot[bot]
f659304227 chore(deps): Bump openai from 1.51.1 to 1.51.2 in /examples/langchain-chroma (#3810)
chore(deps): Bump openai in /examples/langchain-chroma

Bumps [openai](https://github.com/openai/openai-python) from 1.51.1 to 1.51.2.
- [Release notes](https://github.com/openai/openai-python/releases)
- [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md)
- [Commits](https://github.com/openai/openai-python/compare/v1.51.1...v1.51.2)

---
updated-dependencies:
- dependency-name: openai
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-13 11:24:59 +02:00
dependabot[bot]
fd493a4451 chore(deps): Bump aiohttp from 3.10.9 to 3.10.10 in /examples/langchain/langchainpy-localai-example (#3812)
chore(deps): Bump aiohttp

Bumps [aiohttp](https://github.com/aio-libs/aiohttp) from 3.10.9 to 3.10.10.
- [Release notes](https://github.com/aio-libs/aiohttp/releases)
- [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst)
- [Commits](https://github.com/aio-libs/aiohttp/compare/v3.10.9...v3.10.10)

---
updated-dependencies:
- dependency-name: aiohttp
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-13 10:13:06 +02:00
dependabot[bot]
181fa93168 chore(deps): Bump debugpy from 1.8.6 to 1.8.7 in /examples/langchain/langchainpy-localai-example (#3814)
chore(deps): Bump debugpy

Bumps [debugpy](https://github.com/microsoft/debugpy) from 1.8.6 to 1.8.7.
- [Release notes](https://github.com/microsoft/debugpy/releases)
- [Commits](https://github.com/microsoft/debugpy/compare/v1.8.6...v1.8.7)

---
updated-dependencies:
- dependency-name: debugpy
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-13 10:12:38 +02:00
dependabot[bot]
d5d9e78983 chore(deps): Bump langchain from 0.3.2 to 0.3.3 in /examples/functions (#3802)
Bumps [langchain](https://github.com/langchain-ai/langchain) from 0.3.2 to 0.3.3.
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/langchain==0.3.2...langchain==0.3.3)

---
updated-dependencies:
- dependency-name: langchain
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-13 10:10:51 +02:00
dependabot[bot]
a1a86aa1f7 chore(deps): Bump chromadb from 0.5.11 to 0.5.13 in /examples/langchain-chroma (#3811)
chore(deps): Bump chromadb in /examples/langchain-chroma

Bumps [chromadb](https://github.com/chroma-core/chroma) from 0.5.11 to 0.5.13.
- [Release notes](https://github.com/chroma-core/chroma/releases)
- [Changelog](https://github.com/chroma-core/chroma/blob/main/RELEASE_PROCESS.md)
- [Commits](https://github.com/chroma-core/chroma/compare/0.5.11...0.5.13)

---
updated-dependencies:
- dependency-name: chromadb
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-13 10:10:26 +02:00
dependabot[bot]
9695969913 chore(deps): Bump yarl from 1.13.1 to 1.15.1 in /examples/langchain/langchainpy-localai-example (#3816)
chore(deps): Bump yarl

Bumps [yarl](https://github.com/aio-libs/yarl) from 1.13.1 to 1.15.1.
- [Release notes](https://github.com/aio-libs/yarl/releases)
- [Changelog](https://github.com/aio-libs/yarl/blob/master/CHANGES.rst)
- [Commits](https://github.com/aio-libs/yarl/compare/v1.13.1...v1.15.1)

---
updated-dependencies:
- dependency-name: yarl
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-13 10:09:48 +02:00
dependabot[bot]
975c579d44 chore(deps): Bump openai from 1.51.1 to 1.51.2 in /examples/langchain/langchainpy-localai-example (#3808)
chore(deps): Bump openai

Bumps [openai](https://github.com/openai/openai-python) from 1.51.1 to 1.51.2.
- [Release notes](https://github.com/openai/openai-python/releases)
- [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md)
- [Commits](https://github.com/openai/openai-python/compare/v1.51.1...v1.51.2)

---
updated-dependencies:
- dependency-name: openai
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-13 10:09:05 +02:00
dependabot[bot]
814cc24b69 chore(deps): Bump langchain from 0.3.1 to 0.3.3 in /examples/langchain-chroma (#3809)
chore(deps): Bump langchain in /examples/langchain-chroma

Bumps [langchain](https://github.com/langchain-ai/langchain) from 0.3.1 to 0.3.3.
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/langchain==0.3.1...langchain==0.3.3)

---
updated-dependencies:
- dependency-name: langchain
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-13 10:08:14 +02:00
dependabot[bot]
086f9e1f07 chore(deps): Bump llama-index from 0.11.16 to 0.11.17 in /examples/chainlit (#3807)
chore(deps): Bump llama-index in /examples/chainlit

Bumps [llama-index](https://github.com/run-llama/llama_index) from 0.11.16 to 0.11.17.
- [Release notes](https://github.com/run-llama/llama_index/releases)
- [Changelog](https://github.com/run-llama/llama_index/blob/main/CHANGELOG.md)
- [Commits](https://github.com/run-llama/llama_index/compare/v0.11.16...v0.11.17)

---
updated-dependencies:
- dependency-name: llama-index
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-13 10:07:29 +02:00
dependabot[bot]
3f923bb2ce chore(deps): Bump openai from 1.51.1 to 1.51.2 in /examples/functions (#3806)
Bumps [openai](https://github.com/openai/openai-python) from 1.51.1 to 1.51.2.
- [Release notes](https://github.com/openai/openai-python/releases)
- [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md)
- [Commits](https://github.com/openai/openai-python/compare/v1.51.1...v1.51.2)

---
updated-dependencies:
- dependency-name: openai
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-13 10:06:48 +02:00
dependabot[bot]
803e2db30b chore(deps): Bump python from 3.12-bullseye to 3.13-bullseye in /examples/langchain (#3805)
chore(deps): Bump python in /examples/langchain

Bumps python from 3.12-bullseye to 3.13-bullseye.

---
updated-dependencies:
- dependency-name: python
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-13 10:06:21 +02:00
dependabot[bot]
a282bd4969 chore(deps): Bump llama-index from 0.11.16 to 0.11.17 in /examples/langchain-chroma (#3804)
chore(deps): Bump llama-index in /examples/langchain-chroma

Bumps [llama-index](https://github.com/run-llama/llama_index) from 0.11.16 to 0.11.17.
- [Release notes](https://github.com/run-llama/llama_index/releases)
- [Changelog](https://github.com/run-llama/llama_index/blob/main/CHANGELOG.md)
- [Commits](https://github.com/run-llama/llama_index/compare/v0.11.16...v0.11.17)

---
updated-dependencies:
- dependency-name: llama-index
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-13 10:05:54 +02:00
dependabot[bot]
5bca02bad4 chore(deps): Bump langchain from 0.3.2 to 0.3.3 in /examples/langchain/langchainpy-localai-example (#3803)
chore(deps): Bump langchain

Bumps [langchain](https://github.com/langchain-ai/langchain) from 0.3.2 to 0.3.3.
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/langchain==0.3.2...langchain==0.3.3)

---
updated-dependencies:
- dependency-name: langchain
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-13 10:05:26 +02:00
dependabot[bot]
4858e72fd9 chore(deps): Bump sentence-transformers from 3.1.1 to 3.2.0 in /backend/python/sentencetransformers (#3801)
chore(deps): Bump sentence-transformers

Bumps [sentence-transformers](https://github.com/UKPLab/sentence-transformers) from 3.1.1 to 3.2.0.
- [Release notes](https://github.com/UKPLab/sentence-transformers/releases)
- [Commits](https://github.com/UKPLab/sentence-transformers/compare/v3.1.1...v3.2.0)

---
updated-dependencies:
- dependency-name: sentence-transformers
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-13 10:04:56 +02:00
dependabot[bot]
7eab6ba71b chore(deps): Bump mxschmitt/action-tmate from 3.18 to 3.19 (#3799)
Bumps [mxschmitt/action-tmate](https://github.com/mxschmitt/action-tmate) from 3.18 to 3.19.
- [Release notes](https://github.com/mxschmitt/action-tmate/releases)
- [Changelog](https://github.com/mxschmitt/action-tmate/blob/master/RELEASE.md)
- [Commits](https://github.com/mxschmitt/action-tmate/compare/v3.18...v3.19)

---
updated-dependencies:
- dependency-name: mxschmitt/action-tmate
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-13 10:04:31 +02:00
dependabot[bot]
a909f63fbe chore(deps): Bump docs/themes/hugo-theme-relearn from d5a0ee0 to e1a1f01 (#3798)
chore(deps): Bump docs/themes/hugo-theme-relearn

Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `d5a0ee0` to `e1a1f01`.
- [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases)
- [Commits](d5a0ee04ad...e1a1f01f4c)

---
updated-dependencies:
- dependency-name: docs/themes/hugo-theme-relearn
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-13 00:50:00 +00:00
LocalAI [bot]
b46f36195f chore: ⬆️ Update ggerganov/llama.cpp to edc265661cd707327297b6ec4d83423c43cb50a5 (#3797)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-13 00:07:54 +02:00
Dave
465f1f14a7 chore: dependabot ignore generated grpc go package (#3795)
Signed-off-by: Dave Lee <dave@gray101.com>
2024-10-13 00:07:43 +02:00
LocalAI [bot]
b8b1e10f34 docs: ⬆️ update docs version mudler/LocalAI (#3796)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-12 21:41:06 +00:00
106 changed files with 1030 additions and 212 deletions

View File

@@ -9,6 +9,8 @@ updates:
     directory: "/"
     schedule:
       interval: "weekly"
+    ignore:
+      - dependency-name: "github.com/mudler/LocalAI/pkg/grpc/proto"
   - package-ecosystem: "github-actions"
     # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
     directory: "/"

View File

@@ -79,7 +79,7 @@ jobs:
           args: ${{ steps.summarize.outputs.message }}
       - name: Setup tmate session if fails
         if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
+        uses: mxschmitt/action-tmate@v3.19
         with:
           detached: true
           connect-timeout-seconds: 180
@@ -161,7 +161,7 @@ jobs:
           TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
       - name: Setup tmate session if fails
         if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
+        uses: mxschmitt/action-tmate@v3.19
         with:
           detached: true
           connect-timeout-seconds: 180

View File

@@ -123,7 +123,7 @@ jobs:
             release/*
       - name: Setup tmate session if tests fail
         if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
+        uses: mxschmitt/action-tmate@v3.19
         with:
           detached: true
           connect-timeout-seconds: 180
@@ -232,7 +232,7 @@ jobs:
             release/*
       - name: Setup tmate session if tests fail
         if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
+        uses: mxschmitt/action-tmate@v3.19
         with:
           detached: true
           connect-timeout-seconds: 180
@@ -308,7 +308,7 @@ jobs:
             release/*
       - name: Setup tmate session if tests fail
         if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
+        uses: mxschmitt/action-tmate@v3.19
         with:
           detached: true
           connect-timeout-seconds: 180
@@ -350,7 +350,7 @@ jobs:
             release/*
       - name: Setup tmate session if tests fail
         if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
+        uses: mxschmitt/action-tmate@v3.19
         with:
           detached: true
           connect-timeout-seconds: 180

View File

@@ -105,6 +105,14 @@ jobs:
   tests-parler-tts:
     runs-on: ubuntu-latest
     steps:
+      - name: Force Install GIT latest
+        run: |
+          sudo apt-get update \
+            && sudo apt-get install -y software-properties-common \
+            && sudo apt-get update \
+            && sudo add-apt-repository -y ppa:git-core/ppa \
+            && sudo apt-get update \
+            && sudo apt-get install -y git
      - name: Clone
        uses: actions/checkout@v4
        with:

View File

@@ -133,7 +133,7 @@ jobs:
           PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
       - name: Setup tmate session if tests fail
         if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
+        uses: mxschmitt/action-tmate@v3.19
         with:
           detached: true
           connect-timeout-seconds: 180
@@ -197,7 +197,7 @@ jobs:
           make run-e2e-aio
       - name: Setup tmate session if tests fail
         if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
+        uses: mxschmitt/action-tmate@v3.19
         with:
           detached: true
           connect-timeout-seconds: 180
@@ -235,7 +235,7 @@ jobs:
           BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
       - name: Setup tmate session if tests fail
         if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
+        uses: mxschmitt/action-tmate@v3.19
         with:
           detached: true
           connect-timeout-seconds: 180

View File

@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=96776405a17034dcfd53d3ddf5d142d34bdbb657
+CPPLLAMA_VERSION?=0a1c750c80147687df267114c81956757cc14382
 # go-rwkv version
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
@@ -16,7 +16,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
-WHISPER_CPP_VERSION?=fdbfb460ed546452a5d53611bba66d10d842e719
+WHISPER_CPP_VERSION?=0fbaac9c891055796456df7b9122a70c220f9ca1
 # bert.cpp version
 BERT_REPO?=https://github.com/go-skynet/go-bert.cpp

View File

@@ -66,6 +66,21 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
 # docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
 ```
+To load models:
+```bash
+# From the model gallery (see available models with `local-ai models list`, in the WebUI from the model tab, or visiting https://models.localai.io)
+local-ai run llama-3.2-1b-instruct:q4_k_m
+# Start LocalAI with the phi-2 model directly from huggingface
+local-ai run huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf
+# Install and run a model from the Ollama OCI registry
+local-ai run ollama://gemma:2b
+# Run a model from a configuration file
+local-ai run https://gist.githubusercontent.com/.../phi-2.yaml
+# Install and run a model from a standard OCI registry (e.g., Docker Hub)
+local-ai run oci://localai/phi-2:latest
+```
 [💻 Getting started](https://localai.io/basics/getting_started/index.html)
 ## 📰 Latest project news

View File

@@ -219,6 +219,7 @@ message ModelOptions {
   int32 SwapSpace = 53;
   int32 MaxModelLen = 54;
   int32 TensorParallelSize = 55;
+  string LoadFormat = 58;
   string MMProj = 41;

View File

@@ -391,6 +391,39 @@ struct llama_metrics {
     }
 };
+struct llava_embd_batch {
+    std::vector<llama_pos>      pos;
+    std::vector<int32_t>        n_seq_id;
+    std::vector<llama_seq_id>   seq_id_0;
+    std::vector<llama_seq_id *> seq_ids;
+    std::vector<int8_t>         logits;
+    llama_batch batch;
+    llava_embd_batch(float * embd, int32_t n_tokens, llama_pos pos_0, llama_seq_id seq_id) {
+        pos     .resize(n_tokens);
+        n_seq_id.resize(n_tokens);
+        seq_ids .resize(n_tokens + 1);
+        logits  .resize(n_tokens);
+        seq_id_0.resize(1);
+        seq_id_0[0] = seq_id;
+        seq_ids [n_tokens] = nullptr;
+        batch = {
+            /*n_tokens =*/ n_tokens,
+            /*tokens   =*/ nullptr,
+            /*embd     =*/ embd,
+            /*pos      =*/ pos.data(),
+            /*n_seq_id =*/ n_seq_id.data(),
+            /*seq_id   =*/ seq_ids.data(),
+            /*logits   =*/ logits.data(),
+        };
+        for (int i = 0; i < n_tokens; i++) {
+            batch.pos     [i] = pos_0 + i;
+            batch.n_seq_id[i] = 1;
+            batch.seq_id  [i] = seq_id_0.data();
+            batch.logits  [i] = false;
+        }
+    }
+};
 struct llama_server_context
 {
     llama_model *model = nullptr;
@@ -934,7 +967,6 @@ struct llama_server_context
                 batch.n_seq_id + i,
                 batch.seq_id + i,
                 batch.logits + i,
-                0, 0, 0, // unused
             };
             if (llama_decode(ctx, batch_view) != 0)
             {
@@ -1379,7 +1411,6 @@ struct llama_server_context
                 batch.n_seq_id + i,
                 batch.seq_id + i,
                 batch.logits + i,
-                0, 0, 0, // unused
             };
             if (llama_decode(ctx, batch_view))
             {
@@ -1398,8 +1429,9 @@ struct llama_server_context
             }
             const int n_embd = llama_n_embd(model);
-            llama_batch batch_img = { n_eval, nullptr, (img.image_embedding + i * n_embd), nullptr, nullptr, nullptr, nullptr, slot.n_past, 1, 0, };
-            if (llama_decode(ctx, batch_img))
+            float * embd = img.image_embedding + i * n_embd;
+            llava_embd_batch llava_batch = llava_embd_batch(embd, n_eval, slot.n_past, 0);
+            if (llama_decode(ctx, llava_batch.batch))
             {
                 LOG("%s : failed to eval image\n", __func__);
                 return false;
@@ -1904,7 +1936,6 @@ struct llama_server_context
                 batch.n_seq_id + i,
                 batch.seq_id + i,
                 batch.logits + i,
-                0, 0, 0, // unused
             };
             const int ret = llama_decode(ctx, batch_view);

View File

@@ -1,2 +1,2 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
+torch==2.4.1+cu118

View File

@@ -1 +1 @@
-torch
+torch==2.4.1

View File

@@ -1,2 +1,2 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch
+torch==2.4.1+rocm6.0

View File

@@ -1,6 +1,6 @@
 accelerate
 auto-gptq==0.7.1
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
 transformers

View File

@@ -1,4 +1,4 @@
 transformers
 accelerate
-torch
-torchaudio
+torch==2.4.1
+torchaudio==2.4.1

View File

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
-torchaudio
+torch==2.4.1+cu118
+torchaudio==2.4.1+cu118
 transformers
 accelerate

View File

@@ -1,4 +1,4 @@
-torch
-torchaudio
+torch==2.4.1
+torchaudio==2.4.1
 transformers
 accelerate

View File

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch
-torchaudio
+torch==2.4.1+rocm6.0
+torchaudio==2.4.1+rocm6.0
 transformers
 accelerate

View File

@@ -1,4 +1,4 @@
 bark==0.1.5
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi

View File

@@ -1,2 +1,2 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf

View File

@@ -1,3 +1,4 @@
 transformers
 accelerate
-torch
+torch==2.4.1
+coqui-tts

View File

@@ -1,5 +1,6 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
-torchaudio
+torch==2.4.1+cu118
+torchaudio==2.4.1+cu118
 transformers
-accelerate
+accelerate
+coqui-tts

View File

@@ -1,4 +1,5 @@
-torch
-torchaudio
+torch==2.4.1
+torchaudio==2.4.1
 transformers
-accelerate
+accelerate
+coqui-tts

View File

@@ -1,5 +1,6 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch
-torchaudio
+torch==2.4.1+rocm6.0
+torchaudio==2.4.1+rocm6.0
 transformers
-accelerate
+accelerate
+coqui-tts

View File

@@ -5,4 +5,5 @@ torchaudio
 optimum[openvino]
 setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
 transformers
-accelerate
+accelerate
+coqui-tts

View File

@@ -1,4 +1,4 @@
 coqui-tts
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
-certifi
+certifi
+packaging==24.1

View File

@@ -5,5 +5,5 @@ accelerate
 compel
 peft
 sentencepiece
-torch
+torch==2.4.1
 optimum-quanto

View File

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
+torch==2.4.1+cu118
 diffusers
 opencv-python
 transformers

View File

@@ -1,4 +1,4 @@
-torch
+torch==2.4.1
 diffusers
 opencv-python
 transformers

View File

@@ -1,5 +1,5 @@
 setuptools
-grpcio==1.66.2
+grpcio==1.67.0
 pillow
 protobuf
 certifi

View File

@@ -1,3 +1,3 @@
 transformers
 accelerate
-torch
+torch==2.4.1

View File

@@ -1,4 +1,4 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
+torch==2.4.1+cu118
 transformers
 accelerate

View File

@@ -1,3 +1,3 @@
-torch
+torch==2.4.1
 transformers
 accelerate

View File

@@ -1,4 +1,4 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
 wheel

View File

@@ -1,2 +1,2 @@
-torch
+torch==2.4.1
 transformers

View File

@@ -1,3 +1,3 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
+torch==2.4.1+cu118
 transformers

View File

@@ -1,2 +1,2 @@
-torch
+torch==2.4.1
 transformers

View File

@@ -1,3 +1,3 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi

View File

@@ -1 +1,3 @@
-torch
+torch==2.4.1
+git+https://github.com/myshell-ai/MeloTTS.git
+git+https://github.com/myshell-ai/OpenVoice.git

View File

@@ -1,2 +1,4 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
+torch==2.4.1+cu118
+git+https://github.com/myshell-ai/MeloTTS.git
+git+https://github.com/myshell-ai/OpenVoice.git

View File

@@ -1 +1,3 @@
-torch
+torch==2.4.1
+git+https://github.com/myshell-ai/MeloTTS.git
+git+https://github.com/myshell-ai/OpenVoice.git

View File

@@ -1,2 +1,4 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch
+torch==2.4.1+rocm6.0
+git+https://github.com/myshell-ai/MeloTTS.git
+git+https://github.com/myshell-ai/OpenVoice.git

View File

@@ -2,22 +2,22 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 librosa==0.9.1
-faster-whisper==1.0.3
+faster-whisper==0.9.0
 pydub==0.25.1
 wavmark==0.0.3
-numpy==1.26.4
+numpy==1.22.0
 eng_to_ipa==0.0.2
 inflect==7.0.0
 unidecode==1.3.7
-whisper-timestamped==1.15.4
+whisper-timestamped==1.14.2
 openai
 python-dotenv
 pypinyin==0.50.0
 cn2an==0.5.22
 jieba==0.42.1
 gradio==4.44.1
 langid==1.1.6
 git+https://github.com/myshell-ai/MeloTTS.git
 git+https://github.com/myshell-ai/OpenVoice.git

View File

@@ -1,10 +1,10 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 librosa
 faster-whisper
 pydub==0.25.1
 wavmark==0.0.3
-numpy
+numpy==1.22.0
 eng_to_ipa==0.0.2
 inflect
 unidecode
@@ -13,8 +13,8 @@ openai
 python-dotenv
 pypinyin
 cn2an==0.5.22
+networkx==2.8.8
 jieba==0.42.1
-gradio
+gradio==3.48.0
 langid==1.1.6
 git+https://github.com/myshell-ai/MeloTTS.git
 git+https://github.com/myshell-ai/OpenVoice.git
+llvmlite==0.43.0

View File

@@ -15,12 +15,12 @@ installRequirements
 # https://github.com/descriptinc/audiotools/issues/101
 # incompatible protobuf versions.
-PYDIR=python3.10
-pyenv="${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/"
+# PYDIR=python3.10
+# pyenv="${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/"
-if [ ! -d ${pyenv} ]; then
-echo "(parler-tts/install.sh): Error: ${pyenv} does not exist"
-exit 1
-fi
+# if [ ! -d ${pyenv} ]; then
+# echo "(parler-tts/install.sh): Error: ${pyenv} does not exist"
+# exit 1
+# fi
-curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${pyenv}/builder.py
+# curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${pyenv}/builder.py

View File

@@ -1,3 +1,4 @@
 git+https://github.com/huggingface/parler-tts.git@8e465f1b5fcd223478e07175cb40494d19ffbe17
 llvmlite==0.43.0
 numba==0.60.0
+git+https://github.com/descriptinc/audiotools

View File

@@ -1,3 +1,3 @@
 transformers
 accelerate
-torch
+torch==2.4.1

View File

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
-torchaudio
+torch==2.4.1+cu118
+torchaudio==2.4.1+cu118
 transformers
 accelerate

View File

@@ -1,4 +1,4 @@
-torch
-torchaudio
+torch==2.4.1
+torchaudio==2.4.1
 transformers
 accelerate

View File

@@ -1,4 +1,4 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
 llvmlite==0.43.0

View File

@@ -1,4 +1,4 @@
 transformers
 accelerate
-torch
+torch==2.4.1
 rerankers[transformers]

View File

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
 transformers
 accelerate
-torch
+torch==2.4.1+cu118
 rerankers[transformers]

View File

@@ -1,4 +1,4 @@
 transformers
 accelerate
-torch
+torch==2.4.1
 rerankers[transformers]

View File

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
 transformers
 accelerate
-torch
+torch==2.4.1+rocm6.0
 rerankers[transformers]

View File

@@ -1,3 +1,3 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi

View File

@@ -1,6 +1,6 @@
-torch
+torch==2.4.1
 accelerate
 transformers
 bitsandbytes
-sentence-transformers==3.1.1
+sentence-transformers==3.2.0
 transformers

View File

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch
+torch==2.4.1+cu118
 accelerate
-sentence-transformers==3.1.1
+sentence-transformers==3.2.0
 transformers

View File

@@ -1,4 +1,4 @@
-torch
+torch==2.4.1
 accelerate
-sentence-transformers==3.1.1
+sentence-transformers==3.2.0
 transformers

View File

@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch
+torch==2.4.1+rocm6.0
 accelerate
-sentence-transformers==3.1.1
+sentence-transformers==3.2.0
 transformers

View File

@@ -4,5 +4,5 @@ torch
 optimum[openvino]
 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
 accelerate
-sentence-transformers==3.1.1
+sentence-transformers==3.2.0
 transformers

View File

@@ -1,4 +1,4 @@
grpcio==1.66.2
grpcio==1.67.0
protobuf
certifi
datasets

View File

@@ -1,3 +1,3 @@
transformers
accelerate
torch
torch==2.4.1

View File

@@ -1,4 +1,4 @@
--extra-index-url https://download.pytorch.org/whl/cu118
transformers
accelerate
torch
torch==2.4.1+cu118

View File

@@ -1,3 +1,3 @@
transformers
accelerate
torch
torch==2.4.1

View File

@@ -1,4 +1,4 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
transformers
accelerate
torch
torch==2.4.1+rocm6.0

View File

@@ -1,4 +1,4 @@
grpcio==1.66.2
grpcio==1.67.0
protobuf
scipy==1.14.0
certifi

View File

@@ -1,4 +1,4 @@
torch
torch==2.4.1
accelerate
transformers
bitsandbytes

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
torch==2.4.1+cu118
accelerate
transformers
bitsandbytes

View File

@@ -1,4 +1,4 @@
torch
torch==2.4.1
accelerate
transformers
bitsandbytes

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch
torch==2.4.1+rocm6.0
accelerate
transformers
bitsandbytes

View File

@@ -1,4 +1,4 @@
grpcio==1.66.2
grpcio==1.67.0
protobuf
certifi
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,3 +1,3 @@
accelerate
torch
torchaudio
torch==2.4.1
torchaudio==2.4.1

View File

@@ -1,4 +1,4 @@
--extra-index-url https://download.pytorch.org/whl/cu118
accelerate
torch
torchaudio
torch==2.4.1+cu118
torchaudio==2.4.1+cu118

View File

@@ -1,3 +1,3 @@
accelerate
torch
torchaudio
torch==2.4.1
torchaudio==2.4.1

View File

@@ -1,3 +1,3 @@
grpcio==1.66.2
grpcio==1.67.0
protobuf
certifi

View File

@@ -19,6 +19,8 @@ from vllm.utils import random_uuid
from vllm.transformers_utils.tokenizer import get_tokenizer
from vllm.multimodal.utils import fetch_image
from vllm.assets.video import VideoAsset
import base64
import io
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
@@ -93,6 +95,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if request.Quantization != "":
engine_args.quantization = request.Quantization
if request.LoadFormat != "":
engine_args.load_format = request.LoadFormat
if request.GPUMemoryUtilization != 0:
engine_args.gpu_memory_utilization = request.GPUMemoryUtilization
if request.TrustRemoteCode:
@@ -217,13 +221,15 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
# Generate text using the LLM engine
request_id = random_uuid()
print(f"Generating text with request_id: {request_id}", file=sys.stderr)
multi_modal_data = {}
if image_data:
multi_modal_data["image"] = image_data
if video_data:
multi_modal_data["video"] = video_data
outputs = self.llm.generate(
{
"prompt": prompt,
"multi_modal_data": {
"image": image_data if image_data else None,
"video": video_data if video_data else None,
} if image_data or video_data else None,
"prompt": prompt,
"multi_modal_data": multi_modal_data if multi_modal_data else None,
},
sampling_params=sampling_params,
request_id=request_id,
@@ -262,19 +268,22 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
def load_image(self, image_path: str):
"""
Load an image from the given file path.
Load an image from the given file path or base64 encoded data.
Args:
image_path (str): The path to the image file.
image_path (str): The path to the image file or base64 encoded data.
Returns:
Image: The loaded image.
"""
try:
return Image.open(image_path)
image_data = base64.b64decode(image_path)
image = Image.open(io.BytesIO(image_data))
return image
except Exception as e:
print(f"Error loading image {image_path}: {e}", file=sys.stderr)
return self.load_video(image_path)
return None
def load_video(self, video_path: str):
"""
@@ -287,10 +296,15 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
Video: The loaded video.
"""
try:
video = VideoAsset(name=video_path).np_ndarrays
timestamp = str(int(time.time() * 1000)) # Generate timestamp
p = f"/tmp/vl-{timestamp}.data" # Use timestamp in filename
with open(p, "wb") as f:
f.write(base64.b64decode(video_path))
video = VideoAsset(name=p).np_ndarrays
os.remove(p)
return video
except Exception as e:
print(f"Error loading video {image_path}: {e}", file=sys.stderr)
print(f"Error loading video {video_path}: {e}", file=sys.stderr)
return None
async def serve(address):
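A minimal sketch of the pattern introduced above, assuming a multimodal vLLM model (the Phi-3-vision model name and the prompt format are taken from the gallery file later in this diff; the file name is illustrative): decode the base64 payload the API hands over, then pass only the modalities that are actually present via multi_modal_data:

```
# Sketch, not the backend itself: decode a base64 image payload and pass it
# to vLLM's generate() through multi_modal_data, mirroring the diff above.
import base64
import io

from PIL import Image
from vllm import LLM, SamplingParams

llm = LLM(model="microsoft/Phi-3-vision-128k-instruct", trust_remote_code=True)

with open("cat.png", "rb") as f:  # illustrative input file
    b64_payload = base64.b64encode(f.read()).decode()  # what the API hands the backend

image = Image.open(io.BytesIO(base64.b64decode(b64_payload)))

multi_modal_data = {}
if image is not None:
    multi_modal_data["image"] = image  # only include modalities that are present

outputs = llm.generate(
    {
        "prompt": "<|user|>\n<|image_1|>\nDescribe the image.<|end|>\n<|assistant|>\n",
        "multi_modal_data": multi_modal_data if multi_modal_data else None,
    },
    sampling_params=SamplingParams(max_tokens=64),
)
print(outputs[0].outputs[0].text)
```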

View File

@@ -22,7 +22,7 @@ if [ "x${BUILD_TYPE}" == "x" ] && [ "x${FROM_SOURCE}" == "xtrue" ]; then
git clone https://github.com/vllm-project/vllm
fi
pushd vllm
uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.66.2 protobuf bitsandbytes
uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.67.0 protobuf bitsandbytes
uv pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
VLLM_TARGET_DEVICE=cpu python setup.py install
popd

View File

@@ -1,3 +1,3 @@
accelerate
torch
torch==2.4.1
transformers

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118
accelerate
torch
torch==2.4.1+cu118
transformers
bitsandbytes

View File

@@ -1,4 +1,4 @@
accelerate
torch
torch==2.4.1
transformers
bitsandbytes

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
accelerate
torch
torch==2.4.1+rocm6.0
transformers
bitsandbytes

View File

@@ -1,4 +1,4 @@
grpcio==1.66.2
grpcio==1.67.0
protobuf
certifi
setuptools

View File

@@ -2,6 +2,7 @@ package backend
import (
"context"
"encoding/json"
"fmt"
"os"
"regexp"
@@ -77,6 +78,16 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
switch ct := message.Content.(type) {
case string:
protoMessages[i].Content = ct
case []interface{}:
// If using the tokenizer template, in the multimodal case we want to keep the multimodal content as-is and return only the string parts here
data, _ := json.Marshal(ct)
resultData := []struct {
Text string `json:"text"`
}{}
json.Unmarshal(data, &resultData)
for _, r := range resultData {
protoMessages[i].Content += r.Text
}
default:
return nil, fmt.Errorf("unsupported type for schema.Message.Content for inference: %T", ct)
}
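The new []interface{} branch above JSON-round-trips an OpenAI-style content array and concatenates only its text parts, leaving images and videos to the multimodal path. A hedged Python mirror of that extraction, for illustration only:

```
# Illustrative mirror of the Go branch above: keep only the "text" parts of
# an OpenAI-style multimodal content array for the tokenizer template.
def extract_text(content) -> str:
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # non-text parts have no "text" key and contribute nothing
        return "".join(
            part.get("text", "") for part in content if isinstance(part, dict)
        )
    raise TypeError(f"unsupported content type: {type(content)!r}")

message = [
    {"type": "text", "text": "What is in this image?"},
    {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
]
assert extract_text(message) == "What is in this image?"
```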

View File

@@ -139,6 +139,7 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
DraftModel: c.DraftModel,
AudioPath: c.VallE.AudioPath,
Quantization: c.Quantization,
LoadFormat: c.LoadFormat,
GPUMemoryUtilization: c.GPUMemoryUtilization,
TrustRemoteCode: c.TrustRemoteCode,
EnforceEager: c.EnforceEager,

View File

@@ -53,6 +53,7 @@ type RunCMD struct {
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
UseSubtleKeyComparison bool `env:"LOCALAI_SUBTLE_KEY_COMPARISON" default:"false" help:"If true, API Key validation comparisons will be performed using constant-time comparisons rather than simple equality. This trades off performance on each request for resiliency against timing attacks." group:"hardening"`
DisableApiKeyRequirementForHttpGet bool `env:"LOCALAI_DISABLE_API_KEY_REQUIREMENT_FOR_HTTP_GET" default:"false" help:"If true, a valid API key is not required to issue GET requests to portions of the web ui. This should only be enabled in secure testing environments" group:"hardening"`
DisableMetricsEndpoint bool `env:"LOCALAI_DISABLE_METRICS_ENDPOINT,DISABLE_METRICS_ENDPOINT" default:"false" help:"Disable the /metrics endpoint" group:"api"`
HttpGetExemptedEndpoints []string `env:"LOCALAI_HTTP_GET_EXEMPTED_ENDPOINTS" default:"^/$,^/browse/?$,^/talk/?$,^/p2p/?$,^/chat/?$,^/text2image/?$,^/tts/?$,^/static/.*$,^/swagger.*$" help:"If LOCALAI_DISABLE_API_KEY_REQUIREMENT_FOR_HTTP_GET is overridden to true, this is the list of endpoints to exempt. Only adjust this in case of a security incident or as a result of a personal security posture review" group:"hardening"`
Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
Peer2PeerDHTInterval int `env:"LOCALAI_P2P_DHT_INTERVAL,P2P_DHT_INTERVAL" default:"360" name:"p2p-dht-interval" help:"Interval for DHT refresh (used during token generation)" group:"p2p"`
@@ -108,6 +109,10 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
config.WithLoadToMemory(r.LoadToMemory),
}
if r.DisableMetricsEndpoint {
opts = append(opts, config.DisableMetricsEndpoint)
}
token := ""
if r.Peer2Peer || r.Peer2PeerToken != "" {
log.Info().Msg("P2P mode enabled")

View File

@@ -39,6 +39,7 @@ type ApplicationConfig struct {
OpaqueErrors bool
UseSubtleKeyComparison bool
DisableApiKeyRequirementForHttpGet bool
DisableMetrics bool
HttpGetExemptedEndpoints []*regexp.Regexp
DisableGalleryEndpoint bool
LoadToMemory []string
@@ -350,6 +351,10 @@ func WithDisableApiKeyRequirementForHttpGet(required bool) AppOption {
}
}
var DisableMetricsEndpoint AppOption = func(o *ApplicationConfig) {
o.DisableMetrics = true
}
func WithHttpGetExemptedEndpoints(endpoints []string) AppOption {
return func(o *ApplicationConfig) {
o.HttpGetExemptedEndpoints = []*regexp.Regexp{}

View File

@@ -143,6 +143,7 @@ type LLMConfig struct {
DraftModel string `yaml:"draft_model"`
NDraft int32 `yaml:"n_draft"`
Quantization string `yaml:"quantization"`
LoadFormat string `yaml:"load_format"`
GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM
TrustRemoteCode bool `yaml:"trust_remote_code"` // vLLM
EnforceEager bool `yaml:"enforce_eager"` // vLLM
@@ -197,9 +198,7 @@ type TemplateConfig struct {
// It defaults to \n
JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character"`
Video string `yaml:"video"`
Image string `yaml:"image"`
Audio string `yaml:"audio"`
Multimodal string `yaml:"multimodal"`
}
func (c *BackendConfig) UnmarshalYAML(value *yaml.Node) error {
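A hedged sketch of how the new surface looks in a model YAML: load_format is forwarded to vLLM, and the single multimodal template replaces the per-modality image/audio/video templates. Key placement and the sample values are assumptions, checked here with a plain YAML parse:

```
# Hedged sketch of the new config keys; values are illustrative assumptions.
import yaml  # pip install pyyaml

cfg = yaml.safe_load("""
name: phi3-vision
backend: vllm
parameters:
  model: microsoft/Phi-3-vision-128k-instruct
load_format: safetensors
template:
  multimodal: "{{ range .Images }}<|image_{{ add1 .ID}}|>{{end}}\\n{{.Text}}"
""")

assert cfg["load_format"] == "safetensors"          # forwarded to vLLM's engine args
assert "multimodal" in cfg["template"]              # single template for all modalities
```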

View File

@@ -109,19 +109,21 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
app.Use(recover.New())
}
metricsService, err := services.NewLocalAIMetricsService()
if err != nil {
return nil, err
}
if !appConfig.DisableMetrics {
metricsService, err := services.NewLocalAIMetricsService()
if err != nil {
return nil, err
}
if metricsService != nil {
app.Use(localai.LocalAIMetricsAPIMiddleware(metricsService))
app.Hooks().OnShutdown(func() error {
return metricsService.Shutdown()
})
}
if metricsService != nil {
app.Use(localai.LocalAIMetricsAPIMiddleware(metricsService))
app.Hooks().OnShutdown(func() error {
return metricsService.Shutdown()
})
}
// Health Checks should always be exempt from auth, so register these first
}
// Health Checks should always be exempt from auth, so register these first
routes.HealthRoutes(app)
kaConfig, err := middleware.GetKeyAuthConfig(appConfig)

View File

@@ -149,6 +149,10 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
// Decode each request's message content
imgIndex, vidIndex, audioIndex := 0, 0, 0
for i, m := range input.Messages {
nrOfImgsInMessage := 0
nrOfVideosInMessage := 0
nrOfAudiosInMessage := 0
switch content := m.Content.(type) {
case string:
input.Messages[i].StringContent = content
@@ -156,11 +160,16 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
dat, _ := json.Marshal(content)
c := []schema.Content{}
json.Unmarshal(dat, &c)
textContent := ""
// we will template this at the end
CONTENT:
for _, pp := range c {
switch pp.Type {
case "text":
input.Messages[i].StringContent = pp.Text
textContent += pp.Text
//input.Messages[i].StringContent = pp.Text
case "video", "video_url":
// Decode content as base64 whether it's a URL or base64 text
base64, err := utils.GetContentURIAsBase64(pp.VideoURL.URL)
@@ -169,14 +178,8 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
continue CONTENT
}
input.Messages[i].StringVideos = append(input.Messages[i].StringVideos, base64) // TODO: make sure that we only return base64 stuff
t := "[vid-{{.ID}}]{{.Text}}"
if config.TemplateConfig.Video != "" {
t = config.TemplateConfig.Video
}
// set a placeholder for each image
input.Messages[i].StringContent, _ = templates.TemplateMultiModal(t, vidIndex, input.Messages[i].StringContent)
vidIndex++
nrOfVideosInMessage++
case "audio_url", "audio":
// Decode content as base64 whether it's a URL or base64 text
base64, err := utils.GetContentURIAsBase64(pp.AudioURL.URL)
@@ -185,13 +188,8 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
continue CONTENT
}
input.Messages[i].StringAudios = append(input.Messages[i].StringAudios, base64) // TODO: make sure that we only return base64 stuff
// set a placeholder for each image
t := "[audio-{{.ID}}]{{.Text}}"
if config.TemplateConfig.Audio != "" {
t = config.TemplateConfig.Audio
}
input.Messages[i].StringContent, _ = templates.TemplateMultiModal(t, audioIndex, input.Messages[i].StringContent)
audioIndex++
nrOfAudiosInMessage++
case "image_url", "image":
// Decode content as base64 whether it's a URL or base64 text
base64, err := utils.GetContentURIAsBase64(pp.ImageURL.URL)
@@ -200,16 +198,21 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
continue CONTENT
}
t := "[img-{{.ID}}]{{.Text}}"
if config.TemplateConfig.Image != "" {
t = config.TemplateConfig.Image
}
input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
// set a placeholder for each image
input.Messages[i].StringContent, _ = templates.TemplateMultiModal(t, imgIndex, input.Messages[i].StringContent)
imgIndex++
nrOfImgsInMessage++
}
}
input.Messages[i].StringContent, _ = templates.TemplateMultiModal(config.TemplateConfig.Multimodal, templates.MultiModalOptions{
TotalImages: imgIndex,
TotalVideos: vidIndex,
TotalAudios: audioIndex,
ImagesInMessage: nrOfImgsInMessage,
VideosInMessage: nrOfVideosInMessage,
AudiosInMessage: nrOfAudiosInMessage,
}, textContent)
}
}
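A hedged end-to-end sketch of the request shape this code parses: text parts are concatenated and templated once per message using the counters above. The endpoint and model name are assumptions for a local instance:

```
# Sketch of a multimodal chat request against a local OpenAI-compatible API.
import base64

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8080/v1", api_key="not-needed")

with open("cat.png", "rb") as f:  # illustrative input file
    b64 = base64.b64encode(f.read()).decode()

resp = client.chat.completions.create(
    model="phi3-vision",  # assumed model name
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe the image."},
            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}},
        ],
    }],
)
print(resp.choices[0].message.content)
```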

View File

@@ -42,7 +42,9 @@ func RegisterLocalAIRoutes(app *fiber.App,
app.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig))
app.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig))
app.Get("/metrics", localai.LocalAIMetricsEndpoint())
if !appConfig.DisableMetrics {
app.Get("/metrics", localai.LocalAIMetricsEndpoint())
}
// Experimental Backend Statistics Module
backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now
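A quick hedged probe of the gating above, assuming a local instance on :8080:

```
# With metrics enabled the endpoint serves Prometheus text; when started with
# LOCALAI_DISABLE_METRICS_ENDPOINT=true the route is simply not registered.
import requests

r = requests.get("http://localhost:8080/metrics", timeout=5)
print(r.status_code)             # 200 when enabled, 404 when disabled
print(r.text.splitlines()[:3])   # first few Prometheus metric lines when enabled
```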

View File

@@ -30,6 +30,19 @@ For a full list of options, refer to the [Installer Options]({{% relref "docs/ad
Binaries can also be [manually downloaded]({{% relref "docs/reference/binaries" %}}).
## Using Homebrew on MacOS
{{% alert icon="⚠️" %}}
The Homebrew formula currently doesn't offer the same options as the bash script
{{% /alert %}}
You can install the Homebrew [LocalAI](https://formulae.brew.sh/formula/localai) formula with the following command:
```
brew install localai
```
## Using Container Images or Kubernetes
LocalAI is available as a container image compatible with various container engines such as Docker, Podman, and Kubernetes. Container images are published on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest) and [Docker Hub](https://hub.docker.com/r/localai/localai).

View File

@@ -28,5 +28,6 @@ The list below is a list of software that integrates with LocalAI.
- https://github.com/cedriking/spark
- [Big AGI](https://github.com/enricoros/big-agi) is a powerful web interface entirely running in the browser, supporting LocalAI
- [Midori AI Subsystem Manager](https://io.midori-ai.xyz/subsystem/manager/) is a powerful docker subsystem for running all types of AI programs
- [LLPhant](https://github.com/theodo-group/LLPhant) is a PHP library for interacting with LLMs and Vector Databases
Feel free to open up a Pull request (by clicking the "Edit page" link below) to get a page made for your project, or if you see an error on one of the pages!

View File

@@ -1,3 +1,3 @@
{
"version": "v2.21.1"
"version": "v2.22.1"
}

View File

@@ -1,6 +1,6 @@
llama_index==0.11.16
llama_index==0.11.19
requests==2.32.3
weaviate_client==4.8.1
weaviate_client==4.9.0
transformers
torch
chainlit

View File

@@ -1,2 +1,2 @@
langchain==0.3.2
openai==1.51.1
langchain==0.3.4
openai==1.52.0

View File

@@ -1,4 +1,4 @@
langchain==0.3.1
openai==1.51.1
chromadb==0.5.11
llama-index==0.11.16
langchain==0.3.3
openai==1.52.0
chromadb==0.5.13
llama-index==0.11.19

View File

@@ -1,4 +1,4 @@
FROM python:3.12-bullseye
FROM python:3.13-bullseye
COPY ./langchainpy-localai-example /app
WORKDIR /app
RUN pip install --no-cache-dir -r requirements.txt

View File

@@ -1,33 +1,33 @@
aiohttp==3.10.9
aiohttp==3.10.10
aiosignal==1.3.1
async-timeout==4.0.3
attrs==24.2.0
certifi==2024.8.30
charset-normalizer==3.3.2
charset-normalizer==3.4.0
colorama==0.4.6
dataclasses-json==0.6.7
debugpy==1.8.6
debugpy==1.8.7
frozenlist==1.4.1
greenlet==3.1.1
idna==3.10
langchain==0.3.2
langchain-community==0.3.1
marshmallow==3.22.0
langchain==0.3.3
langchain-community==0.3.3
marshmallow==3.23.0
marshmallow-enum==1.5.1
multidict==6.1.0
mypy-extensions==1.0.0
numexpr==2.10.1
numpy==2.1.1
openai==1.51.1
numpy==2.1.2
openai==1.51.2
openapi-schema-pydantic==1.2.4
packaging>=23.2
pydantic==2.9.2
PyYAML==6.0.2
requests==2.32.3
SQLAlchemy==2.0.35
SQLAlchemy==2.0.36
tenacity==8.5.0
tqdm==4.66.5
typing-inspect==0.9.0
typing_extensions==4.12.2
urllib3==2.2.3
yarl==1.13.1
yarl==1.16.0

View File

@@ -182,6 +182,34 @@
- filename: Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO.Q4_K_M.gguf
sha256: 7f45fa79bc6c9847ef9fbad08c3bb5a0f2dbb56d2e2200a5d37b260a57274e55
uri: huggingface://QuantFactory/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO-GGUF/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO.Q4_K_M.gguf
- !!merge <<: *llama32
name: "llama-3.2-chibi-3b"
icon: https://huggingface.co/AELLM/Llama-3.2-Chibi-3B/resolve/main/chibi.jpg
urls:
- https://huggingface.co/AELLM/Llama-3.2-Chibi-3B
- https://huggingface.co/mradermacher/Llama-3.2-Chibi-3B-GGUF
description: |
Small parameter LLMs are ideal for navigating the complexities of the Japanese language, which involves multiple character systems like kanji, hiragana, and katakana, along with subtle social cues. Despite their smaller size, these models are capable of delivering highly accurate and context-aware results, making them perfect for use in environments where resources are constrained. Whether deployed on mobile devices with limited processing power or in edge computing scenarios where fast, real-time responses are needed, these models strike the perfect balance between performance and efficiency, without sacrificing quality or speed.
overrides:
parameters:
model: Llama-3.2-Chibi-3B.Q4_K_M.gguf
files:
- filename: Llama-3.2-Chibi-3B.Q4_K_M.gguf
sha256: 4b594cd5f66181202713f1cf97ce2f86d0acfa1b862a64930d5f512c45640a2f
uri: huggingface://mradermacher/Llama-3.2-Chibi-3B-GGUF/Llama-3.2-Chibi-3B.Q4_K_M.gguf
- !!merge <<: *llama32
name: "llama-3.2-3b-reasoning-time"
urls:
- https://huggingface.co/mradermacher/Llama-3.2-3B-Reasoning-Time-GGUF
description: |
Lyte/Llama-3.2-3B-Reasoning-Time is a large language model with 3.2 billion parameters, designed for reasoning and time-based tasks in English. It is based on the Llama architecture and has been quantized using the GGUF format by mradermacher.
overrides:
parameters:
model: Llama-3.2-3B-Reasoning-Time.Q4_K_M.gguf
files:
- filename: Llama-3.2-3B-Reasoning-Time.Q4_K_M.gguf
sha256: 80b10e1a5c6e27f6d8cf08c3472af2b15a9f63ebf8385eedfe8615f85116c73f
uri: huggingface://mradermacher/Llama-3.2-3B-Reasoning-Time-GGUF/Llama-3.2-3B-Reasoning-Time.Q4_K_M.gguf
- &qwen25
## Qwen2.5
name: "qwen2.5-14b-instruct"
@@ -472,6 +500,134 @@
- filename: qwen2.5-7b-ins-v3-Q4_K_M.gguf
sha256: 9c23734072714a4886c0386ae0ff07a5e940d67ad52278e2ed689fec44e1e0c8
uri: huggingface://bartowski/qwen2.5-7b-ins-v3-GGUF/qwen2.5-7b-ins-v3-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "supernova-medius"
urls:
- https://huggingface.co/arcee-ai/SuperNova-Medius-GGUF
description: |
Arcee-SuperNova-Medius is a 14B parameter language model developed by Arcee.ai, built on the Qwen2.5-14B-Instruct architecture. This unique model is the result of a cross-architecture distillation pipeline, combining knowledge from both the Qwen2.5-72B-Instruct model and the Llama-3.1-405B-Instruct model. By leveraging the strengths of these two distinct architectures, SuperNova-Medius achieves high-quality instruction-following and complex reasoning capabilities in a mid-sized, resource-efficient form.
SuperNova-Medius is designed to excel in a variety of business use cases, including customer support, content creation, and technical assistance, while maintaining compatibility with smaller hardware configurations. It's an ideal solution for organizations looking for advanced capabilities without the high resource requirements of larger models like our SuperNova-70B.
overrides:
parameters:
model: SuperNova-Medius-Q4_K_M.gguf
files:
- filename: SuperNova-Medius-Q4_K_M.gguf
sha256: aaa4bf3451bc900f186fd4b6b3a6a26bfd40c85908f605db76b92e58aadcc864
uri: huggingface://arcee-ai/SuperNova-Medius-GGUF/SuperNova-Medius-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "eva-qwen2.5-14b-v0.1-i1"
urls:
- https://huggingface.co/EVA-UNIT-01/EVA-Qwen2.5-14B-v0.1
- https://huggingface.co/mradermacher/EVA-Qwen2.5-14B-v0.1-i1-GGUF
description: |
A RP/storywriting specialist model, full-parameter finetune of Qwen2.5-14B on mixture of synthetic and natural data.
It uses Celeste 70B 0.1 data mixture, greatly expanding it to improve versatility, creativity and "flavor" of the resulting model.
overrides:
parameters:
model: EVA-Qwen2.5-14B-v0.1.i1-Q4_K_M.gguf
files:
- filename: EVA-Qwen2.5-14B-v0.1.i1-Q4_K_M.gguf
sha256: 4e9665d4f83cd97efb42c8427f9c09be93b72e23a0364c91ad0b5de8056f2795
uri: huggingface://mradermacher/EVA-Qwen2.5-14B-v0.1-i1-GGUF/EVA-Qwen2.5-14B-v0.1.i1-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "cursorcore-qw2.5-7b-i1"
urls:
- https://huggingface.co/TechxGenus/CursorCore-QW2.5-7B
- https://huggingface.co/mradermacher/CursorCore-QW2.5-7B-i1-GGUF
description: |
CursorCore is a series of open-source models designed for AI-assisted programming. It aims to support features such as automated editing and inline chat, replicating the core abilities of closed-source AI-assisted programming tools like Cursor. This is achieved by aligning data generated through Programming-Instruct. Please read our paper to learn more.
overrides:
parameters:
model: CursorCore-QW2.5-7B.i1-Q4_K_M.gguf
files:
- filename: CursorCore-QW2.5-7B.i1-Q4_K_M.gguf
sha256: 81868f4edb4ec1a61debde1dbdebc02b407930ee19a6d946ff801afba840a102
uri: huggingface://mradermacher/CursorCore-QW2.5-7B-i1-GGUF/CursorCore-QW2.5-7B.i1-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "cursorcore-qw2.5-1.5b-lc-i1"
urls:
- https://huggingface.co/TechxGenus/CursorCore-QW2.5-1.5B-LC
- https://huggingface.co/mradermacher/CursorCore-QW2.5-1.5B-LC-i1-GGUF
description: |
CursorCore is a series of open-source models designed for AI-assisted programming. It aims to support features such as automated editing and inline chat, replicating the core abilities of closed-source AI-assisted programming tools like Cursor. This is achieved by aligning data generated through Programming-Instruct. Please read our paper to learn more.
overrides:
parameters:
model: CursorCore-QW2.5-1.5B-LC.i1-Q4_K_M.gguf
files:
- filename: CursorCore-QW2.5-1.5B-LC.i1-Q4_K_M.gguf
sha256: 185d720c810f7345ef861ad8eef1199bb15afa8e4f3c03bd5ffd476cfa465127
uri: huggingface://mradermacher/CursorCore-QW2.5-1.5B-LC-i1-GGUF/CursorCore-QW2.5-1.5B-LC.i1-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "edgerunner-command-nested-i1"
urls:
- https://huggingface.co/edgerunner-ai/EdgeRunner-Command-Nested
- https://huggingface.co/mradermacher/EdgeRunner-Command-Nested-i1-GGUF
description: |
EdgeRunner-Command-Nested is an advanced large language model designed specifically for handling complex nested function calls. Initialized from Qwen2.5-7B-Instruct, further enhanced by the integration of the Hermes function call template and additional training on a specialized dataset (based on TinyAgent). This extra dataset focuses on personal domain applications, providing the model with a robust understanding of nested function scenarios that are typical in complex user interactions.
overrides:
parameters:
model: EdgeRunner-Command-Nested.i1-Q4_K_M.gguf
files:
- filename: EdgeRunner-Command-Nested.i1-Q4_K_M.gguf
sha256: a1cc4d2b601dc20e58cbb549bd3e9bc460995840c0aaf1cd3c1cb5414c900ac7
uri: huggingface://mradermacher/EdgeRunner-Command-Nested-i1-GGUF/EdgeRunner-Command-Nested.i1-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "tsunami-0.5x-7b-instruct-i1"
icon: https://huggingface.co/Tsunami-th/Tsunami-0.5x-7B-Instruct/resolve/main/Tsunami.webp
urls:
- https://huggingface.co/Tsunami-th/Tsunami-0.5x-7B-Instruct
- https://huggingface.co/mradermacher/Tsunami-0.5x-7B-Instruct-i1-GGUF
description: |
TSUNAMI: Transformative Semantic Understanding and Natural Augmentation Model for Intelligence.
The TSUNAMI full name was created by ChatGPT.
Information:
Tsunami-0.5x-7B-Instruct is a Thai Large Language Model fine-tuned from Qwen2.5-7B on around 100,000 rows of a Thai dataset.
overrides:
parameters:
model: Tsunami-0.5x-7B-Instruct.i1-Q4_K_M.gguf
files:
- filename: Tsunami-0.5x-7B-Instruct.i1-Q4_K_M.gguf
sha256: 22e2003ecec7f1e91f2e9aaec334613c0f37fb3000d0e628b5a9980e53322fa7
uri: huggingface://mradermacher/Tsunami-0.5x-7B-Instruct-i1-GGUF/Tsunami-0.5x-7B-Instruct.i1-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "qevacot-7b-v2"
urls:
- https://huggingface.co/bunnycore/Qevacot-7B-v2
- https://huggingface.co/mradermacher/Qevacot-7B-v2-GGUF
description: |
This model was merged using the TIES merge method using Qwen/Qwen2.5-7B as a base.
The following models were included in the merge:
c10x/CoT-2.5
EVA-UNIT-01/EVA-Qwen2.5-7B-v0.1
huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2
Cran-May/T.E-8.1
overrides:
parameters:
model: Qevacot-7B-v2.Q4_K_M.gguf
files:
- filename: Qevacot-7B-v2.Q4_K_M.gguf
sha256: a45b3d3b74bc68a5c7ac07d251cdeff671e64085d1816cd86fca6cfb7eab204e
uri: huggingface://mradermacher/Qevacot-7B-v2-GGUF/Qevacot-7B-v2.Q4_K_M.gguf
- !!merge <<: *qwen25
name: "meissa-qwen2.5-7b-instruct"
icon: https://huggingface.co/Orion-zhen/Meissa-Qwen2.5-7B-Instruct/resolve/main/meissa.jpg
urls:
- https://huggingface.co/Orion-zhen/Meissa-Qwen2.5-7B-Instruct
- https://huggingface.co/QuantFactory/Meissa-Qwen2.5-7B-Instruct-GGUF
description: |
Meissa is designated Lambda Orionis, forms Orion's head, and is a multiple star with a combined apparent magnitude of 3.33. Its name means the "shining one".
This model is fine-tuned on writing and roleplaying datasets (maybe the first on qwen2.5-7b), aiming to enhance the model's performance in novel writing and roleplaying.
The model is fine-tuned over Orion-zhen/Qwen2.5-7B-Instruct-Uncensored.
overrides:
parameters:
model: Meissa-Qwen2.5-7B-Instruct.Q4_K_M.gguf
files:
- filename: Meissa-Qwen2.5-7B-Instruct.Q4_K_M.gguf
sha256: 632b10d5c0e98bc8d53295886da2d57772a54bb6f6fa01d458e9e8c7fa9c905a
uri: huggingface://QuantFactory/Meissa-Qwen2.5-7B-Instruct-GGUF/Meissa-Qwen2.5-7B-Instruct.Q4_K_M.gguf
- &archfunct
license: apache-2.0
tags:
@@ -1412,6 +1568,216 @@
- filename: NIHAPPY-L3.1-8B-v0.09.Q4_K_M.gguf
sha256: 9bd46a06093448b143bd2775f0fb1b1b172c851fafdce31289e13b7dfc23a0d7
uri: huggingface://QuantFactory/NIHAPPY-L3.1-8B-v0.09-GGUF/NIHAPPY-L3.1-8B-v0.09.Q4_K_M.gguf
- !!merge <<: *llama31
name: "llama3.1-flammades-70b"
icon: https://huggingface.co/flammenai/Flammades-Mistral-7B/resolve/main/flammades.png?download=true
urls:
- https://huggingface.co/flammenai/Llama3.1-Flammades-70B
- https://huggingface.co/mradermacher/Llama3.1-Flammades-70B-GGUF
description: |
nbeerbower/Llama3.1-Gutenberg-Doppel-70B finetuned on flammenai/Date-DPO-NoAsterisks and jondurbin/truthy-dpo-v0.1.
overrides:
parameters:
model: Llama3.1-Flammades-70B.Q4_K_M.gguf
files:
- filename: Llama3.1-Flammades-70B.Q4_K_M.gguf
sha256: f602ed006d0059ac87c6ce5904a7cc6f4b4f290886a1049f96b5b2c561ab5a89
uri: huggingface://mradermacher/Llama3.1-Flammades-70B-GGUF/Llama3.1-Flammades-70B.Q4_K_M.gguf
- !!merge <<: *llama31
name: "llama3.1-gutenberg-doppel-70b"
# chatml
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
icon: https://huggingface.co/nbeerbower/Mistral-Small-Gutenberg-Doppel-22B/resolve/main/doppel-header?download=true
urls:
- https://huggingface.co/nbeerbower/Llama3.1-Gutenberg-Doppel-70B
- https://huggingface.co/mradermacher/Llama3.1-Gutenberg-Doppel-70B-GGUF
description: |
mlabonne/Hermes-3-Llama-3.1-70B-lorablated finetuned on jondurbin/gutenberg-dpo-v0.1 and nbeerbower/gutenberg2-dpo.
overrides:
parameters:
model: Llama3.1-Gutenberg-Doppel-70B.Q4_K_M.gguf
files:
- filename: Llama3.1-Gutenberg-Doppel-70B.Q4_K_M.gguf
sha256: af558f954fa26c5bb75352178cb815bbf268f01c0ca0b96f2149422d4c19511b
uri: huggingface://mradermacher/Llama3.1-Gutenberg-Doppel-70B-GGUF/Llama3.1-Gutenberg-Doppel-70B.Q4_K_M.gguf
- !!merge <<: *llama31
name: "llama-3.1-8b-arliai-formax-v1.0-iq-arm-imatrix"
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
icon: https://iili.io/2HmlLn2.md.png
urls:
- https://huggingface.co/Lewdiculous/Llama-3.1-8B-ArliAI-Formax-v1.0-GGUF-IQ-ARM-Imatrix
description: |
Quants for ArliAI/Llama-3.1-8B-ArliAI-Formax-v1.0.
"Formax is a model that specializes in following response format instructions. Tell it the format of it's response and it will follow it perfectly. Great for data processing and dataset creation tasks."
"It is also a highly uncensored model that will follow your instructions very well."
overrides:
parameters:
model: Llama-3.1-8B-ArliAI-Formax-v1.0-Q4_K_M-imat.gguf
files:
- filename: Llama-3.1-8B-ArliAI-Formax-v1.0-Q4_K_M-imat.gguf
sha256: b548ad47caf7008a697afb3556190359529f5a05ec0e4e48ef992c7869e14255
uri: huggingface://Lewdiculous/Llama-3.1-8B-ArliAI-Formax-v1.0-GGUF-IQ-ARM-Imatrix/Llama-3.1-8B-ArliAI-Formax-v1.0-Q4_K_M-imat.gguf
- !!merge <<: *llama31
name: "hermes-3-llama-3.1-70b-lorablated"
icon: https://cdn-uploads.huggingface.co/production/uploads/61b8e2ba285851687028d395/4Hbw5n68jKUSBQeTqQIeT.png
urls:
- https://huggingface.co/mlabonne/Hermes-3-Llama-3.1-70B-lorablated
- https://huggingface.co/mradermacher/Hermes-3-Llama-3.1-70B-lorablated-GGUF
description: |
This is an uncensored version of NousResearch/Hermes-3-Llama-3.1-70B using lorablation.
The recipe is based on @grimjim's grimjim/Llama-3.1-8B-Instruct-abliterated_via_adapter (special thanks):
Extraction: We extract a LoRA adapter by comparing two models: a censored Llama 3 (meta-llama/Meta-Llama-3-70B-Instruct) and an abliterated Llama 3.1 (failspy/Meta-Llama-3.1-70B-Instruct-abliterated).
Merge: We merge this new LoRA adapter using task arithmetic to the censored NousResearch/Hermes-3-Llama-3.1-70B to abliterate it.
overrides:
parameters:
model: Hermes-3-Llama-3.1-70B-lorablated.Q4_K_M.gguf
files:
- filename: Hermes-3-Llama-3.1-70B-lorablated.Q4_K_M.gguf
sha256: 9294875ae3b8822855072b0f710ce800536d144cf303a91bcb087c4a307b578d
uri: huggingface://mradermacher/Hermes-3-Llama-3.1-70B-lorablated-GGUF/Hermes-3-Llama-3.1-70B-lorablated.Q4_K_M.gguf
- !!merge <<: *llama31
name: "hermes-3-llama-3.1-8b-lorablated"
urls:
- https://huggingface.co/mlabonne/Hermes-3-Llama-3.1-8B-lorablated-GGUF
description: |
This is an uncensored version of NousResearch/Hermes-3-Llama-3.1-8B using lorablation.
The recipe is simple:
Extraction: We extract a LoRA adapter by comparing two models: a censored Llama 3.1 (meta-llama/Meta-Llama-3-8B-Instruct) and an abliterated Llama 3.1 (mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated).
Merge: We merge this new LoRA adapter using task arithmetic to the censored NousResearch/Hermes-3-Llama-3.1-8B to abliterate it.
overrides:
parameters:
model: hermes-3-llama-3.1-8b-lorablated.Q4_K_M.gguf
files:
- filename: hermes-3-llama-3.1-8b-lorablated.Q4_K_M.gguf
sha256: 8cff9d399a0583616fe1f290da6daa091ab5c5493d0e173a8fffb45202d79417
uri: huggingface://mlabonne/Hermes-3-Llama-3.1-8B-lorablated-GGUF/hermes-3-llama-3.1-8b-lorablated.Q4_K_M.gguf
- !!merge <<: *llama31
name: "doctoraifinetune-3.1-8b-i1"
urls:
- https://huggingface.co/huzaifa525/Doctoraifinetune-3.1-8B
- https://huggingface.co/mradermacher/Doctoraifinetune-3.1-8B-i1-GGUF
description: |
This is a fine-tuned version of the Meta-Llama-3.1-8B-bnb-4bit model, specifically adapted for the medical field. It has been trained using a dataset that provides extensive information on diseases, symptoms, and treatments, making it ideal for AI-powered healthcare tools such as medical chatbots, virtual assistants, and diagnostic support systems.
Key Features
Disease Diagnosis: Accurately identifies diseases based on symptoms provided by the user.
Symptom Analysis: Breaks down and interprets symptoms to provide a comprehensive medical overview.
Treatment Recommendations: Suggests treatments and remedies according to medical conditions.
Dataset
The model is fine-tuned on 2000 rows from a dataset consisting of 272k rows. This dataset includes rich information about diseases, symptoms, and their corresponding treatments. The model is continuously being updated and will be further trained on the remaining data in future releases to improve accuracy and capabilities.
overrides:
parameters:
model: Doctoraifinetune-3.1-8B.i1-Q4_K_M.gguf
files:
- filename: Doctoraifinetune-3.1-8B.i1-Q4_K_M.gguf
sha256: 282456efcb6c7e54d34ac25ae7fc022a94152ed77281ae4625b9628091e0a3d6
uri: huggingface://mradermacher/Doctoraifinetune-3.1-8B-i1-GGUF/Doctoraifinetune-3.1-8B.i1-Q4_K_M.gguf
- !!merge <<: *llama31
name: "astral-fusion-neural-happy-l3.1-8b"
urls:
- https://huggingface.co/ZeroXClem/Astral-Fusion-Neural-Happy-L3.1-8B
- https://huggingface.co/mradermacher/Astral-Fusion-Neural-Happy-L3.1-8B-GGUF
description: |
Astral-Fusion-Neural-Happy-L3.1-8B is a celestial blend of magic, creativity, and dynamic storytelling. Designed to excel in instruction-following, immersive roleplaying, and magical narrative generation, this model is a fusion of the finest qualities from Astral-Fusion, NIHAPPY, and NeuralMahou. ✨🚀
This model is perfect for anyone seeking a cosmic narrative experience, with the ability to generate both precise instructional content and fantastical stories in one cohesive framework. Whether you're crafting immersive stories, creating AI roleplaying characters, or working on interactive storytelling, this model brings out the magic. 🌟
overrides:
parameters:
model: Astral-Fusion-Neural-Happy-L3.1-8B.Q4_K_M.gguf
files:
- filename: Astral-Fusion-Neural-Happy-L3.1-8B.Q4_K_M.gguf
sha256: 14a3b07c1723ef1ca24f99382254b1227d95974541e23792a4e7ff621896055d
uri: huggingface://mradermacher/Astral-Fusion-Neural-Happy-L3.1-8B-GGUF/Astral-Fusion-Neural-Happy-L3.1-8B.Q4_K_M.gguf
- !!merge <<: *llama31
name: "mahou-1.5-llama3.1-70b-i1"
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
icon: https://huggingface.co/flammenai/Mahou-1.0-mistral-7B/resolve/main/mahou1.png
urls:
- https://huggingface.co/flammenai/Mahou-1.5-llama3.1-70B
- https://huggingface.co/mradermacher/Mahou-1.5-llama3.1-70B-i1-GGUF
description: |
Mahou is designed to provide short messages in a conversational context. It is capable of casual conversation and character roleplay.
overrides:
parameters:
model: Mahou-1.5-llama3.1-70B.i1-Q4_K_M.gguf
files:
- filename: Mahou-1.5-llama3.1-70B.i1-Q4_K_M.gguf
sha256: c2711c4c9c8d011edbeaa391b4418d433e273a318d1de3dbdda9b85baf4996f2
uri: huggingface://mradermacher/Mahou-1.5-llama3.1-70B-i1-GGUF/Mahou-1.5-llama3.1-70B.i1-Q4_K_M.gguf
- !!merge <<: *llama31
name: "llama-3.1-nemotron-70b-instruct-hf"
urls:
- https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF
- https://huggingface.co/mradermacher/Llama-3.1-Nemotron-70B-Instruct-HF-GGUF
description: |
Llama-3.1-Nemotron-70B-Instruct is a large language model customized by NVIDIA to improve the helpfulness of LLM generated responses to user queries.
This model reaches Arena Hard of 85.0, AlpacaEval 2 LC of 57.6 and GPT-4-Turbo MT-Bench of 8.98, which are known to be predictive of LMSys Chatbot Arena Elo
As of 1 Oct 2024, this model is #1 on all three automatic alignment benchmarks (verified tab for AlpacaEval 2 LC), edging out strong frontier models such as GPT-4o and Claude 3.5 Sonnet.
This model was trained using RLHF (specifically, REINFORCE), Llama-3.1-Nemotron-70B-Reward and HelpSteer2-Preference prompts on a Llama-3.1-70B-Instruct model as the initial policy.
Llama-3.1-Nemotron-70B-Instruct-HF has been converted from Llama-3.1-Nemotron-70B-Instruct to support it in the HuggingFace Transformers codebase. Please note that evaluation results might be slightly different from the Llama-3.1-Nemotron-70B-Instruct as evaluated in NeMo-Aligner, which the evaluation results below are based on.
overrides:
parameters:
model: Llama-3.1-Nemotron-70B-Instruct-HF.Q4_K_M.gguf
files:
- filename: Llama-3.1-Nemotron-70B-Instruct-HF.Q4_K_M.gguf
sha256: b6b80001b849e3c59c39b09508c018b35b491a5c7bbafafa23f2fc04243f3e30
uri: huggingface://mradermacher/Llama-3.1-Nemotron-70B-Instruct-HF-GGUF/Llama-3.1-Nemotron-70B-Instruct-HF.Q4_K_M.gguf
- !!merge <<: *llama31
name: "l3.1-etherealrainbow-v1.0-rc1-8b"
icon: https://huggingface.co/invisietch/L3.1-EtherealRainbow-v1.0-rc1-8B/resolve/main/header.png
urls:
- https://huggingface.co/invisietch/L3.1-EtherealRainbow-v1.0-rc1-8B
- https://huggingface.co/mradermacher/L3.1-EtherealRainbow-v1.0-rc1-8B-GGUF
description: |
Ethereal Rainbow v1.0 is the sequel to the popular Llama 3 8B merge, EtherealRainbow v0.3. Instead of a straight merge of other people's models, v1.0 is a finetune on the Instruct model, using 245 million tokens of training data (approx 177 million of these tokens are my own novel datasets).
This model is designed to be suitable for creative writing and roleplay, and to push the boundaries of what's possible with an 8B model. This RC is not a finished product, but your feedback will drive the creation of better models.
This is a release candidate model. It has some known issues and probably some unknown ones too, because the purpose of these early releases is to seek feedback.
overrides:
parameters:
model: L3.1-EtherealRainbow-v1.0-rc1-8B.Q4_K_M.gguf
files:
- filename: L3.1-EtherealRainbow-v1.0-rc1-8B.Q4_K_M.gguf
sha256: c5556b2563112e512acca171415783f0988545b02c1834696c1cc35952def72c
uri: huggingface://mradermacher/L3.1-EtherealRainbow-v1.0-rc1-8B-GGUF/L3.1-EtherealRainbow-v1.0-rc1-8B.Q4_K_M.gguf
- !!merge <<: *llama31
name: "theia-llama-3.1-8b-v1"
urls:
- https://huggingface.co/Chainbase-Labs/Theia-Llama-3.1-8B-v1
- https://huggingface.co/QuantFactory/Theia-Llama-3.1-8B-v1-GGUF
description: |
Theia-Llama-3.1-8B-v1 is an open-source large language model (LLM) trained specifically in the cryptocurrency domain. It was fine-tuned from the Llama-3.1-8B base model using a dataset curated from top 2000 cryptocurrency projects and comprehensive research reports to specialize in crypto-related tasks. Theia-Llama-3.1-8B-v1 has been quantized to optimize it for efficient deployment and reduced memory footprint. It's benchmarked highly for crypto knowledge comprehension and generation, knowledge coverage, and reasoning capabilities. The system prompt used for its training is "You are a helpful assistant who will answer crypto related questions." The recommended parameters for performance include sequence length of 256, temperature of 0, top-k-sampling of -1, top-p of 1, and context window of 39680.
overrides:
parameters:
model: Theia-Llama-3.1-8B-v1.Q4_K_M.gguf
files:
- filename: Theia-Llama-3.1-8B-v1.Q4_K_M.gguf
sha256: db876d033f86f118b49a1f1006e5d078d494c93b73c7e595bd10ca789a0c8fdb
uri: huggingface://QuantFactory/Theia-Llama-3.1-8B-v1-GGUF/Theia-Llama-3.1-8B-v1.Q4_K_M.gguf
- !!merge <<: *llama31
icon: https://huggingface.co/Delta-Vector/Baldur-8B/resolve/main/Baldur.jpg
name: "baldur-8b"
urls:
- https://huggingface.co/QuantFactory/Baldur-8B-GGUF
description: |
A finetune of the L3.1 instruct distill done by Arcee. The intent of this model is to have differing prose from my other releases; in my testing it has achieved this, avoiding common -isms and having a differing flavor from my other models.
overrides:
parameters:
model: Baldur-8B.Q4_K_M.gguf
files:
- filename: Baldur-8B.Q4_K_M.gguf
sha256: 645b393fbac5cd17ccfd66840a3a05c3930e01b903dd1535f0347a74cc443fc7
uri: huggingface://QuantFactory/Baldur-8B-GGUF/Baldur-8B.Q4_K_M.gguf
- &deepseek
## Deepseek
url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"
@@ -1437,6 +1803,20 @@
- filename: DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf
sha256: 50ec78036433265965ed1afd0667c00c71c12aa70bcf383be462cb8e159db6c0
uri: huggingface://LoneStriker/DeepSeek-Coder-V2-Lite-Instruct-GGUF/DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf
- !!merge <<: *deepseek
name: "cursorcore-ds-6.7b-i1"
urls:
- https://huggingface.co/TechxGenus/CursorCore-DS-6.7B
- https://huggingface.co/mradermacher/CursorCore-DS-6.7B-i1-GGUF
description: |
CursorCore is a series of open-source models designed for AI-assisted programming. It aims to support features such as automated editing and inline chat, replicating the core abilities of closed-source AI-assisted programming tools like Cursor. This is achieved by aligning data generated through Programming-Instruct. Please read our paper to learn more.
overrides:
parameters:
model: CursorCore-DS-6.7B.i1-Q4_K_M.gguf
files:
- filename: CursorCore-DS-6.7B.i1-Q4_K_M.gguf
sha256: 71b94496be79e5bc45c23d6aa6c242f5f1d3625b4f00fe91d781d381ef35c538
uri: huggingface://mradermacher/CursorCore-DS-6.7B-i1-GGUF/CursorCore-DS-6.7B.i1-Q4_K_M.gguf
- name: "archangel_sft_pythia2-8b"
url: "github:mudler/LocalAI/gallery/tuluv2.yaml@master"
icon: https://gist.github.com/assets/29318529/fe2d8391-dbd1-4b7e-9dc4-7cb97e55bc06
@@ -2022,6 +2402,76 @@
- filename: MN-BackyardAI-Party-12B-v1-Q4_K_M-imat.gguf
sha256: cea68768dff58b553974b755bb40ef790ab8b86866d9b5c46bc2e6c3311b876a
uri: huggingface://Lewdiculous/MN-BackyardAI-Party-12B-v1-GGUF-IQ-ARM-Imatrix/MN-BackyardAI-Party-12B-v1-Q4_K_M-imat.gguf
- !!merge <<: *mistral03
name: "ml-ms-etheris-123b"
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
icon: https://cdn-uploads.huggingface.co/production/uploads/64545af5ec40bbbd01242ca6/ieEjL3TxpDM3WAZQcya6E.png
urls:
- https://huggingface.co/Steelskull/ML-MS-Etheris-123B
- https://huggingface.co/mradermacher/ML-MS-Etheris-123B-GGUF
description: |
This model merges the robust storytelling of multiple models while attempting to maintain intelligence. The final model was merged after Model Soup with DELLA to add some special sauce.
- model: NeverSleep/Lumimaid-v0.2-123B
- model: TheDrummer/Behemoth-123B-v1
- model: migtissera/Tess-3-Mistral-Large-2-123B
- model: anthracite-org/magnum-v2-123b
Use Mistral, ChatML, or Meth Format
overrides:
parameters:
model: ML-MS-Etheris-123B.Q2_K.gguf
files:
- filename: ML-MS-Etheris-123B.Q2_K.gguf
sha256: a17c5615413b5c9c8d01cf55386573d0acd00e01f6e2bcdf492624c73c593fc3
uri: huggingface://mradermacher/ML-MS-Etheris-123B-GGUF/ML-MS-Etheris-123B.Q2_K.gguf
- !!merge <<: *mistral03
name: "mn-lulanum-12b-fix-i1"
urls:
- https://huggingface.co/djuna/MN-Lulanum-12B-FIX
- https://huggingface.co/mradermacher/MN-Lulanum-12B-FIX-i1-GGUF
description: |
This model was merged using the della_linear merge method using unsloth/Mistral-Nemo-Base-2407 as a base.
The following models were included in the merge:
VAGOsolutions/SauerkrautLM-Nemo-12b-Instruct
anthracite-org/magnum-v2.5-12b-kto
Undi95/LocalC-12B-e2.0
NeverSleep/Lumimaid-v0.2-12B
overrides:
parameters:
model: MN-Lulanum-12B-FIX.i1-Q4_K_M.gguf
files:
- filename: MN-Lulanum-12B-FIX.i1-Q4_K_M.gguf
sha256: 7e24d57249059d45bb508565ec3055e585a4e658c1815c67ea92397acc6aa775
uri: huggingface://mradermacher/MN-Lulanum-12B-FIX-i1-GGUF/MN-Lulanum-12B-FIX.i1-Q4_K_M.gguf
- !!merge <<: *mistral03
name: "tor-8b"
icon: https://huggingface.co/Delta-Vector/Tor-8B/resolve/main/FinalTor8B.jpg
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
urls:
- https://huggingface.co/QuantFactory/Tor-8B-GGUF
description: |
An earlier checkpoint of Darkens-8B using the same configuration that I felt was different enough from its 4-epoch cousin to release. Finetuned on top of the Prune/Distill NeMo 8B done by Nvidia, this model aims to have generally good prose and writing while not falling into claude-isms.
overrides:
parameters:
model: Tor-8B.Q4_K_M.gguf
files:
- filename: Tor-8B.Q4_K_M.gguf
sha256: 9dd64bd886aa7682b6179340449b38feda405b44722ef7ac752cedb807af370e
uri: huggingface://QuantFactory/Tor-8B-GGUF/Tor-8B.Q4_K_M.gguf
- !!merge <<: *mistral03
name: "darkens-8b"
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
urls:
- https://huggingface.co/Delta-Vector/Darkens-8B
- https://huggingface.co/QuantFactory/Darkens-8B-GGUF
description: |
This is the fully cooked, 4-epoch version of Tor-8B. This is an experimental version; despite being trained for 4 epochs, the model feels fresh and new and is not overfit. This model aims to have generally good prose and writing while not falling into claude-isms, and it follows the actions "dialogue" format heavily.
overrides:
parameters:
model: Darkens-8B.Q4_K_M.gguf
files:
- filename: Darkens-8B.Q4_K_M.gguf
sha256: f56a483e10fd00957460adfc16ee462cecac892a4fb44dc59e466e68a360fd42
uri: huggingface://QuantFactory/Darkens-8B-GGUF/Darkens-8B.Q4_K_M.gguf
- &mudler
### START mudler's LocalAI specific-models
url: "github:mudler/LocalAI/gallery/mudler.yaml@master"
@@ -2649,6 +3099,20 @@
- filename: Gemma-2-Ataraxy-v3i-9B.Q4_K_M.gguf
sha256: f14c5b9373d4058f0f812c6c34184addeb4aeeecb02a7bbcf9844d9afc8d0066
uri: huggingface://QuantFactory/Gemma-2-Ataraxy-v3i-9B-GGUF/Gemma-2-Ataraxy-v3i-9B.Q4_K_M.gguf
- !!merge <<: *gemma
name: "apollo2-9b"
url: "github:mudler/LocalAI/gallery/vicuna-chat.yaml@master"
urls:
- https://huggingface.co/mradermacher/Apollo2-9B-GGUF
description: |
Covering 12 Major Languages including English, Chinese, French, Hindi, Spanish, Arabic, Russian, Japanese, Korean, German, Italian, Portuguese and 38 Minor Languages So far.
overrides:
parameters:
model: Apollo2-9B.Q4_K_M.gguf
files:
- filename: Apollo2-9B.Q4_K_M.gguf
sha256: 9fdb63f78e574558a4f33782eca88716eea28e90ea3ae36c381769cde6b81e0f
uri: huggingface://mradermacher/Apollo2-9B-GGUF/Apollo2-9B.Q4_K_M.gguf
- &llama3
url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
@@ -4248,6 +4712,19 @@
- filename: Yi-Coder-9B.Q4_K_M.gguf
sha256: cff3db8a69c43654e3c2d2984e86ad2791d1d446ec56b24a636ba1ce78363308
uri: huggingface://QuantFactory/Yi-Coder-9B-GGUF/Yi-Coder-9B.Q4_K_M.gguf
- !!merge <<: *yi-chat
name: "cursorcore-yi-9b"
urls:
- https://huggingface.co/mradermacher/CursorCore-Yi-9B-GGUF
description: |
CursorCore is a series of open-source models designed for AI-assisted programming. It aims to support features such as automated editing and inline chat, replicating the core abilities of closed-source AI-assisted programming tools like Cursor. This is achieved by aligning data generated through Programming-Instruct. Please read our paper to learn more.
overrides:
parameters:
model: CursorCore-Yi-9B.Q4_K_M.gguf
files:
- filename: CursorCore-Yi-9B.Q4_K_M.gguf
sha256: 943bf59b34bee34afae8390c1791ccbc7c742e11a4d04d538a699754eb92215e
uri: huggingface://mradermacher/CursorCore-Yi-9B-GGUF/CursorCore-Yi-9B.Q4_K_M.gguf
- &vicuna-chat
## LLama2 and derivatives
### Start Fimbulvetr
@@ -5175,6 +5652,26 @@
- filename: L3-8B-Niitama-v1.i1-Q4_K_M.gguf
sha256: 8c62f831db2a6e34aa75459fe8a98815199ecc2dac1892a460b8b86363b6826e
uri: huggingface://mradermacher/L3-8B-Niitama-v1-i1-GGUF/L3-8B-Niitama-v1.i1-Q4_K_M.gguf
- !!merge <<: *llama3
icon: https://huggingface.co/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA/resolve/main/Images/LLAMA-3_8B_Unaligned_BETA.png
name: "llama-3_8b_unaligned_beta"
urls:
- https://huggingface.co/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA
- https://huggingface.co/bartowski/LLAMA-3_8B_Unaligned_BETA-GGUF
description: |
In the Wild West of the AI world, the real titans never hit their deadlines, no sir!
The projects that finish on time? They're the soft ones—basic, surface-level shenanigans. But the serious projects? They're always delayed. You set a date, then reality hits: not gonna happen, scope creep that mutates the roadmap, unexpected turn of events that derails everything.
It's only been 4 months since the Alpha was released, and half a year since the project started, but it felt like nearly a decade.
Deadlines shift, but with each delay, you're not failing—you're refining, and becoming more ambitious. A project that keeps getting pushed isn't late; it's just gaining weight, becoming something worth building, and truly worth seeing all the way through. The longer it's delayed, the more serious it gets.
LLAMA-3_8B_Unaligned is a serious project, and thank god, the Beta is finally here.
I love you all unconditionally, thanks for all the support and kind words!
overrides:
parameters:
model: LLAMA-3_8B_Unaligned_BETA-Q4_K_M.gguf
files:
- filename: LLAMA-3_8B_Unaligned_BETA-Q4_K_M.gguf
sha256: 5b88fb4537339996c04e4a1b6ef6a2d555c4103b6378e273ae9c6c5e77af67eb
uri: huggingface://bartowski/LLAMA-3_8B_Unaligned_BETA-GGUF/LLAMA-3_8B_Unaligned_BETA-Q4_K_M.gguf
- &chatml
### ChatML
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
@@ -5707,6 +6204,40 @@
- filename: calme-2.1-phi3.5-4b.i1-Q4_K_M.gguf
sha256: 989eccacd52b6d9ebf2c06c35c363da19aadb125659a10df299b7130bc293e77
uri: huggingface://mradermacher/calme-2.1-phi3.5-4b-i1-GGUF/calme-2.1-phi3.5-4b.i1-Q4_K_M.gguf
- !!merge <<: *phi-3
name: "phi-3.5-mini-titanfusion-0.2"
urls:
- https://huggingface.co/bunnycore/Phi-3.5-mini-TitanFusion-0.2
- https://huggingface.co/mradermacher/Phi-3.5-mini-TitanFusion-0.2-GGUF
description: |
This model was merged using the TIES merge method using microsoft/Phi-3.5-mini-instruct as a base.
The following models were included in the merge:
nbeerbower/phi3.5-gutenberg-4B
ArliAI/Phi-3.5-mini-3.8B-ArliAI-RPMax-v1.1
bunnycore/Phi-3.5-Mini-Hyper
bunnycore/Phi-3.5-Mini-Hyper + bunnycore/Phi-3.1-EvolKit-lora
bunnycore/Phi-3.5-Mini-Sonet-RP
bunnycore/Phi-3.5-mini-TitanFusion-0.1
overrides:
parameters:
model: Phi-3.5-mini-TitanFusion-0.2.Q4_K_M.gguf
files:
- filename: Phi-3.5-mini-TitanFusion-0.2.Q4_K_M.gguf
sha256: 9579305712f2bca246914639c4873acdc1e7bc64ac2c7db0230df4f0ca0ef234
uri: huggingface://mradermacher/Phi-3.5-mini-TitanFusion-0.2-GGUF/Phi-3.5-mini-TitanFusion-0.2.Q4_K_M.gguf
- !!merge <<: *phi-3
name: "phi-3-vision:vllm"
url: "github:mudler/LocalAI/gallery/phi-3-vision.yaml@master"
description: |
Phi-3.5-vision is a lightweight, state-of-the-art open multimodal model built upon datasets which include synthetic data and filtered publicly available websites, with a focus on very high-quality, reasoning-dense data both on text and vision. The model belongs to the Phi-3 model family, and the multimodal version supports a context length of 128K tokens. The model underwent a rigorous enhancement process, incorporating both supervised fine-tuning and direct preference optimization to ensure precise instruction adherence and robust safety measures.
- !!merge <<: *phi-3
name: "phi-3.5-vision:vllm"
url: "github:mudler/LocalAI/gallery/phi-3-vision.yaml@master"
overrides:
parameters:
model: microsoft/Phi-3.5-vision-instruct
description: |
Phi-3.5-vision is a lightweight, state-of-the-art open multimodal model built upon datasets which include synthetic data and filtered publicly available websites, with a focus on very high-quality, reasoning-dense data both on text and vision. The model belongs to the Phi-3 model family, and the multimodal version supports a context length of 128K tokens. The model underwent a rigorous enhancement process, incorporating both supervised fine-tuning and direct preference optimization to ensure precise instruction adherence and robust safety measures.
- &hermes-2-pro-mistral
### START Hermes
url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"

gallery/phi-3-vision.yaml (new file, 25 lines)
View File

@@ -0,0 +1,25 @@
---
name: "phi3-vision"
config_file: |
name: phi3-vision
backend: vllm
parameters:
model: microsoft/Phi-3-vision-128k-instruct
trust_remote_code: true
max_model_len: 32768
template:
chat_message: |-
<|{{ .RoleName }}|>
{{.Content}}<|end|>
chat: >-
{{.Input}}
<|assistant|>
completion: |
{{.Input}}
use_tokenizer_template: false
multimodal: "{{ range .Images }}<|image_{{ add1 .ID}}|>{{end}}\n{{.Text}}"
# XXX: The one below can be dropped after a new release is out
image: "<|image_{{ add1 .ID }}|>\n{{.Text}}"
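For illustration, a plain-Python mimic (not the Go template engine) of what the multimodal template above renders for a message carrying two images:

```
# Each image gets a 1-based <|image_N|> tag (the `add1 .ID` in the template),
# followed by the message's concatenated text on the next line.
def render_multimodal(num_images: int, text: str) -> str:
    tags = "".join(f"<|image_{i + 1}|>" for i in range(num_images))
    return f"{tags}\n{text}"

print(render_multimodal(2, "Compare these two pictures."))
# <|image_1|><|image_2|>
# Compare these two pictures.
```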

Some files were not shown because too many files have changed in this diff.