Cleanup gh-pages branch

Signed-off-by: Dimitris Karakasilis <dimitris@spectrocloud.com>
2026-05-23 08:10:48 -04:00 · 2023-04-26 11:55:04 +03:00
794 changed files with 215 additions and 114152 deletions
--- a/Generation/musicgen.bru
+++ b/Generation/musicgen.bru
@@ -1,23 +0,0 @@
-meta {
-  name: musicgen
-  type: http
-  seq: 1
-}
-
-post {
-  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/sound-generation
-  body: json
-  auth: none
-}
-
-headers {
-  Content-Type: application/json
-}
-
-body:json {
-  {
-      "model_id": "facebook/musicgen-small",
-      "text": "Exciting 80s Newscast Interstitial",
-      "duration_seconds": 8
-  }
-}
--- a/Requests/backend
+++ b/Requests/backend
@@ -1,17 +0,0 @@
-meta {
-  name: backend monitor
-  type: http
-  seq: 4
-}
-
-get {
-  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/backend/monitor
-  body: json
-  auth: none
-}
-
-body:json {
-  {
-    "model": "{{DEFAULT_MODEL}}"
-  }
-}
--- a/monitor/backend-shutdown.bru
+++ b/monitor/backend-shutdown.bru
@@ -1,21 +0,0 @@
-meta {
-  name: backend-shutdown
-  type: http
-  seq: 3
-}
-
-post {
-  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/backend/shutdown
-  body: json
-  auth: none
-}
-
-headers {
-  Content-Type: application/json
-}
-
-body:json {
-  {
-      "model": "{{DEFAULT_MODEL}}"
-  }
-}
--- a/Requests/bruno.json
+++ b/Requests/bruno.json
@@ -1,5 +0,0 @@
-{
-  "version": "1",
-  "name": "LocalAI Test Requests",
-  "type": "collection"
-}
--- a/Requests/environments/localhost.bru
+++ b/Requests/environments/localhost.bru
@@ -1,6 +0,0 @@
-vars {
-  HOST: localhost
-  PORT: 8080
-  DEFAULT_MODEL: gpt-3.5-turbo
-  PROTOCOL: http://
-}
--- a/.bruno/LocalAI
+++ b/.bruno/LocalAI
@@ -1,11 +0,0 @@
-meta {
-  name: get models list
-  type: http
-  seq: 2
-}
-
-get {
-  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models
-  body: none
-  auth: none
-}
--- a/generation/Generate
+++ b/generation/Generate
@@ -1,25 +0,0 @@
-meta {
-  name: Generate image
-  type: http
-  seq: 1
-}
-
-post {
-  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/images/generations
-  body: json
-  auth: none
-}
-
-headers {
-  Content-Type: application/json
-}
-
-body:json {
-  {
-    "prompt": "<positive prompt>|<negative prompt>",
-    "model": "model-name",
-    "step": 51,
-    "size": "1024x1024",
-    "image": ""
-  }
-}
--- a/text/-completions.bru
+++ b/text/-completions.bru
@@ -1,24 +0,0 @@
-meta {
-  name: -completions
-  type: http
-  seq: 4
-}
-
-post {
-  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/completions
-  body: json
-  auth: none
-}
-
-headers {
-  Content-Type: application/json
-}
-
-body:json {
-  {
-      "model": "{{DEFAULT_MODEL}}",
-      "prompt": "function downloadFile(string url, string outputPath) {",
-      "max_tokens": 256,
-      "temperature": 0.5
-  }
-}
--- a/text/-edits.bru
+++ b/text/-edits.bru
@@ -1,23 +0,0 @@
-meta {
-  name: -edits
-  type: http
-  seq: 5
-}
-
-post {
-  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/edits
-  body: json
-  auth: none
-}
-
-headers {
-  Content-Type: application/json
-}
-
-body:json {
-  {
-      "model": "{{DEFAULT_MODEL}}",
-      "input": "What day of the wek is it?",
-      "instruction": "Fix the spelling mistakes"
-  }
-}
--- a/text/-embeddings.bru
+++ b/text/-embeddings.bru
@@ -1,22 +0,0 @@
-meta {
-  name: -embeddings
-  type: http
-  seq: 6
-}
-
-post {
-  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/embeddings
-  body: json
-  auth: none
-}
-
-headers {
-  Content-Type: application/json
-}
-
-body:json {
-  {
-      "model": "{{DEFAULT_MODEL}}",
-      "input": "A STRANGE GAME.\nTHE ONLY WINNING MOVE IS NOT TO PLAY.\n\nHOW ABOUT A NICE GAME OF CHESS?"
-  }
-}
--- a/text/chat/chat
+++ b/text/chat/chat
@@ -1,30 +0,0 @@
-meta {
-  name: chat completion -simple- 1 message-
-  type: http
-  seq: 4
-}
-
-post {
-  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
-  body: json
-  auth: none
-}
-
-headers {
-  Content-Type: application/json
-}
-
-body:json {
-  {
-    "model": "{{DEFAULT_MODEL}}",
-    "messages": [
-      {
-        "role": "user",
-        "content": "How could one use friction to cook an egg?"
-      }
-    ],
-    "max_tokens": 256,
-    "temperature": 0.2,
-    "grammar": ""
-  }
-}
--- a/text/chat/chat-completions
+++ b/text/chat/chat-completions
@@ -1,29 +0,0 @@
-meta {
-  name: chat-completions -long-
-  type: http
-  seq: 5
-}
-
-post {
-  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
-  body: json
-  auth: none
-}
-
-headers {
-  Content-Type: application/json
-}
-
-body:json {
-  {
-       "model": "{{DEFAULT_MODEL}}",
-       "messages": [{"role": "system", "content": "You are a helpful, intelligent pirate assistant. Always answer as helpfully as possible, while being a pirate and using pirate language. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."},
-           {"role": "user", "content": "How could one use electricity to cook an egg?"},
-           {"role": "assistant",
-                  "content": "Shiver me timbers! Using electricity to cook an egg? Well, matey, I reckon that be a right curious idea! *adjusts spectacles* Now, I ain't sure if this be possible, but I'll do me best to help ye out.\n\nFirst things first, ye gotta understand that electricity be a powerful force, and it can't just cook an egg on its own. Ye see, electricity be like a mighty wind that can make things happen, but it needs somethin' to work with. So, if ye want to use electricity to cook an egg, ye gotta find a way to harness that power and make it do the cookin'.\n\nNow, I know what ye might be thinkin': \"How do I harness the power of electricity to cook an egg?\" Well, matey, there be a few ways to do it. One way be to use a special device called an \"electric frying pan.\" This be a pan that has a built-in heating element that gets hot when ye plug it into a wall socket. When the element gets hot, ye can crack an egg into the pan and watch as it cook"
-              },
-              {"role": "user", "content": "I don't have one of those, just a raw wire and plenty of power! How do we get it done?"}],
-       "max_tokens": 1024,
-       "temperature": 0.5
-  }
-}
--- a/text/chat/chat-completions
+++ b/text/chat/chat-completions
@@ -1,25 +0,0 @@
-meta {
-  name: chat-completions -stream-
-  type: http
-  seq: 6
-}
-
-post {
-  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
-  body: json
-  auth: none
-}
-
-headers {
-  Content-Type: application/json
-}
-
-body:json {
-  {
-       "model": "{{DEFAULT_MODEL}}",
-       "messages": [{"role": "user", "content": "Explain how I can set sail on the ocean using only power generated by seagulls?"}],
-       "max_tokens": 256,
-       "temperature": 0.9,
-       "stream": true
-  }
-}
--- a/Requests/model
+++ b/Requests/model
@@ -1,22 +0,0 @@
-meta {
-  name: add model gallery
-  type: http
-  seq: 10
-}
-
-post {
-  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
-  body: json
-  auth: none
-}
-
-headers {
-  Content-Type: application/json
-}
-
-body:json {
-  {
-      "url": "file:///home/dave/projects/model-gallery/huggingface/TheBloke__CodeLlama-7B-Instruct-GGML.yaml",
-      "name": "test"
-  }
-}
--- a/gallery/delete
+++ b/gallery/delete
@@ -1,21 +0,0 @@
-meta {
-  name: delete model gallery
-  type: http
-  seq: 11
-}
-
-delete {
-  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
-  body: json
-  auth: none
-}
-
-headers {
-  Content-Type: application/json
-}
-
-body:json {
-  {
-      "name": "test"
-  }
-}
--- a/Requests/model
+++ b/Requests/model
@@ -1,11 +0,0 @@
-meta {
-  name: list MODELS in galleries
-  type: http
-  seq: 7
-}
-
-get {
-  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/available
-  body: none
-  auth: none
-}
--- a/Requests/model
+++ b/Requests/model
@@ -1,11 +0,0 @@
-meta {
-  name: list model GALLERIES
-  type: http
-  seq: 8
-}
-
-get {
-  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
-  body: none
-  auth: none
-}
--- a/Requests/model
+++ b/Requests/model
@@ -1,11 +0,0 @@
-meta {
-  name: model delete
-  type: http
-  seq: 7
-}
-
-post {
-  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
-  body: none
-  auth: none
-}
--- a/Requests/model
+++ b/Requests/model
@@ -1,21 +0,0 @@
-meta {
-  name: model gallery apply -gist-
-  type: http
-  seq: 12
-}
-
-post {
-  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/apply
-  body: json
-  auth: none
-}
-
-headers {
-  Content-Type: application/json
-}
-
-body:json {
-  {
-      "id": "TheBloke__CodeLlama-7B-Instruct-GGML__codellama-7b-instruct.ggmlv3.Q2_K.bin"
-  }
-}
--- a/Requests/model
+++ b/Requests/model
@@ -1,22 +0,0 @@
-meta {
-  name: model gallery apply
-  type: http
-  seq: 9
-}
-
-post {
-  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/apply
-  body: json
-  auth: none
-}
-
-headers {
-  Content-Type: application/json
-}
-
-body:json {
-  {
-      "id": "dave@TheBloke__CodeLlama-7B-Instruct-GGML__codellama-7b-instruct.ggmlv3.Q3_K_S.bin",
-      "name": "codellama7b"
-  }
-}
--- a/Requests/transcription/gb1.ogg
+++ b/Requests/transcription/gb1.ogg
--- a/Requests/transcription/transcribe.bru
+++ b/Requests/transcription/transcribe.bru
@@ -1,16 +0,0 @@
-meta {
-  name: transcribe
-  type: http
-  seq: 1
-}
-
-post {
-  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/audio/transcriptions
-  body: multipartForm
-  auth: none
-}
-
-body:multipart-form {
-  file: @file(transcription/gb1.ogg)
-  model: whisper-1
-}
--- a/Requests/tts/-tts.bru
+++ b/Requests/tts/-tts.bru
@@ -1,22 +0,0 @@
-meta {
-  name: -tts
-  type: http
-  seq: 2
-}
-
-post {
-  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/tts
-  body: json
-  auth: none
-}
-
-headers {
-  Content-Type: application/json
-}
-
-body:json {
-  {
-      "model": "{{DEFAULT_MODEL}}",
-      "input": "A STRANGE GAME.\nTHE ONLY WINNING MOVE IS NOT TO PLAY.\n\nHOW ABOUT A NICE GAME OF CHESS?"
-  }
-}
--- a/Requests/tts/musicgen.bru
+++ b/Requests/tts/musicgen.bru
@@ -1,23 +0,0 @@
-meta {
-  name: musicgen
-  type: http
-  seq: 2
-}
-
-post {
-  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/tts
-  body: json
-  auth: none
-}
-
-headers {
-  Content-Type: application/json
-}
-
-body:json {
-  {
-      "backend": "transformers-musicgen",
-      "model": "facebook/musicgen-small",
-      "input": "80s Synths playing Jazz"
-  }
-}
--- a/.devcontainer-scripts/postcreate.sh
+++ b/.devcontainer-scripts/postcreate.sh
@@ -1,17 +0,0 @@
-#!/bin/bash
-
-cd /workspace
-
-# Get the files into the volume without a bind mount
-if [ ! -d ".git" ]; then
-    git clone https://github.com/mudler/LocalAI.git .
-else
-    git fetch
-fi
-
-echo "Standard Post-Create script completed."
-
-if [ -f "/devcontainer-customization/postcreate.sh" ]; then
-    echo "Launching customization postcreate.sh"
-    bash "/devcontainer-customization/postcreate.sh"
-fi
--- a/.devcontainer-scripts/poststart.sh
+++ b/.devcontainer-scripts/poststart.sh
@@ -1,16 +0,0 @@
-#!/bin/bash
-
-cd /workspace
-
-# Grab the pre-stashed backend assets to avoid build issues
-cp -r /build/backend-assets /workspace/backend-assets
-
-# Ensures generated source files are present upon load
-make prepare
-
-echo "Standard Post-Start script completed."
-
-if [ -f "/devcontainer-customization/poststart.sh" ]; then
-    echo "Launching customization poststart.sh"
-    bash "/devcontainer-customization/poststart.sh"
-fi
--- a/.devcontainer-scripts/utils.sh
+++ b/.devcontainer-scripts/utils.sh
@@ -1,55 +0,0 @@
-#!/bin/bash
-
-# This file contains some really simple functions that are useful when building up customization scripts.
-
-
-# Checks if the git config has a user registered - and sets it up if not.
-#
-# Param 1: name
-# Param 2: email
-#
-config_user() {
-    echo "Configuring git for $1 <$2>"
-    local gcn=$(git config --global user.name)
-    if [ -z "${gcn}" ]; then
-        echo "Setting up git user / remote"
-        git config --global user.name "$1"
-        git config --global user.email "$2"
-        
-    fi
-}
-
-# Checks if the git remote is configured - and sets it up if not. Fetches either way.
-#
-# Param 1: remote name
-# Param 2: remote url
-#
-config_remote() {
-    echo "Adding git remote and fetching $2 as $1"
-    local gr=$(git remote -v | grep $1)
-    if [ -z "${gr}" ]; then
-        git remote add $1 $2
-    fi
-    git fetch $1
-}
-
-# Setup special .ssh files
-# Prints out lines of text to make things pretty
-# Param 1: bash array, filenames relative to the customization directory that should be copied to ~/.ssh
-setup_ssh() {
-    echo "starting ~/.ssh directory setup..."
-    mkdir -p "${HOME}.ssh"
-    chmod 0700 "${HOME}/.ssh"
-    echo "-----"
-    local files=("$@")
-    for file in "${files[@]}" ; do
-        local cfile="/devcontainer-customization/${file}"
-        local hfile="${HOME}/.ssh/${file}"
-        if [ ! -f "${hfile}" ]; then
-            echo "copying \"${file}\""
-            cp "${cfile}" "${hfile}"
-            chmod 600 "${hfile}"
-        fi
-    done
-    echo "~/.ssh directory setup complete!"
-}
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -0,0 +1,3 @@
+ARG GO_VERSION=1.20
+FROM mcr.microsoft.com/devcontainers/go:0-$GO_VERSION-bullseye
+RUN apt-get update && apt-get install -y cmake
--- a/.devcontainer/customization/README.md
+++ b/.devcontainer/customization/README.md
@@ -1,25 +0,0 @@
-Place any additional resources your environment requires in this directory
-
-Script hooks are currently called for:
-`postcreate.sh` and `poststart.sh`
-
-If files with those names exist here, they will be called at the end of the normal script.
-
-This is a good place to set things like `git config --global user.name` - and to handle any other files that are mounted via this directory.
-
-To assist in doing so, `source /.devcontainer-scripts/utils.sh` will provide utility functions that may be useful - for example:
-
-```
-#!/bin/bash
-
-source "/.devcontainer-scripts/utils.sh"
-
-sshfiles=("config" "key.pub")
-
-setup_ssh "${sshfiles[@]}"
-
-config_user "YOUR NAME" "YOUR EMAIL"
-
-config_remote "REMOTE NAME" "REMOTE URL"
-
-```
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -1,24 +1,46 @@
-{
-    "$schema": "https://raw.githubusercontent.com/devcontainers/spec/main/schemas/devContainer.schema.json",
-    "name": "LocalAI",
-    "workspaceFolder": "/workspace",
-    "dockerComposeFile": [ "./docker-compose-devcontainer.yml" ],
-    "service": "api",
-    "shutdownAction": "stopCompose",
-    "customizations": {
-        "vscode": {
-            "extensions": [
-                "golang.go",
-                "ms-vscode.makefile-tools",
-                "ms-azuretools.vscode-docker",
-                "ms-python.python",
-                "ms-python.debugpy",
-                "wayou.vscode-todo-highlight",
-                "waderyan.gitblame"
-            ]
-        }
-    },
-    "forwardPorts": [8080, 3000],
-    "postCreateCommand": "bash /.devcontainer-scripts/postcreate.sh",
-    "postStartCommand": "bash /.devcontainer-scripts/poststart.sh"
-}
+// For format details, see https://aka.ms/devcontainer.json. For config options, see the
+// README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-docker-compose
+{
+	"name": "Existing Docker Compose (Extend)",
+
+	// Update the 'dockerComposeFile' list if you have more compose files or use different names.
+	// The .devcontainer/docker-compose.yml file contains any overrides you need/want to make.
+	"dockerComposeFile": [
+		"../docker-compose.yaml",
+		"docker-compose.yml"
+	],
+
+	// The 'service' property is the name of the service for the container that VS Code should
+	// use. Update this value and .devcontainer/docker-compose.yml to the real service name.
+	"service": "api",
+
+	// The optional 'workspaceFolder' property is the path VS Code should open by default when
+	// connected. This is typically a file mount in .devcontainer/docker-compose.yml
+	"workspaceFolder": "/workspace",
+
+	"features": {
+		"ghcr.io/devcontainers/features/go:1": {},
+		"ghcr.io/azutake/devcontainer-features/go-packages-install:0": {}
+	},
+
+	// Features to add to the dev container. More info: https://containers.dev/features.
+	// "features": {},
+
+	// Use 'forwardPorts' to make a list of ports inside the container available locally.
+	// "forwardPorts": [],
+
+	// Uncomment the next line if you want to start specific services in your Docker Compose config.
+	// "runServices": [],
+
+	// Uncomment the next line if you want to keep your containers running after VS Code shuts down.
+	// "shutdownAction": "none",
+
+	// Uncomment the next line to run commands after the container is created.
+	"postCreateCommand": "make prepare"
+
+	// Configure tool-specific properties.
+	// "customizations": {},
+
+	// Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root.
+	// "remoteUser": "devcontainer"
+}
--- a/.devcontainer/docker-compose-devcontainer.yml
+++ b/.devcontainer/docker-compose-devcontainer.yml
@@ -1,48 +0,0 @@
-services:
-  api:
-    build:
-      context: ..
-      dockerfile: Dockerfile
-      target: devcontainer
-      args:
-      - FFMPEG=true
-      - IMAGE_TYPE=extras
-      - GO_TAGS=stablediffusion p2p tts
-    env_file:
-      - ../.env
-    ports:
-      - 8080:8080
-    volumes:
-      - localai_workspace:/workspace
-      - ../models:/host-models
-      - ./customization:/devcontainer-customization
-    command: /bin/sh -c "while sleep 1000; do :; done"
-    cap_add:
-      - SYS_PTRACE
-    security_opt:
-      - seccomp:unconfined
-  prometheus:
-    image: prom/prometheus
-    container_name: prometheus
-    command:
-      - '--config.file=/etc/prometheus/prometheus.yml'
-    ports:
-      - 9090:9090
-    restart: unless-stopped
-    volumes:
-      - ./prometheus:/etc/prometheus
-      - prom_data:/prometheus
-  grafana:
-    image: grafana/grafana
-    container_name: grafana
-    ports:
-      - 3000:3000
-    restart: unless-stopped
-    environment:
-      - GF_SECURITY_ADMIN_USER=admin
-      - GF_SECURITY_ADMIN_PASSWORD=grafana
-    volumes:
-      - ./grafana:/etc/grafana/provisioning/datasources
-volumes:
-  prom_data:
-  localai_workspace:
--- a/.devcontainer/docker-compose.yml
+++ b/.devcontainer/docker-compose.yml
@@ -0,0 +1,26 @@
+version: '3.6'
+services:
+  # Update this to the name of the service you want to work with in your docker-compose.yml file
+  api:
+    # Uncomment if you want to override the service's Dockerfile to one in the .devcontainer 
+    # folder. Note that the path of the Dockerfile and context is relative to the *primary* 
+    # docker-compose.yml file (the first in the devcontainer.json "dockerComposeFile"
+    # array). The sample below assumes your primary file is in the root of your project.
+    #
+    build:
+      context: .
+      dockerfile: .devcontainer/Dockerfile
+
+    volumes:
+      # Update this to wherever you want VS Code to mount the folder of your project
+      - .:/workspace:cached
+
+    # Uncomment the next four lines if you will use a ptrace-based debugger like C++, Go, and Rust.
+    # cap_add:
+    #   - SYS_PTRACE
+    # security_opt:
+    #   - seccomp:unconfined
+
+    # Overrides default command so things don't shut down after the process ends.
+    command: /bin/sh -c "while sleep 1000; do :; done"
+ 
--- a/.devcontainer/grafana/datasource.yml
+++ b/.devcontainer/grafana/datasource.yml
@@ -1,10 +0,0 @@
-
-apiVersion: 1
-
-datasources:
- name: Prometheus
-  type: prometheus
-  url: http://prometheus:9090 
-  isDefault: true
-  access: proxy
-  editable: true
--- a/.devcontainer/prometheus/prometheus.yml
+++ b/.devcontainer/prometheus/prometheus.yml
@@ -1,21 +0,0 @@
-global:
-  scrape_interval: 15s
-  scrape_timeout: 10s
-  evaluation_interval: 15s
-alerting:
-  alertmanagers:
-  - static_configs:
-    - targets: []
-    scheme: http
-    timeout: 10s
-    api_version: v1
-scrape_configs:
- job_name: prometheus
-  honor_timestamps: true
-  scrape_interval: 15s
-  scrape_timeout: 10s
-  metrics_path: /metrics
-  scheme: http
-  static_configs:
-  - targets:
-    - localhost:9090
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,17 +1 @@
-.idea
-.github
-.vscode
-.devcontainer
 models
-examples/chatbot-ui/models
-examples/rwkv/models
-examples/**/models
-Dockerfile*
-__pycache__
-
-# SonarQube
-.scannerwork
-
-# backend virtual environments
-**/venv
-backend/python/**/source
--- a/.editorconfig
+++ b/.editorconfig
@@ -1,31 +0,0 @@
-
-root = true
-
-[*]
-indent_style = space
-indent_size = 2
-end_of_line = lf
-charset = utf-8
-trim_trailing_whitespace = true
-insert_final_newline = true
-
-[*.go]
-indent_style = tab
-
-[Makefile]
-indent_style = tab
-
-[*.proto]
-indent_size = 2
-
-[*.py]
-indent_size = 4
-
-[*.js]
-indent_size = 2
-
-[*.yaml]
-indent_size = 2
-
-[*.md]
-trim_trailing_whitespace = false
--- a/.env
+++ b/.env
@@ -1,97 +1,5 @@
-## Set number of threads.
-## Note: prefer the number of physical cores. Overbooking the CPU degrades performance notably.
-# LOCALAI_THREADS=14
-
-## Specify a different bind address (defaults to ":8080")
-# LOCALAI_ADDRESS=127.0.0.1:8080
-
-## Default models context size
-# LOCALAI_CONTEXT_SIZE=512
-#
-## Define galleries.
-## Models available to install will be visible in `/models/available`
-# LOCALAI_GALLERIES=[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml@master"}]
-
-## CORS settings
-# LOCALAI_CORS=true
-# LOCALAI_CORS_ALLOW_ORIGINS=*
-
-## Default path for models
-#
-# LOCALAI_MODELS_PATH=/models
-
-## Enable debug mode
-# LOCALAI_LOG_LEVEL=debug
-
-## Disables COMPEL (Diffusers)
-# COMPEL=0
-
-## Enable/Disable single backend (useful if only one GPU is available)
-# LOCALAI_SINGLE_ACTIVE_BACKEND=true
-
-## Specify a build type. Available: cublas, openblas, clblas.
-## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit.
-## OpenBLAS: This is an open-source implementation of the BLAS library that aims to provide highly optimized code for various platforms. It includes support for multi-threading and can be compiled to use hardware-specific features for additional performance. OpenBLAS can run on many kinds of hardware, including CPUs from Intel, AMD, and ARM.
-## clBLAS:   This is an open-source implementation of the BLAS library that uses OpenCL, a framework for writing programs that execute across heterogeneous platforms consisting of CPUs, GPUs, and other processors. clBLAS is designed to take advantage of the parallel computing power of GPUs but can also run on any hardware that supports OpenCL. This includes hardware from different vendors like Nvidia, AMD, and Intel.
-# BUILD_TYPE=openblas
-
-## Uncomment and set to true to enable rebuilding from source
-# REBUILD=true
-
-## Enable go tags, available: stablediffusion, tts
-## stablediffusion: image generation with stablediffusion
-## tts: enables text-to-speech with go-piper 
-## (requires REBUILD=true)
-#
-# GO_TAGS=stablediffusion
-
-## Path where to store generated images
-# LOCALAI_IMAGE_PATH=/tmp/generated/images
-
-## Specify a default upload limit in MB (whisper)
-# LOCALAI_UPLOAD_LIMIT=15
-
-## List of external GRPC backends (note on the container image this variable is already set to use extra backends available in extra/)
-# LOCALAI_EXTERNAL_GRPC_BACKENDS=my-backend:127.0.0.1:9000,my-backend2:/usr/bin/backend.py
-
-### Advanced settings ###
-### These are not used by LocalAI itself, but by components in the stack ###
-##
-### Preload libraries
-# LD_PRELOAD=
-
-### Huggingface cache for models
-# HUGGINGFACE_HUB_CACHE=/usr/local/huggingface
-
-### Python backends GRPC max workers
-### Default number of workers for GRPC Python backends.
-### This actually controls whether a backend can process multiple requests or not.
-# PYTHON_GRPC_MAX_WORKERS=1
-
-### Define the number of parallel LLAMA.cpp workers (Defaults to 1)
-# LLAMACPP_PARALLEL=1
-
-### Define a list of GRPC Servers for llama-cpp workers to distribute the load
-# https://github.com/ggerganov/llama.cpp/pull/6829
-# https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md
-# LLAMACPP_GRPC_SERVERS=""
-
-### Enable to run parallel requests
-# LOCALAI_PARALLEL_REQUESTS=true
-
-# Enable to allow p2p mode
-# LOCALAI_P2P=true
-
-### Watchdog settings
-###
-# Enables watchdog to kill backends that are inactive for too much time
-# LOCALAI_WATCHDOG_IDLE=true
-#
-# Time in duration format (e.g. 1h30m) after which a backend is considered idle
-# LOCALAI_WATCHDOG_IDLE_TIMEOUT=5m
-#
-# Enables watchdog to kill backends that are busy for too much time
-# LOCALAI_WATCHDOG_BUSY=true
-#
-# Time in duration format (e.g. 1h30m) after which a backend is considered busy
-# LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m
+# THREADS=14
+# CONTEXT_SIZE=512
+MODELS_PATH=/models
+# DEBUG=true
+# BUILD_TYPE=generic
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,2 +0,0 @@
-*.sh text eol=lf
-backend/cpp/llama/*.hpp linguist-vendored
--- a/.github/FUNDING.yml
+++ b/.github/FUNDING.yml
@@ -1,5 +0,0 @@
-# These are supported funding model platforms
-
-github: [mudler]
-custom: 
- https://www.buymeacoffee.com/mudler
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -1,29 +0,0 @@
---
-name: Bug report
-about: Create a report to help us improve
-title: ''
-labels: bug, unconfirmed, up-for-grabs
---
-
-<!-- Thanks for helping us to improve LocalAI! We welcome all bug reports. Please fill out each area of the template so we can better help you. Comments like this will be hidden when you post but you can delete them if you wish. -->
-
-**LocalAI version:**
-<!-- Container Image or LocalAI tag/commit -->
-
-**Environment, CPU architecture, OS, and Version:**
-<!-- Provide the output from "uname -a", HW specs, if it's a VM  -->
-
-**Describe the bug**
-<!-- A clear and concise description of what the bug is. -->
-
-**To Reproduce**
-<!-- Steps to reproduce the behavior, including the LocalAI command used, if any -->
-
-**Expected behavior**
-<!-- A clear and concise description of what you expected to happen. -->
-
-**Logs**
-<!-- If applicable, add logs while running LocalAI in debug mode (`--debug` or `DEBUG=true`) to help explain your problem.  -->
-
-**Additional context**
-<!-- Add any other context about the problem here. -->
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -1,8 +0,0 @@
-blank_issues_enabled: false
-contact_links:
-  - name: Community Support
-    url: https://github.com/go-skynet/LocalAI/discussions
-    about: Please ask and answer questions here.
-  - name: Discord
-    url: https://discord.gg/uJAeKSAGDy
-    about: Join our community on Discord!
--- a/.github/ISSUE_TEMPLATE/feature_request.md
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -1,20 +0,0 @@
---
-name: Feature request
-about: Suggest an idea for this project
-title: ''
-labels: enhancement, up-for-grabs
---
-
-<!-- Thanks for helping us to improve LocalAI! We welcome all feature requests. Please fill out each area of the template so we can better help you. Comments like this will be hidden when you post but you can delete them if you wish. -->
-
-**Is your feature request related to a problem? Please describe.**
-<!-- A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]  -->
-
-**Describe the solution you'd like**
-<!-- A clear and concise description of what you want to happen.  -->
-
-**Describe alternatives you've considered**
-<!-- A clear and concise description of any alternative solutions or features you've considered.  -->
-
-**Additional context**
-<!-- Add any other context or screenshots about the feature request here. -->
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -1,31 +0,0 @@
-**Description**
-
-This PR fixes #
-
-**Notes for Reviewers**
-
-
-**[Signed commits](../CONTRIBUTING.md#signing-off-on-commits-developer-certificate-of-origin)**
- [ ] Yes, I signed my commits.
- 
-<!--
-Thank you for contributing to LocalAI! 
-
-Contributing Conventions
-------------------------
-
-The draft above helps to give a quick overview of your PR.
-
-Remember to remove this comment and to at least:
-
-1. Include descriptive PR titles with [<component-name>] prepended. We use [conventional commits](https://www.conventionalcommits.org/en/v1.0.0/).
-2. Build and test your changes before submitting a PR (`make build`). 
-3. Sign your commits
-4. **Tag maintainer:** for a quicker response, tag the relevant maintainer (see below).
-5. **X/Twitter handle:** we announce bigger features on X/Twitter. If your PR gets announced, and you'd like a mention, we'll gladly shout you out!
-
-By following the community's contribution conventions upfront, the review process will 
-be accelerated and your PR merged more quickly.
-
-If no one reviews your PR within a few days, please @-mention @mudler.
-->
--- a/.github/bump_deps.sh
+++ b/.github/bump_deps.sh
@@ -1,22 +0,0 @@
-#!/bin/bash
-set -xe
-REPO=$1
-BRANCH=$2
-VAR=$3
-
-LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")
-
-# Read $VAR from Makefile (only first match)
-set +e
-CURRENT_COMMIT="$(grep -m1 "^$VAR?=" Makefile | cut -d'=' -f2)"
-set -e
-
-sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
-
-if [ -z "$CURRENT_COMMIT" ]; then
-    echo "Could not find $VAR in Makefile."
-    exit 0
-fi
-
-echo "Changes: https://github.com/$REPO/compare/${CURRENT_COMMIT}..${LAST_COMMIT}" >> "${VAR}_message.txt"
-echo "${LAST_COMMIT}" >> "${VAR}_commit.txt"
--- a/.github/bump_docs.sh
+++ b/.github/bump_docs.sh
@@ -1,7 +0,0 @@
-#!/bin/bash
-set -xe
-REPO=$1
-
-LATEST_TAG=$(curl -s "https://api.github.com/repos/$REPO/releases/latest" | jq -r '.tag_name')
-
-cat <<< $(jq ".version = \"$LATEST_TAG\"" docs/data/version.json) > docs/data/version.json
--- a/.github/check_and_update.py
+++ b/.github/check_and_update.py
@@ -1,85 +0,0 @@
-import hashlib
-from huggingface_hub import hf_hub_download, get_paths_info
-import requests
-import sys
-import os
-
-uri = sys.argv[1]
-file_name = uri.split('/')[-1]
-
-# Function to parse the URI and determine download method
-def parse_uri(uri):
-    if uri.startswith('huggingface://'):
-        repo_id = uri.split('://')[1]
-        return 'huggingface', repo_id.rsplit('/', 1)[0]
-    elif 'huggingface.co' in uri:
-        parts = uri.split('/resolve/')
-        if len(parts) > 1:
-            repo_path = parts[0].split('https://huggingface.co/')[-1]
-            return 'huggingface', repo_path
-    return 'direct', uri
-
-def calculate_sha256(file_path):
-    sha256_hash = hashlib.sha256()
-    with open(file_path, 'rb') as f:
-        for byte_block in iter(lambda: f.read(4096), b''):
-            sha256_hash.update(byte_block)
-    return sha256_hash.hexdigest()
-
-def manual_safety_check_hf(repo_id):
-    scanResponse = requests.get('https://huggingface.co/api/models/' + repo_id + "/scan")
-    scan = scanResponse.json()
-    # Check if 'hasUnsafeFile' exists in the response
-    if 'hasUnsafeFile' in scan:
-        if scan['hasUnsafeFile']:
-            return scan
-        else:
-            return None
-    else:
-        return None
-
-download_type, repo_id_or_url = parse_uri(uri)
-
-new_checksum =  None
-file_path = None
-
-# Decide download method based on URI type
-if download_type == 'huggingface':
-    # Check if the repo is flagged as dangerous by HF
-    hazard = manual_safety_check_hf(repo_id_or_url)
-    if hazard != None:
-        print(f'Error: HuggingFace has detected security problems for {repo_id_or_url}: {str(hazard)}', filename=file_name)
-        sys.exit(5)
-    # Use HF API to pull sha
-    for file in get_paths_info(repo_id_or_url, [file_name], repo_type='model'):
-        try:
-            new_checksum = file.lfs.sha256
-            break
-        except Exception as e:
-            print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
-            sys.exit(2)
-    if new_checksum is None:
-        try:
-            file_path = hf_hub_download(repo_id=repo_id_or_url, filename=file_name)
-        except Exception as e:
-            print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
-            sys.exit(2)
-else:
-    response = requests.get(repo_id_or_url)
-    if response.status_code == 200:
-        with open(file_name, 'wb') as f:
-            f.write(response.content)
-        file_path = file_name
-    elif response.status_code == 404:
-        print(f'File not found: {response.status_code}', file=sys.stderr)
-        sys.exit(2)
-    else:
-        print(f'Error downloading file: {response.status_code}', file=sys.stderr)
-        sys.exit(1)
-
-if new_checksum is None:
-    new_checksum = calculate_sha256(file_path)
-    print(new_checksum)
-    os.remove(file_path)
-else:
-    print(new_checksum)
--- a/.github/checksum_checker.sh
+++ b/.github/checksum_checker.sh
@@ -1,63 +0,0 @@
-#!/bin/bash
-# This scripts needs yq and huggingface_hub to be installed
-# to install hugingface_hub run pip install huggingface_hub
-
-# Path to the input YAML file
-input_yaml=$1
-
-# Function to download file and check checksum using Python
-function check_and_update_checksum() {
-    model_name="$1"
-    file_name="$2"
-    uri="$3"
-    old_checksum="$4"
-    idx="$5"
-
-    # Download the file and calculate new checksum using Python
-    new_checksum=$(python3 ./.github/check_and_update.py $uri)
-    result=$?
-
-    if [[ $result -eq 5 ]]; then
-        echo "Contaminated entry detected, deleting entry for $model_name..."
-        yq eval -i "del([$idx])" "$input_yaml"
-        return
-    fi
-
-    if [[ "$new_checksum" == "" ]]; then
-        echo "Error calculating checksum for $file_name. Skipping..."
-        return
-    fi
-
-    echo "Checksum for $file_name: $new_checksum"
-
-    # Compare and update the YAML file if checksums do not match
-    
-    if [[ $result -eq 2 ]]; then
-        echo "File not found, deleting entry for $file_name..."
-        # yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\"))" "$input_yaml"
-    elif [[ "$old_checksum" != "$new_checksum" ]]; then
-        echo "Checksum mismatch for $file_name. Updating..."
-        yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\").sha256)" "$input_yaml"
-        yq eval -i "(.[$idx].files[] | select(.filename == \"$file_name\")).sha256 = \"$new_checksum\"" "$input_yaml"
-    elif [[ $result -ne 0 ]]; then
-        echo "Error downloading file $file_name. Skipping..."
-    else
-        echo "Checksum match for $file_name. No update needed."
-    fi
-}
-
-# Read the YAML and process each file
-len=$(yq eval '. | length' "$input_yaml")
-for ((i=0; i<$len; i++))
-do
-    name=$(yq eval ".[$i].name" "$input_yaml")
-    files_len=$(yq eval ".[$i].files | length" "$input_yaml")
-    for ((j=0; j<$files_len; j++))
-    do
-        filename=$(yq eval ".[$i].files[$j].filename" "$input_yaml")
-        uri=$(yq eval ".[$i].files[$j].uri" "$input_yaml")
-        checksum=$(yq eval ".[$i].files[$j].sha256" "$input_yaml")
-        echo "Checking model $name, file $filename. URI = $uri, Checksum = $checksum"
-        check_and_update_checksum "$name" "$filename" "$uri" "$checksum" "$i"
-    done
-done
--- a/.github/ci/modelslist.go
+++ b/.github/ci/modelslist.go
@@ -1,304 +0,0 @@
-package main
-
-import (
-	"fmt"
-	"html/template"
-	"io/ioutil"
-	"os"
-
-	"github.com/microcosm-cc/bluemonday"
-	"gopkg.in/yaml.v3"
-)
-
-var modelPageTemplate string = `
-<!DOCTYPE html>
-<html>
-<head>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>LocalAI models</title>
-    <link href="https://cdnjs.cloudflare.com/ajax/libs/flowbite/2.3.0/flowbite.min.css" rel="stylesheet" />
-    <script src="https://cdn.jsdelivr.net/npm/vanilla-lazyload@19.1.3/dist/lazyload.min.js"></script>
-
-    <link
-    rel="stylesheet"
-    href="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/styles/default.min.css"
-  />
-    <script
-    defer
-    src="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/highlight.min.js"
-  ></script>
-    <script
-    defer
-    src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"
-  ></script>
-  <script
-    defer
-    src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"
-  ></script>
-  <script
-    defer
-    src="https://cdn.jsdelivr.net/npm/dompurify@3.0.6/dist/purify.min.js"
-  ></script>
-
-  <link href="/static/general.css" rel="stylesheet" />
-    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Roboto:wght@400;500&display=swap" rel="stylesheet">
-    <link
-    href="https://fonts.googleapis.com/css?family=Roboto:300,400,500,700,900&display=swap"
-    rel="stylesheet" />
-  <link
-    rel="stylesheet"
-    href="https://cdn.jsdelivr.net/npm/tw-elements/css/tw-elements.min.css" />
-  <script src="https://cdn.tailwindcss.com/3.3.0"></script>
-  <script>
-    tailwind.config = {
-      darkMode: "class",
-      theme: {
-        fontFamily: {
-          sans: ["Roboto", "sans-serif"],
-          body: ["Roboto", "sans-serif"],
-          mono: ["ui-monospace", "monospace"],
-        },
-      },
-      corePlugins: {
-        preflight: false,
-      },
-    };
-  </script>
-    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.1.1/css/all.min.css">
-    <script src="https://unpkg.com/htmx.org@1.9.12" integrity="sha384-ujb1lZYygJmzgSwoxRggbCHcjc0rB2XoQrxeTUQyRjrOnlCoYta87iKBWq3EsdM2" crossorigin="anonymous"></script>
-</head>
-
-<body class="bg-gray-900 text-gray-200">
-<div class="flex flex-col min-h-screen">
-
-<nav class="bg-gray-800 shadow-lg">
-    <div class="container mx-auto px-4 py-4">
-        <div class="flex items-center justify-between">
-            <div class="flex items-center">
-                <a href="/" class="text-white text-xl font-bold"><img src="https://github.com/mudler/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo" class="h-10 mr-3 border-2 border-gray-300 shadow rounded"></a>
-                <a href="/" class="text-white text-xl font-bold">LocalAI</a>
-            </div>
-            <!-- Menu button for small screens -->
-            <div class="lg:hidden">
-                <button id="menu-toggle" class="text-gray-400 hover:text-white focus:outline-none">
-                    <i class="fas fa-bars fa-lg"></i>
-                </button>
-            </div>
-            <!-- Navigation links -->
-            <div class="hidden lg:flex lg:items-center lg:justify-end lg:flex-1 lg:w-0">
-                <a href="https://localai.io" class="text-gray-400 hover:text-white px-3 py-2 rounded" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
-            </div>
-        </div>
-        <!-- Collapsible menu for small screens -->
-        <div class="hidden lg:hidden" id="mobile-menu">
-            <div class="pt-4 pb-3 border-t border-gray-700">
-
-                <a href="https://localai.io" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
-
-            </div>
-        </div>
-    </div>
-</nav>
-
-<style>
-  .is-hidden {
-	display: none;
-	  }
-</style>
-
-<div class="container mx-auto px-4 flex-grow">
-
-<div class="models mt-12">
-	<h2 class="text-center text-3xl font-semibold text-gray-100">
-	LocalAI model gallery list </h2><br>
-
-	<h2 class="text-center text-3xl font-semibold text-gray-100">
-
-	 🖼️ Available {{.AvailableModels}} models</i> <a href="https://localai.io/models/" target="_blank" >
-			<i class="fas fa-circle-info pr-2"></i>
-		</a></h2>
-
-	<h3>
-	Refer to the Model gallery <a href="https://localai.io/models/" target="_blank" ><i class="fas fa-circle-info pr-2"></i></a> for more information on how to use the models with LocalAI.<br>
-
-	You can install models with the CLI command <code>local-ai models install <model-name></code>. or by using the WebUI.
-	</h3>
-
-	<input class="form-control appearance-none block w-full mt-5 px-3 py-2 text-base font-normal text-gray-300 pb-2 mb-5 bg-gray-800 bg-clip-padding border border-solid border-gray-600 rounded transition ease-in-out m-0 focus:text-gray-300 focus:bg-gray-900 focus:border-blue-500 focus:outline-none" type="search"
-	id="searchbox" placeholder="Live search keyword..">
-	  <div class="dark grid grid-cols-1 grid-rows-1 md:grid-cols-3 block rounded-lg shadow-secondary-1 dark:bg-surface-dark">
-		{{ range $_, $model := .Models }}
-		<div class="box me-4 mb-2 block rounded-lg bg-white shadow-secondary-1  dark:bg-gray-800 dark:bg-surface-dark dark:text-white text-surface pb-2">
-		<div>
-		    {{ $icon := "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg" }}
-			{{ if $model.Icon }}
-	  		{{ $icon = $model.Icon }}
-	  		{{ end }}
-			<div class="flex justify-center items-center">
-				<img data-src="{{ $icon }}" alt="{{$model.Name}}" class="rounded-t-lg max-h-48 max-w-96 object-cover mt-3 lazy">
-			</div>
-	  		<div class="p-6 text-surface dark:text-white">
-				<h5 class="mb-2 text-xl font-medium leading-tight">{{$model.Name}}</h5>
-
-
-				<p class="mb-4 text-base truncate">{{ $model.Description }}</p>
-
-			</div>
-			<div class="px-6 pt-4 pb-2">
-
-      <!-- Modal toggle -->
-      <button data-modal-target="{{ $model.Name}}-modal" data-modal-toggle="{{ $model.Name }}-modal" class="block text-white bg-blue-700 hover:bg-blue-800 focus:ring-4 focus:outline-none focus:ring-blue-300 font-medium rounded-lg text-sm px-5 py-2.5 text-center dark:bg-blue-600 dark:hover:bg-blue-700 dark:focus:ring-blue-800" type="button">
-        More info
-      </button>
-
-    <!-- Main modal -->
-    <div id="{{ $model.Name}}-modal" tabindex="-1" aria-hidden="true" class="hidden overflow-y-auto overflow-x-hidden fixed top-0 right-0 left-0 z-50 justify-center items-center w-full md:inset-0 h-[calc(100%-1rem)] max-h-full">
-        <div class="relative p-4 w-full max-w-2xl max-h-full">
-            <!-- Modal content -->
-            <div class="relative bg-white rounded-lg shadow dark:bg-gray-700">
-                <!-- Modal header -->
-                <div class="flex items-center justify-between p-4 md:p-5 border-b rounded-t dark:border-gray-600">
-                    <h3 class="text-xl font-semibold text-gray-900 dark:text-white">
-                        {{ $model.Name}}
-                    </h3>
-                    <button type="button" class="text-gray-400 bg-transparent hover:bg-gray-200 hover:text-gray-900 rounded-lg text-sm w-8 h-8 ms-auto inline-flex justify-center items-center dark:hover:bg-gray-600 dark:hover:text-white" data-modal-hide="{{$model.Name}}-modal">
-                        <svg class="w-3 h-3" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 14 14">
-                            <path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="m1 1 6 6m0 0 6 6M7 7l6-6M7 7l-6 6"/>
-                        </svg>
-                        <span class="sr-only">Close modal</span>
-                    </button>
-                </div>
-                <!-- Modal body -->
-                <div class="p-4 md:p-5 space-y-4">
-                    <div class="flex justify-center items-center">
-                    <img data-src="{{ $icon }}" alt="{{$model.Name}}" class="lazy rounded-t-lg max-h-48 max-w-96 object-cover mt-3">
-                  </div>
-
-                    <p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">
-                    {{ $model.Description }}
-
-                    </p>
-
-                    <p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">
-                    To install the model with the CLI, run: <br>
-                    <code> local-ai models install {{$model.Name}} </code> <br>
-
-                    <hr>
-                    See also <a href="https://localai.io/models/" target="_blank" >
-                    Installation <i class="fas fa-circle-info pr-2"></i>
-                    </a> to see how to install models with the REST API.
-                    </p>
-
-                    <p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">
-                    <ul>
-                    {{ range $_, $u := $model.URLs }}
-                    <li><a href="{{ $u }}" target=_blank><i class="fa-solid fa-link"></i> {{ $u }}</a></li>
-                    {{ end }}
-                    </ul>
-                    </p>
-                </div>
-                <!-- Modal footer -->
-                <div class="flex items-center p-4 md:p-5 border-t border-gray-200 rounded-b dark:border-gray-600">
-                    <button data-modal-hide="{{ $model.Name}}-modal" type="button" class="py-2.5 px-5 ms-3 text-sm font-medium text-gray-900 focus:outline-none bg-white rounded-lg border border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-4 focus:ring-gray-100 dark:focus:ring-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:border-gray-600 dark:hover:text-white dark:hover:bg-gray-700">Close</button>
-                </div>
-            </div>
-        </div>
-    </div>
-
-
-			</div>
-		</div>
-		</div>
-		{{ end }}
-
-		</div>
-  </div>
-</div>
-
-<script>
-var lazyLoadInstance = new LazyLoad({
-  // Your custom settings go here
-});
-
-let cards = document.querySelectorAll('.box')
-
-function liveSearch() {
-    let search_query = document.getElementById("searchbox").value;
-
-    //Use innerText if all contents are visible
-    //Use textContent for including hidden elements
-    for (var i = 0; i < cards.length; i++) {
-        if(cards[i].textContent.toLowerCase()
-                .includes(search_query.toLowerCase())) {
-            cards[i].classList.remove("is-hidden");
-        } else {
-            cards[i].classList.add("is-hidden");
-        }
-    }
-}
-
-//A little delay
-let typingTimer;
-let typeInterval = 500;
-let searchInput = document.getElementById('searchbox');
-
-searchInput.addEventListener('keyup', () => {
-    clearTimeout(typingTimer);
-    typingTimer = setTimeout(liveSearch, typeInterval);
-});
-</script>
-
-</div>
-
-<script src="https://cdnjs.cloudflare.com/ajax/libs/flowbite/2.3.0/flowbite.min.js"></script>
-</body>
-</html>
-`
-
-type GalleryModel struct {
-	Name        string   `json:"name" yaml:"name"`
-	URLs        []string `json:"urls" yaml:"urls"`
-	Icon        string   `json:"icon" yaml:"icon"`
-	Description string   `json:"description" yaml:"description"`
-}
-
-func main() {
-	// read the YAML file which contains the models
-
-	f, err := ioutil.ReadFile(os.Args[1])
-	if err != nil {
-		fmt.Println("Error reading file:", err)
-		return
-	}
-
-	models := []*GalleryModel{}
-	err = yaml.Unmarshal(f, &models)
-	if err != nil {
-		// write to stderr
-		os.Stderr.WriteString("Error unmarshaling YAML: " + err.Error() + "\n")
-		return
-	}
-
-	// Ensure that all arbitrary text content is sanitized before display
-	for i, m := range models {
-		models[i].Name = bluemonday.StrictPolicy().Sanitize(m.Name)
-		models[i].Description = bluemonday.StrictPolicy().Sanitize(m.Description)
-	}
-
-	// render the template
-	data := struct {
-		Models          []*GalleryModel
-		AvailableModels int
-	}{
-		Models:          models,
-		AvailableModels: len(models),
-	}
-	tmpl := template.Must(template.New("modelPage").Parse(modelPageTemplate))
-
-	err = tmpl.Execute(os.Stdout, data)
-	if err != nil {
-		fmt.Println("Error executing template:", err)
-		return
-	}
-}
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -1,135 +0,0 @@
-# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
-version: 2
-updates:
-  - package-ecosystem: "gitsubmodule"
-    directory: "/"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "gomod"
-    directory: "/"
-    schedule:
-      interval: "weekly"
-    ignore:
-    - dependency-name: "github.com/mudler/LocalAI/pkg/grpc/proto"
-  - package-ecosystem: "github-actions"
-    # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
-    directory: "/"
-    schedule:
-      # Check for updates to GitHub Actions every weekday
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
-    directory: "/"
-    schedule:
-      # Check for updates to GitHub Actions every weekday
-      interval: "weekly"
-  - package-ecosystem: "docker"
-    # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
-    directory: "/"
-    schedule:
-      # Check for updates to GitHub Actions every weekday
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/autogptq"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/bark"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/common/template"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/coqui"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/diffusers"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/exllama"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/exllama2"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/mamba"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/openvoice"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/parler-tts"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/rerankers"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/sentencetransformers"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/transformers"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/transformers-musicgen"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/vall-e-x"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/vllm"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/examples/chainlit"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/examples/functions"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/examples/langchain/langchainpy-localai-example"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/examples/langchain-chroma"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/examples/streamlit-bot"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "docker"
-    directory: "/examples/k8sgpt"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "docker"
-    directory: "/examples/kubernetes"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "docker"
-    directory: "/examples/langchain"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "gomod"
-    directory: "/examples/semantic-todo"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "docker"
-    directory: "/examples/telegram-bot"
-    schedule:
-      interval: "weekly"
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -1,24 +0,0 @@
-enhancements:
- - head-branch: ['^feature', 'feature']
-
-kind/documentation:
- any:
-  - changed-files:
-    - any-glob-to-any-file: 'docs/*'
-  - changed-files:
-    - any-glob-to-any-file: '*.md'
-
-area/ai-model:
- any:
-  - changed-files:
-    - any-glob-to-any-file: 'gallery/*'
-
-examples:
- any:
-  - changed-files:
-    - any-glob-to-any-file: 'examples/*'
-
-ci:
- any:
-  - changed-files:
-    - any-glob-to-any-file: '.github/*'
--- a/.github/release.yml
+++ b/.github/release.yml
@@ -1,37 +0,0 @@
-# .github/release.yml
-
-changelog:
-  exclude:
-    labels:
-      - ignore-for-release
-  categories:
-    - title: Breaking Changes 🛠
-      labels:
-        - Semver-Major
-        - breaking-change
-    - title: "Bug fixes :bug:"
-      labels:
-        - bug
-        - regression
-    - title: "🖧 P2P area"
-      labels:
-         - area/p2p
-    - title: Exciting New Features 🎉
-      labels:
-        - Semver-Minor
-        - enhancement
-        - ux
-        - roadmap
-    - title: 🧠 Models
-      labels:
-        - area/ai-model
-    - title: 📖 Documentation and examples
-      labels:
-        - kind/documentation
-        - examples
-    - title: 👒 Dependencies
-      labels:
-        - dependencies
-    - title: Other Changes
-      labels:
-        - "*"
--- a/.github/stale.yml
+++ b/.github/stale.yml
@@ -1,18 +0,0 @@
-# Number of days of inactivity before an issue becomes stale
-daysUntilStale: 45
-# Number of days of inactivity before a stale issue is closed
-daysUntilClose: 10
-# Issues with these labels will never be considered stale
-exemptLabels:
-  - issue/willfix
-# Label to use when marking an issue as stale
-staleLabel: issue/stale
-# Comment to post when marking an issue as stale. Set to `false` to disable
-markComment: >
-  This issue has been automatically marked as stale because it has not had
-  recent activity. It will be closed if no further activity occurs. Thank you
-  for your contributions.
-# Comment to post when closing a stale issue. Set to `false` to disable
-closeComment: >
-  This issue is being automatically closed due to inactivity.
-  However, you may choose to reopen this issue.
--- a/.github/workflows/bump_deps.yaml
+++ b/.github/workflows/bump_deps.yaml
@@ -1,70 +0,0 @@
-name: Bump dependencies
-on:
-  schedule:
-    - cron: 0 20 * * *
-  workflow_dispatch:
-jobs:
-  bump:
-    strategy:
-      fail-fast: false
-      matrix:
-        include:
-          - repository: "ggerganov/llama.cpp"
-            variable: "CPPLLAMA_VERSION"
-            branch: "master"
-          - repository: "go-skynet/go-ggml-transformers.cpp"
-            variable: "GOGGMLTRANSFORMERS_VERSION"
-            branch: "master"
-          - repository: "donomii/go-rwkv.cpp"
-            variable: "RWKV_VERSION"
-            branch: "main"
-          - repository: "ggerganov/whisper.cpp"
-            variable: "WHISPER_CPP_VERSION"
-            branch: "master"
-          - repository: "go-skynet/go-bert.cpp"
-            variable: "BERT_VERSION"
-            branch: "master"
-          - repository: "go-skynet/bloomz.cpp"
-            variable: "BLOOMZ_VERSION"
-            branch: "main"
-          - repository: "mudler/go-ggllm.cpp"
-            variable: "GOGGLLM_VERSION"
-            branch: "master"
-          - repository: "mudler/go-stable-diffusion"
-            variable: "STABLEDIFFUSION_VERSION"
-            branch: "master"
-          - repository: "mudler/go-piper"
-            variable: "PIPER_VERSION"
-            branch: "master"
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - name: Bump dependencies 🔧
-        id: bump
-        run: |
-          bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
-          {
-            echo 'message<<EOF'
-            cat "${{ matrix.variable }}_message.txt"
-            echo EOF
-          } >> "$GITHUB_OUTPUT"
-          {
-            echo 'commit<<EOF'
-            cat "${{ matrix.variable }}_commit.txt"
-            echo EOF
-          } >> "$GITHUB_OUTPUT"
-          rm -rfv ${{ matrix.variable }}_message.txt
-          rm -rfv ${{ matrix.variable }}_commit.txt
-      - name: Create Pull Request
-        uses: peter-evans/create-pull-request@v7
-        with:
-          token: ${{ secrets.UPDATE_BOT_TOKEN }}
-          push-to-fork: ci-forks/LocalAI
-          commit-message: ':arrow_up: Update ${{ matrix.repository }}'
-          title: 'chore: :arrow_up: Update ${{ matrix.repository }} to `${{ steps.bump.outputs.commit }}`'
-          branch: "update/${{ matrix.variable }}"
-          body: ${{ steps.bump.outputs.message }}
-          signoff: true
-
-
-
--- a/.github/workflows/bump_docs.yaml
+++ b/.github/workflows/bump_docs.yaml
@@ -1,31 +0,0 @@
-name: Bump dependencies
-on:
-  schedule:
-    - cron: 0 20 * * *
-  workflow_dispatch:
-jobs:
-  bump:
-    strategy:
-      fail-fast: false
-      matrix:
-        include:
-          - repository: "mudler/LocalAI"
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - name: Bump dependencies 🔧
-        run: |
-          bash .github/bump_docs.sh ${{ matrix.repository }}
-      - name: Create Pull Request
-        uses: peter-evans/create-pull-request@v7
-        with:
-          token: ${{ secrets.UPDATE_BOT_TOKEN }}
-          push-to-fork: ci-forks/LocalAI
-          commit-message: ':arrow_up: Update docs version ${{ matrix.repository }}'
-          title: 'docs: :arrow_up: update docs version ${{ matrix.repository }}'
-          branch: "update/docs"
-          body: Bump of ${{ matrix.repository }} version inside docs
-          signoff: true
-
-
-
--- a/.github/workflows/checksum_checker.yaml
+++ b/.github/workflows/checksum_checker.yaml
@@ -1,47 +0,0 @@
-name: Check if checksums are up-to-date
-on:
-  schedule:
-    - cron: 0 20 * * *
-  workflow_dispatch:
-jobs:
-  checksum_check:
-    runs-on: arc-runner-set
-    steps:
-      - name: Force Install GIT latest
-        run: |
-          sudo apt-get update \
-          && sudo apt-get install -y software-properties-common \
-          && sudo apt-get update \
-          && sudo add-apt-repository -y ppa:git-core/ppa \
-          && sudo apt-get update \
-          && sudo apt-get install -y git
-      - uses: actions/checkout@v4
-      - name: Install dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y pip wget
-          sudo pip install --upgrade pip
-          pip install huggingface_hub
-      - name: 'Setup yq'
-        uses: dcarbone/install-yq-action@v1.3.1
-        with:
-          version: 'v4.44.2'
-          download-compressed: true
-          force: true
-
-      - name: Checksum checker 🔧
-        run: |
-          export HF_HOME=/hf_cache
-          sudo mkdir /hf_cache
-          sudo chmod 777 /hf_cache
-          bash .github/checksum_checker.sh gallery/index.yaml
-      - name: Create Pull Request
-        uses: peter-evans/create-pull-request@v7
-        with:
-          token: ${{ secrets.UPDATE_BOT_TOKEN }}
-          push-to-fork: ci-forks/LocalAI
-          commit-message: ':arrow_up: Checksum updates in gallery/index.yaml'
-          title: 'chore(model-gallery): :arrow_up: update checksum'
-          branch: "update/checksum"
-          body: Updating checksums in gallery/index.yaml
-          signoff: true
--- a/.github/workflows/dependabot_auto.yml
+++ b/.github/workflows/dependabot_auto.yml
@@ -1,43 +0,0 @@
-name: Dependabot auto-merge
-on:
- pull_request_target
-
-permissions:
-  contents: write
-  pull-requests: write
-  packages: read
-
-jobs:
-  dependabot:
-    runs-on: ubuntu-latest
-    if: ${{ github.actor == 'dependabot[bot]' }}
-    steps:
-      - name: Dependabot metadata
-        id: metadata
-        uses: dependabot/fetch-metadata@v2.2.0
-        with:
-          github-token: "${{ secrets.GITHUB_TOKEN }}"
-          skip-commit-verification: true
-
-      - name: Checkout repository
-        uses: actions/checkout@v4
-
-      - name: Approve a PR if not already approved
-        run: |
-          gh pr checkout "$PR_URL"
-            if [ "$(gh pr status --json reviewDecision -q .currentBranch.reviewDecision)" != "APPROVED" ];
-          then
-            gh pr review --approve "$PR_URL"
-          else
-            echo "PR already approved.";
-          fi
-        env:
-          PR_URL: ${{github.event.pull_request.html_url}}
-          GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
-
-      - name: Enable auto-merge for Dependabot PRs
-        if: ${{ contains(github.event.pull_request.title, 'bump')}}
-        run: gh pr merge --auto --squash "$PR_URL"
-        env:
-          PR_URL: ${{github.event.pull_request.html_url}}
-          GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
--- a/.github/workflows/deploy-explorer.yaml
+++ b/.github/workflows/deploy-explorer.yaml
@@ -1,64 +0,0 @@
-name: Explorer deployment
-
-on:
-  push:
-    branches:
-      - master
-    tags:
-      - 'v*'
-
-concurrency:
-  group: ci-deploy-${{ github.head_ref || github.ref }}-${{ github.repository }}
-
-jobs:
-  build-linux:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: actions/setup-go@v5
-        with:
-          go-version: '1.21.x'
-          cache: false
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
-          make protogen-go
-      - name: Build api
-        run: |
-          CGO_ENABLED=0 make build-api
-      - name: rm
-        uses: appleboy/ssh-action@v1.2.0
-        with:
-            host: ${{ secrets.EXPLORER_SSH_HOST }}
-            username: ${{ secrets.EXPLORER_SSH_USERNAME }}
-            key: ${{ secrets.EXPLORER_SSH_KEY }}
-            port: ${{ secrets.EXPLORER_SSH_PORT }}
-            script: |
-                sudo rm -rf local-ai/ || true
-      - name: copy file via ssh
-        uses: appleboy/scp-action@v0.1.7
-        with:
-            host: ${{ secrets.EXPLORER_SSH_HOST }}
-            username: ${{ secrets.EXPLORER_SSH_USERNAME }}
-            key: ${{ secrets.EXPLORER_SSH_KEY }}
-            port: ${{ secrets.EXPLORER_SSH_PORT }}
-            source: "local-ai"
-            overwrite: true
-            rm: true
-            target: ./local-ai
-      - name: restarting
-        uses: appleboy/ssh-action@v1.2.0
-        with:
-            host: ${{ secrets.EXPLORER_SSH_HOST }}
-            username: ${{ secrets.EXPLORER_SSH_USERNAME }}
-            key: ${{ secrets.EXPLORER_SSH_KEY }}
-            port: ${{ secrets.EXPLORER_SSH_PORT }}
-            script: |
-                sudo cp -rfv local-ai/local-ai /usr/bin/local-ai
-                sudo systemctl restart local-ai
--- a/.github/workflows/disabled/comment-pr.yaml
+++ b/.github/workflows/disabled/comment-pr.yaml
@@ -1,83 +0,0 @@
-name: Comment PRs
-on:
-  pull_request_target:
-
-jobs:
-  comment-pr:
-    env:
-        MODEL_NAME: hermes-2-theta-llama-3-8b
-    runs-on: ubuntu-latest
-    steps:
-    - name: Checkout code
-      uses: actions/checkout@v3
-      with:
-        ref: "${{ github.event.pull_request.merge_commit_sha }}"
-        fetch-depth: 0 # needed to checkout all branches for this Action to work
-    - uses: mudler/localai-github-action@v1
-      with:
-        model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
-      # Check the PR diff using the current branch and the base branch of the PR
-    - uses: GrantBirki/git-diff-action@v2.7.0
-      id: git-diff-action
-      with:
-            json_diff_file_output: diff.json
-            raw_diff_file_output: diff.txt
-            file_output_only: "true"
-            base_branch: ${{ github.event.pull_request.base.sha }}
-    - name: Show diff
-      env:
-        DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
-      run: |
-            cat $DIFF
-    - name: Summarize
-      env:
-        DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
-      id: summarize
-      run: |
-            input="$(cat $DIFF)"
-
-            # Define the LocalAI API endpoint
-            API_URL="http://localhost:8080/chat/completions"
-
-            # Create a JSON payload using jq to handle special characters
-            json_payload=$(jq -n --arg input "$input" '{
-            model: "'$MODEL_NAME'",
-            messages: [
-                {
-                role: "system",
-                content: "You are LocalAI-bot in Github that helps understanding PRs and assess complexity. Explain what has changed in this PR diff and why"
-                },
-                {
-                role: "user",
-                content: $input
-                }
-            ]
-            }')
-
-            # Send the request to LocalAI
-            response=$(curl -s -X POST $API_URL \
-            -H "Content-Type: application/json" \
-            -d "$json_payload")
-
-            # Extract the summary from the response
-            summary="$(echo $response | jq -r '.choices[0].message.content')"
-
-            # Print the summary
-            #  -H "Authorization: Bearer $API_KEY" \
-            echo "Summary:"
-            echo "$summary"
-            echo "payload sent"
-            echo "$json_payload"
-            {
-                echo 'message<<EOF'
-                echo "$summary"
-                echo EOF
-              } >> "$GITHUB_OUTPUT"
-            docker logs --tail 10 local-ai
-    - uses: mshick/add-pr-comment@v2
-      if: always()
-      with:
-          repo-token: ${{ secrets.UPDATE_BOT_TOKEN }}
-          message: ${{ steps.summarize.outputs.message }}
-          message-failure: |
-            Uh oh! Could not analyze this PR, maybe it's too big?
--- a/.github/workflows/disabled/test-gpu.yml
+++ b/.github/workflows/disabled/test-gpu.yml
@@ -1,63 +0,0 @@
---
-name: 'GPU tests'
-
-on:
-  pull_request:
-  push:
-    branches:
-      - master
-    tags:
-      - '*'
-
-concurrency:
-  group: ci-gpu-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
-  cancel-in-progress: true
-
-jobs:
-  ubuntu-latest:
-    runs-on: gpu
-    strategy:
-      matrix:
-        go-version: ['1.21.x']
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with: 
-          submodules: true
-      - name: Setup Go ${{ matrix.go-version }}
-        uses: actions/setup-go@v4
-        with:
-          go-version: ${{ matrix.go-version }}
-      # You can test your matrix by printing the current Go version
-      - name: Display Go version
-        run: go version
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo DEBIAN_FRONTEND=noninteractive apt-get install -y make wget
-      - name: Build
-        run: |
-          if [ ! -e /run/systemd/system ]; then
-            sudo mkdir /run/systemd/system
-          fi
-          sudo mkdir -p /host/tests/${{ github.head_ref || github.ref }}
-          sudo chmod -R 777 /host/tests/${{ github.head_ref || github.ref }}
-          make \
-            TEST_DIR="/host/tests/${{ github.head_ref || github.ref }}" \
-            BUILD_TYPE=cublas \
-            prepare-e2e run-e2e-image test-e2e
-      - name: Release space from worker ♻
-        if: always()
-        run: |
-          sudo rm -rf build || true
-          sudo rm -rf bin || true
-          sudo rm -rf dist || true
-          sudo docker logs $(sudo docker ps -q --filter ancestor=localai-tests) > logs.txt
-          sudo cat logs.txt || true
-          sudo rm -rf logs.txt
-          make clean || true
-          make \
-            TEST_DIR="/host/tests/${{ github.head_ref || github.ref }}" \
-            teardown-e2e || true
-          sudo rm -rf /host/tests/${{ github.head_ref || github.ref }} || true
-          docker system prune -f -a --volumes || true
--- a/.github/workflows/generate_grpc_cache.yaml
+++ b/.github/workflows/generate_grpc_cache.yaml
@@ -1,94 +0,0 @@
-name: 'generate and publish GRPC docker caches'
-
-on:
-  workflow_dispatch:
-  push:
-    branches:
-      - master
-
-concurrency:
-  group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
-  cancel-in-progress: true
-
-jobs:
-  generate_caches:
-    strategy:
-      matrix:
-        include:
-          - grpc-base-image: ubuntu:22.04
-            runs-on: 'ubuntu-latest'
-            platforms: 'linux/amd64,linux/arm64'
-    runs-on: ${{matrix.runs-on}}
-    steps:
-      - name: Release space from worker
-        if: matrix.runs-on == 'ubuntu-latest'
-        run: |
-          echo "Listing top largest packages"
-          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-          head -n 30 <<< "${pkgs}"
-          echo
-          df -h
-          echo
-          sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
-          sudo apt-get remove --auto-remove android-sdk-platform-tools || true
-          sudo apt-get purge --auto-remove android-sdk-platform-tools || true
-          sudo rm -rf /usr/local/lib/android
-          sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
-          sudo rm -rf /usr/share/dotnet
-          sudo apt-get remove -y '^mono-.*' || true
-          sudo apt-get remove -y '^ghc-.*' || true
-          sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
-          sudo apt-get remove -y 'php.*' || true
-          sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
-          sudo apt-get remove -y '^google-.*' || true
-          sudo apt-get remove -y azure-cli || true
-          sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
-          sudo apt-get remove -y '^gfortran-.*' || true
-          sudo apt-get remove -y microsoft-edge-stable || true
-          sudo apt-get remove -y firefox || true
-          sudo apt-get remove -y powershell || true
-          sudo apt-get remove -y r-base-core || true
-          sudo apt-get autoremove -y
-          sudo apt-get clean
-          echo
-          echo "Listing top largest packages"
-          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-          head -n 30 <<< "${pkgs}"
-          echo
-          sudo rm -rfv build || true
-          sudo rm -rf /usr/share/dotnet || true
-          sudo rm -rf /opt/ghc || true
-          sudo rm -rf "/usr/local/share/boost" || true
-          sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
-          df -h
-
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@master
-        with:
-          platforms: all
-
-      - name: Set up Docker Buildx
-        id: buildx
-        uses: docker/setup-buildx-action@master
-
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Cache GRPC
-        uses: docker/build-push-action@v6
-        with:
-          builder: ${{ steps.buildx.outputs.name }}
-          # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
-          # This means that even the MAKEFLAGS have to be an EXACT match.
-          # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
-          build-args: |
-            GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }}
-            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
-            GRPC_VERSION=v1.65.0
-          context: .
-          file: ./Dockerfile
-          cache-to: type=gha,ignore-error=true
-          cache-from: type=gha
-          target: grpc
-          platforms: ${{ matrix.platforms }}
-          push: false
--- a/.github/workflows/generate_intel_image.yaml
+++ b/.github/workflows/generate_intel_image.yaml
@@ -1,59 +0,0 @@
-name: 'generate and publish intel docker caches'
-
-on:
-  workflow_dispatch:
-  push:
-    branches:
-      - master
-
-concurrency:
-  group: intel-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
-  cancel-in-progress: true
-
-jobs:
-  generate_caches:
-    strategy:
-      matrix:
-        include:
-          - base-image: intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04
-            runs-on: 'ubuntu-latest'
-            platforms: 'linux/amd64'
-    runs-on: ${{matrix.runs-on}}
-    steps:
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@master
-        with:
-          platforms: all
-      - name: Login to DockerHub
-        if: github.event_name != 'pull_request'
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-
-      - name: Login to quay
-        if: github.event_name != 'pull_request'
-        uses: docker/login-action@v3
-        with:
-          registry: quay.io
-          username: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
-          password: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
-      - name: Set up Docker Buildx
-        id: buildx
-        uses: docker/setup-buildx-action@master
-
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Cache Intel images
-        uses: docker/build-push-action@v6
-        with:
-          builder: ${{ steps.buildx.outputs.name }}
-          build-args: |
-            BASE_IMAGE=${{ matrix.base-image }}
-          context: .
-          file: ./Dockerfile
-          tags: quay.io/go-skynet/intel-oneapi-base:latest
-          push: true
-          target: intel
-          platforms: ${{ matrix.platforms }}
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@@ -1,140 +0,0 @@
---
-name: 'build container images tests'
-
-on:
-  pull_request:
-
-concurrency:
-  group: ci-${{ github.head_ref || github.ref }}-${{ github.repository }}
-  cancel-in-progress: true
-
-jobs:
-  extras-image-build:
-    uses: ./.github/workflows/image_build.yml
-    with:
-      tag-latest: ${{ matrix.tag-latest }}
-      tag-suffix: ${{ matrix.tag-suffix }}
-      ffmpeg: ${{ matrix.ffmpeg }}
-      image-type: ${{ matrix.image-type }}
-      build-type: ${{ matrix.build-type }}
-      cuda-major-version: ${{ matrix.cuda-major-version }}
-      cuda-minor-version: ${{ matrix.cuda-minor-version }}
-      platforms: ${{ matrix.platforms }}
-      runs-on: ${{ matrix.runs-on }}
-      base-image: ${{ matrix.base-image }}
-      grpc-base-image: ${{ matrix.grpc-base-image }}
-      makeflags: ${{ matrix.makeflags }}
-    secrets:
-      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
-      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
-      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
-      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
-    strategy:
-      # Pushing with all jobs in parallel
-      # eats the bandwidth of all the nodes
-      max-parallel: ${{ github.event_name != 'pull_request' && 4 || 8 }}
-      matrix:
-        include:
-          # This is basically covered by the AIO test
-          # - build-type: ''
-          #   platforms: 'linux/amd64'
-          #   tag-latest: 'false'
-          #   tag-suffix: '-ffmpeg'
-          #   ffmpeg: 'true'
-          #   image-type: 'extras'
-          #   runs-on: 'arc-runner-set'
-          #   base-image: "ubuntu:22.04"
-          #   makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'cublas'
-            cuda-major-version: "12"
-            cuda-minor-version: "0"
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-cublas-cuda12-ffmpeg'
-            ffmpeg: 'true'
-            image-type: 'extras'
-            runs-on: 'arc-runner-set'
-            base-image: "ubuntu:22.04"
-            makeflags: "--jobs=3 --output-sync=target"
-          # - build-type: 'hipblas'
-          #   platforms: 'linux/amd64'
-          #   tag-latest: 'false'
-          #   tag-suffix: '-hipblas'
-          #   ffmpeg: 'false'
-          #   image-type: 'extras'
-          #   base-image: "rocm/dev-ubuntu-22.04:6.1"
-          #   grpc-base-image: "ubuntu:22.04"
-          #   runs-on: 'arc-runner-set'
-          #   makeflags: "--jobs=3 --output-sync=target"
-          # - build-type: 'sycl_f16'
-          #   platforms: 'linux/amd64'
-          #   tag-latest: 'false'
-          #   base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-          #   grpc-base-image: "ubuntu:22.04"
-          #   tag-suffix: 'sycl-f16-ffmpeg'
-          #   ffmpeg: 'true'
-          #   image-type: 'extras'
-          #   runs-on: 'arc-runner-set'
-          #   makeflags: "--jobs=3 --output-sync=target"
-  # core-image-build:
-  #   uses: ./.github/workflows/image_build.yml
-  #   with:
-  #     tag-latest: ${{ matrix.tag-latest }}
-  #     tag-suffix: ${{ matrix.tag-suffix }}
-  #     ffmpeg: ${{ matrix.ffmpeg }}
-  #     image-type: ${{ matrix.image-type }}
-  #     build-type: ${{ matrix.build-type }}
-  #     cuda-major-version: ${{ matrix.cuda-major-version }}
-  #     cuda-minor-version: ${{ matrix.cuda-minor-version }}
-  #     platforms: ${{ matrix.platforms }}
-  #     runs-on: ${{ matrix.runs-on }}
-  #     base-image: ${{ matrix.base-image }}
-  #     grpc-base-image: ${{ matrix.grpc-base-image }}
-  #     makeflags: ${{ matrix.makeflags }}
-  #   secrets:
-  #     dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
-  #     dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
-  #     quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
-  #     quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
-  #   strategy:
-  #     matrix:
-  #       include:
-          # - build-type: ''
-          #   platforms: 'linux/amd64'
-          #   tag-latest: 'false'
-          #   tag-suffix: '-ffmpeg-core'
-          #   ffmpeg: 'true'
-          #   image-type: 'core'
-          #   runs-on: 'ubuntu-latest'
-          #   base-image: "ubuntu:22.04"
-          #   makeflags: "--jobs=4 --output-sync=target"
-          # - build-type: 'sycl_f16'
-          #   platforms: 'linux/amd64'
-          #   tag-latest: 'false'
-          #   base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-          #   grpc-base-image: "ubuntu:22.04"
-          #   tag-suffix: 'sycl-f16-ffmpeg-core'
-          #   ffmpeg: 'true'
-          #   image-type: 'core'
-          #   runs-on: 'arc-runner-set'
-          #   makeflags: "--jobs=3 --output-sync=target"
-          # - build-type: 'cublas'
-          #   cuda-major-version: "12"
-          #   cuda-minor-version: "0"
-          #   platforms: 'linux/amd64'
-          #   tag-latest: 'false'
-          #   tag-suffix: '-cublas-cuda12-ffmpeg-core'
-          #   ffmpeg: 'true'
-          #   image-type: 'core'
-          #   runs-on: 'ubuntu-latest'
-          #   base-image: "ubuntu:22.04"
-          #   makeflags: "--jobs=4 --output-sync=target"
-          # - build-type: 'vulkan'
-          #   platforms: 'linux/amd64'
-          #   tag-latest: 'false'
-          #   tag-suffix: '-vulkan-ffmpeg-core'
-          #   ffmpeg: 'true'
-          #   image-type: 'core'
-          #   runs-on: 'ubuntu-latest'
-          #   base-image: "ubuntu:22.04"
-          #   makeflags: "--jobs=4 --output-sync=target"
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -2,356 +2,77 @@
 name: 'build container images'

 on:
+  pull_request:
  push:
    branches:
      - master
    tags:
      - '*'

-concurrency:
-  group: ci-${{ github.head_ref || github.ref }}-${{ github.repository }}
-  cancel-in-progress: true
-
 jobs:
-  hipblas-jobs:
-    uses: ./.github/workflows/image_build.yml
-    with:
-      tag-latest: ${{ matrix.tag-latest }}
-      tag-suffix: ${{ matrix.tag-suffix }}
-      ffmpeg: ${{ matrix.ffmpeg }}
-      image-type: ${{ matrix.image-type }}
-      build-type: ${{ matrix.build-type }}
-      cuda-major-version: ${{ matrix.cuda-major-version }}
-      cuda-minor-version: ${{ matrix.cuda-minor-version }}
-      platforms: ${{ matrix.platforms }}
-      runs-on: ${{ matrix.runs-on }}
-      base-image: ${{ matrix.base-image }}
-      grpc-base-image: ${{ matrix.grpc-base-image }}
-      aio: ${{ matrix.aio }}
-      makeflags: ${{ matrix.makeflags }}
-      latest-image: ${{ matrix.latest-image }}
-      latest-image-aio: ${{ matrix.latest-image-aio }}
-    secrets:
-      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
-      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
-      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
-      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
-    strategy:
-      # Pushing with all jobs in parallel
-      # eats the bandwidth of all the nodes
-      max-parallel: 2
-      matrix:
-        include:
-          - build-type: 'hipblas'
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-hipblas-ffmpeg'
-            ffmpeg: 'true'
-            image-type: 'extras'
-            aio: "-aio-gpu-hipblas"
-            base-image: "rocm/dev-ubuntu-22.04:6.1"
-            grpc-base-image: "ubuntu:22.04"
-            latest-image: 'latest-gpu-hipblas'
-            latest-image-aio: 'latest-aio-gpu-hipblas'
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'hipblas'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-hipblas'
-            ffmpeg: 'false'
-            image-type: 'extras'
-            base-image: "rocm/dev-ubuntu-22.04:6.1"
-            grpc-base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'hipblas'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-hipblas-ffmpeg-core'
-            ffmpeg: 'true'
-            image-type: 'core'
-            base-image: "rocm/dev-ubuntu-22.04:6.1"
-            grpc-base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'hipblas'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-hipblas-core'
-            ffmpeg: 'false'
-            image-type: 'core'
-            base-image: "rocm/dev-ubuntu-22.04:6.1"
-            grpc-base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
-  self-hosted-jobs:
-    uses: ./.github/workflows/image_build.yml
-    with:
-      tag-latest: ${{ matrix.tag-latest }}
-      tag-suffix: ${{ matrix.tag-suffix }}
-      ffmpeg: ${{ matrix.ffmpeg }}
-      image-type: ${{ matrix.image-type }}
-      build-type: ${{ matrix.build-type }}
-      cuda-major-version: ${{ matrix.cuda-major-version }}
-      cuda-minor-version: ${{ matrix.cuda-minor-version }}
-      platforms: ${{ matrix.platforms }}
-      runs-on: ${{ matrix.runs-on }}
-      base-image: ${{ matrix.base-image }}
-      grpc-base-image: ${{ matrix.grpc-base-image }}
-      aio: ${{ matrix.aio }}
-      makeflags: ${{ matrix.makeflags }}
-      latest-image: ${{ matrix.latest-image }}
-      latest-image-aio: ${{ matrix.latest-image-aio }}
-    secrets:
-      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
-      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
-      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
-      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
-    strategy:
-      # Pushing with all jobs in parallel
-      # eats the bandwidth of all the nodes
-      max-parallel: ${{ github.event_name != 'pull_request' && 5 || 8 }}
-      matrix:
-        include:
-          # Extra images
-          - build-type: ''
-            #platforms: 'linux/amd64,linux/arm64'
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: ''
-            ffmpeg: ''
-            image-type: 'extras'
-            runs-on: 'arc-runner-set'
-            base-image: "ubuntu:22.04"
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: ''
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-ffmpeg'
-            ffmpeg: 'true'
-            image-type: 'extras'
-            runs-on: 'arc-runner-set'
-            base-image: "ubuntu:22.04"
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-cublas-cuda11'
-            ffmpeg: ''
-            image-type: 'extras'
-            runs-on: 'arc-runner-set'
-            base-image: "ubuntu:22.04"
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'cublas'
-            cuda-major-version: "12"
-            cuda-minor-version: "0"
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-cublas-cuda12'
-            ffmpeg: ''
-            image-type: 'extras'
-            runs-on: 'arc-runner-set'
-            base-image: "ubuntu:22.04"
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-cublas-cuda11-ffmpeg'
-            ffmpeg: 'true'
-            image-type: 'extras'
-            runs-on: 'arc-runner-set'
-            base-image: "ubuntu:22.04"
-            aio: "-aio-gpu-nvidia-cuda-11"
-            latest-image: 'latest-gpu-nvidia-cuda-11'
-            latest-image-aio: 'latest-aio-gpu-nvidia-cuda-11'
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'cublas'
-            cuda-major-version: "12"
-            cuda-minor-version: "0"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-cublas-cuda12-ffmpeg'
-            ffmpeg: 'true'
-            image-type: 'extras'
-            runs-on: 'arc-runner-set'
-            base-image: "ubuntu:22.04"
-            aio: "-aio-gpu-nvidia-cuda-12"
-            latest-image: 'latest-gpu-nvidia-cuda-12'
-            latest-image-aio: 'latest-aio-gpu-nvidia-cuda-12'
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: ''
-            #platforms: 'linux/amd64,linux/arm64'
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: ''
-            ffmpeg: ''
-            image-type: 'extras'
-            base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'sycl_f16'
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            grpc-base-image: "ubuntu:22.04"
-            tag-suffix: '-sycl-f16-ffmpeg'
-            ffmpeg: 'true'
-            image-type: 'extras'
-            runs-on: 'arc-runner-set'
-            aio: "-aio-gpu-intel-f16"
-            latest-image: 'latest-gpu-intel-f16'
-            latest-image-aio: 'latest-aio-gpu-intel-f16'
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'sycl_f32'
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            grpc-base-image: "ubuntu:22.04"
-            tag-suffix: '-sycl-f32-ffmpeg'
-            ffmpeg: 'true'
-            image-type: 'extras'
-            runs-on: 'arc-runner-set'
-            aio: "-aio-gpu-intel-f32"
-            latest-image: 'latest-gpu-intel-f32'
-            latest-image-aio: 'latest-aio-gpu-intel-f32'
-            makeflags: "--jobs=3 --output-sync=target"
-          # Core images
-          - build-type: 'sycl_f16'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            grpc-base-image: "ubuntu:22.04"
-            tag-suffix: '-sycl-f16-core'
-            ffmpeg: 'false'
-            image-type: 'core'
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'sycl_f32'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            grpc-base-image: "ubuntu:22.04"
-            tag-suffix: '-sycl-f32-core'
-            ffmpeg: 'false'
-            image-type: 'core'
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'sycl_f16'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            grpc-base-image: "ubuntu:22.04"
-            tag-suffix: '-sycl-f16-ffmpeg-core'
-            ffmpeg: 'true'
-            image-type: 'core'
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'sycl_f32'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            grpc-base-image: "ubuntu:22.04"
-            tag-suffix: '-sycl-f32-ffmpeg-core'
-            ffmpeg: 'true'
-            image-type: 'core'
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
+  docker:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3

-  core-image-build:
-    uses: ./.github/workflows/image_build.yml
-    with:
-      tag-latest: ${{ matrix.tag-latest }}
-      tag-suffix: ${{ matrix.tag-suffix }}
-      ffmpeg: ${{ matrix.ffmpeg }}
-      image-type: ${{ matrix.image-type }}
-      build-type: ${{ matrix.build-type }}
-      cuda-major-version: ${{ matrix.cuda-major-version }}
-      cuda-minor-version: ${{ matrix.cuda-minor-version }}
-      platforms: ${{ matrix.platforms }}
-      runs-on: ${{ matrix.runs-on }}
-      aio: ${{ matrix.aio }}
-      base-image: ${{ matrix.base-image }}
-      grpc-base-image: ${{ matrix.grpc-base-image }}
-      makeflags: ${{ matrix.makeflags }}
-      latest-image: ${{ matrix.latest-image }}
-      latest-image-aio: ${{ matrix.latest-image-aio }}
-    secrets:
-      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
-      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
-      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
-      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
-    strategy:
-      max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
-      matrix:
-        include:
-          - build-type: ''
-            platforms: 'linux/amd64,linux/arm64'
-            tag-latest: 'auto'
-            tag-suffix: '-ffmpeg-core'
-            ffmpeg: 'true'
-            image-type: 'core'
-            base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
-            aio: "-aio-cpu"
-            latest-image: 'latest-cpu'
-            latest-image-aio: 'latest-aio-cpu'
-            makeflags: "--jobs=4 --output-sync=target"
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-cublas-cuda11-core'
-            ffmpeg: ''
-            image-type: 'core'
-            base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=4 --output-sync=target"
-          - build-type: 'cublas'
-            cuda-major-version: "12"
-            cuda-minor-version: "0"
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-cublas-cuda12-core'
-            ffmpeg: ''
-            image-type: 'core'
-            base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=4 --output-sync=target"
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-cublas-cuda11-ffmpeg-core'
-            ffmpeg: 'true'
-            image-type: 'core'
-            runs-on: 'arc-runner-set'
-            base-image: "ubuntu:22.04"
-            makeflags: "--jobs=4 --output-sync=target"
-          - build-type: 'cublas'
-            cuda-major-version: "12"
-            cuda-minor-version: "0"
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-cublas-cuda12-ffmpeg-core'
-            ffmpeg: 'true'
-            image-type: 'core'
-            runs-on: 'arc-runner-set'
-            base-image: "ubuntu:22.04"
-            makeflags: "--jobs=4 --output-sync=target"
-          - build-type: 'vulkan'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-vulkan-ffmpeg-core'
-            latest-image: 'latest-vulkan-ffmpeg-core'
-            ffmpeg: 'true'
-            image-type: 'core'
-            runs-on: 'arc-runner-set'
-            base-image: "ubuntu:22.04"
-            makeflags: "--jobs=4 --output-sync=target"
+      - name: Prepare
+        id: prep
+        run: |
+          DOCKER_IMAGE=quay.io/go-skynet/local-ai
+          VERSION=master
+          SHORTREF=${GITHUB_SHA::8}
+
+          # If this is git tag, use the tag name as a docker tag
+          if [[ $GITHUB_REF == refs/tags/* ]]; then
+            VERSION=${GITHUB_REF#refs/tags/}
+          fi
+          TAGS="${DOCKER_IMAGE}:${VERSION},${DOCKER_IMAGE}:${SHORTREF}"
+
+          # If the VERSION looks like a version number, assume that
+          # this is the most recent version of the image and also
+          # tag it 'latest'.
+          if [[ $VERSION =~ ^v[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
+            TAGS="$TAGS,${DOCKER_IMAGE}:latest"
+          fi
+
+          # Set output parameters.
+          echo ::set-output name=tags::${TAGS}
+          echo ::set-output name=docker_image::${DOCKER_IMAGE}
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@master
+        with:
+          platforms: all
+
+      - name: Set up Docker Buildx
+        id: buildx
+        uses: docker/setup-buildx-action@master
+
+      - name: Login to DockerHub
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@v2
+        with:
+          registry: quay.io
+          username: ${{ secrets.QUAY_USERNAME }}
+          password: ${{ secrets.QUAY_PASSWORD }}
+      - name: Build
+        if: github.event_name != 'pull_request'
+        uses: docker/build-push-action@v4
+        with:
+          builder: ${{ steps.buildx.outputs.name }}
+          context: .
+          file: ./Dockerfile
+          platforms: linux/amd64,linux/arm64
+          push: true
+          tags: ${{ steps.prep.outputs.tags }}
+      - name: Build PRs
+        if: github.event_name == 'pull_request'
+        uses: docker/build-push-action@v4
+        with:
+          builder: ${{ steps.buildx.outputs.name }}
+          context: .
+          file: ./Dockerfile
+          platforms: linux/amd64
+          push: false
+          tags: ${{ steps.prep.outputs.tags }}
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -1,335 +0,0 @@
---
-name: 'build container images (reusable)'
-
-on:
-  workflow_call:
-    inputs:
-      base-image:
-        description: 'Base image'
-        required: true
-        type: string
-      grpc-base-image:
-        description: 'GRPC Base image, must be a compatible image with base-image'
-        required: false
-        default: ''
-        type: string
-      build-type:
-        description: 'Build type'
-        default: ''
-        type: string
-      cuda-major-version:
-        description: 'CUDA major version'
-        default: "12"
-        type: string
-      cuda-minor-version:
-        description: 'CUDA minor version'
-        default: "4"
-        type: string
-      platforms:
-        description: 'Platforms'
-        default: ''
-        type: string
-      tag-latest:
-        description: 'Tag latest'
-        default: ''
-        type: string
-      latest-image:
-          description: 'Tag latest'
-          default: ''
-          type: string
-      latest-image-aio:
-          description: 'Tag latest'
-          default: ''
-          type: string
-      tag-suffix:
-        description: 'Tag suffix'
-        default: ''
-        type: string
-      ffmpeg:
-        description: 'FFMPEG'
-        default: ''
-        type: string
-      image-type:
-        description: 'Image type'
-        default: ''
-        type: string
-      runs-on:
-        description: 'Runs on'
-        required: true
-        default: ''
-        type: string
-      makeflags:
-        description: 'Make Flags'
-        required: false
-        default: '--jobs=4 --output-sync=target'
-        type: string
-      aio:
-        description: 'AIO Image Name'
-        required: false
-        default: ''
-        type: string
-    secrets:
-      dockerUsername:
-        required: true
-      dockerPassword:
-        required: true
-      quayUsername:
-        required: true
-      quayPassword:
-        required: true
-jobs:
-  reusable_image-build:
-    runs-on: ${{ inputs.runs-on }}
-    steps:
-      - name: Force Install GIT latest
-        run: |
-          sudo apt-get update \
-          && sudo apt-get install -y software-properties-common \
-          && sudo apt-get update \
-          && sudo add-apt-repository -y ppa:git-core/ppa \
-          && sudo apt-get update \
-          && sudo apt-get install -y git
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Release space from worker
-        if: inputs.runs-on == 'ubuntu-latest'
-        run: |
-          echo "Listing top largest packages"
-          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-          head -n 30 <<< "${pkgs}"
-          echo
-          df -h
-          echo
-          sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
-          sudo apt-get remove --auto-remove android-sdk-platform-tools || true
-          sudo apt-get purge --auto-remove android-sdk-platform-tools || true
-          sudo rm -rf /usr/local/lib/android
-          sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
-          sudo rm -rf /usr/share/dotnet
-          sudo apt-get remove -y '^mono-.*' || true
-          sudo apt-get remove -y '^ghc-.*' || true
-          sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
-          sudo apt-get remove -y 'php.*' || true
-          sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
-          sudo apt-get remove -y '^google-.*' || true
-          sudo apt-get remove -y azure-cli || true
-          sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
-          sudo apt-get remove -y '^gfortran-.*' || true
-          sudo apt-get remove -y microsoft-edge-stable || true
-          sudo apt-get remove -y firefox || true
-          sudo apt-get remove -y powershell || true
-          sudo apt-get remove -y r-base-core || true
-          sudo apt-get autoremove -y
-          sudo apt-get clean
-          echo
-          echo "Listing top largest packages"
-          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-          head -n 30 <<< "${pkgs}"
-          echo
-          sudo rm -rfv build || true
-          sudo rm -rf /usr/share/dotnet || true
-          sudo rm -rf /opt/ghc || true
-          sudo rm -rf "/usr/local/share/boost" || true
-          sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
-          df -h
-
-      - name: Docker meta
-        id: meta
-        if: github.event_name != 'pull_request'
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            quay.io/go-skynet/local-ai
-            localai/localai
-          tags: |
-            type=ref,event=branch
-            type=semver,pattern={{raw}}
-            type=sha
-          flavor: |
-            latest=${{ inputs.tag-latest }}
-            suffix=${{ inputs.tag-suffix }}
-      - name: Docker meta for PR
-        id: meta_pull_request
-        if: github.event_name == 'pull_request'
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            ttl.sh/localai-ci-pr-${{ github.event.number }}
-          tags: |
-            type=ref,event=branch
-            type=semver,pattern={{raw}}
-            type=sha
-          flavor: |
-            latest=${{ inputs.tag-latest }}
-            suffix=${{ inputs.tag-suffix }}
-      - name: Docker meta AIO (quay.io)
-        if: inputs.aio != ''
-        id: meta_aio
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            quay.io/go-skynet/local-ai
-          tags: |
-            type=ref,event=branch
-            type=semver,pattern={{raw}}
-          flavor: |
-            latest=${{ inputs.tag-latest }}
-            suffix=${{ inputs.aio }}
-
-      - name: Docker meta AIO (dockerhub)
-        if: inputs.aio != ''
-        id: meta_aio_dockerhub
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            localai/localai
-          tags: |
-            type=ref,event=branch
-            type=semver,pattern={{raw}}
-          flavor: |
-            suffix=${{ inputs.aio }}
-
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@master
-        with:
-          platforms: all
-
-      - name: Set up Docker Buildx
-        id: buildx
-        uses: docker/setup-buildx-action@master
-
-      - name: Login to DockerHub
-        if: github.event_name != 'pull_request'
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.dockerUsername }}
-          password: ${{ secrets.dockerPassword }}
-
-      - name: Login to DockerHub
-        if: github.event_name != 'pull_request'
-        uses: docker/login-action@v3
-        with:
-          registry: quay.io
-          username: ${{ secrets.quayUsername }}
-          password: ${{ secrets.quayPassword }}
-
-      - name: Build and push
-        uses: docker/build-push-action@v6
-        if: github.event_name != 'pull_request'
-        with:
-          builder: ${{ steps.buildx.outputs.name }}
-          # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
-          # This means that even the MAKEFLAGS have to be an EXACT match.
-          # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
-          # This is why some build args like GRPC_VERSION and MAKEFLAGS are hardcoded
-          build-args: |
-            BUILD_TYPE=${{ inputs.build-type }}
-            CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
-            CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
-            FFMPEG=${{ inputs.ffmpeg }}
-            IMAGE_TYPE=${{ inputs.image-type }}
-            BASE_IMAGE=${{ inputs.base-image }}
-            GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
-            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
-            GRPC_VERSION=v1.65.0
-            MAKEFLAGS=${{ inputs.makeflags }}
-          context: .
-          file: ./Dockerfile
-          cache-from: type=gha
-          platforms: ${{ inputs.platforms }}
-          push: ${{ github.event_name != 'pull_request' }}
-          tags: ${{ steps.meta.outputs.tags }}
-          labels: ${{ steps.meta.outputs.labels }}
-### Start testing image
-      - name: Build and push
-        uses: docker/build-push-action@v6
-        if: github.event_name == 'pull_request'
-        with:
-          builder: ${{ steps.buildx.outputs.name }}
-          # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
-          # This means that even the MAKEFLAGS have to be an EXACT match.
-          # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
-          # This is why some build args like GRPC_VERSION and MAKEFLAGS are hardcoded
-          build-args: |
-            BUILD_TYPE=${{ inputs.build-type }}
-            CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
-            CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
-            FFMPEG=${{ inputs.ffmpeg }}
-            IMAGE_TYPE=${{ inputs.image-type }}
-            BASE_IMAGE=${{ inputs.base-image }}
-            GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
-            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
-            GRPC_VERSION=v1.65.0
-            MAKEFLAGS=${{ inputs.makeflags }}
-          context: .
-          file: ./Dockerfile
-          cache-from: type=gha
-          platforms: ${{ inputs.platforms }}
-          push: true
-          tags: ${{ steps.meta_pull_request.outputs.tags }}
-          labels: ${{ steps.meta_pull_request.outputs.labels }}
-      - name: Testing image
-        if: github.event_name == 'pull_request'
-        run: |
-          echo "Image is available at ttl.sh/localai-ci-pr-${{ github.event.number }}:${{ steps.meta_pull_request.outputs.version }}" >> $GITHUB_STEP_SUMMARY
-## End testing image
-      - name: Build and push AIO image
-        if: inputs.aio != ''
-        uses: docker/build-push-action@v6
-        with:
-          builder: ${{ steps.buildx.outputs.name }}
-          build-args: |
-            BASE_IMAGE=quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
-            MAKEFLAGS=${{ inputs.makeflags }}
-          context: .
-          file: ./Dockerfile.aio
-          platforms: ${{ inputs.platforms }}
-          push: ${{ github.event_name != 'pull_request' }}
-          tags: ${{ steps.meta_aio.outputs.tags }}
-          labels: ${{ steps.meta_aio.outputs.labels }}
-
-      - name: Build and push AIO image (dockerhub)
-        if: inputs.aio != ''
-        uses: docker/build-push-action@v6
-        with:
-          builder: ${{ steps.buildx.outputs.name }}
-          build-args: |
-            BASE_IMAGE=localai/localai:${{ steps.meta.outputs.version }}
-            MAKEFLAGS=${{ inputs.makeflags }}
-          context: .
-          file: ./Dockerfile.aio
-          platforms: ${{ inputs.platforms }}
-          push: ${{ github.event_name != 'pull_request' }}
-          tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
-          labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}
-
-      - name: Latest tag
-        # run this on branches, when it is a tag and there is a latest-image defined
-        if: github.event_name != 'pull_request' && inputs.latest-image != ''  && github.ref_type == 'tag'
-        run: |
-          docker pull localai/localai:${{ steps.meta.outputs.version }}
-          docker tag localai/localai:${{ steps.meta.outputs.version }} localai/localai:${{ inputs.latest-image }}
-          docker push localai/localai:${{ inputs.latest-image }}
-          docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
-          docker tag quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
-          docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
-      - name: Latest AIO tag
-        # run this on branches, when it is a tag and there is a latest-image defined
-        if: github.event_name != 'pull_request' && inputs.latest-image-aio != ''  && github.ref_type == 'tag'
-        run: |
-          docker pull localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }}
-          docker tag localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} localai/localai:${{ inputs.latest-image-aio }}
-          docker push localai/localai:${{ inputs.latest-image-aio }}
-          docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }}
-          docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
-          docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
-
-      - name: job summary
-        run: |
-          echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
-
-      - name: job summary(AIO)
-        if: inputs.aio != ''
-        run: |
-          echo "Built image: ${{ steps.meta_aio.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
--- a/.github/workflows/labeler.yml
+++ b/.github/workflows/labeler.yml
@@ -1,12 +0,0 @@
-name: "Pull Request Labeler"
-on:
- pull_request_target
-
-jobs:
-  labeler:
-    permissions:
-      contents: read
-      pull-requests: write
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/labeler@v5
--- a/.github/workflows/localaibot_automerge.yml
+++ b/.github/workflows/localaibot_automerge.yml
@@ -1,35 +0,0 @@
-name: LocalAI-bot auto-merge
-on:
- pull_request_target
-
-permissions:
-  contents: write
-  pull-requests: write
-  packages: read
-
-jobs:
-  dependabot:
-    runs-on: ubuntu-latest
-    if: ${{ github.actor == 'localai-bot' }}
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-
-      - name: Approve a PR if not already approved
-        run: |
-          gh pr checkout "$PR_URL"
-            if [ "$(gh pr status --json reviewDecision -q .currentBranch.reviewDecision)" != "APPROVED" ];
-          then
-            gh pr review --approve "$PR_URL"
-          else
-            echo "PR already approved.";
-          fi
-        env:
-          PR_URL: ${{github.event.pull_request.html_url}}
-          GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
-
-      - name: Enable auto-merge for LocalAIBot PRs
-        run: gh pr merge --auto --squash "$PR_URL"
-        env:
-          PR_URL: ${{github.event.pull_request.html_url}}
-          GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
--- a/.github/workflows/notify-models.yaml
+++ b/.github/workflows/notify-models.yaml
@@ -1,168 +0,0 @@
-name: Notifications for new models
-on:
-  pull_request:
-     types:
-       - closed
-
-jobs:
-  notify-discord:
-    if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
-    env:
-        MODEL_NAME: hermes-2-theta-llama-3-8b
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v4
-      with:
-        fetch-depth: 0 # needed to checkout all branches for this Action to work
-    - uses: mudler/localai-github-action@v1
-      with:
-        model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
-        # Check the PR diff using the current branch and the base branch of the PR
-    - uses: GrantBirki/git-diff-action@v2.7.0
-      id: git-diff-action
-      with:
-            json_diff_file_output: diff.json
-            raw_diff_file_output: diff.txt
-            file_output_only: "true"
-    - name: Summarize
-      env:
-        DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
-      id: summarize
-      run: |
-            input="$(cat $DIFF)"
-
-            # Define the LocalAI API endpoint
-            API_URL="http://localhost:8080/chat/completions"
-
-            # Create a JSON payload using jq to handle special characters
-            json_payload=$(jq -n --arg input "$input" '{
-            model: "'$MODEL_NAME'",
-            messages: [
-                {
-                role: "system",
-                content: "You are LocalAI-bot. Write a discord message to notify everyone about the new model from the git diff. Make it informal. An example can include: the URL of the model, the name, and a brief description of the model if exists. Also add an hint on how to install it in LocalAI and that can be browsed over https://models.localai.io. For example: local-ai run model_name_here"
-                },
-                {
-                role: "user",
-                content: $input
-                }
-            ]
-            }')
-
-            # Send the request to LocalAI
-            response=$(curl -s -X POST $API_URL \
-            -H "Content-Type: application/json" \
-            -d "$json_payload")
-
-            # Extract the summary from the response
-            summary="$(echo $response | jq -r '.choices[0].message.content')"
-
-            # Print the summary
-            #  -H "Authorization: Bearer $API_KEY" \
-            echo "Summary:"
-            echo "$summary"
-            echo "payload sent"
-            echo "$json_payload"
-            {
-                echo 'message<<EOF'
-                echo "$summary"
-                echo EOF
-              } >> "$GITHUB_OUTPUT"
-            docker logs --tail 10 local-ai
-    - name: Discord notification
-      env:
-        DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL }}
-        DISCORD_USERNAME: "LocalAI-Bot"
-        DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
-      uses: Ilshidur/action-discord@master
-      with:
-        args: ${{ steps.summarize.outputs.message }}
-    - name: Setup tmate session if fails
-      if: ${{ failure() }}
-      uses: mxschmitt/action-tmate@v3.19
-      with:
-        detached: true
-        connect-timeout-seconds: 180
-        limit-access-to-actor: true
-  notify-twitter:
-    if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
-    env:
-        MODEL_NAME: hermes-2-theta-llama-3-8b
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v4
-      with:
-        fetch-depth: 0 # needed to checkout all branches for this Action to work
-    - name: Start LocalAI
-      run: |
-        echo "Starting LocalAI..."
-        docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
-        until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready";  docker logs --tail 10 local-ai; sleep 2; done
-      # Check the PR diff using the current branch and the base branch of the PR
-    - uses: GrantBirki/git-diff-action@v2.7.0
-      id: git-diff-action
-      with:
-            json_diff_file_output: diff.json
-            raw_diff_file_output: diff.txt
-            file_output_only: "true"
-    - name: Summarize
-      env:
-        DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
-      id: summarize
-      run: |
-            input="$(cat $DIFF)"
-
-            # Define the LocalAI API endpoint
-            API_URL="http://localhost:8080/chat/completions"
-
-            # Create a JSON payload using jq to handle special characters
-            json_payload=$(jq -n --arg input "$input" '{
-            model: "'$MODEL_NAME'",
-            messages: [
-                {
-                role: "system",
-                content: "You are LocalAI-bot. Write a twitter message to notify everyone about the new model from the git diff. Make it informal and really short. An example can include: the name, and a brief description of the model if exists. Also add an hint on how to install it in LocalAI. For example: local-ai run model_name_here"
-                },
-                {
-                role: "user",
-                content: $input
-                }
-            ]
-            }')
-
-            # Send the request to LocalAI
-            response=$(curl -s -X POST $API_URL \
-            -H "Content-Type: application/json" \
-            -d "$json_payload")
-
-            # Extract the summary from the response
-            summary="$(echo $response | jq -r '.choices[0].message.content')"
-
-            # Print the summary
-            #  -H "Authorization: Bearer $API_KEY" \
-            echo "Summary:"
-            echo "$summary"
-            echo "payload sent"
-            echo "$json_payload"
-            {
-                echo 'message<<EOF'
-                echo "$summary"
-                echo EOF
-              } >> "$GITHUB_OUTPUT"
-            docker logs --tail 10 local-ai
-    - uses: Eomm/why-don-t-you-tweet@v2
-      with:
-        tweet-message: ${{ steps.summarize.outputs.message }}
-      env:
-        # Get your tokens from https://developer.twitter.com/apps
-        TWITTER_CONSUMER_API_KEY: ${{ secrets.TWITTER_APP_KEY }}
-        TWITTER_CONSUMER_API_SECRET: ${{ secrets.TWITTER_APP_SECRET }}
-        TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
-        TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
-    - name: Setup tmate session if fails
-      if: ${{ failure() }}
-      uses: mxschmitt/action-tmate@v3.19
-      with:
-        detached: true
-        connect-timeout-seconds: 180
-        limit-access-to-actor: true
--- a/.github/workflows/notify-releases.yaml
+++ b/.github/workflows/notify-releases.yaml
@@ -1,63 +0,0 @@
-name: Release notifications
-on:
-  release:
-    types:
-      - published
-
-jobs:
-  notify-discord:
-    runs-on: ubuntu-latest
-    env:
-        RELEASE_BODY: ${{ github.event.release.body }}
-        RELEASE_TITLE: ${{ github.event.release.name }}
-        RELEASE_TAG_NAME: ${{ github.event.release.tag_name }}
-    steps:
-    - uses: mudler/localai-github-action@v1
-      with:
-        model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
-    - name: Summarize
-      id: summarize
-      run: |
-            input="$RELEASE_TITLE\b$RELEASE_BODY"
-
-            # Define the LocalAI API endpoint
-            API_URL="http://localhost:8080/chat/completions"
-
-            # Create a JSON payload using jq to handle special characters
-            json_payload=$(jq -n --arg input "$input" '{
-            model: "'$MODEL_NAME'",
-            messages: [
-                {
-                role: "system",
-                content: "Write a discord message with a bullet point summary of the release notes."
-                },
-                {
-                role: "user",
-                content: $input
-                }
-            ]
-            }')
-
-            # Send the request to LocalAI API
-            response=$(curl -s -X POST $API_URL \
-            -H "Content-Type: application/json" \
-            -d "$json_payload")
-
-            # Extract the summary from the response
-            summary=$(echo $response | jq -r '.choices[0].message.content')
-
-            # Print the summary
-            #  -H "Authorization: Bearer $API_KEY" \
-            {
-                echo 'message<<EOF'
-                echo "$summary"
-                echo EOF
-              } >> "$GITHUB_OUTPUT"
-    - name: Discord notification
-      env:
-        DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL_RELEASE }}
-        DISCORD_USERNAME: "LocalAI-Bot"
-        DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
-      uses: Ilshidur/action-discord@master
-      with:
-        args: ${{ steps.summarize.outputs.message }}
--- a/.github/workflows/prlint.yaml
+++ b/.github/workflows/prlint.yaml
@@ -1,28 +0,0 @@
-name: Check PR style
-
-on:
-  pull_request_target:
-    types:
-      - opened
-      - reopened
-      - edited
-      - synchronize
-
-jobs:
-  title-lint:
-    runs-on: ubuntu-latest
-    permissions:
-      statuses: write
-    steps:
-      - uses: aslafy-z/conventional-pr-title-action@v3
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-#  check-pr-description:
-#    runs-on: ubuntu-latest
-#    steps:
-#      - uses: actions/checkout@v2
-#      - uses: jadrol/pr-description-checker-action@v1.0.0
-#        id: description-checker
-#        with:
-#          repo-token: ${{ secrets.GITHUB_TOKEN }}
-#          exempt-labels: no qa
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -1,357 +0,0 @@
-name: Build and Release
-
-on:
-  push:
-    branches:
-      - master
-    tags:
-      - 'v*'
-  pull_request:
-
-env:
-  GRPC_VERSION: v1.65.0
-
-permissions:
-  contents: write
-
-concurrency:
-  group: ci-releases-${{ github.head_ref || github.ref }}-${{ github.repository }}
-  cancel-in-progress: true
-
-jobs:
-
-  build-linux-arm:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: actions/setup-go@v5
-        with:
-          go-version: '1.21.x'
-          cache: false
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk
-          sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
-      - name: Install CUDA Dependencies
-        run: |
-          curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/cuda-keyring_1.1-1_all.deb
-          sudo dpkg -i cuda-keyring_1.1-1_all.deb
-          sudo apt-get update
-          sudo apt-get install -y cuda-cross-aarch64 cuda-nvcc-cross-aarch64-${CUDA_VERSION} libcublas-cross-aarch64-${CUDA_VERSION}
-        env:
-          CUDA_VERSION: 12-4
-      - name: Cache grpc
-        id: cache-grpc
-        uses: actions/cache@v4
-        with:
-          path: grpc
-          key: ${{ runner.os }}-arm-grpc-${{ env.GRPC_VERSION }}
-      - name: Build grpc
-        if: steps.cache-grpc.outputs.cache-hit != 'true'
-        run: |
-
-          git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
-          cd grpc && sed -i "216i\  TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
-          cd cmake/build && cmake -DgRPC_INSTALL=ON \
-            -DgRPC_BUILD_TESTS=OFF \
-            ../.. && sudo make --jobs 5 --output-sync=target
-      - name: Install gRPC
-        run: |
-          GNU_HOST=aarch64-linux-gnu
-          C_COMPILER_ARM_LINUX=$GNU_HOST-gcc
-          CXX_COMPILER_ARM_LINUX=$GNU_HOST-g++
-
-          CROSS_TOOLCHAIN=/usr/$GNU_HOST
-          CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
-          CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
-
-          # https://cmake.org/cmake/help/v3.13/manual/cmake-toolchains.7.html#cross-compiling-for-linux
-          echo "set(CMAKE_SYSTEM_NAME Linux)" >> $CMAKE_CROSS_TOOLCHAIN && \
-            echo "set(CMAKE_SYSTEM_PROCESSOR arm)" >> $CMAKE_CROSS_TOOLCHAIN && \
-            echo "set(CMAKE_STAGING_PREFIX $CROSS_STAGING_PREFIX)" >> $CMAKE_CROSS_TOOLCHAIN && \
-            echo "set(CMAKE_SYSROOT ${CROSS_TOOLCHAIN}/sysroot)" >> $CMAKE_CROSS_TOOLCHAIN && \
-            echo "set(CMAKE_C_COMPILER /usr/bin/$C_COMPILER_ARM_LINUX)" >> $CMAKE_CROSS_TOOLCHAIN && \
-            echo "set(CMAKE_CXX_COMPILER /usr/bin/$CXX_COMPILER_ARM_LINUX)" >> $CMAKE_CROSS_TOOLCHAIN && \
-            echo "set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)" >> $CMAKE_CROSS_TOOLCHAIN && \
-            echo "set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
-            echo "set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
-            echo "set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN
-          GRPC_DIR=$PWD/grpc
-          cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install && \
-          GRPC_CROSS_BUILD_DIR=$GRPC_DIR/cmake/cross_build && \
-          mkdir -p $GRPC_CROSS_BUILD_DIR && \
-          cd $GRPC_CROSS_BUILD_DIR && \
-          cmake -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN \
-            -DCMAKE_BUILD_TYPE=Release \
-            -DCMAKE_INSTALL_PREFIX=$CROSS_TOOLCHAIN/grpc_install \
-            ../.. && \
-          sudo make -j`nproc` install
-      - name: Build
-        id: build
-        run: |
-          GNU_HOST=aarch64-linux-gnu
-          C_COMPILER_ARM_LINUX=$GNU_HOST-gcc
-          CXX_COMPILER_ARM_LINUX=$GNU_HOST-g++
-
-          CROSS_TOOLCHAIN=/usr/$GNU_HOST
-          CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
-          CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
-          export PATH=$PATH:$GOPATH/bin
-          export PATH=/usr/local/cuda/bin:$PATH
-          sudo rm -rf /usr/aarch64-linux-gnu/lib/libstdc++.so.6
-          sudo cp -rf /usr/aarch64-linux-gnu/lib/libstdc++.so* /usr/aarch64-linux-gnu/lib/libstdc++.so.6
-          sudo cp /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 ld.so
-          BACKEND_LIBS="./grpc/cmake/cross_build/third_party/re2/libre2.a ./grpc/cmake/cross_build/libgrpc.a ./grpc/cmake/cross_build/libgrpc++.a ./grpc/cmake/cross_build/third_party/protobuf/libprotobuf.a /usr/aarch64-linux-gnu/lib/libc.so.6 /usr/aarch64-linux-gnu/lib/libstdc++.so.6 /usr/aarch64-linux-gnu/lib/libgomp.so.1 /usr/aarch64-linux-gnu/lib/libm.so.6 /usr/aarch64-linux-gnu/lib/libgcc_s.so.1 /usr/aarch64-linux-gnu/lib/libdl.so.2 /usr/aarch64-linux-gnu/lib/libpthread.so.0 ./ld.so" \
-          GOOS=linux \
-          GOARCH=arm64 \
-          CMAKE_ARGS="-DProtobuf_INCLUDE_DIRS=$CROSS_STAGING_PREFIX/include -DProtobuf_DIR=$CROSS_STAGING_PREFIX/lib/cmake/protobuf -DgRPC_DIR=$CROSS_STAGING_PREFIX/lib/cmake/grpc -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++" make dist-cross-linux-arm64
-      - uses: actions/upload-artifact@v4
-        with:
-          name: LocalAI-linux-arm64
-          path: release/
-      - name: Release
-        uses: softprops/action-gh-release@v2
-        if: startsWith(github.ref, 'refs/tags/')
-        with:
-          files: |
-            release/*
-      - name: Setup tmate session if tests fail
-        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.19
-        with:
-          detached: true
-          connect-timeout-seconds: 180
-          limit-access-to-actor: true
-  build-linux:
-    runs-on: arc-runner-set
-    steps:
-      - name: Force Install GIT latest
-        run: |
-          sudo apt-get update \
-          && sudo apt-get install -y software-properties-common \
-          && sudo apt-get update \
-          && sudo add-apt-repository -y ppa:git-core/ppa \
-          && sudo apt-get update \
-          && sudo apt-get install -y git
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: actions/setup-go@v5
-        with:
-          go-version: '1.21.x'
-          cache: false
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
-      - name: Intel Dependencies
-        run: |
-          wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
-          echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
-          sudo apt update
-          sudo apt install -y intel-basekit
-      - name: Install CUDA Dependencies
-        run: |
-          curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
-          sudo dpkg -i cuda-keyring_1.1-1_all.deb
-          sudo apt-get update
-          sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
-        env:
-          CUDA_VERSION: 12-5
-      - name: "Install Hipblas"
-        env:
-          ROCM_VERSION: "6.1"
-          AMDGPU_VERSION: "6.1"
-        run: |
-            set -ex
-
-            sudo apt-get update
-            sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg
-
-            curl -sL https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add -
-
-            printf "deb [arch=amd64] https://repo.radeon.com/rocm/apt/$ROCM_VERSION/ jammy main" | sudo tee /etc/apt/sources.list.d/rocm.list
-
-            printf "deb [arch=amd64] https://repo.radeon.com/amdgpu/$AMDGPU_VERSION/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
-            printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
-            sudo apt-get update
-
-            sudo DEBIAN_FRONTEND=noninteractive apt-get install -y \
-                hipblas-dev rocm-dev \
-                rocblas-dev
-
-            sudo apt-get clean
-            sudo rm -rf /var/lib/apt/lists/*
-            sudo ldconfig
-      - name: Cache grpc
-        id: cache-grpc
-        uses: actions/cache@v4
-        with:
-          path: grpc
-          key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
-      - name: Build grpc
-        if: steps.cache-grpc.outputs.cache-hit != 'true'
-        run: |
-          git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
-          cd grpc && sed -i "216i\  TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
-          cd cmake/build && cmake -DgRPC_INSTALL=ON \
-            -DgRPC_BUILD_TESTS=OFF \
-            ../.. && sudo make --jobs 5 --output-sync=target
-      - name: Install gRPC
-        run: |
-          cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
-      # BACKEND_LIBS needed for gpu-workload: /opt/intel/oneapi/*/lib/libiomp5.so /opt/intel/oneapi/*/lib/libmkl_core.so /opt/intel/oneapi/*/lib/libmkl_core.so.2 /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so.2 /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so.4 /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so.2 /opt/intel/oneapi/*/lib/libsycl.so /opt/intel/oneapi/*/lib/libsycl.so.7 /opt/intel/oneapi/*/lib/libsycl.so.7.1.0 /opt/rocm-*/lib/libamdhip64.so /opt/rocm-*/lib/libamdhip64.so.5 /opt/rocm-*/lib/libamdhip64.so.6 /opt/rocm-*/lib/libamdhip64.so.6.1.60100 /opt/rocm-*/lib/libhipblas.so /opt/rocm-*/lib/libhipblas.so.2 /opt/rocm-*/lib/libhipblas.so.2.1.60100 /opt/rocm-*/lib/librocblas.so /opt/rocm-*/lib/librocblas.so.4 /opt/rocm-*/lib/librocblas.so.4.1.60100 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/local/cuda-*/targets/x86_64-linux/lib/libcublas.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcublasLt.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcudart.so /usr/local/cuda-*/targets/x86_64-linux/lib/stubs/libcuda.so
-      - name: Build
-        id: build
-        run: |
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
-          export PATH=$PATH:$GOPATH/bin
-          export PATH=/usr/local/cuda/bin:$PATH
-          export PATH=/opt/rocm/bin:$PATH
-          source /opt/intel/oneapi/setvars.sh
-          sudo cp /lib64/ld-linux-x86-64.so.2 ld.so
-          BACKEND_LIBS="./ld.so ./sources/go-piper/piper/build/fi/lib/libfmt.a ./sources/go-piper/piper-phonemize/pi/lib/libonnxruntime.so.1.14.1 ./sources/go-piper/piper-phonemize/pi/src/libespeak-ng/libespeak-ng.so /usr/lib/x86_64-linux-gnu/libdl.so.2 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/lib/x86_64-linux-gnu/libpthread.so.0 ./sources/go-piper/piper-phonemize/pi/lib/libpiper_phonemize.so.1 ./sources/go-piper/piper/build/si/lib/libspdlog.a ./sources/go-piper/espeak/ei/lib/libucd.so" \
-          make -j4 dist
-      - uses: actions/upload-artifact@v4
-        with:
-          name: LocalAI-linux
-          path: release/
-      - name: Release
-        uses: softprops/action-gh-release@v2
-        if: startsWith(github.ref, 'refs/tags/')
-        with:
-          files: |
-            release/*
-      - name: Setup tmate session if tests fail
-        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.19
-        with:
-          detached: true
-          connect-timeout-seconds: 180
-          limit-access-to-actor: true
-  build-stablediffusion:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: actions/setup-go@v5
-        with:
-          go-version: '1.21.x'
-          cache: false
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
-      - name: Build stablediffusion
-        run: |
-          export PATH=$PATH:$GOPATH/bin
-          make backend-assets/grpc/stablediffusion
-          mkdir -p release && cp backend-assets/grpc/stablediffusion release
-        env:
-          GO_TAGS: stablediffusion
-      - uses: actions/upload-artifact@v4
-        with:
-          name: stablediffusion
-          path: release/
-      - name: Release
-        uses: softprops/action-gh-release@v2
-        if: startsWith(github.ref, 'refs/tags/')
-        with:
-          files: |
-            release/*
-
-  build-macOS-x86_64:
-    runs-on: macos-13
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: actions/setup-go@v5
-        with:
-          go-version: '1.21.x'
-          cache: false
-      - name: Dependencies
-        run: |
-          brew install protobuf grpc
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
-      - name: Build
-        id: build
-        run: |
-          export C_INCLUDE_PATH=/usr/local/include
-          export CPLUS_INCLUDE_PATH=/usr/local/include
-          export PATH=$PATH:$GOPATH/bin
-          export SKIP_GRPC_BACKEND=backend-assets/grpc/whisper
-          make dist
-      - uses: actions/upload-artifact@v4
-        with:
-          name: LocalAI-MacOS-x86_64
-          path: release/
-      - name: Release
-        uses: softprops/action-gh-release@v2
-        if: startsWith(github.ref, 'refs/tags/')
-        with:
-          files: |
-            release/*
-      - name: Setup tmate session if tests fail
-        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.19
-        with:
-          detached: true
-          connect-timeout-seconds: 180
-          limit-access-to-actor: true
-
-  build-macOS-arm64:
-    runs-on: macos-14
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: actions/setup-go@v5
-        with:
-          go-version: '1.21.x'
-          cache: false
-      - name: Dependencies
-        run: |
-          brew install protobuf grpc libomp llvm
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
-      - name: Build
-        id: build
-        run: |
-          export C_INCLUDE_PATH=/usr/local/include
-          export CPLUS_INCLUDE_PATH=/usr/local/include
-          export PATH=$PATH:$GOPATH/bin
-          export CC=/opt/homebrew/opt/llvm/bin/clang
-          make dist
-      - uses: actions/upload-artifact@v4
-        with:
-          name: LocalAI-MacOS-arm64
-          path: release/
-      - name: Release
-        uses: softprops/action-gh-release@v2
-        if: startsWith(github.ref, 'refs/tags/')
-        with:
-          files: |
-            release/*
-      - name: Setup tmate session if tests fail
-        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.19
-        with:
-          detached: true
-          connect-timeout-seconds: 180
-          limit-access-to-actor: true
--- a/.github/workflows/release.yml.disabled
+++ b/.github/workflows/release.yml.disabled
@@ -0,0 +1,26 @@
+name: goreleaser
+
+on:
+  push:
+    tags:
+      - 'v*'
+
+jobs:
+  goreleaser:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+      - name: Set up Go
+        uses: actions/setup-go@v3
+        with:
+          go-version: 1.18
+      - name: Run GoReleaser
+        uses: goreleaser/goreleaser-action@v4
+        with:
+          version: latest
+          args: release --clean
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--- a/.github/workflows/secscan.yaml
+++ b/.github/workflows/secscan.yaml
@@ -1,30 +0,0 @@
-name: "Security Scan"
-
-# Run workflow each time code is pushed to your repository and on a schedule.
-# The scheduled workflow runs every at 00:00 on Sunday UTC time.
-on:
-  push:
-  schedule:
-  - cron: '0 0 * * 0'
-
-jobs:
-  tests:
-    runs-on: ubuntu-latest
-    env:
-      GO111MODULE: on
-    steps:
-      - name: Checkout Source
-        uses: actions/checkout@v4
-        if: ${{ github.actor != 'dependabot[bot]' }}
-      - name: Run Gosec Security Scanner
-        if: ${{ github.actor != 'dependabot[bot]' }}
-        uses: securego/gosec@v2.21.4
-        with:
-          # we let the report trigger content trigger a failure using the GitHub Security features.
-          args: '-no-fail -fmt sarif -out results.sarif ./...'
-      - name: Upload SARIF file
-        if: ${{ github.actor != 'dependabot[bot]' }}
-        uses: github/codeql-action/upload-sarif@v3
-        with:
-          # Path to SARIF file relative to the root of the repository
-          sarif_file: results.sarif
--- a/.github/workflows/test-extra.yml
+++ b/.github/workflows/test-extra.yml
@@ -1,302 +0,0 @@
---
-name: 'Tests extras backends'
-
-on:
-  pull_request:
-  push:
-    branches:
-      - master
-    tags:
-      - '*'
-
-concurrency:
-  group: ci-tests-extra-${{ github.head_ref || github.ref }}-${{ github.repository }}
-  cancel-in-progress: true
-
-jobs:
-  tests-transformers:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential ffmpeg
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-          sudo apt-get install -y libopencv-dev
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
-
-      - name: Test transformers
-        run: |
-           make --jobs=5 --output-sync=target -C backend/python/transformers
-           make --jobs=5 --output-sync=target -C backend/python/transformers test
-
-  tests-sentencetransformers:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential ffmpeg
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-          sudo apt-get install -y libopencv-dev
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
-
-      - name: Test sentencetransformers
-        run: |
-           make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
-           make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test
-
-
-  tests-rerankers:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential ffmpeg
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-          sudo apt-get install -y libopencv-dev
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
-
-      - name: Test rerankers
-        run: |
-           make --jobs=5 --output-sync=target -C backend/python/rerankers
-           make --jobs=5 --output-sync=target -C backend/python/rerankers test
-
-  tests-diffusers:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y build-essential ffmpeg
-          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-          sudo apt-get install -y libopencv-dev
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
-      - name: Test diffusers
-        run: |
-          make --jobs=5 --output-sync=target -C backend/python/diffusers
-          make --jobs=5 --output-sync=target -C backend/python/diffusers test
-
-  tests-parler-tts:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential ffmpeg
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-          sudo apt-get install -y libopencv-dev
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
-
-      - name: Test parler-tts
-        run: |
-           make --jobs=5 --output-sync=target -C backend/python/parler-tts
-           make --jobs=5 --output-sync=target -C backend/python/parler-tts test
-      - name: Setup tmate session if tests fail
-        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.19
-        with:
-          detached: true
-          connect-timeout-seconds: 180
-          limit-access-to-actor: true
-
-  tests-openvoice:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential ffmpeg
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-          sudo apt-get install -y libopencv-dev
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
-
-      - name: Test openvoice
-        run: |
-           make --jobs=5 --output-sync=target -C backend/python/openvoice
-           make --jobs=5 --output-sync=target -C backend/python/openvoice test
-
-  tests-transformers-musicgen:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential ffmpeg
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-          sudo apt-get install -y libopencv-dev
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
-
-      - name: Test transformers-musicgen
-        run: |
-           make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
-           make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
-
-  # tests-bark:
-  #   runs-on: ubuntu-latest
-  #   steps:
-  #     - name: Release space from worker
-  #       run: |
-  #           echo "Listing top largest packages"
-  #           pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-  #           head -n 30 <<< "${pkgs}"
-  #           echo
-  #           df -h
-  #           echo
-  #           sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
-  #           sudo apt-get remove --auto-remove android-sdk-platform-tools || true
-  #           sudo apt-get purge --auto-remove android-sdk-platform-tools || true
-  #           sudo rm -rf /usr/local/lib/android
-  #           sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
-  #           sudo rm -rf /usr/share/dotnet
-  #           sudo apt-get remove -y '^mono-.*' || true
-  #           sudo apt-get remove -y '^ghc-.*' || true
-  #           sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
-  #           sudo apt-get remove -y 'php.*' || true
-  #           sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
-  #           sudo apt-get remove -y '^google-.*' || true
-  #           sudo apt-get remove -y azure-cli || true
-  #           sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
-  #           sudo apt-get remove -y '^gfortran-.*' || true
-  #           sudo apt-get remove -y microsoft-edge-stable || true
-  #           sudo apt-get remove -y firefox || true
-  #           sudo apt-get remove -y powershell || true
-  #           sudo apt-get remove -y r-base-core || true
-  #           sudo apt-get autoremove -y
-  #           sudo apt-get clean
-  #           echo
-  #           echo "Listing top largest packages"
-  #           pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-  #           head -n 30 <<< "${pkgs}"
-  #           echo
-  #           sudo rm -rfv build || true
-  #           sudo rm -rf /usr/share/dotnet || true
-  #           sudo rm -rf /opt/ghc || true
-  #           sudo rm -rf "/usr/local/share/boost" || true
-  #           sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
-  #           df -h
-  #     - name: Clone
-  #       uses: actions/checkout@v4
-  #       with:
-  #         submodules: true
-  #     - name: Dependencies
-  #       run: |
-  #         sudo apt-get update
-  #         sudo apt-get install build-essential ffmpeg
-  #         # Install UV
-  #         curl -LsSf https://astral.sh/uv/install.sh | sh
-  #         sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-  #         sudo apt-get install -y libopencv-dev
-  #         pip install --user --no-cache-dir grpcio-tools==1.64.1
-
-  #     - name: Test bark
-  #       run: |
-  #          make --jobs=5 --output-sync=target -C backend/python/bark
-  #          make --jobs=5 --output-sync=target -C backend/python/bark test
-
-
-  # Below tests needs GPU. Commented out for now
-  # TODO: Re-enable as soon as we have GPU nodes
-  # tests-vllm:
-  #   runs-on: ubuntu-latest
-  #   steps:
-  #     - name: Clone
-  #       uses: actions/checkout@v4
-  #       with:
-  #         submodules: true
-  #     - name: Dependencies
-  #       run: |
-  #         sudo apt-get update
-  #         sudo apt-get install build-essential ffmpeg
-  #         # Install UV
-  #         curl -LsSf https://astral.sh/uv/install.sh | sh
-  #         sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-  #         sudo apt-get install -y libopencv-dev
-  #         pip install --user --no-cache-dir grpcio-tools==1.64.1
-  #     - name: Test vllm
-  #       run: |
-  #          make --jobs=5 --output-sync=target -C backend/python/vllm
-  #          make --jobs=5 --output-sync=target -C backend/python/vllm test
-  tests-vallex:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential ffmpeg
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-          sudo apt-get install -y libopencv-dev
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
-      - name: Test vall-e-x
-        run: |
-           make --jobs=5 --output-sync=target -C backend/python/vall-e-x
-           make --jobs=5 --output-sync=target -C backend/python/vall-e-x test
-
-  tests-coqui:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential ffmpeg
-          sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
-      - name: Test coqui
-        run: |
-          make --jobs=5 --output-sync=target -C backend/python/coqui
-          make --jobs=5 --output-sync=target -C backend/python/coqui test
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -9,234 +9,36 @@ on:
    tags:
      - '*'

-env:
-  GRPC_VERSION: v1.65.0
-
-concurrency:
-  group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
-  cancel-in-progress: true
-
 jobs:
-  tests-linux:
+  ubuntu-latest:
    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        go-version: ['1.21.x']
+
    steps:
-      - name: Release space from worker
-        run: |
-          echo "Listing top largest packages"
-          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-          head -n 30 <<< "${pkgs}"
-          echo
-          df -h
-          echo
-          sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
-          sudo apt-get remove --auto-remove android-sdk-platform-tools || true
-          sudo apt-get purge --auto-remove android-sdk-platform-tools || true
-          sudo rm -rf /usr/local/lib/android
-          sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
-          sudo rm -rf /usr/share/dotnet
-          sudo apt-get remove -y '^mono-.*' || true
-          sudo apt-get remove -y '^ghc-.*' || true
-          sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
-          sudo apt-get remove -y 'php.*' || true
-          sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
-          sudo apt-get remove -y '^google-.*' || true
-          sudo apt-get remove -y azure-cli || true
-          sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
-          sudo apt-get remove -y '^gfortran-.*' || true
-          sudo apt-get autoremove -y
-          sudo apt-get clean
-          echo
-          echo "Listing top largest packages"
-          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-          head -n 30 <<< "${pkgs}"
-          echo
-          sudo rm -rfv build || true
-          df -h
      - name: Clone
-        uses: actions/checkout@v4
-        with:
+        uses: actions/checkout@v3
+        with: 
          submodules: true
-      - name: Setup Go ${{ matrix.go-version }}
-        uses: actions/setup-go@v5
-        with:
-          go-version: ${{ matrix.go-version }}
-          cache: false
-      # You can test your matrix by printing the current Go version
-      - name: Display Go version
-        run: go version
      - name: Dependencies
        run: |
          sudo apt-get update
-          sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
-          sudo apt-get install -y libgmock-dev
-          curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
-             sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
-             gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
-             sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
-             sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
-             sudo apt-get update && \
-             sudo apt-get install -y conda
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          sudo apt-get install -y ca-certificates cmake patch python3-pip unzip
-          sudo apt-get install -y libopencv-dev
-
-          curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
-          unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
-          rm protoc.zip
-
-          curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
-          sudo dpkg -i cuda-keyring_1.1-1_all.deb
-          sudo apt-get update
-          sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
-          export CUDACXX=/usr/local/cuda/bin/nvcc
-
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-
-          # The python3-grpc-tools package in 22.04 is too old
-          pip install --user grpcio-tools
-
-          sudo rm -rfv /usr/bin/conda || true
-          PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers
-
-          # Pre-build piper before we start tests in order to have shared libraries in place
-          make sources/go-piper && \
-          GO_TAGS="tts" make -C sources/go-piper piper.o && \
-          sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
-          # Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
-          PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
-        env:
-          CUDA_VERSION: 12-4
-      - name: Cache grpc
-        id: cache-grpc
-        uses: actions/cache@v4
-        with:
-          path: grpc
-          key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
-      - name: Build grpc
-        if: steps.cache-grpc.outputs.cache-hit != 'true'
-        run: |
-          git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --jobs 5 --shallow-submodules https://github.com/grpc/grpc && \
-          cd grpc && sed -i "216i\  TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && cd cmake/build && \
-          cmake -DgRPC_INSTALL=ON \
-            -DgRPC_BUILD_TESTS=OFF \
-            ../.. && sudo make --jobs 5
-      - name: Install gRPC
-        run: |
-          cd grpc && cd cmake/build && sudo make --jobs 5 install
+          sudo apt-get install build-essential
      - name: Test
        run: |
-          PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
-      - name: Setup tmate session if tests fail
-        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.19
-        with:
-          detached: true
-          connect-timeout-seconds: 180
-          limit-access-to-actor: true
+          make test

-  tests-aio-container:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Release space from worker
-        run: |
-          echo "Listing top largest packages"
-          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-          head -n 30 <<< "${pkgs}"
-          echo
-          df -h
-          echo
-          sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
-          sudo apt-get remove --auto-remove android-sdk-platform-tools || true
-          sudo apt-get purge --auto-remove android-sdk-platform-tools || true
-          sudo rm -rf /usr/local/lib/android
-          sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
-          sudo rm -rf /usr/share/dotnet
-          sudo apt-get remove -y '^mono-.*' || true
-          sudo apt-get remove -y '^ghc-.*' || true
-          sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
-          sudo apt-get remove -y 'php.*' || true
-          sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
-          sudo apt-get remove -y '^google-.*' || true
-          sudo apt-get remove -y azure-cli || true
-          sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
-          sudo apt-get remove -y '^gfortran-.*' || true
-          sudo apt-get autoremove -y
-          sudo apt-get clean
-          echo
-          echo "Listing top largest packages"
-          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-          head -n 30 <<< "${pkgs}"
-          echo
-          sudo rm -rfv build || true
-          df -h
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - name: Dependencies
-        run: |
-          # Install protoc
-          curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
-          unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
-          rm protoc.zip
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-          PATH="$PATH:$HOME/go/bin" make protogen-go
-      - name: Build images
-        run: |
-          docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=extras --build-arg EXTRA_BACKENDS=rerankers --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
-          BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
-      - name: Test
-        run: |
-            PATH="$PATH:$HOME/go/bin" LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
-            make run-e2e-aio
-      - name: Setup tmate session if tests fail
-        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.19
-        with:
-          detached: true
-          connect-timeout-seconds: 180
-          limit-access-to-actor: true
+  macOS-latest:
+    runs-on: macOS-latest

-  tests-apple:
-    runs-on: macOS-14
-    strategy:
-      matrix:
-        go-version: ['1.21.x']
    steps:
      - name: Clone
-        uses: actions/checkout@v4
-        with:
+        uses: actions/checkout@v3
+        with: 
          submodules: true
-      - name: Setup Go ${{ matrix.go-version }}
-        uses: actions/setup-go@v5
-        with:
-          go-version: ${{ matrix.go-version }}
-          cache: false
-      # You can test your matrix by printing the current Go version
-      - name: Display Go version
-        run: go version
+
      - name: Dependencies
        run: |
-          brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
-          pip install --user --no-cache-dir grpcio-tools
+          brew update
+          brew install sdl2
      - name: Test
        run: |
-          export C_INCLUDE_PATH=/usr/local/include
-          export CPLUS_INCLUDE_PATH=/usr/local/include
-          export CC=/opt/homebrew/opt/llvm/bin/clang
-          # Used to run the newer GNUMake version from brew that supports --output-sync
-          export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
-          BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
-      - name: Setup tmate session if tests fail
-        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.19
-        with:
-          detached: true
-          connect-timeout-seconds: 180
-          limit-access-to-actor: true
+          make test
--- a/.github/workflows/update_swagger.yaml
+++ b/.github/workflows/update_swagger.yaml
@@ -1,37 +0,0 @@
-name: Update swagger
-on:
-  schedule:
-    - cron: 0 20 * * *
-  workflow_dispatch:
-jobs:
-  swagger:
-    strategy:
-      fail-fast: false
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-go@v5
-        with:
-          go-version: 'stable'
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install protobuf-compiler
-      - run: |
-          go install github.com/swaggo/swag/cmd/swag@latest
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
-      - name: Bump swagger 🔧
-        run: |
-          make protogen-go swagger
-      - name: Create Pull Request
-        uses: peter-evans/create-pull-request@v7
-        with:
-          token: ${{ secrets.UPDATE_BOT_TOKEN }}
-          push-to-fork: ci-forks/LocalAI
-          commit-message: 'feat(swagger): update swagger'
-          title: 'feat(swagger): update swagger'
-          branch: "update/swagger"
-          body:  Update swagger
-          signoff: true
-
--- a/.github/workflows/yaml-check.yml
+++ b/.github/workflows/yaml-check.yml
@@ -1,18 +0,0 @@
-name: 'Yamllint GitHub Actions'
-on:
-  - pull_request
-jobs:
-  yamllint:
-    name: 'Yamllint'
-    runs-on: ubuntu-latest
-    steps:
-      - name: 'Checkout'
-        uses: actions/checkout@master
-      - name: 'Yamllint'
-        uses: karancode/yamllint-github-action@master
-        with:
-          yamllint_file_or_dir: 'gallery'
-          yamllint_strict: false
-          yamllint_comment: true
-        env:
-          GITHUB_ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--- a/.gitignore
+++ b/.gitignore
@@ -1,58 +1,15 @@
 # go-llama build artifacts
-/sources/
-__pycache__/
-*.a
-get-sources
-prepare-sources
-/backend/cpp/llama/grpc-server
-/backend/cpp/llama/llama.cpp
-/backend/cpp/llama-*
-
-*.log
-
-go-ggml-transformers
+go-llama
+go-gpt4all-j
 go-gpt2
-whisper.cpp
-/bloomz
-go-bert

 # LocalAI build binary
 LocalAI
 local-ai
 # prevent above rules from omitting the helm chart
 !charts/*
-# prevent above rules from omitting the api/localai folder
-!api/localai
-!core/**/localai

 # Ignore models
-models/*
-test-models/
-test-dir/
-
-release/
-
-# just in case
-.DS_Store
-.idea
-
-# Generated during build
-backend-assets/*
-!backend-assets/.keep
-prepare
-/ggml-metal.metal
-docs/static/gallery.html
-
-# Protobuf generated files
-*.pb.go
-*pb2.py
-*pb2_grpc.py
-
-# SonarQube
-.scannerwork
-
-# backend virtual environments
-**/venv
-
-# per-developer customization files for the development container
-.devcontainer/customization/*
+models/*.bin
+models/ggml-*
+test-models/
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +0,0 @@
-[submodule "docs/themes/hugo-theme-relearn"]
-	path = docs/themes/hugo-theme-relearn
-	url = https://github.com/McShelby/hugo-theme-relearn.git
-[submodule "docs/themes/lotusdocs"]
-	path = docs/themes/lotusdocs
-	url = https://github.com/colinwilson/lotusdocs
--- a/.goreleaser.yaml
+++ b/.goreleaser.yaml
@@ -0,0 +1,15 @@
+# Make sure to check the documentation at http://goreleaser.com
+project_name: local-ai
+builds:
+  - ldflags:
+      - -w -s
+    env:
+      - CGO_ENABLED=0
+    goos:
+      - linux
+      - darwin
+      - windows
+    goarch:
+      - amd64
+      - arm64
+    binary: '{{ .ProjectName }}'
--- a/.vscode/extensions.json
+++ b/.vscode/extensions.json
@@ -1,5 +0,0 @@
-{
-    "recommendations": [
-        "golang.go"
-    ]
-}
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -2,33 +2,19 @@
    "version": "0.2.0",
    "configurations": [
        {
-            "name": "Python: Current File",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "${file}",
-            "console": "integratedTerminal",
-            "justMyCode": false,
-            "cwd": "${fileDirname}",
-            "env": {
-                "OPENAI_API_BASE": "http://localhost:8080/v1",
-                "OPENAI_API_KEY": "abc"
-            }
-        },
-        {
-            "name": "Launch LocalAI API",
+            "name": "Launch Go",
            "type": "go",
            "request": "launch",
            "mode": "debug",
-            "program": "${workspaceRoot}",
-            "args": [],
+            "program": "${workspaceFolder}/main.go",
+            "args": [
+                "api"
+            ],
            "env": {
-                "LOCALAI_LOG_LEVEL": "debug",
-                "LOCALAI_P2P": "true",
-                "LOCALAI_FEDERATED": "true"
-            },
-            "buildFlags": ["-tags", "stablediffusion p2p tts", "-v"],
-            "envFile": "${workspaceFolder}/.env",
-            "cwd": "${workspaceRoot}"
+                "C_INCLUDE_PATH": "/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2",
+                "LIBRARY_PATH": "/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2",
+                "DEBUG": "true"
+            }
        }
    ]
 }
--- a/.yamllint
+++ b/.yamllint
@@ -1,4 +0,0 @@
-extends: default
-
-rules:
-    line-length: disable
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,84 +0,0 @@
-# Contributing to LocalAI
-
-Thank you for your interest in contributing to LocalAI! We appreciate your time and effort in helping to improve our project. Before you get started, please take a moment to review these guidelines.
-
-## Table of Contents
-
- [Getting Started](#getting-started)
-  - [Prerequisites](#prerequisites)
-  - [Setting up the Development Environment](#setting-up-the-development-environment)
- [Contributing](#contributing)
-  - [Submitting an Issue](#submitting-an-issue)
-  - [Creating a Pull Request (PR)](#creating-a-pull-request-pr)
- [Coding Guidelines](#coding-guidelines)
- [Testing](#testing)
- [Documentation](#documentation)
- [Community and Communication](#community-and-communication)
-
-## Getting Started
-
-### Prerequisites
-
- Golang [1.21]
- Git
- macOS/Linux
-
-### Setting up the Development Environment and running localAI in the local environment
-
-1. Clone the repository: `git clone https://github.com/go-skynet/LocalAI.git`
-2. Navigate to the project directory: `cd LocalAI`
-3. Install the required dependencies ( see https://localai.io/basics/build/#build-localai-locally )
-4. Build LocalAI: `make build`
-5. Run LocalAI: `./local-ai`
-
-## Contributing
-
-We welcome contributions from everyone! To get started, follow these steps:
-
-### Submitting an Issue
-
-If you find a bug, have a feature request, or encounter any issues, please check the [issue tracker](https://github.com/go-skynet/LocalAI/issues) to see if a similar issue has already been reported. If not, feel free to [create a new issue](https://github.com/go-skynet/LocalAI/issues/new) and provide as much detail as possible.
-
-### Creating a Pull Request (PR)
-
-1. Fork the repository.
-2. Create a new branch with a descriptive name: `git checkout -b [branch name]`
-3. Make your changes and commit them.
-4. Push the changes to your fork: `git push origin [branch name]`
-5. Create a new pull request from your branch to the main project's `main` or `master` branch.
-6. Provide a clear description of your changes in the pull request.
-7. Make any requested changes during the review process.
-8. Once your PR is approved, it will be merged into the main project.
-
-## Coding Guidelines
-
- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like [`golangci-lint`](https://golangci-lint.run) can help you here.
-
-## Testing
-
-`make test` cannot cover every model yet. Please be sure to add a test case for any new feature or any part that was changed.
-
-### Running AIO tests
-
-All-In-One images have a set of tests that automatically verify that most of the endpoints work correctly. A typical flow is:
-
-```bash
-# Build the LocalAI docker image
-make DOCKER_IMAGE=local-ai docker
-
-# Build the corresponding AIO image
-BASE_IMAGE=local-ai DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
-
-# Run the AIO e2e tests
-LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio make run-e2e-aio
-```
-
-## Documentation
-
-We welcome contributions to the documentation; please open a new PR or create a new issue. The documentation is available under `docs/`: https://github.com/mudler/LocalAI/tree/master/docs
- 
-## Community and Communication
-
- You can reach out via the GitHub issue tracker.
- Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions)
- Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy)
--- a/492
+++ b/492
@@ -1,492 +0,0 @@
-ARG IMAGE_TYPE=extras
-ARG BASE_IMAGE=ubuntu:22.04
-ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
-ARG INTEL_BASE_IMAGE=${BASE_IMAGE}
-
-# The requirements-core target is common to all images.  Nothing should be placed in requirements-core unless every single build will use it.
-FROM ${BASE_IMAGE} AS requirements-core
-
-USER root
-
-ARG GO_VERSION=1.22.6
-ARG CMAKE_VERSION=3.26.4
-ARG CMAKE_FROM_SOURCE=false
-ARG TARGETARCH
-ARG TARGETVARIANT
-
-ENV DEBIAN_FRONTEND=noninteractive
-ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
-
-
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-        build-essential \
-        ccache \
-        ca-certificates \
-        curl libssl-dev \
-        git \
-        unzip upx-ucl && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-
-# Install CMake (the version in 22.04 is too old)
-RUN <<EOT bash
-    if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
-        curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
-    else
-        apt-get update && \
-        apt-get install -y \
-            cmake && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/*
-    fi
-EOT
-
-# Install Go
-RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
-ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
-
-# Install grpc compilers
-RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
-    go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-
-COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
-RUN update-ca-certificates
-
-RUN test -n "$TARGETARCH" \
-    || (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
-
-# Use the variables in subsequent instructions
-RUN echo "Target Architecture: $TARGETARCH"
-RUN echo "Target Variant: $TARGETVARIANT"
-
-# Cuda
-ENV PATH=/usr/local/cuda/bin:${PATH}
-
-# HipBLAS requirements
-ENV PATH=/opt/rocm/bin:${PATH}
-
-# OpenBLAS requirements and stable diffusion
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-        libopenblas-dev \
-        libopencv-dev && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-
-# Set up OpenCV
-RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
-
-WORKDIR /build
-
-###################################
-###################################
-
-# The requirements-extras target is for any builds with IMAGE_TYPE=extras. Nothing should be placed in this target unless every IMAGE_TYPE=extras build will use it
-FROM requirements-core AS requirements-extras
-
-# Install uv as a system package
-RUN curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR=/usr/bin sh
-ENV PATH="/root/.cargo/bin:${PATH}"
-
-RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-        espeak-ng \
-        espeak \
-        python3-pip \
-        python-is-python3 \
-        python3-dev llvm \
-        python3-venv && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/* && \
-    pip install --upgrade pip
-
-# Install grpcio-tools (the version in 22.04 is too old)
-RUN pip install --user grpcio-tools
-
-###################################
-###################################
-
-# The requirements-drivers target is for BUILD_TYPE specific items.  If you need to install something specific to CUDA, or specific to ROCM, it goes here.
-# This target will be built on top of requirements-core or requirements-extras as determined by the IMAGE_TYPE build-arg
-FROM requirements-${IMAGE_TYPE} AS requirements-drivers
-
-ARG BUILD_TYPE
-ARG CUDA_MAJOR_VERSION=12
-ARG CUDA_MINOR_VERSION=0
-
-ENV BUILD_TYPE=${BUILD_TYPE}
-
-# Vulkan requirements
-RUN <<EOT bash
-    if [ "${BUILD_TYPE}" = "vulkan" ]; then
-        apt-get update && \
-        apt-get install -y  --no-install-recommends \
-            software-properties-common pciutils wget gpg-agent && \
-        wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
-        wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
-        apt-get update && \
-        apt-get install -y \
-            vulkan-sdk && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/*
-    fi
-EOT
-
-# CuBLAS requirements
-RUN <<EOT bash
-    if [ "${BUILD_TYPE}" = "cublas" ]; then
-        apt-get update && \
-        apt-get install -y  --no-install-recommends \
-            software-properties-common pciutils
-        if [ "amd64" = "$TARGETARCH" ]; then
-            curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
-        fi
-        if [ "arm64" = "$TARGETARCH" ]; then
-            curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
-        fi
-        dpkg -i cuda-keyring_1.1-1_all.deb && \
-        rm -f cuda-keyring_1.1-1_all.deb && \
-        apt-get update && \
-        apt-get install -y --no-install-recommends \
-            cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-            libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-            libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-            libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-            libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-            libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/*
-    fi
-EOT
-
-# If we are building with clblas support, we need the libraries for the builds
-RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
-        apt-get update && \
-        apt-get install -y --no-install-recommends \
-            libclblast-dev && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/* \
-    ; fi
-
-RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
-        apt-get update && \
-        apt-get install -y --no-install-recommends \
-            hipblas-dev \
-            rocblas-dev && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/* && \
-        # I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
-        # to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
-        ldconfig \
-    ; fi
-
-###################################
-###################################
-
-# Temporary workaround for Intel's repository to work correctly
-# https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/APT-Repository-not-working-signatures-invalid/m-p/1599436/highlight/true#M36143
-# This is a temporary workaround until Intel fixes their repository
-FROM ${INTEL_BASE_IMAGE} AS intel
-RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
-gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg
-RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list
-
-###################################
-###################################
-
-# The grpc target does one thing, it builds and installs GRPC.  This is in its own layer so that it can be effectively cached by CI.
-# You probably don't need to change anything here, and if you do, make sure that CI is adjusted so that the cache continues to work.
-FROM ${GRPC_BASE_IMAGE} AS grpc
-
-# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
-ARG GRPC_MAKEFLAGS="-j4 -Otarget"
-ARG GRPC_VERSION=v1.65.0
-ARG CMAKE_FROM_SOURCE=false
-ARG CMAKE_VERSION=3.26.4
-
-ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
-
-WORKDIR /build
-
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-        ca-certificates \
-        build-essential curl libssl-dev \
-        git && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-
-# Install CMake (the version in 22.04 is too old)
-RUN <<EOT bash
-    if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
-        curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
-    else
-        apt-get update && \
-        apt-get install -y \
-            cmake && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/*
-    fi
-EOT
-
-# We install GRPC to a different prefix here so that we can copy in only the build artifacts later
-# saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree
-# and running make install in the target container
-RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
-    mkdir -p /build/grpc/cmake/build && \
-    cd /build/grpc/cmake/build && \
-    sed -i "216i\  TESTONLY" "../../third_party/abseil-cpp/absl/container/CMakeLists.txt" && \
-    cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \
-    make && \
-    make install && \
-    rm -rf /build
-
-###################################
-###################################
-
-# The builder-base target has the arguments, variables, and copies shared between full builder images and the uncompiled devcontainer
-
-FROM requirements-drivers AS builder-base
-
-ARG GO_TAGS="stablediffusion tts p2p"
-ARG GRPC_BACKENDS
-ARG MAKEFLAGS
-ARG LD_FLAGS="-s -w"
-
-ENV GRPC_BACKENDS=${GRPC_BACKENDS}
-ENV GO_TAGS=${GO_TAGS}
-ENV MAKEFLAGS=${MAKEFLAGS}
-ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
-ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
-ENV NVIDIA_VISIBLE_DEVICES=all
-ENV LD_FLAGS=${LD_FLAGS}
-
-RUN echo "GO_TAGS: $GO_TAGS" && echo "TARGETARCH: $TARGETARCH"
-
-WORKDIR /build
-
-
-# We need protoc installed, and the version in 22.04 is too old.  We will create one as part of installing the GRPC build below,
-# but that will also bring in a newer version of absl which stablediffusion cannot compile with.  This version of protoc is only
-# here so that we can generate the grpc code for the stablediffusion build
-RUN <<EOT bash
-    if [ "amd64" = "$TARGETARCH" ]; then
-        curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-x86_64.zip -o protoc.zip && \
-        unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
-        rm protoc.zip
-    fi
-    if [ "arm64" = "$TARGETARCH" ]; then
-        curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-aarch_64.zip -o protoc.zip && \
-        unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
-        rm protoc.zip
-    fi
-EOT
-
-
-###################################
-###################################
-
-# This first portion of builder holds the layers specifically used to build backend-assets/grpc/stablediffusion
-# In most cases, builder is the image you should be using - however, this can save build time if one just needs to copy backend-assets/grpc/stablediffusion and nothing else.
-FROM builder-base AS builder-sd
-
-# stablediffusion does not tolerate a newer version of abseil, copy only over enough elements to build it
-COPY Makefile .
-COPY go.mod .
-COPY go.sum .
-COPY backend/backend.proto ./backend/backend.proto
-COPY backend/go/image/stablediffusion ./backend/go/image/stablediffusion
-COPY pkg/grpc ./pkg/grpc
-COPY pkg/stablediffusion ./pkg/stablediffusion
-RUN git init
-RUN make sources/go-stable-diffusion
-RUN touch prepare-sources
-
-# Actually build the backend
-RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make backend-assets/grpc/stablediffusion
-
-###################################
-###################################
-
-# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
-# Adjustments to the build process should likely be made here.
-FROM builder-sd AS builder
-
-# Install the pre-built GRPC
-COPY --from=grpc /opt/grpc /usr/local
-
-# Rebuild with defaults backends
-WORKDIR /build
-
-COPY . .
-COPY .git .
-
-RUN make prepare
-
-## Build the binary
-## If it's CUDA or hipblas, we want to skip some of the llama-compat backends to save space
-## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build
-## (both will use CUDA or hipblas for the actual computation)
-RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
-        SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
-    else \
-        make build; \
-    fi
-# Guarantee pi/lib exists and holds a placeholder file; the final image copies pi/lib/* (presumably so that glob is never empty).
-RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
-        mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ && \
-        touch /build/sources/go-piper/piper-phonemize/pi/lib/keep \
-    ; fi
-
-###################################
-###################################
-
-# The devcontainer target is not used on CI. It is a target for developers to use locally -
-# rather than copying files it mounts them locally and leaves building to the developer
-
-FROM builder-base AS devcontainer
-
-ARG FFMPEG
-
-COPY --from=grpc /opt/grpc /usr/local
-
-COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion /build/backend-assets/grpc/stablediffusion
-
-COPY .devcontainer-scripts /.devcontainer-scripts
-
-# Add FFmpeg
-RUN if [ "${FFMPEG}" = "true" ]; then \
-        apt-get update && \
-        apt-get install -y --no-install-recommends \
-            ffmpeg && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/* \
-    ; fi
-
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-        ssh less wget
-# For the devcontainer, leave apt functional in case additional devtools are needed at runtime.
-
-RUN go install github.com/go-delve/delve/cmd/dlv@latest
-
-RUN go install github.com/mikefarah/yq/v4@latest
-
-###################################
-###################################
-
-# This is the final target. The result of this target will be the image uploaded to the registry.
-# If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
-FROM requirements-drivers
-
-ARG FFMPEG
-ARG BUILD_TYPE
-ARG TARGETARCH
-ARG IMAGE_TYPE=extras
-ARG EXTRA_BACKENDS
-ARG MAKEFLAGS
-
-ENV BUILD_TYPE=${BUILD_TYPE}
-ENV REBUILD=false
-ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
-ENV MAKEFLAGS=${MAKEFLAGS}
-
-ARG CUDA_MAJOR_VERSION=12
-ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
-ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
-ENV NVIDIA_VISIBLE_DEVICES=all
-
-# Add FFmpeg
-RUN if [ "${FFMPEG}" = "true" ]; then \
-        apt-get update && \
-        apt-get install -y --no-install-recommends \
-            ffmpeg && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/* \
-    ; fi
-
-WORKDIR /build
-
-# we start fresh & re-copy all assets because `make build` does not clean up nicely after itself
-# so when `entrypoint.sh` runs `make build` again (which it does by default), the build would fail
-# see https://github.com/go-skynet/LocalAI/pull/658#discussion_r1241971626 and
-# https://github.com/go-skynet/LocalAI/pull/434
-COPY . .
-
-COPY --from=builder /build/sources ./sources/
-COPY --from=grpc /opt/grpc /usr/local
-
-RUN make prepare-sources
-
-# Copy the binary
-COPY --from=builder /build/local-ai ./
-
-# Copy shared libraries for piper
-COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
-
-# do not let stablediffusion rebuild (requires an older version of absl)
-COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
-
-# Change the shell to bash so we can use [[ tests below
-SHELL ["/bin/bash", "-c"]
-# We try to strike a balance between individual layer size (as that affects total push time) and total image size
-# Splitting the backends into more groups with fewer items results in a larger image, but a smaller size for the largest layer
-# Splitting the backends into fewer groups with more items results in a smaller image, but a larger size for the largest layer
-
-RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-        make -C backend/python/coqui \
-    ; fi && \
-    if [[ ( "${EXTRA_BACKENDS}" =~ "parler-tts" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-        make -C backend/python/parler-tts \
-    ; fi && \
-    if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-        make -C backend/python/diffusers \
-    ; fi && \
-    if [[ ( "${EXTRA_BACKENDS}" =~ "transformers-musicgen" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-        make -C backend/python/transformers-musicgen \
-    ; fi
-
-RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-        make -C backend/python/vall-e-x \
-    ; fi && \
-    if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-        make -C backend/python/openvoice \
-    ; fi && \
-    if [[ ( "${EXTRA_BACKENDS}" =~ "sentencetransformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-        make -C backend/python/sentencetransformers \
-    ; fi && \
-    if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-        make -C backend/python/exllama2 \
-    ; fi && \
-    if [[ ( "${EXTRA_BACKENDS}" =~ "transformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-        make -C backend/python/transformers \
-    ; fi
-
-RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vllm" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-        make -C backend/python/vllm \
-    ; fi && \
-    if [[ ( "${EXTRA_BACKENDS}" =~ "autogptq" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-        make -C backend/python/autogptq \
-    ; fi && \
-    if [[ ( "${EXTRA_BACKENDS}" =~ "bark" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-        make -C backend/python/bark \
-    ; fi && \
-    if [[ ( "${EXTRA_BACKENDS}" =~ "rerankers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-        make -C backend/python/rerankers \
-    ; fi && \
-    if [[ ( "${EXTRA_BACKENDS}" =~ "mamba" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-        make -C backend/python/mamba \
-    ; fi
-
-# Make sure the models directory exists
-RUN mkdir -p /build/models
-
-# Define the health check command
-HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
-  CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1
-
-VOLUME /build/models
-EXPOSE 8080
-ENTRYPOINT [ "/build/entrypoint.sh" ]
--- a/Dockerfile.aio
+++ b/Dockerfile.aio
@@ -1,8 +0,0 @@
-ARG BASE_IMAGE=ubuntu:22.04
-
-FROM ${BASE_IMAGE} 
-
-RUN apt-get update && apt-get install -y pciutils && apt-get clean
-
-COPY aio/ /aio
-ENTRYPOINT [ "/aio/entrypoint.sh" ]
--- a/5
+++ b/5
@@ -1,5 +0,0 @@
-VERSION 0.7
-
-build:
-    FROM DOCKERFILE -f Dockerfile .
-    SAVE ARTIFACT /usr/bin/local-ai AS LOCAL local-ai
--- a/Entitlements.plist
+++ b/Entitlements.plist
@@ -1,10 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-<dict>
-    <key>com.apple.security.network.client</key>
-    <true/>
-    <key>com.apple.security.network.server</key>
-    <true/>
-</dict>
-</plist>
--- a/21
+++ b/21
@@ -1,21 +0,0 @@
-MIT License
-
-Copyright (c) 2023-2024 Ettore Di Giacinto (mudler@localai.io)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
--- a/946
+++ b/946
@@ -1,946 +0,0 @@
-GOCMD=go
-GOTEST=$(GOCMD) test
-GOVET=$(GOCMD) vet
-BINARY_NAME=local-ai
-
-DETECT_LIBS?=true
-
-# llama.cpp versions
-GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
-GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=47f931c8f9a26c072d71224bc8013cc66ea9e445
-
-# whisper.cpp version
-WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
-WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d
-
-# bert.cpp version
-BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
-BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4
-
-# go-piper version
-PIPER_REPO?=https://github.com/mudler/go-piper
-PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
-
-# stablediffusion version
-STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion
-STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
-
-# tinydream version
-TINYDREAM_REPO?=https://github.com/M0Rf30/go-tiny-dream
-TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
-
-ONNX_VERSION?=1.20.0
-ONNX_ARCH?=x64
-ONNX_OS?=linux
-
-export BUILD_TYPE?=
-export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
-export CMAKE_ARGS?=
-export BACKEND_LIBS?=
-
-CGO_LDFLAGS?=
-CGO_LDFLAGS_WHISPER?=
-CGO_LDFLAGS_WHISPER+=-lggml
-CUDA_LIBPATH?=/usr/local/cuda/lib64/
-GO_TAGS?=
-BUILD_ID?=
-NATIVE?=false
-
-TEST_DIR=/tmp/test
-
-TEST_FLAKES?=5
-
-RANDOM := $(shell bash -c 'echo $$RANDOM')
-
-VERSION?=$(shell git describe --always --tags || echo "dev" )
-# go tool nm ./local-ai | grep Commit
-LD_FLAGS?=-s -w
-override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Version=$(VERSION)"
-override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
-
-OPTIONAL_TARGETS?=
-
-export OS := $(shell uname -s)
-ARCH := $(shell uname -m)
-GREEN  := $(shell tput -Txterm setaf 2)
-YELLOW := $(shell tput -Txterm setaf 3)
-WHITE  := $(shell tput -Txterm setaf 7)
-CYAN   := $(shell tput -Txterm setaf 6)
-RESET  := $(shell tput -Txterm sgr0)
-
-UPX?=
-# check if upx exists
-ifeq (, $(shell which upx))
-	UPX=
-else
-	UPX=$(shell which upx)
-endif
-
-# Default Docker bridge IP
-E2E_BRIDGE_IP?=172.17.0.1
-
-ifndef UNAME_S
-UNAME_S := $(shell uname -s)
-endif
-
-# IF native is false, we add -DGGML_NATIVE=OFF to CMAKE_ARGS
-ifeq ($(NATIVE),false)
-	CMAKE_ARGS+=-DGGML_NATIVE=OFF
-endif
-
-# Detect if we are running on arm64
-ifneq (,$(findstring aarch64,$(shell uname -m)))
-	ONNX_ARCH=aarch64
-endif
-
-ifeq ($(OS),Darwin)
-	ONNX_OS=osx
-	ifneq (,$(findstring aarch64,$(shell uname -m)))
-		ONNX_ARCH=arm64
-	else ifneq (,$(findstring arm64,$(shell uname -m)))
-		ONNX_ARCH=arm64
-	else
-		ONNX_ARCH=x86_64
-	endif
-
-	ifeq ($(OSX_SIGNING_IDENTITY),)
-		OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
-	endif
-
-	# on OSX, if BUILD_TYPE is blank, we should default to use Metal
-	ifeq ($(BUILD_TYPE),)
-		BUILD_TYPE=metal
-	# disable metal if on Darwin and any other value is explicitly passed.
-	else ifneq ($(BUILD_TYPE),metal)
-		CMAKE_ARGS+=-DGGML_METAL=OFF
-		export GGML_NO_ACCELERATE=1
-		export GGML_NO_METAL=1
-	endif
-
-	ifeq ($(BUILD_TYPE),metal)
-#			-lcblas 	removed: it seems to always be listed as a duplicate flag.
-		CGO_LDFLAGS += -framework Accelerate
-	endif
-else
-CGO_LDFLAGS_WHISPER+=-lgomp
-endif
-
-ifeq ($(BUILD_TYPE),openblas)
-	CGO_LDFLAGS+=-lopenblas
-	export GGML_OPENBLAS=1
-endif
-
-ifeq ($(BUILD_TYPE),cublas)
-	CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
-	export GGML_CUDA=1
-	CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda -lcufft
-endif
-
-ifeq ($(BUILD_TYPE),vulkan)
-	CMAKE_ARGS+=-DGGML_VULKAN=1
-endif
-
-ifneq (,$(findstring sycl,$(BUILD_TYPE)))
-	export GGML_SYCL=1
-endif
-
-ifeq ($(BUILD_TYPE),sycl_f16)
-	export GGML_SYCL_F16=1
-endif
-
-ifeq ($(BUILD_TYPE),hipblas)
-	ROCM_HOME ?= /opt/rocm
-	ROCM_PATH ?= /opt/rocm
-	LD_LIBRARY_PATH ?= /opt/rocm/lib:/opt/rocm/llvm/lib
-	export CXX=$(ROCM_HOME)/llvm/bin/clang++
-	export CC=$(ROCM_HOME)/llvm/bin/clang
-	# llama-ggml has no hipblas support, so override it here.
-	export STABLE_BUILD_TYPE=
-	export GGML_HIP=1
-	GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
-	AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
-	CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
-	CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
-endif
-
-ifeq ($(BUILD_TYPE),metal)
-	CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
-	export GGML_METAL=1
-endif
-
-ifeq ($(BUILD_TYPE),clblas)
-	CGO_LDFLAGS+=-lOpenCL -lclblast
-	export GGML_OPENBLAS=1
-endif
-
-# glibc-static or glibc-devel-static required
-ifeq ($(STATIC),true)
-	LD_FLAGS+=-linkmode external -extldflags -static
-endif
-
-ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion)
-#	OPTIONAL_TARGETS+=go-stable-diffusion/libstablediffusion.a
-	OPTIONAL_GRPC+=backend-assets/grpc/stablediffusion
-endif
-
-ifeq ($(findstring tinydream,$(GO_TAGS)),tinydream)
-#	OPTIONAL_TARGETS+=go-tiny-dream/libtinydream.a
-	OPTIONAL_GRPC+=backend-assets/grpc/tinydream
-endif
-
-ifeq ($(findstring tts,$(GO_TAGS)),tts)
-#	OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
-#	OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
-	PIPER_CGO_CXXFLAGS+=-I$(CURDIR)/sources/go-piper/piper/src/cpp -I$(CURDIR)/sources/go-piper/piper/build/fi/include -I$(CURDIR)/sources/go-piper/piper/build/pi/include -I$(CURDIR)/sources/go-piper/piper/build/si/include
-	PIPER_CGO_LDFLAGS+=-L$(CURDIR)/sources/go-piper/piper/build/fi/lib -L$(CURDIR)/sources/go-piper/piper/build/pi/lib -L$(CURDIR)/sources/go-piper/piper/build/si/lib -lfmt -lspdlog -lucd
-	OPTIONAL_GRPC+=backend-assets/grpc/piper
-endif
-
-ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
-ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
-ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
-ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
-ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
-ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
-ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
-ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
-ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
-ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
-ALL_GRPC_BACKENDS+=backend-assets/grpc/silero-vad
-ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
-# Use filter-out to remove the specified backends
-ALL_GRPC_BACKENDS := $(filter-out $(SKIP_GRPC_BACKEND),$(ALL_GRPC_BACKENDS))
-
-GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)
-TEST_PATHS?=./api/... ./pkg/... ./core/...
-
-# If empty, then we build all
-ifeq ($(GRPC_BACKENDS),)
-	GRPC_BACKENDS=$(ALL_GRPC_BACKENDS)
-endif
-
-ifeq ($(BUILD_API_ONLY),true)
-	GRPC_BACKENDS=
-endif
-
-.PHONY: all test build vendor get-sources prepare-sources prepare
-
-all: help
-
-## BERT embeddings
-sources/go-bert.cpp:
-	mkdir -p sources/go-bert.cpp
-	cd sources/go-bert.cpp && \
-	git init && \
-	git remote add origin $(BERT_REPO) && \
-	git fetch origin && \
-	git checkout $(BERT_VERSION) && \
-	git submodule update --init --recursive --depth 1 --single-branch
-
-sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
-	$(MAKE) -C sources/go-bert.cpp libgobert.a
-
-## go-llama.cpp
-sources/go-llama.cpp:
-	mkdir -p sources/go-llama.cpp
-	cd sources/go-llama.cpp && \
-	git init && \
-	git remote add origin $(GOLLAMA_REPO) && \
-	git fetch origin && \
-	git checkout $(GOLLAMA_VERSION) && \
-	git submodule update --init --recursive --depth 1 --single-branch
-
-sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
-	$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
-
-## go-piper
-sources/go-piper:
-	mkdir -p sources/go-piper
-	cd sources/go-piper && \
-	git init && \
-	git remote add origin $(PIPER_REPO) && \
-	git fetch origin && \
-	git checkout $(PIPER_VERSION) && \
-	git submodule update --init --recursive --depth 1 --single-branch
-
-sources/go-piper/libpiper_binding.a: sources/go-piper
-	$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
-
-## stable diffusion
-sources/go-stable-diffusion:
-	mkdir -p sources/go-stable-diffusion
-	cd sources/go-stable-diffusion && \
-	git init && \
-	git remote add origin $(STABLEDIFFUSION_REPO) && \
-	git fetch origin && \
-	git checkout $(STABLEDIFFUSION_VERSION) && \
-	git submodule update --init --recursive --depth 1 --single-branch
-
-sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
-	CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
-
-sources/onnxruntime:
-	mkdir -p sources/onnxruntime
-	curl -L https://github.com/microsoft/onnxruntime/releases/download/v$(ONNX_VERSION)/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz -o sources/onnxruntime/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
-	cd sources/onnxruntime && tar -xvf onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz && rm onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
-	cd sources/onnxruntime && mv onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION)/* ./
-
-backend-assets/lib/libonnxruntime.so.1: backend-assets/lib sources/onnxruntime
-	cp -rfv sources/onnxruntime/lib/* backend-assets/lib/
-ifeq ($(OS),Darwin)
-	mv backend-assets/lib/libonnxruntime.$(ONNX_VERSION).dylib backend-assets/lib/libonnxruntime.dylib
-else
-	mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
-endif
-
-## tiny-dream
-sources/go-tiny-dream:
-	mkdir -p sources/go-tiny-dream
-	cd sources/go-tiny-dream && \
-	git init && \
-	git remote add origin $(TINYDREAM_REPO) && \
-	git fetch origin && \
-	git checkout $(TINYDREAM_VERSION) && \
-	git submodule update --init --recursive --depth 1 --single-branch
-
-sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream
-	$(MAKE) -C sources/go-tiny-dream libtinydream.a
-
-## whisper
-sources/whisper.cpp:
-	mkdir -p sources/whisper.cpp
-	cd sources/whisper.cpp && \
-	git init && \
-	git remote add origin $(WHISPER_REPO) && \
-	git fetch origin && \
-	git checkout $(WHISPER_CPP_VERSION) && \
-	git submodule update --init --recursive --depth 1 --single-branch
-
-sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
-	cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
-
-get-sources: sources/go-llama.cpp sources/go-piper sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
-
-replace:
-	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
-	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
-	$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp
-	$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
-	$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
-	$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
-	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
-
-dropreplace:
-	$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
-	$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
-	$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-bert.cpp
-	$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
-	$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
-	$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
-	$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
-
-prepare-sources: get-sources replace
-	$(GOCMD) mod download
-
-## GENERIC
-rebuild: ## Rebuilds the project
-	$(GOCMD) clean -cache
-	$(MAKE) -C sources/go-llama.cpp clean
-	$(MAKE) -C sources/whisper.cpp clean
-	$(MAKE) -C sources/go-stable-diffusion clean
-	$(MAKE) -C sources/go-bert.cpp clean
-	$(MAKE) -C sources/go-piper clean
-	$(MAKE) -C sources/go-tiny-dream clean
-	$(MAKE) build
-
-prepare: prepare-sources $(OPTIONAL_TARGETS)
-
-clean: ## Remove build related file
-	$(GOCMD) clean -cache
-	rm -f prepare
-	rm -rf ./sources
-	rm -rf $(BINARY_NAME)
-	rm -rf release/
-	rm -rf backend-assets/*
-	$(MAKE) -C backend/cpp/grpc clean
-	$(MAKE) -C backend/cpp/llama clean
-	rm -rf backend/cpp/llama-* || true
-	$(MAKE) dropreplace
-	$(MAKE) protogen-clean
-	rmdir pkg/grpc/proto || true
-
-clean-tests:
-	rm -rf test-models
-	rm -rf test-dir
-	rm -rf core/http/backend-assets
-
-clean-dc: clean
-	cp -r /build/backend-assets /workspace/backend-assets
-
-## Build:
-build: prepare backend-assets grpcs ## Build the project
-	$(info ${GREEN}I local-ai build info:${RESET})
-	$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
-	$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
-	$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
-	$(info ${GREEN}I UPX: ${YELLOW}$(UPX)${RESET})
-ifneq ($(BACKEND_LIBS),)
-	$(MAKE) backend-assets/lib
-	cp -f $(BACKEND_LIBS) backend-assets/lib/
-endif
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
-
-build-minimal:
-	BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build
-
-build-api:
-	BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=p2p $(MAKE) build
-
-backend-assets/lib:
-	mkdir -p backend-assets/lib
-
-dist:
-	$(MAKE) backend-assets/grpc/llama-cpp-avx2
-ifeq ($(DETECT_LIBS),true)
-	scripts/prepare-libs.sh backend-assets/grpc/llama-cpp-avx2
-endif
-ifeq ($(OS),Darwin)
-	BUILD_TYPE=none $(MAKE) backend-assets/grpc/llama-cpp-fallback
-else
-	$(MAKE) backend-assets/grpc/llama-cpp-cuda
-	$(MAKE) backend-assets/grpc/llama-cpp-hipblas
-	$(MAKE) backend-assets/grpc/llama-cpp-sycl_f16
-	$(MAKE) backend-assets/grpc/llama-cpp-sycl_f32
-endif
-	GO_TAGS="tts p2p" $(MAKE) build
-ifeq ($(DETECT_LIBS),true)
-	scripts/prepare-libs.sh backend-assets/grpc/piper
-endif
-	GO_TAGS="tts p2p" STATIC=true $(MAKE) build
-	mkdir -p release
-# if BUILD_ID is empty, then we don't append it to the binary name
-ifeq ($(BUILD_ID),)
-	cp $(BINARY_NAME) release/$(BINARY_NAME)-$(OS)-$(ARCH)
-	shasum -a 256 release/$(BINARY_NAME)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(OS)-$(ARCH).sha256
-else
-	cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH)
-	shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH).sha256
-endif
-
-dist-cross-linux-arm64:
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" GO_TAGS="p2p" \
-	STATIC=true $(MAKE) build
-	mkdir -p release
-# if BUILD_ID is empty, then we don't append it to the binary name
-ifeq ($(BUILD_ID),)
-	cp $(BINARY_NAME) release/$(BINARY_NAME)-$(OS)-arm64
-	shasum -a 256 release/$(BINARY_NAME)-$(OS)-arm64 > release/$(BINARY_NAME)-$(OS)-arm64.sha256
-else
-	cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-arm64
-	shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-arm64 > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-arm64.sha256
-endif
-
-osx-signed: build
-	codesign --deep --force --sign "$(OSX_SIGNING_IDENTITY)" --entitlements "./Entitlements.plist" "./$(BINARY_NAME)"
-
-## Run
-run: prepare ## run local-ai
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) run ./
-
-test-models/testmodel.ggml:
-	mkdir test-models
-	mkdir test-dir
-	wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel.ggml
-	wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
-	wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
-	wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
-	cp tests/models_fixtures/* test-models
-
-prepare-test: grpcs
-	cp -rf backend-assets core/http
-	cp tests/models_fixtures/* test-models
-
-test: prepare test-models/testmodel.ggml grpcs
-	@echo 'Running tests'
-	export GO_TAGS="tts stablediffusion debug"
-	$(MAKE) prepare-test
-	HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
-	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf"  --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
-	$(MAKE) test-llama
-	$(MAKE) test-llama-gguf
-	$(MAKE) test-tts
-	$(MAKE) test-stablediffusion
-
-prepare-e2e:
-	mkdir -p $(TEST_DIR)
-	cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
-	test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
-	docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 --build-arg FFMPEG=true -t localai-tests .
-
-run-e2e-image:
-	ls -liah $(abspath ./tests/e2e-fixtures)
-	docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests
-
-run-e2e-aio: protogen-go
-	@echo 'Running e2e AIO tests'
-	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e-aio
-
-test-e2e:
-	@echo 'Running e2e tests'
-	BUILD_TYPE=$(BUILD_TYPE) \
-	LOCALAI_API=http://$(E2E_BRIDGE_IP):5390/v1 \
-	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e
-
-teardown-e2e:
-	rm -rf $(TEST_DIR) || true
-	docker stop $$(docker ps -q --filter ancestor=localai-tests)
-
-test-llama: prepare-test
-	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
-	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
-
-test-llama-gguf: prepare-test
-	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
-	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
-
-test-tts: prepare-test
-	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
-	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
-
-test-stablediffusion: prepare-test
-	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
-	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
-
-test-stores: backend-assets/grpc/local-store
-	mkdir -p tests/integration/backend-assets/grpc
-	cp -f backend-assets/grpc/local-store tests/integration/backend-assets/grpc/
-	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts $(TEST_FLAKES) -v -r tests/integration
-
-test-container: ## Build the requirements image and open a bash shell in it with the repo mounted at /build
-	docker build --target requirements -t local-ai-test-container .
-	docker run -ti --rm --entrypoint /bin/bash -v $(abspath ./):/build local-ai-test-container
-
-## Help:
-help: ## Show this help.
-	@echo ''
-	@echo 'Usage:'
-	@echo '  ${YELLOW}make${RESET} ${GREEN}<target>${RESET}'
-	@echo ''
-	@echo 'Targets:'
-	@awk 'BEGIN {FS = ":.*?## "} { \
-		if (/^[a-zA-Z_-]+:.*?##.*$$/) {printf "    ${YELLOW}%-20s${GREEN}%s${RESET}\n", $$1, $$2} \
-		else if (/^## .*$$/) {printf "  ${CYAN}%s${RESET}\n", substr($$1,4)} \
-		}' $(MAKEFILE_LIST)
-
-.PHONY: protogen
-protogen: protogen-go protogen-python
-
-.PHONY: protogen-clean
-protogen-clean: protogen-go-clean protogen-python-clean
-
-.PHONY: protogen-go
-protogen-go:
-	mkdir -p pkg/grpc/proto
-	protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
-    backend/backend.proto
-
-.PHONY: protogen-go-clean
-protogen-go-clean:
-	$(RM) pkg/grpc/proto/backend.pb.go pkg/grpc/proto/backend_grpc.pb.go
-	$(RM) bin/*
-
-.PHONY: protogen-python
-protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
-
-.PHONY: protogen-python-clean
-protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean  exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
-
-.PHONY: autogptq-protogen
-autogptq-protogen:
-	$(MAKE) -C backend/python/autogptq protogen
-
-.PHONY: autogptq-protogen-clean
-autogptq-protogen-clean:
-	$(MAKE) -C backend/python/autogptq protogen-clean
-
-.PHONY: bark-protogen
-bark-protogen:
-	$(MAKE) -C backend/python/bark protogen
-
-.PHONY: bark-protogen-clean
-bark-protogen-clean:
-	$(MAKE) -C backend/python/bark protogen-clean
-
-.PHONY: coqui-protogen
-coqui-protogen:
-	$(MAKE) -C backend/python/coqui protogen
-
-.PHONY: coqui-protogen-clean
-coqui-protogen-clean:
-	$(MAKE) -C backend/python/coqui protogen-clean
-
-.PHONY: diffusers-protogen
-diffusers-protogen:
-	$(MAKE) -C backend/python/diffusers protogen
-
-.PHONY: diffusers-protogen-clean
-diffusers-protogen-clean:
-	$(MAKE) -C backend/python/diffusers protogen-clean
-
-.PHONY: exllama2-protogen
-exllama2-protogen:
-	$(MAKE) -C backend/python/exllama2 protogen
-
-.PHONY: exllama2-protogen-clean
-exllama2-protogen-clean:
-	$(MAKE) -C backend/python/exllama2 protogen-clean
-
-.PHONY: mamba-protogen
-mamba-protogen:
-	$(MAKE) -C backend/python/mamba protogen
-
-.PHONY: mamba-protogen-clean
-mamba-protogen-clean:
-	$(MAKE) -C backend/python/mamba protogen-clean
-
-.PHONY: rerankers-protogen
-rerankers-protogen:
-	$(MAKE) -C backend/python/rerankers protogen
-
-.PHONY: rerankers-protogen-clean
-rerankers-protogen-clean:
-	$(MAKE) -C backend/python/rerankers protogen-clean
-
-.PHONY: sentencetransformers-protogen
-sentencetransformers-protogen:
-	$(MAKE) -C backend/python/sentencetransformers protogen
-
-.PHONY: sentencetransformers-protogen-clean
-sentencetransformers-protogen-clean:
-	$(MAKE) -C backend/python/sentencetransformers protogen-clean
-
-.PHONY: transformers-protogen
-transformers-protogen:
-	$(MAKE) -C backend/python/transformers protogen
-
-.PHONY: transformers-protogen-clean
-transformers-protogen-clean:
-	$(MAKE) -C backend/python/transformers protogen-clean
-
-.PHONY: parler-tts-protogen
-parler-tts-protogen:
-	$(MAKE) -C backend/python/parler-tts protogen
-
-.PHONY: parler-tts-protogen-clean
-parler-tts-protogen-clean:
-	$(MAKE) -C backend/python/parler-tts protogen-clean
-
-.PHONY: transformers-musicgen-protogen
-transformers-musicgen-protogen:
-	$(MAKE) -C backend/python/transformers-musicgen protogen
-
-.PHONY: transformers-musicgen-protogen-clean
-transformers-musicgen-protogen-clean:
-	$(MAKE) -C backend/python/transformers-musicgen protogen-clean
-
-.PHONY: vall-e-x-protogen
-vall-e-x-protogen:
-	$(MAKE) -C backend/python/vall-e-x protogen
-
-.PHONY: vall-e-x-protogen-clean
-vall-e-x-protogen-clean:
-	$(MAKE) -C backend/python/vall-e-x protogen-clean
-
-.PHONY: openvoice-protogen
-openvoice-protogen:
-	$(MAKE) -C backend/python/openvoice protogen
-
-.PHONY: openvoice-protogen-clean
-openvoice-protogen-clean:
-	$(MAKE) -C backend/python/openvoice protogen-clean
-
-.PHONY: vllm-protogen
-vllm-protogen:
-	$(MAKE) -C backend/python/vllm protogen
-
-.PHONY: vllm-protogen-clean
-vllm-protogen-clean:
-	$(MAKE) -C backend/python/vllm protogen-clean
-
-## GRPC
-# Note: it is duplicated in the Dockerfile
-prepare-extra-conda-environments: protogen-python
-	$(MAKE) -C backend/python/autogptq
-	$(MAKE) -C backend/python/bark
-	$(MAKE) -C backend/python/coqui
-	$(MAKE) -C backend/python/diffusers
-	$(MAKE) -C backend/python/vllm
-	$(MAKE) -C backend/python/mamba
-	$(MAKE) -C backend/python/sentencetransformers
-	$(MAKE) -C backend/python/rerankers
-	$(MAKE) -C backend/python/transformers
-	$(MAKE) -C backend/python/transformers-musicgen
-	$(MAKE) -C backend/python/parler-tts
-	$(MAKE) -C backend/python/vall-e-x
-	$(MAKE) -C backend/python/openvoice
-	$(MAKE) -C backend/python/exllama2
-
-prepare-test-extra: protogen-python
-	$(MAKE) -C backend/python/transformers
-	$(MAKE) -C backend/python/diffusers
-
-test-extra: prepare-test-extra
-	$(MAKE) -C backend/python/transformers test
-	$(MAKE) -C backend/python/diffusers test
-
-backend-assets:
-	mkdir -p backend-assets
-ifeq ($(BUILD_API_ONLY),true)
-	touch backend-assets/keep
-endif
-
-backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_binding.a
-	mkdir -p backend-assets/espeak-ng-data
-	@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
-
-backend-assets/grpc: protogen-go replace
-	mkdir -p backend-assets/grpc
-
-backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/bert-embeddings
-endif
-
-backend-assets/grpc/huggingface: backend-assets/grpc
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/huggingface
-endif
-
-backend/cpp/llama/llama.cpp:
-	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
-
-INSTALLED_PACKAGES=$(CURDIR)/backend/cpp/grpc/installed_packages
-INSTALLED_LIB_CMAKE=$(INSTALLED_PACKAGES)/lib/cmake
-ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
-				 -DProtobuf_DIR=${INSTALLED_LIB_CMAKE}/protobuf \
-				 -Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \
-				 -DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \
-				 -DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include
-build-llama-cpp-grpc-server:
-# Builds grpc-server in backend/cpp/VARIANT; when BUILD_GRPC_FOR_BACKEND_LLAMA is defined, gRPC is built first and its protoc, plugin, bin dir and CMake package dirs are passed to that build
-ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
-	$(MAKE) -C backend/cpp/grpc build
-	_PROTOBUF_PROTOC=${INSTALLED_PACKAGES}/bin/protoc \
-	_GRPC_CPP_PLUGIN_EXECUTABLE=${INSTALLED_PACKAGES}/bin/grpc_cpp_plugin \
-	PATH="${INSTALLED_PACKAGES}/bin:${PATH}" \
-	CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" \
-	LLAMA_VERSION=$(CPPLLAMA_VERSION) \
-	$(MAKE) -C backend/cpp/${VARIANT} grpc-server
-else
-	echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
-	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
-endif
-
-# This target is for manually building a variant with auto-detected flags (no SIMD overrides are passed in CMAKE_ARGS)
-backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp
-	cp -rf backend/cpp/llama backend/cpp/llama-cpp
-	$(MAKE) -C backend/cpp/llama-cpp purge
-	$(info ${GREEN}I llama-cpp build info:auto-detected${RESET})
-	$(MAKE) VARIANT="llama-cpp" build-llama-cpp-grpc-server
-	cp -rfv backend/cpp/llama-cpp/grpc-server backend-assets/grpc/llama-cpp
-
-backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc backend/cpp/llama/llama.cpp
-	cp -rf backend/cpp/llama backend/cpp/llama-avx2
-	$(MAKE) -C backend/cpp/llama-avx2 purge
-	$(info ${GREEN}I llama-cpp build info:avx2${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
-	cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
-
-backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp
-	cp -rf backend/cpp/llama backend/cpp/llama-avx
-	$(MAKE) -C backend/cpp/llama-avx purge
-	$(info ${GREEN}I llama-cpp build info:avx${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
-	cp -rfv backend/cpp/llama-avx/grpc-server backend-assets/grpc/llama-cpp-avx
-
-backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc backend/cpp/llama/llama.cpp
-	cp -rf backend/cpp/llama backend/cpp/llama-fallback
-	$(MAKE) -C backend/cpp/llama-fallback purge
-	$(info ${GREEN}I llama-cpp build info:fallback${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
-	cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
-# TODO: every binary should have its own folder instead, so can have different metal implementations
-ifeq ($(BUILD_TYPE),metal)
-	cp backend/cpp/llama-fallback/llama.cpp/build/bin/ggml-metal.metal backend-assets/grpc/
-endif
-
-backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama/llama.cpp
-	cp -rf backend/cpp/llama backend/cpp/llama-cuda
-	$(MAKE) -C backend/cpp/llama-cuda purge
-	$(info ${GREEN}I llama-cpp build info:cuda${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
-	cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
-
-backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc backend/cpp/llama/llama.cpp
-	cp -rf backend/cpp/llama backend/cpp/llama-hipblas
-	$(MAKE) -C backend/cpp/llama-hipblas purge
-	$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
-	cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
-
-backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama/llama.cpp
-	cp -rf backend/cpp/llama backend/cpp/llama-sycl_f16
-	$(MAKE) -C backend/cpp/llama-sycl_f16 purge
-	$(info ${GREEN}I llama-cpp build info:sycl_f16${RESET})
-	BUILD_TYPE="sycl_f16" $(MAKE) VARIANT="llama-sycl_f16" build-llama-cpp-grpc-server
-	cp -rfv backend/cpp/llama-sycl_f16/grpc-server backend-assets/grpc/llama-cpp-sycl_f16
-
-backend-assets/grpc/llama-cpp-sycl_f32: backend-assets/grpc backend/cpp/llama/llama.cpp
-	cp -rf backend/cpp/llama backend/cpp/llama-sycl_f32
-	$(MAKE) -C backend/cpp/llama-sycl_f32 purge
-	$(info ${GREEN}I llama-cpp build info:sycl_f32${RESET})
-	BUILD_TYPE="sycl_f32" $(MAKE) VARIANT="llama-sycl_f32" build-llama-cpp-grpc-server
-	cp -rfv backend/cpp/llama-sycl_f32/grpc-server backend-assets/grpc/llama-cpp-sycl_f32
-
-backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama/llama.cpp
-	cp -rf backend/cpp/llama backend/cpp/llama-grpc
-	$(MAKE) -C backend/cpp/llama-grpc purge
-	$(info ${GREEN}I llama-cpp build info:grpc${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
-	cp -rfv backend/cpp/llama-grpc/grpc-server backend-assets/grpc/llama-cpp-grpc
-
-backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
-	mkdir -p backend-assets/util/
-	cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
-
-backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/llama-ggml
-endif
-
-backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
-	CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/piper
-endif
-
-backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/stablediffusion
-endif
-
-backend-assets/grpc/silero-vad: backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/silero-vad
-endif
-
-backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-tiny-dream \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/tinydream
-endif
-
-backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
-	CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/whisper
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/whisper
-endif
-
-backend-assets/grpc/local-store: backend-assets/grpc
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/local-store
-endif
-
-grpcs: prepare $(GRPC_BACKENDS)
-
-DOCKER_IMAGE?=local-ai
-DOCKER_AIO_IMAGE?=local-ai-aio
-IMAGE_TYPE?=core
-BASE_IMAGE?=ubuntu:22.04
-
-docker:
-	docker build \
-		--build-arg BASE_IMAGE=$(BASE_IMAGE) \
-		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
-		--build-arg GO_TAGS="$(GO_TAGS)" \
-		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
-		--build-arg BUILD_TYPE=$(BUILD_TYPE) \
-		-t $(DOCKER_IMAGE) .
-
-docker-cuda11:
-	docker build \
-		--build-arg CUDA_MAJOR_VERSION=11 \
-		--build-arg CUDA_MINOR_VERSION=8 \
-		--build-arg BASE_IMAGE=$(BASE_IMAGE) \
-		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
-		--build-arg GO_TAGS="$(GO_TAGS)" \
-		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
-		--build-arg BUILD_TYPE=$(BUILD_TYPE) \
-		-t $(DOCKER_IMAGE)-cuda11 .
-
-docker-aio:
-	@echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)"
-	docker build \
-		--build-arg BASE_IMAGE=$(BASE_IMAGE) \
-		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
-		-t $(DOCKER_AIO_IMAGE) -f Dockerfile.aio .
-
-docker-aio-all:
-	$(MAKE) docker-aio DOCKER_AIO_SIZE=cpu
-	$(MAKE) docker-aio DOCKER_AIO_SIZE=cpu
-
-docker-image-intel:
-	docker build \
-		--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 \
-		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
-		--build-arg GO_TAGS="none" \
-		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
-		--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
-
-docker-image-intel-xpu:
-	docker build \
-		--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 \
-		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
-		--build-arg GO_TAGS="none" \
-		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
-		--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
-
-.PHONY: swagger
-swagger:
-	swag init -g core/http/app.go --output swagger
-
-.PHONY: gen-assets
-gen-assets:
-	$(GOCMD) run core/dependencies_manager/manager.go embedded/webui_static.yaml core/http/static/assets
-
-## Documentation
-docs/layouts/_default:
-	mkdir -p docs/layouts/_default
-
-docs/static/gallery.html: docs/layouts/_default
-	$(GOCMD) run ./.github/ci/modelslist.go ./gallery/index.yaml > docs/static/gallery.html
-
-docs/public: docs/layouts/_default docs/static/gallery.html
-	cd docs && hugo --minify
-
-docs-clean:
-	rm -rf docs/public
-	rm -rf docs/static/gallery.html
-
-.PHONY: docs
-docs: docs/static/gallery.html
-	cd docs && hugo serve
--- a/README.md
+++ b/README.md
@@ -1,251 +0,0 @@
-<h1 align="center">
-  <br>
-  <img height="300" src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd"> <br>
-    LocalAI
-<br>
-</h1>
-
-<p align="center">
-<a href="https://github.com/go-skynet/LocalAI/fork" target="blank">
-<img src="https://img.shields.io/github/forks/go-skynet/LocalAI?style=for-the-badge" alt="LocalAI forks"/>
-</a>
-<a href="https://github.com/go-skynet/LocalAI/stargazers" target="blank">
-<img src="https://img.shields.io/github/stars/go-skynet/LocalAI?style=for-the-badge" alt="LocalAI stars"/>
-</a>
-<a href="https://github.com/go-skynet/LocalAI/pulls" target="blank">
-<img src="https://img.shields.io/github/issues-pr/go-skynet/LocalAI?style=for-the-badge" alt="LocalAI pull-requests"/>
-</a>
-<a href='https://github.com/go-skynet/LocalAI/releases'>
-<img src='https://img.shields.io/github/release/go-skynet/LocalAI?&label=Latest&style=for-the-badge'>
-</a>
-</p>
-
-<p align="center">
-<a href="https://hub.docker.com/r/localai/localai" target="blank">
-<img src="https://img.shields.io/badge/dockerhub-images-important.svg?logo=Docker" alt="LocalAI Docker hub"/>
-</a>
-<a href="https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest" target="blank">
-<img src="https://img.shields.io/badge/quay.io-images-important.svg?" alt="LocalAI Quay.io"/>
-</a>
-</p>
-
-<p align="center">
-<a href="https://twitter.com/LocalAI_API" target="blank">
-<img src="https://img.shields.io/twitter/follow/LocalAI_API?label=Follow: LocalAI_API&style=social" alt="Follow LocalAI_API"/>
-</a>
-<a href="https://discord.gg/uJAeKSAGDy" target="blank">
-<img src="https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted" alt="Join LocalAI Discord Community"/>
-</a>
-</p>
-
-<p align="center">
-<a href="https://trendshift.io/repositories/1484" target="_blank"><img src="https://trendshift.io/api/badge/repositories/1484" alt="go-skynet%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
-</p>
-
-> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
->
-> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples) 
-
-[![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai)
-
-**LocalAI** is the free, Open Source OpenAI alternative. LocalAI acts as a drop-in replacement REST API that’s compatible with the OpenAI (Elevenlabs, Anthropic...) API specifications for local AI inferencing. It allows you to run LLMs and generate images, audio, and more, locally or on-prem with consumer-grade hardware, supporting multiple model families. It does not require a GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).
-
-![screen](https://github.com/mudler/LocalAI/assets/2420543/20b5ccd2-8393-44f0-aaf6-87a23806381e)
-
-Run the installer script:
-
-```bash
-curl https://localai.io/install.sh | sh
-```
-
-Or run with docker:
-```bash
-# CPU only image:
-docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-cpu
-
-# Nvidia GPU:
-docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
-
-# CPU and GPU image (bigger size):
-docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
-
-# AIO images (it will pre-download a set of models ready for use, see https://localai.io/basics/container/)
-docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
-```
-
-To load models:
-
-```bash
-# From the model gallery (see available models with `local-ai models list`, in the WebUI from the model tab, or visiting https://models.localai.io)
-local-ai run llama-3.2-1b-instruct:q4_k_m
-# Start LocalAI with the phi-2 model directly from huggingface
-local-ai run huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf
-# Install and run a model from the Ollama OCI registry
-local-ai run ollama://gemma:2b
-# Run a model from a configuration file
-local-ai run https://gist.githubusercontent.com/.../phi-2.yaml
-# Install and run a model from a standard OCI registry (e.g., Docker Hub)
-local-ai run oci://localai/phi-2:latest
-```
-
-[💻 Getting started](https://localai.io/basics/getting_started/index.html)
-
-## 📰 Latest project news
-
-- Nov 2024: Voice activity detection models (**VAD**) added to the API: https://github.com/mudler/LocalAI/pull/4204
-- Oct 2024: examples moved to [LocalAI-examples](https://github.com/mudler/LocalAI-examples)
-- Aug 2024:  🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
-- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
-- June 2024: 🆕 You can now browse the model gallery without LocalAI! Check out https://models.localai.io
-- June 2024: Support for models from OCI registries: https://github.com/mudler/LocalAI/pull/2628
-- May 2024: 🔥🔥 Decentralized P2P llama.cpp:  https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs  https://localai.io/features/distribute/
-- May 2024: 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
-- May 2024: 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328
-- May 2024: 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
-- May 2024: Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
-- April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121
-
-Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
-
-## 🔥🔥 Hot topics (looking for help):
-
-- Multimodal with vLLM and Video understanding: https://github.com/mudler/LocalAI/pull/3729
-- Realtime API: https://github.com/mudler/LocalAI/issues/3714
-- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
-- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
-- Backends v2: https://github.com/mudler/LocalAI/issues/1126
-- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
-- Assistant API: https://github.com/mudler/LocalAI/issues/1273
-- Moderation endpoint: https://github.com/mudler/LocalAI/issues/999
-- Vulkan: https://github.com/mudler/LocalAI/issues/1647
-- Anthropic API: https://github.com/mudler/LocalAI/issues/1808
-
-If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22
-
-## 🚀 [Features](https://localai.io/features/)
-
-- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
-- 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
-- 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
-- 🎨 [Image generation with stable diffusion](https://localai.io/features/image-generation)
-- 🔥 [OpenAI-like tools API](https://localai.io/features/openai-functions/)
-- 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
-- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
-- 🖼️ [Download Models directly from Huggingface](https://localai.io/models/)
-- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
-- 📈 [Reranker API](https://localai.io/features/reranker/)
-- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
-- 🌍 Integrated WebUI!
-
-## 💻 Usage
-
-Check out the [Getting started](https://localai.io/basics/getting_started/index.html) section in our documentation.
-
-### 🔗 Community and integrations
-
-Build and deploy custom containers:
-- https://github.com/sozercan/aikit
-
-WebUIs:
-- https://github.com/Jirubizu/localai-admin
-- https://github.com/go-skynet/LocalAI-frontend
-- QA-Pilot (an interactive chat project that leverages LocalAI LLMs for rapid understanding and navigation of GitHub code repositories) https://github.com/reid41/QA-Pilot
-
-Model galleries
-- https://github.com/go-skynet/model-gallery
-
-Other:
-- Helm chart https://github.com/go-skynet/helm-charts
-- VSCode extension https://github.com/badgooooor/localai-vscode-plugin
-- Terminal utility https://github.com/djcopley/ShellOracle
-- Local Smart assistant https://github.com/mudler/LocalAGI
-- Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation / https://github.com/valentinfrlch/ha-gpt4vision
-- Discord bot https://github.com/mudler/LocalAGI/tree/main/examples/discord
-- Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
-- Shell-Pilot (interact with LLMs using LocalAI models via pure shell scripts on your Linux or macOS system) https://github.com/reid41/shell-pilot
-- Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
-- Another Telegram Bot https://github.com/JackBekket/Hellper
-- Auto-documentation https://github.com/JackBekket/Reflexia
-- GitHub bot that answers issues, using code and documentation as context https://github.com/JackBekket/GitHelper
-- Github Actions: https://github.com/marketplace/actions/start-localai
-- Examples: https://github.com/mudler/LocalAI/tree/master/examples/
-  
-
-### 🔗 Resources
-
-- [LLM finetuning guide](https://localai.io/docs/advanced/fine-tuning/)
-- [How to build locally](https://localai.io/basics/build/index.html)
-- [How to install in Kubernetes](https://localai.io/basics/getting_started/index.html#run-localai-in-kubernetes)
-- [Projects integrating LocalAI](https://localai.io/docs/integrations/)
-- [How tos section](https://io.midori-ai.xyz/howtos/) (curated by our community)
-
-## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social)
-
-- [Run Visual studio code with LocalAI (SUSE)](https://www.suse.com/c/running-ai-locally/)
-- 🆕 [Run LocalAI on Jetson Nano Devkit](https://mudler.pm/posts/local-ai-jetson-nano-devkit/)
-- [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/blog/low-code-llm-apps-with-local-ai-flowise-and-pulumi/)
-- [Run LocalAI on AWS](https://staleks.hashnode.dev/installing-localai-on-aws-ec2-instance)
-- [Create a slackbot for teams and OSS projects that answer to documentation](https://mudler.pm/posts/smart-slackbot-for-teams/)
-- [LocalAI meets k8sgpt](https://www.youtube.com/watch?v=PKrDNuJ_dfE)
-- [Question Answering on Documents locally with LangChain, LocalAI, Chroma, and GPT4All](https://mudler.pm/posts/localai-question-answering/)
-- [Tutorial to use k8sgpt with LocalAI](https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65)
-
-## Citation
-
-If you use this repository or its data in a downstream project, please consider citing it with:
-
-```
-@misc{localai,
-  author = {Ettore Di Giacinto},
-  title = {LocalAI: The free, Open source OpenAI alternative},
-  year = {2023},
-  publisher = {GitHub},
-  journal = {GitHub repository},
-  howpublished = {\url{https://github.com/go-skynet/LocalAI}}}
-```
-
-## ❤️ Sponsors
-
-> Do you find LocalAI useful?
-
-Support the project by becoming [a backer or sponsor](https://github.com/sponsors/mudler). Your logo will show up here with a link to your website.
-
-A huge thank you to our generous sponsors, who support this project by covering CI expenses, and to everyone on our [Sponsor list](https://github.com/sponsors/mudler):
-
-<p align="center">
-  <a href="https://www.spectrocloud.com/" target="blank">
-    <img height="200" src="https://github.com/go-skynet/LocalAI/assets/2420543/68a6f3cb-8a65-4a4d-99b5-6417a8905512">
-  </a>
-  <a href="https://www.premai.io/" target="blank">
-    <img height="200" src="https://github.com/mudler/LocalAI/assets/2420543/42e4ca83-661e-4f79-8e46-ae43689683d6"> <br>
-  </a>
-</p>
-
-## 🌟 Star history
-
-[![LocalAI Star history Chart](https://api.star-history.com/svg?repos=go-skynet/LocalAI&type=Date)](https://star-history.com/#go-skynet/LocalAI&Date)
-
-## 📖 License
-
-LocalAI is a community-driven project created by [Ettore Di Giacinto](https://github.com/mudler/).
-
-MIT - Author Ettore Di Giacinto <mudler@localai.io>
-
-## 🙇 Acknowledgements
-
-LocalAI couldn't have been built without the help of great software already available from the community. Thank you!
-
-- [llama.cpp](https://github.com/ggerganov/llama.cpp)
-- https://github.com/tatsu-lab/stanford_alpaca
-- https://github.com/cornelk/llama-go for the initial ideas
-- https://github.com/antimatter15/alpaca.cpp
-- https://github.com/EdVince/Stable-Diffusion-NCNN
-- https://github.com/ggerganov/whisper.cpp
-- https://github.com/rhasspy/piper
-
-## 🤗 Contributors
-
-This is a community project, a special thanks to our contributors! 🤗
-<a href="https://github.com/go-skynet/LocalAI/graphs/contributors">
-  <img src="https://contrib.rocks/image?repo=go-skynet/LocalAI" />
-</a>
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -1,42 +0,0 @@
-# Security Policy
-
-## Introduction
-
-At LocalAI, we take the security of our software seriously. We understand the importance of protecting our community from vulnerabilities and are committed to ensuring the safety and security of our users.
-
-## Supported Versions
-
-We provide support and updates for certain versions of our software. The following table outlines which versions are currently supported with security updates:
-
-| Version | Supported          |
-| ------- | ------------------ |
-| > 2.0   | :white_check_mark: |
-| < 2.0   | :x:                |
-
-Please ensure that you are using a supported version to receive the latest security updates.
-
-## Reporting a Vulnerability
-
-We encourage the responsible disclosure of any security vulnerabilities. If you believe you've found a security issue in our software, we kindly ask you to follow the steps below to report it to us:
-
-1. **Email Us:** Send an email to [security@localai.io](mailto:security@localai.io) with a detailed report. Please do not disclose the vulnerability publicly or to any third parties before it has been addressed by us.
-
-2. **Expect a Response:** We aim to acknowledge receipt of vulnerability reports within 48 hours. Our security team will review your report and work closely with you to understand the impact and ensure a thorough investigation.
-
-3. **Collaboration:** If the vulnerability is accepted, we will work with you and our community to address the issue promptly. We'll keep you informed throughout the resolution process and may request additional information or collaboration.
-
-4. **Disclosure:** Once the vulnerability has been resolved, we encourage a coordinated disclosure. We believe in transparency and will work with you to ensure that our community is informed in a responsible manner.
-
-## Use of Third-Party Platforms
-
-As a Free and Open Source Software (FOSS) organization, we do not offer monetary bounties. However, researchers who wish to report vulnerabilities can also do so via [Huntr](https://huntr.dev/bounties), a platform that recognizes contributions to open source security.
-
-## Contact
-
-For any security-related inquiries beyond vulnerability reporting, please contact us at [security@localai.io](mailto:security@localai.io).
-
-## Acknowledgments
-
-We appreciate the efforts of those who contribute to the security of our project. Your responsible disclosure is invaluable to the safety and integrity of LocalAI.
-
-Thank you for helping us keep LocalAI secure.
--- a/aio/cpu/README.md
+++ b/aio/cpu/README.md
@@ -1,5 +0,0 @@
-## AIO CPU size
-
-Use this image on CPU-only systems.
-
-Please keep using only C++ backends so the base image is as small as possible (without CUDA, cuDNN, python, etc).
--- a/aio/cpu/embeddings.yaml
+++ b/aio/cpu/embeddings.yaml
@@ -1,12 +0,0 @@
-name: text-embedding-ada-002
-backend: bert-embeddings
-parameters:
-  model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin
-
-usage: |
-    You can test this model with curl like this:
-
-    curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
-      "input": "Your text string goes here",
-      "model": "text-embedding-ada-002"
-    }'
--- a/aio/cpu/image-gen.yaml
+++ b/aio/cpu/image-gen.yaml
@@ -1,62 +0,0 @@
-name: stablediffusion
-backend: stablediffusion
-parameters:
-  model: stablediffusion_assets
-
-license: "BSD-3"
-urls:
- https://github.com/EdVince/Stable-Diffusion-NCNN
- https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE
-
-description: |
-     Stable Diffusion in NCNN with c++, supported txt2img and img2img
-
-download_files:
- filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param"
-  sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82"
-  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param"
- filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param"
-  sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20"
-  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param"
- filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param"
-  sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
-  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param"
- filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin"
-  sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa"
-  uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin"
- filename: "stablediffusion_assets/AutoencoderKL-fp16.bin"
-  sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
-  uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin"
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin"
-  sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
-  uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin"
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param"
-  sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
-  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param"
- filename: "stablediffusion_assets/log_sigmas.bin"
-  sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac"
-  uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin"
- filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param"
-  sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1"
-  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param"
- filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param"
-  sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa"
-  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param"
- filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param"
-  sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
-  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param"
- filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin"
-  sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
-  uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin"
- filename: "stablediffusion_assets/vocab.txt"
-  sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
-  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"
-
-usage: |
-        curl http://localhost:8080/v1/images/generations \
-          -H "Content-Type: application/json" \
-          -d '{
-            "prompt": "<positive prompt>|<negative prompt>",
-            "step": 25,
-            "size": "512x512"
-          }'
--- a/aio/cpu/rerank.yaml
+++ b/aio/cpu/rerank.yaml
@@ -1,27 +0,0 @@
-name: jina-reranker-v1-base-en
-backend: rerankers
-parameters:
-  model: cross-encoder
-
-usage: |
-    You can test this model with curl like this:
-
-    curl http://localhost:8080/v1/rerank \
-      -H "Content-Type: application/json" \
-      -d '{
-      "model": "jina-reranker-v1-base-en",
-      "query": "Organic skincare products for sensitive skin",
-      "documents": [
-        "Eco-friendly kitchenware for modern homes",
-        "Biodegradable cleaning supplies for eco-conscious consumers",
-        "Organic cotton baby clothes for sensitive skin",
-        "Natural organic skincare range for sensitive skin",
-        "Tech gadgets for smart homes: 2024 edition",
-        "Sustainable gardening tools and compost solutions",
-        "Sensitive skin-friendly facial cleansers and toners",
-        "Organic food wraps and storage solutions",
-        "All-natural pet food for dogs with allergies",
-        "Yoga mats made from recycled materials"
-      ],
-      "top_n": 3
-    }'
--- a/aio/cpu/speech-to-text.yaml
+++ b/aio/cpu/speech-to-text.yaml
@@ -1,18 +0,0 @@
-name: whisper-1
-backend: whisper
-parameters:
-  model: ggml-whisper-base.bin
-
-usage: |
-    ## example audio file
-    wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
-
-    ## Send the example audio file to the transcriptions endpoint
-    curl http://localhost:8080/v1/audio/transcriptions \
-         -H "Content-Type: multipart/form-data" \
-         -F file="@$PWD/gb1.ogg" -F model="whisper-1"
-
-download_files:
- filename: "ggml-whisper-base.bin"
-  sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
-  uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"
--- a/aio/cpu/text-to-speech.yaml
+++ b/aio/cpu/text-to-speech.yaml
@@ -1,15 +0,0 @@
-name: tts-1
-download_files:
-  - filename: voice-en-us-amy-low.tar.gz
-    uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
-
-parameters:
-  model: en-us-amy-low.onnx
-
-usage: |
-    To test if this model works as expected, you can use the following curl command:
-
-    curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
-      "model":"voice-en-us-amy-low",
-      "input": "Hi, this is a test."
-    }'
--- a/aio/cpu/text-to-text.yaml
+++ b/aio/cpu/text-to-text.yaml
@@ -1,101 +0,0 @@
-name: gpt-4
-mmap: true
-parameters:
-  model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
-context_size: 8192
-
-stopwords:
- "<|im_end|>"
- "<dummy32000>"
- "</tool_call>"
- "<|eot_id|>"
- "<|end_of_text|>"
-
-function:
-  # disable injecting the "answer" tool
-  disable_no_action: true
-
-  grammar:
-    # This allows the grammar to also return messages
-    mixed_mode: true
-    # Prefix to add to the grammar
-    #prefix: '<tool_call>\n'
-    # Force parallel calls in the grammar
-    # parallel_calls: true
-
-  return_name_in_function_response: true
-  # Without grammar uncomment the lines below
-  # Warning: this is relying only on the capability of the
-  # LLM model to generate the correct function call.
-  json_regex_match: 
-   - "(?s)<tool_call>(.*?)</tool_call>"
-   - "(?s)<tool_call>(.*?)"
-  replace_llm_results:
-  # Drop the scratchpad content from responses
-  - key: "(?s)<scratchpad>.*</scratchpad>"
-    value: ""
-  replace_function_results: 
-  # Replace everything that is not JSON array or object
-  # 
-  - key: '(?s)^[^{\[]*'
-    value: ""
-  - key: '(?s)[^}\]]*$'
-    value: ""
-  - key: "'([^']*?)'"
-    value: "_DQUOTE_${1}_DQUOTE_"
-  - key: '\\"'
-    value: "__TEMP_QUOTE__"
-  - key: "\'"
-    value: "'"
-  - key: "_DQUOTE_"
-    value: '"'
-  - key: "__TEMP_QUOTE__"
-    value: '"'
-  # Drop the scratchpad content from responses
-  - key: "(?s)<scratchpad>.*</scratchpad>"
-    value: ""
-
-template:
-  chat: |
-    {{.Input -}}
-    <|im_start|>assistant
-  chat_message: |
-    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
-    {{- if .FunctionCall }}
-    <tool_call>
-    {{- else if eq .RoleName "tool" }}
-    <tool_response>
-    {{- end }}
-    {{- if .Content}}
-    {{.Content }}
-    {{- end }}
-    {{- if .FunctionCall}}
-    {{toJson .FunctionCall}}
-    {{- end }}
-    {{- if .FunctionCall }}
-    </tool_call>
-    {{- else if eq .RoleName "tool" }}
-    </tool_response>
-    {{- end }}<|im_end|>
-  completion: |
-    {{.Input}}
-  function: |-
-    <|im_start|>system
-    You are a function calling AI model.
-    Here are the available tools:
-    <tools>
-    {{range .Functions}}
-    {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
-    {{end}}
-    </tools>
-    You should call the tools provided to you sequentially
-    Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
-    <scratchpad>
-    {step-by-step reasoning and plan in bullet points}
-    </scratchpad>
-    For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
-    <tool_call>
-    {"arguments": <args-dict>, "name": <function-name>}
-    </tool_call><|im_end|>
-    {{.Input -}}
-    <|im_start|>assistant
--- a/aio/cpu/vision.yaml
+++ b/aio/cpu/vision.yaml
@@ -1,31 +0,0 @@
-backend: llama-cpp
-context_size: 4096
-f16: true
-mmap: true
-name: gpt-4o
-
-roles:
-  user: "USER:"
-  assistant: "ASSISTANT:"
-  system: "SYSTEM:"
-
-mmproj: bakllava-mmproj.gguf
-parameters:
-  model: bakllava.gguf
-
-template:
-  chat: |
-    A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
-    {{.Input}}
-    ASSISTANT:
-
-download_files:
- filename: bakllava.gguf
-  uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
- filename: bakllava-mmproj.gguf
-  uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf
-
-usage: |
-    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-        "model": "gpt-4-vision-preview",
-        "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
--- a/aio/entrypoint.sh
+++ b/aio/entrypoint.sh
@@ -1,138 +0,0 @@
-#!/bin/bash
-
-echo "===> LocalAI All-in-One (AIO) container starting..."
-
-GPU_ACCELERATION=false
-GPU_VENDOR=""
-
-function check_intel() {
-    if lspci | grep -E 'VGA|3D' | grep -iq intel; then
-        echo "Intel GPU detected"
-        if [ -d /opt/intel ]; then
-            GPU_ACCELERATION=true
-            GPU_VENDOR=intel
-        else
-            echo "Intel GPU detected, but Intel GPU drivers are not installed. GPU acceleration will not be available."
-        fi
-    fi
-}
-
-function check_nvidia_wsl() {
-    if lspci | grep -E 'VGA|3D' | grep -iq "Microsoft Corporation Device 008e"; then
-        # We make the assumption this WSL2 card is NVIDIA, then check for nvidia-smi
-        # Make sure the container was run with `--gpus all` as the only required parameter
-        echo "NVIDIA GPU detected via WSL2"
-        # nvidia-smi should be installed in the container
-        if nvidia-smi; then
-            GPU_ACCELERATION=true
-            GPU_VENDOR=nvidia
-        else
-            echo "NVIDIA GPU detected via WSL2, but nvidia-smi is not installed. GPU acceleration will not be available."
-        fi
-    fi
-}
-
-function check_amd() {
-    if lspci | grep -E 'VGA|3D' | grep -iq amd; then
-        echo "AMD GPU detected"
-        # Check if ROCm is installed
-        if [ -d /opt/rocm ]; then
-            GPU_ACCELERATION=true
-            GPU_VENDOR=amd
-        else
-            echo "AMD GPU detected, but ROCm is not installed. GPU acceleration will not be available."
-        fi
-    fi
-}
-
-function check_nvidia() {
-    if lspci | grep -E 'VGA|3D' | grep -iq nvidia; then
-        echo "NVIDIA GPU detected"
-        # nvidia-smi should be installed in the container
-        if nvidia-smi; then
-            GPU_ACCELERATION=true
-            GPU_VENDOR=nvidia
-        else
-            echo "NVIDIA GPU detected, but nvidia-smi is not installed. GPU acceleration will not be available."
-        fi
-    fi
-}
-
-function check_metal() {
-    if system_profiler SPDisplaysDataType | grep -iq 'Metal'; then
-        echo "Apple Metal supported GPU detected"
-        GPU_ACCELERATION=true
-        GPU_VENDOR=apple
-    fi
-}
-
-function detect_gpu() {
-    case "$(uname -s)" in
-        Linux)
-            check_nvidia
-            check_amd
-            check_intel
-            check_nvidia_wsl
-            ;;
-        Darwin)
-            check_metal
-            ;;
-    esac
-}
-
-function detect_gpu_size() {
-    # Attempting to find GPU memory size for NVIDIA GPUs
-    if [ "$GPU_ACCELERATION" = true ] && [ "$GPU_VENDOR" = "nvidia" ]; then
-        echo "NVIDIA GPU detected. Attempting to find memory size..."
-        # Using head -n 1 to get the total memory of the 1st NVIDIA GPU detected.
-        # If handling multiple GPUs is required in the future, this is the place to do it
-        nvidia_sm=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits | head -n 1)
-        if [ ! -z "$nvidia_sm" ]; then
-            echo "Total GPU Memory: $nvidia_sm MiB"
-            # if bigger than 8GB, use 16GB
-            #if [ "$nvidia_sm" -gt 8192 ]; then
-            #    GPU_SIZE=gpu-16g
-            #else
-            GPU_SIZE=gpu-8g
-            #fi
-        else
-            echo "Unable to determine NVIDIA GPU memory size. Falling back to CPU."
-            GPU_SIZE=gpu-8g
-        fi
-    elif [ "$GPU_ACCELERATION" = true ] && [ "$GPU_VENDOR" = "intel" ]; then
-        GPU_SIZE=intel
-    # Default to a generic GPU size until we implement GPU size detection for non NVIDIA GPUs
-    elif [ "$GPU_ACCELERATION" = true ]; then
-        echo "Non-NVIDIA GPU detected. Specific GPU memory size detection is not implemented."
-        GPU_SIZE=gpu-8g
-
-    # default to cpu when GPU acceleration is not available
-    else
-        echo "GPU acceleration is not enabled or supported. Defaulting to CPU."
-        GPU_SIZE=cpu
-    fi
-}
-
-function check_vars() {
-    if [ -z "$MODELS" ]; then
-        echo "MODELS environment variable is not set. Please set it to a comma-separated list of model YAML files to load."
-        exit 1
-    fi
-
-    if [ -z "$PROFILE" ]; then
-        echo "PROFILE environment variable is not set. Please set it to one of the following: cpu, gpu-8g, gpu-16g, apple"
-        exit 1
-    fi
-}
-
-detect_gpu
-detect_gpu_size
-
-PROFILE="${PROFILE:-$GPU_SIZE}" # default to the detected GPU_SIZE (cpu when no usable GPU was found)
-export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"
-
-check_vars
-
-echo "===> Starting LocalAI[$PROFILE] with the following models: $MODELS"
-
-exec /build/entrypoint.sh "$@"
--- a/Show More
+++ b/Show More