fix(deps): update module github.com/onsi/gomega to v1.29.0

2026-02-03 03:02:38 -05:00 · 2023-10-25 18:54:03 +00:00
1175 changed files with 22129 additions and 395278 deletions
--- a/.air.toml
+++ b/.air.toml
@@ -1,8 +0,0 @@
-# .air.toml
-[build]
-cmd = "make build"
-bin = "./local-ai"
-args_bin = [ "--debug" ]
-include_ext = ["go", "html", "yaml", "toml", "json", "txt", "md"]
-exclude_dir = ["pkg/grpc/proto"]
-delay = 1000
--- a/.devcontainer-scripts/postcreate.sh
+++ b/.devcontainer-scripts/postcreate.sh
@@ -1,17 +0,0 @@
-#!/bin/bash
-
-cd /workspace
-
-# Get the files into the volume without a bind mount
-if [ ! -d ".git" ]; then
-    git clone https://github.com/mudler/LocalAI.git .
-else
-    git fetch
-fi
-
-echo "Standard Post-Create script completed."
-
-if [ -f "/devcontainer-customization/postcreate.sh" ]; then
-    echo "Launching customization postcreate.sh"
-    bash "/devcontainer-customization/postcreate.sh"
-fi
--- a/.devcontainer-scripts/poststart.sh
+++ b/.devcontainer-scripts/poststart.sh
@@ -1,13 +0,0 @@
-#!/bin/bash
-
-cd /workspace
-
-# Ensures generated source files are present upon load
-make prepare
-
-echo "Standard Post-Start script completed."
-
-if [ -f "/devcontainer-customization/poststart.sh" ]; then
-    echo "Launching customization poststart.sh"
-    bash "/devcontainer-customization/poststart.sh"
-fi
--- a/.devcontainer-scripts/utils.sh
+++ b/.devcontainer-scripts/utils.sh
@@ -1,55 +0,0 @@
-#!/bin/bash
-
-# This file contains some really simple functions that are useful when building up customization scripts.
-
-
-# Checks if the git config has a user registered - and sets it up if not.
-#
-# Param 1: name
-# Param 2: email
-#
-config_user() {
-    echo "Configuring git for $1 <$2>"
-    local gcn=$(git config --global user.name)
-    if [ -z "${gcn}" ]; then
-        echo "Setting up git user / remote"
-        git config --global user.name "$1"
-        git config --global user.email "$2"
-        
-    fi
-}
-
-# Checks if the git remote is configured - and sets it up if not. Fetches either way.
-#
-# Param 1: remote name
-# Param 2: remote url
-#
-config_remote() {
-    echo "Adding git remote and fetching $2 as $1"
-    local gr=$(git remote -v | grep $1)
-    if [ -z "${gr}" ]; then
-        git remote add $1 $2
-    fi
-    git fetch $1
-}
-
-# Setup special .ssh files
-# Prints out lines of text to make things pretty
-# Param 1: bash array, filenames relative to the customization directory that should be copied to ~/.ssh
-setup_ssh() {
-    echo "starting ~/.ssh directory setup..."
-    mkdir -p "${HOME}.ssh"
-    chmod 0700 "${HOME}/.ssh"
-    echo "-----"
-    local files=("$@")
-    for file in "${files[@]}" ; do
-        local cfile="/devcontainer-customization/${file}"
-        local hfile="${HOME}/.ssh/${file}"
-        if [ ! -f "${hfile}" ]; then
-            echo "copying \"${file}\""
-            cp "${cfile}" "${hfile}"
-            chmod 600 "${hfile}"
-        fi
-    done
-    echo "~/.ssh directory setup complete!"
-}
--- a/.devcontainer/customization/README.md
+++ b/.devcontainer/customization/README.md
@@ -1,25 +0,0 @@
-Place any additional resources your environment requires in this directory
-
-Script hooks are currently called for:
-`postcreate.sh` and `poststart.sh`
-
-If files with those names exist here, they will be called at the end of the normal script.
-
-This is a good place to set things like `git config --global user.name` are set - and to handle any other files that are mounted via this directory.
-
-To assist in doing so, `source /.devcontainer-scripts/utils.sh` will provide utility functions that may be useful - for example:
-
-```
-#!/bin/bash
-
-source "/.devcontainer-scripts/utils.sh"
-
-sshfiles=("config", "key.pub")
-
-setup_ssh "${sshfiles[@]}"
-
-config_user "YOUR NAME" "YOUR EMAIL"
-
-config_remote "REMOTE NAME" "REMOTE URL"
-
-```
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -1,24 +0,0 @@
-{
-    "$schema": "https://raw.githubusercontent.com/devcontainers/spec/main/schemas/devContainer.schema.json",
-    "name": "LocalAI",
-    "workspaceFolder": "/workspace",
-    "dockerComposeFile": [ "./docker-compose-devcontainer.yml" ],
-    "service": "api",
-    "shutdownAction": "stopCompose",
-    "customizations": {
-        "vscode": {
-            "extensions": [
-                "golang.go",
-                "ms-vscode.makefile-tools",
-                "ms-azuretools.vscode-docker",
-                "ms-python.python",
-                "ms-python.debugpy",
-                "wayou.vscode-todo-highlight",
-                "waderyan.gitblame"
-            ]
-        }
-    },
-    "forwardPorts": [8080, 3000],
-    "postCreateCommand": "bash /.devcontainer-scripts/postcreate.sh",
-    "postStartCommand": "bash /.devcontainer-scripts/poststart.sh"
-}
--- a/.devcontainer/docker-compose-devcontainer.yml
+++ b/.devcontainer/docker-compose-devcontainer.yml
@@ -1,44 +0,0 @@
-services:
-  api:
-    build:
-      context: ..
-      dockerfile: Dockerfile
-      target: devcontainer
-    env_file:
-      - ../.env
-    ports:
-      - 8080:8080
-    volumes:
-      - localai_workspace:/workspace
-      - ../models:/host-models
-      - ./customization:/devcontainer-customization
-    command: /bin/sh -c "while sleep 1000; do :; done"
-    cap_add:
-      - SYS_PTRACE
-    security_opt:
-      - seccomp:unconfined
-  prometheus:
-    image: prom/prometheus
-    container_name: prometheus
-    command:
-      - '--config.file=/etc/prometheus/prometheus.yml'
-    ports:
-      - 9090:9090
-    restart: unless-stopped
-    volumes:
-      - ./prometheus:/etc/prometheus
-      - prom_data:/prometheus
-  grafana:
-    image: grafana/grafana
-    container_name: grafana
-    ports:
-      - 3000:3000
-    restart: unless-stopped
-    environment:
-      - GF_SECURITY_ADMIN_USER=admin
-      - GF_SECURITY_ADMIN_PASSWORD=grafana
-    volumes:
-      - ./grafana:/etc/grafana/provisioning/datasources
-volumes:
-  prom_data:
-  localai_workspace:
--- a/.devcontainer/grafana/datasource.yml
+++ b/.devcontainer/grafana/datasource.yml
@@ -1,10 +0,0 @@
-
-apiVersion: 1
-
-datasources:
- name: Prometheus
-  type: prometheus
-  url: http://prometheus:9090 
-  isDefault: true
-  access: proxy
-  editable: true
--- a/.devcontainer/prometheus/prometheus.yml
+++ b/.devcontainer/prometheus/prometheus.yml
@@ -1,21 +0,0 @@
-global:
-  scrape_interval: 15s
-  scrape_timeout: 10s
-  evaluation_interval: 15s
-alerting:
-  alertmanagers:
-  - static_configs:
-    - targets: []
-    scheme: http
-    timeout: 10s
-    api_version: v1
-scrape_configs:
- job_name: prometheus
-  honor_timestamps: true
-  scrape_interval: 15s
-  scrape_timeout: 10s
-  metrics_path: /metrics
-  scheme: http
-  static_configs:
-  - targets:
-    - localhost:9090
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,23 +1,5 @@
 .idea
-.github
-.vscode
-.devcontainer
 models
-backends
 examples/chatbot-ui/models
-backend/go/image/stablediffusion-ggml/build/
-backend/go/*/build
-backend/go/*/.cache
-backend/go/*/sources
-backend/go/*/package
 examples/rwkv/models
 examples/**/models
-Dockerfile*
-__pycache__
-
-# SonarQube
-.scannerwork
-
-# backend virtual environments
-**/venv
-backend/python/**/source
--- a/.editorconfig
+++ b/.editorconfig
@@ -1,31 +0,0 @@
-
-root = true
-
-[*]
-indent_style = space
-indent_size = 2
-end_of_line = lf
-charset = utf-8
-trim_trailing_whitespace = true
-insert_final_newline = true
-
-[*.go]
-indent_style = tab
-
-[Makefile]
-indent_style = tab
-
-[*.proto]
-indent_size = 2
-
-[*.py]
-indent_size = 4
-
-[*.js]
-indent_size = 2
-
-[*.yaml]
-indent_size = 2
-
-[*.md]
-trim_trailing_whitespace = false
--- a/.env
+++ b/.env
@@ -1,45 +1,58 @@
 ## Set number of threads.
 ## Note: prefer the number of physical cores. Overbooking the CPU degrades performance notably.
-# LOCALAI_THREADS=14
+# THREADS=14

 ## Specify a different bind address (defaults to ":8080")
-# LOCALAI_ADDRESS=127.0.0.1:8080
+# ADDRESS=127.0.0.1:8080

 ## Default models context size
-# LOCALAI_CONTEXT_SIZE=512
+# CONTEXT_SIZE=512
 #
 ## Define galleries.
 ## models will to install will be visible in `/models/available`
-# LOCALAI_GALLERIES=[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml@master"}]
+# GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}]

 ## CORS settings
-# LOCALAI_CORS=true
-# LOCALAI_CORS_ALLOW_ORIGINS=*
+# CORS=true
+# CORS_ALLOW_ORIGINS=*

 ## Default path for models
 #
-# LOCALAI_MODELS_PATH=/models
+MODELS_PATH=/models

 ## Enable debug mode
-# LOCALAI_LOG_LEVEL=debug
+# DEBUG=true

 ## Disables COMPEL (Diffusers)
 # COMPEL=0

 ## Enable/Disable single backend (useful if only one GPU is available)
-# LOCALAI_SINGLE_ACTIVE_BACKEND=true
+# SINGLE_ACTIVE_BACKEND=true

-# Forces shutdown of the backends if busy (only if LOCALAI_SINGLE_ACTIVE_BACKEND is set)
-# LOCALAI_FORCE_BACKEND_SHUTDOWN=true
+## Specify a build type. Available: cublas, openblas, clblas.
+## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit.
+## OpenBLAS: This is an open-source implementation of the BLAS library that aims to provide highly optimized code for various platforms. It includes support for multi-threading and can be compiled to use hardware-specific features for additional performance. OpenBLAS can run on many kinds of hardware, including CPUs from Intel, AMD, and ARM.
+## clBLAS:   This is an open-source implementation of the BLAS library that uses OpenCL, a framework for writing programs that execute across heterogeneous platforms consisting of CPUs, GPUs, and other processors. clBLAS is designed to take advantage of the parallel computing power of GPUs but can also run on any hardware that supports OpenCL. This includes hardware from different vendors like Nvidia, AMD, and Intel.
+# BUILD_TYPE=openblas
+
+## Uncomment and set to true to enable rebuilding from source
+# REBUILD=true
+
+## Enable go tags, available: stablediffusion, tts
+## stablediffusion: image generation with stablediffusion
+## tts: enables text-to-speech with go-piper 
+## (requires REBUILD=true)
+#
+# GO_TAGS=stablediffusion

 ## Path where to store generated images
-# LOCALAI_IMAGE_PATH=/tmp/generated/images
+# IMAGE_PATH=/tmp

 ## Specify a default upload limit in MB (whisper)
-# LOCALAI_UPLOAD_LIMIT=15
+# UPLOAD_LIMIT

 ## List of external GRPC backends (note on the container image this variable is already set to use extra backends available in extra/)
-# LOCALAI_EXTERNAL_GRPC_BACKENDS=my-backend:127.0.0.1:9000,my-backend2:/usr/bin/backend.py
+# EXTERNAL_GRPC_BACKENDS=my-backend:127.0.0.1:9000,my-backend2:/usr/bin/backend.py

 ### Advanced settings ###
 ### Those are not really used by LocalAI, but from components in the stack ###
@@ -53,41 +66,4 @@
 ### Python backends GRPC max workers
 ### Default number of workers for GRPC Python backends.
 ### This actually controls wether a backend can process multiple requests or not.
-# PYTHON_GRPC_MAX_WORKERS=1
-
-### Define the number of parallel LLAMA.cpp workers (Defaults to 1)
-# LLAMACPP_PARALLEL=1
-
-### Define a list of GRPC Servers for llama-cpp workers to distribute the load
-# https://github.com/ggerganov/llama.cpp/pull/6829
-# https://github.com/ggerganov/llama.cpp/blob/master/tools/rpc/README.md
-# LLAMACPP_GRPC_SERVERS=""
-
-### Enable to run parallel requests
-# LOCALAI_PARALLEL_REQUESTS=true
-
-# Enable to allow p2p mode
-# LOCALAI_P2P=true
-
-# Enable to use federated mode
-# LOCALAI_FEDERATED=true
-
-# Enable to start federation server
-# FEDERATED_SERVER=true
-
-# Define to use federation token
-# TOKEN=""
-
-### Watchdog settings
-###
-# Enables watchdog to kill backends that are inactive for too much time
-# LOCALAI_WATCHDOG_IDLE=true
-#
-# Time in duration format (e.g. 1h30m) after which a backend is considered idle
-# LOCALAI_WATCHDOG_IDLE_TIMEOUT=5m
-#
-# Enables watchdog to kill backends that are busy for too much time
-# LOCALAI_WATCHDOG_BUSY=true
-#
-# Time in duration format (e.g. 1h30m) after which a backend is considered busy
-# LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m
+# PYTHON_GRPC_MAX_WORKERS=1
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,2 +1 @@
 *.sh text eol=lf
-backend/cpp/llama/*.hpp linguist-vendored
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -2,7 +2,9 @@
 name: Bug report
 about: Create a report to help us improve
 title: ''
-labels: bug, unconfirmed, up-for-grabs
+labels: bug
+assignees: mudler
+
 ---

 <!-- Thanks for helping us to improve LocalAI! We welcome all bug reports. Please fill out each area of the template so we can better help you. Comments like this will be hidden when you post but you can delete them if you wish. -->
--- a/.github/ISSUE_TEMPLATE/feature_request.md
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -2,7 +2,9 @@
 name: Feature request
 about: Suggest an idea for this project
 title: ''
-labels: enhancement, up-for-grabs
+labels: enhancement
+assignees: mudler
+
 ---

 <!-- Thanks for helping us to improve LocalAI! We welcome all feature requests. Please fill out each area of the template so we can better help you. Comments like this will be hidden when you post but you can delete them if you wish. -->
--- a/.github/bump_deps.sh
+++ b/.github/bump_deps.sh
@@ -3,25 +3,7 @@ set -xe
 REPO=$1
 BRANCH=$2
 VAR=$3
-FILE=$4
-
-if [ -z "$FILE" ]; then
-    FILE="Makefile"
-fi

 LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")

-# Read $VAR from Makefile (only first match)
-set +e
-CURRENT_COMMIT="$(grep -m1 "^$VAR?=" $FILE | cut -d'=' -f2)"
-set -e
-
-sed -i $FILE -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
-
-if [ -z "$CURRENT_COMMIT" ]; then
-    echo "Could not find $VAR in Makefile."
-    exit 0
-fi
-
-echo "Changes: https://github.com/$REPO/compare/${CURRENT_COMMIT}..${LAST_COMMIT}" >> "${VAR}_message.txt"
-echo "${LAST_COMMIT}" >> "${VAR}_commit.txt"
+sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
--- a/.github/bump_docs.sh
+++ b/.github/bump_docs.sh
@@ -1,7 +0,0 @@
-#!/bin/bash
-set -xe
-REPO=$1
-
-LATEST_TAG=$(curl -s "https://api.github.com/repos/$REPO/releases/latest" | jq -r '.tag_name')
-
-cat <<< $(jq ".version = \"$LATEST_TAG\"" docs/data/version.json) > docs/data/version.json
--- a/.github/check_and_update.py
+++ b/.github/check_and_update.py
@@ -1,85 +0,0 @@
-import hashlib
-from huggingface_hub import hf_hub_download, get_paths_info
-import requests
-import sys
-import os
-
-uri = sys.argv[1]
-file_name = uri.split('/')[-1]
-
-# Function to parse the URI and determine download method
-def parse_uri(uri):
-    if uri.startswith('huggingface://'):
-        repo_id = uri.split('://')[1]
-        return 'huggingface', repo_id.rsplit('/', 1)[0]
-    elif 'huggingface.co' in uri:
-        parts = uri.split('/resolve/')
-        if len(parts) > 1:
-            repo_path = parts[0].split('https://huggingface.co/')[-1]
-            return 'huggingface', repo_path
-    return 'direct', uri
-
-def calculate_sha256(file_path):
-    sha256_hash = hashlib.sha256()
-    with open(file_path, 'rb') as f:
-        for byte_block in iter(lambda: f.read(4096), b''):
-            sha256_hash.update(byte_block)
-    return sha256_hash.hexdigest()
-
-def manual_safety_check_hf(repo_id):
-    scanResponse = requests.get('https://huggingface.co/api/models/' + repo_id + "/scan")
-    scan = scanResponse.json()
-    # Check if 'hasUnsafeFile' exists in the response
-    if 'hasUnsafeFile' in scan:
-        if scan['hasUnsafeFile']:
-            return scan
-        else:
-            return None
-    else:
-        return None
-
-download_type, repo_id_or_url = parse_uri(uri)
-
-new_checksum =  None
-file_path = None
-
-# Decide download method based on URI type
-if download_type == 'huggingface':
-    # Check if the repo is flagged as dangerous by HF
-    hazard = manual_safety_check_hf(repo_id_or_url)
-    if hazard != None:
-        print(f'Error: HuggingFace has detected security problems for {repo_id_or_url}: {str(hazard)}', filename=file_name)
-        sys.exit(5)
-    # Use HF API to pull sha
-    for file in get_paths_info(repo_id_or_url, [file_name], repo_type='model'):
-        try:
-            new_checksum = file.lfs.sha256
-            break
-        except Exception as e:
-            print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
-            sys.exit(2)
-    if new_checksum is None:
-        try:
-            file_path = hf_hub_download(repo_id=repo_id_or_url, filename=file_name)
-        except Exception as e:
-            print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
-            sys.exit(2)
-else:
-    response = requests.get(repo_id_or_url)
-    if response.status_code == 200:
-        with open(file_name, 'wb') as f:
-            f.write(response.content)
-        file_path = file_name
-    elif response.status_code == 404:
-        print(f'File not found: {response.status_code}', file=sys.stderr)
-        sys.exit(2)
-    else:
-        print(f'Error downloading file: {response.status_code}', file=sys.stderr)
-        sys.exit(1)
-
-if new_checksum is None:
-    new_checksum = calculate_sha256(file_path)
-    print(new_checksum)
-    os.remove(file_path)
-else:
-    print(new_checksum)
--- a/.github/checksum_checker.sh
+++ b/.github/checksum_checker.sh
@@ -1,63 +0,0 @@
-#!/bin/bash
-# This scripts needs yq and huggingface_hub to be installed
-# to install hugingface_hub run pip install huggingface_hub
-
-# Path to the input YAML file
-input_yaml=$1
-
-# Function to download file and check checksum using Python
-function check_and_update_checksum() {
-    model_name="$1"
-    file_name="$2"
-    uri="$3"
-    old_checksum="$4"
-    idx="$5"
-
-    # Download the file and calculate new checksum using Python
-    new_checksum=$(python3 ./.github/check_and_update.py $uri)
-    result=$?
-
-    if [[ $result -eq 5 ]]; then
-        echo "Contaminated entry detected, deleting entry for $model_name..."
-        yq eval -i "del([$idx])" "$input_yaml"
-        return
-    fi
-
-    if [[ "$new_checksum" == "" ]]; then
-        echo "Error calculating checksum for $file_name. Skipping..."
-        return
-    fi
-
-    echo "Checksum for $file_name: $new_checksum"
-
-    # Compare and update the YAML file if checksums do not match
-    
-    if [[ $result -eq 2 ]]; then
-        echo "File not found, deleting entry for $file_name..."
-        # yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\"))" "$input_yaml"
-    elif [[ "$old_checksum" != "$new_checksum" ]]; then
-        echo "Checksum mismatch for $file_name. Updating..."
-        yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\").sha256)" "$input_yaml"
-        yq eval -i "(.[$idx].files[] | select(.filename == \"$file_name\")).sha256 = \"$new_checksum\"" "$input_yaml"
-    elif [[ $result -ne 0 ]]; then
-        echo "Error downloading file $file_name. Skipping..."
-    else
-        echo "Checksum match for $file_name. No update needed."
-    fi
-}
-
-# Read the YAML and process each file
-len=$(yq eval '. | length' "$input_yaml")
-for ((i=0; i<$len; i++))
-do
-    name=$(yq eval ".[$i].name" "$input_yaml")
-    files_len=$(yq eval ".[$i].files | length" "$input_yaml")
-    for ((j=0; j<$files_len; j++))
-    do
-        filename=$(yq eval ".[$i].files[$j].filename" "$input_yaml")
-        uri=$(yq eval ".[$i].files[$j].uri" "$input_yaml")
-        checksum=$(yq eval ".[$i].files[$j].sha256" "$input_yaml")
-        echo "Checking model $name, file $filename. URI = $uri, Checksum = $checksum"
-        check_and_update_checksum "$name" "$filename" "$uri" "$checksum" "$i"
-    done
-done
--- a/.github/ci/modelslist.go
+++ b/.github/ci/modelslist.go
@@ -1,304 +0,0 @@
-package main
-
-import (
-	"fmt"
-	"html/template"
-	"io/ioutil"
-	"os"
-
-	"github.com/microcosm-cc/bluemonday"
-	"gopkg.in/yaml.v3"
-)
-
-var modelPageTemplate string = `
-<!DOCTYPE html>
-<html>
-<head>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>LocalAI models</title>
-    <link href="https://cdnjs.cloudflare.com/ajax/libs/flowbite/2.3.0/flowbite.min.css" rel="stylesheet" />
-    <script src="https://cdn.jsdelivr.net/npm/vanilla-lazyload@19.1.3/dist/lazyload.min.js"></script>
-
-    <link
-    rel="stylesheet"
-    href="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/styles/default.min.css"
-  />
-    <script
-    defer
-    src="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/highlight.min.js"
-  ></script>
-    <script
-    defer
-    src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"
-  ></script>
-  <script
-    defer
-    src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"
-  ></script>
-  <script
-    defer
-    src="https://cdn.jsdelivr.net/npm/dompurify@3.0.6/dist/purify.min.js"
-  ></script>
-
-  <link href="/static/general.css" rel="stylesheet" />
-    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Roboto:wght@400;500&display=swap" rel="stylesheet">
-    <link
-    href="https://fonts.googleapis.com/css?family=Roboto:300,400,500,700,900&display=swap"
-    rel="stylesheet" />
-  <link
-    rel="stylesheet"
-    href="https://cdn.jsdelivr.net/npm/tw-elements/css/tw-elements.min.css" />
-  <script src="https://cdn.tailwindcss.com/3.3.0"></script>
-  <script>
-    tailwind.config = {
-      darkMode: "class",
-      theme: {
-        fontFamily: {
-          sans: ["Roboto", "sans-serif"],
-          body: ["Roboto", "sans-serif"],
-          mono: ["ui-monospace", "monospace"],
-        },
-      },
-      corePlugins: {
-        preflight: false,
-      },
-    };
-  </script>
-    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.1.1/css/all.min.css">
-    <script src="https://unpkg.com/htmx.org@1.9.12" integrity="sha384-ujb1lZYygJmzgSwoxRggbCHcjc0rB2XoQrxeTUQyRjrOnlCoYta87iKBWq3EsdM2" crossorigin="anonymous"></script>
-</head>
-
-<body class="bg-gray-900 text-gray-200">
-<div class="flex flex-col min-h-screen">
-
-<nav class="bg-gray-800 shadow-lg">
-    <div class="container mx-auto px-4 py-4">
-        <div class="flex items-center justify-between">
-            <div class="flex items-center">
-                <a href="/" class="text-white text-xl font-bold"><img src="https://github.com/mudler/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo" class="h-10 mr-3 border-2 border-gray-300 shadow rounded"></a>
-                <a href="/" class="text-white text-xl font-bold">LocalAI</a>
-            </div>
-            <!-- Menu button for small screens -->
-            <div class="lg:hidden">
-                <button id="menu-toggle" class="text-gray-400 hover:text-white focus:outline-none">
-                    <i class="fas fa-bars fa-lg"></i>
-                </button>
-            </div>
-            <!-- Navigation links -->
-            <div class="hidden lg:flex lg:items-center lg:justify-end lg:flex-1 lg:w-0">
-                <a href="https://localai.io" class="text-gray-400 hover:text-white px-3 py-2 rounded" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
-            </div>
-        </div>
-        <!-- Collapsible menu for small screens -->
-        <div class="hidden lg:hidden" id="mobile-menu">
-            <div class="pt-4 pb-3 border-t border-gray-700">
-
-                <a href="https://localai.io" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
-
-            </div>
-        </div>
-    </div>
-</nav>
-
-<style>
-  .is-hidden {
-	display: none;
-	  }
-</style>
-
-<div class="container mx-auto px-4 flex-grow">
-
-<div class="models mt-12">
-	<h2 class="text-center text-3xl font-semibold text-gray-100">
-	LocalAI model gallery list </h2><br>
-
-	<h2 class="text-center text-3xl font-semibold text-gray-100">
-
-	 🖼️ Available {{.AvailableModels}} models</i> <a href="https://localai.io/models/" target="_blank" >
-			<i class="fas fa-circle-info pr-2"></i>
-		</a></h2>
-
-	<h3>
-	Refer to the Model gallery <a href="https://localai.io/models/" target="_blank" ><i class="fas fa-circle-info pr-2"></i></a> for more information on how to use the models with LocalAI.<br>
-
-	You can install models with the CLI command <code>local-ai models install <model-name></code>. or by using the WebUI.
-	</h3>
-
-	<input class="form-control appearance-none block w-full mt-5 px-3 py-2 text-base font-normal text-gray-300 pb-2 mb-5 bg-gray-800 bg-clip-padding border border-solid border-gray-600 rounded transition ease-in-out m-0 focus:text-gray-300 focus:bg-gray-900 focus:border-blue-500 focus:outline-none" type="search"
-	id="searchbox" placeholder="Live search keyword..">
-	  <div class="dark grid grid-cols-1 grid-rows-1 md:grid-cols-3 block rounded-lg shadow-secondary-1 dark:bg-surface-dark">
-		{{ range $_, $model := .Models }}
-		<div class="box me-4 mb-2 block rounded-lg bg-white shadow-secondary-1  dark:bg-gray-800 dark:bg-surface-dark dark:text-white text-surface pb-2">
-		<div>
-		    {{ $icon := "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg" }}
-			{{ if $model.Icon }}
-	  		{{ $icon = $model.Icon }}
-	  		{{ end }}
-			<div class="flex justify-center items-center">
-				<img data-src="{{ $icon }}" alt="{{$model.Name}}" class="rounded-t-lg max-h-48 max-w-96 object-cover mt-3 lazy">
-			</div>
-	  		<div class="p-6 text-surface dark:text-white">
-				<h5 class="mb-2 text-xl font-medium leading-tight">{{$model.Name}}</h5>
-
-
-				<p class="mb-4 text-base truncate">{{ $model.Description }}</p>
-
-			</div>
-			<div class="px-6 pt-4 pb-2">
-
-      <!-- Modal toggle -->
-      <button data-modal-target="{{ $model.Name}}-modal" data-modal-toggle="{{ $model.Name }}-modal" class="block text-white bg-blue-700 hover:bg-blue-800 focus:ring-4 focus:outline-none focus:ring-blue-300 font-medium rounded-lg text-sm px-5 py-2.5 text-center dark:bg-blue-600 dark:hover:bg-blue-700 dark:focus:ring-blue-800" type="button">
-        More info
-      </button>
-
-    <!-- Main modal -->
-    <div id="{{ $model.Name}}-modal" tabindex="-1" aria-hidden="true" class="hidden overflow-y-auto overflow-x-hidden fixed top-0 right-0 left-0 z-50 justify-center items-center w-full md:inset-0 h-[calc(100%-1rem)] max-h-full">
-        <div class="relative p-4 w-full max-w-2xl max-h-full">
-            <!-- Modal content -->
-            <div class="relative bg-white rounded-lg shadow dark:bg-gray-700">
-                <!-- Modal header -->
-                <div class="flex items-center justify-between p-4 md:p-5 border-b rounded-t dark:border-gray-600">
-                    <h3 class="text-xl font-semibold text-gray-900 dark:text-white">
-                        {{ $model.Name}}
-                    </h3>
-                    <button type="button" class="text-gray-400 bg-transparent hover:bg-gray-200 hover:text-gray-900 rounded-lg text-sm w-8 h-8 ms-auto inline-flex justify-center items-center dark:hover:bg-gray-600 dark:hover:text-white" data-modal-hide="{{$model.Name}}-modal">
-                        <svg class="w-3 h-3" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 14 14">
-                            <path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="m1 1 6 6m0 0 6 6M7 7l6-6M7 7l-6 6"/>
-                        </svg>
-                        <span class="sr-only">Close modal</span>
-                    </button>
-                </div>
-                <!-- Modal body -->
-                <div class="p-4 md:p-5 space-y-4">
-                    <div class="flex justify-center items-center">
-                    <img data-src="{{ $icon }}" alt="{{$model.Name}}" class="lazy rounded-t-lg max-h-48 max-w-96 object-cover mt-3">
-                  </div>
-
-                    <p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">
-                    {{ $model.Description }}
-
-                    </p>
-
-                    <p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">
-                    To install the model with the CLI, run: <br>
-                    <code> local-ai models install {{$model.Name}} </code> <br>
-
-                    <hr>
-                    See also <a href="https://localai.io/models/" target="_blank" >
-                    Installation <i class="fas fa-circle-info pr-2"></i>
-                    </a> to see how to install models with the REST API.
-                    </p>
-
-                    <p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">
-                    <ul>
-                    {{ range $_, $u := $model.URLs }}
-                    <li><a href="{{ $u }}" target=_blank><i class="fa-solid fa-link"></i> {{ $u }}</a></li>
-                    {{ end }}
-                    </ul>
-                    </p>
-                </div>
-                <!-- Modal footer -->
-                <div class="flex items-center p-4 md:p-5 border-t border-gray-200 rounded-b dark:border-gray-600">
-                    <button data-modal-hide="{{ $model.Name}}-modal" type="button" class="py-2.5 px-5 ms-3 text-sm font-medium text-gray-900 focus:outline-none bg-white rounded-lg border border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-4 focus:ring-gray-100 dark:focus:ring-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:border-gray-600 dark:hover:text-white dark:hover:bg-gray-700">Close</button>
-                </div>
-            </div>
-        </div>
-    </div>
-
-
-			</div>
-		</div>
-		</div>
-		{{ end }}
-
-		</div>
-  </div>
-</div>
-
-<script>
-var lazyLoadInstance = new LazyLoad({
-  // Your custom settings go here
-});
-
-let cards = document.querySelectorAll('.box')
-
-function liveSearch() {
-    let search_query = document.getElementById("searchbox").value;
-
-    //Use innerText if all contents are visible
-    //Use textContent for including hidden elements
-    for (var i = 0; i < cards.length; i++) {
-        if(cards[i].textContent.toLowerCase()
-                .includes(search_query.toLowerCase())) {
-            cards[i].classList.remove("is-hidden");
-        } else {
-            cards[i].classList.add("is-hidden");
-        }
-    }
-}
-
-//A little delay
-let typingTimer;
-let typeInterval = 500;
-let searchInput = document.getElementById('searchbox');
-
-searchInput.addEventListener('keyup', () => {
-    clearTimeout(typingTimer);
-    typingTimer = setTimeout(liveSearch, typeInterval);
-});
-</script>
-
-</div>
-
-<script src="https://cdnjs.cloudflare.com/ajax/libs/flowbite/2.3.0/flowbite.min.js"></script>
-</body>
-</html>
-`
-
-type GalleryModel struct {
-	Name        string   `json:"name" yaml:"name"`
-	URLs        []string `json:"urls" yaml:"urls"`
-	Icon        string   `json:"icon" yaml:"icon"`
-	Description string   `json:"description" yaml:"description"`
-}
-
-func main() {
-	// read the YAML file which contains the models
-
-	f, err := ioutil.ReadFile(os.Args[1])
-	if err != nil {
-		fmt.Println("Error reading file:", err)
-		return
-	}
-
-	models := []*GalleryModel{}
-	err = yaml.Unmarshal(f, &models)
-	if err != nil {
-		// write to stderr
-		os.Stderr.WriteString("Error unmarshaling YAML: " + err.Error() + "\n")
-		return
-	}
-
-	// Ensure that all arbitrary text content is sanitized before display
-	for i, m := range models {
-		models[i].Name = bluemonday.StrictPolicy().Sanitize(m.Name)
-		models[i].Description = bluemonday.StrictPolicy().Sanitize(m.Description)
-	}
-
-	// render the template
-	data := struct {
-		Models          []*GalleryModel
-		AvailableModels int
-	}{
-		Models:          models,
-		AvailableModels: len(models),
-	}
-	tmpl := template.Must(template.New("modelPage").Parse(modelPageTemplate))
-
-	err = tmpl.Execute(os.Stdout, data)
-	if err != nil {
-		fmt.Println("Error executing template:", err)
-		return
-	}
-}
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -1,119 +0,0 @@
-# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
-version: 2
-updates:
-  - package-ecosystem: "gitsubmodule"
-    directory: "/"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "gomod"
-    directory: "/"
-    schedule:
-      interval: "weekly"
-    ignore:
-    - dependency-name: "github.com/mudler/LocalAI/pkg/grpc/proto"
-  - package-ecosystem: "github-actions"
-    # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
-    directory: "/"
-    schedule:
-      # Check for updates to GitHub Actions every weekday
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
-    directory: "/"
-    schedule:
-      # Check for updates to GitHub Actions every weekday
-      interval: "weekly"
-  - package-ecosystem: "docker"
-    # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
-    directory: "/"
-    schedule:
-      # Check for updates to GitHub Actions every weekday
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/bark"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/common/template"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/coqui"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/diffusers"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/exllama"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/exllama2"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/mamba"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/openvoice"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/rerankers"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/sentencetransformers"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/transformers"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/vllm"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/examples/chainlit"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/examples/functions"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/examples/langchain/langchainpy-localai-example"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/examples/langchain-chroma"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/examples/streamlit-bot"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "docker"
-    directory: "/examples/k8sgpt"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "docker"
-    directory: "/examples/kubernetes"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "docker"
-    directory: "/examples/langchain"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "gomod"
-    directory: "/examples/semantic-todo"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "docker"
-    directory: "/examples/telegram-bot"
-    schedule:
-      interval: "weekly"
--- a/.github/gallery-agent/agent.go
+++ b/.github/gallery-agent/agent.go
@@ -1,445 +0,0 @@
-package main
-
-import (
-	"context"
-	"encoding/json"
-	"fmt"
-	"io"
-	"net/http"
-	"os"
-	"regexp"
-	"slices"
-	"strings"
-
-	"github.com/ghodss/yaml"
-	hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
-	cogito "github.com/mudler/cogito"
-
-	"github.com/mudler/cogito/structures"
-	"github.com/sashabaranov/go-openai/jsonschema"
-)
-
-var (
-	openAIModel      = os.Getenv("OPENAI_MODEL")
-	openAIKey        = os.Getenv("OPENAI_KEY")
-	openAIBaseURL    = os.Getenv("OPENAI_BASE_URL")
-	galleryIndexPath = os.Getenv("GALLERY_INDEX_PATH")
-	//defaultclient
-	llm = cogito.NewOpenAILLM(openAIModel, openAIKey, openAIBaseURL)
-)
-
-// cleanTextContent removes trailing spaces, tabs, and normalizes line endings
-// to prevent YAML linting issues like trailing spaces and multiple empty lines
-func cleanTextContent(text string) string {
-	lines := strings.Split(text, "\n")
-	var cleanedLines []string
-	var prevEmpty bool
-	for _, line := range lines {
-		// Remove all trailing whitespace (spaces, tabs, etc.)
-		trimmed := strings.TrimRight(line, " \t\r")
-		// Avoid multiple consecutive empty lines
-		if trimmed == "" {
-			if !prevEmpty {
-				cleanedLines = append(cleanedLines, "")
-			}
-			prevEmpty = true
-		} else {
-			cleanedLines = append(cleanedLines, trimmed)
-			prevEmpty = false
-		}
-	}
-	// Remove trailing empty lines from the result
-	result := strings.Join(cleanedLines, "\n")
-	return stripThinkingTags(strings.TrimRight(result, "\n"))
-}
-
-type galleryModel struct {
-	Name string   `yaml:"name"`
-	Urls []string `yaml:"urls"`
-}
-
-// isModelExisting checks if a specific model ID exists in the gallery using text search
-func isModelExisting(modelID string) (bool, error) {
-	indexPath := getGalleryIndexPath()
-	content, err := os.ReadFile(indexPath)
-	if err != nil {
-		return false, fmt.Errorf("failed to read %s: %w", indexPath, err)
-	}
-
-	var galleryModels []galleryModel
-
-	err = yaml.Unmarshal(content, &galleryModels)
-	if err != nil {
-		return false, fmt.Errorf("failed to unmarshal %s: %w", indexPath, err)
-	}
-
-	for _, galleryModel := range galleryModels {
-		if slices.Contains(galleryModel.Urls, modelID) {
-			return true, nil
-		}
-	}
-
-	return false, nil
-}
-
-// filterExistingModels removes models that already exist in the gallery
-func filterExistingModels(models []ProcessedModel) ([]ProcessedModel, error) {
-	var filteredModels []ProcessedModel
-	for _, model := range models {
-		exists, err := isModelExisting(model.ModelID)
-		if err != nil {
-			fmt.Printf("Error checking if model %s exists: %v, skipping\n", model.ModelID, err)
-			continue
-		}
-
-		if !exists {
-			filteredModels = append(filteredModels, model)
-		} else {
-			fmt.Printf("Skipping existing model: %s\n", model.ModelID)
-		}
-	}
-
-	fmt.Printf("Filtered out %d existing models, %d new models remaining\n",
-		len(models)-len(filteredModels), len(filteredModels))
-
-	return filteredModels, nil
-}
-
-// getGalleryIndexPath returns the gallery index file path, with a default fallback
-func getGalleryIndexPath() string {
-	if galleryIndexPath != "" {
-		return galleryIndexPath
-	}
-	return "gallery/index.yaml"
-}
-
-func stripThinkingTags(content string) string {
-	// Remove content between <thinking> and </thinking> (including multi-line)
-	content = regexp.MustCompile(`(?s)<thinking>.*?</thinking>`).ReplaceAllString(content, "")
-	// Remove content between <think> and </think> (including multi-line)
-	content = regexp.MustCompile(`(?s)<think>.*?</think>`).ReplaceAllString(content, "")
-	// Clean up any extra whitespace
-	content = strings.TrimSpace(content)
-	return content
-}
-
-func getRealReadme(ctx context.Context, repository string) (string, error) {
-	// Create a conversation fragment
-	fragment := cogito.NewEmptyFragment().
-		AddMessage("user",
-			`Your task is to get a clear description of a large language model from huggingface by using the provided tool. I will share with you a repository that might be quantized, and as such probably not by the original model author. We need to get the real  description of the model, and not the one that might be quantized. You will have to call the tool to get the readme more than once by figuring out from the quantized readme which is the base model readme. This is the repository: `+repository)
-
-	// Execute with tools
-	result, err := cogito.ExecuteTools(llm, fragment,
-		cogito.WithIterations(3),
-		cogito.WithMaxAttempts(3),
-		cogito.WithTools(&HFReadmeTool{client: hfapi.NewClient()}))
-	if err != nil {
-		return "", err
-	}
-
-	result = result.AddMessage("user", "Describe the model in a clear and concise way that can be shared in a model gallery.")
-
-	// Get a response
-	newFragment, err := llm.Ask(ctx, result)
-	if err != nil {
-		return "", err
-	}
-
-	content := newFragment.LastMessage().Content
-	return cleanTextContent(content), nil
-}
-
-func selectMostInterestingModels(ctx context.Context, searchResult *SearchResult) ([]ProcessedModel, error) {
-
-	if len(searchResult.Models) == 1 {
-		return searchResult.Models, nil
-	}
-
-	// Create a conversation fragment
-	fragment := cogito.NewEmptyFragment().
-		AddMessage("user",
-			`Your task is to analyze a list of AI models and select the most interesting ones for a model gallery. You will be given detailed information about multiple models including their metadata, file information, and README content.
-
-Consider the following criteria when selecting models:
-1. Model popularity (download count)
-2. Model recency (last modified date)
-3. Model completeness (has preferred model file, README, etc.)
-4. Model uniqueness (not duplicates or very similar models)
-5. Model quality (based on README content and description)
-6. Model utility (practical applications)
-
-You should select models that would be most valuable for users browsing a model gallery. Prioritize models that are:
- Well-documented with clear READMEs
- Recently updated
- Popular (high download count)
- Have the preferred quantization format available
- Offer unique capabilities or are from reputable authors
-
-Return your analysis and selection reasoning.`)
-
-	// Add the search results as context
-	modelsInfo := fmt.Sprintf("Found %d models matching '%s' with quantization preference '%s':\n\n",
-		searchResult.TotalModelsFound, searchResult.SearchTerm, searchResult.Quantization)
-
-	for i, model := range searchResult.Models {
-		modelsInfo += fmt.Sprintf("Model %d:\n", i+1)
-		modelsInfo += fmt.Sprintf("  ID: %s\n", model.ModelID)
-		modelsInfo += fmt.Sprintf("  Author: %s\n", model.Author)
-		modelsInfo += fmt.Sprintf("  Downloads: %d\n", model.Downloads)
-		modelsInfo += fmt.Sprintf("  Last Modified: %s\n", model.LastModified)
-		modelsInfo += fmt.Sprintf("  Files: %d files\n", len(model.Files))
-
-		if model.PreferredModelFile != nil {
-			modelsInfo += fmt.Sprintf("  Preferred Model File: %s (%d bytes)\n",
-				model.PreferredModelFile.Path, model.PreferredModelFile.Size)
-		} else {
-			modelsInfo += "  No preferred model file found\n"
-		}
-
-		if model.ReadmeContent != "" {
-			modelsInfo += fmt.Sprintf("  README: %s\n", model.ReadmeContent)
-		}
-
-		if model.ProcessingError != "" {
-			modelsInfo += fmt.Sprintf("  Processing Error: %s\n", model.ProcessingError)
-		}
-
-		modelsInfo += "\n"
-	}
-
-	fragment = fragment.AddMessage("user", modelsInfo)
-
-	fragment = fragment.AddMessage("user", "Based on your analysis, select the top 5 most interesting models and provide a brief explanation for each selection. Also, create a filtered SearchResult with only the selected models. Return just a list of repositories IDs, you will later be asked to output it as a JSON array with the json tool.")
-
-	// Get a response
-	newFragment, err := llm.Ask(ctx, fragment)
-	if err != nil {
-		return nil, err
-	}
-
-	fmt.Println(newFragment.LastMessage().Content)
-	repositories := struct {
-		Repositories []string `json:"repositories"`
-	}{}
-
-	s := structures.Structure{
-		Schema: jsonschema.Definition{
-			Type:                 jsonschema.Object,
-			AdditionalProperties: false,
-			Properties: map[string]jsonschema.Definition{
-				"repositories": {
-					Type:        jsonschema.Array,
-					Items:       &jsonschema.Definition{Type: jsonschema.String},
-					Description: "The trending repositories IDs",
-				},
-			},
-			Required: []string{"repositories"},
-		},
-		Object: &repositories,
-	}
-
-	err = newFragment.ExtractStructure(ctx, llm, s)
-	if err != nil {
-		return nil, err
-	}
-
-	filteredModels := []ProcessedModel{}
-	for _, m := range searchResult.Models {
-		if slices.Contains(repositories.Repositories, m.ModelID) {
-			filteredModels = append(filteredModels, m)
-		}
-	}
-
-	return filteredModels, nil
-}
-
-// ModelMetadata represents extracted metadata from a model
-type ModelMetadata struct {
-	Tags    []string `json:"tags"`
-	License string   `json:"license"`
-}
-
-// extractModelMetadata extracts tags and license from model README and documentation
-func extractModelMetadata(ctx context.Context, model ProcessedModel) ([]string, string, error) {
-	// Create a conversation fragment
-	fragment := cogito.NewEmptyFragment().
-		AddMessage("user",
-			`Your task is to extract metadata from an AI model's README and documentation. You will be provided with:
-1. Model information (ID, author, description)
-2. README content
-
-You need to extract:
-1. **Tags**: An array of relevant tags that describe the model. Use common tags from the gallery such as:
-   - llm, gguf, gpu, cpu, multimodal, image-to-text, text-to-text, text-to-speech, tts
-   - thinking, reasoning, chat, instruction-tuned, code, vision
-   - Model family names (e.g., llama, qwen, mistral, gemma) if applicable
-   - Any other relevant descriptive tags
-   Select 3-8 most relevant tags.
-
-2. **License**: The license identifier (e.g., "apache-2.0", "mit", "llama2", "gpl-3.0", "bsd", "cc-by-4.0").
-   If no license is found, return an empty string.
-
-Return the extracted metadata in a structured format.`)
-
-	// Add model information
-	modelInfo := "Model Information:\n"
-	modelInfo += fmt.Sprintf("  ID: %s\n", model.ModelID)
-	modelInfo += fmt.Sprintf("  Author: %s\n", model.Author)
-	modelInfo += fmt.Sprintf("  Downloads: %d\n", model.Downloads)
-	if model.ReadmeContent != "" {
-		modelInfo += fmt.Sprintf("  README Content:\n%s\n", model.ReadmeContent)
-	} else if model.ReadmeContentPreview != "" {
-		modelInfo += fmt.Sprintf("  README Preview: %s\n", model.ReadmeContentPreview)
-	}
-
-	fragment = fragment.AddMessage("user", modelInfo)
-	fragment = fragment.AddMessage("user", "Extract the tags and license from the model information. Return the metadata as a JSON object with 'tags' (array of strings) and 'license' (string).")
-
-	// Get a response
-	newFragment, err := llm.Ask(ctx, fragment)
-	if err != nil {
-		return nil, "", err
-	}
-
-	// Extract structured metadata
-	metadata := ModelMetadata{}
-
-	s := structures.Structure{
-		Schema: jsonschema.Definition{
-			Type:                 jsonschema.Object,
-			AdditionalProperties: false,
-			Properties: map[string]jsonschema.Definition{
-				"tags": {
-					Type:        jsonschema.Array,
-					Items:       &jsonschema.Definition{Type: jsonschema.String},
-					Description: "Array of relevant tags describing the model",
-				},
-				"license": {
-					Type:        jsonschema.String,
-					Description: "License identifier (e.g., apache-2.0, mit, llama2). Empty string if not found.",
-				},
-			},
-			Required: []string{"tags", "license"},
-		},
-		Object: &metadata,
-	}
-
-	err = newFragment.ExtractStructure(ctx, llm, s)
-	if err != nil {
-		return nil, "", err
-	}
-
-	return metadata.Tags, metadata.License, nil
-}
-
-// extractIconFromReadme scans the README content for image URLs and returns the first suitable icon URL found
-func extractIconFromReadme(readmeContent string) string {
-	if readmeContent == "" {
-		return ""
-	}
-
-	// Regular expressions to match image URLs in various formats (case-insensitive)
-	// Match markdown image syntax: ![alt](url) - case insensitive extensions
-	markdownImageRegex := regexp.MustCompile(`(?i)!\[[^\]]*\]\(([^)]+\.(png|jpg|jpeg|svg|webp|gif))\)`)
-	// Match HTML img tags: <img src="url">
-	htmlImageRegex := regexp.MustCompile(`(?i)<img[^>]+src=["']([^"']+\.(png|jpg|jpeg|svg|webp|gif))["']`)
-	// Match plain URLs ending with image extensions
-	plainImageRegex := regexp.MustCompile(`(?i)https?://[^\s<>"']+\.(png|jpg|jpeg|svg|webp|gif)`)
-
-	// Try markdown format first
-	matches := markdownImageRegex.FindStringSubmatch(readmeContent)
-	if len(matches) > 1 && matches[1] != "" {
-		url := strings.TrimSpace(matches[1])
-		// Prefer HuggingFace CDN URLs or absolute URLs
-		if strings.HasPrefix(strings.ToLower(url), "http") {
-			return url
-		}
-	}
-
-	// Try HTML img tags
-	matches = htmlImageRegex.FindStringSubmatch(readmeContent)
-	if len(matches) > 1 && matches[1] != "" {
-		url := strings.TrimSpace(matches[1])
-		if strings.HasPrefix(strings.ToLower(url), "http") {
-			return url
-		}
-	}
-
-	// Try plain URLs
-	matches = plainImageRegex.FindStringSubmatch(readmeContent)
-	if len(matches) > 0 {
-		url := strings.TrimSpace(matches[0])
-		if strings.HasPrefix(strings.ToLower(url), "http") {
-			return url
-		}
-	}
-
-	return ""
-}
-
-// getHuggingFaceAvatarURL attempts to get the HuggingFace avatar URL for a user
-func getHuggingFaceAvatarURL(author string) string {
-	if author == "" {
-		return ""
-	}
-
-	// Try to fetch user info from HuggingFace API
-	// HuggingFace API endpoint: https://huggingface.co/api/users/{username}
-	baseURL := "https://huggingface.co"
-	userURL := fmt.Sprintf("%s/api/users/%s", baseURL, author)
-
-	req, err := http.NewRequest("GET", userURL, nil)
-	if err != nil {
-		return ""
-	}
-
-	client := &http.Client{}
-	resp, err := client.Do(req)
-	if err != nil {
-		return ""
-	}
-	defer resp.Body.Close()
-
-	if resp.StatusCode != http.StatusOK {
-		return ""
-	}
-
-	// Parse the response to get avatar URL
-	var userInfo map[string]interface{}
-	body, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return ""
-	}
-
-	if err := json.Unmarshal(body, &userInfo); err != nil {
-		return ""
-	}
-
-	// Try to extract avatar URL from response
-	if avatar, ok := userInfo["avatarUrl"].(string); ok && avatar != "" {
-		return avatar
-	}
-	if avatar, ok := userInfo["avatar"].(string); ok && avatar != "" {
-		return avatar
-	}
-
-	return ""
-}
-
-// extractModelIcon extracts icon URL from README or falls back to HuggingFace avatar
-func extractModelIcon(model ProcessedModel) string {
-	// First, try to extract icon from README
-	if icon := extractIconFromReadme(model.ReadmeContent); icon != "" {
-		return icon
-	}
-
-	// Fallback: Try to get HuggingFace user avatar
-	if model.Author != "" {
-		if avatar := getHuggingFaceAvatarURL(model.Author); avatar != "" {
-			return avatar
-		}
-	}
-
-	return ""
-}
--- a/.github/gallery-agent/gallery.go
+++ b/.github/gallery-agent/gallery.go
@@ -1,200 +0,0 @@
-package main
-
-import (
-	"context"
-	"encoding/json"
-	"fmt"
-	"os"
-	"strings"
-
-	"github.com/ghodss/yaml"
-	"github.com/mudler/LocalAI/core/gallery/importers"
-)
-
-func formatTextContent(text string) string {
-	return formatTextContentWithIndent(text, 4, 6)
-}
-
-// formatTextContentWithIndent formats text content with specified base and list item indentation
-func formatTextContentWithIndent(text string, baseIndent int, listItemIndent int) string {
-	var formattedLines []string
-	lines := strings.Split(text, "\n")
-	for _, line := range lines {
-		trimmed := strings.TrimRight(line, " \t\r")
-		if trimmed == "" {
-			// Keep empty lines as empty (no indentation)
-			formattedLines = append(formattedLines, "")
-		} else {
-			// Preserve relative indentation from yaml.Marshal output
-			// Count existing leading spaces to preserve relative structure
-			leadingSpaces := len(trimmed) - len(strings.TrimLeft(trimmed, " \t"))
-			trimmedStripped := strings.TrimLeft(trimmed, " \t")
-
-			var totalIndent int
-			if strings.HasPrefix(trimmedStripped, "-") {
-				// List items: use listItemIndent (ignore existing leading spaces)
-				totalIndent = listItemIndent
-			} else {
-				// Regular lines: use baseIndent + preserve relative indentation
-				// This handles both top-level keys (leadingSpaces=0) and nested properties (leadingSpaces>0)
-				totalIndent = baseIndent + leadingSpaces
-			}
-
-			indentStr := strings.Repeat(" ", totalIndent)
-			formattedLines = append(formattedLines, indentStr+trimmedStripped)
-		}
-	}
-	formattedText := strings.Join(formattedLines, "\n")
-	// Remove any trailing spaces from the formatted description
-	formattedText = strings.TrimRight(formattedText, " \t")
-	return formattedText
-}
-
-// generateYAMLEntry generates a YAML entry for a model using the specified anchor
-func generateYAMLEntry(model ProcessedModel, quantization string) string {
-	modelConfig, err := importers.DiscoverModelConfig("https://huggingface.co/"+model.ModelID, json.RawMessage(`{ "quantization": "`+quantization+`"}`))
-	if err != nil {
-		panic(err)
-	}
-
-	// Extract model name from ModelID
-	parts := strings.Split(model.ModelID, "/")
-	modelName := model.ModelID
-	if len(parts) > 0 {
-		modelName = strings.ToLower(parts[len(parts)-1])
-	}
-	// Remove common suffixes
-	modelName = strings.ReplaceAll(modelName, "-gguf", "")
-	modelName = strings.ReplaceAll(modelName, "-q4_k_m", "")
-	modelName = strings.ReplaceAll(modelName, "-q4_k_s", "")
-	modelName = strings.ReplaceAll(modelName, "-q3_k_m", "")
-	modelName = strings.ReplaceAll(modelName, "-q2_k", "")
-
-	description := model.ReadmeContent
-	if description == "" {
-		description = fmt.Sprintf("AI model: %s", modelName)
-	}
-
-	// Clean up description to prevent YAML linting issues
-	description = cleanTextContent(description)
-	formattedDescription := formatTextContent(description)
-
-	configFile := formatTextContent(modelConfig.ConfigFile)
-
-	filesYAML, _ := yaml.Marshal(modelConfig.Files)
-
-	// Files section: list items need 4 spaces (not 6), since files: is at 2 spaces
-	files := formatTextContentWithIndent(string(filesYAML), 4, 4)
-
-	// Build metadata sections
-	var metadataSections []string
-
-	// Add license if present
-	if model.License != "" {
-		metadataSections = append(metadataSections, fmt.Sprintf(`  license: "%s"`, model.License))
-	}
-
-	// Add tags if present
-	if len(model.Tags) > 0 {
-		tagsYAML, _ := yaml.Marshal(model.Tags)
-		tagsFormatted := formatTextContentWithIndent(string(tagsYAML), 4, 4)
-		tagsFormatted = strings.TrimRight(tagsFormatted, "\n")
-		metadataSections = append(metadataSections, fmt.Sprintf("  tags:\n%s", tagsFormatted))
-	}
-
-	// Add icon if present
-	if model.Icon != "" {
-		metadataSections = append(metadataSections, fmt.Sprintf(`  icon: %s`, model.Icon))
-	}
-
-	// Build the metadata block
-	metadataBlock := ""
-	if len(metadataSections) > 0 {
-		metadataBlock = strings.Join(metadataSections, "\n") + "\n"
-	}
-
-	yamlTemplate := ""
-	yamlTemplate = `- name: "%s"
-  url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
-  urls:
-    - https://huggingface.co/%s
-  description: |
-%s%s
-  overrides:
-%s
-  files:
-%s`
-	// Trim trailing newlines from formatted sections to prevent extra blank lines
-	formattedDescription = strings.TrimRight(formattedDescription, "\n")
-	configFile = strings.TrimRight(configFile, "\n")
-	files = strings.TrimRight(files, "\n")
-	// Add newline before metadata block if present
-	if metadataBlock != "" {
-		metadataBlock = "\n" + strings.TrimRight(metadataBlock, "\n")
-	}
-	return fmt.Sprintf(yamlTemplate,
-		modelName,
-		model.ModelID,
-		formattedDescription,
-		metadataBlock,
-		configFile,
-		files,
-	)
-}
-
-// generateYAMLForModels generates YAML entries for selected models and appends to index.yaml
-func generateYAMLForModels(ctx context.Context, models []ProcessedModel, quantization string) error {
-
-	// Generate YAML entries for each model
-	var yamlEntries []string
-	for _, model := range models {
-		fmt.Printf("Generating YAML entry for model: %s\n", model.ModelID)
-
-		// Generate YAML entry
-		yamlEntry := generateYAMLEntry(model, quantization)
-		yamlEntries = append(yamlEntries, yamlEntry)
-	}
-
-	// Prepend to index.yaml (write at the top)
-	if len(yamlEntries) > 0 {
-		indexPath := getGalleryIndexPath()
-		fmt.Printf("Prepending YAML entries to %s...\n", indexPath)
-
-		// Read current content
-		content, err := os.ReadFile(indexPath)
-		if err != nil {
-			return fmt.Errorf("failed to read %s: %w", indexPath, err)
-		}
-
-		existingContent := string(content)
-		yamlBlock := strings.Join(yamlEntries, "\n")
-
-		// Check if file starts with "---"
-		var newContent string
-		if strings.HasPrefix(existingContent, "---\n") {
-			// File starts with "---", prepend new entries after it
-			restOfContent := strings.TrimPrefix(existingContent, "---\n")
-			// Ensure proper spacing: "---\n" + new entries + "\n" + rest of content
-			newContent = "---\n" + yamlBlock + "\n" + restOfContent
-		} else if strings.HasPrefix(existingContent, "---") {
-			// File starts with "---" but no newline after
-			restOfContent := strings.TrimPrefix(existingContent, "---")
-			newContent = "---\n" + yamlBlock + "\n" + strings.TrimPrefix(restOfContent, "\n")
-		} else {
-			// No "---" at start, prepend new entries at the very beginning
-			// Trim leading whitespace from existing content
-			existingContent = strings.TrimLeft(existingContent, " \t\n\r")
-			newContent = yamlBlock + "\n" + existingContent
-		}
-
-		// Write back to file
-		err = os.WriteFile(indexPath, []byte(newContent), 0644)
-		if err != nil {
-			return fmt.Errorf("failed to write %s: %w", indexPath, err)
-		}
-
-		fmt.Printf("Successfully prepended %d models to %s\n", len(yamlEntries), indexPath)
-	}
-
-	return nil
-}
--- a/.github/gallery-agent/main.go
+++ b/.github/gallery-agent/main.go
@@ -1,383 +0,0 @@
-package main
-
-import (
-	"context"
-	"encoding/json"
-	"fmt"
-	"os"
-	"strconv"
-	"strings"
-	"time"
-
-	hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
-)
-
-// ProcessedModelFile represents a processed model file with additional metadata
-type ProcessedModelFile struct {
-	Path     string `json:"path"`
-	Size     int64  `json:"size"`
-	SHA256   string `json:"sha256"`
-	IsReadme bool   `json:"is_readme"`
-	FileType string `json:"file_type"` // "model", "readme", "other"
-}
-
-// ProcessedModel represents a processed model with all gathered metadata
-type ProcessedModel struct {
-	ModelID                 string               `json:"model_id"`
-	Author                  string               `json:"author"`
-	Downloads               int                  `json:"downloads"`
-	LastModified            string               `json:"last_modified"`
-	Files                   []ProcessedModelFile `json:"files"`
-	PreferredModelFile      *ProcessedModelFile  `json:"preferred_model_file,omitempty"`
-	ReadmeFile              *ProcessedModelFile  `json:"readme_file,omitempty"`
-	ReadmeContent           string               `json:"readme_content,omitempty"`
-	ReadmeContentPreview    string               `json:"readme_content_preview,omitempty"`
-	QuantizationPreferences []string             `json:"quantization_preferences"`
-	ProcessingError         string               `json:"processing_error,omitempty"`
-	Tags                    []string             `json:"tags,omitempty"`
-	License                 string               `json:"license,omitempty"`
-	Icon                    string               `json:"icon,omitempty"`
-}
-
-// SearchResult represents the complete result of searching and processing models
-type SearchResult struct {
-	SearchTerm       string           `json:"search_term"`
-	Limit            int              `json:"limit"`
-	Quantization     string           `json:"quantization"`
-	TotalModelsFound int              `json:"total_models_found"`
-	Models           []ProcessedModel `json:"models"`
-	FormattedOutput  string           `json:"formatted_output"`
-}
-
-// AddedModelSummary represents a summary of models added to the gallery
-type AddedModelSummary struct {
-	SearchTerm     string   `json:"search_term"`
-	TotalFound     int      `json:"total_found"`
-	ModelsAdded    int      `json:"models_added"`
-	AddedModelIDs  []string `json:"added_model_ids"`
-	AddedModelURLs []string `json:"added_model_urls"`
-	Quantization   string   `json:"quantization"`
-	ProcessingTime string   `json:"processing_time"`
-}
-
-func main() {
-	startTime := time.Now()
-
-	// Check for synthetic mode
-	syntheticMode := os.Getenv("SYNTHETIC_MODE")
-	if syntheticMode == "true" || syntheticMode == "1" {
-		fmt.Println("Running in SYNTHETIC MODE - generating random test data")
-		err := runSyntheticMode()
-		if err != nil {
-			fmt.Fprintf(os.Stderr, "Error in synthetic mode: %v\n", err)
-			os.Exit(1)
-		}
-		return
-	}
-
-	// Get configuration from environment variables
-	searchTerm := os.Getenv("SEARCH_TERM")
-	if searchTerm == "" {
-		searchTerm = "GGUF"
-	}
-
-	limitStr := os.Getenv("LIMIT")
-	if limitStr == "" {
-		limitStr = "5"
-	}
-	limit, err := strconv.Atoi(limitStr)
-	if err != nil {
-		fmt.Fprintf(os.Stderr, "Error parsing LIMIT: %v\n", err)
-		os.Exit(1)
-	}
-
-	quantization := os.Getenv("QUANTIZATION")
-
-	maxModels := os.Getenv("MAX_MODELS")
-	if maxModels == "" {
-		maxModels = "1"
-	}
-	maxModelsInt, err := strconv.Atoi(maxModels)
-	if err != nil {
-		fmt.Fprintf(os.Stderr, "Error parsing MAX_MODELS: %v\n", err)
-		os.Exit(1)
-	}
-
-	// Print configuration
-	fmt.Printf("Gallery Agent Configuration:\n")
-	fmt.Printf("  Search Term: %s\n", searchTerm)
-	fmt.Printf("  Limit: %d\n", limit)
-	fmt.Printf("  Quantization: %s\n", quantization)
-	fmt.Printf("  Max Models to Add: %d\n", maxModelsInt)
-	fmt.Printf("  Gallery Index Path: %s\n", os.Getenv("GALLERY_INDEX_PATH"))
-	fmt.Println()
-
-	result, err := searchAndProcessModels(searchTerm, limit, quantization)
-	if err != nil {
-		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
-		os.Exit(1)
-	}
-
-	fmt.Println(result.FormattedOutput)
-	var models []ProcessedModel
-
-	if len(result.Models) > 1 {
-		fmt.Println("More than one model found (", len(result.Models), "), using AI agent to select the most interesting models")
-		for _, model := range result.Models {
-			fmt.Println("Model: ", model.ModelID)
-		}
-		// Use AI agent to select the most interesting models
-		fmt.Println("Using AI agent to select the most interesting models...")
-		models, err = selectMostInterestingModels(context.Background(), result)
-		if err != nil {
-			fmt.Fprintf(os.Stderr, "Error in model selection: %v\n", err)
-			// Continue with original result if selection fails
-			models = result.Models
-		}
-	} else if len(result.Models) == 1 {
-		models = result.Models
-		fmt.Println("Only one model found, using it directly")
-	}
-
-	fmt.Print(models)
-
-	// Filter out models that already exist in the gallery
-	fmt.Println("Filtering out existing models...")
-	models, err = filterExistingModels(models)
-	if err != nil {
-		fmt.Fprintf(os.Stderr, "Error filtering existing models: %v\n", err)
-		os.Exit(1)
-	}
-
-	// Limit to maxModelsInt after filtering
-	if len(models) > maxModelsInt {
-		models = models[:maxModelsInt]
-	}
-
-	// Track added models for summary
-	var addedModelIDs []string
-	var addedModelURLs []string
-
-	// Generate YAML entries and append to gallery/index.yaml
-	if len(models) > 0 {
-		for _, model := range models {
-			addedModelIDs = append(addedModelIDs, model.ModelID)
-			// Generate Hugging Face URL for the model
-			modelURL := fmt.Sprintf("https://huggingface.co/%s", model.ModelID)
-			addedModelURLs = append(addedModelURLs, modelURL)
-		}
-		fmt.Println("Generating YAML entries for selected models...")
-		err = generateYAMLForModels(context.Background(), models, quantization)
-		if err != nil {
-			fmt.Fprintf(os.Stderr, "Error generating YAML entries: %v\n", err)
-			os.Exit(1)
-		}
-	} else {
-		fmt.Println("No new models to add to the gallery.")
-	}
-
-	// Create and write summary
-	processingTime := time.Since(startTime).String()
-	summary := AddedModelSummary{
-		SearchTerm:     searchTerm,
-		TotalFound:     result.TotalModelsFound,
-		ModelsAdded:    len(addedModelIDs),
-		AddedModelIDs:  addedModelIDs,
-		AddedModelURLs: addedModelURLs,
-		Quantization:   quantization,
-		ProcessingTime: processingTime,
-	}
-
-	// Write summary to file
-	summaryData, err := json.MarshalIndent(summary, "", "  ")
-	if err != nil {
-		fmt.Fprintf(os.Stderr, "Error marshaling summary: %v\n", err)
-	} else {
-		err = os.WriteFile("gallery-agent-summary.json", summaryData, 0644)
-		if err != nil {
-			fmt.Fprintf(os.Stderr, "Error writing summary file: %v\n", err)
-		} else {
-			fmt.Printf("Summary written to gallery-agent-summary.json\n")
-		}
-	}
-}
-
-// searchAndProcessModels fetches the latest Hugging Face models matching
-// searchTerm (at most limit results) and builds a ProcessedModel for each hit:
-// file listing, preferred quantized model file, README content, tags, license
-// and icon. A human-readable report of the run is stored in
-// SearchResult.FormattedOutput.
-//
-// quantization is tried first when choosing the preferred model file, followed
-// by the fallbacks Q4_K_M, Q4_K_S, Q3_K_M and Q2_K.
-//
-// An error is returned only when the initial search fails. A failure to load
-// the details of a single model is recorded in that model's ProcessingError
-// and the model is still part of the result; README and metadata failures are
-// only printed as warnings.
-func searchAndProcessModels(searchTerm string, limit int, quantization string) (*SearchResult, error) {
-	client := hfapi.NewClient()
-	var outputBuilder strings.Builder
-
-	fmt.Println("Searching for models...")
-	result := &SearchResult{
-		SearchTerm:   searchTerm,
-		Limit:        limit,
-		Quantization: quantization,
-		Models:       []ProcessedModel{},
-	}
-
-	models, err := client.GetLatest(searchTerm, limit)
-	if err != nil {
-		return nil, fmt.Errorf("failed to fetch models: %w", err)
-	}
-
-	fmt.Println("Models found:", len(models))
-	result.TotalModelsFound = len(models)
-
-	if len(models) == 0 {
-		outputBuilder.WriteString("No models found.\n")
-		result.FormattedOutput = outputBuilder.String()
-		return result, nil
-	}
-
-	fmt.Fprintf(&outputBuilder, "Found %d models matching '%s':\n\n", len(models), searchTerm)
-
-	for i, model := range models {
-		fmt.Fprintf(&outputBuilder, "%d. Processing Model: %s\n", i+1, model.ModelID)
-		fmt.Fprintf(&outputBuilder, "   Author: %s\n", model.Author)
-		fmt.Fprintf(&outputBuilder, "   Downloads: %d\n", model.Downloads)
-		fmt.Fprintf(&outputBuilder, "   Last Modified: %s\n", model.LastModified)
-
-		// Quantization preferences, most preferred first. Built once per model
-		// so that every ProcessedModel owns its own slice.
-		quantizationPreferences := []string{quantization, "Q4_K_M", "Q4_K_S", "Q3_K_M", "Q2_K"}
-
-		processedModel := ProcessedModel{
-			ModelID:                 model.ModelID,
-			Author:                  model.Author,
-			Downloads:               model.Downloads,
-			LastModified:            model.LastModified,
-			QuantizationPreferences: quantizationPreferences,
-		}
-
-		// Without details there is nothing more to process: record the error
-		// and keep the bare model in the result.
-		details, err := client.GetModelDetails(model.ModelID)
-		if err != nil {
-			fmt.Fprintf(&outputBuilder, "   Error getting model details: %v\n", err)
-			processedModel.ProcessingError = err.Error()
-			result.Models = append(result.Models, processedModel)
-			continue
-		}
-
-		preferredModelFile := hfapi.FindPreferredModelFile(details.Files, quantizationPreferences)
-
-		// Classify every file as "readme", "model" (the preferred file) or "other".
-		processedFiles := make([]ProcessedModelFile, len(details.Files))
-		for j, file := range details.Files {
-			fileType := "other"
-			if file.IsReadme {
-				fileType = "readme"
-			} else if preferredModelFile != nil && file.Path == preferredModelFile.Path {
-				fileType = "model"
-			}
-
-			processedFiles[j] = ProcessedModelFile{
-				Path:     file.Path,
-				Size:     file.Size,
-				SHA256:   file.SHA256,
-				IsReadme: file.IsReadme,
-				FileType: fileType,
-			}
-		}
-		processedModel.Files = processedFiles
-
-		// Point at the slice element itself rather than at a loop-variable
-		// copy, so PreferredModelFile and Files describe the same entry.
-		if preferredModelFile != nil {
-			for j := range processedFiles {
-				if processedFiles[j].Path == preferredModelFile.Path {
-					processedModel.PreferredModelFile = &processedFiles[j]
-					break
-				}
-			}
-		}
-
-		fmt.Fprintf(&outputBuilder, "   Files found: %d\n", len(details.Files))
-
-		if preferredModelFile != nil {
-			fmt.Fprintf(&outputBuilder, "   Preferred Model File: %s (SHA256: %s)\n",
-				preferredModelFile.Path,
-				preferredModelFile.SHA256)
-		} else {
-			fmt.Fprintf(&outputBuilder, "   No model file found with quantization preferences: %v\n", quantizationPreferences)
-		}
-
-		if details.ReadmeFile != nil {
-			fmt.Fprintf(&outputBuilder, "   README File: %s\n", details.ReadmeFile.Path)
-
-			for j := range processedFiles {
-				if processedFiles[j].IsReadme {
-					processedModel.ReadmeFile = &processedFiles[j]
-					break
-				}
-			}
-
-			fmt.Println("Getting real readme for", model.ModelID, "waiting...")
-			// Use agent to get the real readme and prepare the model description
-			readmeContent, err := getRealReadme(context.Background(), model.ModelID)
-			if err == nil {
-				processedModel.ReadmeContent = readmeContent
-				processedModel.ReadmeContentPreview = truncateString(readmeContent, 200)
-				fmt.Fprintf(&outputBuilder, "   README Content Preview: %s\n",
-					processedModel.ReadmeContentPreview)
-			} else {
-				fmt.Printf("   Warning: Failed to get real readme: %v\n", err)
-			}
-			fmt.Println("Real readme got", readmeContent)
-
-			// Extract metadata (tags, license) from README using LLM
-			fmt.Println("Extracting metadata for", model.ModelID, "waiting...")
-			tags, license, err := extractModelMetadata(context.Background(), processedModel)
-			if err == nil {
-				processedModel.Tags = tags
-				processedModel.License = license
-				fmt.Fprintf(&outputBuilder, "   Tags: %v\n", tags)
-				fmt.Fprintf(&outputBuilder, "   License: %s\n", license)
-			} else {
-				fmt.Printf("   Warning: Failed to extract metadata: %v\n", err)
-			}
-
-			// Extract icon from README or use HuggingFace avatar
-			if icon := extractModelIcon(processedModel); icon != "" {
-				processedModel.Icon = icon
-				fmt.Fprintf(&outputBuilder, "   Icon: %s\n", icon)
-			}
-		}
-
-		// List every file together with its checksum when one is known.
-		outputBuilder.WriteString("   All Files:\n")
-		for _, file := range processedFiles {
-			fmt.Fprintf(&outputBuilder, "     - %s (%s, %d bytes", file.Path, file.FileType, file.Size)
-			if file.SHA256 != "" {
-				fmt.Fprintf(&outputBuilder, ", SHA256: %s", file.SHA256)
-			}
-			outputBuilder.WriteString(")\n")
-		}
-
-		outputBuilder.WriteString("\n")
-		result.Models = append(result.Models, processedModel)
-	}
-
-	result.FormattedOutput = outputBuilder.String()
-	return result, nil
-}
-
-// truncateString returns s unchanged when it is at most maxLen bytes long.
-// Otherwise it returns the longest prefix of s that fits in maxLen bytes
-// without splitting a multi-byte UTF-8 character, followed by "...".
-// A negative maxLen is treated as 0.
-func truncateString(s string, maxLen int) string {
-	if maxLen < 0 {
-		maxLen = 0
-	}
-	if len(s) <= maxLen {
-		return s
-	}
-	// s[cut] is the first byte that would be dropped. If it is a UTF-8
-	// continuation byte (10xxxxxx) the cut falls inside a character, so back
-	// up to the start of that character.
-	cut := maxLen
-	for cut > 0 && s[cut]&0xC0 == 0x80 {
-		cut--
-	}
-	return s[:cut] + "..."
-}
--- a/.github/gallery-agent/testing.go
+++ b/.github/gallery-agent/testing.go
@@ -1,224 +0,0 @@
-package main
-
-import (
-	"context"
-	"fmt"
-	"math/rand"
-	"strings"
-	"time"
-)
-
-// runSyntheticMode generates synthetic test data and appends it to the gallery
-func runSyntheticMode() error {
-	generator := NewSyntheticDataGenerator()
-
-	// Generate a random number of synthetic models (1-3)
-	numModels := generator.rand.Intn(3) + 1
-	fmt.Printf("Generating %d synthetic models for testing...\n", numModels)
-
-	var models []ProcessedModel
-	for i := 0; i < numModels; i++ {
-		model := generator.GenerateProcessedModel()
-		models = append(models, model)
-		fmt.Printf("Generated synthetic model: %s\n", model.ModelID)
-	}
-
-	// Generate YAML entries and append to gallery/index.yaml
-	fmt.Println("Generating YAML entries for synthetic models...")
-	err := generateYAMLForModels(context.Background(), models, "Q4_K_M")
-	if err != nil {
-		return fmt.Errorf("error generating YAML entries: %w", err)
-	}
-
-	fmt.Printf("Successfully added %d synthetic models to the gallery for testing!\n", len(models))
-	return nil
-}
-
-// SyntheticDataGenerator provides methods to generate synthetic test data
-// (model files, models, README text) for exercising the gallery agent.
-type SyntheticDataGenerator struct {
-	// rand is the random source every generator method draws from.
-	rand *rand.Rand
-}
-
-// NewSyntheticDataGenerator returns a generator whose random source is seeded
-// with the current time in nanoseconds, so each run produces different data.
-func NewSyntheticDataGenerator() *SyntheticDataGenerator {
-	seed := time.Now().UnixNano()
-	source := rand.NewSource(seed)
-	return &SyntheticDataGenerator{rand: rand.New(source)}
-}
-
-// GenerateProcessedModelFile creates a synthetic ProcessedModelFile
-func (g *SyntheticDataGenerator) GenerateProcessedModelFile() ProcessedModelFile {
-	fileTypes := []string{"model", "readme", "other"}
-	fileType := fileTypes[g.rand.Intn(len(fileTypes))]
-
-	var path string
-	var isReadme bool
-
-	switch fileType {
-	case "model":
-		path = fmt.Sprintf("model-%s.gguf", g.randomString(8))
-		isReadme = false
-	case "readme":
-		path = "README.md"
-		isReadme = true
-	default:
-		path = fmt.Sprintf("file-%s.txt", g.randomString(6))
-		isReadme = false
-	}
-
-	return ProcessedModelFile{
-		Path:     path,
-		Size:     int64(g.rand.Intn(1000000000) + 1000000), // 1MB to 1GB
-		SHA256:   g.randomSHA256(),
-		IsReadme: isReadme,
-		FileType: fileType,
-	}
-}
-
-// GenerateProcessedModel creates a synthetic ProcessedModel: a random
-// author/name pair, 2-6 random files (plus a forced model file and a forced
-// README when the random draw produced none), README text, tags, a license
-// (possibly empty) and, half of the time, an icon URL.
-func (g *SyntheticDataGenerator) GenerateProcessedModel() ProcessedModel {
-	authors := []string{"microsoft", "meta", "google", "openai", "anthropic", "mistralai", "huggingface"}
-	modelNames := []string{"llama", "gpt", "claude", "mistral", "gemma", "phi", "qwen", "codellama"}
-
-	author := authors[g.rand.Intn(len(authors))]
-	modelName := modelNames[g.rand.Intn(len(modelNames))]
-	modelID := fmt.Sprintf("%s/%s-%s", author, modelName, g.randomString(6))
-
-	// Generate files
-	numFiles := g.rand.Intn(5) + 2 // 2-6 files
-	files := make([]ProcessedModelFile, numFiles)
-
-	// Track whether the random draw already produced the two required kinds.
-	hasModelFile := false
-	hasReadme := false
-	for i := range files {
-		files[i] = g.GenerateProcessedModelFile()
-		switch files[i].FileType {
-		case "model":
-			hasModelFile = true
-		case "readme":
-			hasReadme = true
-		}
-	}
-
-	// Add required files if missing
-	if !hasModelFile {
-		modelFile := g.GenerateProcessedModelFile()
-		modelFile.FileType = "model"
-		modelFile.Path = fmt.Sprintf("%s-Q4_K_M.gguf", modelName)
-		// The random draw may have produced a README entry; clear the flag so
-		// the forced model file is not also reported as a README.
-		modelFile.IsReadme = false
-		files = append(files, modelFile)
-	}
-
-	if !hasReadme {
-		readmeFile := g.GenerateProcessedModelFile()
-		readmeFile.FileType = "readme"
-		readmeFile.Path = "README.md"
-		readmeFile.IsReadme = true
-		files = append(files, readmeFile)
-	}
-
-	// Pick the first model file and the first README. The pointers are taken
-	// only after the last append, so they stay valid for the returned slice.
-	var preferredModelFile *ProcessedModelFile
-	var readmeFile *ProcessedModelFile
-	for i := range files {
-		switch files[i].FileType {
-		case "model":
-			if preferredModelFile == nil {
-				preferredModelFile = &files[i]
-			}
-		case "readme":
-			if readmeFile == nil {
-				readmeFile = &files[i]
-			}
-		}
-	}
-
-	readmeContent := g.generateReadmeContent(modelName, author)
-
-	// Generate sample metadata
-	licenses := []string{"apache-2.0", "mit", "llama2", "gpl-3.0", "bsd", ""}
-	license := licenses[g.rand.Intn(len(licenses))]
-
-	sampleTags := []string{"llm", "gguf", "gpu", "cpu", "text-to-text", "chat", "instruction-tuned"}
-	numTags := g.rand.Intn(4) + 3 // 3-6 draws; fewer tags may remain after deduplication
-	tags := make([]string, numTags)
-	for i := range tags {
-		tags[i] = sampleTags[g.rand.Intn(len(sampleTags))]
-	}
-	tags = g.removeDuplicates(tags)
-
-	// Optionally include icon (50% chance)
-	icon := ""
-	if g.rand.Intn(2) == 0 {
-		icon = fmt.Sprintf("https://cdn-avatars.huggingface.co/v1/production/uploads/%s.png", g.randomString(24))
-	}
-
-	return ProcessedModel{
-		ModelID:                 modelID,
-		Author:                  author,
-		Downloads:               g.rand.Intn(1000000) + 1000,
-		LastModified:            g.randomDate(),
-		Files:                   files,
-		PreferredModelFile:      preferredModelFile,
-		ReadmeFile:              readmeFile,
-		ReadmeContent:           readmeContent,
-		ReadmeContentPreview:    truncateString(readmeContent, 200),
-		QuantizationPreferences: []string{"Q4_K_M", "Q4_K_S", "Q3_K_M", "Q2_K"},
-		ProcessingError:         "",
-		Tags:                    tags,
-		License:                 license,
-		Icon:                    icon,
-	}
-}
-
-// Helper methods for synthetic data generation
-
-// randomString returns length characters drawn from lowercase ASCII letters
-// and digits.
-func (g *SyntheticDataGenerator) randomString(length int) string {
-	const alphabet = "abcdefghijklmnopqrstuvwxyz0123456789"
-	out := make([]byte, 0, length)
-	for len(out) < length {
-		out = append(out, alphabet[g.rand.Intn(len(alphabet))])
-	}
-	return string(out)
-}
-
-// randomSHA256 returns 64 random lowercase hexadecimal characters, i.e. a
-// string shaped like a SHA-256 digest (it is not the hash of anything).
-func (g *SyntheticDataGenerator) randomSHA256() string {
-	const hexDigits = "0123456789abcdef"
-	var digest [64]byte
-	for pos := 0; pos < len(digest); pos++ {
-		digest[pos] = hexDigits[g.rand.Intn(len(hexDigits))]
-	}
-	return string(digest[:])
-}
-
-// randomDate returns a timestamp between now and 364 days ago, formatted as
-// "2006-01-02T15:04:05.000Z".
-func (g *SyntheticDataGenerator) randomDate() string {
-	// The layout ends in a literal "Z" (UTC designator), so the instant must
-	// be converted to UTC first; otherwise local wall-clock time would be
-	// mislabelled as UTC.
-	now := time.Now().UTC()
-	daysAgo := g.rand.Intn(365) // Random date within last year
-	pastDate := now.AddDate(0, 0, -daysAgo)
-	return pastDate.Format("2006-01-02T15:04:05.000Z")
-}
-
-// removeDuplicates returns the distinct elements of slice in order of first
-// appearance. The result is always non-nil, even for empty input.
-func (g *SyntheticDataGenerator) removeDuplicates(slice []string) []string {
-	seen := make(map[string]struct{}, len(slice))
-	unique := make([]string, 0)
-	for idx := 0; idx < len(slice); idx++ {
-		entry := slice[idx]
-		if _, dup := seen[entry]; dup {
-			continue
-		}
-		seen[entry] = struct{}{}
-		unique = append(unique, entry)
-	}
-	return unique
-}
-
-// generateReadmeContent returns one of three canned README texts, chosen at
-// random, with the title-cased model name and the author filled in.
-func (g *SyntheticDataGenerator) generateReadmeContent(modelName, author string) string {
-	heading := strings.Title(modelName)
-
-	switch g.rand.Intn(3) {
-	case 0:
-		return fmt.Sprintf("# %s Model\n\nThis is a %s model developed by %s. It's designed for various natural language processing tasks including text generation, question answering, and conversation.\n\n## Features\n\n- High-quality text generation\n- Efficient inference\n- Multiple quantization options\n- Easy to use with LocalAI\n\n## Usage\n\nUse this model with LocalAI for various AI tasks.", heading, modelName, author)
-	case 1:
-		return fmt.Sprintf("# %s\n\nA powerful language model from %s. This model excels at understanding and generating human-like text across multiple domains.\n\n## Capabilities\n\n- Text completion\n- Code generation\n- Creative writing\n- Technical documentation\n\n## Model Details\n\n- Architecture: Transformer-based\n- Training: Large-scale supervised learning\n- Quantization: Available in multiple formats", heading, author)
-	default:
-		return fmt.Sprintf("# %s Language Model\n\nDeveloped by %s, this model represents state-of-the-art performance in natural language understanding and generation.\n\n## Key Features\n\n- Multilingual support\n- Context-aware responses\n- Efficient memory usage\n- Fast inference speed\n\n## Applications\n\n- Chatbots and virtual assistants\n- Content generation\n- Code completion\n- Educational tools", heading, author)
-	}
-}
--- a/.github/gallery-agent/tools.go
+++ b/.github/gallery-agent/tools.go
@@ -1,46 +0,0 @@
-package main
-
-import (
-	"fmt"
-
-	hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
-	openai "github.com/sashabaranov/go-openai"
-	jsonschema "github.com/sashabaranov/go-openai/jsonschema"
-)
-
-// HFReadmeTool is an agent tool that gets the README of a Hugging Face
-// repository. Its Tool method describes it to the LLM as the "hf_readme"
-// function and Execute performs the lookup.
-type HFReadmeTool struct {
-	// client is the Hugging Face API client used to download the README.
-	client *hfapi.Client
-}
-
-// Execute fetches README.md of the repository named by args["repository"]
-// and returns its content.
-//
-// It returns an error when the "repository" argument is missing, is not a
-// string or is empty, and when the README cannot be downloaded.
-func (s *HFReadmeTool) Execute(args map[string]any) (string, error) {
-	repository, ok := args["repository"].(string)
-	if !ok || repository == "" {
-		// Name the offending argument so the calling model can fix its call.
-		return "", fmt.Errorf("missing or invalid %q argument: expected a non-empty string", "repository")
-	}
-	readme, err := s.client.GetReadmeContent(repository, "README.md")
-	if err != nil {
-		return "", fmt.Errorf("failed to fetch README for %q: %w", repository, err)
-	}
-	return readme, nil
-}
-
-// Tool returns the OpenAI function-tool definition for "hf_readme": a single
-// required string parameter, "repository".
-func (s *HFReadmeTool) Tool() openai.Tool {
-	repositoryParam := jsonschema.Definition{
-		Type:        jsonschema.String,
-		Description: "The huggingface repository to get the README content of",
-	}
-
-	schema := jsonschema.Definition{
-		Type:       jsonschema.Object,
-		Properties: map[string]jsonschema.Definition{"repository": repositoryParam},
-		Required:   []string{"repository"},
-	}
-
-	definition := openai.FunctionDefinition{
-		Name:        "hf_readme",
-		Description: "A tool to get the README content of a huggingface repository",
-		Parameters:  schema,
-	}
-
-	return openai.Tool{
-		Type:     openai.ToolTypeFunction,
-		Function: &definition,
-	}
-}
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -1,33 +0,0 @@
-# Configuration for actions/labeler: maps pull-request properties to labels.
-# Globs use '**' where nested files must match, because a single '*' does not
-# cross a '/' (so '.github/*' would miss '.github/workflows/*.yml').
-
-# Any branch whose name contains "feature" (the regex is unanchored, so it
-# also covers names that start with it).
-enhancement:
- - head-branch: ['feature']
-
-dependencies:
- - any:
-   - changed-files:
-     - any-glob-to-any-file: 'Makefile'
-   - changed-files:
-     - any-glob-to-any-file: '**/*.mod'
-   - changed-files:
-     - any-glob-to-any-file: '**/*.sum'
-
-kind/documentation:
- - any:
-   - changed-files:
-     - any-glob-to-any-file: 'docs/**'
-   - changed-files:
-     - any-glob-to-any-file: '**/*.md'
-
-area/ai-model:
- - any:
-   - changed-files:
-     - any-glob-to-any-file: 'gallery/**'
-
-examples:
- - any:
-   - changed-files:
-     - any-glob-to-any-file: 'examples/**'
-
-ci:
- - any:
-   - changed-files:
-     - any-glob-to-any-file: '.github/**'
--- a/.github/release.yml
+++ b/.github/release.yml
@@ -12,26 +12,13 @@ changelog:
    - title: "Bug fixes :bug:"
      labels:
        - bug
-        - regression
-    - title: "🖧 P2P area"
-      labels:
-         - area/p2p
    - title: Exciting New Features 🎉
      labels:
        - Semver-Minor
        - enhancement
-        - ux
-        - roadmap
-    - title: 🧠 Models
-      labels:
-        - area/ai-model
-    - title: 📖 Documentation and examples
-      labels:
-        - kind/documentation
-        - examples
    - title: 👒 Dependencies
      labels:
        - dependencies
    - title: Other Changes
      labels:
-        - "*"
+        - "*"
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
--- a/.github/workflows/backend_build.yml
+++ b/.github/workflows/backend_build.yml
@@ -1,250 +0,0 @@
---
-name: 'build backend container images (reusable)'
-
-on:
-  workflow_call:
-    inputs:
-      base-image:
-        description: 'Base image'
-        required: true
-        type: string
-      build-type:
-        description: 'Build type'
-        default: ''
-        type: string
-      cuda-major-version:
-        description: 'CUDA major version'
-        default: "12"
-        type: string
-      cuda-minor-version:
-        description: 'CUDA minor version'
-        default: "1"
-        type: string
-      platforms:
-        description: 'Platforms'
-        default: ''
-        type: string
-      tag-latest:
-        description: 'Tag latest'
-        default: ''
-        type: string
-      tag-suffix:
-        description: 'Tag suffix'
-        default: ''
-        type: string
-      runs-on:
-        description: 'Runs on'
-        required: true
-        default: ''
-        type: string
-      backend:
-        description: 'Backend to build'
-        required: true
-        type: string
-      context:
-        description: 'Build context'
-        required: true
-        type: string
-      dockerfile:
-        description: 'Build Dockerfile'
-        required: true
-        type: string
-      skip-drivers:
-        description: 'Skip drivers'
-        default: 'false'
-        type: string
-      ubuntu-version:
-        description: 'Ubuntu version'
-        required: false
-        default: '2204'
-        type: string
-    secrets:
-      dockerUsername:
-        required: false
-      dockerPassword:
-        required: false
-      quayUsername:
-        required: true
-      quayPassword:
-        required: true
-
-jobs:
-  backend-build:
-    runs-on: ${{ inputs.runs-on }}
-    env:
-        quay_username: ${{ secrets.quayUsername }}
-    steps:
-
-
-      - name: Free Disk Space (Ubuntu)
-        if: inputs.runs-on == 'ubuntu-latest'
-        uses: jlumbroso/free-disk-space@main
-        with:
-          # this might remove tools that are actually needed,
-          # if set to "true" but frees about 6 GB
-          tool-cache: true
-          # all of these default to true, but feel free to set to
-          # "false" if necessary for your workflow
-          android: true
-          dotnet: true
-          haskell: true
-          large-packages: true
-          docker-images: true
-          swap-storage: true
-
-      - name: Force Install GIT latest
-        run: |
-          sudo apt-get update \
-          && sudo apt-get install -y software-properties-common \
-          && sudo apt-get update \
-          && sudo add-apt-repository -y ppa:git-core/ppa \
-          && sudo apt-get update \
-          && sudo apt-get install -y git
-
-      - name: Checkout
-        uses: actions/checkout@v6
-
-      - name: Release space from worker
-        if: inputs.runs-on == 'ubuntu-latest'
-        run: |
-          # Best-effort cleanup of the hosted runner to make room for the image
-          # build. Package sets differ between runner images, so every removal
-          # is allowed to fail ("|| true") without aborting the job.
-
-          # Prints the 30 largest installed packages (size in KiB, name).
-          list_largest_packages() {
-            local pkgs
-            echo "Listing top largest packages"
-            pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-            head -n 30 <<< "${pkgs}"
-          }
-
-          list_largest_packages
-          echo
-          df -h
-          echo
-          sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
-          # -y is required: without it apt-get asks for confirmation and aborts
-          # on a non-interactive runner, so nothing would be removed.
-          sudo apt-get remove -y --auto-remove android-sdk-platform-tools snapd || true
-          sudo apt-get purge -y --auto-remove android-sdk-platform-tools snapd || true
-          sudo rm -rf /usr/local/lib/android
-          sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
-          sudo rm -rf /usr/share/dotnet
-          sudo apt-get remove -y '^mono-.*' || true
-          sudo apt-get remove -y '^ghc-.*' || true
-          sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
-          sudo apt-get remove -y 'php.*' || true
-          sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
-          sudo apt-get remove -y '^google-.*' || true
-          sudo apt-get remove -y azure-cli || true
-          sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
-          sudo apt-get remove -y '^gfortran-.*' || true
-          sudo apt-get remove -y microsoft-edge-stable || true
-          sudo apt-get remove -y r-base-core || true
-          sudo apt-get autoremove -y
-          sudo apt-get clean
-          echo
-          list_largest_packages
-          echo
-          sudo rm -rfv build || true
-          sudo rm -rf /opt/ghc || true
-          sudo rm -rf "/usr/local/share/boost" || true
-          # Only delete the tool cache when the variable is actually set.
-          if [ -n "${AGENT_TOOLSDIRECTORY:-}" ]; then
-            sudo rm -rf -- "${AGENT_TOOLSDIRECTORY}" || true
-          fi
-          df -h
-
-      - name: Docker meta
-        id: meta
-        if: github.event_name != 'pull_request'
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            quay.io/go-skynet/local-ai-backends
-            localai/localai-backends
-          tags: |
-            type=ref,event=branch
-            type=semver,pattern={{raw}}
-            type=sha
-          flavor: |
-            latest=${{ inputs.tag-latest }}
-            suffix=${{ inputs.tag-suffix }},onlatest=true
-
-      - name: Docker meta for PR
-        id: meta_pull_request
-        if: github.event_name == 'pull_request'
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            quay.io/go-skynet/ci-tests
-          tags: |
-            type=ref,event=branch,suffix=${{ github.event.number }}-${{ inputs.backend }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
-            type=semver,pattern={{raw}},suffix=${{ github.event.number }}-${{ inputs.backend }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
-            type=sha,suffix=${{ github.event.number }}-${{ inputs.backend }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
-          flavor: |
-            latest=${{ inputs.tag-latest }}
-            suffix=${{ inputs.tag-suffix }},onlatest=true
-## End testing image
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@master
-        with:
-          platforms: all
-
-      - name: Set up Docker Buildx
-        id: buildx
-        uses: docker/setup-buildx-action@master
-
-      - name: Login to DockerHub
-        if: github.event_name != 'pull_request'
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.dockerUsername }}
-          password: ${{ secrets.dockerPassword }}
-
-      - name: Login to Quay.io
-        if: ${{ env.quay_username != '' }}
-        uses: docker/login-action@v3
-        with:
-          registry: quay.io
-          username: ${{ secrets.quayUsername }}
-          password: ${{ secrets.quayPassword }}
-
-      - name: Build and push
-        uses: docker/build-push-action@v6
-        if: github.event_name != 'pull_request'
-        with:
-          builder: ${{ steps.buildx.outputs.name }}
-          build-args: |
-            BUILD_TYPE=${{ inputs.build-type }}
-            SKIP_DRIVERS=${{ inputs.skip-drivers }}
-            CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
-            CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
-            BASE_IMAGE=${{ inputs.base-image }}
-            BACKEND=${{ inputs.backend }}
-            UBUNTU_VERSION=${{ inputs.ubuntu-version }}
-          context: ${{ inputs.context }}
-          file: ${{ inputs.dockerfile }}
-          cache-from: type=gha
-          platforms: ${{ inputs.platforms }}
-          push: ${{ github.event_name != 'pull_request' }}
-          tags: ${{ steps.meta.outputs.tags }}
-          labels: ${{ steps.meta.outputs.labels }}
-
-      - name: Build and push (PR)
-        uses: docker/build-push-action@v6
-        if: github.event_name == 'pull_request'
-        with:
-          builder: ${{ steps.buildx.outputs.name }}
-          build-args: |
-            BUILD_TYPE=${{ inputs.build-type }}
-            SKIP_DRIVERS=${{ inputs.skip-drivers }}
-            CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
-            CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
-            BASE_IMAGE=${{ inputs.base-image }}
-            BACKEND=${{ inputs.backend }}
-            UBUNTU_VERSION=${{ inputs.ubuntu-version }}
-          context: ${{ inputs.context }}
-          file: ${{ inputs.dockerfile }}
-          cache-from: type=gha
-          platforms: ${{ inputs.platforms }}
-          push: ${{ env.quay_username != '' }}
-          tags: ${{ steps.meta_pull_request.outputs.tags }}
-          labels: ${{ steps.meta_pull_request.outputs.labels }}
-
-
-
-      - name: job summary
-        # Only one of the two metadata steps runs (push vs. pull request), so
-        # fall back to the PR variant when the regular one was skipped. The
-        # value is passed through the environment instead of being pasted into
-        # the script, so quotes or "$" in a label cannot break or alter it.
-        env:
-          IMAGE_LABELS: ${{ steps.meta.outputs.labels || steps.meta_pull_request.outputs.labels }}
-        run: |
-          printf 'Built image: %s\n' "${IMAGE_LABELS}" >> "$GITHUB_STEP_SUMMARY"
--- a/.github/workflows/backend_build_darwin.yml
+++ b/.github/workflows/backend_build_darwin.yml
@@ -1,144 +0,0 @@
---
-name: 'build darwin python backend container images (reusable)'
-
-on:
-  workflow_call:
-    inputs:
-      backend:
-        description: 'Backend to build'
-        required: true
-        type: string
-      build-type:
-        description: 'Build type (e.g., mps)'
-        default: ''
-        type: string
-      use-pip:
-        description: 'Use pip to install dependencies'
-        default: false
-        type: boolean
-      lang:
-        description: 'Programming language (e.g. go)'
-        default: 'python'
-        type: string
-      go-version:
-        description: 'Go version to use'
-        default: '1.24.x'
-        type: string
-      tag-suffix:
-        description: 'Tag suffix for the built image'
-        required: true
-        type: string
-      runs-on:
-        description: 'Runner to use'
-        default: 'macOS-14'
-        type: string
-    secrets:
-      dockerUsername:
-        required: false
-      dockerPassword:
-        required: false
-      quayUsername:
-        required: true
-      quayPassword:
-        required: true
-
-jobs:
-  darwin-backend-build:
-    runs-on: ${{ inputs.runs-on }}
-    strategy:
-      matrix:
-        go-version: ['${{ inputs.go-version }}']
-    steps:
-      - name: Clone
-        uses: actions/checkout@v6
-        with:
-          submodules: true
-
-      - name: Setup Go ${{ matrix.go-version }}
-        uses: actions/setup-go@v5
-        with:
-          go-version: ${{ matrix.go-version }}
-          cache: false
-
-      # You can test your matrix by printing the current Go version
-      - name: Display Go version
-        run: go version
-
-      - name: Dependencies
-        run: |
-          brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
-
-      - name: Build ${{ inputs.backend }}-darwin
-        run: |
-          make protogen-go
-          BACKEND=${{ inputs.backend }} BUILD_TYPE=${{ inputs.build-type }} USE_PIP=${{ inputs.use-pip }} make build-darwin-${{ inputs.lang }}-backend
-
-      - name: Upload ${{ inputs.backend }}.tar
-        uses: actions/upload-artifact@v6
-        with:
-          name: ${{ inputs.backend }}-tar
-          path: backend-images/${{ inputs.backend }}.tar
-
-  # Publishes the backend tarball built by darwin-backend-build to DockerHub
-  # and Quay. Skipped for pull requests. Secrets and step outputs reach the
-  # shell through env variables rather than being interpolated into scripts.
-  darwin-backend-publish:
-    needs: darwin-backend-build
-    if: github.event_name != 'pull_request'
-    runs-on: ubuntu-latest
-    steps:
-      - name: Download ${{ inputs.backend }}.tar
-        uses: actions/download-artifact@v7
-        with:
-          name: ${{ inputs.backend }}-tar
-          path: .
-
-      - name: Install crane
-        run: |
-          # Fail the step when the download fails instead of piping an error
-          # page into tar.
-          set -euo pipefail
-          curl -fsSL https://github.com/google/go-containerregistry/releases/latest/download/go-containerregistry_Linux_x86_64.tar.gz | tar -xz
-          sudo mv crane /usr/local/bin/
-
-      - name: Log in to DockerHub
-        env:
-          DOCKER_USERNAME: ${{ secrets.dockerUsername }}
-          DOCKER_PASSWORD: ${{ secrets.dockerPassword }}
-        run: |
-          printf '%s' "${DOCKER_PASSWORD}" | crane auth login docker.io -u "${DOCKER_USERNAME}" --password-stdin
-
-      - name: Log in to quay.io
-        env:
-          QUAY_USERNAME: ${{ secrets.quayUsername }}
-          QUAY_PASSWORD: ${{ secrets.quayPassword }}
-        run: |
-          printf '%s' "${QUAY_PASSWORD}" | crane auth login quay.io -u "${QUAY_USERNAME}" --password-stdin
-
-      - name: Docker meta (DockerHub)
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            localai/localai-backends
-          tags: |
-            type=ref,event=branch
-            type=semver,pattern={{raw}}
-            type=sha
-          flavor: |
-            latest=auto
-            suffix=${{ inputs.tag-suffix }},onlatest=true
-
-      - name: Docker meta (Quay)
-        id: quaymeta
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            quay.io/go-skynet/local-ai-backends
-          tags: |
-            type=ref,event=branch
-            type=semver,pattern={{raw}}
-            type=sha
-          flavor: |
-            latest=auto
-            suffix=${{ inputs.tag-suffix }},onlatest=true
-
-      - name: Push Docker image (DockerHub)
-        env:
-          BACKEND: ${{ inputs.backend }}
-          IMAGE_TAGS: ${{ steps.meta.outputs.tags }}
-        run: |
-          # Tags may be separated by newlines or commas; push one at a time.
-          while IFS= read -r tag; do
-            [ -n "${tag}" ] || continue
-            crane push "${BACKEND}.tar" "${tag}"
-          done < <(printf '%s\n' "${IMAGE_TAGS}" | tr ',' '\n')
-
-      - name: Push Docker image (Quay)
-        env:
-          BACKEND: ${{ inputs.backend }}
-          IMAGE_TAGS: ${{ steps.quaymeta.outputs.tags }}
-        run: |
-          # Tags may be separated by newlines or commas; push one at a time.
-          while IFS= read -r tag; do
-            [ -n "${tag}" ] || continue
-            crane push "${BACKEND}.tar" "${tag}"
-          done < <(printf '%s\n' "${IMAGE_TAGS}" | tr ',' '\n')
--- a/.github/workflows/backend_pr.yml
+++ b/.github/workflows/backend_pr.yml
@@ -1,79 +0,0 @@
-name: 'build backend container images (PR-filtered)'
-
-on:
-  pull_request:
-
-concurrency:
-  group: ci-backends-pr-${{ github.head_ref || github.ref }}-${{ github.repository }}
-  cancel-in-progress: true
-
-jobs:
-  generate-matrix:
-    runs-on: ubuntu-latest
-    outputs:
-      matrix: ${{ steps.set-matrix.outputs.matrix }}
-      matrix-darwin: ${{ steps.set-matrix.outputs.matrix-darwin }}
-      has-backends: ${{ steps.set-matrix.outputs.has-backends }}
-      has-backends-darwin: ${{ steps.set-matrix.outputs.has-backends-darwin }}
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v6
-
-      - name: Setup Bun
-        uses: oven-sh/setup-bun@v2
-
-      - name: Install dependencies
-        run: |
-          bun add js-yaml
-          bun add @octokit/core
-
-      # filters the matrix in backend.yml
-      - name: Filter matrix for changed backends
-        id: set-matrix
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          GITHUB_EVENT_PATH: ${{ github.event_path }}
-        run: bun run scripts/changed-backends.js
-
-  backend-jobs:
-    needs: generate-matrix
-    uses: ./.github/workflows/backend_build.yml
-    if: needs.generate-matrix.outputs.has-backends == 'true'
-    with:
-      tag-latest: ${{ matrix.tag-latest }}
-      tag-suffix: ${{ matrix.tag-suffix }}
-      build-type: ${{ matrix.build-type }}
-      cuda-major-version: ${{ matrix.cuda-major-version }}
-      cuda-minor-version: ${{ matrix.cuda-minor-version }}
-      platforms: ${{ matrix.platforms }}
-      runs-on: ${{ matrix.runs-on }}
-      base-image: ${{ matrix.base-image }}
-      backend: ${{ matrix.backend }}
-      dockerfile: ${{ matrix.dockerfile }}
-      skip-drivers: ${{ matrix.skip-drivers }}
-      context: ${{ matrix.context }}
-      ubuntu-version: ${{ matrix.ubuntu-version }}
-    secrets:
-      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
-      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
-    strategy:
-      fail-fast: true
-      matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix) }}
-  backend-jobs-darwin:
-    needs: generate-matrix
-    uses: ./.github/workflows/backend_build_darwin.yml
-    if: needs.generate-matrix.outputs.has-backends-darwin == 'true'
-    with:
-      backend: ${{ matrix.backend }}
-      build-type: ${{ matrix.build-type }}
-      go-version: "1.24.x"
-      tag-suffix: ${{ matrix.tag-suffix }}
-      lang: ${{ matrix.lang || 'python' }}
-      use-pip: ${{ matrix.backend == 'diffusers' }}
-      runs-on: "macos-latest"
-    secrets:
-      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
-      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
-    strategy:
-      fail-fast: true
-      matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix-darwin) }}
--- a/.github/workflows/build-test.yaml
+++ b/.github/workflows/build-test.yaml
@@ -1,67 +0,0 @@
-name: Build test
-
-on:
-  push:
-    branches:
-      - master
-  pull_request:
-
-jobs:
-  build-test:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v6
-        with:
-          fetch-depth: 0
-      - name: Set up Go
-        uses: actions/setup-go@v5
-        with:
-          go-version: 1.25
-      - name: Run GoReleaser
-        run: |
-          make dev-dist
-  launcher-build-darwin:
-    runs-on: macos-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v6
-        with:
-          fetch-depth: 0
-      - name: Set up Go
-        uses: actions/setup-go@v5
-        with:
-          go-version: 1.25
-      - name: Build launcher for macOS ARM64
-        run: |
-          make build-launcher-darwin
-          ls -liah dist
-      - name: Upload macOS launcher artifacts
-        uses: actions/upload-artifact@v6
-        with:
-          name: launcher-macos
-          path: dist/
-          retention-days: 30
-      
-  launcher-build-linux:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v6
-        with:
-          fetch-depth: 0
-      - name: Set up Go
-        uses: actions/setup-go@v5
-        with:
-          go-version: 1.25
-      - name: Build launcher for Linux
-        run: |
-          sudo apt-get update
-          sudo apt-get install golang gcc libgl1-mesa-dev xorg-dev libxkbcommon-dev
-          make build-launcher-linux
-      - name: Upload Linux launcher artifacts
-        uses: actions/upload-artifact@v6
-        with:
-          name: launcher-linux
-          path: local-ai-launcher-linux.tar.xz
-          retention-days: 30
--- a/.github/workflows/bump_deps.yaml
+++ b/.github/workflows/bump_deps.yaml
@@ -1,62 +1,62 @@
-name: Bump Backend dependencies
+name: Bump dependencies
 on:
  schedule:
    - cron: 0 20 * * *
  workflow_dispatch:
 jobs:
-  bump-backends:
+  bump:
    strategy:
      fail-fast: false
      matrix:
        include:
-          - repository: "ggml-org/llama.cpp"
-            variable: "LLAMA_VERSION"
+          - repository: "go-skynet/go-llama.cpp"
+            variable: "GOLLAMA_VERSION"
            branch: "master"
-            file: "backend/cpp/llama-cpp/Makefile"
-          - repository: "ggml-org/whisper.cpp"
+          - repository: "ggerganov/llama.cpp"
+            variable: "CPPLLAMA_VERSION"
+            branch: "master"
+          - repository: "go-skynet/go-ggml-transformers.cpp"
+            variable: "GOGGMLTRANSFORMERS_VERSION"
+            branch: "master"
+          - repository: "donomii/go-rwkv.cpp"
+            variable: "RWKV_VERSION"
+            branch: "main"
+          - repository: "ggerganov/whisper.cpp"
            variable: "WHISPER_CPP_VERSION"
            branch: "master"
-            file: "backend/go/whisper/Makefile"
-          - repository: "PABannier/bark.cpp"
-            variable: "BARKCPP_VERSION"
-            branch: "main"
-            file: "Makefile"
-          - repository: "leejet/stable-diffusion.cpp"
-            variable: "STABLEDIFFUSION_GGML_VERSION"
+          - repository: "go-skynet/go-bert.cpp"
+            variable: "BERT_VERSION"
+            branch: "master"
+          - repository: "go-skynet/bloomz.cpp"
+            variable: "BLOOMZ_VERSION"
+            branch: "main"
+          - repository: "nomic-ai/gpt4all"
+            variable: "GPT4ALL_VERSION"
+            branch: "main"
+          - repository: "mudler/go-ggllm.cpp"
+            variable: "GOGGLLM_VERSION"
+            branch: "master"
+          - repository: "mudler/go-stable-diffusion"
+            variable: "STABLEDIFFUSION_VERSION"
            branch: "master"
-            file: "backend/go/stablediffusion-ggml/Makefile"
          - repository: "mudler/go-piper"
            variable: "PIPER_VERSION"
            branch: "master"
-            file: "backend/go/piper/Makefile"
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v4
      - name: Bump dependencies 🔧
-        id: bump
        run: |
-          bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }} ${{ matrix.file }}
-          {
-            echo 'message<<EOF'
-            cat "${{ matrix.variable }}_message.txt"
-            echo EOF
-          } >> "$GITHUB_OUTPUT"
-          {
-            echo 'commit<<EOF'
-            cat "${{ matrix.variable }}_commit.txt"
-            echo EOF
-          } >> "$GITHUB_OUTPUT"
-          rm -rfv ${{ matrix.variable }}_message.txt
-          rm -rfv ${{ matrix.variable }}_commit.txt
+          bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
      - name: Create Pull Request
-        uses: peter-evans/create-pull-request@v8
+        uses: peter-evans/create-pull-request@v5
        with:
          token: ${{ secrets.UPDATE_BOT_TOKEN }}
          push-to-fork: ci-forks/LocalAI
          commit-message: ':arrow_up: Update ${{ matrix.repository }}'
-          title: 'chore: :arrow_up: Update ${{ matrix.repository }} to `${{ steps.bump.outputs.commit }}`'
+          title: ':arrow_up: Update ${{ matrix.repository }}'
          branch: "update/${{ matrix.variable }}"
-          body: ${{ steps.bump.outputs.message }}
+          body: Bump of ${{ matrix.repository }} version
          signoff: true


--- a/.github/workflows/bump_docs.yaml
+++ b/.github/workflows/bump_docs.yaml
@@ -1,31 +0,0 @@
-name: Bump Documentation
-on:
-  schedule:
-    - cron: 0 20 * * *
-  workflow_dispatch:
-jobs:
-  bump-docs:
-    strategy:
-      fail-fast: false
-      matrix:
-        include:
-          - repository: "mudler/LocalAI"
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v6
-      - name: Bump dependencies 🔧
-        run: |
-          bash .github/bump_docs.sh ${{ matrix.repository }}
-      - name: Create Pull Request
-        uses: peter-evans/create-pull-request@v8
-        with:
-          token: ${{ secrets.UPDATE_BOT_TOKEN }}
-          push-to-fork: ci-forks/LocalAI
-          commit-message: ':arrow_up: Update docs version ${{ matrix.repository }}'
-          title: 'docs: :arrow_up: update docs version ${{ matrix.repository }}'
-          branch: "update/docs"
-          body: Bump of ${{ matrix.repository }} version inside docs
-          signoff: true
-
-
-
--- a/.github/workflows/checksum_checker.yaml
+++ b/.github/workflows/checksum_checker.yaml
@@ -1,46 +0,0 @@
-name: Check if checksums are up-to-date
-on:
-  schedule:
-    - cron: 0 20 * * *
-  workflow_dispatch:
-jobs:
-  checksum_check:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Force Install GIT latest
-        run: |
-          sudo apt-get update \
-          && sudo apt-get install -y software-properties-common \
-          && sudo apt-get update \
-          && sudo add-apt-repository -y ppa:git-core/ppa \
-          && sudo apt-get update \
-          && sudo apt-get install -y git
-      - uses: actions/checkout@v6
-      - name: Install dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y pip wget
-          pip install huggingface_hub
-      - name: 'Setup yq'
-        uses: dcarbone/install-yq-action@v1.3.1
-        with:
-          version: 'v4.44.2'
-          download-compressed: true
-          force: true
-
-      - name: Checksum checker 🔧
-        run: |
-          export HF_HOME=/hf_cache
-          sudo mkdir /hf_cache
-          sudo chmod 777 /hf_cache
-          bash .github/checksum_checker.sh gallery/index.yaml
-      - name: Create Pull Request
-        uses: peter-evans/create-pull-request@v8
-        with:
-          token: ${{ secrets.UPDATE_BOT_TOKEN }}
-          push-to-fork: ci-forks/LocalAI
-          commit-message: ':arrow_up: Checksum updates in gallery/index.yaml'
-          title: 'chore(model-gallery): :arrow_up: update checksum'
-          branch: "update/checksum"
-          body: Updating checksums in gallery/index.yaml
-          signoff: true
--- a/.github/workflows/dependabot_auto.yml
+++ b/.github/workflows/dependabot_auto.yml
@@ -1,43 +0,0 @@
-name: Dependabot auto-merge
-on:
- pull_request_target
-
-permissions:
-  contents: write
-  pull-requests: write
-  packages: read
-
-jobs:
-  dependabot:
-    runs-on: ubuntu-latest
-    if: ${{ github.actor == 'dependabot[bot]' }}
-    steps:
-      - name: Dependabot metadata
-        id: metadata
-        uses: dependabot/fetch-metadata@v2.5.0
-        with:
-          github-token: "${{ secrets.GITHUB_TOKEN }}"
-          skip-commit-verification: true
-
-      - name: Checkout repository
-        uses: actions/checkout@v6
-
-      - name: Approve a PR if not already approved
-        run: |
-          gh pr checkout "$PR_URL"
-            if [ "$(gh pr status --json reviewDecision -q .currentBranch.reviewDecision)" != "APPROVED" ];
-          then
-            gh pr review --approve "$PR_URL"
-          else
-            echo "PR already approved.";
-          fi
-        env:
-          PR_URL: ${{github.event.pull_request.html_url}}
-          GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
-
-      - name: Enable auto-merge for Dependabot PRs
-        if: ${{ contains(github.event.pull_request.title, 'bump')}}
-        run: gh pr merge --auto --squash "$PR_URL"
-        env:
-          PR_URL: ${{github.event.pull_request.html_url}}
-          GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
--- a/.github/workflows/deploy-explorer.yaml
+++ b/.github/workflows/deploy-explorer.yaml
@@ -1,64 +0,0 @@
-name: Explorer deployment
-
-on:
-  push:
-    branches:
-      - master
-    tags:
-      - 'v*'
-
-concurrency:
-  group: ci-deploy-${{ github.head_ref || github.ref }}-${{ github.repository }}
-
-jobs:
-  build-linux:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v6
-        with:
-          submodules: true
-      - uses: actions/setup-go@v5
-        with:
-          go-version: '1.21.x'
-          cache: false
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
-          make protogen-go
-      - name: Build api
-        run: |
-          CGO_ENABLED=0 make build
-      - name: rm
-        uses: appleboy/ssh-action@v1.2.4
-        with:
-            host: ${{ secrets.EXPLORER_SSH_HOST }}
-            username: ${{ secrets.EXPLORER_SSH_USERNAME }}
-            key: ${{ secrets.EXPLORER_SSH_KEY }}
-            port: ${{ secrets.EXPLORER_SSH_PORT }}
-            script: |
-                sudo rm -rf local-ai/ || true
-      - name: copy file via ssh
-        uses: appleboy/scp-action@v1.0.0
-        with:
-            host: ${{ secrets.EXPLORER_SSH_HOST }}
-            username: ${{ secrets.EXPLORER_SSH_USERNAME }}
-            key: ${{ secrets.EXPLORER_SSH_KEY }}
-            port: ${{ secrets.EXPLORER_SSH_PORT }}
-            source: "local-ai"
-            overwrite: true
-            rm: true
-            target: ./local-ai
-      - name: restarting
-        uses: appleboy/ssh-action@v1.2.4
-        with:
-            host: ${{ secrets.EXPLORER_SSH_HOST }}
-            username: ${{ secrets.EXPLORER_SSH_USERNAME }}
-            key: ${{ secrets.EXPLORER_SSH_KEY }}
-            port: ${{ secrets.EXPLORER_SSH_PORT }}
-            script: |
-                sudo cp -rfv local-ai/local-ai /usr/bin/local-ai
-                sudo systemctl restart local-ai
--- a/.github/workflows/disabled/comment-pr.yaml
+++ b/.github/workflows/disabled/comment-pr.yaml
@@ -1,83 +0,0 @@
-name: Comment PRs
-on:
-  pull_request_target:
-
-jobs:
-  comment-pr:
-    env:
-        MODEL_NAME: hermes-2-theta-llama-3-8b
-    runs-on: ubuntu-latest
-    steps:
-    - name: Checkout code
-      uses: actions/checkout@v3
-      with:
-        ref: "${{ github.event.pull_request.merge_commit_sha }}"
-        fetch-depth: 0 # needed to checkout all branches for this Action to work
-    - uses: mudler/localai-github-action@v1
-      with:
-        model: 'hermes-2-theta-llama-3-8b' # Any model from models.localai.io, or from huggingface.co with: "huggingface://<repository>/file"
-      # Check the PR diff using the current branch and the base branch of the PR
-    - uses: GrantBirki/git-diff-action@v2.7.0
-      id: git-diff-action
-      with:
-            json_diff_file_output: diff.json
-            raw_diff_file_output: diff.txt
-            file_output_only: "true"
-            base_branch: ${{ github.event.pull_request.base.sha }}
-    - name: Show diff
-      env:
-        DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
-      run: |
-            cat $DIFF
-    - name: Summarize
-      env:
-        DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
-      id: summarize
-      run: |
-            input="$(cat $DIFF)"
-
-            # Define the LocalAI API endpoint
-            API_URL="http://localhost:8080/chat/completions"
-
-            # Create a JSON payload using jq to handle special characters
-            json_payload=$(jq -n --arg input "$input" '{
-            model: "'$MODEL_NAME'",
-            messages: [
-                {
-                role: "system",
-                content: "You are LocalAI-bot in Github that helps understanding PRs and assess complexity. Explain what has changed in this PR diff and why"
-                },
-                {
-                role: "user",
-                content: $input
-                }
-            ]
-            }')
-
-            # Send the request to LocalAI
-            response=$(curl -s -X POST $API_URL \
-            -H "Content-Type: application/json" \
-            -d "$json_payload")
-
-            # Extract the summary from the response
-            summary="$(echo $response | jq -r '.choices[0].message.content')"
-
-            # Print the summary
-            #  -H "Authorization: Bearer $API_KEY" \
-            echo "Summary:"
-            echo "$summary"
-            echo "payload sent"
-            echo "$json_payload"
-            {
-                echo 'message<<EOF'
-                echo "$summary"
-                echo EOF
-              } >> "$GITHUB_OUTPUT"
-            docker logs --tail 10 local-ai
-    - uses: mshick/add-pr-comment@v2
-      if: always()
-      with:
-          repo-token: ${{ secrets.UPDATE_BOT_TOKEN }}
-          message: ${{ steps.summarize.outputs.message }}
-          message-failure: |
-            Uh oh! Could not analyze this PR, maybe it's too big?
--- a/.github/workflows/gallery-agent.yaml
+++ b/.github/workflows/gallery-agent.yaml
@@ -1,132 +0,0 @@
-name: Gallery Agent
-on:
-
-  schedule:
-    - cron: '0 */3 * * *'  # Run every 3 hours
-  workflow_dispatch:
-    inputs:
-      search_term:
-        description: 'Search term for models'
-        required: false
-        default: 'GGUF'
-        type: string
-      limit:
-        description: 'Maximum number of models to process'
-        required: false
-        default: '15'
-        type: string
-      quantization:
-        description: 'Preferred quantization format'
-        required: false
-        default: 'Q4_K_M'
-        type: string
-      max_models:
-        description: 'Maximum number of models to add to the gallery'
-        required: false
-        default: '1'
-        type: string
-jobs:
-  gallery-agent:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v6
-        with:
-          token: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Set up Go
-        uses: actions/setup-go@v5
-        with:
-          go-version: '1.21'
-      - name: Proto Dependencies
-        run: |
-          # Install protoc
-          curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
-          unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
-          rm protoc.zip
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-          PATH="$PATH:$HOME/go/bin" make protogen-go
-      - uses: mudler/localai-github-action@v1.1
-        with:
-          model: 'https://huggingface.co/bartowski/Qwen_Qwen3-1.7B-GGUF'
-
-      - name: Run gallery agent
-        env:
-          #OPENAI_MODEL: ${{ secrets.OPENAI_MODEL }}
-          OPENAI_MODE: Qwen_Qwen3-1.7B-GGUF
-          OPENAI_BASE_URL: "http://localhost:8080"
-          OPENAI_KEY: ${{ secrets.OPENAI_KEY }}
-          #OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }}
-          SEARCH_TERM: ${{ github.event.inputs.search_term || 'GGUF' }}
-          LIMIT: ${{ github.event.inputs.limit || '15' }}
-          QUANTIZATION: ${{ github.event.inputs.quantization || 'Q4_K_M' }}
-          MAX_MODELS: ${{ github.event.inputs.max_models || '1' }}
-        run: |
-          export GALLERY_INDEX_PATH=$PWD/gallery/index.yaml
-          go run ./.github/gallery-agent
-
-      - name: Check for changes
-        id: check_changes
-        run: |
-          if git diff --quiet gallery/index.yaml; then
-            echo "changes=false" >> $GITHUB_OUTPUT
-            echo "No changes detected in gallery/index.yaml"
-          else
-            echo "changes=true" >> $GITHUB_OUTPUT
-            echo "Changes detected in gallery/index.yaml"
-            git diff gallery/index.yaml
-          fi
-
-      - name: Read gallery agent summary
-        id: read_summary
-        if: steps.check_changes.outputs.changes == 'true'
-        run: |
-          if [ -f "./gallery-agent-summary.json" ]; then
-            echo "summary_exists=true" >> $GITHUB_OUTPUT
-            # Extract summary data using jq
-            echo "search_term=$(jq -r '.search_term' ./gallery-agent-summary.json)" >> $GITHUB_OUTPUT
-            echo "total_found=$(jq -r '.total_found' ./gallery-agent-summary.json)" >> $GITHUB_OUTPUT
-            echo "models_added=$(jq -r '.models_added' ./gallery-agent-summary.json)" >> $GITHUB_OUTPUT
-            echo "quantization=$(jq -r '.quantization' ./gallery-agent-summary.json)" >> $GITHUB_OUTPUT
-            echo "processing_time=$(jq -r '.processing_time' ./gallery-agent-summary.json)" >> $GITHUB_OUTPUT
-            
-            # Create a formatted list of added models with URLs
-            added_models=$(jq -r 'range(0; .added_model_ids | length) as $i | "- [\(.added_model_ids[$i])](\(.added_model_urls[$i]))"' ./gallery-agent-summary.json | tr '\n' '\n')
-            echo "added_models<<EOF" >> $GITHUB_OUTPUT
-            echo "$added_models" >> $GITHUB_OUTPUT
-            echo "EOF" >> $GITHUB_OUTPUT
-            rm -f ./gallery-agent-summary.json
-          else
-            echo "summary_exists=false" >> $GITHUB_OUTPUT
-          fi
-
-      - name: Create Pull Request
-        if: steps.check_changes.outputs.changes == 'true'
-        uses: peter-evans/create-pull-request@v8
-        with:
-          token: ${{ secrets.UPDATE_BOT_TOKEN }}
-          push-to-fork: ci-forks/LocalAI
-          commit-message: 'chore(model gallery): :robot: add new models via gallery agent'
-          title: 'chore(model gallery): :robot: add ${{ steps.read_summary.outputs.models_added || 0 }} new models via gallery agent'
-          # Branch has to be unique so PRs do not overwrite each other
-          branch-suffix: timestamp
-          body: |
-            This PR was automatically created by the gallery agent workflow.
-            
-            **Summary:**
-            - **Search Term:** ${{ steps.read_summary.outputs.search_term || github.event.inputs.search_term || 'GGUF' }}
-            - **Models Found:** ${{ steps.read_summary.outputs.total_found || 'N/A' }}
-            - **Models Added:** ${{ steps.read_summary.outputs.models_added || '0' }}
-            - **Quantization:** ${{ steps.read_summary.outputs.quantization || github.event.inputs.quantization || 'Q4_K_M' }}
-            - **Processing Time:** ${{ steps.read_summary.outputs.processing_time || 'N/A' }}
-            
-            **Added Models:**
-            ${{ steps.read_summary.outputs.added_models || '- No models added' }}
-            
-            **Workflow Details:**
-            - Triggered by: `${{ github.event_name }}`
-            - Run ID: `${{ github.run_id }}`
-            - Commit: `${{ github.sha }}`
-          signoff: true
-          delete-branch: true
--- a/.github/workflows/generate_grpc_cache.yaml
+++ b/.github/workflows/generate_grpc_cache.yaml
@@ -1,95 +0,0 @@
-name: 'generate and publish GRPC docker caches'
-
-on:
-  workflow_dispatch:
-
-  schedule:
-    # daily at midnight
-    - cron: '0 0 * * *'
-
-concurrency:
-  group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
-  cancel-in-progress: true
-
-jobs:
-  generate_caches:
-    strategy:
-      matrix:
-        include:
-          - grpc-base-image: ubuntu:24.04
-            runs-on: 'ubuntu-latest'
-            platforms: 'linux/amd64,linux/arm64'
-    runs-on: ${{matrix.runs-on}}
-    steps:
-      - name: Release space from worker
-        if: matrix.runs-on == 'ubuntu-latest'
-        run: |
-          echo "Listing top largest packages"
-          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-          head -n 30 <<< "${pkgs}"
-          echo
-          df -h
-          echo
-          sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
-          sudo apt-get remove --auto-remove android-sdk-platform-tools || true
-          sudo apt-get purge --auto-remove android-sdk-platform-tools || true
-          sudo rm -rf /usr/local/lib/android
-          sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
-          sudo rm -rf /usr/share/dotnet
-          sudo apt-get remove -y '^mono-.*' || true
-          sudo apt-get remove -y '^ghc-.*' || true
-          sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
-          sudo apt-get remove -y 'php.*' || true
-          sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
-          sudo apt-get remove -y '^google-.*' || true
-          sudo apt-get remove -y azure-cli || true
-          sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
-          sudo apt-get remove -y '^gfortran-.*' || true
-          sudo apt-get remove -y microsoft-edge-stable || true
-          sudo apt-get remove -y firefox || true
-          sudo apt-get remove -y powershell || true
-          sudo apt-get remove -y r-base-core || true
-          sudo apt-get autoremove -y
-          sudo apt-get clean
-          echo
-          echo "Listing top largest packages"
-          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-          head -n 30 <<< "${pkgs}"
-          echo
-          sudo rm -rfv build || true
-          sudo rm -rf /usr/share/dotnet || true
-          sudo rm -rf /opt/ghc || true
-          sudo rm -rf "/usr/local/share/boost" || true
-          sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
-          df -h
-
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@master
-        with:
-          platforms: all
-
-      - name: Set up Docker Buildx
-        id: buildx
-        uses: docker/setup-buildx-action@master
-
-      - name: Checkout
-        uses: actions/checkout@v6
-
-      - name: Cache GRPC
-        uses: docker/build-push-action@v6
-        with:
-          builder: ${{ steps.buildx.outputs.name }}
-          # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
-          # This means that even the MAKEFLAGS have to be an EXACT match.
-          # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
-          build-args: |
-            GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }}
-            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
-            GRPC_VERSION=v1.65.0
-          context: .
-          file: ./Dockerfile
-          cache-to: type=gha,ignore-error=true
-          cache-from: type=gha
-          target: grpc
-          platforms: ${{ matrix.platforms }}
-          push: false
--- a/.github/workflows/generate_intel_image.yaml
+++ b/.github/workflows/generate_intel_image.yaml
@@ -1,59 +0,0 @@
-name: 'generate and publish intel docker caches'
-
-on:
-  workflow_dispatch:
-  push:
-    branches:
-      - master
-
-concurrency:
-  group: intel-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
-  cancel-in-progress: true
-
-jobs:
-  generate_caches:
-    strategy:
-      matrix:
-        include:
-          - base-image: intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04
-            runs-on: 'arc-runner-set'
-            platforms: 'linux/amd64'
-    runs-on: ${{matrix.runs-on}}
-    steps:
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@master
-        with:
-          platforms: all
-      - name: Login to DockerHub
-        if: github.event_name != 'pull_request'
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-
-      - name: Login to quay
-        if: github.event_name != 'pull_request'
-        uses: docker/login-action@v3
-        with:
-          registry: quay.io
-          username: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
-          password: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
-      - name: Set up Docker Buildx
-        id: buildx
-        uses: docker/setup-buildx-action@master
-
-      - name: Checkout
-        uses: actions/checkout@v6
-
-      - name: Cache Intel images
-        uses: docker/build-push-action@v6
-        with:
-          builder: ${{ steps.buildx.outputs.name }}
-          build-args: |
-            BASE_IMAGE=${{ matrix.base-image }}
-          context: .
-          file: ./Dockerfile
-          tags: quay.io/go-skynet/intel-oneapi-base:24.04
-          push: true
-          target: intel
-          platforms: ${{ matrix.platforms }}
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@@ -1,95 +0,0 @@
---
-  name: 'build container images tests'
-  
-  on:
-    pull_request:
-  
-  concurrency:
-    group: ci-${{ github.head_ref || github.ref }}-${{ github.repository }}
-    cancel-in-progress: true
-  
-  jobs:
-    image-build:
-      uses: ./.github/workflows/image_build.yml
-      with:
-        tag-latest: ${{ matrix.tag-latest }}
-        tag-suffix: ${{ matrix.tag-suffix }}
-        build-type: ${{ matrix.build-type }}
-        cuda-major-version: ${{ matrix.cuda-major-version }}
-        cuda-minor-version: ${{ matrix.cuda-minor-version }}
-        platforms: ${{ matrix.platforms }}
-        runs-on: ${{ matrix.runs-on }}
-        base-image: ${{ matrix.base-image }}
-        grpc-base-image: ${{ matrix.grpc-base-image }}
-        makeflags: ${{ matrix.makeflags }}
-        ubuntu-version: ${{ matrix.ubuntu-version }}
-      secrets:
-        dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
-        dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
-        quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
-        quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
-      strategy:
-        # Pushing with all jobs in parallel
-        # eats the bandwidth of all the nodes
-        max-parallel: ${{ github.event_name != 'pull_request' && 4 || 8 }}
-        fail-fast: false
-        matrix:
-          include:
-            - build-type: 'cublas'
-              cuda-major-version: "12"
-              cuda-minor-version: "9"
-              platforms: 'linux/amd64'
-              tag-latest: 'false'
-              tag-suffix: '-gpu-nvidia-cuda-12'
-              runs-on: 'ubuntu-latest'
-              base-image: "ubuntu:24.04"
-              makeflags: "--jobs=3 --output-sync=target"
-              ubuntu-version: '2404'
-            - build-type: 'cublas'
-              cuda-major-version: "13"
-              cuda-minor-version: "0"
-              platforms: 'linux/amd64'
-              tag-latest: 'false'
-              tag-suffix: '-gpu-nvidia-cuda-13'
-              runs-on: 'ubuntu-latest'
-              base-image: "ubuntu:22.04"
-              makeflags: "--jobs=3 --output-sync=target"
-              ubuntu-version: '2404'
-            - build-type: 'hipblas'
-              platforms: 'linux/amd64'
-              tag-latest: 'false'
-              tag-suffix: '-hipblas'
-              base-image: "rocm/dev-ubuntu-24.04:6.4.4"
-              grpc-base-image: "ubuntu:24.04"
-              runs-on: 'ubuntu-latest'
-              makeflags: "--jobs=3 --output-sync=target"
-              ubuntu-version: '2404'
-            - build-type: 'sycl'
-              platforms: 'linux/amd64'
-              tag-latest: 'false'
-              base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
-              grpc-base-image: "ubuntu:24.04"
-              tag-suffix: 'sycl'
-              runs-on: 'ubuntu-latest'
-              makeflags: "--jobs=3 --output-sync=target"
-              ubuntu-version: '2404'
-            - build-type: 'vulkan'
-              platforms: 'linux/amd64,linux/arm64'
-              tag-latest: 'false'
-              tag-suffix: '-vulkan-core'
-              runs-on: 'ubuntu-latest'
-              base-image: "ubuntu:24.04"
-              makeflags: "--jobs=4 --output-sync=target"
-              ubuntu-version: '2404'
-            - build-type: 'cublas'
-              cuda-major-version: "13"
-              cuda-minor-version: "0"
-              platforms: 'linux/arm64'
-              tag-latest: 'false'
-              tag-suffix: '-nvidia-l4t-arm64-cuda-13'
-              base-image: "ubuntu:24.04"
-              runs-on: 'ubuntu-24.04-arm'
-              makeflags: "--jobs=4 --output-sync=target"
-              skip-drivers: 'false'
-              ubuntu-version: '2404'
-  
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -1,187 +1,146 @@
 ---
-  name: 'build container images'
-  
-  on:
-    push:
-      branches:
-        - master
-      tags:
-        - '*'
-  
-  concurrency:
-    group: ci-${{ github.head_ref || github.ref }}-${{ github.repository }}
-    cancel-in-progress: true
-  
-  jobs:
-    hipblas-jobs:
-      uses: ./.github/workflows/image_build.yml
-      with:
-        tag-latest: ${{ matrix.tag-latest }}
-        tag-suffix: ${{ matrix.tag-suffix }}
-        build-type: ${{ matrix.build-type }}
-        cuda-major-version: ${{ matrix.cuda-major-version }}
-        cuda-minor-version: ${{ matrix.cuda-minor-version }}
-        platforms: ${{ matrix.platforms }}
-        runs-on: ${{ matrix.runs-on }}
-        base-image: ${{ matrix.base-image }}
-        grpc-base-image: ${{ matrix.grpc-base-image }}
-        aio: ${{ matrix.aio }}
-        makeflags: ${{ matrix.makeflags }}
-        ubuntu-version: ${{ matrix.ubuntu-version }}
-        ubuntu-codename: ${{ matrix.ubuntu-codename }}
-      secrets:
-        dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
-        dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
-        quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
-        quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
-      strategy:
-        matrix:
-          include:
-            - build-type: 'hipblas'
-              platforms: 'linux/amd64'
-              tag-latest: 'auto'
-              tag-suffix: '-gpu-hipblas'
-              base-image: "rocm/dev-ubuntu-24.04:6.4.4"
-              grpc-base-image: "ubuntu:24.04"
-              runs-on: 'ubuntu-latest'
-              makeflags: "--jobs=3 --output-sync=target"
-              aio: "-aio-gpu-hipblas"
-              ubuntu-version: '2404'
-              ubuntu-codename: 'noble'
-  
-    core-image-build:
-      uses: ./.github/workflows/image_build.yml
-      with:
-        tag-latest: ${{ matrix.tag-latest }}
-        tag-suffix: ${{ matrix.tag-suffix }}
-        build-type: ${{ matrix.build-type }}
-        cuda-major-version: ${{ matrix.cuda-major-version }}
-        cuda-minor-version: ${{ matrix.cuda-minor-version }}
-        platforms: ${{ matrix.platforms }}
-        runs-on: ${{ matrix.runs-on }}
-        aio: ${{ matrix.aio }}
-        base-image: ${{ matrix.base-image }}
-        grpc-base-image: ${{ matrix.grpc-base-image }}
-        makeflags: ${{ matrix.makeflags }}
-        skip-drivers: ${{ matrix.skip-drivers }}
-        ubuntu-version: ${{ matrix.ubuntu-version }}
-        ubuntu-codename: ${{ matrix.ubuntu-codename }}
-      secrets:
-        dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
-        dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
-        quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
-        quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
-      strategy:
-        #max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
-        matrix:
-          include:
-            - build-type: ''
-              platforms: 'linux/amd64,linux/arm64'
-              tag-latest: 'auto'
-              tag-suffix: ''
-              base-image: "ubuntu:24.04"
-              runs-on: 'ubuntu-latest'
-              aio: "-aio-cpu"
-              makeflags: "--jobs=4 --output-sync=target"
-              skip-drivers: 'false'
-              ubuntu-version: '2404'
-              ubuntu-codename: 'noble'
-            - build-type: 'cublas'
-              cuda-major-version: "12"
-              cuda-minor-version: "9"
-              platforms: 'linux/amd64'
-              tag-latest: 'auto'
-              tag-suffix: '-gpu-nvidia-cuda-12'
-              runs-on: 'ubuntu-latest'
-              base-image: "ubuntu:24.04"
-              skip-drivers: 'false'
-              makeflags: "--jobs=4 --output-sync=target"
-              aio: "-aio-gpu-nvidia-cuda-12"
-              ubuntu-version: '2404'
-              ubuntu-codename: 'noble'
-            - build-type: 'cublas'
-              cuda-major-version: "13"
-              cuda-minor-version: "0"
-              platforms: 'linux/amd64'
-              tag-latest: 'auto'
-              tag-suffix: '-gpu-nvidia-cuda-13'
-              runs-on: 'ubuntu-latest'
-              base-image: "ubuntu:22.04"
-              skip-drivers: 'false'
-              makeflags: "--jobs=4 --output-sync=target"
-              aio: "-aio-gpu-nvidia-cuda-13"
-              ubuntu-version: '2404'
-              ubuntu-codename: 'noble'
-            - build-type: 'vulkan'
-              platforms: 'linux/amd64,linux/arm64'
-              tag-latest: 'auto'
-              tag-suffix: '-gpu-vulkan'
-              runs-on: 'ubuntu-latest'
-              base-image: "ubuntu:24.04"
-              skip-drivers: 'false'
-              makeflags: "--jobs=4 --output-sync=target"
-              aio: "-aio-gpu-vulkan"
-              ubuntu-version: '2404'
-              ubuntu-codename: 'noble'
-            - build-type: 'intel'
-              platforms: 'linux/amd64'
-              tag-latest: 'auto'
-              base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
-              grpc-base-image: "ubuntu:24.04"
-              tag-suffix: '-gpu-intel'
-              runs-on: 'ubuntu-latest'
-              makeflags: "--jobs=3 --output-sync=target"
-              aio: "-aio-gpu-intel"
-              ubuntu-version: '2404'
-              ubuntu-codename: 'noble'
-  
-    gh-runner:
-      uses: ./.github/workflows/image_build.yml
-      with:
-        tag-latest: ${{ matrix.tag-latest }}
-        tag-suffix: ${{ matrix.tag-suffix }}
-        build-type: ${{ matrix.build-type }}
-        cuda-major-version: ${{ matrix.cuda-major-version }}
-        cuda-minor-version: ${{ matrix.cuda-minor-version }}
-        platforms: ${{ matrix.platforms }}
-        runs-on: ${{ matrix.runs-on }}
-        aio: ${{ matrix.aio }}
-        base-image: ${{ matrix.base-image }}
-        grpc-base-image: ${{ matrix.grpc-base-image }}
-        makeflags: ${{ matrix.makeflags }}
-        skip-drivers: ${{ matrix.skip-drivers }}
-        ubuntu-version: ${{ matrix.ubuntu-version }}
-        ubuntu-codename: ${{ matrix.ubuntu-codename }}
-      secrets:
-        dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
-        dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
-        quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
-        quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
-      strategy:
-        matrix:
-          include:
-            - build-type: 'cublas'
-              cuda-major-version: "12"
-              cuda-minor-version: "0"
-              platforms: 'linux/arm64'
-              tag-latest: 'auto'
-              tag-suffix: '-nvidia-l4t-arm64'
-              base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
-              runs-on: 'ubuntu-24.04-arm'
-              makeflags: "--jobs=4 --output-sync=target"
-              skip-drivers: 'true'
-              ubuntu-version: "2204"
-              ubuntu-codename: 'jammy'
-            - build-type: 'cublas'
-              cuda-major-version: "13"
-              cuda-minor-version: "0"
-              platforms: 'linux/arm64'
-              tag-latest: 'auto'
-              tag-suffix: '-nvidia-l4t-arm64-cuda-13'
-              base-image: "ubuntu:24.04"
-              runs-on: 'ubuntu-24.04-arm'
-              makeflags: "--jobs=4 --output-sync=target"
-              skip-drivers: 'false'
-              ubuntu-version: '2404'
-              ubuntu-codename: 'noble'
-  
+name: 'build container images'
+
+on:
+  pull_request:  # PRs run the build as well; the job below only pushes when the event is not pull_request
+  push:
+    branches:
+      - master
+    tags:
+      - '*'  # every pushed tag triggers a build
+
+concurrency:  # runs are grouped by PR head branch (or pushed ref) plus repository
+  group: ci-${{ github.head_ref || github.ref }}-${{ github.repository }}
+  cancel-in-progress: true  # a newer run in the same group cancels the one in progress
+
+jobs:
+  docker:  # single job, expanded once per matrix entry below
+    strategy:
+      matrix:
+        include:  # build-type '' and cublas (CUDA 11.7 / 12.1), each without and with ffmpeg
+          - build-type: ''  # the only entry with tag-latest 'auto' and an empty tag-suffix
+            # platforms: 'linux/amd64,linux/arm64'  (multi-arch variant, left commented out)
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: ''
+            ffmpeg: ''
+          - build-type: 'cublas'
+            cuda-major-version: 11  # forwarded as the CUDA_MAJOR_VERSION build-arg
+            cuda-minor-version: 7  # forwarded as the CUDA_MINOR_VERSION build-arg
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: '-cublas-cuda11'
+            ffmpeg: ''
+          - build-type: 'cublas'
+            cuda-major-version: 12
+            cuda-minor-version: 1
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: '-cublas-cuda12'
+            ffmpeg: ''
+          - build-type: ''
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: '-ffmpeg'
+            ffmpeg: 'true'  # forwarded as the FFMPEG build-arg
+          - build-type: 'cublas'
+            cuda-major-version: 11
+            cuda-minor-version: 7
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: '-cublas-cuda11-ffmpeg'
+            ffmpeg: 'true'
+          - build-type: 'cublas'
+            cuda-major-version: 12
+            cuda-minor-version: 1
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: '-cublas-cuda12-ffmpeg'
+            ffmpeg: 'true'
+
+    runs-on: ubuntu-latest
+    steps:
+      - name: Release space from worker  # best-effort cleanup: prints package sizes and df before and after
+        run: |
+          echo "Listing top largest packages"
+          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
+          head -n 30 <<< "${pkgs}"
+          echo
+          df -h
+          echo
+          sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
+          sudo apt-get remove -y --auto-remove android-sdk-platform-tools || true  # -y: without it the confirmation prompt aborts and || true hides the failure
+          sudo apt-get purge -y --auto-remove android-sdk-platform-tools || true
+          sudo rm -rf /usr/local/lib/android
+          sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
+          sudo rm -rf /usr/share/dotnet
+          sudo apt-get remove -y '^mono-.*' || true
+          sudo apt-get remove -y '^ghc-.*' || true
+          sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
+          sudo apt-get remove -y 'php.*' || true
+          sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
+          sudo apt-get remove -y '^google-.*' || true
+          sudo apt-get remove -y azure-cli || true
+          sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
+          sudo apt-get remove -y '^gfortran-.*' || true
+          sudo apt-get remove -y microsoft-edge-stable || true
+          sudo apt-get remove -y firefox || true  # repeated on its own: the combined call above fails as a whole if any listed package is unknown
+          sudo apt-get remove -y powershell || true
+          sudo apt-get remove -y r-base-core || true
+          sudo apt-get autoremove -y
+          sudo apt-get clean
+          echo
+          echo "Listing top largest packages"
+          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
+          head -n 30 <<< "${pkgs}"
+          echo
+          sudo rm -rfv build || true
+          df -h
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Docker meta  # produces the tags and labels consumed by the "Build and push" step
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: quay.io/go-skynet/local-ai  # single target repository, on quay.io
+          tags: |
+            type=ref,event=branch
+            type=semver,pattern={{raw}}
+            type=sha
+          flavor: |
+            latest=${{ matrix.tag-latest }}
+            suffix=${{ matrix.tag-suffix }}
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3  # pinned to a release tag; @master is a mutable branch ref
+        with:
+          platforms: all
+
+      - name: Set up Docker Buildx
+        id: buildx  # referenced as steps.buildx.outputs.name by the "Build and push" step
+        uses: docker/setup-buildx-action@v3  # pinned to a release tag; @master is a mutable branch ref
+
+      - name: Login to quay.io
+        if: github.event_name != 'pull_request'  # pull_request runs never push, so they skip the login
+        uses: docker/login-action@v3
+        with:
+          registry: quay.io
+          username: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
+          password: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
+
+      - name: Build and push  # runs once per matrix entry with that entry's build-args and platforms
+        uses: docker/build-push-action@v5
+        with:
+          builder: ${{ steps.buildx.outputs.name }}
+          build-args: |
+            BUILD_TYPE=${{ matrix.build-type }}
+            CUDA_MAJOR_VERSION=${{ matrix.cuda-major-version }}
+            CUDA_MINOR_VERSION=${{ matrix.cuda-minor-version }}
+            FFMPEG=${{ matrix.ffmpeg }}
+          context: .
+          file: ./Dockerfile
+          platforms: ${{ matrix.platforms }}
+          push: ${{ github.event_name != 'pull_request' }}  # pull_request runs build the image but do not push it
+          tags: ${{ steps.meta.outputs.tags }}  # tags and labels come from the "Docker meta" step (id: meta)
+          labels: ${{ steps.meta.outputs.labels }}
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -1,327 +0,0 @@
---
-name: 'build container images (reusable)'
-
-on:
-  workflow_call:
-    inputs:
-      base-image:
-        description: 'Base image'
-        required: true
-        type: string
-      grpc-base-image:
-        description: 'GRPC Base image, must be a compatible image with base-image'
-        required: false
-        default: ''
-        type: string
-      build-type:
-        description: 'Build type'
-        default: ''
-        type: string
-      cuda-major-version:
-        description: 'CUDA major version'
-        default: "12"
-        type: string
-      cuda-minor-version:
-        description: 'CUDA minor version'
-        default: "9"
-        type: string
-      platforms:
-        description: 'Platforms'
-        default: ''
-        type: string
-      tag-latest:
-        description: 'Tag latest'
-        default: ''
-        type: string
-      tag-suffix:
-        description: 'Tag suffix'
-        default: ''
-        type: string
-      skip-drivers:
-        description: 'Skip drivers by default'
-        default: 'false'
-        type: string
-      runs-on:
-        description: 'Runs on'
-        required: true
-        default: ''
-        type: string
-      makeflags:
-        description: 'Make Flags'
-        required: false
-        default: '--jobs=4 --output-sync=target'
-        type: string
-      aio:
-        description: 'AIO Image Name'
-        required: false
-        default: ''
-        type: string
-      ubuntu-version:
-        description: 'Ubuntu version'
-        required: false
-        default: '2204'
-        type: string
-      ubuntu-codename:
-        description: 'Ubuntu codename'
-        required: false
-        default: 'noble'
-        type: string
-    secrets:
-      dockerUsername:
-        required: true
-      dockerPassword:
-        required: true
-      quayUsername:
-        required: true
-      quayPassword:
-        required: true
-jobs:
-  reusable_image-build:
-    runs-on: ${{ inputs.runs-on }}
-    steps:
-
-      - name: Free Disk Space (Ubuntu)
-        if: inputs.runs-on == 'ubuntu-latest'
-        uses: jlumbroso/free-disk-space@main
-        with:
-          # this might remove tools that are actually needed,
-          # if set to "true" but frees about 6 GB
-          tool-cache: true
-          # all of these default to true, but feel free to set to
-          # "false" if necessary for your workflow
-          android: true
-          dotnet: true
-          haskell: true
-          large-packages: true
-          docker-images: true
-          swap-storage: true
-      - name: Force Install GIT latest
-        run: |
-          sudo apt-get update \
-          && sudo apt-get install -y software-properties-common \
-          && sudo apt-get update \
-          && sudo add-apt-repository -y ppa:git-core/ppa \
-          && sudo apt-get update \
-          && sudo apt-get install -y git
-      - name: Checkout
-        uses: actions/checkout@v6
-
-      - name: Release space from worker
-        if: inputs.runs-on == 'ubuntu-latest'
-        run: |
-          echo "Listing top largest packages"
-          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-          head -n 30 <<< "${pkgs}"
-          echo
-          df -h
-          echo
-          sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
-          sudo apt-get remove --auto-remove android-sdk-platform-tools snapd || true
-          sudo apt-get purge --auto-remove android-sdk-platform-tools snapd || true
-          sudo rm -rf /usr/local/lib/android
-          sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
-          sudo rm -rf /usr/share/dotnet
-          sudo apt-get remove -y '^mono-.*' || true
-          sudo apt-get remove -y '^ghc-.*' || true
-          sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
-          sudo apt-get remove -y 'php.*' || true
-          sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
-          sudo apt-get remove -y '^google-.*' || true
-          sudo apt-get remove -y azure-cli || true
-          sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
-          sudo apt-get remove -y '^gfortran-.*' || true
-          sudo apt-get remove -y microsoft-edge-stable || true
-          sudo apt-get remove -y firefox || true
-          sudo apt-get remove -y powershell || true
-          sudo apt-get remove -y r-base-core || true
-          sudo apt-get autoremove -y
-          sudo apt-get clean
-          echo
-          echo "Listing top largest packages"
-          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-          head -n 30 <<< "${pkgs}"
-          echo
-          sudo rm -rfv build || true
-          sudo rm -rf /usr/share/dotnet || true
-          sudo rm -rf /opt/ghc || true
-          sudo rm -rf "/usr/local/share/boost" || true
-          sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
-          df -h
-
-      - name: Docker meta
-        id: meta
-        if: github.event_name != 'pull_request'
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            quay.io/go-skynet/local-ai
-            localai/localai
-          tags: |
-            type=ref,event=branch
-            type=semver,pattern={{raw}}
-            type=sha
-          flavor: |
-            latest=${{ inputs.tag-latest }}
-            suffix=${{ inputs.tag-suffix }},onlatest=true
-      - name: Docker meta for PR
-        id: meta_pull_request
-        if: github.event_name == 'pull_request'
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            quay.io/go-skynet/ci-tests
-          tags: |
-            type=ref,event=branch,suffix=localai${{ github.event.number }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
-            type=semver,pattern={{raw}},suffix=localai${{ github.event.number }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
-            type=sha,suffix=localai${{ github.event.number }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
-          flavor: |
-            latest=${{ inputs.tag-latest }}
-            suffix=${{ inputs.tag-suffix }}
-      - name: Docker meta AIO (quay.io)
-        if: inputs.aio != ''
-        id: meta_aio
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            quay.io/go-skynet/local-ai
-          tags: |
-            type=ref,event=branch
-            type=semver,pattern={{raw}}
-          flavor: |
-            latest=${{ inputs.tag-latest }}
-            suffix=${{ inputs.aio }},onlatest=true
-
-      - name: Docker meta AIO (dockerhub)
-        if: inputs.aio != ''
-        id: meta_aio_dockerhub
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            localai/localai
-          tags: |
-            type=ref,event=branch
-            type=semver,pattern={{raw}}
-          flavor: |
-            latest=${{ inputs.tag-latest }}
-            suffix=${{ inputs.aio }},onlatest=true
-
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@master
-        with:
-          platforms: all
-
-      - name: Set up Docker Buildx
-        id: buildx
-        uses: docker/setup-buildx-action@master
-
-      - name: Login to DockerHub
-        if: github.event_name != 'pull_request'
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.dockerUsername }}
-          password: ${{ secrets.dockerPassword }}
-
-      - name: Login to DockerHub
-        if: github.event_name != 'pull_request'
-        uses: docker/login-action@v3
-        with:
-          registry: quay.io
-          username: ${{ secrets.quayUsername }}
-          password: ${{ secrets.quayPassword }}
-
-      - name: Build and push
-        uses: docker/build-push-action@v6
-        if: github.event_name != 'pull_request'
-        with:
-          builder: ${{ steps.buildx.outputs.name }}
-          # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
-          # This means that even the MAKEFLAGS have to be an EXACT match.
-          # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
-          # This is why some build args like GRPC_VERSION and MAKEFLAGS are hardcoded
-          build-args: |
-            BUILD_TYPE=${{ inputs.build-type }}
-            CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
-            CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
-            BASE_IMAGE=${{ inputs.base-image }}
-            GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
-            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
-            GRPC_VERSION=v1.65.0
-            MAKEFLAGS=${{ inputs.makeflags }}
-            SKIP_DRIVERS=${{ inputs.skip-drivers }}
-            UBUNTU_VERSION=${{ inputs.ubuntu-version }}
-            UBUNTU_CODENAME=${{ inputs.ubuntu-codename }}
-          context: .
-          file: ./Dockerfile
-          cache-from: type=gha
-          platforms: ${{ inputs.platforms }}
-          push: ${{ github.event_name != 'pull_request' }}
-          tags: ${{ steps.meta.outputs.tags }}
-          labels: ${{ steps.meta.outputs.labels }}
-### Start testing image
-      - name: Build and push
-        uses: docker/build-push-action@v6
-        if: github.event_name == 'pull_request'
-        with:
-          builder: ${{ steps.buildx.outputs.name }}
-          # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
-          # This means that even the MAKEFLAGS have to be an EXACT match.
-          # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
-          # This is why some build args like GRPC_VERSION and MAKEFLAGS are hardcoded
-          build-args: |
-            BUILD_TYPE=${{ inputs.build-type }}
-            CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
-            CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
-            BASE_IMAGE=${{ inputs.base-image }}
-            GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
-            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
-            GRPC_VERSION=v1.65.0
-            MAKEFLAGS=${{ inputs.makeflags }}
-            SKIP_DRIVERS=${{ inputs.skip-drivers }}
-            UBUNTU_VERSION=${{ inputs.ubuntu-version }}
-            UBUNTU_CODENAME=${{ inputs.ubuntu-codename }}
-          context: .
-          file: ./Dockerfile
-          cache-from: type=gha
-          platforms: ${{ inputs.platforms }}
-          #push: true
-          tags: ${{ steps.meta_pull_request.outputs.tags }}
-          labels: ${{ steps.meta_pull_request.outputs.labels }}
-## End testing image
-      - name: Build and push AIO image
-        if: inputs.aio != ''
-        uses: docker/build-push-action@v6
-        with:
-          builder: ${{ steps.buildx.outputs.name }}
-          build-args: |
-            BASE_IMAGE=quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
-            MAKEFLAGS=${{ inputs.makeflags }}
-          context: .
-          file: ./Dockerfile.aio
-          platforms: ${{ inputs.platforms }}
-          push: ${{ github.event_name != 'pull_request' }}
-          tags: ${{ steps.meta_aio.outputs.tags }}
-          labels: ${{ steps.meta_aio.outputs.labels }}
-
-      - name: Build and push AIO image (dockerhub)
-        if: inputs.aio != ''
-        uses: docker/build-push-action@v6
-        with:
-          builder: ${{ steps.buildx.outputs.name }}
-          build-args: |
-            BASE_IMAGE=localai/localai:${{ steps.meta.outputs.version }}
-            MAKEFLAGS=${{ inputs.makeflags }}
-          context: .
-          file: ./Dockerfile.aio
-          platforms: ${{ inputs.platforms }}
-          push: ${{ github.event_name != 'pull_request' }}
-          tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
-          labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}
-
-      - name: job summary
-        run: |
-          echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
-
-      - name: job summary(AIO)
-        if: inputs.aio != ''
-        run: |
-          echo "Built image: ${{ steps.meta_aio.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
--- a/.github/workflows/labeler.yml
+++ b/.github/workflows/labeler.yml
@@ -1,12 +0,0 @@
-name: "Pull Request Labeler"
-on:
- pull_request_target
-
-jobs:
-  labeler:
-    permissions:
-      contents: read
-      pull-requests: write
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/labeler@v6
--- a/.github/workflows/localaibot_automerge.yml
+++ b/.github/workflows/localaibot_automerge.yml
@@ -1,36 +0,0 @@
-name: LocalAI-bot auto-merge
-on:
- pull_request_target
-
-permissions:
-  contents: write
-  pull-requests: write
-  packages: read
-  issues: write # for Homebrew/actions/post-comment
-  actions: write # to dispatch publish workflow
-jobs:
-  dependabot:
-    runs-on: ubuntu-latest
-    if: ${{ github.actor == 'localai-bot' && !contains(github.event.pull_request.title, 'chore(model gallery):') }}
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v6
-
-      - name: Approve a PR if not already approved
-        run: |
-          gh pr checkout "$PR_URL"
-            if [ "$(gh pr status --json reviewDecision -q .currentBranch.reviewDecision)" != "APPROVED" ];
-          then
-            gh pr review --approve "$PR_URL"
-          else
-            echo "PR already approved.";
-          fi
-        env:
-          PR_URL: ${{github.event.pull_request.html_url}}
-          GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
-
-      - name: Enable auto-merge for LocalAIBot PRs
-        run: gh pr merge --auto --squash "$PR_URL"
-        env:
-          PR_URL: ${{github.event.pull_request.html_url}}
-          GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
--- a/.github/workflows/notify-models.yaml
+++ b/.github/workflows/notify-models.yaml
@@ -1,174 +0,0 @@
-name: Notifications for new models
-on:
-  pull_request_target:
-     types:
-       - closed
-
-permissions:
-  contents: read
-  pull-requests: read
-
-jobs:
-  notify-discord:
-    if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
-    env:
-        MODEL_NAME: gemma-3-12b-it-qat
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v6
-      with:
-        fetch-depth: 0 # needed to checkout all branches for this Action to work
-        ref: ${{ github.event.pull_request.head.sha }} # Checkout the PR head to get the actual changes
-    - uses: mudler/localai-github-action@v1
-      with:
-        model: 'gemma-3-12b-it-qat' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
-        # Check the PR diff using the current branch and the base branch of the PR
-    - uses: GrantBirki/git-diff-action@v2.8.1
-      id: git-diff-action
-      with:
-            json_diff_file_output: diff.json
-            raw_diff_file_output: diff.txt
-            file_output_only: "true"
-    - name: Summarize
-      env:
-        DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
-      id: summarize
-      run: |
-            input="$(cat $DIFF)"
-
-            # Define the LocalAI API endpoint
-            API_URL="http://localhost:8080/chat/completions"
-
-            # Create a JSON payload using jq to handle special characters
-            json_payload=$(jq -n --arg input "$input" '{
-            model: "'$MODEL_NAME'",
-            messages: [
-                {
-                role: "system",
-                content: "You are LocalAI-bot. Write a discord message to notify everyone about the new model from the git diff. Make it informal. An example can include: the URL of the model, the name, and a brief description of the model if exists. Also add an hint on how to install it in LocalAI and that can be browsed over https://models.localai.io. For example: local-ai run model_name_here"
-                },
-                {
-                role: "user",
-                content: $input
-                }
-            ]
-            }')
-
-            # Send the request to LocalAI
-            response=$(curl -s -X POST $API_URL \
-            -H "Content-Type: application/json" \
-            -d "$json_payload")
-
-            # Extract the summary from the response
-            summary="$(echo $response | jq -r '.choices[0].message.content')"
-
-            # Print the summary
-            #  -H "Authorization: Bearer $API_KEY" \
-            echo "Summary:"
-            echo "$summary"
-            echo "payload sent"
-            echo "$json_payload"
-            {
-                echo 'message<<EOF'
-                echo "$summary"
-                echo EOF
-              } >> "$GITHUB_OUTPUT"
-            docker logs --tail 10 local-ai
-    - name: Discord notification
-      env:
-        DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL }}
-        DISCORD_USERNAME: "LocalAI-Bot"
-        DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
-      uses: Ilshidur/action-discord@master
-      with:
-        args: ${{ steps.summarize.outputs.message }}
-    - name: Setup tmate session if fails
-      if: ${{ failure() }}
-      uses: mxschmitt/action-tmate@v3.23
-      with:
-        detached: true
-        connect-timeout-seconds: 180
-        limit-access-to-actor: true
-  notify-twitter:
-    if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
-    env:
-        MODEL_NAME: gemma-3-12b-it-qat
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v6
-      with:
-        fetch-depth: 0 # needed to checkout all branches for this Action to work
-        ref: ${{ github.event.pull_request.head.sha }} # Checkout the PR head to get the actual changes
-    - name: Start LocalAI
-      run: |
-        echo "Starting LocalAI..."
-        docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master run --debug $MODEL_NAME
-        until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready";  docker logs --tail 10 local-ai; sleep 2; done
-      # Check the PR diff using the current branch and the base branch of the PR
-    - uses: GrantBirki/git-diff-action@v2.8.1
-      id: git-diff-action
-      with:
-            json_diff_file_output: diff.json
-            raw_diff_file_output: diff.txt
-            file_output_only: "true"
-    - name: Summarize
-      env:
-        DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
-      id: summarize
-      run: |
-            input="$(cat $DIFF)"
-
-            # Define the LocalAI API endpoint
-            API_URL="http://localhost:8080/chat/completions"
-
-            # Create a JSON payload using jq to handle special characters
-            json_payload=$(jq -n --arg input "$input" '{
-            model: "'$MODEL_NAME'",
-            messages: [
-                {
-                role: "system",
-                content: "You are LocalAI-bot. Write a twitter message to notify everyone about the new model from the git diff. Make it informal and really short. An example can include: the name, and a brief description of the model if exists. Also add an hint on how to install it in LocalAI. For example: local-ai run model_name_here"
-                },
-                {
-                role: "user",
-                content: $input
-                }
-            ]
-            }')
-
-            # Send the request to LocalAI
-            response=$(curl -s -X POST $API_URL \
-            -H "Content-Type: application/json" \
-            -d "$json_payload")
-
-            # Extract the summary from the response
-            summary="$(echo $response | jq -r '.choices[0].message.content')"
-
-            # Print the summary
-            #  -H "Authorization: Bearer $API_KEY" \
-            echo "Summary:"
-            echo "$summary"
-            echo "payload sent"
-            echo "$json_payload"
-            {
-                echo 'message<<EOF'
-                echo "$summary"
-                echo EOF
-              } >> "$GITHUB_OUTPUT"
-            docker logs --tail 10 local-ai
-    - uses: Eomm/why-don-t-you-tweet@v2
-      with:
-        tweet-message: ${{ steps.summarize.outputs.message }}
-      env:
-        # Get your tokens from https://developer.twitter.com/apps
-        TWITTER_CONSUMER_API_KEY: ${{ secrets.TWITTER_APP_KEY }}
-        TWITTER_CONSUMER_API_SECRET: ${{ secrets.TWITTER_APP_SECRET }}
-        TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
-        TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
-    - name: Setup tmate session if fails
-      if: ${{ failure() }}
-      uses: mxschmitt/action-tmate@v3.23
-      with:
-        detached: true
-        connect-timeout-seconds: 180
-        limit-access-to-actor: true
--- a/.github/workflows/notify-releases.yaml
+++ b/.github/workflows/notify-releases.yaml
@@ -1,64 +0,0 @@
-name: Release notifications
-on:
-  release:
-    types:
-      - published
-
-jobs:
-  notify-discord:
-    runs-on: ubuntu-latest
-    env:
-        RELEASE_BODY: ${{ github.event.release.body }}
-        RELEASE_TITLE: ${{ github.event.release.name }}
-        RELEASE_TAG_NAME: ${{ github.event.release.tag_name }}
-        MODEL_NAME: gemma-3-12b-it-qat
-    steps:
-    - uses: mudler/localai-github-action@v1
-      with:
-        model: 'gemma-3-12b-it-qat' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
-    - name: Summarize
-      id: summarize
-      run: |
-            input="$RELEASE_TITLE\b$RELEASE_BODY"
-
-            # Define the LocalAI API endpoint
-            API_URL="http://localhost:8080/chat/completions"
-
-            # Create a JSON payload using jq to handle special characters
-            json_payload=$(jq -n --arg input "$input" '{
-            model: "'$MODEL_NAME'",
-            messages: [
-                {
-                role: "system",
-                content: "Write a discord message with a bullet point summary of the release notes."
-                },
-                {
-                role: "user",
-                content: $input
-                }
-            ]
-            }')
-
-            # Send the request to LocalAI API
-            response=$(curl -s -X POST $API_URL \
-            -H "Content-Type: application/json" \
-            -d "$json_payload")
-
-            # Extract the summary from the response
-            summary=$(echo $response | jq -r '.choices[0].message.content')
-
-            # Print the summary
-            #  -H "Authorization: Bearer $API_KEY" \
-            {
-                echo 'message<<EOF'
-                echo "$summary"
-                echo EOF
-              } >> "$GITHUB_OUTPUT"
-    - name: Discord notification
-      env:
-        DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL_RELEASE }}
-        DISCORD_USERNAME: "LocalAI-Bot"
-        DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
-      uses: Ilshidur/action-discord@master
-      with:
-        args: ${{ steps.summarize.outputs.message }}
--- a/.github/workflows/prlint.yaml
+++ b/.github/workflows/prlint.yaml
@@ -1,28 +0,0 @@
-name: Check PR style
-
-on:
-  pull_request_target:
-    types:
-      - opened
-      - reopened
-      - edited
-      - synchronize
-
-jobs:
-  title-lint:
-    runs-on: ubuntu-latest
-    permissions:
-      statuses: write
-    steps:
-      - uses: aslafy-z/conventional-pr-title-action@v3
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-#  check-pr-description:
-#    runs-on: ubuntu-latest
-#    steps:
-#      - uses: actions/checkout@v2
-#      - uses: jadrol/pr-description-checker-action@v1.0.0
-#        id: description-checker
-#        with:
-#          repo-token: ${{ secrets.GITHUB_TOKEN }}
-#          exempt-labels: no qa
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -1,64 +1,99 @@
-name: goreleaser
+name: Build and Release

-on:
-  push:
-    tags:
-      - 'v*'
+on: push
+
+permissions:
+  contents: write

 jobs:
-  goreleaser:
+  build-linux:
+    strategy:
+      matrix:
+        include:
+          - build: 'avx2'
+            defines: ''
+          - build: 'avx'
+            defines: '-DLLAMA_AVX2=OFF'
+          - build: 'avx512'
+            defines: '-DLLAMA_AVX512=ON'
    runs-on: ubuntu-latest
    steps:
-      - name: Checkout
-        uses: actions/checkout@v6
+      - name: Clone
+        uses: actions/checkout@v4
        with:
-          fetch-depth: 0
-      - name: Set up Go
-        uses: actions/setup-go@v5
+          submodules: true
+      - uses: actions/setup-go@v4
        with:
-          go-version: 1.23
-      - name: Run GoReleaser
-        uses: goreleaser/goreleaser-action@v6
-        with:
-          version: v2.11.0
-          args: release --clean
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-  launcher-build-darwin:
-    runs-on: macos-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v6
-        with:
-          fetch-depth: 0
-      - name: Set up Go
-        uses: actions/setup-go@v5
-        with:
-          go-version: 1.23
-      - name: Build launcher for macOS ARM64
-        run: |
-          make build-launcher-darwin
-      - name: Upload DMG to Release
-        uses: softprops/action-gh-release@v2
-        with:
-          files: ./dist/LocalAI.dmg
-  launcher-build-linux:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v6
-        with:
-          fetch-depth: 0
-      - name: Set up Go
-        uses: actions/setup-go@v5
-        with:
-          go-version: 1.23
-      - name: Build launcher for Linux
+          go-version: '>=1.21.0'
+      - name: Dependencies
        run: |
          sudo apt-get update
-          sudo apt-get install golang gcc libgl1-mesa-dev xorg-dev libxkbcommon-dev
-          make build-launcher-linux
-      - name: Upload Linux launcher artifacts
-        uses: softprops/action-gh-release@v2
+          sudo apt-get install build-essential ffmpeg
+
+          git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
+              cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
+                -DgRPC_BUILD_TESTS=OFF \
+                ../.. && sudo make -j12 install
+
+      - name: Build
+        id: build
+        env:
+          CMAKE_ARGS: "${{ matrix.defines }}"
+          BUILD_ID: "${{ matrix.build }}"
+        run: |
+          STATIC=true make dist
+      - uses: actions/upload-artifact@v3
        with:
-          files: ./local-ai-launcher-linux.tar.xz
+          name: ${{ matrix.build }}
+          path: release/
+      - name: Release
+        uses: softprops/action-gh-release@v1
+        if: startsWith(github.ref, 'refs/tags/')
+        with:
+          files: |
+            release/*
+
+  build-macOS:
+    strategy:
+      matrix:
+        include:
+          - build: 'avx2'
+            defines: ''
+          - build: 'avx'
+            defines: '-DLLAMA_AVX2=OFF'
+          - build: 'avx512'
+            defines: '-DLLAMA_AVX512=ON'
+    runs-on: macOS-latest
+    steps:
+      - name: Clone
+        uses: actions/checkout@v4
+        with:
+          submodules: true
+      - uses: actions/setup-go@v4
+        with:
+          go-version: '>=1.21.0'
+      - name: Dependencies
+        run: |
+          git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
+              cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
+                -DgRPC_BUILD_TESTS=OFF \
+                ../.. && make -j12 install && rm -rf grpc
+      - name: Build
+        id: build
+        env:
+          CMAKE_ARGS: "${{ matrix.defines }}"
+          BUILD_ID: "${{ matrix.build }}"
+        run: |
+          export C_INCLUDE_PATH=/usr/local/include
+          export CPLUS_INCLUDE_PATH=/usr/local/include
+          make dist
+      - uses: actions/upload-artifact@v3
+        with:
+          name: ${{ matrix.build }}
+          path: release/
+      - name: Release
+        uses: softprops/action-gh-release@v1
+        if: startsWith(github.ref, 'refs/tags/')
+        with:
+          files: |
+            release/*
--- a/.github/workflows/secscan.yaml
+++ b/.github/workflows/secscan.yaml
@@ -1,30 +0,0 @@
-name: "Security Scan"
-
-# Run workflow each time code is pushed to your repository and on a schedule.
-# The scheduled workflow runs every at 00:00 on Sunday UTC time.
-on:
-  push:
-  schedule:
-  - cron: '0 0 * * 0'
-
-jobs:
-  tests:
-    runs-on: ubuntu-latest
-    env:
-      GO111MODULE: on
-    steps:
-      - name: Checkout Source
-        uses: actions/checkout@v6
-        if: ${{ github.actor != 'dependabot[bot]' }}
-      - name: Run Gosec Security Scanner
-        if: ${{ github.actor != 'dependabot[bot]' }}
-        uses: securego/gosec@v2.22.9
-        with:
-          # we let the report trigger content trigger a failure using the GitHub Security features.
-          args: '-no-fail -fmt sarif -out results.sarif ./...'
-      - name: Upload SARIF file
-        if: ${{ github.actor != 'dependabot[bot]' }}
-        uses: github/codeql-action/upload-sarif@v4
-        with:
-          # Path to SARIF file relative to the root of the repository
-          sarif_file: results.sarif
--- a/.github/workflows/stalebot.yml
+++ b/.github/workflows/stalebot.yml
@@ -1,24 +0,0 @@
-name: 'Close stale issues and PRs'
-permissions:
-  issues: write
-  pull-requests: write
-on:
-  schedule:
-    - cron: '30 1 * * *'
-
-jobs:
-  stale:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/stale@997185467fa4f803885201cee163a9f38240193d # v9
-        with:
-          stale-issue-message: 'This issue is stale because it has been open 90 days with no activity. Remove stale label or comment or this will be closed in 5 days.'
-          stale-pr-message: 'This PR is stale because it has been open 90 days with no activity. Remove stale label or comment or this will be closed in 10 days.'
-          close-issue-message: 'This issue was closed because it has been stalled for 5 days with no activity.'
-          close-pr-message: 'This PR was closed because it has been stalled for 10 days with no activity.'
-          days-before-issue-stale: 90
-          days-before-pr-stale: 90
-          days-before-issue-close: 5
-          days-before-pr-close: 10
-          exempt-issue-labels: 'roadmap'
-          exempt-pr-labels: 'roadmap'
--- a/.github/workflows/test-extra.yml
+++ b/.github/workflows/test-extra.yml
@@ -1,268 +0,0 @@
---
-name: 'Tests extras backends'
-
-on:
-  pull_request:
-  push:
-    branches:
-      - master
-    tags:
-      - '*'
-
-concurrency:
-  group: ci-tests-extra-${{ github.head_ref || github.ref }}-${{ github.repository }}
-  cancel-in-progress: true
-
-jobs:
-  # Requires CUDA
-  # tests-chatterbox-tts:
-  #   runs-on: ubuntu-latest
-  #   steps:
-  #     - name: Clone
-  #       uses: actions/checkout@v6
-  #       with:
-  #         submodules: true
-  #     - name: Dependencies
-  #       run: |
-  #         sudo apt-get update
-  #         sudo apt-get install build-essential ffmpeg
-  #         # Install UV
-  #         curl -LsSf https://astral.sh/uv/install.sh | sh
-  #         sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-  #         sudo apt-get install -y libopencv-dev
-  #         pip install --user --no-cache-dir grpcio-tools==1.64.1
-
-  #     - name: Test chatterbox-tts
-  #       run: |
-  #          make --jobs=5 --output-sync=target -C backend/python/chatterbox
-  #          make --jobs=5 --output-sync=target -C backend/python/chatterbox test
-  tests-transformers:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v6
-        with:
-          submodules: true
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential ffmpeg
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-          sudo apt-get install -y libopencv-dev
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
-
-      - name: Test transformers
-        run: |
-           make --jobs=5 --output-sync=target -C backend/python/transformers
-           make --jobs=5 --output-sync=target -C backend/python/transformers test
-  tests-rerankers:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v6
-        with:
-          submodules: true
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential ffmpeg
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-          sudo apt-get install -y libopencv-dev
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
-
-      - name: Test rerankers
-        run: |
-           make --jobs=5 --output-sync=target -C backend/python/rerankers
-           make --jobs=5 --output-sync=target -C backend/python/rerankers test
-
-  tests-diffusers:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v6
-        with:
-          submodules: true
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y build-essential ffmpeg
-          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-          sudo apt-get install -y libopencv-dev
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
-      - name: Test diffusers
-        run: |
-          make --jobs=5 --output-sync=target -C backend/python/diffusers
-          make --jobs=5 --output-sync=target -C backend/python/diffusers test
-
-  #tests-vllm:
-  #  runs-on: ubuntu-latest
-  #  steps:
-  #    - name: Clone
-  #      uses: actions/checkout@v6
-  #      with:
-  #        submodules: true
-  #    - name: Dependencies
-  #      run: |
-  #        sudo apt-get update
-  #        sudo apt-get install -y build-essential ffmpeg
-  #        sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-  #        sudo apt-get install -y libopencv-dev
-  #        # Install UV
-  #        curl -LsSf https://astral.sh/uv/install.sh | sh
-  #        pip install --user --no-cache-dir grpcio-tools==1.64.1
-  #    - name: Test vllm backend
-  #      run: |
-  #        make --jobs=5 --output-sync=target -C backend/python/vllm
-  #        make --jobs=5 --output-sync=target -C backend/python/vllm test
-  # tests-transformers-musicgen:
-  #   runs-on: ubuntu-latest
-  #   steps:
-  #     - name: Clone
-  #       uses: actions/checkout@v6
-  #       with:
-  #         submodules: true
-  #     - name: Dependencies
-  #       run: |
-  #         sudo apt-get update
-  #         sudo apt-get install build-essential ffmpeg
-  #         # Install UV
-  #         curl -LsSf https://astral.sh/uv/install.sh | sh
-  #         sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-  #         sudo apt-get install -y libopencv-dev
-  #         pip install --user --no-cache-dir grpcio-tools==1.64.1
-
-  #     - name: Test transformers-musicgen
-  #       run: |
-  #          make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
-  #          make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
-
-  # tests-bark:
-  #   runs-on: ubuntu-latest
-  #   steps:
-  #     - name: Release space from worker
-  #       run: |
-  #           echo "Listing top largest packages"
-  #           pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-  #           head -n 30 <<< "${pkgs}"
-  #           echo
-  #           df -h
-  #           echo
-  #           sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
-  #           sudo apt-get remove --auto-remove android-sdk-platform-tools || true
-  #           sudo apt-get purge --auto-remove android-sdk-platform-tools || true
-  #           sudo rm -rf /usr/local/lib/android
-  #           sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
-  #           sudo rm -rf /usr/share/dotnet
-  #           sudo apt-get remove -y '^mono-.*' || true
-  #           sudo apt-get remove -y '^ghc-.*' || true
-  #           sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
-  #           sudo apt-get remove -y 'php.*' || true
-  #           sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
-  #           sudo apt-get remove -y '^google-.*' || true
-  #           sudo apt-get remove -y azure-cli || true
-  #           sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
-  #           sudo apt-get remove -y '^gfortran-.*' || true
-  #           sudo apt-get remove -y microsoft-edge-stable || true
-  #           sudo apt-get remove -y firefox || true
-  #           sudo apt-get remove -y powershell || true
-  #           sudo apt-get remove -y r-base-core || true
-  #           sudo apt-get autoremove -y
-  #           sudo apt-get clean
-  #           echo
-  #           echo "Listing top largest packages"
-  #           pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-  #           head -n 30 <<< "${pkgs}"
-  #           echo
-  #           sudo rm -rfv build || true
-  #           sudo rm -rf /usr/share/dotnet || true
-  #           sudo rm -rf /opt/ghc || true
-  #           sudo rm -rf "/usr/local/share/boost" || true
-  #           sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
-  #           df -h
-  #     - name: Clone
-  #       uses: actions/checkout@v6
-  #       with:
-  #         submodules: true
-  #     - name: Dependencies
-  #       run: |
-  #         sudo apt-get update
-  #         sudo apt-get install build-essential ffmpeg
-  #         # Install UV
-  #         curl -LsSf https://astral.sh/uv/install.sh | sh
-  #         sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-  #         sudo apt-get install -y libopencv-dev
-  #         pip install --user --no-cache-dir grpcio-tools==1.64.1
-
-  #     - name: Test bark
-  #       run: |
-  #          make --jobs=5 --output-sync=target -C backend/python/bark
-  #          make --jobs=5 --output-sync=target -C backend/python/bark test
-
-
-  # Below tests needs GPU. Commented out for now
-  # TODO: Re-enable as soon as we have GPU nodes
-  # tests-vllm:
-  #   runs-on: ubuntu-latest
-  #   steps:
-  #     - name: Clone
-  #       uses: actions/checkout@v6
-  #       with:
-  #         submodules: true
-  #     - name: Dependencies
-  #       run: |
-  #         sudo apt-get update
-  #         sudo apt-get install build-essential ffmpeg
-  #         # Install UV
-  #         curl -LsSf https://astral.sh/uv/install.sh | sh
-  #         sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-  #         sudo apt-get install -y libopencv-dev
-  #         pip install --user --no-cache-dir grpcio-tools==1.64.1
-  #     - name: Test vllm
-  #       run: |
-  #          make --jobs=5 --output-sync=target -C backend/python/vllm
-  #          make --jobs=5 --output-sync=target -C backend/python/vllm test
-
-  tests-coqui:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v6
-        with:
-          submodules: true
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential ffmpeg
-          sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
-      - name: Test coqui
-        run: |
-          make --jobs=5 --output-sync=target -C backend/python/coqui
-          make --jobs=5 --output-sync=target -C backend/python/coqui test
-  tests-moonshine:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v6
-        with:
-          submodules: true
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential ffmpeg
-          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
-      - name: Test moonshine
-        run: |
-          make --jobs=5 --output-sync=target -C backend/python/moonshine
-          make --jobs=5 --output-sync=target -C backend/python/moonshine test
--- a/.github/workflows/disabled/test-gpu.yml
+++ b/.github/workflows/disabled/test-gpu.yml
@@ -15,7 +15,7 @@ concurrency:

 jobs:
  ubuntu-latest:
-    runs-on: gpu
+    runs-on: self-hosted
    strategy:
      matrix:
        go-version: ['1.21.x']
@@ -40,8 +40,6 @@ jobs:
          if [ ! -e /run/systemd/system ]; then
            sudo mkdir /run/systemd/system
          fi
-          sudo mkdir -p /host/tests/${{ github.head_ref || github.ref }}
-          sudo chmod -R 777 /host/tests/${{ github.head_ref || github.ref }}
          make \
            TEST_DIR="/host/tests/${{ github.head_ref || github.ref }}" \
            BUILD_TYPE=cublas \
@@ -59,5 +57,4 @@ jobs:
          make \
            TEST_DIR="/host/tests/${{ github.head_ref || github.ref }}" \
            teardown-e2e || true
-          sudo rm -rf /host/tests/${{ github.head_ref || github.ref }} || true
-          docker system prune -f -a --volumes || true
+          docker system prune -f -a --volumes || true
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -9,124 +9,16 @@ on:
    tags:
      - '*'

-env:
-  GRPC_VERSION: v1.65.0
-
 concurrency:
  group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
  cancel-in-progress: true

 jobs:
-  tests-linux:
+  ubuntu-latest:
    runs-on: ubuntu-latest
    strategy:
      matrix:
-        go-version: ['1.25.x']
-    steps:
-      - name: Free Disk Space (Ubuntu)
-        uses: jlumbroso/free-disk-space@main
-        with:
-          # this might remove tools that are actually needed,
-          # if set to "true" but frees about 6 GB
-          tool-cache: true
-          # all of these default to true, but feel free to set to
-          # "false" if necessary for your workflow
-          android: true
-          dotnet: true
-          haskell: true
-          large-packages: true
-          docker-images: true
-          swap-storage: true
-      - name: Release space from worker
-        run: |
-          echo "Listing top largest packages"
-          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-          head -n 30 <<< "${pkgs}"
-          echo
-          df -h
-          echo
-          sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
-          sudo apt-get remove --auto-remove android-sdk-platform-tools || true
-          sudo apt-get purge --auto-remove android-sdk-platform-tools || true
-          sudo rm -rf /usr/local/lib/android
-          sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
-          sudo rm -rf /usr/share/dotnet
-          sudo apt-get remove -y '^mono-.*' || true
-          sudo apt-get remove -y '^ghc-.*' || true
-          sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
-          sudo apt-get remove -y 'php.*' || true
-          sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
-          sudo apt-get remove -y '^google-.*' || true
-          sudo apt-get remove -y azure-cli || true
-          sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
-          sudo apt-get remove -y '^gfortran-.*' || true
-          sudo apt-get autoremove -y
-          sudo apt-get clean
-          echo
-          echo "Listing top largest packages"
-          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-          head -n 30 <<< "${pkgs}"
-          echo
-          sudo rm -rfv build || true
-          df -h
-      - name: Clone
-        uses: actions/checkout@v6
-        with:
-          submodules: true
-      - name: Setup Go ${{ matrix.go-version }}
-        uses: actions/setup-go@v5
-        with:
-          go-version: ${{ matrix.go-version }}
-          cache: false
-      # You can test your matrix by printing the current Go version
-      - name: Display Go version
-        run: go version
-      - name: Proto Dependencies
-        run: |
-          # Install protoc
-          curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
-          unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
-          rm protoc.zip
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-          PATH="$PATH:$HOME/go/bin" make protogen-go
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
-          sudo apt-get install -y libgmock-dev clang
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          sudo apt-get install -y ca-certificates cmake patch python3-pip unzip
-          sudo apt-get install -y libopencv-dev
-
-          curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
-          unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
-          rm protoc.zip
-
-          curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
-          sudo dpkg -i cuda-keyring_1.1-1_all.deb
-          sudo apt-get update
-          sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
-          export CUDACXX=/usr/local/cuda/bin/nvcc
-          make -C backend/python/transformers
-
-          make backends/huggingface backends/llama-cpp backends/local-store backends/silero-vad backends/piper backends/whisper backends/stablediffusion-ggml
-        env:
-          CUDA_VERSION: 12-4
-      - name: Test
-        run: |
-          PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test
-      - name: Setup tmate session if tests fail
-        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.23
-        with:
-          detached: true
-          connect-timeout-seconds: 180
-          limit-access-to-actor: true
-
-  tests-aio-container:
-    runs-on: ubuntu-latest
+        go-version: ['1.21.x']
    steps:
      - name: Release space from worker
        run: |
@@ -161,68 +53,79 @@ jobs:
          sudo rm -rfv build || true
          df -h
      - name: Clone
-        uses: actions/checkout@v6
-        with:
-          submodules: true
-      - name: Dependencies
-        run: |
-          # Install protoc
-          curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
-          unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
-          rm protoc.zip
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-          PATH="$PATH:$HOME/go/bin" make protogen-go
-      - name: Test
-        run: |
-            PATH="$PATH:$HOME/go/bin" make backends/local-store backends/silero-vad backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio
-      - name: Setup tmate session if tests fail
-        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.23
-        with:
-          detached: true
-          connect-timeout-seconds: 180
-          limit-access-to-actor: true
-
-  tests-apple:
-    runs-on: macos-latest
-    strategy:
-      matrix:
-        go-version: ['1.25.x']
-    steps:
-      - name: Clone
-        uses: actions/checkout@v6
-        with:
+        uses: actions/checkout@v4
+        with: 
          submodules: true
      - name: Setup Go ${{ matrix.go-version }}
-        uses: actions/setup-go@v5
+        uses: actions/setup-go@v4
        with:
          go-version: ${{ matrix.go-version }}
-          cache: false
      # You can test your matrix by printing the current Go version
      - name: Display Go version
        run: go version
      - name: Dependencies
        run: |
-          brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
-          pip install --user --no-cache-dir grpcio-tools grpcio
-      - name: Build llama-cpp-darwin
+          sudo apt-get update
+          sudo apt-get install build-essential ffmpeg
+
+          sudo apt-get install -y ca-certificates cmake curl patch
+          sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
+          sudo pip install -r extra/requirements.txt
+
+
+          # Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
+          GO_TAGS="tts stablediffusion" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
+
+          sudo mkdir /build && sudo chmod -R 777 /build && cd /build && \
+          curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v1.11.0.tar.gz" | \
+          tar -xzvf - && \
+          mkdir -p "spdlog-1.11.0/build" && \
+          cd "spdlog-1.11.0/build" && \
+          cmake ..  && \
+          make -j8 && \
+          sudo cmake --install . --prefix /usr && mkdir -p "lib/Linux-$(uname -m)" && \
+          cd /build && \
+          mkdir -p "lib/Linux-$(uname -m)/piper_phonemize" && \
+          curl -L "https://github.com/rhasspy/piper-phonemize/releases/download/v1.0.0/libpiper_phonemize-amd64.tar.gz" | \
+          tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - && ls -liah /build/lib/Linux-$(uname -m)/piper_phonemize/ && \
+          sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \
+          sudo ln -s /usr/lib/libpiper_phonemize.so /usr/lib/libpiper_phonemize.so.1 && \
+          sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/
+
+          git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
+              cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
+                -DgRPC_BUILD_TESTS=OFF \
+                ../.. && sudo make -j12 install
+
+      - name: Test
        run: |
-          make protogen-go
-          make backends/llama-cpp-darwin
+          ESPEAK_DATA="/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data" GO_TAGS="tts stablediffusion" make test
+
+  macOS-latest:
+    runs-on: macOS-latest
+    strategy:
+      matrix:
+        go-version: ['1.21.x']
+    steps:
+      - name: Clone
+        uses: actions/checkout@v4
+        with: 
+          submodules: true
+      - name: Setup Go ${{ matrix.go-version }}
+        uses: actions/setup-go@v4
+        with:
+          go-version: ${{ matrix.go-version }}
+      # You can test your matrix by printing the current Go version
+      - name: Display Go version
+        run: go version
+      - name: Dependencies
+        run: |
+          git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
+              cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
+                -DgRPC_BUILD_TESTS=OFF \
+                ../.. && make -j12 install && rm -rf grpc
      - name: Test
        run: |
          export C_INCLUDE_PATH=/usr/local/include
          export CPLUS_INCLUDE_PATH=/usr/local/include
-          export CC=/opt/homebrew/opt/llvm/bin/clang
-          # Used to run the newer GNUMake version from brew that supports --output-sync
-          export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
-          PATH="$PATH:$HOME/go/bin" make protogen-go
-          PATH="$PATH:$HOME/go/bin" BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
-      - name: Setup tmate session if tests fail
-        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.23
-        with:
-          detached: true
-          connect-timeout-seconds: 180
-          limit-access-to-actor: true
+          CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test
--- a/.github/workflows/update_swagger.yaml
+++ b/.github/workflows/update_swagger.yaml
@@ -1,37 +0,0 @@
-name: Update swagger
-on:
-  schedule:
-    - cron: 0 20 * * *
-  workflow_dispatch:
-jobs:
-  swagger:
-    strategy:
-      fail-fast: false
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v6
-      - uses: actions/setup-go@v5
-        with:
-          go-version: 'stable'
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install protobuf-compiler
-      - run: |
-          go install github.com/swaggo/swag/cmd/swag@latest
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
-      - name: Bump swagger 🔧
-        run: |
-          make protogen-go swagger
-      - name: Create Pull Request
-        uses: peter-evans/create-pull-request@v8
-        with:
-          token: ${{ secrets.UPDATE_BOT_TOKEN }}
-          push-to-fork: ci-forks/LocalAI
-          commit-message: 'feat(swagger): update swagger'
-          title: 'feat(swagger): update swagger'
-          branch: "update/swagger"
-          body:  Update swagger
-          signoff: true
-
--- a/.github/workflows/yaml-check.yml
+++ b/.github/workflows/yaml-check.yml
@@ -1,26 +0,0 @@
-name: 'Yamllint GitHub Actions'
-on:
-  - pull_request
-jobs:
-  yamllint:
-    name: 'Yamllint'
-    runs-on: ubuntu-latest
-    steps:
-      - name: 'Checkout'
-        uses: actions/checkout@master
-      - name: 'Yamllint model gallery'
-        uses: karancode/yamllint-github-action@master
-        with:
-          yamllint_file_or_dir: 'gallery'
-          yamllint_strict: false
-          yamllint_comment: true
-        env:
-          GITHUB_ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      - name: 'Yamllint Backend gallery'
-        uses: karancode/yamllint-github-action@master
-        with:
-          yamllint_file_or_dir: 'backend'
-          yamllint_strict: false
-          yamllint_comment: true
-        env:
-          GITHUB_ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--- a/.gitignore
+++ b/.gitignore
@@ -1,36 +1,30 @@
 # go-llama build artifacts
-/sources/
+go-llama
+go-llama-stable
+/gpt4all
+go-stable-diffusion
+go-piper
+/go-bert
+go-ggllm
+/piper
 __pycache__/
 *.a
-*.o
 get-sources
-prepare-sources
-/backend/cpp/llama-cpp/grpc-server
-/backend/cpp/llama-cpp/llama.cpp
-/backend/cpp/llama-*
-!backend/cpp/llama-cpp
-/backends
-/backend-images
-/result.yaml
-protoc
-
-*.log

 go-ggml-transformers
 go-gpt2
+go-rwkv
 whisper.cpp
 /bloomz
 go-bert

 # LocalAI build binary
 LocalAI
-/local-ai
-/local-ai-launcher
+local-ai
 # prevent above rules from omitting the helm chart
 !charts/*
 # prevent above rules from omitting the api/localai folder
 !api/localai
-!core/**/localai

 # Ignore models
 models/*
@@ -44,22 +38,6 @@ release/
 .idea

 # Generated during build
-backend-assets/*
-!backend-assets/.keep
+backend-assets/
 prepare
 /ggml-metal.metal
-docs/static/gallery.html
-
-# Protobuf generated files
-*.pb.go
-*pb2.py
-*pb2_grpc.py
-
-# SonarQube
-.scannerwork
-
-# backend virtual environments
-**/venv
-
-# per-developer customization files for the development container
-.devcontainer/customization/*
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +0,0 @@
-[submodule "docs/themes/hugo-theme-relearn"]
-	path = docs/themes/hugo-theme-relearn
-	url = https://github.com/McShelby/hugo-theme-relearn.git
--- a/.goreleaser.yaml
+++ b/.goreleaser.yaml
@@ -1,36 +0,0 @@
-version: 2
-before:
-  hooks:
-    - make protogen-go
-    - go mod tidy
-dist: release
-source:
-  enabled: true
-  name_template: '{{ .ProjectName }}-{{ .Tag }}-source'
-builds:
-  - main: ./cmd/local-ai
-    env:
-      - CGO_ENABLED=0
-    ldflags:
-      - -s -w
-      - -X "github.com/mudler/LocalAI/internal.Version={{ .Tag }}"
-      - -X "github.com/mudler/LocalAI/internal.Commit={{ .FullCommit }}"
-    goos:
-      - linux
-      - darwin
-      #- windows
-    goarch:
-      - amd64
-      - arm64
-    ignore:
-      - goos: darwin
-        goarch: amd64
-archives:
-  - formats: [ 'binary' ] # this removes the tar of the archives, leaving the binaries alone
-    name_template: local-ai-{{ .Tag }}-{{ .Os }}-{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}
-checksum:
-  name_template: '{{ .ProjectName }}-{{ .Tag }}-checksums.txt'
-snapshot:
-  version_template: "{{ .Tag }}-next"
-changelog:
-  use: github-native
--- a/.vscode/extensions.json
+++ b/.vscode/extensions.json
@@ -1,5 +0,0 @@
-{
-    "recommendations": [
-        "golang.go"
-    ]
-}
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -3,12 +3,12 @@
    "configurations": [
        {
            "name": "Python: Current File",
-            "type": "debugpy",
+            "type": "python",
            "request": "launch",
            "program": "${file}",
            "console": "integratedTerminal",
            "justMyCode": false,
-            "cwd": "${fileDirname}",
+            "cwd": "${workspaceFolder}/examples/langchain-chroma",
            "env": {
                "OPENAI_API_BASE": "http://localhost:8080/v1",
                "OPENAI_API_KEY": "abc"
@@ -19,16 +19,15 @@
            "type": "go",
            "request": "launch",
            "mode": "debug",
-            "program": "${workspaceRoot}",
-            "args": [],
+            "program": "${workspaceFolder}/main.go",
+            "args": [
+                "api"
+            ],
            "env": {
-                "LOCALAI_LOG_LEVEL": "debug",
-                "LOCALAI_P2P": "true",
-                "LOCALAI_FEDERATED": "true"
-            },
-            "buildFlags": ["-tags", "", "-v"],
-            "envFile": "${workspaceFolder}/.env",
-            "cwd": "${workspaceRoot}"
+                "C_INCLUDE_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
+                "LIBRARY_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
+                "DEBUG": "true"
+            }
        }
    ]
 }
--- a/.yamllint
+++ b/.yamllint
@@ -1,4 +0,0 @@
-extends: default
-
-rules:
-    line-length: disable
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -1,282 +0,0 @@
-# Build and testing
-
-Building and testing the project depends on the components involved and the platform where development is taking place. Due to the amount of context required it's usually best not to try building or testing the project unless the user requests it. If you must build the project then inspect the Makefile in the project root and the Makefiles of any backends that are effected by changes you are making. In addition the workflows in .github/workflows can be used as a reference when it is unclear how to build or test a component. The primary Makefile contains targets for building inside or outside Docker, if the user has not previously specified a preference then ask which they would like to use.
-
-## Building a specified backend
-
-Let's say the user wants to build a particular backend for a given platform. For example let's say they want to build bark for ROCM/hipblas
-
- The Makefile has targets like `docker-build-bark` created with `generate-docker-build-target` at the time of writing. Recently added backends may require a new target.
- At a minimum we need to set the BUILD_TYPE, BASE_IMAGE build-args
-  - Use .github/workflows/backend.yml as a reference it lists the needed args in the `include` job strategy matrix
-  - l4t and cublas also requires the CUDA major and minor version
- You can pretty print a command like `DOCKER_MAKEFLAGS=-j$(nproc --ignore=1) BUILD_TYPE=hipblas BASE_IMAGE=rocm/dev-ubuntu-24.04:6.4.4 make docker-build-bark`
- Unless the user specifies that they want you to run the command, then just print it because not all agent frontends handle long running jobs well and the output may overflow your context
- The user may say they want to build AMD or ROCM instead of hipblas, or Intel instead of SYCL or NVIDIA insted of l4t or cublas. Ask for confirmation if there is ambiguity.
- Sometimes the user may need extra parameters to be added to `docker build` (e.g. `--platform` for cross-platform builds or `--progress` to view the full logs), in which case you can generate the `docker build` command directly.
-
-## Adding a New Backend
-
-When adding a new backend to LocalAI, you need to update several files to ensure the backend is properly built, tested, and registered. Here's a step-by-step guide based on the pattern used for adding backends like `moonshine`:
-
-### 1. Create Backend Directory Structure
-
-Create the backend directory under the appropriate location:
- **Python backends**: `backend/python/<backend-name>/`
- **Go backends**: `backend/go/<backend-name>/`
- **C++ backends**: `backend/cpp/<backend-name>/`
-
-For Python backends, you'll typically need:
- `backend.py` - Main gRPC server implementation
- `Makefile` - Build configuration
- `install.sh` - Installation script for dependencies
- `protogen.sh` - Protocol buffer generation script
- `requirements.txt` - Python dependencies
- `run.sh` - Runtime script
- `test.py` / `test.sh` - Test files
-
-### 2. Add Build Configurations to `.github/workflows/backend.yml`
-
-Add build matrix entries for each platform/GPU type you want to support. Look at similar backends (e.g., `chatterbox`, `faster-whisper`) for reference.
-
-**Placement in file:**
- CPU builds: Add after other CPU builds (e.g., after `cpu-chatterbox`)
- CUDA 12 builds: Add after other CUDA 12 builds (e.g., after `gpu-nvidia-cuda-12-chatterbox`)
- CUDA 13 builds: Add after other CUDA 13 builds (e.g., after `gpu-nvidia-cuda-13-chatterbox`)
-
-**Additional build types you may need:**
- ROCm/HIP: Use `build-type: 'hipblas'` with `base-image: "rocm/dev-ubuntu-24.04:6.4.4"`
- Intel/SYCL: Use `build-type: 'intel'` or `build-type: 'sycl_f16'`/`sycl_f32` with `base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"`
- L4T (ARM): Use `build-type: 'l4t'` with `platforms: 'linux/arm64'` and `runs-on: 'ubuntu-24.04-arm'`
-
-### 3. Add Backend Metadata to `backend/index.yaml`
-
-**Step 3a: Add Meta Definition**
-
-Add a YAML anchor definition in the `## metas` section (around line 2-300). Look for similar backends to use as a template such as `diffusers` or `chatterbox`
-
-**Step 3b: Add Image Entries**
-
-Add image entries at the end of the file, following the pattern of similar backends such as `diffusers` or `chatterbox`. Include both `latest` (production) and `master` (development) tags.
-
-### 4. Update the Makefile
-
-The Makefile needs to be updated in several places to support building and testing the new backend:
-
-**Step 4a: Add to `.NOTPARALLEL`**
-
-Add `backends/<backend-name>` to the `.NOTPARALLEL` line (around line 2) to prevent parallel execution conflicts:
-
-```makefile
-.NOTPARALLEL: ... backends/<backend-name>
-```
-
-**Step 4b: Add to `prepare-test-extra`**
-
-Add the backend to the `prepare-test-extra` target (around line 312) to prepare it for testing:
-
-```makefile
-prepare-test-extra: protogen-python
-	...
-	$(MAKE) -C backend/python/<backend-name>
-```
-
-**Step 4c: Add to `test-extra`**
-
-Add the backend to the `test-extra` target (around line 319) to run its tests:
-
-```makefile
-test-extra: prepare-test-extra
-	...
-	$(MAKE) -C backend/python/<backend-name> test
-```
-
-**Step 4d: Add Backend Definition**
-
-Add a backend definition variable in the backend definitions section (around line 428-457). The format depends on the backend type:
-
-**For Python backends with root context** (like `faster-whisper`, `bark`):
-```makefile
-BACKEND_<BACKEND_NAME> = <backend-name>|python|.|false|true
-```
-
-**For Python backends with `./backend` context** (like `chatterbox`, `moonshine`):
-```makefile
-BACKEND_<BACKEND_NAME> = <backend-name>|python|./backend|false|true
-```
-
-**For Go backends**:
-```makefile
-BACKEND_<BACKEND_NAME> = <backend-name>|golang|.|false|true
-```
-
-**Step 4e: Generate Docker Build Target**
-
-Add an eval call to generate the docker-build target (around line 480-501):
-
-```makefile
-$(eval $(call generate-docker-build-target,$(BACKEND_<BACKEND_NAME>)))
-```
-
-**Step 4f: Add to `docker-build-backends`**
-
-Add `docker-build-<backend-name>` to the `docker-build-backends` target (around line 507):
-
-```makefile
-docker-build-backends: ... docker-build-<backend-name>
-```
-
-**Determining the Context:**
-
- If the backend is in `backend/python/<backend-name>/` and uses `./backend` as context in the workflow file, use `./backend` context
- If the backend is in `backend/python/<backend-name>/` but uses `.` as context in the workflow file, use `.` context
- Check similar backends to determine the correct context
-
-### 5. Verification Checklist
-
-After adding a new backend, verify:
-
- [ ] Backend directory structure is complete with all necessary files
- [ ] Build configurations added to `.github/workflows/backend.yml` for all desired platforms
- [ ] Meta definition added to `backend/index.yaml` in the `## metas` section
- [ ] Image entries added to `backend/index.yaml` for all build variants (latest + development)
- [ ] Tag suffixes match between workflow file and index.yaml
- [ ] Makefile updated with all 6 required changes (`.NOTPARALLEL`, `prepare-test-extra`, `test-extra`, backend definition, docker-build target eval, `docker-build-backends`)
- [ ] No YAML syntax errors (check with linter)
- [ ] No Makefile syntax errors (check with linter)
- [ ] Follows the same pattern as similar backends (e.g., if it's a transcription backend, follow `faster-whisper` pattern)
-
-### 6. Example: Adding a Python Backend
-
-For reference, when `moonshine` was added:
- **Files created**: `backend/python/moonshine/{backend.py, Makefile, install.sh, protogen.sh, requirements.txt, run.sh, test.py, test.sh}`
- **Workflow entries**: 3 build configurations (CPU, CUDA 12, CUDA 13)
- **Index entries**: 1 meta definition + 6 image entries (cpu, cuda12, cuda13 × latest/development)
- **Makefile updates**: 
-  - Added to `.NOTPARALLEL` line
-  - Added to `prepare-test-extra` and `test-extra` targets
-  - Added `BACKEND_MOONSHINE = moonshine|python|./backend|false|true`
-  - Added eval for docker-build target generation
-  - Added `docker-build-moonshine` to `docker-build-backends`
-
-# Coding style
-
- The project has the following .editorconfig
-
-```
-root = true
-
-[*]
-indent_style = space
-indent_size = 2
-end_of_line = lf
-charset = utf-8
-trim_trailing_whitespace = true
-insert_final_newline = true
-
-[*.go]
-indent_style = tab
-
-[Makefile]
-indent_style = tab
-
-[*.proto]
-indent_size = 2
-
-[*.py]
-indent_size = 4
-
-[*.js]
-indent_size = 2
-
-[*.yaml]
-indent_size = 2
-
-[*.md]
-trim_trailing_whitespace = false
-```
-
- Use comments sparingly to explain why code does something, not what it does. Comments are there to add context that would be difficult to deduce from reading the code.
- Prefer modern Go e.g. use `any` not `interface{}`
-
-# Logging
-
-Use `github.com/mudler/xlog` for logging which has the same API as slog.
-
-# llama.cpp Backend
-
-The llama.cpp backend (`backend/cpp/llama-cpp/grpc-server.cpp`) is a gRPC adaptation of the upstream HTTP server (`llama.cpp/tools/server/server.cpp`). It uses the same underlying server infrastructure from `llama.cpp/tools/server/server-context.cpp`.
-
-## Building and Testing
-
- Test llama.cpp backend compilation: `make backends/llama-cpp`
- The backend is built as part of the main build process
- Check `backend/cpp/llama-cpp/Makefile` for build configuration
-
-## Architecture
-
- **grpc-server.cpp**: gRPC server implementation, adapts HTTP server patterns to gRPC
- Uses shared server infrastructure: `server-context.cpp`, `server-task.cpp`, `server-queue.cpp`, `server-common.cpp`
- The gRPC server mirrors the HTTP server's functionality but uses gRPC instead of HTTP
-
-## Common Issues When Updating llama.cpp
-
-When fixing compilation errors after upstream changes:
-1. Check how `server.cpp` (HTTP server) handles the same change
-2. Look for new public APIs or getter methods
-3. Store copies of needed data instead of accessing private members
-4. Update function calls to match new signatures
-5. Test with `make backends/llama-cpp`
-
-## Key Differences from HTTP Server
-
- gRPC uses `BackendServiceImpl` class with gRPC service methods
- HTTP server uses `server_routes` with HTTP handlers
- Both use the same `server_context` and task queue infrastructure
- gRPC methods: `LoadModel`, `Predict`, `PredictStream`, `Embedding`, `Rerank`, `TokenizeString`, `GetMetrics`, `Health`
-
-## Tool Call Parsing Maintenance
-
-When working on JSON/XML tool call parsing functionality, always check llama.cpp for reference implementation and updates:
-
-### Checking for XML Parsing Changes
-
-1. **Review XML Format Definitions**: Check `llama.cpp/common/chat-parser-xml-toolcall.h` for `xml_tool_call_format` struct changes
-2. **Review Parsing Logic**: Check `llama.cpp/common/chat-parser-xml-toolcall.cpp` for parsing algorithm updates
-3. **Review Format Presets**: Check `llama.cpp/common/chat-parser.cpp` for new XML format presets (search for `xml_tool_call_format form`)
-4. **Review Model Lists**: Check `llama.cpp/common/chat.h` for `COMMON_CHAT_FORMAT_*` enum values that use XML parsing:
-   - `COMMON_CHAT_FORMAT_GLM_4_5`
-   - `COMMON_CHAT_FORMAT_MINIMAX_M2`
-   - `COMMON_CHAT_FORMAT_KIMI_K2`
-   - `COMMON_CHAT_FORMAT_QWEN3_CODER_XML`
-   - `COMMON_CHAT_FORMAT_APRIEL_1_5`
-   - `COMMON_CHAT_FORMAT_XIAOMI_MIMO`
-   - Any new formats added
-
-### Model Configuration Options
-
-Always check `llama.cpp` for new model configuration options that should be supported in LocalAI:
-
-1. **Check Server Context**: Review `llama.cpp/tools/server/server-context.cpp` for new parameters
-2. **Check Chat Params**: Review `llama.cpp/common/chat.h` for `common_chat_params` struct changes
-3. **Check Server Options**: Review `llama.cpp/tools/server/server.cpp` for command-line argument changes
-4. **Examples of options to check**:
-   - `ctx_shift` - Context shifting support
-   - `parallel_tool_calls` - Parallel tool calling
-   - `reasoning_format` - Reasoning format options
-   - Any new flags or parameters
-
-### Implementation Guidelines
-
-1. **Feature Parity**: Always aim for feature parity with llama.cpp's implementation
-2. **Test Coverage**: Add tests for new features matching llama.cpp's behavior
-3. **Documentation**: Update relevant documentation when adding new formats or options
-4. **Backward Compatibility**: Ensure changes don't break existing functionality
-
-### Files to Monitor
-
- `llama.cpp/common/chat-parser-xml-toolcall.h` - Format definitions
- `llama.cpp/common/chat-parser-xml-toolcall.cpp` - Parsing logic
- `llama.cpp/common/chat-parser.cpp` - Format presets and model-specific handlers
- `llama.cpp/common/chat.h` - Format enums and parameter structures
- `llama.cpp/tools/server/server-context.cpp` - Server configuration options
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,4 +1,4 @@
-# Contributing to LocalAI
+# Contributing to LocalAI

 Thank you for your interest in contributing to LocalAI! We appreciate your time and effort in helping to improve our project. Before you get started, please take a moment to review these guidelines.

@@ -15,6 +15,8 @@ Thank you for your interest in contributing to LocalAI! We appreciate your time
 - [Documentation](#documentation)
 - [Community and Communication](#community-and-communication)

+
+
 ## Getting Started

 ### Prerequisites
@@ -27,10 +29,8 @@ Thank you for your interest in contributing to LocalAI! We appreciate your time

 1. Clone the repository: `git clone https://github.com/go-skynet/LocalAI.git`
 2. Navigate to the project directory: `cd LocalAI`
-3. Install the required dependencies ( see https://localai.io/basics/build/#build-localai-locally )
-4. Build LocalAI: `make build`
-5. Run LocalAI: `./local-ai`
-6. To Build and live reload: `make build-dev`
+3. Install the required dependencies: `make prepare`
+4. Run LocalAI: `make run`

 ## Contributing

@@ -53,47 +53,20 @@ If you find a bug, have a feature request, or encounter any issues, please check

 ## Coding Guidelines

- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like [`golangci-lint`](https://golangci-lint.run) can help you here.
+- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like [`golangci-lint`](https://golangci-lint.run) can help you here.

 ## Testing

 `make test` cannot handle all the model now. Please be sure to add a test case for the new features or the part was changed.

-### Running AIO tests
-
-All-In-One images has a set of tests that automatically verifies that most of the endpoints works correctly, a flow can be :
-
-```bash
-# Build the LocalAI docker image
-make DOCKER_IMAGE=local-ai docker
-
-# Build the corresponding AIO image
-BASE_IMAGE=local-ai DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
-
-# Run the AIO e2e tests
-LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio make run-e2e-aio
-```
-
 ## Documentation

-We are welcome the contribution of the documents, please open new PR or create a new issue. The documentation is available under `docs/` https://github.com/mudler/LocalAI/tree/master/docs
-
-### Gallery YAML Schema
-
-LocalAI provides a JSON Schema for gallery model YAML files at:
-
-`core/schema/gallery-model.schema.json`
-
-This schema mirrors the internal gallery model configuration and can be used by editors (such as VS Code) to enable autocomplete, validation, and inline documentation when creating or modifying gallery files.
-
-To use it with the YAML language server, add the following comment at the top of a gallery YAML file:
-
-```yaml
-# yaml-language-server: $schema=../core/schema/gallery-model.schema.json
-```
+- We welcome contributions to the documentation; please open a new PR in the official documentation repo [localai-website](https://github.com/go-skynet/localai-website).

 ## Community and Communication

 - You can reach out via the Github issue tracker.
 - Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions)
 - Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy)
+
+---
--- a/445
+++ b/445
@@ -1,377 +1,184 @@
-ARG BASE_IMAGE=ubuntu:24.04
-ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
-ARG INTEL_BASE_IMAGE=${BASE_IMAGE}
-ARG UBUNTU_CODENAME=noble
+ARG GO_VERSION=1.21-bullseye
+ARG IMAGE_TYPE=extras
+# extras or core

-FROM ${BASE_IMAGE} AS requirements

-ENV DEBIAN_FRONTEND=noninteractive
-
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-        ca-certificates curl wget espeak-ng libgomp1 \
-        ffmpeg libopenblas0 libopenblas-dev && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-
-# The requirements-drivers target is for BUILD_TYPE specific items.  If you need to install something specific to CUDA, or specific to ROCM, it goes here.
-FROM requirements AS requirements-drivers
+FROM golang:$GO_VERSION as requirements-core

 ARG BUILD_TYPE
-ARG CUDA_MAJOR_VERSION=12
-ARG CUDA_MINOR_VERSION=0
-ARG SKIP_DRIVERS=false
+ARG CUDA_MAJOR_VERSION=11
+ARG CUDA_MINOR_VERSION=7
+ARG SPDLOG_VERSION="1.11.0"
+ARG PIPER_PHONEMIZE_VERSION='1.0.0'
 ARG TARGETARCH
 ARG TARGETVARIANT
+
 ENV BUILD_TYPE=${BUILD_TYPE}
-ARG UBUNTU_VERSION=2404
-
-RUN mkdir -p /run/localai
-RUN echo "default" > /run/localai/capability
-
-# Vulkan requirements
-RUN <<EOT bash
-    if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
-        apt-get update && \
-        apt-get install -y  --no-install-recommends \
-            software-properties-common pciutils wget gpg-agent && \
-        apt-get install -y libglm-dev cmake libxcb-dri3-0 libxcb-present0 libpciaccess0 \
-            libpng-dev libxcb-keysyms1-dev libxcb-dri3-dev libx11-dev g++ gcc \
-            libwayland-dev libxrandr-dev libxcb-randr0-dev libxcb-ewmh-dev \
-            git python-is-python3 bison libx11-xcb-dev liblz4-dev libzstd-dev \
-            ocaml-core ninja-build pkg-config libxml2-dev wayland-protocols python3-jsonschema \
-            clang-format qtbase5-dev qt6-base-dev libxcb-glx0-dev sudo xz-utils mesa-vulkan-drivers
-        if [ "amd64" = "$TARGETARCH" ]; then
-            wget "https://sdk.lunarg.com/sdk/download/1.4.328.1/linux/vulkansdk-linux-x86_64-1.4.328.1.tar.xz" && \
-            tar -xf vulkansdk-linux-x86_64-1.4.328.1.tar.xz && \
-            rm vulkansdk-linux-x86_64-1.4.328.1.tar.xz && \
-            mkdir -p /opt/vulkan-sdk && \
-            mv 1.4.328.1 /opt/vulkan-sdk/ && \
-            cd /opt/vulkan-sdk/1.4.328.1 && \
-            ./vulkansdk --no-deps --maxjobs \
-                vulkan-loader \
-                vulkan-validationlayers \
-                vulkan-extensionlayer \
-                vulkan-tools \
-                shaderc && \
-            cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/bin/* /usr/bin/ && \
-            cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/lib/* /usr/lib/x86_64-linux-gnu/ && \
-            cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/include/* /usr/include/ && \
-            cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/share/* /usr/share/ && \
-            rm -rf /opt/vulkan-sdk
-        fi
-        if [ "arm64" = "$TARGETARCH" ]; then
-            mkdir vulkan && cd vulkan && \
-            curl -L -o vulkan-sdk.tar.xz https://github.com/mudler/vulkan-sdk-arm/releases/download/1.4.335.0/vulkansdk-ubuntu-24.04-arm-1.4.335.0.tar.xz && \
-            tar -xvf vulkan-sdk.tar.xz && \
-            rm vulkan-sdk.tar.xz && \
-            cd 1.4.335.0 && \
-            cp -rfv aarch64/bin/* /usr/bin/ && \
-            cp -rfv aarch64/lib/* /usr/lib/aarch64-linux-gnu/ && \
-            cp -rfv aarch64/include/* /usr/include/ && \
-            cp -rfv aarch64/share/* /usr/share/ && \
-            cd ../.. && \
-            rm -rf vulkan
-        fi
-        ldconfig && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/* && \
-        echo "vulkan" > /run/localai/capability
-    fi
-EOT
-
-# CuBLAS requirements
-RUN <<EOT bash
-    if ( [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "l4t" ] ) && [ "${SKIP_DRIVERS}" = "false" ]; then
-        apt-get update && \
-        apt-get install -y  --no-install-recommends \
-            software-properties-common pciutils
-        if [ "amd64" = "$TARGETARCH" ]; then
-            curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/x86_64/cuda-keyring_1.1-1_all.deb
-        fi
-        if [ "arm64" = "$TARGETARCH" ]; then
-            if [ "${CUDA_MAJOR_VERSION}" = "13" ]; then
-                curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/sbsa/cuda-keyring_1.1-1_all.deb
-            else
-                curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/arm64/cuda-keyring_1.1-1_all.deb
-            fi
-        fi
-        dpkg -i cuda-keyring_1.1-1_all.deb && \
-        rm -f cuda-keyring_1.1-1_all.deb && \
-        apt-get update && \
-        apt-get install -y --no-install-recommends \
-            cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-            libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-            libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-            libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-            libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-            libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}
-        if [ "${CUDA_MAJOR_VERSION}" = "13" ] && [ "arm64" = "$TARGETARCH" ]; then
-            apt-get install -y --no-install-recommends \
-            libcufile-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcudnn9-cuda-${CUDA_MAJOR_VERSION} cuda-cupti-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libnvjitlink-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}
-        fi
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/* && \
-        echo "nvidia-cuda-${CUDA_MAJOR_VERSION}" > /run/localai/capability
-    fi
-EOT
-
-RUN <<EOT bash
-    if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "arm64" ]; then
-        echo "nvidia-l4t-cuda-${CUDA_MAJOR_VERSION}" > /run/localai/capability
-    fi
-EOT
-
-# https://github.com/NVIDIA/Isaac-GR00T/issues/343
-RUN <<EOT bash
-    if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "arm64" ]; then
-        wget https://developer.download.nvidia.com/compute/cudss/0.6.0/local_installers/cudss-local-tegra-repo-ubuntu${UBUNTU_VERSION}-0.6.0_0.6.0-1_arm64.deb && \
-        dpkg -i cudss-local-tegra-repo-ubuntu${UBUNTU_VERSION}-0.6.0_0.6.0-1_arm64.deb && \
-        cp /var/cudss-local-tegra-repo-ubuntu${UBUNTU_VERSION}-0.6.0/cudss-*-keyring.gpg /usr/share/keyrings/ && \
-        apt-get update && apt-get -y install cudss cudss-cuda-${CUDA_MAJOR_VERSION} && \
-        wget https://developer.download.nvidia.com/compute/nvpl/25.5/local_installers/nvpl-local-repo-ubuntu${UBUNTU_VERSION}-25.5_1.0-1_arm64.deb && \
-        dpkg -i nvpl-local-repo-ubuntu${UBUNTU_VERSION}-25.5_1.0-1_arm64.deb && \
-        cp /var/nvpl-local-repo-ubuntu${UBUNTU_VERSION}-25.5/nvpl-*-keyring.gpg /usr/share/keyrings/ && \
-        apt-get update && apt-get install -y nvpl
-    fi
-EOT
-
-# If we are building with clblas support, we need the libraries for the builds
-RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
-        apt-get update && \
-        apt-get install -y --no-install-recommends \
-            libclblast-dev && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/* \
-    ; fi
-
-RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
-        apt-get update && \
-        apt-get install -y --no-install-recommends \
-            hipblas-dev \
-            rocblas-dev && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/* && \
-        echo "amd" > /run/localai/capability && \
-        # I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
-        # to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
-        ldconfig \
-    ; fi
-
-RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
-    ln -s /opt/rocm-**/lib/llvm/lib/libomp.so /usr/lib/libomp.so \
-    ; fi
-
-RUN expr "${BUILD_TYPE}" = intel && echo "intel" > /run/localai/capability || echo "not intel"
-
-# Cuda
-ENV PATH=/usr/local/cuda/bin:${PATH}
-
-# HipBLAS requirements
-ENV PATH=/opt/rocm/bin:${PATH}
-
-###################################
-###################################
-
-# The requirements-core target is common to all images.  It should not be placed in requirements-core unless every single build will use it.
-FROM requirements-drivers AS build-requirements
-
-ARG GO_VERSION=1.25.4
-ARG CMAKE_VERSION=3.31.10
-ARG CMAKE_FROM_SOURCE=false
-ARG TARGETARCH
-ARG TARGETVARIANT
+ENV EXTERNAL_GRPC_BACKENDS="huggingface-embeddings:/build/extra/grpc/huggingface/huggingface.py,autogptq:/build/extra/grpc/autogptq/autogptq.py,bark:/build/extra/grpc/bark/ttsbark.py,diffusers:/build/extra/grpc/diffusers/backend_diffusers.py,exllama:/build/extra/grpc/exllama/exllama.py,vall-e-x:/build/extra/grpc/vall-e-x/ttsvalle.py,vllm:/build/extra/grpc/vllm/backend_vllm.py"
+ENV GALLERIES='[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]'
+ARG GO_TAGS="stablediffusion tts"

 RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-        build-essential \
-        ccache \
-        ca-certificates espeak-ng \
-        curl libssl-dev \
-        git \
-        git-lfs \
-        unzip upx-ucl python3 python-is-python3 && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
+    apt-get install -y ca-certificates curl patch pip cmake && apt-get clean

-# Install CMake (the version in 22.04 is too old)
-RUN <<EOT bash
-    if [ "${CMAKE_FROM_SOURCE}" = "true" ]; then
-        curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
-    else
-        apt-get update && \
-        apt-get install -y \
-            cmake && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/*
-    fi
-EOT
-
-# Install Go
-RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
-ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
-
-# Install grpc compilers
-RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
-    go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-
-COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
-RUN update-ca-certificates
-
-RUN test -n "$TARGETARCH" \
-    || (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')

 # Use the variables in subsequent instructions
 RUN echo "Target Architecture: $TARGETARCH"
 RUN echo "Target Variant: $TARGETVARIANT"

+# CuBLAS requirements (apt lists are refreshed in this layer so a cached earlier layer cannot leave them stale)
+RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
+    apt-get update && apt-get install -y software-properties-common && \
+    apt-add-repository contrib && \
+    curl -O https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb && \
+    dpkg -i cuda-keyring_1.0-1_all.deb && \
+    rm -f cuda-keyring_1.0-1_all.deb && \
+    apt-get update && \
+    apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}  && apt-get clean \
+    ; fi
+ENV PATH=/usr/local/cuda/bin:${PATH}

+# OpenBLAS requirements and stable diffusion (apt lists refreshed in the same layer as the install)
+RUN apt-get update && apt-get install -y \
+    libopenblas-dev \
+    libopencv-dev \
+    && apt-get clean

+# Set up OpenCV
+RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2

 WORKDIR /build

+# piper requirements
+# Use pre-compiled Piper phonemization library (includes onnxruntime)
+#RUN if echo "${GO_TAGS}" | grep -q "tts"; then \
+RUN test -n "$TARGETARCH" \
+    || (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
+
+RUN curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v${SPDLOG_VERSION}.tar.gz" | \
+    tar -xzvf - && \
+    mkdir -p "spdlog-${SPDLOG_VERSION}/build" && \
+    cd "spdlog-${SPDLOG_VERSION}/build" && \
+    cmake ..  && \
+    make -j8 && \
+    cmake --install . --prefix /usr && mkdir -p "lib/Linux-$(uname -m)" && \
+    cd /build && \
+    mkdir -p "lib/Linux-$(uname -m)/piper_phonemize" && \
+    curl -L "https://github.com/rhasspy/piper-phonemize/releases/download/v${PIPER_PHONEMIZE_VERSION}/libpiper_phonemize-${TARGETARCH:-$(go env GOARCH)}${TARGETVARIANT}.tar.gz" | \
+    tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - && ls -liah /build/lib/Linux-$(uname -m)/piper_phonemize/ && \
+    cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \
+    ln -s /usr/lib/libpiper_phonemize.so /usr/lib/libpiper_phonemize.so.1 && \
+    cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/ && \
+    rm spdlog-${SPDLOG_VERSION} -rf
+
+# Extras requirements
+FROM requirements-core as requirements-extras
+
+COPY extra/requirements.txt /build/extra/requirements.txt
+ENV PATH="/root/.cargo/bin:${PATH}"
+RUN pip install --upgrade pip
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+RUN if [ "${TARGETARCH}" = "amd64" ]; then \
+        pip install git+https://github.com/suno-ai/bark.git diffusers invisible_watermark transformers accelerate safetensors;\
+    fi
+RUN if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "amd64" ]; then \
+        pip install torch vllm && pip install auto-gptq https://github.com/jllllll/exllama/releases/download/0.0.10/exllama-0.0.10+cu${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION}-cp39-cp39-linux_x86_64.whl;\
+    fi
+RUN pip install -r /build/extra/requirements.txt && rm -rf /build/extra/requirements.txt
+
+# Vall-e-X
+RUN git clone https://github.com/Plachtaa/VALL-E-X.git /usr/lib/vall-e-x && cd /usr/lib/vall-e-x && pip install -r requirements.txt
+
+# \
+#    ; fi

 ###################################
 ###################################

-# Temporary workaround for Intel's repository to work correctly
-# https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/APT-Repository-not-working-signatures-invalid/m-p/1599436/highlight/true#M36143
-# This is a temporary workaround until Intel fixes their repository
-FROM ${INTEL_BASE_IMAGE} AS intel
-ARG UBUNTU_CODENAME=noble
-RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
-gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg
-RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu ${UBUNTU_CODENAME}/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-        intel-oneapi-runtime-libs && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
+FROM requirements-${IMAGE_TYPE} as builder

-###################################
-###################################
-
-# The builder-base target has the arguments, variables, and copies shared between full builder images and the uncompiled devcontainer
-
-FROM build-requirements AS builder-base
-
-ARG GO_TAGS=""
+ARG GO_TAGS="stablediffusion tts"
 ARG GRPC_BACKENDS
-ARG MAKEFLAGS
-ARG LD_FLAGS="-s -w"
-ARG TARGETARCH
-ARG TARGETVARIANT
+ARG BUILD_GRPC=true
 ENV GRPC_BACKENDS=${GRPC_BACKENDS}
 ENV GO_TAGS=${GO_TAGS}
-ENV MAKEFLAGS=${MAKEFLAGS}
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
 ENV NVIDIA_VISIBLE_DEVICES=all
-ENV LD_FLAGS=${LD_FLAGS}
-
-RUN echo "GO_TAGS: $GO_TAGS" && echo "TARGETARCH: $TARGETARCH"
-
-WORKDIR /build
-
-
-# We need protoc installed, and the version in 22.04 is too old.
-RUN <<EOT bash
-    if [ "amd64" = "$TARGETARCH" ]; then
-        curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-x86_64.zip -o protoc.zip && \
-        unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
-        rm protoc.zip
-    fi
-    if [ "arm64" = "$TARGETARCH" ]; then
-        curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-aarch_64.zip -o protoc.zip && \
-        unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
-        rm protoc.zip
-    fi
-EOT
-
-###################################
-###################################
-
-# Compile backends first in a separate stage
-FROM builder-base AS builder-backends
-ARG TARGETARCH
-ARG TARGETVARIANT
-
-WORKDIR /build
-
-COPY ./Makefile .
-COPY ./backend ./backend
-COPY ./go.mod .
-COPY ./go.sum .
-COPY ./.git ./.git
-
-# Some of the Go backends use libs from the main src, we could further optimize the caching by building the CPP backends before here
-COPY ./pkg/grpc ./pkg/grpc
-COPY ./pkg/utils ./pkg/utils
-COPY ./pkg/langchain ./pkg/langchain
-
-RUN ls -l ./
-RUN make protogen-go
-
-# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
-# Adjustments to the build process should likely be made here.
-FROM builder-backends AS builder

 WORKDIR /build

+COPY Makefile .
+RUN make get-sources
+COPY go.mod .
+RUN make prepare
 COPY . .
+COPY .git ./.git

-## Build the binary
-## If we're on arm64 AND using cublas/hipblas, skip some of the llama-compat backends to save space
-## Otherwise just run the normal build
-RUN make build
+# stablediffusion does not tolerate a newer version of abseil, build it first
+RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
+
+RUN if [ "${BUILD_GRPC}" = "true" ]; then \
+    git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
+    cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
+      -DgRPC_BUILD_TESTS=OFF \
+       ../.. && make -j12 install && cd /build && rm -rf grpc \
+    ; fi
+
+# Rebuild with the default backends
+RUN ESPEAK_DATA=/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data make build

 ###################################
 ###################################

-# The devcontainer target is not used on CI. It is a target for developers to use locally -
-# rather than copying files it mounts them locally and leaves building to the developer
+FROM requirements-${IMAGE_TYPE}

-FROM builder-base AS devcontainer
-
-COPY .devcontainer-scripts /.devcontainer-scripts
-
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-        ssh less
-# For the devcontainer, leave apt functional in case additional devtools are needed at runtime.
-
-RUN go install github.com/go-delve/delve/cmd/dlv@latest
-
-RUN go install github.com/mikefarah/yq/v4@latest
-
-###################################
-###################################
-
-# This is the final target. The result of this target will be the image uploaded to the registry.
-# If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
-FROM requirements-drivers
+ARG FFMPEG
+ARG BUILD_TYPE
+ARG TARGETARCH

+ENV BUILD_TYPE=${BUILD_TYPE}
+ENV REBUILD=false
 ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz

-ARG CUDA_MAJOR_VERSION=12
+ARG CUDA_MAJOR_VERSION=11
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
 ENV NVIDIA_VISIBLE_DEVICES=all

-WORKDIR /
+# Add FFmpeg (optional; apt lists are refreshed in the same layer as the install)
+RUN if [ "${FFMPEG}" = "true" ]; then \
+    apt-get update && apt-get install -y ffmpeg && apt-get clean \
+    ; fi

-COPY ./entrypoint.sh .
+WORKDIR /build
+
+# we start fresh & re-copy all assets because `make build` does not clean up nicely after itself
+# so when `entrypoint.sh` runs `make build` again (which it does by default), the build would fail
+# see https://github.com/go-skynet/LocalAI/pull/658#discussion_r1241971626 and
+# https://github.com/go-skynet/LocalAI/pull/434
+COPY . .
+RUN make prepare-sources

 # Copy the binary
 COPY --from=builder /build/local-ai ./

-# Make sure the models directory exists
-RUN mkdir -p /models /backends
+# do not let stablediffusion rebuild (requires an older version of absl)
+COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
+
+# Copy VALLE-X as it's not a real "lib"
+RUN if [ -d /usr/lib/vall-e-x ]; then \
+    cp -rfv /usr/lib/vall-e-x/* ./ ; \ 
+    fi
+
+# we also copy exllama libs over to resolve exllama import error
+RUN if [ -d /usr/local/lib/python3.9/dist-packages/exllama ]; then \
+        cp -rfv /usr/local/lib/python3.9/dist-packages/exllama extra/grpc/exllama/;\
+    fi

 # Define the health check command
 HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
-  CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1
+  CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1

-VOLUME /models /backends /configuration
 EXPOSE 8080
-ENTRYPOINT [ "/entrypoint.sh" ]
+ENTRYPOINT [ "/build/entrypoint.sh" ]
--- a/Dockerfile.aio
+++ b/Dockerfile.aio
@@ -1,8 +0,0 @@
-ARG BASE_IMAGE=ubuntu:24.04
-
-FROM ${BASE_IMAGE} 
-
-RUN apt-get update && apt-get install -y pciutils && apt-get clean
-
-COPY aio/ /aio
-ENTRYPOINT [ "/aio/entrypoint.sh" ]
--- a/5
+++ b/5
@@ -0,0 +1,5 @@
+VERSION 0.7
+
+build:
+    FROM DOCKERFILE -f Dockerfile .
+    SAVE ARTIFACT /build/local-ai AS LOCAL local-ai
--- a/Entitlements.plist
+++ b/Entitlements.plist
@@ -1,10 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-<dict>
-    <key>com.apple.security.network.client</key>
-    <true/>
-    <key>com.apple.security.network.server</key>
-    <true/>
-</dict>
-</plist>
--- a/2
+++ b/2
@@ -1,6 +1,6 @@
 MIT License

-Copyright (c) 2023-2025 Ettore Di Giacinto (mudler@localai.io)
+Copyright (c) 2023 Ettore Di Giacinto

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
--- a/788
+++ b/788
@@ -1,42 +1,66 @@
-# Disable parallel execution for backend builds
-.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm backends/moonshine
-
 GOCMD=go
 GOTEST=$(GOCMD) test
 GOVET=$(GOCMD) vet
 BINARY_NAME=local-ai
-LAUNCHER_BINARY_NAME=local-ai-launcher

-CUDA_MAJOR_VERSION?=13
-CUDA_MINOR_VERSION?=0
-UBUNTU_VERSION?=2204
-UBUNTU_CODENAME?=noble
+# llama.cpp versions
+GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0

-GORELEASER?=
+GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7
+
+CPPLLAMA_VERSION?=96981f37b1e3f450d9e63e571514217bf60f0a7f
+
+# gpt4all version
+GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
+GPT4ALL_VERSION?=27a8b020c36b0df8f8b82a252d261cda47cf44b8
+
+# go-ggml-transformers version
+GOGGMLTRANSFORMERS_VERSION?=ffb09d7dd71e2cbc6c5d7d05357d230eea6f369a
+
+# go-rwkv version
+RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
+RWKV_VERSION?=c898cd0f62df8f2a7830e53d1d513bef4f6f792b
+
+# whisper.cpp version
+WHISPER_CPP_VERSION?=85ed71aaec8e0612a84c0b67804bde75aa75a273
+
+# bert.cpp version
+BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
+
+# go-piper version
+PIPER_VERSION?=56b8a81b4760a6fbee1a82e62f007ae7e8f010a7
+
+# go-bloomz version
+BLOOMZ_VERSION?=1834e77b83faafe912ad4092ccf7f77937349e2f
+
+# stablediffusion version
+STABLEDIFFUSION_VERSION?=d89260f598afb809279bc72aa0107b4292587632
+
+# Go-ggllm
+GOGGLLM_VERSION?=862477d16eefb0805261c19c9b0d053e3b2b684b

 export BUILD_TYPE?=
-export CUDA_MAJOR_VERSION?=12
-export CUDA_MINOR_VERSION?=9
-
+export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
+export CMAKE_ARGS?=
+CGO_LDFLAGS?=
+CUDA_LIBPATH?=/usr/local/cuda/lib64/
 GO_TAGS?=
-BUILD_ID?=
-NATIVE?=false
+BUILD_ID?=git

 TEST_DIR=/tmp/test

-TEST_FLAKES?=5
-
 RANDOM := $(shell bash -c 'echo $$RANDOM')

 VERSION?=$(shell git describe --always --tags || echo "dev" )
 # go tool nm ./local-ai | grep Commit
-LD_FLAGS?=-s -w
-override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Version=$(VERSION)"
-override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
+LD_FLAGS?=
+override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Version=$(VERSION)"
+override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"

 OPTIONAL_TARGETS?=
+ESPEAK_DATA?=

-export OS := $(shell uname -s)
+OS := $(shell uname -s)
 ARCH := $(shell uname -m)
 GREEN  := $(shell tput -Txterm setaf 2)
 YELLOW := $(shell tput -Txterm setaf 3)
@@ -51,160 +75,278 @@ ifndef UNAME_S
 UNAME_S := $(shell uname -s)
 endif

-ifeq ($(OS),Darwin)
-	ifeq ($(OSX_SIGNING_IDENTITY),)
-		OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
-	endif
+ifeq ($(UNAME_S),Darwin)
+	CGO_LDFLAGS += -lcblas -framework Accelerate
+ifneq ($(BUILD_TYPE),metal)
+    # explicitly disable Metal when building on Darwin with a BUILD_TYPE other than metal
+	CMAKE_ARGS+=-DLLAMA_METAL=OFF
+endif
 endif

-# check if goreleaser exists
-ifeq (, $(shell which goreleaser))
-	GORELEASER=curl -sfL https://goreleaser.com/static/run | bash -s --
-else
-	GORELEASER=$(shell which goreleaser)
+ifeq ($(BUILD_TYPE),openblas)
+	CGO_LDFLAGS+=-lopenblas
 endif

-TEST_PATHS?=./api/... ./pkg/... ./core/...
+ifeq ($(BUILD_TYPE),cublas)
+	CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
+	export LLAMA_CUBLAS=1
+endif

+ifeq ($(BUILD_TYPE),hipblas)
+	ROCM_HOME ?= /opt/rocm
+	export CXX=$(ROCM_HOME)/llvm/bin/clang++
+	export CC=$(ROCM_HOME)/llvm/bin/clang
+	# Llama-stable has no hipblas support, so override it here.
+	export STABLE_BUILD_TYPE=
+	GPU_TARGETS ?= gfx900,gfx90a,gfx1030,gfx1031,gfx1100
+	AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
+	CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
+	CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link
+endif
+
+ifeq ($(BUILD_TYPE),metal)
+	CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
+	export LLAMA_METAL=1
+endif
+
+ifeq ($(BUILD_TYPE),clblas)
+	CGO_LDFLAGS+=-lOpenCL -lclblast
+endif
+
+# glibc-static or glibc-devel-static required
+ifeq ($(STATIC),true)
+	LD_FLAGS=-linkmode external -extldflags -static
+endif
+
+ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion)
+#	OPTIONAL_TARGETS+=go-stable-diffusion/libstablediffusion.a
+	OPTIONAL_GRPC+=backend-assets/grpc/stablediffusion
+endif
+
+ifeq ($(findstring tts,$(GO_TAGS)),tts)
+#	OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
+#	OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
+	OPTIONAL_GRPC+=backend-assets/grpc/piper
+endif
+
+ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/falcon backend-assets/grpc/bloomz backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-stable backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC)
+GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS)
+
+# If empty, then we build all
+ifeq ($(GRPC_BACKENDS),)
+	GRPC_BACKENDS=$(ALL_GRPC_BACKENDS)
+endif

 .PHONY: all test build vendor

 all: help

+## GPT4ALL
+gpt4all:
+	git clone --recurse-submodules $(GPT4ALL_REPO) gpt4all
+	cd gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
+
+## go-ggllm
+go-ggllm:
+	git clone --recurse-submodules https://github.com/mudler/go-ggllm.cpp go-ggllm
+	cd go-ggllm && git checkout -b build $(GOGGLLM_VERSION) && git submodule update --init --recursive --depth 1
+
+go-ggllm/libggllm.a: go-ggllm
+	$(MAKE) -C go-ggllm BUILD_TYPE=$(BUILD_TYPE) libggllm.a
+
+## go-piper
+go-piper:
+	git clone --recurse-submodules https://github.com/mudler/go-piper go-piper
+	cd go-piper && git checkout -b build $(PIPER_VERSION) && git submodule update --init --recursive --depth 1
+
+## BERT embeddings
+go-bert:
+	git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp go-bert
+	cd go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
+
+## stable diffusion
+go-stable-diffusion:
+	git clone --recurse-submodules https://github.com/mudler/go-stable-diffusion go-stable-diffusion
+	cd go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1
+
+go-stable-diffusion/libstablediffusion.a: go-stable-diffusion
+	$(MAKE) -C go-stable-diffusion libstablediffusion.a
+
+## RWKV
+go-rwkv:
+	git clone --recurse-submodules $(RWKV_REPO) go-rwkv
+	cd go-rwkv && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
+
+go-rwkv/librwkv.a: go-rwkv
+	cd go-rwkv && cd rwkv.cpp &&	cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF &&	cmake --build . && 	cp librwkv.a ..
+
+## bloomz
+bloomz:
+	git clone --recurse-submodules https://github.com/go-skynet/bloomz.cpp bloomz
+	cd bloomz && git checkout -b build $(BLOOMZ_VERSION) && git submodule update --init --recursive --depth 1
+
+bloomz/libbloomz.a: bloomz
+	cd bloomz && make libbloomz.a
+
+go-bert/libgobert.a: go-bert
+	$(MAKE) -C go-bert libgobert.a
+
+backend-assets/gpt4all: gpt4all/gpt4all-bindings/golang/libgpt4all.a
+	mkdir -p backend-assets/gpt4all
+	@cp gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true
+	@cp gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
+	@cp gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
+
+backend-assets/espeak-ng-data:
+	mkdir -p backend-assets/espeak-ng-data
+ifdef ESPEAK_DATA
+	@cp -rf $(ESPEAK_DATA)/. backend-assets/espeak-ng-data
+else
+	@echo "ESPEAK_DATA not set, skipping tts. Note that this will break the tts functionality."
+	@touch backend-assets/espeak-ng-data/keep
+endif
+
+gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all
+	$(MAKE) -C gpt4all/gpt4all-bindings/golang/ libgpt4all.a
+
+## CEREBRAS GPT (go-ggml-transformers, pinned to GOGGMLTRANSFORMERS_VERSION)
+go-ggml-transformers:
+	git clone --recurse-submodules https://github.com/go-skynet/go-ggml-transformers.cpp go-ggml-transformers
+	cd go-ggml-transformers && git checkout -b build $(GOGGMLTRANSFORMERS_VERSION) && git submodule update --init --recursive --depth 1
+
+go-ggml-transformers/libtransformers.a: go-ggml-transformers
+	$(MAKE) -C go-ggml-transformers BUILD_TYPE=$(BUILD_TYPE) libtransformers.a
+
+whisper.cpp:
+	git clone https://github.com/ggerganov/whisper.cpp.git
+	cd whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
+
+whisper.cpp/libwhisper.a: whisper.cpp
+	cd whisper.cpp && make libwhisper.a
+
+go-llama:
+	git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
+	cd go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1
+
+go-llama-stable:
+	git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama-stable
+	cd go-llama-stable && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
+
+go-llama/libbinding.a: go-llama
+	$(MAKE) -C go-llama BUILD_TYPE=$(BUILD_TYPE) libbinding.a
+
+go-llama-stable/libbinding.a: go-llama-stable
+	$(MAKE) -C go-llama-stable BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
+
+go-piper/libpiper_binding.a: go-piper
+	$(MAKE) -C go-piper libpiper_binding.a example/main
+
+get-sources: go-llama go-llama-stable go-ggllm go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion
+	touch $@
+
+replace:
+	$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(shell pwd)/gpt4all/gpt4all-bindings/golang
+	$(GOCMD) mod edit -replace github.com/go-skynet/go-ggml-transformers.cpp=$(shell pwd)/go-ggml-transformers
+	$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
+	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp
+	$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert
+	$(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz
+	$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(shell pwd)/go-stable-diffusion
+	$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(shell pwd)/go-piper
+	$(GOCMD) mod edit -replace github.com/mudler/go-ggllm.cpp=$(shell pwd)/go-ggllm
+
+prepare-sources: get-sources replace
+	$(GOCMD) mod download
+
 ## GENERIC
 rebuild: ## Rebuilds the project
 	$(GOCMD) clean -cache
+	$(MAKE) -C go-llama clean
+	$(MAKE) -C go-llama-stable clean
+	$(MAKE) -C gpt4all/gpt4all-bindings/golang/ clean
+	$(MAKE) -C go-ggml-transformers clean
+	$(MAKE) -C go-rwkv clean
+	$(MAKE) -C whisper.cpp clean
+	$(MAKE) -C go-stable-diffusion clean
+	$(MAKE) -C go-bert clean
+	$(MAKE) -C bloomz clean
+	$(MAKE) -C go-piper clean
+	$(MAKE) -C go-ggllm clean
 	$(MAKE) build

+prepare: prepare-sources $(OPTIONAL_TARGETS)
+	touch $@
+
 clean: ## Remove build related file
 	$(GOCMD) clean -cache
 	rm -f prepare
+	rm -rf ./go-llama
+	rm -rf ./gpt4all
+	rm -rf ./go-llama-stable
+	rm -rf ./go-gpt2
+	rm -rf ./go-stable-diffusion
+	rm -rf ./go-ggml-transformers
+	rm -rf ./backend-assets
+	rm -rf ./go-rwkv
+	rm -rf ./go-bert
+	rm -rf ./bloomz
+	rm -rf ./whisper.cpp
+	rm -rf ./go-piper
+	rm -rf ./go-ggllm
 	rm -rf $(BINARY_NAME)
 	rm -rf release/
-	$(MAKE) protogen-clean
-	rmdir pkg/grpc/proto || true
-
-clean-tests:
-	rm -rf test-models
-	rm -rf test-dir
-
-## Install Go tools
-install-go-tools:
-	go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-	go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
+	$(MAKE) -C backend/cpp/llama clean

 ## Build:
-build: protogen-go install-go-tools ## Build the project
+
+build: grpcs prepare ## Build the project
 	$(info ${GREEN}I local-ai build info:${RESET})
 	$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
 	$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
 	$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
-	$(info ${GREEN}I UPX: ${YELLOW}$(UPX)${RESET})
-	rm -rf $(BINARY_NAME) || true
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./cmd/local-ai

-build-launcher: ## Build the launcher application
-	$(info ${GREEN}I local-ai launcher build info:${RESET})
-	$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
-	$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
-	$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
-	rm -rf $(LAUNCHER_BINARY_NAME) || true
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(LAUNCHER_BINARY_NAME) ./cmd/launcher
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./

-build-all: build build-launcher ## Build both server and launcher
-
-build-dev: ## Run LocalAI in dev mode with live reload
-	@command -v air >/dev/null 2>&1 || go install github.com/air-verse/air@latest
-	air -c .air.toml
-
-dev-dist:
-	$(GORELEASER) build --snapshot --clean
-
-dist:
-	$(GORELEASER) build --clean
-
-osx-signed: build
-	codesign --deep --force --sign "$(OSX_SIGNING_IDENTITY)" --entitlements "./Entitlements.plist" "./$(BINARY_NAME)"
+dist: build
+	mkdir -p release
+	cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH)

 ## Run
-run: ## run local-ai
+run: prepare ## run local-ai
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) run ./

-test-models/testmodel.ggml:
-	mkdir -p test-models
-	mkdir -p test-dir
-	wget -q https://huggingface.co/mradermacher/gpt2-alpaca-gpt4-GGUF/resolve/main/gpt2-alpaca-gpt4.Q4_K_M.gguf -O test-models/testmodel.ggml
-	wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
-	wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
-	wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
+test-models/testmodel:
+	mkdir -p test-models
+	mkdir -p test-dir
+	wget https://huggingface.co/nnakasato/ggml-model-test/resolve/main/ggml-model-q4.bin -O test-models/testmodel
+	wget https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
+	wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O test-models/bert
+	wget https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
+	wget https://huggingface.co/mudler/rwkv-4-raven-1.5B-ggml/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%2525-Other1%2525-20230425-ctx4096_Q4_0.bin -O test-models/rwkv
+	wget https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O test-models/rwkv.tokenizer.json
 	cp tests/models_fixtures/* test-models

-prepare-test: protogen-go
+prepare-test: grpcs
+	cp -rf backend-assets api
 	cp tests/models_fixtures/* test-models

-########################################################
-## Tests
-########################################################
-
-## Test targets
-test: test-models/testmodel.ggml protogen-go
+test: prepare test-models/testmodel grpcs
 	@echo 'Running tests'
-	export GO_TAGS="debug"
+	export GO_TAGS="tts stablediffusion"
 	$(MAKE) prepare-test
-	HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models BACKENDS_PATH=$(abspath ./)/backends \
-	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama-gguf"  --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
+	HUGGINGFACE_GRPC=$(abspath ./)/extra/grpc/huggingface/huggingface.py TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
+	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts 5 -v -r ./api ./pkg
+	$(MAKE) test-gpt4all
+	$(MAKE) test-llama
 	$(MAKE) test-llama-gguf
 	$(MAKE) test-tts
 	$(MAKE) test-stablediffusion

-########################################################
-## AIO tests
-########################################################
-
-docker-build-aio:
-	docker build \
-		--build-arg MAKEFLAGS="--jobs=5 --output-sync=target" \
-		--build-arg BASE_IMAGE=$(BASE_IMAGE) \
-		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
-		--build-arg BUILD_TYPE=$(BUILD_TYPE) \
-		--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
-		--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
-		--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
-		--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
-		--build-arg GO_TAGS="$(GO_TAGS)" \
-		-t local-ai:tests -f Dockerfile .
-	BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test $(MAKE) docker-aio
-
-e2e-aio:
-	LOCALAI_BACKEND_DIR=$(abspath ./backends) \
-	LOCALAI_MODELS_DIR=$(abspath ./models) \
-	LOCALAI_IMAGE_TAG=test \
-	LOCALAI_IMAGE=local-ai-aio \
-	$(MAKE) run-e2e-aio
-
-run-e2e-aio: protogen-go
-	@echo 'Running e2e AIO tests'
-	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e-aio
-
-########################################################
-## E2E tests
-########################################################
-
 prepare-e2e:
 	mkdir -p $(TEST_DIR)
 	cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
 	test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
-	docker build \
-		--build-arg IMAGE_TYPE=core \
-		--build-arg BUILD_TYPE=$(BUILD_TYPE) \
-		--build-arg BASE_IMAGE=$(BASE_IMAGE) \
-		--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
-		--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
-		--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
-		--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
-		--build-arg GO_TAGS="$(GO_TAGS)" \
-		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
-		-t localai-tests .
+	docker build --build-arg BUILD_GRPC=true --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests .

 run-e2e-image:
 	ls -liah $(abspath ./tests/e2e-fixtures)
@@ -214,39 +356,36 @@ test-e2e:
 	@echo 'Running e2e tests'
 	BUILD_TYPE=$(BUILD_TYPE) \
 	LOCALAI_API=http://$(E2E_BRIDGE_IP):5390/v1 \
-	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e
+	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e

 teardown-e2e:
 	rm -rf $(TEST_DIR) || true
 	docker stop $$(docker ps -q --filter ancestor=localai-tests)

-########################################################
-## Integration and unit tests
-########################################################
+test-gpt4all: prepare-test
+	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
+	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r ./api ./pkg
+
+test-llama: prepare-test
+	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
+	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r ./api ./pkg

 test-llama-gguf: prepare-test
-	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models BACKENDS_PATH=$(abspath ./)/backends \
-	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
+	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
+	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts 5 -v -r ./api ./pkg

 test-tts: prepare-test
-	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models BACKENDS_PATH=$(abspath ./)/backends \
-	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
+	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
+	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts 1 -v -r ./api ./pkg

 test-stablediffusion: prepare-test
-	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models BACKENDS_PATH=$(abspath ./)/backends \
-	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
-
-test-stores:
-	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts $(TEST_FLAKES) -v -r tests/integration
+	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
+	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts 1 -v -r ./api ./pkg

 test-container:
 	docker build --target requirements -t local-ai-test-container .
 	docker run -ti --rm --entrypoint /bin/bash -ti -v $(abspath ./):/build local-ai-test-container

-########################################################
-## Help
-########################################################
-
 ## Help:
 help: ## Show this help.
 	@echo ''
@@ -259,299 +398,118 @@ help: ## Show this help.
 		else if (/^## .*$$/) {printf "  ${CYAN}%s${RESET}\n", substr($$1,4)} \
 		}' $(MAKEFILE_LIST)

-########################################################
-## Backends
-########################################################
+protogen: protogen-go protogen-python

-.PHONY: protogen
-protogen: protogen-go
+protogen-go:
+	protoc --go_out=. --go_opt=paths=source_relative --go-grpc_out=. --go-grpc_opt=paths=source_relative \
+    pkg/grpc/proto/backend.proto

-protoc:
-	@OS_NAME=$$(uname -s | tr '[:upper:]' '[:lower:]'); \
-	ARCH_NAME=$$(uname -m); \
-	if [ "$$OS_NAME" = "darwin" ]; then \
-	  if [ "$$ARCH_NAME" = "arm64" ]; then \
-	    FILE=protoc-31.1-osx-aarch_64.zip; \
-	  elif [ "$$ARCH_NAME" = "x86_64" ]; then \
-	    FILE=protoc-31.1-osx-x86_64.zip; \
-	  else \
-	    echo "Unsupported macOS architecture: $$ARCH_NAME"; exit 1; \
-	  fi; \
-	elif [ "$$OS_NAME" = "linux" ]; then \
-	  if [ "$$ARCH_NAME" = "x86_64" ]; then \
-	    FILE=protoc-31.1-linux-x86_64.zip; \
-	  elif [ "$$ARCH_NAME" = "aarch64" ] || [ "$$ARCH_NAME" = "arm64" ]; then \
-	    FILE=protoc-31.1-linux-aarch_64.zip; \
-	  elif [ "$$ARCH_NAME" = "ppc64le" ]; then \
-	    FILE=protoc-31.1-linux-ppcle_64.zip; \
-	  elif [ "$$ARCH_NAME" = "s390x" ]; then \
-	    FILE=protoc-31.1-linux-s390_64.zip; \
-	  elif [ "$$ARCH_NAME" = "i386" ] || [ "$$ARCH_NAME" = "x86" ]; then \
-	    FILE=protoc-31.1-linux-x86_32.zip; \
-	  else \
-	    echo "Unsupported Linux architecture: $$ARCH_NAME"; exit 1; \
-	  fi; \
-	else \
-	  echo "Unsupported OS: $$OS_NAME"; exit 1; \
-	fi; \
-	URL=https://github.com/protocolbuffers/protobuf/releases/download/v31.1/$$FILE; \
-	curl -L $$URL -o protoc.zip && \
-	unzip -j -d $(CURDIR) protoc.zip bin/protoc && rm protoc.zip
+protogen-python:
+	python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/huggingface/ --grpc_python_out=extra/grpc/huggingface/ pkg/grpc/proto/backend.proto
+	python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/autogptq/ --grpc_python_out=extra/grpc/autogptq/ pkg/grpc/proto/backend.proto
+	python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/exllama/ --grpc_python_out=extra/grpc/exllama/ pkg/grpc/proto/backend.proto
+	python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/bark/ --grpc_python_out=extra/grpc/bark/ pkg/grpc/proto/backend.proto
+	python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/diffusers/ --grpc_python_out=extra/grpc/diffusers/ pkg/grpc/proto/backend.proto
+	python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/vall-e-x/ --grpc_python_out=extra/grpc/vall-e-x/ pkg/grpc/proto/backend.proto
+	python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/vllm/ --grpc_python_out=extra/grpc/vllm/ pkg/grpc/proto/backend.proto

-.PHONY: protogen-go
-protogen-go: protoc install-go-tools
-	mkdir -p pkg/grpc/proto
-	./protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
-    backend/backend.proto
+## GRPC

-.PHONY: protogen-go-clean
-protogen-go-clean:
-	$(RM) pkg/grpc/proto/backend.pb.go pkg/grpc/proto/backend_grpc.pb.go
-	$(RM) bin/*
+backend-assets/grpc:
+	mkdir -p backend-assets/grpc

-prepare-test-extra: protogen-python
-	$(MAKE) -C backend/python/transformers
-	$(MAKE) -C backend/python/diffusers
-	$(MAKE) -C backend/python/chatterbox
-	$(MAKE) -C backend/python/vllm
-	$(MAKE) -C backend/python/vibevoice
-	$(MAKE) -C backend/python/moonshine
+backend-assets/grpc/falcon: backend-assets/grpc go-ggllm/libggllm.a
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggllm LIBRARY_PATH=$(shell pwd)/go-ggllm \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/falcon ./cmd/grpc/falcon/

-test-extra: prepare-test-extra
-	$(MAKE) -C backend/python/transformers test
-	$(MAKE) -C backend/python/diffusers test
-	$(MAKE) -C backend/python/chatterbox test
-	$(MAKE) -C backend/python/vllm test
-	$(MAKE) -C backend/python/vibevoice test
-	$(MAKE) -C backend/python/moonshine test
+backend-assets/grpc/llama: backend-assets/grpc go-llama/libbinding.a
+	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama LIBRARY_PATH=$(shell pwd)/go-llama \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama ./cmd/grpc/llama/
+# TODO: every binary should have its own folder instead, so that each can have a different metal implementation
+ifeq ($(BUILD_TYPE),metal)
+	cp go-llama/build/bin/ggml-metal.metal backend-assets/grpc/
+endif

-DOCKER_IMAGE?=local-ai
-DOCKER_AIO_IMAGE?=local-ai-aio
-IMAGE_TYPE?=core
-BASE_IMAGE?=ubuntu:24.04
+backend/cpp/llama/grpc-server:
+	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server

-docker:
-	docker build \
-		--build-arg BASE_IMAGE=$(BASE_IMAGE) \
-		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
-		--build-arg GO_TAGS="$(GO_TAGS)" \
-		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
-		--build-arg BUILD_TYPE=$(BUILD_TYPE) \
-		--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
-		--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
-		--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
-		--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
-		-t $(DOCKER_IMAGE) .
+backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/grpc-server
+	cp -rfv backend/cpp/llama/grpc-server backend-assets/grpc/llama-cpp
+# TODO: every binary should have its own folder instead, so that each can have a different metal implementation
+ifeq ($(BUILD_TYPE),metal)
+	cp backend/cpp/llama/llama.cpp/build/bin/ggml-metal.metal backend-assets/grpc/
+endif

-docker-cuda12:
-	docker build \
-		--build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} \
-		--build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} \
-		--build-arg BASE_IMAGE=$(BASE_IMAGE) \
-		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
-		--build-arg GO_TAGS="$(GO_TAGS)" \
-		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
-		--build-arg BUILD_TYPE=$(BUILD_TYPE) \
-		--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
-		--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
-		-t $(DOCKER_IMAGE)-cuda-12 .
+backend-assets/grpc/llama-stable: backend-assets/grpc go-llama-stable/libbinding.a
+	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama-stable
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama-stable LIBRARY_PATH=$(shell pwd)/go-llama-stable \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-stable ./cmd/grpc/llama-stable/

-docker-aio:
-	@echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)"
-	docker build \
-		--build-arg BASE_IMAGE=$(BASE_IMAGE) \
-		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
-		--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
-		--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
-		--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
-		--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
-		-t $(DOCKER_AIO_IMAGE) -f Dockerfile.aio .
+backend-assets/grpc/gpt4all: backend-assets/grpc backend-assets/gpt4all gpt4all/gpt4all-bindings/golang/libgpt4all.a
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(shell pwd)/gpt4all/gpt4all-bindings/golang/ \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./cmd/grpc/gpt4all/

-docker-aio-all:
-	$(MAKE) docker-aio DOCKER_AIO_SIZE=cpu
-	$(MAKE) docker-aio DOCKER_AIO_SIZE=cpu
+backend-assets/grpc/dolly: backend-assets/grpc go-ggml-transformers/libtransformers.a
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/dolly ./cmd/grpc/dolly/

-docker-image-intel:
-	docker build \
-		--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04 \
-		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
-		--build-arg GO_TAGS="$(GO_TAGS)" \
-		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
-		--build-arg BUILD_TYPE=intel \
-		--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
-		--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
-		--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
-		--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
-		-t $(DOCKER_IMAGE) .
+backend-assets/grpc/gpt2: backend-assets/grpc go-ggml-transformers/libtransformers.a
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt2 ./cmd/grpc/gpt2/

-########################################################
-## Backends
-########################################################
+backend-assets/grpc/gptj: backend-assets/grpc go-ggml-transformers/libtransformers.a
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gptj ./cmd/grpc/gptj/

-# Pattern rule for standard backends (docker-based)
-# This matches all backends that use docker-build-* and docker-save-*
-backends/%: docker-build-% docker-save-% build
-	./local-ai backends install "ocifile://$(abspath ./backend-images/$*.tar)"
+backend-assets/grpc/gptneox: backend-assets/grpc go-ggml-transformers/libtransformers.a
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gptneox ./cmd/grpc/gptneox/

-# Darwin-specific backends (keep as explicit targets since they have special build logic)
-backends/llama-cpp-darwin: build
-	bash ./scripts/build/llama-cpp-darwin.sh
-	./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)"
+backend-assets/grpc/mpt: backend-assets/grpc go-ggml-transformers/libtransformers.a
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/mpt ./cmd/grpc/mpt/

-build-darwin-python-backend: build
-	bash ./scripts/build/python-darwin.sh
+backend-assets/grpc/replit: backend-assets/grpc go-ggml-transformers/libtransformers.a
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/replit ./cmd/grpc/replit/

-build-darwin-go-backend: build
-	bash ./scripts/build/golang-darwin.sh
+backend-assets/grpc/falcon-ggml: backend-assets/grpc go-ggml-transformers/libtransformers.a
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/falcon-ggml ./cmd/grpc/falcon-ggml/

-backends/mlx:
-	BACKEND=mlx $(MAKE) build-darwin-python-backend
-	./local-ai backends install "ocifile://$(abspath ./backend-images/mlx.tar)"
+backend-assets/grpc/starcoder: backend-assets/grpc go-ggml-transformers/libtransformers.a
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/starcoder ./cmd/grpc/starcoder/

-backends/diffuser-darwin:
-	BACKEND=diffusers $(MAKE) build-darwin-python-backend
-	./local-ai backends install "ocifile://$(abspath ./backend-images/diffusers.tar)"
+backend-assets/grpc/rwkv: backend-assets/grpc go-rwkv/librwkv.a
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-rwkv LIBRARY_PATH=$(shell pwd)/go-rwkv \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./cmd/grpc/rwkv/

-backends/mlx-vlm:
-	BACKEND=mlx-vlm $(MAKE) build-darwin-python-backend
-	./local-ai backends install "ocifile://$(abspath ./backend-images/mlx-vlm.tar)"
+backend-assets/grpc/bloomz: backend-assets/grpc bloomz/libbloomz.a
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/bloomz LIBRARY_PATH=$(shell pwd)/bloomz \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bloomz ./cmd/grpc/bloomz/

-backends/mlx-audio:
-	BACKEND=mlx-audio $(MAKE) build-darwin-python-backend
-	./local-ai backends install "ocifile://$(abspath ./backend-images/mlx-audio.tar)"
+backend-assets/grpc/bert-embeddings: backend-assets/grpc go-bert/libgobert.a
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-bert LIBRARY_PATH=$(shell pwd)/go-bert \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./cmd/grpc/bert-embeddings/

-backends/stablediffusion-ggml-darwin:
-	BACKEND=stablediffusion-ggml BUILD_TYPE=metal $(MAKE) build-darwin-go-backend
-	./local-ai backends install "ocifile://$(abspath ./backend-images/stablediffusion-ggml.tar)"
+backend-assets/grpc/langchain-huggingface: backend-assets/grpc
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./cmd/grpc/langchain-huggingface/

-backend-images:
-	mkdir -p backend-images
+backend-assets/grpc/stablediffusion: backend-assets/grpc
+	if [ ! -f backend-assets/grpc/stablediffusion ]; then \
+		$(MAKE) go-stable-diffusion/libstablediffusion.a; \
+		CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-stable-diffusion/ LIBRARY_PATH=$(shell pwd)/go-stable-diffusion/ \
+		$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./cmd/grpc/stablediffusion/; \
+	fi

-# Backend metadata: BACKEND_NAME | DOCKERFILE_TYPE | BUILD_CONTEXT | PROGRESS_FLAG | NEEDS_BACKEND_ARG
-# llama-cpp is special - uses llama-cpp Dockerfile and doesn't need BACKEND arg
-BACKEND_LLAMA_CPP = llama-cpp|llama-cpp|.|false|false
+backend-assets/grpc/piper: backend-assets/grpc backend-assets/espeak-ng-data go-piper/libpiper_binding.a
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(shell pwd)/go-piper \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./cmd/grpc/piper/

-# Golang backends
-BACKEND_BARK_CPP = bark-cpp|golang|.|false|true
-BACKEND_PIPER = piper|golang|.|false|true
-BACKEND_LOCAL_STORE = local-store|golang|.|false|true
-BACKEND_HUGGINGFACE = huggingface|golang|.|false|true
-BACKEND_SILERO_VAD = silero-vad|golang|.|false|true
-BACKEND_STABLEDIFFUSION_GGML = stablediffusion-ggml|golang|.|--progress=plain|true
-BACKEND_WHISPER = whisper|golang|.|false|true
+backend-assets/grpc/whisper: backend-assets/grpc whisper.cpp/libwhisper.a
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/whisper.cpp LIBRARY_PATH=$(shell pwd)/whisper.cpp \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./cmd/grpc/whisper/

-# Python backends with root context
-BACKEND_RERANKERS = rerankers|python|.|false|true
-BACKEND_TRANSFORMERS = transformers|python|.|false|true
-BACKEND_FASTER_WHISPER = faster-whisper|python|.|false|true
-BACKEND_COQUI = coqui|python|.|false|true
-BACKEND_BARK = bark|python|.|false|true
-BACKEND_EXLLAMA2 = exllama2|python|.|false|true
-
-# Python backends with ./backend context
-BACKEND_RFDETR = rfdetr|python|./backend|false|true
-BACKEND_KITTEN_TTS = kitten-tts|python|./backend|false|true
-BACKEND_NEUTTS = neutts|python|./backend|false|true
-BACKEND_KOKORO = kokoro|python|./backend|false|true
-BACKEND_VLLM = vllm|python|./backend|false|true
-BACKEND_DIFFUSERS = diffusers|python|./backend|--progress=plain|true
-BACKEND_CHATTERBOX = chatterbox|python|./backend|false|true
-BACKEND_VIBEVOICE = vibevoice|python|./backend|--progress=plain|true
-BACKEND_MOONSHINE = moonshine|python|./backend|false|true
-
-# Helper function to build docker image for a backend
-# Usage: $(call docker-build-backend,BACKEND_NAME,DOCKERFILE_TYPE,BUILD_CONTEXT,PROGRESS_FLAG,NEEDS_BACKEND_ARG)
-define docker-build-backend
-	docker build $(if $(filter-out false,$(4)),$(4)) \
-		--build-arg BUILD_TYPE=$(BUILD_TYPE) \
-		--build-arg BASE_IMAGE=$(BASE_IMAGE) \
-		--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
-		--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
-		--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
-		--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
-		$(if $(filter true,$(5)),--build-arg BACKEND=$(1)) \
-		-t local-ai-backend:$(1) -f backend/Dockerfile.$(2) $(3)
-endef
-
-# Generate docker-build targets from backend definitions
-define generate-docker-build-target
-docker-build-$(word 1,$(subst |, ,$(1))):
-	$$(call docker-build-backend,$(word 1,$(subst |, ,$(1))),$(word 2,$(subst |, ,$(1))),$(word 3,$(subst |, ,$(1))),$(word 4,$(subst |, ,$(1))),$(word 5,$(subst |, ,$(1))))
-endef
-
-# Generate all docker-build targets
-$(eval $(call generate-docker-build-target,$(BACKEND_LLAMA_CPP)))
-$(eval $(call generate-docker-build-target,$(BACKEND_BARK_CPP)))
-$(eval $(call generate-docker-build-target,$(BACKEND_PIPER)))
-$(eval $(call generate-docker-build-target,$(BACKEND_LOCAL_STORE)))
-$(eval $(call generate-docker-build-target,$(BACKEND_HUGGINGFACE)))
-$(eval $(call generate-docker-build-target,$(BACKEND_SILERO_VAD)))
-$(eval $(call generate-docker-build-target,$(BACKEND_STABLEDIFFUSION_GGML)))
-$(eval $(call generate-docker-build-target,$(BACKEND_WHISPER)))
-$(eval $(call generate-docker-build-target,$(BACKEND_RERANKERS)))
-$(eval $(call generate-docker-build-target,$(BACKEND_TRANSFORMERS)))
-$(eval $(call generate-docker-build-target,$(BACKEND_FASTER_WHISPER)))
-$(eval $(call generate-docker-build-target,$(BACKEND_COQUI)))
-$(eval $(call generate-docker-build-target,$(BACKEND_BARK)))
-$(eval $(call generate-docker-build-target,$(BACKEND_EXLLAMA2)))
-$(eval $(call generate-docker-build-target,$(BACKEND_RFDETR)))
-$(eval $(call generate-docker-build-target,$(BACKEND_KITTEN_TTS)))
-$(eval $(call generate-docker-build-target,$(BACKEND_NEUTTS)))
-$(eval $(call generate-docker-build-target,$(BACKEND_KOKORO)))
-$(eval $(call generate-docker-build-target,$(BACKEND_VLLM)))
-$(eval $(call generate-docker-build-target,$(BACKEND_DIFFUSERS)))
-$(eval $(call generate-docker-build-target,$(BACKEND_CHATTERBOX)))
-$(eval $(call generate-docker-build-target,$(BACKEND_VIBEVOICE)))
-$(eval $(call generate-docker-build-target,$(BACKEND_MOONSHINE)))
-
-# Pattern rule for docker-save targets
-docker-save-%: backend-images
-	docker save local-ai-backend:$* -o backend-images/$*.tar
-
-docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-transformers docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-bark docker-build-chatterbox docker-build-vibevoice docker-build-exllama2 docker-build-moonshine
-
-########################################################
-### END Backends
-########################################################
-
-.PHONY: swagger
-swagger:
-	swag init -g core/http/app.go --output swagger
-
-.PHONY: gen-assets
-gen-assets:
-	$(GOCMD) run core/dependencies_manager/manager.go webui_static.yaml core/http/static/assets
-
-## Documentation
-docs/layouts/_default:
-	mkdir -p docs/layouts/_default
-
-docs/static/gallery.html: docs/layouts/_default
-	$(GOCMD) run ./.github/ci/modelslist.go ./gallery/index.yaml > docs/static/gallery.html
-
-docs/public: docs/layouts/_default docs/static/gallery.html
-	cd docs && hugo --minify
-
-docs-clean:
-	rm -rf docs/public
-	rm -rf docs/static/gallery.html
-
-.PHONY: docs
-docs: docs/static/gallery.html
-	cd docs && hugo serve
-
-########################################################
-## Platform-specific builds
-########################################################
-
-## fyne cross-platform build
-build-launcher-darwin: build-launcher
-	go run github.com/tiagomelo/macos-dmg-creator/cmd/createdmg@latest \
-	--appName "LocalAI" \
-	--appBinaryPath "$(LAUNCHER_BINARY_NAME)" \
-	--bundleIdentifier "com.localai.launcher" \
-	--iconPath "core/http/static/logo.png" \
-	--outputDir "dist/"
-
-build-launcher-linux:
-	cd cmd/launcher && go run fyne.io/tools/cmd/fyne@latest package -os linux -icon ../../core/http/static/logo.png --executable $(LAUNCHER_BINARY_NAME)-linux && mv launcher.tar.xz ../../$(LAUNCHER_BINARY_NAME)-linux.tar.xz
+grpcs: prepare $(GRPC_BACKENDS)
--- a/README.md
+++ b/README.md
@@ -1,6 +1,7 @@
 <h1 align="center">
  <br>
-  <img width="300" src="./core/http/static/logo.png"> <br>
+  <img height="300" src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd"> <br>
+    LocalAI
 <br>
 </h1>

@@ -19,370 +20,101 @@
 </a>
 </p>

-<p align="center">
-<a href="https://hub.docker.com/r/localai/localai" target="blank">
-<img src="https://img.shields.io/badge/dockerhub-images-important.svg?logo=Docker" alt="LocalAI Docker hub"/>
-</a>
-<a href="https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest" target="blank">
-<img src="https://img.shields.io/badge/quay.io-images-important.svg?" alt="LocalAI Quay.io"/>
-</a>
-</p>
-
-<p align="center">
-<a href="https://twitter.com/LocalAI_API" target="blank">
-<img src="https://img.shields.io/badge/X-%23000000.svg?style=for-the-badge&logo=X&logoColor=white&label=LocalAI_API" alt="Follow LocalAI_API"/>
-</a>
-<a href="https://discord.gg/uJAeKSAGDy" target="blank">
-<img src="https://img.shields.io/badge/dynamic/json?color=blue&label=Discord&style=for-the-badge&query=approximate_member_count&url=https%3A%2F%2Fdiscordapp.com%2Fapi%2Finvites%2FuJAeKSAGDy%3Fwith_counts%3Dtrue&logo=discord" alt="Join LocalAI Discord Community"/>
-</a>
-</p>
-
-<p align="center">
-<a href="https://trendshift.io/repositories/5539" target="_blank"><img src="https://trendshift.io/api/badge/repositories/5539" alt="mudler%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
-</p>
-
 > :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
->
-> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🛫 Examples](https://github.com/mudler/LocalAI-examples) Try on 
-[![Telegram](https://img.shields.io/badge/Telegram-2CA5E0?style=for-the-badge&logo=telegram&logoColor=white)](https://t.me/localaiofficial_bot)
+> 
+> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/)
+

 [![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai)

-**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that's compatible with OpenAI (Elevenlabs, Anthropic... ) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).
+**LocalAI** is a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs (and more) locally or on-prem with consumer-grade hardware, supporting multiple model families that are compatible with the ggml format, PyTorch and more. It does not require a GPU.

+<p align="center"><b>Follow LocalAI </b></p>

-## 📚🆕 Local Stack Family
-
-🆕 LocalAI is now part of a comprehensive suite of AI tools designed to work together:
-
-<table>
-  <tr>
-    <td width="50%" valign="top">
-      <a href="https://github.com/mudler/LocalAGI">
-        <img src="https://raw.githubusercontent.com/mudler/LocalAGI/refs/heads/main/webui/react-ui/public/logo_2.png" width="300" alt="LocalAGI Logo">
-      </a>
-    </td>
-    <td width="50%" valign="top">
-      <h3><a href="https://github.com/mudler/LocalAGI">LocalAGI</a></h3>
-      <p>A powerful Local AI agent management platform that serves as a drop-in replacement for OpenAI's Responses API, enhanced with advanced agentic capabilities.</p>
-    </td>
-  </tr>
-  <tr>
-    <td width="50%" valign="top">
-      <a href="https://github.com/mudler/LocalRecall">
-        <img src="https://raw.githubusercontent.com/mudler/LocalRecall/refs/heads/main/static/localrecall_horizontal.png" width="300" alt="LocalRecall Logo">
-      </a>
-    </td>
-    <td width="50%" valign="top">
-      <h3><a href="https://github.com/mudler/LocalRecall">LocalRecall</a></h3>
-      <p>A REST-ful API and knowledge base management system that provides persistent memory and storage capabilities for AI agents.</p>
-    </td>
-  </tr>
-</table>
-
-## Screenshots / Video
-
-### Youtube video
-
-<h1 align="center">
-  <br>
-  <a href="https://www.youtube.com/watch?v=PDqYhB9nNHA" target="_blank"> <img width="300" src="https://img.youtube.com/vi/PDqYhB9nNHA/0.jpg"> </a><br>
-<br>
-</h1>
-
-
-### Screenshots
-
-| Talk Interface | Generate Audio |
-| --- | --- |
-| ![Screenshot 2025-03-31 at 12-01-36 LocalAI - Talk](./docs/assets/images/screenshots/screenshot_tts.png) | ![Screenshot 2025-03-31 at 12-01-29 LocalAI - Generate audio with voice-en-us-ryan-low](./docs/assets/images/screenshots/screenshot_tts.png) |
-
-| Models Overview | Generate Images |
-| --- | --- |
-| ![Screenshot 2025-03-31 at 12-01-20 LocalAI - Models](./docs/assets/images/screenshots/screenshot_gallery.png) | ![Screenshot 2025-03-31 at 12-31-41 LocalAI - Generate images with flux 1-dev](./docs/assets/images/screenshots/screenshot_image.png) |
-
-| Chat Interface | Home |
-| --- | --- |
-| ![Screenshot 2025-03-31 at 11-57-44 LocalAI - Chat with localai-functioncall-qwen2 5-7b-v0 5](./docs/assets/images/screenshots/screenshot_chat.png) | ![Screenshot 2025-03-31 at 11-57-23 LocalAI API - c2a39e3 (c2a39e3639227cfd94ffffe9f5691239acc275a8)](./docs/assets/images/screenshots/screenshot_home.png) |
-
-| Login | Swarm |
-| --- | --- |
-|![Screenshot 2025-03-31 at 12-09-59 ](./docs/assets/images/screenshots/screenshot_login.png) | ![Screenshot 2025-03-31 at 12-10-39 LocalAI - P2P dashboard](./docs/assets/images/screenshots/screenshot_p2p.png) |
-
-## 💻 Quickstart
-
-Run the installer script:
-
-```bash
-# Basic installation
-curl https://localai.io/install.sh | sh
-```
-
-For more installation options, see [Installer Options](https://localai.io/installation/).
-
-### macOS Download:
-
-<a href="https://github.com/mudler/LocalAI/releases/latest/download/LocalAI.dmg">
-  <img src="https://img.shields.io/badge/Download-macOS-blue?style=for-the-badge&logo=apple&logoColor=white" alt="Download LocalAI for macOS"/>
+<p align="center">
+<a href="https://twitter.com/LocalAI_API" target="blank">
+<img src="https://img.shields.io/twitter/follow/LocalAI_API?label=Follow: LocalAI_API&style=social" alt="Follow LocalAI_API"/>
+</a>
+<a href="https://discord.gg/uJAeKSAGDy" target="blank">
+<img src="https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted" alt="Join LocalAI Discord Community"/>
 </a>

-> Note: the DMGs are not signed by Apple as quarantined. See https://github.com/mudler/LocalAI/issues/6268 for a workaround, fix is tracked here: https://github.com/mudler/LocalAI/issues/6244
+<p align="center"><b>Connect with the Creator </b></p>

-Or run with docker:
+<p align="center">
+<a href="https://twitter.com/mudler_it" target="blank">
+<img src="https://img.shields.io/twitter/follow/mudler_it?label=Follow: mudler_it&style=social" alt="Follow mudler_it"/>
+</a>
+<a href='https://github.com/mudler'>
+<img alt="Follow on Github" src="https://img.shields.io/badge/Follow-mudler-black?logo=github&link=https%3A%2F%2Fgithub.com%2Fmudler">
+</a>
+</p>

-> **💡 Docker Run vs Docker Start**
-> 
-> - `docker run` creates and starts a new container. If a container with the same name already exists, this command will fail.
-> - `docker start` starts an existing container that was previously created with `docker run`.
-> 
-> If you've already run LocalAI before and want to start it again, use: `docker start -i local-ai`
+<p align="center"><b>Share LocalAI Repository</b></p>

-### CPU only image:
+<p align="center">

-```bash
-docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
-```
+<a href="https://twitter.com/intent/tweet?text=Check%20this%20GitHub%20repository%20out.%20LocalAI%20-%20Let%27s%20you%20easily%20run%20LLM%20locally.&url=https://github.com/go-skynet/LocalAI&hashtags=LocalAI,AI" target="blank">
+<img src="https://img.shields.io/twitter/follow/_LocalAI?label=Share Repo on Twitter&style=social" alt="Follow _LocalAI"/></a> 
+<a href="https://t.me/share/url?text=Check%20this%20GitHub%20repository%20out.%20LocalAI%20-%20Let%27s%20you%20easily%20run%20LLM%20locally.&url=https://github.com/go-skynet/LocalAI" target="_blank"><img src="https://img.shields.io/twitter/url?label=Telegram&logo=Telegram&style=social&url=https://github.com/go-skynet/LocalAI" alt="Share on Telegram"/></a>
+<a href="https://api.whatsapp.com/send?text=Check%20this%20GitHub%20repository%20out.%20LocalAI%20-%20Let%27s%20you%20easily%20run%20LLM%20locally.%20https://github.com/go-skynet/LocalAI"><img src="https://img.shields.io/twitter/url?label=whatsapp&logo=whatsapp&style=social&url=https://github.com/go-skynet/LocalAI" /></a> <a href="https://www.reddit.com/submit?url=https://github.com/go-skynet/LocalAI&title=Check%20this%20GitHub%20repository%20out.%20LocalAI%20-%20Let%27s%20you%20easily%20run%20LLM%20locally.
+" target="blank">
+<img src="https://img.shields.io/twitter/url?label=Reddit&logo=Reddit&style=social&url=https://github.com/go-skynet/LocalAI" alt="Share on Reddit"/>
+</a> <a href="mailto:?subject=Check%20this%20GitHub%20repository%20out.%20LocalAI%20-%20Let%27s%20you%20easily%20run%20LLM%20locally.%3A%0Ahttps://github.com/go-skynet/LocalAI" target="_blank"><img src="https://img.shields.io/twitter/url?label=Gmail&logo=Gmail&style=social&url=https://github.com/go-skynet/LocalAI"/></a> <a href="https://www.buymeacoffee.com/mudler" target="_blank"><img src="https://cdn.buymeacoffee.com/buttons/default-orange.png" alt="Buy Me A Coffee" height="23" width="100" style="border-radius:1px"></a>

-### NVIDIA GPU Images:
+</p>

-```bash
-# CUDA 13.0
-docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-13
+<hr>

-# CUDA 12.0
-docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
+In a nutshell:

-# NVIDIA Jetson (L4T) ARM64
-# CUDA 12 (for Nvidia AGX Orin and similar platforms)
-docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-nvidia-l4t-arm64
+- Local, OpenAI drop-in alternative REST API. You own your data.
+- NO GPU required. NO Internet access is required either
+  - Optionally, GPU acceleration is available for `llama.cpp`-compatible LLMs. See also the [build section](https://localai.io/basics/build/index.html).
+- Supports multiple models
+- 🏃 Once loaded the first time, it keeps models loaded in memory for faster inference
+- ⚡ Doesn't shell out, but uses C++ bindings for faster inference and better performance.

-# CUDA 13 (for Nvidia DGX Spark)
-docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-nvidia-l4t-arm64-cuda-13
-```
+LocalAI was created by [Ettore Di Giacinto](https://github.com/mudler/) and is a community-driven project, focused on making AI accessible to anyone. Contributions, feedback and PRs are welcome!

-### AMD GPU Images (ROCm):
+Note that this started just as a [fun weekend project](https://localai.io/#backstory) in order to try to create the necessary pieces for a full AI assistant like `ChatGPT`: the community is growing fast and we are working hard to make it better and more stable. If you want to help, please consider contributing (see below)!

-```bash
-docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-gpu-hipblas
-```
-
-### Intel GPU Images (oneAPI):
-
-```bash
-docker run -ti --name local-ai -p 8080:8080 --device=/dev/dri/card1 --device=/dev/dri/renderD128 localai/localai:latest-gpu-intel
-```
-
-### Vulkan GPU Images:
-
-```bash
-docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-vulkan
-```
-
-### AIO Images (pre-downloaded models):
-
-```bash
-# CPU version
-docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
-
-# NVIDIA CUDA 13 version
-docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-13
-
-# NVIDIA CUDA 12 version
-docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
-
-# Intel GPU version
-docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel
-
-# AMD GPU version
-docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas
-```
-
-For more information about the AIO images and pre-downloaded models, see [Container Documentation](https://localai.io/basics/container/).
-
-To load models:
-
-```bash
-# From the model gallery (see available models with `local-ai models list`, in the WebUI from the model tab, or visiting https://models.localai.io)
-local-ai run llama-3.2-1b-instruct:q4_k_m
-# Start LocalAI with the phi-2 model directly from huggingface
-local-ai run huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf
-# Install and run a model from the Ollama OCI registry
-local-ai run ollama://gemma:2b
-# Run a model from a configuration file
-local-ai run https://gist.githubusercontent.com/.../phi-2.yaml
-# Install and run a model from a standard OCI registry (e.g., Docker Hub)
-local-ai run oci://localai/phi-2:latest
-```
-
-> ⚡ **Automatic Backend Detection**: When you install models from the gallery or YAML files, LocalAI automatically detects your system's GPU capabilities (NVIDIA, AMD, Intel) and downloads the appropriate backend. For advanced configuration options, see [GPU Acceleration](https://localai.io/features/gpu-acceleration/#automatic-backend-detection).
-
-For more information, see [💻 Getting started](https://localai.io/basics/getting_started/index.html), if you are interested in our roadmap items and future enhancements, you can see the [Issues labeled as Roadmap here](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
-
-## 📰 Latest project news
-
- December 2025: [Dynamic Memory Resource reclaimer](https://github.com/mudler/LocalAI/pull/7583), [Automatic fitting of models to multiple GPUS(llama.cpp)](https://github.com/mudler/LocalAI/pull/7584), [Added Vibevoice backend](https://github.com/mudler/LocalAI/pull/7494)
- November 2025: Major improvements to the UX. Among these: [Import models via URL](https://github.com/mudler/LocalAI/pull/7245) and [Multiple chats and history](https://github.com/mudler/LocalAI/pull/7325)
- October 2025: 🔌 [Model Context Protocol (MCP)](https://localai.io/docs/features/mcp/) support added for agentic capabilities with external tools
- September 2025: New Launcher application for MacOS and Linux, extended support to many backends for Mac and Nvidia L4T devices. Models: Added MLX-Audio, WAN 2.2. WebUI improvements and Python-based backends now ships portable python environments.
- August 2025: MLX, MLX-VLM, Diffusers and llama.cpp are now supported on Mac M1/M2/M3+ chips ( with `development` suffix in the gallery ): https://github.com/mudler/LocalAI/pull/6049 https://github.com/mudler/LocalAI/pull/6119 https://github.com/mudler/LocalAI/pull/6121 https://github.com/mudler/LocalAI/pull/6060
- July/August 2025: 🔍 [Object Detection](https://localai.io/features/object-detection/) added to the API featuring [rf-detr](https://github.com/roboflow/rf-detr)
- July 2025: All backends migrated outside of the main binary. LocalAI is now more lightweight, small, and automatically downloads the required backend to run the model. [Read the release notes](https://github.com/mudler/LocalAI/releases/tag/v3.2.0)
- June 2025: [Backend management](https://github.com/mudler/LocalAI/pull/5607) has been added. Attention: extras images are going to be deprecated from the next release! Read [the backend management PR](https://github.com/mudler/LocalAI/pull/5607).
- May 2025: [Audio input](https://github.com/mudler/LocalAI/pull/5466) and [Reranking](https://github.com/mudler/LocalAI/pull/5396) in llama.cpp backend, [Realtime API](https://github.com/mudler/LocalAI/pull/5392),  Support to Gemma, SmollVLM, and more multimodal models (available in the gallery).
- May 2025: Important: image name changes [See release](https://github.com/mudler/LocalAI/releases/tag/v2.29.0)
- Apr 2025: Rebrand, WebUI enhancements
- Apr 2025: [LocalAGI](https://github.com/mudler/LocalAGI) and [LocalRecall](https://github.com/mudler/LocalRecall) join the LocalAI family stack.
- Apr 2025: WebUI overhaul, AIO images updates
- Feb 2025: Backend cleanup, Breaking changes, new backends (kokoro, OutelTTS, faster-whisper), Nvidia L4T images
- Jan 2025: LocalAI model release: https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.3, SANA support in diffusers: https://github.com/mudler/LocalAI/pull/4603
- Dec 2024: stablediffusion.cpp backend (ggml) added ( https://github.com/mudler/LocalAI/pull/4289 )
- Nov 2024: Bark.cpp backend added ( https://github.com/mudler/LocalAI/pull/4287 )
- Nov 2024: Voice activity detection models (**VAD**) added to the API: https://github.com/mudler/LocalAI/pull/4204
- Oct 2024: examples moved to [LocalAI-examples](https://github.com/mudler/LocalAI-examples)
- Aug 2024:  🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723. P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
- May 2024: 🔥🔥 Decentralized P2P llama.cpp:  https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs  https://localai.io/features/distribute/
- May 2024: 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
- April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121
-
-Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
+## 🔥🔥 [Hot topics / Roadmap](https://localai.io/#-hot-topics--roadmap)

 ## 🚀 [Features](https://localai.io/features/)

- 🧩 [Backend Gallery](https://localai.io/backends/): Install/remove backends on the fly, powered by OCI images — fully customizable and API-driven.
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `transformers`, `vllm` ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
+- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
 - 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
 - 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
- 🎨 [Image generation](https://localai.io/features/image-generation)
- 🔥 [OpenAI-alike tools API](https://localai.io/features/openai-functions/) 
+- 🎨 [Image generation with stable diffusion](https://localai.io/features/image-generation)
+- 🔥 [OpenAI functions](https://localai.io/features/openai-functions/) 🆕
 - 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
 - ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
 - 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/)
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
- 🔍 [Object Detection](https://localai.io/features/object-detection/)
- 📈 [Reranker API](https://localai.io/features/reranker/)
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
- 🆕🔌 [Model Context Protocol (MCP)](https://localai.io/docs/features/mcp/) - Agentic capabilities with external tools and [LocalAGI's Agentic capabilities](https://github.com/mudler/LocalAGI)
- 🔊 Voice activity detection (Silero-VAD support)
- 🌍 Integrated WebUI!

-## 🧩 Supported Backends & Acceleration
-
-LocalAI supports a comprehensive range of AI backends with multiple acceleration options:
-
-### Text Generation & Language Models
-| Backend | Description | Acceleration Support |
-|---------|-------------|---------------------|
-| **llama.cpp** | LLM inference in C/C++ | CUDA 12/13, ROCm, Intel SYCL, Vulkan, Metal, CPU |
-| **vLLM** | Fast LLM inference with PagedAttention | CUDA 12/13, ROCm, Intel |
-| **transformers** | HuggingFace transformers framework | CUDA 12/13, ROCm, Intel, CPU |
-| **exllama2** | GPTQ inference library | CUDA 12/13 |
-| **MLX** | Apple Silicon LLM inference | Metal (M1/M2/M3+) |
-| **MLX-VLM** | Apple Silicon Vision-Language Models | Metal (M1/M2/M3+) |
-
-### Audio & Speech Processing
-| Backend | Description | Acceleration Support |
-|---------|-------------|---------------------|
-| **whisper.cpp** | OpenAI Whisper in C/C++ | CUDA 12/13, ROCm, Intel SYCL, Vulkan, CPU |
-| **faster-whisper** | Fast Whisper with CTranslate2 | CUDA 12/13, ROCm, Intel, CPU |
-| **bark** | Text-to-audio generation | CUDA 12/13, ROCm, Intel |
-| **bark-cpp** | C++ implementation of Bark | CUDA, Metal, CPU |
-| **coqui** | Advanced TTS with 1100+ languages | CUDA 12/13, ROCm, Intel, CPU |
-| **kokoro** | Lightweight TTS model | CUDA 12/13, ROCm, Intel, CPU |
-| **chatterbox** | Production-grade TTS | CUDA 12/13, CPU |
-| **piper** | Fast neural TTS system | CPU |
-| **kitten-tts** | Kitten TTS models | CPU |
-| **silero-vad** | Voice Activity Detection | CPU |
-| **neutts** | Text-to-speech with voice cloning | CUDA 12/13, ROCm, CPU |
-| **vibevoice** | Real-time TTS with voice cloning | CUDA 12/13, ROCm, Intel, CPU |
-
-### Image & Video Generation
-| Backend | Description | Acceleration Support |
-|---------|-------------|---------------------|
-| **stablediffusion.cpp** | Stable Diffusion in C/C++ | CUDA 12/13, Intel SYCL, Vulkan, CPU |
-| **diffusers** | HuggingFace diffusion models | CUDA 12/13, ROCm, Intel, Metal, CPU |
-
-### Specialized AI Tasks
-| Backend | Description | Acceleration Support |
-|---------|-------------|---------------------|
-| **rfdetr** | Real-time object detection | CUDA 12/13, Intel, CPU |
-| **rerankers** | Document reranking API | CUDA 12/13, ROCm, Intel, CPU |
-| **local-store** | Vector database | CPU |
-| **huggingface** | HuggingFace API integration | API-based |
-
-### Hardware Acceleration Matrix
-
-| Acceleration Type | Supported Backends | Hardware Support |
-|-------------------|-------------------|------------------|
-| **NVIDIA CUDA 12** | All CUDA-compatible backends | Nvidia hardware |
-| **NVIDIA CUDA 13** | All CUDA-compatible backends | Nvidia hardware |
-| **AMD ROCm** | llama.cpp, whisper, vllm, transformers, diffusers, rerankers, coqui, kokoro, bark, neutts, vibevoice | AMD Graphics |
-| **Intel oneAPI** | llama.cpp, whisper, stablediffusion, vllm, transformers, diffusers, rfdetr, rerankers, exllama2, coqui, kokoro, bark, vibevoice | Intel Arc, Intel iGPUs |
-| **Apple Metal** | llama.cpp, whisper, diffusers, MLX, MLX-VLM, bark-cpp | Apple M1/M2/M3+ |
-| **Vulkan** | llama.cpp, whisper, stablediffusion | Cross-platform GPUs |
-| **NVIDIA Jetson (CUDA 12)** | llama.cpp, whisper, stablediffusion, diffusers, rfdetr | ARM64 embedded AI (AGX Orin, etc.) |
-| **NVIDIA Jetson (CUDA 13)** | llama.cpp, whisper, stablediffusion, diffusers, rfdetr | ARM64 embedded AI (DGX Spark) |
-| **CPU Optimized** | All backends | AVX/AVX2/AVX512, quantization support |
-
-### 🔗 Community and integrations
-
-Build and deploy custom containers:
- https://github.com/sozercan/aikit
-
-WebUIs:
- https://github.com/Jirubizu/localai-admin
- https://github.com/go-skynet/LocalAI-frontend
- QA-Pilot(An interactive chat project that leverages LocalAI LLMs for rapid understanding and navigation of GitHub code repository) https://github.com/reid41/QA-Pilot
-
-Agentic Libraries:
- https://github.com/mudler/cogito
-
-MCPs:
- https://github.com/mudler/MCPs
-
-Model galleries
- https://github.com/go-skynet/model-gallery
-
-Voice:
- https://github.com/richiejp/VoxInput
-
-Other:
- Helm chart https://github.com/go-skynet/helm-charts
- VSCode extension https://github.com/badgooooor/localai-vscode-plugin
- Langchain: https://python.langchain.com/docs/integrations/providers/localai/
- Terminal utility https://github.com/djcopley/ShellOracle
- Local Smart assistant https://github.com/mudler/LocalAGI
- Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation / https://github.com/valentinfrlch/ha-gpt4vision
- Discord bot https://github.com/mudler/LocalAGI/tree/main/examples/discord
- Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
- Shell-Pilot(Interact with LLM using LocalAI models via pure shell scripts on your Linux or MacOS system) https://github.com/reid41/shell-pilot
- Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
- Another Telegram Bot https://github.com/JackBekket/Hellper
- Auto-documentation https://github.com/JackBekket/Reflexia
- Github bot which answer on issues, with code and documentation as context https://github.com/JackBekket/GitHelper
- Github Actions: https://github.com/marketplace/actions/start-localai
- Examples: https://github.com/mudler/LocalAI/tree/master/examples/
-  
-
-### 🔗 Resources
-
- [LLM finetuning guide](https://localai.io/docs/advanced/fine-tuning/)
- [How to build locally](https://localai.io/basics/build/index.html)
- [How to install in Kubernetes](https://localai.io/basics/getting_started/index.html#run-localai-in-kubernetes)
- [Projects integrating LocalAI](https://localai.io/docs/integrations/)
- [How tos section](https://io.midori-ai.xyz/howtos/) (curated by our community)

 ## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social)

- [Run Visual studio code with LocalAI (SUSE)](https://www.suse.com/c/running-ai-locally/)
- 🆕 [Run LocalAI on Jetson Nano Devkit](https://mudler.pm/posts/local-ai-jetson-nano-devkit/)
- [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/blog/low-code-llm-apps-with-local-ai-flowise-and-pulumi/)
- [Run LocalAI on AWS](https://staleks.hashnode.dev/installing-localai-on-aws-ec2-instance)
 - [Create a slackbot for teams and OSS projects that answer to documentation](https://mudler.pm/posts/smart-slackbot-for-teams/)
 - [LocalAI meets k8sgpt](https://www.youtube.com/watch?v=PKrDNuJ_dfE)
 - [Question Answering on Documents locally with LangChain, LocalAI, Chroma, and GPT4All](https://mudler.pm/posts/localai-question-answering/)
 - [Tutorial to use k8sgpt with LocalAI](https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65)

+## 💻 Usage
+
+Check out the [Getting started](https://localai.io/basics/getting_started/index.html) section in our documentation.
+
+### 💡 Example: Use Luna-AI Llama model
+
+See the [documentation](https://localai.io/basics/getting_started)
+
+### 🔗 Resources
+
+- [How to build locally](https://localai.io/basics/build/index.html)
+- [How to install in Kubernetes](https://localai.io/basics/getting_started/index.html#run-localai-in-kubernetes)
+- [Projects integrating LocalAI](https://localai.io/integrations/)
+- [How tos section](https://localai.io/howtos/) (curated by our community)
+  
 ## Citation

 If you utilize this repository, data in a downstream project, please consider citing it with:
@@ -403,20 +135,17 @@ If you utilize this repository, data in a downstream project, please consider ci

 Support the project by becoming [a backer or sponsor](https://github.com/sponsors/mudler). Your logo will show up here with a link to your website.

-A huge thank you to our generous sponsors who support this project covering CI expenses, and our [Sponsor list](https://github.com/sponsors/mudler):
+A huge thank you to our generous sponsors who support this project:

-<p align="center">
-  <a href="https://www.spectrocloud.com/" target="blank">
-    <img height="200" src="https://github.com/user-attachments/assets/72eab1dd-8b93-4fc0-9ade-84db49f24962">
-  </a>
-  <a href="https://www.premai.io/" target="blank">
-    <img height="200" src="https://github.com/mudler/LocalAI/assets/2420543/42e4ca83-661e-4f79-8e46-ae43689683d6"> <br>
-  </a>
-</p>
+| ![Spectro Cloud logo_600x600px_transparent bg](https://github.com/go-skynet/LocalAI/assets/2420543/68a6f3cb-8a65-4a4d-99b5-6417a8905512) | 
+|:-----------------------------------------------:|
+|  [Spectro Cloud](https://www.spectrocloud.com/)  |  
+|  Spectro Cloud kindly supports LocalAI by providing GPU and computing resources to run tests on Lambda Labs!  |

-### Individual sponsors
+And a huge shout-out to individuals sponsoring the project by donating hardware or backing the project. 

-A special thanks to individual sponsors that contributed to the project, a full list is in [Github](https://github.com/sponsors/mudler) and [buymeacoffee](https://buymeacoffee.com/mudler), a special shout out goes to [drikster80](https://github.com/drikster80) for being generous. Thank you everyone!
+- [Sponsor list](https://github.com/sponsors/mudler)
+- JDAM00 (donating HW for the CI)

 ## 🌟 Star history

@@ -426,7 +155,7 @@ A special thanks to individual sponsors that contributed to the project, a full

 LocalAI is a community-driven project created by [Ettore Di Giacinto](https://github.com/mudler/).

-MIT - Author Ettore Di Giacinto <mudler@localai.io>
+MIT - Author Ettore Di Giacinto

 ## 🙇 Acknowledgements

@@ -438,7 +167,9 @@ LocalAI couldn't have been built without the help of great software already avai
 - https://github.com/antimatter15/alpaca.cpp
 - https://github.com/EdVince/Stable-Diffusion-NCNN
 - https://github.com/ggerganov/whisper.cpp
+- https://github.com/saharNooby/rwkv.cpp
 - https://github.com/rhasspy/piper
+- https://github.com/cmp-nct/ggllm.cpp

 ## 🤗 Contributors

--- a/SECURITY.md
+++ b/SECURITY.md
@@ -1,42 +0,0 @@
-# Security Policy
-
-## Introduction
-
-At LocalAI, we take the security of our software seriously. We understand the importance of protecting our community from vulnerabilities and are committed to ensuring the safety and security of our users.
-
-## Supported Versions
-
-We provide support and updates for certain versions of our software. The following table outlines which versions are currently supported with security updates:
-
-| Version | Supported          |
-| ------- | ------------------ |
-| > 2.0   | :white_check_mark: |
-| < 2.0   | :x:                |
-
-Please ensure that you are using a supported version to receive the latest security updates.
-
-## Reporting a Vulnerability
-
-We encourage the responsible disclosure of any security vulnerabilities. If you believe you've found a security issue in our software, we kindly ask you to follow the steps below to report it to us:
-
-1. **Email Us:** Send an email to [security@localai.io](mailto:security@localai.io) with a detailed report. Please do not disclose the vulnerability publicly or to any third parties before it has been addressed by us.
-
-2. **Expect a Response:** We aim to acknowledge receipt of vulnerability reports within 48 hours. Our security team will review your report and work closely with you to understand the impact and ensure a thorough investigation.
-
-3. **Collaboration:** If the vulnerability is accepted, we will work with you and our community to address the issue promptly. We'll keep you informed throughout the resolution process and may request additional information or collaboration.
-
-4. **Disclosure:** Once the vulnerability has been resolved, we encourage a coordinated disclosure. We believe in transparency and will work with you to ensure that our community is informed in a responsible manner.
-
-## Use of Third-Party Platforms
-
-As a Free and Open Source Software (FOSS) organization, we do not offer monetary bounties. However, researchers who wish to report vulnerabilities can also do so via [Huntr](https://huntr.dev/bounties), a platform that recognizes contributions to open source security.
-
-## Contact
-
-For any security-related inquiries beyond vulnerability reporting, please contact us at [security@localai.io](mailto:security@localai.io).
-
-## Acknowledgments
-
-We appreciate the efforts of those who contribute to the security of our project. Your responsible disclosure is invaluable to the safety and integrity of LocalAI.
-
-Thank you for helping us keep LocalAI secure.
--- a/aio/cpu/README.md
+++ b/aio/cpu/README.md
@@ -1,5 +0,0 @@
-## AIO CPU size
-
-Use this image for CPU-only deployments.
-
-Please keep using only C++ backends so the base image is as small as possible (without CUDA, cuDNN, Python, etc.).
--- a/aio/cpu/embeddings.yaml
+++ b/aio/cpu/embeddings.yaml
@@ -1,13 +0,0 @@
-embeddings: true
-name: text-embedding-ada-002
-backend: llama-cpp
-parameters:
-  model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
-
-usage: |
-    You can test this model with curl like this:
-
-    curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
-      "input": "Your text string goes here",
-      "model": "text-embedding-ada-002"
-    }'
--- a/aio/cpu/image-gen.yaml
+++ b/aio/cpu/image-gen.yaml
@@ -1,23 +0,0 @@
-name: stablediffusion
-backend: stablediffusion-ggml
-cfg_scale: 4.5
-
-options:
-- sampler:euler
-parameters:
-  model: stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf
-step: 25
-
-download_files:
-- filename: "stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
-  sha256: "b8944e9fe0b69b36ae1b5bb0185b3a7b8ef14347fe0fa9af6c64c4829022261f"
-  uri: "huggingface://second-state/stable-diffusion-v1-5-GGUF/stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
-
-usage: |
-        curl http://localhost:8080/v1/images/generations \
-          -H "Content-Type: application/json" \
-          -d '{
-            "prompt": "<positive prompt>|<negative prompt>",
-            "step": 25,
-            "size": "512x512"
-          }'
--- a/aio/cpu/rerank.yaml
+++ b/aio/cpu/rerank.yaml
@@ -1,33 +0,0 @@
-name: jina-reranker-v1-base-en
-reranking: true
-f16: true
-parameters:
-  model: jina-reranker-v1-tiny-en.f16.gguf
-backend: llama-cpp
-download_files:
-  - filename: jina-reranker-v1-tiny-en.f16.gguf
-    sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
-    uri: huggingface://mradermacher/jina-reranker-v1-tiny-en-GGUF/jina-reranker-v1-tiny-en.f16.gguf 
-
-usage: |
-    You can test this model with curl like this:
-
-    curl http://localhost:8080/v1/rerank \
-      -H "Content-Type: application/json" \
-      -d '{
-      "model": "jina-reranker-v1-base-en",
-      "query": "Organic skincare products for sensitive skin",
-      "documents": [
-        "Eco-friendly kitchenware for modern homes",
-        "Biodegradable cleaning supplies for eco-conscious consumers",
-        "Organic cotton baby clothes for sensitive skin",
-        "Natural organic skincare range for sensitive skin",
-        "Tech gadgets for smart homes: 2024 edition",
-        "Sustainable gardening tools and compost solutions",
-        "Sensitive skin-friendly facial cleansers and toners",
-        "Organic food wraps and storage solutions",
-        "All-natural pet food for dogs with allergies",
-        "Yoga mats made from recycled materials"
-      ],
-      "top_n": 3
-    }'
--- a/aio/cpu/speech-to-text.yaml
+++ b/aio/cpu/speech-to-text.yaml
@@ -1,18 +0,0 @@
-name: whisper-1
-backend: whisper
-parameters:
-  model: ggml-whisper-base.bin
-
-usage: |
-    ## example audio file
-    wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
-
-    ## Send the example audio file to the transcriptions endpoint
-    curl http://localhost:8080/v1/audio/transcriptions \
-         -H "Content-Type: multipart/form-data" \
-         -F file="@$PWD/gb1.ogg" -F model="whisper-1"
-
-download_files:
-- filename: "ggml-whisper-base.bin"
-  sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
-  uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"
--- a/aio/cpu/text-to-speech.yaml
+++ b/aio/cpu/text-to-speech.yaml
@@ -1,15 +0,0 @@
-name: tts-1
-download_files:
-  - filename: voice-en-us-amy-low.tar.gz
-    uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
-backend: piper
-parameters:
-  model: en-us-amy-low.onnx
-
-usage: |
-    To test if this model works as expected, you can use the following curl command:
-
-    curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
-      "model":"tts-1",
-      "input": "Hi, this is a test."
-    }'
--- a/aio/cpu/text-to-text.yaml
+++ b/aio/cpu/text-to-text.yaml
@@ -1,58 +0,0 @@
-context_size: 8192
-f16: true
-backend: llama-cpp
-function:
-  grammar:
-    no_mixed_free_string: true
-    schema_type: llama3.1 # or JSON is supported too (json)
-  response_regex:
-  - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
-mmap: true
-name: gpt-4
-parameters:
-  model: Hermes-3-Llama-3.2-3B-Q4_K_M.gguf
-stopwords:
-- <|im_end|>
-- <dummy32000>
-- <|eot_id|>
-- <|end_of_text|>
-template:
-  chat: |
-    <|begin_of_text|><|start_header_id|>system<|end_header_id|>
-    You are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>
-    {{.Input }}
-    <|start_header_id|>assistant<|end_header_id|>
-  chat_message: |
-    <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
-    {{ if .FunctionCall -}}
-    {{ else if eq .RoleName "tool" -}}
-    The Function was executed and the response was:
-    {{ end -}}
-    {{ if .Content -}}
-    {{.Content -}}
-    {{ else if .FunctionCall -}}
-    {{ range .FunctionCall }}
-    [{{.FunctionCall.Name}}({{.FunctionCall.Arguments}})]
-    {{ end }}
-    {{ end -}}
-    <|eot_id|>
-  completion: |
-    {{.Input}}
-  function: |
-    <|start_header_id|>system<|end_header_id|>
-    You are an expert in composing functions. You are given a question and a set of possible functions.
-    Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
-    If none of the functions can be used, point it out. If the given question lacks the parameters required by the function, also point it out. You should only return the function call in tools call sections.
-    If you decide to invoke any of the function(s), you MUST put it in the format as follows:
-    [func_name1(params_name1=params_value1,params_name2=params_value2,...),func_name2(params_name1=params_value1,params_name2=params_value2,...)]
-    You SHOULD NOT include any other text in the response.
-    Here is a list of functions in JSON format that you can invoke.
-    {{toJson .Functions}}
-    <|eot_id|><|start_header_id|>user<|end_header_id|>
-    {{.Input}}
-    <|eot_id|><|start_header_id|>assistant<|end_header_id|>
-
-download_files:
-- filename: Hermes-3-Llama-3.2-3B-Q4_K_M.gguf
-  sha256: 2e220a14ba4328fee38cf36c2c068261560f999fadb5725ce5c6d977cb5126b5
-  uri: huggingface://bartowski/Hermes-3-Llama-3.2-3B-GGUF/Hermes-3-Llama-3.2-3B-Q4_K_M.gguf
--- a/aio/cpu/vad.yaml
+++ b/aio/cpu/vad.yaml
@@ -1,8 +0,0 @@
-backend: silero-vad
-name: silero-vad
-parameters:
-  model: silero-vad.onnx
-download_files:
-- filename: silero-vad.onnx
-  uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
-  sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
--- a/aio/cpu/vision.yaml
+++ b/aio/cpu/vision.yaml
@@ -1,50 +0,0 @@
-context_size: 4096
-f16: true
-backend: llama-cpp
-mmap: true
-mmproj: minicpm-v-4_5-mmproj-f16.gguf
-name: gpt-4o
-parameters:
-  model: minicpm-v-4_5-Q4_K_M.gguf
-stopwords:
-- <|im_end|>
-- <dummy32000>
-- </s>
-- <|endoftext|>
-template:
-  chat: |
-    {{.Input -}}
-    <|im_start|>assistant
-  chat_message: |
-    <|im_start|>{{ .RoleName }}
-    {{ if .FunctionCall -}}
-    Function call:
-    {{ else if eq .RoleName "tool" -}}
-    Function response:
-    {{ end -}}
-    {{ if .Content -}}
-    {{.Content }}
-    {{ end -}}
-    {{ if .FunctionCall -}}
-    {{toJson .FunctionCall}}
-    {{ end -}}<|im_end|>
-  completion: |
-    {{.Input}}
-  function: |
-    <|im_start|>system
-    You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
-    {{range .Functions}}
-    {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
-    {{end}}
-    For each function call return a json object with function name and arguments
-    <|im_end|>
-    {{.Input -}}
-    <|im_start|>assistant
-
-download_files:
-- filename: minicpm-v-4_5-Q4_K_M.gguf
-  sha256: c1c3c33100b15b4caf7319acce4e23c0eb0ce1cbd12f70e8d24f05aa67b7512f
-  uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/ggml-model-Q4_K_M.gguf
-- filename: minicpm-v-4_5-mmproj-f16.gguf
-  uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/mmproj-model-f16.gguf
-  sha256: 7a7225a32e8d453aaa3d22d8c579b5bf833c253f784cdb05c99c9a76fd616df8
--- a/aio/entrypoint.sh
+++ b/aio/entrypoint.sh
@@ -1,138 +0,0 @@
-#!/bin/bash
-
-echo "===> LocalAI All-in-One (AIO) container starting..."
-
-GPU_ACCELERATION=false
-GPU_VENDOR=""
-
-function check_intel() {
-    if lspci | grep -E 'VGA|3D' | grep -iq intel; then
-        echo "Intel GPU detected"
-        if [ -d /opt/intel ]; then
-            GPU_ACCELERATION=true
-            GPU_VENDOR=intel
-        else
-            echo "Intel GPU detected, but Intel GPU drivers are not installed. GPU acceleration will not be available."
-        fi
-    fi
-}
-
-function check_nvidia_wsl() {
-    if lspci | grep -E 'VGA|3D' | grep -iq "Microsoft Corporation Device 008e"; then
-        # We make the assumption this WSL2 card is NVIDIA, then check for nvidia-smi
-        # Make sure the container was run with `--gpus all` as the only required parameter
-        echo "NVIDIA GPU detected via WSL2"
-        # nvidia-smi should be installed in the container
-        if nvidia-smi; then
-            GPU_ACCELERATION=true
-            GPU_VENDOR=nvidia
-        else
-            echo "NVIDIA GPU detected via WSL2, but nvidia-smi is not installed. GPU acceleration will not be available."
-        fi
-    fi
-}
-
-function check_amd() {
-    if lspci | grep -E 'VGA|3D' | grep -iq amd; then
-        echo "AMD GPU detected"
-        # Check if ROCm is installed
-        if [ -d /opt/rocm ]; then
-            GPU_ACCELERATION=true
-            GPU_VENDOR=amd
-        else
-            echo "AMD GPU detected, but ROCm is not installed. GPU acceleration will not be available."
-        fi
-    fi
-}
-
-function check_nvidia() {
-    if lspci | grep -E 'VGA|3D' | grep -iq nvidia; then
-        echo "NVIDIA GPU detected"
-        # nvidia-smi should be installed in the container
-        if nvidia-smi; then
-            GPU_ACCELERATION=true
-            GPU_VENDOR=nvidia
-        else
-            echo "NVIDIA GPU detected, but nvidia-smi is not installed. GPU acceleration will not be available."
-        fi
-    fi
-}
-
-function check_metal() {
-    if system_profiler SPDisplaysDataType | grep -iq 'Metal'; then
-        echo "Apple Metal supported GPU detected"
-        GPU_ACCELERATION=true
-        GPU_VENDOR=apple
-    fi
-}
-
-function detect_gpu() {
-    case "$(uname -s)" in
-        Linux)
-            check_nvidia
-            check_amd
-            check_intel
-            check_nvidia_wsl
-            ;;
-        Darwin)
-            check_metal
-            ;;
-    esac
-}
-
-function detect_gpu_size() {
-    # Attempting to find GPU memory size for NVIDIA GPUs
-    if [ "$GPU_ACCELERATION" = true ] && [ "$GPU_VENDOR" = "nvidia" ]; then
-        echo "NVIDIA GPU detected. Attempting to find memory size..."
-        # Using head -n 1 to get the total memory of the 1st NVIDIA GPU detected.
-        # If handling multiple GPUs is required in the future, this is the place to do it
-        nvidia_sm=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits | head -n 1)
-        if [ ! -z "$nvidia_sm" ]; then
-            echo "Total GPU Memory: $nvidia_sm MiB"
-            # if bigger than 8GB, use 16GB
-            #if [ "$nvidia_sm" -gt 8192 ]; then
-            #    GPU_SIZE=gpu-16g
-            #else
-            GPU_SIZE=gpu-8g
-            #fi
-        else
-            echo "Unable to determine NVIDIA GPU memory size. Falling back to CPU."
-            GPU_SIZE=gpu-8g
-        fi
-    elif [ "$GPU_ACCELERATION" = true ] && [ "$GPU_VENDOR" = "intel" ]; then
-        GPU_SIZE=intel
-    # Default to a generic GPU size until we implement GPU size detection for non-NVIDIA GPUs
-    elif [ "$GPU_ACCELERATION" = true ]; then
-        echo "Non-NVIDIA GPU detected. Specific GPU memory size detection is not implemented."
-        GPU_SIZE=gpu-8g
-
-    # default to cpu if GPU_SIZE is not set
-    else
-        echo "GPU acceleration is not enabled or supported. Defaulting to CPU."
-        GPU_SIZE=cpu
-    fi
-}
-
-function check_vars() {
-    if [ -z "$MODELS" ]; then
-        echo "MODELS environment variable is not set. Please set it to a comma-separated list of model YAML files to load."
-        exit 1
-    fi
-
-    if [ -z "$PROFILE" ]; then
-        echo "PROFILE environment variable is not set. Please set it to one of the following: cpu, gpu-8g, gpu-16g, apple"
-        exit 1
-    fi
-}
-
-detect_gpu
-detect_gpu_size
-
-PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
-export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vad.yaml,/aio/${PROFILE}/vision.yaml}"
-
-check_vars
-
-echo "===> Starting LocalAI[$PROFILE] with the following models: $MODELS"
-
-exec /entrypoint.sh "$@"
--- a/aio/gpu-8g/embeddings.yaml
+++ b/aio/gpu-8g/embeddings.yaml
@@ -1,13 +0,0 @@
-embeddings: true
-name: text-embedding-ada-002
-backend: llama-cpp
-parameters:
-  model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
-
-usage: |
-    You can test this model with curl like this:
-
-    curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
-      "input": "Your text string goes here",
-      "model": "text-embedding-ada-002"
-    }'
--- a/aio/gpu-8g/image-gen.yaml
+++ b/aio/gpu-8g/image-gen.yaml
@@ -1,25 +0,0 @@
-name: stablediffusion
-parameters:
-  model: DreamShaper_8_pruned.safetensors
-backend: diffusers
-step: 25
-f16: true
-
-diffusers:
-  pipeline_type: StableDiffusionPipeline
-  cuda: true
-  enable_parameters: "negative_prompt,num_inference_steps"
-  scheduler_type: "k_dpmpp_2m"
-
-download_files:
-- filename: DreamShaper_8_pruned.safetensors
-  uri: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors
-
-usage: |
-        curl http://localhost:8080/v1/images/generations \
-          -H "Content-Type: application/json" \
-          -d '{
-            "prompt": "<positive prompt>|<negative prompt>",
-            "step": 25,
-            "size": "512x512"
-          }'
--- a/aio/gpu-8g/rerank.yaml
+++ b/aio/gpu-8g/rerank.yaml
@@ -1,33 +0,0 @@
-name: jina-reranker-v1-base-en
-reranking: true
-f16: true
-parameters:
-  model: jina-reranker-v1-tiny-en.f16.gguf
-backend: llama-cpp
-download_files:
-  - filename: jina-reranker-v1-tiny-en.f16.gguf
-    sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
-    uri: huggingface://mradermacher/jina-reranker-v1-tiny-en-GGUF/jina-reranker-v1-tiny-en.f16.gguf 
-
-usage: |
-    You can test this model with curl like this:
-
-    curl http://localhost:8080/v1/rerank \
-      -H "Content-Type: application/json" \
-      -d '{
-      "model": "jina-reranker-v1-base-en",
-      "query": "Organic skincare products for sensitive skin",
-      "documents": [
-        "Eco-friendly kitchenware for modern homes",
-        "Biodegradable cleaning supplies for eco-conscious consumers",
-        "Organic cotton baby clothes for sensitive skin",
-        "Natural organic skincare range for sensitive skin",
-        "Tech gadgets for smart homes: 2024 edition",
-        "Sustainable gardening tools and compost solutions",
-        "Sensitive skin-friendly facial cleansers and toners",
-        "Organic food wraps and storage solutions",
-        "All-natural pet food for dogs with allergies",
-        "Yoga mats made from recycled materials"
-      ],
-      "top_n": 3
-    }'
--- a/aio/gpu-8g/speech-to-text.yaml
+++ b/aio/gpu-8g/speech-to-text.yaml
@@ -1,18 +0,0 @@
-name: whisper-1
-backend: whisper
-parameters:
-  model: ggml-whisper-base.bin
-
-usage: |
-    ## example audio file
-    wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
-
-    ## Send the example audio file to the transcriptions endpoint
-    curl http://localhost:8080/v1/audio/transcriptions \
-         -H "Content-Type: multipart/form-data" \
-         -F file="@$PWD/gb1.ogg" -F model="whisper-1"
-
-download_files:
-- filename: "ggml-whisper-base.bin"
-  sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
-  uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"
--- a/aio/gpu-8g/text-to-speech.yaml
+++ b/aio/gpu-8g/text-to-speech.yaml
@@ -1,15 +0,0 @@
-name: tts-1
-download_files:
-  - filename: voice-en-us-amy-low.tar.gz
-    uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
-backend: piper
-parameters:
-  model: en-us-amy-low.onnx
-
-usage: |
-    To test if this model works as expected, you can use the following curl command:
-
-    curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
-      "model":"tts-1",
-      "input": "Hi, this is a test."
-    }'
--- a/aio/gpu-8g/text-to-text.yaml
+++ b/aio/gpu-8g/text-to-text.yaml
@@ -1,54 +0,0 @@
-context_size: 4096
-f16: true
-backend: llama-cpp
-function:
-  capture_llm_results:
-  - (?s)<Thought>(.*?)</Thought>
-  grammar:
-    properties_order: name,arguments
-  json_regex_match:
-  - (?s)<Output>(.*?)</Output>
-  replace_llm_results:
-  - key: (?s)<Thought>(.*?)</Thought>
-    value: ""
-mmap: true
-name: gpt-4
-parameters:
-  model: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
-stopwords:
-- <|im_end|>
-- <dummy32000>
-- </s>
-template:
-  chat: |
-    {{.Input -}}
-    <|im_start|>assistant
-  chat_message: |
-    <|im_start|>{{ .RoleName }}
-    {{ if .FunctionCall -}}
-    Function call:
-    {{ else if eq .RoleName "tool" -}}
-    Function response:
-    {{ end -}}
-    {{ if .Content -}}
-    {{.Content }}
-    {{ end -}}
-    {{ if .FunctionCall -}}
-    {{toJson .FunctionCall}}
-    {{ end -}}<|im_end|>
-  completion: |
-    {{.Input}}
-  function: |
-    <|im_start|>system
-    You are an AI assistant that executes function calls, and these are the tools at your disposal:
-    {{range .Functions}}
-    {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
-    {{end}}
-    <|im_end|>
-    {{.Input -}}
-    <|im_start|>assistant
-
-download_files:
-- filename: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
-  sha256: 4e7b7fe1d54b881f1ef90799219dc6cc285d29db24f559c8998d1addb35713d4
-  uri: huggingface://mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF/localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
--- a/aio/gpu-8g/vad.yaml
+++ b/aio/gpu-8g/vad.yaml
@@ -1,8 +0,0 @@
-backend: silero-vad
-name: silero-vad
-parameters:
-  model: silero-vad.onnx
-download_files:
-- filename: silero-vad.onnx
-  uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
-  sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
--- a/aio/gpu-8g/vision.yaml
+++ b/aio/gpu-8g/vision.yaml
@@ -1,50 +0,0 @@
-context_size: 4096
-backend: llama-cpp
-f16: true
-mmap: true
-mmproj: minicpm-v-4_5-mmproj-f16.gguf
-name: gpt-4o
-parameters:
-  model: minicpm-v-4_5-Q4_K_M.gguf
-stopwords:
-- <|im_end|>
-- <dummy32000>
-- </s>
-- <|endoftext|>
-template:
-  chat: |
-    {{.Input -}}
-    <|im_start|>assistant
-  chat_message: |
-    <|im_start|>{{ .RoleName }}
-    {{ if .FunctionCall -}}
-    Function call:
-    {{ else if eq .RoleName "tool" -}}
-    Function response:
-    {{ end -}}
-    {{ if .Content -}}
-    {{.Content }}
-    {{ end -}}
-    {{ if .FunctionCall -}}
-    {{toJson .FunctionCall}}
-    {{ end -}}<|im_end|>
-  completion: |
-    {{.Input}}
-  function: |
-    <|im_start|>system
-    You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
-    {{range .Functions}}
-    {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
-    {{end}}
-    For each function call return a json object with function name and arguments
-    <|im_end|>
-    {{.Input -}}
-    <|im_start|>assistant
-
-download_files:
-- filename: minicpm-v-4_5-Q4_K_M.gguf
-  sha256: c1c3c33100b15b4caf7319acce4e23c0eb0ce1cbd12f70e8d24f05aa67b7512f
-  uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/ggml-model-Q4_K_M.gguf
-- filename: minicpm-v-4_5-mmproj-f16.gguf
-  uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/mmproj-model-f16.gguf
-  sha256: 7a7225a32e8d453aaa3d22d8c579b5bf833c253f784cdb05c99c9a76fd616df8
--- a/aio/intel/embeddings.yaml
+++ b/aio/intel/embeddings.yaml
@@ -1,13 +0,0 @@
-embeddings: true
-name: text-embedding-ada-002
-backend: llama-cpp
-parameters:
-  model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
-
-usage: |
-    You can test this model with curl like this:
-
-    curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
-      "input": "Your text string goes here",
-      "model": "text-embedding-ada-002"
-    }'
--- a/aio/intel/image-gen.yaml
+++ b/aio/intel/image-gen.yaml
@@ -1,20 +0,0 @@
-name: stablediffusion
-parameters:
-  model: Lykon/dreamshaper-8
-backend: diffusers
-step: 25
-f16: true
-diffusers:
-  pipeline_type: StableDiffusionPipeline
-  cuda: true
-  enable_parameters: "negative_prompt,num_inference_steps"
-  scheduler_type: "k_dpmpp_2m"
-
-usage: |
-        curl http://localhost:8080/v1/images/generations \
-          -H "Content-Type: application/json" \
-          -d '{
-            "prompt": "<positive prompt>|<negative prompt>",
-            "step": 25,
-            "size": "512x512"
-          }'
--- a/aio/intel/rerank.yaml
+++ b/aio/intel/rerank.yaml
@@ -1,33 +0,0 @@
-name: jina-reranker-v1-base-en
-reranking: true
-f16: true
-parameters:
-  model: jina-reranker-v1-tiny-en.f16.gguf
-backend: llama-cpp
-download_files:
-  - filename: jina-reranker-v1-tiny-en.f16.gguf
-    sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
-    uri: huggingface://mradermacher/jina-reranker-v1-tiny-en-GGUF/jina-reranker-v1-tiny-en.f16.gguf 
-
-usage: |
-    You can test this model with curl like this:
-
-    curl http://localhost:8080/v1/rerank \
-      -H "Content-Type: application/json" \
-      -d '{
-      "model": "jina-reranker-v1-base-en",
-      "query": "Organic skincare products for sensitive skin",
-      "documents": [
-        "Eco-friendly kitchenware for modern homes",
-        "Biodegradable cleaning supplies for eco-conscious consumers",
-        "Organic cotton baby clothes for sensitive skin",
-        "Natural organic skincare range for sensitive skin",
-        "Tech gadgets for smart homes: 2024 edition",
-        "Sustainable gardening tools and compost solutions",
-        "Sensitive skin-friendly facial cleansers and toners",
-        "Organic food wraps and storage solutions",
-        "All-natural pet food for dogs with allergies",
-        "Yoga mats made from recycled materials"
-      ],
-      "top_n": 3
-    }'
--- a/aio/intel/speech-to-text.yaml
+++ b/aio/intel/speech-to-text.yaml
@@ -1,18 +0,0 @@
-name: whisper-1
-backend: whisper
-parameters:
-  model: ggml-whisper-base.bin
-
-usage: |
-    ## example audio file
-    wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
-
-    ## Send the example audio file to the transcriptions endpoint
-    curl http://localhost:8080/v1/audio/transcriptions \
-         -H "Content-Type: multipart/form-data" \
-         -F file="@$PWD/gb1.ogg" -F model="whisper-1"
-
-download_files:
-- filename: "ggml-whisper-base.bin"
-  sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
-  uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"
--- a/aio/intel/text-to-speech.yaml
+++ b/aio/intel/text-to-speech.yaml
@@ -1,15 +0,0 @@
-name: tts-1
-download_files:
-  - filename: voice-en-us-amy-low.tar.gz
-    uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
-backend: piper
-parameters:
-  model: en-us-amy-low.onnx
-
-usage: |
-    To test if this model works as expected, you can use the following curl command:
-
-    curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
-      "model":"tts-1",
-      "input": "Hi, this is a test."
-    }'
--- a/aio/intel/text-to-text.yaml
+++ b/aio/intel/text-to-text.yaml
@@ -1,54 +0,0 @@
-context_size: 4096
-f16: true
-backend: llama-cpp
-function:
-  capture_llm_results:
-  - (?s)<Thought>(.*?)</Thought>
-  grammar:
-    properties_order: name,arguments
-  json_regex_match:
-  - (?s)<Output>(.*?)</Output>
-  replace_llm_results:
-  - key: (?s)<Thought>(.*?)</Thought>
-    value: ""
-mmap: true
-name: gpt-4
-parameters:
-  model: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
-stopwords:
-- <|im_end|>
-- <dummy32000>
-- </s>
-template:
-  chat: |
-    {{.Input -}}
-    <|im_start|>assistant
-  chat_message: |
-    <|im_start|>{{ .RoleName }}
-    {{ if .FunctionCall -}}
-    Function call:
-    {{ else if eq .RoleName "tool" -}}
-    Function response:
-    {{ end -}}
-    {{ if .Content -}}
-    {{.Content }}
-    {{ end -}}
-    {{ if .FunctionCall -}}
-    {{toJson .FunctionCall}}
-    {{ end -}}<|im_end|>
-  completion: |
-    {{.Input}}
-  function: |
-    <|im_start|>system
-    You are an AI assistant that executes function calls, and these are the tools at your disposal:
-    {{range .Functions}}
-    {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
-    {{end}}
-    <|im_end|>
-    {{.Input -}}
-    <|im_start|>assistant
-
-download_files:
-- filename: localai-functioncall-phi-4-v0.3-q4_k_m.gguf
-  sha256: 23fee048ded2a6e2e1a7b6bbefa6cbf83068f194caa9552aecbaa00fec8a16d5
-  uri: huggingface://mudler/LocalAI-functioncall-phi-4-v0.3-Q4_K_M-GGUF/localai-functioncall-phi-4-v0.3-q4_k_m.gguf
--- a/aio/intel/vad.yaml
+++ b/aio/intel/vad.yaml
@@ -1,8 +0,0 @@
-backend: silero-vad
-name: silero-vad
-parameters:
-  model: silero-vad.onnx
-download_files:
-- filename: silero-vad.onnx
-  uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
-  sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
--- a/aio/intel/vision.yaml
+++ b/aio/intel/vision.yaml
@@ -1,51 +0,0 @@
-context_size: 4096
-backend: llama-cpp
-f16: true
-mmap: true
-mmproj: minicpm-v-4_5-mmproj-f16.gguf
-name: gpt-4o
-parameters:
-  model: minicpm-v-4_5-Q4_K_M.gguf
-stopwords:
-- <|im_end|>
-- <dummy32000>
-- </s>
-- <|endoftext|>
-template:
-  chat: |
-    {{.Input -}}
-    <|im_start|>assistant
-  chat_message: |
-    <|im_start|>{{ .RoleName }}
-    {{ if .FunctionCall -}}
-    Function call:
-    {{ else if eq .RoleName "tool" -}}
-    Function response:
-    {{ end -}}
-    {{ if .Content -}}
-    {{.Content }}
-    {{ end -}}
-    {{ if .FunctionCall -}}
-    {{toJson .FunctionCall}}
-    {{ end -}}<|im_end|>
-  completion: |
-    {{.Input}}
-  function: |
-    <|im_start|>system
-    You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
-    {{range .Functions}}
-    {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
-    {{end}}
-    For each function call return a json object with function name and arguments
-    <|im_end|>
-    {{.Input -}}
-    <|im_start|>assistant
-
-
-download_files:
-- filename: minicpm-v-4_5-Q4_K_M.gguf
-  sha256: c1c3c33100b15b4caf7319acce4e23c0eb0ce1cbd12f70e8d24f05aa67b7512f
-  uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/ggml-model-Q4_K_M.gguf
-- filename: minicpm-v-4_5-mmproj-f16.gguf
-  uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/mmproj-model-f16.gguf
-  sha256: 7a7225a32e8d453aaa3d22d8c579b5bf833c253f784cdb05c99c9a76fd616df8
--- a/Show More
+++ b/Show More