Compare commits

..

1 Commits

Author SHA1 Message Date
Ettore Di Giacinto
ca65e9ee9b Ci testsg 2024-06-13 23:34:11 +02:00
273 changed files with 2812 additions and 10261 deletions

View File

@@ -1,80 +0,0 @@
import hashlib
from huggingface_hub import hf_hub_download, get_paths_info
import requests
import sys
import os
uri = sys.argv[1]
file_name = uri.split('/')[-1]
# Function to parse the URI and determine download method
def parse_uri(uri):
if uri.startswith('huggingface://'):
repo_id = uri.split('://')[1]
return 'huggingface', repo_id.rsplit('/', 1)[0]
elif 'huggingface.co' in uri:
parts = uri.split('/resolve/')
if len(parts) > 1:
repo_path = parts[0].split('https://huggingface.co/')[-1]
return 'huggingface', repo_path
return 'direct', uri
def calculate_sha256(file_path):
sha256_hash = hashlib.sha256()
with open(file_path, 'rb') as f:
for byte_block in iter(lambda: f.read(4096), b''):
sha256_hash.update(byte_block)
return sha256_hash.hexdigest()
def manual_safety_check_hf(repo_id):
scanResponse = requests.get('https://huggingface.co/api/models/' + repo_id + "/scan")
scan = scanResponse.json()
if scan['hasUnsafeFile']:
return scan
return None
download_type, repo_id_or_url = parse_uri(uri)
new_checksum = None
file_path = None
# Decide download method based on URI type
if download_type == 'huggingface':
# Check if the repo is flagged as dangerous by HF
hazard = manual_safety_check_hf(repo_id_or_url)
if hazard != None:
print(f'Error: HuggingFace has detected security problems for {repo_id_or_url}: {str(hazard)}', filename=file_name)
sys.exit(5)
# Use HF API to pull sha
for file in get_paths_info(repo_id_or_url, [file_name], repo_type='model'):
try:
new_checksum = file.lfs.sha256
break
except Exception as e:
print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
sys.exit(2)
if new_checksum is None:
try:
file_path = hf_hub_download(repo_id=repo_id_or_url, filename=file_name)
except Exception as e:
print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
sys.exit(2)
else:
response = requests.get(repo_id_or_url)
if response.status_code == 200:
with open(file_name, 'wb') as f:
f.write(response.content)
file_path = file_name
elif response.status_code == 404:
print(f'File not found: {response.status_code}', file=sys.stderr)
sys.exit(2)
else:
print(f'Error downloading file: {response.status_code}', file=sys.stderr)
sys.exit(1)
if new_checksum is None:
new_checksum = calculate_sha256(file_path)
print(new_checksum)
os.remove(file_path)
else:
print(new_checksum)

View File

@@ -14,14 +14,77 @@ function check_and_update_checksum() {
idx="$5" idx="$5"
# Download the file and calculate new checksum using Python # Download the file and calculate new checksum using Python
new_checksum=$(python3 ./.github/check_and_update.py $uri) new_checksum=$(python3 -c "
result=$? import hashlib
from huggingface_hub import hf_hub_download, get_paths_info
import requests
import sys
import os
if [[ $result -eq 5 ]]; then uri = '$uri'
echo "Contaminated entry detected, deleting entry for $model_name..." file_name = uri.split('/')[-1]
yq eval -i "del([$idx])" "$input_yaml"
return # Function to parse the URI and determine download method
fi # Function to parse the URI and determine download method
def parse_uri(uri):
if uri.startswith('huggingface://'):
repo_id = uri.split('://')[1]
return 'huggingface', repo_id.rsplit('/', 1)[0]
elif 'huggingface.co' in uri:
parts = uri.split('/resolve/')
if len(parts) > 1:
repo_path = parts[0].split('https://huggingface.co/')[-1]
return 'huggingface', repo_path
return 'direct', uri
def calculate_sha256(file_path):
sha256_hash = hashlib.sha256()
with open(file_path, 'rb') as f:
for byte_block in iter(lambda: f.read(4096), b''):
sha256_hash.update(byte_block)
return sha256_hash.hexdigest()
download_type, repo_id_or_url = parse_uri(uri)
new_checksum = None
# Decide download method based on URI type
if download_type == 'huggingface':
# Use HF API to pull sha
for file in get_paths_info(repo_id_or_url, [file_name], repo_type='model'):
try:
new_checksum = file.lfs.sha256
break
except Exception as e:
print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
sys.exit(2)
if new_checksum is None:
try:
file_path = hf_hub_download(repo_id=repo_id_or_url, filename=file_name)
except Exception as e:
print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
sys.exit(2)
else:
response = requests.get(repo_id_or_url)
if response.status_code == 200:
with open(file_name, 'wb') as f:
f.write(response.content)
file_path = file_name
elif response.status_code == 404:
print(f'File not found: {response.status_code}', file=sys.stderr)
sys.exit(2)
else:
print(f'Error downloading file: {response.status_code}', file=sys.stderr)
sys.exit(1)
if new_checksum is None:
new_checksum = calculate_sha256(file_path)
print(new_checksum)
os.remove(file_path)
else:
print(new_checksum)
")
if [[ "$new_checksum" == "" ]]; then if [[ "$new_checksum" == "" ]]; then
echo "Error calculating checksum for $file_name. Skipping..." echo "Error calculating checksum for $file_name. Skipping..."
@@ -31,7 +94,7 @@ function check_and_update_checksum() {
echo "Checksum for $file_name: $new_checksum" echo "Checksum for $file_name: $new_checksum"
# Compare and update the YAML file if checksums do not match # Compare and update the YAML file if checksums do not match
result=$?
if [[ $result -eq 2 ]]; then if [[ $result -eq 2 ]]; then
echo "File not found, deleting entry for $file_name..." echo "File not found, deleting entry for $file_name..."
# yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\"))" "$input_yaml" # yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\"))" "$input_yaml"

View File

@@ -75,7 +75,7 @@ var modelPageTemplate string = `
<div class="container mx-auto px-4 py-4"> <div class="container mx-auto px-4 py-4">
<div class="flex items-center justify-between"> <div class="flex items-center justify-between">
<div class="flex items-center"> <div class="flex items-center">
<a href="/" class="text-white text-xl font-bold"><img src="https://github.com/mudler/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo" class="h-10 mr-3 border-2 border-gray-300 shadow rounded"></a> <a href="/" class="text-white text-xl font-bold"><img src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo" class="h-10 mr-3 border-2 border-gray-300 shadow rounded"></a>
<a href="/" class="text-white text-xl font-bold">LocalAI</a> <a href="/" class="text-white text-xl font-bold">LocalAI</a>
</div> </div>
<!-- Menu button for small screens --> <!-- Menu button for small screens -->
@@ -92,9 +92,9 @@ var modelPageTemplate string = `
<!-- Collapsible menu for small screens --> <!-- Collapsible menu for small screens -->
<div class="hidden lg:hidden" id="mobile-menu"> <div class="hidden lg:hidden" id="mobile-menu">
<div class="pt-4 pb-3 border-t border-gray-700"> <div class="pt-4 pb-3 border-t border-gray-700">
<a href="https://localai.io" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a> <a href="https://localai.io" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
</div> </div>
</div> </div>
</div> </div>
@@ -114,17 +114,17 @@ var modelPageTemplate string = `
<h2 class="text-center text-3xl font-semibold text-gray-100"> <h2 class="text-center text-3xl font-semibold text-gray-100">
🖼️ Available {{.AvailableModels}} models</i> <a href="https://localai.io/models/" target="_blank" > 🖼️ Available {{.AvailableModels}} models</i> repositories <a href="https://localai.io/models/" target="_blank" >
<i class="fas fa-circle-info pr-2"></i> <i class="fas fa-circle-info pr-2"></i>
</a></h2> </a></h2>
<h3> <h3>
Refer to the Model gallery <a href="https://localai.io/models/" target="_blank" ><i class="fas fa-circle-info pr-2"></i></a> for more information on how to use the models with LocalAI.<br> Refer to <a href="https://localai.io/models" target=_blank> Model gallery</a> for more information on how to use the models with LocalAI.
You can install models with the CLI command <code>local-ai models install <model-name></code>. or by using the WebUI. You can install models with the CLI command <code>local-ai models install <model-name></code>. or by using the WebUI.
</h3> </h3>
<input class="form-control appearance-none block w-full mt-5 px-3 py-2 text-base font-normal text-gray-300 pb-2 mb-5 bg-gray-800 bg-clip-padding border border-solid border-gray-600 rounded transition ease-in-out m-0 focus:text-gray-300 focus:bg-gray-900 focus:border-blue-500 focus:outline-none" type="search" <input class="form-control appearance-none block w-full mt-5 px-3 py-2 text-base font-normal text-gray-300 pb-2 mb-5 bg-gray-800 bg-clip-padding border border-solid border-gray-600 rounded transition ease-in-out m-0 focus:text-gray-300 focus:bg-gray-900 focus:border-blue-500 focus:outline-none" type="search"
id="searchbox" placeholder="Live search keyword.."> id="searchbox" placeholder="Live search keyword..">
<div class="dark grid grid-cols-1 grid-rows-1 md:grid-cols-3 block rounded-lg shadow-secondary-1 dark:bg-surface-dark"> <div class="dark grid grid-cols-1 grid-rows-1 md:grid-cols-3 block rounded-lg shadow-secondary-1 dark:bg-surface-dark">
{{ range $_, $model := .Models }} {{ range $_, $model := .Models }}
@@ -139,10 +139,10 @@ var modelPageTemplate string = `
</div> </div>
<div class="p-6 text-surface dark:text-white"> <div class="p-6 text-surface dark:text-white">
<h5 class="mb-2 text-xl font-medium leading-tight">{{$model.Name}}</h5> <h5 class="mb-2 text-xl font-medium leading-tight">{{$model.Name}}</h5>
<p class="mb-4 text-base truncate">{{ $model.Description }}</p> <p class="mb-4 text-base truncate">{{ $model.Description }}</p>
</div> </div>
<div class="px-6 pt-4 pb-2"> <div class="px-6 pt-4 pb-2">
@@ -178,7 +178,7 @@ var modelPageTemplate string = `
{{ $model.Description }} {{ $model.Description }}
</p> </p>
<p class="text-base leading-relaxed text-gray-500 dark:text-gray-400"> <p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">
To install the model with the CLI, run: <br> To install the model with the CLI, run: <br>
<code> local-ai models install {{$model.Name}} </code> <br> <code> local-ai models install {{$model.Name}} </code> <br>
@@ -193,7 +193,7 @@ var modelPageTemplate string = `
<ul> <ul>
{{ range $_, $u := $model.URLs }} {{ range $_, $u := $model.URLs }}
<li><a href="{{ $u }}" target=_blank><i class="fa-solid fa-link"></i> {{ $u }}</a></li> <li><a href="{{ $u }}" target=_blank><i class="fa-solid fa-link"></i> {{ $u }}</a></li>
{{ end }} {{ end }}
</ul> </ul>
</p> </p>
</div> </div>
@@ -209,7 +209,7 @@ var modelPageTemplate string = `
</div> </div>
</div> </div>
</div> </div>
{{ end }} {{ end }}
</div> </div>
</div> </div>
@@ -221,10 +221,10 @@ var lazyLoadInstance = new LazyLoad({
}); });
let cards = document.querySelectorAll('.box') let cards = document.querySelectorAll('.box')
function liveSearch() { function liveSearch() {
let search_query = document.getElementById("searchbox").value; let search_query = document.getElementById("searchbox").value;
//Use innerText if all contents are visible //Use innerText if all contents are visible
//Use textContent for including hidden elements //Use textContent for including hidden elements
for (var i = 0; i < cards.length; i++) { for (var i = 0; i < cards.length; i++) {
@@ -238,8 +238,8 @@ function liveSearch() {
} }
//A little delay //A little delay
let typingTimer; let typingTimer;
let typeInterval = 500; let typeInterval = 500;
let searchInput = document.getElementById('searchbox'); let searchInput = document.getElementById('searchbox');
searchInput.addEventListener('keyup', () => { searchInput.addEventListener('keyup', () => {

112
.github/dependabot.yml vendored
View File

@@ -1,10 +1,6 @@
# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file # https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
version: 2 version: 2
updates: updates:
- package-ecosystem: "gitsubmodule"
directory: "/"
schedule:
interval: "weekly"
- package-ecosystem: "gomod" - package-ecosystem: "gomod"
directory: "/" directory: "/"
schedule: schedule:
@@ -27,111 +23,3 @@ updates:
schedule: schedule:
# Check for updates to GitHub Actions every weekday # Check for updates to GitHub Actions every weekday
interval: "weekly" interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/autogptq"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/bark"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/common/template"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/coqui"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/diffusers"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/exllama"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/exllama2"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/mamba"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/openvoice"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/parler-tts"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/petals"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/rerankers"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/sentencetransformers"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/transformers"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/transformers-musicgen"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/vall-e-x"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/vllm"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/examples/chainlit"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/examples/functions"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/examples/langchain/langchainpy-localai-example"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/examples/langchain-chroma"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/examples/streamlit-bot"
schedule:
interval: "weekly"
- package-ecosystem: "docker"
directory: "/examples/k8sgpt"
schedule:
interval: "weekly"
- package-ecosystem: "docker"
directory: "/examples/kubernetes"
schedule:
interval: "weekly"
- package-ecosystem: "docker"
directory: "/examples/langchain"
schedule:
interval: "weekly"
- package-ecosystem: "gomod"
directory: "/examples/semantic-todo"
schedule:
interval: "weekly"
- package-ecosystem: "docker"
directory: "/examples/telegram-bot"
schedule:
interval: "weekly"

3
.github/release.yml vendored
View File

@@ -13,9 +13,6 @@ changelog:
labels: labels:
- bug - bug
- regression - regression
- title: "🖧 P2P area"
labels:
- area/p2p
- title: Exciting New Features 🎉 - title: Exciting New Features 🎉
labels: labels:
- Semver-Minor - Semver-Minor

View File

@@ -9,6 +9,9 @@ jobs:
fail-fast: false fail-fast: false
matrix: matrix:
include: include:
- repository: "go-skynet/go-llama.cpp"
variable: "GOLLAMA_VERSION"
branch: "master"
- repository: "ggerganov/llama.cpp" - repository: "ggerganov/llama.cpp"
variable: "CPPLLAMA_VERSION" variable: "CPPLLAMA_VERSION"
branch: "master" branch: "master"
@@ -27,6 +30,9 @@ jobs:
- repository: "go-skynet/bloomz.cpp" - repository: "go-skynet/bloomz.cpp"
variable: "BLOOMZ_VERSION" variable: "BLOOMZ_VERSION"
branch: "main" branch: "main"
- repository: "nomic-ai/gpt4all"
variable: "GPT4ALL_VERSION"
branch: "main"
- repository: "mudler/go-ggllm.cpp" - repository: "mudler/go-ggllm.cpp"
variable: "GOGGLLM_VERSION" variable: "GOGGLLM_VERSION"
branch: "master" branch: "master"
@@ -48,7 +54,7 @@ jobs:
token: ${{ secrets.UPDATE_BOT_TOKEN }} token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI push-to-fork: ci-forks/LocalAI
commit-message: ':arrow_up: Update ${{ matrix.repository }}' commit-message: ':arrow_up: Update ${{ matrix.repository }}'
title: 'chore: :arrow_up: Update ${{ matrix.repository }}' title: ':arrow_up: Update ${{ matrix.repository }}'
branch: "update/${{ matrix.variable }}" branch: "update/${{ matrix.variable }}"
body: Bump of ${{ matrix.repository }} version body: Bump of ${{ matrix.repository }} version
signoff: true signoff: true

View File

@@ -22,7 +22,7 @@ jobs:
token: ${{ secrets.UPDATE_BOT_TOKEN }} token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI push-to-fork: ci-forks/LocalAI
commit-message: ':arrow_up: Update docs version ${{ matrix.repository }}' commit-message: ':arrow_up: Update docs version ${{ matrix.repository }}'
title: 'docs: :arrow_up: update docs version ${{ matrix.repository }}' title: ':arrow_up: Update docs version ${{ matrix.repository }}'
branch: "update/docs" branch: "update/docs"
body: Bump of ${{ matrix.repository }} version inside docs body: Bump of ${{ matrix.repository }} version inside docs
signoff: true signoff: true

View File

@@ -20,12 +20,12 @@ jobs:
run: | run: |
sudo apt-get update sudo apt-get update
sudo apt-get install -y pip wget sudo apt-get install -y pip wget
sudo pip install --upgrade pip sudo pip install --upgrade pip
pip install huggingface_hub pip install huggingface_hub
- name: 'Setup yq' - name: 'Setup yq'
uses: dcarbone/install-yq-action@v1.1.1 uses: dcarbone/install-yq-action@v1.1.1
with: with:
version: 'v4.44.2' version: 'v4.43.1'
download-compressed: true download-compressed: true
force: true force: true

View File

@@ -14,7 +14,7 @@ jobs:
steps: steps:
- name: Dependabot metadata - name: Dependabot metadata
id: metadata id: metadata
uses: dependabot/fetch-metadata@v2.2.0 uses: dependabot/fetch-metadata@v2.1.0
with: with:
github-token: "${{ secrets.GITHUB_TOKEN }}" github-token: "${{ secrets.GITHUB_TOKEN }}"
skip-commit-verification: true skip-commit-verification: true

View File

@@ -1,83 +0,0 @@
name: Comment PRs
on:
pull_request_target:
jobs:
comment-pr:
env:
MODEL_NAME: hermes-2-theta-llama-3-8b
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3
with:
ref: "${{ github.event.pull_request.merge_commit_sha }}"
fetch-depth: 0 # needed to checkout all branches for this Action to work
- uses: mudler/localai-github-action@v1
with:
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.7.0
id: git-diff-action
with:
json_diff_file_output: diff.json
raw_diff_file_output: diff.txt
file_output_only: "true"
base_branch: ${{ github.event.pull_request.base.sha }}
- name: Show diff
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
run: |
cat $DIFF
- name: Summarize
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
id: summarize
run: |
input="$(cat $DIFF)"
# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"
# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "You are LocalAI-bot in Github that helps understanding PRs and assess complexity. Explain what has changed in this PR diff and why"
},
{
role: "user",
content: $input
}
]
}')
# Send the request to LocalAI
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")
# Extract the summary from the response
summary="$(echo $response | jq -r '.choices[0].message.content')"
# Print the summary
# -H "Authorization: Bearer $API_KEY" \
echo "Summary:"
echo "$summary"
echo "payload sent"
echo "$json_payload"
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
docker logs --tail 10 local-ai
- uses: mshick/add-pr-comment@v2
if: always()
with:
repo-token: ${{ secrets.UPDATE_BOT_TOKEN }}
message: ${{ steps.summarize.outputs.message }}
message-failure: |
Uh oh! Could not analyze this PR, maybe it's too big?

View File

@@ -75,7 +75,7 @@ jobs:
uses: actions/checkout@v4 uses: actions/checkout@v4
- name: Cache GRPC - name: Cache GRPC
uses: docker/build-push-action@v6 uses: docker/build-push-action@v5
with: with:
builder: ${{ steps.buildx.outputs.name }} builder: ${{ steps.buildx.outputs.name }}
# The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache. # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
@@ -84,11 +84,11 @@ jobs:
build-args: | build-args: |
GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }} GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }}
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
GRPC_VERSION=v1.65.0 GRPC_VERSION=v1.64.0
context: . context: .
file: ./Dockerfile file: ./Dockerfile
cache-to: type=gha,ignore-error=true cache-to: type=gha,ignore-error=true
cache-from: type=gha cache-from: type=gha
target: grpc target: grpc
platforms: ${{ matrix.platforms }} platforms: ${{ matrix.platforms }}
push: false push: false

View File

@@ -15,7 +15,7 @@ jobs:
strategy: strategy:
matrix: matrix:
include: include:
- base-image: intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 - base-image: intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04
runs-on: 'ubuntu-latest' runs-on: 'ubuntu-latest'
platforms: 'linux/amd64' platforms: 'linux/amd64'
runs-on: ${{matrix.runs-on}} runs-on: ${{matrix.runs-on}}
@@ -46,7 +46,7 @@ jobs:
uses: actions/checkout@v4 uses: actions/checkout@v4
- name: Cache Intel images - name: Cache Intel images
uses: docker/build-push-action@v6 uses: docker/build-push-action@v5
with: with:
builder: ${{ steps.buildx.outputs.name }} builder: ${{ steps.buildx.outputs.name }}
build-args: | build-args: |

View File

@@ -32,22 +32,21 @@ jobs:
strategy: strategy:
# Pushing with all jobs in parallel # Pushing with all jobs in parallel
# eats the bandwidth of all the nodes # eats the bandwidth of all the nodes
max-parallel: ${{ github.event_name != 'pull_request' && 4 || 8 }} max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
matrix: matrix:
include: include:
# This is basically covered by the AIO test - build-type: ''
# - build-type: '' platforms: 'linux/amd64'
# platforms: 'linux/amd64' tag-latest: 'false'
# tag-latest: 'false' tag-suffix: '-ffmpeg'
# tag-suffix: '-ffmpeg' ffmpeg: 'true'
# ffmpeg: 'true' image-type: 'extras'
# image-type: 'extras' runs-on: 'arc-runner-set'
# runs-on: 'arc-runner-set' base-image: "ubuntu:22.04"
# base-image: "ubuntu:22.04" makeflags: "--jobs=3 --output-sync=target"
# makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas' - build-type: 'cublas'
cuda-major-version: "12" cuda-major-version: "12"
cuda-minor-version: "4" cuda-minor-version: "1"
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'false' tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg' tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -56,85 +55,76 @@ jobs:
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"
# - build-type: 'hipblas' - build-type: 'hipblas'
# platforms: 'linux/amd64' platforms: 'linux/amd64'
# tag-latest: 'false' tag-latest: 'false'
# tag-suffix: '-hipblas' tag-suffix: '-hipblas'
# ffmpeg: 'false' ffmpeg: 'false'
# image-type: 'extras' image-type: 'extras'
# base-image: "rocm/dev-ubuntu-22.04:6.1" base-image: "rocm/dev-ubuntu-22.04:6.1"
# grpc-base-image: "ubuntu:22.04" grpc-base-image: "ubuntu:22.04"
# runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
# makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"
# - build-type: 'sycl_f16' - build-type: 'sycl_f16'
# platforms: 'linux/amd64' platforms: 'linux/amd64'
# tag-latest: 'false' tag-latest: 'false'
# base-image: "quay.io/go-skynet/intel-oneapi-base:latest" base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
# grpc-base-image: "ubuntu:22.04" grpc-base-image: "ubuntu:22.04"
# tag-suffix: 'sycl-f16-ffmpeg' tag-suffix: 'sycl-f16-ffmpeg'
# ffmpeg: 'true' ffmpeg: 'true'
# image-type: 'extras' image-type: 'extras'
# runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
# makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"
# core-image-build: core-image-build:
# uses: ./.github/workflows/image_build.yml uses: ./.github/workflows/image_build.yml
# with: with:
# tag-latest: ${{ matrix.tag-latest }} tag-latest: ${{ matrix.tag-latest }}
# tag-suffix: ${{ matrix.tag-suffix }} tag-suffix: ${{ matrix.tag-suffix }}
# ffmpeg: ${{ matrix.ffmpeg }} ffmpeg: ${{ matrix.ffmpeg }}
# image-type: ${{ matrix.image-type }} image-type: ${{ matrix.image-type }}
# build-type: ${{ matrix.build-type }} build-type: ${{ matrix.build-type }}
# cuda-major-version: ${{ matrix.cuda-major-version }} cuda-major-version: ${{ matrix.cuda-major-version }}
# cuda-minor-version: ${{ matrix.cuda-minor-version }} cuda-minor-version: ${{ matrix.cuda-minor-version }}
# platforms: ${{ matrix.platforms }} platforms: ${{ matrix.platforms }}
# runs-on: ${{ matrix.runs-on }} runs-on: ${{ matrix.runs-on }}
# base-image: ${{ matrix.base-image }} base-image: ${{ matrix.base-image }}
# grpc-base-image: ${{ matrix.grpc-base-image }} grpc-base-image: ${{ matrix.grpc-base-image }}
# makeflags: ${{ matrix.makeflags }} makeflags: ${{ matrix.makeflags }}
# secrets: secrets:
# dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
# dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
# quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
# quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
# strategy: strategy:
# matrix: matrix:
# include: include:
# - build-type: '' - build-type: ''
# platforms: 'linux/amd64' platforms: 'linux/amd64'
# tag-latest: 'false' tag-latest: 'false'
# tag-suffix: '-ffmpeg-core' tag-suffix: '-ffmpeg-core'
# ffmpeg: 'true' ffmpeg: 'true'
# image-type: 'core' image-type: 'core'
# runs-on: 'ubuntu-latest' runs-on: 'ubuntu-latest'
# base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
# makeflags: "--jobs=4 --output-sync=target" makeflags: "--jobs=4 --output-sync=target"
# - build-type: 'sycl_f16' - build-type: 'sycl_f16'
# platforms: 'linux/amd64' platforms: 'linux/amd64'
# tag-latest: 'false' tag-latest: 'false'
# base-image: "quay.io/go-skynet/intel-oneapi-base:latest" base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
# grpc-base-image: "ubuntu:22.04" grpc-base-image: "ubuntu:22.04"
# tag-suffix: 'sycl-f16-ffmpeg-core' tag-suffix: 'sycl-f16-ffmpeg-core'
# ffmpeg: 'true' ffmpeg: 'true'
# image-type: 'core' image-type: 'core'
# runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
# makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"
# - build-type: 'cublas' - build-type: 'cublas'
# cuda-major-version: "12" cuda-major-version: "12"
# cuda-minor-version: "4" cuda-minor-version: "1"
# platforms: 'linux/amd64' platforms: 'linux/amd64'
# tag-latest: 'false' tag-latest: 'false'
# tag-suffix: '-cublas-cuda12-ffmpeg-core' tag-suffix: '-cublas-cuda12-ffmpeg-core'
# ffmpeg: 'true' ffmpeg: 'true'
# image-type: 'core' image-type: 'core'
# runs-on: 'ubuntu-latest' runs-on: 'ubuntu-latest'
# base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
# makeflags: "--jobs=4 --output-sync=target" makeflags: "--jobs=4 --output-sync=target"
# - build-type: 'vulkan'
# platforms: 'linux/amd64'
# tag-latest: 'false'
# tag-suffix: '-vulkan-ffmpeg-core'
# ffmpeg: 'true'
# image-type: 'core'
# runs-on: 'ubuntu-latest'
# base-image: "ubuntu:22.04"
# makeflags: "--jobs=4 --output-sync=target"

View File

@@ -39,7 +39,7 @@ jobs:
strategy: strategy:
# Pushing with all jobs in parallel # Pushing with all jobs in parallel
# eats the bandwidth of all the nodes # eats the bandwidth of all the nodes
max-parallel: ${{ github.event_name != 'pull_request' && 6 || 10 }} max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
matrix: matrix:
include: include:
# Extra images # Extra images
@@ -75,7 +75,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas' - build-type: 'cublas'
cuda-major-version: "12" cuda-major-version: "12"
cuda-minor-version: "4" cuda-minor-version: "1"
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'false' tag-latest: 'false'
tag-suffix: '-cublas-cuda12' tag-suffix: '-cublas-cuda12'
@@ -100,7 +100,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas' - build-type: 'cublas'
cuda-major-version: "12" cuda-major-version: "12"
cuda-minor-version: "4" cuda-minor-version: "1"
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-cublas-cuda12-ffmpeg' tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -232,7 +232,7 @@ jobs:
grpc-base-image: "ubuntu:22.04" grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"
core-image-build: core-image-build:
uses: ./.github/workflows/image_build.yml uses: ./.github/workflows/image_build.yml
with: with:
@@ -257,7 +257,6 @@ jobs:
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy: strategy:
max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
matrix: matrix:
include: include:
- build-type: '' - build-type: ''
@@ -267,7 +266,7 @@ jobs:
ffmpeg: 'true' ffmpeg: 'true'
image-type: 'core' image-type: 'core'
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set' runs-on: 'ubuntu-latest'
aio: "-aio-cpu" aio: "-aio-cpu"
latest-image: 'latest-cpu' latest-image: 'latest-cpu'
latest-image-aio: 'latest-aio-cpu' latest-image-aio: 'latest-aio-cpu'
@@ -281,18 +280,18 @@ jobs:
ffmpeg: '' ffmpeg: ''
image-type: 'core' image-type: 'core'
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set' runs-on: 'ubuntu-latest'
makeflags: "--jobs=4 --output-sync=target" makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas' - build-type: 'cublas'
cuda-major-version: "12" cuda-major-version: "12"
cuda-minor-version: "4" cuda-minor-version: "1"
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'false' tag-latest: 'false'
tag-suffix: '-cublas-cuda12-core' tag-suffix: '-cublas-cuda12-core'
ffmpeg: '' ffmpeg: ''
image-type: 'core' image-type: 'core'
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set' runs-on: 'ubuntu-latest'
makeflags: "--jobs=4 --output-sync=target" makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas' - build-type: 'cublas'
cuda-major-version: "11" cuda-major-version: "11"
@@ -302,27 +301,17 @@ jobs:
tag-suffix: '-cublas-cuda11-ffmpeg-core' tag-suffix: '-cublas-cuda11-ffmpeg-core'
ffmpeg: 'true' ffmpeg: 'true'
image-type: 'core' image-type: 'core'
runs-on: 'arc-runner-set' runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target" makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas' - build-type: 'cublas'
cuda-major-version: "12" cuda-major-version: "12"
cuda-minor-version: "4" cuda-minor-version: "1"
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'false' tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg-core' tag-suffix: '-cublas-cuda12-ffmpeg-core'
ffmpeg: 'true' ffmpeg: 'true'
image-type: 'core' image-type: 'core'
runs-on: 'arc-runner-set' runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'vulkan'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-vulkan-ffmpeg-core'
latest-image: 'latest-vulkan-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target" makeflags: "--jobs=4 --output-sync=target"

View File

@@ -19,11 +19,11 @@ on:
type: string type: string
cuda-major-version: cuda-major-version:
description: 'CUDA major version' description: 'CUDA major version'
default: "12" default: "11"
type: string type: string
cuda-minor-version: cuda-minor-version:
description: 'CUDA minor version' description: 'CUDA minor version'
default: "4" default: "7"
type: string type: string
platforms: platforms:
description: 'Platforms' description: 'Platforms'
@@ -215,7 +215,7 @@ jobs:
password: ${{ secrets.quayPassword }} password: ${{ secrets.quayPassword }}
- name: Build and push - name: Build and push
uses: docker/build-push-action@v6 uses: docker/build-push-action@v5
if: github.event_name != 'pull_request' if: github.event_name != 'pull_request'
with: with:
builder: ${{ steps.buildx.outputs.name }} builder: ${{ steps.buildx.outputs.name }}
@@ -232,7 +232,7 @@ jobs:
BASE_IMAGE=${{ inputs.base-image }} BASE_IMAGE=${{ inputs.base-image }}
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }} GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
GRPC_VERSION=v1.65.0 GRPC_VERSION=v1.64.0
MAKEFLAGS=${{ inputs.makeflags }} MAKEFLAGS=${{ inputs.makeflags }}
context: . context: .
file: ./Dockerfile file: ./Dockerfile
@@ -243,7 +243,7 @@ jobs:
labels: ${{ steps.meta.outputs.labels }} labels: ${{ steps.meta.outputs.labels }}
### Start testing image ### Start testing image
- name: Build and push - name: Build and push
uses: docker/build-push-action@v6 uses: docker/build-push-action@v5
if: github.event_name == 'pull_request' if: github.event_name == 'pull_request'
with: with:
builder: ${{ steps.buildx.outputs.name }} builder: ${{ steps.buildx.outputs.name }}
@@ -260,7 +260,7 @@ jobs:
BASE_IMAGE=${{ inputs.base-image }} BASE_IMAGE=${{ inputs.base-image }}
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }} GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
GRPC_VERSION=v1.65.0 GRPC_VERSION=v1.64.0
MAKEFLAGS=${{ inputs.makeflags }} MAKEFLAGS=${{ inputs.makeflags }}
context: . context: .
file: ./Dockerfile file: ./Dockerfile
@@ -276,7 +276,7 @@ jobs:
## End testing image ## End testing image
- name: Build and push AIO image - name: Build and push AIO image
if: inputs.aio != '' if: inputs.aio != ''
uses: docker/build-push-action@v6 uses: docker/build-push-action@v5
with: with:
builder: ${{ steps.buildx.outputs.name }} builder: ${{ steps.buildx.outputs.name }}
build-args: | build-args: |
@@ -291,7 +291,7 @@ jobs:
- name: Build and push AIO image (dockerhub) - name: Build and push AIO image (dockerhub)
if: inputs.aio != '' if: inputs.aio != ''
uses: docker/build-push-action@v6 uses: docker/build-push-action@v5
with: with:
builder: ${{ steps.buildx.outputs.name }} builder: ${{ steps.buildx.outputs.name }}
build-args: | build-args: |
@@ -324,7 +324,7 @@ jobs:
docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }}
docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }} docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }} docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
- name: job summary - name: job summary
run: | run: |
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY

View File

@@ -1,168 +0,0 @@
name: Notifications for new models
on:
pull_request:
types:
- closed
jobs:
notify-discord:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
env:
MODEL_NAME: hermes-2-theta-llama-3-8b
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0 # needed to checkout all branches for this Action to work
- uses: mudler/localai-github-action@v1
with:
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.7.0
id: git-diff-action
with:
json_diff_file_output: diff.json
raw_diff_file_output: diff.txt
file_output_only: "true"
- name: Summarize
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
id: summarize
run: |
input="$(cat $DIFF)"
# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"
# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "You are LocalAI-bot. Write a discord message to notify everyone about the new model from the git diff. Make it informal. An example can include: the URL of the model, the name, and a brief description of the model if exists. Also add an hint on how to install it in LocalAI and that can be browsed over https://models.localai.io. For example: local-ai run model_name_here"
},
{
role: "user",
content: $input
}
]
}')
# Send the request to LocalAI
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")
# Extract the summary from the response
summary="$(echo $response | jq -r '.choices[0].message.content')"
# Print the summary
# -H "Authorization: Bearer $API_KEY" \
echo "Summary:"
echo "$summary"
echo "payload sent"
echo "$json_payload"
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
docker logs --tail 10 local-ai
- name: Discord notification
env:
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL }}
DISCORD_USERNAME: "LocalAI-Bot"
DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
uses: Ilshidur/action-discord@master
with:
args: ${{ steps.summarize.outputs.message }}
- name: Setup tmate session if fails
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
notify-twitter:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
env:
MODEL_NAME: hermes-2-theta-llama-3-8b
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0 # needed to checkout all branches for this Action to work
- name: Start LocalAI
run: |
echo "Starting LocalAI..."
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.7.0
id: git-diff-action
with:
json_diff_file_output: diff.json
raw_diff_file_output: diff.txt
file_output_only: "true"
- name: Summarize
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
id: summarize
run: |
input="$(cat $DIFF)"
# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"
# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "You are LocalAI-bot. Write a twitter message to notify everyone about the new model from the git diff. Make it informal and really short. An example can include: the name, and a brief description of the model if exists. Also add an hint on how to install it in LocalAI. For example: local-ai run model_name_here"
},
{
role: "user",
content: $input
}
]
}')
# Send the request to LocalAI
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")
# Extract the summary from the response
summary="$(echo $response | jq -r '.choices[0].message.content')"
# Print the summary
# -H "Authorization: Bearer $API_KEY" \
echo "Summary:"
echo "$summary"
echo "payload sent"
echo "$json_payload"
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
docker logs --tail 10 local-ai
- uses: Eomm/why-don-t-you-tweet@v2
with:
tweet-message: ${{ steps.summarize.outputs.message }}
env:
# Get your tokens from https://developer.twitter.com/apps
TWITTER_CONSUMER_API_KEY: ${{ secrets.TWITTER_APP_KEY }}
TWITTER_CONSUMER_API_SECRET: ${{ secrets.TWITTER_APP_SECRET }}
TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
- name: Setup tmate session if fails
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true

View File

@@ -1,63 +0,0 @@
name: Release notifications
on:
release:
types:
- published
jobs:
notify-discord:
runs-on: ubuntu-latest
env:
RELEASE_BODY: ${{ github.event.release.body }}
RELEASE_TITLE: ${{ github.event.release.name }}
RELEASE_TAG_NAME: ${{ github.event.release.tag_name }}
steps:
- uses: mudler/localai-github-action@v1
with:
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
- name: Summarize
id: summarize
run: |
input="$RELEASE_TITLE\b$RELEASE_BODY"
# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"
# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "Write a discord message with a bullet point summary of the release notes."
},
{
role: "user",
content: $input
}
]
}')
# Send the request to LocalAI API
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")
# Extract the summary from the response
summary=$(echo $response | jq -r '.choices[0].message.content')
# Print the summary
# -H "Authorization: Bearer $API_KEY" \
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
- name: Discord notification
env:
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL_RELEASE }}
DISCORD_USERNAME: "LocalAI-Bot"
DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
uses: Ilshidur/action-discord@master
with:
args: ${{ steps.summarize.outputs.message }}

View File

@@ -1,28 +0,0 @@
name: Check PR style
on:
pull_request_target:
types:
- opened
- reopened
- edited
- synchronize
jobs:
title-lint:
runs-on: ubuntu-latest
permissions:
statuses: write
steps:
- uses: aslafy-z/conventional-pr-title-action@v3
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# check-pr-description:
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v2
# - uses: jadrol/pr-description-checker-action@v1.0.0
# id: description-checker
# with:
# repo-token: ${{ secrets.GITHUB_TOKEN }}
# exempt-labels: no qa

View File

@@ -1,15 +1,11 @@
name: Build and Release name: Build and Release
on: on:
push: - push
branches: - pull_request
- master
tags:
- 'v*'
pull_request:
env: env:
GRPC_VERSION: v1.65.0 GRPC_VERSION: v1.64.0
permissions: permissions:
contents: write contents: write
@@ -35,8 +31,8 @@ jobs:
- name: Dependencies - name: Dependencies
run: | run: |
sudo apt-get update sudo apt-get update
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache gawk sudo apt-get install build-essential ffmpeg protobuf-compiler ccache
sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
- name: Install CUDA Dependencies - name: Install CUDA Dependencies
run: | run: |
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/cuda-keyring_1.1-1_all.deb curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/cuda-keyring_1.1-1_all.deb
@@ -56,8 +52,7 @@ jobs:
run: | run: |
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \ git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \ cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \ -DgRPC_BUILD_TESTS=OFF \
../.. && sudo make --jobs 5 --output-sync=target ../.. && sudo make --jobs 5 --output-sync=target
- name: Install gRPC - name: Install gRPC
@@ -101,17 +96,11 @@ jobs:
CROSS_TOOLCHAIN=/usr/$GNU_HOST CROSS_TOOLCHAIN=/usr/$GNU_HOST
CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
export PATH=$PATH:$GOPATH/bin export PATH=$PATH:$GOPATH/bin
export PATH=/usr/local/cuda/bin:$PATH export PATH=/usr/local/cuda/bin:$PATH
sudo rm -rf /usr/aarch64-linux-gnu/lib/libstdc++.so.6 GO_TAGS=p2p GOOS=linux GOARCH=arm64 CMAKE_ARGS="-DProtobuf_INCLUDE_DIRS=$CROSS_STAGING_PREFIX/include -DProtobuf_DIR=$CROSS_STAGING_PREFIX/lib/cmake/protobuf -DgRPC_DIR=$CROSS_STAGING_PREFIX/lib/cmake/grpc -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++" make dist-cross-linux-arm64
sudo cp -rf /usr/aarch64-linux-gnu/lib/libstdc++.so* /usr/aarch64-linux-gnu/lib/libstdc++.so.6
sudo cp /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 ld.so
BACKEND_LIBS="./grpc/cmake/cross_build/third_party/re2/libre2.a ./grpc/cmake/cross_build/libgrpc.a ./grpc/cmake/cross_build/libgrpc++.a ./grpc/cmake/cross_build/third_party/protobuf/libprotobuf.a /usr/aarch64-linux-gnu/lib/libc.so.6 /usr/aarch64-linux-gnu/lib/libstdc++.so.6 /usr/aarch64-linux-gnu/lib/libgomp.so.1 /usr/aarch64-linux-gnu/lib/libm.so.6 /usr/aarch64-linux-gnu/lib/libgcc_s.so.1 /usr/aarch64-linux-gnu/lib/libdl.so.2 /usr/aarch64-linux-gnu/lib/libpthread.so.0 ./ld.so" \
GOOS=linux \
GOARCH=arm64 \
CMAKE_ARGS="-DProtobuf_INCLUDE_DIRS=$CROSS_STAGING_PREFIX/include -DProtobuf_DIR=$CROSS_STAGING_PREFIX/lib/cmake/protobuf -DgRPC_DIR=$CROSS_STAGING_PREFIX/lib/cmake/grpc -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++" make dist-cross-linux-arm64
- uses: actions/upload-artifact@v4 - uses: actions/upload-artifact@v4
with: with:
name: LocalAI-linux-arm64 name: LocalAI-linux-arm64
@@ -122,13 +111,7 @@ jobs:
with: with:
files: | files: |
release/* release/*
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
build-linux: build-linux:
runs-on: arc-runner-set runs-on: arc-runner-set
steps: steps:
@@ -151,7 +134,7 @@ jobs:
- name: Dependencies - name: Dependencies
run: | run: |
sudo apt-get update sudo apt-get update
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache gawk cmake libgmock-dev sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache cmake
- name: Intel Dependencies - name: Intel Dependencies
run: | run: |
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
@@ -165,21 +148,21 @@ jobs:
sudo apt-get update sudo apt-get update
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION} sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
env: env:
CUDA_VERSION: 12-5 CUDA_VERSION: 12-3
- name: "Install Hipblas" - name: "Install Hipblas"
env: env:
ROCM_VERSION: "6.1" ROCM_VERSION: "6.1"
AMDGPU_VERSION: "6.1" AMDGPU_VERSION: "6.1"
run: | run: |
set -ex set -ex
sudo apt-get update sudo apt-get update
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg
curl -sL https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add - curl -sL https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add -
printf "deb [arch=amd64] https://repo.radeon.com/rocm/apt/$ROCM_VERSION/ jammy main" | sudo tee /etc/apt/sources.list.d/rocm.list printf "deb [arch=amd64] https://repo.radeon.com/rocm/apt/$ROCM_VERSION/ jammy main" | sudo tee /etc/apt/sources.list.d/rocm.list
printf "deb [arch=amd64] https://repo.radeon.com/amdgpu/$AMDGPU_VERSION/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list printf "deb [arch=amd64] https://repo.radeon.com/amdgpu/$AMDGPU_VERSION/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600 printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
sudo apt-get update sudo apt-get update
@@ -187,10 +170,10 @@ jobs:
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y \ sudo DEBIAN_FRONTEND=noninteractive apt-get install -y \
hipblas-dev rocm-dev \ hipblas-dev rocm-dev \
rocblas-dev rocblas-dev
sudo apt-get clean sudo apt-get clean
sudo rm -rf /var/lib/apt/lists/* sudo rm -rf /var/lib/apt/lists/*
sudo ldconfig sudo ldconfig
- name: Cache grpc - name: Cache grpc
id: cache-grpc id: cache-grpc
uses: actions/cache@v4 uses: actions/cache@v4
@@ -201,26 +184,22 @@ jobs:
if: steps.cache-grpc.outputs.cache-hit != 'true' if: steps.cache-grpc.outputs.cache-hit != 'true'
run: | run: |
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \ git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \ cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \ -DgRPC_BUILD_TESTS=OFF \
../.. && sudo make --jobs 5 --output-sync=target ../.. && sudo make --jobs 5 --output-sync=target
- name: Install gRPC - name: Install gRPC
run: | run: |
cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
# BACKEND_LIBS needed for gpu-workload: /opt/intel/oneapi/*/lib/libiomp5.so /opt/intel/oneapi/*/lib/libmkl_core.so /opt/intel/oneapi/*/lib/libmkl_core.so.2 /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so.2 /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so.4 /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so.2 /opt/intel/oneapi/*/lib/libsycl.so /opt/intel/oneapi/*/lib/libsycl.so.7 /opt/intel/oneapi/*/lib/libsycl.so.7.1.0 /opt/rocm-*/lib/libamdhip64.so /opt/rocm-*/lib/libamdhip64.so.5 /opt/rocm-*/lib/libamdhip64.so.6 /opt/rocm-*/lib/libamdhip64.so.6.1.60100 /opt/rocm-*/lib/libhipblas.so /opt/rocm-*/lib/libhipblas.so.2 /opt/rocm-*/lib/libhipblas.so.2.1.60100 /opt/rocm-*/lib/librocblas.so /opt/rocm-*/lib/librocblas.so.4 /opt/rocm-*/lib/librocblas.so.4.1.60100 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/local/cuda-*/targets/x86_64-linux/lib/libcublas.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcublasLt.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcudart.so /usr/local/cuda-*/targets/x86_64-linux/lib/stubs/libcuda.so
- name: Build - name: Build
id: build id: build
run: | run: |
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
export PATH=$PATH:$GOPATH/bin export PATH=$PATH:$GOPATH/bin
export PATH=/usr/local/cuda/bin:$PATH export PATH=/usr/local/cuda/bin:$PATH
export PATH=/opt/rocm/bin:$PATH export PATH=/opt/rocm/bin:$PATH
source /opt/intel/oneapi/setvars.sh source /opt/intel/oneapi/setvars.sh
sudo cp /lib64/ld-linux-x86-64.so.2 ld.so GO_TAGS=p2p make -j4 dist
BACKEND_LIBS="./ld.so ./sources/go-piper/piper/build/fi/lib/libfmt.a ./sources/go-piper/piper-phonemize/pi/lib/libonnxruntime.so.1.14.1 ./sources/go-piper/piper-phonemize/pi/src/libespeak-ng/libespeak-ng.so /usr/lib/x86_64-linux-gnu/libdl.so.2 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/lib/x86_64-linux-gnu/libpthread.so.0 ./sources/go-piper/piper-phonemize/pi/lib/libpiper_phonemize.so.1 ./sources/go-piper/piper/build/si/lib/libspdlog.a ./sources/go-piper/espeak/ei/lib/libucd.so" \
make -j4 dist
- uses: actions/upload-artifact@v4 - uses: actions/upload-artifact@v4
with: with:
name: LocalAI-linux name: LocalAI-linux
@@ -231,13 +210,7 @@ jobs:
with: with:
files: | files: |
release/* release/*
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
build-stablediffusion: build-stablediffusion:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
@@ -253,8 +226,8 @@ jobs:
run: | run: |
sudo apt-get update sudo apt-get update
sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
- name: Build stablediffusion - name: Build stablediffusion
run: | run: |
export PATH=$PATH:$GOPATH/bin export PATH=$PATH:$GOPATH/bin
@@ -273,48 +246,6 @@ jobs:
files: | files: |
release/* release/*
build-macOS-x86_64:
runs-on: macos-13
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-go@v5
with:
go-version: '1.21.x'
cache: false
- name: Dependencies
run: |
brew install protobuf grpc
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
- name: Build
id: build
run: |
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
export PATH=$PATH:$GOPATH/bin
make dist
- uses: actions/upload-artifact@v4
with:
name: LocalAI-MacOS-x86_64
path: release/
- name: Release
uses: softprops/action-gh-release@v2
if: startsWith(github.ref, 'refs/tags/')
with:
files: |
release/*
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
build-macOS-arm64: build-macOS-arm64:
runs-on: macos-14 runs-on: macos-14
steps: steps:
@@ -326,19 +257,22 @@ jobs:
with: with:
go-version: '1.21.x' go-version: '1.21.x'
cache: false cache: false
- name: Setup tmate session if tests fail
uses: mxschmitt/action-tmate@v3.18
with:
limit-access-to-actor: true
- name: Dependencies - name: Dependencies
run: | run: |
brew install protobuf grpc brew install protobuf grpc
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
- name: Build - name: Build
id: build id: build
run: | run: |
export C_INCLUDE_PATH=/usr/local/include export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include export CPLUS_INCLUDE_PATH=/usr/local/include
export PATH=$PATH:$GOPATH/bin export PATH=$PATH:$GOPATH/bin
GO_TAGS=p2p make dist
make dist
- uses: actions/upload-artifact@v4 - uses: actions/upload-artifact@v4
with: with:
name: LocalAI-MacOS-arm64 name: LocalAI-MacOS-arm64
@@ -349,10 +283,3 @@ jobs:
with: with:
files: | files: |
release/* release/*
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true

View File

@@ -19,7 +19,7 @@ jobs:
steps: steps:
- name: Clone - name: Clone
uses: actions/checkout@v4 uses: actions/checkout@v4
with: with:
submodules: true submodules: true
- name: Dependencies - name: Dependencies
run: | run: |
@@ -29,8 +29,8 @@ jobs:
curl -LsSf https://astral.sh/uv/install.sh | sh curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1 pip install --user grpcio-tools==1.64.0
- name: Test transformers - name: Test transformers
run: | run: |
make --jobs=5 --output-sync=target -C backend/python/transformers make --jobs=5 --output-sync=target -C backend/python/transformers
@@ -41,7 +41,7 @@ jobs:
steps: steps:
- name: Clone - name: Clone
uses: actions/checkout@v4 uses: actions/checkout@v4
with: with:
submodules: true submodules: true
- name: Dependencies - name: Dependencies
run: | run: |
@@ -51,8 +51,8 @@ jobs:
curl -LsSf https://astral.sh/uv/install.sh | sh curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1 pip install --user grpcio-tools==1.64.0
- name: Test sentencetransformers - name: Test sentencetransformers
run: | run: |
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
@@ -64,7 +64,7 @@ jobs:
steps: steps:
- name: Clone - name: Clone
uses: actions/checkout@v4 uses: actions/checkout@v4
with: with:
submodules: true submodules: true
- name: Dependencies - name: Dependencies
run: | run: |
@@ -74,7 +74,7 @@ jobs:
curl -LsSf https://astral.sh/uv/install.sh | sh curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1 pip install --user grpcio-tools==1.64.0
- name: Test rerankers - name: Test rerankers
run: | run: |
@@ -86,7 +86,7 @@ jobs:
steps: steps:
- name: Clone - name: Clone
uses: actions/checkout@v4 uses: actions/checkout@v4
with: with:
submodules: true submodules: true
- name: Dependencies - name: Dependencies
run: | run: |
@@ -96,7 +96,7 @@ jobs:
sudo apt-get install -y libopencv-dev sudo apt-get install -y libopencv-dev
# Install UV # Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh curl -LsSf https://astral.sh/uv/install.sh | sh
pip install --user --no-cache-dir grpcio-tools==1.64.1 pip install --user grpcio-tools==1.64.0
- name: Test diffusers - name: Test diffusers
run: | run: |
make --jobs=5 --output-sync=target -C backend/python/diffusers make --jobs=5 --output-sync=target -C backend/python/diffusers
@@ -107,7 +107,7 @@ jobs:
steps: steps:
- name: Clone - name: Clone
uses: actions/checkout@v4 uses: actions/checkout@v4
with: with:
submodules: true submodules: true
- name: Dependencies - name: Dependencies
run: | run: |
@@ -117,19 +117,19 @@ jobs:
curl -LsSf https://astral.sh/uv/install.sh | sh curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1 pip install --user grpcio-tools==1.64.0
- name: Test parler-tts - name: Test parler-tts
run: | run: |
make --jobs=5 --output-sync=target -C backend/python/parler-tts make --jobs=5 --output-sync=target -C backend/python/parler-tts
make --jobs=5 --output-sync=target -C backend/python/parler-tts test make --jobs=5 --output-sync=target -C backend/python/parler-tts test
tests-openvoice: tests-openvoice:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Clone - name: Clone
uses: actions/checkout@v4 uses: actions/checkout@v4
with: with:
submodules: true submodules: true
- name: Dependencies - name: Dependencies
run: | run: |
@@ -139,7 +139,7 @@ jobs:
curl -LsSf https://astral.sh/uv/install.sh | sh curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1 pip install --user grpcio-tools==1.64.0
- name: Test openvoice - name: Test openvoice
run: | run: |
@@ -151,7 +151,7 @@ jobs:
steps: steps:
- name: Clone - name: Clone
uses: actions/checkout@v4 uses: actions/checkout@v4
with: with:
submodules: true submodules: true
- name: Dependencies - name: Dependencies
run: | run: |
@@ -161,7 +161,7 @@ jobs:
curl -LsSf https://astral.sh/uv/install.sh | sh curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1 pip install --user grpcio-tools==1.64.0
- name: Test transformers-musicgen - name: Test transformers-musicgen
run: | run: |
@@ -175,7 +175,7 @@ jobs:
# steps: # steps:
# - name: Clone # - name: Clone
# uses: actions/checkout@v4 # uses: actions/checkout@v4
# with: # with:
# submodules: true # submodules: true
# - name: Dependencies # - name: Dependencies
# run: | # run: |
@@ -185,14 +185,14 @@ jobs:
# curl -LsSf https://astral.sh/uv/install.sh | sh # curl -LsSf https://astral.sh/uv/install.sh | sh
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip # sudo apt-get install -y ca-certificates cmake curl patch python3-pip
# sudo apt-get install -y libopencv-dev # sudo apt-get install -y libopencv-dev
# pip install --user --no-cache-dir grpcio-tools==1.64.1 # pip install --user grpcio-tools==1.64.0
# - name: Test petals # - name: Test petals
# run: | # run: |
# make --jobs=5 --output-sync=target -C backend/python/petals # make --jobs=5 --output-sync=target -C backend/python/petals
# make --jobs=5 --output-sync=target -C backend/python/petals test # make --jobs=5 --output-sync=target -C backend/python/petals test
# tests-bark: # tests-bark:
# runs-on: ubuntu-latest # runs-on: ubuntu-latest
@@ -239,7 +239,7 @@ jobs:
# df -h # df -h
# - name: Clone # - name: Clone
# uses: actions/checkout@v4 # uses: actions/checkout@v4
# with: # with:
# submodules: true # submodules: true
# - name: Dependencies # - name: Dependencies
# run: | # run: |
@@ -249,14 +249,14 @@ jobs:
# curl -LsSf https://astral.sh/uv/install.sh | sh # curl -LsSf https://astral.sh/uv/install.sh | sh
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip # sudo apt-get install -y ca-certificates cmake curl patch python3-pip
# sudo apt-get install -y libopencv-dev # sudo apt-get install -y libopencv-dev
# pip install --user --no-cache-dir grpcio-tools==1.64.1 # pip install --user grpcio-tools==1.64.0
# - name: Test bark # - name: Test bark
# run: | # run: |
# make --jobs=5 --output-sync=target -C backend/python/bark # make --jobs=5 --output-sync=target -C backend/python/bark
# make --jobs=5 --output-sync=target -C backend/python/bark test # make --jobs=5 --output-sync=target -C backend/python/bark test
# Below tests needs GPU. Commented out for now # Below tests needs GPU. Commented out for now
# TODO: Re-enable as soon as we have GPU nodes # TODO: Re-enable as soon as we have GPU nodes
# tests-vllm: # tests-vllm:
@@ -264,7 +264,7 @@ jobs:
# steps: # steps:
# - name: Clone # - name: Clone
# uses: actions/checkout@v4 # uses: actions/checkout@v4
# with: # with:
# submodules: true # submodules: true
# - name: Dependencies # - name: Dependencies
# run: | # run: |
@@ -274,7 +274,7 @@ jobs:
# curl -LsSf https://astral.sh/uv/install.sh | sh # curl -LsSf https://astral.sh/uv/install.sh | sh
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip # sudo apt-get install -y ca-certificates cmake curl patch python3-pip
# sudo apt-get install -y libopencv-dev # sudo apt-get install -y libopencv-dev
# pip install --user --no-cache-dir grpcio-tools==1.64.1 # pip install --user grpcio-tools==1.64.0
# - name: Test vllm # - name: Test vllm
# run: | # run: |
# make --jobs=5 --output-sync=target -C backend/python/vllm # make --jobs=5 --output-sync=target -C backend/python/vllm
@@ -284,7 +284,7 @@ jobs:
steps: steps:
- name: Clone - name: Clone
uses: actions/checkout@v4 uses: actions/checkout@v4
with: with:
submodules: true submodules: true
- name: Dependencies - name: Dependencies
run: | run: |
@@ -294,7 +294,7 @@ jobs:
curl -LsSf https://astral.sh/uv/install.sh | sh curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1 pip install --user grpcio-tools==1.64.0
- name: Test vall-e-x - name: Test vall-e-x
run: | run: |
make --jobs=5 --output-sync=target -C backend/python/vall-e-x make --jobs=5 --output-sync=target -C backend/python/vall-e-x
@@ -305,7 +305,7 @@ jobs:
steps: steps:
- name: Clone - name: Clone
uses: actions/checkout@v4 uses: actions/checkout@v4
with: with:
submodules: true submodules: true
- name: Dependencies - name: Dependencies
run: | run: |
@@ -314,8 +314,8 @@ jobs:
sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip
# Install UV # Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh curl -LsSf https://astral.sh/uv/install.sh | sh
pip install --user --no-cache-dir grpcio-tools==1.64.1 pip install --user grpcio-tools==1.64.0
- name: Test coqui - name: Test coqui
run: | run: |
make --jobs=5 --output-sync=target -C backend/python/coqui make --jobs=5 --output-sync=target -C backend/python/coqui
make --jobs=5 --output-sync=target -C backend/python/coqui test make --jobs=5 --output-sync=target -C backend/python/coqui test

View File

@@ -10,7 +10,7 @@ on:
- '*' - '*'
env: env:
GRPC_VERSION: v1.65.0 GRPC_VERSION: v1.64.0
concurrency: concurrency:
group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }} group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
@@ -71,7 +71,6 @@ jobs:
run: | run: |
sudo apt-get update sudo apt-get update
sudo apt-get install build-essential curl ffmpeg sudo apt-get install build-essential curl ffmpeg
sudo apt-get install -y libgmock-dev
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \ curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \ sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \ gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
@@ -94,8 +93,8 @@ jobs:
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION} sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
export CUDACXX=/usr/local/cuda/bin/nvcc export CUDACXX=/usr/local/cuda/bin/nvcc
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
# The python3-grpc-tools package in 22.04 is too old # The python3-grpc-tools package in 22.04 is too old
pip install --user grpcio-tools pip install --user grpcio-tools
@@ -110,7 +109,7 @@ jobs:
# Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn) # Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
env: env:
CUDA_VERSION: 12-4 CUDA_VERSION: 12-3
- name: Cache grpc - name: Cache grpc
id: cache-grpc id: cache-grpc
uses: actions/cache@v4 uses: actions/cache@v4
@@ -121,8 +120,7 @@ jobs:
if: steps.cache-grpc.outputs.cache-hit != 'true' if: steps.cache-grpc.outputs.cache-hit != 'true'
run: | run: |
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --jobs 5 --shallow-submodules https://github.com/grpc/grpc && \ git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --jobs 5 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && cd cmake/build && \ cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \ -DgRPC_BUILD_TESTS=OFF \
../.. && sudo make --jobs 5 ../.. && sudo make --jobs 5
- name: Install gRPC - name: Install gRPC
@@ -215,14 +213,14 @@ jobs:
- name: Dependencies - name: Dependencies
run: | run: |
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc
pip install --user --no-cache-dir grpcio-tools==1.64.1 pip install --user grpcio-tools==1.64.0
- name: Test - name: Test
run: | run: |
export C_INCLUDE_PATH=/usr/local/include export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include export CPLUS_INCLUDE_PATH=/usr/local/include
# Used to run the newer GNUMake version from brew that supports --output-sync # Used to run the newer GNUMake version from brew that supports --output-sync
export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH" export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test
- name: Setup tmate session if tests fail - name: Setup tmate session if tests fail
if: ${{ failure() }} if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18 uses: mxschmitt/action-tmate@v3.18

View File

@@ -13,17 +13,11 @@ jobs:
- uses: actions/setup-go@v5 - uses: actions/setup-go@v5
with: with:
go-version: 'stable' go-version: 'stable'
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install protobuf-compiler
- run: | - run: |
go install github.com/swaggo/swag/cmd/swag@latest go install github.com/swaggo/swag/cmd/swag@latest
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
- name: Bump swagger 🔧 - name: Bump swagger 🔧
run: | run: |
make protogen-go swagger make swagger
- name: Create Pull Request - name: Create Pull Request
uses: peter-evans/create-pull-request@v6 uses: peter-evans/create-pull-request@v6
with: with:

View File

@@ -8,7 +8,7 @@ FROM ${BASE_IMAGE} AS requirements-core
USER root USER root
ARG GO_VERSION=1.22.5 ARG GO_VERSION=1.22.4
ARG TARGETARCH ARG TARGETARCH
ARG TARGETVARIANT ARG TARGETVARIANT
@@ -33,7 +33,7 @@ RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | ta
ENV PATH $PATH:/root/go/bin:/usr/local/go/bin ENV PATH $PATH:/root/go/bin:/usr/local/go/bin
# Install grpc compilers # Install grpc compilers
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \ RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.1 && \
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/ COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
@@ -98,39 +98,43 @@ RUN pip install --user grpcio-tools
FROM requirements-${IMAGE_TYPE} AS requirements-drivers FROM requirements-${IMAGE_TYPE} AS requirements-drivers
ARG BUILD_TYPE ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=12 ARG CUDA_MAJOR_VERSION=11
ARG CUDA_MINOR_VERSION=4 ARG CUDA_MINOR_VERSION=8
ENV BUILD_TYPE=${BUILD_TYPE} ENV BUILD_TYPE=${BUILD_TYPE}
# Vulkan requirements
RUN <<EOT bash
if [ "${BUILD_TYPE}" = "vulkan" ]; then
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils wget gpg-agent && \
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
apt-get update && \
apt-get install -y \
vulkan-sdk && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
# CuBLAS requirements # CuBLAS requirements
RUN <<EOT bash RUN <<EOT bash
if [ "${BUILD_TYPE}" = "cublas" ]; then if [ "${BUILD_TYPE}" = "cublas" ]; then
apt-get update && \ apt-get update && \
apt-get install -y --no-install-recommends \ apt-get install -y --no-install-recommends \
software-properties-common pciutils software-properties-common pciutils
if [ "amd64" = "$TARGETARCH" ]; then if [ "amd64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
fi fi
if [ "arm64" = "$TARGETARCH" ]; then if [ "arm64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
fi fi
dpkg -i cuda-keyring_1.1-1_all.deb && \
rm -f cuda-keyring_1.1-1_all.deb && \
apt-get update && \
apt-get install -y --no-install-recommends \
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils && \
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
dpkg -i cuda-keyring_1.1-1_all.deb && \ dpkg -i cuda-keyring_1.1-1_all.deb && \
rm -f cuda-keyring_1.1-1_all.deb && \ rm -f cuda-keyring_1.1-1_all.deb && \
apt-get update && \ apt-get update && \
@@ -142,9 +146,8 @@ RUN <<EOT bash
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \ libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \ libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
apt-get clean && \ apt-get clean && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/* \
fi ; fi
EOT
# If we are building with clblas support, we need the libraries for the builds # If we are building with clblas support, we need the libraries for the builds
RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \ RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
@@ -187,7 +190,7 @@ FROM ${GRPC_BASE_IMAGE} AS grpc
# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI # This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
ARG GRPC_MAKEFLAGS="-j4 -Otarget" ARG GRPC_MAKEFLAGS="-j4 -Otarget"
ARG GRPC_VERSION=v1.65.0 ARG GRPC_VERSION=v1.64.2
ENV MAKEFLAGS=${GRPC_MAKEFLAGS} ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
@@ -208,7 +211,6 @@ RUN apt-get update && \
RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
mkdir -p /build/grpc/cmake/build && \ mkdir -p /build/grpc/cmake/build && \
cd /build/grpc/cmake/build && \ cd /build/grpc/cmake/build && \
sed -i "216i\ TESTONLY" "../../third_party/abseil-cpp/absl/container/CMakeLists.txt" && \
cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \ cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \
make && \ make && \
make install && \ make install && \
@@ -264,8 +266,6 @@ COPY --from=grpc /opt/grpc /usr/local
# Rebuild with defaults backends # Rebuild with defaults backends
WORKDIR /build WORKDIR /build
## Build the binary
RUN make build RUN make build
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \ RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
@@ -292,7 +292,7 @@ ENV REBUILD=false
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
ENV MAKEFLAGS=${MAKEFLAGS} ENV MAKEFLAGS=${MAKEFLAGS}
ARG CUDA_MAJOR_VERSION=12 ARG CUDA_MAJOR_VERSION=11
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0" ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
ENV NVIDIA_VISIBLE_DEVICES=all ENV NVIDIA_VISIBLE_DEVICES=all

214
Makefile
View File

@@ -3,12 +3,9 @@ GOTEST=$(GOCMD) test
GOVET=$(GOCMD) vet GOVET=$(GOCMD) vet
BINARY_NAME=local-ai BINARY_NAME=local-ai
DETECT_LIBS?=true
# llama.cpp versions # llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be CPPLLAMA_VERSION?=963552903f51043ee947a8deeaaa7ec00bc3f1a4
CPPLLAMA_VERSION?=45f2c19cc57286eead7b232ce8028273a817aa4d
# gpt4all version # gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -19,33 +16,26 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version # whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp WHISPER_CPP_VERSION?=420b6abc54008ab634f5887dc45bd77122c2f320
WHISPER_CPP_VERSION?=f68298ce06ca3edd6e6f3f21c3d0bb5f073942c3
# bert.cpp version # bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4 BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4
# go-piper version # go-piper version
PIPER_REPO?=https://github.com/mudler/go-piper
PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759 PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759
# stablediffusion version # stablediffusion version
STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion
STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
# tinydream version # tinydream version
TINYDREAM_REPO?=https://github.com/M0Rf30/go-tiny-dream
TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057 TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
export BUILD_TYPE?= export BUILD_TYPE?=
export STABLE_BUILD_TYPE?=$(BUILD_TYPE) export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
export CMAKE_ARGS?= export CMAKE_ARGS?=
export BACKEND_LIBS?=
CGO_LDFLAGS?= CGO_LDFLAGS?=
CGO_LDFLAGS_WHISPER?= CGO_LDFLAGS_WHISPER?=
CGO_LDFLAGS_WHISPER+=-lggml
CUDA_LIBPATH?=/usr/local/cuda/lib64/ CUDA_LIBPATH?=/usr/local/cuda/lib64/
GO_TAGS?= GO_TAGS?=
BUILD_ID?= BUILD_ID?=
@@ -59,12 +49,12 @@ RANDOM := $(shell bash -c 'echo $$RANDOM')
VERSION?=$(shell git describe --always --tags || echo "dev" ) VERSION?=$(shell git describe --always --tags || echo "dev" )
# go tool nm ./local-ai | grep Commit # go tool nm ./local-ai | grep Commit
LD_FLAGS?= LD_FLAGS?=
override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Version=$(VERSION)" override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Version=$(VERSION)"
override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Commit=$(shell git rev-parse HEAD)" override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
OPTIONAL_TARGETS?= OPTIONAL_TARGETS?=
export OS := $(shell uname -s) OS := $(shell uname -s)
ARCH := $(shell uname -m) ARCH := $(shell uname -m)
GREEN := $(shell tput -Txterm setaf 2) GREEN := $(shell tput -Txterm setaf 2)
YELLOW := $(shell tput -Txterm setaf 3) YELLOW := $(shell tput -Txterm setaf 3)
@@ -90,42 +80,29 @@ ifeq ($(OS),Darwin)
BUILD_TYPE=metal BUILD_TYPE=metal
# disable metal if on Darwin and any other value is explicitly passed. # disable metal if on Darwin and any other value is explicitly passed.
else ifneq ($(BUILD_TYPE),metal) else ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DGGML_METAL=OFF CMAKE_ARGS+=-DLLAMA_METAL=OFF
export GGML_NO_ACCELERATE=1 export LLAMA_NO_ACCELERATE=1
export GGML_NO_METAL=1
endif endif
ifeq ($(BUILD_TYPE),metal) ifeq ($(BUILD_TYPE),metal)
# -lcblas removed: it seems to always be listed as a duplicate flag. # -lcblas removed: it seems to always be listed as a duplicate flag.
CGO_LDFLAGS += -framework Accelerate CGO_LDFLAGS += -framework Accelerate
endif endif
else
CGO_LDFLAGS_WHISPER+=-lgomp
endif endif
ifeq ($(BUILD_TYPE),openblas) ifeq ($(BUILD_TYPE),openblas)
CGO_LDFLAGS+=-lopenblas CGO_LDFLAGS+=-lopenblas
export GGML_OPENBLAS=1 export WHISPER_OPENBLAS=1
endif endif
ifeq ($(BUILD_TYPE),cublas) ifeq ($(BUILD_TYPE),cublas)
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH) CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
export GGML_CUDA=1 export LLAMA_CUBLAS=1
export WHISPER_CUDA=1
CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda -lcufft CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda -lcufft
endif endif
ifeq ($(BUILD_TYPE),vulkan)
CMAKE_ARGS+=-DGGML_VULKAN=1
endif
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
export GGML_SYCL=1
endif
ifeq ($(BUILD_TYPE),sycl_f16)
export GGML_SYCL_F16=1
endif
ifeq ($(BUILD_TYPE),hipblas) ifeq ($(BUILD_TYPE),hipblas)
ROCM_HOME ?= /opt/rocm ROCM_HOME ?= /opt/rocm
ROCM_PATH ?= /opt/rocm ROCM_PATH ?= /opt/rocm
@@ -134,26 +111,27 @@ ifeq ($(BUILD_TYPE),hipblas)
export CC=$(ROCM_HOME)/llvm/bin/clang export CC=$(ROCM_HOME)/llvm/bin/clang
# llama-ggml has no hipblas support, so override it here. # llama-ggml has no hipblas support, so override it here.
export STABLE_BUILD_TYPE= export STABLE_BUILD_TYPE=
export GGML_HIPBLAS=1 export WHISPER_HIPBLAS=1
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101 GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
AMDGPU_TARGETS ?= "$(GPU_TARGETS)" AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)" CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
endif endif
ifeq ($(BUILD_TYPE),metal) ifeq ($(BUILD_TYPE),metal)
CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
export GGML_METAL=1 export LLAMA_METAL=1
export WHISPER_METAL=1
endif endif
ifeq ($(BUILD_TYPE),clblas) ifeq ($(BUILD_TYPE),clblas)
CGO_LDFLAGS+=-lOpenCL -lclblast CGO_LDFLAGS+=-lOpenCL -lclblast
export GGML_OPENBLAS=1 export WHISPER_CLBLAST=1
endif endif
# glibc-static or glibc-devel-static required # glibc-static or glibc-devel-static required
ifeq ($(STATIC),true) ifeq ($(STATIC),true)
LD_FLAGS+=-linkmode external -extldflags -static LD_FLAGS=-linkmode external -extldflags -static
endif endif
ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion) ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion)
@@ -187,8 +165,6 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC) ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
# Use filter-out to remove the specified backends
ALL_GRPC_BACKENDS := $(filter-out $(SKIP_GRPC_BACKEND),$(ALL_GRPC_BACKENDS))
GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC) GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)
TEST_PATHS?=./api/... ./pkg/... ./core/... TEST_PATHS?=./api/... ./pkg/... ./core/...
@@ -208,109 +184,69 @@ all: help
## BERT embeddings ## BERT embeddings
sources/go-bert.cpp: sources/go-bert.cpp:
mkdir -p sources/go-bert.cpp git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert.cpp
cd sources/go-bert.cpp && \ cd sources/go-bert.cpp && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
git init && \
git remote add origin $(BERT_REPO) && \
git fetch origin && \
git checkout $(BERT_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
$(MAKE) -C sources/go-bert.cpp libgobert.a $(MAKE) -C sources/go-bert.cpp libgobert.a
## go-llama.cpp ## go-llama.cpp
sources/go-llama.cpp: sources/go-llama.cpp:
mkdir -p sources/go-llama.cpp git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama.cpp
cd sources/go-llama.cpp && \ cd sources/go-llama.cpp && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
git init && \
git remote add origin $(GOLLAMA_REPO) && \
git fetch origin && \
git checkout $(GOLLAMA_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a $(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
## go-piper ## go-piper
sources/go-piper: sources/go-piper:
mkdir -p sources/go-piper git clone --recurse-submodules https://github.com/mudler/go-piper sources/go-piper
cd sources/go-piper && \ cd sources/go-piper && git checkout -b build $(PIPER_VERSION) && git submodule update --init --recursive --depth 1
git init && \
git remote add origin $(PIPER_REPO) && \
git fetch origin && \
git checkout $(PIPER_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/go-piper/libpiper_binding.a: sources/go-piper sources/go-piper/libpiper_binding.a: sources/go-piper
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o $(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
## GPT4ALL ## GPT4ALL
sources/gpt4all: sources/gpt4all:
mkdir -p sources/gpt4all git clone --recurse-submodules $(GPT4ALL_REPO) sources/gpt4all
cd sources/gpt4all && \ cd sources/gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
git init && \
git remote add origin $(GPT4ALL_REPO) && \
git fetch origin && \
git checkout $(GPT4ALL_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a $(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
## RWKV ## RWKV
sources/go-rwkv.cpp: sources/go-rwkv.cpp:
mkdir -p sources/go-rwkv.cpp git clone --recurse-submodules $(RWKV_REPO) sources/go-rwkv.cpp
cd sources/go-rwkv.cpp && \ cd sources/go-rwkv.cpp && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
git init && \
git remote add origin $(RWKV_REPO) && \
git fetch origin && \
git checkout $(RWKV_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp
cd sources/go-rwkv.cpp && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a .. cd sources/go-rwkv.cpp && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
## stable diffusion ## stable diffusion
sources/go-stable-diffusion: sources/go-stable-diffusion:
mkdir -p sources/go-stable-diffusion git clone --recurse-submodules https://github.com/mudler/go-stable-diffusion sources/go-stable-diffusion
cd sources/go-stable-diffusion && \ cd sources/go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1
git init && \
git remote add origin $(STABLEDIFFUSION_REPO) && \
git fetch origin && \
git checkout $(STABLEDIFFUSION_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
## tiny-dream ## tiny-dream
sources/go-tiny-dream: sources/go-tiny-dream:
mkdir -p sources/go-tiny-dream git clone --recurse-submodules https://github.com/M0Rf30/go-tiny-dream sources/go-tiny-dream
cd sources/go-tiny-dream && \ cd sources/go-tiny-dream && git checkout -b build $(TINYDREAM_VERSION) && git submodule update --init --recursive --depth 1
git init && \
git remote add origin $(TINYDREAM_REPO) && \
git fetch origin && \
git checkout $(TINYDREAM_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream
$(MAKE) -C sources/go-tiny-dream libtinydream.a $(MAKE) -C sources/go-tiny-dream libtinydream.a
## whisper ## whisper
sources/whisper.cpp: sources/whisper.cpp:
mkdir -p sources/whisper.cpp git clone https://github.com/ggerganov/whisper.cpp sources/whisper.cpp
cd sources/whisper.cpp && \ cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
git init && \
git remote add origin $(WHISPER_REPO) && \
git fetch origin && \
git checkout $(WHISPER_CPP_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a cd sources/whisper.cpp && $(MAKE) libwhisper.a
get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream
replace: replace:
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp $(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
@@ -377,27 +313,16 @@ build: prepare backend-assets grpcs ## Build the project
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET}) $(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET}) $(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET}) $(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
ls -liah backend-assets/grpc
ifneq ($(BACKEND_LIBS),)
$(MAKE) backend-assets/lib
cp -f $(BACKEND_LIBS) backend-assets/lib/
endif
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./ CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
build-minimal: build-minimal:
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=none $(MAKE) build
build-api: build-api:
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
backend-assets/lib:
mkdir -p backend-assets/lib
dist: dist:
$(MAKE) backend-assets/grpc/llama-cpp-avx2 STATIC=true $(MAKE) backend-assets/grpc/llama-cpp-avx2
ifeq ($(DETECT_LIBS),true)
scripts/prepare-libs.sh backend-assets/grpc/llama-cpp-avx2
endif
ifeq ($(OS),Darwin) ifeq ($(OS),Darwin)
$(info ${GREEN}I Skip CUDA/hipblas build on MacOS${RESET}) $(info ${GREEN}I Skip CUDA/hipblas build on MacOS${RESET})
else else
@@ -406,11 +331,7 @@ else
$(MAKE) backend-assets/grpc/llama-cpp-sycl_f16 $(MAKE) backend-assets/grpc/llama-cpp-sycl_f16
$(MAKE) backend-assets/grpc/llama-cpp-sycl_f32 $(MAKE) backend-assets/grpc/llama-cpp-sycl_f32
endif endif
GO_TAGS="tts p2p" $(MAKE) build $(MAKE) build
ifeq ($(DETECT_LIBS),true)
scripts/prepare-libs.sh backend-assets/grpc/piper
endif
GO_TAGS="tts p2p" STATIC=true $(MAKE) build
mkdir -p release mkdir -p release
# if BUILD_ID is empty, then we don't append it to the binary name # if BUILD_ID is empty, then we don't append it to the binary name
ifeq ($(BUILD_ID),) ifeq ($(BUILD_ID),)
@@ -421,9 +342,9 @@ else
shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH).sha256 shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH).sha256
endif endif
dist-cross-linux-arm64: dist-cross-linux-arm64:
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" GO_TAGS="p2p" \ CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
STATIC=true $(MAKE) build $(MAKE) build
mkdir -p release mkdir -p release
# if BUILD_ID is empty, then we don't append it to the binary name # if BUILD_ID is empty, then we don't append it to the binary name
ifeq ($(BUILD_ID),) ifeq ($(BUILD_ID),)
@@ -472,7 +393,7 @@ prepare-e2e:
mkdir -p $(TEST_DIR) mkdir -p $(TEST_DIR)
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=4 --build-arg FFMPEG=true -t localai-tests . docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests .
run-e2e-image: run-e2e-image:
ls -liah $(abspath ./tests/e2e-fixtures) ls -liah $(abspath ./tests/e2e-fixtures)
@@ -768,71 +689,71 @@ else
endif endif
# This target is for manually building a variant with-auto detected flags # This target is for manually building a variant with-auto detected flags
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp backend-assets/grpc/llama-cpp: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-cpp cp -rf backend/cpp/llama backend/cpp/llama-cpp
$(MAKE) -C backend/cpp/llama-cpp purge $(MAKE) -C backend/cpp/llama-cpp purge
$(info ${GREEN}I llama-cpp build info:avx2${RESET}) $(info ${GREEN}I llama-cpp build info:avx2${RESET})
$(MAKE) VARIANT="llama-cpp" build-llama-cpp-grpc-server $(MAKE) VARIANT="llama-cpp" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-cpp/grpc-server backend-assets/grpc/llama-cpp cp -rfv backend/cpp/llama-cpp/grpc-server backend-assets/grpc/llama-cpp
backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc backend/cpp/llama/llama.cpp backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-avx2 cp -rf backend/cpp/llama backend/cpp/llama-avx2
$(MAKE) -C backend/cpp/llama-avx2 purge $(MAKE) -C backend/cpp/llama-avx2 purge
$(info ${GREEN}I llama-cpp build info:avx2${RESET}) $(info ${GREEN}I llama-cpp build info:avx2${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2 cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp backend-assets/grpc/llama-cpp-avx: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-avx cp -rf backend/cpp/llama backend/cpp/llama-avx
$(MAKE) -C backend/cpp/llama-avx purge $(MAKE) -C backend/cpp/llama-avx purge
$(info ${GREEN}I llama-cpp build info:avx${RESET}) $(info ${GREEN}I llama-cpp build info:avx${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-avx/grpc-server backend-assets/grpc/llama-cpp-avx cp -rfv backend/cpp/llama-avx/grpc-server backend-assets/grpc/llama-cpp-avx
backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc backend/cpp/llama/llama.cpp backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-fallback cp -rf backend/cpp/llama backend/cpp/llama-fallback
$(MAKE) -C backend/cpp/llama-fallback purge $(MAKE) -C backend/cpp/llama-fallback purge
$(info ${GREEN}I llama-cpp build info:fallback${RESET}) $(info ${GREEN}I llama-cpp build info:fallback${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
# TODO: every binary should have its own folder instead, so can have different metal implementations # TODO: every binary should have its own folder instead, so can have different metal implementations
ifeq ($(BUILD_TYPE),metal) ifeq ($(BUILD_TYPE),metal)
cp backend/cpp/llama-fallback/llama.cpp/build/bin/default.metallib backend-assets/grpc/ cp backend/cpp/llama-fallback/llama.cpp/build/bin/default.metallib backend-assets/grpc/
endif endif
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama/llama.cpp backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-cuda cp -rf backend/cpp/llama backend/cpp/llama-cuda
$(MAKE) -C backend/cpp/llama-cuda purge $(MAKE) -C backend/cpp/llama-cuda purge
$(info ${GREEN}I llama-cpp build info:cuda${RESET}) $(info ${GREEN}I llama-cpp build info:cuda${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc backend/cpp/llama/llama.cpp backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-hipblas cp -rf backend/cpp/llama backend/cpp/llama-hipblas
$(MAKE) -C backend/cpp/llama-hipblas purge $(MAKE) -C backend/cpp/llama-hipblas purge
$(info ${GREEN}I llama-cpp build info:hipblas${RESET}) $(info ${GREEN}I llama-cpp build info:hipblas${RESET})
BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama/llama.cpp backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-sycl_f16 cp -rf backend/cpp/llama backend/cpp/llama-sycl_f16
$(MAKE) -C backend/cpp/llama-sycl_f16 purge $(MAKE) -C backend/cpp/llama-sycl_f16 purge
$(info ${GREEN}I llama-cpp build info:sycl_f16${RESET}) $(info ${GREEN}I llama-cpp build info:sycl_f16${RESET})
BUILD_TYPE="sycl_f16" $(MAKE) VARIANT="llama-sycl_f16" build-llama-cpp-grpc-server BUILD_TYPE="sycl_f16" $(MAKE) VARIANT="llama-sycl_f16" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-sycl_f16/grpc-server backend-assets/grpc/llama-cpp-sycl_f16 cp -rfv backend/cpp/llama-sycl_f16/grpc-server backend-assets/grpc/llama-cpp-sycl_f16
backend-assets/grpc/llama-cpp-sycl_f32: backend-assets/grpc backend/cpp/llama/llama.cpp backend-assets/grpc/llama-cpp-sycl_f32: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-sycl_f32 cp -rf backend/cpp/llama backend/cpp/llama-sycl_f32
$(MAKE) -C backend/cpp/llama-sycl_f32 purge $(MAKE) -C backend/cpp/llama-sycl_f32 purge
$(info ${GREEN}I llama-cpp build info:sycl_f32${RESET}) $(info ${GREEN}I llama-cpp build info:sycl_f32${RESET})
BUILD_TYPE="sycl_f32" $(MAKE) VARIANT="llama-sycl_f32" build-llama-cpp-grpc-server BUILD_TYPE="sycl_f32" $(MAKE) VARIANT="llama-sycl_f32" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-sycl_f32/grpc-server backend-assets/grpc/llama-cpp-sycl_f32 cp -rfv backend/cpp/llama-sycl_f32/grpc-server backend-assets/grpc/llama-cpp-sycl_f32
backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama/llama.cpp backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-grpc cp -rf backend/cpp/llama backend/cpp/llama-grpc
$(MAKE) -C backend/cpp/llama-grpc purge $(MAKE) -C backend/cpp/llama-grpc purge
$(info ${GREEN}I llama-cpp build info:grpc${RESET}) $(info ${GREEN}I llama-cpp build info:grpc${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_RPC=ON -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-grpc/grpc-server backend-assets/grpc/llama-cpp-grpc cp -rfv backend/cpp/llama-grpc/grpc-server backend-assets/grpc/llama-cpp-grpc
backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
@@ -860,7 +781,7 @@ backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libti
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \ CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH=$(CURDIR)/sources/whisper.cpp LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
backend-assets/grpc/local-store: backend-assets/grpc backend-assets/grpc/local-store: backend-assets/grpc
@@ -882,17 +803,6 @@ docker:
--build-arg BUILD_TYPE=$(BUILD_TYPE) \ --build-arg BUILD_TYPE=$(BUILD_TYPE) \
-t $(DOCKER_IMAGE) . -t $(DOCKER_IMAGE) .
docker-cuda11:
docker build \
--build-arg CUDA_MAJOR_VERSION=11 \
--build-arg CUDA_MINOR_VERSION=8 \
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="$(GO_TAGS)" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
-t $(DOCKER_IMAGE)-cuda11 .
docker-aio: docker-aio:
@echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)" @echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)"
docker build \ docker build \
@@ -906,7 +816,7 @@ docker-aio-all:
docker-image-intel: docker-image-intel:
docker build \ docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \ --build-arg BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \ --build-arg GO_TAGS="none" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
@@ -914,7 +824,7 @@ docker-image-intel:
docker-image-intel-xpu: docker-image-intel-xpu:
docker build \ docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \ --build-arg BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \ --build-arg GO_TAGS="none" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
@@ -929,7 +839,7 @@ gen-assets:
$(GOCMD) run core/dependencies_manager/manager.go embedded/webui_static.yaml core/http/static/assets $(GOCMD) run core/dependencies_manager/manager.go embedded/webui_static.yaml core/http/static/assets
## Documentation ## Documentation
docs/layouts/_default: docs/layouts/_default:
mkdir -p docs/layouts/_default mkdir -p docs/layouts/_default
docs/static/gallery.html: docs/layouts/_default docs/static/gallery.html: docs/layouts/_default
@@ -944,4 +854,4 @@ docs-clean:
.PHONY: docs .PHONY: docs
docs: docs/static/gallery.html docs: docs/static/gallery.html
cd docs && hugo serve cd docs && hugo serve

View File

@@ -48,13 +48,6 @@
![screen](https://github.com/mudler/LocalAI/assets/2420543/20b5ccd2-8393-44f0-aaf6-87a23806381e) ![screen](https://github.com/mudler/LocalAI/assets/2420543/20b5ccd2-8393-44f0-aaf6-87a23806381e)
Run the installer script:
```bash
curl https://localai.io/install.sh | sh
```
Or run with docker:
```bash ```bash
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
# Alternative images: # Alternative images:
@@ -72,15 +65,12 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723 - 🔥🔥 Decentralized llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
- June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io - 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
- June 2024: Support for models from OCI registries: https://github.com/mudler/LocalAI/pull/2628 - 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328
- May 2024: 🔥🔥 Decentralized P2P llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/ - 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
- May 2024: 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334 - Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
- May 2024: 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328 - Reranker API: https://github.com/mudler/LocalAI/pull/2121
- May 2024: 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
- May 2024: Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
- April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121
Hot topics (looking for contributors): Hot topics (looking for contributors):
@@ -90,7 +80,6 @@ Hot topics (looking for contributors):
- Assistant API: https://github.com/mudler/LocalAI/issues/1273 - Assistant API: https://github.com/mudler/LocalAI/issues/1273
- Moderation endpoint: https://github.com/mudler/LocalAI/issues/999 - Moderation endpoint: https://github.com/mudler/LocalAI/issues/999
- Vulkan: https://github.com/mudler/LocalAI/issues/1647 - Vulkan: https://github.com/mudler/LocalAI/issues/1647
- Anthropic API: https://github.com/mudler/LocalAI/issues/1808
If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22 If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22
@@ -107,7 +96,6 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
- 🥽 [Vision API](https://localai.io/features/gpt-vision/) - 🥽 [Vision API](https://localai.io/features/gpt-vision/)
- 📈 [Reranker API](https://localai.io/features/reranker/) - 📈 [Reranker API](https://localai.io/features/reranker/)
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/) - 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
- 🌍 Integrated WebUI!
## 💻 Usage ## 💻 Usage
@@ -136,7 +124,6 @@ Other:
- Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack - Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
- Shell-Pilot(Interact with LLM using LocalAI models via pure shell scripts on your Linux or MacOS system) https://github.com/reid41/shell-pilot - Shell-Pilot(Interact with LLM using LocalAI models via pure shell scripts on your Linux or MacOS system) https://github.com/reid41/shell-pilot
- Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot - Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
- Github Actions: https://github.com/marketplace/actions/start-localai
- Examples: https://github.com/mudler/LocalAI/tree/master/examples/ - Examples: https://github.com/mudler/LocalAI/tree/master/examples/

View File

@@ -230,7 +230,6 @@ message TranscriptRequest {
string dst = 2; string dst = 2;
string language = 3; string language = 3;
uint32 threads = 4; uint32 threads = 4;
bool translate = 5;
} }
message TranscriptResult { message TranscriptResult {

View File

@@ -46,14 +46,9 @@ endif
$(INSTALLED_PACKAGES): grpc_build $(INSTALLED_PACKAGES): grpc_build
$(GRPC_REPO): $(GRPC_REPO):
mkdir -p $(GRPC_REPO)/grpc git clone --depth $(GIT_CLONE_DEPTH) -b $(TAG_LIB_GRPC) $(GIT_REPO_LIB_GRPC) $(GRPC_REPO)/grpc
cd $(GRPC_REPO)/grpc && \ cd $(GRPC_REPO)/grpc && git submodule update --jobs 2 --init --recursive --depth $(GIT_CLONE_DEPTH)
git init && \
git remote add origin $(GIT_REPO_LIB_GRPC) && \
git fetch origin && \
git checkout $(TAG_LIB_GRPC) && \
git submodule update --init --recursive --depth 1 --single-branch
$(GRPC_BUILD): $(GRPC_REPO) $(GRPC_BUILD): $(GRPC_REPO)
mkdir -p $(GRPC_BUILD) mkdir -p $(GRPC_BUILD)
cd $(GRPC_BUILD) && cmake $(CMAKE_ARGS) ../$(GRPC_REPO)/grpc && cmake --build . && cmake --build . --target install cd $(GRPC_BUILD) && cmake $(CMAKE_ARGS) ../$(GRPC_REPO)/grpc && cmake --build . && cmake --build . --target install

View File

@@ -1,58 +1,45 @@
LLAMA_VERSION?= LLAMA_VERSION?=
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
CMAKE_ARGS?= CMAKE_ARGS?=
BUILD_TYPE?= BUILD_TYPE?=
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
TARGET?=--target grpc-server
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static # If build type is cublas, then we set -DLLAMA_CUBLAS=ON to CMAKE_ARGS automatically
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
ifeq ($(BUILD_TYPE),cublas) ifeq ($(BUILD_TYPE),cublas)
CMAKE_ARGS+=-DGGML_CUDA=ON CMAKE_ARGS+=-DLLAMA_CUBLAS=ON
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS # If build type is openblas then we set -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
# to CMAKE_ARGS automatically # to CMAKE_ARGS automatically
else ifeq ($(BUILD_TYPE),openblas) else ifeq ($(BUILD_TYPE),openblas)
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS CMAKE_ARGS+=-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path # If build type is clblas (openCL) we set -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
else ifeq ($(BUILD_TYPE),clblas) else ifeq ($(BUILD_TYPE),clblas)
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path CMAKE_ARGS+=-DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ # If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
else ifeq ($(BUILD_TYPE),hipblas) else ifeq ($(BUILD_TYPE),hipblas)
CMAKE_ARGS+=-DGGML_HIPBLAS=ON CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation # If it's OSX, DO NOT embed the metal library - -DLLAMA_METAL_EMBED_LIBRARY=ON requires further investigation
# But if it's OSX without metal, disable it here # But if it's OSX without metal, disable it here
else ifeq ($(OS),Darwin) else ifeq ($(OS),darwin)
ifneq ($(BUILD_TYPE),metal) ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DGGML_METAL=OFF CMAKE_ARGS+=-DLLAMA_METAL=OFF
else
CMAKE_ARGS+=-DGGML_METAL=ON
# Until this is tested properly, we disable embedded metal file
# as we already embed it as part of the LocalAI assets
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=OFF
TARGET+=--target ggml-metal
endif endif
endif endif
ifeq ($(BUILD_TYPE),sycl_f16) ifeq ($(BUILD_TYPE),sycl_f16)
CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
endif endif
ifeq ($(BUILD_TYPE),sycl_f32) ifeq ($(BUILD_TYPE),sycl_f32)
CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
endif endif
llama.cpp: llama.cpp:
mkdir -p llama.cpp git clone --recurse-submodules https://github.com/ggerganov/llama.cpp llama.cpp
cd llama.cpp && \ if [ -z "$(LLAMA_VERSION)" ]; then \
git init && \ exit 1; \
git remote add origin $(LLAMA_REPO) && \ fi
git fetch origin && \ cd llama.cpp && git checkout -b build $(LLAMA_VERSION) && git submodule update --init --recursive --depth 1
git checkout -b build $(LLAMA_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
llama.cpp/examples/grpc-server: llama.cpp llama.cpp/examples/grpc-server: llama.cpp
mkdir -p llama.cpp/examples/grpc-server mkdir -p llama.cpp/examples/grpc-server
@@ -74,9 +61,9 @@ clean: purge
grpc-server: llama.cpp llama.cpp/examples/grpc-server grpc-server: llama.cpp llama.cpp/examples/grpc-server
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)" @echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
ifneq (,$(findstring sycl,$(BUILD_TYPE))) ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+bash -c "source $(ONEAPI_VARS); \ bash -c "source $(ONEAPI_VARS); \
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)" cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && $(MAKE)"
else else
+cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET) cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && $(MAKE)
endif endif
cp llama.cpp/build/bin/grpc-server . cp llama.cpp/build/bin/grpc-server .

View File

@@ -886,8 +886,6 @@ struct llama_server_context
{"task_id", slot->task_id}, {"task_id", slot->task_id},
}); });
LOG_TEE("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
return true; return true;
} }
@@ -2108,7 +2106,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
data["grammar"] = predict->grammar(); data["grammar"] = predict->grammar();
data["prompt"] = predict->prompt(); data["prompt"] = predict->prompt();
data["ignore_eos"] = predict->ignoreeos(); data["ignore_eos"] = predict->ignoreeos();
data["embeddings"] = predict->embeddings();
// for each image in the request, add the image data // for each image in the request, add the image data
// //
@@ -2386,31 +2383,6 @@ public:
return grpc::Status::OK; return grpc::Status::OK;
} }
/// https://github.com/ggerganov/llama.cpp/blob/aa2341298924ac89778252015efcb792f2df1e20/examples/server/server.cpp#L2969
grpc::Status Embedding(ServerContext* context, const backend::PredictOptions* request, backend::EmbeddingResult* embeddingResult) {
json data = parse_options(false, request, llama);
const int task_id = llama.queue_tasks.get_new_id();
llama.queue_results.add_waiting_task_id(task_id);
llama.request_completion(task_id, { {"prompt", data["embeddings"]}, { "n_predict", 0}, {"image_data", ""} }, false, true, -1);
// get the result
task_result result = llama.queue_results.recv(task_id);
//std::cout << "Embedding result JSON" << result.result_json.dump() << std::endl;
llama.queue_results.remove_waiting_task_id(task_id);
if (!result.error && result.stop) {
std::vector<float> embeddings = result.result_json.value("embedding", std::vector<float>());
// loop the vector and set the embeddings results
for (int i = 0; i < embeddings.size(); i++) {
embeddingResult->add_embeddings(embeddings[i]);
}
}
else
{
return grpc::Status::OK;
}
return grpc::Status::OK;
}
}; };
void RunServer(const std::string& server_address) { void RunServer(const std::string& server_address) {

View File

@@ -5,7 +5,7 @@ package main
import ( import (
"flag" "flag"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/go-skynet/LocalAI/pkg/grpc"
) )
var ( var (

View File

@@ -3,9 +3,9 @@ package main
// This is a wrapper to statisfy the GRPC service interface // This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) // It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import ( import (
"github.com/mudler/LocalAI/pkg/grpc/base" "github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto" pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/stablediffusion" "github.com/go-skynet/LocalAI/pkg/stablediffusion"
) )
type Image struct { type Image struct {

View File

@@ -5,7 +5,7 @@ package main
import ( import (
"flag" "flag"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/go-skynet/LocalAI/pkg/grpc"
) )
var ( var (

View File

@@ -3,9 +3,9 @@ package main
// This is a wrapper to statisfy the GRPC service interface // This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) // It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import ( import (
"github.com/mudler/LocalAI/pkg/grpc/base" "github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto" pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/tinydream" "github.com/go-skynet/LocalAI/pkg/tinydream"
) )
type Image struct { type Image struct {

View File

@@ -5,8 +5,8 @@ package main
import ( import (
bert "github.com/go-skynet/go-bert.cpp" bert "github.com/go-skynet/go-bert.cpp"
"github.com/mudler/LocalAI/pkg/grpc/base" "github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto" pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
) )
type Embeddings struct { type Embeddings struct {

View File

@@ -5,7 +5,7 @@ package main
import ( import (
"flag" "flag"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/go-skynet/LocalAI/pkg/grpc"
) )
var ( var (

View File

@@ -5,8 +5,8 @@ package main
import ( import (
"fmt" "fmt"
"github.com/mudler/LocalAI/pkg/grpc/base" "github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto" pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang" gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
) )

View File

@@ -5,7 +5,7 @@ package main
import ( import (
"flag" "flag"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/go-skynet/LocalAI/pkg/grpc"
) )
var ( var (

View File

@@ -6,9 +6,9 @@ import (
"fmt" "fmt"
"os" "os"
"github.com/mudler/LocalAI/pkg/grpc/base" "github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto" pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/langchain" "github.com/go-skynet/LocalAI/pkg/langchain"
) )
type LLM struct { type LLM struct {

View File

@@ -5,7 +5,7 @@ package main
import ( import (
"flag" "flag"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/go-skynet/LocalAI/pkg/grpc"
) )
var ( var (

View File

@@ -5,9 +5,9 @@ package main
import ( import (
"fmt" "fmt"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/go-llama.cpp" "github.com/go-skynet/go-llama.cpp"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
) )
type LLM struct { type LLM struct {

View File

@@ -3,7 +3,7 @@ package main
import ( import (
"flag" "flag"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/go-skynet/LocalAI/pkg/grpc"
) )
var ( var (

View File

@@ -6,9 +6,9 @@ import (
"fmt" "fmt"
"path/filepath" "path/filepath"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/go-llama.cpp" "github.com/go-skynet/go-llama.cpp"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
) )
type LLM struct { type LLM struct {

View File

@@ -7,7 +7,7 @@ package main
import ( import (
"flag" "flag"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/go-skynet/LocalAI/pkg/grpc"
) )
var ( var (

View File

@@ -5,7 +5,7 @@ package main
import ( import (
"flag" "flag"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/go-skynet/LocalAI/pkg/grpc"
) )
var ( var (

View File

@@ -7,8 +7,8 @@ import (
"path/filepath" "path/filepath"
"github.com/donomii/go-rwkv.cpp" "github.com/donomii/go-rwkv.cpp"
"github.com/mudler/LocalAI/pkg/grpc/base" "github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto" pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
) )
const tokenizerSuffix = ".tokenizer.json" const tokenizerSuffix = ".tokenizer.json"
@@ -31,7 +31,7 @@ func (llm *LLM) Load(opts *pb.ModelOptions) error {
model := rwkv.LoadFiles(opts.ModelFile, tokenizerPath, uint32(opts.GetThreads())) model := rwkv.LoadFiles(opts.ModelFile, tokenizerPath, uint32(opts.GetThreads()))
if model == nil { if model == nil {
return fmt.Errorf("rwkv could not load model") return fmt.Errorf("could not load model")
} }
llm.rwkv = model llm.rwkv = model
return nil return nil

View File

@@ -6,7 +6,7 @@ import (
"flag" "flag"
"os" "os"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/go-skynet/LocalAI/pkg/grpc"
"github.com/rs/zerolog" "github.com/rs/zerolog"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
) )

View File

@@ -8,8 +8,8 @@ import (
"math" "math"
"slices" "slices"
"github.com/mudler/LocalAI/pkg/grpc/base" "github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto" pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
) )

View File

@@ -5,7 +5,7 @@ package main
import ( import (
"flag" "flag"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/go-skynet/LocalAI/pkg/grpc"
) )
var ( var (

View File

@@ -8,7 +8,7 @@ import (
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper" "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
"github.com/go-audio/wav" "github.com/go-audio/wav"
"github.com/mudler/LocalAI/core/schema" "github.com/go-skynet/LocalAI/core/schema"
) )
func ffmpegCommand(args []string) (string, error) { func ffmpegCommand(args []string) (string, error) {
@@ -29,7 +29,7 @@ func audioToWav(src, dst string) error {
return nil return nil
} }
func Transcript(model whisper.Model, audiopath, language string, translate bool, threads uint) (schema.TranscriptionResult, error) { func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.TranscriptionResult, error) {
res := schema.TranscriptionResult{} res := schema.TranscriptionResult{}
dir, err := os.MkdirTemp("", "whisper") dir, err := os.MkdirTemp("", "whisper")
@@ -75,10 +75,6 @@ func Transcript(model whisper.Model, audiopath, language string, translate bool,
context.SetLanguage("auto") context.SetLanguage("auto")
} }
if translate {
context.SetTranslate(true)
}
if err := context.Process(data, nil, nil); err != nil { if err := context.Process(data, nil, nil); err != nil {
return res, err return res, err
} }

View File

@@ -4,9 +4,9 @@ package main
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) // It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import ( import (
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper" "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
"github.com/mudler/LocalAI/core/schema" "github.com/go-skynet/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/grpc/base" "github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto" pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
) )
type Whisper struct { type Whisper struct {
@@ -22,5 +22,5 @@ func (sd *Whisper) Load(opts *pb.ModelOptions) error {
} }
func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.TranscriptionResult, error) { func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.TranscriptionResult, error) {
return Transcript(sd.whisper, opts.Dst, opts.Language, opts.Translate, uint(opts.Threads)) return Transcript(sd.whisper, opts.Dst, opts.Language, uint(opts.Threads))
} }

View File

@@ -5,7 +5,7 @@ package main
import ( import (
"flag" "flag"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/go-skynet/LocalAI/pkg/grpc"
) )
var ( var (

View File

@@ -7,8 +7,8 @@ import (
"os" "os"
"path/filepath" "path/filepath"
"github.com/mudler/LocalAI/pkg/grpc/base" "github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto" pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
piper "github.com/mudler/go-piper" piper "github.com/mudler/go-piper"
) )

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch intel-extension-for-pytorch
torch torch
optimum[openvino] optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,6 @@
accelerate accelerate
auto-gptq==0.7.1 auto-gptq==0.7.1
grpcio==1.65.0 grpcio==1.64.0
protobuf protobuf
torch torch
certifi certifi

View File

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch torch
torchaudio torchaudio
optimum[openvino] optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,6 @@
accelerate accelerate
bark==0.1.5 bark==0.1.5
grpcio==1.65.0 grpcio==1.64.0
protobuf protobuf
certifi certifi
transformers transformers

View File

@@ -148,13 +148,13 @@ function startBackend() {
ensureVenv ensureVenv
if [ ! -z ${BACKEND_FILE} ]; then if [ ! -z ${BACKEND_FILE} ]; then
exec python ${BACKEND_FILE} $@ python ${BACKEND_FILE} $@
elif [ -e "${MY_DIR}/server.py" ]; then elif [ -e "${MY_DIR}/server.py" ]; then
exec python ${MY_DIR}/server.py $@ python ${MY_DIR}/server.py $@
elif [ -e "${MY_DIR}/backend.py" ]; then elif [ -e "${MY_DIR}/backend.py" ]; then
exec python ${MY_DIR}/backend.py $@ python ${MY_DIR}/backend.py $@
elif [ -e "${MY_DIR}/${BACKEND_NAME}.py" ]; then elif [ -e "${MY_DIR}/${BACKEND_NAME}.py" ]; then
exec python ${MY_DIR}/${BACKEND_NAME}.py $@ python ${MY_DIR}/${BACKEND_NAME}.py $@
fi fi
} }
@@ -210,4 +210,4 @@ function checkTargets() {
echo false echo false
} }
init init

View File

@@ -1,2 +1,2 @@
grpcio==1.65.0 grpcio==1.64.0
protobuf protobuf

View File

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch torch
torchaudio torchaudio
optimum[openvino] optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,6 @@
accelerate accelerate
TTS==0.22.0 TTS==0.22.0
grpcio==1.65.0 grpcio==1.64.0
protobuf protobuf
certifi certifi
transformers transformers

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from concurrent import futures from concurrent import futures
import traceback
import argparse import argparse
from collections import defaultdict from collections import defaultdict
from enum import Enum from enum import Enum
@@ -17,39 +17,35 @@ import backend_pb2_grpc
import grpc import grpc
from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \ from diffusers import StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, EulerAncestralDiscreteScheduler
EulerAncestralDiscreteScheduler
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
from diffusers.pipelines.stable_diffusion import safety_checker from diffusers.pipelines.stable_diffusion import safety_checker
from diffusers.utils import load_image, export_to_video from diffusers.utils import load_image,export_to_video
from compel import Compel, ReturnedEmbeddingsType from compel import Compel, ReturnedEmbeddingsType
from transformers import CLIPTextModel from transformers import CLIPTextModel
from safetensors.torch import load_file from safetensors.torch import load_file
_ONE_DAY_IN_SECONDS = 60 * 60 * 24 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
COMPEL = os.environ.get("COMPEL", "0") == "1" COMPEL=os.environ.get("COMPEL", "0") == "1"
XPU = os.environ.get("XPU", "0") == "1" XPU=os.environ.get("XPU", "0") == "1"
CLIPSKIP = os.environ.get("CLIPSKIP", "1") == "1" CLIPSKIP=os.environ.get("CLIPSKIP", "1") == "1"
SAFETENSORS = os.environ.get("SAFETENSORS", "1") == "1" SAFETENSORS=os.environ.get("SAFETENSORS", "1") == "1"
CHUNK_SIZE = os.environ.get("CHUNK_SIZE", "8") CHUNK_SIZE=os.environ.get("CHUNK_SIZE", "8")
FPS = os.environ.get("FPS", "7") FPS=os.environ.get("FPS", "7")
DISABLE_CPU_OFFLOAD = os.environ.get("DISABLE_CPU_OFFLOAD", "0") == "1" DISABLE_CPU_OFFLOAD=os.environ.get("DISABLE_CPU_OFFLOAD", "0") == "1"
FRAMES = os.environ.get("FRAMES", "64") FRAMES=os.environ.get("FRAMES", "64")
if XPU: if XPU:
import intel_extension_for_pytorch as ipex import intel_extension_for_pytorch as ipex
print(ipex.xpu.get_device_name(0)) print(ipex.xpu.get_device_name(0))
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1 # If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1')) MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
# https://github.com/CompVis/stable-diffusion/issues/239#issuecomment-1627615287 # https://github.com/CompVis/stable-diffusion/issues/239#issuecomment-1627615287
def sc(self, clip_input, images): return images, [False for i in images] def sc(self, clip_input, images) : return images, [False for i in images]
# edit the StableDiffusionSafetyChecker class so that, when called, it just returns the images and an array of True values # edit the StableDiffusionSafetyChecker class so that, when called, it just returns the images and an array of True values
safety_checker.StableDiffusionSafetyChecker.forward = sc safety_checker.StableDiffusionSafetyChecker.forward = sc
@@ -66,8 +62,6 @@ from diffusers.schedulers import (
PNDMScheduler, PNDMScheduler,
UniPCMultistepScheduler, UniPCMultistepScheduler,
) )
# The scheduler list mapping was taken from here: https://github.com/neggles/animatediff-cli/blob/6f336f5f4b5e38e85d7f06f1744ef42d0a45f2a7/src/animatediff/schedulers.py#L39 # The scheduler list mapping was taken from here: https://github.com/neggles/animatediff-cli/blob/6f336f5f4b5e38e85d7f06f1744ef42d0a45f2a7/src/animatediff/schedulers.py#L39
# Credits to https://github.com/neggles # Credits to https://github.com/neggles
# See https://github.com/huggingface/diffusers/issues/4167 for more details on sched mapping from A1111 # See https://github.com/huggingface/diffusers/issues/4167 for more details on sched mapping from A1111
@@ -142,12 +136,10 @@ def get_scheduler(name: str, config: dict = {}):
return sched_class.from_config(config) return sched_class.from_config(config)
# Implement the BackendServicer class with the service methods # Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer): class BackendServicer(backend_pb2_grpc.BackendServicer):
def Health(self, request, context): def Health(self, request, context):
return backend_pb2.Reply(message=bytes("OK", 'utf-8')) return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
def LoadModel(self, request, context): def LoadModel(self, request, context):
try: try:
print(f"Loading model {request.Model}...", file=sys.stderr) print(f"Loading model {request.Model}...", file=sys.stderr)
@@ -157,7 +149,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if request.F16Memory: if request.F16Memory:
torchType = torch.float16 torchType = torch.float16
variant = "fp16" variant="fp16"
local = False local = False
modelFile = request.Model modelFile = request.Model
@@ -165,38 +157,38 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
self.cfg_scale = 7 self.cfg_scale = 7
if request.CFGScale != 0: if request.CFGScale != 0:
self.cfg_scale = request.CFGScale self.cfg_scale = request.CFGScale
clipmodel = "runwayml/stable-diffusion-v1-5" clipmodel = "runwayml/stable-diffusion-v1-5"
if request.CLIPModel != "": if request.CLIPModel != "":
clipmodel = request.CLIPModel clipmodel = request.CLIPModel
clipsubfolder = "text_encoder" clipsubfolder = "text_encoder"
if request.CLIPSubfolder != "": if request.CLIPSubfolder != "":
clipsubfolder = request.CLIPSubfolder clipsubfolder = request.CLIPSubfolder
# Check if ModelFile exists # Check if ModelFile exists
if request.ModelFile != "": if request.ModelFile != "":
if os.path.exists(request.ModelFile): if os.path.exists(request.ModelFile):
local = True local = True
modelFile = request.ModelFile modelFile = request.ModelFile
fromSingleFile = request.Model.startswith("http") or request.Model.startswith("/") or local fromSingleFile = request.Model.startswith("http") or request.Model.startswith("/") or local
self.img2vid = False self.img2vid=False
self.txt2vid = False self.txt2vid=False
## img2img ## img2img
if (request.PipelineType == "StableDiffusionImg2ImgPipeline") or (request.IMG2IMG and request.PipelineType == ""): if (request.PipelineType == "StableDiffusionImg2ImgPipeline") or (request.IMG2IMG and request.PipelineType == ""):
if fromSingleFile: if fromSingleFile:
self.pipe = StableDiffusionImg2ImgPipeline.from_single_file(modelFile, self.pipe = StableDiffusionImg2ImgPipeline.from_single_file(modelFile,
torch_dtype=torchType) torch_dtype=torchType)
else: else:
self.pipe = StableDiffusionImg2ImgPipeline.from_pretrained(request.Model, self.pipe = StableDiffusionImg2ImgPipeline.from_pretrained(request.Model,
torch_dtype=torchType) torch_dtype=torchType)
elif request.PipelineType == "StableDiffusionDepth2ImgPipeline": elif request.PipelineType == "StableDiffusionDepth2ImgPipeline":
self.pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(request.Model, self.pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(request.Model,
torch_dtype=torchType) torch_dtype=torchType)
## img2vid ## img2vid
elif request.PipelineType == "StableVideoDiffusionPipeline": elif request.PipelineType == "StableVideoDiffusionPipeline":
self.img2vid = True self.img2vid=True
self.pipe = StableVideoDiffusionPipeline.from_pretrained( self.pipe = StableVideoDiffusionPipeline.from_pretrained(
request.Model, torch_dtype=torchType, variant=variant request.Model, torch_dtype=torchType, variant=variant
) )
@@ -205,63 +197,53 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
## text2img ## text2img
elif request.PipelineType == "AutoPipelineForText2Image" or request.PipelineType == "": elif request.PipelineType == "AutoPipelineForText2Image" or request.PipelineType == "":
self.pipe = AutoPipelineForText2Image.from_pretrained(request.Model, self.pipe = AutoPipelineForText2Image.from_pretrained(request.Model,
torch_dtype=torchType, torch_dtype=torchType,
use_safetensors=SAFETENSORS, use_safetensors=SAFETENSORS,
variant=variant) variant=variant)
elif request.PipelineType == "StableDiffusionPipeline": elif request.PipelineType == "StableDiffusionPipeline":
if fromSingleFile: if fromSingleFile:
self.pipe = StableDiffusionPipeline.from_single_file(modelFile, self.pipe = StableDiffusionPipeline.from_single_file(modelFile,
torch_dtype=torchType) torch_dtype=torchType)
else: else:
self.pipe = StableDiffusionPipeline.from_pretrained(request.Model, self.pipe = StableDiffusionPipeline.from_pretrained(request.Model,
torch_dtype=torchType) torch_dtype=torchType)
elif request.PipelineType == "DiffusionPipeline": elif request.PipelineType == "DiffusionPipeline":
self.pipe = DiffusionPipeline.from_pretrained(request.Model, self.pipe = DiffusionPipeline.from_pretrained(request.Model,
torch_dtype=torchType) torch_dtype=torchType)
elif request.PipelineType == "VideoDiffusionPipeline": elif request.PipelineType == "VideoDiffusionPipeline":
self.txt2vid = True self.txt2vid=True
self.pipe = DiffusionPipeline.from_pretrained(request.Model, self.pipe = DiffusionPipeline.from_pretrained(request.Model,
torch_dtype=torchType) torch_dtype=torchType)
elif request.PipelineType == "StableDiffusionXLPipeline": elif request.PipelineType == "StableDiffusionXLPipeline":
if fromSingleFile: if fromSingleFile:
self.pipe = StableDiffusionXLPipeline.from_single_file(modelFile, self.pipe = StableDiffusionXLPipeline.from_single_file(modelFile,
torch_dtype=torchType, torch_dtype=torchType,
use_safetensors=True) use_safetensors=True)
else: else:
self.pipe = StableDiffusionXLPipeline.from_pretrained( self.pipe = StableDiffusionXLPipeline.from_pretrained(
request.Model, request.Model,
torch_dtype=torchType, torch_dtype=torchType,
use_safetensors=True, use_safetensors=True,
variant=variant)
elif request.PipelineType == "StableDiffusion3Pipeline":
if fromSingleFile:
self.pipe = StableDiffusion3Pipeline.from_single_file(modelFile,
torch_dtype=torchType,
use_safetensors=True)
else:
self.pipe = StableDiffusion3Pipeline.from_pretrained(
request.Model,
torch_dtype=torchType,
use_safetensors=True,
variant=variant) variant=variant)
if CLIPSKIP and request.CLIPSkip != 0: if CLIPSKIP and request.CLIPSkip != 0:
self.clip_skip = request.CLIPSkip self.clip_skip = request.CLIPSkip
else: else:
self.clip_skip = 0 self.clip_skip = 0
# torch_dtype needs to be customized. float16 for GPU, float32 for CPU # torch_dtype needs to be customized. float16 for GPU, float32 for CPU
# TODO: this needs to be customized # TODO: this needs to be customized
if request.SchedulerType != "": if request.SchedulerType != "":
self.pipe.scheduler = get_scheduler(request.SchedulerType, self.pipe.scheduler.config) self.pipe.scheduler = get_scheduler(request.SchedulerType, self.pipe.scheduler.config)
if COMPEL: if COMPEL:
self.compel = Compel( self.compel = Compel(
tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2], tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2 ],
text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2], text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED, returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
requires_pooled=[False, True] requires_pooled=[False, True]
) )
if request.ControlNet: if request.ControlNet:
self.controlnet = ControlNetModel.from_pretrained( self.controlnet = ControlNetModel.from_pretrained(
@@ -270,6 +252,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
self.pipe.controlnet = self.controlnet self.pipe.controlnet = self.controlnet
else: else:
self.controlnet = None self.controlnet = None
if request.CUDA:
self.pipe.to('cuda')
if self.controlnet:
self.controlnet.to('cuda')
if XPU:
self.pipe = self.pipe.to("xpu")
# Assume directory from request.ModelFile. # Assume directory from request.ModelFile.
# Only if request.LoraAdapter it's not an absolute path # Only if request.LoraAdapter it's not an absolute path
if request.LoraAdapter and request.ModelFile != "" and not os.path.isabs(request.LoraAdapter) and request.LoraAdapter: if request.LoraAdapter and request.ModelFile != "" and not os.path.isabs(request.LoraAdapter) and request.LoraAdapter:
@@ -282,17 +271,10 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if request.LoraAdapter: if request.LoraAdapter:
# Check if its a local file and not a directory ( we load lora differently for a safetensor file ) # Check if its a local file and not a directory ( we load lora differently for a safetensor file )
if os.path.exists(request.LoraAdapter) and not os.path.isdir(request.LoraAdapter): if os.path.exists(request.LoraAdapter) and not os.path.isdir(request.LoraAdapter):
# self.load_lora_weights(request.LoraAdapter, 1, device, torchType) self.load_lora_weights(request.LoraAdapter, 1, device, torchType)
self.pipe.load_lora_weights(request.LoraAdapter)
else: else:
self.pipe.unet.load_attn_procs(request.LoraAdapter) self.pipe.unet.load_attn_procs(request.LoraAdapter)
if request.CUDA:
self.pipe.to('cuda')
if self.controlnet:
self.controlnet.to('cuda')
if XPU:
self.pipe = self.pipe.to("xpu")
except Exception as err: except Exception as err:
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
# Implement your logic here for the LoadModel service # Implement your logic here for the LoadModel service
@@ -365,9 +347,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
# create a dictionary of values for the parameters # create a dictionary of values for the parameters
options = { options = {
"negative_prompt": request.negative_prompt, "negative_prompt": request.negative_prompt,
"width": request.width, "width": request.width,
"height": request.height, "height": request.height,
"num_inference_steps": steps, "num_inference_steps": steps,
} }
@@ -379,7 +361,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
options["image"] = pose_image options["image"] = pose_image
if CLIPSKIP and self.clip_skip != 0: if CLIPSKIP and self.clip_skip != 0:
options["clip_skip"] = self.clip_skip options["clip_skip"]=self.clip_skip
# Get the keys that we will build the args for our pipe for # Get the keys that we will build the args for our pipe for
keys = options.keys() keys = options.keys()
@@ -423,21 +405,20 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
image = self.pipe( image = self.pipe(
guidance_scale=self.cfg_scale, guidance_scale=self.cfg_scale,
**kwargs **kwargs
).images[0] ).images[0]
else: else:
# pass the kwargs dictionary to the self.pipe method # pass the kwargs dictionary to the self.pipe method
image = self.pipe( image = self.pipe(
prompt, prompt,
guidance_scale=self.cfg_scale, guidance_scale=self.cfg_scale,
**kwargs **kwargs
).images[0] ).images[0]
# save the result # save the result
image.save(request.dst) image.save(request.dst)
return backend_pb2.Result(message="Media generated", success=True) return backend_pb2.Result(message="Media generated", success=True)
def serve(address): def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
@@ -461,7 +442,6 @@ def serve(address):
except KeyboardInterrupt: except KeyboardInterrupt:
server.stop(0) server.stop(0)
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run the gRPC server.") parser = argparse.ArgumentParser(description="Run the gRPC server.")
parser.add_argument( parser.add_argument(
@@ -469,4 +449,4 @@ if __name__ == "__main__":
) )
args = parser.parse_args() args = parser.parse_args()
serve(args.addr) serve(args.addr)

View File

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch torch
torchvision torchvision
optimum[openvino] optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,13 +1,10 @@
setuptools
accelerate accelerate
compel compel
peft
diffusers diffusers
grpcio==1.65.0 grpcio==1.64.0
opencv-python opencv-python
pillow pillow
protobuf protobuf
sentencepiece
torch torch
transformers transformers
certifi certifi

View File

@@ -1,4 +1,4 @@
grpcio==1.65.0 grpcio==1.64.0
protobuf protobuf
torch torch
transformers transformers

View File

@@ -1,5 +1,5 @@
accelerate accelerate
grpcio==1.65.0 grpcio==1.64.0
protobuf protobuf
certifi certifi
torch torch

View File

@@ -4,4 +4,4 @@
packaging packaging
setuptools setuptools
wheel wheel
torch==2.3.1 torch==2.2.0

View File

@@ -1,6 +1,6 @@
causal-conv1d==1.4.0 causal-conv1d==1.2.0.post2
mamba-ssm==2.2.2 mamba-ssm==1.2.0.post1
grpcio==1.65.0 grpcio==1.64.0
protobuf protobuf
certifi certifi
transformers transformers

View File

@@ -2,22 +2,22 @@
intel-extension-for-pytorch intel-extension-for-pytorch
torch torch
optimum[openvino] optimum[openvino]
grpcio==1.64.1 grpcio==1.64.0
protobuf protobuf
librosa==0.9.1 librosa==0.9.1
faster-whisper==1.0.3 faster-whisper==0.9.0
pydub==0.25.1 pydub==0.25.1
wavmark==0.0.3 wavmark==0.0.3
numpy==1.26.4 numpy==1.22.0
eng_to_ipa==0.0.2 eng_to_ipa==0.0.2
inflect==7.0.0 inflect==7.0.0
unidecode==1.3.7 unidecode==1.3.7
whisper-timestamped==1.15.4 whisper-timestamped==1.14.2
openai openai
python-dotenv python-dotenv
pypinyin==0.50.0 pypinyin==0.50.0
cn2an==0.5.22 cn2an==0.5.22
jieba==0.42.1 jieba==0.42.1
gradio==4.38.1 gradio==3.48.0
langid==1.1.6 langid==1.1.6
git+https://github.com/myshell-ai/MeloTTS.git git+https://github.com/myshell-ai/MeloTTS.git

View File

@@ -1,20 +1,20 @@
grpcio==1.65.0 grpcio==1.64.0
protobuf protobuf
librosa librosa==0.9.1
faster-whisper faster-whisper==0.9.0
pydub==0.25.1 pydub==0.25.1
wavmark==0.0.3 wavmark==0.0.3
numpy numpy==1.22.0
eng_to_ipa==0.0.2 eng_to_ipa==0.0.2
inflect inflect==7.0.0
unidecode unidecode==1.3.7
whisper-timestamped whisper-timestamped==1.14.2
openai openai
python-dotenv python-dotenv
pypinyin pypinyin==0.50.0
cn2an==0.5.22 cn2an==0.5.22
jieba==0.42.1 jieba==0.42.1
gradio gradio==3.48.0
langid==1.1.6 langid==1.1.6
git+https://github.com/myshell-ai/MeloTTS.git git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git git+https://github.com/myshell-ai/OpenVoice.git

View File

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch torch
torchaudio torchaudio
optimum[openvino] optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,5 +1,5 @@
accelerate accelerate
grpcio==1.65.0 grpcio==1.64.0
protobuf protobuf
torch torch
git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16 git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch intel-extension-for-pytorch
torch torch
optimum[openvino] optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch intel-extension-for-pytorch
torch torch
optimum[openvino] optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,6 @@
accelerate accelerate
rerankers[transformers] rerankers[transformers]
grpcio==1.65.0 grpcio==1.64.0
protobuf protobuf
certifi certifi
transformers transformers

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch intel-extension-for-pytorch
torch torch
optimum[openvino] optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,6 @@
accelerate accelerate
sentence-transformers==3.0.1 sentence-transformers==2.5.1
transformers transformers
grpcio==1.65.0 grpcio==1.64.0
protobuf protobuf
certifi certifi

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch intel-extension-for-pytorch
torch torch
optimum[openvino] optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,7 +1,7 @@
accelerate accelerate
transformers transformers
grpcio==1.65.0 grpcio==1.64.0
protobuf protobuf
torch torch
scipy==1.14.0 scipy==1.13.0
certifi certifi

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch intel-extension-for-pytorch
torch torch
optimum[openvino] optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,9 +1,9 @@
accelerate accelerate
transformers transformers
grpcio==1.65.0 grpcio==1.64.0
protobuf protobuf
torch torch
certifi certifi
intel-extension-for-transformers intel-extension-for-transformers
bitsandbytes bitsandbytes
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch torch
torchaudio torchaudio
optimum[openvino] optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,4 +1,4 @@
accelerate accelerate
grpcio==1.65.0 grpcio==1.64.0
protobuf protobuf
certifi certifi

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch intel-extension-for-pytorch
torch torch
optimum[openvino] optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,6 @@
accelerate accelerate
vllm vllm
grpcio==1.65.0 grpcio==1.64.0
protobuf protobuf
certifi certifi
transformers transformers

View File

@@ -1,9 +1,9 @@
package core package core
import ( import (
"github.com/mudler/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/config"
"github.com/mudler/LocalAI/core/services" "github.com/go-skynet/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/pkg/model"
) )
// The purpose of this structure is to hold pointers to all initialized services, to make plumbing easy // The purpose of this structure is to hold pointers to all initialized services, to make plumbing easy
@@ -28,6 +28,7 @@ type Application struct {
// LocalAI System Services // LocalAI System Services
BackendMonitorService *services.BackendMonitorService BackendMonitorService *services.BackendMonitorService
GalleryService *services.GalleryService GalleryService *services.GalleryService
ListModelsService *services.ListModelsService
LocalAIMetricsService *services.LocalAIMetricsService LocalAIMetricsService *services.LocalAIMetricsService
// OpenAIService *services.OpenAIService // OpenAIService *services.OpenAIService
} }

View File

@@ -3,10 +3,10 @@ package backend
import ( import (
"fmt" "fmt"
"github.com/mudler/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/grpc" "github.com/go-skynet/LocalAI/pkg/grpc"
model "github.com/mudler/LocalAI/pkg/model" model "github.com/go-skynet/LocalAI/pkg/model"
) )
func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) { func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {

View File

@@ -1,10 +1,10 @@
package backend package backend
import ( import (
"github.com/mudler/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/grpc/proto" "github.com/go-skynet/LocalAI/pkg/grpc/proto"
model "github.com/mudler/LocalAI/pkg/model" model "github.com/go-skynet/LocalAI/pkg/model"
) )
func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) { func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {

View File

@@ -9,14 +9,14 @@ import (
"sync" "sync"
"unicode/utf8" "unicode/utf8"
"github.com/mudler/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema" "github.com/go-skynet/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/gallery" "github.com/go-skynet/LocalAI/pkg/gallery"
"github.com/mudler/LocalAI/pkg/grpc" "github.com/go-skynet/LocalAI/pkg/grpc"
"github.com/mudler/LocalAI/pkg/grpc/proto" "github.com/go-skynet/LocalAI/pkg/grpc/proto"
model "github.com/mudler/LocalAI/pkg/model" model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/utils" "github.com/go-skynet/LocalAI/pkg/utils"
) )
type LLMResponse struct { type LLMResponse struct {
@@ -57,7 +57,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
if _, err := os.Stat(modelFile); os.IsNotExist(err) { if _, err := os.Stat(modelFile); os.IsNotExist(err) {
utils.ResetDownloadTimers() utils.ResetDownloadTimers()
// if we failed to load the model, we try to download it // if we failed to load the model, we try to download it
err := gallery.InstallModelFromGallery(o.Galleries, modelFile, loader.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction, o.EnforcePredownloadScans) err := gallery.InstallModelFromGallery(o.Galleries, modelFile, loader.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@@ -5,9 +5,9 @@ import (
"os" "os"
"path/filepath" "path/filepath"
"github.com/mudler/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/config"
pb "github.com/mudler/LocalAI/pkg/grpc/proto" pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/pkg/model"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
) )
@@ -91,7 +91,7 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
Type: c.ModelType, Type: c.ModelType,
RopeFreqScale: c.RopeFreqScale, RopeFreqScale: c.RopeFreqScale,
NUMA: c.NUMA, NUMA: c.NUMA,
Embeddings: *c.Embeddings, Embeddings: c.Embeddings,
LowVRAM: *c.LowVRAM, LowVRAM: *c.LowVRAM,
NGPULayers: int32(*c.NGPULayers), NGPULayers: int32(*c.NGPULayers),
MMap: *c.MMap, MMap: *c.MMap,
@@ -142,14 +142,12 @@ func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOption
MirostatTAU: float32(*c.LLMConfig.MirostatTAU), MirostatTAU: float32(*c.LLMConfig.MirostatTAU),
Debug: *c.Debug, Debug: *c.Debug,
StopPrompts: c.StopWords, StopPrompts: c.StopWords,
Repeat: int32(c.RepeatLastN), Repeat: int32(c.RepeatPenalty),
FrequencyPenalty: float32(c.FrequencyPenalty),
PresencePenalty: float32(c.PresencePenalty),
Penalty: float32(c.RepeatPenalty),
NKeep: int32(c.Keep), NKeep: int32(c.Keep),
Batch: int32(c.Batch), Batch: int32(c.Batch),
IgnoreEOS: c.IgnoreEOS, IgnoreEOS: c.IgnoreEOS,
Seed: getSeed(c), Seed: getSeed(c),
FrequencyPenalty: float32(c.FrequencyPenalty),
MLock: *c.MMlock, MLock: *c.MMlock,
MMap: *c.MMap, MMap: *c.MMap,
MainGPU: c.MainGPU, MainGPU: c.MainGPU,

View File

@@ -4,9 +4,9 @@ import (
"context" "context"
"fmt" "fmt"
"github.com/mudler/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/grpc/proto" "github.com/go-skynet/LocalAI/pkg/grpc/proto"
model "github.com/mudler/LocalAI/pkg/model" model "github.com/go-skynet/LocalAI/pkg/model"
) )
func Rerank(backend, modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) { func Rerank(backend, modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {

View File

@@ -1,10 +1,10 @@
package backend package backend
import ( import (
"github.com/mudler/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/grpc" "github.com/go-skynet/LocalAI/pkg/grpc"
"github.com/mudler/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/pkg/model"
) )
func StoreBackend(sl *model.ModelLoader, appConfig *config.ApplicationConfig, storeName string) (grpc.Backend, error) { func StoreBackend(sl *model.ModelLoader, appConfig *config.ApplicationConfig, storeName string) (grpc.Backend, error) {

View File

@@ -4,14 +4,14 @@ import (
"context" "context"
"fmt" "fmt"
"github.com/mudler/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema" "github.com/go-skynet/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/grpc/proto" "github.com/go-skynet/LocalAI/pkg/grpc/proto"
model "github.com/mudler/LocalAI/pkg/model" model "github.com/go-skynet/LocalAI/pkg/model"
) )
func ModelTranscription(audio, language string, translate bool, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) { func ModelTranscription(audio, language string, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {
opts := modelOpts(backendConfig, appConfig, []model.Option{ opts := modelOpts(backendConfig, appConfig, []model.Option{
model.WithBackendString(model.WhisperBackend), model.WithBackendString(model.WhisperBackend),
@@ -31,9 +31,8 @@ func ModelTranscription(audio, language string, translate bool, ml *model.ModelL
} }
return whisperModel.AudioTranscription(context.Background(), &proto.TranscriptRequest{ return whisperModel.AudioTranscription(context.Background(), &proto.TranscriptRequest{
Dst: audio, Dst: audio,
Language: language, Language: language,
Translate: translate, Threads: uint32(*backendConfig.Threads),
Threads: uint32(*backendConfig.Threads),
}) })
} }

View File

@@ -6,11 +6,11 @@ import (
"os" "os"
"path/filepath" "path/filepath"
"github.com/mudler/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/grpc/proto" "github.com/go-skynet/LocalAI/pkg/grpc/proto"
model "github.com/mudler/LocalAI/pkg/model" model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/utils" "github.com/go-skynet/LocalAI/pkg/utils"
) )
func generateUniqueFileName(dir, baseName, ext string) string { func generateUniqueFileName(dir, baseName, ext string) string {

View File

@@ -1,15 +1,14 @@
package cli package cli
import ( import (
cliContext "github.com/mudler/LocalAI/core/cli/context" cliContext "github.com/go-skynet/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/cli/worker" "github.com/go-skynet/LocalAI/core/cli/worker"
) )
var CLI struct { var CLI struct {
cliContext.Context `embed:""` cliContext.Context `embed:""`
Run RunCMD `cmd:"" help:"Run LocalAI, this the default command if no other command is specified. Run 'local-ai run --help' for more information" default:"withargs"` Run RunCMD `cmd:"" help:"Run LocalAI, this the default command if no other command is specified. Run 'local-ai run --help' for more information" default:"withargs"`
Federated FederatedCLI `cmd:"" help:"Run LocalAI in federated mode"`
Models ModelsCMD `cmd:"" help:"Manage LocalAI models and definitions"` Models ModelsCMD `cmd:"" help:"Manage LocalAI models and definitions"`
TTS TTSCMD `cmd:"" help:"Convert text to speech"` TTS TTSCMD `cmd:"" help:"Convert text to speech"`
Transcript TranscriptCMD `cmd:"" help:"Convert audio to text"` Transcript TranscriptCMD `cmd:"" help:"Convert audio to text"`

View File

@@ -1,21 +0,0 @@
package cli
import (
"context"
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/p2p"
)
type FederatedCLI struct {
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
LoadBalanced bool `env:"LOCALAI_LOAD_BALANCED,LOAD_BALANCED" default:"false" help:"Enable load balancing" group:"p2p"`
}
func (f *FederatedCLI) Run(ctx *cliContext.Context) error {
fs := p2p.NewFederatedServer(f.Address, p2p.FederatedID, f.Peer2PeerToken, f.LoadBalanced)
return fs.Start(context.Background())
}

View File

@@ -2,15 +2,12 @@ package cli
import ( import (
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
cliContext "github.com/mudler/LocalAI/core/cli/context" cliContext "github.com/go-skynet/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery" "github.com/go-skynet/LocalAI/pkg/gallery"
"github.com/mudler/LocalAI/pkg/downloader" "github.com/go-skynet/LocalAI/pkg/startup"
"github.com/mudler/LocalAI/pkg/startup"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
"github.com/schollz/progressbar/v3" "github.com/schollz/progressbar/v3"
) )
@@ -25,8 +22,7 @@ type ModelsList struct {
} }
type ModelsInstall struct { type ModelsInstall struct {
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"` ModelArgs []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"`
ModelArgs []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"`
ModelsCMDFlags `embed:""` ModelsCMDFlags `embed:""`
} }
@@ -37,7 +33,7 @@ type ModelsCMD struct {
} }
func (ml *ModelsList) Run(ctx *cliContext.Context) error { func (ml *ModelsList) Run(ctx *cliContext.Context) error {
var galleries []config.Gallery var galleries []gallery.Gallery
if err := json.Unmarshal([]byte(ml.Galleries), &galleries); err != nil { if err := json.Unmarshal([]byte(ml.Galleries), &galleries); err != nil {
log.Error().Err(err).Msg("unable to load galleries") log.Error().Err(err).Msg("unable to load galleries")
} }
@@ -57,11 +53,10 @@ func (ml *ModelsList) Run(ctx *cliContext.Context) error {
} }
func (mi *ModelsInstall) Run(ctx *cliContext.Context) error { func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
var galleries []config.Gallery var galleries []gallery.Gallery
if err := json.Unmarshal([]byte(mi.Galleries), &galleries); err != nil { if err := json.Unmarshal([]byte(mi.Galleries), &galleries); err != nil {
log.Error().Err(err).Msg("unable to load galleries") log.Error().Err(err).Msg("unable to load galleries")
} }
for _, modelName := range mi.ModelArgs { for _, modelName := range mi.ModelArgs {
progressBar := progressbar.NewOptions( progressBar := progressbar.NewOptions(
@@ -83,22 +78,14 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
return err return err
} }
if !downloader.LooksLikeOCI(modelName) { model := gallery.FindModel(models, modelName, mi.ModelsPath)
model := gallery.FindModel(models, modelName, mi.ModelsPath) if model == nil {
if model == nil { log.Error().Str("model", modelName).Msg("model not found")
log.Error().Str("model", modelName).Msg("model not found") return err
return err
}
err = gallery.SafetyScanGalleryModel(model)
if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) {
return err
}
log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model")
} }
err = startup.InstallModels(galleries, "", mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName) log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model")
err = startup.InstallModels(galleries, "", mi.ModelsPath, progressCallback, modelName)
if err != nil { if err != nil {
return err return err
} }

View File

@@ -3,16 +3,14 @@ package cli
import ( import (
"context" "context"
"fmt" "fmt"
"net"
"os"
"strings" "strings"
"time" "time"
cliContext "github.com/mudler/LocalAI/core/cli/context" cliContext "github.com/go-skynet/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http" "github.com/go-skynet/LocalAI/core/http"
"github.com/mudler/LocalAI/core/p2p" "github.com/go-skynet/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/startup" "github.com/go-skynet/LocalAI/core/startup"
"github.com/rs/zerolog" "github.com/rs/zerolog"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
) )
@@ -42,27 +40,24 @@ type RunCMD struct {
Threads int `env:"LOCALAI_THREADS,THREADS" short:"t" help:"Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested" group:"performance"` Threads int `env:"LOCALAI_THREADS,THREADS" short:"t" help:"Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested" group:"performance"`
ContextSize int `env:"LOCALAI_CONTEXT_SIZE,CONTEXT_SIZE" default:"512" help:"Default context size for models" group:"performance"` ContextSize int `env:"LOCALAI_CONTEXT_SIZE,CONTEXT_SIZE" default:"512" help:"Default context size for models" group:"performance"`
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"` Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
CORS bool `env:"LOCALAI_CORS,CORS" help:"" group:"api"` CORS bool `env:"LOCALAI_CORS,CORS" help:"" group:"api"`
CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"` CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"`
LibraryPath string `env:"LOCALAI_LIBRARY_PATH,LIBRARY_PATH" help:"Path to the library directory (for e.g. external libraries used by backends)" default:"/usr/share/local-ai/libs" group:"backends"` CSRF bool `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"`
CSRF bool `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"` UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"` APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"` DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"`
DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"` OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"api"`
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"` Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"` Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"` ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"` SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"`
ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"` PreloadBackendOnly bool `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"`
SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"` ExternalGRPCBackends []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"`
PreloadBackendOnly bool `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"` EnableWatchdogIdle bool `env:"LOCALAI_WATCHDOG_IDLE,WATCHDOG_IDLE" default:"false" help:"Enable watchdog for stopping backends that are idle longer than the watchdog-idle-timeout" group:"backends"`
ExternalGRPCBackends []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"` WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"`
EnableWatchdogIdle bool `env:"LOCALAI_WATCHDOG_IDLE,WATCHDOG_IDLE" default:"false" help:"Enable watchdog for stopping backends that are idle longer than the watchdog-idle-timeout" group:"backends"` EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"`
WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"` WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"`
WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"`
} }
func (r *RunCMD) Run(ctx *cliContext.Context) error { func (r *RunCMD) Run(ctx *cliContext.Context) error {
@@ -85,7 +80,6 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
config.WithCors(r.CORS), config.WithCors(r.CORS),
config.WithCorsAllowOrigins(r.CORSAllowOrigins), config.WithCorsAllowOrigins(r.CORSAllowOrigins),
config.WithCsrf(r.CSRF), config.WithCsrf(r.CSRF),
config.WithLibPath(r.LibraryPath),
config.WithThreads(r.Threads), config.WithThreads(r.Threads),
config.WithBackendAssets(ctx.BackendAssets), config.WithBackendAssets(ctx.BackendAssets),
config.WithBackendAssetsOutput(r.BackendAssetsPath), config.WithBackendAssetsOutput(r.BackendAssetsPath),
@@ -93,13 +87,11 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
config.WithApiKeys(r.APIKeys), config.WithApiKeys(r.APIKeys),
config.WithModelsURL(append(r.Models, r.ModelArgs...)...), config.WithModelsURL(append(r.Models, r.ModelArgs...)...),
config.WithOpaqueErrors(r.OpaqueErrors), config.WithOpaqueErrors(r.OpaqueErrors),
config.WithEnforcedPredownloadScans(!r.DisablePredownloadScan),
} }
token := ""
if r.Peer2Peer || r.Peer2PeerToken != "" { if r.Peer2Peer || r.Peer2PeerToken != "" {
log.Info().Msg("P2P mode enabled") log.Info().Msg("P2P mode enabled")
token = r.Peer2PeerToken token := r.Peer2PeerToken
if token == "" { if token == "" {
// IF no token is provided, and p2p is enabled, // IF no token is provided, and p2p is enabled,
// we generate one and wait for the user to pick up the token (this is for interactive) // we generate one and wait for the user to pick up the token (this is for interactive)
@@ -110,46 +102,14 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
log.Info().Msg("To use the token, you can run the following command in another node or terminal:") log.Info().Msg("To use the token, you can run the following command in another node or terminal:")
fmt.Printf("export TOKEN=\"%s\"\nlocal-ai worker p2p-llama-cpp-rpc\n", token) fmt.Printf("export TOKEN=\"%s\"\nlocal-ai worker p2p-llama-cpp-rpc\n", token)
}
opts = append(opts, config.WithP2PToken(token))
node, err := p2p.NewNode(token) // Ask for user confirmation
if err != nil { log.Info().Msg("Press a button to proceed")
return err var input string
fmt.Scanln(&input)
} }
log.Info().Msg("Starting P2P server discovery...") log.Info().Msg("Starting P2P server discovery...")
if err := p2p.ServiceDiscoverer(context.Background(), node, token, "", func(serviceID string, node p2p.NodeData) { if err := p2p.LLamaCPPRPCServerDiscoverer(context.Background(), token); err != nil {
var tunnelAddresses []string
for _, v := range p2p.GetAvailableNodes("") {
if v.IsOnline() {
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
} else {
log.Info().Msgf("Node %s is offline", v.ID)
}
}
tunnelEnvVar := strings.Join(tunnelAddresses, ",")
os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar)
log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar)
}); err != nil {
return err
}
}
if r.Federated {
_, port, err := net.SplitHostPort(r.Address)
if err != nil {
return err
}
if err := p2p.ExposeService(context.Background(), "localhost", port, token, p2p.FederatedID); err != nil {
return err
}
node, err := p2p.NewNode(token)
if err != nil {
return err
}
if err := p2p.ServiceDiscoverer(context.Background(), node, token, p2p.FederatedID, nil); err != nil {
return err return err
} }
} }

View File

@@ -5,10 +5,10 @@ import (
"errors" "errors"
"fmt" "fmt"
"github.com/mudler/LocalAI/core/backend" "github.com/go-skynet/LocalAI/core/backend"
cliContext "github.com/mudler/LocalAI/core/cli/context" cliContext "github.com/go-skynet/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/pkg/model"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
) )
@@ -18,7 +18,6 @@ type TranscriptCMD struct {
Backend string `short:"b" default:"whisper" help:"Backend to run the transcription model"` Backend string `short:"b" default:"whisper" help:"Backend to run the transcription model"`
Model string `short:"m" required:"" help:"Model name to run the TTS"` Model string `short:"m" required:"" help:"Model name to run the TTS"`
Language string `short:"l" help:"Language of the audio file"` Language string `short:"l" help:"Language of the audio file"`
Translate bool `short:"c" help:"Translate the transcription to english"`
Threads int `short:"t" default:"1" help:"Number of threads used for parallel computation"` Threads int `short:"t" default:"1" help:"Number of threads used for parallel computation"`
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"` ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"` BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
@@ -51,7 +50,7 @@ func (t *TranscriptCMD) Run(ctx *cliContext.Context) error {
} }
}() }()
tr, err := backend.ModelTranscription(t.Filename, t.Language, t.Translate, ml, c, opts) tr, err := backend.ModelTranscription(t.Filename, t.Language, ml, c, opts)
if err != nil { if err != nil {
return err return err
} }

View File

@@ -7,10 +7,10 @@ import (
"path/filepath" "path/filepath"
"strings" "strings"
"github.com/mudler/LocalAI/core/backend" "github.com/go-skynet/LocalAI/core/backend"
cliContext "github.com/mudler/LocalAI/core/cli/context" cliContext "github.com/go-skynet/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/pkg/model"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
) )

View File

@@ -1,22 +1,16 @@
package cli package cli
import ( import (
"encoding/json"
"errors"
"fmt" "fmt"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
cliContext "github.com/mudler/LocalAI/core/cli/context" cliContext "github.com/go-skynet/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/pkg/downloader"
gguf "github.com/thxcode/gguf-parser-go" gguf "github.com/thxcode/gguf-parser-go"
) )
type UtilCMD struct { type UtilCMD struct {
GGUFInfo GGUFInfoCMD `cmd:"" name:"gguf-info" help:"Get information about a GGUF file"` GGUFInfo GGUFInfoCMD `cmd:"" name:"gguf-info" help:"Get information about a GGUF file"`
HFScan HFScanCMD `cmd:"" name:"hf-scan" help:"Checks installed models for known security issues. WARNING: this is a best-effort feature and may not catch everything!"`
} }
type GGUFInfoCMD struct { type GGUFInfoCMD struct {
@@ -24,12 +18,6 @@ type GGUFInfoCMD struct {
Header bool `optional:"" default:"false" name:"header" help:"Show header information"` Header bool `optional:"" default:"false" name:"header" help:"Show header information"`
} }
type HFScanCMD struct {
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
ToScan []string `arg:""`
}
func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error { func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error {
if u.Args == nil || len(u.Args) == 0 { if u.Args == nil || len(u.Args) == 0 {
return fmt.Errorf("no GGUF file provided") return fmt.Errorf("no GGUF file provided")
@@ -65,37 +53,3 @@ func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error {
return nil return nil
} }
func (hfscmd *HFScanCMD) Run(ctx *cliContext.Context) error {
log.Info().Msg("LocalAI Security Scanner - This is BEST EFFORT functionality! Currently limited to huggingface models!")
if len(hfscmd.ToScan) == 0 {
log.Info().Msg("Checking all installed models against galleries")
var galleries []config.Gallery
if err := json.Unmarshal([]byte(hfscmd.Galleries), &galleries); err != nil {
log.Error().Err(err).Msg("unable to load galleries")
}
err := gallery.SafetyScanGalleryModels(galleries, hfscmd.ModelsPath)
if err == nil {
log.Info().Msg("No security warnings were detected for your installed models. Please note that this is a BEST EFFORT tool, and all issues may not be detected.")
} else {
log.Error().Err(err).Msg("! WARNING ! A known-vulnerable model is installed!")
}
return err
} else {
var errs error = nil
for _, uri := range hfscmd.ToScan {
log.Info().Str("uri", uri).Msg("scanning specific uri")
scanResults, err := downloader.HuggingFaceScan(uri)
if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) {
log.Error().Err(err).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("! WARNING ! A known-vulnerable model is included in this repo!")
errs = errors.Join(errs, err)
}
}
if errs != nil {
return errs
}
log.Info().Msg("No security warnings were detected for your installed models. Please note that this is a BEST EFFORT tool, and all issues may not be detected.")
return nil
}
}

Some files were not shown because too many files have changed in this diff Show More