Compare commits

..

1 Commits

Author SHA1 Message Date
Ettore Di Giacinto
f41a519a2c tests: try to get logs
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-06-25 09:24:55 +02:00
181 changed files with 1607 additions and 7610 deletions

View File

@@ -1,80 +0,0 @@
import hashlib
from huggingface_hub import hf_hub_download, get_paths_info
import requests
import sys
import os
uri = sys.argv[1]
file_name = uri.split('/')[-1]
# Function to parse the URI and determine download method
def parse_uri(uri):
if uri.startswith('huggingface://'):
repo_id = uri.split('://')[1]
return 'huggingface', repo_id.rsplit('/', 1)[0]
elif 'huggingface.co' in uri:
parts = uri.split('/resolve/')
if len(parts) > 1:
repo_path = parts[0].split('https://huggingface.co/')[-1]
return 'huggingface', repo_path
return 'direct', uri
def calculate_sha256(file_path):
sha256_hash = hashlib.sha256()
with open(file_path, 'rb') as f:
for byte_block in iter(lambda: f.read(4096), b''):
sha256_hash.update(byte_block)
return sha256_hash.hexdigest()
def manual_safety_check_hf(repo_id):
scanResponse = requests.get('https://huggingface.co/api/models/' + repo_id + "/scan")
scan = scanResponse.json()
if scan['hasUnsafeFile']:
return scan
return None
download_type, repo_id_or_url = parse_uri(uri)
new_checksum = None
file_path = None
# Decide download method based on URI type
if download_type == 'huggingface':
# Check if the repo is flagged as dangerous by HF
hazard = manual_safety_check_hf(repo_id_or_url)
if hazard != None:
print(f'Error: HuggingFace has detected security problems for {repo_id_or_url}: {str(hazard)}', filename=file_name)
sys.exit(5)
# Use HF API to pull sha
for file in get_paths_info(repo_id_or_url, [file_name], repo_type='model'):
try:
new_checksum = file.lfs.sha256
break
except Exception as e:
print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
sys.exit(2)
if new_checksum is None:
try:
file_path = hf_hub_download(repo_id=repo_id_or_url, filename=file_name)
except Exception as e:
print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
sys.exit(2)
else:
response = requests.get(repo_id_or_url)
if response.status_code == 200:
with open(file_name, 'wb') as f:
f.write(response.content)
file_path = file_name
elif response.status_code == 404:
print(f'File not found: {response.status_code}', file=sys.stderr)
sys.exit(2)
else:
print(f'Error downloading file: {response.status_code}', file=sys.stderr)
sys.exit(1)
if new_checksum is None:
new_checksum = calculate_sha256(file_path)
print(new_checksum)
os.remove(file_path)
else:
print(new_checksum)

View File

@@ -14,14 +14,77 @@ function check_and_update_checksum() {
idx="$5"
# Download the file and calculate new checksum using Python
new_checksum=$(python3 ./.github/check_and_update.py $uri)
result=$?
new_checksum=$(python3 -c "
import hashlib
from huggingface_hub import hf_hub_download, get_paths_info
import requests
import sys
import os
if [[ $result -eq 5 ]]; then
echo "Contaminated entry detected, deleting entry for $model_name..."
yq eval -i "del([$idx])" "$input_yaml"
return
fi
uri = '$uri'
file_name = uri.split('/')[-1]
# Function to parse the URI and determine download method
# Function to parse the URI and determine download method
def parse_uri(uri):
if uri.startswith('huggingface://'):
repo_id = uri.split('://')[1]
return 'huggingface', repo_id.rsplit('/', 1)[0]
elif 'huggingface.co' in uri:
parts = uri.split('/resolve/')
if len(parts) > 1:
repo_path = parts[0].split('https://huggingface.co/')[-1]
return 'huggingface', repo_path
return 'direct', uri
def calculate_sha256(file_path):
sha256_hash = hashlib.sha256()
with open(file_path, 'rb') as f:
for byte_block in iter(lambda: f.read(4096), b''):
sha256_hash.update(byte_block)
return sha256_hash.hexdigest()
download_type, repo_id_or_url = parse_uri(uri)
new_checksum = None
# Decide download method based on URI type
if download_type == 'huggingface':
# Use HF API to pull sha
for file in get_paths_info(repo_id_or_url, [file_name], repo_type='model'):
try:
new_checksum = file.lfs.sha256
break
except Exception as e:
print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
sys.exit(2)
if new_checksum is None:
try:
file_path = hf_hub_download(repo_id=repo_id_or_url, filename=file_name)
except Exception as e:
print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
sys.exit(2)
else:
response = requests.get(repo_id_or_url)
if response.status_code == 200:
with open(file_name, 'wb') as f:
f.write(response.content)
file_path = file_name
elif response.status_code == 404:
print(f'File not found: {response.status_code}', file=sys.stderr)
sys.exit(2)
else:
print(f'Error downloading file: {response.status_code}', file=sys.stderr)
sys.exit(1)
if new_checksum is None:
new_checksum = calculate_sha256(file_path)
print(new_checksum)
os.remove(file_path)
else:
print(new_checksum)
")
if [[ "$new_checksum" == "" ]]; then
echo "Error calculating checksum for $file_name. Skipping..."
@@ -31,7 +94,7 @@ function check_and_update_checksum() {
echo "Checksum for $file_name: $new_checksum"
# Compare and update the YAML file if checksums do not match
result=$?
if [[ $result -eq 2 ]]; then
echo "File not found, deleting entry for $file_name..."
# yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\"))" "$input_yaml"

112
.github/dependabot.yml vendored
View File

@@ -1,10 +1,6 @@
# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
version: 2
updates:
- package-ecosystem: "gitsubmodule"
directory: "/"
schedule:
interval: "weekly"
- package-ecosystem: "gomod"
directory: "/"
schedule:
@@ -27,111 +23,3 @@ updates:
schedule:
# Check for updates to GitHub Actions every weekday
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/autogptq"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/bark"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/common/template"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/coqui"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/diffusers"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/exllama"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/exllama2"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/mamba"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/openvoice"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/parler-tts"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/petals"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/rerankers"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/sentencetransformers"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/transformers"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/transformers-musicgen"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/vall-e-x"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/vllm"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/examples/chainlit"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/examples/functions"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/examples/langchain/langchainpy-localai-example"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/examples/langchain-chroma"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/examples/streamlit-bot"
schedule:
interval: "weekly"
- package-ecosystem: "docker"
directory: "/examples/k8sgpt"
schedule:
interval: "weekly"
- package-ecosystem: "docker"
directory: "/examples/kubernetes"
schedule:
interval: "weekly"
- package-ecosystem: "docker"
directory: "/examples/langchain"
schedule:
interval: "weekly"
- package-ecosystem: "gomod"
directory: "/examples/semantic-todo"
schedule:
interval: "weekly"
- package-ecosystem: "docker"
directory: "/examples/telegram-bot"
schedule:
interval: "weekly"

3
.github/release.yml vendored
View File

@@ -13,9 +13,6 @@ changelog:
labels:
- bug
- regression
- title: "🖧 P2P area"
labels:
- area/p2p
- title: Exciting New Features 🎉
labels:
- Semver-Minor

View File

@@ -9,6 +9,9 @@ jobs:
fail-fast: false
matrix:
include:
- repository: "go-skynet/go-llama.cpp"
variable: "GOLLAMA_VERSION"
branch: "master"
- repository: "ggerganov/llama.cpp"
variable: "CPPLLAMA_VERSION"
branch: "master"
@@ -27,6 +30,9 @@ jobs:
- repository: "go-skynet/bloomz.cpp"
variable: "BLOOMZ_VERSION"
branch: "main"
- repository: "nomic-ai/gpt4all"
variable: "GPT4ALL_VERSION"
branch: "main"
- repository: "mudler/go-ggllm.cpp"
variable: "GOGGLLM_VERSION"
branch: "master"
@@ -48,7 +54,7 @@ jobs:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
commit-message: ':arrow_up: Update ${{ matrix.repository }}'
title: 'chore: :arrow_up: Update ${{ matrix.repository }}'
title: ':arrow_up: Update ${{ matrix.repository }}'
branch: "update/${{ matrix.variable }}"
body: Bump of ${{ matrix.repository }} version
signoff: true

View File

@@ -22,7 +22,7 @@ jobs:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
commit-message: ':arrow_up: Update docs version ${{ matrix.repository }}'
title: 'docs: :arrow_up: update docs version ${{ matrix.repository }}'
title: ':arrow_up: Update docs version ${{ matrix.repository }}'
branch: "update/docs"
body: Bump of ${{ matrix.repository }} version inside docs
signoff: true

View File

@@ -20,12 +20,12 @@ jobs:
run: |
sudo apt-get update
sudo apt-get install -y pip wget
sudo pip install --upgrade pip
sudo pip install --upgrade pip
pip install huggingface_hub
- name: 'Setup yq'
uses: dcarbone/install-yq-action@v1.1.1
with:
version: 'v4.44.2'
version: 'v4.43.1'
download-compressed: true
force: true

View File

@@ -14,7 +14,7 @@ jobs:
steps:
- name: Dependabot metadata
id: metadata
uses: dependabot/fetch-metadata@v2.2.0
uses: dependabot/fetch-metadata@v2.1.0
with:
github-token: "${{ secrets.GITHUB_TOKEN }}"
skip-commit-verification: true

View File

@@ -1,83 +0,0 @@
name: Comment PRs
on:
pull_request_target:
jobs:
comment-pr:
env:
MODEL_NAME: hermes-2-theta-llama-3-8b
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3
with:
ref: "${{ github.event.pull_request.merge_commit_sha }}"
fetch-depth: 0 # needed to checkout all branches for this Action to work
- uses: mudler/localai-github-action@v1
with:
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.7.0
id: git-diff-action
with:
json_diff_file_output: diff.json
raw_diff_file_output: diff.txt
file_output_only: "true"
base_branch: ${{ github.event.pull_request.base.sha }}
- name: Show diff
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
run: |
cat $DIFF
- name: Summarize
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
id: summarize
run: |
input="$(cat $DIFF)"
# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"
# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "You are LocalAI-bot in Github that helps understanding PRs and assess complexity. Explain what has changed in this PR diff and why"
},
{
role: "user",
content: $input
}
]
}')
# Send the request to LocalAI
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")
# Extract the summary from the response
summary="$(echo $response | jq -r '.choices[0].message.content')"
# Print the summary
# -H "Authorization: Bearer $API_KEY" \
echo "Summary:"
echo "$summary"
echo "payload sent"
echo "$json_payload"
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
docker logs --tail 10 local-ai
- uses: mshick/add-pr-comment@v2
if: always()
with:
repo-token: ${{ secrets.UPDATE_BOT_TOKEN }}
message: ${{ steps.summarize.outputs.message }}
message-failure: |
Uh oh! Could not analyze this PR, maybe it's too big?

View File

@@ -75,7 +75,7 @@ jobs:
uses: actions/checkout@v4
- name: Cache GRPC
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
with:
builder: ${{ steps.buildx.outputs.name }}
# The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
@@ -84,11 +84,11 @@ jobs:
build-args: |
GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }}
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
GRPC_VERSION=v1.65.0
GRPC_VERSION=v1.64.0
context: .
file: ./Dockerfile
cache-to: type=gha,ignore-error=true
cache-from: type=gha
target: grpc
platforms: ${{ matrix.platforms }}
push: false
push: false

View File

@@ -15,7 +15,7 @@ jobs:
strategy:
matrix:
include:
- base-image: intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04
- base-image: intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04
runs-on: 'ubuntu-latest'
platforms: 'linux/amd64'
runs-on: ${{matrix.runs-on}}
@@ -46,7 +46,7 @@ jobs:
uses: actions/checkout@v4
- name: Cache Intel images
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
with:
builder: ${{ steps.buildx.outputs.name }}
build-args: |

View File

@@ -35,19 +35,18 @@ jobs:
max-parallel: ${{ github.event_name != 'pull_request' && 4 || 8 }}
matrix:
include:
# This is basically covered by the AIO test
# - build-type: ''
# platforms: 'linux/amd64'
# tag-latest: 'false'
# tag-suffix: '-ffmpeg'
# ffmpeg: 'true'
# image-type: 'extras'
# runs-on: 'arc-runner-set'
# base-image: "ubuntu:22.04"
# makeflags: "--jobs=3 --output-sync=target"
- build-type: ''
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "4"
cuda-minor-version: "5"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -56,85 +55,85 @@ jobs:
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
makeflags: "--jobs=3 --output-sync=target"
# - build-type: 'hipblas'
# platforms: 'linux/amd64'
# tag-latest: 'false'
# tag-suffix: '-hipblas'
# ffmpeg: 'false'
# image-type: 'extras'
# base-image: "rocm/dev-ubuntu-22.04:6.1"
# grpc-base-image: "ubuntu:22.04"
# runs-on: 'arc-runner-set'
# makeflags: "--jobs=3 --output-sync=target"
# - build-type: 'sycl_f16'
# platforms: 'linux/amd64'
# tag-latest: 'false'
# base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
# grpc-base-image: "ubuntu:22.04"
# tag-suffix: 'sycl-f16-ffmpeg'
# ffmpeg: 'true'
# image-type: 'extras'
# runs-on: 'arc-runner-set'
# makeflags: "--jobs=3 --output-sync=target"
# core-image-build:
# uses: ./.github/workflows/image_build.yml
# with:
# tag-latest: ${{ matrix.tag-latest }}
# tag-suffix: ${{ matrix.tag-suffix }}
# ffmpeg: ${{ matrix.ffmpeg }}
# image-type: ${{ matrix.image-type }}
# build-type: ${{ matrix.build-type }}
# cuda-major-version: ${{ matrix.cuda-major-version }}
# cuda-minor-version: ${{ matrix.cuda-minor-version }}
# platforms: ${{ matrix.platforms }}
# runs-on: ${{ matrix.runs-on }}
# base-image: ${{ matrix.base-image }}
# grpc-base-image: ${{ matrix.grpc-base-image }}
# makeflags: ${{ matrix.makeflags }}
# secrets:
# dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
# dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
# quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
# quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
# strategy:
# matrix:
# include:
# - build-type: ''
# platforms: 'linux/amd64'
# tag-latest: 'false'
# tag-suffix: '-ffmpeg-core'
# ffmpeg: 'true'
# image-type: 'core'
# runs-on: 'ubuntu-latest'
# base-image: "ubuntu:22.04"
# makeflags: "--jobs=4 --output-sync=target"
# - build-type: 'sycl_f16'
# platforms: 'linux/amd64'
# tag-latest: 'false'
# base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
# grpc-base-image: "ubuntu:22.04"
# tag-suffix: 'sycl-f16-ffmpeg-core'
# ffmpeg: 'true'
# image-type: 'core'
# runs-on: 'arc-runner-set'
# makeflags: "--jobs=3 --output-sync=target"
# - build-type: 'cublas'
# cuda-major-version: "12"
# cuda-minor-version: "4"
# platforms: 'linux/amd64'
# tag-latest: 'false'
# tag-suffix: '-cublas-cuda12-ffmpeg-core'
# ffmpeg: 'true'
# image-type: 'core'
# runs-on: 'ubuntu-latest'
# base-image: "ubuntu:22.04"
# makeflags: "--jobs=4 --output-sync=target"
# - build-type: 'vulkan'
# platforms: 'linux/amd64'
# tag-latest: 'false'
# tag-suffix: '-vulkan-ffmpeg-core'
# ffmpeg: 'true'
# image-type: 'core'
# runs-on: 'ubuntu-latest'
# base-image: "ubuntu:22.04"
# makeflags: "--jobs=4 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas'
ffmpeg: 'false'
image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: 'sycl-f16-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
core-image-build:
uses: ./.github/workflows/image_build.yml
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
ffmpeg: ${{ matrix.ffmpeg }}
image-type: ${{ matrix.image-type }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
makeflags: ${{ matrix.makeflags }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
matrix:
include:
- build-type: ''
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: 'sycl-f16-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "5"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'vulkan'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-vulkan-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"

View File

@@ -39,7 +39,7 @@ jobs:
strategy:
# Pushing with all jobs in parallel
# eats the bandwidth of all the nodes
max-parallel: ${{ github.event_name != 'pull_request' && 6 || 10 }}
max-parallel: ${{ github.event_name != 'pull_request' && 6 || 12 }}
matrix:
include:
# Extra images
@@ -64,7 +64,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11'
@@ -75,7 +75,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "4"
cuda-minor-version: "5"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12'
@@ -86,7 +86,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cublas-cuda11-ffmpeg'
@@ -100,7 +100,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "4"
cuda-minor-version: "5"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -257,7 +257,6 @@ jobs:
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
matrix:
include:
- build-type: ''
@@ -274,7 +273,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11-core'
@@ -285,7 +284,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "4"
cuda-minor-version: "5"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-core'
@@ -296,7 +295,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11-ffmpeg-core'
@@ -307,7 +306,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "4"
cuda-minor-version: "5"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg-core'
@@ -317,12 +316,11 @@ jobs:
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'vulkan'
platforms: 'linux/amd64'
platforms: 'linux/amd64,linux/arm64'
tag-latest: 'false'
tag-suffix: '-vulkan-ffmpeg-core'
latest-image: 'latest-vulkan-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"
makeflags: "--jobs=4 --output-sync=target"

View File

@@ -23,7 +23,7 @@ on:
type: string
cuda-minor-version:
description: 'CUDA minor version'
default: "4"
default: "5"
type: string
platforms:
description: 'Platforms'
@@ -215,7 +215,7 @@ jobs:
password: ${{ secrets.quayPassword }}
- name: Build and push
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
if: github.event_name != 'pull_request'
with:
builder: ${{ steps.buildx.outputs.name }}
@@ -232,7 +232,7 @@ jobs:
BASE_IMAGE=${{ inputs.base-image }}
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
GRPC_VERSION=v1.65.0
GRPC_VERSION=v1.64.0
MAKEFLAGS=${{ inputs.makeflags }}
context: .
file: ./Dockerfile
@@ -243,7 +243,7 @@ jobs:
labels: ${{ steps.meta.outputs.labels }}
### Start testing image
- name: Build and push
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
if: github.event_name == 'pull_request'
with:
builder: ${{ steps.buildx.outputs.name }}
@@ -260,7 +260,7 @@ jobs:
BASE_IMAGE=${{ inputs.base-image }}
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
GRPC_VERSION=v1.65.0
GRPC_VERSION=v1.64.0
MAKEFLAGS=${{ inputs.makeflags }}
context: .
file: ./Dockerfile
@@ -276,7 +276,7 @@ jobs:
## End testing image
- name: Build and push AIO image
if: inputs.aio != ''
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
with:
builder: ${{ steps.buildx.outputs.name }}
build-args: |
@@ -291,7 +291,7 @@ jobs:
- name: Build and push AIO image (dockerhub)
if: inputs.aio != ''
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
with:
builder: ${{ steps.buildx.outputs.name }}
build-args: |

View File

@@ -1,168 +0,0 @@
name: Notifications for new models
on:
pull_request:
types:
- closed
jobs:
notify-discord:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
env:
MODEL_NAME: hermes-2-theta-llama-3-8b
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0 # needed to checkout all branches for this Action to work
- uses: mudler/localai-github-action@v1
with:
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.7.0
id: git-diff-action
with:
json_diff_file_output: diff.json
raw_diff_file_output: diff.txt
file_output_only: "true"
- name: Summarize
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
id: summarize
run: |
input="$(cat $DIFF)"
# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"
# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "You are LocalAI-bot. Write a discord message to notify everyone about the new model from the git diff. Make it informal. An example can include: the URL of the model, the name, and a brief description of the model if exists. Also add an hint on how to install it in LocalAI and that can be browsed over https://models.localai.io. For example: local-ai run model_name_here"
},
{
role: "user",
content: $input
}
]
}')
# Send the request to LocalAI
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")
# Extract the summary from the response
summary="$(echo $response | jq -r '.choices[0].message.content')"
# Print the summary
# -H "Authorization: Bearer $API_KEY" \
echo "Summary:"
echo "$summary"
echo "payload sent"
echo "$json_payload"
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
docker logs --tail 10 local-ai
- name: Discord notification
env:
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL }}
DISCORD_USERNAME: "LocalAI-Bot"
DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
uses: Ilshidur/action-discord@master
with:
args: ${{ steps.summarize.outputs.message }}
- name: Setup tmate session if fails
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
notify-twitter:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
env:
MODEL_NAME: hermes-2-theta-llama-3-8b
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0 # needed to checkout all branches for this Action to work
- name: Start LocalAI
run: |
echo "Starting LocalAI..."
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.7.0
id: git-diff-action
with:
json_diff_file_output: diff.json
raw_diff_file_output: diff.txt
file_output_only: "true"
- name: Summarize
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
id: summarize
run: |
input="$(cat $DIFF)"
# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"
# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "You are LocalAI-bot. Write a twitter message to notify everyone about the new model from the git diff. Make it informal and really short. An example can include: the name, and a brief description of the model if exists. Also add an hint on how to install it in LocalAI. For example: local-ai run model_name_here"
},
{
role: "user",
content: $input
}
]
}')
# Send the request to LocalAI
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")
# Extract the summary from the response
summary="$(echo $response | jq -r '.choices[0].message.content')"
# Print the summary
# -H "Authorization: Bearer $API_KEY" \
echo "Summary:"
echo "$summary"
echo "payload sent"
echo "$json_payload"
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
docker logs --tail 10 local-ai
- uses: Eomm/why-don-t-you-tweet@v2
with:
tweet-message: ${{ steps.summarize.outputs.message }}
env:
# Get your tokens from https://developer.twitter.com/apps
TWITTER_CONSUMER_API_KEY: ${{ secrets.TWITTER_APP_KEY }}
TWITTER_CONSUMER_API_SECRET: ${{ secrets.TWITTER_APP_SECRET }}
TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
- name: Setup tmate session if fails
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true

View File

@@ -1,63 +0,0 @@
name: Release notifications
on:
release:
types:
- published
jobs:
notify-discord:
runs-on: ubuntu-latest
env:
RELEASE_BODY: ${{ github.event.release.body }}
RELEASE_TITLE: ${{ github.event.release.name }}
RELEASE_TAG_NAME: ${{ github.event.release.tag_name }}
steps:
- uses: mudler/localai-github-action@v1
with:
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
- name: Summarize
id: summarize
run: |
input="$RELEASE_TITLE\b$RELEASE_BODY"
# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"
# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "Write a discord message with a bullet point summary of the release notes."
},
{
role: "user",
content: $input
}
]
}')
# Send the request to LocalAI API
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")
# Extract the summary from the response
summary=$(echo $response | jq -r '.choices[0].message.content')
# Print the summary
# -H "Authorization: Bearer $API_KEY" \
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
- name: Discord notification
env:
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL_RELEASE }}
DISCORD_USERNAME: "LocalAI-Bot"
DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
uses: Ilshidur/action-discord@master
with:
args: ${{ steps.summarize.outputs.message }}

View File

@@ -1,28 +0,0 @@
name: Check PR style
on:
pull_request_target:
types:
- opened
- reopened
- edited
- synchronize
jobs:
title-lint:
runs-on: ubuntu-latest
permissions:
statuses: write
steps:
- uses: aslafy-z/conventional-pr-title-action@v3
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# check-pr-description:
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v2
# - uses: jadrol/pr-description-checker-action@v1.0.0
# id: description-checker
# with:
# repo-token: ${{ secrets.GITHUB_TOKEN }}
# exempt-labels: no qa

View File

@@ -1,15 +1,11 @@
name: Build and Release
on:
push:
branches:
- master
tags:
- 'v*'
pull_request:
- push
- pull_request
env:
GRPC_VERSION: v1.65.0
GRPC_VERSION: v1.64.0
permissions:
contents: write
@@ -35,8 +31,8 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache gawk
sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache
sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
- name: Install CUDA Dependencies
run: |
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/cuda-keyring_1.1-1_all.deb
@@ -44,7 +40,7 @@ jobs:
sudo apt-get update
sudo apt-get install -y cuda-cross-aarch64 cuda-nvcc-cross-aarch64-${CUDA_VERSION} libcublas-cross-aarch64-${CUDA_VERSION}
env:
CUDA_VERSION: 12-4
CUDA_VERSION: 12-5
- name: Cache grpc
id: cache-grpc
uses: actions/cache@v4
@@ -56,8 +52,7 @@ jobs:
run: |
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
cd cmake/build && cmake -DgRPC_INSTALL=ON \
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
../.. && sudo make --jobs 5 --output-sync=target
- name: Install gRPC
@@ -101,13 +96,14 @@ jobs:
CROSS_TOOLCHAIN=/usr/$GNU_HOST
CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
export PATH=$PATH:$GOPATH/bin
export PATH=/usr/local/cuda/bin:$PATH
sudo rm -rf /usr/aarch64-linux-gnu/lib/libstdc++.so.6
sudo cp -rf /usr/aarch64-linux-gnu/lib/libstdc++.so* /usr/aarch64-linux-gnu/lib/libstdc++.so.6
sudo cp /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 ld.so
GO_TAGS=p2p \
BACKEND_LIBS="./grpc/cmake/cross_build/third_party/re2/libre2.a ./grpc/cmake/cross_build/libgrpc.a ./grpc/cmake/cross_build/libgrpc++.a ./grpc/cmake/cross_build/third_party/protobuf/libprotobuf.a /usr/aarch64-linux-gnu/lib/libc.so.6 /usr/aarch64-linux-gnu/lib/libstdc++.so.6 /usr/aarch64-linux-gnu/lib/libgomp.so.1 /usr/aarch64-linux-gnu/lib/libm.so.6 /usr/aarch64-linux-gnu/lib/libgcc_s.so.1 /usr/aarch64-linux-gnu/lib/libdl.so.2 /usr/aarch64-linux-gnu/lib/libpthread.so.0 ./ld.so" \
GOOS=linux \
GOARCH=arm64 \
@@ -151,7 +147,7 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache gawk cmake libgmock-dev
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache cmake
- name: Intel Dependencies
run: |
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
@@ -165,7 +161,7 @@ jobs:
sudo apt-get update
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
env:
CUDA_VERSION: 12-5
CUDA_VERSION: 12-3
- name: "Install Hipblas"
env:
ROCM_VERSION: "6.1"
@@ -201,8 +197,7 @@ jobs:
if: steps.cache-grpc.outputs.cache-hit != 'true'
run: |
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
cd cmake/build && cmake -DgRPC_INSTALL=ON \
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
../.. && sudo make --jobs 5 --output-sync=target
- name: Install gRPC
@@ -212,14 +207,15 @@ jobs:
- name: Build
id: build
run: |
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
export PATH=$PATH:$GOPATH/bin
export PATH=/usr/local/cuda/bin:$PATH
export PATH=/opt/rocm/bin:$PATH
source /opt/intel/oneapi/setvars.sh
sudo cp /lib64/ld-linux-x86-64.so.2 ld.so
BACKEND_LIBS="./ld.so ./sources/go-piper/piper/build/fi/lib/libfmt.a ./sources/go-piper/piper-phonemize/pi/lib/libonnxruntime.so.1.14.1 ./sources/go-piper/piper-phonemize/pi/src/libespeak-ng/libespeak-ng.so /usr/lib/x86_64-linux-gnu/libdl.so.2 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/lib/x86_64-linux-gnu/libpthread.so.0 ./sources/go-piper/piper-phonemize/pi/lib/libpiper_phonemize.so.1 ./sources/go-piper/piper/build/si/lib/libspdlog.a ./sources/go-piper/espeak/ei/lib/libucd.so" \
GO_TAGS=p2p \
BACKEND_LIBS="./ld.so /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/libgomp.so.1" \
make -j4 dist
- uses: actions/upload-artifact@v4
with:
@@ -253,8 +249,8 @@ jobs:
run: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
- name: Build stablediffusion
run: |
export PATH=$PATH:$GOPATH/bin
@@ -273,8 +269,8 @@ jobs:
files: |
release/*
build-macOS-x86_64:
runs-on: macos-13
build-macOS-arm64:
runs-on: macos-14
steps:
- name: Clone
uses: actions/checkout@v4
@@ -296,49 +292,7 @@ jobs:
export CPLUS_INCLUDE_PATH=/usr/local/include
export PATH=$PATH:$GOPATH/bin
make dist
- uses: actions/upload-artifact@v4
with:
name: LocalAI-MacOS-x86_64
path: release/
- name: Release
uses: softprops/action-gh-release@v2
if: startsWith(github.ref, 'refs/tags/')
with:
files: |
release/*
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
build-macOS-arm64:
runs-on: macos-14
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-go@v5
with:
go-version: '1.21.x'
cache: false
- name: Dependencies
run: |
brew install protobuf grpc
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
- name: Build
id: build
run: |
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
export PATH=$PATH:$GOPATH/bin
make dist
BACKEND_LIBS="$(ls /opt/homebrew/opt/grpc/lib/*.dylib /opt/homebrew/opt/re2/lib/*.dylib /opt/homebrew/opt/openssl@3/lib/*.dylib /opt/homebrew/opt/protobuf/lib/*.dylib /opt/homebrew/opt/abseil/lib/*.dylib | xargs)" GO_TAGS=p2p make dist
- uses: actions/upload-artifact@v4
with:
name: LocalAI-MacOS-arm64

View File

@@ -19,7 +19,7 @@ jobs:
steps:
- name: Clone
uses: actions/checkout@v4
with:
with:
submodules: true
- name: Dependencies
run: |
@@ -29,8 +29,8 @@ jobs:
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1
pip install --user grpcio-tools==1.64.0
- name: Test transformers
run: |
make --jobs=5 --output-sync=target -C backend/python/transformers
@@ -41,7 +41,7 @@ jobs:
steps:
- name: Clone
uses: actions/checkout@v4
with:
with:
submodules: true
- name: Dependencies
run: |
@@ -51,8 +51,8 @@ jobs:
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1
pip install --user grpcio-tools==1.64.0
- name: Test sentencetransformers
run: |
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
@@ -64,7 +64,7 @@ jobs:
steps:
- name: Clone
uses: actions/checkout@v4
with:
with:
submodules: true
- name: Dependencies
run: |
@@ -74,7 +74,7 @@ jobs:
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1
pip install --user grpcio-tools==1.64.0
- name: Test rerankers
run: |
@@ -86,7 +86,7 @@ jobs:
steps:
- name: Clone
uses: actions/checkout@v4
with:
with:
submodules: true
- name: Dependencies
run: |
@@ -96,7 +96,7 @@ jobs:
sudo apt-get install -y libopencv-dev
# Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh
pip install --user --no-cache-dir grpcio-tools==1.64.1
pip install --user grpcio-tools==1.64.0
- name: Test diffusers
run: |
make --jobs=5 --output-sync=target -C backend/python/diffusers
@@ -107,7 +107,7 @@ jobs:
steps:
- name: Clone
uses: actions/checkout@v4
with:
with:
submodules: true
- name: Dependencies
run: |
@@ -117,19 +117,19 @@ jobs:
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1
pip install --user grpcio-tools==1.64.0
- name: Test parler-tts
run: |
make --jobs=5 --output-sync=target -C backend/python/parler-tts
make --jobs=5 --output-sync=target -C backend/python/parler-tts test
tests-openvoice:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
with:
submodules: true
- name: Dependencies
run: |
@@ -139,7 +139,7 @@ jobs:
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1
pip install --user grpcio-tools==1.64.0
- name: Test openvoice
run: |
@@ -151,7 +151,7 @@ jobs:
steps:
- name: Clone
uses: actions/checkout@v4
with:
with:
submodules: true
- name: Dependencies
run: |
@@ -161,7 +161,7 @@ jobs:
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1
pip install --user grpcio-tools==1.64.0
- name: Test transformers-musicgen
run: |
@@ -175,7 +175,7 @@ jobs:
# steps:
# - name: Clone
# uses: actions/checkout@v4
# with:
# with:
# submodules: true
# - name: Dependencies
# run: |
@@ -185,14 +185,14 @@ jobs:
# curl -LsSf https://astral.sh/uv/install.sh | sh
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
# sudo apt-get install -y libopencv-dev
# pip install --user --no-cache-dir grpcio-tools==1.64.1
# pip install --user grpcio-tools==1.64.0
# - name: Test petals
# run: |
# make --jobs=5 --output-sync=target -C backend/python/petals
# make --jobs=5 --output-sync=target -C backend/python/petals test
# tests-bark:
# runs-on: ubuntu-latest
@@ -239,7 +239,7 @@ jobs:
# df -h
# - name: Clone
# uses: actions/checkout@v4
# with:
# with:
# submodules: true
# - name: Dependencies
# run: |
@@ -249,14 +249,14 @@ jobs:
# curl -LsSf https://astral.sh/uv/install.sh | sh
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
# sudo apt-get install -y libopencv-dev
# pip install --user --no-cache-dir grpcio-tools==1.64.1
# pip install --user grpcio-tools==1.64.0
# - name: Test bark
# run: |
# make --jobs=5 --output-sync=target -C backend/python/bark
# make --jobs=5 --output-sync=target -C backend/python/bark test
# Below tests needs GPU. Commented out for now
# TODO: Re-enable as soon as we have GPU nodes
# tests-vllm:
@@ -264,7 +264,7 @@ jobs:
# steps:
# - name: Clone
# uses: actions/checkout@v4
# with:
# with:
# submodules: true
# - name: Dependencies
# run: |
@@ -274,7 +274,7 @@ jobs:
# curl -LsSf https://astral.sh/uv/install.sh | sh
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
# sudo apt-get install -y libopencv-dev
# pip install --user --no-cache-dir grpcio-tools==1.64.1
# pip install --user grpcio-tools==1.64.0
# - name: Test vllm
# run: |
# make --jobs=5 --output-sync=target -C backend/python/vllm
@@ -284,7 +284,7 @@ jobs:
steps:
- name: Clone
uses: actions/checkout@v4
with:
with:
submodules: true
- name: Dependencies
run: |
@@ -294,7 +294,7 @@ jobs:
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1
pip install --user grpcio-tools==1.64.0
- name: Test vall-e-x
run: |
make --jobs=5 --output-sync=target -C backend/python/vall-e-x
@@ -305,7 +305,7 @@ jobs:
steps:
- name: Clone
uses: actions/checkout@v4
with:
with:
submodules: true
- name: Dependencies
run: |
@@ -314,8 +314,8 @@ jobs:
sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip
# Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh
pip install --user --no-cache-dir grpcio-tools==1.64.1
pip install --user grpcio-tools==1.64.0
- name: Test coqui
run: |
make --jobs=5 --output-sync=target -C backend/python/coqui
make --jobs=5 --output-sync=target -C backend/python/coqui test
make --jobs=5 --output-sync=target -C backend/python/coqui test

View File

@@ -10,7 +10,7 @@ on:
- '*'
env:
GRPC_VERSION: v1.65.0
GRPC_VERSION: v1.64.0
concurrency:
group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
@@ -71,7 +71,6 @@ jobs:
run: |
sudo apt-get update
sudo apt-get install build-essential curl ffmpeg
sudo apt-get install -y libgmock-dev
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
@@ -94,8 +93,8 @@ jobs:
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
export CUDACXX=/usr/local/cuda/bin/nvcc
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
# The python3-grpc-tools package in 22.04 is too old
pip install --user grpcio-tools
@@ -110,7 +109,7 @@ jobs:
# Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
env:
CUDA_VERSION: 12-4
CUDA_VERSION: 12-3
- name: Cache grpc
id: cache-grpc
uses: actions/cache@v4
@@ -121,8 +120,7 @@ jobs:
if: steps.cache-grpc.outputs.cache-hit != 'true'
run: |
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --jobs 5 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && cd cmake/build && \
cmake -DgRPC_INSTALL=ON \
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
../.. && sudo make --jobs 5
- name: Install gRPC
@@ -215,14 +213,14 @@ jobs:
- name: Dependencies
run: |
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc
pip install --user --no-cache-dir grpcio-tools==1.64.1
pip install --user grpcio-tools==1.64.0
- name: Test
run: |
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
# Used to run the newer GNUMake version from brew that supports --output-sync
export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18

View File

@@ -13,17 +13,11 @@ jobs:
- uses: actions/setup-go@v5
with:
go-version: 'stable'
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install protobuf-compiler
- run: |
go install github.com/swaggo/swag/cmd/swag@latest
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
- name: Bump swagger 🔧
run: |
make protogen-go swagger
make swagger
- name: Create Pull Request
uses: peter-evans/create-pull-request@v6
with:

View File

@@ -8,7 +8,7 @@ FROM ${BASE_IMAGE} AS requirements-core
USER root
ARG GO_VERSION=1.22.5
ARG GO_VERSION=1.22.4
ARG TARGETARCH
ARG TARGETVARIANT
@@ -99,7 +99,7 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers
ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=12
ARG CUDA_MINOR_VERSION=4
ARG CUDA_MINOR_VERSION=5
ENV BUILD_TYPE=${BUILD_TYPE}
@@ -108,11 +108,11 @@ RUN <<EOT bash
if [ "${BUILD_TYPE}" = "vulkan" ]; then
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils wget gpg-agent && \
software-properties-common pciutils wget gpg-agent && \
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
apt-get update && \
apt-get install -y \
apt-get install -y \
vulkan-sdk && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
@@ -124,13 +124,33 @@ RUN <<EOT bash
if [ "${BUILD_TYPE}" = "cublas" ]; then
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils
software-properties-common pciutils
if [ "amd64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
fi
fi
if [ "arm64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
fi
dpkg -i cuda-keyring_1.1-1_all.deb && \
rm -f cuda-keyring_1.1-1_all.deb && \
apt-get update && \
apt-get install -y --no-install-recommends \
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils && \
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
dpkg -i cuda-keyring_1.1-1_all.deb && \
rm -f cuda-keyring_1.1-1_all.deb && \
apt-get update && \
@@ -142,9 +162,8 @@ RUN <<EOT bash
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
rm -rf /var/lib/apt/lists/* \
; fi
# If we are building with clblas support, we need the libraries for the builds
RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
@@ -187,7 +206,7 @@ FROM ${GRPC_BASE_IMAGE} AS grpc
# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
ARG GRPC_MAKEFLAGS="-j4 -Otarget"
ARG GRPC_VERSION=v1.65.0
ARG GRPC_VERSION=v1.64.2
ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
@@ -208,7 +227,6 @@ RUN apt-get update && \
RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
mkdir -p /build/grpc/cmake/build && \
cd /build/grpc/cmake/build && \
sed -i "216i\ TESTONLY" "../../third_party/abseil-cpp/absl/container/CMakeLists.txt" && \
cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \
make && \
make install && \
@@ -264,8 +282,6 @@ COPY --from=grpc /opt/grpc /usr/local
# Rebuild with defaults backends
WORKDIR /build
## Build the binary
RUN make build
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \

191
Makefile
View File

@@ -3,12 +3,9 @@ GOTEST=$(GOCMD) test
GOVET=$(GOCMD) vet
BINARY_NAME=local-ai
DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=87e397d00bdcedd5cbf6dfda06a7b0f302462728
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=e112b610a1a75cb7fa8351e1a933e2e7a755a5ce
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -19,33 +16,26 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=f68298ce06ca3edd6e6f3f21c3d0bb5f073942c3
WHISPER_CPP_VERSION?=b29b3b29240aac8b71ce8e5a4360c1f1562ad66f
# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4
# go-piper version
PIPER_REPO?=https://github.com/mudler/go-piper
PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759
# stablediffusion version
STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion
STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
# tinydream version
TINYDREAM_REPO?=https://github.com/M0Rf30/go-tiny-dream
TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
export BUILD_TYPE?=
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
export CMAKE_ARGS?=
export BACKEND_LIBS?=
CGO_LDFLAGS?=
CGO_LDFLAGS_WHISPER?=
CGO_LDFLAGS_WHISPER+=-lggml
CUDA_LIBPATH?=/usr/local/cuda/lib64/
GO_TAGS?=
BUILD_ID?=
@@ -59,12 +49,12 @@ RANDOM := $(shell bash -c 'echo $$RANDOM')
VERSION?=$(shell git describe --always --tags || echo "dev" )
# go tool nm ./local-ai | grep Commit
LD_FLAGS?=
override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Version=$(VERSION)"
override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Version=$(VERSION)"
override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
OPTIONAL_TARGETS?=
export OS := $(shell uname -s)
OS := $(shell uname -s)
ARCH := $(shell uname -m)
GREEN := $(shell tput -Txterm setaf 2)
YELLOW := $(shell tput -Txterm setaf 3)
@@ -90,40 +80,31 @@ ifeq ($(OS),Darwin)
BUILD_TYPE=metal
# disable metal if on Darwin and any other value is explicitly passed.
else ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DGGML_METAL=OFF
export GGML_NO_ACCELERATE=1
export GGML_NO_METAL=1
CMAKE_ARGS+=-DLLAMA_METAL=OFF
export LLAMA_NO_ACCELERATE=1
endif
ifeq ($(BUILD_TYPE),metal)
# -lcblas removed: it seems to always be listed as a duplicate flag.
CGO_LDFLAGS += -framework Accelerate
endif
else
CGO_LDFLAGS_WHISPER+=-lgomp
endif
ifeq ($(BUILD_TYPE),openblas)
CGO_LDFLAGS+=-lopenblas
export GGML_OPENBLAS=1
export WHISPER_OPENBLAS=1
endif
ifeq ($(BUILD_TYPE),cublas)
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
export GGML_CUDA=1
export LLAMA_CUBLAS=1
export WHISPER_CUDA=1
CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda -lcufft
endif
ifeq ($(BUILD_TYPE),vulkan)
CMAKE_ARGS+=-DGGML_VULKAN=1
endif
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
export GGML_SYCL=1
endif
ifeq ($(BUILD_TYPE),sycl_f16)
export GGML_SYCL_F16=1
CMAKE_ARGS+=-DLLAMA_VULKAN=1
endif
ifeq ($(BUILD_TYPE),hipblas)
@@ -134,26 +115,27 @@ ifeq ($(BUILD_TYPE),hipblas)
export CC=$(ROCM_HOME)/llvm/bin/clang
# llama-ggml has no hipblas support, so override it here.
export STABLE_BUILD_TYPE=
export GGML_HIPBLAS=1
export WHISPER_HIPBLAS=1
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
endif
ifeq ($(BUILD_TYPE),metal)
CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
export GGML_METAL=1
export LLAMA_METAL=1
export WHISPER_METAL=1
endif
ifeq ($(BUILD_TYPE),clblas)
CGO_LDFLAGS+=-lOpenCL -lclblast
export GGML_OPENBLAS=1
export WHISPER_CLBLAST=1
endif
# glibc-static or glibc-devel-static required
ifeq ($(STATIC),true)
LD_FLAGS+=-linkmode external -extldflags -static
LD_FLAGS=-linkmode external -extldflags -static
endif
ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion)
@@ -187,8 +169,6 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
# Use filter-out to remove the specified backends
ALL_GRPC_BACKENDS := $(filter-out $(SKIP_GRPC_BACKEND),$(ALL_GRPC_BACKENDS))
GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)
TEST_PATHS?=./api/... ./pkg/... ./core/...
@@ -208,109 +188,69 @@ all: help
## BERT embeddings
sources/go-bert.cpp:
mkdir -p sources/go-bert.cpp
cd sources/go-bert.cpp && \
git init && \
git remote add origin $(BERT_REPO) && \
git fetch origin && \
git checkout $(BERT_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert.cpp
cd sources/go-bert.cpp && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
$(MAKE) -C sources/go-bert.cpp libgobert.a
## go-llama.cpp
sources/go-llama.cpp:
mkdir -p sources/go-llama.cpp
cd sources/go-llama.cpp && \
git init && \
git remote add origin $(GOLLAMA_REPO) && \
git fetch origin && \
git checkout $(GOLLAMA_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama.cpp
cd sources/go-llama.cpp && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
## go-piper
sources/go-piper:
mkdir -p sources/go-piper
cd sources/go-piper && \
git init && \
git remote add origin $(PIPER_REPO) && \
git fetch origin && \
git checkout $(PIPER_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
git clone --recurse-submodules https://github.com/mudler/go-piper sources/go-piper
cd sources/go-piper && git checkout -b build $(PIPER_VERSION) && git submodule update --init --recursive --depth 1
sources/go-piper/libpiper_binding.a: sources/go-piper
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
## GPT4ALL
sources/gpt4all:
mkdir -p sources/gpt4all
cd sources/gpt4all && \
git init && \
git remote add origin $(GPT4ALL_REPO) && \
git fetch origin && \
git checkout $(GPT4ALL_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
git clone --recurse-submodules $(GPT4ALL_REPO) sources/gpt4all
cd sources/gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
## RWKV
sources/go-rwkv.cpp:
mkdir -p sources/go-rwkv.cpp
cd sources/go-rwkv.cpp && \
git init && \
git remote add origin $(RWKV_REPO) && \
git fetch origin && \
git checkout $(RWKV_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
git clone --recurse-submodules $(RWKV_REPO) sources/go-rwkv.cpp
cd sources/go-rwkv.cpp && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp
cd sources/go-rwkv.cpp && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
## stable diffusion
sources/go-stable-diffusion:
mkdir -p sources/go-stable-diffusion
cd sources/go-stable-diffusion && \
git init && \
git remote add origin $(STABLEDIFFUSION_REPO) && \
git fetch origin && \
git checkout $(STABLEDIFFUSION_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
git clone --recurse-submodules https://github.com/mudler/go-stable-diffusion sources/go-stable-diffusion
cd sources/go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1
sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
## tiny-dream
sources/go-tiny-dream:
mkdir -p sources/go-tiny-dream
cd sources/go-tiny-dream && \
git init && \
git remote add origin $(TINYDREAM_REPO) && \
git fetch origin && \
git checkout $(TINYDREAM_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
git clone --recurse-submodules https://github.com/M0Rf30/go-tiny-dream sources/go-tiny-dream
cd sources/go-tiny-dream && git checkout -b build $(TINYDREAM_VERSION) && git submodule update --init --recursive --depth 1
sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream
$(MAKE) -C sources/go-tiny-dream libtinydream.a
## whisper
sources/whisper.cpp:
mkdir -p sources/whisper.cpp
cd sources/whisper.cpp && \
git init && \
git remote add origin $(WHISPER_REPO) && \
git fetch origin && \
git checkout $(WHISPER_CPP_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
git clone https://github.com/ggerganov/whisper.cpp sources/whisper.cpp
cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
cd sources/whisper.cpp && $(MAKE) libwhisper.a
get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream
replace:
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
@@ -379,12 +319,12 @@ build: prepare backend-assets grpcs ## Build the project
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
ifneq ($(BACKEND_LIBS),)
$(MAKE) backend-assets/lib
cp -f $(BACKEND_LIBS) backend-assets/lib/
cp $(BACKEND_LIBS) backend-assets/lib/
endif
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
build-minimal:
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=none $(MAKE) build
build-api:
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
@@ -394,9 +334,6 @@ backend-assets/lib:
dist:
$(MAKE) backend-assets/grpc/llama-cpp-avx2
ifeq ($(DETECT_LIBS),true)
scripts/prepare-libs.sh backend-assets/grpc/llama-cpp-avx2
endif
ifeq ($(OS),Darwin)
$(info ${GREEN}I Skip CUDA/hipblas build on MacOS${RESET})
else
@@ -405,11 +342,7 @@ else
$(MAKE) backend-assets/grpc/llama-cpp-sycl_f16
$(MAKE) backend-assets/grpc/llama-cpp-sycl_f32
endif
GO_TAGS="tts p2p" $(MAKE) build
ifeq ($(DETECT_LIBS),true)
scripts/prepare-libs.sh backend-assets/grpc/piper
endif
GO_TAGS="tts p2p" STATIC=true $(MAKE) build
STATIC=true $(MAKE) build
mkdir -p release
# if BUILD_ID is empty, then we don't append it to the binary name
ifeq ($(BUILD_ID),)
@@ -420,8 +353,8 @@ else
shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH).sha256
endif
dist-cross-linux-arm64:
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
dist-cross-linux-arm64:
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
STATIC=true $(MAKE) build
mkdir -p release
# if BUILD_ID is empty, then we don't append it to the binary name
@@ -471,7 +404,7 @@ prepare-e2e:
mkdir -p $(TEST_DIR)
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=4 --build-arg FFMPEG=true -t localai-tests .
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=5 --build-arg FFMPEG=true -t localai-tests .
run-e2e-image:
ls -liah $(abspath ./tests/e2e-fixtures)
@@ -767,71 +700,71 @@ else
endif
# This target is for manually building a variant with-auto detected flags
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp
backend-assets/grpc/llama-cpp: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-cpp
$(MAKE) -C backend/cpp/llama-cpp purge
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
$(MAKE) VARIANT="llama-cpp" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-cpp/grpc-server backend-assets/grpc/llama-cpp
backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc backend/cpp/llama/llama.cpp
backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-avx2
$(MAKE) -C backend/cpp/llama-avx2 purge
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-avx
$(MAKE) -C backend/cpp/llama-avx purge
$(info ${GREEN}I llama-cpp build info:avx${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-avx/grpc-server backend-assets/grpc/llama-cpp-avx
backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc backend/cpp/llama/llama.cpp
backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-fallback
$(MAKE) -C backend/cpp/llama-fallback purge
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
# TODO: every binary should have its own folder instead, so can have different metal implementations
ifeq ($(BUILD_TYPE),metal)
cp backend/cpp/llama-fallback/llama.cpp/build/bin/default.metallib backend-assets/grpc/
endif
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama/llama.cpp
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-cuda
$(MAKE) -C backend/cpp/llama-cuda purge
$(info ${GREEN}I llama-cpp build info:cuda${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc backend/cpp/llama/llama.cpp
backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-hipblas
$(MAKE) -C backend/cpp/llama-hipblas purge
$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama/llama.cpp
backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-sycl_f16
$(MAKE) -C backend/cpp/llama-sycl_f16 purge
$(info ${GREEN}I llama-cpp build info:sycl_f16${RESET})
BUILD_TYPE="sycl_f16" $(MAKE) VARIANT="llama-sycl_f16" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-sycl_f16/grpc-server backend-assets/grpc/llama-cpp-sycl_f16
backend-assets/grpc/llama-cpp-sycl_f32: backend-assets/grpc backend/cpp/llama/llama.cpp
backend-assets/grpc/llama-cpp-sycl_f32: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-sycl_f32
$(MAKE) -C backend/cpp/llama-sycl_f32 purge
$(info ${GREEN}I llama-cpp build info:sycl_f32${RESET})
BUILD_TYPE="sycl_f32" $(MAKE) VARIANT="llama-sycl_f32" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-sycl_f32/grpc-server backend-assets/grpc/llama-cpp-sycl_f32
backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama/llama.cpp
backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-grpc
$(MAKE) -C backend/cpp/llama-grpc purge
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_RPC=ON -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-grpc/grpc-server backend-assets/grpc/llama-cpp-grpc
backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
@@ -859,7 +792,7 @@ backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libti
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH=$(CURDIR)/sources/whisper.cpp LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
backend-assets/grpc/local-store: backend-assets/grpc
@@ -905,7 +838,7 @@ docker-aio-all:
docker-image-intel:
docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
@@ -913,7 +846,7 @@ docker-image-intel:
docker-image-intel-xpu:
docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
@@ -928,7 +861,7 @@ gen-assets:
$(GOCMD) run core/dependencies_manager/manager.go embedded/webui_static.yaml core/http/static/assets
## Documentation
docs/layouts/_default:
docs/layouts/_default:
mkdir -p docs/layouts/_default
docs/static/gallery.html: docs/layouts/_default
@@ -943,4 +876,4 @@ docs-clean:
.PHONY: docs
docs: docs/static/gallery.html
cd docs && hugo serve
cd docs && hugo serve

View File

@@ -72,15 +72,13 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
- June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
- June 2024: Support for models from OCI registries: https://github.com/mudler/LocalAI/pull/2628
- May 2024: 🔥🔥 Decentralized P2P llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
- May 2024: 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
- May 2024: 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328
- May 2024: 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
- May 2024: Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
- April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121
- 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
- 🔥🔥 Decentralized llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
- 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
- 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328
- 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
- Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
- Reranker API: https://github.com/mudler/LocalAI/pull/2121
Hot topics (looking for contributors):
@@ -90,7 +88,6 @@ Hot topics (looking for contributors):
- Assistant API: https://github.com/mudler/LocalAI/issues/1273
- Moderation endpoint: https://github.com/mudler/LocalAI/issues/999
- Vulkan: https://github.com/mudler/LocalAI/issues/1647
- Anthropic API: https://github.com/mudler/LocalAI/issues/1808
If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22
@@ -107,7 +104,6 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
- 📈 [Reranker API](https://localai.io/features/reranker/)
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
- 🌍 Integrated WebUI!
## 💻 Usage
@@ -136,7 +132,6 @@ Other:
- Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
- Shell-Pilot(Interact with LLM using LocalAI models via pure shell scripts on your Linux or MacOS system) https://github.com/reid41/shell-pilot
- Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
- Github Actions: https://github.com/marketplace/actions/start-localai
- Examples: https://github.com/mudler/LocalAI/tree/master/examples/

View File

@@ -46,14 +46,9 @@ endif
$(INSTALLED_PACKAGES): grpc_build
$(GRPC_REPO):
mkdir -p $(GRPC_REPO)/grpc
cd $(GRPC_REPO)/grpc && \
git init && \
git remote add origin $(GIT_REPO_LIB_GRPC) && \
git fetch origin && \
git checkout $(TAG_LIB_GRPC) && \
git submodule update --init --recursive --depth 1 --single-branch
git clone --depth $(GIT_CLONE_DEPTH) -b $(TAG_LIB_GRPC) $(GIT_REPO_LIB_GRPC) $(GRPC_REPO)/grpc
cd $(GRPC_REPO)/grpc && git submodule update --jobs 2 --init --recursive --depth $(GIT_CLONE_DEPTH)
$(GRPC_BUILD): $(GRPC_REPO)
mkdir -p $(GRPC_BUILD)
cd $(GRPC_BUILD) && cmake $(CMAKE_ARGS) ../$(GRPC_REPO)/grpc && cmake --build . && cmake --build . --target install

View File

@@ -1,58 +1,45 @@
LLAMA_VERSION?=
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
CMAKE_ARGS?=
BUILD_TYPE?=
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
TARGET?=--target grpc-server
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
# If build type is cublas, then we set -DLLAMA_CUBLAS=ON to CMAKE_ARGS automatically
ifeq ($(BUILD_TYPE),cublas)
CMAKE_ARGS+=-DGGML_CUDA=ON
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
CMAKE_ARGS+=-DLLAMA_CUBLAS=ON
# If build type is openblas then we set -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
# to CMAKE_ARGS automatically
else ifeq ($(BUILD_TYPE),openblas)
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
CMAKE_ARGS+=-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
# If build type is clblas (openCL) we set -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
else ifeq ($(BUILD_TYPE),clblas)
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
CMAKE_ARGS+=-DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
else ifeq ($(BUILD_TYPE),hipblas)
CMAKE_ARGS+=-DGGML_HIPBLAS=ON
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
# If it's OSX, DO NOT embed the metal library - -DLLAMA_METAL_EMBED_LIBRARY=ON requires further investigation
# But if it's OSX without metal, disable it here
else ifeq ($(OS),Darwin)
else ifeq ($(OS),darwin)
ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DGGML_METAL=OFF
else
CMAKE_ARGS+=-DGGML_METAL=ON
# Until this is tested properly, we disable embedded metal file
# as we already embed it as part of the LocalAI assets
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=OFF
TARGET+=--target ggml-metal
CMAKE_ARGS+=-DLLAMA_METAL=OFF
endif
endif
ifeq ($(BUILD_TYPE),sycl_f16)
CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
endif
ifeq ($(BUILD_TYPE),sycl_f32)
CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
endif
llama.cpp:
mkdir -p llama.cpp
cd llama.cpp && \
git init && \
git remote add origin $(LLAMA_REPO) && \
git fetch origin && \
git checkout -b build $(LLAMA_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
git clone --recurse-submodules https://github.com/ggerganov/llama.cpp llama.cpp
if [ -z "$(LLAMA_VERSION)" ]; then \
exit 1; \
fi
cd llama.cpp && git checkout -b build $(LLAMA_VERSION) && git submodule update --init --recursive --depth 1
llama.cpp/examples/grpc-server: llama.cpp
mkdir -p llama.cpp/examples/grpc-server
@@ -74,9 +61,9 @@ clean: purge
grpc-server: llama.cpp llama.cpp/examples/grpc-server
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+bash -c "source $(ONEAPI_VARS); \
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)"
bash -c "source $(ONEAPI_VARS); \
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && $(MAKE)"
else
+cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && $(MAKE)
endif
cp llama.cpp/build/bin/grpc-server .

View File

@@ -886,8 +886,6 @@ struct llama_server_context
{"task_id", slot->task_id},
});
LOG_TEE("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
return true;
}
@@ -2108,7 +2106,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
data["grammar"] = predict->grammar();
data["prompt"] = predict->prompt();
data["ignore_eos"] = predict->ignoreeos();
data["embeddings"] = predict->embeddings();
// for each image in the request, add the image data
//
@@ -2386,31 +2383,6 @@ public:
return grpc::Status::OK;
}
/// https://github.com/ggerganov/llama.cpp/blob/aa2341298924ac89778252015efcb792f2df1e20/examples/server/server.cpp#L2969
grpc::Status Embedding(ServerContext* context, const backend::PredictOptions* request, backend::EmbeddingResult* embeddingResult) {
json data = parse_options(false, request, llama);
const int task_id = llama.queue_tasks.get_new_id();
llama.queue_results.add_waiting_task_id(task_id);
llama.request_completion(task_id, { {"prompt", data["embeddings"]}, { "n_predict", 0}, {"image_data", ""} }, false, true, -1);
// get the result
task_result result = llama.queue_results.recv(task_id);
//std::cout << "Embedding result JSON" << result.result_json.dump() << std::endl;
llama.queue_results.remove_waiting_task_id(task_id);
if (!result.error && result.stop) {
std::vector<float> embeddings = result.result_json.value("embedding", std::vector<float>());
// loop the vector and set the embeddings results
for (int i = 0; i < embeddings.size(); i++) {
embeddingResult->add_embeddings(embeddings[i]);
}
}
else
{
return grpc::Status::OK;
}
return grpc::Status::OK;
}
};
void RunServer(const std::string& server_address) {

View File

@@ -6,9 +6,9 @@ import (
"fmt"
"path/filepath"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/go-llama.cpp"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)
type LLM struct {

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,6 @@
accelerate
auto-gptq==0.7.1
grpcio==1.65.0
grpcio==1.64.0
protobuf
torch
certifi

View File

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,6 @@
accelerate
bark==0.1.5
grpcio==1.65.0
grpcio==1.64.0
protobuf
certifi
transformers

View File

@@ -148,13 +148,13 @@ function startBackend() {
ensureVenv
if [ ! -z ${BACKEND_FILE} ]; then
exec python ${BACKEND_FILE} $@
python ${BACKEND_FILE} $@
elif [ -e "${MY_DIR}/server.py" ]; then
exec python ${MY_DIR}/server.py $@
python ${MY_DIR}/server.py $@
elif [ -e "${MY_DIR}/backend.py" ]; then
exec python ${MY_DIR}/backend.py $@
python ${MY_DIR}/backend.py $@
elif [ -e "${MY_DIR}/${BACKEND_NAME}.py" ]; then
exec python ${MY_DIR}/${BACKEND_NAME}.py $@
python ${MY_DIR}/${BACKEND_NAME}.py $@
fi
}
@@ -210,4 +210,4 @@ function checkTargets() {
echo false
}
init
init

View File

@@ -1,2 +1,2 @@
grpcio==1.65.0
grpcio==1.64.0
protobuf

View File

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,6 @@
accelerate
TTS==0.22.0
grpcio==1.65.0
grpcio==1.64.0
protobuf
certifi
transformers

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env python3
from concurrent import futures
import traceback
import argparse
from collections import defaultdict
from enum import Enum
@@ -17,39 +17,35 @@ import backend_pb2_grpc
import grpc
from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
EulerAncestralDiscreteScheduler
from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, EulerAncestralDiscreteScheduler
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
from diffusers.pipelines.stable_diffusion import safety_checker
from diffusers.utils import load_image, export_to_video
from diffusers.utils import load_image,export_to_video
from compel import Compel, ReturnedEmbeddingsType
from transformers import CLIPTextModel
from safetensors.torch import load_file
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
COMPEL = os.environ.get("COMPEL", "0") == "1"
XPU = os.environ.get("XPU", "0") == "1"
CLIPSKIP = os.environ.get("CLIPSKIP", "1") == "1"
SAFETENSORS = os.environ.get("SAFETENSORS", "1") == "1"
CHUNK_SIZE = os.environ.get("CHUNK_SIZE", "8")
FPS = os.environ.get("FPS", "7")
DISABLE_CPU_OFFLOAD = os.environ.get("DISABLE_CPU_OFFLOAD", "0") == "1"
FRAMES = os.environ.get("FRAMES", "64")
COMPEL=os.environ.get("COMPEL", "0") == "1"
XPU=os.environ.get("XPU", "0") == "1"
CLIPSKIP=os.environ.get("CLIPSKIP", "1") == "1"
SAFETENSORS=os.environ.get("SAFETENSORS", "1") == "1"
CHUNK_SIZE=os.environ.get("CHUNK_SIZE", "8")
FPS=os.environ.get("FPS", "7")
DISABLE_CPU_OFFLOAD=os.environ.get("DISABLE_CPU_OFFLOAD", "0") == "1"
FRAMES=os.environ.get("FRAMES", "64")
if XPU:
import intel_extension_for_pytorch as ipex
print(ipex.xpu.get_device_name(0))
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
# https://github.com/CompVis/stable-diffusion/issues/239#issuecomment-1627615287
def sc(self, clip_input, images): return images, [False for i in images]
def sc(self, clip_input, images) : return images, [False for i in images]
# edit the StableDiffusionSafetyChecker class so that, when called, it just returns the images and an array of True values
safety_checker.StableDiffusionSafetyChecker.forward = sc
@@ -66,8 +62,6 @@ from diffusers.schedulers import (
PNDMScheduler,
UniPCMultistepScheduler,
)
# The scheduler list mapping was taken from here: https://github.com/neggles/animatediff-cli/blob/6f336f5f4b5e38e85d7f06f1744ef42d0a45f2a7/src/animatediff/schedulers.py#L39
# Credits to https://github.com/neggles
# See https://github.com/huggingface/diffusers/issues/4167 for more details on sched mapping from A1111
@@ -142,12 +136,10 @@ def get_scheduler(name: str, config: dict = {}):
return sched_class.from_config(config)
# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
def Health(self, request, context):
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
def LoadModel(self, request, context):
try:
print(f"Loading model {request.Model}...", file=sys.stderr)
@@ -157,7 +149,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if request.F16Memory:
torchType = torch.float16
variant = "fp16"
variant="fp16"
local = False
modelFile = request.Model
@@ -165,38 +157,38 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
self.cfg_scale = 7
if request.CFGScale != 0:
self.cfg_scale = request.CFGScale
clipmodel = "runwayml/stable-diffusion-v1-5"
if request.CLIPModel != "":
clipmodel = request.CLIPModel
clipsubfolder = "text_encoder"
if request.CLIPSubfolder != "":
clipsubfolder = request.CLIPSubfolder
# Check if ModelFile exists
if request.ModelFile != "":
if os.path.exists(request.ModelFile):
local = True
modelFile = request.ModelFile
fromSingleFile = request.Model.startswith("http") or request.Model.startswith("/") or local
self.img2vid = False
self.txt2vid = False
self.img2vid=False
self.txt2vid=False
## img2img
if (request.PipelineType == "StableDiffusionImg2ImgPipeline") or (request.IMG2IMG and request.PipelineType == ""):
if fromSingleFile:
self.pipe = StableDiffusionImg2ImgPipeline.from_single_file(modelFile,
torch_dtype=torchType)
torch_dtype=torchType)
else:
self.pipe = StableDiffusionImg2ImgPipeline.from_pretrained(request.Model,
torch_dtype=torchType)
torch_dtype=torchType)
elif request.PipelineType == "StableDiffusionDepth2ImgPipeline":
self.pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(request.Model,
torch_dtype=torchType)
torch_dtype=torchType)
## img2vid
elif request.PipelineType == "StableVideoDiffusionPipeline":
self.img2vid = True
self.img2vid=True
self.pipe = StableVideoDiffusionPipeline.from_pretrained(
request.Model, torch_dtype=torchType, variant=variant
)
@@ -205,63 +197,64 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
## text2img
elif request.PipelineType == "AutoPipelineForText2Image" or request.PipelineType == "":
self.pipe = AutoPipelineForText2Image.from_pretrained(request.Model,
torch_dtype=torchType,
use_safetensors=SAFETENSORS,
variant=variant)
torch_dtype=torchType,
use_safetensors=SAFETENSORS,
variant=variant)
elif request.PipelineType == "StableDiffusionPipeline":
if fromSingleFile:
self.pipe = StableDiffusionPipeline.from_single_file(modelFile,
torch_dtype=torchType)
torch_dtype=torchType)
else:
self.pipe = StableDiffusionPipeline.from_pretrained(request.Model,
torch_dtype=torchType)
torch_dtype=torchType)
elif request.PipelineType == "DiffusionPipeline":
self.pipe = DiffusionPipeline.from_pretrained(request.Model,
torch_dtype=torchType)
torch_dtype=torchType)
elif request.PipelineType == "VideoDiffusionPipeline":
self.txt2vid = True
self.txt2vid=True
self.pipe = DiffusionPipeline.from_pretrained(request.Model,
torch_dtype=torchType)
torch_dtype=torchType)
elif request.PipelineType == "StableDiffusionXLPipeline":
if fromSingleFile:
self.pipe = StableDiffusionXLPipeline.from_single_file(modelFile,
torch_dtype=torchType,
use_safetensors=True)
torch_dtype=torchType,
use_safetensors=True)
else:
self.pipe = StableDiffusionXLPipeline.from_pretrained(
request.Model,
torch_dtype=torchType,
use_safetensors=True,
request.Model,
torch_dtype=torchType,
use_safetensors=True,
variant=variant)
elif request.PipelineType == "StableDiffusion3Pipeline":
if fromSingleFile:
self.pipe = StableDiffusion3Pipeline.from_single_file(modelFile,
torch_dtype=torchType,
use_safetensors=True)
torch_dtype=torchType,
use_safetensors=True)
else:
self.pipe = StableDiffusion3Pipeline.from_pretrained(
request.Model,
torch_dtype=torchType,
use_safetensors=True,
request.Model,
torch_dtype=torchType,
use_safetensors=True,
variant=variant)
if CLIPSKIP and request.CLIPSkip != 0:
self.clip_skip = request.CLIPSkip
else:
self.clip_skip = 0
# torch_dtype needs to be customized. float16 for GPU, float32 for CPU
# TODO: this needs to be customized
if request.SchedulerType != "":
self.pipe.scheduler = get_scheduler(request.SchedulerType, self.pipe.scheduler.config)
if COMPEL:
self.compel = Compel(
tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2 ],
text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
requires_pooled=[False, True]
)
)
if request.ControlNet:
self.controlnet = ControlNetModel.from_pretrained(
@@ -270,6 +263,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
self.pipe.controlnet = self.controlnet
else:
self.controlnet = None
if request.CUDA:
self.pipe.to('cuda')
if self.controlnet:
self.controlnet.to('cuda')
if XPU:
self.pipe = self.pipe.to("xpu")
# Assume directory from request.ModelFile.
# Only if request.LoraAdapter it's not an absolute path
if request.LoraAdapter and request.ModelFile != "" and not os.path.isabs(request.LoraAdapter) and request.LoraAdapter:
@@ -282,17 +282,10 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if request.LoraAdapter:
# Check if its a local file and not a directory ( we load lora differently for a safetensor file )
if os.path.exists(request.LoraAdapter) and not os.path.isdir(request.LoraAdapter):
# self.load_lora_weights(request.LoraAdapter, 1, device, torchType)
self.pipe.load_lora_weights(request.LoraAdapter)
self.load_lora_weights(request.LoraAdapter, 1, device, torchType)
else:
self.pipe.unet.load_attn_procs(request.LoraAdapter)
if request.CUDA:
self.pipe.to('cuda')
if self.controlnet:
self.controlnet.to('cuda')
if XPU:
self.pipe = self.pipe.to("xpu")
except Exception as err:
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
# Implement your logic here for the LoadModel service
@@ -365,9 +358,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
# create a dictionary of values for the parameters
options = {
"negative_prompt": request.negative_prompt,
"width": request.width,
"height": request.height,
"negative_prompt": request.negative_prompt,
"width": request.width,
"height": request.height,
"num_inference_steps": steps,
}
@@ -379,7 +372,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
options["image"] = pose_image
if CLIPSKIP and self.clip_skip != 0:
options["clip_skip"] = self.clip_skip
options["clip_skip"]=self.clip_skip
# Get the keys that we will build the args for our pipe for
keys = options.keys()
@@ -423,21 +416,20 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
image = self.pipe(
guidance_scale=self.cfg_scale,
**kwargs
).images[0]
).images[0]
else:
# pass the kwargs dictionary to the self.pipe method
image = self.pipe(
prompt,
guidance_scale=self.cfg_scale,
**kwargs
).images[0]
).images[0]
# save the result
image.save(request.dst)
return backend_pb2.Result(message="Media generated", success=True)
def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
@@ -461,7 +453,6 @@ def serve(address):
except KeyboardInterrupt:
server.stop(0)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run the gRPC server.")
parser.add_argument(
@@ -469,4 +460,4 @@ if __name__ == "__main__":
)
args = parser.parse_args()
serve(args.addr)
serve(args.addr)

View File

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch
torchvision
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,9 +1,7 @@
setuptools
accelerate
compel
peft
diffusers
grpcio==1.65.0
grpcio==1.64.0
opencv-python
pillow
protobuf

View File

@@ -1,4 +1,4 @@
grpcio==1.65.0
grpcio==1.64.0
protobuf
torch
transformers

View File

@@ -1,5 +1,5 @@
accelerate
grpcio==1.65.0
grpcio==1.64.0
protobuf
certifi
torch

View File

@@ -4,4 +4,4 @@
packaging
setuptools
wheel
torch==2.3.1
torch==2.2.0

View File

@@ -1,6 +1,6 @@
causal-conv1d==1.4.0
mamba-ssm==2.2.2
grpcio==1.65.0
causal-conv1d==1.2.0.post2
mamba-ssm==1.2.0.post1
grpcio==1.64.0
protobuf
certifi
transformers

View File

@@ -2,22 +2,22 @@
intel-extension-for-pytorch
torch
optimum[openvino]
grpcio==1.64.1
grpcio==1.64.0
protobuf
librosa==0.9.1
faster-whisper==1.0.3
faster-whisper==0.9.0
pydub==0.25.1
wavmark==0.0.3
numpy==1.26.4
numpy==1.22.0
eng_to_ipa==0.0.2
inflect==7.0.0
unidecode==1.3.7
whisper-timestamped==1.15.4
whisper-timestamped==1.14.2
openai
python-dotenv
pypinyin==0.50.0
cn2an==0.5.22
jieba==0.42.1
gradio==4.38.1
gradio==3.48.0
langid==1.1.6
git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/MeloTTS.git

View File

@@ -1,20 +1,20 @@
grpcio==1.65.0
grpcio==1.64.0
protobuf
librosa
faster-whisper
librosa==0.9.1
faster-whisper==0.9.0
pydub==0.25.1
wavmark==0.0.3
numpy
numpy==1.22.0
eng_to_ipa==0.0.2
inflect
unidecode
whisper-timestamped
inflect==7.0.0
unidecode==1.3.7
whisper-timestamped==1.14.2
openai
python-dotenv
pypinyin
pypinyin==0.50.0
cn2an==0.5.22
jieba==0.42.1
gradio
gradio==3.48.0
langid==1.1.6
git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git
git+https://github.com/myshell-ai/OpenVoice.git

View File

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,5 +1,5 @@
accelerate
grpcio==1.65.0
grpcio==1.64.0
protobuf
torch
git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,6 @@
accelerate
rerankers[transformers]
grpcio==1.65.0
grpcio==1.64.0
protobuf
certifi
transformers

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,6 @@
accelerate
sentence-transformers==3.0.1
sentence-transformers==2.5.1
transformers
grpcio==1.65.0
grpcio==1.64.0
protobuf
certifi

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,7 +1,7 @@
accelerate
transformers
grpcio==1.65.0
grpcio==1.64.0
protobuf
torch
scipy==1.14.0
scipy==1.13.0
certifi

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,9 +1,9 @@
accelerate
transformers
grpcio==1.65.0
grpcio==1.64.0
protobuf
torch
certifi
intel-extension-for-transformers
bitsandbytes
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,4 +1,4 @@
accelerate
grpcio==1.65.0
grpcio==1.64.0
protobuf
certifi

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,6 @@
accelerate
vllm
grpcio==1.65.0
grpcio==1.64.0
protobuf
certifi
transformers

View File

@@ -28,6 +28,7 @@ type Application struct {
// LocalAI System Services
BackendMonitorService *services.BackendMonitorService
GalleryService *services.GalleryService
ListModelsService *services.ListModelsService
LocalAIMetricsService *services.LocalAIMetricsService
// OpenAIService *services.OpenAIService
}

View File

@@ -57,7 +57,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
if _, err := os.Stat(modelFile); os.IsNotExist(err) {
utils.ResetDownloadTimers()
// if we failed to load the model, we try to download it
err := gallery.InstallModelFromGallery(o.Galleries, modelFile, loader.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction, o.EnforcePredownloadScans)
err := gallery.InstallModelFromGallery(o.Galleries, modelFile, loader.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction)
if err != nil {
return nil, err
}

View File

@@ -91,7 +91,7 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
Type: c.ModelType,
RopeFreqScale: c.RopeFreqScale,
NUMA: c.NUMA,
Embeddings: *c.Embeddings,
Embeddings: c.Embeddings,
LowVRAM: *c.LowVRAM,
NGPULayers: int32(*c.NGPULayers),
MMap: *c.MMap,
@@ -142,14 +142,12 @@ func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOption
MirostatTAU: float32(*c.LLMConfig.MirostatTAU),
Debug: *c.Debug,
StopPrompts: c.StopWords,
Repeat: int32(c.RepeatLastN),
FrequencyPenalty: float32(c.FrequencyPenalty),
PresencePenalty: float32(c.PresencePenalty),
Penalty: float32(c.RepeatPenalty),
Repeat: int32(c.RepeatPenalty),
NKeep: int32(c.Keep),
Batch: int32(c.Batch),
IgnoreEOS: c.IgnoreEOS,
Seed: getSeed(c),
FrequencyPenalty: float32(c.FrequencyPenalty),
MLock: *c.MMlock,
MMap: *c.MMap,
MainGPU: c.MainGPU,

View File

@@ -9,7 +9,6 @@ var CLI struct {
cliContext.Context `embed:""`
Run RunCMD `cmd:"" help:"Run LocalAI, this the default command if no other command is specified. Run 'local-ai run --help' for more information" default:"withargs"`
Federated FederatedCLI `cmd:"" help:"Run LocalAI in federated mode"`
Models ModelsCMD `cmd:"" help:"Manage LocalAI models and definitions"`
TTS TTSCMD `cmd:"" help:"Convert text to speech"`
Transcript TranscriptCMD `cmd:"" help:"Convert audio to text"`

View File

@@ -1,21 +0,0 @@
package cli
import (
"context"
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/p2p"
)
type FederatedCLI struct {
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
LoadBalanced bool `env:"LOCALAI_LOAD_BALANCED,LOAD_BALANCED" default:"false" help:"Enable load balancing" group:"p2p"`
}
func (f *FederatedCLI) Run(ctx *cliContext.Context) error {
fs := p2p.NewFederatedServer(f.Address, p2p.FederatedID, f.Peer2PeerToken, f.LoadBalanced)
return fs.Start(context.Background())
}

View File

@@ -2,7 +2,6 @@ package cli
import (
"encoding/json"
"errors"
"fmt"
cliContext "github.com/mudler/LocalAI/core/cli/context"
@@ -25,8 +24,7 @@ type ModelsList struct {
}
type ModelsInstall struct {
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
ModelArgs []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"`
ModelArgs []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"`
ModelsCMDFlags `embed:""`
}
@@ -90,15 +88,9 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
return err
}
err = gallery.SafetyScanGalleryModel(model)
if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) {
return err
}
log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model")
}
err = startup.InstallModels(galleries, "", mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName)
err = startup.InstallModels(galleries, "", mi.ModelsPath, progressCallback, modelName)
if err != nil {
return err
}

View File

@@ -3,8 +3,6 @@ package cli
import (
"context"
"fmt"
"net"
"os"
"strings"
"time"
@@ -42,27 +40,25 @@ type RunCMD struct {
Threads int `env:"LOCALAI_THREADS,THREADS" short:"t" help:"Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested" group:"performance"`
ContextSize int `env:"LOCALAI_CONTEXT_SIZE,CONTEXT_SIZE" default:"512" help:"Default context size for models" group:"performance"`
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
CORS bool `env:"LOCALAI_CORS,CORS" help:"" group:"api"`
CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"`
LibraryPath string `env:"LOCALAI_LIBRARY_PATH,LIBRARY_PATH" help:"Path to the library directory (for e.g. external libraries used by backends)" default:"/usr/share/local-ai/libs" group:"backends"`
CSRF bool `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"`
UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"`
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"`
PreloadBackendOnly bool `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"`
ExternalGRPCBackends []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"`
EnableWatchdogIdle bool `env:"LOCALAI_WATCHDOG_IDLE,WATCHDOG_IDLE" default:"false" help:"Enable watchdog for stopping backends that are idle longer than the watchdog-idle-timeout" group:"backends"`
WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"`
EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"`
WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"`
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
CORS bool `env:"LOCALAI_CORS,CORS" help:"" group:"api"`
CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"`
LibraryPath string `env:"LOCALAI_LIBRARY_PATH,LIBRARY_PATH" help:"Path to the library directory (for e.g. external libraries used by backends)" default:"/usr/share/local-ai/libs" group:"backends"`
CSRF bool `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"`
UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"`
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"api"`
Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"`
PreloadBackendOnly bool `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"`
ExternalGRPCBackends []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"`
EnableWatchdogIdle bool `env:"LOCALAI_WATCHDOG_IDLE,WATCHDOG_IDLE" default:"false" help:"Enable watchdog for stopping backends that are idle longer than the watchdog-idle-timeout" group:"backends"`
WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"`
EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"`
WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
}
func (r *RunCMD) Run(ctx *cliContext.Context) error {
@@ -93,13 +89,11 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
config.WithApiKeys(r.APIKeys),
config.WithModelsURL(append(r.Models, r.ModelArgs...)...),
config.WithOpaqueErrors(r.OpaqueErrors),
config.WithEnforcedPredownloadScans(!r.DisablePredownloadScan),
}
token := ""
if r.Peer2Peer || r.Peer2PeerToken != "" {
log.Info().Msg("P2P mode enabled")
token = r.Peer2PeerToken
token := r.Peer2PeerToken
if token == "" {
// IF no token is provided, and p2p is enabled,
// we generate one and wait for the user to pick up the token (this is for interactive)
@@ -110,46 +104,14 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
log.Info().Msg("To use the token, you can run the following command in another node or terminal:")
fmt.Printf("export TOKEN=\"%s\"\nlocal-ai worker p2p-llama-cpp-rpc\n", token)
}
opts = append(opts, config.WithP2PToken(token))
node, err := p2p.NewNode(token)
if err != nil {
return err
// Ask for user confirmation
log.Info().Msg("Press a button to proceed")
var input string
fmt.Scanln(&input)
}
log.Info().Msg("Starting P2P server discovery...")
if err := p2p.ServiceDiscoverer(context.Background(), node, token, "", func(serviceID string, node p2p.NodeData) {
var tunnelAddresses []string
for _, v := range p2p.GetAvailableNodes("") {
if v.IsOnline() {
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
} else {
log.Info().Msgf("Node %s is offline", v.ID)
}
}
tunnelEnvVar := strings.Join(tunnelAddresses, ",")
os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar)
log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar)
}); err != nil {
return err
}
}
if r.Federated {
_, port, err := net.SplitHostPort(r.Address)
if err != nil {
return err
}
if err := p2p.ExposeService(context.Background(), "localhost", port, token, p2p.FederatedID); err != nil {
return err
}
node, err := p2p.NewNode(token)
if err != nil {
return err
}
if err := p2p.ServiceDiscoverer(context.Background(), node, token, p2p.FederatedID, nil); err != nil {
if err := p2p.LLamaCPPRPCServerDiscoverer(context.Background(), token); err != nil {
return err
}
}

View File

@@ -18,7 +18,7 @@ type TranscriptCMD struct {
Backend string `short:"b" default:"whisper" help:"Backend to run the transcription model"`
Model string `short:"m" required:"" help:"Model name to run the TTS"`
Language string `short:"l" help:"Language of the audio file"`
Translate bool `short:"c" help:"Translate the transcription to english"`
Translate bool `short:"t" help:"Translate the transcription to english"`
Threads int `short:"t" default:"1" help:"Number of threads used for parallel computation"`
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`

View File

@@ -1,22 +1,16 @@
package cli
import (
"encoding/json"
"errors"
"fmt"
"github.com/rs/zerolog/log"
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/pkg/downloader"
gguf "github.com/thxcode/gguf-parser-go"
)
type UtilCMD struct {
GGUFInfo GGUFInfoCMD `cmd:"" name:"gguf-info" help:"Get information about a GGUF file"`
HFScan HFScanCMD `cmd:"" name:"hf-scan" help:"Checks installed models for known security issues. WARNING: this is a best-effort feature and may not catch everything!"`
}
type GGUFInfoCMD struct {
@@ -24,12 +18,6 @@ type GGUFInfoCMD struct {
Header bool `optional:"" default:"false" name:"header" help:"Show header information"`
}
type HFScanCMD struct {
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
ToScan []string `arg:""`
}
func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error {
if u.Args == nil || len(u.Args) == 0 {
return fmt.Errorf("no GGUF file provided")
@@ -65,37 +53,3 @@ func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error {
return nil
}
func (hfscmd *HFScanCMD) Run(ctx *cliContext.Context) error {
log.Info().Msg("LocalAI Security Scanner - This is BEST EFFORT functionality! Currently limited to huggingface models!")
if len(hfscmd.ToScan) == 0 {
log.Info().Msg("Checking all installed models against galleries")
var galleries []config.Gallery
if err := json.Unmarshal([]byte(hfscmd.Galleries), &galleries); err != nil {
log.Error().Err(err).Msg("unable to load galleries")
}
err := gallery.SafetyScanGalleryModels(galleries, hfscmd.ModelsPath)
if err == nil {
log.Info().Msg("No security warnings were detected for your installed models. Please note that this is a BEST EFFORT tool, and all issues may not be detected.")
} else {
log.Error().Err(err).Msg("! WARNING ! A known-vulnerable model is installed!")
}
return err
} else {
var errs error = nil
for _, uri := range hfscmd.ToScan {
log.Info().Str("uri", uri).Msg("scanning specific uri")
scanResults, err := downloader.HuggingFaceScan(uri)
if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) {
log.Error().Err(err).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("! WARNING ! A known-vulnerable model is included in this repo!")
errs = errors.Join(errs, err)
}
}
if errs != nil {
return errs
}
log.Info().Msg("No security warnings were detected for your installed models. Please note that this is a BEST EFFORT tool, and all issues may not be detected.")
return nil
}
}

View File

@@ -20,7 +20,7 @@ import (
type P2P struct {
WorkerFlags `embed:""`
Token string `env:"LOCALAI_TOKEN,LOCALAI_P2P_TOKEN,TOKEN" help:"P2P token to use"`
Token string `env:"LOCALAI_TOKEN,TOKEN" help:"JSON list of galleries"`
NoRunner bool `env:"LOCALAI_NO_RUNNER,NO_RUNNER" help:"Do not start the llama-cpp-rpc-server"`
RunnerAddress string `env:"LOCALAI_RUNNER_ADDRESS,RUNNER_ADDRESS" help:"Address of the llama-cpp-rpc-server"`
RunnerPort string `env:"LOCALAI_RUNNER_PORT,RUNNER_PORT" help:"Port of the llama-cpp-rpc-server"`
@@ -59,7 +59,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
p = r.RunnerPort
}
err = p2p.ExposeService(context.Background(), address, p, r.Token, "")
err = p2p.BindLLamaCPPWorker(context.Background(), address, p, r.Token)
if err != nil {
return err
}
@@ -99,7 +99,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
}
}()
err = p2p.ExposeService(context.Background(), address, fmt.Sprint(port), r.Token, "")
err = p2p.BindLLamaCPPWorker(context.Background(), address, fmt.Sprint(port), r.Token)
if err != nil {
return err
}

View File

@@ -31,9 +31,7 @@ type ApplicationConfig struct {
PreloadModelsFromPath string
CORSAllowOrigins string
ApiKeys []string
EnforcePredownloadScans bool
OpaqueErrors bool
P2PToken string
ModelLibraryURL string
@@ -97,12 +95,6 @@ func WithCsrf(b bool) AppOption {
}
}
func WithP2PToken(s string) AppOption {
return func(o *ApplicationConfig) {
o.P2PToken = s
}
}
func WithModelLibraryURL(url string) AppOption {
return func(o *ApplicationConfig) {
o.ModelLibraryURL = url
@@ -302,12 +294,6 @@ func WithApiKeys(apiKeys []string) AppOption {
}
}
func WithEnforcedPredownloadScans(enforced bool) AppOption {
return func(o *ApplicationConfig) {
o.EnforcePredownloadScans = enforced
}
}
func WithOpaqueErrors(opaque bool) AppOption {
return func(o *ApplicationConfig) {
o.OpaqueErrors = opaque

View File

@@ -32,7 +32,7 @@ type BackendConfig struct {
Threads *int `yaml:"threads"`
Debug *bool `yaml:"debug"`
Roles map[string]string `yaml:"roles"`
Embeddings *bool `yaml:"embeddings"`
Embeddings bool `yaml:"embeddings"`
Backend string `yaml:"backend"`
TemplateConfig TemplateConfig `yaml:"template"`
@@ -338,10 +338,6 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
cfg.LowVRAM = &falseV
}
if cfg.Embeddings == nil {
cfg.Embeddings = &falseV
}
// Value passed by the top level are treated as default (no implicit defaults)
// defaults are set by the user
if ctx == 0 {

View File

@@ -20,7 +20,6 @@ const (
ChatML
Mistral03
Gemma
DeepSeek2
)
type settingsConfig struct {
@@ -38,17 +37,6 @@ var defaultsSettings map[familyType]settingsConfig = map[familyType]settingsConf
Completion: "{{.Input}}",
},
},
DeepSeek2: {
StopWords: []string{"<end▁of▁sentence>"},
TemplateConfig: TemplateConfig{
ChatMessage: `{{if eq .RoleName "user" -}}User: {{.Content }}
{{ end -}}
{{if eq .RoleName "assistant" -}}Assistant: {{.Content}}<end▁of▁sentence>{{end}}
{{if eq .RoleName "system" -}}{{.Content}}
{{end -}}`,
Chat: "{{.Input -}}\nAssistant: ",
},
},
LLaMa3: {
StopWords: []string{"<|eot_id|>"},
TemplateConfig: TemplateConfig{
@@ -220,11 +208,8 @@ func identifyFamily(f *gguf.GGUFFile) familyType {
qwen2 := arch == "qwen2"
phi3 := arch == "phi-3"
gemma := strings.HasPrefix(f.Model().Name, "gemma")
deepseek2 := arch == "deepseek2"
switch {
case deepseek2:
return DeepSeek2
case gemma:
return Gemma
case llama3:

View File

@@ -7,16 +7,15 @@ import (
"path/filepath"
"strings"
"dario.cat/mergo"
"github.com/imdario/mergo"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/rs/zerolog/log"
"gopkg.in/yaml.v2"
)
// Installs a model from the gallery
func InstallModelFromGallery(galleries []config.Gallery, name string, basePath string, req GalleryModel, downloadStatus func(string, string, string, float64), enforceScan bool) error {
func InstallModelFromGallery(galleries []config.Gallery, name string, basePath string, req GalleryModel, downloadStatus func(string, string, string, float64)) error {
applyModel := func(model *GalleryModel) error {
name = strings.ReplaceAll(name, string(os.PathSeparator), "__")
@@ -64,7 +63,7 @@ func InstallModelFromGallery(galleries []config.Gallery, name string, basePath s
return err
}
if err := InstallModel(basePath, installName, &config, model.Overrides, downloadStatus, enforceScan); err != nil {
if err := InstallModel(basePath, installName, &config, model.Overrides, downloadStatus); err != nil {
return err
}
@@ -190,12 +189,6 @@ func DeleteModelFromSystem(basePath string, name string, additionalFiles []strin
galleryFile := filepath.Join(basePath, galleryFileName(name))
for _, f := range []string{configFile, galleryFile} {
if err := utils.VerifyPath(f, basePath); err != nil {
return fmt.Errorf("failed to verify path %s: %w", f, err)
}
}
var err error
// Delete all the files associated to the model
// read the model config
@@ -235,29 +228,3 @@ func DeleteModelFromSystem(basePath string, name string, additionalFiles []strin
return err
}
// This is ***NEVER*** going to be perfect or finished.
// This is a BEST EFFORT function to surface known-vulnerable models to users.
func SafetyScanGalleryModels(galleries []config.Gallery, basePath string) error {
galleryModels, err := AvailableGalleryModels(galleries, basePath)
if err != nil {
return err
}
for _, gM := range galleryModels {
if gM.Installed {
err = errors.Join(err, SafetyScanGalleryModel(gM))
}
}
return err
}
func SafetyScanGalleryModel(galleryModel *GalleryModel) error {
for _, file := range galleryModel.AdditionalFiles {
scanResults, err := downloader.HuggingFaceScan(file.URI)
if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) {
log.Error().Str("model", galleryModel.Name).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("Contains unsafe file(s)!")
return err
}
}
return nil
}

View File

@@ -1,12 +1,11 @@
package gallery
import (
"errors"
"fmt"
"os"
"path/filepath"
"dario.cat/mergo"
"github.com/imdario/mergo"
lconfig "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/utils"
@@ -95,7 +94,7 @@ func ReadConfigFile(filePath string) (*Config, error) {
return &config, nil
}
func InstallModel(basePath, nameOverride string, config *Config, configOverrides map[string]interface{}, downloadStatus func(string, string, string, float64), enforceScan bool) error {
func InstallModel(basePath, nameOverride string, config *Config, configOverrides map[string]interface{}, downloadStatus func(string, string, string, float64)) error {
// Create base path if it doesn't exist
err := os.MkdirAll(basePath, 0750)
if err != nil {
@@ -113,18 +112,9 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides
if err := utils.VerifyPath(file.Filename, basePath); err != nil {
return err
}
// Create file path
filePath := filepath.Join(basePath, file.Filename)
if enforceScan {
scanResults, err := downloader.HuggingFaceScan(file.URI)
if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) {
log.Error().Str("model", config.Name).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("Contains unsafe file(s)!")
return err
}
}
if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.Files), downloadStatus); err != nil {
return err
}

View File

@@ -21,7 +21,7 @@ var _ = Describe("Model test", func() {
defer os.RemoveAll(tempdir)
c, err := ReadConfigFile(filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml"))
Expect(err).ToNot(HaveOccurred())
err = InstallModel(tempdir, "", c, map[string]interface{}{}, func(string, string, string, float64) {}, true)
err = InstallModel(tempdir, "", c, map[string]interface{}{}, func(string, string, string, float64) {})
Expect(err).ToNot(HaveOccurred())
for _, f := range []string{"cerebras", "cerebras-completion.tmpl", "cerebras-chat.tmpl", "cerebras.yaml"} {
@@ -69,7 +69,7 @@ var _ = Describe("Model test", func() {
Expect(models[0].URL).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"))
Expect(models[0].Installed).To(BeFalse())
err = InstallModelFromGallery(galleries, "test@bert", tempdir, GalleryModel{}, func(s1, s2, s3 string, f float64) {}, true)
err = InstallModelFromGallery(galleries, "test@bert", tempdir, GalleryModel{}, func(s1, s2, s3 string, f float64) {})
Expect(err).ToNot(HaveOccurred())
dat, err := os.ReadFile(filepath.Join(tempdir, "bert.yaml"))
@@ -106,7 +106,7 @@ var _ = Describe("Model test", func() {
c, err := ReadConfigFile(filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml"))
Expect(err).ToNot(HaveOccurred())
err = InstallModel(tempdir, "foo", c, map[string]interface{}{}, func(string, string, string, float64) {}, true)
err = InstallModel(tempdir, "foo", c, map[string]interface{}{}, func(string, string, string, float64) {})
Expect(err).ToNot(HaveOccurred())
for _, f := range []string{"cerebras", "cerebras-completion.tmpl", "cerebras-chat.tmpl", "foo.yaml"} {
@@ -122,7 +122,7 @@ var _ = Describe("Model test", func() {
c, err := ReadConfigFile(filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml"))
Expect(err).ToNot(HaveOccurred())
err = InstallModel(tempdir, "foo", c, map[string]interface{}{"backend": "foo"}, func(string, string, string, float64) {}, true)
err = InstallModel(tempdir, "foo", c, map[string]interface{}{"backend": "foo"}, func(string, string, string, float64) {})
Expect(err).ToNot(HaveOccurred())
for _, f := range []string{"cerebras", "cerebras-completion.tmpl", "cerebras-chat.tmpl", "foo.yaml"} {
@@ -148,7 +148,7 @@ var _ = Describe("Model test", func() {
c, err := ReadConfigFile(filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml"))
Expect(err).ToNot(HaveOccurred())
err = InstallModel(tempdir, "../../../foo", c, map[string]interface{}{}, func(string, string, string, float64) {}, true)
err = InstallModel(tempdir, "../../../foo", c, map[string]interface{}{}, func(string, string, string, float64) {})
Expect(err).To(HaveOccurred())
})
})

View File

@@ -5,8 +5,6 @@ import (
"strings"
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/model"
"github.com/rs/zerolog/log"
)
@@ -15,7 +13,7 @@ import (
// If no model is specified, it will take the first available
// Takes a model string as input which should be the one received from the user request.
// It returns the model name resolved from the context and an error if any.
func ModelFromContext(ctx *fiber.Ctx, cl *config.BackendConfigLoader, loader *model.ModelLoader, modelInput string, firstModel bool) (string, error) {
func ModelFromContext(ctx *fiber.Ctx, loader *model.ModelLoader, modelInput string, firstModel bool) (string, error) {
if ctx.Params("model") != "" {
modelInput = ctx.Params("model")
}
@@ -26,7 +24,7 @@ func ModelFromContext(ctx *fiber.Ctx, cl *config.BackendConfigLoader, loader *mo
// If no model was specified, take the first available
if modelInput == "" && !bearerExists && firstModel {
models, _ := services.ListModels(cl, loader, "", true)
models, _ := loader.ListModels()
if len(models) > 0 {
modelInput = models[0]
log.Debug().Msgf("No model specified, using: %s", modelInput)

View File

@@ -7,7 +7,6 @@ import (
"github.com/chasefleming/elem-go"
"github.com/chasefleming/elem-go/attrs"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/xsync"
)
@@ -16,14 +15,6 @@ const (
noImage = "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg"
)
func renderElements(n []elem.Node) string {
render := ""
for _, r := range n {
render += r.Render()
}
return render
}
func DoneProgress(galleryID, text string, showDelete bool) string {
var modelName = galleryID
// Split by @ and grab the name
@@ -81,135 +72,6 @@ func ProgressBar(progress string) string {
).Render()
}
func P2PNodeStats(nodes []p2p.NodeData) string {
/*
<div class="bg-gray-800 p-6 rounded-lg shadow-lg text-left">
<p class="text-xl font-semibold text-gray-200">Total Workers Detected: {{ len .Nodes }}</p>
{{ $online := 0 }}
{{ range .Nodes }}
{{ if .IsOnline }}
{{ $online = add $online 1 }}
{{ end }}
{{ end }}
<p class="text-xl font-semibold text-gray-200">Total Online Workers: {{$online}}</p>
</div>
*/
online := 0
for _, n := range nodes {
if n.IsOnline() {
online++
}
}
class := "text-green-500"
if online == 0 {
class = "text-red-500"
}
/*
<i class="fas fa-circle animate-pulse text-green-500 ml-2 mr-1"></i>
*/
circle := elem.I(attrs.Props{
"class": "fas fa-circle animate-pulse " + class + " ml-2 mr-1",
})
nodesElements := []elem.Node{
elem.Span(
attrs.Props{
"class": class,
},
circle,
elem.Text(fmt.Sprintf("%d", online)),
),
elem.Span(
attrs.Props{
"class": "text-gray-200",
},
elem.Text(fmt.Sprintf("/%d", len(nodes))),
),
}
return renderElements(nodesElements)
}
func P2PNodeBoxes(nodes []p2p.NodeData) string {
/*
<div class="bg-gray-800 p-4 rounded-lg shadow-lg text-left">
<div class="flex items-center mb-2">
<i class="fas fa-desktop text-gray-400 mr-2"></i>
<span class="text-gray-200 font-semibold">{{.ID}}</span>
</div>
<p class="text-sm text-gray-400 mt-2 flex items-center">
Status:
<i class="fas fa-circle {{ if .IsOnline }}text-green-500{{ else }}text-red-500{{ end }} ml-2 mr-1"></i>
<span class="{{ if .IsOnline }}text-green-400{{ else }}text-red-400{{ end }}">
{{ if .IsOnline }}Online{{ else }}Offline{{ end }}
</span>
</p>
</div>
*/
nodesElements := []elem.Node{}
for _, n := range nodes {
nodesElements = append(nodesElements,
elem.Div(
attrs.Props{
"class": "bg-gray-700 p-6 rounded-lg shadow-lg text-left",
},
elem.P(
attrs.Props{
"class": "text-sm text-gray-400 mt-2 flex",
},
elem.I(
attrs.Props{
"class": "fas fa-desktop text-gray-400 mr-2",
},
),
elem.Text("Name: "),
elem.Span(
attrs.Props{
"class": "text-gray-200 font-semibold ml-2 mr-1",
},
elem.Text(n.ID),
),
elem.Text("Status: "),
elem.If(
n.IsOnline(),
elem.I(
attrs.Props{
"class": "fas fa-circle animate-pulse text-green-500 ml-2 mr-1",
},
),
elem.I(
attrs.Props{
"class": "fas fa-circle animate-pulse text-red-500 ml-2 mr-1",
},
),
),
elem.If(
n.IsOnline(),
elem.Span(
attrs.Props{
"class": "text-green-400",
},
elem.Text("Online"),
),
elem.Span(
attrs.Props{
"class": "text-red-400",
},
elem.Text("Offline"),
),
),
),
))
}
return renderElements(nodesElements)
}
func StartProgressBar(uid, progress, text string) string {
if progress == "" {
progress = "0"

View File

@@ -28,7 +28,7 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi
return err
}
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.ModelID, false)
modelFile, err := fiberContext.ModelFromContext(c, ml, input.ModelID, false)
if err != nil {
modelFile = input.ModelID
log.Warn().Msgf("Model not found in context: %s", input.ModelID)

View File

@@ -12,11 +12,6 @@ import (
"github.com/rs/zerolog/log"
)
// JINARerankEndpoint acts like the Jina reranker endpoint (https://jina.ai/reranker/)
// @Summary Reranks a list of phrases by relevance to a given text query.
// @Param request body schema.JINARerankRequest true "query params"
// @Success 200 {object} schema.JINARerankResponse "Response"
// @Router /v1/rerank [post]
func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
req := new(schema.JINARerankRequest)
@@ -33,7 +28,7 @@ func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
return err
}
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, false)
if err != nil {
modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)

View File

@@ -6,11 +6,6 @@ import (
"github.com/mudler/LocalAI/core/services"
)
// BackendMonitorEndpoint returns the status of the specified backend
// @Summary Backend monitor endpoint
// @Param request body schema.BackendMonitorRequest true "Backend statistics request"
// @Success 200 {object} proto.StatusResponse "Response"
// @Router /backend/monitor [get]
func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
@@ -28,10 +23,6 @@ func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ct
}
}
// BackendMonitorEndpoint shuts down the specified backend
// @Summary Backend monitor endpoint
// @Param request body schema.BackendMonitorRequest true "Backend statistics request"
// @Router /backend/shutdown [post]
func BackendShutdownEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(schema.BackendMonitorRequest)

View File

@@ -9,7 +9,6 @@ import (
"github.com/google/uuid"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services"
"github.com/rs/zerolog/log"
)
@@ -34,10 +33,6 @@ func CreateModelGalleryEndpointService(galleries []config.Gallery, modelPath str
}
}
// GetOpStatusEndpoint returns the job status
// @Summary Returns the job status
// @Success 200 {object} gallery.GalleryOpStatus "Response"
// @Router /models/jobs/{uuid} [get]
func (mgs *ModelGalleryEndpointService) GetOpStatusEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
status := mgs.galleryApplier.GetStatus(c.Params("uuid"))
@@ -48,21 +43,12 @@ func (mgs *ModelGalleryEndpointService) GetOpStatusEndpoint() func(c *fiber.Ctx)
}
}
// GetAllStatusEndpoint returns all the jobs status progress
// @Summary Returns all the jobs status progress
// @Success 200 {object} map[string]gallery.GalleryOpStatus "Response"
// @Router /models/jobs [get]
func (mgs *ModelGalleryEndpointService) GetAllStatusEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
return c.JSON(mgs.galleryApplier.GetAllStatus())
}
}
// ApplyModelGalleryEndpoint installs a new model to a LocalAI instance from the model gallery
// @Summary Install models to LocalAI.
// @Param request body GalleryModel true "query params"
// @Success 200 {object} schema.GalleryResponse "Response"
// @Router /models/apply [post]
func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(GalleryModel)
@@ -82,15 +68,13 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fibe
Galleries: mgs.galleries,
ConfigURL: input.ConfigURL,
}
return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
return c.JSON(struct {
ID string `json:"uuid"`
StatusURL string `json:"status"`
}{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
}
}
// DeleteModelGalleryEndpoint lets delete models from a LocalAI instance
// @Summary delete models to LocalAI.
// @Param name path string true "Model name"
// @Success 200 {object} schema.GalleryResponse "Response"
// @Router /models/delete/{name} [post]
func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
modelName := c.Params("name")
@@ -105,14 +89,13 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib
return err
}
return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
return c.JSON(struct {
ID string `json:"uuid"`
StatusURL string `json:"status"`
}{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
}
}
// ListModelFromGalleryEndpoint list the available models for installation from the active galleries
// @Summary List installable models.
// @Success 200 {object} []gallery.GalleryModel "Response"
// @Router /models/available [get]
func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries)
@@ -133,10 +116,6 @@ func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *f
}
}
// ListModelGalleriesEndpoint list the available galleries configured in LocalAI
// @Summary List all Galleries
// @Success 200 {object} []config.Gallery "Response"
// @Router /models/galleries [get]
// NOTE: This is different (and much simpler!) than above! This JUST lists the model galleries that have been loaded, not their contents!
func (mgs *ModelGalleryEndpointService) ListModelGalleriesEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
@@ -149,11 +128,6 @@ func (mgs *ModelGalleryEndpointService) ListModelGalleriesEndpoint() func(c *fib
}
}
// AddModelGalleryEndpoint adds a gallery in LocalAI
// @Summary Adds a gallery in LocalAI
// @Param request body config.Gallery true "Gallery details"
// @Success 200 {object} []config.Gallery "Response"
// @Router /models/galleries [post]
func (mgs *ModelGalleryEndpointService) AddModelGalleryEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(config.Gallery)
@@ -176,11 +150,6 @@ func (mgs *ModelGalleryEndpointService) AddModelGalleryEndpoint() func(c *fiber.
}
}
// RemoveModelGalleryEndpoint remove a gallery in LocalAI
// @Summary removes a gallery from LocalAI
// @Param request body config.Gallery true "Gallery details"
// @Success 200 {object} []config.Gallery "Response"
// @Router /models/galleries [delete]
func (mgs *ModelGalleryEndpointService) RemoveModelGalleryEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(config.Gallery)
@@ -196,10 +165,6 @@ func (mgs *ModelGalleryEndpointService) RemoveModelGalleryEndpoint() func(c *fib
mgs.galleries = slices.DeleteFunc(mgs.galleries, func(gallery config.Gallery) bool {
return gallery.Name == input.Name
})
dat, err := json.Marshal(mgs.galleries)
if err != nil {
return err
}
return c.Send(dat)
return c.Send(nil)
}
}

View File

@@ -9,11 +9,8 @@ import (
"github.com/prometheus/client_golang/prometheus/promhttp"
)
// LocalAIMetricsEndpoint returns the metrics endpoint for LocalAI
// @Summary Prometheus metrics endpoint
// @Param request body config.Gallery true "Gallery details"
// @Router /metrics [get]
func LocalAIMetricsEndpoint() fiber.Handler {
return adaptor.HTTPHandler(promhttp.Handler())
}

View File

@@ -1,28 +0,0 @@
package localai
import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/schema"
)
// ShowP2PNodes returns the P2P Nodes
// @Summary Returns available P2P nodes
// @Success 200 {object} []schema.P2PNodesResponse "Response"
// @Router /api/p2p [get]
func ShowP2PNodes(c *fiber.Ctx) error {
// Render index
return c.JSON(schema.P2PNodesResponse{
Nodes: p2p.GetAvailableNodes(""),
FederatedNodes: p2p.GetAvailableNodes(p2p.FederatedID),
})
}
// ShowP2PToken returns the P2P token
// @Summary Show the P2P token
// @Success 200 {string} string "Response"
// @Router /api/p2p/token [get]
func ShowP2PToken(appConfig *config.ApplicationConfig) func(*fiber.Ctx) error {
return func(c *fiber.Ctx) error { return c.Send([]byte(appConfig.P2PToken)) }
}

View File

@@ -29,7 +29,7 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi
return err
}
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, false)
if err != nil {
modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)

View File

@@ -4,8 +4,6 @@ import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal"
"github.com/mudler/LocalAI/pkg/model"
)
@@ -13,7 +11,7 @@ import (
func WelcomeEndpoint(appConfig *config.ApplicationConfig,
cl *config.BackendConfigLoader, ml *model.ModelLoader, modelStatus func() (map[string]string, map[string]string)) func(*fiber.Ctx) error {
return func(c *fiber.Ctx) error {
models, _ := services.ListModels(cl, ml, "", true)
models, _ := ml.ListModels()
backendConfigs := cl.GetAllBackendConfigs()
galleryConfigs := map[string]*gallery.Config{}
@@ -29,21 +27,12 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig,
// Get model statuses to display in the UI the operation in progress
processingModels, taskTypes := modelStatus()
modelsWithoutConfig := []string{}
for _, m := range models {
if _, ok := galleryConfigs[m]; !ok {
modelsWithoutConfig = append(modelsWithoutConfig, m)
}
}
summary := fiber.Map{
"Title": "LocalAI API - " + internal.PrintableVersion(),
"Version": internal.PrintableVersion(),
"Models": modelsWithoutConfig,
"Models": models,
"ModelsConfig": backendConfigs,
"GalleryConfig": galleryConfigs,
"IsP2PEnabled": p2p.IsP2PEnabled(),
"ApplicationConfig": appConfig,
"ProcessingModels": processingModels,
"TaskTypes": taskTypes,

View File

@@ -11,8 +11,6 @@ import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services"
model "github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/rs/zerolog/log"
@@ -81,7 +79,7 @@ func CreateAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad
return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"})
}
if !modelExists(cl, ml, request.Model) {
if !modelExists(ml, request.Model) {
log.Warn().Msgf("Model: %s was not found in list of models.", request.Model)
return c.Status(fiber.StatusBadRequest).SendString("Model " + request.Model + " not found")
}
@@ -126,14 +124,6 @@ func generateRandomID() int64 {
return currentId
}
// ListAssistantsEndpoint is the OpenAI Assistant API endpoint to list assistents https://platform.openai.com/docs/api-reference/assistants/listAssistants
// @Summary List available assistents
// @Param limit query int false "Limit the number of assistants returned"
// @Param order query string false "Order of assistants returned"
// @Param after query string false "Return assistants created after the given ID"
// @Param before query string false "Return assistants created before the given ID"
// @Success 200 {object} []Assistant "Response"
// @Router /v1/assistants [get]
func ListAssistantsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
// Because we're altering the existing assistants list we should just duplicate it for now.
@@ -223,9 +213,9 @@ func filterAssistantsAfterID(assistants []Assistant, id string) []Assistant {
return filteredAssistants
}
func modelExists(cl *config.BackendConfigLoader, ml *model.ModelLoader, modelName string) (found bool) {
func modelExists(ml *model.ModelLoader, modelName string) (found bool) {
found = false
models, err := services.ListModels(cl, ml, "", true)
models, err := ml.ListModels()
if err != nil {
return
}
@@ -239,11 +229,13 @@ func modelExists(cl *config.BackendConfigLoader, ml *model.ModelLoader, modelNam
return
}
// DeleteAssistantEndpoint is the OpenAI Assistant API endpoint to delete assistents https://platform.openai.com/docs/api-reference/assistants/deleteAssistant
// @Summary Delete assistents
// @Success 200 {object} schema.DeleteAssistantResponse "Response"
// @Router /v1/assistants/{assistant_id} [delete]
func DeleteAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
type DeleteAssistantResponse struct {
ID string `json:"id"`
Object string `json:"object"`
Deleted bool `json:"deleted"`
}
return func(c *fiber.Ctx) error {
assistantID := c.Params("assistant_id")
if assistantID == "" {
@@ -254,7 +246,7 @@ func DeleteAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad
if assistant.ID == assistantID {
Assistants = append(Assistants[:i], Assistants[i+1:]...)
utils.SaveConfig(appConfig.ConfigsDir, AssistantsConfigFile, Assistants)
return c.Status(fiber.StatusOK).JSON(schema.DeleteAssistantResponse{
return c.Status(fiber.StatusOK).JSON(DeleteAssistantResponse{
ID: assistantID,
Object: "assistant.deleted",
Deleted: true,
@@ -263,7 +255,7 @@ func DeleteAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad
}
log.Warn().Msgf("Unable to find assistant %s for deletion", assistantID)
return c.Status(fiber.StatusNotFound).JSON(schema.DeleteAssistantResponse{
return c.Status(fiber.StatusNotFound).JSON(DeleteAssistantResponse{
ID: assistantID,
Object: "assistant.deleted",
Deleted: false,
@@ -271,10 +263,6 @@ func DeleteAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad
}
}
// GetAssistantEndpoint is the OpenAI Assistant API endpoint to get assistents https://platform.openai.com/docs/api-reference/assistants/getAssistant
// @Summary Get assistent data
// @Success 200 {object} Assistant "Response"
// @Router /v1/assistants/{assistant_id} [get]
func GetAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
assistantID := c.Params("assistant_id")
@@ -304,9 +292,19 @@ var (
AssistantsFileConfigFile = "assistantsFile.json"
)
type AssistantFileRequest struct {
FileID string `json:"file_id"`
}
type DeleteAssistantFileResponse struct {
ID string `json:"id"`
Object string `json:"object"`
Deleted bool `json:"deleted"`
}
func CreateAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
request := new(schema.AssistantFileRequest)
request := new(AssistantFileRequest)
if err := c.BodyParser(request); err != nil {
return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"})
}
@@ -347,7 +345,7 @@ func CreateAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
func ListAssistantFilesEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
type ListAssistantFiles struct {
Data []schema.File
Data []File
Object string
}
@@ -465,7 +463,7 @@ func DeleteAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
// Remove the file from the assistantFiles slice
AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...)
utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles)
return c.Status(fiber.StatusOK).JSON(schema.DeleteAssistantFileResponse{
return c.Status(fiber.StatusOK).JSON(DeleteAssistantFileResponse{
ID: fileId,
Object: "assistant.file.deleted",
Deleted: true,
@@ -481,7 +479,7 @@ func DeleteAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...)
utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles)
return c.Status(fiber.StatusNotFound).JSON(schema.DeleteAssistantFileResponse{
return c.Status(fiber.StatusNotFound).JSON(DeleteAssistantFileResponse{
ID: fileId,
Object: "assistant.file.deleted",
Deleted: true,
@@ -492,7 +490,7 @@ func DeleteAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
}
log.Warn().Msgf("Unable to find assistant: %s", assistantID)
return c.Status(fiber.StatusNotFound).JSON(schema.DeleteAssistantFileResponse{
return c.Status(fiber.StatusNotFound).JSON(DeleteAssistantFileResponse{
ID: fileId,
Object: "assistant.file.deleted",
Deleted: false,

View File

@@ -14,7 +14,6 @@ import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/model"
"github.com/stretchr/testify/assert"
)
@@ -27,7 +26,7 @@ type MockLoader struct {
func tearDown() func() {
return func() {
UploadedFiles = []schema.File{}
UploadedFiles = []File{}
Assistants = []Assistant{}
AssistantFiles = []AssistantFile{}
_ = os.Remove(filepath.Join(configsDir, AssistantsConfigFile))
@@ -295,7 +294,7 @@ func TestAssistantEndpoints(t *testing.T) {
file, assistant, err := createFileAndAssistant(t, app, appConfig)
assert.NoError(t, err)
afr := schema.AssistantFileRequest{FileID: file.ID}
afr := AssistantFileRequest{FileID: file.ID}
af, _, err := createAssistantFile(app, afr, assistant.ID)
assert.NoError(t, err)
@@ -306,7 +305,7 @@ func TestAssistantEndpoints(t *testing.T) {
file, assistant, err := createFileAndAssistant(t, app, appConfig)
assert.NoError(t, err)
afr := schema.AssistantFileRequest{FileID: file.ID}
afr := AssistantFileRequest{FileID: file.ID}
af, _, err := createAssistantFile(app, afr, assistant.ID)
assert.NoError(t, err)
@@ -317,7 +316,7 @@ func TestAssistantEndpoints(t *testing.T) {
file, assistant, err := createFileAndAssistant(t, app, appConfig)
assert.NoError(t, err)
afr := schema.AssistantFileRequest{FileID: file.ID}
afr := AssistantFileRequest{FileID: file.ID}
af, _, err := createAssistantFile(app, afr, assistant.ID)
assert.NoError(t, err)
t.Cleanup(cleanupAssistantFile(t, app, af.ID, af.AssistantID))
@@ -339,7 +338,7 @@ func TestAssistantEndpoints(t *testing.T) {
file, assistant, err := createFileAndAssistant(t, app, appConfig)
assert.NoError(t, err)
afr := schema.AssistantFileRequest{FileID: file.ID}
afr := AssistantFileRequest{FileID: file.ID}
af, _, err := createAssistantFile(app, afr, assistant.ID)
assert.NoError(t, err)
@@ -350,7 +349,7 @@ func TestAssistantEndpoints(t *testing.T) {
}
func createFileAndAssistant(t *testing.T, app *fiber.App, o *config.ApplicationConfig) (schema.File, Assistant, error) {
func createFileAndAssistant(t *testing.T, app *fiber.App, o *config.ApplicationConfig) (File, Assistant, error) {
ar := &AssistantRequest{
Model: "ggml-gpt4all-j",
Name: "3.5-turbo",
@@ -363,7 +362,7 @@ func createFileAndAssistant(t *testing.T, app *fiber.App, o *config.ApplicationC
assistant, _, err := createAssistant(app, *ar)
if err != nil {
return schema.File{}, Assistant{}, err
return File{}, Assistant{}, err
}
t.Cleanup(cleanupAllAssistants(t, app, []string{assistant.ID}))
@@ -375,7 +374,7 @@ func createFileAndAssistant(t *testing.T, app *fiber.App, o *config.ApplicationC
return file, assistant, nil
}
func createAssistantFile(app *fiber.App, afr schema.AssistantFileRequest, assistantId string) (AssistantFile, *http.Response, error) {
func createAssistantFile(app *fiber.App, afr AssistantFileRequest, assistantId string) (AssistantFile, *http.Response, error) {
afrJson, err := json.Marshal(afr)
if err != nil {
return AssistantFile{}, nil, err
@@ -452,7 +451,7 @@ func cleanupAssistantFile(t *testing.T, app *fiber.App, fileId, assistantId stri
resp, err := app.Test(request)
assert.NoError(t, err)
var dafr schema.DeleteAssistantFileResponse
var dafr DeleteAssistantFileResponse
err = json.NewDecoder(resp.Body).Decode(&dafr)
assert.NoError(t, err)
assert.True(t, dafr.Deleted)

View File

@@ -159,7 +159,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}
return func(c *fiber.Ctx) error {
modelFile, input, err := readRequest(c, cl, ml, startupOptions, true)
modelFile, input, err := readRequest(c, ml, startupOptions, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
@@ -225,10 +225,18 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}
// Update input grammar
jsStruct := funcs.ToJSONStructure(config.FunctionsConfig.FunctionNameKey, config.FunctionsConfig.FunctionNameKey)
config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
// Handle if we should return "name" instead of "functions"
if config.FunctionsConfig.FunctionName {
jsStruct := funcs.ToJSONNameStructure()
config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
} else {
jsStruct := funcs.ToJSONFunctionStructure()
config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
}
case input.JSONFunctionGrammarObject != nil:
config.Grammar = input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
case input.JSONFunctionGrammarObjectName != nil:
config.Grammar = input.JSONFunctionGrammarObjectName.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
default:
// Force picking one of the functions by the request
if config.FunctionToCall() != "" {

View File

@@ -57,7 +57,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
}
return func(c *fiber.Ctx) error {
modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
modelFile, input, err := readRequest(c, ml, appConfig, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}

View File

@@ -16,14 +16,9 @@ import (
"github.com/rs/zerolog/log"
)
// EditEndpoint is the OpenAI edit API endpoint
// @Summary OpenAI edit endpoint
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/edits [post]
func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
modelFile, input, err := readRequest(c, ml, appConfig, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}

View File

@@ -23,7 +23,7 @@ import (
// @Router /v1/embeddings [post]
func EmbeddingsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
model, input, err := readRequest(c, cl, ml, appConfig, true)
model, input, err := readRequest(c, ml, appConfig, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}

View File

@@ -9,16 +9,25 @@ import (
"time"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/pkg/utils"
)
var UploadedFiles []schema.File
var UploadedFiles []File
const UploadedFilesFile = "uploadedFiles.json"
// File represents the structure of a file object from the OpenAI API.
type File struct {
ID string `json:"id"` // Unique identifier for the file
Object string `json:"object"` // Type of the object (e.g., "file")
Bytes int `json:"bytes"` // Size of the file in bytes
CreatedAt time.Time `json:"created_at"` // The time at which the file was created
Filename string `json:"filename"` // The name of the file
Purpose string `json:"purpose"` // The purpose of the file (e.g., "fine-tune", "classifications", etc.)
}
// UploadFilesEndpoint https://platform.openai.com/docs/api-reference/files/create
func UploadFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
@@ -52,7 +61,7 @@ func UploadFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Appli
return c.Status(fiber.StatusInternalServerError).SendString("Failed to save file: " + err.Error())
}
f := schema.File{
f := File{
ID: fmt.Sprintf("file-%d", getNextFileId()),
Object: "file",
Bytes: int(file.Size),
@@ -75,13 +84,14 @@ func getNextFileId() int64 {
}
// ListFilesEndpoint https://platform.openai.com/docs/api-reference/files/list
// @Summary List files.
// @Success 200 {object} schema.ListFiles "Response"
// @Router /v1/files [get]
func ListFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
type ListFiles struct {
Data []File
Object string
}
return func(c *fiber.Ctx) error {
var listFiles schema.ListFiles
var listFiles ListFiles
purpose := c.Query("purpose")
if purpose == "" {
@@ -98,7 +108,7 @@ func ListFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Applica
}
}
func getFileFromRequest(c *fiber.Ctx) (*schema.File, error) {
func getFileFromRequest(c *fiber.Ctx) (*File, error) {
id := c.Params("file_id")
if id == "" {
return nil, fmt.Errorf("file_id parameter is required")
@@ -113,10 +123,7 @@ func getFileFromRequest(c *fiber.Ctx) (*schema.File, error) {
return nil, fmt.Errorf("unable to find file id %s", id)
}
// GetFilesEndpoint is the OpenAI API endpoint to get files https://platform.openai.com/docs/api-reference/files/retrieve
// @Summary Returns information about a specific file.
// @Success 200 {object} schema.File "Response"
// @Router /v1/files/{file_id} [get]
// GetFilesEndpoint https://platform.openai.com/docs/api-reference/files/retrieve
func GetFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
file, err := getFileFromRequest(c)
@@ -128,17 +135,13 @@ func GetFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Applicat
}
}
type DeleteStatus struct {
Id string
Object string
Deleted bool
}
// DeleteFilesEndpoint is the OpenAI API endpoint to delete files https://platform.openai.com/docs/api-reference/files/delete
// @Summary Delete a file.
// @Success 200 {object} DeleteStatus "Response"
// @Router /v1/files/{file_id} [delete]
// DeleteFilesEndpoint https://platform.openai.com/docs/api-reference/files/delete
func DeleteFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
type DeleteStatus struct {
Id string
Object string
Deleted bool
}
return func(c *fiber.Ctx) error {
file, err := getFileFromRequest(c)
@@ -171,11 +174,7 @@ func DeleteFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Appli
}
}
// GetFilesContentsEndpoint is the OpenAI API endpoint to get files content https://platform.openai.com/docs/api-reference/files/retrieve-contents
// @Summary Returns information about a specific file.
// @Success 200 {string} binary "file"
// @Router /v1/files/{file_id}/content [get]
// GetFilesContentsEndpoint
// GetFilesContentsEndpoint https://platform.openai.com/docs/api-reference/files/retrieve-contents
func GetFilesContentsEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
file, err := getFileFromRequest(c)

View File

@@ -14,7 +14,6 @@ import (
"github.com/rs/zerolog/log"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/gofiber/fiber/v2"
utils2 "github.com/mudler/LocalAI/pkg/utils"
@@ -23,6 +22,11 @@ import (
"testing"
)
type ListFiles struct {
Data []File
Object string
}
func startUpApp() (app *fiber.App, option *config.ApplicationConfig, loader *config.BackendConfigLoader) {
// Preparing the mocked objects
loader = &config.BackendConfigLoader{}
@@ -155,7 +159,7 @@ func TestUploadFileExceedSizeLimit(t *testing.T) {
resp, _ := app.Test(req)
assert.Equal(t, 200, resp.StatusCode)
var listFiles schema.ListFiles
var listFiles ListFiles
if err := json.Unmarshal(bodyToByteArray(resp, t), &listFiles); err != nil {
t.Errorf("Failed to decode response: %v", err)
return
@@ -197,7 +201,7 @@ func CallFilesUploadEndpoint(t *testing.T, app *fiber.App, fileName, tag, purpos
return app.Test(req)
}
func CallFilesUploadEndpointWithCleanup(t *testing.T, app *fiber.App, fileName, tag, purpose string, fileSize int, appConfig *config.ApplicationConfig) schema.File {
func CallFilesUploadEndpointWithCleanup(t *testing.T, app *fiber.App, fileName, tag, purpose string, fileSize int, appConfig *config.ApplicationConfig) File {
// Create a file that exceeds the limit
testName := strings.Split(t.Name(), "/")[1]
file := createTestFile(t, testName+"-"+fileName, fileSize, appConfig)
@@ -276,8 +280,8 @@ func bodyToByteArray(resp *http.Response, t *testing.T) []byte {
return bodyBytes
}
func responseToFile(t *testing.T, resp *http.Response) schema.File {
var file schema.File
func responseToFile(t *testing.T, resp *http.Response) File {
var file File
responseToString := bodyToString(resp, t)
err := json.NewDecoder(strings.NewReader(responseToString)).Decode(&file)
@@ -288,8 +292,8 @@ func responseToFile(t *testing.T, resp *http.Response) schema.File {
return file
}
func responseToListFile(t *testing.T, resp *http.Response) schema.ListFiles {
var listFiles schema.ListFiles
func responseToListFile(t *testing.T, resp *http.Response) ListFiles {
var listFiles ListFiles
responseToString := bodyToString(resp, t)
err := json.NewDecoder(strings.NewReader(responseToString)).Decode(&listFiles)

View File

@@ -66,7 +66,7 @@ func downloadFile(url string) (string, error) {
// @Router /v1/images/generations [post]
func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
m, input, err := readRequest(c, cl, ml, appConfig, false)
m, input, err := readRequest(c, ml, appConfig, false)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}

View File

@@ -2,17 +2,11 @@ package openai
import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services"
model "github.com/mudler/LocalAI/pkg/model"
)
// ListModelsEndpoint is the OpenAI Models API endpoint https://platform.openai.com/docs/api-reference/models
// @Summary List and describe the various models available in the API.
// @Success 200 {object} schema.ModelsDataResponse "Response"
// @Router /v1/models [get]
func ListModelsEndpoint(bcl *config.BackendConfigLoader, ml *model.ModelLoader) func(ctx *fiber.Ctx) error {
func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
// If blank, no filter is applied.
filter := c.Query("filter")
@@ -20,30 +14,16 @@ func ListModelsEndpoint(bcl *config.BackendConfigLoader, ml *model.ModelLoader)
// By default, exclude any loose files that are already referenced by a configuration file.
excludeConfigured := c.QueryBool("excludeConfigured", true)
dataModels, err := modelList(bcl, ml, filter, excludeConfigured)
dataModels, err := lms.ListModels(filter, excludeConfigured)
if err != nil {
return err
}
return c.JSON(schema.ModelsDataResponse{
return c.JSON(struct {
Object string `json:"object"`
Data []schema.OpenAIModel `json:"data"`
}{
Object: "list",
Data: dataModels,
})
}
}
func modelList(bcl *config.BackendConfigLoader, ml *model.ModelLoader, filter string, excludeConfigured bool) ([]schema.OpenAIModel, error) {
models, err := services.ListModels(bcl, ml, filter, excludeConfigured)
if err != nil {
return nil, err
}
dataModels := []schema.OpenAIModel{}
// Then iterate through the loose files:
for _, m := range models {
dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"})
}
return dataModels, nil
}

View File

@@ -15,7 +15,7 @@ import (
"github.com/rs/zerolog/log"
)
func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) {
func readRequest(c *fiber.Ctx, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) {
input := new(schema.OpenAIRequest)
// Get input data from the request body
@@ -31,7 +31,7 @@ func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLo
log.Debug().Msgf("Request received: %s", string(received))
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, firstModel)
modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, firstModel)
return modelFile, input, err
}

View File

@@ -25,7 +25,7 @@ import (
// @Router /v1/audio/transcriptions [post]
func TranscriptEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
m, input, err := readRequest(c, cl, ml, appConfig, false)
m, input, err := readRequest(c, ml, appConfig, false)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}

View File

@@ -5,7 +5,6 @@ import (
"github.com/gofiber/swagger"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/endpoints/localai"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal"
"github.com/mudler/LocalAI/pkg/model"
@@ -57,12 +56,6 @@ func RegisterLocalAIRoutes(app *fiber.App,
app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitorService))
app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitorService))
// p2p
if p2p.IsP2PEnabled() {
app.Get("/api/p2p", auth, localai.ShowP2PNodes)
app.Get("/api/p2p/token", auth, localai.ShowP2PToken(appConfig))
}
app.Get("/version", auth, func(c *fiber.Ctx) error {
return c.JSON(struct {
Version string `json:"version"`

View File

@@ -5,6 +5,7 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/endpoints/localai"
"github.com/mudler/LocalAI/core/http/endpoints/openai"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/model"
)
@@ -80,7 +81,8 @@ func RegisterOpenAIRoutes(app *fiber.App,
app.Static("/generated-audio", appConfig.AudioDir)
}
// List models
app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml))
app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml))
// models
tmpLMS := services.NewListModelsService(ml, cl, appConfig) // TODO: once createApplication() is fully in use, reference the central instance.
app.Get("/v1/models", auth, openai.ListModelsEndpoint(tmpLMS))
app.Get("/models", auth, openai.ListModelsEndpoint(tmpLMS))
}

Some files were not shown because too many files have changed in this diff Show More