mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-08 05:32:25 -05:00
Compare commits
2 Commits
llama_cpp/
...
libmtmd
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cd4c0b8aa6 | ||
|
|
7437d0c9ca |
@@ -2,6 +2,9 @@
|
|||||||
|
|
||||||
cd /workspace
|
cd /workspace
|
||||||
|
|
||||||
|
# Grab the pre-stashed backend assets to avoid build issues
|
||||||
|
cp -r /build/backend-assets /workspace/backend-assets
|
||||||
|
|
||||||
# Ensures generated source files are present upon load
|
# Ensures generated source files are present upon load
|
||||||
make prepare
|
make prepare
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,10 @@ services:
|
|||||||
context: ..
|
context: ..
|
||||||
dockerfile: Dockerfile
|
dockerfile: Dockerfile
|
||||||
target: devcontainer
|
target: devcontainer
|
||||||
|
args:
|
||||||
|
- FFMPEG=true
|
||||||
|
- IMAGE_TYPE=extras
|
||||||
|
- GO_TAGS=p2p tts
|
||||||
env_file:
|
env_file:
|
||||||
- ../.env
|
- ../.env
|
||||||
ports:
|
ports:
|
||||||
|
|||||||
@@ -3,9 +3,7 @@
|
|||||||
.vscode
|
.vscode
|
||||||
.devcontainer
|
.devcontainer
|
||||||
models
|
models
|
||||||
backends
|
|
||||||
examples/chatbot-ui/models
|
examples/chatbot-ui/models
|
||||||
backend/go/image/stablediffusion-ggml/build/
|
|
||||||
examples/rwkv/models
|
examples/rwkv/models
|
||||||
examples/**/models
|
examples/**/models
|
||||||
Dockerfile*
|
Dockerfile*
|
||||||
@@ -16,4 +14,4 @@ __pycache__
|
|||||||
|
|
||||||
# backend virtual environments
|
# backend virtual environments
|
||||||
**/venv
|
**/venv
|
||||||
backend/python/**/source
|
backend/python/**/source
|
||||||
7
.env
7
.env
@@ -41,6 +41,13 @@
|
|||||||
## Uncomment and set to true to enable rebuilding from source
|
## Uncomment and set to true to enable rebuilding from source
|
||||||
# REBUILD=true
|
# REBUILD=true
|
||||||
|
|
||||||
|
## Enable go tags, available: p2p, tts
|
||||||
|
## p2p: enable distributed inferencing
|
||||||
|
## tts: enables text-to-speech with go-piper
|
||||||
|
## (requires REBUILD=true)
|
||||||
|
#
|
||||||
|
# GO_TAGS=p2p
|
||||||
|
|
||||||
## Path where to store generated images
|
## Path where to store generated images
|
||||||
# LOCALAI_IMAGE_PATH=/tmp/generated/images
|
# LOCALAI_IMAGE_PATH=/tmp/generated/images
|
||||||
|
|
||||||
|
|||||||
9
.github/bump_deps.sh
vendored
9
.github/bump_deps.sh
vendored
@@ -3,20 +3,15 @@ set -xe
|
|||||||
REPO=$1
|
REPO=$1
|
||||||
BRANCH=$2
|
BRANCH=$2
|
||||||
VAR=$3
|
VAR=$3
|
||||||
FILE=$4
|
|
||||||
|
|
||||||
if [ -z "$FILE" ]; then
|
|
||||||
FILE="Makefile"
|
|
||||||
fi
|
|
||||||
|
|
||||||
LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")
|
LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")
|
||||||
|
|
||||||
# Read $VAR from Makefile (only first match)
|
# Read $VAR from Makefile (only first match)
|
||||||
set +e
|
set +e
|
||||||
CURRENT_COMMIT="$(grep -m1 "^$VAR?=" $FILE | cut -d'=' -f2)"
|
CURRENT_COMMIT="$(grep -m1 "^$VAR?=" Makefile | cut -d'=' -f2)"
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
sed -i $FILE -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
|
sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
|
||||||
|
|
||||||
if [ -z "$CURRENT_COMMIT" ]; then
|
if [ -z "$CURRENT_COMMIT" ]; then
|
||||||
echo "Could not find $VAR in Makefile."
|
echo "Could not find $VAR in Makefile."
|
||||||
|
|||||||
4
.github/dependabot.yml
vendored
4
.github/dependabot.yml
vendored
@@ -61,6 +61,10 @@ updates:
|
|||||||
directory: "/backend/python/openvoice"
|
directory: "/backend/python/openvoice"
|
||||||
schedule:
|
schedule:
|
||||||
interval: "weekly"
|
interval: "weekly"
|
||||||
|
- package-ecosystem: "pip"
|
||||||
|
directory: "/backend/python/parler-tts"
|
||||||
|
schedule:
|
||||||
|
interval: "weekly"
|
||||||
- package-ecosystem: "pip"
|
- package-ecosystem: "pip"
|
||||||
directory: "/backend/python/rerankers"
|
directory: "/backend/python/rerankers"
|
||||||
schedule:
|
schedule:
|
||||||
|
|||||||
1048
.github/workflows/backend.yml
vendored
1048
.github/workflows/backend.yml
vendored
File diff suppressed because it is too large
Load Diff
241
.github/workflows/backend_build.yml
vendored
241
.github/workflows/backend_build.yml
vendored
@@ -1,241 +0,0 @@
|
|||||||
---
|
|
||||||
name: 'build python backend container images (reusable)'
|
|
||||||
|
|
||||||
on:
|
|
||||||
workflow_call:
|
|
||||||
inputs:
|
|
||||||
base-image:
|
|
||||||
description: 'Base image'
|
|
||||||
required: true
|
|
||||||
type: string
|
|
||||||
build-type:
|
|
||||||
description: 'Build type'
|
|
||||||
default: ''
|
|
||||||
type: string
|
|
||||||
cuda-major-version:
|
|
||||||
description: 'CUDA major version'
|
|
||||||
default: "12"
|
|
||||||
type: string
|
|
||||||
cuda-minor-version:
|
|
||||||
description: 'CUDA minor version'
|
|
||||||
default: "1"
|
|
||||||
type: string
|
|
||||||
platforms:
|
|
||||||
description: 'Platforms'
|
|
||||||
default: ''
|
|
||||||
type: string
|
|
||||||
tag-latest:
|
|
||||||
description: 'Tag latest'
|
|
||||||
default: ''
|
|
||||||
type: string
|
|
||||||
tag-suffix:
|
|
||||||
description: 'Tag suffix'
|
|
||||||
default: ''
|
|
||||||
type: string
|
|
||||||
runs-on:
|
|
||||||
description: 'Runs on'
|
|
||||||
required: true
|
|
||||||
default: ''
|
|
||||||
type: string
|
|
||||||
backend:
|
|
||||||
description: 'Backend to build'
|
|
||||||
required: true
|
|
||||||
type: string
|
|
||||||
context:
|
|
||||||
description: 'Build context'
|
|
||||||
required: true
|
|
||||||
type: string
|
|
||||||
dockerfile:
|
|
||||||
description: 'Build Dockerfile'
|
|
||||||
required: true
|
|
||||||
type: string
|
|
||||||
skip-drivers:
|
|
||||||
description: 'Skip drivers'
|
|
||||||
default: 'false'
|
|
||||||
type: string
|
|
||||||
secrets:
|
|
||||||
dockerUsername:
|
|
||||||
required: true
|
|
||||||
dockerPassword:
|
|
||||||
required: true
|
|
||||||
quayUsername:
|
|
||||||
required: true
|
|
||||||
quayPassword:
|
|
||||||
required: true
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
backend-build:
|
|
||||||
runs-on: ${{ inputs.runs-on }}
|
|
||||||
steps:
|
|
||||||
|
|
||||||
|
|
||||||
- name: Free Disk Space (Ubuntu)
|
|
||||||
if: inputs.runs-on == 'ubuntu-latest'
|
|
||||||
uses: jlumbroso/free-disk-space@main
|
|
||||||
with:
|
|
||||||
# this might remove tools that are actually needed,
|
|
||||||
# if set to "true" but frees about 6 GB
|
|
||||||
tool-cache: true
|
|
||||||
# all of these default to true, but feel free to set to
|
|
||||||
# "false" if necessary for your workflow
|
|
||||||
android: true
|
|
||||||
dotnet: true
|
|
||||||
haskell: true
|
|
||||||
large-packages: true
|
|
||||||
docker-images: true
|
|
||||||
swap-storage: true
|
|
||||||
|
|
||||||
- name: Force Install GIT latest
|
|
||||||
run: |
|
|
||||||
sudo apt-get update \
|
|
||||||
&& sudo apt-get install -y software-properties-common \
|
|
||||||
&& sudo apt-get update \
|
|
||||||
&& sudo add-apt-repository -y ppa:git-core/ppa \
|
|
||||||
&& sudo apt-get update \
|
|
||||||
&& sudo apt-get install -y git
|
|
||||||
|
|
||||||
- name: Checkout
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
|
|
||||||
- name: Release space from worker
|
|
||||||
if: inputs.runs-on == 'ubuntu-latest'
|
|
||||||
run: |
|
|
||||||
echo "Listing top largest packages"
|
|
||||||
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
|
||||||
head -n 30 <<< "${pkgs}"
|
|
||||||
echo
|
|
||||||
df -h
|
|
||||||
echo
|
|
||||||
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
|
|
||||||
sudo apt-get remove --auto-remove android-sdk-platform-tools snapd || true
|
|
||||||
sudo apt-get purge --auto-remove android-sdk-platform-tools snapd || true
|
|
||||||
sudo rm -rf /usr/local/lib/android
|
|
||||||
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
|
|
||||||
sudo rm -rf /usr/share/dotnet
|
|
||||||
sudo apt-get remove -y '^mono-.*' || true
|
|
||||||
sudo apt-get remove -y '^ghc-.*' || true
|
|
||||||
sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
|
|
||||||
sudo apt-get remove -y 'php.*' || true
|
|
||||||
sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
|
|
||||||
sudo apt-get remove -y '^google-.*' || true
|
|
||||||
sudo apt-get remove -y azure-cli || true
|
|
||||||
sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
|
|
||||||
sudo apt-get remove -y '^gfortran-.*' || true
|
|
||||||
sudo apt-get remove -y microsoft-edge-stable || true
|
|
||||||
sudo apt-get remove -y firefox || true
|
|
||||||
sudo apt-get remove -y powershell || true
|
|
||||||
sudo apt-get remove -y r-base-core || true
|
|
||||||
sudo apt-get autoremove -y
|
|
||||||
sudo apt-get clean
|
|
||||||
echo
|
|
||||||
echo "Listing top largest packages"
|
|
||||||
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
|
||||||
head -n 30 <<< "${pkgs}"
|
|
||||||
echo
|
|
||||||
sudo rm -rfv build || true
|
|
||||||
sudo rm -rf /usr/share/dotnet || true
|
|
||||||
sudo rm -rf /opt/ghc || true
|
|
||||||
sudo rm -rf "/usr/local/share/boost" || true
|
|
||||||
sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
|
|
||||||
df -h
|
|
||||||
|
|
||||||
- name: Docker meta
|
|
||||||
id: meta
|
|
||||||
if: github.event_name != 'pull_request'
|
|
||||||
uses: docker/metadata-action@v5
|
|
||||||
with:
|
|
||||||
images: |
|
|
||||||
quay.io/go-skynet/local-ai-backends
|
|
||||||
localai/localai-backends
|
|
||||||
tags: |
|
|
||||||
type=ref,event=branch
|
|
||||||
type=semver,pattern={{raw}}
|
|
||||||
type=sha
|
|
||||||
flavor: |
|
|
||||||
latest=${{ inputs.tag-latest }}
|
|
||||||
suffix=${{ inputs.tag-suffix }},onlatest=true
|
|
||||||
|
|
||||||
- name: Docker meta for PR
|
|
||||||
id: meta_pull_request
|
|
||||||
if: github.event_name == 'pull_request'
|
|
||||||
uses: docker/metadata-action@v5
|
|
||||||
with:
|
|
||||||
images: |
|
|
||||||
quay.io/go-skynet/ci-tests
|
|
||||||
tags: |
|
|
||||||
type=ref,event=branch,suffix=${{ github.event.number }}-${{ inputs.backend }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
|
|
||||||
type=semver,pattern={{raw}},suffix=${{ github.event.number }}-${{ inputs.backend }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
|
|
||||||
type=sha,suffix=${{ github.event.number }}-${{ inputs.backend }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
|
|
||||||
flavor: |
|
|
||||||
latest=${{ inputs.tag-latest }}
|
|
||||||
suffix=${{ inputs.tag-suffix }},onlatest=true
|
|
||||||
## End testing image
|
|
||||||
- name: Set up QEMU
|
|
||||||
uses: docker/setup-qemu-action@master
|
|
||||||
with:
|
|
||||||
platforms: all
|
|
||||||
|
|
||||||
- name: Set up Docker Buildx
|
|
||||||
id: buildx
|
|
||||||
uses: docker/setup-buildx-action@master
|
|
||||||
|
|
||||||
- name: Login to DockerHub
|
|
||||||
if: github.event_name != 'pull_request'
|
|
||||||
uses: docker/login-action@v3
|
|
||||||
with:
|
|
||||||
username: ${{ secrets.dockerUsername }}
|
|
||||||
password: ${{ secrets.dockerPassword }}
|
|
||||||
|
|
||||||
- name: Login to Quay.io
|
|
||||||
# if: github.event_name != 'pull_request'
|
|
||||||
uses: docker/login-action@v3
|
|
||||||
with:
|
|
||||||
registry: quay.io
|
|
||||||
username: ${{ secrets.quayUsername }}
|
|
||||||
password: ${{ secrets.quayPassword }}
|
|
||||||
|
|
||||||
- name: Build and push
|
|
||||||
uses: docker/build-push-action@v6
|
|
||||||
if: github.event_name != 'pull_request'
|
|
||||||
with:
|
|
||||||
builder: ${{ steps.buildx.outputs.name }}
|
|
||||||
build-args: |
|
|
||||||
BUILD_TYPE=${{ inputs.build-type }}
|
|
||||||
SKIP_DRIVERS=${{ inputs.skip-drivers }}
|
|
||||||
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
|
|
||||||
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
|
|
||||||
BASE_IMAGE=${{ inputs.base-image }}
|
|
||||||
BACKEND=${{ inputs.backend }}
|
|
||||||
context: ${{ inputs.context }}
|
|
||||||
file: ${{ inputs.dockerfile }}
|
|
||||||
cache-from: type=gha
|
|
||||||
platforms: ${{ inputs.platforms }}
|
|
||||||
push: ${{ github.event_name != 'pull_request' }}
|
|
||||||
tags: ${{ steps.meta.outputs.tags }}
|
|
||||||
labels: ${{ steps.meta.outputs.labels }}
|
|
||||||
|
|
||||||
- name: Build and push (PR)
|
|
||||||
uses: docker/build-push-action@v6
|
|
||||||
if: github.event_name == 'pull_request'
|
|
||||||
with:
|
|
||||||
builder: ${{ steps.buildx.outputs.name }}
|
|
||||||
build-args: |
|
|
||||||
BUILD_TYPE=${{ inputs.build-type }}
|
|
||||||
SKIP_DRIVERS=${{ inputs.skip-drivers }}
|
|
||||||
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
|
|
||||||
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
|
|
||||||
BASE_IMAGE=${{ inputs.base-image }}
|
|
||||||
BACKEND=${{ inputs.backend }}
|
|
||||||
context: ${{ inputs.context }}
|
|
||||||
file: ${{ inputs.dockerfile }}
|
|
||||||
cache-from: type=gha
|
|
||||||
platforms: ${{ inputs.platforms }}
|
|
||||||
push: true
|
|
||||||
tags: ${{ steps.meta_pull_request.outputs.tags }}
|
|
||||||
labels: ${{ steps.meta_pull_request.outputs.labels }}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
- name: job summary
|
|
||||||
run: |
|
|
||||||
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
|
|
||||||
23
.github/workflows/build-test.yaml
vendored
23
.github/workflows/build-test.yaml
vendored
@@ -1,23 +0,0 @@
|
|||||||
name: Build test
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches:
|
|
||||||
- master
|
|
||||||
pull_request:
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
build-test:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Checkout
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
fetch-depth: 0
|
|
||||||
- name: Set up Go
|
|
||||||
uses: actions/setup-go@v5
|
|
||||||
with:
|
|
||||||
go-version: 1.23
|
|
||||||
- name: Run GoReleaser
|
|
||||||
run: |
|
|
||||||
make dev-dist
|
|
||||||
14
.github/workflows/bump_deps.yaml
vendored
14
.github/workflows/bump_deps.yaml
vendored
@@ -10,32 +10,30 @@ jobs:
|
|||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- repository: "ggml-org/llama.cpp"
|
- repository: "ggml-org/llama.cpp"
|
||||||
variable: "LLAMA_VERSION"
|
variable: "CPPLLAMA_VERSION"
|
||||||
branch: "master"
|
branch: "master"
|
||||||
file: "backend/cpp/llama-cpp/Makefile"
|
|
||||||
- repository: "ggml-org/whisper.cpp"
|
- repository: "ggml-org/whisper.cpp"
|
||||||
variable: "WHISPER_CPP_VERSION"
|
variable: "WHISPER_CPP_VERSION"
|
||||||
branch: "master"
|
branch: "master"
|
||||||
file: "backend/go/whisper/Makefile"
|
|
||||||
- repository: "PABannier/bark.cpp"
|
- repository: "PABannier/bark.cpp"
|
||||||
variable: "BARKCPP_VERSION"
|
variable: "BARKCPP_VERSION"
|
||||||
branch: "main"
|
branch: "main"
|
||||||
file: "Makefile"
|
- repository: "leejet/stable-diffusion.cpp"
|
||||||
- repository: "richiejp/stable-diffusion.cpp"
|
|
||||||
variable: "STABLEDIFFUSION_GGML_VERSION"
|
variable: "STABLEDIFFUSION_GGML_VERSION"
|
||||||
branch: "master"
|
branch: "master"
|
||||||
file: "backend/go/stablediffusion-ggml/Makefile"
|
- repository: "mudler/go-stable-diffusion"
|
||||||
|
variable: "STABLEDIFFUSION_VERSION"
|
||||||
|
branch: "master"
|
||||||
- repository: "mudler/go-piper"
|
- repository: "mudler/go-piper"
|
||||||
variable: "PIPER_VERSION"
|
variable: "PIPER_VERSION"
|
||||||
branch: "master"
|
branch: "master"
|
||||||
file: "backend/go/piper/Makefile"
|
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
- name: Bump dependencies 🔧
|
- name: Bump dependencies 🔧
|
||||||
id: bump
|
id: bump
|
||||||
run: |
|
run: |
|
||||||
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }} ${{ matrix.file }}
|
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
|
||||||
{
|
{
|
||||||
echo 'message<<EOF'
|
echo 'message<<EOF'
|
||||||
cat "${{ matrix.variable }}_message.txt"
|
cat "${{ matrix.variable }}_message.txt"
|
||||||
|
|||||||
3
.github/workflows/checksum_checker.yaml
vendored
3
.github/workflows/checksum_checker.yaml
vendored
@@ -5,7 +5,7 @@ on:
|
|||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
jobs:
|
jobs:
|
||||||
checksum_check:
|
checksum_check:
|
||||||
runs-on: ubuntu-latest
|
runs-on: arc-runner-set
|
||||||
steps:
|
steps:
|
||||||
- name: Force Install GIT latest
|
- name: Force Install GIT latest
|
||||||
run: |
|
run: |
|
||||||
@@ -20,6 +20,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install -y pip wget
|
sudo apt-get install -y pip wget
|
||||||
|
sudo pip install --upgrade pip
|
||||||
pip install huggingface_hub
|
pip install huggingface_hub
|
||||||
- name: 'Setup yq'
|
- name: 'Setup yq'
|
||||||
uses: dcarbone/install-yq-action@v1.3.1
|
uses: dcarbone/install-yq-action@v1.3.1
|
||||||
|
|||||||
2
.github/workflows/deploy-explorer.yaml
vendored
2
.github/workflows/deploy-explorer.yaml
vendored
@@ -31,7 +31,7 @@ jobs:
|
|||||||
make protogen-go
|
make protogen-go
|
||||||
- name: Build api
|
- name: Build api
|
||||||
run: |
|
run: |
|
||||||
CGO_ENABLED=0 make build
|
CGO_ENABLED=0 make build-api
|
||||||
- name: rm
|
- name: rm
|
||||||
uses: appleboy/ssh-action@v1.2.2
|
uses: appleboy/ssh-action@v1.2.2
|
||||||
with:
|
with:
|
||||||
|
|||||||
2
.github/workflows/generate_grpc_cache.yaml
vendored
2
.github/workflows/generate_grpc_cache.yaml
vendored
@@ -17,7 +17,7 @@ jobs:
|
|||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- grpc-base-image: ubuntu:22.04
|
- grpc-base-image: ubuntu:22.04
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'arc-runner-set'
|
||||||
platforms: 'linux/amd64,linux/arm64'
|
platforms: 'linux/amd64,linux/arm64'
|
||||||
runs-on: ${{matrix.runs-on}}
|
runs-on: ${{matrix.runs-on}}
|
||||||
steps:
|
steps:
|
||||||
|
|||||||
2
.github/workflows/generate_intel_image.yaml
vendored
2
.github/workflows/generate_intel_image.yaml
vendored
@@ -15,7 +15,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- base-image: intel/oneapi-basekit:2025.2.0-0-devel-ubuntu22.04
|
- base-image: intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'ubuntu-latest'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
runs-on: ${{matrix.runs-on}}
|
runs-on: ${{matrix.runs-on}}
|
||||||
|
|||||||
96
.github/workflows/image-pr.yml
vendored
96
.github/workflows/image-pr.yml
vendored
@@ -9,11 +9,13 @@ concurrency:
|
|||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
image-build:
|
extras-image-build:
|
||||||
uses: ./.github/workflows/image_build.yml
|
uses: ./.github/workflows/image_build.yml
|
||||||
with:
|
with:
|
||||||
tag-latest: ${{ matrix.tag-latest }}
|
tag-latest: ${{ matrix.tag-latest }}
|
||||||
tag-suffix: ${{ matrix.tag-suffix }}
|
tag-suffix: ${{ matrix.tag-suffix }}
|
||||||
|
ffmpeg: ${{ matrix.ffmpeg }}
|
||||||
|
image-type: ${{ matrix.image-type }}
|
||||||
build-type: ${{ matrix.build-type }}
|
build-type: ${{ matrix.build-type }}
|
||||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||||
@@ -34,35 +36,115 @@ jobs:
|
|||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
|
# This is basically covered by the AIO test
|
||||||
|
# - build-type: ''
|
||||||
|
# platforms: 'linux/amd64'
|
||||||
|
# tag-latest: 'false'
|
||||||
|
# tag-suffix: '-ffmpeg'
|
||||||
|
# ffmpeg: 'true'
|
||||||
|
# image-type: 'extras'
|
||||||
|
# runs-on: 'arc-runner-set'
|
||||||
|
# base-image: "ubuntu:22.04"
|
||||||
|
# makeflags: "--jobs=3 --output-sync=target"
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "12"
|
cuda-major-version: "12"
|
||||||
cuda-minor-version: "0"
|
cuda-minor-version: "0"
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-gpu-nvidia-cuda12'
|
tag-suffix: '-cublas-cuda12-ffmpeg'
|
||||||
runs-on: 'ubuntu-latest'
|
ffmpeg: 'true'
|
||||||
|
image-type: 'extras'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
- build-type: 'hipblas'
|
- build-type: 'hipblas'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-hipblas'
|
tag-suffix: '-hipblas'
|
||||||
|
ffmpeg: 'false'
|
||||||
|
image-type: 'extras'
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||||
grpc-base-image: "ubuntu:22.04"
|
grpc-base-image: "ubuntu:22.04"
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
- build-type: 'sycl_f16'
|
- build-type: 'sycl_f16'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||||
grpc-base-image: "ubuntu:22.04"
|
grpc-base-image: "ubuntu:22.04"
|
||||||
tag-suffix: 'sycl-f16'
|
tag-suffix: 'sycl-f16-ffmpeg'
|
||||||
runs-on: 'ubuntu-latest'
|
ffmpeg: 'true'
|
||||||
|
image-type: 'extras'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
- build-type: 'vulkan'
|
- build-type: 'vulkan'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-vulkan-core'
|
tag-suffix: '-vulkan-ffmpeg-core'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'core'
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'ubuntu-latest'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
|
# core-image-build:
|
||||||
|
# uses: ./.github/workflows/image_build.yml
|
||||||
|
# with:
|
||||||
|
# tag-latest: ${{ matrix.tag-latest }}
|
||||||
|
# tag-suffix: ${{ matrix.tag-suffix }}
|
||||||
|
# ffmpeg: ${{ matrix.ffmpeg }}
|
||||||
|
# image-type: ${{ matrix.image-type }}
|
||||||
|
# build-type: ${{ matrix.build-type }}
|
||||||
|
# cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||||
|
# cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||||
|
# platforms: ${{ matrix.platforms }}
|
||||||
|
# runs-on: ${{ matrix.runs-on }}
|
||||||
|
# base-image: ${{ matrix.base-image }}
|
||||||
|
# grpc-base-image: ${{ matrix.grpc-base-image }}
|
||||||
|
# makeflags: ${{ matrix.makeflags }}
|
||||||
|
# secrets:
|
||||||
|
# dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
|
# dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||||
|
# quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||||
|
# quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||||
|
# strategy:
|
||||||
|
# matrix:
|
||||||
|
# include:
|
||||||
|
# - build-type: ''
|
||||||
|
# platforms: 'linux/amd64'
|
||||||
|
# tag-latest: 'false'
|
||||||
|
# tag-suffix: '-ffmpeg-core'
|
||||||
|
# ffmpeg: 'true'
|
||||||
|
# image-type: 'core'
|
||||||
|
# runs-on: 'ubuntu-latest'
|
||||||
|
# base-image: "ubuntu:22.04"
|
||||||
|
# makeflags: "--jobs=4 --output-sync=target"
|
||||||
|
# - build-type: 'sycl_f16'
|
||||||
|
# platforms: 'linux/amd64'
|
||||||
|
# tag-latest: 'false'
|
||||||
|
# base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||||
|
# grpc-base-image: "ubuntu:22.04"
|
||||||
|
# tag-suffix: 'sycl-f16-ffmpeg-core'
|
||||||
|
# ffmpeg: 'true'
|
||||||
|
# image-type: 'core'
|
||||||
|
# runs-on: 'arc-runner-set'
|
||||||
|
# makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
# - build-type: 'cublas'
|
||||||
|
# cuda-major-version: "12"
|
||||||
|
# cuda-minor-version: "0"
|
||||||
|
# platforms: 'linux/amd64'
|
||||||
|
# tag-latest: 'false'
|
||||||
|
# tag-suffix: '-cublas-cuda12-ffmpeg-core'
|
||||||
|
# ffmpeg: 'true'
|
||||||
|
# image-type: 'core'
|
||||||
|
# runs-on: 'ubuntu-latest'
|
||||||
|
# base-image: "ubuntu:22.04"
|
||||||
|
# makeflags: "--jobs=4 --output-sync=target"
|
||||||
|
# - build-type: 'vulkan'
|
||||||
|
# platforms: 'linux/amd64'
|
||||||
|
# tag-latest: 'false'
|
||||||
|
# tag-suffix: '-vulkan-ffmpeg-core'
|
||||||
|
# ffmpeg: 'true'
|
||||||
|
# image-type: 'core'
|
||||||
|
# runs-on: 'ubuntu-latest'
|
||||||
|
# base-image: "ubuntu:22.04"
|
||||||
|
# makeflags: "--jobs=4 --output-sync=target"
|
||||||
|
|||||||
204
.github/workflows/image.yml
vendored
204
.github/workflows/image.yml
vendored
@@ -18,6 +18,8 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
tag-latest: ${{ matrix.tag-latest }}
|
tag-latest: ${{ matrix.tag-latest }}
|
||||||
tag-suffix: ${{ matrix.tag-suffix }}
|
tag-suffix: ${{ matrix.tag-suffix }}
|
||||||
|
ffmpeg: ${{ matrix.ffmpeg }}
|
||||||
|
image-type: ${{ matrix.image-type }}
|
||||||
build-type: ${{ matrix.build-type }}
|
build-type: ${{ matrix.build-type }}
|
||||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||||
@@ -27,29 +29,157 @@ jobs:
|
|||||||
grpc-base-image: ${{ matrix.grpc-base-image }}
|
grpc-base-image: ${{ matrix.grpc-base-image }}
|
||||||
aio: ${{ matrix.aio }}
|
aio: ${{ matrix.aio }}
|
||||||
makeflags: ${{ matrix.makeflags }}
|
makeflags: ${{ matrix.makeflags }}
|
||||||
|
latest-image: ${{ matrix.latest-image }}
|
||||||
|
latest-image-aio: ${{ matrix.latest-image-aio }}
|
||||||
secrets:
|
secrets:
|
||||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||||
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||||
strategy:
|
strategy:
|
||||||
|
# Pushing with all jobs in parallel
|
||||||
|
# eats the bandwidth of all the nodes
|
||||||
|
max-parallel: 2
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- build-type: 'hipblas'
|
- build-type: 'hipblas'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'auto'
|
||||||
tag-suffix: '-gpu-hipblas'
|
tag-suffix: '-hipblas-extras'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'extras'
|
||||||
|
aio: "-aio-gpu-hipblas"
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||||
grpc-base-image: "ubuntu:22.04"
|
grpc-base-image: "ubuntu:22.04"
|
||||||
runs-on: 'ubuntu-latest'
|
latest-image: 'latest-gpu-hipblas-extras'
|
||||||
|
latest-image-aio: 'latest-aio-gpu-hipblas'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
aio: "-aio-gpu-hipblas"
|
- build-type: 'hipblas'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
tag-suffix: '-hipblas'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'core'
|
||||||
|
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
latest-image: 'latest-gpu-hipblas'
|
||||||
|
self-hosted-jobs:
|
||||||
|
uses: ./.github/workflows/image_build.yml
|
||||||
|
with:
|
||||||
|
tag-latest: ${{ matrix.tag-latest }}
|
||||||
|
tag-suffix: ${{ matrix.tag-suffix }}
|
||||||
|
ffmpeg: ${{ matrix.ffmpeg }}
|
||||||
|
image-type: ${{ matrix.image-type }}
|
||||||
|
build-type: ${{ matrix.build-type }}
|
||||||
|
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||||
|
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||||
|
platforms: ${{ matrix.platforms }}
|
||||||
|
runs-on: ${{ matrix.runs-on }}
|
||||||
|
base-image: ${{ matrix.base-image }}
|
||||||
|
grpc-base-image: ${{ matrix.grpc-base-image }}
|
||||||
|
aio: ${{ matrix.aio }}
|
||||||
|
makeflags: ${{ matrix.makeflags }}
|
||||||
|
latest-image: ${{ matrix.latest-image }}
|
||||||
|
latest-image-aio: ${{ matrix.latest-image-aio }}
|
||||||
|
secrets:
|
||||||
|
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
|
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||||
|
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||||
|
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||||
|
strategy:
|
||||||
|
# Pushing with all jobs in parallel
|
||||||
|
# eats the bandwidth of all the nodes
|
||||||
|
max-parallel: ${{ github.event_name != 'pull_request' && 5 || 8 }}
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "11"
|
||||||
|
cuda-minor-version: "7"
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
tag-suffix: '-cublas-cuda11-extras'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'extras'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
base-image: "ubuntu:22.04"
|
||||||
|
aio: "-aio-gpu-nvidia-cuda-11"
|
||||||
|
latest-image: 'latest-gpu-nvidia-cuda-11-extras'
|
||||||
|
latest-image-aio: 'latest-aio-gpu-nvidia-cuda-11'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "12"
|
||||||
|
cuda-minor-version: "0"
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
tag-suffix: '-cublas-cuda12-extras'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'extras'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
base-image: "ubuntu:22.04"
|
||||||
|
aio: "-aio-gpu-nvidia-cuda-12"
|
||||||
|
latest-image: 'latest-gpu-nvidia-cuda-12-extras'
|
||||||
|
latest-image-aio: 'latest-aio-gpu-nvidia-cuda-12'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
- build-type: 'sycl_f16'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
|
tag-suffix: '-sycl-f16-extras'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'extras'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
aio: "-aio-gpu-intel-f16"
|
||||||
|
latest-image: 'latest-gpu-intel-f16-extras'
|
||||||
|
latest-image-aio: 'latest-aio-gpu-intel-f16'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
- build-type: 'sycl_f32'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
|
tag-suffix: '-sycl-f32-extras'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'extras'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
aio: "-aio-gpu-intel-f32"
|
||||||
|
latest-image: 'latest-gpu-intel-f32-extras'
|
||||||
|
latest-image-aio: 'latest-aio-gpu-intel-f32'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
# Core images
|
||||||
|
- build-type: 'sycl_f16'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
|
tag-suffix: '-sycl-f16'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'core'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
latest-image: 'latest-gpu-intel-f16'
|
||||||
|
- build-type: 'sycl_f32'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
|
tag-suffix: '-sycl-f32'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'core'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
latest-image: 'latest-gpu-intel-f32'
|
||||||
|
|
||||||
core-image-build:
|
core-image-build:
|
||||||
uses: ./.github/workflows/image_build.yml
|
uses: ./.github/workflows/image_build.yml
|
||||||
with:
|
with:
|
||||||
tag-latest: ${{ matrix.tag-latest }}
|
tag-latest: ${{ matrix.tag-latest }}
|
||||||
tag-suffix: ${{ matrix.tag-suffix }}
|
tag-suffix: ${{ matrix.tag-suffix }}
|
||||||
|
ffmpeg: ${{ matrix.ffmpeg }}
|
||||||
|
image-type: ${{ matrix.image-type }}
|
||||||
build-type: ${{ matrix.build-type }}
|
build-type: ${{ matrix.build-type }}
|
||||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||||
@@ -59,6 +189,8 @@ jobs:
|
|||||||
base-image: ${{ matrix.base-image }}
|
base-image: ${{ matrix.base-image }}
|
||||||
grpc-base-image: ${{ matrix.grpc-base-image }}
|
grpc-base-image: ${{ matrix.grpc-base-image }}
|
||||||
makeflags: ${{ matrix.makeflags }}
|
makeflags: ${{ matrix.makeflags }}
|
||||||
|
latest-image: ${{ matrix.latest-image }}
|
||||||
|
latest-image-aio: ${{ matrix.latest-image-aio }}
|
||||||
skip-drivers: ${{ matrix.skip-drivers }}
|
skip-drivers: ${{ matrix.skip-drivers }}
|
||||||
secrets:
|
secrets:
|
||||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
@@ -66,73 +198,66 @@ jobs:
|
|||||||
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||||
strategy:
|
strategy:
|
||||||
#max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
|
max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- build-type: ''
|
- build-type: ''
|
||||||
platforms: 'linux/amd64,linux/arm64'
|
platforms: 'linux/amd64,linux/arm64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'auto'
|
||||||
tag-suffix: ''
|
tag-suffix: ''
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'core'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'arc-runner-set'
|
||||||
aio: "-aio-cpu"
|
aio: "-aio-cpu"
|
||||||
|
latest-image: 'latest-cpu'
|
||||||
|
latest-image-aio: 'latest-aio-cpu'
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
skip-drivers: 'false'
|
skip-drivers: 'false'
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "11"
|
cuda-major-version: "11"
|
||||||
cuda-minor-version: "7"
|
cuda-minor-version: "7"
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-gpu-nvidia-cuda11'
|
tag-suffix: '-cublas-cuda11'
|
||||||
runs-on: 'ubuntu-latest'
|
ffmpeg: 'true'
|
||||||
|
image-type: 'core'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
skip-drivers: 'false'
|
skip-drivers: 'false'
|
||||||
aio: "-aio-gpu-nvidia-cuda-11"
|
latest-image: 'latest-gpu-nvidia-cuda-12'
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "12"
|
cuda-major-version: "12"
|
||||||
cuda-minor-version: "0"
|
cuda-minor-version: "0"
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-gpu-nvidia-cuda12'
|
tag-suffix: '-cublas-cuda12'
|
||||||
runs-on: 'ubuntu-latest'
|
ffmpeg: 'true'
|
||||||
|
image-type: 'core'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
skip-drivers: 'false'
|
skip-drivers: 'false'
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
aio: "-aio-gpu-nvidia-cuda-12"
|
latest-image: 'latest-gpu-nvidia-cuda-12'
|
||||||
- build-type: 'vulkan'
|
- build-type: 'vulkan'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-vulkan'
|
tag-suffix: '-vulkan'
|
||||||
runs-on: 'ubuntu-latest'
|
ffmpeg: 'true'
|
||||||
|
image-type: 'core'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
skip-drivers: 'false'
|
skip-drivers: 'false'
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
aio: "-aio-gpu-vulkan"
|
latest-image: 'latest-gpu-vulkan'
|
||||||
- build-type: 'sycl_f16'
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'auto'
|
|
||||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
tag-suffix: '-gpu-intel-f16'
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
aio: "-aio-gpu-intel-f16"
|
|
||||||
- build-type: 'sycl_f32'
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'auto'
|
|
||||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
tag-suffix: '-gpu-intel-f32'
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
aio: "-aio-gpu-intel-f32"
|
|
||||||
|
|
||||||
gh-runner:
|
gh-runner:
|
||||||
uses: ./.github/workflows/image_build.yml
|
uses: ./.github/workflows/image_build.yml
|
||||||
with:
|
with:
|
||||||
tag-latest: ${{ matrix.tag-latest }}
|
tag-latest: ${{ matrix.tag-latest }}
|
||||||
tag-suffix: ${{ matrix.tag-suffix }}
|
tag-suffix: ${{ matrix.tag-suffix }}
|
||||||
|
ffmpeg: ${{ matrix.ffmpeg }}
|
||||||
|
image-type: ${{ matrix.image-type }}
|
||||||
build-type: ${{ matrix.build-type }}
|
build-type: ${{ matrix.build-type }}
|
||||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||||
@@ -142,6 +267,8 @@ jobs:
|
|||||||
base-image: ${{ matrix.base-image }}
|
base-image: ${{ matrix.base-image }}
|
||||||
grpc-base-image: ${{ matrix.grpc-base-image }}
|
grpc-base-image: ${{ matrix.grpc-base-image }}
|
||||||
makeflags: ${{ matrix.makeflags }}
|
makeflags: ${{ matrix.makeflags }}
|
||||||
|
latest-image: ${{ matrix.latest-image }}
|
||||||
|
latest-image-aio: ${{ matrix.latest-image-aio }}
|
||||||
skip-drivers: ${{ matrix.skip-drivers }}
|
skip-drivers: ${{ matrix.skip-drivers }}
|
||||||
secrets:
|
secrets:
|
||||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
@@ -155,9 +282,12 @@ jobs:
|
|||||||
cuda-major-version: "12"
|
cuda-major-version: "12"
|
||||||
cuda-minor-version: "0"
|
cuda-minor-version: "0"
|
||||||
platforms: 'linux/arm64'
|
platforms: 'linux/arm64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-nvidia-l4t-arm64'
|
tag-suffix: '-nvidia-l4t-arm64'
|
||||||
|
latest-image: 'latest-nvidia-l4t-arm64'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'core'
|
||||||
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
||||||
runs-on: 'ubuntu-24.04-arm'
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
skip-drivers: 'true'
|
skip-drivers: 'true'
|
||||||
87
.github/workflows/image_build.yml
vendored
87
.github/workflows/image_build.yml
vendored
@@ -33,14 +33,30 @@ on:
|
|||||||
description: 'Tag latest'
|
description: 'Tag latest'
|
||||||
default: ''
|
default: ''
|
||||||
type: string
|
type: string
|
||||||
|
latest-image:
|
||||||
|
description: 'Tag latest'
|
||||||
|
default: ''
|
||||||
|
type: string
|
||||||
|
latest-image-aio:
|
||||||
|
description: 'Tag latest'
|
||||||
|
default: ''
|
||||||
|
type: string
|
||||||
tag-suffix:
|
tag-suffix:
|
||||||
description: 'Tag suffix'
|
description: 'Tag suffix'
|
||||||
default: ''
|
default: ''
|
||||||
type: string
|
type: string
|
||||||
|
ffmpeg:
|
||||||
|
description: 'FFMPEG'
|
||||||
|
default: ''
|
||||||
|
type: string
|
||||||
skip-drivers:
|
skip-drivers:
|
||||||
description: 'Skip drivers by default'
|
description: 'Skip drivers by default'
|
||||||
default: 'false'
|
default: 'false'
|
||||||
type: string
|
type: string
|
||||||
|
image-type:
|
||||||
|
description: 'Image type'
|
||||||
|
default: ''
|
||||||
|
type: string
|
||||||
runs-on:
|
runs-on:
|
||||||
description: 'Runs on'
|
description: 'Runs on'
|
||||||
required: true
|
required: true
|
||||||
@@ -69,22 +85,6 @@ jobs:
|
|||||||
reusable_image-build:
|
reusable_image-build:
|
||||||
runs-on: ${{ inputs.runs-on }}
|
runs-on: ${{ inputs.runs-on }}
|
||||||
steps:
|
steps:
|
||||||
|
|
||||||
- name: Free Disk Space (Ubuntu)
|
|
||||||
if: inputs.runs-on == 'ubuntu-latest'
|
|
||||||
uses: jlumbroso/free-disk-space@main
|
|
||||||
with:
|
|
||||||
# this might remove tools that are actually needed,
|
|
||||||
# if set to "true" but frees about 6 GB
|
|
||||||
tool-cache: true
|
|
||||||
# all of these default to true, but feel free to set to
|
|
||||||
# "false" if necessary for your workflow
|
|
||||||
android: true
|
|
||||||
dotnet: true
|
|
||||||
haskell: true
|
|
||||||
large-packages: true
|
|
||||||
docker-images: true
|
|
||||||
swap-storage: true
|
|
||||||
- name: Force Install GIT latest
|
- name: Force Install GIT latest
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get update \
|
sudo apt-get update \
|
||||||
@@ -106,8 +106,8 @@ jobs:
|
|||||||
df -h
|
df -h
|
||||||
echo
|
echo
|
||||||
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
|
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
|
||||||
sudo apt-get remove --auto-remove android-sdk-platform-tools snapd || true
|
sudo apt-get remove --auto-remove android-sdk-platform-tools || true
|
||||||
sudo apt-get purge --auto-remove android-sdk-platform-tools snapd || true
|
sudo apt-get purge --auto-remove android-sdk-platform-tools || true
|
||||||
sudo rm -rf /usr/local/lib/android
|
sudo rm -rf /usr/local/lib/android
|
||||||
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
|
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
|
||||||
sudo rm -rf /usr/share/dotnet
|
sudo rm -rf /usr/share/dotnet
|
||||||
@@ -152,18 +152,18 @@ jobs:
|
|||||||
type=sha
|
type=sha
|
||||||
flavor: |
|
flavor: |
|
||||||
latest=${{ inputs.tag-latest }}
|
latest=${{ inputs.tag-latest }}
|
||||||
suffix=${{ inputs.tag-suffix }},onlatest=true
|
suffix=${{ inputs.tag-suffix }}
|
||||||
- name: Docker meta for PR
|
- name: Docker meta for PR
|
||||||
id: meta_pull_request
|
id: meta_pull_request
|
||||||
if: github.event_name == 'pull_request'
|
if: github.event_name == 'pull_request'
|
||||||
uses: docker/metadata-action@v5
|
uses: docker/metadata-action@v5
|
||||||
with:
|
with:
|
||||||
images: |
|
images: |
|
||||||
quay.io/go-skynet/ci-tests
|
ttl.sh/localai-ci-pr-${{ github.event.number }}
|
||||||
tags: |
|
tags: |
|
||||||
type=ref,event=branch,suffix=localai${{ github.event.number }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
|
type=ref,event=branch
|
||||||
type=semver,pattern={{raw}},suffix=localai${{ github.event.number }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
|
type=semver,pattern={{raw}}
|
||||||
type=sha,suffix=localai${{ github.event.number }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
|
type=sha
|
||||||
flavor: |
|
flavor: |
|
||||||
latest=${{ inputs.tag-latest }}
|
latest=${{ inputs.tag-latest }}
|
||||||
suffix=${{ inputs.tag-suffix }}
|
suffix=${{ inputs.tag-suffix }}
|
||||||
@@ -179,7 +179,7 @@ jobs:
|
|||||||
type=semver,pattern={{raw}}
|
type=semver,pattern={{raw}}
|
||||||
flavor: |
|
flavor: |
|
||||||
latest=${{ inputs.tag-latest }}
|
latest=${{ inputs.tag-latest }}
|
||||||
suffix=${{ inputs.aio }},onlatest=true
|
suffix=${{ inputs.aio }}
|
||||||
|
|
||||||
- name: Docker meta AIO (dockerhub)
|
- name: Docker meta AIO (dockerhub)
|
||||||
if: inputs.aio != ''
|
if: inputs.aio != ''
|
||||||
@@ -192,8 +192,7 @@ jobs:
|
|||||||
type=ref,event=branch
|
type=ref,event=branch
|
||||||
type=semver,pattern={{raw}}
|
type=semver,pattern={{raw}}
|
||||||
flavor: |
|
flavor: |
|
||||||
latest=${{ inputs.tag-latest }}
|
suffix=${{ inputs.aio }}
|
||||||
suffix=${{ inputs.aio }},onlatest=true
|
|
||||||
|
|
||||||
- name: Set up QEMU
|
- name: Set up QEMU
|
||||||
uses: docker/setup-qemu-action@master
|
uses: docker/setup-qemu-action@master
|
||||||
@@ -232,6 +231,8 @@ jobs:
|
|||||||
BUILD_TYPE=${{ inputs.build-type }}
|
BUILD_TYPE=${{ inputs.build-type }}
|
||||||
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
|
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
|
||||||
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
|
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
|
||||||
|
FFMPEG=${{ inputs.ffmpeg }}
|
||||||
|
IMAGE_TYPE=${{ inputs.image-type }}
|
||||||
BASE_IMAGE=${{ inputs.base-image }}
|
BASE_IMAGE=${{ inputs.base-image }}
|
||||||
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
|
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
|
||||||
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
|
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
|
||||||
@@ -259,6 +260,8 @@ jobs:
|
|||||||
BUILD_TYPE=${{ inputs.build-type }}
|
BUILD_TYPE=${{ inputs.build-type }}
|
||||||
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
|
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
|
||||||
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
|
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
|
||||||
|
FFMPEG=${{ inputs.ffmpeg }}
|
||||||
|
IMAGE_TYPE=${{ inputs.image-type }}
|
||||||
BASE_IMAGE=${{ inputs.base-image }}
|
BASE_IMAGE=${{ inputs.base-image }}
|
||||||
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
|
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
|
||||||
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
|
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
|
||||||
@@ -269,9 +272,13 @@ jobs:
|
|||||||
file: ./Dockerfile
|
file: ./Dockerfile
|
||||||
cache-from: type=gha
|
cache-from: type=gha
|
||||||
platforms: ${{ inputs.platforms }}
|
platforms: ${{ inputs.platforms }}
|
||||||
#push: true
|
push: true
|
||||||
tags: ${{ steps.meta_pull_request.outputs.tags }}
|
tags: ${{ steps.meta_pull_request.outputs.tags }}
|
||||||
labels: ${{ steps.meta_pull_request.outputs.labels }}
|
labels: ${{ steps.meta_pull_request.outputs.labels }}
|
||||||
|
- name: Testing image
|
||||||
|
if: github.event_name == 'pull_request'
|
||||||
|
run: |
|
||||||
|
echo "Image is available at ttl.sh/localai-ci-pr-${{ github.event.number }}:${{ steps.meta_pull_request.outputs.version }}" >> $GITHUB_STEP_SUMMARY
|
||||||
## End testing image
|
## End testing image
|
||||||
- name: Build and push AIO image
|
- name: Build and push AIO image
|
||||||
if: inputs.aio != ''
|
if: inputs.aio != ''
|
||||||
@@ -303,6 +310,32 @@ jobs:
|
|||||||
tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
|
tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
|
||||||
labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}
|
labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}
|
||||||
|
|
||||||
|
- name: Cleanup
|
||||||
|
run: |
|
||||||
|
docker builder prune -f
|
||||||
|
docker system prune --force --volumes --all
|
||||||
|
|
||||||
|
- name: Latest tag
|
||||||
|
# run this on branches, when it is a tag and there is a latest-image defined
|
||||||
|
if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag'
|
||||||
|
run: |
|
||||||
|
docker pull localai/localai:${{ steps.meta.outputs.version }}
|
||||||
|
docker tag localai/localai:${{ steps.meta.outputs.version }} localai/localai:${{ inputs.latest-image }}
|
||||||
|
docker push localai/localai:${{ inputs.latest-image }}
|
||||||
|
docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
|
||||||
|
docker tag quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
|
||||||
|
docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
|
||||||
|
- name: Latest AIO tag
|
||||||
|
# run this on branches, when it is a tag and there is a latest-image defined
|
||||||
|
if: github.event_name != 'pull_request' && inputs.latest-image-aio != '' && github.ref_type == 'tag'
|
||||||
|
run: |
|
||||||
|
docker pull localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }}
|
||||||
|
docker tag localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} localai/localai:${{ inputs.latest-image-aio }}
|
||||||
|
docker push localai/localai:${{ inputs.latest-image-aio }}
|
||||||
|
docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }}
|
||||||
|
docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
|
||||||
|
docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
|
||||||
|
|
||||||
- name: job summary
|
- name: job summary
|
||||||
run: |
|
run: |
|
||||||
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
|
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
|
||||||
|
|||||||
6
.github/workflows/notify-models.yaml
vendored
6
.github/workflows/notify-models.yaml
vendored
@@ -18,7 +18,7 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
model: 'gemma-3-12b-it' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
|
model: 'gemma-3-12b-it' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
|
||||||
# Check the PR diff using the current branch and the base branch of the PR
|
# Check the PR diff using the current branch and the base branch of the PR
|
||||||
- uses: GrantBirki/git-diff-action@v2.8.1
|
- uses: GrantBirki/git-diff-action@v2.8.0
|
||||||
id: git-diff-action
|
id: git-diff-action
|
||||||
with:
|
with:
|
||||||
json_diff_file_output: diff.json
|
json_diff_file_output: diff.json
|
||||||
@@ -96,10 +96,10 @@ jobs:
|
|||||||
- name: Start LocalAI
|
- name: Start LocalAI
|
||||||
run: |
|
run: |
|
||||||
echo "Starting LocalAI..."
|
echo "Starting LocalAI..."
|
||||||
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master run --debug $MODEL_NAME
|
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
|
||||||
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
|
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
|
||||||
# Check the PR diff using the current branch and the base branch of the PR
|
# Check the PR diff using the current branch and the base branch of the PR
|
||||||
- uses: GrantBirki/git-diff-action@v2.8.1
|
- uses: GrantBirki/git-diff-action@v2.8.0
|
||||||
id: git-diff-action
|
id: git-diff-action
|
||||||
with:
|
with:
|
||||||
json_diff_file_output: diff.json
|
json_diff_file_output: diff.json
|
||||||
|
|||||||
324
.github/workflows/release.yaml
vendored
324
.github/workflows/release.yaml
vendored
@@ -1,26 +1,324 @@
|
|||||||
name: goreleaser
|
name: Build and Release
|
||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
tags:
|
tags:
|
||||||
- 'v*'
|
- 'v*'
|
||||||
|
pull_request:
|
||||||
|
|
||||||
|
env:
|
||||||
|
GRPC_VERSION: v1.65.0
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: write
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: ci-releases-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
goreleaser:
|
|
||||||
|
build-linux-arm:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Clone
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
submodules: true
|
||||||
- name: Set up Go
|
- uses: actions/setup-go@v5
|
||||||
uses: actions/setup-go@v5
|
|
||||||
with:
|
with:
|
||||||
go-version: 1.23
|
go-version: '1.21.x'
|
||||||
- name: Run GoReleaser
|
cache: false
|
||||||
uses: goreleaser/goreleaser-action@v6
|
- name: Dependencies
|
||||||
with:
|
run: |
|
||||||
version: v2.11.0
|
sudo apt-get update
|
||||||
args: release --clean
|
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk
|
||||||
|
sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
|
||||||
|
make install-go-tools
|
||||||
|
- name: Install CUDA Dependencies
|
||||||
|
run: |
|
||||||
|
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/cuda-keyring_1.1-1_all.deb
|
||||||
|
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y cuda-cross-aarch64 cuda-nvcc-cross-aarch64-${CUDA_VERSION} libcublas-cross-aarch64-${CUDA_VERSION}
|
||||||
env:
|
env:
|
||||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
CUDA_VERSION: 12-4
|
||||||
|
- name: Cache grpc
|
||||||
|
id: cache-grpc
|
||||||
|
uses: actions/cache@v4
|
||||||
|
with:
|
||||||
|
path: grpc
|
||||||
|
key: ${{ runner.os }}-arm-grpc-${{ env.GRPC_VERSION }}
|
||||||
|
- name: Build grpc
|
||||||
|
if: steps.cache-grpc.outputs.cache-hit != 'true'
|
||||||
|
run: |
|
||||||
|
|
||||||
|
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||||
|
cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
|
||||||
|
cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||||
|
-DgRPC_BUILD_TESTS=OFF \
|
||||||
|
../.. && sudo make --jobs 5 --output-sync=target
|
||||||
|
- name: Install gRPC
|
||||||
|
run: |
|
||||||
|
GNU_HOST=aarch64-linux-gnu
|
||||||
|
C_COMPILER_ARM_LINUX=$GNU_HOST-gcc
|
||||||
|
CXX_COMPILER_ARM_LINUX=$GNU_HOST-g++
|
||||||
|
|
||||||
|
CROSS_TOOLCHAIN=/usr/$GNU_HOST
|
||||||
|
CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
|
||||||
|
CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
|
||||||
|
|
||||||
|
# https://cmake.org/cmake/help/v3.13/manual/cmake-toolchains.7.html#cross-compiling-for-linux
|
||||||
|
echo "set(CMAKE_SYSTEM_NAME Linux)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
||||||
|
echo "set(CMAKE_SYSTEM_PROCESSOR arm)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
||||||
|
echo "set(CMAKE_STAGING_PREFIX $CROSS_STAGING_PREFIX)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
||||||
|
echo "set(CMAKE_SYSROOT ${CROSS_TOOLCHAIN}/sysroot)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
||||||
|
echo "set(CMAKE_C_COMPILER /usr/bin/$C_COMPILER_ARM_LINUX)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
||||||
|
echo "set(CMAKE_CXX_COMPILER /usr/bin/$CXX_COMPILER_ARM_LINUX)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
||||||
|
echo "set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
||||||
|
echo "set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
||||||
|
echo "set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
||||||
|
echo "set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN
|
||||||
|
GRPC_DIR=$PWD/grpc
|
||||||
|
cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install && \
|
||||||
|
GRPC_CROSS_BUILD_DIR=$GRPC_DIR/cmake/cross_build && \
|
||||||
|
mkdir -p $GRPC_CROSS_BUILD_DIR && \
|
||||||
|
cd $GRPC_CROSS_BUILD_DIR && \
|
||||||
|
cmake -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN \
|
||||||
|
-DCMAKE_BUILD_TYPE=Release \
|
||||||
|
-DCMAKE_INSTALL_PREFIX=$CROSS_TOOLCHAIN/grpc_install \
|
||||||
|
../.. && \
|
||||||
|
sudo make -j`nproc` install
|
||||||
|
- name: Build
|
||||||
|
id: build
|
||||||
|
run: |
|
||||||
|
GNU_HOST=aarch64-linux-gnu
|
||||||
|
C_COMPILER_ARM_LINUX=$GNU_HOST-gcc
|
||||||
|
CXX_COMPILER_ARM_LINUX=$GNU_HOST-g++
|
||||||
|
|
||||||
|
CROSS_TOOLCHAIN=/usr/$GNU_HOST
|
||||||
|
CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
|
||||||
|
CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
|
||||||
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||||
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||||
|
export PATH=$PATH:$GOPATH/bin
|
||||||
|
export PATH=/usr/local/cuda/bin:$PATH
|
||||||
|
sudo rm -rf /usr/aarch64-linux-gnu/lib/libstdc++.so.6
|
||||||
|
sudo cp -rf /usr/aarch64-linux-gnu/lib/libstdc++.so* /usr/aarch64-linux-gnu/lib/libstdc++.so.6
|
||||||
|
sudo cp /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 ld.so
|
||||||
|
BACKEND_LIBS="./grpc/cmake/cross_build/third_party/re2/libre2.a ./grpc/cmake/cross_build/libgrpc.a ./grpc/cmake/cross_build/libgrpc++.a ./grpc/cmake/cross_build/third_party/protobuf/libprotobuf.a /usr/aarch64-linux-gnu/lib/libc.so.6 /usr/aarch64-linux-gnu/lib/libstdc++.so.6 /usr/aarch64-linux-gnu/lib/libgomp.so.1 /usr/aarch64-linux-gnu/lib/libm.so.6 /usr/aarch64-linux-gnu/lib/libgcc_s.so.1 /usr/aarch64-linux-gnu/lib/libdl.so.2 /usr/aarch64-linux-gnu/lib/libpthread.so.0 ./ld.so" \
|
||||||
|
GOOS=linux \
|
||||||
|
GOARCH=arm64 \
|
||||||
|
CMAKE_ARGS="-DProtobuf_INCLUDE_DIRS=$CROSS_STAGING_PREFIX/include -DProtobuf_DIR=$CROSS_STAGING_PREFIX/lib/cmake/protobuf -DgRPC_DIR=$CROSS_STAGING_PREFIX/lib/cmake/grpc -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++" make dist-cross-linux-arm64
|
||||||
|
- uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: LocalAI-linux-arm64
|
||||||
|
path: release/
|
||||||
|
- name: Release
|
||||||
|
uses: softprops/action-gh-release@v2
|
||||||
|
if: startsWith(github.ref, 'refs/tags/')
|
||||||
|
with:
|
||||||
|
files: |
|
||||||
|
release/*
|
||||||
|
- name: Setup tmate session if tests fail
|
||||||
|
if: ${{ failure() }}
|
||||||
|
uses: mxschmitt/action-tmate@v3.22
|
||||||
|
with:
|
||||||
|
detached: true
|
||||||
|
connect-timeout-seconds: 180
|
||||||
|
limit-access-to-actor: true
|
||||||
|
build-linux:
|
||||||
|
runs-on: arc-runner-set
|
||||||
|
steps:
|
||||||
|
- name: Force Install GIT latest
|
||||||
|
run: |
|
||||||
|
sudo apt-get update \
|
||||||
|
&& sudo apt-get install -y software-properties-common \
|
||||||
|
&& sudo apt-get update \
|
||||||
|
&& sudo add-apt-repository -y ppa:git-core/ppa \
|
||||||
|
&& sudo apt-get update \
|
||||||
|
&& sudo apt-get install -y git
|
||||||
|
- name: Clone
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
submodules: true
|
||||||
|
- uses: actions/setup-go@v5
|
||||||
|
with:
|
||||||
|
go-version: '1.21.x'
|
||||||
|
cache: false
|
||||||
|
- name: Dependencies
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
|
||||||
|
make install-go-tools
|
||||||
|
- name: Intel Dependencies
|
||||||
|
run: |
|
||||||
|
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
|
||||||
|
echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
|
||||||
|
sudo apt update
|
||||||
|
sudo apt install -y intel-basekit
|
||||||
|
- name: Install CUDA Dependencies
|
||||||
|
run: |
|
||||||
|
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
||||||
|
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
|
||||||
|
env:
|
||||||
|
CUDA_VERSION: 12-5
|
||||||
|
- name: "Install Hipblas"
|
||||||
|
env:
|
||||||
|
ROCM_VERSION: "6.1"
|
||||||
|
AMDGPU_VERSION: "6.1"
|
||||||
|
run: |
|
||||||
|
set -ex
|
||||||
|
|
||||||
|
sudo apt-get update
|
||||||
|
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg
|
||||||
|
|
||||||
|
curl -sL https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add -
|
||||||
|
|
||||||
|
printf "deb [arch=amd64] https://repo.radeon.com/rocm/apt/$ROCM_VERSION/ jammy main" | sudo tee /etc/apt/sources.list.d/rocm.list
|
||||||
|
|
||||||
|
printf "deb [arch=amd64] https://repo.radeon.com/amdgpu/$AMDGPU_VERSION/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
|
||||||
|
printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
|
||||||
|
sudo apt-get update
|
||||||
|
|
||||||
|
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y \
|
||||||
|
hipblas-dev rocm-dev \
|
||||||
|
rocblas-dev
|
||||||
|
|
||||||
|
sudo apt-get clean
|
||||||
|
sudo rm -rf /var/lib/apt/lists/*
|
||||||
|
sudo ldconfig
|
||||||
|
- name: Cache grpc
|
||||||
|
id: cache-grpc
|
||||||
|
uses: actions/cache@v4
|
||||||
|
with:
|
||||||
|
path: grpc
|
||||||
|
key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
|
||||||
|
- name: Build grpc
|
||||||
|
if: steps.cache-grpc.outputs.cache-hit != 'true'
|
||||||
|
run: |
|
||||||
|
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||||
|
cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
|
||||||
|
cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||||
|
-DgRPC_BUILD_TESTS=OFF \
|
||||||
|
../.. && sudo make --jobs 5 --output-sync=target
|
||||||
|
- name: Install gRPC
|
||||||
|
run: |
|
||||||
|
cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
|
||||||
|
# BACKEND_LIBS needed for gpu-workload: /opt/intel/oneapi/*/lib/libiomp5.so /opt/intel/oneapi/*/lib/libmkl_core.so /opt/intel/oneapi/*/lib/libmkl_core.so.2 /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so.2 /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so.4 /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so.2 /opt/intel/oneapi/*/lib/libsycl.so /opt/intel/oneapi/*/lib/libsycl.so.7 /opt/intel/oneapi/*/lib/libsycl.so.7.1.0 /opt/rocm-*/lib/libamdhip64.so /opt/rocm-*/lib/libamdhip64.so.5 /opt/rocm-*/lib/libamdhip64.so.6 /opt/rocm-*/lib/libamdhip64.so.6.1.60100 /opt/rocm-*/lib/libhipblas.so /opt/rocm-*/lib/libhipblas.so.2 /opt/rocm-*/lib/libhipblas.so.2.1.60100 /opt/rocm-*/lib/librocblas.so /opt/rocm-*/lib/librocblas.so.4 /opt/rocm-*/lib/librocblas.so.4.1.60100 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/local/cuda-*/targets/x86_64-linux/lib/libcublas.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcublasLt.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcudart.so /usr/local/cuda-*/targets/x86_64-linux/lib/stubs/libcuda.so
|
||||||
|
- name: Build
|
||||||
|
id: build
|
||||||
|
run: |
|
||||||
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||||
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||||
|
export PATH=$PATH:$GOPATH/bin
|
||||||
|
export PATH=/usr/local/cuda/bin:$PATH
|
||||||
|
export PATH=/opt/rocm/bin:$PATH
|
||||||
|
source /opt/intel/oneapi/setvars.sh
|
||||||
|
sudo cp /lib64/ld-linux-x86-64.so.2 ld.so
|
||||||
|
BACKEND_LIBS="./ld.so ./sources/go-piper/piper/build/fi/lib/libfmt.a ./sources/go-piper/piper-phonemize/pi/lib/libonnxruntime.so.1.14.1 ./sources/go-piper/piper-phonemize/pi/src/libespeak-ng/libespeak-ng.so /usr/lib/x86_64-linux-gnu/libdl.so.2 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/lib/x86_64-linux-gnu/libpthread.so.0 ./sources/go-piper/piper-phonemize/pi/lib/libpiper_phonemize.so.1 ./sources/go-piper/piper/build/si/lib/libspdlog.a ./sources/go-piper/espeak/ei/lib/libucd.so" \
|
||||||
|
make -j4 dist
|
||||||
|
- uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: LocalAI-linux
|
||||||
|
path: release/
|
||||||
|
- name: Release
|
||||||
|
uses: softprops/action-gh-release@v2
|
||||||
|
if: startsWith(github.ref, 'refs/tags/')
|
||||||
|
with:
|
||||||
|
files: |
|
||||||
|
release/*
|
||||||
|
- name: Setup tmate session if tests fail
|
||||||
|
if: ${{ failure() }}
|
||||||
|
uses: mxschmitt/action-tmate@v3.22
|
||||||
|
with:
|
||||||
|
detached: true
|
||||||
|
connect-timeout-seconds: 180
|
||||||
|
limit-access-to-actor: true
|
||||||
|
|
||||||
|
|
||||||
|
build-macOS-x86_64:
|
||||||
|
runs-on: macos-13
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
submodules: true
|
||||||
|
- uses: actions/setup-go@v5
|
||||||
|
with:
|
||||||
|
go-version: '1.21.x'
|
||||||
|
cache: false
|
||||||
|
- name: Dependencies
|
||||||
|
run: |
|
||||||
|
brew install protobuf grpc
|
||||||
|
make install-go-tools
|
||||||
|
- name: Build
|
||||||
|
id: build
|
||||||
|
run: |
|
||||||
|
export C_INCLUDE_PATH=/usr/local/include
|
||||||
|
export CPLUS_INCLUDE_PATH=/usr/local/include
|
||||||
|
export PATH=$PATH:$GOPATH/bin
|
||||||
|
export SKIP_GRPC_BACKEND=backend-assets/grpc/whisper
|
||||||
|
make dist
|
||||||
|
- uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: LocalAI-MacOS-x86_64
|
||||||
|
path: release/
|
||||||
|
- name: Release
|
||||||
|
uses: softprops/action-gh-release@v2
|
||||||
|
if: startsWith(github.ref, 'refs/tags/')
|
||||||
|
with:
|
||||||
|
files: |
|
||||||
|
release/*
|
||||||
|
- name: Setup tmate session if tests fail
|
||||||
|
if: ${{ failure() }}
|
||||||
|
uses: mxschmitt/action-tmate@v3.22
|
||||||
|
with:
|
||||||
|
detached: true
|
||||||
|
connect-timeout-seconds: 180
|
||||||
|
limit-access-to-actor: true
|
||||||
|
|
||||||
|
build-macOS-arm64:
|
||||||
|
runs-on: macos-14
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
submodules: true
|
||||||
|
- uses: actions/setup-go@v5
|
||||||
|
with:
|
||||||
|
go-version: '1.21.x'
|
||||||
|
cache: false
|
||||||
|
- name: Dependencies
|
||||||
|
run: |
|
||||||
|
brew install protobuf grpc libomp llvm
|
||||||
|
make install-go-tools
|
||||||
|
- name: Build
|
||||||
|
id: build
|
||||||
|
run: |
|
||||||
|
export C_INCLUDE_PATH=/usr/local/include
|
||||||
|
export CPLUS_INCLUDE_PATH=/usr/local/include
|
||||||
|
export PATH=$PATH:$GOPATH/bin
|
||||||
|
export CC=/opt/homebrew/opt/llvm/bin/clang
|
||||||
|
make dist
|
||||||
|
- uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: LocalAI-MacOS-arm64
|
||||||
|
path: release/
|
||||||
|
- name: Release
|
||||||
|
uses: softprops/action-gh-release@v2
|
||||||
|
if: startsWith(github.ref, 'refs/tags/')
|
||||||
|
with:
|
||||||
|
files: |
|
||||||
|
release/*
|
||||||
|
- name: Setup tmate session if tests fail
|
||||||
|
if: ${{ failure() }}
|
||||||
|
uses: mxschmitt/action-tmate@v3.22
|
||||||
|
with:
|
||||||
|
detached: true
|
||||||
|
connect-timeout-seconds: 180
|
||||||
|
limit-access-to-actor: true
|
||||||
|
|||||||
2
.github/workflows/secscan.yaml
vendored
2
.github/workflows/secscan.yaml
vendored
@@ -18,7 +18,7 @@ jobs:
|
|||||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||||
- name: Run Gosec Security Scanner
|
- name: Run Gosec Security Scanner
|
||||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||||
uses: securego/gosec@v2.22.7
|
uses: securego/gosec@v2.22.4
|
||||||
with:
|
with:
|
||||||
# we let the report trigger content trigger a failure using the GitHub Security features.
|
# we let the report trigger content trigger a failure using the GitHub Security features.
|
||||||
args: '-no-fail -fmt sarif -out results.sarif ./...'
|
args: '-no-fail -fmt sarif -out results.sarif ./...'
|
||||||
|
|||||||
24
.github/workflows/stalebot.yml
vendored
24
.github/workflows/stalebot.yml
vendored
@@ -1,24 +0,0 @@
|
|||||||
name: 'Close stale issues and PRs'
|
|
||||||
permissions:
|
|
||||||
issues: write
|
|
||||||
pull-requests: write
|
|
||||||
on:
|
|
||||||
schedule:
|
|
||||||
- cron: '30 1 * * *'
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
stale:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9
|
|
||||||
with:
|
|
||||||
stale-issue-message: 'This issue is stale because it has been open 90 days with no activity. Remove stale label or comment or this will be closed in 5 days.'
|
|
||||||
stale-pr-message: 'This PR is stale because it has been open 90 days with no activity. Remove stale label or comment or this will be closed in 10 days.'
|
|
||||||
close-issue-message: 'This issue was closed because it has been stalled for 5 days with no activity.'
|
|
||||||
close-pr-message: 'This PR was closed because it has been stalled for 10 days with no activity.'
|
|
||||||
days-before-issue-stale: 90
|
|
||||||
days-before-pr-stale: 90
|
|
||||||
days-before-issue-close: 5
|
|
||||||
days-before-pr-close: 10
|
|
||||||
exempt-issue-labels: 'roadmap'
|
|
||||||
exempt-pr-labels: 'roadmap'
|
|
||||||
22
.github/workflows/test-extra.yml
vendored
22
.github/workflows/test-extra.yml
vendored
@@ -14,28 +14,6 @@ concurrency:
|
|||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
# Requires CUDA
|
|
||||||
# tests-chatterbox-tts:
|
|
||||||
# runs-on: ubuntu-latest
|
|
||||||
# steps:
|
|
||||||
# - name: Clone
|
|
||||||
# uses: actions/checkout@v4
|
|
||||||
# with:
|
|
||||||
# submodules: true
|
|
||||||
# - name: Dependencies
|
|
||||||
# run: |
|
|
||||||
# sudo apt-get update
|
|
||||||
# sudo apt-get install build-essential ffmpeg
|
|
||||||
# # Install UV
|
|
||||||
# curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
||||||
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
|
||||||
# sudo apt-get install -y libopencv-dev
|
|
||||||
# pip install --user --no-cache-dir grpcio-tools==1.64.1
|
|
||||||
|
|
||||||
# - name: Test chatterbox-tts
|
|
||||||
# run: |
|
|
||||||
# make --jobs=5 --output-sync=target -C backend/python/chatterbox
|
|
||||||
# make --jobs=5 --output-sync=target -C backend/python/chatterbox test
|
|
||||||
tests-transformers:
|
tests-transformers:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
|
|||||||
65
.github/workflows/test.yml
vendored
65
.github/workflows/test.yml
vendored
@@ -67,20 +67,18 @@ jobs:
|
|||||||
# You can test your matrix by printing the current Go version
|
# You can test your matrix by printing the current Go version
|
||||||
- name: Display Go version
|
- name: Display Go version
|
||||||
run: go version
|
run: go version
|
||||||
- name: Proto Dependencies
|
|
||||||
run: |
|
|
||||||
# Install protoc
|
|
||||||
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
|
|
||||||
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
|
||||||
rm protoc.zip
|
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
|
||||||
PATH="$PATH:$HOME/go/bin" make protogen-go
|
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
|
sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
|
||||||
sudo apt-get install -y libgmock-dev clang
|
sudo apt-get install -y libgmock-dev clang
|
||||||
|
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||||
|
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||||
|
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||||
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||||
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||||
|
sudo apt-get update && \
|
||||||
|
sudo apt-get install -y conda
|
||||||
# Install UV
|
# Install UV
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
sudo apt-get install -y ca-certificates cmake patch python3-pip unzip
|
sudo apt-get install -y ca-certificates cmake patch python3-pip unzip
|
||||||
@@ -96,15 +94,38 @@ jobs:
|
|||||||
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
|
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
|
||||||
export CUDACXX=/usr/local/cuda/bin/nvcc
|
export CUDACXX=/usr/local/cuda/bin/nvcc
|
||||||
|
|
||||||
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||||
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||||
|
go install github.com/GeertJohan/go.rice/rice@latest
|
||||||
|
|
||||||
# The python3-grpc-tools package in 22.04 is too old
|
# The python3-grpc-tools package in 22.04 is too old
|
||||||
pip install --user grpcio-tools==1.71.0 grpcio==1.71.0
|
pip install --user grpcio-tools
|
||||||
|
|
||||||
make -C backend/python/transformers
|
make -C backend/python/transformers
|
||||||
|
|
||||||
make backends/huggingface backends/llama-cpp backends/local-store backends/silero-vad backends/piper backends/whisper backends/stablediffusion-ggml
|
# Pre-build piper before we start tests in order to have shared libraries in place
|
||||||
|
make sources/go-piper && \
|
||||||
|
GO_TAGS="tts" make -C sources/go-piper piper.o && \
|
||||||
|
sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/
|
||||||
env:
|
env:
|
||||||
CUDA_VERSION: 12-4
|
CUDA_VERSION: 12-4
|
||||||
|
- name: Cache grpc
|
||||||
|
id: cache-grpc
|
||||||
|
uses: actions/cache@v4
|
||||||
|
with:
|
||||||
|
path: grpc
|
||||||
|
key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
|
||||||
|
- name: Build grpc
|
||||||
|
if: steps.cache-grpc.outputs.cache-hit != 'true'
|
||||||
|
run: |
|
||||||
|
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --jobs 5 --shallow-submodules https://github.com/grpc/grpc && \
|
||||||
|
cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && cd cmake/build && \
|
||||||
|
cmake -DgRPC_INSTALL=ON \
|
||||||
|
-DgRPC_BUILD_TESTS=OFF \
|
||||||
|
../.. && sudo make --jobs 5
|
||||||
|
- name: Install gRPC
|
||||||
|
run: |
|
||||||
|
cd grpc && cd cmake/build && sudo make --jobs 5 install
|
||||||
- name: Test
|
- name: Test
|
||||||
run: |
|
run: |
|
||||||
PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test
|
PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test
|
||||||
@@ -163,10 +184,16 @@ jobs:
|
|||||||
rm protoc.zip
|
rm protoc.zip
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||||
|
go install github.com/GeertJohan/go.rice/rice@latest
|
||||||
PATH="$PATH:$HOME/go/bin" make protogen-go
|
PATH="$PATH:$HOME/go/bin" make protogen-go
|
||||||
|
- name: Build images
|
||||||
|
run: |
|
||||||
|
docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=extras --build-arg EXTRA_BACKENDS=rerankers --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
|
||||||
|
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
|
||||||
- name: Test
|
- name: Test
|
||||||
run: |
|
run: |
|
||||||
PATH="$PATH:$HOME/go/bin" make backends/local-store backends/silero-vad backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio
|
PATH="$PATH:$HOME/go/bin" LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
|
||||||
|
make run-e2e-aio
|
||||||
- name: Setup tmate session if tests fail
|
- name: Setup tmate session if tests fail
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
uses: mxschmitt/action-tmate@v3.22
|
uses: mxschmitt/action-tmate@v3.22
|
||||||
@@ -196,15 +223,8 @@ jobs:
|
|||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
|
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
|
||||||
pip install --user --no-cache-dir grpcio-tools==1.71.0 grpcio==1.71.0
|
pip install --user --no-cache-dir grpcio-tools
|
||||||
- name: Build llama-cpp-darwin
|
go install github.com/GeertJohan/go.rice/rice@latest
|
||||||
run: |
|
|
||||||
make protogen-go
|
|
||||||
make build
|
|
||||||
bash scripts/build-llama-cpp-darwin.sh
|
|
||||||
ls -la build/darwin.tar
|
|
||||||
mv build/darwin.tar build/llama-cpp.tar
|
|
||||||
./local-ai backends install "ocifile://$PWD/build/llama-cpp.tar"
|
|
||||||
- name: Test
|
- name: Test
|
||||||
run: |
|
run: |
|
||||||
export C_INCLUDE_PATH=/usr/local/include
|
export C_INCLUDE_PATH=/usr/local/include
|
||||||
@@ -212,8 +232,7 @@ jobs:
|
|||||||
export CC=/opt/homebrew/opt/llvm/bin/clang
|
export CC=/opt/homebrew/opt/llvm/bin/clang
|
||||||
# Used to run the newer GNUMake version from brew that supports --output-sync
|
# Used to run the newer GNUMake version from brew that supports --output-sync
|
||||||
export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
|
export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
|
||||||
PATH="$PATH:$HOME/go/bin" make protogen-go
|
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
|
||||||
PATH="$PATH:$HOME/go/bin" BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
|
|
||||||
- name: Setup tmate session if tests fail
|
- name: Setup tmate session if tests fail
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
uses: mxschmitt/action-tmate@v3.22
|
uses: mxschmitt/action-tmate@v3.22
|
||||||
|
|||||||
10
.github/workflows/yaml-check.yml
vendored
10
.github/workflows/yaml-check.yml
vendored
@@ -8,7 +8,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- name: 'Checkout'
|
- name: 'Checkout'
|
||||||
uses: actions/checkout@master
|
uses: actions/checkout@master
|
||||||
- name: 'Yamllint model gallery'
|
- name: 'Yamllint'
|
||||||
uses: karancode/yamllint-github-action@master
|
uses: karancode/yamllint-github-action@master
|
||||||
with:
|
with:
|
||||||
yamllint_file_or_dir: 'gallery'
|
yamllint_file_or_dir: 'gallery'
|
||||||
@@ -16,11 +16,3 @@ jobs:
|
|||||||
yamllint_comment: true
|
yamllint_comment: true
|
||||||
env:
|
env:
|
||||||
GITHUB_ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
GITHUB_ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
- name: 'Yamllint Backend gallery'
|
|
||||||
uses: karancode/yamllint-github-action@master
|
|
||||||
with:
|
|
||||||
yamllint_file_or_dir: 'backend'
|
|
||||||
yamllint_strict: false
|
|
||||||
yamllint_comment: true
|
|
||||||
env:
|
|
||||||
GITHUB_ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
10
.gitignore
vendored
10
.gitignore
vendored
@@ -5,13 +5,9 @@ __pycache__/
|
|||||||
*.o
|
*.o
|
||||||
get-sources
|
get-sources
|
||||||
prepare-sources
|
prepare-sources
|
||||||
/backend/cpp/llama-cpp/grpc-server
|
/backend/cpp/llama/grpc-server
|
||||||
/backend/cpp/llama-cpp/llama.cpp
|
/backend/cpp/llama/llama.cpp
|
||||||
/backend/cpp/llama-*
|
/backend/cpp/llama-*
|
||||||
!backend/cpp/llama-cpp
|
|
||||||
/backends
|
|
||||||
/backend-images
|
|
||||||
/result.yaml
|
|
||||||
|
|
||||||
*.log
|
*.log
|
||||||
|
|
||||||
@@ -60,4 +56,4 @@ docs/static/gallery.html
|
|||||||
**/venv
|
**/venv
|
||||||
|
|
||||||
# per-developer customization files for the development container
|
# per-developer customization files for the development container
|
||||||
.devcontainer/customization/*
|
.devcontainer/customization/*
|
||||||
@@ -1,33 +0,0 @@
|
|||||||
version: 2
|
|
||||||
before:
|
|
||||||
hooks:
|
|
||||||
- make protogen-go
|
|
||||||
- go mod tidy
|
|
||||||
dist: release
|
|
||||||
source:
|
|
||||||
enabled: true
|
|
||||||
name_template: '{{ .ProjectName }}-{{ .Tag }}-source'
|
|
||||||
builds:
|
|
||||||
-
|
|
||||||
env:
|
|
||||||
- CGO_ENABLED=0
|
|
||||||
ldflags:
|
|
||||||
- -s -w
|
|
||||||
- -X "github.com/mudler/LocalAI/internal.Version={{ .Tag }}"
|
|
||||||
- -X "github.com/mudler/LocalAI/internal.Commit={{ .FullCommit }}"
|
|
||||||
goos:
|
|
||||||
- linux
|
|
||||||
- darwin
|
|
||||||
#- windows
|
|
||||||
goarch:
|
|
||||||
- amd64
|
|
||||||
- arm64
|
|
||||||
archives:
|
|
||||||
- formats: [ 'binary' ] # this removes the tar of the archives, leaving the binaries alone
|
|
||||||
name_template: local-ai-{{ .Tag }}-{{ .Os }}-{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}
|
|
||||||
checksum:
|
|
||||||
name_template: '{{ .ProjectName }}-{{ .Tag }}-checksums.txt'
|
|
||||||
snapshot:
|
|
||||||
version_template: "{{ .Tag }}-next"
|
|
||||||
changelog:
|
|
||||||
use: github-native
|
|
||||||
2
.vscode/launch.json
vendored
2
.vscode/launch.json
vendored
@@ -26,7 +26,7 @@
|
|||||||
"LOCALAI_P2P": "true",
|
"LOCALAI_P2P": "true",
|
||||||
"LOCALAI_FEDERATED": "true"
|
"LOCALAI_FEDERATED": "true"
|
||||||
},
|
},
|
||||||
"buildFlags": ["-tags", "", "-v"],
|
"buildFlags": ["-tags", "p2p tts", "-v"],
|
||||||
"envFile": "${workspaceFolder}/.env",
|
"envFile": "${workspaceFolder}/.env",
|
||||||
"cwd": "${workspaceRoot}"
|
"cwd": "${workspaceRoot}"
|
||||||
}
|
}
|
||||||
|
|||||||
385
Dockerfile
385
Dockerfile
@@ -1,31 +1,120 @@
|
|||||||
|
ARG IMAGE_TYPE=extras
|
||||||
ARG BASE_IMAGE=ubuntu:22.04
|
ARG BASE_IMAGE=ubuntu:22.04
|
||||||
ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
|
ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
|
||||||
ARG INTEL_BASE_IMAGE=${BASE_IMAGE}
|
ARG INTEL_BASE_IMAGE=${BASE_IMAGE}
|
||||||
|
|
||||||
FROM ${BASE_IMAGE} AS requirements
|
# The requirements-core target is common to all images. Nothing should be placed in requirements-core unless every single build will use it.
|
||||||
|
FROM ${BASE_IMAGE} AS requirements-core
|
||||||
|
|
||||||
|
USER root
|
||||||
|
|
||||||
|
ARG GO_VERSION=1.22.6
|
||||||
|
ARG CMAKE_VERSION=3.26.4
|
||||||
|
ARG CMAKE_FROM_SOURCE=false
|
||||||
|
ARG TARGETARCH
|
||||||
|
ARG TARGETVARIANT
|
||||||
|
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
ENV DEBIAN_FRONTEND=noninteractive
|
||||||
|
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh"
|
||||||
|
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
ca-certificates curl wget espeak-ng libgomp1 \
|
build-essential \
|
||||||
python3 python-is-python3 ffmpeg && \
|
ccache \
|
||||||
|
ca-certificates \
|
||||||
|
curl libssl-dev \
|
||||||
|
git \
|
||||||
|
git-lfs \
|
||||||
|
unzip upx-ucl && \
|
||||||
apt-get clean && \
|
apt-get clean && \
|
||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Install CMake (the version in 22.04 is too old)
|
||||||
|
RUN <<EOT bash
|
||||||
|
if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
|
||||||
|
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
|
||||||
|
else
|
||||||
|
apt-get update && \
|
||||||
|
apt-get install -y \
|
||||||
|
cmake && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
fi
|
||||||
|
EOT
|
||||||
|
|
||||||
|
# Install Go
|
||||||
|
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
|
||||||
|
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
|
||||||
|
|
||||||
|
# Install grpc compilers and rice
|
||||||
|
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
|
||||||
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af && \
|
||||||
|
go install github.com/GeertJohan/go.rice/rice@latest
|
||||||
|
|
||||||
|
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
|
||||||
|
RUN update-ca-certificates
|
||||||
|
|
||||||
|
RUN test -n "$TARGETARCH" \
|
||||||
|
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
|
||||||
|
|
||||||
|
# Use the variables in subsequent instructions
|
||||||
|
RUN echo "Target Architecture: $TARGETARCH"
|
||||||
|
RUN echo "Target Variant: $TARGETVARIANT"
|
||||||
|
|
||||||
|
# Cuda
|
||||||
|
ENV PATH=/usr/local/cuda/bin:${PATH}
|
||||||
|
|
||||||
|
# HipBLAS requirements
|
||||||
|
ENV PATH=/opt/rocm/bin:${PATH}
|
||||||
|
|
||||||
|
# OpenBLAS requirements and stable diffusion
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y --no-install-recommends \
|
||||||
|
libopenblas-dev && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
WORKDIR /build
|
||||||
|
|
||||||
|
###################################
|
||||||
|
###################################
|
||||||
|
|
||||||
|
# The requirements-extras target is for any builds with IMAGE_TYPE=extras. Nothing should be placed in this target unless every IMAGE_TYPE=extras build will use it.
|
||||||
|
FROM requirements-core AS requirements-extras
|
||||||
|
|
||||||
|
# Install uv as a system package
|
||||||
|
RUN curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR=/usr/bin sh
|
||||||
|
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||||
|
|
||||||
|
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y --no-install-recommends \
|
||||||
|
espeak-ng \
|
||||||
|
espeak \
|
||||||
|
python3-pip \
|
||||||
|
python-is-python3 \
|
||||||
|
python3-dev llvm \
|
||||||
|
python3-venv && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/* && \
|
||||||
|
pip install --upgrade pip
|
||||||
|
|
||||||
|
# Install grpcio-tools (the version in 22.04 is too old)
|
||||||
|
RUN pip install --user grpcio-tools
|
||||||
|
|
||||||
|
###################################
|
||||||
|
###################################
|
||||||
|
|
||||||
# The requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
|
# The requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
|
||||||
FROM requirements AS requirements-drivers
|
# This target will be built on top of requirements-core or requirements-extras as determined by the IMAGE_TYPE build-arg
|
||||||
|
FROM requirements-${IMAGE_TYPE} AS requirements-drivers
|
||||||
|
|
||||||
ARG BUILD_TYPE
|
ARG BUILD_TYPE
|
||||||
ARG CUDA_MAJOR_VERSION=12
|
ARG CUDA_MAJOR_VERSION=12
|
||||||
ARG CUDA_MINOR_VERSION=0
|
ARG CUDA_MINOR_VERSION=0
|
||||||
ARG SKIP_DRIVERS=false
|
ARG SKIP_DRIVERS=false
|
||||||
ARG TARGETARCH
|
|
||||||
ARG TARGETVARIANT
|
|
||||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
|
||||||
|
|
||||||
RUN mkdir -p /run/localai
|
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||||
RUN echo "default" > /run/localai/capability
|
|
||||||
|
|
||||||
# Vulkan requirements
|
# Vulkan requirements
|
||||||
RUN <<EOT bash
|
RUN <<EOT bash
|
||||||
@@ -39,8 +128,7 @@ RUN <<EOT bash
|
|||||||
apt-get install -y \
|
apt-get install -y \
|
||||||
vulkan-sdk && \
|
vulkan-sdk && \
|
||||||
apt-get clean && \
|
apt-get clean && \
|
||||||
rm -rf /var/lib/apt/lists/* && \
|
rm -rf /var/lib/apt/lists/*
|
||||||
echo "vulkan" > /run/localai/capability
|
|
||||||
fi
|
fi
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
@@ -67,8 +155,7 @@ RUN <<EOT bash
|
|||||||
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||||
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
|
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
|
||||||
apt-get clean && \
|
apt-get clean && \
|
||||||
rm -rf /var/lib/apt/lists/* && \
|
rm -rf /var/lib/apt/lists/*
|
||||||
echo "nvidia" > /run/localai/capability
|
|
||||||
fi
|
fi
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
@@ -88,88 +175,11 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
|||||||
rocblas-dev && \
|
rocblas-dev && \
|
||||||
apt-get clean && \
|
apt-get clean && \
|
||||||
rm -rf /var/lib/apt/lists/* && \
|
rm -rf /var/lib/apt/lists/* && \
|
||||||
echo "amd" > /run/localai/capability && \
|
|
||||||
# I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
|
# I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
|
||||||
# to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
|
# to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
|
||||||
ldconfig \
|
ldconfig \
|
||||||
; fi
|
; fi
|
||||||
|
|
||||||
# Cuda
|
|
||||||
ENV PATH=/usr/local/cuda/bin:${PATH}
|
|
||||||
|
|
||||||
# HipBLAS requirements
|
|
||||||
ENV PATH=/opt/rocm/bin:${PATH}
|
|
||||||
|
|
||||||
###################################
|
|
||||||
###################################
|
|
||||||
|
|
||||||
# The requirements-core target is common to all images. Nothing should be placed in requirements-core unless every single build will use it.
|
|
||||||
FROM requirements-drivers AS build-requirements
|
|
||||||
|
|
||||||
ARG GO_VERSION=1.22.6
|
|
||||||
ARG CMAKE_VERSION=3.26.4
|
|
||||||
ARG CMAKE_FROM_SOURCE=false
|
|
||||||
ARG TARGETARCH
|
|
||||||
ARG TARGETVARIANT
|
|
||||||
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
build-essential \
|
|
||||||
ccache \
|
|
||||||
ca-certificates espeak-ng \
|
|
||||||
curl libssl-dev \
|
|
||||||
git \
|
|
||||||
git-lfs \
|
|
||||||
unzip upx-ucl python3 python-is-python3 && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# Install CMake (the version in 22.04 is too old)
|
|
||||||
RUN <<EOT bash
|
|
||||||
if [ "${CMAKE_FROM_SOURCE}" = "true" ]; then
|
|
||||||
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
|
|
||||||
else
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y \
|
|
||||||
cmake && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
fi
|
|
||||||
EOT
|
|
||||||
|
|
||||||
# Install Go
|
|
||||||
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
|
|
||||||
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
|
|
||||||
|
|
||||||
# Install grpc compilers
|
|
||||||
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
|
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
|
||||||
|
|
||||||
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
|
|
||||||
RUN update-ca-certificates
|
|
||||||
|
|
||||||
|
|
||||||
# OpenBLAS requirements and stable diffusion
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
libopenblas-dev && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
RUN test -n "$TARGETARCH" \
|
|
||||||
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
|
|
||||||
|
|
||||||
# Use the variables in subsequent instructions
|
|
||||||
RUN echo "Target Architecture: $TARGETARCH"
|
|
||||||
RUN echo "Target Variant: $TARGETVARIANT"
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
WORKDIR /build
|
|
||||||
|
|
||||||
|
|
||||||
###################################
|
###################################
|
||||||
###################################
|
###################################
|
||||||
|
|
||||||
@@ -180,25 +190,69 @@ FROM ${INTEL_BASE_IMAGE} AS intel
|
|||||||
RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
|
RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
|
||||||
gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg
|
gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg
|
||||||
RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list
|
RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list
|
||||||
|
|
||||||
|
###################################
|
||||||
|
###################################
|
||||||
|
|
||||||
|
# The grpc target does one thing: it builds and installs GRPC. This is in its own layer so that it can be effectively cached by CI.
|
||||||
|
# You probably don't need to change anything here, and if you do, make sure that CI is adjusted so that the cache continues to work.
|
||||||
|
FROM ${GRPC_BASE_IMAGE} AS grpc
|
||||||
|
|
||||||
|
# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
|
||||||
|
ARG GRPC_MAKEFLAGS="-j4 -Otarget"
|
||||||
|
ARG GRPC_VERSION=v1.65.0
|
||||||
|
ARG CMAKE_FROM_SOURCE=false
|
||||||
|
ARG CMAKE_VERSION=3.26.4
|
||||||
|
|
||||||
|
ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
|
||||||
|
|
||||||
|
WORKDIR /build
|
||||||
|
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
intel-oneapi-runtime-libs && \
|
ca-certificates \
|
||||||
|
build-essential curl libssl-dev \
|
||||||
|
git && \
|
||||||
apt-get clean && \
|
apt-get clean && \
|
||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Install CMake (the version in 22.04 is too old)
|
||||||
|
# Build CMake from source only when CMAKE_FROM_SOURCE is exactly "true"; otherwise install the distro package
|
||||||
|
RUN <<EOT bash
|
||||||
|
if [ "${CMAKE_FROM_SOURCE}" = "true" ]; then
|
||||||
|
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
|
||||||
|
else
|
||||||
|
apt-get update && \
|
||||||
|
apt-get install -y \
|
||||||
|
cmake && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
fi
|
||||||
|
EOT
|
||||||
|
|
||||||
|
# We install GRPC to a different prefix here so that we can copy in only the build artifacts later
|
||||||
|
# saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree
|
||||||
|
# and running make install in the target container
|
||||||
|
RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||||
|
mkdir -p /build/grpc/cmake/build && \
|
||||||
|
cd /build/grpc/cmake/build && \
|
||||||
|
sed -i "216i\ TESTONLY" "../../third_party/abseil-cpp/absl/container/CMakeLists.txt" && \
|
||||||
|
cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \
|
||||||
|
make && \
|
||||||
|
make install && \
|
||||||
|
rm -rf /build
|
||||||
|
|
||||||
###################################
|
###################################
|
||||||
###################################
|
###################################
|
||||||
|
|
||||||
# The builder-base target has the arguments, variables, and copies shared between full builder images and the uncompiled devcontainer
|
# The builder-base target has the arguments, variables, and copies shared between full builder images and the uncompiled devcontainer
|
||||||
|
|
||||||
FROM build-requirements AS builder-base
|
FROM requirements-drivers AS builder-base
|
||||||
|
|
||||||
ARG GO_TAGS=""
|
ARG GO_TAGS="tts p2p"
|
||||||
ARG GRPC_BACKENDS
|
ARG GRPC_BACKENDS
|
||||||
ARG MAKEFLAGS
|
ARG MAKEFLAGS
|
||||||
ARG LD_FLAGS="-s -w"
|
ARG LD_FLAGS="-s -w"
|
||||||
ARG TARGETARCH
|
|
||||||
ARG TARGETVARIANT
|
|
||||||
ENV GRPC_BACKENDS=${GRPC_BACKENDS}
|
ENV GRPC_BACKENDS=${GRPC_BACKENDS}
|
||||||
ENV GO_TAGS=${GO_TAGS}
|
ENV GO_TAGS=${GO_TAGS}
|
||||||
ENV MAKEFLAGS=${MAKEFLAGS}
|
ENV MAKEFLAGS=${MAKEFLAGS}
|
||||||
@@ -212,7 +266,9 @@ RUN echo "GO_TAGS: $GO_TAGS" && echo "TARGETARCH: $TARGETARCH"
|
|||||||
WORKDIR /build
|
WORKDIR /build
|
||||||
|
|
||||||
|
|
||||||
# We need protoc installed, and the version in 22.04 is too old.
|
# We need protoc installed, and the version in 22.04 is too old. We will create one as part of installing the GRPC build below
|
||||||
|
# but that will also bring in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only
|
||||||
|
# here so that we can generate the grpc code for the stablediffusion build
|
||||||
RUN <<EOT bash
|
RUN <<EOT bash
|
||||||
if [ "amd64" = "$TARGETARCH" ]; then
|
if [ "amd64" = "$TARGETARCH" ]; then
|
||||||
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-x86_64.zip -o protoc.zip && \
|
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-x86_64.zip -o protoc.zip && \
|
||||||
@@ -229,39 +285,34 @@ EOT
|
|||||||
###################################
|
###################################
|
||||||
###################################
|
###################################
|
||||||
|
|
||||||
# Compile backends first in a separate stage
|
|
||||||
FROM builder-base AS builder-backends
|
|
||||||
ARG TARGETARCH
|
|
||||||
ARG TARGETVARIANT
|
|
||||||
|
|
||||||
WORKDIR /build
|
|
||||||
|
|
||||||
COPY ./Makefile .
|
|
||||||
COPY ./backend ./backend
|
|
||||||
COPY ./go.mod .
|
|
||||||
COPY ./go.sum .
|
|
||||||
COPY ./.git ./.git
|
|
||||||
|
|
||||||
# Some of the Go backends use libs from the main src, we could further optimize the caching by building the CPP backends before here
|
|
||||||
COPY ./pkg/grpc ./pkg/grpc
|
|
||||||
COPY ./pkg/utils ./pkg/utils
|
|
||||||
COPY ./pkg/langchain ./pkg/langchain
|
|
||||||
|
|
||||||
RUN ls -l ./
|
|
||||||
RUN make protogen-go
|
|
||||||
|
|
||||||
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
|
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
|
||||||
# Adjustments to the build process should likely be made here.
|
# Adjustments to the build process should likely be made here.
|
||||||
FROM builder-backends AS builder
|
FROM builder-base AS builder
|
||||||
|
|
||||||
|
# Install the pre-built GRPC
|
||||||
|
COPY --from=grpc /opt/grpc /usr/local
|
||||||
|
|
||||||
|
# Rebuild with default backends
|
||||||
WORKDIR /build
|
WORKDIR /build
|
||||||
|
|
||||||
COPY . .
|
COPY . .
|
||||||
|
COPY .git .
|
||||||
|
|
||||||
|
RUN make prepare
|
||||||
|
|
||||||
## Build the binary
|
## Build the binary
|
||||||
## If we're on arm64 AND using cublas/hipblas, skip some of the llama-compat backends to save space
|
## If we're on arm64 OR building for hipblas, skip some of the llama-compat backends to save space
|
||||||
## Otherwise just run the normal build
|
## Otherwise just run the normal build
|
||||||
RUN make build
|
RUN if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
|
||||||
|
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx512 backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
|
||||||
|
else \
|
||||||
|
make build; \
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Make sure the piper lib directory exists and holds a placeholder file (presumably so the later COPY of pi/lib/* has something to match)
|
||||||
|
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
|
||||||
|
mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ && \
|
||||||
|
touch /build/sources/go-piper/piper-phonemize/pi/lib/keep \
|
||||||
|
; fi
|
||||||
|
|
||||||
###################################
|
###################################
|
||||||
###################################
|
###################################
|
||||||
@@ -271,11 +322,24 @@ RUN make build
|
|||||||
|
|
||||||
FROM builder-base AS devcontainer
|
FROM builder-base AS devcontainer
|
||||||
|
|
||||||
|
ARG FFMPEG
|
||||||
|
|
||||||
|
COPY --from=grpc /opt/grpc /usr/local
|
||||||
|
|
||||||
COPY .devcontainer-scripts /.devcontainer-scripts
|
COPY .devcontainer-scripts /.devcontainer-scripts
|
||||||
|
|
||||||
|
# Add FFmpeg
|
||||||
|
RUN if [ "${FFMPEG}" = "true" ]; then \
|
||||||
|
apt-get update && \
|
||||||
|
apt-get install -y --no-install-recommends \
|
||||||
|
ffmpeg && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/* \
|
||||||
|
; fi
|
||||||
|
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
ssh less
|
ssh less wget
|
||||||
# For the devcontainer, leave apt functional in case additional devtools are needed at runtime.
|
# For the devcontainer, leave apt functional in case additional devtools are needed at runtime.
|
||||||
|
|
||||||
RUN go install github.com/go-delve/delve/cmd/dlv@latest
|
RUN go install github.com/go-delve/delve/cmd/dlv@latest
|
||||||
@@ -289,27 +353,98 @@ RUN go install github.com/mikefarah/yq/v4@latest
|
|||||||
# If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
|
# If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
|
||||||
FROM requirements-drivers
|
FROM requirements-drivers
|
||||||
|
|
||||||
|
ARG FFMPEG
|
||||||
|
ARG BUILD_TYPE
|
||||||
|
ARG TARGETARCH
|
||||||
|
ARG IMAGE_TYPE=extras
|
||||||
|
ARG EXTRA_BACKENDS
|
||||||
|
ARG MAKEFLAGS
|
||||||
|
|
||||||
|
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||||
|
ENV REBUILD=false
|
||||||
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
|
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
|
||||||
|
ENV MAKEFLAGS=${MAKEFLAGS}
|
||||||
|
|
||||||
ARG CUDA_MAJOR_VERSION=12
|
ARG CUDA_MAJOR_VERSION=12
|
||||||
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||||
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
|
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
|
||||||
ENV NVIDIA_VISIBLE_DEVICES=all
|
ENV NVIDIA_VISIBLE_DEVICES=all
|
||||||
|
|
||||||
WORKDIR /
|
# Add FFmpeg
|
||||||
|
RUN if [ "${FFMPEG}" = "true" ]; then \
|
||||||
|
apt-get update && \
|
||||||
|
apt-get install -y --no-install-recommends \
|
||||||
|
ffmpeg && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/* \
|
||||||
|
; fi
|
||||||
|
|
||||||
COPY ./entrypoint.sh .
|
WORKDIR /build
|
||||||
|
|
||||||
|
# we start fresh & re-copy all assets because `make build` does not clean up nicely after itself
|
||||||
|
# so when `entrypoint.sh` runs `make build` again (which it does by default), the build would fail
|
||||||
|
# see https://github.com/go-skynet/LocalAI/pull/658#discussion_r1241971626 and
|
||||||
|
# https://github.com/go-skynet/LocalAI/pull/434
|
||||||
|
COPY . .
|
||||||
|
|
||||||
|
COPY --from=builder /build/sources ./sources/
|
||||||
|
COPY --from=grpc /opt/grpc /usr/local
|
||||||
|
|
||||||
|
RUN make prepare-sources
|
||||||
|
|
||||||
# Copy the binary
|
# Copy the binary
|
||||||
COPY --from=builder /build/local-ai ./
|
COPY --from=builder /build/local-ai ./
|
||||||
|
|
||||||
|
# Copy shared libraries for piper
|
||||||
|
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
|
||||||
|
|
||||||
|
# Change the shell to bash so we can use [[ tests below
|
||||||
|
SHELL ["/bin/bash", "-c"]
|
||||||
|
# We try to strike a balance between individual layer size (as that affects total push time) and total image size
|
||||||
|
# Splitting the backends into more groups with fewer items results in a larger image, but a smaller size for the largest layer
|
||||||
|
# Splitting the backends into fewer groups with more items results in a smaller image, but a larger size for the largest layer
|
||||||
|
|
||||||
|
# Install espeak-ng for the extras image only; refresh the apt index first since earlier layers may have removed /var/lib/apt/lists
|
||||||
|
RUN if [[ "${IMAGE_TYPE}" == "extras" ]]; then \
|
||||||
|
apt-get update && apt-get -qq -y install espeak-ng \
|
||||||
|
; fi
|
||||||
|
|
||||||
|
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
|
make -C backend/python/coqui \
|
||||||
|
; fi && \
|
||||||
|
if [[ ( "${EXTRA_BACKENDS}" =~ "faster-whisper" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
|
make -C backend/python/faster-whisper \
|
||||||
|
; fi && \
|
||||||
|
if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
|
make -C backend/python/diffusers \
|
||||||
|
; fi
|
||||||
|
|
||||||
|
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
|
make -C backend/python/kokoro \
|
||||||
|
; fi && \
|
||||||
|
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
|
make -C backend/python/exllama2 \
|
||||||
|
; fi && \
|
||||||
|
if [[ ( "${EXTRA_BACKENDS}" =~ "transformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
|
make -C backend/python/transformers \
|
||||||
|
; fi
|
||||||
|
|
||||||
|
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vllm" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
|
make -C backend/python/vllm \
|
||||||
|
; fi && \
|
||||||
|
if [[ ( "${EXTRA_BACKENDS}" =~ "bark" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
|
make -C backend/python/bark \
|
||||||
|
; fi && \
|
||||||
|
if [[ ( "${EXTRA_BACKENDS}" =~ "rerankers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
|
make -C backend/python/rerankers \
|
||||||
|
; fi
|
||||||
|
|
||||||
# Make sure the models directory exists
|
# Make sure the models directory exists
|
||||||
RUN mkdir -p /models /backends
|
RUN mkdir -p /build/models
|
||||||
|
|
||||||
# Define the health check command
|
# Define the health check command
|
||||||
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
|
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
|
||||||
CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1
|
CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1
|
||||||
|
|
||||||
VOLUME /models /backends
|
VOLUME /build/models
|
||||||
EXPOSE 8080
|
EXPOSE 8080
|
||||||
ENTRYPOINT [ "/entrypoint.sh" ]
|
ENTRYPOINT [ "/build/entrypoint.sh" ]
|
||||||
|
|||||||
5
Earthfile
Normal file
5
Earthfile
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
VERSION 0.7
|
||||||
|
|
||||||
|
build:
|
||||||
|
FROM DOCKERFILE -f Dockerfile .
|
||||||
|
SAVE ARTIFACT /usr/bin/local-ai AS LOCAL local-ai
|
||||||
773
Makefile
773
Makefile
@@ -3,12 +3,47 @@ GOTEST=$(GOCMD) test
|
|||||||
GOVET=$(GOCMD) vet
|
GOVET=$(GOCMD) vet
|
||||||
BINARY_NAME=local-ai
|
BINARY_NAME=local-ai
|
||||||
|
|
||||||
GORELEASER?=
|
DETECT_LIBS?=true
|
||||||
|
|
||||||
ONEAPI_VERSION?=2025.2
|
# llama.cpp versions
|
||||||
|
CPPLLAMA_VERSION?=e5c834f718a32b7584f142799bbf508fddb9021c
|
||||||
|
|
||||||
|
# whisper.cpp version
|
||||||
|
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
|
||||||
|
WHISPER_CPP_VERSION?=e41bc5c61ae66af6be2bd7011769bb821a83e8ae
|
||||||
|
|
||||||
|
# go-piper version
|
||||||
|
PIPER_REPO?=https://github.com/mudler/go-piper
|
||||||
|
PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
|
||||||
|
|
||||||
|
# bark.cpp
|
||||||
|
BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
|
||||||
|
BARKCPP_VERSION?=v1.0.0
|
||||||
|
|
||||||
|
# stablediffusion.cpp (ggml)
|
||||||
|
STABLEDIFFUSION_GGML_REPO?=https://github.com/richiejp/stable-diffusion.cpp
|
||||||
|
STABLEDIFFUSION_GGML_VERSION?=53e3b17eb3d0b5760ced06a1f98320b68b34aaae
|
||||||
|
|
||||||
|
# ONEAPI variables for SYCL
|
||||||
|
export ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
|
||||||
|
|
||||||
|
ONNX_VERSION?=1.20.0
|
||||||
|
ONNX_ARCH?=x64
|
||||||
|
ONNX_OS?=linux
|
||||||
|
|
||||||
export BUILD_TYPE?=
|
export BUILD_TYPE?=
|
||||||
|
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
|
||||||
|
export CMAKE_ARGS?=-DBUILD_SHARED_LIBS=OFF
|
||||||
|
export WHISPER_CMAKE_ARGS?=-DBUILD_SHARED_LIBS=OFF
|
||||||
|
export BACKEND_LIBS?=
|
||||||
|
export WHISPER_DIR=$(abspath ./sources/whisper.cpp)
|
||||||
|
export WHISPER_INCLUDE_PATH=$(WHISPER_DIR)/include:$(WHISPER_DIR)/ggml/include
|
||||||
|
export WHISPER_LIBRARY_PATH=$(WHISPER_DIR)/build/src/:$(WHISPER_DIR)/build/ggml/src
|
||||||
|
|
||||||
|
CGO_LDFLAGS?=
|
||||||
|
CGO_LDFLAGS_WHISPER?=
|
||||||
|
CGO_LDFLAGS_WHISPER+=-lggml
|
||||||
|
CUDA_LIBPATH?=/usr/local/cuda/lib64/
|
||||||
GO_TAGS?=
|
GO_TAGS?=
|
||||||
BUILD_ID?=
|
BUILD_ID?=
|
||||||
NATIVE?=false
|
NATIVE?=false
|
||||||
@@ -35,6 +70,14 @@ WHITE := $(shell tput -Txterm setaf 7)
|
|||||||
CYAN := $(shell tput -Txterm setaf 6)
|
CYAN := $(shell tput -Txterm setaf 6)
|
||||||
RESET := $(shell tput -Txterm sgr0)
|
RESET := $(shell tput -Txterm sgr0)
|
||||||
|
|
||||||
|
UPX?=
|
||||||
|
# check if upx exists
|
||||||
|
ifeq (, $(shell which upx))
|
||||||
|
UPX=
|
||||||
|
else
|
||||||
|
UPX=$(shell which upx)
|
||||||
|
endif
|
||||||
|
|
||||||
# Default Docker bridge IP
|
# Default Docker bridge IP
|
||||||
E2E_BRIDGE_IP?=172.17.0.1
|
E2E_BRIDGE_IP?=172.17.0.1
|
||||||
|
|
||||||
@@ -42,152 +85,409 @@ ifndef UNAME_S
|
|||||||
UNAME_S := $(shell uname -s)
|
UNAME_S := $(shell uname -s)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
# IF native is false, we add -DGGML_NATIVE=OFF to CMAKE_ARGS
|
||||||
|
ifeq ($(NATIVE),false)
|
||||||
|
CMAKE_ARGS+=-DGGML_NATIVE=OFF
|
||||||
|
WHISPER_CMAKE_ARGS+=-DGGML_NATIVE=OFF
|
||||||
|
endif
|
||||||
|
|
||||||
|
# Detect if we are running on arm64
|
||||||
|
ifneq (,$(findstring aarch64,$(shell uname -m)))
|
||||||
|
ONNX_ARCH=aarch64
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(OS),Darwin)
|
ifeq ($(OS),Darwin)
|
||||||
|
ONNX_OS=osx
|
||||||
|
ifneq (,$(findstring aarch64,$(shell uname -m)))
|
||||||
|
ONNX_ARCH=arm64
|
||||||
|
else ifneq (,$(findstring arm64,$(shell uname -m)))
|
||||||
|
ONNX_ARCH=arm64
|
||||||
|
else
|
||||||
|
ONNX_ARCH=x86_64
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(OSX_SIGNING_IDENTITY),)
|
ifeq ($(OSX_SIGNING_IDENTITY),)
|
||||||
OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
|
OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
|
||||||
endif
|
endif
|
||||||
endif
|
|
||||||
|
|
||||||
# check if goreleaser exists
|
# on OSX, if BUILD_TYPE is blank, we should default to use Metal
|
||||||
ifeq (, $(shell which goreleaser))
|
ifeq ($(BUILD_TYPE),)
|
||||||
GORELEASER=curl -sfL https://goreleaser.com/static/run | bash -s --
|
BUILD_TYPE=metal
|
||||||
|
# disable metal if on Darwin and any other value is explicitly passed.
|
||||||
|
else ifneq ($(BUILD_TYPE),metal)
|
||||||
|
CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||||
|
WHISPER_CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||||
|
export GGML_NO_ACCELERATE=1
|
||||||
|
export GGML_NO_METAL=1
|
||||||
|
GO_LDFLAGS_WHISPER+=-lggml-blas
|
||||||
|
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-blas
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(BUILD_TYPE),metal)
|
||||||
|
CGO_LDFLAGS += -framework Accelerate
|
||||||
|
CGO_LDFLAGS_WHISPER+=-lggml-metal -lggml-blas
|
||||||
|
CMAKE_ARGS+=-DGGML_METAL=ON
|
||||||
|
CMAKE_ARGS+=-DGGML_METAL_USE_BF16=ON
|
||||||
|
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
|
||||||
|
CMAKE_ARGS+=-DGGML_OPENMP=OFF
|
||||||
|
WHISPER_CMAKE_ARGS+=-DGGML_METAL=ON
|
||||||
|
WHISPER_CMAKE_ARGS+=-DGGML_METAL_USE_BF16=ON
|
||||||
|
WHISPER_CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
|
||||||
|
WHISPER_CMAKE_ARGS+=-DWHISPER_BUILD_EXAMPLES=OFF
|
||||||
|
WHISPER_CMAKE_ARGS+=-DWHISPER_BUILD_TESTS=OFF
|
||||||
|
WHISPER_CMAKE_ARGS+=-DWHISPER_BUILD_SERVER=OFF
|
||||||
|
WHISPER_CMAKE_ARGS+=-DGGML_OPENMP=OFF
|
||||||
|
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-metal/:$(WHISPER_DIR)/build/ggml/src/ggml-blas
|
||||||
|
else
|
||||||
|
CGO_LDFLAGS_WHISPER+=-lggml-blas
|
||||||
|
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-blas
|
||||||
|
endif
|
||||||
else
|
else
|
||||||
GORELEASER=$(shell which goreleaser)
|
CGO_LDFLAGS_WHISPER+=-lgomp
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(BUILD_TYPE),openblas)
|
||||||
|
CGO_LDFLAGS+=-lopenblas
|
||||||
|
export GGML_OPENBLAS=1
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(BUILD_TYPE),cublas)
|
||||||
|
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH) -L$(CUDA_LIBPATH)/stubs/ -lcuda
|
||||||
|
export GGML_CUDA=1
|
||||||
|
CMAKE_ARGS+=-DGGML_CUDA=ON
|
||||||
|
WHISPER_CMAKE_ARGS+=-DGGML_CUDA=ON
|
||||||
|
CGO_LDFLAGS_WHISPER+=-lcufft -lggml-cuda
|
||||||
|
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-cuda/
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(BUILD_TYPE),vulkan)
|
||||||
|
CMAKE_ARGS+=-DGGML_VULKAN=1
|
||||||
|
WHISPER_CMAKE_ARGS+=-DGGML_VULKAN=1
|
||||||
|
CGO_LDFLAGS_WHISPER+=-lggml-vulkan -lvulkan
|
||||||
|
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-vulkan/
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
||||||
|
export GGML_SYCL=1
|
||||||
|
CMAKE_ARGS+=-DGGML_SYCL=ON
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(BUILD_TYPE),sycl_f16)
|
||||||
|
export GGML_SYCL_F16=1
|
||||||
|
CMAKE_ARGS+=-DGGML_SYCL_F16=ON
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(BUILD_TYPE),hipblas)
|
||||||
|
ROCM_HOME ?= /opt/rocm
|
||||||
|
ROCM_PATH ?= /opt/rocm
|
||||||
|
LD_LIBRARY_PATH ?= /opt/rocm/lib:/opt/rocm/llvm/lib
|
||||||
|
export CXX=$(ROCM_HOME)/llvm/bin/clang++
|
||||||
|
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||||
|
export STABLE_BUILD_TYPE=
|
||||||
|
export GGML_HIP=1
|
||||||
|
GPU_TARGETS ?= gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102
|
||||||
|
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
|
||||||
|
CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
|
||||||
|
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(BUILD_TYPE),metal)
|
||||||
|
CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
|
||||||
|
export GGML_METAL=1
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(BUILD_TYPE),clblas)
|
||||||
|
CGO_LDFLAGS+=-lOpenCL -lclblast
|
||||||
|
export GGML_OPENBLAS=1
|
||||||
|
endif
|
||||||
|
|
||||||
|
# glibc-static or glibc-devel-static required
|
||||||
|
ifeq ($(STATIC),true)
|
||||||
|
LD_FLAGS+=-linkmode external -extldflags -static
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(findstring tts,$(GO_TAGS)),tts)
|
||||||
|
# OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
|
||||||
|
# OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
|
||||||
|
PIPER_CGO_CXXFLAGS+=-I$(CURDIR)/sources/go-piper/piper/src/cpp -I$(CURDIR)/sources/go-piper/piper/build/fi/include -I$(CURDIR)/sources/go-piper/piper/build/pi/include -I$(CURDIR)/sources/go-piper/piper/build/si/include
|
||||||
|
PIPER_CGO_LDFLAGS+=-L$(CURDIR)/sources/go-piper/piper/build/fi/lib -L$(CURDIR)/sources/go-piper/piper/build/pi/lib -L$(CURDIR)/sources/go-piper/piper/build/si/lib -lfmt -lspdlog -lucd
|
||||||
|
OPTIONAL_GRPC+=backend-assets/grpc/piper
|
||||||
|
endif
|
||||||
|
|
||||||
|
ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
|
||||||
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
|
||||||
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
|
||||||
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx512
|
||||||
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
|
||||||
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
|
||||||
|
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
|
||||||
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
|
||||||
|
|
||||||
|
ifeq ($(ONNX_OS),linux)
|
||||||
|
ifeq ($(ONNX_ARCH),x64)
|
||||||
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/bark-cpp
|
||||||
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/stablediffusion-ggml
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
|
||||||
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/silero-vad
|
||||||
|
ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
|
||||||
|
# Use filter-out to remove the specified backends
|
||||||
|
ALL_GRPC_BACKENDS := $(filter-out $(SKIP_GRPC_BACKEND),$(ALL_GRPC_BACKENDS))
|
||||||
|
|
||||||
|
GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)
|
||||||
TEST_PATHS?=./api/... ./pkg/... ./core/...
|
TEST_PATHS?=./api/... ./pkg/... ./core/...
|
||||||
|
|
||||||
|
# If empty, then we build all
|
||||||
|
ifeq ($(GRPC_BACKENDS),)
|
||||||
|
GRPC_BACKENDS=$(ALL_GRPC_BACKENDS)
|
||||||
|
endif
|
||||||
|
|
||||||
.PHONY: all test build vendor
|
ifeq ($(BUILD_API_ONLY),true)
|
||||||
|
GRPC_BACKENDS=
|
||||||
|
endif
|
||||||
|
|
||||||
|
.PHONY: all test build vendor get-sources prepare-sources prepare
|
||||||
|
|
||||||
all: help
|
all: help
|
||||||
|
|
||||||
|
## bark.cpp
|
||||||
|
sources/bark.cpp:
|
||||||
|
git clone --recursive $(BARKCPP_REPO) sources/bark.cpp && \
|
||||||
|
cd sources/bark.cpp && \
|
||||||
|
git checkout $(BARKCPP_VERSION) && \
|
||||||
|
git submodule update --init --recursive --depth 1 --single-branch
|
||||||
|
|
||||||
|
sources/bark.cpp/build/libbark.a: sources/bark.cpp
|
||||||
|
cd sources/bark.cpp && \
|
||||||
|
mkdir -p build && \
|
||||||
|
cd build && \
|
||||||
|
cmake $(CMAKE_ARGS) .. && \
|
||||||
|
cmake --build . --config Release
|
||||||
|
|
||||||
|
backend/go/bark/libbark.a: sources/bark.cpp/build/libbark.a
|
||||||
|
$(MAKE) -C backend/go/bark libbark.a
|
||||||
|
|
||||||
|
## go-piper
|
||||||
|
sources/go-piper:
|
||||||
|
mkdir -p sources/go-piper
|
||||||
|
cd sources/go-piper && \
|
||||||
|
git init && \
|
||||||
|
git remote add origin $(PIPER_REPO) && \
|
||||||
|
git fetch origin && \
|
||||||
|
git checkout $(PIPER_VERSION) && \
|
||||||
|
git submodule update --init --recursive --depth 1 --single-branch
|
||||||
|
|
||||||
|
sources/go-piper/libpiper_binding.a: sources/go-piper
|
||||||
|
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
|
||||||
|
|
||||||
|
## stablediffusion (ggml)
|
||||||
|
sources/stablediffusion-ggml.cpp:
|
||||||
|
git clone --recursive $(STABLEDIFFUSION_GGML_REPO) sources/stablediffusion-ggml.cpp && \
|
||||||
|
cd sources/stablediffusion-ggml.cpp && \
|
||||||
|
git checkout $(STABLEDIFFUSION_GGML_VERSION) && \
|
||||||
|
git submodule update --init --recursive --depth 1 --single-branch
|
||||||
|
|
||||||
|
backend/go/image/stablediffusion-ggml/libsd.a: sources/stablediffusion-ggml.cpp
|
||||||
|
$(MAKE) -C backend/go/image/stablediffusion-ggml build/libstable-diffusion.a
|
||||||
|
$(MAKE) -C backend/go/image/stablediffusion-ggml libsd.a
|
||||||
|
|
||||||
|
backend-assets/grpc/stablediffusion-ggml: backend/go/image/stablediffusion-ggml/libsd.a backend-assets/grpc
|
||||||
|
$(MAKE) -C backend/go/image/stablediffusion-ggml CGO_LDFLAGS="$(CGO_LDFLAGS)" stablediffusion-ggml
|
||||||
|
|
||||||
|
sources/onnxruntime:
|
||||||
|
mkdir -p sources/onnxruntime
|
||||||
|
curl -L https://github.com/microsoft/onnxruntime/releases/download/v$(ONNX_VERSION)/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz -o sources/onnxruntime/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
|
||||||
|
cd sources/onnxruntime && tar -xvf onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz && rm onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
|
||||||
|
cd sources/onnxruntime && mv onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION)/* ./
|
||||||
|
|
||||||
|
backend-assets/lib/libonnxruntime.so.1: backend-assets/lib sources/onnxruntime
|
||||||
|
cp -rfv sources/onnxruntime/lib/* backend-assets/lib/
|
||||||
|
ifeq ($(OS),Darwin)
|
||||||
|
mv backend-assets/lib/libonnxruntime.$(ONNX_VERSION).dylib backend-assets/lib/libonnxruntime.dylib
|
||||||
|
else
|
||||||
|
mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
|
||||||
|
endif
|
||||||
|
|
||||||
|
## whisper
|
||||||
|
sources/whisper.cpp:
|
||||||
|
mkdir -p sources/whisper.cpp
|
||||||
|
cd sources/whisper.cpp && \
|
||||||
|
git init && \
|
||||||
|
git remote add origin $(WHISPER_REPO) && \
|
||||||
|
git fetch origin && \
|
||||||
|
git checkout $(WHISPER_CPP_VERSION) && \
|
||||||
|
git submodule update --init --recursive --depth 1 --single-branch
|
||||||
|
|
||||||
|
sources/whisper.cpp/build/src/libwhisper.a: sources/whisper.cpp
|
||||||
|
cd sources/whisper.cpp && cmake $(WHISPER_CMAKE_ARGS) . -B ./build
|
||||||
|
cd sources/whisper.cpp/build && cmake --build . --config Release
|
||||||
|
|
||||||
|
get-sources: sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
|
||||||
|
|
||||||
|
replace:
|
||||||
|
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
|
||||||
|
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
|
||||||
|
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
||||||
|
|
||||||
|
dropreplace:
|
||||||
|
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
|
||||||
|
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
|
||||||
|
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
||||||
|
|
||||||
|
prepare-sources: get-sources replace
|
||||||
|
$(GOCMD) mod download
|
||||||
|
|
||||||
## GENERIC
|
## GENERIC
|
||||||
rebuild: ## Rebuilds the project
|
rebuild: ## Rebuilds the project
|
||||||
$(GOCMD) clean -cache
|
$(GOCMD) clean -cache
|
||||||
|
$(MAKE) -C sources/whisper.cpp clean
|
||||||
|
$(MAKE) -C sources/go-piper clean
|
||||||
$(MAKE) build
|
$(MAKE) build
|
||||||
|
|
||||||
|
prepare: prepare-sources $(OPTIONAL_TARGETS)
|
||||||
|
|
||||||
clean: ## Remove build related file
|
clean: ## Remove build related file
|
||||||
$(GOCMD) clean -cache
|
$(GOCMD) clean -cache
|
||||||
rm -f prepare
|
rm -f prepare
|
||||||
|
rm -rf ./sources
|
||||||
rm -rf $(BINARY_NAME)
|
rm -rf $(BINARY_NAME)
|
||||||
rm -rf release/
|
rm -rf release/
|
||||||
|
rm -rf backend-assets/*
|
||||||
|
$(MAKE) -C backend/cpp/grpc clean
|
||||||
|
$(MAKE) -C backend/go/bark clean
|
||||||
|
$(MAKE) -C backend/cpp/llama clean
|
||||||
|
$(MAKE) -C backend/go/image/stablediffusion-ggml clean
|
||||||
|
rm -rf backend/cpp/llama-* || true
|
||||||
|
$(MAKE) dropreplace
|
||||||
$(MAKE) protogen-clean
|
$(MAKE) protogen-clean
|
||||||
rmdir pkg/grpc/proto || true
|
rmdir pkg/grpc/proto || true
|
||||||
|
|
||||||
clean-tests:
|
clean-tests:
|
||||||
rm -rf test-models
|
rm -rf test-models
|
||||||
rm -rf test-dir
|
rm -rf test-dir
|
||||||
|
rm -rf core/http/backend-assets
|
||||||
|
|
||||||
|
clean-dc: clean
|
||||||
|
cp -r /build/backend-assets /workspace/backend-assets
|
||||||
|
|
||||||
## Install Go tools
|
## Install Go tools
|
||||||
install-go-tools:
|
install-go-tools:
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||||
|
go install github.com/GeertJohan/go.rice/rice@latest
|
||||||
|
|
||||||
## Build:
|
## Build:
|
||||||
build: protogen-go install-go-tools ## Build the project
|
build: prepare backend-assets grpcs install-go-tools ## Build the project
|
||||||
$(info ${GREEN}I local-ai build info:${RESET})
|
$(info ${GREEN}I local-ai build info:${RESET})
|
||||||
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
|
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
|
||||||
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
|
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
|
||||||
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
|
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
|
||||||
$(info ${GREEN}I UPX: ${YELLOW}$(UPX)${RESET})
|
$(info ${GREEN}I UPX: ${YELLOW}$(UPX)${RESET})
|
||||||
|
ifneq ($(BACKEND_LIBS),)
|
||||||
|
$(MAKE) backend-assets/lib
|
||||||
|
cp -f $(BACKEND_LIBS) backend-assets/lib/
|
||||||
|
endif
|
||||||
rm -rf $(BINARY_NAME) || true
|
rm -rf $(BINARY_NAME) || true
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
|
||||||
|
rice append --exec $(BINARY_NAME)
|
||||||
|
|
||||||
dev-dist:
|
build-minimal:
|
||||||
$(GORELEASER) build --snapshot --clean
|
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build
|
||||||
|
|
||||||
|
build-api:
|
||||||
|
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=p2p $(MAKE) build
|
||||||
|
|
||||||
|
backend-assets/lib:
|
||||||
|
mkdir -p backend-assets/lib
|
||||||
|
|
||||||
dist:
|
dist:
|
||||||
$(GORELEASER) build --clean
|
$(MAKE) backend-assets/grpc/llama-cpp-avx2
|
||||||
|
ifeq ($(DETECT_LIBS),true)
|
||||||
|
scripts/prepare-libs.sh backend-assets/grpc/llama-cpp-avx2
|
||||||
|
endif
|
||||||
|
ifeq ($(OS),Darwin)
|
||||||
|
BUILD_TYPE=none $(MAKE) backend-assets/grpc/llama-cpp-fallback
|
||||||
|
else
|
||||||
|
$(MAKE) backend-assets/grpc/llama-cpp-cuda
|
||||||
|
$(MAKE) backend-assets/grpc/llama-cpp-hipblas
|
||||||
|
$(MAKE) backend-assets/grpc/llama-cpp-sycl_f16
|
||||||
|
$(MAKE) backend-assets/grpc/llama-cpp-sycl_f32
|
||||||
|
endif
|
||||||
|
GO_TAGS="tts p2p" $(MAKE) build
|
||||||
|
ifeq ($(DETECT_LIBS),true)
|
||||||
|
scripts/prepare-libs.sh backend-assets/grpc/piper
|
||||||
|
endif
|
||||||
|
GO_TAGS="tts p2p" STATIC=true $(MAKE) build
|
||||||
|
mkdir -p release
|
||||||
|
# if BUILD_ID is empty, then we don't append it to the binary name
|
||||||
|
ifeq ($(BUILD_ID),)
|
||||||
|
cp $(BINARY_NAME) release/$(BINARY_NAME)-$(OS)-$(ARCH)
|
||||||
|
shasum -a 256 release/$(BINARY_NAME)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(OS)-$(ARCH).sha256
|
||||||
|
else
|
||||||
|
cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH)
|
||||||
|
shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH).sha256
|
||||||
|
endif
|
||||||
|
|
||||||
|
dist-cross-linux-arm64:
|
||||||
|
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" GO_TAGS="p2p" \
|
||||||
|
STATIC=true $(MAKE) build
|
||||||
|
mkdir -p release
|
||||||
|
# if BUILD_ID is empty, then we don't append it to the binary name
|
||||||
|
ifeq ($(BUILD_ID),)
|
||||||
|
cp $(BINARY_NAME) release/$(BINARY_NAME)-$(OS)-arm64
|
||||||
|
shasum -a 256 release/$(BINARY_NAME)-$(OS)-arm64 > release/$(BINARY_NAME)-$(OS)-arm64.sha256
|
||||||
|
else
|
||||||
|
cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-arm64
|
||||||
|
shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-arm64 > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-arm64.sha256
|
||||||
|
endif
|
||||||
|
|
||||||
osx-signed: build
|
osx-signed: build
|
||||||
codesign --deep --force --sign "$(OSX_SIGNING_IDENTITY)" --entitlements "./Entitlements.plist" "./$(BINARY_NAME)"
|
codesign --deep --force --sign "$(OSX_SIGNING_IDENTITY)" --entitlements "./Entitlements.plist" "./$(BINARY_NAME)"
|
||||||
|
|
||||||
## Run
|
## Run
|
||||||
run: ## run local-ai
|
run: prepare ## run local-ai
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) run ./
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) run ./
|
||||||
|
|
||||||
test-models/testmodel.ggml:
|
test-models/testmodel.ggml:
|
||||||
mkdir test-models
|
mkdir test-models
|
||||||
mkdir test-dir
|
mkdir test-dir
|
||||||
wget -q https://huggingface.co/mradermacher/gpt2-alpaca-gpt4-GGUF/resolve/main/gpt2-alpaca-gpt4.Q4_K_M.gguf -O test-models/testmodel.ggml
|
wget -q https://huggingface.co/RichardErkhov/Qwen_-_Qwen2-1.5B-Instruct-gguf/resolve/main/Qwen2-1.5B-Instruct.Q2_K.gguf -O test-models/testmodel.ggml
|
||||||
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
|
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
|
||||||
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
|
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
|
||||||
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
|
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
|
||||||
cp tests/models_fixtures/* test-models
|
cp tests/models_fixtures/* test-models
|
||||||
|
|
||||||
prepare-test: protogen-go
|
prepare-test: grpcs
|
||||||
|
cp -rf backend-assets core/http
|
||||||
cp tests/models_fixtures/* test-models
|
cp tests/models_fixtures/* test-models
|
||||||
|
|
||||||
########################################################
|
|
||||||
## Tests
|
|
||||||
########################################################
|
|
||||||
|
|
||||||
## Test targets
|
## Test targets
|
||||||
test: test-models/testmodel.ggml protogen-go
|
test: prepare test-models/testmodel.ggml grpcs
|
||||||
@echo 'Running tests'
|
@echo 'Running tests'
|
||||||
export GO_TAGS="debug"
|
export GO_TAGS="tts debug"
|
||||||
$(MAKE) prepare-test
|
$(MAKE) prepare-test
|
||||||
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models BACKENDS_PATH=$(abspath ./)/backends \
|
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
||||||
$(MAKE) test-llama-gguf
|
$(MAKE) test-llama-gguf
|
||||||
$(MAKE) test-tts
|
$(MAKE) test-tts
|
||||||
$(MAKE) test-stablediffusion
|
$(MAKE) test-stablediffusion
|
||||||
|
|
||||||
backends/llama-cpp: docker-build-llama-cpp docker-save-llama-cpp build
|
|
||||||
./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)"
|
|
||||||
|
|
||||||
backends/piper: docker-build-piper docker-save-piper build
|
|
||||||
./local-ai backends install "ocifile://$(abspath ./backend-images/piper.tar)"
|
|
||||||
|
|
||||||
backends/stablediffusion-ggml: docker-build-stablediffusion-ggml docker-save-stablediffusion-ggml build
|
|
||||||
./local-ai backends install "ocifile://$(abspath ./backend-images/stablediffusion-ggml.tar)"
|
|
||||||
|
|
||||||
backends/whisper: docker-build-whisper docker-save-whisper build
|
|
||||||
./local-ai backends install "ocifile://$(abspath ./backend-images/whisper.tar)"
|
|
||||||
|
|
||||||
backends/silero-vad: docker-build-silero-vad docker-save-silero-vad build
|
|
||||||
./local-ai backends install "ocifile://$(abspath ./backend-images/silero-vad.tar)"
|
|
||||||
|
|
||||||
backends/local-store: docker-build-local-store docker-save-local-store build
|
|
||||||
./local-ai backends install "ocifile://$(abspath ./backend-images/local-store.tar)"
|
|
||||||
|
|
||||||
backends/huggingface: docker-build-huggingface docker-save-huggingface build
|
|
||||||
./local-ai backends install "ocifile://$(abspath ./backend-images/huggingface.tar)"
|
|
||||||
|
|
||||||
########################################################
|
|
||||||
## AIO tests
|
|
||||||
########################################################
|
|
||||||
|
|
||||||
docker-build-aio:
|
|
||||||
docker build --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
|
|
||||||
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test $(MAKE) docker-aio
|
|
||||||
|
|
||||||
e2e-aio:
|
|
||||||
LOCALAI_BACKEND_DIR=$(abspath ./backends) \
|
|
||||||
LOCALAI_MODELS_DIR=$(abspath ./models) \
|
|
||||||
LOCALAI_IMAGE_TAG=test \
|
|
||||||
LOCALAI_IMAGE=local-ai-aio \
|
|
||||||
$(MAKE) run-e2e-aio
|
|
||||||
|
|
||||||
run-e2e-aio: protogen-go
|
|
||||||
@echo 'Running e2e AIO tests'
|
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e-aio
|
|
||||||
|
|
||||||
########################################################
|
|
||||||
## E2E tests
|
|
||||||
########################################################
|
|
||||||
|
|
||||||
prepare-e2e:
|
prepare-e2e:
|
||||||
mkdir -p $(TEST_DIR)
|
mkdir -p $(TEST_DIR)
|
||||||
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
|
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
|
||||||
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
|
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
|
||||||
docker build --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 -t localai-tests .
|
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 --build-arg FFMPEG=true -t localai-tests .
|
||||||
|
|
||||||
run-e2e-image:
|
run-e2e-image:
|
||||||
ls -liah $(abspath ./tests/e2e-fixtures)
|
ls -liah $(abspath ./tests/e2e-fixtures)
|
||||||
docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests
|
docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests
|
||||||
|
|
||||||
|
run-e2e-aio: protogen-go
|
||||||
|
@echo 'Running e2e AIO tests'
|
||||||
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e-aio
|
||||||
|
|
||||||
test-e2e:
|
test-e2e:
|
||||||
@echo 'Running e2e tests'
|
@echo 'Running e2e tests'
|
||||||
BUILD_TYPE=$(BUILD_TYPE) \
|
BUILD_TYPE=$(BUILD_TYPE) \
|
||||||
@@ -198,33 +498,27 @@ teardown-e2e:
|
|||||||
rm -rf $(TEST_DIR) || true
|
rm -rf $(TEST_DIR) || true
|
||||||
docker stop $$(docker ps -q --filter ancestor=localai-tests)
|
docker stop $$(docker ps -q --filter ancestor=localai-tests)
|
||||||
|
|
||||||
########################################################
|
|
||||||
## Integration and unit tests
|
|
||||||
########################################################
|
|
||||||
|
|
||||||
test-llama-gguf: prepare-test
|
test-llama-gguf: prepare-test
|
||||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models BACKENDS_PATH=$(abspath ./)/backends \
|
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
||||||
|
|
||||||
test-tts: prepare-test
|
test-tts: prepare-test
|
||||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models BACKENDS_PATH=$(abspath ./)/backends \
|
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
||||||
|
|
||||||
test-stablediffusion: prepare-test
|
test-stablediffusion: prepare-test
|
||||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models BACKENDS_PATH=$(abspath ./)/backends \
|
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
||||||
|
|
||||||
test-stores:
|
test-stores: backend-assets/grpc/local-store
|
||||||
|
mkdir -p tests/integration/backend-assets/grpc
|
||||||
|
cp -f backend-assets/grpc/local-store tests/integration/backend-assets/grpc/
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts $(TEST_FLAKES) -v -r tests/integration
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts $(TEST_FLAKES) -v -r tests/integration
|
||||||
|
|
||||||
test-container:
|
test-container:
|
||||||
docker build --target requirements -t local-ai-test-container .
|
docker build --target requirements -t local-ai-test-container .
|
||||||
docker run -ti --rm --entrypoint /bin/bash -ti -v $(abspath ./):/build local-ai-test-container
|
docker run -ti --rm --entrypoint /bin/bash -ti -v $(abspath ./):/build local-ai-test-container
|
||||||
|
|
||||||
########################################################
|
|
||||||
## Help
|
|
||||||
########################################################
|
|
||||||
|
|
||||||
## Help:
|
## Help:
|
||||||
help: ## Show this help.
|
help: ## Show this help.
|
||||||
@echo ''
|
@echo ''
|
||||||
@@ -237,52 +531,16 @@ help: ## Show this help.
|
|||||||
else if (/^## .*$$/) {printf " ${CYAN}%s${RESET}\n", substr($$1,4)} \
|
else if (/^## .*$$/) {printf " ${CYAN}%s${RESET}\n", substr($$1,4)} \
|
||||||
}' $(MAKEFILE_LIST)
|
}' $(MAKEFILE_LIST)
|
||||||
|
|
||||||
########################################################
|
|
||||||
## Backends
|
|
||||||
########################################################
|
|
||||||
|
|
||||||
.PHONY: protogen
|
.PHONY: protogen
|
||||||
protogen: protogen-go protogen-python
|
protogen: protogen-go protogen-python
|
||||||
|
|
||||||
.PHONY: protogen-clean
|
.PHONY: protogen-clean
|
||||||
protogen-clean: protogen-go-clean protogen-python-clean
|
protogen-clean: protogen-go-clean protogen-python-clean
|
||||||
|
|
||||||
protoc:
|
|
||||||
@OS_NAME=$$(uname -s | tr '[:upper:]' '[:lower:]'); \
|
|
||||||
ARCH_NAME=$$(uname -m); \
|
|
||||||
if [ "$$OS_NAME" = "darwin" ]; then \
|
|
||||||
if [ "$$ARCH_NAME" = "arm64" ]; then \
|
|
||||||
FILE=protoc-31.1-osx-aarch_64.zip; \
|
|
||||||
elif [ "$$ARCH_NAME" = "x86_64" ]; then \
|
|
||||||
FILE=protoc-31.1-osx-x86_64.zip; \
|
|
||||||
else \
|
|
||||||
echo "Unsupported macOS architecture: $$ARCH_NAME"; exit 1; \
|
|
||||||
fi; \
|
|
||||||
elif [ "$$OS_NAME" = "linux" ]; then \
|
|
||||||
if [ "$$ARCH_NAME" = "x86_64" ]; then \
|
|
||||||
FILE=protoc-31.1-linux-x86_64.zip; \
|
|
||||||
elif [ "$$ARCH_NAME" = "aarch64" ] || [ "$$ARCH_NAME" = "arm64" ]; then \
|
|
||||||
FILE=protoc-31.1-linux-aarch_64.zip; \
|
|
||||||
elif [ "$$ARCH_NAME" = "ppc64le" ]; then \
|
|
||||||
FILE=protoc-31.1-linux-ppcle_64.zip; \
|
|
||||||
elif [ "$$ARCH_NAME" = "s390x" ]; then \
|
|
||||||
FILE=protoc-31.1-linux-s390_64.zip; \
|
|
||||||
elif [ "$$ARCH_NAME" = "i386" ] || [ "$$ARCH_NAME" = "x86" ]; then \
|
|
||||||
FILE=protoc-31.1-linux-x86_32.zip; \
|
|
||||||
else \
|
|
||||||
echo "Unsupported Linux architecture: $$ARCH_NAME"; exit 1; \
|
|
||||||
fi; \
|
|
||||||
else \
|
|
||||||
echo "Unsupported OS: $$OS_NAME"; exit 1; \
|
|
||||||
fi; \
|
|
||||||
URL=https://github.com/protocolbuffers/protobuf/releases/download/v31.1/$$FILE; \
|
|
||||||
curl -L -s $$URL -o protoc.zip && \
|
|
||||||
unzip -j -d $(CURDIR) protoc.zip bin/protoc && rm protoc.zip
|
|
||||||
|
|
||||||
.PHONY: protogen-go
|
.PHONY: protogen-go
|
||||||
protogen-go: protoc install-go-tools
|
protogen-go: install-go-tools
|
||||||
mkdir -p pkg/grpc/proto
|
mkdir -p pkg/grpc/proto
|
||||||
./protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
|
protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
|
||||||
backend/backend.proto
|
backend/backend.proto
|
||||||
|
|
||||||
.PHONY: protogen-go-clean
|
.PHONY: protogen-go-clean
|
||||||
@@ -291,10 +549,10 @@ protogen-go-clean:
|
|||||||
$(RM) bin/*
|
$(RM) bin/*
|
||||||
|
|
||||||
.PHONY: protogen-python
|
.PHONY: protogen-python
|
||||||
protogen-python: bark-protogen coqui-protogen chatterbox-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen kokoro-protogen vllm-protogen faster-whisper-protogen
|
protogen-python: bark-protogen coqui-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen kokoro-protogen vllm-protogen faster-whisper-protogen
|
||||||
|
|
||||||
.PHONY: protogen-python-clean
|
.PHONY: protogen-python-clean
|
||||||
protogen-python-clean: bark-protogen-clean coqui-protogen-clean chatterbox-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean kokoro-protogen-clean vllm-protogen-clean faster-whisper-protogen-clean
|
protogen-python-clean: bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean kokoro-protogen-clean vllm-protogen-clean faster-whisper-protogen-clean
|
||||||
|
|
||||||
.PHONY: bark-protogen
|
.PHONY: bark-protogen
|
||||||
bark-protogen:
|
bark-protogen:
|
||||||
@@ -316,18 +574,10 @@ coqui-protogen-clean:
|
|||||||
diffusers-protogen:
|
diffusers-protogen:
|
||||||
$(MAKE) -C backend/python/diffusers protogen
|
$(MAKE) -C backend/python/diffusers protogen
|
||||||
|
|
||||||
.PHONY: chatterbox-protogen
|
|
||||||
chatterbox-protogen:
|
|
||||||
$(MAKE) -C backend/python/chatterbox protogen
|
|
||||||
|
|
||||||
.PHONY: diffusers-protogen-clean
|
.PHONY: diffusers-protogen-clean
|
||||||
diffusers-protogen-clean:
|
diffusers-protogen-clean:
|
||||||
$(MAKE) -C backend/python/diffusers protogen-clean
|
$(MAKE) -C backend/python/diffusers protogen-clean
|
||||||
|
|
||||||
.PHONY: chatterbox-protogen-clean
|
|
||||||
chatterbox-protogen-clean:
|
|
||||||
$(MAKE) -C backend/python/chatterbox protogen-clean
|
|
||||||
|
|
||||||
.PHONY: faster-whisper-protogen
|
.PHONY: faster-whisper-protogen
|
||||||
faster-whisper-protogen:
|
faster-whisper-protogen:
|
||||||
$(MAKE) -C backend/python/faster-whisper protogen
|
$(MAKE) -C backend/python/faster-whisper protogen
|
||||||
@@ -376,19 +626,184 @@ vllm-protogen:
|
|||||||
vllm-protogen-clean:
|
vllm-protogen-clean:
|
||||||
$(MAKE) -C backend/python/vllm protogen-clean
|
$(MAKE) -C backend/python/vllm protogen-clean
|
||||||
|
|
||||||
|
## GRPC
|
||||||
|
# Note: it is duplicated in the Dockerfile
|
||||||
|
prepare-extra-conda-environments: protogen-python
|
||||||
|
$(MAKE) -C backend/python/bark
|
||||||
|
$(MAKE) -C backend/python/coqui
|
||||||
|
$(MAKE) -C backend/python/diffusers
|
||||||
|
$(MAKE) -C backend/python/faster-whisper
|
||||||
|
$(MAKE) -C backend/python/vllm
|
||||||
|
$(MAKE) -C backend/python/rerankers
|
||||||
|
$(MAKE) -C backend/python/transformers
|
||||||
|
$(MAKE) -C backend/python/kokoro
|
||||||
|
$(MAKE) -C backend/python/exllama2
|
||||||
|
|
||||||
prepare-test-extra: protogen-python
|
prepare-test-extra: protogen-python
|
||||||
$(MAKE) -C backend/python/transformers
|
$(MAKE) -C backend/python/transformers
|
||||||
$(MAKE) -C backend/python/diffusers
|
$(MAKE) -C backend/python/diffusers
|
||||||
$(MAKE) -C backend/python/chatterbox
|
|
||||||
$(MAKE) -C backend/python/vllm
|
$(MAKE) -C backend/python/vllm
|
||||||
|
|
||||||
test-extra: prepare-test-extra
|
test-extra: prepare-test-extra
|
||||||
$(MAKE) -C backend/python/transformers test
|
$(MAKE) -C backend/python/transformers test
|
||||||
$(MAKE) -C backend/python/diffusers test
|
$(MAKE) -C backend/python/diffusers test
|
||||||
$(MAKE) -C backend/python/chatterbox test
|
|
||||||
$(MAKE) -C backend/python/vllm test
|
$(MAKE) -C backend/python/vllm test
|
||||||
|
|
||||||
|
backend-assets:
|
||||||
|
mkdir -p backend-assets
|
||||||
|
ifeq ($(BUILD_API_ONLY),true)
|
||||||
|
touch backend-assets/keep
|
||||||
|
endif
|
||||||
|
|
||||||
|
backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_binding.a
|
||||||
|
mkdir -p backend-assets/espeak-ng-data
|
||||||
|
@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
|
||||||
|
|
||||||
|
backend-assets/grpc: protogen-go replace
|
||||||
|
mkdir -p backend-assets/grpc
|
||||||
|
|
||||||
|
backend-assets/grpc/huggingface: backend-assets/grpc
|
||||||
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
|
||||||
|
ifneq ($(UPX),)
|
||||||
|
$(UPX) backend-assets/grpc/huggingface
|
||||||
|
endif
|
||||||
|
|
||||||
|
backend/cpp/llama/llama.cpp:
|
||||||
|
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
|
||||||
|
|
||||||
|
INSTALLED_PACKAGES=$(CURDIR)/backend/cpp/grpc/installed_packages
|
||||||
|
INSTALLED_LIB_CMAKE=$(INSTALLED_PACKAGES)/lib/cmake
|
||||||
|
ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
|
||||||
|
-DProtobuf_DIR=${INSTALLED_LIB_CMAKE}/protobuf \
|
||||||
|
-Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \
|
||||||
|
-DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \
|
||||||
|
-DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include
|
||||||
|
build-llama-cpp-grpc-server:
|
||||||
|
# Conditionally build grpc for the llama backend to use if needed
|
||||||
|
ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
|
||||||
|
$(MAKE) -C backend/cpp/grpc build
|
||||||
|
_PROTOBUF_PROTOC=${INSTALLED_PACKAGES}/bin/proto \
|
||||||
|
_GRPC_CPP_PLUGIN_EXECUTABLE=${INSTALLED_PACKAGES}/bin/grpc_cpp_plugin \
|
||||||
|
PATH="${INSTALLED_PACKAGES}/bin:${PATH}" \
|
||||||
|
CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" \
|
||||||
|
LLAMA_VERSION=$(CPPLLAMA_VERSION) \
|
||||||
|
$(MAKE) -C backend/cpp/${VARIANT} grpc-server
|
||||||
|
else
|
||||||
|
echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
|
||||||
|
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
|
||||||
|
endif
|
||||||
|
|
||||||
|
# This target is for manually building a variant with-auto detected flags
|
||||||
|
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
|
cp -rf backend/cpp/llama backend/cpp/llama-cpp
|
||||||
|
$(MAKE) -C backend/cpp/llama-cpp purge
|
||||||
|
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
|
||||||
|
$(MAKE) VARIANT="llama-cpp" build-llama-cpp-grpc-server
|
||||||
|
cp -rfv backend/cpp/llama-cpp/grpc-server backend-assets/grpc/llama-cpp
|
||||||
|
|
||||||
|
backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
|
cp -rf backend/cpp/llama backend/cpp/llama-avx2
|
||||||
|
$(MAKE) -C backend/cpp/llama-avx2 purge
|
||||||
|
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
|
||||||
|
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
|
||||||
|
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
|
||||||
|
|
||||||
|
backend-assets/grpc/llama-cpp-avx512: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
|
cp -rf backend/cpp/llama backend/cpp/llama-avx512
|
||||||
|
$(MAKE) -C backend/cpp/llama-avx512 purge
|
||||||
|
$(info ${GREEN}I llama-cpp build info:avx512${RESET})
|
||||||
|
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx512" build-llama-cpp-grpc-server
|
||||||
|
cp -rfv backend/cpp/llama-avx512/grpc-server backend-assets/grpc/llama-cpp-avx512
|
||||||
|
|
||||||
|
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
|
cp -rf backend/cpp/llama backend/cpp/llama-avx
|
||||||
|
$(MAKE) -C backend/cpp/llama-avx purge
|
||||||
|
$(info ${GREEN}I llama-cpp build info:avx${RESET})
|
||||||
|
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
|
||||||
|
cp -rfv backend/cpp/llama-avx/grpc-server backend-assets/grpc/llama-cpp-avx
|
||||||
|
|
||||||
|
backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
|
cp -rf backend/cpp/llama backend/cpp/llama-fallback
|
||||||
|
$(MAKE) -C backend/cpp/llama-fallback purge
|
||||||
|
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
|
||||||
|
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
|
||||||
|
cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
|
||||||
|
|
||||||
|
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
|
cp -rf backend/cpp/llama backend/cpp/llama-cuda
|
||||||
|
$(MAKE) -C backend/cpp/llama-cuda purge
|
||||||
|
$(info ${GREEN}I llama-cpp build info:cuda${RESET})
|
||||||
|
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
|
||||||
|
cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
|
||||||
|
|
||||||
|
backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
|
cp -rf backend/cpp/llama backend/cpp/llama-hipblas
|
||||||
|
$(MAKE) -C backend/cpp/llama-hipblas purge
|
||||||
|
$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
|
||||||
|
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
|
||||||
|
cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
|
||||||
|
|
||||||
|
backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
|
cp -rf backend/cpp/llama backend/cpp/llama-sycl_f16
|
||||||
|
$(MAKE) -C backend/cpp/llama-sycl_f16 purge
|
||||||
|
$(info ${GREEN}I llama-cpp build info:sycl_f16${RESET})
|
||||||
|
BUILD_TYPE="sycl_f16" $(MAKE) VARIANT="llama-sycl_f16" build-llama-cpp-grpc-server
|
||||||
|
cp -rfv backend/cpp/llama-sycl_f16/grpc-server backend-assets/grpc/llama-cpp-sycl_f16
|
||||||
|
|
||||||
|
backend-assets/grpc/llama-cpp-sycl_f32: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
|
cp -rf backend/cpp/llama backend/cpp/llama-sycl_f32
|
||||||
|
$(MAKE) -C backend/cpp/llama-sycl_f32 purge
|
||||||
|
$(info ${GREEN}I llama-cpp build info:sycl_f32${RESET})
|
||||||
|
BUILD_TYPE="sycl_f32" $(MAKE) VARIANT="llama-sycl_f32" build-llama-cpp-grpc-server
|
||||||
|
cp -rfv backend/cpp/llama-sycl_f32/grpc-server backend-assets/grpc/llama-cpp-sycl_f32
|
||||||
|
|
||||||
|
backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
|
cp -rf backend/cpp/llama backend/cpp/llama-grpc
|
||||||
|
$(MAKE) -C backend/cpp/llama-grpc purge
|
||||||
|
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
|
||||||
|
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
|
||||||
|
cp -rfv backend/cpp/llama-grpc/grpc-server backend-assets/grpc/llama-cpp-grpc
|
||||||
|
|
||||||
|
backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
|
||||||
|
mkdir -p backend-assets/util/
|
||||||
|
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
|
||||||
|
|
||||||
|
backend-assets/grpc/bark-cpp: backend/go/bark/libbark.a backend-assets/grpc
|
||||||
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/bark/ LIBRARY_PATH=$(CURDIR)/backend/go/bark/ \
|
||||||
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bark-cpp ./backend/go/bark/
|
||||||
|
ifneq ($(UPX),)
|
||||||
|
$(UPX) backend-assets/grpc/bark-cpp
|
||||||
|
endif
|
||||||
|
|
||||||
|
backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
|
||||||
|
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
|
||||||
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
|
||||||
|
ifneq ($(UPX),)
|
||||||
|
$(UPX) backend-assets/grpc/piper
|
||||||
|
endif
|
||||||
|
|
||||||
|
backend-assets/grpc/silero-vad: backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
|
||||||
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
|
||||||
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
|
||||||
|
ifneq ($(UPX),)
|
||||||
|
$(UPX) backend-assets/grpc/silero-vad
|
||||||
|
endif
|
||||||
|
|
||||||
|
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/build/src/libwhisper.a backend-assets/grpc
|
||||||
|
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="${WHISPER_INCLUDE_PATH}" LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" LD_LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" \
|
||||||
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/whisper
|
||||||
|
ifneq ($(UPX),)
|
||||||
|
$(UPX) backend-assets/grpc/whisper
|
||||||
|
endif
|
||||||
|
|
||||||
|
backend-assets/grpc/local-store: backend-assets/grpc
|
||||||
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/
|
||||||
|
ifneq ($(UPX),)
|
||||||
|
$(UPX) backend-assets/grpc/local-store
|
||||||
|
endif
|
||||||
|
|
||||||
|
grpcs: prepare $(GRPC_BACKENDS)
|
||||||
|
|
||||||
DOCKER_IMAGE?=local-ai
|
DOCKER_IMAGE?=local-ai
|
||||||
DOCKER_AIO_IMAGE?=local-ai-aio
|
DOCKER_AIO_IMAGE?=local-ai-aio
|
||||||
IMAGE_TYPE?=core
|
IMAGE_TYPE?=core
|
||||||
@@ -427,111 +842,21 @@ docker-aio-all:
|
|||||||
|
|
||||||
docker-image-intel:
|
docker-image-intel:
|
||||||
docker build \
|
docker build \
|
||||||
--build-arg BASE_IMAGE=intel/oneapi-basekit:${ONEAPI_VERSION}.0-0-devel-ubuntu24.04 \
|
--progress plain \
|
||||||
|
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.1.0-0-devel-ubuntu24.04 \
|
||||||
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
|
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
|
||||||
--build-arg GO_TAGS="$(GO_TAGS)" \
|
--build-arg GO_TAGS="none" \
|
||||||
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
||||||
--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
|
--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
|
||||||
|
|
||||||
docker-image-intel-xpu:
|
docker-image-intel-xpu:
|
||||||
docker build \
|
docker build \
|
||||||
--build-arg BASE_IMAGE=intel/oneapi-basekit:${ONEAPI_VERSION}.0-0-devel-ubuntu22.04 \
|
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04 \
|
||||||
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
|
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
|
||||||
--build-arg GO_TAGS="$(GO_TAGS)" \
|
--build-arg GO_TAGS="none" \
|
||||||
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
||||||
--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
|
--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
|
||||||
|
|
||||||
########################################################
|
|
||||||
## Backends
|
|
||||||
########################################################
|
|
||||||
|
|
||||||
backend-images:
|
|
||||||
mkdir -p backend-images
|
|
||||||
|
|
||||||
docker-build-llama-cpp:
|
|
||||||
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg IMAGE_BASE=$(IMAGE_BASE) -t local-ai-backend:llama-cpp -f backend/Dockerfile.llama-cpp .
|
|
||||||
|
|
||||||
docker-build-bark-cpp:
|
|
||||||
docker build -t local-ai-backend:bark-cpp -f backend/Dockerfile.go --build-arg BACKEND=bark-cpp .
|
|
||||||
|
|
||||||
docker-build-piper:
|
|
||||||
docker build -t local-ai-backend:piper -f backend/Dockerfile.go --build-arg BACKEND=piper .
|
|
||||||
|
|
||||||
docker-build-local-store:
|
|
||||||
docker build -t local-ai-backend:local-store -f backend/Dockerfile.go --build-arg BACKEND=local-store .
|
|
||||||
|
|
||||||
docker-build-huggingface:
|
|
||||||
docker build -t local-ai-backend:huggingface -f backend/Dockerfile.go --build-arg BACKEND=huggingface .
|
|
||||||
|
|
||||||
docker-save-huggingface: backend-images
|
|
||||||
docker save local-ai-backend:huggingface -o backend-images/huggingface.tar
|
|
||||||
|
|
||||||
docker-save-local-store: backend-images
|
|
||||||
docker save local-ai-backend:local-store -o backend-images/local-store.tar
|
|
||||||
|
|
||||||
docker-build-silero-vad:
|
|
||||||
docker build -t local-ai-backend:silero-vad -f backend/Dockerfile.go --build-arg BACKEND=silero-vad .
|
|
||||||
|
|
||||||
docker-save-silero-vad: backend-images
|
|
||||||
docker save local-ai-backend:silero-vad -o backend-images/silero-vad.tar
|
|
||||||
|
|
||||||
docker-save-piper: backend-images
|
|
||||||
docker save local-ai-backend:piper -o backend-images/piper.tar
|
|
||||||
|
|
||||||
docker-save-llama-cpp: backend-images
|
|
||||||
docker save local-ai-backend:llama-cpp -o backend-images/llama-cpp.tar
|
|
||||||
|
|
||||||
docker-save-bark-cpp: backend-images
|
|
||||||
docker save local-ai-backend:bark-cpp -o backend-images/bark-cpp.tar
|
|
||||||
|
|
||||||
docker-build-stablediffusion-ggml:
|
|
||||||
docker build -t local-ai-backend:stablediffusion-ggml -f backend/Dockerfile.go --build-arg BACKEND=stablediffusion-ggml .
|
|
||||||
|
|
||||||
docker-save-stablediffusion-ggml: backend-images
|
|
||||||
docker save local-ai-backend:stablediffusion-ggml -o backend-images/stablediffusion-ggml.tar
|
|
||||||
|
|
||||||
docker-build-rerankers:
|
|
||||||
docker build -t local-ai-backend:rerankers -f backend/Dockerfile.python --build-arg BACKEND=rerankers .
|
|
||||||
|
|
||||||
docker-build-vllm:
|
|
||||||
docker build -t local-ai-backend:vllm -f backend/Dockerfile.python --build-arg BACKEND=vllm .
|
|
||||||
|
|
||||||
docker-build-transformers:
|
|
||||||
docker build -t local-ai-backend:transformers -f backend/Dockerfile.python --build-arg BACKEND=transformers .
|
|
||||||
|
|
||||||
docker-build-diffusers:
|
|
||||||
docker build -t local-ai-backend:diffusers -f backend/Dockerfile.python --build-arg BACKEND=diffusers .
|
|
||||||
|
|
||||||
docker-build-kokoro:
|
|
||||||
docker build -t local-ai-backend:kokoro -f backend/Dockerfile.python --build-arg BACKEND=kokoro .
|
|
||||||
|
|
||||||
docker-build-whisper:
|
|
||||||
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:whisper -f backend/Dockerfile.go --build-arg BACKEND=whisper .
|
|
||||||
|
|
||||||
docker-save-whisper: backend-images
|
|
||||||
docker save local-ai-backend:whisper -o backend-images/whisper.tar
|
|
||||||
|
|
||||||
docker-build-faster-whisper:
|
|
||||||
docker build -t local-ai-backend:faster-whisper -f backend/Dockerfile.python --build-arg BACKEND=faster-whisper .
|
|
||||||
|
|
||||||
docker-build-coqui:
|
|
||||||
docker build -t local-ai-backend:coqui -f backend/Dockerfile.python --build-arg BACKEND=coqui .
|
|
||||||
|
|
||||||
docker-build-bark:
|
|
||||||
docker build -t local-ai-backend:bark -f backend/Dockerfile.python --build-arg BACKEND=bark .
|
|
||||||
|
|
||||||
docker-build-chatterbox:
|
|
||||||
docker build -t local-ai-backend:chatterbox -f backend/Dockerfile.python --build-arg BACKEND=chatterbox .
|
|
||||||
|
|
||||||
docker-build-exllama2:
|
|
||||||
docker build -t local-ai-backend:exllama2 -f backend/Dockerfile.python --build-arg BACKEND=exllama2 .
|
|
||||||
|
|
||||||
docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-transformers docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-bark docker-build-chatterbox docker-build-exllama2
|
|
||||||
|
|
||||||
########################################################
|
|
||||||
### END Backends
|
|
||||||
########################################################
|
|
||||||
|
|
||||||
.PHONY: swagger
|
.PHONY: swagger
|
||||||
swagger:
|
swagger:
|
||||||
swag init -g core/http/app.go --output swagger
|
swag init -g core/http/app.go --output swagger
|
||||||
|
|||||||
70
README.md
70
README.md
@@ -1,6 +1,6 @@
|
|||||||
<h1 align="center">
|
<h1 align="center">
|
||||||
<br>
|
<br>
|
||||||
<img width="300" src="./core/http/static/logo.png"> <br>
|
<img height="300" src="./core/http/static/logo.png"> <br>
|
||||||
<br>
|
<br>
|
||||||
</h1>
|
</h1>
|
||||||
|
|
||||||
@@ -113,67 +113,22 @@ For more installation options, see [Installer Options](https://localai.io/docs/a
|
|||||||
Or run with docker:
|
Or run with docker:
|
||||||
|
|
||||||
### CPU only image:
|
### CPU only image:
|
||||||
|
```bash
|
||||||
|
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-cpu
|
||||||
|
```
|
||||||
|
### Nvidia GPU:
|
||||||
|
```bash
|
||||||
|
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
|
||||||
|
```
|
||||||
|
### CPU and GPU image (bigger size):
|
||||||
```bash
|
```bash
|
||||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
|
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
|
||||||
```
|
```
|
||||||
|
### AIO images (it will pre-download a set of models ready for use, see https://localai.io/basics/container/)
|
||||||
### NVIDIA GPU Images:
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# CUDA 12.0
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
|
|
||||||
|
|
||||||
# CUDA 11.7
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-11
|
|
||||||
|
|
||||||
# NVIDIA Jetson (L4T) ARM64
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-nvidia-l4t-arm64
|
|
||||||
```
|
|
||||||
|
|
||||||
### AMD GPU Images (ROCm):
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-gpu-hipblas
|
|
||||||
```
|
|
||||||
|
|
||||||
### Intel GPU Images (oneAPI):
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Intel GPU with FP16 support
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 --device=/dev/dri/card1 --device=/dev/dri/renderD128 localai/localai:latest-gpu-intel-f16
|
|
||||||
|
|
||||||
# Intel GPU with FP32 support
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 --device=/dev/dri/card1 --device=/dev/dri/renderD128 localai/localai:latest-gpu-intel-f32
|
|
||||||
```
|
|
||||||
|
|
||||||
### Vulkan GPU Images:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-vulkan
|
|
||||||
```
|
|
||||||
|
|
||||||
### AIO Images (pre-downloaded models):
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# CPU version
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
|
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
|
||||||
|
|
||||||
# NVIDIA CUDA 12 version
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
|
|
||||||
|
|
||||||
# NVIDIA CUDA 11 version
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11
|
|
||||||
|
|
||||||
# Intel GPU version
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel-f16
|
|
||||||
|
|
||||||
# AMD GPU version
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas
|
|
||||||
```
|
```
|
||||||
|
|
||||||
For more information about the AIO images and pre-downloaded models, see [Container Documentation](https://localai.io/basics/container/).
|
|
||||||
|
|
||||||
To load models:
|
To load models:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -193,10 +148,6 @@ For more information, see [💻 Getting started](https://localai.io/basics/getti
|
|||||||
|
|
||||||
## 📰 Latest project news
|
## 📰 Latest project news
|
||||||
|
|
||||||
- June 2025: [Backend management](https://github.com/mudler/LocalAI/pull/5607) has been added. Attention: extras images are going to be deprecated from the next release! Read [the backend management PR](https://github.com/mudler/LocalAI/pull/5607).
|
|
||||||
- May 2025: [Audio input](https://github.com/mudler/LocalAI/pull/5466) and [Reranking](https://github.com/mudler/LocalAI/pull/5396) in llama.cpp backend, [Realtime API](https://github.com/mudler/LocalAI/pull/5392), support for Gemma, SmolVLM, and more multimodal models (available in the gallery).
|
|
||||||
- May 2025: Important: image name changes [See release](https://github.com/mudler/LocalAI/releases/tag/v2.29.0)
|
|
||||||
- Apr 2025: Rebrand, WebUI enhancements
|
|
||||||
- Apr 2025: [LocalAGI](https://github.com/mudler/LocalAGI) and [LocalRecall](https://github.com/mudler/LocalRecall) join the LocalAI family stack.
|
- Apr 2025: [LocalAGI](https://github.com/mudler/LocalAGI) and [LocalRecall](https://github.com/mudler/LocalRecall) join the LocalAI family stack.
|
||||||
- Apr 2025: WebUI overhaul, AIO images updates
|
- Apr 2025: WebUI overhaul, AIO images updates
|
||||||
- Feb 2025: Backend cleanup, Breaking changes, new backends (kokoro, OutelTTS, faster-whisper), Nvidia L4T images
|
- Feb 2025: Backend cleanup, Breaking changes, new backends (kokoro, OutelTTS, faster-whisper), Nvidia L4T images
|
||||||
@@ -215,7 +166,6 @@ Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3A
|
|||||||
|
|
||||||
## 🚀 [Features](https://localai.io/features/)
|
## 🚀 [Features](https://localai.io/features/)
|
||||||
|
|
||||||
- 🧩 [Backend Gallery](https://localai.io/backends/): Install/remove backends on the fly, powered by OCI images — fully customizable and API-driven.
|
|
||||||
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `transformers`, `vllm` ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
|
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `transformers`, `vllm` ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
|
||||||
- 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
|
- 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
|
||||||
- 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
|
- 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
embeddings: true
|
embeddings: true
|
||||||
name: text-embedding-ada-002
|
name: text-embedding-ada-002
|
||||||
backend: llama-cpp
|
|
||||||
parameters:
|
parameters:
|
||||||
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
|
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
|
||||||
|
|
||||||
|
|||||||
@@ -1,13 +1,7 @@
|
|||||||
name: jina-reranker-v1-base-en
|
name: jina-reranker-v1-base-en
|
||||||
reranking: true
|
backend: rerankers
|
||||||
f16: true
|
|
||||||
parameters:
|
parameters:
|
||||||
model: jina-reranker-v1-tiny-en.f16.gguf
|
model: cross-encoder
|
||||||
backend: llama-cpp
|
|
||||||
download_files:
|
|
||||||
- filename: jina-reranker-v1-tiny-en.f16.gguf
|
|
||||||
sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
|
|
||||||
uri: huggingface://mradermacher/jina-reranker-v1-tiny-en-GGUF/jina-reranker-v1-tiny-en.f16.gguf
|
|
||||||
|
|
||||||
usage: |
|
usage: |
|
||||||
You can test this model with curl like this:
|
You can test this model with curl like this:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ name: tts-1
|
|||||||
download_files:
|
download_files:
|
||||||
- filename: voice-en-us-amy-low.tar.gz
|
- filename: voice-en-us-amy-low.tar.gz
|
||||||
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
|
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
|
||||||
backend: piper
|
|
||||||
parameters:
|
parameters:
|
||||||
model: en-us-amy-low.onnx
|
model: en-us-amy-low.onnx
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
context_size: 8192
|
context_size: 8192
|
||||||
f16: true
|
f16: true
|
||||||
backend: llama-cpp
|
|
||||||
function:
|
function:
|
||||||
grammar:
|
grammar:
|
||||||
no_mixed_free_string: true
|
no_mixed_free_string: true
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
context_size: 4096
|
context_size: 4096
|
||||||
f16: true
|
f16: true
|
||||||
backend: llama-cpp
|
|
||||||
mmap: true
|
mmap: true
|
||||||
mmproj: minicpm-v-2_6-mmproj-f16.gguf
|
mmproj: minicpm-v-2_6-mmproj-f16.gguf
|
||||||
name: gpt-4o
|
name: gpt-4o
|
||||||
|
|||||||
@@ -135,4 +135,4 @@ check_vars
|
|||||||
|
|
||||||
echo "===> Starting LocalAI[$PROFILE] with the following models: $MODELS"
|
echo "===> Starting LocalAI[$PROFILE] with the following models: $MODELS"
|
||||||
|
|
||||||
exec /entrypoint.sh "$@"
|
exec /build/entrypoint.sh "$@"
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
embeddings: true
|
embeddings: true
|
||||||
name: text-embedding-ada-002
|
name: text-embedding-ada-002
|
||||||
backend: llama-cpp
|
|
||||||
parameters:
|
parameters:
|
||||||
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
|
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
|
||||||
|
|
||||||
|
|||||||
@@ -1,13 +1,7 @@
|
|||||||
name: jina-reranker-v1-base-en
|
name: jina-reranker-v1-base-en
|
||||||
reranking: true
|
backend: rerankers
|
||||||
f16: true
|
|
||||||
parameters:
|
parameters:
|
||||||
model: jina-reranker-v1-tiny-en.f16.gguf
|
model: cross-encoder
|
||||||
backend: llama-cpp
|
|
||||||
download_files:
|
|
||||||
- filename: jina-reranker-v1-tiny-en.f16.gguf
|
|
||||||
sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
|
|
||||||
uri: huggingface://mradermacher/jina-reranker-v1-tiny-en-GGUF/jina-reranker-v1-tiny-en.f16.gguf
|
|
||||||
|
|
||||||
usage: |
|
usage: |
|
||||||
You can test this model with curl like this:
|
You can test this model with curl like this:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ name: tts-1
|
|||||||
download_files:
|
download_files:
|
||||||
- filename: voice-en-us-amy-low.tar.gz
|
- filename: voice-en-us-amy-low.tar.gz
|
||||||
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
|
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
|
||||||
backend: piper
|
|
||||||
parameters:
|
parameters:
|
||||||
model: en-us-amy-low.onnx
|
model: en-us-amy-low.onnx
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
context_size: 4096
|
context_size: 4096
|
||||||
f16: true
|
f16: true
|
||||||
backend: llama-cpp
|
|
||||||
function:
|
function:
|
||||||
capture_llm_results:
|
capture_llm_results:
|
||||||
- (?s)<Thought>(.*?)</Thought>
|
- (?s)<Thought>(.*?)</Thought>
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
context_size: 4096
|
context_size: 4096
|
||||||
backend: llama-cpp
|
|
||||||
f16: true
|
f16: true
|
||||||
mmap: true
|
mmap: true
|
||||||
mmproj: minicpm-v-2_6-mmproj-f16.gguf
|
mmproj: minicpm-v-2_6-mmproj-f16.gguf
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
embeddings: true
|
embeddings: true
|
||||||
name: text-embedding-ada-002
|
name: text-embedding-ada-002
|
||||||
backend: llama-cpp
|
|
||||||
parameters:
|
parameters:
|
||||||
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
|
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
|
||||||
|
|
||||||
|
|||||||
@@ -1,13 +1,7 @@
|
|||||||
name: jina-reranker-v1-base-en
|
name: jina-reranker-v1-base-en
|
||||||
reranking: true
|
backend: rerankers
|
||||||
f16: true
|
|
||||||
parameters:
|
parameters:
|
||||||
model: jina-reranker-v1-tiny-en.f16.gguf
|
model: cross-encoder
|
||||||
backend: llama-cpp
|
|
||||||
download_files:
|
|
||||||
- filename: jina-reranker-v1-tiny-en.f16.gguf
|
|
||||||
sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
|
|
||||||
uri: huggingface://mradermacher/jina-reranker-v1-tiny-en-GGUF/jina-reranker-v1-tiny-en.f16.gguf
|
|
||||||
|
|
||||||
usage: |
|
usage: |
|
||||||
You can test this model with curl like this:
|
You can test this model with curl like this:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ name: tts-1
|
|||||||
download_files:
|
download_files:
|
||||||
- filename: voice-en-us-amy-low.tar.gz
|
- filename: voice-en-us-amy-low.tar.gz
|
||||||
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
|
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
|
||||||
backend: piper
|
|
||||||
parameters:
|
parameters:
|
||||||
model: en-us-amy-low.onnx
|
model: en-us-amy-low.onnx
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
context_size: 4096
|
context_size: 4096
|
||||||
f16: true
|
f16: true
|
||||||
backend: llama-cpp
|
|
||||||
function:
|
function:
|
||||||
capture_llm_results:
|
capture_llm_results:
|
||||||
- (?s)<Thought>(.*?)</Thought>
|
- (?s)<Thought>(.*?)</Thought>
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
context_size: 4096
|
context_size: 4096
|
||||||
backend: llama-cpp
|
|
||||||
f16: true
|
f16: true
|
||||||
mmap: true
|
mmap: true
|
||||||
mmproj: minicpm-v-2_6-mmproj-f16.gguf
|
mmproj: minicpm-v-2_6-mmproj-f16.gguf
|
||||||
|
|||||||
15
assets.go
Normal file
15
assets.go
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
rice "github.com/GeertJohan/go.rice"
|
||||||
|
)
|
||||||
|
|
||||||
|
var backendAssets *rice.Box
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var err error
|
||||||
|
backendAssets, err = rice.FindBox("backend-assets")
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,142 +0,0 @@
|
|||||||
ARG BASE_IMAGE=ubuntu:22.04
|
|
||||||
|
|
||||||
FROM ${BASE_IMAGE} AS builder
|
|
||||||
ARG BACKEND=rerankers
|
|
||||||
ARG BUILD_TYPE
|
|
||||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
|
||||||
ARG CUDA_MAJOR_VERSION
|
|
||||||
ARG CUDA_MINOR_VERSION
|
|
||||||
ARG SKIP_DRIVERS=false
|
|
||||||
ENV CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION}
|
|
||||||
ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION}
|
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
|
||||||
ARG TARGETARCH
|
|
||||||
ARG TARGETVARIANT
|
|
||||||
ARG GO_VERSION=1.22.6
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
build-essential \
|
|
||||||
git ccache \
|
|
||||||
ca-certificates \
|
|
||||||
make cmake \
|
|
||||||
curl unzip \
|
|
||||||
libssl-dev && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
|
|
||||||
# Cuda
|
|
||||||
ENV PATH=/usr/local/cuda/bin:${PATH}
|
|
||||||
|
|
||||||
# HipBLAS requirements
|
|
||||||
ENV PATH=/opt/rocm/bin:${PATH}
|
|
||||||
|
|
||||||
# Vulkan requirements
|
|
||||||
RUN <<EOT bash
|
|
||||||
if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
software-properties-common pciutils wget gpg-agent && \
|
|
||||||
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
|
|
||||||
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y \
|
|
||||||
vulkan-sdk && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
fi
|
|
||||||
EOT
|
|
||||||
|
|
||||||
# CuBLAS requirements
|
|
||||||
RUN <<EOT bash
|
|
||||||
if [ "${BUILD_TYPE}" = "cublas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
software-properties-common pciutils
|
|
||||||
if [ "amd64" = "$TARGETARCH" ]; then
|
|
||||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
|
||||||
fi
|
|
||||||
if [ "arm64" = "$TARGETARCH" ]; then
|
|
||||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
|
|
||||||
fi
|
|
||||||
dpkg -i cuda-keyring_1.1-1_all.deb && \
|
|
||||||
rm -f cuda-keyring_1.1-1_all.deb && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
fi
|
|
||||||
EOT
|
|
||||||
|
|
||||||
# If we are building with clblas support, we need the libraries for the builds
|
|
||||||
RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
libclblast-dev && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/* \
|
|
||||||
; fi
|
|
||||||
|
|
||||||
RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
hipblas-dev \
|
|
||||||
rocblas-dev && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/* && \
|
|
||||||
# I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
|
|
||||||
# to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
|
|
||||||
ldconfig \
|
|
||||||
; fi
|
|
||||||
|
|
||||||
# Intel oneAPI requirements
|
|
||||||
RUN <<EOT bash
|
|
||||||
if [[ "${BUILD_TYPE}" == sycl* ]] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
intel-oneapi-runtime-libs && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
fi
|
|
||||||
EOT
|
|
||||||
|
|
||||||
# Install Go
|
|
||||||
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
|
|
||||||
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin:/usr/local/bin
|
|
||||||
|
|
||||||
# Install grpc compilers
|
|
||||||
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
|
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
|
||||||
RUN echo "TARGETARCH: $TARGETARCH"
|
|
||||||
|
|
||||||
# We need protoc installed, and the version in 22.04 is too old. We will create one as part of installing the GRPC build below
|
|
||||||
# but that will also bring in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only
|
|
||||||
# here so that we can generate the grpc code for the stablediffusion build
|
|
||||||
RUN <<EOT bash
|
|
||||||
if [ "amd64" = "$TARGETARCH" ]; then
|
|
||||||
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-x86_64.zip -o protoc.zip && \
|
|
||||||
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
|
||||||
rm protoc.zip
|
|
||||||
fi
|
|
||||||
if [ "arm64" = "$TARGETARCH" ]; then
|
|
||||||
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-aarch_64.zip -o protoc.zip && \
|
|
||||||
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
|
||||||
rm protoc.zip
|
|
||||||
fi
|
|
||||||
EOT
|
|
||||||
|
|
||||||
COPY . /LocalAI
|
|
||||||
|
|
||||||
RUN cd /LocalAI && make protogen-go && make -C /LocalAI/backend/go/${BACKEND} build
|
|
||||||
|
|
||||||
FROM scratch
|
|
||||||
ARG BACKEND=rerankers
|
|
||||||
|
|
||||||
COPY --from=builder /LocalAI/backend/go/${BACKEND}/package/. ./
|
|
||||||
@@ -1,204 +0,0 @@
|
|||||||
ARG BASE_IMAGE=ubuntu:22.04
|
|
||||||
ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
|
|
||||||
|
|
||||||
|
|
||||||
# The grpc target does one thing, it builds and installs GRPC. This is in its own layer so that it can be effectively cached by CI.
|
|
||||||
# You probably don't need to change anything here, and if you do, make sure that CI is adjusted so that the cache continues to work.
|
|
||||||
FROM ${GRPC_BASE_IMAGE} AS grpc
|
|
||||||
|
|
||||||
# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
|
|
||||||
ARG GRPC_MAKEFLAGS="-j4 -Otarget"
|
|
||||||
ARG GRPC_VERSION=v1.65.0
|
|
||||||
ARG CMAKE_FROM_SOURCE=false
|
|
||||||
ARG CMAKE_VERSION=3.26.4
|
|
||||||
ARG PROTOBUF_VERSION=v21.12
|
|
||||||
|
|
||||||
ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
|
|
||||||
|
|
||||||
WORKDIR /build
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
ca-certificates \
|
|
||||||
build-essential curl libssl-dev \
|
|
||||||
git && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# Install CMake (the version in 22.04 is too old)
|
|
||||||
RUN <<EOT bash
|
|
||||||
# Build CMake from source only when the CMAKE_FROM_SOURCE build arg is "true"; otherwise use the distro package.
if [ "${CMAKE_FROM_SOURCE}" = "true" ]; then
|
|
||||||
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
|
|
||||||
else
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y \
|
|
||||||
cmake && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
fi
|
|
||||||
EOT
|
|
||||||
|
|
||||||
# We install GRPC to a different prefix here so that we can copy in only the build artifacts later
|
|
||||||
# saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree
|
|
||||||
# and running make install in the target container
|
|
||||||
RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
|
||||||
mkdir -p /build/grpc/cmake/build && \
|
|
||||||
cd /build/grpc/cmake/build && \
|
|
||||||
sed -i "216i\ TESTONLY" "../../third_party/abseil-cpp/absl/container/CMakeLists.txt" && \
|
|
||||||
cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \
|
|
||||||
make && \
|
|
||||||
make install && \
|
|
||||||
rm -rf /build
|
|
||||||
|
|
||||||
RUN git clone --recurse-submodules --branch ${PROTOBUF_VERSION} https://github.com/protocolbuffers/protobuf.git && \
|
|
||||||
mkdir -p /build/protobuf/build && \
|
|
||||||
cd /build/protobuf/build && \
|
|
||||||
cmake -Dprotobuf_BUILD_SHARED_LIBS=ON -Dprotobuf_BUILD_TESTS=OFF .. && \
|
|
||||||
make && \
|
|
||||||
make install && \
|
|
||||||
rm -rf /build
|
|
||||||
|
|
||||||
FROM ${BASE_IMAGE} AS builder
|
|
||||||
ARG BACKEND=rerankers
|
|
||||||
ARG BUILD_TYPE
|
|
||||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
|
||||||
ARG CUDA_MAJOR_VERSION
|
|
||||||
ARG CUDA_MINOR_VERSION
|
|
||||||
ARG SKIP_DRIVERS=false
|
|
||||||
ENV CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION}
|
|
||||||
ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION}
|
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
|
||||||
ARG TARGETARCH
|
|
||||||
ARG TARGETVARIANT
|
|
||||||
ARG GO_VERSION=1.22.6
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
build-essential \
|
|
||||||
ccache git \
|
|
||||||
ca-certificates \
|
|
||||||
make \
|
|
||||||
curl unzip \
|
|
||||||
libssl-dev && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# Cuda
|
|
||||||
ENV PATH=/usr/local/cuda/bin:${PATH}
|
|
||||||
|
|
||||||
# HipBLAS requirements
|
|
||||||
ENV PATH=/opt/rocm/bin:${PATH}
|
|
||||||
|
|
||||||
# Vulkan requirements
|
|
||||||
RUN <<EOT bash
|
|
||||||
if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
software-properties-common pciutils wget gpg-agent && \
|
|
||||||
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
|
|
||||||
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y \
|
|
||||||
vulkan-sdk && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
fi
|
|
||||||
EOT
|
|
||||||
|
|
||||||
# CuBLAS requirements
|
|
||||||
RUN <<EOT bash
|
|
||||||
if [ "${BUILD_TYPE}" = "cublas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
software-properties-common pciutils
|
|
||||||
if [ "amd64" = "$TARGETARCH" ]; then
|
|
||||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
|
||||||
fi
|
|
||||||
if [ "arm64" = "$TARGETARCH" ]; then
|
|
||||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
|
|
||||||
fi
|
|
||||||
dpkg -i cuda-keyring_1.1-1_all.deb && \
|
|
||||||
rm -f cuda-keyring_1.1-1_all.deb && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
fi
|
|
||||||
EOT
|
|
||||||
|
|
||||||
# If we are building with clblas support, we need the libraries for the builds
|
|
||||||
RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
libclblast-dev && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/* \
|
|
||||||
; fi
|
|
||||||
|
|
||||||
RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
hipblas-dev \
|
|
||||||
rocblas-dev && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/* && \
|
|
||||||
# I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
|
|
||||||
# to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
|
|
||||||
ldconfig \
|
|
||||||
; fi
|
|
||||||
|
|
||||||
RUN echo "TARGETARCH: $TARGETARCH"
|
|
||||||
|
|
||||||
# We need protoc installed, and the version in 22.04 is too old. We will create one as part of installing the gRPC build below
|
|
||||||
# but that will also bring in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only
|
|
||||||
# here so that we can generate the grpc code for the stablediffusion build
|
|
||||||
RUN <<EOT bash
|
|
||||||
if [ "amd64" = "$TARGETARCH" ]; then
|
|
||||||
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-x86_64.zip -o protoc.zip && \
|
|
||||||
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
|
||||||
rm protoc.zip
|
|
||||||
fi
|
|
||||||
if [ "arm64" = "$TARGETARCH" ]; then
|
|
||||||
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-aarch_64.zip -o protoc.zip && \
|
|
||||||
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
|
||||||
rm protoc.zip
|
|
||||||
fi
|
|
||||||
EOT
|
|
||||||
|
|
||||||
# Install CMake (the version in 22.04 is too old)
|
|
||||||
RUN <<EOT bash
|
|
||||||
if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
|
|
||||||
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
|
|
||||||
else
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y \
|
|
||||||
cmake && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
fi
|
|
||||||
EOT
|
|
||||||
|
|
||||||
COPY --from=grpc /opt/grpc /usr/local
|
|
||||||
|
|
||||||
|
|
||||||
COPY . /LocalAI
|
|
||||||
|
|
||||||
RUN make -C /LocalAI/backend/cpp/llama-cpp llama-cpp
|
|
||||||
RUN make -C /LocalAI/backend/cpp/llama-cpp llama-cpp-grpc
|
|
||||||
RUN make -C /LocalAI/backend/cpp/llama-cpp llama-cpp-rpc-server
|
|
||||||
|
|
||||||
# Copy libraries using a script to handle architecture differences
|
|
||||||
RUN make -C /LocalAI/backend/cpp/llama-cpp package
|
|
||||||
|
|
||||||
|
|
||||||
FROM scratch
|
|
||||||
|
|
||||||
|
|
||||||
# Copy all available binaries (the build process only creates the appropriate ones for the target architecture)
|
|
||||||
COPY --from=builder /LocalAI/backend/cpp/llama-cpp/package/. ./
|
|
||||||
@@ -1,123 +0,0 @@
|
|||||||
ARG BASE_IMAGE=ubuntu:22.04
|
|
||||||
|
|
||||||
FROM ${BASE_IMAGE} AS builder
|
|
||||||
ARG BACKEND=rerankers
|
|
||||||
ARG BUILD_TYPE
|
|
||||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
|
||||||
ARG CUDA_MAJOR_VERSION
|
|
||||||
ARG CUDA_MINOR_VERSION
|
|
||||||
ARG SKIP_DRIVERS=false
|
|
||||||
ENV CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION}
|
|
||||||
ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION}
|
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
|
||||||
ARG TARGETARCH
|
|
||||||
ARG TARGETVARIANT
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
build-essential \
|
|
||||||
ccache \
|
|
||||||
ca-certificates \
|
|
||||||
espeak-ng \
|
|
||||||
curl \
|
|
||||||
libssl-dev \
|
|
||||||
git \
|
|
||||||
git-lfs \
|
|
||||||
unzip \
|
|
||||||
upx-ucl \
|
|
||||||
curl python3-pip \
|
|
||||||
python-is-python3 \
|
|
||||||
python3-dev llvm \
|
|
||||||
python3-venv make && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/* && \
|
|
||||||
pip install --upgrade pip
|
|
||||||
|
|
||||||
|
|
||||||
# Cuda
|
|
||||||
ENV PATH=/usr/local/cuda/bin:${PATH}
|
|
||||||
|
|
||||||
# HipBLAS requirements
|
|
||||||
ENV PATH=/opt/rocm/bin:${PATH}
|
|
||||||
|
|
||||||
# Vulkan requirements
|
|
||||||
RUN <<EOT bash
|
|
||||||
if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
software-properties-common pciutils wget gpg-agent && \
|
|
||||||
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
|
|
||||||
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y \
|
|
||||||
vulkan-sdk && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
fi
|
|
||||||
EOT
|
|
||||||
|
|
||||||
# CuBLAS requirements
|
|
||||||
RUN <<EOT bash
|
|
||||||
if [ "${BUILD_TYPE}" = "cublas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
software-properties-common pciutils
|
|
||||||
if [ "amd64" = "$TARGETARCH" ]; then
|
|
||||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
|
||||||
fi
|
|
||||||
if [ "arm64" = "$TARGETARCH" ]; then
|
|
||||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
|
|
||||||
fi
|
|
||||||
dpkg -i cuda-keyring_1.1-1_all.deb && \
|
|
||||||
rm -f cuda-keyring_1.1-1_all.deb && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
fi
|
|
||||||
EOT
|
|
||||||
|
|
||||||
# If we are building with clblas support, we need the libraries for the builds
|
|
||||||
RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
libclblast-dev && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/* \
|
|
||||||
; fi
|
|
||||||
|
|
||||||
RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
hipblas-dev \
|
|
||||||
rocblas-dev && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/* && \
|
|
||||||
# I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
|
|
||||||
# to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
|
|
||||||
ldconfig \
|
|
||||||
; fi
|
|
||||||
# Install uv as a system package
|
|
||||||
RUN curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR=/usr/bin sh
|
|
||||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
|
||||||
|
|
||||||
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
|
||||||
|
|
||||||
# Install grpcio-tools (the version in 22.04 is too old)
|
|
||||||
RUN pip install --user grpcio-tools==1.71.0 grpcio==1.71.0
|
|
||||||
|
|
||||||
COPY python/${BACKEND} /${BACKEND}
|
|
||||||
COPY backend.proto /${BACKEND}/backend.proto
|
|
||||||
COPY python/common/ /${BACKEND}/common
|
|
||||||
|
|
||||||
RUN cd /${BACKEND} && make
|
|
||||||
|
|
||||||
FROM scratch
|
|
||||||
ARG BACKEND=rerankers
|
|
||||||
COPY --from=builder /${BACKEND}/ /
|
|
||||||
@@ -162,7 +162,6 @@ message Reply {
|
|||||||
int32 prompt_tokens = 3;
|
int32 prompt_tokens = 3;
|
||||||
double timing_prompt_processing = 4;
|
double timing_prompt_processing = 4;
|
||||||
double timing_token_generation = 5;
|
double timing_token_generation = 5;
|
||||||
bytes audio = 6;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
message GrammarTrigger {
|
message GrammarTrigger {
|
||||||
@@ -185,6 +184,7 @@ message ModelOptions {
|
|||||||
string MainGPU = 13;
|
string MainGPU = 13;
|
||||||
string TensorSplit = 14;
|
string TensorSplit = 14;
|
||||||
int32 Threads = 15;
|
int32 Threads = 15;
|
||||||
|
string LibrarySearchPath = 16;
|
||||||
float RopeFreqBase = 17;
|
float RopeFreqBase = 17;
|
||||||
float RopeFreqScale = 18;
|
float RopeFreqScale = 18;
|
||||||
float RMSNormEps = 19;
|
float RMSNormEps = 19;
|
||||||
@@ -255,10 +255,6 @@ message ModelOptions {
|
|||||||
string CacheTypeValue = 64;
|
string CacheTypeValue = 64;
|
||||||
|
|
||||||
repeated GrammarTrigger GrammarTriggers = 65;
|
repeated GrammarTrigger GrammarTriggers = 65;
|
||||||
|
|
||||||
bool Reranking = 71;
|
|
||||||
|
|
||||||
repeated string Overrides = 72;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
message Result {
|
message Result {
|
||||||
|
|||||||
@@ -1,146 +0,0 @@
|
|||||||
|
|
||||||
LLAMA_VERSION?=acd6cb1c41676f6bbb25c2a76fa5abeb1719301e
|
|
||||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
|
||||||
|
|
||||||
CMAKE_ARGS?=
|
|
||||||
BUILD_TYPE?=
|
|
||||||
NATIVE?=false
|
|
||||||
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
|
|
||||||
TARGET?=--target grpc-server
|
|
||||||
|
|
||||||
# Build shared libs (BUILD_SHARED_LIBS=ON) and disable curl; also enables GGML_CPU_ALL_VARIANTS and GGML_BACKEND_DL
|
|
||||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=ON -DLLAMA_CURL=OFF -DGGML_CPU_ALL_VARIANTS=ON -DGGML_BACKEND_DL=ON
|
|
||||||
|
|
||||||
CURRENT_MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
|
|
||||||
ifeq ($(NATIVE),false)
|
|
||||||
CMAKE_ARGS+=-DGGML_NATIVE=OFF
|
|
||||||
endif
|
|
||||||
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
|
|
||||||
ifeq ($(BUILD_TYPE),cublas)
|
|
||||||
CMAKE_ARGS+=-DGGML_CUDA=ON
|
|
||||||
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
|
||||||
# to CMAKE_ARGS automatically
|
|
||||||
else ifeq ($(BUILD_TYPE),openblas)
|
|
||||||
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
|
||||||
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
|
||||||
else ifeq ($(BUILD_TYPE),clblas)
|
|
||||||
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
|
||||||
# If it's hipblas we also have to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
|
|
||||||
else ifeq ($(BUILD_TYPE),hipblas)
|
|
||||||
ROCM_HOME ?= /opt/rocm
|
|
||||||
ROCM_PATH ?= /opt/rocm
|
|
||||||
export CXX=$(ROCM_HOME)/llvm/bin/clang++
|
|
||||||
export CC=$(ROCM_HOME)/llvm/bin/clang
|
|
||||||
# GPU_TARGETS ?= gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102
|
|
||||||
# AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
|
|
||||||
CMAKE_ARGS+=-DGGML_HIP=ON
|
|
||||||
# CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
|
|
||||||
else ifeq ($(BUILD_TYPE),vulkan)
|
|
||||||
CMAKE_ARGS+=-DGGML_VULKAN=1
|
|
||||||
else ifeq ($(OS),Darwin)
|
|
||||||
ifeq ($(BUILD_TYPE),)
|
|
||||||
BUILD_TYPE=metal
|
|
||||||
endif
|
|
||||||
ifneq ($(BUILD_TYPE),metal)
|
|
||||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
|
||||||
else
|
|
||||||
CMAKE_ARGS+=-DGGML_METAL=ON
|
|
||||||
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
|
|
||||||
CMAKE_ARGS+=-DGGML_METAL_USE_BF16=ON
|
|
||||||
CMAKE_ARGS+=-DGGML_OPENMP=OFF
|
|
||||||
endif
|
|
||||||
TARGET+=--target ggml-metal
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),sycl_f16)
|
|
||||||
CMAKE_ARGS+=-DGGML_SYCL=ON \
|
|
||||||
-DCMAKE_C_COMPILER=icx \
|
|
||||||
-DCMAKE_CXX_COMPILER=icpx \
|
|
||||||
-DCMAKE_CXX_FLAGS="-fsycl" \
|
|
||||||
-DGGML_SYCL_F16=ON
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),sycl_f32)
|
|
||||||
CMAKE_ARGS+=-DGGML_SYCL=ON \
|
|
||||||
-DCMAKE_C_COMPILER=icx \
|
|
||||||
-DCMAKE_CXX_COMPILER=icpx \
|
|
||||||
-DCMAKE_CXX_FLAGS="-fsycl"
|
|
||||||
endif
|
|
||||||
|
|
||||||
INSTALLED_PACKAGES=$(CURDIR)/../grpc/installed_packages
|
|
||||||
INSTALLED_LIB_CMAKE=$(INSTALLED_PACKAGES)/lib/cmake
|
|
||||||
ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
|
|
||||||
-DProtobuf_DIR=${INSTALLED_LIB_CMAKE}/protobuf \
|
|
||||||
-Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \
|
|
||||||
-DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \
|
|
||||||
-DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include
|
|
||||||
build-llama-cpp-grpc-server:
|
|
||||||
# Conditionally build grpc for the llama backend to use if needed
|
|
||||||
ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
|
|
||||||
$(MAKE) -C ../../grpc build
|
|
||||||
_PROTOBUF_PROTOC=${INSTALLED_PACKAGES}/bin/proto \
|
|
||||||
_GRPC_CPP_PLUGIN_EXECUTABLE=${INSTALLED_PACKAGES}/bin/grpc_cpp_plugin \
|
|
||||||
PATH="${INSTALLED_PACKAGES}/bin:${PATH}" \
|
|
||||||
CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" \
|
|
||||||
LLAMA_VERSION=$(LLAMA_VERSION) \
|
|
||||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(VARIANT) grpc-server
|
|
||||||
else
|
|
||||||
echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
|
|
||||||
LLAMA_VERSION=$(LLAMA_VERSION) $(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(VARIANT) grpc-server
|
|
||||||
endif
|
|
||||||
|
|
||||||
llama-cpp: llama.cpp
|
|
||||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-build
|
|
||||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-build purge
|
|
||||||
$(info ${GREEN}I llama-cpp build info:${RESET})
|
|
||||||
CMAKE_ARGS="$(CMAKE_ARGS)" $(MAKE) VARIANT="llama-cpp-build" build-llama-cpp-grpc-server
|
|
||||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-build/grpc-server llama-cpp
|
|
||||||
|
|
||||||
llama-cpp-grpc: llama.cpp
|
|
||||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build
|
|
||||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build purge
|
|
||||||
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
|
|
||||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
|
|
||||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/grpc-server llama-cpp-grpc
|
|
||||||
|
|
||||||
llama-cpp-rpc-server: llama-cpp-grpc
|
|
||||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/llama.cpp/build/bin/rpc-server llama-cpp-rpc-server
|
|
||||||
|
|
||||||
llama.cpp:
|
|
||||||
mkdir -p llama.cpp
|
|
||||||
cd llama.cpp && \
|
|
||||||
git init && \
|
|
||||||
git remote add origin $(LLAMA_REPO) && \
|
|
||||||
git fetch origin && \
|
|
||||||
git checkout -b build $(LLAMA_VERSION) && \
|
|
||||||
git submodule update --init --recursive --depth 1 --single-branch
|
|
||||||
|
|
||||||
llama.cpp/tools/grpc-server: llama.cpp
|
|
||||||
mkdir -p llama.cpp/tools/grpc-server
|
|
||||||
bash prepare.sh
|
|
||||||
|
|
||||||
rebuild:
|
|
||||||
bash prepare.sh
|
|
||||||
rm -rf grpc-server
|
|
||||||
$(MAKE) grpc-server
|
|
||||||
|
|
||||||
package:
|
|
||||||
bash package.sh
|
|
||||||
|
|
||||||
purge:
|
|
||||||
rm -rf llama.cpp/build
|
|
||||||
rm -rf llama.cpp/tools/grpc-server
|
|
||||||
rm -rf grpc-server
|
|
||||||
|
|
||||||
clean: purge
|
|
||||||
rm -rf llama.cpp
|
|
||||||
|
|
||||||
grpc-server: llama.cpp llama.cpp/tools/grpc-server
|
|
||||||
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
|
|
||||||
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
|
||||||
+bash -c "source $(ONEAPI_VARS); \
|
|
||||||
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)"
|
|
||||||
else
|
|
||||||
+cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
|
|
||||||
endif
|
|
||||||
cp llama.cpp/build/bin/grpc-server .
|
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -1,42 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
# Script to copy the appropriate libraries based on architecture
|
|
||||||
# This script is used in the final stage of the Dockerfile
|
|
||||||
|
|
||||||
set -e
|
|
||||||
|
|
||||||
CURDIR=$(dirname "$(realpath $0)")
|
|
||||||
|
|
||||||
# Create lib directory
|
|
||||||
mkdir -p $CURDIR/package/lib
|
|
||||||
|
|
||||||
cp -avrf $CURDIR/llama-cpp-* $CURDIR/package/
|
|
||||||
cp -rfv $CURDIR/run.sh $CURDIR/package/
|
|
||||||
|
|
||||||
# Detect architecture and copy appropriate libraries
|
|
||||||
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
|
||||||
# x86_64 architecture
|
|
||||||
echo "Detected x86_64 architecture, copying x86_64 libraries..."
|
|
||||||
cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
|
|
||||||
elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
|
|
||||||
# ARM64 architecture
|
|
||||||
echo "Detected ARM64 architecture, copying ARM64 libraries..."
|
|
||||||
cp -arfLv /lib/ld-linux-aarch64.so.1 $CURDIR/package/lib/ld.so
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
|
|
||||||
else
|
|
||||||
echo "Error: Could not detect architecture"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "Packaging completed successfully"
|
|
||||||
ls -liah $CURDIR/package/
|
|
||||||
ls -liah $CURDIR/package/lib/
|
|
||||||
@@ -1,52 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
## Patches
|
|
||||||
## Apply patches from the `patches` directory
|
|
||||||
for patch in $(ls patches); do
|
|
||||||
echo "Applying patch $patch"
|
|
||||||
patch -d llama.cpp/ -p1 < patches/$patch
|
|
||||||
done
|
|
||||||
|
|
||||||
set -e
|
|
||||||
|
|
||||||
cp -r CMakeLists.txt llama.cpp/tools/grpc-server/
|
|
||||||
cp -r grpc-server.cpp llama.cpp/tools/grpc-server/
|
|
||||||
cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/tools/grpc-server/
|
|
||||||
cp -rfv llama.cpp/tools/server/utils.hpp llama.cpp/tools/grpc-server/
|
|
||||||
cp -rfv llama.cpp/vendor/cpp-httplib/httplib.h llama.cpp/tools/grpc-server/
|
|
||||||
|
|
||||||
set +e
|
|
||||||
if grep -q "grpc-server" llama.cpp/tools/CMakeLists.txt; then
|
|
||||||
echo "grpc-server already added"
|
|
||||||
else
|
|
||||||
echo "add_subdirectory(grpc-server)" >> llama.cpp/tools/CMakeLists.txt
|
|
||||||
fi
|
|
||||||
set -e
|
|
||||||
|
|
||||||
# Now to keep maximum compatibility with the original server.cpp, we need to remove the index.html.gz.hpp and loading.html.hpp includes
|
|
||||||
# and remove the main function
|
|
||||||
# TODO: upstream this to the original server.cpp by extracting the upstream main function to a separate file
|
|
||||||
awk '
|
|
||||||
/int[ \t]+main[ \t]*\(/ { # If the line starts the main function
|
|
||||||
in_main=1; # Set a flag
|
|
||||||
open_braces=0; # Track number of open braces
|
|
||||||
}
|
|
||||||
in_main {
|
|
||||||
open_braces += gsub(/\{/, "{"); # Count opening braces
|
|
||||||
open_braces -= gsub(/\}/, "}"); # Count closing braces
|
|
||||||
if (open_braces == 0) { # If all braces are closed
|
|
||||||
in_main=0; # End skipping
|
|
||||||
}
|
|
||||||
next; # Skip lines inside main
|
|
||||||
}
|
|
||||||
!in_main # Print lines not inside main
|
|
||||||
' "llama.cpp/tools/server/server.cpp" > llama.cpp/tools/grpc-server/server.cpp
|
|
||||||
|
|
||||||
# remove index.html.gz.hpp and loading.html.hpp includes
|
|
||||||
if [[ "$OSTYPE" == "darwin"* ]]; then
|
|
||||||
# macOS
|
|
||||||
sed -i '' '/#include "index\.html\.gz\.hpp"/d; /#include "loading\.html\.hpp"/d' llama.cpp/tools/grpc-server/server.cpp
|
|
||||||
else
|
|
||||||
# Linux and others
|
|
||||||
sed -i '/#include "index\.html\.gz\.hpp"/d; /#include "loading\.html\.hpp"/d' llama.cpp/tools/grpc-server/server.cpp
|
|
||||||
fi
|
|
||||||
@@ -1,33 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -ex
|
|
||||||
|
|
||||||
# Get the absolute current dir where the script is located
|
|
||||||
CURDIR=$(dirname "$(realpath $0)")
|
|
||||||
|
|
||||||
cd /
|
|
||||||
|
|
||||||
BINARY=llama-cpp
|
|
||||||
|
|
||||||
## P2P/GRPC mode
|
|
||||||
if [ -n "$LLAMACPP_GRPC_SERVERS" ]; then
|
|
||||||
if [ -e $CURDIR/llama-cpp-grpc ]; then
|
|
||||||
BINARY=llama-cpp-grpc
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Extend the library search path with the lib/ directory located next to this script
|
|
||||||
if [ "$(uname)" == "Darwin" ]; then
|
|
||||||
DYLD_FALLBACK_LIBRARY_PATH=$CURDIR/lib:$DYLD_FALLBACK_LIBRARY_PATH
|
|
||||||
else
|
|
||||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
|
||||||
fi
|
|
||||||
|
|
||||||
# If there is a lib/ld.so, use it
|
|
||||||
if [ -f $CURDIR/lib/ld.so ]; then
|
|
||||||
echo "Using lib/ld.so"
|
|
||||||
echo "Using binary: $BINARY"
|
|
||||||
exec $CURDIR/lib/ld.so $CURDIR/$BINARY "$@"
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "Using binary: $BINARY"
|
|
||||||
exec $CURDIR/$BINARY "$@"
|
|
||||||
@@ -1,3 +1,20 @@
|
|||||||
|
|
||||||
|
## XXX: In some versions of CMake clip wasn't being built before llama.
|
||||||
|
## This is a hack for now, but it should be fixed in the future.
|
||||||
|
# set(TARGET myclip)
|
||||||
|
# add_library(${TARGET} clip.cpp clip.h clip-impl.h llava.cpp llava.h)
|
||||||
|
# install(TARGETS ${TARGET} LIBRARY)
|
||||||
|
# target_include_directories(myclip PUBLIC .)
|
||||||
|
# target_include_directories(myclip PUBLIC ../..)
|
||||||
|
# target_include_directories(myclip PUBLIC ../../common)
|
||||||
|
# target_link_libraries(${TARGET} PRIVATE common ggml llama ${CMAKE_THREAD_LIBS_INIT})
|
||||||
|
# target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
||||||
|
# if (NOT MSVC)
|
||||||
|
# target_compile_options(${TARGET} PRIVATE -Wno-cast-qual) # stb_image.h
|
||||||
|
# endif()
|
||||||
|
# END CLIP hack
|
||||||
|
|
||||||
|
|
||||||
set(TARGET grpc-server)
|
set(TARGET grpc-server)
|
||||||
set(CMAKE_CXX_STANDARD 17)
|
set(CMAKE_CXX_STANDARD 17)
|
||||||
cmake_minimum_required(VERSION 3.15)
|
cmake_minimum_required(VERSION 3.15)
|
||||||
@@ -17,8 +34,6 @@ if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
|||||||
include_directories("${HOMEBREW_DEFAULT_PREFIX}/include")
|
include_directories("${HOMEBREW_DEFAULT_PREFIX}/include")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(Protobuf_USE_STATIC_LIBS OFF)
|
|
||||||
set(gRPC_USE_STATIC_LIBS OFF)
|
|
||||||
find_package(absl CONFIG REQUIRED)
|
find_package(absl CONFIG REQUIRED)
|
||||||
find_package(Protobuf CONFIG REQUIRED)
|
find_package(Protobuf CONFIG REQUIRED)
|
||||||
find_package(gRPC CONFIG REQUIRED)
|
find_package(gRPC CONFIG REQUIRED)
|
||||||
@@ -59,7 +74,7 @@ add_library(hw_grpc_proto
|
|||||||
${hw_proto_srcs}
|
${hw_proto_srcs}
|
||||||
${hw_proto_hdrs} )
|
${hw_proto_hdrs} )
|
||||||
|
|
||||||
add_executable(${TARGET} grpc-server.cpp utils.hpp json.hpp httplib.h)
|
add_executable(${TARGET} grpc-server.cpp utils.hpp json.hpp)
|
||||||
|
|
||||||
target_include_directories(${TARGET} PRIVATE ../llava)
|
target_include_directories(${TARGET} PRIVATE ../llava)
|
||||||
target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR})
|
target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR})
|
||||||
87
backend/cpp/llama/Makefile
Normal file
87
backend/cpp/llama/Makefile
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
|
||||||
|
LLAMA_VERSION?=
|
||||||
|
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||||
|
|
||||||
|
CMAKE_ARGS?=
|
||||||
|
BUILD_TYPE?=
|
||||||
|
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
|
||||||
|
TARGET?=--target grpc-server
|
||||||
|
|
||||||
|
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
|
||||||
|
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF
|
||||||
|
|
||||||
|
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
|
||||||
|
ifeq ($(BUILD_TYPE),cublas)
|
||||||
|
CMAKE_ARGS+=-DGGML_CUDA=ON
|
||||||
|
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||||
|
# to CMAKE_ARGS automatically
|
||||||
|
else ifeq ($(BUILD_TYPE),openblas)
|
||||||
|
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||||
|
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||||
|
else ifeq ($(BUILD_TYPE),clblas)
|
||||||
|
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||||
|
# If it's hipblas we also have to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
|
||||||
|
else ifeq ($(BUILD_TYPE),hipblas)
|
||||||
|
CMAKE_ARGS+=-DGGML_HIP=ON
|
||||||
|
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
|
||||||
|
# But if it's OSX without metal, disable it here
|
||||||
|
else ifeq ($(OS),Darwin)
|
||||||
|
ifneq ($(BUILD_TYPE),metal)
|
||||||
|
CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||||
|
else
|
||||||
|
CMAKE_ARGS+=-DGGML_METAL=ON
|
||||||
|
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
|
||||||
|
TARGET+=--target ggml-metal
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(BUILD_TYPE),sycl_f16)
|
||||||
|
CMAKE_ARGS+=-DGGML_SYCL=ON \
|
||||||
|
-DCMAKE_C_COMPILER=icx \
|
||||||
|
-DCMAKE_CXX_COMPILER=icpx \
|
||||||
|
-DCMAKE_CXX_FLAGS="-fsycl" \
|
||||||
|
-DGGML_SYCL_F16=ON
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(BUILD_TYPE),sycl_f32)
|
||||||
|
CMAKE_ARGS+=-DGGML_SYCL=ON \
|
||||||
|
-DCMAKE_C_COMPILER=icx \
|
||||||
|
-DCMAKE_CXX_COMPILER=icpx \
|
||||||
|
-DCMAKE_CXX_FLAGS="-fsycl"
|
||||||
|
endif
|
||||||
|
|
||||||
|
llama.cpp:
|
||||||
|
mkdir -p llama.cpp
|
||||||
|
cd llama.cpp && \
|
||||||
|
git init && \
|
||||||
|
git remote add origin $(LLAMA_REPO) && \
|
||||||
|
git fetch origin && \
|
||||||
|
git checkout -b build $(LLAMA_VERSION) && \
|
||||||
|
git submodule update --init --recursive --depth 1 --single-branch
|
||||||
|
|
||||||
|
llama.cpp/tools/grpc-server: llama.cpp
|
||||||
|
mkdir -p llama.cpp/tools/grpc-server
|
||||||
|
bash prepare.sh
|
||||||
|
|
||||||
|
rebuild:
|
||||||
|
bash prepare.sh
|
||||||
|
rm -rf grpc-server
|
||||||
|
$(MAKE) grpc-server
|
||||||
|
|
||||||
|
purge:
|
||||||
|
rm -rf llama.cpp/build
|
||||||
|
rm -rf llama.cpp/tools/grpc-server
|
||||||
|
rm -rf grpc-server
|
||||||
|
|
||||||
|
clean: purge
|
||||||
|
rm -rf llama.cpp
|
||||||
|
|
||||||
|
grpc-server: llama.cpp llama.cpp/tools/grpc-server
|
||||||
|
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
|
||||||
|
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
||||||
|
+bash -c "source $(ONEAPI_VARS); \
|
||||||
|
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)"
|
||||||
|
else
|
||||||
|
+cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
|
||||||
|
endif
|
||||||
|
cp llama.cpp/build/bin/grpc-server .
|
||||||
2477
backend/cpp/llama/grpc-server.cpp
Normal file
2477
backend/cpp/llama/grpc-server.cpp
Normal file
File diff suppressed because it is too large
Load Diff
24596
backend/cpp/llama/json.hpp
vendored
Normal file
24596
backend/cpp/llama/json.hpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
28
backend/cpp/llama/prepare.sh
Normal file
28
backend/cpp/llama/prepare.sh
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
## Patches
|
||||||
|
## Apply patches from the `patches` directory
|
||||||
|
for patch in $(ls patches); do
|
||||||
|
echo "Applying patch $patch"
|
||||||
|
patch -d llama.cpp/ -p1 < patches/$patch
|
||||||
|
done
|
||||||
|
|
||||||
|
cp -r CMakeLists.txt llama.cpp/tools/grpc-server/
|
||||||
|
cp -r grpc-server.cpp llama.cpp/tools/grpc-server/
|
||||||
|
cp -rfv json.hpp llama.cpp/tools/grpc-server/
|
||||||
|
cp -rfv utils.hpp llama.cpp/tools/grpc-server/
|
||||||
|
|
||||||
|
if grep -q "grpc-server" llama.cpp/tools/CMakeLists.txt; then
|
||||||
|
echo "grpc-server already added"
|
||||||
|
else
|
||||||
|
echo "add_subdirectory(grpc-server)" >> llama.cpp/tools/CMakeLists.txt
|
||||||
|
fi
|
||||||
|
|
||||||
|
## XXX: In some versions of CMake clip wasn't being built before llama.
|
||||||
|
## This is a hack for now, but it should be fixed in the future.
|
||||||
|
# cp -rfv llama.cpp/tools/mtmd/clip.h llama.cpp/tools/grpc-server/clip.h
|
||||||
|
# cp -rfv llama.cpp/tools/mtmd/clip-impl.h llama.cpp/tools/grpc-server/clip-impl.h
|
||||||
|
# cp -rfv llama.cpp/tools/mtmd/llava.cpp llama.cpp/tools/grpc-server/llava.cpp
|
||||||
|
# echo '#include "llama.h"' > llama.cpp/tools/grpc-server/llava.h
|
||||||
|
# cat llama.cpp/tools/mtmd/llava.h >> llama.cpp/tools/grpc-server/llava.h
|
||||||
|
# cp -rfv llama.cpp/tools/mtmd/clip.cpp llama.cpp/tools/grpc-server/clip.cpp
|
||||||
910
backend/cpp/llama/utils.hpp
vendored
Normal file
910
backend/cpp/llama/utils.hpp
vendored
Normal file
@@ -0,0 +1,910 @@
|
|||||||
|
// https://github.com/ggerganov/llama.cpp/blob/master/tools/server/utils.hpp
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <set>
|
||||||
|
#include <mutex>
|
||||||
|
#include <condition_variable>
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
|
#include "json.hpp"
|
||||||
|
|
||||||
|
#include "../mtmd/clip.h"
|
||||||
|
|
||||||
|
using json = nlohmann::json;
|
||||||
|
|
||||||
|
extern bool server_verbose;
|
||||||
|
|
||||||
|
#ifndef SERVER_VERBOSE
|
||||||
|
#define SERVER_VERBOSE 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if SERVER_VERBOSE != 1
|
||||||
|
#define LOG_VERBOSE(MSG, ...)
|
||||||
|
#else
|
||||||
|
#define LOG_VERBOSE(MSG, ...) \
|
||||||
|
do \
|
||||||
|
{ \
|
||||||
|
if (server_verbose) \
|
||||||
|
{ \
|
||||||
|
server_log("VERBOSE", __func__, __LINE__, MSG, __VA_ARGS__); \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define LOG_ERROR( MSG, ...) server_log("ERROR", __func__, __LINE__, MSG, __VA_ARGS__)
|
||||||
|
#define LOG_WARNING(MSG, ...) server_log("WARNING", __func__, __LINE__, MSG, __VA_ARGS__)
|
||||||
|
#define LOG_INFO( MSG, ...) server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__)
|
||||||
|
|
||||||
|
//
|
||||||
|
// parallel
|
||||||
|
//
|
||||||
|
|
||||||
|
enum server_state {
|
||||||
|
SERVER_STATE_LOADING_MODEL, // Server is starting up, model not fully loaded yet
|
||||||
|
SERVER_STATE_READY, // Server is ready and model is loaded
|
||||||
|
SERVER_STATE_ERROR // An error occurred, load_model failed
|
||||||
|
};
|
||||||
|
|
||||||
|
enum task_type {
|
||||||
|
TASK_TYPE_COMPLETION,
|
||||||
|
TASK_TYPE_CANCEL,
|
||||||
|
TASK_TYPE_NEXT_RESPONSE
|
||||||
|
};
|
||||||
|
|
||||||
|
struct task_server {
|
||||||
|
int id = -1; // to be filled by llama_server_queue
|
||||||
|
int target_id;
|
||||||
|
task_type type;
|
||||||
|
json data;
|
||||||
|
bool infill_mode = false;
|
||||||
|
bool embedding_mode = false;
|
||||||
|
int multitask_id = -1;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct task_result {
|
||||||
|
int id;
|
||||||
|
int multitask_id = -1;
|
||||||
|
bool stop;
|
||||||
|
bool error;
|
||||||
|
json result_json;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct task_multi {
|
||||||
|
int id;
|
||||||
|
std::set<int> subtasks_remaining{};
|
||||||
|
std::vector<task_result> results{};
|
||||||
|
};
|
||||||
|
|
||||||
|
// TODO: can become bool if we can't find use of more states
|
||||||
|
enum slot_state
|
||||||
|
{
|
||||||
|
IDLE,
|
||||||
|
PROCESSING,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum slot_command
|
||||||
|
{
|
||||||
|
NONE,
|
||||||
|
LOAD_PROMPT,
|
||||||
|
RELEASE,
|
||||||
|
};
|
||||||
|
|
||||||
|
struct slot_params
|
||||||
|
{
|
||||||
|
bool stream = true;
|
||||||
|
bool cache_prompt = false; // remember the prompt to avoid reprocessing all prompt
|
||||||
|
|
||||||
|
uint32_t seed = -1; // RNG seed
|
||||||
|
int32_t n_keep = 0; // number of tokens to keep from initial prompt
|
||||||
|
int32_t n_predict = -1; // new tokens to predict
|
||||||
|
|
||||||
|
std::vector<std::string> antiprompt;
|
||||||
|
|
||||||
|
json input_prefix;
|
||||||
|
json input_suffix;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct slot_image
|
||||||
|
{
|
||||||
|
int32_t id;
|
||||||
|
|
||||||
|
bool request_encode_image = false;
|
||||||
|
float * image_embedding = nullptr;
|
||||||
|
int32_t image_tokens = 0;
|
||||||
|
|
||||||
|
clip_image_u8 * img_data;
|
||||||
|
|
||||||
|
std::string prefix_prompt; // before of this image
|
||||||
|
};
|
||||||
|
|
||||||
|
// completion token output with probabilities
|
||||||
|
struct completion_token_output
|
||||||
|
{
|
||||||
|
struct token_prob
|
||||||
|
{
|
||||||
|
llama_token tok;
|
||||||
|
float prob;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<token_prob> probs;
|
||||||
|
llama_token tok;
|
||||||
|
std::string text_to_send;
|
||||||
|
};
|
||||||
|
|
||||||
|
static inline void server_log(const char *level, const char *function, int line,
|
||||||
|
const char *message, const nlohmann::ordered_json &extra)
|
||||||
|
{
|
||||||
|
nlohmann::ordered_json log
|
||||||
|
{
|
||||||
|
{"timestamp", time(nullptr)},
|
||||||
|
{"level", level},
|
||||||
|
{"function", function},
|
||||||
|
{"line", line},
|
||||||
|
{"message", message},
|
||||||
|
};
|
||||||
|
|
||||||
|
if (!extra.empty())
|
||||||
|
{
|
||||||
|
log.merge_patch(extra);
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::string str = log.dump(-1, ' ', false, json::error_handler_t::replace);
|
||||||
|
printf("%.*s\n", (int)str.size(), str.data());
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// server utils
|
||||||
|
//
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static T json_value(const json &body, const std::string &key, const T &default_value)
|
||||||
|
{
|
||||||
|
// Fallback null to default value
|
||||||
|
return body.contains(key) && !body.at(key).is_null()
|
||||||
|
? body.value(key, default_value)
|
||||||
|
: default_value;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline std::string format_chatml(std::vector<json> messages)
|
||||||
|
{
|
||||||
|
std::ostringstream chatml_msgs;
|
||||||
|
|
||||||
|
for (auto it = messages.begin(); it != messages.end(); ++it) {
|
||||||
|
chatml_msgs << "<|im_start|>"
|
||||||
|
<< json_value(*it, "role", std::string("user")) << '\n';
|
||||||
|
chatml_msgs << json_value(*it, "content", std::string(""))
|
||||||
|
<< "<|im_end|>\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
chatml_msgs << "<|im_start|>assistant" << '\n';
|
||||||
|
|
||||||
|
return chatml_msgs.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// work queue utils
|
||||||
|
//
|
||||||
|
|
||||||
|
struct llama_server_queue {
|
||||||
|
int id = 0;
|
||||||
|
std::mutex mutex_tasks;
|
||||||
|
// queues
|
||||||
|
std::vector<task_server> queue_tasks;
|
||||||
|
std::vector<task_server> queue_tasks_deferred;
|
||||||
|
std::vector<task_multi> queue_multitasks;
|
||||||
|
std::condition_variable condition_tasks;
|
||||||
|
// callback functions
|
||||||
|
std::function<void(task_server&)> callback_new_task;
|
||||||
|
std::function<void(task_multi&)> callback_finish_multitask;
|
||||||
|
std::function<void(void)> callback_all_task_finished;
|
||||||
|
|
||||||
|
// Add a new task to the end of the queue
|
||||||
|
int post(task_server task) {
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||||
|
if (task.id == -1) {
|
||||||
|
task.id = id++;
|
||||||
|
}
|
||||||
|
queue_tasks.push_back(std::move(task));
|
||||||
|
condition_tasks.notify_one();
|
||||||
|
return task.id;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add a new task, but defer until one slot is available
|
||||||
|
void defer(task_server task) {
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||||
|
queue_tasks_deferred.push_back(std::move(task));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the next id for creating anew task
|
||||||
|
int get_new_id() {
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||||
|
return id++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Register function to process a new task
|
||||||
|
void on_new_task(std::function<void(task_server&)> callback) {
|
||||||
|
callback_new_task = callback;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Register function to process a multitask
|
||||||
|
void on_finish_multitask(std::function<void(task_multi&)> callback) {
|
||||||
|
callback_finish_multitask = callback;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Register the function to be called when the batch of tasks is finished
|
||||||
|
void on_all_tasks_finished(std::function<void(void)> callback) {
|
||||||
|
callback_all_task_finished = callback;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Call when the state of one slot is changed
|
||||||
|
void notify_slot_changed() {
|
||||||
|
// move deferred tasks back to main loop
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||||
|
for (auto & task : queue_tasks_deferred) {
|
||||||
|
queue_tasks.push_back(std::move(task));
|
||||||
|
}
|
||||||
|
queue_tasks_deferred.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start the main loop. This call is blocking
|
||||||
|
[[noreturn]]
|
||||||
|
void start_loop() {
|
||||||
|
while (true) {
|
||||||
|
// new task arrived
|
||||||
|
LOG_VERBOSE("have new task", {});
|
||||||
|
{
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||||
|
if (queue_tasks.empty()) {
|
||||||
|
lock.unlock();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
task_server task = queue_tasks.front();
|
||||||
|
queue_tasks.erase(queue_tasks.begin());
|
||||||
|
lock.unlock();
|
||||||
|
LOG_VERBOSE("callback_new_task", {});
|
||||||
|
callback_new_task(task);
|
||||||
|
}
|
||||||
|
LOG_VERBOSE("callback_all_task_finished", {});
|
||||||
|
// process and update all the multitasks
|
||||||
|
auto queue_iterator = queue_multitasks.begin();
|
||||||
|
while (queue_iterator != queue_multitasks.end())
|
||||||
|
{
|
||||||
|
if (queue_iterator->subtasks_remaining.empty())
|
||||||
|
{
|
||||||
|
// all subtasks done == multitask is done
|
||||||
|
task_multi current_multitask = *queue_iterator;
|
||||||
|
callback_finish_multitask(current_multitask);
|
||||||
|
// remove this multitask
|
||||||
|
queue_iterator = queue_multitasks.erase(queue_iterator);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
++queue_iterator;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// all tasks in the current loop is finished
|
||||||
|
callback_all_task_finished();
|
||||||
|
}
|
||||||
|
LOG_VERBOSE("wait for new task", {});
|
||||||
|
// wait for new task
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||||
|
if (queue_tasks.empty()) {
|
||||||
|
condition_tasks.wait(lock, [&]{
|
||||||
|
return !queue_tasks.empty();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// functions to manage multitasks
|
||||||
|
//
|
||||||
|
|
||||||
|
// add a multitask by specifying the id of all subtask (subtask is a task_server)
|
||||||
|
void add_multitask(int multitask_id, std::vector<int>& sub_ids)
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(mutex_tasks);
|
||||||
|
task_multi multi;
|
||||||
|
multi.id = multitask_id;
|
||||||
|
std::copy(sub_ids.begin(), sub_ids.end(), std::inserter(multi.subtasks_remaining, multi.subtasks_remaining.end()));
|
||||||
|
queue_multitasks.push_back(multi);
|
||||||
|
}
|
||||||
|
|
||||||
|
// updatethe remaining subtasks, while appending results to multitask
|
||||||
|
void update_multitask(int multitask_id, int subtask_id, task_result& result)
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(mutex_tasks);
|
||||||
|
for (auto& multitask : queue_multitasks)
|
||||||
|
{
|
||||||
|
if (multitask.id == multitask_id)
|
||||||
|
{
|
||||||
|
multitask.subtasks_remaining.erase(subtask_id);
|
||||||
|
multitask.results.push_back(result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct llama_server_response {
|
||||||
|
typedef std::function<void(int, int, task_result&)> callback_multitask_t;
|
||||||
|
callback_multitask_t callback_update_multitask;
|
||||||
|
// for keeping track of all tasks waiting for the result
|
||||||
|
std::set<int> waiting_task_ids;
|
||||||
|
// the main result queue
|
||||||
|
std::vector<task_result> queue_results;
|
||||||
|
std::mutex mutex_results;
|
||||||
|
std::condition_variable condition_results;
|
||||||
|
|
||||||
|
void add_waiting_task_id(int task_id) {
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_results);
|
||||||
|
waiting_task_ids.insert(task_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
void remove_waiting_task_id(int task_id) {
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_results);
|
||||||
|
waiting_task_ids.erase(task_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
// This function blocks the thread until there is a response for this task_id
|
||||||
|
task_result recv(int task_id) {
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_results);
|
||||||
|
condition_results.wait(lock, [&]{
|
||||||
|
return !queue_results.empty();
|
||||||
|
});
|
||||||
|
LOG_VERBOSE("condition_results unblock", {});
|
||||||
|
|
||||||
|
for (int i = 0; i < (int) queue_results.size(); i++)
|
||||||
|
{
|
||||||
|
if (queue_results[i].id == task_id)
|
||||||
|
{
|
||||||
|
assert(queue_results[i].multitask_id == -1);
|
||||||
|
task_result res = queue_results[i];
|
||||||
|
queue_results.erase(queue_results.begin() + i);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// should never reach here
|
||||||
|
}
|
||||||
|
|
||||||
|
// Register the function to update multitask
|
||||||
|
void on_multitask_update(callback_multitask_t callback) {
|
||||||
|
callback_update_multitask = callback;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Send a new result to a waiting task_id
|
||||||
|
void send(task_result result) {
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_results);
|
||||||
|
LOG_VERBOSE("send new result", {});
|
||||||
|
for (auto& task_id : waiting_task_ids) {
|
||||||
|
// LOG_TEE("waiting task id %i \n", task_id);
|
||||||
|
// for now, tasks that have associated parent multitasks just get erased once multitask picks up the result
|
||||||
|
if (result.multitask_id == task_id)
|
||||||
|
{
|
||||||
|
LOG_VERBOSE("callback_update_multitask", {});
|
||||||
|
callback_update_multitask(task_id, result.id, result);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result.id == task_id)
|
||||||
|
{
|
||||||
|
LOG_VERBOSE("queue_results.push_back", {});
|
||||||
|
queue_results.push_back(result);
|
||||||
|
condition_results.notify_one();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
//
|
||||||
|
// base64 utils (TODO: move to common in the future)
|
||||||
|
//
|
||||||
|
|
||||||
|
// The 64 symbols of the standard base64 alphabet, indexed by their 6-bit value.
static const std::string base64_chars =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/";

// True when `c` is '+', '/' or alphanumeric.
static inline bool is_base64(uint8_t c)
{
    return c == '+' || c == '/' || isalnum(c);
}

// Decodes base64 text into bytes. Decoding stops at the first '=' or at the
// first character rejected by is_base64(); a trailing partial group of k
// symbols yields k - 1 bytes.
static inline std::vector<uint8_t> base64_decode(const std::string & encoded_string)
{
    std::vector<uint8_t> decoded;

    uint8_t quad[4];    // symbols collected for the current group
    uint8_t triple[3];  // bytes unpacked from the current group
    int     filled = 0; // number of symbols currently held in quad

    // maps the collected symbols to their 6-bit values and packs them into triple
    const auto unpack = [&]() {
        for (int k = 0; k < 4; k++) {
            quad[k] = base64_chars.find(quad[k]);
        }
        triple[0] = ((quad[0]      ) << 2) + ((quad[1] & 0x30) >> 4);
        triple[1] = ((quad[1] & 0xf) << 4) + ((quad[2] & 0x3c) >> 2);
        triple[2] = ((quad[2] & 0x3) << 6) +   quad[3];
    };

    const int total = encoded_string.size();
    for (int pos = 0; pos < total; pos++) {
        const char symbol = encoded_string[pos];
        if (symbol == '=' || !is_base64(symbol)) {
            break;
        }

        quad[filled++] = symbol;
        if (filled < 4) {
            continue;
        }

        // a full group of 4 symbols gives 3 bytes
        unpack();
        decoded.insert(decoded.end(), triple, triple + 3);
        filled = 0;
    }

    if (filled > 0) {
        // zero-fill the missing symbols of the last group before unpacking it
        for (int k = filled; k < 4; k++) {
            quad[k] = 0;
        }
        unpack();
        decoded.insert(decoded.end(), triple, triple + (filled - 1));
    }

    return decoded;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// tokenizer and input processing utils
|
||||||
|
//
|
||||||
|
|
||||||
|
static bool json_is_array_of_numbers(const json & data) {
|
||||||
|
if (data.is_array()) {
|
||||||
|
for (const auto & e : data) {
|
||||||
|
if (!e.is_number_integer()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// is array having BOTH numbers & strings?
|
||||||
|
static bool json_is_array_of_mixed_numbers_strings(const json & data) {
|
||||||
|
bool seen_string = false;
|
||||||
|
bool seen_number = false;
|
||||||
|
if (data.is_array()) {
|
||||||
|
for (const auto & e : data) {
|
||||||
|
seen_string |= e.is_string();
|
||||||
|
seen_number |= e.is_number_integer();
|
||||||
|
if (seen_number && seen_string) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// get value by path(key1 / key2)
|
||||||
|
static json json_get_nested_values(const std::vector<std::string> & paths, const json & js) {
|
||||||
|
json result = json::object();
|
||||||
|
|
||||||
|
for (const std::string & path : paths) {
|
||||||
|
json current = js;
|
||||||
|
const auto keys = string_split<std::string>(path, /*separator*/ '/');
|
||||||
|
bool valid_path = true;
|
||||||
|
for (const std::string & k : keys) {
|
||||||
|
if (valid_path && current.is_object() && current.contains(k)) {
|
||||||
|
current = current[k];
|
||||||
|
} else {
|
||||||
|
valid_path = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (valid_path) {
|
||||||
|
result[path] = current;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* this handles 2 cases:
|
||||||
|
* - only string, example: "string"
|
||||||
|
* - mixed string and tokens, example: [12, 34, "string", 56, 78]
|
||||||
|
*/
|
||||||
|
static llama_tokens tokenize_mixed(const llama_vocab * vocab, const json & json_prompt, bool add_special, bool parse_special) {
|
||||||
|
// If `add_bos` is true, we only add BOS, when json_prompt is a string,
|
||||||
|
// or the first element of the json_prompt array is a string.
|
||||||
|
llama_tokens prompt_tokens;
|
||||||
|
|
||||||
|
if (json_prompt.is_array()) {
|
||||||
|
bool first = true;
|
||||||
|
for (const auto & p : json_prompt) {
|
||||||
|
if (p.is_string()) {
|
||||||
|
auto s = p.template get<std::string>();
|
||||||
|
|
||||||
|
llama_tokens p;
|
||||||
|
if (first) {
|
||||||
|
p = common_tokenize(vocab, s, add_special, parse_special);
|
||||||
|
first = false;
|
||||||
|
} else {
|
||||||
|
p = common_tokenize(vocab, s, false, parse_special);
|
||||||
|
}
|
||||||
|
|
||||||
|
prompt_tokens.insert(prompt_tokens.end(), p.begin(), p.end());
|
||||||
|
} else {
|
||||||
|
if (first) {
|
||||||
|
first = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
prompt_tokens.push_back(p.template get<llama_token>());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
auto s = json_prompt.template get<std::string>();
|
||||||
|
prompt_tokens = common_tokenize(vocab, s, add_special, parse_special);
|
||||||
|
}
|
||||||
|
|
||||||
|
return prompt_tokens;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* break the input "prompt" object into multiple prompt if needed, then tokenize them
|
||||||
|
* this supports these cases:
|
||||||
|
* - "prompt": "string"
|
||||||
|
* - "prompt": [12, 34, 56]
|
||||||
|
* - "prompt": [12, 34, "string", 56, 78]
|
||||||
|
* and multiple prompts (multi-tasks):
|
||||||
|
* - "prompt": ["string1", "string2"]
|
||||||
|
* - "prompt": ["string1", [12, 34, 56]]
|
||||||
|
* - "prompt": [[12, 34, 56], [78, 90, 12]]
|
||||||
|
* - "prompt": [[12, 34, "string", 56, 78], [12, 34, 56]]
|
||||||
|
*/
|
||||||
|
static std::vector<llama_tokens> tokenize_input_prompts(const llama_vocab * vocab, const json & json_prompt, bool add_special, bool parse_special) {
|
||||||
|
std::vector<llama_tokens> result;
|
||||||
|
if (json_prompt.is_string() || json_is_array_of_mixed_numbers_strings(json_prompt)) {
|
||||||
|
// string or mixed
|
||||||
|
result.push_back(tokenize_mixed(vocab, json_prompt, add_special, parse_special));
|
||||||
|
} else if (json_is_array_of_numbers(json_prompt)) {
|
||||||
|
// array of tokens
|
||||||
|
result.push_back(json_prompt.get<llama_tokens>());
|
||||||
|
} else if (json_prompt.is_array()) {
|
||||||
|
// array of prompts
|
||||||
|
result.reserve(json_prompt.size());
|
||||||
|
for (const auto & p : json_prompt) {
|
||||||
|
if (p.is_string() || json_is_array_of_mixed_numbers_strings(p)) {
|
||||||
|
result.push_back(tokenize_mixed(vocab, p, add_special, parse_special));
|
||||||
|
} else if (json_is_array_of_numbers(p)) {
|
||||||
|
// array of tokens
|
||||||
|
result.push_back(p.get<llama_tokens>());
|
||||||
|
} else {
|
||||||
|
throw std::runtime_error("element of \"prompt\" must be a string, an list of tokens, or a list of mixed strings & tokens");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
throw std::runtime_error("\"prompt\" must be a string, an list of tokens, a list of mixed strings & tokens, or a list of prompts");
|
||||||
|
}
|
||||||
|
if (result.empty()) {
|
||||||
|
throw std::runtime_error("\"prompt\" must not be empty");
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// utils for interacting with libmtmd
|
||||||
|
// (may need to refactor in near future)
|
||||||
|
//
|
||||||
|
|
||||||
|
/**
|
||||||
|
* server_tokens is a helper to manage the input tokens and image for the server.
|
||||||
|
* it is made this way to simplify the logic of KV cache management.
|
||||||
|
*/
|
||||||
|
struct server_tokens {
|
||||||
|
bool has_mtmd = false;
|
||||||
|
|
||||||
|
private: // disallow accessing these members directly, risking out-of-sync
|
||||||
|
|
||||||
|
// map a **start** position in tokens to the image chunk
|
||||||
|
std::unordered_map<llama_pos, mtmd::input_chunk_ptr> map_pos_to_image;
|
||||||
|
|
||||||
|
// list of tokens
|
||||||
|
// it can include LLAMA_TOKEN_NULL, which is used to indicate a token that is not a text token
|
||||||
|
// a mtmd_input_chunk can occupy multiple tokens, one llama_token per **position**
|
||||||
|
// important: for models using mrope, an image can contain multiple tokens but will use only one **position**
|
||||||
|
llama_tokens tokens;
|
||||||
|
|
||||||
|
// for ex. with input of 5 text tokens and 2 images:
|
||||||
|
// [0] [1] [2] [3] [4] [img0] [img0] [img0] [img1] [img1]
|
||||||
|
// pos 0 1 2 3 4 5 6 7 8 9
|
||||||
|
// map_pos_to_image will contain: {5, img0}, {8, img1}
|
||||||
|
|
||||||
|
public:
|
||||||
|
server_tokens() = default;
|
||||||
|
~server_tokens() = default;
|
||||||
|
|
||||||
|
// Prevent copying
|
||||||
|
server_tokens(const server_tokens&) = delete;
|
||||||
|
server_tokens& operator=(const server_tokens&) = delete;
|
||||||
|
|
||||||
|
// Allow moving (usually implicitly generated if members are movable)
|
||||||
|
server_tokens(server_tokens&&) = default;
|
||||||
|
server_tokens& operator=(server_tokens&&) = default;
|
||||||
|
|
||||||
|
// Allow accessing elements using [] operator
|
||||||
|
llama_token operator[](size_t index) { return tokens[index]; }
|
||||||
|
const llama_token& operator[](size_t index) const { return tokens[index]; }
|
||||||
|
|
||||||
|
server_tokens(mtmd::input_chunks & mtmd_chunks, bool has_mtmd) : has_mtmd(has_mtmd) {
|
||||||
|
for (size_t i = 0; i < mtmd_chunks.size(); ++i) {
|
||||||
|
push_back(mtmd_chunks[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
server_tokens(llama_tokens & tokens, bool has_mtmd) : has_mtmd(has_mtmd), tokens(tokens) {}
|
||||||
|
|
||||||
|
// for debugging
|
||||||
|
std::string str() const {
|
||||||
|
std::ostringstream oss;
|
||||||
|
oss << "tokens: ";
|
||||||
|
for (const auto & t : tokens) {
|
||||||
|
if (t == LLAMA_TOKEN_NULL) {
|
||||||
|
oss << "<embd> ";
|
||||||
|
} else {
|
||||||
|
oss << t << " ";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
oss << "\n";
|
||||||
|
oss << "image pos: ";
|
||||||
|
for (const auto & it : map_pos_to_image) {
|
||||||
|
oss << it.first << ", ";
|
||||||
|
}
|
||||||
|
return oss.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the image chunk registered at start position `pos`;
// throws std::runtime_error when no chunk starts there.
const mtmd::input_chunk_ptr & find_chunk(llama_pos pos) const {
    const auto entry = map_pos_to_image.find(pos);
    if (entry == map_pos_to_image.end()) {
        throw std::runtime_error("Chunk not found");
    }
    return entry->second;
}
|
||||||
|
|
||||||
|
void push_back(llama_token tok) {
|
||||||
|
if (tok == LLAMA_TOKEN_NULL) {
|
||||||
|
throw std::runtime_error("Invalid token");
|
||||||
|
}
|
||||||
|
tokens.emplace_back(tok);
|
||||||
|
}
|
||||||
|
|
||||||
|
// will create a copy of the chunk if it contains non-text data
|
||||||
|
void push_back(const mtmd_input_chunk * chunk) {
|
||||||
|
auto type = mtmd_input_chunk_get_type(chunk);
|
||||||
|
if (type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
|
||||||
|
GGML_ASSERT(has_mtmd);
|
||||||
|
auto img_tokens = mtmd_input_chunk_get_tokens_image(chunk);
|
||||||
|
const int n_pos = mtmd_image_tokens_get_n_pos(img_tokens);
|
||||||
|
llama_pos start_pos = tokens.size();
|
||||||
|
for (int i = 0; i < n_pos; ++i) {
|
||||||
|
tokens.emplace_back(LLAMA_TOKEN_NULL);
|
||||||
|
}
|
||||||
|
mtmd::input_chunk_ptr new_chunk(mtmd_input_chunk_copy(chunk));
|
||||||
|
map_pos_to_image[start_pos] = std::move(new_chunk);
|
||||||
|
} else if (type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
|
||||||
|
size_t n_tokens;
|
||||||
|
auto text_tokens = mtmd_input_chunk_get_tokens_text(chunk, &n_tokens);
|
||||||
|
for (size_t i = 0; i < n_tokens; ++i) {
|
||||||
|
push_back(text_tokens[i]);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
GGML_ABORT("Invalid chunk type");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// for compatibility with context shift and prompt truncation
|
||||||
|
void insert(const llama_tokens & inp_tokens) {
|
||||||
|
GGML_ASSERT(!has_mtmd); // only allow this if mtmd is disabled
|
||||||
|
tokens.insert(tokens.end(), inp_tokens.begin(), inp_tokens.end());
|
||||||
|
}
|
||||||
|
|
||||||
|
// for compatibility with speculative decoding, ctx shift, slot save/load
|
||||||
|
const llama_tokens & get_text_tokens() const {
|
||||||
|
GGML_ASSERT(!has_mtmd); // only allow this if mtmd is disabled
|
||||||
|
return tokens;
|
||||||
|
}
|
||||||
|
|
||||||
|
// for compatibility with speculative decoding
|
||||||
|
void set_token(llama_pos pos, llama_token id) {
|
||||||
|
GGML_ASSERT(!has_mtmd); // only allow this if mtmd is disabled
|
||||||
|
tokens[pos] = id;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t size() const {
|
||||||
|
return tokens.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool empty() const {
|
||||||
|
return tokens.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
void clear() {
|
||||||
|
tokens.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
void resize(size_t n) {
|
||||||
|
GGML_ASSERT(n <= tokens.size());
|
||||||
|
if (has_mtmd) {
|
||||||
|
// we throw an error if we try to remove a token in the middle of an image
|
||||||
|
// for ex. with input of 5 text tokens and 2 images:
|
||||||
|
// [0] [1] [2] [3] [4] [img0] [img0] [img0] [img1] [img1]
|
||||||
|
// n 1 2 3 4 5 6 7 8 9 10
|
||||||
|
// allowed to resize ^ ^
|
||||||
|
// disallowed to resize ^ ^ ^
|
||||||
|
if (n > 0) {
|
||||||
|
llama_token last_token = tokens[n - 1];
|
||||||
|
// make sure we never remove tokens in the middle of an image
|
||||||
|
if (last_token == LLAMA_TOKEN_NULL) {
|
||||||
|
find_chunk(n - 1); // will throw an error if the token is not begin-of-chunk
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// remove all image chunks that are not used anymore
|
||||||
|
for (auto it = map_pos_to_image.begin(); it != map_pos_to_image.end(); ) {
|
||||||
|
llama_pos pos = it->first;
|
||||||
|
if (pos >= (llama_pos)n) {
|
||||||
|
it = map_pos_to_image.erase(it);
|
||||||
|
} else {
|
||||||
|
++it;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tokens.resize(n);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string detokenize(const llama_context * ctx, bool special) const {
|
||||||
|
llama_tokens text_tokens;
|
||||||
|
text_tokens.reserve(tokens.size());
|
||||||
|
for (const auto & t : tokens) {
|
||||||
|
if (t != LLAMA_TOKEN_NULL) {
|
||||||
|
text_tokens.push_back(t);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return common_detokenize(ctx, text_tokens, special);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t get_common_prefix(const server_tokens & b) const {
|
||||||
|
size_t max_idx = std::min(tokens.size(), b.tokens.size());
|
||||||
|
for (size_t i = 0; i < max_idx; ++i) {
|
||||||
|
auto & ai = tokens[i];
|
||||||
|
auto & bi = b.tokens[i];
|
||||||
|
|
||||||
|
if (ai == LLAMA_TOKEN_NULL && bi == LLAMA_TOKEN_NULL) {
|
||||||
|
GGML_ASSERT(has_mtmd);
|
||||||
|
const auto & a_chunk = find_chunk(i);
|
||||||
|
const auto & b_chunk = b.find_chunk(i);
|
||||||
|
GGML_ASSERT(a_chunk && b_chunk);
|
||||||
|
const auto * a_img = mtmd_input_chunk_get_tokens_image(a_chunk.get());
|
||||||
|
const auto * b_img = mtmd_input_chunk_get_tokens_image(b_chunk.get());
|
||||||
|
std::string ai_id = mtmd_image_tokens_get_id(a_img);
|
||||||
|
std::string bi_id = mtmd_image_tokens_get_id(b_img);
|
||||||
|
size_t a_pos = mtmd_image_tokens_get_n_pos(a_img);
|
||||||
|
size_t b_pos = mtmd_image_tokens_get_n_pos(b_img);
|
||||||
|
if (ai_id == bi_id && a_pos == b_pos) {
|
||||||
|
GGML_ASSERT(a_pos > 0 && "Invalid image token"); // should never happen
|
||||||
|
i += a_pos - 1; // will be +1 by the for loop
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
} else if (ai == bi) {
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return max_idx; // all tokens are equal
|
||||||
|
}
|
||||||
|
|
||||||
|
// make sure all text tokens are within the vocab range
|
||||||
|
bool validate(const struct llama_context * ctx) const {
|
||||||
|
const llama_model * model = llama_get_model(ctx);
|
||||||
|
const llama_vocab * vocab = llama_model_get_vocab(model);
|
||||||
|
const int32_t n_vocab = llama_vocab_n_tokens(vocab);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < tokens.size(); ++i) {
|
||||||
|
auto & t = tokens[i];
|
||||||
|
if (t == LLAMA_TOKEN_NULL) {
|
||||||
|
try {
|
||||||
|
const auto & chunk = find_chunk(i);
|
||||||
|
const auto * img_tokens = mtmd_input_chunk_get_tokens_image(chunk.get());
|
||||||
|
size_t n_pos = mtmd_image_tokens_get_n_pos(img_tokens);
|
||||||
|
i += n_pos - 1; // will be +1 by the for loop
|
||||||
|
} catch (const std::exception & e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else if (t < 0 || t >= n_vocab) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// encode and decode the image chunk
|
||||||
|
int32_t process_chunk(
|
||||||
|
llama_context * ctx,
|
||||||
|
mtmd_context * mctx,
|
||||||
|
llama_pos n_past,
|
||||||
|
int32_t seq_id,
|
||||||
|
llama_pos & n_pos_out) {
|
||||||
|
auto it = map_pos_to_image.find(n_past);
|
||||||
|
if (it == map_pos_to_image.end()) {
|
||||||
|
throw std::runtime_error("Chunk not found");
|
||||||
|
}
|
||||||
|
// SRV_INF("%s\n", "processing image...");
|
||||||
|
int32_t n_batch = llama_n_batch(ctx);
|
||||||
|
int64_t t0 = ggml_time_ms();
|
||||||
|
llama_pos new_n_past = n_past;
|
||||||
|
int32_t result = mtmd_helper_eval_chunk_single(mctx, ctx,
|
||||||
|
it->second.get(), // chunk
|
||||||
|
n_past,
|
||||||
|
seq_id,
|
||||||
|
n_batch,
|
||||||
|
true, // logits last
|
||||||
|
&new_n_past);
|
||||||
|
//SRV_INF("image processed in %" PRId64 " ms\n", ggml_time_ms() - t0);
|
||||||
|
if (result != 0) {
|
||||||
|
LOG_ERR("mtmd_helper_eval failed with status %d", result);
|
||||||
|
n_pos_out = n_past;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
n_pos_out = new_n_past;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Returns the 64-bit FNV-1a hash of `len` bytes starting at `data`,
// formatted as a decimal string.
static std::string fnv_hash(const uint8_t * data, size_t len) {
    const uint64_t offset_basis = 0xcbf29ce484222325ULL;
    const uint64_t prime        = 0x100000001b3ULL;

    uint64_t digest = offset_basis;
    size_t remaining = len;
    for (const uint8_t * cursor = data; remaining > 0; ++cursor, --remaining) {
        // FNV-1a: xor the byte in first, then multiply by the prime
        digest = (digest ^ *cursor) * prime;
    }
    return std::to_string(digest);
}
|
||||||
@@ -1,51 +0,0 @@
|
|||||||
INCLUDE_PATH := $(abspath ./)
|
|
||||||
LIBRARY_PATH := $(abspath ./)
|
|
||||||
|
|
||||||
AR?=ar
|
|
||||||
|
|
||||||
CMAKE_ARGS?=-DGGML_NATIVE=OFF
|
|
||||||
BUILD_TYPE?=
|
|
||||||
GOCMD=go
|
|
||||||
# keep standard at C11 and C++11
|
|
||||||
CXXFLAGS = -I. -I$(INCLUDE_PATH)/sources/bark.cpp/examples -I$(INCLUDE_PATH)/sources/bark.cpp/encodec.cpp/ggml/include -I$(INCLUDE_PATH)/sources/bark.cpp/spm-headers -I$(INCLUDE_PATH)/sources/bark.cpp -O3 -DNDEBUG -std=c++17 -fPIC
|
|
||||||
LDFLAGS = -L$(LIBRARY_PATH) -L$(LIBRARY_PATH)/sources/bark.cpp/build/examples -lbark -lstdc++ -lm
|
|
||||||
|
|
||||||
# bark.cpp
|
|
||||||
BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
|
|
||||||
BARKCPP_VERSION?=5d5be84f089ab9ea53b7a793f088d3fbf7247495
|
|
||||||
|
|
||||||
# warnings
|
|
||||||
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
|
|
||||||
|
|
||||||
## bark.cpp
|
|
||||||
sources/bark.cpp:
|
|
||||||
git clone --recursive $(BARKCPP_REPO) sources/bark.cpp && \
|
|
||||||
cd sources/bark.cpp && \
|
|
||||||
git checkout $(BARKCPP_VERSION) && \
|
|
||||||
git submodule update --init --recursive --depth 1 --single-branch
|
|
||||||
|
|
||||||
sources/bark.cpp/build/libbark.a: sources/bark.cpp
|
|
||||||
cd sources/bark.cpp && \
|
|
||||||
mkdir -p build && \
|
|
||||||
cd build && \
|
|
||||||
cmake $(CMAKE_ARGS) .. && \
|
|
||||||
cmake --build . --config Release
|
|
||||||
|
|
||||||
gobark.o:
|
|
||||||
$(CXX) $(CXXFLAGS) gobark.cpp -o gobark.o -c $(LDFLAGS)
|
|
||||||
|
|
||||||
libbark.a: sources/bark.cpp/build/libbark.a gobark.o
|
|
||||||
cp $(INCLUDE_PATH)/sources/bark.cpp/build/libbark.a ./
|
|
||||||
$(AR) rcs libbark.a gobark.o
|
|
||||||
|
|
||||||
bark-cpp: libbark.a
|
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH="$(CURDIR)" LIBRARY_PATH=$(CURDIR) \
|
|
||||||
$(GOCMD) build -v -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o bark-cpp ./
|
|
||||||
|
|
||||||
package:
|
|
||||||
bash package.sh
|
|
||||||
|
|
||||||
build: bark-cpp package
|
|
||||||
|
|
||||||
clean:
|
|
||||||
rm -f gobark.o libbark.a
|
|
||||||
@@ -1,41 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
# Script to copy the appropriate libraries based on architecture
|
|
||||||
# This script is used in the final stage of the Dockerfile
|
|
||||||
|
|
||||||
set -e
|
|
||||||
|
|
||||||
CURDIR=$(dirname "$(realpath $0)")
|
|
||||||
|
|
||||||
# Create lib directory
|
|
||||||
mkdir -p $CURDIR/package/lib
|
|
||||||
cp -avrf $CURDIR/bark-cpp $CURDIR/package/
|
|
||||||
cp -rfv $CURDIR/run.sh $CURDIR/package/
|
|
||||||
|
|
||||||
# Detect architecture and copy appropriate libraries
|
|
||||||
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
|
||||||
# x86_64 architecture
|
|
||||||
echo "Detected x86_64 architecture, copying x86_64 libraries..."
|
|
||||||
cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
|
|
||||||
elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
|
|
||||||
# ARM64 architecture
|
|
||||||
echo "Detected ARM64 architecture, copying ARM64 libraries..."
|
|
||||||
cp -arfLv /lib/ld-linux-aarch64.so.1 $CURDIR/package/lib/ld.so
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
|
|
||||||
else
|
|
||||||
echo "Error: Could not detect architecture"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "Packaging completed successfully"
|
|
||||||
ls -liah $CURDIR/package/
|
|
||||||
ls -liah $CURDIR/package/lib/
|
|
||||||
@@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -ex
|
|
||||||
|
|
||||||
CURDIR=$(dirname "$(realpath $0)")
|
|
||||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
|
||||||
|
|
||||||
# If there is a lib/ld.so, use it
|
|
||||||
if [ -f $CURDIR/lib/ld.so ]; then
|
|
||||||
echo "Using lib/ld.so"
|
|
||||||
exec $CURDIR/lib/ld.so $CURDIR/bark-cpp "$@"
|
|
||||||
fi
|
|
||||||
|
|
||||||
exec $CURDIR/bark-cpp "$@"
|
|
||||||
25
backend/go/bark/Makefile
Normal file
25
backend/go/bark/Makefile
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
INCLUDE_PATH := $(abspath ./)
|
||||||
|
LIBRARY_PATH := $(abspath ./)
|
||||||
|
|
||||||
|
AR?=ar
|
||||||
|
|
||||||
|
BUILD_TYPE?=
|
||||||
|
# keep standard at C11 and C++11
|
||||||
|
CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../sources/bark.cpp/examples -I$(INCLUDE_PATH)/../../../sources/bark.cpp/spm-headers -I$(INCLUDE_PATH)/../../../sources/bark.cpp -O3 -DNDEBUG -std=c++17 -fPIC
|
||||||
|
LDFLAGS = -L$(LIBRARY_PATH) -L$(LIBRARY_PATH)/../../../sources/bark.cpp/build/examples -lbark -lstdc++ -lm
|
||||||
|
|
||||||
|
# warnings
|
||||||
|
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
|
||||||
|
|
||||||
|
gobark.o:
|
||||||
|
$(CXX) $(CXXFLAGS) gobark.cpp -o gobark.o -c $(LDFLAGS)
|
||||||
|
|
||||||
|
libbark.a: gobark.o
|
||||||
|
cp $(INCLUDE_PATH)/../../../sources/bark.cpp/build/libbark.a ./
|
||||||
|
$(AR) rcs libbark.a gobark.o
|
||||||
|
$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml.c.o
|
||||||
|
$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-alloc.c.o
|
||||||
|
$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-backend.c.o
|
||||||
|
|
||||||
|
clean:
|
||||||
|
rm -f gobark.o libbark.a
|
||||||
@@ -48,7 +48,7 @@ int tts(char *text,int threads, char *dst ) {
|
|||||||
|
|
||||||
// generate audio
|
// generate audio
|
||||||
if (!bark_generate_audio(c, text, threads)) {
|
if (!bark_generate_audio(c, text, threads)) {
|
||||||
fprintf(stderr, "%s: An error occurred. If the problem persists, feel free to open an issue to report it.\n", __func__);
|
fprintf(stderr, "%s: An error occured. If the problem persists, feel free to open an issue to report it.\n", __func__);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
// #cgo CXXFLAGS: -I${SRCDIR}/sources/bark.cpp/ -I${SRCDIR}/sources/bark.cpp/encodec.cpp -I${SRCDIR}/sources/bark.cpp/encodec.cpp/ggml/include -I${SRCDIR}/sources/bark.cpp/examples -I${SRCDIR}/sources/bark.cpp/spm-headers
|
// #cgo CXXFLAGS: -I${SRCDIR}/../../../sources/bark.cpp/ -I${SRCDIR}/../../../sources/bark.cpp/encodec.cpp -I${SRCDIR}/../../../sources/bark.cpp/examples -I${SRCDIR}/../../../sources/bark.cpp/spm-headers
|
||||||
// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/sources/bark.cpp/build/examples -L${SRCDIR}/sources/bark.cpp/build/encodec.cpp/ggml/src/ -L${SRCDIR}/sources/bark.cpp/build/encodec.cpp/ -lbark -lencodec -lcommon -lggml -lgomp
|
// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/../../../sources/bark.cpp/build/examples -L${SRCDIR}/../../../sources/bark.cpp/build/encodec.cpp/ -lbark -lencodec -lcommon
|
||||||
// #include <gobark.h>
|
// #include <gobark.h>
|
||||||
// #include <stdlib.h>
|
// #include <stdlib.h>
|
||||||
import "C"
|
import "C"
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
GOCMD=go
|
|
||||||
|
|
||||||
huggingface:
|
|
||||||
CGO_ENABLED=0 $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o huggingface ./
|
|
||||||
|
|
||||||
package:
|
|
||||||
bash package.sh
|
|
||||||
|
|
||||||
build: huggingface package
|
|
||||||
@@ -1,12 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
# Script to copy the appropriate libraries based on architecture
|
|
||||||
# This script is used in the final stage of the Dockerfile
|
|
||||||
|
|
||||||
set -e
|
|
||||||
|
|
||||||
CURDIR=$(dirname "$(realpath $0)")
|
|
||||||
|
|
||||||
mkdir -p $CURDIR/package
|
|
||||||
cp -avrf $CURDIR/huggingface $CURDIR/package/
|
|
||||||
cp -rfv $CURDIR/run.sh $CURDIR/package/
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -ex
|
|
||||||
|
|
||||||
CURDIR=$(dirname "$(realpath $0)")
|
|
||||||
|
|
||||||
exec $CURDIR/huggingface "$@"
|
|
||||||
@@ -4,11 +4,9 @@ LIBRARY_PATH := $(abspath ./)
|
|||||||
AR?=ar
|
AR?=ar
|
||||||
CMAKE_ARGS?=
|
CMAKE_ARGS?=
|
||||||
BUILD_TYPE?=
|
BUILD_TYPE?=
|
||||||
NATIVE?=false
|
|
||||||
CUDA_LIBPATH?=/usr/local/cuda/lib64/
|
|
||||||
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
|
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
|
||||||
# keep standard at C11 and C++11
|
# keep standard at C11 and C++11
|
||||||
CXXFLAGS = -I. -I$(INCLUDE_PATH)/sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC
|
CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC
|
||||||
|
|
||||||
GOCMD?=go
|
GOCMD?=go
|
||||||
CGO_LDFLAGS?=
|
CGO_LDFLAGS?=
|
||||||
@@ -17,21 +15,12 @@ CGO_LDFLAGS_SYCL=
|
|||||||
GO_TAGS?=
|
GO_TAGS?=
|
||||||
LD_FLAGS?=
|
LD_FLAGS?=
|
||||||
|
|
||||||
# stablediffusion.cpp (ggml)
|
|
||||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/richiejp/stable-diffusion.cpp
|
|
||||||
STABLEDIFFUSION_GGML_VERSION?=53e3b17eb3d0b5760ced06a1f98320b68b34aaae
|
|
||||||
|
|
||||||
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
|
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
|
||||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||||
|
|
||||||
ifeq ($(NATIVE),false)
|
|
||||||
CMAKE_ARGS+=-DGGML_NATIVE=OFF
|
|
||||||
endif
|
|
||||||
|
|
||||||
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
|
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
|
||||||
ifeq ($(BUILD_TYPE),cublas)
|
ifeq ($(BUILD_TYPE),cublas)
|
||||||
CMAKE_ARGS+=-DSD_CUDA=ON -DGGML_CUDA=ON
|
CMAKE_ARGS+=-DSD_CUDA=ON
|
||||||
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH) -L$(CUDA_LIBPATH)/stubs/ -lcuda
|
|
||||||
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||||
# to CMAKE_ARGS automatically
|
# to CMAKE_ARGS automatically
|
||||||
else ifeq ($(BUILD_TYPE),openblas)
|
else ifeq ($(BUILD_TYPE),openblas)
|
||||||
@@ -41,17 +30,14 @@ else ifeq ($(BUILD_TYPE),clblas)
|
|||||||
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||||
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
|
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
|
||||||
else ifeq ($(BUILD_TYPE),hipblas)
|
else ifeq ($(BUILD_TYPE),hipblas)
|
||||||
CMAKE_ARGS+=-DSD_HIPBLAS=ON -DGGML_HIPBLAS=ON
|
CMAKE_ARGS+=-DSD_HIPBLAS=ON
|
||||||
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
|
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
|
||||||
# But if it's OSX without metal, disable it here
|
# But if it's OSX without metal, disable it here
|
||||||
else ifeq ($(BUILD_TYPE),vulkan)
|
|
||||||
CMAKE_ARGS+=-DSD_VULKAN=ON -DGGML_VULKAN=ON
|
|
||||||
CGO_LDFLAGS+=-lvulkan
|
|
||||||
else ifeq ($(OS),Darwin)
|
else ifeq ($(OS),Darwin)
|
||||||
ifneq ($(BUILD_TYPE),metal)
|
ifneq ($(BUILD_TYPE),metal)
|
||||||
CMAKE_ARGS+=-DSD_METAL=OFF -DGGML_METAL=OFF
|
CMAKE_ARGS+=-DSD_METAL=OFF
|
||||||
else
|
else
|
||||||
CMAKE_ARGS+=-DSD_METAL=ON -DGGML_METAL=ON
|
CMAKE_ARGS+=-DSD_METAL=ON
|
||||||
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
|
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
|
||||||
TARGET+=--target ggml-metal
|
TARGET+=--target ggml-metal
|
||||||
endif
|
endif
|
||||||
@@ -63,8 +49,8 @@ ifeq ($(BUILD_TYPE),sycl_f16)
|
|||||||
-DCMAKE_CXX_COMPILER=icpx \
|
-DCMAKE_CXX_COMPILER=icpx \
|
||||||
-DSD_SYCL=ON \
|
-DSD_SYCL=ON \
|
||||||
-DGGML_SYCL_F16=ON
|
-DGGML_SYCL_F16=ON
|
||||||
export CC=icx
|
CC=icx
|
||||||
export CXX=icpx
|
CXX=icpx
|
||||||
CGO_LDFLAGS_SYCL += -fsycl -L${DNNLROOT}/lib -ldnnl ${MKLROOT}/lib/intel64/libmkl_sycl.a -fiopenmp -fopenmp-targets=spir64 -lOpenCL
|
CGO_LDFLAGS_SYCL += -fsycl -L${DNNLROOT}/lib -ldnnl ${MKLROOT}/lib/intel64/libmkl_sycl.a -fiopenmp -fopenmp-targets=spir64 -lOpenCL
|
||||||
CGO_LDFLAGS_SYCL += $(shell pkg-config --libs mkl-static-lp64-gomp)
|
CGO_LDFLAGS_SYCL += $(shell pkg-config --libs mkl-static-lp64-gomp)
|
||||||
CGO_CXXFLAGS += -fiopenmp -fopenmp-targets=spir64
|
CGO_CXXFLAGS += -fiopenmp -fopenmp-targets=spir64
|
||||||
@@ -76,8 +62,8 @@ ifeq ($(BUILD_TYPE),sycl_f32)
|
|||||||
-DCMAKE_C_COMPILER=icx \
|
-DCMAKE_C_COMPILER=icx \
|
||||||
-DCMAKE_CXX_COMPILER=icpx \
|
-DCMAKE_CXX_COMPILER=icpx \
|
||||||
-DSD_SYCL=ON
|
-DSD_SYCL=ON
|
||||||
export CC=icx
|
CC=icx
|
||||||
export CXX=icpx
|
CXX=icpx
|
||||||
CGO_LDFLAGS_SYCL += -fsycl -L${DNNLROOT}/lib -ldnnl ${MKLROOT}/lib/intel64/libmkl_sycl.a -fiopenmp -fopenmp-targets=spir64 -lOpenCL
|
CGO_LDFLAGS_SYCL += -fsycl -L${DNNLROOT}/lib -ldnnl ${MKLROOT}/lib/intel64/libmkl_sycl.a -fiopenmp -fopenmp-targets=spir64 -lOpenCL
|
||||||
CGO_LDFLAGS_SYCL += $(shell pkg-config --libs mkl-static-lp64-gomp)
|
CGO_LDFLAGS_SYCL += $(shell pkg-config --libs mkl-static-lp64-gomp)
|
||||||
CGO_CXXFLAGS += -fiopenmp -fopenmp-targets=spir64
|
CGO_CXXFLAGS += -fiopenmp -fopenmp-targets=spir64
|
||||||
@@ -115,12 +101,12 @@ ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
|||||||
+bash -c "source $(ONEAPI_VARS); \
|
+bash -c "source $(ONEAPI_VARS); \
|
||||||
mkdir -p build && \
|
mkdir -p build && \
|
||||||
cd build && \
|
cd build && \
|
||||||
cmake $(CMAKE_ARGS) ../sources/stablediffusion-ggml.cpp && \
|
cmake $(CMAKE_ARGS) ../../../../../sources/stablediffusion-ggml.cpp && \
|
||||||
cmake --build . --config Release"
|
cmake --build . --config Release"
|
||||||
else
|
else
|
||||||
mkdir -p build && \
|
mkdir -p build && \
|
||||||
cd build && \
|
cd build && \
|
||||||
cmake $(CMAKE_ARGS) ../sources/stablediffusion-ggml.cpp && \
|
cmake $(CMAKE_ARGS) ../../../../../sources/stablediffusion-ggml.cpp && \
|
||||||
cmake --build . --config Release
|
cmake --build . --config Release
|
||||||
endif
|
endif
|
||||||
$(MAKE) $(COMBINED_LIB)
|
$(MAKE) $(COMBINED_LIB)
|
||||||
@@ -133,26 +119,17 @@ else
|
|||||||
$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c
|
$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c
|
||||||
endif
|
endif
|
||||||
|
|
||||||
## stablediffusion (ggml)
|
libsd.a: gosd.o
|
||||||
sources/stablediffusion-ggml.cpp:
|
|
||||||
git clone --recursive $(STABLEDIFFUSION_GGML_REPO) sources/stablediffusion-ggml.cpp && \
|
|
||||||
cd sources/stablediffusion-ggml.cpp && \
|
|
||||||
git checkout $(STABLEDIFFUSION_GGML_VERSION) && \
|
|
||||||
git submodule update --init --recursive --depth 1 --single-branch
|
|
||||||
|
|
||||||
libsd.a: sources/stablediffusion-ggml.cpp build/libstable-diffusion.a gosd.o
|
|
||||||
cp $(INCLUDE_PATH)/build/libstable-diffusion.a ./libsd.a
|
cp $(INCLUDE_PATH)/build/libstable-diffusion.a ./libsd.a
|
||||||
$(AR) rcs libsd.a gosd.o
|
$(AR) rcs libsd.a gosd.o
|
||||||
|
|
||||||
stablediffusion-ggml: libsd.a
|
stablediffusion-ggml:
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_SYCL)" C_INCLUDE_PATH="$(INCLUDE_PATH)" LIBRARY_PATH="$(LIBRARY_PATH)" \
|
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_SYCL)" C_INCLUDE_PATH="$(INCLUDE_PATH)" LIBRARY_PATH="$(LIBRARY_PATH)" \
|
||||||
CC="$(CC)" CXX="$(CXX)" CGO_CXXFLAGS="$(CGO_CXXFLAGS)" \
|
CC="$(CC)" CXX="$(CXX)" CGO_CXXFLAGS="$(CGO_CXXFLAGS)" \
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o stablediffusion-ggml ./
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o ../../../../backend-assets/grpc/stablediffusion-ggml ./
|
||||||
|
ifneq ($(UPX),)
|
||||||
package:
|
$(UPX) ../../../../backend-assets/grpc/stablediffusion-ggml
|
||||||
bash package.sh
|
endif
|
||||||
|
|
||||||
build: stablediffusion-ggml package
|
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf gosd.o libsd.a build $(COMBINED_LIB)
|
rm -rf gosd.o libsd.a build $(COMBINED_LIB)
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
// #cgo CXXFLAGS: -I${SRCDIR}/sources/stablediffusion-ggml.cpp/thirdparty -I${SRCDIR}/sources/stablediffusion-ggml.cpp -I${SRCDIR}/sources/stablediffusion-ggml.cpp/ggml/include
|
// #cgo CXXFLAGS: -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/ggml/include
|
||||||
// #cgo LDFLAGS: -L${SRCDIR}/ -lsd -lstdc++ -lm -lggmlall -lgomp
|
// #cgo LDFLAGS: -L${SRCDIR}/ -lsd -lstdc++ -lm -lggmlall -lgomp
|
||||||
// #include <gosd.h>
|
// #include <gosd.h>
|
||||||
// #include <stdlib.h>
|
// #include <stdlib.h>
|
||||||
@@ -58,9 +58,6 @@ func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
|||||||
if opts.Embeddings {
|
if opts.Embeddings {
|
||||||
llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
|
llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
|
||||||
}
|
}
|
||||||
if opts.Reranking {
|
|
||||||
llamaOpts = append(llamaOpts, llama.EnableReranking)
|
|
||||||
}
|
|
||||||
if opts.NGPULayers != 0 {
|
if opts.NGPULayers != 0 {
|
||||||
llamaOpts = append(llamaOpts, llama.SetGPULayers(int(opts.NGPULayers)))
|
llamaOpts = append(llamaOpts, llama.SetGPULayers(int(opts.NGPULayers)))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,9 +0,0 @@
|
|||||||
GOCMD=go
|
|
||||||
|
|
||||||
local-store:
|
|
||||||
CGO_ENABLED=0 $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o local-store ./
|
|
||||||
|
|
||||||
package:
|
|
||||||
bash package.sh
|
|
||||||
|
|
||||||
build: local-store package
|
|
||||||
@@ -1,12 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
# Script to copy the appropriate libraries based on architecture
|
|
||||||
# This script is used in the final stage of the Dockerfile
|
|
||||||
|
|
||||||
set -e
|
|
||||||
|
|
||||||
CURDIR=$(dirname "$(realpath $0)")
|
|
||||||
|
|
||||||
mkdir -p $CURDIR/package
|
|
||||||
cp -avrf $CURDIR/local-store $CURDIR/package/
|
|
||||||
cp -rfv $CURDIR/run.sh $CURDIR/package/
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -ex
|
|
||||||
|
|
||||||
CURDIR=$(dirname "$(realpath $0)")
|
|
||||||
|
|
||||||
exec $CURDIR/local-store "$@"
|
|
||||||
@@ -1,37 +0,0 @@
|
|||||||
|
|
||||||
# go-piper version
|
|
||||||
PIPER_REPO?=https://github.com/mudler/go-piper
|
|
||||||
PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
|
|
||||||
|
|
||||||
CURRENT_DIR=$(abspath ./)
|
|
||||||
GOCMD=go
|
|
||||||
|
|
||||||
PIPER_CGO_CXXFLAGS+=-I$(CURRENT_DIR)/sources/go-piper/piper/src/cpp -I$(CURRENT_DIR)/sources/go-piper/piper/build/fi/include -I$(CURRENT_DIR)/sources/go-piper/piper/build/pi/include -I$(CURRENT_DIR)/sources/go-piper/piper/build/si/include
|
|
||||||
PIPER_CGO_LDFLAGS+=-L$(CURRENT_DIR)/sources/go-piper/piper/build/fi/lib -L$(CURRENT_DIR)/sources/go-piper/piper/build/pi/lib -L$(CURRENT_DIR)/sources/go-piper/piper/build/si/lib -lfmt -lspdlog -lucd
|
|
||||||
|
|
||||||
## go-piper
|
|
||||||
sources/go-piper:
|
|
||||||
mkdir -p sources/go-piper
|
|
||||||
cd sources/go-piper && \
|
|
||||||
git init && \
|
|
||||||
git remote add origin $(PIPER_REPO) && \
|
|
||||||
git fetch origin && \
|
|
||||||
git checkout $(PIPER_VERSION) && \
|
|
||||||
git submodule update --init --recursive --depth 1 --single-branch
|
|
||||||
|
|
||||||
sources/go-piper/libpiper_binding.a: sources/go-piper
|
|
||||||
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
|
|
||||||
|
|
||||||
espeak-ng-data: sources/go-piper sources/go-piper/libpiper_binding.a
|
|
||||||
mkdir -p espeak-ng-data
|
|
||||||
@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. espeak-ng-data
|
|
||||||
|
|
||||||
piper: sources/go-piper sources/go-piper/libpiper_binding.a espeak-ng-data
|
|
||||||
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURRENT_DIR)/sources/go-piper
|
|
||||||
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURRENT_DIR)/sources/go-piper \
|
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o piper ./
|
|
||||||
|
|
||||||
package:
|
|
||||||
bash package.sh
|
|
||||||
|
|
||||||
build: piper package
|
|
||||||
@@ -1,54 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
# Script to copy the appropriate libraries based on architecture
|
|
||||||
# This script is used in the final stage of the Dockerfile
|
|
||||||
|
|
||||||
set -e
|
|
||||||
|
|
||||||
CURDIR=$(dirname "$(realpath $0)")
|
|
||||||
|
|
||||||
# Create lib directory
|
|
||||||
mkdir -p $CURDIR/package/lib
|
|
||||||
|
|
||||||
cp -avrf $CURDIR/piper $CURDIR/package/
|
|
||||||
cp -avrf $CURDIR/espeak-ng-data $CURDIR/package/
|
|
||||||
cp -rfv $CURDIR/run.sh $CURDIR/package/
|
|
||||||
cp -rfLv $CURDIR/sources/go-piper/piper-phonemize/pi/lib/* $CURDIR/package/lib/
|
|
||||||
|
|
||||||
# Detect architecture and copy appropriate libraries
|
|
||||||
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
|
||||||
# x86_64 architecture
|
|
||||||
echo "Detected x86_64 architecture, copying x86_64 libraries..."
|
|
||||||
cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
|
|
||||||
elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
|
|
||||||
# ARM64 architecture
|
|
||||||
echo "Detected ARM64 architecture, copying ARM64 libraries..."
|
|
||||||
cp -arfLv /lib/ld-linux-aarch64.so.1 $CURDIR/package/lib/ld.so
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
|
|
||||||
else
|
|
||||||
echo "Error: Could not detect architecture"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "Packaging completed successfully"
|
|
||||||
ls -liah $CURDIR/package/
|
|
||||||
ls -liah $CURDIR/package/lib/
|
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -ex
|
|
||||||
|
|
||||||
CURDIR=$(dirname "$(realpath $0)")
|
|
||||||
|
|
||||||
export ESPEAK_NG_DATA=$CURDIR/espeak-ng-data
|
|
||||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
|
||||||
|
|
||||||
# If there is a lib/ld.so, use it
|
|
||||||
if [ -f $CURDIR/lib/ld.so ]; then
|
|
||||||
echo "Using lib/ld.so"
|
|
||||||
exec $CURDIR/lib/ld.so $CURDIR/piper "$@"
|
|
||||||
fi
|
|
||||||
|
|
||||||
exec $CURDIR/piper "$@"
|
|
||||||
@@ -1,47 +0,0 @@
|
|||||||
|
|
||||||
CURRENT_DIR=$(abspath ./)
|
|
||||||
GOCMD=go
|
|
||||||
|
|
||||||
ONNX_VERSION?=1.20.0
|
|
||||||
ONNX_ARCH?=x64
|
|
||||||
ONNX_OS?=linux
|
|
||||||
|
|
||||||
# Detect if we are running on arm64
|
|
||||||
ifneq (,$(findstring aarch64,$(shell uname -m)))
|
|
||||||
ONNX_ARCH=aarch64
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(OS),Darwin)
|
|
||||||
ONNX_OS=osx
|
|
||||||
ifneq (,$(findstring aarch64,$(shell uname -m)))
|
|
||||||
ONNX_ARCH=arm64
|
|
||||||
else ifneq (,$(findstring arm64,$(shell uname -m)))
|
|
||||||
ONNX_ARCH=arm64
|
|
||||||
else
|
|
||||||
ONNX_ARCH=x86_64
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
sources/onnxruntime:
|
|
||||||
mkdir -p sources/onnxruntime
|
|
||||||
curl -L https://github.com/microsoft/onnxruntime/releases/download/v$(ONNX_VERSION)/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz -o sources/onnxruntime/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
|
|
||||||
cd sources/onnxruntime && tar -xvf onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz && rm onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
|
|
||||||
cd sources/onnxruntime && mv onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION)/* ./
|
|
||||||
|
|
||||||
backend-assets/lib/libonnxruntime.so.1: sources/onnxruntime
|
|
||||||
mkdir -p backend-assets/lib
|
|
||||||
cp -rfLv sources/onnxruntime/lib/* backend-assets/lib/
|
|
||||||
ifeq ($(OS),Darwin)
|
|
||||||
mv backend-assets/lib/libonnxruntime.$(ONNX_VERSION).dylib backend-assets/lib/libonnxruntime.dylib
|
|
||||||
else
|
|
||||||
mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
|
|
||||||
endif
|
|
||||||
|
|
||||||
silero-vad: backend-assets/lib/libonnxruntime.so.1
|
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURRENT_DIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURRENT_DIR)/backend-assets/lib \
|
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o silero-vad ./
|
|
||||||
|
|
||||||
package:
|
|
||||||
bash package.sh
|
|
||||||
|
|
||||||
build: silero-vad package
|
|
||||||
@@ -1,53 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
# Script to copy the appropriate libraries based on architecture
|
|
||||||
# This script is used in the final stage of the Dockerfile
|
|
||||||
|
|
||||||
set -e
|
|
||||||
|
|
||||||
CURDIR=$(dirname "$(realpath $0)")
|
|
||||||
|
|
||||||
# Create lib directory
|
|
||||||
mkdir -p $CURDIR/package/lib
|
|
||||||
|
|
||||||
cp -avrf $CURDIR/silero-vad $CURDIR/package/
|
|
||||||
cp -avrf $CURDIR/run.sh $CURDIR/package/
|
|
||||||
cp -rfLv $CURDIR/backend-assets/lib/* $CURDIR/package/lib/
|
|
||||||
|
|
||||||
# Detect architecture and copy appropriate libraries
|
|
||||||
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
|
||||||
# x86_64 architecture
|
|
||||||
echo "Detected x86_64 architecture, copying x86_64 libraries..."
|
|
||||||
cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
|
|
||||||
elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
|
|
||||||
# ARM64 architecture
|
|
||||||
echo "Detected ARM64 architecture, copying ARM64 libraries..."
|
|
||||||
cp -arfLv /lib/ld-linux-aarch64.so.1 $CURDIR/package/lib/ld.so
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
|
|
||||||
else
|
|
||||||
echo "Error: Could not detect architecture"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "Packaging completed successfully"
|
|
||||||
ls -liah $CURDIR/package/
|
|
||||||
ls -liah $CURDIR/package/lib/
|
|
||||||
@@ -1,14 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -ex
|
|
||||||
|
|
||||||
CURDIR=$(dirname "$(realpath $0)")
|
|
||||||
|
|
||||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
|
||||||
|
|
||||||
# If there is a lib/ld.so, use it
|
|
||||||
if [ -f $CURDIR/lib/ld.so ]; then
|
|
||||||
echo "Using lib/ld.so"
|
|
||||||
exec $CURDIR/lib/ld.so $CURDIR/silero-vad "$@"
|
|
||||||
fi
|
|
||||||
|
|
||||||
exec $CURDIR/silero-vad "$@"
|
|
||||||
@@ -1,52 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
# Script to copy the appropriate libraries based on architecture
|
|
||||||
# This script is used in the final stage of the Dockerfile
|
|
||||||
|
|
||||||
set -e
|
|
||||||
|
|
||||||
CURDIR=$(dirname "$(realpath $0)")
|
|
||||||
|
|
||||||
# Create lib directory
|
|
||||||
mkdir -p $CURDIR/package/lib
|
|
||||||
|
|
||||||
cp -avrf $CURDIR/stablediffusion-ggml $CURDIR/package/
|
|
||||||
cp -rfv $CURDIR/run.sh $CURDIR/package/
|
|
||||||
|
|
||||||
# Detect architecture and copy appropriate libraries
|
|
||||||
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
|
||||||
# x86_64 architecture
|
|
||||||
echo "Detected x86_64 architecture, copying x86_64 libraries..."
|
|
||||||
cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
|
|
||||||
cp -arfLv /lib/x86_64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
|
|
||||||
elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
|
|
||||||
# ARM64 architecture
|
|
||||||
echo "Detected ARM64 architecture, copying ARM64 libraries..."
|
|
||||||
cp -arfLv /lib/ld-linux-aarch64.so.1 $CURDIR/package/lib/ld.so
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
|
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
|
|
||||||
else
|
|
||||||
echo "Error: Could not detect architecture"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "Packaging completed successfully"
|
|
||||||
ls -liah $CURDIR/package/
|
|
||||||
ls -liah $CURDIR/package/lib/
|
|
||||||
@@ -1,14 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -ex
|
|
||||||
|
|
||||||
CURDIR=$(dirname "$(realpath $0)")
|
|
||||||
|
|
||||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
|
||||||
|
|
||||||
# If there is a lib/ld.so, use it
|
|
||||||
if [ -f $CURDIR/lib/ld.so ]; then
|
|
||||||
echo "Using lib/ld.so"
|
|
||||||
exec $CURDIR/lib/ld.so $CURDIR/stablediffusion-ggml "$@"
|
|
||||||
fi
|
|
||||||
|
|
||||||
exec $CURDIR/stablediffusion-ggml "$@"
|
|
||||||
@@ -4,7 +4,6 @@ package main
|
|||||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||||
import (
|
import (
|
||||||
"container/heap"
|
"container/heap"
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
"slices"
|
"slices"
|
||||||
@@ -100,9 +99,6 @@ func sortIntoKeySlicese(keys []*pb.StoresKey) [][]float32 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *Store) Load(opts *pb.ModelOptions) error {
|
func (s *Store) Load(opts *pb.ModelOptions) error {
|
||||||
if opts.Model != "" {
|
|
||||||
return errors.New("not implemented")
|
|
||||||
}
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -319,7 +315,7 @@ func isNormalized(k []float32) bool {
|
|||||||
|
|
||||||
for _, v := range k {
|
for _, v := range k {
|
||||||
v64 := float64(v)
|
v64 := float64(v)
|
||||||
sum += v64 * v64
|
sum += v64*v64
|
||||||
}
|
}
|
||||||
|
|
||||||
s := math.Sqrt(sum)
|
s := math.Sqrt(sum)
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user