Mirror of https://github.com/mudler/LocalAI.git (synced 2026-02-03 11:13:31 -05:00)

Compare commits — 2 commits between `propagate_...` and `ci/static-...`

| Author | SHA1 | Date |
|---|---|---|
|  | c28e8ca697 |  |
|  | ecaaff8f03 |  |
@@ -1,23 +0,0 @@
meta {
  name: musicgen
  type: http
  seq: 1
}

post {
  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/sound-generation
  body: json
  auth: none
}

headers {
  Content-Type: application/json
}

body:json {
  {
    "model_id": "facebook/musicgen-small",
    "text": "Exciting 80s Newscast Interstitial",
    "duration_seconds": 8
  }
}
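For reference, the Bruno request above translates to a plain curl call along these lines — a sketch that assumes a LocalAI instance listening on localhost:8080 in place of the {{PROTOCOL}}{{HOST}}:{{PORT}} placeholders, with an illustrative output filename:

    curl http://localhost:8080/v1/sound-generation \
      -H "Content-Type: application/json" \
      -d '{
            "model_id": "facebook/musicgen-small",
            "text": "Exciting 80s Newscast Interstitial",
            "duration_seconds": 8
          }' \
      --output musicgen.wav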
@@ -1,11 +0,0 @@
meta {
  name: model delete
  type: http
  seq: 7
}

post {
  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
  body: none
  auth: none
}

Binary file not shown.
@@ -1,16 +0,0 @@
meta {
  name: transcribe
  type: http
  seq: 1
}

post {
  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/audio/transcriptions
  body: multipartForm
  auth: none
}

body:multipart-form {
  file: @file(transcription/gb1.ogg)
  model: whisper-1
}
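The same transcription request as a curl multipart upload — again a sketch assuming localhost:8080 and the gb1.ogg sample path referenced by the collection:

    curl http://localhost:8080/v1/audio/transcriptions \
      -F file=@transcription/gb1.ogg \
      -F model=whisper-1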
@@ -1,17 +0,0 @@
#!/bin/bash

cd /workspace

# Get the files into the volume without a bind mount
if [ ! -d ".git" ]; then
    git clone https://github.com/mudler/LocalAI.git .
else
    git fetch
fi

echo "Standard Post-Create script completed."

if [ -f "/devcontainer-customization/postcreate.sh" ]; then
    echo "Launching customization postcreate.sh"
    bash "/devcontainer-customization/postcreate.sh"
fi
@@ -1,16 +0,0 @@
#!/bin/bash

cd /workspace

# Grab the pre-stashed backend assets to avoid build issues
cp -r /build/backend-assets /workspace/backend-assets

# Ensures generated source files are present upon load
make prepare

echo "Standard Post-Start script completed."

if [ -f "/devcontainer-customization/poststart.sh" ]; then
    echo "Launching customization poststart.sh"
    bash "/devcontainer-customization/poststart.sh"
fi
@@ -1,55 +0,0 @@
#!/bin/bash

# This file contains some really simple functions that are useful when building up customization scripts.

# Checks if the git config has a user registered - and sets it up if not.
#
# Param 1: name
# Param 2: email
#
config_user() {
    echo "Configuring git for $1 <$2>"
    local gcn=$(git config --global user.name)
    if [ -z "${gcn}" ]; then
        echo "Setting up git user / remote"
        git config --global user.name "$1"
        git config --global user.email "$2"
    fi
}

# Checks if the git remote is configured - and sets it up if not. Fetches either way.
#
# Param 1: remote name
# Param 2: remote url
#
config_remote() {
    echo "Adding git remote and fetching $2 as $1"
    local gr=$(git remote -v | grep $1)
    if [ -z "${gr}" ]; then
        git remote add $1 $2
    fi
    git fetch $1
}

# Setup special .ssh files
# Prints out lines of text to make things pretty
# Param 1: bash array, filenames relative to the customization directory that should be copied to ~/.ssh
setup_ssh() {
    echo "starting ~/.ssh directory setup..."
    mkdir -p "${HOME}.ssh"
    chmod 0700 "${HOME}/.ssh"
    echo "-----"
    local files=("$@")
    for file in "${files[@]}" ; do
        local cfile="/devcontainer-customization/${file}"
        local hfile="${HOME}/.ssh/${file}"
        if [ ! -f "${hfile}" ]; then
            echo "copying \"${file}\""
            cp "${cfile}" "${hfile}"
            chmod 600 "${hfile}"
        fi
    done
    echo "~/.ssh directory setup complete!"
}
@@ -1,25 +0,0 @@
Place any additional resources your environment requires in this directory

Script hooks are currently called for:
`postcreate.sh` and `poststart.sh`

If files with those names exist here, they will be called at the end of the normal script.

This is a good place to set things like `git config --global user.name` are set - and to handle any other files that are mounted via this directory.

To assist in doing so, `source /.devcontainer-scripts/utils.sh` will provide utility functions that may be useful - for example:

```
#!/bin/bash

source "/.devcontainer-scripts/utils.sh"

sshfiles=("config", "key.pub")

setup_ssh "${sshfiles[@]}"

config_user "YOUR NAME" "YOUR EMAIL"

config_remote "REMOTE NAME" "REMOTE URL"

```
@@ -1,24 +0,0 @@
{
    "$schema": "https://raw.githubusercontent.com/devcontainers/spec/main/schemas/devContainer.schema.json",
    "name": "LocalAI",
    "workspaceFolder": "/workspace",
    "dockerComposeFile": [ "./docker-compose-devcontainer.yml" ],
    "service": "api",
    "shutdownAction": "stopCompose",
    "customizations": {
        "vscode": {
            "extensions": [
                "golang.go",
                "ms-vscode.makefile-tools",
                "ms-azuretools.vscode-docker",
                "ms-python.python",
                "ms-python.debugpy",
                "wayou.vscode-todo-highlight",
                "waderyan.gitblame"
            ]
        }
    },
    "forwardPorts": [8080, 3000],
    "postCreateCommand": "bash /.devcontainer-scripts/postcreate.sh",
    "postStartCommand": "bash /.devcontainer-scripts/poststart.sh"
}
@@ -1,48 +0,0 @@
services:
  api:
    build:
      context: ..
      dockerfile: Dockerfile
      target: devcontainer
      args:
        - FFMPEG=true
        - IMAGE_TYPE=extras
        - GO_TAGS=stablediffusion p2p tts
    env_file:
      - ../.env
    ports:
      - 8080:8080
    volumes:
      - localai_workspace:/workspace
      - ../models:/host-models
      - ./customization:/devcontainer-customization
    command: /bin/sh -c "while sleep 1000; do :; done"
    cap_add:
      - SYS_PTRACE
    security_opt:
      - seccomp:unconfined
  prometheus:
    image: prom/prometheus
    container_name: prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
    ports:
      - 9090:9090
    restart: unless-stopped
    volumes:
      - ./prometheus:/etc/prometheus
      - prom_data:/prometheus
  grafana:
    image: grafana/grafana
    container_name: grafana
    ports:
      - 3000:3000
    restart: unless-stopped
    environment:
      - GF_SECURITY_ADMIN_USER=admin
      - GF_SECURITY_ADMIN_PASSWORD=grafana
    volumes:
      - ./grafana:/etc/grafana/provisioning/datasources
volumes:
  prom_data:
  localai_workspace:
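This stack is normally brought up by the devcontainer tooling via the devcontainer.json above (service `api`, shutdownAction `stopCompose`); if you only want the monitoring side, a manual sketch, assuming the compose file sits under .devcontainer/ as the relative ./customization and ../models mounts suggest:

    # bring up only Prometheus and Grafana from the repository root
    docker compose -f .devcontainer/docker-compose-devcontainer.yml up -d prometheus grafana
    # Grafana: http://localhost:3000 (admin / grafana), Prometheus: http://localhost:9090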
@@ -1,10 +0,0 @@

apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    url: http://prometheus:9090
    isDefault: true
    access: proxy
    editable: true
@@ -1,21 +0,0 @@
global:
  scrape_interval: 15s
  scrape_timeout: 10s
  evaluation_interval: 15s
alerting:
  alertmanagers:
    - static_configs:
        - targets: []
      scheme: http
      timeout: 10s
      api_version: v1
scrape_configs:
  - job_name: prometheus
    honor_timestamps: true
    scrape_interval: 15s
    scrape_timeout: 10s
    metrics_path: /metrics
    scheme: http
    static_configs:
      - targets:
          - localhost:9090
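To sanity-check a scrape configuration like this one before mounting it into the prom/prometheus container, promtool (bundled with Prometheus) can validate it; a sketch with an assumed local path matching the compose mount:

    promtool check config .devcontainer/prometheus/prometheus.yml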
@@ -1,7 +1,6 @@
.idea
.github
.vscode
.devcontainer
models
examples/chatbot-ui/models
examples/rwkv/models
.env (3 changes)
@@ -79,9 +79,6 @@
### Enable to run parallel requests
# LOCALAI_PARALLEL_REQUESTS=true

# Enable to allow p2p mode
# LOCALAI_P2P=true

### Watchdog settings
###
# Enables watchdog to kill backends that are inactive for too much time
.gitattributes (1 change, vendored)
@@ -1,2 +1 @@
*.sh text eol=lf
backend/cpp/llama/*.hpp linguist-vendored
.github/bump_deps.sh (13 changes, vendored)
@@ -6,17 +6,4 @@ VAR=$3

LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")

# Read $VAR from Makefile (only first match)
set +e
CURRENT_COMMIT="$(grep -m1 "^$VAR?=" Makefile | cut -d'=' -f2)"
set -e

sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"

if [ -z "$CURRENT_COMMIT" ]; then
    echo "Could not find $VAR in Makefile."
    exit 0
fi

echo "Changes: https://github.com/$REPO/compare/${CURRENT_COMMIT}..${LAST_COMMIT}" >> "${VAR}_message.txt"
echo "${LAST_COMMIT}" >> "${VAR}_commit.txt"
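The bump_deps.yaml workflow further down invokes this script with a repository, a branch and a Makefile variable taken from its job matrix. An illustrative local run with one entry from that matrix (note the script edits the Makefile in place via sed):

    # arguments: repository, branch, Makefile variable (one entry from the workflow matrix)
    bash .github/bump_deps.sh ggerganov/llama.cpp master CPPLLAMA_VERSION
    # afterwards, CPPLLAMA_VERSION_message.txt and CPPLLAMA_VERSION_commit.txt hold the PR body and commit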
.github/check_and_update.py (11 changes, vendored)
@@ -29,14 +29,9 @@ def calculate_sha256(file_path):
def manual_safety_check_hf(repo_id):
    scanResponse = requests.get('https://huggingface.co/api/models/' + repo_id + "/scan")
    scan = scanResponse.json()
    # Check if 'hasUnsafeFile' exists in the response
    if 'hasUnsafeFile' in scan:
        if scan['hasUnsafeFile']:
            return scan
        else:
            return None
    else:
        return None
    if scan['hasUnsafeFile']:
        return scan
    return None

download_type, repo_id_or_url = parse_uri(uri)
.github/ci/modelslist.go (7 changes, vendored)
@@ -6,7 +6,6 @@ import (
	"io/ioutil"
	"os"

	"github.com/microcosm-cc/bluemonday"
	"gopkg.in/yaml.v3"
)

@@ -280,12 +279,6 @@ func main() {
		return
	}

	// Ensure that all arbitrary text content is sanitized before display
	for i, m := range models {
		models[i].Name = bluemonday.StrictPolicy().Sanitize(m.Name)
		models[i].Description = bluemonday.StrictPolicy().Sanitize(m.Description)
	}

	// render the template
	data := struct {
		Models []*GalleryModel
.github/dependabot.yml (6 changes, vendored)
@@ -9,8 +9,6 @@ updates:
    directory: "/"
    schedule:
      interval: "weekly"
    ignore:
      - dependency-name: "github.com/mudler/LocalAI/pkg/grpc/proto"
  - package-ecosystem: "github-actions"
    # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
    directory: "/"
@@ -69,6 +67,10 @@ updates:
    directory: "/backend/python/parler-tts"
    schedule:
      interval: "weekly"
  - package-ecosystem: "pip"
    directory: "/backend/python/petals"
    schedule:
      interval: "weekly"
  - package-ecosystem: "pip"
    directory: "/backend/python/rerankers"
    schedule:
.github/labeler.yml (5 changes, vendored)
@@ -1,11 +1,6 @@
enhancements:
  - head-branch: ['^feature', 'feature']

dependencies:
  - any:
      - changed-files:
          - any-glob-to-any-file: 'Makefile'

kind/documentation:
  - any:
      - changed-files:
.github/workflows/bump_deps.yaml (36 changes, vendored)
@@ -12,14 +12,23 @@ jobs:
|
||||
- repository: "ggerganov/llama.cpp"
|
||||
variable: "CPPLLAMA_VERSION"
|
||||
branch: "master"
|
||||
- repository: "go-skynet/go-ggml-transformers.cpp"
|
||||
variable: "GOGGMLTRANSFORMERS_VERSION"
|
||||
branch: "master"
|
||||
- repository: "donomii/go-rwkv.cpp"
|
||||
variable: "RWKV_VERSION"
|
||||
branch: "main"
|
||||
- repository: "ggerganov/whisper.cpp"
|
||||
variable: "WHISPER_CPP_VERSION"
|
||||
branch: "master"
|
||||
- repository: "PABannier/bark.cpp"
|
||||
variable: "BARKCPP_VERSION"
|
||||
- repository: "go-skynet/go-bert.cpp"
|
||||
variable: "BERT_VERSION"
|
||||
branch: "master"
|
||||
- repository: "go-skynet/bloomz.cpp"
|
||||
variable: "BLOOMZ_VERSION"
|
||||
branch: "main"
|
||||
- repository: "leejet/stable-diffusion.cpp"
|
||||
variable: "STABLEDIFFUSION_GGML_VERSION"
|
||||
- repository: "mudler/go-ggllm.cpp"
|
||||
variable: "GOGGLLM_VERSION"
|
||||
branch: "master"
|
||||
- repository: "mudler/go-stable-diffusion"
|
||||
variable: "STABLEDIFFUSION_VERSION"
|
||||
@@ -31,30 +40,17 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Bump dependencies 🔧
|
||||
id: bump
|
||||
run: |
|
||||
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
|
||||
{
|
||||
echo 'message<<EOF'
|
||||
cat "${{ matrix.variable }}_message.txt"
|
||||
echo EOF
|
||||
} >> "$GITHUB_OUTPUT"
|
||||
{
|
||||
echo 'commit<<EOF'
|
||||
cat "${{ matrix.variable }}_commit.txt"
|
||||
echo EOF
|
||||
} >> "$GITHUB_OUTPUT"
|
||||
rm -rfv ${{ matrix.variable }}_message.txt
|
||||
rm -rfv ${{ matrix.variable }}_commit.txt
|
||||
- name: Create Pull Request
|
||||
uses: peter-evans/create-pull-request@v7
|
||||
uses: peter-evans/create-pull-request@v6
|
||||
with:
|
||||
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
||||
push-to-fork: ci-forks/LocalAI
|
||||
commit-message: ':arrow_up: Update ${{ matrix.repository }}'
|
||||
title: 'chore: :arrow_up: Update ${{ matrix.repository }} to `${{ steps.bump.outputs.commit }}`'
|
||||
title: 'chore: :arrow_up: Update ${{ matrix.repository }}'
|
||||
branch: "update/${{ matrix.variable }}"
|
||||
body: ${{ steps.bump.outputs.message }}
|
||||
body: Bump of ${{ matrix.repository }} version
|
||||
signoff: true
|
||||
|
||||
|
||||
|
||||
.github/workflows/bump_docs.yaml (2 changes, vendored)
@@ -17,7 +17,7 @@ jobs:
|
||||
run: |
|
||||
bash .github/bump_docs.sh ${{ matrix.repository }}
|
||||
- name: Create Pull Request
|
||||
uses: peter-evans/create-pull-request@v7
|
||||
uses: peter-evans/create-pull-request@v6
|
||||
with:
|
||||
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
||||
push-to-fork: ci-forks/LocalAI
|
||||
|
||||
.github/workflows/checksum_checker.yaml (6 changes, vendored)
@@ -23,7 +23,7 @@ jobs:
|
||||
sudo pip install --upgrade pip
|
||||
pip install huggingface_hub
|
||||
- name: 'Setup yq'
|
||||
uses: dcarbone/install-yq-action@v1.3.1
|
||||
uses: dcarbone/install-yq-action@v1.1.1
|
||||
with:
|
||||
version: 'v4.44.2'
|
||||
download-compressed: true
|
||||
@@ -36,12 +36,12 @@ jobs:
|
||||
sudo chmod 777 /hf_cache
|
||||
bash .github/checksum_checker.sh gallery/index.yaml
|
||||
- name: Create Pull Request
|
||||
uses: peter-evans/create-pull-request@v7
|
||||
uses: peter-evans/create-pull-request@v6
|
||||
with:
|
||||
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
||||
push-to-fork: ci-forks/LocalAI
|
||||
commit-message: ':arrow_up: Checksum updates in gallery/index.yaml'
|
||||
title: 'chore(model-gallery): :arrow_up: update checksum'
|
||||
title: 'models(gallery): :arrow_up: update checksum'
|
||||
branch: "update/checksum"
|
||||
body: Updating checksums in gallery/index.yaml
|
||||
signoff: true
|
||||
|
||||
@@ -12,7 +12,6 @@ jobs:
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
ref: "${{ github.event.pull_request.merge_commit_sha }}"
|
||||
fetch-depth: 0 # needed to checkout all branches for this Action to work
|
||||
- uses: mudler/localai-github-action@v1
|
||||
with:
|
||||
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
|
||||
@@ -23,7 +22,6 @@ jobs:
|
||||
json_diff_file_output: diff.json
|
||||
raw_diff_file_output: diff.txt
|
||||
file_output_only: "true"
|
||||
base_branch: ${{ github.event.pull_request.base.sha }}
|
||||
- name: Show diff
|
||||
env:
|
||||
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
|
||||
.github/workflows/deploy-explorer.yaml (64 changes, vendored)
@@ -1,64 +0,0 @@
|
||||
name: Explorer deployment
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
tags:
|
||||
- 'v*'
|
||||
|
||||
concurrency:
|
||||
group: ci-deploy-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
||||
|
||||
jobs:
|
||||
build-linux:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: '1.21.x'
|
||||
cache: false
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||
make protogen-go
|
||||
- name: Build api
|
||||
run: |
|
||||
CGO_ENABLED=0 make build-api
|
||||
- name: rm
|
||||
uses: appleboy/ssh-action@v1.2.0
|
||||
with:
|
||||
host: ${{ secrets.EXPLORER_SSH_HOST }}
|
||||
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
|
||||
key: ${{ secrets.EXPLORER_SSH_KEY }}
|
||||
port: ${{ secrets.EXPLORER_SSH_PORT }}
|
||||
script: |
|
||||
sudo rm -rf local-ai/ || true
|
||||
- name: copy file via ssh
|
||||
uses: appleboy/scp-action@v0.1.7
|
||||
with:
|
||||
host: ${{ secrets.EXPLORER_SSH_HOST }}
|
||||
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
|
||||
key: ${{ secrets.EXPLORER_SSH_KEY }}
|
||||
port: ${{ secrets.EXPLORER_SSH_PORT }}
|
||||
source: "local-ai"
|
||||
overwrite: true
|
||||
rm: true
|
||||
target: ./local-ai
|
||||
- name: restarting
|
||||
uses: appleboy/ssh-action@v1.2.0
|
||||
with:
|
||||
host: ${{ secrets.EXPLORER_SSH_HOST }}
|
||||
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
|
||||
key: ${{ secrets.EXPLORER_SSH_KEY }}
|
||||
port: ${{ secrets.EXPLORER_SSH_PORT }}
|
||||
script: |
|
||||
sudo cp -rfv local-ai/local-ai /usr/bin/local-ai
|
||||
sudo systemctl restart local-ai
|
||||
.github/workflows/generate_intel_image.yaml (2 changes, vendored)
@@ -15,7 +15,7 @@ jobs:
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- base-image: intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04
|
||||
- base-image: intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04
|
||||
runs-on: 'ubuntu-latest'
|
||||
platforms: 'linux/amd64'
|
||||
runs-on: ${{matrix.runs-on}}
|
||||
|
||||
.github/workflows/image-pr.yml (4 changes, vendored)
@@ -47,7 +47,7 @@ jobs:
|
||||
# makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
cuda-minor-version: "4"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda12-ffmpeg'
|
||||
@@ -120,7 +120,7 @@ jobs:
|
||||
# makeflags: "--jobs=3 --output-sync=target"
|
||||
# - build-type: 'cublas'
|
||||
# cuda-major-version: "12"
|
||||
# cuda-minor-version: "0"
|
||||
# cuda-minor-version: "4"
|
||||
# platforms: 'linux/amd64'
|
||||
# tag-latest: 'false'
|
||||
# tag-suffix: '-cublas-cuda12-ffmpeg-core'
|
||||
|
||||
.github/workflows/image.yml (125 changes, vendored)
@@ -13,78 +13,6 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
hipblas-jobs:
|
||||
uses: ./.github/workflows/image_build.yml
|
||||
with:
|
||||
tag-latest: ${{ matrix.tag-latest }}
|
||||
tag-suffix: ${{ matrix.tag-suffix }}
|
||||
ffmpeg: ${{ matrix.ffmpeg }}
|
||||
image-type: ${{ matrix.image-type }}
|
||||
build-type: ${{ matrix.build-type }}
|
||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||
platforms: ${{ matrix.platforms }}
|
||||
runs-on: ${{ matrix.runs-on }}
|
||||
base-image: ${{ matrix.base-image }}
|
||||
grpc-base-image: ${{ matrix.grpc-base-image }}
|
||||
aio: ${{ matrix.aio }}
|
||||
makeflags: ${{ matrix.makeflags }}
|
||||
latest-image: ${{ matrix.latest-image }}
|
||||
latest-image-aio: ${{ matrix.latest-image-aio }}
|
||||
secrets:
|
||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
strategy:
|
||||
# Pushing with all jobs in parallel
|
||||
# eats the bandwidth of all the nodes
|
||||
max-parallel: 2
|
||||
matrix:
|
||||
include:
|
||||
- build-type: 'hipblas'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-hipblas-ffmpeg'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'extras'
|
||||
aio: "-aio-gpu-hipblas"
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
latest-image: 'latest-gpu-hipblas'
|
||||
latest-image-aio: 'latest-aio-gpu-hipblas'
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'hipblas'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-hipblas'
|
||||
ffmpeg: 'false'
|
||||
image-type: 'extras'
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'hipblas'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-hipblas-ffmpeg-core'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'hipblas'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-hipblas-core'
|
||||
ffmpeg: 'false'
|
||||
image-type: 'core'
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
self-hosted-jobs:
|
||||
uses: ./.github/workflows/image_build.yml
|
||||
with:
|
||||
@@ -111,7 +39,7 @@ jobs:
|
||||
strategy:
|
||||
# Pushing with all jobs in parallel
|
||||
# eats the bandwidth of all the nodes
|
||||
max-parallel: ${{ github.event_name != 'pull_request' && 5 || 8 }}
|
||||
max-parallel: ${{ github.event_name != 'pull_request' && 6 || 10 }}
|
||||
matrix:
|
||||
include:
|
||||
# Extra images
|
||||
@@ -147,7 +75,7 @@ jobs:
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
cuda-minor-version: "4"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda12'
|
||||
@@ -172,7 +100,7 @@ jobs:
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
cuda-minor-version: "4"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-cublas-cuda12-ffmpeg'
|
||||
@@ -194,6 +122,29 @@ jobs:
|
||||
base-image: "ubuntu:22.04"
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'hipblas'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-hipblas-ffmpeg'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'extras'
|
||||
aio: "-aio-gpu-hipblas"
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
latest-image: 'latest-gpu-hipblas'
|
||||
latest-image-aio: 'latest-aio-gpu-hipblas'
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'hipblas'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-hipblas'
|
||||
ffmpeg: 'false'
|
||||
image-type: 'extras'
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'sycl_f16'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
@@ -261,6 +212,26 @@ jobs:
|
||||
image-type: 'core'
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'hipblas'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-hipblas-ffmpeg-core'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'hipblas'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-hipblas-core'
|
||||
ffmpeg: 'false'
|
||||
image-type: 'core'
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
|
||||
core-image-build:
|
||||
uses: ./.github/workflows/image_build.yml
|
||||
@@ -314,7 +285,7 @@ jobs:
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
cuda-minor-version: "4"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda12-core'
|
||||
@@ -336,7 +307,7 @@ jobs:
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
cuda-minor-version: "4"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda12-ffmpeg-core'
|
||||
|
||||
.github/workflows/notify-models.yaml (4 changes, vendored)
@@ -79,7 +79,7 @@ jobs:
|
||||
args: ${{ steps.summarize.outputs.message }}
|
||||
- name: Setup tmate session if fails
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.19
|
||||
uses: mxschmitt/action-tmate@v3.18
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
@@ -161,7 +161,7 @@ jobs:
|
||||
TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
|
||||
- name: Setup tmate session if fails
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.19
|
||||
uses: mxschmitt/action-tmate@v3.18
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
|
||||
.github/workflows/release.yaml (23 changes, vendored)
@@ -4,8 +4,6 @@ on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
tags:
|
||||
- 'v*'
|
||||
pull_request:
|
||||
|
||||
env:
|
||||
@@ -31,10 +29,11 @@ jobs:
|
||||
with:
|
||||
go-version: '1.21.x'
|
||||
cache: false
|
||||
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk
|
||||
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache gawk
|
||||
sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
|
||||
- name: Install CUDA Dependencies
|
||||
run: |
|
||||
@@ -123,7 +122,7 @@ jobs:
|
||||
release/*
|
||||
- name: Setup tmate session if tests fail
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.19
|
||||
uses: mxschmitt/action-tmate@v3.18
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
@@ -150,7 +149,7 @@ jobs:
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
|
||||
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache gawk cmake libgmock-dev
|
||||
- name: Intel Dependencies
|
||||
run: |
|
||||
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
|
||||
@@ -232,7 +231,7 @@ jobs:
|
||||
release/*
|
||||
- name: Setup tmate session if tests fail
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.19
|
||||
uses: mxschmitt/action-tmate@v3.18
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
@@ -251,7 +250,7 @@ jobs:
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl
|
||||
sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||
- name: Build stablediffusion
|
||||
@@ -294,7 +293,7 @@ jobs:
|
||||
export C_INCLUDE_PATH=/usr/local/include
|
||||
export CPLUS_INCLUDE_PATH=/usr/local/include
|
||||
export PATH=$PATH:$GOPATH/bin
|
||||
export SKIP_GRPC_BACKEND=backend-assets/grpc/whisper
|
||||
|
||||
make dist
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
@@ -308,7 +307,7 @@ jobs:
|
||||
release/*
|
||||
- name: Setup tmate session if tests fail
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.19
|
||||
uses: mxschmitt/action-tmate@v3.18
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
@@ -327,7 +326,7 @@ jobs:
|
||||
cache: false
|
||||
- name: Dependencies
|
||||
run: |
|
||||
brew install protobuf grpc libomp llvm
|
||||
brew install protobuf grpc
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||
- name: Build
|
||||
@@ -336,7 +335,7 @@ jobs:
|
||||
export C_INCLUDE_PATH=/usr/local/include
|
||||
export CPLUS_INCLUDE_PATH=/usr/local/include
|
||||
export PATH=$PATH:$GOPATH/bin
|
||||
export CC=/opt/homebrew/opt/llvm/bin/clang
|
||||
|
||||
make dist
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
@@ -350,7 +349,7 @@ jobs:
|
||||
release/*
|
||||
- name: Setup tmate session if tests fail
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.19
|
||||
uses: mxschmitt/action-tmate@v3.18
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
|
||||
.github/workflows/secscan.yaml (2 changes, vendored)
@@ -18,7 +18,7 @@ jobs:
|
||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||
- name: Run Gosec Security Scanner
|
||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||
uses: securego/gosec@v2.21.4
|
||||
uses: securego/gosec@master
|
||||
with:
|
||||
# we let the report trigger content trigger a failure using the GitHub Security features.
|
||||
args: '-no-fail -fmt sarif -out results.sarif ./...'
|
||||
|
||||
.github/workflows/static-check.yml (70 changes, vendored, new file)
@@ -0,0 +1,70 @@
name: static check
on: pull_request

jobs:
  imports:
    name: Imports
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@master
      - name: check
        uses: danhunsaker/golang-github-actions@v1.3.0
        with:
          run: imports
          token: ${{ secrets.GITHUB_TOKEN }}

  errcheck:
    name: Errcheck
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@master
      - name: check
        uses: danhunsaker/golang-github-actions@v1.3.0
        with:
          run: errcheck
          token: ${{ secrets.GITHUB_TOKEN }}

  lint:
    name: Lint
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@master
      - name: check
        uses: danhunsaker/golang-github-actions@v1.3.0
        with:
          run: lint
          token: ${{ secrets.GITHUB_TOKEN }}

  shadow:
    name: Shadow
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@master
      - name: check
        uses: danhunsaker/golang-github-actions@v1.3.0
        with:
          run: shadow
          token: ${{ secrets.GITHUB_TOKEN }}

  staticcheck:
    name: StaticCheck
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@master
      - name: check
        uses: danhunsaker/golang-github-actions@v1.3.0
        with:
          run: staticcheck
          token: ${{ secrets.GITHUB_TOKEN }}

  sec:
    name: Sec
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@master
      - name: check
        uses: danhunsaker/golang-github-actions@v1.3.0
        with:
          run: sec
          token: ${{ secrets.GITHUB_TOKEN }}
          flags: "-exclude=G104"
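For contributors who want to reproduce roughly the same checks locally before opening a PR, the standalone Go tools behind these job names can be invoked directly. A sketch under the assumption that the tools are fetched with go install (lint and shadow are omitted here, and flags may differ from what danhunsaker/golang-github-actions passes internally):

    # fetch the standalone checkers (upstream module paths)
    go install golang.org/x/tools/cmd/goimports@latest
    go install github.com/kisielk/errcheck@latest
    go install honnef.co/go/tools/cmd/staticcheck@latest
    go install github.com/securego/gosec/v2/cmd/gosec@latest

    goimports -l .                # imports: list files whose import formatting differs
    errcheck ./...                # errcheck: unchecked errors
    staticcheck ./...             # staticcheck
    gosec -exclude=G104 ./...     # sec, with the same exclusion the workflow passes via flags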
.github/workflows/test-extra.yml (33 changes, vendored)
@@ -123,13 +123,6 @@ jobs:
|
||||
run: |
|
||||
make --jobs=5 --output-sync=target -C backend/python/parler-tts
|
||||
make --jobs=5 --output-sync=target -C backend/python/parler-tts test
|
||||
- name: Setup tmate session if tests fail
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.19
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
limit-access-to-actor: true
|
||||
|
||||
tests-openvoice:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -175,6 +168,32 @@ jobs:
|
||||
make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
|
||||
make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
|
||||
|
||||
|
||||
|
||||
# tests-petals:
|
||||
# runs-on: ubuntu-latest
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v4
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
# run: |
|
||||
# sudo apt-get update
|
||||
# sudo apt-get install build-essential ffmpeg
|
||||
# # Install UV
|
||||
# curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
# sudo apt-get install -y libopencv-dev
|
||||
# pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||
|
||||
# - name: Test petals
|
||||
# run: |
|
||||
# make --jobs=5 --output-sync=target -C backend/python/petals
|
||||
# make --jobs=5 --output-sync=target -C backend/python/petals test
|
||||
|
||||
|
||||
|
||||
# tests-bark:
|
||||
# runs-on: ubuntu-latest
|
||||
# steps:
|
||||
|
||||
.github/workflows/test.yml (24 changes, vendored)
@@ -70,7 +70,7 @@ jobs:
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
|
||||
sudo apt-get install build-essential curl ffmpeg
|
||||
sudo apt-get install -y libgmock-dev
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
@@ -133,7 +133,7 @@ jobs:
|
||||
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
|
||||
- name: Setup tmate session if tests fail
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.19
|
||||
uses: mxschmitt/action-tmate@v3.18
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
@@ -178,26 +178,17 @@ jobs:
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
run: |
|
||||
# Install protoc
|
||||
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
|
||||
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
||||
rm protoc.zip
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||
PATH="$PATH:$HOME/go/bin" make protogen-go
|
||||
- name: Build images
|
||||
run: |
|
||||
docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=extras --build-arg EXTRA_BACKENDS=rerankers --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
|
||||
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
|
||||
- name: Test
|
||||
run: |
|
||||
PATH="$PATH:$HOME/go/bin" LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
|
||||
LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
|
||||
make run-e2e-aio
|
||||
- name: Setup tmate session if tests fail
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.19
|
||||
uses: mxschmitt/action-tmate@v3.18
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
@@ -223,19 +214,18 @@ jobs:
|
||||
run: go version
|
||||
- name: Dependencies
|
||||
run: |
|
||||
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
|
||||
pip install --user --no-cache-dir grpcio-tools
|
||||
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc
|
||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||
- name: Test
|
||||
run: |
|
||||
export C_INCLUDE_PATH=/usr/local/include
|
||||
export CPLUS_INCLUDE_PATH=/usr/local/include
|
||||
export CC=/opt/homebrew/opt/llvm/bin/clang
|
||||
# Used to run the newer GNUMake version from brew that supports --output-sync
|
||||
export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
|
||||
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
|
||||
- name: Setup tmate session if tests fail
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.19
|
||||
uses: mxschmitt/action-tmate@v3.18
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
|
||||
.github/workflows/update_swagger.yaml (2 changes, vendored)
@@ -25,7 +25,7 @@ jobs:
|
||||
run: |
|
||||
make protogen-go swagger
|
||||
- name: Create Pull Request
|
||||
uses: peter-evans/create-pull-request@v7
|
||||
uses: peter-evans/create-pull-request@v6
|
||||
with:
|
||||
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
||||
push-to-fork: ci-forks/LocalAI
|
||||
|
||||
.gitignore (5 changes, vendored)
@@ -2,7 +2,6 @@
|
||||
/sources/
|
||||
__pycache__/
|
||||
*.a
|
||||
*.o
|
||||
get-sources
|
||||
prepare-sources
|
||||
/backend/cpp/llama/grpc-server
|
||||
@@ -13,6 +12,7 @@ prepare-sources
|
||||
|
||||
go-ggml-transformers
|
||||
go-gpt2
|
||||
go-rwkv
|
||||
whisper.cpp
|
||||
/bloomz
|
||||
go-bert
|
||||
@@ -54,6 +54,3 @@ docs/static/gallery.html
|
||||
|
||||
# backend virtual environments
|
||||
**/venv
|
||||
|
||||
# per-developer customization files for the development container
|
||||
.devcontainer/customization/*
|
||||
.vscode/launch.json (21 changes, vendored)
@@ -3,12 +3,12 @@
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Python: Current File",
|
||||
"type": "debugpy",
|
||||
"type": "python",
|
||||
"request": "launch",
|
||||
"program": "${file}",
|
||||
"console": "integratedTerminal",
|
||||
"justMyCode": false,
|
||||
"cwd": "${fileDirname}",
|
||||
"cwd": "${workspaceFolder}/examples/langchain-chroma",
|
||||
"env": {
|
||||
"OPENAI_API_BASE": "http://localhost:8080/v1",
|
||||
"OPENAI_API_KEY": "abc"
|
||||
@@ -19,16 +19,15 @@
|
||||
"type": "go",
|
||||
"request": "launch",
|
||||
"mode": "debug",
|
||||
"program": "${workspaceRoot}",
|
||||
"args": [],
|
||||
"program": "${workspaceFolder}/main.go",
|
||||
"args": [
|
||||
"api"
|
||||
],
|
||||
"env": {
|
||||
"LOCALAI_LOG_LEVEL": "debug",
|
||||
"LOCALAI_P2P": "true",
|
||||
"LOCALAI_FEDERATED": "true"
|
||||
},
|
||||
"buildFlags": ["-tags", "stablediffusion p2p tts", "-v"],
|
||||
"envFile": "${workspaceFolder}/.env",
|
||||
"cwd": "${workspaceRoot}"
|
||||
"C_INCLUDE_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
|
||||
"LIBRARY_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
|
||||
"DEBUG": "true"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -15,6 +15,8 @@ Thank you for your interest in contributing to LocalAI! We appreciate your time
|
||||
- [Documentation](#documentation)
|
||||
- [Community and Communication](#community-and-communication)
|
||||
|
||||
|
||||
|
||||
## Getting Started
|
||||
|
||||
### Prerequisites
|
||||
@@ -52,7 +54,7 @@ If you find a bug, have a feature request, or encounter any issues, please check
|
||||
|
||||
## Coding Guidelines
|
||||
|
||||
- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like [`golangci-lint`](https://golangci-lint.run) can help you here.
|
||||
- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like []`golangci-lint`](https://golangci-lint.run) can help you here.
|
||||
|
||||
## Testing
|
||||
|
||||
@@ -82,3 +84,5 @@ We are welcome the contribution of the documents, please open new PR or create a
|
||||
- You can reach out via the Github issue tracker.
|
||||
- Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions)
|
||||
- Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy)
|
||||
|
||||
---
|
||||
|
||||
Dockerfile (163 changes)
@@ -8,14 +8,12 @@ FROM ${BASE_IMAGE} AS requirements-core
|
||||
|
||||
USER root
|
||||
|
||||
ARG GO_VERSION=1.22.6
|
||||
ARG CMAKE_VERSION=3.26.4
|
||||
ARG CMAKE_FROM_SOURCE=false
|
||||
ARG GO_VERSION=1.22.5
|
||||
ARG TARGETARCH
|
||||
ARG TARGETVARIANT
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
|
||||
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
|
||||
|
||||
|
||||
RUN apt-get update && \
|
||||
@@ -23,28 +21,16 @@ RUN apt-get update && \
|
||||
build-essential \
|
||||
ccache \
|
||||
ca-certificates \
|
||||
curl libssl-dev \
|
||||
cmake \
|
||||
curl \
|
||||
git \
|
||||
unzip upx-ucl && \
|
||||
unzip && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install CMake (the version in 22.04 is too old)
|
||||
RUN <<EOT bash
|
||||
if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
|
||||
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
|
||||
else
|
||||
apt-get update && \
|
||||
apt-get install -y \
|
||||
cmake && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
fi
|
||||
EOT
|
||||
|
||||
# Install Go
|
||||
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
|
||||
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
|
||||
ENV PATH $PATH:/root/go/bin:/usr/local/go/bin
|
||||
|
||||
# Install grpc compilers
|
||||
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
|
||||
@@ -53,18 +39,15 @@ RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
|
||||
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
|
||||
RUN update-ca-certificates
|
||||
|
||||
RUN test -n "$TARGETARCH" \
|
||||
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
|
||||
|
||||
# Use the variables in subsequent instructions
|
||||
RUN echo "Target Architecture: $TARGETARCH"
|
||||
RUN echo "Target Variant: $TARGETVARIANT"
|
||||
|
||||
# Cuda
|
||||
ENV PATH=/usr/local/cuda/bin:${PATH}
|
||||
ENV PATH /usr/local/cuda/bin:${PATH}
|
||||
|
||||
# HipBLAS requirements
|
||||
ENV PATH=/opt/rocm/bin:${PATH}
|
||||
ENV PATH /opt/rocm/bin:${PATH}
|
||||
|
||||
# OpenBLAS requirements and stable diffusion
|
||||
RUN apt-get update && \
|
||||
@@ -79,14 +62,16 @@ RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
RUN test -n "$TARGETARCH" \
|
||||
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
|
||||
|
||||
###################################
|
||||
###################################
|
||||
|
||||
# The requirements-extras target is for any builds with IMAGE_TYPE=extras. It should not be placed in this target unless every IMAGE_TYPE=extras build will use it
|
||||
FROM requirements-core AS requirements-extras
|
||||
|
||||
# Install uv as a system package
|
||||
RUN curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR=/usr/bin sh
|
||||
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||
|
||||
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
||||
@@ -96,7 +81,7 @@ RUN apt-get update && \
|
||||
espeak \
|
||||
python3-pip \
|
||||
python-is-python3 \
|
||||
python3-dev llvm \
|
||||
python3-dev \
|
||||
python3-venv && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/* && \
|
||||
@@ -114,7 +99,7 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers
|
||||
|
||||
ARG BUILD_TYPE
|
||||
ARG CUDA_MAJOR_VERSION=12
|
||||
ARG CUDA_MINOR_VERSION=0
|
||||
ARG CUDA_MINOR_VERSION=4
|
||||
|
||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||
|
||||
@@ -203,8 +188,6 @@ FROM ${GRPC_BASE_IMAGE} AS grpc
|
||||
# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
|
||||
ARG GRPC_MAKEFLAGS="-j4 -Otarget"
|
||||
ARG GRPC_VERSION=v1.65.0
|
||||
ARG CMAKE_FROM_SOURCE=false
|
||||
ARG CMAKE_VERSION=3.26.4
|
||||
|
||||
ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
|
||||
|
||||
@@ -213,24 +196,12 @@ WORKDIR /build
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
build-essential curl libssl-dev \
|
||||
build-essential \
|
||||
cmake \
|
||||
git && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install CMake (the version in 22.04 is too old)
|
||||
RUN <<EOT bash
|
||||
if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
|
||||
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
|
||||
else
|
||||
apt-get update && \
|
||||
apt-get install -y \
|
||||
cmake && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
fi
|
||||
EOT
|
||||
|
||||
# We install GRPC to a different prefix here so that we can copy in only the build artifacts later
|
||||
# saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree
|
||||
# and running make install in the target container
|
||||
@@ -246,14 +217,13 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
|
||||
###################################
|
||||
###################################
|
||||
|
||||
# The builder-base target has the arguments, variables, and copies shared between full builder images and the uncompiled devcontainer
|
||||
|
||||
FROM requirements-drivers AS builder-base
|
||||
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
|
||||
# Adjustments to the build process should likely be made here.
|
||||
FROM requirements-drivers AS builder
|
||||
|
||||
ARG GO_TAGS="stablediffusion tts p2p"
|
||||
ARG GRPC_BACKENDS
|
||||
ARG MAKEFLAGS
|
||||
ARG LD_FLAGS="-s -w"
|
||||
|
||||
ENV GRPC_BACKENDS=${GRPC_BACKENDS}
|
||||
ENV GO_TAGS=${GO_TAGS}
|
||||
@@ -261,12 +231,14 @@ ENV MAKEFLAGS=${MAKEFLAGS}
|
||||
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
|
||||
ENV NVIDIA_VISIBLE_DEVICES=all
|
||||
ENV LD_FLAGS=${LD_FLAGS}
|
||||
|
||||
RUN echo "GO_TAGS: $GO_TAGS" && echo "TARGETARCH: $TARGETARCH"
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
COPY . .
|
||||
COPY .git .
|
||||
RUN echo "GO_TAGS: $GO_TAGS"
|
||||
|
||||
RUN make prepare
|
||||
|
||||
# We need protoc installed, and the version in 22.04 is too old. We will create one as part installing the GRPC build below
|
||||
# but that will also being in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only
|
||||
@@ -284,35 +256,8 @@ RUN <<EOT bash
|
||||
fi
|
||||
EOT
|
||||
|
||||
|
||||
###################################
|
||||
###################################
|
||||
|
||||
# This first portion of builder holds the layers specifically used to build backend-assets/grpc/stablediffusion
|
||||
# In most cases, builder is the image you should be using - however, this can save build time if one just needs to copy backend-assets/grpc/stablediffusion and nothing else.
|
||||
FROM builder-base AS builder-sd
|
||||
|
||||
# stablediffusion does not tolerate a newer version of abseil, copy only over enough elements to build it
|
||||
COPY Makefile .
|
||||
COPY go.mod .
|
||||
COPY go.sum .
|
||||
COPY backend/backend.proto ./backend/backend.proto
|
||||
COPY backend/go/image/stablediffusion ./backend/go/image/stablediffusion
|
||||
COPY pkg/grpc ./pkg/grpc
|
||||
COPY pkg/stablediffusion ./pkg/stablediffusion
|
||||
RUN git init
|
||||
RUN make sources/go-stable-diffusion
|
||||
RUN touch prepare-sources
|
||||
|
||||
# Actually build the backend
|
||||
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make backend-assets/grpc/stablediffusion
|
||||
|
||||
###################################
|
||||
###################################
|
||||
|
||||
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
|
||||
# Adjustments to the build process should likely be made here.
|
||||
FROM builder-sd AS builder
|
||||
# stablediffusion does not tolerate a newer version of abseil, build it first
|
||||
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
||||
|
||||
# Install the pre-built GRPC
|
||||
COPY --from=grpc /opt/grpc /usr/local
|
||||
@@ -320,20 +265,8 @@ COPY --from=grpc /opt/grpc /usr/local
|
||||
# Rebuild with defaults backends
|
||||
WORKDIR /build
|
||||
|
||||
COPY . .
|
||||
COPY .git .
|
||||
|
||||
RUN make prepare
|
||||
|
||||
## Build the binary
|
||||
## If it's CUDA or hipblas, we want to skip some of the llama-compat backends to save space
|
||||
## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build
|
||||
## (both will use CUDA or hipblas for the actual computation)
|
||||
RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
|
||||
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
|
||||
else \
|
||||
make build; \
|
||||
fi
|
||||
RUN make build
|
||||
|
||||
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
|
||||
mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
|
||||
@@ -343,40 +276,6 @@ RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
|
||||
###################################
|
||||
###################################
|
||||
|
||||
# The devcontainer target is not used on CI. It is a target for developers to use locally -
|
||||
# rather than copying files it mounts them locally and leaves building to the developer
|
||||
|
||||
FROM builder-base AS devcontainer
|
||||
|
||||
ARG FFMPEG
|
||||
|
||||
COPY --from=grpc /opt/grpc /usr/local
|
||||
|
||||
COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion /build/backend-assets/grpc/stablediffusion
|
||||
|
||||
COPY .devcontainer-scripts /.devcontainer-scripts
|
||||
|
||||
# Add FFmpeg
|
||||
RUN if [ "${FFMPEG}" = "true" ]; then \
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
ffmpeg && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/* \
|
||||
; fi
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
ssh less wget
|
||||
# For the devcontainer, leave apt functional in case additional devtools are needed at runtime.
|
||||
|
||||
RUN go install github.com/go-delve/delve/cmd/dlv@latest
|
||||
|
||||
RUN go install github.com/mikefarah/yq/v4@latest
|
||||
|
||||
###################################
|
||||
###################################
|
||||
|
||||
# This is the final target. The result of this target will be the image uploaded to the registry.
|
||||
# If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
|
||||
FROM requirements-drivers
|
||||
@@ -427,7 +326,7 @@ COPY --from=builder /build/local-ai ./
|
||||
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
|
||||
|
||||
# do not let stablediffusion rebuild (requires an older version of absl)
|
||||
COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
|
||||
COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
|
||||
|
||||
# Change the shell to bash so we can use [[ tests below
|
||||
SHELL ["/bin/bash", "-c"]
@@ -446,6 +345,9 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAG
    ; fi && \
    if [[ ( "${EXTRA_BACKENDS}" =~ "transformers-musicgen" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
        make -C backend/python/transformers-musicgen \
    ; fi && \
    if [[ ( "${EXTRA_BACKENDS}" =~ "exllama1" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
        make -C backend/python/exllama \
    ; fi

RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
@@ -454,6 +356,9 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$I
    if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
        make -C backend/python/openvoice \
    ; fi && \
    if [[ ( "${EXTRA_BACKENDS}" =~ "petals" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
        make -C backend/python/petals \
    ; fi && \
    if [[ ( "${EXTRA_BACKENDS}" =~ "sentencetransformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
        make -C backend/python/sentencetransformers \
    ; fi && \
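The `[[ ... =~ ... ]]` tests above gate each optional Python backend on the `EXTRA_BACKENDS` and `IMAGE_TYPE` build args: with an empty `EXTRA_BACKENDS` every backend is built, otherwise only the names listed are, and all of them require `IMAGE_TYPE=extras`. A hedged sketch of how that could be exercised at build time (other required build args are omitted here, and the tag is arbitrary):

```bash
# Sketch: build an "extras" image with only two of the optional Python backends
docker build \
  --build-arg IMAGE_TYPE=extras \
  --build-arg EXTRA_BACKENDS="coqui transformers-musicgen" \
  -t localai-extras .
```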
Makefile
@@ -8,15 +8,27 @@ DETECT_LIBS?=true
|
||||
# llama.cpp versions
|
||||
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
||||
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||
CPPLLAMA_VERSION?=dafae66cc242eb766797194d3c85c5e502625623
|
||||
CPPLLAMA_VERSION?=b3283448ce9a5098226afe1d8648ccc578511fe4
|
||||
|
||||
# gpt4all version
|
||||
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
|
||||
GPT4ALL_VERSION?=27a8b020c36b0df8f8b82a252d261cda47cf44b8
|
||||
|
||||
# go-rwkv version
|
||||
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
||||
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
|
||||
|
||||
# whisper.cpp version
|
||||
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
||||
WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d
|
||||
WHISPER_CPP_VERSION?=f68298ce06ca3edd6e6f3f21c3d0bb5f073942c3
|
||||
|
||||
# bert.cpp version
|
||||
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
|
||||
BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4
|
||||
|
||||
# go-piper version
|
||||
PIPER_REPO?=https://github.com/mudler/go-piper
|
||||
PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
|
||||
PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759
|
||||
|
||||
# stablediffusion version
|
||||
STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion
|
||||
@@ -26,21 +38,8 @@ STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
|
||||
TINYDREAM_REPO?=https://github.com/M0Rf30/go-tiny-dream
|
||||
TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
|
||||
|
||||
# bark.cpp
|
||||
BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
|
||||
BARKCPP_VERSION?=v1.0.0
|
||||
|
||||
# stablediffusion.cpp (ggml)
|
||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||
STABLEDIFFUSION_GGML_VERSION?=9578fdcc4632dc3de5565f28e2fb16b7c18f8d48
|
||||
|
||||
ONNX_VERSION?=1.20.0
|
||||
ONNX_ARCH?=x64
|
||||
ONNX_OS?=linux
|
||||
|
||||
export BUILD_TYPE?=
|
||||
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
|
||||
export GGML_CMAKE_ARGS?=
|
||||
export CMAKE_ARGS?=
|
||||
export BACKEND_LIBS?=
|
||||
|
||||
@@ -50,7 +49,6 @@ CGO_LDFLAGS_WHISPER+=-lggml
|
||||
CUDA_LIBPATH?=/usr/local/cuda/lib64/
|
||||
GO_TAGS?=
|
||||
BUILD_ID?=
|
||||
NATIVE?=false
|
||||
|
||||
TEST_DIR=/tmp/test
|
||||
|
||||
@@ -60,7 +58,7 @@ RANDOM := $(shell bash -c 'echo $$RANDOM')
|
||||
|
||||
VERSION?=$(shell git describe --always --tags || echo "dev" )
|
||||
# go tool nm ./local-ai | grep Commit
|
||||
LD_FLAGS?=-s -w
|
||||
LD_FLAGS?=
|
||||
override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Version=$(VERSION)"
|
||||
override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
|
||||
|
||||
@@ -74,14 +72,6 @@ WHITE := $(shell tput -Txterm setaf 7)
|
||||
CYAN := $(shell tput -Txterm setaf 6)
|
||||
RESET := $(shell tput -Txterm sgr0)
|
||||
|
||||
UPX?=
|
||||
# check if upx exists
|
||||
ifeq (, $(shell which upx))
|
||||
UPX=
|
||||
else
|
||||
UPX=$(shell which upx)
|
||||
endif
|
||||
|
||||
# Default Docker bridge IP
|
||||
E2E_BRIDGE_IP?=172.17.0.1
|
||||
|
||||
@@ -89,61 +79,7 @@ ifndef UNAME_S
UNAME_S := $(shell uname -s)
endif

# IF native is false, we add -DGGML_NATIVE=OFF to GGML_CMAKE_ARGS
ifeq ($(NATIVE),false)
GGML_CMAKE_ARGS+=-DGGML_NATIVE=OFF
endif

# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
GGML_CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF

# If build type is cublas, then we set -DGGML_CUDA=ON to GGML_CMAKE_ARGS automatically
ifeq ($(BUILD_TYPE),cublas)
GGML_CMAKE_ARGS+=-DGGML_CUDA=ON
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
# to GGML_CMAKE_ARGS automatically
else ifeq ($(BUILD_TYPE),openblas)
GGML_CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
else ifeq ($(BUILD_TYPE),clblas)
GGML_CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
else ifeq ($(BUILD_TYPE),hipblas)
GGML_CMAKE_ARGS+=-DGGML_HIP=ON
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
# But if it's OSX without metal, disable it here
else ifeq ($(OS),Darwin)
ifneq ($(BUILD_TYPE),metal)
GGML_CMAKE_ARGS+=-DGGML_METAL=OFF
else
GGML_CMAKE_ARGS+=-DGGML_METAL=ON
GGML_CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
TARGET+=--target ggml-metal
endif
endif

ifeq ($(BUILD_TYPE),sycl_f16)
GGML_CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
endif

ifeq ($(BUILD_TYPE),sycl_f32)
GGML_CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
endif
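These conditionals translate a single `BUILD_TYPE` value into the matching GGML CMake flags. A minimal sketch of how they are typically driven from the command line, assuming the corresponding toolchain (CUDA, ROCm, oneAPI, ...) is already installed:

```bash
# CPU-only build (default flags)
make build

# NVIDIA CUDA
BUILD_TYPE=cublas make build

# AMD ROCm
BUILD_TYPE=hipblas make build

# Intel SYCL, fp16 kernels
BUILD_TYPE=sycl_f16 make build
```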
|
||||
# Detect if we are running on arm64
|
||||
ifneq (,$(findstring aarch64,$(shell uname -m)))
|
||||
ONNX_ARCH=aarch64
|
||||
endif
|
||||
|
||||
ifeq ($(OS),Darwin)
|
||||
ONNX_OS=osx
|
||||
ifneq (,$(findstring aarch64,$(shell uname -m)))
|
||||
ONNX_ARCH=arm64
|
||||
else ifneq (,$(findstring arm64,$(shell uname -m)))
|
||||
ONNX_ARCH=arm64
|
||||
else
|
||||
ONNX_ARCH=x86_64
|
||||
endif
|
||||
|
||||
ifeq ($(OSX_SIGNING_IDENTITY),)
|
||||
OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
|
||||
@@ -154,7 +90,7 @@ ifeq ($(OS),Darwin)
|
||||
BUILD_TYPE=metal
|
||||
# disable metal if on Darwin and any other value is explicitly passed.
|
||||
else ifneq ($(BUILD_TYPE),metal)
|
||||
GGML_CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||
export GGML_NO_ACCELERATE=1
|
||||
export GGML_NO_METAL=1
|
||||
endif
|
||||
@@ -179,7 +115,7 @@ ifeq ($(BUILD_TYPE),cublas)
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),vulkan)
|
||||
GGML_CMAKE_ARGS+=-DGGML_VULKAN=1
|
||||
CMAKE_ARGS+=-DGGML_VULKAN=1
|
||||
endif
|
||||
|
||||
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
||||
@@ -198,10 +134,10 @@ ifeq ($(BUILD_TYPE),hipblas)
|
||||
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||
# llama-ggml has no hipblas support, so override it here.
|
||||
export STABLE_BUILD_TYPE=
|
||||
export GGML_HIP=1
|
||||
export GGML_HIPBLAS=1
|
||||
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
|
||||
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
|
||||
GGML_CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
|
||||
CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
|
||||
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
|
||||
endif
|
||||
|
||||
@@ -239,23 +175,17 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
|
||||
endif
|
||||
|
||||
ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
|
||||
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
|
||||
|
||||
ifeq ($(ONNX_OS),linux)
|
||||
ifeq ($(ONNX_ARCH),x64)
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/bark-cpp
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/stablediffusion-ggml
|
||||
endif
|
||||
endif
|
||||
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/silero-vad
|
||||
ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
|
||||
# Use filter-out to remove the specified backends
|
||||
ALL_GRPC_BACKENDS := $(filter-out $(SKIP_GRPC_BACKEND),$(ALL_GRPC_BACKENDS))
|
||||
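Because the list is assembled with `filter-out`, individual backends can be dropped (or the whole set overridden) from the command line. A sketch, assuming the default `build` target:

```bash
# Skip selected backends; names must match the backend-assets/grpc/... entries above
make build SKIP_GRPC_BACKEND="backend-assets/grpc/rwkv backend-assets/grpc/gpt4all"

# Or build only an explicit set, as the build-minimal target does
make build GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2"
```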
@@ -272,12 +202,23 @@ ifeq ($(BUILD_API_ONLY),true)
|
||||
GRPC_BACKENDS=
|
||||
endif
|
||||
|
||||
export CMAKE_ARGS?=$(GGML_CMAKE_ARGS)
|
||||
|
||||
.PHONY: all test build vendor get-sources prepare-sources prepare
|
||||
|
||||
all: help
|
||||
|
||||
## BERT embeddings
|
||||
sources/go-bert.cpp:
|
||||
mkdir -p sources/go-bert.cpp
|
||||
cd sources/go-bert.cpp && \
|
||||
git init && \
|
||||
git remote add origin $(BERT_REPO) && \
|
||||
git fetch origin && \
|
||||
git checkout $(BERT_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
|
||||
$(MAKE) -C sources/go-bert.cpp libgobert.a
|
||||
|
||||
## go-llama.cpp
|
||||
sources/go-llama.cpp:
|
||||
mkdir -p sources/go-llama.cpp
|
||||
@@ -291,23 +232,6 @@ sources/go-llama.cpp:
|
||||
sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
|
||||
$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
|
||||
|
||||
## bark.cpp
|
||||
sources/bark.cpp:
|
||||
git clone --recursive $(BARKCPP_REPO) sources/bark.cpp && \
|
||||
cd sources/bark.cpp && \
|
||||
git checkout $(BARKCPP_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
sources/bark.cpp/build/libbark.a: sources/bark.cpp
|
||||
cd sources/bark.cpp && \
|
||||
mkdir -p build && \
|
||||
cd build && \
|
||||
cmake $(CMAKE_ARGS) .. && \
|
||||
cmake --build . --config Release
|
||||
|
||||
backend/go/bark/libbark.a: sources/bark.cpp/build/libbark.a
|
||||
$(MAKE) -C backend/go/bark libbark.a
|
||||
|
||||
## go-piper
|
||||
sources/go-piper:
|
||||
mkdir -p sources/go-piper
|
||||
@@ -321,7 +245,33 @@ sources/go-piper:
|
||||
sources/go-piper/libpiper_binding.a: sources/go-piper
|
||||
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
|
||||
|
||||
## stable diffusion (onnx)
|
||||
## GPT4ALL
|
||||
sources/gpt4all:
|
||||
mkdir -p sources/gpt4all
|
||||
cd sources/gpt4all && \
|
||||
git init && \
|
||||
git remote add origin $(GPT4ALL_REPO) && \
|
||||
git fetch origin && \
|
||||
git checkout $(GPT4ALL_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
|
||||
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
|
||||
|
||||
## RWKV
|
||||
sources/go-rwkv.cpp:
|
||||
mkdir -p sources/go-rwkv.cpp
|
||||
cd sources/go-rwkv.cpp && \
|
||||
git init && \
|
||||
git remote add origin $(RWKV_REPO) && \
|
||||
git fetch origin && \
|
||||
git checkout $(RWKV_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp
|
||||
cd sources/go-rwkv.cpp && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
|
||||
|
||||
## stable diffusion
|
||||
sources/go-stable-diffusion:
|
||||
mkdir -p sources/go-stable-diffusion
|
||||
cd sources/go-stable-diffusion && \
|
||||
@@ -334,44 +284,6 @@ sources/go-stable-diffusion:
|
||||
sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
|
||||
CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
|
||||
|
||||
## stablediffusion (ggml)
|
||||
sources/stablediffusion-ggml.cpp:
|
||||
git clone --recursive $(STABLEDIFFUSION_GGML_REPO) sources/stablediffusion-ggml.cpp && \
|
||||
cd sources/stablediffusion-ggml.cpp && \
|
||||
git checkout $(STABLEDIFFUSION_GGML_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a: sources/stablediffusion-ggml.cpp
|
||||
cd sources/stablediffusion-ggml.cpp && \
|
||||
mkdir -p build && \
|
||||
cd build && \
|
||||
cmake $(CMAKE_ARGS) .. && \
|
||||
cmake --build . --config Release
|
||||
|
||||
backend/go/image/stablediffusion-ggml/libsd.a: sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a
|
||||
$(MAKE) -C backend/go/image/stablediffusion-ggml libsd.a
|
||||
|
||||
backend-assets/grpc/stablediffusion-ggml: backend/go/image/stablediffusion-ggml/libsd.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/image/stablediffusion-ggml/ LIBRARY_PATH=$(CURDIR)/backend/go/image/stablediffusion-ggml/ \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion-ggml ./backend/go/image/stablediffusion-ggml/
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/stablediffusion-ggml
|
||||
endif
|
||||
|
||||
sources/onnxruntime:
|
||||
mkdir -p sources/onnxruntime
|
||||
curl -L https://github.com/microsoft/onnxruntime/releases/download/v$(ONNX_VERSION)/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz -o sources/onnxruntime/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
|
||||
cd sources/onnxruntime && tar -xvf onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz && rm onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
|
||||
cd sources/onnxruntime && mv onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION)/* ./
|
||||
|
||||
backend-assets/lib/libonnxruntime.so.1: backend-assets/lib sources/onnxruntime
|
||||
cp -rfv sources/onnxruntime/lib/* backend-assets/lib/
|
||||
ifeq ($(OS),Darwin)
|
||||
mv backend-assets/lib/libonnxruntime.$(ONNX_VERSION).dylib backend-assets/lib/libonnxruntime.dylib
|
||||
else
|
||||
mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
|
||||
endif
|
||||
|
||||
## tiny-dream
|
||||
sources/go-tiny-dream:
|
||||
mkdir -p sources/go-tiny-dream
|
||||
@@ -398,22 +310,28 @@ sources/whisper.cpp:
|
||||
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
||||
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
||||
|
||||
get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
|
||||
get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
|
||||
|
||||
replace:
|
||||
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
|
||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
|
||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp
|
||||
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
|
||||
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
||||
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
|
||||
$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
|
||||
|
||||
dropreplace:
|
||||
$(GOCMD) mod edit -dropreplace github.com/donomii/go-rwkv.cpp
|
||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
|
||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
|
||||
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-bert.cpp
|
||||
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
|
||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
|
||||
$(GOCMD) mod edit -dropreplace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang
|
||||
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
|
||||
|
||||
prepare-sources: get-sources replace
|
||||
@@ -423,8 +341,11 @@ prepare-sources: get-sources replace
|
||||
rebuild: ## Rebuilds the project
|
||||
$(GOCMD) clean -cache
|
||||
$(MAKE) -C sources/go-llama.cpp clean
|
||||
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean
|
||||
$(MAKE) -C sources/go-rwkv.cpp clean
|
||||
$(MAKE) -C sources/whisper.cpp clean
|
||||
$(MAKE) -C sources/go-stable-diffusion clean
|
||||
$(MAKE) -C sources/go-bert.cpp clean
|
||||
$(MAKE) -C sources/go-piper clean
|
||||
$(MAKE) -C sources/go-tiny-dream clean
|
||||
$(MAKE) build
|
||||
@@ -439,9 +360,7 @@ clean: ## Remove build related file
|
||||
rm -rf release/
|
||||
rm -rf backend-assets/*
|
||||
$(MAKE) -C backend/cpp/grpc clean
|
||||
$(MAKE) -C backend/go/bark clean
|
||||
$(MAKE) -C backend/cpp/llama clean
|
||||
$(MAKE) -C backend/go/image/stablediffusion-ggml clean
|
||||
rm -rf backend/cpp/llama-* || true
|
||||
$(MAKE) dropreplace
|
||||
$(MAKE) protogen-clean
|
||||
@@ -452,16 +371,12 @@ clean-tests:
|
||||
rm -rf test-dir
|
||||
rm -rf core/http/backend-assets
|
||||
|
||||
clean-dc: clean
|
||||
cp -r /build/backend-assets /workspace/backend-assets
|
||||
|
||||
## Build:
|
||||
build: prepare backend-assets grpcs ## Build the project
|
||||
$(info ${GREEN}I local-ai build info:${RESET})
|
||||
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
|
||||
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
|
||||
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
|
||||
$(info ${GREEN}I UPX: ${YELLOW}$(UPX)${RESET})
|
||||
ifneq ($(BACKEND_LIBS),)
|
||||
$(MAKE) backend-assets/lib
|
||||
cp -f $(BACKEND_LIBS) backend-assets/lib/
|
||||
@@ -472,7 +387,7 @@ build-minimal:
|
||||
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build
|
||||
|
||||
build-api:
|
||||
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=p2p $(MAKE) build
|
||||
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
|
||||
|
||||
backend-assets/lib:
|
||||
mkdir -p backend-assets/lib
|
||||
@@ -483,7 +398,7 @@ ifeq ($(DETECT_LIBS),true)
|
||||
scripts/prepare-libs.sh backend-assets/grpc/llama-cpp-avx2
|
||||
endif
|
||||
ifeq ($(OS),Darwin)
|
||||
BUILD_TYPE=none $(MAKE) backend-assets/grpc/llama-cpp-fallback
|
||||
$(info ${GREEN}I Skip CUDA/hipblas build on MacOS${RESET})
|
||||
else
|
||||
$(MAKE) backend-assets/grpc/llama-cpp-cuda
|
||||
$(MAKE) backend-assets/grpc/llama-cpp-hipblas
|
||||
@@ -506,7 +421,7 @@ else
|
||||
endif
|
||||
|
||||
dist-cross-linux-arm64:
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" GO_TAGS="p2p" \
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
|
||||
STATIC=true $(MAKE) build
|
||||
mkdir -p release
|
||||
# if BUILD_ID is empty, then we don't append it to the binary name
|
||||
@@ -532,6 +447,8 @@ test-models/testmodel.ggml:
|
||||
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
|
||||
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
|
||||
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
|
||||
wget -q https://huggingface.co/mudler/rwkv-4-raven-1.5B-ggml/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%2525-Other1%2525-20230425-ctx4096_Q4_0.bin -O test-models/rwkv
|
||||
wget -q https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O test-models/rwkv.tokenizer.json
|
||||
cp tests/models_fixtures/* test-models
|
||||
|
||||
prepare-test: grpcs
|
||||
@@ -543,7 +460,8 @@ test: prepare test-models/testmodel.ggml grpcs
|
||||
export GO_TAGS="tts stablediffusion debug"
|
||||
$(MAKE) prepare-test
|
||||
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
||||
$(MAKE) test-gpt4all
|
||||
$(MAKE) test-llama
|
||||
$(MAKE) test-llama-gguf
|
||||
$(MAKE) test-tts
|
||||
@@ -553,46 +471,50 @@ prepare-e2e:
|
||||
mkdir -p $(TEST_DIR)
|
||||
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
|
||||
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
|
||||
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 --build-arg FFMPEG=true -t localai-tests .
|
||||
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=4 --build-arg FFMPEG=true -t localai-tests .
|
||||
|
||||
run-e2e-image:
|
||||
ls -liah $(abspath ./tests/e2e-fixtures)
|
||||
docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests
|
||||
|
||||
run-e2e-aio: protogen-go
|
||||
run-e2e-aio:
|
||||
@echo 'Running e2e AIO tests'
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e-aio
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio
|
||||
|
||||
test-e2e:
|
||||
@echo 'Running e2e tests'
|
||||
BUILD_TYPE=$(BUILD_TYPE) \
|
||||
LOCALAI_API=http://$(E2E_BRIDGE_IP):5390/v1 \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e
|
||||
|
||||
teardown-e2e:
|
||||
rm -rf $(TEST_DIR) || true
|
||||
docker stop $$(docker ps -q --filter ancestor=localai-tests)
|
||||
|
||||
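Taken together, the e2e targets above form a small pipeline. A sketch of a local run (run-e2e-image starts the container with `--gpus all`, so GPU access is assumed):

```bash
make prepare-e2e      # fetch the test model and build the localai-tests image
make run-e2e-image    # start the container, exposing port 5390
make test-e2e         # run ginkgo against http://$E2E_BRIDGE_IP:5390/v1
make teardown-e2e     # remove the test dir and stop the container
```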
test-gpt4all: prepare-test
|
||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r $(TEST_PATHS)
|
||||
|
||||
test-llama: prepare-test
|
||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r $(TEST_PATHS)
|
||||
|
||||
test-llama-gguf: prepare-test
|
||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts 5 -v -r $(TEST_PATHS)
|
||||
|
||||
test-tts: prepare-test
|
||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts 1 -v -r $(TEST_PATHS)
|
||||
|
||||
test-stablediffusion: prepare-test
|
||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts 1 -v -r $(TEST_PATHS)
|
||||
|
||||
test-stores: backend-assets/grpc/local-store
|
||||
mkdir -p tests/integration/backend-assets/grpc
|
||||
cp -f backend-assets/grpc/local-store tests/integration/backend-assets/grpc/
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts $(TEST_FLAKES) -v -r tests/integration
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts 1 -v -r tests/integration
|
||||
|
||||
test-container:
|
||||
docker build --target requirements -t local-ai-test-container .
|
||||
@@ -628,10 +550,10 @@ protogen-go-clean:
|
||||
$(RM) bin/*
|
||||
|
||||
.PHONY: protogen-python
|
||||
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
|
||||
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
|
||||
|
||||
.PHONY: protogen-python-clean
|
||||
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
|
||||
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
|
||||
|
||||
.PHONY: autogptq-protogen
|
||||
autogptq-protogen:
|
||||
@@ -665,6 +587,14 @@ diffusers-protogen:
|
||||
diffusers-protogen-clean:
|
||||
$(MAKE) -C backend/python/diffusers protogen-clean
|
||||
|
||||
.PHONY: exllama-protogen
|
||||
exllama-protogen:
|
||||
$(MAKE) -C backend/python/exllama protogen
|
||||
|
||||
.PHONY: exllama-protogen-clean
|
||||
exllama-protogen-clean:
|
||||
$(MAKE) -C backend/python/exllama protogen-clean
|
||||
|
||||
.PHONY: exllama2-protogen
|
||||
exllama2-protogen:
|
||||
$(MAKE) -C backend/python/exllama2 protogen
|
||||
@@ -681,6 +611,14 @@ mamba-protogen:
|
||||
mamba-protogen-clean:
|
||||
$(MAKE) -C backend/python/mamba protogen-clean
|
||||
|
||||
.PHONY: petals-protogen
|
||||
petals-protogen:
|
||||
$(MAKE) -C backend/python/petals protogen
|
||||
|
||||
.PHONY: petals-protogen-clean
|
||||
petals-protogen-clean:
|
||||
$(MAKE) -C backend/python/petals protogen-clean
|
||||
|
||||
.PHONY: rerankers-protogen
|
||||
rerankers-protogen:
|
||||
$(MAKE) -C backend/python/rerankers protogen
|
||||
@@ -761,6 +699,8 @@ prepare-extra-conda-environments: protogen-python
|
||||
$(MAKE) -C backend/python/parler-tts
|
||||
$(MAKE) -C backend/python/vall-e-x
|
||||
$(MAKE) -C backend/python/openvoice
|
||||
$(MAKE) -C backend/python/exllama
|
||||
$(MAKE) -C backend/python/petals
|
||||
$(MAKE) -C backend/python/exllama2
|
||||
|
||||
prepare-test-extra: protogen-python
|
||||
@@ -781,14 +721,25 @@ backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_bindin
|
||||
mkdir -p backend-assets/espeak-ng-data
|
||||
@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
|
||||
|
||||
backend-assets/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
|
||||
mkdir -p backend-assets/gpt4all
|
||||
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true
|
||||
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
|
||||
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
|
||||
|
||||
backend-assets/grpc: protogen-go replace
|
||||
mkdir -p backend-assets/grpc
|
||||
|
||||
backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
|
||||
|
||||
backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
|
||||
|
||||
backend-assets/grpc/huggingface: backend-assets/grpc
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/huggingface
|
||||
endif
|
||||
|
||||
backend/cpp/llama/llama.cpp:
|
||||
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
|
||||
@@ -843,6 +794,10 @@ backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc backend/cpp/llama/ll
|
||||
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
|
||||
# TODO: every binary should have its own folder instead, so can have different metal implementations
|
||||
ifeq ($(BUILD_TYPE),metal)
|
||||
cp backend/cpp/llama-fallback/llama.cpp/build/bin/default.metallib backend-assets/grpc/
|
||||
endif
|
||||
|
||||
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-cuda
|
||||
@@ -855,7 +810,7 @@ backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc backend/cpp/llama/lla
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-hipblas
|
||||
$(MAKE) -C backend/cpp/llama-hipblas purge
|
||||
$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
|
||||
BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
|
||||
|
||||
backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||
@@ -886,57 +841,29 @@ backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
|
||||
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/llama-ggml
|
||||
endif
|
||||
|
||||
backend-assets/grpc/bark-cpp: backend/go/bark/libbark.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/bark/ LIBRARY_PATH=$(CURDIR)/backend/go/bark/ \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bark-cpp ./backend/go/bark/
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/bark-cpp
|
||||
endif
|
||||
|
||||
backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
|
||||
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/piper
|
||||
endif
|
||||
|
||||
backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
|
||||
|
||||
backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/stablediffusion
|
||||
endif
|
||||
|
||||
backend-assets/grpc/silero-vad: backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/silero-vad
|
||||
endif
|
||||
|
||||
backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/tinydream
|
||||
endif
|
||||
|
||||
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/whisper
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/whisper
|
||||
endif
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
|
||||
|
||||
backend-assets/grpc/local-store: backend-assets/grpc
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/local-store
|
||||
endif
|
||||
|
||||
grpcs: prepare $(GRPC_BACKENDS)
|
||||
|
||||
@@ -978,7 +905,7 @@ docker-aio-all:
|
||||
|
||||
docker-image-intel:
|
||||
docker build \
|
||||
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 \
|
||||
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
|
||||
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
|
||||
--build-arg GO_TAGS="none" \
|
||||
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
||||
@@ -986,7 +913,7 @@ docker-image-intel:
|
||||
|
||||
docker-image-intel-xpu:
|
||||
docker build \
|
||||
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 \
|
||||
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
|
||||
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
|
||||
--build-arg GO_TAGS="none" \
|
||||
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
||||
|
||||
README.md
@@ -38,13 +38,9 @@
|
||||
</a>
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://trendshift.io/repositories/1484" target="_blank"><img src="https://trendshift.io/api/badge/repositories/1484" alt="go-skynet%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
|
||||
</p>
|
||||
|
||||
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
|
||||
>
|
||||
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples)
|
||||
> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
||||
|
||||
[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
|
||||
|
||||
@@ -60,43 +56,22 @@ curl https://localai.io/install.sh | sh
|
||||
|
||||
Or run with docker:
|
||||
```bash
|
||||
# CPU only image:
|
||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-cpu
|
||||
|
||||
# Nvidia GPU:
|
||||
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
|
||||
|
||||
# CPU and GPU image (bigger size):
|
||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
|
||||
|
||||
# AIO images (it will pre-download a set of models ready for use, see https://localai.io/basics/container/)
|
||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
|
||||
```
|
||||
|
||||
To load models:
|
||||
|
||||
```bash
|
||||
# From the model gallery (see available models with `local-ai models list`, in the WebUI from the model tab, or visiting https://models.localai.io)
|
||||
local-ai run llama-3.2-1b-instruct:q4_k_m
|
||||
# Start LocalAI with the phi-2 model directly from huggingface
|
||||
local-ai run huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf
|
||||
# Install and run a model from the Ollama OCI registry
|
||||
local-ai run ollama://gemma:2b
|
||||
# Run a model from a configuration file
|
||||
local-ai run https://gist.githubusercontent.com/.../phi-2.yaml
|
||||
# Install and run a model from a standard OCI registry (e.g., Docker Hub)
|
||||
local-ai run oci://localai/phi-2:latest
|
||||
# Alternative images:
|
||||
# - if you have an Nvidia GPU:
|
||||
# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
|
||||
# - without preconfigured models
|
||||
# docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
|
||||
# - without preconfigured models for Nvidia GPUs
|
||||
# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
|
||||
```
|
||||
|
||||
[💻 Getting started](https://localai.io/basics/getting_started/index.html)
|
||||
|
||||
## 📰 Latest project news
|
||||
## 🔥🔥 Hot topics / Roadmap
|
||||
|
||||
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
||||
|
||||
- Dec 2024: stablediffusion.cpp backend (ggml) added ( https://github.com/mudler/LocalAI/pull/4289 )
|
||||
- Nov 2024: Bark.cpp backend added ( https://github.com/mudler/LocalAI/pull/4287 )
|
||||
- Nov 2024: Voice activity detection models (**VAD**) added to the API: https://github.com/mudler/LocalAI/pull/4204
|
||||
- Oct 2024: examples moved to [LocalAI-examples](https://github.com/mudler/LocalAI-examples)
|
||||
- Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
|
||||
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
|
||||
- June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
|
||||
- June 2024: Support for models from OCI registries: https://github.com/mudler/LocalAI/pull/2628
|
||||
@@ -107,13 +82,8 @@ local-ai run oci://localai/phi-2:latest
|
||||
- May 2024: Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
|
||||
- April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121
|
||||
|
||||
Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
||||
Hot topics (looking for contributors):
|
||||
|
||||
## 🔥🔥 Hot topics (looking for help):
|
||||
|
||||
- Multimodal with vLLM and Video understanding: https://github.com/mudler/LocalAI/pull/3729
|
||||
- Realtime API https://github.com/mudler/LocalAI/issues/3714
|
||||
- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
|
||||
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
|
||||
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
|
||||
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
|
||||
@@ -166,9 +136,6 @@ Other:
|
||||
- Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
|
||||
- Shell-Pilot(Interact with LLM using LocalAI models via pure shell scripts on your Linux or MacOS system) https://github.com/reid41/shell-pilot
|
||||
- Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
|
||||
- Another Telegram Bot https://github.com/JackBekket/Hellper
|
||||
- Auto-documentation https://github.com/JackBekket/Reflexia
|
||||
- Github bot which answer on issues, with code and documentation as context https://github.com/JackBekket/GitHelper
|
||||
- Github Actions: https://github.com/marketplace/actions/start-localai
|
||||
- Examples: https://github.com/mudler/LocalAI/tree/master/examples/
|
||||
|
||||
@@ -183,7 +150,6 @@ Other:
|
||||
|
||||
## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social)
|
||||
|
||||
- [Run Visual studio code with LocalAI (SUSE)](https://www.suse.com/c/running-ai-locally/)
|
||||
- 🆕 [Run LocalAI on Jetson Nano Devkit](https://mudler.pm/posts/local-ai-jetson-nano-devkit/)
|
||||
- [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/blog/low-code-llm-apps-with-local-ai-flowise-and-pulumi/)
|
||||
- [Run LocalAI on AWS](https://staleks.hashnode.dev/installing-localai-on-aws-ec2-instance)
|
||||
@@ -243,6 +209,7 @@ LocalAI couldn't have been built without the help of great software already avai
|
||||
- https://github.com/antimatter15/alpaca.cpp
|
||||
- https://github.com/EdVince/Stable-Diffusion-NCNN
|
||||
- https://github.com/ggerganov/whisper.cpp
|
||||
- https://github.com/saharNooby/rwkv.cpp
|
||||
- https://github.com/rhasspy/piper
|
||||
|
||||
## 🤗 Contributors
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
name: text-embedding-ada-002
|
||||
embeddings: true
|
||||
backend: bert-embeddings
|
||||
parameters:
|
||||
model: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/llama-3.2-1b-instruct-q4_k_m.gguf
|
||||
model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin
|
||||
|
||||
usage: |
|
||||
You can test this model with curl like this:
|
||||
|
||||
@@ -2,7 +2,7 @@ backend: llama-cpp
|
||||
context_size: 4096
|
||||
f16: true
|
||||
mmap: true
|
||||
name: gpt-4o
|
||||
name: gpt-4-vision-preview
|
||||
|
||||
roles:
|
||||
user: "USER:"
|
||||
|
||||
@@ -2,7 +2,7 @@ backend: llama-cpp
|
||||
context_size: 4096
|
||||
f16: true
|
||||
mmap: true
|
||||
name: gpt-4o
|
||||
name: gpt-4-vision-preview
|
||||
|
||||
roles:
|
||||
user: "USER:"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
name: stablediffusion
|
||||
parameters:
|
||||
model: Lykon/dreamshaper-8
|
||||
model: runwayml/stable-diffusion-v1-5
|
||||
backend: diffusers
|
||||
step: 25
|
||||
f16: true
|
||||
|
||||
@@ -2,7 +2,7 @@ backend: llama-cpp
|
||||
context_size: 4096
|
||||
mmap: false
|
||||
f16: false
|
||||
name: gpt-4o
|
||||
name: gpt-4-vision-preview
|
||||
|
||||
roles:
|
||||
user: "USER:"
|
||||
|
||||
@@ -16,7 +16,6 @@ service Backend {
  rpc GenerateImage(GenerateImageRequest) returns (Result) {}
  rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {}
  rpc TTS(TTSRequest) returns (Result) {}
  rpc SoundGeneration(SoundGenerationRequest) returns (Result) {}
  rpc TokenizeString(PredictOptions) returns (TokenizationResponse) {}
  rpc Status(HealthMessage) returns (StatusResponse) {}

@@ -26,21 +25,6 @@ service Backend {
  rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}

  rpc Rerank(RerankRequest) returns (RerankResult) {}

  rpc GetMetrics(MetricsRequest) returns (MetricsResponse);

  rpc VAD(VADRequest) returns (VADResponse) {}
}

// Define the empty request
message MetricsRequest {}

message MetricsResponse {
  int32 slot_id = 1;
  string prompt_json_for_slot = 2; // Stores the prompt as a JSON string.
  float tokens_per_second = 3;
  int32 tokens_generated = 4;
  int32 prompt_tokens_processed = 5;
}
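The GetMetrics RPC exposes per-slot generation statistics. The call below is only an illustrative sketch: the proto package name (`backend`), the proto path, and the listening address are assumptions, since backends are normally started and addressed by LocalAI itself rather than queried by hand.

```bash
# Hypothetical direct query of a running backend's metrics (address and paths are placeholders)
grpcurl -plaintext \
  -import-path backend -proto backend.proto \
  -d '{}' 127.0.0.1:50051 backend.Backend/GetMetrics
```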
|
||||
message RerankRequest {
|
||||
@@ -149,9 +133,6 @@ message PredictOptions {
|
||||
repeated string Images = 42;
|
||||
bool UseTokenizerTemplate = 43;
|
||||
repeated Message Messages = 44;
|
||||
repeated string Videos = 45;
|
||||
repeated string Audios = 46;
|
||||
string CorrelationId = 47;
|
||||
}
|
||||
|
||||
// The response message containing the result
|
||||
@@ -221,7 +202,6 @@ message ModelOptions {
|
||||
int32 SwapSpace = 53;
|
||||
int32 MaxModelLen = 54;
|
||||
int32 TensorParallelSize = 55;
|
||||
string LoadFormat = 58;
|
||||
|
||||
string MMProj = 41;
|
||||
|
||||
@@ -235,16 +215,6 @@ message ModelOptions {
|
||||
|
||||
bool FlashAttention = 56;
|
||||
bool NoKVOffload = 57;
|
||||
|
||||
string ModelPath = 59;
|
||||
|
||||
repeated string LoraAdapters = 60;
|
||||
repeated float LoraScales = 61;
|
||||
|
||||
repeated string Options = 62;
|
||||
|
||||
string CacheTypeKey = 63;
|
||||
string CacheTypeValue = 64;
|
||||
}
|
||||
|
||||
message Result {
|
||||
@@ -300,30 +270,6 @@ message TTSRequest {
|
||||
optional string language = 5;
|
||||
}
|
||||
|
||||
message VADRequest {
|
||||
repeated float audio = 1;
|
||||
}
|
||||
|
||||
message VADSegment {
|
||||
float start = 1;
|
||||
float end = 2;
|
||||
}
|
||||
|
||||
message VADResponse {
|
||||
repeated VADSegment segments = 1;
|
||||
}
|
||||
|
||||
message SoundGenerationRequest {
|
||||
string text = 1;
|
||||
string model = 2;
|
||||
string dst = 3;
|
||||
optional float duration = 4;
|
||||
optional float temperature = 5;
|
||||
optional bool sample = 6;
|
||||
optional string src = 7;
|
||||
optional int32 src_divisor = 8;
|
||||
}
|
||||
|
||||
message TokenizationResponse {
|
||||
int32 length = 1;
|
||||
repeated int32 tokens = 2;
|
||||
|
||||
@@ -7,6 +7,44 @@ BUILD_TYPE?=
|
||||
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
|
||||
TARGET?=--target grpc-server
|
||||
|
||||
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
|
||||
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
|
||||
ifeq ($(BUILD_TYPE),cublas)
|
||||
CMAKE_ARGS+=-DGGML_CUDA=ON
|
||||
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||
# to CMAKE_ARGS automatically
|
||||
else ifeq ($(BUILD_TYPE),openblas)
|
||||
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||
else ifeq ($(BUILD_TYPE),clblas)
|
||||
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
|
||||
else ifeq ($(BUILD_TYPE),hipblas)
|
||||
CMAKE_ARGS+=-DGGML_HIPBLAS=ON
|
||||
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
|
||||
# But if it's OSX without metal, disable it here
|
||||
else ifeq ($(OS),Darwin)
|
||||
ifneq ($(BUILD_TYPE),metal)
|
||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||
else
|
||||
CMAKE_ARGS+=-DGGML_METAL=ON
|
||||
# Until this is tested properly, we disable embedded metal file
|
||||
# as we already embed it as part of the LocalAI assets
|
||||
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=OFF
|
||||
TARGET+=--target ggml-metal
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),sycl_f16)
|
||||
CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),sycl_f32)
|
||||
CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
||||
endif
|
||||
|
||||
llama.cpp:
|
||||
mkdir -p llama.cpp
|
||||
cd llama.cpp && \
|
||||
@@ -34,7 +72,7 @@ clean: purge
|
||||
rm -rf llama.cpp
|
||||
|
||||
grpc-server: llama.cpp llama.cpp/examples/grpc-server
|
||||
@echo "Building grpc-server for llama.cpp with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
|
||||
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
|
||||
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
||||
+bash -c "source $(ONEAPI_VARS); \
|
||||
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)"
|
||||
|
||||
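The grpc-server target wires llama.cpp's CMake build to the flags selected above. A sketch of building it in isolation; the llama.cpp commit placeholder stands in for the CPPLLAMA_VERSION pin that the top-level Makefile normally passes down as LLAMA_VERSION:

```bash
# Sketch: build only the llama.cpp gRPC server variant
LLAMA_VERSION=<llama.cpp commit> BUILD_TYPE=cublas \
  make -C backend/cpp/llama grpc-server
```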
@@ -13,15 +13,15 @@
|
||||
#include <getopt.h>
|
||||
#include "clip.h"
|
||||
#include "llava.h"
|
||||
#include "log.h"
|
||||
#include "stb_image.h"
|
||||
#include "common.h"
|
||||
#include "json.hpp"
|
||||
#include "llama.h"
|
||||
#include "grammar-parser.h"
|
||||
#include "backend.pb.h"
|
||||
#include "backend.grpc.pb.h"
|
||||
#include "utils.hpp"
|
||||
#include "sampling.h"
|
||||
|
||||
// include std::regex
|
||||
#include <cstddef>
|
||||
#include <thread>
|
||||
@@ -113,7 +113,7 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
|
||||
std::string ret;
|
||||
for (; begin != end; ++begin)
|
||||
{
|
||||
ret += common_token_to_piece(ctx, *begin);
|
||||
ret += llama_token_to_piece(ctx, *begin);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
@@ -121,7 +121,7 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
|
||||
// format incomplete utf-8 multibyte character for output
|
||||
static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token)
|
||||
{
|
||||
std::string out = token == -1 ? "" : common_token_to_piece(ctx, token);
|
||||
std::string out = token == -1 ? "" : llama_token_to_piece(ctx, token);
|
||||
// if the size is 1 and first bit is 1, meaning it's a partial character
|
||||
// (size > 1 meaning it's already a known token)
|
||||
if (out.size() == 1 && (out[0] & 0x80) == 0x80)
|
||||
@@ -203,8 +203,8 @@ struct llama_client_slot
|
||||
std::string stopping_word;
|
||||
|
||||
// sampling
|
||||
struct common_params_sampling sparams;
|
||||
common_sampler *ctx_sampling = nullptr;
|
||||
struct llama_sampling_params sparams;
|
||||
llama_sampling_context *ctx_sampling = nullptr;
|
||||
|
||||
int32_t ga_i = 0; // group-attention state
|
||||
int32_t ga_n = 1; // group-attention factor
|
||||
@@ -257,7 +257,7 @@ struct llama_client_slot
|
||||
images.clear();
|
||||
}
|
||||
|
||||
bool has_budget(common_params &global_params) {
|
||||
bool has_budget(gpt_params &global_params) {
|
||||
if (params.n_predict == -1 && global_params.n_predict == -1)
|
||||
{
|
||||
return true; // limitless
|
||||
@@ -391,39 +391,6 @@ struct llama_metrics {
|
||||
}
|
||||
};
|
||||
|
||||
struct llava_embd_batch {
|
||||
std::vector<llama_pos> pos;
|
||||
std::vector<int32_t> n_seq_id;
|
||||
std::vector<llama_seq_id> seq_id_0;
|
||||
std::vector<llama_seq_id *> seq_ids;
|
||||
std::vector<int8_t> logits;
|
||||
llama_batch batch;
|
||||
llava_embd_batch(float * embd, int32_t n_tokens, llama_pos pos_0, llama_seq_id seq_id) {
|
||||
pos .resize(n_tokens);
|
||||
n_seq_id.resize(n_tokens);
|
||||
seq_ids .resize(n_tokens + 1);
|
||||
logits .resize(n_tokens);
|
||||
seq_id_0.resize(1);
|
||||
seq_id_0[0] = seq_id;
|
||||
seq_ids [n_tokens] = nullptr;
|
||||
batch = {
|
||||
/*n_tokens =*/ n_tokens,
|
||||
/*tokens =*/ nullptr,
|
||||
/*embd =*/ embd,
|
||||
/*pos =*/ pos.data(),
|
||||
/*n_seq_id =*/ n_seq_id.data(),
|
||||
/*seq_id =*/ seq_ids.data(),
|
||||
/*logits =*/ logits.data(),
|
||||
};
|
||||
for (int i = 0; i < n_tokens; i++) {
|
||||
batch.pos [i] = pos_0 + i;
|
||||
batch.n_seq_id[i] = 1;
|
||||
batch.seq_id [i] = seq_id_0.data();
|
||||
batch.logits [i] = false;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct llama_server_context
|
||||
{
|
||||
llama_model *model = nullptr;
|
||||
@@ -431,7 +398,7 @@ struct llama_server_context
|
||||
|
||||
clip_ctx *clp_ctx = nullptr;
|
||||
|
||||
common_params params;
|
||||
gpt_params params;
|
||||
|
||||
llama_batch batch;
|
||||
|
||||
@@ -474,7 +441,7 @@ struct llama_server_context
|
||||
}
|
||||
}
|
||||
|
||||
bool load_model(const common_params ¶ms_)
|
||||
bool load_model(const gpt_params ¶ms_)
|
||||
{
|
||||
params = params_;
|
||||
if (!params.mmproj.empty()) {
|
||||
@@ -482,7 +449,7 @@ struct llama_server_context
|
||||
LOG_INFO("Multi Modal Mode Enabled", {});
|
||||
clp_ctx = clip_model_load(params.mmproj.c_str(), /*verbosity=*/ 1);
|
||||
if(clp_ctx == nullptr) {
|
||||
LOG_ERR("unable to load clip model: %s", params.mmproj.c_str());
|
||||
LOG_ERROR("unable to load clip model", {{"model", params.mmproj}});
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -491,12 +458,10 @@ struct llama_server_context
|
||||
}
|
||||
}
|
||||
|
||||
common_init_result common_init = common_init_from_params(params);
|
||||
model = common_init.model;
|
||||
ctx = common_init.context;
|
||||
std::tie(model, ctx) = llama_init_from_gpt_params(params);
|
||||
if (model == nullptr)
|
||||
{
|
||||
LOG_ERR("unable to load model: %s", params.model.c_str());
|
||||
LOG_ERROR("unable to load model", {{"model", params.model}});
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -504,7 +469,7 @@ struct llama_server_context
|
||||
const int n_embd_clip = clip_n_mmproj_embd(clp_ctx);
|
||||
const int n_embd_llm = llama_n_embd(model);
|
||||
if (n_embd_clip != n_embd_llm) {
|
||||
LOG("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
|
||||
LOG_TEE("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
|
||||
llama_free(ctx);
|
||||
llama_free_model(model);
|
||||
return false;
|
||||
@@ -513,7 +478,7 @@ struct llama_server_context
|
||||
|
||||
n_ctx = llama_n_ctx(ctx);
|
||||
|
||||
add_bos_token = llama_add_bos_token(model);
|
||||
add_bos_token = llama_should_add_bos_token(model);
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -523,21 +488,11 @@ struct llama_server_context
|
||||
std::vector<char> buf(1);
|
||||
int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size());
|
||||
if (res < 0) {
|
||||
LOG_ERR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", __func__);
|
||||
LOG_ERROR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", {});
|
||||
sparams.chat_template = "<|im_start|>"; // llama_chat_apply_template only checks if <|im_start|> exist in the template
|
||||
}
|
||||
}
|
||||
|
||||
llama_client_slot* get_active_slot() {
|
||||
for (llama_client_slot& slot : slots) {
|
||||
// Check if the slot is currently processing
|
||||
if (slot.is_processing()) {
|
||||
return &slot; // Return the active slot
|
||||
}
|
||||
}
|
||||
return nullptr; // No active slot found
|
||||
}
|
||||
|
||||
void initialize() {
|
||||
// create slots
|
||||
all_slots_are_idle = true;
|
||||
@@ -611,12 +566,12 @@ struct llama_server_context
|
||||
std::vector<llama_token> p;
|
||||
if (first)
|
||||
{
|
||||
p = common_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
|
||||
p = ::llama_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
|
||||
first = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
p = common_tokenize(ctx, s, false, TMP_FORCE_SPECIAL);
|
||||
p = ::llama_tokenize(ctx, s, false, TMP_FORCE_SPECIAL);
|
||||
}
|
||||
prompt_tokens.insert(prompt_tokens.end(), p.begin(), p.end());
|
||||
}
|
||||
@@ -633,7 +588,7 @@ struct llama_server_context
|
||||
else
|
||||
{
|
||||
auto s = json_prompt.template get<std::string>();
|
||||
prompt_tokens = common_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
|
||||
prompt_tokens = ::llama_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
|
||||
}
|
||||
|
||||
return prompt_tokens;
|
||||
@@ -662,7 +617,7 @@ struct llama_server_context
|
||||
|
||||
bool launch_slot_with_data(llama_client_slot* &slot, json data) {
|
||||
slot_params default_params;
|
||||
common_params_sampling default_sparams;
|
||||
llama_sampling_params default_sparams;
|
||||
|
||||
slot->params.stream = json_value(data, "stream", false);
|
||||
slot->params.cache_prompt = json_value(data, "cache_prompt", false);
|
||||
@@ -670,7 +625,8 @@ struct llama_server_context
|
||||
slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
|
||||
slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
|
||||
slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p);
|
||||
slot->sparams.typ_p = json_value(data, "typical_p", default_sparams.typ_p);
|
||||
slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
|
||||
slot->sparams.typical_p = json_value(data, "typical_p", default_sparams.typical_p);
|
||||
slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
|
||||
slot->sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range);
|
||||
slot->sparams.dynatemp_exponent = json_value(data, "dynatemp_exponent", default_sparams.dynatemp_exponent);
|
||||
@@ -683,7 +639,7 @@ struct llama_server_context
|
||||
slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
|
||||
slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
|
||||
slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
|
||||
slot->sparams.seed = json_value(data, "seed", default_sparams.seed);
|
||||
slot->params.seed = json_value(data, "seed", default_params.seed);
|
||||
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
|
||||
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
|
||||
slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
|
||||
@@ -707,7 +663,6 @@ struct llama_server_context
|
||||
slot->params.input_prefix = "";
|
||||
}
|
||||
|
||||
|
||||
if (data.count("input_suffix") != 0)
|
||||
{
|
||||
slot->params.input_suffix = data["input_suffix"];
|
||||
@@ -726,10 +681,6 @@ struct llama_server_context
|
||||
slot->prompt = "";
|
||||
}
|
||||
|
||||
if (json_value(data, "ignore_eos", false)) {
|
||||
slot->sparams.logit_bias.push_back({llama_token_eos(model), -INFINITY});
|
||||
}
|
||||
/*
|
||||
slot->sparams.penalty_prompt_tokens.clear();
|
||||
slot->sparams.use_penalty_prompt_tokens = false;
|
||||
const auto &penalty_prompt = data.find("penalty_prompt");
|
||||
@@ -765,10 +716,14 @@ struct llama_server_context
|
||||
slot->sparams.use_penalty_prompt_tokens = true;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
slot->sparams.logit_bias.clear();
|
||||
|
||||
if (json_value(data, "ignore_eos", false))
|
||||
{
|
||||
slot->sparams.logit_bias[llama_token_eos(model)] = -INFINITY;
|
||||
}
|
||||
|
||||
const auto &logit_bias = data.find("logit_bias");
|
||||
if (logit_bias != data.end() && logit_bias->is_array())
|
||||
{
|
||||
@@ -796,21 +751,21 @@ struct llama_server_context
|
||||
llama_token tok = el[0].get<llama_token>();
|
||||
if (tok >= 0 && tok < n_vocab)
|
||||
{
|
||||
slot->sparams.logit_bias.push_back({tok, bias});
|
||||
slot->sparams.logit_bias[tok] = bias;
|
||||
}
|
||||
}
|
||||
else if (el[0].is_string())
|
||||
{
|
||||
auto toks = common_tokenize(model, el[0].get<std::string>(), false);
|
||||
auto toks = llama_tokenize(model, el[0].get<std::string>(), false);
|
||||
for (auto tok : toks)
|
||||
{
|
||||
slot->sparams.logit_bias.push_back({tok, bias});
|
||||
slot->sparams.logit_bias[tok] = bias;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
slot->params.antiprompt.clear();
|
||||
|
||||
const auto &stop = data.find("stop");
|
||||
@@ -824,22 +779,24 @@ struct llama_server_context
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const auto & samplers = data.find("samplers");
|
||||
if (samplers != data.end() && samplers->is_array()) {
|
||||
|
||||
const auto &samplers_sequence = data.find("samplers");
|
||||
if (samplers_sequence != data.end() && samplers_sequence->is_array())
|
||||
{
|
||||
std::vector<std::string> sampler_names;
|
||||
for (const auto & name : *samplers) {
|
||||
if (name.is_string()) {
|
||||
sampler_names.emplace_back(name);
|
||||
}
|
||||
for (const auto &sampler_name : *samplers_sequence)
|
||||
{
|
||||
if (sampler_name.is_string())
|
||||
{
|
||||
sampler_names.emplace_back(sampler_name);
|
||||
}
|
||||
slot->sparams.samplers = common_sampler_types_from_names(sampler_names, false);
|
||||
}
|
||||
slot->sparams.samplers_sequence = llama_sampling_types_from_names(sampler_names, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
slot->sparams.samplers = default_sparams.samplers;
|
||||
slot->sparams.samplers_sequence = default_sparams.samplers_sequence;
|
||||
}
|
||||
|
||||
|
||||
if (multimodal)
|
||||
{
|
||||
@@ -855,11 +812,10 @@ struct llama_server_context
|
||||
img_sl.img_data = clip_image_u8_init();
|
||||
if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data))
|
||||
{
|
||||
LOG_ERR("%s: failed to load image, slot_id: %d, img_sl_id: %d",
|
||||
__func__,
|
||||
slot->id,
|
||||
img_sl.id
|
||||
);
|
||||
LOG_ERROR("failed to load image", {
|
||||
{"slot_id", slot->id},
|
||||
{"img_sl_id", img_sl.id}
|
||||
});
|
||||
return false;
|
||||
}
|
||||
LOG_VERBOSE("image loaded", {
|
||||
@@ -897,12 +853,12 @@ struct llama_server_context
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
LOG("ERROR: Image with id: %i, not found.\n", img_id);
|
||||
LOG_TEE("ERROR: Image with id: %i, not found.\n", img_id);
|
||||
slot->images.clear();
|
||||
return false;
|
||||
}
|
||||
} catch (const std::invalid_argument& e) {
|
||||
LOG("Invalid image number id in prompt\n");
|
||||
LOG_TEE("Invalid image number id in prompt\n");
|
||||
slot->images.clear();
|
||||
return false;
|
||||
}
|
||||
@@ -917,10 +873,10 @@ struct llama_server_context
|
||||
|
||||
if (slot->ctx_sampling != nullptr)
|
||||
{
|
||||
common_sampler_free(slot->ctx_sampling);
|
||||
llama_sampling_free(slot->ctx_sampling);
|
||||
}
|
||||
slot->ctx_sampling = common_sampler_init(model, slot->sparams);
|
||||
//llama_set_rng_seed(ctx, slot->params.seed);
|
||||
slot->ctx_sampling = llama_sampling_init(slot->sparams);
|
||||
llama_set_rng_seed(ctx, slot->params.seed);
|
||||
slot->command = LOAD_PROMPT;
|
||||
|
||||
all_slots_are_idle = false;
|
||||
@@ -930,7 +886,7 @@ struct llama_server_context
|
||||
{"task_id", slot->task_id},
|
||||
});
|
||||
|
||||
// LOG("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
|
||||
LOG_TEE("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -946,13 +902,13 @@ struct llama_server_context
|
||||
system_tokens.clear();
|
||||
|
||||
if (!system_prompt.empty()) {
|
||||
system_tokens = common_tokenize(ctx, system_prompt, add_bos_token);
|
||||
system_tokens = ::llama_tokenize(ctx, system_prompt, add_bos_token);
|
||||
|
||||
common_batch_clear(batch);
|
||||
llama_batch_clear(batch);
|
||||
|
||||
for (int i = 0; i < (int)system_tokens.size(); ++i)
|
||||
{
|
||||
common_batch_add(batch, system_tokens[i], i, { 0 }, false);
|
||||
llama_batch_add(batch, system_tokens[i], i, { 0 }, false);
|
||||
}
|
||||
|
||||
for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += params.n_batch)
|
||||
@@ -966,10 +922,11 @@ struct llama_server_context
|
||||
batch.n_seq_id + i,
|
||||
batch.seq_id + i,
|
||||
batch.logits + i,
|
||||
0, 0, 0, // unused
|
||||
};
|
||||
if (llama_decode(ctx, batch_view) != 0)
|
||||
{
|
||||
LOG("%s: llama_decode() failed\n", __func__);
|
||||
LOG_TEE("%s: llama_decode() failed\n", __func__);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -981,7 +938,7 @@ struct llama_server_context
|
||||
}
|
||||
}
|
||||
|
||||
LOG("system prompt updated\n");
|
||||
LOG_TEE("system prompt updated\n");
|
||||
system_need_update = false;
|
||||
}
|
||||
|
||||
@@ -1040,20 +997,18 @@ struct llama_server_context
|
||||
|
||||
bool process_token(completion_token_output &result, llama_client_slot &slot) {
|
||||
// remember which tokens were sampled - used for repetition penalties during sampling
|
||||
const std::string token_str = common_token_to_piece(ctx, result.tok);
|
||||
const std::string token_str = llama_token_to_piece(ctx, result.tok);
|
||||
slot.sampled = result.tok;
|
||||
|
||||
// search stop word and delete it
|
||||
slot.generated_text += token_str;
|
||||
slot.has_next_token = true;
|
||||
|
||||
/*
|
||||
if (slot.ctx_sampling->params.use_penalty_prompt_tokens && result.tok != -1)
|
||||
{
|
||||
// we can change penalty_prompt_tokens because it is always created from scratch each request
|
||||
slot.ctx_sampling->params.penalty_prompt_tokens.push_back(result.tok);
|
||||
}
|
||||
*/
|
||||
|
||||
// check if there is incomplete UTF-8 character at the end
|
||||
bool incomplete = false;
|
||||
@@ -1162,8 +1117,8 @@ struct llama_server_context
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!llava_image_embed_make_with_clip_img(clp_ctx, params.cpuparams.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
|
||||
LOG("Error processing the given image");
|
||||
if (!llava_image_embed_make_with_clip_img(clp_ctx, params.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
|
||||
LOG_TEE("Error processing the given image");
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1175,7 +1130,7 @@ struct llama_server_context
|
||||
|
||||
void send_error(task_server& task, const std::string &error)
|
||||
{
|
||||
LOG("task %i - error: %s\n", task.id, error.c_str());
|
||||
LOG_TEE("task %i - error: %s\n", task.id, error.c_str());
|
||||
task_result res;
|
||||
res.id = task.id;
|
||||
res.multitask_id = task.multitask_id;
|
||||
@@ -1187,11 +1142,13 @@ struct llama_server_context
|
||||
|
||||
json get_formated_generation(llama_client_slot &slot)
|
||||
{
|
||||
std::vector<std::string> samplers;
|
||||
samplers.reserve(slot.sparams.samplers.size());
|
||||
for (const auto & sampler : slot.sparams.samplers)
|
||||
const auto eos_bias = slot.sparams.logit_bias.find(llama_token_eos(model));
|
||||
const bool ignore_eos = eos_bias != slot.sparams.logit_bias.end() &&
|
||||
eos_bias->second < 0.0f && std::isinf(eos_bias->second);
|
||||
std::vector<std::string> samplers_sequence;
|
||||
for (const auto &sampler_type : slot.sparams.samplers_sequence)
|
||||
{
|
||||
samplers.emplace_back(common_sampler_type_to_str(sampler));
|
||||
samplers_sequence.emplace_back(llama_sampling_type_to_str(sampler_type));
|
||||
}
|
||||
|
||||
return json {
|
||||
@@ -1205,11 +1162,14 @@ struct llama_server_context
|
||||
{"top_k", slot.sparams.top_k},
|
||||
{"top_p", slot.sparams.top_p},
|
||||
{"min_p", slot.sparams.min_p},
|
||||
{"typical_p", slot.sparams.typ_p},
|
||||
{"tfs_z", slot.sparams.tfs_z},
|
||||
{"typical_p", slot.sparams.typical_p},
|
||||
{"repeat_last_n", slot.sparams.penalty_last_n},
|
||||
{"repeat_penalty", slot.sparams.penalty_repeat},
|
||||
{"presence_penalty", slot.sparams.penalty_present},
|
||||
{"frequency_penalty", slot.sparams.penalty_freq},
|
||||
{"penalty_prompt_tokens", slot.sparams.penalty_prompt_tokens},
|
||||
{"use_penalty_prompt_tokens", slot.sparams.use_penalty_prompt_tokens},
|
||||
{"mirostat", slot.sparams.mirostat},
|
||||
{"mirostat_tau", slot.sparams.mirostat_tau},
|
||||
{"mirostat_eta", slot.sparams.mirostat_eta},
|
||||
@@ -1217,13 +1177,13 @@ struct llama_server_context
|
||||
{"stop", slot.params.antiprompt},
|
||||
{"n_predict", slot.params.n_predict},
|
||||
{"n_keep", params.n_keep},
|
||||
{"ignore_eos", slot.sparams.ignore_eos},
|
||||
{"ignore_eos", ignore_eos},
|
||||
{"stream", slot.params.stream},
|
||||
// {"logit_bias", slot.sparams.logit_bias},
|
||||
{"logit_bias", slot.sparams.logit_bias},
|
||||
{"n_probs", slot.sparams.n_probs},
|
||||
{"min_keep", slot.sparams.min_keep},
|
||||
{"grammar", slot.sparams.grammar},
|
||||
{"samplers", samplers}
|
||||
{"samplers", samplers_sequence}
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1246,7 +1206,7 @@ struct llama_server_context
|
||||
if (slot.sparams.n_probs > 0)
|
||||
{
|
||||
std::vector<completion_token_output> probs_output = {};
|
||||
const std::vector<llama_token> to_send_toks = common_tokenize(ctx, tkn.text_to_send, false);
|
||||
const std::vector<llama_token> to_send_toks = llama_tokenize(ctx, tkn.text_to_send, false);
|
||||
size_t probs_pos = std::min(slot.sent_token_probs_index, slot.generated_token_probs.size());
|
||||
size_t probs_stop_pos = std::min(slot.sent_token_probs_index + to_send_toks.size(), slot.generated_token_probs.size());
|
||||
if (probs_pos < probs_stop_pos)
|
||||
@@ -1298,7 +1258,7 @@ struct llama_server_context
|
||||
std::vector<completion_token_output> probs = {};
|
||||
if (!slot.params.stream && slot.stopped_word)
|
||||
{
|
||||
const std::vector<llama_token> stop_word_toks = common_tokenize(ctx, slot.stopping_word, false);
|
||||
const std::vector<llama_token> stop_word_toks = llama_tokenize(ctx, slot.stopping_word, false);
|
||||
probs = std::vector<completion_token_output>(slot.generated_token_probs.begin(), slot.generated_token_probs.end() - stop_word_toks.size());
|
||||
}
|
||||
else
|
||||
@@ -1409,10 +1369,11 @@ struct llama_server_context
|
||||
batch.n_seq_id + i,
|
||||
batch.seq_id + i,
|
||||
batch.logits + i,
|
||||
0, 0, 0, // unused
|
||||
};
|
||||
if (llama_decode(ctx, batch_view))
|
||||
{
|
||||
LOG("%s : failed to eval\n", __func__);
|
||||
LOG_TEE("%s : failed to eval\n", __func__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -1427,18 +1388,17 @@ struct llama_server_context
|
||||
}
|
||||
|
||||
const int n_embd = llama_n_embd(model);
|
||||
float * embd = img.image_embedding + i * n_embd;
|
||||
llava_embd_batch llava_batch = llava_embd_batch(embd, n_eval, slot.n_past, 0);
|
||||
if (llama_decode(ctx, llava_batch.batch))
|
||||
llama_batch batch_img = { n_eval, nullptr, (img.image_embedding + i * n_embd), nullptr, nullptr, nullptr, nullptr, slot.n_past, 1, 0, };
|
||||
if (llama_decode(ctx, batch_img))
|
||||
{
|
||||
LOG("%s : failed to eval image\n", __func__);
|
||||
LOG_TEE("%s : failed to eval image\n", __func__);
|
||||
return false;
|
||||
}
|
||||
slot.n_past += n_eval;
|
||||
}
|
||||
image_idx++;
|
||||
|
||||
common_batch_clear(batch);
|
||||
llama_batch_clear(batch);
|
||||
|
||||
// append prefix of next image
|
||||
const auto json_prompt = (image_idx >= (int) slot.images.size()) ?
|
||||
@@ -1448,7 +1408,7 @@ struct llama_server_context
|
||||
std::vector<llama_token> append_tokens = tokenize(json_prompt, false); // has next image
|
||||
for (int i = 0; i < (int) append_tokens.size(); ++i)
|
||||
{
|
||||
common_batch_add(batch, append_tokens[i], system_tokens.size() + slot.n_past, { slot.id }, true);
|
||||
llama_batch_add(batch, append_tokens[i], system_tokens.size() + slot.n_past, { slot.id }, true);
|
||||
slot.n_past += 1;
|
||||
}
|
||||
}
|
||||
@@ -1580,7 +1540,7 @@ struct llama_server_context
|
||||
update_system_prompt();
|
||||
}
|
||||
|
||||
common_batch_clear(batch);
|
||||
llama_batch_clear(batch);
|
||||
|
||||
if (all_slots_are_idle)
|
||||
{
|
||||
@@ -1614,7 +1574,7 @@ struct llama_server_context
|
||||
slot.n_past = 0;
|
||||
slot.truncated = false;
|
||||
slot.has_next_token = true;
|
||||
LOG("Context exhausted. Slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size());
|
||||
LOG_TEE("Context exhausted. Slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size());
|
||||
|
||||
continue;
|
||||
// END LOCALAI changes
|
||||
@@ -1658,7 +1618,7 @@ struct llama_server_context
|
||||
|
||||
// TODO: we always have to take into account the "system_tokens"
|
||||
// this is not great and needs to be improved somehow
|
||||
common_batch_add(batch, slot.sampled, system_tokens.size() + slot_npast, { slot.id }, true);
|
||||
llama_batch_add(batch, slot.sampled, system_tokens.size() + slot_npast, { slot.id }, true);
|
||||
slot.n_past += 1;
|
||||
}
|
||||
|
||||
@@ -1752,7 +1712,7 @@ struct llama_server_context
|
||||
|
||||
if (!slot.params.cache_prompt)
|
||||
{
|
||||
common_sampler_reset(slot.ctx_sampling);
|
||||
llama_sampling_reset(slot.ctx_sampling);
|
||||
|
||||
slot.n_past = 0;
|
||||
slot.n_past_se = 0;
|
||||
@@ -1764,7 +1724,7 @@ struct llama_server_context
|
||||
// push the prompt into the sampling context (do not apply grammar)
|
||||
for (auto &token : prompt_tokens)
|
||||
{
|
||||
common_sampler_accept(slot.ctx_sampling, token, false);
|
||||
llama_sampling_accept(slot.ctx_sampling, ctx, token, false);
|
||||
}
|
||||
|
||||
slot.n_past = common_part(slot.cache_tokens, prompt_tokens);
|
||||
@@ -1856,17 +1816,16 @@ struct llama_server_context
|
||||
ga_i += ga_w/ga_n;
|
||||
}
|
||||
}
|
||||
common_batch_add(batch, prefix_tokens[slot.n_past], system_tokens.size() + slot_npast, {slot.id }, false);
|
||||
llama_batch_add(batch, prefix_tokens[slot.n_past], system_tokens.size() + slot_npast, {slot.id }, false);
|
||||
slot_npast++;
|
||||
}
|
||||
|
||||
if (has_images && !ingest_images(slot, n_batch))
|
||||
{
|
||||
LOG_ERR("%s: failed processing images Slot id : %d, Task id: %d",
|
||||
__func__,
|
||||
slot.id,
|
||||
slot.task_id
|
||||
);
|
||||
LOG_ERROR("failed processing images", {
|
||||
"slot_id", slot.id,
|
||||
"task_id", slot.task_id,
|
||||
});
|
||||
// FIXME @phymbert: to be properly tested
|
||||
// early returning without changing the slot state will block the slot forever
|
||||
// no one at the moment is checking the return value
|
||||
@@ -1906,10 +1865,10 @@ struct llama_server_context
|
||||
const int bd = (slot.ga_w / slot.ga_n) * (slot.ga_n - 1);
|
||||
const int dd = (slot.ga_w / slot.ga_n) - ib * bd - slot.ga_w;
|
||||
|
||||
LOG("\n");
|
||||
LOG("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i, slot.n_past_se, ib * bd, slot.ga_i + ib * bd, slot.n_past_se + ib * bd);
|
||||
LOG("div: [%6d, %6d] / %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w, slot.ga_n, (slot.ga_i + ib * bd) / slot.ga_n, (slot.ga_i + ib * bd + slot.ga_w) / slot.ga_n);
|
||||
LOG("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd + slot.ga_w, slot.n_past_se + ib * bd, dd, slot.ga_i + ib * bd + slot.ga_w + dd, slot.n_past_se + ib * bd + dd);
|
||||
LOG_TEE("\n");
|
||||
LOG_TEE("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i, slot.n_past_se, ib * bd, slot.ga_i + ib * bd, slot.n_past_se + ib * bd);
|
||||
LOG_TEE("div: [%6d, %6d] / %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w, slot.ga_n, (slot.ga_i + ib * bd) / slot.ga_n, (slot.ga_i + ib * bd + slot.ga_w) / slot.ga_n);
|
||||
LOG_TEE("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd + slot.ga_w, slot.n_past_se + ib * bd, dd, slot.ga_i + ib * bd + slot.ga_w + dd, slot.n_past_se + ib * bd + dd);
|
||||
|
||||
llama_kv_cache_seq_add(ctx, slot.id, slot.ga_i, slot.n_past_se, ib * bd);
|
||||
llama_kv_cache_seq_div(ctx, slot.id, slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w,slot.ga_n);
|
||||
@@ -1919,7 +1878,7 @@ struct llama_server_context
|
||||
|
||||
slot.ga_i += slot.ga_w / slot.ga_n;
|
||||
|
||||
LOG("\nn_past_old = %d, n_past = %d, ga_i = %d\n\n", slot.n_past_se + bd, slot.n_past_se, slot.ga_i);
|
||||
LOG_TEE("\nn_past_old = %d, n_past = %d, ga_i = %d\n\n", slot.n_past_se + bd, slot.n_past_se, slot.ga_i);
|
||||
}
|
||||
slot.n_past_se += n_tokens;
|
||||
}
|
||||
@@ -1934,6 +1893,7 @@ struct llama_server_context
|
||||
batch.n_seq_id + i,
|
||||
batch.seq_id + i,
|
||||
batch.logits + i,
|
||||
0, 0, 0, // unused
|
||||
};
|
||||
|
||||
const int ret = llama_decode(ctx, batch_view);
|
||||
@@ -1943,11 +1903,11 @@ struct llama_server_context
|
||||
if (n_batch == 1 || ret < 0)
|
||||
{
|
||||
// if you get here, it means the KV cache is full - try increasing it via the context size
|
||||
LOG("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret);
|
||||
LOG_TEE("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret);
|
||||
return false;
|
||||
}
|
||||
|
||||
LOG("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2);
|
||||
LOG_TEE("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2);
|
||||
|
||||
// retry with half the batch size to try to find a free slot in the KV cache
|
||||
n_batch /= 2;
|
||||
@@ -1972,9 +1932,9 @@ struct llama_server_context
|
||||
}
|
||||
|
||||
completion_token_output result;
|
||||
const llama_token id = common_sampler_sample(slot.ctx_sampling, ctx, slot.i_batch - i);
|
||||
const llama_token id = llama_sampling_sample(slot.ctx_sampling, ctx, NULL, slot.i_batch - i);
|
||||
|
||||
common_sampler_accept(slot.ctx_sampling, id, true);
|
||||
llama_sampling_accept(slot.ctx_sampling, ctx, id, true);
|
||||
|
||||
slot.n_decoded += 1;
|
||||
if (slot.n_decoded == 1)
|
||||
@@ -1984,14 +1944,19 @@ struct llama_server_context
|
||||
metrics.on_prompt_eval(slot);
|
||||
}
|
||||
|
||||
llama_token_data_array cur_p = { slot.ctx_sampling->cur.data(), slot.ctx_sampling->cur.size(), false };
|
||||
result.tok = id;
|
||||
const auto * cur_p = common_sampler_get_candidates(slot.ctx_sampling);
|
||||
|
||||
for (size_t i = 0; i < (size_t) slot.sparams.n_probs; ++i) {
|
||||
result.probs.push_back({
|
||||
cur_p->data[i].id,
|
||||
i >= cur_p->size ? 0.0f : cur_p->data[i].p,
|
||||
});
|
||||
const int32_t n_probs = slot.sparams.n_probs;
|
||||
if (slot.sparams.temp <= 0 && n_probs > 0)
|
||||
{
|
||||
// for llama_sample_token_greedy we need to sort candidates
|
||||
llama_sample_softmax(ctx, &cur_p);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < std::min(cur_p.size, (size_t)n_probs); ++i)
|
||||
{
|
||||
result.probs.push_back({cur_p.data[i].id, cur_p.data[i].p});
|
||||
}
|
||||
|
||||
if (!process_token(result, slot))
|
||||
@@ -2038,7 +2003,7 @@ static json format_partial_response(
|
||||
struct token_translator
|
||||
{
|
||||
llama_context * ctx;
|
||||
std::string operator()(llama_token tok) const { return common_token_to_piece(ctx, tok); }
|
||||
std::string operator()(llama_token tok) const { return llama_token_to_piece(ctx, tok); }
|
||||
std::string operator()(const completion_token_output &cto) const { return (*this)(cto.tok); }
|
||||
};
|
||||
|
||||
@@ -2103,6 +2068,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
|
||||
// slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict);
|
||||
// slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
|
||||
// slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
|
||||
// slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
|
||||
// slot->sparams.typical_p = json_value(data, "typical_p", default_sparams.typical_p);
|
||||
// slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
|
||||
// slot->sparams.penalty_last_n = json_value(data, "repeat_last_n", default_sparams.penalty_last_n);
|
||||
@@ -2126,6 +2092,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
|
||||
data["n_predict"] = predict->tokens() == 0 ? -1 : predict->tokens();
|
||||
data["top_k"] = predict->topk();
|
||||
data["top_p"] = predict->topp();
|
||||
data["tfs_z"] = predict->tailfreesamplingz();
|
||||
data["typical_p"] = predict->typicalp();
|
||||
data["temperature"] = predict->temperature();
|
||||
data["repeat_last_n"] = predict->repeat();
|
||||
@@ -2143,9 +2110,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
|
||||
data["ignore_eos"] = predict->ignoreeos();
|
||||
data["embeddings"] = predict->embeddings();
|
||||
|
||||
// Add the correlationid to json data
|
||||
data["correlation_id"] = predict->correlationid();
|
||||
|
||||
// for each image in the request, add the image data
|
||||
//
|
||||
for (int i = 0; i < predict->images_size(); i++) {
|
||||
@@ -2172,6 +2136,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
|
||||
// llama.params.n_predict = predict->tokens() == 0 ? -1 : predict->tokens();
|
||||
// llama.params.sparams.top_k = predict->topk();
|
||||
// llama.params.sparams.top_p = predict->topp();
|
||||
// llama.params.sparams.tfs_z = predict->tailfreesamplingz();
|
||||
// llama.params.sparams.typical_p = predict->typicalp();
|
||||
// llama.params.sparams.penalty_last_n = predict->repeat();
|
||||
// llama.params.sparams.temp = predict->temperature();
|
||||
@@ -2229,7 +2194,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
|
||||
// }
|
||||
|
||||
static void params_parse(const backend::ModelOptions* request,
|
||||
common_params & params) {
|
||||
gpt_params & params) {
|
||||
|
||||
// this is comparable to: https://github.com/ggerganov/llama.cpp/blob/d9b33fe95bd257b36c84ee5769cc048230067d6f/examples/server/server.cpp#L1809
|
||||
|
||||
@@ -2241,15 +2206,9 @@ static void params_parse(const backend::ModelOptions* request,
|
||||
}
|
||||
// params.model_alias ??
|
||||
params.model_alias = request->modelfile();
|
||||
if (!request->cachetypekey().empty()) {
|
||||
params.cache_type_k = request->cachetypekey();
|
||||
}
|
||||
if (!request->cachetypevalue().empty()) {
|
||||
params.cache_type_v = request->cachetypevalue();
|
||||
}
|
||||
params.n_ctx = request->contextsize();
|
||||
//params.memory_f16 = request->f16memory();
|
||||
params.cpuparams.n_threads = request->threads();
|
||||
params.n_threads = request->threads();
|
||||
params.n_gpu_layers = request->ngpulayers();
|
||||
params.n_batch = request->nbatch();
|
||||
// Set params.n_parallel by environment variable (LLAMA_PARALLEL), defaults to 1
|
||||
@@ -2299,13 +2258,13 @@ static void params_parse(const backend::ModelOptions* request,
|
||||
}
|
||||
// get the directory of modelfile
|
||||
std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
|
||||
params.lora_adapters.push_back({ model_dir + "/"+request->loraadapter(), scale_factor });
|
||||
params.lora_adapter.push_back(std::make_tuple(model_dir + "/"+request->loraadapter(), scale_factor));
|
||||
params.lora_base = model_dir + "/"+request->lorabase();
|
||||
}
|
||||
params.use_mlock = request->mlock();
|
||||
params.use_mmap = request->mmap();
|
||||
params.flash_attn = request->flashattention();
|
||||
params.no_kv_offload = request->nokvoffload();
|
||||
params.ctx_shift = false; // We control context-shifting in any case (and we disable it as it could just lead to infinite loops)
|
||||
|
||||
params.embedding = request->embeddings();
|
||||
|
||||
@@ -2344,7 +2303,7 @@ public:
|
||||
|
||||
grpc::Status LoadModel(ServerContext* context, const backend::ModelOptions* request, backend::Result* result) {
|
||||
// Implement LoadModel RPC
|
||||
common_params params;
|
||||
gpt_params params;
|
||||
params_parse(request, params);
|
||||
|
||||
llama_backend_init();
|
||||
@@ -2390,11 +2349,6 @@ public:
|
||||
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
|
||||
reply.set_prompt_tokens(tokens_evaluated);
|
||||
|
||||
// Log Request Correlation Id
|
||||
LOG_VERBOSE("correlation:", {
|
||||
{ "id", data["correlation_id"] }
|
||||
});
|
||||
|
||||
// Send the reply
|
||||
writer->Write(reply);
|
||||
|
||||
@@ -2418,12 +2372,6 @@ public:
|
||||
std::string completion_text;
|
||||
task_result result = llama.queue_results.recv(task_id);
|
||||
if (!result.error && result.stop) {
|
||||
|
||||
// Log Request Correlation Id
|
||||
LOG_VERBOSE("correlation:", {
|
||||
{ "id", data["correlation_id"] }
|
||||
});
|
||||
|
||||
completion_text = result.result_json.value("content", "");
|
||||
int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
|
||||
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
|
||||
@@ -2463,31 +2411,6 @@ public:
|
||||
|
||||
return grpc::Status::OK;
|
||||
}
|
||||
|
||||
grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) {
|
||||
llama_client_slot* active_slot = llama.get_active_slot();
|
||||
|
||||
if (active_slot != nullptr) {
|
||||
// Calculate the tokens per second using existing logic
|
||||
double tokens_per_second = 1e3 / active_slot->t_token_generation * active_slot->n_decoded;
|
||||
|
||||
// Populate the response with metrics
|
||||
response->set_slot_id(active_slot->id);
|
||||
response->set_prompt_json_for_slot(active_slot->prompt.dump());
|
||||
response->set_tokens_per_second(tokens_per_second);
|
||||
response->set_tokens_generated(active_slot->n_decoded);
|
||||
response->set_prompt_tokens_processed(active_slot->num_prompt_tokens_processed);
|
||||
} else {
|
||||
// Handle case when no active slot exists
|
||||
response->set_slot_id(0);
|
||||
response->set_prompt_json_for_slot("");
|
||||
response->set_tokens_per_second(0);
|
||||
response->set_tokens_generated(0);
|
||||
response->set_prompt_tokens_processed(0);
|
||||
}
|
||||
|
||||
return grpc::Status::OK;
|
||||
}
|
||||
};
|
||||
|
||||
void RunServer(const std::string& server_address) {
|
||||
|
||||
@@ -1,13 +0,0 @@
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 342042ff..224db9b5 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -2419,7 +2419,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
int* patches_data = (int*)malloc(ggml_nbytes(patches));
for (int i = 0; i < num_patches; i++) {
- patches_data[i] = i + 1;
+ patches_data[i] = i;
}
ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
free(patches_data);
@@ -1,12 +1,5 @@
#!/bin/bash

## Patches
## Apply patches from the `patches` directory
for patch in $(ls patches); do
echo "Applying patch $patch"
patch -d llama.cpp/ -p1 < patches/$patch
done

cp -r CMakeLists.txt llama.cpp/examples/grpc-server/
cp -r grpc-server.cpp llama.cpp/examples/grpc-server/
cp -rfv json.hpp llama.cpp/examples/grpc-server/

@@ -480,4 +480,31 @@ static inline std::vector<uint8_t> base64_decode(const std::string & encoded_str
}

return ret;
}

//
// random string / id
//

static std::string random_string()
{
static const std::string str("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");

std::random_device rd;
std::mt19937 generator(rd());

std::string result(32, ' ');

for (int i = 0; i < 32; ++i) {
result[i] = str[generator() % str.size()];
}

return result;
}

static std::string gen_chatcmplid()
{
std::stringstream chatcmplid;
chatcmplid << "chatcmpl-" << random_string();
return chatcmplid.str();
}
@@ -1,25 +0,0 @@
|
||||
INCLUDE_PATH := $(abspath ./)
|
||||
LIBRARY_PATH := $(abspath ./)
|
||||
|
||||
AR?=ar
|
||||
|
||||
BUILD_TYPE?=
|
||||
# keep the C++ standard at C++17
|
||||
CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../sources/bark.cpp/examples -I$(INCLUDE_PATH)/../../../sources/bark.cpp/spm-headers -I$(INCLUDE_PATH)/../../../sources/bark.cpp -O3 -DNDEBUG -std=c++17 -fPIC
|
||||
LDFLAGS = -L$(LIBRARY_PATH) -L$(LIBRARY_PATH)/../../../sources/bark.cpp/build/examples -lbark -lstdc++ -lm
|
||||
|
||||
# warnings
|
||||
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
|
||||
|
||||
gobark.o:
|
||||
$(CXX) $(CXXFLAGS) gobark.cpp -o gobark.o -c $(LDFLAGS)
|
||||
|
||||
libbark.a: gobark.o
|
||||
cp $(INCLUDE_PATH)/../../../sources/bark.cpp/build/libbark.a ./
|
||||
$(AR) rcs libbark.a gobark.o
|
||||
$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml.c.o
|
||||
$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-alloc.c.o
|
||||
$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-backend.c.o
|
||||
|
||||
clean:
|
||||
rm -f gobark.o libbark.a
|
||||
@@ -1,85 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <tuple>
|
||||
|
||||
#include "bark.h"
|
||||
#include "gobark.h"
|
||||
#include "common.h"
|
||||
#include "ggml.h"
|
||||
|
||||
struct bark_context *c;
|
||||
|
||||
void bark_print_progress_callback(struct bark_context *bctx, enum bark_encoding_step step, int progress, void *user_data) {
|
||||
if (step == bark_encoding_step::SEMANTIC) {
|
||||
printf("\rGenerating semantic tokens... %d%%", progress);
|
||||
} else if (step == bark_encoding_step::COARSE) {
|
||||
printf("\rGenerating coarse tokens... %d%%", progress);
|
||||
} else if (step == bark_encoding_step::FINE) {
|
||||
printf("\rGenerating fine tokens... %d%%", progress);
|
||||
}
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
int load_model(char *model) {
|
||||
// initialize bark context
|
||||
struct bark_context_params ctx_params = bark_context_default_params();
|
||||
bark_params params;
|
||||
|
||||
params.model_path = model;
|
||||
|
||||
// ctx_params.verbosity = verbosity;
|
||||
ctx_params.progress_callback = bark_print_progress_callback;
|
||||
ctx_params.progress_callback_user_data = nullptr;
|
||||
|
||||
struct bark_context *bctx = bark_load_model(params.model_path.c_str(), ctx_params, params.seed);
|
||||
if (!bctx) {
|
||||
fprintf(stderr, "%s: Could not load model\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
|
||||
c = bctx;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int tts(char *text,int threads, char *dst ) {
|
||||
|
||||
ggml_time_init();
|
||||
const int64_t t_main_start_us = ggml_time_us();
|
||||
|
||||
// generate audio
|
||||
if (!bark_generate_audio(c, text, threads)) {
|
||||
fprintf(stderr, "%s: An error occured. If the problem persists, feel free to open an issue to report it.\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
|
||||
const float *audio_data = bark_get_audio_data(c);
|
||||
if (audio_data == NULL) {
|
||||
fprintf(stderr, "%s: Could not get audio data\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
|
||||
const int audio_arr_size = bark_get_audio_data_size(c);
|
||||
|
||||
std::vector<float> audio_arr(audio_data, audio_data + audio_arr_size);
|
||||
|
||||
write_wav_on_disk(audio_arr, dst);
|
||||
|
||||
// report timing
|
||||
{
|
||||
const int64_t t_main_end_us = ggml_time_us();
|
||||
const int64_t t_load_us = bark_get_load_time(c);
|
||||
const int64_t t_eval_us = bark_get_eval_time(c);
|
||||
|
||||
printf("\n\n");
|
||||
printf("%s: load time = %8.2f ms\n", __func__, t_load_us / 1000.0f);
|
||||
printf("%s: eval time = %8.2f ms\n", __func__, t_eval_us / 1000.0f);
|
||||
printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us) / 1000.0f);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int unload() {
|
||||
bark_free(c);
return 0;
|
||||
}
|
||||
|
||||
@@ -1,52 +0,0 @@
|
||||
package main
|
||||
|
||||
// #cgo CXXFLAGS: -I${SRCDIR}/../../../sources/bark.cpp/ -I${SRCDIR}/../../../sources/bark.cpp/encodec.cpp -I${SRCDIR}/../../../sources/bark.cpp/examples -I${SRCDIR}/../../../sources/bark.cpp/spm-headers
|
||||
// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/../../../sources/bark.cpp/build/examples -L${SRCDIR}/../../../sources/bark.cpp/build/encodec.cpp/ -lbark -lencodec -lcommon
|
||||
// #include <gobark.h>
|
||||
// #include <stdlib.h>
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unsafe"
|
||||
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
)
|
||||
|
||||
type Bark struct {
|
||||
base.SingleThread
|
||||
threads int
|
||||
}
|
||||
|
||||
func (sd *Bark) Load(opts *pb.ModelOptions) error {
|
||||
|
||||
sd.threads = int(opts.Threads)
|
||||
|
||||
modelFile := C.CString(opts.ModelFile)
|
||||
defer C.free(unsafe.Pointer(modelFile))
|
||||
|
||||
ret := C.load_model(modelFile)
|
||||
if ret != 0 {
|
||||
return fmt.Errorf("inference failed")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (sd *Bark) TTS(opts *pb.TTSRequest) error {
|
||||
t := C.CString(opts.Text)
|
||||
defer C.free(unsafe.Pointer(t))
|
||||
|
||||
dst := C.CString(opts.Dst)
|
||||
defer C.free(unsafe.Pointer(dst))
|
||||
|
||||
threads := C.int(sd.threads)
|
||||
|
||||
ret := C.tts(t, threads, dst)
|
||||
if ret != 0 {
|
||||
return fmt.Errorf("inference failed")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -1,8 +0,0 @@
#ifdef __cplusplus
extern "C" {
#endif
int load_model(char *model);
int tts(char *text,int threads, char *dst );
#ifdef __cplusplus
}
#endif
@@ -1,21 +0,0 @@
|
||||
INCLUDE_PATH := $(abspath ./)
|
||||
LIBRARY_PATH := $(abspath ./)
|
||||
|
||||
AR?=ar
|
||||
|
||||
BUILD_TYPE?=
|
||||
# keep the C++ standard at C++17
|
||||
CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC
|
||||
|
||||
# warnings
|
||||
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
|
||||
|
||||
gosd.o:
|
||||
$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c
|
||||
|
||||
libsd.a: gosd.o
|
||||
cp $(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a ./libsd.a
|
||||
$(AR) rcs libsd.a gosd.o
|
||||
|
||||
clean:
|
||||
rm -f gosd.o libsd.a
|
||||
@@ -1,228 +0,0 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <iostream>
|
||||
#include <random>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "gosd.h"
|
||||
|
||||
// #include "preprocessing.hpp"
|
||||
#include "flux.hpp"
|
||||
#include "stable-diffusion.h"
|
||||
|
||||
#define STB_IMAGE_IMPLEMENTATION
|
||||
#define STB_IMAGE_STATIC
|
||||
#include "stb_image.h"
|
||||
|
||||
#define STB_IMAGE_WRITE_IMPLEMENTATION
|
||||
#define STB_IMAGE_WRITE_STATIC
|
||||
#include "stb_image_write.h"
|
||||
|
||||
#define STB_IMAGE_RESIZE_IMPLEMENTATION
|
||||
#define STB_IMAGE_RESIZE_STATIC
|
||||
#include "stb_image_resize.h"
|
||||
|
||||
// Names of the sampler method, same order as enum sample_method in stable-diffusion.h
|
||||
const char* sample_method_str[] = {
|
||||
"euler_a",
|
||||
"euler",
|
||||
"heun",
|
||||
"dpm2",
|
||||
"dpm++2s_a",
|
||||
"dpm++2m",
|
||||
"dpm++2mv2",
|
||||
"ipndm",
|
||||
"ipndm_v",
|
||||
"lcm",
|
||||
};
|
||||
|
||||
// Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h
|
||||
const char* schedule_str[] = {
|
||||
"default",
|
||||
"discrete",
|
||||
"karras",
|
||||
"exponential",
|
||||
"ays",
|
||||
"gits",
|
||||
};
|
||||
|
||||
sd_ctx_t* sd_c;
|
||||
|
||||
sample_method_t sample_method;
|
||||
|
||||
int load_model(char *model, char* options[], int threads, int diff) {
|
||||
fprintf (stderr, "Loading model!\n");
|
||||
|
||||
char *stableDiffusionModel = "";
|
||||
if (diff == 1 ) {
|
||||
stableDiffusionModel = model;
|
||||
model = "";
|
||||
}
|
||||
|
||||
// decode options. Options are in the form optname:optval, or just optname for booleans.
|
||||
char *clip_l_path = "";
|
||||
char *clip_g_path = "";
|
||||
char *t5xxl_path = "";
|
||||
char *vae_path = "";
|
||||
char *scheduler = "";
|
||||
char *sampler = "";
|
||||
|
||||
// If options is not NULL, parse options
|
||||
for (int i = 0; options[i] != NULL; i++) {
|
||||
char *optname = strtok(options[i], ":");
|
||||
char *optval = strtok(NULL, ":");
|
||||
if (optval == NULL) {
|
||||
optval = "true";
|
||||
}
|
||||
|
||||
if (!strcmp(optname, "clip_l_path")) {
|
||||
clip_l_path = optval;
|
||||
}
|
||||
if (!strcmp(optname, "clip_g_path")) {
|
||||
clip_g_path = optval;
|
||||
}
|
||||
if (!strcmp(optname, "t5xxl_path")) {
|
||||
t5xxl_path = optval;
|
||||
}
|
||||
if (!strcmp(optname, "vae_path")) {
|
||||
vae_path = optval;
|
||||
}
|
||||
if (!strcmp(optname, "scheduler")) {
|
||||
scheduler = optval;
|
||||
}
|
||||
if (!strcmp(optname, "sampler")) {
|
||||
sampler = optval;
|
||||
}
|
||||
}
|
||||
|
||||
int sample_method_found = -1;
|
||||
for (int m = 0; m < N_SAMPLE_METHODS; m++) {
|
||||
if (!strcmp(sampler, sample_method_str[m])) {
|
||||
sample_method_found = m;
|
||||
}
|
||||
}
|
||||
if (sample_method_found == -1) {
|
||||
fprintf(stderr, "Invalid sample method, default to EULER_A!\n");
|
||||
sample_method_found = EULER_A;
|
||||
}
|
||||
sample_method = (sample_method_t)sample_method_found;
|
||||
|
||||
int schedule_found = -1;
|
||||
for (int d = 0; d < N_SCHEDULES; d++) {
|
||||
if (!strcmp(scheduler, schedule_str[d])) {
|
||||
schedule_found = d;
|
||||
fprintf (stderr, "Found scheduler: %s\n", scheduler);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if (schedule_found == -1) {
|
||||
fprintf (stderr, "Invalid scheduler! using DEFAULT\n");
|
||||
schedule_found = DEFAULT;
|
||||
}
|
||||
|
||||
schedule_t schedule = (schedule_t)schedule_found;
|
||||
|
||||
fprintf (stderr, "Creating context\n");
|
||||
sd_ctx_t* sd_ctx = new_sd_ctx(model,
|
||||
clip_l_path,
|
||||
clip_g_path,
|
||||
t5xxl_path,
|
||||
stableDiffusionModel,
|
||||
vae_path,
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
threads,
|
||||
SD_TYPE_COUNT,
|
||||
STD_DEFAULT_RNG,
|
||||
schedule,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false);
|
||||
|
||||
if (sd_ctx == NULL) {
|
||||
fprintf (stderr, "failed loading model (generic error)\n");
|
||||
return 1;
|
||||
}
|
||||
fprintf (stderr, "Created context: OK\n");
|
||||
|
||||
sd_c = sd_ctx;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed , char *dst, float cfg_scale) {
|
||||
|
||||
sd_image_t* results;
|
||||
|
||||
std::vector<int> skip_layers = {7, 8, 9};
|
||||
|
||||
fprintf (stderr, "Generating image\n");
|
||||
|
||||
results = txt2img(sd_c,
|
||||
text,
|
||||
negativeText,
|
||||
-1, //clip_skip
|
||||
cfg_scale, // cfg_scale
|
||||
3.5f,
|
||||
width,
|
||||
height,
|
||||
sample_method,
|
||||
steps,
|
||||
seed,
|
||||
1,
|
||||
NULL,
|
||||
0.9f,
|
||||
20.f,
|
||||
false,
|
||||
"",
|
||||
skip_layers.data(),
|
||||
skip_layers.size(),
|
||||
0,
|
||||
0.01,
|
||||
0.2);
|
||||
|
||||
if (results == NULL) {
|
||||
fprintf (stderr, "NO results\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (results[0].data == NULL) {
|
||||
fprintf (stderr, "Results with no data\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
fprintf (stderr, "Writing PNG\n");
|
||||
|
||||
fprintf (stderr, "DST: %s\n", dst);
|
||||
fprintf (stderr, "Width: %d\n", results[0].width);
|
||||
fprintf (stderr, "Height: %d\n", results[0].height);
|
||||
fprintf (stderr, "Channel: %d\n", results[0].channel);
|
||||
fprintf (stderr, "Data: %p\n", results[0].data);
|
||||
|
||||
stbi_write_png(dst, results[0].width, results[0].height, results[0].channel,
|
||||
results[0].data, 0, NULL);
|
||||
fprintf (stderr, "Saved resulting image to '%s'\n", dst);
|
||||
|
||||
// TODO: free results. Why does it crash?
|
||||
|
||||
free(results[0].data);
|
||||
results[0].data = NULL;
|
||||
free(results);
|
||||
fprintf (stderr, "gen_image is done", dst);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int unload() {
|
||||
free_sd_ctx(sd_c);
return 0;
|
||||
}
|
||||
|
||||
@@ -1,96 +0,0 @@
|
||||
package main
|
||||
|
||||
// #cgo CXXFLAGS: -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/ggml/include
|
||||
// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/build/ggml/src/ggml-cpu -L${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/build/ggml/src -lsd -lstdc++ -lm -lggml -lggml-base -lggml-cpu -lgomp
|
||||
// #include <gosd.h>
|
||||
// #include <stdlib.h>
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"unsafe"
|
||||
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/LocalAI/pkg/utils"
|
||||
)
|
||||
|
||||
type SDGGML struct {
|
||||
base.SingleThread
|
||||
threads int
|
||||
sampleMethod string
|
||||
cfgScale float32
|
||||
}
|
||||
|
||||
func (sd *SDGGML) Load(opts *pb.ModelOptions) error {
|
||||
|
||||
sd.threads = int(opts.Threads)
|
||||
|
||||
modelFile := C.CString(opts.ModelFile)
|
||||
defer C.free(unsafe.Pointer(modelFile))
|
||||
|
||||
var options **C.char
|
||||
// prepare the options array to pass to C
|
||||
|
||||
size := C.size_t(unsafe.Sizeof((*C.char)(nil)))
|
||||
length := C.size_t(len(opts.Options))
|
||||
options = (**C.char)(C.malloc(length * size))
|
||||
view := (*[1 << 30]*C.char)(unsafe.Pointer(options))[0:len(opts.Options):len(opts.Options)]
|
||||
|
||||
var diffusionModel int
|
||||
|
||||
var oo []string
|
||||
for _, op := range opts.Options {
|
||||
if op == "diffusion_model" {
|
||||
diffusionModel = 1
|
||||
continue
|
||||
}
|
||||
|
||||
// If it's an option path, we resolve absolute path from the model path
|
||||
if strings.Contains(op, ":") && strings.Contains(op, "path") {
|
||||
data := strings.Split(op, ":")
|
||||
data[1] = filepath.Join(opts.ModelPath, data[1])
|
||||
if err := utils.VerifyPath(data[1], opts.ModelPath); err == nil {
|
||||
oo = append(oo, strings.Join(data, ":"))
|
||||
}
|
||||
} else {
|
||||
oo = append(oo, op)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Fprintf(os.Stderr, "Options: %+v\n", oo)
|
||||
|
||||
for i, x := range oo {
|
||||
view[i] = C.CString(x)
|
||||
}
|
||||
|
||||
sd.cfgScale = opts.CFGScale
|
||||
|
||||
ret := C.load_model(modelFile, options, C.int(opts.Threads), C.int(diffusionModel))
|
||||
if ret != 0 {
|
||||
return fmt.Errorf("could not load model")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (sd *SDGGML) GenerateImage(opts *pb.GenerateImageRequest) error {
|
||||
t := C.CString(opts.PositivePrompt)
|
||||
defer C.free(unsafe.Pointer(t))
|
||||
|
||||
dst := C.CString(opts.Dst)
|
||||
defer C.free(unsafe.Pointer(dst))
|
||||
|
||||
negative := C.CString(opts.NegativePrompt)
|
||||
defer C.free(unsafe.Pointer(negative))
|
||||
|
||||
ret := C.gen_image(t, negative, C.int(opts.Width), C.int(opts.Height), C.int(opts.Step), C.int(opts.Seed), dst, C.float(sd.cfgScale))
|
||||
if ret != 0 {
|
||||
return fmt.Errorf("inference failed")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -1,8 +0,0 @@
#ifdef __cplusplus
extern "C" {
#endif
int load_model(char *model, char* options[], int threads, int diffusionModel);
int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed, char *dst, float cfg_scale);
#ifdef __cplusplus
}
#endif
backend/go/llm/bert/bert.go (Normal file, 34 lines)
@@ -0,0 +1,34 @@
package main

// This is a wrapper to satisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
bert "github.com/go-skynet/go-bert.cpp"

"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)

type Embeddings struct {
base.SingleThread
bert *bert.Bert
}

func (llm *Embeddings) Load(opts *pb.ModelOptions) error {
model, err := bert.New(opts.ModelFile)
llm.bert = model
return err
}

func (llm *Embeddings) Embeddings(opts *pb.PredictOptions) ([]float32, error) {

if len(opts.EmbeddingTokens) > 0 {
tokens := []int{}
for _, t := range opts.EmbeddingTokens {
tokens = append(tokens, int(t))
}
return llm.bert.TokenEmbeddings(tokens, bert.SetThreads(int(opts.Threads)))
}

return llm.bert.Embeddings(opts.Embeddings, bert.SetThreads(int(opts.Threads)))
}
@@ -1,6 +1,7 @@
package main

// Note: this is started internally by LocalAI and a server is allocated for each model

import (
"flag"

@@ -14,7 +15,7 @@ var (
func main() {
flag.Parse()

if err := grpc.StartServer(*addr, &SDGGML{}); err != nil {
if err := grpc.StartServer(*addr, &Embeddings{}); err != nil {
panic(err)
}
}
backend/go/llm/gpt4all/gpt4all.go (Normal file, 62 lines)
@@ -0,0 +1,62 @@
|
||||
package main
|
||||
|
||||
// This is a wrapper to satisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
|
||||
)
|
||||
|
||||
type LLM struct {
|
||||
base.SingleThread
|
||||
|
||||
gpt4all *gpt4all.Model
|
||||
}
|
||||
|
||||
func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
||||
model, err := gpt4all.New(opts.ModelFile,
|
||||
gpt4all.SetThreads(int(opts.Threads)),
|
||||
gpt4all.SetLibrarySearchPath(opts.LibrarySearchPath))
|
||||
llm.gpt4all = model
|
||||
return err
|
||||
}
|
||||
|
||||
func buildPredictOptions(opts *pb.PredictOptions) []gpt4all.PredictOption {
|
||||
predictOptions := []gpt4all.PredictOption{
|
||||
gpt4all.SetTemperature(float64(opts.Temperature)),
|
||||
gpt4all.SetTopP(float64(opts.TopP)),
|
||||
gpt4all.SetTopK(int(opts.TopK)),
|
||||
gpt4all.SetTokens(int(opts.Tokens)),
|
||||
}
|
||||
|
||||
if opts.Batch != 0 {
|
||||
predictOptions = append(predictOptions, gpt4all.SetBatch(int(opts.Batch)))
|
||||
}
|
||||
return predictOptions
|
||||
}
|
||||
|
||||
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
|
||||
return llm.gpt4all.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||
}
|
||||
|
||||
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
||||
predictOptions := buildPredictOptions(opts)
|
||||
|
||||
go func() {
|
||||
llm.gpt4all.SetTokenCallback(func(token string) bool {
|
||||
results <- token
|
||||
return true
|
||||
})
|
||||
_, err := llm.gpt4all.Predict(opts.Prompt, predictOptions...)
|
||||
if err != nil {
|
||||
fmt.Println("err: ", err)
|
||||
}
|
||||
llm.gpt4all.SetTokenCallback(nil)
|
||||
close(results)
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
||||
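As an aside for readers of this diff: below is a minimal, hypothetical sketch (not part of the change set) of how the PredictStream wrapper above is typically consumed; the function name, prompt, and token budget are placeholders, and llm is assumed to be an already-loaded LLM from this file.

func streamExample(llm *LLM) error {
	// PredictStream returns immediately; the goroutine it starts closes
	// the results channel once generation finishes, so the caller can range over it.
	results := make(chan string)
	if err := llm.PredictStream(&pb.PredictOptions{Prompt: "Hello", Tokens: 64}, results); err != nil {
		return err
	}
	for token := range results {
		fmt.Print(token) // print tokens as they arrive
	}
	return nil
}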
@@ -15,7 +15,7 @@ var (
func main() {
flag.Parse()

if err := grpc.StartServer(*addr, &VAD{}); err != nil {
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
panic(err)
}
}
@@ -1,6 +1,7 @@
package main

// Note: this is started internally by LocalAI and a server is allocated for each model

import (
"flag"

@@ -14,7 +15,7 @@ var (
func main() {
flag.Parse()

if err := grpc.StartServer(*addr, &Bark{}); err != nil {
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
panic(err)
}
}
backend/go/llm/rwkv/rwkv.go (Normal file, 95 lines)
@@ -0,0 +1,95 @@
|
||||
package main
|
||||
|
||||
// This is a wrapper to satisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/donomii/go-rwkv.cpp"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
)
|
||||
|
||||
const tokenizerSuffix = ".tokenizer.json"
|
||||
|
||||
type LLM struct {
|
||||
base.SingleThread
|
||||
|
||||
rwkv *rwkv.RwkvState
|
||||
}
|
||||
|
||||
func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
||||
tokenizerFile := opts.Tokenizer
|
||||
if tokenizerFile == "" {
|
||||
modelFile := filepath.Base(opts.ModelFile)
|
||||
tokenizerFile = modelFile + tokenizerSuffix
|
||||
}
|
||||
modelPath := filepath.Dir(opts.ModelFile)
|
||||
tokenizerPath := filepath.Join(modelPath, tokenizerFile)
|
||||
|
||||
model := rwkv.LoadFiles(opts.ModelFile, tokenizerPath, uint32(opts.GetThreads()))
|
||||
|
||||
if model == nil {
|
||||
return fmt.Errorf("rwkv could not load model")
|
||||
}
|
||||
llm.rwkv = model
|
||||
return nil
|
||||
}
|
||||
|
||||
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
|
||||
stopWord := "\n"
|
||||
if len(opts.StopPrompts) > 0 {
|
||||
stopWord = opts.StopPrompts[0]
|
||||
}
|
||||
|
||||
if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
response := llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), nil)
|
||||
|
||||
return response, nil
|
||||
}
|
||||
|
||||
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
||||
go func() {
|
||||
|
||||
stopWord := "\n"
|
||||
if len(opts.StopPrompts) > 0 {
|
||||
stopWord = opts.StopPrompts[0]
|
||||
}
|
||||
|
||||
if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil {
|
||||
fmt.Println("Error processing input: ", err)
|
||||
return
|
||||
}
|
||||
|
||||
llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), func(s string) bool {
|
||||
results <- s
|
||||
return true
|
||||
})
|
||||
close(results)
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (llm *LLM) TokenizeString(opts *pb.PredictOptions) (pb.TokenizationResponse, error) {
|
||||
tokens, err := llm.rwkv.Tokenizer.Encode(opts.Prompt)
|
||||
if err != nil {
|
||||
return pb.TokenizationResponse{}, err
|
||||
}
|
||||
|
||||
l := len(tokens)
|
||||
i32Tokens := make([]int32, l)
|
||||
|
||||
for i, t := range tokens {
|
||||
i32Tokens[i] = int32(t.ID)
|
||||
}
|
||||
|
||||
return pb.TokenizationResponse{
|
||||
Length: int32(l),
|
||||
Tokens: i32Tokens,
|
||||
}, nil
|
||||
}
|
||||
backend/go/transcribe/transcript.go (Normal file, 104 lines)
@@ -0,0 +1,104 @@
package main

import (
	"fmt"
	"os"
	"os/exec"
	"path/filepath"

	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
	"github.com/go-audio/wav"
	"github.com/mudler/LocalAI/core/schema"
)

func ffmpegCommand(args []string) (string, error) {
	cmd := exec.Command("ffmpeg", args...) // Constrain this to ffmpeg to permit security scanner to see that the command is safe.
	cmd.Env = os.Environ()
	out, err := cmd.CombinedOutput()
	return string(out), err
}

// audioToWav converts audio to wav for transcribe.
// TODO: use https://github.com/mccoyst/ogg?
func audioToWav(src, dst string) error {
	commandArgs := []string{"-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
	out, err := ffmpegCommand(commandArgs)
	if err != nil {
		return fmt.Errorf("error: %w out: %s", err, out)
	}
	return nil
}

func Transcript(model whisper.Model, audiopath, language string, translate bool, threads uint) (schema.TranscriptionResult, error) {
	res := schema.TranscriptionResult{}

	dir, err := os.MkdirTemp("", "whisper")
	if err != nil {
		return res, err
	}
	defer os.RemoveAll(dir)

	convertedPath := filepath.Join(dir, "converted.wav")

	if err := audioToWav(audiopath, convertedPath); err != nil {
		return res, err
	}

	// Open samples
	fh, err := os.Open(convertedPath)
	if err != nil {
		return res, err
	}
	defer fh.Close()

	// Read samples
	d := wav.NewDecoder(fh)
	buf, err := d.FullPCMBuffer()
	if err != nil {
		return res, err
	}

	data := buf.AsFloat32Buffer().Data

	// Process samples
	context, err := model.NewContext()
	if err != nil {
		return res, err
	}

	context.SetThreads(threads)

	if language != "" {
		context.SetLanguage(language)
	} else {
		context.SetLanguage("auto")
	}

	if translate {
		context.SetTranslate(true)
	}

	if err := context.Process(data, nil, nil); err != nil {
		return res, err
	}

	for {
		s, err := context.NextSegment()
		if err != nil {
			break
		}

		var tokens []int
		for _, t := range s.Tokens {
			tokens = append(tokens, t.Id)
		}

		segment := schema.Segment{Id: s.Num, Text: s.Text, Start: s.Start, End: s.End, Tokens: tokens}
		res.Segments = append(res.Segments, segment)

		res.Text += s.Text
	}

	return res, nil
}
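
Transcript above relies on audioToWav to normalize whatever the API receives into the 16 kHz, mono, signed 16-bit PCM WAV that whisper.cpp expects. The sketch below isolates just that conversion step with the same ffmpeg arguments as the new file; it assumes ffmpeg is on PATH, and the input/output file names are illustrative.

package main

import (
	"fmt"
	"os"
	"os/exec"
)

// audioToWav shells out to ffmpeg to resample src into a 16 kHz mono s16le WAV at dst.
func audioToWav(src, dst string) error {
	args := []string{"-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
	cmd := exec.Command("ffmpeg", args...)
	cmd.Env = os.Environ()
	out, err := cmd.CombinedOutput()
	if err != nil {
		return fmt.Errorf("error: %w out: %s", err, out)
	}
	return nil
}

func main() {
	// Hypothetical paths: any ffmpeg-readable input, converted for whisper.cpp.
	if err := audioToWav("input.ogg", "converted.wav"); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println("converted.wav is ready for transcription")
}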
26  backend/go/transcribe/whisper.go  Normal file
@@ -0,0 +1,26 @@
package main

// This is a wrapper to satisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
	"github.com/mudler/LocalAI/core/schema"
	"github.com/mudler/LocalAI/pkg/grpc/base"
	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)

type Whisper struct {
	base.SingleThread
	whisper whisper.Model
}

func (sd *Whisper) Load(opts *pb.ModelOptions) error {
	// Note: the Model here is a path to a directory containing the model files
	w, err := whisper.New(opts.ModelFile)
	sd.whisper = w
	return err
}

func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.TranscriptionResult, error) {
	return Transcript(sd.whisper, opts.Dst, opts.Language, opts.Translate, uint(opts.Threads))
}
@@ -1,105 +0,0 @@
package main

// This is a wrapper to satisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
	"os"
	"path/filepath"

	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
	"github.com/go-audio/wav"
	"github.com/mudler/LocalAI/pkg/grpc/base"
	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
	"github.com/mudler/LocalAI/pkg/utils"
)

type Whisper struct {
	base.SingleThread
	whisper whisper.Model
}

func (sd *Whisper) Load(opts *pb.ModelOptions) error {
	// Note: the Model here is a path to a directory containing the model files
	w, err := whisper.New(opts.ModelFile)
	sd.whisper = w
	return err
}

func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (pb.TranscriptResult, error) {

	dir, err := os.MkdirTemp("", "whisper")
	if err != nil {
		return pb.TranscriptResult{}, err
	}
	defer os.RemoveAll(dir)

	convertedPath := filepath.Join(dir, "converted.wav")

	if err := utils.AudioToWav(opts.Dst, convertedPath); err != nil {
		return pb.TranscriptResult{}, err
	}

	// Open samples
	fh, err := os.Open(convertedPath)
	if err != nil {
		return pb.TranscriptResult{}, err
	}
	defer fh.Close()

	// Read samples
	d := wav.NewDecoder(fh)
	buf, err := d.FullPCMBuffer()
	if err != nil {
		return pb.TranscriptResult{}, err
	}

	data := buf.AsFloat32Buffer().Data

	// Process samples
	context, err := sd.whisper.NewContext()
	if err != nil {
		return pb.TranscriptResult{}, err
	}

	context.SetThreads(uint(opts.Threads))

	if opts.Language != "" {
		context.SetLanguage(opts.Language)
	} else {
		context.SetLanguage("auto")
	}

	if opts.Translate {
		context.SetTranslate(true)
	}

	if err := context.Process(data, nil, nil); err != nil {
		return pb.TranscriptResult{}, err
	}

	segments := []*pb.TranscriptSegment{}
	text := ""
	for {
		s, err := context.NextSegment()
		if err != nil {
			break
		}

		var tokens []int32
		for _, t := range s.Tokens {
			tokens = append(tokens, int32(t.Id))
		}

		segment := &pb.TranscriptSegment{Id: int32(s.Num), Text: s.Text, Start: int64(s.Start), End: int64(s.End), Tokens: tokens}
		segments = append(segments, segment)

		text += s.Text
	}

	return pb.TranscriptResult{
		Segments: segments,
		Text:     text,
	}, nil

}
@@ -1,54 +0,0 @@
package main

// This is a wrapper to satisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
	"fmt"

	"github.com/mudler/LocalAI/pkg/grpc/base"
	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
	"github.com/streamer45/silero-vad-go/speech"
)

type VAD struct {
	base.SingleThread
	detector *speech.Detector
}

func (vad *VAD) Load(opts *pb.ModelOptions) error {
	v, err := speech.NewDetector(speech.DetectorConfig{
		ModelPath:            opts.ModelFile,
		SampleRate:           16000,
		//WindowSize:           1024,
		Threshold:            0.5,
		MinSilenceDurationMs: 0,
		SpeechPadMs:          0,
	})
	if err != nil {
		return fmt.Errorf("create silero detector: %w", err)
	}

	vad.detector = v
	return err
}

func (vad *VAD) VAD(req *pb.VADRequest) (pb.VADResponse, error) {
	audio := req.Audio

	segments, err := vad.detector.Detect(audio)
	if err != nil {
		return pb.VADResponse{}, fmt.Errorf("detect: %w", err)
	}

	vadSegments := []*pb.VADSegment{}
	for _, s := range segments {
		vadSegments = append(vadSegments, &pb.VADSegment{
			Start: float32(s.SpeechStartAt),
			End:   float32(s.SpeechEndAt),
		})
	}

	return pb.VADResponse{
		Segments: vadSegments,
	}, nil
}
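
The removed VAD backend above boils down to one mapping: the detector's speech segments are converted into start/end pairs for the gRPC response. The sketch below mirrors that mapping with local stand-in types (speechSegment, vadSegment, detect) in place of silero-vad-go and the generated protobuf structs, so it compiles without the real dependencies.

package main

import "fmt"

// speechSegment stands in for the segments returned by the silero detector.
type speechSegment struct {
	SpeechStartAt float64
	SpeechEndAt   float64
}

// vadSegment stands in for the protobuf VADSegment carried in the response.
type vadSegment struct {
	Start float32
	End   float32
}

// detect is a hypothetical detector; the real backend calls vad.detector.Detect(audio).
func detect(audio []float32) []speechSegment {
	return []speechSegment{{SpeechStartAt: 0.32, SpeechEndAt: 1.87}}
}

func main() {
	audio := make([]float32, 16000) // one second of silence at 16 kHz
	segments := detect(audio)

	out := []vadSegment{}
	for _, s := range segments {
		out = append(out, vadSegment{Start: float32(s.SpeechStartAt), End: float32(s.SpeechEndAt)})
	}
	fmt.Printf("%+v\n", out)
}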
@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118
@@ -1 +0,0 @@
torch==2.4.1
@@ -1,2 +1,2 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.4.1+rocm6.0
torch
@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,6 +1,7 @@
accelerate
auto-gptq==0.7.1
grpcio==1.68.1
grpcio==1.65.0
protobuf
torch
certifi
transformers
@@ -1,4 +0,0 @@
transformers
accelerate
torch==2.4.1
torchaudio==2.4.1
@@ -1,5 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118
torchaudio==2.4.1+cu118
transformers
accelerate
@@ -1,4 +0,0 @@
torch==2.4.1
torchaudio==2.4.1
transformers
accelerate
@@ -1,5 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.4.1+rocm6.0
torchaudio==2.4.1+rocm6.0
transformers
accelerate
torch
torchaudio
@@ -3,6 +3,4 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
transformers
accelerate
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,4 +1,6 @@
accelerate
bark==0.1.5
grpcio==1.68.1
grpcio==1.65.0
protobuf
certifi
certifi
transformers
@@ -18,23 +18,10 @@
# source $(dirname $0)/../common/libbackend.sh
#
function init() {
    # Name of the backend (directory name)
    BACKEND_NAME=${PWD##*/}

    # Path where all backends files are
    MY_DIR=$(realpath `dirname $0`)

    # Build type
    BUILD_PROFILE=$(getBuildProfile)

    # Environment directory
    EDIR=${MY_DIR}

    # Allow to specify a custom env dir for shared environments
    if [ "x${ENV_DIR}" != "x" ]; then
        EDIR=${ENV_DIR}
    fi

    # If a backend has defined a list of valid build profiles...
    if [ ! -z "${LIMIT_TARGETS}" ]; then
        isValidTarget=$(checkTargets ${LIMIT_TARGETS})
@@ -87,14 +74,13 @@ function getBuildProfile() {
# This function is idempotent, so you can call it as many times as you want and it will
# always result in an activated virtual environment
function ensureVenv() {
    if [ ! -d "${EDIR}/venv" ]; then
        uv venv ${EDIR}/venv
    if [ ! -d "${MY_DIR}/venv" ]; then
        uv venv ${MY_DIR}/venv
        echo "virtualenv created"
    fi

    # Source if we are not already in a Virtual env
    if [ "x${VIRTUAL_ENV}" != "x${EDIR}/venv" ]; then
        source ${EDIR}/venv/bin/activate

    if [ "x${VIRTUAL_ENV}" != "x${MY_DIR}/venv" ]; then
        source ${MY_DIR}/venv/bin/activate
        echo "virtualenv activated"
    fi

@@ -127,24 +113,13 @@ function installRequirements() {

    # These are the requirements files we will attempt to install, in order
    declare -a requirementFiles=(
        "${EDIR}/requirements-install.txt"
        "${EDIR}/requirements.txt"
        "${EDIR}/requirements-${BUILD_TYPE}.txt"
        "${MY_DIR}/requirements-install.txt"
        "${MY_DIR}/requirements.txt"
        "${MY_DIR}/requirements-${BUILD_TYPE}.txt"
    )

    if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
        requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}.txt")
    fi

    # if BUILD_TYPE is empty, we are a CPU build, so we should try to install the CPU requirements
    if [ "x${BUILD_TYPE}" == "x" ]; then
        requirementFiles+=("${EDIR}/requirements-cpu.txt")
    fi

    requirementFiles+=("${EDIR}/requirements-after.txt")

    if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
        requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}-after.txt")
        requirementFiles+=("${MY_DIR}/requirements-${BUILD_PROFILE}.txt")
    fi

    for reqFile in ${requirementFiles[@]}; do

@@ -1,9 +1,8 @@
.DEFAULT_GOAL := install

.PHONY: install
install:
install: protogen
	bash install.sh
	$(MAKE) protogen

.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py
@@ -13,7 +12,7 @@ protogen-clean:
	$(RM) backend_pb2_grpc.py backend_pb2.py

backend_pb2_grpc.py backend_pb2.py:
	bash protogen.sh
	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto

.PHONY: clean
clean: protogen-clean

@@ -1,6 +0,0 @@
#!/bin/bash
set -e

source $(dirname $0)/../common/libbackend.sh

python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
@@ -1,3 +1,2 @@
grpcio==1.68.1
protobuf
grpcio-tools
grpcio==1.65.0
protobuf
@@ -1,4 +0,0 @@
transformers
accelerate
torch==2.4.1
coqui-tts
@@ -1,6 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118
torchaudio==2.4.1+cu118
transformers
accelerate
coqui-tts
@@ -1,5 +0,0 @@
torch==2.4.1
torchaudio==2.4.1
transformers
accelerate
coqui-tts
@@ -1,6 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.4.1+rocm6.0
torchaudio==2.4.1+rocm6.0
transformers
accelerate
coqui-tts
torch
torchaudio
@@ -3,7 +3,4 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
transformers
accelerate
coqui-tts
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,4 +1,6 @@
grpcio==1.68.1
accelerate
TTS==0.22.0
grpcio==1.65.0
protobuf
certifi
packaging==24.1
transformers
@@ -19,7 +19,7 @@ class TestBackendServicer(unittest.TestCase):
        This method sets up the gRPC service by starting the server
        """
        self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
        time.sleep(30)
        time.sleep(10)

    def tearDown(self) -> None:
        """

@@ -18,13 +18,13 @@ import backend_pb2_grpc
import grpc

from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
    EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
    EulerAncestralDiscreteScheduler
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
from diffusers.pipelines.stable_diffusion import safety_checker
from diffusers.utils import load_image, export_to_video
from compel import Compel, ReturnedEmbeddingsType
from optimum.quanto import freeze, qfloat8, quantize
from transformers import CLIPTextModel, T5EncoderModel

from transformers import CLIPTextModel
from safetensors.torch import load_file

_ONE_DAY_IN_SECONDS = 60 * 60 * 24
@@ -163,12 +163,10 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
        modelFile = request.Model

        self.cfg_scale = 7
        self.PipelineType = request.PipelineType

        if request.CFGScale != 0:
            self.cfg_scale = request.CFGScale

        clipmodel = "Lykon/dreamshaper-8"
        clipmodel = "runwayml/stable-diffusion-v1-5"
        if request.CLIPModel != "":
            clipmodel = request.CLIPModel
        clipsubfolder = "text_encoder"
@@ -246,35 +244,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                    torch_dtype=torchType,
                    use_safetensors=True,
                    variant=variant)
            elif request.PipelineType == "FluxPipeline":
                if fromSingleFile:
                    self.pipe = FluxPipeline.from_single_file(modelFile,
                        torch_dtype=torchType,
                        use_safetensors=True)
                else:
                    self.pipe = FluxPipeline.from_pretrained(
                        request.Model,
                        torch_dtype=torch.bfloat16)
                if request.LowVRAM:
                    self.pipe.enable_model_cpu_offload()
            elif request.PipelineType == "FluxTransformer2DModel":
                dtype = torch.bfloat16
                # specify from environment or default to "ChuckMcSneed/FLUX.1-dev"
                bfl_repo = os.environ.get("BFL_REPO", "ChuckMcSneed/FLUX.1-dev")

                transformer = FluxTransformer2DModel.from_single_file(modelFile, torch_dtype=dtype)
                quantize(transformer, weights=qfloat8)
                freeze(transformer)
                text_encoder_2 = T5EncoderModel.from_pretrained(bfl_repo, subfolder="text_encoder_2", torch_dtype=dtype)
                quantize(text_encoder_2, weights=qfloat8)
                freeze(text_encoder_2)

                self.pipe = FluxPipeline.from_pretrained(bfl_repo, transformer=None, text_encoder_2=None, torch_dtype=dtype)
                self.pipe.transformer = transformer
                self.pipe.text_encoder_2 = text_encoder_2

                if request.LowVRAM:
                    self.pipe.enable_model_cpu_offload()

            if CLIPSKIP and request.CLIPSkip != 0:
                self.clip_skip = request.CLIPSkip
@@ -301,34 +270,22 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                self.pipe.controlnet = self.controlnet
            else:
                self.controlnet = None

            if request.LoraAdapter and not os.path.isabs(request.LoraAdapter):
            # Assume directory from request.ModelFile.
            # Only if request.LoraAdapter it's not an absolute path
            if request.LoraAdapter and request.ModelFile != "" and not os.path.isabs(request.LoraAdapter) and request.LoraAdapter:
                # get base path of modelFile
                modelFileBase = os.path.dirname(request.ModelFile)
                # modify LoraAdapter to be relative to modelFileBase
                request.LoraAdapter = os.path.join(request.ModelPath, request.LoraAdapter)

                request.LoraAdapter = os.path.join(modelFileBase, request.LoraAdapter)
            device = "cpu" if not request.CUDA else "cuda"
            self.device = device
            if request.LoraAdapter:
                # Check if its a local file and not a directory ( we load lora differently for a safetensor file )
                if os.path.exists(request.LoraAdapter) and not os.path.isdir(request.LoraAdapter):
                    # self.load_lora_weights(request.LoraAdapter, 1, device, torchType)
                    self.pipe.load_lora_weights(request.LoraAdapter)
                else:
                    self.pipe.unet.load_attn_procs(request.LoraAdapter)
            if len(request.LoraAdapters) > 0:
                i = 0
                adapters_name = []
                adapters_weights = []
                for adapter in request.LoraAdapters:
                    if not os.path.isabs(adapter):
                        adapter = os.path.join(request.ModelPath, adapter)
                    self.pipe.load_lora_weights(adapter, adapter_name=f"adapter_{i}")
                    adapters_name.append(f"adapter_{i}")
                    i += 1

                for adapters_weight in request.LoraScales:
                    adapters_weights.append(adapters_weight)

                self.pipe.set_adapters(adapters_name, adapter_weights=adapters_weights)

            if request.CUDA:
                self.pipe.to('cuda')
@@ -409,6 +366,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
        # create a dictionary of values for the parameters
        options = {
            "negative_prompt": request.negative_prompt,
            "width": request.width,
            "height": request.height,
            "num_inference_steps": steps,
        }

@@ -426,13 +385,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
        keys = options.keys()

        if request.EnableParameters != "":
            keys = [key.strip() for key in request.EnableParameters.split(",")]
            keys = request.EnableParameters.split(",")

        if request.EnableParameters == "none":
            keys = []

        # create a dictionary of parameters by using the keys from EnableParameters and the values from defaults
        kwargs = {key: options.get(key) for key in keys if key in options}
        kwargs = {key: options[key] for key in keys}

        # Set seed
        if request.seed > 0:
@@ -440,19 +399,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                request.seed
            )

        if self.PipelineType == "FluxPipeline":
            kwargs["max_sequence_length"] = 256

        if request.width:
            kwargs["width"] = request.width

        if request.height:
            kwargs["height"] = request.height

        if self.PipelineType == "FluxTransformer2DModel":
            kwargs["output_type"] = "pil"
            kwargs["generator"] = torch.Generator("cpu").manual_seed(0)

        if self.img2vid:
            # Load the conditioning image
            image = load_image(request.src)
@@ -468,7 +414,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
            export_to_video(video_frames, request.dst)
            return backend_pb2.Result(message="Media generated successfully", success=True)

        print(f"Generating image with {kwargs=}", file=sys.stderr)
        image = {}
        if COMPEL:
            conditioning, pooled = self.compel.build_conditioning_tensor(prompt)

@@ -1,9 +0,0 @@
diffusers
opencv-python
transformers
accelerate
compel
peft
sentencepiece
torch==2.4.1
optimum-quanto
@@ -1,10 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118
diffusers
opencv-python
transformers
accelerate
compel
peft
sentencepiece
optimum-quanto
@@ -1,9 +0,0 @@
torch==2.4.1
diffusers
opencv-python
transformers
accelerate
compel
peft
sentencepiece
optimum-quanto
@@ -1,11 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.3.1+rocm6.0
torchvision==0.18.1+rocm6.0
diffusers
opencv-python
transformers
accelerate
compel
peft
sentencepiece
optimum-quanto
torch
torchvision
@@ -3,12 +3,4 @@ intel-extension-for-pytorch
torch
torchvision
optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
diffusers
opencv-python
transformers
accelerate
compel
peft
sentencepiece
optimum-quanto
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,5 +1,13 @@
setuptools
grpcio==1.68.1
accelerate
compel
peft
diffusers
grpcio==1.65.0
opencv-python
pillow
protobuf
sentencepiece
torch
transformers
certifi
Some files were not shown because too many files have changed in this diff.