Compare commits

..

1 Commits

Author SHA1 Message Date
Ettore Di Giacinto
ff55f311cb refactor: break down json grammar parser in different files
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-24 23:21:25 +02:00
293 changed files with 3221 additions and 8788 deletions

View File

@@ -1,17 +0,0 @@
#!/bin/bash
cd /workspace
# Get the files into the volume without a bind mount
if [ ! -d ".git" ]; then
git clone https://github.com/mudler/LocalAI.git .
else
git fetch
fi
echo "Standard Post-Create script completed."
if [ -f "/devcontainer-customization/postcreate.sh" ]; then
echo "Launching customization postcreate.sh"
bash "/devcontainer-customization/postcreate.sh"
fi

View File

@@ -1,16 +0,0 @@
#!/bin/bash
cd /workspace
# Grab the pre-stashed backend assets to avoid build issues
cp -r /build/backend-assets /workspace/backend-assets
# Ensures generated source files are present upon load
make prepare
echo "Standard Post-Start script completed."
if [ -f "/devcontainer-customization/poststart.sh" ]; then
echo "Launching customization poststart.sh"
bash "/devcontainer-customization/poststart.sh"
fi

View File

@@ -1,55 +0,0 @@
#!/bin/bash
# This file contains some really simple functions that are useful when building up customization scripts.
# Checks if the git config has a user registered - and sets it up if not.
#
# Param 1: name
# Param 2: email
#
config_user() {
echo "Configuring git for $1 <$2>"
local gcn=$(git config --global user.name)
if [ -z "${gcn}" ]; then
echo "Setting up git user / remote"
git config --global user.name "$1"
git config --global user.email "$2"
fi
}
# Checks if the git remote is configured - and sets it up if not. Fetches either way.
#
# Param 1: remote name
# Param 2: remote url
#
config_remote() {
echo "Adding git remote and fetching $2 as $1"
local gr=$(git remote -v | grep $1)
if [ -z "${gr}" ]; then
git remote add $1 $2
fi
git fetch $1
}
# Setup special .ssh files
# Prints out lines of text to make things pretty
# Param 1: bash array, filenames relative to the customization directory that should be copied to ~/.ssh
setup_ssh() {
echo "starting ~/.ssh directory setup..."
mkdir -p "${HOME}.ssh"
chmod 0700 "${HOME}/.ssh"
echo "-----"
local files=("$@")
for file in "${files[@]}" ; do
local cfile="/devcontainer-customization/${file}"
local hfile="${HOME}/.ssh/${file}"
if [ ! -f "${hfile}" ]; then
echo "copying \"${file}\""
cp "${cfile}" "${hfile}"
chmod 600 "${hfile}"
fi
done
echo "~/.ssh directory setup complete!"
}

View File

@@ -1,25 +0,0 @@
Place any additional resources your environment requires in this directory
Script hooks are currently called for:
`postcreate.sh` and `poststart.sh`
If files with those names exist here, they will be called at the end of the normal script.
This is a good place to set things like `git config --global user.name` are set - and to handle any other files that are mounted via this directory.
To assist in doing so, `source /.devcontainer-scripts/utils.sh` will provide utility functions that may be useful - for example:
```
#!/bin/bash
source "/.devcontainer-scripts/utils.sh"
sshfiles=("config", "key.pub")
setup_ssh "${sshfiles[@]}"
config_user "YOUR NAME" "YOUR EMAIL"
config_remote "REMOTE NAME" "REMOTE URL"
```

View File

@@ -1,24 +0,0 @@
{
"$schema": "https://raw.githubusercontent.com/devcontainers/spec/main/schemas/devContainer.schema.json",
"name": "LocalAI",
"workspaceFolder": "/workspace",
"dockerComposeFile": [ "./docker-compose-devcontainer.yml" ],
"service": "api",
"shutdownAction": "stopCompose",
"customizations": {
"vscode": {
"extensions": [
"golang.go",
"ms-vscode.makefile-tools",
"ms-azuretools.vscode-docker",
"ms-python.python",
"ms-python.debugpy",
"wayou.vscode-todo-highlight",
"waderyan.gitblame"
]
}
},
"forwardPorts": [8080, 3000],
"postCreateCommand": "bash /.devcontainer-scripts/postcreate.sh",
"postStartCommand": "bash /.devcontainer-scripts/poststart.sh"
}

View File

@@ -1,48 +0,0 @@
services:
api:
build:
context: ..
dockerfile: Dockerfile
target: devcontainer
args:
- FFMPEG=true
- IMAGE_TYPE=extras
- GO_TAGS=stablediffusion p2p tts
env_file:
- ../.env
ports:
- 8080:8080
volumes:
- localai_workspace:/workspace
- ../models:/host-models
- ./customization:/devcontainer-customization
command: /bin/sh -c "while sleep 1000; do :; done"
cap_add:
- SYS_PTRACE
security_opt:
- seccomp:unconfined
prometheus:
image: prom/prometheus
container_name: prometheus
command:
- '--config.file=/etc/prometheus/prometheus.yml'
ports:
- 9090:9090
restart: unless-stopped
volumes:
- ./prometheus:/etc/prometheus
- prom_data:/prometheus
grafana:
image: grafana/grafana
container_name: grafana
ports:
- 3000:3000
restart: unless-stopped
environment:
- GF_SECURITY_ADMIN_USER=admin
- GF_SECURITY_ADMIN_PASSWORD=grafana
volumes:
- ./grafana:/etc/grafana/provisioning/datasources
volumes:
prom_data:
localai_workspace:

View File

@@ -1,10 +0,0 @@
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
url: http://prometheus:9090
isDefault: true
access: proxy
editable: true

View File

@@ -1,21 +0,0 @@
global:
scrape_interval: 15s
scrape_timeout: 10s
evaluation_interval: 15s
alerting:
alertmanagers:
- static_configs:
- targets: []
scheme: http
timeout: 10s
api_version: v1
scrape_configs:
- job_name: prometheus
honor_timestamps: true
scrape_interval: 15s
scrape_timeout: 10s
metrics_path: /metrics
scheme: http
static_configs:
- targets:
- localhost:9090

View File

@@ -1,7 +1,6 @@
.idea
.github
.vscode
.devcontainer
models
examples/chatbot-ui/models
examples/rwkv/models

3
.env
View File

@@ -79,9 +79,6 @@
### Enable to run parallel requests
# LOCALAI_PARALLEL_REQUESTS=true
# Enable to allow p2p mode
# LOCALAI_P2P=true
### Watchdog settings
###
# Enables watchdog to kill backends that are inactive for too much time

13
.github/bump_deps.sh vendored
View File

@@ -6,17 +6,4 @@ VAR=$3
LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")
# Read $VAR from Makefile (only first match)
set +e
CURRENT_COMMIT="$(grep -m1 "^$VAR?=" Makefile | cut -d'=' -f2)"
set -e
sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
if [ -z "$CURRENT_COMMIT" ]; then
echo "Could not find $VAR in Makefile."
exit 0
fi
echo "Changes: https://github.com/$REPO/compare/${CURRENT_COMMIT}..${LAST_COMMIT}" >> "${VAR}_message.txt"
echo "${LAST_COMMIT}" >> "${VAR}_commit.txt"

View File

@@ -29,14 +29,9 @@ def calculate_sha256(file_path):
def manual_safety_check_hf(repo_id):
scanResponse = requests.get('https://huggingface.co/api/models/' + repo_id + "/scan")
scan = scanResponse.json()
# Check if 'hasUnsafeFile' exists in the response
if 'hasUnsafeFile' in scan:
if scan['hasUnsafeFile']:
return scan
else:
return None
else:
return None
if scan['hasUnsafeFile']:
return scan
return None
download_type, repo_id_or_url = parse_uri(uri)

View File

@@ -67,6 +67,10 @@ updates:
directory: "/backend/python/parler-tts"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/petals"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/rerankers"
schedule:

View File

@@ -40,30 +40,17 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Bump dependencies 🔧
id: bump
run: |
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
{
echo 'message<<EOF'
cat "${{ matrix.variable }}_message.txt"
echo EOF
} >> "$GITHUB_OUTPUT"
{
echo 'commit<<EOF'
cat "${{ matrix.variable }}_commit.txt"
echo EOF
} >> "$GITHUB_OUTPUT"
rm -rfv ${{ matrix.variable }}_message.txt
rm -rfv ${{ matrix.variable }}_commit.txt
- name: Create Pull Request
uses: peter-evans/create-pull-request@v7
uses: peter-evans/create-pull-request@v6
with:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
commit-message: ':arrow_up: Update ${{ matrix.repository }}'
title: 'chore: :arrow_up: Update ${{ matrix.repository }} to `${{ steps.bump.outputs.commit }}`'
title: 'chore: :arrow_up: Update ${{ matrix.repository }}'
branch: "update/${{ matrix.variable }}"
body: ${{ steps.bump.outputs.message }}
body: Bump of ${{ matrix.repository }} version
signoff: true

View File

@@ -17,7 +17,7 @@ jobs:
run: |
bash .github/bump_docs.sh ${{ matrix.repository }}
- name: Create Pull Request
uses: peter-evans/create-pull-request@v7
uses: peter-evans/create-pull-request@v6
with:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI

View File

@@ -36,12 +36,12 @@ jobs:
sudo chmod 777 /hf_cache
bash .github/checksum_checker.sh gallery/index.yaml
- name: Create Pull Request
uses: peter-evans/create-pull-request@v7
uses: peter-evans/create-pull-request@v6
with:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
commit-message: ':arrow_up: Checksum updates in gallery/index.yaml'
title: 'chore(model-gallery): :arrow_up: update checksum'
title: 'models(gallery): :arrow_up: update checksum'
branch: "update/checksum"
body: Updating checksums in gallery/index.yaml
signoff: true

View File

@@ -1,64 +0,0 @@
name: Explorer deployment
on:
push:
branches:
- master
tags:
- 'v*'
concurrency:
group: ci-deploy-${{ github.head_ref || github.ref }}-${{ github.repository }}
jobs:
build-linux:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-go@v5
with:
go-version: '1.21.x'
cache: false
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
make protogen-go
- name: Build api
run: |
CGO_ENABLED=0 make build-api
- name: rm
uses: appleboy/ssh-action@v1.0.3
with:
host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
key: ${{ secrets.EXPLORER_SSH_KEY }}
port: ${{ secrets.EXPLORER_SSH_PORT }}
script: |
sudo rm -rf local-ai/ || true
- name: copy file via ssh
uses: appleboy/scp-action@v0.1.7
with:
host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
key: ${{ secrets.EXPLORER_SSH_KEY }}
port: ${{ secrets.EXPLORER_SSH_PORT }}
source: "local-ai"
overwrite: true
rm: true
target: ./local-ai
- name: restarting
uses: appleboy/ssh-action@v1.0.3
with:
host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
key: ${{ secrets.EXPLORER_SSH_KEY }}
port: ${{ secrets.EXPLORER_SSH_PORT }}
script: |
sudo cp -rfv local-ai/local-ai /usr/bin/local-ai
sudo systemctl restart local-ai

View File

@@ -13,78 +13,6 @@ concurrency:
cancel-in-progress: true
jobs:
hipblas-jobs:
uses: ./.github/workflows/image_build.yml
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
ffmpeg: ${{ matrix.ffmpeg }}
image-type: ${{ matrix.image-type }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
aio: ${{ matrix.aio }}
makeflags: ${{ matrix.makeflags }}
latest-image: ${{ matrix.latest-image }}
latest-image-aio: ${{ matrix.latest-image-aio }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
# Pushing with all jobs in parallel
# eats the bandwidth of all the nodes
max-parallel: 2
matrix:
include:
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-hipblas-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
aio: "-aio-gpu-hipblas"
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
latest-image: 'latest-gpu-hipblas'
latest-image-aio: 'latest-aio-gpu-hipblas'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas'
ffmpeg: 'false'
image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas-core'
ffmpeg: 'false'
image-type: 'core'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
self-hosted-jobs:
uses: ./.github/workflows/image_build.yml
with:
@@ -111,7 +39,7 @@ jobs:
strategy:
# Pushing with all jobs in parallel
# eats the bandwidth of all the nodes
max-parallel: ${{ github.event_name != 'pull_request' && 5 || 8 }}
max-parallel: ${{ github.event_name != 'pull_request' && 6 || 10 }}
matrix:
include:
# Extra images
@@ -194,6 +122,29 @@ jobs:
base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-hipblas-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
aio: "-aio-gpu-hipblas"
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
latest-image: 'latest-gpu-hipblas'
latest-image-aio: 'latest-aio-gpu-hipblas'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas'
ffmpeg: 'false'
image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'auto'
@@ -261,6 +212,26 @@ jobs:
image-type: 'core'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas-core'
ffmpeg: 'false'
image-type: 'core'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
core-image-build:
uses: ./.github/workflows/image_build.yml

View File

@@ -294,7 +294,7 @@ jobs:
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
export PATH=$PATH:$GOPATH/bin
export SKIP_GRPC_BACKEND=backend-assets/grpc/whisper
make dist
- uses: actions/upload-artifact@v4
with:
@@ -327,7 +327,7 @@ jobs:
cache: false
- name: Dependencies
run: |
brew install protobuf grpc libomp llvm
brew install protobuf grpc
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
- name: Build
@@ -336,7 +336,7 @@ jobs:
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
export PATH=$PATH:$GOPATH/bin
export CC=/opt/homebrew/opt/llvm/bin/clang
make dist
- uses: actions/upload-artifact@v4
with:

View File

@@ -18,7 +18,7 @@ jobs:
if: ${{ github.actor != 'dependabot[bot]' }}
- name: Run Gosec Security Scanner
if: ${{ github.actor != 'dependabot[bot]' }}
uses: securego/gosec@v2.21.0
uses: securego/gosec@master
with:
# we let the report trigger content trigger a failure using the GitHub Security features.
args: '-no-fail -fmt sarif -out results.sarif ./...'

View File

@@ -168,6 +168,32 @@ jobs:
make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
# tests-petals:
# runs-on: ubuntu-latest
# steps:
# - name: Clone
# uses: actions/checkout@v4
# with:
# submodules: true
# - name: Dependencies
# run: |
# sudo apt-get update
# sudo apt-get install build-essential ffmpeg
# # Install UV
# curl -LsSf https://astral.sh/uv/install.sh | sh
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
# sudo apt-get install -y libopencv-dev
# pip install --user --no-cache-dir grpcio-tools==1.64.1
# - name: Test petals
# run: |
# make --jobs=5 --output-sync=target -C backend/python/petals
# make --jobs=5 --output-sync=target -C backend/python/petals test
# tests-bark:
# runs-on: ubuntu-latest
# steps:

View File

@@ -214,13 +214,12 @@ jobs:
run: go version
- name: Dependencies
run: |
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc
pip install --user --no-cache-dir grpcio-tools==1.64.1
- name: Test
run: |
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
export CC=/opt/homebrew/opt/llvm/bin/clang
# Used to run the newer GNUMake version from brew that supports --output-sync
export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test

View File

@@ -25,7 +25,7 @@ jobs:
run: |
make protogen-go swagger
- name: Create Pull Request
uses: peter-evans/create-pull-request@v7
uses: peter-evans/create-pull-request@v6
with:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI

3
.gitignore vendored
View File

@@ -54,6 +54,3 @@ docs/static/gallery.html
# backend virtual environments
**/venv
# per-developer customization files for the development container
.devcontainer/customization/*

21
.vscode/launch.json vendored
View File

@@ -3,12 +3,12 @@
"configurations": [
{
"name": "Python: Current File",
"type": "debugpy",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"justMyCode": false,
"cwd": "${fileDirname}",
"cwd": "${workspaceFolder}/examples/langchain-chroma",
"env": {
"OPENAI_API_BASE": "http://localhost:8080/v1",
"OPENAI_API_KEY": "abc"
@@ -19,16 +19,15 @@
"type": "go",
"request": "launch",
"mode": "debug",
"program": "${workspaceRoot}",
"args": [],
"program": "${workspaceFolder}/main.go",
"args": [
"api"
],
"env": {
"LOCALAI_LOG_LEVEL": "debug",
"LOCALAI_P2P": "true",
"LOCALAI_FEDERATED": "true"
},
"buildFlags": ["-tags", "stablediffusion p2p tts", "-v"],
"envFile": "${workspaceFolder}/.env",
"cwd": "${workspaceRoot}"
"C_INCLUDE_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
"LIBRARY_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
"DEBUG": "true"
}
}
]
}

View File

@@ -8,12 +8,12 @@ FROM ${BASE_IMAGE} AS requirements-core
USER root
ARG GO_VERSION=1.22.6
ARG GO_VERSION=1.22.5
ARG TARGETARCH
ARG TARGETVARIANT
ENV DEBIAN_FRONTEND=noninteractive
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
RUN apt-get update && \
@@ -30,7 +30,7 @@ RUN apt-get update && \
# Install Go
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
ENV PATH $PATH:/root/go/bin:/usr/local/go/bin
# Install grpc compilers
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
@@ -39,18 +39,15 @@ RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
RUN update-ca-certificates
RUN test -n "$TARGETARCH" \
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
# Use the variables in subsequent instructions
RUN echo "Target Architecture: $TARGETARCH"
RUN echo "Target Variant: $TARGETVARIANT"
# Cuda
ENV PATH=/usr/local/cuda/bin:${PATH}
ENV PATH /usr/local/cuda/bin:${PATH}
# HipBLAS requirements
ENV PATH=/opt/rocm/bin:${PATH}
ENV PATH /opt/rocm/bin:${PATH}
# OpenBLAS requirements and stable diffusion
RUN apt-get update && \
@@ -65,6 +62,9 @@ RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
WORKDIR /build
RUN test -n "$TARGETARCH" \
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
###################################
###################################
@@ -81,7 +81,7 @@ RUN apt-get update && \
espeak \
python3-pip \
python-is-python3 \
python3-dev llvm \
python3-dev \
python3-venv && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
@@ -217,14 +217,13 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
###################################
###################################
# The builder-base target has the arguments, variables, and copies shared between full builder images and the uncompiled devcontainer
FROM requirements-drivers AS builder-base
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
# Adjustments to the build process should likely be made here.
FROM requirements-drivers AS builder
ARG GO_TAGS="stablediffusion tts p2p"
ARG GRPC_BACKENDS
ARG MAKEFLAGS
ARG LD_FLAGS="-s -w"
ENV GRPC_BACKENDS=${GRPC_BACKENDS}
ENV GO_TAGS=${GO_TAGS}
@@ -232,12 +231,14 @@ ENV MAKEFLAGS=${MAKEFLAGS}
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
ENV NVIDIA_VISIBLE_DEVICES=all
ENV LD_FLAGS=${LD_FLAGS}
RUN echo "GO_TAGS: $GO_TAGS" && echo "TARGETARCH: $TARGETARCH"
WORKDIR /build
COPY . .
COPY .git .
RUN echo "GO_TAGS: $GO_TAGS"
RUN make prepare
# We need protoc installed, and the version in 22.04 is too old. We will create one as part installing the GRPC build below
# but that will also being in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only
@@ -255,35 +256,8 @@ RUN <<EOT bash
fi
EOT
###################################
###################################
# This first portion of builder holds the layers specifically used to build backend-assets/grpc/stablediffusion
# In most cases, builder is the image you should be using - however, this can save build time if one just needs to copy backend-assets/grpc/stablediffusion and nothing else.
FROM builder-base AS builder-sd
# stablediffusion does not tolerate a newer version of abseil, copy only over enough elements to build it
COPY Makefile .
COPY go.mod .
COPY go.sum .
COPY backend/backend.proto ./backend/backend.proto
COPY backend/go/image/stablediffusion ./backend/go/image/stablediffusion
COPY pkg/grpc ./pkg/grpc
COPY pkg/stablediffusion ./pkg/stablediffusion
RUN git init
RUN make sources/go-stable-diffusion
RUN touch prepare-sources
# Actually build the backend
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make backend-assets/grpc/stablediffusion
###################################
###################################
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
# Adjustments to the build process should likely be made here.
FROM builder-sd AS builder
# stablediffusion does not tolerate a newer version of abseil, build it first
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
# Install the pre-built GRPC
COPY --from=grpc /opt/grpc /usr/local
@@ -291,20 +265,8 @@ COPY --from=grpc /opt/grpc /usr/local
# Rebuild with defaults backends
WORKDIR /build
COPY . .
COPY .git .
RUN make prepare
## Build the binary
## If it's CUDA or hipblas, we want to skip some of the llama-compat backends to save space
## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build
## (both will use CUDA or hipblas for the actual computation)
RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
else \
make build; \
fi
RUN make build
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
@@ -314,40 +276,6 @@ RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
###################################
###################################
# The devcontainer target is not used on CI. It is a target for developers to use locally -
# rather than copying files it mounts them locally and leaves building to the developer
FROM builder-base AS devcontainer
ARG FFMPEG
COPY --from=grpc /opt/grpc /usr/local
COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion /build/backend-assets/grpc/stablediffusion
COPY .devcontainer-scripts /.devcontainer-scripts
# Add FFmpeg
RUN if [ "${FFMPEG}" = "true" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
ffmpeg && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* \
; fi
RUN apt-get update && \
apt-get install -y --no-install-recommends \
ssh less wget
# For the devcontainer, leave apt functional in case additional devtools are needed at runtime.
RUN go install github.com/go-delve/delve/cmd/dlv@latest
RUN go install github.com/mikefarah/yq/v4@latest
###################################
###################################
# This is the final target. The result of this target will be the image uploaded to the registry.
# If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
FROM requirements-drivers
@@ -398,7 +326,7 @@ COPY --from=builder /build/local-ai ./
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
# do not let stablediffusion rebuild (requires an older version of absl)
COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
# Change the shell to bash so we can use [[ tests below
SHELL ["/bin/bash", "-c"]
@@ -417,6 +345,9 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAG
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "transformers-musicgen" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/transformers-musicgen \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama1" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/exllama \
; fi
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
@@ -425,6 +356,9 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$I
if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/openvoice \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "petals" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/petals \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "sentencetransformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/sentencetransformers \
; fi && \

View File

@@ -8,7 +8,11 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=f0c7b5edf82aa200656fd88c11ae3a805d7130bf
CPPLLAMA_VERSION?=081fe431aa8fb6307145c4feb3eed4f48cab19f8
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
GPT4ALL_VERSION?=27a8b020c36b0df8f8b82a252d261cda47cf44b8
# go-rwkv version
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
@@ -16,7 +20,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=34972dbe221709323714fc8402f2e24041d48213
WHISPER_CPP_VERSION?=f68298ce06ca3edd6e6f3f21c3d0bb5f073942c3
# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
@@ -186,6 +190,7 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all
ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
@@ -248,6 +253,18 @@ sources/go-piper:
sources/go-piper/libpiper_binding.a: sources/go-piper
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
## GPT4ALL
sources/gpt4all:
mkdir -p sources/gpt4all
cd sources/gpt4all && \
git init && \
git remote add origin $(GPT4ALL_REPO) && \
git fetch origin && \
git checkout $(GPT4ALL_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
## RWKV
sources/go-rwkv.cpp:
@@ -301,7 +318,7 @@ sources/whisper.cpp:
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
get-sources: sources/go-llama.cpp sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
replace:
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
@@ -311,6 +328,7 @@ replace:
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
dropreplace:
@@ -321,6 +339,7 @@ dropreplace:
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
$(GOCMD) mod edit -dropreplace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
prepare-sources: get-sources replace
@@ -330,6 +349,7 @@ prepare-sources: get-sources replace
rebuild: ## Rebuilds the project
$(GOCMD) clean -cache
$(MAKE) -C sources/go-llama.cpp clean
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean
$(MAKE) -C sources/go-rwkv.cpp clean
$(MAKE) -C sources/whisper.cpp clean
$(MAKE) -C sources/go-stable-diffusion clean
@@ -359,9 +379,6 @@ clean-tests:
rm -rf test-dir
rm -rf core/http/backend-assets
clean-dc: clean
cp -r /build/backend-assets /workspace/backend-assets
## Build:
build: prepare backend-assets grpcs ## Build the project
$(info ${GREEN}I local-ai build info:${RESET})
@@ -379,7 +396,7 @@ build-minimal:
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build
build-api:
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=p2p $(MAKE) build
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
backend-assets/lib:
mkdir -p backend-assets/lib
@@ -390,7 +407,7 @@ ifeq ($(DETECT_LIBS),true)
scripts/prepare-libs.sh backend-assets/grpc/llama-cpp-avx2
endif
ifeq ($(OS),Darwin)
BUILD_TYPE=none $(MAKE) backend-assets/grpc/llama-cpp-fallback
$(info ${GREEN}I Skip CUDA/hipblas build on MacOS${RESET})
else
$(MAKE) backend-assets/grpc/llama-cpp-cuda
$(MAKE) backend-assets/grpc/llama-cpp-hipblas
@@ -452,7 +469,8 @@ test: prepare test-models/testmodel.ggml grpcs
export GO_TAGS="tts stablediffusion debug"
$(MAKE) prepare-test
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
$(MAKE) test-gpt4all
$(MAKE) test-llama
$(MAKE) test-llama-gguf
$(MAKE) test-tts
@@ -482,6 +500,10 @@ teardown-e2e:
rm -rf $(TEST_DIR) || true
docker stop $$(docker ps -q --filter ancestor=localai-tests)
test-gpt4all: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r $(TEST_PATHS)
test-llama: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r $(TEST_PATHS)
@@ -537,10 +559,10 @@ protogen-go-clean:
$(RM) bin/*
.PHONY: protogen-python
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
.PHONY: protogen-python-clean
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
.PHONY: autogptq-protogen
autogptq-protogen:
@@ -574,6 +596,14 @@ diffusers-protogen:
diffusers-protogen-clean:
$(MAKE) -C backend/python/diffusers protogen-clean
.PHONY: exllama-protogen
exllama-protogen:
$(MAKE) -C backend/python/exllama protogen
.PHONY: exllama-protogen-clean
exllama-protogen-clean:
$(MAKE) -C backend/python/exllama protogen-clean
.PHONY: exllama2-protogen
exllama2-protogen:
$(MAKE) -C backend/python/exllama2 protogen
@@ -590,6 +620,14 @@ mamba-protogen:
mamba-protogen-clean:
$(MAKE) -C backend/python/mamba protogen-clean
.PHONY: petals-protogen
petals-protogen:
$(MAKE) -C backend/python/petals protogen
.PHONY: petals-protogen-clean
petals-protogen-clean:
$(MAKE) -C backend/python/petals protogen-clean
.PHONY: rerankers-protogen
rerankers-protogen:
$(MAKE) -C backend/python/rerankers protogen
@@ -670,6 +708,8 @@ prepare-extra-conda-environments: protogen-python
$(MAKE) -C backend/python/parler-tts
$(MAKE) -C backend/python/vall-e-x
$(MAKE) -C backend/python/openvoice
$(MAKE) -C backend/python/exllama
$(MAKE) -C backend/python/petals
$(MAKE) -C backend/python/exllama2
prepare-test-extra: protogen-python
@@ -690,6 +730,12 @@ backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_bindin
mkdir -p backend-assets/espeak-ng-data
@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
backend-assets/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
mkdir -p backend-assets/gpt4all
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
backend-assets/grpc: protogen-go replace
mkdir -p backend-assets/grpc
@@ -700,6 +746,13 @@ ifneq ($(UPX),)
$(UPX) backend-assets/grpc/bert-embeddings
endif
backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/gpt4all
endif
backend-assets/grpc/huggingface: backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
ifneq ($(UPX),)
@@ -730,6 +783,9 @@ else
echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
endif
ifneq ($(UPX),)
$(UPX) backend/cpp/${VARIANT}/grpc-server
endif
# This target is for manually building a variant with-auto detected flags
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp
@@ -802,6 +858,9 @@ backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama/llama.
backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
mkdir -p backend-assets/util/
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
ifneq ($(UPX),)
$(UPX) backend-assets/util/llama-cpp-rpc-server
endif
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
@@ -840,7 +899,7 @@ endif
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/whisper
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/whisper
endif

View File

@@ -40,7 +40,7 @@
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
>
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/go-skynet/LocalAI/tree/master/examples/)
> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
[![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai)
@@ -72,7 +72,6 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
- Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
- June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
- June 2024: Support for models from OCI registries: https://github.com/mudler/LocalAI/pull/2628
@@ -85,7 +84,6 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
Hot topics (looking for contributors):
- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
@@ -152,7 +150,6 @@ Other:
## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social)
- [Run Visual studio code with LocalAI (SUSE)](https://www.suse.com/c/running-ai-locally/)
- 🆕 [Run LocalAI on Jetson Nano Devkit](https://mudler.pm/posts/local-ai-jetson-nano-devkit/)
- [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/blog/low-code-llm-apps-with-local-ai-flowise-and-pulumi/)
- [Run LocalAI on AWS](https://staleks.hashnode.dev/installing-localai-on-aws-ec2-instance)

View File

@@ -2,7 +2,7 @@ backend: llama-cpp
context_size: 4096
f16: true
mmap: true
name: gpt-4o
name: gpt-4-vision-preview
roles:
user: "USER:"

View File

@@ -2,7 +2,7 @@ backend: llama-cpp
context_size: 4096
f16: true
mmap: true
name: gpt-4o
name: gpt-4-vision-preview
roles:
user: "USER:"

View File

@@ -1,6 +1,6 @@
name: stablediffusion
parameters:
model: Lykon/dreamshaper-8
model: runwayml/stable-diffusion-v1-5
backend: diffusers
step: 25
f16: true

View File

@@ -2,7 +2,7 @@ backend: llama-cpp
context_size: 4096
mmap: false
f16: false
name: gpt-4o
name: gpt-4-vision-preview
roles:
user: "USER:"

View File

@@ -16,7 +16,6 @@ service Backend {
rpc GenerateImage(GenerateImageRequest) returns (Result) {}
rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {}
rpc TTS(TTSRequest) returns (Result) {}
rpc SoundGeneration(SoundGenerationRequest) returns (Result) {}
rpc TokenizeString(PredictOptions) returns (TokenizationResponse) {}
rpc Status(HealthMessage) returns (StatusResponse) {}
@@ -134,8 +133,6 @@ message PredictOptions {
repeated string Images = 42;
bool UseTokenizerTemplate = 43;
repeated Message Messages = 44;
repeated string Videos = 45;
repeated string Audios = 46;
}
// The response message containing the result
@@ -273,17 +270,6 @@ message TTSRequest {
optional string language = 5;
}
message SoundGenerationRequest {
string text = 1;
string model = 2;
string dst = 3;
optional float duration = 4;
optional float temperature = 5;
optional bool sample = 6;
optional string src = 7;
optional int32 src_divisor = 8;
}
message TokenizationResponse {
int32 length = 1;
repeated int32 tokens = 2;

View File

@@ -13,15 +13,15 @@
#include <getopt.h>
#include "clip.h"
#include "llava.h"
#include "log.h"
#include "stb_image.h"
#include "common.h"
#include "json.hpp"
#include "llama.h"
#include "grammar-parser.h"
#include "backend.pb.h"
#include "backend.grpc.pb.h"
#include "utils.hpp"
#include "sampling.h"
// include std::regex
#include <cstddef>
#include <thread>
@@ -203,8 +203,8 @@ struct llama_client_slot
std::string stopping_word;
// sampling
struct gpt_sampler_params sparams;
gpt_sampler *ctx_sampling = nullptr;
struct llama_sampling_params sparams;
llama_sampling_context *ctx_sampling = nullptr;
int32_t ga_i = 0; // group-attention state
int32_t ga_n = 1; // group-attention factor
@@ -449,7 +449,7 @@ struct llama_server_context
LOG_INFO("Multi Modal Mode Enabled", {});
clp_ctx = clip_model_load(params.mmproj.c_str(), /*verbosity=*/ 1);
if(clp_ctx == nullptr) {
LOG_ERR("unable to load clip model: %s", params.mmproj.c_str());
LOG_ERROR("unable to load clip model", {{"model", params.mmproj}});
return false;
}
@@ -458,12 +458,10 @@ struct llama_server_context
}
}
llama_init_result llama_init = llama_init_from_gpt_params(params);
model = llama_init.model;
ctx = llama_init.context;
std::tie(model, ctx) = llama_init_from_gpt_params(params);
if (model == nullptr)
{
LOG_ERR("unable to load model: %s", params.model.c_str());
LOG_ERROR("unable to load model", {{"model", params.model}});
return false;
}
@@ -471,7 +469,7 @@ struct llama_server_context
const int n_embd_clip = clip_n_mmproj_embd(clp_ctx);
const int n_embd_llm = llama_n_embd(model);
if (n_embd_clip != n_embd_llm) {
LOG("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
LOG_TEE("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
llama_free(ctx);
llama_free_model(model);
return false;
@@ -480,7 +478,7 @@ struct llama_server_context
n_ctx = llama_n_ctx(ctx);
add_bos_token = llama_add_bos_token(model);
add_bos_token = llama_should_add_bos_token(model);
return true;
}
@@ -490,7 +488,7 @@ struct llama_server_context
std::vector<char> buf(1);
int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size());
if (res < 0) {
LOG_ERR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", __func__);
LOG_ERROR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", {});
sparams.chat_template = "<|im_start|>"; // llama_chat_apply_template only checks if <|im_start|> exist in the template
}
}
@@ -619,7 +617,7 @@ struct llama_server_context
bool launch_slot_with_data(llama_client_slot* &slot, json data) {
slot_params default_params;
gpt_sampler_params default_sparams;
llama_sampling_params default_sparams;
slot->params.stream = json_value(data, "stream", false);
slot->params.cache_prompt = json_value(data, "cache_prompt", false);
@@ -628,7 +626,7 @@ struct llama_server_context
slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p);
slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
slot->sparams.typ_p = json_value(data, "typical_p", default_sparams.typ_p);
slot->sparams.typical_p = json_value(data, "typical_p", default_sparams.typical_p);
slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
slot->sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range);
slot->sparams.dynatemp_exponent = json_value(data, "dynatemp_exponent", default_sparams.dynatemp_exponent);
@@ -641,7 +639,7 @@ struct llama_server_context
slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
slot->sparams.seed = json_value(data, "seed", default_sparams.seed);
slot->params.seed = json_value(data, "seed", default_params.seed);
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
@@ -665,7 +663,6 @@ struct llama_server_context
slot->params.input_prefix = "";
}
if (data.count("input_suffix") != 0)
{
slot->params.input_suffix = data["input_suffix"];
@@ -684,10 +681,6 @@ struct llama_server_context
slot->prompt = "";
}
if (json_value(data, "ignore_eos", false)) {
slot->sparams.logit_bias.push_back({llama_token_eos(model), -INFINITY});
}
/*
slot->sparams.penalty_prompt_tokens.clear();
slot->sparams.use_penalty_prompt_tokens = false;
const auto &penalty_prompt = data.find("penalty_prompt");
@@ -723,10 +716,14 @@ struct llama_server_context
slot->sparams.use_penalty_prompt_tokens = true;
}
}
*/
slot->sparams.logit_bias.clear();
if (json_value(data, "ignore_eos", false))
{
slot->sparams.logit_bias[llama_token_eos(model)] = -INFINITY;
}
const auto &logit_bias = data.find("logit_bias");
if (logit_bias != data.end() && logit_bias->is_array())
{
@@ -754,7 +751,7 @@ struct llama_server_context
llama_token tok = el[0].get<llama_token>();
if (tok >= 0 && tok < n_vocab)
{
slot->sparams.logit_bias.push_back({tok, bias});
slot->sparams.logit_bias[tok] = bias;
}
}
else if (el[0].is_string())
@@ -762,13 +759,13 @@ struct llama_server_context
auto toks = llama_tokenize(model, el[0].get<std::string>(), false);
for (auto tok : toks)
{
slot->sparams.logit_bias.push_back({tok, bias});
slot->sparams.logit_bias[tok] = bias;
}
}
}
}
}
slot->params.antiprompt.clear();
const auto &stop = data.find("stop");
@@ -782,22 +779,24 @@ struct llama_server_context
}
}
}
const auto & samplers = data.find("samplers");
if (samplers != data.end() && samplers->is_array()) {
const auto &samplers_sequence = data.find("samplers");
if (samplers_sequence != data.end() && samplers_sequence->is_array())
{
std::vector<std::string> sampler_names;
for (const auto & name : *samplers) {
if (name.is_string()) {
sampler_names.emplace_back(name);
}
for (const auto &sampler_name : *samplers_sequence)
{
if (sampler_name.is_string())
{
sampler_names.emplace_back(sampler_name);
}
slot->sparams.samplers = gpt_sampler_types_from_names(sampler_names, false);
}
slot->sparams.samplers_sequence = llama_sampling_types_from_names(sampler_names, false);
}
else
{
slot->sparams.samplers = default_sparams.samplers;
slot->sparams.samplers_sequence = default_sparams.samplers_sequence;
}
if (multimodal)
{
@@ -813,11 +812,10 @@ struct llama_server_context
img_sl.img_data = clip_image_u8_init();
if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data))
{
LOG_ERR("%s: failed to load image, slot_id: %d, img_sl_id: %d",
__func__,
slot->id,
img_sl.id
);
LOG_ERROR("failed to load image", {
{"slot_id", slot->id},
{"img_sl_id", img_sl.id}
});
return false;
}
LOG_VERBOSE("image loaded", {
@@ -855,12 +853,12 @@ struct llama_server_context
}
}
if (!found) {
LOG("ERROR: Image with id: %i, not found.\n", img_id);
LOG_TEE("ERROR: Image with id: %i, not found.\n", img_id);
slot->images.clear();
return false;
}
} catch (const std::invalid_argument& e) {
LOG("Invalid image number id in prompt\n");
LOG_TEE("Invalid image number id in prompt\n");
slot->images.clear();
return false;
}
@@ -875,10 +873,10 @@ struct llama_server_context
if (slot->ctx_sampling != nullptr)
{
gpt_sampler_free(slot->ctx_sampling);
llama_sampling_free(slot->ctx_sampling);
}
slot->ctx_sampling = gpt_sampler_init(model, slot->sparams);
//llama_set_rng_seed(ctx, slot->params.seed);
slot->ctx_sampling = llama_sampling_init(slot->sparams);
llama_set_rng_seed(ctx, slot->params.seed);
slot->command = LOAD_PROMPT;
all_slots_are_idle = false;
@@ -888,7 +886,7 @@ struct llama_server_context
{"task_id", slot->task_id},
});
// LOG("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
LOG_TEE("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
return true;
}
@@ -928,7 +926,7 @@ struct llama_server_context
};
if (llama_decode(ctx, batch_view) != 0)
{
LOG("%s: llama_decode() failed\n", __func__);
LOG_TEE("%s: llama_decode() failed\n", __func__);
return;
}
}
@@ -940,7 +938,7 @@ struct llama_server_context
}
}
LOG("system prompt updated\n");
LOG_TEE("system prompt updated\n");
system_need_update = false;
}
@@ -1006,13 +1004,11 @@ struct llama_server_context
slot.generated_text += token_str;
slot.has_next_token = true;
/*
if (slot.ctx_sampling->params.use_penalty_prompt_tokens && result.tok != -1)
{
// we can change penalty_prompt_tokens because it is always created from scratch each request
slot.ctx_sampling->params.penalty_prompt_tokens.push_back(result.tok);
}
*/
// check if there is incomplete UTF-8 character at the end
bool incomplete = false;
@@ -1121,8 +1117,8 @@ struct llama_server_context
continue;
}
if (!llava_image_embed_make_with_clip_img(clp_ctx, params.cpuparams.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
LOG("Error processing the given image");
if (!llava_image_embed_make_with_clip_img(clp_ctx, params.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
LOG_TEE("Error processing the given image");
return false;
}
@@ -1134,7 +1130,7 @@ struct llama_server_context
void send_error(task_server& task, const std::string &error)
{
LOG("task %i - error: %s\n", task.id, error.c_str());
LOG_TEE("task %i - error: %s\n", task.id, error.c_str());
task_result res;
res.id = task.id;
res.multitask_id = task.multitask_id;
@@ -1146,11 +1142,13 @@ struct llama_server_context
json get_formated_generation(llama_client_slot &slot)
{
std::vector<std::string> samplers;
samplers.reserve(slot.sparams.samplers.size());
for (const auto & sampler : slot.sparams.samplers)
const auto eos_bias = slot.sparams.logit_bias.find(llama_token_eos(model));
const bool ignore_eos = eos_bias != slot.sparams.logit_bias.end() &&
eos_bias->second < 0.0f && std::isinf(eos_bias->second);
std::vector<std::string> samplers_sequence;
for (const auto &sampler_type : slot.sparams.samplers_sequence)
{
samplers.emplace_back(gpt_sampler_type_to_str(sampler));
samplers_sequence.emplace_back(llama_sampling_type_to_str(sampler_type));
}
return json {
@@ -1165,11 +1163,13 @@ struct llama_server_context
{"top_p", slot.sparams.top_p},
{"min_p", slot.sparams.min_p},
{"tfs_z", slot.sparams.tfs_z},
{"typical_p", slot.sparams.typ_p},
{"typical_p", slot.sparams.typical_p},
{"repeat_last_n", slot.sparams.penalty_last_n},
{"repeat_penalty", slot.sparams.penalty_repeat},
{"presence_penalty", slot.sparams.penalty_present},
{"frequency_penalty", slot.sparams.penalty_freq},
{"penalty_prompt_tokens", slot.sparams.penalty_prompt_tokens},
{"use_penalty_prompt_tokens", slot.sparams.use_penalty_prompt_tokens},
{"mirostat", slot.sparams.mirostat},
{"mirostat_tau", slot.sparams.mirostat_tau},
{"mirostat_eta", slot.sparams.mirostat_eta},
@@ -1177,13 +1177,13 @@ struct llama_server_context
{"stop", slot.params.antiprompt},
{"n_predict", slot.params.n_predict},
{"n_keep", params.n_keep},
{"ignore_eos", slot.sparams.ignore_eos},
{"ignore_eos", ignore_eos},
{"stream", slot.params.stream},
// {"logit_bias", slot.sparams.logit_bias},
{"logit_bias", slot.sparams.logit_bias},
{"n_probs", slot.sparams.n_probs},
{"min_keep", slot.sparams.min_keep},
{"grammar", slot.sparams.grammar},
{"samplers", samplers}
{"samplers", samplers_sequence}
};
}
@@ -1373,7 +1373,7 @@ struct llama_server_context
};
if (llama_decode(ctx, batch_view))
{
LOG("%s : failed to eval\n", __func__);
LOG_TEE("%s : failed to eval\n", __func__);
return false;
}
}
@@ -1391,7 +1391,7 @@ struct llama_server_context
llama_batch batch_img = { n_eval, nullptr, (img.image_embedding + i * n_embd), nullptr, nullptr, nullptr, nullptr, slot.n_past, 1, 0, };
if (llama_decode(ctx, batch_img))
{
LOG("%s : failed to eval image\n", __func__);
LOG_TEE("%s : failed to eval image\n", __func__);
return false;
}
slot.n_past += n_eval;
@@ -1574,7 +1574,7 @@ struct llama_server_context
slot.n_past = 0;
slot.truncated = false;
slot.has_next_token = true;
LOG("Context exhausted. Slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size());
LOG_TEE("Context exhausted. Slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size());
continue;
// END LOCALAI changes
@@ -1712,7 +1712,7 @@ struct llama_server_context
if (!slot.params.cache_prompt)
{
gpt_sampler_reset(slot.ctx_sampling);
llama_sampling_reset(slot.ctx_sampling);
slot.n_past = 0;
slot.n_past_se = 0;
@@ -1724,7 +1724,7 @@ struct llama_server_context
// push the prompt into the sampling context (do not apply grammar)
for (auto &token : prompt_tokens)
{
gpt_sampler_accept(slot.ctx_sampling, token, false);
llama_sampling_accept(slot.ctx_sampling, ctx, token, false);
}
slot.n_past = common_part(slot.cache_tokens, prompt_tokens);
@@ -1822,11 +1822,10 @@ struct llama_server_context
if (has_images && !ingest_images(slot, n_batch))
{
LOG_ERR("%s: failed processing images Slot id : %d, Task id: %d",
__func__,
slot.id,
slot.task_id
);
LOG_ERROR("failed processing images", {
"slot_id", slot.id,
"task_id", slot.task_id,
});
// FIXME @phymbert: to be properly tested
// early returning without changing the slot state will block the slot for ever
// no one at the moment is checking the return value
@@ -1866,10 +1865,10 @@ struct llama_server_context
const int bd = (slot.ga_w / slot.ga_n) * (slot.ga_n - 1);
const int dd = (slot.ga_w / slot.ga_n) - ib * bd - slot.ga_w;
LOG("\n");
LOG("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i, slot.n_past_se, ib * bd, slot.ga_i + ib * bd, slot.n_past_se + ib * bd);
LOG("div: [%6d, %6d] / %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w, slot.ga_n, (slot.ga_i + ib * bd) / slot.ga_n, (slot.ga_i + ib * bd + slot.ga_w) / slot.ga_n);
LOG("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd + slot.ga_w, slot.n_past_se + ib * bd, dd, slot.ga_i + ib * bd + slot.ga_w + dd, slot.n_past_se + ib * bd + dd);
LOG_TEE("\n");
LOG_TEE("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i, slot.n_past_se, ib * bd, slot.ga_i + ib * bd, slot.n_past_se + ib * bd);
LOG_TEE("div: [%6d, %6d] / %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w, slot.ga_n, (slot.ga_i + ib * bd) / slot.ga_n, (slot.ga_i + ib * bd + slot.ga_w) / slot.ga_n);
LOG_TEE("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd + slot.ga_w, slot.n_past_se + ib * bd, dd, slot.ga_i + ib * bd + slot.ga_w + dd, slot.n_past_se + ib * bd + dd);
llama_kv_cache_seq_add(ctx, slot.id, slot.ga_i, slot.n_past_se, ib * bd);
llama_kv_cache_seq_div(ctx, slot.id, slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w,slot.ga_n);
@@ -1879,7 +1878,7 @@ struct llama_server_context
slot.ga_i += slot.ga_w / slot.ga_n;
LOG("\nn_past_old = %d, n_past = %d, ga_i = %d\n\n", slot.n_past_se + bd, slot.n_past_se, slot.ga_i);
LOG_TEE("\nn_past_old = %d, n_past = %d, ga_i = %d\n\n", slot.n_past_se + bd, slot.n_past_se, slot.ga_i);
}
slot.n_past_se += n_tokens;
}
@@ -1904,11 +1903,11 @@ struct llama_server_context
if (n_batch == 1 || ret < 0)
{
// if you get here, it means the KV cache is full - try increasing it via the context size
LOG("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret);
LOG_TEE("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret);
return false;
}
LOG("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2);
LOG_TEE("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2);
// retry with half the batch size to try to find a free slot in the KV cache
n_batch /= 2;
@@ -1933,9 +1932,9 @@ struct llama_server_context
}
completion_token_output result;
const llama_token id = gpt_sampler_sample(slot.ctx_sampling, ctx, slot.i_batch - i);
const llama_token id = llama_sampling_sample(slot.ctx_sampling, ctx, NULL, slot.i_batch - i);
gpt_sampler_accept(slot.ctx_sampling, id, true);
llama_sampling_accept(slot.ctx_sampling, ctx, id, true);
slot.n_decoded += 1;
if (slot.n_decoded == 1)
@@ -1945,14 +1944,19 @@ struct llama_server_context
metrics.on_prompt_eval(slot);
}
llama_token_data_array cur_p = { slot.ctx_sampling->cur.data(), slot.ctx_sampling->cur.size(), false };
result.tok = id;
const auto * cur_p = gpt_sampler_get_candidates(slot.ctx_sampling);
for (size_t i = 0; i < (size_t) slot.sparams.n_probs; ++i) {
result.probs.push_back({
cur_p->data[i].id,
i >= cur_p->size ? 0.0f : cur_p->data[i].p,
});
const int32_t n_probs = slot.sparams.n_probs;
if (slot.sparams.temp <= 0 && n_probs > 0)
{
// for llama_sample_token_greedy we need to sort candidates
llama_sample_softmax(ctx, &cur_p);
}
for (size_t i = 0; i < std::min(cur_p.size, (size_t)n_probs); ++i)
{
result.probs.push_back({cur_p.data[i].id, cur_p.data[i].p});
}
if (!process_token(result, slot))
@@ -2204,7 +2208,7 @@ static void params_parse(const backend::ModelOptions* request,
params.model_alias = request->modelfile();
params.n_ctx = request->contextsize();
//params.memory_f16 = request->f16memory();
params.cpuparams.n_threads = request->threads();
params.n_threads = request->threads();
params.n_gpu_layers = request->ngpulayers();
params.n_batch = request->nbatch();
// Set params.n_parallel by environment variable (LLAMA_PARALLEL), defaults to 1
@@ -2254,7 +2258,7 @@ static void params_parse(const backend::ModelOptions* request,
}
// get the directory of modelfile
std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
params.lora_adapters.push_back({ model_dir + "/"+request->loraadapter(), scale_factor });
params.lora_adapter.push_back(std::make_tuple(model_dir + "/"+request->loraadapter(), scale_factor));
}
params.use_mlock = request->mlock();
params.use_mmap = request->mmap();

View File

@@ -1,13 +0,0 @@
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 342042ff..224db9b5 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -2419,7 +2419,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
int* patches_data = (int*)malloc(ggml_nbytes(patches));
for (int i = 0; i < num_patches; i++) {
- patches_data[i] = i + 1;
+ patches_data[i] = i;
}
ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
free(patches_data);

View File

@@ -1,12 +1,5 @@
#!/bin/bash
## Patches
## Apply patches from the `patches` directory
for patch in $(ls patches); do
echo "Applying patch $patch"
patch -d llama.cpp/ -p1 < patches/$patch
done
cp -r CMakeLists.txt llama.cpp/examples/grpc-server/
cp -r grpc-server.cpp llama.cpp/examples/grpc-server/
cp -rfv json.hpp llama.cpp/examples/grpc-server/

View File

@@ -480,4 +480,31 @@ static inline std::vector<uint8_t> base64_decode(const std::string & encoded_str
}
return ret;
}
//
// random string / id
//
static std::string random_string()
{
static const std::string str("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
std::random_device rd;
std::mt19937 generator(rd());
std::string result(32, ' ');
for (int i = 0; i < 32; ++i) {
result[i] = str[generator() % str.size()];
}
return result;
}
static std::string gen_chatcmplid()
{
std::stringstream chatcmplid;
chatcmplid << "chatcmpl-" << random_string();
return chatcmplid.str();
}

View File

@@ -0,0 +1,62 @@
package main
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
)
type LLM struct {
base.SingleThread
gpt4all *gpt4all.Model
}
func (llm *LLM) Load(opts *pb.ModelOptions) error {
model, err := gpt4all.New(opts.ModelFile,
gpt4all.SetThreads(int(opts.Threads)),
gpt4all.SetLibrarySearchPath(opts.LibrarySearchPath))
llm.gpt4all = model
return err
}
func buildPredictOptions(opts *pb.PredictOptions) []gpt4all.PredictOption {
predictOptions := []gpt4all.PredictOption{
gpt4all.SetTemperature(float64(opts.Temperature)),
gpt4all.SetTopP(float64(opts.TopP)),
gpt4all.SetTopK(int(opts.TopK)),
gpt4all.SetTokens(int(opts.Tokens)),
}
if opts.Batch != 0 {
predictOptions = append(predictOptions, gpt4all.SetBatch(int(opts.Batch)))
}
return predictOptions
}
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
return llm.gpt4all.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
predictOptions := buildPredictOptions(opts)
go func() {
llm.gpt4all.SetTokenCallback(func(token string) bool {
results <- token
return true
})
_, err := llm.gpt4all.Predict(opts.Prompt, predictOptions...)
if err != nil {
fmt.Println("err: ", err)
}
llm.gpt4all.SetTokenCallback(nil)
close(results)
}()
return nil
}

View File

@@ -0,0 +1,21 @@
package main
// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"
grpc "github.com/mudler/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
panic(err)
}
}

View File

@@ -0,0 +1,104 @@
package main
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
"github.com/go-audio/wav"
"github.com/mudler/LocalAI/core/schema"
)
func ffmpegCommand(args []string) (string, error) {
cmd := exec.Command("ffmpeg", args...) // Constrain this to ffmpeg to permit security scanner to see that the command is safe.
cmd.Env = os.Environ()
out, err := cmd.CombinedOutput()
return string(out), err
}
// AudioToWav converts audio to wav for transcribe.
// TODO: use https://github.com/mccoyst/ogg?
func audioToWav(src, dst string) error {
commandArgs := []string{"-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
out, err := ffmpegCommand(commandArgs)
if err != nil {
return fmt.Errorf("error: %w out: %s", err, out)
}
return nil
}
func Transcript(model whisper.Model, audiopath, language string, translate bool, threads uint) (schema.TranscriptionResult, error) {
res := schema.TranscriptionResult{}
dir, err := os.MkdirTemp("", "whisper")
if err != nil {
return res, err
}
defer os.RemoveAll(dir)
convertedPath := filepath.Join(dir, "converted.wav")
if err := audioToWav(audiopath, convertedPath); err != nil {
return res, err
}
// Open samples
fh, err := os.Open(convertedPath)
if err != nil {
return res, err
}
defer fh.Close()
// Read samples
d := wav.NewDecoder(fh)
buf, err := d.FullPCMBuffer()
if err != nil {
return res, err
}
data := buf.AsFloat32Buffer().Data
// Process samples
context, err := model.NewContext()
if err != nil {
return res, err
}
context.SetThreads(threads)
if language != "" {
context.SetLanguage(language)
} else {
context.SetLanguage("auto")
}
if translate {
context.SetTranslate(true)
}
if err := context.Process(data, nil, nil); err != nil {
return res, err
}
for {
s, err := context.NextSegment()
if err != nil {
break
}
var tokens []int
for _, t := range s.Tokens {
tokens = append(tokens, t.Id)
}
segment := schema.Segment{Id: s.Num, Text: s.Text, Start: s.Start, End: s.End, Tokens: tokens}
res.Segments = append(res.Segments, segment)
res.Text += s.Text
}
return res, nil
}

View File

@@ -0,0 +1,26 @@
package main
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)
type Whisper struct {
base.SingleThread
whisper whisper.Model
}
func (sd *Whisper) Load(opts *pb.ModelOptions) error {
// Note: the Model here is a path to a directory containing the model files
w, err := whisper.New(opts.ModelFile)
sd.whisper = w
return err
}
func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.TranscriptionResult, error) {
return Transcript(sd.whisper, opts.Dst, opts.Language, opts.Translate, uint(opts.Threads))
}

View File

@@ -1,105 +0,0 @@
package main
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"os"
"path/filepath"
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
"github.com/go-audio/wav"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/utils"
)
type Whisper struct {
base.SingleThread
whisper whisper.Model
}
func (sd *Whisper) Load(opts *pb.ModelOptions) error {
// Note: the Model here is a path to a directory containing the model files
w, err := whisper.New(opts.ModelFile)
sd.whisper = w
return err
}
func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (pb.TranscriptResult, error) {
dir, err := os.MkdirTemp("", "whisper")
if err != nil {
return pb.TranscriptResult{}, err
}
defer os.RemoveAll(dir)
convertedPath := filepath.Join(dir, "converted.wav")
if err := utils.AudioToWav(opts.Dst, convertedPath); err != nil {
return pb.TranscriptResult{}, err
}
// Open samples
fh, err := os.Open(convertedPath)
if err != nil {
return pb.TranscriptResult{}, err
}
defer fh.Close()
// Read samples
d := wav.NewDecoder(fh)
buf, err := d.FullPCMBuffer()
if err != nil {
return pb.TranscriptResult{}, err
}
data := buf.AsFloat32Buffer().Data
// Process samples
context, err := sd.whisper.NewContext()
if err != nil {
return pb.TranscriptResult{}, err
}
context.SetThreads(uint(opts.Threads))
if opts.Language != "" {
context.SetLanguage(opts.Language)
} else {
context.SetLanguage("auto")
}
if opts.Translate {
context.SetTranslate(true)
}
if err := context.Process(data, nil, nil); err != nil {
return pb.TranscriptResult{}, err
}
segments := []*pb.TranscriptSegment{}
text := ""
for {
s, err := context.NextSegment()
if err != nil {
break
}
var tokens []int32
for _, t := range s.Tokens {
tokens = append(tokens, int32(t.Id))
}
segment := &pb.TranscriptSegment{Id: int32(s.Num), Text: s.Text, Start: int64(s.Start), End: int64(s.End), Tokens: tokens}
segments = append(segments, segment)
text += s.Text
}
return pb.TranscriptResult{
Segments: segments,
Text: text,
}, nil
}

View File

@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch

View File

@@ -1 +0,0 @@
torch

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,7 @@
accelerate
auto-gptq==0.7.1
grpcio==1.66.1
grpcio==1.65.1
protobuf
torch
certifi
transformers

View File

@@ -1,4 +0,0 @@
transformers
accelerate
torch
torchaudio

View File

@@ -1,5 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
torchaudio
transformers
accelerate

View File

@@ -1,4 +0,0 @@
torch
torchaudio
transformers
accelerate

View File

@@ -1,5 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch
torchaudio
transformers
accelerate
torchaudio

View File

@@ -3,6 +3,4 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
transformers
accelerate
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,4 +1,6 @@
accelerate
bark==0.1.5
grpcio==1.66.1
grpcio==1.65.1
protobuf
certifi
certifi
transformers

View File

@@ -18,23 +18,10 @@
# source $(dirname $0)/../common/libbackend.sh
#
function init() {
# Name of the backend (directory name)
BACKEND_NAME=${PWD##*/}
# Path where all backends files are
MY_DIR=$(realpath `dirname $0`)
# Build type
BUILD_PROFILE=$(getBuildProfile)
# Environment directory
EDIR=${MY_DIR}
# Allow to specify a custom env dir for shared environments
if [ "x${ENV_DIR}" != "x" ]; then
EDIR=${ENV_DIR}
fi
# If a backend has defined a list of valid build profiles...
if [ ! -z "${LIMIT_TARGETS}" ]; then
isValidTarget=$(checkTargets ${LIMIT_TARGETS})
@@ -87,14 +74,13 @@ function getBuildProfile() {
# This function is idempotent, so you can call it as many times as you want and it will
# always result in an activated virtual environment
function ensureVenv() {
if [ ! -d "${EDIR}/venv" ]; then
uv venv ${EDIR}/venv
if [ ! -d "${MY_DIR}/venv" ]; then
uv venv ${MY_DIR}/venv
echo "virtualenv created"
fi
# Source if we are not already in a Virtual env
if [ "x${VIRTUAL_ENV}" != "x${EDIR}/venv" ]; then
source ${EDIR}/venv/bin/activate
if [ "x${VIRTUAL_ENV}" != "x${MY_DIR}/venv" ]; then
source ${MY_DIR}/venv/bin/activate
echo "virtualenv activated"
fi
@@ -127,24 +113,13 @@ function installRequirements() {
# These are the requirements files we will attempt to install, in order
declare -a requirementFiles=(
"${EDIR}/requirements-install.txt"
"${EDIR}/requirements.txt"
"${EDIR}/requirements-${BUILD_TYPE}.txt"
"${MY_DIR}/requirements-install.txt"
"${MY_DIR}/requirements.txt"
"${MY_DIR}/requirements-${BUILD_TYPE}.txt"
)
if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}.txt")
fi
# if BUILD_TYPE is empty, we are a CPU build, so we should try to install the CPU requirements
if [ "x${BUILD_TYPE}" == "x" ]; then
requirementFiles+=("${EDIR}/requirements-cpu.txt")
fi
requirementFiles+=("${EDIR}/requirements-after.txt")
if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}-after.txt")
requirementFiles+=("${MY_DIR}/requirements-${BUILD_PROFILE}.txt")
fi
for reqFile in ${requirementFiles[@]}; do

View File

@@ -1,2 +1,2 @@
grpcio==1.66.1
grpcio==1.65.1
protobuf

View File

@@ -1,3 +0,0 @@
transformers
accelerate
torch

View File

@@ -1,5 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
torchaudio
transformers
accelerate

View File

@@ -1,4 +0,0 @@
torch
torchaudio
transformers
accelerate

View File

@@ -1,5 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch
torchaudio
transformers
accelerate
torchaudio

View File

@@ -3,6 +3,4 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
transformers
accelerate
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,4 +1,6 @@
coqui-tts
grpcio==1.66.1
accelerate
TTS==0.22.0
grpcio==1.65.1
protobuf
certifi
certifi
transformers

View File

@@ -18,13 +18,13 @@ import backend_pb2_grpc
import grpc
from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
EulerAncestralDiscreteScheduler
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
from diffusers.pipelines.stable_diffusion import safety_checker
from diffusers.utils import load_image, export_to_video
from compel import Compel, ReturnedEmbeddingsType
from optimum.quanto import freeze, qfloat8, quantize
from transformers import CLIPTextModel, T5EncoderModel
from transformers import CLIPTextModel
from safetensors.torch import load_file
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
@@ -163,12 +163,10 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
modelFile = request.Model
self.cfg_scale = 7
self.PipelineType = request.PipelineType
if request.CFGScale != 0:
self.cfg_scale = request.CFGScale
clipmodel = "Lykon/dreamshaper-8"
clipmodel = "runwayml/stable-diffusion-v1-5"
if request.CLIPModel != "":
clipmodel = request.CLIPModel
clipsubfolder = "text_encoder"
@@ -246,30 +244,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
torch_dtype=torchType,
use_safetensors=True,
variant=variant)
elif request.PipelineType == "FluxPipeline":
self.pipe = FluxPipeline.from_pretrained(
request.Model,
torch_dtype=torch.bfloat16)
if request.LowVRAM:
self.pipe.enable_model_cpu_offload()
elif request.PipelineType == "FluxTransformer2DModel":
dtype = torch.bfloat16
# specify from environment or default to "ChuckMcSneed/FLUX.1-dev"
bfl_repo = os.environ.get("BFL_REPO", "ChuckMcSneed/FLUX.1-dev")
transformer = FluxTransformer2DModel.from_single_file(modelFile, torch_dtype=dtype)
quantize(transformer, weights=qfloat8)
freeze(transformer)
text_encoder_2 = T5EncoderModel.from_pretrained(bfl_repo, subfolder="text_encoder_2", torch_dtype=dtype)
quantize(text_encoder_2, weights=qfloat8)
freeze(text_encoder_2)
self.pipe = FluxPipeline.from_pretrained(bfl_repo, transformer=None, text_encoder_2=None, torch_dtype=dtype)
self.pipe.transformer = transformer
self.pipe.text_encoder_2 = text_encoder_2
if request.LowVRAM:
self.pipe.enable_model_cpu_offload()
if CLIPSKIP and request.CLIPSkip != 0:
self.clip_skip = request.CLIPSkip
@@ -425,13 +399,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
request.seed
)
if self.PipelineType == "FluxPipeline":
kwargs["max_sequence_length"] = 256
if self.PipelineType == "FluxTransformer2DModel":
kwargs["output_type"] = "pil"
kwargs["generator"] = torch.Generator("cpu").manual_seed(0)
if self.img2vid:
# Load the conditioning image
image = load_image(request.src)

View File

@@ -1,9 +0,0 @@
diffusers
opencv-python
transformers
accelerate
compel
peft
sentencepiece
torch
optimum-quanto

View File

@@ -1,10 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
diffusers
opencv-python
transformers
accelerate
compel
peft
sentencepiece
optimum-quanto

View File

@@ -1,9 +0,0 @@
torch
diffusers
opencv-python
transformers
accelerate
compel
peft
sentencepiece
optimum-quanto

View File

@@ -1,11 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.3.1+rocm6.0
torchvision==0.18.1+rocm6.0
diffusers
opencv-python
transformers
accelerate
compel
peft
sentencepiece
optimum-quanto
torch
torchvision

View File

@@ -3,12 +3,4 @@ intel-extension-for-pytorch
torch
torchvision
optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
diffusers
opencv-python
transformers
accelerate
compel
peft
sentencepiece
optimum-quanto
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,5 +1,13 @@
setuptools
grpcio==1.66.1
accelerate
compel
peft
diffusers
grpcio==1.65.1
opencv-python
pillow
protobuf
sentencepiece
torch
transformers
certifi

View File

@@ -53,7 +53,7 @@ class TestBackendServicer(unittest.TestCase):
self.setUp()
with grpc.insecure_channel("localhost:50051") as channel:
stub = backend_pb2_grpc.BackendStub(channel)
response = stub.LoadModel(backend_pb2.ModelOptions(Model="Lykon/dreamshaper-8"))
response = stub.LoadModel(backend_pb2.ModelOptions(Model="runwayml/stable-diffusion-v1-5"))
self.assertTrue(response.success)
self.assertEqual(response.message, "Model loaded successfully")
except Exception as err:
@@ -71,7 +71,7 @@ class TestBackendServicer(unittest.TestCase):
self.setUp()
with grpc.insecure_channel("localhost:50051") as channel:
stub = backend_pb2_grpc.BackendStub(channel)
response = stub.LoadModel(backend_pb2.ModelOptions(Model="Lykon/dreamshaper-8"))
response = stub.LoadModel(backend_pb2.ModelOptions(Model="runwayml/stable-diffusion-v1-5"))
print(response.message)
self.assertTrue(response.success)
image_req = backend_pb2.GenerateImageRequest(positive_prompt="cat", width=16,height=16, dst="test.jpg")
@@ -81,4 +81,4 @@ class TestBackendServicer(unittest.TestCase):
print(err)
self.fail("Image gen service failed")
finally:
self.tearDown()
self.tearDown()

1
backend/python/exllama/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
source

View File

@@ -0,0 +1,25 @@
export CONDA_ENV_PATH = "exllama.yml"
.PHONY: exllama
exllama: protogen
bash install.sh ${CONDA_ENV_PATH}
.PHONY: run
run: protogen
@echo "Running exllama..."
bash run.sh
@echo "exllama run."
.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py
.PHONY: protogen-clean
protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py
backend_pb2_grpc.py backend_pb2.py:
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
.PHONY: clean
clean: protogen-clean
$(RM) -r venv source __pycache__

View File

@@ -0,0 +1,5 @@
# Creating a separate environment for the exllama project
```
make exllama
```

159
backend/python/exllama/backend.py Executable file
View File

@@ -0,0 +1,159 @@
#!/usr/bin/env python3
import grpc
from concurrent import futures
import time
import backend_pb2
import backend_pb2_grpc
import argparse
import signal
import sys
import os, glob
from pathlib import Path
import torch
import torch.nn.functional as F
from torch import version as torch_version
from source.tokenizer import ExLlamaTokenizer
from source.generator import ExLlamaGenerator
from source.model import ExLlama, ExLlamaCache, ExLlamaConfig
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
def generate(self,prompt, max_new_tokens):
self.generator.end_beam_search()
# Tokenizing the input
ids = self.generator.tokenizer.encode(prompt)
self.generator.gen_begin_reuse(ids)
initial_len = self.generator.sequence[0].shape[0]
has_leading_space = False
decoded_text = ''
for i in range(max_new_tokens):
token = self.generator.gen_single_token()
if i == 0 and self.generator.tokenizer.tokenizer.IdToPiece(int(token)).startswith(''):
has_leading_space = True
decoded_text = self.generator.tokenizer.decode(self.generator.sequence[0][initial_len:])
if has_leading_space:
decoded_text = ' ' + decoded_text
if token.item() == self.generator.tokenizer.eos_token_id:
break
return decoded_text
def Health(self, request, context):
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
def LoadModel(self, request, context):
try:
# https://github.com/turboderp/exllama/blob/master/example_cfg.py
model_directory = request.ModelFile
# Locate files we need within that directory
tokenizer_path = os.path.join(model_directory, "tokenizer.model")
model_config_path = os.path.join(model_directory, "config.json")
st_pattern = os.path.join(model_directory, "*.safetensors")
model_path = glob.glob(st_pattern)[0]
# Create config, model, tokenizer and generator
config = ExLlamaConfig(model_config_path) # create config from config.json
config.model_path = model_path # supply path to model weights file
if (request.ContextSize):
config.max_seq_len = request.ContextSize # override max sequence length
config.max_attention_size = request.ContextSize**2 # Should be set to context_size^2.
# https://github.com/turboderp/exllama/issues/220#issuecomment-1720324163
# Set Rope scaling.
if (request.RopeFreqScale):
# Alpha value for Rope scaling.
# Higher value increases context but adds perplexity.
# alpha_value and compress_pos_emb are mutually exclusive.
# https://github.com/turboderp/exllama/issues/115
config.alpha_value = request.RopeFreqScale
config.calculate_rotary_embedding_base()
model = ExLlama(config) # create ExLlama instance and load the weights
tokenizer = ExLlamaTokenizer(tokenizer_path) # create tokenizer from tokenizer model file
cache = ExLlamaCache(model, batch_size = 2) # create cache for inference
generator = ExLlamaGenerator(model, tokenizer, cache) # create generator
self.generator= generator
self.model = model
self.tokenizer = tokenizer
self.cache = cache
except Exception as err:
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
return backend_pb2.Result(message="Model loaded successfully", success=True)
def Predict(self, request, context):
penalty = 1.15
if request.Penalty != 0.0:
penalty = request.Penalty
self.generator.settings.token_repetition_penalty_max = penalty
self.generator.settings.temperature = request.Temperature
self.generator.settings.top_k = request.TopK
self.generator.settings.top_p = request.TopP
tokens = 512
if request.Tokens != 0:
tokens = request.Tokens
if self.cache.batch_size == 1:
del self.cache
self.cache = ExLlamaCache(self.model, batch_size=2)
self.generator = ExLlamaGenerator(self.model, self.tokenizer, self.cache)
t = self.generate(request.Prompt, tokens)
# Remove prompt from response if present
if request.Prompt in t:
t = t.replace(request.Prompt, "")
return backend_pb2.Result(message=bytes(t, encoding='utf-8'))
def PredictStream(self, request, context):
# Implement PredictStream RPC
#for reply in some_data_generator():
# yield reply
# Not implemented yet
return self.Predict(request, context)
def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
server.add_insecure_port(address)
server.start()
print("Server started. Listening on: " + address, file=sys.stderr)
# Define the signal handler function
def signal_handler(sig, frame):
print("Received termination signal. Shutting down...")
server.stop(0)
sys.exit(0)
# Set the signal handlers for SIGINT and SIGTERM
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)
try:
while True:
time.sleep(_ONE_DAY_IN_SECONDS)
except KeyboardInterrupt:
server.stop(0)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run the gRPC server.")
parser.add_argument(
"--addr", default="localhost:50051", help="The address to bind the server to."
)
args = parser.parse_args()
serve(args.addr)

View File

@@ -0,0 +1,13 @@
#!/bin/bash
set -e
LIMIT_TARGETS="cublas"
source $(dirname $0)/../common/libbackend.sh
installRequirements
git clone https://github.com/turboderp/exllama $MY_DIR/source
uv pip install ${BUILD_ISOLATION_FLAG} --requirement ${MY_DIR}/source/requirements.txt
cp -v ./*py $MY_DIR/source/

View File

@@ -0,0 +1,6 @@
grpcio==1.65.0
protobuf
torch
transformers
certifi
setuptools

7
backend/python/exllama/run.sh Executable file
View File

@@ -0,0 +1,7 @@
#!/bin/bash
LIMIT_TARGETS="cublas"
BACKEND_FILE="${MY_DIR}/source/backend.py"
source $(dirname $0)/../common/libbackend.sh
startBackend $@

6
backend/python/exllama/test.sh Executable file
View File

@@ -0,0 +1,6 @@
#!/bin/bash
set -e
source $(dirname $0)/../common/libbackend.sh
runUnittests

View File

@@ -1,3 +0,0 @@
transformers
accelerate
torch

View File

@@ -1,4 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
transformers
accelerate

View File

@@ -1,3 +0,0 @@
torch
transformers
accelerate

View File

@@ -1,5 +1,7 @@
grpcio==1.66.1
accelerate
grpcio==1.65.1
protobuf
certifi
torch
wheel
setuptools

View File

@@ -1,2 +0,0 @@
causal-conv1d==1.4.0
mamba-ssm==2.2.2

View File

@@ -1,2 +0,0 @@
torch
transformers

View File

@@ -1,3 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
transformers

View File

@@ -1,2 +0,0 @@
torch
transformers

View File

@@ -3,4 +3,5 @@
# https://github.com/Dao-AILab/causal-conv1d/issues/24
packaging
setuptools
wheel
wheel
torch==2.3.1

View File

@@ -1,3 +1,6 @@
grpcio==1.66.1
causal-conv1d==1.4.0
mamba-ssm==2.2.2
grpcio==1.65.1
protobuf
certifi
certifi
transformers

View File

@@ -1 +0,0 @@
torch

View File

@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch

View File

@@ -1 +0,0 @@
torch

View File

@@ -2,7 +2,7 @@
intel-extension-for-pytorch
torch
optimum[openvino]
grpcio==1.66.1
grpcio==1.65.1
protobuf
librosa==0.9.1
faster-whisper==1.0.3

View File

@@ -1,4 +1,4 @@
grpcio==1.66.1
grpcio==1.65.1
protobuf
librosa
faster-whisper

View File

@@ -5,7 +5,7 @@ source $(dirname $0)/../common/libbackend.sh
# Download checkpoints if not present
if [ ! -d "checkpoints_v2" ]; then
wget https://myshell-public-repo-host.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip
wget https://myshell-public-repo-hosting.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip
unzip checkpoints_v2.zip
fi

View File

@@ -15,12 +15,5 @@ installRequirements
# https://github.com/descriptinc/audiotools/issues/101
# incompatible protobuf versions.
PYDIR=python3.10
pyenv="${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/"
if [ ! -d ${pyenv} ]; then
echo "(parler-tts/install.sh): Error: ${pyenv} does not exist"
exit 1
fi
curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${pyenv}/builder.py
PYDIR=$(ls ${MY_DIR}/venv/lib)
curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/builder.py

View File

@@ -1,3 +0,0 @@
git+https://github.com/huggingface/parler-tts.git@8e465f1b5fcd223478e07175cb40494d19ffbe17
llvmlite==0.43.0
numba==0.60.0

View File

@@ -1,3 +0,0 @@
transformers
accelerate
torch

View File

@@ -1,5 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
torchaudio
transformers
accelerate

View File

@@ -1,4 +0,0 @@
torch
torchaudio
transformers
accelerate

View File

@@ -1,5 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.3.0+rocm6.0
torchaudio==2.3.0+rocm6.0
transformers
accelerate
torch
torchaudio

View File

@@ -3,6 +3,4 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
transformers
accelerate
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

Some files were not shown because too many files have changed in this diff Show More