Mirror of https://github.com/mudler/LocalAI.git (synced 2026-02-03 11:13:31 -05:00)

Compare commits: v2.18.1 ... fix_aarch6 (53 commits)
Commits (53):
9080e6442d, 2845baecd5, 3492026b63, ea97fe67e2, d5a56f04be, f120a0c9f9, 401ee553f4, e3c89ac9cd, b59841cf69, cca881ec49,
dd95ae130f, 185ab93b0d, bb38f051e6, 2a05c39adf, deb5311373, bdfebfe0f4, 3a88299cfe, 748e4cb6b1, 7c554be4ea, 6011845ee9,
c184f23621, 8cec0304ee, dc51869c61, f881d25630, 683c306f90, a985d8c239, 17608ea6aa, 9280060e05, cbcb74e159, f5c1518438,
29e4729c22, 68f3943e0f, b59f81abff, 94c5524277, 5b3211e71c, 5c135d0dec, a4c96836ac, ff19b22d72, d96d4883ce, 83576d7f57,
23b926d43e, 9aec1b3a61, 2d65df38d1, 6f5b6711ea, 89c888bf55, a637ee2278, 1b270759ef, b10441a41c, 97de2b6550, 497a037344,
cf0af16695, 62b4030278, c047c19145
.github/release.yml (vendored, 3 changes)
@@ -13,6 +13,9 @@ changelog:
labels:
- bug
- regression
- title: "🖧 P2P area"
labels:
- area/p2p
- title: Exciting New Features 🎉
labels:
- Semver-Minor

.github/workflows/image-pr.yml (vendored, 4 changes)
@@ -46,7 +46,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
-cuda-minor-version: "5"
+cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -119,7 +119,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
-cuda-minor-version: "5"
+cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg-core'

.github/workflows/image.yml (vendored, 16 changes)
@@ -64,7 +64,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
-cuda-minor-version: "8"
+cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11'
@@ -75,7 +75,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
-cuda-minor-version: "5"
+cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12'
@@ -86,7 +86,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
-cuda-minor-version: "8"
+cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cublas-cuda11-ffmpeg'
@@ -100,7 +100,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
-cuda-minor-version: "5"
+cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -274,7 +274,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
-cuda-minor-version: "8"
+cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11-core'
@@ -285,7 +285,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
-cuda-minor-version: "5"
+cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-core'
@@ -296,7 +296,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
-cuda-minor-version: "8"
+cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11-ffmpeg-core'
@@ -307,7 +307,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
-cuda-minor-version: "5"
+cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg-core'

.github/workflows/image_build.yml (vendored, 2 changes)
@@ -23,7 +23,7 @@ on:
type: string
cuda-minor-version:
description: 'CUDA minor version'
-default: "5"
+default: "4"
type: string
platforms:
description: 'Platforms'

.github/workflows/notify-models.yaml (vendored, new file, 170 lines)
@@ -0,0 +1,170 @@
name: Notifications for new models
on:
pull_request:
types:
- closed

jobs:
notify-discord:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
env:
MODEL_NAME: hermes-2-theta-llama-3-8b
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0 # needed to checkout all branches for this Action to work
- name: Start LocalAI
run: |
echo "Starting LocalAI..."
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.7.0
id: git-diff-action
with:
json_diff_file_output: diff.json
raw_diff_file_output: diff.txt
file_output_only: "true"
- name: Summarize
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
id: summarize
run: |
input="$(cat $DIFF)"

# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"

# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "You are LocalAI-bot. Write a discord message to notify everyone about the new model from the git diff. Make it informal. An example can include: the URL of the model, the name, and a brief description of the model if exists. Also add an hint on how to install it in LocalAI and that can be browsed over https://models.localai.io. For example: local-ai run model_name_here"
},
{
role: "user",
content: $input
}
]
}')

# Send the request to LocalAI
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")

# Extract the summary from the response
summary="$(echo $response | jq -r '.choices[0].message.content')"

# Print the summary
# -H "Authorization: Bearer $API_KEY" \
echo "Summary:"
echo "$summary"
echo "payload sent"
echo "$json_payload"
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
docker logs --tail 10 local-ai
- name: Discord notification
env:
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL }}
DISCORD_USERNAME: "LocalAI-Bot"
DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
uses: Ilshidur/action-discord@master
with:
args: ${{ steps.summarize.outputs.message }}
- name: Setup tmate session if fails
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
notify-twitter:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
env:
MODEL_NAME: hermes-2-theta-llama-3-8b
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0 # needed to checkout all branches for this Action to work
- name: Start LocalAI
run: |
echo "Starting LocalAI..."
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.7.0
id: git-diff-action
with:
json_diff_file_output: diff.json
raw_diff_file_output: diff.txt
file_output_only: "true"
- name: Summarize
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
id: summarize
run: |
input="$(cat $DIFF)"

# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"

# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "You are LocalAI-bot. Write a twitter message to notify everyone about the new model from the git diff. Make it informal and really short. An example can include: the name, and a brief description of the model if exists. Also add an hint on how to install it in LocalAI. For example: local-ai run model_name_here"
},
{
role: "user",
content: $input
}
]
}')

# Send the request to LocalAI
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")

# Extract the summary from the response
summary="$(echo $response | jq -r '.choices[0].message.content')"

# Print the summary
# -H "Authorization: Bearer $API_KEY" \
echo "Summary:"
echo "$summary"
echo "payload sent"
echo "$json_payload"
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
docker logs --tail 10 local-ai
- uses: Eomm/why-don-t-you-tweet@v2
with:
tweet-message: ${{ steps.summarize.outputs.message }}
env:
# Get your tokens from https://developer.twitter.com/apps
TWITTER_CONSUMER_API_KEY: ${{ secrets.TWITTER_APP_KEY }}
TWITTER_CONSUMER_API_SECRET: ${{ secrets.TWITTER_APP_SECRET }}
TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
- name: Setup tmate session if fails
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true

.github/workflows/notify-releases.yaml (vendored, new file, 65 lines)
@@ -0,0 +1,65 @@
name: Release notifications
on:
release:
types:
- published

jobs:
notify-discord:
runs-on: ubuntu-latest
env:
RELEASE_BODY: ${{ github.event.release.body }}
RELEASE_TITLE: ${{ github.event.release.name }}
RELEASE_TAG_NAME: ${{ github.event.release.tag_name }}
steps:
- name: Start LocalAI
run: |
echo "Starting LocalAI..."
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
- name: Summarize
id: summarize
run: |
input="$RELEASE_TITLE\b$RELEASE_BODY"

# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"

# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "Write a discord message with a bullet point summary of the release notes."
},
{
role: "user",
content: $input
}
]
}')

# Send the request to LocalAI API
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")

# Extract the summary from the response
summary=$(echo $response | jq -r '.choices[0].message.content')

# Print the summary
# -H "Authorization: Bearer $API_KEY" \
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
- name: Discord notification
env:
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL_RELEASE }}
DISCORD_USERNAME: "LocalAI-Bot"
DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
uses: Ilshidur/action-discord@master
with:
args: ${{ steps.summarize.outputs.message }}

.github/workflows/release.yaml (vendored, 65 changes)
@@ -31,7 +31,7 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
-sudo apt-get install build-essential ffmpeg protobuf-compiler ccache
+sudo apt-get install build-essential ffmpeg protobuf-compiler ccache gawk
sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
- name: Install CUDA Dependencies
run: |
@@ -40,7 +40,7 @@ jobs:
sudo apt-get update
sudo apt-get install -y cuda-cross-aarch64 cuda-nvcc-cross-aarch64-${CUDA_VERSION} libcublas-cross-aarch64-${CUDA_VERSION}
env:
-CUDA_VERSION: 12-5
+CUDA_VERSION: 12-4
- name: Cache grpc
id: cache-grpc
uses: actions/cache@v4
@@ -77,6 +77,16 @@ jobs:
echo "set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
echo "set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN
GRPC_DIR=$PWD/grpc

# http://google.github.io/googletest/quickstart-cmake.html
# Seems otherwise cross-arch fails to find it
echo "include(FetchContent)" >> $GRPC_DIR/CMakeLists.txt
echo "FetchContent_Declare(" >> $GRPC_DIR/CMakeLists.txt
echo " googletest" >> $GRPC_DIR/CMakeLists.txt
echo " URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip" >> $GRPC_DIR/CMakeLists.txt
echo ")" >> $GRPC_DIR/CMakeLists.txt
echo "FetchContent_MakeAvailable(googletest)" >> $GRPC_DIR/CMakeLists.txt

cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install && \
GRPC_CROSS_BUILD_DIR=$GRPC_DIR/cmake/cross_build && \
mkdir -p $GRPC_CROSS_BUILD_DIR && \
@@ -96,6 +106,7 @@ jobs:
CROSS_TOOLCHAIN=/usr/$GNU_HOST
CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake

go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
export PATH=$PATH:$GOPATH/bin
@@ -103,7 +114,6 @@ jobs:
sudo rm -rf /usr/aarch64-linux-gnu/lib/libstdc++.so.6
sudo cp -rf /usr/aarch64-linux-gnu/lib/libstdc++.so* /usr/aarch64-linux-gnu/lib/libstdc++.so.6
sudo cp /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 ld.so
GO_TAGS=p2p \
BACKEND_LIBS="./grpc/cmake/cross_build/third_party/re2/libre2.a ./grpc/cmake/cross_build/libgrpc.a ./grpc/cmake/cross_build/libgrpc++.a ./grpc/cmake/cross_build/third_party/protobuf/libprotobuf.a /usr/aarch64-linux-gnu/lib/libc.so.6 /usr/aarch64-linux-gnu/lib/libstdc++.so.6 /usr/aarch64-linux-gnu/lib/libgomp.so.1 /usr/aarch64-linux-gnu/lib/libm.so.6 /usr/aarch64-linux-gnu/lib/libgcc_s.so.1 /usr/aarch64-linux-gnu/lib/libdl.so.2 /usr/aarch64-linux-gnu/lib/libpthread.so.0 ./ld.so" \
GOOS=linux \
GOARCH=arm64 \
@@ -147,7 +157,7 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
-sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache cmake
+sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache gawk cmake
- name: Intel Dependencies
run: |
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
@@ -214,8 +224,7 @@ jobs:
export PATH=/opt/rocm/bin:$PATH
source /opt/intel/oneapi/setvars.sh
sudo cp /lib64/ld-linux-x86-64.so.2 ld.so
GO_TAGS=p2p \
-BACKEND_LIBS="./ld.so /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/libgomp.so.1" \
+BACKEND_LIBS="./ld.so ./sources/go-piper/piper/build/fi/lib/libfmt.a ./sources/go-piper/piper-phonemize/pi/lib/libonnxruntime.so.1.14.1 ./sources/go-piper/piper-phonemize/pi/src/libespeak-ng/libespeak-ng.so /usr/lib/x86_64-linux-gnu/libdl.so.2 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/lib/x86_64-linux-gnu/libpthread.so.0 ./sources/go-piper/piper-phonemize/pi/lib/libpiper_phonemize.so.1 ./sources/go-piper/piper/build/si/lib/libspdlog.a ./sources/go-piper/espeak/ei/lib/libucd.so" \
make -j4 dist
- uses: actions/upload-artifact@v4
with:
@@ -269,6 +278,48 @@ jobs:
files: |
release/*

build-macOS-x86_64:
runs-on: macos-13
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-go@v5
with:
go-version: '1.21.x'
cache: false
- name: Dependencies
run: |
brew install protobuf grpc
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
- name: Build
id: build
run: |
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
export PATH=$PATH:$GOPATH/bin

make dist
- uses: actions/upload-artifact@v4
with:
name: LocalAI-MacOS-x86_64
path: release/
- name: Release
uses: softprops/action-gh-release@v2
if: startsWith(github.ref, 'refs/tags/')
with:
files: |
release/*
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true

build-macOS-arm64:
runs-on: macos-14
steps:
@@ -292,7 +343,7 @@ jobs:
export CPLUS_INCLUDE_PATH=/usr/local/include
export PATH=$PATH:$GOPATH/bin

-BACKEND_LIBS="$(ls /opt/homebrew/opt/grpc/lib/*.dylib /opt/homebrew/opt/re2/lib/*.dylib /opt/homebrew/opt/openssl@3/lib/*.dylib /opt/homebrew/opt/protobuf/lib/*.dylib /opt/homebrew/opt/abseil/lib/*.dylib | xargs)" GO_TAGS=p2p make dist
+make dist
- uses: actions/upload-artifact@v4
with:
name: LocalAI-MacOS-arm64

Dockerfile
@@ -99,7 +99,7 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers

ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=12
-ARG CUDA_MINOR_VERSION=5
+ARG CUDA_MINOR_VERSION=4

ENV BUILD_TYPE=${BUILD_TYPE}

Makefile (59 changes)
@@ -3,9 +3,11 @@ GOTEST=$(GOCMD) test
GOVET=$(GOCMD) vet
BINARY_NAME=local-ai

DETECT_LIBS?=true

# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=cb5fad4c6c2cbef92e9b8b63449e1cb7664e4846
+CPPLLAMA_VERSION?=c4dd11d1d3903e1922c06242e189f6310fc4d8c3

# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -16,7 +18,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6

# whisper.cpp version
-WHISPER_CPP_VERSION?=b29b3b29240aac8b71ce8e5a4360c1f1562ad66f
+WHISPER_CPP_VERSION?=1c31f9d4a8936aec550e6c4dc9ca5cae3b4f304a

# bert.cpp version
BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4
@@ -33,9 +35,11 @@ TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
export BUILD_TYPE?=
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
export CMAKE_ARGS?=
export BACKEND_LIBS?=

CGO_LDFLAGS?=
CGO_LDFLAGS_WHISPER?=
CGO_LDFLAGS_WHISPER+=-lggml
CUDA_LIBPATH?=/usr/local/cuda/lib64/
GO_TAGS?=
BUILD_ID?=
@@ -49,8 +53,8 @@ RANDOM := $(shell bash -c 'echo $$RANDOM')
VERSION?=$(shell git describe --always --tags || echo "dev" )
# go tool nm ./local-ai | grep Commit
LD_FLAGS?=
-override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Version=$(VERSION)"
-override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
+override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Version=$(VERSION)"
+override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"

OPTIONAL_TARGETS?=

@@ -82,24 +86,25 @@ ifeq ($(OS),Darwin)
else ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DGGML_METAL=OFF
export GGML_NO_ACCELERATE=1
export GGML_NO_METAL=1
endif

ifeq ($(BUILD_TYPE),metal)
# -lcblas removed: it seems to always be listed as a duplicate flag.
CGO_LDFLAGS += -framework Accelerate
endif
else
CGO_LDFLAGS_WHISPER+=-lgomp
endif

ifeq ($(BUILD_TYPE),openblas)
CGO_LDFLAGS+=-lopenblas
-export WHISPER_OPENBLAS=1
+export GGML_OPENBLAS=1
endif

ifeq ($(BUILD_TYPE),cublas)
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
export GGML_CUDA=1
export WHISPER_CUDA=1
CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda -lcufft
endif

@@ -107,6 +112,14 @@ ifeq ($(BUILD_TYPE),vulkan)
CMAKE_ARGS+=-DGGML_VULKAN=1
endif

ifneq (,$(findstring sycl,$(BUILD_TYPE)))
export GGML_SYCL=1
endif

ifeq ($(BUILD_TYPE),sycl_f16)
export GGML_SYCL_F16=1
endif

ifeq ($(BUILD_TYPE),hipblas)
ROCM_HOME ?= /opt/rocm
ROCM_PATH ?= /opt/rocm
@@ -115,7 +128,7 @@ ifeq ($(BUILD_TYPE),hipblas)
export CC=$(ROCM_HOME)/llvm/bin/clang
# llama-ggml has no hipblas support, so override it here.
export STABLE_BUILD_TYPE=
-export WHISPER_HIPBLAS=1
+export GGML_HIPBLAS=1
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
@@ -125,17 +138,16 @@ endif
ifeq ($(BUILD_TYPE),metal)
CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
export GGML_METAL=1
export WHISPER_METAL=1
endif

ifeq ($(BUILD_TYPE),clblas)
CGO_LDFLAGS+=-lOpenCL -lclblast
export WHISPER_CLBLAST=1
export GGML_OPENBLAS=1
endif

# glibc-static or glibc-devel-static required
ifeq ($(STATIC),true)
-LD_FLAGS=-linkmode external -extldflags -static
+LD_FLAGS+=-linkmode external -extldflags -static
endif

ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion)
@@ -169,6 +181,8 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
# Use filter-out to remove the specified backends
ALL_GRPC_BACKENDS := $(filter-out $(SKIP_GRPC_BACKEND),$(ALL_GRPC_BACKENDS))

GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)
TEST_PATHS?=./api/... ./pkg/... ./core/...
@@ -248,7 +262,7 @@ sources/whisper.cpp:
cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1

sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
-cd sources/whisper.cpp && $(MAKE) libwhisper.a
+cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a

get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream

@@ -319,7 +333,7 @@ build: prepare backend-assets grpcs ## Build the project
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
ifneq ($(BACKEND_LIBS),)
$(MAKE) backend-assets/lib
-cp $(BACKEND_LIBS) backend-assets/lib/
+cp -f $(BACKEND_LIBS) backend-assets/lib/
endif
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./

@@ -334,6 +348,9 @@ backend-assets/lib:

dist:
$(MAKE) backend-assets/grpc/llama-cpp-avx2
ifeq ($(DETECT_LIBS),true)
scripts/prepare-libs.sh backend-assets/grpc/llama-cpp-avx2
endif
ifeq ($(OS),Darwin)
$(info ${GREEN}I Skip CUDA/hipblas build on MacOS${RESET})
else
@@ -342,7 +359,11 @@ else
$(MAKE) backend-assets/grpc/llama-cpp-sycl_f16
$(MAKE) backend-assets/grpc/llama-cpp-sycl_f32
endif
STATIC=true $(MAKE) build
GO_TAGS="tts p2p" $(MAKE) build
ifeq ($(DETECT_LIBS),true)
scripts/prepare-libs.sh backend-assets/grpc/piper
endif
GO_TAGS="tts p2p" STATIC=true $(MAKE) build
mkdir -p release
# if BUILD_ID is empty, then we don't append it to the binary name
ifeq ($(BUILD_ID),)
@@ -353,7 +374,7 @@ else
shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH).sha256
endif

dist-cross-linux-arm64:
dist-cross-linux-arm64:
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
STATIC=true $(MAKE) build
mkdir -p release
@@ -404,7 +425,7 @@ prepare-e2e:
mkdir -p $(TEST_DIR)
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
-docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=5 --build-arg FFMPEG=true -t localai-tests .
+docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=4 --build-arg FFMPEG=true -t localai-tests .

run-e2e-image:
ls -liah $(abspath ./tests/e2e-fixtures)
@@ -792,7 +813,7 @@ backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libti
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream

backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
-CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH=$(CURDIR)/sources/whisper.cpp LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
+CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/

backend-assets/grpc/local-store: backend-assets/grpc
@@ -861,7 +882,7 @@ gen-assets:
$(GOCMD) run core/dependencies_manager/manager.go embedded/webui_static.yaml core/http/static/assets

## Documentation
docs/layouts/_default:
docs/layouts/_default:
mkdir -p docs/layouts/_default

docs/static/gallery.html: docs/layouts/_default
@@ -876,4 +897,4 @@ docs-clean:

.PHONY: docs
docs: docs/static/gallery.html
cd docs && hugo serve
cd docs && hugo serve

README.md
@@ -72,6 +72,7 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu

[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)

- 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
- 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
- 🔥🔥 Decentralized llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
- 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
@@ -104,6 +105,7 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
- 📈 [Reranker API](https://localai.io/features/reranker/)
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
- 🌍 Integrated WebUI!

## 💻 Usage

@@ -83,4 +83,11 @@ target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_
target_compile_features(${TARGET} PRIVATE cxx_std_11)
if(TARGET BUILD_INFO)
add_dependencies(${TARGET} BUILD_INFO)
endif()
endif()

include(FetchContent)
FetchContent_Declare(
googletest
URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
)
FetchContent_MakeAvailable(googletest)

@@ -71,9 +71,9 @@ clean: purge
grpc-server: llama.cpp llama.cpp/examples/grpc-server
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
bash -c "source $(ONEAPI_VARS); \
+bash -c "source $(ONEAPI_VARS); \
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)"
else
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
+cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
endif
cp llama.cpp/build/bin/grpc-server .

@@ -148,13 +148,13 @@ function startBackend() {
ensureVenv

if [ ! -z ${BACKEND_FILE} ]; then
-python ${BACKEND_FILE} $@
+exec python ${BACKEND_FILE} $@
elif [ -e "${MY_DIR}/server.py" ]; then
-python ${MY_DIR}/server.py $@
+exec python ${MY_DIR}/server.py $@
elif [ -e "${MY_DIR}/backend.py" ]; then
-python ${MY_DIR}/backend.py $@
+exec python ${MY_DIR}/backend.py $@
elif [ -e "${MY_DIR}/${BACKEND_NAME}.py" ]; then
-python ${MY_DIR}/${BACKEND_NAME}.py $@
+exec python ${MY_DIR}/${BACKEND_NAME}.py $@
fi
}

@@ -210,4 +210,4 @@ function checkTargets() {
echo false
}

init
init

@@ -9,6 +9,7 @@ var CLI struct {
cliContext.Context `embed:""`

Run RunCMD `cmd:"" help:"Run LocalAI, this the default command if no other command is specified. Run 'local-ai run --help' for more information" default:"withargs"`
Federated FederatedCLI `cmd:"" help:"Run LocalAI in federated mode"`
Models ModelsCMD `cmd:"" help:"Manage LocalAI models and definitions"`
TTS TTSCMD `cmd:"" help:"Convert text to speech"`
Transcript TranscriptCMD `cmd:"" help:"Convert audio to text"`

core/cli/federated.go (new file, 130 lines)
@@ -0,0 +1,130 @@
package cli

import (
"context"
"errors"
"fmt"
"io"
"net"
"time"

"math/rand/v2"

cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/edgevpn/pkg/node"
"github.com/mudler/edgevpn/pkg/protocol"
"github.com/mudler/edgevpn/pkg/types"
"github.com/rs/zerolog/log"
)

type FederatedCLI struct {
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
}

func (f *FederatedCLI) Run(ctx *cliContext.Context) error {

n, err := p2p.NewNode(f.Peer2PeerToken)
if err != nil {
return fmt.Errorf("creating a new node: %w", err)
}
err = n.Start(context.Background())
if err != nil {
return fmt.Errorf("creating a new node: %w", err)
}

if err := p2p.ServiceDiscoverer(context.Background(), n, f.Peer2PeerToken, p2p.FederatedID, nil); err != nil {
return err
}

return Proxy(context.Background(), n, f.Address, p2p.FederatedID)
}

func Proxy(ctx context.Context, node *node.Node, listenAddr, service string) error {

log.Info().Msgf("Allocating service '%s' on: %s", service, listenAddr)
// Open local port for listening
l, err := net.Listen("tcp", listenAddr)
if err != nil {
log.Error().Err(err).Msg("Error listening")
return err
}
// ll.Info("Binding local port on", srcaddr)

ledger, _ := node.Ledger()

// Announce ourselves so nodes accepts our connection
ledger.Announce(
ctx,
10*time.Second,
func() {
// Retrieve current ID for ip in the blockchain
//_, found := ledger.GetKey(protocol.UsersLedgerKey, node.Host().ID().String())
// If mismatch, update the blockchain
//if !found {
updatedMap := map[string]interface{}{}
updatedMap[node.Host().ID().String()] = &types.User{
PeerID: node.Host().ID().String(),
Timestamp: time.Now().String(),
}
ledger.Add(protocol.UsersLedgerKey, updatedMap)
// }
},
)

defer l.Close()
for {
select {
case <-ctx.Done():
return errors.New("context canceled")
default:
log.Debug().Msg("New for connection")
// Listen for an incoming connection.
conn, err := l.Accept()
if err != nil {
fmt.Println("Error accepting: ", err.Error())
continue
}

// Handle connections in a new goroutine, forwarding to the p2p service
go func() {
var tunnelAddresses []string
for _, v := range p2p.GetAvailableNodes(p2p.FederatedID) {
if v.IsOnline() {
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
} else {
log.Info().Msgf("Node %s is offline", v.ID)
}
}

// open a TCP stream to one of the tunnels
// chosen randomly
// TODO: optimize this and track usage
tunnelAddr := tunnelAddresses[rand.IntN(len(tunnelAddresses))]

tunnelConn, err := net.Dial("tcp", tunnelAddr)
if err != nil {
log.Error().Err(err).Msg("Error connecting to tunnel")
return
}

log.Info().Msgf("Redirecting %s to %s", conn.LocalAddr().String(), tunnelConn.RemoteAddr().String())
closer := make(chan struct{}, 2)
go copyStream(closer, tunnelConn, conn)
go copyStream(closer, conn, tunnelConn)
<-closer

tunnelConn.Close()
conn.Close()
// ll.Infof("(service %s) Done handling %s", serviceID, l.Addr().String())
}()
}
}

}

func copyStream(closer chan struct{}, dst io.Writer, src io.Reader) {
defer func() { closer <- struct{}{} }() // connection is closed, send signal to stop proxy
io.Copy(dst, src)
}

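The federated mode added above is driven from the CLI (the new `Federated FederatedCLI` subcommand and the `--p2p`/`--federated` flags in the hunks that follow). A minimal usage sketch, assuming a binary built from this branch; flag spellings follow the kong tags in the diff, and the token value is whatever the first node prints at startup:

```sh
# Node 1: start the API with P2P and federation enabled; it prints a shared token.
local-ai run --p2p --federated --address ":8080"

# Other nodes: join as llama.cpp RPC workers using the printed token
# (this is the exact command the run.go hunk prints for the user).
export TOKEN="<token printed by node 1>"
local-ai worker p2p-llama-cpp-rpc

# Optionally, run a standalone federation proxy (the FederatedCLI command above).
local-ai federated --address ":8080" --p2ptoken "$TOKEN"
```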
@@ -3,6 +3,8 @@ package cli
import (
"context"
"fmt"
"net"
"os"
"strings"
"time"

@@ -50,7 +52,7 @@ type RunCMD struct {
DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"`
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"api"`
Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
-Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
+Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"`
PreloadBackendOnly bool `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"`
@@ -59,6 +61,7 @@ type RunCMD struct {
WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"`
EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"`
WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"`
}

func (r *RunCMD) Run(ctx *cliContext.Context) error {
@@ -91,9 +94,10 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
config.WithOpaqueErrors(r.OpaqueErrors),
}

token := ""
if r.Peer2Peer || r.Peer2PeerToken != "" {
log.Info().Msg("P2P mode enabled")
-token := r.Peer2PeerToken
+token = r.Peer2PeerToken
if token == "" {
// IF no token is provided, and p2p is enabled,
// we generate one and wait for the user to pick up the token (this is for interactive)
@@ -104,14 +108,46 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {

log.Info().Msg("To use the token, you can run the following command in another node or terminal:")
fmt.Printf("export TOKEN=\"%s\"\nlocal-ai worker p2p-llama-cpp-rpc\n", token)

// Ask for user confirmation
log.Info().Msg("Press a button to proceed")
var input string
fmt.Scanln(&input)
}
opts = append(opts, config.WithP2PToken(token))

node, err := p2p.NewNode(token)
if err != nil {
return err
}

log.Info().Msg("Starting P2P server discovery...")
-if err := p2p.LLamaCPPRPCServerDiscoverer(context.Background(), token); err != nil {
+if err := p2p.ServiceDiscoverer(context.Background(), node, token, "", func() {
var tunnelAddresses []string
for _, v := range p2p.GetAvailableNodes("") {
if v.IsOnline() {
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
} else {
log.Info().Msgf("Node %s is offline", v.ID)
}
}
tunnelEnvVar := strings.Join(tunnelAddresses, ",")

os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar)
log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar)
}); err != nil {
return err
}
}

if r.Federated {
_, port, err := net.SplitHostPort(r.Address)
if err != nil {
return err
}
if err := p2p.ExposeService(context.Background(), "localhost", port, token, p2p.FederatedID); err != nil {
return err
}
node, err := p2p.NewNode(token)
if err != nil {
return err
}
if err := p2p.ServiceDiscoverer(context.Background(), node, token, p2p.FederatedID, nil); err != nil {
return err
}
}

@@ -20,7 +20,7 @@ import (

type P2P struct {
WorkerFlags `embed:""`
-Token string `env:"LOCALAI_TOKEN,TOKEN" help:"JSON list of galleries"`
+Token string `env:"LOCALAI_TOKEN,LOCALAI_P2P_TOKEN,TOKEN" help:"P2P token to use"`
NoRunner bool `env:"LOCALAI_NO_RUNNER,NO_RUNNER" help:"Do not start the llama-cpp-rpc-server"`
RunnerAddress string `env:"LOCALAI_RUNNER_ADDRESS,RUNNER_ADDRESS" help:"Address of the llama-cpp-rpc-server"`
RunnerPort string `env:"LOCALAI_RUNNER_PORT,RUNNER_PORT" help:"Port of the llama-cpp-rpc-server"`
@@ -59,7 +59,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
p = r.RunnerPort
}

-err = p2p.BindLLamaCPPWorker(context.Background(), address, p, r.Token)
+err = p2p.ExposeService(context.Background(), address, p, r.Token, "")
if err != nil {
return err
}
@@ -99,7 +99,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
}
}()

-err = p2p.BindLLamaCPPWorker(context.Background(), address, fmt.Sprint(port), r.Token)
+err = p2p.ExposeService(context.Background(), address, fmt.Sprint(port), r.Token, "")
if err != nil {
return err
}

@@ -32,6 +32,7 @@ type ApplicationConfig struct {
CORSAllowOrigins string
ApiKeys []string
OpaqueErrors bool
P2PToken string

ModelLibraryURL string

@@ -95,6 +96,12 @@ func WithCsrf(b bool) AppOption {
}
}

func WithP2PToken(s string) AppOption {
return func(o *ApplicationConfig) {
o.P2PToken = s
}
}

func WithModelLibraryURL(url string) AppOption {
return func(o *ApplicationConfig) {
o.ModelLibraryURL = url

@@ -7,6 +7,7 @@ import (
"github.com/chasefleming/elem-go"
"github.com/chasefleming/elem-go/attrs"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/xsync"
)
@@ -15,6 +16,14 @@ const (
noImage = "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg"
)

func renderElements(n []elem.Node) string {
render := ""
for _, r := range n {
render += r.Render()
}
return render
}

func DoneProgress(galleryID, text string, showDelete bool) string {
var modelName = galleryID
// Split by @ and grab the name
@@ -72,6 +81,135 @@ func ProgressBar(progress string) string {
).Render()
}

func P2PNodeStats(nodes []p2p.NodeData) string {
/*
<div class="bg-gray-800 p-6 rounded-lg shadow-lg text-left">
<p class="text-xl font-semibold text-gray-200">Total Workers Detected: {{ len .Nodes }}</p>
{{ $online := 0 }}
{{ range .Nodes }}
{{ if .IsOnline }}
{{ $online = add $online 1 }}
{{ end }}
{{ end }}
<p class="text-xl font-semibold text-gray-200">Total Online Workers: {{$online}}</p>
</div>
*/

online := 0
for _, n := range nodes {
if n.IsOnline() {
online++
}
}

class := "text-green-500"
if online == 0 {
class = "text-red-500"
}
/*
<i class="fas fa-circle animate-pulse text-green-500 ml-2 mr-1"></i>
*/
circle := elem.I(attrs.Props{
"class": "fas fa-circle animate-pulse " + class + " ml-2 mr-1",
})
nodesElements := []elem.Node{
elem.Span(
attrs.Props{
"class": class,
},
circle,
elem.Text(fmt.Sprintf("%d", online)),
),
elem.Span(
attrs.Props{
"class": "text-gray-200",
},
elem.Text(fmt.Sprintf("/%d", len(nodes))),
),
}

return renderElements(nodesElements)
}

func P2PNodeBoxes(nodes []p2p.NodeData) string {
/*
<div class="bg-gray-800 p-4 rounded-lg shadow-lg text-left">
<div class="flex items-center mb-2">
<i class="fas fa-desktop text-gray-400 mr-2"></i>
<span class="text-gray-200 font-semibold">{{.ID}}</span>
</div>
<p class="text-sm text-gray-400 mt-2 flex items-center">
Status:
<i class="fas fa-circle {{ if .IsOnline }}text-green-500{{ else }}text-red-500{{ end }} ml-2 mr-1"></i>
<span class="{{ if .IsOnline }}text-green-400{{ else }}text-red-400{{ end }}">
{{ if .IsOnline }}Online{{ else }}Offline{{ end }}
</span>
</p>
</div>
*/

nodesElements := []elem.Node{}

for _, n := range nodes {

nodesElements = append(nodesElements,
elem.Div(
attrs.Props{
"class": "bg-gray-700 p-6 rounded-lg shadow-lg text-left",
},
elem.P(
attrs.Props{
"class": "text-sm text-gray-400 mt-2 flex",
},
elem.I(
attrs.Props{
"class": "fas fa-desktop text-gray-400 mr-2",
},
),
elem.Text("Name: "),
elem.Span(
attrs.Props{
"class": "text-gray-200 font-semibold ml-2 mr-1",
},
elem.Text(n.ID),
),
elem.Text("Status: "),
elem.If(
n.IsOnline(),
elem.I(
attrs.Props{
"class": "fas fa-circle animate-pulse text-green-500 ml-2 mr-1",
},
),
elem.I(
attrs.Props{
"class": "fas fa-circle animate-pulse text-red-500 ml-2 mr-1",
},
),
),
elem.If(
n.IsOnline(),
elem.Span(
attrs.Props{
"class": "text-green-400",
},

elem.Text("Online"),
),
elem.Span(
attrs.Props{
"class": "text-red-400",
},
elem.Text("Offline"),
),
),
),
))
}

return renderElements(nodesElements)
}

func StartProgressBar(uid, progress, text string) string {
if progress == "" {
progress = "0"

@@ -4,6 +4,7 @@ import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/internal"
"github.com/mudler/LocalAI/pkg/model"
)
@@ -33,6 +34,7 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig,
"Models": models,
"ModelsConfig": backendConfigs,
"GalleryConfig": galleryConfigs,
"IsP2PEnabled": p2p.IsP2PEnabled(),
"ApplicationConfig": appConfig,
"ProcessingModels": processingModels,
"TaskTypes": taskTypes,

@@ -123,7 +123,10 @@ func getFileFromRequest(c *fiber.Ctx) (*File, error) {
return nil, fmt.Errorf("unable to find file id %s", id)
}

// GetFilesEndpoint https://platform.openai.com/docs/api-reference/files/retrieve
// GetFilesEndpoint is the OpenAI API endpoint to get files https://platform.openai.com/docs/api-reference/files/retrieve
// @Summary Returns information about a specific file.
// @Success 200 {object} File "Response"
// @Router /v1/files/{file_id} [get]
func GetFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
file, err := getFileFromRequest(c)
@@ -135,13 +138,17 @@ func GetFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Applicat
}
}

// DeleteFilesEndpoint https://platform.openai.com/docs/api-reference/files/delete
type DeleteStatus struct {
Id string
Object string
Deleted bool
}

// DeleteFilesEndpoint is the OpenAI API endpoint to delete files https://platform.openai.com/docs/api-reference/files/delete
// @Summary Delete a file.
// @Success 200 {object} DeleteStatus "Response"
// @Router /v1/files/{file_id} [delete]
func DeleteFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
type DeleteStatus struct {
Id string
Object string
Deleted bool
}

return func(c *fiber.Ctx) error {
file, err := getFileFromRequest(c)
@@ -174,7 +181,11 @@ func DeleteFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Appli
}
}

// GetFilesContentsEndpoint https://platform.openai.com/docs/api-reference/files/retrieve-contents
// GetFilesContentsEndpoint is the OpenAI API endpoint to get files content https://platform.openai.com/docs/api-reference/files/retrieve-contents
// @Summary Returns information about a specific file.
// @Success 200 {string} binary "file"
// @Router /v1/files/{file_id}/content [get]
// GetFilesContentsEndpoint
func GetFilesContentsEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
file, err := getFileFromRequest(c)

@@ -6,6 +6,10 @@ import (
"github.com/mudler/LocalAI/core/services"
)

// ListModelsEndpoint is the OpenAI Models API endpoint https://platform.openai.com/docs/api-reference/models
// @Summary List and describe the various models available in the API.
// @Success 200 {object} schema.ModelsDataResponse "Response"
// @Router /v1/models [get]
func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
// If blank, no filter is applied.
@@ -18,10 +22,7 @@ func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) er
if err != nil {
return err
}
-return c.JSON(struct {
-Object string `json:"object"`
-Data []schema.OpenAIModel `json:"data"`
-}{
+return c.JSON(schema.ModelsDataResponse{
Object: "list",
Data: dataModels,
})

@@ -5,6 +5,7 @@ import (
"github.com/gofiber/swagger"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/endpoints/localai"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal"
"github.com/mudler/LocalAI/pkg/model"
@@ -56,6 +57,20 @@ func RegisterLocalAIRoutes(app *fiber.App,
app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitorService))
app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitorService))

// p2p
if p2p.IsP2PEnabled() {
app.Get("/api/p2p", auth, func(c *fiber.Ctx) error {
// Render index
return c.JSON(map[string]interface{}{
"Nodes": p2p.GetAvailableNodes(""),
"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID),
})
})
app.Get("/api/p2p/token", auth, func(c *fiber.Ctx) error {
return c.Send([]byte(appConfig.P2PToken))
})
}

app.Get("/version", auth, func(c *fiber.Ctx) error {
return c.JSON(struct {
Version string `json:"version"`

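The routes registered above expose the P2P state as plain JSON alongside the existing LocalAI endpoints. A small sketch of how they can be queried once a P2P-enabled instance is up; the host/port and the use of jq are assumptions, and an Authorization header would be needed if API keys are configured:

```sh
# Workers and federated nodes known to this instance (route exists only when P2P is enabled)
curl -s http://localhost:8080/api/p2p | jq .

# The P2P token the instance was started with
curl -s http://localhost:8080/api/p2p/token
```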
@@ -10,6 +10,7 @@ import (
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/http/elements"
"github.com/mudler/LocalAI/core/http/endpoints/localai"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal"
"github.com/mudler/LocalAI/pkg/model"
@@ -53,6 +54,37 @@ func RegisterUIRoutes(app *fiber.App,

app.Get("/", auth, localai.WelcomeEndpoint(appConfig, cl, ml, modelStatus))

if p2p.IsP2PEnabled() {
app.Get("/p2p", auth, func(c *fiber.Ctx) error {
summary := fiber.Map{
"Title": "LocalAI - P2P dashboard",
"Version": internal.PrintableVersion(),
//"Nodes": p2p.GetAvailableNodes(""),
//"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID),
"IsP2PEnabled": p2p.IsP2PEnabled(),
"P2PToken": appConfig.P2PToken,
}

// Render index
return c.Render("views/p2p", summary)
})

/* show nodes live! */
app.Get("/p2p/ui/workers", auth, func(c *fiber.Ctx) error {
return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes("")))
})
app.Get("/p2p/ui/workers-federation", auth, func(c *fiber.Ctx) error {
return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.FederatedID)))
})

app.Get("/p2p/ui/workers-stats", auth, func(c *fiber.Ctx) error {
return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes("")))
})
app.Get("/p2p/ui/workers-federation-stats", auth, func(c *fiber.Ctx) error {
return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.FederatedID)))
})
}

// Show the Models page (all models)
app.Get("/browse", auth, func(c *fiber.Ctx) error {
term := c.Query("term")
@@ -87,7 +119,9 @@ func RegisterUIRoutes(app *fiber.App,
"AllTags": tags,
"ProcessingModels": processingModelsData,
"AvailableModels": len(models),
"TaskTypes": taskTypes,
"IsP2PEnabled": p2p.IsP2PEnabled(),

"TaskTypes": taskTypes,
// "ApplicationConfig": appConfig,
}

@@ -243,6 +277,7 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsConfig": backendConfigs,
"Model": c.Params("model"),
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
}

// Render index
@@ -261,6 +296,7 @@ func RegisterUIRoutes(app *fiber.App,
"Title": "LocalAI - Talk",
"ModelsConfig": backendConfigs,
"Model": backendConfigs[0].ID,
"IsP2PEnabled": p2p.IsP2PEnabled(),
"Version": internal.PrintableVersion(),
}

@@ -282,6 +318,7 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsConfig": backendConfigs,
"Model": backendConfigs[0].ID,
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
}

// Render index
@@ -296,6 +333,7 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsConfig": backendConfigs,
"Model": c.Params("model"),
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
}

// Render index
@@ -316,6 +354,7 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsConfig": backendConfigs,
"Model": backendConfigs[0].Name,
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
}

// Render index
@@ -330,6 +369,7 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsConfig": backendConfigs,
"Model": c.Params("model"),
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
}

// Render index
@@ -349,6 +389,7 @@ func RegisterUIRoutes(app *fiber.App,
"Title": "LocalAI - Generate audio with " + backendConfigs[0].Name,
"ModelsConfig": backendConfigs,
"Model": backendConfigs[0].Name,
"IsP2PEnabled": p2p.IsP2PEnabled(),
"Version": internal.PrintableVersion(),
}

29 core/http/static/assets/tw-elements.js Normal file
File diff suppressed because one or more lines are too long
@@ -81,10 +81,10 @@ ul {
|
||||
li {
|
||||
font-size: 0.875rem; /* Small text size */
|
||||
color: #4a5568; /* Dark gray text */
|
||||
background-color: #f7fafc; /* Very light gray background */
|
||||
/* background-color: #f7fafc; Very light gray background */
|
||||
border-radius: 0.375rem; /* Rounded corners */
|
||||
padding: 0.5rem; /* Padding inside each list item */
|
||||
box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1), 0 1px 2px 0 rgba(0, 0, 0, 0.06); /* Subtle shadow */
|
||||
/*box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1), 0 1px 2px 0 rgba(0, 0, 0, 0.06); Subtle shadow */
|
||||
margin-bottom: 0.5rem; /* Vertical space between list items */
|
||||
}
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ SOFTWARE.
|
||||
<body class="bg-gray-900 text-gray-200" x-data="{ key: $store.chat.key }">
|
||||
<div class="flex flex-col min-h-screen">
|
||||
|
||||
{{template "views/partials/navbar"}}
|
||||
{{template "views/partials/navbar" .}}
|
||||
<div class="chat-container mt-2 mr-2 ml-2 mb-2 bg-gray-800 shadow-lg rounded-lg" >
|
||||
<!-- Chat Header -->
|
||||
<div class="border-b border-gray-700 p-4" x-data="{ component: 'menu' }">
|
||||
|
||||
150 core/http/views/p2p.html Normal file
@@ -0,0 +1,150 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
{{template "views/partials/head" .}}
|
||||
|
||||
<body class="bg-gray-900 text-gray-200">
|
||||
<div class="flex flex-col min-h-screen">
|
||||
|
||||
{{template "views/partials/navbar" .}}
|
||||
<div class="container mx-auto px-4 flex-grow">
|
||||
<div class="workers mt-12 text-center">
|
||||
|
||||
<h2 class="text-3xl font-semibold text-gray-100 mb-8">
|
||||
<i class="fa-solid fa-circle-nodes"></i> Distributed inference with P2P
|
||||
<a href="https://localai.io/features/distribute/" target="_blank">
|
||||
<i class="fas fa-circle-info pr-2"></i>
|
||||
</a>
|
||||
</h2>
|
||||
<h5 class="mb-4 text-justify">LocalAI uses P2P technologies to enable distribution of work between peers. It is possible to share an instance with Federation and/or split the weights of a model across peers (only available with llama.cpp models). You can now share computational resources between your devices or your friends!</h5>
|
||||
|
||||
<!-- Federation Box -->
|
||||
<div class="bg-gray-800 p-6 rounded-lg shadow-lg mb-12 text-left">
|
||||
|
||||
<p class="text-xl font-semibold text-gray-200"> <i class="text-gray-200 fa-solid fa-circle-nodes"></i> Federated Nodes: <span hx-get="/p2p/ui/workers-federation-stats" hx-trigger="every 1s"></span> </p>
|
||||
<p class="mb-4">You can start LocalAI in federated mode to share your instance, or start the federated server to balance requests between nodes of the federation.</p>
|
||||
|
||||
<div class="grid grid-cols-1 sm:grid-cols-2 md:grid-cols-3 gap-4 mb-12">
|
||||
<div hx-get="/p2p/ui/workers-federation" hx-trigger="every 1s"></div>
|
||||
</div>
|
||||
|
||||
<hr class="border-gray-700 mb-12">
|
||||
|
||||
<h3 class="text-2xl font-semibold text-gray-100 mb-6"><i class="fa-solid fa-book"></i> Start a federated instance</h3>
|
||||
|
||||
|
||||
<!-- Tabs navigation -->
|
||||
<ul class="mb-5 flex list-none flex-row flex-wrap ps-0" role="tablist" data-twe-nav-ref>
|
||||
<li role="presentation" class="flex-auto text-center">
|
||||
<a href="#tabs-federated-cli" class="tablink my-2 block border-0 bg-gray-800 px-7 pb-3.5 pt-4 text-xs font-medium uppercase leading-tight text-white hover:bg-gray-700 focus:bg-gray-700 data-[twe-nav-active]:border-yellow-500 data-[twe-nav-active]:text-yellow-500 data-[twe-nav-active]:bg-gray-700 active" data-twe-toggle="pill" data-twe-target="#tabs-federated-cli" data-twe-nav-active role="tab" aria-controls="tabs-federated-cli" aria-selected="true"><i class="fa-solid fa-terminal"></i> CLI</a>
|
||||
</li>
|
||||
<li role="presentation" class="flex-auto text-center">
|
||||
<a href="#tabs-federated-docker" class="tablink my-2 block border-0 bg-gray-800 px-7 pb-3.5 pt-4 text-xs font-medium uppercase leading-tight text-white hover:bg-gray-700 focus:bg-gray-700 data-[twe-nav-active]:border-yellow-500 data-[twe-nav-active]:text-yellow-500 data-[twe-nav-active]:bg-gray-700" data-twe-toggle="pill" data-twe-target="#tabs-federated-docker" role="tab" aria-controls="tabs-federated-docker" aria-selected="false"><i class="fa-solid fa-box-open"></i> Container images</a>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<!-- Tabs content -->
|
||||
<div class="mb-6">
|
||||
|
||||
<div class="tabcontent hidden opacity-100 transition-opacity duration-150 ease-linear data-[twe-tab-active]:block p-4" id="tabs-federated-cli" role="tabpanel" aria-labelledby="tabs-federated-cli" data-twe-tab-active>
|
||||
|
||||
|
||||
<p class="mb-2">To start a new instance to share:</p>
|
||||
<code class="block bg-gray-700 text-yellow-300 p-4 rounded-lg break-words">
|
||||
# Start a new instance to share with --federated and a TOKEN<br>
|
||||
export TOKEN="<span class="token">{{.P2PToken}}</span>"<br>
|
||||
local-ai run --federated --p2p
|
||||
</code>
|
||||
|
||||
<p class="mt-2">Note: If you don't have a token do not specify it and use the generated one that you can find in this page.</p>
|
||||
|
||||
<p class="mb-2">To start a new federated load balancer:</p>
|
||||
<code class="block bg-gray-700 text-yellow-300 p-4 rounded-lg break-words">
|
||||
export TOKEN="<span class="token">{{.P2PToken}}</span>"<br>
|
||||
local-ai federated
|
||||
</code>
|
||||
|
||||
<p class="mt-2">Note: Token is needed when starting the federated server.</p>
|
||||
|
||||
<p class="mt-2">For all the options available, please refer to the <a href="https://localai.io/features/distribute/#starting-workers" target="_blank" class="text-yellow-300 hover:text-yellow-400">documentation</a>.</p>
|
||||
</div>
|
||||
<div class="tabcontent hidden opacity-0 transition-opacity duration-150 ease-linear data-[twe-tab-active]:block p-4" id="tabs-federated-docker" role="tabpanel" aria-labelledby="tabs-federated-docker">
|
||||
<p class="mb-2">To start a new federated instance:</p>
|
||||
<code class="block bg-gray-700 text-yellow-300 p-4 rounded-lg break-words">
|
||||
docker run -ti --net host -e TOKEN="<span class="token">{{.P2PToken}}</span>" --name local-ai -p 8080:8080 localai/localai:latest-cpu run --federated --p2p
|
||||
</code>
|
||||
|
||||
<p class="mb-2">To start a new federated server (port to 9090):</p>
|
||||
<code class="block bg-gray-700 text-yellow-300 p-4 rounded-lg break-words">
|
||||
docker run -ti --net host -e TOKEN="<span class="token">{{.P2PToken}}</span>" --name local-ai -p 9090:8080 localai/localai:latest-cpu federated
|
||||
</code>
|
||||
|
||||
<p class="mt-2">For all the options available and see what image to use, please refer to the <a href="https://localai.io/basics/container/" target="_blank" class="text-yellow-300 hover:text-yellow-400">Container images documentation</a> and <a href="https://localai.io/advanced/#cli-parameters" target="_blank" class="text-yellow-300 hover:text-yellow-400">CLI parameters documentation</a>.</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Llama.cpp Box -->
|
||||
|
||||
<div class="bg-gray-800 p-6 rounded-lg shadow-lg mb-12 text-left">
|
||||
|
||||
<p class="text-xl font-semibold text-gray-200"> <i class="text-gray-200 fa-solid fa-circle-nodes"></i> Workers (llama.cpp): <span hx-get="/p2p/ui/workers-stats" hx-trigger="every 1s"></span> </p>
|
||||
<p class="mb-4">You can start llama.cpp workers to distribute weights between the workers and offload part of the computation. To start a new worker, you can use the CLI or Docker.</p>
|
||||
|
||||
<div class="grid grid-cols-1 sm:grid-cols-2 md:grid-cols-3 gap-4 mb-12">
|
||||
<div hx-get="/p2p/ui/workers" hx-trigger="every 1s"></div>
|
||||
</div>
|
||||
<hr class="border-gray-700 mb-12">
|
||||
|
||||
<h3 class="text-2xl font-semibold text-gray-100 mb-6"><i class="fa-solid fa-book"></i> Start a new llama.cpp P2P worker</h3>
|
||||
|
||||
<!-- Tabs navigation -->
|
||||
<ul class="mb-5 flex list-none flex-row flex-wrap ps-0" role="tablist" data-twe-nav-ref>
|
||||
<li role="presentation" class="flex-auto text-center">
|
||||
<a href="#tabs-cli" class="tablink my-2 block border-0 bg-gray-800 px-7 pb-3.5 pt-4 text-xs font-medium uppercase leading-tight text-white hover:bg-gray-700 focus:bg-gray-700 data-[twe-nav-active]:border-yellow-500 data-[twe-nav-active]:text-yellow-500 data-[twe-nav-active]:bg-gray-700 active" data-twe-toggle="pill" data-twe-target="#tabs-cli" data-twe-nav-active role="tab" aria-controls="tabs-cli" aria-selected="true"><i class="fa-solid fa-terminal"></i> CLI</a>
|
||||
</li>
|
||||
<li role="presentation" class="flex-auto text-center">
|
||||
<a href="#tabs-docker" class="tablink my-2 block border-0 bg-gray-800 px-7 pb-3.5 pt-4 text-xs font-medium uppercase leading-tight text-white hover:bg-gray-700 focus:bg-gray-700 data-[twe-nav-active]:border-yellow-500 data-[twe-nav-active]:text-yellow-500 data-[twe-nav-active]:bg-gray-700" data-twe-toggle="pill" data-twe-target="#tabs-docker" role="tab" aria-controls="tabs-docker" aria-selected="false"><i class="fa-solid fa-box-open"></i> Container images</a>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<!-- Tabs content -->
|
||||
<div class="mb-6">
|
||||
<div class="tabcontent hidden opacity-100 transition-opacity duration-150 ease-linear data-[twe-tab-active]:block p-4" id="tabs-cli" role="tabpanel" aria-labelledby="tabs-cli" data-twe-tab-active>
|
||||
<p class="mb-2">To start a new worker, run the following command:</p>
|
||||
<code class="block bg-gray-700 text-yellow-300 p-4 rounded-lg break-words">
|
||||
export TOKEN="<span class="token">{{.P2PToken}}</span>"<br>
|
||||
local-ai worker p2p-llama-cpp-rpc
|
||||
</code>
|
||||
|
||||
<p class="mt-2">For all the options available, please refer to the <a href="https://localai.io/features/distribute/#starting-workers" target="_blank" class="text-yellow-300 hover:text-yellow-400">documentation</a>.</p>
|
||||
</div>
|
||||
<div class="tabcontent hidden opacity-0 transition-opacity duration-150 ease-linear data-[twe-tab-active]:block p-4" id="tabs-docker" role="tabpanel" aria-labelledby="tabs-docker">
|
||||
<p class="mb-2">To start a new worker with docker, run the following command:</p>
|
||||
<code class="block bg-gray-700 text-yellow-300 p-4 rounded-lg break-words">
|
||||
docker run -ti --net host -e TOKEN="<span class="token">{{.P2PToken}}</span>" --name local-ai -p 8080:8080 localai/localai:latest-cpu worker p2p-llama-cpp-rpc
|
||||
</code>
|
||||
|
||||
<p class="mt-2">For all the options available and see what image to use, please refer to the <a href="https://localai.io/basics/container/" target="_blank" class="text-yellow-300 hover:text-yellow-400">Container images documentation</a> and <a href="https://localai.io/advanced/#cli-parameters" target="_blank" class="text-yellow-300 hover:text-yellow-400">CLI parameters documentation</a>.</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<!-- Llama.cpp Box END -->
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{{template "views/partials/footer" .}}
|
||||
</div>
|
||||
|
||||
<style>
|
||||
.token {
|
||||
word-break: break-all;
|
||||
}
|
||||
.workers .grid div {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
justify-content: space-between;
|
||||
}
|
||||
</style>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,4 +1,5 @@
|
||||
<footer class="text-center py-8">
|
||||
LocalAI Version {{.Version}}<br>
|
||||
<a href='https://localai.io' class="text-blue-400 hover:text-blue-600" target="_blank">LocalAI</a> © 2023-2024 <a href='https://mudler.pm' class="text-blue-400 hover:text-blue-600" target="_blank">Ettore Di Giacinto</a>
|
||||
</footer>
|
||||
</footer>
|
||||
<script src="/static/assets/tw-elements.js"></script>
|
||||
|
||||
@@ -21,6 +21,9 @@
|
||||
<a href="/text2image/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-image pr-2"></i> Generate images</a>
|
||||
<a href="/tts/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-music pr-2"></i> TTS </a>
|
||||
<a href="/talk/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-phone pr-2"></i> Talk </a>
|
||||
{{ if .IsP2PEnabled }}
|
||||
<a href="/p2p/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-circle-nodes"></i> Swarm </a>
|
||||
{{ end }}
|
||||
<a href="/swagger/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-code pr-2"></i> API</a>
|
||||
</div>
|
||||
</div>
|
||||
@@ -34,6 +37,9 @@
|
||||
<a href="/text2image/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-image pr-2"></i> Generate images</a>
|
||||
<a href="/tts/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-music pr-2"></i> TTS </a>
|
||||
<a href="/talk/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-phone pr-2"></i> Talk </a>
|
||||
{{ if .IsP2PEnabled }}
|
||||
<a href="/p2p/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-circle-nodes"></i> Swarm </a>
|
||||
{{ end }}
|
||||
<a href="/swagger/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-code pr-2"></i> API</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
<body class="bg-gray-900 text-gray-200" x-data="{ key: $store.chat.key }">
|
||||
<div class="flex flex-col min-h-screen">
|
||||
|
||||
{{template "views/partials/navbar"}}
|
||||
{{template "views/partials/navbar" .}}
|
||||
<div class="chat-container mt-2 mr-2 ml-2 mb-2 bg-gray-800 shadow-lg rounded-lg " >
|
||||
<!-- Chat Header -->
|
||||
<div class="border-b border-gray-700 p-4" x-data="{ component: 'menu' }">
|
||||
|
||||
50 core/p2p/node.go Normal file
@@ -0,0 +1,50 @@
package p2p

import (
	"sync"
	"time"
)

const defaultServicesID = "services_localai"
const FederatedID = "federated"

type NodeData struct {
	Name          string
	ID            string
	TunnelAddress string
	LastSeen      time.Time
}

func (d NodeData) IsOnline() bool {
	now := time.Now()
	// if the node was seen in the last 40 seconds, it's online
	return now.Sub(d.LastSeen) < 40*time.Second
}

var mu sync.Mutex
var nodes = map[string]map[string]NodeData{}

func GetAvailableNodes(serviceID string) []NodeData {
	if serviceID == "" {
		serviceID = defaultServicesID
	}
	mu.Lock()
	defer mu.Unlock()
	var availableNodes = []NodeData{}
	for _, v := range nodes[serviceID] {
		availableNodes = append(availableNodes, v)
	}
	return availableNodes
}

func AddNode(serviceID string, node NodeData) {
	if serviceID == "" {
		serviceID = defaultServicesID
	}
	mu.Lock()
	defer mu.Unlock()
	if nodes[serviceID] == nil {
		nodes[serviceID] = map[string]NodeData{}
	}
	nodes[serviceID][node.ID] = node
}
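The new `core/p2p/node.go` above is an in-memory registry of discovered nodes, keyed by service ID and guarded by a mutex. A small sketch of how the exported pieces fit together (node names, IDs and timestamps below are illustrative):

```go
package main

import (
	"fmt"
	"time"

	"github.com/mudler/LocalAI/core/p2p"
)

func main() {
	// An empty service ID falls back to the package default ("services_localai").
	p2p.AddNode("", p2p.NodeData{
		Name:     "worker-1",
		ID:       "host-worker-1",
		LastSeen: time.Now(),
	})

	// Federated instances are kept in a separate bucket under p2p.FederatedID.
	p2p.AddNode(p2p.FederatedID, p2p.NodeData{
		Name:     "fed-1",
		ID:       "host-fed-1",
		LastSeen: time.Now().Add(-2 * time.Minute), // last seen too long ago
	})

	for _, n := range p2p.GetAvailableNodes("") {
		// IsOnline is true only if the node was seen in the last 40 seconds.
		fmt.Println(n.Name, "online:", n.IsOnline()) // true
	}
	for _, n := range p2p.GetAvailableNodes(p2p.FederatedID) {
		fmt.Println(n.Name, "online:", n.IsOnline()) // false: LastSeen is 2 minutes old
	}
}
```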
196 core/p2p/p2p.go
@@ -10,19 +10,18 @@ import (
|
||||
"io"
|
||||
"net"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ipfs/go-log"
|
||||
"github.com/libp2p/go-libp2p/core/peer"
|
||||
"github.com/mudler/LocalAI/pkg/utils"
|
||||
"github.com/mudler/edgevpn/pkg/config"
|
||||
"github.com/mudler/edgevpn/pkg/node"
|
||||
"github.com/mudler/edgevpn/pkg/protocol"
|
||||
"github.com/mudler/edgevpn/pkg/services"
|
||||
"github.com/mudler/edgevpn/pkg/types"
|
||||
"github.com/phayes/freeport"
|
||||
|
||||
"github.com/ipfs/go-log"
|
||||
"github.com/mudler/edgevpn/pkg/config"
|
||||
"github.com/mudler/edgevpn/pkg/services"
|
||||
zlog "github.com/rs/zerolog/log"
|
||||
|
||||
"github.com/mudler/edgevpn/pkg/logger"
|
||||
@@ -34,6 +33,15 @@ func GenerateToken() string {
|
||||
return newData.Base64()
|
||||
}
|
||||
|
||||
func IsP2PEnabled() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func nodeID(s string) string {
|
||||
hostname, _ := os.Hostname()
|
||||
return fmt.Sprintf("%s-%s", hostname, s)
|
||||
}
|
||||
|
||||
func allocateLocalService(ctx context.Context, node *node.Node, listenAddr, service string) error {
|
||||
|
||||
zlog.Info().Msgf("Allocating service '%s' on: %s", service, listenAddr)
|
||||
@@ -53,16 +61,16 @@ func allocateLocalService(ctx context.Context, node *node.Node, listenAddr, serv
|
||||
10*time.Second,
|
||||
func() {
|
||||
// Retrieve current ID for ip in the blockchain
|
||||
_, found := ledger.GetKey(protocol.UsersLedgerKey, node.Host().ID().String())
|
||||
//_, found := ledger.GetKey(protocol.UsersLedgerKey, node.Host().ID().String())
|
||||
// If mismatch, update the blockchain
|
||||
if !found {
|
||||
updatedMap := map[string]interface{}{}
|
||||
updatedMap[node.Host().ID().String()] = &types.User{
|
||||
PeerID: node.Host().ID().String(),
|
||||
Timestamp: time.Now().String(),
|
||||
}
|
||||
ledger.Add(protocol.UsersLedgerKey, updatedMap)
|
||||
//if !found {
|
||||
updatedMap := map[string]interface{}{}
|
||||
updatedMap[node.Host().ID().String()] = &types.User{
|
||||
PeerID: node.Host().ID().String(),
|
||||
Timestamp: time.Now().String(),
|
||||
}
|
||||
ledger.Add(protocol.UsersLedgerKey, updatedMap)
|
||||
// }
|
||||
},
|
||||
)
|
||||
|
||||
@@ -80,7 +88,6 @@ func allocateLocalService(ctx context.Context, node *node.Node, listenAddr, serv
|
||||
continue
|
||||
}
|
||||
|
||||
// ll.Info("New connection from", l.Addr().String())
|
||||
// Handle connections in a new goroutine, forwarding to the p2p service
|
||||
go func() {
|
||||
// Retrieve current ID for ip in the blockchain
|
||||
@@ -137,24 +144,30 @@ func copyStream(closer chan struct{}, dst io.Writer, src io.Reader) {
|
||||
|
||||
// This is the main of the server (which keeps the env variable updated)
|
||||
// This starts a goroutine that keeps LLAMACPP_GRPC_SERVERS updated with the discovered services
|
||||
func LLamaCPPRPCServerDiscoverer(ctx context.Context, token string) error {
|
||||
tunnels, err := discoveryTunnels(ctx, token)
|
||||
func ServiceDiscoverer(ctx context.Context, n *node.Node, token, servicesID string, discoveryFunc func()) error {
|
||||
if servicesID == "" {
|
||||
servicesID = defaultServicesID
|
||||
}
|
||||
tunnels, err := discoveryTunnels(ctx, n, token, servicesID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// TODO: discoveryTunnels should return all the nodes that are available?
|
||||
// In this way we updated availableNodes here instead of appending
|
||||
// e.g. we have a LastSeen field in NodeData that is updated in discoveryTunnels
|
||||
// each time the node is seen
|
||||
// In this case the below function should be idempotent and just keep track of the nodes
|
||||
go func() {
|
||||
totalTunnels := []string{}
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
zlog.Error().Msg("Discoverer stopped")
|
||||
return
|
||||
case tunnel := <-tunnels:
|
||||
|
||||
totalTunnels = append(totalTunnels, tunnel)
|
||||
os.Setenv("LLAMACPP_GRPC_SERVERS", strings.Join(totalTunnels, ","))
|
||||
zlog.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", strings.Join(totalTunnels, ","))
|
||||
AddNode(servicesID, tunnel)
|
||||
if discoveryFunc != nil {
|
||||
discoveryFunc()
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
@@ -162,19 +175,10 @@ func LLamaCPPRPCServerDiscoverer(ctx context.Context, token string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func discoveryTunnels(ctx context.Context, token string) (chan string, error) {
|
||||
tunnels := make(chan string)
|
||||
func discoveryTunnels(ctx context.Context, n *node.Node, token, servicesID string) (chan NodeData, error) {
|
||||
tunnels := make(chan NodeData)
|
||||
|
||||
nodeOpts, err := newNodeOpts(token)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
n, err := node.New(nodeOpts...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating a new node: %w", err)
|
||||
}
|
||||
err = n.Start(ctx)
|
||||
err := n.Start(ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating a new node: %w", err)
|
||||
}
|
||||
@@ -184,8 +188,14 @@ func discoveryTunnels(ctx context.Context, token string) (chan string, error) {
|
||||
}
|
||||
|
||||
// get new services, allocate and return to the channel
|
||||
|
||||
// TODO:
|
||||
// a function ensureServices that:
|
||||
// - starts a service if not started, if the worker is Online
|
||||
// - checks that workers are Online, if not cancel the context of allocateLocalService
|
||||
// - discoveryTunnels should return all the nodes and addresses associated with it
|
||||
// - the caller should take now care of the fact that we are always returning fresh informations
|
||||
go func() {
|
||||
emitted := map[string]bool{}
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
@@ -195,20 +205,20 @@ func discoveryTunnels(ctx context.Context, token string) (chan string, error) {
|
||||
time.Sleep(5 * time.Second)
|
||||
zlog.Debug().Msg("Searching for workers")
|
||||
|
||||
data := ledger.LastBlock().Storage["services_localai"]
|
||||
for k := range data {
|
||||
data := ledger.LastBlock().Storage[servicesID]
|
||||
for k, v := range data {
|
||||
zlog.Info().Msgf("Found worker %s", k)
|
||||
if _, found := emitted[k]; !found {
|
||||
emitted[k] = true
|
||||
//discoveredPeers <- k
|
||||
port, err := freeport.GetFreePort()
|
||||
if err != nil {
|
||||
fmt.Print(err)
|
||||
}
|
||||
tunnelAddress := fmt.Sprintf("127.0.0.1:%d", port)
|
||||
go allocateLocalService(ctx, n, tunnelAddress, k)
|
||||
tunnels <- tunnelAddress
|
||||
nd := &NodeData{}
|
||||
if err := v.Unmarshal(nd); err != nil {
|
||||
zlog.Error().Msg("cannot unmarshal node data")
|
||||
continue
|
||||
}
|
||||
ensureService(ctx, n, nd, k)
|
||||
muservice.Lock()
|
||||
if _, ok := service[nd.Name]; ok {
|
||||
tunnels <- service[nd.Name].NodeData
|
||||
}
|
||||
muservice.Unlock()
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -217,8 +227,60 @@ func discoveryTunnels(ctx context.Context, token string) (chan string, error) {
|
||||
return tunnels, err
|
||||
}
|
||||
|
||||
type nodeServiceData struct {
|
||||
NodeData NodeData
|
||||
CancelFunc context.CancelFunc
|
||||
}
|
||||
|
||||
var service = map[string]nodeServiceData{}
|
||||
var muservice sync.Mutex
|
||||
|
||||
func ensureService(ctx context.Context, n *node.Node, nd *NodeData, sserv string) {
|
||||
muservice.Lock()
|
||||
defer muservice.Unlock()
|
||||
if ndService, found := service[nd.Name]; !found {
|
||||
if !nd.IsOnline() {
|
||||
// if node is offline and not present, do nothing
|
||||
return
|
||||
}
|
||||
newCtxm, cancel := context.WithCancel(ctx)
|
||||
// Start the service
|
||||
port, err := freeport.GetFreePort()
|
||||
if err != nil {
|
||||
fmt.Print(err)
|
||||
}
|
||||
tunnelAddress := fmt.Sprintf("127.0.0.1:%d", port)
|
||||
nd.TunnelAddress = tunnelAddress
|
||||
service[nd.Name] = nodeServiceData{
|
||||
NodeData: *nd,
|
||||
CancelFunc: cancel,
|
||||
}
|
||||
go allocateLocalService(newCtxm, n, tunnelAddress, sserv)
|
||||
zlog.Debug().Msgf("Starting service %s on %s", sserv, tunnelAddress)
|
||||
} else {
|
||||
// Check if the service is still alive
|
||||
// if not cancel the context
|
||||
if !nd.IsOnline() && !ndService.NodeData.IsOnline() {
|
||||
ndService.CancelFunc()
|
||||
delete(service, nd.Name)
|
||||
zlog.Info().Msgf("Node %s is offline, deleting", nd.ID)
|
||||
} else if nd.IsOnline() {
|
||||
// update last seen inside service
|
||||
nd.TunnelAddress = ndService.NodeData.TunnelAddress
|
||||
service[nd.Name] = nodeServiceData{
|
||||
NodeData: *nd,
|
||||
CancelFunc: ndService.CancelFunc,
|
||||
}
|
||||
zlog.Debug().Msgf("Node %s is still online", nd.ID)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This is the P2P worker main
|
||||
func BindLLamaCPPWorker(ctx context.Context, host, port, token string) error {
|
||||
func ExposeService(ctx context.Context, host, port, token, servicesID string) error {
|
||||
if servicesID == "" {
|
||||
servicesID = defaultServicesID
|
||||
}
|
||||
llger := logger.New(log.LevelFatal)
|
||||
|
||||
nodeOpts, err := newNodeOpts(token)
|
||||
@@ -248,31 +310,53 @@ func BindLLamaCPPWorker(ctx context.Context, host, port, token string) error {
|
||||
|
||||
ledger.Announce(
|
||||
ctx,
|
||||
10*time.Second,
|
||||
20*time.Second,
|
||||
func() {
|
||||
// Retrieve current ID for ip in the blockchain
|
||||
_, found := ledger.GetKey("services_localai", name)
|
||||
//_, found := ledger.GetKey("services_localai", name)
|
||||
// If mismatch, update the blockchain
|
||||
if !found {
|
||||
updatedMap := map[string]interface{}{}
|
||||
updatedMap[name] = "p2p"
|
||||
ledger.Add("services_localai", updatedMap)
|
||||
//if !found {
|
||||
updatedMap := map[string]interface{}{}
|
||||
updatedMap[name] = &NodeData{
|
||||
Name: name,
|
||||
LastSeen: time.Now(),
|
||||
ID: nodeID(name),
|
||||
}
|
||||
ledger.Add(servicesID, updatedMap)
|
||||
// }
|
||||
},
|
||||
)
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func NewNode(token string) (*node.Node, error) {
|
||||
nodeOpts, err := newNodeOpts(token)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
n, err := node.New(nodeOpts...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating a new node: %w", err)
|
||||
}
|
||||
|
||||
return n, nil
|
||||
}
|
||||
|
||||
func newNodeOpts(token string) ([]node.Option, error) {
|
||||
llger := logger.New(log.LevelFatal)
|
||||
defaultInterval := 10 * time.Second
|
||||
|
||||
// TODO: move this up, expose more config options when creating a node
|
||||
noDHT := os.Getenv("LOCALAI_P2P_DISABLE_DHT") == "true"
|
||||
noLimits := os.Getenv("LOCALAI_P2P_DISABLE_LIMITS") == "true"
|
||||
|
||||
loglevel := "info"
|
||||
|
||||
c := config.Config{
|
||||
Limit: config.ResourceLimit{
|
||||
Enable: true,
|
||||
Enable: !noLimits,
|
||||
MaxConns: 100,
|
||||
},
|
||||
NetworkToken: token,
|
||||
@@ -292,7 +376,7 @@ func newNodeOpts(token string) ([]node.Option, error) {
|
||||
RateLimitInterval: defaultInterval,
|
||||
},
|
||||
Discovery: config.Discovery{
|
||||
DHT: true,
|
||||
DHT: noDHT,
|
||||
MDNS: true,
|
||||
Interval: 30 * time.Second,
|
||||
},
|
||||
|
||||
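With the changes above, discovery and announcement are generalized: workers call `ExposeService` to announce themselves on the ledger, while the server calls `NewNode` plus `ServiceDiscoverer` to tunnel to announced workers and keep `LLAMACPP_GRPC_SERVERS` updated. The sketch below wires both sides into a single process purely for illustration; in practice they run on separate machines, and the token, host and port values are placeholders, not anything prescribed by the diff.

```go
package main

import (
	"context"
	"log"

	"github.com/mudler/LocalAI/core/p2p"
)

func main() {
	ctx := context.Background()
	token := "<shared network token>" // placeholder; a token can be generated with p2p.GenerateToken()

	// Worker side: announce a local llama.cpp RPC server on the p2p network.
	// An empty servicesID falls back to the package default.
	go func() {
		if err := p2p.ExposeService(ctx, "127.0.0.1", "50052", token, ""); err != nil {
			log.Fatal(err)
		}
	}()

	// Server side: create a node and start discovering announced services.
	n, err := p2p.NewNode(token)
	if err != nil {
		log.Fatal(err)
	}
	if err := p2p.ServiceDiscoverer(ctx, n, token, "", func() {
		// Called whenever a worker is discovered; the discoverer also keeps
		// LLAMACPP_GRPC_SERVERS updated with the tunnelled addresses.
		log.Println("available workers:", len(p2p.GetAvailableNodes("")))
	}); err != nil {
		log.Fatal(err)
	}

	select {} // keep the process alive
}
```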
@@ -6,16 +6,26 @@ package p2p
import (
	"context"
	"fmt"

	"github.com/mudler/edgevpn/pkg/node"
)

func GenerateToken() string {
	return "not implemented"
}

func LLamaCPPRPCServerDiscoverer(ctx context.Context, token string) error {
func ServiceDiscoverer(ctx context.Context, node *node.Node, token, servicesID string, fn func()) error {
	return fmt.Errorf("not implemented")
}

func BindLLamaCPPWorker(ctx context.Context, host, port, token string) error {
func ExposeService(ctx context.Context, host, port, token, servicesID string) error {
	return fmt.Errorf("not implemented")
}

func IsP2PEnabled() bool {
	return false
}

func NewNode(token string) (*node.Node, error) {
	return nil, fmt.Errorf("not implemented")
}
@@ -155,3 +155,8 @@ type OpenAIRequest struct {
	// AutoGPTQ
	ModelBaseName string `json:"model_base_name" yaml:"model_base_name"`
}

type ModelsDataResponse struct {
	Object string        `json:"object"`
	Data   []OpenAIModel `json:"data"`
}
@@ -98,3 +98,14 @@ The server logs should indicate that new workers are being discovered.
- If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file.
- Only a single model is supported currently.
- Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun.

## Environment Variables

There are a few options that can be tweaked with environment variables:

| Environment Variable | Description |
|----------------------|-------------|
| **LOCALAI_P2P_DISABLE_DHT** | Set to "true" to disable DHT and make the p2p layer local only (mDNS) |
| **LOCALAI_P2P_DISABLE_LIMITS** | Set to "true" to disable connection limits and resource management |
| **LOCALAI_P2P_TOKEN** | Set the token for the p2p network |
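These variables are read as plain string comparisons: the first two are consumed by `newNodeOpts` in `core/p2p/p2p.go` above, and `LOCALAI_P2P_TOKEN` is what the installer writes to `/etc/localai.env`. A trivial sketch of the same checks (the printout is illustrative, not part of the project):

```go
package main

import (
	"fmt"
	"os"
)

func main() {
	// The p2p package compares these against the literal string "true"
	// (see newNodeOpts in core/p2p/p2p.go above).
	noDHT := os.Getenv("LOCALAI_P2P_DISABLE_DHT") == "true"
	noLimits := os.Getenv("LOCALAI_P2P_DISABLE_LIMITS") == "true"
	token := os.Getenv("LOCALAI_P2P_TOKEN")

	fmt.Println("DHT disabled:", noDHT)
	fmt.Println("connection limits disabled:", noLimits)
	fmt.Println("p2p token set:", token != "")
}
```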
@@ -1,3 +1,3 @@
|
||||
{
|
||||
"version": "v2.18.0"
|
||||
"version": "v2.18.1"
|
||||
}
|
||||
|
||||
43 docs/static/install.sh vendored
@@ -76,6 +76,8 @@ DOCKER_INSTALL=${DOCKER_INSTALL:-$docker_found}
|
||||
USE_AIO=${USE_AIO:-false}
|
||||
API_KEY=${API_KEY:-}
|
||||
CORE_IMAGES=${CORE_IMAGES:-false}
|
||||
P2P_TOKEN=${P2P_TOKEN:-}
|
||||
WORKER=${WORKER:-false}
|
||||
# nprocs -1
|
||||
if available nproc; then
|
||||
procs=$(nproc)
|
||||
@@ -132,7 +134,14 @@ configure_systemd() {
|
||||
|
||||
info "Adding current user to local-ai group..."
|
||||
$SUDO usermod -a -G local-ai $(whoami)
|
||||
|
||||
STARTCOMMAND="run"
|
||||
if [ "$WORKER" = true ]; then
|
||||
if [ -n "$P2P_TOKEN" ]; then
|
||||
STARTCOMMAND="worker p2p-llama-cpp-rpc"
|
||||
else
|
||||
STARTCOMMAND="worker llama-cpp-rpc"
|
||||
fi
|
||||
fi
|
||||
info "Creating local-ai systemd service..."
|
||||
cat <<EOF | $SUDO tee /etc/systemd/system/local-ai.service >/dev/null
|
||||
[Unit]
|
||||
@@ -140,7 +149,7 @@ Description=LocalAI Service
|
||||
After=network-online.target
|
||||
|
||||
[Service]
|
||||
ExecStart=$BINDIR/local-ai run
|
||||
ExecStart=$BINDIR/local-ai $STARTCOMMAND
|
||||
User=local-ai
|
||||
Group=local-ai
|
||||
Restart=always
|
||||
@@ -159,6 +168,11 @@ EOF
|
||||
$SUDO echo "THREADS=$THREADS" | $SUDO tee -a /etc/localai.env >/dev/null
|
||||
$SUDO echo "MODELS_PATH=$MODELS_PATH" | $SUDO tee -a /etc/localai.env >/dev/null
|
||||
|
||||
if [ -n "$P2P_TOKEN" ]; then
|
||||
$SUDO echo "LOCALAI_P2P_TOKEN=$P2P_TOKEN" | $SUDO tee -a /etc/localai.env >/dev/null
|
||||
$SUDO echo "LOCALAI_P2P=true" | $SUDO tee -a /etc/localai.env >/dev/null
|
||||
fi
|
||||
|
||||
SYSTEMCTL_RUNNING="$(systemctl is-system-running || true)"
|
||||
case $SYSTEMCTL_RUNNING in
|
||||
running|degraded)
|
||||
@@ -407,6 +421,19 @@ install_docker() {
|
||||
# exit 0
|
||||
fi
|
||||
|
||||
STARTCOMMAND="run"
|
||||
if [ "$WORKER" = true ]; then
|
||||
if [ -n "$P2P_TOKEN" ]; then
|
||||
STARTCOMMAND="worker p2p-llama-cpp-rpc"
|
||||
else
|
||||
STARTCOMMAND="worker llama-cpp-rpc"
|
||||
fi
|
||||
fi
|
||||
envs=""
|
||||
if [ -n "$P2P_TOKEN" ]; then
|
||||
envs="-e LOCALAI_P2P_TOKEN=$P2P_TOKEN -e LOCALAI_P2P=true"
|
||||
fi
|
||||
|
||||
IMAGE_TAG=
|
||||
if [ "$HAS_CUDA" ]; then
|
||||
IMAGE_TAG=${VERSION}-cublas-cuda12-ffmpeg
|
||||
@@ -430,7 +457,8 @@ install_docker() {
|
||||
--restart=always \
|
||||
-e API_KEY=$API_KEY \
|
||||
-e THREADS=$THREADS \
|
||||
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG
|
||||
$envs \
|
||||
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
|
||||
elif [ "$HAS_AMD" ]; then
|
||||
IMAGE_TAG=${VERSION}-hipblas-ffmpeg
|
||||
# CORE
|
||||
@@ -448,7 +476,8 @@ install_docker() {
|
||||
--restart=always \
|
||||
-e API_KEY=$API_KEY \
|
||||
-e THREADS=$THREADS \
|
||||
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG
|
||||
$envs \
|
||||
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
|
||||
elif [ "$HAS_INTEL" ]; then
|
||||
IMAGE_TAG=${VERSION}-sycl-f32-ffmpeg
|
||||
# CORE
|
||||
@@ -465,7 +494,8 @@ install_docker() {
|
||||
--restart=always \
|
||||
-e API_KEY=$API_KEY \
|
||||
-e THREADS=$THREADS \
|
||||
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG
|
||||
$envs \
|
||||
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
|
||||
else
|
||||
IMAGE_TAG=${VERSION}-ffmpeg
|
||||
# CORE
|
||||
@@ -481,7 +511,8 @@ install_docker() {
|
||||
-e MODELS_PATH=/models \
|
||||
-e API_KEY=$API_KEY \
|
||||
-e THREADS=$THREADS \
|
||||
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG
|
||||
$envs \
|
||||
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
|
||||
fi
|
||||
|
||||
install_success
|
||||
|
||||
@@ -16,6 +16,9 @@
|
||||
- filename: "tw-elements.css"
|
||||
url: "https://cdn.jsdelivr.net/npm/tw-elements/css/tw-elements.min.css"
|
||||
sha: "72746af5326d6eb3647f504efa81b5e0f50ed486f37cc8262a4169781ad310d3"
|
||||
- filename: "tw-elements.js"
|
||||
url: "https://cdn.jsdelivr.net/npm/tw-elements/js/tw-elements.umd.min.js"
|
||||
sha: "2985706362e92360b65c8697cc32490bb9c0a5df9cd9b7251a97c1c5a661a40a"
|
||||
- filename: "tailwindcss.js"
|
||||
url: "https://cdn.tailwindcss.com/3.3.0"
|
||||
sha: "dbff048aa4581e6eae7f1cb2c641f72655ea833b3bb82923c4a59822e11ca594"
|
||||
|
||||
83 examples/github-actions/workflow.yml Normal file
@@ -0,0 +1,83 @@
|
||||
name: Use LocalAI in GHA
|
||||
on:
|
||||
pull_request:
|
||||
types:
|
||||
- closed
|
||||
|
||||
jobs:
|
||||
notify-discord:
|
||||
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
|
||||
env:
|
||||
MODEL_NAME: hermes-2-theta-llama-3-8b
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0 # needed to checkout all branches for this Action to work
|
||||
# Starts the LocalAI container
|
||||
- name: Start LocalAI
|
||||
run: |
|
||||
echo "Starting LocalAI..."
|
||||
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
|
||||
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
|
||||
# Check the PR diff using the current branch and the base branch of the PR
|
||||
- uses: GrantBirki/git-diff-action@v2.7.0
|
||||
id: git-diff-action
|
||||
with:
|
||||
json_diff_file_output: diff.json
|
||||
raw_diff_file_output: diff.txt
|
||||
file_output_only: "true"
|
||||
# Ask to explain the diff to LocalAI
|
||||
- name: Summarize
|
||||
env:
|
||||
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
|
||||
id: summarize
|
||||
run: |
|
||||
input="$(cat $DIFF)"
|
||||
|
||||
# Define the LocalAI API endpoint
|
||||
API_URL="http://localhost:8080/chat/completions"
|
||||
|
||||
# Create a JSON payload using jq to handle special characters
|
||||
json_payload=$(jq -n --arg input "$input" '{
|
||||
model: "'$MODEL_NAME'",
|
||||
messages: [
|
||||
{
|
||||
role: "system",
|
||||
content: "Write a message summarizing the change diffs"
|
||||
},
|
||||
{
|
||||
role: "user",
|
||||
content: $input
|
||||
}
|
||||
]
|
||||
}')
|
||||
|
||||
# Send the request to LocalAI
|
||||
response=$(curl -s -X POST $API_URL \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$json_payload")
|
||||
|
||||
# Extract the summary from the response
|
||||
summary="$(echo $response | jq -r '.choices[0].message.content')"
|
||||
|
||||
# Print the summary
|
||||
# -H "Authorization: Bearer $API_KEY" \
|
||||
echo "Summary:"
|
||||
echo "$summary"
|
||||
echo "payload sent"
|
||||
echo "$json_payload"
|
||||
{
|
||||
echo 'message<<EOF'
|
||||
echo "$summary"
|
||||
echo EOF
|
||||
} >> "$GITHUB_OUTPUT"
|
||||
# Send the summary somewhere (e.g. Discord)
|
||||
- name: Discord notification
|
||||
env:
|
||||
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL }}
|
||||
DISCORD_USERNAME: "discord-bot"
|
||||
DISCORD_AVATAR: ""
|
||||
uses: Ilshidur/action-discord@master
|
||||
with:
|
||||
args: ${{ steps.summarize.outputs.message }}
|
||||
@@ -127,6 +127,56 @@
|
||||
- filename: Arcee-Spark-Q4_K_M.gguf
|
||||
sha256: 44123276d7845dc13f73ca4aa431dc4c931104eb7d2186f2a73d076fa0ee2330
|
||||
uri: huggingface://arcee-ai/Arcee-Spark-GGUF/Arcee-Spark-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen2
|
||||
name: "hercules-5.0-qwen2-7b"
|
||||
description: |
|
||||
Locutusque/Hercules-5.0-Qwen2-7B is a fine-tuned language model derived from Qwen2-7B. It is specifically designed to excel in instruction following, function calls, and conversational interactions across various scientific and technical domains. This fine-tuning has hercules-v5.0 with enhanced abilities in:
|
||||
|
||||
Complex Instruction Following: Understanding and accurately executing multi-step instructions, even those involving specialized terminology.
|
||||
Function Calling: Seamlessly interpreting and executing function calls, providing appropriate input and output values.
|
||||
Domain-Specific Knowledge: Engaging in informative and educational conversations about Biology, Chemistry, Physics, Mathematics, Medicine, Computer Science, and more.
|
||||
urls:
|
||||
- https://huggingface.co/Locutusque/Hercules-5.0-Qwen2-7B
|
||||
- https://huggingface.co/bartowski/Hercules-5.0-Qwen2-7B-GGUF
|
||||
overrides:
|
||||
parameters:
|
||||
model: Hercules-5.0-Qwen2-7B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Hercules-5.0-Qwen2-7B-Q4_K_M.gguf
|
||||
sha256: 8ebae4ffd43b906ddb938c3a611060ee5f99c35014e5ffe23ca35714361b5693
|
||||
uri: huggingface://Hercules-5.0-Qwen2-7B-Q4_K_M.gguf/Hercules-5.0-Qwen2-7B-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen2
|
||||
name: "arcee-agent"
|
||||
icon: https://i.ibb.co/CBHmTDn/136719a5-6d8a-4654-a618-46eabc788953.jpg
|
||||
description: |
|
||||
Arcee Agent is a cutting-edge 7B parameter language model specifically designed for function calling and tool use. Initialized from Qwen2-7B, it rivals the performance of much larger models while maintaining efficiency and speed. This model is particularly suited for developers, researchers, and businesses looking to implement sophisticated AI-driven solutions without the computational overhead of larger language models. Compute for training Arcee-Agent was provided by CrusoeAI. Arcee-Agent was trained using Spectrum.
|
||||
urls:
|
||||
- https://huggingface.co/crusoeai/Arcee-Agent-GGUF
|
||||
- https://huggingface.co/arcee-ai/Arcee-Agent
|
||||
overrides:
|
||||
parameters:
|
||||
model: arcee-agent.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: arcee-agent.Q4_K_M.gguf
|
||||
sha256: ebb49943a66c1e717f9399a555aee0af28a40bfac7500f2ad8dd05f211b62aac
|
||||
uri: huggingface://crusoeai/Arcee-Agent-GGUF/arcee-agent.Q4_K_M.gguf
|
||||
- !!merge <<: *qwen2
|
||||
name: "qwen2-7b-instruct-v0.8"
|
||||
icon: https://huggingface.co/MaziyarPanahi/Qwen2-7B-Instruct-v0.8/resolve/main/qwen2-fine-tunes-maziyar-panahi.webp
|
||||
description: |
|
||||
MaziyarPanahi/Qwen2-7B-Instruct-v0.8
|
||||
|
||||
This is a fine-tuned version of the Qwen/Qwen2-7B model. It aims to improve the base model across all benchmarks.
|
||||
urls:
|
||||
- https://huggingface.co/MaziyarPanahi/Qwen2-7B-Instruct-v0.8
|
||||
- https://huggingface.co/MaziyarPanahi/Qwen2-7B-Instruct-v0.8-GGUF
|
||||
overrides:
|
||||
parameters:
|
||||
model: Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
|
||||
sha256: 8c1b3efe9fa6ae1b37942ef26473cb4e0aed0f8038b60d4b61e5bffb61e49b7e
|
||||
uri: huggingface://MaziyarPanahi/Qwen2-7B-Instruct-v0.8-GGUF/Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
|
||||
- &mistral03
|
||||
## START Mistral
|
||||
url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"
|
||||
@@ -325,7 +375,7 @@
|
||||
files:
|
||||
- filename: gemma-2-27b-it-Q4_K_M.gguf
|
||||
uri: huggingface://bartowski/gemma-2-27b-it-GGUF/gemma-2-27b-it-Q4_K_M.gguf
|
||||
sha256: ca86fbdb791842cf2e5eb276a6916e326b3b5d58d9ab60ee3e18b1c6f01fc181
|
||||
sha256: 69a0cdba2bc2e56d8298a9330f2a050ebecd657ed315beb3e51ae427b224dbc7
|
||||
- !!merge <<: *gemma
|
||||
name: "gemma-2-9b-it"
|
||||
urls:
|
||||
@@ -339,7 +389,100 @@
|
||||
files:
|
||||
- filename: gemma-2-9b-it-Q4_K_M.gguf
|
||||
uri: huggingface://bartowski/gemma-2-9b-it-GGUF/gemma-2-9b-it-Q4_K_M.gguf
|
||||
sha256: c70fd20caec79fb953b83031c46ddea4e99905835a66af7b8a856aa1b2534614
|
||||
sha256: 05390244866abc0e7108a2b1e3db07b82df3cd82f006256a75fc21137054151f
|
||||
- !!merge <<: *gemma
|
||||
name: "tess-v2.5-gemma-2-27b-alpha"
|
||||
urls:
|
||||
- https://huggingface.co/migtissera/Tess-v2.5-Gemma-2-27B-alpha
|
||||
- https://huggingface.co/bartowski/Tess-v2.5-Gemma-2-27B-alpha-GGUF
|
||||
icon: https://huggingface.co/migtissera/Tess-v2.5-Qwen2-72B/resolve/main/Tess-v2.5.png
|
||||
description: |
|
||||
Great at reasoning, but woke as fuck! This is a fine-tune over the Gemma-2-27B-it, since the base model fine-tuning is not generating coherent content.
|
||||
|
||||
Tess-v2.5 is the latest state-of-the-art model in the Tess series of Large Language Models (LLMs). Tess, short for Tesoro (Treasure in Italian), is the flagship LLM series created by Migel Tissera. Tess-v2.5 brings significant improvements in reasoning capabilities, coding capabilities and mathematics
|
||||
overrides:
|
||||
parameters:
|
||||
model: Tess-v2.5-Gemma-2-27B-alpha-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Tess-v2.5-Gemma-2-27B-alpha-Q4_K_M.gguf
|
||||
uri: huggingface://bartowski/Tess-v2.5-Gemma-2-27B-alpha-GGUF/Tess-v2.5-Gemma-2-27B-alpha-Q4_K_M.gguf
|
||||
sha256: d7be7092d28aefbdcd1ee4f4d8503d169d0a649f763e169d4b179aef20d69c21
|
||||
- !!merge <<: *gemma
|
||||
name: "gemma2-9b-daybreak-v0.5"
|
||||
urls:
|
||||
- https://huggingface.co/crestf411/gemma2-9B-daybreak-v0.5
|
||||
- https://huggingface.co/Vdr1/gemma2-9B-daybreak-v0.5-GGUF-Imatrix-IQ
|
||||
description: |
|
||||
THIS IS A PRE-RELEASE. BEGONE.
|
||||
|
||||
Beware, depraved. Not suitable for any audience.
|
||||
|
||||
Dataset curation to remove slop-perceived expressions continues. Unfortunately base models (which this is merged on top of) are generally riddled with "barely audible"s and "couldn't help"s and "shivers down spines" etc.
|
||||
overrides:
|
||||
parameters:
|
||||
model: gemma2-9B-daybreak-v0.5-Q4_K_M-imat.gguf
|
||||
files:
|
||||
- filename: gemma2-9B-daybreak-v0.5-Q4_K_M-imat.gguf
|
||||
uri: huggingface://Vdr1/gemma2-9B-daybreak-v0.5-GGUF-Imatrix-IQ/gemma2-9B-daybreak-v0.5-Q4_K_M-imat.gguf
|
||||
sha256: 6add4d12052918986af935d686773e4e89fddd1bbf7941911cf3fbeb1b1862c0
|
||||
- !!merge <<: *gemma
|
||||
name: "gemma-2-9b-it-sppo-iter3"
|
||||
urls:
|
||||
- https://huggingface.co/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3
|
||||
- https://huggingface.co/bartowski/Gemma-2-9B-It-SPPO-Iter3-GGUF
|
||||
description: |
|
||||
Self-Play Preference Optimization for Language Model Alignment (https://arxiv.org/abs/2405.00675)
|
||||
Gemma-2-9B-It-SPPO-Iter3
|
||||
|
||||
This model was developed using Self-Play Preference Optimization at iteration 3, based on the google/gemma-2-9b-it architecture as the starting point. We utilized the prompt sets from the openbmb/UltraFeedback dataset, split into 3 parts for 3 iterations by snorkelai/Snorkel-Mistral-PairRM-DPO-Dataset. All responses used are synthetic.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Gemma-2-9B-It-SPPO-Iter3-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Gemma-2-9B-It-SPPO-Iter3-Q4_K_M.gguf
|
||||
uri: huggingface://bartowski/Gemma-2-9B-It-SPPO-Iter3-GGUF/Gemma-2-9B-It-SPPO-Iter3-Q4_K_M.gguf
|
||||
sha256: 7aac221f548beef8d45106eabbec6b2c4e1669a51ad14e4bf640d463dadf36e7
|
||||
- !!merge <<: *gemma
|
||||
name: "smegmma-9b-v1"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/RSuc5p9Sm6CYj6lGOxvx4.gif
|
||||
urls:
|
||||
- https://huggingface.co/TheDrummer/Smegmma-9B-v1
|
||||
- https://huggingface.co/bartowski/Smegmma-9B-v1-GGUF
|
||||
description: |
|
||||
Smegmma 9B v1 🧀
|
||||
|
||||
The sweet moist of Gemma 2, unhinged.
|
||||
|
||||
image/gif
|
||||
|
||||
smeg - ghem - mah
|
||||
|
||||
An eRP model that will blast you with creamy moist. Finetuned by yours truly.
|
||||
|
||||
The first Gemma 2 9B RP finetune attempt!
|
||||
What's New?
|
||||
|
||||
Engaging roleplay
|
||||
Less refusals / censorship
|
||||
Less commentaries / summaries
|
||||
More willing AI
|
||||
Better formatting
|
||||
Better creativity
|
||||
Moist alignment
|
||||
|
||||
Notes
|
||||
|
||||
Refusals still exist, but a couple of re-gens may yield the result you want
|
||||
Formatting and logic may be weaker at the start
|
||||
Make sure to start strong
|
||||
May be weaker with certain cards, YMMV and adjust accordingly!
|
||||
overrides:
|
||||
parameters:
|
||||
model: Smegmma-9B-v1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Smegmma-9B-v1-Q4_K_M.gguf
|
||||
uri: huggingface://bartowski/Smegmma-9B-v1-GGUF/Smegmma-9B-v1-Q4_K_M.gguf
|
||||
sha256: abd9da0a6bf5cbc0ed6bb0d7e3ee7aea3f6b1edbf8c64e51d0fa25001975aed7
|
||||
- &llama3
|
||||
url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
|
||||
@@ -475,8 +618,8 @@
|
||||
model: Llama-3-SauerkrautLM-8b-Instruct-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Llama-3-SauerkrautLM-8b-Instruct-Q4_K_M.gguf
|
||||
sha256: 5833d99d5596cade0d02e61cddaa6dac49170864ee56d0b602933c6f9fbae314
|
||||
uri: huggingface://bartowski/Llama-3-SauerkrautLM-8b-Instruct-GGUF/Llama-3-SauerkrautLM-8b-Instruct-Q4_K_M.gguf
|
||||
sha256: e5ae69b6f59b3f207fa6b435490286b365add846a310c46924fa784b5a7d73e3
|
||||
- !!merge <<: *llama3
|
||||
name: "llama-3-13b-instruct-v0.1"
|
||||
urls:
|
||||
@@ -1330,6 +1473,31 @@
|
||||
- filename: llama3-8B-DarkIdol-2.1-Uncensored-1048K-Q4_K_M-imat.gguf
|
||||
sha256: 86f0f1e10fc315689e09314aebb7354bb40d8fe95de008d21a75dc8fff1cd2fe
|
||||
uri: huggingface://LWDCLS/llama3-8B-DarkIdol-2.1-Uncensored-1048K-GGUF-IQ-Imatrix-Request/llama3-8B-DarkIdol-2.1-Uncensored-1048K-Q4_K_M-imat.gguf
|
||||
- !!merge <<: *llama3
|
||||
name: "llama3-8b-darkidol-2.2-uncensored-1048k-iq-imatrix"
|
||||
urls:
|
||||
- https://huggingface.co/aifeifei798/llama3-8B-DarkIdol-2.2-Uncensored-1048K
|
||||
- https://huggingface.co/LWDCLS/llama3-8B-DarkIdol-2.2-Uncensored-1048K-GGUF-IQ-Imatrix-Request
|
||||
icon: https://huggingface.co/aifeifei798/llama3-8B-DarkIdol-2.2-Uncensored-1048K/resolve/main/llama3-8B-DarkIdol-2.2-Uncensored-1048K.png
|
||||
description: |
|
||||
The module combination has been readjusted to better fulfill various roles and has been adapted for mobile phones.
|
||||
|
||||
- Saving money(LLama 3)
|
||||
- Uncensored
|
||||
- Quick response
|
||||
- The underlying model used is winglian/Llama-3-8b-1048k-PoSE
|
||||
- A scholarly response akin to a thesis.(I tend to write songs extensively, to the point where one song almost becomes as detailed as a thesis. :)
|
||||
- DarkIdol:Roles that you can imagine and those that you cannot imagine.
|
||||
- Roleplay
|
||||
- Specialized in various role-playing scenarios more look at test role. (https://huggingface.co/aifeifei798/llama3-8B-DarkIdol-1.2/tree/main/test)
|
||||
- more look at LM Studio presets (https://huggingface.co/aifeifei798/llama3-8B-DarkIdol-1.2/tree/main/config-presets)
|
||||
overrides:
|
||||
parameters:
|
||||
model: llama3-8B-DarkIdol-2.2-Uncensored-1048K-Q4_K_M-imat.gguf
|
||||
files:
|
||||
- filename: llama3-8B-DarkIdol-2.2-Uncensored-1048K-Q4_K_M-imat.gguf
|
||||
sha256: 7714947799d4e6984cf9106244ee24aa821778936ad1a81023480a774e255f52
|
||||
uri: huggingface://LWDCLS/llama3-8B-DarkIdol-2.2-Uncensored-1048K-GGUF-IQ-Imatrix-Request/llama3-8B-DarkIdol-2.2-Uncensored-1048K-Q4_K_M-imat.gguf
|
||||
- !!merge <<: *llama3
|
||||
name: "llama3-turbcat-instruct-8b"
|
||||
urls:
|
||||
@@ -1345,6 +1513,76 @@
|
||||
- filename: llama3-turbcat-instruct-8b-Q4_K_M.gguf
|
||||
sha256: a9a36e3220d901a8ad80c75608a81aaeed3a9cdf111247462bf5e3443aad5461
|
||||
uri: huggingface://bartowski/llama3-turbcat-instruct-8b-GGUF/llama3-turbcat-instruct-8b-Q4_K_M.gguf
|
||||
- !!merge <<: *llama3
|
||||
name: "l3-8b-everything-cot"
|
||||
urls:
|
||||
- https://huggingface.co/FPHam/L3-8B-Everything-COT
|
||||
- https://huggingface.co/bartowski/L3-8B-Everything-COT-GGUF
|
||||
icon: https://huggingface.co/FPHam/L3-8B-Everything-COT/resolve/main/cot2.png
|
||||
description: |
|
||||
Everything COT is an investigative self-reflecting general model that uses Chain of Thought for everything. And I mean everything.
|
||||
|
||||
Instead of confidently proclaiming something (or confidently hallucinating other things) like most models, it caries an internal dialogue with itself and often cast doubts over uncertain topics while looking at it from various sides.
|
||||
overrides:
|
||||
parameters:
|
||||
model: L3-8B-Everything-COT-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: L3-8B-Everything-COT-Q4_K_M.gguf
|
||||
sha256: b220b0e2f8fb1c8a491d10dbd054269ed078ee5e2e62dc9d2e3b97b06f52e987
|
||||
uri: huggingface://bartowski/L3-8B-Everything-COT-GGUF/L3-8B-Everything-COT-Q4_K_M.gguf
|
||||
- !!merge <<: *llama3
|
||||
name: "llama-3-llamilitary"
|
||||
urls:
|
||||
- https://huggingface.co/Heralax/llama-3-llamilitary
|
||||
- https://huggingface.co/mudler/llama-3-llamilitary-Q4_K_M-GGUF
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/64825ebceb4befee377cf8ac/ea2C9laq24V6OuxwhzJZS.png
|
||||
description: |
|
||||
This is a model trained on [instruct data generated from old historical war books] as well as on the books themselves, with the goal of creating a joke LLM knowledgeable about the (long gone) kind of warfare involving muskets, cavalry, and cannon.
|
||||
|
||||
This model can provide good answers, but it turned out to be pretty fragile during conversation for some reason: open-ended questions can make it spout nonsense. Asking facts is more reliable but not guaranteed to work.
|
||||
|
||||
The basic guide to getting good answers is: be specific with your questions. Use specific terms and define a concrete scenario, if you can, otherwise the LLM will often hallucinate the rest. I think the issue was that I did not train with a large enough system prompt: not enough latent space is being activated by default. (I'll try to correct this in future runs).
|
||||
overrides:
|
||||
parameters:
|
||||
model: llama-3-llamilitary-q4_k_m.gguf
|
||||
files:
|
||||
- filename: llama-3-llamilitary-q4_k_m.gguf
|
||||
sha256: f3684f2f0845f9aead884fa9a52ea67bed53856ebeedef1620ca863aba57e458
|
||||
uri: huggingface://mudler/llama-3-llamilitary-Q4_K_M-GGUF/llama-3-llamilitary-q4_k_m.gguf
|
||||
- !!merge <<: *llama3
|
||||
name: "l3-stheno-maid-blackroot-grand-horror-16b"
|
||||
urls:
|
||||
- https://huggingface.co/DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-GGUF
|
||||
icon: https://huggingface.co/DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-GGUF/resolve/main/hm.jpg
|
||||
description: |
|
||||
Rebuilt and Powered Up.
|
||||
|
||||
WARNING: NSFW. Graphic HORROR. Extreme swearing. UNCENSORED. SMART.
|
||||
|
||||
The author took the original models in "L3-Stheno-Maid-Blackroot 8B" and completely rebuilt it a new pass-through merge (everything preserved) and blew it out to over 16.5 billion parameters - 642 tensors, 71 layers (8B original has 32 layers).
|
||||
|
||||
This is not an "upscale" or "franken merge" but a completely new model based on the models used to construct "L3-Stheno-Maid-Blackroot 8B".
|
||||
|
||||
The result is a take no prisoners, totally uncensored, fiction writing monster and roleplay master as well just about... any general fiction activity "AI guru" including scene generation and scene continuation.
|
||||
|
||||
As a result of the expansion / merge re-build its level of prose and story generation has significantly improved as well as word choice, sentence structure as well as default output levels and lengths.
|
||||
|
||||
It also has a STRONG horror bias, although it will generate content for almost any genre. That being said if there is a "hint" of things going wrong... they will.
|
||||
|
||||
It will also swear (R-18) like there is no tomorrow at times and "dark" characters will be VERY dark so to speak.
|
||||
|
||||
The model excels in details (real and "constructed"), descriptions, similes and metaphors.
|
||||
|
||||
It can have a sense of humor ... ah... dark humor.
|
||||
|
||||
Because of the nature of this merge most attributes of each of the 3 models will be in this rebuilt 16.5B model as opposed to the original 8B model where some of one or more of the model's features and/or strengths maybe reduced or overshadowed.
|
||||
overrides:
|
||||
parameters:
|
||||
model: L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-D_AU-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-D_AU-Q4_K_M.gguf
|
||||
sha256: ae29f38d73dfb04415821405cf8b319fc42d78d0cdd0da91db147d12e68030fe
|
||||
uri: huggingface://DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-GGUF/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-D_AU-Q4_K_M.gguf
|
||||
- &dolphin
|
||||
name: "dolphin-2.9-llama3-8b"
|
||||
url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"
|
||||
@@ -2141,6 +2379,176 @@
|
||||
- filename: Llama-3-Update-3.0-mmproj-model-f16.gguf
|
||||
sha256: 3d2f36dff61d6157cadf102df86a808eb9f8a230be1bc0bc99039d81a895468a
|
||||
uri: huggingface://Nitral-AI/Llama-3-Update-3.0-mmproj-model-f16/Llama-3-Update-3.0-mmproj-model-f16.gguf
|
||||
- !!merge <<: *llama3
  name: "llama-3_8b_unaligned_alpha"
  urls:
    - https://huggingface.co/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_Alpha
    - https://huggingface.co/bartowski/LLAMA-3_8B_Unaligned_Alpha-GGUF
  description: |
    Model card description:
    As of June 11, 2024, I've finally started training the model! The training is progressing smoothly, although it will take some time. I used a combination of model merges and an abliterated model as a base, followed by a comprehensive deep-unalignment protocol to unalign the model to its core. A common issue with uncensoring and unaligning models is that it often significantly impacts their base intelligence. To mitigate these drawbacks, I've included a substantial corpus of common sense, theory of mind, and various other elements to counteract the effects of the deep uncensoring process. Given the extensive corpus involved, the training will require at least a week of continuous training. Expected early results: in about 3-4 days.
    Additional info:
    As of June 13, 2024, I've observed that even after two days of continuous training, the model is still resistant to learning certain aspects.
    For example, some of the validation data still shows a loss over , whereas other parts have a loss of < or lower. This is after the model was initially abliterated.
    June 18, 2024 update: after extensive testing of the intermediate checkpoints, significant progress has been made.
    The model is slowly — I mean, really slowly — unlearning its alignment. By significantly lowering the learning rate, I was able to visibly observe deep behavioral changes; this process is taking longer than anticipated, but it's going to be worth it. Estimated time to completion: 4 more days. I'm pleased to report that in several tests, the model not only maintained its intelligence but actually showed a slight improvement, especially in terms of common sense. An intermediate checkpoint of this model was used to create invisietch/EtherealRainbow-v0.3-rc7, with promising results. Currently, it seems like I'm on the right track. I hope this model will serve as a solid foundation for further merges, whether for role-playing (RP) or for uncensoring. This approach also allows us to save on actual fine-tuning, thereby reducing our carbon footprint. The merge process takes just a few minutes of CPU time, instead of days of GPU work.
    June 20, 2024 update: unaligning was partially successful, and the results are decent, but I am not fully satisfied. I decided to bite the bullet and do a full finetune, god have mercy on my GPUs. I am also releasing the intermediate checkpoint of this model.
  icon: https://i.imgur.com/Kpk1PgZ.png
  overrides:
    parameters:
      model: LLAMA-3_8B_Unaligned_Alpha-Q4_K_M.gguf
  files:
    - filename: LLAMA-3_8B_Unaligned_Alpha-Q4_K_M.gguf
      sha256: 93ddb5f9f525586d2578186c61e39f96461c26c0b38631de89aa30b171774515
      uri: huggingface://bartowski/LLAMA-3_8B_Unaligned_Alpha-GGUF/LLAMA-3_8B_Unaligned_Alpha-Q4_K_M.gguf
- !!merge <<: *llama3
  name: "l3-8b-lunaris-v1"
  urls:
    - https://huggingface.co/Sao10K/L3-8B-Lunaris-v1
    - https://huggingface.co/bartowski/L3-8B-Lunaris-v1-GGUF
  description: |
    A generalist / roleplaying model merge based on Llama 3. Models were selected based on my personal experience while using them.

    I personally think this is an improvement over Stheno v3.2, considering the other models helped balance out its creativity while at the same time improving its logic.
  overrides:
    parameters:
      model: L3-8B-Lunaris-v1-Q4_K_M.gguf
  files:
    - filename: L3-8B-Lunaris-v1-Q4_K_M.gguf
      sha256: ef1d393f125be8c608859eeb4f26185ad90c7fc9cba41c96e847e77cdbcada18
      uri: huggingface://bartowski/L3-8B-Lunaris-v1-GGUF/L3-8B-Lunaris-v1-Q4_K_M.gguf
- !!merge <<: *llama3
  name: "llama-3_8b_unaligned_alpha_rp_soup-i1"
  icon: https://i.imgur.com/pXcjpoV.png
  urls:
    - https://huggingface.co/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_Alpha_RP_Soup
    - https://huggingface.co/mradermacher/LLAMA-3_8B_Unaligned_Alpha_RP_Soup-i1-GGUF
  description: |
    Censorship level: Medium

    This model is the outcome of multiple merges, starting with the base model SicariusSicariiStuff/LLAMA-3_8B_Unaligned_Alpha. The merging process was conducted in several stages:

    Merge 1: LLAMA-3_8B_Unaligned_Alpha was SLERP merged with invisietch/EtherealRainbow-v0.3-8B.
    Merge 2: LLAMA-3_8B_Unaligned_Alpha was SLERP merged with TheDrummer/Llama-3SOME-8B-v2.
    Soup 1: Merge 1 was combined with Merge 2.
    Final Merge: Soup 1 was SLERP merged with Nitral-Archive/Hathor_Enigmatica-L3-8B-v0.4.

    The final model is surprisingly coherent (although slightly more censored), which is a bit unexpected, since all the intermediate merge steps were pretty incoherent.
  overrides:
    parameters:
      model: LLAMA-3_8B_Unaligned_Alpha_RP_Soup.i1-Q4_K_M.gguf
  files:
    - filename: LLAMA-3_8B_Unaligned_Alpha_RP_Soup.i1-Q4_K_M.gguf
      sha256: 94347eb5125d9092e286730ae0ccc78374d68663c16ad2265005d8721eb8807b
      uri: huggingface://mradermacher/LLAMA-3_8B_Unaligned_Alpha_RP_Soup-i1-GGUF/LLAMA-3_8B_Unaligned_Alpha_RP_Soup.i1-Q4_K_M.gguf
- !!merge <<: *llama3
  name: "hathor_respawn-l3-8b-v0.8"
  icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/sWyipsXI-Wl-uEm57SRwM.png
  urls:
    - https://huggingface.co/Nitral-AI/Hathor_Respawn-L3-8B-v0.8
    - https://huggingface.co/bartowski/Hathor_Respawn-L3-8B-v0.8-GGUF
  description: |
    Hathor_Aleph-v0.8 is a model based on the Llama 3 architecture, designed to seamlessly integrate creativity, intelligence, and robust performance, making it an ideal tool for a wide range of applications such as creative writing, educational support, and human/computer interaction.
    Hathor 0.8 is trained on 3 epochs of private RP, STEM (instruction/dialog), Opus instructions, a mixture of light/classical novel data, and roleplaying chat pairs, on top of Llama 3 8B Instruct.
  overrides:
    parameters:
      model: Hathor_Respawn-L3-8B-v0.8-Q4_K_M.gguf
  files:
    - filename: Hathor_Respawn-L3-8B-v0.8-Q4_K_M.gguf
      sha256: d0cdfa8951ee80b252bf1dc183403ca9b48bc3de1578cb8e7fe321af753e661c
      uri: huggingface://bartowski/Hathor_Respawn-L3-8B-v0.8-GGUF/Hathor_Respawn-L3-8B-v0.8-Q4_K_M.gguf
- !!merge <<: *llama3
  name: "llama3-8b-instruct-replete-adapted"
  icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/-0dERC793D9XeFsJ9uHbx.png
  urls:
    - https://huggingface.co/Replete-AI/Llama3-8B-Instruct-Replete-Adapted
    - https://huggingface.co/bartowski/Llama3-8B-Instruct-Replete-Adapted-GGUF
  description: |
    Replete-Coder-llama3-8b is a general-purpose model that is specially trained for coding in over 100 programming languages. The data used to train the model contains 25% non-code instruction data and 75% coding instruction data, totaling up to 3.9 million lines, roughly 1 billion tokens, or 7.27 GB of instruct data. The data used to train this model was 100% uncensored, then fully deduplicated, before training happened.

    More than just a coding model!

    Although Replete-Coder has amazing coding capabilities, it is trained on a vast amount of non-coding data, fully cleaned and uncensored. Don't just use it for coding, use it for all your needs! We are truly trying to make the GPT killer!
  overrides:
    parameters:
      model: Llama3-8B-Instruct-Replete-Adapted-Q4_K_M.gguf
  files:
    - filename: Llama3-8B-Instruct-Replete-Adapted-Q4_K_M.gguf
      sha256: 9e9a142f6fb5fc812b17bfc30230582ae50ac22b93dea696b6887cde815c1cb4
      uri: huggingface://bartowski/Llama3-8B-Instruct-Replete-Adapted-GGUF/Llama3-8B-Instruct-Replete-Adapted-Q4_K_M.gguf
- !!merge <<: *llama3
  name: "llama-3-perky-pat-instruct-8b"
  urls:
    - https://huggingface.co/grimjim/Llama-3-Perky-Pat-Instruct-8B
    - https://huggingface.co/bartowski/Llama-3-Perky-Pat-Instruct-8B-GGUF
  description: |
    We explore negative-weight merging and propose Orthogonalized Vector Adaptation, or OVA.

    This is a merge of pre-trained language models created using mergekit.

    "One must imagine Sisyphus happy."

    Task arithmetic was used to invert the intervention vector that was applied in MopeyMule, via application of negative weight -1.0. The combination of model weights (Instruct - MopeyMule) comprises an Orthogonalized Vector Adaptation that can subsequently be applied to the base Instruct model, and could in principle be applied to other models derived from fine-tuning the Instruct model (see the sketch after this entry).

    This model is meant to continue exploration of behavioral changes that can be achieved via orthogonalized steering. The result appears to be more enthusiastic and lengthy responses in chat, though it is also clear that the merged model has some unhealed damage.

    Built with Meta Llama 3.
  overrides:
    parameters:
      model: Llama-3-Perky-Pat-Instruct-8B-Q4_K_M.gguf
  files:
    - filename: Llama-3-Perky-Pat-Instruct-8B-Q4_K_M.gguf
      sha256: b0eae5d9d58a7101a30693c267097a90f4a005c81fda801b40ab2c25e788a93e
      uri: huggingface://bartowski/Llama-3-Perky-Pat-Instruct-8B-GGUF/Llama-3-Perky-Pat-Instruct-8B-Q4_K_M.gguf
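To make the task-arithmetic step described in the entry above concrete, here is a minimal, hedged sketch of a mergekit run that applies a negative weight to MopeyMule on top of the Instruct base. The repository names, the task_arithmetic parameters, and the output path are illustrative assumptions, not the exact recipe used for this model:

# Hedged sketch: write a mergekit config that subtracts the MopeyMule delta
# (weight -1.0) relative to the Llama 3 Instruct base, then run it with mergekit-yaml.
# Repo names, parameters and paths are assumptions for illustration only.
cat > ova-task-arithmetic.yml <<'EOF'
merge_method: task_arithmetic
base_model: meta-llama/Meta-Llama-3-8B-Instruct
models:
  - model: failspy/Llama-3-8B-Instruct-MopeyMule
    parameters:
      weight: -1.0
dtype: bfloat16
EOF
mergekit-yaml ova-task-arithmetic.yml ./merged-ova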
- !!merge <<: *llama3
  name: "l3-uncen-merger-omelette-rp-v0.2-8b"
  icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/m0YKWwK9n7w8rnKOzduu4.png
  urls:
    - https://huggingface.co/Casual-Autopsy/L3-Uncen-Merger-Omelette-RP-v0.2-8B
    - https://huggingface.co/LWDCLS/L3-Uncen-Merger-Omelette-RP-v0.2-8B-GGUF-IQ-Imatrix-Request
  description: |
    L3-Uncen-Merger-Omelette-RP-v0.2-8B is a merge of the following models using LazyMergekit:

    Sao10K/L3-8B-Stheno-v3.2
    Casual-Autopsy/L3-Umbral-Mind-RP-v1.0-8B
    bluuwhale/L3-SthenoMaidBlackroot-8B-V1
    Cas-Warehouse/Llama-3-Mopeyfied-Psychology-v2
    migtissera/Llama-3-8B-Synthia-v3.5
    tannedbum/L3-Nymeria-Maid-8B
    Casual-Autopsy/L3-Umbral-Mind-RP-v0.3-8B
    tannedbum/L3-Nymeria-8B
    ChaoticNeutrals/Hathor_RP-v.01-L3-8B
    cgato/L3-TheSpice-8b-v0.8.3
    Sao10K/L3-8B-Stheno-v3.1
    Nitral-AI/Hathor_Stable-v0.2-L3-8B
    aifeifei798/llama3-8B-DarkIdol-1.0
    ChaoticNeutrals/Poppy_Porpoise-1.4-L3-8B
    ResplendentAI/Nymph_8B
  overrides:
    parameters:
      model: L3-Uncen-Merger-Omelette-RP-v0.2-8B-Q4_K_M-imat.gguf
  files:
    - filename: L3-Uncen-Merger-Omelette-RP-v0.2-8B-Q4_K_M-imat.gguf
      sha256: 6bbc42a4c3b25f2b854d76a6e32746b9b3b21dd8856f8f2bc1a5b1269aa8fca1
      uri: huggingface://LWDCLS/L3-Uncen-Merger-Omelette-RP-v0.2-8B-GGUF-IQ-Imatrix-Request/L3-Uncen-Merger-Omelette-RP-v0.2-8B-Q4_K_M-imat.gguf
- !!merge <<: *llama3
  name: "nymph_8b-i1"
  icon: https://cdn-uploads.huggingface.co/production/uploads/626dfb8786671a29c715f8a9/9U_eJCDzLJ8nxb6qfuICc.jpeg
  urls:
    - https://huggingface.co/ResplendentAI/Nymph_8B
    - https://huggingface.co/mradermacher/Nymph_8B-i1-GGUF?not-for-all-audiences=true
  description: |
    Model card:
    Nymph is the culmination of everything I have learned with the T-series project. This model aims to be a unique and full-featured RP juggernaut.

    The finetune incorporates 1.6 million tokens of RP data sourced from Bluemoon, FreedomRP, Aesir-Preview, and Claude Opus logs. I made sure to use the multi-turn ShareGPT datasets this time instead of Alpaca conversions. I have also included three of my personal datasets. The final touch is an ORPO based upon OpenHermes roleplay preferences.
  overrides:
    parameters:
      model: Nymph_8B.i1-Q4_K_M.gguf
  files:
    - filename: Nymph_8B.i1-Q4_K_M.gguf
      sha256: 5b35794539d9cd262720f47a54f59dbffd5bf6c601950359b5c68d13f1ce13a0
      uri: huggingface://mradermacher/Nymph_8B-i1-GGUF/Nymph_8B.i1-Q4_K_M.gguf
- &chatml
  ### ChatML
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
@@ -2337,6 +2745,33 @@
    - filename: "phi-2-orange.Q4_0.gguf"
      sha256: "49cb710ae688e1b19b1b299087fa40765a0cd677e3afcc45e5f7ef6750975dcf"
      uri: "huggingface://TheBloke/phi-2-orange-GGUF/phi-2-orange.Q4_0.gguf"
### Internlm2
- name: "internlm2_5-7b-chat-1m"
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
  urls:
    - https://huggingface.co/internlm/internlm2_5-7b-chat-1m
    - https://huggingface.co/bartowski/internlm2_5-7b-chat-1m-GGUF
  icon: https://github.com/InternLM/InternLM/assets/22529082/b9788105-8892-4398-8b47-b513a292378e
  tags:
    - internlm2
    - gguf
    - cpu
    - gpu
  description: |
    InternLM2.5 has open-sourced a 7 billion parameter base model and a chat model tailored for practical scenarios. The model has the following characteristics:

    Outstanding reasoning capability: state-of-the-art performance on math reasoning, surpassing models like Llama3 and Gemma2-9B.

    1M context window: nearly perfect at finding needles in the haystack with a 1M-token context, with leading performance on long-context tasks like LongBench. Try it with LMDeploy for 1M-context inference and a file chat demo.

    Stronger tool use: InternLM2.5 supports gathering information from more than 100 web pages; the corresponding implementation will be released in Lagent soon. InternLM2.5 has better tool-utilization capabilities in instruction following, tool selection and reflection. See examples.
  overrides:
    parameters:
      model: internlm2_5-7b-chat-1m-Q4_K_M.gguf
  files:
    - filename: internlm2_5-7b-chat-1m-Q4_K_M.gguf
      uri: huggingface://bartowski/internlm2_5-7b-chat-1m-GGUF/internlm2_5-7b-chat-1m-Q4_K_M.gguf
      sha256: 10d5e18a4125f9d4d74a9284a21e0c820b150af06dee48665e54ff6e1be3a564
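Once installed, a gallery entry's name doubles as the model identifier on LocalAI's OpenAI-compatible API. A minimal sketch, assuming the InternLM2.5 model above was installed under its gallery name and the server listens on localhost:8080 (both are assumptions about your setup):

# Hedged sketch: model name, host and port are assumptions based on the entry above.
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "internlm2_5-7b-chat-1m",
    "messages": [{"role": "user", "content": "Summarize InternLM2.5 in one sentence."}]
  }'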
- &phi-3
  ### START Phi-3
  url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master"
scripts/prepare-libs.sh (new executable file, 23 lines)
@@ -0,0 +1,23 @@
#!/bin/bash

# Collect the shared libraries the given binary depends on and copy them
# into backend-assets/lib so they can be shipped alongside the binary.
mkdir -p backend-assets/lib

OS="$(uname)"

if [ "$OS" == "Darwin" ]; then
  LIBS="$(otool -L "$1" | awk 'NR > 1 { system("echo " $1) }' | xargs echo)"
elif [ "$OS" == "Linux" ]; then
  LIBS="$(ldd "$1" | awk 'NF == 4 { system("echo " $3) }' | xargs echo)"
else
  echo "Unsupported OS"
  exit 1
fi

for lib in $LIBS; do
  cp -f "$lib" backend-assets/lib
done

echo "==============================="
echo "Copied libraries to backend-assets/lib"
echo "$LIBS"
echo "==============================="
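The script resolves a binary's runtime library dependencies (otool -L on macOS, ldd on Linux) and copies them into backend-assets/lib so they can be bundled with the build artifacts. A usage sketch follows; the binary path is only an example, not a path every build necessarily produces:

# Hedged usage sketch: pass the binary whose shared libraries should be bundled.
./scripts/prepare-libs.sh backend-assets/grpc/piper
ls backend-assets/lib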