Compare commits

1 commit

Author: Ettore Di Giacinto
SHA1: 95f773ee4b
Message: experiment: build with a single image with all the deps
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Date: 2024-07-01 19:43:18 +02:00

43 changed files with 241 additions and 1822 deletions

.github/release.yml vendored

@@ -13,9 +13,6 @@ changelog:
labels:
- bug
- regression
- title: "🖧 P2P area"
labels:
- area/p2p
- title: Exciting New Features 🎉
labels:
- Semver-Minor


@@ -46,7 +46,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "4"
cuda-minor-version: "5"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -119,7 +119,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "4"
cuda-minor-version: "5"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg-core'


@@ -64,7 +64,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11'
@@ -75,7 +75,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "4"
cuda-minor-version: "5"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12'
@@ -86,7 +86,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cublas-cuda11-ffmpeg'
@@ -100,7 +100,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "4"
cuda-minor-version: "5"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -274,7 +274,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11-core'
@@ -285,7 +285,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "4"
cuda-minor-version: "5"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-core'
@@ -296,7 +296,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11-ffmpeg-core'
@@ -307,7 +307,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "4"
cuda-minor-version: "5"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg-core'


@@ -23,7 +23,7 @@ on:
type: string
cuda-minor-version:
description: 'CUDA minor version'
default: "4"
default: "5"
type: string
platforms:
description: 'Platforms'


@@ -1,170 +0,0 @@
name: Notifications for new models
on:
pull_request:
types:
- closed
jobs:
notify-discord:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
env:
MODEL_NAME: hermes-2-theta-llama-3-8b
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0 # needed to checkout all branches for this Action to work
- name: Start LocalAI
run: |
echo "Starting LocalAI..."
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.7.0
id: git-diff-action
with:
json_diff_file_output: diff.json
raw_diff_file_output: diff.txt
file_output_only: "true"
- name: Summarize
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
id: summarize
run: |
input="$(cat $DIFF)"
# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"
# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "You are LocalAI-bot. Write a discord message to notify everyone about the new model from the git diff. Make it informal. An example can include: the URL of the model, the name, and a brief description of the model if exists. Also add an hint on how to install it in LocalAI and that can be browsed over https://models.localai.io. For example: local-ai run model_name_here"
},
{
role: "user",
content: $input
}
]
}')
# Send the request to LocalAI
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")
# Extract the summary from the response
summary="$(echo $response | jq -r '.choices[0].message.content')"
# Print the summary
# -H "Authorization: Bearer $API_KEY" \
echo "Summary:"
echo "$summary"
echo "payload sent"
echo "$json_payload"
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
docker logs --tail 10 local-ai
- name: Discord notification
env:
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL }}
DISCORD_USERNAME: "LocalAI-Bot"
DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
uses: Ilshidur/action-discord@master
with:
args: ${{ steps.summarize.outputs.message }}
- name: Setup tmate session if fails
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
notify-twitter:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
env:
MODEL_NAME: hermes-2-theta-llama-3-8b
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0 # needed to checkout all branches for this Action to work
- name: Start LocalAI
run: |
echo "Starting LocalAI..."
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.7.0
id: git-diff-action
with:
json_diff_file_output: diff.json
raw_diff_file_output: diff.txt
file_output_only: "true"
- name: Summarize
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
id: summarize
run: |
input="$(cat $DIFF)"
# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"
# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "You are LocalAI-bot. Write a twitter message to notify everyone about the new model from the git diff. Make it informal and really short. An example can include: the name, and a brief description of the model if exists. Also add an hint on how to install it in LocalAI. For example: local-ai run model_name_here"
},
{
role: "user",
content: $input
}
]
}')
# Send the request to LocalAI
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")
# Extract the summary from the response
summary="$(echo $response | jq -r '.choices[0].message.content')"
# Print the summary
# -H "Authorization: Bearer $API_KEY" \
echo "Summary:"
echo "$summary"
echo "payload sent"
echo "$json_payload"
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
docker logs --tail 10 local-ai
- uses: Eomm/why-don-t-you-tweet@v2
with:
tweet-message: ${{ steps.summarize.outputs.message }}
env:
# Get your tokens from https://developer.twitter.com/apps
TWITTER_CONSUMER_API_KEY: ${{ secrets.TWITTER_APP_KEY }}
TWITTER_CONSUMER_API_SECRET: ${{ secrets.TWITTER_APP_SECRET }}
TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
- name: Setup tmate session if fails
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
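
Both deleted notification jobs work the same way: they start a throwaway LocalAI container, POST the PR diff to its OpenAI-style chat completions endpoint, and pull `.choices[0].message.content` out of the response with jq before forwarding it to Discord or Twitter. For reference, a minimal Go sketch of that call is below; the endpoint path, payload shape, and response field are copied from the workflow above, while the prompt text is a placeholder.

```go
// Minimal sketch of the chat-completions request the deleted workflows make.
// Assumes a LocalAI instance on localhost:8080 with the named model loaded;
// the prompt contents are placeholders.
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

type message struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

type chatRequest struct {
	Model    string    `json:"model"`
	Messages []message `json:"messages"`
}

type chatResponse struct {
	Choices []struct {
		Message message `json:"message"`
	} `json:"choices"`
}

func main() {
	payload, _ := json.Marshal(chatRequest{
		Model: "hermes-2-theta-llama-3-8b", // MODEL_NAME in the workflow
		Messages: []message{
			{Role: "system", Content: "Write a short notification message about the new model in this git diff."},
			{Role: "user", Content: "<diff text goes here>"},
		},
	})

	// Same endpoint the workflow curls.
	resp, err := http.Post("http://localhost:8080/chat/completions", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var out chatResponse
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	if len(out.Choices) > 0 {
		fmt.Println(out.Choices[0].Message.Content) // what the workflow posts to Discord/Twitter
	}
}
```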


@@ -1,65 +0,0 @@
name: Release notifications
on:
release:
types:
- published
jobs:
notify-discord:
runs-on: ubuntu-latest
env:
RELEASE_BODY: ${{ github.event.release.body }}
RELEASE_TITLE: ${{ github.event.release.name }}
RELEASE_TAG_NAME: ${{ github.event.release.tag_name }}
steps:
- name: Start LocalAI
run: |
echo "Starting LocalAI..."
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
- name: Summarize
id: summarize
run: |
input="$RELEASE_TITLE\b$RELEASE_BODY"
# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"
# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "Write a discord message with a bullet point summary of the release notes."
},
{
role: "user",
content: $input
}
]
}')
# Send the request to LocalAI API
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")
# Extract the summary from the response
summary=$(echo $response | jq -r '.choices[0].message.content')
# Print the summary
# -H "Authorization: Bearer $API_KEY" \
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
- name: Discord notification
env:
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL_RELEASE }}
DISCORD_USERNAME: "LocalAI-Bot"
DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
uses: Ilshidur/action-discord@master
with:
args: ${{ steps.summarize.outputs.message }}


@@ -31,7 +31,7 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache gawk
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache
sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
- name: Install CUDA Dependencies
run: |
@@ -40,7 +40,7 @@ jobs:
sudo apt-get update
sudo apt-get install -y cuda-cross-aarch64 cuda-nvcc-cross-aarch64-${CUDA_VERSION} libcublas-cross-aarch64-${CUDA_VERSION}
env:
CUDA_VERSION: 12-4
CUDA_VERSION: 12-5
- name: Cache grpc
id: cache-grpc
uses: actions/cache@v4
@@ -77,16 +77,6 @@ jobs:
echo "set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
echo "set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN
GRPC_DIR=$PWD/grpc
# http://google.github.io/googletest/quickstart-cmake.html
# Seems otherwise cross-arch fails to find it
echo "include(FetchContent)" >> $GRPC_DIR/CMakeLists.txt
echo "FetchContent_Declare(" >> $GRPC_DIR/CMakeLists.txt
echo " googletest" >> $GRPC_DIR/CMakeLists.txt
echo " URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip" >> $GRPC_DIR/CMakeLists.txt
echo ")" >> $GRPC_DIR/CMakeLists.txt
echo "FetchContent_MakeAvailable(googletest)" >> $GRPC_DIR/CMakeLists.txt
cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install && \
GRPC_CROSS_BUILD_DIR=$GRPC_DIR/cmake/cross_build && \
mkdir -p $GRPC_CROSS_BUILD_DIR && \
@@ -106,7 +96,6 @@ jobs:
CROSS_TOOLCHAIN=/usr/$GNU_HOST
CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
export PATH=$PATH:$GOPATH/bin
@@ -114,6 +103,7 @@ jobs:
sudo rm -rf /usr/aarch64-linux-gnu/lib/libstdc++.so.6
sudo cp -rf /usr/aarch64-linux-gnu/lib/libstdc++.so* /usr/aarch64-linux-gnu/lib/libstdc++.so.6
sudo cp /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 ld.so
GO_TAGS=p2p \
BACKEND_LIBS="./grpc/cmake/cross_build/third_party/re2/libre2.a ./grpc/cmake/cross_build/libgrpc.a ./grpc/cmake/cross_build/libgrpc++.a ./grpc/cmake/cross_build/third_party/protobuf/libprotobuf.a /usr/aarch64-linux-gnu/lib/libc.so.6 /usr/aarch64-linux-gnu/lib/libstdc++.so.6 /usr/aarch64-linux-gnu/lib/libgomp.so.1 /usr/aarch64-linux-gnu/lib/libm.so.6 /usr/aarch64-linux-gnu/lib/libgcc_s.so.1 /usr/aarch64-linux-gnu/lib/libdl.so.2 /usr/aarch64-linux-gnu/lib/libpthread.so.0 ./ld.so" \
GOOS=linux \
GOARCH=arm64 \
@@ -157,7 +147,7 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache gawk cmake
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache cmake
- name: Intel Dependencies
run: |
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
@@ -224,7 +214,8 @@ jobs:
export PATH=/opt/rocm/bin:$PATH
source /opt/intel/oneapi/setvars.sh
sudo cp /lib64/ld-linux-x86-64.so.2 ld.so
BACKEND_LIBS="./ld.so ./sources/go-piper/piper/build/fi/lib/libfmt.a ./sources/go-piper/piper-phonemize/pi/lib/libonnxruntime.so.1.14.1 ./sources/go-piper/piper-phonemize/pi/src/libespeak-ng/libespeak-ng.so /usr/lib/x86_64-linux-gnu/libdl.so.2 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/lib/x86_64-linux-gnu/libpthread.so.0 ./sources/go-piper/piper-phonemize/pi/lib/libpiper_phonemize.so.1 ./sources/go-piper/piper/build/si/lib/libspdlog.a ./sources/go-piper/espeak/ei/lib/libucd.so" \
GO_TAGS=p2p \
BACKEND_LIBS="./ld.so /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/libgomp.so.1" \
make -j4 dist
- uses: actions/upload-artifact@v4
with:
@@ -278,48 +269,6 @@ jobs:
files: |
release/*
build-macOS-x86_64:
runs-on: macos-13
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-go@v5
with:
go-version: '1.21.x'
cache: false
- name: Dependencies
run: |
brew install protobuf grpc
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
- name: Build
id: build
run: |
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
export PATH=$PATH:$GOPATH/bin
make dist
- uses: actions/upload-artifact@v4
with:
name: LocalAI-MacOS-x86_64
path: release/
- name: Release
uses: softprops/action-gh-release@v2
if: startsWith(github.ref, 'refs/tags/')
with:
files: |
release/*
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
build-macOS-arm64:
runs-on: macos-14
steps:
@@ -343,7 +292,7 @@ jobs:
export CPLUS_INCLUDE_PATH=/usr/local/include
export PATH=$PATH:$GOPATH/bin
make dist
BACKEND_LIBS="$(ls /opt/homebrew/opt/grpc/lib/*.dylib /opt/homebrew/opt/re2/lib/*.dylib /opt/homebrew/opt/openssl@3/lib/*.dylib /opt/homebrew/opt/protobuf/lib/*.dylib /opt/homebrew/opt/abseil/lib/*.dylib | xargs)" GO_TAGS=p2p make dist
- uses: actions/upload-artifact@v4
with:
name: LocalAI-MacOS-arm64


@@ -5,6 +5,12 @@ ARG INTEL_BASE_IMAGE=${BASE_IMAGE}
# The requirements-core target is common to all images. It should not be placed in requirements-core unless every single build will use it.
FROM ${BASE_IMAGE} AS requirements-core
# TODO(mudler): install all accelerators here
# and use make dist instead of build.
# TODO(mudler): modify make dist to build also go-piper and stablediffusion
# This way the same binary can work for everything(!)
# TODO(mudler): also make sure that we bundle all the required libs in the backend-assets/lib
# For the GPU-accelerated builds we are going to generate a tar file instead that will be extracted by the bash installer, and the libs will also be installed in the final docker image, so no need to pull ALL the dependencies
USER root
@@ -49,10 +55,12 @@ ENV PATH /usr/local/cuda/bin:${PATH}
# HipBLAS requirements
ENV PATH /opt/rocm/bin:${PATH}
# OpenBLAS requirements and stable diffusion
# OpenBLAS requirements and stable diffusion, tts (espeak)
RUN apt-get update && \
apt-get install -y --no-install-recommends \
libopenblas-dev \
espeak-ng \
espeak \
libopencv-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
@@ -77,8 +85,6 @@ ENV PATH="/root/.cargo/bin:${PATH}"
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
RUN apt-get update && \
apt-get install -y --no-install-recommends \
espeak-ng \
espeak \
python3-pip \
python-is-python3 \
python3-dev \
@@ -93,13 +99,12 @@ RUN pip install --user grpcio-tools
###################################
###################################
# The requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
# This target will be built on top of requirements-core or requirements-extras as determined by the IMAGE_TYPE build-arg
FROM requirements-${IMAGE_TYPE} AS requirements-drivers
# Base image for the build-type.
FROM requirements-${IMAGE_TYPE} AS run-requirements-drivers
ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=12
ARG CUDA_MINOR_VERSION=4
ARG CUDA_MINOR_VERSION=5
ENV BUILD_TYPE=${BUILD_TYPE}
@@ -186,6 +191,82 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
ldconfig \
; fi
# The build-requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
# This target will be built on top of requirements-core or requirements-extras as determined by the IMAGE_TYPE build-arg
FROM requirements-${IMAGE_TYPE} AS build-requirements-drivers
ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=12
ARG CUDA_MINOR_VERSION=5
ENV BUILD_TYPE=${BUILD_TYPE}
# Vulkan requirements
RUN <<EOT bash
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils wget gpg-agent && \
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
apt-get update && \
apt-get install -y \
vulkan-sdk && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
EOT
# CuBLAS requirements
RUN <<EOT bash
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils
if [ "amd64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
fi
if [ "arm64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
fi
dpkg -i cuda-keyring_1.1-1_all.deb && \
rm -f cuda-keyring_1.1-1_all.deb && \
apt-get update && \
apt-get install -y --no-install-recommends \
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
EOT
# clblas
RUN apt-get update && \
apt-get install -y --no-install-recommends \
libclblast-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# intel
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && apt update && apt install -y intel-basekit && apt-get clean && \
rm -rf /var/lib/apt/lists/*
# hipblas
RUN wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null && apt-get update && \
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/6.1.2/ubuntu jammy main" \
| tee /etc/apt/sources.list.d/amdgpu.list && \
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/6.1.2 jammy main" | tee --append /etc/apt/sources.list.d/rocm.list && printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | tee /etc/apt/preferences.d/rocm-pin-600 && \
apt update && \
apt-get install -y --no-install-recommends \
hipblas-dev rocm-dev \
rocblas-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
# I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
# to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
ldconfig
###################################
###################################
@@ -237,7 +318,7 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
# Adjustments to the build process should likely be made here.
FROM requirements-drivers AS builder
FROM build-requirements-drivers AS builder
ARG GO_TAGS="stablediffusion tts p2p"
ARG GRPC_BACKENDS
@@ -282,9 +363,8 @@ COPY --from=grpc /opt/grpc /usr/local
# Rebuild with defaults backends
WORKDIR /build
## Build the binary
RUN make build
# Need to build tts and stablediffusion separately first (?)
RUN make dist && rm release/*.sha256 && mv release/* local-ai
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
@@ -296,7 +376,7 @@ RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
# This is the final target. The result of this target will be the image uploaded to the registry.
# If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
FROM requirements-drivers
FROM run-requirements-drivers
ARG FFMPEG
ARG BUILD_TYPE
@@ -341,6 +421,7 @@ RUN make prepare-sources
COPY --from=builder /build/local-ai ./
# Copy shared libraries for piper
# TODO(mudler): bundle these libs in backend-assets/lib/ (like we do for llama.cpp deps)
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
# do not let stablediffusion rebuild (requires an older version of absl)


@@ -3,11 +3,9 @@ GOTEST=$(GOCMD) test
GOVET=$(GOCMD) vet
BINARY_NAME=local-ai
DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=c4dd11d1d3903e1922c06242e189f6310fc4d8c3
CPPLLAMA_VERSION?=9ef07800622e4c371605f9419864d15667c3558f
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -18,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
WHISPER_CPP_VERSION?=1c31f9d4a8936aec550e6c4dc9ca5cae3b4f304a
WHISPER_CPP_VERSION?=b29b3b29240aac8b71ce8e5a4360c1f1562ad66f
# bert.cpp version
BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4
@@ -35,11 +33,9 @@ TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
export BUILD_TYPE?=
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
export CMAKE_ARGS?=
export BACKEND_LIBS?=
CGO_LDFLAGS?=
CGO_LDFLAGS_WHISPER?=
CGO_LDFLAGS_WHISPER+=-lggml
CUDA_LIBPATH?=/usr/local/cuda/lib64/
GO_TAGS?=
BUILD_ID?=
@@ -53,8 +49,8 @@ RANDOM := $(shell bash -c 'echo $$RANDOM')
VERSION?=$(shell git describe --always --tags || echo "dev" )
# go tool nm ./local-ai | grep Commit
LD_FLAGS?=
override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Version=$(VERSION)"
override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Version=$(VERSION)"
override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
OPTIONAL_TARGETS?=
@@ -86,25 +82,24 @@ ifeq ($(OS),Darwin)
else ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DGGML_METAL=OFF
export GGML_NO_ACCELERATE=1
export GGML_NO_METAL=1
endif
ifeq ($(BUILD_TYPE),metal)
# -lcblas removed: it seems to always be listed as a duplicate flag.
CGO_LDFLAGS += -framework Accelerate
endif
else
CGO_LDFLAGS_WHISPER+=-lgomp
endif
ifeq ($(BUILD_TYPE),openblas)
CGO_LDFLAGS+=-lopenblas
export GGML_OPENBLAS=1
export WHISPER_OPENBLAS=1
endif
ifeq ($(BUILD_TYPE),cublas)
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
export GGML_CUDA=1
export WHISPER_CUDA=1
CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda -lcufft
endif
@@ -112,14 +107,6 @@ ifeq ($(BUILD_TYPE),vulkan)
CMAKE_ARGS+=-DGGML_VULKAN=1
endif
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
export GGML_SYCL=1
endif
ifeq ($(BUILD_TYPE),sycl_f16)
export GGML_SYCL_F16=1
endif
ifeq ($(BUILD_TYPE),hipblas)
ROCM_HOME ?= /opt/rocm
ROCM_PATH ?= /opt/rocm
@@ -128,7 +115,7 @@ ifeq ($(BUILD_TYPE),hipblas)
export CC=$(ROCM_HOME)/llvm/bin/clang
# llama-ggml has no hipblas support, so override it here.
export STABLE_BUILD_TYPE=
export GGML_HIPBLAS=1
export WHISPER_HIPBLAS=1
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
@@ -138,16 +125,17 @@ endif
ifeq ($(BUILD_TYPE),metal)
CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
export GGML_METAL=1
export WHISPER_METAL=1
endif
ifeq ($(BUILD_TYPE),clblas)
CGO_LDFLAGS+=-lOpenCL -lclblast
export GGML_OPENBLAS=1
export WHISPER_CLBLAST=1
endif
# glibc-static or glibc-devel-static required
ifeq ($(STATIC),true)
LD_FLAGS+=-linkmode external -extldflags -static
LD_FLAGS=-linkmode external -extldflags -static
endif
ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion)
@@ -181,8 +169,6 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
# Use filter-out to remove the specified backends
ALL_GRPC_BACKENDS := $(filter-out $(SKIP_GRPC_BACKEND),$(ALL_GRPC_BACKENDS))
GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)
TEST_PATHS?=./api/... ./pkg/... ./core/...
@@ -262,7 +248,7 @@ sources/whisper.cpp:
cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
cd sources/whisper.cpp && $(MAKE) libwhisper.a
get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream
@@ -333,7 +319,7 @@ build: prepare backend-assets grpcs ## Build the project
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
ifneq ($(BACKEND_LIBS),)
$(MAKE) backend-assets/lib
cp -f $(BACKEND_LIBS) backend-assets/lib/
cp $(BACKEND_LIBS) backend-assets/lib/
endif
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
@@ -348,22 +334,17 @@ backend-assets/lib:
dist:
$(MAKE) backend-assets/grpc/llama-cpp-avx2
ifeq ($(DETECT_LIBS),true)
scripts/prepare-libs.sh backend-assets/grpc/llama-cpp-avx2
endif
ifeq ($(OS),Darwin)
$(info ${GREEN}I Skip CUDA/hipblas build on MacOS${RESET})
else
ifneq ($(ARCH),arm64)
$(MAKE) backend-assets/grpc/llama-cpp-cuda
$(MAKE) backend-assets/grpc/llama-cpp-hipblas
$(MAKE) backend-assets/grpc/llama-cpp-sycl_f16
$(MAKE) backend-assets/grpc/llama-cpp-sycl_f32
endif
GO_TAGS="tts p2p" $(MAKE) build
ifeq ($(DETECT_LIBS),true)
scripts/prepare-libs.sh backend-assets/grpc/piper
endif
GO_TAGS="tts p2p" STATIC=true $(MAKE) build
STATIC=true $(MAKE) build
mkdir -p release
# if BUILD_ID is empty, then we don't append it to the binary name
ifeq ($(BUILD_ID),)
@@ -374,7 +355,7 @@ else
shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH).sha256
endif
dist-cross-linux-arm64:
dist-cross-linux-arm64:
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
STATIC=true $(MAKE) build
mkdir -p release
@@ -425,7 +406,7 @@ prepare-e2e:
mkdir -p $(TEST_DIR)
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=4 --build-arg FFMPEG=true -t localai-tests .
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=5 --build-arg FFMPEG=true -t localai-tests .
run-e2e-image:
ls -liah $(abspath ./tests/e2e-fixtures)
@@ -813,7 +794,7 @@ backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libti
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH=$(CURDIR)/sources/whisper.cpp LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
backend-assets/grpc/local-store: backend-assets/grpc
@@ -882,7 +863,7 @@ gen-assets:
$(GOCMD) run core/dependencies_manager/manager.go embedded/webui_static.yaml core/http/static/assets
## Documentation
docs/layouts/_default:
docs/layouts/_default:
mkdir -p docs/layouts/_default
docs/static/gallery.html: docs/layouts/_default
@@ -897,4 +878,4 @@ docs-clean:
.PHONY: docs
docs: docs/static/gallery.html
cd docs && hugo serve
cd docs && hugo serve
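
The `override LD_FLAGS += -X ...` lines in the hunk above stamp the version and commit into the binary at link time; the `# go tool nm ./local-ai | grep Commit` comment shows how to inspect the result. A minimal sketch of the receiving side of that pattern is below: the variable names match the `-X` targets in the Makefile, while the helper function is hypothetical.

```go
// Sketch of the -X link-flag pattern used by LD_FLAGS: package-level string
// variables are overwritten at link time, so no source change is needed per
// release. The defaults below only apply to a plain `go build` without the
// Makefile; the String helper is hypothetical.
package internal

import "fmt"

var (
	Version = "dev" // set via -X "<module>/internal.Version=$(git describe ...)"
	Commit  = ""    // set via -X "<module>/internal.Commit=$(git rev-parse HEAD)"
)

func String() string {
	return fmt.Sprintf("LocalAI %s (commit %s)", Version, Commit)
}
```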


@@ -72,7 +72,6 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
- 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
- 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
- 🔥🔥 Decentralized llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
- 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
@@ -105,7 +104,6 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
- 📈 [Reranker API](https://localai.io/features/reranker/)
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
- 🌍 Integrated WebUI!
## 💻 Usage


@@ -83,11 +83,4 @@ target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_
target_compile_features(${TARGET} PRIVATE cxx_std_11)
if(TARGET BUILD_INFO)
add_dependencies(${TARGET} BUILD_INFO)
endif()
include(FetchContent)
FetchContent_Declare(
googletest
URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
)
FetchContent_MakeAvailable(googletest)
endif()


@@ -71,9 +71,9 @@ clean: purge
grpc-server: llama.cpp llama.cpp/examples/grpc-server
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+bash -c "source $(ONEAPI_VARS); \
bash -c "source $(ONEAPI_VARS); \
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)"
else
+cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
endif
cp llama.cpp/build/bin/grpc-server .


@@ -148,13 +148,13 @@ function startBackend() {
ensureVenv
if [ ! -z ${BACKEND_FILE} ]; then
exec python ${BACKEND_FILE} $@
python ${BACKEND_FILE} $@
elif [ -e "${MY_DIR}/server.py" ]; then
exec python ${MY_DIR}/server.py $@
python ${MY_DIR}/server.py $@
elif [ -e "${MY_DIR}/backend.py" ]; then
exec python ${MY_DIR}/backend.py $@
python ${MY_DIR}/backend.py $@
elif [ -e "${MY_DIR}/${BACKEND_NAME}.py" ]; then
exec python ${MY_DIR}/${BACKEND_NAME}.py $@
python ${MY_DIR}/${BACKEND_NAME}.py $@
fi
}
@@ -210,4 +210,4 @@ function checkTargets() {
echo false
}
init
init


@@ -9,7 +9,6 @@ var CLI struct {
cliContext.Context `embed:""`
Run RunCMD `cmd:"" help:"Run LocalAI, this the default command if no other command is specified. Run 'local-ai run --help' for more information" default:"withargs"`
Federated FederatedCLI `cmd:"" help:"Run LocalAI in federated mode"`
Models ModelsCMD `cmd:"" help:"Manage LocalAI models and definitions"`
TTS TTSCMD `cmd:"" help:"Convert text to speech"`
Transcript TranscriptCMD `cmd:"" help:"Convert audio to text"`


@@ -1,130 +0,0 @@
package cli
import (
"context"
"errors"
"fmt"
"io"
"net"
"time"
"math/rand/v2"
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/edgevpn/pkg/node"
"github.com/mudler/edgevpn/pkg/protocol"
"github.com/mudler/edgevpn/pkg/types"
"github.com/rs/zerolog/log"
)
type FederatedCLI struct {
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
}
func (f *FederatedCLI) Run(ctx *cliContext.Context) error {
n, err := p2p.NewNode(f.Peer2PeerToken)
if err != nil {
return fmt.Errorf("creating a new node: %w", err)
}
err = n.Start(context.Background())
if err != nil {
return fmt.Errorf("creating a new node: %w", err)
}
if err := p2p.ServiceDiscoverer(context.Background(), n, f.Peer2PeerToken, p2p.FederatedID, nil); err != nil {
return err
}
return Proxy(context.Background(), n, f.Address, p2p.FederatedID)
}
func Proxy(ctx context.Context, node *node.Node, listenAddr, service string) error {
log.Info().Msgf("Allocating service '%s' on: %s", service, listenAddr)
// Open local port for listening
l, err := net.Listen("tcp", listenAddr)
if err != nil {
log.Error().Err(err).Msg("Error listening")
return err
}
// ll.Info("Binding local port on", srcaddr)
ledger, _ := node.Ledger()
// Announce ourselves so nodes accepts our connection
ledger.Announce(
ctx,
10*time.Second,
func() {
// Retrieve current ID for ip in the blockchain
//_, found := ledger.GetKey(protocol.UsersLedgerKey, node.Host().ID().String())
// If mismatch, update the blockchain
//if !found {
updatedMap := map[string]interface{}{}
updatedMap[node.Host().ID().String()] = &types.User{
PeerID: node.Host().ID().String(),
Timestamp: time.Now().String(),
}
ledger.Add(protocol.UsersLedgerKey, updatedMap)
// }
},
)
defer l.Close()
for {
select {
case <-ctx.Done():
return errors.New("context canceled")
default:
log.Debug().Msg("New for connection")
// Listen for an incoming connection.
conn, err := l.Accept()
if err != nil {
fmt.Println("Error accepting: ", err.Error())
continue
}
// Handle connections in a new goroutine, forwarding to the p2p service
go func() {
var tunnelAddresses []string
for _, v := range p2p.GetAvailableNodes(p2p.FederatedID) {
if v.IsOnline() {
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
} else {
log.Info().Msgf("Node %s is offline", v.ID)
}
}
// open a TCP stream to one of the tunnels
// chosen randomly
// TODO: optimize this and track usage
tunnelAddr := tunnelAddresses[rand.IntN(len(tunnelAddresses))]
tunnelConn, err := net.Dial("tcp", tunnelAddr)
if err != nil {
log.Error().Err(err).Msg("Error connecting to tunnel")
return
}
log.Info().Msgf("Redirecting %s to %s", conn.LocalAddr().String(), tunnelConn.RemoteAddr().String())
closer := make(chan struct{}, 2)
go copyStream(closer, tunnelConn, conn)
go copyStream(closer, conn, tunnelConn)
<-closer
tunnelConn.Close()
conn.Close()
// ll.Infof("(service %s) Done handling %s", serviceID, l.Addr().String())
}()
}
}
}
func copyStream(closer chan struct{}, dst io.Writer, src io.Reader) {
defer func() { closer <- struct{}{} }() // connection is closed, send signal to stop proxy
io.Copy(dst, src)
}


@@ -3,8 +3,6 @@ package cli
import (
"context"
"fmt"
"net"
"os"
"strings"
"time"
@@ -52,7 +50,7 @@ type RunCMD struct {
DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"`
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"api"`
Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"`
PreloadBackendOnly bool `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"`
@@ -61,7 +59,6 @@ type RunCMD struct {
WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"`
EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"`
WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"`
}
func (r *RunCMD) Run(ctx *cliContext.Context) error {
@@ -94,10 +91,9 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
config.WithOpaqueErrors(r.OpaqueErrors),
}
token := ""
if r.Peer2Peer || r.Peer2PeerToken != "" {
log.Info().Msg("P2P mode enabled")
token = r.Peer2PeerToken
token := r.Peer2PeerToken
if token == "" {
// IF no token is provided, and p2p is enabled,
// we generate one and wait for the user to pick up the token (this is for interactive)
@@ -108,46 +104,14 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
log.Info().Msg("To use the token, you can run the following command in another node or terminal:")
fmt.Printf("export TOKEN=\"%s\"\nlocal-ai worker p2p-llama-cpp-rpc\n", token)
}
opts = append(opts, config.WithP2PToken(token))
node, err := p2p.NewNode(token)
if err != nil {
return err
// Ask for user confirmation
log.Info().Msg("Press a button to proceed")
var input string
fmt.Scanln(&input)
}
log.Info().Msg("Starting P2P server discovery...")
if err := p2p.ServiceDiscoverer(context.Background(), node, token, "", func() {
var tunnelAddresses []string
for _, v := range p2p.GetAvailableNodes("") {
if v.IsOnline() {
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
} else {
log.Info().Msgf("Node %s is offline", v.ID)
}
}
tunnelEnvVar := strings.Join(tunnelAddresses, ",")
os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar)
log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar)
}); err != nil {
return err
}
}
if r.Federated {
_, port, err := net.SplitHostPort(r.Address)
if err != nil {
return err
}
if err := p2p.ExposeService(context.Background(), "localhost", port, token, p2p.FederatedID); err != nil {
return err
}
node, err := p2p.NewNode(token)
if err != nil {
return err
}
if err := p2p.ServiceDiscoverer(context.Background(), node, token, p2p.FederatedID, nil); err != nil {
if err := p2p.LLamaCPPRPCServerDiscoverer(context.Background(), token); err != nil {
return err
}
}


@@ -20,7 +20,7 @@ import (
type P2P struct {
WorkerFlags `embed:""`
Token string `env:"LOCALAI_TOKEN,LOCALAI_P2P_TOKEN,TOKEN" help:"P2P token to use"`
Token string `env:"LOCALAI_TOKEN,TOKEN" help:"JSON list of galleries"`
NoRunner bool `env:"LOCALAI_NO_RUNNER,NO_RUNNER" help:"Do not start the llama-cpp-rpc-server"`
RunnerAddress string `env:"LOCALAI_RUNNER_ADDRESS,RUNNER_ADDRESS" help:"Address of the llama-cpp-rpc-server"`
RunnerPort string `env:"LOCALAI_RUNNER_PORT,RUNNER_PORT" help:"Port of the llama-cpp-rpc-server"`
@@ -59,7 +59,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
p = r.RunnerPort
}
err = p2p.ExposeService(context.Background(), address, p, r.Token, "")
err = p2p.BindLLamaCPPWorker(context.Background(), address, p, r.Token)
if err != nil {
return err
}
@@ -99,7 +99,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
}
}()
err = p2p.ExposeService(context.Background(), address, fmt.Sprint(port), r.Token, "")
err = p2p.BindLLamaCPPWorker(context.Background(), address, fmt.Sprint(port), r.Token)
if err != nil {
return err
}
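
The worker command above is the counterpart of the discovery logic changed in run.go: one process advertises a local llama.cpp rpc server over the p2p network, another discovers the online nodes and hands their tunnel addresses to llama.cpp through LLAMACPP_GRPC_SERVERS. The sketch below is reconstructed only from call sites visible in this diff (ExposeService, NewNode, ServiceDiscoverer, GetAvailableNodes); this very commit swaps some of them for BindLLamaCPPWorker and LLamaCPPRPCServerDiscoverer, so treat it as illustrative of one side of the change, with the address, port, and ROLE switch as placeholders.

```go
// Illustrative pairing of the p2p worker/runner calls seen in this diff.
// Signatures are copied from the call sites above and may not match the
// package after this commit (which renames ExposeService and the discoverer).
package main

import (
	"context"
	"log"
	"os"
	"os/signal"
	"strings"

	"github.com/mudler/LocalAI/core/p2p"
)

func main() {
	token := os.Getenv("TOKEN")
	ctx := context.Background()

	if os.Getenv("ROLE") == "worker" {
		// Worker side: advertise a local llama.cpp rpc server (placeholder address/port).
		if err := p2p.ExposeService(ctx, "127.0.0.1", "50052", token, ""); err != nil {
			log.Fatal(err)
		}
	} else {
		// Runner side: discover online workers and point llama.cpp at their tunnels.
		node, err := p2p.NewNode(token)
		if err != nil {
			log.Fatal(err)
		}
		if err := node.Start(ctx); err != nil {
			log.Fatal(err)
		}
		err = p2p.ServiceDiscoverer(ctx, node, token, "", func() {
			var tunnels []string
			for _, v := range p2p.GetAvailableNodes("") {
				if v.IsOnline() {
					tunnels = append(tunnels, v.TunnelAddress)
				}
			}
			os.Setenv("LLAMACPP_GRPC_SERVERS", strings.Join(tunnels, ","))
		})
		if err != nil {
			log.Fatal(err)
		}
	}

	// Keep the process alive until interrupted.
	sig := make(chan os.Signal, 1)
	signal.Notify(sig, os.Interrupt)
	<-sig
}
```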


@@ -32,7 +32,6 @@ type ApplicationConfig struct {
CORSAllowOrigins string
ApiKeys []string
OpaqueErrors bool
P2PToken string
ModelLibraryURL string
@@ -96,12 +95,6 @@ func WithCsrf(b bool) AppOption {
}
}
func WithP2PToken(s string) AppOption {
return func(o *ApplicationConfig) {
o.P2PToken = s
}
}
func WithModelLibraryURL(url string) AppOption {
return func(o *ApplicationConfig) {
o.ModelLibraryURL = url
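
Both the removed WithP2PToken and the surviving WithModelLibraryURL are functional options: an AppOption is a closure that mutates the ApplicationConfig being assembled, so a feature like the P2P token can be added or dropped without changing any constructor signature. A self-contained sketch of the pattern, using only fields visible in this hunk and a hypothetical constructor name:

```go
// Stand-alone sketch of the AppOption functional-options pattern. Field and
// option names mirror the hunk above; NewApplicationConfig is hypothetical.
package main

import "fmt"

type ApplicationConfig struct {
	ModelLibraryURL string
	OpaqueErrors    bool
}

// AppOption is a function that mutates the config under construction.
type AppOption func(*ApplicationConfig)

func WithModelLibraryURL(url string) AppOption {
	return func(o *ApplicationConfig) { o.ModelLibraryURL = url }
}

func WithOpaqueErrors(b bool) AppOption {
	return func(o *ApplicationConfig) { o.OpaqueErrors = b }
}

func NewApplicationConfig(opts ...AppOption) *ApplicationConfig {
	cfg := &ApplicationConfig{}
	for _, opt := range opts {
		opt(cfg)
	}
	return cfg
}

func main() {
	cfg := NewApplicationConfig(
		WithModelLibraryURL("https://example.invalid/library.yaml"),
		WithOpaqueErrors(true),
	)
	fmt.Printf("%+v\n", cfg)
}
```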


@@ -7,7 +7,6 @@ import (
"github.com/chasefleming/elem-go"
"github.com/chasefleming/elem-go/attrs"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/xsync"
)
@@ -16,14 +15,6 @@ const (
noImage = "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg"
)
func renderElements(n []elem.Node) string {
render := ""
for _, r := range n {
render += r.Render()
}
return render
}
func DoneProgress(galleryID, text string, showDelete bool) string {
var modelName = galleryID
// Split by @ and grab the name
@@ -81,135 +72,6 @@ func ProgressBar(progress string) string {
).Render()
}
func P2PNodeStats(nodes []p2p.NodeData) string {
/*
<div class="bg-gray-800 p-6 rounded-lg shadow-lg text-left">
<p class="text-xl font-semibold text-gray-200">Total Workers Detected: {{ len .Nodes }}</p>
{{ $online := 0 }}
{{ range .Nodes }}
{{ if .IsOnline }}
{{ $online = add $online 1 }}
{{ end }}
{{ end }}
<p class="text-xl font-semibold text-gray-200">Total Online Workers: {{$online}}</p>
</div>
*/
online := 0
for _, n := range nodes {
if n.IsOnline() {
online++
}
}
class := "text-green-500"
if online == 0 {
class = "text-red-500"
}
/*
<i class="fas fa-circle animate-pulse text-green-500 ml-2 mr-1"></i>
*/
circle := elem.I(attrs.Props{
"class": "fas fa-circle animate-pulse " + class + " ml-2 mr-1",
})
nodesElements := []elem.Node{
elem.Span(
attrs.Props{
"class": class,
},
circle,
elem.Text(fmt.Sprintf("%d", online)),
),
elem.Span(
attrs.Props{
"class": "text-gray-200",
},
elem.Text(fmt.Sprintf("/%d", len(nodes))),
),
}
return renderElements(nodesElements)
}
func P2PNodeBoxes(nodes []p2p.NodeData) string {
/*
<div class="bg-gray-800 p-4 rounded-lg shadow-lg text-left">
<div class="flex items-center mb-2">
<i class="fas fa-desktop text-gray-400 mr-2"></i>
<span class="text-gray-200 font-semibold">{{.ID}}</span>
</div>
<p class="text-sm text-gray-400 mt-2 flex items-center">
Status:
<i class="fas fa-circle {{ if .IsOnline }}text-green-500{{ else }}text-red-500{{ end }} ml-2 mr-1"></i>
<span class="{{ if .IsOnline }}text-green-400{{ else }}text-red-400{{ end }}">
{{ if .IsOnline }}Online{{ else }}Offline{{ end }}
</span>
</p>
</div>
*/
nodesElements := []elem.Node{}
for _, n := range nodes {
nodesElements = append(nodesElements,
elem.Div(
attrs.Props{
"class": "bg-gray-700 p-6 rounded-lg shadow-lg text-left",
},
elem.P(
attrs.Props{
"class": "text-sm text-gray-400 mt-2 flex",
},
elem.I(
attrs.Props{
"class": "fas fa-desktop text-gray-400 mr-2",
},
),
elem.Text("Name: "),
elem.Span(
attrs.Props{
"class": "text-gray-200 font-semibold ml-2 mr-1",
},
elem.Text(n.ID),
),
elem.Text("Status: "),
elem.If(
n.IsOnline(),
elem.I(
attrs.Props{
"class": "fas fa-circle animate-pulse text-green-500 ml-2 mr-1",
},
),
elem.I(
attrs.Props{
"class": "fas fa-circle animate-pulse text-red-500 ml-2 mr-1",
},
),
),
elem.If(
n.IsOnline(),
elem.Span(
attrs.Props{
"class": "text-green-400",
},
elem.Text("Online"),
),
elem.Span(
attrs.Props{
"class": "text-red-400",
},
elem.Text("Offline"),
),
),
),
))
}
return renderElements(nodesElements)
}
func StartProgressBar(uid, progress, text string) string {
if progress == "" {
progress = "0"


@@ -4,7 +4,6 @@ import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/internal"
"github.com/mudler/LocalAI/pkg/model"
)
@@ -34,7 +33,6 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig,
"Models": models,
"ModelsConfig": backendConfigs,
"GalleryConfig": galleryConfigs,
"IsP2PEnabled": p2p.IsP2PEnabled(),
"ApplicationConfig": appConfig,
"ProcessingModels": processingModels,
"TaskTypes": taskTypes,


@@ -123,10 +123,7 @@ func getFileFromRequest(c *fiber.Ctx) (*File, error) {
return nil, fmt.Errorf("unable to find file id %s", id)
}
// GetFilesEndpoint is the OpenAI API endpoint to get files https://platform.openai.com/docs/api-reference/files/retrieve
// @Summary Returns information about a specific file.
// @Success 200 {object} File "Response"
// @Router /v1/files/{file_id} [get]
// GetFilesEndpoint https://platform.openai.com/docs/api-reference/files/retrieve
func GetFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
file, err := getFileFromRequest(c)
@@ -138,17 +135,13 @@ func GetFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Applicat
}
}
type DeleteStatus struct {
Id string
Object string
Deleted bool
}
// DeleteFilesEndpoint is the OpenAI API endpoint to delete files https://platform.openai.com/docs/api-reference/files/delete
// @Summary Delete a file.
// @Success 200 {object} DeleteStatus "Response"
// @Router /v1/files/{file_id} [delete]
// DeleteFilesEndpoint https://platform.openai.com/docs/api-reference/files/delete
func DeleteFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
type DeleteStatus struct {
Id string
Object string
Deleted bool
}
return func(c *fiber.Ctx) error {
file, err := getFileFromRequest(c)
@@ -181,11 +174,7 @@ func DeleteFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Appli
}
}
// GetFilesContentsEndpoint is the OpenAI API endpoint to get files content https://platform.openai.com/docs/api-reference/files/retrieve-contents
// @Summary Returns information about a specific file.
// @Success 200 {string} binary "file"
// @Router /v1/files/{file_id}/content [get]
// GetFilesContentsEndpoint
// GetFilesContentsEndpoint https://platform.openai.com/docs/api-reference/files/retrieve-contents
func GetFilesContentsEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
file, err := getFileFromRequest(c)
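
The comment blocks removed in this file are swag-style annotations: they are typically parsed by the swag generator to produce the OpenAPI document that the `github.com/gofiber/swagger` handler serves, so dropping them removes these endpoints from the generated spec rather than from the API itself. A sketch of the annotation style on a trimmed-down handler (handler body and dependencies are placeholders):

```go
// Sketch of the swaggo-style annotations removed in this hunk. The comment
// directives are read by the swag generator, not by the Go compiler, and the
// resulting spec is what the gofiber/swagger handler serves. Handler body and
// dependencies are placeholders.
package openai

import "github.com/gofiber/fiber/v2"

// GetFilesEndpoint is the OpenAI API endpoint to get files
// https://platform.openai.com/docs/api-reference/files/retrieve
// @Summary Returns information about a specific file.
// @Success 200 {object} File "Response"
// @Router  /v1/files/{file_id} [get]
func GetFilesEndpoint() func(c *fiber.Ctx) error {
	return func(c *fiber.Ctx) error {
		// Placeholder: the real handler resolves the file from the request
		// and returns its metadata as JSON.
		return c.SendStatus(fiber.StatusNotImplemented)
	}
}
```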


@@ -6,10 +6,6 @@ import (
"github.com/mudler/LocalAI/core/services"
)
// ListModelsEndpoint is the OpenAI Models API endpoint https://platform.openai.com/docs/api-reference/models
// @Summary List and describe the various models available in the API.
// @Success 200 {object} schema.ModelsDataResponse "Response"
// @Router /v1/models [get]
func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
// If blank, no filter is applied.
@@ -22,7 +18,10 @@ func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) er
if err != nil {
return err
}
return c.JSON(schema.ModelsDataResponse{
return c.JSON(struct {
Object string `json:"object"`
Data []schema.OpenAIModel `json:"data"`
}{
Object: "list",
Data: dataModels,
})


@@ -5,7 +5,6 @@ import (
"github.com/gofiber/swagger"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/endpoints/localai"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal"
"github.com/mudler/LocalAI/pkg/model"
@@ -57,20 +56,6 @@ func RegisterLocalAIRoutes(app *fiber.App,
app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitorService))
app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitorService))
// p2p
if p2p.IsP2PEnabled() {
app.Get("/api/p2p", auth, func(c *fiber.Ctx) error {
// Render index
return c.JSON(map[string]interface{}{
"Nodes": p2p.GetAvailableNodes(""),
"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID),
})
})
app.Get("/api/p2p/token", auth, func(c *fiber.Ctx) error {
return c.Send([]byte(appConfig.P2PToken))
})
}
app.Get("/version", auth, func(c *fiber.Ctx) error {
return c.JSON(struct {
Version string `json:"version"`


@@ -10,7 +10,6 @@ import (
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/http/elements"
"github.com/mudler/LocalAI/core/http/endpoints/localai"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal"
"github.com/mudler/LocalAI/pkg/model"
@@ -54,37 +53,6 @@ func RegisterUIRoutes(app *fiber.App,
app.Get("/", auth, localai.WelcomeEndpoint(appConfig, cl, ml, modelStatus))
if p2p.IsP2PEnabled() {
app.Get("/p2p", auth, func(c *fiber.Ctx) error {
summary := fiber.Map{
"Title": "LocalAI - P2P dashboard",
"Version": internal.PrintableVersion(),
//"Nodes": p2p.GetAvailableNodes(""),
//"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID),
"IsP2PEnabled": p2p.IsP2PEnabled(),
"P2PToken": appConfig.P2PToken,
}
// Render index
return c.Render("views/p2p", summary)
})
/* show nodes live! */
app.Get("/p2p/ui/workers", auth, func(c *fiber.Ctx) error {
return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes("")))
})
app.Get("/p2p/ui/workers-federation", auth, func(c *fiber.Ctx) error {
return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.FederatedID)))
})
app.Get("/p2p/ui/workers-stats", auth, func(c *fiber.Ctx) error {
return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes("")))
})
app.Get("/p2p/ui/workers-federation-stats", auth, func(c *fiber.Ctx) error {
return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.FederatedID)))
})
}
// Show the Models page (all models)
app.Get("/browse", auth, func(c *fiber.Ctx) error {
term := c.Query("term")
@@ -119,9 +87,7 @@ func RegisterUIRoutes(app *fiber.App,
"AllTags": tags,
"ProcessingModels": processingModelsData,
"AvailableModels": len(models),
"IsP2PEnabled": p2p.IsP2PEnabled(),
"TaskTypes": taskTypes,
"TaskTypes": taskTypes,
// "ApplicationConfig": appConfig,
}
@@ -277,7 +243,6 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsConfig": backendConfigs,
"Model": c.Params("model"),
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@@ -296,7 +261,6 @@ func RegisterUIRoutes(app *fiber.App,
"Title": "LocalAI - Talk",
"ModelsConfig": backendConfigs,
"Model": backendConfigs[0].ID,
"IsP2PEnabled": p2p.IsP2PEnabled(),
"Version": internal.PrintableVersion(),
}
@@ -318,7 +282,6 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsConfig": backendConfigs,
"Model": backendConfigs[0].ID,
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@@ -333,7 +296,6 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsConfig": backendConfigs,
"Model": c.Params("model"),
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@@ -354,7 +316,6 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsConfig": backendConfigs,
"Model": backendConfigs[0].Name,
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@@ -369,7 +330,6 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsConfig": backendConfigs,
"Model": c.Params("model"),
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@@ -389,7 +349,6 @@ func RegisterUIRoutes(app *fiber.App,
"Title": "LocalAI - Generate audio with " + backendConfigs[0].Name,
"ModelsConfig": backendConfigs,
"Model": backendConfigs[0].Name,
"IsP2PEnabled": p2p.IsP2PEnabled(),
"Version": internal.PrintableVersion(),
}


File diff suppressed because one or more lines are too long


@@ -81,10 +81,10 @@ ul {
li {
font-size: 0.875rem; /* Small text size */
color: #4a5568; /* Dark gray text */
/* background-color: #f7fafc; Very light gray background */
background-color: #f7fafc; /* Very light gray background */
border-radius: 0.375rem; /* Rounded corners */
padding: 0.5rem; /* Padding inside each list item */
/*box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1), 0 1px 2px 0 rgba(0, 0, 0, 0.06); Subtle shadow */
box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1), 0 1px 2px 0 rgba(0, 0, 0, 0.06); /* Subtle shadow */
margin-bottom: 0.5rem; /* Vertical space between list items */
}


@@ -37,7 +37,7 @@ SOFTWARE.
<body class="bg-gray-900 text-gray-200" x-data="{ key: $store.chat.key }">
<div class="flex flex-col min-h-screen">
{{template "views/partials/navbar" .}}
{{template "views/partials/navbar"}}
<div class="chat-container mt-2 mr-2 ml-2 mb-2 bg-gray-800 shadow-lg rounded-lg" >
<!-- Chat Header -->
<div class="border-b border-gray-700 p-4" x-data="{ component: 'menu' }">


@@ -1,150 +0,0 @@
<!DOCTYPE html>
<html lang="en">
{{template "views/partials/head" .}}
<body class="bg-gray-900 text-gray-200">
<div class="flex flex-col min-h-screen">
{{template "views/partials/navbar" .}}
<div class="container mx-auto px-4 flex-grow">
<div class="workers mt-12 text-center">
<h2 class="text-3xl font-semibold text-gray-100 mb-8">
<i class="fa-solid fa-circle-nodes"></i> Distributed inference with P2P
<a href="https://localai.io/features/distribute/" target="_blank">
<i class="fas fa-circle-info pr-2"></i>
</a>
</h2>
<h5 class="mb-4 text-justify">LocalAI uses P2P technologies to enable distribution of work between peers. It is possible to share an instance with Federation and/or split the weights of a model across peers (only available with llama.cpp models). You can now share computational resources between your devices or your friends!</h5>
<!-- Federation Box -->
<div class="bg-gray-800 p-6 rounded-lg shadow-lg mb-12 text-left">
<p class="text-xl font-semibold text-gray-200"> <i class="text-gray-200 fa-solid fa-circle-nodes"></i> Federated Nodes: <span hx-get="/p2p/ui/workers-federation-stats" hx-trigger="every 1s"></span> </p>
<p class="mb-4">You can start LocalAI in federated mode to share your instance, or start the federated server to balance requests between nodes of the federation.</p>
<div class="grid grid-cols-1 sm:grid-cols-2 md:grid-cols-3 gap-4 mb-12">
<div hx-get="/p2p/ui/workers-federation" hx-trigger="every 1s"></div>
</div>
<hr class="border-gray-700 mb-12">
<h3 class="text-2xl font-semibold text-gray-100 mb-6"><i class="fa-solid fa-book"></i> Start a federated instance</h3>
<!-- Tabs navigation -->
<ul class="mb-5 flex list-none flex-row flex-wrap ps-0" role="tablist" data-twe-nav-ref>
<li role="presentation" class="flex-auto text-center">
<a href="#tabs-federated-cli" class="tablink my-2 block border-0 bg-gray-800 px-7 pb-3.5 pt-4 text-xs font-medium uppercase leading-tight text-white hover:bg-gray-700 focus:bg-gray-700 data-[twe-nav-active]:border-yellow-500 data-[twe-nav-active]:text-yellow-500 data-[twe-nav-active]:bg-gray-700 active" data-twe-toggle="pill" data-twe-target="#tabs-federated-cli" data-twe-nav-active role="tab" aria-controls="tabs-federated-cli" aria-selected="true"><i class="fa-solid fa-terminal"></i> CLI</a>
</li>
<li role="presentation" class="flex-auto text-center">
<a href="#tabs-federated-docker" class="tablink my-2 block border-0 bg-gray-800 px-7 pb-3.5 pt-4 text-xs font-medium uppercase leading-tight text-white hover:bg-gray-700 focus:bg-gray-700 data-[twe-nav-active]:border-yellow-500 data-[twe-nav-active]:text-yellow-500 data-[twe-nav-active]:bg-gray-700" data-twe-toggle="pill" data-twe-target="#tabs-federated-docker" role="tab" aria-controls="tabs-federated-docker" aria-selected="false"><i class="fa-solid fa-box-open"></i> Container images</a>
</li>
</ul>
<!-- Tabs content -->
<div class="mb-6">
<div class="tabcontent hidden opacity-100 transition-opacity duration-150 ease-linear data-[twe-tab-active]:block p-4" id="tabs-federated-cli" role="tabpanel" aria-labelledby="tabs-federated-cli" data-twe-tab-active>
<p class="mb-2">To start a new instance to share:</p>
<code class="block bg-gray-700 text-yellow-300 p-4 rounded-lg break-words">
# Start a new instance to share with --federated and a TOKEN<br>
export TOKEN="<span class="token">{{.P2PToken}}</span>"<br>
local-ai run --federated --p2p
</code>
<p class="mt-2">Note: If you don't have a token do not specify it and use the generated one that you can find in this page.</p>
<p class="mb-2">To start a new federated load balancer:</p>
<code class="block bg-gray-700 text-yellow-300 p-4 rounded-lg break-words">
export TOKEN="<span class="token">{{.P2PToken}}</span>"<br>
local-ai federated
</code>
<p class="mt-2">Note: Token is needed when starting the federated server.</p>
<p class="mt-2">For all the options available, please refer to the <a href="https://localai.io/features/distribute/#starting-workers" target="_blank" class="text-yellow-300 hover:text-yellow-400">documentation</a>.</p>
</div>
<div class="tabcontent hidden opacity-0 transition-opacity duration-150 ease-linear data-[twe-tab-active]:block p-4" id="tabs-federated-docker" role="tabpanel" aria-labelledby="tabs-federated-docker">
<p class="mb-2">To start a new federated instance:</p>
<code class="block bg-gray-700 text-yellow-300 p-4 rounded-lg break-words">
docker run -ti --net host -e TOKEN="<span class="token">{{.P2PToken}}</span>" --name local-ai -p 8080:8080 localai/localai:latest-cpu run --federated --p2p
</code>
<p class="mb-2">To start a new federated server (port to 9090):</p>
<code class="block bg-gray-700 text-yellow-300 p-4 rounded-lg break-words">
docker run -ti --net host -e TOKEN="<span class="token">{{.P2PToken}}</span>" --name local-ai -p 9090:8080 localai/localai:latest-cpu federated
</code>
<p class="mt-2">For all the options available and see what image to use, please refer to the <a href="https://localai.io/basics/container/" target="_blank" class="text-yellow-300 hover:text-yellow-400">Container images documentation</a> and <a href="https://localai.io/advanced/#cli-parameters" target="_blank" class="text-yellow-300 hover:text-yellow-400">CLI parameters documentation</a>.</p>
</div>
</div>
</div>
<!-- Llama.cpp Box -->
<div class="bg-gray-800 p-6 rounded-lg shadow-lg mb-12 text-left">
<p class="text-xl font-semibold text-gray-200"> <i class="text-gray-200 fa-solid fa-circle-nodes"></i> Workers (llama.cpp): <span hx-get="/p2p/ui/workers-stats" hx-trigger="every 1s"></span> </p>
<p class="mb-4">You can start llama.cpp workers to distribute weights between the workers and offload part of the computation. To start a new worker, you can use the CLI or Docker.</p>
<div class="grid grid-cols-1 sm:grid-cols-2 md:grid-cols-3 gap-4 mb-12">
<div hx-get="/p2p/ui/workers" hx-trigger="every 1s"></div>
</div>
<hr class="border-gray-700 mb-12">
<h3 class="text-2xl font-semibold text-gray-100 mb-6"><i class="fa-solid fa-book"></i> Start a new llama.cpp P2P worker</h3>
<!-- Tabs navigation -->
<ul class="mb-5 flex list-none flex-row flex-wrap ps-0" role="tablist" data-twe-nav-ref>
<li role="presentation" class="flex-auto text-center">
<a href="#tabs-cli" class="tablink my-2 block border-0 bg-gray-800 px-7 pb-3.5 pt-4 text-xs font-medium uppercase leading-tight text-white hover:bg-gray-700 focus:bg-gray-700 data-[twe-nav-active]:border-yellow-500 data-[twe-nav-active]:text-yellow-500 data-[twe-nav-active]:bg-gray-700 active" data-twe-toggle="pill" data-twe-target="#tabs-cli" data-twe-nav-active role="tab" aria-controls="tabs-cli" aria-selected="true"><i class="fa-solid fa-terminal"></i> CLI</a>
</li>
<li role="presentation" class="flex-auto text-center">
<a href="#tabs-docker" class="tablink my-2 block border-0 bg-gray-800 px-7 pb-3.5 pt-4 text-xs font-medium uppercase leading-tight text-white hover:bg-gray-700 focus:bg-gray-700 data-[twe-nav-active]:border-yellow-500 data-[twe-nav-active]:text-yellow-500 data-[twe-nav-active]:bg-gray-700" data-twe-toggle="pill" data-twe-target="#tabs-docker" role="tab" aria-controls="tabs-docker" aria-selected="false"><i class="fa-solid fa-box-open"></i> Container images</a>
</li>
</ul>
<!-- Tabs content -->
<div class="mb-6">
<div class="tabcontent hidden opacity-100 transition-opacity duration-150 ease-linear data-[twe-tab-active]:block p-4" id="tabs-cli" role="tabpanel" aria-labelledby="tabs-cli" data-twe-tab-active>
<p class="mb-2">To start a new worker, run the following command:</p>
<code class="block bg-gray-700 text-yellow-300 p-4 rounded-lg break-words">
export TOKEN="<span class="token">{{.P2PToken}}</span>"<br>
local-ai worker p2p-llama-cpp-rpc
</code>
<p class="mt-2">For all the options available, please refer to the <a href="https://localai.io/features/distribute/#starting-workers" target="_blank" class="text-yellow-300 hover:text-yellow-400">documentation</a>.</p>
</div>
<div class="tabcontent hidden opacity-0 transition-opacity duration-150 ease-linear data-[twe-tab-active]:block p-4" id="tabs-docker" role="tabpanel" aria-labelledby="tabs-docker">
<p class="mb-2">To start a new worker with docker, run the following command:</p>
<code class="block bg-gray-700 text-yellow-300 p-4 rounded-lg break-words">
docker run -ti --net host -e TOKEN="<span class="token">{{.P2PToken}}</span>" --name local-ai -p 8080:8080 localai/localai:latest-cpu worker p2p-llama-cpp-rpc
</code>
<p class="mt-2">For all the options available and see what image to use, please refer to the <a href="https://localai.io/basics/container/" target="_blank" class="text-yellow-300 hover:text-yellow-400">Container images documentation</a> and <a href="https://localai.io/advanced/#cli-parameters" target="_blank" class="text-yellow-300 hover:text-yellow-400">CLI parameters documentation</a>.</p>
</div>
</div>
</div>
<!-- Llama.cpp Box END -->
</div>
</div>
{{template "views/partials/footer" .}}
</div>
<style>
.token {
word-break: break-all;
}
.workers .grid div {
display: flex;
flex-direction: column;
justify-content: space-between;
}
</style>
</body>
</html>

View File

@@ -1,5 +1,4 @@
<footer class="text-center py-8">
LocalAI Version {{.Version}}<br>
<a href='https://localai.io' class="text-blue-400 hover:text-blue-600" target="_blank">LocalAI</a> © 2023-2024 <a href='https://mudler.pm' class="text-blue-400 hover:text-blue-600" target="_blank">Ettore Di Giacinto</a>
</footer>
<script src="/static/assets/tw-elements.js"></script>
</footer>

View File

@@ -21,9 +21,6 @@
<a href="/text2image/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-image pr-2"></i> Generate images</a>
<a href="/tts/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-music pr-2"></i> TTS </a>
<a href="/talk/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-phone pr-2"></i> Talk </a>
{{ if .IsP2PEnabled }}
<a href="/p2p/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-circle-nodes"></i> Swarm </a>
{{ end }}
<a href="/swagger/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-code pr-2"></i> API</a>
</div>
</div>
@@ -37,9 +34,6 @@
<a href="/text2image/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-image pr-2"></i> Generate images</a>
<a href="/tts/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-music pr-2"></i> TTS </a>
<a href="/talk/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-phone pr-2"></i> Talk </a>
{{ if .IsP2PEnabled }}
<a href="/p2p/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-circle-nodes"></i> Swarm </a>
{{ end }}
<a href="/swagger/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-code pr-2"></i> API</a>
</div>
</div>

View File

@@ -10,7 +10,7 @@
<body class="bg-gray-900 text-gray-200" x-data="{ key: $store.chat.key }">
<div class="flex flex-col min-h-screen">
{{template "views/partials/navbar" .}}
{{template "views/partials/navbar"}}
<div class="chat-container mt-2 mr-2 ml-2 mb-2 bg-gray-800 shadow-lg rounded-lg " >
<!-- Chat Header -->
<div class="border-b border-gray-700 p-4" x-data="{ component: 'menu' }">

View File

@@ -1,50 +0,0 @@
package p2p
import (
"sync"
"time"
)
const defaultServicesID = "services_localai"
const FederatedID = "federated"
type NodeData struct {
Name string
ID string
TunnelAddress string
LastSeen time.Time
}
func (d NodeData) IsOnline() bool {
now := time.Now()
// if the node was seen in the last 40 seconds, it's online
return now.Sub(d.LastSeen) < 40*time.Second
}
var mu sync.Mutex
var nodes = map[string]map[string]NodeData{}
func GetAvailableNodes(serviceID string) []NodeData {
if serviceID == "" {
serviceID = defaultServicesID
}
mu.Lock()
defer mu.Unlock()
var availableNodes = []NodeData{}
for _, v := range nodes[serviceID] {
availableNodes = append(availableNodes, v)
}
return availableNodes
}
func AddNode(serviceID string, node NodeData) {
if serviceID == "" {
serviceID = defaultServicesID
}
mu.Lock()
defer mu.Unlock()
if nodes[serviceID] == nil {
nodes[serviceID] = map[string]NodeData{}
}
nodes[serviceID][node.ID] = node
}
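
Since the file above is self-contained, a short usage sketch helps show how the pieces fit together. This is a hypothetical helper assumed to live in the same p2p package (the node name, ID, and tunnel address are illustrative only), exercising AddNode, GetAvailableNodes and IsOnline:

package p2p

import (
	"fmt"
	"time"
)

// exampleRegistryUsage is a hypothetical helper (not part of the original
// file) showing how the registry above is meant to be used.
func exampleRegistryUsage() {
	// Register a worker under the default service bucket (empty serviceID).
	AddNode("", NodeData{
		Name:          "worker-1",
		ID:            "host-worker-1",
		TunnelAddress: "127.0.0.1:34567",
		LastSeen:      time.Now(),
	})

	// List the registered workers, keeping only those seen within the
	// 40-second window hard-coded in IsOnline above.
	for _, n := range GetAvailableNodes("") {
		if n.IsOnline() {
			fmt.Printf("online worker %s at %s\n", n.Name, n.TunnelAddress)
		}
	}
}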

View File

@@ -10,18 +10,19 @@ import (
"io"
"net"
"os"
"sync"
"strings"
"time"
"github.com/ipfs/go-log"
"github.com/libp2p/go-libp2p/core/peer"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/mudler/edgevpn/pkg/config"
"github.com/mudler/edgevpn/pkg/node"
"github.com/mudler/edgevpn/pkg/protocol"
"github.com/mudler/edgevpn/pkg/services"
"github.com/mudler/edgevpn/pkg/types"
"github.com/phayes/freeport"
"github.com/ipfs/go-log"
"github.com/mudler/edgevpn/pkg/config"
"github.com/mudler/edgevpn/pkg/services"
zlog "github.com/rs/zerolog/log"
"github.com/mudler/edgevpn/pkg/logger"
@@ -33,15 +34,6 @@ func GenerateToken() string {
return newData.Base64()
}
func IsP2PEnabled() bool {
return true
}
func nodeID(s string) string {
hostname, _ := os.Hostname()
return fmt.Sprintf("%s-%s", hostname, s)
}
func allocateLocalService(ctx context.Context, node *node.Node, listenAddr, service string) error {
zlog.Info().Msgf("Allocating service '%s' on: %s", service, listenAddr)
@@ -61,16 +53,16 @@ func allocateLocalService(ctx context.Context, node *node.Node, listenAddr, serv
10*time.Second,
func() {
// Retrieve current ID for ip in the blockchain
//_, found := ledger.GetKey(protocol.UsersLedgerKey, node.Host().ID().String())
_, found := ledger.GetKey(protocol.UsersLedgerKey, node.Host().ID().String())
// If mismatch, update the blockchain
//if !found {
updatedMap := map[string]interface{}{}
updatedMap[node.Host().ID().String()] = &types.User{
PeerID: node.Host().ID().String(),
Timestamp: time.Now().String(),
if !found {
updatedMap := map[string]interface{}{}
updatedMap[node.Host().ID().String()] = &types.User{
PeerID: node.Host().ID().String(),
Timestamp: time.Now().String(),
}
ledger.Add(protocol.UsersLedgerKey, updatedMap)
}
ledger.Add(protocol.UsersLedgerKey, updatedMap)
// }
},
)
@@ -88,6 +80,7 @@ func allocateLocalService(ctx context.Context, node *node.Node, listenAddr, serv
continue
}
// ll.Info("New connection from", l.Addr().String())
// Handle connections in a new goroutine, forwarding to the p2p service
go func() {
// Retrieve current ID for ip in the blockchain
@@ -144,30 +137,24 @@ func copyStream(closer chan struct{}, dst io.Writer, src io.Reader) {
// This is the main server routine (which keeps the env variable updated)
// This starts a goroutine that keeps LLAMACPP_GRPC_SERVERS updated with the discovered services
func ServiceDiscoverer(ctx context.Context, n *node.Node, token, servicesID string, discoveryFunc func()) error {
if servicesID == "" {
servicesID = defaultServicesID
}
tunnels, err := discoveryTunnels(ctx, n, token, servicesID)
func LLamaCPPRPCServerDiscoverer(ctx context.Context, token string) error {
tunnels, err := discoveryTunnels(ctx, token)
if err != nil {
return err
}
// TODO: discoveryTunnels should return all the nodes that are available?
// In this way we updated availableNodes here instead of appending
// e.g. we have a LastSeen field in NodeData that is updated in discoveryTunnels
// each time the node is seen
// In this case the below function should be idempotent and just keep track of the nodes
go func() {
totalTunnels := []string{}
for {
select {
case <-ctx.Done():
zlog.Error().Msg("Discoverer stopped")
return
case tunnel := <-tunnels:
AddNode(servicesID, tunnel)
if discoveryFunc != nil {
discoveryFunc()
}
totalTunnels = append(totalTunnels, tunnel)
os.Setenv("LLAMACPP_GRPC_SERVERS", strings.Join(totalTunnels, ","))
zlog.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", strings.Join(totalTunnels, ","))
}
}
}()
@@ -175,10 +162,19 @@ func ServiceDiscoverer(ctx context.Context, n *node.Node, token, servicesID stri
return nil
}
func discoveryTunnels(ctx context.Context, n *node.Node, token, servicesID string) (chan NodeData, error) {
tunnels := make(chan NodeData)
func discoveryTunnels(ctx context.Context, token string) (chan string, error) {
tunnels := make(chan string)
err := n.Start(ctx)
nodeOpts, err := newNodeOpts(token)
if err != nil {
return nil, err
}
n, err := node.New(nodeOpts...)
if err != nil {
return nil, fmt.Errorf("creating a new node: %w", err)
}
err = n.Start(ctx)
if err != nil {
return nil, fmt.Errorf("creating a new node: %w", err)
}
@@ -188,14 +184,8 @@ func discoveryTunnels(ctx context.Context, n *node.Node, token, servicesID strin
}
// get new services, allocate and return to the channel
// TODO:
// a function ensureServices that:
// - starts a service if not started, if the worker is Online
// - checks that workers are Online, if not cancel the context of allocateLocalService
// - discoveryTunnels should return all the nodes and addresses associated with it
// - the caller should take now care of the fact that we are always returning fresh informations
go func() {
emitted := map[string]bool{}
for {
select {
case <-ctx.Done():
@@ -205,20 +195,20 @@ func discoveryTunnels(ctx context.Context, n *node.Node, token, servicesID strin
time.Sleep(5 * time.Second)
zlog.Debug().Msg("Searching for workers")
data := ledger.LastBlock().Storage[servicesID]
for k, v := range data {
data := ledger.LastBlock().Storage["services_localai"]
for k := range data {
zlog.Info().Msgf("Found worker %s", k)
nd := &NodeData{}
if err := v.Unmarshal(nd); err != nil {
zlog.Error().Msg("cannot unmarshal node data")
continue
if _, found := emitted[k]; !found {
emitted[k] = true
//discoveredPeers <- k
port, err := freeport.GetFreePort()
if err != nil {
fmt.Print(err)
}
tunnelAddress := fmt.Sprintf("127.0.0.1:%d", port)
go allocateLocalService(ctx, n, tunnelAddress, k)
tunnels <- tunnelAddress
}
ensureService(ctx, n, nd, k)
muservice.Lock()
if _, ok := service[nd.Name]; ok {
tunnels <- service[nd.Name].NodeData
}
muservice.Unlock()
}
}
}
@@ -227,60 +217,8 @@ func discoveryTunnels(ctx context.Context, n *node.Node, token, servicesID strin
return tunnels, err
}
type nodeServiceData struct {
NodeData NodeData
CancelFunc context.CancelFunc
}
var service = map[string]nodeServiceData{}
var muservice sync.Mutex
func ensureService(ctx context.Context, n *node.Node, nd *NodeData, sserv string) {
muservice.Lock()
defer muservice.Unlock()
if ndService, found := service[nd.Name]; !found {
if !nd.IsOnline() {
// if node is offline and not present, do nothing
return
}
newCtxm, cancel := context.WithCancel(ctx)
// Start the service
port, err := freeport.GetFreePort()
if err != nil {
fmt.Print(err)
}
tunnelAddress := fmt.Sprintf("127.0.0.1:%d", port)
nd.TunnelAddress = tunnelAddress
service[nd.Name] = nodeServiceData{
NodeData: *nd,
CancelFunc: cancel,
}
go allocateLocalService(newCtxm, n, tunnelAddress, sserv)
zlog.Debug().Msgf("Starting service %s on %s", sserv, tunnelAddress)
} else {
// Check if the service is still alive
// if not cancel the context
if !nd.IsOnline() && !ndService.NodeData.IsOnline() {
ndService.CancelFunc()
delete(service, nd.Name)
zlog.Info().Msgf("Node %s is offline, deleting", nd.ID)
} else if nd.IsOnline() {
// update last seen inside service
nd.TunnelAddress = ndService.NodeData.TunnelAddress
service[nd.Name] = nodeServiceData{
NodeData: *nd,
CancelFunc: ndService.CancelFunc,
}
zlog.Debug().Msgf("Node %s is still online", nd.ID)
}
}
}
// This is the P2P worker main
func ExposeService(ctx context.Context, host, port, token, servicesID string) error {
if servicesID == "" {
servicesID = defaultServicesID
}
func BindLLamaCPPWorker(ctx context.Context, host, port, token string) error {
llger := logger.New(log.LevelFatal)
nodeOpts, err := newNodeOpts(token)
@@ -310,53 +248,31 @@ func ExposeService(ctx context.Context, host, port, token, servicesID string) er
ledger.Announce(
ctx,
20*time.Second,
10*time.Second,
func() {
// Retrieve current ID for ip in the blockchain
//_, found := ledger.GetKey("services_localai", name)
_, found := ledger.GetKey("services_localai", name)
// If mismatch, update the blockchain
//if !found {
updatedMap := map[string]interface{}{}
updatedMap[name] = &NodeData{
Name: name,
LastSeen: time.Now(),
ID: nodeID(name),
if !found {
updatedMap := map[string]interface{}{}
updatedMap[name] = "p2p"
ledger.Add("services_localai", updatedMap)
}
ledger.Add(servicesID, updatedMap)
// }
},
)
return err
}
func NewNode(token string) (*node.Node, error) {
nodeOpts, err := newNodeOpts(token)
if err != nil {
return nil, err
}
n, err := node.New(nodeOpts...)
if err != nil {
return nil, fmt.Errorf("creating a new node: %w", err)
}
return n, nil
}
func newNodeOpts(token string) ([]node.Option, error) {
llger := logger.New(log.LevelFatal)
defaultInterval := 10 * time.Second
// TODO: move this up, expose more config options when creating a node
noDHT := os.Getenv("LOCALAI_P2P_DISABLE_DHT") == "true"
noLimits := os.Getenv("LOCALAI_P2P_DISABLE_LIMITS") == "true"
loglevel := "info"
c := config.Config{
Limit: config.ResourceLimit{
Enable: !noLimits,
Enable: true,
MaxConns: 100,
},
NetworkToken: token,
@@ -376,7 +292,7 @@ func newNodeOpts(token string) ([]node.Option, error) {
RateLimitInterval: defaultInterval,
},
Discovery: config.Discovery{
DHT: noDHT,
DHT: true,
MDNS: true,
Interval: 30 * time.Second,
},
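
Whichever discoverer variant above is used, the end state is the same: each discovered worker becomes reachable through a local tunnel address, and the llama.cpp gRPC backend reads the full set from the LLAMACPP_GRPC_SERVERS environment variable. A minimal sketch of that aggregation step, assuming a channel of tunnel addresses like the ones produced above (the function and variable names here are illustrative, not the project's API):

package p2p

import (
	"context"
	"os"
	"strings"
)

// trackTunnels is a hypothetical reduction of the discovery goroutine:
// it collects tunnel addresses as they are found and keeps the
// LLAMACPP_GRPC_SERVERS environment variable in sync so the llama.cpp
// gRPC backend can reach every worker.
func trackTunnels(ctx context.Context, tunnels <-chan string) {
	seen := map[string]bool{}
	addresses := []string{}
	for {
		select {
		case <-ctx.Done():
			return
		case addr := <-tunnels:
			if seen[addr] {
				continue // already tracked, nothing to update
			}
			seen[addr] = true
			addresses = append(addresses, addr)
			os.Setenv("LLAMACPP_GRPC_SERVERS", strings.Join(addresses, ","))
		}
	}
}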

View File

@@ -6,26 +6,16 @@ package p2p
import (
"context"
"fmt"
"github.com/mudler/edgevpn/pkg/node"
)
func GenerateToken() string {
return "not implemented"
}
func ServiceDiscoverer(ctx context.Context, node *node.Node, token, servicesID string, fn func()) error {
func LLamaCPPRPCServerDiscoverer(ctx context.Context, token string) error {
return fmt.Errorf("not implemented")
}
func ExposeService(ctx context.Context, host, port, token, servicesID string) error {
func BindLLamaCPPWorker(ctx context.Context, host, port, token string) error {
return fmt.Errorf("not implemented")
}
func IsP2PEnabled() bool {
return false
}
func NewNode(token string) (*node.Node, error) {
return nil, fmt.Errorf("not implemented")
}
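
The file above is the no-op counterpart compiled when the p2p stack is left out of the build, keeping call sites identical either way. A hedged sketch of how such a pair is usually wired with Go build constraints — the tag name "p2p" is an assumption, not confirmed by this diff:

//go:build p2p

package p2p

// IsP2PEnabled reports that the p2p stack was compiled in; the stub file
// shown above is its counterpart for builds without the (assumed) "p2p" tag.
func IsP2PEnabled() bool { return true }

The stub file would then carry the inverse constraint, //go:build !p2p, so exactly one implementation ends up in any given binary.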

View File

@@ -155,8 +155,3 @@ type OpenAIRequest struct {
// AutoGPTQ
ModelBaseName string `json:"model_base_name" yaml:"model_base_name"`
}
type ModelsDataResponse struct {
Object string `json:"object"`
Data []OpenAIModel `json:"data"`
}

View File

@@ -98,14 +98,3 @@ The server logs should indicate that new workers are being discovered.
- If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file.
- Only a single model is supported currently.
- Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun.
## Environment Variables
Several options and parameters can be tweaked using environment variables:
| Environment Variable | Description |
|----------------------|-------------|
| **LOCALAI_P2P_DISABLE_DHT** | Set to "true" to disable DHT and enable p2p layer to be local only (mDNS) |
| **LOCALAI_P2P_DISABLE_LIMITS** | Set to "true" to disable connection limits and resources management |
| **LOCALAI_P2P_TOKEN** | Set the token for the p2p network |
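
These toggles map onto plain os.Getenv checks in the Go code shown earlier. A minimal sketch of how a caller might read them before building node options; the struct and function names are hypothetical, while the variable names and the "true" comparisons match the code above:

package p2p

import "os"

// p2pEnvOptions is an illustrative helper mirroring how the documented
// environment variables are consumed before configuring a node.
type p2pEnvOptions struct {
	DisableDHT    bool   // LOCALAI_P2P_DISABLE_DHT: mDNS-only, local discovery
	DisableLimits bool   // LOCALAI_P2P_DISABLE_LIMITS: no connection/resource limits
	Token         string // LOCALAI_P2P_TOKEN: shared network token
}

func readP2PEnv() p2pEnvOptions {
	return p2pEnvOptions{
		DisableDHT:    os.Getenv("LOCALAI_P2P_DISABLE_DHT") == "true",
		DisableLimits: os.Getenv("LOCALAI_P2P_DISABLE_LIMITS") == "true",
		Token:         os.Getenv("LOCALAI_P2P_TOKEN"),
	}
}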

View File

@@ -1,3 +1,3 @@
{
"version": "v2.18.1"
"version": "v2.18.0"
}

View File

@@ -76,8 +76,6 @@ DOCKER_INSTALL=${DOCKER_INSTALL:-$docker_found}
USE_AIO=${USE_AIO:-false}
API_KEY=${API_KEY:-}
CORE_IMAGES=${CORE_IMAGES:-false}
P2P_TOKEN=${P2P_TOKEN:-}
WORKER=${WORKER:-false}
# nprocs -1
if available nproc; then
procs=$(nproc)
@@ -134,14 +132,7 @@ configure_systemd() {
info "Adding current user to local-ai group..."
$SUDO usermod -a -G local-ai $(whoami)
STARTCOMMAND="run"
if [ "$WORKER" = true ]; then
if [ -n "$P2P_TOKEN" ]; then
STARTCOMMAND="worker p2p-llama-cpp-rpc"
else
STARTCOMMAND="worker llama-cpp-rpc"
fi
fi
info "Creating local-ai systemd service..."
cat <<EOF | $SUDO tee /etc/systemd/system/local-ai.service >/dev/null
[Unit]
@@ -149,7 +140,7 @@ Description=LocalAI Service
After=network-online.target
[Service]
ExecStart=$BINDIR/local-ai $STARTCOMMAND
ExecStart=$BINDIR/local-ai run
User=local-ai
Group=local-ai
Restart=always
@@ -168,11 +159,6 @@ EOF
$SUDO echo "THREADS=$THREADS" | $SUDO tee -a /etc/localai.env >/dev/null
$SUDO echo "MODELS_PATH=$MODELS_PATH" | $SUDO tee -a /etc/localai.env >/dev/null
if [ -n "$P2P_TOKEN" ]; then
$SUDO echo "LOCALAI_P2P_TOKEN=$P2P_TOKEN" | $SUDO tee -a /etc/localai.env >/dev/null
$SUDO echo "LOCALAI_P2P=true" | $SUDO tee -a /etc/localai.env >/dev/null
fi
SYSTEMCTL_RUNNING="$(systemctl is-system-running || true)"
case $SYSTEMCTL_RUNNING in
running|degraded)
@@ -421,19 +407,6 @@ install_docker() {
# exit 0
fi
STARTCOMMAND="run"
if [ "$WORKER" = true ]; then
if [ -n "$P2P_TOKEN" ]; then
STARTCOMMAND="worker p2p-llama-cpp-rpc"
else
STARTCOMMAND="worker llama-cpp-rpc"
fi
fi
envs=""
if [ -n "$P2P_TOKEN" ]; then
envs="-e LOCALAI_P2P_TOKEN=$P2P_TOKEN -e LOCALAI_P2P=true"
fi
IMAGE_TAG=
if [ "$HAS_CUDA" ]; then
IMAGE_TAG=${VERSION}-cublas-cuda12-ffmpeg
@@ -457,8 +430,7 @@ install_docker() {
--restart=always \
-e API_KEY=$API_KEY \
-e THREADS=$THREADS \
$envs \
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG
elif [ "$HAS_AMD" ]; then
IMAGE_TAG=${VERSION}-hipblas-ffmpeg
# CORE
@@ -476,8 +448,7 @@ install_docker() {
--restart=always \
-e API_KEY=$API_KEY \
-e THREADS=$THREADS \
$envs \
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG
elif [ "$HAS_INTEL" ]; then
IMAGE_TAG=${VERSION}-sycl-f32-ffmpeg
# CORE
@@ -494,8 +465,7 @@ install_docker() {
--restart=always \
-e API_KEY=$API_KEY \
-e THREADS=$THREADS \
$envs \
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG
else
IMAGE_TAG=${VERSION}-ffmpeg
# CORE
@@ -511,8 +481,7 @@ install_docker() {
-e MODELS_PATH=/models \
-e API_KEY=$API_KEY \
-e THREADS=$THREADS \
$envs \
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG
fi
install_success

View File

@@ -16,9 +16,6 @@
- filename: "tw-elements.css"
url: "https://cdn.jsdelivr.net/npm/tw-elements/css/tw-elements.min.css"
sha: "72746af5326d6eb3647f504efa81b5e0f50ed486f37cc8262a4169781ad310d3"
- filename: "tw-elements.js"
url: "https://cdn.jsdelivr.net/npm/tw-elements/js/tw-elements.umd.min.js"
sha: "2985706362e92360b65c8697cc32490bb9c0a5df9cd9b7251a97c1c5a661a40a"
- filename: "tailwindcss.js"
url: "https://cdn.tailwindcss.com/3.3.0"
sha: "dbff048aa4581e6eae7f1cb2c641f72655ea833b3bb82923c4a59822e11ca594"

View File

@@ -1,83 +0,0 @@
name: Use LocalAI in GHA
on:
pull_request:
types:
- closed
jobs:
notify-discord:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
env:
MODEL_NAME: hermes-2-theta-llama-3-8b
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0 # needed to checkout all branches for this Action to work
# Starts the LocalAI container
- name: Start LocalAI
run: |
echo "Starting LocalAI..."
docker run -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.7.0
id: git-diff-action
with:
json_diff_file_output: diff.json
raw_diff_file_output: diff.txt
file_output_only: "true"
# Ask to explain the diff to LocalAI
- name: Summarize
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
id: summarize
run: |
input="$(cat $DIFF)"
# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"
# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "Write a message summarizing the change diffs"
},
{
role: "user",
content: $input
}
]
}')
# Send the request to LocalAI
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")
# Extract the summary from the response
summary="$(echo $response | jq -r '.choices[0].message.content')"
# Print the summary
# -H "Authorization: Bearer $API_KEY" \
echo "Summary:"
echo "$summary"
echo "payload sent"
echo "$json_payload"
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
# Send the summary somewhere (e.g. Discord)
- name: Discord notification
env:
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL }}
DISCORD_USERNAME: "discord-bot"
DISCORD_AVATAR: ""
uses: Ilshidur/action-discord@master
with:
args: ${{ steps.summarize.outputs.message }}
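
The Summarize step above is an OpenAI-style chat-completions call assembled with curl and jq. A hedged Go sketch of the same request against the endpoint and model the workflow uses (the function and struct names are illustrative; error handling is trimmed for brevity):

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

type chatMessage struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

type chatRequest struct {
	Model    string        `json:"model"`
	Messages []chatMessage `json:"messages"`
}

// summarizeDiff mirrors the curl/jq call in the workflow: it asks the
// local LocalAI instance to summarize a raw diff.
func summarizeDiff(diff string) (string, error) {
	payload, _ := json.Marshal(chatRequest{
		Model: "hermes-2-theta-llama-3-8b",
		Messages: []chatMessage{
			{Role: "system", Content: "Write a message summarizing the change diffs"},
			{Role: "user", Content: diff},
		},
	})
	resp, err := http.Post("http://localhost:8080/chat/completions",
		"application/json", bytes.NewReader(payload))
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()
	body, _ := io.ReadAll(resp.Body)

	// Same field the workflow extracts with jq: .choices[0].message.content
	var out struct {
		Choices []struct {
			Message chatMessage `json:"message"`
		} `json:"choices"`
	}
	if err := json.Unmarshal(body, &out); err != nil {
		return "", err
	}
	if len(out.Choices) == 0 {
		return "", fmt.Errorf("no choices in response")
	}
	return out.Choices[0].Message.Content, nil
}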

View File

@@ -127,56 +127,6 @@
- filename: Arcee-Spark-Q4_K_M.gguf
sha256: 44123276d7845dc13f73ca4aa431dc4c931104eb7d2186f2a73d076fa0ee2330
uri: huggingface://arcee-ai/Arcee-Spark-GGUF/Arcee-Spark-Q4_K_M.gguf
- !!merge <<: *qwen2
name: "hercules-5.0-qwen2-7b"
description: |
Locutusque/Hercules-5.0-Qwen2-7B is a fine-tuned language model derived from Qwen2-7B. It is specifically designed to excel in instruction following, function calls, and conversational interactions across various scientific and technical domains. This fine-tuning endows Hercules-5.0 with enhanced abilities in:
Complex Instruction Following: Understanding and accurately executing multi-step instructions, even those involving specialized terminology.
Function Calling: Seamlessly interpreting and executing function calls, providing appropriate input and output values.
Domain-Specific Knowledge: Engaging in informative and educational conversations about Biology, Chemistry, Physics, Mathematics, Medicine, Computer Science, and more.
urls:
- https://huggingface.co/Locutusque/Hercules-5.0-Qwen2-7B
- https://huggingface.co/bartowski/Hercules-5.0-Qwen2-7B-GGUF
overrides:
parameters:
model: Hercules-5.0-Qwen2-7B-Q4_K_M.gguf
files:
- filename: Hercules-5.0-Qwen2-7B-Q4_K_M.gguf
sha256: 8ebae4ffd43b906ddb938c3a611060ee5f99c35014e5ffe23ca35714361b5693
uri: huggingface://Hercules-5.0-Qwen2-7B-Q4_K_M.gguf/Hercules-5.0-Qwen2-7B-Q4_K_M.gguf
- !!merge <<: *qwen2
name: "arcee-agent"
icon: https://i.ibb.co/CBHmTDn/136719a5-6d8a-4654-a618-46eabc788953.jpg
description: |
Arcee Agent is a cutting-edge 7B parameter language model specifically designed for function calling and tool use. Initialized from Qwen2-7B, it rivals the performance of much larger models while maintaining efficiency and speed. This model is particularly suited for developers, researchers, and businesses looking to implement sophisticated AI-driven solutions without the computational overhead of larger language models. Compute for training Arcee-Agent was provided by CrusoeAI. Arcee-Agent was trained using Spectrum.
urls:
- https://huggingface.co/crusoeai/Arcee-Agent-GGUF
- https://huggingface.co/arcee-ai/Arcee-Agent
overrides:
parameters:
model: arcee-agent.Q4_K_M.gguf
files:
- filename: arcee-agent.Q4_K_M.gguf
sha256: ebb49943a66c1e717f9399a555aee0af28a40bfac7500f2ad8dd05f211b62aac
uri: huggingface://crusoeai/Arcee-Agent-GGUF/arcee-agent.Q4_K_M.gguf
- !!merge <<: *qwen2
name: "qwen2-7b-instruct-v0.8"
icon: https://huggingface.co/MaziyarPanahi/Qwen2-7B-Instruct-v0.8/resolve/main/qwen2-fine-tunes-maziyar-panahi.webp
description: |
MaziyarPanahi/Qwen2-7B-Instruct-v0.8
This is a fine-tuned version of the Qwen/Qwen2-7B model. It aims to improve the base model across all benchmarks.
urls:
- https://huggingface.co/MaziyarPanahi/Qwen2-7B-Instruct-v0.8
- https://huggingface.co/MaziyarPanahi/Qwen2-7B-Instruct-v0.8-GGUF
overrides:
parameters:
model: Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
files:
- filename: Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
sha256: 8c1b3efe9fa6ae1b37942ef26473cb4e0aed0f8038b60d4b61e5bffb61e49b7e
uri: huggingface://MaziyarPanahi/Qwen2-7B-Instruct-v0.8-GGUF/Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
- &mistral03
## START Mistral
url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"
@@ -375,7 +325,7 @@
files:
- filename: gemma-2-27b-it-Q4_K_M.gguf
uri: huggingface://bartowski/gemma-2-27b-it-GGUF/gemma-2-27b-it-Q4_K_M.gguf
sha256: 69a0cdba2bc2e56d8298a9330f2a050ebecd657ed315beb3e51ae427b224dbc7
sha256: ca86fbdb791842cf2e5eb276a6916e326b3b5d58d9ab60ee3e18b1c6f01fc181
- !!merge <<: *gemma
name: "gemma-2-9b-it"
urls:
@@ -389,100 +339,7 @@
files:
- filename: gemma-2-9b-it-Q4_K_M.gguf
uri: huggingface://bartowski/gemma-2-9b-it-GGUF/gemma-2-9b-it-Q4_K_M.gguf
sha256: 05390244866abc0e7108a2b1e3db07b82df3cd82f006256a75fc21137054151f
- !!merge <<: *gemma
name: "tess-v2.5-gemma-2-27b-alpha"
urls:
- https://huggingface.co/migtissera/Tess-v2.5-Gemma-2-27B-alpha
- https://huggingface.co/bartowski/Tess-v2.5-Gemma-2-27B-alpha-GGUF
icon: https://huggingface.co/migtissera/Tess-v2.5-Qwen2-72B/resolve/main/Tess-v2.5.png
description: |
Great at reasoning, but woke as fuck! This is a fine-tune over the Gemma-2-27B-it, since the base model fine-tuning is not generating coherent content.
Tess-v2.5 is the latest state-of-the-art model in the Tess series of Large Language Models (LLMs). Tess, short for Tesoro (Treasure in Italian), is the flagship LLM series created by Migel Tissera. Tess-v2.5 brings significant improvements in reasoning capabilities, coding capabilities and mathematics
overrides:
parameters:
model: Tess-v2.5-Gemma-2-27B-alpha-Q4_K_M.gguf
files:
- filename: Tess-v2.5-Gemma-2-27B-alpha-Q4_K_M.gguf
uri: huggingface://bartowski/Tess-v2.5-Gemma-2-27B-alpha-GGUF/Tess-v2.5-Gemma-2-27B-alpha-Q4_K_M.gguf
sha256: d7be7092d28aefbdcd1ee4f4d8503d169d0a649f763e169d4b179aef20d69c21
- !!merge <<: *gemma
name: "gemma2-9b-daybreak-v0.5"
urls:
- https://huggingface.co/crestf411/gemma2-9B-daybreak-v0.5
- https://huggingface.co/Vdr1/gemma2-9B-daybreak-v0.5-GGUF-Imatrix-IQ
description: |
THIS IS A PRE-RELEASE. BEGONE.
Beware, depraved. Not suitable for any audience.
Dataset curation to remove slop-perceived expressions continues. Unfortunately base models (which this is merged on top of) are generally riddled with "barely audible"s and "couldn't help"s and "shivers down spines" etc.
overrides:
parameters:
model: gemma2-9B-daybreak-v0.5-Q4_K_M-imat.gguf
files:
- filename: gemma2-9B-daybreak-v0.5-Q4_K_M-imat.gguf
uri: huggingface://Vdr1/gemma2-9B-daybreak-v0.5-GGUF-Imatrix-IQ/gemma2-9B-daybreak-v0.5-Q4_K_M-imat.gguf
sha256: 6add4d12052918986af935d686773e4e89fddd1bbf7941911cf3fbeb1b1862c0
- !!merge <<: *gemma
name: "gemma-2-9b-it-sppo-iter3"
urls:
- https://huggingface.co/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3
- https://huggingface.co/bartowski/Gemma-2-9B-It-SPPO-Iter3-GGUF
description: |
Self-Play Preference Optimization for Language Model Alignment (https://arxiv.org/abs/2405.00675)
Gemma-2-9B-It-SPPO-Iter3
This model was developed using Self-Play Preference Optimization at iteration 3, based on the google/gemma-2-9b-it architecture as the starting point. We utilized the prompt sets from the openbmb/UltraFeedback dataset, split into 3 parts for the 3 iterations by snorkelai/Snorkel-Mistral-PairRM-DPO-Dataset. All responses used are synthetic.
overrides:
parameters:
model: Gemma-2-9B-It-SPPO-Iter3-Q4_K_M.gguf
files:
- filename: Gemma-2-9B-It-SPPO-Iter3-Q4_K_M.gguf
uri: huggingface://bartowski/Gemma-2-9B-It-SPPO-Iter3-GGUF/Gemma-2-9B-It-SPPO-Iter3-Q4_K_M.gguf
sha256: 7aac221f548beef8d45106eabbec6b2c4e1669a51ad14e4bf640d463dadf36e7
- !!merge <<: *gemma
name: "smegmma-9b-v1"
icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/RSuc5p9Sm6CYj6lGOxvx4.gif
urls:
- https://huggingface.co/TheDrummer/Smegmma-9B-v1
- https://huggingface.co/bartowski/Smegmma-9B-v1-GGUF
description: |
Smegmma 9B v1 🧀
The sweet moist of Gemma 2, unhinged.
image/gif
smeg - ghem - mah
An eRP model that will blast you with creamy moist. Finetuned by yours truly.
The first Gemma 2 9B RP finetune attempt!
What's New?
Engaging roleplay
Less refusals / censorship
Less commentaries / summaries
More willing AI
Better formatting
Better creativity
Moist alignment
Notes
Refusals still exist, but a couple of re-gens may yield the result you want
Formatting and logic may be weaker at the start
Make sure to start strong
May be weaker with certain cards, YMMV and adjust accordingly!
overrides:
parameters:
model: Smegmma-9B-v1-Q4_K_M.gguf
files:
- filename: Smegmma-9B-v1-Q4_K_M.gguf
uri: huggingface://bartowski/Smegmma-9B-v1-GGUF/Smegmma-9B-v1-Q4_K_M.gguf
sha256: abd9da0a6bf5cbc0ed6bb0d7e3ee7aea3f6b1edbf8c64e51d0fa25001975aed7
sha256: c70fd20caec79fb953b83031c46ddea4e99905835a66af7b8a856aa1b2534614
- &llama3
url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
@@ -618,8 +475,8 @@
model: Llama-3-SauerkrautLM-8b-Instruct-Q4_K_M.gguf
files:
- filename: Llama-3-SauerkrautLM-8b-Instruct-Q4_K_M.gguf
sha256: 5833d99d5596cade0d02e61cddaa6dac49170864ee56d0b602933c6f9fbae314
uri: huggingface://bartowski/Llama-3-SauerkrautLM-8b-Instruct-GGUF/Llama-3-SauerkrautLM-8b-Instruct-Q4_K_M.gguf
sha256: e5ae69b6f59b3f207fa6b435490286b365add846a310c46924fa784b5a7d73e3
- !!merge <<: *llama3
name: "llama-3-13b-instruct-v0.1"
urls:
@@ -1473,31 +1330,6 @@
- filename: llama3-8B-DarkIdol-2.1-Uncensored-1048K-Q4_K_M-imat.gguf
sha256: 86f0f1e10fc315689e09314aebb7354bb40d8fe95de008d21a75dc8fff1cd2fe
uri: huggingface://LWDCLS/llama3-8B-DarkIdol-2.1-Uncensored-1048K-GGUF-IQ-Imatrix-Request/llama3-8B-DarkIdol-2.1-Uncensored-1048K-Q4_K_M-imat.gguf
- !!merge <<: *llama3
name: "llama3-8b-darkidol-2.2-uncensored-1048k-iq-imatrix"
urls:
- https://huggingface.co/aifeifei798/llama3-8B-DarkIdol-2.2-Uncensored-1048K
- https://huggingface.co/LWDCLS/llama3-8B-DarkIdol-2.2-Uncensored-1048K-GGUF-IQ-Imatrix-Request
icon: https://huggingface.co/aifeifei798/llama3-8B-DarkIdol-2.2-Uncensored-1048K/resolve/main/llama3-8B-DarkIdol-2.2-Uncensored-1048K.png
description: |
The module combination has been readjusted to better fulfill various roles and has been adapted for mobile phones.
- Saving money(LLama 3)
- Uncensored
- Quick response
- The underlying model used is winglian/Llama-3-8b-1048k-PoSE
- A scholarly response akin to a thesis.(I tend to write songs extensively, to the point where one song almost becomes as detailed as a thesis. :)
- DarkIdol:Roles that you can imagine and those that you cannot imagine.
- Roleplay
- Specialized in various role-playing scenarios; for more, see the test roles (https://huggingface.co/aifeifei798/llama3-8B-DarkIdol-1.2/tree/main/test)
- For more, see the LM Studio presets (https://huggingface.co/aifeifei798/llama3-8B-DarkIdol-1.2/tree/main/config-presets)
overrides:
parameters:
model: llama3-8B-DarkIdol-2.2-Uncensored-1048K-Q4_K_M-imat.gguf
files:
- filename: llama3-8B-DarkIdol-2.2-Uncensored-1048K-Q4_K_M-imat.gguf
sha256: 7714947799d4e6984cf9106244ee24aa821778936ad1a81023480a774e255f52
uri: huggingface://LWDCLS/llama3-8B-DarkIdol-2.2-Uncensored-1048K-GGUF-IQ-Imatrix-Request/llama3-8B-DarkIdol-2.2-Uncensored-1048K-Q4_K_M-imat.gguf
- !!merge <<: *llama3
name: "llama3-turbcat-instruct-8b"
urls:
@@ -1513,76 +1345,6 @@
- filename: llama3-turbcat-instruct-8b-Q4_K_M.gguf
sha256: a9a36e3220d901a8ad80c75608a81aaeed3a9cdf111247462bf5e3443aad5461
uri: huggingface://bartowski/llama3-turbcat-instruct-8b-GGUF/llama3-turbcat-instruct-8b-Q4_K_M.gguf
- !!merge <<: *llama3
name: "l3-8b-everything-cot"
urls:
- https://huggingface.co/FPHam/L3-8B-Everything-COT
- https://huggingface.co/bartowski/L3-8B-Everything-COT-GGUF
icon: https://huggingface.co/FPHam/L3-8B-Everything-COT/resolve/main/cot2.png
description: |
Everything COT is an investigative self-reflecting general model that uses Chain of Thought for everything. And I mean everything.
Instead of confidently proclaiming something (or confidently hallucinating other things) like most models, it carries an internal dialogue with itself and often casts doubt over uncertain topics while looking at them from various sides.
overrides:
parameters:
model: L3-8B-Everything-COT-Q4_K_M.gguf
files:
- filename: L3-8B-Everything-COT-Q4_K_M.gguf
sha256: b220b0e2f8fb1c8a491d10dbd054269ed078ee5e2e62dc9d2e3b97b06f52e987
uri: huggingface://bartowski/L3-8B-Everything-COT-GGUF/L3-8B-Everything-COT-Q4_K_M.gguf
- !!merge <<: *llama3
name: "llama-3-llamilitary"
urls:
- https://huggingface.co/Heralax/llama-3-llamilitary
- https://huggingface.co/mudler/llama-3-llamilitary-Q4_K_M-GGUF
icon: https://cdn-uploads.huggingface.co/production/uploads/64825ebceb4befee377cf8ac/ea2C9laq24V6OuxwhzJZS.png
description: |
This is a model trained on [instruct data generated from old historical war books] as well as on the books themselves, with the goal of creating a joke LLM knowledgeable about the (long gone) kind of warfare involving muskets, cavalry, and cannon.
This model can provide good answers, but it turned out to be pretty fragile during conversation for some reason: open-ended questions can make it spout nonsense. Asking facts is more reliable but not guaranteed to work.
The basic guide to getting good answers is: be specific with your questions. Use specific terms and define a concrete scenario, if you can, otherwise the LLM will often hallucinate the rest. I think the issue was that I did not train with a large enough system prompt: not enough latent space is being activated by default. (I'll try to correct this in future runs).
overrides:
parameters:
model: llama-3-llamilitary-q4_k_m.gguf
files:
- filename: llama-3-llamilitary-q4_k_m.gguf
sha256: f3684f2f0845f9aead884fa9a52ea67bed53856ebeedef1620ca863aba57e458
uri: huggingface://mudler/llama-3-llamilitary-Q4_K_M-GGUF/llama-3-llamilitary-q4_k_m.gguf
- !!merge <<: *llama3
name: "l3-stheno-maid-blackroot-grand-horror-16b"
urls:
- https://huggingface.co/DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-GGUF
icon: https://huggingface.co/DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-GGUF/resolve/main/hm.jpg
description: |
Rebuilt and Powered Up.
WARNING: NSFW. Graphic HORROR. Extreme swearing. UNCENSORED. SMART.
The author took the original models in "L3-Stheno-Maid-Blackroot 8B" and completely rebuilt it a new pass-through merge (everything preserved) and blew it out to over 16.5 billion parameters - 642 tensors, 71 layers (8B original has 32 layers).
This is not an "upscale" or "franken merge" but a completely new model based on the models used to construct "L3-Stheno-Maid-Blackroot 8B".
The result is a take no prisoners, totally uncensored, fiction writing monster and roleplay master as well just about... any general fiction activity "AI guru" including scene generation and scene continuation.
As a result of the expansion / merge re-build its level of prose and story generation has significantly improved as well as word choice, sentence structure as well as default output levels and lengths.
It also has a STRONG horror bias, although it will generate content for almost any genre. That being said if there is a "hint" of things going wrong... they will.
It will also swear (R-18) like there is no tomorrow at times and "dark" characters will be VERY dark so to speak.
The model excels in details (real and "constructed"), descriptions, similes and metaphors.
It can have a sense of humor ... ah... dark humor.
Because of the nature of this merge most attributes of each of the 3 models will be in this rebuilt 16.5B model as opposed to the original 8B model where some of one or more of the model's features and/or strengths maybe reduced or overshadowed.
overrides:
parameters:
model: L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-D_AU-Q4_K_M.gguf
files:
- filename: L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-D_AU-Q4_K_M.gguf
sha256: ae29f38d73dfb04415821405cf8b319fc42d78d0cdd0da91db147d12e68030fe
uri: huggingface://DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-GGUF/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-D_AU-Q4_K_M.gguf
- &dolphin
name: "dolphin-2.9-llama3-8b"
url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"
@@ -2379,176 +2141,6 @@
- filename: Llama-3-Update-3.0-mmproj-model-f16.gguf
sha256: 3d2f36dff61d6157cadf102df86a808eb9f8a230be1bc0bc99039d81a895468a
uri: huggingface://Nitral-AI/Llama-3-Update-3.0-mmproj-model-f16/Llama-3-Update-3.0-mmproj-model-f16.gguf
- !!merge <<: *llama3
name: "llama-3_8b_unaligned_alpha"
urls:
- https://huggingface.co/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_Alpha
- https://huggingface.co/bartowski/LLAMA-3_8B_Unaligned_Alpha-GGUF
description: |
Model card description:
As of June 11, 2024, I've finally started training the model! The training is progressing smoothly, although it will take some time. I used a combination of model merges and an abliterated model as base, followed by a comprehensive deep unalignment protocol to unalign the model to its core. A common issue with uncensoring and unaligning models is that it often significantly impacts their base intelligence. To mitigate these drawbacks, I've included a substantial corpus of common sense, theory of mind, and various other elements to counteract the effects of the deep uncensoring process. Given the extensive corpus involved, the training will require at least a week of continuous training. Expected early results: in about 3-4 days.
Additional info:
As of June 13, 2024, I've observed that even after two days of continuous training, the model is still resistant to learning certain aspects.
For example, some of the validation data still shows a loss over , whereas other parts have a loss of < or lower. This is after the model was initially abliterated.
June 18, 2024 Update, After extensive testing of the intermediate checkpoints, significant progress has been made.
The model is slowly — I mean, really slowly — unlearning its alignment. By significantly lowering the learning rate, I was able to visibly observe deep behavioral changes, this process is taking longer than anticipated, but it's going to be worth it. Estimated time to completion: 4 more days.. I'm pleased to report that in several tests, the model not only maintained its intelligence but actually showed a slight improvement, especially in terms of common sense. An intermediate checkpoint of this model was used to create invisietch/EtherealRainbow-v0.3-rc7, with promising results. Currently, it seems like I'm on the right track. I hope this model will serve as a solid foundation for further merges, whether for role-playing (RP) or for uncensoring. This approach also allows us to save on actual fine-tuning, thereby reducing our carbon footprint. The merge process takes just a few minutes of CPU time, instead of days of GPU work.
June 20, 2024 Update, Unaligning was partially successful, and the results are decent, but I am not fully satisfied. I decided to bite the bullet, and do a full finetune, god have mercy on my GPUs. I am also releasing the intermediate checkpoint of this model.
icon: https://i.imgur.com/Kpk1PgZ.png
overrides:
parameters:
model: LLAMA-3_8B_Unaligned_Alpha-Q4_K_M.gguf
files:
- filename: LLAMA-3_8B_Unaligned_Alpha-Q4_K_M.gguf
sha256: 93ddb5f9f525586d2578186c61e39f96461c26c0b38631de89aa30b171774515
uri: huggingface://bartowski/LLAMA-3_8B_Unaligned_Alpha-GGUF/LLAMA-3_8B_Unaligned_Alpha-Q4_K_M.gguf
- !!merge <<: *llama3
name: "l3-8b-lunaris-v1"
urls:
- https://huggingface.co/Sao10K/L3-8B-Lunaris-v1
- https://huggingface.co/bartowski/L3-8B-Lunaris-v1-GGUF
description: |
A generalist / roleplaying model merge based on Llama 3. Models are selected from my personal experience while using them.
I personally think this is an improvement over Stheno v3.2, considering the other models helped balance out its creativity and at the same time improving its logic.
overrides:
parameters:
model: L3-8B-Lunaris-v1-Q4_K_M.gguf
files:
- filename: L3-8B-Lunaris-v1-Q4_K_M.gguf
sha256: ef1d393f125be8c608859eeb4f26185ad90c7fc9cba41c96e847e77cdbcada18
uri: huggingface://bartowski/L3-8B-Lunaris-v1-GGUF/L3-8B-Lunaris-v1-Q4_K_M.gguf
- !!merge <<: *llama3
name: "llama-3_8b_unaligned_alpha_rp_soup-i1"
icon: https://i.imgur.com/pXcjpoV.png
urls:
- https://huggingface.co/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_Alpha_RP_Soup
- https://huggingface.co/mradermacher/LLAMA-3_8B_Unaligned_Alpha_RP_Soup-i1-GGUF
description: |
Censorship level: Medium
This model is the outcome of multiple merges, starting with the base model SicariusSicariiStuff/LLAMA-3_8B_Unaligned_Alpha. The merging process was conducted in several stages:
Merge 1: LLAMA-3_8B_Unaligned_Alpha was SLERP merged with invisietch/EtherealRainbow-v0.3-8B.
Merge 2: LLAMA-3_8B_Unaligned_Alpha was SLERP merged with TheDrummer/Llama-3SOME-8B-v2.
Soup 1: Merge 1 was combined with Merge 2.
Final Merge: Soup 1 was SLERP merged with Nitral-Archive/Hathor_Enigmatica-L3-8B-v0.4.
The final model is surprisingly coherent (although slightly more censored), which is a bit unexpected, since all the intermediate merge steps were pretty incoherent.
overrides:
parameters:
model: LLAMA-3_8B_Unaligned_Alpha_RP_Soup.i1-Q4_K_M.gguf
files:
- filename: LLAMA-3_8B_Unaligned_Alpha_RP_Soup.i1-Q4_K_M.gguf
sha256: 94347eb5125d9092e286730ae0ccc78374d68663c16ad2265005d8721eb8807b
uri: huggingface://mradermacher/LLAMA-3_8B_Unaligned_Alpha_RP_Soup-i1-GGUF/LLAMA-3_8B_Unaligned_Alpha_RP_Soup.i1-Q4_K_M.gguf
- !!merge <<: *llama3
name: "hathor_respawn-l3-8b-v0.8"
icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/sWyipsXI-Wl-uEm57SRwM.png
urls:
- https://huggingface.co/Nitral-AI/Hathor_Respawn-L3-8B-v0.8
- https://huggingface.co/bartowski/Hathor_Respawn-L3-8B-v0.8-GGUF
description: |
Hathor_Aleph-v0.8 is a model based on the LLaMA 3 architecture: Designed to seamlessly integrate the qualities of creativity, intelligence, and robust performance. Making it an ideal tool for a wide range of applications; such as creative writing, educational support and human/computer interaction.
Hathor 0.8 is trained on 3 epochs of private RP, STEM (instruction/dialogs), Opus instructions, a mixture of light/classical novel data, and roleplaying chat pairs over Llama 3 8B Instruct.
overrides:
parameters:
model: Hathor_Respawn-L3-8B-v0.8-Q4_K_M.gguf
files:
- filename: Hathor_Respawn-L3-8B-v0.8-Q4_K_M.gguf
sha256: d0cdfa8951ee80b252bf1dc183403ca9b48bc3de1578cb8e7fe321af753e661c
uri: huggingface://bartowski/Hathor_Respawn-L3-8B-v0.8-GGUF/Hathor_Respawn-L3-8B-v0.8-Q4_K_M.gguf
- !!merge <<: *llama3
name: "llama3-8b-instruct-replete-adapted"
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/-0dERC793D9XeFsJ9uHbx.png
urls:
- https://huggingface.co/Replete-AI/Llama3-8B-Instruct-Replete-Adapted
- https://huggingface.co/bartowski/Llama3-8B-Instruct-Replete-Adapted-GGUF
description: |
Replete-Coder-llama3-8b is a general purpose model that is specially trained in coding in over 100 coding languages. The data used to train the model contains 25% non-code instruction data and 75% coding instruction data totaling up to 3.9 million lines, roughly 1 billion tokens, or 7.27gb of instruct data. The data used to train this model was 100% uncensored, then fully deduplicated, before training happened.
More than just a coding model!
Although Replete-Coder has amazing coding capabilities, it's trained on a vast amount of non-coding data, fully cleaned and uncensored. Don't just use it for coding, use it for all your needs! We are truly trying to make the GPT killer!
overrides:
parameters:
model: Llama3-8B-Instruct-Replete-Adapted-Q4_K_M.gguf
files:
- filename: Llama3-8B-Instruct-Replete-Adapted-Q4_K_M.gguf
sha256: 9e9a142f6fb5fc812b17bfc30230582ae50ac22b93dea696b6887cde815c1cb4
uri: huggingface://bartowski/Llama3-8B-Instruct-Replete-Adapted-GGUF/Llama3-8B-Instruct-Replete-Adapted-Q4_K_M.gguf
- !!merge <<: *llama3
name: "llama-3-perky-pat-instruct-8b"
urls:
- https://huggingface.co/grimjim/Llama-3-Perky-Pat-Instruct-8B
- https://huggingface.co/bartowski/Llama-3-Perky-Pat-Instruct-8B-GGUF
description: |
We explore negative weight merging and propose Orthogonalized Vector Adaptation, or OVA.
This is a merge of pre-trained language models created using mergekit.
"One must imagine Sisyphys happy."
Task arithmetic was used to invert the intervention vector that was applied in MopeyMule, via application of negative weight -1.0. The combination of model weights (Instruct - MopeyMule) comprises an Orthogonalized Vector Adaptation that can subsequently be applied to the base Instruct model, and could in principle be applied to other models derived from fine-tuning the Instruct model.
This model is meant to continue exploration of behavioral changes that can be achieved via orthogonalized steering. The result appears to be more enthusiastic and lengthy responses in chat, though it is also clear that the merged model has some unhealed damage.
Built with Meta Llama 3.
overrides:
parameters:
model: Llama-3-Perky-Pat-Instruct-8B-Q4_K_M.gguf
files:
- filename: Llama-3-Perky-Pat-Instruct-8B-Q4_K_M.gguf
sha256: b0eae5d9d58a7101a30693c267097a90f4a005c81fda801b40ab2c25e788a93e
uri: huggingface://bartowski/Llama-3-Perky-Pat-Instruct-8B-GGUF/Llama-3-Perky-Pat-Instruct-8B-Q4_K_M.gguf
- !!merge <<: *llama3
name: "l3-uncen-merger-omelette-rp-v0.2-8b"
icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/m0YKWwK9n7w8rnKOzduu4.png
urls:
- https://huggingface.co/Casual-Autopsy/L3-Uncen-Merger-Omelette-RP-v0.2-8B
- https://huggingface.co/LWDCLS/L3-Uncen-Merger-Omelette-RP-v0.2-8B-GGUF-IQ-Imatrix-Request
description: |
L3-Uncen-Merger-Omelette-RP-v0.2-8B is a merge of the following models using LazyMergekit:
Sao10K/L3-8B-Stheno-v3.2
Casual-Autopsy/L3-Umbral-Mind-RP-v1.0-8B
bluuwhale/L3-SthenoMaidBlackroot-8B-V1
Cas-Warehouse/Llama-3-Mopeyfied-Psychology-v2
migtissera/Llama-3-8B-Synthia-v3.5
tannedbum/L3-Nymeria-Maid-8B
Casual-Autopsy/L3-Umbral-Mind-RP-v0.3-8B
tannedbum/L3-Nymeria-8B
ChaoticNeutrals/Hathor_RP-v.01-L3-8B
cgato/L3-TheSpice-8b-v0.8.3
Sao10K/L3-8B-Stheno-v3.1
Nitral-AI/Hathor_Stable-v0.2-L3-8B
aifeifei798/llama3-8B-DarkIdol-1.0
ChaoticNeutrals/Poppy_Porpoise-1.4-L3-8B
ResplendentAI/Nymph_8B
overrides:
parameters:
model: L3-Uncen-Merger-Omelette-RP-v0.2-8B-Q4_K_M-imat.gguf
files:
- filename: L3-Uncen-Merger-Omelette-RP-v0.2-8B-Q4_K_M-imat.gguf
sha256: 6bbc42a4c3b25f2b854d76a6e32746b9b3b21dd8856f8f2bc1a5b1269aa8fca1
uri: huggingface://LWDCLS/L3-Uncen-Merger-Omelette-RP-v0.2-8B-GGUF-IQ-Imatrix-Request/L3-Uncen-Merger-Omelette-RP-v0.2-8B-Q4_K_M-imat.gguf
- !!merge <<: *llama3
name: "nymph_8b-i1"
icon: https://cdn-uploads.huggingface.co/production/uploads/626dfb8786671a29c715f8a9/9U_eJCDzLJ8nxb6qfuICc.jpeg
urls:
- https://huggingface.co/ResplendentAI/Nymph_8B
- https://huggingface.co/mradermacher/Nymph_8B-i1-GGUF?not-for-all-audiences=true
description: |
Model card:
Nymph is the culmination of everything I have learned with the T-series project. This model aims to be a unique and full featured RP juggernaut.
The finetune incorporates 1.6 Million tokens of RP data sourced from Bluemoon, FreedomRP, Aesir-Preview, and Claude Opus logs. I made sure to use the multi-turn sharegpt datasets this time instead of alpaca conversions. I have also included three of my personal datasets. The final touch is an ORPO based upon Openhermes Roleplay preferences.
overrides:
parameters:
model: Nymph_8B.i1-Q4_K_M.gguf
files:
- filename: Nymph_8B.i1-Q4_K_M.gguf
sha256: 5b35794539d9cd262720f47a54f59dbffd5bf6c601950359b5c68d13f1ce13a0
uri: huggingface://mradermacher/Nymph_8B-i1-GGUF/Nymph_8B.i1-Q4_K_M.gguf
- &chatml
### ChatML
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
@@ -2745,33 +2337,6 @@
- filename: "phi-2-orange.Q4_0.gguf"
sha256: "49cb710ae688e1b19b1b299087fa40765a0cd677e3afcc45e5f7ef6750975dcf"
uri: "huggingface://TheBloke/phi-2-orange-GGUF/phi-2-orange.Q4_0.gguf"
### Internlm2
- name: "internlm2_5-7b-chat-1m"
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
urls:
- https://huggingface.co/internlm/internlm2_5-7b-chat-1m
- https://huggingface.co/bartowski/internlm2_5-7b-chat-1m-GGUF
icon: https://github.com/InternLM/InternLM/assets/22529082/b9788105-8892-4398-8b47-b513a292378e
tags:
- internlm2
- gguf
- cpu
- gpu
description: |
InternLM2.5 has open-sourced a 7 billion parameter base model and a chat model tailored for practical scenarios. The model has the following characteristics:
Outstanding reasoning capability: State-of-the-art performance on Math reasoning, surpassing models like Llama3 and Gemma2-9B.
1M Context window: Nearly perfect at finding needles in the haystack with 1M-long context, with leading performance on long-context tasks like LongBench. Try it with LMDeploy for 1M-context inference and a file chat demo.
Stronger tool use: InternLM2.5 supports gathering information from more than 100 web pages, corresponding implementation will be released in Lagent soon. InternLM2.5 has better tool utilization-related capabilities in instruction following, tool selection and reflection. See examples.
overrides:
parameters:
model: internlm2_5-7b-chat-1m-Q4_K_M.gguf
files:
- filename: internlm2_5-7b-chat-1m-Q4_K_M.gguf
uri: huggingface://bartowski/internlm2_5-7b-chat-1m-GGUF/internlm2_5-7b-chat-1m-Q4_K_M.gguf
sha256: 10d5e18a4125f9d4d74a9284a21e0c820b150af06dee48665e54ff6e1be3a564
- &phi-3
### START Phi-3
url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master"

View File

@@ -247,23 +247,14 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
}
if xsysinfo.HasCPUCaps(cpuid.AVX2) {
p := backendPath(assetDir, LLamaCPPAVX2)
if _, err := os.Stat(p); err == nil {
log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
grpcProcess = p
}
log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
grpcProcess = backendPath(assetDir, LLamaCPPAVX2)
} else if xsysinfo.HasCPUCaps(cpuid.AVX) {
p := backendPath(assetDir, LLamaCPPAVX)
if _, err := os.Stat(p); err == nil {
log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
grpcProcess = p
}
log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
grpcProcess = backendPath(assetDir, LLamaCPPAVX)
} else {
p := backendPath(assetDir, LLamaCPPFallback)
if _, err := os.Stat(p); err == nil {
log.Info().Msgf("[%s] attempting to load with fallback variant", backend)
grpcProcess = p
}
log.Info().Msgf("[%s] attempting to load with fallback variant", backend)
grpcProcess = backendPath(assetDir, LLamaCPPFallback)
}
return grpcProcess
@@ -518,39 +509,6 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {
err = errors.Join(err, fmt.Errorf("backend %s returned no usable model", key))
log.Info().Msgf("[%s] Fails: %s", key, "backend returned no usable model")
}
if autoDetect && key == LLamaCPP && err != nil {
// try as hard as possible to run the llama.cpp variants
backendToUse := ""
if xsysinfo.HasCPUCaps(cpuid.AVX2) {
if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPAVX2)); err == nil {
backendToUse = LLamaCPPAVX2
}
} else if xsysinfo.HasCPUCaps(cpuid.AVX) {
if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPAVX2)); err == nil {
backendToUse = LLamaCPPAVX
}
} else {
if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPFallback)); err == nil {
backendToUse = LLamaCPPFallback
} else {
// If we don't have a fallback, just skip fallback
continue
}
}
// Autodetection failed, try the fallback
log.Info().Msgf("[%s] Autodetection failed, trying the fallback", key)
options = append(options, WithBackendString(backendToUse))
model, modelerr = ml.BackendLoader(options...)
if modelerr == nil && model != nil {
log.Info().Msgf("[%s] Loads OK", key)
return model, nil
} else {
err = errors.Join(err, fmt.Errorf("[%s]: %w", key, modelerr))
log.Info().Msgf("[%s] Fails: %s", key, modelerr.Error())
}
}
}
return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())

View File

@@ -1,23 +0,0 @@
#!/bin/bash
mkdir -p backend-assets/lib
OS="$(uname)"
if [ "$OS" == "Darwin" ]; then
LIBS="$(otool -L $1 | awk 'NR > 1 { system("echo " $1) } ' | xargs echo)"
elif [ "$OS" == "Linux" ]; then
LIBS="$(ldd $1 | awk 'NF == 4 { system("echo " $3) } ' | xargs echo)"
else
echo "Unsupported OS"
exit 1
fi
for lib in $LIBS; do
cp -f $lib backend-assets/lib
done
echo "==============================="
echo "Copied libraries to backend-assets/lib"
echo "$LIBS"
echo "==============================="