Compare commits


2 Commits

Author: Ettore Di Giacinto
SHA1: c28e8ca697
Message: Merge branch 'master' into ci/static-check
Date: 2024-07-18 19:44:59 +02:00

Author: Ettore Di Giacinto
SHA1: ecaaff8f03
Message: ci: add static-checker
Date: 2024-07-12 10:28:44 +02:00

**Description**

This PR adds a static-checker pipeline as part of our workflows.

**Notes for Reviewers**
N/A

**[Signed commits](../CONTRIBUTING.md#signing-off-on-commits-developer-certificate-of-origin)**
- [x] Yes, I signed my commits.

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>

51 changed files with 394 additions and 814 deletions

View File

@@ -12,7 +12,6 @@ jobs:
uses: actions/checkout@v3
with:
ref: "${{ github.event.pull_request.merge_commit_sha }}"
fetch-depth: 0 # needed to checkout all branches for this Action to work
- uses: mudler/localai-github-action@v1
with:
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
@@ -23,7 +22,6 @@ jobs:
json_diff_file_output: diff.json
raw_diff_file_output: diff.txt
file_output_only: "true"
base_branch: ${{ github.event.pull_request.base.sha }}
- name: Show diff
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}

View File

@@ -47,7 +47,7 @@ jobs:
# makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -120,7 +120,7 @@ jobs:
# makeflags: "--jobs=3 --output-sync=target"
# - build-type: 'cublas'
# cuda-major-version: "12"
# cuda-minor-version: "0"
# cuda-minor-version: "4"
# platforms: 'linux/amd64'
# tag-latest: 'false'
# tag-suffix: '-cublas-cuda12-ffmpeg-core'

View File

@@ -75,7 +75,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12'
@@ -100,7 +100,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -285,7 +285,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-core'
@@ -307,7 +307,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg-core'

View File

@@ -4,8 +4,6 @@ on:
push:
branches:
- master
tags:
- 'v*'
pull_request:
env:
@@ -31,10 +29,11 @@ jobs:
with:
go-version: '1.21.x'
cache: false
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache gawk
sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
- name: Install CUDA Dependencies
run: |
@@ -150,7 +149,7 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache gawk cmake libgmock-dev
- name: Intel Dependencies
run: |
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
@@ -251,7 +250,7 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl
sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
- name: Build stablediffusion

.github/workflows/static-check.yml (new file, 70 lines added)
View File

@@ -0,0 +1,70 @@
name: static check
on: pull_request
jobs:
imports:
name: Imports
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@master
- name: check
uses: danhunsaker/golang-github-actions@v1.3.0
with:
run: imports
token: ${{ secrets.GITHUB_TOKEN }}
errcheck:
name: Errcheck
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@master
- name: check
uses: danhunsaker/golang-github-actions@v1.3.0
with:
run: errcheck
token: ${{ secrets.GITHUB_TOKEN }}
lint:
name: Lint
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@master
- name: check
uses: danhunsaker/golang-github-actions@v1.3.0
with:
run: lint
token: ${{ secrets.GITHUB_TOKEN }}
shadow:
name: Shadow
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@master
- name: check
uses: danhunsaker/golang-github-actions@v1.3.0
with:
run: shadow
token: ${{ secrets.GITHUB_TOKEN }}
staticcheck:
name: StaticCheck
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@master
- name: check
uses: danhunsaker/golang-github-actions@v1.3.0
with:
run: staticcheck
token: ${{ secrets.GITHUB_TOKEN }}
sec:
name: Sec
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@master
- name: check
uses: danhunsaker/golang-github-actions@v1.3.0
with:
run: sec
token: ${{ secrets.GITHUB_TOKEN }}
flags: "-exclude=G104"

View File

@@ -70,7 +70,7 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
sudo apt-get install build-essential curl ffmpeg
sudo apt-get install -y libgmock-dev
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \

View File

@@ -24,7 +24,7 @@ RUN apt-get update && \
cmake \
curl \
git \
unzip upx-ucl && \
unzip && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
@@ -99,7 +99,7 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers
ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=12
ARG CUDA_MINOR_VERSION=0
ARG CUDA_MINOR_VERSION=4
ENV BUILD_TYPE=${BUILD_TYPE}

View File

@@ -8,7 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=b841d0740855c5af1344a81f261139a45a2b39ee
CPPLLAMA_VERSION?=b3283448ce9a5098226afe1d8648ccc578511fe4
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -58,7 +58,7 @@ RANDOM := $(shell bash -c 'echo $$RANDOM')
VERSION?=$(shell git describe --always --tags || echo "dev" )
# go tool nm ./local-ai | grep Commit
LD_FLAGS?=-s -w
LD_FLAGS?=
override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Version=$(VERSION)"
override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
@@ -72,14 +72,6 @@ WHITE := $(shell tput -Txterm setaf 7)
CYAN := $(shell tput -Txterm setaf 6)
RESET := $(shell tput -Txterm sgr0)
UPX?=
# check if upx exists
ifeq (, $(shell which upx))
UPX=
else
UPX=$(shell which upx)
endif
# Default Docker bridge IP
E2E_BRIDGE_IP?=172.17.0.1
@@ -385,7 +377,6 @@ build: prepare backend-assets grpcs ## Build the project
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
$(info ${GREEN}I UPX: ${YELLOW}$(UPX)${RESET})
ifneq ($(BACKEND_LIBS),)
$(MAKE) backend-assets/lib
cp -f $(BACKEND_LIBS) backend-assets/lib/
@@ -430,7 +421,7 @@ else
endif
dist-cross-linux-arm64:
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" GO_TAGS="p2p" \
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
STATIC=true $(MAKE) build
mkdir -p release
# if BUILD_ID is empty, then we don't append it to the binary name
@@ -480,7 +471,7 @@ prepare-e2e:
mkdir -p $(TEST_DIR)
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 --build-arg FFMPEG=true -t localai-tests .
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=4 --build-arg FFMPEG=true -t localai-tests .
run-e2e-image:
ls -liah $(abspath ./tests/e2e-fixtures)
@@ -742,22 +733,13 @@ backend-assets/grpc: protogen-go replace
backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/bert-embeddings
endif
backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/gpt4all
endif
backend-assets/grpc/huggingface: backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/huggingface
endif
backend/cpp/llama/llama.cpp:
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
@@ -783,9 +765,6 @@ else
echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
endif
ifneq ($(UPX),)
$(UPX) backend/cpp/${VARIANT}/grpc-server
endif
# This target is for manually building a variant with-auto detected flags
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp
@@ -858,57 +837,33 @@ backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama/llama.
backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
mkdir -p backend-assets/util/
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
ifneq ($(UPX),)
$(UPX) backend-assets/util/llama-cpp-rpc-server
endif
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/llama-ggml
endif
backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/piper
endif
backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/rwkv
endif
backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/stablediffusion
endif
backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/tinydream
endif
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/whisper
endif
backend-assets/grpc/local-store: backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/local-store
endif
grpcs: prepare $(GRPC_BACKENDS)

View File

@@ -75,24 +75,11 @@ add_library(hw_grpc_proto
${hw_proto_hdrs} )
add_executable(${TARGET} grpc-server.cpp utils.hpp json.hpp)
# Conditionally link SYCL to grpc-server
# https://github.com/ggerganov/llama.cpp/issues/8665
if ( DEFINED ENV{ONEAPI_ROOT})
target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
absl::flags_parse
gRPC::${_REFLECTION}
gRPC::${_GRPC_GRPCPP}
protobuf::${_PROTOBUF_LIBPROTOBUF}
sycl)
else()
target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
absl::flags_parse
gRPC::${_REFLECTION}
gRPC::${_GRPC_GRPCPP}
protobuf::${_PROTOBUF_LIBPROTOBUF})
endif()
target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
absl::flags_parse
gRPC::${_REFLECTION}
gRPC::${_GRPC_GRPCPP}
protobuf::${_PROTOBUF_LIBPROTOBUF})
target_compile_features(${TARGET} PRIVATE cxx_std_11)
if(TARGET BUILD_INFO)
add_dependencies(${TARGET} BUILD_INFO)

View File

@@ -1,8 +0,0 @@
# https://github.com/ggerganov/llama.cpp/issues/8665
add_executable(rpc-server rpc-server.cpp)
if ( DEFINED ENV{ONEAPI_ROOT})
target_link_libraries(rpc-server PRIVATE ggml llama sycl)
else()
target_link_libraries(rpc-server PRIVATE ggml llama)
endif()

View File

@@ -17,7 +17,4 @@ cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
# https://github.com/ggerganov/llama.cpp/issues/8665
cp -rfv CMakeLists.txt.rpc-8662 llama.cpp/examples/rpc/CMakeLists.txt
cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp

View File

@@ -1,6 +1,6 @@
accelerate
auto-gptq==0.7.1
grpcio==1.65.1
grpcio==1.65.0
protobuf
torch
certifi

View File

@@ -1,6 +1,6 @@
accelerate
bark==0.1.5
grpcio==1.65.1
grpcio==1.65.0
protobuf
certifi
transformers

View File

@@ -1,2 +1,2 @@
grpcio==1.65.1
grpcio==1.65.0
protobuf

View File

@@ -1,6 +1,6 @@
accelerate
TTS==0.22.0
grpcio==1.65.1
grpcio==1.65.0
protobuf
certifi
transformers

View File

@@ -3,7 +3,7 @@ accelerate
compel
peft
diffusers
grpcio==1.65.1
grpcio==1.65.0
opencv-python
pillow
protobuf

View File

@@ -1,5 +1,5 @@
accelerate
grpcio==1.65.1
grpcio==1.65.0
protobuf
certifi
torch

View File

@@ -1,6 +1,6 @@
causal-conv1d==1.4.0
mamba-ssm==2.2.2
grpcio==1.65.1
grpcio==1.65.0
protobuf
certifi
transformers

View File

@@ -2,7 +2,7 @@
intel-extension-for-pytorch
torch
optimum[openvino]
grpcio==1.65.1
grpcio==1.64.1
protobuf
librosa==0.9.1
faster-whisper==1.0.3

View File

@@ -1,4 +1,4 @@
grpcio==1.65.1
grpcio==1.65.0
protobuf
librosa
faster-whisper

View File

@@ -1,5 +1,5 @@
accelerate
grpcio==1.65.1
grpcio==1.65.0
protobuf
torch
git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16

View File

@@ -1,6 +1,6 @@
accelerate
rerankers[transformers]
grpcio==1.65.1
grpcio==1.65.0
protobuf
certifi
transformers

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,6 @@
accelerate
sentence-transformers==3.0.1
transformers
grpcio==1.65.1
grpcio==1.65.0
protobuf
certifi

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,6 @@
accelerate
transformers
grpcio==1.65.1
grpcio==1.65.0
protobuf
torch
scipy==1.14.0

View File

@@ -2,3 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,9 +1,9 @@
accelerate
transformers
grpcio==1.65.1
grpcio==1.65.0
protobuf
torch
certifi
intel-extension-for-transformers
bitsandbytes
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,4 +1,4 @@
accelerate
grpcio==1.65.1
grpcio==1.65.0
protobuf
certifi

View File

@@ -1,6 +1,6 @@
accelerate
vllm
grpcio==1.65.1
grpcio==1.65.0
protobuf
certifi
transformers

View File

@@ -10,12 +10,11 @@ import (
type FederatedCLI struct {
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
LoadBalanced bool `env:"LOCALAI_LOAD_BALANCED,LOAD_BALANCED" default:"false" help:"Enable load balancing" group:"p2p"`
}
func (f *FederatedCLI) Run(ctx *cliContext.Context) error {
fs := p2p.NewFederatedServer(f.Address, p2p.FederatedID, f.Peer2PeerToken, f.LoadBalanced)
fs := p2p.NewFederatedServer(f.Address, p2p.FederatedID, f.Peer2PeerToken)
return fs.Start(context.Background())
}

View File

@@ -16,16 +16,7 @@
</a>
</h2>
<h5 class="mb-4 text-justify">LocalAI uses P2P technologies to enable distribution of work between peers. It is possible to share an instance with Federation and/or split the weights of a model across peers (only available with llama.cpp models). You can now share computational resources between your devices or your friends!</h5>
<!-- Warning box if p2p token is empty and p2p is enabled -->
{{ if and .IsP2PEnabled (eq .P2PToken "") }}
<div class="bg-red-500 p-4 rounded-lg shadow-lg mb-12 text-left">
<p class="text-xl font-semibold text-white"> <i class="fa-solid fa-exclamation-triangle"></i> Warning: P2P mode is disabled or no token was specified</p>
<p class="mb-4">You have to enable P2P mode by starting LocalAI with <code>--p2p</code>. Please restart the server with <code>--p2p</code> to generate a new token automatically that can be used to automatically discover other nodes. If you already have a token specify it with <code>export TOKEN=".."</code> <a href="https://localai.io/features/distribute/" target="_blank">
Check out the documentation for more information.
</a> </p>
</div>
{{ else }}
<!-- Federation Box -->
<div class="bg-gray-800 p-6 rounded-lg shadow-lg mb-12 text-left">
@@ -137,8 +128,7 @@
</div>
</div>
</div>
<!-- Llama.cpp Box END -->
{{ end }}
<!-- Llama.cpp Box END -->
</div>
</div>

View File

@@ -4,44 +4,12 @@ const FederatedID = "federated"
type FederatedServer struct {
listenAddr, service, p2ptoken string
requestTable map[string]int
loadBalanced bool
}
func NewFederatedServer(listenAddr, service, p2pToken string, loadBalanced bool) *FederatedServer {
func NewFederatedServer(listenAddr, service, p2pToken string) *FederatedServer {
return &FederatedServer{
listenAddr: listenAddr,
service: service,
p2ptoken: p2pToken,
requestTable: map[string]int{},
loadBalanced: loadBalanced,
}
}
func (fs *FederatedServer) SelectLeastUsedServer() string {
// cycle over requestTable and find the entry with the lower number
// if there are multiple entries with the same number, select one randomly
// if there are no entries, return an empty string
var min int
var minKey string
for k, v := range fs.requestTable {
if min == 0 || v < min {
min = v
minKey = k
}
}
return minKey
}
func (fs *FederatedServer) RecordRequest(nodeID string) {
// increment the counter for the nodeID in the requestTable
fs.requestTable[nodeID]++
}
func (fs *FederatedServer) EnsureRecordExist(nodeID string) {
// if the nodeID is not in the requestTable, add it with a counter of 0
_, ok := fs.requestTable[nodeID]
if !ok {
fs.requestTable[nodeID] = 0
listenAddr: listenAddr,
service: service,
p2ptoken: p2pToken,
}
}

View File

@@ -100,23 +100,10 @@ func (fs *FederatedServer) proxy(ctx context.Context, node *node.Node) error {
return
}
tunnelAddr := ""
if fs.loadBalanced {
for _, t := range tunnelAddresses {
fs.EnsureRecordExist(t)
}
tunnelAddr = fs.SelectLeastUsedServer()
log.Debug().Msgf("Selected tunnel %s", tunnelAddr)
if tunnelAddr == "" {
tunnelAddr = tunnelAddresses[rand.IntN(len(tunnelAddresses))]
}
fs.RecordRequest(tunnelAddr)
} else {
tunnelAddr = tunnelAddresses[rand.IntN(len(tunnelAddresses))]
}
// open a TCP stream to one of the tunnels
// chosen randomly
// TODO: optimize this and track usage
tunnelAddr := tunnelAddresses[rand.IntN(len(tunnelAddresses))]
tunnelConn, err := net.Dial("tcp", tunnelAddr)
if err != nil {

View File

@@ -5,65 +5,17 @@ weight = 15
url = "/features/distribute/"
+++
This functionality enables LocalAI to distribute inference requests across multiple worker nodes, improving efficiency and performance. Nodes are automatically discovered and connect via p2p by using a shared token which makes sure the communication is secure and private between the nodes of the network.
LocalAI supports two modes of distributed inferencing via p2p:
- **Federated Mode**: Requests are shared between the cluster and routed to a single worker node in the network based on the load balancer's decision.
- **Worker Mode** (aka "model sharding" or "splitting weights"): Requests are processed by all the workers which contributes to the final inference result (by sharing the model weights).
## Usage
Starting LocalAI with `--p2p` generates a shared token for connecting multiple instances: and that's all you need to create AI clusters, eliminating the need for intricate network setups.
Simply navigate to the "Swarm" section in the WebUI and follow the on-screen instructions.
For fully shared instances, initiate LocalAI with --p2p --federated and adhere to the Swarm section's guidance. This feature, while still experimental, offers a tech preview quality experience.
### Federated mode
Federated mode allows to launch multiple LocalAI instances and connect them together in a federated network. This mode is useful when you want to distribute the load of the inference across multiple nodes, but you want to have a single point of entry for the API. In the Swarm section of the WebUI, you can see the instructions to connect multiple instances together.
![346663124-1d2324fd-8b55-4fa2-9856-721a467969c2](https://github.com/user-attachments/assets/19ebd44a-20ff-412c-b92f-cfb8efbe4b21)
To start a LocalAI server in federated mode, run:
```bash
local-ai run --p2p --federated
```
This will generate a token that you can use to connect other LocalAI instances to the network or others can use to join the network. If you already have a token, you can specify it using the `TOKEN` environment variable.
To start a load balanced server that routes the requests to the network, run with the `TOKEN`:
```bash
local-ai federated
```
To see all the available options, run `local-ai federated --help`.
The instructions are displayed in the "Swarm" section of the WebUI, guiding you through the process of connecting multiple instances.
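
For orientation, the federated entry point touched by this changeset reduces to constructing a `FederatedServer` and starting it. Below is a minimal sketch mirroring the `FederatedCLI.Run` path shown earlier in this diff (the three-argument form of `NewFederatedServer`); the import path and token lookup are assumptions, not taken verbatim from the tree:

```go
package main

import (
	"context"
	"log"
	"os"

	// Import path assumed from the LocalAI module layout; adjust if it differs.
	"github.com/mudler/LocalAI/core/p2p"
)

func main() {
	// The CLI resolves the token from LOCALAI_P2P_TOKEN / P2P_TOKEN / TOKEN;
	// here we read a single variable for brevity.
	token := os.Getenv("TOKEN")

	// Mirrors FederatedCLI.Run in this changeset: bind the API address and
	// proxy incoming requests to a worker tunnel on the p2p network.
	fs := p2p.NewFederatedServer(":8080", p2p.FederatedID, token)
	if err := fs.Start(context.Background()); err != nil {
		log.Fatal(err)
	}
}
```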
### Workers mode
{{% alert note %}}
This feature is available exclusively with llama-cpp compatible models.
This feature was introduced in [LocalAI pull request #2324](https://github.com/mudler/LocalAI/pull/2324) and is based on the upstream work in [llama.cpp pull request #6829](https://github.com/ggerganov/llama.cpp/pull/6829).
{{% /alert %}}
To connect multiple workers to a single LocalAI instance, start first a server in p2p mode:
This functionality enables LocalAI to distribute inference requests across multiple worker nodes, improving efficiency and performance.
```bash
local-ai run --p2p
```
## Usage
And navigate the WebUI to the "Swarm" section to see the instructions to connect multiple workers to the network.
![346663124-1d2324fd-8b55-4fa2-9856-721a467969c2](https://github.com/user-attachments/assets/b8cadddf-a467-49cf-a1ed-8850de95366d)
### Without P2P
### Starting Workers
To start workers for distributing the computational load, run:
@@ -71,27 +23,48 @@ To start workers for distributing the computational load, run:
local-ai worker llama-cpp-rpc <listening_address> <listening_port>
```
And you can specify the address of the workers when starting LocalAI with the `LLAMACPP_GRPC_SERVERS` environment variable:
Alternatively, you can build the RPC server following the llama.cpp [README](https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md), which is compatible with LocalAI.
### Starting LocalAI
To start the LocalAI server, which handles API requests, specify the worker addresses using the `LLAMACPP_GRPC_SERVERS` environment variable:
```bash
LLAMACPP_GRPC_SERVERS="address1:port,address2:port" local-ai run
```
The workload on the LocalAI server will then be distributed across the specified nodes.
Alternatively, you can build the RPC workers/server following the llama.cpp [README](https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md), which is compatible with LocalAI.
## Peer-to-Peer Networking
## Manual example (worker)
![output](https://github.com/mudler/LocalAI/assets/2420543/8ca277cf-c208-4562-8929-808b2324b584)
Use the WebUI to guide you in the process of starting new workers. This example shows the manual steps to highlight the process.
Workers can also connect to each other in a peer-to-peer network, distributing the workload in a decentralized manner.
A shared token between the server and the workers is required for communication within the peer-to-peer network. This feature supports both local network (using mDNS discovery) and DHT for communication across different networks.
The token is automatically generated when starting the server with the `--p2p` flag. Workers can be started with the token using `local-ai worker p2p-llama-cpp-rpc` and specifying the token via the environment variable `TOKEN` or with the `--token` argument.
A network is established between the server and workers using DHT and mDNS discovery protocols. The llama.cpp RPC server is automatically started and exposed to the peer-to-peer network, allowing the API server to connect.
When the HTTP server starts, it discovers workers in the network and creates port forwards to the local service. Llama.cpp is configured to use these services. For more details on the implementation, refer to [LocalAI pull request #2343](https://github.com/mudler/LocalAI/pull/2343).
### Usage
1. Start the server with `--p2p`:
```bash
./local-ai run --p2p
# Get the token in the Swarm section of the WebUI
# 1:02AM INF loading environment variables from file envFile=.env
# 1:02AM INF Setting logging to info
# 1:02AM INF P2P mode enabled
# 1:02AM INF No token provided, generating one
# 1:02AM INF Generated Token:
# XXXXXXXXXXX
# 1:02AM INF Press a button to proceed
```
Copy the token from the WebUI or via API call (e.g., `curl http://localhost:8000/p2p/token`) and save it for later use.
Copy the displayed token and press Enter.
To reuse the same token later, restart the server with `--p2ptoken` or `P2P_TOKEN`.
@@ -120,14 +93,12 @@ The server logs should indicate that new workers are being discovered.
3. Start inference as usual on the server initiated in step 1.
![output](https://github.com/mudler/LocalAI/assets/2420543/8ca277cf-c208-4562-8929-808b2324b584)
## Notes
- If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file.
- Only a single model is supported currently.
- Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun.
- For more details on the implementation, refer to [LocalAI pull request #2343](https://github.com/mudler/LocalAI/pull/2343)
## Environment Variables

View File

@@ -1,3 +1,3 @@
{
"version": "v2.19.1"
"version": "v2.18.1"
}

View File

@@ -1,6 +1,6 @@
llama_index==0.10.56
llama_index==0.10.55
requests==2.32.3
weaviate_client==4.6.7
weaviate_client==4.6.5
transformers
torch
chainlit

View File

@@ -1,2 +1,2 @@
langchain==0.2.10
openai==1.37.0
langchain==0.2.8
openai==1.35.13

View File

@@ -1,4 +1,4 @@
langchain==0.2.10
openai==1.37.0
langchain==0.2.8
openai==1.35.13
chromadb==0.5.4
llama-index==0.10.56
llama-index==0.10.55

View File

@@ -10,21 +10,21 @@ debugpy==1.8.2
frozenlist==1.4.1
greenlet==3.0.3
idna==3.7
langchain==0.2.10
langchain-community==0.2.9
langchain==0.2.8
langchain-community==0.2.7
marshmallow==3.21.3
marshmallow-enum==1.5.1
multidict==6.0.5
mypy-extensions==1.0.0
numexpr==2.10.1
numpy==2.0.1
openai==1.37.0
numpy==1.26.4
openai==1.35.13
openapi-schema-pydantic==1.2.4
packaging>=23.2
pydantic==2.8.2
PyYAML==6.0.1
requests==2.32.3
SQLAlchemy==2.0.31
SQLAlchemy==2.0.30
tenacity==8.5.0
tqdm==4.66.4
typing-inspect==0.9.0

View File

@@ -24,33 +24,6 @@
- filename: DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf
sha256: 50ec78036433265965ed1afd0667c00c71c12aa70bcf383be462cb8e159db6c0
uri: huggingface://LoneStriker/DeepSeek-Coder-V2-Lite-Instruct-GGUF/DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf
- name: "archangel_sft_pythia2-8b"
url: "github:mudler/LocalAI/gallery/tuluv2.yaml@master"
icon: https://gist.github.com/assets/29318529/fe2d8391-dbd1-4b7e-9dc4-7cb97e55bc06
license: apache-2.0
urls:
- https://huggingface.co/ContextualAI/archangel_sft_pythia2-8b
- https://huggingface.co/RichardErkhov/ContextualAI_-_archangel_sft_pythia2-8b-gguf
- https://github.com/ContextualAI/HALOs
description: |
datasets:
- stanfordnlp/SHP
- Anthropic/hh-rlhf
- OpenAssistant/oasst1
This repo contains the model checkpoints for:
- model family pythia2-8b
- optimized with the loss SFT
- aligned using the SHP, Anthropic HH and Open Assistant datasets.
Please refer to our [code repository](https://github.com/ContextualAI/HALOs) or [blog](https://contextual.ai/better-cheaper-faster-llm-alignment-with-kto/) which contains intructions for training your own HALOs and links to our model cards.
overrides:
parameters:
model: archangel_sft_pythia2-8b.Q4_K_M.gguf
files:
- filename: archangel_sft_pythia2-8b.Q4_K_M.gguf
sha256: a47782c55ef2b39b19644213720a599d9849511a73c9ebb0c1de749383c0a0f8
uri: huggingface://RichardErkhov/ContextualAI_-_archangel_sft_pythia2-8b-gguf/archangel_sft_pythia2-8b.Q4_K_M.gguf
- &qwen2
## Start QWEN2
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
@@ -229,54 +202,6 @@
- filename: Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
sha256: 8c1b3efe9fa6ae1b37942ef26473cb4e0aed0f8038b60d4b61e5bffb61e49b7e
uri: huggingface://MaziyarPanahi/Qwen2-7B-Instruct-v0.8-GGUF/Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
- !!merge <<: *qwen2
name: "qwen2-wukong-7b"
icon: https://cdn-uploads.huggingface.co/production/uploads/655dc641accde1bbc8b41aec/xOe1Nb3S9Nb53us7_Ja3s.jpeg
urls:
- https://huggingface.co/bartowski/Qwen2-Wukong-7B-GGUF
description: |
Qwen2-Wukong-7B is a dealigned chat finetune of the original fantastic Qwen2-7B model by the Qwen team.
This model was trained on the teknium OpenHeremes-2.5 dataset and some supplementary datasets from Cognitive Computations
This model was trained for 3 epochs with a custom FA2 implementation for AMD cards.
overrides:
parameters:
model: Qwen2-Wukong-7B-Q4_K_M.gguf
files:
- filename: Qwen2-Wukong-7B-Q4_K_M.gguf
sha256: 6b8ca6649c33fc84d4892ebcff1214f0b34697aced784f0d6d32e284a15943ad
uri: huggingface://bartowski/Qwen2-Wukong-7B-GGUF/Qwen2-Wukong-7B-Q4_K_M.gguf
- !!merge <<: *qwen2
name: "calme-2.8-qwen2-7b"
icon: https://huggingface.co/MaziyarPanahi/calme-2.8-qwen2-7b/resolve/main/qwen2-fine-tunes-maziyar-panahi.webp
urls:
- https://huggingface.co/MaziyarPanahi/calme-2.8-qwen2-7b
- https://huggingface.co/MaziyarPanahi/calme-2.8-qwen2-7b-GGUF
description: |
This is a fine-tuned version of the Qwen/Qwen2-7B model. It aims to improve the base model across all benchmarks.
overrides:
parameters:
model: Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
files:
- filename: Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
sha256: 8c1b3efe9fa6ae1b37942ef26473cb4e0aed0f8038b60d4b61e5bffb61e49b7e
uri: huggingface://MaziyarPanahi/calme-2.8-qwen2-7b-GGUF/Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
- !!merge <<: *qwen2
name: "stellardong-72b-i1"
icon: https://huggingface.co/smelborp/StellarDong-72b/resolve/main/stellardong.png
urls:
- https://huggingface.co/smelborp/StellarDong-72b
- https://huggingface.co/mradermacher/StellarDong-72b-i1-GGUF
description: |
Magnum + Nova = you won't believe how stellar this dong is!!
overrides:
parameters:
model: StellarDong-72b.i1-Q4_K_M.gguf
files:
- filename: StellarDong-72b.i1-Q4_K_M.gguf
sha256: 4c5012f0a034f40a044904891343ade2594f29c28a8a9d8052916de4dc5a61df
uri: huggingface://mradermacher/StellarDong-72b-i1-GGUF/StellarDong-72b.i1-Q4_K_M.gguf
- &mistral03
## START Mistral
url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"
@@ -339,31 +264,6 @@
- filename: Mahou-1.3d-mistral-7B.i1-Q4_K_M.gguf
sha256: 8272f050e36d612ab282e095cb4e775e2c818e7096f8d522314d256923ef6da9
uri: huggingface://mradermacher/Mahou-1.3d-mistral-7B-i1-GGUF/Mahou-1.3d-mistral-7B.i1-Q4_K_M.gguf
- name: "einstein-v4-7b"
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
icon: https://cdn-uploads.huggingface.co/production/uploads/6468ce47e134d050a58aa89c/U0zyXVGj-O8a7KP3BvPue.png
urls:
- https://huggingface.co/Weyaxi/Einstein-v4-7B
- https://huggingface.co/mradermacher/Einstein-v4-7B-GGUF
tags:
- llm
- gguf
- gpu
- mistral
- cpu
description: |
🔬 Einstein-v4-7B
This model is a full fine-tuned version of mistralai/Mistral-7B-v0.1 on diverse datasets.
This model is finetuned using 7xRTX3090 + 1xRTXA6000 using axolotl.
overrides:
parameters:
model: Einstein-v4-7B.Q4_K_M.gguf
files:
- filename: Einstein-v4-7B.Q4_K_M.gguf
sha256: 78bd573de2a9eb3c6e213132858164e821145f374fcaa4b19dfd6502c05d990d
uri: huggingface://mradermacher/Einstein-v4-7B-GGUF/Einstein-v4-7B.Q4_K_M.gguf
- &mudler
### START mudler's LocalAI specific-models
url: "github:mudler/LocalAI/gallery/mudler.yaml@master"
@@ -679,91 +579,6 @@
- filename: EZO-Common-9B-gemma-2-it.Q4_K_M.gguf
sha256: 57678b1828673dccb15f76e52b00672c74aa6169421bbb8620b8955955322cfd
uri: huggingface://QuantFactory/EZO-Common-9B-gemma-2-it-GGUF/EZO-Common-9B-gemma-2-it.Q4_K_M.gguf
- !!merge <<: *gemma
name: "big-tiger-gemma-27b-v1"
icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/A97OlLKeT4XOnv4IG1b6m.png
urls:
- https://huggingface.co/TheDrummer/Big-Tiger-Gemma-27B-v1
- https://huggingface.co/TheDrummer/Big-Tiger-Gemma-27B-v1-GGUF
description: |
Big Tiger Gemma 27B v1 is a Decensored Gemma 27B model with no refusals, except for some rare instances from the 9B model. It does not appear to have any brain damage. The model is available from various sources, including Hugging Face, and comes in different variations such as GGUF, iMatrix, and EXL2.
overrides:
parameters:
model: Big-Tiger-Gemma-27B-v1c-Q4_K_M.gguf
files:
- filename: Big-Tiger-Gemma-27B-v1c-Q4_K_M.gguf
sha256: c5fc5605d36ae280c1c908c9b4bcb12b28abbe2692f317edeb83ab1104657fe5
uri: huggingface://TheDrummer/Big-Tiger-Gemma-27B-v1-GGUF/Big-Tiger-Gemma-27B-v1c-Q4_K_M.gguf
- !!merge <<: *gemma
name: "gemma-2b-translation-v0.150"
urls:
- https://huggingface.co/lemon-mint/gemma-2b-translation-v0.150
- https://huggingface.co/RichardErkhov/lemon-mint_-_gemma-2b-translation-v0.150-gguf
description: |
Original model: lemon-mint/gemma-ko-1.1-2b-it
Evaluation metrics: Eval Loss, Train Loss, lr, optimizer, lr_scheduler_type.
Prompt Template:
<bos><start_of_turn>user
Translate into Korean: [input text]<end_of_turn>
<start_of_turn>model
[translated text in Korean]<eos>
<bos><start_of_turn>user
Translate into English: [Korean text]<end_of_turn>
<start_of_turn>model
[translated text in English]<eos>
Model features:
* Developed by: lemon-mint
* Model type: Gemma
* Languages (NLP): English
* License: Gemma Terms of Use
* Finetuned from model: lemon-mint/gemma-ko-1.1-2b-it
overrides:
parameters:
model: gemma-2b-translation-v0.150.Q4_K_M.gguf
files:
- filename: gemma-2b-translation-v0.150.Q4_K_M.gguf
sha256: dcde67b83168d2e7ca835cf9a7a4dcf38b41b9cefe3cbc997c71d2741c08cd25
uri: huggingface://RichardErkhov/lemon-mint_-_gemma-2b-translation-v0.150-gguf/gemma-2b-translation-v0.150.Q4_K_M.gguf
- !!merge <<: *gemma
name: "emo-2b"
urls:
- https://huggingface.co/OEvortex/EMO-2B
- https://huggingface.co/RichardErkhov/OEvortex_-_EMO-2B-gguf
description: |
EMO-2B: Emotionally Intelligent Conversational AI
Overview:
EMO-2B is a state-of-the-art conversational AI model with 2.5 billion parameters, designed to engage in emotionally resonant dialogue. Building upon the success of EMO-1.5B, this model has been further fine-tuned on an extensive corpus of emotional narratives, enabling it to perceive and respond to the emotional undertones of user inputs with exceptional empathy and emotional intelligence.
Key Features:
- Advanced Emotional Intelligence: With its increased capacity, EMO-2B demonstrates an even deeper understanding and generation of emotional language, allowing for more nuanced and contextually appropriate emotional responses.
- Enhanced Contextual Awareness: The model considers an even broader context within conversations, accounting for subtle emotional cues and providing emotionally resonant responses tailored to the specific situation.
- Empathetic and Supportive Dialogue: EMO-2B excels at active listening, validating emotions, offering compassionate advice, and providing emotional support, making it an ideal companion for users seeking empathy and understanding.
- Dynamic Persona Adaptation: The model can dynamically adapt its persona, communication style, and emotional responses to match the user's emotional state, ensuring a highly personalized and tailored conversational experience.
Use Cases:
EMO-2B is well-suited for a variety of applications where emotional intelligence and empathetic communication are crucial, such as:
- Mental health support chatbots
- Emotional support companions
- Personalized coaching and motivation
- Narrative storytelling and interactive fiction
- Customer service and support (for emotionally sensitive contexts)
Limitations and Ethical Considerations:
While EMO-2B is designed to provide emotionally intelligent and empathetic responses, it is important to note that it is an AI system and cannot replicate the depth and nuance of human emotional intelligence. Users should be aware that the model's responses, while emotionally supportive, should not be considered a substitute for professional mental health support or counseling.
Additionally, as with any language model, EMO-2B may reflect biases present in its training data. Users should exercise caution and critical thinking when interacting with the model, and report any concerning or inappropriate responses.
overrides:
parameters:
model: EMO-2B.Q4_K_M.gguf
files:
- filename: EMO-2B.Q4_K_M.gguf
sha256: 608bffc0e9012bc7f9a94b714f4932e2826cc122dbac59b586e4baa2ee0fdca5
uri: huggingface://RichardErkhov/OEvortex_-_EMO-2B-gguf/EMO-2B.Q4_K_M.gguf
- &llama3
url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
@@ -3186,106 +3001,6 @@
- filename: L3-15B-EtherealMaid-t0.0001.i1-Q4_K_M.gguf
sha256: 2911be6be8e0fd4184998d452410ba847491b4ab71a928749de87cafb0e13757
uri: huggingface://mradermacher/L3-15B-EtherealMaid-t0.0001-i1-GGUF/L3-15B-EtherealMaid-t0.0001.i1-Q4_K_M.gguf
- !!merge <<: *llama3
name: "l3-8b-celeste-v1"
icon: https://cdn-uploads.huggingface.co/production/uploads/630cf5d14ca0a22768bbe10c/Zv__LDTO-nHvpuxPcCgUU.webp
urls:
- https://huggingface.co/nothingiisreal/L3-8B-Celeste-v1
- https://huggingface.co/bartowski/L3-8B-Celeste-v1-GGUF
description: |
Trained on LLaMA 3 8B Instruct at 8K context using Reddit Writing Prompts, Opus 15K Instruct an c2 logs cleaned.
This is a roleplay model any instruction following capabilities outside roleplay contexts are coincidental.
overrides:
parameters:
model: L3-8B-Celeste-v1-Q4_K_M.gguf
files:
- filename: L3-8B-Celeste-v1-Q4_K_M.gguf
sha256: ed5277719965fb6bbcce7d16742e3bac4a8d5b8f52133261a3402a480cd65317
uri: huggingface://bartowski/L3-8B-Celeste-v1-GGUF/L3-8B-Celeste-v1-Q4_K_M.gguf
- !!merge <<: *llama3
name: "l3-8b-celeste-v1.2"
icon: https://cdn-uploads.huggingface.co/production/uploads/630cf5d14ca0a22768bbe10c/Zv__LDTO-nHvpuxPcCgUU.webp
urls:
- https://huggingface.co/mudler/L3-8B-Celeste-V1.2-Q4_K_M-GGUF
description: |
Trained on LLaMA 3 8B Instruct at 8K context using Reddit Writing Prompts, Opus 15K Instruct an c2 logs cleaned.
This is a roleplay model any instruction following capabilities outside roleplay contexts are coincidental.
overrides:
parameters:
model: l3-8b-celeste-v1.2-q4_k_m.gguf
files:
- filename: l3-8b-celeste-v1.2-q4_k_m.gguf
sha256: 7752204c0e9f627ff5726eb69bb6114974cafbc934a993ad019abfba62002783
uri: huggingface://mudler/L3-8B-Celeste-V1.2-Q4_K_M-GGUF/l3-8b-celeste-v1.2-q4_k_m.gguf
- !!merge <<: *llama3
name: "llama-3-tulu-2-8b-i1"
icon: https://huggingface.co/datasets/allenai/blog-images/resolve/main/tulu-v2/Tulu%20V2%20banner.png
urls:
- https://huggingface.co/allenai/llama-3-tulu-2-8b
- https://huggingface.co/mradermacher/llama-3-tulu-2-8b-i1-GGUF
description: |
Tulu is a series of language models that are trained to act as helpful assistants. Llama 3 Tulu V2 8B is a fine-tuned version of Llama 3 that was trained on a mix of publicly available, synthetic and human datasets.
overrides:
parameters:
model: llama-3-tulu-2-8b.i1-Q4_K_M.gguf
files:
- filename: llama-3-tulu-2-8b.i1-Q4_K_M.gguf
sha256: f859c22bfa64f461e9ffd973dc7ad6a78bb98b1dda6f49abfa416a4022b7e333
uri: huggingface://mradermacher/llama-3-tulu-2-8b-i1-GGUF/llama-3-tulu-2-8b.i1-Q4_K_M.gguf
- !!merge <<: *llama3
name: "llama-3-tulu-2-dpo-70b-i1"
icon: https://huggingface.co/datasets/allenai/blog-images/resolve/main/tulu-v2/Tulu%20V2%20banner.png
urls:
- https://huggingface.co/allenai/llama-3-tulu-2-dpo-70b
- https://huggingface.co/mradermacher/llama-3-tulu-2-dpo-70b-i1-GGUF
description: |
Tulu is a series of language models that are trained to act as helpful assistants. Llama 3 Tulu V2 8B is a fine-tuned version of Llama 3 that was trained on a mix of publicly available, synthetic and human datasets.
overrides:
parameters:
model: llama-3-tulu-2-dpo-70b.i1-Q4_K_M.gguf
files:
- filename: llama-3-tulu-2-dpo-70b.i1-Q4_K_M.gguf
sha256: fc309bbdf1e2bdced954c4c8dc1f9a885c547017ee5e750bfde645af89e3d3a5
uri: huggingface://mradermacher/llama-3-tulu-2-dpo-70b-i1-GGUF/llama-3-tulu-2-dpo-70b.i1-Q4_K_M.gguf
- !!merge <<: *llama3
license: cc-by-nc-4.0
name: "suzume-llama-3-8b-multilingual-orpo-borda-top25"
icon: https://cdn-uploads.huggingface.co/production/uploads/64b63f8ad57e02621dc93c8b/kWQSu02YfgYdUQqv4s5lq.png
urls:
- https://huggingface.co/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25
- https://huggingface.co/RichardErkhov/lightblue_-_suzume-llama-3-8B-multilingual-orpo-borda-top25-gguf
description: |
This is Suzume ORPO, an ORPO trained fine-tune of the lightblue/suzume-llama-3-8B-multilingual model using our lightblue/mitsu dataset.
We have trained several versions of this model using ORPO and so recommend that you use the best performing model from our tests, lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half.
Note that this model has a non-commerical license as we used the Command R and Command R+ models to generate our training data for this model (lightblue/mitsu).
We are currently working on a developing a commerically usable model, so stay tuned for that!
overrides:
parameters:
model: suzume-llama-3-8B-multilingual-orpo-borda-top25.Q4_K_M.gguf
files:
- filename: suzume-llama-3-8B-multilingual-orpo-borda-top25.Q4_K_M.gguf
sha256: ef75a02c5f38e14a8873c7989188dac6974851b4654279fe1921d2c8018cc388
uri: huggingface://RichardErkhov/lightblue_-_suzume-llama-3-8B-multilingual-orpo-borda-top25-gguf/suzume-llama-3-8B-multilingual-orpo-borda-top25.Q4_K_M.gguf
- !!merge <<: *llama3
name: "calme-2.4-llama3-70b"
icon: https://huggingface.co/MaziyarPanahi/calme-2.4-llama3-70b/resolve/main/llama-3-merges.webp
urls:
- https://huggingface.co/MaziyarPanahi/calme-2.4-llama3-70b
- https://huggingface.co/mradermacher/calme-2.4-llama3-70b-GGUF
description: |
This model is a fine-tune (DPO) of meta-llama/Meta-Llama-3-70B-Instruct model.
overrides:
parameters:
model: calme-2.4-llama3-70b.Q4_K_M.gguf
files:
- filename: calme-2.4-llama3-70b.Q4_K_M.gguf
sha256: 0b44ac8a88395dfc60f1b9d3cfffc0ffef74ec0a302e610ef91fc787187568f2
uri: huggingface://mradermacher/calme-2.4-llama3-70b-GGUF/calme-2.4-llama3-70b.Q4_K_M.gguf
- &command-R
### START Command-r
url: "github:mudler/LocalAI/gallery/command-r.yaml@master"
@@ -3530,38 +3245,6 @@
- filename: Phi-3.1-mini-4k-instruct-Q4_K_M.gguf
sha256: 39458b227a4be763b7eb39d306d240c3d45205e3f8b474ec7bdca7bba0158e69
uri: huggingface://bartowski/Phi-3.1-mini-4k-instruct-GGUF/Phi-3.1-mini-4k-instruct-Q4_K_M.gguf
- !!merge <<: *phi-3
name: "phillama-3.8b-v0.1"
icon: https://cdn-uploads.huggingface.co/production/uploads/657eb5b256c9c67605a6e8b5/f96pPiJQb3puzbPYNknG2.png
urls:
- https://huggingface.co/RichardErkhov/raincandy-u_-_phillama-3.8b-v0.1-gguf
description: |
The description of the LLM model is:
Phillama is a model based on Phi-3-mini and trained on Llama-generated dataset raincandy-u/Dextromethorphan-10k to make it more "llama-like". Also, this model is converted into Llama format, so it will work with any Llama-2/3 workflow. The model aims to generate text with a specific "llama-like" style and is suited for text-generation tasks.
overrides:
parameters:
model: phillama-3.8b-v0.1.Q4_K_M.gguf
files:
- filename: phillama-3.8b-v0.1.Q4_K_M.gguf
sha256: da537d352b7aae54bbad0d2cff3e3a1b0e1dc1e1d25bec3aae1d05cf4faee7a2
uri: huggingface://RichardErkhov/raincandy-u_-_phillama-3.8b-v0.1-gguf/phillama-3.8b-v0.1.Q4_K_M.gguf
- !!merge <<: *llama3
name: "calme-2.3-phi3-4b"
icon: https://huggingface.co/MaziyarPanahi/calme-2.1-phi3-4b/resolve/main/phi-3-instruct.webp
urls:
- https://huggingface.co/MaziyarPanahi/calme-2.3-phi3-4b
- https://huggingface.co/MaziyarPanahi/calme-2.3-phi3-4b-GGUF
description: |
MaziyarPanahi/calme-2.1-phi3-4b
This model is a fine-tune (DPO) of microsoft/Phi-3-mini-4k-instruct model.
overrides:
parameters:
model: Phi-3-mini-4k-instruct-v0.3.Q4_K_M.gguf
files:
- filename: Phi-3-mini-4k-instruct-v0.3.Q4_K_M.gguf
sha256: 3a23e1052369c080afb925882bd814cbea5ec859894655a7434c3d49e43a6127
uri: huggingface://MaziyarPanahi/calme-2.3-phi3-4b-GGUF/Phi-3-mini-4k-instruct-v0.3.Q4_K_M.gguf
- &hermes-2-pro-mistral
### START Hermes
url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"

View File

@@ -1,43 +0,0 @@
---
name: "tuluv2"
config_file: |
mmap: true
template:
chat_message: |
<|{{ .RoleName }}|>
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{ end -}}
{{ if .FunctionCall -}}
{{toJson .FunctionCall}}
{{ end -}}
function: |
<|{{ .RoleName }}|>
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{ end -}}
{{ if .FunctionCall -}}
{{toJson .FunctionCall}}
{{ end -}}
chat: |
{{.Input -}}
<|assistant|>
completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- '<|im_end|>'
- '<dummy32000>'
- '<|endoftext|>'

View File

@@ -1,13 +0,0 @@
package concurrency
import (
"testing"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
func TestConcurrency(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "Concurrency test suite")
}

View File

@@ -1,69 +0,0 @@
package concurrency
import (
"context"
"sync"
)
// This is a Read-ONLY structure that contains the result of an arbitrary asynchronous action
type JobResult[RequestType any, ResultType any] struct {
request *RequestType
result *ResultType
err error
once sync.Once
done *chan struct{}
}
// This structure is returned in a pair with a JobResult and serves as the structure that has access to be updated.
type WritableJobResult[RequestType any, ResultType any] struct {
*JobResult[RequestType, ResultType]
}
// Wait blocks until the result is ready and then returns the result, or the context expires.
// Returns *ResultType instead of ResultType since its possible we have only an error and nil for ResultType.
// Is this correct and idiomatic?
func (jr *JobResult[RequestType, ResultType]) Wait(ctx context.Context) (*ResultType, error) {
if jr.done == nil { // If the channel is blanked out, result is ready.
return jr.result, jr.err
}
select {
case <-*jr.done: // Wait for the result to be ready
jr.done = nil
if jr.err != nil {
return nil, jr.err
}
return jr.result, nil
case <-ctx.Done():
return nil, ctx.Err()
}
}
// Accessor function to allow holders of JobResults to access the associated request, without allowing the pointer to be updated.
func (jr *JobResult[RequestType, ResultType]) Request() *RequestType {
return jr.request
}
// This is the function that actually updates the Result and Error on the JobResult... but it's normally not accessible
func (jr *JobResult[RequestType, ResultType]) setResult(result ResultType, err error) {
jr.once.Do(func() {
jr.result = &result
jr.err = err
close(*jr.done) // Signal that the result is ready - since this is only ran once, jr.done cannot be set to nil yet.
})
}
// Only the WritableJobResult can actually call setResult - prevents accidental corruption
func (wjr *WritableJobResult[RequestType, ResultType]) SetResult(result ResultType, err error) {
wjr.JobResult.setResult(result, err)
}
// NewJobResult binds a request to a matched pair of JobResult and WritableJobResult
func NewJobResult[RequestType any, ResultType any](request RequestType) (*JobResult[RequestType, ResultType], *WritableJobResult[RequestType, ResultType]) {
done := make(chan struct{})
jr := &JobResult[RequestType, ResultType]{
once: sync.Once{},
request: &request,
done: &done,
}
return jr, &WritableJobResult[RequestType, ResultType]{JobResult: jr}
}

View File

@@ -1,80 +0,0 @@
package concurrency_test
import (
"context"
"fmt"
"time"
. "github.com/mudler/LocalAI/pkg/concurrency"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
var _ = Describe("pkg/concurrency unit tests", func() {
It("can be used to recieve a result across goroutines", func() {
jr, wjr := NewJobResult[string, string]("foo")
Expect(jr).ToNot(BeNil())
Expect(wjr).ToNot(BeNil())
go func(wjr *WritableJobResult[string, string]) {
time.Sleep(time.Second * 5)
wjr.SetResult("bar", nil)
}(wjr)
resPtr, err := jr.Wait(context.Background())
Expect(err).To(BeNil())
Expect(jr.Request).ToNot(BeNil())
Expect(*jr.Request()).To(Equal("foo"))
Expect(resPtr).ToNot(BeNil())
Expect(*resPtr).To(Equal("bar"))
})
It("can be used to recieve an error across goroutines", func() {
jr, wjr := NewJobResult[string, string]("foo")
Expect(jr).ToNot(BeNil())
Expect(wjr).ToNot(BeNil())
go func(wjr *WritableJobResult[string, string]) {
time.Sleep(time.Second * 5)
wjr.SetResult("", fmt.Errorf("test"))
}(wjr)
_, err := jr.Wait(context.Background())
Expect(jr.Request).ToNot(BeNil())
Expect(*jr.Request()).To(Equal("foo"))
Expect(err).ToNot(BeNil())
Expect(err).To(MatchError("test"))
})
It("can properly handle timeouts", func() {
jr, wjr := NewJobResult[string, string]("foo")
Expect(jr).ToNot(BeNil())
Expect(wjr).ToNot(BeNil())
go func(wjr *WritableJobResult[string, string]) {
time.Sleep(time.Second * 5)
wjr.SetResult("bar", nil)
}(wjr)
timeout1s, c1 := context.WithTimeoutCause(context.Background(), time.Second, fmt.Errorf("timeout"))
timeout10s, c2 := context.WithTimeoutCause(context.Background(), time.Second*10, fmt.Errorf("timeout"))
_, err := jr.Wait(timeout1s)
Expect(jr.Request).ToNot(BeNil())
Expect(*jr.Request()).To(Equal("foo"))
Expect(err).ToNot(BeNil())
Expect(err).To(MatchError(context.DeadlineExceeded))
resPtr, err := jr.Wait(timeout10s)
Expect(jr.Request).ToNot(BeNil())
Expect(*jr.Request()).To(Equal("foo"))
Expect(err).To(BeNil())
Expect(resPtr).ToNot(BeNil())
Expect(*resPtr).To(Equal("bar"))
// Is this needed? Cleanup Either Way.
c1()
c2()
})
})

View File

@@ -1,13 +0,0 @@
package downloader
import (
"testing"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
func TestDownloader(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "Downloader test suite")
}

View File

@@ -212,7 +212,7 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
grpcProcess = p
foundCUDA = true
} else {
log.Debug().Msgf("Nvidia GPU device found, no embedded CUDA variant found. You can ignore this message if you are using container with CUDA support")
log.Info().Msgf("GPU device found but no CUDA backend present")
}
}
if strings.Contains(gpu.String(), "amd") {
@@ -222,7 +222,7 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
grpcProcess = p
foundAMDGPU = true
} else {
log.Debug().Msgf("AMD GPU device found, no embedded HIPBLAS variant found. You can ignore this message if you are using container with HIPBLAS support")
log.Info().Msgf("GPU device found but no HIPBLAS backend present")
}
}
if strings.Contains(gpu.String(), "intel") {
@@ -236,7 +236,7 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
grpcProcess = p
foundIntelGPU = true
} else {
log.Debug().Msgf("Intel GPU device found, no embedded SYCL variant found. You can ignore this message if you are using container with SYCL support")
log.Info().Msgf("GPU device found but no Intel backend present")
}
}
}

View File

@@ -700,6 +700,18 @@ const docTemplate = `{
}
}
},
"functions.Argument": {
"type": "object",
"properties": {
"properties": {
"type": "object",
"additionalProperties": true
},
"type": {
"type": "string"
}
}
},
"functions.Function": {
"type": "object",
"properties": {
@@ -715,19 +727,48 @@ const docTemplate = `{
}
}
},
"functions.Item": {
"functions.FunctionName": {
"type": "object",
"properties": {
"const": {
"type": "string"
}
}
},
"functions.FunctionProperties": {
"type": "object",
"properties": {
"arguments": {
"$ref": "#/definitions/functions.Argument"
},
"function": {
"$ref": "#/definitions/functions.FunctionName"
}
}
},
"functions.ItemFunction": {
"type": "object",
"properties": {
"properties": {
"type": "object",
"additionalProperties": true
"$ref": "#/definitions/functions.FunctionProperties"
},
"type": {
"type": "string"
}
}
},
"functions.JSONFunctionStructure": {
"functions.ItemName": {
"type": "object",
"properties": {
"properties": {
"$ref": "#/definitions/functions.NameProperties"
},
"type": {
"type": "string"
}
}
},
"functions.JSONFunctionStructureFunction": {
"type": "object",
"properties": {
"$defs": {
@@ -737,17 +778,49 @@ const docTemplate = `{
"anyOf": {
"type": "array",
"items": {
"$ref": "#/definitions/functions.Item"
"$ref": "#/definitions/functions.ItemFunction"
}
},
"oneOf": {
"type": "array",
"items": {
"$ref": "#/definitions/functions.Item"
"$ref": "#/definitions/functions.ItemFunction"
}
}
}
},
"functions.JSONFunctionStructureName": {
"type": "object",
"properties": {
"$defs": {
"type": "object",
"additionalProperties": true
},
"anyOf": {
"type": "array",
"items": {
"$ref": "#/definitions/functions.ItemName"
}
},
"oneOf": {
"type": "array",
"items": {
"$ref": "#/definitions/functions.ItemName"
}
}
}
},
"functions.NameProperties": {
"type": "object",
"properties": {
"arguments": {
"$ref": "#/definitions/functions.Argument"
},
"name": {
"$ref": "#/definitions/functions.FunctionName"
}
}
},
"functions.Tool": {
"type": "object",
"properties": {
@@ -1415,7 +1488,10 @@ const docTemplate = `{
"type": "string"
},
"grammar_json_functions": {
"$ref": "#/definitions/functions.JSONFunctionStructure"
"$ref": "#/definitions/functions.JSONFunctionStructureFunction"
},
"grammar_json_name": {
"$ref": "#/definitions/functions.JSONFunctionStructureName"
},
"ignore_eos": {
"type": "boolean"

View File

@@ -693,6 +693,18 @@
}
}
},
"functions.Argument": {
"type": "object",
"properties": {
"properties": {
"type": "object",
"additionalProperties": true
},
"type": {
"type": "string"
}
}
},
"functions.Function": {
"type": "object",
"properties": {
@@ -708,19 +720,48 @@
}
}
},
"functions.Item": {
"functions.FunctionName": {
"type": "object",
"properties": {
"const": {
"type": "string"
}
}
},
"functions.FunctionProperties": {
"type": "object",
"properties": {
"arguments": {
"$ref": "#/definitions/functions.Argument"
},
"function": {
"$ref": "#/definitions/functions.FunctionName"
}
}
},
"functions.ItemFunction": {
"type": "object",
"properties": {
"properties": {
"type": "object",
"additionalProperties": true
"$ref": "#/definitions/functions.FunctionProperties"
},
"type": {
"type": "string"
}
}
},
"functions.JSONFunctionStructure": {
"functions.ItemName": {
"type": "object",
"properties": {
"properties": {
"$ref": "#/definitions/functions.NameProperties"
},
"type": {
"type": "string"
}
}
},
"functions.JSONFunctionStructureFunction": {
"type": "object",
"properties": {
"$defs": {
@@ -730,17 +771,49 @@
"anyOf": {
"type": "array",
"items": {
"$ref": "#/definitions/functions.Item"
"$ref": "#/definitions/functions.ItemFunction"
}
},
"oneOf": {
"type": "array",
"items": {
"$ref": "#/definitions/functions.Item"
"$ref": "#/definitions/functions.ItemFunction"
}
}
}
},
"functions.JSONFunctionStructureName": {
"type": "object",
"properties": {
"$defs": {
"type": "object",
"additionalProperties": true
},
"anyOf": {
"type": "array",
"items": {
"$ref": "#/definitions/functions.ItemName"
}
},
"oneOf": {
"type": "array",
"items": {
"$ref": "#/definitions/functions.ItemName"
}
}
}
},
"functions.NameProperties": {
"type": "object",
"properties": {
"arguments": {
"$ref": "#/definitions/functions.Argument"
},
"name": {
"$ref": "#/definitions/functions.FunctionName"
}
}
},
"functions.Tool": {
"type": "object",
"properties": {
@@ -1408,7 +1481,10 @@
"type": "string"
},
"grammar_json_functions": {
"$ref": "#/definitions/functions.JSONFunctionStructure"
"$ref": "#/definitions/functions.JSONFunctionStructureFunction"
},
"grammar_json_name": {
"$ref": "#/definitions/functions.JSONFunctionStructureName"
},
"ignore_eos": {
"type": "boolean"

View File

@@ -7,6 +7,14 @@ definitions:
url:
type: string
type: object
functions.Argument:
properties:
properties:
additionalProperties: true
type: object
type:
type: string
type: object
functions.Function:
properties:
description:
@@ -17,28 +25,67 @@ definitions:
additionalProperties: true
type: object
type: object
functions.Item:
functions.FunctionName:
properties:
const:
type: string
type: object
functions.FunctionProperties:
properties:
arguments:
$ref: '#/definitions/functions.Argument'
function:
$ref: '#/definitions/functions.FunctionName'
type: object
functions.ItemFunction:
properties:
properties:
additionalProperties: true
type: object
$ref: '#/definitions/functions.FunctionProperties'
type:
type: string
type: object
functions.JSONFunctionStructure:
functions.ItemName:
properties:
properties:
$ref: '#/definitions/functions.NameProperties'
type:
type: string
type: object
functions.JSONFunctionStructureFunction:
properties:
$defs:
additionalProperties: true
type: object
anyOf:
items:
$ref: '#/definitions/functions.Item'
$ref: '#/definitions/functions.ItemFunction'
type: array
oneOf:
items:
$ref: '#/definitions/functions.Item'
$ref: '#/definitions/functions.ItemFunction'
type: array
type: object
functions.JSONFunctionStructureName:
properties:
$defs:
additionalProperties: true
type: object
anyOf:
items:
$ref: '#/definitions/functions.ItemName'
type: array
oneOf:
items:
$ref: '#/definitions/functions.ItemName'
type: array
type: object
functions.NameProperties:
properties:
arguments:
$ref: '#/definitions/functions.Argument'
name:
$ref: '#/definitions/functions.FunctionName'
type: object
functions.Tool:
properties:
function:
@@ -491,7 +538,9 @@ definitions:
description: A grammar to constrain the LLM output
type: string
grammar_json_functions:
$ref: '#/definitions/functions.JSONFunctionStructure'
$ref: '#/definitions/functions.JSONFunctionStructureFunction'
grammar_json_name:
$ref: '#/definitions/functions.JSONFunctionStructureName'
ignore_eos:
type: boolean
input: {}
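
One side of the regenerated specs above splits the grammar schema into function- and name-oriented types (`functions.ItemFunction`, `functions.ItemName`, `functions.JSONFunctionStructureFunction`, `functions.JSONFunctionStructureName`) and adds the `grammar_json_name` request field. For concreteness, here is a hand-built value matching the `functions.JSONFunctionStructureFunction` shape exactly as those definitions describe it; the function name and argument fields are made-up examples:

```go
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// One ItemFunction inside oneOf, following the swagger definitions above:
	// ItemFunction{type, properties}, FunctionProperties{function, arguments},
	// FunctionName{const}, Argument{type, properties}.
	grammar := map[string]any{
		"oneOf": []any{
			map[string]any{
				"type": "object",
				"properties": map[string]any{
					"function": map[string]any{"const": "get_weather"}, // illustrative name
					"arguments": map[string]any{
						"type": "object",
						"properties": map[string]any{
							"location": map[string]any{"type": "string"}, // illustrative argument
						},
					},
				},
			},
		},
	}

	out, _ := json.MarshalIndent(grammar, "", "  ")
	fmt.Println(string(out))
}
```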