Mirror of https://github.com/mudler/LocalAI.git (synced 2026-02-03 11:13:31 -05:00)

Compare commits: 53 commits
| SHA1 |
|---|
| 59bfc67ead |
| f80b6dfc2d |
| 4c16957448 |
| a9757fb057 |
| 1c96e0b79e |
| c7f0743f48 |
| ead69a116a |
| 0314b37cd8 |
| 703cd08f01 |
| b53947a5bb |
| 39de3cf21d |
| e3cd11cc0a |
| 5e5037f10d |
| 9c331239d9 |
| 36789e9ead |
| 6ec593c237 |
| bbb1dc2ae0 |
| 385d8dc29b |
| fb574434a4 |
| 7ab3217df0 |
| 2f9f04b260 |
| 8385eb2a59 |
| 99324eeef0 |
| ede352256b |
| b555b64616 |
| 824cc816ea |
| a1bc2e9771 |
| 9fc09b32cf |
| 8ec7a0a407 |
| d3166e8571 |
| 2966979161 |
| f4ed47bf95 |
| 1a75546b27 |
| a6b92af875 |
| 3dc601c470 |
| 153e977155 |
| 7d61de63ae |
| bcd9e153ba |
| 19282af059 |
| 9c0c11e8a0 |
| 3f7eddb039 |
| 77ad49333a |
| ef5e8326c8 |
| 86509e6002 |
| 8667a67695 |
| f505d7ab3f |
| 450dbed820 |
| 46b86f7e6e |
| 0ee1f8c1cf |
| 87bd831aba |
| f9f83791d1 |
| e75f73bf73 |
| bd277162c7 |
.github/workflows/image-pr.yml (vendored): 4 changed lines
@@ -47,7 +47,7 @@ jobs:
   # makeflags: "--jobs=3 --output-sync=target"
   - build-type: 'cublas'
     cuda-major-version: "12"
-    cuda-minor-version: "4"
+    cuda-minor-version: "0"
     platforms: 'linux/amd64'
     tag-latest: 'false'
     tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -120,7 +120,7 @@ jobs:
   # makeflags: "--jobs=3 --output-sync=target"
   # - build-type: 'cublas'
   #   cuda-major-version: "12"
-  #   cuda-minor-version: "4"
+  #   cuda-minor-version: "0"
   #   platforms: 'linux/amd64'
   #   tag-latest: 'false'
   #   tag-suffix: '-cublas-cuda12-ffmpeg-core'
.github/workflows/image.yml (vendored): 8 changed lines
@@ -75,7 +75,7 @@ jobs:
   makeflags: "--jobs=3 --output-sync=target"
   - build-type: 'cublas'
     cuda-major-version: "12"
-    cuda-minor-version: "4"
+    cuda-minor-version: "0"
     platforms: 'linux/amd64'
     tag-latest: 'false'
     tag-suffix: '-cublas-cuda12'
@@ -100,7 +100,7 @@ jobs:
   makeflags: "--jobs=3 --output-sync=target"
   - build-type: 'cublas'
     cuda-major-version: "12"
-    cuda-minor-version: "4"
+    cuda-minor-version: "0"
     platforms: 'linux/amd64'
     tag-latest: 'auto'
     tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -285,7 +285,7 @@ jobs:
   makeflags: "--jobs=4 --output-sync=target"
   - build-type: 'cublas'
     cuda-major-version: "12"
-    cuda-minor-version: "4"
+    cuda-minor-version: "0"
     platforms: 'linux/amd64'
     tag-latest: 'false'
     tag-suffix: '-cublas-cuda12-core'
@@ -307,7 +307,7 @@ jobs:
   makeflags: "--jobs=4 --output-sync=target"
   - build-type: 'cublas'
     cuda-major-version: "12"
-    cuda-minor-version: "4"
+    cuda-minor-version: "0"
     platforms: 'linux/amd64'
     tag-latest: 'false'
     tag-suffix: '-cublas-cuda12-ffmpeg-core'
.github/workflows/release.yaml (vendored): 9 changed lines
@@ -4,6 +4,8 @@ on:
   push:
   branches:
   - master
   tags:
   - 'v*'
+  pull_request:

   env:
@@ -29,11 +31,10 @@ jobs:
   with:
   go-version: '1.21.x'
   cache: false

   - name: Dependencies
   run: |
   sudo apt-get update
-  sudo apt-get install build-essential ffmpeg protobuf-compiler ccache gawk
+  sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk
   sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
   - name: Install CUDA Dependencies
   run: |
@@ -149,7 +150,7 @@ jobs:
   - name: Dependencies
   run: |
   sudo apt-get update
-  sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache gawk cmake libgmock-dev
+  sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
   - name: Intel Dependencies
   run: |
   wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
@@ -250,7 +251,7 @@ jobs:
   - name: Dependencies
   run: |
   sudo apt-get update
-  sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache
+  sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl
   go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
   go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
   - name: Build stablediffusion
.github/workflows/test.yml (vendored): 2 changed lines
@@ -70,7 +70,7 @@ jobs:
   - name: Dependencies
   run: |
   sudo apt-get update
-  sudo apt-get install build-essential curl ffmpeg
+  sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
   sudo apt-get install -y libgmock-dev
   curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
   sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
Dockerfile

@@ -24,7 +24,7 @@ RUN apt-get update && \
   cmake \
   curl \
   git \
-  unzip && \
+  unzip upx-ucl && \
   apt-get clean && \
   rm -rf /var/lib/apt/lists/*
@@ -99,7 +99,7 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers

   ARG BUILD_TYPE
   ARG CUDA_MAJOR_VERSION=12
-  ARG CUDA_MINOR_VERSION=4
+  ARG CUDA_MINOR_VERSION=0

   ENV BUILD_TYPE=${BUILD_TYPE}
Makefile: 53 changed lines
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
   # llama.cpp versions
   GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
   GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-  CPPLLAMA_VERSION?=705b7ecf60e667ced57c15d67aa86865e3cc7aa7
+  CPPLLAMA_VERSION?=b841d0740855c5af1344a81f261139a45a2b39ee

   # gpt4all version
   GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -58,7 +58,7 @@ RANDOM := $(shell bash -c 'echo $$RANDOM')

   VERSION?=$(shell git describe --always --tags || echo "dev" )
   # go tool nm ./local-ai | grep Commit
-  LD_FLAGS?=
+  LD_FLAGS?=-s -w
   override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Version=$(VERSION)"
   override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
@@ -72,6 +72,14 @@ WHITE := $(shell tput -Txterm setaf 7)
   CYAN := $(shell tput -Txterm setaf 6)
   RESET := $(shell tput -Txterm sgr0)

+  UPX?=
+  # check if upx exists
+  ifeq (, $(shell which upx))
+  UPX=
+  else
+  UPX=$(shell which upx)
+  endif
+
   # Default Docker bridge IP
   E2E_BRIDGE_IP?=172.17.0.1
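The new `UPX` block boils down to a `which upx` probe: if the binary is on `PATH` (the CI jobs now install it via `upx-ucl`), its path is recorded, otherwise the variable stays empty and the compression steps added further down become no-ops. A rough shell equivalent, not part of the diff itself:

```bash
# Approximation of the Makefile's UPX autodetection.
if command -v upx >/dev/null 2>&1; then
  UPX="$(command -v upx)"   # later used as: $UPX <binary>
else
  UPX=""                    # empty => the ifneq ($(UPX),) guards skip compression
fi
echo "UPX=${UPX:-not found, binaries stay uncompressed}"
```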
@@ -377,6 +385,7 @@ build: prepare backend-assets grpcs ## Build the project
   $(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
   $(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
   $(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
+  $(info ${GREEN}I UPX: ${YELLOW}$(UPX)${RESET})
   ifneq ($(BACKEND_LIBS),)
   $(MAKE) backend-assets/lib
   cp -f $(BACKEND_LIBS) backend-assets/lib/
@@ -421,7 +430,7 @@ else
   endif

   dist-cross-linux-arm64:
-  CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
+  CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" GO_TAGS="p2p" \
   STATIC=true $(MAKE) build
   mkdir -p release
   # if BUILD_ID is empty, then we don't append it to the binary name
@@ -471,7 +480,7 @@ prepare-e2e:
   mkdir -p $(TEST_DIR)
   cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
   test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
-  docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=4 --build-arg FFMPEG=true -t localai-tests .
+  docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 --build-arg FFMPEG=true -t localai-tests .

   run-e2e-image:
   ls -liah $(abspath ./tests/e2e-fixtures)
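Note that `dist-cross-linux-arm64` now also sets `GO_TAGS="p2p"`, so cross-built arm64 release binaries carry the peer-to-peer support documented further down. The invocation itself is unchanged; a sketch, assuming the aarch64 cross toolchain installed by the release workflow is present:

```bash
# Cross-build the static arm64 artifacts; p2p support is now included via the Go build tag.
make dist-cross-linux-arm64
```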
@@ -733,13 +742,22 @@ backend-assets/grpc: protogen-go replace
   backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
   CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
   $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
+  ifneq ($(UPX),)
+  $(UPX) backend-assets/grpc/bert-embeddings
+  endif

   backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
   CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
   $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
+  ifneq ($(UPX),)
+  $(UPX) backend-assets/grpc/gpt4all
+  endif

   backend-assets/grpc/huggingface: backend-assets/grpc
   $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
+  ifneq ($(UPX),)
+  $(UPX) backend-assets/grpc/huggingface
+  endif

   backend/cpp/llama/llama.cpp:
   LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
@@ -765,6 +783,9 @@ else
   echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
   LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
   endif
+  ifneq ($(UPX),)
+  $(UPX) backend/cpp/${VARIANT}/grpc-server
+  endif

   # This target is for manually building a variant with-auto detected flags
   backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp
@@ -837,33 +858,57 @@ backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama.
   backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
   mkdir -p backend-assets/util/
   cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
+  ifneq ($(UPX),)
+  $(UPX) backend-assets/util/llama-cpp-rpc-server
+  endif

   backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
   CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
   $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
+  ifneq ($(UPX),)
+  $(UPX) backend-assets/grpc/llama-ggml
+  endif

   backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
   CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
   $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
+  ifneq ($(UPX),)
+  $(UPX) backend-assets/grpc/piper
+  endif

   backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc
   CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \
   $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
+  ifneq ($(UPX),)
+  $(UPX) backend-assets/grpc/rwkv
+  endif

   backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
   CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
   $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
+  ifneq ($(UPX),)
+  $(UPX) backend-assets/grpc/stablediffusion
+  endif

   backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
   CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
   $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
+  ifneq ($(UPX),)
+  $(UPX) backend-assets/grpc/tinydream
+  endif

   backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
   CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
   $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
+  ifneq ($(UPX),)
+  $(UPX) backend-assets/grpc/whisper
+  endif

   backend-assets/grpc/local-store: backend-assets/grpc
   $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/
+  ifneq ($(UPX),)
+  $(UPX) backend-assets/grpc/local-store
+  endif

   grpcs: prepare $(GRPC_BACKENDS)
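Both the `-s -w` default for `LD_FLAGS` (strip the symbol and DWARF tables) and the optional UPX pass exist purely to shrink the shipped backend binaries. A hedged before/after check, assuming a local `make build` has already produced one of the gRPC backends:

```bash
# Illustration only; sizes vary by platform and build flags.
ls -lh backend-assets/grpc/local-store   # size after the stripped (-s -w) build
upx backend-assets/grpc/local-store      # same call the new ifneq ($(UPX),) step makes
ls -lh backend-assets/grpc/local-store   # typically noticeably smaller afterwards
```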
backend/cpp/llama/CMakeLists.txt

@@ -75,11 +75,24 @@ add_library(hw_grpc_proto
   ${hw_proto_hdrs} )

   add_executable(${TARGET} grpc-server.cpp utils.hpp json.hpp)
-  target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
-    absl::flags_parse
-    gRPC::${_REFLECTION}
-    gRPC::${_GRPC_GRPCPP}
-    protobuf::${_PROTOBUF_LIBPROTOBUF})
+
+  # Conditionally link SYCL to grpc-server
+  # https://github.com/ggerganov/llama.cpp/issues/8665
+  if ( DEFINED ENV{ONEAPI_ROOT})
+    target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
+      absl::flags_parse
+      gRPC::${_REFLECTION}
+      gRPC::${_GRPC_GRPCPP}
+      protobuf::${_PROTOBUF_LIBPROTOBUF}
+      sycl)
+  else()
+    target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
+      absl::flags_parse
+      gRPC::${_REFLECTION}
+      gRPC::${_GRPC_GRPCPP}
+      protobuf::${_PROTOBUF_LIBPROTOBUF})
+  endif()

   target_compile_features(${TARGET} PRIVATE cxx_std_11)
   if(TARGET BUILD_INFO)
   add_dependencies(${TARGET} BUILD_INFO)
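The `if (DEFINED ENV{ONEAPI_ROOT})` guard keys the extra `sycl` link dependency off the Intel oneAPI environment. As a rough check (the `/opt/intel/oneapi` path is the usual default install location, an assumption rather than something this diff states), `ONEAPI_ROOT` is normally exported by sourcing the oneAPI setup script before configuring the build:

```bash
# Load the Intel oneAPI environment so the SYCL branch above is taken during configuration.
source /opt/intel/oneapi/setvars.sh
echo "ONEAPI_ROOT=${ONEAPI_ROOT:?not set, grpc-server and rpc-server will link without sycl}"
```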
backend/cpp/llama/CMakeLists.txt.rpc-8662 (new file): 8 lines
@@ -0,0 +1,8 @@
# https://github.com/ggerganov/llama.cpp/issues/8665

add_executable(rpc-server rpc-server.cpp)
if ( DEFINED ENV{ONEAPI_ROOT})
  target_link_libraries(rpc-server PRIVATE ggml llama sycl)
else()
  target_link_libraries(rpc-server PRIVATE ggml llama)
endif()
@@ -17,4 +17,7 @@ cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
   cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
   echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
   cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
   cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
+
+  # https://github.com/ggerganov/llama.cpp/issues/8665
+  cp -rfv CMakeLists.txt.rpc-8662 llama.cpp/examples/rpc/CMakeLists.txt
@@ -1,6 +1,6 @@
   accelerate
   auto-gptq==0.7.1
-  grpcio==1.65.0
+  grpcio==1.65.1
   protobuf
   torch
   certifi
@@ -1,6 +1,6 @@
   accelerate
   bark==0.1.5
-  grpcio==1.65.0
+  grpcio==1.65.1
   protobuf
   certifi
   transformers
@@ -1,2 +1,2 @@
-  grpcio==1.65.0
+  grpcio==1.65.1
   protobuf
@@ -1,6 +1,6 @@
   accelerate
   TTS==0.22.0
-  grpcio==1.65.0
+  grpcio==1.65.1
   protobuf
   certifi
   transformers
@@ -3,7 +3,7 @@ accelerate
   compel
   peft
   diffusers
-  grpcio==1.65.0
+  grpcio==1.65.1
   opencv-python
   pillow
   protobuf
@@ -1,5 +1,5 @@
   accelerate
-  grpcio==1.65.0
+  grpcio==1.65.1
   protobuf
   certifi
   torch
@@ -1,6 +1,6 @@
   causal-conv1d==1.4.0
   mamba-ssm==2.2.2
-  grpcio==1.65.0
+  grpcio==1.65.1
   protobuf
   certifi
   transformers
@@ -2,7 +2,7 @@
   intel-extension-for-pytorch
   torch
   optimum[openvino]
-  grpcio==1.64.1
+  grpcio==1.65.1
   protobuf
   librosa==0.9.1
   faster-whisper==1.0.3
@@ -1,4 +1,4 @@
-  grpcio==1.65.0
+  grpcio==1.65.1
   protobuf
   librosa
   faster-whisper
@@ -1,5 +1,5 @@
   accelerate
-  grpcio==1.65.0
+  grpcio==1.65.1
   protobuf
   torch
   git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16
@@ -1,6 +1,6 @@
   accelerate
   rerankers[transformers]
-  grpcio==1.65.0
+  grpcio==1.65.1
   protobuf
   certifi
   transformers
@@ -2,4 +2,4 @@
   intel-extension-for-pytorch
   torch
   optimum[openvino]
-  setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
+  setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,6 +1,6 @@
   accelerate
   sentence-transformers==3.0.1
   transformers
-  grpcio==1.65.0
+  grpcio==1.65.1
   protobuf
   certifi
@@ -2,4 +2,4 @@
   intel-extension-for-pytorch
   torch
   optimum[openvino]
-  setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
+  setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,6 +1,6 @@
   accelerate
   transformers
-  grpcio==1.65.0
+  grpcio==1.65.1
   protobuf
   torch
   scipy==1.14.0
@@ -2,4 +2,3 @@
   intel-extension-for-pytorch
   torch
   optimum[openvino]
-  setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,9 +1,9 @@
   accelerate
   transformers
-  grpcio==1.65.0
+  grpcio==1.65.1
   protobuf
   torch
   certifi
   intel-extension-for-transformers
   bitsandbytes
-  setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
+  setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,4 +1,4 @@
   accelerate
-  grpcio==1.65.0
+  grpcio==1.65.1
   protobuf
   certifi
@@ -1,6 +1,6 @@
   accelerate
   vllm
-  grpcio==1.65.0
+  grpcio==1.65.1
   protobuf
   certifi
   transformers
@@ -16,7 +16,16 @@
   </a>
   </h2>
   <h5 class="mb-4 text-justify">LocalAI uses P2P technologies to enable distribution of work between peers. It is possible to share an instance with Federation and/or split the weights of a model across peers (only available with llama.cpp models). You can now share computational resources between your devices or your friends!</h5>

+  <!-- Warning box if p2p token is empty and p2p is enabled -->
+  {{ if and .IsP2PEnabled (eq .P2PToken "") }}
+  <div class="bg-red-500 p-4 rounded-lg shadow-lg mb-12 text-left">
+  <p class="text-xl font-semibold text-white"> <i class="fa-solid fa-exclamation-triangle"></i> Warning: P2P mode is disabled or no token was specified</p>
+  <p class="mb-4">You have to enable P2P mode by starting LocalAI with <code>--p2p</code>. Please restart the server with <code>--p2p</code> to generate a new token automatically that can be used to automatically discover other nodes. If you already have a token specify it with <code>export TOKEN=".."</code> <a href="https://localai.io/features/distribute/" target="_blank">
+  Check out the documentation for more information.
+  </a> </p>
+  </div>
+  {{ else }}

   <!-- Federation Box -->
   <div class="bg-gray-800 p-6 rounded-lg shadow-lg mb-12 text-left">
@@ -128,7 +137,8 @@
   </div>
   </div>
   </div>
   <!-- Llama.cpp Box END -->
+  {{ end }}
   </div>
   </div>
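The warning text above offers two ways out: restart with `--p2p` to generate a fresh token, or export an existing one before starting. A short sketch of the second path, with the token value as a placeholder:

```bash
# Reuse a previously generated P2P token instead of generating a new one.
export TOKEN="<existing-p2p-token>"
local-ai run --p2p
```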
@@ -5,17 +5,65 @@ weight = 15
url = "/features/distribute/"
+++

This functionality enables LocalAI to distribute inference requests across multiple worker nodes, improving efficiency and performance. Nodes are automatically discovered and connect via p2p by using a shared token which makes sure the communication is secure and private between the nodes of the network.

LocalAI supports two modes of distributed inferencing via p2p:

- **Federated Mode**: Requests are shared between the cluster and routed to a single worker node in the network based on the load balancer's decision.
- **Worker Mode** (aka "model sharding" or "splitting weights"): Requests are processed by all the workers which contributes to the final inference result (by sharing the model weights).

## Usage

Starting LocalAI with `--p2p` generates a shared token for connecting multiple instances: and that's all you need to create AI clusters, eliminating the need for intricate network setups.

Simply navigate to the "Swarm" section in the WebUI and follow the on-screen instructions.

For fully shared instances, initiate LocalAI with --p2p --federated and adhere to the Swarm section's guidance. This feature, while still experimental, offers a tech preview quality experience.

### Federated mode

Federated mode allows to launch multiple LocalAI instances and connect them together in a federated network. This mode is useful when you want to distribute the load of the inference across multiple nodes, but you want to have a single point of entry for the API. In the Swarm section of the WebUI, you can see the instructions to connect multiple instances together.

To start a LocalAI server in federated mode, run:

```bash
local-ai run --p2p --federated
```

This will generate a token that you can use to connect other LocalAI instances to the network or others can use to join the network. If you already have a token, you can specify it using the `TOKEN` environment variable.

To start a load balanced server that routes the requests to the network, run with the `TOKEN`:

```bash
local-ai federated
```

To see all the available options, run `local-ai federated --help`.

The instructions are displayed in the "Swarm" section of the WebUI, guiding you through the process of connecting multiple instances.

### Workers mode

{{% alert note %}}
This feature is available exclusively with llama-cpp compatible models.

This feature was introduced in [LocalAI pull request #2324](https://github.com/mudler/LocalAI/pull/2324) and is based on the upstream work in [llama.cpp pull request #6829](https://github.com/ggerganov/llama.cpp/pull/6829).
{{% /alert %}}

This functionality enables LocalAI to distribute inference requests across multiple worker nodes, improving efficiency and performance.
To connect multiple workers to a single LocalAI instance, start first a server in p2p mode:

## Usage
```bash
local-ai run --p2p
```

### Starting Workers
And navigate the WebUI to the "Swarm" section to see the instructions to connect multiple workers to the network.

### Without P2P

To start workers for distributing the computational load, run:
@@ -23,48 +71,27 @@ To start workers for distributing the computational load, run:
```bash
local-ai worker llama-cpp-rpc <listening_address> <listening_port>
```

Alternatively, you can build the RPC server following the llama.cpp [README](https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md), which is compatible with LocalAI.

### Starting LocalAI

To start the LocalAI server, which handles API requests, specify the worker addresses using the `LLAMACPP_GRPC_SERVERS` environment variable:
And you can specify the address of the workers when starting LocalAI with the `LLAMACPP_GRPC_SERVERS` environment variable:

```bash
LLAMACPP_GRPC_SERVERS="address1:port,address2:port" local-ai run
```

The workload on the LocalAI server will then be distributed across the specified nodes.

## Peer-to-Peer Networking
Alternatively, you can build the RPC workers/server following the llama.cpp [README](https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md), which is compatible with LocalAI.

## Manual example (worker)

Workers can also connect to each other in a peer-to-peer network, distributing the workload in a decentralized manner.

A shared token between the server and the workers is required for communication within the peer-to-peer network. This feature supports both local network (using mDNS discovery) and DHT for communication across different networks.

The token is automatically generated when starting the server with the `--p2p` flag. Workers can be started with the token using `local-ai worker p2p-llama-cpp-rpc` and specifying the token via the environment variable `TOKEN` or with the `--token` argument.

A network is established between the server and workers using DHT and mDNS discovery protocols. The llama.cpp RPC server is automatically started and exposed to the peer-to-peer network, allowing the API server to connect.

When the HTTP server starts, it discovers workers in the network and creates port forwards to the local service. Llama.cpp is configured to use these services. For more details on the implementation, refer to [LocalAI pull request #2343](https://github.com/mudler/LocalAI/pull/2343).

### Usage
Use the WebUI to guide you in the process of starting new workers. This example shows the manual steps to highlight the process.

1. Start the server with `--p2p`:

```bash
./local-ai run --p2p
# 1:02AM INF loading environment variables from file envFile=.env
# 1:02AM INF Setting logging to info
# 1:02AM INF P2P mode enabled
# 1:02AM INF No token provided, generating one
# 1:02AM INF Generated Token:
# XXXXXXXXXXX
# 1:02AM INF Press a button to proceed
# Get the token in the Swarm section of the WebUI
```

Copy the displayed token and press Enter.
Copy the token from the WebUI or via API call (e.g., `curl http://localhost:8000/p2p/token`) and save it for later use.

To reuse the same token later, restart the server with `--p2ptoken` or `P2P_TOKEN`.
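As described above, a worker joins the network with the shared token via `local-ai worker p2p-llama-cpp-rpc`. A minimal sketch for a second machine, with the token value as a placeholder:

```bash
# Start a p2p llama.cpp RPC worker using the token printed by the server (or shown in the WebUI).
TOKEN="<token-from-the-server>" local-ai worker p2p-llama-cpp-rpc
```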
@@ -93,12 +120,14 @@ The server logs should indicate that new workers are being discovered.

3. Start inference as usual on the server initiated in step 1.

## Notes

- If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file.
- Only a single model is supported currently.
- Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun.
- For more details on the implementation, refer to [LocalAI pull request #2343](https://github.com/mudler/LocalAI/pull/2343)

## Environment Variables
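The first note above is worth spelling out: peer discovery relies on mDNS/DHT, which does not work from behind Docker's default bridge network. A hedged sketch of the CLI form; the image tag, token, and trailing arguments are placeholders and may need adjusting to the image's entrypoint:

```bash
# Run LocalAI with host networking so p2p peers can be discovered
# (docker-compose equivalent: network_mode: host).
docker run -d --net host -e TOKEN="<your-p2p-token>" localai/localai:<tag> run --p2p
```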
@@ -1,3 +1,3 @@
   {
-    "version": "v2.18.1"
+    "version": "v2.19.1"
   }
docs/themes/hugo-theme-relearn (vendored): 2 changed lines

Submodule docs/themes/hugo-theme-relearn updated: 1b2e139512...7aec99b38d
@@ -1,6 +1,6 @@
-  llama_index==0.10.55
+  llama_index==0.10.56
   requests==2.32.3
-  weaviate_client==4.6.5
+  weaviate_client==4.6.7
   transformers
   torch
   chainlit
@@ -1,2 +1,2 @@
-  langchain==0.2.8
-  openai==1.35.13
+  langchain==0.2.10
+  openai==1.37.0
@@ -1,4 +1,4 @@
-  langchain==0.2.8
-  openai==1.35.13
+  langchain==0.2.10
+  openai==1.37.0
   chromadb==0.5.4
-  llama-index==0.10.55
+  llama-index==0.10.56
@@ -10,21 +10,21 @@ debugpy==1.8.2
   frozenlist==1.4.1
   greenlet==3.0.3
   idna==3.7
-  langchain==0.2.8
-  langchain-community==0.2.7
+  langchain==0.2.10
+  langchain-community==0.2.9
   marshmallow==3.21.3
   marshmallow-enum==1.5.1
   multidict==6.0.5
   mypy-extensions==1.0.0
   numexpr==2.10.1
-  numpy==1.26.4
-  openai==1.35.13
+  numpy==2.0.1
+  openai==1.37.0
   openapi-schema-pydantic==1.2.4
   packaging>=23.2
   pydantic==2.8.2
   PyYAML==6.0.1
   requests==2.32.3
-  SQLAlchemy==2.0.30
+  SQLAlchemy==2.0.31
   tenacity==8.5.0
   tqdm==4.66.4
   typing-inspect==0.9.0
gallery/index.yaml

@@ -24,6 +24,33 @@
    - filename: DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf
      sha256: 50ec78036433265965ed1afd0667c00c71c12aa70bcf383be462cb8e159db6c0
      uri: huggingface://LoneStriker/DeepSeek-Coder-V2-Lite-Instruct-GGUF/DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf
- name: "archangel_sft_pythia2-8b"
  url: "github:mudler/LocalAI/gallery/tuluv2.yaml@master"
  icon: https://gist.github.com/assets/29318529/fe2d8391-dbd1-4b7e-9dc4-7cb97e55bc06
  license: apache-2.0
  urls:
    - https://huggingface.co/ContextualAI/archangel_sft_pythia2-8b
    - https://huggingface.co/RichardErkhov/ContextualAI_-_archangel_sft_pythia2-8b-gguf
    - https://github.com/ContextualAI/HALOs
  description: |
    datasets:
    - stanfordnlp/SHP
    - Anthropic/hh-rlhf
    - OpenAssistant/oasst1

    This repo contains the model checkpoints for:
    - model family pythia2-8b
    - optimized with the loss SFT
    - aligned using the SHP, Anthropic HH and Open Assistant datasets.

    Please refer to our [code repository](https://github.com/ContextualAI/HALOs) or [blog](https://contextual.ai/better-cheaper-faster-llm-alignment-with-kto/) which contains intructions for training your own HALOs and links to our model cards.
  overrides:
    parameters:
      model: archangel_sft_pythia2-8b.Q4_K_M.gguf
  files:
    - filename: archangel_sft_pythia2-8b.Q4_K_M.gguf
      sha256: a47782c55ef2b39b19644213720a599d9849511a73c9ebb0c1de749383c0a0f8
      uri: huggingface://RichardErkhov/ContextualAI_-_archangel_sft_pythia2-8b-gguf/archangel_sft_pythia2-8b.Q4_K_M.gguf
- &qwen2
  ## Start QWEN2
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
@@ -220,6 +247,36 @@
    - filename: Qwen2-Wukong-7B-Q4_K_M.gguf
      sha256: 6b8ca6649c33fc84d4892ebcff1214f0b34697aced784f0d6d32e284a15943ad
      uri: huggingface://bartowski/Qwen2-Wukong-7B-GGUF/Qwen2-Wukong-7B-Q4_K_M.gguf
- !!merge <<: *qwen2
  name: "calme-2.8-qwen2-7b"
  icon: https://huggingface.co/MaziyarPanahi/calme-2.8-qwen2-7b/resolve/main/qwen2-fine-tunes-maziyar-panahi.webp
  urls:
    - https://huggingface.co/MaziyarPanahi/calme-2.8-qwen2-7b
    - https://huggingface.co/MaziyarPanahi/calme-2.8-qwen2-7b-GGUF
  description: |
    This is a fine-tuned version of the Qwen/Qwen2-7B model. It aims to improve the base model across all benchmarks.
  overrides:
    parameters:
      model: Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
  files:
    - filename: Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
      sha256: 8c1b3efe9fa6ae1b37942ef26473cb4e0aed0f8038b60d4b61e5bffb61e49b7e
      uri: huggingface://MaziyarPanahi/calme-2.8-qwen2-7b-GGUF/Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
- !!merge <<: *qwen2
  name: "stellardong-72b-i1"
  icon: https://huggingface.co/smelborp/StellarDong-72b/resolve/main/stellardong.png
  urls:
    - https://huggingface.co/smelborp/StellarDong-72b
    - https://huggingface.co/mradermacher/StellarDong-72b-i1-GGUF
  description: |
    Magnum + Nova = you won't believe how stellar this dong is!!
  overrides:
    parameters:
      model: StellarDong-72b.i1-Q4_K_M.gguf
  files:
    - filename: StellarDong-72b.i1-Q4_K_M.gguf
      sha256: 4c5012f0a034f40a044904891343ade2594f29c28a8a9d8052916de4dc5a61df
      uri: huggingface://mradermacher/StellarDong-72b-i1-GGUF/StellarDong-72b.i1-Q4_K_M.gguf
- &mistral03
  ## START Mistral
  url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"
@@ -3146,6 +3203,89 @@
    - filename: L3-8B-Celeste-v1-Q4_K_M.gguf
      sha256: ed5277719965fb6bbcce7d16742e3bac4a8d5b8f52133261a3402a480cd65317
      uri: huggingface://bartowski/L3-8B-Celeste-v1-GGUF/L3-8B-Celeste-v1-Q4_K_M.gguf
- !!merge <<: *llama3
  name: "l3-8b-celeste-v1.2"
  icon: https://cdn-uploads.huggingface.co/production/uploads/630cf5d14ca0a22768bbe10c/Zv__LDTO-nHvpuxPcCgUU.webp
  urls:
    - https://huggingface.co/mudler/L3-8B-Celeste-V1.2-Q4_K_M-GGUF
  description: |
    Trained on LLaMA 3 8B Instruct at 8K context using Reddit Writing Prompts, Opus 15K Instruct an c2 logs cleaned.

    This is a roleplay model any instruction following capabilities outside roleplay contexts are coincidental.
  overrides:
    parameters:
      model: l3-8b-celeste-v1.2-q4_k_m.gguf
  files:
    - filename: l3-8b-celeste-v1.2-q4_k_m.gguf
      sha256: 7752204c0e9f627ff5726eb69bb6114974cafbc934a993ad019abfba62002783
      uri: huggingface://mudler/L3-8B-Celeste-V1.2-Q4_K_M-GGUF/l3-8b-celeste-v1.2-q4_k_m.gguf
- !!merge <<: *llama3
  name: "llama-3-tulu-2-8b-i1"
  icon: https://huggingface.co/datasets/allenai/blog-images/resolve/main/tulu-v2/Tulu%20V2%20banner.png
  urls:
    - https://huggingface.co/allenai/llama-3-tulu-2-8b
    - https://huggingface.co/mradermacher/llama-3-tulu-2-8b-i1-GGUF
  description: |
    Tulu is a series of language models that are trained to act as helpful assistants. Llama 3 Tulu V2 8B is a fine-tuned version of Llama 3 that was trained on a mix of publicly available, synthetic and human datasets.
  overrides:
    parameters:
      model: llama-3-tulu-2-8b.i1-Q4_K_M.gguf
  files:
    - filename: llama-3-tulu-2-8b.i1-Q4_K_M.gguf
      sha256: f859c22bfa64f461e9ffd973dc7ad6a78bb98b1dda6f49abfa416a4022b7e333
      uri: huggingface://mradermacher/llama-3-tulu-2-8b-i1-GGUF/llama-3-tulu-2-8b.i1-Q4_K_M.gguf
- !!merge <<: *llama3
  name: "llama-3-tulu-2-dpo-70b-i1"
  icon: https://huggingface.co/datasets/allenai/blog-images/resolve/main/tulu-v2/Tulu%20V2%20banner.png
  urls:
    - https://huggingface.co/allenai/llama-3-tulu-2-dpo-70b
    - https://huggingface.co/mradermacher/llama-3-tulu-2-dpo-70b-i1-GGUF
  description: |
    Tulu is a series of language models that are trained to act as helpful assistants. Llama 3 Tulu V2 8B is a fine-tuned version of Llama 3 that was trained on a mix of publicly available, synthetic and human datasets.
  overrides:
    parameters:
      model: llama-3-tulu-2-dpo-70b.i1-Q4_K_M.gguf
  files:
    - filename: llama-3-tulu-2-dpo-70b.i1-Q4_K_M.gguf
      sha256: fc309bbdf1e2bdced954c4c8dc1f9a885c547017ee5e750bfde645af89e3d3a5
      uri: huggingface://mradermacher/llama-3-tulu-2-dpo-70b-i1-GGUF/llama-3-tulu-2-dpo-70b.i1-Q4_K_M.gguf
- !!merge <<: *llama3
  license: cc-by-nc-4.0
  name: "suzume-llama-3-8b-multilingual-orpo-borda-top25"
  icon: https://cdn-uploads.huggingface.co/production/uploads/64b63f8ad57e02621dc93c8b/kWQSu02YfgYdUQqv4s5lq.png
  urls:
    - https://huggingface.co/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25
    - https://huggingface.co/RichardErkhov/lightblue_-_suzume-llama-3-8B-multilingual-orpo-borda-top25-gguf
  description: |
    This is Suzume ORPO, an ORPO trained fine-tune of the lightblue/suzume-llama-3-8B-multilingual model using our lightblue/mitsu dataset.

    We have trained several versions of this model using ORPO and so recommend that you use the best performing model from our tests, lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half.

    Note that this model has a non-commerical license as we used the Command R and Command R+ models to generate our training data for this model (lightblue/mitsu).

    We are currently working on a developing a commerically usable model, so stay tuned for that!
  overrides:
    parameters:
      model: suzume-llama-3-8B-multilingual-orpo-borda-top25.Q4_K_M.gguf
  files:
    - filename: suzume-llama-3-8B-multilingual-orpo-borda-top25.Q4_K_M.gguf
      sha256: ef75a02c5f38e14a8873c7989188dac6974851b4654279fe1921d2c8018cc388
      uri: huggingface://RichardErkhov/lightblue_-_suzume-llama-3-8B-multilingual-orpo-borda-top25-gguf/suzume-llama-3-8B-multilingual-orpo-borda-top25.Q4_K_M.gguf
- !!merge <<: *llama3
  name: "calme-2.4-llama3-70b"
  icon: https://huggingface.co/MaziyarPanahi/calme-2.4-llama3-70b/resolve/main/llama-3-merges.webp
  urls:
    - https://huggingface.co/MaziyarPanahi/calme-2.4-llama3-70b
    - https://huggingface.co/mradermacher/calme-2.4-llama3-70b-GGUF
  description: |
    This model is a fine-tune (DPO) of meta-llama/Meta-Llama-3-70B-Instruct model.
  overrides:
    parameters:
      model: calme-2.4-llama3-70b.Q4_K_M.gguf
  files:
    - filename: calme-2.4-llama3-70b.Q4_K_M.gguf
      sha256: 0b44ac8a88395dfc60f1b9d3cfffc0ffef74ec0a302e610ef91fc787187568f2
      uri: huggingface://mradermacher/calme-2.4-llama3-70b-GGUF/calme-2.4-llama3-70b.Q4_K_M.gguf
- &command-R
  ### START Command-r
  url: "github:mudler/LocalAI/gallery/command-r.yaml@master"
@@ -3405,7 +3545,23 @@
    - filename: phillama-3.8b-v0.1.Q4_K_M.gguf
      sha256: da537d352b7aae54bbad0d2cff3e3a1b0e1dc1e1d25bec3aae1d05cf4faee7a2
      uri: huggingface://RichardErkhov/raincandy-u_-_phillama-3.8b-v0.1-gguf/phillama-3.8b-v0.1.Q4_K_M.gguf
- !!merge <<: *llama3
  name: "calme-2.3-phi3-4b"
  icon: https://huggingface.co/MaziyarPanahi/calme-2.1-phi3-4b/resolve/main/phi-3-instruct.webp
  urls:
    - https://huggingface.co/MaziyarPanahi/calme-2.3-phi3-4b
    - https://huggingface.co/MaziyarPanahi/calme-2.3-phi3-4b-GGUF
  description: |
    MaziyarPanahi/calme-2.1-phi3-4b

    This model is a fine-tune (DPO) of microsoft/Phi-3-mini-4k-instruct model.
  overrides:
    parameters:
      model: Phi-3-mini-4k-instruct-v0.3.Q4_K_M.gguf
  files:
    - filename: Phi-3-mini-4k-instruct-v0.3.Q4_K_M.gguf
      sha256: 3a23e1052369c080afb925882bd814cbea5ec859894655a7434c3d49e43a6127
      uri: huggingface://MaziyarPanahi/calme-2.3-phi3-4b-GGUF/Phi-3-mini-4k-instruct-v0.3.Q4_K_M.gguf
- &hermes-2-pro-mistral
  ### START Hermes
  url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"
gallery/tuluv2.yaml (new file): 43 lines
@@ -0,0 +1,43 @@
---
name: "tuluv2"

config_file: |
  mmap: true
  template:
    chat_message: |
      <|{{ .RoleName }}|>
      {{ if .FunctionCall -}}
      Function call:
      {{ else if eq .RoleName "tool" -}}
      Function response:
      {{ end -}}
      {{ if .Content -}}
      {{.Content }}
      {{ end -}}
      {{ if .FunctionCall -}}
      {{toJson .FunctionCall}}
      {{ end -}}
    function: |
      <|{{ .RoleName }}|>
      {{ if .FunctionCall -}}
      Function call:
      {{ else if eq .RoleName "tool" -}}
      Function response:
      {{ end -}}
      {{ if .Content -}}
      {{.Content }}
      {{ end -}}
      {{ if .FunctionCall -}}
      {{toJson .FunctionCall}}
      {{ end -}}
    chat: |
      {{.Input -}}
      <|assistant|>
    completion: |
      {{.Input}}
  context_size: 4096
  f16: true
  stopwords:
  - '<|im_end|>'
  - '<dummy32000>'
  - '<|endoftext|>'
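For context on how entries like these get used: gallery models such as `archangel_sft_pythia2-8b` (which points at this new `tuluv2.yaml` template) are normally installed into a running instance through LocalAI's model-gallery endpoint. The sketch below follows the project's gallery documentation rather than anything in this changeset, and the exact `id` format is an assumption; adjust it to whatever your gallery lists:

```bash
# Hedged sketch: ask a running LocalAI server to install the gallery entry defined above.
curl http://localhost:8080/models/apply \
  -H "Content-Type: application/json" \
  -d '{"id": "localai@archangel_sft_pythia2-8b"}'
```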
@@ -212,7 +212,7 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
   grpcProcess = p
   foundCUDA = true
   } else {
-  log.Info().Msgf("GPU device found but no CUDA backend present")
+  log.Debug().Msgf("Nvidia GPU device found, no embedded CUDA variant found. You can ignore this message if you are using container with CUDA support")
   }
   }
   if strings.Contains(gpu.String(), "amd") {
@@ -222,7 +222,7 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
   grpcProcess = p
   foundAMDGPU = true
   } else {
-  log.Info().Msgf("GPU device found but no HIPBLAS backend present")
+  log.Debug().Msgf("AMD GPU device found, no embedded HIPBLAS variant found. You can ignore this message if you are using container with HIPBLAS support")
   }
   }
   if strings.Contains(gpu.String(), "intel") {
@@ -236,7 +236,7 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
   grpcProcess = p
   foundIntelGPU = true
   } else {
-  log.Info().Msgf("GPU device found but no Intel backend present")
+  log.Debug().Msgf("Intel GPU device found, no embedded SYCL variant found. You can ignore this message if you are using container with SYCL support")
   }
   }
   }