Mirror of https://github.com/mudler/LocalAI.git (synced 2026-02-03 11:13:31 -05:00)

Compare commits: debug_nvid...v2.19.2 (40 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 80ae919dbe | |
| | 0802895cd2 | |
| | 9fee46207a | |
| | bd900945f7 | |
| | 89484efaed | |
| | a9757fb057 | |
| | 1c96e0b79e | |
| | c7f0743f48 | |
| | ead69a116a | |
| | 0314b37cd8 | |
| | 703cd08f01 | |
| | b53947a5bb | |
| | 39de3cf21d | |
| | e3cd11cc0a | |
| | 5e5037f10d | |
| | 9c331239d9 | |
| | 36789e9ead | |
| | 6ec593c237 | |
| | bbb1dc2ae0 | |
| | 385d8dc29b | |
| | fb574434a4 | |
| | 7ab3217df0 | |
| | 2f9f04b260 | |
| | 8385eb2a59 | |
| | 99324eeef0 | |
| | ede352256b | |
| | b555b64616 | |
| | 824cc816ea | |
| | a1bc2e9771 | |
| | 9fc09b32cf | |
| | 8ec7a0a407 | |
| | d3166e8571 | |
| | 2966979161 | |
| | f4ed47bf95 | |
| | 1a75546b27 | |
| | a6b92af875 | |
| | 3dc601c470 | |
| | 153e977155 | |
| | 7d61de63ae | |
| | bcd9e153ba | |
.github/workflows/image-pr.yml (vendored): 4 changes

```diff
@@ -47,7 +47,7 @@ jobs:
           # makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "4"
+          cuda-minor-version: "0"
           platforms: 'linux/amd64'
           tag-latest: 'false'
           tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -120,7 +120,7 @@ jobs:
           # makeflags: "--jobs=3 --output-sync=target"
         # - build-type: 'cublas'
         #   cuda-major-version: "12"
-        #   cuda-minor-version: "4"
+        #   cuda-minor-version: "0"
         #   platforms: 'linux/amd64'
         #   tag-latest: 'false'
         #   tag-suffix: '-cublas-cuda12-ffmpeg-core'
```
.github/workflows/image.yml (vendored): 8 changes

```diff
@@ -75,7 +75,7 @@ jobs:
           makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "4"
+          cuda-minor-version: "0"
           platforms: 'linux/amd64'
           tag-latest: 'false'
           tag-suffix: '-cublas-cuda12'
@@ -100,7 +100,7 @@ jobs:
           makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "4"
+          cuda-minor-version: "0"
           platforms: 'linux/amd64'
           tag-latest: 'auto'
           tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -285,7 +285,7 @@ jobs:
           makeflags: "--jobs=4 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "4"
+          cuda-minor-version: "0"
           platforms: 'linux/amd64'
           tag-latest: 'false'
           tag-suffix: '-cublas-cuda12-core'
@@ -307,7 +307,7 @@ jobs:
           makeflags: "--jobs=4 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "4"
+          cuda-minor-version: "0"
           platforms: 'linux/amd64'
           tag-latest: 'false'
           tag-suffix: '-cublas-cuda12-ffmpeg-core'
```
.github/workflows/release.yaml (vendored): 7 changes

```diff
@@ -31,11 +31,10 @@ jobs:
         with:
           go-version: '1.21.x'
           cache: false
-
       - name: Dependencies
         run: |
           sudo apt-get update
-          sudo apt-get install build-essential ffmpeg protobuf-compiler ccache gawk
+          sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk
           sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
       - name: Install CUDA Dependencies
         run: |
@@ -151,7 +150,7 @@
       - name: Dependencies
         run: |
           sudo apt-get update
-          sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache gawk cmake libgmock-dev
+          sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
       - name: Intel Dependencies
         run: |
           wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
@@ -252,7 +251,7 @@
      - name: Dependencies
        run: |
          sudo apt-get update
-         sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache
+         sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl
         go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
         go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
      - name: Build stablediffusion
```
.github/workflows/test.yml (vendored): 2 changes

```diff
@@ -70,7 +70,7 @@ jobs:
      - name: Dependencies
        run: |
          sudo apt-get update
-         sudo apt-get install build-essential curl ffmpeg
+         sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
         sudo apt-get install -y libgmock-dev
         curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
         sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
```
Dockerfile

```diff
@@ -24,7 +24,7 @@ RUN apt-get update && \
        cmake \
        curl \
        git \
-        unzip && \
+        unzip upx-ucl && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
 
@@ -99,7 +99,7 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers
 
 ARG BUILD_TYPE
 ARG CUDA_MAJOR_VERSION=12
-ARG CUDA_MINOR_VERSION=4
+ARG CUDA_MINOR_VERSION=0
 
 ENV BUILD_TYPE=${BUILD_TYPE}
```
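For reference, the CUDA-related values touched above are plain Dockerfile `ARG`s, so they can be overridden when building the image. A hedged sketch of such a build (the tag name is illustrative, not part of the diff; the build arguments themselves appear in the Dockerfile and Makefile hunks in this compare):

```bash
# Build a CUDA 12.0 cuBLAS image from the repository root; tag is illustrative.
docker build \
  --build-arg BUILD_TYPE=cublas \
  --build-arg CUDA_MAJOR_VERSION=12 \
  --build-arg CUDA_MINOR_VERSION=0 \
  -t localai:cublas-cuda12 .
```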
Makefile: 51 changes

```diff
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=07283b1a90e1320aae4762c7e03c879043910252
+CPPLLAMA_VERSION?=79167d9e49aef9caa98e13ee7ca067ec9f88b4b5
 
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -58,7 +58,7 @@ RANDOM := $(shell bash -c 'echo $$RANDOM')
 
 VERSION?=$(shell git describe --always --tags || echo "dev" )
 # go tool nm ./local-ai | grep Commit
-LD_FLAGS?=
+LD_FLAGS?=-s -w
 override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Version=$(VERSION)"
 override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
 
@@ -72,6 +72,14 @@ WHITE := $(shell tput -Txterm setaf 7)
 CYAN  := $(shell tput -Txterm setaf 6)
 RESET := $(shell tput -Txterm sgr0)
 
+UPX?=
+# check if upx exists
+ifeq (, $(shell which upx))
+	UPX=
+else
+	UPX=$(shell which upx)
+endif
+
 # Default Docker bridge IP
 E2E_BRIDGE_IP?=172.17.0.1
 
@@ -377,6 +385,7 @@ build: prepare backend-assets grpcs ## Build the project
 	$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
 	$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
 	$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
+	$(info ${GREEN}I UPX: ${YELLOW}$(UPX)${RESET})
 ifneq ($(BACKEND_LIBS),)
 	$(MAKE) backend-assets/lib
 	cp -f $(BACKEND_LIBS) backend-assets/lib/
@@ -471,7 +480,7 @@ prepare-e2e:
 	mkdir -p $(TEST_DIR)
 	cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
 	test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
-	docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=4 --build-arg FFMPEG=true -t localai-tests .
+	docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 --build-arg FFMPEG=true -t localai-tests .
 
 run-e2e-image:
 	ls -liah $(abspath ./tests/e2e-fixtures)
@@ -733,13 +742,22 @@ backend-assets/grpc: protogen-go replace
 backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/bert-embeddings
+endif
 
 backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/gpt4all
+endif
 
 backend-assets/grpc/huggingface: backend-assets/grpc
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/huggingface
+endif
 
 backend/cpp/llama/llama.cpp:
 	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
@@ -765,6 +783,9 @@ else
 	echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
 	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
 endif
+ifneq ($(UPX),)
+	$(UPX) backend/cpp/${VARIANT}/grpc-server
+endif
 
 # This target is for manually building a variant with-auto detected flags
 backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp
@@ -837,33 +858,57 @@ backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama/llama.
 backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
 	mkdir -p backend-assets/util/
 	cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
+ifneq ($(UPX),)
+	$(UPX) backend-assets/util/llama-cpp-rpc-server
+endif
 
 backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/llama-ggml
+endif
 
 backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
 	CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/piper
+endif
 
 backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/rwkv
+endif
 
 backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/stablediffusion
+endif
 
 backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/tinydream
+endif
 
 backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/whisper
+endif
 
 backend-assets/grpc/local-store: backend-assets/grpc
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/local-store
+endif
 
 grpcs: prepare $(GRPC_BACKENDS)
```
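The new `LD_FLAGS?=-s -w` default and the optional UPX pass both aim at smaller backend binaries. A hedged illustration of the same two steps outside the Makefile (the output name is illustrative, not taken from the diff):

```bash
# -s -w omit the symbol table and DWARF debug info from the Go link step.
go build -ldflags "-s -w" -o local-ai ./

# The Makefile only runs upx when it is found on PATH; the same guard here.
if command -v upx >/dev/null; then upx local-ai; fi
```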
```diff
@@ -2259,7 +2259,6 @@ static void params_parse(const backend::ModelOptions* request,
       // get the directory of modelfile
       std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
       params.lora_adapter.push_back(std::make_tuple(model_dir + "/"+request->loraadapter(), scale_factor));
-      params.lora_base = model_dir + "/"+request->lorabase();
     }
     params.use_mlock = request->mlock();
     params.use_mmap = request->mmap();
```
```diff
@@ -1,6 +1,6 @@
 accelerate
 auto-gptq==0.7.1
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 torch
 certifi
@@ -1,6 +1,6 @@
 accelerate
 bark==0.1.5
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 certifi
 transformers
@@ -1,2 +1,2 @@
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
@@ -1,6 +1,6 @@
 accelerate
 TTS==0.22.0
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 certifi
 transformers
@@ -3,7 +3,7 @@ accelerate
 compel
 peft
 diffusers
-grpcio==1.65.0
+grpcio==1.65.1
 opencv-python
 pillow
 protobuf
@@ -1,5 +1,5 @@
 accelerate
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 certifi
 torch
@@ -1,6 +1,6 @@
 causal-conv1d==1.4.0
 mamba-ssm==2.2.2
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 certifi
 transformers
@@ -2,7 +2,7 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-grpcio==1.64.1
+grpcio==1.65.1
 protobuf
 librosa==0.9.1
 faster-whisper==1.0.3
@@ -1,4 +1,4 @@
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 librosa
 faster-whisper
@@ -1,5 +1,5 @@
 accelerate
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 torch
 git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16
@@ -1,6 +1,6 @@
 accelerate
 rerankers[transformers]
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 certifi
 transformers
@@ -2,4 +2,4 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,6 +1,6 @@
 accelerate
 sentence-transformers==3.0.1
 transformers
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 certifi
@@ -2,4 +2,4 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,6 +1,6 @@
 accelerate
 transformers
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 torch
 scipy==1.14.0
@@ -2,4 +2,3 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,9 +1,9 @@
 accelerate
 transformers
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 torch
 certifi
 intel-extension-for-transformers
 bitsandbytes
-setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,4 +1,4 @@
 accelerate
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 certifi
@@ -1,6 +1,6 @@
 accelerate
 vllm
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 certifi
 transformers
```
```diff
@@ -16,7 +16,16 @@
       </a>
     </h2>
     <h5 class="mb-4 text-justify">LocalAI uses P2P technologies to enable distribution of work between peers. It is possible to share an instance with Federation and/or split the weights of a model across peers (only available with llama.cpp models). You can now share computational resources between your devices or your friends!</h5>
 
+    <!-- Warning box if p2p token is empty and p2p is enabled -->
+    {{ if and .IsP2PEnabled (eq .P2PToken "") }}
+    <div class="bg-red-500 p-4 rounded-lg shadow-lg mb-12 text-left">
+      <p class="text-xl font-semibold text-white"> <i class="fa-solid fa-exclamation-triangle"></i> Warning: P2P mode is disabled or no token was specified</p>
+      <p class="mb-4">You have to enable P2P mode by starting LocalAI with <code>--p2p</code>. Please restart the server with <code>--p2p</code> to generate a new token automatically that can be used to automatically discover other nodes. If you already have a token specify it with <code>export TOKEN=".."</code> <a href="https://localai.io/features/distribute/" target="_blank">
+        Check out the documentation for more information.
+      </a> </p>
+    </div>
+    {{ else }}
+
     <!-- Federation Box -->
     <div class="bg-gray-800 p-6 rounded-lg shadow-lg mb-12 text-left">
@@ -128,7 +137,8 @@
           </div>
         </div>
       </div>
-      <!-- Llama.cpp Box END -->
+      <!-- Llama.cpp Box END -->
+      {{ end }}
     </div>
 </div>
 
```
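The warning box added above describes the flow for enabling the p2p stack. A minimal shell sketch of that flow, using only the `--p2p` flag and `TOKEN` variable mentioned in the template and assuming the `local-ai` binary with its `run` subcommand is on PATH:

```bash
# First node: start with p2p enabled; a token is generated and printed in the logs.
local-ai run --p2p

# Additional nodes: reuse the shared token before starting (value is illustrative).
export TOKEN="<token printed by the first node>"
local-ai run --p2p
```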
```diff
@@ -11,7 +11,7 @@ This functionality enables LocalAI to distribute inference requests across multi
 LocalAI supports two modes of distributed inferencing via p2p:
 
 - **Federated Mode**: Requests are shared between the cluster and routed to a single worker node in the network based on the load balancer's decision.
-- **Worker Mode**: Requests are processed by all the workers which contributes to the final inference result (by sharing the model weights).
+- **Worker Mode** (aka "model sharding" or "splitting weights"): Requests are processed by all the workers which contributes to the final inference result (by sharing the model weights).
 
 ## Usage
 
@@ -122,12 +122,6 @@ The server logs should indicate that new workers are being discovered.
 
 
 
 
-## Notes
-
-- If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file.
-- Only a single model is supported currently.
-- Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun.
-- For more details on the implementation, refer to [LocalAI pull request #2343](https://github.com/mudler/LocalAI/pull/2343)
 
 ## Environment Variables
 
@@ -138,3 +132,20 @@ There are options that can be tweaked or parameters that can be set using enviro
 | **LOCALAI_P2P_DISABLE_DHT** | Set to "true" to disable DHT and enable p2p layer to be local only (mDNS) |
 | **LOCALAI_P2P_DISABLE_LIMITS** | Set to "true" to disable connection limits and resources management |
 | **LOCALAI_P2P_TOKEN** | Set the token for the p2p network |
+
+## Architecture
+
+LocalAI uses https://github.com/libp2p/go-libp2p under the hood, the same project powering IPFS. Differently from other frameworks, LocalAI uses peer2peer without a single master server, but rather it uses sub/gossip and ledger functionalities to achieve consensus across different peers.
+
+[EdgeVPN](https://github.com/mudler/edgevpn) is used as a library to establish the network and expose the ledger functionality under a shared token to ease out automatic discovery and have separated, private peer2peer networks.
+
+The weights are split proportional to the memory when running into worker mode, when in federation mode each request is split to every node which have to load the model fully.
+
+## Notes
+
+- If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file.
+- Only a single model is supported currently.
+- Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun.
+- For more details on the implementation, refer to [LocalAI pull request #2343](https://github.com/mudler/LocalAI/pull/2343)
```
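The relocated notes above call for host networking when a peer runs in a container. A hedged sketch of what that looks like with `docker run`, using only the flag and environment variable documented in the page (the image tag and token value are illustrative):

```bash
# Run a containerized LocalAI peer with host networking so p2p discovery works.
docker run --net host \
  -e LOCALAI_P2P_TOKEN="<token shared by all peers>" \
  localai/localai:latest
```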
docs/themes/hugo-theme-relearn (vendored): 2 changes

Submodule docs/themes/hugo-theme-relearn updated: 1b2e139512...7aec99b38d
```diff
@@ -1,6 +1,6 @@
-llama_index==0.10.55
+llama_index==0.10.56
 requests==2.32.3
-weaviate_client==4.6.5
+weaviate_client==4.6.7
 transformers
 torch
 chainlit
@@ -1,2 +1,2 @@
-langchain==0.2.8
-openai==1.35.13
+langchain==0.2.10
+openai==1.37.0
@@ -1,4 +1,4 @@
-langchain==0.2.8
-openai==1.35.13
+langchain==0.2.10
+openai==1.37.0
 chromadb==0.5.4
-llama-index==0.10.55
+llama-index==0.10.56
@@ -10,21 +10,21 @@ debugpy==1.8.2
 frozenlist==1.4.1
 greenlet==3.0.3
 idna==3.7
-langchain==0.2.8
-langchain-community==0.2.7
+langchain==0.2.10
+langchain-community==0.2.9
 marshmallow==3.21.3
 marshmallow-enum==1.5.1
 multidict==6.0.5
 mypy-extensions==1.0.0
 numexpr==2.10.1
-numpy==1.26.4
-openai==1.35.13
+numpy==2.0.1
+openai==1.37.0
 openapi-schema-pydantic==1.2.4
 packaging>=23.2
 pydantic==2.8.2
 PyYAML==6.0.1
 requests==2.32.3
-SQLAlchemy==2.0.30
+SQLAlchemy==2.0.31
 tenacity==8.5.0
 tqdm==4.66.4
 typing-inspect==0.9.0
```
```diff
@@ -1,4 +1,44 @@
 ---
+## LLama3.1
+- &llama31
+  url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
+  name: "meta-llama-3.1-8b-instruct"
+  license: llama3.1
+  description: |
+    The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes (text in/text out). The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.
+
+    Model developer: Meta
+
+    Model Architecture: Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.
+  urls:
+    - https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct
+    - https://huggingface.co/MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF
+  tags:
+    - llm
+    - gguf
+    - gpu
+    - cpu
+    - llama3.1
+  overrides:
+    parameters:
+      model: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
+  files:
+    - filename: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
+      sha256: c2f17f44af962660d1ad4cb1af91a731f219f3b326c2b14441f9df1f347f2815
+      uri: huggingface://MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "meta-llama-3.1-70b-instruct"
+  urls:
+    - https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct
+    - https://huggingface.co/MaziyarPanahi/Meta-Llama-3.1-70B-Instruct-GGUF
+  overrides:
+    parameters:
+      model: Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf
+  files:
+    - filename: Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf
+      sha256: 3f16ab17da4521fe3ed7c5d7beed960d3fe7b5b64421ee9650aa53d6b649ccab
+      uri: huggingface://MaziyarPanahi/Meta-Llama-3.1-70B-Instruct-GGUF/Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf
 ## Deepseek
 - &deepseek
   url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"
```
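The gallery entries above register the model under the name `meta-llama-3.1-8b-instruct`. A hedged usage sketch against LocalAI's OpenAI-compatible API once that model is installed (the host and port are assumptions, not part of the diff):

```bash
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "meta-llama-3.1-8b-instruct",
        "messages": [{"role": "user", "content": "Hello!"}]
      }'
```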
```diff
@@ -212,7 +212,7 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
 				grpcProcess = p
 				foundCUDA = true
 			} else {
-				log.Info().Msgf("GPU device found but no CUDA backend present")
+				log.Debug().Msgf("Nvidia GPU device found, no embedded CUDA variant found. You can ignore this message if you are using container with CUDA support")
 			}
 		}
 		if strings.Contains(gpu.String(), "amd") {
@@ -222,7 +222,7 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
 				grpcProcess = p
 				foundAMDGPU = true
 			} else {
-				log.Info().Msgf("GPU device found but no HIPBLAS backend present")
+				log.Debug().Msgf("AMD GPU device found, no embedded HIPBLAS variant found. You can ignore this message if you are using container with HIPBLAS support")
 			}
 		}
 		if strings.Contains(gpu.String(), "intel") {
@@ -236,7 +236,7 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
 				grpcProcess = p
 				foundIntelGPU = true
 			} else {
-				log.Info().Msgf("GPU device found but no Intel backend present")
+				log.Debug().Msgf("Intel GPU device found, no embedded SYCL variant found. You can ignore this message if you are using container with SYCL support")
 			}
 		}
 	}
```
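Since these GPU-detection messages are demoted from info to debug level, they only appear when debug logging is enabled. A hedged sketch, assuming LocalAI's DEBUG environment variable toggles debug logging:

```bash
# Enable debug-level logs so the backend-detection messages above are visible.
DEBUG=true local-ai run
```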