mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-24 16:51:44 -04:00
Compare commits
5 Commits
gosec_fix
...
cleanup_de
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2a03905920 | ||
|
|
35297ebc14 | ||
|
|
b303805df9 | ||
|
|
32d51797d9 | ||
|
|
af09b019ed |
@@ -32,22 +32,18 @@ config_remote() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Setup special .ssh files
|
# Setup special .ssh files
|
||||||
# Prints out lines of text to make things pretty
|
#
|
||||||
# Param 1: bash array, filenames relative to the customization directory that should be copied to ~/.ssh
|
# Param 1: bash array, filenames relative to the customization directory that should be copied to ~/.ssh
|
||||||
setup_ssh() {
|
setup_ssh() {
|
||||||
echo "starting ~/.ssh directory setup..."
|
|
||||||
mkdir -p "${HOME}.ssh"
|
|
||||||
chmod 0700 "${HOME}/.ssh"
|
|
||||||
echo "-----"
|
|
||||||
local files=("$@")
|
local files=("$@")
|
||||||
for file in "${files[@]}" ; do
|
for file in "${files[@]}"; then
|
||||||
local cfile="/devcontainer-customization/${file}"
|
local cfile="/devcontainer-customization/${file}"
|
||||||
local hfile="${HOME}/.ssh/${file}"
|
local hfile="~/.ssh/${file}"
|
||||||
if [ ! -f "${hfile}" ]; then
|
if [ ! -f "${hfile}" ]; then
|
||||||
echo "copying \"${file}\""
|
echo "copying ${file}"
|
||||||
cp "${cfile}" "${hfile}"
|
cp "${cfile}" "${hfile}"
|
||||||
chmod 600 "${hfile}"
|
chmod 600 "${hfile}"
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
echo "~/.ssh directory setup complete!"
|
ls ~/.ssh
|
||||||
}
|
}
|
||||||
|
|||||||
2
.github/workflows/bump_deps.yaml
vendored
2
.github/workflows/bump_deps.yaml
vendored
@@ -56,7 +56,7 @@ jobs:
|
|||||||
rm -rfv ${{ matrix.variable }}_message.txt
|
rm -rfv ${{ matrix.variable }}_message.txt
|
||||||
rm -rfv ${{ matrix.variable }}_commit.txt
|
rm -rfv ${{ matrix.variable }}_commit.txt
|
||||||
- name: Create Pull Request
|
- name: Create Pull Request
|
||||||
uses: peter-evans/create-pull-request@v7
|
uses: peter-evans/create-pull-request@v6
|
||||||
with:
|
with:
|
||||||
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
||||||
push-to-fork: ci-forks/LocalAI
|
push-to-fork: ci-forks/LocalAI
|
||||||
|
|||||||
2
.github/workflows/bump_docs.yaml
vendored
2
.github/workflows/bump_docs.yaml
vendored
@@ -17,7 +17,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
bash .github/bump_docs.sh ${{ matrix.repository }}
|
bash .github/bump_docs.sh ${{ matrix.repository }}
|
||||||
- name: Create Pull Request
|
- name: Create Pull Request
|
||||||
uses: peter-evans/create-pull-request@v7
|
uses: peter-evans/create-pull-request@v6
|
||||||
with:
|
with:
|
||||||
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
||||||
push-to-fork: ci-forks/LocalAI
|
push-to-fork: ci-forks/LocalAI
|
||||||
|
|||||||
2
.github/workflows/checksum_checker.yaml
vendored
2
.github/workflows/checksum_checker.yaml
vendored
@@ -36,7 +36,7 @@ jobs:
|
|||||||
sudo chmod 777 /hf_cache
|
sudo chmod 777 /hf_cache
|
||||||
bash .github/checksum_checker.sh gallery/index.yaml
|
bash .github/checksum_checker.sh gallery/index.yaml
|
||||||
- name: Create Pull Request
|
- name: Create Pull Request
|
||||||
uses: peter-evans/create-pull-request@v7
|
uses: peter-evans/create-pull-request@v6
|
||||||
with:
|
with:
|
||||||
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
||||||
push-to-fork: ci-forks/LocalAI
|
push-to-fork: ci-forks/LocalAI
|
||||||
|
|||||||
6
.github/workflows/release.yaml
vendored
6
.github/workflows/release.yaml
vendored
@@ -294,7 +294,7 @@ jobs:
|
|||||||
export C_INCLUDE_PATH=/usr/local/include
|
export C_INCLUDE_PATH=/usr/local/include
|
||||||
export CPLUS_INCLUDE_PATH=/usr/local/include
|
export CPLUS_INCLUDE_PATH=/usr/local/include
|
||||||
export PATH=$PATH:$GOPATH/bin
|
export PATH=$PATH:$GOPATH/bin
|
||||||
export SKIP_GRPC_BACKEND=backend-assets/grpc/whisper
|
|
||||||
make dist
|
make dist
|
||||||
- uses: actions/upload-artifact@v4
|
- uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
@@ -327,7 +327,7 @@ jobs:
|
|||||||
cache: false
|
cache: false
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
brew install protobuf grpc libomp llvm
|
brew install protobuf grpc
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||||
- name: Build
|
- name: Build
|
||||||
@@ -336,7 +336,7 @@ jobs:
|
|||||||
export C_INCLUDE_PATH=/usr/local/include
|
export C_INCLUDE_PATH=/usr/local/include
|
||||||
export CPLUS_INCLUDE_PATH=/usr/local/include
|
export CPLUS_INCLUDE_PATH=/usr/local/include
|
||||||
export PATH=$PATH:$GOPATH/bin
|
export PATH=$PATH:$GOPATH/bin
|
||||||
export CC=/opt/homebrew/opt/llvm/bin/clang
|
|
||||||
make dist
|
make dist
|
||||||
- uses: actions/upload-artifact@v4
|
- uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
|
|||||||
2
.github/workflows/secscan.yaml
vendored
2
.github/workflows/secscan.yaml
vendored
@@ -18,7 +18,7 @@ jobs:
|
|||||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||||
- name: Run Gosec Security Scanner
|
- name: Run Gosec Security Scanner
|
||||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||||
uses: securego/gosec@v2.21.0
|
uses: securego/gosec@master
|
||||||
with:
|
with:
|
||||||
# we let the report trigger content trigger a failure using the GitHub Security features.
|
# we let the report trigger content trigger a failure using the GitHub Security features.
|
||||||
args: '-no-fail -fmt sarif -out results.sarif ./...'
|
args: '-no-fail -fmt sarif -out results.sarif ./...'
|
||||||
|
|||||||
3
.github/workflows/test.yml
vendored
3
.github/workflows/test.yml
vendored
@@ -214,13 +214,12 @@ jobs:
|
|||||||
run: go version
|
run: go version
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
|
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc
|
||||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||||
- name: Test
|
- name: Test
|
||||||
run: |
|
run: |
|
||||||
export C_INCLUDE_PATH=/usr/local/include
|
export C_INCLUDE_PATH=/usr/local/include
|
||||||
export CPLUS_INCLUDE_PATH=/usr/local/include
|
export CPLUS_INCLUDE_PATH=/usr/local/include
|
||||||
export CC=/opt/homebrew/opt/llvm/bin/clang
|
|
||||||
# Used to run the newer GNUMake version from brew that supports --output-sync
|
# Used to run the newer GNUMake version from brew that supports --output-sync
|
||||||
export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
|
export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
|
||||||
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
|
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
|
||||||
|
|||||||
2
.github/workflows/update_swagger.yaml
vendored
2
.github/workflows/update_swagger.yaml
vendored
@@ -25,7 +25,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
make protogen-go swagger
|
make protogen-go swagger
|
||||||
- name: Create Pull Request
|
- name: Create Pull Request
|
||||||
uses: peter-evans/create-pull-request@v7
|
uses: peter-evans/create-pull-request@v6
|
||||||
with:
|
with:
|
||||||
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
||||||
push-to-fork: ci-forks/LocalAI
|
push-to-fork: ci-forks/LocalAI
|
||||||
|
|||||||
39
Dockerfile
39
Dockerfile
@@ -13,7 +13,7 @@ ARG TARGETARCH
|
|||||||
ARG TARGETVARIANT
|
ARG TARGETVARIANT
|
||||||
|
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
ENV DEBIAN_FRONTEND=noninteractive
|
||||||
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
|
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
|
||||||
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
@@ -263,20 +263,14 @@ EOT
|
|||||||
# In most cases, builder is the image you should be using - however, this can save build time if one just needs to copy backend-assets/grpc/stablediffusion and nothing else.
|
# In most cases, builder is the image you should be using - however, this can save build time if one just needs to copy backend-assets/grpc/stablediffusion and nothing else.
|
||||||
FROM builder-base AS builder-sd
|
FROM builder-base AS builder-sd
|
||||||
|
|
||||||
# stablediffusion does not tolerate a newer version of abseil, copy only over enough elements to build it
|
COPY . .
|
||||||
COPY Makefile .
|
COPY .git .
|
||||||
COPY go.mod .
|
|
||||||
COPY go.sum .
|
|
||||||
COPY backend/backend.proto ./backend/backend.proto
|
|
||||||
COPY backend/go/image/stablediffusion ./backend/go/image/stablediffusion
|
|
||||||
COPY pkg/grpc ./pkg/grpc
|
|
||||||
COPY pkg/stablediffusion ./pkg/stablediffusion
|
|
||||||
RUN git init
|
|
||||||
RUN make sources/go-stable-diffusion
|
|
||||||
RUN touch prepare-sources
|
|
||||||
|
|
||||||
# Actually build the backend
|
RUN make prepare
|
||||||
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make backend-assets/grpc/stablediffusion
|
|
||||||
|
|
||||||
|
# stablediffusion does not tolerate a newer version of abseil, build it first
|
||||||
|
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
||||||
|
|
||||||
###################################
|
###################################
|
||||||
###################################
|
###################################
|
||||||
@@ -291,20 +285,8 @@ COPY --from=grpc /opt/grpc /usr/local
|
|||||||
# Rebuild with defaults backends
|
# Rebuild with defaults backends
|
||||||
WORKDIR /build
|
WORKDIR /build
|
||||||
|
|
||||||
COPY . .
|
|
||||||
COPY .git .
|
|
||||||
|
|
||||||
RUN make prepare
|
|
||||||
|
|
||||||
## Build the binary
|
## Build the binary
|
||||||
## If it's CUDA, we want to skip some of the llama-compat backends to save space
|
RUN make build
|
||||||
## We only leave the most CPU-optimized variant and the fallback for the cublas build
|
|
||||||
## (both will use CUDA for the actual computation)
|
|
||||||
RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
|
|
||||||
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
|
|
||||||
else \
|
|
||||||
make build; \
|
|
||||||
fi
|
|
||||||
|
|
||||||
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
|
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
|
||||||
mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
|
mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
|
||||||
@@ -418,6 +400,9 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAG
|
|||||||
; fi && \
|
; fi && \
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "transformers-musicgen" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
if [[ ( "${EXTRA_BACKENDS}" =~ "transformers-musicgen" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
make -C backend/python/transformers-musicgen \
|
make -C backend/python/transformers-musicgen \
|
||||||
|
; fi && \
|
||||||
|
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama1" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
|
make -C backend/python/exllama \
|
||||||
; fi
|
; fi
|
||||||
|
|
||||||
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
|
|||||||
21
Makefile
21
Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
|
|||||||
# llama.cpp versions
|
# llama.cpp versions
|
||||||
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
||||||
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||||
CPPLLAMA_VERSION?=e6b7801bd189d102d901d3e72035611a25456ef1
|
CPPLLAMA_VERSION?=2f3c1466ff46a2413b0e363a5005c46538186ee6
|
||||||
|
|
||||||
# go-rwkv version
|
# go-rwkv version
|
||||||
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
||||||
@@ -16,7 +16,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
|
|||||||
|
|
||||||
# whisper.cpp version
|
# whisper.cpp version
|
||||||
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
||||||
WHISPER_CPP_VERSION?=a551933542d956ae84634937acd2942eb40efaaf
|
WHISPER_CPP_VERSION?=d65786ea540a5aef21f67cacfa6f134097727780
|
||||||
|
|
||||||
# bert.cpp version
|
# bert.cpp version
|
||||||
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
|
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
|
||||||
@@ -338,7 +338,7 @@ rebuild: ## Rebuilds the project
|
|||||||
$(MAKE) -C sources/go-tiny-dream clean
|
$(MAKE) -C sources/go-tiny-dream clean
|
||||||
$(MAKE) build
|
$(MAKE) build
|
||||||
|
|
||||||
prepare: prepare-sources $(OPTIONAL_TARGETS)
|
prepare: prepare-sources gen-assets $(OPTIONAL_TARGETS)
|
||||||
|
|
||||||
clean: ## Remove build related file
|
clean: ## Remove build related file
|
||||||
$(GOCMD) clean -cache
|
$(GOCMD) clean -cache
|
||||||
@@ -534,10 +534,10 @@ protogen-go-clean:
|
|||||||
$(RM) bin/*
|
$(RM) bin/*
|
||||||
|
|
||||||
.PHONY: protogen-python
|
.PHONY: protogen-python
|
||||||
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
|
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
|
||||||
|
|
||||||
.PHONY: protogen-python-clean
|
.PHONY: protogen-python-clean
|
||||||
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
|
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
|
||||||
|
|
||||||
.PHONY: autogptq-protogen
|
.PHONY: autogptq-protogen
|
||||||
autogptq-protogen:
|
autogptq-protogen:
|
||||||
@@ -571,6 +571,14 @@ diffusers-protogen:
|
|||||||
diffusers-protogen-clean:
|
diffusers-protogen-clean:
|
||||||
$(MAKE) -C backend/python/diffusers protogen-clean
|
$(MAKE) -C backend/python/diffusers protogen-clean
|
||||||
|
|
||||||
|
.PHONY: exllama-protogen
|
||||||
|
exllama-protogen:
|
||||||
|
$(MAKE) -C backend/python/exllama protogen
|
||||||
|
|
||||||
|
.PHONY: exllama-protogen-clean
|
||||||
|
exllama-protogen-clean:
|
||||||
|
$(MAKE) -C backend/python/exllama protogen-clean
|
||||||
|
|
||||||
.PHONY: exllama2-protogen
|
.PHONY: exllama2-protogen
|
||||||
exllama2-protogen:
|
exllama2-protogen:
|
||||||
$(MAKE) -C backend/python/exllama2 protogen
|
$(MAKE) -C backend/python/exllama2 protogen
|
||||||
@@ -667,6 +675,7 @@ prepare-extra-conda-environments: protogen-python
|
|||||||
$(MAKE) -C backend/python/parler-tts
|
$(MAKE) -C backend/python/parler-tts
|
||||||
$(MAKE) -C backend/python/vall-e-x
|
$(MAKE) -C backend/python/vall-e-x
|
||||||
$(MAKE) -C backend/python/openvoice
|
$(MAKE) -C backend/python/openvoice
|
||||||
|
$(MAKE) -C backend/python/exllama
|
||||||
$(MAKE) -C backend/python/exllama2
|
$(MAKE) -C backend/python/exllama2
|
||||||
|
|
||||||
prepare-test-extra: protogen-python
|
prepare-test-extra: protogen-python
|
||||||
@@ -837,7 +846,7 @@ endif
|
|||||||
|
|
||||||
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
|
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
|
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/whisper
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
|
||||||
ifneq ($(UPX),)
|
ifneq ($(UPX),)
|
||||||
$(UPX) backend-assets/grpc/whisper
|
$(UPX) backend-assets/grpc/whisper
|
||||||
endif
|
endif
|
||||||
|
|||||||
@@ -40,7 +40,7 @@
|
|||||||
|
|
||||||
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
|
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
|
||||||
>
|
>
|
||||||
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/go-skynet/LocalAI/tree/master/examples/)
|
> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
||||||
|
|
||||||
[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
|
[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
|
||||||
|
|
||||||
@@ -72,7 +72,6 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
|
|||||||
|
|
||||||
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
||||||
|
|
||||||
- Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
|
|
||||||
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
|
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
|
||||||
- June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
|
- June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
|
||||||
- June 2024: Support for models from OCI registries: https://github.com/mudler/LocalAI/pull/2628
|
- June 2024: Support for models from OCI registries: https://github.com/mudler/LocalAI/pull/2628
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
name: stablediffusion
|
name: stablediffusion
|
||||||
parameters:
|
parameters:
|
||||||
model: Lykon/dreamshaper-8
|
model: runwayml/stable-diffusion-v1-5
|
||||||
backend: diffusers
|
backend: diffusers
|
||||||
step: 25
|
step: 25
|
||||||
f16: true
|
f16: true
|
||||||
|
|||||||
@@ -16,7 +16,6 @@ service Backend {
|
|||||||
rpc GenerateImage(GenerateImageRequest) returns (Result) {}
|
rpc GenerateImage(GenerateImageRequest) returns (Result) {}
|
||||||
rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {}
|
rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {}
|
||||||
rpc TTS(TTSRequest) returns (Result) {}
|
rpc TTS(TTSRequest) returns (Result) {}
|
||||||
rpc SoundGeneration(SoundGenerationRequest) returns (Result) {}
|
|
||||||
rpc TokenizeString(PredictOptions) returns (TokenizationResponse) {}
|
rpc TokenizeString(PredictOptions) returns (TokenizationResponse) {}
|
||||||
rpc Status(HealthMessage) returns (StatusResponse) {}
|
rpc Status(HealthMessage) returns (StatusResponse) {}
|
||||||
|
|
||||||
@@ -271,17 +270,6 @@ message TTSRequest {
|
|||||||
optional string language = 5;
|
optional string language = 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
message SoundGenerationRequest {
|
|
||||||
string text = 1;
|
|
||||||
string model = 2;
|
|
||||||
string dst = 3;
|
|
||||||
optional float duration = 4;
|
|
||||||
optional float temperature = 5;
|
|
||||||
optional bool sample = 6;
|
|
||||||
optional string src = 7;
|
|
||||||
optional int32 src_divisor = 8;
|
|
||||||
}
|
|
||||||
|
|
||||||
message TokenizationResponse {
|
message TokenizationResponse {
|
||||||
int32 length = 1;
|
int32 length = 1;
|
||||||
repeated int32 tokens = 2;
|
repeated int32 tokens = 2;
|
||||||
|
|||||||
@@ -17,10 +17,11 @@
|
|||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "json.hpp"
|
#include "json.hpp"
|
||||||
#include "llama.h"
|
#include "llama.h"
|
||||||
|
#include "grammar-parser.h"
|
||||||
#include "backend.pb.h"
|
#include "backend.pb.h"
|
||||||
#include "backend.grpc.pb.h"
|
#include "backend.grpc.pb.h"
|
||||||
#include "utils.hpp"
|
#include "utils.hpp"
|
||||||
#include "sampling.h"
|
|
||||||
// include std::regex
|
// include std::regex
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
@@ -202,8 +203,8 @@ struct llama_client_slot
|
|||||||
std::string stopping_word;
|
std::string stopping_word;
|
||||||
|
|
||||||
// sampling
|
// sampling
|
||||||
struct gpt_sampler_params sparams;
|
struct llama_sampling_params sparams;
|
||||||
gpt_sampler *ctx_sampling = nullptr;
|
llama_sampling_context *ctx_sampling = nullptr;
|
||||||
|
|
||||||
int32_t ga_i = 0; // group-attention state
|
int32_t ga_i = 0; // group-attention state
|
||||||
int32_t ga_n = 1; // group-attention factor
|
int32_t ga_n = 1; // group-attention factor
|
||||||
@@ -618,7 +619,7 @@ struct llama_server_context
|
|||||||
|
|
||||||
bool launch_slot_with_data(llama_client_slot* &slot, json data) {
|
bool launch_slot_with_data(llama_client_slot* &slot, json data) {
|
||||||
slot_params default_params;
|
slot_params default_params;
|
||||||
gpt_sampler_params default_sparams;
|
llama_sampling_params default_sparams;
|
||||||
|
|
||||||
slot->params.stream = json_value(data, "stream", false);
|
slot->params.stream = json_value(data, "stream", false);
|
||||||
slot->params.cache_prompt = json_value(data, "cache_prompt", false);
|
slot->params.cache_prompt = json_value(data, "cache_prompt", false);
|
||||||
@@ -627,7 +628,7 @@ struct llama_server_context
|
|||||||
slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
|
slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
|
||||||
slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p);
|
slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p);
|
||||||
slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
|
slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
|
||||||
slot->sparams.typ_p = json_value(data, "typical_p", default_sparams.typ_p);
|
slot->sparams.typical_p = json_value(data, "typical_p", default_sparams.typical_p);
|
||||||
slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
|
slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
|
||||||
slot->sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range);
|
slot->sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range);
|
||||||
slot->sparams.dynatemp_exponent = json_value(data, "dynatemp_exponent", default_sparams.dynatemp_exponent);
|
slot->sparams.dynatemp_exponent = json_value(data, "dynatemp_exponent", default_sparams.dynatemp_exponent);
|
||||||
@@ -640,7 +641,7 @@ struct llama_server_context
|
|||||||
slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
|
slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
|
||||||
slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
|
slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
|
||||||
slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
|
slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
|
||||||
slot->sparams.seed = json_value(data, "seed", default_sparams.seed);
|
slot->params.seed = json_value(data, "seed", default_params.seed);
|
||||||
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
|
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
|
||||||
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
|
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
|
||||||
slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
|
slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
|
||||||
@@ -664,7 +665,6 @@ struct llama_server_context
|
|||||||
slot->params.input_prefix = "";
|
slot->params.input_prefix = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (data.count("input_suffix") != 0)
|
if (data.count("input_suffix") != 0)
|
||||||
{
|
{
|
||||||
slot->params.input_suffix = data["input_suffix"];
|
slot->params.input_suffix = data["input_suffix"];
|
||||||
@@ -683,10 +683,6 @@ struct llama_server_context
|
|||||||
slot->prompt = "";
|
slot->prompt = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
if (json_value(data, "ignore_eos", false)) {
|
|
||||||
slot->sparams.logit_bias.push_back({llama_token_eos(model), -INFINITY});
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
slot->sparams.penalty_prompt_tokens.clear();
|
slot->sparams.penalty_prompt_tokens.clear();
|
||||||
slot->sparams.use_penalty_prompt_tokens = false;
|
slot->sparams.use_penalty_prompt_tokens = false;
|
||||||
const auto &penalty_prompt = data.find("penalty_prompt");
|
const auto &penalty_prompt = data.find("penalty_prompt");
|
||||||
@@ -722,10 +718,14 @@ struct llama_server_context
|
|||||||
slot->sparams.use_penalty_prompt_tokens = true;
|
slot->sparams.use_penalty_prompt_tokens = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
|
|
||||||
slot->sparams.logit_bias.clear();
|
slot->sparams.logit_bias.clear();
|
||||||
|
|
||||||
|
if (json_value(data, "ignore_eos", false))
|
||||||
|
{
|
||||||
|
slot->sparams.logit_bias[llama_token_eos(model)] = -INFINITY;
|
||||||
|
}
|
||||||
|
|
||||||
const auto &logit_bias = data.find("logit_bias");
|
const auto &logit_bias = data.find("logit_bias");
|
||||||
if (logit_bias != data.end() && logit_bias->is_array())
|
if (logit_bias != data.end() && logit_bias->is_array())
|
||||||
{
|
{
|
||||||
@@ -753,7 +753,7 @@ struct llama_server_context
|
|||||||
llama_token tok = el[0].get<llama_token>();
|
llama_token tok = el[0].get<llama_token>();
|
||||||
if (tok >= 0 && tok < n_vocab)
|
if (tok >= 0 && tok < n_vocab)
|
||||||
{
|
{
|
||||||
slot->sparams.logit_bias.push_back({tok, bias});
|
slot->sparams.logit_bias[tok] = bias;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (el[0].is_string())
|
else if (el[0].is_string())
|
||||||
@@ -761,13 +761,13 @@ struct llama_server_context
|
|||||||
auto toks = llama_tokenize(model, el[0].get<std::string>(), false);
|
auto toks = llama_tokenize(model, el[0].get<std::string>(), false);
|
||||||
for (auto tok : toks)
|
for (auto tok : toks)
|
||||||
{
|
{
|
||||||
slot->sparams.logit_bias.push_back({tok, bias});
|
slot->sparams.logit_bias[tok] = bias;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
slot->params.antiprompt.clear();
|
slot->params.antiprompt.clear();
|
||||||
|
|
||||||
const auto &stop = data.find("stop");
|
const auto &stop = data.find("stop");
|
||||||
@@ -781,22 +781,24 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto & samplers = data.find("samplers");
|
const auto &samplers_sequence = data.find("samplers");
|
||||||
if (samplers != data.end() && samplers->is_array()) {
|
if (samplers_sequence != data.end() && samplers_sequence->is_array())
|
||||||
|
{
|
||||||
std::vector<std::string> sampler_names;
|
std::vector<std::string> sampler_names;
|
||||||
for (const auto & name : *samplers) {
|
for (const auto &sampler_name : *samplers_sequence)
|
||||||
if (name.is_string()) {
|
{
|
||||||
sampler_names.emplace_back(name);
|
if (sampler_name.is_string())
|
||||||
}
|
{
|
||||||
|
sampler_names.emplace_back(sampler_name);
|
||||||
}
|
}
|
||||||
slot->sparams.samplers = gpt_sampler_types_from_names(sampler_names, false);
|
}
|
||||||
|
slot->sparams.samplers_sequence = llama_sampling_types_from_names(sampler_names, false);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
slot->sparams.samplers = default_sparams.samplers;
|
slot->sparams.samplers_sequence = default_sparams.samplers_sequence;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (multimodal)
|
if (multimodal)
|
||||||
{
|
{
|
||||||
@@ -873,10 +875,10 @@ struct llama_server_context
|
|||||||
|
|
||||||
if (slot->ctx_sampling != nullptr)
|
if (slot->ctx_sampling != nullptr)
|
||||||
{
|
{
|
||||||
gpt_sampler_free(slot->ctx_sampling);
|
llama_sampling_free(slot->ctx_sampling);
|
||||||
}
|
}
|
||||||
slot->ctx_sampling = gpt_sampler_init(model, slot->sparams);
|
slot->ctx_sampling = llama_sampling_init(slot->sparams);
|
||||||
//llama_set_rng_seed(ctx, slot->params.seed);
|
llama_set_rng_seed(ctx, slot->params.seed);
|
||||||
slot->command = LOAD_PROMPT;
|
slot->command = LOAD_PROMPT;
|
||||||
|
|
||||||
all_slots_are_idle = false;
|
all_slots_are_idle = false;
|
||||||
@@ -886,7 +888,7 @@ struct llama_server_context
|
|||||||
{"task_id", slot->task_id},
|
{"task_id", slot->task_id},
|
||||||
});
|
});
|
||||||
|
|
||||||
// LOG_TEE("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
|
LOG_TEE("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -1004,13 +1006,11 @@ struct llama_server_context
|
|||||||
slot.generated_text += token_str;
|
slot.generated_text += token_str;
|
||||||
slot.has_next_token = true;
|
slot.has_next_token = true;
|
||||||
|
|
||||||
/*
|
|
||||||
if (slot.ctx_sampling->params.use_penalty_prompt_tokens && result.tok != -1)
|
if (slot.ctx_sampling->params.use_penalty_prompt_tokens && result.tok != -1)
|
||||||
{
|
{
|
||||||
// we can change penalty_prompt_tokens because it is always created from scratch each request
|
// we can change penalty_prompt_tokens because it is always created from scratch each request
|
||||||
slot.ctx_sampling->params.penalty_prompt_tokens.push_back(result.tok);
|
slot.ctx_sampling->params.penalty_prompt_tokens.push_back(result.tok);
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
|
|
||||||
// check if there is incomplete UTF-8 character at the end
|
// check if there is incomplete UTF-8 character at the end
|
||||||
bool incomplete = false;
|
bool incomplete = false;
|
||||||
@@ -1119,7 +1119,7 @@ struct llama_server_context
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!llava_image_embed_make_with_clip_img(clp_ctx, params.cpuparams.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
|
if (!llava_image_embed_make_with_clip_img(clp_ctx, params.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
|
||||||
LOG_TEE("Error processing the given image");
|
LOG_TEE("Error processing the given image");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -1144,11 +1144,13 @@ struct llama_server_context
|
|||||||
|
|
||||||
json get_formated_generation(llama_client_slot &slot)
|
json get_formated_generation(llama_client_slot &slot)
|
||||||
{
|
{
|
||||||
std::vector<std::string> samplers;
|
const auto eos_bias = slot.sparams.logit_bias.find(llama_token_eos(model));
|
||||||
samplers.reserve(slot.sparams.samplers.size());
|
const bool ignore_eos = eos_bias != slot.sparams.logit_bias.end() &&
|
||||||
for (const auto & sampler : slot.sparams.samplers)
|
eos_bias->second < 0.0f && std::isinf(eos_bias->second);
|
||||||
|
std::vector<std::string> samplers_sequence;
|
||||||
|
for (const auto &sampler_type : slot.sparams.samplers_sequence)
|
||||||
{
|
{
|
||||||
samplers.emplace_back(gpt_sampler_type_to_str(sampler));
|
samplers_sequence.emplace_back(llama_sampling_type_to_str(sampler_type));
|
||||||
}
|
}
|
||||||
|
|
||||||
return json {
|
return json {
|
||||||
@@ -1163,11 +1165,13 @@ struct llama_server_context
|
|||||||
{"top_p", slot.sparams.top_p},
|
{"top_p", slot.sparams.top_p},
|
||||||
{"min_p", slot.sparams.min_p},
|
{"min_p", slot.sparams.min_p},
|
||||||
{"tfs_z", slot.sparams.tfs_z},
|
{"tfs_z", slot.sparams.tfs_z},
|
||||||
{"typical_p", slot.sparams.typ_p},
|
{"typical_p", slot.sparams.typical_p},
|
||||||
{"repeat_last_n", slot.sparams.penalty_last_n},
|
{"repeat_last_n", slot.sparams.penalty_last_n},
|
||||||
{"repeat_penalty", slot.sparams.penalty_repeat},
|
{"repeat_penalty", slot.sparams.penalty_repeat},
|
||||||
{"presence_penalty", slot.sparams.penalty_present},
|
{"presence_penalty", slot.sparams.penalty_present},
|
||||||
{"frequency_penalty", slot.sparams.penalty_freq},
|
{"frequency_penalty", slot.sparams.penalty_freq},
|
||||||
|
{"penalty_prompt_tokens", slot.sparams.penalty_prompt_tokens},
|
||||||
|
{"use_penalty_prompt_tokens", slot.sparams.use_penalty_prompt_tokens},
|
||||||
{"mirostat", slot.sparams.mirostat},
|
{"mirostat", slot.sparams.mirostat},
|
||||||
{"mirostat_tau", slot.sparams.mirostat_tau},
|
{"mirostat_tau", slot.sparams.mirostat_tau},
|
||||||
{"mirostat_eta", slot.sparams.mirostat_eta},
|
{"mirostat_eta", slot.sparams.mirostat_eta},
|
||||||
@@ -1175,13 +1179,13 @@ struct llama_server_context
|
|||||||
{"stop", slot.params.antiprompt},
|
{"stop", slot.params.antiprompt},
|
||||||
{"n_predict", slot.params.n_predict},
|
{"n_predict", slot.params.n_predict},
|
||||||
{"n_keep", params.n_keep},
|
{"n_keep", params.n_keep},
|
||||||
{"ignore_eos", slot.sparams.ignore_eos},
|
{"ignore_eos", ignore_eos},
|
||||||
{"stream", slot.params.stream},
|
{"stream", slot.params.stream},
|
||||||
// {"logit_bias", slot.sparams.logit_bias},
|
{"logit_bias", slot.sparams.logit_bias},
|
||||||
{"n_probs", slot.sparams.n_probs},
|
{"n_probs", slot.sparams.n_probs},
|
||||||
{"min_keep", slot.sparams.min_keep},
|
{"min_keep", slot.sparams.min_keep},
|
||||||
{"grammar", slot.sparams.grammar},
|
{"grammar", slot.sparams.grammar},
|
||||||
{"samplers", samplers}
|
{"samplers", samplers_sequence}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1710,7 +1714,7 @@ struct llama_server_context
|
|||||||
|
|
||||||
if (!slot.params.cache_prompt)
|
if (!slot.params.cache_prompt)
|
||||||
{
|
{
|
||||||
gpt_sampler_reset(slot.ctx_sampling);
|
llama_sampling_reset(slot.ctx_sampling);
|
||||||
|
|
||||||
slot.n_past = 0;
|
slot.n_past = 0;
|
||||||
slot.n_past_se = 0;
|
slot.n_past_se = 0;
|
||||||
@@ -1722,7 +1726,7 @@ struct llama_server_context
|
|||||||
// push the prompt into the sampling context (do not apply grammar)
|
// push the prompt into the sampling context (do not apply grammar)
|
||||||
for (auto &token : prompt_tokens)
|
for (auto &token : prompt_tokens)
|
||||||
{
|
{
|
||||||
gpt_sampler_accept(slot.ctx_sampling, token, false);
|
llama_sampling_accept(slot.ctx_sampling, ctx, token, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
slot.n_past = common_part(slot.cache_tokens, prompt_tokens);
|
slot.n_past = common_part(slot.cache_tokens, prompt_tokens);
|
||||||
@@ -1930,9 +1934,9 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
|
|
||||||
completion_token_output result;
|
completion_token_output result;
|
||||||
const llama_token id = gpt_sampler_sample(slot.ctx_sampling, ctx, slot.i_batch - i);
|
const llama_token id = llama_sampling_sample(slot.ctx_sampling, ctx, NULL, slot.i_batch - i);
|
||||||
|
|
||||||
gpt_sampler_accept(slot.ctx_sampling, id, true);
|
llama_sampling_accept(slot.ctx_sampling, ctx, id, true);
|
||||||
|
|
||||||
slot.n_decoded += 1;
|
slot.n_decoded += 1;
|
||||||
if (slot.n_decoded == 1)
|
if (slot.n_decoded == 1)
|
||||||
@@ -1942,14 +1946,19 @@ struct llama_server_context
|
|||||||
metrics.on_prompt_eval(slot);
|
metrics.on_prompt_eval(slot);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
llama_token_data_array cur_p = { slot.ctx_sampling->cur.data(), slot.ctx_sampling->cur.size(), false };
|
||||||
result.tok = id;
|
result.tok = id;
|
||||||
const auto * cur_p = gpt_sampler_get_candidates(slot.ctx_sampling);
|
|
||||||
|
|
||||||
for (size_t i = 0; i < (size_t) slot.sparams.n_probs; ++i) {
|
const int32_t n_probs = slot.sparams.n_probs;
|
||||||
result.probs.push_back({
|
if (slot.sparams.temp <= 0 && n_probs > 0)
|
||||||
cur_p->data[i].id,
|
{
|
||||||
i >= cur_p->size ? 0.0f : cur_p->data[i].p,
|
// for llama_sample_token_greedy we need to sort candidates
|
||||||
});
|
llama_sample_softmax(ctx, &cur_p);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < std::min(cur_p.size, (size_t)n_probs); ++i)
|
||||||
|
{
|
||||||
|
result.probs.push_back({cur_p.data[i].id, cur_p.data[i].p});
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!process_token(result, slot))
|
if (!process_token(result, slot))
|
||||||
@@ -2201,7 +2210,7 @@ static void params_parse(const backend::ModelOptions* request,
|
|||||||
params.model_alias = request->modelfile();
|
params.model_alias = request->modelfile();
|
||||||
params.n_ctx = request->contextsize();
|
params.n_ctx = request->contextsize();
|
||||||
//params.memory_f16 = request->f16memory();
|
//params.memory_f16 = request->f16memory();
|
||||||
params.cpuparams.n_threads = request->threads();
|
params.n_threads = request->threads();
|
||||||
params.n_gpu_layers = request->ngpulayers();
|
params.n_gpu_layers = request->ngpulayers();
|
||||||
params.n_batch = request->nbatch();
|
params.n_batch = request->nbatch();
|
||||||
// Set params.n_parallel by environment variable (LLAMA_PARALLEL), defaults to 1
|
// Set params.n_parallel by environment variable (LLAMA_PARALLEL), defaults to 1
|
||||||
|
|||||||
@@ -1,13 +0,0 @@
|
|||||||
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
|
|
||||||
index 342042ff..224db9b5 100644
|
|
||||||
--- a/examples/llava/clip.cpp
|
|
||||||
+++ b/examples/llava/clip.cpp
|
|
||||||
@@ -2419,7 +2419,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
|
||||||
struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
|
|
||||||
int* patches_data = (int*)malloc(ggml_nbytes(patches));
|
|
||||||
for (int i = 0; i < num_patches; i++) {
|
|
||||||
- patches_data[i] = i + 1;
|
|
||||||
+ patches_data[i] = i;
|
|
||||||
}
|
|
||||||
ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
|
|
||||||
free(patches_data);
|
|
||||||
@@ -1,12 +1,5 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
## Patches
|
|
||||||
## Apply patches from the `patches` directory
|
|
||||||
for patch in $(ls patches); do
|
|
||||||
echo "Applying patch $patch"
|
|
||||||
patch -d llama.cpp/ -p1 < patches/$patch
|
|
||||||
done
|
|
||||||
|
|
||||||
cp -r CMakeLists.txt llama.cpp/examples/grpc-server/
|
cp -r CMakeLists.txt llama.cpp/examples/grpc-server/
|
||||||
cp -r grpc-server.cpp llama.cpp/examples/grpc-server/
|
cp -r grpc-server.cpp llama.cpp/examples/grpc-server/
|
||||||
cp -rfv json.hpp llama.cpp/examples/grpc-server/
|
cp -rfv json.hpp llama.cpp/examples/grpc-server/
|
||||||
|
|||||||
@@ -480,4 +480,31 @@ static inline std::vector<uint8_t> base64_decode(const std::string & encoded_str
|
|||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// random string / id
|
||||||
|
//
|
||||||
|
|
||||||
|
// Builds a 32-character identifier whose characters are drawn from the
// digits, upper-case and lower-case ASCII letters. A fresh Mersenne Twister,
// seeded from std::random_device, is created on every call.
static std::string random_string()
{
    static const std::string alphabet("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
    const size_t id_length = 32;

    std::random_device seed_source;
    std::mt19937 engine(seed_source());

    std::string id;
    id.reserve(id_length);

    // One engine draw per character, reduced onto the alphabet by modulo.
    while (id.size() < id_length) {
        id.push_back(alphabet[engine() % alphabet.size()]);
    }

    return id;
}
|
||||||
|
|
||||||
|
static std::string gen_chatcmplid()
|
||||||
|
{
|
||||||
|
std::stringstream chatcmplid;
|
||||||
|
chatcmplid << "chatcmpl-" << random_string();
|
||||||
|
return chatcmplid.str();
|
||||||
}
|
}
|
||||||
104
backend/go/transcribe/transcript.go
Normal file
104
backend/go/transcribe/transcript.go
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
	"errors"
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"

	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
	"github.com/go-audio/wav"
	"github.com/mudler/LocalAI/core/schema"
)
|
||||||
|
|
||||||
|
// ffmpegCommand runs the ffmpeg binary with args, inheriting the current
// process environment, and returns its combined stdout/stderr together with
// the execution error, if any.
func ffmpegCommand(args []string) (string, error) {
	// The binary name is a literal (never caller-supplied) so that security
	// scanners can see the executed command is constrained to ffmpeg.
	ffmpeg := exec.Command("ffmpeg", args...)
	ffmpeg.Env = os.Environ()

	combined, err := ffmpeg.CombinedOutput()
	return string(combined), err
}
|
||||||
|
|
||||||
|
// AudioToWav converts audio to wav for transcribe.
|
||||||
|
// TODO: use https://github.com/mccoyst/ogg?
|
||||||
|
func audioToWav(src, dst string) error {
|
||||||
|
commandArgs := []string{"-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
|
||||||
|
out, err := ffmpegCommand(commandArgs)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("error: %w out: %s", err, out)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func Transcript(model whisper.Model, audiopath, language string, translate bool, threads uint) (schema.TranscriptionResult, error) {
|
||||||
|
res := schema.TranscriptionResult{}
|
||||||
|
|
||||||
|
dir, err := os.MkdirTemp("", "whisper")
|
||||||
|
if err != nil {
|
||||||
|
return res, err
|
||||||
|
}
|
||||||
|
defer os.RemoveAll(dir)
|
||||||
|
|
||||||
|
convertedPath := filepath.Join(dir, "converted.wav")
|
||||||
|
|
||||||
|
if err := audioToWav(audiopath, convertedPath); err != nil {
|
||||||
|
return res, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Open samples
|
||||||
|
fh, err := os.Open(convertedPath)
|
||||||
|
if err != nil {
|
||||||
|
return res, err
|
||||||
|
}
|
||||||
|
defer fh.Close()
|
||||||
|
|
||||||
|
// Read samples
|
||||||
|
d := wav.NewDecoder(fh)
|
||||||
|
buf, err := d.FullPCMBuffer()
|
||||||
|
if err != nil {
|
||||||
|
return res, err
|
||||||
|
}
|
||||||
|
|
||||||
|
data := buf.AsFloat32Buffer().Data
|
||||||
|
|
||||||
|
// Process samples
|
||||||
|
context, err := model.NewContext()
|
||||||
|
if err != nil {
|
||||||
|
return res, err
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
context.SetThreads(threads)
|
||||||
|
|
||||||
|
if language != "" {
|
||||||
|
context.SetLanguage(language)
|
||||||
|
} else {
|
||||||
|
context.SetLanguage("auto")
|
||||||
|
}
|
||||||
|
|
||||||
|
if translate {
|
||||||
|
context.SetTranslate(true)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := context.Process(data, nil, nil); err != nil {
|
||||||
|
return res, err
|
||||||
|
}
|
||||||
|
|
||||||
|
for {
|
||||||
|
s, err := context.NextSegment()
|
||||||
|
if err != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
var tokens []int
|
||||||
|
for _, t := range s.Tokens {
|
||||||
|
tokens = append(tokens, t.Id)
|
||||||
|
}
|
||||||
|
|
||||||
|
segment := schema.Segment{Id: s.Num, Text: s.Text, Start: s.Start, End: s.End, Tokens: tokens}
|
||||||
|
res.Segments = append(res.Segments, segment)
|
||||||
|
|
||||||
|
res.Text += s.Text
|
||||||
|
}
|
||||||
|
|
||||||
|
return res, nil
|
||||||
|
}
|
||||||
26
backend/go/transcribe/whisper.go
Normal file
26
backend/go/transcribe/whisper.go
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
// This is a wrapper to satisfy the GRPC service interface
|
||||||
|
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||||
|
import (
|
||||||
|
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
||||||
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
|
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||||
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
|
)
|
||||||
|
|
||||||
|
type Whisper struct {
|
||||||
|
base.SingleThread
|
||||||
|
whisper whisper.Model
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sd *Whisper) Load(opts *pb.ModelOptions) error {
|
||||||
|
// Note: the Model here is a path to a directory containing the model files
|
||||||
|
w, err := whisper.New(opts.ModelFile)
|
||||||
|
sd.whisper = w
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.TranscriptionResult, error) {
|
||||||
|
return Transcript(sd.whisper, opts.Dst, opts.Language, opts.Translate, uint(opts.Threads))
|
||||||
|
}
|
||||||
@@ -1,105 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
// This is a wrapper to statisfy the GRPC service interface
|
|
||||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
|
||||||
import (
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
|
|
||||||
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
|
||||||
"github.com/go-audio/wav"
|
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
|
||||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
||||||
"github.com/mudler/LocalAI/pkg/utils"
|
|
||||||
)
|
|
||||||
|
|
||||||
type Whisper struct {
|
|
||||||
base.SingleThread
|
|
||||||
whisper whisper.Model
|
|
||||||
}
|
|
||||||
|
|
||||||
func (sd *Whisper) Load(opts *pb.ModelOptions) error {
|
|
||||||
// Note: the Model here is a path to a directory containing the model files
|
|
||||||
w, err := whisper.New(opts.ModelFile)
|
|
||||||
sd.whisper = w
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (pb.TranscriptResult, error) {
|
|
||||||
|
|
||||||
dir, err := os.MkdirTemp("", "whisper")
|
|
||||||
if err != nil {
|
|
||||||
return pb.TranscriptResult{}, err
|
|
||||||
}
|
|
||||||
defer os.RemoveAll(dir)
|
|
||||||
|
|
||||||
convertedPath := filepath.Join(dir, "converted.wav")
|
|
||||||
|
|
||||||
if err := utils.AudioToWav(opts.Dst, convertedPath); err != nil {
|
|
||||||
return pb.TranscriptResult{}, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Open samples
|
|
||||||
fh, err := os.Open(convertedPath)
|
|
||||||
if err != nil {
|
|
||||||
return pb.TranscriptResult{}, err
|
|
||||||
}
|
|
||||||
defer fh.Close()
|
|
||||||
|
|
||||||
// Read samples
|
|
||||||
d := wav.NewDecoder(fh)
|
|
||||||
buf, err := d.FullPCMBuffer()
|
|
||||||
if err != nil {
|
|
||||||
return pb.TranscriptResult{}, err
|
|
||||||
}
|
|
||||||
|
|
||||||
data := buf.AsFloat32Buffer().Data
|
|
||||||
|
|
||||||
// Process samples
|
|
||||||
context, err := sd.whisper.NewContext()
|
|
||||||
if err != nil {
|
|
||||||
return pb.TranscriptResult{}, err
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
context.SetThreads(uint(opts.Threads))
|
|
||||||
|
|
||||||
if opts.Language != "" {
|
|
||||||
context.SetLanguage(opts.Language)
|
|
||||||
} else {
|
|
||||||
context.SetLanguage("auto")
|
|
||||||
}
|
|
||||||
|
|
||||||
if opts.Translate {
|
|
||||||
context.SetTranslate(true)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := context.Process(data, nil, nil); err != nil {
|
|
||||||
return pb.TranscriptResult{}, err
|
|
||||||
}
|
|
||||||
|
|
||||||
segments := []*pb.TranscriptSegment{}
|
|
||||||
text := ""
|
|
||||||
for {
|
|
||||||
s, err := context.NextSegment()
|
|
||||||
if err != nil {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
var tokens []int32
|
|
||||||
for _, t := range s.Tokens {
|
|
||||||
tokens = append(tokens, int32(t.Id))
|
|
||||||
}
|
|
||||||
|
|
||||||
segment := &pb.TranscriptSegment{Id: int32(s.Num), Text: s.Text, Start: int64(s.Start), End: int64(s.End), Tokens: tokens}
|
|
||||||
segments = append(segments, segment)
|
|
||||||
|
|
||||||
text += s.Text
|
|
||||||
}
|
|
||||||
|
|
||||||
return pb.TranscriptResult{
|
|
||||||
Segments: segments,
|
|
||||||
Text: text,
|
|
||||||
}, nil
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
accelerate
|
accelerate
|
||||||
auto-gptq==0.7.1
|
auto-gptq==0.7.1
|
||||||
grpcio==1.66.1
|
grpcio==1.65.4
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
transformers
|
transformers
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
bark==0.1.5
|
bark==0.1.5
|
||||||
grpcio==1.66.1
|
grpcio==1.65.5
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
@@ -1,2 +1,2 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.65.5
|
||||||
protobuf
|
protobuf
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
TTS==0.22.0
|
TTS==0.22.0
|
||||||
grpcio==1.66.1
|
grpcio==1.65.5
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
@@ -168,7 +168,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
if request.CFGScale != 0:
|
if request.CFGScale != 0:
|
||||||
self.cfg_scale = request.CFGScale
|
self.cfg_scale = request.CFGScale
|
||||||
|
|
||||||
clipmodel = "Lykon/dreamshaper-8"
|
clipmodel = "runwayml/stable-diffusion-v1-5"
|
||||||
if request.CLIPModel != "":
|
if request.CLIPModel != "":
|
||||||
clipmodel = request.CLIPModel
|
clipmodel = request.CLIPModel
|
||||||
clipsubfolder = "text_encoder"
|
clipsubfolder = "text_encoder"
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
setuptools
|
setuptools
|
||||||
grpcio==1.66.1
|
grpcio==1.65.4
|
||||||
pillow
|
pillow
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
|
|||||||
@@ -53,7 +53,7 @@ class TestBackendServicer(unittest.TestCase):
|
|||||||
self.setUp()
|
self.setUp()
|
||||||
with grpc.insecure_channel("localhost:50051") as channel:
|
with grpc.insecure_channel("localhost:50051") as channel:
|
||||||
stub = backend_pb2_grpc.BackendStub(channel)
|
stub = backend_pb2_grpc.BackendStub(channel)
|
||||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="Lykon/dreamshaper-8"))
|
response = stub.LoadModel(backend_pb2.ModelOptions(Model="runwayml/stable-diffusion-v1-5"))
|
||||||
self.assertTrue(response.success)
|
self.assertTrue(response.success)
|
||||||
self.assertEqual(response.message, "Model loaded successfully")
|
self.assertEqual(response.message, "Model loaded successfully")
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
@@ -71,7 +71,7 @@ class TestBackendServicer(unittest.TestCase):
|
|||||||
self.setUp()
|
self.setUp()
|
||||||
with grpc.insecure_channel("localhost:50051") as channel:
|
with grpc.insecure_channel("localhost:50051") as channel:
|
||||||
stub = backend_pb2_grpc.BackendStub(channel)
|
stub = backend_pb2_grpc.BackendStub(channel)
|
||||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="Lykon/dreamshaper-8"))
|
response = stub.LoadModel(backend_pb2.ModelOptions(Model="runwayml/stable-diffusion-v1-5"))
|
||||||
print(response.message)
|
print(response.message)
|
||||||
self.assertTrue(response.success)
|
self.assertTrue(response.success)
|
||||||
image_req = backend_pb2.GenerateImageRequest(positive_prompt="cat", width=16,height=16, dst="test.jpg")
|
image_req = backend_pb2.GenerateImageRequest(positive_prompt="cat", width=16,height=16, dst="test.jpg")
|
||||||
@@ -81,4 +81,4 @@ class TestBackendServicer(unittest.TestCase):
|
|||||||
print(err)
|
print(err)
|
||||||
self.fail("Image gen service failed")
|
self.fail("Image gen service failed")
|
||||||
finally:
|
finally:
|
||||||
self.tearDown()
|
self.tearDown()
|
||||||
1
backend/python/exllama/.gitignore
vendored
Normal file
1
backend/python/exllama/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
source
|
||||||
25
backend/python/exllama/Makefile
Normal file
25
backend/python/exllama/Makefile
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
export CONDA_ENV_PATH = "exllama.yml"
|
||||||
|
|
||||||
|
.PHONY: exllama
|
||||||
|
exllama: protogen
|
||||||
|
bash install.sh ${CONDA_ENV_PATH}
|
||||||
|
|
||||||
|
.PHONY: run
|
||||||
|
run: protogen
|
||||||
|
@echo "Running exllama..."
|
||||||
|
bash run.sh
|
||||||
|
@echo "exllama run."
|
||||||
|
|
||||||
|
.PHONY: protogen
|
||||||
|
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
.PHONY: protogen-clean
|
||||||
|
protogen-clean:
|
||||||
|
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
backend_pb2_grpc.py backend_pb2.py:
|
||||||
|
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||||
|
|
||||||
|
.PHONY: clean
|
||||||
|
clean: protogen-clean
|
||||||
|
$(RM) -r venv source __pycache__
|
||||||
5
backend/python/exllama/README.md
Normal file
5
backend/python/exllama/README.md
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
# Creating a separate environment for the exllama project
|
||||||
|
|
||||||
|
```
|
||||||
|
make exllama
|
||||||
|
```
|
||||||
159
backend/python/exllama/backend.py
Executable file
159
backend/python/exllama/backend.py
Executable file
@@ -0,0 +1,159 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import grpc
|
||||||
|
from concurrent import futures
|
||||||
|
import time
|
||||||
|
import backend_pb2
|
||||||
|
import backend_pb2_grpc
|
||||||
|
import argparse
|
||||||
|
import signal
|
||||||
|
import sys
|
||||||
|
import os, glob
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
import torch
|
||||||
|
import torch.nn.functional as F
|
||||||
|
from torch import version as torch_version
|
||||||
|
|
||||||
|
from source.tokenizer import ExLlamaTokenizer
|
||||||
|
from source.generator import ExLlamaGenerator
|
||||||
|
from source.model import ExLlama, ExLlamaCache, ExLlamaConfig
|
||||||
|
|
||||||
|
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||||
|
|
||||||
|
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
||||||
|
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
||||||
|
|
||||||
|
# Implement the BackendServicer class with the service methods
|
||||||
|
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """gRPC servicer that serves an ExLlama model.

    LoadModel populates ``self.generator``, ``self.model``, ``self.tokenizer``
    and ``self.cache``; the prediction methods read them.
    """

    def generate(self, prompt, max_new_tokens):
        """Generate up to ``max_new_tokens`` tokens following ``prompt``.

        Returns the decoded text of the newly generated tokens only (the
        prompt tokens are sliced off), or an empty string when no token was
        generated. Generation stops early at the tokenizer's EOS token.
        """
        self.generator.end_beam_search()

        # Tokenizing the input
        ids = self.generator.tokenizer.encode(prompt)

        self.generator.gen_begin_reuse(ids)
        initial_len = self.generator.sequence[0].shape[0]
        has_leading_space = False
        generated_any = False
        for i in range(max_new_tokens):
            token = self.generator.gen_single_token()
            generated_any = True
            # A first piece starting with the SentencePiece word marker means
            # the continuation begins with a space; it is re-added below.
            if i == 0 and self.generator.tokenizer.tokenizer.IdToPiece(int(token)).startswith('▁'):
                has_leading_space = True

            if token.item() == self.generator.tokenizer.eos_token_id:
                break

        if not generated_any:
            return ''

        # Decode once after the loop: only the final decode was ever used, so
        # decoding on every iteration was redundant work.
        decoded_text = self.generator.tokenizer.decode(self.generator.sequence[0][initial_len:])
        if has_leading_space:
            decoded_text = ' ' + decoded_text
        return decoded_text

    def Health(self, request, context):
        """Liveness probe: always replies with the bytes ``OK``."""
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        """Load the ExLlama model found in the directory ``request.ModelFile``.

        Expects ``tokenizer.model``, ``config.json`` and at least one
        ``*.safetensors`` file in that directory. Returns a Result with
        ``success=False`` and a message on any failure.
        """
        try:
            # https://github.com/turboderp/exllama/blob/master/example_cfg.py
            model_directory = request.ModelFile

            # Locate files we need within that directory
            tokenizer_path = os.path.join(model_directory, "tokenizer.model")
            model_config_path = os.path.join(model_directory, "config.json")
            st_pattern = os.path.join(model_directory, "*.safetensors")
            candidates = glob.glob(st_pattern)
            if not candidates:
                # Report the actual problem instead of an opaque IndexError.
                return backend_pb2.Result(success=False, message=f"No .safetensors file found in {model_directory}")
            model_path = candidates[0]

            # Create config, model, tokenizer and generator

            config = ExLlamaConfig(model_config_path)               # create config from config.json
            config.model_path = model_path                          # supply path to model weights file
            if (request.ContextSize):
                config.max_seq_len = request.ContextSize            # override max sequence length
                config.max_attention_size = request.ContextSize**2  # Should be set to context_size^2.
                # https://github.com/turboderp/exllama/issues/220#issuecomment-1720324163

            # Set Rope scaling.
            if (request.RopeFreqScale):
                # Alpha value for Rope scaling.
                # Higher value increases context but adds perplexity.
                # alpha_value and compress_pos_emb are mutually exclusive.
                # https://github.com/turboderp/exllama/issues/115
                config.alpha_value = request.RopeFreqScale
                config.calculate_rotary_embedding_base()

            model = ExLlama(config)                                 # create ExLlama instance and load the weights
            tokenizer = ExLlamaTokenizer(tokenizer_path)            # create tokenizer from tokenizer model file

            cache = ExLlamaCache(model, batch_size = 2)             # create cache for inference
            generator = ExLlamaGenerator(model, tokenizer, cache)   # create generator

            self.generator = generator
            self.model = model
            self.tokenizer = tokenizer
            self.cache = cache
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def Predict(self, request, context):
        """Run a completion for ``request.Prompt`` and return the generated text.

        Applies the request's sampling settings to the generator. The
        repetition penalty defaults to 1.15 and the token budget to 512 when
        the request leaves them at zero.
        """
        penalty = 1.15
        if request.Penalty != 0.0:
            penalty = request.Penalty
        self.generator.settings.token_repetition_penalty_max = penalty
        self.generator.settings.temperature = request.Temperature
        self.generator.settings.top_k = request.TopK
        self.generator.settings.top_p = request.TopP

        tokens = 512
        if request.Tokens != 0:
            tokens = request.Tokens

        # Rebuild the cache and generator with batch_size=2 if the current
        # cache was created with batch_size 1.
        if self.cache.batch_size == 1:
            del self.cache
            self.cache = ExLlamaCache(self.model, batch_size=2)
            self.generator = ExLlamaGenerator(self.model, self.tokenizer, self.cache)

        t = self.generate(request.Prompt, tokens)

        # Remove prompt from response if present
        if request.Prompt in t:
            t = t.replace(request.Prompt, "")

        # NOTE(review): Health answers with Reply while this returns Result —
        # confirm which message type backend.proto declares for Predict.
        return backend_pb2.Result(message=bytes(t, encoding='utf-8'))

    def PredictStream(self, request, context):
        """Streaming variant of Predict.

        Token-by-token streaming is not implemented yet: the complete Predict
        result is yielded as a single message. This must be a generator
        because a server-streaming handler has to return an iterator of
        responses, not a bare message.
        """
        yield self.Predict(request, context)
|
||||||
|
|
||||||
|
|
||||||
|
def serve(address):
    """Start the gRPC backend server on ``address`` and block until stopped.

    The server uses a thread pool of MAX_WORKERS workers and an insecure
    port. SIGINT and SIGTERM stop the server immediately and exit the process.
    """
    executor = futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)
    server = grpc.server(executor)
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("Server started. Listening on: " + address, file=sys.stderr)

    def _shutdown(sig, frame):
        # Shared handler for both termination signals.
        print("Received termination signal. Shutting down...")
        server.stop(0)
        sys.exit(0)

    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, _shutdown)

    # Keep the main thread alive; the server runs on worker threads.
    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        server.stop(0)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: parse the bind address and start serving.
    arg_parser = argparse.ArgumentParser(description="Run the gRPC server.")
    arg_parser.add_argument("--addr", default="localhost:50051", help="The address to bind the server to.")
    cli_args = arg_parser.parse_args()

    serve(cli_args.addr)
|
||||||
13
backend/python/exllama/install.sh
Executable file
13
backend/python/exllama/install.sh
Executable file
@@ -0,0 +1,13 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
set -e
|
||||||
|
|
||||||
|
LIMIT_TARGETS="cublas"
|
||||||
|
|
||||||
|
source $(dirname $0)/../common/libbackend.sh
|
||||||
|
|
||||||
|
installRequirements
|
||||||
|
|
||||||
|
git clone https://github.com/turboderp/exllama $MY_DIR/source
|
||||||
|
uv pip install ${BUILD_ISOLATION_FLAG} --requirement ${MY_DIR}/source/requirements.txt
|
||||||
|
|
||||||
|
cp -v ./*py $MY_DIR/source/
|
||||||
3
backend/python/exllama/requirements-cpu.txt
Normal file
3
backend/python/exllama/requirements-cpu.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
transformers
|
||||||
|
accelerate
|
||||||
|
torch
|
||||||
4
backend/python/exllama/requirements-cublas11.txt
Normal file
4
backend/python/exllama/requirements-cublas11.txt
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||||
|
torch
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
3
backend/python/exllama/requirements-cublas12.txt
Normal file
3
backend/python/exllama/requirements-cublas12.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
torch
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
4
backend/python/exllama/requirements.txt
Normal file
4
backend/python/exllama/requirements.txt
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
grpcio==1.65.5
|
||||||
|
protobuf
|
||||||
|
certifi
|
||||||
|
setuptools
|
||||||
7
backend/python/exllama/run.sh
Executable file
7
backend/python/exllama/run.sh
Executable file
@@ -0,0 +1,7 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
LIMIT_TARGETS="cublas"
|
||||||
|
BACKEND_FILE="${MY_DIR}/source/backend.py"
|
||||||
|
|
||||||
|
source $(dirname $0)/../common/libbackend.sh
|
||||||
|
|
||||||
|
startBackend $@
|
||||||
6
backend/python/exllama/test.sh
Executable file
6
backend/python/exllama/test.sh
Executable file
@@ -0,0 +1,6 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
set -e
|
||||||
|
|
||||||
|
source $(dirname $0)/../common/libbackend.sh
|
||||||
|
|
||||||
|
runUnittests
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.65.4
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
wheel
|
wheel
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.65.5
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
@@ -2,7 +2,7 @@
|
|||||||
intel-extension-for-pytorch
|
intel-extension-for-pytorch
|
||||||
torch
|
torch
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
grpcio==1.66.1
|
grpcio==1.65.5
|
||||||
protobuf
|
protobuf
|
||||||
librosa==0.9.1
|
librosa==0.9.1
|
||||||
faster-whisper==1.0.3
|
faster-whisper==1.0.3
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.65.5
|
||||||
protobuf
|
protobuf
|
||||||
librosa
|
librosa
|
||||||
faster-whisper
|
faster-whisper
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||||
torch==2.3.0+rocm6.0
|
torch
|
||||||
torchaudio==2.3.0+rocm6.0
|
torchaudio
|
||||||
transformers
|
transformers
|
||||||
accelerate
|
accelerate
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.65.5
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
llvmlite==0.43.0
|
llvmlite==0.43.0
|
||||||
@@ -1,3 +1,3 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.65.4
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
@@ -1,3 +1,3 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.65.5
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
@@ -15,7 +15,7 @@ import backend_pb2_grpc
|
|||||||
|
|
||||||
import grpc
|
import grpc
|
||||||
|
|
||||||
from scipy.io import wavfile
|
from scipy.io.wavfile import write as write_wav
|
||||||
from transformers import AutoProcessor, MusicgenForConditionalGeneration
|
from transformers import AutoProcessor, MusicgenForConditionalGeneration
|
||||||
|
|
||||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||||
@@ -63,61 +63,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
|
|
||||||
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
||||||
|
|
||||||
def SoundGeneration(self, request, context):
|
|
||||||
model_name = request.model
|
|
||||||
if model_name == "":
|
|
||||||
return backend_pb2.Result(success=False, message="request.model is required")
|
|
||||||
try:
|
|
||||||
self.processor = AutoProcessor.from_pretrained(model_name)
|
|
||||||
self.model = MusicgenForConditionalGeneration.from_pretrained(model_name)
|
|
||||||
inputs = None
|
|
||||||
if request.text == "":
|
|
||||||
inputs = self.model.get_unconditional_inputs(num_samples=1)
|
|
||||||
elif request.HasField('src'):
|
|
||||||
# TODO SECURITY CODE GOES HERE LOL
|
|
||||||
# WHO KNOWS IF THIS WORKS???
|
|
||||||
sample_rate, wsamples = wavfile.read('path_to_your_file.wav')
|
|
||||||
|
|
||||||
if request.HasField('src_divisor'):
|
|
||||||
wsamples = wsamples[: len(wsamples) // request.src_divisor]
|
|
||||||
|
|
||||||
inputs = self.processor(
|
|
||||||
audio=wsamples,
|
|
||||||
sampling_rate=sample_rate,
|
|
||||||
text=[request.text],
|
|
||||||
padding=True,
|
|
||||||
return_tensors="pt",
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
inputs = self.processor(
|
|
||||||
text=[request.text],
|
|
||||||
padding=True,
|
|
||||||
return_tensors="pt",
|
|
||||||
)
|
|
||||||
|
|
||||||
tokens = 256
|
|
||||||
if request.HasField('duration'):
|
|
||||||
tokens = int(request.duration * 51.2) # 256 tokens = 5 seconds, therefore 51.2 tokens is one second
|
|
||||||
guidance = 3.0
|
|
||||||
if request.HasField('temperature'):
|
|
||||||
guidance = request.temperature
|
|
||||||
dosample = True
|
|
||||||
if request.HasField('sample'):
|
|
||||||
dosample = request.sample
|
|
||||||
audio_values = self.model.generate(**inputs, do_sample=dosample, guidance_scale=guidance, max_new_tokens=tokens)
|
|
||||||
print("[transformers-musicgen] SoundGeneration generated!", file=sys.stderr)
|
|
||||||
sampling_rate = self.model.config.audio_encoder.sampling_rate
|
|
||||||
wavfile.write(request.dst, rate=sampling_rate, data=audio_values[0, 0].numpy())
|
|
||||||
print("[transformers-musicgen] SoundGeneration saved to", request.dst, file=sys.stderr)
|
|
||||||
print("[transformers-musicgen] SoundGeneration for", file=sys.stderr)
|
|
||||||
print("[transformers-musicgen] SoundGeneration requested tokens", tokens, file=sys.stderr)
|
|
||||||
print(request, file=sys.stderr)
|
|
||||||
except Exception as err:
|
|
||||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
|
||||||
return backend_pb2.Result(success=True)
|
|
||||||
|
|
||||||
|
|
||||||
# The TTS endpoint is older, and provides fewer features, but exists for compatibility reasons
|
|
||||||
def TTS(self, request, context):
|
def TTS(self, request, context):
|
||||||
model_name = request.model
|
model_name = request.model
|
||||||
if model_name == "":
|
if model_name == "":
|
||||||
@@ -130,7 +75,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
padding=True,
|
padding=True,
|
||||||
return_tensors="pt",
|
return_tensors="pt",
|
||||||
)
|
)
|
||||||
tokens = 512 # No good place to set the "length" in TTS, so use 10s as a sane default
|
tokens = 256
|
||||||
|
# TODO get tokens from request?
|
||||||
audio_values = self.model.generate(**inputs, max_new_tokens=tokens)
|
audio_values = self.model.generate(**inputs, max_new_tokens=tokens)
|
||||||
print("[transformers-musicgen] TTS generated!", file=sys.stderr)
|
print("[transformers-musicgen] TTS generated!", file=sys.stderr)
|
||||||
sampling_rate = self.model.config.audio_encoder.sampling_rate
|
sampling_rate = self.model.config.audio_encoder.sampling_rate
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.65.5
|
||||||
protobuf
|
protobuf
|
||||||
scipy==1.14.0
|
scipy==1.14.0
|
||||||
certifi
|
certifi
|
||||||
@@ -63,7 +63,7 @@ class TestBackendServicer(unittest.TestCase):
|
|||||||
|
|
||||||
def test_tts(self):
|
def test_tts(self):
|
||||||
"""
|
"""
|
||||||
This method tests if TTS is generated successfully
|
This method tests if the embeddings are generated successfully
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
self.setUp()
|
self.setUp()
|
||||||
@@ -77,24 +77,5 @@ class TestBackendServicer(unittest.TestCase):
|
|||||||
except Exception as err:
|
except Exception as err:
|
||||||
print(err)
|
print(err)
|
||||||
self.fail("TTS service failed")
|
self.fail("TTS service failed")
|
||||||
finally:
|
|
||||||
self.tearDown()
|
|
||||||
|
|
||||||
def test_sound_generation(self):
|
|
||||||
"""
|
|
||||||
This method tests if SoundGeneration is generated successfully
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
self.setUp()
|
|
||||||
with grpc.insecure_channel("localhost:50051") as channel:
|
|
||||||
stub = backend_pb2_grpc.BackendStub(channel)
|
|
||||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small"))
|
|
||||||
self.assertTrue(response.success)
|
|
||||||
sg_request = backend_pb2.SoundGenerationRequest(text="80s TV news production music hit for tonight's biggest story")
|
|
||||||
sg_response = stub.SoundGeneration(sg_request)
|
|
||||||
self.assertIsNotNone(sg_response)
|
|
||||||
except Exception as err:
|
|
||||||
print(err)
|
|
||||||
self.fail("SoundGeneration service failed")
|
|
||||||
finally:
|
finally:
|
||||||
self.tearDown()
|
self.tearDown()
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.65.5
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
@@ -1,3 +1,3 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.65.5
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
@@ -135,26 +135,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
res = await gen.__anext__()
|
res = await gen.__anext__()
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def Embedding(self, request, context):
|
|
||||||
"""
|
|
||||||
A gRPC method that calculates embeddings for a given sentence.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
request: An EmbeddingRequest object that contains the request parameters.
|
|
||||||
context: A grpc.ServicerContext object that provides information about the RPC.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
An EmbeddingResult object that contains the calculated embeddings.
|
|
||||||
"""
|
|
||||||
print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr)
|
|
||||||
outputs = self.model.encode(request.Embeddings)
|
|
||||||
# Check if we have one result at least
|
|
||||||
if len(outputs) == 0:
|
|
||||||
context.set_code(grpc.StatusCode.INVALID_ARGUMENT)
|
|
||||||
context.set_details("No embeddings were calculated.")
|
|
||||||
return backend_pb2.EmbeddingResult()
|
|
||||||
return backend_pb2.EmbeddingResult(embeddings=outputs[0].outputs.embedding)
|
|
||||||
|
|
||||||
async def PredictStream(self, request, context):
|
async def PredictStream(self, request, context):
|
||||||
"""
|
"""
|
||||||
Generates text based on the given prompt and sampling parameters, and streams the results.
|
Generates text based on the given prompt and sampling parameters, and streams the results.
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.65.5
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
setuptools
|
setuptools
|
||||||
@@ -72,28 +72,5 @@ class TestBackendServicer(unittest.TestCase):
|
|||||||
except Exception as err:
|
except Exception as err:
|
||||||
print(err)
|
print(err)
|
||||||
self.fail("text service failed")
|
self.fail("text service failed")
|
||||||
finally:
|
|
||||||
self.tearDown()
|
|
||||||
|
|
||||||
def test_embedding(self):
|
|
||||||
"""
|
|
||||||
This method tests if the embeddings are generated successfully
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
self.setUp()
|
|
||||||
with grpc.insecure_channel("localhost:50051") as channel:
|
|
||||||
stub = backend_pb2_grpc.BackendStub(channel)
|
|
||||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="intfloat/e5-mistral-7b-instruct"))
|
|
||||||
self.assertTrue(response.success)
|
|
||||||
embedding_request = backend_pb2.PredictOptions(Embeddings="This is a test sentence.")
|
|
||||||
embedding_response = stub.Embedding(embedding_request)
|
|
||||||
self.assertIsNotNone(embedding_response.embeddings)
|
|
||||||
# assert that is a list of floats
|
|
||||||
self.assertIsInstance(embedding_response.embeddings, list)
|
|
||||||
# assert that the list is not empty
|
|
||||||
self.assertTrue(len(embedding_response.embeddings) > 0)
|
|
||||||
except Exception as err:
|
|
||||||
print(err)
|
|
||||||
self.fail("Embedding service failed")
|
|
||||||
finally:
|
finally:
|
||||||
self.tearDown()
|
self.tearDown()
|
||||||
@@ -1,13 +0,0 @@
|
|||||||
package backend_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
. "github.com/onsi/ginkgo/v2"
|
|
||||||
. "github.com/onsi/gomega"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestBackend(t *testing.T) {
|
|
||||||
RegisterFailHandler(Fail)
|
|
||||||
RunSpecs(t, "Backend test suite")
|
|
||||||
}
|
|
||||||
@@ -9,8 +9,6 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
|
|
||||||
"github.com/rs/zerolog/log"
|
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
|
|
||||||
@@ -89,7 +87,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
|
|||||||
case string:
|
case string:
|
||||||
protoMessages[i].Content = ct
|
protoMessages[i].Content = ct
|
||||||
default:
|
default:
|
||||||
return nil, fmt.Errorf("unsupported type for schema.Message.Content for inference: %T", ct)
|
return nil, fmt.Errorf("Unsupported type for schema.Message.Content for inference: %T", ct)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -183,37 +181,13 @@ func Finetune(config config.BackendConfig, input, prediction string) string {
|
|||||||
mu.Lock()
|
mu.Lock()
|
||||||
reg, ok := cutstrings[c]
|
reg, ok := cutstrings[c]
|
||||||
if !ok {
|
if !ok {
|
||||||
r, err := regexp.Compile(c)
|
cutstrings[c] = regexp.MustCompile(c)
|
||||||
if err != nil {
|
|
||||||
log.Fatal().Err(err).Msg("failed to compile regex")
|
|
||||||
}
|
|
||||||
cutstrings[c] = r
|
|
||||||
reg = cutstrings[c]
|
reg = cutstrings[c]
|
||||||
}
|
}
|
||||||
mu.Unlock()
|
mu.Unlock()
|
||||||
prediction = reg.ReplaceAllString(prediction, "")
|
prediction = reg.ReplaceAllString(prediction, "")
|
||||||
}
|
}
|
||||||
|
|
||||||
// extract results from the response which can be for instance inside XML tags
|
|
||||||
var predResult string
|
|
||||||
for _, r := range config.ExtractRegex {
|
|
||||||
mu.Lock()
|
|
||||||
reg, ok := cutstrings[r]
|
|
||||||
if !ok {
|
|
||||||
regex, err := regexp.Compile(r)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatal().Err(err).Msg("failed to compile regex")
|
|
||||||
}
|
|
||||||
cutstrings[r] = regex
|
|
||||||
reg = regex
|
|
||||||
}
|
|
||||||
mu.Unlock()
|
|
||||||
predResult += reg.FindString(prediction)
|
|
||||||
}
|
|
||||||
if predResult != "" {
|
|
||||||
prediction = predResult
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, c := range config.TrimSpace {
|
for _, c := range config.TrimSpace {
|
||||||
prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c))
|
prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,109 +0,0 @@
|
|||||||
package backend_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
. "github.com/mudler/LocalAI/core/backend"
|
|
||||||
"github.com/mudler/LocalAI/core/config"
|
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
|
||||||
|
|
||||||
. "github.com/onsi/ginkgo/v2"
|
|
||||||
. "github.com/onsi/gomega"
|
|
||||||
)
|
|
||||||
|
|
||||||
var _ = Describe("LLM tests", func() {
|
|
||||||
Context("Finetune LLM output", func() {
|
|
||||||
var (
|
|
||||||
testConfig config.BackendConfig
|
|
||||||
input string
|
|
||||||
prediction string
|
|
||||||
result string
|
|
||||||
)
|
|
||||||
|
|
||||||
BeforeEach(func() {
|
|
||||||
testConfig = config.BackendConfig{
|
|
||||||
PredictionOptions: schema.PredictionOptions{
|
|
||||||
Echo: false,
|
|
||||||
},
|
|
||||||
LLMConfig: config.LLMConfig{
|
|
||||||
Cutstrings: []string{`<.*?>`}, // Example regex for removing XML tags
|
|
||||||
ExtractRegex: []string{`<result>(.*?)</result>`}, // Example regex to extract from tags
|
|
||||||
TrimSpace: []string{" ", "\n"},
|
|
||||||
TrimSuffix: []string{".", "!"},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
Context("when echo is enabled", func() {
|
|
||||||
BeforeEach(func() {
|
|
||||||
testConfig.Echo = true
|
|
||||||
input = "Hello"
|
|
||||||
prediction = "World"
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should prepend input to prediction", func() {
|
|
||||||
result = Finetune(testConfig, input, prediction)
|
|
||||||
Expect(result).To(Equal("HelloWorld"))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
Context("when echo is disabled", func() {
|
|
||||||
BeforeEach(func() {
|
|
||||||
testConfig.Echo = false
|
|
||||||
input = "Hello"
|
|
||||||
prediction = "World"
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should not modify the prediction with input", func() {
|
|
||||||
result = Finetune(testConfig, input, prediction)
|
|
||||||
Expect(result).To(Equal("World"))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
Context("when cutstrings regex is applied", func() {
|
|
||||||
BeforeEach(func() {
|
|
||||||
input = ""
|
|
||||||
prediction = "<div>Hello</div> World"
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should remove substrings matching cutstrings regex", func() {
|
|
||||||
result = Finetune(testConfig, input, prediction)
|
|
||||||
Expect(result).To(Equal("Hello World"))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
Context("when extract regex is applied", func() {
|
|
||||||
BeforeEach(func() {
|
|
||||||
input = ""
|
|
||||||
prediction = "<response><result>42</result></response>"
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should extract substrings matching the extract regex", func() {
|
|
||||||
result = Finetune(testConfig, input, prediction)
|
|
||||||
Expect(result).To(Equal("42"))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
Context("when trimming spaces", func() {
|
|
||||||
BeforeEach(func() {
|
|
||||||
input = ""
|
|
||||||
prediction = " Hello World "
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should trim spaces from the prediction", func() {
|
|
||||||
result = Finetune(testConfig, input, prediction)
|
|
||||||
Expect(result).To(Equal("Hello World"))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
Context("when trimming suffixes", func() {
|
|
||||||
BeforeEach(func() {
|
|
||||||
input = ""
|
|
||||||
prediction = "Hello World."
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should trim suffixes from the prediction", func() {
|
|
||||||
result = Finetune(testConfig, input, prediction)
|
|
||||||
Expect(result).To(Equal("Hello World"))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
})
|
|
||||||
})
|
|
||||||
@@ -1,74 +0,0 @@
|
|||||||
package backend
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"fmt"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/core/config"
|
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
|
||||||
"github.com/mudler/LocalAI/pkg/utils"
|
|
||||||
)
|
|
||||||
|
|
||||||
func SoundGeneration(
|
|
||||||
backend string,
|
|
||||||
modelFile string,
|
|
||||||
text string,
|
|
||||||
duration *float32,
|
|
||||||
temperature *float32,
|
|
||||||
doSample *bool,
|
|
||||||
sourceFile *string,
|
|
||||||
sourceDivisor *int32,
|
|
||||||
loader *model.ModelLoader,
|
|
||||||
appConfig *config.ApplicationConfig,
|
|
||||||
backendConfig config.BackendConfig,
|
|
||||||
) (string, *proto.Result, error) {
|
|
||||||
if backend == "" {
|
|
||||||
return "", nil, fmt.Errorf("backend is a required parameter")
|
|
||||||
}
|
|
||||||
|
|
||||||
grpcOpts := gRPCModelOpts(backendConfig)
|
|
||||||
opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
|
|
||||||
model.WithBackendString(backend),
|
|
||||||
model.WithModel(modelFile),
|
|
||||||
model.WithContext(appConfig.Context),
|
|
||||||
model.WithAssetDir(appConfig.AssetsDestination),
|
|
||||||
model.WithLoadGRPCLoadModelOpts(grpcOpts),
|
|
||||||
})
|
|
||||||
|
|
||||||
soundGenModel, err := loader.BackendLoader(opts...)
|
|
||||||
if err != nil {
|
|
||||||
return "", nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
if soundGenModel == nil {
|
|
||||||
return "", nil, fmt.Errorf("could not load sound generation model")
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := os.MkdirAll(appConfig.AudioDir, 0750); err != nil {
|
|
||||||
return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
fileName := utils.GenerateUniqueFileName(appConfig.AudioDir, "sound_generation", ".wav")
|
|
||||||
filePath := filepath.Join(appConfig.AudioDir, fileName)
|
|
||||||
|
|
||||||
res, err := soundGenModel.SoundGeneration(context.Background(), &proto.SoundGenerationRequest{
|
|
||||||
Text: text,
|
|
||||||
Model: modelFile,
|
|
||||||
Dst: filePath,
|
|
||||||
Sample: doSample,
|
|
||||||
Duration: duration,
|
|
||||||
Temperature: temperature,
|
|
||||||
Src: sourceFile,
|
|
||||||
SrcDivisor: sourceDivisor,
|
|
||||||
})
|
|
||||||
|
|
||||||
// return RPC error if any
|
|
||||||
if !res.Success {
|
|
||||||
return "", nil, fmt.Errorf(res.Message)
|
|
||||||
}
|
|
||||||
|
|
||||||
return filePath, res, err
|
|
||||||
}
|
|
||||||
@@ -3,13 +3,12 @@ package backend
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
model "github.com/mudler/LocalAI/pkg/model"
|
||||||
)
|
)
|
||||||
|
|
||||||
func ModelTranscription(audio, language string, translate bool, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {
|
func ModelTranscription(audio, language string, translate bool, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {
|
||||||
@@ -22,40 +21,19 @@ func ModelTranscription(audio, language string, translate bool, ml *model.ModelL
|
|||||||
model.WithAssetDir(appConfig.AssetsDestination),
|
model.WithAssetDir(appConfig.AssetsDestination),
|
||||||
})
|
})
|
||||||
|
|
||||||
transcriptionModel, err := ml.BackendLoader(opts...)
|
whisperModel, err := ml.BackendLoader(opts...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if transcriptionModel == nil {
|
if whisperModel == nil {
|
||||||
return nil, fmt.Errorf("could not load transcription model")
|
return nil, fmt.Errorf("could not load whisper model")
|
||||||
}
|
}
|
||||||
|
|
||||||
r, err := transcriptionModel.AudioTranscription(context.Background(), &proto.TranscriptRequest{
|
return whisperModel.AudioTranscription(context.Background(), &proto.TranscriptRequest{
|
||||||
Dst: audio,
|
Dst: audio,
|
||||||
Language: language,
|
Language: language,
|
||||||
Translate: translate,
|
Translate: translate,
|
||||||
Threads: uint32(*backendConfig.Threads),
|
Threads: uint32(*backendConfig.Threads),
|
||||||
})
|
})
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
tr := &schema.TranscriptionResult{
|
|
||||||
Text: r.Text,
|
|
||||||
}
|
|
||||||
for _, s := range r.Segments {
|
|
||||||
var tks []int
|
|
||||||
for _, t := range s.Tokens {
|
|
||||||
tks = append(tks, int(t))
|
|
||||||
}
|
|
||||||
tr.Segments = append(tr.Segments,
|
|
||||||
schema.Segment{
|
|
||||||
Text: s.Text,
|
|
||||||
Id: int(s.Id),
|
|
||||||
Start: time.Duration(s.Start),
|
|
||||||
End: time.Duration(s.End),
|
|
||||||
Tokens: tks,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
return tr, err
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,15 +9,31 @@ import (
|
|||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
model "github.com/mudler/LocalAI/pkg/model"
|
||||||
"github.com/mudler/LocalAI/pkg/utils"
|
"github.com/mudler/LocalAI/pkg/utils"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func generateUniqueFileName(dir, baseName, ext string) string {
|
||||||
|
counter := 1
|
||||||
|
fileName := baseName + ext
|
||||||
|
|
||||||
|
for {
|
||||||
|
filePath := filepath.Join(dir, fileName)
|
||||||
|
_, err := os.Stat(filePath)
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return fileName
|
||||||
|
}
|
||||||
|
|
||||||
|
counter++
|
||||||
|
fileName = fmt.Sprintf("%s_%d%s", baseName, counter, ext)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func ModelTTS(
|
func ModelTTS(
|
||||||
backend,
|
backend,
|
||||||
text,
|
text,
|
||||||
modelFile,
|
modelFile,
|
||||||
voice,
|
voice ,
|
||||||
language string,
|
language string,
|
||||||
loader *model.ModelLoader,
|
loader *model.ModelLoader,
|
||||||
appConfig *config.ApplicationConfig,
|
appConfig *config.ApplicationConfig,
|
||||||
@@ -50,7 +66,7 @@ func ModelTTS(
|
|||||||
return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
|
return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
fileName := utils.GenerateUniqueFileName(appConfig.AudioDir, "tts", ".wav")
|
fileName := generateUniqueFileName(appConfig.AudioDir, "tts", ".wav")
|
||||||
filePath := filepath.Join(appConfig.AudioDir, fileName)
|
filePath := filepath.Join(appConfig.AudioDir, fileName)
|
||||||
|
|
||||||
// If the model file is not empty, we pass it joined with the model path
|
// If the model file is not empty, we pass it joined with the model path
|
||||||
@@ -72,15 +88,12 @@ func ModelTTS(
|
|||||||
}
|
}
|
||||||
|
|
||||||
res, err := ttsModel.TTS(context.Background(), &proto.TTSRequest{
|
res, err := ttsModel.TTS(context.Background(), &proto.TTSRequest{
|
||||||
Text: text,
|
Text: text,
|
||||||
Model: modelPath,
|
Model: modelPath,
|
||||||
Voice: voice,
|
Voice: voice,
|
||||||
Dst: filePath,
|
Dst: filePath,
|
||||||
Language: &language,
|
Language: &language,
|
||||||
})
|
})
|
||||||
if err != nil {
|
|
||||||
return "", nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// return RPC error if any
|
// return RPC error if any
|
||||||
if !res.Success {
|
if !res.Success {
|
||||||
|
|||||||
@@ -1,80 +0,0 @@
|
|||||||
package cli_api
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"fmt"
|
|
||||||
"net"
|
|
||||||
"os"
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/core/p2p"
|
|
||||||
"github.com/mudler/edgevpn/pkg/node"
|
|
||||||
|
|
||||||
"github.com/rs/zerolog/log"
|
|
||||||
)
|
|
||||||
|
|
||||||
func StartP2PStack(ctx context.Context, address, token, networkID string, federated bool) error {
|
|
||||||
var n *node.Node
|
|
||||||
// Here we are avoiding creating multiple nodes:
|
|
||||||
// - if the federated mode is enabled, we create a federated node and expose a service
|
|
||||||
// - exposing a service creates a node with specific options, and we don't want to create another node
|
|
||||||
|
|
||||||
// If the federated mode is enabled, we expose a service to the local instance running
|
|
||||||
// at r.Address
|
|
||||||
if federated {
|
|
||||||
_, port, err := net.SplitHostPort(address)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Here a new node is created and started
|
|
||||||
// and a service is exposed by the node
|
|
||||||
node, err := p2p.ExposeService(ctx, "localhost", port, token, p2p.NetworkID(networkID, p2p.FederatedID))
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := p2p.ServiceDiscoverer(ctx, node, token, p2p.NetworkID(networkID, p2p.FederatedID), nil, false); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
n = node
|
|
||||||
}
|
|
||||||
|
|
||||||
// If the p2p mode is enabled, we start the service discovery
|
|
||||||
if token != "" {
|
|
||||||
// If a node wasn't created previously, create it
|
|
||||||
if n == nil {
|
|
||||||
node, err := p2p.NewNode(token)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
err = node.Start(ctx)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("starting new node: %w", err)
|
|
||||||
}
|
|
||||||
n = node
|
|
||||||
}
|
|
||||||
|
|
||||||
// Attach a ServiceDiscoverer to the p2p node
|
|
||||||
log.Info().Msg("Starting P2P server discovery...")
|
|
||||||
if err := p2p.ServiceDiscoverer(ctx, n, token, p2p.NetworkID(networkID, p2p.WorkerID), func(serviceID string, node p2p.NodeData) {
|
|
||||||
var tunnelAddresses []string
|
|
||||||
for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(networkID, p2p.WorkerID)) {
|
|
||||||
if v.IsOnline() {
|
|
||||||
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
|
|
||||||
} else {
|
|
||||||
log.Info().Msgf("Node %s is offline", v.ID)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
tunnelEnvVar := strings.Join(tunnelAddresses, ",")
|
|
||||||
|
|
||||||
os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar)
|
|
||||||
log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar)
|
|
||||||
}, true); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
@@ -8,13 +8,12 @@ import (
|
|||||||
var CLI struct {
|
var CLI struct {
|
||||||
cliContext.Context `embed:""`
|
cliContext.Context `embed:""`
|
||||||
|
|
||||||
Run RunCMD `cmd:"" help:"Run LocalAI, this the default command if no other command is specified. Run 'local-ai run --help' for more information" default:"withargs"`
|
Run RunCMD `cmd:"" help:"Run LocalAI, this the default command if no other command is specified. Run 'local-ai run --help' for more information" default:"withargs"`
|
||||||
Federated FederatedCLI `cmd:"" help:"Run LocalAI in federated mode"`
|
Federated FederatedCLI `cmd:"" help:"Run LocalAI in federated mode"`
|
||||||
Models ModelsCMD `cmd:"" help:"Manage LocalAI models and definitions"`
|
Models ModelsCMD `cmd:"" help:"Manage LocalAI models and definitions"`
|
||||||
TTS TTSCMD `cmd:"" help:"Convert text to speech"`
|
TTS TTSCMD `cmd:"" help:"Convert text to speech"`
|
||||||
SoundGeneration SoundGenerationCMD `cmd:"" help:"Generates audio files from text or audio"`
|
Transcript TranscriptCMD `cmd:"" help:"Convert audio to text"`
|
||||||
Transcript TranscriptCMD `cmd:"" help:"Convert audio to text"`
|
Worker worker.Worker `cmd:"" help:"Run workers to distribute workload (llama.cpp-only)"`
|
||||||
Worker worker.Worker `cmd:"" help:"Run workers to distribute workload (llama.cpp-only)"`
|
Util UtilCMD `cmd:"" help:"Utility commands"`
|
||||||
Util UtilCMD `cmd:"" help:"Utility commands"`
|
Explorer ExplorerCMD `cmd:"" help:"Run p2p explorer"`
|
||||||
Explorer ExplorerCMD `cmd:"" help:"Run p2p explorer"`
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,10 +3,11 @@ package cli
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"net"
|
||||||
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cli_api "github.com/mudler/LocalAI/core/cli/api"
|
|
||||||
cliContext "github.com/mudler/LocalAI/core/cli/context"
|
cliContext "github.com/mudler/LocalAI/core/cli/context"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
"github.com/mudler/LocalAI/core/http"
|
"github.com/mudler/LocalAI/core/http"
|
||||||
@@ -52,8 +53,6 @@ type RunCMD struct {
|
|||||||
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
|
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
|
||||||
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
|
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
|
||||||
Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
|
Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
|
||||||
Peer2PeerDHTInterval int `env:"LOCALAI_P2P_DHT_INTERVAL,P2P_DHT_INTERVAL" default:"360" name:"p2p-dht-interval" help:"Interval for DHT refresh (used during token generation)" group:"p2p"`
|
|
||||||
Peer2PeerOTPInterval int `env:"LOCALAI_P2P_OTP_INTERVAL,P2P_OTP_INTERVAL" default:"9000" name:"p2p-otp-interval" help:"Interval for OTP refresh (used during token generation)" group:"p2p"`
|
|
||||||
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
|
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
|
||||||
Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances" group:"p2p"`
|
Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances" group:"p2p"`
|
||||||
ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
|
ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
|
||||||
@@ -108,7 +107,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
|||||||
// IF no token is provided, and p2p is enabled,
|
// IF no token is provided, and p2p is enabled,
|
||||||
// we generate one and wait for the user to pick up the token (this is for interactive)
|
// we generate one and wait for the user to pick up the token (this is for interactive)
|
||||||
log.Info().Msg("No token provided, generating one")
|
log.Info().Msg("No token provided, generating one")
|
||||||
token = p2p.GenerateToken(r.Peer2PeerDHTInterval, r.Peer2PeerOTPInterval)
|
token = p2p.GenerateToken()
|
||||||
log.Info().Msg("Generated Token:")
|
log.Info().Msg("Generated Token:")
|
||||||
fmt.Println(token)
|
fmt.Println(token)
|
||||||
|
|
||||||
@@ -116,12 +115,52 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
|||||||
fmt.Printf("export TOKEN=\"%s\"\nlocal-ai worker p2p-llama-cpp-rpc\n", token)
|
fmt.Printf("export TOKEN=\"%s\"\nlocal-ai worker p2p-llama-cpp-rpc\n", token)
|
||||||
}
|
}
|
||||||
opts = append(opts, config.WithP2PToken(token))
|
opts = append(opts, config.WithP2PToken(token))
|
||||||
|
|
||||||
|
node, err := p2p.NewNode(token)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
nodeContext := context.Background()
|
||||||
|
|
||||||
|
err = node.Start(nodeContext)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("starting new node: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Info().Msg("Starting P2P server discovery...")
|
||||||
|
if err := p2p.ServiceDiscoverer(nodeContext, node, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.WorkerID), func(serviceID string, node p2p.NodeData) {
|
||||||
|
var tunnelAddresses []string
|
||||||
|
for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(r.Peer2PeerNetworkID, p2p.WorkerID)) {
|
||||||
|
if v.IsOnline() {
|
||||||
|
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
|
||||||
|
} else {
|
||||||
|
log.Info().Msgf("Node %s is offline", v.ID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tunnelEnvVar := strings.Join(tunnelAddresses, ",")
|
||||||
|
|
||||||
|
os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar)
|
||||||
|
log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar)
|
||||||
|
}, true); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
backgroundCtx := context.Background()
|
if r.Federated {
|
||||||
|
_, port, err := net.SplitHostPort(r.Address)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
fedCtx := context.Background()
|
||||||
|
|
||||||
if err := cli_api.StartP2PStack(backgroundCtx, r.Address, token, r.Peer2PeerNetworkID, r.Federated); err != nil {
|
node, err := p2p.ExposeService(fedCtx, "localhost", port, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.FederatedID))
|
||||||
return err
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := p2p.ServiceDiscoverer(fedCtx, node, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.FederatedID), nil, false); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
idleWatchDog := r.EnableWatchdogIdle
|
idleWatchDog := r.EnableWatchdogIdle
|
||||||
|
|||||||
@@ -1,110 +0,0 @@
|
|||||||
package cli
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"fmt"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/core/backend"
|
|
||||||
cliContext "github.com/mudler/LocalAI/core/cli/context"
|
|
||||||
"github.com/mudler/LocalAI/core/config"
|
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
|
||||||
"github.com/rs/zerolog/log"
|
|
||||||
)
|
|
||||||
|
|
||||||
type SoundGenerationCMD struct {
|
|
||||||
Text []string `arg:""`
|
|
||||||
|
|
||||||
Backend string `short:"b" required:"" help:"Backend to run the SoundGeneration model"`
|
|
||||||
Model string `short:"m" required:"" help:"Model name to run the SoundGeneration"`
|
|
||||||
Duration string `short:"d" help:"If specified, the length of audio to generate in seconds"`
|
|
||||||
Temperature string `short:"t" help:"If specified, the temperature of the generation"`
|
|
||||||
InputFile string `short:"i" help:"If specified, the input file to condition generation upon"`
|
|
||||||
InputFileSampleDivisor string `short:"f" help:"If InputFile and this divisor is specified, the first portion of the sample file will be used"`
|
|
||||||
DoSample bool `short:"s" default:"true" help:"Enables sampling from the model. Better quality at the cost of speed. Defaults to enabled."`
|
|
||||||
OutputFile string `short:"o" type:"path" help:"The path to write the output wav file"`
|
|
||||||
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
|
|
||||||
BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
|
|
||||||
ExternalGRPCBackends []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"`
|
|
||||||
}
|
|
||||||
|
|
||||||
func parseToFloat32Ptr(input string) *float32 {
|
|
||||||
f, err := strconv.ParseFloat(input, 32)
|
|
||||||
if err != nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
f2 := float32(f)
|
|
||||||
return &f2
|
|
||||||
}
|
|
||||||
|
|
||||||
func parseToInt32Ptr(input string) *int32 {
|
|
||||||
i, err := strconv.ParseInt(input, 10, 32)
|
|
||||||
if err != nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
i2 := int32(i)
|
|
||||||
return &i2
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
|
|
||||||
outputFile := t.OutputFile
|
|
||||||
outputDir := t.BackendAssetsPath
|
|
||||||
if outputFile != "" {
|
|
||||||
outputDir = filepath.Dir(outputFile)
|
|
||||||
}
|
|
||||||
|
|
||||||
text := strings.Join(t.Text, " ")
|
|
||||||
|
|
||||||
externalBackends := make(map[string]string)
|
|
||||||
// split ":" to get backend name and the uri
|
|
||||||
for _, v := range t.ExternalGRPCBackends {
|
|
||||||
backend := v[:strings.IndexByte(v, ':')]
|
|
||||||
uri := v[strings.IndexByte(v, ':')+1:]
|
|
||||||
externalBackends[backend] = uri
|
|
||||||
fmt.Printf("TMP externalBackends[%q]=%q\n\n", backend, uri)
|
|
||||||
}
|
|
||||||
|
|
||||||
opts := &config.ApplicationConfig{
|
|
||||||
ModelPath: t.ModelsPath,
|
|
||||||
Context: context.Background(),
|
|
||||||
AudioDir: outputDir,
|
|
||||||
AssetsDestination: t.BackendAssetsPath,
|
|
||||||
ExternalGRPCBackends: externalBackends,
|
|
||||||
}
|
|
||||||
ml := model.NewModelLoader(opts.ModelPath)
|
|
||||||
|
|
||||||
defer func() {
|
|
||||||
err := ml.StopAllGRPC()
|
|
||||||
if err != nil {
|
|
||||||
log.Error().Err(err).Msg("unable to stop all grpc processes")
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
options := config.BackendConfig{}
|
|
||||||
options.SetDefaults()
|
|
||||||
|
|
||||||
var inputFile *string
|
|
||||||
if t.InputFile != "" {
|
|
||||||
inputFile = &t.InputFile
|
|
||||||
}
|
|
||||||
|
|
||||||
filePath, _, err := backend.SoundGeneration(t.Backend, t.Model, text,
|
|
||||||
parseToFloat32Ptr(t.Duration), parseToFloat32Ptr(t.Temperature), &t.DoSample,
|
|
||||||
inputFile, parseToInt32Ptr(t.InputFileSampleDivisor), ml, opts, options)
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if outputFile != "" {
|
|
||||||
if err := os.Rename(filePath, outputFile); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
fmt.Printf("Generate file %s\n", outputFile)
|
|
||||||
} else {
|
|
||||||
fmt.Printf("Generate file %s\n", filePath)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
@@ -2,7 +2,6 @@ package worker
|
|||||||
|
|
||||||
type WorkerFlags struct {
|
type WorkerFlags struct {
|
||||||
BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
|
BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
|
||||||
ExtraLLamaCPPArgs string `name:"llama-cpp-args" env:"LOCALAI_EXTRA_LLAMA_CPP_ARGS,EXTRA_LLAMA_CPP_ARGS" help:"Extra arguments to pass to llama-cpp-rpc-server"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type Worker struct {
|
type Worker struct {
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ package worker
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
|
||||||
"syscall"
|
"syscall"
|
||||||
|
|
||||||
cliContext "github.com/mudler/LocalAI/core/cli/context"
|
cliContext "github.com/mudler/LocalAI/core/cli/context"
|
||||||
@@ -13,6 +12,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
type LLamaCPP struct {
|
type LLamaCPP struct {
|
||||||
|
Args []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"`
|
||||||
WorkerFlags `embed:""`
|
WorkerFlags `embed:""`
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -34,8 +34,9 @@ func (r *LLamaCPP) Run(ctx *cliContext.Context) error {
|
|||||||
"llama-cpp-rpc-server",
|
"llama-cpp-rpc-server",
|
||||||
)
|
)
|
||||||
|
|
||||||
args := strings.Split(r.ExtraLLamaCPPArgs, " ")
|
args := os.Args[4:]
|
||||||
args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)
|
args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)
|
||||||
|
|
||||||
args = append([]string{grpcProcess}, args...)
|
args = append([]string{grpcProcess}, args...)
|
||||||
return syscall.Exec(
|
return syscall.Exec(
|
||||||
grpcProcess,
|
grpcProcess,
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"strings"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cliContext "github.com/mudler/LocalAI/core/cli/context"
|
cliContext "github.com/mudler/LocalAI/core/cli/context"
|
||||||
@@ -21,11 +20,12 @@ import (
|
|||||||
|
|
||||||
type P2P struct {
|
type P2P struct {
|
||||||
WorkerFlags `embed:""`
|
WorkerFlags `embed:""`
|
||||||
Token string `env:"LOCALAI_TOKEN,LOCALAI_P2P_TOKEN,TOKEN" help:"P2P token to use"`
|
Token string `env:"LOCALAI_TOKEN,LOCALAI_P2P_TOKEN,TOKEN" help:"P2P token to use"`
|
||||||
NoRunner bool `env:"LOCALAI_NO_RUNNER,NO_RUNNER" help:"Do not start the llama-cpp-rpc-server"`
|
NoRunner bool `env:"LOCALAI_NO_RUNNER,NO_RUNNER" help:"Do not start the llama-cpp-rpc-server"`
|
||||||
RunnerAddress string `env:"LOCALAI_RUNNER_ADDRESS,RUNNER_ADDRESS" help:"Address of the llama-cpp-rpc-server"`
|
RunnerAddress string `env:"LOCALAI_RUNNER_ADDRESS,RUNNER_ADDRESS" help:"Address of the llama-cpp-rpc-server"`
|
||||||
RunnerPort string `env:"LOCALAI_RUNNER_PORT,RUNNER_PORT" help:"Port of the llama-cpp-rpc-server"`
|
RunnerPort string `env:"LOCALAI_RUNNER_PORT,RUNNER_PORT" help:"Port of the llama-cpp-rpc-server"`
|
||||||
Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances" group:"p2p"`
|
ExtraLLamaCPPArgs []string `env:"LOCALAI_EXTRA_LLAMA_CPP_ARGS,EXTRA_LLAMA_CPP_ARGS" help:"Extra arguments to pass to llama-cpp-rpc-server"`
|
||||||
|
Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances" group:"p2p"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *P2P) Run(ctx *cliContext.Context) error {
|
func (r *P2P) Run(ctx *cliContext.Context) error {
|
||||||
@@ -76,8 +76,8 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
|
|||||||
"util",
|
"util",
|
||||||
"llama-cpp-rpc-server",
|
"llama-cpp-rpc-server",
|
||||||
)
|
)
|
||||||
extraArgs := strings.Split(r.ExtraLLamaCPPArgs, " ")
|
|
||||||
args := append([]string{"--host", address, "--port", fmt.Sprint(port)}, extraArgs...)
|
args := append([]string{"--host", address, "--port", fmt.Sprint(port)}, r.ExtraLLamaCPPArgs...)
|
||||||
args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)
|
args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)
|
||||||
|
|
||||||
cmd := exec.Command(
|
cmd := exec.Command(
|
||||||
|
|||||||
@@ -126,7 +126,6 @@ type LLMConfig struct {
|
|||||||
Grammar string `yaml:"grammar"`
|
Grammar string `yaml:"grammar"`
|
||||||
StopWords []string `yaml:"stopwords"`
|
StopWords []string `yaml:"stopwords"`
|
||||||
Cutstrings []string `yaml:"cutstrings"`
|
Cutstrings []string `yaml:"cutstrings"`
|
||||||
ExtractRegex []string `yaml:"extract_regex"`
|
|
||||||
TrimSpace []string `yaml:"trimspace"`
|
TrimSpace []string `yaml:"trimspace"`
|
||||||
TrimSuffix []string `yaml:"trimsuffix"`
|
TrimSuffix []string `yaml:"trimsuffix"`
|
||||||
|
|
||||||
|
|||||||
@@ -772,17 +772,6 @@ var _ = Describe("API test", func() {
|
|||||||
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error:"))
|
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error:"))
|
||||||
})
|
})
|
||||||
|
|
||||||
It("shows the external backend", func() {
|
|
||||||
// do an http request to the /system endpoint
|
|
||||||
resp, err := http.Get("http://127.0.0.1:9090/system")
|
|
||||||
Expect(err).ToNot(HaveOccurred())
|
|
||||||
Expect(resp.StatusCode).To(Equal(200))
|
|
||||||
dat, err := io.ReadAll(resp.Body)
|
|
||||||
Expect(err).ToNot(HaveOccurred())
|
|
||||||
Expect(string(dat)).To(ContainSubstring("huggingface"))
|
|
||||||
Expect(string(dat)).To(ContainSubstring("llama-cpp"))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("transcribes audio", func() {
|
It("transcribes audio", func() {
|
||||||
if runtime.GOOS != "linux" {
|
if runtime.GOOS != "linux" {
|
||||||
Skip("test supported only on linux")
|
Skip("test supported only on linux")
|
||||||
|
|||||||
@@ -1,65 +0,0 @@
|
|||||||
package elevenlabs
|
|
||||||
|
|
||||||
import (
|
|
||||||
"github.com/gofiber/fiber/v2"
|
|
||||||
"github.com/mudler/LocalAI/core/backend"
|
|
||||||
"github.com/mudler/LocalAI/core/config"
|
|
||||||
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
|
||||||
"github.com/rs/zerolog/log"
|
|
||||||
)
|
|
||||||
|
|
||||||
// SoundGenerationEndpoint is the ElevenLabs SoundGeneration endpoint https://elevenlabs.io/docs/api-reference/sound-generation
|
|
||||||
// @Summary Generates audio from the input text.
|
|
||||||
// @Param request body schema.ElevenLabsSoundGenerationRequest true "query params"
|
|
||||||
// @Success 200 {string} binary "Response"
|
|
||||||
// @Router /v1/sound-generation [post]
|
|
||||||
func SoundGenerationEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
|
||||||
return func(c *fiber.Ctx) error {
|
|
||||||
input := new(schema.ElevenLabsSoundGenerationRequest)
|
|
||||||
// Get input data from the request body
|
|
||||||
if err := c.BodyParser(input); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.ModelID, false)
|
|
||||||
if err != nil {
|
|
||||||
modelFile = input.ModelID
|
|
||||||
log.Warn().Str("ModelID", input.ModelID).Msg("Model not found in context")
|
|
||||||
}
|
|
||||||
|
|
||||||
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
|
|
||||||
config.LoadOptionDebug(appConfig.Debug),
|
|
||||||
config.LoadOptionThreads(appConfig.Threads),
|
|
||||||
config.LoadOptionContextSize(appConfig.ContextSize),
|
|
||||||
config.LoadOptionF16(appConfig.F16),
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
modelFile = input.ModelID
|
|
||||||
log.Warn().Str("Request ModelID", input.ModelID).Err(err).Msg("error during LoadBackendConfigFileByName, using request ModelID")
|
|
||||||
} else {
|
|
||||||
if input.ModelID != "" {
|
|
||||||
modelFile = input.ModelID
|
|
||||||
} else {
|
|
||||||
modelFile = cfg.Model
|
|
||||||
}
|
|
||||||
}
|
|
||||||
log.Debug().Str("modelFile", "modelFile").Str("backend", cfg.Backend).Msg("Sound Generation Request about to be sent to backend")
|
|
||||||
|
|
||||||
if input.Duration != nil {
|
|
||||||
log.Debug().Float32("duration", *input.Duration).Msg("duration set")
|
|
||||||
}
|
|
||||||
if input.Temperature != nil {
|
|
||||||
log.Debug().Float32("temperature", *input.Temperature).Msg("temperature set")
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: Support uploading files?
|
|
||||||
filePath, _, err := backend.SoundGeneration(cfg.Backend, modelFile, input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return c.Download(filePath)
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,29 +0,0 @@
|
|||||||
package localai
|
|
||||||
|
|
||||||
import (
|
|
||||||
"github.com/gofiber/fiber/v2"
|
|
||||||
"github.com/mudler/LocalAI/core/config"
|
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
|
||||||
)
|
|
||||||
|
|
||||||
// SystemInformations returns the system informations
|
|
||||||
// @Summary Show the LocalAI instance information
|
|
||||||
// @Success 200 {object} schema.SystemInformationResponse "Response"
|
|
||||||
// @Router /system [get]
|
|
||||||
func SystemInformations(ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(*fiber.Ctx) error {
|
|
||||||
return func(c *fiber.Ctx) error {
|
|
||||||
availableBackends, err := ml.ListAvailableBackends(appConfig.AssetsDestination)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
for b := range appConfig.ExternalGRPCBackends {
|
|
||||||
availableBackends = append(availableBackends, b)
|
|
||||||
}
|
|
||||||
return c.JSON(
|
|
||||||
schema.SystemInformationResponse{
|
|
||||||
Backends: availableBackends,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -25,8 +25,9 @@ import (
|
|||||||
// @Success 200 {object} schema.OpenAIResponse "Response"
|
// @Success 200 {object} schema.OpenAIResponse "Response"
|
||||||
// @Router /v1/chat/completions [post]
|
// @Router /v1/chat/completions [post]
|
||||||
func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||||
var id, textContentToReturn string
|
textContentToReturn := ""
|
||||||
var created int
|
id := uuid.New().String()
|
||||||
|
created := int(time.Now().Unix())
|
||||||
|
|
||||||
process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
|
process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
|
||||||
initialMessage := schema.OpenAIResponse{
|
initialMessage := schema.OpenAIResponse{
|
||||||
@@ -68,9 +69,9 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
|||||||
|
|
||||||
textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig)
|
textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig)
|
||||||
result = functions.CleanupLLMResult(result, config.FunctionsConfig)
|
result = functions.CleanupLLMResult(result, config.FunctionsConfig)
|
||||||
functionResults := functions.ParseFunctionCall(result, config.FunctionsConfig)
|
results := functions.ParseFunctionCall(result, config.FunctionsConfig)
|
||||||
log.Debug().Msgf("Text content to return: %s", textContentToReturn)
|
log.Debug().Msgf("Text content to return: %s", textContentToReturn)
|
||||||
noActionToRun := len(functionResults) > 0 && functionResults[0].Name == noAction || len(functionResults) == 0
|
noActionToRun := len(results) > 0 && results[0].Name == noAction || len(results) == 0
|
||||||
|
|
||||||
switch {
|
switch {
|
||||||
case noActionToRun:
|
case noActionToRun:
|
||||||
@@ -83,7 +84,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
|||||||
}
|
}
|
||||||
responses <- initialMessage
|
responses <- initialMessage
|
||||||
|
|
||||||
result, err := handleQuestion(config, req, ml, startupOptions, functionResults, result, prompt)
|
result, err := handleQuestion(config, req, ml, startupOptions, results, result, prompt)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error().Err(err).Msg("error handling question")
|
log.Error().Err(err).Msg("error handling question")
|
||||||
return
|
return
|
||||||
@@ -105,7 +106,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
|||||||
responses <- resp
|
responses <- resp
|
||||||
|
|
||||||
default:
|
default:
|
||||||
for i, ss := range functionResults {
|
for i, ss := range results {
|
||||||
name, args := ss.Name, ss.Arguments
|
name, args := ss.Name, ss.Arguments
|
||||||
|
|
||||||
initialMessage := schema.OpenAIResponse{
|
initialMessage := schema.OpenAIResponse{
|
||||||
@@ -158,10 +159,6 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
|||||||
}
|
}
|
||||||
|
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
textContentToReturn = ""
|
|
||||||
id = uuid.New().String()
|
|
||||||
created = int(time.Now().Unix())
|
|
||||||
|
|
||||||
modelFile, input, err := readRequest(c, cl, ml, startupOptions, true)
|
modelFile, input, err := readRequest(c, cl, ml, startupOptions, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
|
|||||||
@@ -16,6 +16,4 @@ func RegisterElevenLabsRoutes(app *fiber.App,
|
|||||||
// Elevenlabs
|
// Elevenlabs
|
||||||
app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig))
|
app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig))
|
||||||
|
|
||||||
app.Post("/v1/sound-generation", auth, elevenlabs.SoundGenerationEndpoint(cl, ml, appConfig))
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -70,6 +70,4 @@ func RegisterLocalAIRoutes(app *fiber.App,
|
|||||||
}{Version: internal.PrintableVersion()})
|
}{Version: internal.PrintableVersion()})
|
||||||
})
|
})
|
||||||
|
|
||||||
app.Get("/system", auth, localai.SystemInformations(ml, appConfig))
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because one or more lines are too long
@@ -1,9 +0,0 @@
|
|||||||
/*!
|
|
||||||
Theme: Default
|
|
||||||
Description: Original highlight.js style
|
|
||||||
Author: (c) Ivan Sagalaev <maniac@softwaremaniacs.org>
|
|
||||||
Maintainer: @highlightjs/core-team
|
|
||||||
Website: https://highlightjs.org/
|
|
||||||
License: see project LICENSE
|
|
||||||
Touched: 2021
|
|
||||||
*/pre code.hljs{display:block;overflow-x:auto;padding:1em}code.hljs{padding:3px 5px}.hljs{background:#f3f3f3;color:#444}.hljs-comment{color:#697070}.hljs-punctuation,.hljs-tag{color:#444a}.hljs-tag .hljs-attr,.hljs-tag .hljs-name{color:#444}.hljs-attribute,.hljs-doctag,.hljs-keyword,.hljs-meta .hljs-keyword,.hljs-name,.hljs-selector-tag{font-weight:700}.hljs-deletion,.hljs-number,.hljs-quote,.hljs-selector-class,.hljs-selector-id,.hljs-string,.hljs-template-tag,.hljs-type{color:#800}.hljs-section,.hljs-title{color:#800;font-weight:700}.hljs-link,.hljs-operator,.hljs-regexp,.hljs-selector-attr,.hljs-selector-pseudo,.hljs-symbol,.hljs-template-variable,.hljs-variable{color:#ab5656}.hljs-literal{color:#695}.hljs-addition,.hljs-built_in,.hljs-bullet,.hljs-code{color:#397300}.hljs-meta{color:#1f7199}.hljs-meta .hljs-string{color:#38a}.hljs-emphasis{font-style:italic}.hljs-strong{font-weight:700}
|
|
||||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -28,15 +28,9 @@ import (
|
|||||||
"github.com/mudler/edgevpn/pkg/logger"
|
"github.com/mudler/edgevpn/pkg/logger"
|
||||||
)
|
)
|
||||||
|
|
||||||
func generateNewConnectionData(DHTInterval, OTPInterval int) *node.YAMLConnectionConfig {
|
func generateNewConnectionData() *node.YAMLConnectionConfig {
|
||||||
maxMessSize := 20 << 20 // 20MB
|
maxMessSize := 20 << 20 // 20MB
|
||||||
keyLength := 43
|
keyLength := 43
|
||||||
if DHTInterval == 0 {
|
|
||||||
DHTInterval = 360
|
|
||||||
}
|
|
||||||
if OTPInterval == 0 {
|
|
||||||
OTPInterval = 9000
|
|
||||||
}
|
|
||||||
|
|
||||||
return &node.YAMLConnectionConfig{
|
return &node.YAMLConnectionConfig{
|
||||||
MaxMessageSize: maxMessSize,
|
MaxMessageSize: maxMessSize,
|
||||||
@@ -46,21 +40,21 @@ func generateNewConnectionData(DHTInterval, OTPInterval int) *node.YAMLConnectio
|
|||||||
OTP: node.OTP{
|
OTP: node.OTP{
|
||||||
DHT: node.OTPConfig{
|
DHT: node.OTPConfig{
|
||||||
Key: eutils.RandStringRunes(keyLength),
|
Key: eutils.RandStringRunes(keyLength),
|
||||||
Interval: DHTInterval,
|
Interval: 120,
|
||||||
Length: keyLength,
|
Length: keyLength,
|
||||||
},
|
},
|
||||||
Crypto: node.OTPConfig{
|
Crypto: node.OTPConfig{
|
||||||
Key: eutils.RandStringRunes(keyLength),
|
Key: eutils.RandStringRunes(keyLength),
|
||||||
Interval: OTPInterval,
|
Interval: 9000,
|
||||||
Length: keyLength,
|
Length: keyLength,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func GenerateToken(DHTInterval, OTPInterval int) string {
|
func GenerateToken() string {
|
||||||
// Generates a new config and exit
|
// Generates a new config and exit
|
||||||
return generateNewConnectionData(DHTInterval, OTPInterval).Base64()
|
return generateNewConnectionData().Base64()
|
||||||
}
|
}
|
||||||
|
|
||||||
func IsP2PEnabled() bool {
|
func IsP2PEnabled() bool {
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ import (
|
|||||||
"github.com/mudler/edgevpn/pkg/node"
|
"github.com/mudler/edgevpn/pkg/node"
|
||||||
)
|
)
|
||||||
|
|
||||||
func GenerateToken(DHTInterval, OTPInterval int) string {
|
func GenerateToken() string {
|
||||||
return "not implemented"
|
return "not implemented"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -4,11 +4,3 @@ type ElevenLabsTTSRequest struct {
|
|||||||
Text string `json:"text" yaml:"text"`
|
Text string `json:"text" yaml:"text"`
|
||||||
ModelID string `json:"model_id" yaml:"model_id"`
|
ModelID string `json:"model_id" yaml:"model_id"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type ElevenLabsSoundGenerationRequest struct {
|
|
||||||
Text string `json:"text" yaml:"text"`
|
|
||||||
ModelID string `json:"model_id" yaml:"model_id"`
|
|
||||||
Duration *float32 `json:"duration_seconds,omitempty" yaml:"duration_seconds,omitempty"`
|
|
||||||
Temperature *float32 `json:"prompt_influence,omitempty" yaml:"prompt_influence,omitempty"`
|
|
||||||
DoSample *bool `json:"do_sample,omitempty" yaml:"do_sample,omitempty"`
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -70,7 +70,3 @@ type P2PNodesResponse struct {
|
|||||||
Nodes []p2p.NodeData `json:"nodes" yaml:"nodes"`
|
Nodes []p2p.NodeData `json:"nodes" yaml:"nodes"`
|
||||||
FederatedNodes []p2p.NodeData `json:"federated_nodes" yaml:"federated_nodes"`
|
FederatedNodes []p2p.NodeData `json:"federated_nodes" yaml:"federated_nodes"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type SystemInformationResponse struct {
|
|
||||||
Backends []string `json:"backends"`
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -107,7 +107,7 @@ func (bms BackendMonitorService) CheckAndSample(modelName string) (*proto.Status
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
modelAddr := bms.modelLoader.CheckIsLoaded(backendId)
|
modelAddr := bms.modelLoader.CheckIsLoaded(backendId)
|
||||||
if modelAddr == nil {
|
if modelAddr == "" {
|
||||||
return nil, fmt.Errorf("backend %s is not currently loaded", backendId)
|
return nil, fmt.Errorf("backend %s is not currently loaded", backendId)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -133,10 +133,6 @@ Due to the nature of ROCm it is best to run all implementations in containers as
|
|||||||
Ongoing verification testing of ROCm compatability with integrated backends.
|
Ongoing verification testing of ROCm compatability with integrated backends.
|
||||||
Please note the following list of verified backends and devices.
|
Please note the following list of verified backends and devices.
|
||||||
|
|
||||||
LocalAI hipblas images are built against the following targets: gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
|
|
||||||
|
|
||||||
If your device is not one of these you must specify the corresponding `GPU_TARGETS` and specify `REBUILD=true`. Otherwise you don't need to specify these in the commands below.
|
|
||||||
|
|
||||||
### Verified
|
### Verified
|
||||||
|
|
||||||
The devices in the following list have been tested with `hipblas` images running `ROCm 6.0.0`
|
The devices in the following list have been tested with `hipblas` images running `ROCm 6.0.0`
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user