Compare commits

..

5 Commits

Author SHA1 Message Date
Ettore Di Giacinto
2a03905920 Merge branch 'master' into cleanup_deps 2024-08-21 13:10:46 +02:00
Ettore Di Giacinto
35297ebc14 Drop also ttf files
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-08-21 13:03:26 +02:00
Ettore Di Giacinto
b303805df9 fix marked 2024-08-21 13:02:19 +02:00
Ettore Di Giacinto
32d51797d9 fix alpine.js 2024-08-21 13:02:19 +02:00
Ettore Di Giacinto
af09b019ed fix(assets): generate assets on build time
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-08-21 13:02:19 +02:00
136 changed files with 1228 additions and 3701 deletions

View File

@@ -32,22 +32,18 @@ config_remote() {
}
# Setup special .ssh files
# Prints out lines of text to make things pretty
#
# Param 1: bash array, filenames relative to the customization directory that should be copied to ~/.ssh
setup_ssh() {
echo "starting ~/.ssh directory setup..."
mkdir -p "${HOME}.ssh"
chmod 0700 "${HOME}/.ssh"
echo "-----"
local files=("$@")
for file in "${files[@]}" ; do
for file in "${files[@]}"; then
local cfile="/devcontainer-customization/${file}"
local hfile="${HOME}/.ssh/${file}"
local hfile="~/.ssh/${file}"
if [ ! -f "${hfile}" ]; then
echo "copying \"${file}\""
echo "copying ${file}"
cp "${cfile}" "${hfile}"
chmod 600 "${hfile}"
fi
done
echo "~/.ssh directory setup complete!"
ls ~/.ssh
}

View File

@@ -56,7 +56,7 @@ jobs:
rm -rfv ${{ matrix.variable }}_message.txt
rm -rfv ${{ matrix.variable }}_commit.txt
- name: Create Pull Request
uses: peter-evans/create-pull-request@v7
uses: peter-evans/create-pull-request@v6
with:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI

View File

@@ -17,7 +17,7 @@ jobs:
run: |
bash .github/bump_docs.sh ${{ matrix.repository }}
- name: Create Pull Request
uses: peter-evans/create-pull-request@v7
uses: peter-evans/create-pull-request@v6
with:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI

View File

@@ -36,7 +36,7 @@ jobs:
sudo chmod 777 /hf_cache
bash .github/checksum_checker.sh gallery/index.yaml
- name: Create Pull Request
uses: peter-evans/create-pull-request@v7
uses: peter-evans/create-pull-request@v6
with:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI

View File

@@ -294,7 +294,7 @@ jobs:
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
export PATH=$PATH:$GOPATH/bin
export SKIP_GRPC_BACKEND=backend-assets/grpc/whisper
make dist
- uses: actions/upload-artifact@v4
with:
@@ -327,7 +327,7 @@ jobs:
cache: false
- name: Dependencies
run: |
brew install protobuf grpc libomp llvm
brew install protobuf grpc
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
- name: Build
@@ -336,7 +336,7 @@ jobs:
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
export PATH=$PATH:$GOPATH/bin
export CC=/opt/homebrew/opt/llvm/bin/clang
make dist
- uses: actions/upload-artifact@v4
with:

View File

@@ -18,7 +18,7 @@ jobs:
if: ${{ github.actor != 'dependabot[bot]' }}
- name: Run Gosec Security Scanner
if: ${{ github.actor != 'dependabot[bot]' }}
uses: securego/gosec@v2.21.0
uses: securego/gosec@master
with:
# we let the report trigger content trigger a failure using the GitHub Security features.
args: '-no-fail -fmt sarif -out results.sarif ./...'

View File

@@ -214,13 +214,12 @@ jobs:
run: go version
- name: Dependencies
run: |
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc
pip install --user --no-cache-dir grpcio-tools==1.64.1
- name: Test
run: |
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
export CC=/opt/homebrew/opt/llvm/bin/clang
# Used to run the newer GNUMake version from brew that supports --output-sync
export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test

View File

@@ -25,7 +25,7 @@ jobs:
run: |
make protogen-go swagger
- name: Create Pull Request
uses: peter-evans/create-pull-request@v7
uses: peter-evans/create-pull-request@v6
with:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI

View File

@@ -13,7 +13,7 @@ ARG TARGETARCH
ARG TARGETVARIANT
ENV DEBIAN_FRONTEND=noninteractive
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
RUN apt-get update && \
@@ -263,20 +263,14 @@ EOT
# In most cases, builder is the image you should be using - however, this can save build time if one just needs to copy backend-assets/grpc/stablediffusion and nothing else.
FROM builder-base AS builder-sd
# stablediffusion does not tolerate a newer version of abseil, copy only over enough elements to build it
COPY Makefile .
COPY go.mod .
COPY go.sum .
COPY backend/backend.proto ./backend/backend.proto
COPY backend/go/image/stablediffusion ./backend/go/image/stablediffusion
COPY pkg/grpc ./pkg/grpc
COPY pkg/stablediffusion ./pkg/stablediffusion
RUN git init
RUN make sources/go-stable-diffusion
RUN touch prepare-sources
COPY . .
COPY .git .
# Actually build the backend
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make backend-assets/grpc/stablediffusion
RUN make prepare
# stablediffusion does not tolerate a newer version of abseil, build it first
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
###################################
###################################
@@ -291,20 +285,8 @@ COPY --from=grpc /opt/grpc /usr/local
# Rebuild with defaults backends
WORKDIR /build
COPY . .
COPY .git .
RUN make prepare
## Build the binary
## If it's CUDA, we want to skip some of the llama-compat backends to save space
## We only leave the most CPU-optimized variant and the fallback for the cublas build
## (both will use CUDA for the actual computation)
RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
else \
make build; \
fi
RUN make build
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
@@ -418,6 +400,9 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAG
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "transformers-musicgen" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/transformers-musicgen \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama1" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/exllama \
; fi
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \

View File

@@ -8,7 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=e6b7801bd189d102d901d3e72035611a25456ef1
CPPLLAMA_VERSION?=2f3c1466ff46a2413b0e363a5005c46538186ee6
# go-rwkv version
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
@@ -16,7 +16,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=a551933542d956ae84634937acd2942eb40efaaf
WHISPER_CPP_VERSION?=d65786ea540a5aef21f67cacfa6f134097727780
# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
@@ -338,7 +338,7 @@ rebuild: ## Rebuilds the project
$(MAKE) -C sources/go-tiny-dream clean
$(MAKE) build
prepare: prepare-sources $(OPTIONAL_TARGETS)
prepare: prepare-sources gen-assets $(OPTIONAL_TARGETS)
clean: ## Remove build related file
$(GOCMD) clean -cache
@@ -534,10 +534,10 @@ protogen-go-clean:
$(RM) bin/*
.PHONY: protogen-python
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
.PHONY: protogen-python-clean
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
.PHONY: autogptq-protogen
autogptq-protogen:
@@ -571,6 +571,14 @@ diffusers-protogen:
diffusers-protogen-clean:
$(MAKE) -C backend/python/diffusers protogen-clean
.PHONY: exllama-protogen
exllama-protogen:
$(MAKE) -C backend/python/exllama protogen
.PHONY: exllama-protogen-clean
exllama-protogen-clean:
$(MAKE) -C backend/python/exllama protogen-clean
.PHONY: exllama2-protogen
exllama2-protogen:
$(MAKE) -C backend/python/exllama2 protogen
@@ -667,6 +675,7 @@ prepare-extra-conda-environments: protogen-python
$(MAKE) -C backend/python/parler-tts
$(MAKE) -C backend/python/vall-e-x
$(MAKE) -C backend/python/openvoice
$(MAKE) -C backend/python/exllama
$(MAKE) -C backend/python/exllama2
prepare-test-extra: protogen-python
@@ -837,7 +846,7 @@ endif
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/whisper
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/whisper
endif

View File

@@ -40,7 +40,7 @@
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
>
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/go-skynet/LocalAI/tree/master/examples/)
> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
[![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai)
@@ -72,7 +72,6 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
- Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
- June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
- June 2024: Support for models from OCI registries: https://github.com/mudler/LocalAI/pull/2628

View File

@@ -1,6 +1,6 @@
name: stablediffusion
parameters:
model: Lykon/dreamshaper-8
model: runwayml/stable-diffusion-v1-5
backend: diffusers
step: 25
f16: true

View File

@@ -16,7 +16,6 @@ service Backend {
rpc GenerateImage(GenerateImageRequest) returns (Result) {}
rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {}
rpc TTS(TTSRequest) returns (Result) {}
rpc SoundGeneration(SoundGenerationRequest) returns (Result) {}
rpc TokenizeString(PredictOptions) returns (TokenizationResponse) {}
rpc Status(HealthMessage) returns (StatusResponse) {}
@@ -271,17 +270,6 @@ message TTSRequest {
optional string language = 5;
}
message SoundGenerationRequest {
string text = 1;
string model = 2;
string dst = 3;
optional float duration = 4;
optional float temperature = 5;
optional bool sample = 6;
optional string src = 7;
optional int32 src_divisor = 8;
}
message TokenizationResponse {
int32 length = 1;
repeated int32 tokens = 2;

View File

@@ -17,10 +17,11 @@
#include "common.h"
#include "json.hpp"
#include "llama.h"
#include "grammar-parser.h"
#include "backend.pb.h"
#include "backend.grpc.pb.h"
#include "utils.hpp"
#include "sampling.h"
// include std::regex
#include <cstddef>
#include <thread>
@@ -202,8 +203,8 @@ struct llama_client_slot
std::string stopping_word;
// sampling
struct gpt_sampler_params sparams;
gpt_sampler *ctx_sampling = nullptr;
struct llama_sampling_params sparams;
llama_sampling_context *ctx_sampling = nullptr;
int32_t ga_i = 0; // group-attention state
int32_t ga_n = 1; // group-attention factor
@@ -618,7 +619,7 @@ struct llama_server_context
bool launch_slot_with_data(llama_client_slot* &slot, json data) {
slot_params default_params;
gpt_sampler_params default_sparams;
llama_sampling_params default_sparams;
slot->params.stream = json_value(data, "stream", false);
slot->params.cache_prompt = json_value(data, "cache_prompt", false);
@@ -627,7 +628,7 @@ struct llama_server_context
slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p);
slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
slot->sparams.typ_p = json_value(data, "typical_p", default_sparams.typ_p);
slot->sparams.typical_p = json_value(data, "typical_p", default_sparams.typical_p);
slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
slot->sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range);
slot->sparams.dynatemp_exponent = json_value(data, "dynatemp_exponent", default_sparams.dynatemp_exponent);
@@ -640,7 +641,7 @@ struct llama_server_context
slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
slot->sparams.seed = json_value(data, "seed", default_sparams.seed);
slot->params.seed = json_value(data, "seed", default_params.seed);
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
@@ -664,7 +665,6 @@ struct llama_server_context
slot->params.input_prefix = "";
}
if (data.count("input_suffix") != 0)
{
slot->params.input_suffix = data["input_suffix"];
@@ -683,10 +683,6 @@ struct llama_server_context
slot->prompt = "";
}
if (json_value(data, "ignore_eos", false)) {
slot->sparams.logit_bias.push_back({llama_token_eos(model), -INFINITY});
}
/*
slot->sparams.penalty_prompt_tokens.clear();
slot->sparams.use_penalty_prompt_tokens = false;
const auto &penalty_prompt = data.find("penalty_prompt");
@@ -722,10 +718,14 @@ struct llama_server_context
slot->sparams.use_penalty_prompt_tokens = true;
}
}
*/
slot->sparams.logit_bias.clear();
if (json_value(data, "ignore_eos", false))
{
slot->sparams.logit_bias[llama_token_eos(model)] = -INFINITY;
}
const auto &logit_bias = data.find("logit_bias");
if (logit_bias != data.end() && logit_bias->is_array())
{
@@ -753,7 +753,7 @@ struct llama_server_context
llama_token tok = el[0].get<llama_token>();
if (tok >= 0 && tok < n_vocab)
{
slot->sparams.logit_bias.push_back({tok, bias});
slot->sparams.logit_bias[tok] = bias;
}
}
else if (el[0].is_string())
@@ -761,13 +761,13 @@ struct llama_server_context
auto toks = llama_tokenize(model, el[0].get<std::string>(), false);
for (auto tok : toks)
{
slot->sparams.logit_bias.push_back({tok, bias});
slot->sparams.logit_bias[tok] = bias;
}
}
}
}
}
slot->params.antiprompt.clear();
const auto &stop = data.find("stop");
@@ -781,22 +781,24 @@ struct llama_server_context
}
}
}
const auto & samplers = data.find("samplers");
if (samplers != data.end() && samplers->is_array()) {
const auto &samplers_sequence = data.find("samplers");
if (samplers_sequence != data.end() && samplers_sequence->is_array())
{
std::vector<std::string> sampler_names;
for (const auto & name : *samplers) {
if (name.is_string()) {
sampler_names.emplace_back(name);
}
for (const auto &sampler_name : *samplers_sequence)
{
if (sampler_name.is_string())
{
sampler_names.emplace_back(sampler_name);
}
slot->sparams.samplers = gpt_sampler_types_from_names(sampler_names, false);
}
slot->sparams.samplers_sequence = llama_sampling_types_from_names(sampler_names, false);
}
else
{
slot->sparams.samplers = default_sparams.samplers;
slot->sparams.samplers_sequence = default_sparams.samplers_sequence;
}
if (multimodal)
{
@@ -873,10 +875,10 @@ struct llama_server_context
if (slot->ctx_sampling != nullptr)
{
gpt_sampler_free(slot->ctx_sampling);
llama_sampling_free(slot->ctx_sampling);
}
slot->ctx_sampling = gpt_sampler_init(model, slot->sparams);
//llama_set_rng_seed(ctx, slot->params.seed);
slot->ctx_sampling = llama_sampling_init(slot->sparams);
llama_set_rng_seed(ctx, slot->params.seed);
slot->command = LOAD_PROMPT;
all_slots_are_idle = false;
@@ -886,7 +888,7 @@ struct llama_server_context
{"task_id", slot->task_id},
});
// LOG_TEE("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
LOG_TEE("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
return true;
}
@@ -1004,13 +1006,11 @@ struct llama_server_context
slot.generated_text += token_str;
slot.has_next_token = true;
/*
if (slot.ctx_sampling->params.use_penalty_prompt_tokens && result.tok != -1)
{
// we can change penalty_prompt_tokens because it is always created from scratch each request
slot.ctx_sampling->params.penalty_prompt_tokens.push_back(result.tok);
}
*/
// check if there is incomplete UTF-8 character at the end
bool incomplete = false;
@@ -1119,7 +1119,7 @@ struct llama_server_context
continue;
}
if (!llava_image_embed_make_with_clip_img(clp_ctx, params.cpuparams.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
if (!llava_image_embed_make_with_clip_img(clp_ctx, params.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
LOG_TEE("Error processing the given image");
return false;
}
@@ -1144,11 +1144,13 @@ struct llama_server_context
json get_formated_generation(llama_client_slot &slot)
{
std::vector<std::string> samplers;
samplers.reserve(slot.sparams.samplers.size());
for (const auto & sampler : slot.sparams.samplers)
const auto eos_bias = slot.sparams.logit_bias.find(llama_token_eos(model));
const bool ignore_eos = eos_bias != slot.sparams.logit_bias.end() &&
eos_bias->second < 0.0f && std::isinf(eos_bias->second);
std::vector<std::string> samplers_sequence;
for (const auto &sampler_type : slot.sparams.samplers_sequence)
{
samplers.emplace_back(gpt_sampler_type_to_str(sampler));
samplers_sequence.emplace_back(llama_sampling_type_to_str(sampler_type));
}
return json {
@@ -1163,11 +1165,13 @@ struct llama_server_context
{"top_p", slot.sparams.top_p},
{"min_p", slot.sparams.min_p},
{"tfs_z", slot.sparams.tfs_z},
{"typical_p", slot.sparams.typ_p},
{"typical_p", slot.sparams.typical_p},
{"repeat_last_n", slot.sparams.penalty_last_n},
{"repeat_penalty", slot.sparams.penalty_repeat},
{"presence_penalty", slot.sparams.penalty_present},
{"frequency_penalty", slot.sparams.penalty_freq},
{"penalty_prompt_tokens", slot.sparams.penalty_prompt_tokens},
{"use_penalty_prompt_tokens", slot.sparams.use_penalty_prompt_tokens},
{"mirostat", slot.sparams.mirostat},
{"mirostat_tau", slot.sparams.mirostat_tau},
{"mirostat_eta", slot.sparams.mirostat_eta},
@@ -1175,13 +1179,13 @@ struct llama_server_context
{"stop", slot.params.antiprompt},
{"n_predict", slot.params.n_predict},
{"n_keep", params.n_keep},
{"ignore_eos", slot.sparams.ignore_eos},
{"ignore_eos", ignore_eos},
{"stream", slot.params.stream},
// {"logit_bias", slot.sparams.logit_bias},
{"logit_bias", slot.sparams.logit_bias},
{"n_probs", slot.sparams.n_probs},
{"min_keep", slot.sparams.min_keep},
{"grammar", slot.sparams.grammar},
{"samplers", samplers}
{"samplers", samplers_sequence}
};
}
@@ -1710,7 +1714,7 @@ struct llama_server_context
if (!slot.params.cache_prompt)
{
gpt_sampler_reset(slot.ctx_sampling);
llama_sampling_reset(slot.ctx_sampling);
slot.n_past = 0;
slot.n_past_se = 0;
@@ -1722,7 +1726,7 @@ struct llama_server_context
// push the prompt into the sampling context (do not apply grammar)
for (auto &token : prompt_tokens)
{
gpt_sampler_accept(slot.ctx_sampling, token, false);
llama_sampling_accept(slot.ctx_sampling, ctx, token, false);
}
slot.n_past = common_part(slot.cache_tokens, prompt_tokens);
@@ -1930,9 +1934,9 @@ struct llama_server_context
}
completion_token_output result;
const llama_token id = gpt_sampler_sample(slot.ctx_sampling, ctx, slot.i_batch - i);
const llama_token id = llama_sampling_sample(slot.ctx_sampling, ctx, NULL, slot.i_batch - i);
gpt_sampler_accept(slot.ctx_sampling, id, true);
llama_sampling_accept(slot.ctx_sampling, ctx, id, true);
slot.n_decoded += 1;
if (slot.n_decoded == 1)
@@ -1942,14 +1946,19 @@ struct llama_server_context
metrics.on_prompt_eval(slot);
}
llama_token_data_array cur_p = { slot.ctx_sampling->cur.data(), slot.ctx_sampling->cur.size(), false };
result.tok = id;
const auto * cur_p = gpt_sampler_get_candidates(slot.ctx_sampling);
for (size_t i = 0; i < (size_t) slot.sparams.n_probs; ++i) {
result.probs.push_back({
cur_p->data[i].id,
i >= cur_p->size ? 0.0f : cur_p->data[i].p,
});
const int32_t n_probs = slot.sparams.n_probs;
if (slot.sparams.temp <= 0 && n_probs > 0)
{
// for llama_sample_token_greedy we need to sort candidates
llama_sample_softmax(ctx, &cur_p);
}
for (size_t i = 0; i < std::min(cur_p.size, (size_t)n_probs); ++i)
{
result.probs.push_back({cur_p.data[i].id, cur_p.data[i].p});
}
if (!process_token(result, slot))
@@ -2201,7 +2210,7 @@ static void params_parse(const backend::ModelOptions* request,
params.model_alias = request->modelfile();
params.n_ctx = request->contextsize();
//params.memory_f16 = request->f16memory();
params.cpuparams.n_threads = request->threads();
params.n_threads = request->threads();
params.n_gpu_layers = request->ngpulayers();
params.n_batch = request->nbatch();
// Set params.n_parallel by environment variable (LLAMA_PARALLEL), defaults to 1

View File

@@ -1,13 +0,0 @@
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 342042ff..224db9b5 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -2419,7 +2419,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
int* patches_data = (int*)malloc(ggml_nbytes(patches));
for (int i = 0; i < num_patches; i++) {
- patches_data[i] = i + 1;
+ patches_data[i] = i;
}
ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
free(patches_data);

View File

@@ -1,12 +1,5 @@
#!/bin/bash
## Patches
## Apply patches from the `patches` directory
for patch in $(ls patches); do
echo "Applying patch $patch"
patch -d llama.cpp/ -p1 < patches/$patch
done
cp -r CMakeLists.txt llama.cpp/examples/grpc-server/
cp -r grpc-server.cpp llama.cpp/examples/grpc-server/
cp -rfv json.hpp llama.cpp/examples/grpc-server/

View File

@@ -480,4 +480,31 @@ static inline std::vector<uint8_t> base64_decode(const std::string & encoded_str
}
return ret;
}
//
// random string / id
//
static std::string random_string()
{
static const std::string str("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
std::random_device rd;
std::mt19937 generator(rd());
std::string result(32, ' ');
for (int i = 0; i < 32; ++i) {
result[i] = str[generator() % str.size()];
}
return result;
}
static std::string gen_chatcmplid()
{
std::stringstream chatcmplid;
chatcmplid << "chatcmpl-" << random_string();
return chatcmplid.str();
}

View File

@@ -0,0 +1,104 @@
package main
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
"github.com/go-audio/wav"
"github.com/mudler/LocalAI/core/schema"
)
func ffmpegCommand(args []string) (string, error) {
cmd := exec.Command("ffmpeg", args...) // Constrain this to ffmpeg to permit security scanner to see that the command is safe.
cmd.Env = os.Environ()
out, err := cmd.CombinedOutput()
return string(out), err
}
// AudioToWav converts audio to wav for transcribe.
// TODO: use https://github.com/mccoyst/ogg?
func audioToWav(src, dst string) error {
commandArgs := []string{"-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
out, err := ffmpegCommand(commandArgs)
if err != nil {
return fmt.Errorf("error: %w out: %s", err, out)
}
return nil
}
func Transcript(model whisper.Model, audiopath, language string, translate bool, threads uint) (schema.TranscriptionResult, error) {
res := schema.TranscriptionResult{}
dir, err := os.MkdirTemp("", "whisper")
if err != nil {
return res, err
}
defer os.RemoveAll(dir)
convertedPath := filepath.Join(dir, "converted.wav")
if err := audioToWav(audiopath, convertedPath); err != nil {
return res, err
}
// Open samples
fh, err := os.Open(convertedPath)
if err != nil {
return res, err
}
defer fh.Close()
// Read samples
d := wav.NewDecoder(fh)
buf, err := d.FullPCMBuffer()
if err != nil {
return res, err
}
data := buf.AsFloat32Buffer().Data
// Process samples
context, err := model.NewContext()
if err != nil {
return res, err
}
context.SetThreads(threads)
if language != "" {
context.SetLanguage(language)
} else {
context.SetLanguage("auto")
}
if translate {
context.SetTranslate(true)
}
if err := context.Process(data, nil, nil); err != nil {
return res, err
}
for {
s, err := context.NextSegment()
if err != nil {
break
}
var tokens []int
for _, t := range s.Tokens {
tokens = append(tokens, t.Id)
}
segment := schema.Segment{Id: s.Num, Text: s.Text, Start: s.Start, End: s.End, Tokens: tokens}
res.Segments = append(res.Segments, segment)
res.Text += s.Text
}
return res, nil
}

View File

@@ -0,0 +1,26 @@
package main
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)
type Whisper struct {
base.SingleThread
whisper whisper.Model
}
func (sd *Whisper) Load(opts *pb.ModelOptions) error {
// Note: the Model here is a path to a directory containing the model files
w, err := whisper.New(opts.ModelFile)
sd.whisper = w
return err
}
func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.TranscriptionResult, error) {
return Transcript(sd.whisper, opts.Dst, opts.Language, opts.Translate, uint(opts.Threads))
}

View File

@@ -1,105 +0,0 @@
package main
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"os"
"path/filepath"
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
"github.com/go-audio/wav"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/utils"
)
type Whisper struct {
base.SingleThread
whisper whisper.Model
}
func (sd *Whisper) Load(opts *pb.ModelOptions) error {
// Note: the Model here is a path to a directory containing the model files
w, err := whisper.New(opts.ModelFile)
sd.whisper = w
return err
}
func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (pb.TranscriptResult, error) {
dir, err := os.MkdirTemp("", "whisper")
if err != nil {
return pb.TranscriptResult{}, err
}
defer os.RemoveAll(dir)
convertedPath := filepath.Join(dir, "converted.wav")
if err := utils.AudioToWav(opts.Dst, convertedPath); err != nil {
return pb.TranscriptResult{}, err
}
// Open samples
fh, err := os.Open(convertedPath)
if err != nil {
return pb.TranscriptResult{}, err
}
defer fh.Close()
// Read samples
d := wav.NewDecoder(fh)
buf, err := d.FullPCMBuffer()
if err != nil {
return pb.TranscriptResult{}, err
}
data := buf.AsFloat32Buffer().Data
// Process samples
context, err := sd.whisper.NewContext()
if err != nil {
return pb.TranscriptResult{}, err
}
context.SetThreads(uint(opts.Threads))
if opts.Language != "" {
context.SetLanguage(opts.Language)
} else {
context.SetLanguage("auto")
}
if opts.Translate {
context.SetTranslate(true)
}
if err := context.Process(data, nil, nil); err != nil {
return pb.TranscriptResult{}, err
}
segments := []*pb.TranscriptSegment{}
text := ""
for {
s, err := context.NextSegment()
if err != nil {
break
}
var tokens []int32
for _, t := range s.Tokens {
tokens = append(tokens, int32(t.Id))
}
segment := &pb.TranscriptSegment{Id: int32(s.Num), Text: s.Text, Start: int64(s.Start), End: int64(s.End), Tokens: tokens}
segments = append(segments, segment)
text += s.Text
}
return pb.TranscriptResult{
Segments: segments,
Text: text,
}, nil
}

View File

@@ -1,6 +1,6 @@
accelerate
auto-gptq==0.7.1
grpcio==1.66.1
grpcio==1.65.4
protobuf
certifi
transformers

View File

@@ -1,4 +1,4 @@
bark==0.1.5
grpcio==1.66.1
grpcio==1.65.5
protobuf
certifi

View File

@@ -1,2 +1,2 @@
grpcio==1.66.1
grpcio==1.65.5
protobuf

View File

@@ -1,4 +1,4 @@
TTS==0.22.0
grpcio==1.66.1
grpcio==1.65.5
protobuf
certifi

View File

@@ -168,7 +168,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if request.CFGScale != 0:
self.cfg_scale = request.CFGScale
clipmodel = "Lykon/dreamshaper-8"
clipmodel = "runwayml/stable-diffusion-v1-5"
if request.CLIPModel != "":
clipmodel = request.CLIPModel
clipsubfolder = "text_encoder"

View File

@@ -1,5 +1,5 @@
setuptools
grpcio==1.66.1
grpcio==1.65.4
pillow
protobuf
certifi

View File

@@ -53,7 +53,7 @@ class TestBackendServicer(unittest.TestCase):
self.setUp()
with grpc.insecure_channel("localhost:50051") as channel:
stub = backend_pb2_grpc.BackendStub(channel)
response = stub.LoadModel(backend_pb2.ModelOptions(Model="Lykon/dreamshaper-8"))
response = stub.LoadModel(backend_pb2.ModelOptions(Model="runwayml/stable-diffusion-v1-5"))
self.assertTrue(response.success)
self.assertEqual(response.message, "Model loaded successfully")
except Exception as err:
@@ -71,7 +71,7 @@ class TestBackendServicer(unittest.TestCase):
self.setUp()
with grpc.insecure_channel("localhost:50051") as channel:
stub = backend_pb2_grpc.BackendStub(channel)
response = stub.LoadModel(backend_pb2.ModelOptions(Model="Lykon/dreamshaper-8"))
response = stub.LoadModel(backend_pb2.ModelOptions(Model="runwayml/stable-diffusion-v1-5"))
print(response.message)
self.assertTrue(response.success)
image_req = backend_pb2.GenerateImageRequest(positive_prompt="cat", width=16,height=16, dst="test.jpg")
@@ -81,4 +81,4 @@ class TestBackendServicer(unittest.TestCase):
print(err)
self.fail("Image gen service failed")
finally:
self.tearDown()
self.tearDown()

1
backend/python/exllama/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
source

View File

@@ -0,0 +1,25 @@
export CONDA_ENV_PATH = "exllama.yml"
.PHONY: exllama
exllama: protogen
bash install.sh ${CONDA_ENV_PATH}
.PHONY: run
run: protogen
@echo "Running exllama..."
bash run.sh
@echo "exllama run."
.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py
.PHONY: protogen-clean
protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py
backend_pb2_grpc.py backend_pb2.py:
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
.PHONY: clean
clean: protogen-clean
$(RM) -r venv source __pycache__

View File

@@ -0,0 +1,5 @@
# Creating a separate environment for the exllama project
```
make exllama
```

159
backend/python/exllama/backend.py Executable file
View File

@@ -0,0 +1,159 @@
#!/usr/bin/env python3
import grpc
from concurrent import futures
import time
import backend_pb2
import backend_pb2_grpc
import argparse
import signal
import sys
import os, glob
from pathlib import Path
import torch
import torch.nn.functional as F
from torch import version as torch_version
from source.tokenizer import ExLlamaTokenizer
from source.generator import ExLlamaGenerator
from source.model import ExLlama, ExLlamaCache, ExLlamaConfig
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
def generate(self,prompt, max_new_tokens):
self.generator.end_beam_search()
# Tokenizing the input
ids = self.generator.tokenizer.encode(prompt)
self.generator.gen_begin_reuse(ids)
initial_len = self.generator.sequence[0].shape[0]
has_leading_space = False
decoded_text = ''
for i in range(max_new_tokens):
token = self.generator.gen_single_token()
if i == 0 and self.generator.tokenizer.tokenizer.IdToPiece(int(token)).startswith(''):
has_leading_space = True
decoded_text = self.generator.tokenizer.decode(self.generator.sequence[0][initial_len:])
if has_leading_space:
decoded_text = ' ' + decoded_text
if token.item() == self.generator.tokenizer.eos_token_id:
break
return decoded_text
def Health(self, request, context):
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
def LoadModel(self, request, context):
try:
# https://github.com/turboderp/exllama/blob/master/example_cfg.py
model_directory = request.ModelFile
# Locate files we need within that directory
tokenizer_path = os.path.join(model_directory, "tokenizer.model")
model_config_path = os.path.join(model_directory, "config.json")
st_pattern = os.path.join(model_directory, "*.safetensors")
model_path = glob.glob(st_pattern)[0]
# Create config, model, tokenizer and generator
config = ExLlamaConfig(model_config_path) # create config from config.json
config.model_path = model_path # supply path to model weights file
if (request.ContextSize):
config.max_seq_len = request.ContextSize # override max sequence length
config.max_attention_size = request.ContextSize**2 # Should be set to context_size^2.
# https://github.com/turboderp/exllama/issues/220#issuecomment-1720324163
# Set Rope scaling.
if (request.RopeFreqScale):
# Alpha value for Rope scaling.
# Higher value increases context but adds perplexity.
# alpha_value and compress_pos_emb are mutually exclusive.
# https://github.com/turboderp/exllama/issues/115
config.alpha_value = request.RopeFreqScale
config.calculate_rotary_embedding_base()
model = ExLlama(config) # create ExLlama instance and load the weights
tokenizer = ExLlamaTokenizer(tokenizer_path) # create tokenizer from tokenizer model file
cache = ExLlamaCache(model, batch_size = 2) # create cache for inference
generator = ExLlamaGenerator(model, tokenizer, cache) # create generator
self.generator= generator
self.model = model
self.tokenizer = tokenizer
self.cache = cache
except Exception as err:
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
return backend_pb2.Result(message="Model loaded successfully", success=True)
def Predict(self, request, context):
penalty = 1.15
if request.Penalty != 0.0:
penalty = request.Penalty
self.generator.settings.token_repetition_penalty_max = penalty
self.generator.settings.temperature = request.Temperature
self.generator.settings.top_k = request.TopK
self.generator.settings.top_p = request.TopP
tokens = 512
if request.Tokens != 0:
tokens = request.Tokens
if self.cache.batch_size == 1:
del self.cache
self.cache = ExLlamaCache(self.model, batch_size=2)
self.generator = ExLlamaGenerator(self.model, self.tokenizer, self.cache)
t = self.generate(request.Prompt, tokens)
# Remove prompt from response if present
if request.Prompt in t:
t = t.replace(request.Prompt, "")
return backend_pb2.Result(message=bytes(t, encoding='utf-8'))
def PredictStream(self, request, context):
# Implement PredictStream RPC
#for reply in some_data_generator():
# yield reply
# Not implemented yet
return self.Predict(request, context)
def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
server.add_insecure_port(address)
server.start()
print("Server started. Listening on: " + address, file=sys.stderr)
# Define the signal handler function
def signal_handler(sig, frame):
print("Received termination signal. Shutting down...")
server.stop(0)
sys.exit(0)
# Set the signal handlers for SIGINT and SIGTERM
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)
try:
while True:
time.sleep(_ONE_DAY_IN_SECONDS)
except KeyboardInterrupt:
server.stop(0)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run the gRPC server.")
parser.add_argument(
"--addr", default="localhost:50051", help="The address to bind the server to."
)
args = parser.parse_args()
serve(args.addr)

View File

@@ -0,0 +1,13 @@
#!/bin/bash
set -e
LIMIT_TARGETS="cublas"
source $(dirname $0)/../common/libbackend.sh
installRequirements
git clone https://github.com/turboderp/exllama $MY_DIR/source
uv pip install ${BUILD_ISOLATION_FLAG} --requirement ${MY_DIR}/source/requirements.txt
cp -v ./*py $MY_DIR/source/

View File

@@ -0,0 +1,3 @@
transformers
accelerate
torch

View File

@@ -0,0 +1,4 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
transformers
accelerate

View File

@@ -0,0 +1,3 @@
torch
transformers
accelerate

View File

@@ -0,0 +1,4 @@
grpcio==1.65.5
protobuf
certifi
setuptools

7
backend/python/exllama/run.sh Executable file
View File

@@ -0,0 +1,7 @@
#!/bin/bash
LIMIT_TARGETS="cublas"
BACKEND_FILE="${MY_DIR}/source/backend.py"
source $(dirname $0)/../common/libbackend.sh
startBackend $@

6
backend/python/exllama/test.sh Executable file
View File

@@ -0,0 +1,6 @@
#!/bin/bash
set -e
source $(dirname $0)/../common/libbackend.sh
runUnittests

View File

@@ -1,4 +1,4 @@
grpcio==1.66.1
grpcio==1.65.4
protobuf
certifi
wheel

View File

@@ -1,3 +1,3 @@
grpcio==1.66.1
grpcio==1.65.5
protobuf
certifi

View File

@@ -2,7 +2,7 @@
intel-extension-for-pytorch
torch
optimum[openvino]
grpcio==1.66.1
grpcio==1.65.5
protobuf
librosa==0.9.1
faster-whisper==1.0.3

View File

@@ -1,4 +1,4 @@
grpcio==1.66.1
grpcio==1.65.5
protobuf
librosa
faster-whisper

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.3.0+rocm6.0
torchaudio==2.3.0+rocm6.0
torch
torchaudio
transformers
accelerate
accelerate

View File

@@ -1,4 +1,4 @@
grpcio==1.66.1
grpcio==1.65.5
protobuf
certifi
llvmlite==0.43.0

View File

@@ -1,3 +1,3 @@
grpcio==1.66.1
grpcio==1.65.4
protobuf
certifi

View File

@@ -1,3 +1,3 @@
grpcio==1.66.1
grpcio==1.65.5
protobuf
certifi

View File

@@ -15,7 +15,7 @@ import backend_pb2_grpc
import grpc
from scipy.io import wavfile
from scipy.io.wavfile import write as write_wav
from transformers import AutoProcessor, MusicgenForConditionalGeneration
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
@@ -63,61 +63,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
return backend_pb2.Result(message="Model loaded successfully", success=True)
def SoundGeneration(self, request, context):
model_name = request.model
if model_name == "":
return backend_pb2.Result(success=False, message="request.model is required")
try:
self.processor = AutoProcessor.from_pretrained(model_name)
self.model = MusicgenForConditionalGeneration.from_pretrained(model_name)
inputs = None
if request.text == "":
inputs = self.model.get_unconditional_inputs(num_samples=1)
elif request.HasField('src'):
# TODO SECURITY CODE GOES HERE LOL
# WHO KNOWS IF THIS WORKS???
sample_rate, wsamples = wavfile.read('path_to_your_file.wav')
if request.HasField('src_divisor'):
wsamples = wsamples[: len(wsamples) // request.src_divisor]
inputs = self.processor(
audio=wsamples,
sampling_rate=sample_rate,
text=[request.text],
padding=True,
return_tensors="pt",
)
else:
inputs = self.processor(
text=[request.text],
padding=True,
return_tensors="pt",
)
tokens = 256
if request.HasField('duration'):
tokens = int(request.duration * 51.2) # 256 tokens = 5 seconds, therefore 51.2 tokens is one second
guidance = 3.0
if request.HasField('temperature'):
guidance = request.temperature
dosample = True
if request.HasField('sample'):
dosample = request.sample
audio_values = self.model.generate(**inputs, do_sample=dosample, guidance_scale=guidance, max_new_tokens=tokens)
print("[transformers-musicgen] SoundGeneration generated!", file=sys.stderr)
sampling_rate = self.model.config.audio_encoder.sampling_rate
wavfile.write(request.dst, rate=sampling_rate, data=audio_values[0, 0].numpy())
print("[transformers-musicgen] SoundGeneration saved to", request.dst, file=sys.stderr)
print("[transformers-musicgen] SoundGeneration for", file=sys.stderr)
print("[transformers-musicgen] SoundGeneration requested tokens", tokens, file=sys.stderr)
print(request, file=sys.stderr)
except Exception as err:
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
return backend_pb2.Result(success=True)
# The TTS endpoint is older, and provides fewer features, but exists for compatibility reasons
def TTS(self, request, context):
model_name = request.model
if model_name == "":
@@ -130,7 +75,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
padding=True,
return_tensors="pt",
)
tokens = 512 # No good place to set the "length" in TTS, so use 10s as a sane default
tokens = 256
# TODO get tokens from request?
audio_values = self.model.generate(**inputs, max_new_tokens=tokens)
print("[transformers-musicgen] TTS generated!", file=sys.stderr)
sampling_rate = self.model.config.audio_encoder.sampling_rate

View File

@@ -1,4 +1,4 @@
grpcio==1.66.1
grpcio==1.65.5
protobuf
scipy==1.14.0
certifi

View File

@@ -63,7 +63,7 @@ class TestBackendServicer(unittest.TestCase):
def test_tts(self):
"""
This method tests if TTS is generated successfully
This method tests if the embeddings are generated successfully
"""
try:
self.setUp()
@@ -77,24 +77,5 @@ class TestBackendServicer(unittest.TestCase):
except Exception as err:
print(err)
self.fail("TTS service failed")
finally:
self.tearDown()
def test_sound_generation(self):
"""
This method tests if SoundGeneration is generated successfully
"""
try:
self.setUp()
with grpc.insecure_channel("localhost:50051") as channel:
stub = backend_pb2_grpc.BackendStub(channel)
response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small"))
self.assertTrue(response.success)
sg_request = backend_pb2.SoundGenerationRequest(text="80s TV news production music hit for tonight's biggest story")
sg_response = stub.SoundGeneration(sg_request)
self.assertIsNotNone(sg_response)
except Exception as err:
print(err)
self.fail("SoundGeneration service failed")
finally:
self.tearDown()

View File

@@ -1,4 +1,4 @@
grpcio==1.66.1
grpcio==1.65.5
protobuf
certifi
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,3 +1,3 @@
grpcio==1.66.1
grpcio==1.65.5
protobuf
certifi

View File

@@ -135,26 +135,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
res = await gen.__anext__()
return res
def Embedding(self, request, context):
"""
A gRPC method that calculates embeddings for a given sentence.
Args:
request: An EmbeddingRequest object that contains the request parameters.
context: A grpc.ServicerContext object that provides information about the RPC.
Returns:
An EmbeddingResult object that contains the calculated embeddings.
"""
print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr)
outputs = self.model.encode(request.Embeddings)
# Check if we have one result at least
if len(outputs) == 0:
context.set_code(grpc.StatusCode.INVALID_ARGUMENT)
context.set_details("No embeddings were calculated.")
return backend_pb2.EmbeddingResult()
return backend_pb2.EmbeddingResult(embeddings=outputs[0].outputs.embedding)
async def PredictStream(self, request, context):
"""
Generates text based on the given prompt and sampling parameters, and streams the results.

View File

@@ -1,4 +1,4 @@
grpcio==1.66.1
grpcio==1.65.5
protobuf
certifi
setuptools

View File

@@ -72,28 +72,5 @@ class TestBackendServicer(unittest.TestCase):
except Exception as err:
print(err)
self.fail("text service failed")
finally:
self.tearDown()
def test_embedding(self):
"""
This method tests if the embeddings are generated successfully
"""
try:
self.setUp()
with grpc.insecure_channel("localhost:50051") as channel:
stub = backend_pb2_grpc.BackendStub(channel)
response = stub.LoadModel(backend_pb2.ModelOptions(Model="intfloat/e5-mistral-7b-instruct"))
self.assertTrue(response.success)
embedding_request = backend_pb2.PredictOptions(Embeddings="This is a test sentence.")
embedding_response = stub.Embedding(embedding_request)
self.assertIsNotNone(embedding_response.embeddings)
# assert that is a list of floats
self.assertIsInstance(embedding_response.embeddings, list)
# assert that the list is not empty
self.assertTrue(len(embedding_response.embeddings) > 0)
except Exception as err:
print(err)
self.fail("Embedding service failed")
finally:
self.tearDown()

View File

@@ -1,13 +0,0 @@
package backend_test
import (
"testing"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
func TestBackend(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "Backend test suite")
}

View File

@@ -9,8 +9,6 @@ import (
"sync"
"unicode/utf8"
"github.com/rs/zerolog/log"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
@@ -89,7 +87,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
case string:
protoMessages[i].Content = ct
default:
return nil, fmt.Errorf("unsupported type for schema.Message.Content for inference: %T", ct)
return nil, fmt.Errorf("Unsupported type for schema.Message.Content for inference: %T", ct)
}
}
}
@@ -183,37 +181,13 @@ func Finetune(config config.BackendConfig, input, prediction string) string {
mu.Lock()
reg, ok := cutstrings[c]
if !ok {
r, err := regexp.Compile(c)
if err != nil {
log.Fatal().Err(err).Msg("failed to compile regex")
}
cutstrings[c] = r
cutstrings[c] = regexp.MustCompile(c)
reg = cutstrings[c]
}
mu.Unlock()
prediction = reg.ReplaceAllString(prediction, "")
}
// extract results from the response which can be for instance inside XML tags
var predResult string
for _, r := range config.ExtractRegex {
mu.Lock()
reg, ok := cutstrings[r]
if !ok {
regex, err := regexp.Compile(r)
if err != nil {
log.Fatal().Err(err).Msg("failed to compile regex")
}
cutstrings[r] = regex
reg = regex
}
mu.Unlock()
predResult += reg.FindString(prediction)
}
if predResult != "" {
prediction = predResult
}
for _, c := range config.TrimSpace {
prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c))
}

View File

@@ -1,109 +0,0 @@
package backend_test
import (
. "github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
var _ = Describe("LLM tests", func() {
Context("Finetune LLM output", func() {
var (
testConfig config.BackendConfig
input string
prediction string
result string
)
BeforeEach(func() {
testConfig = config.BackendConfig{
PredictionOptions: schema.PredictionOptions{
Echo: false,
},
LLMConfig: config.LLMConfig{
Cutstrings: []string{`<.*?>`}, // Example regex for removing XML tags
ExtractRegex: []string{`<result>(.*?)</result>`}, // Example regex to extract from tags
TrimSpace: []string{" ", "\n"},
TrimSuffix: []string{".", "!"},
},
}
})
Context("when echo is enabled", func() {
BeforeEach(func() {
testConfig.Echo = true
input = "Hello"
prediction = "World"
})
It("should prepend input to prediction", func() {
result = Finetune(testConfig, input, prediction)
Expect(result).To(Equal("HelloWorld"))
})
})
Context("when echo is disabled", func() {
BeforeEach(func() {
testConfig.Echo = false
input = "Hello"
prediction = "World"
})
It("should not modify the prediction with input", func() {
result = Finetune(testConfig, input, prediction)
Expect(result).To(Equal("World"))
})
})
Context("when cutstrings regex is applied", func() {
BeforeEach(func() {
input = ""
prediction = "<div>Hello</div> World"
})
It("should remove substrings matching cutstrings regex", func() {
result = Finetune(testConfig, input, prediction)
Expect(result).To(Equal("Hello World"))
})
})
Context("when extract regex is applied", func() {
BeforeEach(func() {
input = ""
prediction = "<response><result>42</result></response>"
})
It("should extract substrings matching the extract regex", func() {
result = Finetune(testConfig, input, prediction)
Expect(result).To(Equal("42"))
})
})
Context("when trimming spaces", func() {
BeforeEach(func() {
input = ""
prediction = " Hello World "
})
It("should trim spaces from the prediction", func() {
result = Finetune(testConfig, input, prediction)
Expect(result).To(Equal("Hello World"))
})
})
Context("when trimming suffixes", func() {
BeforeEach(func() {
input = ""
prediction = "Hello World."
})
It("should trim suffixes from the prediction", func() {
result = Finetune(testConfig, input, prediction)
Expect(result).To(Equal("Hello World"))
})
})
})
})

View File

@@ -1,74 +0,0 @@
package backend
import (
"context"
"fmt"
"os"
"path/filepath"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/utils"
)
func SoundGeneration(
backend string,
modelFile string,
text string,
duration *float32,
temperature *float32,
doSample *bool,
sourceFile *string,
sourceDivisor *int32,
loader *model.ModelLoader,
appConfig *config.ApplicationConfig,
backendConfig config.BackendConfig,
) (string, *proto.Result, error) {
if backend == "" {
return "", nil, fmt.Errorf("backend is a required parameter")
}
grpcOpts := gRPCModelOpts(backendConfig)
opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
model.WithBackendString(backend),
model.WithModel(modelFile),
model.WithContext(appConfig.Context),
model.WithAssetDir(appConfig.AssetsDestination),
model.WithLoadGRPCLoadModelOpts(grpcOpts),
})
soundGenModel, err := loader.BackendLoader(opts...)
if err != nil {
return "", nil, err
}
if soundGenModel == nil {
return "", nil, fmt.Errorf("could not load sound generation model")
}
if err := os.MkdirAll(appConfig.AudioDir, 0750); err != nil {
return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
}
fileName := utils.GenerateUniqueFileName(appConfig.AudioDir, "sound_generation", ".wav")
filePath := filepath.Join(appConfig.AudioDir, fileName)
res, err := soundGenModel.SoundGeneration(context.Background(), &proto.SoundGenerationRequest{
Text: text,
Model: modelFile,
Dst: filePath,
Sample: doSample,
Duration: duration,
Temperature: temperature,
Src: sourceFile,
SrcDivisor: sourceDivisor,
})
// return RPC error if any
if !res.Success {
return "", nil, fmt.Errorf(res.Message)
}
return filePath, res, err
}

View File

@@ -3,13 +3,12 @@ package backend
import (
"context"
"fmt"
"time"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/model"
model "github.com/mudler/LocalAI/pkg/model"
)
func ModelTranscription(audio, language string, translate bool, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {
@@ -22,40 +21,19 @@ func ModelTranscription(audio, language string, translate bool, ml *model.ModelL
model.WithAssetDir(appConfig.AssetsDestination),
})
transcriptionModel, err := ml.BackendLoader(opts...)
whisperModel, err := ml.BackendLoader(opts...)
if err != nil {
return nil, err
}
if transcriptionModel == nil {
return nil, fmt.Errorf("could not load transcription model")
if whisperModel == nil {
return nil, fmt.Errorf("could not load whisper model")
}
r, err := transcriptionModel.AudioTranscription(context.Background(), &proto.TranscriptRequest{
return whisperModel.AudioTranscription(context.Background(), &proto.TranscriptRequest{
Dst: audio,
Language: language,
Translate: translate,
Threads: uint32(*backendConfig.Threads),
})
if err != nil {
return nil, err
}
tr := &schema.TranscriptionResult{
Text: r.Text,
}
for _, s := range r.Segments {
var tks []int
for _, t := range s.Tokens {
tks = append(tks, int(t))
}
tr.Segments = append(tr.Segments,
schema.Segment{
Text: s.Text,
Id: int(s.Id),
Start: time.Duration(s.Start),
End: time.Duration(s.End),
Tokens: tks,
})
}
return tr, err
}

View File

@@ -9,15 +9,31 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/model"
model "github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/utils"
)
func generateUniqueFileName(dir, baseName, ext string) string {
counter := 1
fileName := baseName + ext
for {
filePath := filepath.Join(dir, fileName)
_, err := os.Stat(filePath)
if os.IsNotExist(err) {
return fileName
}
counter++
fileName = fmt.Sprintf("%s_%d%s", baseName, counter, ext)
}
}
func ModelTTS(
backend,
text,
modelFile,
voice,
voice ,
language string,
loader *model.ModelLoader,
appConfig *config.ApplicationConfig,
@@ -50,7 +66,7 @@ func ModelTTS(
return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
}
fileName := utils.GenerateUniqueFileName(appConfig.AudioDir, "tts", ".wav")
fileName := generateUniqueFileName(appConfig.AudioDir, "tts", ".wav")
filePath := filepath.Join(appConfig.AudioDir, fileName)
// If the model file is not empty, we pass it joined with the model path
@@ -72,15 +88,12 @@ func ModelTTS(
}
res, err := ttsModel.TTS(context.Background(), &proto.TTSRequest{
Text: text,
Model: modelPath,
Voice: voice,
Dst: filePath,
Text: text,
Model: modelPath,
Voice: voice,
Dst: filePath,
Language: &language,
})
if err != nil {
return "", nil, err
}
// return RPC error if any
if !res.Success {

View File

@@ -1,80 +0,0 @@
package cli_api
import (
"context"
"fmt"
"net"
"os"
"strings"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/edgevpn/pkg/node"
"github.com/rs/zerolog/log"
)
func StartP2PStack(ctx context.Context, address, token, networkID string, federated bool) error {
var n *node.Node
// Here we are avoiding creating multiple nodes:
// - if the federated mode is enabled, we create a federated node and expose a service
// - exposing a service creates a node with specific options, and we don't want to create another node
// If the federated mode is enabled, we expose a service to the local instance running
// at r.Address
if federated {
_, port, err := net.SplitHostPort(address)
if err != nil {
return err
}
// Here a new node is created and started
// and a service is exposed by the node
node, err := p2p.ExposeService(ctx, "localhost", port, token, p2p.NetworkID(networkID, p2p.FederatedID))
if err != nil {
return err
}
if err := p2p.ServiceDiscoverer(ctx, node, token, p2p.NetworkID(networkID, p2p.FederatedID), nil, false); err != nil {
return err
}
n = node
}
// If the p2p mode is enabled, we start the service discovery
if token != "" {
// If a node wasn't created previously, create it
if n == nil {
node, err := p2p.NewNode(token)
if err != nil {
return err
}
err = node.Start(ctx)
if err != nil {
return fmt.Errorf("starting new node: %w", err)
}
n = node
}
// Attach a ServiceDiscoverer to the p2p node
log.Info().Msg("Starting P2P server discovery...")
if err := p2p.ServiceDiscoverer(ctx, n, token, p2p.NetworkID(networkID, p2p.WorkerID), func(serviceID string, node p2p.NodeData) {
var tunnelAddresses []string
for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(networkID, p2p.WorkerID)) {
if v.IsOnline() {
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
} else {
log.Info().Msgf("Node %s is offline", v.ID)
}
}
tunnelEnvVar := strings.Join(tunnelAddresses, ",")
os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar)
log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar)
}, true); err != nil {
return err
}
}
return nil
}

View File

@@ -8,13 +8,12 @@ import (
var CLI struct {
cliContext.Context `embed:""`
Run RunCMD `cmd:"" help:"Run LocalAI, this the default command if no other command is specified. Run 'local-ai run --help' for more information" default:"withargs"`
Federated FederatedCLI `cmd:"" help:"Run LocalAI in federated mode"`
Models ModelsCMD `cmd:"" help:"Manage LocalAI models and definitions"`
TTS TTSCMD `cmd:"" help:"Convert text to speech"`
SoundGeneration SoundGenerationCMD `cmd:"" help:"Generates audio files from text or audio"`
Transcript TranscriptCMD `cmd:"" help:"Convert audio to text"`
Worker worker.Worker `cmd:"" help:"Run workers to distribute workload (llama.cpp-only)"`
Util UtilCMD `cmd:"" help:"Utility commands"`
Explorer ExplorerCMD `cmd:"" help:"Run p2p explorer"`
Run RunCMD `cmd:"" help:"Run LocalAI, this the default command if no other command is specified. Run 'local-ai run --help' for more information" default:"withargs"`
Federated FederatedCLI `cmd:"" help:"Run LocalAI in federated mode"`
Models ModelsCMD `cmd:"" help:"Manage LocalAI models and definitions"`
TTS TTSCMD `cmd:"" help:"Convert text to speech"`
Transcript TranscriptCMD `cmd:"" help:"Convert audio to text"`
Worker worker.Worker `cmd:"" help:"Run workers to distribute workload (llama.cpp-only)"`
Util UtilCMD `cmd:"" help:"Utility commands"`
Explorer ExplorerCMD `cmd:"" help:"Run p2p explorer"`
}

View File

@@ -3,10 +3,11 @@ package cli
import (
"context"
"fmt"
"net"
"os"
"strings"
"time"
cli_api "github.com/mudler/LocalAI/core/cli/api"
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http"
@@ -52,8 +53,6 @@ type RunCMD struct {
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
Peer2PeerDHTInterval int `env:"LOCALAI_P2P_DHT_INTERVAL,P2P_DHT_INTERVAL" default:"360" name:"p2p-dht-interval" help:"Interval for DHT refresh (used during token generation)" group:"p2p"`
Peer2PeerOTPInterval int `env:"LOCALAI_P2P_OTP_INTERVAL,P2P_OTP_INTERVAL" default:"9000" name:"p2p-otp-interval" help:"Interval for OTP refresh (used during token generation)" group:"p2p"`
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances" group:"p2p"`
ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
@@ -108,7 +107,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
// IF no token is provided, and p2p is enabled,
// we generate one and wait for the user to pick up the token (this is for interactive)
log.Info().Msg("No token provided, generating one")
token = p2p.GenerateToken(r.Peer2PeerDHTInterval, r.Peer2PeerOTPInterval)
token = p2p.GenerateToken()
log.Info().Msg("Generated Token:")
fmt.Println(token)
@@ -116,12 +115,52 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
fmt.Printf("export TOKEN=\"%s\"\nlocal-ai worker p2p-llama-cpp-rpc\n", token)
}
opts = append(opts, config.WithP2PToken(token))
node, err := p2p.NewNode(token)
if err != nil {
return err
}
nodeContext := context.Background()
err = node.Start(nodeContext)
if err != nil {
return fmt.Errorf("starting new node: %w", err)
}
log.Info().Msg("Starting P2P server discovery...")
if err := p2p.ServiceDiscoverer(nodeContext, node, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.WorkerID), func(serviceID string, node p2p.NodeData) {
var tunnelAddresses []string
for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(r.Peer2PeerNetworkID, p2p.WorkerID)) {
if v.IsOnline() {
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
} else {
log.Info().Msgf("Node %s is offline", v.ID)
}
}
tunnelEnvVar := strings.Join(tunnelAddresses, ",")
os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar)
log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar)
}, true); err != nil {
return err
}
}
backgroundCtx := context.Background()
if r.Federated {
_, port, err := net.SplitHostPort(r.Address)
if err != nil {
return err
}
fedCtx := context.Background()
if err := cli_api.StartP2PStack(backgroundCtx, r.Address, token, r.Peer2PeerNetworkID, r.Federated); err != nil {
return err
node, err := p2p.ExposeService(fedCtx, "localhost", port, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.FederatedID))
if err != nil {
return err
}
if err := p2p.ServiceDiscoverer(fedCtx, node, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.FederatedID), nil, false); err != nil {
return err
}
}
idleWatchDog := r.EnableWatchdogIdle

View File

@@ -1,110 +0,0 @@
package cli
import (
"context"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"github.com/mudler/LocalAI/core/backend"
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/model"
"github.com/rs/zerolog/log"
)
type SoundGenerationCMD struct {
Text []string `arg:""`
Backend string `short:"b" required:"" help:"Backend to run the SoundGeneration model"`
Model string `short:"m" required:"" help:"Model name to run the SoundGeneration"`
Duration string `short:"d" help:"If specified, the length of audio to generate in seconds"`
Temperature string `short:"t" help:"If specified, the temperature of the generation"`
InputFile string `short:"i" help:"If specified, the input file to condition generation upon"`
InputFileSampleDivisor string `short:"f" help:"If InputFile and this divisor is specified, the first portion of the sample file will be used"`
DoSample bool `short:"s" default:"true" help:"Enables sampling from the model. Better quality at the cost of speed. Defaults to enabled."`
OutputFile string `short:"o" type:"path" help:"The path to write the output wav file"`
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
ExternalGRPCBackends []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"`
}
func parseToFloat32Ptr(input string) *float32 {
f, err := strconv.ParseFloat(input, 32)
if err != nil {
return nil
}
f2 := float32(f)
return &f2
}
func parseToInt32Ptr(input string) *int32 {
i, err := strconv.ParseInt(input, 10, 32)
if err != nil {
return nil
}
i2 := int32(i)
return &i2
}
func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
outputFile := t.OutputFile
outputDir := t.BackendAssetsPath
if outputFile != "" {
outputDir = filepath.Dir(outputFile)
}
text := strings.Join(t.Text, " ")
externalBackends := make(map[string]string)
// split ":" to get backend name and the uri
for _, v := range t.ExternalGRPCBackends {
backend := v[:strings.IndexByte(v, ':')]
uri := v[strings.IndexByte(v, ':')+1:]
externalBackends[backend] = uri
fmt.Printf("TMP externalBackends[%q]=%q\n\n", backend, uri)
}
opts := &config.ApplicationConfig{
ModelPath: t.ModelsPath,
Context: context.Background(),
AudioDir: outputDir,
AssetsDestination: t.BackendAssetsPath,
ExternalGRPCBackends: externalBackends,
}
ml := model.NewModelLoader(opts.ModelPath)
defer func() {
err := ml.StopAllGRPC()
if err != nil {
log.Error().Err(err).Msg("unable to stop all grpc processes")
}
}()
options := config.BackendConfig{}
options.SetDefaults()
var inputFile *string
if t.InputFile != "" {
inputFile = &t.InputFile
}
filePath, _, err := backend.SoundGeneration(t.Backend, t.Model, text,
parseToFloat32Ptr(t.Duration), parseToFloat32Ptr(t.Temperature), &t.DoSample,
inputFile, parseToInt32Ptr(t.InputFileSampleDivisor), ml, opts, options)
if err != nil {
return err
}
if outputFile != "" {
if err := os.Rename(filePath, outputFile); err != nil {
return err
}
fmt.Printf("Generate file %s\n", outputFile)
} else {
fmt.Printf("Generate file %s\n", filePath)
}
return nil
}

View File

@@ -2,7 +2,6 @@ package worker
type WorkerFlags struct {
BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
ExtraLLamaCPPArgs string `name:"llama-cpp-args" env:"LOCALAI_EXTRA_LLAMA_CPP_ARGS,EXTRA_LLAMA_CPP_ARGS" help:"Extra arguments to pass to llama-cpp-rpc-server"`
}
type Worker struct {

View File

@@ -3,7 +3,6 @@ package worker
import (
"fmt"
"os"
"strings"
"syscall"
cliContext "github.com/mudler/LocalAI/core/cli/context"
@@ -13,6 +12,7 @@ import (
)
type LLamaCPP struct {
Args []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"`
WorkerFlags `embed:""`
}
@@ -34,8 +34,9 @@ func (r *LLamaCPP) Run(ctx *cliContext.Context) error {
"llama-cpp-rpc-server",
)
args := strings.Split(r.ExtraLLamaCPPArgs, " ")
args := os.Args[4:]
args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)
args = append([]string{grpcProcess}, args...)
return syscall.Exec(
grpcProcess,

View File

@@ -8,7 +8,6 @@ import (
"fmt"
"os"
"os/exec"
"strings"
"time"
cliContext "github.com/mudler/LocalAI/core/cli/context"
@@ -21,11 +20,12 @@ import (
type P2P struct {
WorkerFlags `embed:""`
Token string `env:"LOCALAI_TOKEN,LOCALAI_P2P_TOKEN,TOKEN" help:"P2P token to use"`
NoRunner bool `env:"LOCALAI_NO_RUNNER,NO_RUNNER" help:"Do not start the llama-cpp-rpc-server"`
RunnerAddress string `env:"LOCALAI_RUNNER_ADDRESS,RUNNER_ADDRESS" help:"Address of the llama-cpp-rpc-server"`
RunnerPort string `env:"LOCALAI_RUNNER_PORT,RUNNER_PORT" help:"Port of the llama-cpp-rpc-server"`
Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances" group:"p2p"`
Token string `env:"LOCALAI_TOKEN,LOCALAI_P2P_TOKEN,TOKEN" help:"P2P token to use"`
NoRunner bool `env:"LOCALAI_NO_RUNNER,NO_RUNNER" help:"Do not start the llama-cpp-rpc-server"`
RunnerAddress string `env:"LOCALAI_RUNNER_ADDRESS,RUNNER_ADDRESS" help:"Address of the llama-cpp-rpc-server"`
RunnerPort string `env:"LOCALAI_RUNNER_PORT,RUNNER_PORT" help:"Port of the llama-cpp-rpc-server"`
ExtraLLamaCPPArgs []string `env:"LOCALAI_EXTRA_LLAMA_CPP_ARGS,EXTRA_LLAMA_CPP_ARGS" help:"Extra arguments to pass to llama-cpp-rpc-server"`
Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances" group:"p2p"`
}
func (r *P2P) Run(ctx *cliContext.Context) error {
@@ -76,8 +76,8 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
"util",
"llama-cpp-rpc-server",
)
extraArgs := strings.Split(r.ExtraLLamaCPPArgs, " ")
args := append([]string{"--host", address, "--port", fmt.Sprint(port)}, extraArgs...)
args := append([]string{"--host", address, "--port", fmt.Sprint(port)}, r.ExtraLLamaCPPArgs...)
args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)
cmd := exec.Command(

View File

@@ -126,7 +126,6 @@ type LLMConfig struct {
Grammar string `yaml:"grammar"`
StopWords []string `yaml:"stopwords"`
Cutstrings []string `yaml:"cutstrings"`
ExtractRegex []string `yaml:"extract_regex"`
TrimSpace []string `yaml:"trimspace"`
TrimSuffix []string `yaml:"trimsuffix"`

View File

@@ -772,17 +772,6 @@ var _ = Describe("API test", func() {
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error:"))
})
It("shows the external backend", func() {
// do an http request to the /system endpoint
resp, err := http.Get("http://127.0.0.1:9090/system")
Expect(err).ToNot(HaveOccurred())
Expect(resp.StatusCode).To(Equal(200))
dat, err := io.ReadAll(resp.Body)
Expect(err).ToNot(HaveOccurred())
Expect(string(dat)).To(ContainSubstring("huggingface"))
Expect(string(dat)).To(ContainSubstring("llama-cpp"))
})
It("transcribes audio", func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")

View File

@@ -1,65 +0,0 @@
package elevenlabs
import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/model"
"github.com/rs/zerolog/log"
)
// SoundGenerationEndpoint is the ElevenLabs SoundGeneration endpoint https://elevenlabs.io/docs/api-reference/sound-generation
// @Summary Generates audio from the input text.
// @Param request body schema.ElevenLabsSoundGenerationRequest true "query params"
// @Success 200 {string} binary "Response"
// @Router /v1/sound-generation [post]
func SoundGenerationEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(schema.ElevenLabsSoundGenerationRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
}
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.ModelID, false)
if err != nil {
modelFile = input.ModelID
log.Warn().Str("ModelID", input.ModelID).Msg("Model not found in context")
}
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
config.LoadOptionDebug(appConfig.Debug),
config.LoadOptionThreads(appConfig.Threads),
config.LoadOptionContextSize(appConfig.ContextSize),
config.LoadOptionF16(appConfig.F16),
)
if err != nil {
modelFile = input.ModelID
log.Warn().Str("Request ModelID", input.ModelID).Err(err).Msg("error during LoadBackendConfigFileByName, using request ModelID")
} else {
if input.ModelID != "" {
modelFile = input.ModelID
} else {
modelFile = cfg.Model
}
}
log.Debug().Str("modelFile", "modelFile").Str("backend", cfg.Backend).Msg("Sound Generation Request about to be sent to backend")
if input.Duration != nil {
log.Debug().Float32("duration", *input.Duration).Msg("duration set")
}
if input.Temperature != nil {
log.Debug().Float32("temperature", *input.Temperature).Msg("temperature set")
}
// TODO: Support uploading files?
filePath, _, err := backend.SoundGeneration(cfg.Backend, modelFile, input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg)
if err != nil {
return err
}
return c.Download(filePath)
}
}

View File

@@ -1,29 +0,0 @@
package localai
import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/model"
)
// SystemInformations returns the system informations
// @Summary Show the LocalAI instance information
// @Success 200 {object} schema.SystemInformationResponse "Response"
// @Router /system [get]
func SystemInformations(ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(*fiber.Ctx) error {
return func(c *fiber.Ctx) error {
availableBackends, err := ml.ListAvailableBackends(appConfig.AssetsDestination)
if err != nil {
return err
}
for b := range appConfig.ExternalGRPCBackends {
availableBackends = append(availableBackends, b)
}
return c.JSON(
schema.SystemInformationResponse{
Backends: availableBackends,
},
)
}
}

View File

@@ -25,8 +25,9 @@ import (
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/chat/completions [post]
func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error {
var id, textContentToReturn string
var created int
textContentToReturn := ""
id := uuid.New().String()
created := int(time.Now().Unix())
process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
initialMessage := schema.OpenAIResponse{
@@ -68,9 +69,9 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig)
result = functions.CleanupLLMResult(result, config.FunctionsConfig)
functionResults := functions.ParseFunctionCall(result, config.FunctionsConfig)
results := functions.ParseFunctionCall(result, config.FunctionsConfig)
log.Debug().Msgf("Text content to return: %s", textContentToReturn)
noActionToRun := len(functionResults) > 0 && functionResults[0].Name == noAction || len(functionResults) == 0
noActionToRun := len(results) > 0 && results[0].Name == noAction || len(results) == 0
switch {
case noActionToRun:
@@ -83,7 +84,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}
responses <- initialMessage
result, err := handleQuestion(config, req, ml, startupOptions, functionResults, result, prompt)
result, err := handleQuestion(config, req, ml, startupOptions, results, result, prompt)
if err != nil {
log.Error().Err(err).Msg("error handling question")
return
@@ -105,7 +106,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
responses <- resp
default:
for i, ss := range functionResults {
for i, ss := range results {
name, args := ss.Name, ss.Arguments
initialMessage := schema.OpenAIResponse{
@@ -158,10 +159,6 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}
return func(c *fiber.Ctx) error {
textContentToReturn = ""
id = uuid.New().String()
created = int(time.Now().Unix())
modelFile, input, err := readRequest(c, cl, ml, startupOptions, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)

View File

@@ -16,6 +16,4 @@ func RegisterElevenLabsRoutes(app *fiber.App,
// Elevenlabs
app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig))
app.Post("/v1/sound-generation", auth, elevenlabs.SoundGenerationEndpoint(cl, ml, appConfig))
}

View File

@@ -70,6 +70,4 @@ func RegisterLocalAIRoutes(app *fiber.App,
}{Version: internal.PrintableVersion()})
})
app.Get("/system", auth, localai.SystemInformations(ml, appConfig))
}

View File

Binary file not shown.

View File

Binary file not shown.

View File

Binary file not shown.

View File

Binary file not shown.

View File

Binary file not shown.

View File

Binary file not shown.

View File

Binary file not shown.

View File

File diff suppressed because one or more lines are too long

View File

@@ -1,9 +0,0 @@
/*!
Theme: Default
Description: Original highlight.js style
Author: (c) Ivan Sagalaev <maniac@softwaremaniacs.org>
Maintainer: @highlightjs/core-team
Website: https://highlightjs.org/
License: see project LICENSE
Touched: 2021
*/pre code.hljs{display:block;overflow-x:auto;padding:1em}code.hljs{padding:3px 5px}.hljs{background:#f3f3f3;color:#444}.hljs-comment{color:#697070}.hljs-punctuation,.hljs-tag{color:#444a}.hljs-tag .hljs-attr,.hljs-tag .hljs-name{color:#444}.hljs-attribute,.hljs-doctag,.hljs-keyword,.hljs-meta .hljs-keyword,.hljs-name,.hljs-selector-tag{font-weight:700}.hljs-deletion,.hljs-number,.hljs-quote,.hljs-selector-class,.hljs-selector-id,.hljs-string,.hljs-template-tag,.hljs-type{color:#800}.hljs-section,.hljs-title{color:#800;font-weight:700}.hljs-link,.hljs-operator,.hljs-regexp,.hljs-selector-attr,.hljs-selector-pseudo,.hljs-symbol,.hljs-template-variable,.hljs-variable{color:#ab5656}.hljs-literal{color:#695}.hljs-addition,.hljs-built_in,.hljs-bullet,.hljs-code{color:#397300}.hljs-meta{color:#1f7199}.hljs-meta .hljs-string{color:#38a}.hljs-emphasis{font-style:italic}.hljs-strong{font-weight:700}

View File

File diff suppressed because one or more lines are too long

View File

File diff suppressed because one or more lines are too long

View File

File diff suppressed because one or more lines are too long

View File

File diff suppressed because one or more lines are too long

View File

File diff suppressed because one or more lines are too long

View File

File diff suppressed because one or more lines are too long

View File

File diff suppressed because one or more lines are too long

View File

@@ -28,15 +28,9 @@ import (
"github.com/mudler/edgevpn/pkg/logger"
)
func generateNewConnectionData(DHTInterval, OTPInterval int) *node.YAMLConnectionConfig {
func generateNewConnectionData() *node.YAMLConnectionConfig {
maxMessSize := 20 << 20 // 20MB
keyLength := 43
if DHTInterval == 0 {
DHTInterval = 360
}
if OTPInterval == 0 {
OTPInterval = 9000
}
return &node.YAMLConnectionConfig{
MaxMessageSize: maxMessSize,
@@ -46,21 +40,21 @@ func generateNewConnectionData(DHTInterval, OTPInterval int) *node.YAMLConnectio
OTP: node.OTP{
DHT: node.OTPConfig{
Key: eutils.RandStringRunes(keyLength),
Interval: DHTInterval,
Interval: 120,
Length: keyLength,
},
Crypto: node.OTPConfig{
Key: eutils.RandStringRunes(keyLength),
Interval: OTPInterval,
Interval: 9000,
Length: keyLength,
},
},
}
}
func GenerateToken(DHTInterval, OTPInterval int) string {
func GenerateToken() string {
// Generates a new config and exit
return generateNewConnectionData(DHTInterval, OTPInterval).Base64()
return generateNewConnectionData().Base64()
}
func IsP2PEnabled() bool {

View File

@@ -10,7 +10,7 @@ import (
"github.com/mudler/edgevpn/pkg/node"
)
func GenerateToken(DHTInterval, OTPInterval int) string {
func GenerateToken() string {
return "not implemented"
}

View File

@@ -4,11 +4,3 @@ type ElevenLabsTTSRequest struct {
Text string `json:"text" yaml:"text"`
ModelID string `json:"model_id" yaml:"model_id"`
}
type ElevenLabsSoundGenerationRequest struct {
Text string `json:"text" yaml:"text"`
ModelID string `json:"model_id" yaml:"model_id"`
Duration *float32 `json:"duration_seconds,omitempty" yaml:"duration_seconds,omitempty"`
Temperature *float32 `json:"prompt_influence,omitempty" yaml:"prompt_influence,omitempty"`
DoSample *bool `json:"do_sample,omitempty" yaml:"do_sample,omitempty"`
}

View File

@@ -70,7 +70,3 @@ type P2PNodesResponse struct {
Nodes []p2p.NodeData `json:"nodes" yaml:"nodes"`
FederatedNodes []p2p.NodeData `json:"federated_nodes" yaml:"federated_nodes"`
}
type SystemInformationResponse struct {
Backends []string `json:"backends"`
}

View File

@@ -107,7 +107,7 @@ func (bms BackendMonitorService) CheckAndSample(modelName string) (*proto.Status
return nil, err
}
modelAddr := bms.modelLoader.CheckIsLoaded(backendId)
if modelAddr == nil {
if modelAddr == "" {
return nil, fmt.Errorf("backend %s is not currently loaded", backendId)
}

View File

@@ -133,10 +133,6 @@ Due to the nature of ROCm it is best to run all implementations in containers as
Ongoing verification testing of ROCm compatability with integrated backends.
Please note the following list of verified backends and devices.
LocalAI hipblas images are built against the following targets: gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
If your device is not one of these you must specify the corresponding `GPU_TARGETS` and specify `REBUILD=true`. Otherwise you don't need to specify these in the commands below.
### Verified
The devices in the following list have been tested with `hipblas` images running `ROCm 6.0.0`

Some files were not shown because too many files have changed in this diff Show More