diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index e6da180c0..6389e8988 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -248,19 +248,6 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "9" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-bark' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:24.04" - skip-drivers: 'false' - backend: "bark" - dockerfile: "./backend/Dockerfile.python" - context: "./" - ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "9" @@ -522,19 +509,6 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "13" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-13-bark' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:24.04" - skip-drivers: 'false' - backend: "bark" - dockerfile: "./backend/Dockerfile.python" - context: "./" - ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -784,19 +758,6 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' - - build-type: 'hipblas' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-rocm-hipblas-bark' - runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-24.04:6.4.4" - skip-drivers: 'false' - backend: "bark" - dockerfile: "./backend/Dockerfile.python" - context: "./" - ubuntu-version: '2404' # sycl builds - build-type: 'intel' cuda-major-version: "" @@ -1007,19 +968,6 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' - - build-type: 'intel' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-intel-bark' - runs-on: 'ubuntu-latest' - base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" - skip-drivers: 'false' - backend: "bark" - dockerfile: "./backend/Dockerfile.python" - context: "./" - ubuntu-version: '2404' # piper - build-type: '' cuda-major-version: "" @@ -1034,20 +982,6 @@ jobs: dockerfile: "./backend/Dockerfile.golang" context: "./" ubuntu-version: '2404' - # bark-cpp - - build-type: '' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-bark-cpp' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:24.04" - skip-drivers: 'false' - backend: "bark-cpp" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - ubuntu-version: '2404' - build-type: '' cuda-major-version: "" cuda-minor-version: "" diff --git a/AGENTS.md b/AGENTS.md index bc8b966d1..87a96ec88 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -4,13 +4,13 @@ Building and testing the project depends on the components involved and the plat ## Building a specified backend -Let's say the user wants to build a particular backend for a given platform. For example let's say they want to build bark for ROCM/hipblas +Let's say the user wants to build a particular backend for a given platform. For example let's say they want to build coqui for ROCM/hipblas -- The Makefile has targets like `docker-build-bark` created with `generate-docker-build-target` at the time of writing. Recently added backends may require a new target. 
+- The Makefile has targets like `docker-build-coqui` created with `generate-docker-build-target` at the time of writing. Recently added backends may require a new target. - At a minimum we need to set the BUILD_TYPE, BASE_IMAGE build-args - Use .github/workflows/backend.yml as a reference it lists the needed args in the `include` job strategy matrix - l4t and cublas also requires the CUDA major and minor version -- You can pretty print a command like `DOCKER_MAKEFLAGS=-j$(nproc --ignore=1) BUILD_TYPE=hipblas BASE_IMAGE=rocm/dev-ubuntu-24.04:6.4.4 make docker-build-bark` +- You can pretty print a command like `DOCKER_MAKEFLAGS=-j$(nproc --ignore=1) BUILD_TYPE=hipblas BASE_IMAGE=rocm/dev-ubuntu-24.04:6.4.4 make docker-build-coqui` - Unless the user specifies that they want you to run the command, then just print it because not all agent frontends handle long running jobs well and the output may overflow your context - The user may say they want to build AMD or ROCM instead of hipblas, or Intel instead of SYCL or NVIDIA insted of l4t or cublas. Ask for confirmation if there is ambiguity. - Sometimes the user may need extra parameters to be added to `docker build` (e.g. `--platform` for cross-platform builds or `--progress` to view the full logs), in which case you can generate the `docker build` command directly. @@ -95,7 +95,7 @@ test-extra: prepare-test-extra Add a backend definition variable in the backend definitions section (around line 428-457). The format depends on the backend type: -**For Python backends with root context** (like `faster-whisper`, `bark`): +**For Python backends with root context** (like `faster-whisper`, `coqui`): ```makefile BACKEND_<NAME> = <backend-name>|python|.|false|true ``` diff --git a/Makefile b/Makefile index b5c3e64c8..4a8af4f4e 100644 --- a/Makefile +++ b/Makefile @@ -438,7 +438,6 @@ backend-images: BACKEND_LLAMA_CPP = llama-cpp|llama-cpp|.|false|false # Golang backends -BACKEND_BARK_CPP = bark-cpp|golang|.|false|true BACKEND_PIPER = piper|golang|.|false|true BACKEND_LOCAL_STORE = local-store|golang|.|false|true BACKEND_HUGGINGFACE = huggingface|golang|.|false|true @@ -451,7 +450,6 @@ BACKEND_RERANKERS = rerankers|python|.|false|true BACKEND_TRANSFORMERS = transformers|python|.|false|true BACKEND_FASTER_WHISPER = faster-whisper|python|.|false|true BACKEND_COQUI = coqui|python|.|false|true -BACKEND_BARK = bark|python|.|false|true BACKEND_RFDETR = rfdetr|python|.|false|true BACKEND_KITTEN_TTS = kitten-tts|python|.|false|true BACKEND_NEUTTS = neutts|python|.|false|true @@ -487,7 +485,6 @@ endef # Generate all docker-build targets $(eval $(call generate-docker-build-target,$(BACKEND_LLAMA_CPP))) -$(eval $(call generate-docker-build-target,$(BACKEND_BARK_CPP))) $(eval $(call generate-docker-build-target,$(BACKEND_PIPER))) $(eval $(call generate-docker-build-target,$(BACKEND_LOCAL_STORE))) $(eval $(call generate-docker-build-target,$(BACKEND_HUGGINGFACE))) @@ -498,7 +495,6 @@ $(eval $(call generate-docker-build-target,$(BACKEND_RERANKERS))) $(eval $(call generate-docker-build-target,$(BACKEND_TRANSFORMERS))) $(eval $(call generate-docker-build-target,$(BACKEND_FASTER_WHISPER))) $(eval $(call generate-docker-build-target,$(BACKEND_COQUI))) -$(eval $(call generate-docker-build-target,$(BACKEND_BARK))) $(eval $(call generate-docker-build-target,$(BACKEND_RFDETR))) $(eval $(call generate-docker-build-target,$(BACKEND_KITTEN_TTS))) $(eval $(call generate-docker-build-target,$(BACKEND_NEUTTS))) @@ -516,7 +512,7 @@ $(eval $(call generate-docker-build-target,$(BACKEND_QWEN_TTS)))
docker-save-%: backend-images docker save local-ai-backend:$* -o backend-images/$*.tar -docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-bark docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts +docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts ######################################################## ### END Backends diff --git a/README.md b/README.md index e87667159..fcb146a54 100644 --- a/README.md +++ b/README.md @@ -268,8 +268,6 @@ LocalAI supports a comprehensive range of AI backends with multiple acceleration |---------|-------------|---------------------| | **whisper.cpp** | OpenAI Whisper in C/C++ | CUDA 12/13, ROCm, Intel SYCL, Vulkan, CPU | | **faster-whisper** | Fast Whisper with CTranslate2 | CUDA 12/13, ROCm, Intel, CPU | -| **bark** | Text-to-audio generation | CUDA 12/13, ROCm, Intel | -| **bark-cpp** | C++ implementation of Bark | CUDA, Metal, CPU | | **coqui** | Advanced TTS with 1100+ languages | CUDA 12/13, ROCm, Intel, CPU | | **kokoro** | Lightweight TTS model | CUDA 12/13, ROCm, Intel, CPU | | **chatterbox** | Production-grade TTS | CUDA 12/13, CPU | @@ -301,9 +299,9 @@ LocalAI supports a comprehensive range of AI backends with multiple acceleration |-------------------|-------------------|------------------| | **NVIDIA CUDA 12** | All CUDA-compatible backends | Nvidia hardware | | **NVIDIA CUDA 13** | All CUDA-compatible backends | Nvidia hardware | -| **AMD ROCm** | llama.cpp, whisper, vllm, transformers, diffusers, rerankers, coqui, kokoro, bark, neutts, vibevoice, pocket-tts, qwen-tts | AMD Graphics | -| **Intel oneAPI** | llama.cpp, whisper, stablediffusion, vllm, transformers, diffusers, rfdetr, rerankers, coqui, kokoro, bark, vibevoice, pocket-tts, qwen-tts | Intel Arc, Intel iGPUs | -| **Apple Metal** | llama.cpp, whisper, diffusers, MLX, MLX-VLM, bark-cpp | Apple M1/M2/M3+ | +| **AMD ROCm** | llama.cpp, whisper, vllm, transformers, diffusers, rerankers, coqui, kokoro, neutts, vibevoice, pocket-tts, qwen-tts | AMD Graphics | +| **Intel oneAPI** | llama.cpp, whisper, stablediffusion, vllm, transformers, diffusers, rfdetr, rerankers, coqui, kokoro, vibevoice, pocket-tts, qwen-tts | Intel Arc, Intel iGPUs | +| **Apple Metal** | llama.cpp, whisper, diffusers, MLX, MLX-VLM | Apple M1/M2/M3+ | | **Vulkan** | llama.cpp, whisper, stablediffusion | Cross-platform GPUs | | **NVIDIA Jetson (CUDA 12)** | llama.cpp, whisper, stablediffusion, diffusers, rfdetr | ARM64 embedded AI (AGX Orin, etc.) 
| | **NVIDIA Jetson (CUDA 13)** | llama.cpp, whisper, stablediffusion, diffusers, rfdetr | ARM64 embedded AI (DGX Spark) | diff --git a/backend/README.md b/backend/README.md index 3f3076c04..1e67ec7d2 100644 --- a/backend/README.md +++ b/backend/README.md @@ -46,7 +46,7 @@ The backend system provides language-specific Dockerfiles that handle the build - **vllm**: High-performance LLM inference - **mlx**: Apple Silicon optimization - **diffusers**: Stable Diffusion models -- **Audio**: bark, coqui, faster-whisper, kitten-tts +- **Audio**: coqui, faster-whisper, kitten-tts - **Vision**: mlx-vlm, rfdetr - **Specialized**: rerankers, chatterbox, kokoro @@ -55,7 +55,6 @@ The backend system provides language-specific Dockerfiles that handle the build - **stablediffusion-ggml**: Stable Diffusion in Go with GGML Cpp backend - **huggingface**: Hugging Face model integration - **piper**: Text-to-speech synthesis Golang with C bindings using rhaspy/piper -- **bark-cpp**: Bark TTS models Golang with Cpp bindings - **local-store**: Vector storage backend #### C++ Backends (`cpp/`) diff --git a/backend/go/bark-cpp/Makefile b/backend/go/bark-cpp/Makefile deleted file mode 100644 index 1bff58c4f..000000000 --- a/backend/go/bark-cpp/Makefile +++ /dev/null @@ -1,51 +0,0 @@ -INCLUDE_PATH := $(abspath ./) -LIBRARY_PATH := $(abspath ./) - -AR?=ar - -CMAKE_ARGS?=-DGGML_NATIVE=OFF -BUILD_TYPE?= -GOCMD=go -# keep standard at C11 and C++11 -CXXFLAGS = -I. -I$(INCLUDE_PATH)/sources/bark.cpp/examples -I$(INCLUDE_PATH)/sources/bark.cpp/encodec.cpp/ggml/include -I$(INCLUDE_PATH)/sources/bark.cpp/spm-headers -I$(INCLUDE_PATH)/sources/bark.cpp -O3 -DNDEBUG -std=c++17 -fPIC -LDFLAGS = -L$(LIBRARY_PATH) -L$(LIBRARY_PATH)/sources/bark.cpp/build/examples -lbark -lstdc++ -lm - -# bark.cpp -BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git -BARKCPP_VERSION?=5d5be84f089ab9ea53b7a793f088d3fbf7247495 - -# warnings -CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function - -## bark.cpp -sources/bark.cpp: - git clone --recursive $(BARKCPP_REPO) sources/bark.cpp && \ - cd sources/bark.cpp && \ - git checkout $(BARKCPP_VERSION) && \ - git submodule update --init --recursive --depth 1 --single-branch - -sources/bark.cpp/build/libbark.a: sources/bark.cpp - cd sources/bark.cpp && \ - mkdir -p build && \ - cd build && \ - cmake $(CMAKE_ARGS) .. && \ - cmake --build . --config Release - -gobark.o: - $(CXX) $(CXXFLAGS) gobark.cpp -o gobark.o -c $(LDFLAGS) - -libbark.a: sources/bark.cpp/build/libbark.a gobark.o - cp $(INCLUDE_PATH)/sources/bark.cpp/build/libbark.a ./ - $(AR) rcs libbark.a gobark.o - -bark-cpp: libbark.a - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH="$(CURDIR)" LIBRARY_PATH=$(CURDIR) \ - $(GOCMD) build -v -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o bark-cpp ./ - -package: - bash package.sh - -build: bark-cpp package - -clean: - rm -f gobark.o libbark.a \ No newline at end of file diff --git a/backend/go/bark-cpp/gobark.cpp b/backend/go/bark-cpp/gobark.cpp deleted file mode 100644 index fa4bb336f..000000000 --- a/backend/go/bark-cpp/gobark.cpp +++ /dev/null @@ -1,85 +0,0 @@ -#include -#include - -#include "bark.h" -#include "gobark.h" -#include "common.h" -#include "ggml.h" - -struct bark_context *c; - -void bark_print_progress_callback(struct bark_context *bctx, enum bark_encoding_step step, int progress, void *user_data) { - if (step == bark_encoding_step::SEMANTIC) { - printf("\rGenerating semantic tokens... 
%d%%", progress); - } else if (step == bark_encoding_step::COARSE) { - printf("\rGenerating coarse tokens... %d%%", progress); - } else if (step == bark_encoding_step::FINE) { - printf("\rGenerating fine tokens... %d%%", progress); - } - fflush(stdout); -} - -int load_model(char *model) { - // initialize bark context - struct bark_context_params ctx_params = bark_context_default_params(); - bark_params params; - - params.model_path = model; - - // ctx_params.verbosity = verbosity; - ctx_params.progress_callback = bark_print_progress_callback; - ctx_params.progress_callback_user_data = nullptr; - - struct bark_context *bctx = bark_load_model(params.model_path.c_str(), ctx_params, params.seed); - if (!bctx) { - fprintf(stderr, "%s: Could not load model\n", __func__); - return 1; - } - - c = bctx; - - return 0; -} - -int tts(char *text,int threads, char *dst ) { - - ggml_time_init(); - const int64_t t_main_start_us = ggml_time_us(); - - // generate audio - if (!bark_generate_audio(c, text, threads)) { - fprintf(stderr, "%s: An error occurred. If the problem persists, feel free to open an issue to report it.\n", __func__); - return 1; - } - - const float *audio_data = bark_get_audio_data(c); - if (audio_data == NULL) { - fprintf(stderr, "%s: Could not get audio data\n", __func__); - return 1; - } - - const int audio_arr_size = bark_get_audio_data_size(c); - - std::vector audio_arr(audio_data, audio_data + audio_arr_size); - - write_wav_on_disk(audio_arr, dst); - - // report timing - { - const int64_t t_main_end_us = ggml_time_us(); - const int64_t t_load_us = bark_get_load_time(c); - const int64_t t_eval_us = bark_get_eval_time(c); - - printf("\n\n"); - printf("%s: load time = %8.2f ms\n", __func__, t_load_us / 1000.0f); - printf("%s: eval time = %8.2f ms\n", __func__, t_eval_us / 1000.0f); - printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us) / 1000.0f); - } - - return 0; -} - -int unload() { - bark_free(c); -} - diff --git a/backend/go/bark-cpp/gobark.go b/backend/go/bark-cpp/gobark.go deleted file mode 100644 index 8b01ebe2f..000000000 --- a/backend/go/bark-cpp/gobark.go +++ /dev/null @@ -1,52 +0,0 @@ -package main - -// #cgo CXXFLAGS: -I${SRCDIR}/sources/bark.cpp/ -I${SRCDIR}/sources/bark.cpp/encodec.cpp -I${SRCDIR}/sources/bark.cpp/encodec.cpp/ggml/include -I${SRCDIR}/sources/bark.cpp/examples -I${SRCDIR}/sources/bark.cpp/spm-headers -// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/sources/bark.cpp/build/examples -L${SRCDIR}/sources/bark.cpp/build/encodec.cpp/ggml/src/ -L${SRCDIR}/sources/bark.cpp/build/encodec.cpp/ -lbark -lencodec -lcommon -lggml -lgomp -// #include -// #include -import "C" - -import ( - "fmt" - "unsafe" - - "github.com/mudler/LocalAI/pkg/grpc/base" - pb "github.com/mudler/LocalAI/pkg/grpc/proto" -) - -type Bark struct { - base.SingleThread - threads int -} - -func (sd *Bark) Load(opts *pb.ModelOptions) error { - - sd.threads = int(opts.Threads) - - modelFile := C.CString(opts.ModelFile) - defer C.free(unsafe.Pointer(modelFile)) - - ret := C.load_model(modelFile) - if ret != 0 { - return fmt.Errorf("inference failed") - } - - return nil -} - -func (sd *Bark) TTS(opts *pb.TTSRequest) error { - t := C.CString(opts.Text) - defer C.free(unsafe.Pointer(t)) - - dst := C.CString(opts.Dst) - defer C.free(unsafe.Pointer(dst)) - - threads := C.int(sd.threads) - - ret := C.tts(t, threads, dst) - if ret != 0 { - return fmt.Errorf("inference failed") - } - - return nil -} diff --git a/backend/go/bark-cpp/gobark.h b/backend/go/bark-cpp/gobark.h deleted file 
mode 100644 index 06fb965d5..000000000 --- a/backend/go/bark-cpp/gobark.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifdef __cplusplus -extern "C" { -#endif -int load_model(char *model); -int tts(char *text,int threads, char *dst ); -#ifdef __cplusplus -} -#endif \ No newline at end of file diff --git a/backend/go/bark-cpp/main.go b/backend/go/bark-cpp/main.go deleted file mode 100644 index 840a687d4..000000000 --- a/backend/go/bark-cpp/main.go +++ /dev/null @@ -1,20 +0,0 @@ -package main - -// Note: this is started internally by LocalAI and a server is allocated for each model -import ( - "flag" - - grpc "github.com/mudler/LocalAI/pkg/grpc" -) - -var ( - addr = flag.String("addr", "localhost:50051", "the address to connect to") -) - -func main() { - flag.Parse() - - if err := grpc.StartServer(*addr, &Bark{}); err != nil { - panic(err) - } -} diff --git a/backend/go/bark-cpp/package.sh b/backend/go/bark-cpp/package.sh deleted file mode 100755 index 6dce5851f..000000000 --- a/backend/go/bark-cpp/package.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -# Script to copy the appropriate libraries based on architecture -# This script is used in the final stage of the Dockerfile - -set -e - -CURDIR=$(dirname "$(realpath $0)") - -# Create lib directory -mkdir -p $CURDIR/package/lib -cp -avrf $CURDIR/bark-cpp $CURDIR/package/ -cp -rfv $CURDIR/run.sh $CURDIR/package/ - -# Detect architecture and copy appropriate libraries -if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then - # x86_64 architecture - echo "Detected x86_64 architecture, copying x86_64 libraries..." - cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so - cp -arfLv /lib/x86_64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6 - cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1 - cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6 - cp -arfLv /lib/x86_64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6 - cp -arfLv /lib/x86_64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1 -elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then - # ARM64 architecture - echo "Detected ARM64 architecture, copying ARM64 libraries..." 
- cp -arfLv /lib/ld-linux-aarch64.so.1 $CURDIR/package/lib/ld.so - cp -arfLv /lib/aarch64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6 - cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1 - cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6 - cp -arfLv /lib/aarch64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6 - cp -arfLv /lib/aarch64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1 -else - echo "Error: Could not detect architecture" - exit 1 -fi - -echo "Packaging completed successfully" -ls -liah $CURDIR/package/ -ls -liah $CURDIR/package/lib/ \ No newline at end of file diff --git a/backend/go/bark-cpp/run.sh b/backend/go/bark-cpp/run.sh deleted file mode 100755 index 567d3b89e..000000000 --- a/backend/go/bark-cpp/run.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -set -ex - -CURDIR=$(dirname "$(realpath $0)") -export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH - -# If there is a lib/ld.so, use it -if [ -f $CURDIR/lib/ld.so ]; then - echo "Using lib/ld.so" - exec $CURDIR/lib/ld.so $CURDIR/bark-cpp "$@" -fi - -exec $CURDIR/bark-cpp "$@" \ No newline at end of file diff --git a/backend/index.yaml b/backend/index.yaml index 7fa205f24..fae7e6ac9 100644 --- a/backend/index.yaml +++ b/backend/index.yaml @@ -348,51 +348,6 @@ nvidia-cuda-13: "cuda13-coqui" nvidia-cuda-12: "cuda12-coqui" icon: https://avatars.githubusercontent.com/u/1338804?s=200&v=4 -- &bark - urls: - - https://github.com/suno-ai/bark - description: | - Bark is a transformer-based text-to-audio model created by Suno. Bark can generate highly realistic, multilingual speech as well as other audio - including music, background noise and simple sound effects. The model can also produce nonverbal communications like laughing, sighing and crying. To support the research community, we are providing access to pretrained model checkpoints, which are ready for inference and available for commercial use. - tags: - - text-to-speech - - TTS - license: MIT - name: "bark" - alias: "bark" - capabilities: - cuda: "cuda12-bark" - intel: "intel-bark" - rocm: "rocm-bark" - nvidia-cuda-13: "cuda13-bark" - nvidia-cuda-12: "cuda12-bark" - icon: https://avatars.githubusercontent.com/u/99442120?s=200&v=4 -- &barkcpp - urls: - - https://github.com/PABannier/bark.cpp - description: | - With bark.cpp, our goal is to bring real-time realistic multilingual text-to-speech generation to the community. 
- - Plain C/C++ implementation without dependencies - AVX, AVX2 and AVX512 for x86 architectures - CPU and GPU compatible backends - Mixed F16 / F32 precision - 4-bit, 5-bit and 8-bit integer quantization - Metal and CUDA backends - - Models supported - - Bark Small - Bark Large - tags: - - text-to-speech - - TTS - license: MIT - icon: https://github.com/PABannier/bark.cpp/raw/main/assets/banner.png - name: "bark-cpp" - uri: "quay.io/go-skynet/local-ai-backends:latest-bark-cpp" - mirrors: - - localai/localai-backends:latest-bark-cpp - alias: "bark-cpp" - &chatterbox urls: - https://github.com/resemble-ai/chatterbox @@ -1459,47 +1414,6 @@ uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-coqui" mirrors: - localai/localai-backends:latest-gpu-rocm-hipblas-coqui -## bark -- !!merge <<: *bark - name: "bark-development" - capabilities: - nvidia: "cuda12-bark-development" - intel: "intel-bark-development" - amd: "rocm-bark-development" -- !!merge <<: *bark - name: "rocm-bark-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-bark" - mirrors: - - localai/localai-backends:master-gpu-rocm-hipblas-bark -- !!merge <<: *bark - name: "intel-bark" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-bark" - mirrors: - - localai/localai-backends:latest-gpu-intel-bark -- !!merge <<: *bark - name: "intel-bark-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-bark" - mirrors: - - localai/localai-backends:master-gpu-intel-bark -- !!merge <<: *bark - name: "cuda12-bark" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-bark" - mirrors: - - localai/localai-backends:latest-gpu-nvidia-cuda-12-bark -- !!merge <<: *bark - name: "rocm-bark" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-bark" - mirrors: - - localai/localai-backends:latest-gpu-rocm-hipblas-bark -- !!merge <<: *bark - name: "cuda12-bark-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-bark" - mirrors: - - localai/localai-backends:master-gpu-nvidia-cuda-12-bark -- !!merge <<: *barkcpp - name: "bark-cpp-development" - uri: "quay.io/go-skynet/local-ai-backends:master-bark-cpp" - alias: "bark-cpp" ## chatterbox - !!merge <<: *chatterbox name: "chatterbox-development" diff --git a/backend/python/README.md b/backend/python/README.md index e140ab627..45ee0e69c 100644 --- a/backend/python/README.md +++ b/backend/python/README.md @@ -18,7 +18,6 @@ The Python backends use a unified build system based on `libbackend.sh` that pro - **mlx** - Apple Silicon optimized ML framework ### Audio & Speech -- **bark** - Text-to-speech synthesis - **coqui** - Coqui TTS models - **faster-whisper** - Fast Whisper speech recognition - **kitten-tts** - Lightweight TTS diff --git a/backend/python/bark/Makefile b/backend/python/bark/Makefile deleted file mode 100644 index da996aabe..000000000 --- a/backend/python/bark/Makefile +++ /dev/null @@ -1,23 +0,0 @@ -.PHONY: ttsbark -ttsbark: - bash install.sh - -.PHONY: run -run: ttsbark - @echo "Running bark..." - bash run.sh - @echo "bark run." - -.PHONY: test -test: ttsbark - @echo "Testing bark..." - bash test.sh - @echo "bark tested." 
- -.PHONY: protogen-clean -protogen-clean: - $(RM) backend_pb2_grpc.py backend_pb2.py - -.PHONY: clean -clean: protogen-clean - rm -rf venv __pycache__ \ No newline at end of file diff --git a/backend/python/bark/README.md b/backend/python/bark/README.md deleted file mode 100644 index 5b571e47b..000000000 --- a/backend/python/bark/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# Creating a separate environment for ttsbark project - -``` -make ttsbark -``` - -# Testing the gRPC server - -``` - -m unittest test_ttsbark.py -``` - -For example -``` -/opt/conda/envs/bark/bin/python -m unittest extra/grpc/bark/test_ttsbark.py -`````` \ No newline at end of file diff --git a/backend/python/bark/backend.py b/backend/python/bark/backend.py deleted file mode 100644 index 499781005..000000000 --- a/backend/python/bark/backend.py +++ /dev/null @@ -1,98 +0,0 @@ -#!/usr/bin/env python3 -""" -This is an extra gRPC server of LocalAI for Bark TTS -""" -from concurrent import futures -import time -import argparse -import signal -import sys -import os -from scipy.io.wavfile import write as write_wav - -import backend_pb2 -import backend_pb2_grpc -from bark import SAMPLE_RATE, generate_audio, preload_models - -import grpc - - -_ONE_DAY_IN_SECONDS = 60 * 60 * 24 - -# If MAX_WORKERS are specified in the environment use it, otherwise default to 1 -MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1')) - -# Implement the BackendServicer class with the service methods -class BackendServicer(backend_pb2_grpc.BackendServicer): - """ - BackendServicer is the class that implements the gRPC service - """ - def Health(self, request, context): - return backend_pb2.Reply(message=bytes("OK", 'utf-8')) - def LoadModel(self, request, context): - model_name = request.Model - try: - print("Preparing models, please wait", file=sys.stderr) - # download and load all models - preload_models() - except Exception as err: - return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - # Implement your logic here for the LoadModel service - # Replace this with your desired response - return backend_pb2.Result(message="Model loaded successfully", success=True) - - def TTS(self, request, context): - model = request.model - print(request, file=sys.stderr) - try: - audio_array = None - if model != "": - audio_array = generate_audio(request.text, history_prompt=model) - else: - audio_array = generate_audio(request.text) - print("saving to", request.dst, file=sys.stderr) - # save audio to disk - write_wav(request.dst, SAMPLE_RATE, audio_array) - print("saved to", request.dst, file=sys.stderr) - print("tts for", file=sys.stderr) - print(request, file=sys.stderr) - except Exception as err: - return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - return backend_pb2.Result(success=True) - -def serve(address): - server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS), - options=[ - ('grpc.max_message_length', 50 * 1024 * 1024), # 50MB - ('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB - ('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB - ]) - backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) - server.add_insecure_port(address) - server.start() - print("Server started. Listening on: " + address, file=sys.stderr) - - # Define the signal handler function - def signal_handler(sig, frame): - print("Received termination signal. 
Shutting down...") - server.stop(0) - sys.exit(0) - - # Set the signal handlers for SIGINT and SIGTERM - signal.signal(signal.SIGINT, signal_handler) - signal.signal(signal.SIGTERM, signal_handler) - - try: - while True: - time.sleep(_ONE_DAY_IN_SECONDS) - except KeyboardInterrupt: - server.stop(0) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Run the gRPC server.") - parser.add_argument( - "--addr", default="localhost:50051", help="The address to bind the server to." - ) - args = parser.parse_args() - - serve(args.addr) diff --git a/backend/python/bark/install.sh b/backend/python/bark/install.sh deleted file mode 100755 index 32befa8e6..000000000 --- a/backend/python/bark/install.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash -set -e - -backend_dir=$(dirname $0) -if [ -d $backend_dir/common ]; then - source $backend_dir/common/libbackend.sh -else - source $backend_dir/../common/libbackend.sh -fi - -# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links. -# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match. -# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index -# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index -if [ "x${BUILD_PROFILE}" == "xintel" ]; then - EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match" -fi - -installRequirements diff --git a/backend/python/bark/requirements-cpu.txt b/backend/python/bark/requirements-cpu.txt deleted file mode 100644 index 12e376ade..000000000 --- a/backend/python/bark/requirements-cpu.txt +++ /dev/null @@ -1,4 +0,0 @@ -transformers -accelerate -torch==2.4.1 -torchaudio==2.4.1 \ No newline at end of file diff --git a/backend/python/bark/requirements-cublas12.txt b/backend/python/bark/requirements-cublas12.txt deleted file mode 100644 index 537169495..000000000 --- a/backend/python/bark/requirements-cublas12.txt +++ /dev/null @@ -1,4 +0,0 @@ -torch==2.4.1 -torchaudio==2.4.1 -transformers -accelerate \ No newline at end of file diff --git a/backend/python/bark/requirements-hipblas.txt b/backend/python/bark/requirements-hipblas.txt deleted file mode 100644 index 4e1fef6cf..000000000 --- a/backend/python/bark/requirements-hipblas.txt +++ /dev/null @@ -1,5 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.4 -torch==2.8.0+rocm6.4 -torchaudio==2.8.0+rocm6.4 -transformers -accelerate \ No newline at end of file diff --git a/backend/python/bark/requirements-intel.txt b/backend/python/bark/requirements-intel.txt deleted file mode 100644 index 06cf55c5a..000000000 --- a/backend/python/bark/requirements-intel.txt +++ /dev/null @@ -1,7 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/xpu -torch -torchaudio -optimum[openvino] -setuptools -transformers -accelerate \ No newline at end of file diff --git a/backend/python/bark/requirements.txt b/backend/python/bark/requirements.txt deleted file mode 100644 index 275e0d8bc..000000000 --- a/backend/python/bark/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -bark==0.1.5 -grpcio==1.76.0 -protobuf -certifi \ No newline at end of file diff --git a/backend/python/bark/run.sh b/backend/python/bark/run.sh deleted file mode 100755 index 82b7b09ec..000000000 --- a/backend/python/bark/run.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash -backend_dir=$(dirname $0) 
-if [ -d $backend_dir/common ]; then - source $backend_dir/common/libbackend.sh -else - source $backend_dir/../common/libbackend.sh -fi - -startBackend $@ \ No newline at end of file diff --git a/backend/python/bark/test.py b/backend/python/bark/test.py deleted file mode 100644 index 4c9f3cf6b..000000000 --- a/backend/python/bark/test.py +++ /dev/null @@ -1,81 +0,0 @@ -""" -A test script to test the gRPC service -""" -import unittest -import subprocess -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - - -class TestBackendServicer(unittest.TestCase): - """ - TestBackendServicer is the class that tests the gRPC service - """ - def setUp(self): - """ - This method sets up the gRPC service by starting the server - """ - self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"]) - time.sleep(10) - - def tearDown(self) -> None: - """ - This method tears down the gRPC service by terminating the server - """ - self.service.terminate() - self.service.wait() - - def test_server_startup(self): - """ - This method tests if the server starts up successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.Health(backend_pb2.HealthMessage()) - self.assertEqual(response.message, b'OK') - except Exception as err: - print(err) - self.fail("Server failed to start") - finally: - self.tearDown() - - def test_load_model(self): - """ - This method tests if the model is loaded successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="v2/en_speaker_4")) - self.assertTrue(response.success) - self.assertEqual(response.message, "Model loaded successfully") - except Exception as err: - print(err) - self.fail("LoadModel service failed") - finally: - self.tearDown() - - def test_tts(self): - """ - This method tests if the embeddings are generated successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="v2/en_speaker_4")) - self.assertTrue(response.success) - tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story") - tts_response = stub.TTS(tts_request) - self.assertIsNotNone(tts_response) - except Exception as err: - print(err) - self.fail("TTS service failed") - finally: - self.tearDown() \ No newline at end of file diff --git a/backend/python/bark/test.sh b/backend/python/bark/test.sh deleted file mode 100755 index eb59f2aaf..000000000 --- a/backend/python/bark/test.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash -set -e - -backend_dir=$(dirname $0) -if [ -d $backend_dir/common ]; then - source $backend_dir/common/libbackend.sh -else - source $backend_dir/../common/libbackend.sh -fi - -runUnittests diff --git a/backend/python/coqui/README.md b/backend/python/coqui/README.md index e9c1931bb..e509d5466 100644 --- a/backend/python/coqui/README.md +++ b/backend/python/coqui/README.md @@ -1,4 +1,4 @@ -# Creating a separate environment for ttsbark project +# Creating a separate environment for coqui project ``` make coqui diff --git a/backend/python/coqui/backend.py b/backend/python/coqui/backend.py index df115adb5..65b37e063 100644 --- a/backend/python/coqui/backend.py +++ b/backend/python/coqui/backend.py @@ -1,6 +1,6 @@ 
#!/usr/bin/env python3 """ -This is an extra gRPC server of LocalAI for Bark TTS +This is an extra gRPC server of LocalAI for Coqui TTS """ from concurrent import futures import time diff --git a/backend/python/faster-whisper/backend.py b/backend/python/faster-whisper/backend.py index 808f29238..df259420c 100755 --- a/backend/python/faster-whisper/backend.py +++ b/backend/python/faster-whisper/backend.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -This is an extra gRPC server of LocalAI for Bark TTS +This is an extra gRPC server of LocalAI for Faster Whisper TTS """ from concurrent import futures import time diff --git a/core/config/model_config.go b/core/config/model_config.go index 6841abf57..3f45e64ff 100644 --- a/core/config/model_config.go +++ b/core/config/model_config.go @@ -658,7 +658,7 @@ func (c *ModelConfig) GuessUsecases(u ModelConfigUsecase) bool { } } if (u & FLAG_TTS) == FLAG_TTS { - ttsBackends := []string{"bark-cpp", "piper", "transformers-musicgen", "kokoro"} + ttsBackends := []string{"piper", "transformers-musicgen", "kokoro"} if !slices.Contains(ttsBackends, c.Backend) { return false } diff --git a/docs/content/features/GPU-acceleration.md b/docs/content/features/GPU-acceleration.md index fe93be299..222f2a050 100644 --- a/docs/content/features/GPU-acceleration.md +++ b/docs/content/features/GPU-acceleration.md @@ -156,7 +156,6 @@ The devices in the following list have been tested with `hipblas` images running | diffusers | yes | Radeon VII (gfx906) | | piper | yes | Radeon VII (gfx906) | | whisper | no | none | -| bark | no | none | | coqui | no | none | | transformers | no | none | | sentencetransformers | no | none | diff --git a/docs/content/features/backends.md b/docs/content/features/backends.md index ef71a8707..50821e992 100644 --- a/docs/content/features/backends.md +++ b/docs/content/features/backends.md @@ -94,7 +94,7 @@ Your backend container should: For getting started, see the available backends in LocalAI here: https://github.com/mudler/LocalAI/tree/master/backend . - For Python based backends there is a template that can be used as starting point: https://github.com/mudler/LocalAI/tree/master/backend/python/common/template . -- For Golang based backends, you can see the `bark-cpp` backend as an example: https://github.com/mudler/LocalAI/tree/master/backend/go/bark-cpp +- For Golang based backends, you can see the `piper` backend as an example: https://github.com/mudler/LocalAI/tree/master/backend/go/piper - For C++ based backends, you can see the `llama-cpp` backend as an example: https://github.com/mudler/LocalAI/tree/master/backend/cpp/llama-cpp ### Publishing Your Backend diff --git a/docs/content/features/text-to-audio.md b/docs/content/features/text-to-audio.md index 132c56429..7f7a9bcf2 100644 --- a/docs/content/features/text-to-audio.md +++ b/docs/content/features/text-to-audio.md @@ -50,37 +50,6 @@ You can use the env variable COQUI_LANGUAGE to set the language used by the coqu You can also use config files to configure tts models (see section below on how to use config files). -### Bark - -[Bark](https://github.com/suno-ai/bark) allows to generate audio from text prompts. - -This is an extra backend - in the container is already available and there is nothing to do for the setup. - -#### Model setup - -There is nothing to be done for the model setup. You can already start to use bark. The models will be downloaded the first time you use the backend. 
- -#### Usage - -Use the `tts` endpoint by specifying the `bark` backend: - -``` -curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ - "backend": "bark", - "input":"Hello!" - }' | aplay -``` - -To specify a voice from https://github.com/suno-ai/bark#-voice-presets ( https://suno-ai.notion.site/8b8e8749ed514b0cbf3f699013548683?v=bc67cff786b04b50b3ceb756fd05f68c ), use the `model` parameter: - -``` -curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ - "backend": "bark", - "input":"Hello!", - "model": "v2/en_speaker_4" - }' | aplay -``` - ### Piper To install the `piper` audio models manually: diff --git a/docs/content/installation/build.md b/docs/content/installation/build.md index ec39416e0..ac0237b86 100644 --- a/docs/content/installation/build.md +++ b/docs/content/installation/build.md @@ -151,13 +151,11 @@ LocalAI have several backends available for installation in the backend gallery. Typically each backend include a Makefile which allow to package the backend. -In the LocalAI repository, for instance you can build `bark-cpp` by doing: +In the LocalAI repository, for instance you can build a backend by doing: ``` git clone https://github.com/go-skynet/LocalAI.git -make -C LocalAI/backend/go/bark-cpp build package - make -C LocalAI/backend/python/vllm ``` @@ -165,21 +163,21 @@ make -C LocalAI/backend/python/vllm Building with docker is simpler as abstracts away all the requirement, and focuses on building the final OCI images that are available in the gallery. This allows for instance also to build locally a backend and install it with LocalAI. You can refer to [Backends](https://localai.io/backends/) for general guidance on how to install and develop backends. -In the LocalAI repository, you can build `bark-cpp` by doing: +In the LocalAI repository, you can build a backend by doing: ``` git clone https://github.com/go-skynet/LocalAI.git -make docker-build-bark-cpp +make docker-build-<backend-name> ``` Note that `make` is only by convenience, in reality it just runs a simple `docker` command as: ```bash -docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:bark-cpp -f LocalAI/backend/Dockerfile.golang --build-arg BACKEND=bark-cpp . +docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:<backend-name> -f LocalAI/backend/Dockerfile.golang --build-arg BACKEND=<backend-name> . ``` Note: - BUILD_TYPE can be either: `cublas`, `hipblas`, `sycl_f16`, `sycl_f32`, `metal`. -- BASE_IMAGE is tested on `ubuntu:22.04` (and defaults to it) and `quay.io/go-skynet/intel-oneapi-base:latest` for intel/sycl +- BASE_IMAGE is tested on `ubuntu:24.04` (and defaults to it) and `quay.io/go-skynet/intel-oneapi-base:latest` for intel/sycl diff --git a/docs/content/reference/binaries.md b/docs/content/reference/binaries.md index 224c72685..178f31127 100644 --- a/docs/content/reference/binaries.md +++ b/docs/content/reference/binaries.md @@ -35,7 +35,7 @@ Otherwise, here are the links to the binaries: {{% notice icon="⚡" context="warning" %}} Binaries do have limited support compared to container images: -- Python-based backends are not shipped with binaries (e.g. `bark`, `diffusers` or `transformers`) +- Python-based backends are not shipped with binaries (e.g.
`diffusers` or `transformers`) - MacOS binaries and Linux-arm64 do not ship TTS nor `stablediffusion-cpp` backends - Linux binaries do not ship `stablediffusion-cpp` backend {{% /notice %}} diff --git a/docs/content/reference/compatibility-table.md b/docs/content/reference/compatibility-table.md index c2a19ae52..92e6224b8 100644 --- a/docs/content/reference/compatibility-table.md +++ b/docs/content/reference/compatibility-table.md @@ -32,8 +32,6 @@ LocalAI will attempt to automatically load models which are not explicitly confi | [whisper.cpp](https://github.com/ggml-org/whisper.cpp) | whisper | no | Audio transcription | no | no | CUDA 12/13, ROCm, Intel SYCL, Vulkan, CPU | | [faster-whisper](https://github.com/SYSTRAN/faster-whisper) | whisper | no | Audio transcription | no | no | CUDA 12/13, ROCm, Intel, CPU | | [piper](https://github.com/rhasspy/piper) ([binding](https://github.com/mudler/go-piper)) | Any piper onnx model | no | Text to voice | no | no | CPU | -| [bark](https://github.com/suno-ai/bark) | bark | no | Audio generation | no | no | CUDA 12/13, ROCm, Intel | -| [bark-cpp](https://github.com/PABannier/bark.cpp) | bark | no | Audio-Only | no | no | CUDA, Metal, CPU | | [coqui](https://github.com/idiap/coqui-ai-TTS) | Coqui TTS | no | Audio generation and Voice cloning | no | no | CUDA 12/13, ROCm, Intel, CPU | | [kokoro](https://github.com/hexgrad/kokoro) | Kokoro TTS | no | Text-to-speech | no | no | CUDA 12/13, ROCm, Intel, CPU | | [chatterbox](https://github.com/resemble-ai/chatterbox) | Chatterbox TTS | no | Text-to-speech | no | no | CUDA 12/13, CPU | diff --git a/gallery/index.yaml b/gallery/index.yaml index b0dfa13a1..fccea6eae 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -15300,36 +15300,6 @@ - filename: ggml-silero-v5.1.2.bin uri: https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v5.1.2.bin sha256: 29940d98d42b91fbd05ce489f3ecf7c72f0a42f027e4875919a28fb4c04ea2cf -- &bark - name: "bark-cpp" - icon: https://avatars.githubusercontent.com/u/99442120 - url: github:mudler/LocalAI/gallery/virtual.yaml@master - license: mit - urls: - - https://huggingface.co/suno/bark - - https://huggingface.co/Green-Sky/bark-ggml - description: | - Bark is a transformer-based text-to-audio model created by Suno. Bark can generate highly realistic, multilingual speech as well as other audio - including music, background noise and simple sound effects. The model can also produce nonverbal communications like laughing, sighing and crying. To support the research community, we are providing access to pretrained model checkpoints ready for inference. - tags: - - tts - - cpu - overrides: - backend: bark-cpp - parameters: - model: bark_weights-f16.bin - files: - - filename: bark_weights-f16.bin - uri: https://huggingface.co/Green-Sky/bark-ggml/resolve/main/bark_weights-f16.bin - sha256: ba6fc0e09531e6b8b5a9ef8862be2c9a52a631fc93f34a60b26b879cacf18f62 -- !!merge <<: *bark - name: "bark-cpp-small" - overrides: - parameters: - model: bark-small_weights-f16.bin - files: - - filename: bark-small_weights-f16.bin - uri: https://huggingface.co/Green-Sky/bark-ggml/resolve/main/bark-small_weights-f16.bin - sha256: de1ece17e8319537b3a7909baebbd28affab23c942d5d57e648d622af4e2feaa - !!merge <<: *mistral03 name: "tlacuilo-12b" urls: