Mirror of https://github.com/mudler/LocalAI.git (synced 2026-02-03 11:13:31 -05:00)

Compare commits: v3.5.0...test-fix-u (36 commits)
| SHA1 |
|---|
| 30bf8d41d7 |
| ebbcba342a |
| 0de75519dc |
| 37f5e4f5c1 |
| ffa934b959 |
| 59311d8b1e |
| d9e25af7b5 |
| e4f8b63b40 |
| 1364ae9be6 |
| cfd6a9150d |
| cd352d0c5f |
| 8d47309695 |
| 5f6fc02a55 |
| 0b528458d8 |
| caab380c5d |
| 8a3a362504 |
| 07238eb743 |
| e905e90dd7 |
| 08432d49e5 |
| e51e2aacb9 |
| 9c3d85fc28 |
| 007ca647a7 |
| 59af928379 |
| dbc2bb561b |
| c72c85dcac |
| ef984901e6 |
| 9911ec84a3 |
| 1956681d4c |
| 326f6e5ccb |
| 302958efd6 |
| 3dc86b247d |
| 5ec724af06 |
| 1f1e156bf0 |
| df625e366a |
| 9e6685ac9c |
| 90c818aa71 |
.github/workflows/backend.yml (vendored, 53 lines changed)

@@ -89,7 +89,7 @@ jobs:
           context: "./backend"
         - build-type: 'l4t'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "8"
           platforms: 'linux/arm64'
           tag-latest: 'auto'
           tag-suffix: '-gpu-nvidia-l4t-diffusers'
@@ -111,6 +111,18 @@ jobs:
           backend: "diffusers"
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
+        - build-type: ''
+          cuda-major-version: ""
+          cuda-minor-version: ""
+          platforms: 'linux/amd64'
+          tag-latest: 'auto'
+          tag-suffix: '-cpu-chatterbox'
+          runs-on: 'ubuntu-latest'
+          base-image: "ubuntu:22.04"
+          skip-drivers: 'true'
+          backend: "chatterbox"
+          dockerfile: "./backend/Dockerfile.python"
+          context: "./backend"
         # CUDA 11 additional backends
         - build-type: 'cublas'
           cuda-major-version: "11"
@@ -175,7 +187,7 @@ jobs:
         # CUDA 12 builds
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "8"
           platforms: 'linux/amd64'
           tag-latest: 'auto'
           tag-suffix: '-gpu-nvidia-cuda-12-rerankers'
@@ -187,7 +199,7 @@ jobs:
           context: "./backend"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "8"
           platforms: 'linux/amd64'
           tag-latest: 'auto'
           tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp'
@@ -199,7 +211,7 @@ jobs:
           context: "./"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "8"
           platforms: 'linux/amd64'
           tag-latest: 'auto'
           tag-suffix: '-gpu-nvidia-cuda-12-vllm'
@@ -211,7 +223,7 @@ jobs:
           context: "./backend"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "8"
           platforms: 'linux/amd64'
           tag-latest: 'auto'
           tag-suffix: '-gpu-nvidia-cuda-12-transformers'
@@ -223,7 +235,7 @@ jobs:
           context: "./backend"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "8"
           platforms: 'linux/amd64'
           tag-latest: 'auto'
           tag-suffix: '-gpu-nvidia-cuda-12-diffusers'
@@ -236,7 +248,7 @@ jobs:
         # CUDA 12 additional backends
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "8"
           platforms: 'linux/amd64'
           tag-latest: 'auto'
           tag-suffix: '-gpu-nvidia-cuda-12-kokoro'
@@ -248,7 +260,7 @@ jobs:
           context: "./backend"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "8"
           platforms: 'linux/amd64'
           tag-latest: 'auto'
           tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper'
@@ -260,7 +272,7 @@ jobs:
           context: "./backend"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "8"
           platforms: 'linux/amd64'
           tag-latest: 'auto'
           tag-suffix: '-gpu-nvidia-cuda-12-coqui'
@@ -272,7 +284,7 @@ jobs:
           context: "./backend"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "8"
           platforms: 'linux/amd64'
           tag-latest: 'auto'
           tag-suffix: '-gpu-nvidia-cuda-12-bark'
@@ -284,7 +296,7 @@ jobs:
           context: "./backend"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "8"
           platforms: 'linux/amd64'
           tag-latest: 'auto'
           tag-suffix: '-gpu-nvidia-cuda-12-chatterbox'
@@ -566,7 +578,7 @@ jobs:
           context: "./"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "8"
           platforms: 'linux/arm64'
           skip-drivers: 'true'
           tag-latest: 'auto'
@@ -603,7 +615,7 @@ jobs:
           context: "./"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "8"
           platforms: 'linux/amd64'
           tag-latest: 'auto'
           tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml'
@@ -663,7 +675,7 @@ jobs:
           context: "./"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "8"
           platforms: 'linux/arm64'
           skip-drivers: 'true'
           tag-latest: 'auto'
@@ -688,7 +700,7 @@ jobs:
           context: "./"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "8"
           platforms: 'linux/amd64'
           tag-latest: 'auto'
           tag-suffix: '-gpu-nvidia-cuda-12-whisper'
@@ -748,7 +760,7 @@ jobs:
           context: "./"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "8"
           platforms: 'linux/arm64'
           skip-drivers: 'true'
           tag-latest: 'auto'
@@ -824,7 +836,7 @@ jobs:
           context: "./backend"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "8"
           platforms: 'linux/amd64'
           tag-latest: 'auto'
           tag-suffix: '-gpu-nvidia-cuda-12-rfdetr'
@@ -860,7 +872,7 @@ jobs:
           context: "./backend"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "8"
           platforms: 'linux/arm64'
           skip-drivers: 'true'
           tag-latest: 'auto'
@@ -885,7 +897,7 @@ jobs:
           context: "./backend"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "8"
           platforms: 'linux/amd64'
           tag-latest: 'auto'
           tag-suffix: '-gpu-nvidia-cuda-12-exllama2'
@@ -968,6 +980,9 @@ jobs:
         - backend: "mlx"
           tag-suffix: "-metal-darwin-arm64-mlx"
           build-type: "mps"
+        - backend: "chatterbox"
+          tag-suffix: "-metal-darwin-arm64-chatterbox"
+          build-type: "mps"
         - backend: "mlx-vlm"
           tag-suffix: "-metal-darwin-arm64-mlx-vlm"
           build-type: "mps"
.github/workflows/image-pr.yml (vendored, 2 lines changed)
@@ -36,7 +36,7 @@ jobs:
       include:
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "8"
           platforms: 'linux/amd64'
           tag-latest: 'false'
           tag-suffix: '-gpu-nvidia-cuda-12'
.github/workflows/image.yml (vendored, 4 lines changed)
@@ -91,7 +91,7 @@ jobs:
           aio: "-aio-gpu-nvidia-cuda-11"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "8"
           platforms: 'linux/amd64'
           tag-latest: 'auto'
           tag-suffix: '-gpu-nvidia-cuda-12'
@@ -144,7 +144,7 @@ jobs:
       include:
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "8"
           platforms: 'linux/arm64'
           tag-latest: 'auto'
           tag-suffix: '-nvidia-l4t-arm64'
.github/workflows/labeler.yml (vendored, 2 lines changed)
@@ -9,4 +9,4 @@ jobs:
       pull-requests: write
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/labeler@v5
+      - uses: actions/labeler@v6
.github/workflows/release.yaml (vendored, 4 lines changed)
@@ -41,7 +41,7 @@ jobs:
       - name: Upload DMG to Release
         uses: softprops/action-gh-release@v2
         with:
-          files: ./dist/LocalAI-Launcher.dmg
+          files: ./dist/LocalAI.dmg
   launcher-build-linux:
     runs-on: ubuntu-latest
     steps:
@@ -61,4 +61,4 @@ jobs:
       - name: Upload Linux launcher artifacts
         uses: softprops/action-gh-release@v2
         with:
-          files: ./local-ai-launcher-linux.tar.xz
+          files: ./local-ai-launcher-linux.tar.xz
.github/workflows/stalebot.yml (vendored, 2 lines changed)
@@ -10,7 +10,7 @@ jobs:
   stale:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9
+      - uses: actions/stale@3a9db7e6a41a89f618792c92c0e97cc736e1b13f # v9
        with:
          stale-issue-message: 'This issue is stale because it has been open 90 days with no activity. Remove stale label or comment or this will be closed in 5 days.'
          stale-pr-message: 'This PR is stale because it has been open 90 days with no activity. Remove stale label or comment or this will be closed in 10 days.'
@@ -18,7 +18,7 @@ FROM requirements AS requirements-drivers

 ARG BUILD_TYPE
 ARG CUDA_MAJOR_VERSION=12
-ARG CUDA_MINOR_VERSION=0
+ARG CUDA_MINOR_VERSION=8
 ARG SKIP_DRIVERS=false
 ARG TARGETARCH
 ARG TARGETVARIANT
Makefile (7 lines changed)
@@ -170,7 +170,7 @@ prepare-e2e:
	mkdir -p $(TEST_DIR)
	cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
	test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
-	docker build --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 -t localai-tests .
+	docker build --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=8 -t localai-tests .

 run-e2e-image:
	ls -liah $(abspath ./tests/e2e-fixtures)
@@ -369,6 +369,9 @@ backends/kitten-tts: docker-build-kitten-tts docker-save-kitten-tts build
 backends/kokoro: docker-build-kokoro docker-save-kokoro build
	./local-ai backends install "ocifile://$(abspath ./backend-images/kokoro.tar)"

+backends/chatterbox: docker-build-chatterbox docker-save-chatterbox build
+	./local-ai backends install "ocifile://$(abspath ./backend-images/chatterbox.tar)"
+
 backends/llama-cpp-darwin: build
	bash ./scripts/build/llama-cpp-darwin.sh
	./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)"
@@ -493,7 +496,7 @@ docker-build-bark:
	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:bark -f backend/Dockerfile.python --build-arg BACKEND=bark .

 docker-build-chatterbox:
-	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:chatterbox -f backend/Dockerfile.python --build-arg BACKEND=chatterbox .
+	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:chatterbox -f backend/Dockerfile.python --build-arg BACKEND=chatterbox ./backend

 docker-build-exllama2:
	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:exllama2 -f backend/Dockerfile.python --build-arg BACKEND=exllama2 .
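With the new `backends/chatterbox` target in place, building the backend image and installing it into a local checkout becomes a single step. A minimal sketch (assumes Docker is available and the `local-ai` binary builds on this machine; the `BUILD_TYPE` override is shown only as an illustration):

    make backends/chatterbox
    # or, assuming a CUDA build of the backend image is wanted:
    make BUILD_TYPE=cublas backends/chatterbox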
@@ -110,6 +110,12 @@ curl https://localai.io/install.sh | sh

 For more installation options, see [Installer Options](https://localai.io/docs/advanced/installer/).

+### macOS Download:
+
+<a href="https://github.com/mudler/LocalAI/releases/latest/download/LocalAI.dmg">
+  <img src="https://img.shields.io/badge/Download-macOS-blue?style=for-the-badge&logo=apple&logoColor=white" alt="Download LocalAI for macOS"/>
+</a>
+
 Or run with docker:

 ### CPU only image:
@@ -111,7 +111,7 @@ docker build -f backend/Dockerfile.python \
   --build-arg BACKEND=transformers \
   --build-arg BUILD_TYPE=cublas12 \
   --build-arg CUDA_MAJOR_VERSION=12 \
-  --build-arg CUDA_MINOR_VERSION=0 \
+  --build-arg CUDA_MINOR_VERSION=8 \
   -t localai-backend-transformers .

 # Build Go backend
@@ -276,6 +276,7 @@ message TranscriptRequest {
   string language = 3;
   uint32 threads = 4;
   bool translate = 5;
+  bool diarize = 6;
 }

 message TranscriptResult {
@@ -305,7 +306,7 @@ message GenerateImageRequest {
   // Diffusers
   string EnableParameters = 10;
   int32 CLIPSkip = 11;

-
+  // Reference images for models that support them (e.g., Flux Kontext)
   repeated string ref_images = 12;
 }
@@ -1,6 +1,6 @@

-LLAMA_VERSION?=3de008208b9b8a33f49f979097a99b4d59e6e521
-LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
+LLAMA_VERSION?=fe4eb4f8ec25a1239b0923f1c7f87adf5730c3e5
+LLAMA_REPO?=https://github.com/JohannesGaessler/llama.cpp

 CMAKE_ARGS?=
 BUILD_TYPE?=
backend/go/stablediffusion-ggml/.gitignore (vendored, 2 lines changed)
@@ -1,4 +1,6 @@
 package/
 sources/
+.cache/
+build/
 libgosd.so
 stablediffusion-ggml
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)

 # stablediffusion.cpp (ggml)
 STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
-STABLEDIFFUSION_GGML_VERSION?=4c6475f9176bf99271ccf5a2817b30a490b83db0
+STABLEDIFFUSION_GGML_VERSION?=b0179181069254389ccad604e44f17a2c25b4094

 CMAKE_ARGS+=-DGGML_MAX_NAME=128
@@ -44,7 +44,7 @@ const char* sample_method_str[] = {
 };

 // Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h
-const char* schedule_str[] = {
+const char* schedulers[] = {
     "default",
     "discrete",
     "karras",
@@ -54,6 +54,8 @@ const char* schedule_str[] = {
 };

 sd_ctx_t* sd_c;
+// Moved from the context (load time) to generation time params
+scheduler_t scheduler = scheduler_t::DEFAULT;

 sample_method_t sample_method;

@@ -105,7 +107,7 @@ int load_model(const char *model, char *model_path, char* options[], int threads
     const char *clip_g_path = "";
     const char *t5xxl_path = "";
     const char *vae_path = "";
-    const char *scheduler = "";
+    const char *scheduler_str = "";
     const char *sampler = "";
     char *lora_dir = model_path;
     bool lora_dir_allocated = false;
@@ -133,7 +135,7 @@ int load_model(const char *model, char *model_path, char* options[], int threads
             vae_path = optval;
         }
         if (!strcmp(optname, "scheduler")) {
-            scheduler = optval;
+            scheduler_str = optval;
         }
         if (!strcmp(optname, "sampler")) {
             sampler = optval;
@@ -170,22 +172,13 @@ int load_model(const char *model, char *model_path, char* options[], int threads
     }
     sample_method = (sample_method_t)sample_method_found;

-    int schedule_found = -1;
     for (int d = 0; d < SCHEDULE_COUNT; d++) {
-        if (!strcmp(scheduler, schedule_str[d])) {
-            schedule_found = d;
-            fprintf (stderr, "Found scheduler: %s\n", scheduler);
+        if (!strcmp(scheduler_str, schedulers[d])) {
+            scheduler = (scheduler_t)d;
+            fprintf (stderr, "Found scheduler: %s\n", scheduler_str);
         }
     }

-    if (schedule_found == -1) {
-        fprintf (stderr, "Invalid scheduler! using DEFAULT\n");
-        schedule_found = DEFAULT;
-    }
-
-    schedule_t schedule = (schedule_t)schedule_found;
-
     fprintf (stderr, "Creating context\n");
     sd_ctx_params_t ctx_params;
     sd_ctx_params_init(&ctx_params);
@@ -205,7 +198,6 @@ int load_model(const char *model, char *model_path, char* options[], int threads
     ctx_params.free_params_immediately = false;
     ctx_params.n_threads = threads;
     ctx_params.rng_type = STD_DEFAULT_RNG;
-    ctx_params.schedule = schedule;
     sd_ctx_t* sd_ctx = new_sd_ctx(&ctx_params);

     if (sd_ctx == NULL) {
@@ -241,15 +233,16 @@ int gen_image(char *text, char *negativeText, int width, int height, int steps,

     p.prompt = text;
     p.negative_prompt = negativeText;
-    p.guidance.txt_cfg = cfg_scale;
-    p.guidance.slg.layers = skip_layers.data();
-    p.guidance.slg.layer_count = skip_layers.size();
+    p.sample_params.guidance.txt_cfg = cfg_scale;
+    p.sample_params.guidance.slg.layers = skip_layers.data();
+    p.sample_params.guidance.slg.layer_count = skip_layers.size();
     p.width = width;
     p.height = height;
-    p.sample_method = sample_method;
-    p.sample_steps = steps;
+    p.sample_params.sample_method = sample_method;
+    p.sample_params.sample_steps = steps;
     p.seed = seed;
     p.input_id_images_path = "";
+    p.sample_params.scheduler = scheduler;

     // Handle input image for img2img
     bool has_input_image = (src_image != NULL && strlen(src_image) > 0);
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)

 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
-WHISPER_CPP_VERSION?=7745fcf32846006128f16de429cfe1677c963b30
+WHISPER_CPP_VERSION?=edea8a9c3cf0eb7676dcdb604991eb2f95c3d984

 CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
@@ -7,34 +7,35 @@ static struct whisper_vad_context *vctx;
 static struct whisper_context *ctx;
 static std::vector<float> flat_segs;

-static void ggml_log_cb(enum ggml_log_level level, const char* log, void* data) {
-    const char* level_str;
+static void ggml_log_cb(enum ggml_log_level level, const char *log,
+                        void *data) {
+  const char *level_str;

-    if (!log) {
-        return;
-    }
+  if (!log) {
+    return;
+  }

-    switch (level) {
-        case GGML_LOG_LEVEL_DEBUG:
-            level_str = "DEBUG";
-            break;
-        case GGML_LOG_LEVEL_INFO:
-            level_str = "INFO";
-            break;
-        case GGML_LOG_LEVEL_WARN:
-            level_str = "WARN";
-            break;
-        case GGML_LOG_LEVEL_ERROR:
-            level_str = "ERROR";
-            break;
-        default: /* Potential future-proofing */
-            level_str = "?????";
-            break;
-    }
+  switch (level) {
+  case GGML_LOG_LEVEL_DEBUG:
+    level_str = "DEBUG";
+    break;
+  case GGML_LOG_LEVEL_INFO:
+    level_str = "INFO";
+    break;
+  case GGML_LOG_LEVEL_WARN:
+    level_str = "WARN";
+    break;
+  case GGML_LOG_LEVEL_ERROR:
+    level_str = "ERROR";
+    break;
+  default: /* Potential future-proofing */
+    level_str = "?????";
+    break;
+  }

-    fprintf(stderr, "[%-5s] ", level_str);
-    fputs(log, stderr);
-    fflush(stderr);
+  fprintf(stderr, "[%-5s] ", level_str);
+  fputs(log, stderr);
+  fflush(stderr);
 }

 int load_model(const char *const model_path) {
@@ -105,8 +106,8 @@ int vad(float pcmf32[], size_t pcmf32_len, float **segs_out,
   return 0;
 }

-int transcribe(uint32_t threads, char *lang, bool translate, float pcmf32[],
-               size_t pcmf32_len, size_t *segs_out_len) {
+int transcribe(uint32_t threads, char *lang, bool translate, bool tdrz,
+               float pcmf32[], size_t pcmf32_len, size_t *segs_out_len) {
   whisper_full_params wparams =
       whisper_full_default_params(WHISPER_SAMPLING_GREEDY);

@@ -120,6 +121,9 @@ int transcribe(uint32_t threads, char *lang, bool translate, float pcmf32[],
   wparams.translate = translate;
   wparams.debug_mode = true;
   wparams.print_progress = true;
+  wparams.tdrz_enable = tdrz;
+
+  fprintf(stderr, "info: Enable tdrz: %d\n", tdrz);

   if (whisper_full(ctx, wparams, pcmf32, pcmf32_len)) {
     fprintf(stderr, "error: transcription failed\n");
@@ -144,3 +148,7 @@ int n_tokens(int i) { return whisper_full_n_tokens(ctx, i); }
 int32_t get_token_id(int i, int j) {
   return whisper_full_get_token_id(ctx, i, j);
 }
+
+bool get_segment_speaker_turn_next(int i) {
+  return whisper_full_get_segment_speaker_turn_next(ctx, i);
+}
@@ -14,15 +14,16 @@ import (
 )

 var (
-	CppLoadModel       func(modelPath string) int
-	CppLoadModelVAD    func(modelPath string) int
-	CppVAD             func(pcmf32 []float32, pcmf32Size uintptr, segsOut unsafe.Pointer, segsOutLen unsafe.Pointer) int
-	CppTranscribe      func(threads uint32, lang string, translate bool, pcmf32 []float32, pcmf32Len uintptr, segsOutLen unsafe.Pointer) int
-	CppGetSegmentText  func(i int) string
-	CppGetSegmentStart func(i int) int64
-	CppGetSegmentEnd   func(i int) int64
-	CppNTokens         func(i int) int
-	CppGetTokenID      func(i int, j int) int
+	CppLoadModel                 func(modelPath string) int
+	CppLoadModelVAD              func(modelPath string) int
+	CppVAD                       func(pcmf32 []float32, pcmf32Size uintptr, segsOut unsafe.Pointer, segsOutLen unsafe.Pointer) int
+	CppTranscribe                func(threads uint32, lang string, translate bool, diarize bool, pcmf32 []float32, pcmf32Len uintptr, segsOutLen unsafe.Pointer) int
+	CppGetSegmentText            func(i int) string
+	CppGetSegmentStart           func(i int) int64
+	CppGetSegmentEnd             func(i int) int64
+	CppNTokens                   func(i int) int
+	CppGetTokenID                func(i int, j int) int
+	CppGetSegmentSpeakerTurnNext func(i int) bool
 )

 type Whisper struct {
@@ -122,7 +123,7 @@ func (w *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (pb.TranscriptR
	segsLen := uintptr(0xdeadbeef)
	segsLenPtr := unsafe.Pointer(&segsLen)

-	if ret := CppTranscribe(opts.Threads, opts.Language, opts.Translate, data, uintptr(len(data)), segsLenPtr); ret != 0 {
+	if ret := CppTranscribe(opts.Threads, opts.Language, opts.Translate, opts.Diarize, data, uintptr(len(data)), segsLenPtr); ret != 0 {
		return pb.TranscriptResult{}, fmt.Errorf("Failed Transcribe")
	}
@@ -134,6 +135,10 @@ func (w *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (pb.TranscriptR
		txt := strings.Clone(CppGetSegmentText(i))
		tokens := make([]int32, CppNTokens(i))

+		if opts.Diarize && CppGetSegmentSpeakerTurnNext(i) {
+			txt += " [SPEAKER_TURN]"
+		}
+
		for j := range tokens {
			tokens[j] = int32(CppGetTokenID(i, j))
		}
@@ -151,6 +156,6 @@ func (w *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (pb.TranscriptR

	return pb.TranscriptResult{
		Segments: segments,
-		Text:     strings.TrimSpace(text),
+		Text:     strings.TrimSpace(text),
	}, nil
 }
@@ -6,11 +6,12 @@ int load_model(const char *const model_path);
 int load_model_vad(const char *const model_path);
 int vad(float pcmf32[], size_t pcmf32_size, float **segs_out,
         size_t *segs_out_len);
-int transcribe(uint32_t threads, char *lang, bool translate, float pcmf32[],
-               size_t pcmf32_len, size_t *segs_out_len);
+int transcribe(uint32_t threads, char *lang, bool translate, bool tdrz,
+               float pcmf32[], size_t pcmf32_len, size_t *segs_out_len);
 const char *get_segment_text(int i);
 int64_t get_segment_t0(int i);
 int64_t get_segment_t1(int i);
 int n_tokens(int i);
 int32_t get_token_id(int i, int j);
+bool get_segment_speaker_turn_next(int i);
 }
@@ -33,6 +33,7 @@ func main() {
		{&CppGetSegmentEnd, "get_segment_t1"},
		{&CppNTokens, "n_tokens"},
		{&CppGetTokenID, "get_token_id"},
+		{&CppGetSegmentSpeakerTurnNext, "get_segment_speaker_turn_next"},
	}

	for _, lf := range libFuncs {
@@ -350,6 +350,8 @@
   alias: "chatterbox"
   capabilities:
     nvidia: "cuda12-chatterbox"
+    metal: "metal-chatterbox"
+    default: "cpu-chatterbox"
 - &piper
   name: "piper"
   uri: "quay.io/go-skynet/local-ai-backends:latest-piper"
@@ -1223,6 +1225,28 @@
   name: "chatterbox-development"
   capabilities:
     nvidia: "cuda12-chatterbox-development"
+    metal: "metal-chatterbox-development"
+    default: "cpu-chatterbox-development"
+- !!merge <<: *chatterbox
+  name: "cpu-chatterbox"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-chatterbox"
+  mirrors:
+    - localai/localai-backends:latest-cpu-chatterbox
+- !!merge <<: *chatterbox
+  name: "cpu-chatterbox-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-cpu-chatterbox"
+  mirrors:
+    - localai/localai-backends:master-cpu-chatterbox
+- !!merge <<: *chatterbox
+  name: "metal-chatterbox"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-chatterbox"
+  mirrors:
+    - localai/localai-backends:latest-metal-darwin-arm64-chatterbox
+- !!merge <<: *chatterbox
+  name: "metal-chatterbox-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-chatterbox"
+  mirrors:
+    - localai/localai-backends:master-metal-darwin-arm64-chatterbox
 - !!merge <<: *chatterbox
   name: "cuda12-chatterbox-development"
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-chatterbox"
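With the `metal` and `default` capability entries above, the single gallery name `chatterbox` now resolves to a CPU, Metal, or CUDA 12 image depending on the host. A hedged usage sketch (it assumes the `backends install` subcommand used in the Makefile targets also accepts gallery names, as `InstallBackendFromGallery` elsewhere in this changeset suggests):

    # capability detection picks cpu-chatterbox, metal-chatterbox or cuda12-chatterbox
    local-ai backends install chatterbox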
@@ -1,5 +1,6 @@
 --extra-index-url https://download.pytorch.org/whl/cpu
+accelerate
 torch==2.6.0
 torchaudio==2.6.0
 transformers==4.46.3
-chatterbox-tts
+chatterbox-tts==0.1.2
@@ -2,5 +2,5 @@
 torch==2.6.0+cu118
 torchaudio==2.6.0+cu118
 transformers==4.46.3
-chatterbox-tts
+chatterbox-tts==0.1.2
 accelerate
@@ -1,5 +1,5 @@
 torch==2.6.0
 torchaudio==2.6.0
 transformers==4.46.3
-chatterbox-tts
+chatterbox-tts==0.1.2
 accelerate
@@ -2,5 +2,5 @@
 torch==2.6.0+rocm6.1
 torchaudio==2.6.0+rocm6.1
 transformers==4.46.3
-chatterbox-tts
+chatterbox-tts==0.1.2
 accelerate
@@ -3,9 +3,8 @@ intel-extension-for-pytorch==2.3.110+xpu
 torch==2.3.1+cxx11.abi
 torchaudio==2.3.1+cxx11.abi
 transformers==4.46.3
-chatterbox-tts
+chatterbox-tts==0.1.2
 accelerate
 oneccl_bind_pt==2.3.100+xpu
 optimum[openvino]
-setuptools
-accelerate
 setuptools
@@ -31,6 +31,7 @@ type Config struct {
	StartOnBoot     bool              `json:"start_on_boot"`
	LogLevel        string            `json:"log_level"`
	EnvironmentVars map[string]string `json:"environment_vars"`
+	ShowWelcome     *bool             `json:"show_welcome"`
 }

 // Launcher represents the main launcher application
@@ -148,6 +149,13 @@ func (l *Launcher) Initialize() error {
		log.Printf("Initializing empty EnvironmentVars map")
	}

+	// Set default welcome window preference
+	if l.config.ShowWelcome == nil {
+		true := true
+		l.config.ShowWelcome = &true
+		log.Printf("Setting default ShowWelcome: true")
+	}
+
	// Create directories
	os.MkdirAll(l.config.ModelsPath, 0755)
	os.MkdirAll(l.config.BackendsPath, 0755)
@@ -48,6 +48,14 @@ var _ = Describe("Launcher", func() {
|
||||
config := launcherInstance.GetConfig()
|
||||
Expect(config.ModelsPath).ToNot(BeEmpty())
|
||||
Expect(config.BackendsPath).ToNot(BeEmpty())
|
||||
})
|
||||
|
||||
It("should set default ShowWelcome to true", func() {
|
||||
err := launcherInstance.Initialize()
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
config := launcherInstance.GetConfig()
|
||||
Expect(config.ShowWelcome).To(BeTrue())
|
||||
Expect(config.Address).To(Equal("127.0.0.1:8080"))
|
||||
Expect(config.LogLevel).To(Equal("info"))
|
||||
})
|
||||
|
||||
@@ -177,6 +177,9 @@ func (sm *SystrayManager) recreateMenu() {
		fyne.NewMenuItem("Settings", func() {
			sm.showSettings()
		}),
+		fyne.NewMenuItem("Show Welcome Window", func() {
+			sm.showWelcomeWindow()
+		}),
		fyne.NewMenuItem("Open Data Folder", func() {
			sm.openDataFolder()
		}),
@@ -243,6 +246,13 @@ func (sm *SystrayManager) showSettings() {
	sm.window.RequestFocus()
 }

+// showWelcomeWindow shows the welcome window
+func (sm *SystrayManager) showWelcomeWindow() {
+	if sm.launcher.GetUI() != nil {
+		sm.launcher.GetUI().ShowWelcomeWindow()
+	}
+}
+
 // openDataFolder opens the data folder in file manager
 func (sm *SystrayManager) openDataFolder() {
	dataPath := sm.launcher.GetDataPath()
@@ -675,3 +675,121 @@ func (ui *LauncherUI) UpdateRunningState(isRunning bool) {
		}
	})
 }
+
+// ShowWelcomeWindow displays the welcome window with helpful information
+func (ui *LauncherUI) ShowWelcomeWindow() {
+	if ui.launcher == nil || ui.launcher.window == nil {
+		log.Printf("Cannot show welcome window: launcher or window is nil")
+		return
+	}
+
+	fyne.DoAndWait(func() {
+		// Create welcome window
+		welcomeWindow := ui.launcher.app.NewWindow("Welcome to LocalAI Launcher")
+		welcomeWindow.Resize(fyne.NewSize(600, 500))
+		welcomeWindow.CenterOnScreen()
+		welcomeWindow.SetCloseIntercept(func() {
+			welcomeWindow.Close()
+		})
+
+		// Title
+		titleLabel := widget.NewLabel("Welcome to LocalAI Launcher!")
+		titleLabel.TextStyle = fyne.TextStyle{Bold: true}
+		titleLabel.Alignment = fyne.TextAlignCenter
+
+		// Welcome message
+		welcomeText := `LocalAI Launcher makes it easy to run LocalAI on your system.
+
+What you can do:
+• Start and stop LocalAI server
+• Configure models and backends paths
+• Set environment variables
+• Check for updates automatically
+• Access LocalAI WebUI when running
+
+Getting Started:
+1. Configure your models and backends paths
+2. Click "Start LocalAI" to begin
+3. Use "Open WebUI" to access the interface
+4. Check the system tray for quick access`
+
+		welcomeLabel := widget.NewLabel(welcomeText)
+		welcomeLabel.Wrapping = fyne.TextWrapWord
+
+		// Useful links section
+		linksTitle := widget.NewLabel("Useful Links:")
+		linksTitle.TextStyle = fyne.TextStyle{Bold: true}
+
+		// Create link buttons
+		docsButton := widget.NewButton("📚 Documentation", func() {
+			ui.openURL("https://localai.io/docs/")
+		})
+
+		githubButton := widget.NewButton("🐙 GitHub Repository", func() {
+			ui.openURL("https://github.com/mudler/LocalAI")
+		})
+
+		modelsButton := widget.NewButton("🤖 Model Gallery", func() {
+			ui.openURL("https://localai.io/models/")
+		})
+
+		communityButton := widget.NewButton("💬 Community", func() {
+			ui.openURL("https://discord.gg/XgwjKptP7Z")
+		})
+
+		// Checkbox to disable welcome window
+		dontShowAgainCheck := widget.NewCheck("Don't show this welcome window again", func(checked bool) {
+			if ui.launcher != nil {
+				config := ui.launcher.GetConfig()
+				v := !checked
+				config.ShowWelcome = &v
+				ui.launcher.SetConfig(config)
+			}
+		})
+
+		config := ui.launcher.GetConfig()
+		if config.ShowWelcome != nil {
+			dontShowAgainCheck.SetChecked(*config.ShowWelcome)
+		}
+
+		// Close button
+		closeButton := widget.NewButton("Get Started", func() {
+			welcomeWindow.Close()
+		})
+		closeButton.Importance = widget.HighImportance
+
+		// Layout
+		linksContainer := container.NewVBox(
+			linksTitle,
+			docsButton,
+			githubButton,
+			modelsButton,
+			communityButton,
+		)
+
+		content := container.NewVBox(
+			titleLabel,
+			widget.NewSeparator(),
+			welcomeLabel,
+			widget.NewSeparator(),
+			linksContainer,
+			widget.NewSeparator(),
+			dontShowAgainCheck,
+			widget.NewSeparator(),
+			closeButton,
+		)
+
+		welcomeWindow.SetContent(content)
+		welcomeWindow.Show()
+	})
+}
+
+// openURL opens a URL in the default browser
+func (ui *LauncherUI) openURL(urlString string) {
+	parsedURL, err := url.Parse(urlString)
+	if err != nil {
+		log.Printf("Failed to parse URL %s: %v", urlString, err)
+		return
+	}
+	fyne.CurrentApp().OpenURL(parsedURL)
+}
@@ -55,6 +55,12 @@ func main() {
			// Load configuration into UI
			launcher.GetUI().LoadConfiguration()
			launcher.GetUI().UpdateStatus("Ready")
+
+			// Show welcome window if configured to do so
+			config := launcher.GetConfig()
+			if *config.ShowWelcome {
+				launcher.GetUI().ShowWelcomeWindow()
+			}
		}
	}()
@@ -2,9 +2,7 @@ package main

 import (
	"os"
-	"os/signal"
	"path/filepath"
-	"syscall"

	"github.com/alecthomas/kong"
	"github.com/joho/godotenv"
@@ -24,15 +22,7 @@ func main() {
	log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr})
	zerolog.SetGlobalLevel(zerolog.InfoLevel)

-	// Catch signals from the OS requesting us to exit
-	go func() {
-		c := make(chan os.Signal, 1) // we need to reserve to buffer size 1, so the notifier are not blocked
-		signal.Notify(c, os.Interrupt, syscall.SIGTERM)
-		<-c
-		os.Exit(1)
-	}()
-
-	// handle loading environment variabled from .env files
+	// handle loading environment variables from .env files
	envFiles := []string{".env", "localai.env"}
	homeDir, err := os.UserHomeDir()
	if err == nil {
@@ -12,7 +12,7 @@ import (
	"github.com/mudler/LocalAI/pkg/model"
 )

-func ModelTranscription(audio, language string, translate bool, ml *model.ModelLoader, modelConfig config.ModelConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {
+func ModelTranscription(audio, language string, translate bool, diarize bool, ml *model.ModelLoader, modelConfig config.ModelConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {

	if modelConfig.Backend == "" {
		modelConfig.Backend = model.WhisperBackend
@@ -34,6 +34,7 @@ func ModelTranscription(audio, language string, translate bool, ml *model.ModelL
		Dst:       audio,
		Language:  language,
		Translate: translate,
+		Diarize:   diarize,
		Threads:   uint32(*modelConfig.Threads),
	})
	if err != nil {
@@ -5,6 +5,7 @@ import (
	"time"

	cliContext "github.com/mudler/LocalAI/core/cli/context"
+	"github.com/mudler/LocalAI/core/cli/signals"
	"github.com/mudler/LocalAI/core/explorer"
	"github.com/mudler/LocalAI/core/http"
 )
@@ -45,5 +46,7 @@ func (e *ExplorerCMD) Run(ctx *cliContext.Context) error {

	appHTTP := http.Explorer(db)

+	signals.Handler(nil)
+
	return appHTTP.Listen(e.Address)
 }
@@ -4,6 +4,7 @@ import (
	"context"

	cliContext "github.com/mudler/LocalAI/core/cli/context"
+	"github.com/mudler/LocalAI/core/cli/signals"
	"github.com/mudler/LocalAI/core/p2p"
 )
@@ -19,5 +20,7 @@ func (f *FederatedCLI) Run(ctx *cliContext.Context) error {

	fs := p2p.NewFederatedServer(f.Address, p2p.NetworkID(f.Peer2PeerNetworkID, p2p.FederatedID), f.Peer2PeerToken, !f.RandomWorker, f.TargetWorker)

+	signals.Handler(nil)
+
	return fs.Start(context.Background())
 }
@@ -10,6 +10,7 @@ import (
	"github.com/mudler/LocalAI/core/application"
	cli_api "github.com/mudler/LocalAI/core/cli/api"
	cliContext "github.com/mudler/LocalAI/core/cli/context"
+	"github.com/mudler/LocalAI/core/cli/signals"
	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/core/http"
	"github.com/mudler/LocalAI/core/p2p"
@@ -224,5 +225,8 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
		return err
	}

+	// Catch signals from the OS requesting us to exit, and stop all backends
+	signals.Handler(app.ModelLoader())
+
	return appHTTP.Listen(r.Address)
 }
core/cli/signals/signals.go (new file, 25 lines)
@@ -0,0 +1,25 @@
+package signals
+
+import (
+	"os"
+	"os/signal"
+	"syscall"
+
+	"github.com/mudler/LocalAI/pkg/model"
+	"github.com/rs/zerolog/log"
+)
+
+func Handler(m *model.ModelLoader) {
+	// Catch signals from the OS requesting us to exit, and stop all backends
+	go func(m *model.ModelLoader) {
+		c := make(chan os.Signal, 1) // we need to reserve to buffer size 1, so the notifier are not blocked
+		signal.Notify(c, os.Interrupt, syscall.SIGTERM, syscall.SIGINT)
+		<-c
+		if m != nil {
+			if err := m.StopAllGRPC(); err != nil {
+				log.Error().Err(err).Msg("error while stopping all grpc backends")
+			}
+		}
+		os.Exit(1)
+	}(m)
+}
@@ -20,6 +20,7 @@ type TranscriptCMD struct {
	Model      string `short:"m" required:"" help:"Model name to run the TTS"`
	Language   string `short:"l" help:"Language of the audio file"`
	Translate  bool   `short:"c" help:"Translate the transcription to english"`
+	Diarize    bool   `short:"d" help:"Mark speaker turns"`
	Threads    int    `short:"t" default:"1" help:"Number of threads used for parallel computation"`
	ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
 }
@@ -56,7 +57,7 @@ func (t *TranscriptCMD) Run(ctx *cliContext.Context) error {
		}
	}()

-	tr, err := backend.ModelTranscription(t.Filename, t.Language, t.Translate, ml, c, opts)
+	tr, err := backend.ModelTranscription(t.Filename, t.Language, t.Translate, t.Diarize, ml, c, opts)
	if err != nil {
		return err
	}
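Given the struct tags above, speaker-turn marking can now be requested from the CLI with the new `-d` flag. A sketch (the model name and audio file are placeholders):

    local-ai transcript -m whisper-1 -d -t 4 audio.wav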
@@ -2,6 +2,7 @@ package worker

 type WorkerFlags struct {
	BackendsPath       string `env:"LOCALAI_BACKENDS_PATH,BACKENDS_PATH" type:"path" default:"${basepath}/backends" help:"Path containing backends used for inferencing" group:"backends"`
+	BackendGalleries   string `env:"LOCALAI_BACKEND_GALLERIES,BACKEND_GALLERIES" help:"JSON list of backend galleries" group:"backends" default:"${backends}"`
	BackendsSystemPath string `env:"LOCALAI_BACKENDS_SYSTEM_PATH,BACKEND_SYSTEM_PATH" type:"path" default:"/usr/share/localai/backends" help:"Path containing system backends used for inferencing" group:"backends"`
	ExtraLLamaCPPArgs  string `name:"llama-cpp-args" env:"LOCALAI_EXTRA_LLAMA_CPP_ARGS,EXTRA_LLAMA_CPP_ARGS" help:"Extra arguments to pass to llama-cpp-rpc-server"`
 }
@@ -1,6 +1,7 @@
 package worker

 import (
+	"encoding/json"
	"errors"
	"fmt"
	"os"
@@ -9,7 +10,10 @@ import (
	"syscall"

	cliContext "github.com/mudler/LocalAI/core/cli/context"
+	"github.com/mudler/LocalAI/core/cli/signals"
	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/pkg/model"
	"github.com/mudler/LocalAI/pkg/system"
	"github.com/rs/zerolog/log"
 )
@@ -20,9 +24,10 @@ type LLamaCPP struct {

 const (
	llamaCPPRPCBinaryName = "llama-cpp-rpc-server"
+	llamaCPPGalleryName   = "llama-cpp"
 )

-func findLLamaCPPBackend(systemState *system.SystemState) (string, error) {
+func findLLamaCPPBackend(galleries string, systemState *system.SystemState) (string, error) {
	backends, err := gallery.ListSystemBackends(systemState)
	if err != nil {
		log.Warn().Msgf("Failed listing system backends: %s", err)
@@ -30,9 +35,19 @@ func findLLamaCPPBackend(systemState *system.SystemState) (string, error) {
	}
	log.Debug().Msgf("System backends: %v", backends)

-	backend, ok := backends.Get("llama-cpp")
+	backend, ok := backends.Get(llamaCPPGalleryName)
	if !ok {
-		return "", errors.New("llama-cpp backend not found, install it first")
+		ml := model.NewModelLoader(systemState, true)
+		var gals []config.Gallery
+		if err := json.Unmarshal([]byte(galleries), &gals); err != nil {
+			log.Error().Err(err).Msg("failed loading galleries")
+			return "", err
+		}
+		err := gallery.InstallBackendFromGallery(gals, systemState, ml, llamaCPPGalleryName, nil, true)
+		if err != nil {
+			log.Error().Err(err).Msg("llama-cpp backend not found, failed to install it")
+			return "", err
+		}
	}
	backendPath := filepath.Dir(backend.RunFile)
@@ -61,7 +76,7 @@ func (r *LLamaCPP) Run(ctx *cliContext.Context) error {
	if err != nil {
		return err
	}
-	grpcProcess, err := findLLamaCPPBackend(systemState)
+	grpcProcess, err := findLLamaCPPBackend(r.BackendGalleries, systemState)
	if err != nil {
		return err
	}
@@ -69,6 +84,9 @@ func (r *LLamaCPP) Run(ctx *cliContext.Context) error {
	args := strings.Split(r.ExtraLLamaCPPArgs, " ")

	args = append([]string{grpcProcess}, args...)
+
+	signals.Handler(nil)
+
	return syscall.Exec(
		grpcProcess,
		args,
@@ -9,6 +9,7 @@ import (
	"time"

	cliContext "github.com/mudler/LocalAI/core/cli/context"
+	"github.com/mudler/LocalAI/core/cli/signals"
	"github.com/mudler/LocalAI/core/p2p"
	"github.com/mudler/LocalAI/pkg/system"
	"github.com/phayes/freeport"
@@ -69,7 +70,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
		for {
			log.Info().Msgf("Starting llama-cpp-rpc-server on '%s:%d'", address, port)

-			grpcProcess, err := findLLamaCPPBackend(systemState)
+			grpcProcess, err := findLLamaCPPBackend(r.BackendGalleries, systemState)
			if err != nil {
				log.Error().Err(err).Msg("Failed to find llama-cpp-rpc-server")
				return
@@ -106,6 +107,8 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
		}
	}

+	signals.Handler(nil)
+
	for {
		time.Sleep(1 * time.Second)
	}
@@ -1,3 +1,5 @@
+// Package gallery provides installation and registration utilities for LocalAI backends,
+// including meta-backend resolution based on system capabilities.
 package gallery

 import (
@@ -5,6 +7,7 @@ import (
	"fmt"
	"os"
	"path/filepath"
+	"strings"
	"time"

	"github.com/mudler/LocalAI/core/config"
@@ -20,6 +23,12 @@ const (
	runFile = "run.sh"
 )

+// backendCandidate represents an installed concrete backend option for a given alias
+type backendCandidate struct {
+	name    string
+	runFile string
+}
+
 // readBackendMetadata reads the metadata JSON file for a backend
 func readBackendMetadata(backendPath string) (*BackendMetadata, error) {
	metadataPath := filepath.Join(backendPath, metadataFile)
@@ -58,7 +67,7 @@ func writeBackendMetadata(backendPath string, metadata *BackendMetadata) error {
	return nil
 }

-// Installs a model from the gallery
+// InstallBackendFromGallery installs a backend from the gallery.
 func InstallBackendFromGallery(galleries []config.Gallery, systemState *system.SystemState, modelLoader *model.ModelLoader, name string, downloadStatus func(string, string, string, float64), force bool) error {
	if !force {
		// check if we already have the backend installed
@@ -282,23 +291,18 @@ func (b SystemBackends) GetAll() []SystemBackend {
 }

 func ListSystemBackends(systemState *system.SystemState) (SystemBackends, error) {
-	potentialBackends, err := os.ReadDir(systemState.Backend.BackendsPath)
-	if err != nil {
-		return nil, err
-	}
-
+	// Gather backends from system and user paths, then resolve alias conflicts by capability.
	backends := make(SystemBackends)

-	systemBackends, err := os.ReadDir(systemState.Backend.BackendsSystemPath)
-	if err == nil {
-		// system backends are special, they are provided by the system and not managed by LocalAI
+	// System-provided backends
+	if systemBackends, err := os.ReadDir(systemState.Backend.BackendsSystemPath); err == nil {
		for _, systemBackend := range systemBackends {
			if systemBackend.IsDir() {
-				systemBackendRunFile := filepath.Join(systemState.Backend.BackendsSystemPath, systemBackend.Name(), runFile)
-				if _, err := os.Stat(systemBackendRunFile); err == nil {
+				run := filepath.Join(systemState.Backend.BackendsSystemPath, systemBackend.Name(), runFile)
+				if _, err := os.Stat(run); err == nil {
					backends[systemBackend.Name()] = SystemBackend{
						Name:     systemBackend.Name(),
-						RunFile:  filepath.Join(systemState.Backend.BackendsSystemPath, systemBackend.Name(), runFile),
+						RunFile:  run,
						IsMeta:   false,
						IsSystem: true,
						Metadata: nil,
@@ -307,64 +311,104 @@ func ListSystemBackends(systemState *system.SystemState) (SystemBackends, error)
			}
		}
	} else {
-		log.Warn().Err(err).Msg("Failed to read system backends, but that's ok, we will just use the backends managed by LocalAI")
+		log.Warn().Err(err).Msg("Failed to read system backends, proceeding with user-managed backends")
	}

-	for _, potentialBackend := range potentialBackends {
-		if potentialBackend.IsDir() {
-			potentialBackendRunFile := filepath.Join(systemState.Backend.BackendsPath, potentialBackend.Name(), runFile)
+	// User-managed backends and alias collection
+	entries, err := os.ReadDir(systemState.Backend.BackendsPath)
+	if err != nil {
+		return nil, err
+	}

-			var metadata *BackendMetadata
+	aliasGroups := make(map[string][]backendCandidate)
+	metaMap := make(map[string]*BackendMetadata)

-			// If metadata file does not exist, we just use the directory name
-			// and we do not fill the other metadata (such as potential backend Aliases)
-			metadataFilePath := filepath.Join(systemState.Backend.BackendsPath, potentialBackend.Name(), metadataFile)
-			if _, err := os.Stat(metadataFilePath); os.IsNotExist(err) {
-				metadata = &BackendMetadata{
-					Name: potentialBackend.Name(),
-				}
+	for _, e := range entries {
+		if !e.IsDir() {
+			continue
+		}
+		dir := e.Name()
+		run := filepath.Join(systemState.Backend.BackendsPath, dir, runFile)
+
+		var metadata *BackendMetadata
+		metadataPath := filepath.Join(systemState.Backend.BackendsPath, dir, metadataFile)
+		if _, err := os.Stat(metadataPath); os.IsNotExist(err) {
+			metadata = &BackendMetadata{Name: dir}
+		} else {
+			m, rerr := readBackendMetadata(filepath.Join(systemState.Backend.BackendsPath, dir))
+			if rerr != nil {
+				return nil, rerr
+			}
+			if m == nil {
+				metadata = &BackendMetadata{Name: dir}
			} else {
-				// Check for alias in metadata
-				metadata, err = readBackendMetadata(filepath.Join(systemState.Backend.BackendsPath, potentialBackend.Name()))
-				if err != nil {
-					return nil, err
+				metadata = m
			}
		}

+		metaMap[dir] = metadata
+
+		// Concrete backend entry
+		if _, err := os.Stat(run); err == nil {
+			backends[dir] = SystemBackend{
+				Name:     dir,
+				RunFile:  run,
+				IsMeta:   false,
+				Metadata: metadata,
+			}
+		}
+
+		// Alias candidates
+		if metadata.Alias != "" {
+			aliasGroups[metadata.Alias] = append(aliasGroups[metadata.Alias], backendCandidate{name: dir, runFile: run})
+		}
+
+		// Meta backends indirection
+		if metadata.MetaBackendFor != "" {
+			backends[metadata.Name] = SystemBackend{
+				Name:     metadata.Name,
+				RunFile:  filepath.Join(systemState.Backend.BackendsPath, metadata.MetaBackendFor, runFile),
+				IsMeta:   true,
+				Metadata: metadata,
+			}
+		}
+	}
+
+	// Resolve aliases using system capability preferences
+	tokens := systemState.BackendPreferenceTokens()
+	for alias, cands := range aliasGroups {
+		chosen := backendCandidate{}
+		// Try preference tokens
+		for _, t := range tokens {
+			for _, c := range cands {
+				if strings.Contains(strings.ToLower(c.name), t) && c.runFile != "" {
+					chosen = c
+					break
+				}
+			}

-			if !backends.Exists(potentialBackend.Name()) {
-				// We don't want to override aliases if already set, and if we are meta backend
-				if _, err := os.Stat(potentialBackendRunFile); err == nil {
-					backends[potentialBackend.Name()] = SystemBackend{
-						Name:     potentialBackend.Name(),
-						RunFile:  potentialBackendRunFile,
-						IsMeta:   false,
-						Metadata: metadata,
-					}
-				}
-			}
-
-			if metadata == nil {
-				continue
-			}
-
-			if metadata.Alias != "" {
-				backends[metadata.Alias] = SystemBackend{
-					Name:     metadata.Alias,
-					RunFile:  potentialBackendRunFile,
-					IsMeta:   false,
-					Metadata: metadata,
-				}
-			}
-
-			if metadata.MetaBackendFor != "" {
-				backends[metadata.Name] = SystemBackend{
-					Name:     metadata.Name,
-					RunFile:  filepath.Join(systemState.Backend.BackendsPath, metadata.MetaBackendFor, runFile),
-					IsMeta:   true,
-					Metadata: metadata,
+			if chosen.runFile != "" {
+				break
+			}
+		}
+		// Fallback: first runnable
+		if chosen.runFile == "" {
+			for _, c := range cands {
+				if c.runFile != "" {
+					chosen = c
+					break
				}
			}
		}
+		if chosen.runFile == "" {
+			continue
+		}
+		md := metaMap[chosen.name]
+		backends[alias] = SystemBackend{
+			Name:     alias,
+			RunFile:  chosen.runFile,
+			IsMeta:   false,
+			Metadata: md,
+		}
	}

	return backends, nil
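The preference tokens come from the detected system state and, as the test below shows, the capability used for alias resolution can be forced through an environment variable. That also makes a handy debugging knob (a sketch, assuming a `backends list`-style subcommand is available to print the resolved backends):

    # force the nvidia variant to win alias resolution regardless of detected hardware
    LOCALAI_FORCE_META_BACKEND_CAPABILITY=nvidia local-ai backends list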
@@ -18,6 +18,73 @@ const (
	testImage = "quay.io/mudler/tests:localai-backend-test"
 )

+var _ = Describe("Runtime capability-based backend selection", func() {
+	var tempDir string
+
+	BeforeEach(func() {
+		var err error
+		tempDir, err = os.MkdirTemp("", "gallery-caps-*")
+		Expect(err).NotTo(HaveOccurred())
+	})
+
+	AfterEach(func() {
+		os.RemoveAll(tempDir)
+	})
+
+	It("ListSystemBackends prefers optimal alias candidate", func() {
+		// Arrange two installed backends sharing the same alias
+		must := func(err error) { Expect(err).NotTo(HaveOccurred()) }
+
+		cpuDir := filepath.Join(tempDir, "cpu-llama-cpp")
+		must(os.MkdirAll(cpuDir, 0o750))
+		cpuMeta := &BackendMetadata{Alias: "llama-cpp", Name: "cpu-llama-cpp"}
+		b, _ := json.Marshal(cpuMeta)
+		must(os.WriteFile(filepath.Join(cpuDir, "metadata.json"), b, 0o644))
+		must(os.WriteFile(filepath.Join(cpuDir, "run.sh"), []byte(""), 0o755))
+
+		cudaDir := filepath.Join(tempDir, "cuda12-llama-cpp")
+		must(os.MkdirAll(cudaDir, 0o750))
+		cudaMeta := &BackendMetadata{Alias: "llama-cpp", Name: "cuda12-llama-cpp"}
+		b, _ = json.Marshal(cudaMeta)
+		must(os.WriteFile(filepath.Join(cudaDir, "metadata.json"), b, 0o644))
+		must(os.WriteFile(filepath.Join(cudaDir, "run.sh"), []byte(""), 0o755))
+
+		// Default system: alias should point to CPU
+		sysDefault, err := system.GetSystemState(
+			system.WithBackendPath(tempDir),
+		)
+		must(err)
+		sysDefault.GPUVendor = "" // force default selection
+		backs, err := ListSystemBackends(sysDefault)
+		must(err)
+		aliasBack, ok := backs.Get("llama-cpp")
+		Expect(ok).To(BeTrue())
+		Expect(aliasBack.RunFile).To(Equal(filepath.Join(cpuDir, "run.sh")))
+		// concrete entries remain
+		_, ok = backs.Get("cpu-llama-cpp")
+		Expect(ok).To(BeTrue())
+		_, ok = backs.Get("cuda12-llama-cpp")
+		Expect(ok).To(BeTrue())
+
+		// NVIDIA system: alias should point to CUDA
+		// Force capability to nvidia to make the test deterministic on platforms like darwin/arm64 (which default to metal)
+		os.Setenv("LOCALAI_FORCE_META_BACKEND_CAPABILITY", "nvidia")
+		defer os.Unsetenv("LOCALAI_FORCE_META_BACKEND_CAPABILITY")
+
+		sysNvidia, err := system.GetSystemState(
+			system.WithBackendPath(tempDir),
+		)
+		must(err)
+		sysNvidia.GPUVendor = "nvidia"
+		sysNvidia.VRAM = 8 * 1024 * 1024 * 1024
+		backs, err = ListSystemBackends(sysNvidia)
+		must(err)
+		aliasBack, ok = backs.Get("llama-cpp")
+		Expect(ok).To(BeTrue())
+		Expect(aliasBack.RunFile).To(Equal(filepath.Join(cudaDir, "run.sh")))
+	})
+})
+
 var _ = Describe("Gallery Backends", func() {
	var (
		tempDir string
@@ -36,6 +36,8 @@ func TranscriptEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, app
			return fiber.ErrBadRequest
		}

+		diarize := c.FormValue("diarize", "false") != "false"
+
		// retrieve the file data from the request
		file, err := c.FormFile("file")
		if err != nil {
@@ -67,7 +69,7 @@ func TranscriptEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, app

		log.Debug().Msgf("Audio file copied to: %+v", dst)

-		tr, err := backend.ModelTranscription(dst, input.Language, input.Translate, ml, *config, appConfig)
+		tr, err := backend.ModelTranscription(dst, input.Language, input.Translate, diarize, ml, *config, appConfig)
		if err != nil {
			return err
		}
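Wired together with the proto, backend, and CLI changes above, diarization becomes a per-request toggle over HTTP: any form value other than `false` enables it, and marked segments carry the `[SPEAKER_TURN]` suffix added in the whisper backend. A minimal sketch against a local instance (the endpoint path and model name are assumptions based on LocalAI's OpenAI-compatible transcription API):

    curl http://localhost:8080/v1/audio/transcriptions \
      -F model=whisper-1 \
      -F file=@audio.wav \
      -F diarize=true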
@@ -139,7 +139,7 @@ Due to the nature of ROCm it is best to run all implementations in containers as
|
||||
|
||||
### Limitations
|
||||
|
||||
Ongoing verification testing of ROCm compatability with integrated backends.
|
||||
Ongoing verification testing of ROCm compatibility with integrated backends.
|
||||
Please note the following list of verified backends and devices.
|
||||
|
||||
LocalAI hipblas images are built against the following targets: gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
|
||||
@@ -172,7 +172,7 @@ The devices in the following list have been tested with `hipblas` images running

### System Prep

1. Check your GPU LLVM target is compatible with the version of ROCm. This can be found in the [LLVM Docs](https://llvm.org/docs/AMDGPUUsage.html).
2. Check which ROCm version is compatible with your LLVM target and your chosen OS (pay special attention to supported kernel versions). See the following for compatability for ([ROCm 6.0.0](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.0.0/reference/system-requirements.html)) or ([ROCm 6.0.2](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html))
2. Check which ROCm version is compatible with your LLVM target and your chosen OS (pay special attention to supported kernel versions). See the following for compatibility for ([ROCm 6.0.0](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.0.0/reference/system-requirements.html)) or ([ROCm 6.0.2](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html))
3. Install your chosen version of the `dkms` and `rocm` packages (it is recommended that the native package manager be used for this process on any OS, as version changes are executed more easily via this method if updates are required). Take care to restart after installing `amdgpu-dkms` and before installing `rocm`; for details see the installation documentation for your chosen OS ([6.0.2](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/native-install/index.html) or [6.0.0](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.0.0/how-to/native-install/index.html)). A package-manager sketch follows this list.
4. Deploy. Yes it's that easy.
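A minimal sketch of steps 2–3 on a Debian/Ubuntu host, assuming the AMD package repository for your chosen ROCm version is already configured:

```bash
# Install the kernel driver first, reboot, then install the ROCm stack.
sudo apt update
sudo apt install amdgpu-dkms
sudo reboot
# after the restart:
sudo apt install rocm
```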
@@ -216,7 +216,7 @@ The rebuild process will take some time to complete when deploying these contain
#### Example (k8s) (Advanced Deployment/WIP)

For k8s deployments there is an additional step required before deployment, this is the deployment of the [ROCm/k8s-device-plugin](https://artifacthub.io/packages/helm/amd-gpu-helm/amd-gpu).
For any k8s environment the documentation provided by AMD from the ROCm project should be successful. It is recommended that if you use rke2 or OpenShift that you deploy the SUSE or RedHat provided version of this resource to ensure compatability.
For any k8s environment the documentation provided by AMD from the ROCm project should be successful. It is recommended that if you use rke2 or OpenShift that you deploy the SUSE or RedHat provided version of this resource to ensure compatibility.
After this has been completed the [helm chart from go-skynet](https://github.com/go-skynet/helm-charts) can be configured and deployed mostly un-edited.

The following are details of the changes that should be made to ensure proper function.
@@ -241,7 +241,7 @@ spec:
value: '0'
# This variable indicates the devices available to container (0:device1 1:device2 2:device3) etc.
# For multiple devices (say device 1 and 3) the value would be equivalent to HIP_VISIBLE_DEVICES="0,2"
# Please take note of this when an iGPU is present in host system as compatability is not assured.
# Please take note of this when an iGPU is present in host system as compatibility is not assured.
...
resources:
limits:
@@ -250,7 +250,7 @@ spec:
amd.com/gpu: '1'
```

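The same `HIP_VISIBLE_DEVICES` remapping applies outside of k8s; a hedged sketch with a plain container runtime (the image tag is illustrative):

```bash
# Expose the ROCm device nodes and restrict LocalAI to two of the host GPUs,
# mirroring the HIP_VISIBLE_DEVICES="0,2" example above.
docker run -d --name local-ai \
  --device=/dev/kfd --device=/dev/dri \
  -e HIP_VISIBLE_DEVICES="0,2" \
  -p 8080:8080 \
  localai/localai:latest-gpu-hipblas
```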
This configuration has been tested on a 'custom' cluster managed by SUSE Rancher that was deployed on top of Ubuntu 22.04.4, certification of other configuration is ongoing and compatability is not guaranteed.
This configuration has been tested on a 'custom' cluster managed by SUSE Rancher that was deployed on top of Ubuntu 22.04.4, certification of other configuration is ongoing and compatibility is not guaranteed.

### Notes

@@ -34,5 +34,5 @@ Grammars and function tools can be used as well in conjunction with vision APIs:

All-in-One images have already shipped the llava model as `gpt-4-vision-preview`, so no setup is needed in this case.

To setup the LLaVa models, follow the full example in the [configuration examples](https://github.com/mudler/LocalAI/blob/master/examples/configurations/README.md#llava).
To setup the LLaVa models, follow the full example in the [configuration examples](https://github.com/mudler/LocalAI-examples/blob/main/configurations/llava/llava.yaml).

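Since the All-in-One images expose LLaVa as `gpt-4-vision-preview`, requests can follow the OpenAI vision message format; a hedged sketch (host and image URL are placeholders):

```bash
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gpt-4-vision-preview",
    "messages": [{
      "role": "user",
      "content": [
        {"type": "text", "text": "What is in this image?"},
        {"type": "image_url", "image_url": {"url": "https://example.com/photo.jpg"}}
      ]
    }]
  }'
```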
@@ -27,6 +27,12 @@ curl https://localai.io/install.sh | sh

See [Installer]({{% relref "docs/advanced/installer" %}}) for all the supported options

### macOS Download

<a href="https://github.com/mudler/LocalAI/releases/latest/download/LocalAI.dmg">
<img src="https://img.shields.io/badge/Download-macOS-blue?style=for-the-badge&logo=apple&logoColor=white" alt="Download LocalAI for macOS"/>
</a>

### Run with docker

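The docker command itself falls outside this hunk; a hedged quick-start sketch (the tag is illustrative — check the published image tags for the current list):

```bash
# CPU-only All-in-One image; serves the OpenAI-compatible API on port 8080.
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
```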
@@ -56,6 +56,12 @@ The fastest way to get started is with our one-line installer:
curl https://localai.io/install.sh | sh
```

### macOS Download

<a href="https://github.com/mudler/LocalAI/releases/latest/download/LocalAI.dmg">
<img src="https://img.shields.io/badge/Download-macOS-blue?style=for-the-badge&logo=apple&logoColor=white" alt="Download LocalAI for macOS"/>
</a>

Or use Docker for a quick start:

```bash

@@ -5,7 +5,13 @@ title = "LocalAI binaries"
weight = 26
+++

LocalAI binaries are available for both Linux and MacOS platforms and can be executed directly from your command line. These binaries are continuously updated and hosted on [our GitHub Releases page](https://github.com/mudler/LocalAI/releases). This method also supports Windows users via the Windows Subsystem for Linux (WSL).

### macOS Download

<a href="https://github.com/mudler/LocalAI/releases/latest/download/LocalAI.dmg">
<img src="https://img.shields.io/badge/Download-macOS-blue?style=for-the-badge&logo=apple&logoColor=white" alt="Download LocalAI for macOS"/>
</a>

Use the following one-liner command in your terminal to download and run LocalAI on Linux or MacOS:

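The one-liner referenced above is cut off by this hunk; a hedged sketch of fetching a release binary directly (the asset name is hypothetical — check the Releases page for the exact filename for your platform):

```bash
# Download a Linux binary from the latest release, mark it executable, and run it.
# "local-ai-Linux-x86_64" is an illustrative asset name, not taken from this diff.
curl -Lo local-ai https://github.com/mudler/LocalAI/releases/latest/download/local-ai-Linux-x86_64
chmod +x local-ai
./local-ai
```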
@@ -43,6 +43,7 @@ LocalAI will attempt to automatically load models which are not explicitly confi
| [chatterbox](https://github.com/resemble-ai/chatterbox) | Chatterbox TTS | no | Text-to-speech | no | no | CUDA 11/12, CPU |
| [kitten-tts](https://github.com/KittenML/KittenTTS) | Kitten TTS | no | Text-to-speech | no | no | CPU |
| [silero-vad](https://github.com/snakers4/silero-vad) with [Golang bindings](https://github.com/streamer45/silero-vad-go) | Silero VAD | no | Voice Activity Detection | no | no | CPU |
| [mlx-audio](https://github.com/Blaizzy/mlx-audio) | MLX | no | Text-to-speech | no | no | Metal (Apple Silicon) |
{{< /table >}}

## Image & Video Generation

@@ -1,3 +1,3 @@
{
"version": "v3.4.0"
"version": "v3.5.0"
}

@@ -2489,6 +2489,61 @@
    - filename: Qwen_Qwen3-4B-Thinking-2507-Q8_0.gguf
      sha256: 2c08db093bc57c2c77222d27ffe8d41cb0b5648e66ba84e5fb9ceab429f6735c
      uri: huggingface://bartowski/Qwen_Qwen3-4B-Thinking-2507-GGUF/Qwen_Qwen3-4B-Thinking-2507-Q8_0.gguf
- !!merge <<: *qwen3
  name: "nousresearch_hermes-4-14b"
  icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/7B7nMvHJiL72QzVBEPKOG.png
  urls:
    - https://huggingface.co/NousResearch/Hermes-4-14B
    - https://huggingface.co/bartowski/NousResearch_Hermes-4-14B-GGUF
  description: |
    Hermes 4 14B is a frontier, hybrid-mode reasoning model based on Qwen 3 14B by Nous Research that is aligned to you.

    Read the Hermes 4 technical report here: Hermes 4 Technical Report

    Chat with Hermes in Nous Chat: https://chat.nousresearch.com

    Training highlights include a newly synthesized post-training corpus emphasizing verified reasoning traces, massive improvements in math, code, STEM, logic, creativity, and format-faithful outputs, while preserving general assistant quality and broadly neutral alignment.
    What’s new vs Hermes 3

    Post-training corpus: Massively increased dataset size from 1M samples and 1.2B tokens to ~5M samples / ~60B tokens blended across reasoning and non-reasoning data.
    Hybrid reasoning mode with explicit <think>…</think> segments when the model decides to deliberate, and options to make your responses faster when you want.
    Reasoning that is top quality, expressive, improves math, code, STEM, logic, and even creative writing and subjective responses.
    Schema adherence & structured outputs: trained to produce valid JSON for given schemas and to repair malformed objects.
    Much easier to steer and align: extreme improvements on steerability, especially on reduced refusal rates.
  overrides:
    parameters:
      model: NousResearch_Hermes-4-14B-Q4_K_M.gguf
  files:
    - filename: NousResearch_Hermes-4-14B-Q4_K_M.gguf
      sha256: 7ad9be1e446e3da0c149fdf55284c90be666d3e13c6e2581587853f4f9538073
      uri: huggingface://bartowski/NousResearch_Hermes-4-14B-GGUF/NousResearch_Hermes-4-14B-Q4_K_M.gguf
- !!merge <<: *qwen3
  name: "minicpm-v-4_5"
  license: apache-2.0
  icon: https://avatars.githubusercontent.com/u/89920203
  urls:
    - https://huggingface.co/openbmb/MiniCPM-V-4_5-gguf
    - https://huggingface.co/openbmb/MiniCPM-V-4_5
  description: |
    MiniCPM-V 4.5 is the latest and most capable model in the MiniCPM-V series. The model is built on Qwen3-8B and SigLIP2-400M with a total of 8B parameters.
  tags:
    - llm
    - multimodal
    - gguf
    - gpu
    - qwen3
    - cpu
  overrides:
    mmproj: minicpm-v-4_5-mmproj-f16.gguf
    parameters:
      model: minicpm-v-4_5-Q4_K_M.gguf
  files:
    - filename: minicpm-v-4_5-Q4_K_M.gguf
      sha256: c1c3c33100b15b4caf7319acce4e23c0eb0ce1cbd12f70e8d24f05aa67b7512f
      uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/ggml-model-Q4_K_M.gguf
    - filename: minicpm-v-4_5-mmproj-f16.gguf
      uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/mmproj-model-f16.gguf
      sha256: 7a7225a32e8d453aaa3d22d8c579b5bf833c253f784cdb05c99c9a76fd616df8
- &gemma3
  url: "github:mudler/LocalAI/gallery/gemma.yaml@master"
  name: "gemma-3-27b-it"
@@ -20051,145 +20106,154 @@
  name: "whisper-base-q5_1"
  overrides:
    parameters:
      model: ggml-model-whisper-base-q5_1.bin
      model: ggml-base-q5_1.bin
  files:
    - filename: "ggml-model-whisper-base-q5_1.bin"
      uri: "https://ggml.ggerganov.com/ggml-model-whisper-base-q5_1.bin"
    - filename: "ggml-base-q5_1.bin"
      uri: "huggingface://ggerganov/whisper.cpp/ggml-base-q5_1.bin"
      sha256: 422f1ae452ade6f30a004d7e5c6a43195e4433bc370bf23fac9cc591f01a8898
- !!merge <<: *whisper
  name: "whisper-base"
  overrides:
    parameters:
      model: ggml-model-whisper-base.bin
      model: ggml-base.bin
  files:
    - filename: "ggml-model-whisper-base.bin"
      uri: "https://ggml.ggerganov.com/ggml-model-whisper-base.bin"
    - filename: "ggml-base.bin"
      uri: "huggingface://ggerganov/whisper.cpp/ggml-base.bin"
      sha256: 60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe
- !!merge <<: *whisper
  name: "whisper-base-en-q5_1"
  overrides:
    parameters:
      model: ggml-model-whisper-base.en-q5_1.bin
      model: ggml-base.en-q5_1.bin
  files:
    - filename: "ggml-model-whisper-base.en-q5_1.bin"
      uri: "https://ggml.ggerganov.com/ggml-model-whisper-base.en-q5_1.bin"
    - filename: "ggml-base.en-q5_1.bin"
      uri: "huggingface://ggerganov/whisper.cpp/ggml-base.en-q5_1.bin"
      sha256: 4baf70dd0d7c4247ba2b81fafd9c01005ac77c2f9ef064e00dcf195d0e2fdd2f
- !!merge <<: *whisper
  name: "whisper-base-en"
  overrides:
    parameters:
      model: ggml-model-whisper-base.en.bin
      model: ggml-base.en.bin
  files:
    - filename: "ggml-model-whisper-base.en.bin"
      uri: "https://ggml.ggerganov.com/ggml-model-whisper-base.en.bin"
    - filename: "ggml-base.en.bin"
      uri: "huggingface://ggerganov/whisper.cpp/ggml-base.en.bin"
      sha256: a03779c86df3323075f5e796cb2ce5029f00ec8869eee3fdfb897afe36c6d002
- !!merge <<: *whisper
  name: "whisper-large-q5_0"
  overrides:
    parameters:
      model: ggml-model-whisper-large-q5_0.bin
      model: ggml-large-q5_0.bin
  files:
    - filename: "ggml-model-whisper-large-q5_0.bin"
      uri: "https://ggml.ggerganov.com/ggml-model-whisper-large-q5_0.bin"
    - filename: "ggml-large-q5_0.bin"
      uri: "huggingface://ggerganov/whisper.cpp/ggml-large-q5_0.bin"
      sha256: 3a214837221e4530dbc1fe8d734f302af393eb30bd0ed046042ebf4baf70f6f2
- !!merge <<: *whisper
  name: "whisper-medium-q5_0"
  overrides:
    parameters:
      model: ggml-model-whisper-medium-q5_0.bin
      model: ggml-medium-q5_0.bin
  files:
    - filename: "ggml-model-whisper-medium-q5_0.bin"
      uri: "https://ggml.ggerganov.com/ggml-model-whisper-medium-q5_0.bin"
    - filename: "ggml-medium-q5_0.bin"
      uri: "huggingface://ggerganov/whisper.cpp/ggml-medium-q5_0.bin"
      sha256: 19fea4b380c3a618ec4723c3eef2eb785ffba0d0538cf43f8f235e7b3b34220f
- !!merge <<: *whisper
  name: "whisper-small-q5_1"
  overrides:
    parameters:
      model: ggml-model-whisper-small-q5_1.bin
      model: ggml-small-q5_1.bin
  files:
    - filename: "ggml-model-whisper-small-q5_1.bin"
      uri: "https://ggml.ggerganov.com/ggml-model-whisper-small-q5_1.bin"
    - filename: "ggml-small-q5_1.bin"
      uri: "huggingface://ggerganov/whisper.cpp/ggml-small-q5_1.bin"
      sha256: ae85e4a935d7a567bd102fe55afc16bb595bdb618e11b2fc7591bc08120411bb
- !!merge <<: *whisper
  name: "whisper-small"
  overrides:
    parameters:
      model: ggml-model-whisper-small.bin
      model: ggml-small.bin
  files:
    - filename: "ggml-model-whisper-small.bin"
      uri: "https://ggml.ggerganov.com/ggml-model-whisper-small.bin"
    - filename: "ggml-small.bin"
      uri: "huggingface://ggerganov/whisper.cpp/ggml-small.bin"
      sha256: 1be3a9b2063867b937e64e2ec7483364a79917e157fa98c5d94b5c1fffea987b
- !!merge <<: *whisper
  name: "whisper-small-en-tdrz"
  overrides:
    parameters:
      model: ggml-small.en-tdrz.bin
  files:
    - filename: "ggml-small.bin"
      uri: "huggingface://akashmjn/tinydiarize-whisper.cpp/ggml-small.en-tdrz.bin"
      sha256: ceac3ec06d1d98ef71aec665283564631055fd6129b79d8e1be4f9cc33cc54b4
- !!merge <<: *whisper
  name: "whisper-small-en-q5_1"
  overrides:
    parameters:
      model: ggml-model-whisper-small.en-q5_1.bin
      model: ggml-small.en-q5_1.bin
  files:
    - filename: "ggml-model-whisper-small.en-q5_1.bin"
      uri: "https://ggml.ggerganov.com/ggml-model-whisper-small.en-q5_1.bin"
    - filename: "ggml-small.en-q5_1.bin"
      uri: "huggingface://ggerganov/whisper.cpp/ggml-small.en-q5_1.bin"
      sha256: bfdff4894dcb76bbf647d56263ea2a96645423f1669176f4844a1bf8e478ad30
- !!merge <<: *whisper
  name: "whisper-small"
  overrides:
    parameters:
      model: ggml-model-whisper-small.en.bin
      model: ggml-small.en.bin
  files:
    - filename: "ggml-model-whisper-small.en.bin"
      uri: "https://ggml.ggerganov.com/ggml-model-whisper-small.en.bin"
    - filename: "ggml-small.en.bin"
      uri: "huggingface://ggerganov/whisper.cpp/ggml-small.en.bin"
      sha256: c6138d6d58ecc8322097e0f987c32f1be8bb0a18532a3f88f734d1bbf9c41e5d
- !!merge <<: *whisper
  name: "whisper-small-q5_1"
  overrides:
    parameters:
      model: ggml-model-whisper-small-q5_1.bin
      model: ggml-small-q5_1.bin
  files:
    - filename: "ggml-model-whisper-small-q5_1.bin"
      uri: "https://ggml.ggerganov.com/ggml-model-whisper-small-q5_1.bin"
    - filename: "ggml-small-q5_1.bin"
      uri: "huggingface://ggerganov/whisper.cpp/ggml-small-q5_1.bin"
      sha256: ae85e4a935d7a567bd102fe55afc16bb595bdb618e11b2fc7591bc08120411bb
- !!merge <<: *whisper
  name: "whisper-tiny"
  overrides:
    parameters:
      model: ggml-model-whisper-tiny.bin
      model: ggml-tiny.bin
  files:
    - filename: "ggml-model-whisper-tiny.bin"
      uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.bin"
    - filename: "ggml-tiny.bin"
      uri: "huggingface://ggerganov/whisper.cpp/ggml-tiny.bin"
      sha256: be07e048e1e599ad46341c8d2a135645097a538221678b7acdd1b1919c6e1b21
- !!merge <<: *whisper
  name: "whisper-tiny-q5_1"
  overrides:
    parameters:
      model: ggml-model-whisper-tiny-q5_1.bin
      model: ggml-tiny-q5_1.bin
  files:
    - filename: "ggml-model-whisper-tiny-q5_1.bin"
      uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny-q5_1.bin"
    - filename: "ggml-tiny-q5_1.bin"
      uri: "huggingface://ggerganov/whisper.cpp/ggml-tiny-q5_1.bin"
      sha256: 818710568da3ca15689e31a743197b520007872ff9576237bda97bd1b469c3d7
- !!merge <<: *whisper
  name: "whisper-tiny-en-q5_1"
  overrides:
    parameters:
      model: ggml-model-whisper-tiny.en-q5_1.bin
      model: ggml-tiny.en-q5_1.bin
  files:
    - filename: "ggml-model-whisper-tiny.en-q5_1.bin"
      uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.en-q5_1.bin"
    - filename: "ggml-tiny.en-q5_1.bin"
      uri: "huggingface://ggerganov/whisper.cpp/ggml-tiny.en-q5_1.bin"
      sha256: c77c5766f1cef09b6b7d47f21b546cbddd4157886b3b5d6d4f709e91e66c7c2b
- !!merge <<: *whisper
  name: "whisper-tiny-en"
  overrides:
    parameters:
      model: ggml-model-whisper-tiny.en.bin
      model: ggml-tiny.en.bin
  files:
    - filename: "ggml-model-whisper-tiny.en.bin"
      uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.en.bin"
    - filename: "ggml-tiny.en.bin"
      uri: "huggingface://ggerganov/whisper.cpp/ggml-tiny.en.bin"
      sha256: 921e4cf8686fdd993dcd081a5da5b6c365bfde1162e72b08d75ac75289920b1f
- !!merge <<: *whisper
  name: "whisper-tiny-en-q8_0"
  overrides:
    parameters:
      model: ggml-model-whisper-tiny.en-q8_0.bin
      model: ggml-tiny.en-q8_0.bin
  files:
    - filename: "ggml-model-whisper-tiny.en-q8_0.bin"
      uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.en-q8_0.bin"
    - filename: "ggml-tiny.en-q8_0.bin"
      uri: "huggingface://ggerganov/whisper.cpp/ggml-tiny.en-q8_0.bin"
      sha256: 5bc2b3860aa151a4c6e7bb095e1fcce7cf12c7b020ca08dcec0c6d018bb7dd94
## Bert embeddings (llama3.2 drop-in)
- !!merge <<: *llama32

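The whisper entries above only re-point downloads from ggml.ggerganov.com to the ggerganov/whisper.cpp Hugging Face repository; gallery names are unchanged, so existing install flows keep working. A hedged sketch using the CLI (command shape assumed from the LocalAI CLI, model name taken from the entries above):

```bash
# Pull one of the re-pointed whisper models from the gallery.
local-ai models install whisper-base
```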
30
go.mod
@@ -41,26 +41,26 @@ require (
	github.com/otiai10/copy v1.14.1
	github.com/otiai10/openaigo v1.7.0
	github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5
	github.com/prometheus/client_golang v1.22.0
	github.com/prometheus/client_golang v1.23.0
	github.com/rs/zerolog v1.33.0
	github.com/russross/blackfriday v1.6.0
	github.com/sashabaranov/go-openai v1.26.2
	github.com/schollz/progressbar/v3 v3.14.4
	github.com/shirou/gopsutil/v3 v3.24.5
	github.com/streamer45/silero-vad-go v0.2.1
	github.com/stretchr/testify v1.10.0
	github.com/swaggo/swag v1.16.3
	github.com/stretchr/testify v1.11.1
	github.com/swaggo/swag v1.16.6
	github.com/testcontainers/testcontainers-go v0.35.0
	github.com/tmc/langchaingo v0.1.13
	github.com/valyala/fasthttp v1.55.0
	go.opentelemetry.io/otel v1.35.0
	go.opentelemetry.io/otel/exporters/prometheus v0.50.0
	go.opentelemetry.io/otel/metric v1.35.0
	go.opentelemetry.io/otel/sdk/metric v1.28.0
	go.opentelemetry.io/otel v1.38.0
	go.opentelemetry.io/otel/exporters/prometheus v0.60.0
	go.opentelemetry.io/otel/metric v1.38.0
	go.opentelemetry.io/otel/sdk/metric v1.38.0
	google.golang.org/grpc v1.67.1
	gopkg.in/yaml.v2 v2.4.0
	gopkg.in/yaml.v3 v3.0.1
	oras.land/oras-go/v2 v2.5.0
	oras.land/oras-go/v2 v2.6.0
)

require (
@@ -90,6 +90,7 @@ require (
	github.com/go-text/render v0.2.0 // indirect
	github.com/go-text/typesetting v0.2.1 // indirect
	github.com/godbus/dbus/v5 v5.1.0 // indirect
	github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc // indirect
	github.com/hack-pad/go-indexeddb v0.3.2 // indirect
	github.com/hack-pad/safejs v0.1.0 // indirect
	github.com/jeandeaual/go-locale v0.0.0-20250612000132-0ef82f21eade // indirect
@@ -129,6 +130,7 @@ require (
	github.com/pion/transport/v3 v3.0.7 // indirect
	github.com/pion/turn/v4 v4.0.2 // indirect
	github.com/pion/webrtc/v4 v4.1.2 // indirect
	github.com/prometheus/otlptranslator v0.0.2 // indirect
	github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529 // indirect
	github.com/rymdport/portal v0.4.1 // indirect
	github.com/savsgio/gotils v0.0.0-20240303185622-093b76447511 // indirect
@@ -144,7 +146,7 @@ require (
	go.yaml.in/yaml/v3 v3.0.4 // indirect
	golang.org/x/image v0.25.0 // indirect
	golang.org/x/time v0.12.0 // indirect
	google.golang.org/protobuf v1.36.7 // indirect
	google.golang.org/protobuf v1.36.8 // indirect
)

require (
@@ -267,7 +269,7 @@ require (
	github.com/multiformats/go-varint v0.0.7 // indirect
	github.com/nwaples/rardecode v1.1.0 // indirect
	github.com/opencontainers/go-digest v1.0.0 // indirect
	github.com/opencontainers/image-spec v1.1.0
	github.com/opencontainers/image-spec v1.1.1
	github.com/opentracing/opentracing-go v1.2.0 // indirect
	github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect
	github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
@@ -279,8 +281,8 @@ require (
	github.com/polydawn/refmt v0.89.0 // indirect
	github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect
	github.com/prometheus/client_model v0.6.2 // indirect
	github.com/prometheus/common v0.64.0 // indirect
	github.com/prometheus/procfs v0.16.1 // indirect
	github.com/prometheus/common v0.65.0 // indirect
	github.com/prometheus/procfs v0.17.0 // indirect
	github.com/quic-go/qpack v0.5.1 // indirect
	github.com/quic-go/quic-go v0.54.0 // indirect
	github.com/quic-go/webtransport-go v0.9.0 // indirect
@@ -308,8 +310,8 @@ require (
	github.com/yuin/goldmark-emoji v1.0.5 // indirect
	github.com/yusufpapurcu/wmi v1.2.4 // indirect
	go.opencensus.io v0.24.0 // indirect
	go.opentelemetry.io/otel/sdk v1.31.0 // indirect
	go.opentelemetry.io/otel/trace v1.35.0 // indirect
	go.opentelemetry.io/otel/sdk v1.38.0 // indirect
	go.opentelemetry.io/otel/trace v1.38.0 // indirect
	go.uber.org/dig v1.19.0 // indirect
	go.uber.org/fx v1.24.0 // indirect
	go.uber.org/multierr v1.11.0 // indirect

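A sketch of how bumps like these are typically produced, using three of the modules from this hunk (run from the repository root):

```bash
# Upgrade the direct dependencies, then let Go refresh go.sum and prune.
go get github.com/prometheus/client_golang@v1.23.0 \
       github.com/stretchr/testify@v1.11.1 \
       go.opentelemetry.io/otel@v1.38.0
go mod tidy
```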
60
go.sum
@@ -301,6 +301,8 @@ github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aN
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/gpustack/gguf-parser-go v0.17.0 h1:DkSziWLsiQM0pqqkr/zMcaBn94KY7iQTi4zmaHixDus=
github.com/gpustack/gguf-parser-go v0.17.0/go.mod h1:GvHh1Kvvq5ojCOsJ5UpwiJJmIjFw3Qk5cW7R+CZ3IJo=
github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc h1:GN2Lv3MGO7AS6PrRoT6yV5+wkrOpcszoIsO4+4ds248=
github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc/go.mod h1:+JKpmjMGhpgPL+rXZ5nsZieVzvarn86asRlBg4uNGnk=
github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
github.com/grpc-ecosystem/grpc-gateway v1.5.0 h1:WcmKMm43DR7RdtlkEXQJyo5ws8iTp98CyhCCbOHMvNI=
github.com/grpc-ecosystem/grpc-gateway v1.5.0/go.mod h1:RSKVYQBd5MCa4OVpNdGskqpgL2+G+NZTnrVHpWWfpdw=
@@ -560,8 +562,8 @@ github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A=
github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k=
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug=
github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM=
github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040=
github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M=
github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs=
github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc=
github.com/openzipkin/zipkin-go v0.1.1/go.mod h1:NtoC/o8u3JlF1lSlyPNswIbeQH9bJTmOf0Erfk+hxe8=
@@ -639,18 +641,20 @@ github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55/go.mod h1:Om
github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g=
github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U=
github.com/prometheus/client_golang v0.8.0/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q=
github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0=
github.com/prometheus/client_golang v1.23.0 h1:ust4zpdl9r4trLY/gSjlm07PuiBq2ynaXXlptpfy8Uc=
github.com/prometheus/client_golang v1.23.0/go.mod h1:i/o0R9ByOnHX0McrTMTyhYvKE4haaf2mW08I+jGAjEE=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
github.com/prometheus/common v0.0.0-20180801064454-c7de2306084e/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro=
github.com/prometheus/common v0.64.0 h1:pdZeA+g617P7oGv1CzdTzyeShxAGrTBsolKNOLQPGO4=
github.com/prometheus/common v0.64.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8=
github.com/prometheus/common v0.65.0 h1:QDwzd+G1twt//Kwj/Ww6E9FQq1iVMmODnILtW1t2VzE=
github.com/prometheus/common v0.65.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8=
github.com/prometheus/otlptranslator v0.0.2 h1:+1CdeLVrRQ6Psmhnobldo0kTp96Rj80DRXRd5OSnMEQ=
github.com/prometheus/otlptranslator v0.0.2/go.mod h1:P8AwMgdD7XEr6QRUJ2QWLpiAZTgTE2UYgjlu3svompI=
github.com/prometheus/procfs v0.0.0-20180725123919-05ee40e3a273/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg=
github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is=
github.com/prometheus/procfs v0.17.0 h1:FuLQ+05u4ZI+SS/w9+BWEM2TXiHKsUQ9TADiRH7DuK0=
github.com/prometheus/procfs v0.17.0/go.mod h1:oPQLaDAMRbA+u8H5Pbfq+dl3VDAvHxMUOVhe0wYB2zw=
github.com/quic-go/qpack v0.5.1 h1:giqksBPnT/HDtZ6VhtFKgoLOWmlyo9Ei6u9PqzIMbhI=
github.com/quic-go/qpack v0.5.1/go.mod h1:+PC4XFrEskIVkcLzpEkbLqq1uCoxPhQuvK5rH1ZgaEg=
github.com/quic-go/quic-go v0.54.0 h1:6s1YB9QotYI6Ospeiguknbp2Znb/jZYjZLRXn9kMQBg=
@@ -754,12 +758,12 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/swaggo/files/v2 v2.0.0 h1:hmAt8Dkynw7Ssz46F6pn8ok6YmGZqHSVLZ+HQM7i0kw=
github.com/swaggo/files/v2 v2.0.0/go.mod h1:24kk2Y9NYEJ5lHuCra6iVwkMjIekMCaFq/0JQj66kyM=
github.com/swaggo/swag v1.16.3 h1:PnCYjPCah8FK4I26l2F/KQ4yz3sILcVUN3cTlBFA9Pg=
github.com/swaggo/swag v1.16.3/go.mod h1:DImHIuOFXKpMFAQjcC7FG4m3Dg4+QuUgUzJmKjI/gRk=
github.com/swaggo/swag v1.16.6 h1:qBNcx53ZaX+M5dxVyTrgQ0PJ/ACK+NzhwcbieTt+9yI=
github.com/swaggo/swag v1.16.6/go.mod h1:ngP2etMK5a0P3QBizic5MEwpRmluJZPHjXcMoj4Xesg=
github.com/tarm/serial v0.0.0-20180830185346-98f6abe2eb07/go.mod h1:kDXzergiv9cbyO7IOYJZWg1U88JhDg3PB6klq9Hg2pA=
github.com/testcontainers/testcontainers-go v0.35.0 h1:uADsZpTKFAtp8SLK+hMwSaa+X+JiERHtd4sQAFmXeMo=
github.com/testcontainers/testcontainers-go v0.35.0/go.mod h1:oEVBj5zrfJTrgjwONs1SsRbnBtH9OKl+IGl3UMcr2B4=
@@ -820,22 +824,22 @@ go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJyS
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0 h1:UP6IpuHFkUgOQL9FFQFrZ+5LiwhhYRbi7VZSIx6Nj5s=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0/go.mod h1:qxuZLtbq5QDtdeSHsS7bcf6EH6uO6jUAgk764zd3rhM=
go.opentelemetry.io/otel v1.35.0 h1:xKWKPxrxB6OtMCbmMY021CqC45J+3Onta9MqjhnusiQ=
go.opentelemetry.io/otel v1.35.0/go.mod h1:UEqy8Zp11hpkUrL73gSlELM0DupHoiq72dR+Zqel/+Y=
go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8=
go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.31.0 h1:K0XaT3DwHAcV4nKLzcQvwAgSyisUghWoY20I7huthMk=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.31.0/go.mod h1:B5Ki776z/MBnVha1Nzwp5arlzBbE3+1jk+pGmaP5HME=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.31.0 h1:lUsI2TYsQw2r1IASwoROaCnjdj2cvC2+Jbxvk6nHnWU=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.31.0/go.mod h1:2HpZxxQurfGxJlJDblybejHB6RX6pmExPNe517hREw4=
go.opentelemetry.io/otel/exporters/prometheus v0.50.0 h1:2Ewsda6hejmbhGFyUvWZjUThC98Cf8Zy6g0zkIimOng=
go.opentelemetry.io/otel/exporters/prometheus v0.50.0/go.mod h1:pMm5PkUo5YwbLiuEf7t2xg4wbP0/eSJrMxIMxKosynY=
go.opentelemetry.io/otel/metric v1.35.0 h1:0znxYu2SNyuMSQT4Y9WDWej0VpcsxkuklLa4/siN90M=
go.opentelemetry.io/otel/metric v1.35.0/go.mod h1:nKVFgxBZ2fReX6IlyW28MgZojkoAkJGaE8CpgeAU3oE=
go.opentelemetry.io/otel/sdk v1.31.0 h1:xLY3abVHYZ5HSfOg3l2E5LUj2Cwva5Y7yGxnSW9H5Gk=
go.opentelemetry.io/otel/sdk v1.31.0/go.mod h1:TfRbMdhvxIIr/B2N2LQW2S5v9m3gOQ/08KsbbO5BPT0=
go.opentelemetry.io/otel/sdk/metric v1.28.0 h1:OkuaKgKrgAbYrrY0t92c+cC+2F6hsFNnCQArXCKlg08=
go.opentelemetry.io/otel/sdk/metric v1.28.0/go.mod h1:cWPjykihLAPvXKi4iZc1dpER3Jdq2Z0YLse3moQUCpg=
go.opentelemetry.io/otel/trace v1.35.0 h1:dPpEfJu1sDIqruz7BHFG3c7528f6ddfSWfFDVt/xgMs=
go.opentelemetry.io/otel/trace v1.35.0/go.mod h1:WUk7DtFp1Aw2MkvqGdwiXYDZZNvA/1J8o6xRXLrIkyc=
go.opentelemetry.io/otel/exporters/prometheus v0.60.0 h1:cGtQxGvZbnrWdC2GyjZi0PDKVSLWP/Jocix3QWfXtbo=
go.opentelemetry.io/otel/exporters/prometheus v0.60.0/go.mod h1:hkd1EekxNo69PTV4OWFGZcKQiIqg0RfuWExcPKFvepk=
go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA=
go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI=
go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E=
go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg=
go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6qT5wthqPoM=
go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA=
go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE=
go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs=
go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0=
go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8=
go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ=
@@ -1066,8 +1070,8 @@ google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
google.golang.org/protobuf v1.36.7 h1:IgrO7UwFQGJdRNXH/sQux4R1Dj1WAKcLElzeeRaXV2A=
google.golang.org/protobuf v1.36.7/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
@@ -1099,8 +1103,8 @@ howett.net/plist v1.0.2-0.20250314012144-ee69052608d9 h1:eeH1AIcPvSc0Z25ThsYF+Xo
howett.net/plist v1.0.2-0.20250314012144-ee69052608d9/go.mod h1:fyFX5Hj5tP1Mpk8obqA9MZgXT416Q5711SDT7dQLTLk=
lukechampine.com/blake3 v1.4.1 h1:I3Smz7gso8w4/TunLKec6K2fn+kyKtDxr/xcQEN84Wg=
lukechampine.com/blake3 v1.4.1/go.mod h1:QFosUxmjB8mnrWFSNwKmvxHpfY72bmD2tQ0kBMM3kwo=
oras.land/oras-go/v2 v2.5.0 h1:o8Me9kLY74Vp5uw07QXPiitjsw7qNXi8Twd+19Zf02c=
oras.land/oras-go/v2 v2.5.0/go.mod h1:z4eisnLP530vwIOUOJeBIj0aGI0L1C3d53atvCBqZHg=
oras.land/oras-go/v2 v2.6.0 h1:X4ELRsiGkrbeox69+9tzTu492FMUu7zJQW6eJU+I2oc=
oras.land/oras-go/v2 v2.6.0/go.mod h1:magiQDfG6H1O9APp+rOsvCPcW1GD2MM7vgnKY0Y+u1o=
sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo=
sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8=
sourcegraph.com/sourcegraph/go-diff v0.5.0/go.mod h1:kuch7UrkMzY0X+p9CRK03kfuPQ2zzQcaEFbx8wA8rck=

@@ -1,3 +1,5 @@
// Package system provides system detection utilities, including GPU/vendor detection
// and capability classification used to select optimal backends at runtime.
package system

import (
@@ -116,3 +118,25 @@ func detectGPUVendor(gpus []*gpu.GraphicsCard) (string, error) {

	return "", nil
}

// BackendPreferenceTokens returns a list of substrings that represent the preferred
// backend implementation order for the current system capability. Callers can use
// these tokens to select the most appropriate concrete backend among multiple
// candidates sharing the same alias (e.g., "llama-cpp").
func (s *SystemState) BackendPreferenceTokens() []string {
	capStr := strings.ToLower(s.getSystemCapabilities())
	switch {
	case strings.HasPrefix(capStr, nvidia):
		return []string{"cuda", "vulkan", "cpu"}
	case strings.HasPrefix(capStr, amd):
		return []string{"rocm", "hip", "vulkan", "cpu"}
	case strings.HasPrefix(capStr, intel):
		return []string{"sycl", "intel", "cpu"}
	case strings.HasPrefix(capStr, metal):
		return []string{"metal", "cpu"}
	case strings.HasPrefix(capStr, darwinX86):
		return []string{"darwin-x86", "cpu"}
	default:
		return []string{"cpu"}
	}
}