Mirror of https://github.com/mudler/LocalAI.git (synced 2026-02-03 11:13:31 -05:00)
Compare commits (66 commits):

902e47f0b0 50bb78fd24 542f07ab2d 77c5acb9db 44bbf4d778 633c12f93d
6f24135f1d b72aa7b4fa e94e725479 e4ac7b14a3 ddb39c73f2 264b09fb1e
36dd45df51 e5599f87b8 e89b5cc0e3 10bf1084cc b08ae559b3 aa7cb7e18c
eadd3d4e46 2a18206033 39798d734e d0e99562af 6410c99bf2 55766d269b
ffa0ad1eac 623789a29e 2b9a3d32c9 f8b71dc5d0 1d3331b5cb 2c0b9c6349
3c6c976755 ebbcba342a 0de75519dc 37f5e4f5c1 ffa934b959 59311d8b1e
d9e25af7b5 e4f8b63b40 1364ae9be6 cfd6a9150d cd352d0c5f 8d47309695
5f6fc02a55 0b528458d8 caab380c5d 8a3a362504 07238eb743 e905e90dd7
08432d49e5 e51e2aacb9 9c3d85fc28 007ca647a7 59af928379 dbc2bb561b
c72c85dcac ef984901e6 9911ec84a3 1956681d4c 326f6e5ccb 302958efd6
3dc86b247d 5ec724af06 1f1e156bf0 df625e366a 9e6685ac9c 90c818aa71
.github/workflows/backend.yml (vendored, 17 changes)

@@ -111,6 +111,18 @@ jobs:
         backend: "diffusers"
         dockerfile: "./backend/Dockerfile.python"
         context: "./backend"
+      - build-type: ''
+        cuda-major-version: ""
+        cuda-minor-version: ""
+        platforms: 'linux/amd64'
+        tag-latest: 'auto'
+        tag-suffix: '-cpu-chatterbox'
+        runs-on: 'ubuntu-latest'
+        base-image: "ubuntu:22.04"
+        skip-drivers: 'true'
+        backend: "chatterbox"
+        dockerfile: "./backend/Dockerfile.python"
+        context: "./backend"
       # CUDA 11 additional backends
       - build-type: 'cublas'
         cuda-major-version: "11"
@@ -763,7 +775,7 @@ jobs:
         cuda-minor-version: ""
         platforms: 'linux/amd64'
         tag-latest: 'auto'
-        tag-suffix: '-gpu-hipblas-whisper'
+        tag-suffix: '-gpu-rocm-hipblas-whisper'
         base-image: "rocm/dev-ubuntu-22.04:6.4.3"
         runs-on: 'ubuntu-latest'
         skip-drivers: 'false'
@@ -968,6 +980,9 @@ jobs:
       - backend: "mlx"
         tag-suffix: "-metal-darwin-arm64-mlx"
         build-type: "mps"
+      - backend: "chatterbox"
+        tag-suffix: "-metal-darwin-arm64-chatterbox"
+        build-type: "mps"
      - backend: "mlx-vlm"
         tag-suffix: "-metal-darwin-arm64-mlx-vlm"
         build-type: "mps"
.github/workflows/labeler.yml (vendored, 2 changes)

@@ -9,4 +9,4 @@ jobs:
       pull-requests: write
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/labeler@v5
+      - uses: actions/labeler@v6
.github/workflows/localaibot_automerge.yml (vendored, 3 changes)

@@ -6,7 +6,8 @@ permissions:
   contents: write
   pull-requests: write
   packages: read
+  issues: write # for Homebrew/actions/post-comment
+  actions: write # to dispatch publish workflow
 jobs:
   dependabot:
     runs-on: ubuntu-latest
.github/workflows/release.yaml (vendored, 4 changes)

@@ -41,7 +41,7 @@ jobs:
       - name: Upload DMG to Release
         uses: softprops/action-gh-release@v2
         with:
-          files: ./dist/LocalAI-Launcher.dmg
+          files: ./dist/LocalAI.dmg
   launcher-build-linux:
     runs-on: ubuntu-latest
     steps:
@@ -61,4 +61,4 @@ jobs:
       - name: Upload Linux launcher artifacts
         uses: softprops/action-gh-release@v2
         with:
-          files: ./local-ai-launcher-linux.tar.xz
+          files: ./local-ai-launcher-linux.tar.xz
.github/workflows/stalebot.yml (vendored, 2 changes)

@@ -10,7 +10,7 @@ jobs:
   stale:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9
+      - uses: actions/stale@3a9db7e6a41a89f618792c92c0e97cc736e1b13f # v9
        with:
          stale-issue-message: 'This issue is stale because it has been open 90 days with no activity. Remove stale label or comment or this will be closed in 5 days.'
          stale-pr-message: 'This PR is stale because it has been open 90 days with no activity. Remove stale label or comment or this will be closed in 10 days.'
Makefile (9 changes)

@@ -117,8 +117,8 @@ run: ## run local-ai
	CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) run ./

 test-models/testmodel.ggml:
-	mkdir test-models
-	mkdir test-dir
+	mkdir -p test-models
+	mkdir -p test-dir
	wget -q https://huggingface.co/mradermacher/gpt2-alpaca-gpt4-GGUF/resolve/main/gpt2-alpaca-gpt4.Q4_K_M.gguf -O test-models/testmodel.ggml
	wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
	wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
@@ -369,6 +369,9 @@ backends/kitten-tts: docker-build-kitten-tts docker-save-kitten-tts build
 backends/kokoro: docker-build-kokoro docker-save-kokoro build
	./local-ai backends install "ocifile://$(abspath ./backend-images/kokoro.tar)"

+backends/chatterbox: docker-build-chatterbox docker-save-chatterbox build
+	./local-ai backends install "ocifile://$(abspath ./backend-images/chatterbox.tar)"
+
 backends/llama-cpp-darwin: build
	bash ./scripts/build/llama-cpp-darwin.sh
	./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)"
@@ -493,7 +496,7 @@ docker-build-bark:
	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:bark -f backend/Dockerfile.python --build-arg BACKEND=bark .

 docker-build-chatterbox:
-	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:chatterbox -f backend/Dockerfile.python --build-arg BACKEND=chatterbox .
+	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:chatterbox -f backend/Dockerfile.python --build-arg BACKEND=chatterbox ./backend

 docker-build-exllama2:
	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:exllama2 -f backend/Dockerfile.python --build-arg BACKEND=exllama2 .
README.md

@@ -43,7 +43,7 @@
 > :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
 >
-> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples) Try on
+> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples) Try on
 [](https://t.me/localaiofficial_bot)

 [](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
@@ -110,6 +110,12 @@ curl https://localai.io/install.sh | sh

 For more installation options, see [Installer Options](https://localai.io/docs/advanced/installer/).

+### macOS Download:
+
+<a href="https://github.com/mudler/LocalAI/releases/latest/download/LocalAI.dmg">
+  <img src="https://img.shields.io/badge/Download-macOS-blue?style=for-the-badge&logo=apple&logoColor=white" alt="Download LocalAI for macOS"/>
+</a>
+
 Or run with docker:

 ### CPU only image:
(model config, file name not shown in this view)

@@ -2,10 +2,10 @@ context_size: 4096
 f16: true
 backend: llama-cpp
 mmap: true
-mmproj: minicpm-v-2_6-mmproj-f16.gguf
+mmproj: minicpm-v-4_5-mmproj-f16.gguf
 name: gpt-4o
 parameters:
-  model: minicpm-v-2_6-Q4_K_M.gguf
+  model: minicpm-v-4_5-Q4_K_M.gguf
 stopwords:
 - <|im_end|>
 - <dummy32000>
@@ -42,9 +42,9 @@ template:
   <|im_start|>assistant

 download_files:
-- filename: minicpm-v-2_6-Q4_K_M.gguf
-  sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
-  uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
-- filename: minicpm-v-2_6-mmproj-f16.gguf
-  uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
-  sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd
+- filename: minicpm-v-4_5-Q4_K_M.gguf
+  sha256: c1c3c33100b15b4caf7319acce4e23c0eb0ce1cbd12f70e8d24f05aa67b7512f
+  uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/ggml-model-Q4_K_M.gguf
+- filename: minicpm-v-4_5-mmproj-f16.gguf
+  uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/mmproj-model-f16.gguf
+  sha256: 7a7225a32e8d453aaa3d22d8c579b5bf833c253f784cdb05c99c9a76fd616df8

(second model config, file name not shown in this view)

@@ -2,10 +2,10 @@ context_size: 4096
 backend: llama-cpp
 f16: true
 mmap: true
-mmproj: minicpm-v-2_6-mmproj-f16.gguf
+mmproj: minicpm-v-4_5-mmproj-f16.gguf
 name: gpt-4o
 parameters:
-  model: minicpm-v-2_6-Q4_K_M.gguf
+  model: minicpm-v-4_5-Q4_K_M.gguf
 stopwords:
 - <|im_end|>
 - <dummy32000>
@@ -42,9 +42,9 @@ template:
   <|im_start|>assistant

 download_files:
-- filename: minicpm-v-2_6-Q4_K_M.gguf
-  sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
-  uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
-- filename: minicpm-v-2_6-mmproj-f16.gguf
-  uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
-  sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd
+- filename: minicpm-v-4_5-Q4_K_M.gguf
+  sha256: c1c3c33100b15b4caf7319acce4e23c0eb0ce1cbd12f70e8d24f05aa67b7512f
+  uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/ggml-model-Q4_K_M.gguf
+- filename: minicpm-v-4_5-mmproj-f16.gguf
+  uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/mmproj-model-f16.gguf
+  sha256: 7a7225a32e8d453aaa3d22d8c579b5bf833c253f784cdb05c99c9a76fd616df8

(third model config, file name not shown in this view)

@@ -2,10 +2,10 @@ context_size: 4096
 backend: llama-cpp
 f16: true
 mmap: true
-mmproj: minicpm-v-2_6-mmproj-f16.gguf
+mmproj: minicpm-v-4_5-mmproj-f16.gguf
 name: gpt-4o
 parameters:
-  model: minicpm-v-2_6-Q4_K_M.gguf
+  model: minicpm-v-4_5-Q4_K_M.gguf
 stopwords:
 - <|im_end|>
 - <dummy32000>
@@ -43,9 +43,9 @@ template:


 download_files:
-- filename: minicpm-v-2_6-Q4_K_M.gguf
-  sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
-  uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
-- filename: minicpm-v-2_6-mmproj-f16.gguf
-  uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
-  sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd
+- filename: minicpm-v-4_5-Q4_K_M.gguf
+  sha256: c1c3c33100b15b4caf7319acce4e23c0eb0ce1cbd12f70e8d24f05aa67b7512f
+  uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/ggml-model-Q4_K_M.gguf
+- filename: minicpm-v-4_5-mmproj-f16.gguf
+  uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/mmproj-model-f16.gguf
+  sha256: 7a7225a32e8d453aaa3d22d8c579b5bf833c253f784cdb05c99c9a76fd616df8
(backend gRPC proto definitions)

@@ -276,6 +276,7 @@ message TranscriptRequest {
   string language = 3;
   uint32 threads = 4;
   bool translate = 5;
+  bool diarize = 6;
 }

 message TranscriptResult {
@@ -305,7 +306,7 @@ message GenerateImageRequest {
   // Diffusers
   string EnableParameters = 10;
   int32 CLIPSkip = 11;

   // Reference images for models that support them (e.g., Flux Kontext)
   repeated string ref_images = 12;
 }
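The new `diarize` flag travels from the CLI, through `ModelTranscription`, into this request message. A minimal sketch of a Go caller populating the updated message; the field names follow the proto above, while the generated package path and the audio path are assumptions:

package main

import (
	"fmt"

	pb "github.com/mudler/LocalAI/pkg/grpc/proto" // assumed generated-package path
)

func main() {
	// Build a transcription request with diarization enabled.
	req := &pb.TranscriptRequest{
		Dst:       "/tmp/audio.wav", // illustrative path
		Language:  "en",
		Threads:   4,
		Translate: false,
		Diarize:   true, // new field (bool diarize = 6)
	}
	fmt.Println(req.Diarize)
}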
(llama.cpp backend Makefile)

@@ -1,5 +1,5 @@
-LLAMA_VERSION?=3de008208b9b8a33f49f979097a99b4d59e6e521
+LLAMA_VERSION?=0320ac5264279d74f8ee91bafa6c90e9ab9bbb91
 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp

 CMAKE_ARGS?=
(llama.cpp backend gRPC server)

@@ -701,7 +701,7 @@ public:
     */

    // for the shape of input/content, see tokenize_input_prompts()
-    json prompt = body.at("prompt");
+    json prompt = body.at("embeddings");

    auto tokenized_prompts = tokenize_input_prompts(ctx_server.vocab, ctx_server.mctx, prompt, true, true);
@@ -712,6 +712,7 @@ public:
        }
    }

+    int embd_normalize = 2; // default to Euclidean/L2 norm
    // create and queue the task
    json responses = json::array();
    bool error = false;
@@ -725,9 +726,8 @@ public:
        task.index = i;
        task.prompt_tokens = std::move(tokenized_prompts[i]);

-        // OAI-compat
-        task.params.oaicompat = OAICOMPAT_TYPE_EMBEDDING;
+        task.params.oaicompat = OAICOMPAT_TYPE_NONE;
+        task.params.embd_normalize = embd_normalize;
        tasks.push_back(std::move(task));
    }
@@ -743,9 +743,8 @@ public:
            responses.push_back(res->to_json());
        }
    }, [&](const json & error_data) {
-        return grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, error_data.value("content", ""));
+        error = true;
    }, [&]() {
        // NOTE: we should try to check when the writer is closed here
        return false;
    });
@@ -755,12 +754,36 @@ public:
        return grpc::Status(grpc::StatusCode::INTERNAL, "Error in receiving results");
    }

-    std::vector<float> embeddings = responses[0].value("embedding", std::vector<float>());
-    // loop the vector and set the embeddings results
-    for (int i = 0; i < embeddings.size(); i++) {
-        embeddingResult->add_embeddings(embeddings[i]);
-    }
+    std::cout << "[DEBUG] Responses size: " << responses.size() << std::endl;
+
+    // Process the responses and extract embeddings
+    for (const auto & response_elem : responses) {
+        // Check if the response has an "embedding" field
+        if (response_elem.contains("embedding")) {
+            json embedding_data = json_value(response_elem, "embedding", json::array());
+
+            if (embedding_data.is_array() && !embedding_data.empty()) {
+                for (const auto & embedding_vector : embedding_data) {
+                    if (embedding_vector.is_array()) {
+                        for (const auto & embedding_value : embedding_vector) {
+                            embeddingResult->add_embeddings(embedding_value.get<float>());
+                        }
+                    }
+                }
+            }
+        } else {
+            // Check if the response itself contains the embedding data directly
+            if (response_elem.is_array()) {
+                for (const auto & embedding_value : response_elem) {
+                    embeddingResult->add_embeddings(embedding_value.get<float>());
+                }
+            }
+        }
+    }

    return grpc::Status::OK;
 }
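The rewritten loop above concatenates every returned embedding vector into the single flat `embeddings` field of the gRPC reply. A consumer that sent several prompts therefore has to re-chunk the flat buffer; a standalone sketch of the idea, with an assumed fixed dimension (not LocalAI's actual client code):

package main

import "fmt"

// chunk splits a flat embedding buffer back into per-prompt vectors,
// assuming every vector has the same dimension dim.
func chunk(flat []float32, dim int) [][]float32 {
	var out [][]float32
	for i := 0; i+dim <= len(flat); i += dim {
		out = append(out, flat[i:i+dim])
	}
	return out
}

func main() {
	// Two 4-dimensional embeddings flattened the way the C++ loop emits them.
	flat := []float32{0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8}
	for i, v := range chunk(flat, 4) {
		fmt.Printf("embedding %d: %v\n", i, v)
	}
}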
backend/go/stablediffusion-ggml/.gitignore (vendored, 2 changes)

@@ -1,4 +1,6 @@
 package/
 sources/
+.cache/
+build/
 libgosd.so
 stablediffusion-ggml
(stablediffusion-ggml backend Makefile)

@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)

 # stablediffusion.cpp (ggml)
 STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
-STABLEDIFFUSION_GGML_VERSION?=4c6475f9176bf99271ccf5a2817b30a490b83db0
+STABLEDIFFUSION_GGML_VERSION?=0ebe6fe118f125665939b27c89f34ed38716bff8

 CMAKE_ARGS+=-DGGML_MAX_NAME=128
(stablediffusion-ggml backend, gosd.cpp)

@@ -4,17 +4,11 @@
 #include <stdio.h>
 #include <string.h>
 #include <time.h>
-#include <iostream>
-#include <random>
-#include <string>
-#include <vector>
 #include <filesystem>
 #include "gosd.h"

-// #include "preprocessing.hpp"
-#include "flux.hpp"
 #include "stable-diffusion.h"

 #define STB_IMAGE_IMPLEMENTATION
 #define STB_IMAGE_STATIC
 #include "stb_image.h"
@@ -29,7 +23,7 @@

 // Names of the sampler method, same order as enum sample_method in stable-diffusion.h
 const char* sample_method_str[] = {
-    "euler_a",
+    "default",
     "euler",
     "heun",
     "dpm2",
@@ -41,19 +35,27 @@ const char* sample_method_str[] = {
     "lcm",
     "ddim_trailing",
     "tcd",
+    "euler_a",
 };

+static_assert(std::size(sample_method_str) == SAMPLE_METHOD_COUNT, "sample method mismatch");
+
 // Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h
-const char* schedule_str[] = {
+const char* schedulers[] = {
     "default",
     "discrete",
     "karras",
     "exponential",
     "ays",
     "gits",
+    "smoothstep",
 };

+static_assert(std::size(schedulers) == SCHEDULE_COUNT, "schedulers mismatch");
+
 sd_ctx_t* sd_c;
+// Moved from the context (load time) to generation time params
+scheduler_t scheduler = scheduler_t::DEFAULT;

 sample_method_t sample_method;

@@ -105,7 +107,7 @@ int load_model(const char *model, char *model_path, char* options[], int threads
     const char *clip_g_path = "";
     const char *t5xxl_path = "";
     const char *vae_path = "";
-    const char *scheduler = "";
+    const char *scheduler_str = "";
     const char *sampler = "";
     char *lora_dir = model_path;
     bool lora_dir_allocated = false;
@@ -133,7 +135,7 @@ int load_model(const char *model, char *model_path, char* options[], int threads
             vae_path = optval;
         }
         if (!strcmp(optname, "scheduler")) {
-            scheduler = optval;
+            scheduler_str = optval;
         }
         if (!strcmp(optname, "sampler")) {
             sampler = optval;
@@ -166,26 +168,17 @@ int load_model(const char *model, char *model_path, char* options[], int threads
     }
     if (sample_method_found == -1) {
         fprintf(stderr, "Invalid sample method, default to EULER_A!\n");
-        sample_method_found = EULER_A;
+        sample_method_found = sample_method_t::SAMPLE_METHOD_DEFAULT;
     }
     sample_method = (sample_method_t)sample_method_found;

-    int schedule_found = -1;
     for (int d = 0; d < SCHEDULE_COUNT; d++) {
-        if (!strcmp(scheduler, schedule_str[d])) {
-            schedule_found = d;
-            fprintf (stderr, "Found scheduler: %s\n", scheduler);
+        if (!strcmp(scheduler_str, schedulers[d])) {
+            scheduler = (scheduler_t)d;
+            fprintf (stderr, "Found scheduler: %s\n", scheduler_str);
         }
     }

-    if (schedule_found == -1) {
-        fprintf (stderr, "Invalid scheduler! using DEFAULT\n");
-        schedule_found = DEFAULT;
-    }
-
-    schedule_t schedule = (schedule_t)schedule_found;
-
     fprintf (stderr, "Creating context\n");
     sd_ctx_params_t ctx_params;
     sd_ctx_params_init(&ctx_params);
@@ -199,13 +192,10 @@ int load_model(const char *model, char *model_path, char* options[], int threads
     ctx_params.control_net_path = "";
     ctx_params.lora_model_dir = lora_dir;
     ctx_params.embedding_dir = "";
-    ctx_params.stacked_id_embed_dir = "";
     ctx_params.vae_decode_only = false;
     ctx_params.vae_tiling = false;
     ctx_params.free_params_immediately = false;
     ctx_params.n_threads = threads;
     ctx_params.rng_type = STD_DEFAULT_RNG;
-    ctx_params.schedule = schedule;
     sd_ctx_t* sd_ctx = new_sd_ctx(&ctx_params);

     if (sd_ctx == NULL) {
@@ -228,7 +218,49 @@ int load_model(const char *model, char *model_path, char* options[], int threads
     return 0;
 }

-int gen_image(char *text, char *negativeText, int width, int height, int steps, int64_t seed, char *dst, float cfg_scale, char *src_image, float strength, char *mask_image, char **ref_images, int ref_images_count) {
+void sd_tiling_params_set_enabled(sd_tiling_params_t *params, bool enabled) {
+    params->enabled = enabled;
+}
+
+void sd_tiling_params_set_tile_sizes(sd_tiling_params_t *params, int tile_size_x, int tile_size_y) {
+    params->tile_size_x = tile_size_x;
+    params->tile_size_y = tile_size_y;
+}
+
+void sd_tiling_params_set_rel_sizes(sd_tiling_params_t *params, float rel_size_x, float rel_size_y) {
+    params->rel_size_x = rel_size_x;
+    params->rel_size_y = rel_size_y;
+}
+
+void sd_tiling_params_set_target_overlap(sd_tiling_params_t *params, float target_overlap) {
+    params->target_overlap = target_overlap;
+}
+
+sd_tiling_params_t* sd_img_gen_params_get_vae_tiling_params(sd_img_gen_params_t *params) {
+    return &params->vae_tiling_params;
+}
+
+sd_img_gen_params_t* sd_img_gen_params_new(void) {
+    sd_img_gen_params_t *params = (sd_img_gen_params_t *)std::malloc(sizeof(sd_img_gen_params_t));
+    sd_img_gen_params_init(params);
+    return params;
+}
+
+void sd_img_gen_params_set_prompts(sd_img_gen_params_t *params, const char *prompt, const char *negative_prompt) {
+    params->prompt = prompt;
+    params->negative_prompt = negative_prompt;
+}
+
+void sd_img_gen_params_set_dimensions(sd_img_gen_params_t *params, int width, int height) {
+    params->width = width;
+    params->height = height;
+}
+
+void sd_img_gen_params_set_seed(sd_img_gen_params_t *params, int64_t seed) {
+    params->seed = seed;
+}
+
+int gen_image(sd_img_gen_params_t *p, int steps, char *dst, float cfg_scale, char *src_image, float strength, char *mask_image, char **ref_images, int ref_images_count) {

     sd_image_t* results;

@@ -236,20 +268,15 @@ int gen_image(char *text, char *negativeText, int width, int height, int steps,

     fprintf (stderr, "Generating image\n");

-    sd_img_gen_params_t p;
-    sd_img_gen_params_init(&p);
+    p->sample_params.guidance.txt_cfg = cfg_scale;
+    p->sample_params.guidance.slg.layers = skip_layers.data();
+    p->sample_params.guidance.slg.layer_count = skip_layers.size();
+    p->sample_params.sample_method = sample_method;
+    p->sample_params.sample_steps = steps;
+    p->sample_params.scheduler = scheduler;

-    p.prompt = text;
-    p.negative_prompt = negativeText;
-    p.guidance.txt_cfg = cfg_scale;
-    p.guidance.slg.layers = skip_layers.data();
-    p.guidance.slg.layer_count = skip_layers.size();
-    p.width = width;
-    p.height = height;
-    p.sample_method = sample_method;
-    p.sample_steps = steps;
-    p.seed = seed;
-    p.input_id_images_path = "";
+    int width = p->width;
+    int height = p->height;

     // Handle input image for img2img
     bool has_input_image = (src_image != NULL && strlen(src_image) > 0);
@@ -298,13 +325,13 @@ int gen_image(char *text, char *negativeText, int width, int height, int steps,
             input_image_buffer = resized_image_buffer;
         }

-        p.init_image = {(uint32_t)width, (uint32_t)height, 3, input_image_buffer};
-        p.strength = strength;
+        p->init_image = {(uint32_t)width, (uint32_t)height, 3, input_image_buffer};
+        p->strength = strength;
         fprintf(stderr, "Using img2img with strength: %.2f\n", strength);
     } else {
         // No input image, use empty image for text-to-image
-        p.init_image = {(uint32_t)width, (uint32_t)height, 3, NULL};
-        p.strength = 0.0f;
+        p->init_image = {(uint32_t)width, (uint32_t)height, 3, NULL};
+        p->strength = 0.0f;
     }

     // Handle mask image for inpainting
@@ -344,12 +371,12 @@ int gen_image(char *text, char *negativeText, int width, int height, int steps,
             mask_image_buffer = resized_mask_buffer;
         }

-        p.mask_image = {(uint32_t)width, (uint32_t)height, 1, mask_image_buffer};
+        p->mask_image = {(uint32_t)width, (uint32_t)height, 1, mask_image_buffer};
         fprintf(stderr, "Using inpainting with mask\n");
     } else {
         // No mask image, create default full mask
         default_mask_image_vec.resize(width * height, 255);
-        p.mask_image = {(uint32_t)width, (uint32_t)height, 1, default_mask_image_vec.data()};
+        p->mask_image = {(uint32_t)width, (uint32_t)height, 1, default_mask_image_vec.data()};
     }

     // Handle reference images
@@ -407,13 +434,15 @@ int gen_image(char *text, char *negativeText, int width, int height, int steps,
     }

     if (!ref_images_vec.empty()) {
-        p.ref_images = ref_images_vec.data();
-        p.ref_images_count = ref_images_vec.size();
+        p->ref_images = ref_images_vec.data();
+        p->ref_images_count = ref_images_vec.size();
         fprintf(stderr, "Using %zu reference images\n", ref_images_vec.size());
     }
 }

-    results = generate_image(sd_c, &p);
+    results = generate_image(sd_c, p);
+
+    std::free(p);

     if (results == NULL) {
         fprintf (stderr, "NO results\n");
(stablediffusion-ggml backend, gosd.go)

@@ -22,7 +22,18 @@ type SDGGML struct {

 var (
	LoadModel func(model, model_apth string, options []uintptr, threads int32, diff int) int
-	GenImage func(text, negativeText string, width, height, steps int, seed int64, dst string, cfgScale float32, srcImage string, strength float32, maskImage string, refImages []string, refImagesCount int) int
+	GenImage func(params uintptr, steps int, dst string, cfgScale float32, srcImage string, strength float32, maskImage string, refImages []string, refImagesCount int) int
+
+	TilingParamsSetEnabled       func(params uintptr, enabled bool)
+	TilingParamsSetTileSizes     func(params uintptr, tileSizeX int, tileSizeY int)
+	TilingParamsSetRelSizes      func(params uintptr, relSizeX float32, relSizeY float32)
+	TilingParamsSetTargetOverlap func(params uintptr, targetOverlap float32)
+
+	ImgGenParamsNew                func() uintptr
+	ImgGenParamsSetPrompts         func(params uintptr, prompt string, negativePrompt string)
+	ImgGenParamsSetDimensions      func(params uintptr, width int, height int)
+	ImgGenParamsSetSeed            func(params uintptr, seed int64)
+	ImgGenParamsGetVaeTilingParams func(params uintptr) uintptr
 )

 // Copied from Purego internal/strings
@@ -120,7 +131,15 @@ func (sd *SDGGML) GenerateImage(opts *pb.GenerateImageRequest) error {
	// Default strength for img2img (0.75 is a good default)
	strength := float32(0.75)

-	ret := GenImage(t, negative, int(opts.Width), int(opts.Height), int(opts.Step), int64(opts.Seed), dst, sd.cfgScale, srcImage, strength, maskImage, refImages, refImagesCount)
+	// free'd by GenImage
+	p := ImgGenParamsNew()
+	ImgGenParamsSetPrompts(p, t, negative)
+	ImgGenParamsSetDimensions(p, int(opts.Width), int(opts.Height))
+	ImgGenParamsSetSeed(p, int64(opts.Seed))
+	vaep := ImgGenParamsGetVaeTilingParams(p)
+	TilingParamsSetEnabled(vaep, false)
+
+	ret := GenImage(p, int(opts.Step), dst, sd.cfgScale, srcImage, strength, maskImage, refImages, refImagesCount)
	if ret != 0 {
		return fmt.Errorf("inference failed")
	}
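The opaque-pointer builder keeps the purego FFI surface to scalar arguments instead of marshalling a large struct. A sketch of how a caller could opt into VAE tiling through the same wrappers; the tile sizes and overlap are illustrative values, and the committed code above deliberately leaves tiling disabled:

// Sketch: configure VAE tiling before generation, using the wrapper
// functions registered above. All numeric values are illustrative.
p := ImgGenParamsNew()                     // malloc'd C struct, freed by GenImage
ImgGenParamsSetPrompts(p, "a red fox", "") // prompt / negative prompt
ImgGenParamsSetDimensions(p, 1024, 1024)
ImgGenParamsSetSeed(p, 42)

vaep := ImgGenParamsGetVaeTilingParams(p)
TilingParamsSetEnabled(vaep, true)       // decode the VAE in tiles to cap memory
TilingParamsSetTileSizes(vaep, 512, 512) // pixels per tile (illustrative)
TilingParamsSetTargetOverlap(vaep, 0.5)  // fraction of tile overlap (illustrative)

ret := GenImage(p, 20, "out.png", 7.0, "", 0.75, "", nil, 0)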
(stablediffusion-ggml backend, gosd.h)

@@ -1,8 +1,23 @@
 #include <cstdint>
+#include "stable-diffusion.h"

 #ifdef __cplusplus
 extern "C" {
 #endif

+void sd_tiling_params_set_enabled(sd_tiling_params_t *params, bool enabled);
+void sd_tiling_params_set_tile_sizes(sd_tiling_params_t *params, int tile_size_x, int tile_size_y);
+void sd_tiling_params_set_rel_sizes(sd_tiling_params_t *params, float rel_size_x, float rel_size_y);
+void sd_tiling_params_set_target_overlap(sd_tiling_params_t *params, float target_overlap);
+sd_tiling_params_t* sd_img_gen_params_get_vae_tiling_params(sd_img_gen_params_t *params);
+
+sd_img_gen_params_t* sd_img_gen_params_new(void);
+void sd_img_gen_params_set_prompts(sd_img_gen_params_t *params, const char *prompt, const char *negative_prompt);
+void sd_img_gen_params_set_dimensions(sd_img_gen_params_t *params, int width, int height);
+void sd_img_gen_params_set_seed(sd_img_gen_params_t *params, int64_t seed);
+
 int load_model(const char *model, char *model_path, char* options[], int threads, int diffusionModel);
-int gen_image(char *text, char *negativeText, int width, int height, int steps, int64_t seed, char *dst, float cfg_scale, char *src_image, float strength, char *mask_image, char **ref_images, int ref_images_count);
+int gen_image(sd_img_gen_params_t *p, int steps, char *dst, float cfg_scale, char *src_image, float strength, char *mask_image, char **ref_images, int ref_images_count);
 #ifdef __cplusplus
 }
 #endif
(stablediffusion-ggml backend, main.go)

@@ -11,14 +11,35 @@ var (
	addr = flag.String("addr", "localhost:50051", "the address to connect to")
 )

+type LibFuncs struct {
+	FuncPtr any
+	Name    string
+}
+
 func main() {
	gosd, err := purego.Dlopen("./libgosd.so", purego.RTLD_NOW|purego.RTLD_GLOBAL)
	if err != nil {
		panic(err)
	}

-	purego.RegisterLibFunc(&LoadModel, gosd, "load_model")
-	purego.RegisterLibFunc(&GenImage, gosd, "gen_image")
+	libFuncs := []LibFuncs{
+		{&LoadModel, "load_model"},
+		{&GenImage, "gen_image"},
+		{&TilingParamsSetEnabled, "sd_tiling_params_set_enabled"},
+		{&TilingParamsSetTileSizes, "sd_tiling_params_set_tile_sizes"},
+		{&TilingParamsSetRelSizes, "sd_tiling_params_set_rel_sizes"},
+		{&TilingParamsSetTargetOverlap, "sd_tiling_params_set_target_overlap"},
+
+		{&ImgGenParamsNew, "sd_img_gen_params_new"},
+		{&ImgGenParamsSetPrompts, "sd_img_gen_params_set_prompts"},
+		{&ImgGenParamsSetDimensions, "sd_img_gen_params_set_dimensions"},
+		{&ImgGenParamsSetSeed, "sd_img_gen_params_set_seed"},
+		{&ImgGenParamsGetVaeTilingParams, "sd_img_gen_params_get_vae_tiling_params"},
+	}
+
+	for _, lf := range libFuncs {
+		purego.RegisterLibFunc(lf.FuncPtr, gosd, lf.Name)
+	}

	flag.Parse()
(whisper backend Makefile)

@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)

 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
-WHISPER_CPP_VERSION?=7745fcf32846006128f16de429cfe1677c963b30
+WHISPER_CPP_VERSION?=edea8a9c3cf0eb7676dcdb604991eb2f95c3d984

 CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
(whisper backend, C++ glue)

@@ -7,34 +7,35 @@ static struct whisper_vad_context *vctx;
 static struct whisper_context *ctx;
 static std::vector<float> flat_segs;

-static void ggml_log_cb(enum ggml_log_level level, const char* log, void* data) {
-    const char* level_str;
+static void ggml_log_cb(enum ggml_log_level level, const char *log,
+                        void *data) {
+  const char *level_str;

-    if (!log) {
-        return;
-    }
+  if (!log) {
+    return;
+  }

-    switch (level) {
-    case GGML_LOG_LEVEL_DEBUG:
-        level_str = "DEBUG";
-        break;
-    case GGML_LOG_LEVEL_INFO:
-        level_str = "INFO";
-        break;
-    case GGML_LOG_LEVEL_WARN:
-        level_str = "WARN";
-        break;
-    case GGML_LOG_LEVEL_ERROR:
-        level_str = "ERROR";
-        break;
-    default: /* Potential future-proofing */
-        level_str = "?????";
-        break;
-    }
+  switch (level) {
+  case GGML_LOG_LEVEL_DEBUG:
+    level_str = "DEBUG";
+    break;
+  case GGML_LOG_LEVEL_INFO:
+    level_str = "INFO";
+    break;
+  case GGML_LOG_LEVEL_WARN:
+    level_str = "WARN";
+    break;
+  case GGML_LOG_LEVEL_ERROR:
+    level_str = "ERROR";
+    break;
+  default: /* Potential future-proofing */
+    level_str = "?????";
+    break;
+  }

-    fprintf(stderr, "[%-5s] ", level_str);
-    fputs(log, stderr);
-    fflush(stderr);
+  fprintf(stderr, "[%-5s] ", level_str);
+  fputs(log, stderr);
+  fflush(stderr);
 }

 int load_model(const char *const model_path) {
@@ -105,8 +106,8 @@ int vad(float pcmf32[], size_t pcmf32_len, float **segs_out,
   return 0;
 }

-int transcribe(uint32_t threads, char *lang, bool translate, float pcmf32[],
-               size_t pcmf32_len, size_t *segs_out_len) {
+int transcribe(uint32_t threads, char *lang, bool translate, bool tdrz,
+               float pcmf32[], size_t pcmf32_len, size_t *segs_out_len) {
   whisper_full_params wparams =
       whisper_full_default_params(WHISPER_SAMPLING_GREEDY);

@@ -120,6 +121,9 @@ int transcribe(uint32_t threads, char *lang, bool translate, float pcmf32[],
   wparams.translate = translate;
   wparams.debug_mode = true;
   wparams.print_progress = true;
+  wparams.tdrz_enable = tdrz;
+
+  fprintf(stderr, "info: Enable tdrz: %d\n", tdrz);

   if (whisper_full(ctx, wparams, pcmf32, pcmf32_len)) {
     fprintf(stderr, "error: transcription failed\n");
@@ -144,3 +148,7 @@ int n_tokens(int i) { return whisper_full_n_tokens(ctx, i); }
 int32_t get_token_id(int i, int j) {
   return whisper_full_get_token_id(ctx, i, j);
 }
+
+bool get_segment_speaker_turn_next(int i) {
+  return whisper_full_get_segment_speaker_turn_next(ctx, i);
+}
(whisper backend, Go bindings)

@@ -14,15 +14,16 @@ import (
 )

 var (
-	CppLoadModel func(modelPath string) int
-	CppLoadModelVAD func(modelPath string) int
-	CppVAD func(pcmf32 []float32, pcmf32Size uintptr, segsOut unsafe.Pointer, segsOutLen unsafe.Pointer) int
-	CppTranscribe func(threads uint32, lang string, translate bool, pcmf32 []float32, pcmf32Len uintptr, segsOutLen unsafe.Pointer) int
-	CppGetSegmentText func(i int) string
-	CppGetSegmentStart func(i int) int64
-	CppGetSegmentEnd func(i int) int64
-	CppNTokens func(i int) int
-	CppGetTokenID func(i int, j int) int
+	CppLoadModel                 func(modelPath string) int
+	CppLoadModelVAD              func(modelPath string) int
+	CppVAD                       func(pcmf32 []float32, pcmf32Size uintptr, segsOut unsafe.Pointer, segsOutLen unsafe.Pointer) int
+	CppTranscribe                func(threads uint32, lang string, translate bool, diarize bool, pcmf32 []float32, pcmf32Len uintptr, segsOutLen unsafe.Pointer) int
+	CppGetSegmentText            func(i int) string
+	CppGetSegmentStart           func(i int) int64
+	CppGetSegmentEnd             func(i int) int64
+	CppNTokens                   func(i int) int
+	CppGetTokenID                func(i int, j int) int
+	CppGetSegmentSpeakerTurnNext func(i int) bool
 )

 type Whisper struct {
@@ -122,7 +123,7 @@ func (w *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (pb.TranscriptR
	segsLen := uintptr(0xdeadbeef)
	segsLenPtr := unsafe.Pointer(&segsLen)

-	if ret := CppTranscribe(opts.Threads, opts.Language, opts.Translate, data, uintptr(len(data)), segsLenPtr); ret != 0 {
+	if ret := CppTranscribe(opts.Threads, opts.Language, opts.Translate, opts.Diarize, data, uintptr(len(data)), segsLenPtr); ret != 0 {
		return pb.TranscriptResult{}, fmt.Errorf("Failed Transcribe")
	}

@@ -134,6 +135,10 @@ func (w *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (pb.TranscriptR
		txt := strings.Clone(CppGetSegmentText(i))
		tokens := make([]int32, CppNTokens(i))

+		if opts.Diarize && CppGetSegmentSpeakerTurnNext(i) {
+			txt += " [SPEAKER_TURN]"
+		}
+
		for j := range tokens {
			tokens[j] = int32(CppGetTokenID(i, j))
		}
@@ -151,6 +156,6 @@ func (w *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (pb.TranscriptR

	return pb.TranscriptResult{
		Segments: segments,
-		Text: strings.TrimSpace(text),
+		Text:     strings.TrimSpace(text),
	}, nil
 }
(whisper backend, header)

@@ -6,11 +6,12 @@ int load_model(const char *const model_path);
 int load_model_vad(const char *const model_path);
 int vad(float pcmf32[], size_t pcmf32_size, float **segs_out,
         size_t *segs_out_len);
-int transcribe(uint32_t threads, char *lang, bool translate, float pcmf32[],
-               size_t pcmf32_len, size_t *segs_out_len);
+int transcribe(uint32_t threads, char *lang, bool translate, bool tdrz,
+               float pcmf32[], size_t pcmf32_len, size_t *segs_out_len);
 const char *get_segment_text(int i);
 int64_t get_segment_t0(int i);
 int64_t get_segment_t1(int i);
 int n_tokens(int i);
 int32_t get_token_id(int i, int j);
+bool get_segment_speaker_turn_next(int i);
 }
(whisper backend, main.go)

@@ -33,6 +33,7 @@ func main() {
		{&CppGetSegmentEnd, "get_segment_t1"},
		{&CppNTokens, "n_tokens"},
		{&CppGetTokenID, "get_token_id"},
+		{&CppGetSegmentSpeakerTurnNext, "get_segment_speaker_turn_next"},
	}

	for _, lf := range libFuncs {
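With diarization enabled, the whisper backend appends a literal " [SPEAKER_TURN]" marker to a segment whenever whisper.cpp's tinydiarize support reports a speaker change. A client can recover per-speaker chunks by splitting on that marker; a minimal standalone sketch (the marker string is the one hard-coded above, everything else is illustrative):

package main

import (
	"fmt"
	"strings"
)

// splitSpeakers cuts a diarized transcript into per-speaker chunks
// using the " [SPEAKER_TURN]" marker emitted by the whisper backend.
func splitSpeakers(transcript string) []string {
	parts := strings.Split(transcript, "[SPEAKER_TURN]")
	out := make([]string, 0, len(parts))
	for _, p := range parts {
		if s := strings.TrimSpace(p); s != "" {
			out = append(out, s)
		}
	}
	return out
}

func main() {
	t := "hello there [SPEAKER_TURN] hi, how are you? [SPEAKER_TURN] fine, thanks"
	for i, chunk := range splitSpeakers(t) {
		fmt.Printf("speaker turn %d: %s\n", i, chunk)
	}
}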
(backend gallery index)

@@ -350,6 +350,8 @@
     alias: "chatterbox"
     capabilities:
       nvidia: "cuda12-chatterbox"
+      metal: "metal-chatterbox"
+      default: "cpu-chatterbox"
 - &piper
   name: "piper"
   uri: "quay.io/go-skynet/local-ai-backends:latest-piper"
@@ -1223,6 +1225,28 @@
     name: "chatterbox-development"
     capabilities:
       nvidia: "cuda12-chatterbox-development"
+      metal: "metal-chatterbox-development"
+      default: "cpu-chatterbox-development"
+- !!merge <<: *chatterbox
+  name: "cpu-chatterbox"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-chatterbox"
+  mirrors:
+    - localai/localai-backends:latest-cpu-chatterbox
+- !!merge <<: *chatterbox
+  name: "cpu-chatterbox-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-cpu-chatterbox"
+  mirrors:
+    - localai/localai-backends:master-cpu-chatterbox
+- !!merge <<: *chatterbox
+  name: "metal-chatterbox"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-chatterbox"
+  mirrors:
+    - localai/localai-backends:latest-metal-darwin-arm64-chatterbox
+- !!merge <<: *chatterbox
+  name: "metal-chatterbox-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-chatterbox"
+  mirrors:
+    - localai/localai-backends:master-metal-darwin-arm64-chatterbox
 - !!merge <<: *chatterbox
   name: "cuda12-chatterbox-development"
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-chatterbox"
@@ -1,5 +1,6 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cpu
|
||||
accelerate
|
||||
torch==2.6.0
|
||||
torchaudio==2.6.0
|
||||
transformers==4.46.3
|
||||
chatterbox-tts
|
||||
chatterbox-tts==0.1.2
|
||||
@@ -2,5 +2,5 @@
|
||||
torch==2.6.0+cu118
|
||||
torchaudio==2.6.0+cu118
|
||||
transformers==4.46.3
|
||||
chatterbox-tts
|
||||
chatterbox-tts==0.1.2
|
||||
accelerate
|
||||
@@ -1,5 +1,5 @@
|
||||
torch==2.6.0
|
||||
torchaudio==2.6.0
|
||||
transformers==4.46.3
|
||||
chatterbox-tts
|
||||
chatterbox-tts==0.1.2
|
||||
accelerate
|
||||
|
||||
@@ -2,5 +2,5 @@
|
||||
torch==2.6.0+rocm6.1
|
||||
torchaudio==2.6.0+rocm6.1
|
||||
transformers==4.46.3
|
||||
chatterbox-tts
|
||||
chatterbox-tts==0.1.2
|
||||
accelerate
|
||||
|
||||
@@ -3,9 +3,8 @@ intel-extension-for-pytorch==2.3.110+xpu
|
||||
torch==2.3.1+cxx11.abi
|
||||
torchaudio==2.3.1+cxx11.abi
|
||||
transformers==4.46.3
|
||||
chatterbox-tts
|
||||
chatterbox-tts==0.1.2
|
||||
accelerate
|
||||
oneccl_bind_pt==2.3.100+xpu
|
||||
optimum[openvino]
|
||||
setuptools
|
||||
accelerate
|
||||
setuptools
|
||||
(launcher, config)

@@ -31,6 +31,7 @@ type Config struct {
	StartOnBoot     bool              `json:"start_on_boot"`
	LogLevel        string            `json:"log_level"`
	EnvironmentVars map[string]string `json:"environment_vars"`
+	ShowWelcome     *bool             `json:"show_welcome"`
 }

 // Launcher represents the main launcher application
@@ -148,6 +149,13 @@ func (l *Launcher) Initialize() error {
		log.Printf("Initializing empty EnvironmentVars map")
	}

+	// Set default welcome window preference
+	if l.config.ShowWelcome == nil {
+		true := true
+		l.config.ShowWelcome = &true
+		log.Printf("Setting default ShowWelcome: true")
+	}
+
	// Create directories
	os.MkdirAll(l.config.ModelsPath, 0755)
	os.MkdirAll(l.config.BackendsPath, 0755)
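One detail worth flagging in the hunk above: `true` is not a Go keyword but a predeclared identifier, so `true := true` legally shadows it inside the if block and compiles as written. A neutral variable name avoids the double-take; a sketch of the conventional form, same behavior assumed:

// Conventional alternative to shadowing the predeclared identifier:
if l.config.ShowWelcome == nil {
	v := true
	l.config.ShowWelcome = &v
	log.Printf("Setting default ShowWelcome: true")
}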
(launcher, tests)

@@ -48,6 +48,14 @@ var _ = Describe("Launcher", func() {
			config := launcherInstance.GetConfig()
			Expect(config.ModelsPath).ToNot(BeEmpty())
			Expect(config.BackendsPath).ToNot(BeEmpty())
		})
+
+		It("should set default ShowWelcome to true", func() {
+			err := launcherInstance.Initialize()
+			Expect(err).ToNot(HaveOccurred())
+
+			config := launcherInstance.GetConfig()
+			Expect(config.ShowWelcome).To(BeTrue())
+			Expect(config.Address).To(Equal("127.0.0.1:8080"))
+			Expect(config.LogLevel).To(Equal("info"))
+		})
(launcher, systray)

@@ -177,6 +177,9 @@ func (sm *SystrayManager) recreateMenu() {
		fyne.NewMenuItem("Settings", func() {
			sm.showSettings()
		}),
+		fyne.NewMenuItem("Show Welcome Window", func() {
+			sm.showWelcomeWindow()
+		}),
		fyne.NewMenuItem("Open Data Folder", func() {
			sm.openDataFolder()
		}),
@@ -243,6 +246,13 @@ func (sm *SystrayManager) showSettings() {
	sm.window.RequestFocus()
 }

+// showWelcomeWindow shows the welcome window
+func (sm *SystrayManager) showWelcomeWindow() {
+	if sm.launcher.GetUI() != nil {
+		sm.launcher.GetUI().ShowWelcomeWindow()
+	}
+}
+
 // openDataFolder opens the data folder in file manager
 func (sm *SystrayManager) openDataFolder() {
	dataPath := sm.launcher.GetDataPath()
(launcher, UI)

@@ -675,3 +675,121 @@ func (ui *LauncherUI) UpdateRunningState(isRunning bool) {
		}
	})
 }
+
+// ShowWelcomeWindow displays the welcome window with helpful information
+func (ui *LauncherUI) ShowWelcomeWindow() {
+	if ui.launcher == nil || ui.launcher.window == nil {
+		log.Printf("Cannot show welcome window: launcher or window is nil")
+		return
+	}
+
+	fyne.DoAndWait(func() {
+		// Create welcome window
+		welcomeWindow := ui.launcher.app.NewWindow("Welcome to LocalAI Launcher")
+		welcomeWindow.Resize(fyne.NewSize(600, 500))
+		welcomeWindow.CenterOnScreen()
+		welcomeWindow.SetCloseIntercept(func() {
+			welcomeWindow.Close()
+		})
+
+		// Title
+		titleLabel := widget.NewLabel("Welcome to LocalAI Launcher!")
+		titleLabel.TextStyle = fyne.TextStyle{Bold: true}
+		titleLabel.Alignment = fyne.TextAlignCenter
+
+		// Welcome message
+		welcomeText := `LocalAI Launcher makes it easy to run LocalAI on your system.
+
+What you can do:
+• Start and stop LocalAI server
+• Configure models and backends paths
+• Set environment variables
+• Check for updates automatically
+• Access LocalAI WebUI when running
+
+Getting Started:
+1. Configure your models and backends paths
+2. Click "Start LocalAI" to begin
+3. Use "Open WebUI" to access the interface
+4. Check the system tray for quick access`
+
+		welcomeLabel := widget.NewLabel(welcomeText)
+		welcomeLabel.Wrapping = fyne.TextWrapWord
+
+		// Useful links section
+		linksTitle := widget.NewLabel("Useful Links:")
+		linksTitle.TextStyle = fyne.TextStyle{Bold: true}
+
+		// Create link buttons
+		docsButton := widget.NewButton("📚 Documentation", func() {
+			ui.openURL("https://localai.io/docs/")
+		})
+
+		githubButton := widget.NewButton("🐙 GitHub Repository", func() {
+			ui.openURL("https://github.com/mudler/LocalAI")
+		})
+
+		modelsButton := widget.NewButton("🤖 Model Gallery", func() {
+			ui.openURL("https://localai.io/models/")
+		})
+
+		communityButton := widget.NewButton("💬 Community", func() {
+			ui.openURL("https://discord.gg/XgwjKptP7Z")
+		})
+
+		// Checkbox to disable welcome window
+		dontShowAgainCheck := widget.NewCheck("Don't show this welcome window again", func(checked bool) {
+			if ui.launcher != nil {
+				config := ui.launcher.GetConfig()
+				v := !checked
+				config.ShowWelcome = &v
+				ui.launcher.SetConfig(config)
+			}
+		})
+
+		config := ui.launcher.GetConfig()
+		if config.ShowWelcome != nil {
+			dontShowAgainCheck.SetChecked(*config.ShowWelcome)
+		}
+
+		// Close button
+		closeButton := widget.NewButton("Get Started", func() {
+			welcomeWindow.Close()
+		})
+		closeButton.Importance = widget.HighImportance
+
+		// Layout
+		linksContainer := container.NewVBox(
+			linksTitle,
+			docsButton,
+			githubButton,
+			modelsButton,
+			communityButton,
+		)
+
+		content := container.NewVBox(
+			titleLabel,
+			widget.NewSeparator(),
+			welcomeLabel,
+			widget.NewSeparator(),
+			linksContainer,
+			widget.NewSeparator(),
+			dontShowAgainCheck,
+			widget.NewSeparator(),
+			closeButton,
+		)
+
+		welcomeWindow.SetContent(content)
+		welcomeWindow.Show()
+	})
+}
+
+// openURL opens a URL in the default browser
+func (ui *LauncherUI) openURL(urlString string) {
+	parsedURL, err := url.Parse(urlString)
+	if err != nil {
+		log.Printf("Failed to parse URL %s: %v", urlString, err)
+		return
+	}
+	fyne.CurrentApp().OpenURL(parsedURL)
+}
(launcher, main.go)

@@ -55,6 +55,12 @@ func main() {
		// Load configuration into UI
		launcher.GetUI().LoadConfiguration()
		launcher.GetUI().UpdateStatus("Ready")
+
+		// Show welcome window if configured to do so
+		config := launcher.GetConfig()
+		if *config.ShowWelcome {
+			launcher.GetUI().ShowWelcomeWindow()
+		}
	}
 }()
(LocalAI CLI, main.go)

@@ -2,9 +2,7 @@ package main

 import (
	"os"
-	"os/signal"
	"path/filepath"
-	"syscall"

	"github.com/alecthomas/kong"
	"github.com/joho/godotenv"
@@ -24,15 +22,7 @@ func main() {
	log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr})
	zerolog.SetGlobalLevel(zerolog.InfoLevel)

-	// Catch signals from the OS requesting us to exit
-	go func() {
-		c := make(chan os.Signal, 1) // we need to reserve to buffer size 1, so the notifier are not blocked
-		signal.Notify(c, os.Interrupt, syscall.SIGTERM)
-		<-c
-		os.Exit(1)
-	}()
-
-	// handle loading environment variabled from .env files
+	// handle loading environment variables from .env files
	envFiles := []string{".env", "localai.env"}
	homeDir, err := os.UserHomeDir()
	if err == nil {
(core/backend, transcription)

@@ -12,7 +12,7 @@ import (
	"github.com/mudler/LocalAI/pkg/model"
 )

-func ModelTranscription(audio, language string, translate bool, ml *model.ModelLoader, modelConfig config.ModelConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {
+func ModelTranscription(audio, language string, translate bool, diarize bool, ml *model.ModelLoader, modelConfig config.ModelConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {

	if modelConfig.Backend == "" {
		modelConfig.Backend = model.WhisperBackend
@@ -34,6 +34,7 @@ func ModelTranscription(audio, language string, translate bool, ml *model.ModelL
		Dst:       audio,
		Language:  language,
		Translate: translate,
+		Diarize:   diarize,
		Threads:   uint32(*modelConfig.Threads),
	})
	if err != nil {
(core/cli, explorer.go)

@@ -5,6 +5,7 @@ import (
	"time"

	cliContext "github.com/mudler/LocalAI/core/cli/context"
+	"github.com/mudler/LocalAI/core/cli/signals"
	"github.com/mudler/LocalAI/core/explorer"
	"github.com/mudler/LocalAI/core/http"
 )
@@ -45,5 +46,7 @@ func (e *ExplorerCMD) Run(ctx *cliContext.Context) error {

	appHTTP := http.Explorer(db)

+	signals.Handler(nil)
+
	return appHTTP.Listen(e.Address)
 }
(core/cli, federated.go)

@@ -4,6 +4,7 @@ import (
	"context"

	cliContext "github.com/mudler/LocalAI/core/cli/context"
+	"github.com/mudler/LocalAI/core/cli/signals"
	"github.com/mudler/LocalAI/core/p2p"
 )

@@ -19,5 +20,7 @@ func (f *FederatedCLI) Run(ctx *cliContext.Context) error {

	fs := p2p.NewFederatedServer(f.Address, p2p.NetworkID(f.Peer2PeerNetworkID, p2p.FederatedID), f.Peer2PeerToken, !f.RandomWorker, f.TargetWorker)

+	signals.Handler(nil)
+
	return fs.Start(context.Background())
 }
(core/cli, run.go)

@@ -10,6 +10,7 @@ import (
	"github.com/mudler/LocalAI/core/application"
	cli_api "github.com/mudler/LocalAI/core/cli/api"
	cliContext "github.com/mudler/LocalAI/core/cli/context"
+	"github.com/mudler/LocalAI/core/cli/signals"
	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/core/http"
	"github.com/mudler/LocalAI/core/p2p"
@@ -224,5 +225,8 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
		return err
	}

+	// Catch signals from the OS requesting us to exit, and stop all backends
+	signals.Handler(app.ModelLoader())
+
	return appHTTP.Listen(r.Address)
 }
core/cli/signals/signals.go (new file, 25 lines)

+package signals
+
+import (
+	"os"
+	"os/signal"
+	"syscall"
+
+	"github.com/mudler/LocalAI/pkg/model"
+	"github.com/rs/zerolog/log"
+)
+
+func Handler(m *model.ModelLoader) {
+	// Catch signals from the OS requesting us to exit, and stop all backends
+	go func(m *model.ModelLoader) {
+		c := make(chan os.Signal, 1) // we need to reserve to buffer size 1, so the notifier are not blocked
+		signal.Notify(c, os.Interrupt, syscall.SIGTERM, syscall.SIGINT)
+		<-c
+		if m != nil {
+			if err := m.StopAllGRPC(); err != nil {
+				log.Error().Err(err).Msg("error while stopping all grpc backends")
+			}
+		}
+		os.Exit(1)
+	}(m)
+}
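This shared handler centralizes what each CLI subcommand previously open-coded: pass a ModelLoader when backends must be stopped on exit, or nil when there is nothing to clean up. A minimal standalone sketch of the same pattern, with names that are illustrative and independent of LocalAI's types:

package main

import (
	"fmt"
	"os"
	"os/signal"
	"syscall"
	"time"
)

// handler mirrors signals.Handler: an optional cleanup hook runs
// before the process exits on SIGINT/SIGTERM.
func handler(cleanup func() error) {
	go func() {
		c := make(chan os.Signal, 1) // buffered so the notifier never blocks
		signal.Notify(c, os.Interrupt, syscall.SIGTERM, syscall.SIGINT)
		<-c
		if cleanup != nil {
			if err := cleanup(); err != nil {
				fmt.Fprintln(os.Stderr, "cleanup failed:", err)
			}
		}
		os.Exit(1)
	}()
}

func main() {
	handler(func() error {
		fmt.Println("stopping backends...")
		return nil
	})
	time.Sleep(time.Hour) // stand-in for the server loop
}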
(core/cli, transcript.go)

@@ -20,6 +20,7 @@ type TranscriptCMD struct {
	Model      string `short:"m" required:"" help:"Model name to run the TTS"`
	Language   string `short:"l" help:"Language of the audio file"`
	Translate  bool   `short:"c" help:"Translate the transcription to english"`
+	Diarize    bool   `short:"d" help:"Mark speaker turns"`
	Threads    int    `short:"t" default:"1" help:"Number of threads used for parallel computation"`
	ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
 }
@@ -56,7 +57,7 @@ func (t *TranscriptCMD) Run(ctx *cliContext.Context) error {
		}
	}()

-	tr, err := backend.ModelTranscription(t.Filename, t.Language, t.Translate, ml, c, opts)
+	tr, err := backend.ModelTranscription(t.Filename, t.Language, t.Translate, t.Diarize, ml, c, opts)
	if err != nil {
		return err
	}
(core/cli/worker, shared flags)

@@ -2,6 +2,7 @@ package worker

 type WorkerFlags struct {
	BackendsPath       string `env:"LOCALAI_BACKENDS_PATH,BACKENDS_PATH" type:"path" default:"${basepath}/backends" help:"Path containing backends used for inferencing" group:"backends"`
+	BackendGalleries   string `env:"LOCALAI_BACKEND_GALLERIES,BACKEND_GALLERIES" help:"JSON list of backend galleries" group:"backends" default:"${backends}"`
	BackendsSystemPath string `env:"LOCALAI_BACKENDS_SYSTEM_PATH,BACKEND_SYSTEM_PATH" type:"path" default:"/usr/share/localai/backends" help:"Path containing system backends used for inferencing" group:"backends"`
	ExtraLLamaCPPArgs  string `name:"llama-cpp-args" env:"LOCALAI_EXTRA_LLAMA_CPP_ARGS,EXTRA_LLAMA_CPP_ARGS" help:"Extra arguments to pass to llama-cpp-rpc-server"`
 }
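The new flag carries the gallery list as JSON, which `findLLamaCPPBackend` below unmarshals into `[]config.Gallery`. A sketch of parsing such a value; the gallery name and URL are illustrative, and the struct is trimmed to the two fields the JSON appears to use:

package main

import (
	"encoding/json"
	"fmt"
)

// Gallery is a trimmed stand-in for config.Gallery; the real type
// lives in github.com/mudler/LocalAI/core/config.
type Gallery struct {
	Name string `json:"name"`
	URL  string `json:"url"`
}

func main() {
	// Illustrative value for LOCALAI_BACKEND_GALLERIES.
	raw := `[{"name":"localai","url":"https://example.com/backend/index.yaml"}]`

	var gals []Gallery
	if err := json.Unmarshal([]byte(raw), &gals); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", gals)
}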
@@ -1,6 +1,7 @@
|
||||
package worker
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
@@ -9,7 +10,10 @@ import (
|
||||
"syscall"
|
||||
|
||||
cliContext "github.com/mudler/LocalAI/core/cli/context"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/cli/signals"
|
||||
"github.com/mudler/LocalAI/core/gallery"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/LocalAI/pkg/system"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
@@ -20,9 +24,10 @@ type LLamaCPP struct {
|
||||
|
||||
const (
|
||||
llamaCPPRPCBinaryName = "llama-cpp-rpc-server"
|
||||
llamaCPPGalleryName = "llama-cpp"
|
||||
)
|
||||
|
||||
func findLLamaCPPBackend(systemState *system.SystemState) (string, error) {
|
||||
func findLLamaCPPBackend(galleries string, systemState *system.SystemState) (string, error) {
|
||||
backends, err := gallery.ListSystemBackends(systemState)
|
||||
if err != nil {
|
||||
log.Warn().Msgf("Failed listing system backends: %s", err)
|
||||
@@ -30,9 +35,19 @@ func findLLamaCPPBackend(systemState *system.SystemState) (string, error) {
|
||||
}
|
||||
log.Debug().Msgf("System backends: %v", backends)
|
||||
|
||||
backend, ok := backends.Get("llama-cpp")
|
||||
backend, ok := backends.Get(llamaCPPGalleryName)
|
||||
if !ok {
|
||||
return "", errors.New("llama-cpp backend not found, install it first")
|
||||
ml := model.NewModelLoader(systemState, true)
|
||||
var gals []config.Gallery
|
||||
if err := json.Unmarshal([]byte(galleries), &gals); err != nil {
|
||||
log.Error().Err(err).Msg("failed loading galleries")
|
||||
return "", err
|
||||
}
|
||||
err := gallery.InstallBackendFromGallery(gals, systemState, ml, llamaCPPGalleryName, nil, true)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msg("llama-cpp backend not found, failed to install it")
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
backendPath := filepath.Dir(backend.RunFile)
|
||||
|
||||
@@ -61,7 +76,7 @@ func (r *LLamaCPP) Run(ctx *cliContext.Context) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
grpcProcess, err := findLLamaCPPBackend(systemState)
|
||||
grpcProcess, err := findLLamaCPPBackend(r.BackendGalleries, systemState)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -69,6 +84,9 @@ func (r *LLamaCPP) Run(ctx *cliContext.Context) error {
|
||||
args := strings.Split(r.ExtraLLamaCPPArgs, " ")
|
||||
|
||||
args = append([]string{grpcProcess}, args...)
|
||||
|
||||
signals.Handler(nil)
|
||||
|
||||
return syscall.Exec(
|
||||
grpcProcess,
|
||||
args,
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
	"time"

	cliContext "github.com/mudler/LocalAI/core/cli/context"
	"github.com/mudler/LocalAI/core/cli/signals"
	"github.com/mudler/LocalAI/core/p2p"
	"github.com/mudler/LocalAI/pkg/system"
	"github.com/phayes/freeport"
@@ -69,7 +70,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
	for {
		log.Info().Msgf("Starting llama-cpp-rpc-server on '%s:%d'", address, port)

		grpcProcess, err := findLLamaCPPBackend(systemState)
		grpcProcess, err := findLLamaCPPBackend(r.BackendGalleries, systemState)
		if err != nil {
			log.Error().Err(err).Msg("Failed to find llama-cpp-rpc-server")
			return
@@ -106,6 +107,8 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
		}
	}

	signals.Handler(nil)

	for {
		time.Sleep(1 * time.Second)
	}

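The hunk above installs a signal handler and then parks the main goroutine in a sleep loop so the spawned llama-cpp-rpc-server keeps running. A rough sketch of that pattern, with the `signals.Handler` internals replaced by a plain `os/signal` handler (an assumption, since the helper's implementation is not shown here):

```go
package main

import (
	"fmt"
	"os"
	"os/signal"
	"syscall"
	"time"
)

func main() {
	// Stand-in for signals.Handler(nil): exit cleanly on SIGINT/SIGTERM.
	ch := make(chan os.Signal, 1)
	signal.Notify(ch, syscall.SIGINT, syscall.SIGTERM)
	go func() {
		fmt.Println("received", <-ch, "- shutting down")
		os.Exit(0)
	}()

	// Park the main goroutine, mirroring the worker's keep-alive loop.
	for {
		time.Sleep(1 * time.Second)
	}
}
```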
@@ -1,3 +1,5 @@
// Package gallery provides installation and registration utilities for LocalAI backends,
// including meta-backend resolution based on system capabilities.
package gallery

import (
@@ -5,6 +7,7 @@ import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"time"

	"github.com/mudler/LocalAI/core/config"
@@ -20,6 +23,12 @@ const (
	runFile = "run.sh"
)

// backendCandidate represents an installed concrete backend option for a given alias
type backendCandidate struct {
	name    string
	runFile string
}

// readBackendMetadata reads the metadata JSON file for a backend
func readBackendMetadata(backendPath string) (*BackendMetadata, error) {
	metadataPath := filepath.Join(backendPath, metadataFile)
@@ -58,7 +67,7 @@ func writeBackendMetadata(backendPath string, metadata *BackendMetadata) error {
	return nil
}

// Installs a model from the gallery
// InstallBackendFromGallery installs a backend from the gallery.
func InstallBackendFromGallery(galleries []config.Gallery, systemState *system.SystemState, modelLoader *model.ModelLoader, name string, downloadStatus func(string, string, string, float64), force bool) error {
	if !force {
		// check if we already have the backend installed
@@ -282,23 +291,18 @@ func (b SystemBackends) GetAll() []SystemBackend {
}

func ListSystemBackends(systemState *system.SystemState) (SystemBackends, error) {
	potentialBackends, err := os.ReadDir(systemState.Backend.BackendsPath)
	if err != nil {
		return nil, err
	}

	// Gather backends from system and user paths, then resolve alias conflicts by capability.
	backends := make(SystemBackends)

	systemBackends, err := os.ReadDir(systemState.Backend.BackendsSystemPath)
	if err == nil {
		// system backends are special, they are provided by the system and not managed by LocalAI
	// System-provided backends
	if systemBackends, err := os.ReadDir(systemState.Backend.BackendsSystemPath); err == nil {
		for _, systemBackend := range systemBackends {
			if systemBackend.IsDir() {
				systemBackendRunFile := filepath.Join(systemState.Backend.BackendsSystemPath, systemBackend.Name(), runFile)
				if _, err := os.Stat(systemBackendRunFile); err == nil {
				run := filepath.Join(systemState.Backend.BackendsSystemPath, systemBackend.Name(), runFile)
				if _, err := os.Stat(run); err == nil {
					backends[systemBackend.Name()] = SystemBackend{
						Name:    systemBackend.Name(),
						RunFile: filepath.Join(systemState.Backend.BackendsSystemPath, systemBackend.Name(), runFile),
						RunFile: run,
						IsMeta:  false,
						IsSystem: true,
						Metadata: nil,
@@ -307,64 +311,104 @@ func ListSystemBackends(systemState *system.SystemState) (SystemBackends, error)
			}
		}
	} else {
		log.Warn().Err(err).Msg("Failed to read system backends, but that's ok, we will just use the backends managed by LocalAI")
		log.Warn().Err(err).Msg("Failed to read system backends, proceeding with user-managed backends")
	}

	for _, potentialBackend := range potentialBackends {
		if potentialBackend.IsDir() {
			potentialBackendRunFile := filepath.Join(systemState.Backend.BackendsPath, potentialBackend.Name(), runFile)
	// User-managed backends and alias collection
	entries, err := os.ReadDir(systemState.Backend.BackendsPath)
	if err != nil {
		return nil, err
	}

			var metadata *BackendMetadata
	aliasGroups := make(map[string][]backendCandidate)
	metaMap := make(map[string]*BackendMetadata)

			// If metadata file does not exist, we just use the directory name
			// and we do not fill the other metadata (such as potential backend Aliases)
			metadataFilePath := filepath.Join(systemState.Backend.BackendsPath, potentialBackend.Name(), metadataFile)
			if _, err := os.Stat(metadataFilePath); os.IsNotExist(err) {
				metadata = &BackendMetadata{
					Name: potentialBackend.Name(),
				}
	for _, e := range entries {
		if !e.IsDir() {
			continue
		}
		dir := e.Name()
		run := filepath.Join(systemState.Backend.BackendsPath, dir, runFile)

		var metadata *BackendMetadata
		metadataPath := filepath.Join(systemState.Backend.BackendsPath, dir, metadataFile)
		if _, err := os.Stat(metadataPath); os.IsNotExist(err) {
			metadata = &BackendMetadata{Name: dir}
		} else {
			m, rerr := readBackendMetadata(filepath.Join(systemState.Backend.BackendsPath, dir))
			if rerr != nil {
				return nil, rerr
			}
			if m == nil {
				metadata = &BackendMetadata{Name: dir}
			} else {
				// Check for alias in metadata
				metadata, err = readBackendMetadata(filepath.Join(systemState.Backend.BackendsPath, potentialBackend.Name()))
				if err != nil {
					return nil, err
				metadata = m
			}
		}

		metaMap[dir] = metadata

		// Concrete backend entry
		if _, err := os.Stat(run); err == nil {
			backends[dir] = SystemBackend{
				Name:     dir,
				RunFile:  run,
				IsMeta:   false,
				Metadata: metadata,
			}
		}

		// Alias candidates
		if metadata.Alias != "" {
			aliasGroups[metadata.Alias] = append(aliasGroups[metadata.Alias], backendCandidate{name: dir, runFile: run})
		}

		// Meta backends indirection
		if metadata.MetaBackendFor != "" {
			backends[metadata.Name] = SystemBackend{
				Name:     metadata.Name,
				RunFile:  filepath.Join(systemState.Backend.BackendsPath, metadata.MetaBackendFor, runFile),
				IsMeta:   true,
				Metadata: metadata,
			}
		}
	}

	// Resolve aliases using system capability preferences
	tokens := systemState.BackendPreferenceTokens()
	for alias, cands := range aliasGroups {
		chosen := backendCandidate{}
		// Try preference tokens
		for _, t := range tokens {
			for _, c := range cands {
				if strings.Contains(strings.ToLower(c.name), t) && c.runFile != "" {
					chosen = c
					break
				}
			}

			if !backends.Exists(potentialBackend.Name()) {
				// We don't want to override aliases if already set, and if we are meta backend
				if _, err := os.Stat(potentialBackendRunFile); err == nil {
					backends[potentialBackend.Name()] = SystemBackend{
						Name:     potentialBackend.Name(),
						RunFile:  potentialBackendRunFile,
						IsMeta:   false,
						Metadata: metadata,
					}
				}
			}

			if metadata == nil {
				continue
			}

			if metadata.Alias != "" {
				backends[metadata.Alias] = SystemBackend{
					Name:     metadata.Alias,
					RunFile:  potentialBackendRunFile,
					IsMeta:   false,
					Metadata: metadata,
				}
			}

			if metadata.MetaBackendFor != "" {
				backends[metadata.Name] = SystemBackend{
					Name:     metadata.Name,
					RunFile:  filepath.Join(systemState.Backend.BackendsPath, metadata.MetaBackendFor, runFile),
					IsMeta:   true,
					Metadata: metadata,
			if chosen.runFile != "" {
				break
			}
		}
		// Fallback: first runnable
		if chosen.runFile == "" {
			for _, c := range cands {
				if c.runFile != "" {
					chosen = c
					break
				}
			}
		}
		if chosen.runFile == "" {
			continue
		}
		md := metaMap[chosen.name]
		backends[alias] = SystemBackend{
			Name:     alias,
			RunFile:  chosen.runFile,
			IsMeta:   false,
			Metadata: md,
		}
	}

	return backends, nil

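The alias-resolution loop above boils down to a two-pass selection: the first candidate whose directory name contains a capability preference token wins, and otherwise the first runnable candidate is used. A compact, runnable sketch of that policy (the token list is illustrative; the real one comes from `systemState.BackendPreferenceTokens()`):

```go
package main

import (
	"fmt"
	"strings"
)

type candidate struct{ name, runFile string }

// pick mirrors the alias-resolution shown above: the first candidate whose
// name matches a preference token wins; otherwise fall back to the first
// runnable one.
func pick(tokens []string, cands []candidate) candidate {
	for _, t := range tokens {
		for _, c := range cands {
			if strings.Contains(strings.ToLower(c.name), t) && c.runFile != "" {
				return c
			}
		}
	}
	for _, c := range cands {
		if c.runFile != "" {
			return c
		}
	}
	return candidate{}
}

func main() {
	cands := []candidate{
		{"cpu-llama-cpp", "/backends/cpu-llama-cpp/run.sh"},
		{"cuda12-llama-cpp", "/backends/cuda12-llama-cpp/run.sh"},
	}
	// On an NVIDIA system the preference tokens would rank "cuda" first.
	fmt.Println(pick([]string{"cuda", "cpu"}, cands).name) // cuda12-llama-cpp
}
```

The test below exercises exactly this behavior against the real `ListSystemBackends`.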
@@ -18,6 +18,73 @@ const (
	testImage = "quay.io/mudler/tests:localai-backend-test"
)

var _ = Describe("Runtime capability-based backend selection", func() {
	var tempDir string

	BeforeEach(func() {
		var err error
		tempDir, err = os.MkdirTemp("", "gallery-caps-*")
		Expect(err).NotTo(HaveOccurred())
	})

	AfterEach(func() {
		os.RemoveAll(tempDir)
	})

	It("ListSystemBackends prefers optimal alias candidate", func() {
		// Arrange two installed backends sharing the same alias
		must := func(err error) { Expect(err).NotTo(HaveOccurred()) }

		cpuDir := filepath.Join(tempDir, "cpu-llama-cpp")
		must(os.MkdirAll(cpuDir, 0o750))
		cpuMeta := &BackendMetadata{Alias: "llama-cpp", Name: "cpu-llama-cpp"}
		b, _ := json.Marshal(cpuMeta)
		must(os.WriteFile(filepath.Join(cpuDir, "metadata.json"), b, 0o644))
		must(os.WriteFile(filepath.Join(cpuDir, "run.sh"), []byte(""), 0o755))

		cudaDir := filepath.Join(tempDir, "cuda12-llama-cpp")
		must(os.MkdirAll(cudaDir, 0o750))
		cudaMeta := &BackendMetadata{Alias: "llama-cpp", Name: "cuda12-llama-cpp"}
		b, _ = json.Marshal(cudaMeta)
		must(os.WriteFile(filepath.Join(cudaDir, "metadata.json"), b, 0o644))
		must(os.WriteFile(filepath.Join(cudaDir, "run.sh"), []byte(""), 0o755))

		// Default system: alias should point to CPU
		sysDefault, err := system.GetSystemState(
			system.WithBackendPath(tempDir),
		)
		must(err)
		sysDefault.GPUVendor = "" // force default selection
		backs, err := ListSystemBackends(sysDefault)
		must(err)
		aliasBack, ok := backs.Get("llama-cpp")
		Expect(ok).To(BeTrue())
		Expect(aliasBack.RunFile).To(Equal(filepath.Join(cpuDir, "run.sh")))
		// concrete entries remain
		_, ok = backs.Get("cpu-llama-cpp")
		Expect(ok).To(BeTrue())
		_, ok = backs.Get("cuda12-llama-cpp")
		Expect(ok).To(BeTrue())

		// NVIDIA system: alias should point to CUDA
		// Force capability to nvidia to make the test deterministic on platforms like darwin/arm64 (which default to metal)
		os.Setenv("LOCALAI_FORCE_META_BACKEND_CAPABILITY", "nvidia")
		defer os.Unsetenv("LOCALAI_FORCE_META_BACKEND_CAPABILITY")

		sysNvidia, err := system.GetSystemState(
			system.WithBackendPath(tempDir),
		)
		must(err)
		sysNvidia.GPUVendor = "nvidia"
		sysNvidia.VRAM = 8 * 1024 * 1024 * 1024
		backs, err = ListSystemBackends(sysNvidia)
		must(err)
		aliasBack, ok = backs.Get("llama-cpp")
		Expect(ok).To(BeTrue())
		Expect(aliasBack.RunFile).To(Equal(filepath.Join(cudaDir, "run.sh")))
	})
})

var _ = Describe("Gallery Backends", func() {
	var (
		tempDir string

@@ -836,27 +836,40 @@ var _ = Describe("API test", func() {
	if runtime.GOOS != "linux" {
		Skip("test supported only on linux")
	}
	embeddingModel := openai.AdaEmbeddingV2
	resp, err := client.CreateEmbeddings(
		context.Background(),
		openai.EmbeddingRequest{
			Model: openai.AdaEmbeddingV2,
			Model: embeddingModel,
			Input: []string{"sun", "cat"},
		},
	)
	Expect(err).ToNot(HaveOccurred(), err)
	Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 2048))
	Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 2048))
	Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 4096))
	Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 4096))

	sunEmbedding := resp.Data[0].Embedding
	resp2, err := client.CreateEmbeddings(
		context.Background(),
		openai.EmbeddingRequest{
			Model: openai.AdaEmbeddingV2,
			Model: embeddingModel,
			Input: []string{"sun"},
		},
	)
	Expect(err).ToNot(HaveOccurred())
	Expect(resp2.Data[0].Embedding).To(Equal(sunEmbedding))
	Expect(resp2.Data[0].Embedding).ToNot(Equal(resp.Data[1].Embedding))

	resp3, err := client.CreateEmbeddings(
		context.Background(),
		openai.EmbeddingRequest{
			Model: embeddingModel,
			Input: []string{"cat"},
		},
	)
	Expect(err).ToNot(HaveOccurred())
	Expect(resp3.Data[0].Embedding).To(Equal(resp.Data[1].Embedding))
	Expect(resp3.Data[0].Embedding).ToNot(Equal(sunEmbedding))
})

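The updated expectations above (4096-dimensional vectors) can be reproduced against a running instance with the same `go-openai` client the test uses. A sketch, assuming LocalAI listens on localhost:8080 and serves a model registered under the Ada alias (the API key is a placeholder; LocalAI ignores it unless authentication is configured):

```go
package main

import (
	"context"
	"fmt"

	openai "github.com/sashabaranov/go-openai"
)

func main() {
	cfg := openai.DefaultConfig("sk-ignored") // placeholder key
	cfg.BaseURL = "http://localhost:8080/v1"  // assumed local endpoint
	client := openai.NewClientWithConfig(cfg)

	resp, err := client.CreateEmbeddings(context.Background(), openai.EmbeddingRequest{
		Model: openai.AdaEmbeddingV2,
		Input: []string{"sun", "cat"},
	})
	if err != nil {
		panic(err)
	}
	for i, d := range resp.Data {
		fmt.Printf("input %d -> %d dimensions\n", i, len(d.Embedding))
	}
}
```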
Context("External gRPC calls", func() {
@@ -398,9 +398,9 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
	}

	finishReason := "stop"
	if toolsCalled {
	if toolsCalled && len(input.Tools) > 0 {
		finishReason = "tool_calls"
	} else if toolsCalled && len(input.Tools) == 0 {
	} else if toolsCalled {
		finishReason = "function_call"
	}

@@ -443,11 +443,6 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
	log.Debug().Msgf("Text content to return: %s", textContentToReturn)
	noActionsToRun := len(results) > 0 && results[0].Name == noActionName || len(results) == 0

	finishReason := "stop"
	if len(input.Tools) > 0 {
		finishReason = "tool_calls"
	}

	switch {
	case noActionsToRun:
		result, err := handleQuestion(config, cl, input, ml, startupOptions, results, s, predInput)
@@ -457,11 +452,11 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
		}

		*c = append(*c, schema.Choice{
			FinishReason: finishReason,
			FinishReason: "stop",
			Message:      &schema.Message{Role: "assistant", Content: &result}})
	default:
		toolChoice := schema.Choice{
			FinishReason: finishReason,
			FinishReason: "tool_calls",
			Message: &schema.Message{
				Role: "assistant",
			},

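The first hunk corrects the streaming finish reason so that the tools API reports `tool_calls` while the legacy functions API (a call with no `tools` in the request) reports `function_call`. The decision reduces to a small pure function, sketched here for clarity (the names are ours, not the endpoint's):

```go
package main

import "fmt"

// finishReason mirrors the corrected branch above: the tools API reports
// "tool_calls", the legacy functions API (no tools in the request) reports
// "function_call", and plain completions report "stop".
func finishReason(toolsCalled bool, numTools int) string {
	switch {
	case toolsCalled && numTools > 0:
		return "tool_calls"
	case toolsCalled:
		return "function_call"
	default:
		return "stop"
	}
}

func main() {
	fmt.Println(finishReason(true, 2))  // tool_calls
	fmt.Println(finishReason(true, 0))  // function_call
	fmt.Println(finishReason(false, 0)) // stop
}
```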
@@ -36,6 +36,8 @@ func TranscriptEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, app
		return fiber.ErrBadRequest
	}

	diarize := c.FormValue("diarize", "false") != "false"

	// retrieve the file data from the request
	file, err := c.FormFile("file")
	if err != nil {
@@ -67,7 +69,7 @@ func TranscriptEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, app

	log.Debug().Msgf("Audio file copied to: %+v", dst)

	tr, err := backend.ModelTranscription(dst, input.Language, input.Translate, ml, *config, appConfig)
	tr, err := backend.ModelTranscription(dst, input.Language, input.Translate, diarize, ml, *config, appConfig)
	if err != nil {
		return err
	}

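With the `diarize` form value now read by `TranscriptEndpoint`, a client opts into diarization by adding one extra multipart field: anything other than `"false"` enables it. A minimal sketch of such a request against a locally running instance (the URL, model name, and file path are illustrative; `whisper-small-en-tdrz` is the tinydiarize model from the gallery below):

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"mime/multipart"
	"net/http"
	"os"
)

func main() {
	body := &bytes.Buffer{}
	w := multipart.NewWriter(body)

	// A whisper-family model installed on the server (illustrative name).
	_ = w.WriteField("model", "whisper-small-en-tdrz")
	// The new flag: anything other than "false" enables diarization.
	_ = w.WriteField("diarize", "true")

	f, err := os.Open("audio.wav") // illustrative path
	if err != nil {
		panic(err)
	}
	defer f.Close()
	part, _ := w.CreateFormFile("file", "audio.wav")
	if _, err := io.Copy(part, f); err != nil {
		panic(err)
	}
	w.Close()

	resp, err := http.Post("http://localhost:8080/v1/audio/transcriptions", w.FormDataContentType(), body)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out))
}
```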
@@ -9,5 +9,5 @@ import (

func TestLocalAI(t *testing.T) {
	RegisterFailHandler(Fail)
	RunSpecs(t, "LocalAI test suite")
	RunSpecs(t, "LocalAI HTTP test suite")
}

@@ -139,7 +139,7 @@ Due to the nature of ROCm it is best to run all implementations in containers as

### Limitations

Ongoing verification testing of ROCm compatability with integrated backends.
Ongoing verification testing of ROCm compatibility with integrated backends.
Please note the following list of verified backends and devices.

LocalAI hipblas images are built against the following targets: gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
@@ -172,7 +172,7 @@ The devices in the following list have been tested with `hipblas` images running
### System Prep

1. Check your GPU LLVM target is compatible with the version of ROCm. This can be found in the [LLVM Docs](https://llvm.org/docs/AMDGPUUsage.html).
2. Check which ROCm version is compatible with your LLVM target and your chosen OS (pay special attention to supported kernel versions). See the following for compatability: ([ROCm 6.0.0](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.0.0/reference/system-requirements.html)) or ([ROCm 6.0.2](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html))
2. Check which ROCm version is compatible with your LLVM target and your chosen OS (pay special attention to supported kernel versions). See the following for compatibility: ([ROCm 6.0.0](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.0.0/reference/system-requirements.html)) or ([ROCm 6.0.2](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html))
3. Install your chosen version of the `dkms` and `rocm` packages (the native package manager is recommended for this on any OS, as version changes are easier to apply that way if updates are required). Take care to restart after installing `amdgpu-dkms` and before installing `rocm`; for details, see the installation documentation for your chosen OS ([6.0.2](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/native-install/index.html) or [6.0.0](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.0.0/how-to/native-install/index.html))
4. Deploy. Yes, it's that easy.

@@ -216,7 +216,7 @@ The rebuild process will take some time to complete when deploying these contain
#### Example (k8s) (Advanced Deployment/WIP)

For k8s deployments there is an additional step required before deployment: the deployment of the [ROCm/k8s-device-plugin](https://artifacthub.io/packages/helm/amd-gpu-helm/amd-gpu).
For any k8s environment the documentation provided by AMD from the ROCm project should be successful. It is recommended that if you use rke2 or OpenShift you deploy the SUSE or RedHat provided version of this resource to ensure compatability.
For any k8s environment the documentation provided by AMD from the ROCm project should be successful. It is recommended that if you use rke2 or OpenShift you deploy the SUSE or RedHat provided version of this resource to ensure compatibility.
After this has been completed the [helm chart from go-skynet](https://github.com/go-skynet/helm-charts) can be configured and deployed mostly un-edited.

The following are details of the changes that should be made to ensure proper function.
@@ -241,7 +241,7 @@ spec:
          value: '0'
        # This variable indicates the devices available to container (0:device1 1:device2 2:device3) etc.
        # For multiple devices (say device 1 and 3) the value would be equivalent to HIP_VISIBLE_DEVICES="0,2"
        # Please take note of this when an iGPU is present in host system as compatability is not assured.
        # Please take note of this when an iGPU is present in host system as compatibility is not assured.
...
      resources:
        limits:
@@ -250,7 +250,7 @@ spec:
          amd.com/gpu: '1'
```

This configuration has been tested on a 'custom' cluster managed by SUSE Rancher that was deployed on top of Ubuntu 22.04.4; certification of other configurations is ongoing and compatability is not guaranteed.
This configuration has been tested on a 'custom' cluster managed by SUSE Rancher that was deployed on top of Ubuntu 22.04.4; certification of other configurations is ongoing and compatibility is not guaranteed.

### Notes


@@ -34,5 +34,5 @@ Grammars and function tools can be used as well in conjunction with vision APIs:

All-in-One images have already shipped the llava model as `gpt-4-vision-preview`, so no setup is needed in this case.

To setup the LLaVa models, follow the full example in the [configuration examples](https://github.com/mudler/LocalAI/blob/master/examples/configurations/README.md#llava).
To setup the LLaVa models, follow the full example in the [configuration examples](https://github.com/mudler/LocalAI-examples/blob/main/configurations/llava/llava.yaml).


@@ -27,6 +27,12 @@ curl https://localai.io/install.sh | sh

See [Installer]({{% relref "docs/advanced/installer" %}}) for all the supported options.

### macOS Download

<a href="https://github.com/mudler/LocalAI/releases/latest/download/LocalAI.dmg">
<img src="https://img.shields.io/badge/Download-macOS-blue?style=for-the-badge&logo=apple&logoColor=white" alt="Download LocalAI for macOS"/>
</a>

### Run with docker

@@ -176,7 +182,7 @@ MODEL_NAME=gemma-3-12b-it docker compose up

# NVIDIA GPU setup with custom multimodal and image models
MODEL_NAME=gemma-3-12b-it \
MULTIMODAL_MODEL=minicpm-v-2_6 \
MULTIMODAL_MODEL=minicpm-v-4_5 \
IMAGE_MODEL=flux.1-dev-ggml \
docker compose -f docker-compose.nvidia.yaml up
```

@@ -56,6 +56,12 @@ The fastest way to get started is with our one-line installer:
curl https://localai.io/install.sh | sh
```

### macOS Download

<a href="https://github.com/mudler/LocalAI/releases/latest/download/LocalAI.dmg">
<img src="https://img.shields.io/badge/Download-macOS-blue?style=for-the-badge&logo=apple&logoColor=white" alt="Download LocalAI for macOS"/>
</a>

Or use Docker for a quick start:

```bash

@@ -5,7 +5,13 @@ title = "LocalAI binaries"
weight = 26
+++

LocalAI binaries are available for both Linux and MacOS platforms and can be executed directly from your command line. These binaries are continuously updated and hosted on [our GitHub Releases page](https://github.com/mudler/LocalAI/releases). This method also supports Windows users via the Windows Subsystem for Linux (WSL).

### macOS Download

<a href="https://github.com/mudler/LocalAI/releases/latest/download/LocalAI.dmg">
<img src="https://img.shields.io/badge/Download-macOS-blue?style=for-the-badge&logo=apple&logoColor=white" alt="Download LocalAI for macOS"/>
</a>

Use the following one-liner command in your terminal to download and run LocalAI on Linux or MacOS:

@@ -43,6 +43,7 @@ LocalAI will attempt to automatically load models which are not explicitly confi
| [chatterbox](https://github.com/resemble-ai/chatterbox) | Chatterbox TTS | no | Text-to-speech | no | no | CUDA 11/12, CPU |
| [kitten-tts](https://github.com/KittenML/KittenTTS) | Kitten TTS | no | Text-to-speech | no | no | CPU |
| [silero-vad](https://github.com/snakers4/silero-vad) with [Golang bindings](https://github.com/streamer45/silero-vad-go) | Silero VAD | no | Voice Activity Detection | no | no | CPU |
| [mlx-audio](https://github.com/Blaizzy/mlx-audio) | MLX | no | Text-to-speech | no | no | Metal (Apple Silicon) |
{{< /table >}}

## Image & Video Generation

@@ -1,3 +1,3 @@
{
"version": "v3.4.0"
"version": "v3.5.1"
}

@@ -1,4 +1,30 @@
|
||||
---
|
||||
- &ernie
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
name: "baidu_ernie-4.5-21b-a3b-thinking"
|
||||
license: apache-2.0
|
||||
tags:
|
||||
- gguf
|
||||
- GPU
|
||||
- CPU
|
||||
- text-to-text
|
||||
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/64f187a2cc1c03340ac30498/TYYUxK8xD1AxExFMWqbZD.png
|
||||
urls:
|
||||
- https://huggingface.co/baidu/ERNIE-4.5-21B-A3B-Thinking
|
||||
- https://huggingface.co/bartowski/baidu_ERNIE-4.5-21B-A3B-Thinking-GGUF
|
||||
description: |
|
||||
Over the past three months, we have continued to scale the thinking capability of ERNIE-4.5-21B-A3B, improving both the quality and depth of reasoning, thereby advancing the competitiveness of ERNIE lightweight models in complex reasoning tasks. We are pleased to introduce ERNIE-4.5-21B-A3B-Thinking, featuring the following key enhancements:
|
||||
Significantly improved performance on reasoning tasks, including logical reasoning, mathematics, science, coding, text generation, and academic benchmarks that typically require human expertise.
|
||||
Efficient tool usage capabilities.
|
||||
Enhanced 128K long-context understanding capabilities.
|
||||
Note: This version has an increased thinking length. We strongly recommend its use in highly complex reasoning tasks. ERNIE-4.5-21B-A3B-Thinking is a text MoE post-trained model, with 21B total parameters and 3B activated parameters for each token.
|
||||
overrides:
|
||||
parameters:
|
||||
model: baidu_ERNIE-4.5-21B-A3B-Thinking-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: baidu_ERNIE-4.5-21B-A3B-Thinking-Q4_K_M.gguf
|
||||
sha256: f309f225c413324c585e74ce28c55e76dec25340156374551d39707fc2966840
|
||||
uri: huggingface://bartowski/baidu_ERNIE-4.5-21B-A3B-Thinking-GGUF/baidu_ERNIE-4.5-21B-A3B-Thinking-Q4_K_M.gguf
|
||||
- &mimo
|
||||
license: mit
|
||||
tags:
|
||||
@@ -2489,6 +2515,129 @@
|
||||
- filename: Qwen_Qwen3-4B-Thinking-2507-Q8_0.gguf
|
||||
sha256: 2c08db093bc57c2c77222d27ffe8d41cb0b5648e66ba84e5fb9ceab429f6735c
|
||||
uri: huggingface://bartowski/Qwen_Qwen3-4B-Thinking-2507-GGUF/Qwen_Qwen3-4B-Thinking-2507-Q8_0.gguf
|
||||
- !!merge <<: *qwen3
|
||||
name: "nousresearch_hermes-4-14b"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/7B7nMvHJiL72QzVBEPKOG.png
|
||||
urls:
|
||||
- https://huggingface.co/NousResearch/Hermes-4-14B
|
||||
- https://huggingface.co/bartowski/NousResearch_Hermes-4-14B-GGUF
|
||||
description: |
|
||||
Hermes 4 14B is a frontier, hybrid-mode reasoning model based on Qwen 3 14B by Nous Research that is aligned to you.
|
||||
|
||||
Read the Hermes 4 technical report here: Hermes 4 Technical Report
|
||||
|
||||
Chat with Hermes in Nous Chat: https://chat.nousresearch.com
|
||||
|
||||
Training highlights include a newly synthesized post-training corpus emphasizing verified reasoning traces, massive improvements in math, code, STEM, logic, creativity, and format-faithful outputs, while preserving general assistant quality and broadly neutral alignment.
|
||||
What’s new vs Hermes 3
|
||||
|
||||
Post-training corpus: Massively increased dataset size from 1M samples and 1.2B tokens to ~5M samples / ~60B tokens blended across reasoning and non-reasoning data.
|
||||
Hybrid reasoning mode with explicit <think>…</think> segments when the model decides to deliberate, and options to make your responses faster when you want.
|
||||
Reasoning that is top quality, expressive, improves math, code, STEM, logic, and even creative writing and subjective responses.
|
||||
Schema adherence & structured outputs: trained to produce valid JSON for given schemas and to repair malformed objects.
|
||||
Much easier to steer and align: extreme improvements on steerability, especially on reduced refusal rates.
|
||||
overrides:
|
||||
parameters:
|
||||
model: NousResearch_Hermes-4-14B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: NousResearch_Hermes-4-14B-Q4_K_M.gguf
|
||||
sha256: 7ad9be1e446e3da0c149fdf55284c90be666d3e13c6e2581587853f4f9538073
|
||||
uri: huggingface://bartowski/NousResearch_Hermes-4-14B-GGUF/NousResearch_Hermes-4-14B-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen3
|
||||
name: "minicpm-v-4_5"
|
||||
license: apache-2.0
|
||||
icon: https://avatars.githubusercontent.com/u/89920203
|
||||
urls:
|
||||
- https://huggingface.co/openbmb/MiniCPM-V-4_5-gguf
|
||||
- https://huggingface.co/openbmb/MiniCPM-V-4_5
|
||||
description: |
|
||||
MiniCPM-V 4.5 is the latest and most capable model in the MiniCPM-V series. The model is built on Qwen3-8B and SigLIP2-400M with a total of 8B parameters.
|
||||
tags:
|
||||
- llm
|
||||
- multimodal
|
||||
- gguf
|
||||
- gpu
|
||||
- qwen3
|
||||
- cpu
|
||||
overrides:
|
||||
mmproj: minicpm-v-4_5-mmproj-f16.gguf
|
||||
parameters:
|
||||
model: minicpm-v-4_5-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: minicpm-v-4_5-Q4_K_M.gguf
|
||||
sha256: c1c3c33100b15b4caf7319acce4e23c0eb0ce1cbd12f70e8d24f05aa67b7512f
|
||||
uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/ggml-model-Q4_K_M.gguf
|
||||
- filename: minicpm-v-4_5-mmproj-f16.gguf
|
||||
uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/mmproj-model-f16.gguf
|
||||
sha256: 7a7225a32e8d453aaa3d22d8c579b5bf833c253f784cdb05c99c9a76fd616df8
|
||||
- !!merge <<: *qwen3
|
||||
name: "aquif-ai_aquif-3.5-8b-think"
|
||||
urls:
|
||||
- https://huggingface.co/aquif-ai/aquif-3.5-8B-Think
|
||||
- https://huggingface.co/bartowski/aquif-ai_aquif-3.5-8B-Think-GGUF
|
||||
description: |
|
||||
The aquif-3.5 series is the successor to aquif-3, featuring a simplified naming scheme, expanded Mixture of Experts (MoE) options, and across-the-board performance improvements. This release streamlines model selection while delivering enhanced capabilities across reasoning, multilingual support, and general intelligence tasks.
|
||||
An experimental small-scale Mixture of Experts model designed for multilingual applications with minimal computational overhead. Despite its compact active parameter count, it demonstrates competitive performance against larger dense models.
|
||||
overrides:
|
||||
parameters:
|
||||
model: aquif-ai_aquif-3.5-8B-Think-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: aquif-ai_aquif-3.5-8B-Think-Q4_K_M.gguf
|
||||
sha256: 9e49b9c840de23bb3eb181ba7a102706c120b3e3d006983c3f14ebae307ff02e
|
||||
uri: huggingface://bartowski/aquif-ai_aquif-3.5-8B-Think-GGUF/aquif-ai_aquif-3.5-8B-Think-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen3
|
||||
name: "qwen3-stargate-sg1-uncensored-abliterated-8b-i1"
|
||||
icon: https://huggingface.co/DavidAU/Qwen3-Stargate-SG1-Uncensored-Abliterated-8B/resolve/main/sg1.jpg
|
||||
urls:
|
||||
- https://huggingface.co/DavidAU/Qwen3-Stargate-SG1-Uncensored-Abliterated-8B
|
||||
- https://huggingface.co/mradermacher/Qwen3-Stargate-SG1-Uncensored-Abliterated-8B-i1-GGUF
|
||||
description: |
|
||||
This repo contains the full precision source code, in "safe tensors" format to generate GGUFs, GPTQ, EXL2, AWQ, HQQ and other formats. The source code can also be used directly.
|
||||
|
||||
This model is specifically for SG1 (Stargate Series), science fiction, story generation (all genres) but also does coding and general tasks too.
|
||||
|
||||
This model can also be used for Role play.
|
||||
|
||||
This model will produce uncensored content (see notes below).
|
||||
|
||||
Fine tune (6 epochs, using Unsloth for Win 11) on an inhouse generated dataset to simulate / explore the Stargate SG1 Universe.
|
||||
|
||||
This version has the "canon" of all 10 seasons of SG1.
|
||||
|
||||
The model also contains, but was not trained on, content from Stargate Atlantis and Universe.
|
||||
|
||||
The fine-tune process adds knowledge to the model and alters all aspects of its operations.
|
||||
|
||||
Float32 (32 bit precision) was used to further increase the model's quality.
|
||||
|
||||
This model is based on "Goekdeniz-Guelmez/Josiefied-Qwen3-8B-abliterated-v1".
|
||||
|
||||
Example generations at the bottom of this page.
|
||||
|
||||
This is a Stargate (SG1) fine tune (1,331,953,664 of 9,522,689,024 (13.99% trained)), SIX epochs on this model.
|
||||
As this is an instruct model, it will also benefit from a detailed system prompt too.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Qwen3-Stargate-SG1-Uncensored-Abliterated-8B.i1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen3-Stargate-SG1-Uncensored-Abliterated-8B.i1-Q4_K_M.gguf
|
||||
sha256: 31ec697ccebbd7928c49714b8a0ec8be747be0f7c1ad71627967d2f8fe376990
|
||||
uri: huggingface://mradermacher/Qwen3-Stargate-SG1-Uncensored-Abliterated-8B-i1-GGUF/Qwen3-Stargate-SG1-Uncensored-Abliterated-8B.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen3
|
||||
url: "github:mudler/LocalAI/gallery/qwen3-deepresearch.yaml@master"
|
||||
name: "alibaba-nlp_tongyi-deepresearch-30b-a3b"
|
||||
urls:
|
||||
- https://huggingface.co/Alibaba-NLP/Tongyi-DeepResearch-30B-A3B
|
||||
- https://huggingface.co/bartowski/Alibaba-NLP_Tongyi-DeepResearch-30B-A3B-GGUF
|
||||
description: |
|
||||
We present Tongyi DeepResearch, an agentic large language model featuring 30 billion total parameters, with only 3 billion activated per token. Developed by Tongyi Lab, the model is specifically designed for long-horizon, deep information-seeking tasks. Tongyi-DeepResearch demonstrates state-of-the-art performance across a range of agentic search benchmarks, including Humanity's Last Exam, BrowserComp, BrowserComp-ZH, WebWalkerQA, GAIA, xbench-DeepSearch and FRAMES.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Alibaba-NLP_Tongyi-DeepResearch-30B-A3B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Alibaba-NLP_Tongyi-DeepResearch-30B-A3B-Q4_K_M.gguf
|
||||
sha256: 1afefb3b369ea2de191f24fe8ea22cbbb7b412357902f27bd81d693dde35c2d9
|
||||
uri: huggingface://bartowski/Alibaba-NLP_Tongyi-DeepResearch-30B-A3B-GGUF/Alibaba-NLP_Tongyi-DeepResearch-30B-A3B-Q4_K_M.gguf
|
||||
- &gemma3
|
||||
url: "github:mudler/LocalAI/gallery/gemma.yaml@master"
|
||||
name: "gemma-3-27b-it"
|
||||
@@ -7430,6 +7579,40 @@
|
||||
- filename: Qwentile2.5-32B-Instruct-Q4_K_M.gguf
|
||||
sha256: e476d6e3c15c78fc3f986d7ae8fa35c16116843827f2e6243c05767cef2f3615
|
||||
uri: huggingface://bartowski/Qwentile2.5-32B-Instruct-GGUF/Qwentile2.5-32B-Instruct-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "websailor-32b"
|
||||
urls:
|
||||
- https://huggingface.co/Alibaba-NLP/WebSailor-32B
|
||||
- https://huggingface.co/mradermacher/WebSailor-32B-GGUF
|
||||
description: |
|
||||
WebSailor is a complete post-training methodology designed to teach LLM agents sophisticated reasoning for complex web navigation and information-seeking tasks. It addresses the challenge of extreme uncertainty in vast information landscapes, a capability where previous open-source models lagged behind proprietary systems.
|
||||
We classify information-seeking tasks into three difficulty levels, where Level 3 represents problems with both high uncertainty and a complex, non-linear path to a solution. To generate these challenging tasks, we introduce SailorFog-QA, a novel data synthesis pipeline that constructs intricate knowledge graphs and then applies information obfuscation. This process creates questions with high initial uncertainty that demand creative exploration and transcend simple, structured reasoning patterns.
|
||||
Our training process begins by generating expert trajectories and then reconstructing the reasoning to create concise, action-oriented supervision signals, avoiding the stylistic and verbosity issues of teacher models. The agent is first given a "cold start" using rejection sampling fine-tuning (RFT) on a small set of high-quality examples to establish a baseline capability. This is followed by an efficient agentic reinforcement learning stage using our Duplicating Sampling Policy Optimization (DUPO) algorithm, which refines the agent's exploratory strategies.
|
||||
WebSailor establishes a new state-of-the-art for open-source agents, achieving outstanding results on difficult benchmarks like BrowseComp-en and BrowseComp-zh. Notably, our smaller models like WebSailor-7B outperform agents built on much larger backbones, highlighting the efficacy of our training paradigm. Ultimately, WebSailor closes the performance gap to proprietary systems, achieving results on par with agents like Doubao-Search.
|
||||
overrides:
|
||||
parameters:
|
||||
model: WebSailor-32B.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: WebSailor-32B.Q4_K_M.gguf
|
||||
sha256: 60cea732b8314cedf1807530857b4ebd9f6c41431b3223384eb7f94fbff7b5bc
|
||||
uri: huggingface://mradermacher/WebSailor-32B-GGUF/WebSailor-32B.Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "websailor-7b"
|
||||
urls:
|
||||
- https://huggingface.co/Alibaba-NLP/WebSailor-7B
|
||||
- https://huggingface.co/mradermacher/WebSailor-7B-GGUF
|
||||
description: |
|
||||
WebSailor is a complete post-training methodology designed to teach LLM agents sophisticated reasoning for complex web navigation and information-seeking tasks. It addresses the challenge of extreme uncertainty in vast information landscapes, a capability where previous open-source models lagged behind proprietary systems.
|
||||
We classify information-seeking tasks into three difficulty levels, where Level 3 represents problems with both high uncertainty and a complex, non-linear path to a solution. To generate these challenging tasks, we introduce SailorFog-QA, a novel data synthesis pipeline that constructs intricate knowledge graphs and then applies information obfuscation. This process creates questions with high initial uncertainty that demand creative exploration and transcend simple, structured reasoning patterns.
|
||||
Our training process begins by generating expert trajectories and then reconstructing the reasoning to create concise, action-oriented supervision signals, avoiding the stylistic and verbosity issues of teacher models. The agent is first given a "cold start" using rejection sampling fine-tuning (RFT) on a small set of high-quality examples to establish a baseline capability. This is followed by an efficient agentic reinforcement learning stage using our Duplicating Sampling Policy Optimization (DUPO) algorithm, which refines the agent's exploratory strategies.
|
||||
WebSailor establishes a new state-of-the-art for open-source agents, achieving outstanding results on difficult benchmarks like BrowseComp-en and BrowseComp-zh. Notably, our smaller models like WebSailor-7B outperform agents built on much larger backbones, highlighting the efficacy of our training paradigm. Ultimately, WebSailor closes the performance gap to proprietary systems, achieving results on par with agents like Doubao-Search.
|
||||
overrides:
|
||||
parameters:
|
||||
model: WebSailor-7B.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: WebSailor-7B.Q4_K_M.gguf
|
||||
sha256: 6ede884af5d82176606c3af19a5cc90da6fdf81a520f54284084f5e012217a56
|
||||
uri: huggingface://mradermacher/WebSailor-7B-GGUF/WebSailor-7B.Q4_K_M.gguf
|
||||
- &archfunct
|
||||
license: apache-2.0
|
||||
tags:
|
||||
@@ -9829,6 +10012,119 @@
|
||||
- filename: baichuan-inc_Baichuan-M2-32B-Q4_K_M.gguf
|
||||
sha256: 51907419518e6f79c28f75e4097518e54c2efecd85cb4c714334395fa2d591c2
|
||||
uri: huggingface://bartowski/baichuan-inc_Baichuan-M2-32B-GGUF/baichuan-inc_Baichuan-M2-32B-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "k2-think-i1"
|
||||
icon: https://huggingface.co/LLM360/K2-Think/resolve/main/banner.png
|
||||
urls:
|
||||
- https://huggingface.co/LLM360/K2-Think
|
||||
- https://huggingface.co/mradermacher/K2-Think-i1-GGUF
|
||||
description: |
|
||||
K2-Think is a 32 billion parameter open-weights general reasoning model with strong performance in competitive mathematical problem solving.
|
||||
overrides:
|
||||
parameters:
|
||||
model: K2-Think.i1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: K2-Think.i1-Q4_K_M.gguf
|
||||
sha256: 510fad18b0cf58059437338c1b5b982996ef89456a8d88da52eb3d50fe78b9fd
|
||||
uri: huggingface://mradermacher/K2-Think-i1-GGUF/K2-Think.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "holo1.5-72b"
|
||||
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/677d3f355f847864bb644112/OQyAJ33sssiTDIQEQ7oH_.png
|
||||
urls:
|
||||
- https://huggingface.co/Hcompany/Holo1.5-72B
|
||||
- https://huggingface.co/mradermacher/Holo1.5-72B-GGUF
|
||||
description: |
|
||||
Computer Use (CU) agents are AI systems that can interact with real applications—web, desktop, and mobile—on behalf of a user. They can navigate interfaces, manipulate elements, and answer questions about content, enabling powerful automation and productivity tools. CU agents are becoming increasingly important as they allow humans to delegate complex digital tasks safely and efficiently.
|
||||
The Holo1.5 series provides state-of-the-art foundational models for building such agents. Holo1.5 models excel at user interface (UI) localization and UI-based question answering (QA) across web, computer, and mobile environments, with strong performance on benchmarks including Screenspot-V2, Screenspot-Pro, GroundUI-Web, Showdown, and our newly introduced WebClick.
|
||||
overrides:
|
||||
mmproj: Holo1.5-72B.mmproj-Q8_0.gguf
|
||||
parameters:
|
||||
model: Holo1.5-72B.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Holo1.5-72B.Q4_K_M.gguf
|
||||
sha256: 3404347c245fefa352a3dc16134b5870f594ab8bff11e50582205b5538201a23
|
||||
uri: huggingface://mradermacher/Holo1.5-72B-GGUF/Holo1.5-72B.Q4_K_M.gguf
|
||||
- filename: Holo1.5-72B.mmproj-Q8_0.gguf
|
||||
sha256: f172cffc96a00d4f885eecffbc798912d37105f4191ba16a9947a5776b0f8a02
|
||||
uri: huggingface://mradermacher/Holo1.5-72B-GGUF/Holo1.5-72B.mmproj-Q8_0.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "holo1.5-7b"
|
||||
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/677d3f355f847864bb644112/OQyAJ33sssiTDIQEQ7oH_.png
|
||||
urls:
|
||||
- https://huggingface.co/Hcompany/Holo1.5-7B
|
||||
- https://huggingface.co/mradermacher/Holo1.5-7B-GGUF
|
||||
description: |
|
||||
Computer Use (CU) agents are AI systems that can interact with real applications—web, desktop, and mobile—on behalf of a user. They can navigate interfaces, manipulate elements, and answer questions about content, enabling powerful automation and productivity tools. CU agents are becoming increasingly important as they allow humans to delegate complex digital tasks safely and efficiently.
|
||||
The Holo1.5 series provides state-of-the-art foundational models for building such agents. Holo1.5 models excel at user interface (UI) localization and UI-based question answering (QA) across web, computer, and mobile environments, with strong performance on benchmarks including Screenspot-V2, Screenspot-Pro, GroundUI-Web, Showdown, and our newly introduced WebClick.
|
||||
overrides:
|
||||
mmproj: Holo1.5-7B.mmproj-Q8_0.gguf
|
||||
parameters:
|
||||
model: Holo1.5-7B.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Holo1.5-7B.Q4_K_M.gguf
|
||||
sha256: 37d1c060b73b783ffdab8d70fa47a6cff46cd34b1cf44b5bfbf4f20ff99eacdd
|
||||
uri: huggingface://mradermacher/Holo1.5-7B-GGUF/Holo1.5-7B.Q4_K_M.gguf
|
||||
- filename: Holo1.5-7B.mmproj-Q8_0.gguf
|
||||
sha256: a9bad2d3d9241251b8753d9be4ea737c03197077d96153c1365a62db709489f6
|
||||
uri: huggingface://mradermacher/Holo1.5-7B-GGUF/Holo1.5-7B.mmproj-Q8_0.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "holo1.5-3b"
|
||||
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/677d3f355f847864bb644112/OQyAJ33sssiTDIQEQ7oH_.png
|
||||
urls:
|
||||
- https://huggingface.co/Hcompany/Holo1.5-3B
|
||||
- https://huggingface.co/mradermacher/Holo1.5-3B-GGUF
|
||||
description: |
|
||||
Computer Use (CU) agents are AI systems that can interact with real applications—web, desktop, and mobile—on behalf of a user. They can navigate interfaces, manipulate elements, and answer questions about content, enabling powerful automation and productivity tools. CU agents are becoming increasingly important as they allow humans to delegate complex digital tasks safely and efficiently.
|
||||
The Holo1.5 series provides state-of-the-art foundational models for building such agents. Holo1.5 models excel at user interface (UI) localization and UI-based question answering (QA) across web, computer, and mobile environments, with strong performance on benchmarks including Screenspot-V2, Screenspot-Pro, GroundUI-Web, Showdown, and our newly introduced WebClick.
|
||||
overrides:
|
||||
mmproj: Holo1.5-3B.mmproj-Q8_0.gguf
|
||||
parameters:
|
||||
model: Holo1.5-3B.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Holo1.5-3B.Q4_K_M.gguf
|
||||
sha256: 5efb1318d439fe1f71e38825a17203c48ced7de4a5d0796427c8c638e817622a
|
||||
uri: huggingface://mradermacher/Holo1.5-3B-GGUF/Holo1.5-3B.Q4_K_M.gguf
|
||||
- filename: Holo1.5-3B.mmproj-Q8_0.gguf
|
||||
sha256: fb5cc798b386a4b680c306f061457cb16cc627c7d9ed401d660b8b940463142b
|
||||
uri: huggingface://mradermacher/Holo1.5-3B-GGUF/Holo1.5-3B.mmproj-Q8_0.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "webwatcher-7b"
|
||||
icon: https://huggingface.co/Alibaba-NLP/WebWatcher-7B/resolve/main/assets/webwatcher_logo.png
|
||||
urls:
|
||||
- https://huggingface.co/Alibaba-NLP/WebWatcher-7B
|
||||
- https://huggingface.co/mradermacher/WebWatcher-7B-GGUF
|
||||
description: |
|
||||
WebWatcher is a multimodal agent for deep research that possesses enhanced visual-language reasoning capabilities. Our work presents a unified framework that combines complex vision-language reasoning with multi-tool interaction.
|
||||
overrides:
|
||||
mmproj: WebWatcher-7B.mmproj-Q8_0.gguf
|
||||
parameters:
|
||||
model: WebWatcher-7B.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: WebWatcher-7B.Q4_K_M.gguf
|
||||
sha256: 300c76a51de59552f997ee7ee78ec519620931dea15c655111633b96de1a47f2
|
||||
uri: huggingface://mradermacher/WebWatcher-7B-GGUF/WebWatcher-7B.Q4_K_M.gguf
|
||||
- filename: WebWatcher-7B.mmproj-Q8_0.gguf
|
||||
sha256: 841dc1bcc4f69ca864518d2c9a9a37b1815169d9bd061b054e091061124e4e62
|
||||
uri: huggingface://mradermacher/WebWatcher-7B-GGUF/WebWatcher-7B.mmproj-Q8_0.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "webwatcher-32b"
|
||||
icon: https://huggingface.co/Alibaba-NLP/WebWatcher-32B/resolve/main/assets/webwatcher_logo.png
|
||||
urls:
|
||||
- https://huggingface.co/Alibaba-NLP/WebWatcher-32B
|
||||
- https://huggingface.co/mradermacher/WebWatcher-32B-GGUF
|
||||
description: |
|
||||
WebWatcher is a multimodal agent for deep research that possesses enhanced visual-language reasoning capabilities. Our work presents a unified framework that combines complex vision-language reasoning with multi-tool interaction.
|
||||
overrides:
|
||||
mmproj: WebWatcher-32B.mmproj-Q8_0.gguf
|
||||
parameters:
|
||||
model: WebWatcher-32B.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: WebWatcher-32B.Q4_K_M.gguf
|
||||
sha256: 6cd51d97b9451759a4ce4ec0c2048b36ff99fd9f83bb32cd9f06af6c5438c69b
|
||||
uri: huggingface://mradermacher/WebWatcher-32B-GGUF/WebWatcher-32B.Q4_K_M.gguf
|
||||
- filename: WebWatcher-32B.mmproj-Q8_0.gguf
|
||||
sha256: e8815515f71a959465cc62e08e0ef45d7d8592215139b34efece848552cb2327
|
||||
uri: huggingface://mradermacher/WebWatcher-32B-GGUF/WebWatcher-32B.mmproj-Q8_0.gguf
|
||||
- &llama31
|
||||
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" ## LLama3.1
|
||||
icon: https://avatars.githubusercontent.com/u/153379578
|
||||
@@ -20051,145 +20347,154 @@
|
||||
name: "whisper-base-q5_1"
|
||||
overrides:
|
||||
parameters:
|
||||
model: ggml-model-whisper-base-q5_1.bin
|
||||
model: ggml-base-q5_1.bin
|
||||
files:
|
||||
- filename: "ggml-model-whisper-base-q5_1.bin"
|
||||
uri: "https://ggml.ggerganov.com/ggml-model-whisper-base-q5_1.bin"
|
||||
- filename: "ggml-base-q5_1.bin"
|
||||
uri: "huggingface://ggerganov/whisper.cpp/ggml-base-q5_1.bin"
|
||||
sha256: 422f1ae452ade6f30a004d7e5c6a43195e4433bc370bf23fac9cc591f01a8898
|
||||
- !!merge <<: *whisper
|
||||
name: "whisper-base"
|
||||
overrides:
|
||||
parameters:
|
||||
model: ggml-model-whisper-base.bin
|
||||
model: ggml-base.bin
|
||||
files:
|
||||
- filename: "ggml-model-whisper-base.bin"
|
||||
uri: "https://ggml.ggerganov.com/ggml-model-whisper-base.bin"
|
||||
- filename: "ggml-base.bin"
|
||||
uri: "huggingface://ggerganov/whisper.cpp/ggml-base.bin"
|
||||
sha256: 60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe
|
||||
- !!merge <<: *whisper
|
||||
name: "whisper-base-en-q5_1"
|
||||
overrides:
|
||||
parameters:
|
||||
model: ggml-model-whisper-base.en-q5_1.bin
|
||||
model: ggml-base.en-q5_1.bin
|
||||
files:
|
||||
- filename: "ggml-model-whisper-base.en-q5_1.bin"
|
||||
uri: "https://ggml.ggerganov.com/ggml-model-whisper-base.en-q5_1.bin"
|
||||
- filename: "ggml-base.en-q5_1.bin"
|
||||
uri: "huggingface://ggerganov/whisper.cpp/ggml-base.en-q5_1.bin"
|
||||
sha256: 4baf70dd0d7c4247ba2b81fafd9c01005ac77c2f9ef064e00dcf195d0e2fdd2f
|
||||
- !!merge <<: *whisper
|
||||
name: "whisper-base-en"
|
||||
overrides:
|
||||
parameters:
|
||||
model: ggml-model-whisper-base.en.bin
|
||||
model: ggml-base.en.bin
|
||||
files:
|
||||
- filename: "ggml-model-whisper-base.en.bin"
|
||||
uri: "https://ggml.ggerganov.com/ggml-model-whisper-base.en.bin"
|
||||
- filename: "ggml-base.en.bin"
|
||||
uri: "huggingface://ggerganov/whisper.cpp/ggml-base.en.bin"
|
||||
sha256: a03779c86df3323075f5e796cb2ce5029f00ec8869eee3fdfb897afe36c6d002
|
||||
- !!merge <<: *whisper
|
||||
name: "whisper-large-q5_0"
|
||||
overrides:
|
||||
parameters:
|
||||
model: ggml-model-whisper-large-q5_0.bin
|
||||
model: ggml-large-q5_0.bin
|
||||
files:
|
||||
- filename: "ggml-model-whisper-large-q5_0.bin"
|
||||
uri: "https://ggml.ggerganov.com/ggml-model-whisper-large-q5_0.bin"
|
||||
- filename: "ggml-large-q5_0.bin"
|
||||
uri: "huggingface://ggerganov/whisper.cpp/ggml-large-q5_0.bin"
|
||||
sha256: 3a214837221e4530dbc1fe8d734f302af393eb30bd0ed046042ebf4baf70f6f2
|
||||
- !!merge <<: *whisper
|
||||
name: "whisper-medium-q5_0"
|
||||
overrides:
|
||||
parameters:
|
||||
model: ggml-model-whisper-medium-q5_0.bin
|
||||
model: ggml-medium-q5_0.bin
|
||||
files:
|
||||
- filename: "ggml-model-whisper-medium-q5_0.bin"
|
||||
uri: "https://ggml.ggerganov.com/ggml-model-whisper-medium-q5_0.bin"
|
||||
- filename: "ggml-medium-q5_0.bin"
|
||||
uri: "huggingface://ggerganov/whisper.cpp/ggml-medium-q5_0.bin"
|
||||
sha256: 19fea4b380c3a618ec4723c3eef2eb785ffba0d0538cf43f8f235e7b3b34220f
|
||||
- !!merge <<: *whisper
|
||||
name: "whisper-small-q5_1"
|
||||
overrides:
|
||||
parameters:
|
||||
model: ggml-model-whisper-small-q5_1.bin
|
||||
model: ggml-small-q5_1.bin
|
||||
files:
|
||||
- filename: "ggml-model-whisper-small-q5_1.bin"
|
||||
uri: "https://ggml.ggerganov.com/ggml-model-whisper-small-q5_1.bin"
|
||||
- filename: "ggml-small-q5_1.bin"
|
||||
uri: "huggingface://ggerganov/whisper.cpp/ggml-small-q5_1.bin"
|
||||
sha256: ae85e4a935d7a567bd102fe55afc16bb595bdb618e11b2fc7591bc08120411bb
|
||||
- !!merge <<: *whisper
|
||||
name: "whisper-small"
|
||||
overrides:
|
||||
parameters:
|
||||
model: ggml-model-whisper-small.bin
|
||||
model: ggml-small.bin
|
||||
files:
|
||||
- filename: "ggml-model-whisper-small.bin"
|
||||
uri: "https://ggml.ggerganov.com/ggml-model-whisper-small.bin"
|
||||
- filename: "ggml-small.bin"
|
||||
uri: "huggingface://ggerganov/whisper.cpp/ggml-small.bin"
|
||||
sha256: 1be3a9b2063867b937e64e2ec7483364a79917e157fa98c5d94b5c1fffea987b
|
||||
- !!merge <<: *whisper
|
||||
name: "whisper-small-en-tdrz"
|
||||
overrides:
|
||||
parameters:
|
||||
model: ggml-small.en-tdrz.bin
|
||||
files:
|
||||
- filename: "ggml-small.bin"
|
||||
uri: "huggingface://akashmjn/tinydiarize-whisper.cpp/ggml-small.en-tdrz.bin"
|
||||
sha256: ceac3ec06d1d98ef71aec665283564631055fd6129b79d8e1be4f9cc33cc54b4
|
||||
- !!merge <<: *whisper
|
||||
name: "whisper-small-en-q5_1"
|
||||
overrides:
|
||||
parameters:
|
||||
model: ggml-model-whisper-small.en-q5_1.bin
|
||||
model: ggml-small.en-q5_1.bin
|
||||
files:
|
||||
- filename: "ggml-model-whisper-small.en-q5_1.bin"
|
||||
uri: "https://ggml.ggerganov.com/ggml-model-whisper-small.en-q5_1.bin"
|
||||
- filename: "ggml-small.en-q5_1.bin"
|
||||
uri: "huggingface://ggerganov/whisper.cpp/ggml-small.en-q5_1.bin"
|
||||
sha256: bfdff4894dcb76bbf647d56263ea2a96645423f1669176f4844a1bf8e478ad30
|
||||
- !!merge <<: *whisper
|
||||
name: "whisper-small"
|
||||
overrides:
|
||||
parameters:
|
||||
model: ggml-model-whisper-small.en.bin
|
||||
model: ggml-small.en.bin
|
||||
files:
|
||||
- filename: "ggml-model-whisper-small.en.bin"
|
||||
uri: "https://ggml.ggerganov.com/ggml-model-whisper-small.en.bin"
|
||||
- filename: "ggml-small.en.bin"
|
||||
uri: "huggingface://ggerganov/whisper.cpp/ggml-small.en.bin"
|
||||
sha256: c6138d6d58ecc8322097e0f987c32f1be8bb0a18532a3f88f734d1bbf9c41e5d
|
||||
- !!merge <<: *whisper
|
||||
name: "whisper-small-q5_1"
|
||||
overrides:
|
||||
parameters:
|
||||
model: ggml-model-whisper-small-q5_1.bin
|
||||
model: ggml-small-q5_1.bin
|
||||
files:
|
||||
- filename: "ggml-model-whisper-small-q5_1.bin"
|
||||
uri: "https://ggml.ggerganov.com/ggml-model-whisper-small-q5_1.bin"
|
||||
- filename: "ggml-small-q5_1.bin"
|
||||
uri: "huggingface://ggerganov/whisper.cpp/ggml-small-q5_1.bin"
|
||||
sha256: ae85e4a935d7a567bd102fe55afc16bb595bdb618e11b2fc7591bc08120411bb
|
||||
- !!merge <<: *whisper
|
||||
name: "whisper-tiny"
|
||||
overrides:
|
||||
parameters:
|
||||
model: ggml-model-whisper-tiny.bin
|
||||
model: ggml-tiny.bin
|
||||
files:
|
||||
- filename: "ggml-model-whisper-tiny.bin"
|
||||
uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.bin"
|
||||
- filename: "ggml-tiny.bin"
|
||||
uri: "huggingface://ggerganov/whisper.cpp/ggml-tiny.bin"
|
||||
sha256: be07e048e1e599ad46341c8d2a135645097a538221678b7acdd1b1919c6e1b21
|
||||
- !!merge <<: *whisper
|
||||
name: "whisper-tiny-q5_1"
|
||||
overrides:
|
||||
parameters:
|
||||
model: ggml-model-whisper-tiny-q5_1.bin
|
||||
model: ggml-tiny-q5_1.bin
|
||||
files:
|
||||
- filename: "ggml-model-whisper-tiny-q5_1.bin"
|
||||
uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny-q5_1.bin"
|
||||
- filename: "ggml-tiny-q5_1.bin"
|
||||
uri: "huggingface://ggerganov/whisper.cpp/ggml-tiny-q5_1.bin"
|
||||
sha256: 818710568da3ca15689e31a743197b520007872ff9576237bda97bd1b469c3d7
|
||||
- !!merge <<: *whisper
|
||||
name: "whisper-tiny-en-q5_1"
|
||||
overrides:
|
||||
parameters:
|
||||
model: ggml-model-whisper-tiny.en-q5_1.bin
|
||||
model: ggml-tiny.en-q5_1.bin
|
||||
files:
|
||||
- filename: "ggml-model-whisper-tiny.en-q5_1.bin"
|
||||
uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.en-q5_1.bin"
|
||||
- filename: "ggml-tiny.en-q5_1.bin"
|
||||
uri: "huggingface://ggerganov/whisper.cpp/ggml-tiny.en-q5_1.bin"
|
||||
sha256: c77c5766f1cef09b6b7d47f21b546cbddd4157886b3b5d6d4f709e91e66c7c2b
|
||||
- !!merge <<: *whisper
|
||||
name: "whisper-tiny-en"
|
||||
overrides:
|
||||
parameters:
|
||||
model: ggml-model-whisper-tiny.en.bin
|
||||
model: ggml-tiny.en.bin
|
||||
files:
|
||||
- filename: "ggml-model-whisper-tiny.en.bin"
|
||||
uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.en.bin"
|
||||
- filename: "ggml-tiny.en.bin"
|
||||
uri: "huggingface://ggerganov/whisper.cpp/ggml-tiny.en.bin"
|
||||
sha256: 921e4cf8686fdd993dcd081a5da5b6c365bfde1162e72b08d75ac75289920b1f
|
||||
- !!merge <<: *whisper
|
||||
name: "whisper-tiny-en-q8_0"
|
||||
overrides:
|
||||
parameters:
|
||||
model: ggml-model-whisper-tiny.en-q8_0.bin
|
||||
model: ggml-tiny.en-q8_0.bin
|
||||
files:
|
||||
- filename: "ggml-model-whisper-tiny.en-q8_0.bin"
|
||||
uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.en-q8_0.bin"
|
||||
- filename: "ggml-tiny.en-q8_0.bin"
|
||||
uri: "huggingface://ggerganov/whisper.cpp/ggml-tiny.en-q8_0.bin"
|
||||
sha256: 5bc2b3860aa151a4c6e7bb095e1fcce7cf12c7b020ca08dcec0c6d018bb7dd94
|
||||
## Bert embeddings (llama3.2 drop-in)
|
||||
- !!merge <<: *llama32
|
||||
|
||||
45
gallery/qwen3-deepresearch.yaml
Normal file
@@ -0,0 +1,45 @@
|
||||
---
name: "qwen3"

config_file: |
  mmap: true
  backend: "llama-cpp"
  template:
    chat_message: |
      <|im_start|>{{if eq .RoleName "tool" }}user{{else}}{{ .RoleName }}{{end}}
      {{ if eq .RoleName "tool" -}}
      <tool_response>
      {{ end -}}
      {{ if .Content -}}
      {{.Content }}
      {{ end -}}
      {{ if eq .RoleName "tool" -}}
      </tool_response>
      {{ end -}}
      {{ if .FunctionCall -}}
      <tool_call>
      {{toJson .FunctionCall}}
      </tool_call>
      {{ end -}}<|im_end|>
    function: |
      <|im_start|>system
      You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
      {{range .Functions}}
      {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
      {{end}}
      For each function call return a json object with function name and arguments
      <|im_end|>
      {{.Input -}}
      <|im_start|>assistant
    chat: |
      {{.Input -}}
      <|im_start|>assistant
    completion: |
      {{.Input}}
  context_size: 8192
  f16: true
  stopwords:
    - '<|im_end|>'
    - '<dummy32000>'
    - '</s>'
    - '<|endoftext|>'
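The chat_message template above controls how each conversation turn is serialized: tool responses are re-labelled as user turns and wrapped in <tool_response> tags, and function calls are emitted as JSON inside <tool_call> tags. A standalone sketch of how it renders, using Go's text/template with a plain JSON marshal standing in for the engine's toJson helper (the Message struct is a minimal assumption, not LocalAI's actual type):

package main

import (
    "encoding/json"
    "os"
    "text/template"
)

// Message carries just the fields the chat_message template references.
type Message struct {
    RoleName     string
    Content      string
    FunctionCall map[string]any
}

const chatMessage = `<|im_start|>{{if eq .RoleName "tool" }}user{{else}}{{ .RoleName }}{{end}}
{{ if eq .RoleName "tool" -}}
<tool_response>
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{ end -}}
{{ if eq .RoleName "tool" -}}
</tool_response>
{{ end -}}
{{ if .FunctionCall -}}
<tool_call>
{{toJson .FunctionCall}}
</tool_call>
{{ end -}}<|im_end|>`

func main() {
    tmpl := template.Must(template.New("chat").Funcs(template.FuncMap{
        "toJson": func(v any) string { // stand-in for the real helper
            b, _ := json.Marshal(v)
            return string(b)
        },
    }).Parse(chatMessage))

    // A tool response renders as a user turn wrapped in <tool_response> tags.
    _ = tmpl.Execute(os.Stdout, Message{RoleName: "tool", Content: `{"temp": 21}`})
}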
30 go.mod
@@ -41,26 +41,26 @@ require (
github.com/otiai10/copy v1.14.1
github.com/otiai10/openaigo v1.7.0
github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5
github.com/prometheus/client_golang v1.22.0
github.com/prometheus/client_golang v1.23.0
github.com/rs/zerolog v1.33.0
github.com/russross/blackfriday v1.6.0
github.com/sashabaranov/go-openai v1.26.2
github.com/schollz/progressbar/v3 v3.14.4
github.com/shirou/gopsutil/v3 v3.24.5
github.com/streamer45/silero-vad-go v0.2.1
github.com/stretchr/testify v1.10.0
github.com/swaggo/swag v1.16.3
github.com/stretchr/testify v1.11.1
github.com/swaggo/swag v1.16.6
github.com/testcontainers/testcontainers-go v0.35.0
github.com/tmc/langchaingo v0.1.13
github.com/valyala/fasthttp v1.55.0
go.opentelemetry.io/otel v1.35.0
go.opentelemetry.io/otel/exporters/prometheus v0.50.0
go.opentelemetry.io/otel/metric v1.35.0
go.opentelemetry.io/otel/sdk/metric v1.28.0
go.opentelemetry.io/otel v1.38.0
go.opentelemetry.io/otel/exporters/prometheus v0.60.0
go.opentelemetry.io/otel/metric v1.38.0
go.opentelemetry.io/otel/sdk/metric v1.38.0
google.golang.org/grpc v1.67.1
gopkg.in/yaml.v2 v2.4.0
gopkg.in/yaml.v3 v3.0.1
oras.land/oras-go/v2 v2.5.0
oras.land/oras-go/v2 v2.6.0
)

require (
@@ -90,6 +90,7 @@ require (
github.com/go-text/render v0.2.0 // indirect
github.com/go-text/typesetting v0.2.1 // indirect
github.com/godbus/dbus/v5 v5.1.0 // indirect
github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc // indirect
github.com/hack-pad/go-indexeddb v0.3.2 // indirect
github.com/hack-pad/safejs v0.1.0 // indirect
github.com/jeandeaual/go-locale v0.0.0-20250612000132-0ef82f21eade // indirect
@@ -129,6 +130,7 @@ require (
github.com/pion/transport/v3 v3.0.7 // indirect
github.com/pion/turn/v4 v4.0.2 // indirect
github.com/pion/webrtc/v4 v4.1.2 // indirect
github.com/prometheus/otlptranslator v0.0.2 // indirect
github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529 // indirect
github.com/rymdport/portal v0.4.1 // indirect
github.com/savsgio/gotils v0.0.0-20240303185622-093b76447511 // indirect
@@ -144,7 +146,7 @@ require (
go.yaml.in/yaml/v3 v3.0.4 // indirect
golang.org/x/image v0.25.0 // indirect
golang.org/x/time v0.12.0 // indirect
google.golang.org/protobuf v1.36.7 // indirect
google.golang.org/protobuf v1.36.8 // indirect
)

require (
@@ -267,7 +269,7 @@ require (
github.com/multiformats/go-varint v0.0.7 // indirect
github.com/nwaples/rardecode v1.1.0 // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/image-spec v1.1.0
github.com/opencontainers/image-spec v1.1.1
github.com/opentracing/opentracing-go v1.2.0 // indirect
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect
github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
@@ -279,8 +281,8 @@ require (
github.com/polydawn/refmt v0.89.0 // indirect
github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect
github.com/prometheus/client_model v0.6.2 // indirect
github.com/prometheus/common v0.64.0 // indirect
github.com/prometheus/procfs v0.16.1 // indirect
github.com/prometheus/common v0.65.0 // indirect
github.com/prometheus/procfs v0.17.0 // indirect
github.com/quic-go/qpack v0.5.1 // indirect
github.com/quic-go/quic-go v0.54.0 // indirect
github.com/quic-go/webtransport-go v0.9.0 // indirect
@@ -308,8 +310,8 @@ require (
github.com/yuin/goldmark-emoji v1.0.5 // indirect
github.com/yusufpapurcu/wmi v1.2.4 // indirect
go.opencensus.io v0.24.0 // indirect
go.opentelemetry.io/otel/sdk v1.31.0 // indirect
go.opentelemetry.io/otel/trace v1.35.0 // indirect
go.opentelemetry.io/otel/sdk v1.38.0 // indirect
go.opentelemetry.io/otel/trace v1.38.0 // indirect
go.uber.org/dig v1.19.0 // indirect
go.uber.org/fx v1.24.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
60 go.sum
@@ -301,6 +301,8 @@ github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aN
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/gpustack/gguf-parser-go v0.17.0 h1:DkSziWLsiQM0pqqkr/zMcaBn94KY7iQTi4zmaHixDus=
github.com/gpustack/gguf-parser-go v0.17.0/go.mod h1:GvHh1Kvvq5ojCOsJ5UpwiJJmIjFw3Qk5cW7R+CZ3IJo=
github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc h1:GN2Lv3MGO7AS6PrRoT6yV5+wkrOpcszoIsO4+4ds248=
github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc/go.mod h1:+JKpmjMGhpgPL+rXZ5nsZieVzvarn86asRlBg4uNGnk=
github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
github.com/grpc-ecosystem/grpc-gateway v1.5.0 h1:WcmKMm43DR7RdtlkEXQJyo5ws8iTp98CyhCCbOHMvNI=
github.com/grpc-ecosystem/grpc-gateway v1.5.0/go.mod h1:RSKVYQBd5MCa4OVpNdGskqpgL2+G+NZTnrVHpWWfpdw=
@@ -560,8 +562,8 @@ github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A=
github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k=
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug=
github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM=
github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040=
github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M=
github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs=
github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc=
github.com/openzipkin/zipkin-go v0.1.1/go.mod h1:NtoC/o8u3JlF1lSlyPNswIbeQH9bJTmOf0Erfk+hxe8=
@@ -639,18 +641,20 @@ github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55/go.mod h1:Om
github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g=
github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U=
github.com/prometheus/client_golang v0.8.0/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q=
github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0=
github.com/prometheus/client_golang v1.23.0 h1:ust4zpdl9r4trLY/gSjlm07PuiBq2ynaXXlptpfy8Uc=
github.com/prometheus/client_golang v1.23.0/go.mod h1:i/o0R9ByOnHX0McrTMTyhYvKE4haaf2mW08I+jGAjEE=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
github.com/prometheus/common v0.0.0-20180801064454-c7de2306084e/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro=
github.com/prometheus/common v0.64.0 h1:pdZeA+g617P7oGv1CzdTzyeShxAGrTBsolKNOLQPGO4=
github.com/prometheus/common v0.64.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8=
github.com/prometheus/common v0.65.0 h1:QDwzd+G1twt//Kwj/Ww6E9FQq1iVMmODnILtW1t2VzE=
github.com/prometheus/common v0.65.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8=
github.com/prometheus/otlptranslator v0.0.2 h1:+1CdeLVrRQ6Psmhnobldo0kTp96Rj80DRXRd5OSnMEQ=
github.com/prometheus/otlptranslator v0.0.2/go.mod h1:P8AwMgdD7XEr6QRUJ2QWLpiAZTgTE2UYgjlu3svompI=
github.com/prometheus/procfs v0.0.0-20180725123919-05ee40e3a273/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg=
github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is=
github.com/prometheus/procfs v0.17.0 h1:FuLQ+05u4ZI+SS/w9+BWEM2TXiHKsUQ9TADiRH7DuK0=
github.com/prometheus/procfs v0.17.0/go.mod h1:oPQLaDAMRbA+u8H5Pbfq+dl3VDAvHxMUOVhe0wYB2zw=
github.com/quic-go/qpack v0.5.1 h1:giqksBPnT/HDtZ6VhtFKgoLOWmlyo9Ei6u9PqzIMbhI=
github.com/quic-go/qpack v0.5.1/go.mod h1:+PC4XFrEskIVkcLzpEkbLqq1uCoxPhQuvK5rH1ZgaEg=
github.com/quic-go/quic-go v0.54.0 h1:6s1YB9QotYI6Ospeiguknbp2Znb/jZYjZLRXn9kMQBg=
@@ -754,12 +758,12 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/swaggo/files/v2 v2.0.0 h1:hmAt8Dkynw7Ssz46F6pn8ok6YmGZqHSVLZ+HQM7i0kw=
github.com/swaggo/files/v2 v2.0.0/go.mod h1:24kk2Y9NYEJ5lHuCra6iVwkMjIekMCaFq/0JQj66kyM=
github.com/swaggo/swag v1.16.3 h1:PnCYjPCah8FK4I26l2F/KQ4yz3sILcVUN3cTlBFA9Pg=
github.com/swaggo/swag v1.16.3/go.mod h1:DImHIuOFXKpMFAQjcC7FG4m3Dg4+QuUgUzJmKjI/gRk=
github.com/swaggo/swag v1.16.6 h1:qBNcx53ZaX+M5dxVyTrgQ0PJ/ACK+NzhwcbieTt+9yI=
github.com/swaggo/swag v1.16.6/go.mod h1:ngP2etMK5a0P3QBizic5MEwpRmluJZPHjXcMoj4Xesg=
github.com/tarm/serial v0.0.0-20180830185346-98f6abe2eb07/go.mod h1:kDXzergiv9cbyO7IOYJZWg1U88JhDg3PB6klq9Hg2pA=
github.com/testcontainers/testcontainers-go v0.35.0 h1:uADsZpTKFAtp8SLK+hMwSaa+X+JiERHtd4sQAFmXeMo=
github.com/testcontainers/testcontainers-go v0.35.0/go.mod h1:oEVBj5zrfJTrgjwONs1SsRbnBtH9OKl+IGl3UMcr2B4=
@@ -820,22 +824,22 @@ go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJyS
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0 h1:UP6IpuHFkUgOQL9FFQFrZ+5LiwhhYRbi7VZSIx6Nj5s=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0/go.mod h1:qxuZLtbq5QDtdeSHsS7bcf6EH6uO6jUAgk764zd3rhM=
go.opentelemetry.io/otel v1.35.0 h1:xKWKPxrxB6OtMCbmMY021CqC45J+3Onta9MqjhnusiQ=
go.opentelemetry.io/otel v1.35.0/go.mod h1:UEqy8Zp11hpkUrL73gSlELM0DupHoiq72dR+Zqel/+Y=
go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8=
go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.31.0 h1:K0XaT3DwHAcV4nKLzcQvwAgSyisUghWoY20I7huthMk=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.31.0/go.mod h1:B5Ki776z/MBnVha1Nzwp5arlzBbE3+1jk+pGmaP5HME=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.31.0 h1:lUsI2TYsQw2r1IASwoROaCnjdj2cvC2+Jbxvk6nHnWU=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.31.0/go.mod h1:2HpZxxQurfGxJlJDblybejHB6RX6pmExPNe517hREw4=
go.opentelemetry.io/otel/exporters/prometheus v0.50.0 h1:2Ewsda6hejmbhGFyUvWZjUThC98Cf8Zy6g0zkIimOng=
go.opentelemetry.io/otel/exporters/prometheus v0.50.0/go.mod h1:pMm5PkUo5YwbLiuEf7t2xg4wbP0/eSJrMxIMxKosynY=
go.opentelemetry.io/otel/metric v1.35.0 h1:0znxYu2SNyuMSQT4Y9WDWej0VpcsxkuklLa4/siN90M=
go.opentelemetry.io/otel/metric v1.35.0/go.mod h1:nKVFgxBZ2fReX6IlyW28MgZojkoAkJGaE8CpgeAU3oE=
go.opentelemetry.io/otel/sdk v1.31.0 h1:xLY3abVHYZ5HSfOg3l2E5LUj2Cwva5Y7yGxnSW9H5Gk=
go.opentelemetry.io/otel/sdk v1.31.0/go.mod h1:TfRbMdhvxIIr/B2N2LQW2S5v9m3gOQ/08KsbbO5BPT0=
go.opentelemetry.io/otel/sdk/metric v1.28.0 h1:OkuaKgKrgAbYrrY0t92c+cC+2F6hsFNnCQArXCKlg08=
go.opentelemetry.io/otel/sdk/metric v1.28.0/go.mod h1:cWPjykihLAPvXKi4iZc1dpER3Jdq2Z0YLse3moQUCpg=
go.opentelemetry.io/otel/trace v1.35.0 h1:dPpEfJu1sDIqruz7BHFG3c7528f6ddfSWfFDVt/xgMs=
go.opentelemetry.io/otel/trace v1.35.0/go.mod h1:WUk7DtFp1Aw2MkvqGdwiXYDZZNvA/1J8o6xRXLrIkyc=
go.opentelemetry.io/otel/exporters/prometheus v0.60.0 h1:cGtQxGvZbnrWdC2GyjZi0PDKVSLWP/Jocix3QWfXtbo=
go.opentelemetry.io/otel/exporters/prometheus v0.60.0/go.mod h1:hkd1EekxNo69PTV4OWFGZcKQiIqg0RfuWExcPKFvepk=
go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA=
go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI=
go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E=
go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg=
go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6qT5wthqPoM=
go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA=
go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE=
go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs=
go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0=
go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8=
go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ=
@@ -1066,8 +1070,8 @@ google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
google.golang.org/protobuf v1.36.7 h1:IgrO7UwFQGJdRNXH/sQux4R1Dj1WAKcLElzeeRaXV2A=
google.golang.org/protobuf v1.36.7/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
@@ -1099,8 +1103,8 @@ howett.net/plist v1.0.2-0.20250314012144-ee69052608d9 h1:eeH1AIcPvSc0Z25ThsYF+Xo
howett.net/plist v1.0.2-0.20250314012144-ee69052608d9/go.mod h1:fyFX5Hj5tP1Mpk8obqA9MZgXT416Q5711SDT7dQLTLk=
lukechampine.com/blake3 v1.4.1 h1:I3Smz7gso8w4/TunLKec6K2fn+kyKtDxr/xcQEN84Wg=
lukechampine.com/blake3 v1.4.1/go.mod h1:QFosUxmjB8mnrWFSNwKmvxHpfY72bmD2tQ0kBMM3kwo=
oras.land/oras-go/v2 v2.5.0 h1:o8Me9kLY74Vp5uw07QXPiitjsw7qNXi8Twd+19Zf02c=
oras.land/oras-go/v2 v2.5.0/go.mod h1:z4eisnLP530vwIOUOJeBIj0aGI0L1C3d53atvCBqZHg=
oras.land/oras-go/v2 v2.6.0 h1:X4ELRsiGkrbeox69+9tzTu492FMUu7zJQW6eJU+I2oc=
oras.land/oras-go/v2 v2.6.0/go.mod h1:magiQDfG6H1O9APp+rOsvCPcW1GD2MM7vgnKY0Y+u1o=
sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo=
sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8=
sourcegraph.com/sourcegraph/go-diff v0.5.0/go.mod h1:kuch7UrkMzY0X+p9CRK03kfuPQ2zzQcaEFbx8wA8rck=
@@ -23,10 +23,10 @@ var ErrUnsafeFilesFound = errors.New("unsafe files found")

func HuggingFaceScan(uri URI) (*HuggingFaceScanResult, error) {
    cleanParts := strings.Split(uri.ResolveURL(), "/")
    if len(cleanParts) <= 4 || cleanParts[2] != "huggingface.co" {
    if len(cleanParts) <= 4 || cleanParts[2] != "huggingface.co" && cleanParts[2] != HF_ENDPOINT {
        return nil, ErrNonHuggingFaceFile
    }
    results, err := http.Get(fmt.Sprintf("https://huggingface.co/api/models/%s/%s/scan", cleanParts[3], cleanParts[4]))
    results, err := http.Get(fmt.Sprintf("%s/api/models/%s/%s/scan", HF_ENDPOINT, cleanParts[3], cleanParts[4]))
    if err != nil {
        return nil, err
    }

@@ -37,6 +37,17 @@ const (

type URI string

// HF_ENDPOINT is the HuggingFace endpoint, can be overridden by setting the HF_ENDPOINT environment variable.
var HF_ENDPOINT string = loadConfig()

func loadConfig() string {
    HF_ENDPOINT := os.Getenv("HF_ENDPOINT")
    if HF_ENDPOINT == "" {
        HF_ENDPOINT = "https://huggingface.co"
    }
    return HF_ENDPOINT
}

func (uri URI) DownloadWithCallback(basePath string, f func(url string, i []byte) error) error {
    return uri.DownloadWithAuthorizationAndCallback(basePath, "", f)
}
@@ -213,7 +224,7 @@ func (s URI) ResolveURL() string {
        filepath = strings.Split(filepath, "@")[0]
    }

    return fmt.Sprintf("https://huggingface.co/%s/%s/resolve/%s/%s", owner, repo, branch, filepath)
    return fmt.Sprintf("%s/%s/%s/resolve/%s/%s", HF_ENDPOINT, owner, repo, branch, filepath)
}

    return string(s)
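The net effect of the HF_ENDPOINT change is that every resolved HuggingFace URL can be redirected to a mirror by setting one environment variable. A minimal sketch of the observable behavior (the mirror hostname and model coordinates are illustrative):

package main

import (
    "fmt"
    "os"
)

// resolve mirrors the ResolveURL change above: the scheme and host now come
// from HF_ENDPOINT instead of being hardcoded to https://huggingface.co.
func resolve(endpoint, owner, repo, branch, path string) string {
    return fmt.Sprintf("%s/%s/%s/resolve/%s/%s", endpoint, owner, repo, branch, path)
}

func main() {
    // Same fallback as loadConfig: the public hub when HF_ENDPOINT is unset.
    endpoint := os.Getenv("HF_ENDPOINT")
    if endpoint == "" {
        endpoint = "https://huggingface.co"
    }
    // With HF_ENDPOINT=https://hf-mirror.example.com (a hypothetical mirror),
    // the same model reference resolves against the mirror instead.
    fmt.Println(resolve(endpoint, "ggerganov", "whisper.cpp", "main", "ggml-tiny.bin"))
}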
@@ -1,3 +1,5 @@
// Package system provides system detection utilities, including GPU/vendor detection
// and capability classification used to select optimal backends at runtime.
package system

import (
@@ -116,3 +118,25 @@ func detectGPUVendor(gpus []*gpu.GraphicsCard) (string, error) {

    return "", nil
}

// BackendPreferenceTokens returns a list of substrings that represent the preferred
// backend implementation order for the current system capability. Callers can use
// these tokens to select the most appropriate concrete backend among multiple
// candidates sharing the same alias (e.g., "llama-cpp").
func (s *SystemState) BackendPreferenceTokens() []string {
    capStr := strings.ToLower(s.getSystemCapabilities())
    switch {
    case strings.HasPrefix(capStr, nvidia):
        return []string{"cuda", "vulkan", "cpu"}
    case strings.HasPrefix(capStr, amd):
        return []string{"rocm", "hip", "vulkan", "cpu"}
    case strings.HasPrefix(capStr, intel):
        return []string{"sycl", "intel", "cpu"}
    case strings.HasPrefix(capStr, metal):
        return []string{"metal", "cpu"}
    case strings.HasPrefix(capStr, darwinX86):
        return []string{"darwin-x86", "cpu"}
    default:
        return []string{"cpu"}
    }
}
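A plausible consumer of these preference tokens scans candidate backend names in token order and takes the first match; the candidate names below are illustrative, not actual gallery backend IDs:

package main

import (
    "fmt"
    "strings"
)

// pickBackend returns the first candidate whose name contains the
// highest-priority token, i.e. the selection scheme the
// BackendPreferenceTokens doc comment describes for backends sharing one alias.
func pickBackend(candidates, tokens []string) string {
    for _, tok := range tokens {
        for _, c := range candidates {
            if strings.Contains(c, tok) {
                return c
            }
        }
    }
    return ""
}

func main() {
    // Hypothetical concrete backends sharing the "llama-cpp" alias.
    candidates := []string{"llama-cpp-cuda12", "llama-cpp-vulkan", "llama-cpp-cpu"}
    // Token order as returned for an NVIDIA system.
    tokens := []string{"cuda", "vulkan", "cpu"}
    fmt.Println(pickBackend(candidates, tokens)) // prints llama-cpp-cuda12
}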
@@ -169,6 +169,30 @@ var _ = Describe("E2E test", func() {
            Expect(err).ToNot(HaveOccurred())
            Expect(len(resp.Data)).To(Equal(1), fmt.Sprint(resp))
            Expect(resp.Data[0].Embedding).ToNot(BeEmpty())

            resp2, err := client.CreateEmbeddings(context.TODO(),
                openai.EmbeddingRequestStrings{
                    Input: []string{"cat"},
                    Model: openai.AdaEmbeddingV2,
                },
            )
            Expect(err).ToNot(HaveOccurred())
            Expect(len(resp2.Data)).To(Equal(1), fmt.Sprint(resp))
            Expect(resp2.Data[0].Embedding).ToNot(BeEmpty())
            Expect(resp2.Data[0].Embedding).ToNot(Equal(resp.Data[0].Embedding))

            resp3, err := client.CreateEmbeddings(context.TODO(),
                openai.EmbeddingRequestStrings{
                    Input: []string{"doc", "cat"},
                    Model: openai.AdaEmbeddingV2,
                },
            )
            Expect(err).ToNot(HaveOccurred())
            Expect(len(resp3.Data)).To(Equal(2), fmt.Sprint(resp))
            Expect(resp3.Data[0].Embedding).ToNot(BeEmpty())
            Expect(resp3.Data[0].Embedding).To(Equal(resp.Data[0].Embedding))
            Expect(resp3.Data[1].Embedding).To(Equal(resp2.Data[0].Embedding))
            Expect(resp3.Data[0].Embedding).ToNot(Equal(resp3.Data[1].Embedding))
        })
    })
    Context("vision", func() {