Compare commits
74 Commits
Commit SHAs:

56f44d448c, 0f0fafacd9, 4f239bac89, 04d74ac648, 18c3dc33ee, 508cfa7369, 1f94cddbae, 21ae7b4cd4, bef22ab547, eb04e8cdcf, 17e533a086, 4fc68409ff, e587044449, 1f09db5161, 05b744f086, 89ca4bc02d, e626aa48a4, 752b5e0339, 637d72d6e3, f3bfec580a, 165c1ddff3, fb83238e9e, 700bfa41c7, 25bdc350df, 1b899e1a68, 3bf13f8c69, 7a00729374, d484028532, 0eb7fc2c41, a69e30e0c9, 9c018e6bff, 281e818047, 270f0e2157, 673e59e76c, 5a8a2adb44, a7317d23bf, 2bab9b5fe2, 081be3ba7d, 25e6f21322, b4df1c9cf3, 4fbd6609f2, 7387932f89, 59c37e67b2, c09d227647, 547d322b28, a6f0bb410f, 710f624ecd, 5018452be7, ece239966f, 3b8bc7e64c, fc73b2b430, 901dba6063, b88a7a4550, 106e40845f, 0064bec8f5, 9e6dbb0b5a, d26e61388b, 31a7084c75, 128612a6fc, 6af3f46bc3, d2cf8ef070, 259ad3cfe6, 18b320d577, 89e151f035, 22060f6410, 7ee3288460, cbbc954a8c, 2c425e9c69, c59975ab05, 05f7004487, 2f9203cd2a, f09b33f2ef, 65470b0ab1, 9a23fe662b
.env (3 changes)

@@ -29,6 +29,9 @@
## Enable/Disable single backend (useful if only one GPU is available)
# LOCALAI_SINGLE_ACTIVE_BACKEND=true

# Forces shutdown of the backends if busy (only if LOCALAI_SINGLE_ACTIVE_BACKEND is set)
# LOCALAI_FORCE_BACKEND_SHUTDOWN=true

## Specify a build type. Available: cublas, openblas, clblas.
## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit.
## OpenBLAS: This is an open-source implementation of the BLAS library that aims to provide highly optimized code for various platforms. It includes support for multi-threading and can be compiled to use hardware-specific features for additional performance. OpenBLAS can run on many kinds of hardware, including CPUs from Intel, AMD, and ARM.
.github/workflows/generate_intel_image.yaml (2 changes, vendored)

@@ -15,7 +15,7 @@ jobs:
strategy:
matrix:
include:
- base-image: intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04
- base-image: intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04
runs-on: 'ubuntu-latest'
platforms: 'linux/amd64'
runs-on: ${{matrix.runs-on}}
.github/workflows/secscan.yaml (2 changes, vendored)

@@ -18,7 +18,7 @@ jobs:
if: ${{ github.actor != 'dependabot[bot]' }}
- name: Run Gosec Security Scanner
if: ${{ github.actor != 'dependabot[bot]' }}
uses: securego/gosec@v2.22.0
uses: securego/gosec@v2.22.3
with:
# we let the report trigger content trigger a failure using the GitHub Security features.
args: '-no-fail -fmt sarif -out results.sarif ./...'
Makefile (17 changes)

@@ -6,7 +6,7 @@ BINARY_NAME=local-ai
DETECT_LIBS?=true

# llama.cpp versions
CPPLLAMA_VERSION?=4663bd353c61c1136cd8a97b9908755e4ab30cec
CPPLLAMA_VERSION?=d6d2c2ab8c8865784ba9fef37f2b2de3f2134d33

# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp

@@ -21,8 +21,8 @@ BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
BARKCPP_VERSION?=v1.0.0

# stablediffusion.cpp (ggml)
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
STABLEDIFFUSION_GGML_VERSION?=19d876ee300a055629926ff836489901f734f2b7
STABLEDIFFUSION_GGML_REPO?=https://github.com/richiejp/stable-diffusion.cpp
STABLEDIFFUSION_GGML_VERSION?=53e3b17eb3d0b5760ced06a1f98320b68b34aaae

ONNX_VERSION?=1.20.0
ONNX_ARCH?=x64

@@ -260,11 +260,7 @@ backend/go/image/stablediffusion-ggml/libsd.a: sources/stablediffusion-ggml.cpp
$(MAKE) -C backend/go/image/stablediffusion-ggml libsd.a

backend-assets/grpc/stablediffusion-ggml: backend/go/image/stablediffusion-ggml/libsd.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/image/stablediffusion-ggml/ LIBRARY_PATH=$(CURDIR)/backend/go/image/stablediffusion-ggml/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion-ggml ./backend/go/image/stablediffusion-ggml/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/stablediffusion-ggml
endif
$(MAKE) -C backend/go/image/stablediffusion-ggml CGO_LDFLAGS="$(CGO_LDFLAGS)" stablediffusion-ggml

sources/onnxruntime:
mkdir -p sources/onnxruntime

@@ -809,7 +805,8 @@ docker-aio-all:

docker-image-intel:
docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 \
--progress plain \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.1.0-0-devel-ubuntu24.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \

@@ -817,7 +814,7 @@ docker-image-intel:

docker-image-intel-xpu:
docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
README.md (96 changes)

@@ -1,7 +1,6 @@
<h1 align="center">
<br>
<img height="300" src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd"> <br>
LocalAI
<img height="300" src="./core/http/static/logo.png"> <br>
<br>
</h1>

@@ -48,9 +47,58 @@
[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)

**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that’s compatible with OpenAI (Elevenlabs, Anthropic... ) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that's compatible with OpenAI (Elevenlabs, Anthropic... ) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).



## 📚🆕 Local Stack Family

🆕 LocalAI is now part of a comprehensive suite of AI tools designed to work together:

<table>
<tr>
<td width="50%" valign="top">
<a href="https://github.com/mudler/LocalAGI">
<img src="https://raw.githubusercontent.com/mudler/LocalAGI/refs/heads/main/webui/react-ui/public/logo_2.png" width="300" alt="LocalAGI Logo">
</a>
</td>
<td width="50%" valign="top">
<h3><a href="https://github.com/mudler/LocalAGI">LocalAGI</a></h3>
<p>A powerful Local AI agent management platform that serves as a drop-in replacement for OpenAI's Responses API, enhanced with advanced agentic capabilities.</p>
</td>
</tr>
<tr>
<td width="50%" valign="top">
<a href="https://github.com/mudler/LocalRecall">
<img src="https://raw.githubusercontent.com/mudler/LocalRecall/refs/heads/main/static/localrecall_horizontal.png" width="300" alt="LocalRecall Logo">
</a>
</td>
<td width="50%" valign="top">
<h3><a href="https://github.com/mudler/LocalRecall">LocalRecall</a></h3>
<p>A REST-ful API and knowledge base management system that provides persistent memory and storage capabilities for AI agents.</p>
</td>
</tr>
</table>

## Screenshots

| Talk Interface | Generate Audio |
| --- | --- |
|  |  |

| Models Overview | Generate Images |
| --- | --- |
|  |  |

| Chat Interface | Home |
| --- | --- |
|  |  |

| Login | Swarm |
| --- | --- |
| |  |

## 💻 Quickstart

Run the installer script:

@@ -59,17 +107,21 @@ curl https://localai.io/install.sh | sh
```

Or run with docker:

### CPU only image:
```bash
# CPU only image:
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-cpu

# Nvidia GPU:
```
### Nvidia GPU:
```bash
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12

# CPU and GPU image (bigger size):
```
### CPU and GPU image (bigger size):
```bash
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest

# AIO images (it will pre-download a set of models ready for use, see https://localai.io/basics/container/)
```
### AIO images (it will pre-download a set of models ready for use, see https://localai.io/basics/container/)
```bash
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
```

@@ -88,10 +140,13 @@ local-ai run https://gist.githubusercontent.com/.../phi-2.yaml
local-ai run oci://localai/phi-2:latest
```

[💻 Getting started](https://localai.io/basics/getting_started/index.html)
For more information, see [💻 Getting started](https://localai.io/basics/getting_started/index.html)

## 📰 Latest project news

- Apr 2025: [LocalAGI](https://github.com/mudler/LocalAGI) and [LocalRecall](https://github.com/mudler/LocalRecall) join the LocalAI family stack.
- Apr 2025: WebUI overhaul, AIO images updates
- Feb 2025: Backend cleanup, Breaking changes, new backends (kokoro, OutelTTS, faster-whisper), Nvidia L4T images
- Jan 2025: LocalAI model release: https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.3, SANA support in diffusers: https://github.com/mudler/LocalAI/pull/4603
- Dec 2024: stablediffusion.cpp backend (ggml) added ( https://github.com/mudler/LocalAI/pull/4289 )
- Nov 2024: Bark.cpp backend added ( https://github.com/mudler/LocalAI/pull/4287 )

@@ -105,19 +160,6 @@ local-ai run oci://localai/phi-2:latest

Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)

## 🔥🔥 Hot topics (looking for help):

- Multimodal with vLLM and Video understanding: https://github.com/mudler/LocalAI/pull/3729
- Realtime API https://github.com/mudler/LocalAI/issues/3714
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
- Assistant API: https://github.com/mudler/LocalAI/issues/1273
- Vulkan: https://github.com/mudler/LocalAI/issues/1647
- Anthropic API: https://github.com/mudler/LocalAI/issues/1808

If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22

## 🚀 [Features](https://localai.io/features/)

- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `transformers`, `vllm` ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))

@@ -131,12 +173,10 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
- 📈 [Reranker API](https://localai.io/features/reranker/)
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
- [Agentic capabilities](https://github.com/mudler/LocalAGI)
- 🔊 Voice activity detection (Silero-VAD support)
- 🌍 Integrated WebUI!

## 💻 Usage

Check out the [Getting started](https://localai.io/basics/getting_started/index.html) section in our documentation.

### 🔗 Community and integrations
@@ -2,7 +2,7 @@
## XXX: In some versions of CMake clip wasn't being built before llama.
## This is an hack for now, but it should be fixed in the future.
set(TARGET myclip)
add_library(${TARGET} clip.cpp clip.h llava.cpp llava.h)
add_library(${TARGET} clip.cpp clip.h clip-impl.h llava.cpp llava.h)
install(TARGETS ${TARGET} LIBRARY)
target_include_directories(myclip PUBLIC .)
target_include_directories(myclip PUBLIC ../..)
@@ -8,7 +8,7 @@ ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
TARGET?=--target grpc-server

# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF

# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
ifeq ($(BUILD_TYPE),cublas)

@@ -36,11 +36,18 @@ else ifeq ($(OS),Darwin)
endif

ifeq ($(BUILD_TYPE),sycl_f16)
CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
CMAKE_ARGS+=-DGGML_SYCL=ON \
-DCMAKE_C_COMPILER=icx \
-DCMAKE_CXX_COMPILER=icpx \
-DCMAKE_CXX_FLAGS="-fsycl" \
-DGGML_SYCL_F16=ON
endif

ifeq ($(BUILD_TYPE),sycl_f32)
CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
CMAKE_ARGS+=-DGGML_SYCL=ON \
-DCMAKE_C_COMPILER=icx \
-DCMAKE_CXX_COMPILER=icpx \
-DCMAKE_CXX_FLAGS="-fsycl"
endif

llama.cpp:

@@ -77,4 +84,4 @@ ifneq (,$(findstring sycl,$(BUILD_TYPE)))
else
+cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
endif
cp llama.cpp/build/bin/grpc-server .
cp llama.cpp/build/bin/grpc-server .
@@ -509,15 +509,15 @@ struct llama_server_context
bool load_model(const common_params &params_)
{
params = params_;
if (!params.mmproj.empty()) {
if (!params.mmproj.path.empty()) {
multimodal = true;
LOG_INFO("Multi Modal Mode Enabled", {});
clp_ctx = clip_init(params.mmproj.c_str(), clip_context_params {
clp_ctx = clip_init(params.mmproj.path.c_str(), clip_context_params {
/* use_gpu */ has_gpu,
/*verbosity=*/ 1,
/*verbosity=*/ GGML_LOG_LEVEL_INFO,
});
if(clp_ctx == nullptr) {
LOG_ERR("unable to load clip model: %s", params.mmproj.c_str());
LOG_ERR("unable to load clip model: %s", params.mmproj.path.c_str());
return false;
}

@@ -531,7 +531,7 @@ struct llama_server_context
ctx = common_init.context.release();
if (model == nullptr)
{
LOG_ERR("unable to load model: %s", params.model.c_str());
LOG_ERR("unable to load model: %s", params.model.path.c_str());
return false;
}

@@ -2326,11 +2326,11 @@ static void params_parse(const backend::ModelOptions* request,

// this is comparable to: https://github.com/ggerganov/llama.cpp/blob/d9b33fe95bd257b36c84ee5769cc048230067d6f/examples/server/server.cpp#L1809

params.model = request->modelfile();
params.model.path = request->modelfile();
if (!request->mmproj().empty()) {
// get the directory of modelfile
std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
params.mmproj = model_dir + "/"+ request->mmproj();
std::string model_dir = params.model.path.substr(0, params.model.path.find_last_of("/\\"));
params.mmproj.path = model_dir + "/"+ request->mmproj();
}
// params.model_alias ??
params.model_alias = request->modelfile();

@@ -2405,7 +2405,7 @@ static void params_parse(const backend::ModelOptions* request,
scale_factor = request->lorascale();
}
// get the directory of modelfile
std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
std::string model_dir = params.model.path.substr(0, params.model.path.find_last_of("/\\"));
params.lora_adapters.push_back({ model_dir + "/"+request->loraadapter(), scale_factor });
}
params.use_mlock = request->mlock();
@@ -21,6 +21,7 @@ fi
## XXX: In some versions of CMake clip wasn't being built before llama.
## This is an hack for now, but it should be fixed in the future.
cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
cp -rfv llama.cpp/examples/llava/clip-impl.h llama.cpp/examples/grpc-server/clip-impl.h
cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
@@ -8,6 +8,13 @@ ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
# keep standard at C11 and C++11
CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC

GOCMD?=go
CGO_LDFLAGS?=
# Avoid parent make file overwriting CGO_LDFLAGS which is needed for hipblas
CGO_LDFLAGS_SYCL=
GO_TAGS?=
LD_FLAGS?=

# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF

@@ -21,7 +28,7 @@ else ifeq ($(BUILD_TYPE),openblas)
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
else ifeq ($(BUILD_TYPE),clblas)
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
else ifeq ($(BUILD_TYPE),hipblas)
CMAKE_ARGS+=-DGGML_HIP=ON
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation

@@ -36,16 +43,35 @@ else ifeq ($(OS),Darwin)
endif
endif

# ifeq ($(BUILD_TYPE),sycl_f16)
# CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON -DSD_SYCL=ON -DGGML_SYCL_F16=ON
# endif
ifeq ($(BUILD_TYPE),sycl_f16)
CMAKE_ARGS+=-DGGML_SYCL=ON \
-DCMAKE_C_COMPILER=icx \
-DCMAKE_CXX_COMPILER=icpx \
-DSD_SYCL=ON \
-DGGML_SYCL_F16=ON
CC=icx
CXX=icpx
CGO_LDFLAGS_SYCL += -fsycl -L${DNNLROOT}/lib -ldnnl ${MKLROOT}/lib/intel64/libmkl_sycl.a -fiopenmp -fopenmp-targets=spir64 -lOpenCL
CGO_LDFLAGS_SYCL += $(shell pkg-config --libs mkl-static-lp64-gomp)
CGO_CXXFLAGS += -fiopenmp -fopenmp-targets=spir64
CGO_CXXFLAGS += $(shell pkg-config --cflags mkl-static-lp64-gomp )
endif

# ifeq ($(BUILD_TYPE),sycl_f32)
# CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DSD_SYCL=ON
# endif
ifeq ($(BUILD_TYPE),sycl_f32)
CMAKE_ARGS+=-DGGML_SYCL=ON \
-DCMAKE_C_COMPILER=icx \
-DCMAKE_CXX_COMPILER=icpx \
-DSD_SYCL=ON
CC=icx
CXX=icpx
CGO_LDFLAGS_SYCL += -fsycl -L${DNNLROOT}/lib -ldnnl ${MKLROOT}/lib/intel64/libmkl_sycl.a -fiopenmp -fopenmp-targets=spir64 -lOpenCL
CGO_LDFLAGS_SYCL += $(shell pkg-config --libs mkl-static-lp64-gomp)
CGO_CXXFLAGS += -fiopenmp -fopenmp-targets=spir64
CGO_CXXFLAGS += $(shell pkg-config --cflags mkl-static-lp64-gomp )
endif

# warnings
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
# CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function

# Find all .a archives in ARCHIVE_DIR
# (ggml can have different backends cpu, cuda, etc., each backend generates a .a archive)

@@ -86,11 +112,24 @@ endif
$(MAKE) $(COMBINED_LIB)

gosd.o:
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+bash -c "source $(ONEAPI_VARS); \
$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c"
else
$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c
endif

libsd.a: gosd.o
cp $(INCLUDE_PATH)/build/libstable-diffusion.a ./libsd.a
$(AR) rcs libsd.a gosd.o

stablediffusion-ggml:
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_SYCL)" C_INCLUDE_PATH="$(INCLUDE_PATH)" LIBRARY_PATH="$(LIBRARY_PATH)" \
CC="$(CC)" CXX="$(CXX)" CGO_CXXFLAGS="$(CGO_CXXFLAGS)" \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o ../../../../backend-assets/grpc/stablediffusion-ggml ./
ifneq ($(UPX),)
$(UPX) ../../../../backend-assets/grpc/stablediffusion-ggml
endif

clean:
rm -rf gosd.o libsd.a build $(COMBINED_LIB)
rm -rf gosd.o libsd.a build $(COMBINED_LIB)
@@ -19,7 +19,7 @@ import grpc

from diffusers import SanaPipeline, StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline, Lumina2Text2ImgPipeline
from diffusers.pipelines.stable_diffusion import safety_checker
from diffusers.utils import load_image, export_to_video
from compel import Compel, ReturnedEmbeddingsType

@@ -287,6 +287,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):

if request.LowVRAM:
self.pipe.enable_model_cpu_offload()
elif request.PipelineType == "Lumina2Text2ImgPipeline":
self.pipe = Lumina2Text2ImgPipeline.from_pretrained(
request.Model,
torch_dtype=torch.bfloat16)
if request.LowVRAM:
self.pipe.enable_model_cpu_offload()
elif request.PipelineType == "SanaPipeline":
self.pipe = SanaPipeline.from_pretrained(
request.Model,
@@ -16,7 +16,7 @@ type Application struct {
func newApplication(appConfig *config.ApplicationConfig) *Application {
return &Application{
backendLoader: config.NewBackendConfigLoader(appConfig.ModelPath),
modelLoader: model.NewModelLoader(appConfig.ModelPath),
modelLoader: model.NewModelLoader(appConfig.ModelPath, appConfig.SingleBackend),
applicationConfig: appConfig,
templatesEvaluator: templates.NewEvaluator(appConfig.ModelPath),
}

@@ -143,7 +143,7 @@ func New(opts ...config.AppOption) (*Application, error) {
}()
}

if options.LoadToMemory != nil {
if options.LoadToMemory != nil && !options.SingleBackend {
for _, m := range options.LoadToMemory {
cfg, err := application.BackendLoader().LoadBackendConfigFileByNameDefaultOptions(m, options)
if err != nil {
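
Read together with the CLI and test hunks further down, the loader constructor now takes the model path plus a single-backend flag. A minimal sketch of the new call shape; the import path and the logging are assumptions, only the two-argument `NewModelLoader(path, singleBackend)` form comes from the hunks themselves:

```go
package main

import (
	"fmt"

	"github.com/mudler/LocalAI/pkg/model" // assumed import path for the loader package
)

func main() {
	// The second argument is the single-active-backend flag now passed at construction time.
	ml := model.NewModelLoader("/models", true)

	// Same shutdown pattern the CLI commands in later hunks keep using.
	defer func() {
		if err := ml.StopAllGRPC(); err != nil {
			fmt.Println("error stopping gRPC backends:", err)
		}
	}()
}
```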
@@ -17,6 +17,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendCo
if err != nil {
return nil, err
}
defer loader.Close()

var fn func() ([]float32, error)
switch model := inferenceModel.(type) {

@@ -16,6 +16,7 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
if err != nil {
return nil, err
}
defer loader.Close()

fn := func() error {
_, err := inferenceModel.GenerateImage(

@@ -53,6 +53,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
if err != nil {
return nil, err
}
defer loader.Close()

var protoMessages []*proto.Message
// if we are using the tokenizer template, we need to convert the messages to proto messages
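
The hunks above, and the rerank, TTS, transcription, and stores hunks that follow, all settle on the same pattern: load the model, then immediately defer a `Close()` on the loader so the active backend can be released (or swapped) when single-backend mode is enabled. A hedged sketch of that pattern; the option type and function name are placeholders rather than the project's exact signatures:

```go
package backend

import (
	"github.com/mudler/LocalAI/pkg/model" // assumed import path
)

// runInference sketches the load/defer-Close pattern used across these backends.
func runInference(loader *model.ModelLoader, opts ...model.Option) error {
	inferenceModel, err := loader.Load(opts...)
	if err != nil {
		return err
	}
	// Close is deferred right after a successful Load, mirroring the added lines above.
	defer loader.Close()

	_ = inferenceModel // ... perform the actual gRPC call here ...
	return nil
}
```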
@@ -40,10 +40,6 @@ func ModelOptions(c config.BackendConfig, so *config.ApplicationConfig, opts ...
grpcOpts := grpcModelOpts(c)
defOpts = append(defOpts, model.WithLoadGRPCLoadModelOpts(grpcOpts))

if so.SingleBackend {
defOpts = append(defOpts, model.WithSingleActiveBackend())
}

if so.ParallelBackendRequests {
defOpts = append(defOpts, model.EnableParallelRequests)
}

@@ -121,7 +117,7 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
triggers := make([]*pb.GrammarTrigger, 0)
for _, t := range c.FunctionsConfig.GrammarConfig.GrammarTriggers {
triggers = append(triggers, &pb.GrammarTrigger{
Word: t.Word,
Word: t.Word,
})
}

@@ -161,33 +157,33 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
DisableLogStatus: c.DisableLogStatus,
DType: c.DType,
// LimitMMPerPrompt vLLM
LimitImagePerPrompt: int32(c.LimitMMPerPrompt.LimitImagePerPrompt),
LimitVideoPerPrompt: int32(c.LimitMMPerPrompt.LimitVideoPerPrompt),
LimitAudioPerPrompt: int32(c.LimitMMPerPrompt.LimitAudioPerPrompt),
MMProj: c.MMProj,
FlashAttention: c.FlashAttention,
CacheTypeKey: c.CacheTypeK,
CacheTypeValue: c.CacheTypeV,
NoKVOffload: c.NoKVOffloading,
YarnExtFactor: c.YarnExtFactor,
YarnAttnFactor: c.YarnAttnFactor,
YarnBetaFast: c.YarnBetaFast,
YarnBetaSlow: c.YarnBetaSlow,
NGQA: c.NGQA,
RMSNormEps: c.RMSNormEps,
MLock: mmlock,
RopeFreqBase: c.RopeFreqBase,
RopeScaling: c.RopeScaling,
Type: c.ModelType,
RopeFreqScale: c.RopeFreqScale,
NUMA: c.NUMA,
Embeddings: embeddings,
LowVRAM: lowVRAM,
NGPULayers: int32(nGPULayers),
MMap: mmap,
MainGPU: c.MainGPU,
Threads: int32(*c.Threads),
TensorSplit: c.TensorSplit,
// AutoGPTQ
ModelBaseName: c.AutoGPTQ.ModelBaseName,
Device: c.AutoGPTQ.Device,
@@ -12,10 +12,10 @@ import (
func Rerank(request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
opts := ModelOptions(backendConfig, appConfig)
rerankModel, err := loader.Load(opts...)

if err != nil {
return nil, err
}
defer loader.Close()

if rerankModel == nil {
return nil, fmt.Errorf("could not load rerank model")

@@ -26,10 +26,10 @@ func SoundGeneration(

opts := ModelOptions(backendConfig, appConfig)
soundGenModel, err := loader.Load(opts...)

if err != nil {
return "", nil, err
}
defer loader.Close()

if soundGenModel == nil {
return "", nil, fmt.Errorf("could not load sound generation model")

@@ -20,6 +20,7 @@ func TokenMetrics(
if err != nil {
return nil, err
}
defer loader.Close()

if model == nil {
return nil, fmt.Errorf("could not loadmodel model")

@@ -14,10 +14,10 @@ func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.Bac

opts := ModelOptions(backendConfig, appConfig)
inferenceModel, err = loader.Load(opts...)

if err != nil {
return schema.TokenizeResponse{}, err
}
defer loader.Close()

predictOptions := gRPCPredictOpts(backendConfig, loader.ModelPath)
predictOptions.Prompt = s

@@ -24,6 +24,7 @@ func ModelTranscription(audio, language string, translate bool, ml *model.ModelL
if err != nil {
return nil, err
}
defer ml.Close()

if transcriptionModel == nil {
return nil, fmt.Errorf("could not load transcription model")
@@ -23,10 +23,10 @@ func ModelTTS(
) (string, *proto.Result, error) {
opts := ModelOptions(backendConfig, appConfig, model.WithDefaultBackendString(model.PiperBackend))
ttsModel, err := loader.Load(opts...)

if err != nil {
return "", nil, err
}
defer loader.Close()

if ttsModel == nil {
return "", nil, fmt.Errorf("could not load tts model %q", backendConfig.Model)

@@ -19,6 +19,8 @@ func VAD(request *schema.VADRequest,
if err != nil {
return nil, err
}
defer ml.Close()

req := proto.VADRequest{
Audio: request.Audio,
}
@@ -74,7 +74,7 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
AssetsDestination: t.BackendAssetsPath,
ExternalGRPCBackends: externalBackends,
}
ml := model.NewModelLoader(opts.ModelPath)
ml := model.NewModelLoader(opts.ModelPath, opts.SingleBackend)

defer func() {
err := ml.StopAllGRPC()

@@ -32,7 +32,7 @@ func (t *TranscriptCMD) Run(ctx *cliContext.Context) error {
}

cl := config.NewBackendConfigLoader(t.ModelsPath)
ml := model.NewModelLoader(opts.ModelPath)
ml := model.NewModelLoader(opts.ModelPath, opts.SingleBackend)
if err := cl.LoadBackendConfigsFromPath(t.ModelsPath); err != nil {
return err
}

@@ -41,7 +41,7 @@ func (t *TTSCMD) Run(ctx *cliContext.Context) error {
AudioDir: outputDir,
AssetsDestination: t.BackendAssetsPath,
}
ml := model.NewModelLoader(opts.ModelPath)
ml := model.NewModelLoader(opts.ModelPath, opts.SingleBackend)

defer func() {
err := ml.StopAllGRPC()
@@ -142,9 +142,9 @@ func API(application *application.Application) (*fiber.App, error) {
httpFS := http.FS(embedDirStatic)

router.Use(favicon.New(favicon.Config{
URL: "/favicon.ico",
URL: "/favicon.svg",
FileSystem: httpFS,
File: "static/favicon.ico",
File: "static/favicon.svg",
}))

router.Use("/static", filesystem.New(filesystem.Config{
@@ -122,15 +122,15 @@ func modelModal(m *gallery.GalleryModel) elem.Node {
"id": modalName(m),
"tabindex": "-1",
"aria-hidden": "true",
"class": "hidden overflow-y-auto overflow-x-hidden fixed top-0 right-0 left-0 z-50 justify-center items-center w-full md:inset-0 h-[calc(100%-1rem)] max-h-full",
"class": "hidden fixed top-0 right-0 left-0 z-50 justify-center items-center w-full md:inset-0 h-full max-h-full bg-gray-900/50",
},
elem.Div(
attrs.Props{
"class": "relative p-4 w-full max-w-2xl max-h-full",
"class": "relative p-4 w-full max-w-2xl h-[90vh] mx-auto mt-[5vh]",
},
elem.Div(
attrs.Props{
"class": "relative p-4 w-full max-w-2xl max-h-full bg-white rounded-lg shadow dark:bg-gray-700",
"class": "relative bg-white rounded-lg shadow dark:bg-gray-700 h-full flex flex-col",
},
// header
elem.Div(

@@ -164,14 +164,13 @@ func modelModal(m *gallery.GalleryModel) elem.Node {
// body
elem.Div(
attrs.Props{
"class": "p-4 md:p-5 space-y-4",
"class": "p-4 md:p-5 space-y-4 overflow-y-auto flex-1 min-h-0",
},
elem.Div(
attrs.Props{
"class": "flex justify-center items-center",
},
elem.Img(attrs.Props{
// "class": "rounded-t-lg object-fit object-center h-96",
"class": "lazy rounded-t-lg max-h-48 max-w-96 object-cover mt-3 entered loaded",
"src": m.Icon,
"loading": "lazy",

@@ -232,7 +231,6 @@ func modelModal(m *gallery.GalleryModel) elem.Node {
),
),
)

}

func modelDescription(m *gallery.GalleryModel) elem.Node {
@@ -21,6 +21,7 @@ func StoresSetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfi
if err != nil {
return err
}
defer sl.Close()

vals := make([][]byte, len(input.Values))
for i, v := range input.Values {

@@ -48,6 +49,7 @@ func StoresDeleteEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationCo
if err != nil {
return err
}
defer sl.Close()

if err := store.DeleteCols(c.Context(), sb, input.Keys); err != nil {
return err

@@ -69,6 +71,7 @@ func StoresGetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfi
if err != nil {
return err
}
defer sl.Close()

keys, vals, err := store.GetCols(c.Context(), sb, input.Keys)
if err != nil {

@@ -100,6 +103,7 @@ func StoresFindEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConf
if err != nil {
return err
}
defer sl.Close()

keys, vals, similarities, err := store.Find(c.Context(), sb, input.Key, input.Topk)
if err != nil {
@@ -40,7 +40,7 @@ func TestAssistantEndpoints(t *testing.T) {
cl := &config.BackendConfigLoader{}
//configsDir := "/tmp/localai/configs"
modelPath := "/tmp/localai/model"
var ml = model.NewModelLoader(modelPath)
var ml = model.NewModelLoader(modelPath, false)

appConfig := &config.ApplicationConfig{
ConfigsDir: configsDir,
@@ -29,9 +29,9 @@ func Explorer(db *explorer.Database) *fiber.App {
httpFS := http.FS(embedDirStatic)

app.Use(favicon.New(favicon.Config{
URL: "/favicon.ico",
URL: "/favicon.svg",
FileSystem: httpFS,
File: "static/favicon.ico",
File: "static/favicon.svg",
}))

app.Use("/static", filesystem.New(filesystem.Config{
@@ -50,11 +50,10 @@ func RegisterLocalAIRoutes(router *fiber.App,
router.Post("/v1/vad", vadChain...)

// Stores
sl := model.NewModelLoader("")
router.Post("/stores/set", localai.StoresSetEndpoint(sl, appConfig))
router.Post("/stores/delete", localai.StoresDeleteEndpoint(sl, appConfig))
router.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig))
router.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig))
router.Post("/stores/set", localai.StoresSetEndpoint(ml, appConfig))
router.Post("/stores/delete", localai.StoresDeleteEndpoint(ml, appConfig))
router.Post("/stores/get", localai.StoresGetEndpoint(ml, appConfig))
router.Post("/stores/find", localai.StoresFindEndpoint(ml, appConfig))

if !appConfig.DisableMetrics {
router.Get("/metrics", localai.LocalAIMetricsEndpoint())
core/http/static/favicon.svg (new file, 171 lines, 108 KiB)
core/http/static/logo.png (new binary file, 893 KiB)
core/http/static/logo_horizontal.png (new binary file, 930 KiB)
@@ -12,7 +12,7 @@
<div class="max-w-md w-full bg-gray-800/90 border border-gray-700/50 rounded-xl overflow-hidden shadow-xl">
<div class="animation-container">
<div class="text-overlay">
<!-- <i class="fas fa-circle-nodes text-5xl text-blue-400 mb-2"></i> -->
<img src="static/logo.png" alt="LocalAI Logo" class="h-32">
</div>
</div>
@@ -3,7 +3,7 @@
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{{.Title}}</title>
<base href="{{.BaseURL}}" />
<link rel="icon" type="image/x-icon" href="favicon.ico" />
<link rel="shortcut icon" href="static/favicon.svg" type="image/svg">
<link rel="stylesheet" href="static/assets/highlightjs.css" />
<script defer src="static/assets/highlightjs.js"></script>
<script defer src="static/assets/alpine.js"></script>
@@ -4,10 +4,9 @@
<div class="flex items-center">
<!-- Logo Image -->
<a href="./" class="flex items-center group">
<img src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd"
<img src="static/logo_horizontal.png"
alt="LocalAI Logo"
class="h-10 mr-3 rounded-lg border border-blue-600/30 shadow-md transition-all duration-300 group-hover:shadow-blue-500/20 group-hover:border-blue-500/50">
<span class="text-white text-xl font-bold bg-clip-text text-transparent bg-gradient-to-r from-blue-400 to-indigo-400">LocalAI</span>
class="h-14 mr-3 brightness-110 transition-all duration-300 group-hover:brightness-125">
</a>
</div>

@@ -4,10 +4,9 @@
<div class="flex items-center">
<!-- Logo Image -->
<a href="./" class="flex items-center group">
<img src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd"
<img src="static/logo_horizontal.png"
alt="LocalAI Logo"
class="h-10 mr-3 rounded-lg border border-blue-600/30 shadow-md transition-all duration-300 group-hover:shadow-blue-500/20 group-hover:border-blue-500/50">
<span class="text-white text-xl font-bold bg-clip-text text-transparent bg-gradient-to-r from-blue-400 to-indigo-400">LocalAI</span>
</a>
</div>
docs/assets/images/imagen.png (new binary file, 506 KiB)
docs/assets/images/localai_screenshot.png (new binary file, 170 KiB)
docs/assets/images/logos/logo.png (new binary file, 893 KiB)
docs/assets/images/logos/logo.svg (new file, 171 lines, 108 KiB)
docs/assets/images/screenshots/screenshot_chat.png (new binary file, 132 KiB)
docs/assets/images/screenshots/screenshot_gallery.png (new binary file, 284 KiB)
docs/assets/images/screenshots/screenshot_home.png (new binary file, 287 KiB)
docs/assets/images/screenshots/screenshot_image.png (new binary file, 506 KiB)
docs/assets/images/screenshots/screenshot_login.png (new binary file, 225 KiB)
docs/assets/images/screenshots/screenshot_p2p.png (new binary file, 418 KiB)
docs/assets/images/screenshots/screenshot_talk.png (new binary file, 246 KiB)
docs/assets/images/screenshots/screenshot_tts.png (new binary file, 213 KiB)
@@ -3,7 +3,7 @@
"baseUrl": ".",
"paths": {
"*": [
"../../../../.cache/hugo_cache/modules/filecache/modules/pkg/mod/github.com/gohugoio/hugo-mod-jslibs-dist/popperjs/v2@v2.21100.20000/package/dist/cjs/popper.js/*",
"../../../../.cache/hugo_cache/modules/filecache/modules/pkg/mod/github.com/gohugoio/hugo-mod-jslibs-dist/popperjs/v2@v2.21100.20000/package/dist/cjs/*",
"../../../../.cache/hugo_cache/modules/filecache/modules/pkg/mod/github.com/twbs/bootstrap@v5.3.2+incompatible/js/*"
]
}
@@ -48,9 +48,9 @@ defaultContentLanguage = 'en'

[params.docs] # Parameters for the /docs 'template'

logo = "https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd"
logo_text = "LocalAI"
title = "LocalAI documentation" # default html title for documentation pages/sections
logo = "https://raw.githubusercontent.com/mudler/LocalAI/refs/heads/master/core/http/static/logo.png"
logo_text = ""
title = "LocalAI" # default html title for documentation pages/sections

pathName = "docs" # path name for documentation site | default "docs"

@@ -108,6 +108,7 @@ defaultContentLanguage = 'en'
# indexName = "" # Index Name to perform search on (or set env variable HUGO_PARAM_DOCSEARCH_indexName)

[params.analytics] # Parameters for Analytics (Google, Plausible)
# google = "G-XXXXXXXXXX" # Replace with your Google Analytics ID
# plausibleURL = "/docs/s" # (or set via env variable HUGO_PARAM_ANALYTICS_plausibleURL)
# plausibleAPI = "/docs/s" # optional - (or set via env variable HUGO_PARAM_ANALYTICS_plausibleAPI)
# plausibleDomain = "" # (or set via env variable HUGO_PARAM_ANALYTICS_plausibleDomain)

@@ -151,7 +152,7 @@ defaultContentLanguage = 'en'

[languages]
[languages.en]
title = "LocalAI documentation"
title = "LocalAI"
languageName = "English"
weight = 10
# [languages.fr]
@@ -13,6 +13,8 @@ LocalAI supports two modes of distributed inferencing via p2p:
- **Federated Mode**: Requests are shared between the cluster and routed to a single worker node in the network based on the load balancer's decision.
- **Worker Mode** (aka "model sharding" or "splitting weights"): Requests are processed by all the workers which contributes to the final inference result (by sharing the model weights).

A list of global instances shared by the community is available at [explorer.localai.io](https://explorer.localai.io).

## Usage

Starting LocalAI with `--p2p` generates a shared token for connecting multiple instances: and that's all you need to create AI clusters, eliminating the need for intricate network setups.
@@ -18,14 +18,45 @@ To access the WebUI with an API_KEY, browser extensions such as [Requestly](http

{{% /alert %}}

## Using the Bash Installer
## Quickstart

Install LocalAI easily using the bash installer with the following command:

```sh
### Using the Bash Installer
```bash
curl https://localai.io/install.sh | sh
```

### Run with docker:
```bash
# CPU only image:
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-cpu

# Nvidia GPU:
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12

# CPU and GPU image (bigger size):
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest

# AIO images (it will pre-download a set of models ready for use, see https://localai.io/basics/container/)
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
```

### Load models:

```bash
# From the model gallery (see available models with `local-ai models list`, in the WebUI from the model tab, or visiting https://models.localai.io)
local-ai run llama-3.2-1b-instruct:q4_k_m
# Start LocalAI with the phi-2 model directly from huggingface
local-ai run huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf
# Install and run a model from the Ollama OCI registry
local-ai run ollama://gemma:2b
# Run a model from a configuration file
local-ai run https://gist.githubusercontent.com/.../phi-2.yaml
# Install and run a model from a standard OCI registry (e.g., Docker Hub)
local-ai run oci://localai/phi-2:latest
```

For a full list of options, refer to the [Installer Options]({{% relref "docs/advanced/installer" %}}) documentation.

Binaries can also be [manually downloaded]({{% relref "docs/reference/binaries" %}}).
@@ -1,4 +1,3 @@
|
||||
|
||||
+++
|
||||
title = "Overview"
|
||||
weight = 1
|
||||
@@ -7,162 +6,96 @@ description = "What is LocalAI?"
|
||||
tags = ["Beginners"]
|
||||
categories = [""]
|
||||
author = "Ettore Di Giacinto"
|
||||
# This allows to overwrite the landing page
|
||||
url = '/'
|
||||
icon = "info"
|
||||
+++
|
||||
|
||||
<p align="center">
|
||||
<a href="https://localai.io"><img width=512 src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd"></a>
|
||||
</p >
|
||||
# Welcome to LocalAI
|
||||
|
||||
<p align="center">
|
||||
<a href="https://github.com/go-skynet/LocalAI/fork" target="blank">
|
||||
<img src="https://img.shields.io/github/forks/go-skynet/LocalAI?style=for-the-badge" alt="LocalAI forks"/>
|
||||
</a>
|
||||
<a href="https://github.com/go-skynet/LocalAI/stargazers" target="blank">
|
||||
<img src="https://img.shields.io/github/stars/go-skynet/LocalAI?style=for-the-badge" alt="LocalAI stars"/>
|
||||
</a>
|
||||
<a href="https://github.com/go-skynet/LocalAI/pulls" target="blank">
|
||||
<img src="https://img.shields.io/github/issues-pr/go-skynet/LocalAI?style=for-the-badge" alt="LocalAI pull-requests"/>
|
||||
</a>
|
||||
<a href='https://github.com/go-skynet/LocalAI/releases'>
|
||||
<img src='https://img.shields.io/github/release/go-skynet/LocalAI?&label=Latest&style=for-the-badge'>
|
||||
</a>
|
||||
</p>
|
||||
LocalAI is your complete AI stack for running AI models locally. It's designed to be simple, efficient, and accessible, providing a drop-in replacement for OpenAI's API while keeping your data private and secure.
|
||||
|
||||
<p align="center">
|
||||
<a href="https://hub.docker.com/r/localai/localai" target="blank">
|
||||
<img src="https://img.shields.io/badge/dockerhub-images-important.svg?logo=Docker" alt="LocalAI Docker hub"/>
|
||||
</a>
|
||||
<a href="https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest" target="blank">
|
||||
<img src="https://img.shields.io/badge/quay.io-images-important.svg?" alt="LocalAI Quay.io"/>
|
||||
</a>
|
||||
</p>
|
||||
## Why LocalAI?
|
||||
|
||||
<p align="center">
|
||||
<a href="https://trendshift.io/repositories/5539" target="_blank"><img src="https://trendshift.io/api/badge/repositories/5539" alt="mudler%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
|
||||
</p>
|
||||
In today's AI landscape, privacy, control, and flexibility are paramount. LocalAI addresses these needs by:
|
||||
|
||||
<p align="center">
|
||||
<a href="https://twitter.com/LocalAI_API" target="blank">
|
||||
<img src="https://img.shields.io/twitter/follow/LocalAI_API?label=Follow: LocalAI_API&style=social" alt="Follow LocalAI_API"/>
|
||||
</a>
|
||||
<a href="https://discord.gg/uJAeKSAGDy" target="blank">
|
||||
<img src="https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted" alt="Join LocalAI Discord Community"/>
|
||||
</a>
|
||||
</p>
|
||||
- **Privacy First**: Your data never leaves your machine
|
||||
- **Complete Control**: Run models on your terms, with your hardware
|
||||
- **Open Source**: MIT licensed and community-driven
|
||||
- **Flexible Deployment**: From laptops to servers, with or without GPUs
|
||||
- **Extensible**: Add new models and features as needed
|
||||
|
||||
> 💡 Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [💭Discord](https://discord.gg/uJAeKSAGDy)
|
||||
>
|
||||
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/go-skynet/LocalAI/tree/master/examples/)
|
||||
## Core Components
|
||||
|
||||
LocalAI is more than just a single tool - it's a complete ecosystem:
|
||||
|
||||
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families and architectures. Does not require GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).
|
||||
1. **[LocalAI Core](https://github.com/mudler/LocalAI)**
|
||||
- OpenAI-compatible API
|
||||
- Multiple model support (LLMs, image, audio)
|
||||
- No GPU required
|
||||
- Fast inference with native bindings
|
||||
- [Github repository](https://github.com/mudler/LocalAI)
|
||||
|
||||
2. **[LocalAGI](https://github.com/mudler/LocalAGI)**
|
||||
- Autonomous AI agents
|
||||
- No coding required
|
||||
- WebUI and REST API support
|
||||
- Extensible agent framework
|
||||
- [Github repository](https://github.com/mudler/LocalAGI)
|
||||
|
||||
## Start LocalAI
|
||||
3. **[LocalRecall](https://github.com/mudler/LocalRecall)**
|
||||
- Semantic search
|
||||
- Memory management
|
||||
- Vector database
|
||||
- Perfect for AI applications
|
||||
- [Github repository](https://github.com/mudler/LocalRecall)
|
||||
|
||||
Start the image with Docker to have a functional clone of OpenAI! 🚀:
|
||||
## Getting Started
|
||||
|
||||
```bash
|
||||
docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu
|
||||
# Do you have a Nvidia GPUs? Use this instead
|
||||
# CUDA 11
|
||||
# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-nvidia-cuda-11
|
||||
# CUDA 12
|
||||
# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-nvidia-cuda-12
|
||||
```
|
||||
|
||||
Or just use the bash installer:
|
||||
The fastest way to get started is with our one-line installer:
|
||||
|
||||
```bash
|
||||
curl https://localai.io/install.sh | sh
|
||||
```
|
||||
|
||||
See the [💻 Quickstart](https://localai.io/basics/getting_started/) for all the options and way you can run LocalAI!
|
||||
Or use Docker for a quick start:
|
||||
|
||||
## What is LocalAI?
|
||||
```bash
|
||||
docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu
|
||||
```
|
||||
|
||||
In a nutshell:
|
||||
For more detailed installation options and configurations, see our [Getting Started guide](/basics/getting_started/).
|
||||
|
||||
- Local, OpenAI drop-in alternative REST API. You own your data.
|
||||
- NO GPU required. NO Internet access is required either
|
||||
- Optional, GPU Acceleration is available. See also the [build section](https://localai.io/basics/build/index.html).
|
||||
- Supports multiple models
|
||||
- 🏃 Once loaded the first time, it keep models loaded in memory for faster inference
|
||||
- ⚡ Doesn't shell-out, but uses bindings for a faster inference and better performance.
|
||||
## Key Features
|
||||
|
||||
LocalAI is focused on making the AI accessible to anyone. Any contribution, feedback and PR is welcome!
|
||||
- **Text Generation**: Run various LLMs locally
|
||||
- **Image Generation**: Create images with stable diffusion
|
||||
- **Audio Processing**: Text-to-speech and speech-to-text
|
||||
- **Vision API**: Image understanding and analysis
|
||||
- **Embeddings**: Vector database support
|
||||
- **Functions**: OpenAI-compatible function calling
|
||||
- **P2P**: Distributed inference capabilities
|
||||
|
||||
Note that this started just as a fun weekend project by [mudler](https://github.com/mudler) in order to try to create the necessary pieces for a full AI assistant like `ChatGPT`: the community is growing fast and we are working hard to make it better and more stable. If you want to help, please consider contributing (see below)!
|
||||
## Community and Support
|
||||
|
||||
### 🚀 Features
|
||||
LocalAI is a community-driven project. You can:
|
||||
|
||||
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
|
||||
- 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
|
||||
- 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
|
||||
- 🎨 [Image generation with stable diffusion](https://localai.io/features/image-generation)
|
||||
- 🔥 [OpenAI functions](https://localai.io/features/openai-functions/) 🆕
|
||||
- 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
|
||||
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
|
||||
- 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/)
|
||||
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
|
||||
- 💾 [Stores](https://localai.io/stores)
|
||||
- 📈 [Reranker](https://localai.io/features/reranker/)
|
||||
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
|
||||
- Join our [Discord community](https://discord.gg/uJAeKSAGDy)
|
||||
- Check out our [GitHub repository](https://github.com/mudler/LocalAI)
|
||||
- Contribute to the project
|
||||
- Share your use cases and examples
|
||||
|
||||
## Contribute and help
|
||||
## Next Steps
|
||||
|
||||
To help the project you can:
|
||||
Ready to dive in? Here are some recommended next steps:
|
||||
|
||||
- If you have technological skills and want to contribute to development, have a look at the open issues. If you are new you can have a look at the [good-first-issue](https://github.com/go-skynet/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) and [help-wanted](https://github.com/go-skynet/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) labels.
|
||||
1. [Install LocalAI](/basics/getting_started/)
|
||||
2. [Explore available models](https://models.localai.io)
|
||||
3. [Model compatibility](/model-compatibility/)
|
||||
4. [Try out examples](https://github.com/mudler/LocalAI-examples)
|
||||
5. [Join the community](https://discord.gg/uJAeKSAGDy)
|
||||
6. [Check the LocalAI Github repository](https://github.com/mudler/LocalAI)
|
||||
7. [Check the LocalAGI Github repository](https://github.com/mudler/LocalAGI)
|
||||
|
||||
- If you don't have technological skills you can still help improving documentation or [add examples](https://github.com/go-skynet/LocalAI/tree/master/examples) or share your user-stories with our community, any help and contribution is welcome!
|
||||
|
||||
## 🌟 Star history
|
||||
## License
|
||||
|
||||
[](https://star-history.com/#mudler/LocalAI&Date)
|
||||
|
||||
## ❤️ Sponsors
|
||||
|
||||
> Do you find LocalAI useful?
|
||||
|
||||
Support the project by becoming [a backer or sponsor](https://github.com/sponsors/mudler). Your logo will show up here with a link to your website.
|
||||
|
||||
A huge thank you to our generous sponsors who support this project by covering CI expenses, and to everyone on our [Sponsor list](https://github.com/sponsors/mudler):
|
||||
|
||||
<p align="center">
|
||||
<a href="https://www.spectrocloud.com/" target="blank">
|
||||
<img width=200 src="https://github.com/user-attachments/assets/72eab1dd-8b93-4fc0-9ade-84db49f24962">
|
||||
</a>
|
||||
<a href="https://www.premai.io/" target="blank">
|
||||
<img width=200 src="https://github.com/mudler/LocalAI/assets/2420543/42e4ca83-661e-4f79-8e46-ae43689683d6"> <br>
|
||||
</a>
|
||||
</p>
|
||||
|
||||
## 📖 License
|
||||
|
||||
LocalAI is a community-driven project created by [Ettore Di Giacinto](https://github.com/mudler/).
|
||||
|
||||
MIT - Author Ettore Di Giacinto
|
||||
|
||||
## 🙇 Acknowledgements
|
||||
|
||||
LocalAI couldn't have been built without the help of great software already available from the community. Thank you!
|
||||
|
||||
- [llama.cpp](https://github.com/ggerganov/llama.cpp)
|
||||
- https://github.com/tatsu-lab/stanford_alpaca
|
||||
- https://github.com/cornelk/llama-go for the initial ideas
|
||||
- https://github.com/antimatter15/alpaca.cpp
|
||||
- https://github.com/EdVince/Stable-Diffusion-NCNN
|
||||
- https://github.com/ggerganov/whisper.cpp
|
||||
- https://github.com/saharNooby/rwkv.cpp
|
||||
- https://github.com/rhasspy/piper
|
||||
|
||||
## 🤗 Contributors
|
||||
|
||||
This is a community project, a special thanks to our contributors! 🤗
|
||||
<a href="https://github.com/go-skynet/LocalAI/graphs/contributors">
|
||||
<img src="https://contrib.rocks/image?repo=go-skynet/LocalAI" />
|
||||
</a>
|
||||
LocalAI is MIT licensed, created and maintained by [Ettore Di Giacinto](https://github.com/mudler).
|
||||
|
||||
@@ -2,38 +2,212 @@
|
||||
|
||||
# Hero
|
||||
hero:
|
||||
enable: false
|
||||
enable: true
|
||||
weight: 10
|
||||
template: hero
|
||||
|
||||
backgroundImage:
|
||||
path: "images/templates/hero"
|
||||
filename:
|
||||
desktop: "gradient-desktop.webp"
|
||||
mobile: "gradient-mobile.webp"
|
||||
|
||||
badge:
|
||||
text: "⭐ 31.8k+ stars on GitHub!"
|
||||
color: primary
|
||||
pill: false
|
||||
soft: true
|
||||
|
||||
titleLogo:
|
||||
path: "images/logos"
|
||||
filename: "logo.png"
|
||||
alt: "LocalAI Logo"
|
||||
height: 340px
|
||||
|
||||
title: ""
|
||||
subtitle: |
|
||||
**The free, open-source alternative to OpenAI and Anthropic. Your all-in-one AI stack** - Run powerful language models, autonomous agents, and document intelligence **locally** on your hardware.
|
||||
|
||||
**No cloud, no limits, no compromise.**
|
||||
|
||||
image:
|
||||
path: "images"
|
||||
filename: "localai_screenshot.png"
|
||||
alt: "LocalAI Screenshot"
|
||||
boxShadow: true
|
||||
rounded: true
|
||||
|
||||
ctaButton:
|
||||
icon: rocket_launch
|
||||
btnText: "Get Started"
|
||||
url: "/basics/getting_started/"
|
||||
cta2Button:
|
||||
icon: code
|
||||
btnText: "View on GitHub"
|
||||
url: "https://github.com/mudler/LocalAI"
|
||||
|
||||
info: |
|
||||
**Drop-in replacement for OpenAI API** - modular suite of tools that work seamlessly together or independently.
|
||||
|
||||
Start with **[LocalAI](https://localai.io)**'s OpenAI-compatible API, extend with **[LocalAGI](https://github.com/mudler/LocalAGI)**'s autonomous agents, and enhance with **[LocalRecall](https://github.com/mudler/LocalRecall)**'s semantic search - all running locally on your hardware.
|
||||
|
||||
**Open Source** MIT Licensed.
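To make the drop-in compatibility described above concrete, here is a minimal sketch that talks to a local instance with the official OpenAI Python client; the base URL (LocalAI's default port 8080) and the model name are assumptions and depend on your deployment.

```python
# Minimal sketch: call a local LocalAI instance through the official OpenAI
# Python client. The base URL, port and model name below are assumptions -
# adjust them to your own setup.
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8080/v1",  # assumed LocalAI address (default port)
    api_key="not-needed",                 # a key is only needed if you configured one
)

response = client.chat.completions.create(
    model="gemma-3-4b-it-qat",            # example: any model installed in your instance
    messages=[{"role": "user", "content": "Say hello from LocalAI"}],
)
print(response.choices[0].message.content)
```

Because the API surface matches OpenAI's, existing applications usually only need the base URL swapped to point at the local instance.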
|
||||
|
||||
# Feature Grid
|
||||
featureGrid:
|
||||
enable: false
|
||||
enable: true
|
||||
weight: 20
|
||||
template: feature grid
|
||||
|
||||
title: Why choose LocalAI?
|
||||
subtitle: |
|
||||
**OpenAI API Compatible** - Run AI models locally with our modular ecosystem. From language models to autonomous agents and semantic search, build your complete AI stack without the cloud.
|
||||
|
||||
items:
|
||||
- title: LLM Inferencing
|
||||
icon: memory_alt
|
||||
description: LocalAI is a free, **Open Source** OpenAI alternative. Run **LLMs**, generate **images**, **audio** and more **locally** with consumer grade hardware.
|
||||
ctaLink:
|
||||
text: learn more
|
||||
url: /basics/getting_started/
|
||||
- title: Agentic-first
|
||||
icon: smart_toy
|
||||
description: |
|
||||
Extend LocalAI with LocalAGI, an autonomous AI agent platform that runs locally, no coding required.
|
||||
Build and deploy autonomous agents with ease. Interact with REST APIs or use the WebUI.
|
||||
ctaLink:
|
||||
text: learn more
|
||||
url: https://github.com/mudler/LocalAGI
|
||||
|
||||
- title: Memory and Knowledge base
|
||||
icon: psychology
|
||||
description:
|
||||
Extend LocalAI with LocalRecall, a local REST API for semantic search and memory management. Perfect for AI applications.
|
||||
ctaLink:
|
||||
text: learn more
|
||||
url: https://github.com/mudler/LocalRecall
|
||||
|
||||
- title: OpenAI Compatible
|
||||
icon: api
|
||||
description: Drop-in replacement for OpenAI API. Compatible with existing applications and libraries.
|
||||
ctaLink:
|
||||
text: learn more
|
||||
url: /basics/getting_started/
|
||||
|
||||
- title: No GPU Required
|
||||
icon: memory
|
||||
description: Run on consumer grade hardware. No need for expensive GPUs or cloud services.
|
||||
ctaLink:
|
||||
text: learn more
|
||||
url: /basics/getting_started/
|
||||
|
||||
- title: Multiple Models
|
||||
icon: hub
|
||||
description: |
|
||||
Support for various model families including LLMs, image generation, and audio models.
|
||||
Supports multiple backends for inferencing, including vLLM, llama.cpp, and more.
|
||||
You can switch between them as needed and install them from the Web interface or the CLI.
|
||||
ctaLink:
|
||||
text: learn more
|
||||
url: /model-compatibility
|
||||
|
||||
- title: Privacy Focused
|
||||
icon: security
|
||||
description: Keep your data local. No data leaves your machine, ensuring complete privacy.
|
||||
ctaLink:
|
||||
text: learn more
|
||||
url: /basics/container/
|
||||
|
||||
- title: Easy Setup
|
||||
icon: settings
|
||||
description: Simple installation and configuration. Get started in minutes with binaries, Docker, Podman, Kubernetes, or a local installation.
|
||||
ctaLink:
|
||||
text: learn more
|
||||
url: /basics/getting_started/
|
||||
|
||||
- title: Community Driven
|
||||
icon: groups
|
||||
description: Active community support and regular updates. Contribute and help shape the future of LocalAI.
|
||||
ctaLink:
|
||||
text: learn more
|
||||
url: https://github.com/mudler/LocalAI
|
||||
|
||||
|
||||
|
||||
- title: Extensible
|
||||
icon: extension
|
||||
description: Easy to extend and customize. Add new models and features as needed.
|
||||
ctaLink:
|
||||
text: learn more
|
||||
url: /docs/integrations/
|
||||
|
||||
- title: Peer 2 Peer
|
||||
icon: hub
|
||||
description: |
|
||||
LocalAI is designed for decentralized LLM inference, powered by a peer-to-peer system based on libp2p.
|
||||
It can be used in a local or remote network and is compatible with any LLM.
|
||||
It works both in federated mode and by splitting model weights.
|
||||
ctaLink:
|
||||
text: learn more
|
||||
url: /features/distribute/
|
||||
|
||||
- title: Open Source
|
||||
icon: code
|
||||
description: MIT licensed. Free to use, modify, and distribute. Community contributions welcome.
|
||||
ctaLink:
|
||||
text: learn more
|
||||
url: https://github.com/mudler/LocalAI
|
||||
|
||||
imageText:
|
||||
enable: true
|
||||
weight: 25
|
||||
template: image text
|
||||
|
||||
title: LocalAI
|
||||
subtitle: The Free, Open Source OpenAI Alternative
|
||||
title: Run AI models locally with ease
|
||||
subtitle: |
|
||||
LocalAI makes it simple to run various AI models on your own hardware. From text generation to image creation, autonomous agents to semantic search - all orchestrated through a unified API.
|
||||
|
||||
list:
|
||||
- text: Optimized, fast inference
|
||||
icon: speed
|
||||
- text: OpenAI API compatibility
|
||||
icon: api
|
||||
|
||||
- text: Comprehensive support for many model architectures
|
||||
icon: area_chart
|
||||
- text: Multiple model support
|
||||
icon: hub
|
||||
|
||||
- text: Easy to deploy with Docker
|
||||
icon: accessibility
|
||||
- text: Image understanding
|
||||
icon: image
|
||||
|
||||
- text: Image generation
|
||||
icon: image
|
||||
|
||||
- text: Audio generation
|
||||
icon: music_note
|
||||
|
||||
- text: Voice activity detection
|
||||
icon: mic
|
||||
|
||||
- text: Speech recognition
|
||||
icon: mic
|
||||
|
||||
- text: Video generation
|
||||
icon: movie
|
||||
|
||||
- text: Privacy focused
|
||||
icon: security
|
||||
|
||||
- text: Autonomous agents with [LocalAGI](https://github.com/mudler/LocalAGI)
|
||||
icon: smart_toy
|
||||
|
||||
- text: Semantic search with [LocalRecall](https://github.com/mudler/LocalRecall)
|
||||
icon: psychology
|
||||
|
||||
- text: Agent orchestration
|
||||
icon: hub
|
||||
|
||||
image:
|
||||
path: "images/logos"
|
||||
filename: "logo.png"
|
||||
alt: "LocalAI logo" # Optional but recommended
|
||||
path: "images"
|
||||
filename: "imagen.png"
|
||||
alt: "LocalAI Image generation"
|
||||
|
||||
imgOrder:
|
||||
desktop: 2
|
||||
@@ -41,10 +215,62 @@ imageText:
|
||||
|
||||
ctaButton:
|
||||
text: Learn more
|
||||
url: "/docs/"
|
||||
url: "/basics/getting_started/"
|
||||
|
||||
# Image compare
|
||||
imageCompare:
|
||||
enable: false
|
||||
weight: 30
|
||||
template: image compare
|
||||
|
||||
title: LocalAI in Action
|
||||
subtitle: See how LocalAI can transform your local AI experience with various models and capabilities.
|
||||
|
||||
items:
|
||||
- title: Text Generation
|
||||
config: {
|
||||
startingPoint: 50,
|
||||
addCircle: true,
|
||||
addCircleBlur: false,
|
||||
showLabels: true,
|
||||
labelOptions: {
|
||||
before: 'Dark',
|
||||
after: 'Light',
|
||||
onHover: false
|
||||
}
|
||||
}
|
||||
imagePath: "images/screenshots"
|
||||
imageBefore: "text_generation_input.webp"
|
||||
imageAfter: "text_generation_output.webp"
|
||||
|
||||
- title: Image Generation
|
||||
config: {
|
||||
startingPoint: 50,
|
||||
addCircle: true,
|
||||
addCircleBlur: true,
|
||||
showLabels: true,
|
||||
labelOptions: {
|
||||
before: 'Prompt',
|
||||
after: 'Result',
|
||||
onHover: true
|
||||
}
|
||||
}
|
||||
imagePath: "images/screenshots"
|
||||
imageBefore: "imagen_before.webp"
|
||||
imageAfter: "imagen_after.webp"
|
||||
|
||||
- title: Audio Generation
|
||||
config: {
|
||||
startingPoint: 50,
|
||||
addCircle: true,
|
||||
addCircleBlur: false,
|
||||
showLabels: true,
|
||||
labelOptions: {
|
||||
before: 'Text',
|
||||
after: 'Audio',
|
||||
onHover: false
|
||||
}
|
||||
}
|
||||
imagePath: "images/screenshots"
|
||||
imageBefore: "audio_generation_text.webp"
|
||||
imageAfter: "audio_generation_waveform.webp"
|
||||
@@ -1,3 +1,3 @@
|
||||
{
|
||||
"version": "v2.26.0"
|
||||
"version": "v2.27.0"
|
||||
}
|
||||
|
||||
@@ -82,7 +82,7 @@
|
||||
</span>
|
||||
</button>
|
||||
{{ end -}}
|
||||
{{ if .Site.IsMultiLingual }}
|
||||
{{ if hugo.IsMultilingual }}
|
||||
<div class="dropdown">
|
||||
<button class="btn btn-link btn-default dropdown-toggle ps-2" type="button" data-bs-toggle="dropdown" aria-expanded="false">
|
||||
{{ site.Language.Lang | upper }}
|
||||
|
||||
@@ -18,10 +18,10 @@
|
||||
<!-- Custom CSS -->
|
||||
{{- $options := dict "enableSourceMap" true }}
|
||||
{{- if hugo.IsProduction}}
|
||||
{{- $options := dict "enableSourceMap" false "outputStyle" "compressed" }}
|
||||
{{- $options = dict "enableSourceMap" false "outputStyle" "compressed" }}
|
||||
{{- end }}
|
||||
{{- $style := resources.Get "/scss/style.scss" }}
|
||||
{{- $style = $style | resources.ExecuteAsTemplate "/scss/style.scss" . | resources.ToCSS $options }}
|
||||
{{- $style = $style | resources.ExecuteAsTemplate "/scss/style.scss" . | css.Sass $options }}
|
||||
{{- if hugo.IsProduction }}
|
||||
{{- $style = $style | minify | fingerprint "sha384" }}
|
||||
{{- end -}}
|
||||
@@ -39,7 +39,7 @@
|
||||
<!-- Image Compare Viewer -->
|
||||
{{ if ($.Scratch.Get "image_compare_enabled") }}
|
||||
{{ $imagecompare := resources.Get "js/image-compare-viewer.min.js" }}
|
||||
{{- if not .Site.IsServer }}
|
||||
{{- if not hugo.IsDevelopment }}
|
||||
{{- $js := (slice $imagecompare) | resources.Concat "/js/image-compare.js" | minify | fingerprint "sha384" }}
|
||||
<script type="text/javascript" src="{{ $js.Permalink }}" integrity="{{ $js.Data.Integrity }}"></script>
|
||||
{{- else }}
|
||||
@@ -48,14 +48,14 @@
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
<!-- Plausible Analytics Config -->
|
||||
{{- if not .Site.IsServer }}
|
||||
{{- if not hugo.IsDevelopment }}
|
||||
{{ if and (.Site.Params.plausible.scriptURL) (.Site.Params.plausible.dataDomain) -}}
|
||||
{{- partialCached "head/plausible" . }}
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
<!-- Google Analytics v4 Config -->
|
||||
{{- if not .Site.IsServer }}
|
||||
{{- if .Site.GoogleAnalytics }}
|
||||
{{- if not hugo.IsDevelopment }}
|
||||
{{- if .Site.Params.analytics.google }}
|
||||
{{- template "_internal/google_analytics.html" . -}}
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
|
||||
57
docs/layouts/partials/header.html
Normal file
@@ -0,0 +1,57 @@
|
||||
<!-- Navbar Start -->
|
||||
<header id="topnav">
|
||||
<div class="container d-flex justify-content-between align-items-center">
|
||||
<!-- Logo container-->
|
||||
<a class="logo" aria-label="Home" href='{{ relLangURL "" }}'>
|
||||
|
||||
</a>
|
||||
<!-- End Logo container-->
|
||||
|
||||
<div class="d-flex align-items-center">
|
||||
|
||||
<div id="navigation">
|
||||
<!-- Navigation Menu -->
|
||||
<ul class="navigation-menu nav-right">
|
||||
{{- range .Site.Menus.primary }}
|
||||
<li><a href="{{ relLangURL .URL }}">{{ .Name }}</a></li>
|
||||
{{ end }}
|
||||
</ul><!--end navigation menu-->
|
||||
</div><!--end navigation-->
|
||||
|
||||
<!-- Social Links Start -->
|
||||
{{ with $.Scratch.Get "social_list" }}
|
||||
<ul class="social-link d-flex list-inline mb-0">
|
||||
{{ range . }}
|
||||
{{ $path := printf "images/social/%s.%s" . "svg" }}
|
||||
<li class="list-inline-item mb-0">
|
||||
<a href="{{ if eq . `rss` }} {{ `index.xml` | absURL }} {{ else if eq . `bluesky` }} https://bsky.app/profile/{{ index site.Params.social . }} {{ else }} https://{{ . }}.com/{{ index site.Params.social . }} {{ end }}" alt="{{ . }}" rel="noopener noreferrer" target="_blank">
|
||||
<div class="btn btn-icon btn-landing border-0">
|
||||
{{ with resources.Get $path }}
|
||||
{{ .Content | safeHTML }}
|
||||
{{ end }}
|
||||
</div>
|
||||
</a>
|
||||
</li>
|
||||
{{ end }}
|
||||
</ul>
|
||||
{{ end }}
|
||||
<!-- Social Links End -->
|
||||
|
||||
<div class="menu-extras ms-3 me-2">
|
||||
<div class="menu-item">
|
||||
<!-- Mobile menu toggle-->
|
||||
<button class="navbar-toggle btn btn-icon btn-soft-light" id="isToggle" aria-label="toggleMenu" onclick="toggleMenu()">
|
||||
<div class="lines">
|
||||
<span></span>
|
||||
<span></span>
|
||||
<span></span>
|
||||
</div>
|
||||
</button>
|
||||
<!-- End mobile menu toggle-->
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div><!--end container-->
|
||||
</header><!--end header-->
|
||||
<!-- Navbar End -->
|
||||
@@ -1 +1 @@
|
||||
<a href="https://localai.io"><img src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd"></a>
|
||||
<a href="https://localai.io"><img src="https://raw.githubusercontent.com/mudler/LocalAI/refs/heads/master/core/http/static/logo.png"></a>
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
[build]
|
||||
[build.environment]
|
||||
HUGO_VERSION = "0.121.2"
|
||||
HUGO_VERSION = "0.146.3"
|
||||
GO_VERSION = "1.22.2"
|
||||
|
||||
BIN
docs/static/android-chrome-192x192.png
vendored
|
Before Width: | Height: | Size: 57 KiB After Width: | Height: | Size: 16 KiB |
BIN
docs/static/android-chrome-512x512.png
vendored
|
Before Width: | Height: | Size: 359 KiB After Width: | Height: | Size: 30 KiB |
BIN
docs/static/apple-touch-icon.png
vendored
|
Before Width: | Height: | Size: 52 KiB After Width: | Height: | Size: 14 KiB |
BIN
docs/static/favicon-16x16.png
vendored
|
Before Width: | Height: | Size: 769 B After Width: | Height: | Size: 711 B |
BIN
docs/static/favicon-32x32.png
vendored
|
Before Width: | Height: | Size: 2.3 KiB After Width: | Height: | Size: 1.7 KiB |
BIN
docs/static/favicon.ico
vendored
|
Before Width: | Height: | Size: 15 KiB After Width: | Height: | Size: 15 KiB |
171
docs/static/favicon.svg
vendored
Normal file
|
After Width: | Height: | Size: 108 KiB |
1
docs/static/site.webmanifest
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"name":"","short_name":"","icons":[{"src":"/android-chrome-192x192.png","sizes":"192x192","type":"image/png"},{"src":"/android-chrome-512x512.png","sizes":"512x512","type":"image/png"}],"theme_color":"#ffffff","background_color":"#ffffff","display":"standalone"}
|
||||
2
docs/themes/lotusdocs
vendored
@@ -8,9 +8,7 @@ config_file: |
|
||||
chat_message: |-
|
||||
<start_of_turn>{{if eq .RoleName "assistant" }}model{{else}}{{ .RoleName }}{{end}}
|
||||
{{ if .FunctionCall -}}
|
||||
Function call:
|
||||
{{ else if eq .RoleName "tool" -}}
|
||||
Function response:
|
||||
{{ end -}}
|
||||
{{ if .Content -}}
|
||||
{{.Content -}}
|
||||
@@ -25,11 +23,14 @@ config_file: |
|
||||
{{.Input}}
|
||||
function: |
|
||||
<start_of_turn>system
|
||||
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
|
||||
You have access to functions. If you decide to invoke any of the function(s),
|
||||
you MUST put it in the format of
|
||||
{"name": function name, "parameters": dictionary of argument name and its value}
|
||||
|
||||
You SHOULD NOT include any other text in the response if you call a function
|
||||
{{range .Functions}}
|
||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
||||
{{end}}
|
||||
For each function call return a json object with function name and arguments
|
||||
<end_of_turn>
|
||||
{{.Input -}}
|
||||
<start_of_turn>model
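For clarity, a model driven by this template is expected to answer with a single JSON object and nothing else. The snippet below is a minimal sketch of parsing such a reply; the reply string and the `get_weather` tool are purely hypothetical examples.

```python
import json

# Hypothetical raw reply from a model driven by the template above:
# a single JSON object in the prescribed {"name": ..., "parameters": ...} shape.
raw_reply = '{"name": "get_weather", "parameters": {"city": "Rome", "unit": "celsius"}}'

call = json.loads(raw_reply)
function_name = call["name"]        # the tool the model wants to invoke
arguments = call["parameters"]      # mapping of argument name -> value
print(function_name, arguments)
```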
|
||||
|
||||
@@ -78,6 +78,60 @@
|
||||
- filename: gemma-3-1b-it-Q4_K_M.gguf
|
||||
sha256: 8ccc5cd1f1b3602548715ae25a66ed73fd5dc68a210412eea643eb20eb75a135
|
||||
uri: huggingface://ggml-org/gemma-3-1b-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf
|
||||
- !!merge <<: *gemma3
|
||||
name: "gemma-3-12b-it-qat"
|
||||
urls:
|
||||
- https://huggingface.co/google/gemma-3-12b-it
|
||||
- https://huggingface.co/vinimuchulski/gemma-3-12b-it-qat-q4_0-gguf
|
||||
description: |
|
||||
This model corresponds to the 12B instruction-tuned version of the Gemma 3 model in GGUF format using Quantization Aware Training (QAT). The GGUF corresponds to Q4_0 quantization.
|
||||
|
||||
Thanks to QAT, the model is able to preserve similar quality as bfloat16 while significantly reducing the memory requirements to load the model.
|
||||
|
||||
You can find the half-precision version here.
|
||||
overrides:
|
||||
parameters:
|
||||
model: gemma-3-12b-it-q4_0.gguf
|
||||
files:
|
||||
- filename: gemma-3-12b-it-q4_0.gguf
|
||||
sha256: 6f1bb5f455414f7b46482bda51cbfdbf19786e21a5498c4403fdfc03d09b045c
|
||||
uri: huggingface://vinimuchulski/gemma-3-12b-it-qat-q4_0-gguf/gemma-3-12b-it-q4_0.gguf
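As a usage note, a gallery entry like the one above can typically be installed at runtime through LocalAI's model-gallery endpoints. The sketch below assumes a local instance on port 8080 and the documented /models/apply and /models/jobs routes; exact response fields may vary between versions.

```python
import time
import requests

BASE = "http://localhost:8080"  # assumed LocalAI address

# Ask LocalAI to install a gallery model by name; the endpoint returns a job handle.
job = requests.post(f"{BASE}/models/apply", json={"id": "gemma-3-12b-it-qat"}).json()

# Poll the job until the download and configuration are reported as processed.
while True:
    status = requests.get(f"{BASE}/models/jobs/{job['uuid']}").json()
    if status.get("processed"):
        break
    time.sleep(2)

print("install finished:", status.get("message"))
```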
|
||||
- !!merge <<: *gemma3
|
||||
name: "gemma-3-4b-it-qat"
|
||||
urls:
|
||||
- https://huggingface.co/google/gemma-3-4b-it
|
||||
- https://huggingface.co/vinimuchulski/gemma-3-4b-it-qat-q4_0-gguf
|
||||
description: |
|
||||
This model corresponds to the 4B instruction-tuned version of the Gemma 3 model in GGUF format using Quantization Aware Training (QAT). The GGUF corresponds to Q4_0 quantization.
|
||||
|
||||
Thanks to QAT, the model is able to preserve similar quality as bfloat16 while significantly reducing the memory requirements to load the model.
|
||||
|
||||
You can find the half-precision version here.
|
||||
overrides:
|
||||
parameters:
|
||||
model: gemma-3-4b-it-q4_0.gguf
|
||||
files:
|
||||
- filename: gemma-3-4b-it-q4_0.gguf
|
||||
sha256: 2ca493d426ffcb43db27132f183a0230eda4a3621e58b328d55b665f1937a317
|
||||
uri: huggingface://vinimuchulski/gemma-3-4b-it-qat-q4_0-gguf/gemma-3-4b-it-q4_0.gguf
|
||||
- !!merge <<: *gemma3
|
||||
name: "gemma-3-27b-it-qat"
|
||||
urls:
|
||||
- https://huggingface.co/google/gemma-3-27b-it
|
||||
- https://huggingface.co/vinimuchulski/gemma-3-27b-it-qat-q4_0-gguf
|
||||
description: |
|
||||
This model corresponds to the 27B instruction-tuned version of the Gemma 3 model in GGUF format using Quantization Aware Training (QAT). The GGUF corresponds to Q4_0 quantization.
|
||||
|
||||
Thanks to QAT, the model is able to preserve similar quality as bfloat16 while significantly reducing the memory requirements to load the model.
|
||||
|
||||
You can find the half-precision version here.
|
||||
overrides:
|
||||
parameters:
|
||||
model: gemma-3-27b-it-q4_0.gguf
|
||||
files:
|
||||
- filename: gemma-3-27b-it-q4_0.gguf
|
||||
sha256: 45e586879bc5f5d7a5b6527e812952057ce916d9fc7ba16f7262ec9972c9e2a2
|
||||
uri: huggingface://vinimuchulski/gemma-3-27b-it-qat-q4_0-gguf/gemma-3-27b-it-q4_0.gguf
|
||||
- !!merge <<: *gemma3
|
||||
name: "qgallouedec_gemma-3-27b-it-codeforces-sft"
|
||||
urls:
|
||||
@@ -386,6 +440,78 @@
|
||||
- filename: Gemma-3-Starshine-12B.i1-Q4_K_M.gguf
|
||||
sha256: 4c35a678e3784e20a8d85d4e7045d965509a1a71305a0da105fc5991ba7d6dc4
|
||||
uri: huggingface://mradermacher/Gemma-3-Starshine-12B-i1-GGUF/Gemma-3-Starshine-12B.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *gemma3
|
||||
name: "burtenshaw_gemmacoder3-12b"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/62d648291fa3e4e7ae3fa6e8/zkcBr2UZFDpALAsMdgbze.gif
|
||||
urls:
|
||||
- https://huggingface.co/burtenshaw/GemmaCoder3-12B
|
||||
- https://huggingface.co/bartowski/burtenshaw_GemmaCoder3-12B-GGUF
|
||||
description: |
|
||||
This model is a fine-tuned version of google/gemma-3-12b-it on the open-r1/codeforces-cots dataset. It has been trained using TRL.
|
||||
overrides:
|
||||
parameters:
|
||||
model: burtenshaw_GemmaCoder3-12B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: burtenshaw_GemmaCoder3-12B-Q4_K_M.gguf
|
||||
sha256: 47f0a2848eeed783cb03336afd8cc69f6ee0e088e3cec11ab6d9fe16457dc3d4
|
||||
uri: huggingface://bartowski/burtenshaw_GemmaCoder3-12B-GGUF/burtenshaw_GemmaCoder3-12B-Q4_K_M.gguf
|
||||
- !!merge <<: *gemma3
|
||||
name: "tesslate_synthia-s1-27b"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/64d1129297ca59bcf7458d07/zgFDl7UvWhiPYqdote7XT.png
|
||||
urls:
|
||||
- https://huggingface.co/Tesslate/Synthia-S1-27b
|
||||
- https://huggingface.co/bartowski/Tesslate_Synthia-S1-27b-GGUF
|
||||
description: |
|
||||
Synthia-S1-27b is a reasoning AI model developed by Tesslate AI, fine-tuned specifically for advanced reasoning, coding, and RP use cases. Built upon the robust Gemma3 architecture, Synthia-S1-27b excels in logical reasoning, creative writing, and deep contextual understanding. It supports multimodal inputs (text and images) with a large 128K token context window, enabling complex analysis suitable for research, academic tasks, and enterprise-grade AI applications.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Tesslate_Synthia-S1-27b-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Tesslate_Synthia-S1-27b-Q4_K_M.gguf
|
||||
sha256: d953bf7f802dc68f85a35360deb24b9a8b446af051e82c77f2f0759065d2aa71
|
||||
uri: huggingface://bartowski/Tesslate_Synthia-S1-27b-GGUF/Tesslate_Synthia-S1-27b-Q4_K_M.gguf
|
||||
- !!merge <<: *gemma3
|
||||
name: "daichi-12b"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/66c26b6fb01b19d8c3c2467b/RqjcprtID598UTzL4igkU.webp
|
||||
urls:
|
||||
- https://huggingface.co/Delta-Vector/Daichi-12B
|
||||
- https://huggingface.co/Delta-Vector/Daichi-12B-GGUF
|
||||
description: |
|
||||
A merge between my Gemma finetune of Pascal-12B and Omega-Directive-G-12B, meant to give it more NSFW knowledge.
|
||||
This model has short, sweet prose and is uncensored in roleplay.
|
||||
The model is suited for traditional RP. All thanks to Tav for funding the training run.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Omega-LN-SFT-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Omega-LN-SFT-Q4_K_M.gguf
|
||||
sha256: 33fb1c61085f9b18074e320ac784e6dbc8a98fe20705f92773e055471fd3cb0f
|
||||
uri: huggingface://Delta-Vector/Daichi-12B-GGUF/Omega-LN-SFT-Q4_K_M.gguf
|
||||
- &llama4
|
||||
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
|
||||
icon: https://avatars.githubusercontent.com/u/153379578
|
||||
license: llama4
|
||||
tags:
|
||||
- llm
|
||||
- gguf
|
||||
- gpu
|
||||
- cpu
|
||||
- llama3.3
|
||||
name: "meta-llama_llama-4-scout-17b-16e-instruct"
|
||||
urls:
|
||||
- https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
- https://huggingface.co/bartowski/meta-llama_Llama-4-Scout-17B-16E-Instruct-GGUF
|
||||
description: |
|
||||
The Llama 4 collection of models are natively multimodal AI models that enable text and multimodal experiences. These models leverage a mixture-of-experts architecture to offer industry-leading performance in text and image understanding.
|
||||
|
||||
These Llama 4 models mark the beginning of a new era for the Llama ecosystem. We are launching two efficient models in the Llama 4 series, Llama 4 Scout, a 17 billion parameter model with 16 experts, and Llama 4 Maverick, a 17 billion parameter model with 128 experts.
|
||||
overrides:
|
||||
parameters:
|
||||
model: meta-llama_Llama-4-Scout-17B-16E-Instruct-Q3_K_S.gguf
|
||||
files:
|
||||
- filename: meta-llama_Llama-4-Scout-17B-16E-Instruct-Q3_K_S.gguf
|
||||
sha256: 48dfc18d40691b4190b7fecf1f89b78cadc758c3a27a9e2a1cabd686fdb822e3
|
||||
uri: huggingface://bartowski/meta-llama_Llama-4-Scout-17B-16E-Instruct-GGUF/meta-llama_Llama-4-Scout-17B-16E-Instruct-Q3_K_S.gguf
|
||||
- &eurollm
|
||||
name: "eurollm-9b-instruct"
|
||||
icon: https://openeurollm.eu/_next/static/media/logo-dark.e7001867.svg
|
||||
@@ -1382,6 +1508,52 @@
|
||||
- filename: Forgotten-Abomination-70B-v5.0.Q4_K_M.gguf
|
||||
sha256: a5f5e712e66b855f36ff45175f20c24441fa942ca8af47bd6f49107c6e0f025d
|
||||
uri: huggingface://mradermacher/Forgotten-Abomination-70B-v5.0-GGUF/Forgotten-Abomination-70B-v5.0.Q4_K_M.gguf
|
||||
- !!merge <<: *llama33
|
||||
name: "watt-ai_watt-tool-70b"
|
||||
urls:
|
||||
- https://huggingface.co/watt-ai/watt-tool-70B
|
||||
- https://huggingface.co/bartowski/watt-ai_watt-tool-70B-GGUF
|
||||
description: |
|
||||
watt-tool-70B is a fine-tuned language model based on LLaMa-3.3-70B-Instruct, optimized for tool usage and multi-turn dialogue. It achieves state-of-the-art performance on the Berkeley Function-Calling Leaderboard (BFCL).
|
||||
Model Description
|
||||
|
||||
This model is specifically designed to excel at complex tool usage scenarios that require multi-turn interactions, making it ideal for empowering platforms like Lupan, an AI-powered workflow building tool. By leveraging a carefully curated and optimized dataset, watt-tool-70B demonstrates superior capabilities in understanding user requests, selecting appropriate tools, and effectively utilizing them across multiple turns of conversation.
|
||||
|
||||
Target Application: AI Workflow Building as in https://lupan.watt.chat/ and Coze.
|
||||
Key Features
|
||||
|
||||
Enhanced Tool Usage: Fine-tuned for precise and efficient tool selection and execution.
|
||||
Multi-Turn Dialogue: Optimized for maintaining context and effectively utilizing tools across multiple turns of conversation, enabling more complex task completion.
|
||||
State-of-the-Art Performance: Achieves top performance on the BFCL, demonstrating its capabilities in function calling and tool usage.
|
||||
Based on LLaMa-3.1-70B-Instruct: Inherits the strong language understanding and generation capabilities of the base model.
|
||||
overrides:
|
||||
parameters:
|
||||
model: watt-ai_watt-tool-70B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: watt-ai_watt-tool-70B-Q4_K_M.gguf
|
||||
sha256: 93806a5482b9e40e50ffca7a72abe3414d384749cc9e3d378eab5db8a8154b18
|
||||
uri: huggingface://bartowski/watt-ai_watt-tool-70B-GGUF/watt-ai_watt-tool-70B-Q4_K_M.gguf
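Because this model targets tool usage, a minimal sketch of an OpenAI-style function-calling request against the compatible endpoint may help; the base URL, model name, and the example tool definition are assumptions, not part of the entry above.

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8080/v1", api_key="not-needed")  # assumed address

# A hypothetical tool definition in the standard OpenAI "tools" format.
tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the current weather for a city",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}]

response = client.chat.completions.create(
    model="watt-ai_watt-tool-70b",  # assumed name of the installed model
    messages=[{"role": "user", "content": "What's the weather in Berlin?"}],
    tools=tools,
)
print(response.choices[0].message.tool_calls)
```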
|
||||
- !!merge <<: *llama33
|
||||
name: "deepcogito_cogito-v1-preview-llama-70b"
|
||||
icon: https://huggingface.co/deepcogito/cogito-v1-preview-llama-70B/resolve/main/images/deep-cogito-logo.png
|
||||
urls:
|
||||
- https://huggingface.co/deepcogito/cogito-v1-preview-llama-70B
|
||||
- https://huggingface.co/bartowski/deepcogito_cogito-v1-preview-llama-70B-GGUF
|
||||
description: |
|
||||
The Cogito LLMs are instruction tuned generative models (text in/text out). All models are released under an open license for commercial use.
|
||||
|
||||
Cogito models are hybrid reasoning models. Each model can answer directly (standard LLM), or self-reflect before answering (like reasoning models).
|
||||
The LLMs are trained using Iterated Distillation and Amplification (IDA) - a scalable and efficient alignment strategy for superintelligence using iterative self-improvement.
|
||||
The models have been optimized for coding, STEM, instruction following and general helpfulness, and have significantly higher multilingual, coding and tool calling capabilities than size equivalent counterparts.
|
||||
In both standard and reasoning modes, Cogito v1-preview models outperform their size equivalent counterparts on common industry benchmarks.
|
||||
Each model is trained in over 30 languages and supports a context length of 128k.
|
||||
overrides:
|
||||
parameters:
|
||||
model: deepcogito_cogito-v1-preview-llama-70B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: deepcogito_cogito-v1-preview-llama-70B-Q4_K_M.gguf
|
||||
sha256: d1deaf80c649e2a9446463cf5e1f7c026583647f46e3940d2b405a57cc685225
|
||||
uri: huggingface://bartowski/deepcogito_cogito-v1-preview-llama-70B-GGUF/deepcogito_cogito-v1-preview-llama-70B-Q4_K_M.gguf
|
||||
- &rwkv
|
||||
url: "github:mudler/LocalAI/gallery/rwkv.yaml@master"
|
||||
name: "rwkv-6-world-7b"
|
||||
@@ -2495,6 +2667,27 @@
|
||||
- filename: Eximius_Persona_5B.Q4_K_M.gguf
|
||||
sha256: 8a8e7a0fa1068755322c51900e53423d795e57976b4d95982242cbec41141c7b
|
||||
uri: huggingface://mradermacher/Eximius_Persona_5B-GGUF/Eximius_Persona_5B.Q4_K_M.gguf
|
||||
- !!merge <<: *llama32
|
||||
name: "deepcogito_cogito-v1-preview-llama-3b"
|
||||
icon: https://huggingface.co/deepcogito/cogito-v1-preview-llama-3B/resolve/main/images/deep-cogito-logo.png
|
||||
urls:
|
||||
- https://huggingface.co/deepcogito/cogito-v1-preview-llama-3B
|
||||
- https://huggingface.co/bartowski/deepcogito_cogito-v1-preview-llama-3B-GGUF
|
||||
description: |
|
||||
The Cogito LLMs are instruction tuned generative models (text in/text out). All models are released under an open license for commercial use.
|
||||
|
||||
Cogito models are hybrid reasoning models. Each model can answer directly (standard LLM), or self-reflect before answering (like reasoning models).
|
||||
The LLMs are trained using Iterated Distillation and Amplification (IDA) - a scalable and efficient alignment strategy for superintelligence using iterative self-improvement.
|
||||
The models have been optimized for coding, STEM, instruction following and general helpfulness, and have significantly higher multilingual, coding and tool calling capabilities than size equivalent counterparts.
|
||||
In both standard and reasoning modes, Cogito v1-preview models outperform their size equivalent counterparts on common industry benchmarks.
|
||||
Each model is trained in over 30 languages and supports a context length of 128k.
|
||||
overrides:
|
||||
parameters:
|
||||
model: deepcogito_cogito-v1-preview-llama-3B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: deepcogito_cogito-v1-preview-llama-3B-Q4_K_M.gguf
|
||||
sha256: 726a0ef5f818b8d238f2844f3204848bea66fb9c172b8ae0f6dc51b7bc081dd5
|
||||
uri: huggingface://bartowski/deepcogito_cogito-v1-preview-llama-3B-GGUF/deepcogito_cogito-v1-preview-llama-3B-Q4_K_M.gguf
|
||||
- &qwen25
|
||||
name: "qwen2.5-14b-instruct" ## Qwen2.5
|
||||
icon: https://avatars.githubusercontent.com/u/141221163
|
||||
@@ -5410,6 +5603,359 @@
|
||||
- filename: hammer2.0-7b-q5_k_m.gguf
|
||||
sha256: 3682843c857595765f0786cf24b3d501af96fe5d99a9fb2526bc7707e28bae1e
|
||||
uri: huggingface://Nekuromento/Hammer2.0-7b-Q5_K_M-GGUF/hammer2.0-7b-q5_k_m.gguf
|
||||
- !!merge <<: *qwen25
|
||||
icon: https://github.com/All-Hands-AI/OpenHands/blob/main/docs/static/img/logo.png?raw=true
|
||||
name: "all-hands_openhands-lm-32b-v0.1"
|
||||
urls:
|
||||
- https://huggingface.co/all-hands/openhands-lm-32b-v0.1
|
||||
- https://huggingface.co/bartowski/all-hands_openhands-lm-32b-v0.1-GGUF
|
||||
description: |
|
||||
Autonomous agents for software development are already contributing to a wide range of software development tasks. But up to this point, strong coding agents have relied on proprietary models, which means that even if you use an open-source agent like OpenHands, you are still reliant on API calls to an external service.
|
||||
|
||||
Today, we are excited to introduce OpenHands LM, a new open coding model that:
|
||||
|
||||
Is open and available on Hugging Face, so you can download it and run it locally
|
||||
Is a reasonable size, 32B, so it can be run locally on hardware such as a single 3090 GPU
|
||||
Achieves strong performance on software engineering tasks, including 37.2% resolve rate on SWE-Bench Verified
|
||||
|
||||
Read below for more details and our future plans!
|
||||
What is OpenHands LM?
|
||||
|
||||
OpenHands LM is built on the foundation of Qwen Coder 2.5 Instruct 32B, leveraging its powerful base capabilities for coding tasks. What sets OpenHands LM apart is our specialized fine-tuning process:
|
||||
|
||||
We used training data generated by OpenHands itself on a diverse set of open-source repositories
|
||||
Specifically, we use an RL-based framework outlined in SWE-Gym, where we set up a training environment, generate training data using an existing agent, and then fine-tune the model on examples that were resolved successfully
|
||||
It features a 128K token context window, ideal for handling large codebases and long-horizon software engineering tasks
|
||||
overrides:
|
||||
parameters:
|
||||
model: all-hands_openhands-lm-32b-v0.1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: all-hands_openhands-lm-32b-v0.1-Q4_K_M.gguf
|
||||
sha256: f7c2311d3264cc1e021a21a319748a9c75b74ddebe38551786aa4053448e5e74
|
||||
uri: huggingface://bartowski/all-hands_openhands-lm-32b-v0.1-GGUF/all-hands_openhands-lm-32b-v0.1-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "all-hands_openhands-lm-7b-v0.1"
|
||||
icon: https://github.com/All-Hands-AI/OpenHands/blob/main/docs/static/img/logo.png?raw=true
|
||||
urls:
|
||||
- https://huggingface.co/all-hands/openhands-lm-7b-v0.1
|
||||
- https://huggingface.co/bartowski/all-hands_openhands-lm-7b-v0.1-GGUF
|
||||
description: |
|
||||
This is a smaller 7B model trained following the recipe of all-hands/openhands-lm-32b-v0.1. Autonomous agents for software development are already contributing to a wide range of software development tasks. But up to this point, strong coding agents have relied on proprietary models, which means that even if you use an open-source agent like OpenHands, you are still reliant on API calls to an external service.
|
||||
|
||||
Today, we are excited to introduce OpenHands LM, a new open coding model that:
|
||||
|
||||
Is open and available on Hugging Face, so you can download it and run it locally
|
||||
Is a reasonable size, 32B, so it can be run locally on hardware such as a single 3090 GPU
|
||||
Achieves strong performance on software engineering tasks, including 37.2% resolve rate on SWE-Bench Verified
|
||||
|
||||
Read below for more details and our future plans!
|
||||
What is OpenHands LM?
|
||||
|
||||
OpenHands LM is built on the foundation of Qwen Coder 2.5 Instruct 32B, leveraging its powerful base capabilities for coding tasks. What sets OpenHands LM apart is our specialized fine-tuning process:
|
||||
|
||||
We used training data generated by OpenHands itself on a diverse set of open-source repositories
|
||||
Specifically, we use an RL-based framework outlined in SWE-Gym, where we set up a training environment, generate training data using an existing agent, and then fine-tune the model on examples that were resolved successfully
|
||||
It features a 128K token context window, ideal for handling large codebases and long-horizon software engineering tasks
|
||||
overrides:
|
||||
parameters:
|
||||
model: all-hands_openhands-lm-7b-v0.1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: all-hands_openhands-lm-7b-v0.1-Q4_K_M.gguf
|
||||
sha256: d50031b04bbdad714c004a0dc117c18d26a026297c236cda36089c20279b2ec1
|
||||
uri: huggingface://bartowski/all-hands_openhands-lm-7b-v0.1-GGUF/all-hands_openhands-lm-7b-v0.1-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "all-hands_openhands-lm-1.5b-v0.1"
|
||||
icon: https://github.com/All-Hands-AI/OpenHands/blob/main/docs/static/img/logo.png?raw=true
|
||||
urls:
|
||||
- https://huggingface.co/all-hands/openhands-lm-1.5b-v0.1
|
||||
- https://huggingface.co/bartowski/all-hands_openhands-lm-1.5b-v0.1-GGUF
|
||||
description: |
|
||||
This is a smaller 1.5B model trained following the recipe of all-hands/openhands-lm-32b-v0.1. It is intended to be used for speculative decoding. Autonomous agents for software development are already contributing to a wide range of software development tasks. But up to this point, strong coding agents have relied on proprietary models, which means that even if you use an open-source agent like OpenHands, you are still reliant on API calls to an external service.
|
||||
|
||||
Today, we are excited to introduce OpenHands LM, a new open coding model that:
|
||||
|
||||
Is open and available on Hugging Face, so you can download it and run it locally
|
||||
Is a reasonable size, 32B, so it can be run locally on hardware such as a single 3090 GPU
|
||||
Achieves strong performance on software engineering tasks, including 37.2% resolve rate on SWE-Bench Verified
|
||||
|
||||
Read below for more details and our future plans!
|
||||
What is OpenHands LM?
|
||||
|
||||
OpenHands LM is built on the foundation of Qwen Coder 2.5 Instruct 32B, leveraging its powerful base capabilities for coding tasks. What sets OpenHands LM apart is our specialized fine-tuning process:
|
||||
|
||||
We used training data generated by OpenHands itself on a diverse set of open-source repositories
|
||||
Specifically, we use an RL-based framework outlined in SWE-Gym, where we set up a training environment, generate training data using an existing agent, and then fine-tune the model on examples that were resolved successfully
|
||||
It features a 128K token context window, ideal for handling large codebases and long-horizon software engineering tasks
|
||||
overrides:
|
||||
parameters:
|
||||
model: all-hands_openhands-lm-1.5b-v0.1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: all-hands_openhands-lm-1.5b-v0.1-Q4_K_M.gguf
|
||||
sha256: 30abd7860c4eb5f2f51546389407b0064360862f64ea55cdf95f97c6e155b3c6
|
||||
uri: huggingface://bartowski/all-hands_openhands-lm-1.5b-v0.1-GGUF/all-hands_openhands-lm-1.5b-v0.1-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "katanemo_arch-function-chat-7b"
|
||||
urls:
|
||||
- https://huggingface.co/katanemo/Arch-Function-Chat-7B
|
||||
- https://huggingface.co/bartowski/katanemo_Arch-Function-Chat-7B-GGUF
|
||||
description: |
|
||||
The Arch-Function-Chat collection builds upon Katanemo's Arch-Function collection by extending its capabilities beyond function calling. This new collection maintains the state-of-the-art (SOTA) function calling performance of the original collection while adding powerful new features that make it even more versatile in real-world applications.
|
||||
|
||||
In addition to function calling capabilities, this collection now offers:
|
||||
|
||||
Clarify & refine: Generates natural follow-up questions to collect missing information for function calling
|
||||
Interpret & respond: Provides human-friendly responses based on function execution results
|
||||
Context management: Maintains context in complex multi-turn interactions
|
||||
|
||||
Note: Arch-Function-Chat is now the primary LLM used in the open source Arch Gateway - an AI-native proxy for agents. For more details about the project, check out the GitHub README.
|
||||
overrides:
|
||||
parameters:
|
||||
model: katanemo_Arch-Function-Chat-7B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: katanemo_Arch-Function-Chat-7B-Q4_K_M.gguf
|
||||
sha256: 6fd603511076ffea3697c8a76d82c054781c5e11f134b937a66cedfc49b3d2c5
|
||||
uri: huggingface://bartowski/katanemo_Arch-Function-Chat-7B-GGUF/katanemo_Arch-Function-Chat-7B-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "katanemo_arch-function-chat-1.5b"
|
||||
urls:
|
||||
- https://huggingface.co/katanemo/Arch-Function-Chat-1.5B
|
||||
- https://huggingface.co/bartowski/katanemo_Arch-Function-Chat-1.5B-GGUF
|
||||
description: |
|
||||
The Arch-Function-Chat collection builds upon Katanemo's Arch-Function collection by extending its capabilities beyond function calling. This new collection maintains the state-of-the-art (SOTA) function calling performance of the original collection while adding powerful new features that make it even more versatile in real-world applications.
|
||||
|
||||
In addition to function calling capabilities, this collection now offers:
|
||||
|
||||
Clarify & refine: Generates natural follow-up questions to collect missing information for function calling
|
||||
Interpret & respond: Provides human-friendly responses based on function execution results
|
||||
Context management: Maintains context in complex multi-turn interactions
|
||||
|
||||
Note: Arch-Function-Chat is now the primary LLM used in the open source Arch Gateway - an AI-native proxy for agents. For more details about the project, check out the GitHub README.
|
||||
overrides:
|
||||
parameters:
|
||||
model: katanemo_Arch-Function-Chat-1.5B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: katanemo_Arch-Function-Chat-1.5B-Q4_K_M.gguf
|
||||
sha256: 5bfcb72803745c374a90b0ceb60f347a8c7d1239960cce6a2d22cc1276236098
|
||||
uri: huggingface://bartowski/katanemo_Arch-Function-Chat-1.5B-GGUF/katanemo_Arch-Function-Chat-1.5B-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "katanemo_arch-function-chat-3b"
|
||||
urls:
|
||||
- https://huggingface.co/katanemo/Arch-Function-Chat-3B
|
||||
- https://huggingface.co/bartowski/katanemo_Arch-Function-Chat-3B-GGUF
|
||||
description: |
|
||||
The Arch-Function-Chat collection builds upon Katanemo's Arch-Function collection by extending its capabilities beyond function calling. This new collection maintains the state-of-the-art (SOTA) function calling performance of the original collection while adding powerful new features that make it even more versatile in real-world applications.
|
||||
|
||||
In addition to function calling capabilities, this collection now offers:
|
||||
|
||||
Clarify & refine: Generates natural follow-up questions to collect missing information for function calling
|
||||
Interpret & respond: Provides human-friendly responses based on function execution results
|
||||
Context management: Maintains context in complex multi-turn interactions
|
||||
|
||||
Note: Arch-Function-Chat is now the primary LLM used in the open source Arch Gateway - an AI-native proxy for agents. For more details about the project, check out the GitHub README.
|
||||
overrides:
|
||||
parameters:
|
||||
model: katanemo_Arch-Function-Chat-3B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: katanemo_Arch-Function-Chat-3B-Q4_K_M.gguf
|
||||
sha256: f59dbef397bf1364b5f0a2c23a7f67c40ec63cc666036c4e7615fa7d79d4e1a0
|
||||
uri: huggingface://bartowski/katanemo_Arch-Function-Chat-3B-GGUF/katanemo_Arch-Function-Chat-3B-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "open-thoughts_openthinker2-32b"
|
||||
icon: https://huggingface.co/datasets/open-thoughts/open-thoughts-114k/resolve/main/open_thoughts.png
|
||||
urls:
|
||||
- https://huggingface.co/open-thoughts/OpenThinker2-32B
|
||||
- https://huggingface.co/bartowski/open-thoughts_OpenThinker2-32B-GGUF
|
||||
description: |
|
||||
This model is a fine-tuned version of Qwen/Qwen2.5-32B-Instruct on the OpenThoughts2-1M dataset.
|
||||
|
||||
The OpenThinker2-32B model is the highest performing open-data model. This model improves upon our previous OpenThinker-32B model, which was trained on 114k examples from OpenThoughts-114k. The numbers reported in the table below are evaluated with our open-source tool Evalchemy.
|
||||
overrides:
|
||||
parameters:
|
||||
model: open-thoughts_OpenThinker2-32B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: open-thoughts_OpenThinker2-32B-Q4_K_M.gguf
|
||||
sha256: e9c7bf7cb349cfe07b4550759a3b4d7005834d0fa7580b23e483cbfeecd7a982
|
||||
uri: huggingface://bartowski/open-thoughts_OpenThinker2-32B-GGUF/open-thoughts_OpenThinker2-32B-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "open-thoughts_openthinker2-7b"
|
||||
icon: https://huggingface.co/datasets/open-thoughts/open-thoughts-114k/resolve/main/open_thoughts.png
|
||||
urls:
|
||||
- https://huggingface.co/open-thoughts/OpenThinker2-7B
|
||||
- https://huggingface.co/bartowski/open-thoughts_OpenThinker2-7B-GGUF
|
||||
description: |
|
||||
This model is a fine-tuned version of Qwen/Qwen2.5-7B-Instruct on the OpenThoughts2-1M dataset.
|
||||
|
||||
The OpenThinker2-7B model is the top 7B open-data reasoning model. It delivers performance comparable to state of the art 7B models like DeepSeek-R1-Distill-7B across a suite of tasks. This model improves upon our previous OpenThinker-7B model, which was trained on 114k examples from OpenThoughts-114k. The numbers reported in the table below are evaluated with our open-source tool Evalchemy.
|
||||
overrides:
|
||||
parameters:
|
||||
model: open-thoughts_OpenThinker2-7B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: open-thoughts_OpenThinker2-7B-Q4_K_M.gguf
|
||||
sha256: 481d785047d66ae2eeaf14650a9e659ec4f7766a6414b6c7e92854c944201734
|
||||
uri: huggingface://bartowski/open-thoughts_OpenThinker2-7B-GGUF/open-thoughts_OpenThinker2-7B-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "arliai_qwq-32b-arliai-rpr-v1"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/6625f4a8a8d1362ebcc3851a/albSlnUy9dPVGVuLlsBua.jpeg
|
||||
urls:
|
||||
- https://huggingface.co/ArliAI/QwQ-32B-ArliAI-RpR-v1
|
||||
- https://huggingface.co/bartowski/ArliAI_QwQ-32B-ArliAI-RpR-v1-GGUF
|
||||
description: |
|
||||
RpR (RolePlay with Reasoning) is a new series of models from ArliAI. This series builds directly upon the successful dataset curation methodology and training methods developed for the RPMax series.
|
||||
|
||||
RpR models use the same curated, deduplicated RP and creative writing dataset used for RPMax, with a focus on variety to ensure high creativity and minimize cross-context repetition. Users familiar with RPMax will recognize the unique, non-repetitive writing style unlike other finetuned-for-RP models.
|
||||
|
||||
With the release of QwQ as the first high-performing open-source reasoning model that can be easily trained, it was clear that the available instruct and creative-writing reasoning datasets contain only one response per example. This type of single-response dataset used for training reasoning models causes degraded output quality in long multi-turn chats, which is why Arli AI decided to create a real RP model capable of long multi-turn chat with reasoning.
|
||||
|
||||
In order to create RpR, we first had to actually create the reasoning RP dataset by re-processing our existing known-good RPMax dataset into a reasoning dataset. This was possible by using the base QwQ Instruct model itself to create the reasoning process for every turn in the RPMax dataset conversation examples, which is then further refined in order to make sure the reasoning is in-line with the actual response examples from the dataset.
|
||||
|
||||
Another important thing to get right is to make sure the model is trained on examples that present reasoning blocks in the same way as it encounters them during inference, that is, never seeing the reasoning blocks in its context. To achieve this, the training run was completed using axolotl with a manual, template-free segments dataset, ensuring the model is never trained to see the reasoning block in the context - just like how the model will be used at inference time.
|
||||
|
||||
The result of training QwQ on this dataset with this method is consistently coherent and interesting outputs, even in long multi-turn RP chats. This is, as far as we know, the first correctly-trained reasoning model trained for RP and creative writing.
|
||||
overrides:
|
||||
parameters:
|
||||
model: ArliAI_QwQ-32B-ArliAI-RpR-v1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: ArliAI_QwQ-32B-ArliAI-RpR-v1-Q4_K_M.gguf
|
||||
sha256: b0f2ca8f62a5d021e20db40608a109713e9d23e75b68b3b71b7654c04d596dcf
|
||||
uri: huggingface://bartowski/ArliAI_QwQ-32B-ArliAI-RpR-v1-GGUF/ArliAI_QwQ-32B-ArliAI-RpR-v1-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "mensa-beta-14b-instruct-i1"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/65bb837dbfb878f46c77de4c/DyO5Fvqwvee-UM9QqgWZS.png
|
||||
urls:
|
||||
- https://huggingface.co/prithivMLmods/Mensa-Beta-14B-Instruct
|
||||
- https://huggingface.co/mradermacher/Mensa-Beta-14B-Instruct-i1-GGUF
|
||||
description: |
|
||||
weighted/imatrix quants of https://huggingface.co/prithivMLmods/Mensa-Beta-14B-Instruct
|
||||
overrides:
|
||||
parameters:
|
||||
model: Mensa-Beta-14B-Instruct.i1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Mensa-Beta-14B-Instruct.i1-Q4_K_M.gguf
|
||||
sha256: 86ccd640d72dcf3129fdd5b94381a733a684672b22487784e388b2ee9de57760
|
||||
uri: huggingface://mradermacher/Mensa-Beta-14B-Instruct-i1-GGUF/Mensa-Beta-14B-Instruct.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "cogito-v1-preview-qwen-14B"
|
||||
icon: https://huggingface.co/deepcogito/cogito-v1-preview-qwen-14B/resolve/main/images/deep-cogito-logo.png
|
||||
urls:
|
||||
- https://huggingface.co/deepcogito/cogito-v1-preview-qwen-14B
|
||||
- https://huggingface.co/NikolayKozloff/cogito-v1-preview-qwen-14B-Q4_K_M-GGUF
|
||||
description: |
|
||||
The Cogito LLMs are instruction tuned generative models (text in/text out). All models are released under an open license for commercial use.
|
||||
Cogito models are hybrid reasoning models. Each model can answer directly (standard LLM), or self-reflect before answering (like reasoning models).
|
||||
The LLMs are trained using Iterated Distillation and Amplification (IDA) - a scalable and efficient alignment strategy for superintelligence using iterative self-improvement.
|
||||
The models have been optimized for coding, STEM, instruction following and general helpfulness, and have significantly higher multilingual, coding and tool calling capabilities than size equivalent counterparts.
|
||||
In both standard and reasoning modes, Cogito v1-preview models outperform their size equivalent counterparts on common industry benchmarks.
|
||||
Each model is trained in over 30 languages and supports a context length of 128k.
|
||||
overrides:
|
||||
parameters:
|
||||
model: cogito-v1-preview-qwen-14b-q4_k_m.gguf
|
||||
files:
|
||||
- filename: cogito-v1-preview-qwen-14b-q4_k_m.gguf
|
||||
sha256: 42ddd667bac3e5f0989f52b3dca5767ed15d0e5077c6f537e4b3873862ff7096
|
||||
uri: huggingface://NikolayKozloff/cogito-v1-preview-qwen-14B-Q4_K_M-GGUF/cogito-v1-preview-qwen-14b-q4_k_m.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "deepcogito_cogito-v1-preview-qwen-32b"
|
||||
icon: https://huggingface.co/deepcogito/cogito-v1-preview-qwen-32B/resolve/main/images/deep-cogito-logo.png
|
||||
urls:
|
||||
- https://huggingface.co/deepcogito/cogito-v1-preview-qwen-32B
|
||||
- https://huggingface.co/bartowski/deepcogito_cogito-v1-preview-qwen-32B-GGUF
|
||||
description: |
|
||||
The Cogito LLMs are instruction tuned generative models (text in/text out). All models are released under an open license for commercial use.
|
||||
|
||||
Cogito models are hybrid reasoning models. Each model can answer directly (standard LLM), or self-reflect before answering (like reasoning models).
|
||||
The LLMs are trained using Iterated Distillation and Amplification (IDA) - a scalable and efficient alignment strategy for superintelligence using iterative self-improvement.
|
||||
The models have been optimized for coding, STEM, instruction following and general helpfulness, and have significantly higher multilingual, coding and tool calling capabilities than size equivalent counterparts.
|
||||
In both standard and reasoning modes, Cogito v1-preview models outperform their size equivalent counterparts on common industry benchmarks.
|
||||
Each model is trained in over 30 languages and supports a context length of 128k.
|
||||
overrides:
|
||||
parameters:
|
||||
model: deepcogito_cogito-v1-preview-qwen-32B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: deepcogito_cogito-v1-preview-qwen-32B-Q4_K_M.gguf
|
||||
sha256: 985f2d49330090e64603309f7eb61030769f25a5da027ac0b0a740858d087ad8
|
||||
uri: huggingface://bartowski/deepcogito_cogito-v1-preview-qwen-32B-GGUF/deepcogito_cogito-v1-preview-qwen-32B-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "soob3123_amoral-cogito-v1-preview-qwen-14b"
|
||||
urls:
|
||||
- https://huggingface.co/soob3123/amoral-cogito-v1-preview-qwen-14B
|
||||
- https://huggingface.co/bartowski/soob3123_amoral-cogito-v1-preview-qwen-14B-GGUF
|
||||
description: |
|
||||
Key Features
|
||||
Neutral response protocol (bias dampening layers)
|
||||
Reduced refusal rate vs base Llama-3
|
||||
Moral phrasing detection/reformulation
|
||||
Use Cases
|
||||
Controversial topic analysis
|
||||
Ethical philosophy simulations
|
||||
Academic research requiring neutral framing
|
||||
overrides:
|
||||
parameters:
|
||||
model: soob3123_amoral-cogito-v1-preview-qwen-14B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: soob3123_amoral-cogito-v1-preview-qwen-14B-Q4_K_M.gguf
|
||||
sha256: c01a0b0c44345011dc61212fb1c0ffdba32f85e702d2f3d4abeb2a09208d6184
|
||||
uri: huggingface://bartowski/soob3123_amoral-cogito-v1-preview-qwen-14B-GGUF/soob3123_amoral-cogito-v1-preview-qwen-14B-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "tesslate_gradience-t1-3b-preview"
|
||||
urls:
|
||||
- https://huggingface.co/Tesslate/Gradience-T1-3B-preview
|
||||
- https://huggingface.co/bartowski/Tesslate_Gradience-T1-3B-preview-GGUF
|
||||
description: |
|
||||
This model is still in preview/beta. We're still working on it! This is just so the community can try out our new "Gradient Reasoning" that intends to break problems down and reason faster.
|
||||
You can use a system prompt to enable thinking: "First, think step-by-step to reach the solution. Enclose your entire reasoning process within <|begin_of_thought|> and <|end_of_thought|> tags." You can try sampling params: Temp: 0.76, TopP: 0.62, Topk 30-68, Rep: 1.0, minp: 0.05
|
||||
overrides:
|
||||
parameters:
|
||||
model: Tesslate_Gradience-T1-3B-preview-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Tesslate_Gradience-T1-3B-preview-Q4_K_M.gguf
|
||||
sha256: 119ccefa09e3756750a983301f8bbb95e6c8fce6941a5d91490dac600f887111
|
||||
uri: huggingface://bartowski/Tesslate_Gradience-T1-3B-preview-GGUF/Tesslate_Gradience-T1-3B-preview-Q4_K_M.gguf
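Since the entry above prescribes a specific system prompt and sampling parameters, here is a small sketch of passing them through the OpenAI-compatible chat endpoint; the base URL and the installed model name are assumptions.

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8080/v1", api_key="not-needed")  # assumed address

# System prompt and sampling values copied from the model description above.
system_prompt = (
    "First, think step-by-step to reach the solution. Enclose your entire reasoning "
    "process within <|begin_of_thought|> and <|end_of_thought|> tags."
)

response = client.chat.completions.create(
    model="tesslate_gradience-t1-3b-preview",  # assumed name of the installed model
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": "What is 17 * 24?"},
    ],
    temperature=0.76,  # suggested Temp from the description
    top_p=0.62,        # suggested TopP; top-k/min-p/repetition penalty can go in the model config
)
print(response.choices[0].message.content)
```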
- !!merge <<: *qwen25
  name: "lightthinker-qwen"
  urls:
    - https://huggingface.co/zjunlp/LightThinker-Qwen
    - https://huggingface.co/mradermacher/LightThinker-Qwen-GGUF
  description: |
    LLMs have shown remarkable performance in complex reasoning tasks, but their efficiency is hindered by the substantial memory and computational costs associated with generating lengthy tokens. In this paper, we propose LightThinker, a novel method that enables LLMs to dynamically compress intermediate thoughts during reasoning. Inspired by human cognitive processes, LightThinker compresses verbose thought steps into compact representations and discards the original reasoning chains, thereby significantly reducing the number of tokens stored in the context window. This is achieved by training the model on when and how to perform compression through data construction, mapping hidden states to condensed gist tokens, and creating specialized attention masks.
  overrides:
    parameters:
      model: LightThinker-Qwen.Q4_K_M.gguf
  files:
    - filename: LightThinker-Qwen.Q4_K_M.gguf
      sha256: f52f27c23fa734b1a0306efd29fcb80434364e7a1077695574e9a4f5e48b7ed2
      uri: huggingface://mradermacher/LightThinker-Qwen-GGUF/LightThinker-Qwen.Q4_K_M.gguf
- !!merge <<: *qwen25
  name: "mag-picaro-72b"
  icon: https://cdn-uploads.huggingface.co/production/uploads/66c26b6fb01b19d8c3c2467b/hrYOp7JiH7o5ij1WEoyCZ.png
  urls:
    - https://huggingface.co/Delta-Vector/Mag-Picaro-72B
    - https://huggingface.co/mradermacher/Mag-Picaro-72B-GGUF
  description: |
    A scaled-up version of Mag-Picaro, funded by PygmalionAI as an alternative to their Magnum Large option.
    Fine-tuned on top of Qwen-2-Instruct, Mag-Picaro was then slerp-merged at 50/50 weight with Magnum-V2. If you like the model, support me on Ko-Fi https://ko-fi.com/deltavector
  overrides:
    parameters:
      model: Mag-Picaro-72B.Q4_K_M.gguf
  files:
    - filename: Mag-Picaro-72B.Q4_K_M.gguf
      sha256: 3fda6cf318a9082ef7b502c4384ee3ea5f9f9f44268b852a2e46d71bcea29d5a
      uri: huggingface://mradermacher/Mag-Picaro-72B-GGUF/Mag-Picaro-72B.Q4_K_M.gguf
- !!merge <<: *qwen25
  name: "m1-32b"
  urls:
    - https://huggingface.co/Can111/m1-32b
    - https://huggingface.co/mradermacher/m1-32b-GGUF
  description: |
    M1-32B is a 32B-parameter large language model fine-tuned from Qwen2.5-32B-Instruct on the M500 dataset—an interdisciplinary multi-agent collaborative reasoning dataset. M1-32B is optimized for improved reasoning, discussion, and decision-making in multi-agent systems (MAS), including frameworks such as AgentVerse.

    Code: https://github.com/jincan333/MAS-TTS
  overrides:
    parameters:
      model: m1-32b.Q4_K_M.gguf
  files:
    - filename: m1-32b.Q4_K_M.gguf
      sha256: 1dfa3b6822447aca590d6f2881cf277bd0fbde633a39c5a20b521f4a59145e3f
      uri: huggingface://mradermacher/m1-32b-GGUF/m1-32b.Q4_K_M.gguf
- &llama31
  url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" ## LLama3.1
  icon: https://avatars.githubusercontent.com/u/153379578
@@ -7548,6 +8094,93 @@
    - filename: TextSynth-8B.i1-Q4_K_M.gguf
      sha256: 9186a8cb3a797cd2cd5b2eeaee99808674d96731824a9ee45685bbf480ba56c3
      uri: huggingface://mradermacher/TextSynth-8B-i1-GGUF/TextSynth-8B.i1-Q4_K_M.gguf
- !!merge <<: *llama31
  name: "deepcogito_cogito-v1-preview-llama-8b"
  icon: https://huggingface.co/deepcogito/cogito-v1-preview-llama-8B/resolve/main/images/deep-cogito-logo.png
  urls:
    - https://huggingface.co/deepcogito/cogito-v1-preview-llama-8B
    - https://huggingface.co/bartowski/deepcogito_cogito-v1-preview-llama-8B-GGUF
  description: |
    The Cogito LLMs are instruction tuned generative models (text in/text out). All models are released under an open license for commercial use.

    Cogito models are hybrid reasoning models. Each model can answer directly (standard LLM), or self-reflect before answering (like reasoning models).
    The LLMs are trained using Iterated Distillation and Amplification (IDA) - a scalable and efficient alignment strategy for superintelligence using iterative self-improvement.
    The models have been optimized for coding, STEM, instruction following and general helpfulness, and have significantly higher multilingual, coding and tool calling capabilities than size equivalent counterparts.
    In both standard and reasoning modes, Cogito v1-preview models outperform their size equivalent counterparts on common industry benchmarks.
    Each model is trained in over 30 languages and supports a context length of 128k.
  overrides:
    parameters:
      model: deepcogito_cogito-v1-preview-llama-8B-Q4_K_M.gguf
  files:
    - filename: deepcogito_cogito-v1-preview-llama-8B-Q4_K_M.gguf
      sha256: 445173fb1dacef3fa0be49ebb4512b948fdb1434d86732de198424695b017b50
      uri: huggingface://bartowski/deepcogito_cogito-v1-preview-llama-8B-GGUF/deepcogito_cogito-v1-preview-llama-8B-Q4_K_M.gguf
- !!merge <<: *llama31
  name: "hamanasu-adventure-4b-i1"
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
  icon: https://cdn-uploads.huggingface.co/production/uploads/66c26b6fb01b19d8c3c2467b/o5WjJKA9f95ri9UzRxZQE.png
  urls:
    - https://huggingface.co/Delta-Vector/Hamanasu-Adventure-4B
    - https://huggingface.co/mradermacher/Hamanasu-Adventure-4B-i1-GGUF
  description: |
    Thanks to PocketDoc's Adventure datasets and taking his Dangerous Winds models as inspiration, I was able to finetune a small Adventure model that HATES the User
    The model is suited for Text Adventure, All thanks to Tav for funding the train.
    Support me and my finetunes on Ko-Fi https://ko-fi.com/deltavector
  overrides:
    parameters:
      model: Hamanasu-Adventure-4B.i1-Q4_K_M.gguf
  files:
    - filename: Hamanasu-Adventure-4B.i1-Q4_K_M.gguf
      sha256: d4f2bb3bdd99dbfe1019368813c8b6574c4c53748ff58e1b0cc1786b32cc9f5d
      uri: huggingface://mradermacher/Hamanasu-Adventure-4B-i1-GGUF/Hamanasu-Adventure-4B.i1-Q4_K_M.gguf
- !!merge <<: *llama31
  name: "hamanasu-magnum-4b-i1"
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
  icon: https://cdn-uploads.huggingface.co/production/uploads/66c26b6fb01b19d8c3c2467b/o5WjJKA9f95ri9UzRxZQE.png
  urls:
    - https://huggingface.co/Delta-Vector/Hamanasu-Magnum-4B
    - https://huggingface.co/mradermacher/Hamanasu-Magnum-4B-i1-GGUF
  description: |
    This is a model designed to replicate the prose quality of the Claude 3 series of models, specifically Sonnet and Opus, made with a prototype Magnum V5 datamix.
    The model is suited for traditional RP, All thanks to Tav for funding the train.
    Support me and my finetunes on Ko-Fi https://ko-fi.com/deltavector
  overrides:
    parameters:
      model: Hamanasu-Magnum-4B.i1-Q4_K_M.gguf
  files:
    - filename: Hamanasu-Magnum-4B.i1-Q4_K_M.gguf
      sha256: 7eb6d1bfda7c0a5bf62de754323cf59f14ddd394550a5893b7bd086fd1906361
      uri: huggingface://mradermacher/Hamanasu-Magnum-4B-i1-GGUF/Hamanasu-Magnum-4B.i1-Q4_K_M.gguf
- !!merge <<: *llama31
  name: "nvidia_llama-3.1-8b-ultralong-1m-instruct"
  icon: https://cdn-avatars.huggingface.co/v1/production/uploads/1613114437487-60262a8e0703121c822a80b6.png
  urls:
    - https://huggingface.co/nvidia/Llama-3.1-8B-UltraLong-1M-Instruct
    - https://huggingface.co/bartowski/nvidia_Llama-3.1-8B-UltraLong-1M-Instruct-GGUF
  description: |
    We introduce UltraLong-8B, a series of ultra-long context language models designed to process extensive sequences of text (up to 1M, 2M, and 4M tokens) while maintaining competitive performance on standard benchmarks. Built on the Llama-3.1, UltraLong-8B leverages a systematic training recipe that combines efficient continued pretraining with instruction tuning to enhance long-context understanding and instruction-following capabilities. This approach enables our models to efficiently scale their context windows without sacrificing general performance.
  overrides:
    parameters:
      model: nvidia_Llama-3.1-8B-UltraLong-1M-Instruct-Q4_K_M.gguf
  files:
    - filename: nvidia_Llama-3.1-8B-UltraLong-1M-Instruct-Q4_K_M.gguf
      sha256: 22e59b0eff7fd7b77403027fb758f75ad41c78a4f56adc10ca39802c64fe97fa
      uri: huggingface://bartowski/nvidia_Llama-3.1-8B-UltraLong-1M-Instruct-GGUF/nvidia_Llama-3.1-8B-UltraLong-1M-Instruct-Q4_K_M.gguf
- !!merge <<: *llama31
  name: "nvidia_llama-3.1-8b-ultralong-4m-instruct"
  icon: https://cdn-avatars.huggingface.co/v1/production/uploads/1613114437487-60262a8e0703121c822a80b6.png
  urls:
    - https://huggingface.co/nvidia/Llama-3.1-8B-UltraLong-4M-Instruct
    - https://huggingface.co/bartowski/nvidia_Llama-3.1-8B-UltraLong-4M-Instruct-GGUF
  description: |
    We introduce UltraLong-8B, a series of ultra-long context language models designed to process extensive sequences of text (up to 1M, 2M, and 4M tokens) while maintaining competitive performance on standard benchmarks. Built on the Llama-3.1, UltraLong-8B leverages a systematic training recipe that combines efficient continued pretraining with instruction tuning to enhance long-context understanding and instruction-following capabilities. This approach enables our models to efficiently scale their context windows without sacrificing general performance.
  overrides:
    parameters:
      model: nvidia_Llama-3.1-8B-UltraLong-4M-Instruct-Q4_K_M.gguf
  files:
    - filename: nvidia_Llama-3.1-8B-UltraLong-4M-Instruct-Q4_K_M.gguf
      sha256: c503c77c6d8cc4be53ce7cddb756cb571862f0422594c17e58a75d7be9f00907
      uri: huggingface://bartowski/nvidia_Llama-3.1-8B-UltraLong-4M-Instruct-GGUF/nvidia_Llama-3.1-8B-UltraLong-4M-Instruct-Q4_K_M.gguf
- !!merge <<: *llama33
  name: "llama-3.3-magicalgirl-2.5-i1"
  icon: https://cdn-uploads.huggingface.co/production/uploads/633e85093a17ab61de8d9073/FGK0qBGmELj6DEUxbbrdR.png
@@ -8026,6 +8659,115 @@
    - filename: Fallen-Safeword-70B-R1-v4.1.Q4_K_M.gguf
      sha256: aed6bd5bb03b7bd886939237bc10ea6331d4feb5a3b6712e0c5474a778acf817
      uri: huggingface://mradermacher/Fallen-Safeword-70B-R1-v4.1-GGUF/Fallen-Safeword-70B-R1-v4.1.Q4_K_M.gguf
- !!merge <<: *deepseek-r1
  name: "agentica-org_deepcoder-14b-preview"
  urls:
    - https://huggingface.co/agentica-org/DeepCoder-14B-Preview
    - https://huggingface.co/bartowski/agentica-org_DeepCoder-14B-Preview-GGUF
  description: |
    DeepCoder-14B-Preview is a code reasoning LLM fine-tuned from DeepSeek-R1-Distilled-Qwen-14B using distributed reinforcement learning (RL) to scale up to long context lengths. The model achieves 60.6% Pass@1 accuracy on LiveCodeBench v5 (8/1/24-2/1/25), representing an 8% improvement over the base model (53%) and achieving similar performance to OpenAI's o3-mini with just 14B parameters.
  overrides:
    parameters:
      model: agentica-org_DeepCoder-14B-Preview-Q4_K_M.gguf
  files:
    - filename: agentica-org_DeepCoder-14B-Preview-Q4_K_M.gguf
      sha256: 38f0f777de3116ca27d10ec84388b3290a1bf3f7db8c5bdc1f92d100e4231870
      uri: huggingface://bartowski/agentica-org_DeepCoder-14B-Preview-GGUF/agentica-org_DeepCoder-14B-Preview-Q4_K_M.gguf
- !!merge <<: *deepseek-r1
  name: "agentica-org_deepcoder-1.5b-preview"
  urls:
    - https://huggingface.co/agentica-org/DeepCoder-1.5B-Preview
    - https://huggingface.co/bartowski/agentica-org_DeepCoder-1.5B-Preview-GGUF
  description: |
    DeepCoder-1.5B-Preview is a code reasoning LLM fine-tuned from DeepSeek-R1-Distilled-Qwen-1.5B using distributed reinforcement learning (RL) to scale up to long context lengths.
    Data

    Our training dataset consists of approximately 24K unique problem-tests pairs compiled from:

    Taco-Verified
    PrimeIntellect SYNTHETIC-1
    LiveCodeBench v5 (5/1/23-7/31/24)
  overrides:
    parameters:
      model: agentica-org_DeepCoder-1.5B-Preview-Q4_K_M.gguf
  files:
    - filename: agentica-org_DeepCoder-1.5B-Preview-Q4_K_M.gguf
      sha256: 9ddd89eddf8d56b1c16317932af56dc59b49ca2beec735d1332f5a3e0f225714
      uri: huggingface://bartowski/agentica-org_DeepCoder-1.5B-Preview-GGUF/agentica-org_DeepCoder-1.5B-Preview-Q4_K_M.gguf
- !!merge <<: *deepseek-r1
  name: "zyphra_zr1-1.5b"
  urls:
    - https://huggingface.co/Zyphra/ZR1-1.5B
    - https://huggingface.co/bartowski/Zyphra_ZR1-1.5B-GGUF
  description: |
    ZR1-1.5B is a small reasoning model trained extensively on both verified coding and mathematics problems with reinforcement learning. The model outperforms Llama-3.1-70B-Instruct on hard coding tasks and improves upon the base R1-Distill-1.5B model by over 50%, while achieving strong scores on math evaluations and a 37.91% pass@1 accuracy on GPQA-Diamond with just 1.5B parameters.
  overrides:
    parameters:
      model: Zyphra_ZR1-1.5B-Q4_K_M.gguf
  files:
    - filename: Zyphra_ZR1-1.5B-Q4_K_M.gguf
      sha256: 5442a9303f651eec30d8d17cd649982ddedf3629ff4faf3bf08d187900a7e7bd
      uri: huggingface://bartowski/Zyphra_ZR1-1.5B-GGUF/Zyphra_ZR1-1.5B-Q4_K_M.gguf
- !!merge <<: *deepseek-r1
  name: "skywork_skywork-or1-7b-preview"
  urls:
    - https://huggingface.co/Skywork/Skywork-OR1-7B-Preview
    - https://huggingface.co/bartowski/Skywork_Skywork-OR1-7B-Preview-GGUF
  description: |
    The Skywork-OR1 (Open Reasoner 1) model series consists of powerful math and code reasoning models trained using large-scale rule-based reinforcement learning with carefully designed datasets and training recipes. This series includes two general-purpose reasoning models, Skywork-OR1-7B-Preview and Skywork-OR1-32B-Preview, along with a math-specialized model, Skywork-OR1-Math-7B.

    Skywork-OR1-Math-7B is specifically optimized for mathematical reasoning, scoring 69.8 on AIME24 and 52.3 on AIME25 — well ahead of all models of similar size.
    Skywork-OR1-32B-Preview delivers the 671B-parameter Deepseek-R1 performance on math tasks (AIME24 and AIME25) and coding tasks (LiveCodeBench).
    Skywork-OR1-7B-Preview outperforms all similarly sized models in both math and coding scenarios.

    The final release version will be available in two weeks.
  overrides:
    parameters:
      model: Skywork_Skywork-OR1-7B-Preview-Q4_K_M.gguf
  files:
    - filename: Skywork_Skywork-OR1-7B-Preview-Q4_K_M.gguf
      sha256: 5816934378dd1b9dd3a656efedef488bfa85eeeade467f99317f7cc4cbf6ceda
      uri: huggingface://bartowski/Skywork_Skywork-OR1-7B-Preview-GGUF/Skywork_Skywork-OR1-7B-Preview-Q4_K_M.gguf
- !!merge <<: *deepseek-r1
  name: "skywork_skywork-or1-math-7b"
  urls:
    - https://huggingface.co/Skywork/Skywork-OR1-Math-7B
    - https://huggingface.co/bartowski/Skywork_Skywork-OR1-Math-7B-GGUF
  description: |
    The Skywork-OR1 (Open Reasoner 1) model series consists of powerful math and code reasoning models trained using large-scale rule-based reinforcement learning with carefully designed datasets and training recipes. This series includes two general-purpose reasoning models, Skywork-OR1-7B-Preview and Skywork-OR1-32B-Preview, along with a math-specialized model, Skywork-OR1-Math-7B.

    Skywork-OR1-Math-7B is specifically optimized for mathematical reasoning, scoring 69.8 on AIME24 and 52.3 on AIME25 — well ahead of all models of similar size.
    Skywork-OR1-32B-Preview delivers the 671B-parameter Deepseek-R1 performance on math tasks (AIME24 and AIME25) and coding tasks (LiveCodeBench).
    Skywork-OR1-7B-Preview outperforms all similarly sized models in both math and coding scenarios.

    The final release version will be available in two weeks.
  overrides:
    parameters:
      model: Skywork_Skywork-OR1-Math-7B-Q4_K_M.gguf
  files:
    - filename: Skywork_Skywork-OR1-Math-7B-Q4_K_M.gguf
      sha256: 4a28cc95da712d37f1aef701f3eff5591e437beba9f89faf29b2a2e7443dd170
      uri: huggingface://bartowski/Skywork_Skywork-OR1-Math-7B-GGUF/Skywork_Skywork-OR1-Math-7B-Q4_K_M.gguf
- !!merge <<: *deepseek-r1
  name: "skywork_skywork-or1-32b-preview"
  urls:
    - https://huggingface.co/Skywork/Skywork-OR1-32B-Preview
    - https://huggingface.co/bartowski/Skywork_Skywork-OR1-32B-Preview-GGUF
  description: |
    The Skywork-OR1 (Open Reasoner 1) model series consists of powerful math and code reasoning models trained using large-scale rule-based reinforcement learning with carefully designed datasets and training recipes. This series includes two general-purpose reasoning models, Skywork-OR1-7B-Preview and Skywork-OR1-32B-Preview, along with a math-specialized model, Skywork-OR1-Math-7B.

    Skywork-OR1-Math-7B is specifically optimized for mathematical reasoning, scoring 69.8 on AIME24 and 52.3 on AIME25 — well ahead of all models of similar size.
    Skywork-OR1-32B-Preview delivers the 671B-parameter Deepseek-R1 performance on math tasks (AIME24 and AIME25) and coding tasks (LiveCodeBench).
    Skywork-OR1-7B-Preview outperforms all similarly sized models in both math and coding scenarios.

    The final release version will be available in two weeks.
  overrides:
    parameters:
      model: Skywork_Skywork-OR1-32B-Preview-Q4_K_M.gguf
  files:
    - filename: Skywork_Skywork-OR1-32B-Preview-Q4_K_M.gguf
      sha256: 304d4f6e6ac6c530b7427c30b43df3d19ae6160c68582b8815efb129533c2f0c
      uri: huggingface://bartowski/Skywork_Skywork-OR1-32B-Preview-GGUF/Skywork_Skywork-OR1-32B-Preview-Q4_K_M.gguf
- &qwen2
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master" ## Start QWEN2
  name: "qwen2-7b-instruct"
@@ -9340,6 +10082,40 @@
    - filename: BlackSheep-24B.i1-Q4_K_M.gguf
      sha256: 95ae096eca05a95591254babf81b4d5617ceebbe8eda04c6cf8968ef4a69fc80
      uri: huggingface://mradermacher/BlackSheep-24B-i1-GGUF/BlackSheep-24B.i1-Q4_K_M.gguf
- !!merge <<: *mistral03
  name: "eurydice-24b-v2-i1"
  icon: https://cdn-uploads.huggingface.co/production/uploads/652c2a63d78452c4742cd3d3/Hm_tg4s0D6yWmtrTHII32.png
  urls:
    - https://huggingface.co/aixonlab/Eurydice-24b-v2
    - https://huggingface.co/mradermacher/Eurydice-24b-v2-i1-GGUF
  description: |
    Eurydice 24b v2 is designed to be the perfect companion for multi-role conversations. It demonstrates exceptional contextual understanding and excels in creativity, natural conversation and storytelling. Built on Mistral 3.1, this model has been trained on a custom dataset specifically crafted to enhance its capabilities.
  overrides:
    parameters:
      model: Eurydice-24b-v2.i1-Q4_K_M.gguf
  files:
    - filename: Eurydice-24b-v2.i1-Q4_K_M.gguf
      sha256: fb4104a1b33dd860e1eca3b6906a10cacc5b91a2534db72d9749652a204fbcbf
      uri: huggingface://mradermacher/Eurydice-24b-v2-i1-GGUF/Eurydice-24b-v2.i1-Q4_K_M.gguf
- !!merge <<: *mistral03
  name: "trappu_magnum-picaro-0.7-v2-12b"
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
  urls:
    - https://huggingface.co/Trappu/Magnum-Picaro-0.7-v2-12b
    - https://huggingface.co/bartowski/Trappu_Magnum-Picaro-0.7-v2-12b-GGUF
  description: |
    This model is a merge between Trappu/Nemo-Picaro-12B, a model trained on my own little dataset free of synthetic data, which focuses solely on storywriting and scenario prompting (Example: [ Scenario: bla bla bla; Tags: bla bla bla ]), and anthracite-org/magnum-v2-12b.

    The reason why I decided to merge it with Magnum (and don't recommend Picaro alone) is because that model, aside from its obvious flaws (rampant impersonation, stupid, etc...), is a one-trick pony and will be really rough for the average LLM user to handle. The idea was to have Magnum work as some sort of stabilizer to fix the issues that emerge from the lack of multiturn/smart data in Picaro's dataset. It worked, I think. I enjoy the outputs and it's smart enough to work with.

    But yeah the goal of this merge was to make a model that's both good at storytelling/narration but also fine when it comes to other forms of creative writing such as RP or chatting. I don't think it's quite there yet but it's something for sure.
  overrides:
    parameters:
      model: Trappu_Magnum-Picaro-0.7-v2-12b-Q4_K_M.gguf
  files:
    - filename: Trappu_Magnum-Picaro-0.7-v2-12b-Q4_K_M.gguf
      sha256: 989839dd7eab997a70eb8430b9df1138f9b0f35d58299d5007e6555a4a4a7f4c
      uri: huggingface://bartowski/Trappu_Magnum-Picaro-0.7-v2-12b-GGUF/Trappu_Magnum-Picaro-0.7-v2-12b-Q4_K_M.gguf
- &mudler
  url: "github:mudler/LocalAI/gallery/mudler.yaml@master" ### START mudler's LocalAI specific-models
  name: "LocalAI-llama3-8b-function-call-v0.2"

7
main.go
@@ -74,10 +74,9 @@ Version: ${version}
        ),
        kong.UsageOnError(),
        kong.Vars{
            "basepath": kong.ExpandPath("."),
            "remoteLibraryURL": "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/model_library.yaml",
            "galleries": `[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml@master"}]`,
            "version": internal.PrintableVersion(),
            "basepath": kong.ExpandPath("."),
            "galleries": `[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml@master"}]`,
            "version": internal.PrintableVersion(),
        },
    )

@@ -473,8 +473,6 @@ func (ml *ModelLoader) backendLoader(opts ...Option) (client grpc.Backend, err e
        backend = realBackend
    }

    ml.stopActiveBackends(o.modelID, o.singleActiveBackend)

    var backendToConsume string

    switch backend {
@@ -497,17 +495,37 @@ func (ml *ModelLoader) backendLoader(opts ...Option) (client grpc.Backend, err e
}

func (ml *ModelLoader) stopActiveBackends(modelID string, singleActiveBackend bool) {
    if !singleActiveBackend {
        return
    }

    // If we can have only one backend active, kill all the others (except external backends)
    if singleActiveBackend {
        log.Debug().Msgf("Stopping all backends except '%s'", modelID)
        err := ml.StopGRPC(allExcept(modelID))
        if err != nil {
            log.Error().Err(err).Str("keptModel", modelID).Msg("error while shutting down all backends except for the keptModel - greedyloader continuing")
        }

    // Stop all backends except the one we are going to load
    log.Debug().Msgf("Stopping all backends except '%s'", modelID)
    err := ml.StopGRPC(allExcept(modelID))
    if err != nil {
        log.Error().Err(err).Str("keptModel", modelID).Msg("error while shutting down all backends except for the keptModel - greedyloader continuing")
    }
}

func (ml *ModelLoader) Close() {
    if !ml.singletonMode {
        return
    }
    ml.singletonLock.Unlock()
}

func (ml *ModelLoader) lockBackend() {
    if !ml.singletonMode {
        return
    }
    ml.singletonLock.Lock()
}

func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
    ml.lockBackend() // grab the singleton lock if needed

    o := NewOptions(opts...)

    // Return earlier if we have a model already loaded
@@ -518,17 +536,20 @@ func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
        return m.GRPC(o.parallelRequests, ml.wd), nil
    }

    ml.stopActiveBackends(o.modelID, o.singleActiveBackend)
    ml.stopActiveBackends(o.modelID, ml.singletonMode)

    // if a backend is defined, return the loader directly
    if o.backendString != "" {
        return ml.backendLoader(opts...)
    }

    // Otherwise scan for backends in the asset directory
    var err error

    // get backends embedded in the binary
    autoLoadBackends, err := ml.ListAvailableBackends(o.assetDir)
    if err != nil {
        ml.Close() // we failed, release the lock
        return nil, err
    }

@@ -560,5 +581,7 @@ func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
        }
    }

    ml.Close() // make sure to release the lock in case of failure

    return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())
}

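Editor's note on the hunks above: the new singletonLock/singletonMode pair serializes model loading when only one backend may be active, with Close releasing the lock both on failure paths inside Load and when the caller is done. The following is a minimal, self-contained sketch of that locking pattern under simplified, assumed names; it is not the actual LocalAI code.

// Minimal sketch of the singleton-mode locking pattern: Load takes a dedicated
// mutex when only one backend may be active, Close releases it.
package main

import (
    "fmt"
    "sync"
)

type loader struct {
    singletonMode bool
    singletonLock sync.Mutex
}

func (l *loader) lockBackend() {
    if !l.singletonMode {
        return
    }
    l.singletonLock.Lock()
}

func (l *loader) Close() {
    if !l.singletonMode {
        return
    }
    l.singletonLock.Unlock()
}

func (l *loader) Load(name string) (string, error) {
    l.lockBackend() // grab the singleton lock if needed
    if name == "" {
        l.Close() // failed: release the lock before returning
        return "", fmt.Errorf("no model name given")
    }
    return "backend-for-" + name, nil
}

func main() {
    l := &loader{singletonMode: true}
    backend, err := l.Load("qwen2-7b-instruct")
    if err == nil {
        defer l.Close() // caller releases the backend when done
    }
    fmt.Println(backend, err)
}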
@@ -18,16 +18,19 @@ import (

// TODO: Split ModelLoader and TemplateLoader? Just to keep things more organized. Left together to share a mutex until I look into that. Would split if we seperate directories for .bin/.yaml and .tmpl
type ModelLoader struct {
    ModelPath string
    mu        sync.Mutex
    models    map[string]*Model
    wd        *WatchDog
    ModelPath     string
    mu            sync.Mutex
    singletonLock sync.Mutex
    singletonMode bool
    models        map[string]*Model
    wd            *WatchDog
}

func NewModelLoader(modelPath string) *ModelLoader {
func NewModelLoader(modelPath string, singleActiveBackend bool) *ModelLoader {
    nml := &ModelLoader{
        ModelPath: modelPath,
        models:    make(map[string]*Model),
        ModelPath:     modelPath,
        models:        make(map[string]*Model),
        singletonMode: singleActiveBackend,
    }

    return nml
@@ -142,26 +145,6 @@ func (ml *ModelLoader) LoadModel(modelID, modelName string, loader func(string,
func (ml *ModelLoader) ShutdownModel(modelName string) error {
    ml.mu.Lock()
    defer ml.mu.Unlock()
    model, ok := ml.models[modelName]
    if !ok {
        return fmt.Errorf("model %s not found", modelName)
    }

    retries := 1
    for model.GRPC(false, ml.wd).IsBusy() {
        log.Debug().Msgf("%s busy. Waiting.", modelName)
        dur := time.Duration(retries*2) * time.Second
        if dur > retryTimeout {
            dur = retryTimeout
        }
        time.Sleep(dur)
        retries++

        if retries > 10 && os.Getenv("LOCALAI_FORCE_BACKEND_SHUTDOWN") == "true" {
            log.Warn().Msgf("Model %s is still busy after %d retries. Forcing shutdown.", modelName, retries)
            break
        }
    }

    return ml.deleteProcess(modelName)
}
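For clarity, the busy-wait removed from ShutdownModel above (and reintroduced in deleteProcess further down) amounts to a linear backoff of 2s, 4s, ... capped at retryTimeout, with an escape hatch after 10 retries when LOCALAI_FORCE_BACKEND_SHUTDOWN=true. A standalone sketch of just that loop follows; the 10-second cap and the isBusy stub are assumptions for illustration only.

// Sketch of the capped-backoff wait with optional forced shutdown.
package main

import (
    "fmt"
    "os"
    "time"
)

const retryTimeout = 10 * time.Second // assumed cap for this sketch

func waitUntilIdle(name string, isBusy func() bool) {
    force := os.Getenv("LOCALAI_FORCE_BACKEND_SHUTDOWN") == "true"
    retries := 1
    for isBusy() {
        fmt.Printf("%s busy, waiting\n", name)
        dur := time.Duration(retries*2) * time.Second
        if dur > retryTimeout {
            dur = retryTimeout
        }
        time.Sleep(dur)
        retries++
        if retries > 10 && force {
            fmt.Printf("%s still busy after %d retries, forcing shutdown\n", name, retries)
            break
        }
    }
}

func main() {
    deadline := time.Now().Add(3 * time.Second)
    waitUntilIdle("test-model", func() bool { return time.Now().Before(deadline) })
    fmt.Println("proceeding with process shutdown")
}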

@@ -17,10 +17,9 @@ type Options struct {

    externalBackends map[string]string

    grpcAttempts        int
    grpcAttemptsDelay   int
    singleActiveBackend bool
    parallelRequests    bool
    grpcAttempts      int
    grpcAttemptsDelay int
    parallelRequests  bool
}

type Option func(*Options)
@@ -88,12 +87,6 @@ func WithContext(ctx context.Context) Option {
    }
}

func WithSingleActiveBackend() Option {
    return func(o *Options) {
        o.singleActiveBackend = true
    }
}

func WithModelID(id string) Option {
    return func(o *Options) {
        o.modelID = id

@@ -21,7 +21,7 @@ var _ = Describe("ModelLoader", func() {
    // Setup the model loader with a test directory
    modelPath = "/tmp/test_model_path"
    os.Mkdir(modelPath, 0755)
    modelLoader = model.NewModelLoader(modelPath)
    modelLoader = model.NewModelLoader(modelPath, false)
})

AfterEach(func() {

@@ -9,25 +9,43 @@ import (
    "strconv"
    "strings"
    "syscall"
    "time"

    "github.com/hpcloud/tail"
    process "github.com/mudler/go-processmanager"
    "github.com/rs/zerolog/log"
)

var forceBackendShutdown bool = os.Getenv("LOCALAI_FORCE_BACKEND_SHUTDOWN") == "true"

func (ml *ModelLoader) deleteProcess(s string) error {
    model, ok := ml.models[s]
    if !ok {
        log.Debug().Msgf("Model %s not found", s)
        return fmt.Errorf("model %s not found", s)
    }

    defer delete(ml.models, s)

    retries := 1
    for model.GRPC(false, ml.wd).IsBusy() {
        log.Debug().Msgf("%s busy. Waiting.", s)
        dur := time.Duration(retries*2) * time.Second
        if dur > retryTimeout {
            dur = retryTimeout
        }
        time.Sleep(dur)
        retries++

        if retries > 10 && forceBackendShutdown {
            log.Warn().Msgf("Model %s is still busy after %d retries. Forcing shutdown.", s, retries)
            break
        }
    }

    log.Debug().Msgf("Deleting process %s", s)

    m, exists := ml.models[s]
    if !exists {
        log.Error().Msgf("Model does not exist %s", s)
        // Nothing to do
        return nil
    }

    process := m.Process()
    process := model.Process()
    if process == nil {
        log.Error().Msgf("No process for %s", s)
        // Nothing to do as there is no process
@@ -44,9 +62,12 @@ func (ml *ModelLoader) deleteProcess(s string) error {

func (ml *ModelLoader) StopGRPC(filter GRPCProcessFilter) error {
    var err error = nil
    ml.mu.Lock()
    defer ml.mu.Unlock()

    for k, m := range ml.models {
        if filter(k, m.Process()) {
            e := ml.ShutdownModel(k)
            e := ml.deleteProcess(k)
            err = errors.Join(err, e)
        }
    }

@@ -70,7 +70,7 @@ var _ = Describe("Integration tests for the stores backend(s) and internal APIs"
    model.WithModel("test"),
}

sl = model.NewModelLoader("")
sl = model.NewModelLoader("", false)
sc, err = sl.Load(storeOpts...)
Expect(err).ToNot(HaveOccurred())
Expect(sc).ToNot(BeNil())
@@ -235,7 +235,7 @@ var _ = Describe("Integration tests for the stores backend(s) and internal APIs"
keys := [][]float32{{1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {0.0, 0.0, 1.0}, {-1.0, 0.0, 0.0}}
vals := [][]byte{[]byte("x"), []byte("y"), []byte("z"), []byte("-z")}

err := store.SetCols(context.Background(), sc, keys, vals);
err := store.SetCols(context.Background(), sc, keys, vals)
Expect(err).ToNot(HaveOccurred())

_, _, sims, err := store.Find(context.Background(), sc, keys[0], 4)
@@ -247,7 +247,7 @@ var _ = Describe("Integration tests for the stores backend(s) and internal APIs"
keys := [][]float32{{1.0, 0.0, 1.0}, {0.0, 2.0, 0.0}, {0.0, 0.0, -1.0}, {-1.0, 0.0, -1.0}}
vals := [][]byte{[]byte("x"), []byte("y"), []byte("z"), []byte("-z")}

err := store.SetCols(context.Background(), sc, keys, vals);
err := store.SetCols(context.Background(), sc, keys, vals)
Expect(err).ToNot(HaveOccurred())

_, _, sims, err := store.Find(context.Background(), sc, keys[0], 4)
@@ -314,7 +314,7 @@ var _ = Describe("Integration tests for the stores backend(s) and internal APIs"

normalize(keys[6:])

err := store.SetCols(context.Background(), sc, keys, vals);
err := store.SetCols(context.Background(), sc, keys, vals)
Expect(err).ToNot(HaveOccurred())

expectTriangleEq(keys, vals)
@@ -341,7 +341,7 @@ var _ = Describe("Integration tests for the stores backend(s) and internal APIs"
c += 1
}

err := store.SetCols(context.Background(), sc, keys, vals);
err := store.SetCols(context.Background(), sc, keys, vals)
Expect(err).ToNot(HaveOccurred())

expectTriangleEq(keys, vals)