Compare commits
v2.27.0...feat/llama
102 Commits

SHA1: 8fea82e68b, 01e2e3dbc3, 61cc76c455, 8abecb4a18, 8b3f76d8e6, 4e0497f1a6, ba88c9f451, a598285825,
cb7a172897, 771be28dfb, 7d6b3eb42d, 0bb33fab55, e3bf7f77f7, bd1707d339, 0474804541, 72693b3917,
a03b70010f, e3717e5c1a, c8f6858218, 06d7cc43ae, f2147cb850, 75bb9f4c28, a2ef4b1e07, 161c9fe2db,
7547463f81, 32e4dfd47b, f67e5dec68, 297d54acea, 56f44d448c, 0f0fafacd9, 4f239bac89, 04d74ac648,
18c3dc33ee, 508cfa7369, 1f94cddbae, 21ae7b4cd4, bef22ab547, eb04e8cdcf, 17e533a086, 4fc68409ff,
e587044449, 1f09db5161, 05b744f086, 89ca4bc02d, e626aa48a4, 752b5e0339, 637d72d6e3, f3bfec580a,
165c1ddff3, fb83238e9e, 700bfa41c7, 25bdc350df, 1b899e1a68, 3bf13f8c69, 7a00729374, d484028532,
0eb7fc2c41, a69e30e0c9, 9c018e6bff, 281e818047, 270f0e2157, 673e59e76c, 5a8a2adb44, a7317d23bf,
2bab9b5fe2, 081be3ba7d, 25e6f21322, b4df1c9cf3, 4fbd6609f2, 7387932f89, 59c37e67b2, c09d227647,
547d322b28, a6f0bb410f, 710f624ecd, 5018452be7, ece239966f, 3b8bc7e64c, fc73b2b430, 901dba6063,
b88a7a4550, 106e40845f, 0064bec8f5, 9e6dbb0b5a, d26e61388b, 31a7084c75, 128612a6fc, 6af3f46bc3,
d2cf8ef070, 259ad3cfe6, 18b320d577, 89e151f035, 22060f6410, 7ee3288460, cbbc954a8c, 2c425e9c69,
c59975ab05, 05f7004487, 2f9203cd2a, f09b33f2ef, 65470b0ab1, 9a23fe662b
.env (3 changes)
@@ -29,6 +29,9 @@
## Enable/Disable single backend (useful if only one GPU is available)
# LOCALAI_SINGLE_ACTIVE_BACKEND=true

# Forces shutdown of the backends if busy (only if LOCALAI_SINGLE_ACTIVE_BACKEND is set)
# LOCALAI_FORCE_BACKEND_SHUTDOWN=true

## Specify a build type. Available: cublas, openblas, clblas.
## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit.
## OpenBLAS: This is an open-source implementation of the BLAS library that aims to provide highly optimized code for various platforms. It includes support for multi-threading and can be compiled to use hardware-specific features for additional performance. OpenBLAS can run on many kinds of hardware, including CPUs from Intel, AMD, and ARM.
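For reference, a minimal sketch of turning these two switches on when running the container; the docker invocation mirrors the README quickstart further down, and the values are purely illustrative:

```bash
# Illustrative only: enable single-backend mode and forced backend shutdown
# via the same environment variables documented in .env above.
docker run -ti --name local-ai -p 8080:8080 \
  -e LOCALAI_SINGLE_ACTIVE_BACKEND=true \
  -e LOCALAI_FORCE_BACKEND_SHUTDOWN=true \
  localai/localai:latest-cpu
```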
.github/dependabot.yml (vendored, 4 changes)
@@ -29,10 +29,6 @@ updates:
schedule:
# Check for updates to GitHub Actions every weekday
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/autogptq"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/bark"
schedule:
.github/workflows/generate_intel_image.yaml (vendored, 2 changes)
@@ -15,7 +15,7 @@ jobs:
strategy:
matrix:
include:
- base-image: intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04
- base-image: intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04
runs-on: 'ubuntu-latest'
platforms: 'linux/amd64'
runs-on: ${{matrix.runs-on}}
.github/workflows/image.yml (vendored, 7 changes)
@@ -75,6 +75,7 @@ jobs:
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
latest-image: 'latest-gpu-hipblas-core'
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
@@ -251,6 +252,7 @@ jobs:
image-type: 'core'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
latest-image: 'latest-gpu-intel-f16-core'
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
tag-latest: 'false'
@@ -261,6 +263,7 @@ jobs:
image-type: 'core'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
latest-image: 'latest-gpu-intel-f32-core'

core-image-build:
uses: ./.github/workflows/image_build.yml
@@ -339,6 +342,7 @@ jobs:
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'false'
latest-image: 'latest-gpu-nvidia-cuda-12-core'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
@@ -351,17 +355,18 @@ jobs:
base-image: "ubuntu:22.04"
skip-drivers: 'false'
makeflags: "--jobs=4 --output-sync=target"
latest-image: 'latest-gpu-nvidia-cuda-12-core'
- build-type: 'vulkan'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-vulkan-ffmpeg-core'
latest-image: 'latest-vulkan-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
makeflags: "--jobs=4 --output-sync=target"
latest-image: 'latest-gpu-vulkan-core'
gh-runner:
uses: ./.github/workflows/image_build.yml
with:
.github/workflows/notify-models.yaml (vendored, 6 changes)
@@ -8,7 +8,7 @@ jobs:
notify-discord:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
env:
MODEL_NAME: hermes-2-theta-llama-3-8b
MODEL_NAME: gemma-3-12b-it
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
@@ -16,7 +16,7 @@ jobs:
fetch-depth: 0 # needed to checkout all branches for this Action to work
- uses: mudler/localai-github-action@v1
with:
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
model: 'gemma-3-12b-it' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.8.0
id: git-diff-action
@@ -87,7 +87,7 @@ jobs:
notify-twitter:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
env:
MODEL_NAME: hermes-2-theta-llama-3-8b
MODEL_NAME: gemma-3-12b-it
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
.github/workflows/notify-releases.yaml (vendored, 4 changes)
@@ -14,7 +14,7 @@ jobs:
steps:
- uses: mudler/localai-github-action@v1
with:
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
model: 'gemma-3-12b-it' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
- name: Summarize
id: summarize
run: |
@@ -60,4 +60,4 @@ jobs:
DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
uses: Ilshidur/action-discord@master
with:
args: ${{ steps.summarize.outputs.message }}
args: ${{ steps.summarize.outputs.message }}
.github/workflows/secscan.yaml (vendored, 2 changes)
@@ -18,7 +18,7 @@ jobs:
if: ${{ github.actor != 'dependabot[bot]' }}
- name: Run Gosec Security Scanner
if: ${{ github.actor != 'dependabot[bot]' }}
uses: securego/gosec@v2.22.0
uses: securego/gosec@v2.22.3
with:
# we let the report trigger content trigger a failure using the GitHub Security features.
args: '-no-fail -fmt sarif -out results.sarif ./...'
@@ -15,7 +15,7 @@ ARG TARGETARCH
ARG TARGETVARIANT

ENV DEBIAN_FRONTEND=noninteractive
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh"
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh"

RUN apt-get update && \
apt-get install -y --no-install-recommends \
@@ -431,9 +431,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMA
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vllm" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/vllm \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "autogptq" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/autogptq \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "bark" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/bark \
; fi && \
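As context for the EXTERNAL_GRPC_BACKENDS change above, a hedged sketch (not part of this diff) of pointing the variable at a different backend list at container start; whether a given backend still ships in the image, and that the variable is honored at runtime, are assumptions here, while the name:path format mirrors the Dockerfile line above:

```bash
# Hedged example: override the backend list via the environment variable
# shown in the Dockerfile; the single entry below is illustrative.
docker run -ti --name local-ai -p 8080:8080 \
  -e EXTERNAL_GRPC_BACKENDS="bark:/build/backend/python/bark/run.sh" \
  localai/localai:latest
```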
Makefile (30 changes)
@@ -6,7 +6,7 @@ BINARY_NAME=local-ai
DETECT_LIBS?=true

# llama.cpp versions
CPPLLAMA_VERSION?=4663bd353c61c1136cd8a97b9908755e4ab30cec
CPPLLAMA_VERSION?=6408210082cc0a61b992b487be7e2ff2efbb9e36

# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
@@ -21,8 +21,8 @@ BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
BARKCPP_VERSION?=v1.0.0

# stablediffusion.cpp (ggml)
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
STABLEDIFFUSION_GGML_VERSION?=19d876ee300a055629926ff836489901f734f2b7
STABLEDIFFUSION_GGML_REPO?=https://github.com/richiejp/stable-diffusion.cpp
STABLEDIFFUSION_GGML_VERSION?=53e3b17eb3d0b5760ced06a1f98320b68b34aaae

ONNX_VERSION?=1.20.0
ONNX_ARCH?=x64
@@ -260,11 +260,7 @@ backend/go/image/stablediffusion-ggml/libsd.a: sources/stablediffusion-ggml.cpp
$(MAKE) -C backend/go/image/stablediffusion-ggml libsd.a

backend-assets/grpc/stablediffusion-ggml: backend/go/image/stablediffusion-ggml/libsd.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/image/stablediffusion-ggml/ LIBRARY_PATH=$(CURDIR)/backend/go/image/stablediffusion-ggml/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion-ggml ./backend/go/image/stablediffusion-ggml/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/stablediffusion-ggml
endif
$(MAKE) -C backend/go/image/stablediffusion-ggml CGO_LDFLAGS="$(CGO_LDFLAGS)" stablediffusion-ggml

sources/onnxruntime:
mkdir -p sources/onnxruntime
@@ -509,18 +505,10 @@ protogen-go-clean:
$(RM) bin/*

.PHONY: protogen-python
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen kokoro-protogen vllm-protogen faster-whisper-protogen
protogen-python: bark-protogen coqui-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen kokoro-protogen vllm-protogen faster-whisper-protogen

.PHONY: protogen-python-clean
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean kokoro-protogen-clean vllm-protogen-clean faster-whisper-protogen-clean

.PHONY: autogptq-protogen
autogptq-protogen:
$(MAKE) -C backend/python/autogptq protogen

.PHONY: autogptq-protogen-clean
autogptq-protogen-clean:
$(MAKE) -C backend/python/autogptq protogen-clean
protogen-python-clean: bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean kokoro-protogen-clean vllm-protogen-clean faster-whisper-protogen-clean

.PHONY: bark-protogen
bark-protogen:
@@ -597,7 +585,6 @@ vllm-protogen-clean:
## GRPC
# Note: it is duplicated in the Dockerfile
prepare-extra-conda-environments: protogen-python
$(MAKE) -C backend/python/autogptq
$(MAKE) -C backend/python/bark
$(MAKE) -C backend/python/coqui
$(MAKE) -C backend/python/diffusers
@@ -809,7 +796,8 @@ docker-aio-all:

docker-image-intel:
docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 \
--progress plain \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.1.0-0-devel-ubuntu24.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
@@ -817,7 +805,7 @@ docker-image-intel:

docker-image-intel-xpu:
docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
README.md (96 changes)
@@ -1,7 +1,6 @@
<h1 align="center">
<br>
<img height="300" src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd"> <br>
LocalAI
<img height="300" src="./core/http/static/logo.png"> <br>
<br>
</h1>

@@ -48,9 +47,58 @@

[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)

**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that’s compatible with OpenAI (Elevenlabs, Anthropic... ) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that's compatible with OpenAI (Elevenlabs, Anthropic... ) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).

## 📚🆕 Local Stack Family

🆕 LocalAI is now part of a comprehensive suite of AI tools designed to work together:

<table>
<tr>
<td width="50%" valign="top">
<a href="https://github.com/mudler/LocalAGI">
<img src="https://raw.githubusercontent.com/mudler/LocalAGI/refs/heads/main/webui/react-ui/public/logo_2.png" width="300" alt="LocalAGI Logo">
</a>
</td>
<td width="50%" valign="top">
<h3><a href="https://github.com/mudler/LocalAGI">LocalAGI</a></h3>
<p>A powerful Local AI agent management platform that serves as a drop-in replacement for OpenAI's Responses API, enhanced with advanced agentic capabilities.</p>
</td>
</tr>
<tr>
<td width="50%" valign="top">
<a href="https://github.com/mudler/LocalRecall">
<img src="https://raw.githubusercontent.com/mudler/LocalRecall/refs/heads/main/static/localrecall_horizontal.png" width="300" alt="LocalRecall Logo">
</a>
</td>
<td width="50%" valign="top">
<h3><a href="https://github.com/mudler/LocalRecall">LocalRecall</a></h3>
<p>A REST-ful API and knowledge base management system that provides persistent memory and storage capabilities for AI agents.</p>
</td>
</tr>
</table>

## Screenshots

| Talk Interface | Generate Audio |
| --- | --- |
|  |  |

| Models Overview | Generate Images |
| --- | --- |
|  |  |

| Chat Interface | Home |
| --- | --- |
|  |  |

| Login | Swarm |
| --- | --- |
| |  |

## 💻 Quickstart

Run the installer script:

@@ -59,17 +107,21 @@ curl https://localai.io/install.sh | sh
```

Or run with docker:

### CPU only image:
```bash
# CPU only image:
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-cpu

# Nvidia GPU:
```
### Nvidia GPU:
```bash
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12

# CPU and GPU image (bigger size):
```
### CPU and GPU image (bigger size):
```bash
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest

# AIO images (it will pre-download a set of models ready for use, see https://localai.io/basics/container/)
```
### AIO images (it will pre-download a set of models ready for use, see https://localai.io/basics/container/)
```bash
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
```

@@ -88,10 +140,13 @@ local-ai run https://gist.githubusercontent.com/.../phi-2.yaml
local-ai run oci://localai/phi-2:latest
```

[💻 Getting started](https://localai.io/basics/getting_started/index.html)
For more information, see [💻 Getting started](https://localai.io/basics/getting_started/index.html)

## 📰 Latest project news

- Apr 2025: [LocalAGI](https://github.com/mudler/LocalAGI) and [LocalRecall](https://github.com/mudler/LocalRecall) join the LocalAI family stack.
- Apr 2025: WebUI overhaul, AIO images updates
- Feb 2025: Backend cleanup, Breaking changes, new backends (kokoro, OutelTTS, faster-whisper), Nvidia L4T images
- Jan 2025: LocalAI model release: https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.3, SANA support in diffusers: https://github.com/mudler/LocalAI/pull/4603
- Dec 2024: stablediffusion.cpp backend (ggml) added ( https://github.com/mudler/LocalAI/pull/4289 )
- Nov 2024: Bark.cpp backend added ( https://github.com/mudler/LocalAI/pull/4287 )
@@ -105,19 +160,6 @@ local-ai run oci://localai/phi-2:latest

Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)

## 🔥🔥 Hot topics (looking for help):

- Multimodal with vLLM and Video understanding: https://github.com/mudler/LocalAI/pull/3729
- Realtime API https://github.com/mudler/LocalAI/issues/3714
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
- Assistant API: https://github.com/mudler/LocalAI/issues/1273
- Vulkan: https://github.com/mudler/LocalAI/issues/1647
- Anthropic API: https://github.com/mudler/LocalAI/issues/1808

If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22

## 🚀 [Features](https://localai.io/features/)

- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `transformers`, `vllm` ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
@@ -131,12 +173,10 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
- 📈 [Reranker API](https://localai.io/features/reranker/)
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
- [Agentic capabilities](https://github.com/mudler/LocalAGI)
- 🔊 Voice activity detection (Silero-VAD support)
- 🌍 Integrated WebUI!

## 💻 Usage

Check out the [Getting started](https://localai.io/basics/getting_started/index.html) section in our documentation.

### 🔗 Community and integrations
@@ -190,11 +190,7 @@ message ModelOptions {
int32 NGQA = 20;
string ModelFile = 21;

// AutoGPTQ
string Device = 22;
bool UseTriton = 23;
string ModelBaseName = 24;
bool UseFastTokenizer = 25;

// Diffusers
string PipelineType = 26;
@@ -2,7 +2,7 @@
## XXX: In some versions of CMake clip wasn't being built before llama.
## This is an hack for now, but it should be fixed in the future.
set(TARGET myclip)
add_library(${TARGET} clip.cpp clip.h llava.cpp llava.h)
add_library(${TARGET} clip.cpp clip.h clip-impl.h llava.cpp llava.h)
install(TARGETS ${TARGET} LIBRARY)
target_include_directories(myclip PUBLIC .)
target_include_directories(myclip PUBLIC ../..)
@@ -8,7 +8,7 @@ ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
TARGET?=--target grpc-server

# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF

# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
ifeq ($(BUILD_TYPE),cublas)
@@ -36,11 +36,18 @@ else ifeq ($(OS),Darwin)
endif

ifeq ($(BUILD_TYPE),sycl_f16)
CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
CMAKE_ARGS+=-DGGML_SYCL=ON \
-DCMAKE_C_COMPILER=icx \
-DCMAKE_CXX_COMPILER=icpx \
-DCMAKE_CXX_FLAGS="-fsycl" \
-DGGML_SYCL_F16=ON
endif

ifeq ($(BUILD_TYPE),sycl_f32)
CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
CMAKE_ARGS+=-DGGML_SYCL=ON \
-DCMAKE_C_COMPILER=icx \
-DCMAKE_CXX_COMPILER=icpx \
-DCMAKE_CXX_FLAGS="-fsycl"
endif

llama.cpp:
@@ -77,4 +84,4 @@ ifneq (,$(findstring sycl,$(BUILD_TYPE)))
else
+cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
endif
cp llama.cpp/build/bin/grpc-server .
cp llama.cpp/build/bin/grpc-server .
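To make the new flags concrete, this is roughly what the sycl_f16 branch of that recipe expands to. It is a sketch that assumes the oneAPI environment from ONEAPI_VARS has been sourced; it is not a verbatim quote of the Makefile:

```bash
# Sketch of the expanded sycl_f16 configure/build step (illustrative, not verbatim).
source /opt/intel/oneapi/setvars.sh
cd llama.cpp && mkdir -p build && cd build
cmake .. -DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF \
  -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx \
  -DCMAKE_CXX_FLAGS="-fsycl" -DGGML_SYCL_F16=ON
cmake --build . --config Release --target grpc-server
```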
@@ -217,6 +217,7 @@ struct llama_client_slot

bool infill = false;
bool embedding = false;
bool reranker = false;
bool has_next_token = true;
bool truncated = false;
bool stopped_eos = false;
@@ -509,15 +510,15 @@ struct llama_server_context
bool load_model(const common_params &params_)
{
params = params_;
if (!params.mmproj.empty()) {
if (!params.mmproj.path.empty()) {
multimodal = true;
LOG_INFO("Multi Modal Mode Enabled", {});
clp_ctx = clip_init(params.mmproj.c_str(), clip_context_params {
clp_ctx = clip_init(params.mmproj.path.c_str(), clip_context_params {
/* use_gpu */ has_gpu,
/*verbosity=*/ 1,
/*verbosity=*/ GGML_LOG_LEVEL_INFO,
});
if(clp_ctx == nullptr) {
LOG_ERR("unable to load clip model: %s", params.mmproj.c_str());
LOG_ERR("unable to load clip model: %s", params.mmproj.path.c_str());
return false;
}

@@ -531,10 +532,16 @@ struct llama_server_context
ctx = common_init.context.release();
if (model == nullptr)
{
LOG_ERR("unable to load model: %s", params.model.c_str());
LOG_ERR("unable to load model: %s", params.model.path.c_str());
return false;
}

// Enable reranking if embeddings are enabled - moved after context initialization
if (params.embedding) {
params.reranking = true;
LOG_INFO("Reranking enabled (embeddings are enabled)", {});
}

if (multimodal) {
const int n_embd_clip = clip_n_mmproj_embd(clp_ctx);
const int n_embd_llm = llama_model_n_embd(model);
@@ -1413,7 +1420,59 @@ struct llama_server_context
queue_results.send(res);
}

void request_completion(int task_id, json data, bool infill, bool embedding, int multitask_id)
void send_rerank(llama_client_slot &slot, const llama_batch & batch)
{
task_result res;
res.id = slot.task_id;
res.multitask_id = slot.multitask_id;
res.error = false;
res.stop = true;

float score = -1e6f; // Default score if we fail to get embeddings

if (!params.reranking)
{
LOG_WARNING("reranking disabled", {
{"params.reranking", params.reranking},
});
}
else if (ctx == nullptr)
{
LOG_ERR("context is null, cannot perform reranking");
res.error = true;
}
else
{
for (int i = 0; i < batch.n_tokens; ++i) {
if (!batch.logits[i] || batch.seq_id[i][0] != slot.id) {
continue;
}

const float * embd = llama_get_embeddings_seq(ctx, batch.seq_id[i][0]);
if (embd == NULL) {
embd = llama_get_embeddings_ith(ctx, i);
}

if (embd == NULL) {
LOG("failed to get embeddings");
continue;
}

score = embd[0];
}
}

// Format result as JSON similar to the embedding function
res.result_json = json
{
{"score", score},
{"tokens", slot.num_prompt_tokens}
};

queue_results.send(res);
}

void request_completion(int task_id, json data, bool infill, bool embedding, bool rerank, int multitask_id)
{
task_server task;
task.id = task_id;
@@ -1421,6 +1480,7 @@ struct llama_server_context
task.data = std::move(data);
task.infill_mode = infill;
task.embedding_mode = embedding;
task.reranking_mode = rerank;
task.type = TASK_TYPE_COMPLETION;
task.multitask_id = multitask_id;

@@ -1552,7 +1612,7 @@ struct llama_server_context
subtask_data["prompt"] = subtask_data["prompt"][i];

// subtasks inherit everything else (infill mode, embedding mode, etc.)
request_completion(subtask_ids[i], subtask_data, multiprompt_task.infill_mode, multiprompt_task.embedding_mode, multitask_id);
request_completion(subtask_ids[i], subtask_data, multiprompt_task.infill_mode, multiprompt_task.embedding_mode, multiprompt_task.reranking_mode, multitask_id);
}
}

@@ -1591,6 +1651,7 @@ struct llama_server_context

slot->infill = task.infill_mode;
slot->embedding = task.embedding_mode;
slot->reranker = task.reranking_mode;
slot->task_id = task.id;
slot->multitask_id = task.multitask_id;

@@ -2034,6 +2095,14 @@ struct llama_server_context
continue;
}

if (slot.reranker)
{
send_rerank(slot, batch_view);
slot.release();
slot.i_batch = -1;
continue;
}

completion_token_output result;
const llama_token id = common_sampler_sample(slot.ctx_sampling, ctx, slot.i_batch - i);

@@ -2326,11 +2395,11 @@ static void params_parse(const backend::ModelOptions* request,

// this is comparable to: https://github.com/ggerganov/llama.cpp/blob/d9b33fe95bd257b36c84ee5769cc048230067d6f/examples/server/server.cpp#L1809

params.model = request->modelfile();
params.model.path = request->modelfile();
if (!request->mmproj().empty()) {
// get the directory of modelfile
std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
params.mmproj = model_dir + "/"+ request->mmproj();
std::string model_dir = params.model.path.substr(0, params.model.path.find_last_of("/\\"));
params.mmproj.path = model_dir + "/"+ request->mmproj();
}
// params.model_alias ??
params.model_alias = request->modelfile();
@@ -2405,7 +2474,7 @@ static void params_parse(const backend::ModelOptions* request,
scale_factor = request->lorascale();
}
// get the directory of modelfile
std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
std::string model_dir = params.model.path.substr(0, params.model.path.find_last_of("/\\"));
params.lora_adapters.push_back({ model_dir + "/"+request->loraadapter(), scale_factor });
}
params.use_mlock = request->mlock();
@@ -2489,7 +2558,7 @@ public:
json data = parse_options(true, request, llama);
const int task_id = llama.queue_tasks.get_new_id();
llama.queue_results.add_waiting_task_id(task_id);
llama.request_completion(task_id, data, false, false, -1);
llama.request_completion(task_id, data, false, false, false, -1);
while (true)
{
task_result result = llama.queue_results.recv(task_id);
@@ -2543,7 +2612,7 @@ public:
json data = parse_options(false, request, llama);
const int task_id = llama.queue_tasks.get_new_id();
llama.queue_results.add_waiting_task_id(task_id);
llama.request_completion(task_id, data, false, false, -1);
llama.request_completion(task_id, data, false, false, false, -1);
std::string completion_text;
task_result result = llama.queue_results.recv(task_id);
if (!result.error && result.stop) {
@@ -2580,7 +2649,7 @@ public:
json data = parse_options(false, request, llama);
const int task_id = llama.queue_tasks.get_new_id();
llama.queue_results.add_waiting_task_id(task_id);
llama.request_completion(task_id, { {"prompt", data["embeddings"]}, { "n_predict", 0}, {"image_data", ""} }, false, true, -1);
llama.request_completion(task_id, { {"prompt", data["embeddings"]}, { "n_predict", 0}, {"image_data", ""} }, false, true, false, -1);
// get the result
task_result result = llama.queue_results.recv(task_id);
//std::cout << "Embedding result JSON" << result.result_json.dump() << std::endl;
@@ -2612,6 +2681,46 @@ public:
return grpc::Status::OK;
}

grpc::Status Rerank(ServerContext* context, const backend::RerankRequest* request, backend::RerankResult* rerankResult) {
// Create a JSON object with the query and documents
json data = {
{"prompt", request->query()},
{"documents", request->documents()},
{"top_n", request->top_n()}
};

// Generate a new task ID
const int task_id = llama.queue_tasks.get_new_id();
llama.queue_results.add_waiting_task_id(task_id);

// Queue the task with reranking mode enabled
llama.request_completion(task_id, data, false, false, true, -1);

// Get the result
task_result result = llama.queue_results.recv(task_id);
llama.queue_results.remove_waiting_task_id(task_id);

if (!result.error && result.stop) {
// Set usage information
backend::Usage* usage = rerankResult->mutable_usage();
usage->set_total_tokens(result.result_json.value("tokens", 0));
usage->set_prompt_tokens(result.result_json.value("tokens", 0));

// Get the score from the result
float score = result.result_json.value("score", 0.0f);

// Create document results for each input document
for (int i = 0; i < request->documents_size(); i++) {
backend::DocumentResult* doc_result = rerankResult->add_results();
doc_result->set_index(i);
doc_result->set_text(request->documents(i));
doc_result->set_relevance_score(score);
}
}

return grpc::Status::OK;
}

grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) {
llama_client_slot* active_slot = llama.get_active_slot();

@@ -2644,7 +2753,9 @@ void RunServer(const std::string& server_address) {
ServerBuilder builder;
builder.AddListeningPort(server_address, grpc::InsecureServerCredentials());
builder.RegisterService(&service);

builder.SetMaxMessageSize(50 * 1024 * 1024); // 50MB
builder.SetMaxSendMessageSize(50 * 1024 * 1024); // 50MB
builder.SetMaxReceiveMessageSize(50 * 1024 * 1024); // 50MB
std::unique_ptr<Server> server(builder.BuildAndStart());
std::cout << "Server listening on " << server_address << std::endl;
server->Wait();
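For orientation, a hedged sketch of exercising this new reranking path over HTTP once the backend is wired up. The /v1/rerank route and the payload shape are assumptions drawn from LocalAI's Reranker API documentation rather than from this diff, and the model name is a placeholder; only query, documents, and top_n mirror the RerankRequest handled by the gRPC server above:

```bash
# Assumed endpoint and fields (see https://localai.io/features/reranker/); illustrative only.
curl http://localhost:8080/v1/rerank \
  -H "Content-Type: application/json" \
  -d '{
        "model": "some-reranker-model",
        "query": "Organic skincare products for sensitive skin",
        "documents": ["document one", "document two"],
        "top_n": 2
      }'
```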
@@ -21,6 +21,7 @@ fi
## XXX: In some versions of CMake clip wasn't being built before llama.
## This is an hack for now, but it should be fixed in the future.
cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
cp -rfv llama.cpp/examples/llava/clip-impl.h llama.cpp/examples/grpc-server/clip-impl.h
cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
backend/cpp/llama/utils.hpp (vendored, 1 change)
@@ -61,6 +61,7 @@ struct task_server {
json data;
bool infill_mode = false;
bool embedding_mode = false;
bool reranking_mode = false;
int multitask_id = -1;
};
@@ -8,6 +8,13 @@ ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
# keep standard at C11 and C++11
CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC

GOCMD?=go
CGO_LDFLAGS?=
# Avoid parent make file overwriting CGO_LDFLAGS which is needed for hipblas
CGO_LDFLAGS_SYCL=
GO_TAGS?=
LD_FLAGS?=

# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF

@@ -21,7 +28,7 @@ else ifeq ($(BUILD_TYPE),openblas)
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
else ifeq ($(BUILD_TYPE),clblas)
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
else ifeq ($(BUILD_TYPE),hipblas)
CMAKE_ARGS+=-DGGML_HIP=ON
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
@@ -36,16 +43,35 @@ else ifeq ($(OS),Darwin)
endif
endif

# ifeq ($(BUILD_TYPE),sycl_f16)
# CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON -DSD_SYCL=ON -DGGML_SYCL_F16=ON
# endif
ifeq ($(BUILD_TYPE),sycl_f16)
CMAKE_ARGS+=-DGGML_SYCL=ON \
-DCMAKE_C_COMPILER=icx \
-DCMAKE_CXX_COMPILER=icpx \
-DSD_SYCL=ON \
-DGGML_SYCL_F16=ON
CC=icx
CXX=icpx
CGO_LDFLAGS_SYCL += -fsycl -L${DNNLROOT}/lib -ldnnl ${MKLROOT}/lib/intel64/libmkl_sycl.a -fiopenmp -fopenmp-targets=spir64 -lOpenCL
CGO_LDFLAGS_SYCL += $(shell pkg-config --libs mkl-static-lp64-gomp)
CGO_CXXFLAGS += -fiopenmp -fopenmp-targets=spir64
CGO_CXXFLAGS += $(shell pkg-config --cflags mkl-static-lp64-gomp )
endif

# ifeq ($(BUILD_TYPE),sycl_f32)
# CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DSD_SYCL=ON
# endif
ifeq ($(BUILD_TYPE),sycl_f32)
CMAKE_ARGS+=-DGGML_SYCL=ON \
-DCMAKE_C_COMPILER=icx \
-DCMAKE_CXX_COMPILER=icpx \
-DSD_SYCL=ON
CC=icx
CXX=icpx
CGO_LDFLAGS_SYCL += -fsycl -L${DNNLROOT}/lib -ldnnl ${MKLROOT}/lib/intel64/libmkl_sycl.a -fiopenmp -fopenmp-targets=spir64 -lOpenCL
CGO_LDFLAGS_SYCL += $(shell pkg-config --libs mkl-static-lp64-gomp)
CGO_CXXFLAGS += -fiopenmp -fopenmp-targets=spir64
CGO_CXXFLAGS += $(shell pkg-config --cflags mkl-static-lp64-gomp )
endif

# warnings
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
# CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function

# Find all .a archives in ARCHIVE_DIR
# (ggml can have different backends cpu, cuda, etc., each backend generates a .a archive)
@@ -86,11 +112,24 @@ endif
$(MAKE) $(COMBINED_LIB)

gosd.o:
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+bash -c "source $(ONEAPI_VARS); \
$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c"
else
$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c
endif

libsd.a: gosd.o
cp $(INCLUDE_PATH)/build/libstable-diffusion.a ./libsd.a
$(AR) rcs libsd.a gosd.o

stablediffusion-ggml:
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_SYCL)" C_INCLUDE_PATH="$(INCLUDE_PATH)" LIBRARY_PATH="$(LIBRARY_PATH)" \
CC="$(CC)" CXX="$(CXX)" CGO_CXXFLAGS="$(CGO_CXXFLAGS)" \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o ../../../../backend-assets/grpc/stablediffusion-ggml ./
ifneq ($(UPX),)
$(UPX) ../../../../backend-assets/grpc/stablediffusion-ggml
endif

clean:
rm -rf gosd.o libsd.a build $(COMBINED_LIB)
rm -rf gosd.o libsd.a build $(COMBINED_LIB)
@@ -1,17 +0,0 @@
.PHONY: autogptq
autogptq: protogen
bash install.sh

.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py

.PHONY: protogen-clean
protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py

backend_pb2_grpc.py backend_pb2.py:
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto

.PHONY: clean
clean: protogen-clean
rm -rf venv __pycache__
@@ -1,5 +0,0 @@
# Creating a separate environment for the autogptq project

```
make autogptq
```
@@ -1,153 +0,0 @@
#!/usr/bin/env python3
from concurrent import futures
import argparse
import signal
import sys
import os
import time
import base64

import grpc
import backend_pb2
import backend_pb2_grpc

from auto_gptq import AutoGPTQForCausalLM
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import TextGenerationPipeline

_ONE_DAY_IN_SECONDS = 60 * 60 * 24

# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))

# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
def Health(self, request, context):
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
def LoadModel(self, request, context):
try:
device = "cuda:0"
if request.Device != "":
device = request.Device

# support loading local model files
model_path = os.path.join(os.environ.get('MODELS_PATH', './'), request.Model)
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True, trust_remote_code=request.TrustRemoteCode)

# support model `Qwen/Qwen-VL-Chat-Int4`
if "qwen-vl" in request.Model.lower():
self.model_name = "Qwen-VL-Chat"
model = AutoModelForCausalLM.from_pretrained(model_path,
trust_remote_code=request.TrustRemoteCode,
device_map="auto").eval()
else:
model = AutoGPTQForCausalLM.from_quantized(model_path,
model_basename=request.ModelBaseName,
use_safetensors=True,
trust_remote_code=request.TrustRemoteCode,
device=device,
use_triton=request.UseTriton,
quantize_config=None)

self.model = model
self.tokenizer = tokenizer
except Exception as err:
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
return backend_pb2.Result(message="Model loaded successfully", success=True)

def Predict(self, request, context):
penalty = 1.0
if request.Penalty != 0.0:
penalty = request.Penalty
tokens = 512
if request.Tokens != 0:
tokens = request.Tokens
top_p = 0.95
if request.TopP != 0.0:
top_p = request.TopP

prompt_images = self.recompile_vl_prompt(request)
compiled_prompt = prompt_images[0]
print(f"Prompt: {compiled_prompt}", file=sys.stderr)

# Implement Predict RPC
pipeline = TextGenerationPipeline(
model=self.model,
tokenizer=self.tokenizer,
max_new_tokens=tokens,
temperature=request.Temperature,
top_p=top_p,
repetition_penalty=penalty,
)
t = pipeline(compiled_prompt)[0]["generated_text"]
print(f"generated_text: {t}", file=sys.stderr)

if compiled_prompt in t:
t = t.replace(compiled_prompt, "")
# house keeping. Remove the image files from /tmp folder
for img_path in prompt_images[1]:
try:
os.remove(img_path)
except Exception as e:
print(f"Error removing image file: {img_path}, {e}", file=sys.stderr)

return backend_pb2.Result(message=bytes(t, encoding='utf-8'))

def PredictStream(self, request, context):
# Implement PredictStream RPC
#for reply in some_data_generator():
# yield reply
# Not implemented yet
return self.Predict(request, context)

def recompile_vl_prompt(self, request):
prompt = request.Prompt
image_paths = []

if "qwen-vl" in self.model_name.lower():
# request.Images is an array which contains base64 encoded images. Iterate the request.Images array, decode and save each image to /tmp folder with a random filename.
# Then, save the image file paths to an array "image_paths".
# read "request.Prompt", replace "[img-%d]" with the image file paths in the order they appear in "image_paths". Save the new prompt to "prompt".
for i, img in enumerate(request.Images):
timestamp = str(int(time.time() * 1000)) # Generate timestamp
img_path = f"/tmp/vl-{timestamp}.jpg" # Use timestamp in filename
with open(img_path, "wb") as f:
f.write(base64.b64decode(img))
image_paths.append(img_path)
prompt = prompt.replace(f"[img-{i}]", "<img>" + img_path + "</img>,")
else:
prompt = request.Prompt
return (prompt, image_paths)

def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
server.add_insecure_port(address)
server.start()
print("Server started. Listening on: " + address, file=sys.stderr)

# Define the signal handler function
def signal_handler(sig, frame):
print("Received termination signal. Shutting down...")
server.stop(0)
sys.exit(0)

# Set the signal handlers for SIGINT and SIGTERM
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)

try:
while True:
time.sleep(_ONE_DAY_IN_SECONDS)
except KeyboardInterrupt:
server.stop(0)

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run the gRPC server.")
parser.add_argument(
"--addr", default="localhost:50051", help="The address to bind the server to."
)
args = parser.parse_args()

serve(args.addr)
@@ -1,14 +0,0 @@
#!/bin/bash
set -e

source $(dirname $0)/../common/libbackend.sh

# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
fi

installRequirements
@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118
@@ -1 +0,0 @@
torch==2.4.1
@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.4.1+rocm6.0
@@ -1,6 +0,0 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch==2.3.110+xpu
torch==2.3.1+cxx11.abi
oneccl_bind_pt==2.3.100+xpu
optimum[openvino]
setuptools
@@ -1,6 +0,0 @@
accelerate
auto-gptq==0.7.1
grpcio==1.71.0
protobuf
certifi
transformers
@@ -1,4 +0,0 @@
#!/bin/bash
source $(dirname $0)/../common/libbackend.sh

startBackend $@
@@ -1,6 +0,0 @@
#!/bin/bash
set -e

source $(dirname $0)/../common/libbackend.sh

runUnittests
@@ -61,7 +61,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
return backend_pb2.Result(success=True)

def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
options=[
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
])
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
server.add_insecure_port(address)
server.start()

@@ -86,7 +86,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
return backend_pb2.Result(success=True)

def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
options=[
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
])
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
server.add_insecure_port(address)
server.start()

@@ -19,7 +19,7 @@ import grpc

from diffusers import SanaPipeline, StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline, Lumina2Text2ImgPipeline
from diffusers.pipelines.stable_diffusion import safety_checker
from diffusers.utils import load_image, export_to_video
from compel import Compel, ReturnedEmbeddingsType
@@ -287,6 +287,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):

if request.LowVRAM:
self.pipe.enable_model_cpu_offload()
elif request.PipelineType == "Lumina2Text2ImgPipeline":
self.pipe = Lumina2Text2ImgPipeline.from_pretrained(
request.Model,
torch_dtype=torch.bfloat16)
if request.LowVRAM:
self.pipe.enable_model_cpu_offload()
elif request.PipelineType == "SanaPipeline":
self.pipe = SanaPipeline.from_pretrained(
request.Model,
@@ -516,7 +522,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):

def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
options=[
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
])
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
server.add_insecure_port(address)
server.start()

@@ -105,7 +105,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):

def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
options=[
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
])
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
server.add_insecure_port(address)
server.start()

@@ -62,7 +62,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
return backend_pb2.TranscriptResult(segments=resultSegments, text=text)

def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
options=[
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
])
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
server.add_insecure_port(address)
server.start()

@@ -99,7 +99,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
return backend_pb2.Result(success=True)

def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
options=[
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
])
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
server.add_insecure_port(address)
server.start()

@@ -91,7 +91,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
return backend_pb2.RerankResult(usage=usage, results=results)

def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
options=[
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
])
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
server.add_insecure_port(address)
server.start()

@@ -559,7 +559,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):

async def serve(address):
# Start asyncio gRPC server
server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
options=[
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
])
# Add the servicer to the server
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
# Bind the server to the address

@@ -320,7 +320,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):

async def serve(address):
# Start asyncio gRPC server
server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
options=[
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
])
# Add the servicer to the server
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
# Bind the server to the address
@@ -16,7 +16,7 @@ type Application struct {
|
||||
func newApplication(appConfig *config.ApplicationConfig) *Application {
|
||||
return &Application{
|
||||
backendLoader: config.NewBackendConfigLoader(appConfig.ModelPath),
|
||||
modelLoader: model.NewModelLoader(appConfig.ModelPath),
|
||||
modelLoader: model.NewModelLoader(appConfig.ModelPath, appConfig.SingleBackend),
|
||||
applicationConfig: appConfig,
|
||||
templatesEvaluator: templates.NewEvaluator(appConfig.ModelPath),
|
||||
}
|
||||
|
||||
@@ -143,7 +143,7 @@ func New(opts ...config.AppOption) (*Application, error) {
|
||||
}()
|
||||
}
|
||||
|
||||
if options.LoadToMemory != nil {
|
||||
if options.LoadToMemory != nil && !options.SingleBackend {
|
||||
for _, m := range options.LoadToMemory {
|
||||
cfg, err := application.BackendLoader().LoadBackendConfigFileByNameDefaultOptions(m, options)
|
||||
if err != nil {
|
||||
|
||||
@@ -17,6 +17,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendCo
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer loader.Close()
|
||||
|
||||
var fn func() ([]float32, error)
|
||||
switch model := inferenceModel.(type) {
|
||||
|
||||
@@ -16,6 +16,7 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer loader.Close()
|
||||
|
||||
fn := func() error {
|
||||
_, err := inferenceModel.GenerateImage(
|
||||
|
||||
@@ -53,6 +53,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer loader.Close()
|
||||
|
||||
var protoMessages []*proto.Message
|
||||
// if we are using the tokenizer template, we need to convert the messages to proto messages
|
||||
|
||||
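The backend helpers above (embeddings, image generation, inference) all gain the same pattern: load the model through the shared loader, then `defer loader.Close()` so that, when single-active-backend mode is on, the lock is released as soon as the request finishes instead of pinning the backend for the process lifetime. A hedged sketch of that pattern; apart from `Load` and `Close`, the names are illustrative:

```go
package backend

import (
	"github.com/mudler/LocalAI/pkg/model"
)

// modelInferenceSketch illustrates the load/close pattern adopted by the
// backend helpers in this branch.
func modelInferenceSketch(loader *model.ModelLoader, opts ...model.Option) error {
	inferenceModel, err := loader.Load(opts...)
	if err != nil {
		return err
	}
	// Release the loader (and, in single-active-backend mode, the backend lock)
	// once this request is done.
	defer loader.Close()

	_ = inferenceModel // ... perform the gRPC call against the loaded backend ...
	return nil
}
```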
@@ -40,10 +40,6 @@ func ModelOptions(c config.BackendConfig, so *config.ApplicationConfig, opts ...
|
||||
grpcOpts := grpcModelOpts(c)
|
||||
defOpts = append(defOpts, model.WithLoadGRPCLoadModelOpts(grpcOpts))
|
||||
|
||||
if so.SingleBackend {
|
||||
defOpts = append(defOpts, model.WithSingleActiveBackend())
|
||||
}
|
||||
|
||||
if so.ParallelBackendRequests {
|
||||
defOpts = append(defOpts, model.EnableParallelRequests)
|
||||
}
|
||||
@@ -121,7 +117,7 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
||||
triggers := make([]*pb.GrammarTrigger, 0)
|
||||
for _, t := range c.FunctionsConfig.GrammarConfig.GrammarTriggers {
|
||||
triggers = append(triggers, &pb.GrammarTrigger{
|
||||
Word: t.Word,
|
||||
Word: t.Word,
|
||||
})
|
||||
|
||||
}
|
||||
@@ -161,38 +157,33 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
||||
DisableLogStatus: c.DisableLogStatus,
|
||||
DType: c.DType,
|
||||
// LimitMMPerPrompt vLLM
|
||||
LimitImagePerPrompt: int32(c.LimitMMPerPrompt.LimitImagePerPrompt),
|
||||
LimitVideoPerPrompt: int32(c.LimitMMPerPrompt.LimitVideoPerPrompt),
|
||||
LimitAudioPerPrompt: int32(c.LimitMMPerPrompt.LimitAudioPerPrompt),
|
||||
MMProj: c.MMProj,
|
||||
FlashAttention: c.FlashAttention,
|
||||
CacheTypeKey: c.CacheTypeK,
|
||||
CacheTypeValue: c.CacheTypeV,
|
||||
NoKVOffload: c.NoKVOffloading,
|
||||
YarnExtFactor: c.YarnExtFactor,
|
||||
YarnAttnFactor: c.YarnAttnFactor,
|
||||
YarnBetaFast: c.YarnBetaFast,
|
||||
YarnBetaSlow: c.YarnBetaSlow,
|
||||
NGQA: c.NGQA,
|
||||
RMSNormEps: c.RMSNormEps,
|
||||
MLock: mmlock,
|
||||
RopeFreqBase: c.RopeFreqBase,
|
||||
RopeScaling: c.RopeScaling,
|
||||
Type: c.ModelType,
|
||||
RopeFreqScale: c.RopeFreqScale,
|
||||
NUMA: c.NUMA,
|
||||
Embeddings: embeddings,
|
||||
LowVRAM: lowVRAM,
|
||||
NGPULayers: int32(nGPULayers),
|
||||
MMap: mmap,
|
||||
MainGPU: c.MainGPU,
|
||||
Threads: int32(*c.Threads),
|
||||
TensorSplit: c.TensorSplit,
|
||||
// AutoGPTQ
|
||||
ModelBaseName: c.AutoGPTQ.ModelBaseName,
|
||||
Device: c.AutoGPTQ.Device,
|
||||
UseTriton: c.AutoGPTQ.Triton,
|
||||
UseFastTokenizer: c.AutoGPTQ.UseFastTokenizer,
|
||||
LimitImagePerPrompt: int32(c.LimitMMPerPrompt.LimitImagePerPrompt),
|
||||
LimitVideoPerPrompt: int32(c.LimitMMPerPrompt.LimitVideoPerPrompt),
|
||||
LimitAudioPerPrompt: int32(c.LimitMMPerPrompt.LimitAudioPerPrompt),
|
||||
MMProj: c.MMProj,
|
||||
FlashAttention: c.FlashAttention,
|
||||
CacheTypeKey: c.CacheTypeK,
|
||||
CacheTypeValue: c.CacheTypeV,
|
||||
NoKVOffload: c.NoKVOffloading,
|
||||
YarnExtFactor: c.YarnExtFactor,
|
||||
YarnAttnFactor: c.YarnAttnFactor,
|
||||
YarnBetaFast: c.YarnBetaFast,
|
||||
YarnBetaSlow: c.YarnBetaSlow,
|
||||
NGQA: c.NGQA,
|
||||
RMSNormEps: c.RMSNormEps,
|
||||
MLock: mmlock,
|
||||
RopeFreqBase: c.RopeFreqBase,
|
||||
RopeScaling: c.RopeScaling,
|
||||
Type: c.ModelType,
|
||||
RopeFreqScale: c.RopeFreqScale,
|
||||
NUMA: c.NUMA,
|
||||
Embeddings: embeddings,
|
||||
LowVRAM: lowVRAM,
|
||||
NGPULayers: int32(nGPULayers),
|
||||
MMap: mmap,
|
||||
MainGPU: c.MainGPU,
|
||||
Threads: int32(*c.Threads),
|
||||
TensorSplit: c.TensorSplit,
|
||||
// RWKV
|
||||
Tokenizer: c.Tokenizer,
|
||||
}
|
||||
|
||||
@@ -12,10 +12,10 @@ import (
|
||||
func Rerank(request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
|
||||
opts := ModelOptions(backendConfig, appConfig)
|
||||
rerankModel, err := loader.Load(opts...)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer loader.Close()
|
||||
|
||||
if rerankModel == nil {
|
||||
return nil, fmt.Errorf("could not load rerank model")
|
||||
|
||||
@@ -26,10 +26,10 @@ func SoundGeneration(
|
||||
|
||||
opts := ModelOptions(backendConfig, appConfig)
|
||||
soundGenModel, err := loader.Load(opts...)
|
||||
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
defer loader.Close()
|
||||
|
||||
if soundGenModel == nil {
|
||||
return "", nil, fmt.Errorf("could not load sound generation model")
|
||||
|
||||
@@ -20,6 +20,7 @@ func TokenMetrics(
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer loader.Close()
|
||||
|
||||
if model == nil {
|
||||
return nil, fmt.Errorf("could not load model")
|
||||
|
||||
@@ -14,10 +14,10 @@ func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.Bac
|
||||
|
||||
opts := ModelOptions(backendConfig, appConfig)
|
||||
inferenceModel, err = loader.Load(opts...)
|
||||
|
||||
if err != nil {
|
||||
return schema.TokenizeResponse{}, err
|
||||
}
|
||||
defer loader.Close()
|
||||
|
||||
predictOptions := gRPCPredictOpts(backendConfig, loader.ModelPath)
|
||||
predictOptions.Prompt = s
|
||||
|
||||
@@ -24,6 +24,7 @@ func ModelTranscription(audio, language string, translate bool, ml *model.ModelL
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer ml.Close()
|
||||
|
||||
if transcriptionModel == nil {
|
||||
return nil, fmt.Errorf("could not load transcription model")
|
||||
|
||||
@@ -23,10 +23,10 @@ func ModelTTS(
|
||||
) (string, *proto.Result, error) {
|
||||
opts := ModelOptions(backendConfig, appConfig, model.WithDefaultBackendString(model.PiperBackend))
|
||||
ttsModel, err := loader.Load(opts...)
|
||||
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
defer loader.Close()
|
||||
|
||||
if ttsModel == nil {
|
||||
return "", nil, fmt.Errorf("could not load tts model %q", backendConfig.Model)
|
||||
|
||||
@@ -19,6 +19,8 @@ func VAD(request *schema.VADRequest,
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer ml.Close()
|
||||
|
||||
req := proto.VADRequest{
|
||||
Audio: request.Audio,
|
||||
}
|
||||
|
||||
@@ -74,7 +74,7 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
|
||||
AssetsDestination: t.BackendAssetsPath,
|
||||
ExternalGRPCBackends: externalBackends,
|
||||
}
|
||||
ml := model.NewModelLoader(opts.ModelPath)
|
||||
ml := model.NewModelLoader(opts.ModelPath, opts.SingleBackend)
|
||||
|
||||
defer func() {
|
||||
err := ml.StopAllGRPC()
|
||||
|
||||
@@ -32,7 +32,7 @@ func (t *TranscriptCMD) Run(ctx *cliContext.Context) error {
|
||||
}
|
||||
|
||||
cl := config.NewBackendConfigLoader(t.ModelsPath)
|
||||
ml := model.NewModelLoader(opts.ModelPath)
|
||||
ml := model.NewModelLoader(opts.ModelPath, opts.SingleBackend)
|
||||
if err := cl.LoadBackendConfigsFromPath(t.ModelsPath); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -41,7 +41,7 @@ func (t *TTSCMD) Run(ctx *cliContext.Context) error {
|
||||
AudioDir: outputDir,
|
||||
AssetsDestination: t.BackendAssetsPath,
|
||||
}
|
||||
ml := model.NewModelLoader(opts.ModelPath)
|
||||
ml := model.NewModelLoader(opts.ModelPath, opts.SingleBackend)
|
||||
|
||||
defer func() {
|
||||
err := ml.StopAllGRPC()
|
||||
|
||||
@@ -50,9 +50,6 @@ type BackendConfig struct {
|
||||
// LLM configs (GPT4ALL, Llama.cpp, ...)
|
||||
LLMConfig `yaml:",inline"`
|
||||
|
||||
// AutoGPTQ specifics
|
||||
AutoGPTQ AutoGPTQ `yaml:"autogptq"`
|
||||
|
||||
// Diffusers
|
||||
Diffusers Diffusers `yaml:"diffusers"`
|
||||
Step int `yaml:"step"`
|
||||
@@ -176,14 +173,6 @@ type LimitMMPerPrompt struct {
|
||||
LimitAudioPerPrompt int `yaml:"audio"`
|
||||
}
|
||||
|
||||
// AutoGPTQ is a struct that holds the configuration specific to the AutoGPTQ backend
|
||||
type AutoGPTQ struct {
|
||||
ModelBaseName string `yaml:"model_base_name"`
|
||||
Device string `yaml:"device"`
|
||||
Triton bool `yaml:"triton"`
|
||||
UseFastTokenizer bool `yaml:"use_fast_tokenizer"`
|
||||
}
|
||||
|
||||
// TemplateConfig is a struct that holds the configuration of the templating system
|
||||
type TemplateConfig struct {
|
||||
// Chat is the template used in the chat completion endpoint
|
||||
@@ -555,7 +544,7 @@ func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
|
||||
}
|
||||
}
|
||||
if (u & FLAG_TTS) == FLAG_TTS {
|
||||
ttsBackends := []string{"piper", "transformers-musicgen", "parler-tts"}
|
||||
ttsBackends := []string{"bark-cpp", "parler-tts", "piper", "transformers-musicgen"}
|
||||
if !slices.Contains(ttsBackends, c.Backend) {
|
||||
return false
|
||||
}
|
||||
|
||||
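This hunk adds `bark-cpp` to the set of backends that satisfy the TTS usecase flag. The usecase flags are a plain bitmask checked by `GuessUsecases`, so a caller can test a config as in the sketch below; the function and flag names come from the diff, while the package path and surrounding wiring are assumptions:

```go
package main

import (
	"fmt"

	"github.com/mudler/LocalAI/core/config"
)

func main() {
	cfg := config.BackendConfig{Backend: "bark-cpp"}

	// GuessUsecases reports whether the config plausibly supports the given
	// usecase; with this change a bark-cpp backend now passes the TTS check.
	if cfg.GuessUsecases(config.FLAG_TTS) {
		fmt.Println("config can be used for text-to-speech")
	}
}
```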
@@ -142,9 +142,9 @@ func API(application *application.Application) (*fiber.App, error) {
|
||||
httpFS := http.FS(embedDirStatic)
|
||||
|
||||
router.Use(favicon.New(favicon.Config{
|
||||
URL: "/favicon.ico",
|
||||
URL: "/favicon.svg",
|
||||
FileSystem: httpFS,
|
||||
File: "static/favicon.ico",
|
||||
File: "static/favicon.svg",
|
||||
}))
|
||||
|
||||
router.Use("/static", filesystem.New(filesystem.Config{
|
||||
|
||||
@@ -122,15 +122,15 @@ func modelModal(m *gallery.GalleryModel) elem.Node {
|
||||
"id": modalName(m),
|
||||
"tabindex": "-1",
|
||||
"aria-hidden": "true",
|
||||
"class": "hidden overflow-y-auto overflow-x-hidden fixed top-0 right-0 left-0 z-50 justify-center items-center w-full md:inset-0 h-[calc(100%-1rem)] max-h-full",
|
||||
"class": "hidden fixed top-0 right-0 left-0 z-50 justify-center items-center w-full md:inset-0 h-full max-h-full bg-gray-900/50",
|
||||
},
|
||||
elem.Div(
|
||||
attrs.Props{
|
||||
"class": "relative p-4 w-full max-w-2xl max-h-full",
|
||||
"class": "relative p-4 w-full max-w-2xl h-[90vh] mx-auto mt-[5vh]",
|
||||
},
|
||||
elem.Div(
|
||||
attrs.Props{
|
||||
"class": "relative p-4 w-full max-w-2xl max-h-full bg-white rounded-lg shadow dark:bg-gray-700",
|
||||
"class": "relative bg-white rounded-lg shadow dark:bg-gray-700 h-full flex flex-col",
|
||||
},
|
||||
// header
|
||||
elem.Div(
|
||||
@@ -164,14 +164,13 @@ func modelModal(m *gallery.GalleryModel) elem.Node {
|
||||
// body
|
||||
elem.Div(
|
||||
attrs.Props{
|
||||
"class": "p-4 md:p-5 space-y-4",
|
||||
"class": "p-4 md:p-5 space-y-4 overflow-y-auto flex-1 min-h-0",
|
||||
},
|
||||
elem.Div(
|
||||
attrs.Props{
|
||||
"class": "flex justify-center items-center",
|
||||
},
|
||||
elem.Img(attrs.Props{
|
||||
// "class": "rounded-t-lg object-fit object-center h-96",
|
||||
"class": "lazy rounded-t-lg max-h-48 max-w-96 object-cover mt-3 entered loaded",
|
||||
"src": m.Icon,
|
||||
"loading": "lazy",
|
||||
@@ -232,7 +231,6 @@ func modelModal(m *gallery.GalleryModel) elem.Node {
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
}
|
||||
|
||||
func modelDescription(m *gallery.GalleryModel) elem.Node {
|
||||
|
||||
@@ -21,6 +21,7 @@ func StoresSetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfi
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer sl.Close()
|
||||
|
||||
vals := make([][]byte, len(input.Values))
|
||||
for i, v := range input.Values {
|
||||
@@ -48,6 +49,7 @@ func StoresDeleteEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationCo
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer sl.Close()
|
||||
|
||||
if err := store.DeleteCols(c.Context(), sb, input.Keys); err != nil {
|
||||
return err
|
||||
@@ -69,6 +71,7 @@ func StoresGetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfi
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer sl.Close()
|
||||
|
||||
keys, vals, err := store.GetCols(c.Context(), sb, input.Keys)
|
||||
if err != nil {
|
||||
@@ -100,6 +103,7 @@ func StoresFindEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConf
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer sl.Close()
|
||||
|
||||
keys, vals, similarities, err := store.Find(c.Context(), sb, input.Key, input.Topk)
|
||||
if err != nil {
|
||||
|
||||
@@ -40,7 +40,7 @@ func TestAssistantEndpoints(t *testing.T) {
|
||||
cl := &config.BackendConfigLoader{}
|
||||
//configsDir := "/tmp/localai/configs"
|
||||
modelPath := "/tmp/localai/model"
|
||||
var ml = model.NewModelLoader(modelPath)
|
||||
var ml = model.NewModelLoader(modelPath, false)
|
||||
|
||||
appConfig := &config.ApplicationConfig{
|
||||
ConfigsDir: configsDir,
|
||||
|
||||
@@ -29,9 +29,9 @@ func Explorer(db *explorer.Database) *fiber.App {
|
||||
httpFS := http.FS(embedDirStatic)
|
||||
|
||||
app.Use(favicon.New(favicon.Config{
|
||||
URL: "/favicon.ico",
|
||||
URL: "/favicon.svg",
|
||||
FileSystem: httpFS,
|
||||
File: "static/favicon.ico",
|
||||
File: "static/favicon.svg",
|
||||
}))
|
||||
|
||||
app.Use("/static", filesystem.New(filesystem.Config{
|
||||
|
||||
@@ -203,18 +203,10 @@ func mergeOpenAIRequestAndBackendConfig(config *config.BackendConfig, input *sch
|
||||
config.Diffusers.ClipSkip = input.ClipSkip
|
||||
}
|
||||
|
||||
if input.ModelBaseName != "" {
|
||||
config.AutoGPTQ.ModelBaseName = input.ModelBaseName
|
||||
}
|
||||
|
||||
if input.NegativePromptScale != 0 {
|
||||
config.NegativePromptScale = input.NegativePromptScale
|
||||
}
|
||||
|
||||
if input.UseFastTokenizer {
|
||||
config.UseFastTokenizer = input.UseFastTokenizer
|
||||
}
|
||||
|
||||
if input.NegativePrompt != "" {
|
||||
config.NegativePrompt = input.NegativePrompt
|
||||
}
|
||||
|
||||
@@ -50,11 +50,10 @@ func RegisterLocalAIRoutes(router *fiber.App,
|
||||
router.Post("/v1/vad", vadChain...)
|
||||
|
||||
// Stores
|
||||
sl := model.NewModelLoader("")
|
||||
router.Post("/stores/set", localai.StoresSetEndpoint(sl, appConfig))
|
||||
router.Post("/stores/delete", localai.StoresDeleteEndpoint(sl, appConfig))
|
||||
router.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig))
|
||||
router.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig))
|
||||
router.Post("/stores/set", localai.StoresSetEndpoint(ml, appConfig))
|
||||
router.Post("/stores/delete", localai.StoresDeleteEndpoint(ml, appConfig))
|
||||
router.Post("/stores/get", localai.StoresGetEndpoint(ml, appConfig))
|
||||
router.Post("/stores/find", localai.StoresFindEndpoint(ml, appConfig))
|
||||
|
||||
if !appConfig.DisableMetrics {
|
||||
router.Get("/metrics", localai.LocalAIMetricsEndpoint())
|
||||
|
||||
| Before | Size: 15 KiB |
| core/http/static/favicon.svg | New file (171 lines) | After | Size: 108 KiB |
| core/http/static/logo.png | New binary file | After | Size: 893 KiB |
| core/http/static/logo_horizontal.png | New binary file | After | Size: 930 KiB |
@@ -115,6 +115,7 @@ async function sendTextToChatGPT(text) {
|
||||
|
||||
const response = await fetch('v1/chat/completions', {
|
||||
method: 'POST',
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
model: getModel(),
|
||||
messages: conversationHistory
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
<div class="max-w-md w-full bg-gray-800/90 border border-gray-700/50 rounded-xl overflow-hidden shadow-xl">
|
||||
<div class="animation-container">
|
||||
<div class="text-overlay">
|
||||
<!-- <i class="fas fa-circle-nodes text-5xl text-blue-400 mb-2"></i> -->
|
||||
<img src="static/logo.png" alt="LocalAI Logo" class="h-32">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>{{.Title}}</title>
|
||||
<base href="{{.BaseURL}}" />
|
||||
<link rel="icon" type="image/x-icon" href="favicon.ico" />
|
||||
<link rel="shortcut icon" href="static/favicon.svg" type="image/svg">
|
||||
<link rel="stylesheet" href="static/assets/highlightjs.css" />
|
||||
<script defer src="static/assets/highlightjs.js"></script>
|
||||
<script defer src="static/assets/alpine.js"></script>
|
||||
|
||||
@@ -4,10 +4,9 @@
|
||||
<div class="flex items-center">
|
||||
<!-- Logo Image -->
|
||||
<a href="./" class="flex items-center group">
|
||||
<img src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd"
|
||||
<img src="static/logo_horizontal.png"
|
||||
alt="LocalAI Logo"
|
||||
class="h-10 mr-3 rounded-lg border border-blue-600/30 shadow-md transition-all duration-300 group-hover:shadow-blue-500/20 group-hover:border-blue-500/50">
|
||||
<span class="text-white text-xl font-bold bg-clip-text text-transparent bg-gradient-to-r from-blue-400 to-indigo-400">LocalAI</span>
|
||||
class="h-14 mr-3 brightness-110 transition-all duration-300 group-hover:brightness-125">
|
||||
</a>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -4,10 +4,9 @@
|
||||
<div class="flex items-center">
|
||||
<!-- Logo Image -->
|
||||
<a href="./" class="flex items-center group">
|
||||
<img src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd"
|
||||
<img src="static/logo_horizontal.png"
|
||||
alt="LocalAI Logo"
|
||||
class="h-10 mr-3 rounded-lg border border-blue-600/30 shadow-md transition-all duration-300 group-hover:shadow-blue-500/20 group-hover:border-blue-500/50">
|
||||
<span class="text-white text-xl font-bold bg-clip-text text-transparent bg-gradient-to-r from-blue-400 to-indigo-400">LocalAI</span>
|
||||
</a>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -202,7 +202,6 @@ type OpenAIRequest struct {
|
||||
|
||||
Backend string `json:"backend" yaml:"backend"`
|
||||
|
||||
// AutoGPTQ
|
||||
ModelBaseName string `json:"model_base_name" yaml:"model_base_name"`
|
||||
}
|
||||
|
||||
|
||||
@@ -41,8 +41,6 @@ type PredictionOptions struct {
|
||||
RopeFreqBase float32 `json:"rope_freq_base" yaml:"rope_freq_base"`
|
||||
RopeFreqScale float32 `json:"rope_freq_scale" yaml:"rope_freq_scale"`
|
||||
NegativePromptScale float32 `json:"negative_prompt_scale" yaml:"negative_prompt_scale"`
|
||||
// AutoGPTQ
|
||||
UseFastTokenizer bool `json:"use_fast_tokenizer" yaml:"use_fast_tokenizer"`
|
||||
|
||||
// Diffusers
|
||||
ClipSkip int `json:"clip_skip" yaml:"clip_skip"`
|
||||
|
||||
| docs/assets/images/imagen.png | New binary file | Size: 506 KiB |
| docs/assets/images/localai_screenshot.png | New binary file | Size: 170 KiB |
| docs/assets/images/logos/logo.png | New binary file | Size: 893 KiB |
| docs/assets/images/logos/logo.svg | New file (171 lines) | Size: 108 KiB |
| docs/assets/images/screenshots/screenshot_chat.png | New binary file | Size: 132 KiB |
| docs/assets/images/screenshots/screenshot_gallery.png | New binary file | Size: 284 KiB |
| docs/assets/images/screenshots/screenshot_home.png | New binary file | Size: 287 KiB |
| docs/assets/images/screenshots/screenshot_image.png | New binary file | Size: 506 KiB |
| docs/assets/images/screenshots/screenshot_login.png | New binary file | Size: 225 KiB |
| docs/assets/images/screenshots/screenshot_p2p.png | New binary file | Size: 418 KiB |
| docs/assets/images/screenshots/screenshot_talk.png | New binary file | Size: 246 KiB |
| docs/assets/images/screenshots/screenshot_tts.png | New binary file | Size: 213 KiB |
@@ -3,7 +3,7 @@
|
||||
"baseUrl": ".",
|
||||
"paths": {
|
||||
"*": [
|
||||
"../../../../.cache/hugo_cache/modules/filecache/modules/pkg/mod/github.com/gohugoio/hugo-mod-jslibs-dist/popperjs/v2@v2.21100.20000/package/dist/cjs/popper.js/*",
|
||||
"../../../../.cache/hugo_cache/modules/filecache/modules/pkg/mod/github.com/gohugoio/hugo-mod-jslibs-dist/popperjs/v2@v2.21100.20000/package/dist/cjs/*",
|
||||
"../../../../.cache/hugo_cache/modules/filecache/modules/pkg/mod/github.com/twbs/bootstrap@v5.3.2+incompatible/js/*"
|
||||
]
|
||||
}
|
||||
|
||||
@@ -48,9 +48,9 @@ defaultContentLanguage = 'en'
|
||||
|
||||
[params.docs] # Parameters for the /docs 'template'
|
||||
|
||||
logo = "https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd"
|
||||
logo_text = "LocalAI"
|
||||
title = "LocalAI documentation" # default html title for documentation pages/sections
|
||||
logo = "https://raw.githubusercontent.com/mudler/LocalAI/refs/heads/master/core/http/static/logo.png"
|
||||
logo_text = ""
|
||||
title = "LocalAI" # default html title for documentation pages/sections
|
||||
|
||||
pathName = "docs" # path name for documentation site | default "docs"
|
||||
|
||||
@@ -108,6 +108,7 @@ defaultContentLanguage = 'en'
|
||||
# indexName = "" # Index Name to perform search on (or set env variable HUGO_PARAM_DOCSEARCH_indexName)
|
||||
|
||||
[params.analytics] # Parameters for Analytics (Google, Plausible)
|
||||
# google = "G-XXXXXXXXXX" # Replace with your Google Analytics ID
|
||||
# plausibleURL = "/docs/s" # (or set via env variable HUGO_PARAM_ANALYTICS_plausibleURL)
|
||||
# plausibleAPI = "/docs/s" # optional - (or set via env variable HUGO_PARAM_ANALYTICS_plausibleAPI)
|
||||
# plausibleDomain = "" # (or set via env variable HUGO_PARAM_ANALYTICS_plausibleDomain)
|
||||
@@ -151,7 +152,7 @@ defaultContentLanguage = 'en'
|
||||
|
||||
[languages]
|
||||
[languages.en]
|
||||
title = "LocalAI documentation"
|
||||
title = "LocalAI"
|
||||
languageName = "English"
|
||||
weight = 10
|
||||
# [languages.fr]
|
||||
|
||||
@@ -268,14 +268,6 @@ yarn_ext_factor: 0
|
||||
yarn_attn_factor: 0
|
||||
yarn_beta_fast: 0
|
||||
yarn_beta_slow: 0
|
||||
|
||||
# AutoGPT-Q settings, for configurations specific to GPT models.
|
||||
autogptq:
|
||||
model_base_name: "" # Base name of the model.
|
||||
device: "" # Device to run the model on.
|
||||
triton: false # Whether to use Triton Inference Server.
|
||||
use_fast_tokenizer: false # Whether to use a fast tokenizer for quicker processing.
|
||||
|
||||
# configuration for diffusers model
|
||||
diffusers:
|
||||
cuda: false # Whether to use CUDA
|
||||
|
||||
@@ -147,7 +147,6 @@ The devices in the following list have been tested with `hipblas` images running
|
||||
| diffusers | yes | Radeon VII (gfx906) |
|
||||
| piper | yes | Radeon VII (gfx906) |
|
||||
| whisper | no | none |
|
||||
| autogptq | no | none |
|
||||
| bark | no | none |
|
||||
| coqui | no | none |
|
||||
| transformers | no | none |
|
||||
|
||||
@@ -13,6 +13,8 @@ LocalAI supports two modes of distributed inferencing via p2p:
|
||||
- **Federated Mode**: Requests are distributed across the cluster and routed to a single worker node in the network based on the load balancer's decision.
|
||||
- **Worker Mode** (aka "model sharding" or "splitting weights"): Requests are processed by all the workers, which contribute to the final inference result by sharing the model weights.
|
||||
|
||||
A list of global instances shared by the community is available at [explorer.localai.io](https://explorer.localai.io).
|
||||
|
||||
## Usage
|
||||
|
||||
Starting LocalAI with `--p2p` generates a shared token for connecting multiple instances: that is all you need to create AI clusters, with no intricate network setup required.
|
||||
|
||||
@@ -74,49 +74,9 @@ curl http://localhost:8080/v1/models
|
||||
|
||||
## Backends
|
||||
|
||||
### AutoGPTQ
|
||||
|
||||
[AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ) is an easy-to-use LLM quantization package with user-friendly APIs, based on the GPTQ algorithm.
|
||||
|
||||
#### Prerequisites
|
||||
|
||||
This is an extra backend: it is already available in the container images, and no additional setup is required there.
|
||||
|
||||
If you are building LocalAI locally, you need to install [AutoGPTQ manually](https://github.com/PanQiWei/AutoGPTQ#quick-installation).
|
||||
|
||||
|
||||
#### Model setup
|
||||
|
||||
The models are automatically downloaded from `huggingface` if not present the first time. It is possible to define models via `YAML` config file, or just by querying the endpoint with the `huggingface` repository model name. For example, create a `YAML` config file in `models/`:
|
||||
|
||||
```
|
||||
name: orca
|
||||
backend: autogptq
|
||||
model_base_name: "orca_mini_v2_13b-GPTQ-4bit-128g.no-act.order"
|
||||
parameters:
|
||||
model: "TheBloke/orca_mini_v2_13b-GPTQ"
|
||||
# ...
|
||||
```
|
||||
|
||||
Test with:
|
||||
|
||||
```bash
|
||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "orca",
|
||||
"messages": [{"role": "user", "content": "How are you?"}],
|
||||
"temperature": 0.1
|
||||
}'
|
||||
```
|
||||
### RWKV
|
||||
|
||||
A full example on how to run a rwkv model is in the [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv).
|
||||
|
||||
Note: RWKV models need to specify the `rwkv` backend in the YAML config file, and an associated tokenizer must be provided alongside the model:
|
||||
|
||||
```
|
||||
36464540 -rw-r--r-- 1 mudler mudler 1.2G May 3 10:51 rwkv_small
|
||||
36464543 -rw-r--r-- 1 mudler mudler 2.4M May 3 10:51 rwkv_small.tokenizer.json
|
||||
```
|
||||
RWKV support is available through llama.cpp (see below)
|
||||
|
||||
### llama.cpp
|
||||
|
||||
|
||||
@@ -14,18 +14,47 @@ icon = "rocket_launch"
|
||||
|
||||
If you are exposing LocalAI remotely, make sure you protect the API endpoints adequately, either with a mechanism that filters incoming traffic or, alternatively, by running LocalAI with `API_KEY` to gate access behind an API key. The API key grants full access to all features (there is no role separation), so treat it like an admin credential.
|
||||
|
||||
To access the WebUI with an API_KEY, browser extensions such as [Requestly](https://requestly.com/) can be used (see also https://github.com/mudler/LocalAI/issues/2227#issuecomment-2093333752). See also [API flags]({{% relref "docs/advanced/advanced-usage#api-flags" %}}) for the flags / options available when starting LocalAI.
|
||||
|
||||
{{% /alert %}}
|
||||
|
||||
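When `API_KEY` is set, every request must present the key. Assuming LocalAI follows the OpenAI convention of a Bearer token in the `Authorization` header, a request could look like the Go sketch below; the model name and key are placeholders.

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	body := []byte(`{"model":"llama-3.2-1b-instruct:q4_k_m","messages":[{"role":"user","content":"Hello"}]}`)

	req, _ := http.NewRequest("POST", "http://localhost:8080/v1/chat/completions", bytes.NewReader(body))
	req.Header.Set("Content-Type", "application/json")
	// YOUR_API_KEY is a placeholder; the key gates every endpoint, so keep it secret.
	req.Header.Set("Authorization", "Bearer YOUR_API_KEY")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out))
}
```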
## Using the Bash Installer
|
||||
## Quickstart
|
||||
|
||||
Install LocalAI easily using the bash installer with the following command:
|
||||
|
||||
```sh
|
||||
### Using the Bash Installer
|
||||
```bash
|
||||
curl https://localai.io/install.sh | sh
|
||||
```
|
||||
|
||||
### Run with docker:
|
||||
```bash
|
||||
# CPU only image:
|
||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-cpu
|
||||
|
||||
# Nvidia GPU:
|
||||
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
|
||||
|
||||
# CPU and GPU image (bigger size):
|
||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
|
||||
|
||||
# AIO images (it will pre-download a set of models ready for use, see https://localai.io/basics/container/)
|
||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
|
||||
```
|
||||
|
||||
### Load models:
|
||||
|
||||
```bash
|
||||
# From the model gallery (see available models with `local-ai models list`, in the WebUI from the model tab, or visiting https://models.localai.io)
|
||||
local-ai run llama-3.2-1b-instruct:q4_k_m
|
||||
# Start LocalAI with the phi-2 model directly from huggingface
|
||||
local-ai run huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf
|
||||
# Install and run a model from the Ollama OCI registry
|
||||
local-ai run ollama://gemma:2b
|
||||
# Run a model from a configuration file
|
||||
local-ai run https://gist.githubusercontent.com/.../phi-2.yaml
|
||||
# Install and run a model from a standard OCI registry (e.g., Docker Hub)
|
||||
local-ai run oci://localai/phi-2:latest
|
||||
```
|
||||
|
||||
|
||||
For a full list of options, refer to the [Installer Options]({{% relref "docs/advanced/installer" %}}) documentation.
|
||||
|
||||
Binaries can also be [manually downloaded]({{% relref "docs/reference/binaries" %}}).
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
|
||||
+++
|
||||
title = "Overview"
|
||||
weight = 1
|
||||
@@ -7,162 +6,96 @@ description = "What is LocalAI?"
|
||||
tags = ["Beginners"]
|
||||
categories = [""]
|
||||
author = "Ettore Di Giacinto"
|
||||
# This allows to overwrite the landing page
|
||||
url = '/'
|
||||
icon = "info"
|
||||
+++
|
||||
|
||||
<p align="center">
|
||||
<a href="https://localai.io"><img width=512 src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd"></a>
|
||||
</p >
|
||||
# Welcome to LocalAI
|
||||
|
||||
<p align="center">
|
||||
<a href="https://github.com/go-skynet/LocalAI/fork" target="blank">
|
||||
<img src="https://img.shields.io/github/forks/go-skynet/LocalAI?style=for-the-badge" alt="LocalAI forks"/>
|
||||
</a>
|
||||
<a href="https://github.com/go-skynet/LocalAI/stargazers" target="blank">
|
||||
<img src="https://img.shields.io/github/stars/go-skynet/LocalAI?style=for-the-badge" alt="LocalAI stars"/>
|
||||
</a>
|
||||
<a href="https://github.com/go-skynet/LocalAI/pulls" target="blank">
|
||||
<img src="https://img.shields.io/github/issues-pr/go-skynet/LocalAI?style=for-the-badge" alt="LocalAI pull-requests"/>
|
||||
</a>
|
||||
<a href='https://github.com/go-skynet/LocalAI/releases'>
|
||||
<img src='https://img.shields.io/github/release/go-skynet/LocalAI?&label=Latest&style=for-the-badge'>
|
||||
</a>
|
||||
</p>
|
||||
LocalAI is your complete AI stack for running AI models locally. It's designed to be simple, efficient, and accessible, providing a drop-in replacement for OpenAI's API while keeping your data private and secure.
|
||||
|
||||
<p align="center">
|
||||
<a href="https://hub.docker.com/r/localai/localai" target="blank">
|
||||
<img src="https://img.shields.io/badge/dockerhub-images-important.svg?logo=Docker" alt="LocalAI Docker hub"/>
|
||||
</a>
|
||||
<a href="https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest" target="blank">
|
||||
<img src="https://img.shields.io/badge/quay.io-images-important.svg?" alt="LocalAI Quay.io"/>
|
||||
</a>
|
||||
</p>
|
||||
## Why LocalAI?
|
||||
|
||||
<p align="center">
|
||||
<a href="https://trendshift.io/repositories/5539" target="_blank"><img src="https://trendshift.io/api/badge/repositories/5539" alt="mudler%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
|
||||
</p>
|
||||
In today's AI landscape, privacy, control, and flexibility are paramount. LocalAI addresses these needs by:
|
||||
|
||||
<p align="center">
|
||||
<a href="https://twitter.com/LocalAI_API" target="blank">
|
||||
<img src="https://img.shields.io/twitter/follow/LocalAI_API?label=Follow: LocalAI_API&style=social" alt="Follow LocalAI_API"/>
|
||||
</a>
|
||||
<a href="https://discord.gg/uJAeKSAGDy" target="blank">
|
||||
<img src="https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted" alt="Join LocalAI Discord Community"/>
|
||||
</a>
|
||||
</p>
|
||||
- **Privacy First**: Your data never leaves your machine
|
||||
- **Complete Control**: Run models on your terms, with your hardware
|
||||
- **Open Source**: MIT licensed and community-driven
|
||||
- **Flexible Deployment**: From laptops to servers, with or without GPUs
|
||||
- **Extensible**: Add new models and features as needed
|
||||
|
||||
> 💡 Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [💭Discord](https://discord.gg/uJAeKSAGDy)
|
||||
>
|
||||
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/go-skynet/LocalAI/tree/master/examples/)
|
||||
## Core Components
|
||||
|
||||
LocalAI is more than just a single tool - it's a complete ecosystem:
|
||||
|
||||
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI acts as a drop-in replacement REST API compatible with the OpenAI API specifications for local inferencing. It allows you to run LLMs, generate images and audio (and more) locally or on-prem with consumer-grade hardware, supporting multiple model families and architectures. It does not require a GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).
|
||||
1. **[LocalAI Core](https://github.com/mudler/LocalAI)**
|
||||
- OpenAI-compatible API
|
||||
- Multiple model support (LLMs, image, audio)
|
||||
- No GPU required
|
||||
- Fast inference with native bindings
|
||||
- [Github repository](https://github.com/mudler/LocalAI)
|
||||
|
||||
2. **[LocalAGI](https://github.com/mudler/LocalAGI)**
|
||||
- Autonomous AI agents
|
||||
- No coding required
|
||||
- WebUI and REST API support
|
||||
- Extensible agent framework
|
||||
- [Github repository](https://github.com/mudler/LocalAGI)
|
||||
|
||||
## Start LocalAI
|
||||
3. **[LocalRecall](https://github.com/mudler/LocalRecall)**
|
||||
- Semantic search
|
||||
- Memory management
|
||||
- Vector database
|
||||
- Perfect for AI applications
|
||||
- [Github repository](https://github.com/mudler/LocalRecall)
|
||||
|
||||
Start the image with Docker to have a functional clone of OpenAI! 🚀:
|
||||
## Getting Started
|
||||
|
||||
```bash
|
||||
docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu
|
||||
# Do you have a Nvidia GPUs? Use this instead
|
||||
# CUDA 11
|
||||
# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-nvidia-cuda-11
|
||||
# CUDA 12
|
||||
# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-nvidia-cuda-12
|
||||
```
|
||||
|
||||
Or just use the bash installer:
|
||||
The fastest way to get started is with our one-line installer:
|
||||
|
||||
```bash
|
||||
curl https://localai.io/install.sh | sh
|
||||
```
|
||||
|
||||
See the [💻 Quickstart](https://localai.io/basics/getting_started/) for all the options and ways you can run LocalAI!
|
||||
Or use Docker for a quick start:
|
||||
|
||||
## What is LocalAI?
|
||||
```bash
|
||||
docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu
|
||||
```
|
||||
|
||||
In a nutshell:
|
||||
For more detailed installation options and configurations, see our [Getting Started guide](/basics/getting_started/).
|
||||
|
||||
- Local, OpenAI drop-in alternative REST API. You own your data.
|
||||
- NO GPU required. NO Internet access is required either
|
||||
- Optional, GPU Acceleration is available. See also the [build section](https://localai.io/basics/build/index.html).
|
||||
- Supports multiple models
|
||||
- 🏃 Once loaded the first time, it keeps models loaded in memory for faster inference
|
||||
- ⚡ Doesn't shell-out, but uses bindings for a faster inference and better performance.
|
||||
## Key Features
|
||||
|
||||
LocalAI is focused on making the AI accessible to anyone. Any contribution, feedback and PR is welcome!
|
||||
- **Text Generation**: Run various LLMs locally
|
||||
- **Image Generation**: Create images with stable diffusion
|
||||
- **Audio Processing**: Text-to-speech and speech-to-text
|
||||
- **Vision API**: Image understanding and analysis
|
||||
- **Embeddings**: Vector database support
|
||||
- **Functions**: OpenAI-compatible function calling
|
||||
- **P2P**: Distributed inference capabilities
|
||||
|
||||
Note that this started just as a fun weekend project by [mudler](https://github.com/mudler) in order to try to create the necessary pieces for a full AI assistant like `ChatGPT`: the community is growing fast and we are working hard to make it better and more stable. If you want to help, please consider contributing (see below)!
|
||||
## Community and Support
|
||||
|
||||
### 🚀 Features
|
||||
LocalAI is a community-driven project. You can:
|
||||
|
||||
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
|
||||
- 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
|
||||
- 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
|
||||
- 🎨 [Image generation with stable diffusion](https://localai.io/features/image-generation)
|
||||
- 🔥 [OpenAI functions](https://localai.io/features/openai-functions/) 🆕
|
||||
- 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
|
||||
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
|
||||
- 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/)
|
||||
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
|
||||
- 💾 [Stores](https://localai.io/stores)
|
||||
- 📈 [Reranker](https://localai.io/features/reranker/)
|
||||
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
|
||||
- Join our [Discord community](https://discord.gg/uJAeKSAGDy)
|
||||
- Check out our [GitHub repository](https://github.com/mudler/LocalAI)
|
||||
- Contribute to the project
|
||||
- Share your use cases and examples
|
||||
|
||||
## Contribute and help
|
||||
## Next Steps
|
||||
|
||||
To help the project you can:
|
||||
Ready to dive in? Here are some recommended next steps:
|
||||
|
||||
- If you have technological skills and want to contribute to development, have a look at the open issues. If you are new you can have a look at the [good-first-issue](https://github.com/go-skynet/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) and [help-wanted](https://github.com/go-skynet/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) labels.
|
||||
1. [Install LocalAI](/basics/getting_started/)
|
||||
2. [Explore available models](https://models.localai.io)
|
||||
3. [Model compatibility](/model-compatibility/)
|
||||
4. [Try out examples](https://github.com/mudler/LocalAI-examples)
|
||||
5. [Join the community](https://discord.gg/uJAeKSAGDy)
|
||||
6. [Check the LocalAI Github repository](https://github.com/mudler/LocalAI)
|
||||
7. [Check the LocalAGI Github repository](https://github.com/mudler/LocalAGI)
|
||||
|
||||
- If you don't have technological skills you can still help improving documentation or [add examples](https://github.com/go-skynet/LocalAI/tree/master/examples) or share your user-stories with our community, any help and contribution is welcome!
|
||||
|
||||
## 🌟 Star history
|
||||
## License
|
||||
|
||||
[](https://star-history.com/#mudler/LocalAI&Date)
|
||||
|
||||
## ❤️ Sponsors
|
||||
|
||||
> Do you find LocalAI useful?
|
||||
|
||||
Support the project by becoming [a backer or sponsor](https://github.com/sponsors/mudler). Your logo will show up here with a link to your website.
|
||||
|
||||
A huge thank you to our generous sponsors who support this project covering CI expenses, and our [Sponsor list](https://github.com/sponsors/mudler):
|
||||
|
||||
<p align="center">
|
||||
<a href="https://www.spectrocloud.com/" target="blank">
|
||||
<img width=200 src="https://github.com/user-attachments/assets/72eab1dd-8b93-4fc0-9ade-84db49f24962">
|
||||
</a>
|
||||
<a href="https://www.premai.io/" target="blank">
|
||||
<img width=200 src="https://github.com/mudler/LocalAI/assets/2420543/42e4ca83-661e-4f79-8e46-ae43689683d6"> <br>
|
||||
</a>
|
||||
</p>
|
||||
|
||||
## 📖 License
|
||||
|
||||
LocalAI is a community-driven project created by [Ettore Di Giacinto](https://github.com/mudler/).
|
||||
|
||||
MIT - Author Ettore Di Giacinto
|
||||
|
||||
## 🙇 Acknowledgements
|
||||
|
||||
LocalAI couldn't have been built without the help of great software already available from the community. Thank you!
|
||||
|
||||
- [llama.cpp](https://github.com/ggerganov/llama.cpp)
|
||||
- https://github.com/tatsu-lab/stanford_alpaca
|
||||
- https://github.com/cornelk/llama-go for the initial ideas
|
||||
- https://github.com/antimatter15/alpaca.cpp
|
||||
- https://github.com/EdVince/Stable-Diffusion-NCNN
|
||||
- https://github.com/ggerganov/whisper.cpp
|
||||
- https://github.com/saharNooby/rwkv.cpp
|
||||
- https://github.com/rhasspy/piper
|
||||
|
||||
## 🤗 Contributors
|
||||
|
||||
This is a community project, a special thanks to our contributors! 🤗
|
||||
<a href="https://github.com/go-skynet/LocalAI/graphs/contributors">
|
||||
<img src="https://contrib.rocks/image?repo=go-skynet/LocalAI" />
|
||||
</a>
|
||||
LocalAI is MIT licensed, created and maintained by [Ettore Di Giacinto](https://github.com/mudler).
|
||||
|
||||
@@ -2,38 +2,212 @@
|
||||
|
||||
# Hero
|
||||
hero:
|
||||
enable: false
|
||||
enable: true
|
||||
weight: 10
|
||||
template: hero
|
||||
|
||||
backgroundImage:
|
||||
path: "images/templates/hero"
|
||||
filename:
|
||||
desktop: "gradient-desktop.webp"
|
||||
mobile: "gradient-mobile.webp"
|
||||
|
||||
badge:
|
||||
text: "⭐ 31.8k+ stars on GitHub!"
|
||||
color: primary
|
||||
pill: false
|
||||
soft: true
|
||||
|
||||
titleLogo:
|
||||
path: "images/logos"
|
||||
filename: "logo.png"
|
||||
alt: "LocalAI Logo"
|
||||
height: 340px
|
||||
|
||||
title: ""
|
||||
subtitle: |
|
||||
**The free, open source alternative to OpenAI and Anthropic. Your all-in-one, complete AI stack** - Run powerful language models, autonomous agents, and document intelligence **locally** on your hardware.
|
||||
|
||||
**No cloud, no limits, no compromise.**
|
||||
|
||||
image:
|
||||
path: "images"
|
||||
filename: "localai_screenshot.png"
|
||||
alt: "LocalAI Screenshot"
|
||||
boxShadow: true
|
||||
rounded: true
|
||||
|
||||
ctaButton:
|
||||
icon: rocket_launch
|
||||
btnText: "Get Started"
|
||||
url: "/basics/getting_started/"
|
||||
cta2Button:
|
||||
icon: code
|
||||
btnText: "View on GitHub"
|
||||
url: "https://github.com/mudler/LocalAI"
|
||||
|
||||
info: |
|
||||
**Drop-in replacement for OpenAI API** - modular suite of tools that work seamlessly together or independently.
|
||||
|
||||
Start with **[LocalAI](https://localai.io)**'s OpenAI-compatible API, extend with **[LocalAGI](https://github.com/mudler/LocalAGI)**'s autonomous agents, and enhance with **[LocalRecall](https://github.com/mudler/LocalRecall)**'s semantic search - all running locally on your hardware.
|
||||
|
||||
**Open Source** MIT Licensed.
|
||||
|
||||
# Feature Grid
|
||||
featureGrid:
|
||||
enable: false
|
||||
enable: true
|
||||
weight: 20
|
||||
template: feature grid
|
||||
|
||||
title: Why choose LocalAI?
|
||||
subtitle: |
|
||||
**OpenAI API Compatible** - Run AI models locally with our modular ecosystem. From language models to autonomous agents and semantic search, build your complete AI stack without the cloud.
|
||||
|
||||
items:
|
||||
- title: LLM Inferencing
|
||||
icon: memory_alt
|
||||
description: LocalAI is a free, **Open Source** OpenAI alternative. Run **LLMs**, generate **images**, **audio** and more **locally** with consumer grade hardware.
|
||||
ctaLink:
|
||||
text: learn more
|
||||
url: /basics/getting_started/
|
||||
- title: Agentic-first
|
||||
icon: smart_toy
|
||||
description: |
|
||||
Extend LocalAI with LocalAGI, an autonomous AI agent platform that runs locally, no coding required.
|
||||
Build and deploy autonomous agents with ease. Interact with REST APIs or use the WebUI.
|
||||
ctaLink:
|
||||
text: learn more
|
||||
url: https://github.com/mudler/LocalAGI
|
||||
|
||||
- title: Memory and Knowledge base
|
||||
icon: psychology
|
||||
description:
|
||||
Extend LocalAI with LocalRecall, a local REST API for semantic search and memory management. Perfect for AI applications.
|
||||
ctaLink:
|
||||
text: learn more
|
||||
url: https://github.com/mudler/LocalRecall
|
||||
|
||||
- title: OpenAI Compatible
|
||||
icon: api
|
||||
description: Drop-in replacement for OpenAI API. Compatible with existing applications and libraries.
|
||||
ctaLink:
|
||||
text: learn more
|
||||
url: /basics/getting_started/
|
||||
|
||||
- title: No GPU Required
|
||||
icon: memory
|
||||
description: Run on consumer grade hardware. No need for expensive GPUs or cloud services.
|
||||
ctaLink:
|
||||
text: learn more
|
||||
url: /basics/getting_started/
|
||||
|
||||
- title: Multiple Models
|
||||
icon: hub
|
||||
description: |
|
||||
Support for various model families including LLMs, image generation, and audio models.
|
||||
Supports multiple backends for inferencing, including vLLM, llama.cpp, and more.
|
||||
You can switch between them as needed and install them from the Web interface or the CLI.
|
||||
ctaLink:
|
||||
text: learn more
|
||||
url: /model-compatibility
|
||||
|
||||
- title: Privacy Focused
|
||||
icon: security
|
||||
description: Keep your data local. No data leaves your machine, ensuring complete privacy.
|
||||
ctaLink:
|
||||
text: learn more
|
||||
url: /basics/container/
|
||||
|
||||
- title: Easy Setup
|
||||
icon: settings
|
||||
description: Simple installation and configuration. Get started in minutes with Binaries installation, Docker, Podman, Kubernetes or local installation.
|
||||
ctaLink:
|
||||
text: learn more
|
||||
url: /basics/getting_started/
|
||||
|
||||
- title: Community Driven
|
||||
icon: groups
|
||||
description: Active community support and regular updates. Contribute and help shape the future of LocalAI.
|
||||
ctaLink:
|
||||
text: learn more
|
||||
url: https://github.com/mudler/LocalAI
|
||||
|
||||
|
||||
|
||||
- title: Extensible
|
||||
icon: extension
|
||||
description: Easy to extend and customize. Add new models and features as needed.
|
||||
ctaLink:
|
||||
text: learn more
|
||||
url: /docs/integrations/
|
||||
|
||||
- title: Peer 2 Peer
|
||||
icon: hub
|
||||
description: |
|
||||
LocalAI is designed for decentralized LLM inference, powered by a peer-to-peer system based on libp2p.
|
||||
It is designed to be used in a local or remote network, and is compatible with any LLM model.
|
||||
It works either in federated mode or by splitting model weights across workers.
|
||||
ctaLink:
|
||||
text: learn more
|
||||
url: /features/distribute/
|
||||
|
||||
- title: Open Source
|
||||
icon: code
|
||||
description: MIT licensed. Free to use, modify, and distribute. Community contributions welcome.
|
||||
ctaLink:
|
||||
text: learn more
|
||||
url: https://github.com/mudler/LocalAI
|
||||
|
||||
imageText:
|
||||
enable: true
|
||||
weight: 25
|
||||
template: image text
|
||||
|
||||
title: LocalAI
|
||||
subtitle: The Free, Open Source OpenAI Alternative
|
||||
title: Run AI models locally with ease
|
||||
subtitle: |
|
||||
LocalAI makes it simple to run various AI models on your own hardware. From text generation to image creation, autonomous agents to semantic search - all orchestrated through a unified API.
|
||||
|
||||
list:
|
||||
- text: Optimized, fast inference
|
||||
icon: speed
|
||||
- text: OpenAI API compatibility
|
||||
icon: api
|
||||
|
||||
- text: Comprehensive support for many model architectures
|
||||
icon: area_chart
|
||||
- text: Multiple model support
|
||||
icon: hub
|
||||
|
||||
- text: Easy to deploy with Docker
|
||||
icon: accessibility
|
||||
- text: Image understanding
|
||||
icon: image
|
||||
|
||||
- text: Image generation
|
||||
icon: image
|
||||
|
||||
- text: Audio generation
|
||||
icon: music_note
|
||||
|
||||
- text: Voice activity detection
|
||||
icon: mic
|
||||
|
||||
- text: Speech recognition
|
||||
icon: mic
|
||||
|
||||
- text: Video generation
|
||||
icon: movie
|
||||
|
||||
- text: Privacy focused
|
||||
icon: security
|
||||
|
||||
- text: Autonomous agents with [LocalAGI](https://github.com/mudler/LocalAGI)
|
||||
icon: smart_toy
|
||||
|
||||
- text: Semantic search with [LocalRecall](https://github.com/mudler/LocalRecall)
|
||||
icon: psychology
|
||||
|
||||
- text: Agent orchestration
|
||||
icon: hub
|
||||
|
||||
image:
|
||||
path: "images/logos"
|
||||
filename: "logo.png"
|
||||
alt: "LocalAI logo" # Optional but recommended
|
||||
path: "images"
|
||||
filename: "imagen.png"
|
||||
alt: "LocalAI Image generation"
|
||||
|
||||
imgOrder:
|
||||
desktop: 2
|
||||
@@ -41,10 +215,62 @@ imageText:
|
||||
|
||||
ctaButton:
|
||||
text: Learn more
|
||||
url: "/docs/"
|
||||
url: "/basics/getting_started/"
|
||||
|
||||
# Image compare
|
||||
imageCompare:
|
||||
enable: false
|
||||
weight: 30
|
||||
template: image compare
|
||||
|
||||
title: LocalAI in Action
|
||||
subtitle: See how LocalAI can transform your local AI experience with various models and capabilities.
|
||||
|
||||
items:
|
||||
- title: Text Generation
|
||||
config: {
|
||||
startingPoint: 50,
|
||||
addCircle: true,
|
||||
addCircleBlur: false,
|
||||
showLabels: true,
|
||||
labelOptions: {
|
||||
before: 'Dark',
|
||||
after: 'Light',
|
||||
onHover: false
|
||||
}
|
||||
}
|
||||
imagePath: "images/screenshots"
|
||||
imageBefore: "text_generation_input.webp"
|
||||
imageAfter: "text_generation_output.webp"
|
||||
|
||||
- title: Image Generation
|
||||
config: {
|
||||
startingPoint: 50,
|
||||
addCircle: true,
|
||||
addCircleBlur: true,
|
||||
showLabels: true,
|
||||
labelOptions: {
|
||||
before: 'Prompt',
|
||||
after: 'Result',
|
||||
onHover: true
|
||||
}
|
||||
}
|
||||
imagePath: "images/screenshots"
|
||||
imageBefore: "imagen_before.webp"
|
||||
imageAfter: "imagen_after.webp"
|
||||
|
||||
- title: Audio Generation
|
||||
config: {
|
||||
startingPoint: 50,
|
||||
addCircle: true,
|
||||
addCircleBlur: false,
|
||||
showLabels: true,
|
||||
labelOptions: {
|
||||
before: 'Text',
|
||||
after: 'Audio',
|
||||
onHover: false
|
||||
}
|
||||
}
|
||||
imagePath: "images/screenshots"
|
||||
imageBefore: "audio_generation_text.webp"
|
||||
imageAfter: "audio_generation_waveform.webp"
|
||||
@@ -1,3 +1,3 @@
|
||||
{
|
||||
"version": "v2.26.0"
|
||||
"version": "v2.28.0"
|
||||
}
|
||||
|
||||
@@ -82,7 +82,7 @@
|
||||
</span>
|
||||
</button>
|
||||
{{ end -}}
|
||||
{{ if .Site.IsMultiLingual }}
|
||||
{{ if hugo.IsMultilingual }}
|
||||
<div class="dropdown">
|
||||
<button class="btn btn-link btn-default dropdown-toggle ps-2" type="button" data-bs-toggle="dropdown" aria-expanded="false">
|
||||
{{ site.Language.Lang | upper }}
|
||||
|
||||
@@ -18,10 +18,10 @@
|
||||
<!-- Custom CSS -->
|
||||
{{- $options := dict "enableSourceMap" true }}
|
||||
{{- if hugo.IsProduction}}
|
||||
{{- $options := dict "enableSourceMap" false "outputStyle" "compressed" }}
|
||||
{{- $options = dict "enableSourceMap" false "outputStyle" "compressed" }}
|
||||
{{- end }}
|
||||
{{- $style := resources.Get "/scss/style.scss" }}
|
||||
{{- $style = $style | resources.ExecuteAsTemplate "/scss/style.scss" . | resources.ToCSS $options }}
|
||||
{{- $style = $style | resources.ExecuteAsTemplate "/scss/style.scss" . | css.Sass $options }}
|
||||
{{- if hugo.IsProduction }}
|
||||
{{- $style = $style | minify | fingerprint "sha384" }}
|
||||
{{- end -}}
|
||||
@@ -39,7 +39,7 @@
|
||||
<!-- Image Compare Viewer -->
|
||||
{{ if ($.Scratch.Get "image_compare_enabled") }}
|
||||
{{ $imagecompare := resources.Get "js/image-compare-viewer.min.js" }}
|
||||
{{- if not .Site.IsServer }}
|
||||
{{- if not hugo.IsDevelopment }}
|
||||
{{- $js := (slice $imagecompare) | resources.Concat "/js/image-compare.js" | minify | fingerprint "sha384" }}
|
||||
<script type="text/javascript" src="{{ $js.Permalink }}" integrity="{{ $js.Data.Integrity }}"></script>
|
||||
{{- else }}
|
||||
@@ -48,14 +48,14 @@
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
<!-- Plausible Analytics Config -->
|
||||
{{- if not .Site.IsServer }}
|
||||
{{- if not hugo.IsDevelopment }}
|
||||
{{ if and (.Site.Params.plausible.scriptURL) (.Site.Params.plausible.dataDomain) -}}
|
||||
{{- partialCached "head/plausible" . }}
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
<!-- Google Analytics v4 Config -->
|
||||
{{- if not .Site.IsServer }}
|
||||
{{- if .Site.GoogleAnalytics }}
|
||||
{{- if not hugo.IsDevelopment }}
|
||||
{{- if .Site.Params.analytics.google }}
|
||||
{{- template "_internal/google_analytics.html" . -}}
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
|
||||
| docs/layouts/partials/header.html | New file (57 lines) |
@@ -0,0 +1,57 @@
|
||||
<!-- Navbar Start -->
|
||||
<header id="topnav">
|
||||
<div class="container d-flex justify-content-between align-items-center">
|
||||
<!-- Logo container-->
|
||||
<a class="logo" aria-label="Home" href='{{ relLangURL "" }}'>
|
||||
|
||||
</a>
|
||||
<!-- End Logo container-->
|
||||
|
||||
<div class="d-flex align-items-center">
|
||||
|
||||
<div id="navigation">
|
||||
<!-- Navigation Menu -->
|
||||
<ul class="navigation-menu nav-right">
|
||||
{{- range .Site.Menus.primary }}
|
||||
<li><a href="{{ relLangURL .URL }}">{{ .Name }}</a></li>
|
||||
{{ end }}
|
||||
</ul><!--end navigation menu-->
|
||||
</div><!--end navigation-->
|
||||
|
||||
<!-- Social Links Start -->
|
||||
{{ with $.Scratch.Get "social_list" }}
|
||||
<ul class="social-link d-flex list-inline mb-0">
|
||||
{{ range . }}
|
||||
{{ $path := printf "images/social/%s.%s" . "svg" }}
|
||||
<li class="list-inline-item mb-0">
|
||||
<a href="{{ if eq . `rss` }} {{ `index.xml` | absURL }} {{ else if eq . `bluesky` }} https://bsky.app/profile/{{ index site.Params.social . }} {{ else }} https://{{ . }}.com/{{ index site.Params.social . }} {{ end }}" alt="{{ . }}" rel="noopener noreferrer" target="_blank">
|
||||
<div class="btn btn-icon btn-landing border-0">
|
||||
{{ with resources.Get $path }}
|
||||
{{ .Content | safeHTML }}
|
||||
{{ end }}
|
||||
</div>
|
||||
</a>
|
||||
</li>
|
||||
{{ end }}
|
||||
</ul>
|
||||
{{ end }}
|
||||
<!-- Social Links End -->
|
||||
|
||||
<div class="menu-extras ms-3 me-2">
|
||||
<div class="menu-item">
|
||||
<!-- Mobile menu toggle-->
|
||||
<button class="navbar-toggle btn btn-icon btn-soft-light" id="isToggle" aria-label="toggleMenu" onclick="toggleMenu()">
|
||||
<div class="lines">
|
||||
<span></span>
|
||||
<span></span>
|
||||
<span></span>
|
||||
</div>
|
||||
</button>
|
||||
<!-- End mobile menu toggle-->
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div><!--end container-->
|
||||
</header><!--end header-->
|
||||
<!-- Navbar End -->
|
||||
@@ -1 +1 @@
|
||||
<a href="https://localai.io"><img src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd"></a>
|
||||
<a href="https://localai.io"><img src="https://raw.githubusercontent.com/mudler/LocalAI/refs/heads/master/core/http/static/logo.png"></a>
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
[build]
|
||||
[build.environment]
|
||||
HUGO_VERSION = "0.121.2"
|
||||
HUGO_VERSION = "0.146.3"
|
||||
GO_VERSION = "1.22.2"
|
||||
|
||||